{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UQK1MhvbUiO1"
      },
      "source": [
        "#KNN on the Iris Dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "CtfYG2yrQ0i-"
      },
      "outputs": [],
      "source": [
        "from sklearn import neighbors\n",
        "from sklearn.datasets import load_iris\n",
        "from sklearn.metrics import confusion_matrix\n",
        "from sklearn.metrics import f1_score\n",
        "from sklearn.metrics import accuracy_score\n",
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BznYCXGPQ4dA",
        "outputId": "4b30e9e8-2774-4870-9ff3-4f680d12b7c2"
      },
      "outputs": [],
      "source": [
        " # Load the dataset\n",
        "data = pd.read_csv('indeed.csv')\n",
        "\n",
        "X = data.iloc[:, [0, 2]].values\n",
        "y = data.iloc[:, 1].values"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "id": "vICs_orGRMDG"
      },
      "outputs": [],
      "source": [
        "# Split data into training and testing sets\n",
        "from sklearn.model_selection import train_test_split\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "id": "ah1zLnrYRONr"
      },
      "outputs": [],
      "source": [
        "# Number of nearest neighbors\n",
        "num_neighbors = 12\n",
        "# Step size of the visualization grid\n",
        "step_size = 0.01\n",
        "# Create a K Nearest Neighbors classifier model\n",
        "clfKNN = neighbors.KNeighborsClassifier()\n",
        "\n",
        "clfKNN.fit(X_train, y_train)\n",
        "\n",
        "y_test_pred = clfKNN.predict(X_test)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wBfBd6dtSzIn",
        "outputId": "89876b62-9471-4474-8d15-e60732565836"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "f1_score: 0.8139534883720931\n"
          ]
        }
      ],
      "source": [
        "f1_score = f1_score(y_test, y_test_pred, average='micro')\n",
        "print(f'f1_score: {f1_score}')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "E4qA4gpUNH9X",
        "outputId": "fbc9e502-a49e-422a-8863-86548187bfc2"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Accuracy: 81.3953488372093\n"
          ]
        }
      ],
      "source": [
        "# Evaluate the model on the test data\n",
        "accuracy = 100 * accuracy_score(y_test, y_test_pred)\n",
        "print(f'Accuracy: {accuracy}')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "p0dObkPrSlEZ",
        "outputId": "06d843c3-94ff-486c-d4c6-7a411edb0e40"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[[17  2  0  0]\n",
            " [ 0 14  5  0]\n",
            " [ 0  4 18  1]\n",
            " [ 1  1  2 21]]\n"
          ]
        }
      ],
      "source": [
        "cmKNN = confusion_matrix(y_test, y_test_pred)\n",
        "print(cmKNN)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[1]\n"
          ]
        }
      ],
      "source": [
        "import pickle\n",
        "\n",
        "pickle.dump(clfKNN, open('knn_model.pkl', 'wb'))\n",
        "\n",
        "knn_model_dump = pickle.load(open('knn_model.pkl', 'rb'))\n",
        "\n",
        "print(clfKNN.predict([[1.2, 100]]))"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "interpreter": {
      "hash": "e9abd7bf17300ee9df567d8d9580282ee75f9a695f2d1fb59e9523387a62f2ed"
    },
    "kernelspec": {
      "display_name": "Python 3.11.2 64-bit",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.2"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}