{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UQK1MhvbUiO1"
   },
   "source": [
    "# KNN classification on `indeed.csv`\n",
    "\n",
    "Trains a k-nearest-neighbours classifier on two feature columns of `indeed.csv`, evaluates it on a held-out 30% test split, and round-trips the fitted model through pickle."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CtfYG2yrQ0i-"
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn import neighbors\n",
    "# NOTE(review): load_iris is not used anywhere below; kept only so the import surface is unchanged.\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import f1_score\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "BznYCXGPQ4dA",
    "outputId": "4b30e9e8-2774-4870-9ff3-4f680d12b7c2"
   },
   "outputs": [],
   "source": [
    "# Load the dataset\n",
    "data = pd.read_csv('indeed.csv')\n",
    "\n",
    "# Features are columns 0 and 2; the label is column 1.\n",
    "X = data.iloc[:, [0, 2]].values\n",
    "y = data.iloc[:, 1].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vICs_orGRMDG"
   },
   "outputs": [],
   "source": [
    "# Split data into training and testing sets (30% held out; fixed seed keeps the split reproducible)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ah1zLnrYRONr"
   },
   "outputs": [],
   "source": [
    "# Number of nearest neighbors\n",
    "num_neighbors = 12\n",
    "\n",
    "# Create a K Nearest Neighbors classifier model.\n",
    "# n_neighbors must be passed explicitly; otherwise scikit-learn falls back to its default of 5.\n",
    "clfKNN = neighbors.KNeighborsClassifier(n_neighbors=num_neighbors)\n",
    "\n",
    "clfKNN.fit(X_train, y_train)\n",
    "\n",
    "y_test_pred = clfKNN.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "wBfBd6dtSzIn",
    "outputId": "89876b62-9471-4474-8d15-e60732565836"
   },
   "outputs": [],
   "source": [
    "# Store the score under its own name: rebinding `f1_score` would shadow the imported\n",
    "# function and make this cell raise a TypeError the second time it is run.\n",
    "f1_micro = f1_score(y_test, y_test_pred, average='micro')\n",
    "print(f'f1_score: {f1_micro}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "E4qA4gpUNH9X",
    "outputId": "fbc9e502-a49e-422a-8863-86548187bfc2"
   },
   "outputs": [],
   "source": [
    "# Evaluate the model on the test data (accuracy as a percentage).\n",
    "# For single-label multiclass predictions this equals 100 * micro-averaged F1.\n",
    "accuracy = 100 * accuracy_score(y_test, y_test_pred)\n",
    "print(f'Accuracy: {accuracy}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "p0dObkPrSlEZ",
    "outputId": "06d843c3-94ff-486c-d4c6-7a411edb0e40"
   },
   "outputs": [],
   "source": [
    "# Confusion matrix computed from the true test labels and the predicted labels\n",
    "cmKNN = confusion_matrix(y_test, y_test_pred)\n",
    "print(cmKNN)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Persist the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the fitted model; `with` guarantees the file handle is flushed and closed.\n",
    "with open('knn_model.pkl', 'wb') as model_file:\n",
    "    pickle.dump(clfKNN, model_file)\n",
    "\n",
    "# Only unpickle files you trust: pickle.load can execute arbitrary code.\n",
    "with open('knn_model.pkl', 'rb') as model_file:\n",
    "    knn_model_dump = pickle.load(model_file)\n",
    "\n",
    "# The reloaded model must reproduce the in-memory model's test predictions.\n",
    "assert (knn_model_dump.predict(X_test) == y_test_pred).all()\n",
    "\n",
    "# Predict with the reloaded model so the save/load round trip is actually exercised.\n",
    "print(knn_model_dump.predict([[1.2, 100]]))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "interpreter": {
   "hash": "e9abd7bf17300ee9df567d8d9580282ee75f9a695f2d1fb59e9523387a62f2ed"
  },
  "kernelspec": {
   "display_name": "Python 3.11.2 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}