{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Sugar Kinetics" ], "metadata": { "id": "iLEeLWoV-tpx" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "L96SNQ8HVI7m" }, "outputs": [], "source": [ "# imports\n", "import tensorflow as tf\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import StandardScaler\n", "from imblearn.over_sampling import RandomOverSampler\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "source": [ "# using drive to load our dataset\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ea3adROCVORJ", "outputId": "ba91f1a3-532e-49d4-b664-4b79a7c27887" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "df=pd.read_csv(\"/content/drive/MyDrive/dataset/diabetes.csv\")\n", "del df['Pregnancies'],df['DiabetesPedigreeFunction'],df['SkinThickness']\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "td0NDw6QlrIk", "outputId": "39e6502d-04f4-4807-df25-9ac4bdb1d51c" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Glucose BloodPressure Insulin BMI Age Outcome\n", "0 148 72 0 33.6 50 1\n", "1 85 66 0 26.6 31 0\n", "2 183 64 0 23.3 32 1\n", "3 89 66 94 28.1 21 0\n", "4 137 40 168 43.1 33 1\n", ".. ... ... ... ... ... ...\n", "763 101 76 180 32.9 63 0\n", "764 122 70 0 36.8 27 0\n", "765 121 72 112 26.2 30 0\n", "766 126 60 0 30.1 47 1\n", "767 93 70 0 30.4 23 0\n", "\n", "[768 rows x 6 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GlucoseBloodPressureInsulinBMIAgeOutcome
014872033.6501
18566026.6310
218364023.3321
389669428.1210
41374016843.1331
.....................
7631017618032.9630
76412270036.8270
7651217211226.2300
76612660030.1471
7679370030.4230
\n", "

768 rows × 6 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 768,\n \"fields\": [\n {\n \"column\": \"Glucose\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31,\n \"min\": 0,\n \"max\": 199,\n \"num_unique_values\": 136,\n \"samples\": [\n 151,\n 101,\n 112\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BloodPressure\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19,\n \"min\": 0,\n \"max\": 122,\n \"num_unique_values\": 47,\n \"samples\": [\n 86,\n 46,\n 85\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Insulin\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 115,\n \"min\": 0,\n \"max\": 846,\n \"num_unique_values\": 186,\n \"samples\": [\n 52,\n 41,\n 183\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMI\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.884160320375446,\n \"min\": 0.0,\n \"max\": 67.1,\n \"num_unique_values\": 248,\n \"samples\": [\n 19.9,\n 31.0,\n 38.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11,\n \"min\": 21,\n \"max\": 81,\n \"num_unique_values\": 52,\n \"samples\": [\n 60,\n 47,\n 72\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Outcome\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "x_data = df.drop(['Outcome'], axis = 1)\n", "y = df.Outcome.values" ], "metadata": { "id": "jvdxSOtN35up" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "x_train, x_test, y_train, y_test = train_test_split(x_data, y, test_size = 0.2, random_state= 0)" ], "metadata": { "id": "dHaFMd8A94Ks" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "rf = RandomForestClassifier(n_estimators = 1000, random_state= 1)\n", "rf.fit(x_train, y_train)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, "id": "LvD2S2ZI7ucw", "outputId": "e4fd08d0-a046-4e35-8c6c-ed4c64eaeb67" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RandomForestClassifier(n_estimators=1000, random_state=1)" ], "text/html": [ "
RandomForestClassifier(n_estimators=1000, random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "y_pred=rf.predict(x_test)" ], "metadata": { "id": "M66dC8FOXNEt" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.metrics import classification_report\n", "print(classification_report(y_pred,y_test))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L06DnXKhXPzS", "outputId": "4ea67626-fba1-45de-9cc3-290c784e15f7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " precision recall f1-score support\n", "\n", " 0 0.86 0.86 0.86 107\n", " 1 0.68 0.68 0.68 47\n", "\n", " accuracy 0.81 154\n", " macro avg 0.77 0.77 0.77 154\n", "weighted avg 0.81 0.81 0.81 154\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "import pickle\n", "\n", "with open('sk.pkl','wb') as f:\n", " pickle.dump(rf,f)\n", "\n", "# load\n", "with open('sk.pkl', 'rb') as f:\n", " rf = pickle.load(f)" ], "metadata": { "id": "4IrkPQCLXhYw" }, "execution_count": null, "outputs": [] } ] }