{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "\n", "\n", "# Mind Pulse\n", "\n" ], "metadata": { "id": "uVBQ8eFYMJii" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "sOKb4InlIWgE" }, "outputs": [], "source": [ "# imports\n", "import tensorflow as tf\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import StandardScaler\n", "from imblearn.over_sampling import RandomOverSampler\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "source": [ "# using drive to load our dataset\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Zt5eI3jZI-HI", "outputId": "f396fb7a-04ab-4656-d1c7-634bc71e5916" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "df=pd.read_csv(\"/content/drive/MyDrive/dataset/bs.csv\")\n", "del df['id'],df['ever_married'],df['work_type'],df['Residence_type']\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "q17IF39-JA5c", "outputId": "7827647c-c8a7-48bb-8c61-86df6396fc0d" }, "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " gender age hypertension heart_disease avg_glucose_level bmi \\\n", "0 Male 67.0 0 1 228.69 36.6 \n", "1 Female 61.0 0 0 202.21 NaN \n", "2 Male 80.0 0 1 105.92 32.5 \n", "3 Female 49.0 0 0 171.23 34.4 \n", "4 Female 79.0 1 0 174.12 24.0 \n", "... ... ... ... ... ... ... \n", "5105 Female 80.0 1 0 83.75 NaN \n", "5106 Female 81.0 0 0 125.20 40.0 \n", "5107 Female 35.0 0 0 82.99 30.6 \n", "5108 Male 51.0 0 0 166.29 25.6 \n", "5109 Female 44.0 0 0 85.28 26.2 \n", "\n", " smoking_status stroke \n", "0 formerly smoked 1 \n", "1 never smoked 1 \n", "2 never smoked 1 \n", "3 smokes 1 \n", "4 never smoked 1 \n", "... ... ... \n", "5105 never smoked 0 \n", "5106 never smoked 0 \n", "5107 never smoked 0 \n", "5108 formerly smoked 0 \n", "5109 Unknown 0 \n", "\n", "[5110 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderagehypertensionheart_diseaseavg_glucose_levelbmismoking_statusstroke
0Male67.001228.6936.6formerly smoked1
1Female61.000202.21NaNnever smoked1
2Male80.001105.9232.5never smoked1
3Female49.000171.2334.4smokes1
4Female79.010174.1224.0never smoked1
...........................
5105Female80.01083.75NaNnever smoked0
5106Female81.000125.2040.0never smoked0
5107Female35.00082.9930.6never smoked0
5108Male51.000166.2925.6formerly smoked0
5109Female44.00085.2826.2Unknown0
\n", "

5110 rows × 8 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 5110,\n \"fields\": [\n {\n \"column\": \"gender\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Male\",\n \"Female\",\n \"Other\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22.61264672311349,\n \"min\": 0.08,\n \"max\": 82.0,\n \"num_unique_values\": 104,\n \"samples\": [\n 45.0,\n 24.0,\n 33.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hypertension\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"heart_disease\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"avg_glucose_level\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 45.28356015058198,\n \"min\": 55.12,\n \"max\": 271.74,\n \"num_unique_values\": 3979,\n \"samples\": [\n 178.29,\n 156.69\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bmi\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.854066729680164,\n \"min\": 10.3,\n \"max\": 97.6,\n \"num_unique_values\": 418,\n \"samples\": [\n 49.5,\n 18.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"smoking_status\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"never smoked\",\n \"Unknown\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"stroke\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "df['gender']=(df['gender']=='Male').astype(int)\n", "df['smoking_status']=(df['smoking_status']=='smokes').astype(int)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "P9x3rK02KCaZ", "outputId": "75594c20-f7b6-4365-fa1b-edeb50d1b94c" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " gender age hypertension heart_disease avg_glucose_level bmi \\\n", "0 0 67.0 0 1 228.69 36.6 \n", "1 0 61.0 0 0 202.21 0.0 \n", "2 0 80.0 0 1 105.92 32.5 \n", "3 0 49.0 0 0 171.23 34.4 \n", "4 0 79.0 1 0 174.12 24.0 \n", "... ... ... ... ... ... ... \n", "5105 0 80.0 1 0 83.75 0.0 \n", "5106 0 81.0 0 0 125.20 40.0 \n", "5107 0 35.0 0 0 82.99 30.6 \n", "5108 0 51.0 0 0 166.29 25.6 \n", "5109 0 44.0 0 0 85.28 26.2 \n", "\n", " smoking_status stroke \n", "0 0 1 \n", "1 0 1 \n", "2 0 1 \n", "3 1 1 \n", "4 0 1 \n", "... ... ... \n", "5105 0 0 \n", "5106 0 0 \n", "5107 0 0 \n", "5108 0 0 \n", "5109 0 0 \n", "\n", "[5110 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderagehypertensionheart_diseaseavg_glucose_levelbmismoking_statusstroke
0067.001228.6936.601
1061.000202.210.001
2080.001105.9232.501
3049.000171.2334.411
4079.010174.1224.001
...........................
5105080.01083.750.000
5106081.000125.2040.000
5107035.00082.9930.600
5108051.000166.2925.600
5109044.00085.2826.200
\n", "

5110 rows × 8 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"df\",\n \"rows\": 5110,\n \"fields\": [\n {\n \"column\": \"gender\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22.61264672311349,\n \"min\": 0.08,\n \"max\": 82.0,\n \"num_unique_values\": 104,\n \"samples\": [\n 45.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hypertension\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"heart_disease\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"avg_glucose_level\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 45.28356015058198,\n \"min\": 55.12,\n \"max\": 271.74,\n \"num_unique_values\": 3979,\n \"samples\": [\n 178.29\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bmi\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9.529497256055075,\n \"min\": 0.0,\n \"max\": 97.6,\n \"num_unique_values\": 419,\n \"samples\": [\n 36.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"smoking_status\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"stroke\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "source": [ "df=df.fillna(0)" ], "metadata": { "id": "Z3VztZ8HLtmD" }, "execution_count": 11, "outputs": [] }, { "cell_type": "code", "source": [ "x_data = df.drop(['stroke'], axis = 1)\n", "y = df.stroke.values" ], "metadata": { "id": "jvdxSOtN35up" }, "execution_count": 12, "outputs": [] }, { "cell_type": "code", "source": [ "x_train, x_test, y_train, y_test = train_test_split(x_data, y, test_size = 0.2, random_state= 0)" ], "metadata": { "id": "nk7QPMxYLian" }, "execution_count": 13, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.linear_model import LogisticRegression\n", "lr = LogisticRegression()\n", "lr.fit(x_train, y_train)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, "id": "TB8qV9OnkH_5", "outputId": "b710672b-3a9b-4cf4-fd59-7bb0cd436e41" }, "execution_count": 16, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "LogisticRegression()" ], "text/html": [ "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "code", "source": [ "y_pred=lr.predict(x_test)" ], "metadata": { "id": "M66dC8FOXNEt" }, "execution_count": 17, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.metrics import classification_report\n", "print(classification_report(y_pred,y_test))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L06DnXKhXPzS", "outputId": "cd79637c-876e-4d65-c515-f58c8b145481" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " precision recall f1-score support\n", "\n", " 0 0.50 0.56 0.53 9\n", " 1 0.92 0.91 0.91 53\n", "\n", " accuracy 0.85 62\n", " macro avg 0.71 0.73 0.72 62\n", "weighted avg 0.86 0.85 0.86 62\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "import pickle\n", "\n", "with open('mp.pkl','wb') as f:\n", " pickle.dump(lr,f)\n", "\n", "# load\n", "with open('mp.pkl', 'rb') as f:\n", " lr = pickle.load(f)" ], "metadata": { "id": "4IrkPQCLXhYw" }, "execution_count": 18, "outputs": [] } ] }