rishabh5752 committed on
Commit
89c10f6
1 Parent(s): 0075743

Upload 5 files

Browse files
Diabetes Prediction Web App.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Sat Jun 24 23:11:36 2023
5
+
6
+ @author: rishabhsharma
7
+ """
8
+
9
+ import numpy as np
10
+ import pickle
11
+ import streamlit as st
12
+
13
+
14
+ # loading the saved model
15
# Load the pickled model through a context manager so the file handle is
# closed as soon as unpickling finishes instead of being left open.
# NOTE(review): the absolute path is specific to one machine -- confirm where
# trained_model.sav lives in the deployed app.
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file.
with open('/Users/rishabhsharma/Desktop/Diabetes Prediction/trained_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
16
+
17
+
18
+ # creating a function for Prediction
19
+
20
def diabetes_prediction(input_data):
    """Classify one set of readings with the loaded model.

    Args:
        input_data: Sequence of feature values for a single person, in the
            order the caller supplies them. Values may be numbers or numeric
            strings (the web form hands over raw text).

    Returns:
        'The person is not diabetic' when the model's first prediction is 0,
        otherwise 'The person is diabetic'.

    Raises:
        ValueError: If any value cannot be converted to a float, for example
            a blank or non-numeric form field.
    """
    # Convert to floats explicitly instead of handing a string array to the
    # estimator; bad input then fails here with a clear ValueError before the
    # model is touched.
    features = np.asarray(input_data, dtype=float)

    # reshape the array as we are predicting for one instance
    features = features.reshape(1, -1)

    prediction = loaded_model.predict(features)
    print(prediction)

    if prediction[0] == 0:
        return 'The person is not diabetic'
    return 'The person is diabetic'
36
+
37
+
38
+
39
def main():
    """Render the diabetes prediction form and display the result.

    Draws the page title and eight free-text inputs, then, when the
    'Diabetes Test Result' button is pressed, passes the entered values to
    diabetes_prediction and shows the returned message. Nothing is shown
    until the button has been pressed, and input that cannot be interpreted
    as numbers is reported as an error message instead of an unhandled
    exception.
    """
    # giving a title
    st.title('Diabetes Prediction Web App')

    # getting the input data from the user
    Pregnancies = st.text_input('Number of Pregnancies')
    Glucose = st.text_input('Glucose Level')
    BloodPressure = st.text_input('Blood Pressure value')
    SkinThickness = st.text_input('Skin Thickness value')
    Insulin = st.text_input('Insulin Level')
    BMI = st.text_input('BMI value')
    DiabetesPedigreeFunction = st.text_input('Diabetes Pedigree Function value')
    Age = st.text_input('Age of the Person')

    # creating a button for Prediction
    if st.button('Diabetes Test Result'):
        try:
            diagnosis = diabetes_prediction([
                Pregnancies,
                Glucose,
                BloodPressure,
                SkinThickness,
                Insulin,
                BMI,
                DiabetesPedigreeFunction,
                Age,
            ])
        except ValueError:
            # Blank or non-numeric text cannot be converted to a float.
            # NOTE(review): assumes the prediction path raises ValueError for
            # such input -- confirm against diabetes_prediction.
            st.error('Please enter a numeric value in every field.')
        else:
            # Only show the success box once there is a result to report,
            # rather than rendering an empty one on every rerun.
            st.success(diagnosis)


if __name__ == '__main__':
    main()
Diabetes_Predicition.ipynb ADDED
@@ -0,0 +1,1216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "markdown",
19
+ "metadata": {
20
+ "id": "LnPbntVRnfvV"
21
+ },
22
+ "source": [
23
+ "Importing the Dependencies"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "metadata": {
29
+ "id": "-71UtHzNVWjB"
30
+ },
31
+ "source": [
32
+ "import numpy as np\n",
33
+ "import pandas as pd\n",
34
+ "from sklearn.model_selection import train_test_split\n",
35
+ "from sklearn import svm\n",
36
+ "from sklearn.metrics import accuracy_score"
37
+ ],
38
+ "execution_count": null,
39
+ "outputs": []
40
+ },
41
+ {
42
+ "cell_type": "markdown",
43
+ "metadata": {
44
+ "id": "bmfOfG8joBBy"
45
+ },
46
+ "source": [
47
+ "Data Collection and Analysis\n",
48
+ "\n",
49
+ "PIMA Diabetes Dataset"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "metadata": {
55
+ "id": "Xpw6Mj_pn_TL"
56
+ },
57
+ "source": [
58
+ "# loading the diabetes dataset to a pandas DataFrame\n",
59
+ "diabetes_dataset = pd.read_csv('/content/diabetes.csv')"
60
+ ],
61
+ "execution_count": null,
62
+ "outputs": []
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "metadata": {
67
+ "colab": {
68
+ "base_uri": "https://localhost:8080/",
69
+ "height": 206
70
+ },
71
+ "id": "-tjO09ncovoh",
72
+ "outputId": "4dd3939d-9cc2-4f80-cf6d-88dd95d02c2e"
73
+ },
74
+ "source": [
75
+ "# printing the first 5 rows of the dataset\n",
76
+ "diabetes_dataset.head()"
77
+ ],
78
+ "execution_count": null,
79
+ "outputs": [
80
+ {
81
+ "output_type": "execute_result",
82
+ "data": {
83
+ "text/plain": [
84
+ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
85
+ "0 6 148 72 35 0 33.6 \n",
86
+ "1 1 85 66 29 0 26.6 \n",
87
+ "2 8 183 64 0 0 23.3 \n",
88
+ "3 1 89 66 23 94 28.1 \n",
89
+ "4 0 137 40 35 168 43.1 \n",
90
+ "\n",
91
+ " DiabetesPedigreeFunction Age Outcome \n",
92
+ "0 0.627 50 1 \n",
93
+ "1 0.351 31 0 \n",
94
+ "2 0.672 32 1 \n",
95
+ "3 0.167 21 0 \n",
96
+ "4 2.288 33 1 "
97
+ ],
98
+ "text/html": [
99
+ "\n",
100
+ " <div id=\"df-039a6e3c-7e3d-4d2e-b59d-1cb24047d0b7\">\n",
101
+ " <div class=\"colab-df-container\">\n",
102
+ " <div>\n",
103
+ "<style scoped>\n",
104
+ " .dataframe tbody tr th:only-of-type {\n",
105
+ " vertical-align: middle;\n",
106
+ " }\n",
107
+ "\n",
108
+ " .dataframe tbody tr th {\n",
109
+ " vertical-align: top;\n",
110
+ " }\n",
111
+ "\n",
112
+ " .dataframe thead th {\n",
113
+ " text-align: right;\n",
114
+ " }\n",
115
+ "</style>\n",
116
+ "<table border=\"1\" class=\"dataframe\">\n",
117
+ " <thead>\n",
118
+ " <tr style=\"text-align: right;\">\n",
119
+ " <th></th>\n",
120
+ " <th>Pregnancies</th>\n",
121
+ " <th>Glucose</th>\n",
122
+ " <th>BloodPressure</th>\n",
123
+ " <th>SkinThickness</th>\n",
124
+ " <th>Insulin</th>\n",
125
+ " <th>BMI</th>\n",
126
+ " <th>DiabetesPedigreeFunction</th>\n",
127
+ " <th>Age</th>\n",
128
+ " <th>Outcome</th>\n",
129
+ " </tr>\n",
130
+ " </thead>\n",
131
+ " <tbody>\n",
132
+ " <tr>\n",
133
+ " <th>0</th>\n",
134
+ " <td>6</td>\n",
135
+ " <td>148</td>\n",
136
+ " <td>72</td>\n",
137
+ " <td>35</td>\n",
138
+ " <td>0</td>\n",
139
+ " <td>33.6</td>\n",
140
+ " <td>0.627</td>\n",
141
+ " <td>50</td>\n",
142
+ " <td>1</td>\n",
143
+ " </tr>\n",
144
+ " <tr>\n",
145
+ " <th>1</th>\n",
146
+ " <td>1</td>\n",
147
+ " <td>85</td>\n",
148
+ " <td>66</td>\n",
149
+ " <td>29</td>\n",
150
+ " <td>0</td>\n",
151
+ " <td>26.6</td>\n",
152
+ " <td>0.351</td>\n",
153
+ " <td>31</td>\n",
154
+ " <td>0</td>\n",
155
+ " </tr>\n",
156
+ " <tr>\n",
157
+ " <th>2</th>\n",
158
+ " <td>8</td>\n",
159
+ " <td>183</td>\n",
160
+ " <td>64</td>\n",
161
+ " <td>0</td>\n",
162
+ " <td>0</td>\n",
163
+ " <td>23.3</td>\n",
164
+ " <td>0.672</td>\n",
165
+ " <td>32</td>\n",
166
+ " <td>1</td>\n",
167
+ " </tr>\n",
168
+ " <tr>\n",
169
+ " <th>3</th>\n",
170
+ " <td>1</td>\n",
171
+ " <td>89</td>\n",
172
+ " <td>66</td>\n",
173
+ " <td>23</td>\n",
174
+ " <td>94</td>\n",
175
+ " <td>28.1</td>\n",
176
+ " <td>0.167</td>\n",
177
+ " <td>21</td>\n",
178
+ " <td>0</td>\n",
179
+ " </tr>\n",
180
+ " <tr>\n",
181
+ " <th>4</th>\n",
182
+ " <td>0</td>\n",
183
+ " <td>137</td>\n",
184
+ " <td>40</td>\n",
185
+ " <td>35</td>\n",
186
+ " <td>168</td>\n",
187
+ " <td>43.1</td>\n",
188
+ " <td>2.288</td>\n",
189
+ " <td>33</td>\n",
190
+ " <td>1</td>\n",
191
+ " </tr>\n",
192
+ " </tbody>\n",
193
+ "</table>\n",
194
+ "</div>\n",
195
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-039a6e3c-7e3d-4d2e-b59d-1cb24047d0b7')\"\n",
196
+ " title=\"Convert this dataframe to an interactive table.\"\n",
197
+ " style=\"display:none;\">\n",
198
+ " \n",
199
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
200
+ " width=\"24px\">\n",
201
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
202
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
203
+ " </svg>\n",
204
+ " </button>\n",
205
+ " \n",
206
+ " <style>\n",
207
+ " .colab-df-container {\n",
208
+ " display:flex;\n",
209
+ " flex-wrap:wrap;\n",
210
+ " gap: 12px;\n",
211
+ " }\n",
212
+ "\n",
213
+ " .colab-df-convert {\n",
214
+ " background-color: #E8F0FE;\n",
215
+ " border: none;\n",
216
+ " border-radius: 50%;\n",
217
+ " cursor: pointer;\n",
218
+ " display: none;\n",
219
+ " fill: #1967D2;\n",
220
+ " height: 32px;\n",
221
+ " padding: 0 0 0 0;\n",
222
+ " width: 32px;\n",
223
+ " }\n",
224
+ "\n",
225
+ " .colab-df-convert:hover {\n",
226
+ " background-color: #E2EBFA;\n",
227
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
228
+ " fill: #174EA6;\n",
229
+ " }\n",
230
+ "\n",
231
+ " [theme=dark] .colab-df-convert {\n",
232
+ " background-color: #3B4455;\n",
233
+ " fill: #D2E3FC;\n",
234
+ " }\n",
235
+ "\n",
236
+ " [theme=dark] .colab-df-convert:hover {\n",
237
+ " background-color: #434B5C;\n",
238
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
239
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
240
+ " fill: #FFFFFF;\n",
241
+ " }\n",
242
+ " </style>\n",
243
+ "\n",
244
+ " <script>\n",
245
+ " const buttonEl =\n",
246
+ " document.querySelector('#df-039a6e3c-7e3d-4d2e-b59d-1cb24047d0b7 button.colab-df-convert');\n",
247
+ " buttonEl.style.display =\n",
248
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
249
+ "\n",
250
+ " async function convertToInteractive(key) {\n",
251
+ " const element = document.querySelector('#df-039a6e3c-7e3d-4d2e-b59d-1cb24047d0b7');\n",
252
+ " const dataTable =\n",
253
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
254
+ " [key], {});\n",
255
+ " if (!dataTable) return;\n",
256
+ "\n",
257
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
258
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
259
+ " + ' to learn more about interactive tables.';\n",
260
+ " element.innerHTML = '';\n",
261
+ " dataTable['output_type'] = 'display_data';\n",
262
+ " await google.colab.output.renderOutput(dataTable, element);\n",
263
+ " const docLink = document.createElement('div');\n",
264
+ " docLink.innerHTML = docLinkHtml;\n",
265
+ " element.appendChild(docLink);\n",
266
+ " }\n",
267
+ " </script>\n",
268
+ " </div>\n",
269
+ " </div>\n",
270
+ " "
271
+ ]
272
+ },
273
+ "metadata": {},
274
+ "execution_count": 3
275
+ }
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "metadata": {
281
+ "colab": {
282
+ "base_uri": "https://localhost:8080/"
283
+ },
284
+ "id": "lynParo6pEMB",
285
+ "outputId": "8d134bf4-ed17-4ee5-9cbe-48d88cdd4495"
286
+ },
287
+ "source": [
288
+ "# number of rows and Columns in this dataset\n",
289
+ "diabetes_dataset.shape"
290
+ ],
291
+ "execution_count": null,
292
+ "outputs": [
293
+ {
294
+ "output_type": "execute_result",
295
+ "data": {
296
+ "text/plain": [
297
+ "(768, 9)"
298
+ ]
299
+ },
300
+ "metadata": {},
301
+ "execution_count": 4
302
+ }
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "metadata": {
308
+ "colab": {
309
+ "base_uri": "https://localhost:8080/",
310
+ "height": 364
311
+ },
312
+ "id": "3NDJOlrEpmoL",
313
+ "outputId": "7a404a6f-955b-4c04-fbe4-8634869eaf8f"
314
+ },
315
+ "source": [
316
+ "# getting the statistical measures of the data\n",
317
+ "diabetes_dataset.describe()"
318
+ ],
319
+ "execution_count": null,
320
+ "outputs": [
321
+ {
322
+ "output_type": "execute_result",
323
+ "data": {
324
+ "text/plain": [
325
+ " Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
326
+ "count 768.000000 768.000000 768.000000 768.000000 768.000000 \n",
327
+ "mean 3.845052 120.894531 69.105469 20.536458 79.799479 \n",
328
+ "std 3.369578 31.972618 19.355807 15.952218 115.244002 \n",
329
+ "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
330
+ "25% 1.000000 99.000000 62.000000 0.000000 0.000000 \n",
331
+ "50% 3.000000 117.000000 72.000000 23.000000 30.500000 \n",
332
+ "75% 6.000000 140.250000 80.000000 32.000000 127.250000 \n",
333
+ "max 17.000000 199.000000 122.000000 99.000000 846.000000 \n",
334
+ "\n",
335
+ " BMI DiabetesPedigreeFunction Age Outcome \n",
336
+ "count 768.000000 768.000000 768.000000 768.000000 \n",
337
+ "mean 31.992578 0.471876 33.240885 0.348958 \n",
338
+ "std 7.884160 0.331329 11.760232 0.476951 \n",
339
+ "min 0.000000 0.078000 21.000000 0.000000 \n",
340
+ "25% 27.300000 0.243750 24.000000 0.000000 \n",
341
+ "50% 32.000000 0.372500 29.000000 0.000000 \n",
342
+ "75% 36.600000 0.626250 41.000000 1.000000 \n",
343
+ "max 67.100000 2.420000 81.000000 1.000000 "
344
+ ],
345
+ "text/html": [
346
+ "\n",
347
+ " <div id=\"df-7e6e1f73-08b2-436f-8bd1-cee4cb79b0e4\">\n",
348
+ " <div class=\"colab-df-container\">\n",
349
+ " <div>\n",
350
+ "<style scoped>\n",
351
+ " .dataframe tbody tr th:only-of-type {\n",
352
+ " vertical-align: middle;\n",
353
+ " }\n",
354
+ "\n",
355
+ " .dataframe tbody tr th {\n",
356
+ " vertical-align: top;\n",
357
+ " }\n",
358
+ "\n",
359
+ " .dataframe thead th {\n",
360
+ " text-align: right;\n",
361
+ " }\n",
362
+ "</style>\n",
363
+ "<table border=\"1\" class=\"dataframe\">\n",
364
+ " <thead>\n",
365
+ " <tr style=\"text-align: right;\">\n",
366
+ " <th></th>\n",
367
+ " <th>Pregnancies</th>\n",
368
+ " <th>Glucose</th>\n",
369
+ " <th>BloodPressure</th>\n",
370
+ " <th>SkinThickness</th>\n",
371
+ " <th>Insulin</th>\n",
372
+ " <th>BMI</th>\n",
373
+ " <th>DiabetesPedigreeFunction</th>\n",
374
+ " <th>Age</th>\n",
375
+ " <th>Outcome</th>\n",
376
+ " </tr>\n",
377
+ " </thead>\n",
378
+ " <tbody>\n",
379
+ " <tr>\n",
380
+ " <th>count</th>\n",
381
+ " <td>768.000000</td>\n",
382
+ " <td>768.000000</td>\n",
383
+ " <td>768.000000</td>\n",
384
+ " <td>768.000000</td>\n",
385
+ " <td>768.000000</td>\n",
386
+ " <td>768.000000</td>\n",
387
+ " <td>768.000000</td>\n",
388
+ " <td>768.000000</td>\n",
389
+ " <td>768.000000</td>\n",
390
+ " </tr>\n",
391
+ " <tr>\n",
392
+ " <th>mean</th>\n",
393
+ " <td>3.845052</td>\n",
394
+ " <td>120.894531</td>\n",
395
+ " <td>69.105469</td>\n",
396
+ " <td>20.536458</td>\n",
397
+ " <td>79.799479</td>\n",
398
+ " <td>31.992578</td>\n",
399
+ " <td>0.471876</td>\n",
400
+ " <td>33.240885</td>\n",
401
+ " <td>0.348958</td>\n",
402
+ " </tr>\n",
403
+ " <tr>\n",
404
+ " <th>std</th>\n",
405
+ " <td>3.369578</td>\n",
406
+ " <td>31.972618</td>\n",
407
+ " <td>19.355807</td>\n",
408
+ " <td>15.952218</td>\n",
409
+ " <td>115.244002</td>\n",
410
+ " <td>7.884160</td>\n",
411
+ " <td>0.331329</td>\n",
412
+ " <td>11.760232</td>\n",
413
+ " <td>0.476951</td>\n",
414
+ " </tr>\n",
415
+ " <tr>\n",
416
+ " <th>min</th>\n",
417
+ " <td>0.000000</td>\n",
418
+ " <td>0.000000</td>\n",
419
+ " <td>0.000000</td>\n",
420
+ " <td>0.000000</td>\n",
421
+ " <td>0.000000</td>\n",
422
+ " <td>0.000000</td>\n",
423
+ " <td>0.078000</td>\n",
424
+ " <td>21.000000</td>\n",
425
+ " <td>0.000000</td>\n",
426
+ " </tr>\n",
427
+ " <tr>\n",
428
+ " <th>25%</th>\n",
429
+ " <td>1.000000</td>\n",
430
+ " <td>99.000000</td>\n",
431
+ " <td>62.000000</td>\n",
432
+ " <td>0.000000</td>\n",
433
+ " <td>0.000000</td>\n",
434
+ " <td>27.300000</td>\n",
435
+ " <td>0.243750</td>\n",
436
+ " <td>24.000000</td>\n",
437
+ " <td>0.000000</td>\n",
438
+ " </tr>\n",
439
+ " <tr>\n",
440
+ " <th>50%</th>\n",
441
+ " <td>3.000000</td>\n",
442
+ " <td>117.000000</td>\n",
443
+ " <td>72.000000</td>\n",
444
+ " <td>23.000000</td>\n",
445
+ " <td>30.500000</td>\n",
446
+ " <td>32.000000</td>\n",
447
+ " <td>0.372500</td>\n",
448
+ " <td>29.000000</td>\n",
449
+ " <td>0.000000</td>\n",
450
+ " </tr>\n",
451
+ " <tr>\n",
452
+ " <th>75%</th>\n",
453
+ " <td>6.000000</td>\n",
454
+ " <td>140.250000</td>\n",
455
+ " <td>80.000000</td>\n",
456
+ " <td>32.000000</td>\n",
457
+ " <td>127.250000</td>\n",
458
+ " <td>36.600000</td>\n",
459
+ " <td>0.626250</td>\n",
460
+ " <td>41.000000</td>\n",
461
+ " <td>1.000000</td>\n",
462
+ " </tr>\n",
463
+ " <tr>\n",
464
+ " <th>max</th>\n",
465
+ " <td>17.000000</td>\n",
466
+ " <td>199.000000</td>\n",
467
+ " <td>122.000000</td>\n",
468
+ " <td>99.000000</td>\n",
469
+ " <td>846.000000</td>\n",
470
+ " <td>67.100000</td>\n",
471
+ " <td>2.420000</td>\n",
472
+ " <td>81.000000</td>\n",
473
+ " <td>1.000000</td>\n",
474
+ " </tr>\n",
475
+ " </tbody>\n",
476
+ "</table>\n",
477
+ "</div>\n",
478
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7e6e1f73-08b2-436f-8bd1-cee4cb79b0e4')\"\n",
479
+ " title=\"Convert this dataframe to an interactive table.\"\n",
480
+ " style=\"display:none;\">\n",
481
+ " \n",
482
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
483
+ " width=\"24px\">\n",
484
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
485
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
486
+ " </svg>\n",
487
+ " </button>\n",
488
+ " \n",
489
+ " <style>\n",
490
+ " .colab-df-container {\n",
491
+ " display:flex;\n",
492
+ " flex-wrap:wrap;\n",
493
+ " gap: 12px;\n",
494
+ " }\n",
495
+ "\n",
496
+ " .colab-df-convert {\n",
497
+ " background-color: #E8F0FE;\n",
498
+ " border: none;\n",
499
+ " border-radius: 50%;\n",
500
+ " cursor: pointer;\n",
501
+ " display: none;\n",
502
+ " fill: #1967D2;\n",
503
+ " height: 32px;\n",
504
+ " padding: 0 0 0 0;\n",
505
+ " width: 32px;\n",
506
+ " }\n",
507
+ "\n",
508
+ " .colab-df-convert:hover {\n",
509
+ " background-color: #E2EBFA;\n",
510
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
511
+ " fill: #174EA6;\n",
512
+ " }\n",
513
+ "\n",
514
+ " [theme=dark] .colab-df-convert {\n",
515
+ " background-color: #3B4455;\n",
516
+ " fill: #D2E3FC;\n",
517
+ " }\n",
518
+ "\n",
519
+ " [theme=dark] .colab-df-convert:hover {\n",
520
+ " background-color: #434B5C;\n",
521
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
522
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
523
+ " fill: #FFFFFF;\n",
524
+ " }\n",
525
+ " </style>\n",
526
+ "\n",
527
+ " <script>\n",
528
+ " const buttonEl =\n",
529
+ " document.querySelector('#df-7e6e1f73-08b2-436f-8bd1-cee4cb79b0e4 button.colab-df-convert');\n",
530
+ " buttonEl.style.display =\n",
531
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
532
+ "\n",
533
+ " async function convertToInteractive(key) {\n",
534
+ " const element = document.querySelector('#df-7e6e1f73-08b2-436f-8bd1-cee4cb79b0e4');\n",
535
+ " const dataTable =\n",
536
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
537
+ " [key], {});\n",
538
+ " if (!dataTable) return;\n",
539
+ "\n",
540
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
541
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
542
+ " + ' to learn more about interactive tables.';\n",
543
+ " element.innerHTML = '';\n",
544
+ " dataTable['output_type'] = 'display_data';\n",
545
+ " await google.colab.output.renderOutput(dataTable, element);\n",
546
+ " const docLink = document.createElement('div');\n",
547
+ " docLink.innerHTML = docLinkHtml;\n",
548
+ " element.appendChild(docLink);\n",
549
+ " }\n",
550
+ " </script>\n",
551
+ " </div>\n",
552
+ " </div>\n",
553
+ " "
554
+ ]
555
+ },
556
+ "metadata": {},
557
+ "execution_count": 5
558
+ }
559
+ ]
560
+ },
561
+ {
562
+ "cell_type": "code",
563
+ "metadata": {
564
+ "colab": {
565
+ "base_uri": "https://localhost:8080/"
566
+ },
567
+ "id": "LrpHzaGpp5dQ",
568
+ "outputId": "ccc5292b-de17-4fd0-e7a9-ecd379a08404"
569
+ },
570
+ "source": [
571
+ "diabetes_dataset['Outcome'].value_counts()"
572
+ ],
573
+ "execution_count": null,
574
+ "outputs": [
575
+ {
576
+ "output_type": "execute_result",
577
+ "data": {
578
+ "text/plain": [
579
+ "0 500\n",
580
+ "1 268\n",
581
+ "Name: Outcome, dtype: int64"
582
+ ]
583
+ },
584
+ "metadata": {},
585
+ "execution_count": 6
586
+ }
587
+ ]
588
+ },
589
+ {
590
+ "cell_type": "markdown",
591
+ "metadata": {
592
+ "id": "cB1qRaNcqeh5"
593
+ },
594
+ "source": [
595
+ "0 --> Non-Diabetic\n",
596
+ "\n",
597
+ "1 --> Diabetic"
598
+ ]
599
+ },
600
+ {
601
+ "cell_type": "code",
602
+ "metadata": {
603
+ "colab": {
604
+ "base_uri": "https://localhost:8080/",
605
+ "height": 187
606
+ },
607
+ "id": "I6MWR0k_qSCK",
608
+ "outputId": "2ac9729d-c4ce-4866-d886-5a2da81ba7dd"
609
+ },
610
+ "source": [
611
+ "diabetes_dataset.groupby('Outcome').mean()"
612
+ ],
613
+ "execution_count": null,
614
+ "outputs": [
615
+ {
616
+ "output_type": "execute_result",
617
+ "data": {
618
+ "text/plain": [
619
+ " Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
620
+ "Outcome \n",
621
+ "0 3.298000 109.980000 68.184000 19.664000 68.792000 \n",
622
+ "1 4.865672 141.257463 70.824627 22.164179 100.335821 \n",
623
+ "\n",
624
+ " BMI DiabetesPedigreeFunction Age \n",
625
+ "Outcome \n",
626
+ "0 30.304200 0.429734 31.190000 \n",
627
+ "1 35.142537 0.550500 37.067164 "
628
+ ],
629
+ "text/html": [
630
+ "\n",
631
+ " <div id=\"df-98fb70f3-cf03-414d-87c7-1dea5a48b28f\">\n",
632
+ " <div class=\"colab-df-container\">\n",
633
+ " <div>\n",
634
+ "<style scoped>\n",
635
+ " .dataframe tbody tr th:only-of-type {\n",
636
+ " vertical-align: middle;\n",
637
+ " }\n",
638
+ "\n",
639
+ " .dataframe tbody tr th {\n",
640
+ " vertical-align: top;\n",
641
+ " }\n",
642
+ "\n",
643
+ " .dataframe thead th {\n",
644
+ " text-align: right;\n",
645
+ " }\n",
646
+ "</style>\n",
647
+ "<table border=\"1\" class=\"dataframe\">\n",
648
+ " <thead>\n",
649
+ " <tr style=\"text-align: right;\">\n",
650
+ " <th></th>\n",
651
+ " <th>Pregnancies</th>\n",
652
+ " <th>Glucose</th>\n",
653
+ " <th>BloodPressure</th>\n",
654
+ " <th>SkinThickness</th>\n",
655
+ " <th>Insulin</th>\n",
656
+ " <th>BMI</th>\n",
657
+ " <th>DiabetesPedigreeFunction</th>\n",
658
+ " <th>Age</th>\n",
659
+ " </tr>\n",
660
+ " <tr>\n",
661
+ " <th>Outcome</th>\n",
662
+ " <th></th>\n",
663
+ " <th></th>\n",
664
+ " <th></th>\n",
665
+ " <th></th>\n",
666
+ " <th></th>\n",
667
+ " <th></th>\n",
668
+ " <th></th>\n",
669
+ " <th></th>\n",
670
+ " </tr>\n",
671
+ " </thead>\n",
672
+ " <tbody>\n",
673
+ " <tr>\n",
674
+ " <th>0</th>\n",
675
+ " <td>3.298000</td>\n",
676
+ " <td>109.980000</td>\n",
677
+ " <td>68.184000</td>\n",
678
+ " <td>19.664000</td>\n",
679
+ " <td>68.792000</td>\n",
680
+ " <td>30.304200</td>\n",
681
+ " <td>0.429734</td>\n",
682
+ " <td>31.190000</td>\n",
683
+ " </tr>\n",
684
+ " <tr>\n",
685
+ " <th>1</th>\n",
686
+ " <td>4.865672</td>\n",
687
+ " <td>141.257463</td>\n",
688
+ " <td>70.824627</td>\n",
689
+ " <td>22.164179</td>\n",
690
+ " <td>100.335821</td>\n",
691
+ " <td>35.142537</td>\n",
692
+ " <td>0.550500</td>\n",
693
+ " <td>37.067164</td>\n",
694
+ " </tr>\n",
695
+ " </tbody>\n",
696
+ "</table>\n",
697
+ "</div>\n",
698
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-98fb70f3-cf03-414d-87c7-1dea5a48b28f')\"\n",
699
+ " title=\"Convert this dataframe to an interactive table.\"\n",
700
+ " style=\"display:none;\">\n",
701
+ " \n",
702
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
703
+ " width=\"24px\">\n",
704
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
705
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
706
+ " </svg>\n",
707
+ " </button>\n",
708
+ " \n",
709
+ " <style>\n",
710
+ " .colab-df-container {\n",
711
+ " display:flex;\n",
712
+ " flex-wrap:wrap;\n",
713
+ " gap: 12px;\n",
714
+ " }\n",
715
+ "\n",
716
+ " .colab-df-convert {\n",
717
+ " background-color: #E8F0FE;\n",
718
+ " border: none;\n",
719
+ " border-radius: 50%;\n",
720
+ " cursor: pointer;\n",
721
+ " display: none;\n",
722
+ " fill: #1967D2;\n",
723
+ " height: 32px;\n",
724
+ " padding: 0 0 0 0;\n",
725
+ " width: 32px;\n",
726
+ " }\n",
727
+ "\n",
728
+ " .colab-df-convert:hover {\n",
729
+ " background-color: #E2EBFA;\n",
730
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
731
+ " fill: #174EA6;\n",
732
+ " }\n",
733
+ "\n",
734
+ " [theme=dark] .colab-df-convert {\n",
735
+ " background-color: #3B4455;\n",
736
+ " fill: #D2E3FC;\n",
737
+ " }\n",
738
+ "\n",
739
+ " [theme=dark] .colab-df-convert:hover {\n",
740
+ " background-color: #434B5C;\n",
741
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
742
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
743
+ " fill: #FFFFFF;\n",
744
+ " }\n",
745
+ " </style>\n",
746
+ "\n",
747
+ " <script>\n",
748
+ " const buttonEl =\n",
749
+ " document.querySelector('#df-98fb70f3-cf03-414d-87c7-1dea5a48b28f button.colab-df-convert');\n",
750
+ " buttonEl.style.display =\n",
751
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
752
+ "\n",
753
+ " async function convertToInteractive(key) {\n",
754
+ " const element = document.querySelector('#df-98fb70f3-cf03-414d-87c7-1dea5a48b28f');\n",
755
+ " const dataTable =\n",
756
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
757
+ " [key], {});\n",
758
+ " if (!dataTable) return;\n",
759
+ "\n",
760
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
761
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
762
+ " + ' to learn more about interactive tables.';\n",
763
+ " element.innerHTML = '';\n",
764
+ " dataTable['output_type'] = 'display_data';\n",
765
+ " await google.colab.output.renderOutput(dataTable, element);\n",
766
+ " const docLink = document.createElement('div');\n",
767
+ " docLink.innerHTML = docLinkHtml;\n",
768
+ " element.appendChild(docLink);\n",
769
+ " }\n",
770
+ " </script>\n",
771
+ " </div>\n",
772
+ " </div>\n",
773
+ " "
774
+ ]
775
+ },
776
+ "metadata": {},
777
+ "execution_count": 7
778
+ }
779
+ ]
780
+ },
781
+ {
782
+ "cell_type": "code",
783
+ "metadata": {
784
+ "id": "RoDW7l9mqqHZ"
785
+ },
786
+ "source": [
787
+ "# separating the data and labels\n",
788
+ "X = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n",
789
+ "Y = diabetes_dataset['Outcome']"
790
+ ],
791
+ "execution_count": null,
792
+ "outputs": []
793
+ },
794
+ {
795
+ "cell_type": "code",
796
+ "metadata": {
797
+ "colab": {
798
+ "base_uri": "https://localhost:8080/"
799
+ },
800
+ "id": "3eiRW9M9raMm",
801
+ "outputId": "a4dbd160-65e3-4f7f-f65e-e089695ad3b9"
802
+ },
803
+ "source": [
804
+ "print(X)"
805
+ ],
806
+ "execution_count": null,
807
+ "outputs": [
808
+ {
809
+ "output_type": "stream",
810
+ "name": "stdout",
811
+ "text": [
812
+ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
813
+ "0 6 148 72 35 0 33.6 \n",
814
+ "1 1 85 66 29 0 26.6 \n",
815
+ "2 8 183 64 0 0 23.3 \n",
816
+ "3 1 89 66 23 94 28.1 \n",
817
+ "4 0 137 40 35 168 43.1 \n",
818
+ ".. ... ... ... ... ... ... \n",
819
+ "763 10 101 76 48 180 32.9 \n",
820
+ "764 2 122 70 27 0 36.8 \n",
821
+ "765 5 121 72 23 112 26.2 \n",
822
+ "766 1 126 60 0 0 30.1 \n",
823
+ "767 1 93 70 31 0 30.4 \n",
824
+ "\n",
825
+ " DiabetesPedigreeFunction Age \n",
826
+ "0 0.627 50 \n",
827
+ "1 0.351 31 \n",
828
+ "2 0.672 32 \n",
829
+ "3 0.167 21 \n",
830
+ "4 2.288 33 \n",
831
+ ".. ... ... \n",
832
+ "763 0.171 63 \n",
833
+ "764 0.340 27 \n",
834
+ "765 0.245 30 \n",
835
+ "766 0.349 47 \n",
836
+ "767 0.315 23 \n",
837
+ "\n",
838
+ "[768 rows x 8 columns]\n"
839
+ ]
840
+ }
841
+ ]
842
+ },
843
+ {
844
+ "cell_type": "code",
845
+ "metadata": {
846
+ "colab": {
847
+ "base_uri": "https://localhost:8080/"
848
+ },
849
+ "id": "AoxgTJAMrcCl",
850
+ "outputId": "a76a9089-12b4-4319-da60-0bfc7c638ad0"
851
+ },
852
+ "source": [
853
+ "print(Y)"
854
+ ],
855
+ "execution_count": null,
856
+ "outputs": [
857
+ {
858
+ "output_type": "stream",
859
+ "name": "stdout",
860
+ "text": [
861
+ "0 1\n",
862
+ "1 0\n",
863
+ "2 1\n",
864
+ "3 0\n",
865
+ "4 1\n",
866
+ " ..\n",
867
+ "763 0\n",
868
+ "764 0\n",
869
+ "765 0\n",
870
+ "766 1\n",
871
+ "767 0\n",
872
+ "Name: Outcome, Length: 768, dtype: int64\n"
873
+ ]
874
+ }
875
+ ]
876
+ },
877
+ {
878
+ "cell_type": "markdown",
879
+ "metadata": {
880
+ "id": "gHciEFkxsoQP"
881
+ },
882
+ "source": [
883
+ "Train Test Split"
884
+ ]
885
+ },
886
+ {
887
+ "cell_type": "code",
888
+ "metadata": {
889
+ "id": "AEfKGj_yslvD"
890
+ },
891
+ "source": [
892
+ "X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, stratify=Y, random_state=2)"
893
+ ],
894
+ "execution_count": null,
895
+ "outputs": []
896
+ },
897
+ {
898
+ "cell_type": "code",
899
+ "metadata": {
900
+ "colab": {
901
+ "base_uri": "https://localhost:8080/"
902
+ },
903
+ "id": "DR05T-o0t3FQ",
904
+ "outputId": "24f5b16d-a500-49ca-de75-6503b41528d5"
905
+ },
906
+ "source": [
907
+ "print(X.shape, X_train.shape, X_test.shape)"
908
+ ],
909
+ "execution_count": null,
910
+ "outputs": [
911
+ {
912
+ "output_type": "stream",
913
+ "name": "stdout",
914
+ "text": [
915
+ "(768, 8) (614, 8) (154, 8)\n"
916
+ ]
917
+ }
918
+ ]
919
+ },
920
+ {
921
+ "cell_type": "markdown",
922
+ "metadata": {
923
+ "id": "ElJ3tkOtuC_n"
924
+ },
925
+ "source": [
926
+ "Training the Model"
927
+ ]
928
+ },
929
+ {
930
+ "cell_type": "code",
931
+ "metadata": {
932
+ "id": "5szLWHlNt9xc"
933
+ },
934
+ "source": [
935
+ "classifier = svm.SVC(kernel='linear')"
936
+ ],
937
+ "execution_count": null,
938
+ "outputs": []
939
+ },
940
+ {
941
+ "cell_type": "code",
942
+ "metadata": {
943
+ "colab": {
944
+ "base_uri": "https://localhost:8080/",
945
+ "height": 75
946
+ },
947
+ "id": "ncJWY_7suPAb",
948
+ "outputId": "e6e9a274-acb9-4d42-f0e0-f5c37e378f8a"
949
+ },
950
+ "source": [
951
+ "#training the support vector Machine Classifier\n",
952
+ "classifier.fit(X_train, Y_train)"
953
+ ],
954
+ "execution_count": null,
955
+ "outputs": [
956
+ {
957
+ "output_type": "execute_result",
958
+ "data": {
959
+ "text/plain": [
960
+ "SVC(kernel='linear')"
961
+ ],
962
+ "text/html": [
963
+ "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SVC(kernel=&#x27;linear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC(kernel=&#x27;linear&#x27;)</pre></div></div></div></div></div>"
964
+ ]
965
+ },
966
+ "metadata": {},
967
+ "execution_count": 14
968
+ }
969
+ ]
970
+ },
971
+ {
972
+ "cell_type": "markdown",
973
+ "metadata": {
974
+ "id": "UV4-CAfquiyP"
975
+ },
976
+ "source": [
977
+ "Model Evaluation"
978
+ ]
979
+ },
980
+ {
981
+ "cell_type": "markdown",
982
+ "metadata": {
983
+ "id": "yhAjGPJWunXa"
984
+ },
985
+ "source": [
986
+ "Accuracy Score"
987
+ ]
988
+ },
989
+ {
990
+ "cell_type": "code",
991
+ "metadata": {
992
+ "id": "fJLEPQK7ueXp"
993
+ },
994
+ "source": [
995
+ "# accuracy score on the training data\n",
996
+ "X_train_prediction = classifier.predict(X_train)\n",
997
+ "training_data_accuracy = accuracy_score(X_train_prediction, Y_train)"
998
+ ],
999
+ "execution_count": null,
1000
+ "outputs": []
1001
+ },
1002
+ {
1003
+ "cell_type": "code",
1004
+ "metadata": {
1005
+ "colab": {
1006
+ "base_uri": "https://localhost:8080/"
1007
+ },
1008
+ "id": "mmJ22qhVvNwj",
1009
+ "outputId": "7540f8ca-5527-4612-d5cd-8746d711220e"
1010
+ },
1011
+ "source": [
1012
+ "print('Accuracy score of the training data : ', training_data_accuracy)"
1013
+ ],
1014
+ "execution_count": null,
1015
+ "outputs": [
1016
+ {
1017
+ "output_type": "stream",
1018
+ "name": "stdout",
1019
+ "text": [
1020
+ "Accuracy score of the training data : 0.7833876221498371\n"
1021
+ ]
1022
+ }
1023
+ ]
1024
+ },
1025
+ {
1026
+ "cell_type": "code",
1027
+ "metadata": {
1028
+ "id": "G2CICFMEvcCl"
1029
+ },
1030
+ "source": [
1031
+ "# accuracy score on the test data\n",
1032
+ "X_test_prediction = classifier.predict(X_test)\n",
1033
+ "test_data_accuracy = accuracy_score(X_test_prediction, Y_test)"
1034
+ ],
1035
+ "execution_count": null,
1036
+ "outputs": []
1037
+ },
1038
+ {
1039
+ "cell_type": "code",
1040
+ "metadata": {
1041
+ "colab": {
1042
+ "base_uri": "https://localhost:8080/"
1043
+ },
1044
+ "id": "i2GcW_t_vz7C",
1045
+ "outputId": "e2b18fd9-f005-42fa-9444-81e8eb57d947"
1046
+ },
1047
+ "source": [
1048
+ "print('Accuracy score of the test data : ', test_data_accuracy)"
1049
+ ],
1050
+ "execution_count": null,
1051
+ "outputs": [
1052
+ {
1053
+ "output_type": "stream",
1054
+ "name": "stdout",
1055
+ "text": [
1056
+ "Accuracy score of the test data : 0.7727272727272727\n"
1057
+ ]
1058
+ }
1059
+ ]
1060
+ },
1061
+ {
1062
+ "cell_type": "markdown",
1063
+ "metadata": {
1064
+ "id": "gq8ZX1xpwPF5"
1065
+ },
1066
+ "source": [
1067
+ "Making a Predictive System"
1068
+ ]
1069
+ },
1070
+ {
1071
+ "cell_type": "code",
1072
+ "metadata": {
1073
+ "colab": {
1074
+ "base_uri": "https://localhost:8080/"
1075
+ },
1076
+ "id": "U-ULRe4yv5tH",
1077
+ "outputId": "c218e6cf-ac30-4246-9bc6-cc09ac9d81ae"
1078
+ },
1079
+ "source": [
1080
+ "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
1081
+ "\n",
1082
+ "# changing the input_data to numpy array\n",
1083
+ "input_data_as_numpy_array = np.asarray(input_data)\n",
1084
+ "\n",
1085
+ "# reshape the array as we are predicting for one instance\n",
1086
+ "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
1087
+ "\n",
1088
+ "prediction = classifier.predict(input_data_reshaped)\n",
1089
+ "print(prediction)\n",
1090
+ "\n",
1091
+ "if (prediction[0] == 0):\n",
1092
+ " print('The person is not diabetic')\n",
1093
+ "else:\n",
1094
+ " print('The person is diabetic')"
1095
+ ],
1096
+ "execution_count": null,
1097
+ "outputs": [
1098
+ {
1099
+ "output_type": "stream",
1100
+ "name": "stdout",
1101
+ "text": [
1102
+ "[1]\n",
1103
+ "The person is diabetic\n"
1104
+ ]
1105
+ },
1106
+ {
1107
+ "output_type": "stream",
1108
+ "name": "stderr",
1109
+ "text": [
1110
+ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n",
1111
+ " warnings.warn(\n"
1112
+ ]
1113
+ }
1114
+ ]
1115
+ },
1116
+ {
1117
+ "cell_type": "markdown",
1118
+ "metadata": {
1119
+ "id": "vgL6wblpQUtX"
1120
+ },
1121
+ "source": [
1122
+ "Saving the trained model"
1123
+ ]
1124
+ },
1125
+ {
1126
+ "cell_type": "code",
1127
+ "metadata": {
1128
+ "id": "Nn60MdxByjgz"
1129
+ },
1130
+ "source": [
1131
+ "import pickle"
1132
+ ],
1133
+ "execution_count": null,
1134
+ "outputs": []
1135
+ },
1136
+ {
1137
+ "cell_type": "code",
1138
+ "metadata": {
1139
+ "id": "cWzPQs4mQZN_"
1140
+ },
1141
+ "source": [
1142
+ "filename = 'trained_model.sav'\n",
1143
+ "pickle.dump(classifier, open(filename, 'wb'))"
1144
+ ],
1145
+ "execution_count": null,
1146
+ "outputs": []
1147
+ },
1148
+ {
1149
+ "cell_type": "code",
1150
+ "metadata": {
1151
+ "id": "Wk1T2sMcQ6_U"
1152
+ },
1153
+ "source": [
1154
+ "# loading the saved model\n",
1155
+ "loaded_model = pickle.load(open('trained_model.sav', 'rb'))"
1156
+ ],
1157
+ "execution_count": null,
1158
+ "outputs": []
1159
+ },
1160
+ {
1161
+ "cell_type": "code",
1162
+ "metadata": {
1163
+ "colab": {
1164
+ "base_uri": "https://localhost:8080/"
1165
+ },
1166
+ "id": "Bd5OpxHnRPyy",
1167
+ "outputId": "daa664c6-683c-4ac6-986d-46654598fac6"
1168
+ },
1169
+ "source": [
1170
+ "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
1171
+ "\n",
1172
+ "# changing the input_data to numpy array\n",
1173
+ "input_data_as_numpy_array = np.asarray(input_data)\n",
1174
+ "\n",
1175
+ "# reshape the array as we are predicting for one instance\n",
1176
+ "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
1177
+ "\n",
1178
+ "prediction = loaded_model.predict(input_data_reshaped)\n",
1179
+ "print(prediction)\n",
1180
+ "\n",
1181
+ "if (prediction[0] == 0):\n",
1182
+ " print('The person is not diabetic')\n",
1183
+ "else:\n",
1184
+ " print('The person is diabetic')"
1185
+ ],
1186
+ "execution_count": null,
1187
+ "outputs": [
1188
+ {
1189
+ "output_type": "stream",
1190
+ "name": "stdout",
1191
+ "text": [
1192
+ "[1]\n",
1193
+ "The person is diabetic\n"
1194
+ ]
1195
+ },
1196
+ {
1197
+ "output_type": "stream",
1198
+ "name": "stderr",
1199
+ "text": [
1200
+ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n",
1201
+ " warnings.warn(\n"
1202
+ ]
1203
+ }
1204
+ ]
1205
+ },
1206
+ {
1207
+ "cell_type": "code",
1208
+ "metadata": {
1209
+ "id": "iGRhGvgfRkvm"
1210
+ },
1211
+ "source": [],
1212
+ "execution_count": null,
1213
+ "outputs": []
1214
+ }
1215
+ ]
1216
+ }
Predicitive System.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Load the pickled diabetes classifier and print a prediction for one sample.

Run as a script. The trained model is expected in ``trained_model.sav`` in the
same directory as this file.
"""

import pickle
from pathlib import Path

import numpy as np

# The model file is shipped next to this script, so resolve it relative to the
# script rather than through a machine-specific absolute path.
MODEL_PATH = Path(__file__).resolve().parent / 'trained_model.sav'

# One sample record of eight feature values.
# NOTE(review): presumably ordered like the columns the model was trained on
# -- confirm against the training dataset.
SAMPLE_INPUT = (5, 166, 72, 19, 175, 25.8, 0.587, 51)


def load_model(path=MODEL_PATH):
    """Unpickle and return the classifier stored at *path*.

    The file is opened in a ``with`` block so the handle is always closed.

    NOTE: unpickling can execute arbitrary code; only load model files that
    come from a trusted source.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


def describe_prediction(label):
    """Return the message for a predicted class label (0 means not diabetic)."""
    if label == 0:
        return 'The person is not diabetic'
    return 'The person is diabetic'


def predict(model, input_data):
    """Return ``model.predict`` for a single record given as a flat sequence."""
    # Reshape to one row because we are predicting for a single instance.
    features = np.asarray(input_data).reshape(1, -1)
    return model.predict(features)


def main():
    """Load the model, predict for ``SAMPLE_INPUT`` and print the result."""
    loaded_model = load_model()
    prediction = predict(loaded_model, SAMPLE_INPUT)
    print(prediction)
    print(describe_prediction(prediction[0]))


if __name__ == '__main__':
    main()
diabetes_predicition.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""Train, evaluate and persist an SVM classifier on the PIMA diabetes dataset.

Exported from the Colaboratory notebook "Diabetes Predicition.ipynb".

Original file is located at
    https://colab.research.google.com/drive/1aNMlOsS2sOTF_m50QYOm5pAz-UmbD4_u
"""

import pickle

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Location of the dataset in the Colab runtime the notebook was written for.
DATASET_PATH = '/content/diabetes.csv'

# File the trained classifier is pickled to (relative to the working directory).
MODEL_FILENAME = 'trained_model.sav'

# One sample record of eight feature values, given positionally.
# NOTE(review): presumably in the same order as the dataset's feature columns
# -- confirm against diabetes.csv.
SAMPLE_INPUT = (5, 166, 72, 19, 175, 25.8, 0.587, 51)


def describe_prediction(label):
    """Return the message for a predicted class label (0 means not diabetic)."""
    if label == 0:
        return 'The person is not diabetic'
    return 'The person is diabetic'


def predict_one(model, input_data):
    """Return ``model.predict`` for a single record given as a flat sequence.

    When the model was fitted on a DataFrame it remembers the column names; the
    record is then wrapped in a one-row DataFrame with those names so that
    scikit-learn does not emit "X does not have valid feature names".
    """
    # Reshape to one row because we are predicting for a single instance.
    features = np.asarray(input_data).reshape(1, -1)
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None:
        features = pd.DataFrame(features, columns=feature_names)
    return model.predict(features)


def report_prediction(model, input_data):
    """Print the raw prediction array and its human-readable meaning."""
    prediction = predict_one(model, input_data)
    print(prediction)
    print(describe_prediction(prediction[0]))


def main():
    """Run the whole pipeline: load data, train, evaluate, save and reload."""
    # --- Data collection and analysis (PIMA Diabetes Dataset) ---

    # loading the diabetes dataset to a pandas DataFrame
    diabetes_dataset = pd.read_csv(DATASET_PATH)

    # In the notebook the following were displayed as cell outputs; in a
    # script a bare expression shows nothing, so print them explicitly.

    # first 5 rows of the dataset
    print(diabetes_dataset.head())

    # number of rows and columns in this dataset
    print(diabetes_dataset.shape)

    # statistical measures of the data
    print(diabetes_dataset.describe())

    # class balance: 0 --> Non-Diabetic, 1 --> Diabetic
    print(diabetes_dataset['Outcome'].value_counts())

    print(diabetes_dataset.groupby('Outcome').mean())

    # separating the data and labels
    X = diabetes_dataset.drop(columns='Outcome')
    Y = diabetes_dataset['Outcome']

    print(X)
    print(Y)

    # --- Train test split ---
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=2
    )

    print(X.shape, X_train.shape, X_test.shape)

    # --- Training the model ---
    classifier = svm.SVC(kernel='linear')

    # training the support vector machine classifier
    classifier.fit(X_train, Y_train)

    # --- Model evaluation: accuracy score ---
    # accuracy_score expects (y_true, y_pred) in that order.

    # accuracy score on the training data
    X_train_prediction = classifier.predict(X_train)
    training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
    print('Accuracy score of the training data : ', training_data_accuracy)

    # accuracy score on the test data
    X_test_prediction = classifier.predict(X_test)
    test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
    print('Accuracy score of the test data : ', test_data_accuracy)

    # --- Making a predictive system ---
    report_prediction(classifier, SAMPLE_INPUT)

    # --- Saving the trained model ---
    # Use ``with`` so the file is flushed and closed before it is read back.
    with open(MODEL_FILENAME, 'wb') as model_file:
        pickle.dump(classifier, model_file)

    # loading the saved model (the file was just written by this script, so it
    # is trusted; never unpickle files from an untrusted source)
    with open(MODEL_FILENAME, 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    # the reloaded model should give the same answer as the in-memory one
    report_prediction(loaded_model, SAMPLE_INPUT)


if __name__ == '__main__':
    main()
trained_model.sav ADDED
Binary file (27.6 kB). View file