Testys commited on
Commit
7709e78
1 Parent(s): 72e3d0f

Upload 3 files

Browse files
Files changed (3) hide show
  1. diabetes.csv +391 -0
  2. model.ipynb +1029 -0
  3. model.py +60 -0
diabetes.csv ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_number,cholesterol,glucose,hdl_chol,chol_hdl_ratio,age,gender,height,weight,bmi,systolic_bp,diastolic_bp,waist,hip,waist_hip_ratio,diabetes
2
+ 1,193,77,49,"3,9",19,female,61,119,"22,5",118,70,32,38,"0,84",No diabetes
3
+ 2,146,79,41,"3,6",19,female,60,135,"26,4",108,58,33,40,"0,83",No diabetes
4
+ 3,217,75,54,4,20,female,67,187,"29,3",110,72,40,45,"0,89",No diabetes
5
+ 4,226,97,70,"3,2",20,female,64,114,"19,6",122,64,31,39,"0,79",No diabetes
6
+ 5,164,91,67,"2,4",20,female,70,141,"20,2",122,86,32,39,"0,82",No diabetes
7
+ 6,170,69,64,"2,7",20,female,64,161,"27,6",108,70,37,40,"0,93",No diabetes
8
+ 7,149,77,49,3,20,female,62,115,21,105,82,31,37,"0,84",No diabetes
9
+ 8,164,71,63,"2,6",20,male,72,145,"19,7",108,78,29,36,"0,81",No diabetes
10
+ 9,230,112,64,"3,6",20,male,67,159,"24,9",100,90,31,39,"0,79",No diabetes
11
+ 10,179,105,60,3,20,female,58,170,"35,5",140,100,34,46,"0,74",No diabetes
12
+ 11,174,105,117,"1,5",20,male,70,187,"26,8",132,86,37,41,"0,9",No diabetes
13
+ 12,193,106,63,"3,1",20,female,68,274,"41,7",165,110,49,58,"0,84",No diabetes
14
+ 13,132,99,34,"3,9",21,female,65,169,"28,1",112,62,39,43,"0,91",No diabetes
15
+ 14,203,84,75,"2,7",21,female,63,142,"25,2",125,85,28,39,"0,72",No diabetes
16
+ 15,135,88,47,"2,9",21,male,69,155,"22,9",110,68,31,39,"0,79",No diabetes
17
+ 16,187,84,64,"2,9",21,female,63,158,28,138,88,39,43,"0,91",No diabetes
18
+ 17,244,89,92,"2,7",21,male,71,163,"22,7",116,76,34,39,"0,87",No diabetes
19
+ 18,193,75,49,"3,9",21,female,61,220,"41,6",130,82,40,52,"0,77",No diabetes
20
+ 19,165,76,46,"3,6",22,female,63,114,"20,2",112,78,28,35,"0,8",No diabetes
21
+ 20,172,70,36,"4,8",22,female,64,148,"25,4",90,48,35,38,"0,92",No diabetes
22
+ 21,217,81,60,"3,6",22,female,71,223,"31,1",120,75,46,50,"0,92",No diabetes
23
+ 22,223,75,85,"2,6",22,female,62,137,"25,1",120,70,28,35,"0,8",No diabetes
24
+ 23,136,81,51,"2,7",22,female,66,160,"25,8",105,85,35,40,"0,88",No diabetes
25
+ 24,175,91,42,"4,2",23,female,65,235,"39,1",110,80,44,50,"0,88",No diabetes
26
+ 25,230,86,37,"6,2",23,male,71,277,"38,6",150,99,50,49,"1,02",No diabetes
27
+ 26,147,78,42,"3,5",23,female,61,185,35,127,71,43,47,"0,91",No diabetes
28
+ 27,229,91,43,"5,3",23,male,72,180,"24,4",110,78,34,41,"0,83",No diabetes
29
+ 28,179,75,36,5,23,female,65,183,"30,4",120,80,43,45,"0,96",No diabetes
30
+ 29,185,76,58,"3,2",23,male,76,164,20,124,78,32,40,"0,8",No diabetes
31
+ 30,164,86,40,"4,1",23,female,69,245,"36,2",126,75,44,47,"0,94",No diabetes
32
+ 31,228,66,45,"5,1",24,female,61,113,"21,3",100,70,33,38,"0,87",No diabetes
33
+ 32,199,87,63,"3,2",25,male,66,118,19,120,78,32,34,"0,94",No diabetes
34
+ 33,134,101,36,"3,7",25,female,63,245,"43,4",142,78,47,58,"0,81",No diabetes
35
+ 34,169,104,58,"2,9",25,female,60,154,"30,1",140,95,40,42,"0,95",No diabetes
36
+ 35,227,98,66,"3,4",25,male,71,162,"22,6",123,82,35,39,"0,9",No diabetes
37
+ 36,149,138,50,3,26,female,62,174,"31,8",148,92,38,46,"0,83",No diabetes
38
+ 37,155,58,69,"2,2",26,male,73,174,23,110,76,30,35,"0,86",No diabetes
39
+ 38,179,90,60,3,26,female,60,130,"25,4",138,84,32,40,"0,8",No diabetes
40
+ 39,283,83,74,"3,8",26,male,72,227,"30,8",158,104,41,44,"0,93",No diabetes
41
+ 40,228,79,37,"6,2",26,male,72,259,"35,1",122,90,48,49,"0,98",No diabetes
42
+ 41,220,60,66,"3,3",26,male,70,150,"21,5",136,88,33,39,"0,85",Diabetes
43
+ 42,170,76,60,"2,8",27,female,63,119,"21,1",122,86,28,37,"0,76",No diabetes
44
+ 43,201,100,46,"4,4",27,female,65,145,"24,1",121,75,32,35,"0,91",No diabetes
45
+ 44,238,75,36,"6,6",27,female,60,170,"33,2",130,80,35,41,"0,85",No diabetes
46
+ 45,190,92,44,"4,3",27,female,65,210,"34,9",150,106,39,47,"0,83",No diabetes
47
+ 46,203,94,62,"3,3",27,female,67,209,"32,7",140,80,34,43,"0,79",No diabetes
48
+ 47,226,100,65,"3,5",27,male,69,289,"42,7",130,100,48,51,"0,94",No diabetes
49
+ 48,204,62,70,"2,9",27,female,67,185,29,110,90,35,44,"0,8",No diabetes
50
+ 49,166,77,68,"2,4",27,male,72,141,"19,1",110,58,33,38,"0,87",No diabetes
51
+ 50,241,92,40,6,27,female,63,179,"31,7",120,75,40,42,"0,95",No diabetes
52
+ 51,164,94,58,"2,8",28,female,67,180,"28,2",128,94,39,43,"0,91",No diabetes
53
+ 52,214,111,59,"3,6",28,male,68,204,31,130,90,40,41,"0,98",No diabetes
54
+ 53,151,74,47,"3,2",28,male,69,130,"19,2",135,75,29,35,"0,83",No diabetes
55
+ 54,184,99,36,"5,1",28,male,67,154,"24,1",124,94,35,38,"0,92",No diabetes
56
+ 55,168,69,45,"3,7",28,female,63,200,"35,4",111,65,42,46,"0,91",No diabetes
57
+ 56,146,77,60,"2,4",28,female,64,126,"21,6",120,90,28,32,"0,88",No diabetes
58
+ 57,189,96,47,4,28,female,64,200,"34,3",136,52,38,45,"0,84",No diabetes
59
+ 58,132,83,40,"3,3",28,female,68,225,"34,2",136,86,41,52,"0,79",No diabetes
60
+ 59,179,77,50,"3,6",29,male,68,170,"25,8",122,68,38,39,"0,97",No diabetes
61
+ 60,135,88,34,4,29,female,65,123,"20,5",118,61,26,37,"0,7",No diabetes
62
+ 61,163,69,48,"3,4",29,female,62,99,"18,1",125,60,30,36,"0,83",No diabetes
63
+ 62,204,71,55,"3,7",29,female,64,120,"20,6",110,70,33,38,"0,87",No diabetes
64
+ 63,165,97,24,"6,9",29,female,64,218,"37,4",112,68,46,48,"0,96",No diabetes
65
+ 64,181,101,44,"4,1",29,male,68,180,"27,4",130,78,38,42,"0,9",No diabetes
66
+ 65,194,269,38,"5,1",29,female,69,167,"24,7",120,70,33,40,"0,83",Diabetes
67
+ 66,158,74,64,"2,5",30,female,62,142,26,108,68,32,34,"0,94",No diabetes
68
+ 67,160,82,41,"3,9",30,female,63,143,"25,3",172,124,33,40,"0,83",No diabetes
69
+ 68,181,77,46,"3,9",30,female,66,257,"41,5",162,108,47,55,"0,85",No diabetes
70
+ 69,144,81,28,"5,1",30,male,72,165,"22,4",118,78,31,38,"0,82",No diabetes
71
+ 70,182,74,44,"4,1",30,female,62,125,"22,9",132,80,31,39,"0,79",No diabetes
72
+ 71,145,84,54,"2,7",30,female,65,165,"27,5",102,56,33,42,"0,79",No diabetes
73
+ 72,195,92,41,"4,8",30,male,69,191,"28,2",161,112,46,49,"0,94",No diabetes
74
+ 73,207,75,44,"4,7",30,male,72,180,"24,4",118,62,35,41,"0,85",No diabetes
75
+ 74,192,124,31,"5,6",30,male,72,250,"33,9",142,79,43,51,"0,84",No diabetes
76
+ 75,183,69,51,"3,6",31,female,66,190,"30,7",125,70,41,47,"0,87",No diabetes
77
+ 76,176,92,55,"3,2",31,female,62,145,"26,5",110,72,36,42,"0,86",No diabetes
78
+ 77,163,83,57,"2,9",31,female,65,120,20,136,86,29,40,"0,73",No diabetes
79
+ 78,188,77,45,"4,2",31,female,67,227,"35,5",122,70,47,53,"0,89",No diabetes
80
+ 79,209,89,43,"4,9",31,female,67,160,"25,1",108,58,30,44,"0,68",No diabetes
81
+ 80,179,77,72,"2,5",31,male,66,145,"23,4",131,79,33,38,"0,87",No diabetes
82
+ 81,293,85,94,"3,1",31,female,67,200,"31,3",110,90,41,42,"0,98",No diabetes
83
+ 82,305,91,44,"6,9",31,male,71,211,"29,4",100,60,40,45,"0,89",No diabetes
84
+ 83,191,155,58,"3,3",31,female,62,237,"43,3",140,87,53,56,"0,95",Diabetes
85
+ 84,155,81,70,"2,2",32,female,65,151,"25,1",120,68,33,40,"0,83",No diabetes
86
+ 85,179,85,52,"3,4",32,female,62,179,"32,7",140,96,37,47,"0,79",No diabetes
87
+ 86,176,90,34,"5,2",32,female,63,252,"44,6",100,72,45,58,"0,78",No diabetes
88
+ 87,244,101,36,"6,8",32,male,70,212,"30,4",132,90,39,44,"0,89",No diabetes
89
+ 88,213,83,47,"4,5",33,female,65,157,"26,1",130,90,37,41,"0,9",No diabetes
90
+ 89,217,87,40,"5,4",33,female,62,186,34,140,90,42,46,"0,91",No diabetes
91
+ 90,151,85,48,"3,1",33,male,69,308,"45,5",110,90,52,58,"0,9",No diabetes
92
+ 91,168,82,44,"3,8",33,female,66,118,19,98,66,29,35,"0,83",No diabetes
93
+ 92,231,84,91,"2,5",33,male,69,163,"24,1",140,70,35,38,"0,92",No diabetes
94
+ 93,262,93,43,"6,1",33,female,63,170,"30,1",110,68,33,46,"0,72",No diabetes
95
+ 94,179,70,52,"3,4",34,male,72,170,"23,1",138,82,31,39,"0,79",No diabetes
96
+ 95,300,65,59,"5,1",34,female,65,160,"26,6",120,60,40,47,"0,85",No diabetes
97
+ 96,248,94,69,"3,6",34,male,71,190,"26,5",132,86,36,42,"0,86",No diabetes
98
+ 97,217,88,40,"5,4",34,male,73,219,"28,9",145,100,41,42,"0,98",No diabetes
99
+ 98,224,71,42,"5,3",34,female,60,165,"32,2",135,80,34,46,"0,74",No diabetes
100
+ 99,171,85,61,"2,8",34,female,63,164,29,120,80,34,43,"0,79",No diabetes
101
+ 100,174,90,36,"4,8",34,male,71,210,"29,3",142,92,37,43,"0,86",No diabetes
102
+ 101,194,86,67,"2,9",35,male,66,159,"25,7",115,64,31,35,"0,89",No diabetes
103
+ 102,150,80,38,"3,9",35,male,73,179,"23,6",138,92,32,37,"0,86",No diabetes
104
+ 103,337,85,62,"5,4",35,male,72,189,"25,6",124,84,36,44,"0,82",No diabetes
105
+ 104,239,97,55,"4,3",35,male,74,170,"21,8",122,62,32,38,"0,84",No diabetes
106
+ 105,218,126,32,"6,8",35,male,69,169,25,139,90,39,41,"0,95",No diabetes
107
+ 106,122,82,43,"2,8",36,female,71,183,"25,5",110,80,41,45,"0,91",No diabetes
108
+ 107,225,83,42,"5,4",36,male,67,192,"30,1",149,89,40,42,"0,95",No diabetes
109
+ 108,160,71,44,"3,6",36,female,64,185,"31,8",110,80,39,45,"0,87",No diabetes
110
+ 109,191,76,30,"6,4",36,male,69,183,27,100,66,36,40,"0,9",No diabetes
111
+ 110,199,153,77,"2,6",36,female,66,255,"41,2",118,66,47,52,"0,9",No diabetes
112
+ 111,197,92,46,"4,3",36,female,64,136,"23,3",128,80,32,37,"0,86",No diabetes
113
+ 112,179,81,35,"5,1",36,female,63,125,"22,1",110,76,33,36,"0,92",No diabetes
114
+ 113,186,74,76,"2,4",36,male,69,150,"22,1",138,82,31,38,"0,82",No diabetes
115
+ 114,178,78,59,3,36,male,70,161,"23,1",130,79,34,40,"0,85",No diabetes
116
+ 115,224,85,30,"7,5",36,male,69,205,"30,3",150,99,37,41,"0,9",No diabetes
117
+ 116,194,81,36,"5,4",36,male,64,126,"21,6",110,76,30,34,"0,88",No diabetes
118
+ 117,347,197,42,"8,3",36,male,70,277,"39,7",140,86,51,49,"1,04",No diabetes
119
+ 118,245,119,26,"9,4",36,male,66,179,"28,9",150,92,37,42,"0,88",Diabetes
120
+ 119,227,75,44,"5,2",37,male,59,170,"34,3",140,84,34,39,"0,87",No diabetes
121
+ 120,192,89,30,"6,4",37,male,71,195,"27,2",136,96,36,43,"0,84",No diabetes
122
+ 121,215,64,84,"2,6",37,female,59,148,"29,9",140,100,32,42,"0,76",No diabetes
123
+ 122,214,67,47,"4,6",37,female,64,145,"24,9",108,76,34,42,"0,81",No diabetes
124
+ 123,243,52,59,"4,1",37,female,64,233,40,110,82,49,57,"0,86",No diabetes
125
+ 124,156,86,34,"4,6",37,female,67,212,"33,2",122,74,48,51,"0,94",No diabetes
126
+ 125,179,85,50,"3,6",37,male,66,136,"21,9",190,94,33,39,"0,85",No diabetes
127
+ 126,209,91,36,"5,8",37,male,70,262,"37,6",130,94,42,48,"0,88",No diabetes
128
+ 127,232,87,30,"7,7",37,male,68,252,"38,3",140,95,43,47,"0,91",No diabetes
129
+ 128,212,88,36,"5,9",37,female,64,160,"27,5",124,82,37,45,"0,82",No diabetes
130
+ 129,199,130,48,"4,1",37,female,61,203,"38,4",136,84,42,51,"0,82",No diabetes
131
+ 130,145,85,29,5,38,female,60,125,"24,4",132,82,31,35,"0,89",No diabetes
132
+ 131,206,90,38,"5,4",38,female,69,167,"24,7",138,90,36,47,"0,77",No diabetes
133
+ 132,147,86,34,"4,3",38,male,69,205,"30,3",130,96,39,41,"0,95",No diabetes
134
+ 133,302,81,57,"5,3",38,female,67,222,"34,8",128,82,41,51,"0,8",No diabetes
135
+ 134,138,95,40,"3,5",38,female,60,138,"26,9",140,90,31,39,"0,79",No diabetes
136
+ 135,215,128,34,"6,3",38,female,58,195,"40,8",102,68,42,50,"0,84",No diabetes
137
+ 136,159,88,43,"3,7",38,male,68,169,"25,7",138,79,34,40,"0,85",No diabetes
138
+ 137,268,90,48,"5,6",38,female,63,181,"32,1",142,100,38,46,"0,83",No diabetes
139
+ 138,251,118,38,"6,6",38,female,64,248,"42,6",110,80,49,58,"0,84",No diabetes
140
+ 139,216,155,30,"7,2",38,male,68,145,22,110,60,34,37,"0,92",No diabetes
141
+ 140,203,299,43,"4,7",38,female,69,288,"42,5",136,83,48,55,"0,87",Diabetes
142
+ 141,181,83,44,"4,1",39,female,66,255,"41,2",140,98,46,54,"0,85",No diabetes
143
+ 142,239,85,63,"3,8",39,male,60,144,"28,1",162,90,33,42,"0,79",No diabetes
144
+ 143,200,56,51,"3,9",40,female,62,105,"19,2",125,64,26,33,"0,79",No diabetes
145
+ 144,211,98,40,"5,3",40,female,68,179,"27,2",110,76,37,43,"0,86",No diabetes
146
+ 145,152,103,32,"4,8",40,female,52,187,"48,6",148,82,38,49,"0,78",No diabetes
147
+ 146,173,83,37,"4,7",40,female,62,130,"23,8",122,76,37,38,"0,97",No diabetes
148
+ 147,215,72,42,"5,1",40,male,70,189,"27,1",180,122,37,39,"0,95",No diabetes
149
+ 148,219,105,63,"3,5",40,female,62,153,28,106,82,36,44,"0,82",No diabetes
150
+ 149,180,76,46,"3,9",40,female,64,146,"25,1",128,82,37,43,"0,86",No diabetes
151
+ 150,214,77,48,"4,5",40,male,72,222,"30,1",120,84,40,44,"0,91",No diabetes
152
+ 151,171,92,54,"3,2",40,male,71,214,"29,8",138,94,41,39,"1,05",No diabetes
153
+ 152,183,79,46,4,40,female,59,165,"33,3",135,84,37,43,"0,86",No diabetes
154
+ 153,184,92,36,"5,1",40,female,63,285,"50,5",142,98,50,60,"0,83",No diabetes
155
+ 154,180,84,69,"2,6",40,female,68,264,"40,1",142,98,43,54,"0,8",No diabetes
156
+ 155,191,74,33,"5,8",40,male,72,270,"36,6",136,70,45,49,"0,92",No diabetes
157
+ 156,218,87,38,"5,7",40,male,73,200,"26,4",120,76,38,41,"0,93",No diabetes
158
+ 157,169,85,51,"3,3",40,female,65,180,30,106,82,40,44,"0,91",No diabetes
159
+ 158,267,133,34,"7,9",40,female,59,204,"41,2",118,69,40,47,"0,85",Diabetes
160
+ 159,234,78,54,"4,3",41,male,67,183,"28,7",122,96,38,40,"0,95",No diabetes
161
+ 160,206,112,33,"6,2",41,female,62,184,"33,7",104,80,39,44,"0,89",No diabetes
162
+ 161,184,79,39,"4,7",41,male,69,154,"22,7",136,96,34,39,"0,87",No diabetes
163
+ 162,178,64,52,"3,4",41,female,65,188,"31,3",130,76,35,46,"0,76",No diabetes
164
+ 163,179,80,92,"1,9",41,female,72,118,16,144,112,28,36,"0,78",No diabetes
165
+ 164,225,84,82,"2,7",41,male,71,156,"21,8",150,80,31,40,"0,78",No diabetes
166
+ 165,189,84,46,"4,1",41,female,63,153,"27,1",130,80,32,40,"0,8",No diabetes
167
+ 166,199,76,52,"3,8",41,female,63,197,"34,9",120,78,41,48,"0,85",No diabetes
168
+ 167,241,86,63,"3,8",41,female,59,139,"28,1",112,72,29,39,"0,74",No diabetes
169
+ 168,170,106,42,4,41,female,61,110,"20,8",103,64,29,30,"0,97",No diabetes
170
+ 169,269,59,66,"4,1",41,male,67,191,"29,9",130,73,38,41,"0,93",No diabetes
171
+ 170,269,73,34,"7,9",41,female,62,160,"29,3",126,90,39,41,"0,95",No diabetes
172
+ 171,270,73,40,"6,8",42,male,66,185,"29,9",146,94,39,41,"0,95",No diabetes
173
+ 172,172,101,46,"3,7",42,female,65,165,"27,5",118,68,33,45,"0,73",No diabetes
174
+ 173,193,77,45,"4,3",42,female,75,186,"23,2",125,90,37,46,"0,8",No diabetes
175
+ 174,199,81,36,"5,5",42,female,67,235,"36,8",178,100,47,52,"0,9",No diabetes
176
+ 175,177,101,36,"4,9",42,female,65,174,29,146,94,37,40,"0,93",No diabetes
177
+ 176,191,81,53,"3,6",42,female,61,156,"29,5",138,84,36,42,"0,86",No diabetes
178
+ 177,208,122,51,"4,1",42,female,62,141,"25,8",118,78,33,40,"0,83",No diabetes
179
+ 178,188,84,46,"4,1",43,female,66,152,"24,5",122,80,37,41,"0,9",No diabetes
180
+ 179,243,74,42,"5,8",43,female,64,239,41,128,90,48,53,"0,91",No diabetes
181
+ 180,173,85,58,3,43,female,69,210,31,130,75,44,47,"0,94",No diabetes
182
+ 181,162,76,40,"4,1",43,male,67,216,"33,8",100,70,41,44,"0,93",No diabetes
183
+ 182,322,87,92,"3,5",43,female,56,120,"26,9",120,98,32,41,"0,78",No diabetes
184
+ 183,254,84,52,"4,9",43,female,62,145,"26,5",125,70,31,38,"0,82",No diabetes
185
+ 184,160,100,36,"4,4",43,female,64,140,24,180,110,37,40,"0,93",No diabetes
186
+ 185,192,109,44,"4,4",43,female,64,325,"55,8",141,79,53,62,"0,85",No diabetes
187
+ 186,197,120,37,"5,3",43,male,71,179,25,146,98,37,44,"0,84",No diabetes
188
+ 187,237,87,41,"5,8",43,female,64,181,"31,1",104,90,36,46,"0,78",No diabetes
189
+ 188,190,84,44,"4,3",43,female,62,163,"29,8",135,88,40,45,"0,89",No diabetes
190
+ 189,190,228,57,"3,3",43,female,65,198,"32,9",110,64,40,49,"0,82",Diabetes
191
+ 190,202,84,33,"6,1",44,male,68,157,"23,9",125,80,33,37,"0,89",No diabetes
192
+ 191,244,101,39,"6,3",44,male,71,168,"23,4",140,89,36,39,"0,92",No diabetes
193
+ 192,168,101,59,"2,8",44,female,64,160,"27,5",130,88,40,43,"0,93",No diabetes
194
+ 193,260,67,46,"5,7",44,female,62,159,"29,1",140,94,36,43,"0,84",No diabetes
195
+ 194,214,87,35,"6,1",44,female,64,190,"32,6",140,75,38,44,"0,86",No diabetes
196
+ 195,207,187,46,"4,5",44,female,67,201,"31,5",150,74,46,49,"0,94",Diabetes
197
+ 196,203,71,78,"2,6",45,male,66,115,"18,6",135,88,30,34,"0,88",No diabetes
198
+ 197,189,80,40,"4,7",45,male,69,190,"28,1",140,75,39,44,"0,89",No diabetes
199
+ 198,216,109,86,"2,5",45,female,67,147,23,140,102,32,38,"0,84",No diabetes
200
+ 199,233,92,39,6,45,female,64,167,"28,7",124,86,39,44,"0,89",No diabetes
201
+ 200,177,87,49,"3,6",45,male,69,166,"24,5",160,80,34,40,"0,85",No diabetes
202
+ 201,191,83,88,"2,2",45,female,67,151,"23,6",130,90,33,38,"0,87",No diabetes
203
+ 202,142,155,25,"5,7",45,male,69,204,"30,1",165,115,40,43,"0,93",No diabetes
204
+ 203,219,130,44,5,45,male,67,218,"34,1",172,110,41,45,"0,91",Diabetes
205
+ 204,190,107,32,"5,9",46,male,72,205,"27,8",145,88,46,49,"0,94",No diabetes
206
+ 205,203,82,56,"3,6",46,female,62,121,"22,1",118,59,29,38,"0,76",No diabetes
207
+ 206,207,102,43,"4,8",46,female,63,179,"31,7",212,114,38,46,"0,83",No diabetes
208
+ 207,242,108,53,"4,6",46,female,62,183,"33,5",130,86,37,45,"0,82",No diabetes
209
+ 208,183,81,60,"3,1",47,female,66,186,30,140,97,39,44,"0,89",No diabetes
210
+ 209,234,65,76,"3,1",47,male,67,230,36,137,100,45,46,"0,98",No diabetes
211
+ 210,118,95,39,3,47,female,64,123,"21,1",140,76,30,36,"0,83",No diabetes
212
+ 211,266,82,54,"4,9",47,male,68,142,"21,6",118,78,35,39,"0,9",No diabetes
213
+ 212,223,90,48,"4,6",47,female,65,232,"38,6",120,86,46,54,"0,85",No diabetes
214
+ 213,245,120,39,"6,3",47,female,63,156,"27,6",142,102,35,39,"0,9",Diabetes
215
+ 214,173,225,31,"5,6",47,male,73,260,"34,3",150,98,42,47,"0,89",Diabetes
216
+ 215,172,117,56,"3,1",48,female,63,170,"30,1",130,82,35,42,"0,83",No diabetes
217
+ 216,190,74,50,"3,8",48,male,68,100,"15,2",120,85,27,33,"0,82",No diabetes
218
+ 217,134,105,42,"3,2",48,male,70,173,"24,8",178,120,36,40,"0,9",No diabetes
219
+ 218,268,85,51,"5,3",48,male,70,120,"17,2",150,105,32,35,"0,91",No diabetes
220
+ 219,209,87,34,"6,1",48,female,63,121,"21,4",111,62,32,38,"0,84",No diabetes
221
+ 220,201,81,87,"2,3",48,female,68,146,"22,2",145,95,32,41,"0,78",No diabetes
222
+ 221,204,89,56,"3,6",48,male,68,196,"29,8",170,96,38,42,"0,9",No diabetes
223
+ 222,307,87,58,"5,3",49,male,67,181,"28,3",120,80,41,42,"0,98",No diabetes
224
+ 223,189,75,72,"2,6",49,female,62,205,"37,5",120,80,40,49,"0,82",No diabetes
225
+ 224,160,196,33,"4,8",49,male,71,266,"37,1",150,98,49,45,"1,09",Diabetes
226
+ 225,237,233,58,"4,1",49,female,62,189,"34,6",130,90,43,47,"0,91",Diabetes
227
+ 226,158,91,48,"3,3",50,male,71,180,"25,1",136,90,36,40,"0,9",No diabetes
228
+ 227,255,78,38,"6,7",50,female,65,183,"30,4",130,100,37,43,"0,86",No diabetes
229
+ 228,196,115,62,"3,2",50,male,67,140,"21,9",176,110,35,37,"0,95",No diabetes
230
+ 229,185,67,59,"3,1",50,female,64,228,"39,1",142,90,42,54,"0,78",No diabetes
231
+ 230,293,115,54,"5,4",50,male,71,170,"23,7",131,75,34,39,"0,87",No diabetes
232
+ 231,188,88,51,"3,7",50,female,61,147,"27,8",160,66,34,41,"0,83",No diabetes
233
+ 232,174,173,34,"5,1",50,male,70,263,"37,7",159,99,51,64,"0,8",No diabetes
234
+ 233,158,91,31,"5,1",50,male,70,215,"30,8",138,89,40,45,"0,89",No diabetes
235
+ 234,181,255,26,7,50,male,71,320,"44,6",140,86,56,49,"1,14",Diabetes
236
+ 235,140,385,31,"4,5",50,male,69,172,"25,4",138,66,37,41,"0,9",Diabetes
237
+ 236,192,85,69,"2,8",51,male,65,146,"24,3",130,110,33,36,"0,92",No diabetes
238
+ 237,284,89,54,"5,3",51,female,63,154,"27,3",140,100,32,43,"0,74",No diabetes
239
+ 238,222,82,87,"2,6",51,female,66,110,"17,8",150,110,28,37,"0,76",No diabetes
240
+ 239,249,81,28,"8,9",51,female,65,200,"33,3",122,90,43,46,"0,93",No diabetes
241
+ 240,212,79,49,"4,3",51,female,65,145,"24,1",230,120,38,42,"0,9",No diabetes
242
+ 241,215,110,36,6,51,female,67,282,"44,2",142,78,52,59,"0,88",Diabetes
243
+ 242,218,182,54,4,51,female,66,215,"34,7",139,69,42,53,"0,79",Diabetes
244
+ 243,443,185,23,"19,3",51,female,70,235,"33,7",158,98,43,48,"0,9",Diabetes
245
+ 244,218,68,46,"4,7",52,female,62,170,"31,1",142,79,40,43,"0,93",No diabetes
246
+ 245,171,97,69,"2,5",52,male,71,159,"22,2",125,72,33,39,"0,85",No diabetes
247
+ 246,255,83,90,"2,8",52,male,70,120,"17,2",170,110,30,33,"0,91",No diabetes
248
+ 247,182,85,43,"4,2",52,male,68,139,"21,1",130,90,29,35,"0,83",No diabetes
249
+ 248,206,83,68,3,52,male,69,153,"22,6",140,98,36,40,"0,9",No diabetes
250
+ 249,261,101,83,"3,1",52,female,64,198,34,152,92,42,49,"0,86",No diabetes
251
+ 250,204,57,74,"2,8",52,male,75,142,"17,7",140,90,31,35,"0,89",No diabetes
252
+ 251,196,120,67,"2,9",52,female,62,147,"26,9",144,94,34,42,"0,81",Diabetes
253
+ 252,219,78,67,"3,3",53,female,64,179,"30,7",135,100,39,47,"0,83",No diabetes
254
+ 253,273,94,49,"5,6",53,female,64,174,"29,9",160,96,34,43,"0,79",No diabetes
255
+ 254,225,74,36,"6,3",53,female,63,182,"32,2",126,80,38,46,"0,83",No diabetes
256
+ 255,185,84,52,"3,6",53,female,61,145,"27,4",147,72,37,40,"0,93",No diabetes
257
+ 256,242,297,34,"7,1",53,male,69,216,"31,9",142,96,43,45,"0,96",Diabetes
258
+ 257,296,369,46,"6,4",53,male,69,173,"25,5",138,94,35,39,"0,9",Diabetes
259
+ 258,228,76,53,"4,3",54,male,66,170,"27,4",121,62,36,41,"0,88",No diabetes
260
+ 259,194,87,65,3,54,male,69,129,19,170,96,30,37,"0,81",No diabetes
261
+ 260,216,79,46,"4,7",54,female,65,138,23,132,80,33,39,"0,85",No diabetes
262
+ 261,240,96,57,"4,2",54,female,65,175,"29,1",152,100,37,43,"0,86",No diabetes
263
+ 262,148,193,14,"10,6",54,female,67,165,"25,8",140,65,42,42,1,No diabetes
264
+ 263,271,103,90,3,55,female,63,114,"20,2",180,105,30,37,"0,81",No diabetes
265
+ 264,204,94,54,"3,8",55,female,66,202,"32,6",140,90,43,47,"0,91",No diabetes
266
+ 265,174,93,77,"2,3",55,male,70,140,"20,1",118,86,32,33,"0,97",No diabetes
267
+ 266,157,74,47,"3,3",55,female,66,219,"35,3",150,82,43,52,"0,83",No diabetes
268
+ 267,263,89,40,"6,6",55,female,63,202,"35,8",108,72,45,50,"0,9",No diabetes
269
+ 268,160,122,41,"3,9",55,female,67,223,"34,9",136,83,43,48,"0,9",No diabetes
270
+ 269,179,236,63,"2,8",55,male,75,186,"23,2",122,74,38,38,1,Diabetes
271
+ 270,208,95,32,"6,5",56,male,68,183,"27,8",131,75,36,39,"0,92",No diabetes
272
+ 271,129,110,42,"3,1",56,male,74,151,"19,4",140,75,34,38,"0,89",No diabetes
273
+ 272,219,173,31,"7,1",56,female,65,197,"32,8",100,50,41,50,"0,82",Diabetes
274
+ 273,404,206,33,"12,2",56,male,69,159,"23,5",162,88,38,39,"0,97",Diabetes
275
+ 274,138,81,45,"3,1",57,male,73,164,"21,6",148,81,31,37,"0,84",No diabetes
276
+ 275,173,80,57,3,57,male,71,145,"20,2",124,64,31,36,"0,86",No diabetes
277
+ 276,209,176,55,"3,8",57,female,61,150,"28,3",115,68,36,39,"0,92",Diabetes
278
+ 277,228,92,37,"6,2",58,female,61,256,"48,4",190,92,49,57,"0,86",No diabetes
279
+ 278,227,85,26,"8,7",58,male,70,211,"30,3",144,82,38,43,"0,88",No diabetes
280
+ 279,201,106,53,"3,8",58,male,66,215,"34,7",186,102,46,44,"1,05",No diabetes
281
+ 280,251,94,36,7,58,female,63,154,"27,3",174,75,38,41,"0,93",No diabetes
282
+ 281,211,48,34,"6,2",58,male,67,177,"27,7",162,78,38,43,"0,88",No diabetes
283
+ 282,115,239,36,"3,2",58,male,69,200,"29,5",125,69,30,37,"0,81",Diabetes
284
+ 283,204,113,35,"5,8",59,male,73,187,"24,7",148,76,38,37,"1,03",No diabetes
285
+ 284,215,97,46,"4,7",59,female,63,176,"31,2",140,70,34,44,"0,77",No diabetes
286
+ 285,221,126,48,"4,6",59,female,62,177,"32,4",130,78,39,45,"0,87",No diabetes
287
+ 286,220,95,58,"3,8",59,female,66,138,"22,3",138,80,32,38,"0,84",No diabetes
288
+ 287,193,248,24,8,59,female,66,189,"30,5",140,90,38,45,"0,84",Diabetes
289
+ 288,195,108,46,"4,2",59,female,67,172,"26,9",150,102,38,43,"0,88",Diabetes
290
+ 289,219,112,73,3,59,male,66,170,"27,4",146,92,37,40,"0,93",Diabetes
291
+ 290,289,267,38,"7,6",59,male,68,169,"25,7",142,79,36,38,"0,95",Diabetes
292
+ 291,198,92,62,"3,2",60,male,70,163,"23,4",126,78,36,40,"0,9",No diabetes
293
+ 292,192,56,42,"4,6",60,female,62,134,"24,5",130,70,31,40,"0,78",No diabetes
294
+ 293,242,82,54,"4,5",60,female,65,156,26,130,90,39,45,"0,87",No diabetes
295
+ 294,235,102,42,"5,6",60,male,69,186,"27,5",148,98,40,42,"0,95",No diabetes
296
+ 295,277,119,62,"4,5",60,female,61,128,"24,2",140,86,33,39,"0,85",No diabetes
297
+ 296,162,90,46,"3,5",60,female,63,121,"21,4",110,64,32,34,"0,94",No diabetes
298
+ 297,318,270,108,"2,9",60,female,65,167,"27,8",132,72,38,44,"0,86",No diabetes
299
+ 298,279,270,40,7,60,female,68,224,"34,1",174,90,48,50,"0,96",Diabetes
300
+ 299,128,223,24,"5,3",60,male,67,196,"30,7",110,68,42,43,"0,98",Diabetes
301
+ 300,203,90,51,4,60,female,59,123,"24,8",130,72,36,41,"0,88",Diabetes
302
+ 301,143,91,37,"3,9",61,female,65,220,"36,6",160,92,40,50,"0,8",No diabetes
303
+ 302,300,103,44,"6,8",61,female,67,169,"26,5",138,78,40,44,"0,91",No diabetes
304
+ 303,206,94,44,"4,7",61,female,63,199,"35,2",180,96,41,47,"0,87",No diabetes
305
+ 304,182,85,37,"4,9",61,female,69,174,"25,7",176,86,49,43,"1,14",No diabetes
306
+ 305,198,86,66,3,61,male,74,152,"19,5",138,76,33,38,"0,87",No diabetes
307
+ 306,211,225,29,"7,3",61,female,63,144,"25,5",190,100,40,42,"0,95",Diabetes
308
+ 307,265,330,34,"7,8",61,male,74,191,"24,5",170,88,39,41,"0,95",Diabetes
309
+ 308,204,128,61,"3,3",62,male,68,180,"27,4",141,81,38,41,"0,93",No diabetes
310
+ 309,169,95,29,"5,8",62,male,66,251,"40,5",118,72,50,47,"1,06",No diabetes
311
+ 310,236,102,36,"6,6",62,male,76,160,"19,5",150,80,35,39,"0,9",No diabetes
312
+ 311,235,109,59,4,62,female,63,290,"51,4",175,80,55,62,"0,89",Diabetes
313
+ 312,196,206,41,"4,8",62,female,65,196,"32,6",178,90,46,51,"0,9",Diabetes
314
+ 313,180,92,34,"5,3",63,male,69,169,25,145,72,35,39,"0,9",No diabetes
315
+ 314,194,54,57,"3,4",63,male,70,181,26,184,76,37,42,"0,88",No diabetes
316
+ 315,194,80,34,"5,7",63,male,73,175,"23,1",131,88,34,39,"0,87",No diabetes
317
+ 316,212,82,68,"3,1",63,male,70,161,"23,1",180,110,37,40,"0,93",No diabetes
318
+ 317,293,87,120,"2,4",63,female,64,179,"30,7",142,80,47,45,"1,04",No diabetes
319
+ 318,194,95,36,"5,4",63,female,58,210,"43,9",140,100,44,53,"0,83",No diabetes
320
+ 319,277,88,45,"6,2",63,female,64,223,"38,3",220,100,45,54,"0,83",No diabetes
321
+ 320,157,91,34,"4,6",63,male,69,166,"24,5",106,82,39,38,"1,03",No diabetes
322
+ 321,283,145,39,"7,3",63,female,61,200,"37,8",190,110,44,48,"0,92",Diabetes
323
+ 322,215,119,44,"3,9",63,female,63,158,28,160,68,34,42,"0,81",Diabetes
324
+ 323,342,251,48,"7,1",63,female,65,201,"33,4",178,88,45,46,"0,98",Diabetes
325
+ 324,202,81,55,"3,7",64,female,62,167,"30,5",190,118,44,47,"0,94",No diabetes
326
+ 325,255,100,34,"7,5",64,male,68,227,"34,5",134,74,44,47,"0,94",No diabetes
327
+ 326,181,177,24,"7,5",64,male,71,225,"31,4",130,66,44,47,"0,94",Diabetes
328
+ 327,249,90,28,"8,9",64,male,68,183,"27,8",138,80,44,41,"1,07",Diabetes
329
+ 328,249,197,44,"5,7",64,female,63,159,"28,2",151,85,33,41,"0,8",Diabetes
330
+ 329,219,106,50,"4,4",65,female,63,233,"41,3",140,90,40,53,"0,75",No diabetes
331
+ 330,229,95,74,"3,1",65,female,62,151,"27,6",125,64,37,42,"0,88",No diabetes
332
+ 331,212,97,45,"4,7",65,female,61,187,"35,3",158,94,43,47,"0,91",No diabetes
333
+ 332,170,67,33,"5,2",65,male,69,182,"26,9",140,65,42,39,"1,08",No diabetes
334
+ 333,159,172,28,"5,7",65,male,70,181,26,142,81,43,49,"0,88",Diabetes
335
+ 334,224,341,33,"6,8",65,male,67,197,"30,9",160,80,42,43,"0,98",Diabetes
336
+ 335,263,82,92,"2,9",66,female,66,121,"19,5",104,64,31,33,"0,94",No diabetes
337
+ 336,184,76,42,"4,4",66,male,74,185,"23,8",130,75,40,41,"0,98",No diabetes
338
+ 337,281,92,41,"6,9",66,female,62,185,"33,8",158,88,48,44,"1,09",No diabetes
339
+ 338,221,120,83,"2,7",66,female,64,130,"22,3",110,64,31,38,"0,82",No diabetes
340
+ 339,188,174,24,"7,8",66,male,68,210,"31,9",160,78,45,48,"0,94",No diabetes
341
+ 340,246,104,62,4,66,female,66,189,"30,5",200,94,45,46,"0,98",Diabetes
342
+ 341,204,173,37,"5,5",66,male,67,146,"22,9",138,78,36,48,"0,75",Diabetes
343
+ 342,78,93,12,"6,5",67,male,67,119,"18,6",110,50,33,38,"0,87",No diabetes
344
+ 343,206,85,46,"4,5",67,male,67,178,"27,9",119,68,37,41,"0,9",No diabetes
345
+ 344,174,125,44,4,67,male,68,198,"30,1",119,72,36,43,"0,84",No diabetes
346
+ 345,254,121,39,"6,5",67,male,68,167,"25,4",161,118,36,39,"0,92",Diabetes
347
+ 346,198,118,46,"4,3",68,female,63,124,22,130,70,32,38,"0,84",No diabetes
348
+ 347,143,371,46,"3,1",68,male,67,158,"24,7",138,82,37,43,"0,86",No diabetes
349
+ 348,207,77,46,"4,5",68,male,55,130,"30,2",199,115,29,33,"0,88",No diabetes
350
+ 349,236,111,82,"2,9",68,female,61,119,"22,5",142,96,29,37,"0,78",No diabetes
351
+ 350,260,68,60,"4,3",69,female,59,179,"36,1",158,98,45,48,"0,94",No diabetes
352
+ 351,242,74,55,"4,4",70,female,66,200,"32,3",140,65,41,47,"0,87",No diabetes
353
+ 352,186,97,50,"3,7",70,male,67,178,"27,9",148,88,42,41,"1,02",No diabetes
354
+ 353,182,206,43,"4,2",70,male,69,214,"31,6",158,90,45,48,"0,94",Diabetes
355
+ 354,289,111,50,"5,8",70,female,60,220,43,126,80,51,54,"0,94",Diabetes
356
+ 355,231,70,110,"2,1",71,female,63,155,"27,5",150,78,33,41,"0,8",No diabetes
357
+ 356,199,85,59,"3,4",71,male,69,171,"25,2",136,86,38,40,"0,95",No diabetes
358
+ 357,228,115,61,"3,7",71,female,63,244,"43,2",170,92,48,51,"0,94",No diabetes
359
+ 358,213,203,75,"2,8",71,female,63,165,"29,2",150,80,34,42,"0,81",Diabetes
360
+ 359,204,120,44,"4,6",72,male,65,167,"27,8",140,72,45,46,"0,98",No diabetes
361
+ 360,205,83,42,"4,9",72,female,61,180,34,170,90,39,47,"0,83",No diabetes
362
+ 361,213,76,40,"5,3",72,female,59,137,"27,7",130,60,40,40,1,No diabetes
363
+ 362,207,71,41,5,72,male,70,180,"25,8",138,88,39,40,"0,98",Diabetes
364
+ 363,235,106,37,"6,4",73,male,65,183,"30,4",134,78,43,46,"0,93",No diabetes
365
+ 364,237,118,45,"5,3",73,female,64,174,"29,9",162,75,38,44,"0,86",Diabetes
366
+ 365,306,92,56,"5,5",74,male,69,184,"27,2",140,72,39,41,"0,95",No diabetes
367
+ 366,223,88,42,"5,3",74,female,62,165,"30,2",250,100,41,46,"0,89",No diabetes
368
+ 367,296,262,60,"4,9",74,female,63,183,"32,4",159,99,42,48,"0,88",Diabetes
369
+ 368,205,79,32,"6,4",75,male,69,204,"30,1",136,90,44,42,"1,05",No diabetes
370
+ 369,254,342,37,"6,9",75,male,68,210,"31,9",151,87,44,45,"0,98",Diabetes
371
+ 370,159,100,54,"2,9",76,male,66,188,"30,3",116,53,40,41,"0,98",No diabetes
372
+ 371,196,82,58,"3,4",76,male,65,154,"25,6",158,78,37,41,"0,9",No diabetes
373
+ 372,173,131,69,"2,5",76,female,61,102,"19,3",160,60,31,33,"0,94",No diabetes
374
+ 373,219,112,73,3,76,male,64,105,18,125,82,29,33,"0,88",No diabetes
375
+ 374,209,113,65,"3,2",76,female,60,143,"27,9",156,78,35,40,"0,88",Diabetes
376
+ 375,215,80,100,"2,2",78,male,65,109,"18,1",170,88,33,34,"0,97",No diabetes
377
+ 376,210,81,81,"2,6",78,male,66,145,"23,4",110,70,38,39,"0,97",No diabetes
378
+ 377,224,98,44,"5,1",78,female,63,160,"28,3",150,81,36,45,"0,8",No diabetes
379
+ 378,195,171,29,"6,7",78,male,66,172,"27,8",130,82,40,40,1,No diabetes
380
+ 379,235,91,37,"6,4",79,female,65,134,"22,3",142,70,34,38,"0,89",No diabetes
381
+ 380,292,235,55,"5,3",79,male,70,165,"23,7",170,90,39,41,"0,95",Diabetes
382
+ 381,157,92,47,"3,3",80,male,71,212,"29,6",156,88,47,48,"0,98",No diabetes
383
+ 382,252,161,87,"2,9",80,female,62,162,"29,6",160,100,44,41,"1,07",Diabetes
384
+ 383,271,121,40,"6,8",81,female,64,158,"27,1",146,76,36,43,"0,84",No diabetes
385
+ 384,240,88,49,"4,9",82,female,63,170,"30,1",180,86,41,46,"0,89",No diabetes
386
+ 385,255,112,34,"7,5",82,male,66,163,"26,3",179,89,37,43,"0,86",No diabetes
387
+ 386,227,105,44,"5,2",83,female,59,125,"25,2",150,90,35,40,"0,88",No diabetes
388
+ 387,226,279,52,"4,3",84,female,60,192,"37,5",144,88,41,48,"0,85",Diabetes
389
+ 388,301,90,118,"2,6",89,female,61,115,"21,7",218,90,31,41,"0,76",No diabetes
390
+ 389,232,184,114,2,91,female,61,127,24,170,82,35,38,"0,92",Diabetes
391
+ 390,165,94,69,"2,4",92,female,62,217,"39,7",160,82,51,51,1,No diabetes
model.ipynb ADDED
@@ -0,0 +1,1029 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "source": [
6
+ "## Importing modules and Loading dataset.\n",
7
+ "This section imports the required Python modules and loads the dataset used in this notebook, in this case the diabetes dataset (`diabetes.csv`).\n",
8
+ "\n"
9
+ ],
10
+ "metadata": {
11
+ "id": "r5yZ0Codo2rF"
12
+ }
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 1,
17
+ "metadata": {
18
+ "collapsed": true,
19
+ "pycharm": {
20
+ "name": "#%%\n"
21
+ },
22
+ "id": "Xfbj4kG4UwcC"
23
+ },
24
+ "outputs": [],
25
+ "source": [
26
+ "# importing python module.\n",
27
+ "import pandas as pd\n",
28
+ "from lightgbm.sklearn import LGBMClassifier\n",
29
+ "from sklearn.preprocessing import RobustScaler, OrdinalEncoder\n",
30
+ "from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n",
31
+ "from xgboost.sklearn import XGBClassifier\n",
32
+ "from sklearn.metrics import f1_score\n",
33
+ "\n",
34
+ "import warnings\n",
35
+ "warnings.filterwarnings(\"ignore\")"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 2,
41
+ "outputs": [
42
+ {
43
+ "data": {
44
+ "text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age gender \\\n0 1 193 77 49 3,9 19 female \n1 2 146 79 41 3,6 19 female \n2 3 217 75 54 4 20 female \n3 4 226 97 70 3,2 20 female \n4 5 164 91 67 2,4 20 female \n\n height weight bmi systolic_bp diastolic_bp waist hip \\\n0 61 119 22,5 118 70 32 38 \n1 60 135 26,4 108 58 33 40 \n2 67 187 29,3 110 72 40 45 \n3 64 114 19,6 122 64 31 39 \n4 70 141 20,2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0,84 No diabetes \n1 0,83 No diabetes \n2 0,89 No diabetes \n3 0,79 No diabetes \n4 0,82 No diabetes ",
45
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3,9</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22,5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0,84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3,6</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26,4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0,83</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29,3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0,89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3,2</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19,6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0,79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2,4</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20,2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0,82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
46
+ },
47
+ "execution_count": 2,
48
+ "metadata": {},
49
+ "output_type": "execute_result"
50
+ }
51
+ ],
52
+ "source": [
53
+ "# loading dataset with pandas\n",
54
+ "dia = pd.read_csv(\"./dataset/diabetes.csv\")\n",
55
+ "\n",
56
+ "dia.head()"
57
+ ],
58
+ "metadata": {
59
+ "pycharm": {
60
+ "name": "#%%\n"
61
+ },
62
+ "colab": {
63
+ "base_uri": "https://localhost:8080/",
64
+ "height": 357
65
+ },
66
+ "id": "yx869JONUwcJ",
67
+ "outputId": "ccdf4d8b-adf1-40b1-fe75-bc8a0dccfa6c"
68
+ }
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 3,
73
+ "outputs": [
74
+ {
75
+ "name": "stdout",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "<class 'pandas.core.frame.DataFrame'>\n",
79
+ "RangeIndex: 390 entries, 0 to 389\n",
80
+ "Data columns (total 16 columns):\n",
81
+ " # Column Non-Null Count Dtype \n",
82
+ "--- ------ -------------- ----- \n",
83
+ " 0 patient_number 390 non-null int64 \n",
84
+ " 1 cholesterol 390 non-null int64 \n",
85
+ " 2 glucose 390 non-null int64 \n",
86
+ " 3 hdl_chol 390 non-null int64 \n",
87
+ " 4 chol_hdl_ratio 390 non-null object\n",
88
+ " 5 age 390 non-null int64 \n",
89
+ " 6 gender 390 non-null object\n",
90
+ " 7 height 390 non-null int64 \n",
91
+ " 8 weight 390 non-null int64 \n",
92
+ " 9 bmi 390 non-null object\n",
93
+ " 10 systolic_bp 390 non-null int64 \n",
94
+ " 11 diastolic_bp 390 non-null int64 \n",
95
+ " 12 waist 390 non-null int64 \n",
96
+ " 13 hip 390 non-null int64 \n",
97
+ " 14 waist_hip_ratio 390 non-null object\n",
98
+ " 15 diabetes 390 non-null object\n",
99
+ "dtypes: int64(11), object(5)\n",
100
+ "memory usage: 48.9+ KB\n"
101
+ ]
102
+ }
103
+ ],
104
+ "source": [
105
+ "# inspecting column dtypes and non-null counts with pandas\n",
106
+ "dia.info()"
107
+ ],
108
+ "metadata": {
109
+ "pycharm": {
110
+ "name": "#%%\n"
111
+ },
112
+ "colab": {
113
+ "base_uri": "https://localhost:8080/"
114
+ },
115
+ "id": "rftvNRifUwcL",
116
+ "outputId": "d0799384-e171-4bc9-c149-873c22252711"
117
+ }
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": 4,
122
+ "outputs": [
123
+ {
124
+ "data": {
125
+ "text/plain": " patient_number cholesterol glucose hdl_chol age \\\ncount 390.000000 390.000000 390.000000 390.000000 390.000000 \nmean 195.500000 207.230769 107.338462 50.266667 46.774359 \nstd 112.727548 44.666005 53.798188 17.279069 16.435911 \nmin 1.000000 78.000000 48.000000 12.000000 19.000000 \n25% 98.250000 179.000000 81.000000 38.000000 34.000000 \n50% 195.500000 203.000000 90.000000 46.000000 44.500000 \n75% 292.750000 229.000000 107.750000 59.000000 60.000000 \nmax 390.000000 443.000000 385.000000 120.000000 92.000000 \n\n height weight systolic_bp diastolic_bp waist \\\ncount 390.000000 390.000000 390.000000 390.000000 390.000000 \nmean 65.951282 177.407692 137.133333 83.289744 37.869231 \nstd 3.918867 40.407824 22.859528 13.498192 5.760947 \nmin 52.000000 99.000000 90.000000 48.000000 26.000000 \n25% 63.000000 150.250000 122.000000 75.000000 33.000000 \n50% 66.000000 173.000000 136.000000 82.000000 37.000000 \n75% 69.000000 200.000000 148.000000 90.000000 41.000000 \nmax 76.000000 325.000000 250.000000 124.000000 56.000000 \n\n hip \ncount 390.000000 \nmean 42.992308 \nstd 5.664342 \nmin 30.000000 \n25% 39.000000 \n50% 42.000000 \n75% 46.000000 \nmax 64.000000 ",
126
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>age</th>\n <th>height</th>\n <th>weight</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>195.500000</td>\n <td>207.230769</td>\n <td>107.338462</td>\n <td>50.266667</td>\n <td>46.774359</td>\n <td>65.951282</td>\n <td>177.407692</td>\n <td>137.133333</td>\n <td>83.289744</td>\n <td>37.869231</td>\n <td>42.992308</td>\n </tr>\n <tr>\n <th>std</th>\n <td>112.727548</td>\n <td>44.666005</td>\n <td>53.798188</td>\n <td>17.279069</td>\n <td>16.435911</td>\n <td>3.918867</td>\n <td>40.407824</td>\n <td>22.859528</td>\n <td>13.498192</td>\n <td>5.760947</td>\n <td>5.664342</td>\n </tr>\n <tr>\n <th>min</th>\n <td>1.000000</td>\n <td>78.000000</td>\n <td>48.000000</td>\n <td>12.000000</td>\n <td>19.000000</td>\n <td>52.000000</td>\n <td>99.000000</td>\n <td>90.000000</td>\n <td>48.000000</td>\n <td>26.000000</td>\n <td>30.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>98.250000</td>\n <td>179.000000</td>\n <td>81.000000</td>\n <td>38.000000</td>\n <td>34.000000</td>\n <td>63.000000</td>\n <td>150.250000</td>\n <td>122.000000</td>\n <td>75.000000</td>\n <td>33.000000</td>\n <td>39.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>195.500000</td>\n <td>203.000000</td>\n <td>90.000000</td>\n 
<td>46.000000</td>\n <td>44.500000</td>\n <td>66.000000</td>\n <td>173.000000</td>\n <td>136.000000</td>\n <td>82.000000</td>\n <td>37.000000</td>\n <td>42.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>292.750000</td>\n <td>229.000000</td>\n <td>107.750000</td>\n <td>59.000000</td>\n <td>60.000000</td>\n <td>69.000000</td>\n <td>200.000000</td>\n <td>148.000000</td>\n <td>90.000000</td>\n <td>41.000000</td>\n <td>46.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>390.000000</td>\n <td>443.000000</td>\n <td>385.000000</td>\n <td>120.000000</td>\n <td>92.000000</td>\n <td>76.000000</td>\n <td>325.000000</td>\n <td>250.000000</td>\n <td>124.000000</td>\n <td>56.000000</td>\n <td>64.000000</td>\n </tr>\n </tbody>\n</table>\n</div>"
127
+ },
128
+ "execution_count": 4,
129
+ "metadata": {},
130
+ "output_type": "execute_result"
131
+ }
132
+ ],
133
+ "source": [
134
+ "dia.describe()"
135
+ ],
136
+ "metadata": {
137
+ "pycharm": {
138
+ "name": "#%%\n"
139
+ },
140
+ "colab": {
141
+ "base_uri": "https://localhost:8080/",
142
+ "height": 364
143
+ },
144
+ "id": "mPXcxzuwUwcN",
145
+ "outputId": "a1107ea3-215d-4400-e6ff-2d8fd7ff8b55"
146
+ }
147
+ },
148
+ {
149
+ "cell_type": "markdown",
150
+ "source": [
151
+ "## Wrangling dataset."
152
+ ],
153
+ "metadata": {
154
+ "id": "S2hRdWcPqcrP"
155
+ }
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 5,
160
+ "outputs": [],
161
+ "source": [
162
+ "dia.chol_hdl_ratio = round(dia.cholesterol / dia.hdl_chol,2)"
163
+ ],
164
+ "metadata": {
165
+ "pycharm": {
166
+ "name": "#%%\n"
167
+ },
168
+ "id": "DCl3woxiUwcO"
169
+ }
170
+ },
171
+ {
172
+ "cell_type": "code",
173
+ "execution_count": 6,
174
+ "outputs": [
175
+ {
176
+ "data": {
177
+ "text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22,5 118 70 32 38 \n1 female 60 135 26,4 108 58 33 40 \n2 female 67 187 29,3 110 72 40 45 \n3 female 64 114 19,6 122 64 31 39 \n4 female 70 141 20,2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0,84 No diabetes \n1 0,83 No diabetes \n2 0,89 No diabetes \n3 0,79 No diabetes \n4 0,82 No diabetes ",
178
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22,5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0,84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26,4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0,83</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29,3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0,89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19,6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0,79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20,2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0,82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
179
+ },
180
+ "execution_count": 6,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "dia.head()"
187
+ ],
188
+ "metadata": {
189
+ "pycharm": {
190
+ "name": "#%%\n"
191
+ },
192
+ "colab": {
193
+ "base_uri": "https://localhost:8080/",
194
+ "height": 357
195
+ },
196
+ "id": "QNlQedszUwcP",
197
+ "outputId": "63231eb5-798a-4c07-8aae-851004ab3787"
198
+ }
199
+ },
200
+ {
201
+ "cell_type": "markdown",
202
+ "source": [],
203
+ "metadata": {
204
+ "collapsed": false,
205
+ "id": "SFHdSj2YUwcQ"
206
+ }
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": 7,
211
+ "outputs": [],
212
+ "source": [
213
+ "dia.waist_hip_ratio= round(dia.waist/dia.hip,2)"
214
+ ],
215
+ "metadata": {
216
+ "pycharm": {
217
+ "name": "#%%\n"
218
+ },
219
+ "id": "ovJyqVa2UwcX"
220
+ }
221
+ },
222
+ {
223
+ "cell_type": "code",
224
+ "execution_count": 8,
225
+ "outputs": [
226
+ {
227
+ "data": {
228
+ "text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22,5 118 70 32 38 \n1 female 60 135 26,4 108 58 33 40 \n2 female 67 187 29,3 110 72 40 45 \n3 female 64 114 19,6 122 64 31 39 \n4 female 70 141 20,2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 No diabetes \n1 0.82 No diabetes \n2 0.89 No diabetes \n3 0.79 No diabetes \n4 0.82 No diabetes ",
229
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22,5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26,4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29,3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19,6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20,2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0.82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
230
+ },
231
+ "execution_count": 8,
232
+ "metadata": {},
233
+ "output_type": "execute_result"
234
+ }
235
+ ],
236
+ "source": [
237
+ "dia.head()"
238
+ ],
239
+ "metadata": {
240
+ "pycharm": {
241
+ "name": "#%%\n"
242
+ },
243
+ "colab": {
244
+ "base_uri": "https://localhost:8080/",
245
+ "height": 357
246
+ },
247
+ "id": "PWqYDcnYUwcZ",
248
+ "outputId": "d0e278d1-d7ed-4503-ee2b-5f94661e56e5"
249
+ }
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 9,
254
+ "outputs": [],
255
+ "source": [
256
+ "dia.bmi = pd.to_numeric(dia.bmi.str.replace(\",\",\".\"))"
257
+ ],
258
+ "metadata": {
259
+ "pycharm": {
260
+ "name": "#%%\n"
261
+ },
262
+ "id": "CXAX15VHUwce"
263
+ }
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 10,
268
+ "outputs": [
269
+ {
270
+ "data": {
271
+ "text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22.5 118 70 32 38 \n1 female 60 135 26.4 108 58 33 40 \n2 female 67 187 29.3 110 72 40 45 \n3 female 64 114 19.6 122 64 31 39 \n4 female 70 141 20.2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 No diabetes \n1 0.82 No diabetes \n2 0.89 No diabetes \n3 0.79 No diabetes \n4 0.82 No diabetes ",
272
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22.5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26.4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29.3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19.6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20.2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0.82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
273
+ },
274
+ "execution_count": 10,
275
+ "metadata": {},
276
+ "output_type": "execute_result"
277
+ }
278
+ ],
279
+ "source": [
280
+ "dia.head()"
281
+ ],
282
+ "metadata": {
283
+ "pycharm": {
284
+ "name": "#%%\n"
285
+ },
286
+ "colab": {
287
+ "base_uri": "https://localhost:8080/",
288
+ "height": 357
289
+ },
290
+ "id": "Y9Rg5DkoUwcf",
291
+ "outputId": "de5133be-4736-4098-c94c-300eaac58f7d"
292
+ }
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "source": [
297
+ "dia.weight.describe()"
298
+ ],
299
+ "metadata": {
300
+ "colab": {
301
+ "base_uri": "https://localhost:8080/"
302
+ },
303
+ "id": "PglRQVOhWq6F",
304
+ "outputId": "9e50d452-d5c4-41f0-a632-f148bb85c44f"
305
+ },
306
+ "execution_count": 11,
307
+ "outputs": [
308
+ {
309
+ "data": {
310
+ "text/plain": "count 390.000000\nmean 177.407692\nstd 40.407824\nmin 99.000000\n25% 150.250000\n50% 173.000000\n75% 200.000000\nmax 325.000000\nName: weight, dtype: float64"
311
+ },
312
+ "execution_count": 11,
313
+ "metadata": {},
314
+ "output_type": "execute_result"
315
+ }
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": 12,
321
+ "outputs": [
322
+ {
323
+ "data": {
324
+ "text/plain": "patient_number 0\ncholesterol 0\nglucose 0\nhdl_chol 0\nchol_hdl_ratio 0\nage 0\ngender 0\nheight 0\nweight 0\nbmi 0\nsystolic_bp 0\ndiastolic_bp 0\nwaist 0\nhip 0\nwaist_hip_ratio 0\ndiabetes 0\ndtype: int64"
325
+ },
326
+ "execution_count": 12,
327
+ "metadata": {},
328
+ "output_type": "execute_result"
329
+ }
330
+ ],
331
+ "source": [
332
+ "dia.isnull().sum()"
333
+ ],
334
+ "metadata": {
335
+ "collapsed": false,
336
+ "pycharm": {
337
+ "name": "#%%\n"
338
+ }
339
+ }
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "source": [
344
+ "dia.head()"
345
+ ],
346
+ "metadata": {
347
+ "colab": {
348
+ "base_uri": "https://localhost:8080/",
349
+ "height": 357
350
+ },
351
+ "id": "GY9af0LOoUrQ",
352
+ "outputId": "5cb087ef-8459-40e3-c65d-515007489006"
353
+ },
354
+ "execution_count": 13,
355
+ "outputs": [
356
+ {
357
+ "data": {
358
+ "text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22.5 118 70 32 38 \n1 female 60 135 26.4 108 58 33 40 \n2 female 67 187 29.3 110 72 40 45 \n3 female 64 114 19.6 122 64 31 39 \n4 female 70 141 20.2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 No diabetes \n1 0.82 No diabetes \n2 0.89 No diabetes \n3 0.79 No diabetes \n4 0.82 No diabetes ",
359
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22.5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26.4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29.3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19.6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20.2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0.82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
360
+ },
361
+ "execution_count": 13,
362
+ "metadata": {},
363
+ "output_type": "execute_result"
364
+ }
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "source": [
370
+ "s= (dia.dtypes == \"object\")\n",
371
+ "obj_col= list(s[s].index)"
372
+ ],
373
+ "metadata": {
374
+ "id": "cKtpXdi6pwdJ"
375
+ },
376
+ "execution_count": 14,
377
+ "outputs": []
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "source": [
382
+ "obj_col"
383
+ ],
384
+ "metadata": {
385
+ "colab": {
386
+ "base_uri": "https://localhost:8080/"
387
+ },
388
+ "id": "_jnwJCli1cA6",
389
+ "outputId": "563aa279-aae2-4d4e-d5c8-f8d668946539"
390
+ },
391
+ "execution_count": 15,
392
+ "outputs": [
393
+ {
394
+ "data": {
395
+ "text/plain": "['gender', 'diabetes']"
396
+ },
397
+ "execution_count": 15,
398
+ "metadata": {},
399
+ "output_type": "execute_result"
400
+ }
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "code",
405
+ "source": [
406
+ "orde = OrdinalEncoder()\n",
407
+ "dia[obj_col] = orde.fit_transform(dia[obj_col])"
408
+ ],
409
+ "metadata": {
410
+ "id": "KvSeVC8K2FvU"
411
+ },
412
+ "execution_count": 16,
413
+ "outputs": []
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "source": [
418
+ "dia.head()"
419
+ ],
420
+ "metadata": {
421
+ "colab": {
422
+ "base_uri": "https://localhost:8080/",
423
+ "height": 270
424
+ },
425
+ "id": "bY5dg9H53eVS",
426
+ "outputId": "27963a81-6560-455f-8d8d-10240bc5dc33"
427
+ },
428
+ "execution_count": 17,
429
+ "outputs": [
430
+ {
431
+ "data": {
432
+ "text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 0.0 61 119 22.5 118 70 32 38 \n1 0.0 60 135 26.4 108 58 33 40 \n2 0.0 67 187 29.3 110 72 40 45 \n3 0.0 64 114 19.6 122 64 31 39 \n4 0.0 70 141 20.2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 1.0 \n1 0.82 1.0 \n2 0.89 1.0 \n3 0.79 1.0 \n4 0.82 1.0 ",
433
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>0.0</td>\n <td>61</td>\n <td>119</td>\n <td>22.5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>0.0</td>\n <td>60</td>\n <td>135</td>\n <td>26.4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>0.0</td>\n <td>67</td>\n <td>187</td>\n <td>29.3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>0.0</td>\n <td>64</td>\n <td>114</td>\n <td>19.6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>0.0</td>\n <td>70</td>\n <td>141</td>\n <td>20.2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n <td>0.82</td>\n <td>1.0</td>\n </tr>\n 
</tbody>\n</table>\n</div>"
434
+ },
435
+ "execution_count": 17,
436
+ "metadata": {},
437
+ "output_type": "execute_result"
438
+ }
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "source": [
444
+ "X = dia.drop([\"patient_number\", \"diabetes\"], axis= 1)\n",
445
+ "y= dia.diabetes"
446
+ ],
447
+ "metadata": {
448
+ "id": "ZbHayB553gRB"
449
+ },
450
+ "execution_count": 18,
451
+ "outputs": []
452
+ },
453
+ {
454
+ "cell_type": "code",
455
+ "source": [
456
+ "scale = RobustScaler()\n",
457
+ "scaled_x = scale.fit_transform(X, y=y)"
458
+ ],
459
+ "metadata": {
460
+ "id": "GpABdNvA3_8-"
461
+ },
462
+ "execution_count": 19,
463
+ "outputs": []
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "source": [
468
+ "scaled_x"
469
+ ],
470
+ "metadata": {
471
+ "colab": {
472
+ "base_uri": "https://localhost:8080/"
473
+ },
474
+ "id": "qgImunKB4r2i",
475
+ "outputId": "76b3b342-70c9-4aee-b3b6-1c21b9aac379"
476
+ },
477
+ "execution_count": 20,
478
+ "outputs": [
479
+ {
480
+ "data": {
481
+ "text/plain": "array([[-0.2 , -0.48598131, 0.14285714, ..., -0.625 ,\n -0.57142857, -0.41025641],\n [-1.14 , -0.41121495, -0.23809524, ..., -0.5 ,\n -0.28571429, -0.61538462],\n [ 0.28 , -0.56074766, 0.38095238, ..., 0.375 ,\n 0.42857143, 0.1025641 ],\n ...,\n [ 1.96 , 0. , 3.42857143, ..., -0.75 ,\n -0.14285714, -1.23076923],\n [ 0.58 , 3.51401869, 3.23809524, ..., -0.25 ,\n -0.57142857, 0.41025641],\n [-0.76 , 0.14953271, 1.0952381 , ..., 1.75 ,\n 1.28571429, 1.23076923]])"
482
+ },
483
+ "execution_count": 20,
484
+ "metadata": {},
485
+ "output_type": "execute_result"
486
+ }
487
+ ]
488
+ },
489
+ {
490
+ "cell_type": "code",
491
+ "source": [
492
+ "X_train, X_test, y_train, y_test = train_test_split(scaled_x, y, test_size= 0.2, random_state=42)"
493
+ ],
494
+ "metadata": {
495
+ "id": "ZECN7XuJ4uAR"
496
+ },
497
+ "execution_count": 21,
498
+ "outputs": []
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": 22,
503
+ "outputs": [],
504
+ "source": [
505
+ "split = StratifiedShuffleSplit(n_splits=4, random_state=42 )\n",
506
+ "\n",
507
+ "for train_index, test_index in split.split(scaled_x, y):\n",
508
+ " strat_X, strat_test = scaled_x[train_index], scaled_x[test_index]\n",
509
+ " strat_y, strat_ytest = y[train_index], y[test_index]"
510
+ ],
511
+ "metadata": {
512
+ "collapsed": false,
513
+ "pycharm": {
514
+ "name": "#%%\n"
515
+ }
516
+ }
517
+ },
518
+ {
519
+ "cell_type": "code",
520
+ "source": [
521
+ "X_train"
522
+ ],
523
+ "metadata": {
524
+ "colab": {
525
+ "base_uri": "https://localhost:8080/"
526
+ },
527
+ "id": "Zo2R6TH55gTW",
528
+ "outputId": "b12de013-4626-4a0e-aaac-f16281bd50b6"
529
+ },
530
+ "execution_count": 23,
531
+ "outputs": [
532
+ {
533
+ "data": {
534
+ "text/plain": "array([[-0.22 , -1.27102804, -0.19047619, ..., -0.75 ,\n -0.28571429, -1.02564103],\n [-0.44 , 0.41121495, -0.0952381 , ..., 0.125 ,\n 0. , 0.20512821],\n [ 0.18 , -0.41121495, 0.14285714, ..., 0.125 ,\n 0. , 0.20512821],\n ...,\n [-1.48 , 0.74766355, -0.19047619, ..., -0.375 ,\n -0.57142857, 0.1025641 ],\n [ 0.66 , 0.78504673, 1.71428571, ..., -1. ,\n -0.71428571, -1.02564103],\n [ 2.68 , -0.18691589, 0.76190476, ..., -0.125 ,\n 0.28571429, -0.61538462]])"
535
+ },
536
+ "execution_count": 23,
537
+ "metadata": {},
538
+ "output_type": "execute_result"
539
+ }
540
+ ]
541
+ },
542
+ {
543
+ "cell_type": "code",
544
+ "source": [
545
+ "lgbm_model = LGBMClassifier(n_estimators=200, max_depth=-2, random_state=42)"
546
+ ],
547
+ "metadata": {
548
+ "id": "S9cGQrMp5iug"
549
+ },
550
+ "execution_count": 24,
551
+ "outputs": []
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "source": [
556
+ "lgbm_model.fit(X_train, y_train)"
557
+ ],
558
+ "metadata": {
559
+ "colab": {
560
+ "base_uri": "https://localhost:8080/"
561
+ },
562
+ "id": "SWj3o6Cg6nUD",
563
+ "outputId": "b64a97a4-1f1d-46c3-c11e-e429feedd6db"
564
+ },
565
+ "execution_count": 25,
566
+ "outputs": [
567
+ {
568
+ "data": {
569
+ "text/plain": "LGBMClassifier(max_depth=-2, n_estimators=200, random_state=42)"
570
+ },
571
+ "execution_count": 25,
572
+ "metadata": {},
573
+ "output_type": "execute_result"
574
+ }
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "source": [
580
+ "y_pred=lgbm_model.predict(X_test)"
581
+ ],
582
+ "metadata": {
583
+ "id": "8LFEmpW16yNk"
584
+ },
585
+ "execution_count": 26,
586
+ "outputs": []
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "source": [
591
+ "f1_score(y_pred, y_test)"
592
+ ],
593
+ "metadata": {
594
+ "colab": {
595
+ "base_uri": "https://localhost:8080/"
596
+ },
597
+ "id": "I1pWPR0x6_r9",
598
+ "outputId": "4ecaee82-9c32-4ca2-f71a-c376ea853419"
599
+ },
600
+ "execution_count": 27,
601
+ "outputs": [
602
+ {
603
+ "data": {
604
+ "text/plain": "0.9354838709677419"
605
+ },
606
+ "execution_count": 27,
607
+ "metadata": {},
608
+ "output_type": "execute_result"
609
+ }
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "source": [
615
+ "xg_model= XGBClassifier(n_estimators=200, max_depth=4, scale_pos_weight=5.5)"
616
+ ],
617
+ "metadata": {
618
+ "id": "e6JqauyE7Luq"
619
+ },
620
+ "execution_count": 28,
621
+ "outputs": []
622
+ },
623
+ {
624
+ "cell_type": "code",
625
+ "execution_count": 29,
626
+ "outputs": [
627
+ {
628
+ "name": "stdout",
629
+ "output_type": "stream",
630
+ "text": [
631
+ "[23:22:04] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
632
+ ]
633
+ },
634
+ {
635
+ "data": {
636
+ "text/plain": "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n importance_type='gain', interaction_constraints='',\n learning_rate=0.300000012, max_delta_step=0, max_depth=4,\n min_child_weight=1, missing=nan, monotone_constraints='()',\n n_estimators=200, n_jobs=4, num_parallel_tree=1, random_state=0,\n reg_alpha=0, reg_lambda=1, scale_pos_weight=5.5, subsample=1,\n tree_method='exact', validate_parameters=1, verbosity=None)"
637
+ },
638
+ "execution_count": 29,
639
+ "metadata": {},
640
+ "output_type": "execute_result"
641
+ }
642
+ ],
643
+ "source": [
644
+ "xg_model.fit(X_train, y_train)"
645
+ ],
646
+ "metadata": {
647
+ "collapsed": false,
648
+ "pycharm": {
649
+ "name": "#%%\n"
650
+ }
651
+ }
652
+ },
653
+ {
654
+ "cell_type": "code",
655
+ "execution_count": 30,
656
+ "outputs": [],
657
+ "source": [
658
+ "xg_pred = xg_model.predict(X_test)"
659
+ ],
660
+ "metadata": {
661
+ "collapsed": false,
662
+ "pycharm": {
663
+ "name": "#%%\n"
664
+ }
665
+ }
666
+ },
667
+ {
668
+ "cell_type": "code",
669
+ "execution_count": 31,
670
+ "outputs": [
671
+ {
672
+ "data": {
673
+ "text/plain": "0.943089430894309"
674
+ },
675
+ "execution_count": 31,
676
+ "metadata": {},
677
+ "output_type": "execute_result"
678
+ }
679
+ ],
680
+ "source": [
681
+ "f1_score(y_test, xg_pred)"
682
+ ],
683
+ "metadata": {
684
+ "collapsed": false,
685
+ "pycharm": {
686
+ "name": "#%%\n"
687
+ }
688
+ }
689
+ },
690
+ {
691
+ "cell_type": "markdown",
692
+ "source": [
693
+ "## Stratified Shuffle Test"
694
+ ],
695
+ "metadata": {
696
+ "collapsed": false,
697
+ "pycharm": {
698
+ "name": "#%% md\n"
699
+ }
700
+ }
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": 32,
705
+ "outputs": [],
706
+ "source": [
707
+ "lgbm_strat = LGBMClassifier(n_estimators=200, learning_rate=0.0099, max_depth=-2, )"
708
+ ],
709
+ "metadata": {
710
+ "collapsed": false,
711
+ "pycharm": {
712
+ "name": "#%%\n"
713
+ }
714
+ }
715
+ },
716
+ {
717
+ "cell_type": "code",
718
+ "execution_count": 33,
719
+ "outputs": [
720
+ {
721
+ "data": {
722
+ "text/plain": "LGBMClassifier(learning_rate=0.0099, max_depth=-2, n_estimators=200)"
723
+ },
724
+ "execution_count": 33,
725
+ "metadata": {},
726
+ "output_type": "execute_result"
727
+ }
728
+ ],
729
+ "source": [
730
+ "lgbm_strat.fit(strat_X, strat_y)"
731
+ ],
732
+ "metadata": {
733
+ "collapsed": false,
734
+ "pycharm": {
735
+ "name": "#%%\n"
736
+ }
737
+ }
738
+ },
739
+ {
740
+ "cell_type": "code",
741
+ "execution_count": 34,
742
+ "outputs": [],
743
+ "source": [
744
+ "strat_pred = lgbm_strat.predict(strat_test)"
745
+ ],
746
+ "metadata": {
747
+ "collapsed": false,
748
+ "pycharm": {
749
+ "name": "#%%\n"
750
+ }
751
+ }
752
+ },
753
+ {
754
+ "cell_type": "code",
755
+ "execution_count": 35,
756
+ "outputs": [
757
+ {
758
+ "data": {
759
+ "text/plain": "0.955223880597015"
760
+ },
761
+ "execution_count": 35,
762
+ "metadata": {},
763
+ "output_type": "execute_result"
764
+ }
765
+ ],
766
+ "source": [
767
+ "f1_score(strat_pred, strat_ytest)"
768
+ ],
769
+ "metadata": {
770
+ "collapsed": false,
771
+ "pycharm": {
772
+ "name": "#%%\n"
773
+ }
774
+ }
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": 38,
779
+ "outputs": [],
780
+ "source": [
781
+ "import numpy as np\n",
782
+ "\n",
783
+ "def predict(var_name):\n",
784
+ " pred = [var_name]\n",
785
+ " np_pred = np.array(pred)\n",
786
+ " score = lgbm_strat.predict(np_pred)\n",
787
+ " return score"
788
+ ],
789
+ "metadata": {
790
+ "collapsed": false,
791
+ "pycharm": {
792
+ "name": "#%%\n"
793
+ }
794
+ }
795
+ },
796
+ {
797
+ "cell_type": "code",
798
+ "execution_count": 39,
799
+ "outputs": [
800
+ {
801
+ "data": {
802
+ "text/plain": "1.0 330\n0.0 60\nName: diabetes, dtype: int64"
803
+ },
804
+ "execution_count": 39,
805
+ "metadata": {},
806
+ "output_type": "execute_result"
807
+ }
808
+ ],
809
+ "source": [
810
+ "dia.diabetes.value_counts()"
811
+ ],
812
+ "metadata": {
813
+ "collapsed": false,
814
+ "pycharm": {
815
+ "name": "#%%\n"
816
+ }
817
+ }
818
+ },
819
+ {
820
+ "cell_type": "code",
821
+ "execution_count": 40,
822
+ "outputs": [
823
+ {
824
+ "data": {
825
+ "text/plain": "5.5"
826
+ },
827
+ "execution_count": 40,
828
+ "metadata": {},
829
+ "output_type": "execute_result"
830
+ }
831
+ ],
832
+ "source": [
833
+ "330/60"
834
+ ],
835
+ "metadata": {
836
+ "collapsed": false,
837
+ "pycharm": {
838
+ "name": "#%%\n"
839
+ }
840
+ }
841
+ },
842
+ {
843
+ "cell_type": "code",
844
+ "execution_count": 41,
845
+ "outputs": [],
846
+ "source": [
847
+ "xgb= XGBClassifier(max_depth=7, n_estimators=1000, scale_pos_weight=5.5)"
848
+ ],
849
+ "metadata": {
850
+ "collapsed": false,
851
+ "pycharm": {
852
+ "name": "#%%\n"
853
+ }
854
+ }
855
+ },
856
+ {
857
+ "cell_type": "code",
858
+ "execution_count": 42,
859
+ "outputs": [
860
+ {
861
+ "name": "stdout",
862
+ "output_type": "stream",
863
+ "text": [
864
+ "[00:36:49] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
865
+ ]
866
+ },
867
+ {
868
+ "data": {
869
+ "text/plain": "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n importance_type='gain', interaction_constraints='',\n learning_rate=0.300000012, max_delta_step=0, max_depth=7,\n min_child_weight=1, missing=nan, monotone_constraints='()',\n n_estimators=1000, n_jobs=4, num_parallel_tree=1, random_state=0,\n reg_alpha=0, reg_lambda=1, scale_pos_weight=5.5, subsample=1,\n tree_method='exact', validate_parameters=1, verbosity=None)"
870
+ },
871
+ "execution_count": 42,
872
+ "metadata": {},
873
+ "output_type": "execute_result"
874
+ }
875
+ ],
876
+ "source": [
877
+ "xgb.fit(strat_X, strat_y)"
878
+ ],
879
+ "metadata": {
880
+ "collapsed": false,
881
+ "pycharm": {
882
+ "name": "#%%\n"
883
+ }
884
+ }
885
+ },
886
+ {
887
+ "cell_type": "code",
888
+ "execution_count": 43,
889
+ "outputs": [],
890
+ "source": [
891
+ "y=xgb.predict(strat_test)"
892
+ ],
893
+ "metadata": {
894
+ "collapsed": false,
895
+ "pycharm": {
896
+ "name": "#%%\n"
897
+ }
898
+ }
899
+ },
900
+ {
901
+ "cell_type": "code",
902
+ "execution_count": 44,
903
+ "outputs": [
904
+ {
905
+ "data": {
906
+ "text/plain": "0.955223880597015"
907
+ },
908
+ "execution_count": 44,
909
+ "metadata": {},
910
+ "output_type": "execute_result"
911
+ }
912
+ ],
913
+ "source": [
914
+ "f1_score(y, strat_ytest)"
915
+ ],
916
+ "metadata": {
917
+ "collapsed": false,
918
+ "pycharm": {
919
+ "name": "#%%\n"
920
+ }
921
+ }
922
+ },
923
+ {
924
+ "cell_type": "code",
925
+ "execution_count": 45,
926
+ "outputs": [],
927
+ "source": [
928
+ "import sqlite3"
929
+ ],
930
+ "metadata": {
931
+ "collapsed": false,
932
+ "pycharm": {
933
+ "name": "#%%\n"
934
+ }
935
+ }
936
+ },
937
+ {
938
+ "cell_type": "code",
939
+ "execution_count": 46,
940
+ "outputs": [],
941
+ "source": [
942
+ "conn = sqlite3.connect(\"diabetes.db\")\n",
943
+ "c = conn.cursor()"
944
+ ],
945
+ "metadata": {
946
+ "collapsed": false,
947
+ "pycharm": {
948
+ "name": "#%%\n"
949
+ }
950
+ }
951
+ },
952
+ {
953
+ "cell_type": "code",
954
+ "execution_count": 47,
955
+ "outputs": [
956
+ {
957
+ "data": {
958
+ "text/plain": "('patient_number',\n 'cholesterol',\n 'glucose',\n 'hdl_chol',\n 'chol_hdl_ratio',\n 'age',\n 'gender',\n 'height',\n 'weight',\n 'bmi',\n 'systolic_bp',\n 'diastolic_bp',\n 'waist',\n 'hip',\n 'waist_hip_ratio',\n 'diabetes')"
959
+ },
960
+ "execution_count": 47,
961
+ "metadata": {},
962
+ "output_type": "execute_result"
963
+ }
964
+ ],
965
+ "source": [
966
+ "col= tuple(dia.columns)\n",
967
+ "col"
968
+ ],
969
+ "metadata": {
970
+ "collapsed": false,
971
+ "pycharm": {
972
+ "name": "#%%\n"
973
+ }
974
+ }
975
+ },
976
+ {
977
+ "cell_type": "code",
978
+ "execution_count": 48,
979
+ "outputs": [],
980
+ "source": [
981
+ "conn.commit()"
982
+ ],
983
+ "metadata": {
984
+ "collapsed": false,
985
+ "pycharm": {
986
+ "name": "#%%\n"
987
+ }
988
+ }
989
+ },
990
+ {
991
+ "cell_type": "code",
992
+ "execution_count": 49,
993
+ "outputs": [],
994
+ "source": [
995
+ "dia.to_sql(name=\"diabetes.db\", con=conn, if_exists= \"replace\", index=False)"
996
+ ],
997
+ "metadata": {
998
+ "collapsed": false,
999
+ "pycharm": {
1000
+ "name": "#%%\n"
1001
+ }
1002
+ }
1003
+ }
1004
+ ],
1005
+ "metadata": {
1006
+ "kernelspec": {
1007
+ "display_name": "Python 3",
1008
+ "language": "python",
1009
+ "name": "python3"
1010
+ },
1011
+ "language_info": {
1012
+ "codemirror_mode": {
1013
+ "name": "ipython",
1014
+ "version": 2
1015
+ },
1016
+ "file_extension": ".py",
1017
+ "mimetype": "text/x-python",
1018
+ "name": "python",
1019
+ "nbconvert_exporter": "python",
1020
+ "pygments_lexer": "ipython2",
1021
+ "version": "2.7.6"
1022
+ },
1023
+ "colab": {
1024
+ "provenance": []
1025
+ }
1026
+ },
1027
+ "nbformat": 4,
1028
+ "nbformat_minor": 0
1029
+ }
model.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Train a LightGBM classifier on the diabetes dataset and pickle the fitted
# model for the deployment code.
#
# Steps: load the CSV, repair the decimal-comma columns, ordinal-encode the
# text columns, take a stratified train/test split, scale the features using
# statistics from the training rows only, fit LightGBM, report F1, and save
# the model.

# importing python libraries
import pickle as pkl
import warnings

import pandas as pd
from lightgbm.sklearn import LGBMClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import RobustScaler, OrdinalEncoder

warnings.filterwarnings("ignore")

# loading diabetes data into variable data
data = pd.read_csv("./dataset/diabetes.csv")

# wrangling dataset.
# The ratio columns are stored with a decimal comma (e.g. "3,9"), so they are
# recomputed from their numeric source columns instead of being parsed.
data["chol_hdl_ratio"] = (data["cholesterol"] / data["hdl_chol"]).round(2)
data["waist_hip_ratio"] = (data["waist"] / data["hip"]).round(2)

# correcting comma separated number to decimal separated number.
# regex=False: "," is a literal character here, not a pattern.
data["bmi"] = pd.to_numeric(data["bmi"].str.replace(",", ".", regex=False))

print(data.head())
# encoding columns with object values using Ordinal Encoding
s = (data.dtypes == "object")
obj_col = s[s].index

print("Ordinal Encoding")
# OrdinalEncoder numbers the sorted categories of each column, so in this
# dataset gender "female" becomes 0.0 and the target label "No diabetes"
# becomes 1.0 (the other target label becomes 0.0).
orde = OrdinalEncoder()
data[obj_col] = orde.fit_transform(data[obj_col])

print("Splitting features and target.")
# dropping off target and unnecessary columns (diabetes and patient number columns)
X = data.drop(["patient_number", "diabetes"], axis=1)
y = data["diabetes"]

print("Stratified Split.")
# StratifiedShuffleSplit on Data
split = StratifiedShuffleSplit(n_splits=4, random_state=42)

# Four splits are generated but only the last one is used for training and
# evaluation (this is what the previous loop did implicitly). The indices
# depend only on y and the number of rows, so splitting before scaling
# selects exactly the same rows as splitting the scaled matrix.
*_, (train_index, test_index) = split.split(X, y)

# The indices are positional, so use .iloc rather than label-based lookup.
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

print("Robust Scaling on X, y.")
# scaling data using RobustScaler
# Fit the scaler on the training rows only; fitting on the full dataset would
# leak test-set statistics into preprocessing. The results are NumPy arrays,
# so the model is trained without feature names, as before.
scale = RobustScaler()
X_train = scale.fit_transform(X.iloc[train_index])
X_test = scale.transform(X.iloc[test_index])

# Loading LightGBM classifier to be used for training model
# max_depth <= 0 means "no depth limit" in LightGBM, so -2 acts like the
# default of -1.
lgbm = LGBMClassifier(n_estimators=200, max_depth=-2, random_state=42)
lgbm.fit(X_train, y_train)
pred = lgbm.predict(X_test)

# f1_score expects (y_true, y_pred). The default positive label is 1, which
# is the "No diabetes" majority class in this encoding.
f1 = f1_score(y_test, pred)
print(f"F1 Score for LightGBM: {f1}.")

# Also report F1 for label 0 (the minority, diabetic class), which is the
# harder and more informative number on this imbalanced dataset.
f1_minority = f1_score(y_test, pred, pos_label=0)
print(f"F1 Score for LightGBM (label 0, minority class): {f1_minority}.")

# Using pickle to save model
# NOTE(review): only the classifier is persisted. The fitted scaler and
# encoder are not saved, so inference code must reproduce the same
# preprocessing before calling predict -- confirm against the deployment code.
with open("../deployment/lightgbm.pickle", "wb") as model_file:
    pkl.dump(lgbm, model_file)