Spaces:
Build error
Build error
Upload 3 files
Browse files- diabetes.csv +391 -0
- model.ipynb +1029 -0
- model.py +60 -0
diabetes.csv
ADDED
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
patient_number,cholesterol,glucose,hdl_chol,chol_hdl_ratio,age,gender,height,weight,bmi,systolic_bp,diastolic_bp,waist,hip,waist_hip_ratio,diabetes
|
2 |
+
1,193,77,49,"3,9",19,female,61,119,"22,5",118,70,32,38,"0,84",No diabetes
|
3 |
+
2,146,79,41,"3,6",19,female,60,135,"26,4",108,58,33,40,"0,83",No diabetes
|
4 |
+
3,217,75,54,4,20,female,67,187,"29,3",110,72,40,45,"0,89",No diabetes
|
5 |
+
4,226,97,70,"3,2",20,female,64,114,"19,6",122,64,31,39,"0,79",No diabetes
|
6 |
+
5,164,91,67,"2,4",20,female,70,141,"20,2",122,86,32,39,"0,82",No diabetes
|
7 |
+
6,170,69,64,"2,7",20,female,64,161,"27,6",108,70,37,40,"0,93",No diabetes
|
8 |
+
7,149,77,49,3,20,female,62,115,21,105,82,31,37,"0,84",No diabetes
|
9 |
+
8,164,71,63,"2,6",20,male,72,145,"19,7",108,78,29,36,"0,81",No diabetes
|
10 |
+
9,230,112,64,"3,6",20,male,67,159,"24,9",100,90,31,39,"0,79",No diabetes
|
11 |
+
10,179,105,60,3,20,female,58,170,"35,5",140,100,34,46,"0,74",No diabetes
|
12 |
+
11,174,105,117,"1,5",20,male,70,187,"26,8",132,86,37,41,"0,9",No diabetes
|
13 |
+
12,193,106,63,"3,1",20,female,68,274,"41,7",165,110,49,58,"0,84",No diabetes
|
14 |
+
13,132,99,34,"3,9",21,female,65,169,"28,1",112,62,39,43,"0,91",No diabetes
|
15 |
+
14,203,84,75,"2,7",21,female,63,142,"25,2",125,85,28,39,"0,72",No diabetes
|
16 |
+
15,135,88,47,"2,9",21,male,69,155,"22,9",110,68,31,39,"0,79",No diabetes
|
17 |
+
16,187,84,64,"2,9",21,female,63,158,28,138,88,39,43,"0,91",No diabetes
|
18 |
+
17,244,89,92,"2,7",21,male,71,163,"22,7",116,76,34,39,"0,87",No diabetes
|
19 |
+
18,193,75,49,"3,9",21,female,61,220,"41,6",130,82,40,52,"0,77",No diabetes
|
20 |
+
19,165,76,46,"3,6",22,female,63,114,"20,2",112,78,28,35,"0,8",No diabetes
|
21 |
+
20,172,70,36,"4,8",22,female,64,148,"25,4",90,48,35,38,"0,92",No diabetes
|
22 |
+
21,217,81,60,"3,6",22,female,71,223,"31,1",120,75,46,50,"0,92",No diabetes
|
23 |
+
22,223,75,85,"2,6",22,female,62,137,"25,1",120,70,28,35,"0,8",No diabetes
|
24 |
+
23,136,81,51,"2,7",22,female,66,160,"25,8",105,85,35,40,"0,88",No diabetes
|
25 |
+
24,175,91,42,"4,2",23,female,65,235,"39,1",110,80,44,50,"0,88",No diabetes
|
26 |
+
25,230,86,37,"6,2",23,male,71,277,"38,6",150,99,50,49,"1,02",No diabetes
|
27 |
+
26,147,78,42,"3,5",23,female,61,185,35,127,71,43,47,"0,91",No diabetes
|
28 |
+
27,229,91,43,"5,3",23,male,72,180,"24,4",110,78,34,41,"0,83",No diabetes
|
29 |
+
28,179,75,36,5,23,female,65,183,"30,4",120,80,43,45,"0,96",No diabetes
|
30 |
+
29,185,76,58,"3,2",23,male,76,164,20,124,78,32,40,"0,8",No diabetes
|
31 |
+
30,164,86,40,"4,1",23,female,69,245,"36,2",126,75,44,47,"0,94",No diabetes
|
32 |
+
31,228,66,45,"5,1",24,female,61,113,"21,3",100,70,33,38,"0,87",No diabetes
|
33 |
+
32,199,87,63,"3,2",25,male,66,118,19,120,78,32,34,"0,94",No diabetes
|
34 |
+
33,134,101,36,"3,7",25,female,63,245,"43,4",142,78,47,58,"0,81",No diabetes
|
35 |
+
34,169,104,58,"2,9",25,female,60,154,"30,1",140,95,40,42,"0,95",No diabetes
|
36 |
+
35,227,98,66,"3,4",25,male,71,162,"22,6",123,82,35,39,"0,9",No diabetes
|
37 |
+
36,149,138,50,3,26,female,62,174,"31,8",148,92,38,46,"0,83",No diabetes
|
38 |
+
37,155,58,69,"2,2",26,male,73,174,23,110,76,30,35,"0,86",No diabetes
|
39 |
+
38,179,90,60,3,26,female,60,130,"25,4",138,84,32,40,"0,8",No diabetes
|
40 |
+
39,283,83,74,"3,8",26,male,72,227,"30,8",158,104,41,44,"0,93",No diabetes
|
41 |
+
40,228,79,37,"6,2",26,male,72,259,"35,1",122,90,48,49,"0,98",No diabetes
|
42 |
+
41,220,60,66,"3,3",26,male,70,150,"21,5",136,88,33,39,"0,85",Diabetes
|
43 |
+
42,170,76,60,"2,8",27,female,63,119,"21,1",122,86,28,37,"0,76",No diabetes
|
44 |
+
43,201,100,46,"4,4",27,female,65,145,"24,1",121,75,32,35,"0,91",No diabetes
|
45 |
+
44,238,75,36,"6,6",27,female,60,170,"33,2",130,80,35,41,"0,85",No diabetes
|
46 |
+
45,190,92,44,"4,3",27,female,65,210,"34,9",150,106,39,47,"0,83",No diabetes
|
47 |
+
46,203,94,62,"3,3",27,female,67,209,"32,7",140,80,34,43,"0,79",No diabetes
|
48 |
+
47,226,100,65,"3,5",27,male,69,289,"42,7",130,100,48,51,"0,94",No diabetes
|
49 |
+
48,204,62,70,"2,9",27,female,67,185,29,110,90,35,44,"0,8",No diabetes
|
50 |
+
49,166,77,68,"2,4",27,male,72,141,"19,1",110,58,33,38,"0,87",No diabetes
|
51 |
+
50,241,92,40,6,27,female,63,179,"31,7",120,75,40,42,"0,95",No diabetes
|
52 |
+
51,164,94,58,"2,8",28,female,67,180,"28,2",128,94,39,43,"0,91",No diabetes
|
53 |
+
52,214,111,59,"3,6",28,male,68,204,31,130,90,40,41,"0,98",No diabetes
|
54 |
+
53,151,74,47,"3,2",28,male,69,130,"19,2",135,75,29,35,"0,83",No diabetes
|
55 |
+
54,184,99,36,"5,1",28,male,67,154,"24,1",124,94,35,38,"0,92",No diabetes
|
56 |
+
55,168,69,45,"3,7",28,female,63,200,"35,4",111,65,42,46,"0,91",No diabetes
|
57 |
+
56,146,77,60,"2,4",28,female,64,126,"21,6",120,90,28,32,"0,88",No diabetes
|
58 |
+
57,189,96,47,4,28,female,64,200,"34,3",136,52,38,45,"0,84",No diabetes
|
59 |
+
58,132,83,40,"3,3",28,female,68,225,"34,2",136,86,41,52,"0,79",No diabetes
|
60 |
+
59,179,77,50,"3,6",29,male,68,170,"25,8",122,68,38,39,"0,97",No diabetes
|
61 |
+
60,135,88,34,4,29,female,65,123,"20,5",118,61,26,37,"0,7",No diabetes
|
62 |
+
61,163,69,48,"3,4",29,female,62,99,"18,1",125,60,30,36,"0,83",No diabetes
|
63 |
+
62,204,71,55,"3,7",29,female,64,120,"20,6",110,70,33,38,"0,87",No diabetes
|
64 |
+
63,165,97,24,"6,9",29,female,64,218,"37,4",112,68,46,48,"0,96",No diabetes
|
65 |
+
64,181,101,44,"4,1",29,male,68,180,"27,4",130,78,38,42,"0,9",No diabetes
|
66 |
+
65,194,269,38,"5,1",29,female,69,167,"24,7",120,70,33,40,"0,83",Diabetes
|
67 |
+
66,158,74,64,"2,5",30,female,62,142,26,108,68,32,34,"0,94",No diabetes
|
68 |
+
67,160,82,41,"3,9",30,female,63,143,"25,3",172,124,33,40,"0,83",No diabetes
|
69 |
+
68,181,77,46,"3,9",30,female,66,257,"41,5",162,108,47,55,"0,85",No diabetes
|
70 |
+
69,144,81,28,"5,1",30,male,72,165,"22,4",118,78,31,38,"0,82",No diabetes
|
71 |
+
70,182,74,44,"4,1",30,female,62,125,"22,9",132,80,31,39,"0,79",No diabetes
|
72 |
+
71,145,84,54,"2,7",30,female,65,165,"27,5",102,56,33,42,"0,79",No diabetes
|
73 |
+
72,195,92,41,"4,8",30,male,69,191,"28,2",161,112,46,49,"0,94",No diabetes
|
74 |
+
73,207,75,44,"4,7",30,male,72,180,"24,4",118,62,35,41,"0,85",No diabetes
|
75 |
+
74,192,124,31,"5,6",30,male,72,250,"33,9",142,79,43,51,"0,84",No diabetes
|
76 |
+
75,183,69,51,"3,6",31,female,66,190,"30,7",125,70,41,47,"0,87",No diabetes
|
77 |
+
76,176,92,55,"3,2",31,female,62,145,"26,5",110,72,36,42,"0,86",No diabetes
|
78 |
+
77,163,83,57,"2,9",31,female,65,120,20,136,86,29,40,"0,73",No diabetes
|
79 |
+
78,188,77,45,"4,2",31,female,67,227,"35,5",122,70,47,53,"0,89",No diabetes
|
80 |
+
79,209,89,43,"4,9",31,female,67,160,"25,1",108,58,30,44,"0,68",No diabetes
|
81 |
+
80,179,77,72,"2,5",31,male,66,145,"23,4",131,79,33,38,"0,87",No diabetes
|
82 |
+
81,293,85,94,"3,1",31,female,67,200,"31,3",110,90,41,42,"0,98",No diabetes
|
83 |
+
82,305,91,44,"6,9",31,male,71,211,"29,4",100,60,40,45,"0,89",No diabetes
|
84 |
+
83,191,155,58,"3,3",31,female,62,237,"43,3",140,87,53,56,"0,95",Diabetes
|
85 |
+
84,155,81,70,"2,2",32,female,65,151,"25,1",120,68,33,40,"0,83",No diabetes
|
86 |
+
85,179,85,52,"3,4",32,female,62,179,"32,7",140,96,37,47,"0,79",No diabetes
|
87 |
+
86,176,90,34,"5,2",32,female,63,252,"44,6",100,72,45,58,"0,78",No diabetes
|
88 |
+
87,244,101,36,"6,8",32,male,70,212,"30,4",132,90,39,44,"0,89",No diabetes
|
89 |
+
88,213,83,47,"4,5",33,female,65,157,"26,1",130,90,37,41,"0,9",No diabetes
|
90 |
+
89,217,87,40,"5,4",33,female,62,186,34,140,90,42,46,"0,91",No diabetes
|
91 |
+
90,151,85,48,"3,1",33,male,69,308,"45,5",110,90,52,58,"0,9",No diabetes
|
92 |
+
91,168,82,44,"3,8",33,female,66,118,19,98,66,29,35,"0,83",No diabetes
|
93 |
+
92,231,84,91,"2,5",33,male,69,163,"24,1",140,70,35,38,"0,92",No diabetes
|
94 |
+
93,262,93,43,"6,1",33,female,63,170,"30,1",110,68,33,46,"0,72",No diabetes
|
95 |
+
94,179,70,52,"3,4",34,male,72,170,"23,1",138,82,31,39,"0,79",No diabetes
|
96 |
+
95,300,65,59,"5,1",34,female,65,160,"26,6",120,60,40,47,"0,85",No diabetes
|
97 |
+
96,248,94,69,"3,6",34,male,71,190,"26,5",132,86,36,42,"0,86",No diabetes
|
98 |
+
97,217,88,40,"5,4",34,male,73,219,"28,9",145,100,41,42,"0,98",No diabetes
|
99 |
+
98,224,71,42,"5,3",34,female,60,165,"32,2",135,80,34,46,"0,74",No diabetes
|
100 |
+
99,171,85,61,"2,8",34,female,63,164,29,120,80,34,43,"0,79",No diabetes
|
101 |
+
100,174,90,36,"4,8",34,male,71,210,"29,3",142,92,37,43,"0,86",No diabetes
|
102 |
+
101,194,86,67,"2,9",35,male,66,159,"25,7",115,64,31,35,"0,89",No diabetes
|
103 |
+
102,150,80,38,"3,9",35,male,73,179,"23,6",138,92,32,37,"0,86",No diabetes
|
104 |
+
103,337,85,62,"5,4",35,male,72,189,"25,6",124,84,36,44,"0,82",No diabetes
|
105 |
+
104,239,97,55,"4,3",35,male,74,170,"21,8",122,62,32,38,"0,84",No diabetes
|
106 |
+
105,218,126,32,"6,8",35,male,69,169,25,139,90,39,41,"0,95",No diabetes
|
107 |
+
106,122,82,43,"2,8",36,female,71,183,"25,5",110,80,41,45,"0,91",No diabetes
|
108 |
+
107,225,83,42,"5,4",36,male,67,192,"30,1",149,89,40,42,"0,95",No diabetes
|
109 |
+
108,160,71,44,"3,6",36,female,64,185,"31,8",110,80,39,45,"0,87",No diabetes
|
110 |
+
109,191,76,30,"6,4",36,male,69,183,27,100,66,36,40,"0,9",No diabetes
|
111 |
+
110,199,153,77,"2,6",36,female,66,255,"41,2",118,66,47,52,"0,9",No diabetes
|
112 |
+
111,197,92,46,"4,3",36,female,64,136,"23,3",128,80,32,37,"0,86",No diabetes
|
113 |
+
112,179,81,35,"5,1",36,female,63,125,"22,1",110,76,33,36,"0,92",No diabetes
|
114 |
+
113,186,74,76,"2,4",36,male,69,150,"22,1",138,82,31,38,"0,82",No diabetes
|
115 |
+
114,178,78,59,3,36,male,70,161,"23,1",130,79,34,40,"0,85",No diabetes
|
116 |
+
115,224,85,30,"7,5",36,male,69,205,"30,3",150,99,37,41,"0,9",No diabetes
|
117 |
+
116,194,81,36,"5,4",36,male,64,126,"21,6",110,76,30,34,"0,88",No diabetes
|
118 |
+
117,347,197,42,"8,3",36,male,70,277,"39,7",140,86,51,49,"1,04",No diabetes
|
119 |
+
118,245,119,26,"9,4",36,male,66,179,"28,9",150,92,37,42,"0,88",Diabetes
|
120 |
+
119,227,75,44,"5,2",37,male,59,170,"34,3",140,84,34,39,"0,87",No diabetes
|
121 |
+
120,192,89,30,"6,4",37,male,71,195,"27,2",136,96,36,43,"0,84",No diabetes
|
122 |
+
121,215,64,84,"2,6",37,female,59,148,"29,9",140,100,32,42,"0,76",No diabetes
|
123 |
+
122,214,67,47,"4,6",37,female,64,145,"24,9",108,76,34,42,"0,81",No diabetes
|
124 |
+
123,243,52,59,"4,1",37,female,64,233,40,110,82,49,57,"0,86",No diabetes
|
125 |
+
124,156,86,34,"4,6",37,female,67,212,"33,2",122,74,48,51,"0,94",No diabetes
|
126 |
+
125,179,85,50,"3,6",37,male,66,136,"21,9",190,94,33,39,"0,85",No diabetes
|
127 |
+
126,209,91,36,"5,8",37,male,70,262,"37,6",130,94,42,48,"0,88",No diabetes
|
128 |
+
127,232,87,30,"7,7",37,male,68,252,"38,3",140,95,43,47,"0,91",No diabetes
|
129 |
+
128,212,88,36,"5,9",37,female,64,160,"27,5",124,82,37,45,"0,82",No diabetes
|
130 |
+
129,199,130,48,"4,1",37,female,61,203,"38,4",136,84,42,51,"0,82",No diabetes
|
131 |
+
130,145,85,29,5,38,female,60,125,"24,4",132,82,31,35,"0,89",No diabetes
|
132 |
+
131,206,90,38,"5,4",38,female,69,167,"24,7",138,90,36,47,"0,77",No diabetes
|
133 |
+
132,147,86,34,"4,3",38,male,69,205,"30,3",130,96,39,41,"0,95",No diabetes
|
134 |
+
133,302,81,57,"5,3",38,female,67,222,"34,8",128,82,41,51,"0,8",No diabetes
|
135 |
+
134,138,95,40,"3,5",38,female,60,138,"26,9",140,90,31,39,"0,79",No diabetes
|
136 |
+
135,215,128,34,"6,3",38,female,58,195,"40,8",102,68,42,50,"0,84",No diabetes
|
137 |
+
136,159,88,43,"3,7",38,male,68,169,"25,7",138,79,34,40,"0,85",No diabetes
|
138 |
+
137,268,90,48,"5,6",38,female,63,181,"32,1",142,100,38,46,"0,83",No diabetes
|
139 |
+
138,251,118,38,"6,6",38,female,64,248,"42,6",110,80,49,58,"0,84",No diabetes
|
140 |
+
139,216,155,30,"7,2",38,male,68,145,22,110,60,34,37,"0,92",No diabetes
|
141 |
+
140,203,299,43,"4,7",38,female,69,288,"42,5",136,83,48,55,"0,87",Diabetes
|
142 |
+
141,181,83,44,"4,1",39,female,66,255,"41,2",140,98,46,54,"0,85",No diabetes
|
143 |
+
142,239,85,63,"3,8",39,male,60,144,"28,1",162,90,33,42,"0,79",No diabetes
|
144 |
+
143,200,56,51,"3,9",40,female,62,105,"19,2",125,64,26,33,"0,79",No diabetes
|
145 |
+
144,211,98,40,"5,3",40,female,68,179,"27,2",110,76,37,43,"0,86",No diabetes
|
146 |
+
145,152,103,32,"4,8",40,female,52,187,"48,6",148,82,38,49,"0,78",No diabetes
|
147 |
+
146,173,83,37,"4,7",40,female,62,130,"23,8",122,76,37,38,"0,97",No diabetes
|
148 |
+
147,215,72,42,"5,1",40,male,70,189,"27,1",180,122,37,39,"0,95",No diabetes
|
149 |
+
148,219,105,63,"3,5",40,female,62,153,28,106,82,36,44,"0,82",No diabetes
|
150 |
+
149,180,76,46,"3,9",40,female,64,146,"25,1",128,82,37,43,"0,86",No diabetes
|
151 |
+
150,214,77,48,"4,5",40,male,72,222,"30,1",120,84,40,44,"0,91",No diabetes
|
152 |
+
151,171,92,54,"3,2",40,male,71,214,"29,8",138,94,41,39,"1,05",No diabetes
|
153 |
+
152,183,79,46,4,40,female,59,165,"33,3",135,84,37,43,"0,86",No diabetes
|
154 |
+
153,184,92,36,"5,1",40,female,63,285,"50,5",142,98,50,60,"0,83",No diabetes
|
155 |
+
154,180,84,69,"2,6",40,female,68,264,"40,1",142,98,43,54,"0,8",No diabetes
|
156 |
+
155,191,74,33,"5,8",40,male,72,270,"36,6",136,70,45,49,"0,92",No diabetes
|
157 |
+
156,218,87,38,"5,7",40,male,73,200,"26,4",120,76,38,41,"0,93",No diabetes
|
158 |
+
157,169,85,51,"3,3",40,female,65,180,30,106,82,40,44,"0,91",No diabetes
|
159 |
+
158,267,133,34,"7,9",40,female,59,204,"41,2",118,69,40,47,"0,85",Diabetes
|
160 |
+
159,234,78,54,"4,3",41,male,67,183,"28,7",122,96,38,40,"0,95",No diabetes
|
161 |
+
160,206,112,33,"6,2",41,female,62,184,"33,7",104,80,39,44,"0,89",No diabetes
|
162 |
+
161,184,79,39,"4,7",41,male,69,154,"22,7",136,96,34,39,"0,87",No diabetes
|
163 |
+
162,178,64,52,"3,4",41,female,65,188,"31,3",130,76,35,46,"0,76",No diabetes
|
164 |
+
163,179,80,92,"1,9",41,female,72,118,16,144,112,28,36,"0,78",No diabetes
|
165 |
+
164,225,84,82,"2,7",41,male,71,156,"21,8",150,80,31,40,"0,78",No diabetes
|
166 |
+
165,189,84,46,"4,1",41,female,63,153,"27,1",130,80,32,40,"0,8",No diabetes
|
167 |
+
166,199,76,52,"3,8",41,female,63,197,"34,9",120,78,41,48,"0,85",No diabetes
|
168 |
+
167,241,86,63,"3,8",41,female,59,139,"28,1",112,72,29,39,"0,74",No diabetes
|
169 |
+
168,170,106,42,4,41,female,61,110,"20,8",103,64,29,30,"0,97",No diabetes
|
170 |
+
169,269,59,66,"4,1",41,male,67,191,"29,9",130,73,38,41,"0,93",No diabetes
|
171 |
+
170,269,73,34,"7,9",41,female,62,160,"29,3",126,90,39,41,"0,95",No diabetes
|
172 |
+
171,270,73,40,"6,8",42,male,66,185,"29,9",146,94,39,41,"0,95",No diabetes
|
173 |
+
172,172,101,46,"3,7",42,female,65,165,"27,5",118,68,33,45,"0,73",No diabetes
|
174 |
+
173,193,77,45,"4,3",42,female,75,186,"23,2",125,90,37,46,"0,8",No diabetes
|
175 |
+
174,199,81,36,"5,5",42,female,67,235,"36,8",178,100,47,52,"0,9",No diabetes
|
176 |
+
175,177,101,36,"4,9",42,female,65,174,29,146,94,37,40,"0,93",No diabetes
|
177 |
+
176,191,81,53,"3,6",42,female,61,156,"29,5",138,84,36,42,"0,86",No diabetes
|
178 |
+
177,208,122,51,"4,1",42,female,62,141,"25,8",118,78,33,40,"0,83",No diabetes
|
179 |
+
178,188,84,46,"4,1",43,female,66,152,"24,5",122,80,37,41,"0,9",No diabetes
|
180 |
+
179,243,74,42,"5,8",43,female,64,239,41,128,90,48,53,"0,91",No diabetes
|
181 |
+
180,173,85,58,3,43,female,69,210,31,130,75,44,47,"0,94",No diabetes
|
182 |
+
181,162,76,40,"4,1",43,male,67,216,"33,8",100,70,41,44,"0,93",No diabetes
|
183 |
+
182,322,87,92,"3,5",43,female,56,120,"26,9",120,98,32,41,"0,78",No diabetes
|
184 |
+
183,254,84,52,"4,9",43,female,62,145,"26,5",125,70,31,38,"0,82",No diabetes
|
185 |
+
184,160,100,36,"4,4",43,female,64,140,24,180,110,37,40,"0,93",No diabetes
|
186 |
+
185,192,109,44,"4,4",43,female,64,325,"55,8",141,79,53,62,"0,85",No diabetes
|
187 |
+
186,197,120,37,"5,3",43,male,71,179,25,146,98,37,44,"0,84",No diabetes
|
188 |
+
187,237,87,41,"5,8",43,female,64,181,"31,1",104,90,36,46,"0,78",No diabetes
|
189 |
+
188,190,84,44,"4,3",43,female,62,163,"29,8",135,88,40,45,"0,89",No diabetes
|
190 |
+
189,190,228,57,"3,3",43,female,65,198,"32,9",110,64,40,49,"0,82",Diabetes
|
191 |
+
190,202,84,33,"6,1",44,male,68,157,"23,9",125,80,33,37,"0,89",No diabetes
|
192 |
+
191,244,101,39,"6,3",44,male,71,168,"23,4",140,89,36,39,"0,92",No diabetes
|
193 |
+
192,168,101,59,"2,8",44,female,64,160,"27,5",130,88,40,43,"0,93",No diabetes
|
194 |
+
193,260,67,46,"5,7",44,female,62,159,"29,1",140,94,36,43,"0,84",No diabetes
|
195 |
+
194,214,87,35,"6,1",44,female,64,190,"32,6",140,75,38,44,"0,86",No diabetes
|
196 |
+
195,207,187,46,"4,5",44,female,67,201,"31,5",150,74,46,49,"0,94",Diabetes
|
197 |
+
196,203,71,78,"2,6",45,male,66,115,"18,6",135,88,30,34,"0,88",No diabetes
|
198 |
+
197,189,80,40,"4,7",45,male,69,190,"28,1",140,75,39,44,"0,89",No diabetes
|
199 |
+
198,216,109,86,"2,5",45,female,67,147,23,140,102,32,38,"0,84",No diabetes
|
200 |
+
199,233,92,39,6,45,female,64,167,"28,7",124,86,39,44,"0,89",No diabetes
|
201 |
+
200,177,87,49,"3,6",45,male,69,166,"24,5",160,80,34,40,"0,85",No diabetes
|
202 |
+
201,191,83,88,"2,2",45,female,67,151,"23,6",130,90,33,38,"0,87",No diabetes
|
203 |
+
202,142,155,25,"5,7",45,male,69,204,"30,1",165,115,40,43,"0,93",No diabetes
|
204 |
+
203,219,130,44,5,45,male,67,218,"34,1",172,110,41,45,"0,91",Diabetes
|
205 |
+
204,190,107,32,"5,9",46,male,72,205,"27,8",145,88,46,49,"0,94",No diabetes
|
206 |
+
205,203,82,56,"3,6",46,female,62,121,"22,1",118,59,29,38,"0,76",No diabetes
|
207 |
+
206,207,102,43,"4,8",46,female,63,179,"31,7",212,114,38,46,"0,83",No diabetes
|
208 |
+
207,242,108,53,"4,6",46,female,62,183,"33,5",130,86,37,45,"0,82",No diabetes
|
209 |
+
208,183,81,60,"3,1",47,female,66,186,30,140,97,39,44,"0,89",No diabetes
|
210 |
+
209,234,65,76,"3,1",47,male,67,230,36,137,100,45,46,"0,98",No diabetes
|
211 |
+
210,118,95,39,3,47,female,64,123,"21,1",140,76,30,36,"0,83",No diabetes
|
212 |
+
211,266,82,54,"4,9",47,male,68,142,"21,6",118,78,35,39,"0,9",No diabetes
|
213 |
+
212,223,90,48,"4,6",47,female,65,232,"38,6",120,86,46,54,"0,85",No diabetes
|
214 |
+
213,245,120,39,"6,3",47,female,63,156,"27,6",142,102,35,39,"0,9",Diabetes
|
215 |
+
214,173,225,31,"5,6",47,male,73,260,"34,3",150,98,42,47,"0,89",Diabetes
|
216 |
+
215,172,117,56,"3,1",48,female,63,170,"30,1",130,82,35,42,"0,83",No diabetes
|
217 |
+
216,190,74,50,"3,8",48,male,68,100,"15,2",120,85,27,33,"0,82",No diabetes
|
218 |
+
217,134,105,42,"3,2",48,male,70,173,"24,8",178,120,36,40,"0,9",No diabetes
|
219 |
+
218,268,85,51,"5,3",48,male,70,120,"17,2",150,105,32,35,"0,91",No diabetes
|
220 |
+
219,209,87,34,"6,1",48,female,63,121,"21,4",111,62,32,38,"0,84",No diabetes
|
221 |
+
220,201,81,87,"2,3",48,female,68,146,"22,2",145,95,32,41,"0,78",No diabetes
|
222 |
+
221,204,89,56,"3,6",48,male,68,196,"29,8",170,96,38,42,"0,9",No diabetes
|
223 |
+
222,307,87,58,"5,3",49,male,67,181,"28,3",120,80,41,42,"0,98",No diabetes
|
224 |
+
223,189,75,72,"2,6",49,female,62,205,"37,5",120,80,40,49,"0,82",No diabetes
|
225 |
+
224,160,196,33,"4,8",49,male,71,266,"37,1",150,98,49,45,"1,09",Diabetes
|
226 |
+
225,237,233,58,"4,1",49,female,62,189,"34,6",130,90,43,47,"0,91",Diabetes
|
227 |
+
226,158,91,48,"3,3",50,male,71,180,"25,1",136,90,36,40,"0,9",No diabetes
|
228 |
+
227,255,78,38,"6,7",50,female,65,183,"30,4",130,100,37,43,"0,86",No diabetes
|
229 |
+
228,196,115,62,"3,2",50,male,67,140,"21,9",176,110,35,37,"0,95",No diabetes
|
230 |
+
229,185,67,59,"3,1",50,female,64,228,"39,1",142,90,42,54,"0,78",No diabetes
|
231 |
+
230,293,115,54,"5,4",50,male,71,170,"23,7",131,75,34,39,"0,87",No diabetes
|
232 |
+
231,188,88,51,"3,7",50,female,61,147,"27,8",160,66,34,41,"0,83",No diabetes
|
233 |
+
232,174,173,34,"5,1",50,male,70,263,"37,7",159,99,51,64,"0,8",No diabetes
|
234 |
+
233,158,91,31,"5,1",50,male,70,215,"30,8",138,89,40,45,"0,89",No diabetes
|
235 |
+
234,181,255,26,7,50,male,71,320,"44,6",140,86,56,49,"1,14",Diabetes
|
236 |
+
235,140,385,31,"4,5",50,male,69,172,"25,4",138,66,37,41,"0,9",Diabetes
|
237 |
+
236,192,85,69,"2,8",51,male,65,146,"24,3",130,110,33,36,"0,92",No diabetes
|
238 |
+
237,284,89,54,"5,3",51,female,63,154,"27,3",140,100,32,43,"0,74",No diabetes
|
239 |
+
238,222,82,87,"2,6",51,female,66,110,"17,8",150,110,28,37,"0,76",No diabetes
|
240 |
+
239,249,81,28,"8,9",51,female,65,200,"33,3",122,90,43,46,"0,93",No diabetes
|
241 |
+
240,212,79,49,"4,3",51,female,65,145,"24,1",230,120,38,42,"0,9",No diabetes
|
242 |
+
241,215,110,36,6,51,female,67,282,"44,2",142,78,52,59,"0,88",Diabetes
|
243 |
+
242,218,182,54,4,51,female,66,215,"34,7",139,69,42,53,"0,79",Diabetes
|
244 |
+
243,443,185,23,"19,3",51,female,70,235,"33,7",158,98,43,48,"0,9",Diabetes
|
245 |
+
244,218,68,46,"4,7",52,female,62,170,"31,1",142,79,40,43,"0,93",No diabetes
|
246 |
+
245,171,97,69,"2,5",52,male,71,159,"22,2",125,72,33,39,"0,85",No diabetes
|
247 |
+
246,255,83,90,"2,8",52,male,70,120,"17,2",170,110,30,33,"0,91",No diabetes
|
248 |
+
247,182,85,43,"4,2",52,male,68,139,"21,1",130,90,29,35,"0,83",No diabetes
|
249 |
+
248,206,83,68,3,52,male,69,153,"22,6",140,98,36,40,"0,9",No diabetes
|
250 |
+
249,261,101,83,"3,1",52,female,64,198,34,152,92,42,49,"0,86",No diabetes
|
251 |
+
250,204,57,74,"2,8",52,male,75,142,"17,7",140,90,31,35,"0,89",No diabetes
|
252 |
+
251,196,120,67,"2,9",52,female,62,147,"26,9",144,94,34,42,"0,81",Diabetes
|
253 |
+
252,219,78,67,"3,3",53,female,64,179,"30,7",135,100,39,47,"0,83",No diabetes
|
254 |
+
253,273,94,49,"5,6",53,female,64,174,"29,9",160,96,34,43,"0,79",No diabetes
|
255 |
+
254,225,74,36,"6,3",53,female,63,182,"32,2",126,80,38,46,"0,83",No diabetes
|
256 |
+
255,185,84,52,"3,6",53,female,61,145,"27,4",147,72,37,40,"0,93",No diabetes
|
257 |
+
256,242,297,34,"7,1",53,male,69,216,"31,9",142,96,43,45,"0,96",Diabetes
|
258 |
+
257,296,369,46,"6,4",53,male,69,173,"25,5",138,94,35,39,"0,9",Diabetes
|
259 |
+
258,228,76,53,"4,3",54,male,66,170,"27,4",121,62,36,41,"0,88",No diabetes
|
260 |
+
259,194,87,65,3,54,male,69,129,19,170,96,30,37,"0,81",No diabetes
|
261 |
+
260,216,79,46,"4,7",54,female,65,138,23,132,80,33,39,"0,85",No diabetes
|
262 |
+
261,240,96,57,"4,2",54,female,65,175,"29,1",152,100,37,43,"0,86",No diabetes
|
263 |
+
262,148,193,14,"10,6",54,female,67,165,"25,8",140,65,42,42,1,No diabetes
|
264 |
+
263,271,103,90,3,55,female,63,114,"20,2",180,105,30,37,"0,81",No diabetes
|
265 |
+
264,204,94,54,"3,8",55,female,66,202,"32,6",140,90,43,47,"0,91",No diabetes
|
266 |
+
265,174,93,77,"2,3",55,male,70,140,"20,1",118,86,32,33,"0,97",No diabetes
|
267 |
+
266,157,74,47,"3,3",55,female,66,219,"35,3",150,82,43,52,"0,83",No diabetes
|
268 |
+
267,263,89,40,"6,6",55,female,63,202,"35,8",108,72,45,50,"0,9",No diabetes
|
269 |
+
268,160,122,41,"3,9",55,female,67,223,"34,9",136,83,43,48,"0,9",No diabetes
|
270 |
+
269,179,236,63,"2,8",55,male,75,186,"23,2",122,74,38,38,1,Diabetes
|
271 |
+
270,208,95,32,"6,5",56,male,68,183,"27,8",131,75,36,39,"0,92",No diabetes
|
272 |
+
271,129,110,42,"3,1",56,male,74,151,"19,4",140,75,34,38,"0,89",No diabetes
|
273 |
+
272,219,173,31,"7,1",56,female,65,197,"32,8",100,50,41,50,"0,82",Diabetes
|
274 |
+
273,404,206,33,"12,2",56,male,69,159,"23,5",162,88,38,39,"0,97",Diabetes
|
275 |
+
274,138,81,45,"3,1",57,male,73,164,"21,6",148,81,31,37,"0,84",No diabetes
|
276 |
+
275,173,80,57,3,57,male,71,145,"20,2",124,64,31,36,"0,86",No diabetes
|
277 |
+
276,209,176,55,"3,8",57,female,61,150,"28,3",115,68,36,39,"0,92",Diabetes
|
278 |
+
277,228,92,37,"6,2",58,female,61,256,"48,4",190,92,49,57,"0,86",No diabetes
|
279 |
+
278,227,85,26,"8,7",58,male,70,211,"30,3",144,82,38,43,"0,88",No diabetes
|
280 |
+
279,201,106,53,"3,8",58,male,66,215,"34,7",186,102,46,44,"1,05",No diabetes
|
281 |
+
280,251,94,36,7,58,female,63,154,"27,3",174,75,38,41,"0,93",No diabetes
|
282 |
+
281,211,48,34,"6,2",58,male,67,177,"27,7",162,78,38,43,"0,88",No diabetes
|
283 |
+
282,115,239,36,"3,2",58,male,69,200,"29,5",125,69,30,37,"0,81",Diabetes
|
284 |
+
283,204,113,35,"5,8",59,male,73,187,"24,7",148,76,38,37,"1,03",No diabetes
|
285 |
+
284,215,97,46,"4,7",59,female,63,176,"31,2",140,70,34,44,"0,77",No diabetes
|
286 |
+
285,221,126,48,"4,6",59,female,62,177,"32,4",130,78,39,45,"0,87",No diabetes
|
287 |
+
286,220,95,58,"3,8",59,female,66,138,"22,3",138,80,32,38,"0,84",No diabetes
|
288 |
+
287,193,248,24,8,59,female,66,189,"30,5",140,90,38,45,"0,84",Diabetes
|
289 |
+
288,195,108,46,"4,2",59,female,67,172,"26,9",150,102,38,43,"0,88",Diabetes
|
290 |
+
289,219,112,73,3,59,male,66,170,"27,4",146,92,37,40,"0,93",Diabetes
|
291 |
+
290,289,267,38,"7,6",59,male,68,169,"25,7",142,79,36,38,"0,95",Diabetes
|
292 |
+
291,198,92,62,"3,2",60,male,70,163,"23,4",126,78,36,40,"0,9",No diabetes
|
293 |
+
292,192,56,42,"4,6",60,female,62,134,"24,5",130,70,31,40,"0,78",No diabetes
|
294 |
+
293,242,82,54,"4,5",60,female,65,156,26,130,90,39,45,"0,87",No diabetes
|
295 |
+
294,235,102,42,"5,6",60,male,69,186,"27,5",148,98,40,42,"0,95",No diabetes
|
296 |
+
295,277,119,62,"4,5",60,female,61,128,"24,2",140,86,33,39,"0,85",No diabetes
|
297 |
+
296,162,90,46,"3,5",60,female,63,121,"21,4",110,64,32,34,"0,94",No diabetes
|
298 |
+
297,318,270,108,"2,9",60,female,65,167,"27,8",132,72,38,44,"0,86",No diabetes
|
299 |
+
298,279,270,40,7,60,female,68,224,"34,1",174,90,48,50,"0,96",Diabetes
|
300 |
+
299,128,223,24,"5,3",60,male,67,196,"30,7",110,68,42,43,"0,98",Diabetes
|
301 |
+
300,203,90,51,4,60,female,59,123,"24,8",130,72,36,41,"0,88",Diabetes
|
302 |
+
301,143,91,37,"3,9",61,female,65,220,"36,6",160,92,40,50,"0,8",No diabetes
|
303 |
+
302,300,103,44,"6,8",61,female,67,169,"26,5",138,78,40,44,"0,91",No diabetes
|
304 |
+
303,206,94,44,"4,7",61,female,63,199,"35,2",180,96,41,47,"0,87",No diabetes
|
305 |
+
304,182,85,37,"4,9",61,female,69,174,"25,7",176,86,49,43,"1,14",No diabetes
|
306 |
+
305,198,86,66,3,61,male,74,152,"19,5",138,76,33,38,"0,87",No diabetes
|
307 |
+
306,211,225,29,"7,3",61,female,63,144,"25,5",190,100,40,42,"0,95",Diabetes
|
308 |
+
307,265,330,34,"7,8",61,male,74,191,"24,5",170,88,39,41,"0,95",Diabetes
|
309 |
+
308,204,128,61,"3,3",62,male,68,180,"27,4",141,81,38,41,"0,93",No diabetes
|
310 |
+
309,169,95,29,"5,8",62,male,66,251,"40,5",118,72,50,47,"1,06",No diabetes
|
311 |
+
310,236,102,36,"6,6",62,male,76,160,"19,5",150,80,35,39,"0,9",No diabetes
|
312 |
+
311,235,109,59,4,62,female,63,290,"51,4",175,80,55,62,"0,89",Diabetes
|
313 |
+
312,196,206,41,"4,8",62,female,65,196,"32,6",178,90,46,51,"0,9",Diabetes
|
314 |
+
313,180,92,34,"5,3",63,male,69,169,25,145,72,35,39,"0,9",No diabetes
|
315 |
+
314,194,54,57,"3,4",63,male,70,181,26,184,76,37,42,"0,88",No diabetes
|
316 |
+
315,194,80,34,"5,7",63,male,73,175,"23,1",131,88,34,39,"0,87",No diabetes
|
317 |
+
316,212,82,68,"3,1",63,male,70,161,"23,1",180,110,37,40,"0,93",No diabetes
|
318 |
+
317,293,87,120,"2,4",63,female,64,179,"30,7",142,80,47,45,"1,04",No diabetes
|
319 |
+
318,194,95,36,"5,4",63,female,58,210,"43,9",140,100,44,53,"0,83",No diabetes
|
320 |
+
319,277,88,45,"6,2",63,female,64,223,"38,3",220,100,45,54,"0,83",No diabetes
|
321 |
+
320,157,91,34,"4,6",63,male,69,166,"24,5",106,82,39,38,"1,03",No diabetes
|
322 |
+
321,283,145,39,"7,3",63,female,61,200,"37,8",190,110,44,48,"0,92",Diabetes
|
323 |
+
322,215,119,44,"3,9",63,female,63,158,28,160,68,34,42,"0,81",Diabetes
|
324 |
+
323,342,251,48,"7,1",63,female,65,201,"33,4",178,88,45,46,"0,98",Diabetes
|
325 |
+
324,202,81,55,"3,7",64,female,62,167,"30,5",190,118,44,47,"0,94",No diabetes
|
326 |
+
325,255,100,34,"7,5",64,male,68,227,"34,5",134,74,44,47,"0,94",No diabetes
|
327 |
+
326,181,177,24,"7,5",64,male,71,225,"31,4",130,66,44,47,"0,94",Diabetes
|
328 |
+
327,249,90,28,"8,9",64,male,68,183,"27,8",138,80,44,41,"1,07",Diabetes
|
329 |
+
328,249,197,44,"5,7",64,female,63,159,"28,2",151,85,33,41,"0,8",Diabetes
|
330 |
+
329,219,106,50,"4,4",65,female,63,233,"41,3",140,90,40,53,"0,75",No diabetes
|
331 |
+
330,229,95,74,"3,1",65,female,62,151,"27,6",125,64,37,42,"0,88",No diabetes
|
332 |
+
331,212,97,45,"4,7",65,female,61,187,"35,3",158,94,43,47,"0,91",No diabetes
|
333 |
+
332,170,67,33,"5,2",65,male,69,182,"26,9",140,65,42,39,"1,08",No diabetes
|
334 |
+
333,159,172,28,"5,7",65,male,70,181,26,142,81,43,49,"0,88",Diabetes
|
335 |
+
334,224,341,33,"6,8",65,male,67,197,"30,9",160,80,42,43,"0,98",Diabetes
|
336 |
+
335,263,82,92,"2,9",66,female,66,121,"19,5",104,64,31,33,"0,94",No diabetes
|
337 |
+
336,184,76,42,"4,4",66,male,74,185,"23,8",130,75,40,41,"0,98",No diabetes
|
338 |
+
337,281,92,41,"6,9",66,female,62,185,"33,8",158,88,48,44,"1,09",No diabetes
|
339 |
+
338,221,120,83,"2,7",66,female,64,130,"22,3",110,64,31,38,"0,82",No diabetes
|
340 |
+
339,188,174,24,"7,8",66,male,68,210,"31,9",160,78,45,48,"0,94",No diabetes
|
341 |
+
340,246,104,62,4,66,female,66,189,"30,5",200,94,45,46,"0,98",Diabetes
|
342 |
+
341,204,173,37,"5,5",66,male,67,146,"22,9",138,78,36,48,"0,75",Diabetes
|
343 |
+
342,78,93,12,"6,5",67,male,67,119,"18,6",110,50,33,38,"0,87",No diabetes
|
344 |
+
343,206,85,46,"4,5",67,male,67,178,"27,9",119,68,37,41,"0,9",No diabetes
|
345 |
+
344,174,125,44,4,67,male,68,198,"30,1",119,72,36,43,"0,84",No diabetes
|
346 |
+
345,254,121,39,"6,5",67,male,68,167,"25,4",161,118,36,39,"0,92",Diabetes
|
347 |
+
346,198,118,46,"4,3",68,female,63,124,22,130,70,32,38,"0,84",No diabetes
|
348 |
+
347,143,371,46,"3,1",68,male,67,158,"24,7",138,82,37,43,"0,86",No diabetes
|
349 |
+
348,207,77,46,"4,5",68,male,55,130,"30,2",199,115,29,33,"0,88",No diabetes
|
350 |
+
349,236,111,82,"2,9",68,female,61,119,"22,5",142,96,29,37,"0,78",No diabetes
|
351 |
+
350,260,68,60,"4,3",69,female,59,179,"36,1",158,98,45,48,"0,94",No diabetes
|
352 |
+
351,242,74,55,"4,4",70,female,66,200,"32,3",140,65,41,47,"0,87",No diabetes
|
353 |
+
352,186,97,50,"3,7",70,male,67,178,"27,9",148,88,42,41,"1,02",No diabetes
|
354 |
+
353,182,206,43,"4,2",70,male,69,214,"31,6",158,90,45,48,"0,94",Diabetes
|
355 |
+
354,289,111,50,"5,8",70,female,60,220,43,126,80,51,54,"0,94",Diabetes
|
356 |
+
355,231,70,110,"2,1",71,female,63,155,"27,5",150,78,33,41,"0,8",No diabetes
|
357 |
+
356,199,85,59,"3,4",71,male,69,171,"25,2",136,86,38,40,"0,95",No diabetes
|
358 |
+
357,228,115,61,"3,7",71,female,63,244,"43,2",170,92,48,51,"0,94",No diabetes
|
359 |
+
358,213,203,75,"2,8",71,female,63,165,"29,2",150,80,34,42,"0,81",Diabetes
|
360 |
+
359,204,120,44,"4,6",72,male,65,167,"27,8",140,72,45,46,"0,98",No diabetes
|
361 |
+
360,205,83,42,"4,9",72,female,61,180,34,170,90,39,47,"0,83",No diabetes
|
362 |
+
361,213,76,40,"5,3",72,female,59,137,"27,7",130,60,40,40,1,No diabetes
|
363 |
+
362,207,71,41,5,72,male,70,180,"25,8",138,88,39,40,"0,98",Diabetes
|
364 |
+
363,235,106,37,"6,4",73,male,65,183,"30,4",134,78,43,46,"0,93",No diabetes
|
365 |
+
364,237,118,45,"5,3",73,female,64,174,"29,9",162,75,38,44,"0,86",Diabetes
|
366 |
+
365,306,92,56,"5,5",74,male,69,184,"27,2",140,72,39,41,"0,95",No diabetes
|
367 |
+
366,223,88,42,"5,3",74,female,62,165,"30,2",250,100,41,46,"0,89",No diabetes
|
368 |
+
367,296,262,60,"4,9",74,female,63,183,"32,4",159,99,42,48,"0,88",Diabetes
|
369 |
+
368,205,79,32,"6,4",75,male,69,204,"30,1",136,90,44,42,"1,05",No diabetes
|
370 |
+
369,254,342,37,"6,9",75,male,68,210,"31,9",151,87,44,45,"0,98",Diabetes
|
371 |
+
370,159,100,54,"2,9",76,male,66,188,"30,3",116,53,40,41,"0,98",No diabetes
|
372 |
+
371,196,82,58,"3,4",76,male,65,154,"25,6",158,78,37,41,"0,9",No diabetes
|
373 |
+
372,173,131,69,"2,5",76,female,61,102,"19,3",160,60,31,33,"0,94",No diabetes
|
374 |
+
373,219,112,73,3,76,male,64,105,18,125,82,29,33,"0,88",No diabetes
|
375 |
+
374,209,113,65,"3,2",76,female,60,143,"27,9",156,78,35,40,"0,88",Diabetes
|
376 |
+
375,215,80,100,"2,2",78,male,65,109,"18,1",170,88,33,34,"0,97",No diabetes
|
377 |
+
376,210,81,81,"2,6",78,male,66,145,"23,4",110,70,38,39,"0,97",No diabetes
|
378 |
+
377,224,98,44,"5,1",78,female,63,160,"28,3",150,81,36,45,"0,8",No diabetes
|
379 |
+
378,195,171,29,"6,7",78,male,66,172,"27,8",130,82,40,40,1,No diabetes
|
380 |
+
379,235,91,37,"6,4",79,female,65,134,"22,3",142,70,34,38,"0,89",No diabetes
|
381 |
+
380,292,235,55,"5,3",79,male,70,165,"23,7",170,90,39,41,"0,95",Diabetes
|
382 |
+
381,157,92,47,"3,3",80,male,71,212,"29,6",156,88,47,48,"0,98",No diabetes
|
383 |
+
382,252,161,87,"2,9",80,female,62,162,"29,6",160,100,44,41,"1,07",Diabetes
|
384 |
+
383,271,121,40,"6,8",81,female,64,158,"27,1",146,76,36,43,"0,84",No diabetes
|
385 |
+
384,240,88,49,"4,9",82,female,63,170,"30,1",180,86,41,46,"0,89",No diabetes
|
386 |
+
385,255,112,34,"7,5",82,male,66,163,"26,3",179,89,37,43,"0,86",No diabetes
|
387 |
+
386,227,105,44,"5,2",83,female,59,125,"25,2",150,90,35,40,"0,88",No diabetes
|
388 |
+
387,226,279,52,"4,3",84,female,60,192,"37,5",144,88,41,48,"0,85",Diabetes
|
389 |
+
388,301,90,118,"2,6",89,female,61,115,"21,7",218,90,31,41,"0,76",No diabetes
|
390 |
+
389,232,184,114,2,91,female,61,127,24,170,82,35,38,"0,92",Diabetes
|
391 |
+
390,165,94,69,"2,4",92,female,62,217,"39,7",160,82,51,51,1,No diabetes
|
model.ipynb
ADDED
@@ -0,0 +1,1029 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"source": [
|
6 |
+
"## Importing modules and Loading dataset.\n",
|
7 |
+
"This section contains importing the important python modules. Also, the dataset to be used, in this case the \n",
|
8 |
+
"\n"
|
9 |
+
],
|
10 |
+
"metadata": {
|
11 |
+
"id": "r5yZ0Codo2rF"
|
12 |
+
}
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 1,
|
17 |
+
"metadata": {
|
18 |
+
"collapsed": true,
|
19 |
+
"pycharm": {
|
20 |
+
"name": "#%%\n"
|
21 |
+
},
|
22 |
+
"id": "Xfbj4kG4UwcC"
|
23 |
+
},
|
24 |
+
"outputs": [],
|
25 |
+
"source": [
|
26 |
+
"# importing python module.\n",
|
27 |
+
"import pandas as pd\n",
|
28 |
+
"from lightgbm.sklearn import LGBMClassifier\n",
|
29 |
+
"from sklearn.preprocessing import RobustScaler, OrdinalEncoder\n",
|
30 |
+
"from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n",
|
31 |
+
"from xgboost.sklearn import XGBClassifier\n",
|
32 |
+
"from sklearn.metrics import f1_score\n",
|
33 |
+
"\n",
|
34 |
+
"import warnings\n",
|
35 |
+
"warnings.filterwarnings(\"ignore\")"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 2,
|
41 |
+
"outputs": [
|
42 |
+
{
|
43 |
+
"data": {
|
44 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age gender \\\n0 1 193 77 49 3,9 19 female \n1 2 146 79 41 3,6 19 female \n2 3 217 75 54 4 20 female \n3 4 226 97 70 3,2 20 female \n4 5 164 91 67 2,4 20 female \n\n height weight bmi systolic_bp diastolic_bp waist hip \\\n0 61 119 22,5 118 70 32 38 \n1 60 135 26,4 108 58 33 40 \n2 67 187 29,3 110 72 40 45 \n3 64 114 19,6 122 64 31 39 \n4 70 141 20,2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0,84 No diabetes \n1 0,83 No diabetes \n2 0,89 No diabetes \n3 0,79 No diabetes \n4 0,82 No diabetes ",
|
45 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3,9</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22,5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0,84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3,6</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26,4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0,83</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29,3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0,89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3,2</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19,6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0,79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2,4</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20,2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0,82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
46 |
+
},
|
47 |
+
"execution_count": 2,
|
48 |
+
"metadata": {},
|
49 |
+
"output_type": "execute_result"
|
50 |
+
}
|
51 |
+
],
|
52 |
+
"source": [
|
53 |
+
"# loading dataset with pandas\n",
|
54 |
+
"dia = pd.read_csv(\"./dataset/diabetes.csv\")\n",
|
55 |
+
"\n",
|
56 |
+
"dia.head()"
|
57 |
+
],
|
58 |
+
"metadata": {
|
59 |
+
"pycharm": {
|
60 |
+
"name": "#%%\n"
|
61 |
+
},
|
62 |
+
"colab": {
|
63 |
+
"base_uri": "https://localhost:8080/",
|
64 |
+
"height": 357
|
65 |
+
},
|
66 |
+
"id": "yx869JONUwcJ",
|
67 |
+
"outputId": "ccdf4d8b-adf1-40b1-fe75-bc8a0dccfa6c"
|
68 |
+
}
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"cell_type": "code",
|
72 |
+
"execution_count": 3,
|
73 |
+
"outputs": [
|
74 |
+
{
|
75 |
+
"name": "stdout",
|
76 |
+
"output_type": "stream",
|
77 |
+
"text": [
|
78 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
79 |
+
"RangeIndex: 390 entries, 0 to 389\n",
|
80 |
+
"Data columns (total 16 columns):\n",
|
81 |
+
" # Column Non-Null Count Dtype \n",
|
82 |
+
"--- ------ -------------- ----- \n",
|
83 |
+
" 0 patient_number 390 non-null int64 \n",
|
84 |
+
" 1 cholesterol 390 non-null int64 \n",
|
85 |
+
" 2 glucose 390 non-null int64 \n",
|
86 |
+
" 3 hdl_chol 390 non-null int64 \n",
|
87 |
+
" 4 chol_hdl_ratio 390 non-null object\n",
|
88 |
+
" 5 age 390 non-null int64 \n",
|
89 |
+
" 6 gender 390 non-null object\n",
|
90 |
+
" 7 height 390 non-null int64 \n",
|
91 |
+
" 8 weight 390 non-null int64 \n",
|
92 |
+
" 9 bmi 390 non-null object\n",
|
93 |
+
" 10 systolic_bp 390 non-null int64 \n",
|
94 |
+
" 11 diastolic_bp 390 non-null int64 \n",
|
95 |
+
" 12 waist 390 non-null int64 \n",
|
96 |
+
" 13 hip 390 non-null int64 \n",
|
97 |
+
" 14 waist_hip_ratio 390 non-null object\n",
|
98 |
+
" 15 diabetes 390 non-null object\n",
|
99 |
+
"dtypes: int64(11), object(5)\n",
|
100 |
+
"memory usage: 48.9+ KB\n"
|
101 |
+
]
|
102 |
+
}
|
103 |
+
],
|
104 |
+
"source": [
|
105 |
+
"# wrangling datasets with pandas\n",
|
106 |
+
"dia.info()"
|
107 |
+
],
|
108 |
+
"metadata": {
|
109 |
+
"pycharm": {
|
110 |
+
"name": "#%%\n"
|
111 |
+
},
|
112 |
+
"colab": {
|
113 |
+
"base_uri": "https://localhost:8080/"
|
114 |
+
},
|
115 |
+
"id": "rftvNRifUwcL",
|
116 |
+
"outputId": "d0799384-e171-4bc9-c149-873c22252711"
|
117 |
+
}
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"cell_type": "code",
|
121 |
+
"execution_count": 4,
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"data": {
|
125 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol age \\\ncount 390.000000 390.000000 390.000000 390.000000 390.000000 \nmean 195.500000 207.230769 107.338462 50.266667 46.774359 \nstd 112.727548 44.666005 53.798188 17.279069 16.435911 \nmin 1.000000 78.000000 48.000000 12.000000 19.000000 \n25% 98.250000 179.000000 81.000000 38.000000 34.000000 \n50% 195.500000 203.000000 90.000000 46.000000 44.500000 \n75% 292.750000 229.000000 107.750000 59.000000 60.000000 \nmax 390.000000 443.000000 385.000000 120.000000 92.000000 \n\n height weight systolic_bp diastolic_bp waist \\\ncount 390.000000 390.000000 390.000000 390.000000 390.000000 \nmean 65.951282 177.407692 137.133333 83.289744 37.869231 \nstd 3.918867 40.407824 22.859528 13.498192 5.760947 \nmin 52.000000 99.000000 90.000000 48.000000 26.000000 \n25% 63.000000 150.250000 122.000000 75.000000 33.000000 \n50% 66.000000 173.000000 136.000000 82.000000 37.000000 \n75% 69.000000 200.000000 148.000000 90.000000 41.000000 \nmax 76.000000 325.000000 250.000000 124.000000 56.000000 \n\n hip \ncount 390.000000 \nmean 42.992308 \nstd 5.664342 \nmin 30.000000 \n25% 39.000000 \n50% 42.000000 \n75% 46.000000 \nmax 64.000000 ",
|
126 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>age</th>\n <th>height</th>\n <th>weight</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n <td>390.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>195.500000</td>\n <td>207.230769</td>\n <td>107.338462</td>\n <td>50.266667</td>\n <td>46.774359</td>\n <td>65.951282</td>\n <td>177.407692</td>\n <td>137.133333</td>\n <td>83.289744</td>\n <td>37.869231</td>\n <td>42.992308</td>\n </tr>\n <tr>\n <th>std</th>\n <td>112.727548</td>\n <td>44.666005</td>\n <td>53.798188</td>\n <td>17.279069</td>\n <td>16.435911</td>\n <td>3.918867</td>\n <td>40.407824</td>\n <td>22.859528</td>\n <td>13.498192</td>\n <td>5.760947</td>\n <td>5.664342</td>\n </tr>\n <tr>\n <th>min</th>\n <td>1.000000</td>\n <td>78.000000</td>\n <td>48.000000</td>\n <td>12.000000</td>\n <td>19.000000</td>\n <td>52.000000</td>\n <td>99.000000</td>\n <td>90.000000</td>\n <td>48.000000</td>\n <td>26.000000</td>\n <td>30.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>98.250000</td>\n <td>179.000000</td>\n <td>81.000000</td>\n <td>38.000000</td>\n <td>34.000000</td>\n <td>63.000000</td>\n <td>150.250000</td>\n <td>122.000000</td>\n <td>75.000000</td>\n <td>33.000000</td>\n <td>39.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>195.500000</td>\n <td>203.000000</td>\n <td>90.000000</td>\n 
<td>46.000000</td>\n <td>44.500000</td>\n <td>66.000000</td>\n <td>173.000000</td>\n <td>136.000000</td>\n <td>82.000000</td>\n <td>37.000000</td>\n <td>42.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>292.750000</td>\n <td>229.000000</td>\n <td>107.750000</td>\n <td>59.000000</td>\n <td>60.000000</td>\n <td>69.000000</td>\n <td>200.000000</td>\n <td>148.000000</td>\n <td>90.000000</td>\n <td>41.000000</td>\n <td>46.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>390.000000</td>\n <td>443.000000</td>\n <td>385.000000</td>\n <td>120.000000</td>\n <td>92.000000</td>\n <td>76.000000</td>\n <td>325.000000</td>\n <td>250.000000</td>\n <td>124.000000</td>\n <td>56.000000</td>\n <td>64.000000</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
127 |
+
},
|
128 |
+
"execution_count": 4,
|
129 |
+
"metadata": {},
|
130 |
+
"output_type": "execute_result"
|
131 |
+
}
|
132 |
+
],
|
133 |
+
"source": [
|
134 |
+
"dia.describe()"
|
135 |
+
],
|
136 |
+
"metadata": {
|
137 |
+
"pycharm": {
|
138 |
+
"name": "#%%\n"
|
139 |
+
},
|
140 |
+
"colab": {
|
141 |
+
"base_uri": "https://localhost:8080/",
|
142 |
+
"height": 364
|
143 |
+
},
|
144 |
+
"id": "mPXcxzuwUwcN",
|
145 |
+
"outputId": "a1107ea3-215d-4400-e6ff-2d8fd7ff8b55"
|
146 |
+
}
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"cell_type": "markdown",
|
150 |
+
"source": [
|
151 |
+
"## Wrangling dataset."
|
152 |
+
],
|
153 |
+
"metadata": {
|
154 |
+
"id": "S2hRdWcPqcrP"
|
155 |
+
}
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"cell_type": "code",
|
159 |
+
"execution_count": 5,
|
160 |
+
"outputs": [],
|
161 |
+
"source": [
|
162 |
+
"dia.chol_hdl_ratio = round(dia.cholesterol / dia.hdl_chol,2)"
|
163 |
+
],
|
164 |
+
"metadata": {
|
165 |
+
"pycharm": {
|
166 |
+
"name": "#%%\n"
|
167 |
+
},
|
168 |
+
"id": "DCl3woxiUwcO"
|
169 |
+
}
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"cell_type": "code",
|
173 |
+
"execution_count": 6,
|
174 |
+
"outputs": [
|
175 |
+
{
|
176 |
+
"data": {
|
177 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22,5 118 70 32 38 \n1 female 60 135 26,4 108 58 33 40 \n2 female 67 187 29,3 110 72 40 45 \n3 female 64 114 19,6 122 64 31 39 \n4 female 70 141 20,2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0,84 No diabetes \n1 0,83 No diabetes \n2 0,89 No diabetes \n3 0,79 No diabetes \n4 0,82 No diabetes ",
|
178 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22,5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0,84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26,4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0,83</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29,3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0,89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19,6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0,79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20,2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0,82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
179 |
+
},
|
180 |
+
"execution_count": 6,
|
181 |
+
"metadata": {},
|
182 |
+
"output_type": "execute_result"
|
183 |
+
}
|
184 |
+
],
|
185 |
+
"source": [
|
186 |
+
"dia.head()"
|
187 |
+
],
|
188 |
+
"metadata": {
|
189 |
+
"pycharm": {
|
190 |
+
"name": "#%%\n"
|
191 |
+
},
|
192 |
+
"colab": {
|
193 |
+
"base_uri": "https://localhost:8080/",
|
194 |
+
"height": 357
|
195 |
+
},
|
196 |
+
"id": "QNlQedszUwcP",
|
197 |
+
"outputId": "63231eb5-798a-4c07-8aae-851004ab3787"
|
198 |
+
}
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"cell_type": "markdown",
|
202 |
+
"source": [],
|
203 |
+
"metadata": {
|
204 |
+
"collapsed": false,
|
205 |
+
"id": "SFHdSj2YUwcQ"
|
206 |
+
}
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"cell_type": "code",
|
210 |
+
"execution_count": 7,
|
211 |
+
"outputs": [],
|
212 |
+
"source": [
|
213 |
+
"dia.waist_hip_ratio= round(dia.waist/dia.hip,2)"
|
214 |
+
],
|
215 |
+
"metadata": {
|
216 |
+
"pycharm": {
|
217 |
+
"name": "#%%\n"
|
218 |
+
},
|
219 |
+
"id": "ovJyqVa2UwcX"
|
220 |
+
}
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"cell_type": "code",
|
224 |
+
"execution_count": 8,
|
225 |
+
"outputs": [
|
226 |
+
{
|
227 |
+
"data": {
|
228 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22,5 118 70 32 38 \n1 female 60 135 26,4 108 58 33 40 \n2 female 67 187 29,3 110 72 40 45 \n3 female 64 114 19,6 122 64 31 39 \n4 female 70 141 20,2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 No diabetes \n1 0.82 No diabetes \n2 0.89 No diabetes \n3 0.79 No diabetes \n4 0.82 No diabetes ",
|
229 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22,5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26,4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29,3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19,6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20,2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0.82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
230 |
+
},
|
231 |
+
"execution_count": 8,
|
232 |
+
"metadata": {},
|
233 |
+
"output_type": "execute_result"
|
234 |
+
}
|
235 |
+
],
|
236 |
+
"source": [
|
237 |
+
"dia.head()"
|
238 |
+
],
|
239 |
+
"metadata": {
|
240 |
+
"pycharm": {
|
241 |
+
"name": "#%%\n"
|
242 |
+
},
|
243 |
+
"colab": {
|
244 |
+
"base_uri": "https://localhost:8080/",
|
245 |
+
"height": 357
|
246 |
+
},
|
247 |
+
"id": "PWqYDcnYUwcZ",
|
248 |
+
"outputId": "d0e278d1-d7ed-4503-ee2b-5f94661e56e5"
|
249 |
+
}
|
250 |
+
},
|
251 |
+
{
|
252 |
+
"cell_type": "code",
|
253 |
+
"execution_count": 9,
|
254 |
+
"outputs": [],
|
255 |
+
"source": [
|
256 |
+
"dia.bmi = pd.to_numeric(dia.bmi.str.replace(\",\",\".\"))"
|
257 |
+
],
|
258 |
+
"metadata": {
|
259 |
+
"pycharm": {
|
260 |
+
"name": "#%%\n"
|
261 |
+
},
|
262 |
+
"id": "CXAX15VHUwce"
|
263 |
+
}
|
264 |
+
},
|
265 |
+
{
|
266 |
+
"cell_type": "code",
|
267 |
+
"execution_count": 10,
|
268 |
+
"outputs": [
|
269 |
+
{
|
270 |
+
"data": {
|
271 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22.5 118 70 32 38 \n1 female 60 135 26.4 108 58 33 40 \n2 female 67 187 29.3 110 72 40 45 \n3 female 64 114 19.6 122 64 31 39 \n4 female 70 141 20.2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 No diabetes \n1 0.82 No diabetes \n2 0.89 No diabetes \n3 0.79 No diabetes \n4 0.82 No diabetes ",
|
272 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22.5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26.4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29.3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19.6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20.2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0.82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
273 |
+
},
|
274 |
+
"execution_count": 10,
|
275 |
+
"metadata": {},
|
276 |
+
"output_type": "execute_result"
|
277 |
+
}
|
278 |
+
],
|
279 |
+
"source": [
|
280 |
+
"dia.head()"
|
281 |
+
],
|
282 |
+
"metadata": {
|
283 |
+
"pycharm": {
|
284 |
+
"name": "#%%\n"
|
285 |
+
},
|
286 |
+
"colab": {
|
287 |
+
"base_uri": "https://localhost:8080/",
|
288 |
+
"height": 357
|
289 |
+
},
|
290 |
+
"id": "Y9Rg5DkoUwcf",
|
291 |
+
"outputId": "de5133be-4736-4098-c94c-300eaac58f7d"
|
292 |
+
}
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"cell_type": "code",
|
296 |
+
"source": [
|
297 |
+
"dia.weight.describe()"
|
298 |
+
],
|
299 |
+
"metadata": {
|
300 |
+
"colab": {
|
301 |
+
"base_uri": "https://localhost:8080/"
|
302 |
+
},
|
303 |
+
"id": "PglRQVOhWq6F",
|
304 |
+
"outputId": "9e50d452-d5c4-41f0-a632-f148bb85c44f"
|
305 |
+
},
|
306 |
+
"execution_count": 11,
|
307 |
+
"outputs": [
|
308 |
+
{
|
309 |
+
"data": {
|
310 |
+
"text/plain": "count 390.000000\nmean 177.407692\nstd 40.407824\nmin 99.000000\n25% 150.250000\n50% 173.000000\n75% 200.000000\nmax 325.000000\nName: weight, dtype: float64"
|
311 |
+
},
|
312 |
+
"execution_count": 11,
|
313 |
+
"metadata": {},
|
314 |
+
"output_type": "execute_result"
|
315 |
+
}
|
316 |
+
]
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"cell_type": "code",
|
320 |
+
"execution_count": 12,
|
321 |
+
"outputs": [
|
322 |
+
{
|
323 |
+
"data": {
|
324 |
+
"text/plain": "patient_number 0\ncholesterol 0\nglucose 0\nhdl_chol 0\nchol_hdl_ratio 0\nage 0\ngender 0\nheight 0\nweight 0\nbmi 0\nsystolic_bp 0\ndiastolic_bp 0\nwaist 0\nhip 0\nwaist_hip_ratio 0\ndiabetes 0\ndtype: int64"
|
325 |
+
},
|
326 |
+
"execution_count": 12,
|
327 |
+
"metadata": {},
|
328 |
+
"output_type": "execute_result"
|
329 |
+
}
|
330 |
+
],
|
331 |
+
"source": [
|
332 |
+
"dia.isnull().sum()"
|
333 |
+
],
|
334 |
+
"metadata": {
|
335 |
+
"collapsed": false,
|
336 |
+
"pycharm": {
|
337 |
+
"name": "#%%\n"
|
338 |
+
}
|
339 |
+
}
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"cell_type": "code",
|
343 |
+
"source": [
|
344 |
+
"dia.head()"
|
345 |
+
],
|
346 |
+
"metadata": {
|
347 |
+
"colab": {
|
348 |
+
"base_uri": "https://localhost:8080/",
|
349 |
+
"height": 357
|
350 |
+
},
|
351 |
+
"id": "GY9af0LOoUrQ",
|
352 |
+
"outputId": "5cb087ef-8459-40e3-c65d-515007489006"
|
353 |
+
},
|
354 |
+
"execution_count": 13,
|
355 |
+
"outputs": [
|
356 |
+
{
|
357 |
+
"data": {
|
358 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 female 61 119 22.5 118 70 32 38 \n1 female 60 135 26.4 108 58 33 40 \n2 female 67 187 29.3 110 72 40 45 \n3 female 64 114 19.6 122 64 31 39 \n4 female 70 141 20.2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 No diabetes \n1 0.82 No diabetes \n2 0.89 No diabetes \n3 0.79 No diabetes \n4 0.82 No diabetes ",
|
359 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>female</td>\n <td>61</td>\n <td>119</td>\n <td>22.5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>female</td>\n <td>60</td>\n <td>135</td>\n <td>26.4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>female</td>\n <td>67</td>\n <td>187</td>\n <td>29.3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>female</td>\n <td>64</td>\n <td>114</td>\n <td>19.6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>No diabetes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>female</td>\n <td>70</td>\n <td>141</td>\n <td>20.2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n 
<td>0.82</td>\n <td>No diabetes</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
360 |
+
},
|
361 |
+
"execution_count": 13,
|
362 |
+
"metadata": {},
|
363 |
+
"output_type": "execute_result"
|
364 |
+
}
|
365 |
+
]
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"cell_type": "code",
|
369 |
+
"source": [
|
370 |
+
"s= (dia.dtypes == \"object\")\n",
|
371 |
+
"obj_col= list(s[s].index)"
|
372 |
+
],
|
373 |
+
"metadata": {
|
374 |
+
"id": "cKtpXdi6pwdJ"
|
375 |
+
},
|
376 |
+
"execution_count": 14,
|
377 |
+
"outputs": []
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"cell_type": "code",
|
381 |
+
"source": [
|
382 |
+
"obj_col"
|
383 |
+
],
|
384 |
+
"metadata": {
|
385 |
+
"colab": {
|
386 |
+
"base_uri": "https://localhost:8080/"
|
387 |
+
},
|
388 |
+
"id": "_jnwJCli1cA6",
|
389 |
+
"outputId": "563aa279-aae2-4d4e-d5c8-f8d668946539"
|
390 |
+
},
|
391 |
+
"execution_count": 15,
|
392 |
+
"outputs": [
|
393 |
+
{
|
394 |
+
"data": {
|
395 |
+
"text/plain": "['gender', 'diabetes']"
|
396 |
+
},
|
397 |
+
"execution_count": 15,
|
398 |
+
"metadata": {},
|
399 |
+
"output_type": "execute_result"
|
400 |
+
}
|
401 |
+
]
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"cell_type": "code",
|
405 |
+
"source": [
|
406 |
+
"orde = OrdinalEncoder()\n",
|
407 |
+
"dia[obj_col] = orde.fit_transform(dia[obj_col])"
|
408 |
+
],
|
409 |
+
"metadata": {
|
410 |
+
"id": "KvSeVC8K2FvU"
|
411 |
+
},
|
412 |
+
"execution_count": 16,
|
413 |
+
"outputs": []
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"cell_type": "code",
|
417 |
+
"source": [
|
418 |
+
"dia.head()"
|
419 |
+
],
|
420 |
+
"metadata": {
|
421 |
+
"colab": {
|
422 |
+
"base_uri": "https://localhost:8080/",
|
423 |
+
"height": 270
|
424 |
+
},
|
425 |
+
"id": "bY5dg9H53eVS",
|
426 |
+
"outputId": "27963a81-6560-455f-8d8d-10240bc5dc33"
|
427 |
+
},
|
428 |
+
"execution_count": 17,
|
429 |
+
"outputs": [
|
430 |
+
{
|
431 |
+
"data": {
|
432 |
+
"text/plain": " patient_number cholesterol glucose hdl_chol chol_hdl_ratio age \\\n0 1 193 77 49 3.94 19 \n1 2 146 79 41 3.56 19 \n2 3 217 75 54 4.02 20 \n3 4 226 97 70 3.23 20 \n4 5 164 91 67 2.45 20 \n\n gender height weight bmi systolic_bp diastolic_bp waist hip \\\n0 0.0 61 119 22.5 118 70 32 38 \n1 0.0 60 135 26.4 108 58 33 40 \n2 0.0 67 187 29.3 110 72 40 45 \n3 0.0 64 114 19.6 122 64 31 39 \n4 0.0 70 141 20.2 122 86 32 39 \n\n waist_hip_ratio diabetes \n0 0.84 1.0 \n1 0.82 1.0 \n2 0.89 1.0 \n3 0.79 1.0 \n4 0.82 1.0 ",
|
433 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>patient_number</th>\n <th>cholesterol</th>\n <th>glucose</th>\n <th>hdl_chol</th>\n <th>chol_hdl_ratio</th>\n <th>age</th>\n <th>gender</th>\n <th>height</th>\n <th>weight</th>\n <th>bmi</th>\n <th>systolic_bp</th>\n <th>diastolic_bp</th>\n <th>waist</th>\n <th>hip</th>\n <th>waist_hip_ratio</th>\n <th>diabetes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>193</td>\n <td>77</td>\n <td>49</td>\n <td>3.94</td>\n <td>19</td>\n <td>0.0</td>\n <td>61</td>\n <td>119</td>\n <td>22.5</td>\n <td>118</td>\n <td>70</td>\n <td>32</td>\n <td>38</td>\n <td>0.84</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>146</td>\n <td>79</td>\n <td>41</td>\n <td>3.56</td>\n <td>19</td>\n <td>0.0</td>\n <td>60</td>\n <td>135</td>\n <td>26.4</td>\n <td>108</td>\n <td>58</td>\n <td>33</td>\n <td>40</td>\n <td>0.82</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>217</td>\n <td>75</td>\n <td>54</td>\n <td>4.02</td>\n <td>20</td>\n <td>0.0</td>\n <td>67</td>\n <td>187</td>\n <td>29.3</td>\n <td>110</td>\n <td>72</td>\n <td>40</td>\n <td>45</td>\n <td>0.89</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>226</td>\n <td>97</td>\n <td>70</td>\n <td>3.23</td>\n <td>20</td>\n <td>0.0</td>\n <td>64</td>\n <td>114</td>\n <td>19.6</td>\n <td>122</td>\n <td>64</td>\n <td>31</td>\n <td>39</td>\n <td>0.79</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>164</td>\n <td>91</td>\n <td>67</td>\n <td>2.45</td>\n <td>20</td>\n <td>0.0</td>\n <td>70</td>\n <td>141</td>\n <td>20.2</td>\n <td>122</td>\n <td>86</td>\n <td>32</td>\n <td>39</td>\n <td>0.82</td>\n <td>1.0</td>\n </tr>\n 
</tbody>\n</table>\n</div>"
|
434 |
+
},
|
435 |
+
"execution_count": 17,
|
436 |
+
"metadata": {},
|
437 |
+
"output_type": "execute_result"
|
438 |
+
}
|
439 |
+
]
|
440 |
+
},
|
441 |
+
{
|
442 |
+
"cell_type": "code",
|
443 |
+
"source": [
|
444 |
+
"X = dia.drop([\"patient_number\", \"diabetes\"], axis= 1)\n",
|
445 |
+
"y= dia.diabetes"
|
446 |
+
],
|
447 |
+
"metadata": {
|
448 |
+
"id": "ZbHayB553gRB"
|
449 |
+
},
|
450 |
+
"execution_count": 18,
|
451 |
+
"outputs": []
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"cell_type": "code",
|
455 |
+
"source": [
|
456 |
+
"scale = RobustScaler()\n",
|
457 |
+
"scaled_x = scale.fit_transform(X, y=y)"
|
458 |
+
],
|
459 |
+
"metadata": {
|
460 |
+
"id": "GpABdNvA3_8-"
|
461 |
+
},
|
462 |
+
"execution_count": 19,
|
463 |
+
"outputs": []
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"cell_type": "code",
|
467 |
+
"source": [
|
468 |
+
"scaled_x"
|
469 |
+
],
|
470 |
+
"metadata": {
|
471 |
+
"colab": {
|
472 |
+
"base_uri": "https://localhost:8080/"
|
473 |
+
},
|
474 |
+
"id": "qgImunKB4r2i",
|
475 |
+
"outputId": "76b3b342-70c9-4aee-b3b6-1c21b9aac379"
|
476 |
+
},
|
477 |
+
"execution_count": 20,
|
478 |
+
"outputs": [
|
479 |
+
{
|
480 |
+
"data": {
|
481 |
+
"text/plain": "array([[-0.2 , -0.48598131, 0.14285714, ..., -0.625 ,\n -0.57142857, -0.41025641],\n [-1.14 , -0.41121495, -0.23809524, ..., -0.5 ,\n -0.28571429, -0.61538462],\n [ 0.28 , -0.56074766, 0.38095238, ..., 0.375 ,\n 0.42857143, 0.1025641 ],\n ...,\n [ 1.96 , 0. , 3.42857143, ..., -0.75 ,\n -0.14285714, -1.23076923],\n [ 0.58 , 3.51401869, 3.23809524, ..., -0.25 ,\n -0.57142857, 0.41025641],\n [-0.76 , 0.14953271, 1.0952381 , ..., 1.75 ,\n 1.28571429, 1.23076923]])"
|
482 |
+
},
|
483 |
+
"execution_count": 20,
|
484 |
+
"metadata": {},
|
485 |
+
"output_type": "execute_result"
|
486 |
+
}
|
487 |
+
]
|
488 |
+
},
|
489 |
+
{
|
490 |
+
"cell_type": "code",
|
491 |
+
"source": [
|
492 |
+
"X_train, X_test, y_train, y_test = train_test_split(scaled_x, y, test_size= 0.2, random_state=42)"
|
493 |
+
],
|
494 |
+
"metadata": {
|
495 |
+
"id": "ZECN7XuJ4uAR"
|
496 |
+
},
|
497 |
+
"execution_count": 21,
|
498 |
+
"outputs": []
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"cell_type": "code",
|
502 |
+
"execution_count": 22,
|
503 |
+
"outputs": [],
|
504 |
+
"source": [
|
505 |
+
"split = StratifiedShuffleSplit(n_splits=4, random_state=42 )\n",
|
506 |
+
"\n",
|
507 |
+
"for train_index, test_index in split.split(scaled_x, y):\n",
|
508 |
+
" strat_X, strat_test = scaled_x[train_index], scaled_x[test_index]\n",
|
509 |
+
" strat_y, strat_ytest = y[train_index], y[test_index]"
|
510 |
+
],
|
511 |
+
"metadata": {
|
512 |
+
"collapsed": false,
|
513 |
+
"pycharm": {
|
514 |
+
"name": "#%%\n"
|
515 |
+
}
|
516 |
+
}
|
517 |
+
},
|
518 |
+
{
|
519 |
+
"cell_type": "code",
|
520 |
+
"source": [
|
521 |
+
"X_train"
|
522 |
+
],
|
523 |
+
"metadata": {
|
524 |
+
"colab": {
|
525 |
+
"base_uri": "https://localhost:8080/"
|
526 |
+
},
|
527 |
+
"id": "Zo2R6TH55gTW",
|
528 |
+
"outputId": "b12de013-4626-4a0e-aaac-f16281bd50b6"
|
529 |
+
},
|
530 |
+
"execution_count": 23,
|
531 |
+
"outputs": [
|
532 |
+
{
|
533 |
+
"data": {
|
534 |
+
"text/plain": "array([[-0.22 , -1.27102804, -0.19047619, ..., -0.75 ,\n -0.28571429, -1.02564103],\n [-0.44 , 0.41121495, -0.0952381 , ..., 0.125 ,\n 0. , 0.20512821],\n [ 0.18 , -0.41121495, 0.14285714, ..., 0.125 ,\n 0. , 0.20512821],\n ...,\n [-1.48 , 0.74766355, -0.19047619, ..., -0.375 ,\n -0.57142857, 0.1025641 ],\n [ 0.66 , 0.78504673, 1.71428571, ..., -1. ,\n -0.71428571, -1.02564103],\n [ 2.68 , -0.18691589, 0.76190476, ..., -0.125 ,\n 0.28571429, -0.61538462]])"
|
535 |
+
},
|
536 |
+
"execution_count": 23,
|
537 |
+
"metadata": {},
|
538 |
+
"output_type": "execute_result"
|
539 |
+
}
|
540 |
+
]
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"cell_type": "code",
|
544 |
+
"source": [
|
545 |
+
"lgbm_model = LGBMClassifier(n_estimators=200, max_depth=-2, random_state=42)"
|
546 |
+
],
|
547 |
+
"metadata": {
|
548 |
+
"id": "S9cGQrMp5iug"
|
549 |
+
},
|
550 |
+
"execution_count": 24,
|
551 |
+
"outputs": []
|
552 |
+
},
|
553 |
+
{
|
554 |
+
"cell_type": "code",
|
555 |
+
"source": [
|
556 |
+
"lgbm_model.fit(X_train, y_train)"
|
557 |
+
],
|
558 |
+
"metadata": {
|
559 |
+
"colab": {
|
560 |
+
"base_uri": "https://localhost:8080/"
|
561 |
+
},
|
562 |
+
"id": "SWj3o6Cg6nUD",
|
563 |
+
"outputId": "b64a97a4-1f1d-46c3-c11e-e429feedd6db"
|
564 |
+
},
|
565 |
+
"execution_count": 25,
|
566 |
+
"outputs": [
|
567 |
+
{
|
568 |
+
"data": {
|
569 |
+
"text/plain": "LGBMClassifier(max_depth=-2, n_estimators=200, random_state=42)"
|
570 |
+
},
|
571 |
+
"execution_count": 25,
|
572 |
+
"metadata": {},
|
573 |
+
"output_type": "execute_result"
|
574 |
+
}
|
575 |
+
]
|
576 |
+
},
|
577 |
+
{
|
578 |
+
"cell_type": "code",
|
579 |
+
"source": [
|
580 |
+
"y_pred=lgbm_model.predict(X_test)"
|
581 |
+
],
|
582 |
+
"metadata": {
|
583 |
+
"id": "8LFEmpW16yNk"
|
584 |
+
},
|
585 |
+
"execution_count": 26,
|
586 |
+
"outputs": []
|
587 |
+
},
|
588 |
+
{
|
589 |
+
"cell_type": "code",
|
590 |
+
"source": [
|
591 |
+
"f1_score(y_pred, y_test)"
|
592 |
+
],
|
593 |
+
"metadata": {
|
594 |
+
"colab": {
|
595 |
+
"base_uri": "https://localhost:8080/"
|
596 |
+
},
|
597 |
+
"id": "I1pWPR0x6_r9",
|
598 |
+
"outputId": "4ecaee82-9c32-4ca2-f71a-c376ea853419"
|
599 |
+
},
|
600 |
+
"execution_count": 27,
|
601 |
+
"outputs": [
|
602 |
+
{
|
603 |
+
"data": {
|
604 |
+
"text/plain": "0.9354838709677419"
|
605 |
+
},
|
606 |
+
"execution_count": 27,
|
607 |
+
"metadata": {},
|
608 |
+
"output_type": "execute_result"
|
609 |
+
}
|
610 |
+
]
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"cell_type": "code",
|
614 |
+
"source": [
|
615 |
+
"xg_model= XGBClassifier(n_estimators=200, max_depth=4, scale_pos_weight=5.5)"
|
616 |
+
],
|
617 |
+
"metadata": {
|
618 |
+
"id": "e6JqauyE7Luq"
|
619 |
+
},
|
620 |
+
"execution_count": 28,
|
621 |
+
"outputs": []
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"cell_type": "code",
|
625 |
+
"execution_count": 29,
|
626 |
+
"outputs": [
|
627 |
+
{
|
628 |
+
"name": "stdout",
|
629 |
+
"output_type": "stream",
|
630 |
+
"text": [
|
631 |
+
"[23:22:04] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
|
632 |
+
]
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"data": {
|
636 |
+
"text/plain": "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n importance_type='gain', interaction_constraints='',\n learning_rate=0.300000012, max_delta_step=0, max_depth=4,\n min_child_weight=1, missing=nan, monotone_constraints='()',\n n_estimators=200, n_jobs=4, num_parallel_tree=1, random_state=0,\n reg_alpha=0, reg_lambda=1, scale_pos_weight=5.5, subsample=1,\n tree_method='exact', validate_parameters=1, verbosity=None)"
|
637 |
+
},
|
638 |
+
"execution_count": 29,
|
639 |
+
"metadata": {},
|
640 |
+
"output_type": "execute_result"
|
641 |
+
}
|
642 |
+
],
|
643 |
+
"source": [
|
644 |
+
"xg_model.fit(X_train, y_train)"
|
645 |
+
],
|
646 |
+
"metadata": {
|
647 |
+
"collapsed": false,
|
648 |
+
"pycharm": {
|
649 |
+
"name": "#%%\n"
|
650 |
+
}
|
651 |
+
}
|
652 |
+
},
|
653 |
+
{
|
654 |
+
"cell_type": "code",
|
655 |
+
"execution_count": 30,
|
656 |
+
"outputs": [],
|
657 |
+
"source": [
|
658 |
+
"xg_pred = xg_model.predict(X_test)"
|
659 |
+
],
|
660 |
+
"metadata": {
|
661 |
+
"collapsed": false,
|
662 |
+
"pycharm": {
|
663 |
+
"name": "#%%\n"
|
664 |
+
}
|
665 |
+
}
|
666 |
+
},
|
667 |
+
{
|
668 |
+
"cell_type": "code",
|
669 |
+
"execution_count": 31,
|
670 |
+
"outputs": [
|
671 |
+
{
|
672 |
+
"data": {
|
673 |
+
"text/plain": "0.943089430894309"
|
674 |
+
},
|
675 |
+
"execution_count": 31,
|
676 |
+
"metadata": {},
|
677 |
+
"output_type": "execute_result"
|
678 |
+
}
|
679 |
+
],
|
680 |
+
"source": [
|
681 |
+
"f1_score(y_test, xg_pred)"
|
682 |
+
],
|
683 |
+
"metadata": {
|
684 |
+
"collapsed": false,
|
685 |
+
"pycharm": {
|
686 |
+
"name": "#%%\n"
|
687 |
+
}
|
688 |
+
}
|
689 |
+
},
|
690 |
+
{
|
691 |
+
"cell_type": "markdown",
|
692 |
+
"source": [
|
693 |
+
"## Stratified Shuffle Test"
|
694 |
+
],
|
695 |
+
"metadata": {
|
696 |
+
"collapsed": false,
|
697 |
+
"pycharm": {
|
698 |
+
"name": "#%% md\n"
|
699 |
+
}
|
700 |
+
}
|
701 |
+
},
|
702 |
+
{
|
703 |
+
"cell_type": "code",
|
704 |
+
"execution_count": 32,
|
705 |
+
"outputs": [],
|
706 |
+
"source": [
|
707 |
+
"lgbm_strat = LGBMClassifier(n_estimators=200, learning_rate=0.0099, max_depth=-2, )"
|
708 |
+
],
|
709 |
+
"metadata": {
|
710 |
+
"collapsed": false,
|
711 |
+
"pycharm": {
|
712 |
+
"name": "#%%\n"
|
713 |
+
}
|
714 |
+
}
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"cell_type": "code",
|
718 |
+
"execution_count": 33,
|
719 |
+
"outputs": [
|
720 |
+
{
|
721 |
+
"data": {
|
722 |
+
"text/plain": "LGBMClassifier(learning_rate=0.0099, max_depth=-2, n_estimators=200)"
|
723 |
+
},
|
724 |
+
"execution_count": 33,
|
725 |
+
"metadata": {},
|
726 |
+
"output_type": "execute_result"
|
727 |
+
}
|
728 |
+
],
|
729 |
+
"source": [
|
730 |
+
"lgbm_strat.fit(strat_X, strat_y)"
|
731 |
+
],
|
732 |
+
"metadata": {
|
733 |
+
"collapsed": false,
|
734 |
+
"pycharm": {
|
735 |
+
"name": "#%%\n"
|
736 |
+
}
|
737 |
+
}
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"cell_type": "code",
|
741 |
+
"execution_count": 34,
|
742 |
+
"outputs": [],
|
743 |
+
"source": [
|
744 |
+
"strat_pred = lgbm_strat.predict(strat_test)"
|
745 |
+
],
|
746 |
+
"metadata": {
|
747 |
+
"collapsed": false,
|
748 |
+
"pycharm": {
|
749 |
+
"name": "#%%\n"
|
750 |
+
}
|
751 |
+
}
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"cell_type": "code",
|
755 |
+
"execution_count": 35,
|
756 |
+
"outputs": [
|
757 |
+
{
|
758 |
+
"data": {
|
759 |
+
"text/plain": "0.955223880597015"
|
760 |
+
},
|
761 |
+
"execution_count": 35,
|
762 |
+
"metadata": {},
|
763 |
+
"output_type": "execute_result"
|
764 |
+
}
|
765 |
+
],
|
766 |
+
"source": [
|
767 |
+
"f1_score(strat_pred, strat_ytest)"
|
768 |
+
],
|
769 |
+
"metadata": {
|
770 |
+
"collapsed": false,
|
771 |
+
"pycharm": {
|
772 |
+
"name": "#%%\n"
|
773 |
+
}
|
774 |
+
}
|
775 |
+
},
|
776 |
+
{
|
777 |
+
"cell_type": "code",
|
778 |
+
"execution_count": 38,
|
779 |
+
"outputs": [],
|
780 |
+
"source": [
|
781 |
+
"import numpy as np\n",
|
782 |
+
"\n",
|
783 |
+
"def predict(var_name):\n",
|
784 |
+
" pred = [var_name]\n",
|
785 |
+
" np_pred = np.array(pred)\n",
|
786 |
+
" score = lgbm_strat.predict(np_pred)\n",
|
787 |
+
" return score"
|
788 |
+
],
|
789 |
+
"metadata": {
|
790 |
+
"collapsed": false,
|
791 |
+
"pycharm": {
|
792 |
+
"name": "#%%\n"
|
793 |
+
}
|
794 |
+
}
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"cell_type": "code",
|
798 |
+
"execution_count": 39,
|
799 |
+
"outputs": [
|
800 |
+
{
|
801 |
+
"data": {
|
802 |
+
"text/plain": "1.0 330\n0.0 60\nName: diabetes, dtype: int64"
|
803 |
+
},
|
804 |
+
"execution_count": 39,
|
805 |
+
"metadata": {},
|
806 |
+
"output_type": "execute_result"
|
807 |
+
}
|
808 |
+
],
|
809 |
+
"source": [
|
810 |
+
"dia.diabetes.value_counts()"
|
811 |
+
],
|
812 |
+
"metadata": {
|
813 |
+
"collapsed": false,
|
814 |
+
"pycharm": {
|
815 |
+
"name": "#%%\n"
|
816 |
+
}
|
817 |
+
}
|
818 |
+
},
|
819 |
+
{
|
820 |
+
"cell_type": "code",
|
821 |
+
"execution_count": 40,
|
822 |
+
"outputs": [
|
823 |
+
{
|
824 |
+
"data": {
|
825 |
+
"text/plain": "5.5"
|
826 |
+
},
|
827 |
+
"execution_count": 40,
|
828 |
+
"metadata": {},
|
829 |
+
"output_type": "execute_result"
|
830 |
+
}
|
831 |
+
],
|
832 |
+
"source": [
|
833 |
+
"330/60"
|
834 |
+
],
|
835 |
+
"metadata": {
|
836 |
+
"collapsed": false,
|
837 |
+
"pycharm": {
|
838 |
+
"name": "#%%\n"
|
839 |
+
}
|
840 |
+
}
|
841 |
+
},
|
842 |
+
{
|
843 |
+
"cell_type": "code",
|
844 |
+
"execution_count": 41,
|
845 |
+
"outputs": [],
|
846 |
+
"source": [
|
847 |
+
"xgb= XGBClassifier(max_depth=7, n_estimators=1000, scale_pos_weight=5.5)"
|
848 |
+
],
|
849 |
+
"metadata": {
|
850 |
+
"collapsed": false,
|
851 |
+
"pycharm": {
|
852 |
+
"name": "#%%\n"
|
853 |
+
}
|
854 |
+
}
|
855 |
+
},
|
856 |
+
{
|
857 |
+
"cell_type": "code",
|
858 |
+
"execution_count": 42,
|
859 |
+
"outputs": [
|
860 |
+
{
|
861 |
+
"name": "stdout",
|
862 |
+
"output_type": "stream",
|
863 |
+
"text": [
|
864 |
+
"[00:36:49] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
|
865 |
+
]
|
866 |
+
},
|
867 |
+
{
|
868 |
+
"data": {
|
869 |
+
"text/plain": "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n importance_type='gain', interaction_constraints='',\n learning_rate=0.300000012, max_delta_step=0, max_depth=7,\n min_child_weight=1, missing=nan, monotone_constraints='()',\n n_estimators=1000, n_jobs=4, num_parallel_tree=1, random_state=0,\n reg_alpha=0, reg_lambda=1, scale_pos_weight=5.5, subsample=1,\n tree_method='exact', validate_parameters=1, verbosity=None)"
|
870 |
+
},
|
871 |
+
"execution_count": 42,
|
872 |
+
"metadata": {},
|
873 |
+
"output_type": "execute_result"
|
874 |
+
}
|
875 |
+
],
|
876 |
+
"source": [
|
877 |
+
"xgb.fit(strat_X, strat_y)"
|
878 |
+
],
|
879 |
+
"metadata": {
|
880 |
+
"collapsed": false,
|
881 |
+
"pycharm": {
|
882 |
+
"name": "#%%\n"
|
883 |
+
}
|
884 |
+
}
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"cell_type": "code",
|
888 |
+
"execution_count": 43,
|
889 |
+
"outputs": [],
|
890 |
+
"source": [
|
891 |
+
"y=xgb.predict(strat_test)"
|
892 |
+
],
|
893 |
+
"metadata": {
|
894 |
+
"collapsed": false,
|
895 |
+
"pycharm": {
|
896 |
+
"name": "#%%\n"
|
897 |
+
}
|
898 |
+
}
|
899 |
+
},
|
900 |
+
{
|
901 |
+
"cell_type": "code",
|
902 |
+
"execution_count": 44,
|
903 |
+
"outputs": [
|
904 |
+
{
|
905 |
+
"data": {
|
906 |
+
"text/plain": "0.955223880597015"
|
907 |
+
},
|
908 |
+
"execution_count": 44,
|
909 |
+
"metadata": {},
|
910 |
+
"output_type": "execute_result"
|
911 |
+
}
|
912 |
+
],
|
913 |
+
"source": [
|
914 |
+
"f1_score(y, strat_ytest)"
|
915 |
+
],
|
916 |
+
"metadata": {
|
917 |
+
"collapsed": false,
|
918 |
+
"pycharm": {
|
919 |
+
"name": "#%%\n"
|
920 |
+
}
|
921 |
+
}
|
922 |
+
},
|
923 |
+
{
|
924 |
+
"cell_type": "code",
|
925 |
+
"execution_count": 45,
|
926 |
+
"outputs": [],
|
927 |
+
"source": [
|
928 |
+
"import sqlite3"
|
929 |
+
],
|
930 |
+
"metadata": {
|
931 |
+
"collapsed": false,
|
932 |
+
"pycharm": {
|
933 |
+
"name": "#%%\n"
|
934 |
+
}
|
935 |
+
}
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"cell_type": "code",
|
939 |
+
"execution_count": 46,
|
940 |
+
"outputs": [],
|
941 |
+
"source": [
|
942 |
+
"conn = sqlite3.connect(\"diabetes.db\")\n",
|
943 |
+
"c = conn.cursor()"
|
944 |
+
],
|
945 |
+
"metadata": {
|
946 |
+
"collapsed": false,
|
947 |
+
"pycharm": {
|
948 |
+
"name": "#%%\n"
|
949 |
+
}
|
950 |
+
}
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"cell_type": "code",
|
954 |
+
"execution_count": 47,
|
955 |
+
"outputs": [
|
956 |
+
{
|
957 |
+
"data": {
|
958 |
+
"text/plain": "('patient_number',\n 'cholesterol',\n 'glucose',\n 'hdl_chol',\n 'chol_hdl_ratio',\n 'age',\n 'gender',\n 'height',\n 'weight',\n 'bmi',\n 'systolic_bp',\n 'diastolic_bp',\n 'waist',\n 'hip',\n 'waist_hip_ratio',\n 'diabetes')"
|
959 |
+
},
|
960 |
+
"execution_count": 47,
|
961 |
+
"metadata": {},
|
962 |
+
"output_type": "execute_result"
|
963 |
+
}
|
964 |
+
],
|
965 |
+
"source": [
|
966 |
+
"col= tuple(dia.columns)\n",
|
967 |
+
"col"
|
968 |
+
],
|
969 |
+
"metadata": {
|
970 |
+
"collapsed": false,
|
971 |
+
"pycharm": {
|
972 |
+
"name": "#%%\n"
|
973 |
+
}
|
974 |
+
}
|
975 |
+
},
|
976 |
+
{
|
977 |
+
"cell_type": "code",
|
978 |
+
"execution_count": 48,
|
979 |
+
"outputs": [],
|
980 |
+
"source": [
|
981 |
+
"conn.commit()"
|
982 |
+
],
|
983 |
+
"metadata": {
|
984 |
+
"collapsed": false,
|
985 |
+
"pycharm": {
|
986 |
+
"name": "#%%\n"
|
987 |
+
}
|
988 |
+
}
|
989 |
+
},
|
990 |
+
{
|
991 |
+
"cell_type": "code",
|
992 |
+
"execution_count": 49,
|
993 |
+
"outputs": [],
|
994 |
+
"source": [
|
995 |
+
"dia.to_sql(name=\"diabetes.db\", con=conn, if_exists= \"replace\", index=False)"
|
996 |
+
],
|
997 |
+
"metadata": {
|
998 |
+
"collapsed": false,
|
999 |
+
"pycharm": {
|
1000 |
+
"name": "#%%\n"
|
1001 |
+
}
|
1002 |
+
}
|
1003 |
+
}
|
1004 |
+
],
|
1005 |
+
"metadata": {
|
1006 |
+
"kernelspec": {
|
1007 |
+
"display_name": "Python 3",
|
1008 |
+
"language": "python",
|
1009 |
+
"name": "python3"
|
1010 |
+
},
|
1011 |
+
"language_info": {
|
1012 |
+
"codemirror_mode": {
|
1013 |
+
"name": "ipython",
|
1014 |
+
"version": 2
|
1015 |
+
},
|
1016 |
+
"file_extension": ".py",
|
1017 |
+
"mimetype": "text/x-python",
|
1018 |
+
"name": "python",
|
1019 |
+
"nbconvert_exporter": "python",
|
1020 |
+
"pygments_lexer": "ipython2",
|
1021 |
+
"version": "2.7.6"
|
1022 |
+
},
|
1023 |
+
"colab": {
|
1024 |
+
"provenance": []
|
1025 |
+
}
|
1026 |
+
},
|
1027 |
+
"nbformat": 4,
|
1028 |
+
"nbformat_minor": 0
|
1029 |
+
}
|
model.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# importing python libraries
|
2 |
+
import pandas as pd
|
3 |
+
import pickle as pkl
|
4 |
+
from lightgbm.sklearn import LGBMClassifier
|
5 |
+
from sklearn.model_selection import StratifiedShuffleSplit
|
6 |
+
from sklearn.preprocessing import RobustScaler, OrdinalEncoder
|
7 |
+
from sklearn.metrics import f1_score
|
8 |
+
|
9 |
+
import warnings
|
10 |
+
warnings.filterwarnings("ignore")
|
11 |
+
|
12 |
+
# loading diabetes data into variable data
data = pd.read_csv("./dataset/diabetes.csv")

# wrangling dataset.
# The CSV stores the ratio columns with decimal commas (e.g. "3,9"), so they
# are recomputed from their numeric source columns instead of being parsed.
data["chol_hdl_ratio"] = (data["cholesterol"] / data["hdl_chol"]).round(2)
data["waist_hip_ratio"] = (data["waist"] / data["hip"]).round(2)

# correcting comma separated number to decimal separated number.
data["bmi"] = pd.to_numeric(data["bmi"].str.replace(",", ".", regex=False))

print(data.head())
# encoding columns with object values using Ordinal Encoding
s = (data.dtypes == "object")
obj_col = s[s].index

print("Ordinal Encoding")
# OrdinalEncoder numbers each column's categories in sorted order, so the
# target becomes "Diabetes" -> 0.0 and "No diabetes" -> 1.0.
orde = OrdinalEncoder()
data[obj_col] = orde.fit_transform(data[obj_col])

print("Splitting features and target.")
# dropping off target and unnecessary columns (diabetes and patient number columns)
X = data.drop(["patient_number", "diabetes"], axis=1)
y = data["diabetes"]

print("Robust Scaling on X, y.")
# scaling data using RobustScaler (unsupervised: the target is not needed).
# NOTE(review): the scaler is fitted on every row before the split and neither
# it nor the encoder is saved next to the model -- confirm that the deployment
# code applies the same encoding/scaling to incoming records.
scale = RobustScaler()
scaled_X = scale.fit_transform(X)

print("Stratified Split.")
# StratifiedShuffleSplit on Data: every split is trained and scored so the
# four shuffles give a cross-validated estimate. The model fitted on the final
# split is the one that gets saved, as before.
split = StratifiedShuffleSplit(n_splits=4, random_state=42)

split_scores = []
for train_index, test_index in split.split(scaled_X, y):
    X_train, X_test = scaled_X[train_index], scaled_X[test_index]
    # The split yields positional indices, so index the Series by position.
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Loading LightGBM classifier to be used for training model.
    # LightGBM treats max_depth <= 0 as "no limit", so -2 acts like the
    # default of -1.
    lgbm = LGBMClassifier(n_estimators=200, max_depth=-2, random_state=42)
    lgbm.fit(X_train, y_train)
    pred = lgbm.predict(X_test)

    # f1_score expects (y_true, y_pred). The default positive label is 1,
    # i.e. the majority "No diabetes" class.
    f1 = f1_score(y_test, pred)
    split_scores.append(f1)

print(f"F1 Score for LightGBM: {f1}.")
print(f"Mean F1 over {len(split_scores)} splits: {sum(split_scores) / len(split_scores)}.")
# Score for the minority "Diabetes" class (encoded as 0) on the final split.
print(f"F1 Score for LightGBM (Diabetes class): {f1_score(y_test, pred, pos_label=0)}.")

# Using pickle to save model; the context manager closes the file even if
# dumping fails.
with open("../deployment/lightgbm.pickle", "wb") as model_file:
    pkl.dump(lgbm, model_file)
|