Upload student_model1.ipynb

#2
by Azarthehulk - opened
Files changed (1) hide show
  1. student_model1.ipynb +1135 -0
student_model1.ipynb ADDED
@@ -0,0 +1,1135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "94fb3237",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "5f713cf6",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "std_data=pd.read_csv(\"StudentsPerformance.csv\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 3,
26
+ "id": "a76812e9",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>gender</th>\n",
51
+ " <th>race/ethnicity</th>\n",
52
+ " <th>parental level of education</th>\n",
53
+ " <th>lunch</th>\n",
54
+ " <th>test preparation course</th>\n",
55
+ " <th>math score</th>\n",
56
+ " <th>reading score</th>\n",
57
+ " <th>writing score</th>\n",
58
+ " </tr>\n",
59
+ " </thead>\n",
60
+ " <tbody>\n",
61
+ " <tr>\n",
62
+ " <th>0</th>\n",
63
+ " <td>female</td>\n",
64
+ " <td>group B</td>\n",
65
+ " <td>bachelor's degree</td>\n",
66
+ " <td>standard</td>\n",
67
+ " <td>none</td>\n",
68
+ " <td>72</td>\n",
69
+ " <td>72</td>\n",
70
+ " <td>74</td>\n",
71
+ " </tr>\n",
72
+ " <tr>\n",
73
+ " <th>1</th>\n",
74
+ " <td>female</td>\n",
75
+ " <td>group C</td>\n",
76
+ " <td>some college</td>\n",
77
+ " <td>standard</td>\n",
78
+ " <td>completed</td>\n",
79
+ " <td>69</td>\n",
80
+ " <td>90</td>\n",
81
+ " <td>88</td>\n",
82
+ " </tr>\n",
83
+ " <tr>\n",
84
+ " <th>2</th>\n",
85
+ " <td>female</td>\n",
86
+ " <td>group B</td>\n",
87
+ " <td>master's degree</td>\n",
88
+ " <td>standard</td>\n",
89
+ " <td>none</td>\n",
90
+ " <td>90</td>\n",
91
+ " <td>95</td>\n",
92
+ " <td>93</td>\n",
93
+ " </tr>\n",
94
+ " <tr>\n",
95
+ " <th>3</th>\n",
96
+ " <td>male</td>\n",
97
+ " <td>group A</td>\n",
98
+ " <td>associate's degree</td>\n",
99
+ " <td>free/reduced</td>\n",
100
+ " <td>none</td>\n",
101
+ " <td>47</td>\n",
102
+ " <td>57</td>\n",
103
+ " <td>44</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>4</th>\n",
107
+ " <td>male</td>\n",
108
+ " <td>group C</td>\n",
109
+ " <td>some college</td>\n",
110
+ " <td>standard</td>\n",
111
+ " <td>none</td>\n",
112
+ " <td>76</td>\n",
113
+ " <td>78</td>\n",
114
+ " <td>75</td>\n",
115
+ " </tr>\n",
116
+ " </tbody>\n",
117
+ "</table>\n",
118
+ "</div>"
119
+ ],
120
+ "text/plain": [
121
+ " gender race/ethnicity parental level of education lunch \\\n",
122
+ "0 female group B bachelor's degree standard \n",
123
+ "1 female group C some college standard \n",
124
+ "2 female group B master's degree standard \n",
125
+ "3 male group A associate's degree free/reduced \n",
126
+ "4 male group C some college standard \n",
127
+ "\n",
128
+ " test preparation course math score reading score writing score \n",
129
+ "0 none 72 72 74 \n",
130
+ "1 completed 69 90 88 \n",
131
+ "2 none 90 95 93 \n",
132
+ "3 none 47 57 44 \n",
133
+ "4 none 76 78 75 "
134
+ ]
135
+ },
136
+ "execution_count": 3,
137
+ "metadata": {},
138
+ "output_type": "execute_result"
139
+ }
140
+ ],
141
+ "source": [
142
+ "std_data.head()"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 22,
148
+ "id": "88fdf384",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "std_features=['math score','reading_score','writing score']"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": 23,
158
+ "id": "5a98b351",
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "X=std_data[std_features]"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 24,
168
+ "id": "68cb5c93",
169
+ "metadata": {},
170
+ "outputs": [
171
+ {
172
+ "data": {
173
+ "text/html": [
174
+ "<div>\n",
175
+ "<style scoped>\n",
176
+ " .dataframe tbody tr th:only-of-type {\n",
177
+ " vertical-align: middle;\n",
178
+ " }\n",
179
+ "\n",
180
+ " .dataframe tbody tr th {\n",
181
+ " vertical-align: top;\n",
182
+ " }\n",
183
+ "\n",
184
+ " .dataframe thead th {\n",
185
+ " text-align: right;\n",
186
+ " }\n",
187
+ "</style>\n",
188
+ "<table border=\"1\" class=\"dataframe\">\n",
189
+ " <thead>\n",
190
+ " <tr style=\"text-align: right;\">\n",
191
+ " <th></th>\n",
192
+ " <th>math score</th>\n",
193
+ " <th>reading_score</th>\n",
194
+ " <th>writing score</th>\n",
195
+ " </tr>\n",
196
+ " </thead>\n",
197
+ " <tbody>\n",
198
+ " <tr>\n",
199
+ " <th>0</th>\n",
200
+ " <td>72</td>\n",
201
+ " <td>72</td>\n",
202
+ " <td>74</td>\n",
203
+ " </tr>\n",
204
+ " <tr>\n",
205
+ " <th>1</th>\n",
206
+ " <td>69</td>\n",
207
+ " <td>90</td>\n",
208
+ " <td>88</td>\n",
209
+ " </tr>\n",
210
+ " <tr>\n",
211
+ " <th>2</th>\n",
212
+ " <td>90</td>\n",
213
+ " <td>95</td>\n",
214
+ " <td>93</td>\n",
215
+ " </tr>\n",
216
+ " <tr>\n",
217
+ " <th>3</th>\n",
218
+ " <td>47</td>\n",
219
+ " <td>57</td>\n",
220
+ " <td>44</td>\n",
221
+ " </tr>\n",
222
+ " <tr>\n",
223
+ " <th>4</th>\n",
224
+ " <td>76</td>\n",
225
+ " <td>78</td>\n",
226
+ " <td>75</td>\n",
227
+ " </tr>\n",
228
+ " <tr>\n",
229
+ " <th>...</th>\n",
230
+ " <td>...</td>\n",
231
+ " <td>...</td>\n",
232
+ " <td>...</td>\n",
233
+ " </tr>\n",
234
+ " <tr>\n",
235
+ " <th>995</th>\n",
236
+ " <td>88</td>\n",
237
+ " <td>99</td>\n",
238
+ " <td>95</td>\n",
239
+ " </tr>\n",
240
+ " <tr>\n",
241
+ " <th>996</th>\n",
242
+ " <td>62</td>\n",
243
+ " <td>55</td>\n",
244
+ " <td>55</td>\n",
245
+ " </tr>\n",
246
+ " <tr>\n",
247
+ " <th>997</th>\n",
248
+ " <td>59</td>\n",
249
+ " <td>71</td>\n",
250
+ " <td>65</td>\n",
251
+ " </tr>\n",
252
+ " <tr>\n",
253
+ " <th>998</th>\n",
254
+ " <td>68</td>\n",
255
+ " <td>78</td>\n",
256
+ " <td>77</td>\n",
257
+ " </tr>\n",
258
+ " <tr>\n",
259
+ " <th>999</th>\n",
260
+ " <td>77</td>\n",
261
+ " <td>86</td>\n",
262
+ " <td>86</td>\n",
263
+ " </tr>\n",
264
+ " </tbody>\n",
265
+ "</table>\n",
266
+ "<p>1000 rows × 3 columns</p>\n",
267
+ "</div>"
268
+ ],
269
+ "text/plain": [
270
+ " math score reading_score writing score\n",
271
+ "0 72 72 74\n",
272
+ "1 69 90 88\n",
273
+ "2 90 95 93\n",
274
+ "3 47 57 44\n",
275
+ "4 76 78 75\n",
276
+ ".. ... ... ...\n",
277
+ "995 88 99 95\n",
278
+ "996 62 55 55\n",
279
+ "997 59 71 65\n",
280
+ "998 68 78 77\n",
281
+ "999 77 86 86\n",
282
+ "\n",
283
+ "[1000 rows x 3 columns]"
284
+ ]
285
+ },
286
+ "execution_count": 24,
287
+ "metadata": {},
288
+ "output_type": "execute_result"
289
+ }
290
+ ],
291
+ "source": [
292
+ "X"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": 19,
298
+ "id": "962e6183",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "std_data.rename(columns = {'reading score':'reading_score'}, inplace = True)"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": 27,
308
+ "id": "5da7ae54",
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "data": {
313
+ "text/html": [
314
+ "<div>\n",
315
+ "<style scoped>\n",
316
+ " .dataframe tbody tr th:only-of-type {\n",
317
+ " vertical-align: middle;\n",
318
+ " }\n",
319
+ "\n",
320
+ " .dataframe tbody tr th {\n",
321
+ " vertical-align: top;\n",
322
+ " }\n",
323
+ "\n",
324
+ " .dataframe thead th {\n",
325
+ " text-align: right;\n",
326
+ " }\n",
327
+ "</style>\n",
328
+ "<table border=\"1\" class=\"dataframe\">\n",
329
+ " <thead>\n",
330
+ " <tr style=\"text-align: right;\">\n",
331
+ " <th></th>\n",
332
+ " <th>math score</th>\n",
333
+ " <th>reading_score</th>\n",
334
+ " <th>writing score</th>\n",
335
+ " </tr>\n",
336
+ " </thead>\n",
337
+ " <tbody>\n",
338
+ " <tr>\n",
339
+ " <th>count</th>\n",
340
+ " <td>1000.00000</td>\n",
341
+ " <td>1000.000000</td>\n",
342
+ " <td>1000.000000</td>\n",
343
+ " </tr>\n",
344
+ " <tr>\n",
345
+ " <th>mean</th>\n",
346
+ " <td>66.08900</td>\n",
347
+ " <td>69.169000</td>\n",
348
+ " <td>68.054000</td>\n",
349
+ " </tr>\n",
350
+ " <tr>\n",
351
+ " <th>std</th>\n",
352
+ " <td>15.16308</td>\n",
353
+ " <td>14.600192</td>\n",
354
+ " <td>15.195657</td>\n",
355
+ " </tr>\n",
356
+ " <tr>\n",
357
+ " <th>min</th>\n",
358
+ " <td>0.00000</td>\n",
359
+ " <td>17.000000</td>\n",
360
+ " <td>10.000000</td>\n",
361
+ " </tr>\n",
362
+ " <tr>\n",
363
+ " <th>25%</th>\n",
364
+ " <td>57.00000</td>\n",
365
+ " <td>59.000000</td>\n",
366
+ " <td>57.750000</td>\n",
367
+ " </tr>\n",
368
+ " <tr>\n",
369
+ " <th>50%</th>\n",
370
+ " <td>66.00000</td>\n",
371
+ " <td>70.000000</td>\n",
372
+ " <td>69.000000</td>\n",
373
+ " </tr>\n",
374
+ " <tr>\n",
375
+ " <th>75%</th>\n",
376
+ " <td>77.00000</td>\n",
377
+ " <td>79.000000</td>\n",
378
+ " <td>79.000000</td>\n",
379
+ " </tr>\n",
380
+ " <tr>\n",
381
+ " <th>max</th>\n",
382
+ " <td>100.00000</td>\n",
383
+ " <td>100.000000</td>\n",
384
+ " <td>100.000000</td>\n",
385
+ " </tr>\n",
386
+ " </tbody>\n",
387
+ "</table>\n",
388
+ "</div>"
389
+ ],
390
+ "text/plain": [
391
+ " math score reading_score writing score\n",
392
+ "count 1000.00000 1000.000000 1000.000000\n",
393
+ "mean 66.08900 69.169000 68.054000\n",
394
+ "std 15.16308 14.600192 15.195657\n",
395
+ "min 0.00000 17.000000 10.000000\n",
396
+ "25% 57.00000 59.000000 57.750000\n",
397
+ "50% 66.00000 70.000000 69.000000\n",
398
+ "75% 77.00000 79.000000 79.000000\n",
399
+ "max 100.00000 100.000000 100.000000"
400
+ ]
401
+ },
402
+ "execution_count": 27,
403
+ "metadata": {},
404
+ "output_type": "execute_result"
405
+ }
406
+ ],
407
+ "source": [
408
+ "X.describe()"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 25,
414
+ "id": "7f074fee",
415
+ "metadata": {},
416
+ "outputs": [],
417
+ "source": [
418
+ "y=std_data.reading_score"
419
+ ]
420
+ },
421
+ {
422
+ "cell_type": "code",
423
+ "execution_count": 28,
424
+ "id": "cc65b7cb",
425
+ "metadata": {},
426
+ "outputs": [],
427
+ "source": [
428
+ "from sklearn.tree import DecisionTreeRegressor"
429
+ ]
430
+ },
431
+ {
432
+ "cell_type": "code",
433
+ "execution_count": 30,
434
+ "id": "80b78ccb",
435
+ "metadata": {},
436
+ "outputs": [],
437
+ "source": [
438
+ "std_model=DecisionTreeRegressor(random_state=1)"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": 31,
444
+ "id": "b7a5fe43",
445
+ "metadata": {},
446
+ "outputs": [
447
+ {
448
+ "data": {
449
+ "text/plain": [
450
+ "DecisionTreeRegressor(random_state=1)"
451
+ ]
452
+ },
453
+ "execution_count": 31,
454
+ "metadata": {},
455
+ "output_type": "execute_result"
456
+ }
457
+ ],
458
+ "source": [
459
+ "std_model"
460
+ ]
461
+ },
462
+ {
463
+ "cell_type": "code",
464
+ "execution_count": 32,
465
+ "id": "10e72f85",
466
+ "metadata": {},
467
+ "outputs": [
468
+ {
469
+ "data": {
470
+ "text/plain": [
471
+ "DecisionTreeRegressor(random_state=1)"
472
+ ]
473
+ },
474
+ "execution_count": 32,
475
+ "metadata": {},
476
+ "output_type": "execute_result"
477
+ }
478
+ ],
479
+ "source": [
480
+ "std_model.fit(X,y)"
481
+ ]
482
+ },
483
+ {
484
+ "cell_type": "code",
485
+ "execution_count": 35,
486
+ "id": "33d20c6d",
487
+ "metadata": {},
488
+ "outputs": [
489
+ {
490
+ "name": "stdout",
491
+ "output_type": "stream",
492
+ "text": [
493
+ " math score reading_score writing score\n",
494
+ "0 72 72 74\n",
495
+ "1 69 90 88\n",
496
+ "2 90 95 93\n",
497
+ "3 47 57 44\n",
498
+ "4 76 78 75\n",
499
+ ".. ... ... ...\n",
500
+ "995 88 99 95\n",
501
+ "996 62 55 55\n",
502
+ "997 59 71 65\n",
503
+ "998 68 78 77\n",
504
+ "999 77 86 86\n",
505
+ "\n",
506
+ "[1000 rows x 3 columns]\n"
507
+ ]
508
+ }
509
+ ],
510
+ "source": [
511
+ "print(X)"
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": 37,
517
+ "id": "e9f47dd4",
518
+ "metadata": {},
519
+ "outputs": [
520
+ {
521
+ "name": "stdout",
522
+ "output_type": "stream",
523
+ "text": [
524
+ "[72. 90. 95. 57. 78.]\n"
525
+ ]
526
+ }
527
+ ],
528
+ "source": [
529
+ "print(std_model.predict(X.head()))"
530
+ ]
531
+ },
532
+ {
533
+ "cell_type": "code",
534
+ "execution_count": 39,
535
+ "id": "297049c0",
536
+ "metadata": {},
537
+ "outputs": [
538
+ {
539
+ "data": {
540
+ "text/plain": [
541
+ "0.0"
542
+ ]
543
+ },
544
+ "execution_count": 39,
545
+ "metadata": {},
546
+ "output_type": "execute_result"
547
+ }
548
+ ],
549
+ "source": [
550
+ "# Model validation (in-sample: scored on the same rows the model was fitted on):\n",
551
+ "from sklearn.metrics import mean_absolute_error\n",
552
+ "\n",
553
+ "reading_score = std_model.predict(X)\n",
554
+ "mean_absolute_error(y,reading_score )"
555
+ ]
556
+ },
557
+ {
558
+ "cell_type": "code",
559
+ "execution_count": 41,
560
+ "id": "ba83aa92",
561
+ "metadata": {
562
+ "scrolled": true
563
+ },
564
+ "outputs": [
565
+ {
566
+ "data": {
567
+ "text/plain": [
568
+ "array([ 72., 90., 95., 57., 78., 83., 95., 43., 64., 60., 54.,\n",
569
+ " 52., 81., 72., 53., 75., 89., 32., 42., 58., 69., 75.,\n",
570
+ " 54., 73., 71., 74., 54., 69., 70., 70., 74., 65., 72.,\n",
571
+ " 42., 87., 81., 81., 64., 90., 56., 61., 73., 58., 65.,\n",
572
+ " 56., 54., 65., 71., 74., 84., 55., 69., 44., 78., 84.,\n",
573
+ " 41., 85., 55., 59., 17., 74., 39., 61., 80., 58., 64.,\n",
574
+ " 37., 72., 58., 64., 63., 55., 51., 57., 49., 41., 26.,\n",
575
+ " 78., 74., 68., 49., 45., 47., 64., 39., 80., 83., 71.,\n",
576
+ " 70., 86., 72., 34., 79., 45., 86., 81., 66., 72., 67.,\n",
577
+ " 67., 67., 74., 91., 44., 86., 67., 100., 63., 76., 64.,\n",
578
+ " 89., 55., 53., 58., 100., 77., 85., 82., 63., 69., 92.,\n",
579
+ " 89., 93., 57., 80., 95., 68., 77., 82., 49., 84., 37.,\n",
580
+ " 74., 81., 79., 55., 54., 55., 66., 61., 72., 62., 55.,\n",
581
+ " 43., 73., 39., 84., 68., 75., 100., 67., 67., 70., 49.,\n",
582
+ " 67., 89., 74., 60., 86., 62., 78., 88., 53., 53., 92.,\n",
583
+ " 100., 51., 76., 83., 75., 73., 88., 86., 67., 51., 91.,\n",
584
+ " 54., 77., 70., 100., 68., 64., 50., 69., 52., 67., 76.,\n",
585
+ " 66., 52., 88., 65., 83., 64., 62., 84., 55., 69., 56.,\n",
586
+ " 53., 79., 84., 81., 77., 69., 41., 71., 62., 80., 81.,\n",
587
+ " 61., 79., 28., 62., 51., 91., 83., 86., 42., 77., 56.,\n",
588
+ " 68., 85., 65., 80., 66., 56., 72., 50., 72., 95., 64.,\n",
589
+ " 43., 86., 87., 82., 75., 66., 60., 52., 80., 68., 83.,\n",
590
+ " 52., 51., 74., 76., 76., 70., 64., 60., 49., 83., 70.,\n",
591
+ " 80., 52., 73., 73., 77., 75., 81., 79., 79., 50., 93.,\n",
592
+ " 73., 42., 75., 72., 92., 76., 63., 49., 53., 70., 85.,\n",
593
+ " 78., 92., 63., 86., 56., 52., 48., 79., 78., 46., 82.,\n",
594
+ " 82., 89., 75., 76., 70., 73., 60., 73., 77., 62., 41.,\n",
595
+ " 74., 46., 87., 78., 54., 84., 76., 75., 67., 87., 52.,\n",
596
+ " 71., 57., 76., 60., 61., 67., 64., 66., 82., 72., 71.,\n",
597
+ " 65., 79., 86., 81., 53., 46., 90., 61., 23., 75., 55.,\n",
598
+ " 60., 37., 56., 78., 93., 68., 70., 51., 38., 55., 61.,\n",
599
+ " 73., 76., 72., 73., 80., 61., 94., 74., 74., 65., 57.,\n",
600
+ " 78., 58., 71., 72., 61., 66., 62., 90., 62., 84., 58.,\n",
601
+ " 34., 60., 58., 58., 66., 64., 84., 77., 73., 74., 97.,\n",
602
+ " 70., 43., 90., 95., 83., 64., 86., 100., 81., 49., 43.,\n",
603
+ " 76., 73., 78., 64., 70., 67., 68., 67., 54., 74., 45.,\n",
604
+ " 67., 89., 63., 59., 54., 43., 65., 99., 59., 73., 65.,\n",
605
+ " 80., 57., 84., 71., 83., 66., 67., 72., 73., 74., 73.,\n",
606
+ " 59., 56., 93., 58., 58., 85., 39., 67., 83., 71., 59.,\n",
607
+ " 63., 66., 72., 56., 59., 66., 48., 68., 66., 56., 88.,\n",
608
+ " 81., 81., 73., 83., 82., 74., 66., 81., 46., 73., 85.,\n",
609
+ " 92., 77., 58., 61., 56., 89., 54., 100., 65., 58., 54.,\n",
610
+ " 70., 90., 58., 87., 31., 67., 88., 74., 85., 69., 86.,\n",
611
+ " 67., 90., 76., 62., 68., 64., 71., 71., 59., 68., 52.,\n",
612
+ " 52., 74., 47., 75., 53., 82., 85., 64., 83., 88., 64.,\n",
613
+ " 64., 48., 78., 69., 71., 79., 87., 61., 89., 59., 82.,\n",
614
+ " 70., 59., 78., 92., 71., 50., 49., 61., 97., 87., 89.,\n",
615
+ " 74., 78., 78., 49., 86., 58., 59., 52., 60., 61., 53.,\n",
616
+ " 41., 74., 67., 54., 61., 88., 69., 83., 60., 66., 66.,\n",
617
+ " 92., 69., 82., 77., 95., 63., 83., 100., 67., 67., 72.,\n",
618
+ " 76., 90., 48., 62., 45., 39., 72., 67., 70., 66., 75.,\n",
619
+ " 74., 90., 80., 51., 43., 100., 71., 48., 68., 75., 96.,\n",
620
+ " 62., 66., 81., 55., 51., 91., 56., 61., 97., 79., 73.,\n",
621
+ " 75., 77., 76., 73., 63., 64., 66., 57., 62., 68., 76.,\n",
622
+ " 100., 79., 24., 54., 77., 82., 60., 29., 78., 57., 89.,\n",
623
+ " 72., 84., 58., 64., 63., 60., 59., 90., 77., 93., 68.,\n",
624
+ " 45., 78., 81., 73., 61., 63., 51., 96., 58., 97., 70.,\n",
625
+ " 48., 57., 51., 64., 60., 74., 88., 84., 74., 80., 92.,\n",
626
+ " 76., 74., 52., 88., 81., 79., 65., 81., 70., 62., 53.,\n",
627
+ " 79., 56., 80., 86., 70., 79., 67., 67., 66., 60., 87.,\n",
628
+ " 77., 66., 71., 69., 63., 60., 73., 85., 74., 72., 76.,\n",
629
+ " 57., 78., 84., 77., 64., 78., 82., 75., 61., 72., 68.,\n",
630
+ " 55., 40., 66., 99., 75., 78., 58., 90., 53., 76., 74.,\n",
631
+ " 77., 63., 89., 82., 72., 78., 66., 81., 67., 84., 64.,\n",
632
+ " 63., 72., 34., 59., 87., 61., 84., 85., 100., 81., 70.,\n",
633
+ " 94., 78., 96., 76., 73., 72., 59., 90., 48., 43., 74.,\n",
634
+ " 75., 51., 92., 39., 77., 46., 89., 47., 58., 57., 79.,\n",
635
+ " 66., 71., 60., 73., 57., 84., 73., 55., 79., 75., 64.,\n",
636
+ " 60., 84., 69., 72., 77., 90., 55., 95., 58., 68., 59.,\n",
637
+ " 77., 72., 58., 81., 62., 63., 72., 75., 62., 71., 60.,\n",
638
+ " 48., 73., 67., 78., 65., 58., 72., 44., 79., 85., 56.,\n",
639
+ " 90., 85., 59., 81., 51., 79., 38., 65., 65., 62., 66.,\n",
640
+ " 74., 84., 52., 68., 70., 84., 60., 55., 73., 80., 94.,\n",
641
+ " 85., 76., 81., 74., 45., 75., 54., 31., 47., 64., 84.,\n",
642
+ " 80., 86., 59., 70., 72., 91., 90., 90., 52., 87., 58.,\n",
643
+ " 67., 68., 69., 86., 54., 60., 86., 60., 82., 50., 64.,\n",
644
+ " 64., 82., 57., 77., 52., 58., 44., 77., 65., 85., 85.,\n",
645
+ " 54., 72., 75., 67., 68., 85., 67., 64., 97., 68., 79.,\n",
646
+ " 49., 73., 62., 86., 42., 71., 93., 82., 53., 42., 74.,\n",
647
+ " 51., 58., 72., 84., 90., 62., 64., 82., 61., 72., 76.,\n",
648
+ " 64., 70., 73., 46., 51., 76., 100., 72., 65., 51., 85.,\n",
649
+ " 92., 67., 74., 62., 34., 29., 78., 54., 78., 84., 78.,\n",
650
+ " 48., 100., 84., 77., 48., 84., 75., 64., 42., 84., 61.,\n",
651
+ " 62., 61., 70., 100., 61., 77., 96., 70., 53., 66., 65.,\n",
652
+ " 70., 64., 56., 61., 43., 56., 74., 57., 71., 75., 87.,\n",
653
+ " 63., 57., 58., 81., 68., 66., 91., 66., 62., 68., 61.,\n",
654
+ " 82., 58., 50., 75., 73., 77., 74., 52., 69., 57., 87.,\n",
655
+ " 100., 63., 81., 58., 54., 100., 76., 57., 70., 68., 63.,\n",
656
+ " 76., 84., 100., 72., 50., 65., 63., 82., 62., 65., 41.,\n",
657
+ " 95., 24., 78., 85., 87., 75., 51., 59., 75., 45., 86.,\n",
658
+ " 81., 82., 76., 72., 63., 99., 55., 71., 78., 86.])"
659
+ ]
660
+ },
661
+ "execution_count": 41,
662
+ "metadata": {},
663
+ "output_type": "execute_result"
664
+ }
665
+ ],
666
+ "source": [
667
+ "reading_score"
668
+ ]
669
+ },
670
+ {
671
+ "cell_type": "code",
672
+ "execution_count": 40,
673
+ "id": "240817a8",
674
+ "metadata": {},
675
+ "outputs": [
676
+ {
677
+ "name": "stdout",
678
+ "output_type": "stream",
679
+ "text": [
680
+ "0.04\n"
681
+ ]
682
+ }
683
+ ],
684
+ "source": [
685
+ "from sklearn.model_selection import train_test_split\n",
686
+ "\n",
687
+ "# split data into training and validation data, for both features and target\n",
688
+ "# The split is based on a random number generator. Supplying a numeric value to\n",
689
+ "# the random_state argument guarantees we get the same split every time we\n",
690
+ "# run this script.\n",
691
+ "train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 0)\n",
692
+ "# Define model\n",
693
+ "std_model = DecisionTreeRegressor()\n",
694
+ "# Fit model\n",
695
+ "std_model.fit(train_X, train_y)\n",
696
+ "\n",
697
+ "# get predicted reading scores on validation data\n",
698
+ "read_score_predict = std_model.predict(val_X)\n",
699
+ "print(mean_absolute_error(val_y, read_score_predict))"
700
+ ]
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": null,
705
+ "id": "3dcd5b64",
706
+ "metadata": {},
707
+ "outputs": [],
708
+ "source": []
709
+ },
710
+ {
711
+ "cell_type": "code",
712
+ "execution_count": null,
713
+ "id": "48b7e2de",
714
+ "metadata": {},
715
+ "outputs": [],
716
+ "source": []
717
+ },
718
+ {
719
+ "cell_type": "code",
720
+ "execution_count": 42,
721
+ "id": "0b193859",
722
+ "metadata": {},
723
+ "outputs": [
724
+ {
725
+ "data": {
726
+ "text/html": [
727
+ "<div>\n",
728
+ "<style scoped>\n",
729
+ " .dataframe tbody tr th:only-of-type {\n",
730
+ " vertical-align: middle;\n",
731
+ " }\n",
732
+ "\n",
733
+ " .dataframe tbody tr th {\n",
734
+ " vertical-align: top;\n",
735
+ " }\n",
736
+ "\n",
737
+ " .dataframe thead th {\n",
738
+ " text-align: right;\n",
739
+ " }\n",
740
+ "</style>\n",
741
+ "<table border=\"1\" class=\"dataframe\">\n",
742
+ " <thead>\n",
743
+ " <tr style=\"text-align: right;\">\n",
744
+ " <th></th>\n",
745
+ " <th>math score</th>\n",
746
+ " <th>reading_score</th>\n",
747
+ " <th>writing score</th>\n",
748
+ " </tr>\n",
749
+ " </thead>\n",
750
+ " <tbody>\n",
751
+ " <tr>\n",
752
+ " <th>253</th>\n",
753
+ " <td>80</td>\n",
754
+ " <td>80</td>\n",
755
+ " <td>72</td>\n",
756
+ " </tr>\n",
757
+ " <tr>\n",
758
+ " <th>667</th>\n",
759
+ " <td>77</td>\n",
760
+ " <td>85</td>\n",
761
+ " <td>87</td>\n",
762
+ " </tr>\n",
763
+ " <tr>\n",
764
+ " <th>85</th>\n",
765
+ " <td>73</td>\n",
766
+ " <td>80</td>\n",
767
+ " <td>82</td>\n",
768
+ " </tr>\n",
769
+ " <tr>\n",
770
+ " <th>969</th>\n",
771
+ " <td>75</td>\n",
772
+ " <td>84</td>\n",
773
+ " <td>80</td>\n",
774
+ " </tr>\n",
775
+ " <tr>\n",
776
+ " <th>75</th>\n",
777
+ " <td>44</td>\n",
778
+ " <td>41</td>\n",
779
+ " <td>38</td>\n",
780
+ " </tr>\n",
781
+ " <tr>\n",
782
+ " <th>...</th>\n",
783
+ " <td>...</td>\n",
784
+ " <td>...</td>\n",
785
+ " <td>...</td>\n",
786
+ " </tr>\n",
787
+ " <tr>\n",
788
+ " <th>835</th>\n",
789
+ " <td>60</td>\n",
790
+ " <td>64</td>\n",
791
+ " <td>74</td>\n",
792
+ " </tr>\n",
793
+ " <tr>\n",
794
+ " <th>192</th>\n",
795
+ " <td>62</td>\n",
796
+ " <td>64</td>\n",
797
+ " <td>66</td>\n",
798
+ " </tr>\n",
799
+ " <tr>\n",
800
+ " <th>629</th>\n",
801
+ " <td>44</td>\n",
802
+ " <td>51</td>\n",
803
+ " <td>55</td>\n",
804
+ " </tr>\n",
805
+ " <tr>\n",
806
+ " <th>559</th>\n",
807
+ " <td>73</td>\n",
808
+ " <td>66</td>\n",
809
+ " <td>62</td>\n",
810
+ " </tr>\n",
811
+ " <tr>\n",
812
+ " <th>684</th>\n",
813
+ " <td>62</td>\n",
814
+ " <td>66</td>\n",
815
+ " <td>68</td>\n",
816
+ " </tr>\n",
817
+ " </tbody>\n",
818
+ "</table>\n",
819
+ "<p>750 rows × 3 columns</p>\n",
820
+ "</div>"
821
+ ],
822
+ "text/plain": [
823
+ " math score reading_score writing score\n",
824
+ "253 80 80 72\n",
825
+ "667 77 85 87\n",
826
+ "85 73 80 82\n",
827
+ "969 75 84 80\n",
828
+ "75 44 41 38\n",
829
+ ".. ... ... ...\n",
830
+ "835 60 64 74\n",
831
+ "192 62 64 66\n",
832
+ "629 44 51 55\n",
833
+ "559 73 66 62\n",
834
+ "684 62 66 68\n",
835
+ "\n",
836
+ "[750 rows x 3 columns]"
837
+ ]
838
+ },
839
+ "execution_count": 42,
840
+ "metadata": {},
841
+ "output_type": "execute_result"
842
+ }
843
+ ],
844
+ "source": [
845
+ "train_X"
846
+ ]
847
+ },
848
+ {
849
+ "cell_type": "code",
850
+ "execution_count": 44,
851
+ "id": "4358a31f",
852
+ "metadata": {},
853
+ "outputs": [
854
+ {
855
+ "data": {
856
+ "text/plain": [
857
+ "(750,)"
858
+ ]
859
+ },
860
+ "execution_count": 44,
861
+ "metadata": {},
862
+ "output_type": "execute_result"
863
+ }
864
+ ],
865
+ "source": [
866
+ "train_y.shape"
867
+ ]
868
+ },
869
+ {
870
+ "cell_type": "code",
871
+ "execution_count": 45,
872
+ "id": "67406471",
873
+ "metadata": {},
874
+ "outputs": [
875
+ {
876
+ "data": {
877
+ "text/plain": [
878
+ "253 80\n",
879
+ "667 85\n",
880
+ "85 80\n",
881
+ "969 84\n",
882
+ "75 41\n",
883
+ " ..\n",
884
+ "835 64\n",
885
+ "192 64\n",
886
+ "629 51\n",
887
+ "559 66\n",
888
+ "684 66\n",
889
+ "Name: reading_score, Length: 750, dtype: int64"
890
+ ]
891
+ },
892
+ "execution_count": 45,
893
+ "metadata": {},
894
+ "output_type": "execute_result"
895
+ }
896
+ ],
897
+ "source": [
898
+ "train_y"
899
+ ]
900
+ },
901
+ {
902
+ "cell_type": "code",
903
+ "execution_count": 47,
904
+ "id": "26bb9459",
905
+ "metadata": {},
906
+ "outputs": [
907
+ {
908
+ "data": {
909
+ "text/html": [
910
+ "<div>\n",
911
+ "<style scoped>\n",
912
+ " .dataframe tbody tr th:only-of-type {\n",
913
+ " vertical-align: middle;\n",
914
+ " }\n",
915
+ "\n",
916
+ " .dataframe tbody tr th {\n",
917
+ " vertical-align: top;\n",
918
+ " }\n",
919
+ "\n",
920
+ " .dataframe thead th {\n",
921
+ " text-align: right;\n",
922
+ " }\n",
923
+ "</style>\n",
924
+ "<table border=\"1\" class=\"dataframe\">\n",
925
+ " <thead>\n",
926
+ " <tr style=\"text-align: right;\">\n",
927
+ " <th></th>\n",
928
+ " <th>math score</th>\n",
929
+ " <th>reading_score</th>\n",
930
+ " <th>writing score</th>\n",
931
+ " </tr>\n",
932
+ " </thead>\n",
933
+ " <tbody>\n",
934
+ " <tr>\n",
935
+ " <th>993</th>\n",
936
+ " <td>62</td>\n",
937
+ " <td>72</td>\n",
938
+ " <td>74</td>\n",
939
+ " </tr>\n",
940
+ " <tr>\n",
941
+ " <th>859</th>\n",
942
+ " <td>87</td>\n",
943
+ " <td>73</td>\n",
944
+ " <td>72</td>\n",
945
+ " </tr>\n",
946
+ " <tr>\n",
947
+ " <th>298</th>\n",
948
+ " <td>40</td>\n",
949
+ " <td>46</td>\n",
950
+ " <td>50</td>\n",
951
+ " </tr>\n",
952
+ " <tr>\n",
953
+ " <th>553</th>\n",
954
+ " <td>77</td>\n",
955
+ " <td>62</td>\n",
956
+ " <td>64</td>\n",
957
+ " </tr>\n",
958
+ " <tr>\n",
959
+ " <th>672</th>\n",
960
+ " <td>69</td>\n",
961
+ " <td>78</td>\n",
962
+ " <td>76</td>\n",
963
+ " </tr>\n",
964
+ " <tr>\n",
965
+ " <th>...</th>\n",
966
+ " <td>...</td>\n",
967
+ " <td>...</td>\n",
968
+ " <td>...</td>\n",
969
+ " </tr>\n",
970
+ " <tr>\n",
971
+ " <th>462</th>\n",
972
+ " <td>71</td>\n",
973
+ " <td>70</td>\n",
974
+ " <td>76</td>\n",
975
+ " </tr>\n",
976
+ " <tr>\n",
977
+ " <th>356</th>\n",
978
+ " <td>63</td>\n",
979
+ " <td>61</td>\n",
980
+ " <td>61</td>\n",
981
+ " </tr>\n",
982
+ " <tr>\n",
983
+ " <th>2</th>\n",
984
+ " <td>90</td>\n",
985
+ " <td>95</td>\n",
986
+ " <td>93</td>\n",
987
+ " </tr>\n",
988
+ " <tr>\n",
989
+ " <th>478</th>\n",
990
+ " <td>55</td>\n",
991
+ " <td>64</td>\n",
992
+ " <td>70</td>\n",
993
+ " </tr>\n",
994
+ " <tr>\n",
995
+ " <th>695</th>\n",
996
+ " <td>79</td>\n",
997
+ " <td>89</td>\n",
998
+ " <td>86</td>\n",
999
+ " </tr>\n",
1000
+ " </tbody>\n",
1001
+ "</table>\n",
1002
+ "<p>250 rows × 3 columns</p>\n",
1003
+ "</div>"
1004
+ ],
1005
+ "text/plain": [
1006
+ " math score reading_score writing score\n",
1007
+ "993 62 72 74\n",
1008
+ "859 87 73 72\n",
1009
+ "298 40 46 50\n",
1010
+ "553 77 62 64\n",
1011
+ "672 69 78 76\n",
1012
+ ".. ... ... ...\n",
1013
+ "462 71 70 76\n",
1014
+ "356 63 61 61\n",
1015
+ "2 90 95 93\n",
1016
+ "478 55 64 70\n",
1017
+ "695 79 89 86\n",
1018
+ "\n",
1019
+ "[250 rows x 3 columns]"
1020
+ ]
1021
+ },
1022
+ "execution_count": 47,
1023
+ "metadata": {},
1024
+ "output_type": "execute_result"
1025
+ }
1026
+ ],
1027
+ "source": [
1028
+ "val_X"
1029
+ ]
1030
+ },
1031
+ {
1032
+ "cell_type": "code",
1033
+ "execution_count": 48,
1034
+ "id": "6262a739",
1035
+ "metadata": {},
1036
+ "outputs": [
1037
+ {
1038
+ "data": {
1039
+ "text/plain": [
1040
+ "993 72\n",
1041
+ "859 73\n",
1042
+ "298 46\n",
1043
+ "553 62\n",
1044
+ "672 78\n",
1045
+ " ..\n",
1046
+ "462 70\n",
1047
+ "356 61\n",
1048
+ "2 95\n",
1049
+ "478 64\n",
1050
+ "695 89\n",
1051
+ "Name: reading_score, Length: 250, dtype: int64"
1052
+ ]
1053
+ },
1054
+ "execution_count": 48,
1055
+ "metadata": {},
1056
+ "output_type": "execute_result"
1057
+ }
1058
+ ],
1059
+ "source": [
1060
+ "val_y"
1061
+ ]
1062
+ },
1063
+ {
1064
+ "cell_type": "code",
1065
+ "execution_count": 49,
1066
+ "id": "8328fdc3",
1067
+ "metadata": {},
1068
+ "outputs": [
1069
+ {
1070
+ "data": {
1071
+ "text/plain": [
1072
+ "(250,)"
1073
+ ]
1074
+ },
1075
+ "execution_count": 49,
1076
+ "metadata": {},
1077
+ "output_type": "execute_result"
1078
+ }
1079
+ ],
1080
+ "source": [
1081
+ "val_y.shape"
1082
+ ]
1083
+ },
1084
+ {
1085
+ "cell_type": "code",
1086
+ "execution_count": 50,
1087
+ "id": "c6663f05",
1088
+ "metadata": {},
1089
+ "outputs": [
1090
+ {
1091
+ "data": {
1092
+ "text/plain": [
1093
+ "(250, 3)"
1094
+ ]
1095
+ },
1096
+ "execution_count": 50,
1097
+ "metadata": {},
1098
+ "output_type": "execute_result"
1099
+ }
1100
+ ],
1101
+ "source": [
1102
+ "val_X.shape"
1103
+ ]
1104
+ },
1105
+ {
1106
+ "cell_type": "code",
1107
+ "execution_count": null,
1108
+ "id": "b52c954d",
1109
+ "metadata": {},
1110
+ "outputs": [],
1111
+ "source": []
1112
+ }
1113
+ ],
1114
+ "metadata": {
1115
+ "kernelspec": {
1116
+ "display_name": "Python 3 (ipykernel)",
1117
+ "language": "python",
1118
+ "name": "python3"
1119
+ },
1120
+ "language_info": {
1121
+ "codemirror_mode": {
1122
+ "name": "ipython",
1123
+ "version": 3
1124
+ },
1125
+ "file_extension": ".py",
1126
+ "mimetype": "text/x-python",
1127
+ "name": "python",
1128
+ "nbconvert_exporter": "python",
1129
+ "pygments_lexer": "ipython3",
1130
+ "version": "3.9.12"
1131
+ }
1132
+ },
1133
+ "nbformat": 4,
1134
+ "nbformat_minor": 5
1135
+ }