Jensen-holm committed on
Commit
5ddfe50
1 Parent(s): 64036a9

split up the men's and women's data and began building a neural network

Browse files
data/AllSuperDetailedGames.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c90b4929384af72ef071c3017e5cd3fd50c1c0b94ee91b212b200594f23803c
3
  size 929302721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565fdd04d067c0cc0b406ca74ac2fa7a8a6cf422961e14a971949b4fd72f1bc2
3
  size 929302721
data/AllTeamsAgg.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee7a2a14cb165da65d33b252a7df2c79e155be061423995ce0421cd34f69d638
3
- size 29693952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b458f333d57030cde3f975417c82fd5100acc787754e546102c83c2ada7c4d0e
3
+ size 29572986
src/nn.ipynb CHANGED
@@ -18,408 +18,43 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 3,
22
  "metadata": {},
23
  "outputs": [
 
 
 
 
 
 
 
 
24
  {
25
  "name": "stdout",
26
  "output_type": "stream",
27
  "text": [
28
  "<class 'pandas.core.frame.DataFrame'>\n",
29
- "RangeIndex: 12857 entries, 0 to 12856\n",
30
- "Columns: 440 entries, Unnamed: 0 to ChalkSeed\n",
31
- "dtypes: float64(347), int64(89), object(4)\n",
32
- "memory usage: 43.2+ MB\n"
33
  ]
34
  }
35
  ],
36
  "source": [
37
- "all_teams_agg_df = pd.read_csv(os.path.join(DATA_DIR, \"AllTeamsAgg.csv\"), low_memory=False)\n",
 
 
38
  "\n",
39
- "all_teams_agg_df.info()"
40
- ]
41
- },
42
- {
43
- "cell_type": "code",
44
- "execution_count": 4,
45
- "metadata": {},
46
- "outputs": [
47
- {
48
- "data": {
49
- "text/html": [
50
- "<div>\n",
51
- "<style scoped>\n",
52
- " .dataframe tbody tr th:only-of-type {\n",
53
- " vertical-align: middle;\n",
54
- " }\n",
55
- "\n",
56
- " .dataframe tbody tr th {\n",
57
- " vertical-align: top;\n",
58
- " }\n",
59
- "\n",
60
- " .dataframe thead th {\n",
61
- " text-align: right;\n",
62
- " }\n",
63
- "</style>\n",
64
- "<table border=\"1\" class=\"dataframe\">\n",
65
- " <thead>\n",
66
- " <tr style=\"text-align: right;\">\n",
67
- " <th></th>\n",
68
- " <th>Unnamed: 0</th>\n",
69
- " <th>TeamID</th>\n",
70
- " <th>Season</th>\n",
71
- " <th>League</th>\n",
72
- " <th>TeamScore min reg</th>\n",
73
- " <th>TeamScore max reg</th>\n",
74
- " <th>TeamScore std reg</th>\n",
75
- " <th>TeamScore median reg</th>\n",
76
- " <th>TeamScore mean reg</th>\n",
77
- " <th>OppScore min reg</th>\n",
78
- " <th>...</th>\n",
79
- " <th>Win max tourney</th>\n",
80
- " <th>Win std tourney</th>\n",
81
- " <th>Win median tourney</th>\n",
82
- " <th>Win mean tourney</th>\n",
83
- " <th>ConfAbbrev</th>\n",
84
- " <th>Seed</th>\n",
85
- " <th>TeamName</th>\n",
86
- " <th>FirstD1Season</th>\n",
87
- " <th>LastD1Season</th>\n",
88
- " <th>ChalkSeed</th>\n",
89
- " </tr>\n",
90
- " </thead>\n",
91
- " <tbody>\n",
92
- " <tr>\n",
93
- " <th>12348</th>\n",
94
- " <td>12348</td>\n",
95
- " <td>3430</td>\n",
96
- " <td>2012</td>\n",
97
- " <td>W</td>\n",
98
- " <td>41</td>\n",
99
- " <td>78</td>\n",
100
- " <td>10.808339</td>\n",
101
- " <td>61.0</td>\n",
102
- " <td>58.965517</td>\n",
103
- " <td>36</td>\n",
104
- " <td>...</td>\n",
105
- " <td>NaN</td>\n",
106
- " <td>NaN</td>\n",
107
- " <td>NaN</td>\n",
108
- " <td>NaN</td>\n",
109
- " <td>NaN</td>\n",
110
- " <td>NaN</td>\n",
111
- " <td>NaN</td>\n",
112
- " <td>NaN</td>\n",
113
- " <td>NaN</td>\n",
114
- " <td>NaN</td>\n",
115
- " </tr>\n",
116
- " <tr>\n",
117
- " <th>6900</th>\n",
118
- " <td>6900</td>\n",
119
- " <td>1431</td>\n",
120
- " <td>2018</td>\n",
121
- " <td>M</td>\n",
122
- " <td>33</td>\n",
123
- " <td>88</td>\n",
124
- " <td>12.283247</td>\n",
125
- " <td>67.0</td>\n",
126
- " <td>66.466667</td>\n",
127
- " <td>44</td>\n",
128
- " <td>...</td>\n",
129
- " <td>NaN</td>\n",
130
- " <td>NaN</td>\n",
131
- " <td>NaN</td>\n",
132
- " <td>NaN</td>\n",
133
- " <td>NaN</td>\n",
134
- " <td>NaN</td>\n",
135
- " <td>NaN</td>\n",
136
- " <td>NaN</td>\n",
137
- " <td>NaN</td>\n",
138
- " <td>NaN</td>\n",
139
- " </tr>\n",
140
- " <tr>\n",
141
- " <th>4406</th>\n",
142
- " <td>4406</td>\n",
143
- " <td>1315</td>\n",
144
- " <td>2014</td>\n",
145
- " <td>M</td>\n",
146
- " <td>43</td>\n",
147
- " <td>95</td>\n",
148
- " <td>10.019980</td>\n",
149
- " <td>72.0</td>\n",
150
- " <td>73.000000</td>\n",
151
- " <td>61</td>\n",
152
- " <td>...</td>\n",
153
- " <td>NaN</td>\n",
154
- " <td>NaN</td>\n",
155
- " <td>NaN</td>\n",
156
- " <td>NaN</td>\n",
157
- " <td>NaN</td>\n",
158
- " <td>NaN</td>\n",
159
- " <td>NaN</td>\n",
160
- " <td>NaN</td>\n",
161
- " <td>NaN</td>\n",
162
- " <td>NaN</td>\n",
163
- " </tr>\n",
164
- " <tr>\n",
165
- " <th>4233</th>\n",
166
- " <td>4233</td>\n",
167
- " <td>1307</td>\n",
168
- " <td>2005</td>\n",
169
- " <td>M</td>\n",
170
- " <td>53</td>\n",
171
- " <td>101</td>\n",
172
- " <td>12.911860</td>\n",
173
- " <td>77.0</td>\n",
174
- " <td>75.870968</td>\n",
175
- " <td>47</td>\n",
176
- " <td>...</td>\n",
177
- " <td>0.0</td>\n",
178
- " <td>NaN</td>\n",
179
- " <td>0.0</td>\n",
180
- " <td>0.0</td>\n",
181
- " <td>mwc</td>\n",
182
- " <td>Z12</td>\n",
183
- " <td>New Mexico</td>\n",
184
- " <td>1985.0</td>\n",
185
- " <td>2024.0</td>\n",
186
- " <td>12.0</td>\n",
187
- " </tr>\n",
188
- " <tr>\n",
189
- " <th>3407</th>\n",
190
- " <td>3407</td>\n",
191
- " <td>1266</td>\n",
192
- " <td>2008</td>\n",
193
- " <td>M</td>\n",
194
- " <td>51</td>\n",
195
- " <td>100</td>\n",
196
- " <td>11.841315</td>\n",
197
- " <td>75.5</td>\n",
198
- " <td>75.906250</td>\n",
199
- " <td>37</td>\n",
200
- " <td>...</td>\n",
201
- " <td>1.0</td>\n",
202
- " <td>0.707107</td>\n",
203
- " <td>0.5</td>\n",
204
- " <td>0.5</td>\n",
205
- " <td>big_east</td>\n",
206
- " <td>Y06</td>\n",
207
- " <td>Marquette</td>\n",
208
- " <td>1985.0</td>\n",
209
- " <td>2024.0</td>\n",
210
- " <td>6.0</td>\n",
211
- " </tr>\n",
212
- " <tr>\n",
213
- " <th>5190</th>\n",
214
- " <td>5190</td>\n",
215
- " <td>1352</td>\n",
216
- " <td>2016</td>\n",
217
- " <td>M</td>\n",
218
- " <td>44</td>\n",
219
- " <td>89</td>\n",
220
- " <td>10.298567</td>\n",
221
- " <td>67.0</td>\n",
222
- " <td>65.062500</td>\n",
223
- " <td>45</td>\n",
224
- " <td>...</td>\n",
225
- " <td>NaN</td>\n",
226
- " <td>NaN</td>\n",
227
- " <td>NaN</td>\n",
228
- " <td>NaN</td>\n",
229
- " <td>NaN</td>\n",
230
- " <td>NaN</td>\n",
231
- " <td>NaN</td>\n",
232
- " <td>NaN</td>\n",
233
- " <td>NaN</td>\n",
234
- " <td>NaN</td>\n",
235
- " </tr>\n",
236
- " <tr>\n",
237
- " <th>1892</th>\n",
238
- " <td>1892</td>\n",
239
- " <td>1194</td>\n",
240
- " <td>2005</td>\n",
241
- " <td>M</td>\n",
242
- " <td>45</td>\n",
243
- " <td>104</td>\n",
244
- " <td>14.194618</td>\n",
245
- " <td>76.0</td>\n",
246
- " <td>76.777778</td>\n",
247
- " <td>59</td>\n",
248
- " <td>...</td>\n",
249
- " <td>NaN</td>\n",
250
- " <td>NaN</td>\n",
251
- " <td>NaN</td>\n",
252
- " <td>NaN</td>\n",
253
- " <td>NaN</td>\n",
254
- " <td>NaN</td>\n",
255
- " <td>NaN</td>\n",
256
- " <td>NaN</td>\n",
257
- " <td>NaN</td>\n",
258
- " <td>NaN</td>\n",
259
- " </tr>\n",
260
- " <tr>\n",
261
- " <th>10020</th>\n",
262
- " <td>10020</td>\n",
263
- " <td>3270</td>\n",
264
- " <td>2021</td>\n",
265
- " <td>W</td>\n",
266
- " <td>24</td>\n",
267
- " <td>80</td>\n",
268
- " <td>13.385137</td>\n",
269
- " <td>53.0</td>\n",
270
- " <td>55.476190</td>\n",
271
- " <td>41</td>\n",
272
- " <td>...</td>\n",
273
- " <td>NaN</td>\n",
274
- " <td>NaN</td>\n",
275
- " <td>NaN</td>\n",
276
- " <td>NaN</td>\n",
277
- " <td>NaN</td>\n",
278
- " <td>NaN</td>\n",
279
- " <td>NaN</td>\n",
280
- " <td>NaN</td>\n",
281
- " <td>NaN</td>\n",
282
- " <td>NaN</td>\n",
283
- " </tr>\n",
284
- " <tr>\n",
285
- " <th>9567</th>\n",
286
- " <td>9567</td>\n",
287
- " <td>3240</td>\n",
288
- " <td>2014</td>\n",
289
- " <td>W</td>\n",
290
- " <td>43</td>\n",
291
- " <td>84</td>\n",
292
- " <td>11.319009</td>\n",
293
- " <td>62.5</td>\n",
294
- " <td>63.593750</td>\n",
295
- " <td>45</td>\n",
296
- " <td>...</td>\n",
297
- " <td>NaN</td>\n",
298
- " <td>NaN</td>\n",
299
- " <td>NaN</td>\n",
300
- " <td>NaN</td>\n",
301
- " <td>NaN</td>\n",
302
- " <td>NaN</td>\n",
303
- " <td>NaN</td>\n",
304
- " <td>NaN</td>\n",
305
- " <td>NaN</td>\n",
306
- " <td>NaN</td>\n",
307
- " </tr>\n",
308
- " <tr>\n",
309
- " <th>12617</th>\n",
310
- " <td>12617</td>\n",
311
- " <td>3452</td>\n",
312
- " <td>2011</td>\n",
313
- " <td>W</td>\n",
314
- " <td>39</td>\n",
315
- " <td>90</td>\n",
316
- " <td>12.518374</td>\n",
317
- " <td>65.0</td>\n",
318
- " <td>65.750000</td>\n",
319
- " <td>21</td>\n",
320
- " <td>...</td>\n",
321
- " <td>1.0</td>\n",
322
- " <td>0.707107</td>\n",
323
- " <td>0.5</td>\n",
324
- " <td>0.5</td>\n",
325
- " <td>NaN</td>\n",
326
- " <td>NaN</td>\n",
327
- " <td>NaN</td>\n",
328
- " <td>NaN</td>\n",
329
- " <td>NaN</td>\n",
330
- " <td>NaN</td>\n",
331
- " </tr>\n",
332
- " </tbody>\n",
333
- "</table>\n",
334
- "<p>10 rows × 440 columns</p>\n",
335
- "</div>"
336
- ],
337
- "text/plain": [
338
- " Unnamed: 0 TeamID Season League TeamScore min reg \\\n",
339
- "12348 12348 3430 2012 W 41 \n",
340
- "6900 6900 1431 2018 M 33 \n",
341
- "4406 4406 1315 2014 M 43 \n",
342
- "4233 4233 1307 2005 M 53 \n",
343
- "3407 3407 1266 2008 M 51 \n",
344
- "5190 5190 1352 2016 M 44 \n",
345
- "1892 1892 1194 2005 M 45 \n",
346
- "10020 10020 3270 2021 W 24 \n",
347
- "9567 9567 3240 2014 W 43 \n",
348
- "12617 12617 3452 2011 W 39 \n",
349
- "\n",
350
- " TeamScore max reg TeamScore std reg TeamScore median reg \\\n",
351
- "12348 78 10.808339 61.0 \n",
352
- "6900 88 12.283247 67.0 \n",
353
- "4406 95 10.019980 72.0 \n",
354
- "4233 101 12.911860 77.0 \n",
355
- "3407 100 11.841315 75.5 \n",
356
- "5190 89 10.298567 67.0 \n",
357
- "1892 104 14.194618 76.0 \n",
358
- "10020 80 13.385137 53.0 \n",
359
- "9567 84 11.319009 62.5 \n",
360
- "12617 90 12.518374 65.0 \n",
361
- "\n",
362
- " TeamScore mean reg OppScore min reg ... Win max tourney \\\n",
363
- "12348 58.965517 36 ... NaN \n",
364
- "6900 66.466667 44 ... NaN \n",
365
- "4406 73.000000 61 ... NaN \n",
366
- "4233 75.870968 47 ... 0.0 \n",
367
- "3407 75.906250 37 ... 1.0 \n",
368
- "5190 65.062500 45 ... NaN \n",
369
- "1892 76.777778 59 ... NaN \n",
370
- "10020 55.476190 41 ... NaN \n",
371
- "9567 63.593750 45 ... NaN \n",
372
- "12617 65.750000 21 ... 1.0 \n",
373
- "\n",
374
- " Win std tourney Win median tourney Win mean tourney ConfAbbrev \\\n",
375
- "12348 NaN NaN NaN NaN \n",
376
- "6900 NaN NaN NaN NaN \n",
377
- "4406 NaN NaN NaN NaN \n",
378
- "4233 NaN 0.0 0.0 mwc \n",
379
- "3407 0.707107 0.5 0.5 big_east \n",
380
- "5190 NaN NaN NaN NaN \n",
381
- "1892 NaN NaN NaN NaN \n",
382
- "10020 NaN NaN NaN NaN \n",
383
- "9567 NaN NaN NaN NaN \n",
384
- "12617 0.707107 0.5 0.5 NaN \n",
385
- "\n",
386
- " Seed TeamName FirstD1Season LastD1Season ChalkSeed \n",
387
- "12348 NaN NaN NaN NaN NaN \n",
388
- "6900 NaN NaN NaN NaN NaN \n",
389
- "4406 NaN NaN NaN NaN NaN \n",
390
- "4233 Z12 New Mexico 1985.0 2024.0 12.0 \n",
391
- "3407 Y06 Marquette 1985.0 2024.0 6.0 \n",
392
- "5190 NaN NaN NaN NaN NaN \n",
393
- "1892 NaN NaN NaN NaN NaN \n",
394
- "10020 NaN NaN NaN NaN NaN \n",
395
- "9567 NaN NaN NaN NaN NaN \n",
396
- "12617 NaN NaN NaN NaN NaN \n",
397
- "\n",
398
- "[10 rows x 440 columns]"
399
- ]
400
- },
401
- "execution_count": 4,
402
- "metadata": {},
403
- "output_type": "execute_result"
404
- }
405
- ],
406
- "source": [
407
- "all_teams_agg_df.sample(10, random_state=1)"
408
  ]
409
  },
410
  {
411
  "cell_type": "code",
412
- "execution_count": 5,
413
  "metadata": {},
414
  "outputs": [],
415
- "source": [
416
- "def get_device() -> str:\n",
417
- " if torch.cuda.is_available():\n",
418
- " return \"cuda\"\n",
419
- " return \"cpu\"\n",
420
- "\n",
421
- "DEVICE = get_device() "
422
- ]
423
  },
424
  {
425
  "cell_type": "markdown",
@@ -430,54 +65,126 @@
430
  },
431
  {
432
  "cell_type": "code",
433
- "execution_count": 6,
434
  "metadata": {},
435
  "outputs": [],
436
  "source": [
437
- "mens_team_df = all_teams_agg_df[all_teams_agg_df[\"League\"] == \"M\"]\n",
438
- "womens_team_df = all_teams_agg_df[all_teams_agg_df[\"League\"] == \"W\"]"
439
  ]
440
  },
441
  {
442
  "cell_type": "code",
443
- "execution_count": 8,
444
  "metadata": {},
445
  "outputs": [],
446
  "source": [
447
  "# define the features and target for our models\n",
448
  "feature_cols = [\n",
449
- " \"ScoreDiff reg\",\n",
450
- " \"FGMDiff reg\",\n",
451
- " \"FGM3Diff reg\",\n",
452
- " \"TODiff reg\",\n",
453
  "]\n",
454
  "\n",
455
- "target_cols = [\"\"]"
456
  ]
457
  },
458
  {
459
  "cell_type": "code",
460
- "execution_count": null,
461
  "metadata": {},
462
  "outputs": [],
463
  "source": [
464
  "# split into training and testing datasets\n",
465
  "MX_train, MX_test, My_train, My_test = train_test_split(\n",
466
- " mens_team_df[feature_cols],\n",
467
- " mens_team_df[target_cols],\n",
468
  " test_size=0.2,\n",
469
  " random_state=1,\n",
470
  ")\n",
471
  "\n",
472
  "# same for womens\n",
473
  "WX_train, WX_test, Wy_train, Wy_test = train_test_split(\n",
474
- " womens_team_df[feature_cols],\n",
475
- " womens_team_df[target_cols],\n",
476
  " test_size=0.2,\n",
477
  " random_state=1,\n",
478
  ")"
479
  ]
480
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  {
482
  "cell_type": "code",
483
  "execution_count": null,
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 2,
22
  "metadata": {},
23
  "outputs": [
24
+ {
25
+ "name": "stderr",
26
+ "output_type": "stream",
27
+ "text": [
28
+ "/var/folders/v8/0hd98b512cn3ms2rz146k7jw0000gn/T/ipykernel_17426/685274063.py:1: DtypeWarning: Columns (481,482,483) have mixed types. Specify dtype option on import or set low_memory=False.\n",
29
+ " detailed_games_df = pd.read_csv(\n"
30
+ ]
31
+ },
32
  {
33
  "name": "stdout",
34
  "output_type": "stream",
35
  "text": [
36
  "<class 'pandas.core.frame.DataFrame'>\n",
37
+ "RangeIndex: 377608 entries, 0 to 377607\n",
38
+ "Columns: 487 entries, Unnamed: 0 to ChalkSeed\n",
39
+ "dtypes: float64(347), int64(133), object(7)\n",
40
+ "memory usage: 1.4+ GB\n"
41
  ]
42
  }
43
  ],
44
  "source": [
45
+ "detailed_games_df = pd.read_csv(\n",
46
+ " os.path.join(DATA_DIR, \"AllSuperDetailedGames.csv\"),\n",
47
+ ")\n",
48
  "\n",
49
+ "detailed_games_df.info()"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ]
51
  },
52
  {
53
  "cell_type": "code",
54
+ "execution_count": null,
55
  "metadata": {},
56
  "outputs": [],
57
+ "source": []
 
 
 
 
 
 
 
58
  },
59
  {
60
  "cell_type": "markdown",
 
65
  },
66
  {
67
  "cell_type": "code",
68
+ "execution_count": 3,
69
  "metadata": {},
70
  "outputs": [],
71
  "source": [
72
+ "mens_games_df = detailed_games_df[detailed_games_df[\"League\"] == \"M\"]\n",
73
+ "wmns_games_df = detailed_games_df[detailed_games_df[\"League\"] == \"W\"]"
74
  ]
75
  },
76
  {
77
  "cell_type": "code",
78
+ "execution_count": 4,
79
  "metadata": {},
80
  "outputs": [],
81
  "source": [
82
  "# define the features and target for our models\n",
83
  "feature_cols = [\n",
84
+ " \"ScoreDiff mean reg\",\n",
85
+ " \"FGMDiff mean reg\",\n",
86
+ " \"FGM3Diff mean reg\",\n",
87
+ " \"TODiff mean reg\",\n",
88
  "]\n",
89
  "\n",
90
+ "target_cols = [\"Win\"]"
91
  ]
92
  },
93
  {
94
  "cell_type": "code",
95
+ "execution_count": 5,
96
  "metadata": {},
97
  "outputs": [],
98
  "source": [
99
  "# split into training and testing datasets\n",
100
  "MX_train, MX_test, My_train, My_test = train_test_split(\n",
101
+ " mens_games_df[feature_cols],\n",
102
+ " mens_games_df[target_cols],\n",
103
  " test_size=0.2,\n",
104
  " random_state=1,\n",
105
  ")\n",
106
  "\n",
107
  "# same for womens\n",
108
  "WX_train, WX_test, Wy_train, Wy_test = train_test_split(\n",
109
+ " wmns_games_df[feature_cols],\n",
110
+ " wmns_games_df[target_cols],\n",
111
  " test_size=0.2,\n",
112
  " random_state=1,\n",
113
  ")"
114
  ]
115
  },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 6,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "# convert data to tensor objects and register to device\n",
123
+ "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
124
+ "\n",
125
+ "MX_train_T = torch.tensor(\n",
126
+ " MX_train.values,\n",
127
+ " dtype=float,\n",
128
+ ").to(DEVICE)\n",
129
+ "\n",
130
+ "MX_test_T = torch.tensor(\n",
131
+ " MX_test.values,\n",
132
+ " dtype=float,\n",
133
+ ").to(DEVICE)\n",
134
+ "\n",
135
+ "My_train_T = torch.tensor(\n",
136
+ " My_train.values,\n",
137
+ " dtype=float,\n",
138
+ ").to(DEVICE)\n",
139
+ "\n",
140
+ "My_test_T = torch.tensor(\n",
141
+ " My_test.values,\n",
142
+ " dtype=float,\n",
143
+ ").to(DEVICE)\n",
144
+ "\n",
145
+ "# same for womens data\n",
146
+ "WX_train_T = torch.tensor(\n",
147
+ " WX_train.values,\n",
148
+ " dtype=float,\n",
149
+ ").to(DEVICE)\n",
150
+ "\n",
151
+ "WX_test_T = torch.tensor(\n",
152
+ " WX_test.values,\n",
153
+ " dtype=float,\n",
154
+ ").to(DEVICE)\n",
155
+ "\n",
156
+ "Wy_train_T = torch.tensor(\n",
157
+ " Wy_train.values,\n",
158
+ " dtype=float,\n",
159
+ ").to(DEVICE)\n",
160
+ "\n",
161
+ "Wy_test_T = torch.tensor(\n",
162
+ " Wy_test.values,\n",
163
+ " dtype=float,\n",
164
+ ").to(DEVICE)"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "markdown",
169
+ "metadata": {},
170
+ "source": [
171
+ "# Generic Neural Network Framework\n",
172
+ "\n",
173
+ "I am using the same neural network structure for both the men's and women's data."
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": 7,
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "\n",
183
+ "class Nigl(nn.Module):\n",
184
+ " def __init__(self):\n",
185
+ " super().__init__()\n"
186
+ ]
187
+ },
188
  {
189
  "cell_type": "code",
190
  "execution_count": null,
src/pre_processing.ipynb CHANGED
@@ -666,15 +666,15 @@
666
  " <th>TeamFGM3</th>\n",
667
  " <th>...</th>\n",
668
  " <th>TODiff</th>\n",
 
669
  " <th>FTMDiff</th>\n",
670
- " <th>DRDiff</th>\n",
 
671
  " <th>FGADiff</th>\n",
672
- " <th>FGMDiff</th>\n",
673
- " <th>StlDiff</th>\n",
674
- " <th>AstDiff</th>\n",
675
  " <th>ORDiff</th>\n",
676
  " <th>BlkDiff</th>\n",
677
  " <th>ScoreDiff</th>\n",
 
678
  " </tr>\n",
679
  " </thead>\n",
680
  " <tbody>\n",
@@ -693,14 +693,14 @@
693
  " <td>...</td>\n",
694
  " <td>7</td>\n",
695
  " <td>-11</td>\n",
696
- " <td>-4</td>\n",
 
 
697
  " <td>-12</td>\n",
698
- " <td>-7</td>\n",
699
- " <td>-7</td>\n",
700
- " <td>-1</td>\n",
701
  " <td>-11</td>\n",
702
  " <td>1</td>\n",
703
  " <td>-28</td>\n",
 
704
  " </tr>\n",
705
  " <tr>\n",
706
  " <th>100732</th>\n",
@@ -716,15 +716,15 @@
716
  " <td>8</td>\n",
717
  " <td>...</td>\n",
718
  " <td>-6</td>\n",
 
719
  " <td>16</td>\n",
720
- " <td>1</td>\n",
 
721
  " <td>-4</td>\n",
722
  " <td>-1</td>\n",
723
- " <td>4</td>\n",
724
- " <td>11</td>\n",
725
- " <td>-1</td>\n",
726
  " <td>2</td>\n",
727
  " <td>12</td>\n",
 
728
  " </tr>\n",
729
  " <tr>\n",
730
  " <th>83150</th>\n",
@@ -740,15 +740,15 @@
740
  " <td>8</td>\n",
741
  " <td>...</td>\n",
742
  " <td>1</td>\n",
 
743
  " <td>14</td>\n",
744
- " <td>13</td>\n",
745
- " <td>-6</td>\n",
746
- " <td>-1</td>\n",
747
- " <td>-5</td>\n",
748
  " <td>1</td>\n",
 
 
749
  " <td>4</td>\n",
750
  " <td>2</td>\n",
751
  " <td>13</td>\n",
 
752
  " </tr>\n",
753
  " <tr>\n",
754
  " <th>345009</th>\n",
@@ -764,15 +764,15 @@
764
  " <td>5</td>\n",
765
  " <td>...</td>\n",
766
  " <td>-5</td>\n",
 
767
  " <td>-8</td>\n",
768
- " <td>-7</td>\n",
 
769
  " <td>13</td>\n",
770
- " <td>1</td>\n",
771
- " <td>-3</td>\n",
772
- " <td>4</td>\n",
773
  " <td>2</td>\n",
774
  " <td>-3</td>\n",
775
  " <td>-7</td>\n",
 
776
  " </tr>\n",
777
  " <tr>\n",
778
  " <th>318707</th>\n",
@@ -788,15 +788,15 @@
788
  " <td>3</td>\n",
789
  " <td>...</td>\n",
790
  " <td>4</td>\n",
 
791
  " <td>-9</td>\n",
792
  " <td>-3</td>\n",
793
- " <td>3</td>\n",
794
- " <td>-3</td>\n",
795
- " <td>-7</td>\n",
796
  " <td>2</td>\n",
 
797
  " <td>2</td>\n",
798
  " <td>1</td>\n",
799
  " <td>-18</td>\n",
 
800
  " </tr>\n",
801
  " </tbody>\n",
802
  "</table>\n",
@@ -811,19 +811,19 @@
811
  "345009 2019 4 3435 58 65 H 0 19 \n",
812
  "318707 2013 128 3322 45 63 N 0 20 \n",
813
  "\n",
814
- " TeamFGA TeamFGM3 ... TODiff FTMDiff DRDiff FGADiff FGMDiff \\\n",
815
- "337067 55 3 ... 7 -11 -4 -12 -7 \n",
816
- "100732 60 8 ... -6 16 1 -4 -1 \n",
817
- "83150 58 8 ... 1 14 13 -6 -1 \n",
818
- "345009 55 5 ... -5 -8 -7 13 1 \n",
819
- "318707 51 3 ... 4 -9 -3 3 -3 \n",
820
  "\n",
821
- " StlDiff AstDiff ORDiff BlkDiff ScoreDiff \n",
822
- "337067 -7 -1 -11 1 -28 \n",
823
- "100732 4 11 -1 2 12 \n",
824
- "83150 -5 1 4 2 13 \n",
825
- "345009 -3 4 2 -3 -7 \n",
826
- "318707 -7 2 2 1 -18 \n",
827
  "\n",
828
  "[5 rows x 48 columns]"
829
  ]
@@ -940,11 +940,11 @@
940
  " <th>OppScore min</th>\n",
941
  " <th>OppScore max</th>\n",
942
  " <th>...</th>\n",
943
- " <th>ScoreDiff min</th>\n",
944
- " <th>ScoreDiff max</th>\n",
945
- " <th>ScoreDiff std</th>\n",
946
- " <th>ScoreDiff median</th>\n",
947
- " <th>ScoreDiff mean</th>\n",
948
  " <th>Win min</th>\n",
949
  " <th>Win max</th>\n",
950
  " <th>Win std</th>\n",
@@ -966,11 +966,11 @@
966
  " <td>36</td>\n",
967
  " <td>85</td>\n",
968
  " <td>...</td>\n",
969
- " <td>-32</td>\n",
970
- " <td>35</td>\n",
971
- " <td>16.997102</td>\n",
972
- " <td>-1.0</td>\n",
973
- " <td>-2.517241</td>\n",
974
  " <td>0</td>\n",
975
  " <td>1</td>\n",
976
  " <td>0.508548</td>\n",
@@ -990,11 +990,11 @@
990
  " <td>44</td>\n",
991
  " <td>97</td>\n",
992
  " <td>...</td>\n",
993
- " <td>-49</td>\n",
994
- " <td>29</td>\n",
995
- " <td>14.772645</td>\n",
996
- " <td>-5.0</td>\n",
997
- " <td>-5.100000</td>\n",
998
  " <td>0</td>\n",
999
  " <td>1</td>\n",
1000
  " <td>0.479463</td>\n",
@@ -1014,11 +1014,11 @@
1014
  " <td>61</td>\n",
1015
  " <td>103</td>\n",
1016
  " <td>...</td>\n",
1017
- " <td>-27</td>\n",
1018
- " <td>18</td>\n",
1019
- " <td>12.316786</td>\n",
1020
- " <td>-2.0</td>\n",
1021
- " <td>-2.645161</td>\n",
1022
  " <td>0</td>\n",
1023
  " <td>1</td>\n",
1024
  " <td>0.508001</td>\n",
@@ -1038,11 +1038,11 @@
1038
  " <td>47</td>\n",
1039
  " <td>81</td>\n",
1040
  " <td>...</td>\n",
1041
- " <td>-17</td>\n",
1042
- " <td>34</td>\n",
1043
- " <td>13.022891</td>\n",
1044
- " <td>11.0</td>\n",
1045
- " <td>10.935484</td>\n",
1046
  " <td>0</td>\n",
1047
  " <td>1</td>\n",
1048
  " <td>0.401610</td>\n",
@@ -1062,11 +1062,11 @@
1062
  " <td>37</td>\n",
1063
  " <td>89</td>\n",
1064
  " <td>...</td>\n",
1065
- " <td>-20</td>\n",
1066
- " <td>47</td>\n",
1067
- " <td>17.828682</td>\n",
1068
- " <td>10.0</td>\n",
1069
- " <td>11.593750</td>\n",
1070
  " <td>0</td>\n",
1071
  " <td>1</td>\n",
1072
  " <td>0.456803</td>\n",
@@ -1086,11 +1086,11 @@
1086
  " <td>45</td>\n",
1087
  " <td>106</td>\n",
1088
  " <td>...</td>\n",
1089
- " <td>-62</td>\n",
1090
- " <td>18</td>\n",
1091
- " <td>14.365582</td>\n",
1092
- " <td>-7.0</td>\n",
1093
- " <td>-5.781250</td>\n",
1094
  " <td>0</td>\n",
1095
  " <td>1</td>\n",
1096
  " <td>0.470929</td>\n",
@@ -1110,11 +1110,11 @@
1110
  " <td>59</td>\n",
1111
  " <td>107</td>\n",
1112
  " <td>...</td>\n",
1113
- " <td>-45</td>\n",
1114
- " <td>27</td>\n",
1115
- " <td>14.449736</td>\n",
1116
- " <td>-3.0</td>\n",
1117
- " <td>-1.888889</td>\n",
1118
  " <td>0</td>\n",
1119
  " <td>1</td>\n",
1120
  " <td>0.492103</td>\n",
@@ -1134,11 +1134,11 @@
1134
  " <td>41</td>\n",
1135
  " <td>117</td>\n",
1136
  " <td>...</td>\n",
1137
- " <td>-93</td>\n",
1138
- " <td>24</td>\n",
1139
- " <td>27.245445</td>\n",
1140
- " <td>-15.0</td>\n",
1141
- " <td>-14.285714</td>\n",
1142
  " <td>0</td>\n",
1143
  " <td>1</td>\n",
1144
  " <td>0.462910</td>\n",
@@ -1158,11 +1158,11 @@
1158
  " <td>45</td>\n",
1159
  " <td>100</td>\n",
1160
  " <td>...</td>\n",
1161
- " <td>-42</td>\n",
1162
- " <td>17</td>\n",
1163
- " <td>13.277095</td>\n",
1164
- " <td>-2.0</td>\n",
1165
- " <td>-4.093750</td>\n",
1166
  " <td>0</td>\n",
1167
  " <td>1</td>\n",
1168
  " <td>0.504016</td>\n",
@@ -1182,11 +1182,11 @@
1182
  " <td>21</td>\n",
1183
  " <td>79</td>\n",
1184
  " <td>...</td>\n",
1185
- " <td>-23</td>\n",
1186
- " <td>57</td>\n",
1187
- " <td>18.777131</td>\n",
1188
- " <td>13.5</td>\n",
1189
- " <td>13.500000</td>\n",
1190
  " <td>0</td>\n",
1191
  " <td>1</td>\n",
1192
  " <td>0.456803</td>\n",
@@ -1223,29 +1223,29 @@
1223
  "9567 62.5 63.593750 45 100 ... \n",
1224
  "12617 65.0 65.750000 21 79 ... \n",
1225
  "\n",
1226
- " ScoreDiff min ScoreDiff max ScoreDiff std ScoreDiff median \\\n",
1227
- "12348 -32 35 16.997102 -1.0 \n",
1228
- "6900 -49 29 14.772645 -5.0 \n",
1229
- "4406 -27 18 12.316786 -2.0 \n",
1230
- "4233 -17 34 13.022891 11.0 \n",
1231
- "3407 -20 47 17.828682 10.0 \n",
1232
- "5190 -62 18 14.365582 -7.0 \n",
1233
- "1892 -45 27 14.449736 -3.0 \n",
1234
- "10020 -93 24 27.245445 -15.0 \n",
1235
- "9567 -42 17 13.277095 -2.0 \n",
1236
- "12617 -23 57 18.777131 13.5 \n",
1237
  "\n",
1238
- " ScoreDiff mean Win min Win max Win std Win median Win mean \n",
1239
- "12348 -2.517241 0 1 0.508548 0.0 0.482759 \n",
1240
- "6900 -5.100000 0 1 0.479463 0.0 0.333333 \n",
1241
- "4406 -2.645161 0 1 0.508001 0.0 0.483871 \n",
1242
- "4233 10.935484 0 1 0.401610 1.0 0.806452 \n",
1243
- "3407 11.593750 0 1 0.456803 1.0 0.718750 \n",
1244
- "5190 -5.781250 0 1 0.470929 0.0 0.312500 \n",
1245
- "1892 -1.888889 0 1 0.492103 0.0 0.370370 \n",
1246
- "10020 -14.285714 0 1 0.462910 0.0 0.285714 \n",
1247
- "9567 -4.093750 0 1 0.504016 0.0 0.437500 \n",
1248
- "12617 13.500000 0 1 0.456803 1.0 0.718750 \n",
1249
  "\n",
1250
  "[10 rows x 218 columns]"
1251
  ]
@@ -1304,11 +1304,11 @@
1304
  " <th>OppScore min</th>\n",
1305
  " <th>OppScore max</th>\n",
1306
  " <th>...</th>\n",
1307
- " <th>ScoreDiff min</th>\n",
1308
- " <th>ScoreDiff max</th>\n",
1309
- " <th>ScoreDiff std</th>\n",
1310
- " <th>ScoreDiff median</th>\n",
1311
- " <th>ScoreDiff mean</th>\n",
1312
  " <th>Win min</th>\n",
1313
  " <th>Win max</th>\n",
1314
  " <th>Win std</th>\n",
@@ -1330,11 +1330,11 @@
1330
  " <td>53</td>\n",
1331
  " <td>82</td>\n",
1332
  " <td>...</td>\n",
1333
- " <td>-20</td>\n",
1334
- " <td>24</td>\n",
1335
- " <td>22.007574</td>\n",
1336
- " <td>1.0</td>\n",
1337
- " <td>1.666667</td>\n",
1338
  " <td>0</td>\n",
1339
  " <td>1</td>\n",
1340
  " <td>0.577350</td>\n",
@@ -1354,11 +1354,11 @@
1354
  " <td>71</td>\n",
1355
  " <td>71</td>\n",
1356
  " <td>...</td>\n",
1357
- " <td>-8</td>\n",
1358
- " <td>-8</td>\n",
1359
  " <td>NaN</td>\n",
1360
- " <td>-8.0</td>\n",
1361
- " <td>-8.000000</td>\n",
1362
  " <td>0</td>\n",
1363
  " <td>0</td>\n",
1364
  " <td>NaN</td>\n",
@@ -1378,11 +1378,11 @@
1378
  " <td>64</td>\n",
1379
  " <td>64</td>\n",
1380
  " <td>...</td>\n",
1381
- " <td>-1</td>\n",
1382
- " <td>-1</td>\n",
1383
  " <td>NaN</td>\n",
1384
- " <td>-1.0</td>\n",
1385
- " <td>-1.000000</td>\n",
1386
  " <td>0</td>\n",
1387
  " <td>0</td>\n",
1388
  " <td>NaN</td>\n",
@@ -1402,11 +1402,11 @@
1402
  " <td>72</td>\n",
1403
  " <td>79</td>\n",
1404
  " <td>...</td>\n",
1405
- " <td>-7</td>\n",
1406
- " <td>2</td>\n",
1407
- " <td>6.363961</td>\n",
1408
- " <td>-2.5</td>\n",
1409
- " <td>-2.500000</td>\n",
1410
  " <td>0</td>\n",
1411
  " <td>1</td>\n",
1412
  " <td>0.707107</td>\n",
@@ -1426,11 +1426,11 @@
1426
  " <td>79</td>\n",
1427
  " <td>79</td>\n",
1428
  " <td>...</td>\n",
1429
- " <td>-14</td>\n",
1430
- " <td>-14</td>\n",
1431
  " <td>NaN</td>\n",
1432
- " <td>-14.0</td>\n",
1433
- " <td>-14.000000</td>\n",
1434
  " <td>0</td>\n",
1435
  " <td>0</td>\n",
1436
  " <td>NaN</td>\n",
@@ -1450,11 +1450,11 @@
1450
  " <td>56</td>\n",
1451
  " <td>88</td>\n",
1452
  " <td>...</td>\n",
1453
- " <td>-19</td>\n",
1454
- " <td>19</td>\n",
1455
- " <td>18.025445</td>\n",
1456
- " <td>15.5</td>\n",
1457
- " <td>7.750000</td>\n",
1458
  " <td>0</td>\n",
1459
  " <td>1</td>\n",
1460
  " <td>0.500000</td>\n",
@@ -1474,11 +1474,11 @@
1474
  " <td>67</td>\n",
1475
  " <td>75</td>\n",
1476
  " <td>...</td>\n",
1477
- " <td>-3</td>\n",
1478
- " <td>17</td>\n",
1479
- " <td>14.142136</td>\n",
1480
- " <td>7.0</td>\n",
1481
- " <td>7.000000</td>\n",
1482
  " <td>0</td>\n",
1483
  " <td>1</td>\n",
1484
  " <td>0.707107</td>\n",
@@ -1522,11 +1522,11 @@
1522
  " <td>72</td>\n",
1523
  " <td>72</td>\n",
1524
  " <td>...</td>\n",
1525
- " <td>-9</td>\n",
1526
- " <td>-9</td>\n",
1527
  " <td>NaN</td>\n",
1528
- " <td>-9.0</td>\n",
1529
- " <td>-9.000000</td>\n",
1530
  " <td>0</td>\n",
1531
  " <td>0</td>\n",
1532
  " <td>NaN</td>\n",
@@ -1546,10 +1546,10 @@
1546
  " <td>60</td>\n",
1547
  " <td>88</td>\n",
1548
  " <td>...</td>\n",
1549
- " <td>-17</td>\n",
1550
- " <td>8</td>\n",
1551
- " <td>12.897028</td>\n",
1552
- " <td>1.0</td>\n",
1553
  " <td>-2.666667</td>\n",
1554
  " <td>0</td>\n",
1555
  " <td>1</td>\n",
@@ -1587,29 +1587,29 @@
1587
  "697 63.0 63.000000 72 72 ... \n",
1588
  "763 70.0 69.666667 60 88 ... \n",
1589
  "\n",
1590
- " ScoreDiff min ScoreDiff max ScoreDiff std ScoreDiff median \\\n",
1591
- "995 -20 24 22.007574 1.0 \n",
1592
- "1601 -8 -8 NaN -8.0 \n",
1593
- "1805 -1 -1 NaN -1.0 \n",
1594
- "952 -7 2 6.363961 -2.5 \n",
1595
- "924 -14 -14 NaN -14.0 \n",
1596
- "1381 -19 19 18.025445 15.5 \n",
1597
- "1266 -3 17 14.142136 7.0 \n",
1598
- "1810 -3 -3 NaN -3.0 \n",
1599
- "697 -9 -9 NaN -9.0 \n",
1600
- "763 -17 8 12.897028 1.0 \n",
1601
  "\n",
1602
- " ScoreDiff mean Win min Win max Win std Win median Win mean \n",
1603
- "995 1.666667 0 1 0.577350 1.0 0.666667 \n",
1604
- "1601 -8.000000 0 0 NaN 0.0 0.000000 \n",
1605
- "1805 -1.000000 0 0 NaN 0.0 0.000000 \n",
1606
- "952 -2.500000 0 1 0.707107 0.5 0.500000 \n",
1607
- "924 -14.000000 0 0 NaN 0.0 0.000000 \n",
1608
- "1381 7.750000 0 1 0.500000 1.0 0.750000 \n",
1609
- "1266 7.000000 0 1 0.707107 0.5 0.500000 \n",
1610
- "1810 -3.000000 0 0 NaN 0.0 0.000000 \n",
1611
- "697 -9.000000 0 0 NaN 0.0 0.000000 \n",
1612
- "763 -2.666667 0 1 0.577350 1.0 0.666667 \n",
1613
  "\n",
1614
  "[10 rows x 218 columns]"
1615
  ]
@@ -1796,7 +1796,7 @@
1796
  },
1797
  {
1798
  "cell_type": "code",
1799
- "execution_count": 15,
1800
  "metadata": {},
1801
  "outputs": [
1802
  {
@@ -1831,11 +1831,11 @@
1831
  " <th>OppScore min reg</th>\n",
1832
  " <th>OppScore max reg</th>\n",
1833
  " <th>...</th>\n",
1834
- " <th>ScoreDiff min tourney</th>\n",
1835
- " <th>ScoreDiff max tourney</th>\n",
1836
- " <th>ScoreDiff std tourney</th>\n",
1837
- " <th>ScoreDiff median tourney</th>\n",
1838
- " <th>ScoreDiff mean tourney</th>\n",
1839
  " <th>Win min tourney</th>\n",
1840
  " <th>Win max tourney</th>\n",
1841
  " <th>Win std tourney</th>\n",
@@ -1929,11 +1929,11 @@
1929
  " <td>47</td>\n",
1930
  " <td>81</td>\n",
1931
  " <td>...</td>\n",
1932
- " <td>-8.0</td>\n",
1933
- " <td>-8.0</td>\n",
1934
  " <td>NaN</td>\n",
1935
- " <td>-8.0</td>\n",
1936
- " <td>-8.0</td>\n",
1937
  " <td>0.0</td>\n",
1938
  " <td>0.0</td>\n",
1939
  " <td>NaN</td>\n",
@@ -1953,11 +1953,11 @@
1953
  " <td>37</td>\n",
1954
  " <td>89</td>\n",
1955
  " <td>...</td>\n",
1956
- " <td>-1.0</td>\n",
1957
- " <td>8.0</td>\n",
1958
- " <td>6.363961</td>\n",
1959
- " <td>3.5</td>\n",
1960
- " <td>3.5</td>\n",
1961
  " <td>0.0</td>\n",
1962
  " <td>1.0</td>\n",
1963
  " <td>0.707107</td>\n",
@@ -2073,11 +2073,11 @@
2073
  " <td>21</td>\n",
2074
  " <td>79</td>\n",
2075
  " <td>...</td>\n",
2076
- " <td>-14.0</td>\n",
2077
- " <td>6.0</td>\n",
2078
- " <td>14.142136</td>\n",
2079
- " <td>-4.0</td>\n",
2080
- " <td>-4.0</td>\n",
2081
  " <td>0.0</td>\n",
2082
  " <td>1.0</td>\n",
2083
  " <td>0.707107</td>\n",
@@ -2114,41 +2114,41 @@
2114
  "9567 11.319009 62.5 63.593750 \n",
2115
  "12617 12.518374 65.0 65.750000 \n",
2116
  "\n",
2117
- " OppScore min reg OppScore max reg ... ScoreDiff min tourney \\\n",
2118
- "12348 36 85 ... NaN \n",
2119
- "6900 44 97 ... NaN \n",
2120
- "4406 61 103 ... NaN \n",
2121
- "4233 47 81 ... -8.0 \n",
2122
- "3407 37 89 ... -1.0 \n",
2123
- "5190 45 106 ... NaN \n",
2124
- "1892 59 107 ... NaN \n",
2125
- "10020 41 117 ... NaN \n",
2126
- "9567 45 100 ... NaN \n",
2127
- "12617 21 79 ... -14.0 \n",
2128
  "\n",
2129
- " ScoreDiff max tourney ScoreDiff std tourney ScoreDiff median tourney \\\n",
2130
- "12348 NaN NaN NaN \n",
2131
- "6900 NaN NaN NaN \n",
2132
- "4406 NaN NaN NaN \n",
2133
- "4233 -8.0 NaN -8.0 \n",
2134
- "3407 8.0 6.363961 3.5 \n",
2135
- "5190 NaN NaN NaN \n",
2136
- "1892 NaN NaN NaN \n",
2137
- "10020 NaN NaN NaN \n",
2138
- "9567 NaN NaN NaN \n",
2139
- "12617 6.0 14.142136 -4.0 \n",
2140
  "\n",
2141
- " ScoreDiff mean tourney Win min tourney Win max tourney \\\n",
2142
- "12348 NaN NaN NaN \n",
2143
- "6900 NaN NaN NaN \n",
2144
- "4406 NaN NaN NaN \n",
2145
- "4233 -8.0 0.0 0.0 \n",
2146
- "3407 3.5 0.0 1.0 \n",
2147
- "5190 NaN NaN NaN \n",
2148
- "1892 NaN NaN NaN \n",
2149
- "10020 NaN NaN NaN \n",
2150
- "9567 NaN NaN NaN \n",
2151
- "12617 -4.0 0.0 1.0 \n",
2152
  "\n",
2153
  " Win std tourney Win median tourney Win mean tourney \n",
2154
  "12348 NaN NaN NaN \n",
@@ -2165,21 +2165,20 @@
2165
  "[10 rows x 433 columns]"
2166
  ]
2167
  },
2168
- "execution_count": 15,
2169
  "metadata": {},
2170
  "output_type": "execute_result"
2171
  }
2172
  ],
2173
  "source": [
2174
  "# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
2175
- "team_agg_df = (\n",
2176
- " pd.merge(\n",
2177
- " left=team_reg_agg, \n",
2178
- " right=team_tourney_agg, \n",
2179
- " how=\"left\",\n",
2180
- " on=[\"TeamID\", \"Season\", \"League\"], \n",
2181
- " suffixes=(\" reg\", \" tourney\"),\n",
2182
- " )\n",
2183
  ")\n",
2184
  "\n",
2185
  "team_agg_df.sample(10, random_state=1)"
@@ -2187,7 +2186,77 @@
2187
  },
2188
  {
2189
  "cell_type": "code",
2190
- "execution_count": 16,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2191
  "metadata": {},
2192
  "outputs": [
2193
  {
@@ -2211,16 +2280,16 @@
2211
  " <thead>\n",
2212
  " <tr style=\"text-align: right;\">\n",
2213
  " <th></th>\n",
2214
- " <th>TeamID</th>\n",
2215
  " <th>Season</th>\n",
2216
- " <th>League</th>\n",
2217
- " <th>TeamScore min reg</th>\n",
2218
- " <th>TeamScore max reg</th>\n",
2219
- " <th>TeamScore std reg</th>\n",
2220
- " <th>TeamScore median reg</th>\n",
2221
- " <th>TeamScore mean reg</th>\n",
2222
- " <th>OppScore min reg</th>\n",
2223
- " <th>OppScore max reg</th>\n",
 
2224
  " <th>...</th>\n",
2225
  " <th>Win max tourney</th>\n",
2226
  " <th>Win std tourney</th>\n",
@@ -2236,65 +2305,89 @@
2236
  " </thead>\n",
2237
  " <tbody>\n",
2238
  " <tr>\n",
2239
- " <th>12348</th>\n",
2240
- " <td>3430</td>\n",
2241
- " <td>2012</td>\n",
2242
- " <td>W</td>\n",
2243
- " <td>41</td>\n",
2244
- " <td>78</td>\n",
2245
- " <td>10.808339</td>\n",
2246
- " <td>61.0</td>\n",
2247
- " <td>58.965517</td>\n",
2248
- " <td>36</td>\n",
2249
- " <td>85</td>\n",
2250
  " <td>...</td>\n",
 
2251
  " <td>NaN</td>\n",
2252
- " <td>NaN</td>\n",
2253
- " <td>NaN</td>\n",
2254
- " <td>NaN</td>\n",
2255
- " <td>NaN</td>\n",
2256
- " <td>NaN</td>\n",
2257
- " <td>NaN</td>\n",
2258
- " <td>NaN</td>\n",
2259
- " <td>NaN</td>\n",
2260
- " <td>NaN</td>\n",
2261
  " </tr>\n",
2262
  " <tr>\n",
2263
- " <th>6900</th>\n",
2264
- " <td>1431</td>\n",
2265
- " <td>2018</td>\n",
2266
- " <td>M</td>\n",
2267
- " <td>33</td>\n",
2268
- " <td>88</td>\n",
2269
- " <td>12.283247</td>\n",
2270
- " <td>67.0</td>\n",
2271
- " <td>66.466667</td>\n",
2272
- " <td>44</td>\n",
2273
- " <td>97</td>\n",
2274
  " <td>...</td>\n",
 
2275
  " <td>NaN</td>\n",
2276
- " <td>NaN</td>\n",
2277
- " <td>NaN</td>\n",
2278
- " <td>NaN</td>\n",
2279
- " <td>NaN</td>\n",
2280
- " <td>NaN</td>\n",
2281
- " <td>NaN</td>\n",
2282
- " <td>NaN</td>\n",
2283
- " <td>NaN</td>\n",
2284
- " <td>NaN</td>\n",
2285
  " </tr>\n",
2286
  " <tr>\n",
2287
- " <th>4406</th>\n",
2288
- " <td>1315</td>\n",
2289
- " <td>2014</td>\n",
2290
- " <td>M</td>\n",
2291
- " <td>43</td>\n",
2292
- " <td>95</td>\n",
2293
- " <td>10.019980</td>\n",
2294
- " <td>72.0</td>\n",
2295
- " <td>73.000000</td>\n",
2296
  " <td>61</td>\n",
2297
- " <td>103</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2298
  " <td>...</td>\n",
2299
  " <td>NaN</td>\n",
2300
  " <td>NaN</td>\n",
@@ -2308,70 +2401,70 @@
2308
  " <td>NaN</td>\n",
2309
  " </tr>\n",
2310
  " <tr>\n",
2311
- " <th>4233</th>\n",
2312
- " <td>1307</td>\n",
2313
- " <td>2005</td>\n",
2314
- " <td>M</td>\n",
2315
- " <td>53</td>\n",
2316
- " <td>101</td>\n",
2317
- " <td>12.911860</td>\n",
2318
- " <td>77.0</td>\n",
2319
- " <td>75.870968</td>\n",
2320
- " <td>47</td>\n",
2321
- " <td>81</td>\n",
2322
  " <td>...</td>\n",
2323
- " <td>0.0</td>\n",
2324
- " <td>NaN</td>\n",
2325
- " <td>0.0</td>\n",
2326
- " <td>0.0</td>\n",
2327
- " <td>Z12</td>\n",
2328
- " <td>mwc</td>\n",
2329
- " <td>New Mexico</td>\n",
2330
  " <td>1985.0</td>\n",
2331
  " <td>2024.0</td>\n",
2332
- " <td>12.0</td>\n",
2333
  " </tr>\n",
2334
  " <tr>\n",
2335
- " <th>3407</th>\n",
2336
- " <td>1266</td>\n",
2337
- " <td>2008</td>\n",
2338
- " <td>M</td>\n",
2339
- " <td>51</td>\n",
2340
- " <td>100</td>\n",
2341
- " <td>11.841315</td>\n",
2342
- " <td>75.5</td>\n",
2343
- " <td>75.906250</td>\n",
2344
- " <td>37</td>\n",
2345
- " <td>89</td>\n",
 
 
 
 
 
 
 
 
 
 
2346
  " <td>...</td>\n",
2347
- " <td>1.0</td>\n",
2348
- " <td>0.707107</td>\n",
2349
- " <td>0.5</td>\n",
2350
- " <td>0.5</td>\n",
2351
- " <td>Y06</td>\n",
2352
- " <td>big_east</td>\n",
2353
- " <td>Marquette</td>\n",
2354
- " <td>1985.0</td>\n",
2355
- " <td>2024.0</td>\n",
2356
- " <td>6.0</td>\n",
2357
  " </tr>\n",
2358
  " <tr>\n",
2359
- " <th>5190</th>\n",
2360
- " <td>1352</td>\n",
2361
- " <td>2016</td>\n",
2362
- " <td>M</td>\n",
2363
- " <td>44</td>\n",
2364
- " <td>89</td>\n",
2365
- " <td>10.298567</td>\n",
2366
- " <td>67.0</td>\n",
2367
- " <td>65.062500</td>\n",
2368
- " <td>45</td>\n",
2369
- " <td>106</td>\n",
2370
  " <td>...</td>\n",
2371
- " <td>NaN</td>\n",
2372
- " <td>NaN</td>\n",
2373
- " <td>NaN</td>\n",
2374
- " <td>NaN</td>\n",
2375
  " <td>NaN</td>\n",
2376
  " <td>NaN</td>\n",
2377
  " <td>NaN</td>\n",
@@ -2380,22 +2473,22 @@
2380
  " <td>NaN</td>\n",
2381
  " </tr>\n",
2382
  " <tr>\n",
2383
- " <th>1892</th>\n",
2384
- " <td>1194</td>\n",
2385
- " <td>2005</td>\n",
2386
- " <td>M</td>\n",
2387
- " <td>45</td>\n",
2388
- " <td>104</td>\n",
2389
- " <td>14.194618</td>\n",
2390
- " <td>76.0</td>\n",
2391
- " <td>76.777778</td>\n",
2392
- " <td>59</td>\n",
2393
- " <td>107</td>\n",
2394
  " <td>...</td>\n",
2395
- " <td>NaN</td>\n",
2396
- " <td>NaN</td>\n",
2397
- " <td>NaN</td>\n",
2398
- " <td>NaN</td>\n",
2399
  " <td>NaN</td>\n",
2400
  " <td>NaN</td>\n",
2401
  " <td>NaN</td>\n",
@@ -2404,22 +2497,22 @@
2404
  " <td>NaN</td>\n",
2405
  " </tr>\n",
2406
  " <tr>\n",
2407
- " <th>10020</th>\n",
2408
- " <td>3270</td>\n",
2409
- " <td>2021</td>\n",
2410
- " <td>W</td>\n",
2411
- " <td>24</td>\n",
2412
- " <td>80</td>\n",
2413
- " <td>13.385137</td>\n",
2414
- " <td>53.0</td>\n",
2415
- " <td>55.476190</td>\n",
2416
- " <td>41</td>\n",
2417
- " <td>117</td>\n",
2418
  " <td>...</td>\n",
2419
- " <td>NaN</td>\n",
2420
- " <td>NaN</td>\n",
2421
- " <td>NaN</td>\n",
2422
- " <td>NaN</td>\n",
2423
  " <td>NaN</td>\n",
2424
  " <td>NaN</td>\n",
2425
  " <td>NaN</td>\n",
@@ -2428,22 +2521,22 @@
2428
  " <td>NaN</td>\n",
2429
  " </tr>\n",
2430
  " <tr>\n",
2431
- " <th>9567</th>\n",
2432
- " <td>3240</td>\n",
2433
- " <td>2014</td>\n",
2434
- " <td>W</td>\n",
2435
- " <td>43</td>\n",
2436
- " <td>84</td>\n",
2437
- " <td>11.319009</td>\n",
2438
- " <td>62.5</td>\n",
2439
- " <td>63.593750</td>\n",
2440
- " <td>45</td>\n",
2441
- " <td>100</td>\n",
2442
  " <td>...</td>\n",
2443
- " <td>NaN</td>\n",
2444
- " <td>NaN</td>\n",
2445
- " <td>NaN</td>\n",
2446
- " <td>NaN</td>\n",
2447
  " <td>NaN</td>\n",
2448
  " <td>NaN</td>\n",
2449
  " <td>NaN</td>\n",
@@ -2452,22 +2545,22 @@
2452
  " <td>NaN</td>\n",
2453
  " </tr>\n",
2454
  " <tr>\n",
2455
- " <th>12617</th>\n",
2456
- " <td>3452</td>\n",
2457
- " <td>2011</td>\n",
2458
- " <td>W</td>\n",
2459
- " <td>39</td>\n",
2460
- " <td>90</td>\n",
2461
- " <td>12.518374</td>\n",
2462
- " <td>65.0</td>\n",
2463
- " <td>65.750000</td>\n",
2464
- " <td>21</td>\n",
2465
- " <td>79</td>\n",
2466
  " <td>...</td>\n",
2467
  " <td>1.0</td>\n",
2468
- " <td>0.707107</td>\n",
2469
- " <td>0.5</td>\n",
2470
- " <td>0.5</td>\n",
2471
  " <td>NaN</td>\n",
2472
  " <td>NaN</td>\n",
2473
  " <td>NaN</td>\n",
@@ -2477,140 +2570,72 @@
2477
  " </tr>\n",
2478
  " </tbody>\n",
2479
  "</table>\n",
2480
- "<p>10 rows × 439 columns</p>\n",
2481
  "</div>"
2482
  ],
2483
  "text/plain": [
2484
- " TeamID Season League TeamScore min reg TeamScore max reg \\\n",
2485
- "12348 3430 2012 W 41 78 \n",
2486
- "6900 1431 2018 M 33 88 \n",
2487
- "4406 1315 2014 M 43 95 \n",
2488
- "4233 1307 2005 M 53 101 \n",
2489
- "3407 1266 2008 M 51 100 \n",
2490
- "5190 1352 2016 M 44 89 \n",
2491
- "1892 1194 2005 M 45 104 \n",
2492
- "10020 3270 2021 W 24 80 \n",
2493
- "9567 3240 2014 W 43 84 \n",
2494
- "12617 3452 2011 W 39 90 \n",
2495
- "\n",
2496
- " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
2497
- "12348 10.808339 61.0 58.965517 \n",
2498
- "6900 12.283247 67.0 66.466667 \n",
2499
- "4406 10.019980 72.0 73.000000 \n",
2500
- "4233 12.911860 77.0 75.870968 \n",
2501
- "3407 11.841315 75.5 75.906250 \n",
2502
- "5190 10.298567 67.0 65.062500 \n",
2503
- "1892 14.194618 76.0 76.777778 \n",
2504
- "10020 13.385137 53.0 55.476190 \n",
2505
- "9567 11.319009 62.5 63.593750 \n",
2506
- "12617 12.518374 65.0 65.750000 \n",
2507
  "\n",
2508
- " OppScore min reg OppScore max reg ... Win max tourney \\\n",
2509
- "12348 36 85 ... NaN \n",
2510
- "6900 44 97 ... NaN \n",
2511
- "4406 61 103 ... NaN \n",
2512
- "4233 47 81 ... 0.0 \n",
2513
- "3407 37 89 ... 1.0 \n",
2514
- "5190 45 106 ... NaN \n",
2515
- "1892 59 107 ... NaN \n",
2516
- "10020 41 117 ... NaN \n",
2517
- "9567 45 100 ... NaN \n",
2518
- "12617 21 79 ... 1.0 \n",
 
2519
  "\n",
2520
- " Win std tourney Win median tourney Win mean tourney Seed \\\n",
2521
- "12348 NaN NaN NaN NaN \n",
2522
- "6900 NaN NaN NaN NaN \n",
2523
- "4406 NaN NaN NaN NaN \n",
2524
- "4233 NaN 0.0 0.0 Z12 \n",
2525
- "3407 0.707107 0.5 0.5 Y06 \n",
2526
- "5190 NaN NaN NaN NaN \n",
2527
- "1892 NaN NaN NaN NaN \n",
2528
- "10020 NaN NaN NaN NaN \n",
2529
- "9567 NaN NaN NaN NaN \n",
2530
- "12617 0.707107 0.5 0.5 NaN \n",
 
2531
  "\n",
2532
- " ConfAbbrev TeamName FirstD1Season LastD1Season ChalkSeed \n",
2533
- "12348 NaN NaN NaN NaN NaN \n",
2534
- "6900 NaN NaN NaN NaN NaN \n",
2535
- "4406 NaN NaN NaN NaN NaN \n",
2536
- "4233 mwc New Mexico 1985.0 2024.0 12.0 \n",
2537
- "3407 big_east Marquette 1985.0 2024.0 6.0 \n",
2538
- "5190 NaN NaN NaN NaN NaN \n",
2539
- "1892 NaN NaN NaN NaN NaN \n",
2540
- "10020 NaN NaN NaN NaN NaN \n",
2541
- "9567 NaN NaN NaN NaN NaN \n",
2542
- "12617 NaN NaN NaN NaN NaN \n",
 
2543
  "\n",
2544
- "[10 rows x 439 columns]"
2545
  ]
2546
  },
2547
- "execution_count": 16,
2548
  "metadata": {},
2549
  "output_type": "execute_result"
2550
  }
2551
  ],
2552
  "source": [
2553
- "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
2554
- "\n",
2555
- "team_agg_df = pd.merge(\n",
2556
- " left=team_agg_df,\n",
2557
- " right=team_conf_seeds_df[team_conf_seeds_df[\"Season\"] >= 2003],\n",
2558
- " how=\"left\",\n",
2559
- " on=[\"TeamID\", \"Season\", \"League\"],\n",
2560
- " suffixes=(\" Team\", \" Opp\")\n",
2561
- ")\n",
2562
- "\n",
2563
- "team_agg_df.sample(10, random_state=1)"
2564
- ]
2565
- },
2566
- {
2567
- "cell_type": "code",
2568
- "execution_count": 17,
2569
- "metadata": {},
2570
- "outputs": [
2571
- {
2572
- "name": "stdout",
2573
- "output_type": "stream",
2574
- "text": [
2575
- "<class 'pandas.core.frame.DataFrame'>\n",
2576
- "Int64Index: 12857 entries, 0 to 12856\n",
2577
- "Columns: 439 entries, TeamID to ChalkSeed\n",
2578
- "dtypes: float64(347), int64(88), object(4)\n",
2579
- "memory usage: 43.2+ MB\n"
2580
- ]
2581
- }
2582
- ],
2583
- "source": [
2584
- "team_agg_df.info()"
2585
- ]
2586
- },
2587
- {
2588
- "cell_type": "code",
2589
- "execution_count": 18,
2590
- "metadata": {},
2591
- "outputs": [
2592
- {
2593
- "name": "stdout",
2594
- "output_type": "stream",
2595
- "text": [
2596
- "<class 'pandas.core.frame.DataFrame'>\n",
2597
- "Int64Index: 377608 entries, 0 to 377607\n",
2598
- "Columns: 486 entries, Season to ChalkSeed\n",
2599
- "dtypes: float64(347), int64(132), object(7)\n",
2600
- "memory usage: 1.4+ GB\n"
2601
- ]
2602
- }
2603
- ],
2604
- "source": [
2605
- "# re merge the aggregated team stats to the games dataset\n",
2606
- "\n",
2607
- "super_detailed_games_df = pd.merge(\n",
2608
- " left=all_detailed_games_df[all_detailed_games_df[\"Season\"] >= 2003],\n",
2609
- " right=team_agg_df,\n",
2610
- " on=[\"TeamID\", \"Season\", \"League\"],\n",
2611
- ")\n",
2612
- "\n",
2613
- "super_detailed_games_df.info()"
2614
  ]
2615
  },
2616
  {
 
666
  " <th>TeamFGM3</th>\n",
667
  " <th>...</th>\n",
668
  " <th>TODiff</th>\n",
669
+ " <th>FTADiff</th>\n",
670
  " <th>FTMDiff</th>\n",
671
+ " <th>FGM3Diff</th>\n",
672
+ " <th>PFDiff</th>\n",
673
  " <th>FGADiff</th>\n",
 
 
 
674
  " <th>ORDiff</th>\n",
675
  " <th>BlkDiff</th>\n",
676
  " <th>ScoreDiff</th>\n",
677
+ " <th>FGMDiff</th>\n",
678
  " </tr>\n",
679
  " </thead>\n",
680
  " <tbody>\n",
 
693
  " <td>...</td>\n",
694
  " <td>7</td>\n",
695
  " <td>-11</td>\n",
696
+ " <td>-11</td>\n",
697
+ " <td>-3</td>\n",
698
+ " <td>9</td>\n",
699
  " <td>-12</td>\n",
 
 
 
700
  " <td>-11</td>\n",
701
  " <td>1</td>\n",
702
  " <td>-28</td>\n",
703
+ " <td>-7</td>\n",
704
  " </tr>\n",
705
  " <tr>\n",
706
  " <th>100732</th>\n",
 
716
  " <td>8</td>\n",
717
  " <td>...</td>\n",
718
  " <td>-6</td>\n",
719
+ " <td>17</td>\n",
720
  " <td>16</td>\n",
721
+ " <td>-2</td>\n",
722
+ " <td>-9</td>\n",
723
  " <td>-4</td>\n",
724
  " <td>-1</td>\n",
 
 
 
725
  " <td>2</td>\n",
726
  " <td>12</td>\n",
727
+ " <td>-1</td>\n",
728
  " </tr>\n",
729
  " <tr>\n",
730
  " <th>83150</th>\n",
 
740
  " <td>8</td>\n",
741
  " <td>...</td>\n",
742
  " <td>1</td>\n",
743
+ " <td>10</td>\n",
744
  " <td>14</td>\n",
 
 
 
 
745
  " <td>1</td>\n",
746
+ " <td>-5</td>\n",
747
+ " <td>-6</td>\n",
748
  " <td>4</td>\n",
749
  " <td>2</td>\n",
750
  " <td>13</td>\n",
751
+ " <td>-1</td>\n",
752
  " </tr>\n",
753
  " <tr>\n",
754
  " <th>345009</th>\n",
 
764
  " <td>5</td>\n",
765
  " <td>...</td>\n",
766
  " <td>-5</td>\n",
767
+ " <td>-11</td>\n",
768
  " <td>-8</td>\n",
769
+ " <td>-1</td>\n",
770
+ " <td>7</td>\n",
771
  " <td>13</td>\n",
 
 
 
772
  " <td>2</td>\n",
773
  " <td>-3</td>\n",
774
  " <td>-7</td>\n",
775
+ " <td>1</td>\n",
776
  " </tr>\n",
777
  " <tr>\n",
778
  " <th>318707</th>\n",
 
788
  " <td>3</td>\n",
789
  " <td>...</td>\n",
790
  " <td>4</td>\n",
791
+ " <td>-11</td>\n",
792
  " <td>-9</td>\n",
793
  " <td>-3</td>\n",
 
 
 
794
  " <td>2</td>\n",
795
+ " <td>3</td>\n",
796
  " <td>2</td>\n",
797
  " <td>1</td>\n",
798
  " <td>-18</td>\n",
799
+ " <td>-3</td>\n",
800
  " </tr>\n",
801
  " </tbody>\n",
802
  "</table>\n",
 
811
  "345009 2019 4 3435 58 65 H 0 19 \n",
812
  "318707 2013 128 3322 45 63 N 0 20 \n",
813
  "\n",
814
+ " TeamFGA TeamFGM3 ... TODiff FTADiff FTMDiff FGM3Diff PFDiff \\\n",
815
+ "337067 55 3 ... 7 -11 -11 -3 9 \n",
816
+ "100732 60 8 ... -6 17 16 -2 -9 \n",
817
+ "83150 58 8 ... 1 10 14 1 -5 \n",
818
+ "345009 55 5 ... -5 -11 -8 -1 7 \n",
819
+ "318707 51 3 ... 4 -11 -9 -3 2 \n",
820
  "\n",
821
+ " FGADiff ORDiff BlkDiff ScoreDiff FGMDiff \n",
822
+ "337067 -12 -11 1 -28 -7 \n",
823
+ "100732 -4 -1 2 12 -1 \n",
824
+ "83150 -6 4 2 13 -1 \n",
825
+ "345009 13 2 -3 -7 1 \n",
826
+ "318707 3 2 1 -18 -3 \n",
827
  "\n",
828
  "[5 rows x 48 columns]"
829
  ]
 
940
  " <th>OppScore min</th>\n",
941
  " <th>OppScore max</th>\n",
942
  " <th>...</th>\n",
943
+ " <th>FGMDiff min</th>\n",
944
+ " <th>FGMDiff max</th>\n",
945
+ " <th>FGMDiff std</th>\n",
946
+ " <th>FGMDiff median</th>\n",
947
+ " <th>FGMDiff mean</th>\n",
948
  " <th>Win min</th>\n",
949
  " <th>Win max</th>\n",
950
  " <th>Win std</th>\n",
 
966
  " <td>36</td>\n",
967
  " <td>85</td>\n",
968
  " <td>...</td>\n",
969
+ " <td>-17</td>\n",
970
+ " <td>14</td>\n",
971
+ " <td>7.252314</td>\n",
972
+ " <td>-2.0</td>\n",
973
+ " <td>-1.896552</td>\n",
974
  " <td>0</td>\n",
975
  " <td>1</td>\n",
976
  " <td>0.508548</td>\n",
 
990
  " <td>44</td>\n",
991
  " <td>97</td>\n",
992
  " <td>...</td>\n",
993
+ " <td>-18</td>\n",
994
+ " <td>9</td>\n",
995
+ " <td>6.210854</td>\n",
996
+ " <td>-1.0</td>\n",
997
+ " <td>-0.666667</td>\n",
998
  " <td>0</td>\n",
999
  " <td>1</td>\n",
1000
  " <td>0.479463</td>\n",
 
1014
  " <td>61</td>\n",
1015
  " <td>103</td>\n",
1016
  " <td>...</td>\n",
1017
+ " <td>-17</td>\n",
1018
+ " <td>9</td>\n",
1019
+ " <td>5.037707</td>\n",
1020
+ " <td>0.0</td>\n",
1021
+ " <td>-0.612903</td>\n",
1022
  " <td>0</td>\n",
1023
  " <td>1</td>\n",
1024
  " <td>0.508001</td>\n",
 
1038
  " <td>47</td>\n",
1039
  " <td>81</td>\n",
1040
  " <td>...</td>\n",
1041
+ " <td>-8</td>\n",
1042
+ " <td>12</td>\n",
1043
+ " <td>5.142904</td>\n",
1044
+ " <td>3.0</td>\n",
1045
+ " <td>2.129032</td>\n",
1046
  " <td>0</td>\n",
1047
  " <td>1</td>\n",
1048
  " <td>0.401610</td>\n",
 
1062
  " <td>37</td>\n",
1063
  " <td>89</td>\n",
1064
  " <td>...</td>\n",
1065
+ " <td>-7</td>\n",
1066
+ " <td>18</td>\n",
1067
+ " <td>6.097246</td>\n",
1068
+ " <td>4.5</td>\n",
1069
+ " <td>4.281250</td>\n",
1070
  " <td>0</td>\n",
1071
  " <td>1</td>\n",
1072
  " <td>0.456803</td>\n",
 
1086
  " <td>45</td>\n",
1087
  " <td>106</td>\n",
1088
  " <td>...</td>\n",
1089
+ " <td>-25</td>\n",
1090
+ " <td>7</td>\n",
1091
+ " <td>5.870151</td>\n",
1092
+ " <td>-0.5</td>\n",
1093
+ " <td>-1.843750</td>\n",
1094
  " <td>0</td>\n",
1095
  " <td>1</td>\n",
1096
  " <td>0.470929</td>\n",
 
1110
  " <td>59</td>\n",
1111
  " <td>107</td>\n",
1112
  " <td>...</td>\n",
1113
+ " <td>-19</td>\n",
1114
+ " <td>13</td>\n",
1115
+ " <td>6.835261</td>\n",
1116
+ " <td>1.0</td>\n",
1117
+ " <td>0.481481</td>\n",
1118
  " <td>0</td>\n",
1119
  " <td>1</td>\n",
1120
  " <td>0.492103</td>\n",
 
1134
  " <td>41</td>\n",
1135
  " <td>117</td>\n",
1136
  " <td>...</td>\n",
1137
+ " <td>-41</td>\n",
1138
+ " <td>11</td>\n",
1139
+ " <td>10.992205</td>\n",
1140
+ " <td>-4.0</td>\n",
1141
+ " <td>-6.142857</td>\n",
1142
  " <td>0</td>\n",
1143
  " <td>1</td>\n",
1144
  " <td>0.462910</td>\n",
 
1158
  " <td>45</td>\n",
1159
  " <td>100</td>\n",
1160
  " <td>...</td>\n",
1161
+ " <td>-21</td>\n",
1162
+ " <td>9</td>\n",
1163
+ " <td>6.956083</td>\n",
1164
+ " <td>-3.0</td>\n",
1165
+ " <td>-3.250000</td>\n",
1166
  " <td>0</td>\n",
1167
  " <td>1</td>\n",
1168
  " <td>0.504016</td>\n",
 
1182
  " <td>21</td>\n",
1183
  " <td>79</td>\n",
1184
  " <td>...</td>\n",
1185
+ " <td>-10</td>\n",
1186
+ " <td>22</td>\n",
1187
+ " <td>7.768254</td>\n",
1188
+ " <td>5.5</td>\n",
1189
+ " <td>5.906250</td>\n",
1190
  " <td>0</td>\n",
1191
  " <td>1</td>\n",
1192
  " <td>0.456803</td>\n",
 
1223
  "9567 62.5 63.593750 45 100 ... \n",
1224
  "12617 65.0 65.750000 21 79 ... \n",
1225
  "\n",
1226
+ " FGMDiff min FGMDiff max FGMDiff std FGMDiff median FGMDiff mean \\\n",
1227
+ "12348 -17 14 7.252314 -2.0 -1.896552 \n",
1228
+ "6900 -18 9 6.210854 -1.0 -0.666667 \n",
1229
+ "4406 -17 9 5.037707 0.0 -0.612903 \n",
1230
+ "4233 -8 12 5.142904 3.0 2.129032 \n",
1231
+ "3407 -7 18 6.097246 4.5 4.281250 \n",
1232
+ "5190 -25 7 5.870151 -0.5 -1.843750 \n",
1233
+ "1892 -19 13 6.835261 1.0 0.481481 \n",
1234
+ "10020 -41 11 10.992205 -4.0 -6.142857 \n",
1235
+ "9567 -21 9 6.956083 -3.0 -3.250000 \n",
1236
+ "12617 -10 22 7.768254 5.5 5.906250 \n",
1237
  "\n",
1238
+ " Win min Win max Win std Win median Win mean \n",
1239
+ "12348 0 1 0.508548 0.0 0.482759 \n",
1240
+ "6900 0 1 0.479463 0.0 0.333333 \n",
1241
+ "4406 0 1 0.508001 0.0 0.483871 \n",
1242
+ "4233 0 1 0.401610 1.0 0.806452 \n",
1243
+ "3407 0 1 0.456803 1.0 0.718750 \n",
1244
+ "5190 0 1 0.470929 0.0 0.312500 \n",
1245
+ "1892 0 1 0.492103 0.0 0.370370 \n",
1246
+ "10020 0 1 0.462910 0.0 0.285714 \n",
1247
+ "9567 0 1 0.504016 0.0 0.437500 \n",
1248
+ "12617 0 1 0.456803 1.0 0.718750 \n",
1249
  "\n",
1250
  "[10 rows x 218 columns]"
1251
  ]
 
1304
  " <th>OppScore min</th>\n",
1305
  " <th>OppScore max</th>\n",
1306
  " <th>...</th>\n",
1307
+ " <th>FGMDiff min</th>\n",
1308
+ " <th>FGMDiff max</th>\n",
1309
+ " <th>FGMDiff std</th>\n",
1310
+ " <th>FGMDiff median</th>\n",
1311
+ " <th>FGMDiff mean</th>\n",
1312
  " <th>Win min</th>\n",
1313
  " <th>Win max</th>\n",
1314
  " <th>Win std</th>\n",
 
1330
  " <td>53</td>\n",
1331
  " <td>82</td>\n",
1332
  " <td>...</td>\n",
1333
+ " <td>-10</td>\n",
1334
+ " <td>13</td>\n",
1335
+ " <td>11.789826</td>\n",
1336
+ " <td>-3.0</td>\n",
1337
+ " <td>0.000000</td>\n",
1338
  " <td>0</td>\n",
1339
  " <td>1</td>\n",
1340
  " <td>0.577350</td>\n",
 
1354
  " <td>71</td>\n",
1355
  " <td>71</td>\n",
1356
  " <td>...</td>\n",
1357
+ " <td>-2</td>\n",
1358
+ " <td>-2</td>\n",
1359
  " <td>NaN</td>\n",
1360
+ " <td>-2.0</td>\n",
1361
+ " <td>-2.000000</td>\n",
1362
  " <td>0</td>\n",
1363
  " <td>0</td>\n",
1364
  " <td>NaN</td>\n",
 
1378
  " <td>64</td>\n",
1379
  " <td>64</td>\n",
1380
  " <td>...</td>\n",
1381
+ " <td>2</td>\n",
1382
+ " <td>2</td>\n",
1383
  " <td>NaN</td>\n",
1384
+ " <td>2.0</td>\n",
1385
+ " <td>2.000000</td>\n",
1386
  " <td>0</td>\n",
1387
  " <td>0</td>\n",
1388
  " <td>NaN</td>\n",
 
1402
  " <td>72</td>\n",
1403
  " <td>79</td>\n",
1404
  " <td>...</td>\n",
1405
+ " <td>-3</td>\n",
1406
+ " <td>0</td>\n",
1407
+ " <td>2.121320</td>\n",
1408
+ " <td>-1.5</td>\n",
1409
+ " <td>-1.500000</td>\n",
1410
  " <td>0</td>\n",
1411
  " <td>1</td>\n",
1412
  " <td>0.707107</td>\n",
 
1426
  " <td>79</td>\n",
1427
  " <td>79</td>\n",
1428
  " <td>...</td>\n",
1429
+ " <td>-8</td>\n",
1430
+ " <td>-8</td>\n",
1431
  " <td>NaN</td>\n",
1432
+ " <td>-8.0</td>\n",
1433
+ " <td>-8.000000</td>\n",
1434
  " <td>0</td>\n",
1435
  " <td>0</td>\n",
1436
  " <td>NaN</td>\n",
 
1450
  " <td>56</td>\n",
1451
  " <td>88</td>\n",
1452
  " <td>...</td>\n",
1453
+ " <td>-4</td>\n",
1454
+ " <td>8</td>\n",
1455
+ " <td>5.123475</td>\n",
1456
+ " <td>3.5</td>\n",
1457
+ " <td>2.750000</td>\n",
1458
  " <td>0</td>\n",
1459
  " <td>1</td>\n",
1460
  " <td>0.500000</td>\n",
 
1474
  " <td>67</td>\n",
1475
  " <td>75</td>\n",
1476
  " <td>...</td>\n",
1477
+ " <td>-1</td>\n",
1478
+ " <td>6</td>\n",
1479
+ " <td>4.949747</td>\n",
1480
+ " <td>2.5</td>\n",
1481
+ " <td>2.500000</td>\n",
1482
  " <td>0</td>\n",
1483
  " <td>1</td>\n",
1484
  " <td>0.707107</td>\n",
 
1522
  " <td>72</td>\n",
1523
  " <td>72</td>\n",
1524
  " <td>...</td>\n",
1525
+ " <td>-2</td>\n",
1526
+ " <td>-2</td>\n",
1527
  " <td>NaN</td>\n",
1528
+ " <td>-2.0</td>\n",
1529
+ " <td>-2.000000</td>\n",
1530
  " <td>0</td>\n",
1531
  " <td>0</td>\n",
1532
  " <td>NaN</td>\n",
 
1546
  " <td>60</td>\n",
1547
  " <td>88</td>\n",
1548
  " <td>...</td>\n",
1549
+ " <td>-11</td>\n",
1550
+ " <td>3</td>\n",
1551
+ " <td>7.371115</td>\n",
1552
+ " <td>0.0</td>\n",
1553
  " <td>-2.666667</td>\n",
1554
  " <td>0</td>\n",
1555
  " <td>1</td>\n",
 
1587
  "697 63.0 63.000000 72 72 ... \n",
1588
  "763 70.0 69.666667 60 88 ... \n",
1589
  "\n",
1590
+ " FGMDiff min FGMDiff max FGMDiff std FGMDiff median FGMDiff mean \\\n",
1591
+ "995 -10 13 11.789826 -3.0 0.000000 \n",
1592
+ "1601 -2 -2 NaN -2.0 -2.000000 \n",
1593
+ "1805 2 2 NaN 2.0 2.000000 \n",
1594
+ "952 -3 0 2.121320 -1.5 -1.500000 \n",
1595
+ "924 -8 -8 NaN -8.0 -8.000000 \n",
1596
+ "1381 -4 8 5.123475 3.5 2.750000 \n",
1597
+ "1266 -1 6 4.949747 2.5 2.500000 \n",
1598
+ "1810 -3 -3 NaN -3.0 -3.000000 \n",
1599
+ "697 -2 -2 NaN -2.0 -2.000000 \n",
1600
+ "763 -11 3 7.371115 0.0 -2.666667 \n",
1601
  "\n",
1602
+ " Win min Win max Win std Win median Win mean \n",
1603
+ "995 0 1 0.577350 1.0 0.666667 \n",
1604
+ "1601 0 0 NaN 0.0 0.000000 \n",
1605
+ "1805 0 0 NaN 0.0 0.000000 \n",
1606
+ "952 0 1 0.707107 0.5 0.500000 \n",
1607
+ "924 0 0 NaN 0.0 0.000000 \n",
1608
+ "1381 0 1 0.500000 1.0 0.750000 \n",
1609
+ "1266 0 1 0.707107 0.5 0.500000 \n",
1610
+ "1810 0 0 NaN 0.0 0.000000 \n",
1611
+ "697 0 0 NaN 0.0 0.000000 \n",
1612
+ "763 0 1 0.577350 1.0 0.666667 \n",
1613
  "\n",
1614
  "[10 rows x 218 columns]"
1615
  ]
 
1796
  },
1797
  {
1798
  "cell_type": "code",
1799
+ "execution_count": 16,
1800
  "metadata": {},
1801
  "outputs": [
1802
  {
 
1831
  " <th>OppScore min reg</th>\n",
1832
  " <th>OppScore max reg</th>\n",
1833
  " <th>...</th>\n",
1834
+ " <th>FGMDiff min tourney</th>\n",
1835
+ " <th>FGMDiff max tourney</th>\n",
1836
+ " <th>FGMDiff std tourney</th>\n",
1837
+ " <th>FGMDiff median tourney</th>\n",
1838
+ " <th>FGMDiff mean tourney</th>\n",
1839
  " <th>Win min tourney</th>\n",
1840
  " <th>Win max tourney</th>\n",
1841
  " <th>Win std tourney</th>\n",
 
1929
  " <td>47</td>\n",
1930
  " <td>81</td>\n",
1931
  " <td>...</td>\n",
1932
+ " <td>2.0</td>\n",
1933
+ " <td>2.0</td>\n",
1934
  " <td>NaN</td>\n",
1935
+ " <td>2.0</td>\n",
1936
+ " <td>2.0</td>\n",
1937
  " <td>0.0</td>\n",
1938
  " <td>0.0</td>\n",
1939
  " <td>NaN</td>\n",
 
1953
  " <td>37</td>\n",
1954
  " <td>89</td>\n",
1955
  " <td>...</td>\n",
1956
+ " <td>0.0</td>\n",
1957
+ " <td>3.0</td>\n",
1958
+ " <td>2.121320</td>\n",
1959
+ " <td>1.5</td>\n",
1960
+ " <td>1.5</td>\n",
1961
  " <td>0.0</td>\n",
1962
  " <td>1.0</td>\n",
1963
  " <td>0.707107</td>\n",
 
2073
  " <td>21</td>\n",
2074
  " <td>79</td>\n",
2075
  " <td>...</td>\n",
2076
+ " <td>0.0</td>\n",
2077
+ " <td>4.0</td>\n",
2078
+ " <td>2.828427</td>\n",
2079
+ " <td>2.0</td>\n",
2080
+ " <td>2.0</td>\n",
2081
  " <td>0.0</td>\n",
2082
  " <td>1.0</td>\n",
2083
  " <td>0.707107</td>\n",
 
2114
  "9567 11.319009 62.5 63.593750 \n",
2115
  "12617 12.518374 65.0 65.750000 \n",
2116
  "\n",
2117
+ " OppScore min reg OppScore max reg ... FGMDiff min tourney \\\n",
2118
+ "12348 36 85 ... NaN \n",
2119
+ "6900 44 97 ... NaN \n",
2120
+ "4406 61 103 ... NaN \n",
2121
+ "4233 47 81 ... 2.0 \n",
2122
+ "3407 37 89 ... 0.0 \n",
2123
+ "5190 45 106 ... NaN \n",
2124
+ "1892 59 107 ... NaN \n",
2125
+ "10020 41 117 ... NaN \n",
2126
+ "9567 45 100 ... NaN \n",
2127
+ "12617 21 79 ... 0.0 \n",
2128
  "\n",
2129
+ " FGMDiff max tourney FGMDiff std tourney FGMDiff median tourney \\\n",
2130
+ "12348 NaN NaN NaN \n",
2131
+ "6900 NaN NaN NaN \n",
2132
+ "4406 NaN NaN NaN \n",
2133
+ "4233 2.0 NaN 2.0 \n",
2134
+ "3407 3.0 2.121320 1.5 \n",
2135
+ "5190 NaN NaN NaN \n",
2136
+ "1892 NaN NaN NaN \n",
2137
+ "10020 NaN NaN NaN \n",
2138
+ "9567 NaN NaN NaN \n",
2139
+ "12617 4.0 2.828427 2.0 \n",
2140
  "\n",
2141
+ " FGMDiff mean tourney Win min tourney Win max tourney \\\n",
2142
+ "12348 NaN NaN NaN \n",
2143
+ "6900 NaN NaN NaN \n",
2144
+ "4406 NaN NaN NaN \n",
2145
+ "4233 2.0 0.0 0.0 \n",
2146
+ "3407 1.5 0.0 1.0 \n",
2147
+ "5190 NaN NaN NaN \n",
2148
+ "1892 NaN NaN NaN \n",
2149
+ "10020 NaN NaN NaN \n",
2150
+ "9567 NaN NaN NaN \n",
2151
+ "12617 2.0 0.0 1.0 \n",
2152
  "\n",
2153
  " Win std tourney Win median tourney Win mean tourney \n",
2154
  "12348 NaN NaN NaN \n",
 
2165
  "[10 rows x 433 columns]"
2166
  ]
2167
  },
2168
+ "execution_count": 16,
2169
  "metadata": {},
2170
  "output_type": "execute_result"
2171
  }
2172
  ],
2173
  "source": [
2174
  "# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
2175
+ "team_agg_df = pd.merge(\n",
2176
+ " left=team_reg_agg, \n",
2177
+ " right=team_tourney_agg, \n",
2178
+ " how=\"left\",\n",
2179
+ " on=[\"TeamID\", \"Season\", \"League\"], \n",
2180
+ " suffixes=(\" reg\", \" tourney\"),\n",
2181
+ " validate=\"1:1\",\n",
 
2182
  ")\n",
2183
  "\n",
2184
  "team_agg_df.sample(10, random_state=1)"
 
2186
  },
2187
  {
2188
  "cell_type": "code",
2189
+ "execution_count": 17,
2190
+ "metadata": {},
2191
+ "outputs": [],
2192
+ "source": [
2193
+ "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
2194
+ "\n",
2195
+ "team_agg_df2 = pd.merge(\n",
2196
+ " left=team_agg_df,\n",
2197
+ " right=team_conf_seeds_df[team_conf_seeds_df[\"Season\"] >= 2003],\n",
2198
+ " on=[\"TeamID\", \"Season\", \"League\"],\n",
2199
+ " how=\"outer\",\n",
2200
+ " validate=\"1:1\",\n",
2201
+ ")\n",
2202
+ "\n",
2203
+ "team_agg_df2 = team_agg_df2[team_agg_df2[\"Season\"] >= 2003]"
2204
+ ]
2205
+ },
2206
+ {
2207
+ "cell_type": "code",
2208
+ "execution_count": 18,
2209
+ "metadata": {},
2210
+ "outputs": [
2211
+ {
2212
+ "name": "stdout",
2213
+ "output_type": "stream",
2214
+ "text": [
2215
+ "<class 'pandas.core.frame.DataFrame'>\n",
2216
+ "Int64Index: 12857 entries, 0 to 12856\n",
2217
+ "Columns: 439 entries, TeamID to ChalkSeed\n",
2218
+ "dtypes: float64(347), int64(88), object(4)\n",
2219
+ "memory usage: 43.2+ MB\n"
2220
+ ]
2221
+ }
2222
+ ],
2223
+ "source": [
2224
+ "team_agg_df2.info()"
2225
+ ]
2226
+ },
2227
+ {
2228
+ "cell_type": "code",
2229
+ "execution_count": 19,
2230
+ "metadata": {},
2231
+ "outputs": [
2232
+ {
2233
+ "name": "stdout",
2234
+ "output_type": "stream",
2235
+ "text": [
2236
+ "<class 'pandas.core.frame.DataFrame'>\n",
2237
+ "Int64Index: 377608 entries, 0 to 377607\n",
2238
+ "Columns: 486 entries, Season to ChalkSeed\n",
2239
+ "dtypes: float64(347), int64(132), object(7)\n",
2240
+ "memory usage: 1.4+ GB\n"
2241
+ ]
2242
+ }
2243
+ ],
2244
+ "source": [
2245
+ "# re merge the aggregated team stats to the games dataset\n",
2246
+ "super_detailed_games_df = pd.merge(\n",
2247
+ " left=all_detailed_games_df[all_detailed_games_df[\"Season\"] >= 2003],\n",
2248
+ " right=team_agg_df2,\n",
2249
+ " on=[\"TeamID\", \"Season\", \"League\"],\n",
2250
+ " how=\"left\",\n",
2251
+ " validate=\"m:1\",\n",
2252
+ ")\n",
2253
+ "\n",
2254
+ "super_detailed_games_df.info()"
2255
+ ]
2256
+ },
2257
+ {
2258
+ "cell_type": "code",
2259
+ "execution_count": 34,
2260
  "metadata": {},
2261
  "outputs": [
2262
  {
 
2280
  " <thead>\n",
2281
  " <tr style=\"text-align: right;\">\n",
2282
  " <th></th>\n",
 
2283
  " <th>Season</th>\n",
2284
+ " <th>DayNum</th>\n",
2285
+ " <th>TeamID</th>\n",
2286
+ " <th>TeamScore</th>\n",
2287
+ " <th>OppScore</th>\n",
2288
+ " <th>WLoc</th>\n",
2289
+ " <th>NumOT</th>\n",
2290
+ " <th>TeamFGM</th>\n",
2291
+ " <th>TeamFGA</th>\n",
2292
+ " <th>TeamFGM3</th>\n",
2293
  " <th>...</th>\n",
2294
  " <th>Win max tourney</th>\n",
2295
  " <th>Win std tourney</th>\n",
 
2305
  " </thead>\n",
2306
  " <tbody>\n",
2307
  " <tr>\n",
2308
+ " <th>0</th>\n",
2309
+ " <td>2003</td>\n",
2310
+ " <td>10</td>\n",
2311
+ " <td>1104</td>\n",
2312
+ " <td>68</td>\n",
2313
+ " <td>62</td>\n",
2314
+ " <td>N</td>\n",
2315
+ " <td>0</td>\n",
2316
+ " <td>27</td>\n",
2317
+ " <td>58</td>\n",
2318
+ " <td>3</td>\n",
2319
  " <td>...</td>\n",
2320
+ " <td>0.0</td>\n",
2321
  " <td>NaN</td>\n",
2322
+ " <td>0.0</td>\n",
2323
+ " <td>0.000000</td>\n",
2324
+ " <td>Y10</td>\n",
2325
+ " <td>sec</td>\n",
2326
+ " <td>Alabama</td>\n",
2327
+ " <td>1985.0</td>\n",
2328
+ " <td>2024.0</td>\n",
2329
+ " <td>10.0</td>\n",
 
2330
  " </tr>\n",
2331
  " <tr>\n",
2332
+ " <th>1</th>\n",
2333
+ " <td>2003</td>\n",
2334
+ " <td>10</td>\n",
2335
+ " <td>1272</td>\n",
2336
+ " <td>70</td>\n",
2337
+ " <td>63</td>\n",
2338
+ " <td>N</td>\n",
2339
+ " <td>0</td>\n",
2340
+ " <td>26</td>\n",
2341
+ " <td>62</td>\n",
2342
+ " <td>8</td>\n",
2343
  " <td>...</td>\n",
2344
+ " <td>0.0</td>\n",
2345
  " <td>NaN</td>\n",
2346
+ " <td>0.0</td>\n",
2347
+ " <td>0.000000</td>\n",
2348
+ " <td>Z07</td>\n",
2349
+ " <td>cusa</td>\n",
2350
+ " <td>Memphis</td>\n",
2351
+ " <td>1985.0</td>\n",
2352
+ " <td>2024.0</td>\n",
2353
+ " <td>7.0</td>\n",
 
2354
  " </tr>\n",
2355
  " <tr>\n",
2356
+ " <th>2</th>\n",
2357
+ " <td>2003</td>\n",
2358
+ " <td>11</td>\n",
2359
+ " <td>1266</td>\n",
2360
+ " <td>73</td>\n",
 
 
 
 
2361
  " <td>61</td>\n",
2362
+ " <td>N</td>\n",
2363
+ " <td>0</td>\n",
2364
+ " <td>24</td>\n",
2365
+ " <td>58</td>\n",
2366
+ " <td>8</td>\n",
2367
+ " <td>...</td>\n",
2368
+ " <td>1.0</td>\n",
2369
+ " <td>0.447214</td>\n",
2370
+ " <td>1.0</td>\n",
2371
+ " <td>0.800000</td>\n",
2372
+ " <td>Y03</td>\n",
2373
+ " <td>cusa</td>\n",
2374
+ " <td>Marquette</td>\n",
2375
+ " <td>1985.0</td>\n",
2376
+ " <td>2024.0</td>\n",
2377
+ " <td>3.0</td>\n",
2378
+ " </tr>\n",
2379
+ " <tr>\n",
2380
+ " <th>3</th>\n",
2381
+ " <td>2003</td>\n",
2382
+ " <td>11</td>\n",
2383
+ " <td>1296</td>\n",
2384
+ " <td>56</td>\n",
2385
+ " <td>50</td>\n",
2386
+ " <td>N</td>\n",
2387
+ " <td>0</td>\n",
2388
+ " <td>18</td>\n",
2389
+ " <td>38</td>\n",
2390
+ " <td>3</td>\n",
2391
  " <td>...</td>\n",
2392
  " <td>NaN</td>\n",
2393
  " <td>NaN</td>\n",
 
2401
  " <td>NaN</td>\n",
2402
  " </tr>\n",
2403
  " <tr>\n",
2404
+ " <th>4</th>\n",
2405
+ " <td>2003</td>\n",
2406
+ " <td>11</td>\n",
2407
+ " <td>1400</td>\n",
2408
+ " <td>77</td>\n",
2409
+ " <td>71</td>\n",
2410
+ " <td>N</td>\n",
2411
+ " <td>0</td>\n",
2412
+ " <td>30</td>\n",
2413
+ " <td>61</td>\n",
2414
+ " <td>6</td>\n",
2415
  " <td>...</td>\n",
2416
+ " <td>1.0</td>\n",
2417
+ " <td>0.447214</td>\n",
2418
+ " <td>1.0</td>\n",
2419
+ " <td>0.800000</td>\n",
2420
+ " <td>X01</td>\n",
2421
+ " <td>big_twelve</td>\n",
2422
+ " <td>Texas</td>\n",
2423
  " <td>1985.0</td>\n",
2424
  " <td>2024.0</td>\n",
2425
+ " <td>1.0</td>\n",
2426
  " </tr>\n",
2427
  " <tr>\n",
2428
+ " <th>...</th>\n",
2429
+ " <td>...</td>\n",
2430
+ " <td>...</td>\n",
2431
+ " <td>...</td>\n",
2432
+ " <td>...</td>\n",
2433
+ " <td>...</td>\n",
2434
+ " <td>...</td>\n",
2435
+ " <td>...</td>\n",
2436
+ " <td>...</td>\n",
2437
+ " <td>...</td>\n",
2438
+ " <td>...</td>\n",
2439
+ " <td>...</td>\n",
2440
+ " <td>...</td>\n",
2441
+ " <td>...</td>\n",
2442
+ " <td>...</td>\n",
2443
+ " <td>...</td>\n",
2444
+ " <td>...</td>\n",
2445
+ " <td>...</td>\n",
2446
+ " <td>...</td>\n",
2447
+ " <td>...</td>\n",
2448
+ " <td>...</td>\n",
2449
  " <td>...</td>\n",
 
 
 
 
 
 
 
 
 
 
2450
  " </tr>\n",
2451
  " <tr>\n",
2452
+ " <th>377603</th>\n",
2453
+ " <td>2023</td>\n",
2454
+ " <td>147</td>\n",
2455
+ " <td>3268</td>\n",
2456
+ " <td>75</td>\n",
2457
+ " <td>86</td>\n",
2458
+ " <td>H</td>\n",
2459
+ " <td>0</td>\n",
2460
+ " <td>29</td>\n",
2461
+ " <td>58</td>\n",
2462
+ " <td>7</td>\n",
2463
  " <td>...</td>\n",
2464
+ " <td>1.0</td>\n",
2465
+ " <td>0.500000</td>\n",
2466
+ " <td>1.0</td>\n",
2467
+ " <td>0.750000</td>\n",
2468
  " <td>NaN</td>\n",
2469
  " <td>NaN</td>\n",
2470
  " <td>NaN</td>\n",
 
2473
  " <td>NaN</td>\n",
2474
  " </tr>\n",
2475
  " <tr>\n",
2476
+ " <th>377604</th>\n",
2477
+ " <td>2023</td>\n",
2478
+ " <td>147</td>\n",
2479
+ " <td>3326</td>\n",
2480
+ " <td>74</td>\n",
2481
+ " <td>84</td>\n",
2482
+ " <td>N</td>\n",
2483
+ " <td>0</td>\n",
2484
+ " <td>26</td>\n",
2485
+ " <td>57</td>\n",
2486
+ " <td>7</td>\n",
2487
  " <td>...</td>\n",
2488
+ " <td>1.0</td>\n",
2489
+ " <td>0.500000</td>\n",
2490
+ " <td>1.0</td>\n",
2491
+ " <td>0.750000</td>\n",
2492
  " <td>NaN</td>\n",
2493
  " <td>NaN</td>\n",
2494
  " <td>NaN</td>\n",
 
2497
  " <td>NaN</td>\n",
2498
  " </tr>\n",
2499
  " <tr>\n",
2500
+ " <th>377605</th>\n",
2501
+ " <td>2023</td>\n",
2502
+ " <td>151</td>\n",
2503
+ " <td>3376</td>\n",
2504
+ " <td>73</td>\n",
2505
+ " <td>77</td>\n",
2506
+ " <td>N</td>\n",
2507
+ " <td>0</td>\n",
2508
+ " <td>30</td>\n",
2509
+ " <td>77</td>\n",
2510
+ " <td>4</td>\n",
2511
  " <td>...</td>\n",
2512
+ " <td>1.0</td>\n",
2513
+ " <td>0.447214</td>\n",
2514
+ " <td>1.0</td>\n",
2515
+ " <td>0.800000</td>\n",
2516
  " <td>NaN</td>\n",
2517
  " <td>NaN</td>\n",
2518
  " <td>NaN</td>\n",
 
2521
  " <td>NaN</td>\n",
2522
  " </tr>\n",
2523
  " <tr>\n",
2524
+ " <th>377606</th>\n",
2525
+ " <td>2023</td>\n",
2526
+ " <td>151</td>\n",
2527
+ " <td>3439</td>\n",
2528
+ " <td>72</td>\n",
2529
+ " <td>79</td>\n",
2530
+ " <td>N</td>\n",
2531
+ " <td>0</td>\n",
2532
+ " <td>23</td>\n",
2533
+ " <td>57</td>\n",
2534
+ " <td>9</td>\n",
2535
  " <td>...</td>\n",
2536
+ " <td>1.0</td>\n",
2537
+ " <td>0.447214</td>\n",
2538
+ " <td>1.0</td>\n",
2539
+ " <td>0.800000</td>\n",
2540
  " <td>NaN</td>\n",
2541
  " <td>NaN</td>\n",
2542
  " <td>NaN</td>\n",
 
2545
  " <td>NaN</td>\n",
2546
  " </tr>\n",
2547
  " <tr>\n",
2548
+ " <th>377607</th>\n",
2549
+ " <td>2023</td>\n",
2550
+ " <td>153</td>\n",
2551
+ " <td>3234</td>\n",
2552
+ " <td>85</td>\n",
2553
+ " <td>102</td>\n",
2554
+ " <td>N</td>\n",
2555
+ " <td>0</td>\n",
2556
+ " <td>28</td>\n",
2557
+ " <td>56</td>\n",
2558
+ " <td>14</td>\n",
2559
  " <td>...</td>\n",
2560
  " <td>1.0</td>\n",
2561
+ " <td>0.408248</td>\n",
2562
+ " <td>1.0</td>\n",
2563
+ " <td>0.833333</td>\n",
2564
  " <td>NaN</td>\n",
2565
  " <td>NaN</td>\n",
2566
  " <td>NaN</td>\n",
 
2570
  " </tr>\n",
2571
  " </tbody>\n",
2572
  "</table>\n",
2573
+ "<p>377608 rows × 486 columns</p>\n",
2574
  "</div>"
2575
  ],
2576
  "text/plain": [
2577
+ " Season DayNum TeamID TeamScore OppScore WLoc NumOT TeamFGM \\\n",
2578
+ "0 2003 10 1104 68 62 N 0 27 \n",
2579
+ "1 2003 10 1272 70 63 N 0 26 \n",
2580
+ "2 2003 11 1266 73 61 N 0 24 \n",
2581
+ "3 2003 11 1296 56 50 N 0 18 \n",
2582
+ "4 2003 11 1400 77 71 N 0 30 \n",
2583
+ "... ... ... ... ... ... ... ... ... \n",
2584
+ "377603 2023 147 3268 75 86 H 0 29 \n",
2585
+ "377604 2023 147 3326 74 84 N 0 26 \n",
2586
+ "377605 2023 151 3376 73 77 N 0 30 \n",
2587
+ "377606 2023 151 3439 72 79 N 0 23 \n",
2588
+ "377607 2023 153 3234 85 102 N 0 28 \n",
 
 
 
 
 
 
 
 
 
 
 
2589
  "\n",
2590
+ " TeamFGA TeamFGM3 ... Win max tourney Win std tourney \\\n",
2591
+ "0 58 3 ... 0.0 NaN \n",
2592
+ "1 62 8 ... 0.0 NaN \n",
2593
+ "2 58 8 ... 1.0 0.447214 \n",
2594
+ "3 38 3 ... NaN NaN \n",
2595
+ "4 61 6 ... 1.0 0.447214 \n",
2596
+ "... ... ... ... ... ... \n",
2597
+ "377603 58 7 ... 1.0 0.500000 \n",
2598
+ "377604 57 7 ... 1.0 0.500000 \n",
2599
+ "377605 77 4 ... 1.0 0.447214 \n",
2600
+ "377606 57 9 ... 1.0 0.447214 \n",
2601
+ "377607 56 14 ... 1.0 0.408248 \n",
2602
  "\n",
2603
+ " Win median tourney Win mean tourney Seed ConfAbbrev TeamName \\\n",
2604
+ "0 0.0 0.000000 Y10 sec Alabama \n",
2605
+ "1 0.0 0.000000 Z07 cusa Memphis \n",
2606
+ "2 1.0 0.800000 Y03 cusa Marquette \n",
2607
+ "3 NaN NaN NaN NaN NaN \n",
2608
+ "4 1.0 0.800000 X01 big_twelve Texas \n",
2609
+ "... ... ... ... ... ... \n",
2610
+ "377603 1.0 0.750000 NaN NaN NaN \n",
2611
+ "377604 1.0 0.750000 NaN NaN NaN \n",
2612
+ "377605 1.0 0.800000 NaN NaN NaN \n",
2613
+ "377606 1.0 0.800000 NaN NaN NaN \n",
2614
+ "377607 1.0 0.833333 NaN NaN NaN \n",
2615
  "\n",
2616
+ " FirstD1Season LastD1Season ChalkSeed \n",
2617
+ "0 1985.0 2024.0 10.0 \n",
2618
+ "1 1985.0 2024.0 7.0 \n",
2619
+ "2 1985.0 2024.0 3.0 \n",
2620
+ "3 NaN NaN NaN \n",
2621
+ "4 1985.0 2024.0 1.0 \n",
2622
+ "... ... ... ... \n",
2623
+ "377603 NaN NaN NaN \n",
2624
+ "377604 NaN NaN NaN \n",
2625
+ "377605 NaN NaN NaN \n",
2626
+ "377606 NaN NaN NaN \n",
2627
+ "377607 NaN NaN NaN \n",
2628
  "\n",
2629
+ "[377608 rows x 486 columns]"
2630
  ]
2631
  },
2632
+ "execution_count": 34,
2633
  "metadata": {},
2634
  "output_type": "execute_result"
2635
  }
2636
  ],
2637
  "source": [
2638
+ "super_detailed_games_df[\"OppID\"]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2639
  ]
2640
  },
2641
  {