Jensen-holm committed on
Commit
ee8ad8b
1 Parent(s): 4fb2726

making new columns Seed_Num and Seed_Region so that the model can train

Browse files

with the seed in mind (if it explains any variance) without having to make
a crap load of dummy variables

data/MDetailedGamesOvrStats.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bddd55618ad48a70efd2f8de4626867b640529be8a9735710fb6e4dbc36d312
3
- size 17420050
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3befec5b96d853905bfc504c0de65565ee863a7cf1ed3e12ab6b908118989f81
3
+ size 17447016
data/MTeamsAgg.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:333ca074666b0857fe15a49d55dc32d3a87778568467bcdb84e839a1a75a5e2d
3
- size 2113961
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9729caff08c85e33b66e4476def7adbfa5c43067accc1cc3b7855738a6b74cd2
3
+ size 2117343
src/mens_pre_processing.ipynb CHANGED
@@ -73,6 +73,7 @@
73
  }
74
  ],
75
  "source": [
 
76
  "tourney_games_df = pd.read_csv(\n",
77
  " os.path.join(DATA_DIR, \"MNCAATourneyDetailedResults.csv\")\n",
78
  ")\n",
@@ -146,6 +147,7 @@
146
  }
147
  ],
148
  "source": [
 
149
  "reg_games_df = pd.read_csv(\n",
150
  " os.path.join(DATA_DIR, \"MRegularSeasonDetailedResults.csv\")\n",
151
  ")\n",
@@ -847,209 +849,86 @@
847
  "cell_type": "code",
848
  "execution_count": 14,
849
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
  "outputs": [
851
  {
852
- "data": {
853
- "text/html": [
854
- "<div>\n",
855
- "<style scoped>\n",
856
- " .dataframe tbody tr th:only-of-type {\n",
857
- " vertical-align: middle;\n",
858
- " }\n",
859
- "\n",
860
- " .dataframe tbody tr th {\n",
861
- " vertical-align: top;\n",
862
- " }\n",
863
- "\n",
864
- " .dataframe thead th {\n",
865
- " text-align: right;\n",
866
- " }\n",
867
- "</style>\n",
868
- "<table border=\"1\" class=\"dataframe\">\n",
869
- " <thead>\n",
870
- " <tr style=\"text-align: right;\">\n",
871
- " <th></th>\n",
872
- " <th>Season</th>\n",
873
- " <th>DayNum</th>\n",
874
- " <th>WTeamID</th>\n",
875
- " <th>WScore</th>\n",
876
- " <th>LTeamID</th>\n",
877
- " <th>LScore</th>\n",
878
- " <th>WLoc</th>\n",
879
- " <th>NumOT</th>\n",
880
- " <th>WFGM</th>\n",
881
- " <th>WFGA</th>\n",
882
- " <th>...</th>\n",
883
- " <th>LDR</th>\n",
884
- " <th>LAst</th>\n",
885
- " <th>LTO</th>\n",
886
- " <th>LStl</th>\n",
887
- " <th>LBlk</th>\n",
888
- " <th>LPF</th>\n",
889
- " <th>GameType</th>\n",
890
- " <th>WPA</th>\n",
891
- " <th>LPA</th>\n",
892
- " <th>LLoc</th>\n",
893
- " </tr>\n",
894
- " </thead>\n",
895
- " <tbody>\n",
896
- " <tr>\n",
897
- " <th>0</th>\n",
898
- " <td>2003</td>\n",
899
- " <td>10</td>\n",
900
- " <td>1104</td>\n",
901
- " <td>68</td>\n",
902
- " <td>1328</td>\n",
903
- " <td>62</td>\n",
904
- " <td>1</td>\n",
905
- " <td>0</td>\n",
906
- " <td>27</td>\n",
907
- " <td>58</td>\n",
908
- " <td>...</td>\n",
909
- " <td>22</td>\n",
910
- " <td>8</td>\n",
911
- " <td>18</td>\n",
912
- " <td>9</td>\n",
913
- " <td>2</td>\n",
914
- " <td>20</td>\n",
915
- " <td>reg</td>\n",
916
- " <td>62</td>\n",
917
- " <td>68</td>\n",
918
- " <td>1</td>\n",
919
- " </tr>\n",
920
- " <tr>\n",
921
- " <th>1</th>\n",
922
- " <td>2003</td>\n",
923
- " <td>10</td>\n",
924
- " <td>1272</td>\n",
925
- " <td>70</td>\n",
926
- " <td>1393</td>\n",
927
- " <td>63</td>\n",
928
- " <td>1</td>\n",
929
- " <td>0</td>\n",
930
- " <td>26</td>\n",
931
- " <td>62</td>\n",
932
- " <td>...</td>\n",
933
- " <td>25</td>\n",
934
- " <td>7</td>\n",
935
- " <td>12</td>\n",
936
- " <td>8</td>\n",
937
- " <td>6</td>\n",
938
- " <td>16</td>\n",
939
- " <td>reg</td>\n",
940
- " <td>63</td>\n",
941
- " <td>70</td>\n",
942
- " <td>1</td>\n",
943
- " </tr>\n",
944
- " <tr>\n",
945
- " <th>2</th>\n",
946
- " <td>2003</td>\n",
947
- " <td>11</td>\n",
948
- " <td>1266</td>\n",
949
- " <td>73</td>\n",
950
- " <td>1437</td>\n",
951
- " <td>61</td>\n",
952
- " <td>1</td>\n",
953
- " <td>0</td>\n",
954
- " <td>24</td>\n",
955
- " <td>58</td>\n",
956
- " <td>...</td>\n",
957
- " <td>22</td>\n",
958
- " <td>9</td>\n",
959
- " <td>12</td>\n",
960
- " <td>2</td>\n",
961
- " <td>5</td>\n",
962
- " <td>23</td>\n",
963
- " <td>reg</td>\n",
964
- " <td>61</td>\n",
965
- " <td>73</td>\n",
966
- " <td>1</td>\n",
967
- " </tr>\n",
968
- " <tr>\n",
969
- " <th>3</th>\n",
970
- " <td>2003</td>\n",
971
- " <td>11</td>\n",
972
- " <td>1296</td>\n",
973
- " <td>56</td>\n",
974
- " <td>1457</td>\n",
975
- " <td>50</td>\n",
976
- " <td>1</td>\n",
977
- " <td>0</td>\n",
978
- " <td>18</td>\n",
979
- " <td>38</td>\n",
980
- " <td>...</td>\n",
981
- " <td>20</td>\n",
982
- " <td>9</td>\n",
983
- " <td>19</td>\n",
984
- " <td>4</td>\n",
985
- " <td>3</td>\n",
986
- " <td>23</td>\n",
987
- " <td>reg</td>\n",
988
- " <td>50</td>\n",
989
- " <td>56</td>\n",
990
- " <td>1</td>\n",
991
- " </tr>\n",
992
- " <tr>\n",
993
- " <th>4</th>\n",
994
- " <td>2003</td>\n",
995
- " <td>11</td>\n",
996
- " <td>1400</td>\n",
997
- " <td>77</td>\n",
998
- " <td>1208</td>\n",
999
- " <td>71</td>\n",
1000
- " <td>1</td>\n",
1001
- " <td>0</td>\n",
1002
- " <td>30</td>\n",
1003
- " <td>61</td>\n",
1004
- " <td>...</td>\n",
1005
- " <td>15</td>\n",
1006
- " <td>12</td>\n",
1007
- " <td>10</td>\n",
1008
- " <td>7</td>\n",
1009
- " <td>1</td>\n",
1010
- " <td>14</td>\n",
1011
- " <td>reg</td>\n",
1012
- " <td>71</td>\n",
1013
- " <td>77</td>\n",
1014
- " <td>1</td>\n",
1015
- " </tr>\n",
1016
- " </tbody>\n",
1017
- "</table>\n",
1018
- "<p>5 rows × 38 columns</p>\n",
1019
- "</div>"
1020
- ],
1021
- "text/plain": [
1022
- " Season DayNum WTeamID WScore LTeamID LScore WLoc NumOT WFGM WFGA \\\n",
1023
- "0 2003 10 1104 68 1328 62 1 0 27 58 \n",
1024
- "1 2003 10 1272 70 1393 63 1 0 26 62 \n",
1025
- "2 2003 11 1266 73 1437 61 1 0 24 58 \n",
1026
- "3 2003 11 1296 56 1457 50 1 0 18 38 \n",
1027
- "4 2003 11 1400 77 1208 71 1 0 30 61 \n",
1028
- "\n",
1029
- " ... LDR LAst LTO LStl LBlk LPF GameType WPA LPA LLoc \n",
1030
- "0 ... 22 8 18 9 2 20 reg 62 68 1 \n",
1031
- "1 ... 25 7 12 8 6 16 reg 63 70 1 \n",
1032
- "2 ... 22 9 12 2 5 23 reg 61 73 1 \n",
1033
- "3 ... 20 9 19 4 3 23 reg 50 56 1 \n",
1034
- "4 ... 15 12 10 7 1 14 reg 71 77 1 \n",
1035
- "\n",
1036
- "[5 rows x 38 columns]"
1037
- ]
1038
- },
1039
- "execution_count": 14,
1040
- "metadata": {},
1041
- "output_type": "execute_result"
1042
  }
1043
  ],
1044
  "source": [
1045
- "# get a dataframe of all games regardless of wether or not it was a tournament game or not\n",
1046
- "all_games_df = pd.concat([reg_games_df, tourney_games_df])\n",
1047
- "all_games_df.head()"
1048
  ]
1049
  },
1050
  {
1051
  "cell_type": "code",
1052
- "execution_count": 18,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1053
  "metadata": {},
1054
  "outputs": [
1055
  {
@@ -1084,8 +963,6 @@
1084
  " <th>reg_WFGM_sum</th>\n",
1085
  " <th>reg_WFGM_min</th>\n",
1086
  " <th>...</th>\n",
1087
- " <th>tourney_PF_std</th>\n",
1088
- " <th>tourney_PF_mean</th>\n",
1089
  " <th>tourney_PA_sum</th>\n",
1090
  " <th>tourney_PA_min</th>\n",
1091
  " <th>tourney_PA_max</th>\n",
@@ -1094,6 +971,8 @@
1094
  " <th>tourney_PA_mean</th>\n",
1095
  " <th>FirstD1Season</th>\n",
1096
  " <th>LastD1Season</th>\n",
 
 
1097
  " </tr>\n",
1098
  " </thead>\n",
1099
  " <tbody>\n",
@@ -1110,8 +989,6 @@
1110
  " <td>661.00000</td>\n",
1111
  " <td>661.000000</td>\n",
1112
  " <td>...</td>\n",
1113
- " <td>661.0</td>\n",
1114
- " <td>661.000000</td>\n",
1115
  " <td>661.000000</td>\n",
1116
  " <td>661.000000</td>\n",
1117
  " <td>661.000000</td>\n",
@@ -1120,6 +997,8 @@
1120
  " <td>661.000000</td>\n",
1121
  " <td>661.000000</td>\n",
1122
  " <td>661.0</td>\n",
 
 
1123
  " </tr>\n",
1124
  " <tr>\n",
1125
  " <th>mean</th>\n",
@@ -1134,8 +1013,6 @@
1134
  " <td>644.32829</td>\n",
1135
  " <td>19.069592</td>\n",
1136
  " <td>...</td>\n",
1137
- " <td>0.0</td>\n",
1138
- " <td>18.833585</td>\n",
1139
  " <td>76.003026</td>\n",
1140
  " <td>76.003026</td>\n",
1141
  " <td>76.003026</td>\n",
@@ -1144,6 +1021,8 @@
1144
  " <td>76.003026</td>\n",
1145
  " <td>1985.512859</td>\n",
1146
  " <td>2024.0</td>\n",
 
 
1147
  " </tr>\n",
1148
  " <tr>\n",
1149
  " <th>std</th>\n",
@@ -1158,8 +1037,6 @@
1158
  " <td>120.26041</td>\n",
1159
  " <td>2.439200</td>\n",
1160
  " <td>...</td>\n",
1161
- " <td>0.0</td>\n",
1162
- " <td>4.233291</td>\n",
1163
  " <td>10.964128</td>\n",
1164
  " <td>10.964128</td>\n",
1165
  " <td>10.964128</td>\n",
@@ -1168,6 +1045,8 @@
1168
  " <td>10.964128</td>\n",
1169
  " <td>2.812079</td>\n",
1170
  " <td>0.0</td>\n",
 
 
1171
  " </tr>\n",
1172
  " <tr>\n",
1173
  " <th>min</th>\n",
@@ -1182,8 +1061,6 @@
1182
  " <td>248.00000</td>\n",
1183
  " <td>12.000000</td>\n",
1184
  " <td>...</td>\n",
1185
- " <td>0.0</td>\n",
1186
- " <td>7.000000</td>\n",
1187
  " <td>47.000000</td>\n",
1188
  " <td>47.000000</td>\n",
1189
  " <td>47.000000</td>\n",
@@ -1192,6 +1069,8 @@
1192
  " <td>47.000000</td>\n",
1193
  " <td>1985.000000</td>\n",
1194
  " <td>2024.0</td>\n",
 
 
1195
  " </tr>\n",
1196
  " <tr>\n",
1197
  " <th>25%</th>\n",
@@ -1206,8 +1085,6 @@
1206
  " <td>569.00000</td>\n",
1207
  " <td>17.000000</td>\n",
1208
  " <td>...</td>\n",
1209
- " <td>0.0</td>\n",
1210
- " <td>16.000000</td>\n",
1211
  " <td>69.000000</td>\n",
1212
  " <td>69.000000</td>\n",
1213
  " <td>69.000000</td>\n",
@@ -1216,6 +1093,8 @@
1216
  " <td>69.000000</td>\n",
1217
  " <td>1985.000000</td>\n",
1218
  " <td>2024.0</td>\n",
 
 
1219
  " </tr>\n",
1220
  " <tr>\n",
1221
  " <th>50%</th>\n",
@@ -1230,8 +1109,6 @@
1230
  " <td>639.00000</td>\n",
1231
  " <td>19.000000</td>\n",
1232
  " <td>...</td>\n",
1233
- " <td>0.0</td>\n",
1234
- " <td>19.000000</td>\n",
1235
  " <td>76.000000</td>\n",
1236
  " <td>76.000000</td>\n",
1237
  " <td>76.000000</td>\n",
@@ -1240,6 +1117,8 @@
1240
  " <td>76.000000</td>\n",
1241
  " <td>1985.000000</td>\n",
1242
  " <td>2024.0</td>\n",
 
 
1243
  " </tr>\n",
1244
  " <tr>\n",
1245
  " <th>75%</th>\n",
@@ -1254,8 +1133,6 @@
1254
  " <td>721.00000</td>\n",
1255
  " <td>21.000000</td>\n",
1256
  " <td>...</td>\n",
1257
- " <td>0.0</td>\n",
1258
- " <td>22.000000</td>\n",
1259
  " <td>83.000000</td>\n",
1260
  " <td>83.000000</td>\n",
1261
  " <td>83.000000</td>\n",
@@ -1264,6 +1141,8 @@
1264
  " <td>83.000000</td>\n",
1265
  " <td>1985.000000</td>\n",
1266
  " <td>2024.0</td>\n",
 
 
1267
  " </tr>\n",
1268
  " <tr>\n",
1269
  " <th>max</th>\n",
@@ -1278,8 +1157,6 @@
1278
  " <td>1025.00000</td>\n",
1279
  " <td>27.000000</td>\n",
1280
  " <td>...</td>\n",
1281
- " <td>0.0</td>\n",
1282
- " <td>33.000000</td>\n",
1283
  " <td>121.000000</td>\n",
1284
  " <td>121.000000</td>\n",
1285
  " <td>121.000000</td>\n",
@@ -1288,10 +1165,12 @@
1288
  " <td>121.000000</td>\n",
1289
  " <td>2014.000000</td>\n",
1290
  " <td>2024.0</td>\n",
 
 
1291
  " </tr>\n",
1292
  " </tbody>\n",
1293
  "</table>\n",
1294
- "<p>8 rows × 550 columns</p>\n",
1295
  "</div>"
1296
  ],
1297
  "text/plain": [
@@ -1315,40 +1194,40 @@
1315
  "75% 105.000000 80.000000 11.584882 80.541667 \n",
1316
  "max 144.000000 91.500000 16.534890 91.689655 \n",
1317
  "\n",
1318
- " reg_WFGM_sum reg_WFGM_min ... tourney_PF_std tourney_PF_mean \\\n",
1319
- "count 661.00000 661.000000 ... 661.0 661.000000 \n",
1320
- "mean 644.32829 19.069592 ... 0.0 18.833585 \n",
1321
- "std 120.26041 2.439200 ... 0.0 4.233291 \n",
1322
- "min 248.00000 12.000000 ... 0.0 7.000000 \n",
1323
- "25% 569.00000 17.000000 ... 0.0 16.000000 \n",
1324
- "50% 639.00000 19.000000 ... 0.0 19.000000 \n",
1325
- "75% 721.00000 21.000000 ... 0.0 22.000000 \n",
1326
- "max 1025.00000 27.000000 ... 0.0 33.000000 \n",
1327
  "\n",
1328
- " tourney_PA_sum tourney_PA_min tourney_PA_max tourney_PA_median \\\n",
1329
- "count 661.000000 661.000000 661.000000 661.000000 \n",
1330
- "mean 76.003026 76.003026 76.003026 76.003026 \n",
1331
- "std 10.964128 10.964128 10.964128 10.964128 \n",
1332
- "min 47.000000 47.000000 47.000000 47.000000 \n",
1333
- "25% 69.000000 69.000000 69.000000 69.000000 \n",
1334
- "50% 76.000000 76.000000 76.000000 76.000000 \n",
1335
- "75% 83.000000 83.000000 83.000000 83.000000 \n",
1336
- "max 121.000000 121.000000 121.000000 121.000000 \n",
1337
  "\n",
1338
- " tourney_PA_std tourney_PA_mean FirstD1Season LastD1Season \n",
1339
- "count 661.0 661.000000 661.000000 661.0 \n",
1340
- "mean 0.0 76.003026 1985.512859 2024.0 \n",
1341
- "std 0.0 10.964128 2.812079 0.0 \n",
1342
- "min 0.0 47.000000 1985.000000 2024.0 \n",
1343
- "25% 0.0 69.000000 1985.000000 2024.0 \n",
1344
- "50% 0.0 76.000000 1985.000000 2024.0 \n",
1345
- "75% 0.0 83.000000 1985.000000 2024.0 \n",
1346
- "max 0.0 121.000000 2014.000000 2024.0 \n",
1347
  "\n",
1348
- "[8 rows x 550 columns]"
1349
  ]
1350
  },
1351
- "execution_count": 18,
1352
  "metadata": {},
1353
  "output_type": "execute_result"
1354
  }
@@ -1358,9 +1237,220 @@
1358
  "tms_conf_seeds.describe()"
1359
  ]
1360
  },
 
 
 
 
 
 
 
1361
  {
1362
  "cell_type": "code",
1363
- "execution_count": 15,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1364
  "metadata": {},
1365
  "outputs": [
1366
  {
@@ -1395,8 +1485,6 @@
1395
  " <th>WFGM</th>\n",
1396
  " <th>WFGA</th>\n",
1397
  " <th>...</th>\n",
1398
- " <th>tourney_PA_min_L</th>\n",
1399
- " <th>tourney_PA_max_L</th>\n",
1400
  " <th>tourney_PA_median_L</th>\n",
1401
  " <th>tourney_PA_std_L</th>\n",
1402
  " <th>tourney_PA_mean_L</th>\n",
@@ -1405,6 +1493,8 @@
1405
  " <th>LastD1Season_L</th>\n",
1406
  " <th>ConfAbbrev_L</th>\n",
1407
  " <th>Seed_L</th>\n",
 
 
1408
  " </tr>\n",
1409
  " </thead>\n",
1410
  " <tbody>\n",
@@ -1421,8 +1511,6 @@
1421
  " <td>24</td>\n",
1422
  " <td>46</td>\n",
1423
  " <td>...</td>\n",
1424
- " <td>62</td>\n",
1425
- " <td>62</td>\n",
1426
  " <td>62.0</td>\n",
1427
  " <td>0.0</td>\n",
1428
  " <td>62.0</td>\n",
@@ -1431,6 +1519,8 @@
1431
  " <td>2024</td>\n",
1432
  " <td>big_ten</td>\n",
1433
  " <td>Y05</td>\n",
 
 
1434
  " </tr>\n",
1435
  " <tr>\n",
1436
  " <th>1</th>\n",
@@ -1445,8 +1535,6 @@
1445
  " <td>23</td>\n",
1446
  " <td>51</td>\n",
1447
  " <td>...</td>\n",
1448
- " <td>62</td>\n",
1449
- " <td>62</td>\n",
1450
  " <td>62.0</td>\n",
1451
  " <td>0.0</td>\n",
1452
  " <td>62.0</td>\n",
@@ -1455,6 +1543,8 @@
1455
  " <td>2024</td>\n",
1456
  " <td>big_ten</td>\n",
1457
  " <td>Y05</td>\n",
 
 
1458
  " </tr>\n",
1459
  " <tr>\n",
1460
  " <th>2</th>\n",
@@ -1469,8 +1559,6 @@
1469
  " <td>25</td>\n",
1470
  " <td>50</td>\n",
1471
  " <td>...</td>\n",
1472
- " <td>62</td>\n",
1473
- " <td>62</td>\n",
1474
  " <td>62.0</td>\n",
1475
  " <td>0.0</td>\n",
1476
  " <td>62.0</td>\n",
@@ -1479,6 +1567,8 @@
1479
  " <td>2024</td>\n",
1480
  " <td>big_ten</td>\n",
1481
  " <td>Y05</td>\n",
 
 
1482
  " </tr>\n",
1483
  " <tr>\n",
1484
  " <th>3</th>\n",
@@ -1493,8 +1583,6 @@
1493
  " <td>24</td>\n",
1494
  " <td>49</td>\n",
1495
  " <td>...</td>\n",
1496
- " <td>62</td>\n",
1497
- " <td>62</td>\n",
1498
  " <td>62.0</td>\n",
1499
  " <td>0.0</td>\n",
1500
  " <td>62.0</td>\n",
@@ -1503,6 +1591,8 @@
1503
  " <td>2024</td>\n",
1504
  " <td>big_ten</td>\n",
1505
  " <td>Y05</td>\n",
 
 
1506
  " </tr>\n",
1507
  " <tr>\n",
1508
  " <th>4</th>\n",
@@ -1517,8 +1607,6 @@
1517
  " <td>33</td>\n",
1518
  " <td>61</td>\n",
1519
  " <td>...</td>\n",
1520
- " <td>62</td>\n",
1521
- " <td>62</td>\n",
1522
  " <td>62.0</td>\n",
1523
  " <td>0.0</td>\n",
1524
  " <td>62.0</td>\n",
@@ -1527,10 +1615,12 @@
1527
  " <td>2024</td>\n",
1528
  " <td>big_ten</td>\n",
1529
  " <td>Y05</td>\n",
 
 
1530
  " </tr>\n",
1531
  " </tbody>\n",
1532
  "</table>\n",
1533
- "<p>5 rows × 1142 columns</p>\n",
1534
  "</div>"
1535
  ],
1536
  "text/plain": [
@@ -1541,31 +1631,31 @@
1541
  "3 2003 143 1246 63 1458 57 1 0 24 49 \n",
1542
  "4 2003 30 1448 90 1458 80 1 0 33 61 \n",
1543
  "\n",
1544
- " ... tourney_PA_min_L tourney_PA_max_L tourney_PA_median_L \\\n",
1545
- "0 ... 62 62 62.0 \n",
1546
- "1 ... 62 62 62.0 \n",
1547
- "2 ... 62 62 62.0 \n",
1548
- "3 ... 62 62 62.0 \n",
1549
- "4 ... 62 62 62.0 \n",
1550
  "\n",
1551
- " tourney_PA_std_L tourney_PA_mean_L TeamName_L FirstD1Season_L \\\n",
1552
- "0 0.0 62.0 Wisconsin 1985 \n",
1553
- "1 0.0 62.0 Wisconsin 1985 \n",
1554
- "2 0.0 62.0 Wisconsin 1985 \n",
1555
- "3 0.0 62.0 Wisconsin 1985 \n",
1556
- "4 0.0 62.0 Wisconsin 1985 \n",
1557
  "\n",
1558
- " LastD1Season_L ConfAbbrev_L Seed_L \n",
1559
- "0 2024 big_ten Y05 \n",
1560
- "1 2024 big_ten Y05 \n",
1561
- "2 2024 big_ten Y05 \n",
1562
- "3 2024 big_ten Y05 \n",
1563
- "4 2024 big_ten Y05 \n",
1564
  "\n",
1565
- "[5 rows x 1142 columns]"
1566
  ]
1567
  },
1568
- "execution_count": 15,
1569
  "metadata": {},
1570
  "output_type": "execute_result"
1571
  }
@@ -1592,7 +1682,7 @@
1592
  },
1593
  {
1594
  "cell_type": "code",
1595
- "execution_count": 16,
1596
  "metadata": {},
1597
  "outputs": [],
1598
  "source": [
 
73
  }
74
  ],
75
  "source": [
76
+ "# read in the tournament games data\n",
77
  "tourney_games_df = pd.read_csv(\n",
78
  " os.path.join(DATA_DIR, \"MNCAATourneyDetailedResults.csv\")\n",
79
  ")\n",
 
147
  }
148
  ],
149
  "source": [
150
+ "# read in regular season games data\n",
151
  "reg_games_df = pd.read_csv(\n",
152
  " os.path.join(DATA_DIR, \"MRegularSeasonDetailedResults.csv\")\n",
153
  ")\n",
 
849
  "cell_type": "code",
850
  "execution_count": 14,
851
  "metadata": {},
852
+ "outputs": [],
853
+ "source": [
854
+ "def seed_to_num(seed: str) -> int:\n",
855
+ "    \"\"\"Return the integer seed from a code such as 'Y05' (5) or 'W16a' (16).\"\"\"\n",
856
+ "    # Index 0 is the region letter; only a non-digit last character is a suffix to drop.\n",
857
+ "    if not seed[-1].isnumeric():\n",
858
+ "        return int(seed[1:-1])\n",
859
+ "    return int(seed[1:])\n",
860
+ "\n",
861
+ "# numeric seed for every team, derived from its \"Seed\" code\n",
862
+ "tms_conf_seeds[\"Seed_Num\"] = (\n",
863
+ "    tms_conf_seeds[\"Seed\"].map(seed_to_num)\n",
864
+ ")\n",
865
+ "\n",
866
+ "\n",
867
+ "region_mappings = {region: idx for idx, region in enumerate(sorted({seed[0] for seed in tms_conf_seeds[\"Seed\"]}), start=1)}  # region letter -> 1-based code in sorted letter order\n",
868
+ "\n",
869
+ "def region_to_num(region: str) -> int:\n",
870
+ "    \"\"\"Return the integer code stored in region_mappings for a region letter.\"\"\"\n",
871
+ "    if region not in region_mappings:  # membership test, so a stored code of 0 is not rejected\n",
872
+ "        raise KeyError(f\"Invalid Region '{region}'\")\n",
873
+ "    return region_mappings[region]\n",
874
+ "\n",
875
+ "# region code for every team, looked up from the first character of its \"Seed\"\n",
876
+ "tms_conf_seeds[\"Seed_Region\"] = tms_conf_seeds[\"Seed\"].map(\n",
877
+ "    lambda seed: region_to_num(seed[0])\n",
878
+ ")\n"
879
+ ]
880
+ },
881
+ {
882
+ "cell_type": "code",
883
+ "execution_count": 15,
884
+ "metadata": {},
885
  "outputs": [
886
  {
887
+ "name": "stdout",
888
+ "output_type": "stream",
889
+ "text": [
890
+ "<class 'pandas.core.series.Series'>\n",
891
+ "Int64Index: 661 entries, 0 to 660\n",
892
+ "Series name: Seed_Region\n",
893
+ "Non-Null Count Dtype\n",
894
+ "-------------- -----\n",
895
+ "661 non-null int64\n",
896
+ "dtypes: int64(1)\n",
897
+ "memory usage: 10.3 KB\n"
898
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899
  }
900
  ],
901
  "source": [
902
+ "tms_conf_seeds[\"Seed_Region\"].info()"
 
 
903
  ]
904
  },
905
  {
906
  "cell_type": "code",
907
+ "execution_count": 16,
908
+ "metadata": {},
909
+ "outputs": [
910
+ {
911
+ "name": "stdout",
912
+ "output_type": "stream",
913
+ "text": [
914
+ "<class 'pandas.core.series.Series'>\n",
915
+ "Int64Index: 661 entries, 0 to 660\n",
916
+ "Series name: Seed_Num\n",
917
+ "Non-Null Count Dtype\n",
918
+ "-------------- -----\n",
919
+ "661 non-null int64\n",
920
+ "dtypes: int64(1)\n",
921
+ "memory usage: 10.3 KB\n"
922
+ ]
923
+ }
924
+ ],
925
+ "source": [
926
+ "tms_conf_seeds[\"Seed_Num\"].info()"
927
+ ]
928
+ },
929
+ {
930
+ "cell_type": "code",
931
+ "execution_count": 17,
932
  "metadata": {},
933
  "outputs": [
934
  {
 
963
  " <th>reg_WFGM_sum</th>\n",
964
  " <th>reg_WFGM_min</th>\n",
965
  " <th>...</th>\n",
 
 
966
  " <th>tourney_PA_sum</th>\n",
967
  " <th>tourney_PA_min</th>\n",
968
  " <th>tourney_PA_max</th>\n",
 
971
  " <th>tourney_PA_mean</th>\n",
972
  " <th>FirstD1Season</th>\n",
973
  " <th>LastD1Season</th>\n",
974
+ " <th>Seed_Num</th>\n",
975
+ " <th>Seed_Region</th>\n",
976
  " </tr>\n",
977
  " </thead>\n",
978
  " <tbody>\n",
 
989
  " <td>661.00000</td>\n",
990
  " <td>661.000000</td>\n",
991
  " <td>...</td>\n",
 
 
992
  " <td>661.000000</td>\n",
993
  " <td>661.000000</td>\n",
994
  " <td>661.000000</td>\n",
 
997
  " <td>661.000000</td>\n",
998
  " <td>661.000000</td>\n",
999
  " <td>661.0</td>\n",
1000
+ " <td>661.000000</td>\n",
1001
+ " <td>661.000000</td>\n",
1002
  " </tr>\n",
1003
  " <tr>\n",
1004
  " <th>mean</th>\n",
 
1013
  " <td>644.32829</td>\n",
1014
  " <td>19.069592</td>\n",
1015
  " <td>...</td>\n",
 
 
1016
  " <td>76.003026</td>\n",
1017
  " <td>76.003026</td>\n",
1018
  " <td>76.003026</td>\n",
 
1021
  " <td>76.003026</td>\n",
1022
  " <td>1985.512859</td>\n",
1023
  " <td>2024.0</td>\n",
1024
+ " <td>1.366112</td>\n",
1025
+ " <td>49.022693</td>\n",
1026
  " </tr>\n",
1027
  " <tr>\n",
1028
  " <th>std</th>\n",
 
1037
  " <td>120.26041</td>\n",
1038
  " <td>2.439200</td>\n",
1039
  " <td>...</td>\n",
 
 
1040
  " <td>10.964128</td>\n",
1041
  " <td>10.964128</td>\n",
1042
  " <td>10.964128</td>\n",
 
1045
  " <td>10.964128</td>\n",
1046
  " <td>2.812079</td>\n",
1047
  " <td>0.0</td>\n",
1048
+ " <td>3.930341</td>\n",
1049
+ " <td>22.835534</td>\n",
1050
  " </tr>\n",
1051
  " <tr>\n",
1052
  " <th>min</th>\n",
 
1061
  " <td>248.00000</td>\n",
1062
  " <td>12.000000</td>\n",
1063
  " <td>...</td>\n",
 
 
1064
  " <td>47.000000</td>\n",
1065
  " <td>47.000000</td>\n",
1066
  " <td>47.000000</td>\n",
 
1069
  " <td>47.000000</td>\n",
1070
  " <td>1985.000000</td>\n",
1071
  " <td>2024.0</td>\n",
1072
+ " <td>0.000000</td>\n",
1073
+ " <td>19.000000</td>\n",
1074
  " </tr>\n",
1075
  " <tr>\n",
1076
  " <th>25%</th>\n",
 
1085
  " <td>569.00000</td>\n",
1086
  " <td>17.000000</td>\n",
1087
  " <td>...</td>\n",
 
 
1088
  " <td>69.000000</td>\n",
1089
  " <td>69.000000</td>\n",
1090
  " <td>69.000000</td>\n",
 
1093
  " <td>69.000000</td>\n",
1094
  " <td>1985.000000</td>\n",
1095
  " <td>2024.0</td>\n",
1096
+ " <td>0.000000</td>\n",
1097
+ " <td>19.000000</td>\n",
1098
  " </tr>\n",
1099
  " <tr>\n",
1100
  " <th>50%</th>\n",
 
1109
  " <td>639.00000</td>\n",
1110
  " <td>19.000000</td>\n",
1111
  " <td>...</td>\n",
 
 
1112
  " <td>76.000000</td>\n",
1113
  " <td>76.000000</td>\n",
1114
  " <td>76.000000</td>\n",
 
1117
  " <td>76.000000</td>\n",
1118
  " <td>1985.000000</td>\n",
1119
  " <td>2024.0</td>\n",
1120
+ " <td>0.000000</td>\n",
1121
+ " <td>39.000000</td>\n",
1122
  " </tr>\n",
1123
  " <tr>\n",
1124
  " <th>75%</th>\n",
 
1133
  " <td>721.00000</td>\n",
1134
  " <td>21.000000</td>\n",
1135
  " <td>...</td>\n",
 
 
1136
  " <td>83.000000</td>\n",
1137
  " <td>83.000000</td>\n",
1138
  " <td>83.000000</td>\n",
 
1141
  " <td>83.000000</td>\n",
1142
  " <td>1985.000000</td>\n",
1143
  " <td>2024.0</td>\n",
1144
+ " <td>1.000000</td>\n",
1145
+ " <td>59.000000</td>\n",
1146
  " </tr>\n",
1147
  " <tr>\n",
1148
  " <th>max</th>\n",
 
1157
  " <td>1025.00000</td>\n",
1158
  " <td>27.000000</td>\n",
1159
  " <td>...</td>\n",
 
 
1160
  " <td>121.000000</td>\n",
1161
  " <td>121.000000</td>\n",
1162
  " <td>121.000000</td>\n",
 
1165
  " <td>121.000000</td>\n",
1166
  " <td>2014.000000</td>\n",
1167
  " <td>2024.0</td>\n",
1168
+ " <td>16.000000</td>\n",
1169
+ " <td>80.000000</td>\n",
1170
  " </tr>\n",
1171
  " </tbody>\n",
1172
  "</table>\n",
1173
+ "<p>8 rows × 552 columns</p>\n",
1174
  "</div>"
1175
  ],
1176
  "text/plain": [
 
1194
  "75% 105.000000 80.000000 11.584882 80.541667 \n",
1195
  "max 144.000000 91.500000 16.534890 91.689655 \n",
1196
  "\n",
1197
+ " reg_WFGM_sum reg_WFGM_min ... tourney_PA_sum tourney_PA_min \\\n",
1198
+ "count 661.00000 661.000000 ... 661.000000 661.000000 \n",
1199
+ "mean 644.32829 19.069592 ... 76.003026 76.003026 \n",
1200
+ "std 120.26041 2.439200 ... 10.964128 10.964128 \n",
1201
+ "min 248.00000 12.000000 ... 47.000000 47.000000 \n",
1202
+ "25% 569.00000 17.000000 ... 69.000000 69.000000 \n",
1203
+ "50% 639.00000 19.000000 ... 76.000000 76.000000 \n",
1204
+ "75% 721.00000 21.000000 ... 83.000000 83.000000 \n",
1205
+ "max 1025.00000 27.000000 ... 121.000000 121.000000 \n",
1206
  "\n",
1207
+ " tourney_PA_max tourney_PA_median tourney_PA_std tourney_PA_mean \\\n",
1208
+ "count 661.000000 661.000000 661.0 661.000000 \n",
1209
+ "mean 76.003026 76.003026 0.0 76.003026 \n",
1210
+ "std 10.964128 10.964128 0.0 10.964128 \n",
1211
+ "min 47.000000 47.000000 0.0 47.000000 \n",
1212
+ "25% 69.000000 69.000000 0.0 69.000000 \n",
1213
+ "50% 76.000000 76.000000 0.0 76.000000 \n",
1214
+ "75% 83.000000 83.000000 0.0 83.000000 \n",
1215
+ "max 121.000000 121.000000 0.0 121.000000 \n",
1216
  "\n",
1217
+ " FirstD1Season LastD1Season Seed_Num Seed_Region \n",
1218
+ "count 661.000000 661.0 661.000000 661.000000 \n",
1219
+ "mean 1985.512859 2024.0 1.366112 49.022693 \n",
1220
+ "std 2.812079 0.0 3.930341 22.835534 \n",
1221
+ "min 1985.000000 2024.0 0.000000 19.000000 \n",
1222
+ "25% 1985.000000 2024.0 0.000000 19.000000 \n",
1223
+ "50% 1985.000000 2024.0 0.000000 39.000000 \n",
1224
+ "75% 1985.000000 2024.0 1.000000 59.000000 \n",
1225
+ "max 2014.000000 2024.0 16.000000 80.000000 \n",
1226
  "\n",
1227
+ "[8 rows x 552 columns]"
1228
  ]
1229
  },
1230
+ "execution_count": 17,
1231
  "metadata": {},
1232
  "output_type": "execute_result"
1233
  }
 
1237
  "tms_conf_seeds.describe()"
1238
  ]
1239
  },
1240
+ {
1241
+ "cell_type": "markdown",
1242
+ "metadata": {},
1243
+ "source": [
1244
+ "# Extra Dataset with Games and Team Data mapped together"
1245
+ ]
1246
+ },
1247
  {
1248
  "cell_type": "code",
1249
+ "execution_count": 18,
1250
+ "metadata": {},
1251
+ "outputs": [
1252
+ {
1253
+ "data": {
1254
+ "text/html": [
1255
+ "<div>\n",
1256
+ "<style scoped>\n",
1257
+ " .dataframe tbody tr th:only-of-type {\n",
1258
+ " vertical-align: middle;\n",
1259
+ " }\n",
1260
+ "\n",
1261
+ " .dataframe tbody tr th {\n",
1262
+ " vertical-align: top;\n",
1263
+ " }\n",
1264
+ "\n",
1265
+ " .dataframe thead th {\n",
1266
+ " text-align: right;\n",
1267
+ " }\n",
1268
+ "</style>\n",
1269
+ "<table border=\"1\" class=\"dataframe\">\n",
1270
+ " <thead>\n",
1271
+ " <tr style=\"text-align: right;\">\n",
1272
+ " <th></th>\n",
1273
+ " <th>Season</th>\n",
1274
+ " <th>DayNum</th>\n",
1275
+ " <th>WTeamID</th>\n",
1276
+ " <th>WScore</th>\n",
1277
+ " <th>LTeamID</th>\n",
1278
+ " <th>LScore</th>\n",
1279
+ " <th>WLoc</th>\n",
1280
+ " <th>NumOT</th>\n",
1281
+ " <th>WFGM</th>\n",
1282
+ " <th>WFGA</th>\n",
1283
+ " <th>...</th>\n",
1284
+ " <th>LDR</th>\n",
1285
+ " <th>LAst</th>\n",
1286
+ " <th>LTO</th>\n",
1287
+ " <th>LStl</th>\n",
1288
+ " <th>LBlk</th>\n",
1289
+ " <th>LPF</th>\n",
1290
+ " <th>GameType</th>\n",
1291
+ " <th>WPA</th>\n",
1292
+ " <th>LPA</th>\n",
1293
+ " <th>LLoc</th>\n",
1294
+ " </tr>\n",
1295
+ " </thead>\n",
1296
+ " <tbody>\n",
1297
+ " <tr>\n",
1298
+ " <th>0</th>\n",
1299
+ " <td>2003</td>\n",
1300
+ " <td>10</td>\n",
1301
+ " <td>1104</td>\n",
1302
+ " <td>68</td>\n",
1303
+ " <td>1328</td>\n",
1304
+ " <td>62</td>\n",
1305
+ " <td>1</td>\n",
1306
+ " <td>0</td>\n",
1307
+ " <td>27</td>\n",
1308
+ " <td>58</td>\n",
1309
+ " <td>...</td>\n",
1310
+ " <td>22</td>\n",
1311
+ " <td>8</td>\n",
1312
+ " <td>18</td>\n",
1313
+ " <td>9</td>\n",
1314
+ " <td>2</td>\n",
1315
+ " <td>20</td>\n",
1316
+ " <td>reg</td>\n",
1317
+ " <td>62</td>\n",
1318
+ " <td>68</td>\n",
1319
+ " <td>1</td>\n",
1320
+ " </tr>\n",
1321
+ " <tr>\n",
1322
+ " <th>1</th>\n",
1323
+ " <td>2003</td>\n",
1324
+ " <td>10</td>\n",
1325
+ " <td>1272</td>\n",
1326
+ " <td>70</td>\n",
1327
+ " <td>1393</td>\n",
1328
+ " <td>63</td>\n",
1329
+ " <td>1</td>\n",
1330
+ " <td>0</td>\n",
1331
+ " <td>26</td>\n",
1332
+ " <td>62</td>\n",
1333
+ " <td>...</td>\n",
1334
+ " <td>25</td>\n",
1335
+ " <td>7</td>\n",
1336
+ " <td>12</td>\n",
1337
+ " <td>8</td>\n",
1338
+ " <td>6</td>\n",
1339
+ " <td>16</td>\n",
1340
+ " <td>reg</td>\n",
1341
+ " <td>63</td>\n",
1342
+ " <td>70</td>\n",
1343
+ " <td>1</td>\n",
1344
+ " </tr>\n",
1345
+ " <tr>\n",
1346
+ " <th>2</th>\n",
1347
+ " <td>2003</td>\n",
1348
+ " <td>11</td>\n",
1349
+ " <td>1266</td>\n",
1350
+ " <td>73</td>\n",
1351
+ " <td>1437</td>\n",
1352
+ " <td>61</td>\n",
1353
+ " <td>1</td>\n",
1354
+ " <td>0</td>\n",
1355
+ " <td>24</td>\n",
1356
+ " <td>58</td>\n",
1357
+ " <td>...</td>\n",
1358
+ " <td>22</td>\n",
1359
+ " <td>9</td>\n",
1360
+ " <td>12</td>\n",
1361
+ " <td>2</td>\n",
1362
+ " <td>5</td>\n",
1363
+ " <td>23</td>\n",
1364
+ " <td>reg</td>\n",
1365
+ " <td>61</td>\n",
1366
+ " <td>73</td>\n",
1367
+ " <td>1</td>\n",
1368
+ " </tr>\n",
1369
+ " <tr>\n",
1370
+ " <th>3</th>\n",
1371
+ " <td>2003</td>\n",
1372
+ " <td>11</td>\n",
1373
+ " <td>1296</td>\n",
1374
+ " <td>56</td>\n",
1375
+ " <td>1457</td>\n",
1376
+ " <td>50</td>\n",
1377
+ " <td>1</td>\n",
1378
+ " <td>0</td>\n",
1379
+ " <td>18</td>\n",
1380
+ " <td>38</td>\n",
1381
+ " <td>...</td>\n",
1382
+ " <td>20</td>\n",
1383
+ " <td>9</td>\n",
1384
+ " <td>19</td>\n",
1385
+ " <td>4</td>\n",
1386
+ " <td>3</td>\n",
1387
+ " <td>23</td>\n",
1388
+ " <td>reg</td>\n",
1389
+ " <td>50</td>\n",
1390
+ " <td>56</td>\n",
1391
+ " <td>1</td>\n",
1392
+ " </tr>\n",
1393
+ " <tr>\n",
1394
+ " <th>4</th>\n",
1395
+ " <td>2003</td>\n",
1396
+ " <td>11</td>\n",
1397
+ " <td>1400</td>\n",
1398
+ " <td>77</td>\n",
1399
+ " <td>1208</td>\n",
1400
+ " <td>71</td>\n",
1401
+ " <td>1</td>\n",
1402
+ " <td>0</td>\n",
1403
+ " <td>30</td>\n",
1404
+ " <td>61</td>\n",
1405
+ " <td>...</td>\n",
1406
+ " <td>15</td>\n",
1407
+ " <td>12</td>\n",
1408
+ " <td>10</td>\n",
1409
+ " <td>7</td>\n",
1410
+ " <td>1</td>\n",
1411
+ " <td>14</td>\n",
1412
+ " <td>reg</td>\n",
1413
+ " <td>71</td>\n",
1414
+ " <td>77</td>\n",
1415
+ " <td>1</td>\n",
1416
+ " </tr>\n",
1417
+ " </tbody>\n",
1418
+ "</table>\n",
1419
+ "<p>5 rows × 38 columns</p>\n",
1420
+ "</div>"
1421
+ ],
1422
+ "text/plain": [
1423
+ " Season DayNum WTeamID WScore LTeamID LScore WLoc NumOT WFGM WFGA \\\n",
1424
+ "0 2003 10 1104 68 1328 62 1 0 27 58 \n",
1425
+ "1 2003 10 1272 70 1393 63 1 0 26 62 \n",
1426
+ "2 2003 11 1266 73 1437 61 1 0 24 58 \n",
1427
+ "3 2003 11 1296 56 1457 50 1 0 18 38 \n",
1428
+ "4 2003 11 1400 77 1208 71 1 0 30 61 \n",
1429
+ "\n",
1430
+ " ... LDR LAst LTO LStl LBlk LPF GameType WPA LPA LLoc \n",
1431
+ "0 ... 22 8 18 9 2 20 reg 62 68 1 \n",
1432
+ "1 ... 25 7 12 8 6 16 reg 63 70 1 \n",
1433
+ "2 ... 22 9 12 2 5 23 reg 61 73 1 \n",
1434
+ "3 ... 20 9 19 4 3 23 reg 50 56 1 \n",
1435
+ "4 ... 15 12 10 7 1 14 reg 71 77 1 \n",
1436
+ "\n",
1437
+ "[5 rows x 38 columns]"
1438
+ ]
1439
+ },
1440
+ "execution_count": 18,
1441
+ "metadata": {},
1442
+ "output_type": "execute_result"
1443
+ }
1444
+ ],
1445
+ "source": [
1446
+ "# get a dataframe of all games regardless of whether or not it was a tournament game\n",
1447
+ "all_games_df = pd.concat([reg_games_df, tourney_games_df])\n",
1448
+ "all_games_df.head()"
1449
+ ]
1450
+ },
1451
+ {
1452
+ "cell_type": "code",
1453
+ "execution_count": 19,
1454
  "metadata": {},
1455
  "outputs": [
1456
  {
 
1485
  " <th>WFGM</th>\n",
1486
  " <th>WFGA</th>\n",
1487
  " <th>...</th>\n",
 
 
1488
  " <th>tourney_PA_median_L</th>\n",
1489
  " <th>tourney_PA_std_L</th>\n",
1490
  " <th>tourney_PA_mean_L</th>\n",
 
1493
  " <th>LastD1Season_L</th>\n",
1494
  " <th>ConfAbbrev_L</th>\n",
1495
  " <th>Seed_L</th>\n",
1496
+ " <th>Seed_Num_L</th>\n",
1497
+ " <th>Seed_Region_L</th>\n",
1498
  " </tr>\n",
1499
  " </thead>\n",
1500
  " <tbody>\n",
 
1511
  " <td>24</td>\n",
1512
  " <td>46</td>\n",
1513
  " <td>...</td>\n",
 
 
1514
  " <td>62.0</td>\n",
1515
  " <td>0.0</td>\n",
1516
  " <td>62.0</td>\n",
 
1519
  " <td>2024</td>\n",
1520
  " <td>big_ten</td>\n",
1521
  " <td>Y05</td>\n",
1522
+ " <td>0</td>\n",
1523
+ " <td>59</td>\n",
1524
  " </tr>\n",
1525
  " <tr>\n",
1526
  " <th>1</th>\n",
 
1535
  " <td>23</td>\n",
1536
  " <td>51</td>\n",
1537
  " <td>...</td>\n",
 
 
1538
  " <td>62.0</td>\n",
1539
  " <td>0.0</td>\n",
1540
  " <td>62.0</td>\n",
 
1543
  " <td>2024</td>\n",
1544
  " <td>big_ten</td>\n",
1545
  " <td>Y05</td>\n",
1546
+ " <td>0</td>\n",
1547
+ " <td>59</td>\n",
1548
  " </tr>\n",
1549
  " <tr>\n",
1550
  " <th>2</th>\n",
 
1559
  " <td>25</td>\n",
1560
  " <td>50</td>\n",
1561
  " <td>...</td>\n",
 
 
1562
  " <td>62.0</td>\n",
1563
  " <td>0.0</td>\n",
1564
  " <td>62.0</td>\n",
 
1567
  " <td>2024</td>\n",
1568
  " <td>big_ten</td>\n",
1569
  " <td>Y05</td>\n",
1570
+ " <td>0</td>\n",
1571
+ " <td>59</td>\n",
1572
  " </tr>\n",
1573
  " <tr>\n",
1574
  " <th>3</th>\n",
 
1583
  " <td>24</td>\n",
1584
  " <td>49</td>\n",
1585
  " <td>...</td>\n",
 
 
1586
  " <td>62.0</td>\n",
1587
  " <td>0.0</td>\n",
1588
  " <td>62.0</td>\n",
 
1591
  " <td>2024</td>\n",
1592
  " <td>big_ten</td>\n",
1593
  " <td>Y05</td>\n",
1594
+ " <td>0</td>\n",
1595
+ " <td>59</td>\n",
1596
  " </tr>\n",
1597
  " <tr>\n",
1598
  " <th>4</th>\n",
 
1607
  " <td>33</td>\n",
1608
  " <td>61</td>\n",
1609
  " <td>...</td>\n",
 
 
1610
  " <td>62.0</td>\n",
1611
  " <td>0.0</td>\n",
1612
  " <td>62.0</td>\n",
 
1615
  " <td>2024</td>\n",
1616
  " <td>big_ten</td>\n",
1617
  " <td>Y05</td>\n",
1618
+ " <td>0</td>\n",
1619
+ " <td>59</td>\n",
1620
  " </tr>\n",
1621
  " </tbody>\n",
1622
  "</table>\n",
1623
+ "<p>5 rows × 1146 columns</p>\n",
1624
  "</div>"
1625
  ],
1626
  "text/plain": [
 
1631
  "3 2003 143 1246 63 1458 57 1 0 24 49 \n",
1632
  "4 2003 30 1448 90 1458 80 1 0 33 61 \n",
1633
  "\n",
1634
+ " ... tourney_PA_median_L tourney_PA_std_L tourney_PA_mean_L TeamName_L \\\n",
1635
+ "0 ... 62.0 0.0 62.0 Wisconsin \n",
1636
+ "1 ... 62.0 0.0 62.0 Wisconsin \n",
1637
+ "2 ... 62.0 0.0 62.0 Wisconsin \n",
1638
+ "3 ... 62.0 0.0 62.0 Wisconsin \n",
1639
+ "4 ... 62.0 0.0 62.0 Wisconsin \n",
1640
  "\n",
1641
+ " FirstD1Season_L LastD1Season_L ConfAbbrev_L Seed_L Seed_Num_L \\\n",
1642
+ "0 1985 2024 big_ten Y05 0 \n",
1643
+ "1 1985 2024 big_ten Y05 0 \n",
1644
+ "2 1985 2024 big_ten Y05 0 \n",
1645
+ "3 1985 2024 big_ten Y05 0 \n",
1646
+ "4 1985 2024 big_ten Y05 0 \n",
1647
  "\n",
1648
+ " Seed_Region_L \n",
1649
+ "0 59 \n",
1650
+ "1 59 \n",
1651
+ "2 59 \n",
1652
+ "3 59 \n",
1653
+ "4 59 \n",
1654
  "\n",
1655
+ "[5 rows x 1146 columns]"
1656
  ]
1657
  },
1658
+ "execution_count": 19,
1659
  "metadata": {},
1660
  "output_type": "execute_result"
1661
  }
 
1682
  },
1683
  {
1684
  "cell_type": "code",
1685
+ "execution_count": 20,
1686
  "metadata": {},
1687
  "outputs": [],
1688
  "source": [