Jensen-holm committed on
Commit
cb96cf5
1 Parent(s): 895afd0

adding the opponent chalk seed so that we can make baseline predictions

Browse files
data/AllSuperDetailedGames.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:565fdd04d067c0cc0b406ca74ac2fa7a8a6cf422961e14a971949b4fd72f1bc2
3
- size 929302721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed956b1d1296bd82b5f8da7e545e39e0ef6e93da301934abb3e0453391319073
3
+ size 976650726
data/AllTeamsAgg.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b458f333d57030cde3f975417c82fd5100acc787754e546102c83c2ada7c4d0e
3
- size 29572986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6829b8ed8a2a85a07297ddd87f3243fac14e08d5131d02bd32900576aab26b1
3
+ size 31040659
src/baseline.ipynb ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import os\n",
11
+ "\n",
12
+ "DATA_DIR = os.path.join(\"..\", \"data\")"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 3,
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "name": "stderr",
22
+ "output_type": "stream",
23
+ "text": [
24
+ "/var/folders/v8/0hd98b512cn3ms2rz146k7jw0000gn/T/ipykernel_46104/712369024.py:1: DtypeWarning: Columns (481,482,483) have mixed types. Specify dtype option on import or set low_memory=False.\n",
25
+ " games_df = pd.read_csv(os.path.join(DATA_DIR, \"AllSuperDetailedGames.csv\"))\n"
26
+ ]
27
+ },
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "<class 'pandas.core.frame.DataFrame'>\n",
33
+ "RangeIndex: 377608 entries, 0 to 377607\n",
34
+ "Columns: 487 entries, Unnamed: 0 to ChalkSeed\n",
35
+ "dtypes: float64(347), int64(133), object(7)\n",
36
+ "memory usage: 1.4+ GB\n"
37
+ ]
38
+ }
39
+ ],
40
+ "source": [
41
+ "games_df = pd.read_csv(os.path.join(DATA_DIR, \"AllSuperDetailedGames.csv\"))\n",
42
+ "games_df.info()"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": null,
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "# create a baseline model that predicts the winner of the game based only on which team has the higher seed\n",
52
+ "# and if the seeds are the same, let the winning percentage determine the winner. We will compare\n",
53
+ "# our ML models to this one to decide whether their performance is actually good.\n",
54
+ "\n",
55
+ "def predict_baseline(row: pd.Series) -> int:\n",
56
+ " if row[\"ChalkSeed Team\"] > row[\"OppChalkSeed Opp\"]:\n",
57
+ " return 1\n",
58
+ " if row[\"Win mean reg\"] > row[\"OppWin mean reg\"]:\n",
59
+ " return 1\n",
60
+ " return 0\n",
61
+ "\n",
62
+ "games_df[\"BaselinePrediction\"] = games_df.apply(\n",
63
+ " lambda row: predict_baseline(row),\n",
64
+ " axis=1,\n",
65
+ ")\n",
66
+ "\n",
67
+ "games_df[\"BaselinePrediction\"]"
68
+ ]
69
+ }
70
+ ],
71
+ "metadata": {
72
+ "kernelspec": {
73
+ "display_name": "Python 3",
74
+ "language": "python",
75
+ "name": "python3"
76
+ },
77
+ "language_info": {
78
+ "codemirror_mode": {
79
+ "name": "ipython",
80
+ "version": 3
81
+ },
82
+ "file_extension": ".py",
83
+ "mimetype": "text/x-python",
84
+ "name": "python",
85
+ "nbconvert_exporter": "python",
86
+ "pygments_lexer": "ipython3",
87
+ "version": "3.11.7"
88
+ }
89
+ },
90
+ "nbformat": 4,
91
+ "nbformat_minor": 2
92
+ }
src/pre_processing.ipynb CHANGED
@@ -475,46 +475,47 @@
475
  "text": [
476
  "<class 'pandas.core.frame.DataFrame'>\n",
477
  "RangeIndex: 373324 entries, 0 to 373323\n",
478
- "Data columns (total 35 columns):\n",
479
  " # Column Non-Null Count Dtype \n",
480
  "--- ------ -------------- ----- \n",
481
  " 0 Season 373324 non-null int64 \n",
482
  " 1 DayNum 373324 non-null int64 \n",
483
  " 2 TeamID 373324 non-null int64 \n",
484
  " 3 TeamScore 373324 non-null int64 \n",
485
- " 4 OppScore 373324 non-null int64 \n",
486
- " 5 WLoc 373324 non-null object\n",
487
- " 6 NumOT 373324 non-null int64 \n",
488
- " 7 TeamFGM 373324 non-null int64 \n",
489
- " 8 TeamFGA 373324 non-null int64 \n",
490
- " 9 TeamFGM3 373324 non-null int64 \n",
491
- " 10 WFGA3 373324 non-null int64 \n",
492
- " 11 TeamFTM 373324 non-null int64 \n",
493
- " 12 TeamFTA 373324 non-null int64 \n",
494
- " 13 TeamOR 373324 non-null int64 \n",
495
- " 14 TeamDR 373324 non-null int64 \n",
496
- " 15 TeamAst 373324 non-null int64 \n",
497
- " 16 TeamTO 373324 non-null int64 \n",
498
- " 17 TeamStl 373324 non-null int64 \n",
499
- " 18 TeamBlk 373324 non-null int64 \n",
500
- " 19 TeamPF 373324 non-null int64 \n",
501
- " 20 OppFGM 373324 non-null int64 \n",
502
- " 21 OppFGA 373324 non-null int64 \n",
503
- " 22 OppFGM3 373324 non-null int64 \n",
504
- " 23 LFGA3 373324 non-null int64 \n",
505
- " 24 OppFTM 373324 non-null int64 \n",
506
- " 25 OppFTA 373324 non-null int64 \n",
507
- " 26 OppOR 373324 non-null int64 \n",
508
- " 27 OppDR 373324 non-null int64 \n",
509
- " 28 OppAst 373324 non-null int64 \n",
510
- " 29 OppTO 373324 non-null int64 \n",
511
- " 30 OppStl 373324 non-null int64 \n",
512
- " 31 OppBlk 373324 non-null int64 \n",
513
- " 32 OppPF 373324 non-null int64 \n",
514
- " 33 League 373324 non-null object\n",
515
- " 34 GameResult 373324 non-null object\n",
516
- "dtypes: int64(32), object(3)\n",
517
- "memory usage: 99.7+ MB\n"
 
518
  ]
519
  }
520
  ],
@@ -522,14 +523,16 @@
522
  "\n",
523
  "detailed_reg_games_df = pd.concat([\n",
524
  " (\n",
525
- " detailed_reg_games_df[[col for col in detailed_reg_games_df.columns if col != \"LTeamID\"]]\n",
 
526
  " .assign(GameResult=\"W\")\n",
527
- " .rename(columns=w_renamed_cols | {\"WTeamID\": \"TeamID\"})\n",
528
  " ),\n",
529
  " (\n",
530
- " detailed_reg_games_df[[col for col in detailed_reg_games_df.columns if col != \"WTeamID\"]]\n",
 
531
  " .assign(GameResult=\"L\")\n",
532
- " .rename(columns=l_renamed_cols | {\"LTeamID\": \"TeamID\"})\n",
533
  " )\n",
534
  "\n",
535
  "]).reset_index(drop=True)\n",
@@ -548,46 +551,47 @@
548
  "text": [
549
  "<class 'pandas.core.frame.DataFrame'>\n",
550
  "RangeIndex: 4284 entries, 0 to 4283\n",
551
- "Data columns (total 35 columns):\n",
552
  " # Column Non-Null Count Dtype \n",
553
  "--- ------ -------------- ----- \n",
554
  " 0 Season 4284 non-null int64 \n",
555
  " 1 DayNum 4284 non-null int64 \n",
556
  " 2 TeamID 4284 non-null int64 \n",
557
  " 3 TeamScore 4284 non-null int64 \n",
558
- " 4 OppScore 4284 non-null int64 \n",
559
- " 5 WLoc 4284 non-null object\n",
560
- " 6 NumOT 4284 non-null int64 \n",
561
- " 7 TeamFGM 4284 non-null int64 \n",
562
- " 8 TeamFGA 4284 non-null int64 \n",
563
- " 9 TeamFGM3 4284 non-null int64 \n",
564
- " 10 WFGA3 4284 non-null int64 \n",
565
- " 11 TeamFTM 4284 non-null int64 \n",
566
- " 12 TeamFTA 4284 non-null int64 \n",
567
- " 13 TeamOR 4284 non-null int64 \n",
568
- " 14 TeamDR 4284 non-null int64 \n",
569
- " 15 TeamAst 4284 non-null int64 \n",
570
- " 16 TeamTO 4284 non-null int64 \n",
571
- " 17 TeamStl 4284 non-null int64 \n",
572
- " 18 TeamBlk 4284 non-null int64 \n",
573
- " 19 TeamPF 4284 non-null int64 \n",
574
- " 20 OppFGM 4284 non-null int64 \n",
575
- " 21 OppFGA 4284 non-null int64 \n",
576
- " 22 OppFGM3 4284 non-null int64 \n",
577
- " 23 LFGA3 4284 non-null int64 \n",
578
- " 24 OppFTM 4284 non-null int64 \n",
579
- " 25 OppFTA 4284 non-null int64 \n",
580
- " 26 OppOR 4284 non-null int64 \n",
581
- " 27 OppDR 4284 non-null int64 \n",
582
- " 28 OppAst 4284 non-null int64 \n",
583
- " 29 OppTO 4284 non-null int64 \n",
584
- " 30 OppStl 4284 non-null int64 \n",
585
- " 31 OppBlk 4284 non-null int64 \n",
586
- " 32 OppPF 4284 non-null int64 \n",
587
- " 33 League 4284 non-null object\n",
588
- " 34 GameResult 4284 non-null object\n",
589
- "dtypes: int64(32), object(3)\n",
590
- "memory usage: 1.1+ MB\n"
 
591
  ]
592
  }
593
  ],
@@ -595,14 +599,16 @@
595
  "# do the same thing for the tournament games\n",
596
  "detailed_tourney_games_df = pd.concat([\n",
597
  " (\n",
598
- " detailed_tourney_games_df[[col for col in detailed_tourney_games_df.columns if col != \"LTeamID\"]]\n",
 
599
  " .assign(GameResult=\"W\")\n",
600
- " .rename(columns=w_renamed_cols | {\"WTeamID\": \"TeamID\"})\n",
601
  " ),\n",
602
  " (\n",
603
- " detailed_tourney_games_df[[col for col in detailed_tourney_games_df.columns if col != \"WTeamID\"]]\n",
 
604
  " .assign(GameResult=\"L\")\n",
605
- " .rename(columns=l_renamed_cols | {\"LTeamID\": \"TeamID\"})\n",
606
  " )\n",
607
  "]).reset_index(drop=True)\n",
608
  "\n",
@@ -658,23 +664,23 @@
658
  " <th>DayNum</th>\n",
659
  " <th>TeamID</th>\n",
660
  " <th>TeamScore</th>\n",
 
661
  " <th>OppScore</th>\n",
662
  " <th>WLoc</th>\n",
663
  " <th>NumOT</th>\n",
664
  " <th>TeamFGM</th>\n",
665
  " <th>TeamFGA</th>\n",
666
- " <th>TeamFGM3</th>\n",
667
  " <th>...</th>\n",
 
 
 
668
  " <th>TODiff</th>\n",
669
  " <th>FTADiff</th>\n",
670
- " <th>FTMDiff</th>\n",
671
- " <th>FGM3Diff</th>\n",
672
- " <th>PFDiff</th>\n",
673
  " <th>FGADiff</th>\n",
 
674
  " <th>ORDiff</th>\n",
675
- " <th>BlkDiff</th>\n",
676
- " <th>ScoreDiff</th>\n",
677
- " <th>FGMDiff</th>\n",
678
  " </tr>\n",
679
  " </thead>\n",
680
  " <tbody>\n",
@@ -684,23 +690,23 @@
684
  " <td>74</td>\n",
685
  " <td>3158</td>\n",
686
  " <td>56</td>\n",
 
687
  " <td>84</td>\n",
688
  " <td>A</td>\n",
689
  " <td>0</td>\n",
690
  " <td>21</td>\n",
691
  " <td>55</td>\n",
692
- " <td>3</td>\n",
693
  " <td>...</td>\n",
 
 
 
694
  " <td>7</td>\n",
695
  " <td>-11</td>\n",
696
- " <td>-11</td>\n",
697
- " <td>-3</td>\n",
698
- " <td>9</td>\n",
699
  " <td>-12</td>\n",
700
  " <td>-11</td>\n",
701
- " <td>1</td>\n",
702
- " <td>-28</td>\n",
703
- " <td>-7</td>\n",
704
  " </tr>\n",
705
  " <tr>\n",
706
  " <th>100732</th>\n",
@@ -708,23 +714,23 @@
708
  " <td>103</td>\n",
709
  " <td>1439</td>\n",
710
  " <td>71</td>\n",
 
711
  " <td>59</td>\n",
712
  " <td>H</td>\n",
713
  " <td>0</td>\n",
714
  " <td>23</td>\n",
715
  " <td>60</td>\n",
716
- " <td>8</td>\n",
717
  " <td>...</td>\n",
 
 
 
718
  " <td>-6</td>\n",
719
  " <td>17</td>\n",
720
- " <td>16</td>\n",
721
- " <td>-2</td>\n",
722
- " <td>-9</td>\n",
723
  " <td>-4</td>\n",
 
724
  " <td>-1</td>\n",
725
- " <td>2</td>\n",
726
- " <td>12</td>\n",
727
- " <td>-1</td>\n",
728
  " </tr>\n",
729
  " <tr>\n",
730
  " <th>83150</th>\n",
@@ -732,23 +738,23 @@
732
  " <td>26</td>\n",
733
  " <td>1180</td>\n",
734
  " <td>82</td>\n",
 
735
  " <td>69</td>\n",
736
  " <td>H</td>\n",
737
  " <td>0</td>\n",
738
  " <td>27</td>\n",
739
  " <td>58</td>\n",
740
- " <td>8</td>\n",
741
  " <td>...</td>\n",
742
  " <td>1</td>\n",
743
- " <td>10</td>\n",
744
- " <td>14</td>\n",
745
  " <td>1</td>\n",
746
- " <td>-5</td>\n",
747
  " <td>-6</td>\n",
 
748
  " <td>4</td>\n",
749
- " <td>2</td>\n",
750
- " <td>13</td>\n",
751
- " <td>-1</td>\n",
752
  " </tr>\n",
753
  " <tr>\n",
754
  " <th>345009</th>\n",
@@ -756,23 +762,23 @@
756
  " <td>4</td>\n",
757
  " <td>3435</td>\n",
758
  " <td>58</td>\n",
 
759
  " <td>65</td>\n",
760
  " <td>H</td>\n",
761
  " <td>0</td>\n",
762
  " <td>19</td>\n",
763
  " <td>55</td>\n",
764
- " <td>5</td>\n",
765
  " <td>...</td>\n",
 
 
 
766
  " <td>-5</td>\n",
767
  " <td>-11</td>\n",
768
- " <td>-8</td>\n",
769
- " <td>-1</td>\n",
770
- " <td>7</td>\n",
771
  " <td>13</td>\n",
 
772
  " <td>2</td>\n",
773
- " <td>-3</td>\n",
774
- " <td>-7</td>\n",
775
- " <td>1</td>\n",
776
  " </tr>\n",
777
  " <tr>\n",
778
  " <th>318707</th>\n",
@@ -780,52 +786,52 @@
780
  " <td>128</td>\n",
781
  " <td>3322</td>\n",
782
  " <td>45</td>\n",
 
783
  " <td>63</td>\n",
784
  " <td>N</td>\n",
785
  " <td>0</td>\n",
786
  " <td>20</td>\n",
787
  " <td>51</td>\n",
788
- " <td>3</td>\n",
789
  " <td>...</td>\n",
 
 
 
790
  " <td>4</td>\n",
791
  " <td>-11</td>\n",
 
792
  " <td>-9</td>\n",
793
- " <td>-3</td>\n",
794
  " <td>2</td>\n",
795
- " <td>3</td>\n",
796
  " <td>2</td>\n",
797
- " <td>1</td>\n",
798
- " <td>-18</td>\n",
799
  " <td>-3</td>\n",
800
  " </tr>\n",
801
  " </tbody>\n",
802
  "</table>\n",
803
- "<p>5 rows × 48 columns</p>\n",
804
  "</div>"
805
  ],
806
  "text/plain": [
807
- " Season DayNum TeamID TeamScore OppScore WLoc NumOT TeamFGM \\\n",
808
- "337067 2017 74 3158 56 84 A 0 21 \n",
809
- "100732 2022 103 1439 71 59 H 0 23 \n",
810
- "83150 2019 26 1180 82 69 H 0 27 \n",
811
- "345009 2019 4 3435 58 65 H 0 19 \n",
812
- "318707 2013 128 3322 45 63 N 0 20 \n",
813
  "\n",
814
- " TeamFGA TeamFGM3 ... TODiff FTADiff FTMDiff FGM3Diff PFDiff \\\n",
815
- "337067 55 3 ... 7 -11 -11 -3 9 \n",
816
- "100732 60 8 ... -6 17 16 -2 -9 \n",
817
- "83150 58 8 ... 1 10 14 1 -5 \n",
818
- "345009 55 5 ... -5 -11 -8 -1 7 \n",
819
- "318707 51 3 ... 4 -11 -9 -3 2 \n",
820
  "\n",
821
- " FGADiff ORDiff BlkDiff ScoreDiff FGMDiff \n",
822
- "337067 -12 -11 1 -28 -7 \n",
823
- "100732 -4 -1 2 12 -1 \n",
824
- "83150 -6 4 2 13 -1 \n",
825
- "345009 13 2 -3 -7 1 \n",
826
- "318707 3 2 1 -18 -3 \n",
827
  "\n",
828
- "[5 rows x 48 columns]"
829
  ]
830
  },
831
  "execution_count": 8,
@@ -848,9 +854,19 @@
848
  " axis=1,\n",
849
  ")\n",
850
  "\n",
 
 
 
 
 
851
  "detailed_tourney_games_df[\"Win\"] = detailed_tourney_games_df.apply(\n",
852
  " lambda row: 0 if row[\"GameResult\"] == \"L\" else 1,\n",
853
  " axis=1,\n",
 
 
 
 
 
854
  ")"
855
  ]
856
  },
@@ -868,6 +884,37 @@
868
  "])"
869
  ]
870
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
871
  {
872
  "cell_type": "markdown",
873
  "metadata": {},
@@ -877,7 +924,7 @@
877
  },
878
  {
879
  "cell_type": "code",
880
- "execution_count": 11,
881
  "metadata": {},
882
  "outputs": [],
883
  "source": [
@@ -890,6 +937,7 @@
890
  " \"TeamLoc\",\n",
891
  " \"Season\",\n",
892
  " \"DayNum\",\n",
 
893
  "}\n",
894
  "\n",
895
  "agg_funcs = [\n",
@@ -903,7 +951,7 @@
903
  },
904
  {
905
  "cell_type": "code",
906
- "execution_count": 12,
907
  "metadata": {
908
  "tags": []
909
  },
@@ -937,19 +985,19 @@
937
  " <th>TeamScore std</th>\n",
938
  " <th>TeamScore median</th>\n",
939
  " <th>TeamScore mean</th>\n",
940
- " <th>OppScore min</th>\n",
941
- " <th>OppScore max</th>\n",
942
  " <th>...</th>\n",
943
- " <th>FGMDiff min</th>\n",
944
- " <th>FGMDiff max</th>\n",
945
- " <th>FGMDiff std</th>\n",
946
- " <th>FGMDiff median</th>\n",
947
- " <th>FGMDiff mean</th>\n",
948
  " <th>Win min</th>\n",
949
  " <th>Win max</th>\n",
950
  " <th>Win std</th>\n",
951
  " <th>Win median</th>\n",
952
  " <th>Win mean</th>\n",
 
 
 
 
 
953
  " </tr>\n",
954
  " </thead>\n",
955
  " <tbody>\n",
@@ -963,19 +1011,19 @@
963
  " <td>10.808339</td>\n",
964
  " <td>61.0</td>\n",
965
  " <td>58.965517</td>\n",
966
- " <td>36</td>\n",
967
- " <td>85</td>\n",
968
  " <td>...</td>\n",
969
- " <td>-17</td>\n",
970
- " <td>14</td>\n",
971
- " <td>7.252314</td>\n",
972
- " <td>-2.0</td>\n",
973
- " <td>-1.896552</td>\n",
974
  " <td>0</td>\n",
975
  " <td>1</td>\n",
976
  " <td>0.508548</td>\n",
977
  " <td>0.0</td>\n",
978
  " <td>0.482759</td>\n",
 
 
 
 
 
979
  " </tr>\n",
980
  " <tr>\n",
981
  " <th>6900</th>\n",
@@ -987,19 +1035,19 @@
987
  " <td>12.283247</td>\n",
988
  " <td>67.0</td>\n",
989
  " <td>66.466667</td>\n",
990
- " <td>44</td>\n",
991
- " <td>97</td>\n",
992
  " <td>...</td>\n",
993
- " <td>-18</td>\n",
994
- " <td>9</td>\n",
995
- " <td>6.210854</td>\n",
996
- " <td>-1.0</td>\n",
997
- " <td>-0.666667</td>\n",
998
  " <td>0</td>\n",
999
  " <td>1</td>\n",
1000
  " <td>0.479463</td>\n",
1001
  " <td>0.0</td>\n",
1002
  " <td>0.333333</td>\n",
 
 
 
 
 
1003
  " </tr>\n",
1004
  " <tr>\n",
1005
  " <th>4406</th>\n",
@@ -1011,19 +1059,19 @@
1011
  " <td>10.019980</td>\n",
1012
  " <td>72.0</td>\n",
1013
  " <td>73.000000</td>\n",
1014
- " <td>61</td>\n",
1015
- " <td>103</td>\n",
1016
  " <td>...</td>\n",
1017
- " <td>-17</td>\n",
1018
- " <td>9</td>\n",
1019
- " <td>5.037707</td>\n",
1020
- " <td>0.0</td>\n",
1021
- " <td>-0.612903</td>\n",
1022
  " <td>0</td>\n",
1023
  " <td>1</td>\n",
1024
  " <td>0.508001</td>\n",
1025
  " <td>0.0</td>\n",
1026
  " <td>0.483871</td>\n",
 
 
 
 
 
1027
  " </tr>\n",
1028
  " <tr>\n",
1029
  " <th>4233</th>\n",
@@ -1035,19 +1083,19 @@
1035
  " <td>12.911860</td>\n",
1036
  " <td>77.0</td>\n",
1037
  " <td>75.870968</td>\n",
1038
- " <td>47</td>\n",
1039
- " <td>81</td>\n",
1040
  " <td>...</td>\n",
1041
- " <td>-8</td>\n",
1042
- " <td>12</td>\n",
1043
- " <td>5.142904</td>\n",
1044
- " <td>3.0</td>\n",
1045
- " <td>2.129032</td>\n",
1046
  " <td>0</td>\n",
1047
  " <td>1</td>\n",
1048
  " <td>0.401610</td>\n",
1049
  " <td>1.0</td>\n",
1050
  " <td>0.806452</td>\n",
 
 
 
 
 
1051
  " </tr>\n",
1052
  " <tr>\n",
1053
  " <th>3407</th>\n",
@@ -1059,19 +1107,19 @@
1059
  " <td>11.841315</td>\n",
1060
  " <td>75.5</td>\n",
1061
  " <td>75.906250</td>\n",
1062
- " <td>37</td>\n",
1063
- " <td>89</td>\n",
1064
  " <td>...</td>\n",
1065
- " <td>-7</td>\n",
1066
- " <td>18</td>\n",
1067
- " <td>6.097246</td>\n",
1068
- " <td>4.5</td>\n",
1069
- " <td>4.281250</td>\n",
1070
  " <td>0</td>\n",
1071
  " <td>1</td>\n",
1072
  " <td>0.456803</td>\n",
1073
  " <td>1.0</td>\n",
1074
  " <td>0.718750</td>\n",
 
 
 
 
 
1075
  " </tr>\n",
1076
  " <tr>\n",
1077
  " <th>5190</th>\n",
@@ -1083,19 +1131,19 @@
1083
  " <td>10.298567</td>\n",
1084
  " <td>67.0</td>\n",
1085
  " <td>65.062500</td>\n",
1086
- " <td>45</td>\n",
1087
- " <td>106</td>\n",
1088
  " <td>...</td>\n",
1089
- " <td>-25</td>\n",
1090
- " <td>7</td>\n",
1091
- " <td>5.870151</td>\n",
1092
- " <td>-0.5</td>\n",
1093
- " <td>-1.843750</td>\n",
1094
  " <td>0</td>\n",
1095
  " <td>1</td>\n",
1096
  " <td>0.470929</td>\n",
1097
  " <td>0.0</td>\n",
1098
  " <td>0.312500</td>\n",
 
 
 
 
 
1099
  " </tr>\n",
1100
  " <tr>\n",
1101
  " <th>1892</th>\n",
@@ -1107,19 +1155,19 @@
1107
  " <td>14.194618</td>\n",
1108
  " <td>76.0</td>\n",
1109
  " <td>76.777778</td>\n",
1110
- " <td>59</td>\n",
1111
- " <td>107</td>\n",
1112
  " <td>...</td>\n",
1113
- " <td>-19</td>\n",
1114
- " <td>13</td>\n",
1115
- " <td>6.835261</td>\n",
1116
- " <td>1.0</td>\n",
1117
- " <td>0.481481</td>\n",
1118
  " <td>0</td>\n",
1119
  " <td>1</td>\n",
1120
  " <td>0.492103</td>\n",
1121
  " <td>0.0</td>\n",
1122
  " <td>0.370370</td>\n",
 
 
 
 
 
1123
  " </tr>\n",
1124
  " <tr>\n",
1125
  " <th>10020</th>\n",
@@ -1131,19 +1179,19 @@
1131
  " <td>13.385137</td>\n",
1132
  " <td>53.0</td>\n",
1133
  " <td>55.476190</td>\n",
1134
- " <td>41</td>\n",
1135
- " <td>117</td>\n",
1136
  " <td>...</td>\n",
1137
- " <td>-41</td>\n",
1138
- " <td>11</td>\n",
1139
- " <td>10.992205</td>\n",
1140
- " <td>-4.0</td>\n",
1141
- " <td>-6.142857</td>\n",
1142
  " <td>0</td>\n",
1143
  " <td>1</td>\n",
1144
  " <td>0.462910</td>\n",
1145
  " <td>0.0</td>\n",
1146
  " <td>0.285714</td>\n",
 
 
 
 
 
1147
  " </tr>\n",
1148
  " <tr>\n",
1149
  " <th>9567</th>\n",
@@ -1155,19 +1203,19 @@
1155
  " <td>11.319009</td>\n",
1156
  " <td>62.5</td>\n",
1157
  " <td>63.593750</td>\n",
1158
- " <td>45</td>\n",
1159
- " <td>100</td>\n",
1160
  " <td>...</td>\n",
1161
- " <td>-21</td>\n",
1162
- " <td>9</td>\n",
1163
- " <td>6.956083</td>\n",
1164
- " <td>-3.0</td>\n",
1165
- " <td>-3.250000</td>\n",
1166
  " <td>0</td>\n",
1167
  " <td>1</td>\n",
1168
  " <td>0.504016</td>\n",
1169
  " <td>0.0</td>\n",
1170
  " <td>0.437500</td>\n",
 
 
 
 
 
1171
  " </tr>\n",
1172
  " <tr>\n",
1173
  " <th>12617</th>\n",
@@ -1179,23 +1227,23 @@
1179
  " <td>12.518374</td>\n",
1180
  " <td>65.0</td>\n",
1181
  " <td>65.750000</td>\n",
1182
- " <td>21</td>\n",
1183
- " <td>79</td>\n",
1184
  " <td>...</td>\n",
1185
- " <td>-10</td>\n",
1186
- " <td>22</td>\n",
1187
- " <td>7.768254</td>\n",
1188
- " <td>5.5</td>\n",
1189
- " <td>5.906250</td>\n",
1190
  " <td>0</td>\n",
1191
  " <td>1</td>\n",
1192
  " <td>0.456803</td>\n",
1193
  " <td>1.0</td>\n",
1194
  " <td>0.718750</td>\n",
 
 
 
 
 
1195
  " </tr>\n",
1196
  " </tbody>\n",
1197
  "</table>\n",
1198
- "<p>10 rows × 218 columns</p>\n",
1199
  "</div>"
1200
  ],
1201
  "text/plain": [
@@ -1211,46 +1259,46 @@
1211
  "9567 3240 2014 W 43 84 11.319009 \n",
1212
  "12617 3452 2011 W 39 90 12.518374 \n",
1213
  "\n",
1214
- " TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
1215
- "12348 61.0 58.965517 36 85 ... \n",
1216
- "6900 67.0 66.466667 44 97 ... \n",
1217
- "4406 72.0 73.000000 61 103 ... \n",
1218
- "4233 77.0 75.870968 47 81 ... \n",
1219
- "3407 75.5 75.906250 37 89 ... \n",
1220
- "5190 67.0 65.062500 45 106 ... \n",
1221
- "1892 76.0 76.777778 59 107 ... \n",
1222
- "10020 53.0 55.476190 41 117 ... \n",
1223
- "9567 62.5 63.593750 45 100 ... \n",
1224
- "12617 65.0 65.750000 21 79 ... \n",
1225
  "\n",
1226
- " FGMDiff min FGMDiff max FGMDiff std FGMDiff median FGMDiff mean \\\n",
1227
- "12348 -17 14 7.252314 -2.0 -1.896552 \n",
1228
- "6900 -18 9 6.210854 -1.0 -0.666667 \n",
1229
- "4406 -17 9 5.037707 0.0 -0.612903 \n",
1230
- "4233 -8 12 5.142904 3.0 2.129032 \n",
1231
- "3407 -7 18 6.097246 4.5 4.281250 \n",
1232
- "5190 -25 7 5.870151 -0.5 -1.843750 \n",
1233
- "1892 -19 13 6.835261 1.0 0.481481 \n",
1234
- "10020 -41 11 10.992205 -4.0 -6.142857 \n",
1235
- "9567 -21 9 6.956083 -3.0 -3.250000 \n",
1236
- "12617 -10 22 7.768254 5.5 5.906250 \n",
1237
  "\n",
1238
- " Win min Win max Win std Win median Win mean \n",
1239
- "12348 0 1 0.508548 0.0 0.482759 \n",
1240
- "6900 0 1 0.479463 0.0 0.333333 \n",
1241
- "4406 0 1 0.508001 0.0 0.483871 \n",
1242
- "4233 0 1 0.401610 1.0 0.806452 \n",
1243
- "3407 0 1 0.456803 1.0 0.718750 \n",
1244
- "5190 0 1 0.470929 0.0 0.312500 \n",
1245
- "1892 0 1 0.492103 0.0 0.370370 \n",
1246
- "10020 0 1 0.462910 0.0 0.285714 \n",
1247
- "9567 0 1 0.504016 0.0 0.437500 \n",
1248
- "12617 0 1 0.456803 1.0 0.718750 \n",
1249
  "\n",
1250
- "[10 rows x 218 columns]"
1251
  ]
1252
  },
1253
- "execution_count": 12,
1254
  "metadata": {},
1255
  "output_type": "execute_result"
1256
  }
@@ -1269,7 +1317,7 @@
1269
  },
1270
  {
1271
  "cell_type": "code",
1272
- "execution_count": 13,
1273
  "metadata": {},
1274
  "outputs": [
1275
  {
@@ -1301,19 +1349,19 @@
1301
  " <th>TeamScore std</th>\n",
1302
  " <th>TeamScore median</th>\n",
1303
  " <th>TeamScore mean</th>\n",
1304
- " <th>OppScore min</th>\n",
1305
- " <th>OppScore max</th>\n",
1306
  " <th>...</th>\n",
1307
- " <th>FGMDiff min</th>\n",
1308
- " <th>FGMDiff max</th>\n",
1309
- " <th>FGMDiff std</th>\n",
1310
- " <th>FGMDiff median</th>\n",
1311
- " <th>FGMDiff mean</th>\n",
1312
  " <th>Win min</th>\n",
1313
  " <th>Win max</th>\n",
1314
  " <th>Win std</th>\n",
1315
  " <th>Win median</th>\n",
1316
  " <th>Win mean</th>\n",
 
 
 
 
 
1317
  " </tr>\n",
1318
  " </thead>\n",
1319
  " <tbody>\n",
@@ -1327,19 +1375,19 @@
1327
  " <td>10.408330</td>\n",
1328
  " <td>77.0</td>\n",
1329
  " <td>73.666667</td>\n",
1330
- " <td>53</td>\n",
1331
- " <td>82</td>\n",
1332
  " <td>...</td>\n",
1333
- " <td>-10</td>\n",
1334
- " <td>13</td>\n",
1335
- " <td>11.789826</td>\n",
1336
- " <td>-3.0</td>\n",
1337
- " <td>0.000000</td>\n",
1338
  " <td>0</td>\n",
1339
  " <td>1</td>\n",
1340
  " <td>0.577350</td>\n",
1341
  " <td>1.0</td>\n",
1342
  " <td>0.666667</td>\n",
 
 
 
 
 
1343
  " </tr>\n",
1344
  " <tr>\n",
1345
  " <th>1601</th>\n",
@@ -1351,19 +1399,19 @@
1351
  " <td>NaN</td>\n",
1352
  " <td>63.0</td>\n",
1353
  " <td>63.000000</td>\n",
1354
- " <td>71</td>\n",
1355
- " <td>71</td>\n",
1356
  " <td>...</td>\n",
1357
- " <td>-2</td>\n",
1358
- " <td>-2</td>\n",
1359
- " <td>NaN</td>\n",
1360
- " <td>-2.0</td>\n",
1361
- " <td>-2.000000</td>\n",
1362
  " <td>0</td>\n",
1363
  " <td>0</td>\n",
1364
  " <td>NaN</td>\n",
1365
  " <td>0.0</td>\n",
1366
  " <td>0.000000</td>\n",
 
 
 
 
 
1367
  " </tr>\n",
1368
  " <tr>\n",
1369
  " <th>1805</th>\n",
@@ -1375,19 +1423,19 @@
1375
  " <td>NaN</td>\n",
1376
  " <td>63.0</td>\n",
1377
  " <td>63.000000</td>\n",
1378
- " <td>64</td>\n",
1379
- " <td>64</td>\n",
1380
  " <td>...</td>\n",
1381
- " <td>2</td>\n",
1382
- " <td>2</td>\n",
1383
- " <td>NaN</td>\n",
1384
- " <td>2.0</td>\n",
1385
- " <td>2.000000</td>\n",
1386
  " <td>0</td>\n",
1387
  " <td>0</td>\n",
1388
  " <td>NaN</td>\n",
1389
  " <td>0.0</td>\n",
1390
  " <td>0.000000</td>\n",
 
 
 
 
 
1391
  " </tr>\n",
1392
  " <tr>\n",
1393
  " <th>952</th>\n",
@@ -1399,14 +1447,14 @@
1399
  " <td>1.414214</td>\n",
1400
  " <td>73.0</td>\n",
1401
  " <td>73.000000</td>\n",
1402
- " <td>72</td>\n",
1403
- " <td>79</td>\n",
1404
  " <td>...</td>\n",
1405
- " <td>-3</td>\n",
1406
  " <td>0</td>\n",
1407
- " <td>2.121320</td>\n",
1408
- " <td>-1.5</td>\n",
1409
- " <td>-1.500000</td>\n",
 
1410
  " <td>0</td>\n",
1411
  " <td>1</td>\n",
1412
  " <td>0.707107</td>\n",
@@ -1423,19 +1471,19 @@
1423
  " <td>NaN</td>\n",
1424
  " <td>65.0</td>\n",
1425
  " <td>65.000000</td>\n",
1426
- " <td>79</td>\n",
1427
- " <td>79</td>\n",
1428
  " <td>...</td>\n",
1429
- " <td>-8</td>\n",
1430
- " <td>-8</td>\n",
1431
- " <td>NaN</td>\n",
1432
- " <td>-8.0</td>\n",
1433
- " <td>-8.000000</td>\n",
1434
  " <td>0</td>\n",
1435
  " <td>0</td>\n",
1436
  " <td>NaN</td>\n",
1437
  " <td>0.0</td>\n",
1438
  " <td>0.000000</td>\n",
 
 
 
 
 
1439
  " </tr>\n",
1440
  " <tr>\n",
1441
  " <th>1381</th>\n",
@@ -1447,19 +1495,19 @@
1447
  " <td>9.912114</td>\n",
1448
  " <td>81.0</td>\n",
1449
  " <td>80.250000</td>\n",
1450
- " <td>56</td>\n",
1451
- " <td>88</td>\n",
1452
  " <td>...</td>\n",
1453
- " <td>-4</td>\n",
1454
- " <td>8</td>\n",
1455
- " <td>5.123475</td>\n",
1456
- " <td>3.5</td>\n",
1457
- " <td>2.750000</td>\n",
1458
  " <td>0</td>\n",
1459
  " <td>1</td>\n",
1460
  " <td>0.500000</td>\n",
1461
  " <td>1.0</td>\n",
1462
  " <td>0.750000</td>\n",
 
 
 
 
 
1463
  " </tr>\n",
1464
  " <tr>\n",
1465
  " <th>1266</th>\n",
@@ -1471,14 +1519,14 @@
1471
  " <td>8.485281</td>\n",
1472
  " <td>78.0</td>\n",
1473
  " <td>78.000000</td>\n",
1474
- " <td>67</td>\n",
1475
- " <td>75</td>\n",
1476
  " <td>...</td>\n",
1477
- " <td>-1</td>\n",
1478
- " <td>6</td>\n",
1479
- " <td>4.949747</td>\n",
1480
- " <td>2.5</td>\n",
1481
- " <td>2.500000</td>\n",
1482
  " <td>0</td>\n",
1483
  " <td>1</td>\n",
1484
  " <td>0.707107</td>\n",
@@ -1495,19 +1543,19 @@
1495
  " <td>NaN</td>\n",
1496
  " <td>69.0</td>\n",
1497
  " <td>69.000000</td>\n",
1498
- " <td>72</td>\n",
1499
- " <td>72</td>\n",
1500
  " <td>...</td>\n",
1501
- " <td>-3</td>\n",
1502
- " <td>-3</td>\n",
1503
- " <td>NaN</td>\n",
1504
- " <td>-3.0</td>\n",
1505
- " <td>-3.000000</td>\n",
1506
  " <td>0</td>\n",
1507
  " <td>0</td>\n",
1508
  " <td>NaN</td>\n",
1509
  " <td>0.0</td>\n",
1510
  " <td>0.000000</td>\n",
 
 
 
 
 
1511
  " </tr>\n",
1512
  " <tr>\n",
1513
  " <th>697</th>\n",
@@ -1519,19 +1567,19 @@
1519
  " <td>NaN</td>\n",
1520
  " <td>63.0</td>\n",
1521
  " <td>63.000000</td>\n",
1522
- " <td>72</td>\n",
1523
- " <td>72</td>\n",
1524
  " <td>...</td>\n",
1525
- " <td>-2</td>\n",
1526
- " <td>-2</td>\n",
1527
- " <td>NaN</td>\n",
1528
- " <td>-2.0</td>\n",
1529
- " <td>-2.000000</td>\n",
1530
  " <td>0</td>\n",
1531
  " <td>0</td>\n",
1532
  " <td>NaN</td>\n",
1533
  " <td>0.0</td>\n",
1534
  " <td>0.000000</td>\n",
 
 
 
 
 
1535
  " </tr>\n",
1536
  " <tr>\n",
1537
  " <th>763</th>\n",
@@ -1543,23 +1591,23 @@
1543
  " <td>1.527525</td>\n",
1544
  " <td>70.0</td>\n",
1545
  " <td>69.666667</td>\n",
1546
- " <td>60</td>\n",
1547
- " <td>88</td>\n",
1548
  " <td>...</td>\n",
1549
- " <td>-11</td>\n",
1550
- " <td>3</td>\n",
1551
- " <td>7.371115</td>\n",
1552
- " <td>0.0</td>\n",
1553
- " <td>-2.666667</td>\n",
1554
  " <td>0</td>\n",
1555
  " <td>1</td>\n",
1556
  " <td>0.577350</td>\n",
1557
  " <td>1.0</td>\n",
1558
  " <td>0.666667</td>\n",
 
 
 
 
 
1559
  " </tr>\n",
1560
  " </tbody>\n",
1561
  "</table>\n",
1562
- "<p>10 rows × 218 columns</p>\n",
1563
  "</div>"
1564
  ],
1565
  "text/plain": [
@@ -1575,46 +1623,46 @@
1575
  "697 1301 2023 M 63 63 NaN \n",
1576
  "763 1323 2003 M 68 71 1.527525 \n",
1577
  "\n",
1578
- " TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
1579
- "995 77.0 73.666667 53 82 ... \n",
1580
- "1601 63.0 63.000000 71 71 ... \n",
1581
- "1805 63.0 63.000000 64 64 ... \n",
1582
- "952 73.0 73.000000 72 79 ... \n",
1583
- "924 65.0 65.000000 79 79 ... \n",
1584
- "1381 81.0 80.250000 56 88 ... \n",
1585
- "1266 78.0 78.000000 67 75 ... \n",
1586
- "1810 69.0 69.000000 72 72 ... \n",
1587
- "697 63.0 63.000000 72 72 ... \n",
1588
- "763 70.0 69.666667 60 88 ... \n",
1589
  "\n",
1590
- " FGMDiff min FGMDiff max FGMDiff std FGMDiff median FGMDiff mean \\\n",
1591
- "995 -10 13 11.789826 -3.0 0.000000 \n",
1592
- "1601 -2 -2 NaN -2.0 -2.000000 \n",
1593
- "1805 2 2 NaN 2.0 2.000000 \n",
1594
- "952 -3 0 2.121320 -1.5 -1.500000 \n",
1595
- "924 -8 -8 NaN -8.0 -8.000000 \n",
1596
- "1381 -4 8 5.123475 3.5 2.750000 \n",
1597
- "1266 -1 6 4.949747 2.5 2.500000 \n",
1598
- "1810 -3 -3 NaN -3.0 -3.000000 \n",
1599
- "697 -2 -2 NaN -2.0 -2.000000 \n",
1600
- "763 -11 3 7.371115 0.0 -2.666667 \n",
1601
  "\n",
1602
- " Win min Win max Win std Win median Win mean \n",
1603
- "995 0 1 0.577350 1.0 0.666667 \n",
1604
- "1601 0 0 NaN 0.0 0.000000 \n",
1605
- "1805 0 0 NaN 0.0 0.000000 \n",
1606
- "952 0 1 0.707107 0.5 0.500000 \n",
1607
- "924 0 0 NaN 0.0 0.000000 \n",
1608
- "1381 0 1 0.500000 1.0 0.750000 \n",
1609
- "1266 0 1 0.707107 0.5 0.500000 \n",
1610
- "1810 0 0 NaN 0.0 0.000000 \n",
1611
- "697 0 0 NaN 0.0 0.000000 \n",
1612
- "763 0 1 0.577350 1.0 0.666667 \n",
1613
  "\n",
1614
- "[10 rows x 218 columns]"
1615
  ]
1616
  },
1617
- "execution_count": 13,
1618
  "metadata": {},
1619
  "output_type": "execute_result"
1620
  }
@@ -1642,7 +1690,7 @@
1642
  },
1643
  {
1644
  "cell_type": "code",
1645
- "execution_count": 14,
1646
  "metadata": {},
1647
  "outputs": [
1648
  {
@@ -1758,7 +1806,7 @@
1758
  "4 2024 1 "
1759
  ]
1760
  },
1761
- "execution_count": 14,
1762
  "metadata": {},
1763
  "output_type": "execute_result"
1764
  }
@@ -1828,19 +1876,19 @@
1828
  " <th>TeamScore std reg</th>\n",
1829
  " <th>TeamScore median reg</th>\n",
1830
  " <th>TeamScore mean reg</th>\n",
1831
- " <th>OppScore min reg</th>\n",
1832
- " <th>OppScore max reg</th>\n",
1833
  " <th>...</th>\n",
1834
- " <th>FGMDiff min tourney</th>\n",
1835
- " <th>FGMDiff max tourney</th>\n",
1836
- " <th>FGMDiff std tourney</th>\n",
1837
- " <th>FGMDiff median tourney</th>\n",
1838
- " <th>FGMDiff mean tourney</th>\n",
1839
  " <th>Win min tourney</th>\n",
1840
  " <th>Win max tourney</th>\n",
1841
  " <th>Win std tourney</th>\n",
1842
  " <th>Win median tourney</th>\n",
1843
  " <th>Win mean tourney</th>\n",
 
 
 
 
 
1844
  " </tr>\n",
1845
  " </thead>\n",
1846
  " <tbody>\n",
@@ -1854,8 +1902,8 @@
1854
  " <td>10.808339</td>\n",
1855
  " <td>61.0</td>\n",
1856
  " <td>58.965517</td>\n",
1857
- " <td>36</td>\n",
1858
- " <td>85</td>\n",
1859
  " <td>...</td>\n",
1860
  " <td>NaN</td>\n",
1861
  " <td>NaN</td>\n",
@@ -1878,8 +1926,8 @@
1878
  " <td>12.283247</td>\n",
1879
  " <td>67.0</td>\n",
1880
  " <td>66.466667</td>\n",
1881
- " <td>44</td>\n",
1882
- " <td>97</td>\n",
1883
  " <td>...</td>\n",
1884
  " <td>NaN</td>\n",
1885
  " <td>NaN</td>\n",
@@ -1902,8 +1950,8 @@
1902
  " <td>10.019980</td>\n",
1903
  " <td>72.0</td>\n",
1904
  " <td>73.000000</td>\n",
1905
- " <td>61</td>\n",
1906
- " <td>103</td>\n",
1907
  " <td>...</td>\n",
1908
  " <td>NaN</td>\n",
1909
  " <td>NaN</td>\n",
@@ -1926,19 +1974,19 @@
1926
  " <td>12.911860</td>\n",
1927
  " <td>77.0</td>\n",
1928
  " <td>75.870968</td>\n",
1929
- " <td>47</td>\n",
1930
- " <td>81</td>\n",
1931
  " <td>...</td>\n",
1932
- " <td>2.0</td>\n",
1933
- " <td>2.0</td>\n",
1934
- " <td>NaN</td>\n",
1935
- " <td>2.0</td>\n",
1936
- " <td>2.0</td>\n",
1937
  " <td>0.0</td>\n",
1938
  " <td>0.0</td>\n",
1939
  " <td>NaN</td>\n",
1940
  " <td>0.0</td>\n",
1941
  " <td>0.0</td>\n",
 
 
 
 
 
1942
  " </tr>\n",
1943
  " <tr>\n",
1944
  " <th>3407</th>\n",
@@ -1950,14 +1998,14 @@
1950
  " <td>11.841315</td>\n",
1951
  " <td>75.5</td>\n",
1952
  " <td>75.906250</td>\n",
1953
- " <td>37</td>\n",
1954
- " <td>89</td>\n",
1955
  " <td>...</td>\n",
1956
  " <td>0.0</td>\n",
1957
- " <td>3.0</td>\n",
1958
- " <td>2.121320</td>\n",
1959
- " <td>1.5</td>\n",
1960
- " <td>1.5</td>\n",
1961
  " <td>0.0</td>\n",
1962
  " <td>1.0</td>\n",
1963
  " <td>0.707107</td>\n",
@@ -1974,8 +2022,8 @@
1974
  " <td>10.298567</td>\n",
1975
  " <td>67.0</td>\n",
1976
  " <td>65.062500</td>\n",
1977
- " <td>45</td>\n",
1978
- " <td>106</td>\n",
1979
  " <td>...</td>\n",
1980
  " <td>NaN</td>\n",
1981
  " <td>NaN</td>\n",
@@ -1998,8 +2046,8 @@
1998
  " <td>14.194618</td>\n",
1999
  " <td>76.0</td>\n",
2000
  " <td>76.777778</td>\n",
2001
- " <td>59</td>\n",
2002
- " <td>107</td>\n",
2003
  " <td>...</td>\n",
2004
  " <td>NaN</td>\n",
2005
  " <td>NaN</td>\n",
@@ -2022,8 +2070,8 @@
2022
  " <td>13.385137</td>\n",
2023
  " <td>53.0</td>\n",
2024
  " <td>55.476190</td>\n",
2025
- " <td>41</td>\n",
2026
- " <td>117</td>\n",
2027
  " <td>...</td>\n",
2028
  " <td>NaN</td>\n",
2029
  " <td>NaN</td>\n",
@@ -2046,8 +2094,8 @@
2046
  " <td>11.319009</td>\n",
2047
  " <td>62.5</td>\n",
2048
  " <td>63.593750</td>\n",
2049
- " <td>45</td>\n",
2050
- " <td>100</td>\n",
2051
  " <td>...</td>\n",
2052
  " <td>NaN</td>\n",
2053
  " <td>NaN</td>\n",
@@ -2070,14 +2118,14 @@
2070
  " <td>12.518374</td>\n",
2071
  " <td>65.0</td>\n",
2072
  " <td>65.750000</td>\n",
2073
- " <td>21</td>\n",
2074
- " <td>79</td>\n",
2075
  " <td>...</td>\n",
2076
  " <td>0.0</td>\n",
2077
- " <td>4.0</td>\n",
2078
- " <td>2.828427</td>\n",
2079
- " <td>2.0</td>\n",
2080
- " <td>2.0</td>\n",
2081
  " <td>0.0</td>\n",
2082
  " <td>1.0</td>\n",
2083
  " <td>0.707107</td>\n",
@@ -2086,7 +2134,7 @@
2086
  " </tr>\n",
2087
  " </tbody>\n",
2088
  "</table>\n",
2089
- "<p>10 rows × 433 columns</p>\n",
2090
  "</div>"
2091
  ],
2092
  "text/plain": [
@@ -2114,55 +2162,55 @@
2114
  "9567 11.319009 62.5 63.593750 \n",
2115
  "12617 12.518374 65.0 65.750000 \n",
2116
  "\n",
2117
- " OppScore min reg OppScore max reg ... FGMDiff min tourney \\\n",
2118
- "12348 36 85 ... NaN \n",
2119
- "6900 44 97 ... NaN \n",
2120
- "4406 61 103 ... NaN \n",
2121
- "4233 47 81 ... 2.0 \n",
2122
- "3407 37 89 ... 0.0 \n",
2123
- "5190 45 106 ... NaN \n",
2124
- "1892 59 107 ... NaN \n",
2125
- "10020 41 117 ... NaN \n",
2126
- "9567 45 100 ... NaN \n",
2127
- "12617 21 79 ... 0.0 \n",
2128
  "\n",
2129
- " FGMDiff max tourney FGMDiff std tourney FGMDiff median tourney \\\n",
2130
- "12348 NaN NaN NaN \n",
2131
- "6900 NaN NaN NaN \n",
2132
- "4406 NaN NaN NaN \n",
2133
- "4233 2.0 NaN 2.0 \n",
2134
- "3407 3.0 2.121320 1.5 \n",
2135
- "5190 NaN NaN NaN \n",
2136
- "1892 NaN NaN NaN \n",
2137
- "10020 NaN NaN NaN \n",
2138
- "9567 NaN NaN NaN \n",
2139
- "12617 4.0 2.828427 2.0 \n",
2140
  "\n",
2141
- " FGMDiff mean tourney Win min tourney Win max tourney \\\n",
2142
- "12348 NaN NaN NaN \n",
2143
- "6900 NaN NaN NaN \n",
2144
- "4406 NaN NaN NaN \n",
2145
- "4233 2.0 0.0 0.0 \n",
2146
- "3407 1.5 0.0 1.0 \n",
2147
- "5190 NaN NaN NaN \n",
2148
- "1892 NaN NaN NaN \n",
2149
- "10020 NaN NaN NaN \n",
2150
- "9567 NaN NaN NaN \n",
2151
- "12617 2.0 0.0 1.0 \n",
2152
  "\n",
2153
- " Win std tourney Win median tourney Win mean tourney \n",
2154
- "12348 NaN NaN NaN \n",
2155
- "6900 NaN NaN NaN \n",
2156
- "4406 NaN NaN NaN \n",
2157
- "4233 NaN 0.0 0.0 \n",
2158
- "3407 0.707107 0.5 0.5 \n",
2159
- "5190 NaN NaN NaN \n",
2160
- "1892 NaN NaN NaN \n",
2161
- "10020 NaN NaN NaN \n",
2162
- "9567 NaN NaN NaN \n",
2163
- "12617 0.707107 0.5 0.5 \n",
2164
  "\n",
2165
- "[10 rows x 433 columns]"
2166
  ]
2167
  },
2168
  "execution_count": 16,
@@ -2191,7 +2239,6 @@
2191
  "outputs": [],
2192
  "source": [
2193
  "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
2194
- "\n",
2195
  "team_agg_df2 = pd.merge(\n",
2196
  " left=team_agg_df,\n",
2197
  " right=team_conf_seeds_df[team_conf_seeds_df[\"Season\"] >= 2003],\n",
@@ -2214,9 +2261,9 @@
2214
  "text": [
2215
  "<class 'pandas.core.frame.DataFrame'>\n",
2216
  "Int64Index: 12857 entries, 0 to 12856\n",
2217
- "Columns: 439 entries, TeamID to ChalkSeed\n",
2218
- "dtypes: float64(347), int64(88), object(4)\n",
2219
- "memory usage: 43.2+ MB\n"
2220
  ]
2221
  }
2222
  ],
@@ -2235,8 +2282,8 @@
2235
  "text": [
2236
  "<class 'pandas.core.frame.DataFrame'>\n",
2237
  "Int64Index: 377608 entries, 0 to 377607\n",
2238
- "Columns: 486 entries, Season to ChalkSeed\n",
2239
- "dtypes: float64(347), int64(132), object(7)\n",
2240
  "memory usage: 1.4+ GB\n"
2241
  ]
2242
  }
@@ -2256,386 +2303,77 @@
2256
  },
2257
  {
2258
  "cell_type": "code",
2259
- "execution_count": 34,
2260
  "metadata": {},
2261
  "outputs": [
2262
  {
2263
  "data": {
2264
- "text/html": [
2265
- "<div>\n",
2266
- "<style scoped>\n",
2267
- " .dataframe tbody tr th:only-of-type {\n",
2268
- " vertical-align: middle;\n",
2269
- " }\n",
2270
- "\n",
2271
- " .dataframe tbody tr th {\n",
2272
- " vertical-align: top;\n",
2273
- " }\n",
2274
- "\n",
2275
- " .dataframe thead th {\n",
2276
- " text-align: right;\n",
2277
- " }\n",
2278
- "</style>\n",
2279
- "<table border=\"1\" class=\"dataframe\">\n",
2280
- " <thead>\n",
2281
- " <tr style=\"text-align: right;\">\n",
2282
- " <th></th>\n",
2283
- " <th>Season</th>\n",
2284
- " <th>DayNum</th>\n",
2285
- " <th>TeamID</th>\n",
2286
- " <th>TeamScore</th>\n",
2287
- " <th>OppScore</th>\n",
2288
- " <th>WLoc</th>\n",
2289
- " <th>NumOT</th>\n",
2290
- " <th>TeamFGM</th>\n",
2291
- " <th>TeamFGA</th>\n",
2292
- " <th>TeamFGM3</th>\n",
2293
- " <th>...</th>\n",
2294
- " <th>Win max tourney</th>\n",
2295
- " <th>Win std tourney</th>\n",
2296
- " <th>Win median tourney</th>\n",
2297
- " <th>Win mean tourney</th>\n",
2298
- " <th>Seed</th>\n",
2299
- " <th>ConfAbbrev</th>\n",
2300
- " <th>TeamName</th>\n",
2301
- " <th>FirstD1Season</th>\n",
2302
- " <th>LastD1Season</th>\n",
2303
- " <th>ChalkSeed</th>\n",
2304
- " </tr>\n",
2305
- " </thead>\n",
2306
- " <tbody>\n",
2307
- " <tr>\n",
2308
- " <th>0</th>\n",
2309
- " <td>2003</td>\n",
2310
- " <td>10</td>\n",
2311
- " <td>1104</td>\n",
2312
- " <td>68</td>\n",
2313
- " <td>62</td>\n",
2314
- " <td>N</td>\n",
2315
- " <td>0</td>\n",
2316
- " <td>27</td>\n",
2317
- " <td>58</td>\n",
2318
- " <td>3</td>\n",
2319
- " <td>...</td>\n",
2320
- " <td>0.0</td>\n",
2321
- " <td>NaN</td>\n",
2322
- " <td>0.0</td>\n",
2323
- " <td>0.000000</td>\n",
2324
- " <td>Y10</td>\n",
2325
- " <td>sec</td>\n",
2326
- " <td>Alabama</td>\n",
2327
- " <td>1985.0</td>\n",
2328
- " <td>2024.0</td>\n",
2329
- " <td>10.0</td>\n",
2330
- " </tr>\n",
2331
- " <tr>\n",
2332
- " <th>1</th>\n",
2333
- " <td>2003</td>\n",
2334
- " <td>10</td>\n",
2335
- " <td>1272</td>\n",
2336
- " <td>70</td>\n",
2337
- " <td>63</td>\n",
2338
- " <td>N</td>\n",
2339
- " <td>0</td>\n",
2340
- " <td>26</td>\n",
2341
- " <td>62</td>\n",
2342
- " <td>8</td>\n",
2343
- " <td>...</td>\n",
2344
- " <td>0.0</td>\n",
2345
- " <td>NaN</td>\n",
2346
- " <td>0.0</td>\n",
2347
- " <td>0.000000</td>\n",
2348
- " <td>Z07</td>\n",
2349
- " <td>cusa</td>\n",
2350
- " <td>Memphis</td>\n",
2351
- " <td>1985.0</td>\n",
2352
- " <td>2024.0</td>\n",
2353
- " <td>7.0</td>\n",
2354
- " </tr>\n",
2355
- " <tr>\n",
2356
- " <th>2</th>\n",
2357
- " <td>2003</td>\n",
2358
- " <td>11</td>\n",
2359
- " <td>1266</td>\n",
2360
- " <td>73</td>\n",
2361
- " <td>61</td>\n",
2362
- " <td>N</td>\n",
2363
- " <td>0</td>\n",
2364
- " <td>24</td>\n",
2365
- " <td>58</td>\n",
2366
- " <td>8</td>\n",
2367
- " <td>...</td>\n",
2368
- " <td>1.0</td>\n",
2369
- " <td>0.447214</td>\n",
2370
- " <td>1.0</td>\n",
2371
- " <td>0.800000</td>\n",
2372
- " <td>Y03</td>\n",
2373
- " <td>cusa</td>\n",
2374
- " <td>Marquette</td>\n",
2375
- " <td>1985.0</td>\n",
2376
- " <td>2024.0</td>\n",
2377
- " <td>3.0</td>\n",
2378
- " </tr>\n",
2379
- " <tr>\n",
2380
- " <th>3</th>\n",
2381
- " <td>2003</td>\n",
2382
- " <td>11</td>\n",
2383
- " <td>1296</td>\n",
2384
- " <td>56</td>\n",
2385
- " <td>50</td>\n",
2386
- " <td>N</td>\n",
2387
- " <td>0</td>\n",
2388
- " <td>18</td>\n",
2389
- " <td>38</td>\n",
2390
- " <td>3</td>\n",
2391
- " <td>...</td>\n",
2392
- " <td>NaN</td>\n",
2393
- " <td>NaN</td>\n",
2394
- " <td>NaN</td>\n",
2395
- " <td>NaN</td>\n",
2396
- " <td>NaN</td>\n",
2397
- " <td>NaN</td>\n",
2398
- " <td>NaN</td>\n",
2399
- " <td>NaN</td>\n",
2400
- " <td>NaN</td>\n",
2401
- " <td>NaN</td>\n",
2402
- " </tr>\n",
2403
- " <tr>\n",
2404
- " <th>4</th>\n",
2405
- " <td>2003</td>\n",
2406
- " <td>11</td>\n",
2407
- " <td>1400</td>\n",
2408
- " <td>77</td>\n",
2409
- " <td>71</td>\n",
2410
- " <td>N</td>\n",
2411
- " <td>0</td>\n",
2412
- " <td>30</td>\n",
2413
- " <td>61</td>\n",
2414
- " <td>6</td>\n",
2415
- " <td>...</td>\n",
2416
- " <td>1.0</td>\n",
2417
- " <td>0.447214</td>\n",
2418
- " <td>1.0</td>\n",
2419
- " <td>0.800000</td>\n",
2420
- " <td>X01</td>\n",
2421
- " <td>big_twelve</td>\n",
2422
- " <td>Texas</td>\n",
2423
- " <td>1985.0</td>\n",
2424
- " <td>2024.0</td>\n",
2425
- " <td>1.0</td>\n",
2426
- " </tr>\n",
2427
- " <tr>\n",
2428
- " <th>...</th>\n",
2429
- " <td>...</td>\n",
2430
- " <td>...</td>\n",
2431
- " <td>...</td>\n",
2432
- " <td>...</td>\n",
2433
- " <td>...</td>\n",
2434
- " <td>...</td>\n",
2435
- " <td>...</td>\n",
2436
- " <td>...</td>\n",
2437
- " <td>...</td>\n",
2438
- " <td>...</td>\n",
2439
- " <td>...</td>\n",
2440
- " <td>...</td>\n",
2441
- " <td>...</td>\n",
2442
- " <td>...</td>\n",
2443
- " <td>...</td>\n",
2444
- " <td>...</td>\n",
2445
- " <td>...</td>\n",
2446
- " <td>...</td>\n",
2447
- " <td>...</td>\n",
2448
- " <td>...</td>\n",
2449
- " <td>...</td>\n",
2450
- " </tr>\n",
2451
- " <tr>\n",
2452
- " <th>377603</th>\n",
2453
- " <td>2023</td>\n",
2454
- " <td>147</td>\n",
2455
- " <td>3268</td>\n",
2456
- " <td>75</td>\n",
2457
- " <td>86</td>\n",
2458
- " <td>H</td>\n",
2459
- " <td>0</td>\n",
2460
- " <td>29</td>\n",
2461
- " <td>58</td>\n",
2462
- " <td>7</td>\n",
2463
- " <td>...</td>\n",
2464
- " <td>1.0</td>\n",
2465
- " <td>0.500000</td>\n",
2466
- " <td>1.0</td>\n",
2467
- " <td>0.750000</td>\n",
2468
- " <td>NaN</td>\n",
2469
- " <td>NaN</td>\n",
2470
- " <td>NaN</td>\n",
2471
- " <td>NaN</td>\n",
2472
- " <td>NaN</td>\n",
2473
- " <td>NaN</td>\n",
2474
- " </tr>\n",
2475
- " <tr>\n",
2476
- " <th>377604</th>\n",
2477
- " <td>2023</td>\n",
2478
- " <td>147</td>\n",
2479
- " <td>3326</td>\n",
2480
- " <td>74</td>\n",
2481
- " <td>84</td>\n",
2482
- " <td>N</td>\n",
2483
- " <td>0</td>\n",
2484
- " <td>26</td>\n",
2485
- " <td>57</td>\n",
2486
- " <td>7</td>\n",
2487
- " <td>...</td>\n",
2488
- " <td>1.0</td>\n",
2489
- " <td>0.500000</td>\n",
2490
- " <td>1.0</td>\n",
2491
- " <td>0.750000</td>\n",
2492
- " <td>NaN</td>\n",
2493
- " <td>NaN</td>\n",
2494
- " <td>NaN</td>\n",
2495
- " <td>NaN</td>\n",
2496
- " <td>NaN</td>\n",
2497
- " <td>NaN</td>\n",
2498
- " </tr>\n",
2499
- " <tr>\n",
2500
- " <th>377605</th>\n",
2501
- " <td>2023</td>\n",
2502
- " <td>151</td>\n",
2503
- " <td>3376</td>\n",
2504
- " <td>73</td>\n",
2505
- " <td>77</td>\n",
2506
- " <td>N</td>\n",
2507
- " <td>0</td>\n",
2508
- " <td>30</td>\n",
2509
- " <td>77</td>\n",
2510
- " <td>4</td>\n",
2511
- " <td>...</td>\n",
2512
- " <td>1.0</td>\n",
2513
- " <td>0.447214</td>\n",
2514
- " <td>1.0</td>\n",
2515
- " <td>0.800000</td>\n",
2516
- " <td>NaN</td>\n",
2517
- " <td>NaN</td>\n",
2518
- " <td>NaN</td>\n",
2519
- " <td>NaN</td>\n",
2520
- " <td>NaN</td>\n",
2521
- " <td>NaN</td>\n",
2522
- " </tr>\n",
2523
- " <tr>\n",
2524
- " <th>377606</th>\n",
2525
- " <td>2023</td>\n",
2526
- " <td>151</td>\n",
2527
- " <td>3439</td>\n",
2528
- " <td>72</td>\n",
2529
- " <td>79</td>\n",
2530
- " <td>N</td>\n",
2531
- " <td>0</td>\n",
2532
- " <td>23</td>\n",
2533
- " <td>57</td>\n",
2534
- " <td>9</td>\n",
2535
- " <td>...</td>\n",
2536
- " <td>1.0</td>\n",
2537
- " <td>0.447214</td>\n",
2538
- " <td>1.0</td>\n",
2539
- " <td>0.800000</td>\n",
2540
- " <td>NaN</td>\n",
2541
- " <td>NaN</td>\n",
2542
- " <td>NaN</td>\n",
2543
- " <td>NaN</td>\n",
2544
- " <td>NaN</td>\n",
2545
- " <td>NaN</td>\n",
2546
- " </tr>\n",
2547
- " <tr>\n",
2548
- " <th>377607</th>\n",
2549
- " <td>2023</td>\n",
2550
- " <td>153</td>\n",
2551
- " <td>3234</td>\n",
2552
- " <td>85</td>\n",
2553
- " <td>102</td>\n",
2554
- " <td>N</td>\n",
2555
- " <td>0</td>\n",
2556
- " <td>28</td>\n",
2557
- " <td>56</td>\n",
2558
- " <td>14</td>\n",
2559
- " <td>...</td>\n",
2560
- " <td>1.0</td>\n",
2561
- " <td>0.408248</td>\n",
2562
- " <td>1.0</td>\n",
2563
- " <td>0.833333</td>\n",
2564
- " <td>NaN</td>\n",
2565
- " <td>NaN</td>\n",
2566
- " <td>NaN</td>\n",
2567
- " <td>NaN</td>\n",
2568
- " <td>NaN</td>\n",
2569
- " <td>NaN</td>\n",
2570
- " </tr>\n",
2571
- " </tbody>\n",
2572
- "</table>\n",
2573
- "<p>377608 rows × 486 columns</p>\n",
2574
- "</div>"
2575
- ],
2576
  "text/plain": [
2577
- " Season DayNum TeamID TeamScore OppScore WLoc NumOT TeamFGM \\\n",
2578
- "0 2003 10 1104 68 62 N 0 27 \n",
2579
- "1 2003 10 1272 70 63 N 0 26 \n",
2580
- "2 2003 11 1266 73 61 N 0 24 \n",
2581
- "3 2003 11 1296 56 50 N 0 18 \n",
2582
- "4 2003 11 1400 77 71 N 0 30 \n",
2583
- "... ... ... ... ... ... ... ... ... \n",
2584
- "377603 2023 147 3268 75 86 H 0 29 \n",
2585
- "377604 2023 147 3326 74 84 N 0 26 \n",
2586
- "377605 2023 151 3376 73 77 N 0 30 \n",
2587
- "377606 2023 151 3439 72 79 N 0 23 \n",
2588
- "377607 2023 153 3234 85 102 N 0 28 \n",
2589
- "\n",
2590
- " TeamFGA TeamFGM3 ... Win max tourney Win std tourney \\\n",
2591
- "0 58 3 ... 0.0 NaN \n",
2592
- "1 62 8 ... 0.0 NaN \n",
2593
- "2 58 8 ... 1.0 0.447214 \n",
2594
- "3 38 3 ... NaN NaN \n",
2595
- "4 61 6 ... 1.0 0.447214 \n",
2596
- "... ... ... ... ... ... \n",
2597
- "377603 58 7 ... 1.0 0.500000 \n",
2598
- "377604 57 7 ... 1.0 0.500000 \n",
2599
- "377605 77 4 ... 1.0 0.447214 \n",
2600
- "377606 57 9 ... 1.0 0.447214 \n",
2601
- "377607 56 14 ... 1.0 0.408248 \n",
2602
- "\n",
2603
- " Win median tourney Win mean tourney Seed ConfAbbrev TeamName \\\n",
2604
- "0 0.0 0.000000 Y10 sec Alabama \n",
2605
- "1 0.0 0.000000 Z07 cusa Memphis \n",
2606
- "2 1.0 0.800000 Y03 cusa Marquette \n",
2607
- "3 NaN NaN NaN NaN NaN \n",
2608
- "4 1.0 0.800000 X01 big_twelve Texas \n",
2609
- "... ... ... ... ... ... \n",
2610
- "377603 1.0 0.750000 NaN NaN NaN \n",
2611
- "377604 1.0 0.750000 NaN NaN NaN \n",
2612
- "377605 1.0 0.800000 NaN NaN NaN \n",
2613
- "377606 1.0 0.800000 NaN NaN NaN \n",
2614
- "377607 1.0 0.833333 NaN NaN NaN \n",
2615
- "\n",
2616
- " FirstD1Season LastD1Season ChalkSeed \n",
2617
- "0 1985.0 2024.0 10.0 \n",
2618
- "1 1985.0 2024.0 7.0 \n",
2619
- "2 1985.0 2024.0 3.0 \n",
2620
- "3 NaN NaN NaN \n",
2621
- "4 1985.0 2024.0 1.0 \n",
2622
- "... ... ... ... \n",
2623
- "377603 NaN NaN NaN \n",
2624
- "377604 NaN NaN NaN \n",
2625
- "377605 NaN NaN NaN \n",
2626
- "377606 NaN NaN NaN \n",
2627
- "377607 NaN NaN NaN \n",
2628
- "\n",
2629
- "[377608 rows x 486 columns]"
2630
  ]
2631
  },
2632
- "execution_count": 34,
2633
  "metadata": {},
2634
  "output_type": "execute_result"
2635
  }
2636
  ],
2637
  "source": [
2638
- "super_detailed_games_df[\"OppID\"]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2639
  ]
2640
  },
2641
  {
@@ -2647,7 +2385,7 @@
2647
  },
2648
  {
2649
  "cell_type": "code",
2650
- "execution_count": 19,
2651
  "metadata": {},
2652
  "outputs": [],
2653
  "source": [
@@ -2659,7 +2397,7 @@
2659
  },
2660
  {
2661
  "cell_type": "code",
2662
- "execution_count": 20,
2663
  "metadata": {},
2664
  "outputs": [],
2665
  "source": [
 
475
  "text": [
476
  "<class 'pandas.core.frame.DataFrame'>\n",
477
  "RangeIndex: 373324 entries, 0 to 373323\n",
478
+ "Data columns (total 36 columns):\n",
479
  " # Column Non-Null Count Dtype \n",
480
  "--- ------ -------------- ----- \n",
481
  " 0 Season 373324 non-null int64 \n",
482
  " 1 DayNum 373324 non-null int64 \n",
483
  " 2 TeamID 373324 non-null int64 \n",
484
  " 3 TeamScore 373324 non-null int64 \n",
485
+ " 4 OppTeamID 373324 non-null int64 \n",
486
+ " 5 OppScore 373324 non-null int64 \n",
487
+ " 6 WLoc 373324 non-null object\n",
488
+ " 7 NumOT 373324 non-null int64 \n",
489
+ " 8 TeamFGM 373324 non-null int64 \n",
490
+ " 9 TeamFGA 373324 non-null int64 \n",
491
+ " 10 TeamFGM3 373324 non-null int64 \n",
492
+ " 11 WFGA3 373324 non-null int64 \n",
493
+ " 12 TeamFTM 373324 non-null int64 \n",
494
+ " 13 TeamFTA 373324 non-null int64 \n",
495
+ " 14 TeamOR 373324 non-null int64 \n",
496
+ " 15 TeamDR 373324 non-null int64 \n",
497
+ " 16 TeamAst 373324 non-null int64 \n",
498
+ " 17 TeamTO 373324 non-null int64 \n",
499
+ " 18 TeamStl 373324 non-null int64 \n",
500
+ " 19 TeamBlk 373324 non-null int64 \n",
501
+ " 20 TeamPF 373324 non-null int64 \n",
502
+ " 21 OppFGM 373324 non-null int64 \n",
503
+ " 22 OppFGA 373324 non-null int64 \n",
504
+ " 23 OppFGM3 373324 non-null int64 \n",
505
+ " 24 LFGA3 373324 non-null int64 \n",
506
+ " 25 OppFTM 373324 non-null int64 \n",
507
+ " 26 OppFTA 373324 non-null int64 \n",
508
+ " 27 OppOR 373324 non-null int64 \n",
509
+ " 28 OppDR 373324 non-null int64 \n",
510
+ " 29 OppAst 373324 non-null int64 \n",
511
+ " 30 OppTO 373324 non-null int64 \n",
512
+ " 31 OppStl 373324 non-null int64 \n",
513
+ " 32 OppBlk 373324 non-null int64 \n",
514
+ " 33 OppPF 373324 non-null int64 \n",
515
+ " 34 League 373324 non-null object\n",
516
+ " 35 GameResult 373324 non-null object\n",
517
+ "dtypes: int64(33), object(3)\n",
518
+ "memory usage: 102.5+ MB\n"
519
  ]
520
  }
521
  ],
 
523
  "\n",
524
  "detailed_reg_games_df = pd.concat([\n",
525
  " (\n",
526
+ " # detailed_reg_games_df[[col for col in detailed_reg_games_df.columns if col != \"LTeamID\"]]\n",
527
+ " detailed_reg_games_df[[col for col in detailed_reg_games_df.columns]]\n",
528
  " .assign(GameResult=\"W\")\n",
529
+ " .rename(columns=w_renamed_cols | {\"WTeamID\": \"TeamID\", \"LTeamID\": \"OppTeamID\"})\n",
530
  " ),\n",
531
  " (\n",
532
+ " # detailed_reg_games_df[[col for col in detailed_reg_games_df.columns if col != \"WTeamID\"]]\n",
533
+ " detailed_reg_games_df[[col for col in detailed_reg_games_df.columns]]\n",
534
  " .assign(GameResult=\"L\")\n",
535
+ " .rename(columns=l_renamed_cols | {\"LTeamID\": \"TeamID\", \"WTeamID\": \"OppTeamID\"})\n",
536
  " )\n",
537
  "\n",
538
  "]).reset_index(drop=True)\n",
 
551
  "text": [
552
  "<class 'pandas.core.frame.DataFrame'>\n",
553
  "RangeIndex: 4284 entries, 0 to 4283\n",
554
+ "Data columns (total 36 columns):\n",
555
  " # Column Non-Null Count Dtype \n",
556
  "--- ------ -------------- ----- \n",
557
  " 0 Season 4284 non-null int64 \n",
558
  " 1 DayNum 4284 non-null int64 \n",
559
  " 2 TeamID 4284 non-null int64 \n",
560
  " 3 TeamScore 4284 non-null int64 \n",
561
+ " 4 OppTeamID 4284 non-null int64 \n",
562
+ " 5 OppScore 4284 non-null int64 \n",
563
+ " 6 WLoc 4284 non-null object\n",
564
+ " 7 NumOT 4284 non-null int64 \n",
565
+ " 8 TeamFGM 4284 non-null int64 \n",
566
+ " 9 TeamFGA 4284 non-null int64 \n",
567
+ " 10 TeamFGM3 4284 non-null int64 \n",
568
+ " 11 WFGA3 4284 non-null int64 \n",
569
+ " 12 TeamFTM 4284 non-null int64 \n",
570
+ " 13 TeamFTA 4284 non-null int64 \n",
571
+ " 14 TeamOR 4284 non-null int64 \n",
572
+ " 15 TeamDR 4284 non-null int64 \n",
573
+ " 16 TeamAst 4284 non-null int64 \n",
574
+ " 17 TeamTO 4284 non-null int64 \n",
575
+ " 18 TeamStl 4284 non-null int64 \n",
576
+ " 19 TeamBlk 4284 non-null int64 \n",
577
+ " 20 TeamPF 4284 non-null int64 \n",
578
+ " 21 OppFGM 4284 non-null int64 \n",
579
+ " 22 OppFGA 4284 non-null int64 \n",
580
+ " 23 OppFGM3 4284 non-null int64 \n",
581
+ " 24 LFGA3 4284 non-null int64 \n",
582
+ " 25 OppFTM 4284 non-null int64 \n",
583
+ " 26 OppFTA 4284 non-null int64 \n",
584
+ " 27 OppOR 4284 non-null int64 \n",
585
+ " 28 OppDR 4284 non-null int64 \n",
586
+ " 29 OppAst 4284 non-null int64 \n",
587
+ " 30 OppTO 4284 non-null int64 \n",
588
+ " 31 OppStl 4284 non-null int64 \n",
589
+ " 32 OppBlk 4284 non-null int64 \n",
590
+ " 33 OppPF 4284 non-null int64 \n",
591
+ " 34 League 4284 non-null object\n",
592
+ " 35 GameResult 4284 non-null object\n",
593
+ "dtypes: int64(33), object(3)\n",
594
+ "memory usage: 1.2+ MB\n"
595
  ]
596
  }
597
  ],
 
599
  "# do the same thing for the tournament games\n",
600
  "detailed_tourney_games_df = pd.concat([\n",
601
  " (\n",
602
+ " # detailed_tourney_games_df[[col for col in detailed_tourney_games_df.columns if col != \"LTeamID\"]]\n",
603
+ " detailed_tourney_games_df[[col for col in detailed_tourney_games_df.columns]]\n",
604
  " .assign(GameResult=\"W\")\n",
605
+ " .rename(columns=w_renamed_cols | {\"WTeamID\": \"TeamID\", \"LTeamID\": \"OppTeamID\"})\n",
606
  " ),\n",
607
  " (\n",
608
+ " # detailed_tourney_games_df[[col for col in detailed_tourney_games_df.columns if col != \"WTeamID\"]]\n",
609
+ " detailed_tourney_games_df[[col for col in detailed_tourney_games_df.columns]]\n",
610
  " .assign(GameResult=\"L\")\n",
611
+ " .rename(columns=l_renamed_cols | {\"LTeamID\": \"TeamID\", \"WTeamID\": \"OppTeamID\"})\n",
612
  " )\n",
613
  "]).reset_index(drop=True)\n",
614
  "\n",
 
664
  " <th>DayNum</th>\n",
665
  " <th>TeamID</th>\n",
666
  " <th>TeamScore</th>\n",
667
+ " <th>OppTeamID</th>\n",
668
  " <th>OppScore</th>\n",
669
  " <th>WLoc</th>\n",
670
  " <th>NumOT</th>\n",
671
  " <th>TeamFGM</th>\n",
672
  " <th>TeamFGA</th>\n",
 
673
  " <th>...</th>\n",
674
+ " <th>AstDiff</th>\n",
675
+ " <th>DRDiff</th>\n",
676
+ " <th>ScoreDiff</th>\n",
677
  " <th>TODiff</th>\n",
678
  " <th>FTADiff</th>\n",
 
 
 
679
  " <th>FGADiff</th>\n",
680
+ " <th>FTMDiff</th>\n",
681
  " <th>ORDiff</th>\n",
682
+ " <th>PFDiff</th>\n",
683
+ " <th>FGM3Diff</th>\n",
 
684
  " </tr>\n",
685
  " </thead>\n",
686
  " <tbody>\n",
 
690
  " <td>74</td>\n",
691
  " <td>3158</td>\n",
692
  " <td>56</td>\n",
693
+ " <td>3189</td>\n",
694
  " <td>84</td>\n",
695
  " <td>A</td>\n",
696
  " <td>0</td>\n",
697
  " <td>21</td>\n",
698
  " <td>55</td>\n",
 
699
  " <td>...</td>\n",
700
+ " <td>-1</td>\n",
701
+ " <td>-4</td>\n",
702
+ " <td>-28</td>\n",
703
  " <td>7</td>\n",
704
  " <td>-11</td>\n",
 
 
 
705
  " <td>-12</td>\n",
706
  " <td>-11</td>\n",
707
+ " <td>-11</td>\n",
708
+ " <td>9</td>\n",
709
+ " <td>-3</td>\n",
710
  " </tr>\n",
711
  " <tr>\n",
712
  " <th>100732</th>\n",
 
714
  " <td>103</td>\n",
715
  " <td>1439</td>\n",
716
  " <td>71</td>\n",
717
+ " <td>1393</td>\n",
718
  " <td>59</td>\n",
719
  " <td>H</td>\n",
720
  " <td>0</td>\n",
721
  " <td>23</td>\n",
722
  " <td>60</td>\n",
 
723
  " <td>...</td>\n",
724
+ " <td>11</td>\n",
725
+ " <td>1</td>\n",
726
+ " <td>12</td>\n",
727
  " <td>-6</td>\n",
728
  " <td>17</td>\n",
 
 
 
729
  " <td>-4</td>\n",
730
+ " <td>16</td>\n",
731
  " <td>-1</td>\n",
732
+ " <td>-9</td>\n",
733
+ " <td>-2</td>\n",
 
734
  " </tr>\n",
735
  " <tr>\n",
736
  " <th>83150</th>\n",
 
738
  " <td>26</td>\n",
739
  " <td>1180</td>\n",
740
  " <td>82</td>\n",
741
+ " <td>1352</td>\n",
742
  " <td>69</td>\n",
743
  " <td>H</td>\n",
744
  " <td>0</td>\n",
745
  " <td>27</td>\n",
746
  " <td>58</td>\n",
 
747
  " <td>...</td>\n",
748
  " <td>1</td>\n",
749
+ " <td>13</td>\n",
750
+ " <td>13</td>\n",
751
  " <td>1</td>\n",
752
+ " <td>10</td>\n",
753
  " <td>-6</td>\n",
754
+ " <td>14</td>\n",
755
  " <td>4</td>\n",
756
+ " <td>-5</td>\n",
757
+ " <td>1</td>\n",
 
758
  " </tr>\n",
759
  " <tr>\n",
760
  " <th>345009</th>\n",
 
762
  " <td>4</td>\n",
763
  " <td>3435</td>\n",
764
  " <td>58</td>\n",
765
+ " <td>3292</td>\n",
766
  " <td>65</td>\n",
767
  " <td>H</td>\n",
768
  " <td>0</td>\n",
769
  " <td>19</td>\n",
770
  " <td>55</td>\n",
 
771
  " <td>...</td>\n",
772
+ " <td>4</td>\n",
773
+ " <td>-7</td>\n",
774
+ " <td>-7</td>\n",
775
  " <td>-5</td>\n",
776
  " <td>-11</td>\n",
 
 
 
777
  " <td>13</td>\n",
778
+ " <td>-8</td>\n",
779
  " <td>2</td>\n",
780
+ " <td>7</td>\n",
781
+ " <td>-1</td>\n",
 
782
  " </tr>\n",
783
  " <tr>\n",
784
  " <th>318707</th>\n",
 
786
  " <td>128</td>\n",
787
  " <td>3322</td>\n",
788
  " <td>45</td>\n",
789
+ " <td>3270</td>\n",
790
  " <td>63</td>\n",
791
  " <td>N</td>\n",
792
  " <td>0</td>\n",
793
  " <td>20</td>\n",
794
  " <td>51</td>\n",
 
795
  " <td>...</td>\n",
796
+ " <td>2</td>\n",
797
+ " <td>-3</td>\n",
798
+ " <td>-18</td>\n",
799
  " <td>4</td>\n",
800
  " <td>-11</td>\n",
801
+ " <td>3</td>\n",
802
  " <td>-9</td>\n",
 
803
  " <td>2</td>\n",
 
804
  " <td>2</td>\n",
 
 
805
  " <td>-3</td>\n",
806
  " </tr>\n",
807
  " </tbody>\n",
808
  "</table>\n",
809
+ "<p>5 rows × 49 columns</p>\n",
810
  "</div>"
811
  ],
812
  "text/plain": [
813
+ " Season DayNum TeamID TeamScore OppTeamID OppScore WLoc NumOT \\\n",
814
+ "337067 2017 74 3158 56 3189 84 A 0 \n",
815
+ "100732 2022 103 1439 71 1393 59 H 0 \n",
816
+ "83150 2019 26 1180 82 1352 69 H 0 \n",
817
+ "345009 2019 4 3435 58 3292 65 H 0 \n",
818
+ "318707 2013 128 3322 45 3270 63 N 0 \n",
819
  "\n",
820
+ " TeamFGM TeamFGA ... AstDiff DRDiff ScoreDiff TODiff FTADiff \\\n",
821
+ "337067 21 55 ... -1 -4 -28 7 -11 \n",
822
+ "100732 23 60 ... 11 1 12 -6 17 \n",
823
+ "83150 27 58 ... 1 13 13 1 10 \n",
824
+ "345009 19 55 ... 4 -7 -7 -5 -11 \n",
825
+ "318707 20 51 ... 2 -3 -18 4 -11 \n",
826
  "\n",
827
+ " FGADiff FTMDiff ORDiff PFDiff FGM3Diff \n",
828
+ "337067 -12 -11 -11 9 -3 \n",
829
+ "100732 -4 16 -1 -9 -2 \n",
830
+ "83150 -6 14 4 -5 1 \n",
831
+ "345009 13 -8 2 7 -1 \n",
832
+ "318707 3 -9 2 2 -3 \n",
833
  "\n",
834
+ "[5 rows x 49 columns]"
835
  ]
836
  },
837
  "execution_count": 8,
 
854
  " axis=1,\n",
855
  ")\n",
856
  "\n",
857
+ "detailed_reg_games_df[\"OppWin\"] = detailed_reg_games_df.apply(\n",
858
+ " lambda row: 1 if row[\"GameResult\"] == \"L\" else 0,\n",
859
+ " axis=1,\n",
860
+ ")\n",
861
+ "\n",
862
  "detailed_tourney_games_df[\"Win\"] = detailed_tourney_games_df.apply(\n",
863
  " lambda row: 0 if row[\"GameResult\"] == \"L\" else 1,\n",
864
  " axis=1,\n",
865
+ ")\n",
866
+ "\n",
867
+ "detailed_tourney_games_df[\"OppWin\"] = detailed_tourney_games_df.apply(\n",
868
+ " lambda row: 1 if row[\"GameResult\"] == \"L\" else 0,\n",
869
+ " axis=1,\n",
870
  ")"
871
  ]
872
  },
 
884
  "])"
885
  ]
886
  },
887
+ {
888
+ "cell_type": "code",
889
+ "execution_count": 11,
890
+ "metadata": {},
891
+ "outputs": [
892
+ {
893
+ "data": {
894
+ "text/plain": [
895
+ "0 1328\n",
896
+ "1 1393\n",
897
+ "2 1437\n",
898
+ "3 1457\n",
899
+ "4 1208\n",
900
+ " ... \n",
901
+ "4279 3376\n",
902
+ "4280 3439\n",
903
+ "4281 3234\n",
904
+ "4282 3261\n",
905
+ "4283 3261\n",
906
+ "Name: OppTeamID, Length: 377608, dtype: int64"
907
+ ]
908
+ },
909
+ "execution_count": 11,
910
+ "metadata": {},
911
+ "output_type": "execute_result"
912
+ }
913
+ ],
914
+ "source": [
915
+ "all_detailed_games_df[\"OppTeamID\"]"
916
+ ]
917
+ },
918
  {
919
  "cell_type": "markdown",
920
  "metadata": {},
 
924
  },
925
  {
926
  "cell_type": "code",
927
+ "execution_count": 12,
928
  "metadata": {},
929
  "outputs": [],
930
  "source": [
 
937
  " \"TeamLoc\",\n",
938
  " \"Season\",\n",
939
  " \"DayNum\",\n",
940
+ " # \"OppTeamID\",\n",
941
  "}\n",
942
  "\n",
943
  "agg_funcs = [\n",
 
951
  },
952
  {
953
  "cell_type": "code",
954
+ "execution_count": 13,
955
  "metadata": {
956
  "tags": []
957
  },
 
985
  " <th>TeamScore std</th>\n",
986
  " <th>TeamScore median</th>\n",
987
  " <th>TeamScore mean</th>\n",
988
+ " <th>OppTeamID min</th>\n",
989
+ " <th>OppTeamID max</th>\n",
990
  " <th>...</th>\n",
 
 
 
 
 
991
  " <th>Win min</th>\n",
992
  " <th>Win max</th>\n",
993
  " <th>Win std</th>\n",
994
  " <th>Win median</th>\n",
995
  " <th>Win mean</th>\n",
996
+ " <th>OppWin min</th>\n",
997
+ " <th>OppWin max</th>\n",
998
+ " <th>OppWin std</th>\n",
999
+ " <th>OppWin median</th>\n",
1000
+ " <th>OppWin mean</th>\n",
1001
  " </tr>\n",
1002
  " </thead>\n",
1003
  " <tbody>\n",
 
1011
  " <td>10.808339</td>\n",
1012
  " <td>61.0</td>\n",
1013
  " <td>58.965517</td>\n",
1014
+ " <td>3129</td>\n",
1015
+ " <td>3451</td>\n",
1016
  " <td>...</td>\n",
 
 
 
 
 
1017
  " <td>0</td>\n",
1018
  " <td>1</td>\n",
1019
  " <td>0.508548</td>\n",
1020
  " <td>0.0</td>\n",
1021
  " <td>0.482759</td>\n",
1022
+ " <td>0</td>\n",
1023
+ " <td>1</td>\n",
1024
+ " <td>0.508548</td>\n",
1025
+ " <td>1.0</td>\n",
1026
+ " <td>0.517241</td>\n",
1027
  " </tr>\n",
1028
  " <tr>\n",
1029
  " <th>6900</th>\n",
 
1035
  " <td>12.283247</td>\n",
1036
  " <td>67.0</td>\n",
1037
  " <td>66.466667</td>\n",
1038
+ " <td>1111</td>\n",
1039
+ " <td>1450</td>\n",
1040
  " <td>...</td>\n",
 
 
 
 
 
1041
  " <td>0</td>\n",
1042
  " <td>1</td>\n",
1043
  " <td>0.479463</td>\n",
1044
  " <td>0.0</td>\n",
1045
  " <td>0.333333</td>\n",
1046
+ " <td>0</td>\n",
1047
+ " <td>1</td>\n",
1048
+ " <td>0.479463</td>\n",
1049
+ " <td>1.0</td>\n",
1050
+ " <td>0.666667</td>\n",
1051
  " </tr>\n",
1052
  " <tr>\n",
1053
  " <th>4406</th>\n",
 
1059
  " <td>10.019980</td>\n",
1060
  " <td>72.0</td>\n",
1061
  " <td>73.000000</td>\n",
1062
+ " <td>1132</td>\n",
1063
+ " <td>1458</td>\n",
1064
  " <td>...</td>\n",
 
 
 
 
 
1065
  " <td>0</td>\n",
1066
  " <td>1</td>\n",
1067
  " <td>0.508001</td>\n",
1068
  " <td>0.0</td>\n",
1069
  " <td>0.483871</td>\n",
1070
+ " <td>0</td>\n",
1071
+ " <td>1</td>\n",
1072
+ " <td>0.508001</td>\n",
1073
+ " <td>1.0</td>\n",
1074
+ " <td>0.516129</td>\n",
1075
  " </tr>\n",
1076
  " <tr>\n",
1077
  " <th>4233</th>\n",
 
1083
  " <td>12.911860</td>\n",
1084
  " <td>77.0</td>\n",
1085
  " <td>75.870968</td>\n",
1086
+ " <td>1102</td>\n",
1087
+ " <td>1461</td>\n",
1088
  " <td>...</td>\n",
 
 
 
 
 
1089
  " <td>0</td>\n",
1090
  " <td>1</td>\n",
1091
  " <td>0.401610</td>\n",
1092
  " <td>1.0</td>\n",
1093
  " <td>0.806452</td>\n",
1094
+ " <td>0</td>\n",
1095
+ " <td>1</td>\n",
1096
+ " <td>0.401610</td>\n",
1097
+ " <td>0.0</td>\n",
1098
+ " <td>0.193548</td>\n",
1099
  " </tr>\n",
1100
  " <tr>\n",
1101
  " <th>3407</th>\n",
 
1107
  " <td>11.841315</td>\n",
1108
  " <td>75.5</td>\n",
1109
  " <td>75.906250</td>\n",
1110
+ " <td>1153</td>\n",
1111
+ " <td>1458</td>\n",
1112
  " <td>...</td>\n",
 
 
 
 
 
1113
  " <td>0</td>\n",
1114
  " <td>1</td>\n",
1115
  " <td>0.456803</td>\n",
1116
  " <td>1.0</td>\n",
1117
  " <td>0.718750</td>\n",
1118
+ " <td>0</td>\n",
1119
+ " <td>1</td>\n",
1120
+ " <td>0.456803</td>\n",
1121
+ " <td>0.0</td>\n",
1122
+ " <td>0.281250</td>\n",
1123
  " </tr>\n",
1124
  " <tr>\n",
1125
  " <th>5190</th>\n",
 
1131
  " <td>10.298567</td>\n",
1132
  " <td>67.0</td>\n",
1133
  " <td>65.062500</td>\n",
1134
+ " <td>1102</td>\n",
1135
+ " <td>1464</td>\n",
1136
  " <td>...</td>\n",
 
 
 
 
 
1137
  " <td>0</td>\n",
1138
  " <td>1</td>\n",
1139
  " <td>0.470929</td>\n",
1140
  " <td>0.0</td>\n",
1141
  " <td>0.312500</td>\n",
1142
+ " <td>0</td>\n",
1143
+ " <td>1</td>\n",
1144
+ " <td>0.470929</td>\n",
1145
+ " <td>1.0</td>\n",
1146
+ " <td>0.687500</td>\n",
1147
  " </tr>\n",
1148
  " <tr>\n",
1149
  " <th>1892</th>\n",
 
1155
  " <td>14.194618</td>\n",
1156
  " <td>76.0</td>\n",
1157
  " <td>76.777778</td>\n",
1158
+ " <td>1125</td>\n",
1159
+ " <td>1424</td>\n",
1160
  " <td>...</td>\n",
 
 
 
 
 
1161
  " <td>0</td>\n",
1162
  " <td>1</td>\n",
1163
  " <td>0.492103</td>\n",
1164
  " <td>0.0</td>\n",
1165
  " <td>0.370370</td>\n",
1166
+ " <td>0</td>\n",
1167
+ " <td>1</td>\n",
1168
+ " <td>0.492103</td>\n",
1169
+ " <td>1.0</td>\n",
1170
+ " <td>0.629630</td>\n",
1171
  " </tr>\n",
1172
  " <tr>\n",
1173
  " <th>10020</th>\n",
 
1179
  " <td>13.385137</td>\n",
1180
  " <td>53.0</td>\n",
1181
  " <td>55.476190</td>\n",
1182
+ " <td>3124</td>\n",
1183
+ " <td>3418</td>\n",
1184
  " <td>...</td>\n",
 
 
 
 
 
1185
  " <td>0</td>\n",
1186
  " <td>1</td>\n",
1187
  " <td>0.462910</td>\n",
1188
  " <td>0.0</td>\n",
1189
  " <td>0.285714</td>\n",
1190
+ " <td>0</td>\n",
1191
+ " <td>1</td>\n",
1192
+ " <td>0.462910</td>\n",
1193
+ " <td>1.0</td>\n",
1194
+ " <td>0.714286</td>\n",
1195
  " </tr>\n",
1196
  " <tr>\n",
1197
  " <th>9567</th>\n",
 
1203
  " <td>11.319009</td>\n",
1204
  " <td>62.5</td>\n",
1205
  " <td>63.593750</td>\n",
1206
+ " <td>3120</td>\n",
1207
+ " <td>3404</td>\n",
1208
  " <td>...</td>\n",
 
 
 
 
 
1209
  " <td>0</td>\n",
1210
  " <td>1</td>\n",
1211
  " <td>0.504016</td>\n",
1212
  " <td>0.0</td>\n",
1213
  " <td>0.437500</td>\n",
1214
+ " <td>0</td>\n",
1215
+ " <td>1</td>\n",
1216
+ " <td>0.504016</td>\n",
1217
+ " <td>1.0</td>\n",
1218
+ " <td>0.562500</td>\n",
1219
  " </tr>\n",
1220
  " <tr>\n",
1221
  " <th>12617</th>\n",
 
1227
  " <td>12.518374</td>\n",
1228
  " <td>65.0</td>\n",
1229
  " <td>65.750000</td>\n",
1230
+ " <td>3148</td>\n",
1231
+ " <td>3438</td>\n",
1232
  " <td>...</td>\n",
 
 
 
 
 
1233
  " <td>0</td>\n",
1234
  " <td>1</td>\n",
1235
  " <td>0.456803</td>\n",
1236
  " <td>1.0</td>\n",
1237
  " <td>0.718750</td>\n",
1238
+ " <td>0</td>\n",
1239
+ " <td>1</td>\n",
1240
+ " <td>0.456803</td>\n",
1241
+ " <td>0.0</td>\n",
1242
+ " <td>0.281250</td>\n",
1243
  " </tr>\n",
1244
  " </tbody>\n",
1245
  "</table>\n",
1246
+ "<p>10 rows × 228 columns</p>\n",
1247
  "</div>"
1248
  ],
1249
  "text/plain": [
 
1259
  "9567 3240 2014 W 43 84 11.319009 \n",
1260
  "12617 3452 2011 W 39 90 12.518374 \n",
1261
  "\n",
1262
+ " TeamScore median TeamScore mean OppTeamID min OppTeamID max ... \\\n",
1263
+ "12348 61.0 58.965517 3129 3451 ... \n",
1264
+ "6900 67.0 66.466667 1111 1450 ... \n",
1265
+ "4406 72.0 73.000000 1132 1458 ... \n",
1266
+ "4233 77.0 75.870968 1102 1461 ... \n",
1267
+ "3407 75.5 75.906250 1153 1458 ... \n",
1268
+ "5190 67.0 65.062500 1102 1464 ... \n",
1269
+ "1892 76.0 76.777778 1125 1424 ... \n",
1270
+ "10020 53.0 55.476190 3124 3418 ... \n",
1271
+ "9567 62.5 63.593750 3120 3404 ... \n",
1272
+ "12617 65.0 65.750000 3148 3438 ... \n",
1273
  "\n",
1274
+ " Win min Win max Win std Win median Win mean OppWin min \\\n",
1275
+ "12348 0 1 0.508548 0.0 0.482759 0 \n",
1276
+ "6900 0 1 0.479463 0.0 0.333333 0 \n",
1277
+ "4406 0 1 0.508001 0.0 0.483871 0 \n",
1278
+ "4233 0 1 0.401610 1.0 0.806452 0 \n",
1279
+ "3407 0 1 0.456803 1.0 0.718750 0 \n",
1280
+ "5190 0 1 0.470929 0.0 0.312500 0 \n",
1281
+ "1892 0 1 0.492103 0.0 0.370370 0 \n",
1282
+ "10020 0 1 0.462910 0.0 0.285714 0 \n",
1283
+ "9567 0 1 0.504016 0.0 0.437500 0 \n",
1284
+ "12617 0 1 0.456803 1.0 0.718750 0 \n",
1285
  "\n",
1286
+ " OppWin max OppWin std OppWin median OppWin mean \n",
1287
+ "12348 1 0.508548 1.0 0.517241 \n",
1288
+ "6900 1 0.479463 1.0 0.666667 \n",
1289
+ "4406 1 0.508001 1.0 0.516129 \n",
1290
+ "4233 1 0.401610 0.0 0.193548 \n",
1291
+ "3407 1 0.456803 0.0 0.281250 \n",
1292
+ "5190 1 0.470929 1.0 0.687500 \n",
1293
+ "1892 1 0.492103 1.0 0.629630 \n",
1294
+ "10020 1 0.462910 1.0 0.714286 \n",
1295
+ "9567 1 0.504016 1.0 0.562500 \n",
1296
+ "12617 1 0.456803 0.0 0.281250 \n",
1297
  "\n",
1298
+ "[10 rows x 228 columns]"
1299
  ]
1300
  },
1301
+ "execution_count": 13,
1302
  "metadata": {},
1303
  "output_type": "execute_result"
1304
  }
 
1317
  },
1318
  {
1319
  "cell_type": "code",
1320
+ "execution_count": 14,
1321
  "metadata": {},
1322
  "outputs": [
1323
  {
 
1349
  " <th>TeamScore std</th>\n",
1350
  " <th>TeamScore median</th>\n",
1351
  " <th>TeamScore mean</th>\n",
1352
+ " <th>OppTeamID min</th>\n",
1353
+ " <th>OppTeamID max</th>\n",
1354
  " <th>...</th>\n",
 
 
 
 
 
1355
  " <th>Win min</th>\n",
1356
  " <th>Win max</th>\n",
1357
  " <th>Win std</th>\n",
1358
  " <th>Win median</th>\n",
1359
  " <th>Win mean</th>\n",
1360
+ " <th>OppWin min</th>\n",
1361
+ " <th>OppWin max</th>\n",
1362
+ " <th>OppWin std</th>\n",
1363
+ " <th>OppWin median</th>\n",
1364
+ " <th>OppWin mean</th>\n",
1365
  " </tr>\n",
1366
  " </thead>\n",
1367
  " <tbody>\n",
 
1375
  " <td>10.408330</td>\n",
1376
  " <td>77.0</td>\n",
1377
  " <td>73.666667</td>\n",
1378
+ " <td>1165</td>\n",
1379
+ " <td>1400</td>\n",
1380
  " <td>...</td>\n",
 
 
 
 
 
1381
  " <td>0</td>\n",
1382
  " <td>1</td>\n",
1383
  " <td>0.577350</td>\n",
1384
  " <td>1.0</td>\n",
1385
  " <td>0.666667</td>\n",
1386
+ " <td>0</td>\n",
1387
+ " <td>1</td>\n",
1388
+ " <td>0.577350</td>\n",
1389
+ " <td>0.0</td>\n",
1390
+ " <td>0.333333</td>\n",
1391
  " </tr>\n",
1392
  " <tr>\n",
1393
  " <th>1601</th>\n",
 
1399
  " <td>NaN</td>\n",
1400
  " <td>63.0</td>\n",
1401
  " <td>63.000000</td>\n",
1402
+ " <td>3246</td>\n",
1403
+ " <td>3246</td>\n",
1404
  " <td>...</td>\n",
 
 
 
 
 
1405
  " <td>0</td>\n",
1406
  " <td>0</td>\n",
1407
  " <td>NaN</td>\n",
1408
  " <td>0.0</td>\n",
1409
  " <td>0.000000</td>\n",
1410
+ " <td>1</td>\n",
1411
+ " <td>1</td>\n",
1412
+ " <td>NaN</td>\n",
1413
+ " <td>1.0</td>\n",
1414
+ " <td>1.000000</td>\n",
1415
  " </tr>\n",
1416
  " <tr>\n",
1417
  " <th>1805</th>\n",
 
1423
  " <td>NaN</td>\n",
1424
  " <td>63.0</td>\n",
1425
  " <td>63.000000</td>\n",
1426
+ " <td>3343</td>\n",
1427
+ " <td>3343</td>\n",
1428
  " <td>...</td>\n",
 
 
 
 
 
1429
  " <td>0</td>\n",
1430
  " <td>0</td>\n",
1431
  " <td>NaN</td>\n",
1432
  " <td>0.0</td>\n",
1433
  " <td>0.000000</td>\n",
1434
+ " <td>1</td>\n",
1435
+ " <td>1</td>\n",
1436
+ " <td>NaN</td>\n",
1437
+ " <td>1.0</td>\n",
1438
+ " <td>1.000000</td>\n",
1439
  " </tr>\n",
1440
  " <tr>\n",
1441
  " <th>952</th>\n",
 
1447
  " <td>1.414214</td>\n",
1448
  " <td>73.0</td>\n",
1449
  " <td>73.000000</td>\n",
1450
+ " <td>1257</td>\n",
1451
+ " <td>1326</td>\n",
1452
  " <td>...</td>\n",
 
1453
  " <td>0</td>\n",
1454
+ " <td>1</td>\n",
1455
+ " <td>0.707107</td>\n",
1456
+ " <td>0.5</td>\n",
1457
+ " <td>0.500000</td>\n",
1458
  " <td>0</td>\n",
1459
  " <td>1</td>\n",
1460
  " <td>0.707107</td>\n",
 
1471
  " <td>NaN</td>\n",
1472
  " <td>65.0</td>\n",
1473
  " <td>65.000000</td>\n",
1474
+ " <td>1301</td>\n",
1475
+ " <td>1301</td>\n",
1476
  " <td>...</td>\n",
 
 
 
 
 
1477
  " <td>0</td>\n",
1478
  " <td>0</td>\n",
1479
  " <td>NaN</td>\n",
1480
  " <td>0.0</td>\n",
1481
  " <td>0.000000</td>\n",
1482
+ " <td>1</td>\n",
1483
+ " <td>1</td>\n",
1484
+ " <td>NaN</td>\n",
1485
+ " <td>1.0</td>\n",
1486
+ " <td>1.000000</td>\n",
1487
  " </tr>\n",
1488
  " <tr>\n",
1489
  " <th>1381</th>\n",
 
1495
  " <td>9.912114</td>\n",
1496
  " <td>81.0</td>\n",
1497
  " <td>80.250000</td>\n",
1498
+ " <td>3143</td>\n",
1499
+ " <td>3443</td>\n",
1500
  " <td>...</td>\n",
 
 
 
 
 
1501
  " <td>0</td>\n",
1502
  " <td>1</td>\n",
1503
  " <td>0.500000</td>\n",
1504
  " <td>1.0</td>\n",
1505
  " <td>0.750000</td>\n",
1506
+ " <td>0</td>\n",
1507
+ " <td>1</td>\n",
1508
+ " <td>0.500000</td>\n",
1509
+ " <td>0.0</td>\n",
1510
+ " <td>0.250000</td>\n",
1511
  " </tr>\n",
1512
  " <tr>\n",
1513
  " <th>1266</th>\n",
 
1519
  " <td>8.485281</td>\n",
1520
  " <td>78.0</td>\n",
1521
  " <td>78.000000</td>\n",
1522
+ " <td>1287</td>\n",
1523
+ " <td>1393</td>\n",
1524
  " <td>...</td>\n",
1525
+ " <td>0</td>\n",
1526
+ " <td>1</td>\n",
1527
+ " <td>0.707107</td>\n",
1528
+ " <td>0.5</td>\n",
1529
+ " <td>0.500000</td>\n",
1530
  " <td>0</td>\n",
1531
  " <td>1</td>\n",
1532
  " <td>0.707107</td>\n",
 
1543
  " <td>NaN</td>\n",
1544
  " <td>69.0</td>\n",
1545
  " <td>69.000000</td>\n",
1546
+ " <td>3393</td>\n",
1547
+ " <td>3393</td>\n",
1548
  " <td>...</td>\n",
 
 
 
 
 
1549
  " <td>0</td>\n",
1550
  " <td>0</td>\n",
1551
  " <td>NaN</td>\n",
1552
  " <td>0.0</td>\n",
1553
  " <td>0.000000</td>\n",
1554
+ " <td>1</td>\n",
1555
+ " <td>1</td>\n",
1556
+ " <td>NaN</td>\n",
1557
+ " <td>1.0</td>\n",
1558
+ " <td>1.000000</td>\n",
1559
  " </tr>\n",
1560
  " <tr>\n",
1561
  " <th>697</th>\n",
 
1567
  " <td>NaN</td>\n",
1568
  " <td>63.0</td>\n",
1569
  " <td>63.000000</td>\n",
1570
+ " <td>1166</td>\n",
1571
+ " <td>1166</td>\n",
1572
  " <td>...</td>\n",
 
 
 
 
 
1573
  " <td>0</td>\n",
1574
  " <td>0</td>\n",
1575
  " <td>NaN</td>\n",
1576
  " <td>0.0</td>\n",
1577
  " <td>0.000000</td>\n",
1578
+ " <td>1</td>\n",
1579
+ " <td>1</td>\n",
1580
+ " <td>NaN</td>\n",
1581
+ " <td>1.0</td>\n",
1582
+ " <td>1.000000</td>\n",
1583
  " </tr>\n",
1584
  " <tr>\n",
1585
  " <th>763</th>\n",
 
1591
  " <td>1.527525</td>\n",
1592
  " <td>70.0</td>\n",
1593
  " <td>69.666667</td>\n",
1594
+ " <td>1112</td>\n",
1595
+ " <td>1454</td>\n",
1596
  " <td>...</td>\n",
 
 
 
 
 
1597
  " <td>0</td>\n",
1598
  " <td>1</td>\n",
1599
  " <td>0.577350</td>\n",
1600
  " <td>1.0</td>\n",
1601
  " <td>0.666667</td>\n",
1602
+ " <td>0</td>\n",
1603
+ " <td>1</td>\n",
1604
+ " <td>0.577350</td>\n",
1605
+ " <td>0.0</td>\n",
1606
+ " <td>0.333333</td>\n",
1607
  " </tr>\n",
1608
  " </tbody>\n",
1609
  "</table>\n",
1610
+ "<p>10 rows × 228 columns</p>\n",
1611
  "</div>"
1612
  ],
1613
  "text/plain": [
 
1623
  "697 1301 2023 M 63 63 NaN \n",
1624
  "763 1323 2003 M 68 71 1.527525 \n",
1625
  "\n",
1626
+ " TeamScore median TeamScore mean OppTeamID min OppTeamID max ... \\\n",
1627
+ "995 77.0 73.666667 1165 1400 ... \n",
1628
+ "1601 63.0 63.000000 3246 3246 ... \n",
1629
+ "1805 63.0 63.000000 3343 3343 ... \n",
1630
+ "952 73.0 73.000000 1257 1326 ... \n",
1631
+ "924 65.0 65.000000 1301 1301 ... \n",
1632
+ "1381 81.0 80.250000 3143 3443 ... \n",
1633
+ "1266 78.0 78.000000 1287 1393 ... \n",
1634
+ "1810 69.0 69.000000 3393 3393 ... \n",
1635
+ "697 63.0 63.000000 1166 1166 ... \n",
1636
+ "763 70.0 69.666667 1112 1454 ... \n",
1637
  "\n",
1638
+ " Win min Win max Win std Win median Win mean OppWin min \\\n",
1639
+ "995 0 1 0.577350 1.0 0.666667 0 \n",
1640
+ "1601 0 0 NaN 0.0 0.000000 1 \n",
1641
+ "1805 0 0 NaN 0.0 0.000000 1 \n",
1642
+ "952 0 1 0.707107 0.5 0.500000 0 \n",
1643
+ "924 0 0 NaN 0.0 0.000000 1 \n",
1644
+ "1381 0 1 0.500000 1.0 0.750000 0 \n",
1645
+ "1266 0 1 0.707107 0.5 0.500000 0 \n",
1646
+ "1810 0 0 NaN 0.0 0.000000 1 \n",
1647
+ "697 0 0 NaN 0.0 0.000000 1 \n",
1648
+ "763 0 1 0.577350 1.0 0.666667 0 \n",
1649
  "\n",
1650
+ " OppWin max OppWin std OppWin median OppWin mean \n",
1651
+ "995 1 0.577350 0.0 0.333333 \n",
1652
+ "1601 1 NaN 1.0 1.000000 \n",
1653
+ "1805 1 NaN 1.0 1.000000 \n",
1654
+ "952 1 0.707107 0.5 0.500000 \n",
1655
+ "924 1 NaN 1.0 1.000000 \n",
1656
+ "1381 1 0.500000 0.0 0.250000 \n",
1657
+ "1266 1 0.707107 0.5 0.500000 \n",
1658
+ "1810 1 NaN 1.0 1.000000 \n",
1659
+ "697 1 NaN 1.0 1.000000 \n",
1660
+ "763 1 0.577350 0.0 0.333333 \n",
1661
  "\n",
1662
+ "[10 rows x 228 columns]"
1663
  ]
1664
  },
1665
+ "execution_count": 14,
1666
  "metadata": {},
1667
  "output_type": "execute_result"
1668
  }
 
1690
  },
1691
  {
1692
  "cell_type": "code",
1693
+ "execution_count": 15,
1694
  "metadata": {},
1695
  "outputs": [
1696
  {
 
1806
  "4 2024 1 "
1807
  ]
1808
  },
1809
+ "execution_count": 15,
1810
  "metadata": {},
1811
  "output_type": "execute_result"
1812
  }
 
1876
  " <th>TeamScore std reg</th>\n",
1877
  " <th>TeamScore median reg</th>\n",
1878
  " <th>TeamScore mean reg</th>\n",
1879
+ " <th>OppTeamID min reg</th>\n",
1880
+ " <th>OppTeamID max reg</th>\n",
1881
  " <th>...</th>\n",
 
 
 
 
 
1882
  " <th>Win min tourney</th>\n",
1883
  " <th>Win max tourney</th>\n",
1884
  " <th>Win std tourney</th>\n",
1885
  " <th>Win median tourney</th>\n",
1886
  " <th>Win mean tourney</th>\n",
1887
+ " <th>OppWin min tourney</th>\n",
1888
+ " <th>OppWin max tourney</th>\n",
1889
+ " <th>OppWin std tourney</th>\n",
1890
+ " <th>OppWin median tourney</th>\n",
1891
+ " <th>OppWin mean tourney</th>\n",
1892
  " </tr>\n",
1893
  " </thead>\n",
1894
  " <tbody>\n",
 
1902
  " <td>10.808339</td>\n",
1903
  " <td>61.0</td>\n",
1904
  " <td>58.965517</td>\n",
1905
+ " <td>3129</td>\n",
1906
+ " <td>3451</td>\n",
1907
  " <td>...</td>\n",
1908
  " <td>NaN</td>\n",
1909
  " <td>NaN</td>\n",
 
1926
  " <td>12.283247</td>\n",
1927
  " <td>67.0</td>\n",
1928
  " <td>66.466667</td>\n",
1929
+ " <td>1111</td>\n",
1930
+ " <td>1450</td>\n",
1931
  " <td>...</td>\n",
1932
  " <td>NaN</td>\n",
1933
  " <td>NaN</td>\n",
 
1950
  " <td>10.019980</td>\n",
1951
  " <td>72.0</td>\n",
1952
  " <td>73.000000</td>\n",
1953
+ " <td>1132</td>\n",
1954
+ " <td>1458</td>\n",
1955
  " <td>...</td>\n",
1956
  " <td>NaN</td>\n",
1957
  " <td>NaN</td>\n",
 
1974
  " <td>12.911860</td>\n",
1975
  " <td>77.0</td>\n",
1976
  " <td>75.870968</td>\n",
1977
+ " <td>1102</td>\n",
1978
+ " <td>1461</td>\n",
1979
  " <td>...</td>\n",
 
 
 
 
 
1980
  " <td>0.0</td>\n",
1981
  " <td>0.0</td>\n",
1982
  " <td>NaN</td>\n",
1983
  " <td>0.0</td>\n",
1984
  " <td>0.0</td>\n",
1985
+ " <td>1.0</td>\n",
1986
+ " <td>1.0</td>\n",
1987
+ " <td>NaN</td>\n",
1988
+ " <td>1.0</td>\n",
1989
+ " <td>1.0</td>\n",
1990
  " </tr>\n",
1991
  " <tr>\n",
1992
  " <th>3407</th>\n",
 
1998
  " <td>11.841315</td>\n",
1999
  " <td>75.5</td>\n",
2000
  " <td>75.906250</td>\n",
2001
+ " <td>1153</td>\n",
2002
+ " <td>1458</td>\n",
2003
  " <td>...</td>\n",
2004
  " <td>0.0</td>\n",
2005
+ " <td>1.0</td>\n",
2006
+ " <td>0.707107</td>\n",
2007
+ " <td>0.5</td>\n",
2008
+ " <td>0.5</td>\n",
2009
  " <td>0.0</td>\n",
2010
  " <td>1.0</td>\n",
2011
  " <td>0.707107</td>\n",
 
2022
  " <td>10.298567</td>\n",
2023
  " <td>67.0</td>\n",
2024
  " <td>65.062500</td>\n",
2025
+ " <td>1102</td>\n",
2026
+ " <td>1464</td>\n",
2027
  " <td>...</td>\n",
2028
  " <td>NaN</td>\n",
2029
  " <td>NaN</td>\n",
 
2046
  " <td>14.194618</td>\n",
2047
  " <td>76.0</td>\n",
2048
  " <td>76.777778</td>\n",
2049
+ " <td>1125</td>\n",
2050
+ " <td>1424</td>\n",
2051
  " <td>...</td>\n",
2052
  " <td>NaN</td>\n",
2053
  " <td>NaN</td>\n",
 
2070
  " <td>13.385137</td>\n",
2071
  " <td>53.0</td>\n",
2072
  " <td>55.476190</td>\n",
2073
+ " <td>3124</td>\n",
2074
+ " <td>3418</td>\n",
2075
  " <td>...</td>\n",
2076
  " <td>NaN</td>\n",
2077
  " <td>NaN</td>\n",
 
2094
  " <td>11.319009</td>\n",
2095
  " <td>62.5</td>\n",
2096
  " <td>63.593750</td>\n",
2097
+ " <td>3120</td>\n",
2098
+ " <td>3404</td>\n",
2099
  " <td>...</td>\n",
2100
  " <td>NaN</td>\n",
2101
  " <td>NaN</td>\n",
 
2118
  " <td>12.518374</td>\n",
2119
  " <td>65.0</td>\n",
2120
  " <td>65.750000</td>\n",
2121
+ " <td>3148</td>\n",
2122
+ " <td>3438</td>\n",
2123
  " <td>...</td>\n",
2124
  " <td>0.0</td>\n",
2125
+ " <td>1.0</td>\n",
2126
+ " <td>0.707107</td>\n",
2127
+ " <td>0.5</td>\n",
2128
+ " <td>0.5</td>\n",
2129
  " <td>0.0</td>\n",
2130
  " <td>1.0</td>\n",
2131
  " <td>0.707107</td>\n",
 
2134
  " </tr>\n",
2135
  " </tbody>\n",
2136
  "</table>\n",
2137
+ "<p>10 rows × 453 columns</p>\n",
2138
  "</div>"
2139
  ],
2140
  "text/plain": [
 
2162
  "9567 11.319009 62.5 63.593750 \n",
2163
  "12617 12.518374 65.0 65.750000 \n",
2164
  "\n",
2165
+ " OppTeamID min reg OppTeamID max reg ... Win min tourney \\\n",
2166
+ "12348 3129 3451 ... NaN \n",
2167
+ "6900 1111 1450 ... NaN \n",
2168
+ "4406 1132 1458 ... NaN \n",
2169
+ "4233 1102 1461 ... 0.0 \n",
2170
+ "3407 1153 1458 ... 0.0 \n",
2171
+ "5190 1102 1464 ... NaN \n",
2172
+ "1892 1125 1424 ... NaN \n",
2173
+ "10020 3124 3418 ... NaN \n",
2174
+ "9567 3120 3404 ... NaN \n",
2175
+ "12617 3148 3438 ... 0.0 \n",
2176
  "\n",
2177
+ " Win max tourney Win std tourney Win median tourney Win mean tourney \\\n",
2178
+ "12348 NaN NaN NaN NaN \n",
2179
+ "6900 NaN NaN NaN NaN \n",
2180
+ "4406 NaN NaN NaN NaN \n",
2181
+ "4233 0.0 NaN 0.0 0.0 \n",
2182
+ "3407 1.0 0.707107 0.5 0.5 \n",
2183
+ "5190 NaN NaN NaN NaN \n",
2184
+ "1892 NaN NaN NaN NaN \n",
2185
+ "10020 NaN NaN NaN NaN \n",
2186
+ "9567 NaN NaN NaN NaN \n",
2187
+ "12617 1.0 0.707107 0.5 0.5 \n",
2188
  "\n",
2189
+ " OppWin min tourney OppWin max tourney OppWin std tourney \\\n",
2190
+ "12348 NaN NaN NaN \n",
2191
+ "6900 NaN NaN NaN \n",
2192
+ "4406 NaN NaN NaN \n",
2193
+ "4233 1.0 1.0 NaN \n",
2194
+ "3407 0.0 1.0 0.707107 \n",
2195
+ "5190 NaN NaN NaN \n",
2196
+ "1892 NaN NaN NaN \n",
2197
+ "10020 NaN NaN NaN \n",
2198
+ "9567 NaN NaN NaN \n",
2199
+ "12617 0.0 1.0 0.707107 \n",
2200
  "\n",
2201
+ " OppWin median tourney OppWin mean tourney \n",
2202
+ "12348 NaN NaN \n",
2203
+ "6900 NaN NaN \n",
2204
+ "4406 NaN NaN \n",
2205
+ "4233 1.0 1.0 \n",
2206
+ "3407 0.5 0.5 \n",
2207
+ "5190 NaN NaN \n",
2208
+ "1892 NaN NaN \n",
2209
+ "10020 NaN NaN \n",
2210
+ "9567 NaN NaN \n",
2211
+ "12617 0.5 0.5 \n",
2212
  "\n",
2213
+ "[10 rows x 453 columns]"
2214
  ]
2215
  },
2216
  "execution_count": 16,
 
2239
  "outputs": [],
2240
  "source": [
2241
  "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
 
2242
  "team_agg_df2 = pd.merge(\n",
2243
  " left=team_agg_df,\n",
2244
  " right=team_conf_seeds_df[team_conf_seeds_df[\"Season\"] >= 2003],\n",
 
2261
  "text": [
2262
  "<class 'pandas.core.frame.DataFrame'>\n",
2263
  "Int64Index: 12857 entries, 0 to 12856\n",
2264
+ "Columns: 459 entries, TeamID to ChalkSeed\n",
2265
+ "dtypes: float64(363), int64(92), object(4)\n",
2266
+ "memory usage: 45.1+ MB\n"
2267
  ]
2268
  }
2269
  ],
 
2282
  "text": [
2283
  "<class 'pandas.core.frame.DataFrame'>\n",
2284
  "Int64Index: 377608 entries, 0 to 377607\n",
2285
+ "Columns: 508 entries, Season to ChalkSeed\n",
2286
+ "dtypes: float64(363), int64(138), object(7)\n",
2287
  "memory usage: 1.4+ GB\n"
2288
  ]
2289
  }
 
2303
  },
2304
  {
2305
  "cell_type": "code",
2306
+ "execution_count": 20,
2307
  "metadata": {},
2308
  "outputs": [
2309
  {
2310
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2311
  "text/plain": [
2312
+ "0 1328\n",
2313
+ "1 1393\n",
2314
+ "2 1437\n",
2315
+ "3 1457\n",
2316
+ "4 1208\n",
2317
+ " ... \n",
2318
+ "377603 3376\n",
2319
+ "377604 3439\n",
2320
+ "377605 3234\n",
2321
+ "377606 3261\n",
2322
+ "377607 3261\n",
2323
+ "Name: OppTeamID, Length: 377608, dtype: int64"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2324
  ]
2325
  },
2326
+ "execution_count": 20,
2327
  "metadata": {},
2328
  "output_type": "execute_result"
2329
  }
2330
  ],
2331
  "source": [
2332
+ "super_detailed_games_df[\"OppTeamID\"]"
2333
+ ]
2334
+ },
2335
+ {
2336
+ "cell_type": "code",
2337
+ "execution_count": 22,
2338
+ "metadata": {},
2339
+ "outputs": [],
2340
+ "source": [
2341
+ "# Attach each opponent's chalk seed to every game row.\n",
+ "# The lookup is keyed on (Season, TeamID): keying on TeamID alone would give\n",
+ "# every game, in every season, the team's last non-null seed in the table\n",
+ "# (GroupBy.last skips NaN), leaking seeds across seasons.\n",
+ "# NOTE(review): assumes ChalkSeed is a per-season value and that Season has the\n",
+ "# same dtype in both frames -- confirm against how team_conf_seeds_df is built.\n",
+ "opp_chalk_seed_map = team_conf_seeds_df.groupby([\"Season\", \"TeamID\"])[\"ChalkSeed\"].last()\n",
2342
+ "\n",
2343
+ "# Look up each game's (Season, OppTeamID) pair; pairs missing from the map become NaN.\n",
+ "opp_keys = pd.MultiIndex.from_frame(super_detailed_games_df[[\"Season\", \"OppTeamID\"]])\n",
+ "super_detailed_games_df[\"OppChalkSeed\"] = opp_chalk_seed_map.reindex(opp_keys).to_numpy()\n",
2344
+ "\n",
2345
+ "super_detailed_games_df.info()"
2346
+ ]
2347
+ },
2348
+ {
2349
+ "cell_type": "code",
2350
+ "execution_count": 24,
2351
+ "metadata": {},
2352
+ "outputs": [
2353
+ {
2354
+ "data": {
2355
+ "text/plain": [
2356
+ "0 8.0\n",
2357
+ "1 11.0\n",
2358
+ "2 2.0\n",
2359
+ "3 12.0\n",
2360
+ "4 10.0\n",
2361
+ " ... \n",
2362
+ "377603 NaN\n",
2363
+ "377604 NaN\n",
2364
+ "377605 NaN\n",
2365
+ "377606 NaN\n",
2366
+ "377607 NaN\n",
2367
+ "Name: OppChalkSeed, Length: 377608, dtype: float64"
2368
+ ]
2369
+ },
2370
+ "execution_count": 24,
2371
+ "metadata": {},
2372
+ "output_type": "execute_result"
2373
+ }
2374
+ ],
2375
+ "source": [
2376
+ "super_detailed_games_df[\"OppChalkSeed\"]"
2377
  ]
2378
  },
2379
  {
 
2385
  },
2386
  {
2387
  "cell_type": "code",
2388
+ "execution_count": 25,
2389
  "metadata": {},
2390
  "outputs": [],
2391
  "source": [
 
2397
  },
2398
  {
2399
  "cell_type": "code",
2400
+ "execution_count": 26,
2401
  "metadata": {},
2402
  "outputs": [],
2403
  "source": [