Jensen-holm committed on
Commit
ba98f59
1 Parent(s): 82c833d

got rid of unnecessary tournament and regular season split attributes

Browse files
Files changed (3) hide show
  1. data/AllTeamsAgg.csv +2 -2
  2. src/nn.ipynb +78 -0
  3. src/pre_processing.ipynb +198 -548
data/AllTeamsAgg.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ab394a5b002a3fcb96df32c0ef114c4a1d69cb885ca5d8fff739a39cc56dba1
3
- size 2953616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b49367eed1f1591006101b89cf44461ef268128d5865e8df4a3f708f8342c7
3
+ size 2913054
src/nn.ipynb ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import torch\n",
11
+ "import torch.nn as nn\n",
12
+ "import torch.optim as optim\n",
13
+ "from sklearn.model_selection import train_test_split\n",
14
+ "import os\n",
15
+ "\n",
16
+ "DATA_DIR = os.path.join(\"..\", \"data\")"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 7,
22
+ "metadata": {},
23
+ "outputs": [
24
+ {
25
+ "name": "stdout",
26
+ "output_type": "stream",
27
+ "text": [
28
+ "<class 'pandas.core.frame.DataFrame'>\n",
29
+ "RangeIndex: 1335 entries, 0 to 1334\n",
30
+ "Columns: 324 entries, Unnamed: 0 to LastD1Season tourney\n",
31
+ "dtypes: float64(186), int64(131), object(7)\n",
32
+ "memory usage: 3.3+ MB\n"
33
+ ]
34
+ }
35
+ ],
36
+ "source": [
37
+ "all_teams_agg_df = pd.read_csv(os.path.join(DATA_DIR, \"AllTeamsAgg.csv\"))\n",
38
+ "\n",
39
+ "all_teams_agg_df.info()"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": null,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "def get_device() -> str:\n",
49
+ " if torch.cuda.is_available():\n",
50
+ " return \"cuda\"\n",
51
+ " return \"cpu\"\n",
52
+ "\n",
53
+ "DEVICE = get_device() "
54
+ ]
55
+ }
56
+ ],
57
+ "metadata": {
58
+ "kernelspec": {
59
+ "display_name": "Python 3",
60
+ "language": "python",
61
+ "name": "python3"
62
+ },
63
+ "language_info": {
64
+ "codemirror_mode": {
65
+ "name": "ipython",
66
+ "version": 3
67
+ },
68
+ "file_extension": ".py",
69
+ "mimetype": "text/x-python",
70
+ "name": "python",
71
+ "nbconvert_exporter": "python",
72
+ "pygments_lexer": "ipython3",
73
+ "version": "3.11.7"
74
+ }
75
+ },
76
+ "nbformat": 4,
77
+ "nbformat_minor": 2
78
+ }
src/pre_processing.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -15,7 +15,7 @@
15
  },
16
  {
17
  "cell_type": "code",
18
- "execution_count": 2,
19
  "metadata": {},
20
  "outputs": [
21
  {
@@ -206,7 +206,7 @@
206
  "[5 rows x 35 columns]"
207
  ]
208
  },
209
- "execution_count": 2,
210
  "metadata": {},
211
  "output_type": "execute_result"
212
  }
@@ -222,7 +222,7 @@
222
  },
223
  {
224
  "cell_type": "code",
225
- "execution_count": 3,
226
  "metadata": {},
227
  "outputs": [
228
  {
@@ -413,7 +413,7 @@
413
  "[5 rows x 35 columns]"
414
  ]
415
  },
416
- "execution_count": 3,
417
  "metadata": {},
418
  "output_type": "execute_result"
419
  }
@@ -436,7 +436,7 @@
436
  },
437
  {
438
  "cell_type": "code",
439
- "execution_count": 4,
440
  "metadata": {},
441
  "outputs": [],
442
  "source": [
@@ -466,7 +466,7 @@
466
  },
467
  {
468
  "cell_type": "code",
469
- "execution_count": 5,
470
  "metadata": {},
471
  "outputs": [
472
  {
@@ -540,7 +540,7 @@
540
  },
541
  {
542
  "cell_type": "code",
543
- "execution_count": 6,
544
  "metadata": {},
545
  "outputs": [
546
  {
@@ -613,7 +613,7 @@
613
  },
614
  {
615
  "cell_type": "code",
616
- "execution_count": 7,
617
  "metadata": {},
618
  "outputs": [],
619
  "source": [
@@ -630,7 +630,7 @@
630
  },
631
  {
632
  "cell_type": "code",
633
- "execution_count": 8,
634
  "metadata": {},
635
  "outputs": [],
636
  "source": [
@@ -654,7 +654,7 @@
654
  },
655
  {
656
  "cell_type": "code",
657
- "execution_count": 9,
658
  "metadata": {},
659
  "outputs": [],
660
  "source": [
@@ -680,7 +680,7 @@
680
  },
681
  {
682
  "cell_type": "code",
683
- "execution_count": 10,
684
  "metadata": {
685
  "tags": []
686
  },
@@ -1027,7 +1027,7 @@
1027
  "[10 rows x 158 columns]"
1028
  ]
1029
  },
1030
- "execution_count": 10,
1031
  "metadata": {},
1032
  "output_type": "execute_result"
1033
  }
@@ -1046,7 +1046,7 @@
1046
  },
1047
  {
1048
  "cell_type": "code",
1049
- "execution_count": 11,
1050
  "metadata": {},
1051
  "outputs": [
1052
  {
@@ -1391,7 +1391,7 @@
1391
  "[10 rows x 158 columns]"
1392
  ]
1393
  },
1394
- "execution_count": 11,
1395
  "metadata": {},
1396
  "output_type": "execute_result"
1397
  }
@@ -1419,7 +1419,7 @@
1419
  },
1420
  {
1421
  "cell_type": "code",
1422
- "execution_count": 12,
1423
  "metadata": {},
1424
  "outputs": [
1425
  {
@@ -1594,7 +1594,7 @@
1594
  "1998 2024 "
1595
  ]
1596
  },
1597
- "execution_count": 12,
1598
  "metadata": {},
1599
  "output_type": "execute_result"
1600
  }
@@ -1621,368 +1621,24 @@
1621
  },
1622
  {
1623
  "cell_type": "code",
1624
- "execution_count": 13,
1625
  "metadata": {},
1626
- "outputs": [
1627
- {
1628
- "data": {
1629
- "text/html": [
1630
- "<div>\n",
1631
- "<style scoped>\n",
1632
- " .dataframe tbody tr th:only-of-type {\n",
1633
- " vertical-align: middle;\n",
1634
- " }\n",
1635
- "\n",
1636
- " .dataframe tbody tr th {\n",
1637
- " vertical-align: top;\n",
1638
- " }\n",
1639
- "\n",
1640
- " .dataframe thead th {\n",
1641
- " text-align: right;\n",
1642
- " }\n",
1643
- "</style>\n",
1644
- "<table border=\"1\" class=\"dataframe\">\n",
1645
- " <thead>\n",
1646
- " <tr style=\"text-align: right;\">\n",
1647
- " <th></th>\n",
1648
- " <th>TeamID</th>\n",
1649
- " <th>Season</th>\n",
1650
- " <th>League</th>\n",
1651
- " <th>TeamScore min</th>\n",
1652
- " <th>TeamScore max</th>\n",
1653
- " <th>TeamScore std</th>\n",
1654
- " <th>TeamScore median</th>\n",
1655
- " <th>TeamScore mean</th>\n",
1656
- " <th>OppScore min</th>\n",
1657
- " <th>OppScore max</th>\n",
1658
- " <th>...</th>\n",
1659
- " <th>Win min</th>\n",
1660
- " <th>Win max</th>\n",
1661
- " <th>Win std</th>\n",
1662
- " <th>Win median</th>\n",
1663
- " <th>Win mean</th>\n",
1664
- " <th>ConfAbbrev</th>\n",
1665
- " <th>Seed</th>\n",
1666
- " <th>TeamName</th>\n",
1667
- " <th>FirstD1Season</th>\n",
1668
- " <th>LastD1Season</th>\n",
1669
- " </tr>\n",
1670
- " </thead>\n",
1671
- " <tbody>\n",
1672
- " <tr>\n",
1673
- " <th>976</th>\n",
1674
- " <td>1387</td>\n",
1675
- " <td>2012</td>\n",
1676
- " <td>M</td>\n",
1677
- " <td>58</td>\n",
1678
- " <td>86</td>\n",
1679
- " <td>7.354612</td>\n",
1680
- " <td>68.0</td>\n",
1681
- " <td>69.096774</td>\n",
1682
- " <td>35</td>\n",
1683
- " <td>79</td>\n",
1684
- " <td>...</td>\n",
1685
- " <td>0</td>\n",
1686
- " <td>1</td>\n",
1687
- " <td>0.425024</td>\n",
1688
- " <td>1.0</td>\n",
1689
- " <td>0.774194</td>\n",
1690
- " <td>a_ten</td>\n",
1691
- " <td>Z09</td>\n",
1692
- " <td>St Louis</td>\n",
1693
- " <td>1985</td>\n",
1694
- " <td>2024</td>\n",
1695
- " </tr>\n",
1696
- " <tr>\n",
1697
- " <th>834</th>\n",
1698
- " <td>1332</td>\n",
1699
- " <td>2019</td>\n",
1700
- " <td>M</td>\n",
1701
- " <td>47</td>\n",
1702
- " <td>84</td>\n",
1703
- " <td>11.186577</td>\n",
1704
- " <td>72.0</td>\n",
1705
- " <td>70.485714</td>\n",
1706
- " <td>46</td>\n",
1707
- " <td>90</td>\n",
1708
- " <td>...</td>\n",
1709
- " <td>0</td>\n",
1710
- " <td>1</td>\n",
1711
- " <td>0.481594</td>\n",
1712
- " <td>1.0</td>\n",
1713
- " <td>0.657143</td>\n",
1714
- " <td>pac_twelve</td>\n",
1715
- " <td>Z12</td>\n",
1716
- " <td>Oregon</td>\n",
1717
- " <td>1985</td>\n",
1718
- " <td>2024</td>\n",
1719
- " </tr>\n",
1720
- " <tr>\n",
1721
- " <th>305</th>\n",
1722
- " <td>1199</td>\n",
1723
- " <td>2011</td>\n",
1724
- " <td>M</td>\n",
1725
- " <td>44</td>\n",
1726
- " <td>97</td>\n",
1727
- " <td>13.496634</td>\n",
1728
- " <td>69.0</td>\n",
1729
- " <td>69.322581</td>\n",
1730
- " <td>38</td>\n",
1731
- " <td>89</td>\n",
1732
- " <td>...</td>\n",
1733
- " <td>0</td>\n",
1734
- " <td>1</td>\n",
1735
- " <td>0.475191</td>\n",
1736
- " <td>1.0</td>\n",
1737
- " <td>0.677419</td>\n",
1738
- " <td>acc</td>\n",
1739
- " <td>Z10</td>\n",
1740
- " <td>Florida St</td>\n",
1741
- " <td>1985</td>\n",
1742
- " <td>2024</td>\n",
1743
- " </tr>\n",
1744
- " <tr>\n",
1745
- " <th>124</th>\n",
1746
- " <td>1139</td>\n",
1747
- " <td>2016</td>\n",
1748
- " <td>M</td>\n",
1749
- " <td>55</td>\n",
1750
- " <td>144</td>\n",
1751
- " <td>16.508128</td>\n",
1752
- " <td>78.0</td>\n",
1753
- " <td>80.580645</td>\n",
1754
- " <td>52</td>\n",
1755
- " <td>88</td>\n",
1756
- " <td>...</td>\n",
1757
- " <td>0</td>\n",
1758
- " <td>1</td>\n",
1759
- " <td>0.475191</td>\n",
1760
- " <td>1.0</td>\n",
1761
- " <td>0.677419</td>\n",
1762
- " <td>big_east</td>\n",
1763
- " <td>X09</td>\n",
1764
- " <td>Butler</td>\n",
1765
- " <td>1985</td>\n",
1766
- " <td>2024</td>\n",
1767
- " </tr>\n",
1768
- " <tr>\n",
1769
- " <th>1305</th>\n",
1770
- " <td>1458</td>\n",
1771
- " <td>2019</td>\n",
1772
- " <td>M</td>\n",
1773
- " <td>46</td>\n",
1774
- " <td>101</td>\n",
1775
- " <td>12.044032</td>\n",
1776
- " <td>69.0</td>\n",
1777
- " <td>69.060606</td>\n",
1778
- " <td>45</td>\n",
1779
- " <td>84</td>\n",
1780
- " <td>...</td>\n",
1781
- " <td>0</td>\n",
1782
- " <td>1</td>\n",
1783
- " <td>0.466694</td>\n",
1784
- " <td>1.0</td>\n",
1785
- " <td>0.696970</td>\n",
1786
- " <td>big_ten</td>\n",
1787
- " <td>Z05</td>\n",
1788
- " <td>Wisconsin</td>\n",
1789
- " <td>1985</td>\n",
1790
- " <td>2024</td>\n",
1791
- " </tr>\n",
1792
- " <tr>\n",
1793
- " <th>1174</th>\n",
1794
- " <td>1433</td>\n",
1795
- " <td>2019</td>\n",
1796
- " <td>M</td>\n",
1797
- " <td>49</td>\n",
1798
- " <td>90</td>\n",
1799
- " <td>10.441658</td>\n",
1800
- " <td>70.5</td>\n",
1801
- " <td>71.437500</td>\n",
1802
- " <td>36</td>\n",
1803
- " <td>87</td>\n",
1804
- " <td>...</td>\n",
1805
- " <td>0</td>\n",
1806
- " <td>1</td>\n",
1807
- " <td>0.420013</td>\n",
1808
- " <td>1.0</td>\n",
1809
- " <td>0.781250</td>\n",
1810
- " <td>a_ten</td>\n",
1811
- " <td>W08</td>\n",
1812
- " <td>VCU</td>\n",
1813
- " <td>1985</td>\n",
1814
- " <td>2024</td>\n",
1815
- " </tr>\n",
1816
- " <tr>\n",
1817
- " <th>567</th>\n",
1818
- " <td>1272</td>\n",
1819
- " <td>2004</td>\n",
1820
- " <td>M</td>\n",
1821
- " <td>60</td>\n",
1822
- " <td>94</td>\n",
1823
- " <td>10.228604</td>\n",
1824
- " <td>73.0</td>\n",
1825
- " <td>73.571429</td>\n",
1826
- " <td>48</td>\n",
1827
- " <td>85</td>\n",
1828
- " <td>...</td>\n",
1829
- " <td>0</td>\n",
1830
- " <td>1</td>\n",
1831
- " <td>0.440959</td>\n",
1832
- " <td>1.0</td>\n",
1833
- " <td>0.750000</td>\n",
1834
- " <td>cusa</td>\n",
1835
- " <td>Y07</td>\n",
1836
- " <td>Memphis</td>\n",
1837
- " <td>1985</td>\n",
1838
- " <td>2024</td>\n",
1839
- " </tr>\n",
1840
- " <tr>\n",
1841
- " <th>665</th>\n",
1842
- " <td>1292</td>\n",
1843
- " <td>2013</td>\n",
1844
- " <td>M</td>\n",
1845
- " <td>45</td>\n",
1846
- " <td>97</td>\n",
1847
- " <td>11.877053</td>\n",
1848
- " <td>72.0</td>\n",
1849
- " <td>71.242424</td>\n",
1850
- " <td>41</td>\n",
1851
- " <td>82</td>\n",
1852
- " <td>...</td>\n",
1853
- " <td>0</td>\n",
1854
- " <td>1</td>\n",
1855
- " <td>0.364110</td>\n",
1856
- " <td>1.0</td>\n",
1857
- " <td>0.848485</td>\n",
1858
- " <td>sun_belt</td>\n",
1859
- " <td>Y11a</td>\n",
1860
- " <td>MTSU</td>\n",
1861
- " <td>1985</td>\n",
1862
- " <td>2024</td>\n",
1863
- " </tr>\n",
1864
- " <tr>\n",
1865
- " <th>271</th>\n",
1866
- " <td>1186</td>\n",
1867
- " <td>2004</td>\n",
1868
- " <td>M</td>\n",
1869
- " <td>49</td>\n",
1870
- " <td>100</td>\n",
1871
- " <td>12.568700</td>\n",
1872
- " <td>70.0</td>\n",
1873
- " <td>70.250000</td>\n",
1874
- " <td>52</td>\n",
1875
- " <td>104</td>\n",
1876
- " <td>...</td>\n",
1877
- " <td>0</td>\n",
1878
- " <td>1</td>\n",
1879
- " <td>0.503953</td>\n",
1880
- " <td>1.0</td>\n",
1881
- " <td>0.571429</td>\n",
1882
- " <td>big_sky</td>\n",
1883
- " <td>Y15</td>\n",
1884
- " <td>E Washington</td>\n",
1885
- " <td>1985</td>\n",
1886
- " <td>2024</td>\n",
1887
- " </tr>\n",
1888
- " <tr>\n",
1889
- " <th>325</th>\n",
1890
- " <td>1207</td>\n",
1891
- " <td>2011</td>\n",
1892
- " <td>M</td>\n",
1893
- " <td>46</td>\n",
1894
- " <td>111</td>\n",
1895
- " <td>14.956568</td>\n",
1896
- " <td>69.0</td>\n",
1897
- " <td>71.032258</td>\n",
1898
- " <td>51</td>\n",
1899
- " <td>102</td>\n",
1900
- " <td>...</td>\n",
1901
- " <td>0</td>\n",
1902
- " <td>1</td>\n",
1903
- " <td>0.475191</td>\n",
1904
- " <td>1.0</td>\n",
1905
- " <td>0.677419</td>\n",
1906
- " <td>big_east</td>\n",
1907
- " <td>Z06</td>\n",
1908
- " <td>Georgetown</td>\n",
1909
- " <td>1985</td>\n",
1910
- " <td>2024</td>\n",
1911
- " </tr>\n",
1912
- " </tbody>\n",
1913
- "</table>\n",
1914
- "<p>10 rows × 163 columns</p>\n",
1915
- "</div>"
1916
- ],
1917
- "text/plain": [
1918
- " TeamID Season League TeamScore min TeamScore max TeamScore std \\\n",
1919
- "976 1387 2012 M 58 86 7.354612 \n",
1920
- "834 1332 2019 M 47 84 11.186577 \n",
1921
- "305 1199 2011 M 44 97 13.496634 \n",
1922
- "124 1139 2016 M 55 144 16.508128 \n",
1923
- "1305 1458 2019 M 46 101 12.044032 \n",
1924
- "1174 1433 2019 M 49 90 10.441658 \n",
1925
- "567 1272 2004 M 60 94 10.228604 \n",
1926
- "665 1292 2013 M 45 97 11.877053 \n",
1927
- "271 1186 2004 M 49 100 12.568700 \n",
1928
- "325 1207 2011 M 46 111 14.956568 \n",
1929
- "\n",
1930
- " TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
1931
- "976 68.0 69.096774 35 79 ... \n",
1932
- "834 72.0 70.485714 46 90 ... \n",
1933
- "305 69.0 69.322581 38 89 ... \n",
1934
- "124 78.0 80.580645 52 88 ... \n",
1935
- "1305 69.0 69.060606 45 84 ... \n",
1936
- "1174 70.5 71.437500 36 87 ... \n",
1937
- "567 73.0 73.571429 48 85 ... \n",
1938
- "665 72.0 71.242424 41 82 ... \n",
1939
- "271 70.0 70.250000 52 104 ... \n",
1940
- "325 69.0 71.032258 51 102 ... \n",
1941
- "\n",
1942
- " Win min Win max Win std Win median Win mean ConfAbbrev Seed \\\n",
1943
- "976 0 1 0.425024 1.0 0.774194 a_ten Z09 \n",
1944
- "834 0 1 0.481594 1.0 0.657143 pac_twelve Z12 \n",
1945
- "305 0 1 0.475191 1.0 0.677419 acc Z10 \n",
1946
- "124 0 1 0.475191 1.0 0.677419 big_east X09 \n",
1947
- "1305 0 1 0.466694 1.0 0.696970 big_ten Z05 \n",
1948
- "1174 0 1 0.420013 1.0 0.781250 a_ten W08 \n",
1949
- "567 0 1 0.440959 1.0 0.750000 cusa Y07 \n",
1950
- "665 0 1 0.364110 1.0 0.848485 sun_belt Y11a \n",
1951
- "271 0 1 0.503953 1.0 0.571429 big_sky Y15 \n",
1952
- "325 0 1 0.475191 1.0 0.677419 big_east Z06 \n",
1953
- "\n",
1954
- " TeamName FirstD1Season LastD1Season \n",
1955
- "976 St Louis 1985 2024 \n",
1956
- "834 Oregon 1985 2024 \n",
1957
- "305 Florida St 1985 2024 \n",
1958
- "124 Butler 1985 2024 \n",
1959
- "1305 Wisconsin 1985 2024 \n",
1960
- "1174 VCU 1985 2024 \n",
1961
- "567 Memphis 1985 2024 \n",
1962
- "665 MTSU 1985 2024 \n",
1963
- "271 E Washington 1985 2024 \n",
1964
- "325 Georgetown 1985 2024 \n",
1965
- "\n",
1966
- "[10 rows x 163 columns]"
1967
- ]
1968
- },
1969
- "execution_count": 13,
1970
- "metadata": {},
1971
- "output_type": "execute_result"
1972
- }
1973
- ],
1974
  "source": [
1975
- "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
1976
- "\n",
1977
- "team_reg_agg_df = team_reg_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
1978
- "team_tourney_agg_df = team_tourney_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
1979
- "\n",
1980
- "team_reg_agg_df.sample(10, random_state=10)"
 
 
 
1981
  ]
1982
  },
1983
  {
1984
  "cell_type": "code",
1985
- "execution_count": 14,
1986
  "metadata": {},
1987
  "outputs": [
1988
  {
@@ -2009,19 +1665,19 @@
2009
  " <th>TeamID</th>\n",
2010
  " <th>Season</th>\n",
2011
  " <th>League</th>\n",
2012
- " <th>TeamScore min</th>\n",
2013
- " <th>TeamScore max</th>\n",
2014
- " <th>TeamScore std</th>\n",
2015
- " <th>TeamScore median</th>\n",
2016
- " <th>TeamScore mean</th>\n",
2017
- " <th>OppScore min</th>\n",
2018
- " <th>OppScore max</th>\n",
2019
  " <th>...</th>\n",
2020
- " <th>Win min</th>\n",
2021
- " <th>Win max</th>\n",
2022
- " <th>Win std</th>\n",
2023
- " <th>Win median</th>\n",
2024
- " <th>Win mean</th>\n",
2025
  " <th>ConfAbbrev</th>\n",
2026
  " <th>Seed</th>\n",
2027
  " <th>TeamName</th>\n",
@@ -2035,13 +1691,13 @@
2035
  " <td>1437</td>\n",
2036
  " <td>2011</td>\n",
2037
  " <td>M</td>\n",
2038
- " <td>57</td>\n",
2039
- " <td>57</td>\n",
2040
- " <td>NaN</td>\n",
2041
- " <td>57.0</td>\n",
2042
- " <td>57.000000</td>\n",
2043
- " <td>61</td>\n",
2044
- " <td>61</td>\n",
2045
  " <td>...</td>\n",
2046
  " <td>0</td>\n",
2047
  " <td>0</td>\n",
@@ -2060,12 +1716,12 @@
2060
  " <td>2013</td>\n",
2061
  " <td>M</td>\n",
2062
  " <td>56</td>\n",
2063
- " <td>56</td>\n",
2064
- " <td>NaN</td>\n",
2065
- " <td>56.0</td>\n",
2066
- " <td>56.000000</td>\n",
2067
- " <td>68</td>\n",
2068
- " <td>68</td>\n",
2069
  " <td>...</td>\n",
2070
  " <td>0</td>\n",
2071
  " <td>0</td>\n",
@@ -2083,13 +1739,13 @@
2083
  " <td>1417</td>\n",
2084
  " <td>2015</td>\n",
2085
  " <td>M</td>\n",
2086
- " <td>60</td>\n",
2087
- " <td>92</td>\n",
2088
- " <td>17.925773</td>\n",
2089
- " <td>62.0</td>\n",
2090
- " <td>71.333333</td>\n",
2091
- " <td>59</td>\n",
2092
- " <td>75</td>\n",
2093
  " <td>...</td>\n",
2094
  " <td>0</td>\n",
2095
  " <td>1</td>\n",
@@ -2107,13 +1763,13 @@
2107
  " <td>1305</td>\n",
2108
  " <td>2005</td>\n",
2109
  " <td>M</td>\n",
2110
- " <td>59</td>\n",
2111
- " <td>61</td>\n",
2112
- " <td>1.414214</td>\n",
2113
- " <td>60.0</td>\n",
2114
- " <td>60.000000</td>\n",
2115
- " <td>57</td>\n",
2116
- " <td>71</td>\n",
2117
  " <td>...</td>\n",
2118
  " <td>0</td>\n",
2119
  " <td>1</td>\n",
@@ -2131,13 +1787,13 @@
2131
  " <td>1332</td>\n",
2132
  " <td>2019</td>\n",
2133
  " <td>M</td>\n",
2134
- " <td>49</td>\n",
2135
- " <td>73</td>\n",
2136
- " <td>13.576941</td>\n",
2137
  " <td>72.0</td>\n",
2138
- " <td>64.666667</td>\n",
2139
- " <td>53</td>\n",
2140
- " <td>54</td>\n",
2141
  " <td>...</td>\n",
2142
  " <td>0</td>\n",
2143
  " <td>1</td>\n",
@@ -2155,13 +1811,13 @@
2155
  " <td>1393</td>\n",
2156
  " <td>2018</td>\n",
2157
  " <td>M</td>\n",
2158
- " <td>55</td>\n",
2159
- " <td>65</td>\n",
2160
- " <td>4.349329</td>\n",
2161
- " <td>58.5</td>\n",
2162
- " <td>59.250000</td>\n",
2163
- " <td>52</td>\n",
2164
- " <td>69</td>\n",
2165
  " <td>...</td>\n",
2166
  " <td>0</td>\n",
2167
  " <td>1</td>\n",
@@ -2179,13 +1835,13 @@
2179
  " <td>1373</td>\n",
2180
  " <td>2010</td>\n",
2181
  " <td>M</td>\n",
2182
- " <td>64</td>\n",
2183
- " <td>64</td>\n",
2184
- " <td>NaN</td>\n",
2185
- " <td>64.0</td>\n",
2186
- " <td>64.000000</td>\n",
2187
- " <td>72</td>\n",
2188
- " <td>72</td>\n",
2189
  " <td>...</td>\n",
2190
  " <td>0</td>\n",
2191
  " <td>0</td>\n",
@@ -2203,13 +1859,13 @@
2203
  " <td>1425</td>\n",
2204
  " <td>2022</td>\n",
2205
  " <td>M</td>\n",
2206
- " <td>66</td>\n",
2207
- " <td>66</td>\n",
2208
- " <td>NaN</td>\n",
2209
- " <td>66.0</td>\n",
2210
- " <td>66.000000</td>\n",
2211
- " <td>68</td>\n",
2212
- " <td>68</td>\n",
2213
  " <td>...</td>\n",
2214
  " <td>0</td>\n",
2215
  " <td>0</td>\n",
@@ -2227,13 +1883,13 @@
2227
  " <td>1234</td>\n",
2228
  " <td>2015</td>\n",
2229
  " <td>M</td>\n",
2230
- " <td>68</td>\n",
2231
- " <td>83</td>\n",
2232
- " <td>10.606602</td>\n",
2233
- " <td>75.5</td>\n",
2234
- " <td>75.500000</td>\n",
2235
- " <td>52</td>\n",
2236
- " <td>87</td>\n",
2237
  " <td>...</td>\n",
2238
  " <td>0</td>\n",
2239
  " <td>1</td>\n",
@@ -2251,13 +1907,13 @@
2251
  " <td>1386</td>\n",
2252
  " <td>2008</td>\n",
2253
  " <td>M</td>\n",
2254
- " <td>64</td>\n",
2255
- " <td>64</td>\n",
2256
- " <td>NaN</td>\n",
2257
- " <td>64.0</td>\n",
2258
- " <td>64.000000</td>\n",
2259
- " <td>72</td>\n",
2260
- " <td>72</td>\n",
2261
  " <td>...</td>\n",
2262
  " <td>0</td>\n",
2263
  " <td>0</td>\n",
@@ -2272,85 +1928,91 @@
2272
  " </tr>\n",
2273
  " </tbody>\n",
2274
  "</table>\n",
2275
- "<p>10 rows × 163 columns</p>\n",
2276
  "</div>"
2277
  ],
2278
  "text/plain": [
2279
- " TeamID Season League TeamScore min TeamScore max TeamScore std \\\n",
2280
- "1202 1437 2011 M 57 57 NaN \n",
2281
- "109 1137 2013 M 56 56 NaN \n",
2282
- "1105 1417 2015 M 60 92 17.925773 \n",
2283
- "700 1305 2005 M 59 61 1.414214 \n",
2284
- "834 1332 2019 M 49 73 13.576941 \n",
2285
- "1009 1393 2018 M 55 65 4.349329 \n",
2286
- "953 1373 2010 M 64 64 NaN \n",
2287
- "1141 1425 2022 M 66 66 NaN \n",
2288
- "419 1234 2015 M 68 83 10.606602 \n",
2289
- "973 1386 2008 M 64 64 NaN \n",
2290
  "\n",
2291
- " TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
2292
- "1202 57.0 57.000000 61 61 ... \n",
2293
- "109 56.0 56.000000 68 68 ... \n",
2294
- "1105 62.0 71.333333 59 75 ... \n",
2295
- "700 60.0 60.000000 57 71 ... \n",
2296
- "834 72.0 64.666667 53 54 ... \n",
2297
- "1009 58.5 59.250000 52 69 ... \n",
2298
- "953 64.0 64.000000 72 72 ... \n",
2299
- "1141 66.0 66.000000 68 68 ... \n",
2300
- "419 75.5 75.500000 52 87 ... \n",
2301
- "973 64.0 64.000000 72 72 ... \n",
2302
  "\n",
2303
- " Win min Win max Win std Win median Win mean ConfAbbrev Seed \\\n",
2304
- "1202 0 0 NaN 0.0 0.000000 big_east W09 \n",
2305
- "109 0 0 NaN 0.0 0.000000 patriot W11 \n",
2306
- "1105 0 1 0.577350 1.0 0.666667 pac_twelve X11 \n",
2307
- "700 0 1 0.707107 0.5 0.500000 wac X09 \n",
2308
- "834 0 1 0.577350 1.0 0.666667 pac_twelve Z12 \n",
2309
- "1009 0 1 0.500000 1.0 0.750000 acc X11b \n",
2310
- "953 0 0 NaN 0.0 0.000000 maac X13 \n",
2311
- "1141 0 0 NaN 0.0 0.000000 pac_twelve Y07 \n",
2312
- "419 0 1 0.707107 0.5 0.500000 big_ten X07 \n",
2313
- "973 0 0 NaN 0.0 0.000000 a_ten W11 \n",
 
 
 
 
 
 
 
 
 
 
 
 
2314
  "\n",
2315
- " TeamName FirstD1Season LastD1Season \n",
2316
- "1202 Villanova 1985 2024 \n",
2317
- "109 Bucknell 1985 2024 \n",
2318
- "1105 UCLA 1985 2024 \n",
2319
- "700 Nevada 1985 2024 \n",
2320
- "834 Oregon 1985 2024 \n",
2321
- "1009 Syracuse 1985 2024 \n",
2322
- "953 Siena 1985 2024 \n",
2323
- "1141 USC 1985 2024 \n",
2324
- "419 Iowa 1985 2024 \n",
2325
- "973 St Joseph's PA 1985 2024 \n",
2326
  "\n",
2327
- "[10 rows x 163 columns]"
2328
  ]
2329
  },
2330
- "execution_count": 14,
2331
  "metadata": {},
2332
  "output_type": "execute_result"
2333
  }
2334
  ],
2335
  "source": [
2336
- "team_tourney_agg_df.sample(10, random_state=10)"
2337
- ]
2338
- },
2339
- {
2340
- "cell_type": "code",
2341
- "execution_count": 15,
2342
- "metadata": {},
2343
- "outputs": [],
2344
- "source": [
2345
- "# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
2346
- "team_agg_df = (\n",
2347
- " pd.merge(\n",
2348
- " left=team_reg_agg_df, \n",
2349
- " right=team_tourney_agg_df, \n",
2350
- " on=[\"TeamID\", \"Season\", \"League\"], \n",
2351
- " suffixes=(\" reg\", \" tourney\"),\n",
2352
- " )\n",
2353
- ")"
2354
  ]
2355
  },
2356
  {
@@ -2395,11 +2057,11 @@
2395
  " <th>Win std tourney</th>\n",
2396
  " <th>Win median tourney</th>\n",
2397
  " <th>Win mean tourney</th>\n",
2398
- " <th>ConfAbbrev tourney</th>\n",
2399
- " <th>Seed tourney</th>\n",
2400
- " <th>TeamName tourney</th>\n",
2401
- " <th>FirstD1Season tourney</th>\n",
2402
- " <th>LastD1Season tourney</th>\n",
2403
  " </tr>\n",
2404
  " </thead>\n",
2405
  " <tbody>\n",
@@ -2645,7 +2307,7 @@
2645
  " </tr>\n",
2646
  " </tbody>\n",
2647
  "</table>\n",
2648
- "<p>10 rows × 323 columns</p>\n",
2649
  "</div>"
2650
  ],
2651
  "text/plain": [
@@ -2697,31 +2359,19 @@
2697
  "891 0 NaN 0.0 0.000000 \n",
2698
  "559 1 0.707107 0.5 0.500000 \n",
2699
  "\n",
2700
- " ConfAbbrev tourney Seed tourney TeamName tourney \\\n",
2701
- "409 maac Z15 Iona \n",
2702
- "1189 aec Y13 Vermont \n",
2703
- "1275 mvc Z09 Wichita St \n",
2704
- "487 sec Z02 Kentucky \n",
2705
- "1135 pac_ten X06 USC \n",
2706
- "556 acc Y04 Maryland \n",
2707
- "368 aec Z16 Hartford \n",
2708
- "1037 sec W04 Tennessee \n",
2709
- "891 big_ten W01 Purdue \n",
2710
- "559 big_ten Y04 Maryland \n",
2711
- "\n",
2712
- " FirstD1Season tourney LastD1Season tourney \n",
2713
- "409 1985 2024 \n",
2714
- "1189 1985 2024 \n",
2715
- "1275 1985 2024 \n",
2716
- "487 1985 2024 \n",
2717
- "1135 1985 2024 \n",
2718
- "556 1985 2024 \n",
2719
- "368 1985 2023 \n",
2720
- "1037 1985 2024 \n",
2721
- "891 1985 2024 \n",
2722
- "559 1985 2024 \n",
2723
  "\n",
2724
- "[10 rows x 323 columns]"
2725
  ]
2726
  },
2727
  "execution_count": 16,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 3,
19
  "metadata": {},
20
  "outputs": [
21
  {
 
206
  "[5 rows x 35 columns]"
207
  ]
208
  },
209
+ "execution_count": 3,
210
  "metadata": {},
211
  "output_type": "execute_result"
212
  }
 
222
  },
223
  {
224
  "cell_type": "code",
225
+ "execution_count": 4,
226
  "metadata": {},
227
  "outputs": [
228
  {
 
413
  "[5 rows x 35 columns]"
414
  ]
415
  },
416
+ "execution_count": 4,
417
  "metadata": {},
418
  "output_type": "execute_result"
419
  }
 
436
  },
437
  {
438
  "cell_type": "code",
439
+ "execution_count": 5,
440
  "metadata": {},
441
  "outputs": [],
442
  "source": [
 
466
  },
467
  {
468
  "cell_type": "code",
469
+ "execution_count": 6,
470
  "metadata": {},
471
  "outputs": [
472
  {
 
540
  },
541
  {
542
  "cell_type": "code",
543
+ "execution_count": 7,
544
  "metadata": {},
545
  "outputs": [
546
  {
 
613
  },
614
  {
615
  "cell_type": "code",
616
+ "execution_count": 8,
617
  "metadata": {},
618
  "outputs": [],
619
  "source": [
 
630
  },
631
  {
632
  "cell_type": "code",
633
+ "execution_count": 9,
634
  "metadata": {},
635
  "outputs": [],
636
  "source": [
 
654
  },
655
  {
656
  "cell_type": "code",
657
+ "execution_count": 10,
658
  "metadata": {},
659
  "outputs": [],
660
  "source": [
 
680
  },
681
  {
682
  "cell_type": "code",
683
+ "execution_count": 11,
684
  "metadata": {
685
  "tags": []
686
  },
 
1027
  "[10 rows x 158 columns]"
1028
  ]
1029
  },
1030
+ "execution_count": 11,
1031
  "metadata": {},
1032
  "output_type": "execute_result"
1033
  }
 
1046
  },
1047
  {
1048
  "cell_type": "code",
1049
+ "execution_count": 12,
1050
  "metadata": {},
1051
  "outputs": [
1052
  {
 
1391
  "[10 rows x 158 columns]"
1392
  ]
1393
  },
1394
+ "execution_count": 12,
1395
  "metadata": {},
1396
  "output_type": "execute_result"
1397
  }
 
1419
  },
1420
  {
1421
  "cell_type": "code",
1422
+ "execution_count": 13,
1423
  "metadata": {},
1424
  "outputs": [
1425
  {
 
1594
  "1998 2024 "
1595
  ]
1596
  },
1597
+ "execution_count": 13,
1598
  "metadata": {},
1599
  "output_type": "execute_result"
1600
  }
 
1621
  },
1622
  {
1623
  "cell_type": "code",
1624
+ "execution_count": 14,
1625
  "metadata": {},
1626
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1627
  "source": [
1628
+ "# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
1629
+ "team_agg_df = (\n",
1630
+ " pd.merge(\n",
1631
+ " left=team_reg_agg, \n",
1632
+ " right=team_tourney_agg, \n",
1633
+ " on=[\"TeamID\", \"Season\", \"League\"], \n",
1634
+ " suffixes=(\" reg\", \" tourney\"),\n",
1635
+ " )\n",
1636
+ ")"
1637
  ]
1638
  },
1639
  {
1640
  "cell_type": "code",
1641
+ "execution_count": 15,
1642
  "metadata": {},
1643
  "outputs": [
1644
  {
 
1665
  " <th>TeamID</th>\n",
1666
  " <th>Season</th>\n",
1667
  " <th>League</th>\n",
1668
+ " <th>TeamScore min reg</th>\n",
1669
+ " <th>TeamScore max reg</th>\n",
1670
+ " <th>TeamScore std reg</th>\n",
1671
+ " <th>TeamScore median reg</th>\n",
1672
+ " <th>TeamScore mean reg</th>\n",
1673
+ " <th>OppScore min reg</th>\n",
1674
+ " <th>OppScore max reg</th>\n",
1675
  " <th>...</th>\n",
1676
+ " <th>Win min tourney</th>\n",
1677
+ " <th>Win max tourney</th>\n",
1678
+ " <th>Win std tourney</th>\n",
1679
+ " <th>Win median tourney</th>\n",
1680
+ " <th>Win mean tourney</th>\n",
1681
  " <th>ConfAbbrev</th>\n",
1682
  " <th>Seed</th>\n",
1683
  " <th>TeamName</th>\n",
 
1691
  " <td>1437</td>\n",
1692
  " <td>2011</td>\n",
1693
  " <td>M</td>\n",
1694
+ " <td>50</td>\n",
1695
+ " <td>88</td>\n",
1696
+ " <td>9.450156</td>\n",
1697
+ " <td>73.0</td>\n",
1698
+ " <td>72.718750</td>\n",
1699
+ " <td>36</td>\n",
1700
+ " <td>93</td>\n",
1701
  " <td>...</td>\n",
1702
  " <td>0</td>\n",
1703
  " <td>0</td>\n",
 
1716
  " <td>2013</td>\n",
1717
  " <td>M</td>\n",
1718
  " <td>56</td>\n",
1719
+ " <td>88</td>\n",
1720
+ " <td>7.121073</td>\n",
1721
+ " <td>66.0</td>\n",
1722
+ " <td>67.250000</td>\n",
1723
+ " <td>42</td>\n",
1724
+ " <td>79</td>\n",
1725
  " <td>...</td>\n",
1726
  " <td>0</td>\n",
1727
  " <td>0</td>\n",
 
1739
  " <td>1417</td>\n",
1740
  " <td>2015</td>\n",
1741
  " <td>M</td>\n",
1742
+ " <td>39</td>\n",
1743
+ " <td>113</td>\n",
1744
+ " <td>16.594427</td>\n",
1745
+ " <td>72.0</td>\n",
1746
+ " <td>72.000000</td>\n",
1747
+ " <td>45</td>\n",
1748
+ " <td>87</td>\n",
1749
  " <td>...</td>\n",
1750
  " <td>0</td>\n",
1751
  " <td>1</td>\n",
 
1763
  " <td>1305</td>\n",
1764
  " <td>2005</td>\n",
1765
  " <td>M</td>\n",
1766
+ " <td>52</td>\n",
1767
+ " <td>84</td>\n",
1768
+ " <td>8.720199</td>\n",
1769
+ " <td>72.0</td>\n",
1770
+ " <td>69.551724</td>\n",
1771
+ " <td>46</td>\n",
1772
+ " <td>85</td>\n",
1773
  " <td>...</td>\n",
1774
  " <td>0</td>\n",
1775
  " <td>1</td>\n",
 
1787
  " <td>1332</td>\n",
1788
  " <td>2019</td>\n",
1789
  " <td>M</td>\n",
1790
+ " <td>47</td>\n",
1791
+ " <td>84</td>\n",
1792
+ " <td>11.186577</td>\n",
1793
  " <td>72.0</td>\n",
1794
+ " <td>70.485714</td>\n",
1795
+ " <td>46</td>\n",
1796
+ " <td>90</td>\n",
1797
  " <td>...</td>\n",
1798
  " <td>0</td>\n",
1799
  " <td>1</td>\n",
 
1811
  " <td>1393</td>\n",
1812
  " <td>2018</td>\n",
1813
  " <td>M</td>\n",
1814
+ " <td>44</td>\n",
1815
+ " <td>90</td>\n",
1816
+ " <td>11.519253</td>\n",
1817
+ " <td>70.0</td>\n",
1818
+ " <td>67.545455</td>\n",
1819
+ " <td>45</td>\n",
1820
+ " <td>101</td>\n",
1821
  " <td>...</td>\n",
1822
  " <td>0</td>\n",
1823
  " <td>1</td>\n",
 
1835
  " <td>1373</td>\n",
1836
  " <td>2010</td>\n",
1837
  " <td>M</td>\n",
1838
+ " <td>53</td>\n",
1839
+ " <td>99</td>\n",
1840
+ " <td>10.164555</td>\n",
1841
+ " <td>76.0</td>\n",
1842
+ " <td>75.454545</td>\n",
1843
+ " <td>51</td>\n",
1844
+ " <td>87</td>\n",
1845
  " <td>...</td>\n",
1846
  " <td>0</td>\n",
1847
  " <td>0</td>\n",
 
1859
  " <td>1425</td>\n",
1860
  " <td>2022</td>\n",
1861
  " <td>M</td>\n",
1862
+ " <td>58</td>\n",
1863
+ " <td>98</td>\n",
1864
+ " <td>10.506041</td>\n",
1865
+ " <td>70.0</td>\n",
1866
+ " <td>72.575758</td>\n",
1867
+ " <td>43</td>\n",
1868
+ " <td>91</td>\n",
1869
  " <td>...</td>\n",
1870
  " <td>0</td>\n",
1871
  " <td>0</td>\n",
 
1883
  " <td>1234</td>\n",
1884
  " <td>2015</td>\n",
1885
  " <td>M</td>\n",
1886
+ " <td>44</td>\n",
1887
+ " <td>90</td>\n",
1888
+ " <td>10.216556</td>\n",
1889
+ " <td>70.5</td>\n",
1890
+ " <td>69.406250</td>\n",
1891
+ " <td>44</td>\n",
1892
+ " <td>90</td>\n",
1893
  " <td>...</td>\n",
1894
  " <td>0</td>\n",
1895
  " <td>1</td>\n",
 
1907
  " <td>1386</td>\n",
1908
  " <td>2008</td>\n",
1909
  " <td>M</td>\n",
1910
+ " <td>55</td>\n",
1911
+ " <td>98</td>\n",
1912
+ " <td>10.801234</td>\n",
1913
+ " <td>72.0</td>\n",
1914
+ " <td>73.333333</td>\n",
1915
+ " <td>42</td>\n",
1916
+ " <td>102</td>\n",
1917
  " <td>...</td>\n",
1918
  " <td>0</td>\n",
1919
  " <td>0</td>\n",
 
1928
  " </tr>\n",
1929
  " </tbody>\n",
1930
  "</table>\n",
1931
+ "<p>10 rows × 318 columns</p>\n",
1932
  "</div>"
1933
  ],
1934
  "text/plain": [
1935
+ " TeamID Season League TeamScore min reg TeamScore max reg \\\n",
1936
+ "1202 1437 2011 M 50 88 \n",
1937
+ "109 1137 2013 M 56 88 \n",
1938
+ "1105 1417 2015 M 39 113 \n",
1939
+ "700 1305 2005 M 52 84 \n",
1940
+ "834 1332 2019 M 47 84 \n",
1941
+ "1009 1393 2018 M 44 90 \n",
1942
+ "953 1373 2010 M 53 99 \n",
1943
+ "1141 1425 2022 M 58 98 \n",
1944
+ "419 1234 2015 M 44 90 \n",
1945
+ "973 1386 2008 M 55 98 \n",
1946
  "\n",
1947
+ " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
1948
+ "1202 9.450156 73.0 72.718750 \n",
1949
+ "109 7.121073 66.0 67.250000 \n",
1950
+ "1105 16.594427 72.0 72.000000 \n",
1951
+ "700 8.720199 72.0 69.551724 \n",
1952
+ "834 11.186577 72.0 70.485714 \n",
1953
+ "1009 11.519253 70.0 67.545455 \n",
1954
+ "953 10.164555 76.0 75.454545 \n",
1955
+ "1141 10.506041 70.0 72.575758 \n",
1956
+ "419 10.216556 70.5 69.406250 \n",
1957
+ "973 10.801234 72.0 73.333333 \n",
1958
  "\n",
1959
+ " OppScore min reg OppScore max reg ... Win min tourney \\\n",
1960
+ "1202 36 93 ... 0 \n",
1961
+ "109 42 79 ... 0 \n",
1962
+ "1105 45 87 ... 0 \n",
1963
+ "700 46 85 ... 0 \n",
1964
+ "834 46 90 ... 0 \n",
1965
+ "1009 45 101 ... 0 \n",
1966
+ "953 51 87 ... 0 \n",
1967
+ "1141 43 91 ... 0 \n",
1968
+ "419 44 90 ... 0 \n",
1969
+ "973 42 102 ... 0 \n",
1970
+ "\n",
1971
+ " Win max tourney Win std tourney Win median tourney Win mean tourney \\\n",
1972
+ "1202 0 NaN 0.0 0.000000 \n",
1973
+ "109 0 NaN 0.0 0.000000 \n",
1974
+ "1105 1 0.577350 1.0 0.666667 \n",
1975
+ "700 1 0.707107 0.5 0.500000 \n",
1976
+ "834 1 0.577350 1.0 0.666667 \n",
1977
+ "1009 1 0.500000 1.0 0.750000 \n",
1978
+ "953 0 NaN 0.0 0.000000 \n",
1979
+ "1141 0 NaN 0.0 0.000000 \n",
1980
+ "419 1 0.707107 0.5 0.500000 \n",
1981
+ "973 0 NaN 0.0 0.000000 \n",
1982
  "\n",
1983
+ " ConfAbbrev Seed TeamName FirstD1Season LastD1Season \n",
1984
+ "1202 big_east W09 Villanova 1985 2024 \n",
1985
+ "109 patriot W11 Bucknell 1985 2024 \n",
1986
+ "1105 pac_twelve X11 UCLA 1985 2024 \n",
1987
+ "700 wac X09 Nevada 1985 2024 \n",
1988
+ "834 pac_twelve Z12 Oregon 1985 2024 \n",
1989
+ "1009 acc X11b Syracuse 1985 2024 \n",
1990
+ "953 maac X13 Siena 1985 2024 \n",
1991
+ "1141 pac_twelve Y07 USC 1985 2024 \n",
1992
+ "419 big_ten X07 Iowa 1985 2024 \n",
1993
+ "973 a_ten W11 St Joseph's PA 1985 2024 \n",
1994
  "\n",
1995
+ "[10 rows x 318 columns]"
1996
  ]
1997
  },
1998
+ "execution_count": 15,
1999
  "metadata": {},
2000
  "output_type": "execute_result"
2001
  }
2002
  ],
2003
  "source": [
2004
+ "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
2005
+ "\n",
2006
+ "# team_reg_agg_df = team_reg_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
2007
+ "# team_tourney_agg_df = team_tourney_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
2008
+ "\n",
2009
+ "team_agg_df = pd.merge(\n",
2010
+ " left=team_agg_df,\n",
2011
+ " right=team_conf_seeds_df,\n",
2012
+ " on=[\"TeamID\", \"Season\", \"League\"],\n",
2013
+ ")\n",
2014
+ "\n",
2015
+ "team_agg_df.sample(10, random_state=10)"
 
 
 
 
 
 
2016
  ]
2017
  },
2018
  {
 
2057
  " <th>Win std tourney</th>\n",
2058
  " <th>Win median tourney</th>\n",
2059
  " <th>Win mean tourney</th>\n",
2060
+ " <th>ConfAbbrev</th>\n",
2061
+ " <th>Seed</th>\n",
2062
+ " <th>TeamName</th>\n",
2063
+ " <th>FirstD1Season</th>\n",
2064
+ " <th>LastD1Season</th>\n",
2065
  " </tr>\n",
2066
  " </thead>\n",
2067
  " <tbody>\n",
 
2307
  " </tr>\n",
2308
  " </tbody>\n",
2309
  "</table>\n",
2310
+ "<p>10 rows × 318 columns</p>\n",
2311
  "</div>"
2312
  ],
2313
  "text/plain": [
 
2359
  "891 0 NaN 0.0 0.000000 \n",
2360
  "559 1 0.707107 0.5 0.500000 \n",
2361
  "\n",
2362
+ " ConfAbbrev Seed TeamName FirstD1Season LastD1Season \n",
2363
+ "409 maac Z15 Iona 1985 2024 \n",
2364
+ "1189 aec Y13 Vermont 1985 2024 \n",
2365
+ "1275 mvc Z09 Wichita St 1985 2024 \n",
2366
+ "487 sec Z02 Kentucky 1985 2024 \n",
2367
+ "1135 pac_ten X06 USC 1985 2024 \n",
2368
+ "556 acc Y04 Maryland 1985 2024 \n",
2369
+ "368 aec Z16 Hartford 1985 2023 \n",
2370
+ "1037 sec W04 Tennessee 1985 2024 \n",
2371
+ "891 big_ten W01 Purdue 1985 2024 \n",
2372
+ "559 big_ten Y04 Maryland 1985 2024 \n",
 
 
 
 
 
 
 
 
 
 
 
 
2373
  "\n",
2374
+ "[10 rows x 318 columns]"
2375
  ]
2376
  },
2377
  "execution_count": 16,