Commit
•
ba98f59
1
Parent(s):
82c833d
got rid of unnecessary tournament and regular season split attributes
Browse files- data/AllTeamsAgg.csv +2 -2
- src/nn.ipynb +78 -0
- src/pre_processing.ipynb +198 -548
data/AllTeamsAgg.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08b49367eed1f1591006101b89cf44461ef268128d5865e8df4a3f708f8342c7
|
3 |
+
size 2913054
|
src/nn.ipynb
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 5,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import torch\n",
|
11 |
+
"import torch.nn as nn\n",
|
12 |
+
"import torch.optim as optim\n",
|
13 |
+
"from sklearn.model_selection import train_test_split\n",
|
14 |
+
"import os\n",
|
15 |
+
"\n",
|
16 |
+
"DATA_DIR = os.path.join(\"..\", \"data\")"
|
17 |
+
]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"cell_type": "code",
|
21 |
+
"execution_count": 7,
|
22 |
+
"metadata": {},
|
23 |
+
"outputs": [
|
24 |
+
{
|
25 |
+
"name": "stdout",
|
26 |
+
"output_type": "stream",
|
27 |
+
"text": [
|
28 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
29 |
+
"RangeIndex: 1335 entries, 0 to 1334\n",
|
30 |
+
"Columns: 324 entries, Unnamed: 0 to LastD1Season tourney\n",
|
31 |
+
"dtypes: float64(186), int64(131), object(7)\n",
|
32 |
+
"memory usage: 3.3+ MB\n"
|
33 |
+
]
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"source": [
|
37 |
+
"all_teams_agg_df = pd.read_csv(os.path.join(DATA_DIR, \"AllTeamsAgg.csv\"))\n",
|
38 |
+
"\n",
|
39 |
+
"all_teams_agg_df.info()"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": null,
|
45 |
+
"metadata": {},
|
46 |
+
"outputs": [],
|
47 |
+
"source": [
|
48 |
+
"def get_device() -> str:\n",
|
49 |
+
" if torch.cuda.is_available():\n",
|
50 |
+
" return \"cuda\"\n",
|
51 |
+
" return \"cpu\"\n",
|
52 |
+
"\n",
|
53 |
+
"DEVICE = get_device() "
|
54 |
+
]
|
55 |
+
}
|
56 |
+
],
|
57 |
+
"metadata": {
|
58 |
+
"kernelspec": {
|
59 |
+
"display_name": "Python 3",
|
60 |
+
"language": "python",
|
61 |
+
"name": "python3"
|
62 |
+
},
|
63 |
+
"language_info": {
|
64 |
+
"codemirror_mode": {
|
65 |
+
"name": "ipython",
|
66 |
+
"version": 3
|
67 |
+
},
|
68 |
+
"file_extension": ".py",
|
69 |
+
"mimetype": "text/x-python",
|
70 |
+
"name": "python",
|
71 |
+
"nbconvert_exporter": "python",
|
72 |
+
"pygments_lexer": "ipython3",
|
73 |
+
"version": "3.11.7"
|
74 |
+
}
|
75 |
+
},
|
76 |
+
"nbformat": 4,
|
77 |
+
"nbformat_minor": 2
|
78 |
+
}
|
src/pre_processing.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -15,7 +15,7 @@
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
-
"execution_count":
|
19 |
"metadata": {},
|
20 |
"outputs": [
|
21 |
{
|
@@ -206,7 +206,7 @@
|
|
206 |
"[5 rows x 35 columns]"
|
207 |
]
|
208 |
},
|
209 |
-
"execution_count":
|
210 |
"metadata": {},
|
211 |
"output_type": "execute_result"
|
212 |
}
|
@@ -222,7 +222,7 @@
|
|
222 |
},
|
223 |
{
|
224 |
"cell_type": "code",
|
225 |
-
"execution_count":
|
226 |
"metadata": {},
|
227 |
"outputs": [
|
228 |
{
|
@@ -413,7 +413,7 @@
|
|
413 |
"[5 rows x 35 columns]"
|
414 |
]
|
415 |
},
|
416 |
-
"execution_count":
|
417 |
"metadata": {},
|
418 |
"output_type": "execute_result"
|
419 |
}
|
@@ -436,7 +436,7 @@
|
|
436 |
},
|
437 |
{
|
438 |
"cell_type": "code",
|
439 |
-
"execution_count":
|
440 |
"metadata": {},
|
441 |
"outputs": [],
|
442 |
"source": [
|
@@ -466,7 +466,7 @@
|
|
466 |
},
|
467 |
{
|
468 |
"cell_type": "code",
|
469 |
-
"execution_count":
|
470 |
"metadata": {},
|
471 |
"outputs": [
|
472 |
{
|
@@ -540,7 +540,7 @@
|
|
540 |
},
|
541 |
{
|
542 |
"cell_type": "code",
|
543 |
-
"execution_count":
|
544 |
"metadata": {},
|
545 |
"outputs": [
|
546 |
{
|
@@ -613,7 +613,7 @@
|
|
613 |
},
|
614 |
{
|
615 |
"cell_type": "code",
|
616 |
-
"execution_count":
|
617 |
"metadata": {},
|
618 |
"outputs": [],
|
619 |
"source": [
|
@@ -630,7 +630,7 @@
|
|
630 |
},
|
631 |
{
|
632 |
"cell_type": "code",
|
633 |
-
"execution_count":
|
634 |
"metadata": {},
|
635 |
"outputs": [],
|
636 |
"source": [
|
@@ -654,7 +654,7 @@
|
|
654 |
},
|
655 |
{
|
656 |
"cell_type": "code",
|
657 |
-
"execution_count":
|
658 |
"metadata": {},
|
659 |
"outputs": [],
|
660 |
"source": [
|
@@ -680,7 +680,7 @@
|
|
680 |
},
|
681 |
{
|
682 |
"cell_type": "code",
|
683 |
-
"execution_count":
|
684 |
"metadata": {
|
685 |
"tags": []
|
686 |
},
|
@@ -1027,7 +1027,7 @@
|
|
1027 |
"[10 rows x 158 columns]"
|
1028 |
]
|
1029 |
},
|
1030 |
-
"execution_count":
|
1031 |
"metadata": {},
|
1032 |
"output_type": "execute_result"
|
1033 |
}
|
@@ -1046,7 +1046,7 @@
|
|
1046 |
},
|
1047 |
{
|
1048 |
"cell_type": "code",
|
1049 |
-
"execution_count":
|
1050 |
"metadata": {},
|
1051 |
"outputs": [
|
1052 |
{
|
@@ -1391,7 +1391,7 @@
|
|
1391 |
"[10 rows x 158 columns]"
|
1392 |
]
|
1393 |
},
|
1394 |
-
"execution_count":
|
1395 |
"metadata": {},
|
1396 |
"output_type": "execute_result"
|
1397 |
}
|
@@ -1419,7 +1419,7 @@
|
|
1419 |
},
|
1420 |
{
|
1421 |
"cell_type": "code",
|
1422 |
-
"execution_count":
|
1423 |
"metadata": {},
|
1424 |
"outputs": [
|
1425 |
{
|
@@ -1594,7 +1594,7 @@
|
|
1594 |
"1998 2024 "
|
1595 |
]
|
1596 |
},
|
1597 |
-
"execution_count":
|
1598 |
"metadata": {},
|
1599 |
"output_type": "execute_result"
|
1600 |
}
|
@@ -1621,368 +1621,24 @@
|
|
1621 |
},
|
1622 |
{
|
1623 |
"cell_type": "code",
|
1624 |
-
"execution_count":
|
1625 |
"metadata": {},
|
1626 |
-
"outputs": [
|
1627 |
-
{
|
1628 |
-
"data": {
|
1629 |
-
"text/html": [
|
1630 |
-
"<div>\n",
|
1631 |
-
"<style scoped>\n",
|
1632 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
1633 |
-
" vertical-align: middle;\n",
|
1634 |
-
" }\n",
|
1635 |
-
"\n",
|
1636 |
-
" .dataframe tbody tr th {\n",
|
1637 |
-
" vertical-align: top;\n",
|
1638 |
-
" }\n",
|
1639 |
-
"\n",
|
1640 |
-
" .dataframe thead th {\n",
|
1641 |
-
" text-align: right;\n",
|
1642 |
-
" }\n",
|
1643 |
-
"</style>\n",
|
1644 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
1645 |
-
" <thead>\n",
|
1646 |
-
" <tr style=\"text-align: right;\">\n",
|
1647 |
-
" <th></th>\n",
|
1648 |
-
" <th>TeamID</th>\n",
|
1649 |
-
" <th>Season</th>\n",
|
1650 |
-
" <th>League</th>\n",
|
1651 |
-
" <th>TeamScore min</th>\n",
|
1652 |
-
" <th>TeamScore max</th>\n",
|
1653 |
-
" <th>TeamScore std</th>\n",
|
1654 |
-
" <th>TeamScore median</th>\n",
|
1655 |
-
" <th>TeamScore mean</th>\n",
|
1656 |
-
" <th>OppScore min</th>\n",
|
1657 |
-
" <th>OppScore max</th>\n",
|
1658 |
-
" <th>...</th>\n",
|
1659 |
-
" <th>Win min</th>\n",
|
1660 |
-
" <th>Win max</th>\n",
|
1661 |
-
" <th>Win std</th>\n",
|
1662 |
-
" <th>Win median</th>\n",
|
1663 |
-
" <th>Win mean</th>\n",
|
1664 |
-
" <th>ConfAbbrev</th>\n",
|
1665 |
-
" <th>Seed</th>\n",
|
1666 |
-
" <th>TeamName</th>\n",
|
1667 |
-
" <th>FirstD1Season</th>\n",
|
1668 |
-
" <th>LastD1Season</th>\n",
|
1669 |
-
" </tr>\n",
|
1670 |
-
" </thead>\n",
|
1671 |
-
" <tbody>\n",
|
1672 |
-
" <tr>\n",
|
1673 |
-
" <th>976</th>\n",
|
1674 |
-
" <td>1387</td>\n",
|
1675 |
-
" <td>2012</td>\n",
|
1676 |
-
" <td>M</td>\n",
|
1677 |
-
" <td>58</td>\n",
|
1678 |
-
" <td>86</td>\n",
|
1679 |
-
" <td>7.354612</td>\n",
|
1680 |
-
" <td>68.0</td>\n",
|
1681 |
-
" <td>69.096774</td>\n",
|
1682 |
-
" <td>35</td>\n",
|
1683 |
-
" <td>79</td>\n",
|
1684 |
-
" <td>...</td>\n",
|
1685 |
-
" <td>0</td>\n",
|
1686 |
-
" <td>1</td>\n",
|
1687 |
-
" <td>0.425024</td>\n",
|
1688 |
-
" <td>1.0</td>\n",
|
1689 |
-
" <td>0.774194</td>\n",
|
1690 |
-
" <td>a_ten</td>\n",
|
1691 |
-
" <td>Z09</td>\n",
|
1692 |
-
" <td>St Louis</td>\n",
|
1693 |
-
" <td>1985</td>\n",
|
1694 |
-
" <td>2024</td>\n",
|
1695 |
-
" </tr>\n",
|
1696 |
-
" <tr>\n",
|
1697 |
-
" <th>834</th>\n",
|
1698 |
-
" <td>1332</td>\n",
|
1699 |
-
" <td>2019</td>\n",
|
1700 |
-
" <td>M</td>\n",
|
1701 |
-
" <td>47</td>\n",
|
1702 |
-
" <td>84</td>\n",
|
1703 |
-
" <td>11.186577</td>\n",
|
1704 |
-
" <td>72.0</td>\n",
|
1705 |
-
" <td>70.485714</td>\n",
|
1706 |
-
" <td>46</td>\n",
|
1707 |
-
" <td>90</td>\n",
|
1708 |
-
" <td>...</td>\n",
|
1709 |
-
" <td>0</td>\n",
|
1710 |
-
" <td>1</td>\n",
|
1711 |
-
" <td>0.481594</td>\n",
|
1712 |
-
" <td>1.0</td>\n",
|
1713 |
-
" <td>0.657143</td>\n",
|
1714 |
-
" <td>pac_twelve</td>\n",
|
1715 |
-
" <td>Z12</td>\n",
|
1716 |
-
" <td>Oregon</td>\n",
|
1717 |
-
" <td>1985</td>\n",
|
1718 |
-
" <td>2024</td>\n",
|
1719 |
-
" </tr>\n",
|
1720 |
-
" <tr>\n",
|
1721 |
-
" <th>305</th>\n",
|
1722 |
-
" <td>1199</td>\n",
|
1723 |
-
" <td>2011</td>\n",
|
1724 |
-
" <td>M</td>\n",
|
1725 |
-
" <td>44</td>\n",
|
1726 |
-
" <td>97</td>\n",
|
1727 |
-
" <td>13.496634</td>\n",
|
1728 |
-
" <td>69.0</td>\n",
|
1729 |
-
" <td>69.322581</td>\n",
|
1730 |
-
" <td>38</td>\n",
|
1731 |
-
" <td>89</td>\n",
|
1732 |
-
" <td>...</td>\n",
|
1733 |
-
" <td>0</td>\n",
|
1734 |
-
" <td>1</td>\n",
|
1735 |
-
" <td>0.475191</td>\n",
|
1736 |
-
" <td>1.0</td>\n",
|
1737 |
-
" <td>0.677419</td>\n",
|
1738 |
-
" <td>acc</td>\n",
|
1739 |
-
" <td>Z10</td>\n",
|
1740 |
-
" <td>Florida St</td>\n",
|
1741 |
-
" <td>1985</td>\n",
|
1742 |
-
" <td>2024</td>\n",
|
1743 |
-
" </tr>\n",
|
1744 |
-
" <tr>\n",
|
1745 |
-
" <th>124</th>\n",
|
1746 |
-
" <td>1139</td>\n",
|
1747 |
-
" <td>2016</td>\n",
|
1748 |
-
" <td>M</td>\n",
|
1749 |
-
" <td>55</td>\n",
|
1750 |
-
" <td>144</td>\n",
|
1751 |
-
" <td>16.508128</td>\n",
|
1752 |
-
" <td>78.0</td>\n",
|
1753 |
-
" <td>80.580645</td>\n",
|
1754 |
-
" <td>52</td>\n",
|
1755 |
-
" <td>88</td>\n",
|
1756 |
-
" <td>...</td>\n",
|
1757 |
-
" <td>0</td>\n",
|
1758 |
-
" <td>1</td>\n",
|
1759 |
-
" <td>0.475191</td>\n",
|
1760 |
-
" <td>1.0</td>\n",
|
1761 |
-
" <td>0.677419</td>\n",
|
1762 |
-
" <td>big_east</td>\n",
|
1763 |
-
" <td>X09</td>\n",
|
1764 |
-
" <td>Butler</td>\n",
|
1765 |
-
" <td>1985</td>\n",
|
1766 |
-
" <td>2024</td>\n",
|
1767 |
-
" </tr>\n",
|
1768 |
-
" <tr>\n",
|
1769 |
-
" <th>1305</th>\n",
|
1770 |
-
" <td>1458</td>\n",
|
1771 |
-
" <td>2019</td>\n",
|
1772 |
-
" <td>M</td>\n",
|
1773 |
-
" <td>46</td>\n",
|
1774 |
-
" <td>101</td>\n",
|
1775 |
-
" <td>12.044032</td>\n",
|
1776 |
-
" <td>69.0</td>\n",
|
1777 |
-
" <td>69.060606</td>\n",
|
1778 |
-
" <td>45</td>\n",
|
1779 |
-
" <td>84</td>\n",
|
1780 |
-
" <td>...</td>\n",
|
1781 |
-
" <td>0</td>\n",
|
1782 |
-
" <td>1</td>\n",
|
1783 |
-
" <td>0.466694</td>\n",
|
1784 |
-
" <td>1.0</td>\n",
|
1785 |
-
" <td>0.696970</td>\n",
|
1786 |
-
" <td>big_ten</td>\n",
|
1787 |
-
" <td>Z05</td>\n",
|
1788 |
-
" <td>Wisconsin</td>\n",
|
1789 |
-
" <td>1985</td>\n",
|
1790 |
-
" <td>2024</td>\n",
|
1791 |
-
" </tr>\n",
|
1792 |
-
" <tr>\n",
|
1793 |
-
" <th>1174</th>\n",
|
1794 |
-
" <td>1433</td>\n",
|
1795 |
-
" <td>2019</td>\n",
|
1796 |
-
" <td>M</td>\n",
|
1797 |
-
" <td>49</td>\n",
|
1798 |
-
" <td>90</td>\n",
|
1799 |
-
" <td>10.441658</td>\n",
|
1800 |
-
" <td>70.5</td>\n",
|
1801 |
-
" <td>71.437500</td>\n",
|
1802 |
-
" <td>36</td>\n",
|
1803 |
-
" <td>87</td>\n",
|
1804 |
-
" <td>...</td>\n",
|
1805 |
-
" <td>0</td>\n",
|
1806 |
-
" <td>1</td>\n",
|
1807 |
-
" <td>0.420013</td>\n",
|
1808 |
-
" <td>1.0</td>\n",
|
1809 |
-
" <td>0.781250</td>\n",
|
1810 |
-
" <td>a_ten</td>\n",
|
1811 |
-
" <td>W08</td>\n",
|
1812 |
-
" <td>VCU</td>\n",
|
1813 |
-
" <td>1985</td>\n",
|
1814 |
-
" <td>2024</td>\n",
|
1815 |
-
" </tr>\n",
|
1816 |
-
" <tr>\n",
|
1817 |
-
" <th>567</th>\n",
|
1818 |
-
" <td>1272</td>\n",
|
1819 |
-
" <td>2004</td>\n",
|
1820 |
-
" <td>M</td>\n",
|
1821 |
-
" <td>60</td>\n",
|
1822 |
-
" <td>94</td>\n",
|
1823 |
-
" <td>10.228604</td>\n",
|
1824 |
-
" <td>73.0</td>\n",
|
1825 |
-
" <td>73.571429</td>\n",
|
1826 |
-
" <td>48</td>\n",
|
1827 |
-
" <td>85</td>\n",
|
1828 |
-
" <td>...</td>\n",
|
1829 |
-
" <td>0</td>\n",
|
1830 |
-
" <td>1</td>\n",
|
1831 |
-
" <td>0.440959</td>\n",
|
1832 |
-
" <td>1.0</td>\n",
|
1833 |
-
" <td>0.750000</td>\n",
|
1834 |
-
" <td>cusa</td>\n",
|
1835 |
-
" <td>Y07</td>\n",
|
1836 |
-
" <td>Memphis</td>\n",
|
1837 |
-
" <td>1985</td>\n",
|
1838 |
-
" <td>2024</td>\n",
|
1839 |
-
" </tr>\n",
|
1840 |
-
" <tr>\n",
|
1841 |
-
" <th>665</th>\n",
|
1842 |
-
" <td>1292</td>\n",
|
1843 |
-
" <td>2013</td>\n",
|
1844 |
-
" <td>M</td>\n",
|
1845 |
-
" <td>45</td>\n",
|
1846 |
-
" <td>97</td>\n",
|
1847 |
-
" <td>11.877053</td>\n",
|
1848 |
-
" <td>72.0</td>\n",
|
1849 |
-
" <td>71.242424</td>\n",
|
1850 |
-
" <td>41</td>\n",
|
1851 |
-
" <td>82</td>\n",
|
1852 |
-
" <td>...</td>\n",
|
1853 |
-
" <td>0</td>\n",
|
1854 |
-
" <td>1</td>\n",
|
1855 |
-
" <td>0.364110</td>\n",
|
1856 |
-
" <td>1.0</td>\n",
|
1857 |
-
" <td>0.848485</td>\n",
|
1858 |
-
" <td>sun_belt</td>\n",
|
1859 |
-
" <td>Y11a</td>\n",
|
1860 |
-
" <td>MTSU</td>\n",
|
1861 |
-
" <td>1985</td>\n",
|
1862 |
-
" <td>2024</td>\n",
|
1863 |
-
" </tr>\n",
|
1864 |
-
" <tr>\n",
|
1865 |
-
" <th>271</th>\n",
|
1866 |
-
" <td>1186</td>\n",
|
1867 |
-
" <td>2004</td>\n",
|
1868 |
-
" <td>M</td>\n",
|
1869 |
-
" <td>49</td>\n",
|
1870 |
-
" <td>100</td>\n",
|
1871 |
-
" <td>12.568700</td>\n",
|
1872 |
-
" <td>70.0</td>\n",
|
1873 |
-
" <td>70.250000</td>\n",
|
1874 |
-
" <td>52</td>\n",
|
1875 |
-
" <td>104</td>\n",
|
1876 |
-
" <td>...</td>\n",
|
1877 |
-
" <td>0</td>\n",
|
1878 |
-
" <td>1</td>\n",
|
1879 |
-
" <td>0.503953</td>\n",
|
1880 |
-
" <td>1.0</td>\n",
|
1881 |
-
" <td>0.571429</td>\n",
|
1882 |
-
" <td>big_sky</td>\n",
|
1883 |
-
" <td>Y15</td>\n",
|
1884 |
-
" <td>E Washington</td>\n",
|
1885 |
-
" <td>1985</td>\n",
|
1886 |
-
" <td>2024</td>\n",
|
1887 |
-
" </tr>\n",
|
1888 |
-
" <tr>\n",
|
1889 |
-
" <th>325</th>\n",
|
1890 |
-
" <td>1207</td>\n",
|
1891 |
-
" <td>2011</td>\n",
|
1892 |
-
" <td>M</td>\n",
|
1893 |
-
" <td>46</td>\n",
|
1894 |
-
" <td>111</td>\n",
|
1895 |
-
" <td>14.956568</td>\n",
|
1896 |
-
" <td>69.0</td>\n",
|
1897 |
-
" <td>71.032258</td>\n",
|
1898 |
-
" <td>51</td>\n",
|
1899 |
-
" <td>102</td>\n",
|
1900 |
-
" <td>...</td>\n",
|
1901 |
-
" <td>0</td>\n",
|
1902 |
-
" <td>1</td>\n",
|
1903 |
-
" <td>0.475191</td>\n",
|
1904 |
-
" <td>1.0</td>\n",
|
1905 |
-
" <td>0.677419</td>\n",
|
1906 |
-
" <td>big_east</td>\n",
|
1907 |
-
" <td>Z06</td>\n",
|
1908 |
-
" <td>Georgetown</td>\n",
|
1909 |
-
" <td>1985</td>\n",
|
1910 |
-
" <td>2024</td>\n",
|
1911 |
-
" </tr>\n",
|
1912 |
-
" </tbody>\n",
|
1913 |
-
"</table>\n",
|
1914 |
-
"<p>10 rows × 163 columns</p>\n",
|
1915 |
-
"</div>"
|
1916 |
-
],
|
1917 |
-
"text/plain": [
|
1918 |
-
" TeamID Season League TeamScore min TeamScore max TeamScore std \\\n",
|
1919 |
-
"976 1387 2012 M 58 86 7.354612 \n",
|
1920 |
-
"834 1332 2019 M 47 84 11.186577 \n",
|
1921 |
-
"305 1199 2011 M 44 97 13.496634 \n",
|
1922 |
-
"124 1139 2016 M 55 144 16.508128 \n",
|
1923 |
-
"1305 1458 2019 M 46 101 12.044032 \n",
|
1924 |
-
"1174 1433 2019 M 49 90 10.441658 \n",
|
1925 |
-
"567 1272 2004 M 60 94 10.228604 \n",
|
1926 |
-
"665 1292 2013 M 45 97 11.877053 \n",
|
1927 |
-
"271 1186 2004 M 49 100 12.568700 \n",
|
1928 |
-
"325 1207 2011 M 46 111 14.956568 \n",
|
1929 |
-
"\n",
|
1930 |
-
" TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
|
1931 |
-
"976 68.0 69.096774 35 79 ... \n",
|
1932 |
-
"834 72.0 70.485714 46 90 ... \n",
|
1933 |
-
"305 69.0 69.322581 38 89 ... \n",
|
1934 |
-
"124 78.0 80.580645 52 88 ... \n",
|
1935 |
-
"1305 69.0 69.060606 45 84 ... \n",
|
1936 |
-
"1174 70.5 71.437500 36 87 ... \n",
|
1937 |
-
"567 73.0 73.571429 48 85 ... \n",
|
1938 |
-
"665 72.0 71.242424 41 82 ... \n",
|
1939 |
-
"271 70.0 70.250000 52 104 ... \n",
|
1940 |
-
"325 69.0 71.032258 51 102 ... \n",
|
1941 |
-
"\n",
|
1942 |
-
" Win min Win max Win std Win median Win mean ConfAbbrev Seed \\\n",
|
1943 |
-
"976 0 1 0.425024 1.0 0.774194 a_ten Z09 \n",
|
1944 |
-
"834 0 1 0.481594 1.0 0.657143 pac_twelve Z12 \n",
|
1945 |
-
"305 0 1 0.475191 1.0 0.677419 acc Z10 \n",
|
1946 |
-
"124 0 1 0.475191 1.0 0.677419 big_east X09 \n",
|
1947 |
-
"1305 0 1 0.466694 1.0 0.696970 big_ten Z05 \n",
|
1948 |
-
"1174 0 1 0.420013 1.0 0.781250 a_ten W08 \n",
|
1949 |
-
"567 0 1 0.440959 1.0 0.750000 cusa Y07 \n",
|
1950 |
-
"665 0 1 0.364110 1.0 0.848485 sun_belt Y11a \n",
|
1951 |
-
"271 0 1 0.503953 1.0 0.571429 big_sky Y15 \n",
|
1952 |
-
"325 0 1 0.475191 1.0 0.677419 big_east Z06 \n",
|
1953 |
-
"\n",
|
1954 |
-
" TeamName FirstD1Season LastD1Season \n",
|
1955 |
-
"976 St Louis 1985 2024 \n",
|
1956 |
-
"834 Oregon 1985 2024 \n",
|
1957 |
-
"305 Florida St 1985 2024 \n",
|
1958 |
-
"124 Butler 1985 2024 \n",
|
1959 |
-
"1305 Wisconsin 1985 2024 \n",
|
1960 |
-
"1174 VCU 1985 2024 \n",
|
1961 |
-
"567 Memphis 1985 2024 \n",
|
1962 |
-
"665 MTSU 1985 2024 \n",
|
1963 |
-
"271 E Washington 1985 2024 \n",
|
1964 |
-
"325 Georgetown 1985 2024 \n",
|
1965 |
-
"\n",
|
1966 |
-
"[10 rows x 163 columns]"
|
1967 |
-
]
|
1968 |
-
},
|
1969 |
-
"execution_count": 13,
|
1970 |
-
"metadata": {},
|
1971 |
-
"output_type": "execute_result"
|
1972 |
-
}
|
1973 |
-
],
|
1974 |
"source": [
|
1975 |
-
"# merge the
|
1976 |
-
"\n",
|
1977 |
-
"
|
1978 |
-
"
|
1979 |
-
"\n",
|
1980 |
-
"
|
|
|
|
|
|
|
1981 |
]
|
1982 |
},
|
1983 |
{
|
1984 |
"cell_type": "code",
|
1985 |
-
"execution_count":
|
1986 |
"metadata": {},
|
1987 |
"outputs": [
|
1988 |
{
|
@@ -2009,19 +1665,19 @@
|
|
2009 |
" <th>TeamID</th>\n",
|
2010 |
" <th>Season</th>\n",
|
2011 |
" <th>League</th>\n",
|
2012 |
-
" <th>TeamScore min</th>\n",
|
2013 |
-
" <th>TeamScore max</th>\n",
|
2014 |
-
" <th>TeamScore std</th>\n",
|
2015 |
-
" <th>TeamScore median</th>\n",
|
2016 |
-
" <th>TeamScore mean</th>\n",
|
2017 |
-
" <th>OppScore min</th>\n",
|
2018 |
-
" <th>OppScore max</th>\n",
|
2019 |
" <th>...</th>\n",
|
2020 |
-
" <th>Win min</th>\n",
|
2021 |
-
" <th>Win max</th>\n",
|
2022 |
-
" <th>Win std</th>\n",
|
2023 |
-
" <th>Win median</th>\n",
|
2024 |
-
" <th>Win mean</th>\n",
|
2025 |
" <th>ConfAbbrev</th>\n",
|
2026 |
" <th>Seed</th>\n",
|
2027 |
" <th>TeamName</th>\n",
|
@@ -2035,13 +1691,13 @@
|
|
2035 |
" <td>1437</td>\n",
|
2036 |
" <td>2011</td>\n",
|
2037 |
" <td>M</td>\n",
|
2038 |
-
" <td>
|
2039 |
-
" <td>
|
2040 |
-
" <td>
|
2041 |
-
" <td>
|
2042 |
-
" <td>
|
2043 |
-
" <td>
|
2044 |
-
" <td>
|
2045 |
" <td>...</td>\n",
|
2046 |
" <td>0</td>\n",
|
2047 |
" <td>0</td>\n",
|
@@ -2060,12 +1716,12 @@
|
|
2060 |
" <td>2013</td>\n",
|
2061 |
" <td>M</td>\n",
|
2062 |
" <td>56</td>\n",
|
2063 |
-
" <td>
|
2064 |
-
" <td>
|
2065 |
-
" <td>
|
2066 |
-
" <td>
|
2067 |
-
" <td>
|
2068 |
-
" <td>
|
2069 |
" <td>...</td>\n",
|
2070 |
" <td>0</td>\n",
|
2071 |
" <td>0</td>\n",
|
@@ -2083,13 +1739,13 @@
|
|
2083 |
" <td>1417</td>\n",
|
2084 |
" <td>2015</td>\n",
|
2085 |
" <td>M</td>\n",
|
2086 |
-
" <td>
|
2087 |
-
" <td>
|
2088 |
-
" <td>
|
2089 |
-
" <td>
|
2090 |
-
" <td>
|
2091 |
-
" <td>
|
2092 |
-
" <td>
|
2093 |
" <td>...</td>\n",
|
2094 |
" <td>0</td>\n",
|
2095 |
" <td>1</td>\n",
|
@@ -2107,13 +1763,13 @@
|
|
2107 |
" <td>1305</td>\n",
|
2108 |
" <td>2005</td>\n",
|
2109 |
" <td>M</td>\n",
|
2110 |
-
" <td>
|
2111 |
-
" <td>
|
2112 |
-
" <td>
|
2113 |
-
" <td>
|
2114 |
-
" <td>
|
2115 |
-
" <td>
|
2116 |
-
" <td>
|
2117 |
" <td>...</td>\n",
|
2118 |
" <td>0</td>\n",
|
2119 |
" <td>1</td>\n",
|
@@ -2131,13 +1787,13 @@
|
|
2131 |
" <td>1332</td>\n",
|
2132 |
" <td>2019</td>\n",
|
2133 |
" <td>M</td>\n",
|
2134 |
-
" <td>
|
2135 |
-
" <td>
|
2136 |
-
" <td>
|
2137 |
" <td>72.0</td>\n",
|
2138 |
-
" <td>
|
2139 |
-
" <td>
|
2140 |
-
" <td>
|
2141 |
" <td>...</td>\n",
|
2142 |
" <td>0</td>\n",
|
2143 |
" <td>1</td>\n",
|
@@ -2155,13 +1811,13 @@
|
|
2155 |
" <td>1393</td>\n",
|
2156 |
" <td>2018</td>\n",
|
2157 |
" <td>M</td>\n",
|
2158 |
-
" <td>
|
2159 |
-
" <td>
|
2160 |
-
" <td>
|
2161 |
-
" <td>
|
2162 |
-
" <td>
|
2163 |
-
" <td>
|
2164 |
-
" <td>
|
2165 |
" <td>...</td>\n",
|
2166 |
" <td>0</td>\n",
|
2167 |
" <td>1</td>\n",
|
@@ -2179,13 +1835,13 @@
|
|
2179 |
" <td>1373</td>\n",
|
2180 |
" <td>2010</td>\n",
|
2181 |
" <td>M</td>\n",
|
2182 |
-
" <td>
|
2183 |
-
" <td>
|
2184 |
-
" <td>
|
2185 |
-
" <td>
|
2186 |
-
" <td>
|
2187 |
-
" <td>
|
2188 |
-
" <td>
|
2189 |
" <td>...</td>\n",
|
2190 |
" <td>0</td>\n",
|
2191 |
" <td>0</td>\n",
|
@@ -2203,13 +1859,13 @@
|
|
2203 |
" <td>1425</td>\n",
|
2204 |
" <td>2022</td>\n",
|
2205 |
" <td>M</td>\n",
|
2206 |
-
" <td>
|
2207 |
-
" <td>
|
2208 |
-
" <td>
|
2209 |
-
" <td>
|
2210 |
-
" <td>
|
2211 |
-
" <td>
|
2212 |
-
" <td>
|
2213 |
" <td>...</td>\n",
|
2214 |
" <td>0</td>\n",
|
2215 |
" <td>0</td>\n",
|
@@ -2227,13 +1883,13 @@
|
|
2227 |
" <td>1234</td>\n",
|
2228 |
" <td>2015</td>\n",
|
2229 |
" <td>M</td>\n",
|
2230 |
-
" <td>
|
2231 |
-
" <td>
|
2232 |
-
" <td>10.
|
2233 |
-
" <td>
|
2234 |
-
" <td>
|
2235 |
-
" <td>
|
2236 |
-
" <td>
|
2237 |
" <td>...</td>\n",
|
2238 |
" <td>0</td>\n",
|
2239 |
" <td>1</td>\n",
|
@@ -2251,13 +1907,13 @@
|
|
2251 |
" <td>1386</td>\n",
|
2252 |
" <td>2008</td>\n",
|
2253 |
" <td>M</td>\n",
|
2254 |
-
" <td>
|
2255 |
-
" <td>
|
2256 |
-
" <td>
|
2257 |
-
" <td>
|
2258 |
-
" <td>
|
2259 |
-
" <td>
|
2260 |
-
" <td>
|
2261 |
" <td>...</td>\n",
|
2262 |
" <td>0</td>\n",
|
2263 |
" <td>0</td>\n",
|
@@ -2272,85 +1928,91 @@
|
|
2272 |
" </tr>\n",
|
2273 |
" </tbody>\n",
|
2274 |
"</table>\n",
|
2275 |
-
"<p>10 rows ×
|
2276 |
"</div>"
|
2277 |
],
|
2278 |
"text/plain": [
|
2279 |
-
" TeamID Season League TeamScore min TeamScore max
|
2280 |
-
"1202 1437 2011 M
|
2281 |
-
"109 1137 2013 M
|
2282 |
-
"1105 1417 2015 M
|
2283 |
-
"700 1305 2005 M
|
2284 |
-
"834 1332 2019 M
|
2285 |
-
"1009 1393 2018 M
|
2286 |
-
"953 1373 2010 M
|
2287 |
-
"1141 1425 2022 M
|
2288 |
-
"419 1234 2015 M
|
2289 |
-
"973 1386 2008 M
|
2290 |
"\n",
|
2291 |
-
" TeamScore median TeamScore mean
|
2292 |
-
"1202
|
2293 |
-
"109
|
2294 |
-
"1105
|
2295 |
-
"700
|
2296 |
-
"834
|
2297 |
-
"1009
|
2298 |
-
"953
|
2299 |
-
"1141
|
2300 |
-
"419
|
2301 |
-
"973
|
2302 |
"\n",
|
2303 |
-
"
|
2304 |
-
"1202
|
2305 |
-
"109
|
2306 |
-
"1105
|
2307 |
-
"700
|
2308 |
-
"834
|
2309 |
-
"1009
|
2310 |
-
"953
|
2311 |
-
"1141
|
2312 |
-
"419
|
2313 |
-
"973
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2314 |
"\n",
|
2315 |
-
"
|
2316 |
-
"1202 Villanova 1985 2024 \n",
|
2317 |
-
"109
|
2318 |
-
"1105 UCLA 1985 2024 \n",
|
2319 |
-
"700
|
2320 |
-
"834
|
2321 |
-
"1009 Syracuse 1985 2024 \n",
|
2322 |
-
"953
|
2323 |
-
"1141 USC 1985 2024 \n",
|
2324 |
-
"419
|
2325 |
-
"973 St Joseph's PA 1985 2024 \n",
|
2326 |
"\n",
|
2327 |
-
"[10 rows x
|
2328 |
]
|
2329 |
},
|
2330 |
-
"execution_count":
|
2331 |
"metadata": {},
|
2332 |
"output_type": "execute_result"
|
2333 |
}
|
2334 |
],
|
2335 |
"source": [
|
2336 |
-
"
|
2337 |
-
|
2338 |
-
|
2339 |
-
|
2340 |
-
|
2341 |
-
|
2342 |
-
|
2343 |
-
|
2344 |
-
|
2345 |
-
"
|
2346 |
-
"
|
2347 |
-
"
|
2348 |
-
" left=team_reg_agg_df, \n",
|
2349 |
-
" right=team_tourney_agg_df, \n",
|
2350 |
-
" on=[\"TeamID\", \"Season\", \"League\"], \n",
|
2351 |
-
" suffixes=(\" reg\", \" tourney\"),\n",
|
2352 |
-
" )\n",
|
2353 |
-
")"
|
2354 |
]
|
2355 |
},
|
2356 |
{
|
@@ -2395,11 +2057,11 @@
|
|
2395 |
" <th>Win std tourney</th>\n",
|
2396 |
" <th>Win median tourney</th>\n",
|
2397 |
" <th>Win mean tourney</th>\n",
|
2398 |
-
" <th>ConfAbbrev
|
2399 |
-
" <th>Seed
|
2400 |
-
" <th>TeamName
|
2401 |
-
" <th>FirstD1Season
|
2402 |
-
" <th>LastD1Season
|
2403 |
" </tr>\n",
|
2404 |
" </thead>\n",
|
2405 |
" <tbody>\n",
|
@@ -2645,7 +2307,7 @@
|
|
2645 |
" </tr>\n",
|
2646 |
" </tbody>\n",
|
2647 |
"</table>\n",
|
2648 |
-
"<p>10 rows ×
|
2649 |
"</div>"
|
2650 |
],
|
2651 |
"text/plain": [
|
@@ -2697,31 +2359,19 @@
|
|
2697 |
"891 0 NaN 0.0 0.000000 \n",
|
2698 |
"559 1 0.707107 0.5 0.500000 \n",
|
2699 |
"\n",
|
2700 |
-
" ConfAbbrev
|
2701 |
-
"409
|
2702 |
-
"1189
|
2703 |
-
"1275
|
2704 |
-
"487
|
2705 |
-
"1135
|
2706 |
-
"556
|
2707 |
-
"368
|
2708 |
-
"1037
|
2709 |
-
"891
|
2710 |
-
"559
|
2711 |
-
"\n",
|
2712 |
-
" FirstD1Season tourney LastD1Season tourney \n",
|
2713 |
-
"409 1985 2024 \n",
|
2714 |
-
"1189 1985 2024 \n",
|
2715 |
-
"1275 1985 2024 \n",
|
2716 |
-
"487 1985 2024 \n",
|
2717 |
-
"1135 1985 2024 \n",
|
2718 |
-
"556 1985 2024 \n",
|
2719 |
-
"368 1985 2023 \n",
|
2720 |
-
"1037 1985 2024 \n",
|
2721 |
-
"891 1985 2024 \n",
|
2722 |
-
"559 1985 2024 \n",
|
2723 |
"\n",
|
2724 |
-
"[10 rows x
|
2725 |
]
|
2726 |
},
|
2727 |
"execution_count": 16,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
+
"execution_count": 3,
|
19 |
"metadata": {},
|
20 |
"outputs": [
|
21 |
{
|
|
|
206 |
"[5 rows x 35 columns]"
|
207 |
]
|
208 |
},
|
209 |
+
"execution_count": 3,
|
210 |
"metadata": {},
|
211 |
"output_type": "execute_result"
|
212 |
}
|
|
|
222 |
},
|
223 |
{
|
224 |
"cell_type": "code",
|
225 |
+
"execution_count": 4,
|
226 |
"metadata": {},
|
227 |
"outputs": [
|
228 |
{
|
|
|
413 |
"[5 rows x 35 columns]"
|
414 |
]
|
415 |
},
|
416 |
+
"execution_count": 4,
|
417 |
"metadata": {},
|
418 |
"output_type": "execute_result"
|
419 |
}
|
|
|
436 |
},
|
437 |
{
|
438 |
"cell_type": "code",
|
439 |
+
"execution_count": 5,
|
440 |
"metadata": {},
|
441 |
"outputs": [],
|
442 |
"source": [
|
|
|
466 |
},
|
467 |
{
|
468 |
"cell_type": "code",
|
469 |
+
"execution_count": 6,
|
470 |
"metadata": {},
|
471 |
"outputs": [
|
472 |
{
|
|
|
540 |
},
|
541 |
{
|
542 |
"cell_type": "code",
|
543 |
+
"execution_count": 7,
|
544 |
"metadata": {},
|
545 |
"outputs": [
|
546 |
{
|
|
|
613 |
},
|
614 |
{
|
615 |
"cell_type": "code",
|
616 |
+
"execution_count": 8,
|
617 |
"metadata": {},
|
618 |
"outputs": [],
|
619 |
"source": [
|
|
|
630 |
},
|
631 |
{
|
632 |
"cell_type": "code",
|
633 |
+
"execution_count": 9,
|
634 |
"metadata": {},
|
635 |
"outputs": [],
|
636 |
"source": [
|
|
|
654 |
},
|
655 |
{
|
656 |
"cell_type": "code",
|
657 |
+
"execution_count": 10,
|
658 |
"metadata": {},
|
659 |
"outputs": [],
|
660 |
"source": [
|
|
|
680 |
},
|
681 |
{
|
682 |
"cell_type": "code",
|
683 |
+
"execution_count": 11,
|
684 |
"metadata": {
|
685 |
"tags": []
|
686 |
},
|
|
|
1027 |
"[10 rows x 158 columns]"
|
1028 |
]
|
1029 |
},
|
1030 |
+
"execution_count": 11,
|
1031 |
"metadata": {},
|
1032 |
"output_type": "execute_result"
|
1033 |
}
|
|
|
1046 |
},
|
1047 |
{
|
1048 |
"cell_type": "code",
|
1049 |
+
"execution_count": 12,
|
1050 |
"metadata": {},
|
1051 |
"outputs": [
|
1052 |
{
|
|
|
1391 |
"[10 rows x 158 columns]"
|
1392 |
]
|
1393 |
},
|
1394 |
+
"execution_count": 12,
|
1395 |
"metadata": {},
|
1396 |
"output_type": "execute_result"
|
1397 |
}
|
|
|
1419 |
},
|
1420 |
{
|
1421 |
"cell_type": "code",
|
1422 |
+
"execution_count": 13,
|
1423 |
"metadata": {},
|
1424 |
"outputs": [
|
1425 |
{
|
|
|
1594 |
"1998 2024 "
|
1595 |
]
|
1596 |
},
|
1597 |
+
"execution_count": 13,
|
1598 |
"metadata": {},
|
1599 |
"output_type": "execute_result"
|
1600 |
}
|
|
|
1621 |
},
|
1622 |
{
|
1623 |
"cell_type": "code",
|
1624 |
+
"execution_count": 14,
|
1625 |
"metadata": {},
|
1626 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1627 |
"source": [
|
1628 |
+
"# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
|
1629 |
+
"team_agg_df = (\n",
|
1630 |
+
" pd.merge(\n",
|
1631 |
+
" left=team_reg_agg, \n",
|
1632 |
+
" right=team_tourney_agg, \n",
|
1633 |
+
" on=[\"TeamID\", \"Season\", \"League\"], \n",
|
1634 |
+
" suffixes=(\" reg\", \" tourney\"),\n",
|
1635 |
+
" )\n",
|
1636 |
+
")"
|
1637 |
]
|
1638 |
},
|
1639 |
{
|
1640 |
"cell_type": "code",
|
1641 |
+
"execution_count": 15,
|
1642 |
"metadata": {},
|
1643 |
"outputs": [
|
1644 |
{
|
|
|
1665 |
" <th>TeamID</th>\n",
|
1666 |
" <th>Season</th>\n",
|
1667 |
" <th>League</th>\n",
|
1668 |
+
" <th>TeamScore min reg</th>\n",
|
1669 |
+
" <th>TeamScore max reg</th>\n",
|
1670 |
+
" <th>TeamScore std reg</th>\n",
|
1671 |
+
" <th>TeamScore median reg</th>\n",
|
1672 |
+
" <th>TeamScore mean reg</th>\n",
|
1673 |
+
" <th>OppScore min reg</th>\n",
|
1674 |
+
" <th>OppScore max reg</th>\n",
|
1675 |
" <th>...</th>\n",
|
1676 |
+
" <th>Win min tourney</th>\n",
|
1677 |
+
" <th>Win max tourney</th>\n",
|
1678 |
+
" <th>Win std tourney</th>\n",
|
1679 |
+
" <th>Win median tourney</th>\n",
|
1680 |
+
" <th>Win mean tourney</th>\n",
|
1681 |
" <th>ConfAbbrev</th>\n",
|
1682 |
" <th>Seed</th>\n",
|
1683 |
" <th>TeamName</th>\n",
|
|
|
1691 |
" <td>1437</td>\n",
|
1692 |
" <td>2011</td>\n",
|
1693 |
" <td>M</td>\n",
|
1694 |
+
" <td>50</td>\n",
|
1695 |
+
" <td>88</td>\n",
|
1696 |
+
" <td>9.450156</td>\n",
|
1697 |
+
" <td>73.0</td>\n",
|
1698 |
+
" <td>72.718750</td>\n",
|
1699 |
+
" <td>36</td>\n",
|
1700 |
+
" <td>93</td>\n",
|
1701 |
" <td>...</td>\n",
|
1702 |
" <td>0</td>\n",
|
1703 |
" <td>0</td>\n",
|
|
|
1716 |
" <td>2013</td>\n",
|
1717 |
" <td>M</td>\n",
|
1718 |
" <td>56</td>\n",
|
1719 |
+
" <td>88</td>\n",
|
1720 |
+
" <td>7.121073</td>\n",
|
1721 |
+
" <td>66.0</td>\n",
|
1722 |
+
" <td>67.250000</td>\n",
|
1723 |
+
" <td>42</td>\n",
|
1724 |
+
" <td>79</td>\n",
|
1725 |
" <td>...</td>\n",
|
1726 |
" <td>0</td>\n",
|
1727 |
" <td>0</td>\n",
|
|
|
1739 |
" <td>1417</td>\n",
|
1740 |
" <td>2015</td>\n",
|
1741 |
" <td>M</td>\n",
|
1742 |
+
" <td>39</td>\n",
|
1743 |
+
" <td>113</td>\n",
|
1744 |
+
" <td>16.594427</td>\n",
|
1745 |
+
" <td>72.0</td>\n",
|
1746 |
+
" <td>72.000000</td>\n",
|
1747 |
+
" <td>45</td>\n",
|
1748 |
+
" <td>87</td>\n",
|
1749 |
" <td>...</td>\n",
|
1750 |
" <td>0</td>\n",
|
1751 |
" <td>1</td>\n",
|
|
|
1763 |
" <td>1305</td>\n",
|
1764 |
" <td>2005</td>\n",
|
1765 |
" <td>M</td>\n",
|
1766 |
+
" <td>52</td>\n",
|
1767 |
+
" <td>84</td>\n",
|
1768 |
+
" <td>8.720199</td>\n",
|
1769 |
+
" <td>72.0</td>\n",
|
1770 |
+
" <td>69.551724</td>\n",
|
1771 |
+
" <td>46</td>\n",
|
1772 |
+
" <td>85</td>\n",
|
1773 |
" <td>...</td>\n",
|
1774 |
" <td>0</td>\n",
|
1775 |
" <td>1</td>\n",
|
|
|
1787 |
" <td>1332</td>\n",
|
1788 |
" <td>2019</td>\n",
|
1789 |
" <td>M</td>\n",
|
1790 |
+
" <td>47</td>\n",
|
1791 |
+
" <td>84</td>\n",
|
1792 |
+
" <td>11.186577</td>\n",
|
1793 |
" <td>72.0</td>\n",
|
1794 |
+
" <td>70.485714</td>\n",
|
1795 |
+
" <td>46</td>\n",
|
1796 |
+
" <td>90</td>\n",
|
1797 |
" <td>...</td>\n",
|
1798 |
" <td>0</td>\n",
|
1799 |
" <td>1</td>\n",
|
|
|
1811 |
" <td>1393</td>\n",
|
1812 |
" <td>2018</td>\n",
|
1813 |
" <td>M</td>\n",
|
1814 |
+
" <td>44</td>\n",
|
1815 |
+
" <td>90</td>\n",
|
1816 |
+
" <td>11.519253</td>\n",
|
1817 |
+
" <td>70.0</td>\n",
|
1818 |
+
" <td>67.545455</td>\n",
|
1819 |
+
" <td>45</td>\n",
|
1820 |
+
" <td>101</td>\n",
|
1821 |
" <td>...</td>\n",
|
1822 |
" <td>0</td>\n",
|
1823 |
" <td>1</td>\n",
|
|
|
1835 |
" <td>1373</td>\n",
|
1836 |
" <td>2010</td>\n",
|
1837 |
" <td>M</td>\n",
|
1838 |
+
" <td>53</td>\n",
|
1839 |
+
" <td>99</td>\n",
|
1840 |
+
" <td>10.164555</td>\n",
|
1841 |
+
" <td>76.0</td>\n",
|
1842 |
+
" <td>75.454545</td>\n",
|
1843 |
+
" <td>51</td>\n",
|
1844 |
+
" <td>87</td>\n",
|
1845 |
" <td>...</td>\n",
|
1846 |
" <td>0</td>\n",
|
1847 |
" <td>0</td>\n",
|
|
|
1859 |
" <td>1425</td>\n",
|
1860 |
" <td>2022</td>\n",
|
1861 |
" <td>M</td>\n",
|
1862 |
+
" <td>58</td>\n",
|
1863 |
+
" <td>98</td>\n",
|
1864 |
+
" <td>10.506041</td>\n",
|
1865 |
+
" <td>70.0</td>\n",
|
1866 |
+
" <td>72.575758</td>\n",
|
1867 |
+
" <td>43</td>\n",
|
1868 |
+
" <td>91</td>\n",
|
1869 |
" <td>...</td>\n",
|
1870 |
" <td>0</td>\n",
|
1871 |
" <td>0</td>\n",
|
|
|
1883 |
" <td>1234</td>\n",
|
1884 |
" <td>2015</td>\n",
|
1885 |
" <td>M</td>\n",
|
1886 |
+
" <td>44</td>\n",
|
1887 |
+
" <td>90</td>\n",
|
1888 |
+
" <td>10.216556</td>\n",
|
1889 |
+
" <td>70.5</td>\n",
|
1890 |
+
" <td>69.406250</td>\n",
|
1891 |
+
" <td>44</td>\n",
|
1892 |
+
" <td>90</td>\n",
|
1893 |
" <td>...</td>\n",
|
1894 |
" <td>0</td>\n",
|
1895 |
" <td>1</td>\n",
|
|
|
1907 |
" <td>1386</td>\n",
|
1908 |
" <td>2008</td>\n",
|
1909 |
" <td>M</td>\n",
|
1910 |
+
" <td>55</td>\n",
|
1911 |
+
" <td>98</td>\n",
|
1912 |
+
" <td>10.801234</td>\n",
|
1913 |
+
" <td>72.0</td>\n",
|
1914 |
+
" <td>73.333333</td>\n",
|
1915 |
+
" <td>42</td>\n",
|
1916 |
+
" <td>102</td>\n",
|
1917 |
" <td>...</td>\n",
|
1918 |
" <td>0</td>\n",
|
1919 |
" <td>0</td>\n",
|
|
|
1928 |
" </tr>\n",
|
1929 |
" </tbody>\n",
|
1930 |
"</table>\n",
|
1931 |
+
"<p>10 rows × 318 columns</p>\n",
|
1932 |
"</div>"
|
1933 |
],
|
1934 |
"text/plain": [
|
1935 |
+
" TeamID Season League TeamScore min reg TeamScore max reg \\\n",
|
1936 |
+
"1202 1437 2011 M 50 88 \n",
|
1937 |
+
"109 1137 2013 M 56 88 \n",
|
1938 |
+
"1105 1417 2015 M 39 113 \n",
|
1939 |
+
"700 1305 2005 M 52 84 \n",
|
1940 |
+
"834 1332 2019 M 47 84 \n",
|
1941 |
+
"1009 1393 2018 M 44 90 \n",
|
1942 |
+
"953 1373 2010 M 53 99 \n",
|
1943 |
+
"1141 1425 2022 M 58 98 \n",
|
1944 |
+
"419 1234 2015 M 44 90 \n",
|
1945 |
+
"973 1386 2008 M 55 98 \n",
|
1946 |
"\n",
|
1947 |
+
" TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
|
1948 |
+
"1202 9.450156 73.0 72.718750 \n",
|
1949 |
+
"109 7.121073 66.0 67.250000 \n",
|
1950 |
+
"1105 16.594427 72.0 72.000000 \n",
|
1951 |
+
"700 8.720199 72.0 69.551724 \n",
|
1952 |
+
"834 11.186577 72.0 70.485714 \n",
|
1953 |
+
"1009 11.519253 70.0 67.545455 \n",
|
1954 |
+
"953 10.164555 76.0 75.454545 \n",
|
1955 |
+
"1141 10.506041 70.0 72.575758 \n",
|
1956 |
+
"419 10.216556 70.5 69.406250 \n",
|
1957 |
+
"973 10.801234 72.0 73.333333 \n",
|
1958 |
"\n",
|
1959 |
+
" OppScore min reg OppScore max reg ... Win min tourney \\\n",
|
1960 |
+
"1202 36 93 ... 0 \n",
|
1961 |
+
"109 42 79 ... 0 \n",
|
1962 |
+
"1105 45 87 ... 0 \n",
|
1963 |
+
"700 46 85 ... 0 \n",
|
1964 |
+
"834 46 90 ... 0 \n",
|
1965 |
+
"1009 45 101 ... 0 \n",
|
1966 |
+
"953 51 87 ... 0 \n",
|
1967 |
+
"1141 43 91 ... 0 \n",
|
1968 |
+
"419 44 90 ... 0 \n",
|
1969 |
+
"973 42 102 ... 0 \n",
|
1970 |
+
"\n",
|
1971 |
+
" Win max tourney Win std tourney Win median tourney Win mean tourney \\\n",
|
1972 |
+
"1202 0 NaN 0.0 0.000000 \n",
|
1973 |
+
"109 0 NaN 0.0 0.000000 \n",
|
1974 |
+
"1105 1 0.577350 1.0 0.666667 \n",
|
1975 |
+
"700 1 0.707107 0.5 0.500000 \n",
|
1976 |
+
"834 1 0.577350 1.0 0.666667 \n",
|
1977 |
+
"1009 1 0.500000 1.0 0.750000 \n",
|
1978 |
+
"953 0 NaN 0.0 0.000000 \n",
|
1979 |
+
"1141 0 NaN 0.0 0.000000 \n",
|
1980 |
+
"419 1 0.707107 0.5 0.500000 \n",
|
1981 |
+
"973 0 NaN 0.0 0.000000 \n",
|
1982 |
"\n",
|
1983 |
+
" ConfAbbrev Seed TeamName FirstD1Season LastD1Season \n",
|
1984 |
+
"1202 big_east W09 Villanova 1985 2024 \n",
|
1985 |
+
"109 patriot W11 Bucknell 1985 2024 \n",
|
1986 |
+
"1105 pac_twelve X11 UCLA 1985 2024 \n",
|
1987 |
+
"700 wac X09 Nevada 1985 2024 \n",
|
1988 |
+
"834 pac_twelve Z12 Oregon 1985 2024 \n",
|
1989 |
+
"1009 acc X11b Syracuse 1985 2024 \n",
|
1990 |
+
"953 maac X13 Siena 1985 2024 \n",
|
1991 |
+
"1141 pac_twelve Y07 USC 1985 2024 \n",
|
1992 |
+
"419 big_ten X07 Iowa 1985 2024 \n",
|
1993 |
+
"973 a_ten W11 St Joseph's PA 1985 2024 \n",
|
1994 |
"\n",
|
1995 |
+
"[10 rows x 318 columns]"
|
1996 |
]
|
1997 |
},
|
1998 |
+
"execution_count": 15,
|
1999 |
"metadata": {},
|
2000 |
"output_type": "execute_result"
|
2001 |
}
|
2002 |
],
|
2003 |
"source": [
|
2004 |
+
"# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
|
2005 |
+
"\n",
|
2006 |
+
"# team_reg_agg_df = team_reg_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
|
2007 |
+
"# team_tourney_agg_df = team_tourney_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
|
2008 |
+
"\n",
|
2009 |
+
"team_agg_df = pd.merge(\n",
|
2010 |
+
" left=team_agg_df,\n",
|
2011 |
+
" right=team_conf_seeds_df,\n",
|
2012 |
+
" on=[\"TeamID\", \"Season\", \"League\"],\n",
|
2013 |
+
")\n",
|
2014 |
+
"\n",
|
2015 |
+
"team_agg_df.sample(10, random_state=10)"
|
|
|
|
|
|
|
|
|
|
|
|
|
2016 |
]
|
2017 |
},
|
2018 |
{
|
|
|
2057 |
" <th>Win std tourney</th>\n",
|
2058 |
" <th>Win median tourney</th>\n",
|
2059 |
" <th>Win mean tourney</th>\n",
|
2060 |
+
" <th>ConfAbbrev</th>\n",
|
2061 |
+
" <th>Seed</th>\n",
|
2062 |
+
" <th>TeamName</th>\n",
|
2063 |
+
" <th>FirstD1Season</th>\n",
|
2064 |
+
" <th>LastD1Season</th>\n",
|
2065 |
" </tr>\n",
|
2066 |
" </thead>\n",
|
2067 |
" <tbody>\n",
|
|
|
2307 |
" </tr>\n",
|
2308 |
" </tbody>\n",
|
2309 |
"</table>\n",
|
2310 |
+
"<p>10 rows × 318 columns</p>\n",
|
2311 |
"</div>"
|
2312 |
],
|
2313 |
"text/plain": [
|
|
|
2359 |
"891 0 NaN 0.0 0.000000 \n",
|
2360 |
"559 1 0.707107 0.5 0.500000 \n",
|
2361 |
"\n",
|
2362 |
+
" ConfAbbrev Seed TeamName FirstD1Season LastD1Season \n",
|
2363 |
+
"409 maac Z15 Iona 1985 2024 \n",
|
2364 |
+
"1189 aec Y13 Vermont 1985 2024 \n",
|
2365 |
+
"1275 mvc Z09 Wichita St 1985 2024 \n",
|
2366 |
+
"487 sec Z02 Kentucky 1985 2024 \n",
|
2367 |
+
"1135 pac_ten X06 USC 1985 2024 \n",
|
2368 |
+
"556 acc Y04 Maryland 1985 2024 \n",
|
2369 |
+
"368 aec Z16 Hartford 1985 2023 \n",
|
2370 |
+
"1037 sec W04 Tennessee 1985 2024 \n",
|
2371 |
+
"891 big_ten W01 Purdue 1985 2024 \n",
|
2372 |
+
"559 big_ten Y04 Maryland 1985 2024 \n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2373 |
"\n",
|
2374 |
+
"[10 rows x 318 columns]"
|
2375 |
]
|
2376 |
},
|
2377 |
"execution_count": 16,
|