Jensen-holm
committed on
Commit
•
9c0cc03
1
Parent(s):
2a90490
columns that did not need to be aggregated were being aggregated. I
Browse files - src/pre_processing.ipynb +166 -162
src/pre_processing.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -15,7 +15,7 @@
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
-
"execution_count":
|
19 |
"metadata": {},
|
20 |
"outputs": [
|
21 |
{
|
@@ -85,10 +85,12 @@
|
|
85 |
},
|
86 |
{
|
87 |
"cell_type": "code",
|
88 |
-
"execution_count":
|
89 |
"metadata": {},
|
90 |
"outputs": [],
|
91 |
"source": [
|
|
|
|
|
92 |
"\n",
|
93 |
"detailed_metrics = {\n",
|
94 |
" \"Score\",\n",
|
@@ -128,7 +130,7 @@
|
|
128 |
},
|
129 |
{
|
130 |
"cell_type": "code",
|
131 |
-
"execution_count":
|
132 |
"metadata": {},
|
133 |
"outputs": [
|
134 |
{
|
@@ -326,7 +328,7 @@
|
|
326 |
"[5 rows x 36 columns]"
|
327 |
]
|
328 |
},
|
329 |
-
"execution_count":
|
330 |
"metadata": {},
|
331 |
"output_type": "execute_result"
|
332 |
}
|
@@ -337,7 +339,7 @@
|
|
337 |
},
|
338 |
{
|
339 |
"cell_type": "code",
|
340 |
-
"execution_count":
|
341 |
"metadata": {},
|
342 |
"outputs": [],
|
343 |
"source": [
|
@@ -349,7 +351,7 @@
|
|
349 |
},
|
350 |
{
|
351 |
"cell_type": "code",
|
352 |
-
"execution_count":
|
353 |
"metadata": {},
|
354 |
"outputs": [
|
355 |
{
|
@@ -547,7 +549,7 @@
|
|
547 |
"[5 rows x 37 columns]"
|
548 |
]
|
549 |
},
|
550 |
-
"execution_count":
|
551 |
"metadata": {},
|
552 |
"output_type": "execute_result"
|
553 |
}
|
@@ -558,7 +560,7 @@
|
|
558 |
},
|
559 |
{
|
560 |
"cell_type": "code",
|
561 |
-
"execution_count":
|
562 |
"metadata": {},
|
563 |
"outputs": [
|
564 |
{
|
@@ -572,7 +574,7 @@
|
|
572 |
"Name: Win, dtype: int64"
|
573 |
]
|
574 |
},
|
575 |
-
"execution_count":
|
576 |
"metadata": {},
|
577 |
"output_type": "execute_result"
|
578 |
}
|
@@ -595,8 +597,36 @@
|
|
595 |
},
|
596 |
{
|
597 |
"cell_type": "code",
|
598 |
-
"execution_count":
|
599 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
600 |
"outputs": [
|
601 |
{
|
602 |
"data": {
|
@@ -622,13 +652,13 @@
|
|
622 |
" <th>TeamID</th>\n",
|
623 |
" <th>Season</th>\n",
|
624 |
" <th>League</th>\n",
|
625 |
-
" <th>
|
626 |
-
" <th>
|
627 |
-
" <th>
|
628 |
-
" <th>
|
629 |
-
" <th>
|
630 |
-
" <th>
|
631 |
-
" <th>
|
632 |
" <th>...</th>\n",
|
633 |
" <th>ScoreDiff min</th>\n",
|
634 |
" <th>ScoreDiff max</th>\n",
|
@@ -648,13 +678,13 @@
|
|
648 |
" <td>3430</td>\n",
|
649 |
" <td>2012</td>\n",
|
650 |
" <td>W</td>\n",
|
651 |
-
" <td>
|
652 |
-
" <td>
|
653 |
-
" <td>
|
654 |
-
" <td>
|
655 |
-
" <td>
|
656 |
-
" <td>
|
657 |
-
" <td>
|
658 |
" <td>...</td>\n",
|
659 |
" <td>-32</td>\n",
|
660 |
" <td>35</td>\n",
|
@@ -672,13 +702,13 @@
|
|
672 |
" <td>1431</td>\n",
|
673 |
" <td>2018</td>\n",
|
674 |
" <td>M</td>\n",
|
675 |
-
" <td>
|
676 |
-
" <td>
|
677 |
-
" <td>
|
678 |
-
" <td>
|
679 |
-
" <td>
|
680 |
-
" <td>
|
681 |
-
" <td>
|
682 |
" <td>...</td>\n",
|
683 |
" <td>-49</td>\n",
|
684 |
" <td>29</td>\n",
|
@@ -696,13 +726,13 @@
|
|
696 |
" <td>1315</td>\n",
|
697 |
" <td>2014</td>\n",
|
698 |
" <td>M</td>\n",
|
699 |
-
" <td>
|
700 |
-
" <td>
|
701 |
-
" <td>
|
702 |
-
" <td>
|
703 |
-
" <td>
|
704 |
-
" <td>
|
705 |
-
" <td>
|
706 |
" <td>...</td>\n",
|
707 |
" <td>-27</td>\n",
|
708 |
" <td>18</td>\n",
|
@@ -720,13 +750,13 @@
|
|
720 |
" <td>1307</td>\n",
|
721 |
" <td>2005</td>\n",
|
722 |
" <td>M</td>\n",
|
723 |
-
" <td>
|
724 |
-
" <td>
|
725 |
-
" <td>
|
726 |
-
" <td>
|
727 |
-
" <td>
|
728 |
-
" <td>
|
729 |
-
" <td>
|
730 |
" <td>...</td>\n",
|
731 |
" <td>-17</td>\n",
|
732 |
" <td>34</td>\n",
|
@@ -744,13 +774,13 @@
|
|
744 |
" <td>1266</td>\n",
|
745 |
" <td>2008</td>\n",
|
746 |
" <td>M</td>\n",
|
747 |
-
" <td>
|
748 |
-
" <td>
|
749 |
-
" <td>
|
750 |
-
" <td>
|
751 |
-
" <td>
|
752 |
-
" <td>
|
753 |
-
" <td>
|
754 |
" <td>...</td>\n",
|
755 |
" <td>-20</td>\n",
|
756 |
" <td>47</td>\n",
|
@@ -768,13 +798,13 @@
|
|
768 |
" <td>1352</td>\n",
|
769 |
" <td>2016</td>\n",
|
770 |
" <td>M</td>\n",
|
771 |
-
" <td>
|
772 |
-
" <td>
|
773 |
-
" <td>
|
774 |
-
" <td>
|
775 |
-
" <td>
|
776 |
-
" <td>
|
777 |
-
" <td>
|
778 |
" <td>...</td>\n",
|
779 |
" <td>-62</td>\n",
|
780 |
" <td>18</td>\n",
|
@@ -792,13 +822,13 @@
|
|
792 |
" <td>1194</td>\n",
|
793 |
" <td>2005</td>\n",
|
794 |
" <td>M</td>\n",
|
795 |
-
" <td>
|
796 |
-
" <td>
|
797 |
-
" <td>
|
798 |
-
" <td>
|
799 |
-
" <td>
|
800 |
-
" <td>
|
801 |
-
" <td>
|
802 |
" <td>...</td>\n",
|
803 |
" <td>-45</td>\n",
|
804 |
" <td>27</td>\n",
|
@@ -816,13 +846,13 @@
|
|
816 |
" <td>3270</td>\n",
|
817 |
" <td>2021</td>\n",
|
818 |
" <td>W</td>\n",
|
819 |
-
" <td>
|
820 |
-
" <td>
|
821 |
-
" <td>
|
822 |
-
" <td>
|
823 |
-
" <td>
|
824 |
-
" <td>
|
825 |
-
" <td>
|
826 |
" <td>...</td>\n",
|
827 |
" <td>-93</td>\n",
|
828 |
" <td>24</td>\n",
|
@@ -840,13 +870,13 @@
|
|
840 |
" <td>3240</td>\n",
|
841 |
" <td>2014</td>\n",
|
842 |
" <td>W</td>\n",
|
843 |
-
" <td>
|
844 |
-
" <td>
|
845 |
-
" <td>
|
846 |
-
" <td>
|
847 |
-
" <td>
|
848 |
-
" <td>
|
849 |
-
" <td>
|
850 |
" <td>...</td>\n",
|
851 |
" <td>-42</td>\n",
|
852 |
" <td>17</td>\n",
|
@@ -864,13 +894,13 @@
|
|
864 |
" <td>3452</td>\n",
|
865 |
" <td>2011</td>\n",
|
866 |
" <td>W</td>\n",
|
867 |
-
" <td>
|
868 |
-
" <td>
|
869 |
-
" <td>
|
870 |
-
" <td>
|
871 |
-
" <td>
|
872 |
-
" <td>
|
873 |
-
" <td>
|
874 |
" <td>...</td>\n",
|
875 |
" <td>-23</td>\n",
|
876 |
" <td>57</td>\n",
|
@@ -885,89 +915,70 @@
|
|
885 |
" </tr>\n",
|
886 |
" </tbody>\n",
|
887 |
"</table>\n",
|
888 |
-
"<p>10 rows ×
|
889 |
"</div>"
|
890 |
],
|
891 |
"text/plain": [
|
892 |
-
" TeamID Season League
|
893 |
-
"12348 3430 2012 W
|
894 |
-
"6900 1431 2018 M
|
895 |
-
"4406 1315 2014 M
|
896 |
-
"4233 1307 2005 M
|
897 |
-
"3407 1266 2008 M
|
898 |
-
"5190 1352 2016 M
|
899 |
-
"1892 1194 2005 M
|
900 |
-
"10020 3270 2021 W
|
901 |
-
"9567 3240 2014 W
|
902 |
-
"12617 3452 2011 W
|
903 |
"\n",
|
904 |
-
"
|
905 |
-
"12348
|
906 |
-
"6900
|
907 |
-
"4406
|
908 |
-
"4233
|
909 |
-
"3407
|
910 |
-
"5190
|
911 |
-
"1892
|
912 |
-
"10020
|
913 |
-
"9567
|
914 |
-
"12617
|
915 |
"\n",
|
916 |
-
" ScoreDiff max ScoreDiff std ScoreDiff median
|
917 |
-
"12348 35 16.997102 -1.0
|
918 |
-
"6900
|
919 |
-
"4406
|
920 |
-
"4233
|
921 |
-
"3407
|
922 |
-
"5190
|
923 |
-
"1892
|
924 |
-
"10020 24 27.245445 -15.0
|
925 |
-
"9567
|
926 |
-
"12617 57 18.777131 13.5
|
927 |
"\n",
|
928 |
-
" Win min Win max Win std Win median Win mean \n",
|
929 |
-
"12348 0 1 0.508548 0.0 0.482759 \n",
|
930 |
-
"6900
|
931 |
-
"4406
|
932 |
-
"4233
|
933 |
-
"3407
|
934 |
-
"5190
|
935 |
-
"1892
|
936 |
-
"10020 0 1 0.462910 0.0 0.285714 \n",
|
937 |
-
"9567
|
938 |
-
"12617 0 1 0.456803 1.0 0.718750 \n",
|
939 |
"\n",
|
940 |
-
"[10 rows x
|
941 |
]
|
942 |
},
|
943 |
-
"execution_count":
|
944 |
"metadata": {},
|
945 |
"output_type": "execute_result"
|
946 |
}
|
947 |
],
|
948 |
"source": [
|
949 |
-
"exclude_agg_cols = {\n",
|
950 |
-
" \"TeamID\",\n",
|
951 |
-
" \"Season\",\n",
|
952 |
-
" \"League\",\n",
|
953 |
-
" \"GameResult\",\n",
|
954 |
-
" \"OppLoc\",\n",
|
955 |
-
" \"TeamLoc\",\n",
|
956 |
-
"}\n",
|
957 |
-
"\n",
|
958 |
-
"agg_funcs = [\n",
|
959 |
-
" np.min,\n",
|
960 |
-
" np.max,\n",
|
961 |
-
" np.std,\n",
|
962 |
-
" np.median,\n",
|
963 |
-
" np.mean,\n",
|
964 |
-
"]\n",
|
965 |
-
"\n",
|
966 |
-
"# numeric_detailed_cols = detailed_team_results_df.select_dtypes(\"number\").columns\n",
|
967 |
-
"\n",
|
968 |
"team_reg_agg = (\n",
|
969 |
" detailed_team_results_df.groupby([\"TeamID\", \"Season\", \"League\"])\n",
|
970 |
-
" .agg({col: agg_funcs for col in detailed_team_results_df.select_dtypes(\"number\").columns})\n",
|
971 |
" .reset_index()\n",
|
972 |
")\n",
|
973 |
"\n",
|
@@ -975,13 +986,6 @@
|
|
975 |
"\n",
|
976 |
"team_reg_agg.sample(10, random_state=1)"
|
977 |
]
|
978 |
-
},
|
979 |
-
{
|
980 |
-
"cell_type": "code",
|
981 |
-
"execution_count": null,
|
982 |
-
"metadata": {},
|
983 |
-
"outputs": [],
|
984 |
-
"source": []
|
985 |
}
|
986 |
],
|
987 |
"metadata": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
+
"execution_count": 2,
|
19 |
"metadata": {},
|
20 |
"outputs": [
|
21 |
{
|
|
|
85 |
},
|
86 |
{
|
87 |
"cell_type": "code",
|
88 |
+
"execution_count": 3,
|
89 |
"metadata": {},
|
90 |
"outputs": [],
|
91 |
"source": [
|
92 |
+
"# here we are making it such that each game has two rows, where each one is a team view of the game with\n",
|
93 |
+
"# opposing metrics.\n",
|
94 |
"\n",
|
95 |
"detailed_metrics = {\n",
|
96 |
" \"Score\",\n",
|
|
|
130 |
},
|
131 |
{
|
132 |
"cell_type": "code",
|
133 |
+
"execution_count": 4,
|
134 |
"metadata": {},
|
135 |
"outputs": [
|
136 |
{
|
|
|
328 |
"[5 rows x 36 columns]"
|
329 |
]
|
330 |
},
|
331 |
+
"execution_count": 4,
|
332 |
"metadata": {},
|
333 |
"output_type": "execute_result"
|
334 |
}
|
|
|
339 |
},
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
+
"execution_count": 5,
|
343 |
"metadata": {},
|
344 |
"outputs": [],
|
345 |
"source": [
|
|
|
351 |
},
|
352 |
{
|
353 |
"cell_type": "code",
|
354 |
+
"execution_count": 6,
|
355 |
"metadata": {},
|
356 |
"outputs": [
|
357 |
{
|
|
|
549 |
"[5 rows x 37 columns]"
|
550 |
]
|
551 |
},
|
552 |
+
"execution_count": 6,
|
553 |
"metadata": {},
|
554 |
"output_type": "execute_result"
|
555 |
}
|
|
|
560 |
},
|
561 |
{
|
562 |
"cell_type": "code",
|
563 |
+
"execution_count": 7,
|
564 |
"metadata": {},
|
565 |
"outputs": [
|
566 |
{
|
|
|
574 |
"Name: Win, dtype: int64"
|
575 |
]
|
576 |
},
|
577 |
+
"execution_count": 7,
|
578 |
"metadata": {},
|
579 |
"output_type": "execute_result"
|
580 |
}
|
|
|
597 |
},
|
598 |
{
|
599 |
"cell_type": "code",
|
600 |
+
"execution_count": 15,
|
601 |
"metadata": {},
|
602 |
+
"outputs": [],
|
603 |
+
"source": [
|
604 |
+
"exclude_agg_cols = {\n",
|
605 |
+
" \"TeamID\",\n",
|
606 |
+
" \"Season\",\n",
|
607 |
+
" \"League\",\n",
|
608 |
+
" \"GameResult\",\n",
|
609 |
+
" \"OppLoc\",\n",
|
610 |
+
" \"TeamLoc\",\n",
|
611 |
+
" \"Season\",\n",
|
612 |
+
" \"DayNum\",\n",
|
613 |
+
"}\n",
|
614 |
+
"\n",
|
615 |
+
"agg_funcs = [\n",
|
616 |
+
" np.min,\n",
|
617 |
+
" np.max,\n",
|
618 |
+
" np.std,\n",
|
619 |
+
" np.median,\n",
|
620 |
+
" np.mean,\n",
|
621 |
+
"]"
|
622 |
+
]
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"cell_type": "code",
|
626 |
+
"execution_count": 16,
|
627 |
+
"metadata": {
|
628 |
+
"tags": []
|
629 |
+
},
|
630 |
"outputs": [
|
631 |
{
|
632 |
"data": {
|
|
|
652 |
" <th>TeamID</th>\n",
|
653 |
" <th>Season</th>\n",
|
654 |
" <th>League</th>\n",
|
655 |
+
" <th>TeamScore min</th>\n",
|
656 |
+
" <th>TeamScore max</th>\n",
|
657 |
+
" <th>TeamScore std</th>\n",
|
658 |
+
" <th>TeamScore median</th>\n",
|
659 |
+
" <th>TeamScore mean</th>\n",
|
660 |
+
" <th>OppScore min</th>\n",
|
661 |
+
" <th>OppScore max</th>\n",
|
662 |
" <th>...</th>\n",
|
663 |
" <th>ScoreDiff min</th>\n",
|
664 |
" <th>ScoreDiff max</th>\n",
|
|
|
678 |
" <td>3430</td>\n",
|
679 |
" <td>2012</td>\n",
|
680 |
" <td>W</td>\n",
|
681 |
+
" <td>41</td>\n",
|
682 |
+
" <td>78</td>\n",
|
683 |
+
" <td>10.808339</td>\n",
|
684 |
+
" <td>61.0</td>\n",
|
685 |
+
" <td>58.965517</td>\n",
|
686 |
+
" <td>36</td>\n",
|
687 |
+
" <td>85</td>\n",
|
688 |
" <td>...</td>\n",
|
689 |
" <td>-32</td>\n",
|
690 |
" <td>35</td>\n",
|
|
|
702 |
" <td>1431</td>\n",
|
703 |
" <td>2018</td>\n",
|
704 |
" <td>M</td>\n",
|
705 |
+
" <td>33</td>\n",
|
706 |
+
" <td>88</td>\n",
|
707 |
+
" <td>12.283247</td>\n",
|
708 |
+
" <td>67.0</td>\n",
|
709 |
+
" <td>66.466667</td>\n",
|
710 |
+
" <td>44</td>\n",
|
711 |
+
" <td>97</td>\n",
|
712 |
" <td>...</td>\n",
|
713 |
" <td>-49</td>\n",
|
714 |
" <td>29</td>\n",
|
|
|
726 |
" <td>1315</td>\n",
|
727 |
" <td>2014</td>\n",
|
728 |
" <td>M</td>\n",
|
729 |
+
" <td>43</td>\n",
|
730 |
+
" <td>95</td>\n",
|
731 |
+
" <td>10.019980</td>\n",
|
732 |
+
" <td>72.0</td>\n",
|
733 |
+
" <td>73.000000</td>\n",
|
734 |
+
" <td>61</td>\n",
|
735 |
+
" <td>103</td>\n",
|
736 |
" <td>...</td>\n",
|
737 |
" <td>-27</td>\n",
|
738 |
" <td>18</td>\n",
|
|
|
750 |
" <td>1307</td>\n",
|
751 |
" <td>2005</td>\n",
|
752 |
" <td>M</td>\n",
|
753 |
+
" <td>53</td>\n",
|
754 |
+
" <td>101</td>\n",
|
755 |
+
" <td>12.911860</td>\n",
|
756 |
+
" <td>77.0</td>\n",
|
757 |
+
" <td>75.870968</td>\n",
|
758 |
+
" <td>47</td>\n",
|
759 |
+
" <td>81</td>\n",
|
760 |
" <td>...</td>\n",
|
761 |
" <td>-17</td>\n",
|
762 |
" <td>34</td>\n",
|
|
|
774 |
" <td>1266</td>\n",
|
775 |
" <td>2008</td>\n",
|
776 |
" <td>M</td>\n",
|
777 |
+
" <td>51</td>\n",
|
778 |
+
" <td>100</td>\n",
|
779 |
+
" <td>11.841315</td>\n",
|
780 |
+
" <td>75.5</td>\n",
|
781 |
+
" <td>75.906250</td>\n",
|
782 |
+
" <td>37</td>\n",
|
783 |
+
" <td>89</td>\n",
|
784 |
" <td>...</td>\n",
|
785 |
" <td>-20</td>\n",
|
786 |
" <td>47</td>\n",
|
|
|
798 |
" <td>1352</td>\n",
|
799 |
" <td>2016</td>\n",
|
800 |
" <td>M</td>\n",
|
801 |
+
" <td>44</td>\n",
|
802 |
+
" <td>89</td>\n",
|
803 |
+
" <td>10.298567</td>\n",
|
804 |
+
" <td>67.0</td>\n",
|
805 |
+
" <td>65.062500</td>\n",
|
806 |
+
" <td>45</td>\n",
|
807 |
+
" <td>106</td>\n",
|
808 |
" <td>...</td>\n",
|
809 |
" <td>-62</td>\n",
|
810 |
" <td>18</td>\n",
|
|
|
822 |
" <td>1194</td>\n",
|
823 |
" <td>2005</td>\n",
|
824 |
" <td>M</td>\n",
|
825 |
+
" <td>45</td>\n",
|
826 |
+
" <td>104</td>\n",
|
827 |
+
" <td>14.194618</td>\n",
|
828 |
+
" <td>76.0</td>\n",
|
829 |
+
" <td>76.777778</td>\n",
|
830 |
+
" <td>59</td>\n",
|
831 |
+
" <td>107</td>\n",
|
832 |
" <td>...</td>\n",
|
833 |
" <td>-45</td>\n",
|
834 |
" <td>27</td>\n",
|
|
|
846 |
" <td>3270</td>\n",
|
847 |
" <td>2021</td>\n",
|
848 |
" <td>W</td>\n",
|
849 |
+
" <td>24</td>\n",
|
850 |
+
" <td>80</td>\n",
|
851 |
+
" <td>13.385137</td>\n",
|
852 |
+
" <td>53.0</td>\n",
|
853 |
+
" <td>55.476190</td>\n",
|
854 |
+
" <td>41</td>\n",
|
855 |
+
" <td>117</td>\n",
|
856 |
" <td>...</td>\n",
|
857 |
" <td>-93</td>\n",
|
858 |
" <td>24</td>\n",
|
|
|
870 |
" <td>3240</td>\n",
|
871 |
" <td>2014</td>\n",
|
872 |
" <td>W</td>\n",
|
873 |
+
" <td>43</td>\n",
|
874 |
+
" <td>84</td>\n",
|
875 |
+
" <td>11.319009</td>\n",
|
876 |
+
" <td>62.5</td>\n",
|
877 |
+
" <td>63.593750</td>\n",
|
878 |
+
" <td>45</td>\n",
|
879 |
+
" <td>100</td>\n",
|
880 |
" <td>...</td>\n",
|
881 |
" <td>-42</td>\n",
|
882 |
" <td>17</td>\n",
|
|
|
894 |
" <td>3452</td>\n",
|
895 |
" <td>2011</td>\n",
|
896 |
" <td>W</td>\n",
|
897 |
+
" <td>39</td>\n",
|
898 |
+
" <td>90</td>\n",
|
899 |
+
" <td>12.518374</td>\n",
|
900 |
+
" <td>65.0</td>\n",
|
901 |
+
" <td>65.750000</td>\n",
|
902 |
+
" <td>21</td>\n",
|
903 |
+
" <td>79</td>\n",
|
904 |
" <td>...</td>\n",
|
905 |
" <td>-23</td>\n",
|
906 |
" <td>57</td>\n",
|
|
|
915 |
" </tr>\n",
|
916 |
" </tbody>\n",
|
917 |
"</table>\n",
|
918 |
+
"<p>10 rows × 158 columns</p>\n",
|
919 |
"</div>"
|
920 |
],
|
921 |
"text/plain": [
|
922 |
+
" TeamID Season League TeamScore min TeamScore max TeamScore std \\\n",
|
923 |
+
"12348 3430 2012 W 41 78 10.808339 \n",
|
924 |
+
"6900 1431 2018 M 33 88 12.283247 \n",
|
925 |
+
"4406 1315 2014 M 43 95 10.019980 \n",
|
926 |
+
"4233 1307 2005 M 53 101 12.911860 \n",
|
927 |
+
"3407 1266 2008 M 51 100 11.841315 \n",
|
928 |
+
"5190 1352 2016 M 44 89 10.298567 \n",
|
929 |
+
"1892 1194 2005 M 45 104 14.194618 \n",
|
930 |
+
"10020 3270 2021 W 24 80 13.385137 \n",
|
931 |
+
"9567 3240 2014 W 43 84 11.319009 \n",
|
932 |
+
"12617 3452 2011 W 39 90 12.518374 \n",
|
933 |
"\n",
|
934 |
+
" TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
|
935 |
+
"12348 61.0 58.965517 36 85 ... \n",
|
936 |
+
"6900 67.0 66.466667 44 97 ... \n",
|
937 |
+
"4406 72.0 73.000000 61 103 ... \n",
|
938 |
+
"4233 77.0 75.870968 47 81 ... \n",
|
939 |
+
"3407 75.5 75.906250 37 89 ... \n",
|
940 |
+
"5190 67.0 65.062500 45 106 ... \n",
|
941 |
+
"1892 76.0 76.777778 59 107 ... \n",
|
942 |
+
"10020 53.0 55.476190 41 117 ... \n",
|
943 |
+
"9567 62.5 63.593750 45 100 ... \n",
|
944 |
+
"12617 65.0 65.750000 21 79 ... \n",
|
945 |
"\n",
|
946 |
+
" ScoreDiff min ScoreDiff max ScoreDiff std ScoreDiff median \\\n",
|
947 |
+
"12348 -32 35 16.997102 -1.0 \n",
|
948 |
+
"6900 -49 29 14.772645 -5.0 \n",
|
949 |
+
"4406 -27 18 12.316786 -2.0 \n",
|
950 |
+
"4233 -17 34 13.022891 11.0 \n",
|
951 |
+
"3407 -20 47 17.828682 10.0 \n",
|
952 |
+
"5190 -62 18 14.365582 -7.0 \n",
|
953 |
+
"1892 -45 27 14.449736 -3.0 \n",
|
954 |
+
"10020 -93 24 27.245445 -15.0 \n",
|
955 |
+
"9567 -42 17 13.277095 -2.0 \n",
|
956 |
+
"12617 -23 57 18.777131 13.5 \n",
|
957 |
"\n",
|
958 |
+
" ScoreDiff mean Win min Win max Win std Win median Win mean \n",
|
959 |
+
"12348 -2.517241 0 1 0.508548 0.0 0.482759 \n",
|
960 |
+
"6900 -5.100000 0 1 0.479463 0.0 0.333333 \n",
|
961 |
+
"4406 -2.645161 0 1 0.508001 0.0 0.483871 \n",
|
962 |
+
"4233 10.935484 0 1 0.401610 1.0 0.806452 \n",
|
963 |
+
"3407 11.593750 0 1 0.456803 1.0 0.718750 \n",
|
964 |
+
"5190 -5.781250 0 1 0.470929 0.0 0.312500 \n",
|
965 |
+
"1892 -1.888889 0 1 0.492103 0.0 0.370370 \n",
|
966 |
+
"10020 -14.285714 0 1 0.462910 0.0 0.285714 \n",
|
967 |
+
"9567 -4.093750 0 1 0.504016 0.0 0.437500 \n",
|
968 |
+
"12617 13.500000 0 1 0.456803 1.0 0.718750 \n",
|
969 |
"\n",
|
970 |
+
"[10 rows x 158 columns]"
|
971 |
]
|
972 |
},
|
973 |
+
"execution_count": 16,
|
974 |
"metadata": {},
|
975 |
"output_type": "execute_result"
|
976 |
}
|
977 |
],
|
978 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
979 |
"team_reg_agg = (\n",
|
980 |
" detailed_team_results_df.groupby([\"TeamID\", \"Season\", \"League\"])\n",
|
981 |
+
" .agg({col: agg_funcs for col in detailed_team_results_df.select_dtypes(\"number\").columns if col not in exclude_agg_cols})\n",
|
982 |
" .reset_index()\n",
|
983 |
")\n",
|
984 |
"\n",
|
|
|
986 |
"\n",
|
987 |
"team_reg_agg.sample(10, random_state=1)"
|
988 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
989 |
}
|
990 |
],
|
991 |
"metadata": {
|