Jon Solow committed on
Commit
91793e3
1 Parent(s): 77fb55b

Add footballguys queries and Targets page

Browse files
src/pages/5_Targets.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import streamlit as st
3
+
4
+ from config import DEFAULT_ICON
5
+ from shared_page import common_page_config
6
+
7
+ from queries.footballguys.constants import YEAR
8
+ from queries.footballguys.refresh import request_stat
9
+ from streamlit_filter import filter_dataframe
10
+
11
+
12
@st.cache_data(ttl=60 * 60 * 1)
def load_data():
    """Fetch the "targets" stat table and note when it was loaded.

    The result is cached through ``st.cache_data`` with a TTL of one hour
    (``60 * 60 * 1`` seconds).

    Returns:
        A ``(data, data_load_time_str)`` tuple: the value returned by
        ``request_stat("targets")`` and the UTC load time formatted as
        ``MM/DD/YYYY HH:MM AM/PM``.
    """
    stat_name = "targets"
    data = request_stat(stat_name)
    # datetime.utcnow() is deprecated since Python 3.12; a timezone-aware
    # UTC "now" produces exactly the same formatted string.
    loaded_at = datetime.datetime.now(datetime.timezone.utc)
    data_load_time_str = loaded_at.strftime("%m/%d/%Y %I:%M %p")
    return data, data_load_time_str
18
+
19
+
20
def get_page():
    """Render the Targets page.

    Configures the Streamlit page, offers a "Refresh Data" button that clears
    the data cache, then shows the load timestamp and a filterable table of
    the data returned by ``load_data()``.
    """
    # The title names what this page shows: the "targets" stat from load_data().
    page_title = f"Targets - {YEAR}"
    st.set_page_config(page_title=page_title, page_icon=DEFAULT_ICON, layout="wide")
    common_page_config()
    st.title(page_title)
    if st.button("Refresh Data"):
        # Dropping the cache makes the load_data() call below fetch fresh data.
        st.cache_data.clear()
    data, data_load_time_str = load_data()
    st.write(f"Data loaded as of: {data_load_time_str} UTC")

    with st.container():
        filtered_data = filter_dataframe(data)
        st.dataframe(
            filtered_data,
            hide_index=True,
            # Height scales with the number of rows (35 per row, one extra
            # row, plus 12) — presumably so the whole table is visible; confirm.
            height=35 * (len(filtered_data) + 1) + 12,
            use_container_width=False,
            column_config={},
        )


if __name__ == "__main__":
    get_page()
src/queries/footballguys/__init__.py ADDED
File without changes
src/queries/footballguys/constants.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Mapping
2
+
3
+ # constants relevant to parsing from footballguys
4
+
5
# Position labels in the order used for the snap-count page tables.
# (The "POSITON" spelling is part of the public name and is kept as-is.)
SNAP_PAGE_POSITON_ORDER: List[str] = ["QB", "RB", "WR", "TE", "DT", "DE", "ILB", "OLB", "CB", "S"]

# Positions grouped by side of the ball; the tuple order fixes the key order
# of the mapping built below.
_OFFENSE_POSITIONS = ("QB", "RB", "WR", "TE")
_DEFENSE_POSITIONS = ("DT", "DE", "ILB", "OLB", "S", "CB")

# Maps each position label to "OFF" or "DEF".
POSITIONS_TO_OFFENSE_DEFENSE: Mapping[str, str] = {
    **dict.fromkeys(_OFFENSE_POSITIONS, "OFF"),
    **dict.fromkeys(_DEFENSE_POSITIONS, "DEF"),
}


# Root of the footballguys stats pages; stat name and query string are appended to it.
BASE_URL = "https://www.footballguys.com/stats"

# Season requested by default.
YEAR = 2023
src/queries/footballguys/helpers.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import lxml.html
2
+ import pandas as pd
3
+ import requests
4
+ from typing import List
5
+ from queries.footballguys import constants as fbgc
6
+
7
+
8
def url_to_pandas(url) -> List[pd.DataFrame]:
    """Download ``url`` and parse every HTML table found on the page.

    ``<br>`` tags are replaced with ``"-"`` before parsing, so cell content
    that was separated by a line break ends up joined by a hyphen.

    Args:
        url: Address of the page to fetch.

    Returns:
        One DataFrame per table, as produced by ``pandas.read_html``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
        ValueError: If the page contains no tables (raised by pandas).
    """
    from io import StringIO  # local import: only needed to wrap the HTML text

    # Without a timeout a stalled connection would block the caller forever.
    page = requests.get(url, timeout=30)
    # Fail on 4xx/5xx here instead of trying to parse an error page.
    page.raise_for_status()
    html = page.text.replace("<br>", "-")
    # pandas >= 2.1 deprecates passing literal HTML; a file-like wrapper
    # works on old and new versions alike.
    return pd.read_html(StringIO(html))
12
+
13
+
14
def create_html_table_from_header_body(header_html_str: str, body_html_str: str):
    """Wrap a header fragment and a body fragment in a single ``<table>``.

    Args:
        header_html_str: Markup of the table header. ``bytes`` are accepted
            as well and decoded as UTF-8.
        body_html_str: Markup of the table body. ``bytes`` are accepted as
            well and decoded as UTF-8.

    Returns:
        A string holding ``<table>``, the header, the body and ``</table>``,
        each on its own line.
    """

    def _as_text(fragment):
        # Formatting bytes into an f-string embeds their repr (b'...' with
        # escaped quotes and newlines) instead of the markup itself, which
        # corrupts e.g. names containing an apostrophe. Decode first.
        if isinstance(fragment, (bytes, bytearray)):
            return bytes(fragment).decode("utf-8")
        return fragment

    return f"""
<table>
{_as_text(header_html_str)}
{_as_text(body_html_str)}
</table>
"""
21
+
22
+
23
def extract_snaps_to_pandas(url: str):
    """Download a snap-count page and return one DataFrame per table section.

    The table located by the ``stats_snapcounts_data`` XPath is expected to
    hold an even number of children that alternate header element, body
    element. Each header/body pair is parsed as its own table. The first
    column of every parsed table is renamed to ``"name"`` and its original
    label is stored in a new leading ``"POS"`` column.

    Args:
        url: Address of the snap-count page.

    Returns:
        A list with one DataFrame per header/body pair.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
        ValueError: If the table is missing or its children do not come in
            header/body pairs.
    """
    from io import StringIO  # local import: only needed to wrap the HTML text

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    root = lxml.html.document_fromstring(response.text)
    table_element_list = root.xpath("""//*[@id="stats_snapcounts_data"]/div/table""")
    # Explicit checks instead of asserts: they survive ``python -O`` and give
    # a readable error when the page layout changes.
    if not isinstance(table_element_list, list) or not table_element_list:
        raise ValueError(f"No snap count table found at {url}")
    table_element = table_element_list[0]
    if not isinstance(table_element, lxml.html.HtmlElement):
        raise ValueError(f"Unexpected snap count table node at {url}")
    sections = list(table_element)
    if len(sections) % 2 != 0:
        raise ValueError(
            f"Expected header/body pairs in the snap count table at {url}, found {len(sections)} children"
        )

    df_list = []
    for header_element, body_element in zip(sections[0::2], sections[1::2]):
        # encoding="unicode" yields str; the default returns bytes, which
        # would be formatted as a b'...' repr when building the table markup.
        table_html = create_html_table_from_header_body(
            lxml.html.tostring(header_element, encoding="unicode"),
            lxml.html.tostring(body_element, encoding="unicode"),
        )
        df = pd.read_html(StringIO(table_html))[0]
        # The first column holds player names but is labelled with the
        # position (example "Quarterback"); keep that label in "POS".
        position_name = df.columns[0]
        df.insert(0, "POS", position_name)
        df.rename(columns={position_name: "name"}, inplace=True)
        df_list.append(df)
    return df_list
45
+
46
+
47
def add_snap_off_def_column(team_snap_df: pd.DataFrame):
    """Prepend an "OFF/DEF" column looked up from each row's "POS" value.

    The frame is modified in place. Every "POS" value must be a key of
    ``fbgc.POSITIONS_TO_OFFENSE_DEFENSE``; any other value raises ``KeyError``.
    """

    def _side_of_ball(position):
        return fbgc.POSITIONS_TO_OFFENSE_DEFENSE[position]

    sides = team_snap_df["POS"].apply(_side_of_ball)
    team_snap_df.insert(0, "OFF/DEF", sides)
50
+
51
+
52
def add_snap_position_column(
    team_snap_df_list: List[pd.DataFrame],
    position_name_array: List[str] = fbgc.SNAP_PAGE_POSITON_ORDER,
):
    """Insert a leading "POS" column into each frame, pairing frames with names by index.

    Frames are modified in place. The two sequences must have the same length.
    """
    # 8/22/23 - this check currently fails because the snap counts are
    # incorrectly not split by position at the moment.
    assert len(team_snap_df_list) == len(position_name_array)
    for pos_df, pos_name in zip(team_snap_df_list, position_name_array):
        pos_df.insert(0, "POS", pos_name)
61
+
62
+
63
def set_multilevel_columns(df):
    """Replace ``df``'s columns with a MultiIndex, in place.

    A label containing "-" is split on every hyphen into one tuple; any
    other label ``x`` becomes ``(x, x)``.
    """
    column_tuples = []
    for label in df.columns:
        if "-" in label:
            column_tuples.append(tuple(label.split("-")))
        else:
            column_tuples.append((label, label))
    df.columns = pd.MultiIndex.from_tuples(column_tuples)
66
+
67
+
68
def parse_snaps(team_short_name: str, base_url: str = fbgc.BASE_URL, year: int = fbgc.YEAR) -> pd.DataFrame:
    """Build one snap-count table for a team.

    Fetches the team's "snap-counts" page, concatenates the returned tables,
    splits the weekly columns into count and percent columns, and drops rows
    without a name.
    """
    print(f"Attempting to parse snaps for {team_short_name}")
    position_frames = parse_team_page(team_short_name, base_url, "snap-counts", year)
    combined = pd.concat(position_frames)
    # Currently disabled: add_snap_off_def_column(combined)
    split_snap_count_percents(combined)
    # Currently disabled: set_multilevel_columns(combined)
    return combined.dropna(subset=["name"])
77
+
78
+
79
def add_targets_position(team_df: pd.DataFrame):
    """Prepend a "POS" column taken from the "<position> Totals" rows.

    Each row whose name contains " Totals" supplies the label (the name with
    " Totals" removed) for itself and for the rows above it, up to the
    previous totals row. Rows after the last totals row get a missing value.
    The frame is modified in place.

    Args:
        team_df: Table with a "name" column.
    """

    def _totals_label(name):
        # Non-string names (e.g. NaN) can never be a totals row; testing
        # `" Totals" in name` on them would raise TypeError.
        if isinstance(name, str) and " Totals" in name:
            return name.replace(" Totals", "")
        return None

    totals_labels = team_df["name"].apply(_totals_label)
    # A back-fill equals the former reverse / forward-fill / reverse and
    # avoids the deprecated fillna(method=...) keyword.
    positions = totals_labels.bfill()
    team_df.insert(0, "POS", positions)
85
+
86
+
87
def parse_targets(team_short_name: str, base_url: str = fbgc.BASE_URL, year: int = fbgc.YEAR) -> pd.DataFrame:
    """Return a team's targets table with a "POS" column, keeping only rows that have a name."""
    print(f"Attempting to parse targets for {team_short_name}")
    page_tables = parse_team_page(team_short_name, base_url, "targets", year)
    # Only the first table on the page is used.
    team_df = page_tables[0]
    add_targets_position(team_df)
    has_name = team_df.name.notna()
    return team_df[has_name]
93
+
94
+
95
def parse_redzone(team_short_name: str, base_url: str = fbgc.BASE_URL, year: int = fbgc.YEAR) -> pd.DataFrame:
    """Return a team's redzone table with a "POS" column, keeping only rows that have a name."""
    print(f"Attempting to parse redzone for {team_short_name}")
    page_tables = parse_team_page(team_short_name, base_url, "redzone", year)
    # Only the first table on the page is used.
    team_df = page_tables[0]
    add_targets_position(team_df)
    has_name = team_df.name.notna()
    return team_df[has_name]
101
+
102
+
103
def _snap_count_from_parts(parts) -> int:
    """Return the snap count from a split "count-pct" cell, or 0 when it is absent."""
    if not isinstance(parts, list) or len(parts) == 1 or parts[0] == "":
        return 0
    return int(parts[0])


def _snap_pct_from_parts(parts) -> float:
    """Return the snap share as a 0-1 fraction from a split "count-pct" cell, or 0.0 when absent."""
    if not isinstance(parts, list) or len(parts) == 1:
        return 0.0
    pct_text = parts[1].strip("%")
    return float(pct_text) / 100.0 if pct_text else 0.0


def split_snap_count_percents(team_snap_df: pd.DataFrame, max_week: int = 18):
    """Replace each "Wk N" column with "N-count" and "N-%" columns, in place.

    Cells are expected to look like "<count>-<pct>%". Cells without a hyphen
    (including missing values) become a count of 0 and a share of 0.0.

    Args:
        team_snap_df: Snap-count table holding zero or more "Wk N" columns.
        max_week: Highest week number to look for. Defaults to 18 so that an
            18-week regular season is fully covered; the previous fixed
            ``range(1, 18)`` stopped at week 17.
    """
    for week in range(1, max_week + 1):
        week_col = f"Wk {week}"
        if week_col not in team_snap_df.columns:
            continue
        # A column whose values are all NaN has a float dtype and holds
        # nothing to split.
        if team_snap_df[week_col].dtype == float:
            team_snap_df[f"{week}-count"] = 0
            team_snap_df[f"{week}-%"] = 0.0
        else:
            week_split = team_snap_df[week_col].astype(str).str.split("-")
            team_snap_df[f"{week}-count"] = week_split.apply(_snap_count_from_parts)
            team_snap_df[f"{week}-%"] = week_split.apply(_snap_pct_from_parts)
        team_snap_df.drop(columns=week_col, inplace=True)
118
+
119
+
120
def parse_team_page(
    team_short_name: str,
    base_url: str,
    stat_name: str,
    year: int,
) -> List[pd.DataFrame]:
    """Fetch a team's page for ``stat_name`` and return its tables.

    The "snap-counts" page goes through ``extract_snaps_to_pandas``; every
    other stat goes through ``url_to_pandas``.
    """
    url = f"{base_url}/{stat_name}/teams?team={team_short_name}&year={year}"
    page_parser = extract_snaps_to_pandas if stat_name == "snap-counts" else url_to_pandas
    return page_parser(url)
src/queries/footballguys/refresh.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from domain.teams import NFLTeam, ALL_TEAMS
2
+ from queries.footballguys.helpers import parse_snaps, parse_targets, parse_redzone
3
+ from typing import List, Callable, Optional
4
+ import pandas as pd
5
+
6
+
7
def add_team_name_columns(team_df: pd.DataFrame, team_short_name: str, team_name: str):
    """Insert "TEAM" and "TEAM_NAME" as the first two columns of ``team_df``, in place."""
    leading_columns = (("TEAM", team_short_name), ("TEAM_NAME", team_name))
    for position, (label, value) in enumerate(leading_columns):
        team_df.insert(position, label, value)
10
+
11
+
12
def apply_intended_column_sorting(df: pd.DataFrame, first_columns: List[str]) -> pd.DataFrame:
    """Return ``df`` with the requested columns moved to the front.

    Names in ``first_columns`` that are not columns of ``df`` are ignored.
    The remaining columns keep their original relative order.
    """
    leading = [name for name in first_columns if name in df.columns]
    trailing = df.columns[~df.columns.isin(leading)].tolist()
    return df[leading + trailing]
16
+
17
+
18
def get_all_teams_stat_type(
    all_teams_list: List[NFLTeam],
    parsing_function: Callable,
    store_key: str,
    intended_first_columns: Optional[List[str]] = None,
):
    """Parse one stat for every team and stack the results into one DataFrame.

    Each team's table comes from ``parsing_function`` called with the team's
    footballguys short name and is tagged with "TEAM" / "TEAM_NAME" columns.
    When ``intended_first_columns`` is given and non-empty, those columns are
    moved to the front of the combined table.
    """

    def _load_team(nfl_team):
        frame = parsing_function(nfl_team.footballguys_short_name)
        add_team_name_columns(frame, nfl_team.team_short_name, nfl_team.team_name)
        return frame

    combined = pd.concat([_load_team(nfl_team) for nfl_team in all_teams_list])
    if intended_first_columns:
        combined = apply_intended_column_sorting(combined, intended_first_columns)
    print(f"footballguy {store_key} loaded")
    return combined
34
+
35
+
36
def request_stat(stat_name: str) -> pd.DataFrame:
    """Load the named footballguys stat for all teams.

    Args:
        stat_name: One of "targets", "snap-counts" or "redzone".

    Returns:
        The combined table built by ``get_all_teams_stat_type`` for
        ``ALL_TEAMS``.

    Raises:
        ValueError: If ``stat_name`` is not a supported stat. Previously an
            unknown name fell through with no parser assigned and failed
            with an UnboundLocalError.
    """
    intended_col_sort = None
    if stat_name == "targets":
        parse_fxn = parse_targets
        intended_col_sort = ["TEAM", "TEAM_NAME", "POS", "name", "total"]
    elif stat_name == "snap-counts":
        parse_fxn = parse_snaps
    elif stat_name == "redzone":
        parse_fxn = parse_redzone
        intended_col_sort = ["TEAM", "TEAM_NAME", "POS", "name", "total"]
    else:
        raise ValueError(
            f"Unsupported stat_name {stat_name!r}; expected 'targets', 'snap-counts' or 'redzone'"
        )
    return get_all_teams_stat_type(ALL_TEAMS, parse_fxn, stat_name, intended_col_sort)