nesticot committed on
Commit
1ebf57c
·
verified ·
1 Parent(s): 7b7fd66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -172
app.py CHANGED
@@ -1,172 +1,174 @@
1
- import seaborn as sns
2
- import streamlit as st
3
- from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
4
- import requests
5
- import polars as pl
6
- from datetime import date
7
- import pandas as pd
8
- import matplotlib
9
-
10
-
11
-
12
- # Display the app title and description
13
- st.markdown("""
14
- ## tjStuff+ App
15
-
16
- ##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
17
- ##### Code: [GitHub Repo](https://github.com/tnestico/streamlit_tjstuff)
18
- ##### Data: [MLB](https://baseballsavant.mlb.com/) ([Gathered from my MLB Scraper](https://github.com/tnestico/mlb_scraper))
19
-
20
- #### About
21
- This Streamlit app tabulates and plots my pitching metric, tjStuff+, for all MLB players during the 2024 MLB Season
22
-
23
- About tjStuff+:
24
- * tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
25
- * tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
26
- * Pitch Grade is based off tjStuff+ and scales the data to the traditional 20-80 Scouting Scale for a given pitch type
27
-
28
- """
29
- )
30
-
31
-
32
- # Dictionary to map pitch types to their corresponding colors and names
33
- pitch_colours = {
34
- ## Fastballs ##
35
- 'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
36
- 'FA': {'colour': '#FF007D', 'name': 'Fastball'},
37
- 'SI': {'colour': '#98165D', 'name': 'Sinker'},
38
- 'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},
39
-
40
- ## Offspeed ##
41
- 'CH': {'colour': '#F79E70', 'name': 'Changeup'},
42
- 'FS': {'colour': '#FE6100', 'name': 'Splitter'},
43
- 'SC': {'colour': '#F08223', 'name': 'Screwball'},
44
- 'FO': {'colour': '#FFB000', 'name': 'Forkball'},
45
-
46
- ## Sliders ##
47
- 'SL': {'colour': '#67E18D', 'name': 'Slider'},
48
- 'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
49
- 'SV': {'colour': '#376748', 'name': 'Slurve'},
50
-
51
- ## Curveballs ##
52
- 'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
53
- 'CU': {'colour': '#3025CE', 'name': 'Curveball'},
54
- 'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
55
- 'EP': {'colour': '#648FFF', 'name': 'Eephus'},
56
-
57
- ## Others ##
58
- 'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
59
- 'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
60
- 'UN': {'colour': '#9C8975', 'name': 'Unknown'},
61
- }
62
-
63
- # Create dictionaries for pitch types and their attributes
64
- dict_colour = {key: value['colour'] for key, value in pitch_colours.items()}
65
- dict_pitch = {key: value['name'] for key, value in pitch_colours.items()}
66
- dict_pitch_desc_type = {value['name']: key for key, value in pitch_colours.items()}
67
- dict_pitch_name = {value['name']: value['colour'] for key, value in pitch_colours.items()}
68
-
69
- # Define a custom colormap for styling
70
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
71
-
72
- # Initialize session state for cache status
73
- if 'cache_cleared' not in st.session_state:
74
- st.session_state.cache_cleared = False
75
-
76
- # Function to fetch data and cache it
77
- @st.cache_data
78
- def fetch_data():
79
- df = pl.read_csv("tjstuff_plus_pitch_data_2024.csv").fill_nan(None)
80
- return df
81
-
82
- # Fetch and preprocess data
83
- df = fetch_data()
84
- df_plot = df.clone()
85
- df = df.filter(df['pitches'] >= 10).drop_nulls(subset=['pitch_grade', 'tj_stuff_plus'])
86
- df = df.sort(['pitcher_name', 'pitch_type'], descending=[False, False])
87
-
88
- # Cast columns to appropriate data types
89
- df = df.with_columns([
90
- pl.col('tj_stuff_plus').cast(pl.Int64).alias('tj_stuff_plus'),
91
- pl.col('pitches').cast(pl.Int64).alias('pitches'),
92
- pl.col('pitcher_id').cast(pl.Int64).alias('pitcher_id'),
93
- pl.col('pitch_grade').cast(pl.Int64).alias('pitch_grade')
94
- ])
95
-
96
- # Define column configuration for Streamlit
97
- column_config_dict = {
98
- 'pitcher_id': 'Pitcher ID',
99
- 'pitcher_name': 'Pitcher Name',
100
- 'pitch_type': 'Pitch Type',
101
- 'pitches': 'Pitches',
102
- 'tj_stuff_plus': st.column_config.NumberColumn("tjStuff+", format="%.0f"),
103
- 'pitch_grade': st.column_config.NumberColumn("Pitch Grade", format="%.0f")
104
- }
105
-
106
- # Get unique pitch types for selection
107
- unique_pitch_types = [''] + sorted(df['pitch_type'].unique().to_list())
108
- unique_pitch_types = [dict_pitch.get(x, x) for x in unique_pitch_types]
109
-
110
-
111
- st.markdown("""
112
- #### tjStuff+ Table
113
-
114
- Filter and sort tjStuff+ Data for all MLB Pitchers
115
- """
116
- )
117
- # Create a selectbox widget for pitch types
118
- selected_pitch_types = st.selectbox('Select Pitch Types *(leave blank for all pitch types)*', unique_pitch_types)
119
-
120
- # Filter the DataFrame based on selected pitch types
121
- if selected_pitch_types == 'All':
122
- df = df.filter(pl.col('pitch_type') == 'All').sort('tj_stuff_plus', descending=True)
123
- elif selected_pitch_types != '':
124
- df = df.filter(pl.col('pitch_type') == dict_pitch_desc_type[selected_pitch_types]).sort('tj_stuff_plus', descending=True)
125
-
126
- # Convert Polars DataFrame to Pandas DataFrame and apply styling
127
- styled_df = df[['pitcher_id', 'pitcher_name', 'pitch_type', 'pitches', 'tj_stuff_plus', 'pitch_grade']].to_pandas().style
128
-
129
- # Apply background gradient styling to specific columns
130
- styled_df = styled_df.background_gradient(subset=['tj_stuff_plus'], cmap=cmap_sum, vmin=80, vmax=120)
131
- styled_df = styled_df.background_gradient(subset=['pitch_grade'], cmap=cmap_sum, vmin=20, vmax=80)
132
-
133
- # Display the styled DataFrame in Streamlit
134
- st.dataframe(styled_df, hide_index=True, column_config=column_config_dict, width=1500)
135
-
136
- # Create dictionaries for pitcher information
137
- pitcher_id_name = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name']))
138
- pitcher_id_name_id = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id']))
139
- pitcher_name_id_id = dict(zip(df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id'], df_plot['pitcher_id']))
140
- pitcher_id_position = dict(zip(df_plot['pitcher_id'], df_plot.drop_nulls(subset=['position'])['position']))
141
-
142
-
143
- st.markdown("""
144
- #### tjStuff+ Plot
145
-
146
- Visualize tjStuff+ and Pitching Grade by Pitcher
147
- """
148
- )
149
-
150
- # Create a selectbox widget for pitchers
151
- pitcher_id_name_select = st.selectbox('Select Pitcher', sorted(pitcher_name_id_id.keys()))
152
-
153
- # Get selected pitcher information
154
- pitcher_id = pitcher_name_id_id[pitcher_id_name_select]
155
- position = pitcher_id_position[pitcher_id]
156
- pitcher_name = pitcher_id_name[pitcher_id]
157
-
158
- import tjstuff_plot
159
- # Button to update plot
160
-
161
- # Get selected pitcher information
162
- pitcher_id = pitcher_name_id_id[pitcher_id_name_select]
163
- position = pitcher_id_position[pitcher_id]
164
- pitcher_name = pitcher_id_name[pitcher_id]
165
-
166
- import tjstuff_plot
167
-
168
- # Button to update plot
169
- if st.button('Update Plot'):
170
- st.session_state.update_plot = True
171
- tjstuff_plot.tjstuff_plot(df_plot, pitcher_id, position, pitcher_name)
172
-
 
 
 
1
+ import seaborn as sns
2
+ import streamlit as st
3
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
4
+ import requests
5
+ import polars as pl
6
+ from datetime import date
7
+ import pandas as pd
8
+ import matplotlib
9
+
10
+
11
+
12
+ # Display the app title and description
13
+ st.markdown("""
14
+ ## tjStuff+ App
15
+
16
+ ##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
17
+ ##### Code: [GitHub Repo](https://github.com/tnestico/streamlit_tjstuff)
18
+ ##### Data: [MLB](https://baseballsavant.mlb.com/) ([Gathered from my MLB Scraper](https://github.com/tnestico/mlb_scraper))
19
+
20
+ #### About
21
+ This Streamlit app tabulates and plots my pitching metric, tjStuff+, for all MLB players during the 2024 MLB Season
22
+
23
+ About tjStuff+:
24
+ * tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
25
+ * tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
26
+ * Pitch Grade is based off tjStuff+ and scales the data to the traditional 20-80 Scouting Scale for a given pitch type
27
+
28
+ [Learn More about tjStuff+ here](https://github.com/tnestico/tjstuff_plus/tree/main)
29
+
30
+ """
31
+ )
32
+
33
+
34
+ # Dictionary to map pitch types to their corresponding colors and names
35
+ pitch_colours = {
36
+ ## Fastballs ##
37
+ 'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
38
+ 'FA': {'colour': '#FF007D', 'name': 'Fastball'},
39
+ 'SI': {'colour': '#98165D', 'name': 'Sinker'},
40
+ 'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},
41
+
42
+ ## Offspeed ##
43
+ 'CH': {'colour': '#F79E70', 'name': 'Changeup'},
44
+ 'FS': {'colour': '#FE6100', 'name': 'Splitter'},
45
+ 'SC': {'colour': '#F08223', 'name': 'Screwball'},
46
+ 'FO': {'colour': '#FFB000', 'name': 'Forkball'},
47
+
48
+ ## Sliders ##
49
+ 'SL': {'colour': '#67E18D', 'name': 'Slider'},
50
+ 'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
51
+ 'SV': {'colour': '#376748', 'name': 'Slurve'},
52
+
53
+ ## Curveballs ##
54
+ 'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
55
+ 'CU': {'colour': '#3025CE', 'name': 'Curveball'},
56
+ 'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
57
+ 'EP': {'colour': '#648FFF', 'name': 'Eephus'},
58
+
59
+ ## Others ##
60
+ 'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
61
+ 'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
62
+ 'UN': {'colour': '#9C8975', 'name': 'Unknown'},
63
+ }
64
+
65
+ # Create dictionaries for pitch types and their attributes
66
+ dict_colour = {key: value['colour'] for key, value in pitch_colours.items()}
67
+ dict_pitch = {key: value['name'] for key, value in pitch_colours.items()}
68
+ dict_pitch_desc_type = {value['name']: key for key, value in pitch_colours.items()}
69
+ dict_pitch_name = {value['name']: value['colour'] for key, value in pitch_colours.items()}
70
+
71
+ # Define a custom colormap for styling
72
+ cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
73
+
74
+ # Initialize session state for cache status
75
+ if 'cache_cleared' not in st.session_state:
76
+ st.session_state.cache_cleared = False
77
+
78
+ # Function to fetch data and cache it
79
+ @st.cache_data
80
+ def fetch_data():
81
+ df = pl.read_csv("tjstuff_plus_pitch_data_2024.csv").fill_nan(None)
82
+ return df
83
+
84
+ # Fetch and preprocess data
85
+ df = fetch_data()
86
+ df_plot = df.clone()
87
+ df = df.filter(df['pitches'] >= 10).drop_nulls(subset=['pitch_grade', 'tj_stuff_plus'])
88
+ df = df.sort(['pitcher_name', 'pitch_type'], descending=[False, False])
89
+
90
+ # Cast columns to appropriate data types
91
+ df = df.with_columns([
92
+ pl.col('tj_stuff_plus').cast(pl.Int64).alias('tj_stuff_plus'),
93
+ pl.col('pitches').cast(pl.Int64).alias('pitches'),
94
+ pl.col('pitcher_id').cast(pl.Int64).alias('pitcher_id'),
95
+ pl.col('pitch_grade').cast(pl.Int64).alias('pitch_grade')
96
+ ])
97
+
98
+ # Define column configuration for Streamlit
99
+ column_config_dict = {
100
+ 'pitcher_id': 'Pitcher ID',
101
+ 'pitcher_name': 'Pitcher Name',
102
+ 'pitch_type': 'Pitch Type',
103
+ 'pitches': 'Pitches',
104
+ 'tj_stuff_plus': st.column_config.NumberColumn("tjStuff+", format="%.0f"),
105
+ 'pitch_grade': st.column_config.NumberColumn("Pitch Grade", format="%.0f")
106
+ }
107
+
108
+ # Get unique pitch types for selection
109
+ unique_pitch_types = [''] + sorted(df['pitch_type'].unique().to_list())
110
+ unique_pitch_types = [dict_pitch.get(x, x) for x in unique_pitch_types]
111
+
112
+
113
+ st.markdown("""
114
+ #### tjStuff+ Table
115
+
116
+ Filter and sort tjStuff+ Data for all MLB Pitchers
117
+ """
118
+ )
119
+ # Create a selectbox widget for pitch types
120
+ selected_pitch_types = st.selectbox('Select Pitch Types *(leave blank for all pitch types)*', unique_pitch_types)
121
+
122
+ # Filter the DataFrame based on selected pitch types
123
+ if selected_pitch_types == 'All':
124
+ df = df.filter(pl.col('pitch_type') == 'All').sort('tj_stuff_plus', descending=True)
125
+ elif selected_pitch_types != '':
126
+ df = df.filter(pl.col('pitch_type') == dict_pitch_desc_type[selected_pitch_types]).sort('tj_stuff_plus', descending=True)
127
+
128
+ # Convert Polars DataFrame to Pandas DataFrame and apply styling
129
+ styled_df = df[['pitcher_id', 'pitcher_name', 'pitch_type', 'pitches', 'tj_stuff_plus', 'pitch_grade']].to_pandas().style
130
+
131
+ # Apply background gradient styling to specific columns
132
+ styled_df = styled_df.background_gradient(subset=['tj_stuff_plus'], cmap=cmap_sum, vmin=80, vmax=120)
133
+ styled_df = styled_df.background_gradient(subset=['pitch_grade'], cmap=cmap_sum, vmin=20, vmax=80)
134
+
135
+ # Display the styled DataFrame in Streamlit
136
+ st.dataframe(styled_df, hide_index=True, column_config=column_config_dict, width=1500)
137
+
138
+ # Create dictionaries for pitcher information
139
+ pitcher_id_name = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name']))
140
+ pitcher_id_name_id = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id']))
141
+ pitcher_name_id_id = dict(zip(df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id'], df_plot['pitcher_id']))
142
+ pitcher_id_position = dict(zip(df_plot['pitcher_id'], df_plot.drop_nulls(subset=['position'])['position']))
143
+
144
+
145
+ st.markdown("""
146
+ #### tjStuff+ Plot
147
+
148
+ Visualize tjStuff+ and Pitching Grade by Pitcher
149
+ """
150
+ )
151
+
152
+ # Create a selectbox widget for pitchers
153
+ pitcher_id_name_select = st.selectbox('Select Pitcher', sorted(pitcher_name_id_id.keys()))
154
+
155
+ # Get selected pitcher information
156
+ pitcher_id = pitcher_name_id_id[pitcher_id_name_select]
157
+ position = pitcher_id_position[pitcher_id]
158
+ pitcher_name = pitcher_id_name[pitcher_id]
159
+
160
+ import tjstuff_plot
161
+ # Button to update plot
162
+
163
+ # Get selected pitcher information
164
+ pitcher_id = pitcher_name_id_id[pitcher_id_name_select]
165
+ position = pitcher_id_position[pitcher_id]
166
+ pitcher_name = pitcher_id_name[pitcher_id]
167
+
168
+ import tjstuff_plot
169
+
170
+ # Button to update plot
171
+ if st.button('Update Plot'):
172
+ st.session_state.update_plot = True
173
+ tjstuff_plot.tjstuff_plot(df_plot, pitcher_id, position, pitcher_name)
174
+