Spaces:
Running
Running
patrickramos
committed on
Commit
·
024b191
1
Parent(s):
6a02cc9
Update app
Browse files
- data.py +2 -66
- demo.py +9 -4
- gradio_function.py +49 -97
data.py
CHANGED
@@ -15,24 +15,17 @@ from translate import (
|
|
15 |
)
|
16 |
|
17 |
# load game data
|
18 |
-
# game_df = pd.read_csv('game.csv').drop_duplicates()
|
19 |
game_df = pl.read_csv('game.csv').unique()
|
20 |
assert len(game_df) == len(game_df['game_pk'].unique())
|
21 |
|
22 |
# load pa data
|
23 |
pa_df = []
|
24 |
-
# for game_pk in tqdm(game_df['game_pk']):
|
25 |
-
# pa_df.append(pd.read_csv(os.path.join('pa', f'{game_pk}.csv'), dtype={'pa_pk': str}))
|
26 |
-
# pa_df = pd.concat(pa_df, axis='rows')
|
27 |
for game_pk in tqdm(game_df['game_pk']):
|
28 |
pa_df.append(pl.read_csv(os.path.join('pa', f'{game_pk}.csv'), schema_overrides={'pa_pk': str}))
|
29 |
pa_df = pl.concat(pa_df)
|
30 |
|
31 |
# load pitch data
|
32 |
pitch_df = []
|
33 |
-
# for game_pk in tqdm(game_df['game_pk']):
|
34 |
-
# pitch_df.append(pd.read_csv(os.path.join('pitch', f'{game_pk}.csv'), dtype={'pa_pk': str}))
|
35 |
-
# pitch_df = pd.concat(pitch_df, axis='rows')
|
36 |
for game_pk in tqdm(game_df['game_pk']):
|
37 |
pitch_df.append(pl.read_csv(os.path.join('pitch', f'{game_pk}.csv'), schema_overrides={'pa_pk': str, 'on_1b': pl.Int64, 'on_2b': pl.Int64, 'on_3b': pl.Int64}))
|
38 |
pitch_df = pl.concat(pitch_df)
|
@@ -56,14 +49,6 @@ def identify_bb_type(hit_type):
|
|
56 |
else:
|
57 |
raise Exception(f'Unexpect hit_type {hit_type}')
|
58 |
|
59 |
-
# pa_df['_des'] = pa_df['des'].str.strip()
|
60 |
-
# pa_df['des'] = pa_df['des'].str.strip()
|
61 |
-
# pa_df['des_more'] = pa_df['des_more'].str.strip()
|
62 |
-
# pa_df.loc[pa_df['des'].isna(), 'des'] = pa_df[pa_df['des'].isna()]['des_more']
|
63 |
-
# pa_df.loc[:, 'des'] = pa_df['des'].apply(lambda item: item.split()[0] if (len(item.split()) > 1 and re.search(r'+\d+点', item)) else item)
|
64 |
-
# non_home_plate_outcome = (pa_df['des'].isin(['ボール', '見逃し', '空振り'])) | (pa_df['des'].str.endswith('塁けん制'))
|
65 |
-
# pa_df.loc[non_home_plate_outcome, 'des'] = pa_df.loc[non_home_plate_outcome, 'des_more']
|
66 |
-
# pa_df['des'] = pa_df['des'].apply(translate_pa_outcome)
|
67 |
pa_df = (
|
68 |
pa_df
|
69 |
.with_columns(
|
@@ -108,16 +93,6 @@ pa_df = (
|
|
108 |
)
|
109 |
|
110 |
# translate pitch data
|
111 |
-
# pitch_df = pitch_df[~pitch_df['pitch_name'].isna()]
|
112 |
-
# pitch_df['jp_pitch_name'] = pitch_df['pitch_name']
|
113 |
-
# pitch_df['pitch_name'] = pitch_df['jp_pitch_name'].apply(lambda pitch_name: jp_pitch_to_en_pitch[pitch_name])
|
114 |
-
# pitch_df['pitch_type'] = pitch_df['jp_pitch_name'].apply(lambda pitch_name: jp_pitch_to_pitch_code[pitch_name])
|
115 |
-
# pitch_df['description'] = pitch_df['description'].apply(lambda item: item.split()[0] if len(item.split()) > 1 else item)
|
116 |
-
# pitch_df['description'] = pitch_df['description'].apply(translate_pitch_outcome)
|
117 |
-
# pitch_df['release_speed'] = pitch_df['release_speed'].replace('-', np.nan)
|
118 |
-
# pitch_df.loc[~pitch_df['release_speed'].isna(), 'release_speed'] = pitch_df.loc[~pitch_df['release_speed'].isna(), 'release_speed'].str.removesuffix('km/h').astype(int)
|
119 |
-
# pitch_df['plate_x'] = (pitch_df['plate_x'] + 13) - 80
|
120 |
-
# pitch_df['plate_z'] = 200 - (pitch_df['plate_z'] + 13) - 100
|
121 |
pitch_df = (
|
122 |
pitch_df
|
123 |
.filter(pl.col('pitch_name').is_not_null())
|
@@ -147,31 +122,6 @@ pitch_df = (
|
|
147 |
)
|
148 |
|
149 |
# translate player data
|
150 |
-
# client = Client("Ramos-Ramos/npb_name_translator")
|
151 |
-
# # en_names = client.predict(
|
152 |
-
# # jp_names='\n'.join(player_df.name.tolist()),
|
153 |
-
# # api_name="/predict"
|
154 |
-
# # )
|
155 |
-
# # player_df['jp_name'] = player_df['name']
|
156 |
-
# # player_df['name'] = [name if name != 'nan' else np.nan for name in en_names.splitlines()]
|
157 |
-
# en_names = client.predict(
|
158 |
-
# jp_names='\n'.join(player_df['name'].to_list()),
|
159 |
-
# api_name="/predict"
|
160 |
-
# )
|
161 |
-
# player_df = (
|
162 |
-
# player_df
|
163 |
-
# .with_columns(
|
164 |
-
# pl.col('name').alias('jp_name'),
|
165 |
-
# pl.Series('name', en_names.splitlines())
|
166 |
-
# )
|
167 |
-
# .with_columns(
|
168 |
-
# pl.when(pl.col('name') == 'nan')
|
169 |
-
# .then(None)
|
170 |
-
# .otherwise(pl.col('name'))
|
171 |
-
# .alias('name')
|
172 |
-
# )
|
173 |
-
# )
|
174 |
-
|
175 |
player_df = pl.read_csv('player.csv')
|
176 |
register = (
|
177 |
pl.read_csv('register.csv')
|
@@ -187,13 +137,7 @@ register = (
|
|
187 |
)
|
188 |
player_df = player_df.join(register, on=['name', 'team'], how='inner').with_columns(pl.col('en_name').alias('name')).drop(pl.col('en_name'))
|
189 |
|
190 |
-
#
|
191 |
-
# df = pd.merge(pitch_df, pa_df, 'inner', on=['game_pk', 'pa_pk'])
|
192 |
-
# df = pd.merge(df, player_df.rename(columns={'player_id': 'pitcher'}), 'inner', on='pitcher')
|
193 |
-
# df['whiff'] = df['description'].isin(['SS', 'K'])
|
194 |
-
# df['swing'] = ~df['description'].isin(['B', 'BB', 'LS', 'inv_K', 'bunt_K', 'HBP', 'SH', 'SH E', 'SH FC', 'obstruction', 'illegal_pitch', 'defensive_interference'])
|
195 |
-
# df['csw'] = df['description'].isin(['SS', 'K', 'LS', 'inv_K'])
|
196 |
-
# df['normal_pitch'] = ~df['description'].isin(['obstruction', 'illegal_pitch', 'defensive_interference']) # guess
|
197 |
|
198 |
df = (
|
199 |
(
|
@@ -207,15 +151,7 @@ df = (
|
|
207 |
pl.col('description').is_in(['SS', 'K', 'LS', 'inv_K']).alias('csw'),
|
208 |
~pl.col('description').is_in(['obstruction', 'illegal_pitch', 'defensive_interference']).alias('normal_pitch') # guess
|
209 |
)
|
210 |
-
)
|
211 |
-
|
212 |
-
# df_by_player_pitch = df.groupby(['name', 'pitch_name'])
|
213 |
-
# whiff_rate = (df_by_player_pitch['whiff'].sum() / df_by_player_pitch['swing'].sum() * 100).round(1).rename('Whiff%')
|
214 |
-
# csw_rate = (df_by_player_pitch['csw'].sum() / df_by_player_pitch['normal_pitch'].sum() * 100).round(1).rename('CSW%')
|
215 |
-
# velo = df_by_player_pitch['release_speed'].apply(lambda x: round(x.mean(), 1)).rename('Velocity')
|
216 |
-
|
217 |
-
# pitch_stats = pd.concat([whiff_rate, csw_rate, velo], axis=1)
|
218 |
-
# league_pitch_stats = pd.DataFrame(df.groupby('pitch_name')['release_speed'].apply(lambda x: round(x.mean(), 1)).rename('Velocity'))
|
219 |
|
220 |
pitch_stats, rhb_pitch_stats, lhb_pitch_stats = [
|
221 |
(
|
|
|
15 |
)
|
16 |
|
17 |
# load game data
|
|
|
18 |
game_df = pl.read_csv('game.csv').unique()
|
19 |
assert len(game_df) == len(game_df['game_pk'].unique())
|
20 |
|
21 |
# load pa data
|
22 |
pa_df = []
|
|
|
|
|
|
|
23 |
for game_pk in tqdm(game_df['game_pk']):
|
24 |
pa_df.append(pl.read_csv(os.path.join('pa', f'{game_pk}.csv'), schema_overrides={'pa_pk': str}))
|
25 |
pa_df = pl.concat(pa_df)
|
26 |
|
27 |
# load pitch data
|
28 |
pitch_df = []
|
|
|
|
|
|
|
29 |
for game_pk in tqdm(game_df['game_pk']):
|
30 |
pitch_df.append(pl.read_csv(os.path.join('pitch', f'{game_pk}.csv'), schema_overrides={'pa_pk': str, 'on_1b': pl.Int64, 'on_2b': pl.Int64, 'on_3b': pl.Int64}))
|
31 |
pitch_df = pl.concat(pitch_df)
|
|
|
49 |
else:
|
50 |
raise Exception(f'Unexpect hit_type {hit_type}')
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
pa_df = (
|
53 |
pa_df
|
54 |
.with_columns(
|
|
|
93 |
)
|
94 |
|
95 |
# translate pitch data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
pitch_df = (
|
97 |
pitch_df
|
98 |
.filter(pl.col('pitch_name').is_not_null())
|
|
|
122 |
)
|
123 |
|
124 |
# translate player data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
player_df = pl.read_csv('player.csv')
|
126 |
register = (
|
127 |
pl.read_csv('register.csv')
|
|
|
137 |
)
|
138 |
player_df = player_df.join(register, on=['name', 'team'], how='inner').with_columns(pl.col('en_name').alias('name')).drop(pl.col('en_name'))
|
139 |
|
140 |
+
# merge pitch and pa data
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
df = (
|
143 |
(
|
|
|
151 |
pl.col('description').is_in(['SS', 'K', 'LS', 'inv_K']).alias('csw'),
|
152 |
~pl.col('description').is_in(['obstruction', 'illegal_pitch', 'defensive_interference']).alias('normal_pitch') # guess
|
153 |
)
|
154 |
+
).sort(['game_pk', 'pa_pk', 'pitch_id'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
pitch_stats, rhb_pitch_stats, lhb_pitch_stats = [
|
157 |
(
|
demo.py
CHANGED
@@ -19,7 +19,7 @@ css = '''
|
|
19 |
.pitch-velo {height: 100px}
|
20 |
.pitch-velo .js-plotly-plot {height: 100%}
|
21 |
|
22 |
-
.pitch-loc {height:
|
23 |
.pitch-loc .js-plotly-plot {height: 100%}
|
24 |
|
25 |
.pitch-velo-summary div.plotly-notifier {visibility: hidden}
|
@@ -33,6 +33,7 @@ with gr.Blocks(
|
|
33 |
[Data from SportsNavi](https://sports.yahoo.co.jp/)
|
34 |
''')
|
35 |
|
|
|
36 |
app_df = gr.State(df)
|
37 |
app_league_df = gr.State(df)
|
38 |
app_pitch_stats = gr.State(pitch_stats)
|
@@ -58,6 +59,10 @@ with gr.Blocks(
|
|
58 |
gr.Markdown('''
|
59 |
## Pitch Locations
|
60 |
Pitcher's persective
|
|
|
|
|
|
|
|
|
61 |
''')
|
62 |
pitch_rows = []
|
63 |
pitch_groups = []
|
@@ -86,10 +91,10 @@ with gr.Blocks(
|
|
86 |
|
87 |
(
|
88 |
player
|
89 |
-
.input(update_dfs, inputs=[player, handedness], outputs=[app_df, app_pitch_stats, app_league_pitch_stats])
|
90 |
.then(lambda : gr.update(value='Both', interactive=True), outputs=handedness)
|
91 |
)
|
92 |
-
handedness.input(update_dfs, inputs=[player, handedness], outputs=[app_df, app_pitch_stats, app_league_pitch_stats])
|
93 |
|
94 |
# app_df.change(preview_df, inputs=app_df, outputs=preview)
|
95 |
# app_df.change(set_download_file, inputs=app_df, outputs=download_file)
|
@@ -105,7 +110,7 @@ with gr.Blocks(
|
|
105 |
.then(plot_usage, inputs=[app_df, player], outputs=usage)
|
106 |
.then(plot_velo_summary, inputs=[app_df, app_league_df, player], outputs=velo_summary)
|
107 |
.then(lambda df: plot_loc(df), inputs=app_df, outputs=loc_summary)
|
108 |
-
.then(plot_pitch_cards, inputs=[app_df, app_pitch_stats], outputs=pitch_rows+pitch_groups+pitch_names+pitch_infos+pitch_velos+pitch_locs)
|
109 |
)
|
110 |
|
111 |
gr.Markdown('## Bugs and other notes')
|
|
|
19 |
.pitch-velo {height: 100px}
|
20 |
.pitch-velo .js-plotly-plot {height: 100%}
|
21 |
|
22 |
+
.pitch-loc {height: 320px}
|
23 |
.pitch-loc .js-plotly-plot {height: 100%}
|
24 |
|
25 |
.pitch-velo-summary div.plotly-notifier {visibility: hidden}
|
|
|
33 |
[Data from SportsNavi](https://sports.yahoo.co.jp/)
|
34 |
''')
|
35 |
|
36 |
+
source_df = gr.State(df)
|
37 |
app_df = gr.State(df)
|
38 |
app_league_df = gr.State(df)
|
39 |
app_pitch_stats = gr.State(pitch_stats)
|
|
|
59 |
gr.Markdown('''
|
60 |
## Pitch Locations
|
61 |
Pitcher's persective
|
62 |
+
<br>
|
63 |
+
`NPB` refers to the top 10% of pitches thrown across the league with the current search constraints e.g. handedness
|
64 |
+
<br>
|
65 |
+
Note: To speed up the KDE, we restrict the league-wide pitches to 5,000 pitches
|
66 |
''')
|
67 |
pitch_rows = []
|
68 |
pitch_groups = []
|
|
|
91 |
|
92 |
(
|
93 |
player
|
94 |
+
.input(update_dfs, inputs=[player, handedness, source_df], outputs=[app_df, app_league_df, app_pitch_stats, app_league_pitch_stats])
|
95 |
.then(lambda : gr.update(value='Both', interactive=True), outputs=handedness)
|
96 |
)
|
97 |
+
handedness.input(update_dfs, inputs=[player, handedness, source_df], outputs=[app_df, app_league_df, app_pitch_stats, app_league_pitch_stats])
|
98 |
|
99 |
# app_df.change(preview_df, inputs=app_df, outputs=preview)
|
100 |
# app_df.change(set_download_file, inputs=app_df, outputs=download_file)
|
|
|
110 |
.then(plot_usage, inputs=[app_df, player], outputs=usage)
|
111 |
.then(plot_velo_summary, inputs=[app_df, app_league_df, player], outputs=velo_summary)
|
112 |
.then(lambda df: plot_loc(df), inputs=app_df, outputs=loc_summary)
|
113 |
+
.then(plot_pitch_cards, inputs=[app_df, app_league_df, app_pitch_stats], outputs=pitch_rows+pitch_groups+pitch_names+pitch_infos+pitch_velos+pitch_locs)
|
114 |
)
|
115 |
|
116 |
gr.Markdown('## Bugs and other notes')
|
gradio_function.py
CHANGED
@@ -4,7 +4,6 @@ import plotly.graph_objects as go
|
|
4 |
import plotly.colors as pc
|
5 |
from scipy.stats import gaussian_kde
|
6 |
import numpy as np
|
7 |
-
# import pandas as pd
|
8 |
import polars as pl
|
9 |
import gradio as gr
|
10 |
|
@@ -21,6 +20,8 @@ from data import (
|
|
21 |
MAX_LOCS = len(jp_pitch_to_en_pitch)
|
22 |
LOCS_PER_ROW = 4
|
23 |
MAX_ROWS = ceil(MAX_LOCS/LOCS_PER_ROW)
|
|
|
|
|
24 |
|
25 |
# GRADIO FUNCTIONS
|
26 |
|
@@ -75,7 +76,7 @@ colorscale = [
|
|
75 |
|
76 |
|
77 |
@clone_df
|
78 |
-
def plot_loc(df, min_pitches=3):
|
79 |
|
80 |
loc = df.select(['plate_x', 'plate_z'])
|
81 |
|
@@ -111,12 +112,48 @@ def plot_loc(df, min_pitches=3):
|
|
111 |
fig.add_annotation(
|
112 |
x=0,
|
113 |
y=0,
|
114 |
-
text=
|
115 |
showarrow=False
|
116 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
fig.update_layout(
|
118 |
xaxis=dict(range=[-plot_s/2, plot_s/2+1], showticklabels=False),
|
119 |
yaxis=dict(range=[-plot_s/2, plot_s/2+1], scaleanchor='x', scaleratio=1, showticklabels=False),
|
|
|
120 |
# width=384,
|
121 |
# height=384
|
122 |
)
|
@@ -145,7 +182,7 @@ def plot_velo(df=None, player=None, velos=None, pitch_type=None, pitch_name=None
|
|
145 |
fig.add_annotation(
|
146 |
x=(170+125)/2,
|
147 |
y=0.3/2,
|
148 |
-
text=
|
149 |
showarrow=False,
|
150 |
)
|
151 |
x_range = [125, 170]
|
@@ -223,7 +260,7 @@ def plot_velo_summary(df, league_df, player):
|
|
223 |
fig.add_trace(go.Scatter(
|
224 |
x=[velo_center],
|
225 |
y=[pitch_name],
|
226 |
-
text=[
|
227 |
textposition='top center',
|
228 |
hovertext=False,
|
229 |
mode="lines+text",
|
@@ -271,7 +308,7 @@ def plot_velo_summary(df, league_df, player):
|
|
271 |
return fig
|
272 |
|
273 |
|
274 |
-
def update_dfs(player, handedness):
|
275 |
if handedness == 'Both':
|
276 |
handedness_filter = pl.col('stand').is_in(['R', 'L'])
|
277 |
_pitch_stats = pitch_stats
|
@@ -286,7 +323,7 @@ def update_dfs(player, handedness):
|
|
286 |
_league_pitch_stats = lhb_league_pitch_stats
|
287 |
player_filter = pl.col('name') == player
|
288 |
final_filter = player_filter & handedness_filter
|
289 |
-
return df.filter(final_filter), _pitch_stats.filter(player_filter), _league_pitch_stats,
|
290 |
|
291 |
def set_download_file(df):
|
292 |
file_path = 'files/npb.csv'
|
@@ -303,7 +340,7 @@ def plot_usage(df, player):
|
|
303 |
return fig
|
304 |
|
305 |
@clone_df
|
306 |
-
def plot_pitch_cards(df, pitch_stats):
|
307 |
pitch_counts = df['pitch_name'].value_counts().sort('count', descending=True)
|
308 |
|
309 |
pitch_rows = []
|
@@ -331,7 +368,10 @@ def plot_pitch_cards(df, pitch_stats):
|
|
331 |
visible=True
|
332 |
))
|
333 |
pitch_locs.append(gr.update(
|
334 |
-
value=plot_loc(
|
|
|
|
|
|
|
335 |
label='Pitch location',
|
336 |
visible=True
|
337 |
))
|
@@ -358,91 +398,3 @@ def update_velo_stats(pitch_stats, league_pitch_stats):
|
|
358 |
.sort('Count', descending=True)
|
359 |
.drop('Count')
|
360 |
)
|
361 |
-
|
362 |
-
# def get_data(player, handedness):
|
363 |
-
# player_name = f'# {player}'
|
364 |
-
|
365 |
-
# # _df = df.set_index('name').sort_index().loc[player]
|
366 |
-
# # _df.to_csv(f'files/npb.csv', index=False)
|
367 |
-
# # _df_by_pitch_name = _df.set_index('pitch_name').sort_index()
|
368 |
-
# _df = df.filter(pl.col('name') == player)
|
369 |
-
# league_df = df
|
370 |
-
# _pitch_stats = pitch_stats
|
371 |
-
# _league_pitch_stats = league_pitch_stats
|
372 |
-
# if handedness == 'Right':
|
373 |
-
# _df = _df.filter(pl.col('stand') == 'R')
|
374 |
-
# league_df = league_df.filter(pl.col('stand') == 'R')
|
375 |
-
# _pitch_stats = rhb_pitch_stats
|
376 |
-
# _league_pitch_stats = rhb_league_pitch_stats
|
377 |
-
# elif handedness == 'Left':
|
378 |
-
# _df = _df.filter(pl.col('stand') == 'L')
|
379 |
-
# league_df = league_df.filter(pl.col('stand') == 'L')
|
380 |
-
# _pitch_stats = lhb_pitch_stats
|
381 |
-
# _league_pitch_stats = lhb_league_pitch_stats
|
382 |
-
|
383 |
-
# handedness = gr.update(value=handedness, interactive=True)
|
384 |
-
|
385 |
-
# # usage_fig = px.pie(_df['pitch_name'], names='pitch_name')
|
386 |
-
# usage_fig = px.pie(_df.select('pitch_name'), names='pitch_name')
|
387 |
-
|
388 |
-
# usage_fig.update_traces(texttemplate='%{percent:.1%}', hovertemplate=f'<b>{player}</b><br>' + 'threw a <b>%{label}</b><br><b>%{percent:.1%}</b> of the time (<b>%{value}</b> pitches)')
|
389 |
-
|
390 |
-
# pitch_counts = _df['pitch_name'].value_counts().sort('count', descending=True)
|
391 |
-
|
392 |
-
# # pitch_velo_summary = plot_all_pitch_velo(player=player, player_df=_df_by_pitch_name, pitch_counts=pitch_counts.sort_values(ascending=True))
|
393 |
-
# pitch_velo_summary = plot_all_pitch_velo(df=league_df, player=player, player_df=_df.filter(pl.col('release_speed').is_not_null()), pitch_counts=pitch_counts.sort('count', descending=False))
|
394 |
-
# pitch_loc_summary = plot_pitch_map(df=_df, player=player, all_pitches=True)
|
395 |
-
|
396 |
-
# pitch_groups = []
|
397 |
-
# pitch_names = []
|
398 |
-
# pitch_infos = []
|
399 |
-
# pitch_velos = []
|
400 |
-
# pitch_maps = []
|
401 |
-
|
402 |
-
# # for pitch_name, count in pitch_counts.items():
|
403 |
-
# for pitch_name, count in pitch_counts.iter_rows():
|
404 |
-
# pitch_groups.append(gr.update(visible=True))
|
405 |
-
# pitch_names.append(gr.update(value=f'### {pitch_name}', visible=True))
|
406 |
-
# pitch_infos.append(gr.update(
|
407 |
-
# # value=pd.DataFrame([{
|
408 |
-
# # 'Whiff%': pitch_stats.loc[(player, pitch_name), 'Whiff%'].item(),
|
409 |
-
# # 'CSW%': pitch_stats.loc[(player, pitch_name), 'CSW%'].item()
|
410 |
-
# # }]),
|
411 |
-
# value=_pitch_stats.filter((pl.col('name') == player) & (pl.col('pitch_name') == pitch_name)).select(['Whiff%', 'CSW%']),
|
412 |
-
# visible=True
|
413 |
-
# ))
|
414 |
-
|
415 |
-
# pitch_velos.append(gr.update(
|
416 |
-
# # value=plot_pitch_velo(velos=_df_by_pitch_name.loc[pitch_name, 'release_speed']),
|
417 |
-
# value=plot_pitch_velo(velos=_df.filter((pl.col('pitch_name') == pitch_name) & (pl.col('release_speed').is_not_null()))['release_speed']),
|
418 |
-
# visible=True
|
419 |
-
# ))
|
420 |
-
# pitch_maps.append(gr.update(
|
421 |
-
# value=plot_pitch_map(df=_df, player=player, pitch_name=pitch_name),
|
422 |
-
# label='Pitch location',
|
423 |
-
# visible=True
|
424 |
-
# ))
|
425 |
-
|
426 |
-
# for _ in range(max_pitch_types - len(pitch_names)):
|
427 |
-
# pitch_groups.append(gr.update(visible=False))
|
428 |
-
# pitch_names.append(gr.update(value=None, visible=False))
|
429 |
-
# pitch_infos.append(gr.update(value=None, visible=False))
|
430 |
-
# for _ in range(max_pitch_types - len(pitch_maps)):
|
431 |
-
# pitch_velos.append(gr.update(value=None, visible=False))
|
432 |
-
# pitch_maps.append(gr.update(value=None, visible=False))
|
433 |
-
|
434 |
-
# # velo_stats = pd.concat([pitch_stats.loc[player, 'Velocity'].rename('Avg. Velo'), league_pitch_stats['Velocity'].rename('League Avg. Velo')], join='inner', axis=1).rename_axis(['Pitch']).reset_index()
|
435 |
-
# velo_stats = (
|
436 |
-
# _pitch_stats
|
437 |
-
# .filter(pl.col('name') == player)
|
438 |
-
# .select(pl.col('pitch_name').alias('Pitch'), pl.col('Velocity').alias('Avg. Velo'), pl.col('Count'))
|
439 |
-
# .join(
|
440 |
-
# _league_pitch_stats.select(pl.col('pitch_name').alias('Pitch'), pl.col('Velocity').alias('League Avg. Velo')),
|
441 |
-
# on='Pitch',
|
442 |
-
# how='inner'
|
443 |
-
# )
|
444 |
-
# .sort('Count', descending=True)
|
445 |
-
# .drop('Count')
|
446 |
-
# )
|
447 |
-
|
448 |
-
# return player_name, handedness, 'files/npb.csv', usage_fig, pitch_velo_summary, pitch_loc_summary, *pitch_groups, *pitch_names, *pitch_infos, *pitch_velos, *pitch_maps, velo_stats
|
|
|
4 |
import plotly.colors as pc
|
5 |
from scipy.stats import gaussian_kde
|
6 |
import numpy as np
|
|
|
7 |
import polars as pl
|
8 |
import gradio as gr
|
9 |
|
|
|
20 |
MAX_LOCS = len(jp_pitch_to_en_pitch)
|
21 |
LOCS_PER_ROW = 4
|
22 |
MAX_ROWS = ceil(MAX_LOCS/LOCS_PER_ROW)
|
23 |
+
INSUFFICIENT_PITCHES_MSG = 'No visualization: Not enough pitches thrown'
|
24 |
+
INSUFFICIENT_PITCHES_MSG_MULTI_LINE = 'No visualization:<br>Not enough pitches thrown'
|
25 |
|
26 |
# GRADIO FUNCTIONS
|
27 |
|
|
|
76 |
|
77 |
|
78 |
@clone_df
|
79 |
+
def plot_loc(df, league_df=None, min_pitches=3, max_pitches=5000):
|
80 |
|
81 |
loc = df.select(['plate_x', 'plate_z'])
|
82 |
|
|
|
112 |
fig.add_annotation(
|
113 |
x=0,
|
114 |
y=0,
|
115 |
+
text=INSUFFICIENT_PITCHES_MSG_MULTI_LINE,
|
116 |
showarrow=False
|
117 |
)
|
118 |
+
|
119 |
+
if league_df is not None:
|
120 |
+
league_loc = league_df.select(pl.col('plate_x', 'plate_z'))
|
121 |
+
if len(league_loc) > max_pitches:
|
122 |
+
league_loc = league_loc.sample(max_pitches, seed=0)
|
123 |
+
|
124 |
+
if len(league_loc) >= min_pitches:
|
125 |
+
league_Z = fit_pred_kde(league_loc.to_numpy().T, X, Y)
|
126 |
+
percentile = np.quantile(league_Z, 0.9)
|
127 |
+
fig.add_trace(go.Contour(
|
128 |
+
z=league_Z,
|
129 |
+
x=kde_range,
|
130 |
+
y=kde_range,
|
131 |
+
colorscale=[
|
132 |
+
[0, 'rgba(0, 0, 0, 0)'],
|
133 |
+
[1, 'rgba(0, 0, 0, 0)']
|
134 |
+
],
|
135 |
+
zmin=percentile,
|
136 |
+
zmax=league_Z.max(),
|
137 |
+
contours={
|
138 |
+
'start': percentile,
|
139 |
+
'end': league_Z.max(),
|
140 |
+
'size': league_Z.max() - percentile,
|
141 |
+
# 'coloring': 'heatmap'
|
142 |
+
},
|
143 |
+
line={
|
144 |
+
'width': 2,
|
145 |
+
'color': 'black',
|
146 |
+
'dash': 'dash'
|
147 |
+
},
|
148 |
+
showlegend=True,
|
149 |
+
showscale=False,
|
150 |
+
name='NPB'
|
151 |
+
))
|
152 |
+
|
153 |
fig.update_layout(
|
154 |
xaxis=dict(range=[-plot_s/2, plot_s/2+1], showticklabels=False),
|
155 |
yaxis=dict(range=[-plot_s/2, plot_s/2+1], scaleanchor='x', scaleratio=1, showticklabels=False),
|
156 |
+
legend=dict(orientation='h', y=0, yanchor='top'),
|
157 |
# width=384,
|
158 |
# height=384
|
159 |
)
|
|
|
182 |
fig.add_annotation(
|
183 |
x=(170+125)/2,
|
184 |
y=0.3/2,
|
185 |
+
text=INSUFFICIENT_PITCHES_MSG_MULTI_LINE,
|
186 |
showarrow=False,
|
187 |
)
|
188 |
x_range = [125, 170]
|
|
|
260 |
fig.add_trace(go.Scatter(
|
261 |
x=[velo_center],
|
262 |
y=[pitch_name],
|
263 |
+
text=[INSUFFICIENT_PITCHES_MSG],
|
264 |
textposition='top center',
|
265 |
hovertext=False,
|
266 |
mode="lines+text",
|
|
|
308 |
return fig
|
309 |
|
310 |
|
311 |
+
def update_dfs(player, handedness, df):
|
312 |
if handedness == 'Both':
|
313 |
handedness_filter = pl.col('stand').is_in(['R', 'L'])
|
314 |
_pitch_stats = pitch_stats
|
|
|
323 |
_league_pitch_stats = lhb_league_pitch_stats
|
324 |
player_filter = pl.col('name') == player
|
325 |
final_filter = player_filter & handedness_filter
|
326 |
+
return df.filter(final_filter), df.filter(handedness_filter), _pitch_stats.filter(player_filter), _league_pitch_stats,
|
327 |
|
328 |
def set_download_file(df):
|
329 |
file_path = 'files/npb.csv'
|
|
|
340 |
return fig
|
341 |
|
342 |
@clone_df
|
343 |
+
def plot_pitch_cards(df, league_df, pitch_stats):
|
344 |
pitch_counts = df['pitch_name'].value_counts().sort('count', descending=True)
|
345 |
|
346 |
pitch_rows = []
|
|
|
368 |
visible=True
|
369 |
))
|
370 |
pitch_locs.append(gr.update(
|
371 |
+
value=plot_loc(
|
372 |
+
df=df.filter(pl.col('pitch_name') == pitch_name),
|
373 |
+
league_df=league_df.filter(pl.col('pitch_name') == pitch_name)
|
374 |
+
),
|
375 |
label='Pitch location',
|
376 |
visible=True
|
377 |
))
|
|
|
398 |
.sort('Count', descending=True)
|
399 |
.drop('Count')
|
400 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|