Spaces:
Running
Running
Commit
·
0b50ce4
1
Parent(s):
48dfbf7
Add pitcher leaderboard
Browse files- app.py +4 -0
- convert.py +5 -0
- pitch_leaderboard.py +6 -25
- pitcher_leaderboard.py +158 -0
- plotting.py +1 -1
- stats.py +49 -13
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import matplotlib as mpl
|
|
| 3 |
|
| 4 |
from data import data_df
|
| 5 |
from pitcher_overview import create_pitcher_overview
|
|
|
|
| 6 |
from pitch_leaderboard import create_pitch_leaderboard
|
| 7 |
from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
|
| 8 |
from css import css
|
|
@@ -15,6 +16,7 @@ with open('updated.txt') as f:
|
|
| 15 |
|
| 16 |
limitations = '''**General Limitations**
|
| 17 |
- As new players make their debut, some names may be missing or not translated/transliterated correctly.
|
|
|
|
| 18 |
'''
|
| 19 |
|
| 20 |
with open('acknowledgements.md', 'r') as f:
|
|
@@ -24,6 +26,8 @@ if __name__ == '__main__':
|
|
| 24 |
with gr.Blocks(css=css) as app:
|
| 25 |
with gr.Tab('Pitcher Overview'):
|
| 26 |
create_pitcher_overview(data_df)
|
|
|
|
|
|
|
| 27 |
with gr.Tab('Pitch Leaderboard'):
|
| 28 |
create_pitch_leaderboard()
|
| 29 |
with gr.Tab('Daily/Weekly Leaderboard'):
|
|
|
|
| 3 |
|
| 4 |
from data import data_df
|
| 5 |
from pitcher_overview import create_pitcher_overview
|
| 6 |
+
from pitcher_leaderboard import create_pitcher_leaderboard
|
| 7 |
from pitch_leaderboard import create_pitch_leaderboard
|
| 8 |
from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
|
| 9 |
from css import css
|
|
|
|
| 16 |
|
| 17 |
limitations = '''**General Limitations**
|
| 18 |
- As new players make their debut, some names may be missing or not translated/transliterated correctly.
|
| 19 |
+
- IP is overestimated
|
| 20 |
'''
|
| 21 |
|
| 22 |
with open('acknowledgements.md', 'r') as f:
|
|
|
|
| 26 |
with gr.Blocks(css=css) as app:
|
| 27 |
with gr.Tab('Pitcher Overview'):
|
| 28 |
create_pitcher_overview(data_df)
|
| 29 |
+
with gr.Tab('Pitcher Leaderboard'):
|
| 30 |
+
create_pitcher_leaderboard()
|
| 31 |
with gr.Tab('Pitch Leaderboard'):
|
| 32 |
create_pitch_leaderboard()
|
| 33 |
with gr.Tab('Daily/Weekly Leaderboard'):
|
convert.py
CHANGED
|
@@ -161,6 +161,11 @@ presult = {
|
|
| 161 |
141: 'Unknown'
|
| 162 |
}
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
bresult = {
|
| 165 |
0: '空振り三振',
|
| 166 |
1: '単打', # 1b gb p
|
|
|
|
| 161 |
141: 'Unknown'
|
| 162 |
}
|
| 163 |
|
| 164 |
+
def verify_and_return_presult(presults):
    """Check that every item of ``presults`` is a value of the ``presult`` mapping.

    The check uses an explicit ``raise`` instead of an ``assert`` statement so
    that it is not stripped when Python runs with ``-O``. The exception type
    and message are the same as the ``assert`` produced.

    Args:
        presults: Iterable of result labels to validate. It is iterated once
            here, so pass a re-iterable container (e.g. a list) rather than a
            one-shot iterator if the caller needs to iterate it afterwards.

    Returns:
        The ``presults`` object that was passed in, unchanged.

    Raises:
        AssertionError: If an item is not among ``presult.values()``.
    """
    valid_presults = presult.values()
    for _presult in presults:
        if _presult not in valid_presults:
            raise AssertionError(f'{_presult} is invalid')
    return presults
|
| 168 |
+
|
| 169 |
bresult = {
|
| 170 |
0: '空振り三振',
|
| 171 |
1: '単打', # 1b gb p
|
pitch_leaderboard.py
CHANGED
|
@@ -32,6 +32,9 @@ TEAMS = [
|
|
| 32 |
]
|
| 33 |
notes = '''**Limitations**
|
| 34 |
- [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
|
|
|
|
|
|
|
|
|
|
| 35 |
'''
|
| 36 |
|
| 37 |
|
|
@@ -44,29 +47,8 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 44 |
if pitcher_lr != 'Both':
|
| 45 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 46 |
|
| 47 |
-
# both, left, right = [
|
| 48 |
-
# (
|
| 49 |
-
# compute_pitch_stats(df, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
|
| 50 |
-
# .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 51 |
-
# .drop('qualified')
|
| 52 |
-
# .rename({'pitcher_name': 'Pitcher', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'} | {f'{stat}_pctl': f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS})
|
| 53 |
-
# .with_columns(
|
| 54 |
-
# pl.col(stat).mul(100).round(1)
|
| 55 |
-
# for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
| 56 |
-
# )
|
| 57 |
-
# [['pitId', 'ballKind_code', 'Pitcher', 'Pitch', 'Pitch (General)', 'Count', 'Usage'] + STATS_WITH_PCTLS]
|
| 58 |
-
# )
|
| 59 |
-
# for df
|
| 60 |
-
# in [data, data.filter(pl.col('batLR') == 'l'), data.filter(pl.col('batLR') == 'r')]
|
| 61 |
-
# ]
|
| 62 |
-
# pitch_stats = (
|
| 63 |
-
# both
|
| 64 |
-
# .join(left, on=['pitId', 'ballKind_code'], suffix=' (LHH)', how='full')
|
| 65 |
-
# .join(right, on=['pitId', 'ballKind_code'], suffix=' (RHH)', how='full')
|
| 66 |
-
# .drop('pitId', 'ballKind_code', *list(chain.from_iterable([[f'{col} ({handedness}HH)' for col in ['pitId', 'ballKind_code', 'Pitcher', 'Pitch', 'Pitch (General)']] for handedness in ('L', 'R')])))
|
| 67 |
-
# )
|
| 68 |
pitch_stats = (
|
| 69 |
-
compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
|
| 70 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 71 |
.drop('pitId', 'ballKind_code', 'qualified')
|
| 72 |
.rename({
|
|
@@ -85,7 +67,6 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 85 |
pl.col(stat).mul(100)
|
| 86 |
for stat in PCT_STATS
|
| 87 |
)
|
| 88 |
-
# [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
|
| 89 |
)
|
| 90 |
|
| 91 |
if include_teams is not None:
|
|
@@ -146,7 +127,7 @@ def create_pitch_leaderboard():
|
|
| 146 |
with gr.Column(scale=1):
|
| 147 |
all_pitches = gr.Button('Select/Deselect all pitches')
|
| 148 |
min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
|
| 149 |
-
|
| 150 |
with gr.Row():
|
| 151 |
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
| 152 |
all_teams = gr.Button('Select/Deselect all teams')
|
|
@@ -164,7 +145,7 @@ def create_pitch_leaderboard():
|
|
| 164 |
|
| 165 |
gr.Markdown(notes)
|
| 166 |
|
| 167 |
-
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches,
|
| 168 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
| 169 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 170 |
# pin_columns.input(
|
|
|
|
| 32 |
]
|
| 33 |
notes = '''**Limitations**
|
| 34 |
- [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
|
| 35 |
+
|
| 36 |
+
**To-do**
|
| 37 |
+
- Add post-season
|
| 38 |
'''
|
| 39 |
|
| 40 |
|
|
|
|
| 47 |
if pitcher_lr != 'Both':
|
| 48 |
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
pitch_stats = (
|
| 51 |
+
compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific', group_by_team=True)
|
| 52 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 53 |
.drop('pitId', 'ballKind_code', 'qualified')
|
| 54 |
.rename({
|
|
|
|
| 67 |
pl.col(stat).mul(100)
|
| 68 |
for stat in PCT_STATS
|
| 69 |
)
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
if include_teams is not None:
|
|
|
|
| 127 |
with gr.Column(scale=1):
|
| 128 |
all_pitches = gr.Button('Select/Deselect all pitches')
|
| 129 |
min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
|
| 130 |
+
batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
|
| 131 |
with gr.Row():
|
| 132 |
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
| 133 |
all_teams = gr.Button('Select/Deselect all teams')
|
|
|
|
| 145 |
|
| 146 |
gr.Markdown(notes)
|
| 147 |
|
| 148 |
+
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, batter_lr, include_pitches, include_teams], outputs=leaderboard)
|
| 149 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
| 150 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 151 |
# pin_columns.input(
|
pitcher_leaderboard.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import polars as pl
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
# from itertools import chain
|
| 7 |
+
|
| 8 |
+
from data import data_df
|
| 9 |
+
from stats import compute_player_stats, filter_data_by_date_and_game_kind
|
| 10 |
+
from convert import team_names_short_to_color, get_text_color_from_team
|
| 11 |
+
from plotting import stat_cmap
|
| 12 |
+
|
| 13 |
+
# Stat columns shown on the pitcher leaderboard, in display order.
STATS = [
    'FB Velo',
    'K%', 'BB%',
    'Swing%', 'Z-Swing%', 'Chase%',
    'Contact%', 'Z-Contact%', 'O-Contact%',
    'SwStr%', 'Whiff%', 'CSW%',
    'GB%', 'FB%', 'LD%',
    'Zone%',
    'Arm%', 'Glove%', 'High%', 'Low%', 'MM%',
]
# Every stat except 'FB Velo' is a rate: it is multiplied by 100 and rendered
# with a trailing '%' when the table is built.
PCT_STATS = [stat for stat in STATS if stat != 'FB Velo']
# Stats that have a companion '<stat>_pctl' column used to colour the cell:
# everything from 'FB Velo' up to and including 'Zone%'.
STATS_WITH_PCTLS = STATS[:STATS.index('Zone%') + 1]
# Full column order of the leaderboard table.
COLUMNS = ['Pitcher', 'Team', 'IP', 'TBF', *STATS]

# Short team names offered in the team filter.
TEAMS = [
    'Yomiuri', 'Yakult', 'DeNA', 'Chunichi', 'Hanshin', 'Hiroshima',
    'Nipponham', 'Rakuten', 'Seibu', 'Lotte', 'ORIX', 'SoftBank',
]

# Markdown shown underneath the table.
notes = '''**Limitations**
- [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)

**To-do**
- Fix IP calculation
- Add post-season
'''
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, pitcher_lr='Both', include_teams=None):
    """Build the value dict (data, headers, styling, display text) for the leaderboard table.

    Args:
        start_date: Lower date bound, forwarded to
            ``filter_data_by_date_and_game_kind``.
        end_date: Upper date bound, forwarded to
            ``filter_data_by_date_and_game_kind``.
        min_ip: Qualification threshold, forwarded unchanged to
            ``compute_player_stats``.
        pitcher_lr: One of ``'Both'``, ``'Left'``, ``'Right'``. Despite the
            name, the filter is applied to the ``batLR`` column, using the
            lower-cased first letter (``'l'`` / ``'r'``).
        include_teams: Optional collection of short team names; when given,
            only rows whose ``Team`` is in it are kept.

    Returns:
        A dict with ``'data'`` (list of row tuples in ``COLUMNS`` order),
        ``'headers'`` (``COLUMNS``) and ``'metadata'`` holding per-cell
        ``'styling'`` (CSS strings) and ``'display_value'`` (formatted cells).
    """
    assert pitcher_lr in ['Both', 'Left', 'Right']

    data = data_df.filter(pl.col('ballKind_code') != '-')
    data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
    if pitcher_lr != 'Both':
        data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())

    pitcher_stats = (
        compute_player_stats(data, player_type='pitcher', min_ip=min_ip, group_by_team=True)
        .filter(pl.col('qualified'))
        .drop('pitId', 'qualified')
        .rename({
            'pitcher_name': 'Pitcher',
            'pitcher_team_name_short': 'Team',
            'PA': 'TBF'
        })
        .with_columns(
            # Rates are scaled to percentage points for display.
            pl.col(stat).mul(100)
            for stat in PCT_STATS
        )
    )

    if include_teams is not None:
        pitcher_stats = pitcher_stats.filter(pl.col('Team').is_in(include_teams))

    # Select the displayed columns and materialise the rows once; the styling
    # and display loops below only read them.
    rows = pitcher_stats[COLUMNS].rows()
    pctl_columns = {stat: pitcher_stats[f'{stat}_pctl'] for stat in STATS_WITH_PCTLS}

    styling = []
    for i, row in enumerate(rows):
        styling_row = []
        for col, item in zip(COLUMNS, row):
            if col in STATS_WITH_PCTLS:
                # Colour the cell by the stat's percentile via the shared colormap.
                r, g, b = (stat_cmap([pctl_columns[col][i]])[0, :3]*255).astype(np.uint8)
                styling_row.append(f'background-color: rgba({r}, {g}, {b})')
            elif col == 'Team':
                styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
            else:
                styling_row.append('')
        styling.append(styling_row)

    display_value = []
    for row in rows:
        display_value_row = []
        for col, item in zip(COLUMNS, row):
            if col in PCT_STATS:
                display_value_row.append(f'{item:.1f}%')
            elif isinstance(item, float):
                display_value_row.append(f'{item:.1f}')
            else:
                display_value_row.append(item)
        display_value.append(display_value_row)

    value = {
        'data': rows,
        'headers': COLUMNS,
        'metadata': {
            'styling': styling,
            'display_value': display_value,
        }
    }

    return value
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def create_pitcher_leaderboard():
    """Build the pitcher-leaderboard UI and wire up its event handlers.

    The date pickers default to 1 January of the current year through now.
    Clicking "Search" calls ``gr_create_pitcher_leaderboard`` and writes the
    result into the table.

    Returns:
        The ``gr.Blocks`` instance containing the UI.
    """
    now = datetime.now()
    start_datetime_init = datetime(now.year, 1, 1)
    end_datetime_init = now

    def _sync_min_ip(min_ip_value, is_qualified):
        # When "Qualified" is ticked, store the 'qualified' sentinel (the same
        # value the state starts with) rather than the checkbox's boolean, and
        # lock the numeric input; otherwise store the number and unlock it.
        return (
            'qualified' if is_qualified else min_ip_value,
            gr.Number(interactive=not is_qualified),
        )

    # NOTE(review): the nesting of the layout blocks below is reconstructed;
    # in particular, whether `batter_lr` sits inside or beside the gr.Group
    # should be confirmed against the original file.
    with gr.Blocks() as app:
        gr.Markdown('# Pitcher Leaderboard')
        with gr.Row():
            start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
            end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
        with gr.Row():
            with gr.Group():
                min_ip_state = gr.State('qualified')
                min_ip = gr.Number(100, label='Min. IP', precision=0, minimum=0, interactive=False)
                qualified = gr.Checkbox(True, label='Qualified')
            batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
        with gr.Row():
            include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
            all_teams = gr.Button('Select/Deselect all teams')

        search = gr.Button('Search')
        pin_columns = gr.Button('Pin columns')
        leaderboard = gr.DataFrame(
            # Empty placeholder with the same headers the search result uses,
            # so it lines up with `column_widths`.
            pl.DataFrame({col: [] for col in COLUMNS}),
            column_widths=[125, 75, 50, 50] + [max(50, 10*len(stat)) for stat in STATS],
            show_copy_button=True,
            show_search='filter',
            pinned_columns=2,
            elem_id='leaderboard'
        )

        gr.Markdown(notes)

        search.click(gr_create_pitcher_leaderboard, inputs=[start_date, end_date, min_ip_state, batter_lr, include_teams], outputs=leaderboard)
        # Toggle between "all teams" and "no teams".
        all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
        min_ip_state_kwargs = dict(fn=_sync_min_ip, inputs=[min_ip, qualified], outputs=[min_ip_state, min_ip])
        min_ip.change(**min_ip_state_kwargs)
        qualified.change(**min_ip_state_kwargs)
        # The button currently only clears the pinned columns.
        pin_columns.click(
            lambda : gr.update(pinned_columns=None),
            outputs=leaderboard
        )

    return app
|
| 155 |
+
|
| 156 |
+
if __name__ == '__main__':
|
| 157 |
+
app = create_pitcher_leaderboard()
|
| 158 |
+
app.launch()
|
plotting.py
CHANGED
|
@@ -92,7 +92,7 @@ def plot_loc(ax, locs):
|
|
| 92 |
|
| 93 |
def plot_velo(ax, velos):
|
| 94 |
trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
|
| 95 |
-
for (pitch,), _velos in velos.group_by('general_ballKind_code'):
|
| 96 |
_velos = _velos.filter(((pl.col('ballSpeed_mph') - pl.col('ballSpeed_mph').mean())/ pl.col('ballSpeed_mph').std()).abs() < 3)
|
| 97 |
|
| 98 |
if len(_velos) <= 1:
|
|
|
|
| 92 |
|
| 93 |
def plot_velo(ax, velos):
|
| 94 |
trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
|
| 95 |
+
for (pitch,), _velos in velos.sort(pl.len().over('general_ballKind_code'), descending=True).group_by('general_ballKind_code', maintain_order=True):
|
| 96 |
_velos = _velos.filter(((pl.col('ballSpeed_mph') - pl.col('ballSpeed_mph').mean())/ pl.col('ballSpeed_mph').std()).abs() < 3)
|
| 97 |
|
| 98 |
if len(_velos) <= 1:
|
stats.py
CHANGED
|
@@ -3,6 +3,12 @@ from data import data_df
|
|
| 3 |
|
| 4 |
from types import SimpleNamespace
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
|
| 7 |
if start_date is not None:
|
| 8 |
data = data.filter(pl.col('date') >= start_date)
|
|
@@ -63,17 +69,19 @@ def compute_team_games(data):
|
|
| 63 |
)
|
| 64 |
|
| 65 |
|
| 66 |
-
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 67 |
assert player_type in ('pitcher', 'batter')
|
| 68 |
assert pitch_class_type in ('general', 'specific')
|
| 69 |
-
|
|
|
|
|
|
|
| 70 |
name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
|
| 71 |
pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
| 72 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
| 73 |
pitch_stats = (
|
| 74 |
data
|
| 75 |
.with_columns((pl.col('ballSpeed') / 1.609).round(1).alias('mph'))
|
| 76 |
-
.group_by(
|
| 77 |
.agg(
|
| 78 |
pl.first(name_col),
|
| 79 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
|
@@ -119,17 +127,23 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 119 |
for stat in ['Avg KPH', 'Max KPH', 'Avg MPH', 'Max MPH', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 120 |
)
|
| 121 |
.rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
| 122 |
-
.sort(
|
| 123 |
)
|
| 124 |
return pitch_stats
|
| 125 |
-
|
| 126 |
|
| 127 |
-
def compute_player_stats(data, player_type, min_ip='qualified'):
|
| 128 |
data = (
|
| 129 |
compute_team_games(data)
|
| 130 |
.with_columns(
|
| 131 |
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
| 132 |
pl.col('inning_code').unique().len().over('pitId').alias('IP')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
)
|
| 134 |
)
|
| 135 |
|
|
@@ -139,17 +153,38 @@ def compute_player_stats(data, player_type, min_ip='qualified'):
|
|
| 139 |
data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
| 140 |
|
| 141 |
assert player_type in ('pitcher', 'batter')
|
| 142 |
-
|
|
|
|
|
|
|
| 143 |
name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
|
| 144 |
player_stats = (
|
| 145 |
data
|
| 146 |
-
.
|
|
|
|
| 147 |
.agg(
|
| 148 |
pl.col(name_col).first(),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 150 |
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
| 151 |
-
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
| 152 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
pl.first('qualified')
|
| 154 |
)
|
| 155 |
.explode('batType')
|
|
@@ -163,9 +198,10 @@ def compute_player_stats(data, player_type, min_ip='qualified'):
|
|
| 163 |
)
|
| 164 |
.drop('G', 'F', 'B', 'P', 'L')
|
| 165 |
.with_columns(
|
| 166 |
-
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=
|
| 167 |
-
for stat in ['
|
| 168 |
)
|
|
|
|
| 169 |
)
|
| 170 |
return player_stats
|
| 171 |
|
|
@@ -186,7 +222,7 @@ def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=Non
|
|
| 186 |
if lr is not None:
|
| 187 |
source_data = source_data.filter(pl.col('batLR') == lr)
|
| 188 |
|
| 189 |
-
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
|
| 190 |
|
| 191 |
pitch_shapes = (
|
| 192 |
source_data
|
|
@@ -200,6 +236,6 @@ def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=Non
|
|
| 200 |
.with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
|
| 201 |
)
|
| 202 |
|
| 203 |
-
pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip).filter(pl.col('pitId') == id)
|
| 204 |
|
| 205 |
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|
|
|
|
| 3 |
|
| 4 |
from types import SimpleNamespace
|
| 5 |
|
| 6 |
+
from convert import verify_and_return_presult
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
valid_pitch = pl.col('x').is_not_null() & pl.col('y').is_not_null() & (pl.col('ballSpeed') > 0)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
|
| 13 |
if start_date is not None:
|
| 14 |
data = data.filter(pl.col('date') >= start_date)
|
|
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
+
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, group_by_team=False):
|
| 73 |
assert player_type in ('pitcher', 'batter')
|
| 74 |
assert pitch_class_type in ('general', 'specific')
|
| 75 |
+
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 76 |
+
if group_by_team:
|
| 77 |
+
id_cols.append('pitcher_team_name_short')
|
| 78 |
name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
|
| 79 |
pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
| 80 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
| 81 |
pitch_stats = (
|
| 82 |
data
|
| 83 |
.with_columns((pl.col('ballSpeed') / 1.609).round(1).alias('mph'))
|
| 84 |
+
.group_by(*id_cols, pitch_col)
|
| 85 |
.agg(
|
| 86 |
pl.first(name_col),
|
| 87 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
|
|
|
| 127 |
for stat in ['Avg KPH', 'Max KPH', 'Avg MPH', 'Max MPH', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 128 |
)
|
| 129 |
.rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
| 130 |
+
.sort(id_cols[0], 'count', descending=[False, True])
|
| 131 |
)
|
| 132 |
return pitch_stats
|
|
|
|
| 133 |
|
| 134 |
+
def compute_player_stats(data, player_type, min_ip='qualified', group_by_team=False):
|
| 135 |
data = (
|
| 136 |
compute_team_games(data)
|
| 137 |
.with_columns(
|
| 138 |
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
| 139 |
pl.col('inning_code').unique().len().over('pitId').alias('IP')
|
| 140 |
+
# pl.col('presult').is_in(verify_and_return_presult([
|
| 141 |
+
# 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
|
| 142 |
+
# 'Foul fly', 'Foul line (?)',
|
| 143 |
+
# 'Sacrifice bunt', 'Sacrifice fly',
|
| 144 |
+
# "Fielder's choice", "Sacrifice fielder's choice",
|
| 145 |
+
# 'Bunt strikeout', 'Swinging strikeout', 'Looking strikeout'
|
| 146 |
+
# ])).sum().over('pitId').mul(1/3).alias('IP')
|
| 147 |
)
|
| 148 |
)
|
| 149 |
|
|
|
|
| 153 |
data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
| 154 |
|
| 155 |
assert player_type in ('pitcher', 'batter')
|
| 156 |
+
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 157 |
+
if group_by_team:
|
| 158 |
+
id_cols.append('pitcher_team_name_short')
|
| 159 |
name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
|
| 160 |
player_stats = (
|
| 161 |
data
|
| 162 |
+
.with_columns(pl.when(pl.col('general_ballKind_code').is_in(['4S', 'FC', 'SI'])).then(pl.when(valid_pitch).then('ballSpeed').mean().over('pitId', 'general_ballKind_code')).mul(1/1.609).round(1).alias('FB Velo'))
|
| 163 |
+
.group_by(id_cols)
|
| 164 |
.agg(
|
| 165 |
pl.col(name_col).first(),
|
| 166 |
+
*([] if group_by_team else [pl.col('pitcher_team_name_short').last()]),
|
| 167 |
+
pl.col('IP').first(),
|
| 168 |
+
pl.col('pa_code').unique().len().alias('PA'),
|
| 169 |
+
pl.col('FB Velo').max(),
|
| 170 |
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 171 |
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
|
|
|
| 172 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 173 |
+
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
| 174 |
+
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
| 175 |
+
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
| 176 |
+
((pl.col('swing') & ~pl.col('whiff')).sum()/pl.col('swing').sum()).alias('Contact%'),
|
| 177 |
+
((pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(pl.col('zone') & pl.col('swing')).sum()).alias('Z-Contact%'),
|
| 178 |
+
((~pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(~pl.col('zone') & pl.col('swing')).sum()).alias('O-Contact%'),
|
| 179 |
+
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 180 |
+
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
| 181 |
+
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
| 182 |
+
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
| 183 |
+
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
|
| 184 |
+
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
|
| 185 |
+
(pl.col('y') > 125).mean().alias('High%'),
|
| 186 |
+
(pl.col('y') <= 125).mean().alias('Low%'),
|
| 187 |
+
(pl.col('x').is_between(-20, 20) & pl.col('y').is_between(100, 100+50)).mean().alias('MM%'),
|
| 188 |
pl.first('qualified')
|
| 189 |
)
|
| 190 |
.explode('batType')
|
|
|
|
| 198 |
)
|
| 199 |
.drop('G', 'F', 'B', 'P', 'L')
|
| 200 |
.with_columns(
|
| 201 |
+
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat in ['BB%', 'FB%', 'LD%'] or 'Contact%' in stat)/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 202 |
+
for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 203 |
)
|
| 204 |
+
.sort('IP', descending=True)
|
| 205 |
)
|
| 206 |
return player_stats
|
| 207 |
|
|
|
|
| 222 |
if lr is not None:
|
| 223 |
source_data = source_data.filter(pl.col('batLR') == lr)
|
| 224 |
|
| 225 |
+
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches, group_by_team=False).filter(pl.col('pitId') == id)
|
| 226 |
|
| 227 |
pitch_shapes = (
|
| 228 |
source_data
|
|
|
|
| 236 |
.with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
|
| 237 |
)
|
| 238 |
|
| 239 |
+
pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip, group_by_team=False).filter(pl.col('pitId') == id)
|
| 240 |
|
| 241 |
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|