|
from shiny import ui, render, App |
|
import matplotlib.image as mpimg |
|
import pandas as pd |
|
import pygsheets |
|
import pytz |
|
from datetime import datetime |
|
import numpy as np |
|
import joblib |
|
|
|
print('Starting') |
|
df_2024 = pd.read_csv('2024_spring_data.csv',index_col=[0]) |
|
print('Starting') |
|
spring_teams = df_2024.groupby(['pitcher_id']).tail(1)[['pitcher_id','pitcher_team']].set_index(['pitcher_id'])['pitcher_team'].to_dict() |
|
|
|
|
|
df_2024['vy_f'] = -(df_2024['vy0']**2 - (2 * df_2024['ay'] * (df_2024['y0'] - 17/12)))**0.5 |
|
df_2024['t'] = (df_2024['vy_f'] - df_2024['vy0']) / df_2024['ay'] |
|
df_2024['vz_f'] = (df_2024['vz0']) + (df_2024['az'] * df_2024['t']) |
|
df_2024['vaa'] = -np.arctan(df_2024['vz_f'] / df_2024['vy_f']) * (180 / np.pi) |
|
|
|
|
|
|
|
df_2024['vx_f'] = (df_2024['vx0']) + (df_2024['ax'] * df_2024['t']) |
|
df_2024['haa'] = -np.arctan(df_2024['vx_f'] / df_2024['vy_f']) * (180 / np.pi) |
|
grouped_ivb_2023 = pd.read_csv('2023_pitch_group_data.csv',index_col=[0,3]) |
|
|
|
model = joblib.load('tjstuff_model_20240123.joblib') |
|
|
|
|
|
def percentile(n): |
|
def percentile_(x): |
|
return x.quantile(n) |
|
percentile_.__name__ = 'percentile_{:02.0f}'.format(n*100) |
|
return percentile_ |
|
|
|
def df_clean(df): |
|
df_copy = df.copy() |
|
df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1 |
|
df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1 |
|
df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] |
|
|
|
df_copy['pitch_l'] = [1 if x == 'L' else 0 for x in df_copy['pitcher_hand']] |
|
df_copy['bat_l'] = [1 if x == 'L' else 0 for x in df_copy['batter_hand']] |
|
df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "KN", "FO", "CS", "SC", "FA"])].reset_index(drop=True) |
|
df_copy['pitch_type'] = df_copy['pitch_type'].replace({'FT':'SI','KC':'CU','ST':'SL','SV':'SL'}) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
df_copy_fb_sum = df_copy[df_copy.pitch_type.isin(["FF", "FC", "SI"])].groupby(['pitcher_id']).agg( |
|
fb_velo = ('start_speed','mean'), |
|
fb_max_ivb = ('ivb',percentile(0.9)), |
|
fb_max_x = ('hb',percentile(0.9)), |
|
fb_min_x = ('hb',percentile(0.1)), |
|
fb_max_velo = ('start_speed',percentile(0.9)), |
|
fb_axis = ('spin_direction','mean'), |
|
) |
|
|
|
df_copy = df_copy.merge(df_copy_fb_sum,left_on='pitcher_id',right_index=True,how='left') |
|
|
|
df_copy['fb_velo_diff'] = df_copy['start_speed']- df_copy['fb_velo'] |
|
df_copy['fb_max_ivb_diff'] = df_copy['ivb']- df_copy['fb_max_ivb'] |
|
df_copy['fb_max_hb_diff'] = df_copy['hb']- df_copy['fb_max_x'] |
|
df_copy['fb_min_hb_diff'] = df_copy['hb']- df_copy['fb_min_x'] |
|
df_copy['fb_max_velo_diff'] = df_copy['start_speed']- df_copy['fb_max_velo'] |
|
df_copy['fb_axis_diff'] = df_copy['spin_direction']- df_copy['fb_axis'] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
df_copy['max_speed'] = df_copy.groupby(['pitcher_id'])['start_speed'].transform('max') |
|
df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed'] |
|
|
|
df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max') |
|
df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb'] |
|
|
|
df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5 |
|
df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay'] |
|
df_copy['vz_f'] = (df_copy['vz0']) + (df_copy['az'] * df_copy['t']) |
|
df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi) |
|
|
|
|
|
|
|
df_copy['vx_f'] = (df_copy['vx0']) + (df_copy['ax'] * df_copy['t']) |
|
df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
df_copy = df_copy.dropna(subset=['pitch_type']).fillna(0) |
|
return df_copy |
|
|
|
|
|
app_ui = ui.page_fluid( |
|
ui.layout_sidebar( |
|
|
|
ui.panel_sidebar( |
|
ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(), |
|
end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(), |
|
max=df_2024.game_date.max()),width=2), |
|
ui.panel_main( |
|
ui.navset_tab( |
|
|
|
|
|
ui.nav("Pitch Data", |
|
ui.output_data_frame("table")), |
|
ui.nav("Pitch Data (Daily)", |
|
ui.output_data_frame("table_daily")), |
|
ui.nav("2023 vs Spring", |
|
ui.output_data_frame("table_2023")), |
|
ui.nav("2023 vs Spring Difference", |
|
ui.output_data_frame("table_difference")), |
|
|
|
|
|
ui.nav("tjStuff+", |
|
ui.output_data_frame("table_stuff")), |
|
ui.nav("tjStuff+ (Daily)", |
|
ui.output_data_frame("table_stuff_day")), |
|
|
|
)))) |
|
|
|
|
|
from urllib.request import Request, urlopen |
|
from shiny import App, reactive, ui |
|
from shiny.ui import h2, tags |
|
|
|
|
|
|
|
|
|
|
|
|
|
def server(input, output, session): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@output |
|
@render.data_frame |
|
def table(): |
|
|
|
grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])& |
|
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_team','pitcher_hand','pitch_type']).agg( |
|
pitches = ('start_speed','count'), |
|
|
|
start_speed = ('start_speed','mean'), |
|
ivb = ('ivb','mean'), |
|
hb = ('hb','mean'), |
|
spin_rate = ('spin_rate','mean'), |
|
vaa = ('vaa','mean'), |
|
haa = ('haa','mean'), |
|
horizontal_release = ('x0','mean'), |
|
vertical_release = ('z0','mean'), |
|
extension = ('extension','mean')).round(1).reset_index() |
|
|
|
|
|
return render.DataGrid( |
|
grouped_ivb, |
|
width='fit-content', |
|
height=750, |
|
filters=True, |
|
) |
|
|
|
@output |
|
@render.data_frame |
|
def table_daily(): |
|
|
|
grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])& |
|
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_team','pitcher_hand','pitch_type','game_date']).agg( |
|
pitches = ('start_speed','count'), |
|
|
|
start_speed = ('start_speed','mean'), |
|
ivb = ('ivb','mean'), |
|
hb = ('hb','mean'), |
|
spin_rate = ('spin_rate','mean'), |
|
vaa = ('vaa','mean'), |
|
haa = ('haa','mean'), |
|
horizontal_release = ('x0','mean'), |
|
vertical_release = ('z0','mean'), |
|
extension = ('extension','mean')).round(1).reset_index() |
|
|
|
|
|
return render.DataGrid( |
|
grouped_ivb, |
|
width='fit-content', |
|
height=750, |
|
filters=True, |
|
) |
|
|
|
|
|
|
|
@output |
|
@render.data_frame |
|
def table_2023(): |
|
grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])& |
|
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg( |
|
pitches = ('start_speed','count'), |
|
|
|
start_speed = ('start_speed','mean'), |
|
ivb = ('ivb','mean'), |
|
hb = ('hb','mean'), |
|
spin_rate = ('spin_rate','mean'), |
|
vaa = ('vaa','mean'), |
|
haa = ('haa','mean'), |
|
horizontal_release = ('x0','mean'), |
|
vertical_release = ('z0','mean'), |
|
extension = ('extension','mean')).round(1).reset_index() |
|
grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type']) |
|
|
|
|
|
|
|
|
|
ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb, |
|
left_index=True, |
|
right_index=True, |
|
how='right',suffixes=['_2023','_spring']).reset_index() |
|
|
|
ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring'] |
|
ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring'] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams) |
|
ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',]) |
|
|
|
return render.DataGrid( |
|
ivb_merged[['pitches_2023','start_speed_2023', 'ivb_2023', 'hb_2023', |
|
'spin_rate_2023', 'vaa_2023','haa_2023', 'horizontal_release_2023', 'vertical_release_2023', |
|
'extension_2023','pitches_spring','start_speed_spring', 'ivb_spring', 'hb_spring', |
|
'spin_rate_spring','vaa_spring','haa_spring', 'horizontal_release_spring', 'vertical_release_spring', |
|
'extension_spring',]].reset_index(), |
|
width='fit-content', |
|
height=750, |
|
filters=True, |
|
) |
|
|
|
@output |
|
@render.data_frame |
|
def table_difference(): |
|
grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])& |
|
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg( |
|
pitches = ('start_speed','count'), |
|
|
|
start_speed = ('start_speed','mean'), |
|
ivb = ('ivb','mean'), |
|
hb = ('hb','mean'), |
|
spin_rate = ('spin_rate','mean'), |
|
vaa = ('vaa','mean'), |
|
haa = ('haa','mean'), |
|
horizontal_release = ('x0','mean'), |
|
vertical_release = ('z0','mean'), |
|
extension = ('extension','mean')).round(1).reset_index() |
|
grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type']) |
|
|
|
|
|
|
|
|
|
ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb, |
|
left_index=True, |
|
right_index=True, |
|
how='right',suffixes=['_2023','_spring']).reset_index() |
|
|
|
ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring'] |
|
ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring'] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams) |
|
ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',]) |
|
|
|
ivb_merged[['start_speed_difference', 'ivb_difference', 'hb_difference','spin_rate_difference','vaa_difference','haa_difference', |
|
'horizontal_release_difference', 'vertical_release_difference', |
|
'extension_difference']] = ivb_merged[['start_speed_spring', 'ivb_spring', 'hb_spring', |
|
'spin_rate_spring', 'vaa_spring','haa_spring','horizontal_release_spring', 'vertical_release_spring', |
|
'extension_spring']].values - ivb_merged[['start_speed_2023', 'ivb_2023', 'hb_2023', |
|
'spin_rate_2023', 'vaa_2023','haa_2023','horizontal_release_2023', 'vertical_release_2023', |
|
'extension_2023']].values |
|
|
|
|
|
return render.DataGrid( |
|
ivb_merged[['start_speed_difference', 'ivb_difference', 'hb_difference', |
|
'spin_rate_difference', |
|
'vaa_difference','haa_difference','horizontal_release_difference', 'vertical_release_difference', |
|
'extension_difference']].reset_index(), |
|
width='fit-content', |
|
height=750, |
|
filters=True, |
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@output |
|
@render.data_frame |
|
def table_stuff(): |
|
|
|
|
|
df_2024_update = df_clean(df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])& |
|
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])]) |
|
|
|
features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff'] |
|
|
|
targets = ['delta_run_exp_mean'] |
|
|
|
|
|
from scipy import stats |
|
df_2024_update['y_pred'] = model.predict(df_2024_update[features]) |
|
|
|
|
|
y_pred_mean = -0.0023964706 |
|
y_pred_std =0.0057581966 |
|
|
|
|
|
|
|
|
|
df_2024_stuff = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team']).agg( |
|
pitches = ('y_pred','count'), |
|
run_exp = ('y_pred','mean'),) |
|
|
|
|
|
df_2024_stuff['run_exp_mean'] = y_pred_mean |
|
df_2024_stuff['run_exp_std'] = y_pred_std |
|
|
|
df_2024_stuff_50 = df_2024_stuff[df_2024_stuff.pitches >= 1] |
|
|
|
df_2024_stuff_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_50.run_exp + df_2024_stuff_50.run_exp_mean) / df_2024_stuff_50.run_exp_std) |
|
|
|
|
|
df_2024_stuff_pitch = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','pitch_type']).agg( |
|
pitches = ('y_pred','count'), |
|
run_exp = ('y_pred','mean'),) |
|
|
|
|
|
df_2024_stuff_pitch['run_exp_mean'] = y_pred_mean |
|
df_2024_stuff_pitch['run_exp_std'] = y_pred_std |
|
|
|
df_2024_stuff_pitch_50 = df_2024_stuff_pitch[df_2024_stuff_pitch.pitches >= 1] |
|
|
|
df_2024_stuff_pitch_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_pitch_50.run_exp + df_2024_stuff_pitch_50.run_exp_mean) / df_2024_stuff_pitch_50.run_exp_std) |
|
|
|
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50.reset_index().pivot(index=['pitcher_id','pitcher_name','pitcher_team'], |
|
columns=['pitch_type'], |
|
values=['tj_stuff_plus']) |
|
|
|
df_2024_stuff_pitch_50_pivot['all'] = df_2024_stuff_pitch_50_pivot.index.map(df_2024_stuff_50['tj_stuff_plus'].to_dict()) |
|
|
|
print('Sheet6') |
|
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.sort_index(level=[1]) |
|
df_2024_stuff_pitch_50_pivot.columns = df_2024_stuff_pitch_50_pivot.columns.droplevel() |
|
column_list = list(df_2024_stuff_pitch_50_pivot.columns[:-1]) |
|
column_list.append('All') |
|
df_2024_stuff_pitch_50_pivot.columns = column_list |
|
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.applymap(lambda x: int(x) if not pd.isna(x) else x) |
|
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.reset_index() |
|
|
|
return render.DataGrid( |
|
df_2024_stuff_pitch_50_pivot, |
|
width='fit-content', |
|
height=750, |
|
filters=True) |
|
|
|
@output |
|
@render.data_frame |
|
def table_stuff_day(): |
|
|
|
|
|
df_2024_update = df_clean(df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])& |
|
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])]) |
|
print('made it here') |
|
|
|
features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff'] |
|
|
|
targets = ['delta_run_exp_mean'] |
|
|
|
|
|
from scipy import stats |
|
df_2024_update['y_pred'] = model.predict(df_2024_update[features]) |
|
|
|
|
|
y_pred_mean = -0.0023964706 |
|
y_pred_std =0.0057581966 |
|
|
|
|
|
|
|
|
|
df_2024_stuff_daily = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','game_date']).agg( |
|
pitches = ('y_pred','count'), |
|
run_exp = ('y_pred','mean'),) |
|
|
|
|
|
df_2024_stuff_daily['run_exp_mean'] = y_pred_mean |
|
df_2024_stuff_daily['run_exp_std'] = y_pred_std |
|
|
|
df_2024_stuff_daily_50 = df_2024_stuff_daily[df_2024_stuff_daily.pitches >= 1] |
|
|
|
df_2024_stuff_daily_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_daily_50.run_exp + df_2024_stuff_daily_50.run_exp_mean) / df_2024_stuff_daily_50.run_exp_std) |
|
|
|
|
|
df_2024_stuff_daily_pitch = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','pitch_type','game_date']).agg( |
|
pitches = ('y_pred','count'), |
|
run_exp = ('y_pred','mean'),) |
|
|
|
|
|
df_2024_stuff_daily_pitch['run_exp_mean'] = y_pred_mean |
|
df_2024_stuff_daily_pitch['run_exp_std'] = y_pred_std |
|
|
|
df_2024_stuff_daily_pitch_50 = df_2024_stuff_daily_pitch[df_2024_stuff_daily_pitch.pitches >= 1] |
|
|
|
df_2024_stuff_daily_pitch_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_daily_pitch_50.run_exp + df_2024_stuff_daily_pitch_50.run_exp_mean) / df_2024_stuff_daily_pitch_50.run_exp_std) |
|
df_2024_stuff_daily_pitch_50 = df_2024_stuff_daily_pitch_50.reset_index() |
|
df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50.pivot(index=['pitcher_id','pitcher_name','pitcher_team','game_date'], |
|
columns=['pitch_type'], |
|
values=['tj_stuff_plus']) |
|
print('made it here') |
|
df_2024_stuff_daily_pitch_50_pivot['all'] = df_2024_stuff_daily_pitch_50_pivot.index.map(df_2024_stuff_daily_50['tj_stuff_plus'].to_dict()) |
|
df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.sort_index(level=[1,3]) |
|
print(df_2024_stuff_daily_pitch_50_pivot) |
|
|
|
df_2024_stuff_daily_pitch_50_pivot.columns = df_2024_stuff_daily_pitch_50_pivot.columns.droplevel() |
|
column_list = list(df_2024_stuff_daily_pitch_50_pivot.columns[:-1]) |
|
column_list.append('All') |
|
df_2024_stuff_daily_pitch_50_pivot.columns = column_list |
|
df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.applymap(lambda x: int(x) if not pd.isna(x) else x) |
|
df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.reset_index() |
|
|
|
return render.DataGrid( |
|
df_2024_stuff_daily_pitch_50_pivot, |
|
width='fit-content', |
|
height=750, |
|
filters=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app = App(app_ui, server) |