# 2024_spray / rolling_batter.py
# nesticot's picture
# Upload 13 files
# 30629a5 verified
# raw
# history blame
# 40.1 kB
from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
import datasets
from datasets import load_dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib
from matplotlib.ticker import MaxNLocator
from matplotlib.gridspec import GridSpec
from scipy.stats import zscore
import math
import matplotlib
from adjustText import adjust_text
import matplotlib.ticker as mtick
from shinywidgets import output_widget, render_widget
import pandas as pd
from configure import base_url
import shinyswatch
import inflect
from matplotlib.pyplot import text
def percentile(n):
    """Build a NaN-ignoring percentile aggregator.

    Args:
        n: Percentile to compute; passed unchanged to ``np.nanpercentile``.

    Returns:
        A one-argument callable that returns ``np.nanpercentile(x, n)``.
        Its ``__name__`` is set to ``percentile_<n>`` so that aggregators
        built for different percentiles carry distinct names.
    """
    def percentile_(x):
        return np.nanpercentile(x, n)

    percentile_.__name__ = f'percentile_{n}'
    return percentile_
# Hex colours used for the lines and label boxes drawn by the plot.
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]

# Startup marker written to stdout before the data is loaded.
print('Starting Everything:')
# exit_velo_df = milb_a_ev_df.append([triple_a_ev_df,double_a_ev_df,a_high_a_ev_df,single_a_ev_df]).reset_index(drop=True)
# player_df_all = mlb_a_player_df.append([triple_a_player_df,double_a_player_df,a_high_a_player_df,single_a_player_df]).reset_index(drop=True)
# exit_velo_df = pd.read_csv('exit_velo_df_all.csv',index_col=[0])
# player_df_all = pd.read_csv('player_df_all.csv',index_col=[0])
# pa_df = pd.read_csv('pa_df_all.csv',index_col=[0])
# pa_df_full_na = pa_df.dropna()
### Import Datasets
# Load the 2023 MLB pitch-level CSV from the 'nesticot/mlb_data' dataset.
dataset = load_dataset(
    'nesticot/mlb_data',
    data_files=['mlb_pitch_data_2023.csv'],
)
dataset_train = dataset['train']

# The first feature is moved into the index and then thrown away, leaving a
# fresh 0..n-1 index (presumably it is a saved row-index column - TODO confirm).
exit_velo_df_mlb = dataset_train.to_pandas()
exit_velo_df_mlb = exit_velo_df_mlb.set_index(
    next(iter(dataset_train.features.keys()))
).reset_index(drop=True)

# Tag every row with its competition level.
exit_velo_df_mlb['level'] = 'MLB'
# ### Import Datasets
# dataset = load_dataset('nesticot/mlb_data', data_files=['aaa_pitch_data_2023.csv',
# ])
# dataset_train = dataset['train']
# exit_velo_df_aaa = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
# #print(df_2023)
# exit_velo_df_aaa['level'] = 'AAA'
# ### Import Datasets
# dataset = load_dataset('nesticot/mlb_data', data_files=['aa_pitch_data_2023.csv',
# ])
# dataset_train = dataset['train']
# exit_velo_df_aa = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
# #print(df_2023)
# exit_velo_df_aa['level'] = 'AA'
# ### Import Datasets
# dataset = load_dataset('nesticot/mlb_data', data_files=['high_a_pitch_data_2023.csv',
# ])
# dataset_train = dataset['train']
# exit_velo_df_ha = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
# #print(df_2023)
# exit_velo_df_ha['level'] = 'A+'
# ### Import Datasets
# dataset = load_dataset('nesticot/mlb_data', data_files=['a_pitch_data_2023.csv',
# ])
# dataset_train = dataset['train']
# exit_velo_df_a = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
# #print(df_2023)
# exit_velo_df_a['level'] = 'A'
# exit_velo_df = pd.concat([exit_velo_df_mlb,exit_velo_df_aaa,exit_velo_df_aa,exit_velo_df_ha,exit_velo_df_a])
# Only the MLB frame is loaded, so the combined table is built from it alone.
exit_velo_df = pd.concat([exit_velo_df_mlb])

# Event types that mark the pitch as the end of a plate appearance.
end_codes = [
    'strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
    'double', 'sac_fly', 'force_out', 'home_run',
    'grounded_into_double_play', 'fielders_choice', 'field_error',
    'triple', 'sac_bunt', 'double_play', 'intent_walk',
    'fielders_choice_out', 'strikeout_double_play',
    'sac_fly_double_play', 'catcher_interf', 'other_out',
]
exit_velo_df['pa'] = exit_velo_df['event_type'].isin(end_codes)

# Strikeouts / walks are every observed event type whose name contains the
# word, so e.g. 'strikeout_double_play' and 'intent_walk' are included.
exit_velo_df['k'] = exit_velo_df['event_type'].isin(
    [name for name in exit_velo_df['event_type'].fillna('None').unique() if 'strikeout' in name]
)
exit_velo_df['bb'] = exit_velo_df['event_type'].isin(
    [name for name in exit_velo_df['event_type'].fillna('None').unique() if 'walk' in name]
)

# +1 for a walk, -1 for a strikeout, 0 otherwise.
_walk_flag = exit_velo_df['bb'].astype(np.float32)
_strikeout_flag = exit_velo_df['k'].astype(np.float32)
exit_velo_df['bb_minus_k'] = _walk_flag - _strikeout_flag

# Keep one row per play_id.
exit_velo_df = exit_velo_df.drop_duplicates(subset=['play_id'])
# Pitch-result descriptions in which the batter offered at the pitch.
swing_codes = [
    'Swinging Strike',
    'In play, no out',
    'Foul',
    'In play, out(s)',
    'In play, run(s)',
    'Swinging Strike (Blocked)',
    'Foul Bunt',
    'Foul Tip',
    'Missed Bunt',
    'Foul Pitchout',
    'Swinging Pitchout',
]

# Same descriptions as swing_codes, kept as a separate list object.
swings_in = list(swing_codes)

# Swing descriptions listed as swinging strikes.
swing_strike_codes = [
    'Swinging Strike',
    'Swinging Strike (Blocked)',
    'Missed Bunt',
    'Foul Tip',
    'Swinging Pitchout',
]

# Swing descriptions listed as contact.
contact_codes = [
    'In play, no out',
    'Foul',
    'In play, out(s)',
    'In play, run(s)',
    'Foul Bunt',
]

# Every pitch-result description that is kept for analysis.
codes_in = [
    'In play, out(s)', 'Swinging Strike', 'Ball', 'Foul',
    'In play, no out', 'Called Strike', 'Foul Tip', 'In play, run(s)',
    'Hit By Pitch', 'Ball In Dirt', 'Pitchout', 'Swinging Strike (Blocked)',
    'Foul Bunt', 'Missed Bunt', 'Foul Pitchout', 'Intent Ball',
    'Swinging Pitchout',
]
# A pitch is flagged in-zone when its `zone` number is below 10.
# NOTE(review): a missing `zone` presumably compares as False rather than NaN,
# which would make the dropna below a no-op - confirm against the data.
exit_velo_df['in_zone'] = exit_velo_df['zone'].lt(10)
exit_velo_df = exit_velo_df.drop_duplicates(subset=['play_id'])

# Keep only pitches whose result description is one of the recognised codes.
_recognised_result = exit_velo_df['play_description'].isin(codes_in)
exit_velo_df_codes = exit_velo_df.loc[_recognised_result].dropna(subset=['in_zone'])

# A row counts as a ball in play when an exit velocity was recorded for it.
exit_velo_df_codes['bip'] = exit_velo_df_codes['launch_speed'].notna()
# Batted-ball quality flags.
# For each flag np.select yields 0.0 when the tracking value is missing,
# 1.0 when the row meets the definition, and NaN (the default) otherwise.
_ev = exit_velo_df_codes['launch_speed']
_la = exit_velo_df_codes['launch_angle']

# Barrel: exit velocity of at least 98 with a launch-angle window that widens
# as exit velocity rises, bounded to 8..50 degrees.  The speed floor is
# inclusive (>= 98); the previous strict `> 98` dropped balls hit at exactly 98.
conditions = [
    _ev.isna(),
    (_ev * 1.5 - _la >= 117)
    & (_ev + _la >= 124)
    & (_ev >= 98)
    & (_la >= 8)
    & (_la <= 50),
]
choices = [False, True]
exit_velo_df_codes['barrel'] = np.select(conditions, choices, default=np.nan)

# Sweet spot: launch angle from 8 to 32 degrees inclusive.
conditions_ss = [
    _la.isna(),
    (_la >= 8) & (_la <= 32),
]
choices_ss = [False, True]
exit_velo_df_codes['sweet_spot'] = np.select(conditions_ss, choices_ss, default=np.nan)

# Hard hit: exit velocity at or above 94.5.
conditions_hh = [
    _ev.isna(),
    (_ev >= 94.5),
]
choices_hh = [False, True]
exit_velo_df_codes['hard_hit'] = np.select(conditions_hh, choices_hh, default=np.nan)
_event_type = exit_velo_df_codes['event_type']

# Total bases credited per hit type; every other event stays NaN.
_bases_by_hit = {'single': 1, 'double': 2, 'triple': 3, 'home_run': 4}
conditions_tb = [(_event_type == hit) for hit in _bases_by_hit]
choices_tb = list(_bases_by_hit.values())
exit_velo_df_codes['tb'] = np.select(conditions_tb, choices_tb, default=np.nan)

# wOBA numerator: weight applied to each event type; every other event stays NaN.
_woba_weight_by_event = {
    'walk': 0.705,
    'hit_by_pitch': 0.688,
    'single': 0.897,
    'double': 1.233,
    'triple': 1.612,
    'home_run': 2.013,
}
conditions_woba = [(_event_type == event) for event in _woba_weight_by_event]
choices_woba = list(_woba_weight_by_event.values())
exit_velo_df_codes['woba'] = np.select(conditions_woba, choices_woba, default=np.nan)

# wOBA denominator: event types that count as one wOBA plate appearance.
# Compared with end_codes this list leaves out 'intent_walk' and 'catcher_interf'.
woba_codes = [
    'strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
    'double', 'sac_fly', 'force_out', 'home_run',
    'grounded_into_double_play', 'fielders_choice', 'field_error',
    'triple', 'sac_bunt', 'double_play',
    'fielders_choice_out', 'strikeout_double_play',
    'sac_fly_double_play', 'other_out',
]
conditions_woba_code = [_event_type.isin(woba_codes)]
choices_woba_code = [1]
exit_velo_df_codes['woba_codes'] = np.select(conditions_woba_code, choices_woba_code, default=np.nan)
#exit_velo_df_codes['barrel'] = (exit_velo_df_codes.launch_speed >= 98) & (exit_velo_df_codes.launch_angle >= (26 - (-98 + exit_velo_df_codes.launch_speed))) & (exit_velo_df_codes.launch_angle <= 30 + (-98 + exit_velo_df_codes.launch_speed)) & (exit_velo_df_codes.launch_angle >= 8) & (exit_velo_df_codes.launch_angle <= 50)
#exit_velo_df_codes['barrel'] = (exit_velo_df_codes.launch_speed >= 98) & (exit_velo_df_codes.launch_angle >= (26 - (-98 + exit_velo_df_codes.launch_speed))) & (exit_velo_df_codes.launch_angle <= 30 + (-98 + exit_velo_df_codes.launch_speed)) & (exit_velo_df_codes.launch_angle >= 8) & (exit_velo_df_codes.launch_angle <= 50)
# Every retained row is one pitch.
exit_velo_df_codes['pitches'] = 1

# Whiffs are play codes S, W or T; CSW additionally counts play code C.
# The flags are assigned positionally as 0/1 integers.
_play_code = exit_velo_df_codes['play_code']
exit_velo_df_codes['whiffs'] = _play_code.isin(['S', 'W', 'T']).astype('int64').to_numpy()
exit_velo_df_codes['csw'] = _play_code.isin(['S', 'W', 'T', 'C']).astype('int64').to_numpy()

# A swing is any pitch whose description appears in swings_in.
exit_velo_df_codes['swings'] = (
    exit_velo_df_codes['play_description'].isin(swings_in).astype('int64').to_numpy()
)

# Zone / swing / contact combinations, all derived from the flags above.
_inside = exit_velo_df_codes['in_zone'].eq(True)
_outside = exit_velo_df_codes['in_zone'].eq(False)
_swung = exit_velo_df_codes['swings'].eq(1)
_no_whiff = exit_velo_df_codes['whiffs'].eq(0)

exit_velo_df_codes['out_zone'] = _outside
exit_velo_df_codes['zone_swing'] = _inside & _swung
exit_velo_df_codes['zone_contact'] = _inside & _swung & _no_whiff
exit_velo_df_codes['ozone_swing'] = _outside & _swung
exit_velo_df_codes['ozone_contact'] = _outside & _swung & _no_whiff
# Per-batter, per-level totals.  The aggregation spec is assembled in the
# same order as the output columns: counting stats first, then the exit
# velocity / launch angle summaries, then the pitch and swing counts.
_summary_spec = {
    column: (column, 'sum')
    for column in (
        'pa', 'k', 'bb', 'bb_minus_k', 'csw', 'bip', 'tb', 'woba',
        'woba_codes', 'hard_hit', 'barrel', 'sweet_spot',
    )
}
_summary_spec['max_launch_speed'] = ('launch_speed', 'max')
_summary_spec['launch_speed_90'] = ('launch_speed', percentile(90))
_summary_spec['launch_speed'] = ('launch_speed', 'mean')
_summary_spec['launch_angle'] = ('launch_angle', 'mean')
_summary_spec.update(
    (column, (column, 'sum'))
    for column in (
        'pitches', 'swings', 'in_zone', 'out_zone', 'whiffs',
        'zone_swing', 'zone_contact', 'ozone_swing', 'ozone_contact',
    )
)

_by_batter = exit_velo_df_codes.groupby(['batter_id', 'batter_name', 'level'])
exit_velo_df_codes_summ = _by_batter.agg(**_summary_spec).reset_index()
#exit_velo_df_codes_summ['out_zone'] = ~exit_velo_df_codes_summ.in_zone
#bip_min_input = int(input())
#bip_min = min(bip_min_input,50)
#exit_velo_df_codes_summ = exit_velo_df_codes_summ[exit_velo_df_codes_summ.balls_in_play>=bip_min]
def _safe_rate(numerator, denominator):
    """Return ``numerator / denominator`` element-wise, NaN where the denominator is 0."""
    return (numerator / denominator).where(denominator != 0)


def _add_rate_columns(summary):
    """Add the per-batter rate columns to ``summary`` in place and return it.

    Every rate is a summed count divided by the matching opportunity count and
    is NaN when there were no opportunities.
    """
    # Plate-appearance based rates.
    summary['k_percent'] = _safe_rate(summary['k'], summary['pa'])
    summary['bb_percent'] = _safe_rate(summary['bb'], summary['pa'])
    summary['bb_minus_k_percent'] = _safe_rate(summary['bb_minus_k'], summary['pa'])

    summary['csw_percent'] = _safe_rate(summary['csw'], summary['pitches'])

    # Batted-ball and wOBA rates.
    summary['sweet_spot_percent'] = _safe_rate(summary['sweet_spot'], summary['bip'])
    summary['woba_percent'] = _safe_rate(summary['woba'], summary['woba_codes'])
    summary['hard_hit_percent'] = _safe_rate(summary['hard_hit'], summary['bip'])
    summary['barrel_percent'] = _safe_rate(summary['barrel'], summary['bip'])

    # Plate-discipline rates.
    summary['zone_contact_percent'] = _safe_rate(summary['zone_contact'], summary['zone_swing'])
    # Guarded on in_zone, the actual denominator (the old code checked pitches).
    summary['zone_swing_percent'] = _safe_rate(summary['zone_swing'], summary['in_zone'])
    summary['zone_percent'] = _safe_rate(summary['in_zone'], summary['pitches'])
    summary['chase_percent'] = _safe_rate(
        summary['ozone_swing'], summary['pitches'] - summary['in_zone']
    )
    summary['chase_contact'] = _safe_rate(summary['ozone_contact'], summary['ozone_swing'])
    summary['swing_percent'] = _safe_rate(summary['swings'], summary['pitches'])
    summary['whiff_rate'] = _safe_rate(summary['whiffs'], summary['swings'])
    summary['swstr_rate'] = _safe_rate(summary['whiffs'], summary['pitches'])
    return summary


exit_velo_df_codes_summ = _add_rate_columns(exit_velo_df_codes_summ)
exit_velo_df_codes_summ = exit_velo_df_codes_summ.dropna(subset=['bip'])
# Stat ids grouped by the kind of opportunity they are measured against.
woba_list = 'woba'.split()
pa_list = 'k bb bb_minus_k'.split()
balls_in_play_list = 'hard_hit launch_speed launch_speed_90 launch_angle barrel sweet_spot'.split()
pitches_list = 'zone_percent swing_percent sw_str csw'.split()
swings_list = 'whiff_percent'.split()
in_zone_pitches_list = 'zone_swing'.split()
in_zone_swings_list = 'zone_contact'.split()
out_zone_pitches_list = 'chase_percent'.split()
out_zone_swings_list = 'chase_contact'.split()


def _stat_entry(x_axis, title, x_value, x_range, percentile_label,
                percent=True, flip_p=False, percentile=False, avg_adjust=False):
    """Assemble one plot_dict entry; the y-axis label always equals the title."""
    return {
        'x_axis': x_axis,
        'y_axis': title,
        'title': title,
        'x_value': x_value,
        'x_range': x_range,
        'percent': percent,
        'percentile_label': percentile_label,
        'flip_p': flip_p,
        'percentile': percentile,
        'avg_adjust': avg_adjust,
    }


# Plot settings per stat id: axis labels, the per-pitch column to roll
# ('x_value'), y tick positions ('x_range'), whether to format as a percentage,
# the summary column used for percentiles, and whether percentiles are flipped.
plot_dict = {
    'k': _stat_entry('Plate Appearances', 'K%', 'k',
                     [0.0, 0.1, 0.2, 0.3, 0.4], 'k_percent', flip_p=True),
    'bb': _stat_entry('Plate Appearances', 'BB%', 'bb',
                      [0.0, 0.1, 0.2, 0.3], 'bb_percent'),
    'bb_minus_k': _stat_entry('Plate Appearances', 'BB-K%', 'bb_minus_k',
                              [-0.3, -0.2, -0.1, 0, 0.1, 0.2], 'bb_minus_k_percent'),
    'csw': _stat_entry('Pitches', 'CSW%', 'csw',
                       [.2, .25, .3, .35, .4], 'csw_percent', flip_p=True),
    'woba': _stat_entry('wOBA PA', 'wOBA', 'woba',
                        [.20, .30, .40, .50], 'woba_percent',
                        percent=False, avg_adjust=True),
    'launch_speed': _stat_entry('Balls In Play', 'Exit Velocity', 'launch_speed',
                                [85, 90, 95, 100], 'launch_speed', percent=False),
    'launch_speed_90': _stat_entry('Balls In Play', '90th Percentile Exit Velocity', 'launch_speed',
                                   [95, 100, 105, 110, 115], 'launch_speed_90',
                                   percent=False, percentile=True),
    'hard_hit': _stat_entry('Balls In Play', 'HardHit%', 'hard_hit',
                            [0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 'hard_hit_percent'),
    'sweet_spot': _stat_entry('Balls In Play', 'SweetSpot%', 'sweet_spot',
                              [0.2, 0.3, 0.4, 0.5], 'sweet_spot_percent'),
    'launch_angle': _stat_entry('Balls In Play', 'Launch Angle', 'launch_angle',
                                [-20, -10, 0, 10, 20], 'launch_angle', percent=False),
    'barrel': _stat_entry('Balls In Play', 'Barrel%', 'barrel',
                          [0, 0.05, 0.10, .15, .20, .25, .30], 'barrel_percent'),
    'zone_percent': _stat_entry('Pitches', 'Zone%', 'in_zone',
                                [0.3, 0.4, 0.5, 0.6, 0.7], 'zone_percent'),
    'swing_percent': _stat_entry('Pitches', 'Swing%', 'swings',
                                 [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 'swing_percent'),
    'whiff_percent': _stat_entry('Swings', 'Whiff%', 'whiffs',
                                 [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], 'whiff_rate', flip_p=True),
    'sw_str': _stat_entry('Pitches', 'SwStr%', 'whiffs',
                          [0.0, 0.05, 0.1, 0.15, 0.2, 0.25], 'swstr_rate', flip_p=True),
    'zone_swing': _stat_entry('In-Zone Pitches', 'Z-Swing%', 'zone_swing',
                              [0.3, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1], 'zone_swing_percent'),
    'zone_contact': _stat_entry('In-Zone Swings', 'Z-Contact%', 'zone_contact',
                                [0.5, 0.6, 0.7, 0.8, 0.9, 1], 'zone_contact_percent'),
    'chase_percent': _stat_entry('Out-of-Zone Pitches', 'O-Swing%', 'ozone_swing',
                                 [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], 'chase_percent', flip_p=True),
    'chase_contact': _stat_entry('Out-of-Zone Swings', 'O-Contact%', 'ozone_contact',
                                 [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 'chase_contact'),
}
# Build the batter_id -> batter_name lookup: one row per batter_id, taken
# after sorting the pitch table by batter_name.
test_df = (
    exit_velo_df
    .sort_values(by='batter_name')
    .drop_duplicates(subset='batter_id')
    .reset_index(drop=True)
)
test_df = test_df[['batter_id', 'batter_name']].dropna()

# Ids become plain integers so they can be matched against int(...) lookups.
test_df['batter_id'] = test_df['batter_id'].astype(int)
test_df = test_df.set_index('batter_id')
batter_dict = test_df['batter_name'].to_dict()
# Competition levels; each key maps to itself.
level_dict = {level: level for level in ('MLB', 'AAA', 'AA', 'A+', 'A')}

# Stat id -> display name for a subset of the stats in plot_dict.
plot_dict_small = dict([
    ('k', 'K%'),
    ('bb', 'BB%'),
    ('csw', 'CSW%'),
    ('launch_speed', 'Exit Velocity'),
    ('launch_speed_90', '90th Percentile Exit Velocity'),
    ('sweet_spot', 'SweetSpot%'),
    ('launch_angle', 'Launch Angle'),
    ('zone_percent', 'Zone%'),
    ('barrel', 'Barrel%'),
    ('swing_percent', 'Swing%'),
    ('whiff_percent', 'Whiff%'),
    ('sw_str', 'SwStr%'),
    ('zone_swing', 'Z-Swing%'),
    ('zone_contact', 'Z-Contact%'),
    ('chase_percent', 'O-Swing%'),
    ('chase_contact', 'O-Contact%'),
])
def _divisor_for_stat(stat_id):
    """Return the opportunity column for ``stat_id``, or ``None`` if it has none.

    The returned column is both the per-pitch row filter (rows where it equals
    1/True) and the denominator of the stat.
    """
    groups = (
        (pa_list, 'pa'),
        (balls_in_play_list, 'bip'),
        (pitches_list, 'pitches'),
        (swings_list, 'swings'),
        (in_zone_pitches_list, 'in_zone'),
        (in_zone_swings_list, 'zone_swing'),
        (out_zone_pitches_list, 'out_zone'),
        (out_zone_swings_list, 'ozone_swing'),
    )
    divisor = None
    for stat_ids, column in groups:
        if stat_id in stat_ids:
            divisor = column  # a later group wins, as in a chain of plain ifs
    return divisor


def _percentile_ordinal(pct_rank, flipped):
    """Format a 0..1 percentile rank as an ordinal such as ``'57th'``.

    ``flipped`` inverts the rank (1 - rank) for stats where lower is better.
    The scaled value is rounded, not truncated, so 0.57 * 100 (which is
    56.99999999999999 in floating point) is reported as 57 rather than 56.
    """
    share = abs(int(bool(flipped)) - round(float(pct_rank), 2))
    return inflect.engine().ordinal(int(round(share * 100)))


def _draw_percentile_guides(ax, values, x_min, x_max, label_x, flipped):
    """Draw dotted 90/75/25/10th-percentile guide lines with boxed labels.

    For flipped stats the '90th %' guide sits at the 0.1 quantile of ``values``
    and so on.  Each label box is outlined in the colour of its own line.
    """
    names = ('90th %', '75th %', '25th %', '10th %')
    quantiles = (0.1, 0.25, 0.75, 0.9) if flipped else (0.9, 0.75, 0.25, 0.1)
    for offset, (quantile, name) in enumerate(zip(quantiles, names)):
        colour = colour_palette[2 + offset]
        y = values.quantile(quantile)
        ax.hlines(y=y, xmin=x_min, xmax=x_max, color=colour, linestyle='dotted', alpha=0.5)
        ax.text(label_x, y, name, rotation=0, ha='left',
                bbox=dict(facecolor='white', alpha=0.5, edgecolor=colour, pad=2))


def _show_message(fig, ax, message):
    """Write ``message`` on ``ax`` and return ``fig`` in place of a chart."""
    ax.text(x=0.5, y=0.5, s=message, fontsize=18, ha='center')
    return fig


def server(input,output,session):
    """Define the ``plot`` output: a rolling-stat line chart for one batter.

    Args:
        input: Provides ``go``, ``id``, ``n``, ``stat_id`` and ``level_id``.
        output: Used to register the plot via the ``@output`` decorator.
        session: Not used by this function.
    """
    @output
    @render.plot(alt="A histogram")
    @reactive.event(input.go, ignore_none=False)
    def plot():
        """Draw the selected batter's rolling stat against level benchmarks."""
        sns.set_theme(style="whitegrid", palette="pastel")

        # Compare by value; `is ""` tested object identity with a literal.
        if input.id() in (None, ""):
            fig = plt.figure(figsize=(12, 12))
            fig.text(s='Please Select a Batter',x=0.5,y=0.5)
            return fig

        window_input = input.n()
        swing_min = int(window_input)
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
        fig.set_facecolor('white')

        batter_id = int(input.id())
        stat_id = input.stat_id()
        level_id = input.level_id()

        retry_message = 'Please Select Different Parameters to Produce a plot'
        stat = plot_dict.get(stat_id)
        divisor_x = _divisor_for_stat(stat_id)
        # A stat with no opportunity column used to fail with a NameError; a
        # window below 1 cannot be rolled or divided by.
        if stat is None or divisor_x is None or swing_min < 1:
            return _show_message(fig, ax, retry_message)

        # Per-pitch rows: the whole level, and this batter's opportunities.
        level_df = exit_velo_df_codes[exit_velo_df_codes.level == level_id]
        elly_zone_df = level_df[(level_df[divisor_x] == 1) & (level_df.batter_id == batter_id)]

        # Comparison population: batters at this level with at least
        # `swing_min` opportunities.  Only the column that is displayed is
        # ranked, which avoids set_index/merge calls that depended on
        # positional arguments and on duplicated column names.
        population = exit_velo_df_codes_summ[
            (exit_velo_df_codes_summ[divisor_x] >= swing_min)
            & (exit_velo_df_codes_summ.level == level_id)
        ]
        label_column = stat['percentile_label']
        pct_ranks = population[label_column].rank(pct=True)
        player_ranks = pct_ranks[population.batter_id.astype(int) == batter_id]
        if player_ranks.empty or pd.isna(player_ranks.iloc[0]):
            return _show_message(fig, ax, retry_message)

        flipped = bool(stat['flip_p'])
        ordinal = _percentile_ordinal(player_ranks.iloc[0], flipped)
        batter_name = batter_dict[batter_id]
        x_value = stat['x_value']
        x_max = len(elly_zone_df)

        if stat['percentile']:
            # 90th-percentile stats use quantiles instead of sum / opportunities.
            level_value = level_df[x_value].quantile(0.9)
            player_value = elly_zone_df[x_value].fillna(0).quantile(0.9)
            rolling = elly_zone_df[x_value].fillna(0).rolling(window=swing_min).quantile(0.9)
            value_format = '.1f'
        else:
            level_value = level_df[x_value].sum() / level_df[divisor_x].sum()
            player_value = elly_zone_df[x_value].sum() / elly_zone_df[divisor_x].sum()
            rolling = elly_zone_df[x_value].fillna(0).rolling(window=swing_min).sum() / swing_min
            value_format = '.1%' if stat['percent'] else '.1f'

        if stat['percent']:
            ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

        label_1 = f'{level_id} Average {level_value:{value_format}}'
        label_2 = f'{batter_name} Average {player_value:{value_format}} ({ordinal} Percentile)'

        # Labels sit just right of the left edge of the x-axis.
        label_x = min(window_input + window_input / 100, window_input + 1)
        _draw_percentile_guides(ax, population[label_column], swing_min, x_max, label_x, flipped)

        ax.hlines(y=level_value, xmin=swing_min, xmax=x_max,
                  color=colour_palette[1], linestyle='-.', label=label_1)
        ax.hlines(y=player_value, xmin=swing_min, xmax=x_max,
                  color=colour_palette[0], linestyle='--', label=label_2)
        sns.lineplot(x=range(1, x_max + 1), y=rolling,
                     color=colour_palette[0], linewidth=3, ax=ax)

        ax.set_xlim(swing_min, x_max)
        ax.set_title(f'{batter_name} - {level_id} - {swing_min} {stat["x_axis"]} Rolling {stat["title"]}',
                     fontsize=16, fontname='Century Gothic')
        ax.set_xlabel(stat['x_axis'], fontsize=16, fontname='Century Gothic')
        ax.set_ylabel(stat['y_axis'], fontsize=16, fontname='Century Gothic')
        ax.set_yticks(stat["x_range"])
        ax.legend(fontsize='16')

        fig.text(x=0.03,y=0.02,s='By: @TJStats',fontname='Century Gothic')
        fig.text(x=1-0.03,y=0.02,s='Data: MLB',ha='right',fontname='Century Gothic')
        fig.tight_layout()
        return fig
# Shiny application object for the batter rolling-stat page.
#
# The UI is a single fluid page containing:
#   * a header (site title, tagline, Patreon link),
#   * a tab bar whose entries are plain links to the sibling apps, and
#   * a sidebar layout: inputs (batter, level, stat, rolling window, "Generate"
#     button, result table) on the left and the plot output on the right.
#
# Input ids ("id", "level_id", "stat_id", "n", "go") and output ids
# ("result", "plot") are what the `server` function binds to, so they must
# not be renamed here without changing `server` as well.
rolling_batter = App(
    ui.page_fluid(
        # Relative hrefs below are resolved against `base_url`.
        ui.tags.base(href=base_url),
        ui.tags.div(
            {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
            ui.tags.style(
                """
h4 {
margin-top: 1em;font-size:35px;
}
h2{
font-size:25px;
}
"""
            ),
            shinyswatch.theme.simplex(),
            ui.tags.h4("TJStats"),
            ui.tags.i("Baseball Analytics and Visualizations"),
            ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2024 Apps</a><sup>1</sup>"""),
            # Navigation bar: every entry is an ordinary link to another app.
            ui.navset_tab(
                ui.nav_control(
                    ui.a(
                        "Home",
                        href="home/"
                    ),
                ),
                ui.nav_menu(
                    "Batter Charts",
                    ui.nav_control(
                        ui.a(
                            "Batting Rolling",
                            href="rolling_batter/"
                        ),
                        ui.a(
                            "Spray & Damage",
                            href="spray/"
                        ),
                        ui.a(
                            "Decision Value",
                            href="decision_value/"
                        ),
                        # ui.a(
                        #     "Damage Model",
                        #     href="damage_model/"
                        # ),
                        ui.a(
                            "Batter Scatter",
                            href="batter_scatter/"
                        ),
                        # ui.a(
                        #     "EV vs LA Plot",
                        #     href="ev_angle/"
                        # ),
                        ui.a(
                            "Statcast Compare",
                            href="statcast_compare/"
                        )
                    ),
                ),
                ui.nav_menu(
                    "Pitcher Charts",
                    ui.nav_control(
                        ui.a(
                            "Pitcher Rolling",
                            href="rolling_pitcher/"
                        ),
                        ui.a(
                            "Pitcher Summary",
                            href="pitching_summary_graphic_new/"
                        ),
                        ui.a(
                            "Pitcher Scatter",
                            href="pitcher_scatter/"
                        )
                    ),
                ),
            ),
            ui.row(
                ui.layout_sidebar(
                    ui.panel_sidebar(
                        # Label fixed: this selector is populated from
                        # `batter_dict`, so it picks a batter, not a pitcher.
                        # NOTE(review): `width=1` is a bare number, not a CSS
                        # length string such as "100%" -- confirm it is intended.
                        ui.input_select("id", "Select Batter", batter_dict, selected=675911, width=1, size=1, selectize=True),
                        ui.input_select("level_id", "Select Level", level_dict, width=1, size=1),
                        ui.input_select("stat_id", "Select Stat", plot_dict_small, width=1, size=1),
                        ui.input_numeric("n", "Rolling Window Size", value=50),
                        ui.input_action_button("go", "Generate", class_="btn-primary"),
                        ui.output_table("result")
                    ),
                    ui.panel_main(
                        ui.output_plot("plot", height="1000px", width="1000px")
                    ),
                )
            ),
        )
    ),
    server,
)