Spaces:
Running
Running
import pandas as pd | |
import numpy as np | |
import joblib | |
import math | |
import pickle | |
# Pre-trained estimators used by df_update(), deserialised once at import time.
# NOTE(security): joblib.load unpickles its input and can therefore execute
# arbitrary code; these paths must only ever point at trusted artefacts.
barrel_model = joblib.load('joblib_model/barrel_model.joblib')
# `loaded_model` used to be a second, independent load of the very same file.
# It is kept as an alias so existing references keep working without paying
# for a duplicate deserialisation.
loaded_model = barrel_model
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
def percentile(n):
    """Build a NaN-ignoring aggregator that returns the ``n``-th percentile.

    The returned callable is named ``percentile_<n>`` so that each generated
    aggregator can be told apart by its ``__name__``.

    Args:
        n: Percentile to compute, forwarded to ``np.nanpercentile``.

    Returns:
        A one-argument function mapping a collection of values to its
        ``n``-th percentile, ignoring NaNs.
    """
    def nan_percentile(values):
        return np.nanpercentile(values, n)

    nan_percentile.__name__ = 'percentile_%s' % n
    return nan_percentile
def df_update(df=pd.DataFrame()):
    """Add derived indicator and metric columns to a pitch-level frame.

    The frame is modified in place and also returned.  Columns read:
    ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``,
    ``event_type``, ``play_description``, ``play_code``, ``is_swing``,
    ``is_pitch``, ``launch_speed``, ``launch_angle``, ``pitch_type`` and
    ``trajectory``.

    Every model prediction is guarded by the exact row mask it is assigned
    to, so an empty selection is never handed to an estimator.

    Args:
        df: Input frame (presumably one row per pitch).  The default empty
            frame lacks the required columns and raises ``KeyError``.

    Returns:
        The same ``df`` object with the derived columns added.
    """
    # Strike-zone edges of exactly 0 are treated as missing.
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # True/False for positive zone numbers (True when below 10), else NaN.
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Back-fill px / pz from x / y wherever the former are missing.
    need_px = df['x'].notna() & df['px'].isna()
    if need_px.any():
        df.loc[need_px, 'px'] = px_model.predict(df.loc[need_px, ['x']])
    need_pz = df['y'].notna() & df['pz'].isna()
    if need_pz.any():
        # NOTE(review): pz is estimated with px_model plus a 3.2 offset while
        # the module-level pz_model is never used.  Kept as-is because the
        # offset may be deliberate -- confirm with the model's author.
        df.loc[need_pz, 'pz'] = px_model.predict(df.loc[need_pz, ['y']]) + 3.2

    # Predict in_zone only for rows that have every model input available.
    location_cols = ['px', 'pz', 'sz_top', 'sz_bot']
    has_location = df[location_cols].notna().all(axis=1)
    need_zone = has_location & df['in_zone'].isna()
    if need_zone.any():
        print('We found missing data')
        df.loc[need_zone, 'in_zone'] = in_zone_model.predict(
            df.loc[need_zone, location_cols].values)

    # ---- plate-appearance outcome flags --------------------------------
    hit_codes = ['single', 'double', 'home_run', 'triple']
    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']
    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']
    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']
    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    df['hits'] = df['event_type'].isin(hit_codes)
    df['ab'] = df['event_type'].isin(ab_codes)
    df['on_base'] = df['event_type'].isin(obp_true_codes)
    df['obp'] = df['event_type'].isin(obp_codes)
    df['bip'] = df['play_description'].isin(bip_codes)
    df['bip_div'] = df['launch_speed'].notna()

    # ---- batted-ball quality -------------------------------------------
    has_batted_ball = df['launch_speed'].notna() & df['launch_angle'].notna()
    batted_ball_cols = ['launch_speed', 'launch_angle']
    df['barrel'] = np.nan
    if has_batted_ball.any():
        df.loc[has_batted_ball, 'barrel'] = barrel_model.predict(
            df.loc[has_batted_ball, batted_ball_cols])

    # NOTE(review): as written, a missing measurement maps to 0 and a
    # measured-but-out-of-range value falls through to NaN, which looks
    # inverted.  Sums are unaffected, so the mapping is kept unchanged.
    launch_angle = df['launch_angle']
    df['sweet_spot'] = np.select(
        [launch_angle.isna(), (launch_angle >= 8) & (launch_angle <= 32)],
        [False, True], default=np.nan)
    launch_speed = df['launch_speed']
    df['hard_hit'] = np.select(
        [launch_speed.isna(), launch_speed >= 94.5],
        [False, True], default=np.nan)

    # ---- total bases and wOBA ------------------------------------------
    event = df['event_type']
    df['tb'] = np.select(
        [event == 'single', event == 'double',
         event == 'triple', event == 'home_run'],
        [1, 2, 3, 4], default=np.nan)

    woba_zero_codes = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                       'grounded_into_double_play', 'fielders_choice',
                       'field_error', 'sac_bunt', 'double_play',
                       'fielders_choice_out', 'strikeout_double_play',
                       'sac_fly_double_play', 'other_out']
    df['woba'] = np.select(
        [event.isin(woba_zero_codes),
         event == 'walk',
         event == 'hit_by_pitch',
         event == 'single',
         event == 'double',
         event == 'triple',
         event == 'home_run'],
        [0, 0.696, 0.726, 0.883, 1.244, 1.569, 2.004],
        default=np.nan)

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice',
                  'field_error', 'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']
    df['woba_codes'] = np.select([event.isin(woba_codes)], [1], default=np.nan)
    # wOBA value kept only on balls in play, NaN elsewhere.
    df['woba_contact'] = df['woba'].where(df['bip'])

    # ---- swing / zone flags --------------------------------------------
    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype(int)
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df['swings'] = df['is_swing'].eq(True).astype(int)

    in_zone = df['in_zone'].eq(True)
    out_zone = df['in_zone'].eq(False)  # NaN in_zone is neither in nor out
    swung = df['swings'] == 1
    made_contact = df['whiffs'] == 0
    df['out_zone'] = out_zone
    df['zone_swing'] = in_zone & swung
    df['zone_contact'] = in_zone & swung & made_contact
    df['ozone_swing'] = out_zone & swung
    df['ozone_contact'] = out_zone & swung & made_contact

    # Any event whose name contains 'strikeout' counts as a strikeout.
    strikeout_events = [e for e in df['event_type'].dropna().unique()
                        if 'strikeout' in e]
    df['k'] = df['event_type'].isin(strikeout_events)
    df['bb'] = df['event_type'].isin(['walk', 'intent_walk'])
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A row counts as a plate appearance when event_type is a string.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in df['event_type']]
    # Plain truthiness is kept on purpose (NaN is truthy and counts as 1).
    df['pitches'] = [1 if x else 0 for x in df['is_pitch']]

    # ---- pitch grouping -------------------------------------------------
    # Codes not listed here (including UN, IN, PO, AB, AS and NP) end up as
    # 'Unknown'.
    pitch_groups = {
        'Fastball': ['FA', 'FF', 'FT', 'FC', 'SI'],
        'Off-Speed': ['FS', 'FO', 'SC', 'GY', 'CH', 'KN'],
        'Breaking': ['ST', 'SL', 'CU', 'KC', 'SV', 'CS', 'EP'],
    }
    pitch_cat = {code: group
                 for group, codes in pitch_groups.items()
                 for code in codes}
    df['pitch_category'] = df['pitch_type'].map(pitch_cat).fillna('Unknown')
    # Constant key so the whole frame can be grouped into a single row.
    df['average'] = 'average'

    # ---- trajectory dummies ----------------------------------------------
    trajectory_aliases = {
        'bunt_popup': 'popup',
        'bunt_grounder': 'ground_ball',
        '': np.nan,
        'bunt_line_drive': 'line_drive',
    }
    for raw_value, normalised in trajectory_aliases.items():
        df.loc[df['trajectory'] == raw_value, 'trajectory'] = normalised

    trajectory_cols = ['trajectory_fly_ball', 'trajectory_ground_ball',
                       'trajectory_line_drive', 'trajectory_popup']
    dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    # Reindex so the assignment works even when a category is absent from
    # this frame or an unexpected extra category shows up.
    df[trajectory_cols] = dummies.reindex(columns=trajectory_cols, fill_value=0)

    # ---- attack zones ----------------------------------------------------
    df['attack_zone'] = np.nan
    # Recomputed here because px / pz / in_zone were filled in above.
    has_location = df[location_cols].notna().all(axis=1)
    if has_location.any():
        df.loc[has_location, 'attack_zone'] = attack_zone_model.predict(
            df.loc[has_location, location_cols])

    zone_labels = {'heart': 0, 'shadow': 1, 'chase': 2, 'waste': 3}
    for label, code in zone_labels.items():
        df[label] = df['attack_zone'] == code
    for label, code in zone_labels.items():
        df[label + '_swing'] = (df['attack_zone'] == code) & swung

    # ---- expected wOBA ---------------------------------------------------
    df['xwoba'] = np.nan
    df['xwoba_contact'] = np.nan
    if has_batted_ball.any():
        # Probability-weighted value over the model's five outcome classes;
        # computed once and reused for both columns.
        outcome_probs = xwoba_model.predict_proba(
            df.loc[has_batted_ball, ['launch_angle', 'launch_speed']])
        outcome_values = np.array([0, 0.883, 1.244, 1.569, 2.004])
        expected = (outcome_probs * outcome_values).sum(axis=1)
        df.loc[has_batted_ball, 'xwoba'] = expected
        df.loc[has_batted_ball, 'xwoba_contact'] = expected

    # Fixed values for walks, hit-by-pitches and strikeouts.
    df.loc[df['event_type'].isin(['walk']), 'xwoba'] = 0.696
    df.loc[df['event_type'].isin(['hit_by_pitch']), 'xwoba'] = 0.726
    df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'xwoba'] = 0

    # Denominator flag: 1 for every row that received an xwoba value.
    fixed_value_events = ['walk', 'hit_by_pitch',
                          'strikeout', 'strikeout_double_play']
    df['xwoba_codes'] = np.nan
    df.loc[has_batted_ball | df['event_type'].isin(fixed_value_events),
           'xwoba_codes'] = 1
    return df
def df_update_summ(df=pd.DataFrame()):
    """Aggregate the derived per-row columns into one row per batter.

    Rows are grouped by ``batter_id`` and ``batter_name``.  Most outputs are
    sums of the identically named input column; the exceptions are spelled
    out individually below.

    Args:
        df: Frame carrying the columns added by ``df_update``.

    Returns:
        A frame with the group keys restored as ordinary columns.
    """
    def summed(columns):
        # Named-aggregation entries that sum a column under its own name.
        return {column: (column, 'sum') for column in columns}

    # Insertion order defines the output column order.
    spec = {}
    spec.update(summed(['pa', 'ab']))
    spec['obp_pa'] = ('obp', 'sum')
    spec.update(summed([
        'hits', 'on_base', 'k', 'bb', 'bb_minus_k', 'csw', 'bip', 'bip_div',
        'tb', 'woba', 'woba_contact', 'xwoba', 'xwoba_contact',
        'woba_codes', 'xwoba_codes', 'hard_hit', 'barrel', 'sweet_spot',
    ]))
    spec['max_launch_speed'] = ('launch_speed', 'max')
    spec['launch_speed_90'] = ('launch_speed', percentile(90))
    spec['launch_speed'] = ('launch_speed', 'mean')
    spec['launch_angle'] = ('launch_angle', 'mean')
    spec['pitches'] = ('is_pitch', 'sum')
    spec.update(summed([
        'swings', 'in_zone', 'out_zone', 'whiffs',
        'zone_swing', 'zone_contact', 'ozone_swing', 'ozone_contact',
    ]))
    spec['ground_ball'] = ('trajectory_ground_ball', 'sum')
    spec['line_drive'] = ('trajectory_line_drive', 'sum')
    spec['fly_ball'] = ('trajectory_fly_ball', 'sum')
    spec['pop_up'] = ('trajectory_popup', 'sum')
    # Count of rows with a non-null attack zone.
    spec['attack_zone'] = ('attack_zone', 'count')
    spec.update(summed([
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]))

    per_batter = df.groupby(['batter_id', 'batter_name'])
    return per_batter.agg(**spec).reset_index()
def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate the derived per-row columns per value of ``average``.

    Uses the same aggregation set as the per-batter summary, but groups on
    the ``average`` column instead of the batter keys.

    Args:
        df: Frame carrying the columns added by ``df_update``.

    Returns:
        A frame with ``average`` restored as an ordinary column.
    """
    def summed(columns):
        # Named-aggregation entries that sum a column under its own name.
        return {column: (column, 'sum') for column in columns}

    # Insertion order defines the output column order.
    spec = {}
    spec.update(summed(['pa', 'ab']))
    spec['obp_pa'] = ('obp', 'sum')
    spec.update(summed([
        'hits', 'on_base', 'k', 'bb', 'bb_minus_k', 'csw', 'bip', 'bip_div',
        'tb', 'woba', 'woba_contact', 'xwoba', 'xwoba_contact',
        'woba_codes', 'xwoba_codes', 'hard_hit', 'barrel', 'sweet_spot',
    ]))
    spec['max_launch_speed'] = ('launch_speed', 'max')
    spec['launch_speed_90'] = ('launch_speed', percentile(90))
    spec['launch_speed'] = ('launch_speed', 'mean')
    spec['launch_angle'] = ('launch_angle', 'mean')
    spec['pitches'] = ('is_pitch', 'sum')
    spec.update(summed([
        'swings', 'in_zone', 'out_zone', 'whiffs',
        'zone_swing', 'zone_contact', 'ozone_swing', 'ozone_contact',
    ]))
    spec['ground_ball'] = ('trajectory_ground_ball', 'sum')
    spec['line_drive'] = ('trajectory_line_drive', 'sum')
    spec['fly_ball'] = ('trajectory_fly_ball', 'sum')
    spec['pop_up'] = ('trajectory_popup', 'sum')
    # Count of rows with a non-null attack zone.
    spec['attack_zone'] = ('attack_zone', 'count')
    spec.update(summed([
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]))

    overall = df.groupby(['average'])
    return overall.agg(**spec).reset_index()
def df_summ_changes(df_summ=pd.DataFrame()):
    """Add rate statistics derived from the summed counting columns.

    Each rate is ``numerator / denominator`` and is NaN wherever the
    denominator is zero.  The columns are added to ``df_summ`` in place;
    the returned frame additionally drops rows whose ``bip`` is NaN.

    The division is vectorised, so it works with any index.  The earlier
    row-by-row lookup (``column[i]`` for ``i`` in ``range(len(df))``) was
    label based and only worked on a default ``0..n-1`` index.

    Args:
        df_summ: Summary frame holding the aggregated counting columns.

    Returns:
        ``df_summ`` with the rate columns added and NaN-``bip`` rows removed.
    """
    def as_float(column):
        return df_summ[column].astype(float)

    def ratio(numerator, denominator):
        # Blank out zero denominators first so nothing ever divides by zero.
        return numerator / denominator.where(denominator != 0)

    pitches = as_float('pitches')
    # Three rates require a strictly positive pitch count rather than just a
    # non-zero one; that distinction is preserved here.
    positive_pitches = pitches.where(pitches > 0)
    outside_zone = pitches - as_float('in_zone')

    # (new column, numerator column, denominator column or ready-made Series)
    slash_line = [
        ('avg', 'hits', 'ab'),
        ('obp', 'on_base', 'obp_pa'),
        ('slg', 'tb', 'ab'),
    ]
    rates = [
        ('k_percent', 'k', 'pa'),
        ('bb_percent', 'bb', 'pa'),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa'),
        ('bb_over_k_percent', 'bb', 'k'),
        ('csw_percent', 'csw', 'pitches'),
        ('sweet_spot_percent', 'sweet_spot', 'bip_div'),
        ('woba_percent', 'woba', 'woba_codes'),
        ('woba_percent_contact', 'woba_contact', 'bip'),
        ('hard_hit_percent', 'hard_hit', 'bip_div'),
        ('barrel_percent', 'barrel', 'bip_div'),
        ('zone_contact_percent', 'zone_contact', 'zone_swing'),
        ('zone_swing_percent', 'zone_swing', 'in_zone'),
        ('zone_percent', 'in_zone', positive_pitches),
        ('chase_percent', 'ozone_swing', outside_zone),
        ('chase_contact', 'ozone_contact', 'ozone_swing'),
        ('swing_percent', 'swings', positive_pitches),
        ('whiff_rate', 'whiffs', 'swings'),
        ('swstr_rate', 'whiffs', positive_pitches),
        ('ground_ball_percent', 'ground_ball', 'bip'),
        ('line_drive_percent', 'line_drive', 'bip'),
        ('fly_ball_percent', 'fly_ball', 'bip'),
        ('pop_up_percent', 'pop_up', 'bip'),
        ('heart_zone_percent', 'heart', 'attack_zone'),
        ('shadow_zone_percent', 'shadow', 'attack_zone'),
        ('chase_zone_percent', 'chase', 'attack_zone'),
        ('waste_zone_percent', 'waste', 'attack_zone'),
        ('heart_zone_swing_percent', 'heart_swing', 'heart'),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
        ('chase_zone_swing_percent', 'chase_swing', 'chase'),
        ('waste_zone_swing_percent', 'waste_swing', 'waste'),
        ('xwoba_percent', 'xwoba', 'xwoba_codes'),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip'),
    ]

    def add_rates(specs):
        for new_column, numerator, denominator in specs:
            if isinstance(denominator, str):
                denominator = as_float(denominator)
            df_summ[new_column] = ratio(as_float(numerator), denominator)

    add_rates(slash_line)
    df_summ['ops'] = df_summ['obp'] + df_summ['slg']
    add_rates(rates)

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
def df_summ_filter_out(df_summ=pd.DataFrame(),batter_select = 0,date_min=0):
    """Filter a summary to a qualified pool and rank one batter within it.

    The plate-appearance cut-off is the smallest of: the selected batter's
    ``pa`` rounded down to a multiple of 10, 500, and 20 per whole week
    elapsed since ``date_min``.  Because the cut-off never exceeds the
    selected batter's own ``pa``, that batter is always part of the pool.

    Args:
        df_summ: Summary frame with a ``pa`` column and a MultiIndex whose
            first level identifies the batter.
        batter_select: Key of the batter in index level 0.  It must match
            exactly one row.
        date_min: A ``datetime.date`` marking the start of the period.  The
            default ``0`` cannot be subtracted from a date and raises
            ``TypeError``.

    Returns:
        A tuple ``(filtered frame, percentile ranks of the filtered frame,
        selected batter's row, selected batter's percentile ranks)``.
    """
    import datetime

    def weeks_after(day):
        # Whole weeks between `day` and today.
        return (datetime.date.today() - day).days // 7

    df_summ_player = df_summ.xs(batter_select, level=0)
    # Pull the scalar out explicitly: math.floor() on a one-element Series
    # depends on an implicit float conversion that pandas has deprecated.
    # .item() raises ValueError unless exactly one row matches.
    player_pa = df_summ_player['pa'].item()
    pa_cutoff = min(math.floor(player_pa / 10) * 10,
                    500,
                    weeks_after(date_min) * 20)

    df_summ_filter = df_summ[df_summ['pa'] >= pa_cutoff]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter,df_summ_filter_pct,df_summ_player,df_summ_player_pct
def df_summ_batter_pitch_up(df=pd.DataFrame()):
    """Summarise each batter by pitch category and add rate statistics.

    Rows with a null ``pitch_category`` are dropped, the rest are grouped by
    ``batter_id``, ``batter_name`` and ``pitch_category``.  Each rate is
    ``numerator / denominator`` and is NaN wherever the denominator is zero.

    ``woba_contact`` and ``xwoba_contact`` are summed from their own source
    columns, matching the other summary functions in this module; they were
    previously summed from ``xwoba_contact`` and ``xwoba`` respectively.

    Args:
        df: Frame carrying the columns added by ``df_update``.

    Returns:
        One row per batter / pitch-category pair, with the counting columns,
        the rate columns, and NaN ``bip`` values replaced by 0.
    """
    def summed(columns):
        # Named-aggregation entries that sum a column under its own name.
        return {column: (column, 'sum') for column in columns}

    # Insertion order defines the output column order.
    spec = {}
    spec.update(summed(['pa', 'ab']))
    spec['obp_pa'] = ('obp', 'sum')
    spec.update(summed([
        'hits', 'on_base', 'k', 'bb', 'bb_minus_k', 'csw', 'bip', 'bip_div',
        'tb', 'woba', 'woba_contact', 'xwoba', 'xwoba_contact',
        'woba_codes', 'xwoba_codes', 'hard_hit', 'barrel', 'sweet_spot',
    ]))
    spec['max_launch_speed'] = ('launch_speed', 'max')
    spec['launch_speed_90'] = ('launch_speed', percentile(90))
    spec['launch_speed'] = ('launch_speed', 'mean')
    spec['launch_angle'] = ('launch_angle', 'mean')
    spec['pitches'] = ('is_pitch', 'sum')
    spec.update(summed([
        'swings', 'in_zone', 'out_zone', 'whiffs',
        'zone_swing', 'zone_contact', 'ozone_swing', 'ozone_contact',
    ]))
    spec['ground_ball'] = ('trajectory_ground_ball', 'sum')
    spec['line_drive'] = ('trajectory_line_drive', 'sum')
    spec['fly_ball'] = ('trajectory_fly_ball', 'sum')
    spec['pop_up'] = ('trajectory_popup', 'sum')
    # Count of rows with a non-null attack zone.
    spec['attack_zone'] = ('attack_zone', 'count')
    spec.update(summed([
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]))

    group_keys = ['batter_id', 'batter_name', 'pitch_category']
    df_summ_batter_pitch = (
        df.dropna(subset=['pitch_category'])
        .groupby(group_keys)
        .agg(**spec)
        .reset_index()
    )

    def as_float(column):
        return df_summ_batter_pitch[column].astype(float)

    def ratio(numerator, denominator):
        # Blank out zero denominators first so nothing ever divides by zero.
        return numerator / denominator.where(denominator != 0)

    outside_zone = as_float('pitches') - as_float('in_zone')

    # (new column, numerator column, denominator column or ready-made Series)
    slash_line = [
        ('avg', 'hits', 'ab'),
        ('obp', 'on_base', 'obp_pa'),
        ('slg', 'tb', 'ab'),
    ]
    rates = [
        ('k_percent', 'k', 'pa'),
        ('bb_percent', 'bb', 'pa'),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa'),
        ('bb_over_k_percent', 'bb', 'k'),
        ('csw_percent', 'csw', 'pitches'),
        ('sweet_spot_percent', 'sweet_spot', 'bip_div'),
        ('woba_percent', 'woba', 'woba_codes'),
        ('woba_percent_contact', 'woba_contact', 'bip'),
        ('hard_hit_percent', 'hard_hit', 'bip_div'),
        ('barrel_percent', 'barrel', 'bip_div'),
        ('zone_contact_percent', 'zone_contact', 'zone_swing'),
        ('zone_swing_percent', 'zone_swing', 'in_zone'),
        ('zone_percent', 'in_zone', 'pitches'),
        ('chase_percent', 'ozone_swing', outside_zone),
        ('chase_contact', 'ozone_contact', 'ozone_swing'),
        ('swing_percent', 'swings', 'pitches'),
        ('whiff_rate', 'whiffs', 'swings'),
        ('swstr_rate', 'whiffs', 'pitches'),
        ('heart_zone_percent', 'heart', 'attack_zone'),
        ('shadow_zone_percent', 'shadow', 'attack_zone'),
        ('chase_zone_percent', 'chase', 'attack_zone'),
        ('waste_zone_percent', 'waste', 'attack_zone'),
        ('heart_zone_swing_percent', 'heart_swing', 'heart'),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
        ('chase_zone_swing_percent', 'chase_swing', 'chase'),
        ('waste_zone_swing_percent', 'waste_swing', 'waste'),
        ('xwoba_percent', 'xwoba', 'xwoba_codes'),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip'),
    ]

    def add_rates(specs):
        for new_column, numerator, denominator in specs:
            if isinstance(denominator, str):
                denominator = as_float(denominator)
            df_summ_batter_pitch[new_column] = ratio(as_float(numerator), denominator)

    add_rates(slash_line)
    df_summ_batter_pitch['ops'] = df_summ_batter_pitch['obp'] + df_summ_batter_pitch['slg']
    add_rates(rates)

    df_summ_batter_pitch['bip'] = df_summ_batter_pitch['bip'].fillna(0)
    return df_summ_batter_pitch