|
|
|
import pandas as pd |
|
import numpy as np |
|
import json |
|
from matplotlib.ticker import FuncFormatter |
|
from matplotlib.ticker import MaxNLocator |
|
import math |
|
from matplotlib.patches import Ellipse |
|
import matplotlib.transforms as transforms |
|
import matplotlib.colors |
|
import matplotlib.colors as mcolors |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import requests |
|
|
|
# Shared matplotlib font dictionaries (tick labels, titles, axis labels).
# The family was previously spelled 'calibi', which matplotlib cannot resolve
# and replaces with its fallback font; 'Calibri' matches the rcParams value
# set in table_summary.
font_properties = {'family': 'Calibri', 'size': 12}
font_properties_titles = {'family': 'Calibri', 'size': 20}
font_properties_axes = {'family': 'Calibri', 'size': 16}

# General-purpose colour list (index 8 is used for the zero lines in break_plot).
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

# Season window and the season number interpolated into the FanGraphs URL.
season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024

# Pitching leaderboard for the season, fetched once at import time.  Only the
# 'data' list of the response is read below.  The timeout keeps an unresponsive
# server from blocking the import forever.
chad_fg = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching',
    timeout=60,
).json()

# Diverging blue -> white -> gold colormap.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000', ])
|
|
|
|
|
# Player lookup table built from the leaderboard rows: MLBAM id, FanGraphs id
# and display name for every row in the response.
_fg_rows = chad_fg['data']
chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [row['xMLBAMID'] for row in _fg_rows],
    'key_fangraphs': [row['playerid'] for row in _fg_rows],
    'Name': [row['PlayerName'] for row in _fg_rows],
})

# MLBAM id -> name and MLBAM id -> FanGraphs id, each sorted by its values.
_by_mlbam = chadwick_df_small.set_index('key_mlbam')
pitcher_dicts = _by_mlbam['Name'].sort_values().to_dict()
mlb_fg_dicts = _by_mlbam['key_fangraphs'].sort_values().to_dict()
|
|
|
|
|
|
|
def df_update_code(df):
    """Add approach angles, plate-appearance flags and per-pitch outcome columns.

    The input frame is left untouched; all work happens on copies.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pitch.  Columns read: ``vx0, vy0, vz0, ax, ay, az, y0``,
        ``event_type``, ``play_id``, ``start_speed``, ``zone``,
        ``play_description``, ``play_code``, ``launch_speed``, ``launch_angle``.

    Returns
    -------
    pandas.DataFrame
        Rows de-duplicated on ``play_id``, with a non-null ``start_speed`` and a
        ``play_description`` in the recognised pitch-result list, carrying the
        original columns plus: ``vy_f, t, vz_f, vaa, vx_f, haa, pa, k, bb,
        k_minus_bb, in_zone, bip, barrel, sweet_spot, hard_hit, tb, woba,
        woba_codes, pitches, whiffs, csw, swings, out_zone, zone_swing,
        zone_contact, ozone_swing, ozone_contact``.
    """
    print('Starting')

    # Private copy: the derived columns must not leak into the caller's frame.
    df = df.copy()

    # Constant-acceleration kinematics evaluated at y = 17/12
    # (presumably feet, i.e. the front of home plate -- confirm units).
    df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5
    df['t'] = (df['vy_f'] - df['vy0']) / df['ay']
    df['vz_f'] = df['vz0'] + (df['az'] * df['t'])
    df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi)

    df['vx_f'] = df['vx0'] + (df['ax'] * df['t'])
    df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi)

    # Event types that end a plate appearance.
    end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                 'double', 'sac_fly', 'force_out', 'home_run',
                 'grounded_into_double_play', 'fielders_choice', 'field_error',
                 'triple', 'sac_bunt', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play', 'catcher_interf', 'other_out']
    df['pa'] = df['event_type'].isin(end_codes)

    # Any event whose name contains 'strikeout' / 'walk' counts
    # (so 'strikeout_double_play' and 'intent_walk' are included).
    event_names = df['event_type'].fillna('None').unique()
    df['k'] = df['event_type'].isin([e for e in event_names if 'strikeout' in e])
    df['bb'] = df['event_type'].isin([e for e in event_names if 'walk' in e])
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)

    # One row per pitch, and only pitches with a measured velocity.
    df = df.drop_duplicates(subset=['play_id']).dropna(subset=['start_speed'])

    # Pitch results counted as a swing.
    swings_in = ['Swinging Strike', 'In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Swinging Strike (Blocked)',
                 'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']

    # Pitch results kept in the output; anything else is dropped.
    codes_in = ['In play, out(s)',
                'Swinging Strike',
                'Ball',
                'Foul',
                'In play, no out',
                'Called Strike',
                'Foul Tip',
                'In play, run(s)',
                'Hit By Pitch',
                'Ball In Dirt',
                'Pitchout',
                'Swinging Strike (Blocked)',
                'Foul Bunt',
                'Missed Bunt',
                'Foul Pitchout',
                'Intent Ball',
                'Swinging Pitchout']

    # Zones below 10 are treated as inside the strike zone.
    df = df.assign(in_zone=df['zone'] < 10)

    # Explicit copy so the assignments below write to an independent frame
    # rather than to a filtered view of ``df``.
    df_codes = df[df['play_description'].isin(codes_in)].dropna(subset=['in_zone']).copy()

    launch_speed = df_codes['launch_speed']
    launch_angle = df_codes['launch_angle']
    event_type = df_codes['event_type']

    # A ball in play is any pitch with a recorded launch speed.
    df_codes['bip'] = launch_speed.notna()

    # Batted-ball quality flags: 0.0 without batted-ball data, 1.0 when the
    # criterion is met, NaN otherwise.
    is_barrel = (
        (launch_speed * 1.5 - launch_angle >= 117)
        & (launch_speed + launch_angle >= 124)
        & (launch_speed > 98)
        & (launch_angle >= 8)
        & (launch_angle <= 50)
    )
    df_codes['barrel'] = np.select([launch_speed.isna(), is_barrel], [False, True], default=np.nan)

    is_sweet_spot = (launch_angle >= 8) & (launch_angle <= 32)
    df_codes['sweet_spot'] = np.select([launch_angle.isna(), is_sweet_spot], [False, True], default=np.nan)

    df_codes['hard_hit'] = np.select([launch_speed.isna(), launch_speed >= 94.5], [False, True], default=np.nan)

    # Total bases for hits; NaN for everything else.
    df_codes['tb'] = np.select(
        [event_type == 'single', event_type == 'double',
         event_type == 'triple', event_type == 'home_run'],
        [1, 2, 3, 4],
        default=np.nan)

    # Linear weights per event; NaN for events without a weight.
    df_codes['woba'] = np.select(
        [event_type == 'walk', event_type == 'hit_by_pitch', event_type == 'single',
         event_type == 'double', event_type == 'triple', event_type == 'home_run'],
        [0.705, 0.688, 0.897, 1.233, 1.612, 2.013],
        default=np.nan)

    # Events flagged 1 in ``woba_codes`` (end_codes without 'intent_walk'
    # and 'catcher_interf').
    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']
    df_codes['woba_codes'] = np.select([event_type.isin(woba_codes)], [1], default=np.nan)

    # Per-pitch 0/1 counters, ready to be summed in a groupby.
    df_codes['pitches'] = 1
    df_codes['whiffs'] = df_codes['play_code'].isin(['S', 'W', 'T']).astype('int64')
    df_codes['csw'] = df_codes['play_code'].isin(['S', 'W', 'T', 'C']).astype('int64')
    df_codes['swings'] = df_codes['play_description'].isin(swings_in).astype('int64')

    swung = df_codes['swings'] == 1
    made_contact = df_codes['whiffs'] == 0
    in_zone = df_codes['in_zone'] == True    # noqa: E712 - keeps NA-safe comparison
    out_zone = df_codes['in_zone'] == False  # noqa: E712

    df_codes['out_zone'] = out_zone
    df_codes['zone_swing'] = in_zone & swung
    df_codes['zone_contact'] = in_zone & swung & made_contact
    df_codes['ozone_swing'] = out_zone & swung
    df_codes['ozone_contact'] = out_zone & swung & made_contact

    return df_codes
|
|
|
|
|
def get_color(value, normalize, cmap_sum):
    """Return the hex colour string for ``value``.

    ``normalize`` is called on ``value`` first, the result is passed to the
    ``cmap_sum`` callable, and the colour it returns is converted to hex.
    """
    return mcolors.to_hex(cmap_sum(normalize(value)))
|
|
|
|
|
def percentile(n):
    """Build an aggregation function returning the ``n``-quantile of its input.

    ``n`` is a fraction (0.9 gives the 90th percentile).  The returned function
    is renamed ``percentile_XX`` so each quantile has a distinct name when used
    as an aggregator.
    """
    def percentile_(values):
        return values.quantile(n)

    percentile_.__name__ = f'percentile_{n * 100:02.0f}'
    return percentile_
|
|
|
|
|
def df_clean(df):
    """Return a cleaned copy of ``df`` with handedness-normalised and derived columns.

    Steps, in order:
      * keep rows with positive ``spin_rate`` and ``extension``;
      * for left-handed pitchers negate ``hb`` and ``x0`` and replace
        ``spin_direction`` with ``360 - spin_direction``;
      * add 0/1 flags ``pitch_l`` / ``bat_l``;
      * fold pitch types FT -> SI, SV -> SL, FO -> FS;
      * join each pitcher's FF/FC/SI summary (``fb_*``) and add the
        pitch-minus-summary ``*_diff`` columns;
      * add per-pitcher maxima of velocity and ivb with their diffs;
      * add the approach-angle columns ``vy_f, t, vz_f, vaa, vx_f, haa``;
      * drop rows without a ``pitch_type``.

    The input frame is not modified.
    """
    tracked = (df['spin_rate'] > 0) & (df['extension'] > 0)
    df_copy = df[tracked].copy()

    # Flip horizontal quantities for left-handers.
    lefty = df_copy['pitcher_hand'] == 'L'
    df_copy.loc[lefty, 'hb'] *= -1
    df_copy.loc[lefty, 'x0'] *= -1
    df_copy.loc[lefty, 'spin_direction'] = 360 - df_copy.loc[lefty, 'spin_direction']

    df_copy['pitch_l'] = [int(hand == 'L') for hand in df_copy['pitcher_hand']]
    df_copy['bat_l'] = [int(hand == 'L') for hand in df_copy['batter_hand']]

    pitch_type_aliases = {'FT': 'SI', 'SV': 'SL', 'FO': 'FS'}
    df_copy['pitch_type'] = df_copy['pitch_type'].replace(pitch_type_aliases)

    # Per-pitcher fastball reference values (FF / FC / SI only).
    fastballs = df_copy[df_copy.pitch_type.isin(["FF", "FC", "SI"])]
    df_copy_fb_sum = fastballs.groupby(['pitcher_id']).agg(
        fb_velo=('start_speed', 'mean'),
        fb_max_ivb=('ivb', percentile(0.9)),
        fb_max_x=('hb', percentile(0.9)),
        fb_min_x=('hb', percentile(0.1)),
        fb_max_velo=('start_speed', percentile(0.9)),
        fb_axis=('spin_direction', 'mean'),
    )
    df_copy = df_copy.merge(df_copy_fb_sum, left_on='pitcher_id', right_index=True, how='left')

    # Each pitch relative to that pitcher's fastball reference.
    df_copy['fb_velo_diff'] = df_copy['start_speed'] - df_copy['fb_velo']
    df_copy['fb_max_ivb_diff'] = df_copy['ivb'] - df_copy['fb_max_ivb']
    df_copy['fb_max_hb_diff'] = -abs(df_copy['hb'] - df_copy['fb_max_x'])
    df_copy['fb_min_hb_diff'] = df_copy['hb'] - df_copy['fb_min_x']
    df_copy['fb_max_velo_diff'] = df_copy['start_speed'] - df_copy['fb_max_velo']
    df_copy['fb_axis_diff'] = df_copy['spin_direction'] - df_copy['fb_axis']

    # Each pitch relative to the pitcher's overall maxima.
    by_pitcher = df_copy.groupby(['pitcher_id'])
    df_copy['max_speed'] = by_pitcher['start_speed'].transform('max')
    df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed']

    df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max')
    df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb']

    # Constant-acceleration kinematics evaluated at y = 17/12.
    y_travel = df_copy['y0'] - 17/12
    df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * y_travel))**0.5
    df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
    df_copy['vz_f'] = df_copy['vz0'] + (df_copy['az'] * df_copy['t'])
    df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi)

    df_copy['vx_f'] = df_copy['vx0'] + (df_copy['ax'] * df_copy['t'])
    df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi)

    return df_copy.dropna(subset=['pitch_type'])
|
|
|
|
|
# Hex colour for every pitch description that can appear on a plot or table.
# Keys are the full pitch names found in the ``pitch_description`` column.
pitch_colours = {
    # Fastball variants
    'Four-Seam Fastball': '#FF007D',
    'Fastball': '#FF007D',
    'Sinker': '#98165D',
    'Cutter': '#BE5FA0',

    # Changeup / splitter group
    'Changeup': '#F79E70',
    'Splitter': '#FE6100',
    'Screwball': '#F08223',
    'Forkball': '#FFB000',

    # Slider group
    'Slider': '#67E18D',
    'Sweeper': '#1BB999',
    'Slurve': '#376748',

    # Curveball group
    'Knuckle Curve': '#311D8B',
    'Curveball': '#3025CE',
    'Slow Curve': '#274BFC',
    'Eephus': '#648FFF',

    'Knuckle Ball': '#867A08',

    # Everything else
    'Pitch Out': '#472C30',
    'Other': '#9C8975',
}
|
|
|
|
|
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : color
        Fill colour of the ellipse; ``'none'`` leaves it unfilled.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`.  ``linewidth`` defaults to
        2 and ``linestyle`` to ``'--'``; both may be overridden here.

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*, or ``None`` when building the ellipse raised
        a ``ValueError`` (nothing is drawn in that case).

    Raises
    ------
    ValueError
        If *x* and *y* differ in size.
    """
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    # The dashed 2pt outline is only a default.  Merging it with kwargs (rather
    # than passing both) avoids a duplicate-keyword TypeError when the caller
    # supplies its own linewidth/linestyle.
    patch_style = {'linewidth': 2, 'linestyle': '--', **kwargs}

    try:
        cov = np.cov(x, y)
        pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])

        # Unit ellipse for the correlation coefficient; it is rotated, scaled
        # and shifted into data space by the transform below.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, **patch_style)

        # Standard deviation of each variable times n_std gives the scale;
        # the means give the centre.
        scale_x = np.sqrt(cov[0, 0]) * n_std
        mean_x = np.mean(x)

        scale_y = np.sqrt(cov[1, 1]) * n_std
        mean_y = np.mean(y)

        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)

        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best effort: skip the ellipse instead of failing the whole figure.
        return None

    return ax.add_patch(ellipse)
|
|
|
|
|
# Closed outline of the strike-zone rectangle as (side, height) corner points;
# the first corner is repeated so the line closes.
_zone_corners = [(-0.9, 1.5), (-0.9, 3.5), (0.9, 3.5), (0.9, 1.5), (-0.9, 1.5)]
strike_zone = pd.DataFrame({
    'PlateLocSide': [side for side, _ in _zone_corners],
    'PlateLocHeight': [height for _, height in _zone_corners],
})


def draw_line(axis, alpha_spot=1, catcher_p=True):
    """Draw the strike zone and a five-segment home-plate outline on ``axis``.

    Parameters
    ----------
    axis : object with a matplotlib-style ``plot`` method
        Target axes.
    alpha_spot : float
        Alpha applied to every line drawn.
    catcher_p : bool
        Chooses between the two plate outlines defined below (the parameter
        name suggests catcher's view when True -- confirm with callers).
    """
    axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'],
              color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Each entry is one straight segment: ([x_start, x_end], [y_start, y_end]).
    if catcher_p:
        plate_segments = [
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        ]
    else:
        plate_segments = [
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        ]

    for xs, ys in plate_segments:
        axis.plot(xs, ys, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
|
|
|
|
|
|
|
|
|
# Display settings for every FanGraphs stat that can appear in the stat table:
# 'table_header' is the bold mathtext column header, 'format' the format spec
# applied to the value.
#
# Raw strings keep the mathtext escapes (\bf, \/, \%) byte-for-byte while
# avoiding Python's invalid-escape-sequence warning for '\/' and '\%'.
# OBP and SLG use '.3f' like the other rate stats; with the previous '.0f'
# a value such as 0.321 was rendered as '0'.
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
}
|
|
|
|
|
|
|
|
|
|
|
# Split name -> value sent as ``strSplitArr`` to the FanGraphs splits API.
# 'all' sends no split filter; '5' / '6' are the API codes used for the
# 'left' / 'right' splits (presumably batter handedness -- confirm).
split_dict = {
    'all': [],
    'left': ['5'],
    'right': ['6'],
}


def _fangraphs_splits_row(fangraphs_id, split, start_date, end_date, stat_type, timeout):
    """POST one splits-leaderboard query and return the first row of its ``data`` list."""
    payload = {
        "strPlayerId": str(fangraphs_id),
        "strSplitArr": split_dict[split],
        "strGroup": "season",
        "strPosition": "P",
        "strType": stat_type,
        "strStartDate": start_date,
        "strEndDate": end_date,
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None
    }
    response = requests.post(
        "https://www.fangraphs.com/api/leaders/splits/splits-leaders",
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    return response.json()['data'][0]


def fangraphs_scrape(pitcher_id=808967,
                     split='all',
                     start_date='2024-03-20',
                     end_date='2024-09-29',
                     timeout=30):
    """Fetch one pitcher's FanGraphs split stats for a date range.

    Two queries are sent, ``strType`` "2" and then "1", and the second
    result is merged over the first.

    Parameters
    ----------
    pitcher_id : int
        MLBAM id; translated to the FanGraphs id through ``mlb_fg_dicts``.
    split : str
        Key of ``split_dict`` ('all', 'left' or 'right').
    start_date, end_date : str or datetime-like
        Range bounds; anything ``pandas.to_datetime`` accepts.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    dict
        Stat name -> value for the first row of each response, merged.

    Raises
    ------
    KeyError
        If ``pitcher_id`` or ``split`` is unknown, or a response has no 'data'.
    IndexError
        If a response contains no rows for the requested range.
    """
    fangraphs_id = mlb_fg_dicts[pitcher_id]
    start = pd.to_datetime(start_date).strftime('%Y-%m-%d')
    end = pd.to_datetime(end_date).strftime('%Y-%m-%d')

    data_pull = _fangraphs_splits_row(fangraphs_id, split, start, end, "2", timeout)
    data_pull_advanced = _fangraphs_splits_row(fangraphs_id, split, start, end, "1", timeout)

    # Keys present in both responses take the value from the second query.
    data_pull.update(data_pull_advanced)

    return data_pull
|
|
|
|
|
|
|
def fangraphs_table(data,
                    stats,
                    ax):
    """Render a one-row table of FanGraphs stats on ``ax``.

    Parameters
    ----------
    data : dict
        Stat name -> value, as returned by ``fangraphs_scrape``.
    stats : list of str
        Stats to show, in column order.  Each stat present in ``data`` must
        have an entry in ``fangraphs_stats_dict``.
    ax : matplotlib.axes.Axes
        Axes to draw into; its frame and ticks are switched off.

    Returns
    -------
    matplotlib.table.Table

    Notes
    -----
    A stat missing from ``data``, or present with a ``None`` value, is shown
    as '---'.  Values are formatted directly; round-tripping them through a
    DataFrame meant writing strings into numeric columns.
    """
    placeholder = '---'

    cell_values = []
    column_headers = []
    for stat in stats:
        if stat not in data:
            cell_values.append(placeholder)
            column_headers.append(placeholder)
            continue

        value = data[stat]
        if value is None or value == placeholder:
            cell_values.append(placeholder)
        else:
            cell_values.append(format(value, fangraphs_stats_dict[stat]['format']))
        column_headers.append(fangraphs_stats_dict[stat]['table_header'])

    table_fg = ax.table(cellText=[cell_values], colLabels=column_headers, cellLoc='center',
                        bbox=[0.04, 0.2, 0.92, 0.8])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    ax.axis('off')

    return table_fg
|
|
|
|
|
def velocity_kdes(df,
                  ax,
                  gs,
                  gs_list,
                  fig):
    """Draw one velocity-distribution panel per pitch type, stacked vertically.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitches with ``pitch_type``, ``pitch_description`` and ``start_speed``.
    ax : matplotlib.axes.Axes
        Host axes; only used for the title, its own frame is hidden.
    gs : matplotlib.gridspec.GridSpec
        Figure grid; the panels are nested inside ``gs[2, gs_list]``.
    gs_list : int or slice
        Column index(es) of ``gs`` occupied by the panels.
    fig : matplotlib.figure.Figure
        Figure the panel axes are added to.

    Notes
    -----
    Panels are ordered by pitch usage.  A pitch type with a single distinct
    velocity is drawn as a vertical line because a KDE is undefined there.
    Loop nesting was reconstructed from a source that had lost its
    indentation -- NOTE(review): confirm against the original layout.
    """
    import matplotlib.gridspec as gridspec

    pitch_types = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    ax.axis('off')
    # Font family fixed from the typo 'calibi'.
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'Calibri', 'size': 20})

    inner_grid = gridspec.GridSpecFromSubplotSpec(len(pitch_types), 1, subplot_spec=gs[2, gs_list])
    ax_top = [fig.add_subplot(cell) for cell in inner_grid]

    # Shared x-range, widened to multiples of 5; computed once for all panels.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_low, x_high, 5))

    last_index = len(pitch_types) - 1
    for index, (pitch_type, panel) in enumerate(zip(pitch_types, ax_top)):
        pitch_rows = df[df['pitch_type'] == pitch_type]
        speeds = pitch_rows['start_speed']
        colour = pitch_colours[pitch_rows['pitch_description'].values[0]]

        distinct_speeds = np.unique(speeds)
        if distinct_speeds.size == 1:
            panel.plot([distinct_speeds, distinct_speeds], [0, 1], linewidth=4,
                       color=colour, zorder=20)
        else:
            sns.kdeplot(speeds, ax=panel, fill=True,
                        clip=(speeds.min(), speeds.max()),
                        color=colour)

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')

        for side in ('top', 'right', 'left'):
            panel.spines[side].set_visible(False)
        if index < last_index:
            # Only the bottom panel keeps visible x tick labels.
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(x_ticks)
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, pitch_type, transform=panel.transAxes,
                   fontsize=14, va='center', ha='right')

    ax_top[-1].set_xlabel('Velocity (mph)')
|
|
|
|
|
def tj_stuff_roling(df,
                    window,
                    ax):
    """Plot a rolling mean of ``tj_stuff_plus`` for each pitch type on ``ax``.

    A pitch type gets a line only when its largest ``pitch_type_count_each``
    is at least ``window``.  The x axis runs from ``window`` to the largest
    count in ``df``; the y axis is fixed to 70-130.
    """
    usage_order = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    for pitch_type in usage_order:
        pitch_rows = df[df['pitch_type'] == pitch_type]
        thrown = max(pitch_rows['pitch_type_count_each'])
        if thrown < window:
            continue

        rolling_mean = pitch_rows['tj_stuff_plus'].rolling(window).sum() / window
        line_colour = pitch_colours[pitch_rows['pitch_description'].values[0]]
        sns.lineplot(x=range(1, thrown + 1),
                     y=rolling_mean,
                     color=line_colour,
                     ax=ax, linewidth=3)

    ax.set_xlim(window, max(df['pitch_type_count_each']))
    ax.set_ylim(70, 130)

    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)

    # Pitch counts are whole numbers, so keep the ticks on integers.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
|
|
|
|
def break_plot(df,
               ax):
    """Scatter horizontal vs induced vertical break, with an ellipse per pitch.

    Parameters
    ----------
    df : pandas.DataFrame
        One pitcher's pitches with ``hb``, ``ivb``, ``pitch_description``,
        ``pitch_type``, ``prop`` and ``pitcher_hand``.  The hand is read from
        the first row only.
    ax : matplotlib.axes.Axes
        Axes to draw into.

    Notes
    -----
    Pitch descriptions with more than four pitches get a 2-sigma ellipse.
    For a left-hander the x axis is inverted and the side labels swapped.
    Block nesting was reconstructed from a source that had lost its
    indentation -- NOTE(review): confirm against the original layout.
    """
    hand = df['pitcher_hand'].values[0]

    descriptions = df.sort_values(
        by=['prop', 'pitch_type'], ascending=[False, True]).pitch_description.unique()

    for label in descriptions:
        subset = df[df['pitch_description'] == label]
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=2,
                               facecolor=pitch_colours[label], alpha=0.2)
        except ValueError:
            # Skip this ellipse only; returning here would leave the axes
            # without scatter points, limits and labels.
            continue

    # The scatter was only ever drawn for a recognised hand, and identically
    # for both.
    if hand in ('R', 'L'):
        sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df.pitch_description,
                        palette=pitch_colours, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Dashed zero lines behind the points.
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    ax.get_legend().remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    label_box = dict(facecolor='white', edgecolor='black')
    if hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)

    if hand == 'L':
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')

    # Whole-number tick labels.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
|
|
|
|
|
def _shade_table_cell(cells, row, col, vmin, vmax, cmap):
    """Colour cell (row, col) by its numeric text on a vmin..vmax scale; skip '—' cells."""
    text = cells[(row, col)].get_text().get_text()
    if text != '—':
        normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
        cells[(row, col)].set_facecolor(get_color(float(text.strip('%')), normalize, cmap))


def _format_table_column(cells, col, n_rows, template):
    """Re-render every non-'—' body cell of column ``col`` with ``template``."""
    for row in range(1, n_rows + 1):
        text_obj = cells[(row, col)].get_text()
        raw = text_obj.get_text()
        if raw != '—':
            text_obj.set_text(template.format(float(raw.strip('%'))))


def table_summary(df,
                  pitcher_id,
                  ax,
                  df_group,
                  df_group_all,
                  statcast_pitch_summary):
    """Draw the per-pitch-type summary table for one pitcher on ``ax``.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data; supplies the 'All' row's ``extension`` mean (over
        every row of ``df``) and ``tj_stuff_plus`` mean (this pitcher only).
        NOTE(review): the extension mean is not filtered by pitcher -- confirm
        that ``df`` is already a single-pitcher frame.
    pitcher_id : int
        Pitcher whose rows of ``df_group`` / ``df_group_all`` are shown.
    ax : matplotlib.axes.Axes
        Axes to draw the table into; its frame is switched off.
    df_group : pandas.DataFrame
        Per pitcher / pitch-type aggregates.  Not modified.
    df_group_all : pandas.DataFrame
        Per-pitcher aggregates over all pitches (rate stats of the 'All' row).
    statcast_pitch_summary : pandas.DataFrame
        Reference values per ``pitch_description`` used to centre the colour
        scales (``start_speed``, ``pitch_velocity_std``, ``extension``,
        ``zone_percent``, ``chase_percent``, ``whiff_rate``, ``xwobacon``).

    Returns
    -------
    matplotlib.table.Table

    Notes
    -----
    Sets ``plt.rcParams['font.family']`` globally.  Column positions used for
    shading (3, 11, 13-17) follow the order of ``display_columns``.
    """
    cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000', ])
    # Reversed scale for xwOBA on contact.
    cmap_sum_r = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF', ])

    ax.axis('off')

    # assign() returns a new frame, so the caller's df_group is left alone.
    df_group = df_group.assign(spin_direction_adj=df_group['spin_direction'] + 180)

    # Average adjusted spin axis per pitch description.  The column is selected
    # before mean() so text columns are never averaged (raises on pandas >= 2).
    mean_axis = df_group.groupby('pitch_description')['spin_direction_adj'].mean()

    # Degrees -> clock-face value: 30 degrees per hour, the remainder mapped
    # onto quarter-hour steps.
    clock_time = (mean_axis % 360 // 30) + (((mean_axis % 360 % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4)

    # Render as 'H:MM' text.  regex=False keeps every replacement literal
    # regardless of the pandas version's default.
    clock_time = (
        (clock_time.astype(int) + clock_time % 1 * 60 / 100)
        .round(2)
        .astype(str)
        .str.replace('.', ':', regex=False)
        .str.replace(':0', ':00', regex=False)
        .str.replace(':3', ':30', regex=False)
        .str.replace('0:', '12:', regex=False)
        .str.replace('112:', '10:', regex=False)
        .to_frame()
    )
    df_group = df_group.merge(right=clock_time, left_on='pitch_description',
                              right_index=True, suffixes=('', '_clock'))

    display_columns = ['pitch_description', 'pitches', 'pitch_percent', 'start_speed', 'ivb',
                       'hb', 'spin_rate', 'vaa', 'haa', 'vertical_release', 'horizontal_release',
                       'extension', 'spin_direction_adj_clock', 'tj_stuff_plus', 'zone_percent',
                       'chase_percent', 'whiff_rate', 'xwobacon']

    plot_table = df_group[df_group['pitcher_id'] == pitcher_id].sort_values(
        by=['pitches'], ascending=False).copy()
    plot_table['horizontal_release'] = plot_table['horizontal_release'] * -1
    plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum()
    plot_table = plot_table[display_columns]

    pitcher_all = df_group_all[df_group_all['pitcher_id'] == pitcher_id]
    plot_table_all = pd.DataFrame(data={'pitch_description': 'All',
                                        'pitches': plot_table['pitches'].sum(),
                                        'pitch_percent': 1.0,
                                        'start_speed': '—',
                                        'ivb': '—',
                                        'hb': '—',
                                        'spin_rate': '—',
                                        'vaa': '—',
                                        'haa': '—',
                                        'vertical_release': '—',
                                        'horizontal_release': '—',
                                        'extension': df['extension'].mean(),
                                        'spin_direction_adj_clock': '—',
                                        'tj_stuff_plus': df[df['pitcher_id'] == pitcher_id]['tj_stuff_plus'].mean(),
                                        'zone_percent': pitcher_all['zone_percent'].values[0],
                                        'chase_percent': pitcher_all['chase_percent'].values[0],
                                        'whiff_rate': pitcher_all['whiff_rate'].values[0],
                                        'xwobacon': pitcher_all['xwobacon'].values[0],
                                        }, index=[0])

    plot_table = pd.concat([plot_table, plot_table_all]).fillna('—')

    plt.rcParams['font.family'] = 'Calibri'

    table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center',
                     colWidths=[2.3] + [1] * 17, bbox=[0.04, 0, 0.92, 0.8])

    # Header cells stay at 14pt; body cells are enlarged to 18pt below.
    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1, 0.5)

    cells = table.get_celld()
    n_rows = len(plot_table)
    n_cols = len(plot_table.columns)

    for row in range(1, n_rows + 1):
        for col in range(n_cols):
            cells[row, col].set_fontsize(18)

    for i, description in enumerate(plot_table['pitch_description'].values):
        row = i + 1
        name_cell = cells[(row, 0)]
        name = name_cell.get_text().get_text()

        if name != 'All':
            name_cell.set_facecolor(pitch_colours[name])
            # Black text on the listed pitch names, white on all others.
            if name in ['Split-Finger', 'Slider', 'Changeup']:
                name_cell.set_text_props(color='#000000', fontweight='bold')
            else:
                name_cell.set_text_props(color='#ffffff', fontweight='bold')
            if name == 'Four-Seam Fastball':
                name_cell.get_text().set_text('4-Seam')

        # NOTE(review): the source had lost its indentation; the shading below
        # is assumed to apply to every row, including 'All' -- confirm.
        select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == description]

        # Velocity: reference mean +/- one standard deviation.
        velo_mean = select_df['start_speed'].mean()
        velo_std = select_df.pitch_velocity_std.mean()
        _shade_table_cell(cells, row, 3, velo_mean - velo_std, velo_mean + velo_std, cmap_sum)

        # Extension: reference mean +/- 10%.
        extension_mean = select_df['extension'].mean()
        _shade_table_cell(cells, row, 11, extension_mean * 0.9, extension_mean * 1.1, cmap_sum)

        # tjStuff+: fixed 80-120 scale.
        _shade_table_cell(cells, row, 13, 80, 120, cmap_sum)

        # Rate stats: reference mean +/- 30%.
        for col, stat in ((14, 'zone_percent'), (15, 'chase_percent'), (16, 'whiff_rate')):
            centre = select_df[stat].mean()
            _shade_table_cell(cells, row, col, centre * 0.7, centre * 1.3, cmap_sum)

        xwobacon_mean = select_df['xwobacon'].mean()
        _shade_table_cell(cells, row, 17, xwobacon_mean * 0.7, xwobacon_mean * 1.3, cmap_sum_r)

    # The 'All' label (last row) is always black and bold.
    cells[(n_rows, 0)].set_text_props(color='#000000', fontweight='bold')

    # Bold mathtext headers; raw strings keep '\ ' and '\%' literal.
    new_column_names = [r'$\bf{Pitch\ Name}$',
                        r'$\bf{Count}$',
                        r'$\bf{Pitch\%}$',
                        r'$\bf{Velocity}$',
                        r'$\bf{iVB}$',
                        r'$\bf{HB}$',
                        r'$\bf{Spin}$',
                        r'$\bf{VAA}$',
                        r'$\bf{HAA}$',
                        r'$\bf{vRel}$',
                        r'$\bf{hRel}$',
                        r'$\bf{Ext.}$',
                        r'$\bf{Axis}$',
                        r'$\bf{tjStuff+}$',
                        r'$\bf{Zone\%}$',
                        r'$\bf{Chase\%}$',
                        r'$\bf{Whiff\%}$',
                        '$\\bf{xwOBA}$\n$\\bf{Contact}$',
                        ]
    for col, col_name in enumerate(new_column_names):
        cells[(0, col)].get_text().set_text(col_name)

    # Number format per column; runs after shading because shading reads the
    # raw cell text.
    column_templates = {
        'start_speed': '{:,.1f}',
        'ivb': '{:,.1f}',
        'hb': '{:,.1f}',
        'vaa': '{:,.1f}',
        'haa': '{:,.1f}',
        'vertical_release': '{:,.1f}',
        'horizontal_release': '{:,.1f}',
        'extension': '{:,.1f}',
        'xwobacon': '{:,.3f}',
        'pitch_percent': '{:,.1%}',
        'zone_percent': '{:,.1%}',
        'chase_percent': '{:,.1%}',
        'whiff_rate': '{:,.1%}',
        'tj_stuff_plus': '{:,.0f}',
        'spin_rate': '{:,.0f}',
    }
    for column, template in column_templates.items():
        _format_table_column(cells, plot_table.columns.get_loc(column), n_rows, template)

    return table
|
|
|
|
|
def _safe_ratio(numerator, denominator):
    """Divide two aligned Series element-wise, giving NaN where the denominator is 0."""
    # ``where`` replaces zero denominators with NaN, so the division can never
    # produce inf or trigger a divide-by-zero warning.
    return numerator / denominator.where(denominator != 0)


def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into per-group averages, counts and rate stats.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. The columns read here are the grouping keys plus
        ``start_speed``, ``ivb``, ``hb``, ``spin_rate``, ``vaa``, ``haa``,
        ``x0``, ``z0``, ``extension``, ``spin_direction``, ``tj_stuff_plus``,
        ``swings``, ``in_zone``, ``out_zone``, ``whiffs``, ``zone_swing``,
        ``zone_contact``, ``ozone_swing``, ``ozone_contact``, ``woba_pred``
        and ``launch_speed``.
    agg_list : list of str, optional
        Columns to group by. Defaults to ``['pitcher_id', 'pitcher_name',
        'pitcher_hand', 'pitch_type', 'pitch_description']``.

    Returns
    -------
    pandas.DataFrame
        One row per group (index reset) holding the aggregated columns
        followed by the derived rate columns ``zone_contact_percent``,
        ``zone_swing_percent``, ``zone_percent``, ``chase_percent``,
        ``chase_contact``, ``swing_percent``, ``whiff_rate``, ``swstr_rate``
        and ``xwobacon``. A rate is NaN whenever its denominator is zero.
    """
    if agg_list is None:
        # Built per call so the default is never a shared mutable object.
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand',
                    'pitch_type', 'pitch_description']

    grouped_ivb = df.groupby(agg_list).agg(
        # 'count' tallies non-null values, so ``pitches`` is the number of
        # rows in the group that have a ``start_speed``.
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
        woba_pred=('woba_pred', 'sum'),
        # Rows with a non-null ``launch_speed`` are used as the ball-in-play count.
        bip=('launch_speed', 'count'),
    ).reset_index()

    pitches = grouped_ivb['pitches']
    in_zone = grouped_ivb['in_zone']
    swings = grouped_ivb['swings']
    whiffs = grouped_ivb['whiffs']
    zone_swing = grouped_ivb['zone_swing']
    ozone_swing = grouped_ivb['ozone_swing']

    grouped_ivb['zone_contact_percent'] = _safe_ratio(grouped_ivb['zone_contact'], zone_swing)
    # Guard on the real denominator (in-zone pitches); the previous check on
    # ``pitches`` let groups with no in-zone pitches divide by zero.
    grouped_ivb['zone_swing_percent'] = _safe_ratio(zone_swing, in_zone)
    grouped_ivb['zone_percent'] = _safe_ratio(in_zone, pitches)
    # Out-of-zone pitches are derived as pitches minus in-zone pitches rather
    # than read from the ``out_zone`` column.
    grouped_ivb['chase_percent'] = _safe_ratio(ozone_swing, pitches - in_zone)
    grouped_ivb['chase_contact'] = _safe_ratio(grouped_ivb['ozone_contact'], ozone_swing)
    grouped_ivb['swing_percent'] = _safe_ratio(swings, pitches)
    grouped_ivb['whiff_rate'] = _safe_ratio(whiffs, swings)
    grouped_ivb['swstr_rate'] = _safe_ratio(whiffs, pitches)
    grouped_ivb['xwobacon'] = _safe_ratio(grouped_ivb['woba_pred'], grouped_ivb['bip'])

    return grouped_ivb
|
|
|
|
|
|
|
def location_plot(df, ax, hand):
    """Draw pitch-location ellipses and mean-location markers for one batter side.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. Columns read here: ``prop``, ``pitch_type``,
        ``pitch_description``, ``batter_hand``, ``px``, ``pz`` and
        ``start_speed``.
    ax : matplotlib.axes.Axes
        Axes to draw on; it is modified in place.
    hand : str
        Value matched against ``batter_hand``; also interpolated into the
        title as ``"vs {hand}HB"``.

    Returns
    -------
    None

    Notes
    -----
    Relies on ``confidence_ellipse``, ``draw_line``, ``pitch_colours`` and
    ``font_properties_titles`` defined elsewhere in this module.
    """
    # Pitch descriptions ordered by descending ``prop``, then by pitch type.
    label_labels = (
        df.sort_values(by=['prop', 'pitch_type'], ascending=[False, True])
        .pitch_description.unique()
    )

    for label in label_labels:
        subset = df[(df['pitch_description'] == label) & (df['batter_hand'] == hand)]
        print(label)
        # Pitch types with fewer than 5 pitches against this side get no ellipse.
        if len(subset) >= 5:
            confidence_ellipse(
                subset['px'],
                subset['pz'],
                ax=ax,
                edgecolor=pitch_colours[label],
                n_std=1.5,
                facecolor=pitch_colours[label],
                alpha=0.3,
            )

    # Mean location and pitch count per pitch description against this side.
    pitch_location_group = (
        df[df['batter_hand'] == hand]
        .groupby(['pitch_description'])
        .agg(
            pitches=('start_speed', 'count'),
            px=('px', 'mean'),
            pz=('pz', 'mean'),
        )
        .reset_index()
    )
    pitch_location_group['pitch_percent'] = (
        pitch_location_group['pitches'] / pitch_location_group['pitches'].sum()
    )

    # Marker area scales with each pitch's share of usage against this side.
    sns.scatterplot(
        ax=ax,
        x=pitch_location_group['px'],
        y=pitch_location_group['pz'],
        hue=pitch_location_group['pitch_description'],
        palette=pitch_colours,
        ec='black',
        s=pitch_location_group['pitch_percent'] * 750,
        linewidth=2,
        zorder=2,
    )

    ax.axis('square')
    # NOTE(review): draw_line is defined elsewhere; presumably it draws the
    # strike-zone outline - confirm against its definition.
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # Check for the legend itself rather than the row count: nothing is
    # plotted (and no legend is created) when every mean location is NaN,
    # in which case calling .remove() on None would raise.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.grid(False)
    ax.set_title(
        f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches",
        fontdict=font_properties_titles,
    )
|
|