import pandas as pd import numpy as np import json from matplotlib.ticker import FuncFormatter from matplotlib.ticker import MaxNLocator import math from matplotlib.patches import Ellipse import matplotlib.transforms as transforms import matplotlib.colors import matplotlib.colors as mcolors import seaborn as sns import matplotlib.pyplot as plt import requests font_properties = {'family': 'calibi', 'size': 12} font_properties_titles = {'family': 'calibi', 'size': 20} font_properties_axes = {'family': 'calibi', 'size': 16} colour_palette = ['#FFB000','#648FFF','#785EF0', '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED'] season_start = '2024-03-20' season_end = '2024-09-29' season_fg=2024 chad_fg = requests.get(f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching').json() cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) chadwick_df_small = pd.DataFrame(data={ 'key_mlbam':[x['xMLBAMID'] for x in chad_fg['data']], 'key_fangraphs':[x['playerid'] for x in chad_fg['data']], 'Name':[x['PlayerName'] for x in chad_fg['data']], }) pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict() mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict() ### DF UPDATE CODE ### def df_update_code(df): print('Starting') #df = pd.read_csv('2024_spring_data.csv',index_col=[0]) print('Starting') df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5 df['t'] = (df['vy_f'] - df['vy0']) / df['ay'] df['vz_f'] = (df['vz0']) + (df['az'] * df['t']) df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi) #df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5 #df['t'] = (df['vy_f'] - df['vy0']) 
/ df['ay'] df['vx_f'] = (df['vx0']) + (df['ax'] * df['t']) df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi) end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch', 'double', 'sac_fly', 'force_out', 'home_run', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'triple', 'sac_bunt', 'double_play', 'intent_walk', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'catcher_interf', 'other_out'] df['pa'] = df.event_type.isin(end_codes) #df['pa'] = 1 df['k'] = df.event_type.isin(list(filter(None, [x if 'strikeout' in x else '' for x in df.event_type.fillna('None').unique()]))) df['bb'] = df.event_type.isin(list(filter(None, [x if 'walk' in x else '' for x in df.event_type.fillna('None').unique()]))) df['k_minus_bb'] = df['k'].astype(np.float32)-df['bb'].astype(np.float32) df = df.drop_duplicates(subset=['play_id']) df = df.dropna(subset=['start_speed']) swing_codes = ['Swinging Strike', 'In play, no out', 'Foul', 'In play, out(s)', 'In play, run(s)', 'Swinging Strike (Blocked)', 'Foul Bunt','Foul Tip', 'Missed Bunt','Foul Pitchout','Swinging Pitchout'] swings_in = ['Swinging Strike', 'In play, no out', 'Foul', 'In play, out(s)', 'In play, run(s)', 'Swinging Strike (Blocked)', 'Foul Bunt','Foul Tip', 'Missed Bunt','Foul Pitchout','Swinging Pitchout'] swing_strike_codes = ['Swinging Strike', 'Swinging Strike (Blocked)','Missed Bunt','Foul Tip','Swinging Pitchout'] contact_codes = ['In play, no out', 'Foul', 'In play, out(s)', 'In play, run(s)', 'Foul Bunt'] codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul', 'In play, no out', 'Called Strike', 'Foul Tip', 'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout', 'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt', 'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout'] df['in_zone'] = df['zone'] < 10 df = df.drop_duplicates(subset=['play_id']) df_codes = df[df.play_description.isin(codes_in)].dropna(subset=['in_zone']) df_codes['bip'] = 
~df_codes.launch_speed.isna() conditions = [ (df_codes['launch_speed'].isna()), (df_codes['launch_speed']*1.5 - df_codes['launch_angle'] >= 117 ) & (df_codes['launch_speed'] + df_codes['launch_angle'] >= 124) & (df_codes['launch_speed'] > 98) & (df_codes['launch_angle'] >= 8) & (df_codes['launch_angle'] <= 50) ] choices = [False,True] df_codes['barrel'] = np.select(conditions, choices, default=np.nan) conditions_ss = [ (df_codes['launch_angle'].isna()), (df_codes['launch_angle'] >= 8 ) * (df_codes['launch_angle'] <= 32 ) ] choices_ss = [False,True] df_codes['sweet_spot'] = np.select(conditions_ss, choices_ss, default=np.nan) conditions_hh = [ (df_codes['launch_speed'].isna()), (df_codes['launch_speed'] >= 94.5 ) ] choices_hh = [False,True] df_codes['hard_hit'] = np.select(conditions_hh, choices_hh, default=np.nan) conditions_tb = [ (df_codes['event_type']=='single'), (df_codes['event_type']=='double'), (df_codes['event_type']=='triple'), (df_codes['event_type']=='home_run'), ] choices_tb = [1,2,3,4] df_codes['tb'] = np.select(conditions_tb, choices_tb, default=np.nan) conditions_woba = [ (df_codes['event_type']=='walk'), (df_codes['event_type']=='hit_by_pitch'), (df_codes['event_type']=='single'), (df_codes['event_type']=='double'), (df_codes['event_type']=='triple'), (df_codes['event_type']=='home_run'), ] choices_woba = [0.705, 0.688, 0.897, 1.233, 1.612, 2.013] df_codes['woba'] = np.select(conditions_woba, choices_woba, default=np.nan) woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch', 'double', 'sac_fly', 'force_out', 'home_run', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'triple', 'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'other_out'] conditions_woba_code = [ (df_codes['event_type'].isin(woba_codes)) ] choices_woba_code = [1] df_codes['woba_codes'] = np.select(conditions_woba_code, choices_woba_code, default=np.nan) #df_codes['barrel'] = (df_codes.launch_speed >= 
98) & (df_codes.launch_angle >= (26 - (-98 + df_codes.launch_speed))) & (df_codes.launch_angle <= 30 + (-98 + df_codes.launch_speed)) & (df_codes.launch_angle >= 8) & (df_codes.launch_angle <= 50) #df_codes['barrel'] = (df_codes.launch_speed >= 98) & (df_codes.launch_angle >= (26 - (-98 + df_codes.launch_speed))) & (df_codes.launch_angle <= 30 + (-98 + df_codes.launch_speed)) & (df_codes.launch_angle >= 8) & (df_codes.launch_angle <= 50) df_codes['pitches'] = 1 df_codes['whiffs'] = [1 if ((x == 'S')|(x == 'W')|(x =='T')) else 0 for x in df_codes.play_code] df_codes['csw'] = [1 if ((x == 'S')|(x == 'W')|(x =='T')|(x == 'C')) else 0 for x in df_codes.play_code] df_codes['swings'] = [1 if x in swings_in else 0 for x in df_codes.play_description] df_codes['out_zone'] = df_codes.in_zone == False df_codes['zone_swing'] = (df_codes.in_zone == True)&(df_codes.swings == 1) df_codes['zone_contact'] = (df_codes.in_zone == True)&(df_codes.swings == 1)&(df_codes.whiffs == 0) df_codes['ozone_swing'] = (df_codes.in_zone==False)&(df_codes.swings == 1) df_codes['ozone_contact'] = (df_codes.in_zone==False)&(df_codes.swings == 1)&(df_codes.whiffs == 0) return df_codes ### GET COLOURS## def get_color(value,normalize,cmap_sum): color = cmap_sum(normalize(value)) return mcolors.to_hex(color) ### PERCENTILE ### def percentile(n): def percentile_(x): return x.quantile(n) percentile_.__name__ = 'percentile_{:02.0f}'.format(n*100) return percentile_ ### TJ STUFF+ DF CLEAN ### def df_clean(df): df_copy = df.copy() df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1 df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1 df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] df_copy['pitch_l'] = [1 if x == 'L' else 0 for x in df_copy['pitcher_hand']] df_copy['bat_l'] = [1 if x == 'L' else 0 for x in df_copy['batter_hand']] #df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "KN", "CS", "SC", 
"FA"])].reset_index(drop=True) #df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "CS", "SC", "FA"])].reset_index(drop=True) df_copy['pitch_type'] = df_copy['pitch_type'].replace({'FT':'SI', #'KC':'CU', 'SV':'SL', 'FO':'FS'}) df_copy_fb_sum = df_copy[df_copy.pitch_type.isin(["FF", "FC", "SI"])].groupby(['pitcher_id']).agg( fb_velo = ('start_speed','mean'), fb_max_ivb = ('ivb',percentile(0.9)), fb_max_x = ('hb',percentile(0.9)), fb_min_x = ('hb',percentile(0.1)), fb_max_velo = ('start_speed',percentile(0.9)), fb_axis = ('spin_direction','mean'), ) df_copy = df_copy.merge(df_copy_fb_sum,left_on='pitcher_id',right_index=True,how='left') df_copy['fb_velo_diff'] = df_copy['start_speed']- df_copy['fb_velo'] df_copy['fb_max_ivb_diff'] = df_copy['ivb']- df_copy['fb_max_ivb'] df_copy['fb_max_hb_diff'] = -abs(df_copy['hb']- df_copy['fb_max_x']) df_copy['fb_min_hb_diff'] = df_copy['hb']- df_copy['fb_min_x'] df_copy['fb_max_velo_diff'] = df_copy['start_speed']- df_copy['fb_max_velo'] df_copy['fb_axis_diff'] = df_copy['spin_direction']- df_copy['fb_axis'] # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_velo_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_ivb_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_hb_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_min_hb_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_velo_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_axis_diff'] = 0 df_copy['max_speed'] = df_copy.groupby(['pitcher_id'])['start_speed'].transform('max') df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed'] df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max') df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb'] df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5 df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / 
df_copy['ay'] df_copy['vz_f'] = (df_copy['vz0']) + (df_copy['az'] * df_copy['t']) df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi) #df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5 #df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay'] df_copy['vx_f'] = (df_copy['vx0']) + (df_copy['ax'] * df_copy['t']) df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi) # df_copy['x_diff'] = df_copy['x0'] - df_copy['px'] # df_copy['z_diff'] = df_copy['z0'] - df_copy['pz'] # df_copy['vaa'] = np.arctan(df_copy['z_diff'] / df_copy['release_pos_y']) * 360 / np.pi # df_copy['haa'] = np.arctan(-df_copy['x_diff'] / df_copy['release_pos_y']) * 360 / np.pi df_copy = df_copy.dropna(subset=['pitch_type']).fillna(0) return df_copy ### PITCH COLOURS ### pitch_colours = { 'Four-Seam Fastball':'#FF007D',#BC136F 'Sinker':'#98165D',#DC267F 'Cutter':'#BE5FA0', 'Changeup':'#F79E70',#F75233 'Splitter':'#FE6100',#F75233 'Screwball':'#F08223', 'Forkball':'#FFB000', 'Slider':'#67E18D',#1BB999#785EF0 'Sweeper':'#1BB999',#37CD85#904039 'Slurve':'#376748',#785EF0#549C07#BEABD8 'Knuckle Curve':'#311D8B', 'Curveball':'#3025CE', 'Slow Curve':'#274BFC', 'Eephus':'#648FFF', 'Knuckleball':'#867A08', 'Pitch Out':'#472C30', 'Other':'#9C8975', } ### PITCH ELLIPSE ### def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs): """ Create a plot of the covariance confidence ellipse of *x* and *y*. Parameters ---------- x, y : array-like, shape (n, ) Input data. ax : matplotlib.axes.Axes The axes object to draw the ellipse into. n_std : float The number of standard deviations to determine the ellipse's radiuses. 
**kwargs Forwarded to `~matplotlib.patches.Ellipse` Returns ------- matplotlib.patches.Ellipse """ if x.size != y.size: raise ValueError("x and y must be the same size") try: cov = np.cov(x, y) pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) # Using a special case to obtain the eigenvalues of this # two-dimensional dataset. ell_radius_x = np.sqrt(1 + pearson) ell_radius_y = np.sqrt(1 - pearson) ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2, facecolor=facecolor,linewidth=2,linestyle='--', **kwargs) # Calculating the standard deviation of x from # the squareroot of the variance and multiplying # with the given number of standard deviations. scale_x = np.sqrt(cov[0, 0]) * n_std mean_x = np.mean(x) # calculating the standard deviation of y ... scale_y = np.sqrt(cov[1, 1]) * n_std mean_y = np.mean(y) transf = transforms.Affine2D() \ .rotate_deg(45) \ .scale(scale_x, scale_y) \ .translate(mean_x, mean_y) ellipse.set_transform(transf + ax.transData) except ValueError: return return ax.add_patch(ellipse) # DEFINE STRIKE ZONE strike_zone = pd.DataFrame({ 'PlateLocSide': [-0.9, -0.9, 0.9, 0.9, -0.9], 'PlateLocHeight': [1.5, 3.5, 3.5, 1.5, 1.5] }) ### STRIKE ZONE ### def draw_line(axis,alpha_spot=1,catcher_p = True): axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'], color='black', linewidth=1.3,zorder=3,alpha=alpha_spot,) # ax.plot([-0.2833333, -0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) # ax.plot([0.2833333, 0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) # ax.plot([-0.85, 0.85], [2.2, 2.2], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) # ax.plot([-0.85, 0.85], [2.9, 2.9], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) if catcher_p: # Add dashed line # Add home plate axis.plot([-0.708, 0.708], [0.15, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.708, -0.708], [0.15, 0.3], color='black', 
linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.708, 0], [0.3, 0.5], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0, 0.708], [0.5, 0.3], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0.708, 0.708], [0.3, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1) else: axis.plot([-0.708, 0.708], [0.4, 0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.708, -0.9], [0.4, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.9, 0], [-0.1, -0.35], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0, 0.9], [-.35, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0.9, 0.708], [-0.1,0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1) ### FANGRAPHS STATS DICT ### fangraphs_stats_dict = {'IP':{'table_header':'$\\bf{IP}$','format':'.1f',} , 'TBF':{'table_header':'$\\bf{PA}$','format':'.0f',} , 'AVG':{'table_header':'$\\bf{AVG}$','format':'.3f',} , 'K/9':{'table_header':'$\\bf{K\/9}$','format':'.2f',} , 'BB/9':{'table_header':'$\\bf{BB\/9}$','format':'.2f',} , 'K/BB':{'table_header':'$\\bf{K\/BB}$','format':'.2f',} , 'HR/9':{'table_header':'$\\bf{HR\/9}$','format':'.2f',} , 'K%':{'table_header':'$\\bf{K\%}$','format':'.1%',} , 'BB%':{'table_header':'$\\bf{BB\%}$','format':'.1%',} , 'K-BB%':{'table_header':'$\\bf{K-BB\%}$','format':'.1%',} , 'WHIP':{'table_header':'$\\bf{WHIP}$','format':'.2f',} , 'BABIP':{'table_header':'$\\bf{BABIP}$','format':'.3f',} , 'LOB%':{'table_header':'$\\bf{LOB\%}$','format':'.1%',} , 'xFIP':{'table_header':'$\\bf{xFIP}$','format':'.2f',} , 'FIP':{'table_header':'$\\bf{FIP}$','format':'.2f',} , 'H':{'table_header':'$\\bf{H}$','format':'.0f',} , '2B':{'table_header':'$\\bf{2B}$','format':'.0f',} , '3B':{'table_header':'$\\bf{3B}$','format':'.0f',} , 'R':{'table_header':'$\\bf{R}$','format':'.0f',} , 'ER':{'table_header':'$\\bf{ER}$','format':'.0f',} , 'HR':{'table_header':'$\\bf{HR}$','format':'.0f',} , 
'BB':{'table_header':'$\\bf{BB}$','format':'.0f',} , 'IBB':{'table_header':'$\\bf{IBB}$','format':'.0f',} , 'HBP':{'table_header':'$\\bf{HBP}$','format':'.0f',} , 'SO':{'table_header':'$\\bf{SO}$','format':'.0f',} , 'OBP':{'table_header':'$\\bf{OBP}$','format':'.0f',} , 'SLG':{'table_header':'$\\bf{SLG}$','format':'.0f',} , 'ERA':{'table_header':'$\\bf{ERA}$','format':'.2f',} , 'wOBA':{'table_header':'$\\bf{wOBA}$','format':'.3f',} , 'G':{'table_header':'$\\bf{G}$','format':'.0f',} } ## Fangraphs Table ### FANGRAPHS SPLITS SCRAPE ### split_dict = {'all':[], 'left':['5'], 'right':['6'] } def fangraphs_scrape(pitcher_id=808967, split='all', start_date='2024-03-20', end_date='2024-09-29'): url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders" payload = { "strPlayerId": str(mlb_fg_dicts[pitcher_id]), "strSplitArr": split_dict[split], "strGroup": "season", "strPosition": "P", "strType": "2", "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')), "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')), "strSplitTeams": False, "dctFilters": [], "strStatType": "player", "strAutoPt": False, "arrPlayerId": [], "strSplitArrPitch": [], "arrWxTemperature": None, "arrWxPressure": None, "arrWxAirDensity": None, "arrWxElevation": None, "arrWxWindSpeed": None } json_payload = json.dumps(payload) headers = {'Content-Type': 'application/json'} response = requests.post(url, data=json_payload, headers=headers) data_pull = response.json()['data'][0] payload_advanced = { "strPlayerId": str(mlb_fg_dicts[pitcher_id]), "strSplitArr": split_dict[split], "strGroup": "season", "strPosition": "P", "strType": "1", "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')), "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')), "strSplitTeams": False, "dctFilters": [], "strStatType": "player", "strAutoPt": False, "arrPlayerId": [], "strSplitArrPitch": [], "arrWxTemperature": None, "arrWxPressure": None, "arrWxAirDensity": None, 
"arrWxElevation": None, "arrWxWindSpeed": None } json_payload_advanced = json.dumps(payload_advanced) headers = {'Content-Type': 'application/json'} response_advanced = requests.post(url, data=json_payload_advanced, headers=headers) data_pull_advanced = response_advanced.json()['data'][0] data_pull.update(data_pull_advanced) return data_pull ### FANGRAPHS TABLE PLOT ### def fangraphs_table(data, stats, ax): fg_values = [data[x] if x in data else '---' for x in stats] df_fg = pd.DataFrame(data=dict(zip(stats,fg_values)),index=[0]) df_fg.loc[0] = [format(df_fg[x][0],fangraphs_stats_dict[x]['format']) if df_fg[x][0] != '---' else '---' for x in df_fg] table_fg = ax.table(cellText=df_fg.values, colLabels=df_fg.columns, cellLoc='center', bbox=[0.04, 0.2, 0.92, 0.8]) min_font_size = 20 table_fg.set_fontsize(min_font_size) new_column_names = [fangraphs_stats_dict[x]['table_header'] if x in data else '---' for x in stats] # #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%'] for i, col_name in enumerate(new_column_names): table_fg.get_celld()[(0, i)].get_text().set_text(col_name) ax.axis('off') return table_fg ### VELOCITY KDES ### def velocity_kdes(df, ax, gs, gs_list, fig): sorted_value_counts = df['pitch_type'].value_counts().sort_values(ascending=False) # Get the list of items ordered from most to least frequent items_in_order = sorted_value_counts.index.tolist() # Create the inner subplot inside the outer subplot import matplotlib.gridspec as gridspec ax.axis ('off') #ax.set_ylabel('Pitch Velocity Distribution', fontdict=font_properties_axes) ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20}) inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order),1, subplot_spec=gs[2,gs_list]) ax_top = [] for inner in inner_grid_1: ax_top.append(fig.add_subplot(inner)) ax_number = 0 for i in items_in_order[0:]: if np.unique(df[df['pitch_type']==i]['start_speed']).size == 1: # Check if 
all values are the same print('just') ax_top[ax_number].plot([np.unique(df[df['pitch_type']==i]['start_speed']),np.unique(df[df['pitch_type']==i]['start_speed'])],[0,1], linewidth=4, color=pitch_colours[df[df['pitch_type']==i]['pitch_description'].values[0]],zorder=20) # ax_top[ax_number].plot(np.unique(df_melt[df_melt['Player']==i]['value']), [0.5]*len(np.unique(df_melt[df_melt['Player']==i]['value'])), linewidth=4) else: sns.kdeplot(df[df['pitch_type']==i]['start_speed'],ax=ax_top[ax_number],fill=True, clip=(df[df['pitch_type']==i]['start_speed'].min(),df[df['pitch_type']==i]['start_speed'].max()), color=pitch_colours[df[df['pitch_type']==i]['pitch_description'].values[0]]) ax_top[ax_number].set_xlim(math.floor(df['start_speed'].min()/5)*5,math.ceil(df['start_speed'].max()/5)*5) ax_top[ax_number].set_xlabel('') ax_top[ax_number].set_ylabel('') if ax_number < len(items_in_order)-1: ax_top[ax_number].spines['top'].set_visible(False) ax_top[ax_number].spines['right'].set_visible(False) ax_top[ax_number].spines['left'].set_visible(False) ax_top[ax_number].tick_params(axis='x', colors='none') ax_top[ax_number].set_xticks(range(math.floor(df['start_speed'].min()/5)*5,math.ceil(df['start_speed'].max()/5)*5,5)) ax_top[ax_number].set_yticks([]) ax_top[ax_number].grid(axis='x', linestyle='--') ax_top[ax_number].text(-0.01, 0.5, i, transform=ax_top[ax_number].transAxes, fontsize=14, va='center', ha='right') ax_number = ax_number + 1 ax_top[-1].spines['top'].set_visible(False) ax_top[-1].spines['right'].set_visible(False) ax_top[-1].spines['left'].set_visible(False) ax_top[-1].set_xticks(list(range(math.floor(df['start_speed'].min()/5)*5,math.ceil(df['start_speed'].max()/5)*5,5))) ax_top[-1].set_xlabel('Velocity (mph)') ### TJ STUFF+ ROLLING ### def tj_stuff_roling(df, window, ax): ## Velocity Plot sorted_value_counts = df['pitch_type'].value_counts().sort_values(ascending=False) # Get the list of items ordered from most to least frequent items_in_order = 
sorted_value_counts.index.tolist() for i in items_in_order: if max(df[df['pitch_type']==i]['pitch_type_count_each']) >= window: sns.lineplot(x=range(1,max(df[df['pitch_type']==i]['pitch_type_count_each'])+1), y=df[df['pitch_type']==i]['tj_stuff_plus'].rolling(window).sum()/window, color=pitch_colours[df[df['pitch_type']==i]['pitch_description'].values[0]], ax=ax,linewidth=3) # Adjust x-axis limits to start from 1 ax.set_xlim(window,max(df['pitch_type_count_each'])) ax.set_ylim(70,130) #ax.get_legend().remove() ax.set_xlabel('Pitches', fontdict=font_properties_axes) ax.set_ylabel('tjStuff+', fontdict=font_properties_axes) ax.set_title(f"{window} Pitch Rolling tjStuff+",fontdict=font_properties_titles) # ax.axis('square') # ax.set_xlim(left=1) ax.xaxis.set_major_locator(MaxNLocator(integer=True)) ### BREAK PLOT ### def break_plot(df, ax): label_labels = df.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique() j = 0 for label in label_labels: subset = df[df['pitch_description'] == label] print(label) if len(subset) > 4: if df['pitcher_hand'].values[0] == 'R': subset['hb'] = subset['hb']*1 if df['pitcher_hand'].values[0] == 'L': subset['hb'] = subset['hb']*1 subset['ivb'] = subset['ivb']*1 try: confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,edgecolor = pitch_colours[label],n_std=2,facecolor= pitch_colours[label],alpha=0.2) except ValueError: return j=j+1 else: j=j+1 if df['pitcher_hand'].values[0] == 'R': sns.scatterplot(ax=ax,x=df.hb*1,y=df.ivb*1,hue=df.pitch_description,palette=pitch_colours,ec='black',alpha=1,zorder=2) if df['pitcher_hand'].values[0] == 'L': sns.scatterplot(ax=ax,x=df.hb*1,y=df.ivb*1,hue=df.pitch_description,palette=pitch_colours,ec='black',alpha=1,zorder=2) ax.set_xlim((-25,25)) ax.set_ylim((-25,25)) ax.hlines(y=0,xmin=-50,xmax=50,color=colour_palette[8],alpha=0.5,linestyles='--',zorder=1) ax.vlines(x=0,ymin=-50,ymax=50,color=colour_palette[8],alpha=0.5,linestyles='--',zorder=1) ax.set_xlabel('Horizontal 
Break (in)', fontdict=font_properties_axes) ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes) ax.set_title("Pitch Breaks",fontdict=font_properties_titles) ax.get_legend().remove() # ax1.set_xticklabels(ax1.get_xticks(), fontdict=font_properties) ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties) # ax1.set_yticklabels(ax1.get_yticks(), fontdict=font_properties) ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties) #ax1.set_aspect('equal', adjustable='box') if df['pitcher_hand'].values[0] == 'R': ax.text(-24.5,-24.5,s='← Glove Side',fontstyle='italic',ha='left',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) ax.text(24.5,-24.5,s='Arm Side →',fontstyle='italic',ha='right',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) #ax.invert_xaxis() if df['pitcher_hand'].values[0] == 'L': ax.invert_xaxis() ax.text(24.5,-24.5,s='← Arm Side',fontstyle='italic',ha='left',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) ax.text(-24.5,-24.5,s='Glove Side →',fontstyle='italic',ha='right',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) ax.set_aspect('equal', adjustable='box') #ax1.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x))) ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x))) ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x))) ### TABLE SUMMARY ### def table_summary(df, pitcher_id, ax, df_group, df_group_all, statcast_pitch_summary): cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) ax.axis('off') df_group['spin_direction_adj'] = [(x + 180) for x in df_group['spin_direction']] #(((df_group.groupby('pitch_description').mean()[['spin_direction_adj']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) clock_time = ((df_group.groupby('pitch_description').mean()['spin_direction_adj']) %360 // 30 )+ 
(((df_group.groupby('pitch_description').mean()['spin_direction_adj'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) # print('Clocks') # print(clock_time) clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').str.replace('0:','12:').str.replace('112:','10:').to_frame() df_group = df_group.merge(right=clock_time,left_on='pitch_description',right_index=True,suffixes=['','_clock']) plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values( by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb', 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', 'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate']] # if df['pitcher_hand'].values[0] == 'L': # plot_table['hb'] = plot_table['hb']*-1 #if df['pitcher_hand'].values[0] == 'R': plot_table['horizontal_release'] = plot_table['horizontal_release']*-1 plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum() plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb', 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', 'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate']] plot_table_all = pd.DataFrame(data={'pitch_description': 'All', 'pitches': plot_table['pitches'].sum(), 'pitch_percent': 1.0, 'start_speed': '—', 'ivb': '—', 'hb': '—', 'spin_rate': '—', 'vaa': '—', 'haa': '—', 'vertical_release': '—', 'horizontal_release': '—', 'extension': df['extension'].mean(), 'spin_direction_adj_clock': '—', 'tj_stuff_plus': df[df['pitcher_id']==pitcher_id]['tj_stuff_plus'].mean(), 'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0], 'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0], 'whiff_rate': 
df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0], },index=[0] ) plot_table = pd.concat([plot_table,plot_table_all]).fillna('—') plt.rcParams['font.family'] = 'Calibri' table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center', colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8]) min_font_size = 14 # Set table properties table.auto_set_font_size(False) #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10))) table.set_fontsize(min_font_size) table.scale(1, 0.5) min_font_size = 20 # Set font size for values # Adjust the font size as needed for i in range(len(plot_table)+1): for j in range(len(plot_table.columns)): if i > 0: # Skip the header row cell = table.get_celld()[i, j] cell.set_fontsize(min_font_size) for i in range(len(plot_table)): if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All': table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()]) # Header cell color if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Split-Finger','Slider','Changeup']: table.get_celld()[(i+1, 0)].set_text_props(color='#000000',fontweight='bold') else: table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold') if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball': table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam Fastball') select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]] normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(), vmax=select_df['start_speed'].mean()+select_df.pitch_velocity_std.mean()) # Define the range of values if table.get_celld()[(i+1, 3)].get_text().get_text() != '—': table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text()),normalize,cmap_sum)) # Header cell color 
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['extension'].mean()*0.9, vmax=select_df['extension'].mean()*1.1) if table.get_celld()[(i+1,11)].get_text().get_text() != '—': table.get_celld()[(i+1,11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text()),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=80, vmax=120) print(normalize) if table.get_celld()[(i+1,13)].get_text().get_text() != '—': table.get_celld()[(i+1,13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text()),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['zone_percent'].mean()*0.7, vmax=select_df['zone_percent'].mean()*1.3) if table.get_celld()[(i+1,14)].get_text().get_text() != '—': table.get_celld()[(i+1,14)].set_facecolor(get_color(float(table.get_celld()[(i+1, 14)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['chase_percent'].mean()*0.7, vmax=select_df['chase_percent'].mean()*1.3) if table.get_celld()[(i+1,15)].get_text().get_text() != '—': table.get_celld()[(i+1,15)].set_facecolor(get_color(float(table.get_celld()[(i+1, 15)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3) if table.get_celld()[(i+1,16)].get_text().get_text() != '—': 
table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold') new_column_names = ['$\\bf{Pitch\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$', '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$', '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Zone\%}$', '$\\bf{Chase\%}$', '$\\bf{Whiff\%}$', ] for i, col_name in enumerate(new_column_names): table.get_celld()[(0, i)].get_text().set_text(col_name) float_list = ['start_speed','ivb', 'hb', 'vaa', 'haa', 'vertical_release','horizontal_release', 'extension'] for fl in float_list: # Subset of column names subset_columns = [fl] # Get the list of column indices column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] # # print(column_indices) for row_l in range(1,len(plot_table)+1): # print(row_l) if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': # print() # print(fl) table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate'] for fl in percent_list: # Subset of column names subset_columns = [fl] # Get the list of column indices column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] # # print(column_indices) for row_l in range(1,len(plot_table)+1): # print(row_l) if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': # print(fl) table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1%}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) int_list = ['tj_stuff_plus','spin_rate'] for fl in int_list: # Subset of column names 
subset_columns = [fl] # Get the list of column indices column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] # # print(column_indices) for row_l in range(1,len(plot_table)+1): # print(row_l) if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': # print(fl) table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.0f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) return table


### GROUPED IVB CREATION ###
def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, giving NaN wherever the denominator is 0."""
    # Series.where() replaces the zero denominators with NaN, so the division
    # propagates NaN instead of producing inf / a divide-by-zero warning.
    return numerator / denominator.where(denominator != 0)


def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into one summary row per group.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. It must contain every column named in ``agg_list``
        as well as the columns aggregated below (``start_speed``, ``ivb``,
        ``hb``, ``spin_rate``, ``vaa``, ``haa``, ``x0``, ``z0``,
        ``extension``, ``spin_direction``, ``tj_stuff_plus``, ``swings``,
        ``in_zone``, ``out_zone``, ``whiffs``, ``zone_swing``,
        ``zone_contact``, ``ozone_swing``, ``ozone_contact``).
    agg_list : list of str, optional
        Columns to group by. When omitted, groups by ``pitcher_id``,
        ``pitcher_name``, ``pitcher_hand``, ``pitch_type`` and
        ``pitch_description``.

    Returns
    -------
    pandas.DataFrame
        One row per group holding the aggregated columns plus the derived
        rate columns (``zone_contact_percent``, ``zone_swing_percent``,
        ``zone_percent``, ``chase_percent``, ``chase_contact``,
        ``swing_percent``, ``whiff_rate``, ``swstr_rate``). A rate is NaN
        whenever its denominator is zero.
    """
    if agg_list is None:
        # Built per call so the default is never a shared mutable object.
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand',
                    'pitch_type', 'pitch_description']

    grouped_ivb = df.groupby(agg_list).agg(
        # 'count' only counts non-null start_speed values.
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
    ).reset_index()

    pitches = grouped_ivb['pitches']
    in_zone = grouped_ivb['in_zone']
    swings = grouped_ivb['swings']
    zone_swing = grouped_ivb['zone_swing']
    ozone_swing = grouped_ivb['ozone_swing']

    grouped_ivb['zone_contact_percent'] = _safe_ratio(grouped_ivb['zone_contact'], zone_swing)
    # Guard on in_zone (the actual denominator); the previous guard checked
    # pitches, which let groups with no in-zone pitches divide by zero.
    grouped_ivb['zone_swing_percent'] = _safe_ratio(zone_swing, in_zone)
    grouped_ivb['zone_percent'] = _safe_ratio(in_zone, pitches)
    # Out-of-zone pitches are taken as pitches - in_zone, not the out_zone sum.
    grouped_ivb['chase_percent'] = _safe_ratio(ozone_swing, pitches - in_zone)
    grouped_ivb['chase_contact'] = _safe_ratio(grouped_ivb['ozone_contact'], ozone_swing)
    grouped_ivb['swing_percent'] = _safe_ratio(swings, pitches)
    grouped_ivb['whiff_rate'] = _safe_ratio(grouped_ivb['whiffs'], swings)
    grouped_ivb['swstr_rate'] = _safe_ratio(grouped_ivb['whiffs'], pitches)

    return grouped_ivb


####LHH
def location_plot(df,ax,hand):
    """Draw pitch locations on ``ax`` for pitches thrown to ``hand``-handed batters.

    For each ``pitch_description`` with at least 5 rows against ``hand``, a
    ``confidence_ellipse`` of ``px``/``pz`` is drawn; then one marker per
    pitch description is plotted at the mean ``px``/``pz``, sized by that
    pitch's share of the pitches thrown to ``hand``.

    ``df`` must provide the columns ``prop``, ``pitch_type``,
    ``pitch_description``, ``batter_hand``, ``px``, ``pz`` and
    ``start_speed``. ``pitch_colours``, ``confidence_ellipse`` and
    ``draw_line`` are defined elsewhere in this file.
    """
    # Pitch descriptions ordered by descending 'prop', then by pitch_type.
    label_labels = df.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
    j = 0
    for label in label_labels:
        subset = df[(df['pitch_description'] == label)&(df['batter_hand'] == hand)]
        print(label)
        # Ellipses are only drawn for pitch types with at least 5 rows.
        if len(subset) >= 5:
            confidence_ellipse(subset['px'], subset['pz'], ax=ax,edgecolor = pitch_colours[label],n_std=1.5,facecolor= pitch_colours[label],alpha=0.3)
            j=j+1
        else:
            j=j+1

    # Mean location and pitch count per pitch description against this hand.
    pitch_location_group = df[(df['batter_hand'] == hand)].groupby(['pitch_description']).agg(
        pitches = ('start_speed','count'),
        px = ('px','mean'),
        pz = ('pz','mean')).reset_index()
    pitch_location_group['pitch_percent'] = pitch_location_group['pitches']/pitch_location_group['pitches'].sum()

    ## Location Plot
    sns.scatterplot(ax=ax,x=pitch_location_group['px'], y=pitch_location_group['pz'], hue=pitch_location_group['pitch_description'], palette=pitch_colours,ec='black', s=pitch_location_group['pitch_percent']*750, linewidth=2, zorder=2)
    ax.axis('square')
    draw_line(ax,alpha_spot=0.75,catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75,2.75))
    ax.set_ylim((-0.5,5))
    # NOTE(review): presumably no legend exists when nothing was plotted, so
    # it is only removed when the group is non-empty - confirm.
    if len(pitch_location_group['px'])>0:
        ax.get_legend().remove()
    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches",fontdict=font_properties_titles)