Upload pitch_summary_functions.py
Browse files- pitch_summary_functions.py +48 -8
pitch_summary_functions.py
CHANGED
@@ -230,6 +230,9 @@ def percentile(n):
|
|
230 |
### TJ STUFF+ DF CLEAN ###
|
231 |
def df_clean(df):
|
232 |
df_copy = df.copy()
|
|
|
|
|
|
|
233 |
df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1
|
234 |
df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1
|
235 |
df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction']
|
@@ -292,7 +295,7 @@ def df_clean(df):
|
|
292 |
# df_copy['vaa'] = np.arctan(df_copy['z_diff'] / df_copy['release_pos_y']) * 360 / np.pi
|
293 |
# df_copy['haa'] = np.arctan(-df_copy['x_diff'] / df_copy['release_pos_y']) * 360 / np.pi
|
294 |
|
295 |
-
df_copy = df_copy.dropna(subset=['pitch_type'])
|
296 |
return df_copy
|
297 |
|
298 |
### PITCH COLOURS ###
|
@@ -726,7 +729,7 @@ def table_summary(df,
|
|
726 |
plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values(
|
727 |
by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb',
|
728 |
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
|
729 |
-
'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate']]
|
730 |
|
731 |
# if df['pitcher_hand'].values[0] == 'L':
|
732 |
# plot_table['hb'] = plot_table['hb']*-1
|
@@ -738,7 +741,7 @@ def table_summary(df,
|
|
738 |
|
739 |
plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb',
|
740 |
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
|
741 |
-
'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate']]
|
742 |
|
743 |
plot_table_all = pd.DataFrame(data={'pitch_description': 'All',
|
744 |
'pitches': plot_table['pitches'].sum(),
|
@@ -757,18 +760,21 @@ def table_summary(df,
|
|
757 |
'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0],
|
758 |
'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0],
|
759 |
'whiff_rate': df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0],
|
|
|
760 |
|
761 |
|
762 |
},index=[0]
|
763 |
)
|
764 |
-
|
|
|
765 |
plot_table = pd.concat([plot_table,plot_table_all]).fillna('—')
|
766 |
|
767 |
|
768 |
|
769 |
plt.rcParams['font.family'] = 'Calibri'
|
|
|
770 |
table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center',
|
771 |
-
colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8])
|
772 |
|
773 |
min_font_size = 14
|
774 |
# Set table properties
|
@@ -777,7 +783,7 @@ def table_summary(df,
|
|
777 |
table.set_fontsize(min_font_size)
|
778 |
table.scale(1, 0.5)
|
779 |
|
780 |
-
min_font_size =
|
781 |
# Set font size for values
|
782 |
# Adjust the font size as needed
|
783 |
for i in range(len(plot_table)+1):
|
@@ -797,7 +803,8 @@ def table_summary(df,
|
|
797 |
table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold')
|
798 |
if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball':
|
799 |
table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam')
|
800 |
-
|
|
|
801 |
select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]]
|
802 |
|
803 |
normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(),
|
@@ -834,6 +841,14 @@ def table_summary(df,
|
|
834 |
normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3)
|
835 |
if table.get_celld()[(i+1,16)].get_text().get_text() != '—':
|
836 |
table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
837 |
|
838 |
table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold')
|
839 |
|
@@ -856,6 +871,7 @@ def table_summary(df,
|
|
856 |
'$\\bf{Zone\%}$',
|
857 |
'$\\bf{Chase\%}$',
|
858 |
'$\\bf{Whiff\%}$',
|
|
|
859 |
]
|
860 |
|
861 |
for i, col_name in enumerate(new_column_names):
|
@@ -878,6 +894,21 @@ def table_summary(df,
|
|
878 |
# print(fl)
|
879 |
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
|
880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
881 |
|
882 |
|
883 |
percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate']
|
@@ -918,7 +949,10 @@ def table_summary(df,
|
|
918 |
### GROUPED IVB CREATION ###
|
919 |
def group_ivb_update(df,
|
920 |
agg_list=['pitcher_id','pitcher_name','pitcher_hand','pitch_type','pitch_description']):
|
921 |
-
|
|
|
|
|
|
|
922 |
grouped_ivb = df.groupby(agg_list).agg(
|
923 |
pitches = ('start_speed','count'),
|
924 |
|
@@ -941,6 +975,9 @@ def group_ivb_update(df,
|
|
941 |
zone_contact = ('zone_contact','sum'),
|
942 |
ozone_swing = ('ozone_swing','sum'),
|
943 |
ozone_contact = ('ozone_contact','sum'),
|
|
|
|
|
|
|
944 |
).reset_index()
|
945 |
|
946 |
|
@@ -960,6 +997,9 @@ def group_ivb_update(df,
|
|
960 |
|
961 |
grouped_ivb['swstr_rate'] = [grouped_ivb.whiffs[x]/grouped_ivb.pitches[x] if grouped_ivb.pitches[x] != 0 else np.nan for x in range(len(grouped_ivb))]
|
962 |
|
|
|
|
|
|
|
963 |
return grouped_ivb
|
964 |
|
965 |
|
|
|
230 |
### TJ STUFF+ DF CLEAN ###
|
231 |
def df_clean(df):
|
232 |
df_copy = df.copy()
|
233 |
+
|
234 |
+
df_copy = df_copy[(df_copy['spin_rate']>0)&(df_copy['extension']>0)]
|
235 |
+
|
236 |
df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1
|
237 |
df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1
|
238 |
df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction']
|
|
|
295 |
# df_copy['vaa'] = np.arctan(df_copy['z_diff'] / df_copy['release_pos_y']) * 360 / np.pi
|
296 |
# df_copy['haa'] = np.arctan(-df_copy['x_diff'] / df_copy['release_pos_y']) * 360 / np.pi
|
297 |
|
298 |
+
df_copy = df_copy.dropna(subset=['pitch_type'])#.fillna(0)
|
299 |
return df_copy
|
300 |
|
301 |
### PITCH COLOURS ###
|
|
|
729 |
plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values(
|
730 |
by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb',
|
731 |
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
|
732 |
+
'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate','xwobacon']]
|
733 |
|
734 |
# if df['pitcher_hand'].values[0] == 'L':
|
735 |
# plot_table['hb'] = plot_table['hb']*-1
|
|
|
741 |
|
742 |
plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb',
|
743 |
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
|
744 |
+
'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate','xwobacon']]
|
745 |
|
746 |
plot_table_all = pd.DataFrame(data={'pitch_description': 'All',
|
747 |
'pitches': plot_table['pitches'].sum(),
|
|
|
760 |
'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0],
|
761 |
'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0],
|
762 |
'whiff_rate': df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0],
|
763 |
+
'xwobacon': df_group_all[df_group_all['pitcher_id']==pitcher_id]['xwobacon'].values[0],
|
764 |
|
765 |
|
766 |
},index=[0]
|
767 |
)
|
768 |
+
print('LOOK HERE')
|
769 |
+
print(plot_table)
|
770 |
plot_table = pd.concat([plot_table,plot_table_all]).fillna('—')
|
771 |
|
772 |
|
773 |
|
774 |
plt.rcParams['font.family'] = 'Calibri'
|
775 |
+
|
776 |
table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center',
|
777 |
+
colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8])
|
778 |
|
779 |
min_font_size = 14
|
780 |
# Set table properties
|
|
|
783 |
table.set_fontsize(min_font_size)
|
784 |
table.scale(1, 0.5)
|
785 |
|
786 |
+
min_font_size = 20
|
787 |
# Set font size for values
|
788 |
# Adjust the font size as needed
|
789 |
for i in range(len(plot_table)+1):
|
|
|
803 |
table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold')
|
804 |
if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball':
|
805 |
table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam')
|
806 |
+
print('LOOK HERE 3')
|
807 |
+
print(statcast_pitch_summary)
|
808 |
select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]]
|
809 |
|
810 |
normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(),
|
|
|
841 |
normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3)
|
842 |
if table.get_celld()[(i+1,16)].get_text().get_text() != '—':
|
843 |
table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
|
844 |
+
print("LOOK HERE")
|
845 |
+
print(select_df)
|
846 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFB000','#FFFFFF','#648FFF',])
|
847 |
+
normalize = mcolors.Normalize(vmin=select_df['xwobacon'].mean()*0.7, vmax=select_df['xwobacon'].mean()*1.3)
|
848 |
+
if table.get_celld()[(i+1,17)].get_text().get_text() != '—':
|
849 |
+
table.get_celld()[(i+1,17)].set_facecolor(get_color(float(table.get_celld()[(i+1, 17)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
|
850 |
+
|
851 |
+
|
852 |
|
853 |
table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold')
|
854 |
|
|
|
871 |
'$\\bf{Zone\%}$',
|
872 |
'$\\bf{Chase\%}$',
|
873 |
'$\\bf{Whiff\%}$',
|
874 |
+
'$\\bf{xwOBA}$\n$\\bf{Contact}$',
|
875 |
]
|
876 |
|
877 |
for i, col_name in enumerate(new_column_names):
|
|
|
894 |
# print(fl)
|
895 |
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
|
896 |
|
897 |
+
float_3_list = ['xwobacon']
|
898 |
+
for fl in float_3_list:
|
899 |
+
# Subset of column names
|
900 |
+
subset_columns = [fl]
|
901 |
+
|
902 |
+
# Get the list of column indices
|
903 |
+
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
|
904 |
+
|
905 |
+
# # print(column_indices)
|
906 |
+
for row_l in range(1,len(plot_table)+1):
|
907 |
+
# print(row_l)
|
908 |
+
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
|
909 |
+
# print()
|
910 |
+
# print(fl)
|
911 |
+
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.3f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
|
912 |
|
913 |
|
914 |
percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate']
|
|
|
949 |
### GROUPED IVB CREATION ###
|
950 |
def group_ivb_update(df,
|
951 |
agg_list=['pitcher_id','pitcher_name','pitcher_hand','pitch_type','pitch_description']):
|
952 |
+
|
953 |
+
|
954 |
+
#df.loc[df['launch_speed']==0,'launch_speed'] = np.nan
|
955 |
+
|
956 |
grouped_ivb = df.groupby(agg_list).agg(
|
957 |
pitches = ('start_speed','count'),
|
958 |
|
|
|
975 |
zone_contact = ('zone_contact','sum'),
|
976 |
ozone_swing = ('ozone_swing','sum'),
|
977 |
ozone_contact = ('ozone_contact','sum'),
|
978 |
+
woba_pred = ('woba_pred','sum'),
|
979 |
+
bip = ('launch_speed','count'),
|
980 |
+
|
981 |
).reset_index()
|
982 |
|
983 |
|
|
|
997 |
|
998 |
grouped_ivb['swstr_rate'] = [grouped_ivb.whiffs[x]/grouped_ivb.pitches[x] if grouped_ivb.pitches[x] != 0 else np.nan for x in range(len(grouped_ivb))]
|
999 |
|
1000 |
+
|
1001 |
+
grouped_ivb['xwobacon'] = [grouped_ivb.woba_pred[x]/grouped_ivb.bip[x] if grouped_ivb.bip[x] != 0 else np.nan for x in range(len(grouped_ivb))]
|
1002 |
+
|
1003 |
return grouped_ivb
|
1004 |
|
1005 |
|