"""Shiny app showing expected-total-bases ("damage") charts for 2023 MLB batters.

At import time the module downloads the 2023 pitch-level dataset, derives
spray-angle features, loads a pre-trained classifier from ``xtb_model.joblib``
and scores every batted ball plus a dense launch-speed / launch-angle /
spray-angle grid.  ``server`` renders two plots from those frames and
``damage`` is the Shiny ``App`` object.
"""
import math

import datasets
import inflect
import joblib
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns
import shinyswatch
from adjustText import adjust_text
from datasets import load_dataset
from matplotlib.gridspec import GridSpec
from matplotlib.pyplot import text
from matplotlib.ticker import FormatStrFormatter, MaxNLocator
from scipy.stats import gaussian_kde, percentileofscore, zscore
from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
from shinywidgets import output_widget, render_widget

from configure import base_url

### Import Datasets
dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2023.csv'])
dataset_train = dataset['train']
df_2023 = (
    dataset_train.to_pandas()
    .set_index(list(dataset_train.features.keys())[0])
    .reset_index(drop=True)
)

### Normalize Hit Locations
df_2023['season'] = df_2023['game_date'].str[0:4].astype(int)
# Fixed offsets are used instead of the data-driven median / 0.9999 quantile
# so the origin does not move with the sample.
df_2023['hit_x'] = df_2023['hit_x'] - 126
df_2023['hit_y'] = -df_2023['hit_y'] + 204.5
df_2023['hit_x_og'] = df_2023['hit_x']
# Mirror right-handed batters so a positive spray angle means "pulled" for
# both hands.
is_rhb = df_2023['batter_hand'] == 'R'
df_2023.loc[is_rhb, 'hit_x'] = -1 * df_2023.loc[is_rhb, 'hit_x']
df_2023['h_la'] = np.arctan(df_2023['hit_x'] / df_2023['hit_y']) * 180 / np.pi

# Spray direction buckets: [-inf, -15) Oppo, [-15, 15) Straight, [15, inf) Pull.
# pd.cut keeps missing angles as real NaN; np.select with a float NaN default
# cannot be combined with string choices.
choices_ss = ['Oppo', 'Straight', 'Pull']
df_2023['traj'] = pd.cut(
    df_2023['h_la'],
    bins=[-np.inf, -15, 15, np.inf],
    labels=choices_ss,
    right=False,
).astype(object)

# 1 marks a row with a positive launch speed, NaN otherwise.
df_2023['bip'] = np.where(df_2023['launch_speed'] > 0, 1, np.nan)

# Total bases credited per event type; any other event scores 0.
conditions_woba = [
    (df_2023['event_type'] == 'walk'),
    (df_2023['event_type'] == 'hit_by_pitch'),
    (df_2023['event_type'] == 'single'),
    (df_2023['event_type'] == 'double'),
    (df_2023['event_type'] == 'triple'),
    (df_2023['event_type'] == 'home_run'),
]
choices_woba = [1, 1, 1, 2, 3, 4]
df_2023['woba'] = np.select(conditions_woba, choices_woba, default=0)
choices_woba_train = [1, 1, 1, 2, 3, 4]
df_2023['woba_train'] = np.select(conditions_woba, choices_woba_train, default=0)

# Rows flagged as balls in play that also have a launch angle.
# NOTE: an earlier variant rounded ``h_la`` on a frame that was immediately
# overwritten by this one; that dead code has been removed, so ``h_la`` is
# (as before, in effect) not rounded.
df_2023_bip = df_2023[~df_2023['bip'].isnull()].dropna(subset=['launch_angle', 'bip'])
df_2023_bip_train = df_2023_bip[df_2023_bip['season'] == 2023]

# Select-box choices: batter_id -> batter_name, in name order.
batter_dict = (
    df_2023_bip.sort_values('batter_name')
    .set_index('batter_id')['batter_name']
    .to_dict()
)

features = ['launch_angle', 'launch_speed', 'h_la']
target = ['woba_train']

# .copy() so the column assignment below writes to an independent frame.
df_2023_bip_train = df_2023_bip_train.dropna(subset=features).copy()

model = joblib.load('xtb_model.joblib')

# Weight applied to each predict_proba column.
# NOTE(review): assumes the model's classes are ordered 0..4 total bases -
# confirm against model.classes_.
_TOTAL_BASE_WEIGHTS = np.array([0, 1, 2, 3, 4])


def _expected_total_bases(frame):
    """Return the probability-weighted total bases for each row of *frame*.

    *frame* must contain the columns listed in ``features``.
    """
    return (model.predict_proba(frame[features]) * _TOTAL_BASE_WEIGHTS).sum(axis=1)


df_2023_bip_train['y_pred'] = _expected_total_bases(df_2023_bip_train)

# Per-batter summary of the scored batted balls.
df_2023_output = df_2023_bip_train.groupby(['batter_id', 'batter_name']).agg(
    bip=('y_pred', 'count'),
    y_pred=('y_pred', 'sum'),
    slgcon=('woba', 'mean'),
    xslgcon=('y_pred', 'mean'),
    launch_speed=('launch_speed', 'mean'),
    launch_angle_std=('launch_angle', 'median'),
    h_la_std=('h_la', 'mean'),
)
df_2023_output_copy = df_2023_output.copy()

# Dense 1-unit grid over launch speed (30..120), launch angle (-30..60) and
# spray angle (-45..45), scored once at import so plots only need to filter it.
x = np.arange(30, 121, 1)
y = np.arange(-30, 61, 1)
z = np.arange(-45, 46, 1)
X, Y, Z = np.meshgrid(x, y, z, indexing='ij')
df = pd.DataFrame({
    'launch_speed': X.ravel(),
    'launch_angle': Y.ravel(),
    'h_la': Z.ravel(),
})
df['y_pred'] = _expected_total_bases(df)

colour_palette = ['#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
                  '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']
cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", [colour_palette[1], '#ffffff', colour_palette[0]])
cmap_hue2 = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['#ffffff', colour_palette[0]])

p = inflect.engine()

# Hexbin extent as [spray min, spray max, launch-angle min, launch-angle max].
_HEX_EXTENTS = [-45, 45, -30, 60]


def _message_figure(message):
    """Return a blank 12x12 figure carrying only *message*."""
    fig = plt.figure(figsize=(12, 12))
    fig.text(s=message, x=0.5, y=0.5)
    return fig


def _hex_outline_segments(a=None, i=None, off=(0, 0)):
    """Return the six edge segments of a hexagon as a ``(6, 2, 2)`` array.

    ``a`` is the vertical centre-to-vertex distance and ``i`` the horizontal
    centre-to-edge distance; pass only one for a regular hexagon or both for
    an irregular one.  ``off`` is added to every point.  Three of the
    segments list their points in flipped order so that an edge shared by two
    neighbouring hexagons produces identical rows.
    """
    if a is None:
        a = 2 / np.sqrt(3) * i
    if i is None:
        i = np.sqrt(3) / 2 * a
    h = a / 2
    xy = np.array([
        [[0, a], [i, h]],
        [[i, h], [i, -h]],
        [[i, -h], [0, -a]],
        [[-i, -h], [0, -a]],  # flipped
        [[-i, h], [-i, -h]],  # flipped
        [[0, a], [-i, h]],    # flipped
    ])
    return xy + np.asarray(off)


def _spread_hexbin(ax, df_batter):
    """Bin a median +/- 1 std launch-angle x spray-angle box onto *ax*.

    Returns the hexbin collection, or ``None`` when the box is empty or its
    bounds are not finite (for example a single batted ball, whose std is NaN).
    """
    la_mid = df_batter['launch_angle'].median()
    la_sd = df_batter['launch_angle'].std()
    ha_mid = df_batter['h_la'].median()
    ha_sd = df_batter['h_la'].std()
    if not np.all(np.isfinite([la_mid, la_sd, ha_mid, ha_sd])):
        return None

    la_axis = np.arange(la_mid - la_sd, la_mid + la_sd, 1)
    ha_axis = np.arange(ha_mid - ha_sd, ha_mid + ha_sd, 1)
    la_grid, ha_grid = np.meshgrid(la_axis, ha_axis, indexing='ij')
    if la_grid.size == 0:
        return None

    return ax.hexbin(x=ha_grid.ravel(),
                     y=la_grid.ravel(),
                     gridsize=25,
                     edgecolors='k',
                     extent=_HEX_EXTENTS, mincnt=1, lw=2, zorder=-3)


def _outline_region(ax, collection):
    """Shade and outline the hexagons of *collection* above half its max count."""
    verts = collection.get_offsets()
    cnts = collection.get_array()
    if len(verts) < 2:
        return
    highl = verts[cnts > .5 * cnts.max()]

    # Hexagon dimensions are recovered from the spacing of the bin centres.
    # NOTE(review): relies on the order in which hexbin returns its offsets.
    a = ((verts[0, 1] - verts[1, 1]) / 3).round(6)
    steps = ((verts[1:, 0] - verts[:-1, 0]) / 2).round(6)
    steps = steps[steps > 0]
    if steps.size == 0 or len(highl) == 0:
        return
    i = steps[0]

    # A segment that occurs exactly once is not shared with a neighbouring
    # highlighted hexagon, so it belongs to the outer contour.
    lines = np.concatenate([_hex_outline_segments(a, i, off) for off in highl])
    uls, c = np.unique(lines.round(4), axis=0, return_counts=True)
    for segment in uls[c == 1]:
        ax.plot(*segment.transpose(), linestyle='-', lw=2,
                scalex=False, scaley=False,
                color=colour_palette[1], zorder=100)

    # Plot filled hexagons
    for hc in highl:
        hx = hc[0] + np.array([0, i, i, 0, -i, -i])
        hy = hc[1] + np.array([a, a / 2, -a / 2, -a, -a / 2, a / 2])
        ax.fill(hx, hy, color=colour_palette[1], alpha=0.15, edgecolor=None)


def server(input, output, session):
    """Register the ``hex_plot`` and ``roll_plot`` outputs.

    Both outputs re-render when the ``go`` button fires and read the
    ``batter_id`` input; ``hex_plot`` also reads ``quant`` and ``roll_plot``
    reads ``rolling_window``.
    """

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def hex_plot():
        """Hex map of expected total bases over spray angle and launch angle."""
        if not input.batter_id():
            return _message_figure('Please Select a Batter')

        batter_select_id = int(input.batter_id())
        # Keep the percentile as an integer: round-tripping through /100 and
        # *100 can lose one (29 -> 0.29 -> 28.999... -> 28) in the labels.
        quant_pct = min(max(int(input.quant()), 0), 100)

        df_batter_og = df_2023_bip_train[df_2023_bip_train['batter_id'] == batter_select_id]
        if df_batter_og.empty:
            return _message_figure('No Batted Ball Data for Selected Batter')

        ev_floor = df_batter_og['launch_speed'].quantile(quant_pct / 100)
        df_batter = df_batter_og[df_batter_og['launch_speed'] >= ev_floor]
        batter_name = df_batter['batter_name'].values[0]
        ev_min = df_batter['launch_speed'].min()
        ev_max = df_batter['launch_speed'].max()
        ev_median = df_batter['launch_speed'].median()

        fig = plt.figure(figsize=(12, 12))
        gs = GridSpec(4, 3, height_ratios=[0.5, 10, 1.5, 0.2], width_ratios=[0.05, 0.9, 0.05])
        axheader = fig.add_subplot(gs[0, :])
        ax10 = fig.add_subplot(gs[1, 0])
        ax = fig.add_subplot(gs[1, 1])
        ax12 = fig.add_subplot(gs[1, 2])
        ax2_ = fig.add_subplot(gs[2, :])
        axfooter1 = fig.add_subplot(gs[-1, :])
        for blank in (axheader, ax10, ax12, ax2_, axfooter1):
            blank.axis('off')

        # Drawn first so the value map below (same zorder) covers it; it is
        # only needed for its bin centres and counts.
        spread = _spread_hexbin(ax, df_batter)

        # Model grid restricted to the plotted launch angles and to launch
        # speeds between this batter's median and max selected speed.
        in_range = (
            (df['launch_angle'] >= -30) & (df['launch_angle'] <= 60)
            & (df['launch_speed'] >= ev_median) & (df['launch_speed'] <= ev_max)
        )
        grid = df[in_range]
        value_map = ax.hexbin(x=grid['h_la'],
                              y=grid['launch_angle'],
                              C=grid['y_pred'],
                              gridsize=25,
                              vmin=0,
                              vmax=4,
                              cmap=cmap_hue2,
                              extent=_HEX_EXTENTS, zorder=-3)

        # Add text with the values of "C" to each hexagon
        for value, (cx, cy) in zip(value_map.get_array(), value_map.get_offsets()):
            if value >= 1:
                ax.text(cx, cy, f'{value:.1f}', color='black',
                        ha='center', va='center', fontsize=7)

        if spread is not None:
            _outline_region(ax, spread)

        axheader.text(s=f"{batter_name} - {quant_pct}th% EV and Greater Batted Ball Tendencies",
                      x=0.5, y=0.2, fontsize=20, ha='center', va='bottom')
        axheader.text(s="2023 Season", x=0.5, y=-0.1, fontsize=14, ha='center', va='top')

        ax.set_xlabel("Horizontal Spray Angle (°)", fontsize=12)
        ax.set_ylabel("Vertical Launch Angle (°)", fontsize=12)

        notes = (
            "Notes:\n"
            f"- {quant_pct}th% EV and Greater BBE is defined as a batter's top {100 - quant_pct}% hardest hit BBE\n"
            f"- Colour Scale and Number Labels Represents the Expected Total Bases for a batter's range of Best Speeds\n"
            f"- Shaded Area Represents the 2-D Region bounded by ±1σ Launch Angle and Horizontal Spray Angle on batter's Best Speed BBE\n"
            f"- {batter_name} {quant_pct}th% EV and Greater BBE Range from {ev_min:.0f} to {ev_max:.0f} mph ({len(df_batter)} BBE)\n"
            f"- Positive Horizontal Spray Angle Represents a BBE hit in same direction as batter handedness (i.e. Pulled)"
        )
        ax2_.text(x=0.5, y=0.0, s=notes,
                  fontsize=11, fontstyle='oblique', va='bottom', ha='center',
                  bbox=dict(facecolor='white', edgecolor='black'), ma='left')

        axfooter1.text(0.05, 0.5, "By: Thomas Nestico\n @TJStats", ha='left', va='bottom', fontsize=12)
        axfooter1.text(0.95, 0.5, "Data: MLB", ha='right', va='bottom', fontsize=12)

        # Spray angles were mirrored for right-handed batters; flip the axis
        # back for them.
        if df_batter['batter_hand'].values[0] == 'R':
            ax.invert_xaxis()

        ax.grid(False)
        ax.axis('equal')
        # Adjusting subplot to center it within the figure
        fig.subplots_adjust(left=0.01, right=0.99, top=0.975, bottom=0.025)
        return fig

    @output
    @render.plot(alt="roll_plot")
    @reactive.event(input.go, ignore_none=False)
    def roll_plot():
        """Rolling mean of expected total bases per ball in play for one batter."""
        if not input.batter_id():
            return _message_figure('Please Select a Batter')

        batter_select_id = int(input.batter_id())
        df_batter_og = df_2023_bip_train[df_2023_bip_train['batter_id'] == batter_select_id]
        if df_batter_og.empty:
            return _message_figure('No Batted Ball Data for Selected Batter')

        batter_select_name = df_batter_og['batter_name'].values[0]
        n_bip = len(df_batter_og)
        # The window can be neither longer than the batter's sample nor
        # shorter than one ball in play.
        win = max(1, min(int(input.rolling_window()), n_bip))
        # Percentile lines only compare against batters with at least `win` BIP.
        qualified = df_2023_output_copy[df_2023_output_copy['bip'] >= win]

        sns.set_theme(style="whitegrid", palette="pastel")

        fig = plt.figure(figsize=(12, 12))
        gs = GridSpec(3, 3, height_ratios=[0.3, 10, 0.2], width_ratios=[0.01, 2, 0.01])
        axheader = fig.add_subplot(gs[0, :])
        ax10 = fig.add_subplot(gs[1, 0])
        ax = fig.add_subplot(gs[1, 1])
        ax12 = fig.add_subplot(gs[1, 2])
        axfooter1 = fig.add_subplot(gs[-1, :])
        for blank in (axheader, ax10, ax12, axfooter1):
            blank.axis('off')

        rolling_mean = df_batter_og.y_pred.rolling(window=win).mean()
        sns.lineplot(x=range(win, len(rolling_mean) + 1),
                     y=rolling_mean.dropna(),
                     color=colour_palette[0], linewidth=2, ax=ax)

        batter_mean = df_batter_og.y_pred.mean()
        batter_pct = p.ordinal(int(np.around(
            percentileofscore(qualified["xslgcon"], batter_mean, kind="strict"))))
        ax.hlines(y=batter_mean, xmin=win, xmax=n_bip,
                  color=colour_palette[0], linestyle='--',
                  label=f'{batter_select_name} Average: {batter_mean:.3f} xSLGCON ({batter_pct} Percentile)')

        league_mean = df_2023_bip_train['y_pred'].mean()
        ax.hlines(y=league_mean, xmin=win, xmax=n_bip,
                  color=colour_palette[1], linestyle='-.', alpha=1,
                  label=f'MLB Average: {league_mean:.3f} xSLGCON')
        ax.legend()

        # Dotted reference lines at league xSLGCON percentiles, each with a
        # boxed label near the left edge in the matching colour.
        reference = [(0.9, '90th %'), (0.75, '75th %'), (0.25, '25th %'), (0.1, '10th %')]
        for idx, (q, label) in enumerate(reference):
            level = qualified['xslgcon'].quantile(q)
            ax.hlines(y=level, xmin=win, xmax=n_bip,
                      color=colour_palette[2 + idx], linestyle='dotted', alpha=0.5, zorder=1)
            ax.text(win + win / 50, level, label, rotation=0, va='center', ha='left',
                    bbox=dict(facecolor='white', alpha=0.7,
                              edgecolor=colour_palette[2 + idx], pad=2),
                    zorder=11)

        ax.set_xlim(win, n_bip)
        ax.set_yticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
        ax.set_xlabel('Balls In Play')
        ax.set_ylabel('Expected Total Bases per Ball In Play (xSLGCON)')
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.3f'))

        axheader.text(s=f'{batter_select_name} - MLB - {win} Rolling BIP Expected Slugging on Contact (xSLGCON)',
                      x=0.5, y=-0.5, ha='center', va='bottom', fontsize=14)
        axfooter1.text(.05, 0.2, "By: Thomas Nestico", ha='left', va='bottom', fontsize=12)
        axfooter1.text(0.95, 0.2, "Data: MLB", ha='right', va='bottom', fontsize=12)

        fig.subplots_adjust(left=0.01, right=0.99, top=0.98, bottom=0.02)
        return fig


damage = App(ui.page_fluid(
    ui.tags.base(href=base_url),
    ui.tags.div(
        {"style": "width:95%;margin: 0 auto;max-width: 1600px;"},
        ui.tags.style(
            """
            h4 {
                margin-top: 1em;font-size:35px;
            }
            h2{
                font-size:25px;
            }
            """
        ),
        shinyswatch.theme.simplex(),
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.markdown("""Support me on Patreon for Access to 2024 Apps1"""),
        # Site-wide navigation; every entry links to a sibling app.
        ui.navset_tab(
            ui.nav_control(
                ui.a("Home", href="home/"),
            ),
            ui.nav_menu(
                "Batter Charts",
                ui.nav_control(
                    ui.a("Batting Rolling", href="rolling_batter/"),
                    ui.a("Spray", href="spray/"),
                    ui.a("Decision Value", href="decision_value/"),
                    ui.a("Damage Model", href="damage_model/"),
                    ui.a("Batter Scatter", href="batter_scatter/"),
                    # "EV vs LA Plot" (ev_angle/) is currently disabled.
                    ui.a("Statcast Compare", href="statcast_compare/"),
                ),
            ),
            ui.nav_menu(
                "Pitcher Charts",
                ui.nav_control(
                    ui.a("Pitcher Rolling", href="rolling_pitcher/"),
                    ui.a("Pitcher Summary", href="pitching_summary_graphic_new/"),
                    ui.a("Pitcher Scatter", href="pitcher_scatter/"),
                ),
            ),
        ),
        ui.row(
            ui.layout_sidebar(
                ui.panel_sidebar(
                    ui.input_select("batter_id", "Select Batter", batter_dict,
                                    width=1, size=1, selectize=True),
                    ui.input_numeric("quant", "Select Percentile", value=50, min=0, max=100),
                    ui.input_numeric("rolling_window", "Select Rolling Window", value=50, min=1),
                    ui.input_action_button("go", "Generate", class_="btn-primary"),
                ),
                ui.panel_main(
                    ui.navset_tab(
                        ui.nav("Damage Hex",
                               ui.output_plot('hex_plot', width='1200px', height='1200px')),
                        ui.nav("Damage Roll",
                               ui.output_plot('roll_plot', width='1200px', height='1200px')),
                    ),
                ),
            ),
        ),
    ),
), server)