2024_mlb_pitch_heat_maps / decision_value.py
nesticot's picture
Upload 13 files
30629a5 verified
raw
history blame
34.1 kB
from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
import datasets
from datasets import load_dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib
from matplotlib.ticker import MaxNLocator
from matplotlib.gridspec import GridSpec
from scipy.stats import zscore
import math
import matplotlib
from adjustText import adjust_text
import matplotlib.ticker as mtick
from shinywidgets import output_widget, render_widget
import pandas as pd
from configure import base_url
import shinyswatch
### Load the 2023 pitch-level data, one CSV per competition level
import joblib

_DATA_REPO = 'nesticot/mlb_data'

# MLB pitches. The first column of the CSV is used as a throwaway index and
# then dropped (presumably a saved row index -- confirm against the dataset).
dataset = load_dataset(_DATA_REPO, data_files=['mlb_pitch_data_2023.csv'])
dataset_train = dataset['train']
_leading_column = next(iter(dataset_train.features.keys()))
df_2023_mlb = (
    dataset_train.to_pandas()
    .set_index(_leading_column)
    .reset_index(drop=True)
)
df_2023_mlb['level'] = 'MLB'

# AAA pitches, handled exactly like the MLB file above.
dataset = load_dataset(_DATA_REPO, data_files=['aaa_pitch_data_2023.csv'])
dataset_train = dataset['train']
_leading_column = next(iter(dataset_train.features.keys()))
df_2023_aaa = (
    dataset_train.to_pandas()
    .set_index(_leading_column)
    .reset_index(drop=True)
)
df_2023_aaa['level'] = 'AAA'

# Both levels stacked; each frame keeps its own 0..n-1 row labels, so the
# combined index is not unique.
df_2023 = pd.concat([df_2023_mlb, df_2023_aaa])

### Load the fitted run-value models (one for swings, one for takes)
swing_model = joblib.load('swing.joblib')
no_swing_model = joblib.load('no_swing.joblib')

# batter_id -> batter_name lookup, also used as the choices of the UI selects.
_names_by_id = df_2023.sort_values('batter_name').set_index('batter_id')['batter_name']
batter_dict = _names_by_id.to_dict()
## Score every pitch with the run-value model matching the batter's decision
import xgboost as xgb

# Model inputs: pitch location (px, pz) and the count (strikes, balls).
features = ['px', 'pz', 'strikes', 'balls']

# Pitches missing any model input cannot be scored and are discarded.
_scorable = df_2023.dropna(subset=features)
_was_swing = _scorable.is_swing == 1
_takes = _scorable[~_was_swing]
_swings = _scorable[_was_swing]

# Takes (anything that is not flagged as a swing) use the no-swing model,
# swings use the swing model; the prediction is stored as ``y_pred``.
df_model_2023_no_swing = _takes.assign(
    y_pred=no_swing_model.predict(xgb.DMatrix(_takes[features]))
)
df_model_2023_swing = _swings.assign(
    y_pred=swing_model.predict(xgb.DMatrix(_swings[features]))
)

# All scored pitches: takes first, then swings.
df_model_2023 = pd.concat([df_model_2023_no_swing, df_model_2023_swing])
import joblib

## Per-batter summaries of the predicted run value (``y_pred``)

_BATTER_KEYS = ['batter_id', 'batter_name', 'level']


def _summarise_batters(pitches):
    """Return pitch count and mean ``y_pred`` per batter and level.

    ``pitches`` counts rows with a non-null ``start_speed``; ``y_pred`` is the
    mean prediction over all rows of the group.
    """
    return pitches.groupby(_BATTER_KEYS).agg(
        pitches=('start_speed', 'count'),
        y_pred=('y_pred', 'mean'),
    )


def _to_scouting_scale(values):
    """Map ``values`` to a 20-80 style scale: 50 at the mean, 10 per std dev."""
    return 50 + zscore(values) * 10


def _restrict_to_known_names(summary, reference):
    """Keep rows of ``summary`` whose batter name also appears in ``reference``.

    No pitch minimum is applied to ``reference`` at this stage, so this keeps
    every row today; it only matters if ``reference`` is filtered first.
    """
    known_names = reference.index.get_level_values(1)
    return summary[summary.index.get_level_values(1).isin(known_names)].copy()


## All pitches: overall decision value
df_model_2023_group = _summarise_batters(df_model_2023)
df_model_2023_group['decision_value'] = _to_scouting_scale(df_model_2023_group['y_pred'])

## Pitches taken
df_model_2023_group_no_swing = _restrict_to_known_names(
    _summarise_batters(df_model_2023[df_model_2023.is_swing != 1]),
    df_model_2023_group,
)
df_model_2023_group_no_swing['iz_awareness'] = _to_scouting_scale(
    df_model_2023_group_no_swing['y_pred']
)

## Pitches swung at
df_model_2023_group_swing = _restrict_to_known_names(
    _summarise_batters(df_model_2023[df_model_2023.is_swing == 1]),
    df_model_2023_group,
)
df_model_2023_group_swing['oz_awareness'] = _to_scouting_scale(
    df_model_2023_group_swing['y_pred']
)

## Create df for plotting
# Inner join: only batters with both swings and takes survive.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing.merge(
    df_model_2023_group_no_swing,
    left_index=True,
    right_index=True,
    suffixes=['_swing', '_no_swing'],
)
df_model_2023_group_swing_plus_no['pitches'] = (
    df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.pitches_no_swing
)
# Overall value as the pitch-weighted mean of the swing and take values.
df_model_2023_group_swing_plus_no['y_pred'] = (
    df_model_2023_group_swing_plus_no.y_pred_swing
    * df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.y_pred_no_swing
    * df_model_2023_group_swing_plus_no.pitches_no_swing
) / df_model_2023_group_swing_plus_no.pitches

# Bring in the all-pitch summary; its clashing columns get the ``_y`` suffix.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.merge(
    right=df_model_2023_group,
    left_index=True,
    right_index=True,
    suffixes=['', '_y'],
)
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.reset_index()

# Team of each batter, taken from that batter's last row in ``df_2023``.
# Looking rows up per batter_id (rather than the last few rows per name)
# keeps two batters who share a name from hiding each other's team.
team_dict = (
    df_2023.dropna(subset=['batter_name'])
    .drop_duplicates(subset='batter_id', keep='last')
    .set_index('batter_id')['batter_team']
    .to_dict()
)
df_model_2023_group_swing_plus_no['team'] = (
    df_model_2023_group_swing_plus_no['batter_id'].map(team_dict)
)
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.set_index(
    ['batter_id', 'batter_name', 'level', 'team']
)

# Only batters with at least 250 counted pitches are kept for plotting.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no[
    df_model_2023_group_swing_plus_no['pitches'] >= 250
]
# Pristine copy that the server filters per request.
df_model_2023_group_swing_plus_no_copy = df_model_2023_group_swing_plus_no.copy()
import matplotlib
from matplotlib.pyplot import text
import inflect
from scipy.stats import percentileofscore

# Shared plot colours, referenced by position throughout the server code.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
    '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

_WHITE = '#ffffff'
_make_cmap = matplotlib.colors.LinearSegmentedColormap.from_list

# Diverging map: palette colour 1 -> white -> palette colour 0.
cmap_hue = _make_cmap("", [colour_palette[1], _WHITE, colour_palette[0]])
# Sequential map: white -> palette colour 0.
cmap_hue2 = _make_cmap("", [_WHITE, colour_palette[0]])

# inflect engine; the server uses it to spell ordinals ("90th") in legends.
p = inflect.engine()
def _message_figure(message):
    """Return a blank 12x12 figure whose only content is ``message``."""
    fig = plt.figure(figsize=(12, 12))
    fig.text(s=message, x=0.5, y=0.5)
    return fig


def _level_summary(level):
    """Return the per-batter summary rows whose ``level`` index is ``level``."""
    summary = df_model_2023_group_swing_plus_no_copy
    return summary[summary.index.get_level_values('level') == level]


def _snapped_bounds(values, pad=0.0):
    """Return ``(low, high)`` of ``values`` in xRV/100, snapped outward to 0.05.

    ``values`` are per-pitch run values. They are scaled to hundredths of
    xRV/100 (x100 twice), widened by ``pad`` hundredths, rounded outward to a
    multiple of 5 and scaled back to xRV/100.
    """
    low = math.floor((values.min() * 100 * 100 - pad) / 5) * 5 / 100
    high = math.ceil((values.max() * 100 * 100 + pad) / 5) * 5 / 100
    return low, high


def _decision_scatter_figure(level, plot_min, highlighted_ids):
    """Build the in-zone vs out-of-zone awareness scatter for one level.

    Args:
        level: value of the ``level`` index to plot (e.g. ``'MLB'``).
        plot_min: minimum number of pitches a batter needs to be plotted.
        highlighted_ids: batter ids (as strings) that are always labelled.

    Returns:
        The matplotlib figure, or a message figure when no batter qualifies.
    """
    summary = _level_summary(level)
    summary = summary[summary.pitches >= plot_min]
    if summary.empty:
        # min()/max() would be NaN below and math.floor(NaN) raises.
        return _message_figure(f'No {level} batters with at least {plot_min} pitches')

    swing_value = summary['y_pred_swing']
    take_value = summary['y_pred_no_swing']
    overall_value = summary['y_pred']

    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(3, 3, height_ratios=[0.6, 10, 0.2], width_ratios=[0.25, 0.50, 0.25])
    axheader = fig.add_subplot(gs[0, :])
    ax = fig.add_subplot(gs[1, :])
    axfooter1 = fig.add_subplot(gs[-1, 0])
    axfooter2 = fig.add_subplot(gs[-1, 1])
    axfooter3 = fig.add_subplot(gs[-1, 2])

    cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list(
        "", [colour_palette[1], colour_palette[3], colour_palette[0]])
    norm = plt.Normalize(overall_value.min() * 100, overall_value.max() * 100)

    # Colour encodes overall decision value, point size encodes swing rate.
    sns.scatterplot(
        x=swing_value * 100,
        y=take_value * 100,
        hue=overall_value * 100,
        size=summary['pitches_swing'] / summary['pitches'],
        palette=cmap_hue, ax=ax)

    sm = plt.cm.ScalarMappable(cmap=cmap_hue, norm=norm)
    cbar = plt.colorbar(sm, cax=axfooter2, orientation='horizontal', shrink=1)
    cbar.set_label('Decision Value xRV/100 Pitches', fontsize=12)

    # Dotted cross-hair through the mean of each axis.
    line_x_min, line_x_max = _snapped_bounds(swing_value, pad=0.01)
    ax.hlines(xmin=line_x_min, xmax=line_x_max, y=take_value.mean() * 100,
              color='gray', linewidth=3, linestyle='dotted', alpha=0.4)
    line_y_min, line_y_max = _snapped_bounds(take_value, pad=0.01)
    ax.vlines(ymin=line_y_min, ymax=line_y_max, x=swing_value.mean() * 100,
              color='gray', linewidth=3, linestyle='dotted', alpha=0.4)

    x_lim_min, x_lim_max = _snapped_bounds(swing_value)
    y_lim_min, y_lim_max = _snapped_bounds(take_value)
    ax.set_xlim(x_lim_min, x_lim_max)
    ax.set_ylim(y_lim_min, y_lim_max)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.set_xlabel('Out-of-Zone Awareness Value xRV/100 Swings', fontsize=16)
    ax.set_ylabel('In-Zone Awareness Value xRV/100 Takes', fontsize=16)
    ax.get_legend().remove()

    # Label the batters the user picked plus anyone in the top/bottom 2% of
    # any of the three metrics. Quantiles are computed once per metric.
    batter_ids = summary.index.get_level_values('batter_id').astype(str)
    highlight = batter_ids.isin(list(highlighted_ids))
    for values in (swing_value, take_value, overall_value):
        extreme = (values >= values.quantile(0.98)) | (values <= values.quantile(0.02))
        highlight = highlight | extreme.to_numpy()
    labelled = summary[highlight]
    ts = [
        ax.text(x=x_value * 100, y=y_value * 100, s=batter_name, fontsize=8)
        for x_value, y_value, batter_name in zip(
            labelled['y_pred_swing'],
            labelled['y_pred_no_swing'],
            labelled.index.get_level_values('batter_name'))
    ]

    note_x = x_lim_min + abs(x_lim_min) * 0.02
    y_span = abs(y_lim_max - y_lim_min)
    note_box = dict(facecolor='white', edgecolor='black')
    ax.text(x=note_x, y=y_lim_max - y_span * 0.02, s=f'Min. {plot_min} Pitches',
            fontsize=10, fontstyle='oblique', va='top', bbox=note_box)
    ax.text(x=note_x, y=y_lim_max - y_span * 0.06, s='Point Size Represents Swing%',
            fontsize=10, fontstyle='oblique', va='top', bbox=note_box)

    # Nudge the labels apart and connect them to their points.
    adjust_text(ts,
                arrowprops=dict(arrowstyle="-", color=colour_palette[4], lw=1), ax=ax)

    axfooter1.axis('off')
    axfooter3.axis('off')
    axheader.axis('off')
    axheader.text(s=f'{level} In-Zone vs Out-of-Zone Awareness Value',
                  fontsize=24, x=0.5, y=0, va='top', ha='center')
    axfooter1.text(0.05, -0.5, "By: Thomas Nestico\n @TJStats", ha='left', va='bottom', fontsize=12)
    axfooter3.text(0.95, -0.5, "Data: MLB", ha='right', va='bottom', fontsize=12)
    fig.subplots_adjust(left=0.01, right=0.99, top=0.975, bottom=0.025)
    return fig


def _rolling_value_figure(batter_id, level, window, *, swings, summary_column,
                          y_ticks, x_label, metric_title):
    """Build a rolling-mean run-value chart for one batter at one level.

    Args:
        batter_id: integer key into ``batter_dict`` / ``df_model_2023.batter_id``.
        level: value of the ``level`` column to keep (e.g. ``'MLB'``).
        window: rolling window length in pitches (>= 1).
        swings: ``None`` for all pitches, ``True`` for swings only,
            ``False`` for takes only.
        summary_column: column of the per-batter summary used for the level
            average, the percentile bands and the batter's percentile rank.
        y_ticks: tick positions for the y axis.
        x_label: label of the x axis.
        metric_title: metric name inserted into the chart title.

    Returns:
        The matplotlib figure, or a message figure when the batter has no
        matching pitches.
    """
    player_name = batter_dict[batter_id]
    pitches = df_model_2023[df_model_2023.batter_id == batter_id].sort_values(
        by=['game_date', 'start_time'])
    pitches = pitches[pitches['level'] == level]
    if swings is not None:
        swung = pitches['is_swing'] == 1
        pitches = pitches[swung if swings else ~swung]
    if pitches.empty:
        # The mean would be NaN and the percentile conversion below would raise.
        return _message_figure(f'No {level} pitches found for {player_name}')

    # Reference population: qualified batters at the selected level only, so
    # the "<level> Average" label matches what is actually averaged.
    reference = _level_summary(level)[summary_column]
    n_pitches = len(pitches)

    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(3, 3, height_ratios=[0.3, 10, 0.2], width_ratios=[0.01, 2, 0.01])
    axheader = fig.add_subplot(gs[0, :])
    ax10 = fig.add_subplot(gs[1, 0])
    ax = fig.add_subplot(gs[1, 1])
    ax12 = fig.add_subplot(gs[1, 2])
    axfooter1 = fig.add_subplot(gs[-1, :])
    for spacer in (axheader, ax10, ax12, axfooter1):
        spacer.axis('off')

    # The first ``window - 1`` rolling means are NaN, so the line starts at
    # x == window.
    rolling_mean = pitches.y_pred.rolling(window=window).mean()
    sns.lineplot(x=range(window, len(rolling_mean) + 1),
                 y=rolling_mean.dropna() * 100,
                 color=colour_palette[0], linewidth=2, ax=ax, zorder=100)

    player_mean = pitches.y_pred.mean()
    percentile = int(np.around(percentileofscore(reference, player_mean, kind="strict")))
    ax.hlines(y=player_mean * 100, xmin=window, xmax=n_pitches,
              color=colour_palette[0], linestyle='--',
              label=f'{player_name} Average: {player_mean * 100:.2f} xRV/100 '
                    f'({p.ordinal(percentile)} Percentile)')
    ax.hlines(y=reference.mean() * 100, xmin=window, xmax=n_pitches,
              color=colour_palette[1], linestyle='-.', alpha=1,
              label=f'{level} Average: {reference.mean() * 100:.2f} xRV/100')
    ax.legend()

    # Percentile bands of the reference population, each in its own colour.
    bands = ((0.9, '90th %'), (0.75, '75th %'), (0.25, '25th %'), (0.1, '10th %'))
    band_label_x = window + window / 1000
    for offset, (quantile, band_label) in enumerate(bands):
        band_y = reference.quantile(quantile) * 100
        band_colour = colour_palette[2 + offset]
        ax.hlines(y=band_y, xmin=window, xmax=n_pitches, color=band_colour,
                  linestyle='dotted', alpha=0.5, zorder=1)
        ax.text(band_label_x, band_y, band_label, rotation=0, va='center', ha='left',
                bbox=dict(facecolor='white', alpha=0.7, edgecolor=band_colour, pad=2),
                zorder=11)

    ax.set_xlim(window, n_pitches)
    ax.set_yticks(y_ticks)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Expected Run Value Added per 100 Pitches (xRV/100)')

    axheader.text(
        s=f'{player_name} - {level} - {window} Pitch Rolling {metric_title} Expected Run Value Added',
        x=0.5, y=-0.5, ha='center', va='bottom', fontsize=14)
    axfooter1.text(.05, 0.2, "By: Thomas Nestico", ha='left', va='bottom', fontsize=12)
    axfooter1.text(0.95, 0.2, "Data: MLB", ha='right', va='bottom', fontsize=12)
    fig.subplots_adjust(left=0.01, right=0.99, top=0.98, bottom=0.02)
    return fig


def server(input, output, session):
    """Register the four plot outputs of the decision-value app.

    Every output is re-rendered only when the "go" button fires (and once on
    start-up, because ``ignore_none=False``). Each renderer returns its figure
    explicitly instead of relying on the current pyplot figure.
    """

    def rolling_plot(**plot_options):
        """Read the rolling-chart inputs and build the requested chart."""
        if not input.batter_id():
            return _message_figure('Please Select a Batter')
        # A cleared numeric input yields None; fall back to the minimum window.
        window = max(1, int(input.rolling_window() or 1))
        return _rolling_value_figure(
            int(input.batter_id()), input.level_list(), window, **plot_options)

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def scatter_plot():
        if not input.batter_id():
            return _message_figure('Please Select a Batter')
        # The UI promises a floor of 250 pitches; enforce it here as well.
        plot_min = max(250, int(input.pitch_min() or 0))
        return _decision_scatter_figure(
            input.level_list(), plot_min, input.name_list() or ())

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def dv_plot():
        return rolling_plot(
            swings=None,
            summary_column='y_pred',
            y_ticks=[-1.5, -1, -0.5, 0, 0.5, 1, 1.5],
            x_label='Pitch',
            metric_title='Swing Decision')

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def iz_plot():
        return rolling_plot(
            swings=False,
            summary_column='y_pred_no_swing',
            y_ticks=[1.0, 1.5, 2.0, 2.5, 3.0],
            x_label='Takes',
            metric_title='In-Zone Awareness')

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def oz_plot():
        return rolling_plot(
            swings=True,
            summary_column='y_pred_swing',
            y_ticks=[-3.25, -2.75, -2.25, -1.75, -1.25],
            x_label='Swing',
            metric_title='Out of Zone Awareness')
# Page-level CSS, injected verbatim into a <style> tag.
_PAGE_CSS = (
    "\n"
    "h4 {\n"
    "margin-top: 1em;font-size:35px;\n"
    "}\n"
    "h2{\n"
    "font-size:25px;\n"
    "}\n"
)

# (label, relative href) pairs for the cross-app navigation menus.
# "Damage Model" (damage_model/) and "EV vs LA Plot" (ev_angle/) are
# intentionally not listed; they were commented out of the batter menu.
_BATTER_LINKS = (
    ("Batting Rolling", "rolling_batter/"),
    ("Spray & Damage", "spray/"),
    ("Decision Value", "decision_value/"),
    ("Batter Scatter", "batter_scatter/"),
    ("Statcast Compare", "statcast_compare/"),
)
_PITCHER_LINKS = (
    ("Pitcher Rolling", "rolling_pitcher/"),
    ("Pitcher Summary", "pitching_summary_graphic_new/"),
    ("Pitcher Scatter", "pitcher_scatter/"),
)

# (tab title, output id) pairs; the ids match the functions in ``server``.
_PLOT_TABS = (
    ("Scatter Plot", 'scatter_plot'),
    ("Rolling DV", 'dv_plot'),
    ("Rolling In-Zone", 'iz_plot'),
    ("Rolling Out-of-Zone", 'oz_plot'),
)


def _link_menu(title, links):
    """Return a nav menu called ``title`` holding one anchor per link pair."""
    anchors = [ui.a(label, href=href) for label, href in links]
    return ui.nav_menu(title, ui.nav_control(*anchors))


def _site_navigation():
    """Return the top navigation bar linking to the sibling apps."""
    return ui.navset_tab(
        ui.nav_control(ui.a("Home", href="home/")),
        _link_menu("Batter Charts", _BATTER_LINKS),
        _link_menu("Pitcher Charts", _PITCHER_LINKS),
    )


def _sidebar_controls():
    """Return the sidebar with every input the server reads."""
    return ui.panel_sidebar(
        ui.input_numeric(
            "pitch_min",
            "Select Pitch Minimum [min. 250] (Scatter)",
            value=500,
            min=250,
        ),
        ui.input_select(
            "name_list",
            "Select Players to List (Scatter)",
            batter_dict,
            selectize=True,
            multiple=True,
        ),
        ui.input_select(
            "batter_id",
            "Select Batter (Rolling)",
            batter_dict,
            width=1,
            size=1,
            selectize=True,
        ),
        ui.input_numeric(
            "rolling_window",
            "Select Rolling Window (Rolling)",
            value=100,
            min=1,
        ),
        ui.input_select(
            "level_list",
            "Select Level",
            ['MLB', 'AAA'],
            selected='MLB',
        ),
        ui.input_action_button("go", "Generate", class_="btn-primary"),
    )


def _plot_panel():
    """Return the main panel: one tab per plot output, each 1000x1000 px."""
    tabs = [
        ui.nav(title, ui.output_plot(output_id, width='1000px', height='1000px'))
        for title, output_id in _PLOT_TABS
    ]
    return ui.panel_main(ui.navset_tab(*tabs))


# The Shiny application object: page layout plus the ``server`` callbacks.
decision_value = App(
    ui.page_fluid(
        ui.tags.base(href=base_url),
        ui.tags.div(
            {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
            ui.tags.style(_PAGE_CSS),
            shinyswatch.theme.simplex(),
            ui.tags.h4("TJStats"),
            ui.tags.i("Baseball Analytics and Visualizations"),
            ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2024 Apps</a><sup>1</sup>"""),
            _site_navigation(),
            ui.row(
                ui.layout_sidebar(
                    _sidebar_controls(),
                    _plot_panel(),
                )
            ),
        ),
    ),
    server,
)