2024_mlb_pitch_heat_maps / spray_new.py
nesticot's picture
Upload 13 files
30629a5 verified
raw
history blame
45.3 kB
##### games.,py #####
# Import modules
from shiny import *
import shinyswatch
#import plotly.express as px
from shinywidgets import output_widget, render_widget
import pandas as pd
from configure import base_url
import math
import datetime
import datasets
from datasets import load_dataset
import numpy as np
import matplotlib
from matplotlib.ticker import MaxNLocator
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import seaborn as sns
### Import Datasets
# Pitch-level CSVs requested from the 'nesticot/mlb_data' dataset repository.
_PITCH_DATA_FILES = ['mlb_pitch_data_2023.csv',
                     'mlb_pitch_data_2022.csv']
dataset = load_dataset('nesticot/mlb_data', data_files=_PITCH_DATA_FILES)
dataset_train = dataset['train']

# The first feature is moved into the index and then thrown away by
# reset_index(drop=True), i.e. the frame keeps every column except the first.
# NOTE(review): presumably that first column is a saved row index -- confirm.
_first_feature = list(dataset_train.features)[0]
df_2023 = dataset_train.to_pandas()
df_2023 = df_2023.set_index(_first_feature).reset_index(drop=True)
# Paths to data
### Normalize Hit Locations
# Re-centre the hit coordinates with fixed offsets and flip the y axis.
# NOTE(review): 126 / 204.5 replaced a median / 0.9999-quantile computation in
# an earlier revision; presumably they put home plate at the origin -- confirm.
df_2023['hit_x'] = df_2023['hit_x'] - 126
df_2023['hit_y'] = -df_2023['hit_y'] + 204.5
# Keep the un-mirrored x coordinate before flipping right-handed batters.
df_2023['hit_x_og'] = df_2023['hit_x']
_is_right_handed = df_2023['batter_hand'] == 'R'
df_2023.loc[_is_right_handed, 'hit_x'] = -1 * df_2023.loc[_is_right_handed, 'hit_x']

### Calculate Horizontal Launch Angles
# Spray angle in degrees, measured from the (mirrored) x/y hit location.
df_2023['h_la'] = np.arctan(df_2023['hit_x'] / df_2023['hit_y']) * 180 / np.pi

# Three spray buckets.  The thresholds evaluate to roughly -15.17 and +16.83
# degrees (operator precedence: -16 + 5/6 and 16 + 5/6).
conditions_ss = [
    (df_2023['h_la'] < -16 + 5/6),
    (df_2023['h_la'] < 16 + 5/6) & (df_2023['h_la'] >= -16 + 5/6),
    (df_2023['h_la'] >= 16 + 5/6),
]
choices_ss = ['Oppo', 'Straight', 'Pull']
# default=None keeps unclassified rows (NaN spray angle) as missing values.
# A float NaN default cannot be combined with string choices: older NumPy
# silently stored the text 'nan', newer NumPy raises a TypeError.
df_2023['traj'] = np.select(conditions_ss, choices_ss, default=None)

# 1.0 for pitches with a positive exit velocity, NaN otherwise.
df_2023['bip'] = np.where(df_2023['launch_speed'] > 0, 1, np.nan)
# (event_type, value) pairs used as the model target; any other event gets 0.
_EVENT_VALUES = (
    ('walk', 1),
    ('hit_by_pitch', 1),
    ('single', 1),
    ('double', 2),
    ('triple', 3),
    ('home_run', 4),
)
conditions_woba = [(df_2023['event_type'] == _event) for _event, _ in _EVENT_VALUES]
choices_woba = [_value for _, _value in _EVENT_VALUES]
choices_woba_train = [_value for _, _value in _EVENT_VALUES]
df_2023['woba_train'] = np.select(conditions_woba, choices_woba_train, default=0)

# Barrel flag from exit velocity (EV) and launch angle (LA).
_exit_velo = df_2023['launch_speed']
_launch_angle = df_2023['launch_angle']
_meets_barrel_criteria = (
    (_exit_velo * 1.5 - _launch_angle >= 117)
    & (_exit_velo + _launch_angle >= 124)
    & (_exit_velo > 98)
    & (_launch_angle >= 8)
    & (_launch_angle <= 50)
)
conditions = [
    _exit_velo.isna(),
    _meets_barrel_criteria,
]
choices = [False, True]
# Result is numeric: 0.0 where EV is missing, 1.0 for barrels, NaN for every
# other row.
# NOTE(review): tracked balls that are not barrels become NaN rather than
# False -- confirm this is intended.
df_2023['barrel'] = np.select(conditions, choices, default=np.nan)
# One row per batter_id (first occurrence after sorting by name), reduced to
# an id -> name lookup table.
_batters_by_name = df_2023.sort_values(by='batter_name')
_unique_batters = _batters_by_name.drop_duplicates(subset='batter_id').reset_index(drop=True)
test_df = _unique_batters[['batter_id', 'batter_name']]
test_df = test_df.set_index('batter_id')
batter_dict = test_df['batter_name'].to_dict()

# Shared plotting colours.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0',
    '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Launch-angle / launch-speed curve read from disk; the EV-vs-LA plot draws it
# as the lower edge of the shaded 'Barrel' area.
angle_ev_list_df = pd.read_csv('angle_ev_list_df.csv')
ev_ranges = list(np.arange(97.5, 130, 0.1))
angle_ranges = list(range(8, 51))
# Batted balls only: rows flagged as balls in play that also have a spray
# angle and a launch angle.  .copy() makes this an independent frame so the
# column assignments below never write into a slice of df_2023.
df_2023_bip = (
    df_2023[~df_2023['bip'].isnull()]
    .dropna(subset=['h_la', 'launch_angle'])
    .copy()
)
df_2023_bip['h_la'] = df_2023_bip['h_la'].round(0)
# Season is taken from the first four characters of game_date.
df_2023_bip['season'] = df_2023_bip['game_date'].str[0:4].astype(int)

# 2023 rows with every model feature present (again as an independent copy,
# because a prediction column is added to it below).
features = ['launch_angle', 'launch_speed', 'h_la']
target = ['woba_train']
df_2023_bip_train = (
    df_2023_bip[df_2023_bip['season'] == 2023]
    .dropna(subset=features)
    .copy()
)

import joblib
# Load the pre-trained classifier shipped next to this script.
model = joblib.load('xtb_model.joblib')

# Expected value per batted ball: class probabilities weighted by 0..4 and
# summed across each row, done as one array operation instead of a per-row
# Python loop.
df_2023_bip_train['y_pred'] = (
    np.asarray(model.predict_proba(df_2023_bip_train[features]))
    * np.array([0, 1, 2, 3, 4])
).sum(axis=1)

# Per-batter summary of the 2023 predictions.
# NOTE(review): launch_angle_std is a median and h_la_std is a mean despite
# the "_std" names -- confirm which statistic was intended.
df_2023_output = df_2023_bip_train.groupby(['batter_id', 'batter_name']).agg(
    bip=('y_pred', 'count'),
    y_pred=('y_pred', 'sum'),
    xslgcon=('y_pred', 'mean'),
    launch_speed=('launch_speed', 'mean'),
    launch_angle_std=('launch_angle', 'median'),
    h_la_std=('h_la', 'mean'))
df_2023_output_copy = df_2023_output.copy()
import pandas as pd
import numpy as np
# Create grid coordinates: exit velocity 30..120, launch angle -30..60 and
# spray angle -45..45, all in steps of 1.
x = np.arange(30, 121, 1)
y = np.arange(-30, 61, 1)
z = np.arange(-45, 46, 1)
# Create a meshgrid covering every (speed, launch angle, spray angle) combination
X, Y, Z = np.meshgrid(x, y, z, indexing='ij')
# Flatten the meshgrid into one coordinate vector per axis
x_flat = X.flatten()
y_flat = Y.flatten()
z_flat = Z.flatten()
# Create a DataFrame with one row per grid point
df = pd.DataFrame({'launch_speed': x_flat, 'launch_angle': y_flat, 'h_la': z_flat})
# Score the whole grid at once: class probabilities weighted by 0..4 and
# summed per row.  The grid has 91**3 rows, so this is done as a single array
# reduction rather than a Python-level loop over rows.
df['y_pred'] = (
    np.asarray(model.predict_proba(df[features])) * np.array([0, 1, 2, 3, 4])
).sum(axis=1)
import matplotlib
# Shared plotting colours.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0',
    '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]
_make_cmap = matplotlib.colors.LinearSegmentedColormap.from_list
# Diverging map: second palette colour -> white -> first palette colour.
cmap_hue = _make_cmap("", [colour_palette[1], '#ffffff', colour_palette[0]])
# Sequential map: white -> first palette colour.
cmap_hue2 = _make_cmap("", ['#ffffff', colour_palette[0]])
from matplotlib.pyplot import text
import inflect
from scipy.stats import percentileofscore
# inflect engine; the rolling plot uses it to print ordinals ("90th").
p = inflect.engine()
# batter_id -> batter_name for every batter with at least one batted ball;
# this is the choice list offered by the batter selector.
_batted_ball_names = df_2023_bip.sort_values('batter_name').set_index('batter_id')['batter_name']
batter_dict = _batted_ball_names.to_dict()
# def server(input: Inputs, output: Outputs, session: Session):
#if input.my_tabs() == '2023 vs MLB':
#return
# #if input.my_tabs() == 'Damage Hex':
# ui.insert_ui(
# ui.input_numeric("quant",
# "Select Percentile",
# value=50,
# min=0,max=100),
# selector="#go",
# where="beforeBegin",
# ),
# ui.insert_ui(
# ui.input_numeric("rolling_window",
# "Select Rolling Window",
# value=50,
# min=1),
# selector="#go",
# where="beforeBegin",
# )
#return
# ui.insert_ui(
# ui.input_numeric("quant",
# "Select Percentile",
# value=50,
# min=0,max=100),
# ),
# ui.insert_ui(
# ui.input_numeric("rolling_window",
# "Select Rolling Window",
# value=50,
# min=1),
# where="beforeEnd",
# )
# return
# if input.my_tabs() == 'Damage Roll':
# return ui.panel_sidebar(
# ui.input_select("batter_id",
# "Select Batter2",
# batter_dict,
# width=1,
# size=1,
# selectize=True),
# ui.input_action_button("go", "Generate",class_="btn-primary",
# )),
# if input.my_tabs() == 'EV vs LA':
# return ui.panel_sidebar(
# ui.input_select("batter_id",
# "Select Batter3",
# batter_dict,
# width=1,
# size=1,
# selectize=True),
# ui.input_action_button("go", "Generate",class_="btn-primary",
# )),
def _placeholder_figure(message, figsize=(12, 12)):
    """Return an otherwise empty figure that displays ``message``."""
    fig = plt.figure(figsize=figsize)
    fig.text(s=message, x=0.5, y=0.5)
    return fig


def _hex_lines(a=None, i=None, off=(0, 0)):
    """Return the six edge segments of a hexagon as ``(xy1, xy2)`` pairs.

    Segments are listed clockwise with the points of each segment sorted top
    to bottom.  For a regular hexagon pass either ``a`` (vertical half-size)
    or ``i`` (horizontal half-size); for an irregular one pass both.  ``off``
    is added to every point, i.e. it is the hexagon centre.
    """
    if a is None:
        a = 2 / np.sqrt(3) * i
    if i is None:
        i = np.sqrt(3) / 2 * a
    h = a / 2
    xy = np.array([[[0, a], [i, h]],
                   [[i, h], [i, -h]],
                   [[i, -h], [0, -a]],
                   [[-i, -h], [0, -a]],   # flipped
                   [[-i, h], [-i, -h]],   # flipped
                   [[0, a], [-i, h]]])    # flipped
    return xy + off


def server(input, output, session):
    """Shiny server function: registers the UI updater and the four plots.

    Every plot re-renders when the "go" button is pressed and reads the
    module-level frames (``df_2023_bip``, ``df_2023_bip_train``, ``df``,
    ``df_2023_output_copy``, ``angle_ev_list_df``) prepared at import time.
    Each renderer returns its figure explicitly.
    """
    select_batter_msg = 'Please Select a Batter'
    no_data_msg = 'Not Enough 2023 Batted Ball Data for the Selected Batter'

    @reactive.Effect
    @reactive.event(input.update_ui)
    def test():
        """Swap the tab-specific numeric input shown above the "go" button."""
        tab = input.my_tabs()
        if tab not in ('Damage Hex', 'Damage Roll'):
            return
        # Remove whatever tab-specific control is currently displayed.
        for control_id in ('quant', 'rolling_window', 'plot_id'):
            ui.remove_ui(selector=f"div:has(> #{control_id})")
        if tab == 'Damage Hex':
            control = ui.input_numeric("quant",
                                       "Select Percentile",
                                       value=50,
                                       min=0, max=100)
        else:
            control = ui.input_numeric("rolling_window",
                                       "Select Rolling Window",
                                       value=50,
                                       min=1)
        ui.insert_ui(control, selector="#go", where="beforeBegin")

    @output
    @render.plot(alt="plot")
    @reactive.event(input.go, ignore_none=False)
    def plot():
        """Contour map of where the batter's 2023 batted balls differ from MLB.

        Two 2-D kernel density estimates over (spray angle, launch angle) are
        built -- the batter and a sample of everyone else -- and their
        standardised difference is drawn, clipped to +/-3 standard deviations.
        """
        if not input.batter_id():
            return _placeholder_figure(select_batter_msg, figsize=(10, 10))
        batter_id_select = int(input.batter_id())

        is_batter = df_2023_bip['batter_id'] == batter_id_select
        is_2023 = df_2023_bip['season'] == 2023
        df_batter_2023 = df_2023_bip.loc[is_batter & is_2023]
        df_non_batter_2023 = df_2023_bip.loc[~is_batter & is_2023]
        # The selector also lists batters that only appear in 2022.
        if df_batter_2023.empty:
            return _placeholder_figure(no_data_msg, figsize=(10, 10))

        n_bip = len(df_batter_2023)
        traj_df = df_batter_2023.groupby('traj')['launch_speed'].count() / n_bip
        trajectory_df = df_batter_2023.groupby('trajectory')['launch_speed'].count() / n_bip

        fig = plt.figure(figsize=(10, 10))
        # 3x3 grid: header row, (labels | contour | colour bar), footer row.
        gs = GridSpec(3, 3, width_ratios=[0.1, 0.8, 0.1], height_ratios=[0.1, 0.8, 0.1])
        ax01 = fig.add_subplot(gs[0, :])
        ax10 = fig.add_subplot(gs[1, 0])
        ax11 = fig.add_subplot(gs[1, 1])
        ax12 = fig.add_subplot(gs[1, 2])
        ax21 = fig.add_subplot(gs[2, :])

        # Most frequent batting side of this batter's 2023 batted balls.
        bat_hand = (df_batter_2023.groupby('batter_hand')['launch_speed']
                    .count().sort_values(ascending=False).index[0])
        # h_la was mirrored for right-handed batters at load time; multiply by
        # -1 again so the horizontal axis is in field orientation.
        bat_hand_value = -1 if bat_hand == 'R' else 1

        # assign() builds new frames, so nothing is written into a slice.
        kde1_df = df_batter_2023[['h_la', 'launch_angle']].assign(
            h_la=lambda d: d['h_la'] * bat_hand_value)
        league = df_non_batter_2023[['h_la', 'launch_angle']]
        # Cap the sample size at the population size so sampling cannot fail.
        kde2_df = league.sample(n=min(50000, len(league)), random_state=42).assign(
            h_la=lambda d: d['h_la'] * bat_hand_value)

        # Calculate 2D KDE for each dataset
        kde1 = gaussian_kde(kde1_df.values.T)
        kde2 = gaussian_kde(kde2_df.values.T)

        # Evaluate both densities on a 1-degree grid.
        grid_x, grid_y = np.meshgrid(np.arange(-45, 46, 1), np.arange(-30, 61, 1))
        positions = np.vstack([grid_x.ravel(), grid_y.ravel()])
        kde1_values = np.reshape(kde1(positions).T, grid_x.shape)
        kde2_values = np.reshape(kde2(positions).T, grid_x.shape)

        # Batter minus league, standardised and clipped to +/-3 sigma.
        result_kde_values = kde1_values - kde2_values
        result_kde_values = (result_kde_values - np.mean(result_kde_values)) / (np.std(result_kde_values))
        result_kde_values = np.clip(result_kde_values, -3, 3)

        num_levels = 14
        levels = np.linspace(-3, 3, num_levels)
        batter_plot = ax11.contourf(grid_x, grid_y, result_kde_values,
                                    cmap=cmap_hue, levels=levels, vmin=-3, vmax=3)
        # Guide lines at launch angles 10/25/50 and spray angles -15/+15.
        for la_line in (10, 25, 50):
            ax11.hlines(y=la_line, xmin=45, xmax=-45, color=colour_palette[3], linewidth=1)
        for spray_line in (-15, 15):
            ax11.vlines(x=spray_line, ymin=-30, ymax=60, color=colour_palette[3], linewidth=1)

        ax12.set_ylim(0, 1)
        cbar = plt.colorbar(batter_plot, cax=ax12, orientation='vertical', shrink=1)
        # Tick marks are replaced by the hand-placed sigma labels further down.
        cbar.set_ticks([])

        # Batted-ball type shares; .get() reports 0% for a type the batter
        # never produced instead of raising KeyError.
        for key, label, y_pos in (('popup', 'Pop Up\n', 0.95),
                                  ('fly_ball', 'Fly Ball\n', 0.75),
                                  ('line_drive', 'Line\nDrive\n', 0.53),
                                  ('ground_ball', 'Ground\nBall\n', 0.23)):
            ax10.text(s=f"{label}({trajectory_df.get(key, 0.0):.1%})",
                      x=1,
                      y=y_pos, va='center', ha='right', fontsize=16)

        # Spray-direction shares; Pull and Oppo swap sides with handedness.
        left_x = 0.2 + 1/16*0.8
        right_x = 0.8 - 1/16*0.8
        if bat_hand == 'R':
            pull_x, oppo_x = left_x, right_x
        else:
            pull_x, oppo_x = right_x, left_x
        for key, x_pos in (('Pull', pull_x), ('Straight', 0.5), ('Oppo', oppo_x)):
            ax21.text(s=f"{key}\n({traj_df.get(key, 0.0):.1%})",
                      x=x_pos,
                      y=1, va='top', ha='center', fontsize=16)

        ax01.axis('off')
        ax10.axis('off')
        ax21.axis('off')
        ax21.text(0.0, 0., "By: Thomas Nestico\n     @TJStats", ha='left', va='bottom', fontsize=12)
        ax21.text(0.98, 0., "Data: MLB", ha='right', va='bottom', fontsize=12)
        ax21.text(0.5, 0., "Inspired by @blandalytics", ha='center', va='bottom', fontsize=12)
        ax11.set_xticks([])
        ax11.set_yticks([])

        # Sigma labels, each nudged by 1/14 of its value towards zero.
        for sigma in (3, 2, 1, 0, -1, -2, -3):
            sigma_label = '±0σ' if sigma == 0 else f'{sigma:+d}σ'
            ax12.text(s=sigma_label, x=0.5, y=sigma - 1/14*sigma,
                      va='center', ha='center', fontsize=12)

        ax01.text(s=f"{df_batter_2023['batter_name'].values[0]}'s 2023 Batted Ball Tendencies",
                  x=0.5,
                  y=0.8, va='top', ha='center', fontsize=20)
        ax01.text(s="(Compared to rest of MLB)",
                  x=0.5,
                  y=0.3, va='top', ha='center', fontsize=16)
        return fig

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def hex_plot():
        """Hexbin of model-expected bases over the batter's hardest-hit balls.

        The colour and number in each hexagon come from the pre-scored grid
        ``df`` restricted to the batter's exit-velocity range; the outlined
        region is median +/- one standard deviation of the batter's launch
        and spray angles.
        """
        if not input.batter_id():
            return _placeholder_figure(select_batter_msg)
        batter_select_id = int(input.batter_id())
        # Keep the percentile as an integer for the labels:
        # int(quant * 100) truncates (29 / 100 * 100 -> 28).
        quant_pct = int(input.quant())
        quant = quant_pct / 100

        df_batter_og = df_2023_bip_train[df_2023_bip_train['batter_id'] == batter_select_id]
        if df_batter_og.empty:
            return _placeholder_figure(no_data_msg)
        ev_cutoff = df_batter_og['launch_speed'].quantile(quant)
        df_batter = df_batter_og[df_batter_og['launch_speed'] >= ev_cutoff]
        # A standard deviation needs at least two batted balls.
        if len(df_batter) < 2:
            return _placeholder_figure(no_data_msg)
        batter_name = df_batter['batter_name'].values[0]

        # 1-degree lattice spanning median +/- 1 sd in both angles.
        la_mid, la_sd = df_batter['launch_angle'].median(), df_batter['launch_angle'].std()
        hla_mid, hla_sd = df_batter['h_la'].median(), df_batter['h_la'].std()
        y_b = np.arange(la_mid - la_sd, la_mid + la_sd, 1)
        z_b = np.arange(hla_mid - hla_sd, hla_mid + hla_sd, 1)
        Y_b, Z_b = np.meshgrid(y_b, z_b, indexing='ij')
        df_batter_base = pd.DataFrame({'launch_angle': Y_b.flatten(), 'h_la': Z_b.flatten()})

        fig = plt.figure(figsize=(12, 12))
        gs = GridSpec(4, 3, height_ratios=[0.5, 10, 1.5, 0.2], width_ratios=[0.05, 0.9, 0.05])
        axheader = fig.add_subplot(gs[0, :])
        ax10 = fig.add_subplot(gs[1, 0])
        ax = fig.add_subplot(gs[1, 1])
        ax12 = fig.add_subplot(gs[1, 2])
        ax2_ = fig.add_subplot(gs[2, :])
        axfooter1 = fig.add_subplot(gs[-1, :])
        for blank_ax in (axheader, ax10, ax12, ax2_, axfooter1):
            blank_ax.axis('off')

        extents = [-45, 45, -30, 60]
        # Hexbin of the +/-1 sd lattice; used below to find which hexagons to
        # outline.
        h = ax.hexbin(x=df_batter_base['h_la'],
                      y=df_batter_base['launch_angle'],
                      gridsize=25,
                      edgecolors='k',
                      extent=extents, mincnt=1, lw=2, zorder=-3,)

        # Grid rows within the batter's exit-velocity range.  The mask is
        # computed once and one hexbin supplies both values and centres.
        ev_floor = df_batter['launch_speed'].median()
        ev_cap = df_batter['launch_speed'].max()
        in_range = ((df['launch_angle'] >= -30) & (df['launch_angle'] <= 60)
                    & (df['launch_speed'] >= ev_floor) & (df['launch_speed'] <= ev_cap))
        grid = df[in_range]
        value_hex = ax.hexbin(x=grid['h_la'],
                              y=grid['launch_angle'],
                              C=grid['y_pred'],
                              gridsize=25,
                              vmin=0,
                              vmax=4,
                              cmap=cmap_hue2,
                              extent=extents, zorder=-3)
        counts = value_hex.get_array()
        bin_centers = value_hex.get_offsets()

        # Add text with the values of "C" to each hexagon
        for count, (cx, cy) in zip(counts, bin_centers):
            if count >= 1:
                ax.text(cx, cy, f'{count:.1f}', color='black', ha='center', va='center', fontsize=7)

        # Hexagon centres that should be highlighted: more than half of the
        # fullest lattice bin.
        verts = h.get_offsets()
        cnts = h.get_array()
        highl = verts[cnts > .5*cnts.max()]
        # Hexagon half-sizes recovered from the spacing of the bin centres.
        hex_a = ((verts[0, 1]-verts[1, 1])/3).round(6)
        hex_i = ((verts[1:, 0]-verts[:-1, 0])/2).round(6)
        hex_i = hex_i[hex_i > 0][0]
        lines = np.concatenate([_hex_lines(hex_a, hex_i, off) for off in highl])
        # Edges that occur exactly once form the outline of the region.
        uls, c = np.unique(lines.round(4), axis=0, return_counts=True)
        for segment in uls[c == 1]:
            # Plain '-' format: a colour in the format string would conflict
            # with the color keyword.
            ax.plot(*segment.transpose(), '-', lw=2, scalex=False, scaley=False,
                    color=colour_palette[1], zorder=100)
        # Plot filled hexagons
        for hc in highl:
            hx = hc[0] + np.array([0, hex_i, hex_i, 0, -hex_i, -hex_i])
            hy = hc[1] + np.array([hex_a, hex_a/2, -hex_a/2, -hex_a, -hex_a/2, hex_a/2])
            ax.fill(hx, hy, color=colour_palette[1], alpha=0.15, edgecolor=None)

        axheader.text(s=f"{batter_name} - {quant_pct}th% EV and Greater Batted Ball Tendencies",
                      x=0.5, y=0.2, fontsize=20, ha='center', va='bottom')
        axheader.text(s="2023 Season", x=0.5, y=-0.1, fontsize=14, ha='center', va='top')
        ax.set_xlabel("Horizontal Spray Angle (°)", fontsize=12)
        ax.set_ylabel("Vertical Launch Angle (°)", fontsize=12)
        notes = (
            "Notes:\n"
            f"- {quant_pct}th% EV and Greater BBE is defined as a batter's top {100 - quant_pct}% hardest hit BBE\n"
            "- Colour Scale and Number Labels Represents the Expected Total Bases for a batter's range of Best Speeds\n"
            "- Shaded Area Represents the 2-D Region bounded by ±1σ Launch Angle and Horizontal Spray Angle on batter's Best Speed BBE\n"
            f"- {batter_name} {quant_pct}th% EV and Greater BBE Range from {df_batter['launch_speed'].min():.0f} to {df_batter['launch_speed'].max():.0f} mph ({len(df_batter)} BBE)\n"
            "- Positive Horizontal Spray Angle Represents a BBE hit in same direction as batter handedness (i.e. Pulled)"
        )
        ax2_.text(x=0.5,
                  y=0.0,
                  s=notes,
                  fontsize=11,
                  fontstyle='oblique',
                  va='bottom',
                  ha='center',
                  bbox=dict(facecolor='white', edgecolor='black'), ma='left')
        axfooter1.text(0.05, 0.5, "By: Thomas Nestico\n     @TJStats", ha='left', va='bottom', fontsize=12)
        axfooter1.text(0.95, 0.5, "Data: MLB", ha='right', va='bottom', fontsize=12)

        # Flip the axis for right-handed batters.
        if df_batter['batter_hand'].values[0] == 'R':
            ax.invert_xaxis()
        ax.grid(False)
        ax.axis('equal')
        # Adjusting subplot to center it within the figure
        fig.subplots_adjust(left=0.01, right=0.99, top=0.975, bottom=0.025)
        return fig

    @output
    @render.plot(alt="roll_plot")
    @reactive.event(input.go, ignore_none=False)
    def roll_plot():
        """Rolling mean of the model prediction over the batter's 2023 BIP.

        Also draws the batter's average, the MLB average and the
        90/75/25/10th percentile lines of batters with at least ``win``
        batted balls.
        """
        if not input.batter_id():
            return _placeholder_figure(select_batter_msg)
        batter_select_id = int(input.batter_id())
        df_batter_og = df_2023_bip_train[df_2023_bip_train['batter_id'] == batter_select_id]
        if df_batter_og.empty:
            return _placeholder_figure(no_data_msg)
        batter_select_name = df_batter_og['batter_name'].values[0]
        n_bip = len(df_batter_og)
        # The window can never exceed the number of batted balls available.
        win = min(int(input.rolling_window()), n_bip)
        # Percentile reference group: batters with at least `win` batted balls.
        qualified = df_2023_output_copy[df_2023_output_copy['bip'] >= win]

        sns.set_theme(style="whitegrid", palette="pastel")
        fig = plt.figure(figsize=(12, 12))
        gs = GridSpec(3, 3, height_ratios=[0.3, 10, 0.2], width_ratios=[0.01, 2, 0.01])
        axheader = fig.add_subplot(gs[0, :])
        ax10 = fig.add_subplot(gs[1, 0])
        ax = fig.add_subplot(gs[1, 1])
        ax12 = fig.add_subplot(gs[1, 2])
        axfooter1 = fig.add_subplot(gs[-1, :])
        for blank_ax in (axheader, ax10, ax12, axfooter1):
            blank_ax.axis('off')

        rolling = df_batter_og.y_pred.rolling(window=win).mean()
        sns.lineplot(x=range(win, len(rolling)+1),
                     y=rolling.dropna(),
                     color=colour_palette[0], linewidth=2, ax=ax)

        batter_mean = df_batter_og.y_pred.mean()
        batter_pct = p.ordinal(int(np.around(
            percentileofscore(qualified["xslgcon"], batter_mean, kind="strict"))))
        ax.hlines(y=batter_mean, xmin=win, xmax=n_bip, color=colour_palette[0], linestyle='--',
                  label=f'{batter_select_name} Average: {batter_mean:.3f} xSLGCON ({batter_pct} Percentile)')
        league_mean = df_2023_bip_train['y_pred'].mean()
        ax.hlines(y=league_mean, xmin=win, xmax=n_bip, color=colour_palette[1], linestyle='-.', alpha=1,
                  label=f'MLB Average: {league_mean:.3f} xSLGCON')
        # Legend is created before the unlabelled percentile lines are added.
        ax.legend()

        percentile_lines = ((0.9, '90th %'), (0.75, '75th %'), (0.25, '25th %'), (0.1, '10th %'))
        for idx, (level, level_label) in enumerate(percentile_lines):
            level_value = qualified['xslgcon'].quantile(level)
            ax.hlines(y=level_value, xmin=win, xmax=n_bip, color=colour_palette[2+idx],
                      linestyle='dotted', alpha=0.5, zorder=1)
            ax.text(min(win+win/50, win+win+5), level_value, level_label, rotation=0, va='center', ha='left',
                    bbox=dict(facecolor='white', alpha=0.7, edgecolor=colour_palette[2+idx], pad=2), zorder=11)

        ax.set_xlim(win, n_bip)
        ax.set_yticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
        ax.set_xlabel('Balls In Play')
        ax.set_ylabel('Expected Total Bases per Ball In Play (xSLGCON)')
        # Imported here because the module header only imports MaxNLocator.
        from matplotlib.ticker import FormatStrFormatter
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
        axheader.text(s=f'{batter_select_name} - MLB - {win} Rolling BIP Expected Slugging on Contact (xSLGCON)',
                      x=0.5, y=-0.5, ha='center', va='bottom', fontsize=14)
        axfooter1.text(.05, 0.2, "By: Thomas Nestico", ha='left', va='bottom', fontsize=12)
        axfooter1.text(0.95, 0.2, "Data: MLB", ha='right', va='bottom', fontsize=12)
        fig.subplots_adjust(left=0.01, right=0.99, top=0.98, bottom=0.02)
        return fig

    @output
    @render.plot(alt="A histogram")
    @reactive.event(input.go, ignore_none=False)
    def ev_plot():
        """Scatter of launch angle vs exit velocity for the batter's 2023 BIP.

        Overlays the curve from ``angle_ev_list_df`` with the shaded 'Barrel'
        area, the launch-angle bucket boundaries (10/25/50 degrees) and the
        95 mph hard-hit line, each annotated with the batter's share.
        """
        if not input.batter_id():
            return _placeholder_figure(select_batter_msg, figsize=(10, 10))
        data_df = df_2023_bip_train[df_2023_bip_train.batter_id == int(input.batter_id())]
        if data_df.empty:
            return _placeholder_figure(no_data_msg, figsize=(10, 10))

        sns.set_theme(style="whitegrid", palette="pastel")
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

        launch_angles = data_df.launch_angle.dropna()
        launch_speeds = data_df.launch_speed.dropna()
        n_angles = len(launch_angles)
        # Axis limits rounded outwards to multiples of 10 (angle) / 5 (speed).
        x_lo = math.floor((min(launch_angles)/10))*10
        x_hi = math.ceil((max(launch_angles)/10))*10
        y_lo = math.floor((min(launch_speeds)/5))*5
        y_hi = math.ceil((max(launch_speeds)/5))*5

        sns.scatterplot(x=data_df.launch_angle, y=data_df.launch_speed, color=colour_palette[1], ax=ax)
        ax.set_xlim(x_lo, x_hi)
        sns.lineplot(x=angle_ev_list_df.launch_angle, y=angle_ev_list_df.launch_speed,
                     color=colour_palette[0], ax=ax)
        # Close the curve with vertical edges at its first and last point.
        last_row = len(angle_ev_list_df)-1
        ax.vlines(x=angle_ev_list_df.launch_angle[0], ymin=angle_ev_list_df.launch_speed[0],
                  ymax=ev_ranges[-1], color=colour_palette[0])
        ax.vlines(x=angle_ev_list_df.launch_angle[last_row], ymin=angle_ev_list_df.launch_speed[last_row],
                  ymax=ev_ranges[-1], color=colour_palette[0])

        groundball = f'{(launch_angles <= 10).sum()/n_angles:.1%}'
        linedrive = f'{((launch_angles <= 25) & (launch_angles > 10)).sum()/n_angles:.1%}'
        flyball = f'{((launch_angles <= 50) & (launch_angles > 25)).sum()/n_angles:.1%}'
        popup = f'{(launch_angles > 50).sum()/n_angles:.1%}'
        hard_hit_percent = f'{(launch_speeds >= 95).sum()/len(launch_speeds):.1%}'
        barrel_percentage = f'{data_df.barrel.dropna().sum()/n_angles:.1%}'

        ax.text(x=27, y=y_hi+5-3, s=f'Barrel% {barrel_percentage}', ha='left',
                bbox=dict(facecolor='white', alpha=0.8, edgecolor=colour_palette[4], pad=5))

        sample_dates = np.array([x_lo, 10, 25, 50])
        sample_text = [f'Groundball ({groundball})', f'Line Drive ({linedrive})',
                       f'Fly Ball ({flyball})', f'Pop-up ({popup})']
        hard_hit_dates = [95]
        hard_hit_text = [f'Hard Hit% ({hard_hit_percent})']

        ax.hlines(y=hard_hit_dates, xmin=x_lo, xmax=x_hi, color=colour_palette[4], linestyles='--')
        ax.vlines(x=sample_dates, ymin=0, ymax=130, color=colour_palette[3], linestyles='--')
        for ev_line, ev_label in zip(hard_hit_dates, hard_hit_text):
            ax.text(x_hi-2.5, ev_line+1.25, ev_label, rotation=0, ha='right',
                    bbox=dict(facecolor='white', alpha=0.5, edgecolor=colour_palette[4], pad=5))
        for la_line, la_label in zip(sample_dates, sample_text):
            ax.text(la_line+0.75, y_lo+1, la_label, rotation=90, verticalalignment='bottom',
                    bbox=dict(facecolor='white', alpha=0.5, edgecolor=colour_palette[3], pad=5))

        ax.set_xlim((x_lo, x_hi))
        ax.set_ylim((y_lo, y_hi+5))
        ax.set_title(f'MLB - {data_df.batter_name.unique()[0]} Launch Angle vs EV Plot',
                     fontsize=18, fontname='Century Gothic',)
        ax.set_xlabel('Launch Angle', fontsize=16, fontname='Century Gothic')
        ax.set_ylabel('Exit Velocity', fontsize=16, fontname='Century Gothic')
        ax.fill_between(angle_ev_list_df.launch_angle, 130, angle_ev_list_df.launch_speed,
                        interpolate=True, color=colour_palette[3], alpha=0.1, label='Barrel')
        ax.legend(fontsize='16', loc='upper left')
        fig.text(x=0.03, y=0.02, s='By: @TJStats')
        fig.text(x=1-0.03, y=0.02, s='Data: MLB', ha='right')
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig
# Page-level CSS (heading sizes).
_PAGE_CSS = """
h4 {
margin-top: 1em;font-size:35px;
}
h2{
font-size:25px;
}
"""

# (label, relative href) pairs for the drop-down menus of the site navigation.
_BATTER_CHART_LINKS = (
    ("Batting Rolling", "rolling_batter/"),
    ("Spray & Damage", "spray/"),
    ("Decision Value", "decision_value/"),
    ("Batter Scatter", "batter_scatter/"),
    ("Statcast Compare", "statcast_compare/"),
)
_PITCHER_CHART_LINKS = (
    ("Pitcher Rolling", "rolling_pitcher/"),
    ("Pitcher Summary", "pitching_summary_graphic_new/"),
    ("Pitcher Scatter", "pitcher_scatter/"),
)

# Top navigation: a Home link plus one menu per link group.
_site_navigation = ui.navset_tab(
    ui.nav_control(
        ui.a("Home", href="home/"),
    ),
    ui.nav_menu(
        "Batter Charts",
        ui.nav_control(*[ui.a(_label, href=_href) for _label, _href in _BATTER_CHART_LINKS]),
    ),
    ui.nav_menu(
        "Pitcher Charts",
        ui.nav_control(*[ui.a(_label, href=_href) for _label, _href in _PITCHER_CHART_LINKS]),
    ),
)

# Sidebar: batter selector, the "go" button every plot listens to, and the
# button that triggers the tab-specific input update in the server.
_sidebar_controls = ui.panel_sidebar(
    ui.input_select("batter_id",
                    "Select Batter",
                    batter_dict,
                    width=1,
                    size=1,
                    selectize=True),
    ui.input_action_button("go", "Generate", class_="btn-primary"),
    ui.input_action_button("update_ui", "Update UI", class_="btn-secondary"),
)

# One tab per plot output: (tab title, output id, square size).
_PLOT_TABS = (
    ("2023 vs MLB", 'plot', '1000px'),
    ("Damage Hex", 'hex_plot', '1200px'),
    ("Damage Roll", 'roll_plot', '1200px'),
    ("EV vs LA", "ev_plot", "1000px"),
)
_plot_tabs = ui.page_navbar(
    *[ui.nav(_title, ui.output_plot(_output_id, width=_size, height=_size))
      for _title, _output_id, _size in _PLOT_TABS],
    id="my_tabs",
)

# Full page: base href, then a centred container with styling, branding,
# navigation and the sidebar/plot layout.
_page_body = ui.tags.div(
    {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
    ui.tags.style(_PAGE_CSS),
    shinyswatch.theme.simplex(),
    ui.tags.h4("TJStats"),
    ui.tags.i("Baseball Analytics and Visualizations"),
    ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2024 Apps</a><sup>1</sup>"""),
    _site_navigation,
    ui.row(
        ui.layout_sidebar(
            _sidebar_controls,
            _plot_tabs,
        )
    ),
)
spray = App(ui.page_fluid(ui.tags.base(href=base_url), _page_body), server)