nesticot's picture
Update app.py
742ad72 verified
raw
history blame
21 kB
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#import pitch_summary_functions as psf
import requests
import matplotlib
from api_scraper import MLB_Scrape
import math
# Season used to pick the pitch-data CSV and shown in the plot title.
season = 2024
# Hex colours shared by the plot: indices 0, 1 and 3 build the colour-bar
# gradient and index 4 colours the label connector lines.
colour_palette = ['#FFB000','#648FFF','#785EF0',
'#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
import datasets
from datasets import load_dataset
# from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
from shiny import App, reactive, render, ui
# ### Import Datasets
# dataset = load_dataset('nesticot/mlb_data', data_files=[f'mlb_pitch_data_{season}.csv',
# f'mlb_pitch_data_{season-1}.csv',
# f'mlb_pitch_data_{season-2}.csv',
# f'mlb_pitch_data_{season-3}.csv',
# f'mlb_pitch_data_{season-4}.csv' ])
### Import Datasets
# Load the season's AAA pitch CSV from the 'nesticot/mlb_data' dataset repo.
_pitch_files = [f'aaa_pitch_data_{season}.csv']
dataset = load_dataset('nesticot/mlb_data', data_files=_pitch_files)
dataset_train = dataset['train']

# Moving the first feature column into the index and then discarding the
# index drops that column; afterwards keep only the last row per play_id.
_first_column = list(dataset_train.features.keys())[0]
df_2024 = (
    dataset_train.to_pandas()
    .set_index(_first_column)
    .reset_index(drop=True)
    .drop_duplicates(subset=['play_id'], keep='last')
)
# One row per selectable batting stat:
#   (column key, display label, flip_p, decimal_format, percent_adjust)
# - display label is used both as the axis/colour-bar label and in the title
# - flip_p marks stats whose axis is inverted and whose colour ramp is reversed
# - decimal_format is the tick-format code understood by decimal_format_assign
# - percent_adjust scales values before axis limits are snapped to multiples of 5
_BATTER_STAT_ROWS = (
    ('sweet_spot_percent', 'SweetSpot%', False, 'percent_1', 100),
    ('max_launch_speed', 'Max Exit Velocity', False, 'string_0', 1),
    ('launch_speed_90', '90th Percentile EV', False, 'string_0', 1),
    ('launch_speed', 'Exit Velocity', False, 'string_0', 1),
    ('launch_angle', 'Launch Angle', False, 'string_0', 100),
    ('avg', 'AVG', False, 'string_3', 100),
    ('obp', 'OBP', False, 'string_3', 100),
    ('slg', 'SLG', False, 'string_3', 100),
    ('ops', 'OPS', False, 'string_3', 100),
    ('k_percent', 'K%', True, 'percent_1', 100),
    ('bb_percent', 'BB%', False, 'percent_1', 100),
    ('bb_over_k_percent', 'BB/K', False, 'string_1', 100),
    ('bb_minus_k_percent', 'BB%-K%', False, 'percent_1', 100),
    ('csw_percent', 'CSW%', True, 'percent_1', 100),
    ('woba_percent', 'wOBA', False, 'string_3', 100),
    ('hard_hit_percent', 'HardHit%', False, 'percent_1', 100),
    ('barrel_percent', 'Barrel%', False, 'percent_1', 100),
    ('zone_contact_percent', 'Z-Contact%', False, 'percent_1', 100),
    ('zone_swing_percent', 'Z-Swing%', False, 'percent_1', 100),
    ('zone_percent', 'Zone%', False, 'percent_1', 100),
    ('chase_percent', 'O-Swing%', True, 'percent_1', 100),
    ('chase_contact', 'O-Contact%', True, 'percent_1', 100),
    ('swing_percent', 'Swing%', False, 'percent_1', 100),
    ('whiff_rate', 'Whiff%', True, 'percent_1', 100),
    ('swstr_rate', 'SwStr%', True, 'percent_1', 100),
)

# Per-stat plotting metadata keyed by the summary-frame column name.
batter_dict_stat = {
    stat_key: {
        'x_axis': label,
        'title': label,
        'flip_p': flip,
        'decimal_format': tick_format,
        'percent_adjust': adjust,
    }
    for stat_key, label, flip, tick_format, adjust in _BATTER_STAT_ROWS
}
# Stat column key -> display label; supplies the choices for the X-Axis,
# Y-Axis and Colour-Bar Axis select inputs.
batter_dict_stat_small = dict(
    sweet_spot_percent='SweetSpot%',
    max_launch_speed='Max Exit Velocity',
    launch_speed_90='90th Percentile EV',
    launch_speed='Exit Velocity',
    launch_angle='Launch Angle',
    avg='AVG',
    obp='OBP',
    slg='SLG',
    ops='OPS',
    k_percent='K%',
    bb_percent='BB%',
    bb_over_k_percent='BB/K',
    bb_minus_k_percent='BB%-K%',
    csw_percent='CSW%',
    woba_percent='wOBA',
    hard_hit_percent='HardHit%',
    barrel_percent='Barrel%',
    zone_contact_percent='Z-Contact%',
    zone_swing_percent='Z-Swing%',
    zone_percent='Zone%',
    chase_percent='O-Swing%',
    chase_contact='O-Contact%',
    swing_percent='Swing%',
    whiff_rate='Whiff%',
    swstr_rate='SwStr%',
)
# Lookup of batter_id -> batter_name; passed as the choices of the
# "Label Player" select input.
batter_dict = df_2024.set_index(['batter_id'])['batter_name'].to_dict()
# df_2024['game_date'] = pd.to_datetime(df_2024['game_date'])
# NOTE(review): same values as the colour_palette assigned earlier in this
# file; this re-assignment looks redundant.
colour_palette = ['#FFB000','#648FFF','#785EF0',
'#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
# Level codes mapped to themselves; in the visible code this is only
# referenced from a commented-out select input.
level_dict = {'MLB':'MLB','AAA':'AAA','AA':'AA','A+':'A+','A':'A','ROK':'ROK'}
# Debug output: first rows of the loaded pitch frame.
print('MLB TOP',df_2024.head(5))
import matplotlib.ticker as mtick
def decimal_format_assign(x):
    """Build the matplotlib tick formatter named by ``x['decimal_format']``.

    Args:
        x: Mapping with a ``'decimal_format'`` entry (one stat's metadata).

    Returns:
        A new ``PercentFormatter`` for ``'percent_1'``, a new
        ``FormatStrFormatter`` for ``'string_3'``, ``'string_0'`` or
        ``'string_1'``, and ``None`` for any other code.
    """
    if x['decimal_format'] == 'percent_1':
        return mtick.PercentFormatter(1, decimals=1)

    # Remaining codes differ only in the printf-style spec they select.
    printf_specs = (
        ('string_3', '%.3f'),
        ('string_0', '%.0f'),
        ('string_1', '%.1f'),
    )
    for code, spec in printf_specs:
        if x['decimal_format'] == code:
            return mtick.FormatStrFormatter(spec)
    return None
from batting_update import df_update, df_update_summ, df_update_summ_avg,df_summ_changes
# Run the project's df_update helper over the raw pitch frame once at import
# time; the server filters this result by game_date on every render.
df_2024_update_copy = df_update(df_2024)
# Debug output: first rows of the processed frame.
print('MLB TOP',df_2024_update_copy.head(5))
from adjustText import adjust_text
import seaborn as sns
def _axis_bounds(values, scale, pad=0.0):
    """Return ``(low, high)`` for *values*, snapped outward to multiples of ``5 / scale``.

    The minimum and maximum are multiplied by ``scale``, widened by ``pad``,
    rounded down / up to the nearest multiple of 5 and divided by ``scale``.
    """
    low = math.floor((values.min() * scale - pad) / 5) * 5 / scale
    high = math.ceil((values.max() * scale + pad) / 5) * 5 / scale
    return low, high


def server(input, output, session):
    """Shiny server function: registers the ``plot`` output.

    Args:
        input: Shiny inputs; reads ``go``, ``date_range_id``, ``n``,
            ``stat_x``, ``stat_y``, ``stat_z`` and the four
            ``n_percent_*`` thresholds.
        output: Shiny outputs object used to register ``plot``.
        session: Shiny session (unused).
    """

    @output
    @render.plot(alt="A histogram")
    @reactive.event(input.go, ignore_none=False)
    def plot():
        """Draw the batter scatter plot for the current sidebar selection.

        Re-runs when the "go" button is pressed. Returns the matplotlib
        figure so the renderer does not depend on the global pyplot state.
        """
        # game_date is compared against the stringified date inputs.
        date_range = input.date_range_id()
        in_range = (
            (df_2024_update_copy['game_date'] >= str(date_range[0]))
            & (df_2024_update_copy['game_date'] <= str(date_range[1]))
        )
        summary = df_update_summ(df_2024_update_copy[in_range])
        # NOTE(review): the return value was never used; the call is kept in
        # case df_summ_changes modifies `summary` in place - confirm and drop.
        df_summ_changes(summary)

        x_stat = input.stat_x()
        y_stat = input.stat_y()
        z_stat = input.stat_z()
        x_meta = batter_dict_stat[x_stat]
        y_meta = batter_dict_stat[y_stat]
        z_meta = batter_dict_stat[z_stat]

        level_id = 'MLB'
        n_input = int(input.n())

        # Keep only batters with at least the requested plate appearances.
        data_df = summary[summary['pa'] >= n_input].reset_index(drop=True)

        sns.set_theme(style="whitegrid", palette="pastel")
        fig, ax = plt.subplots(1, 1, figsize=(9, 9))
        fig.set_facecolor('#F0F0F0')
        ax.set_facecolor('white')

        # With nothing to plot, min()/max() are NaN and the axis-bound
        # rounding below would raise; show a message instead.
        if data_df[[x_stat, y_stat]].dropna().empty:
            ax.text(0.5, 0.5, 'No batters match the selected filters',
                    transform=ax.transAxes, ha='center', va='center')
            return fig

        # Colour ramp for the z stat; the end colours swap for flipped stats.
        low_colour, high_colour = colour_palette[1], colour_palette[0]
        if z_meta['flip_p']:
            low_colour, high_colour = high_colour, low_colour
        cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list(
            "", [low_colour, colour_palette[3], high_colour])
        norm = plt.Normalize(data_df[z_stat].min(), data_df[z_stat].max())
        sm = plt.cm.ScalarMappable(cmap=cmap_hue, norm=norm)

        # Grey base layer first, then the hue-coloured layer on top.
        sns.scatterplot(x=x_stat, y=y_stat, data=data_df, color='#b3b3b3', ax=ax)
        sns.scatterplot(x=x_stat, y=y_stat, data=data_df, color=colour_palette[0],
                        ax=ax, hue=z_stat, palette=cmap_hue)

        # Label batters that fall outside BOTH the x and the y threshold band.
        # The sidebar's "Toggle Names" switch is not consulted here; labels
        # are always drawn, as before.
        x_min = input.n_percent_bot_x()
        x_max = input.n_percent_top_x()
        y_min = input.n_percent_bot_y()
        y_max = input.n_percent_top_y()
        ts = []
        for x_val, y_val, batter in zip(data_df[x_stat], data_df[y_stat],
                                        data_df['batter_name']):
            if (x_val < x_min or x_val > x_max) and (y_val < y_min or y_val > y_max):
                ts.append(ax.text(x_val, y_val, batter, fontsize=8))

        # Dotted guide lines at the mean of each axis stat.
        x_line_lo, x_line_hi = _axis_bounds(data_df[x_stat], x_meta['percent_adjust'], pad=0.01)
        ax.hlines(xmin=x_line_lo, xmax=x_line_hi, y=data_df[y_stat].mean(),
                  color='gray', linewidth=3, linestyle='dotted', alpha=0.4)
        y_line_lo, y_line_hi = _axis_bounds(data_df[y_stat], y_meta['percent_adjust'], pad=0.01)
        ax.vlines(ymin=y_line_lo, ymax=y_line_hi, x=data_df[x_stat].mean(),
                  color='gray', linewidth=3, linestyle='dotted', alpha=0.4)

        ax.set_xlim(*_axis_bounds(data_df[x_stat], x_meta['percent_adjust']))
        ax.set_ylim(*_axis_bounds(data_df[y_stat], y_meta['percent_adjust']))

        # Build the title on every path; previously it was only assigned in
        # the non-MiLB branch, leaving it unbound for the combined levels.
        title_level = level_id
        if title_level == 'AAA, AA, A+, A':
            title_level = 'MiLB'
        title_spot = f'{title_level} Batter - {season} - {batter_dict_stat[y_stat]["title"]} vs {batter_dict_stat[x_stat]["title"]} (min. {n_input} PA)'
        # Font size shrinks as the title gets longer.
        ax.set_title(title_spot, fontsize=24/(len(title_spot)*0.03), fontname='Century Gothic')
        ax.set_xlabel(x_meta['x_axis'], fontsize=16, fontname='Century Gothic')
        ax.set_ylabel(y_meta['x_axis'], fontsize=16, fontname='Century Gothic')

        cbar = ax.figure.colorbar(sm, ax=ax, format=decimal_format_assign(x=z_meta),
                                  orientation='vertical', aspect=30)
        cbar.set_label(z_meta['x_axis'])
        fig.subplots_adjust(wspace=.02, hspace=.02)

        # Stats flagged flip_p are drawn on an inverted axis.
        if x_meta['flip_p']:
            ax.invert_xaxis()
        if y_meta['flip_p']:
            ax.invert_yaxis()

        ax.xaxis.set_major_formatter(decimal_format_assign(x=x_meta))
        ax.yaxis.set_major_formatter(decimal_format_assign(x=y_meta))

        if ts:
            adjust_text(ts,
                        arrowprops=dict(arrowstyle="-", color=colour_palette[4], lw=1), ax=ax)

        # The colour bar replaces the hue legend; get_legend() returns None
        # when no legend was created, so guard before removing it.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        fig.text(x=0.03, y=0.02, s='By: @TJStats', fontname='Century Gothic')
        fig.text(x=1-0.03, y=0.02, s='Data: MLB', ha='right', fontname='Century Gothic')
        fig.tight_layout()
        return fig
import shinyswatch
# app_ui = ui.page_fluid(ui.output_plot("plot",height = "1000px",width="1000px"))
# app = App(ui.page_fluid(ui.output_plot("plot",height = "1000px",width="1000px")),server)
# app = App(app_ui, server)
def _build_app_ui():
    """Assemble the page: header, sidebar controls and the tabbed plot output."""
    # Inline CSS for the page headings (kept flush-left so the string is unchanged).
    page_css = """
h4 {
margin-top: 1em;font-size:35px;
}
h2{
font-size:25px;
}
"""
    earliest_game = df_2024.game_date.min()
    latest_game = df_2024.game_date.max()

    # --- sidebar controls, top to bottom ---
    date_row = ui.row(
        ui.input_date_range(
            "date_range_id", "Date range input",
            start=earliest_game, end=latest_game, width=2,
            min=earliest_game, max=latest_game,
        )
    )
    min_pa_row = ui.row(
        ui.column(6, ui.input_numeric("n", "Minimum PA", value=100)),
    )
    axis_row = ui.row(
        ui.column(4, ui.input_select("stat_x", "X-Axis", batter_dict_stat_small,
                                     selected='k_percent', width=1, size=1)),
        ui.column(4, ui.input_select("stat_y", "Y-Axis", batter_dict_stat_small,
                                     selected='bb_percent', width=1, size=1)),
        ui.column(4, ui.input_select("stat_z", "Colour-Bar Axis", batter_dict_stat_small,
                                     selected='bb_over_k_percent', width=1, size=1)),
    )
    x_threshold_row = ui.row(
        ui.column(6, ui.input_numeric("n_percent_top_x", "Greater Than X", value=100)),
        ui.column(6, ui.input_numeric("n_percent_bot_x", "Less Than X", value=0)),
    )
    y_threshold_row = ui.row(
        ui.column(6, ui.input_numeric("n_percent_top_y", "Greater Than Y", value=100)),
        ui.column(6, ui.input_numeric("n_percent_bot_y", "Less Than Y", value=0)),
    )
    player_select = ui.input_select("player_id", "Label Player", batter_dict,
                                    width=1, size=1, multiple=True, selectize=True)
    switch_row = ui.row(
        ui.input_switch("names", "Toggle Names"),
        ui.input_switch("group_level", "Group Levels"),
    )
    go_button = ui.input_action_button("go", "Generate", class_="btn-primary")

    sidebar = ui.panel_sidebar(
        date_row,
        min_pa_row,
        axis_row,
        x_threshold_row,
        y_threshold_row,
        player_select,
        switch_row,
        go_button,
    )

    # --- main panel: a single tab holding the scatter plot ---
    main_panel = ui.panel_main(
        ui.navset_tab(
            ui.nav("MLB",
                   ui.output_plot("plot", height="1000px", width="1000px")),
            id="my_tabs",
        )
    )

    container = ui.tags.div(
        {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
        ui.tags.style(page_css),
        shinyswatch.theme.simplex(),
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.row(ui.layout_sidebar(sidebar, main_panel)),
    )
    return ui.page_fluid(container)


app = App(_build_app_ui(), server)