Spaces:
Running
Running
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
#import pitch_summary_functions as psf | |
import requests | |
import matplotlib | |
from api_scraper import MLB_Scrape | |
import math | |
# Season whose pitch data is loaded and shown in the plot title.
season = 2024

# Shared hex colour palette used for the colour-bar gradient and label arrows.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0',
    '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]
import datasets | |
from datasets import load_dataset | |
# from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui | |
from shiny import ui, render, App | |
# ### Import Datasets | |
# dataset = load_dataset('nesticot/mlb_data', data_files=[f'mlb_pitch_data_{season}.csv', | |
# f'mlb_pitch_data_{season-1}.csv', | |
# f'mlb_pitch_data_{season-2}.csv', | |
# f'mlb_pitch_data_{season-3}.csv', | |
# f'mlb_pitch_data_{season-4}.csv' ]) | |
### Import Datasets | |
# Download the season's pitch-level CSV from the Hugging Face hub.
dataset = load_dataset('nesticot/mlb_data', data_files=[f'aaa_pitch_data_{season}.csv'])
dataset_train = dataset['train']

# set_index + reset_index(drop=True) discards the first column of the file
# (presumably a saved row index -- TODO confirm), then only the last record
# per play_id is kept.
_leading_column = list(dataset_train.features.keys())[0]
df_2024 = (
    dataset_train.to_pandas()
    .set_index(_leading_column)
    .reset_index(drop=True)
    .drop_duplicates(subset=['play_id'], keep='last')
)
def _stat_entry(label, decimal_format, percent_adjust=100, flip_p=False):
    """Build the display metadata for one batter statistic.

    label           -- text used both as axis label and in the plot title
    decimal_format  -- key understood by ``decimal_format_assign``
    percent_adjust  -- multiplier applied before snapping axis limits to 5s
    flip_p          -- True when the axis for this statistic is inverted
    """
    return {
        'x_axis': label,
        'title': label,
        'flip_p': flip_p,
        'decimal_format': decimal_format,
        'percent_adjust': percent_adjust,
    }


# Display metadata for every statistic selectable in the UI, keyed by the
# column name of the summary dataframe.
batter_dict_stat = {
    'sweet_spot_percent': _stat_entry('SweetSpot%', 'percent_1'),
    'max_launch_speed': _stat_entry('Max Exit Velocity', 'string_0', percent_adjust=1),
    'launch_speed_90': _stat_entry('90th Percentile EV', 'string_0', percent_adjust=1),
    'launch_speed': _stat_entry('Exit Velocity', 'string_0', percent_adjust=1),
    'launch_angle': _stat_entry('Launch Angle', 'string_0'),
    'avg': _stat_entry('AVG', 'string_3'),
    'obp': _stat_entry('OBP', 'string_3'),
    'slg': _stat_entry('SLG', 'string_3'),
    'ops': _stat_entry('OPS', 'string_3'),
    'k_percent': _stat_entry('K%', 'percent_1', flip_p=True),
    'bb_percent': _stat_entry('BB%', 'percent_1'),
    'bb_over_k_percent': _stat_entry('BB/K', 'string_1'),
    'bb_minus_k_percent': _stat_entry('BB%-K%', 'percent_1'),
    'csw_percent': _stat_entry('CSW%', 'percent_1', flip_p=True),
    'woba_percent': _stat_entry('wOBA', 'string_3'),
    'hard_hit_percent': _stat_entry('HardHit%', 'percent_1'),
    'barrel_percent': _stat_entry('Barrel%', 'percent_1'),
    'zone_contact_percent': _stat_entry('Z-Contact%', 'percent_1'),
    'zone_swing_percent': _stat_entry('Z-Swing%', 'percent_1'),
    'zone_percent': _stat_entry('Zone%', 'percent_1'),
    'chase_percent': _stat_entry('O-Swing%', 'percent_1', flip_p=True),
    'chase_contact': _stat_entry('O-Contact%', 'percent_1', flip_p=True),
    'swing_percent': _stat_entry('Swing%', 'percent_1'),
    'whiff_rate': _stat_entry('Whiff%', 'percent_1', flip_p=True),
    'swstr_rate': _stat_entry('SwStr%', 'percent_1', flip_p=True),
}

# Column name -> label mapping for the select inputs. Derived from the table
# above so the two can never disagree.
batter_dict_stat_small = {
    stat_key: meta['title'] for stat_key, meta in batter_dict_stat.items()
}
# batter_id -> batter_name lookup; fed to the "Label Player" select input.
batter_dict = df_2024.set_index(['batter_id'])['batter_name'].to_dict()

# Same values as the palette defined next to the imports.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0',
    '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Identity mapping of the competition levels.
level_dict = {'MLB': 'MLB', 'AAA': 'AAA', 'AA': 'AA', 'A+': 'A+', 'A': 'A', 'ROK': 'ROK'}

# Start-up sanity check of the loaded pitch data.
print('MLB TOP', df_2024.head(5))
import matplotlib.ticker as mtick | |
def decimal_format_assign(x):
    """Return the matplotlib tick formatter for one statistic's metadata.

    Parameters:
        x: mapping with a ``'decimal_format'`` entry, one of ``'percent_1'``,
           ``'string_3'``, ``'string_0'`` or ``'string_1'``.

    Returns:
        ``PercentFormatter(1, decimals=1)`` for ``'percent_1'``; otherwise a
        ``FormatStrFormatter`` with 3, 0 or 1 decimal places.

    Raises:
        ValueError: if the format key is not one of the four above. Without
            this the function fell through and returned ``None``.
    """
    format_key = x['decimal_format']
    if format_key == 'percent_1':
        return mtick.PercentFormatter(1, decimals=1)

    printf_styles = {'string_3': '%.3f', 'string_0': '%.0f', 'string_1': '%.1f'}
    if format_key in printf_styles:
        return mtick.FormatStrFormatter(printf_styles[format_key])

    raise ValueError(f'Unknown decimal_format: {format_key!r}')
from batting_update import df_update, df_update_summ, df_update_summ_avg,df_summ_changes | |
# Run the project's df_update helper once at start-up; every render of the
# plot filters this frame by date instead of recomputing it.
df_2024_update_copy = df_update(df_2024)
print('MLB TOP', df_2024_update_copy.head(5))
from adjustText import adjust_text | |
import seaborn as sns | |
def _snapped_bounds(values, scale, pad=0.0):
    """Return (lower, upper) for *values*, snapped outward to multiples of 5/scale.

    ``pad`` is subtracted from / added to the scaled extremes before snapping.
    """
    lower = math.floor((values.min() * scale - pad) / 5) * 5 / scale
    upper = math.ceil((values.max() * scale + pad) / 5) * 5 / scale
    return lower, upper


def server(input, output, session):
    """Shiny server function: registers the ``plot`` output of the app."""

    @output
    @render.plot(alt="Batter statistic scatter plot")
    def plot():
        """Draw the scatter plot of two batter statistics, coloured by a third.

        Reads the date range, minimum PA, the three statistic selectors and the
        four label thresholds from ``input``. Returns the matplotlib figure.
        """
        # Restrict the pre-computed pitch data to the selected date range.
        start_date, end_date = (str(day) for day in input.date_range_id())
        in_range = ((df_2024_update_copy['game_date'] >= start_date)
                    & (df_2024_update_copy['game_date'] <= end_date))
        df_2024_update_summ = df_update_summ(df_2024_update_copy[in_range])
        # NOTE(review): the result is not used below; the call is kept in case
        # the helper modifies the summary frame in place -- confirm and drop.
        df_summ_changes(df_2024_update_summ)

        sns.set_theme(style="whitegrid", palette="pastel")

        x_stat = input.stat_x()
        y_stat = input.stat_y()
        z_stat = input.stat_z()
        x_meta = batter_dict_stat[x_stat]
        y_meta = batter_dict_stat[y_stat]
        z_meta = batter_dict_stat[z_stat]

        level_id = 'MLB'
        n_input = int(input.n())

        qualified = df_2024_update_summ['pa'] >= n_input
        data_df = df_2024_update_summ[qualified].reset_index(drop=True)

        fig, ax = plt.subplots(1, 1, figsize=(9, 9))

        if data_df.empty:
            # min()/max() of an empty column are NaN, which math.floor rejects;
            # show a message instead of failing the render.
            ax.text(0.5, 0.5,
                    f'No batters with at least {n_input} PA in the selected date range',
                    ha='center', va='center', transform=ax.transAxes)
            ax.set_axis_off()
            return fig

        # The gradient is reversed for statistics whose scale is flipped.
        gradient = [colour_palette[1], colour_palette[3], colour_palette[0]]
        if z_meta['flip_p']:
            gradient.reverse()
        cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list("", gradient)
        norm = plt.Normalize(data_df[z_stat].min(), data_df[z_stat].max())
        sm = plt.cm.ScalarMappable(cmap=cmap_hue, norm=norm)

        # Grey base layer first, then the same points coloured by the z statistic.
        sns.scatterplot(x=x_stat, y=y_stat, data=data_df, color='#b3b3b3', ax=ax)
        sns.scatterplot(x=x_stat, y=y_stat, data=data_df, color=colour_palette[0],
                        ax=ax, hue=z_stat, palette=cmap_hue)

        fig.set_facecolor('#F0F0F0')
        ax.set_facecolor('white')

        x_min = input.n_percent_bot_x()
        x_max = input.n_percent_top_x()
        y_min = input.n_percent_bot_y()
        y_max = input.n_percent_top_y()

        # Label only batters lying outside BOTH the x and the y threshold band.
        # NOTE(review): the "names" switch in the UI is not consulted here.
        ts = []
        for x_val, y_val, batter in zip(data_df[x_stat], data_df[y_stat],
                                        data_df['batter_name']):
            x_outside = x_val < x_min or x_val > x_max
            y_outside = y_val < y_min or y_val > y_max
            if x_outside and y_outside:
                ts.append(ax.text(x_val, y_val, batter, fontsize=8))

        # Dotted guide lines at the mean of each plotted statistic.
        line_left, line_right = _snapped_bounds(data_df[x_stat], x_meta['percent_adjust'], pad=0.01)
        ax.hlines(xmin=line_left, xmax=line_right, y=data_df[y_stat].mean(),
                  color='gray', linewidth=3, linestyle='dotted', alpha=0.4)
        line_bottom, line_top = _snapped_bounds(data_df[y_stat], y_meta['percent_adjust'], pad=0.01)
        ax.vlines(ymin=line_bottom, ymax=line_top, x=data_df[x_stat].mean(),
                  color='gray', linewidth=3, linestyle='dotted', alpha=0.4)

        ax.set_xlim(*_snapped_bounds(data_df[x_stat], x_meta['percent_adjust']))
        ax.set_ylim(*_snapped_bounds(data_df[y_stat], y_meta['percent_adjust']))

        # The title is built on every path; it used to be assigned only in the
        # non-MiLB branch.
        title_level = 'MiLB' if level_id == 'AAA, AA, A+, A' else level_id
        x_title = x_meta['title']
        y_title = y_meta['title']
        title_spot = f'{title_level} Batter - {season} - {y_title} vs {x_title} (min. {n_input} PA)'
        # Font size shrinks as the title gets longer.
        ax.set_title(title_spot, fontsize=24/(len(title_spot)*0.03), fontname='Century Gothic')

        ax.set_xlabel(x_meta['x_axis'], fontsize=16, fontname='Century Gothic')
        ax.set_ylabel(y_meta['x_axis'], fontsize=16, fontname='Century Gothic')

        cbar = ax.figure.colorbar(sm, ax=ax, format=decimal_format_assign(x=z_meta),
                                  orientation='vertical', aspect=30)
        cbar.set_label(z_meta['x_axis'])

        fig.subplots_adjust(wspace=.02, hspace=.02)

        if x_meta['flip_p']:
            ax.invert_xaxis()
        if y_meta['flip_p']:
            ax.invert_yaxis()

        ax.xaxis.set_major_formatter(decimal_format_assign(x=x_meta))
        ax.yaxis.set_major_formatter(decimal_format_assign(x=y_meta))

        if ts:
            adjust_text(ts,
                        arrowprops=dict(arrowstyle="-", color=colour_palette[4], lw=1), ax=ax)

        # The colour bar replaces the hue legend; guard against it being absent.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        fig.text(x=0.03, y=0.02, s='By: @TJStats', fontname='Century Gothic')
        fig.text(x=1-0.03, y=0.02, s='Data: MLB', ha='right', fontname='Century Gothic')
        fig.tight_layout()
        return fig
import shinyswatch | |
# app_ui = ui.page_fluid(ui.output_plot("plot",height = "1000px",width="1000px")) | |
# app = App(ui.page_fluid(ui.output_plot("plot",height = "1000px",width="1000px")),server) | |
# app = App(app_ui, server) | |
# Bounds of the loaded data; used as both the default selection and the
# allowed limits of the date-range input.
_first_game_date = df_2024.game_date.min()
_last_game_date = df_2024.game_date.max()

# NOTE(review): ui.panel_sidebar / ui.panel_main / ui.nav appear to be
# deprecated in later Shiny for Python releases -- confirm against the
# installed version before upgrading.
app = App(
    ui.page_fluid(
        ui.tags.div(
            {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
            ui.tags.style(
                """
                h4 {
                    margin-top: 1em;font-size:35px;
                }
                h2{
                    font-size:25px;
                }
                """
            ),
            shinyswatch.theme.simplex(),
            ui.tags.h4("TJStats"),
            ui.tags.i("Baseball Analytics and Visualizations"),
            ui.row(
                ui.layout_sidebar(
                    ui.panel_sidebar(
                        ui.row(
                            ui.input_date_range(
                                "date_range_id", "Date range input",
                                start=_first_game_date, end=_last_game_date,
                                width=2,
                                min=_first_game_date, max=_last_game_date,
                            ),
                        ),
                        ui.row(
                            ui.column(6, ui.input_numeric("n", "Minimum PA", value=100)),
                        ),
                        # Statistic selectors for the x axis, y axis and colour bar.
                        ui.row(
                            ui.column(4, ui.input_select("stat_x", "X-Axis", batter_dict_stat_small,
                                                         selected='k_percent', width=1, size=1)),
                            ui.column(4, ui.input_select("stat_y", "Y-Axis", batter_dict_stat_small,
                                                         selected='bb_percent', width=1, size=1)),
                            ui.column(4, ui.input_select("stat_z", "Colour-Bar Axis", batter_dict_stat_small,
                                                         selected='bb_over_k_percent', width=1, size=1)),
                        ),
                        # Thresholds deciding which batters receive a name label.
                        ui.row(
                            ui.column(6, ui.input_numeric("n_percent_top_x", "Greater Than X", value=100)),
                            ui.column(6, ui.input_numeric("n_percent_bot_x", "Less Than X", value=0)),
                        ),
                        ui.row(
                            ui.column(6, ui.input_numeric("n_percent_top_y", "Greater Than Y", value=100)),
                            ui.column(6, ui.input_numeric("n_percent_bot_y", "Less Than Y", value=0)),
                        ),
                        ui.input_select("player_id", "Label Player", batter_dict,
                                        width=1, size=1, multiple=True, selectize=True),
                        ui.row(
                            ui.input_switch("names", "Toggle Names"),
                            ui.input_switch("group_level", "Group Levels"),
                        ),
                        ui.input_action_button("go", "Generate", class_="btn-primary"),
                    ),
                    ui.panel_main(
                        ui.navset_tab(
                            ui.nav(
                                "MLB",
                                ui.output_plot("plot", height="1000px", width="1000px"),
                            ),
                            id="my_tabs",
                        ),
                    ),
                ),
            ),
        ),
    ),
    server,
)