# app.py - Hugging Face Space file by nesticot (commit e5d2c76 "Update app.py", verified, 42.9 kB)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pitch_summary_functions as psf
import requests
import matplotlib
from api_scraper import MLB_Scrape
from shinywidgets import output_widget, render_widget
import shinyswatch
# Categorical palette passed to seaborn (sns.set_theme) when the figures are themed.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
    '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]
import datasets
from datasets import load_dataset
### Import Datasets
# Load the 2024 pitch CSV from the `nesticot/mlb_data` dataset repository.
dataset = load_dataset('nesticot/mlb_data', data_files=['a_pitch_data_2024.csv'])
dataset_train = dataset['train']

# df_2024 is read by the rest of the module (spring_teams, psf.df_update_code,
# the date-range widget), so it has to be built here.  The first column is
# moved into the index and then dropped, and only the last row per play_id
# is kept.
df_2024 = (
    dataset_train.to_pandas()
    .set_index(list(dataset_train.features.keys())[0])
    .reset_index(drop=True)
    .drop_duplicates(subset=['play_id'], keep='last')
)

# Manual correction: add 3 to start_speed for the FF/FC pitches of pitcher 804636.
_speed_fix_rows = (df_2024['pitcher_id'] == 804636) & df_2024['pitch_type'].isin(['FF', 'FC'])
df_2024.loc[_speed_fix_rows, 'start_speed'] += 3
# ### Import Datasets
# import datasets
# from datasets import load_dataset
# dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2020.csv' ])
# dataset_train = dataset['train']
# df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
### PITCH COLOURS ###
# Maps a pitch description to the hex colour used for it in the plots and legend.
# Trailing comments are the alternative hex codes noted by the author.
pitch_colours = {
    # Fastball family
    'Four-Seam Fastball': '#FF007D',  #BC136F
    'Fastball': '#FF007D',
    'Sinker': '#98165D',  #DC267F
    'Cutter': '#BE5FA0',

    # Changeup / splitter family
    'Changeup': '#F79E70',  #F75233
    'Splitter': '#FE6100',  #F75233
    'Screwball': '#F08223',
    'Forkball': '#FFB000',

    # Slider family
    'Slider': '#67E18D',  #1BB999#785EF0
    'Sweeper': '#1BB999',  #37CD85#904039
    'Slurve': '#376748',  #785EF0#549C07#BEABD8

    # Curveball family
    'Knuckle Curve': '#311D8B',
    'Curveball': '#3025CE',
    'Slow Curve': '#274BFC',

    # Miscellaneous
    'Eephus': '#648FFF',
    'Knuckleball': '#867A08',
    'Pitch Out': '#472C30',
    'Other': '#9C8975',
}
# pitcher_id -> pitcher_team, taken from each pitcher's last row in df_2024.
spring_teams = (
    df_2024.groupby(['pitcher_id'])
    .tail(1)
    .set_index(['pitcher_id'])['pitcher_team']
    .to_dict()
)

# Season window (ISO dates) and the season year used by the (currently
# commented-out) Fangraphs request below.
season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024
#chad_fg = requests.get(f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching').json()
# chadwick_df_small = pd.DataFrame(data={
# 'key_mlbam':[x['xMLBAMID'] for x in chad_fg['data']],
# 'key_fangraphs':[x['playerid'] for x in chad_fg['data']],
# 'Name':[x['PlayerName'] for x in chad_fg['data']],
# })
# mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
# Reference table that is later handed to psf.table_summary.
statcast_pitch_summary = pd.read_csv('statcast_pitch_summary.csv')

# Three-stop colour map: blue -> white -> amber.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['#648FFF', '#FFFFFF', '#FFB000'])

# Run the raw pitches through the two psf preparation steps.
df_2024_codes = psf.df_update_code(df_2024)
df_2024_update = psf.df_clean(df_2024_codes)

import joblib

# Pre-trained models plus the constants used to put y_pred on the tjStuff+ scale.
model = joblib.load('joblib_model/tjstuff_model_20240318.joblib')
y_pred_mean = 0.0011434511
y_pred_std = 0.006554768
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')

features = [
    'start_speed', 'spin_rate', 'extension', 'ivb', 'hb', 'x0', 'z0',
    'fb_max_velo_diff', 'fb_max_ivb_diff', 'fb_max_hb_diff',
]
targets = ['delta_run_exp_mean']

# tjStuff+: the prediction is negated and centred on y_pred_mean, divided by
# y_pred_std, then mapped onto a scale with centre 100 and step 10.
df_2024_update['y_pred'] = model.predict(df_2024_update[features])
df_2024_update['tj_stuff_plus'] = 100 + 10 * ((y_pred_mean - df_2024_update['y_pred']) / y_pred_std)

# Predicted wOBA: only rows with both launch_angle and launch_speed get a
# value; every other row stays NaN.  Each row's class probabilities are
# multiplied by the weights below and summed.
df_2024_update['woba_pred'] = np.nan
_has_launch_data = df_2024_update[['launch_angle', 'launch_speed']].notnull().all(axis=1)
_class_probabilities = xwoba_model.predict_proba(
    df_2024_update.loc[_has_launch_data, ['launch_angle', 'launch_speed']])
df_2024_update.loc[_has_launch_data, 'woba_pred'] = [
    sum(weighted_row)
    for weighted_row in _class_probabilities * ([0, 0.883, 1.244, 1.569, 2.004])
]
# pitcher_id -> pitcher_name (sorted by name); used as the player selector choices.
pitcher_dicts = (
    df_2024_update.set_index('pitcher_id')['pitcher_name']
    .sort_values()
    .to_dict()
)

# Logo table (columns 'id' and 'imageLink' are read when drawing the header).
team_logos = pd.read_csv('team_logos.csv')

# team_id -> parent_org_id, built from the teams returned by the scraper.
mlb_stats = MLB_Scrape()
teams_df = mlb_stats.get_teams()
team_logo_dict = (
    teams_df.set_index(['team_id'])['parent_org_id']
    .to_dict()
)
# Shared font settings.  The family was misspelt 'calibi', which is not a font
# name; everything else in this file asks for 'Calibri'.
font_properties = {'family': 'Calibri', 'size': 12}
font_properties_titles = {'family': 'Calibri', 'size': 20}
font_properties_axes = {'family': 'Calibri', 'size': 16}

# Module-level placeholders kept for backward compatibility; the render
# functions build their own local objects with these names.
df_plot = []
ax2_loc = []
gs = []
fig = []
# Plot id -> label shown in the three "Plot Left/Middle/Right" selectors.
function_dict = dict(
    velocity_kde='Velocity Distributions',
    break_plot='Pitch Movement',
    rolling_tj_stuff='Rolling tjStuff+',
    location_lhb='Locations vs LHB',
    location_rhb='Locations vs RHB',
)

# Batter-hand split: label shown in the selector, and the batter_hand codes
# each split keeps.
split_dict = dict(all='All', left='LHB', right='RHB')
split_dict_hand = dict(all=['L', 'R'], left=['L'], right=['R'])

# Selectable ball (0-3) and strike (0-2) counts; value and label are the same string.
ball_dict = {str(count): str(count) for count in range(4)}
strike_dict = {str(count): str(count) for count in range(3)}
# count_dict = {'0_0':'Through 0-0',
# '0_1':'Through 0-1',
# '0_2':'Through 0-2',
# '1_0':'Through 1-0',
# '1_1':'Through 1-1',
# '1_2':'Through 1-2',
# '2_1':'Through 2-1',
# '2_0':'Through 2-0',
# '3_0':'Through 3-0',
# '3_1':'Through 3-1',
# '2_2':'Through 2-2',
# '3_2':'Through 3-2'}
# count_dict_fg = {'0_0':'',
# '0_1':'61',
# '0_2':'62',
# '1_0':'63',
# '1_1':'64',
# '1_2':'65',
# '2_1':'66',
# '2_0':'67',
# '3_0':'68',
# '3_1':'69',
# '2_2':'70',
# '3_2':'71'}
from urllib.request import Request, urlopen
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
def _build_app_ui():
    """Assemble the page: a sidebar of controls beside two tabbed summary plots."""
    page_css = (
        "\n"
        "h4 {\n"
        "margin-top: 1em;font-size:35px;\n"
        "}\n"
        "h2{\n"
        "font-size:25px;\n"
        "}\n"
    )

    def count_filter_column(select_id, plural, choices, radio_id):
        # One half-width column: a count selector plus the comparison mode.
        return ui.column(
            6,
            ui.input_select(select_id, plural, choices, multiple=False, selected='0'),
            ui.input_radio_buttons(
                radio_id,
                f"Count Filter {plural}",
                {
                    "exact": f"Exact {plural}",
                    "greater": f">= {plural}",
                    "lesser": f"<= {plural}",
                },
                selected='greater',
            ),
        )

    # Player selector next to the server-rendered 'test' output (a date range
    # or a game selector, depending on the active tab).
    # NOTE(review): 'Select Game' is passed positionally to ui.output_ui and is
    # presumably not used as a label - confirm against the output_ui signature.
    player_row = ui.row(
        ui.column(
            6,
            ui.input_select('player_id', 'Select Player', pitcher_dicts,
                            selectize=True, multiple=False),
        ),
        ui.column(6, ui.output_ui('test', 'Select Game')),
    )

    # Which plot goes in the left / middle / right panel.
    panel_choices = [
        ('plot_id_1', 'Plot Left', 'velocity_kde'),
        ('plot_id_2', 'Plot Middle', 'rolling_tj_stuff'),
        ('plot_id_3', 'Plot Right', 'break_plot'),
    ]
    plot_row = ui.row(*[
        ui.column(
            4,
            ui.input_select(input_id, label, function_dict,
                            multiple=False, selected=default),
        )
        for input_id, label, default in panel_choices
    ])

    count_row = ui.row(
        count_filter_column('ball_id', 'Balls', ball_dict, "count_id_balls"),
        count_filter_column('strike_id', 'Strikes', strike_dict, "count_id_strikes"),
    )

    options_row = ui.row(
        ui.column(
            6,
            ui.input_select('split_id', 'Select Split', split_dict, multiple=False),
        ),
        ui.column(
            6,
            ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)',
                             min=1, value=10),
        ),
    )

    sidebar = ui.panel_sidebar(
        player_row,
        plot_row,
        count_row,
        options_row,
        ui.input_action_button("go", "Generate", class_="btn-primary"),
        width=4,
    )

    # The tab titles are compared against input.my_tabs() in the server.
    tabs = ui.navset_tab(
        ui.nav(
            "Season Summary",
            ui.output_plot('plot', width='2000px', height='2000px'),
        ),
        ui.nav(
            "Game Summary",
            ui.output_plot('plot_game', width='2000px', height='2000px'),
        ),
        id="my_tabs",
    )

    return ui.page_fluid(
        ui.tags.div(
            {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
            ui.tags.style(page_css),
            shinyswatch.theme.simplex(),
            ui.tags.h4("TJStats"),
            ui.tags.i("Baseball Analytics and Visualizations"),
            ui.row(ui.layout_sidebar(sidebar, ui.panel_main(tabs))),
        )
    )


app_ui = _build_app_ui()
#print(app_ui)
# Seconds to wait on an MLB web request before giving up.
_REQUEST_TIMEOUT = 30

# Count-filter mode -> (pandas comparison method name, title template).
_COUNT_FILTERS = {
    'exact': ('eq', '{n} {what} Count; '),
    'greater': ('ge', 'At Least {n} {what} Count; '),
    'lesser': ('le', 'At Most {n} {what} Count; '),
}

# Split id -> suffix appended to the filter line under the title.
_SPLIT_TITLES = {'all': '', 'left': 'vs. LHH', 'right': 'vs. RHH'}


def _message_figure(message):
    """Return a placeholder figure that only shows *message*."""
    fig, ax = plt.subplots(1, 1, figsize=(9, 9))
    ax.text(x=0.5, y=0.5, s=message, fontsize=150, ha='center')
    # The string 'off' is truthy and would switch the grid ON; pass False.
    ax.grid(False)
    return fig


def _apply_count_filter(df, column, mode, threshold, label):
    """Filter *df* on a ball/strike column; return (filtered df, title fragment)."""
    method, template = _COUNT_FILTERS[mode]
    keep = getattr(df[column], method)(int(threshold))
    return df[keep], template.format(n=threshold, what=label)


def _filter_pitches(df, inputs):
    """Apply the batter-hand split and count filters; return (df, filter line)."""
    split = inputs.split_id()
    df = df[df['batter_hand'].isin(split_dict_hand[split])]

    ball_mode, strike_mode = inputs.count_id_balls(), inputs.count_id_strikes()
    balls, strikes = inputs.ball_id(), inputs.strike_id()
    # ">= 0 balls and >= 0 strikes" keeps every pitch, so no count title is shown.
    keeps_every_count = (ball_mode == 'greater' and strike_mode == 'greater'
                         and int(balls) == 0 and int(strikes) == 0)
    if keeps_every_count:
        ball_title = strike_title = ''
    else:
        df, ball_title = _apply_count_filter(df, 'balls', ball_mode, balls, 'Ball')
        df, strike_title = _apply_count_filter(df, 'strikes', strike_mode, strikes, 'Strike')
    return df, f'{ball_title}{strike_title}{_SPLIT_TITLES[split]}'


def _prepare_pitches(df):
    """Return a sorted copy of *df* with running pitch counters added."""
    # Work on a copy so the column assignments never touch df_2024_update.
    df = df.copy()
    df['pitch_type_count'] = df.groupby(['pitcher_id'])['pitch_type'].cumcount() + 1
    df['pitch_type_count_each'] = df.groupby(['pitch_type'])['pitch_type'].cumcount() + 1
    df = df.sort_values(by=['pitch_description'])
    df = df.sort_values(by=['start_time'])
    return df


def _fetch_pitching_stats(pitcher_id, start_date, end_date):
    """Fetch the pitcher's stat line for a date range from the MLB Stats API.

    Returns a one-row DataFrame, or None when the response holds no stat
    split for the range.
    """
    start = pd.to_datetime(start_date).strftime('%m/%d/%Y')
    end = pd.to_datetime(end_date).strftime('%m/%d/%Y')
    response = requests.get(
        f'https://statsapi.mlb.com/api/v1/people/{pitcher_id}?appContext=minorLeague&hydrate=stats(group=[pitching],type=[byDateRange],sportId=14,startDate={start},endDate={end})',
        timeout=_REQUEST_TIMEOUT,
    )
    try:
        stat_line = response.json()['people'][0]['stats'][0]['splits'][0]['stat']
    except (KeyError, IndexError):
        return None
    return pd.DataFrame(data=dict(stat_line), index=[0])


def _season_stats_table(stats):
    """Build (table, header labels) for the season stat row; None if no stats."""
    if stats is None:
        return None
    stats = stats.copy()
    stats['k_percent'] = stats['strikeOuts'] / stats['battersFaced']
    stats['bb_percent'] = stats['baseOnBalls'] / stats['battersFaced']
    stats['k_bb_percent'] = stats['k_percent'] - stats['bb_percent']
    table = stats[['inningsPitched', 'battersFaced', 'era', 'whip',
                   'k_percent', 'bb_percent', 'k_bb_percent']].copy()
    for column in ('k_percent', 'bb_percent', 'k_bb_percent'):
        table[column] = table[column].astype(float).map('{:.1%}'.format)
    headers = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ERA}$', '$\\bf{WHIP}$',
               '$\\bf{K\\%}$', '$\\bf{BB\\%}$', '$\\bf{K-BB\\%}$']
    return table, headers


def _game_stats_table(stats, whiffs):
    """Build (table, header labels) for the single-game stat row; None if no stats."""
    if stats is None:
        return None
    table = stats[['inningsPitched', 'battersFaced', 'earnedRuns', 'hits',
                   'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns']].copy()
    table['whiffs'] = int(whiffs)
    headers = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ER}$', '$\\bf{H}$', '$\\bf{K}$',
               '$\\bf{BB}$', '$\\bf{HBP}$', '$\\bf{HR}$', '$\\bf{Whiffs}$']
    return table, headers


def _draw_stats_table(ax, table_df, headers):
    """Draw the one-row stat table on *ax* with bold header labels."""
    table = ax.table(cellText=table_df.values, colLabels=table_df.columns,
                     cellLoc='center', bbox=[0.04, 0.2, 0.92, 0.8])
    table.set_fontsize(20)
    for column, header in enumerate(headers):
        table.get_celld()[(0, column)].get_text().set_text(header)
    ax.axis('off')


def _draw_panel(plot_id, df_plot, ax, gs, gs_col, fig, inputs):
    """Draw the plot selected by *plot_id* into one of the three middle panels."""
    if plot_id == 'velocity_kde':
        psf.velocity_kdes(df=df_plot, ax=ax, gs=gs, gs_list=gs_col, fig=fig)
    elif plot_id == 'rolling_tj_stuff':
        psf.tj_stuff_roling(df=df_plot, window=int(inputs.rolling_window()), ax=ax)
    elif plot_id == 'break_plot':
        psf.break_plot(df=df_plot, ax=ax)
    elif plot_id == 'location_lhb':
        psf.location_plot(df=df_plot, ax=ax, hand='L')
    elif plot_id == 'location_rhb':
        psf.location_plot(df=df_plot, ax=ax, hand='R')


def _read_image(url):
    """Download *url* and decode it into an image array.

    plt.imread no longer accepts URLs directly, so the bytes are fetched
    first and passed on as a file-like object.  Raises
    requests.RequestException on download failure.
    """
    from io import BytesIO

    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    filename = url.split('?', 1)[0].rsplit('/', 1)[-1]
    extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else 'png'
    return plt.imread(BytesIO(response.content), format=extension)


def _draw_header(ax, pitcher_id, pitcher_name, summary_title, date_line, filter_line):
    """Draw the title block: name, bio line, dates, filters, headshot and team logo."""
    from matplotlib.offsetbox import AnnotationBbox, OffsetImage

    text_style = dict(fontname='Calibri', ha='center', va='top')
    ax.text(x=0.5, y=0.8, s=pitcher_name, fontsize=56, **text_style)
    ax.text(x=0.5, y=0.35, s=summary_title, fontsize=40, fontstyle='italic', **text_style)

    person = requests.get(
        url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam",
        timeout=_REQUEST_TIMEOUT,
    ).json()['people'][0]
    ax.text(x=0.5, y=0.5,
            s=f"{person['pitchHand']['code']}HP, Age: {person['currentAge']}, {person['height']}/{person['weight']}",
            fontsize=24, **text_style)
    ax.text(x=0.5, y=0.15, s=date_line, fontsize=30, fontstyle='italic', **text_style)
    ax.text(x=0.5, y=0.0, s=filter_line, fontsize=20, **text_style)
    ax.axis('off')

    # Player headshot, falling back to the generic silhouette on an HTTP error.
    try:
        headshot = _read_image(
            f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_180/v1/people/{pitcher_id}/headshot/milb/current.png')
    except requests.HTTPError:
        headshot = _read_image(
            'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png')
    ax.add_artist(AnnotationBbox(OffsetImage(headshot, zoom=0.5), (0.125, 0.4), frameon=False))

    # Team logo is best-effort: an unknown team or a failed download just
    # leaves the logo out instead of failing the whole summary.
    team = person.get('currentTeam')
    if team is not None:
        try:
            parent_org = team_logo_dict[team['id']]
            logo_url = team_logos[team_logos['id'] == parent_org]['imageLink'].values[0]
            logo = _read_image(logo_url)
        except (KeyError, IndexError, requests.RequestException):
            logo = None
        if logo is not None:
            ax.add_artist(AnnotationBbox(OffsetImage(logo, zoom=0.4), (0.875, 0.40), frameon=False))


def _draw_footer(fig, gs):
    """Add the credits / notes strip in the last grid row."""
    axfooter = fig.add_subplot(gs[-1, :])
    axfooter.text(x=0.05, y=1, s='By: Thomas Nestico\n @TJStats', fontname='Calibri', ha='left', fontsize=24, va='top')
    axfooter.text(x=1-0.05, y=1, s='Data: MLB', ha='right', fontname='Calibri', fontsize=24, va='top')
    axfooter.text(x=0.5, y=0.8, s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
                  ha='center', va='center', fontname='Calibri', fontsize=16)
    axfooter.axis('off')


def _build_summary_figure(df_plot, pitcher_id, inputs, stats_table,
                          summary_title, date_line, filter_line):
    """Build the 20x20 summary figure shared by the season and game views.

    *df_plot* must be a non-empty, already filtered copy (a 'prop' column is
    added to it).  *stats_table* is a (DataFrame, header labels) pair or None,
    in which case the stat row is left blank.  Returns the figure.
    """
    from matplotlib.gridspec import GridSpec

    grouped_ivb = psf.group_ivb_update(
        df=df_plot,
        agg_list=['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type', 'pitch_description'])
    grouped_ivb_all = psf.group_ivb_update(
        df=df_plot, agg_list=['pitcher_id', 'pitcher_name', 'pitcher_hand'])

    plt.rcParams['font.family'] = 'Calibri'
    df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")

    fig = plt.figure(figsize=(20, 20))
    plt.rcParams.update({'figure.autolayout': True})
    fig.set_facecolor('white')
    sns.set_theme(style="whitegrid", palette=colour_palette)

    # Rows: header, stat table, three plot panels, per-pitch table, footer.
    gs = GridSpec(5, 5, height_ratios=[150, 75, 225, 325, 50], width_ratios=[1, 100, 100, 100, 1])
    gs.update(hspace=0.2, wspace=0.3)
    ax_header = fig.add_subplot(gs[0, :])
    ax_stats = fig.add_subplot(gs[1, :])
    panel_axes = [fig.add_subplot(gs[2, column]) for column in (1, 2, 3)]
    ax_pitches = fig.add_subplot(gs[-2, :])
    ax_stats.axis('off')

    if stats_table is not None:
        _draw_stats_table(ax_stats, *stats_table)

    selected_plots = [inputs.plot_id_1(), inputs.plot_id_2(), inputs.plot_id_3()]
    for plot_id, ax, gs_col in zip(selected_plots, panel_axes, [1, 2, 3]):
        _draw_panel(plot_id, df_plot, ax, gs, gs_col, fig, inputs)

    # One empty scatter per pitch serves as its legend handle; descriptions
    # without a configured colour fall back to the 'Other' colour.
    legend_handles = {
        pitch: plt.scatter([], [], color=pitch_colours.get(pitch, pitch_colours['Other']),
                           marker='o', s=100)
        for pitch in df_plot['pitch_description'].unique()
    }

    psf.table_summary(df=df_plot.copy(),
                      pitcher_id=pitcher_id,
                      ax=ax_pitches,
                      df_group=grouped_ivb.copy(),
                      df_group_all=grouped_ivb_all.copy(),
                      statcast_pitch_summary=statcast_pitch_summary.copy())

    # Legend entries ordered from most to least frequent pitch.
    pitches_by_usage = (df_plot['pitch_description'].value_counts()
                        .sort_values(ascending=False).index.tolist())
    ax_pitches.legend([legend_handles[pitch] for pitch in pitches_by_usage], pitches_by_usage,
                      bbox_to_anchor=(0.1, 0.81, 0.8, 0.2), ncol=5,
                      fancybox=True, loc='lower center', fontsize=20, framealpha=1.0,
                      markerscale=2, prop={'family': 'Calibri', 'size': 20})

    _draw_header(ax_header, pitcher_id, f'{df_plot.pitcher_name.values[0]}',
                 summary_title, date_line, filter_line)
    _draw_footer(fig, gs)
    fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
    return fig


def server(input, output, session):
    """Shiny server: registers the dynamic selector and the two summary plots.

    Outputs:
      test      - date-range input on the "Season Summary" tab, game selector
                  on the "Game Summary" tab.
      plot      - season summary figure for the chosen pitcher and date range.
      plot_game - single-game summary figure for the chosen pitcher and game.
    Both plots re-render when the "go" button is pressed.
    """

    @output
    @render.ui
    def test():
        active_tab = input.my_tabs()
        if active_tab == 'Season Summary':
            first_date = df_2024.game_date.min()
            last_date = df_2024.game_date.max()
            # NOTE(review): width=2 is kept from the original call; confirm
            # whether a CSS width string was intended.
            return ui.input_date_range("date_range_id", "Date range input",
                                       start=first_date, end=last_date, width=2,
                                       min=first_date, max=last_date)
        if active_tab == 'Game Summary':
            if not input.player_id():
                return None
            pitcher_id = int(input.player_id())
            games = df_2024_update[df_2024_update['pitcher_id'] == pitcher_id]
            games = games.assign(
                game_opp=games['game_date'].astype(str) + ' vs ' + games['batter_team'].astype(str))
            games = games.drop_duplicates(subset=['pitcher_id', 'game_id', 'game_opp'])
            # Flat {game_id: label} mapping; the nested dict produced by
            # DataFrame.to_dict() showed up as an option group named "game_opp".
            game_choices = {str(game_id): label
                            for game_id, label in zip(games['game_id'], games['game_opp'])}
            return ui.input_select("game_id", "Select Game", game_choices, selectize=True)
        return None

    @output
    @render.plot
    @reactive.event(input.go, ignore_none=False)
    def plot():
        if len(input.player_id()) < 1:
            return _message_figure('Please Select\nA Player')
        pitcher_id = int(input.player_id())
        start_date, end_date = input.date_range_id()

        df_plot = df_2024_update[df_2024_update['pitcher_id'] == pitcher_id]
        game_dates = pd.to_datetime(df_plot['game_date']).dt.date
        df_plot = df_plot[(game_dates >= start_date) & (game_dates <= end_date)]
        df_plot, filter_line = _filter_pitches(df_plot, input)
        if len(df_plot) < 1:
            return _message_figure('Please Select\nOther Parameters')
        df_plot = _prepare_pitches(df_plot)

        stats = _fetch_pitching_stats(pitcher_id, start_date, end_date)
        return _build_summary_figure(
            df_plot, pitcher_id, input, _season_stats_table(stats),
            summary_title='A Season Pitching Summary',
            date_line=f'{start_date} to {end_date}',
            filter_line=filter_line)

    @output
    @render.plot
    @reactive.event(input.go, ignore_none=False)
    def plot_game():
        if len(input.player_id()) < 1:
            return _message_figure('Please Select\nA Player')
        pitcher_id = int(input.player_id())

        df_plot = df_2024_update[(df_2024_update['pitcher_id'] == pitcher_id)
                                 & (df_2024_update['game_id'] == int(input.game_id()))]
        df_plot, filter_line = _filter_pitches(df_plot, input)
        if len(df_plot) < 1:
            return _message_figure('Please Select\nOther Parameters')
        df_plot = _prepare_pitches(df_plot)
        df_plot['game_opp'] = df_plot['game_date'].astype(str) + ' vs ' + df_plot['batter_team'].astype(str)

        # A game summary asks the stats API for a one-day range.
        game_date = df_plot['game_date'].values[0]
        stats = _fetch_pitching_stats(pitcher_id, game_date, game_date)
        return _build_summary_figure(
            df_plot, pitcher_id, input,
            _game_stats_table(stats, df_plot['is_whiff'].sum()),
            summary_title='A Game Pitching Summary',
            date_line=df_plot['game_opp'].values[0],
            filter_line=filter_line)
# Application object: the UI definition paired with the server function.
app = App(ui=app_ui, server=server)