nesticot commited on
Commit
0550419
1 Parent(s): f960bc6

Upload 5 files

Browse files
Files changed (4) hide show
  1. app.py +518 -611
  2. left.png +0 -0
  3. pitcher_update.py +562 -0
  4. right.png +0 -0
app.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
- import pitch_summary_functions as psf
6
  import requests
7
  import matplotlib
8
  from api_scraper import MLB_Scrape
@@ -10,19 +10,20 @@ from shinywidgets import output_widget, render_widget
10
  import shinyswatch
11
 
12
 
13
-
 
14
  colour_palette = ['#FFB000','#648FFF','#785EF0',
15
  '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
16
 
17
  import datasets
18
  from datasets import load_dataset
19
  ### Import Datasets
20
- dataset = load_dataset('nesticot/mlb_data', data_files=['a_pitch_data_2024.csv' ])
21
  dataset_train = dataset['train']
22
  df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True).drop_duplicates(subset=['play_id'],keep='last')
23
 
 
24
 
25
- # df_2024.loc[(df_2024['pitcher_id']==804636)&(df_2024['pitch_type'].isin(['FF','FC']),'start_speed'] += 3
26
  # ### Import Datasets
27
  # import datasets
28
  # from datasets import load_dataset
@@ -33,7 +34,6 @@ df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys()
33
  ### PITCH COLOURS ###
34
  pitch_colours = {
35
  'Four-Seam Fastball':'#FF007D',#BC136F
36
- 'Fastball':'#FF007D',
37
  'Sinker':'#98165D',#DC267F
38
  'Cutter':'#BE5FA0',
39
 
@@ -57,117 +57,48 @@ pitch_colours = {
57
  'Other':'#9C8975',
58
  }
59
 
60
- spring_teams = df_2024.groupby(['pitcher_id']).tail(1)[['pitcher_id','pitcher_team']].set_index(['pitcher_id'])['pitcher_team'].to_dict()
61
-
62
- season_start = '2024-03-20'
63
- season_end = '2024-09-29'
64
- season_fg=2024
65
- #chad_fg = requests.get(f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching').json()
66
-
67
-
68
- # chadwick_df_small = pd.DataFrame(data={
69
- # 'key_mlbam':[x['xMLBAMID'] for x in chad_fg['data']],
70
- # 'key_fangraphs':[x['playerid'] for x in chad_fg['data']],
71
- # 'Name':[x['PlayerName'] for x in chad_fg['data']],
72
- # })
73
-
74
-
75
- # mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
76
-
77
-
78
- statcast_pitch_summary = pd.read_csv('statcast_pitch_summary.csv')
79
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
80
-
81
-
82
- df_2024_codes = psf.df_update_code(df_2024)
83
-
84
- df_2024_update = psf.df_clean(df_2024_codes)
85
- import joblib
86
- model = joblib.load('joblib_model/tjstuff_model_20240318.joblib')
87
- y_pred_mean = 0.0011434511
88
- y_pred_std = 0.006554768
89
 
90
- xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
91
 
92
- features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff']
93
 
94
- targets = ['delta_run_exp_mean']
 
 
 
 
95
 
 
 
96
 
97
- df_2024_update['y_pred'] = model.predict(df_2024_update[features])
98
 
99
- df_2024_update['tj_stuff_plus'] = 100 + 10*((-df_2024_update.y_pred +y_pred_mean) / y_pred_std)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- df_2024_update['woba_pred'] = np.nan
102
-
103
- df_2024_update.loc[df_2024_update[['launch_angle','launch_speed']].isnull().sum(axis=1)==0,'woba_pred'] = [sum(x) for x in xwoba_model.predict_proba(df_2024_update.loc[df_2024_update[['launch_angle','launch_speed']].isnull().sum(axis=1)==0][['launch_angle','launch_speed']]) * ([0, 0.883,1.244,1.569,2.004])]
104
-
105
- pitcher_dicts = df_2024_update.set_index('pitcher_id')['pitcher_name'].sort_values().to_dict()
106
 
107
  team_logos = pd.read_csv('team_logos.csv')
108
-
109
- mlb_stats = MLB_Scrape()
110
- teams_df = mlb_stats.get_teams()
111
- team_logo_dict = teams_df.set_index(['team_id'])['parent_org_id'].to_dict()
112
-
113
- font_properties = {'family': 'calibi', 'size': 12}
114
- font_properties_titles = {'family': 'calibi', 'size': 20}
115
- font_properties_axes = {'family': 'calibi', 'size': 16}
116
- df_plot = []
117
- ax2_loc = []
118
- gs = []
119
- fig = []
120
-
121
- function_dict={
122
- 'velocity_kde':'Velocity Distributions',
123
- 'break_plot':'Pitch Movement',
124
- 'rolling_tj_stuff':'Rolling tjStuff+',
125
- 'location_lhb':'Locations vs LHB',
126
- 'location_rhb':'Locations vs RHB',
127
- }
128
-
129
- split_dict = {'all':'All',
130
- 'left':'LHB',
131
- 'right':'RHB'}
132
-
133
- split_dict_hand = {'all':['L','R'],
134
- 'left':['L'],
135
- 'right':['R']}
136
-
137
- ball_dict = {'0':'0',
138
- '1':'1',
139
- '2':'2',
140
- '3':'3'}
141
-
142
- strike_dict = {'0':'0',
143
- '1':'1',
144
- '2':'2'}
145
-
146
- # count_dict = {'0_0':'Through 0-0',
147
- # '0_1':'Through 0-1',
148
- # '0_2':'Through 0-2',
149
- # '1_0':'Through 1-0',
150
- # '1_1':'Through 1-1',
151
- # '1_2':'Through 1-2',
152
- # '2_1':'Through 2-1',
153
- # '2_0':'Through 2-0',
154
- # '3_0':'Through 3-0',
155
- # '3_1':'Through 3-1',
156
- # '2_2':'Through 2-2',
157
- # '3_2':'Through 3-2'}
158
-
159
- # count_dict_fg = {'0_0':'',
160
- # '0_1':'61',
161
- # '0_2':'62',
162
- # '1_0':'63',
163
- # '1_1':'64',
164
- # '1_2':'65',
165
- # '2_1':'66',
166
- # '2_0':'67',
167
- # '3_0':'68',
168
- # '3_1':'69',
169
- # '2_2':'70',
170
- # '3_2':'71'}
171
 
172
  from urllib.request import Request, urlopen
173
  from shiny import App, reactive, ui, render
@@ -189,74 +120,39 @@ app_ui = ui.page_fluid(
189
  shinyswatch.theme.simplex(),
190
  ui.tags.h4("TJStats"),
191
  ui.tags.i("Baseball Analytics and Visualizations"),
 
192
  ui.row(
193
 
194
 
195
  ui.layout_sidebar(
196
 
197
  ui.panel_sidebar(
198
- ui.row(
199
- ui.column(6,
200
- ui.input_select('player_id','Select Player',pitcher_dicts,selectize=True,multiple=False)),
201
- ui.column(6, ui.output_ui('test','Select Game'))),
202
 
203
- ui.row(
204
- ui.column(4,
205
- ui.input_select('plot_id_1','Plot Left',function_dict,multiple=False,selected='velocity_kde')),
206
- ui.column(4,
207
- ui.input_select('plot_id_2','Plot Middle',function_dict,multiple=False,selected='rolling_tj_stuff')),
208
- ui.column(4,
209
- ui.input_select('plot_id_3','Plot Right',function_dict,multiple=False,selected='break_plot'))),
210
-
211
- # ui.input_select('count_id','Count',count_dict,multiple=True,selectize=True,selected='0_0'),
212
-
213
- ui.row(
214
- ui.column(6,
215
- ui.input_select('ball_id','Balls',ball_dict,multiple=False,selected='0'),
216
- ui.input_radio_buttons(
217
- "count_id_balls",
218
- "Count Filter Balls",
219
- {
220
- "exact": "Exact Balls",
221
- "greater": ">= Balls",
222
- "lesser": "<= Balls",
223
- },selected='greater')),
224
- ui.column(6,
225
- ui.input_select('strike_id','Strikes',strike_dict,multiple=False,selected='0'),
226
- ui.input_radio_buttons(
227
- "count_id_strikes",
228
- "Count Filter Strikes",
229
- {
230
- "exact": "Exact Strikes",
231
- "greater": ">= Strikes",
232
- "lesser": "<= Strikes",
233
- },selected='greater'))),
234
- ui.row(
235
- ui.column(6,
236
- ui.input_select('split_id','Select Split',split_dict,multiple=False)),
237
- ui.column(6,
238
- ui.input_numeric('rolling_window','Rolling Window (for tjStuff+ Plot)',min=1,value=10))),
239
 
 
 
 
 
 
 
 
 
 
240
 
241
 
242
- ui.input_action_button("go", "Generate",class_="btn-primary"),
243
 
244
 
245
- width=4)
246
- ,
 
247
  ui.panel_main(
248
  ui.navset_tab(
249
  # ui.nav("Raw Data",
250
  # ui.output_data_frame("raw_table")),
251
  ui.nav("Season Summary",
252
  ui.output_plot('plot',
253
- width='2000px',
254
- height='2000px')),
255
- ui.nav("Game Summary",
256
- ui.output_plot('plot_game',
257
- width='2000px',
258
- height='2000px'))
259
- ,id="my_tabs"))))))
260
 
261
 
262
 
@@ -271,7 +167,7 @@ app_ui = ui.page_fluid(
271
  def server(input, output, session):
272
 
273
  @render.ui
274
- def test():
275
 
276
  # @reactive.Effect
277
  if input.my_tabs() == 'Season Summary':
@@ -279,19 +175,14 @@ def server(input, output, session):
279
  return ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
280
  end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
281
  max=df_2024.game_date.max()),
282
- # @reactive.Effect
283
- if input.my_tabs() == 'Game Summary':
284
- pitcher_id_select = int(input.player_id())
285
- df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)]
286
-
287
 
288
- # ax0.text(x=0.5,y=0.30,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
289
- df_plot['game_opp'] = df_plot['game_date'].astype(str) + ' vs ' + df_plot['batter_team'].astype(str)
290
- #print(df_plot['game_opp'])
291
 
292
- date_dict = pd.concat([df_plot.drop_duplicates(subset=['pitcher_id','game_id','game_opp'])[['game_id','game_opp']]]).set_index('game_id').to_dict()
293
- return ui.input_select("game_id", "Select Game",date_dict,selectize=True)
294
-
295
  @output
296
  @render.plot
297
  @reactive.event(input.go, ignore_none=False)
@@ -307,559 +198,576 @@ def server(input, output, session):
307
  ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
308
  ax.grid('off')
309
  return
310
-
311
- pitcher_id_select = int(input.player_id())
312
 
313
 
314
- df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)]
 
 
 
 
 
 
 
 
 
315
  df_plot = df_plot[(pd.to_datetime(df_plot['game_date']).dt.date>=input.date_range_id()[0])&
316
  (pd.to_datetime(df_plot['game_date']).dt.date<=input.date_range_id()[1])]
317
 
318
- df_plot = df_plot[df_plot['batter_hand'].isin(split_dict_hand[input.split_id()])]
319
-
320
- if input.count_id_balls()=='greater' and input.count_id_strikes()=='greater' and int(input.ball_id())==0 and int(input.strike_id())==0:
321
- ball_title = ''
322
- strike_title = ''
323
- else:
324
- if input.count_id_balls()=='exact':
325
- df_plot = df_plot[df_plot['balls']==int(input.ball_id())]
326
- ball_title = str(f'{(input.ball_id())} Ball Count; ')
327
- elif input.count_id_balls()=='greater':
328
- df_plot = df_plot[df_plot['balls']>=int(input.ball_id())]
329
- ball_title = str(f'At Least {(input.ball_id())} Ball Count; ')
330
- elif input.count_id_balls()=='lesser':
331
- df_plot = df_plot[df_plot['balls']<=int(input.ball_id())]
332
- ball_title = str(f'At Most {(input.ball_id())} Ball Count; ')
333
-
334
- if input.count_id_strikes()=='exact':
335
- df_plot = df_plot[df_plot['strikes']==int(input.strike_id())]
336
- strike_title = str(f'{(input.strike_id())} Strike Count; ')
337
- elif input.count_id_strikes()=='greater':
338
- df_plot = df_plot[df_plot['strikes']>=int(input.strike_id())]
339
- strike_title = str(f'At Least {(input.strike_id())} Strike Count; ')
340
- elif input.count_id_strikes()=='lesser':
341
- df_plot = df_plot[df_plot['strikes']<=int(input.strike_id())]
342
- strike_title = str(f'At Most {(input.strike_id())} Strike Count; ')
343
-
344
-
345
-
346
- if input.split_id() == 'all':
347
- split_title = ''
348
-
349
- elif input.split_id() == 'left':
350
- split_title = 'vs. LHH'
351
-
352
- elif input.split_id() == 'right':
353
- split_title = 'vs. RHH'
354
-
355
-
356
- if len(df_plot)<1:
357
- fig, ax = plt.subplots(1, 1, figsize=(9, 9))
358
- ax.text(x=0.5,y=0.5,s='Please Select\nOther Parameters',fontsize=150,ha='center')
359
- ax.grid('off')
360
- return
361
 
362
- df_plot['pitch_type_count'] = df_plot.groupby(['pitcher_id'])['pitch_type'].cumcount()+1
363
- df_plot['pitch_type_count_each'] = df_plot.groupby(['pitch_type'])['pitch_type'].cumcount()+1
364
- #df_plot = df_plot.merge(df_2024_update[['tj_stuff_plus','play_id']],left_on=['play_id'],right_on=['play_id'],how='left')
365
- df_plot = df_plot.sort_values(by=['pitch_description'])
366
- df_plot = df_plot.sort_values(by=['start_time'])
 
367
 
368
- grouped_ivb = psf.group_ivb_update(df=df_plot,agg_list=['pitcher_id','pitcher_name','pitcher_hand','pitch_type','pitch_description'])
369
- grouped_ivb_all = psf.group_ivb_update(df=df_plot,agg_list=['pitcher_id','pitcher_name','pitcher_hand'])
370
 
 
 
371
 
 
 
 
 
372
 
373
- from matplotlib.gridspec import GridSpec
374
- plt.rcParams['font.family'] = 'Calibri'
375
- df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")
376
- label_labels = df_plot.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
377
 
378
- #plt.rcParams["figure.figsize"] = [10,10]
379
- fig = plt.figure(figsize=(20, 20))
380
- plt.rcParams.update({'figure.autolayout': True})
381
- fig.set_facecolor('white')
382
- sns.set_theme(style="whitegrid", palette=colour_palette)
383
- print('this is the one plot')
384
- # gs = GridSpec(7, 2, width_ratios=[1,1], height_ratios=[1.5,1,1,1,1,1,2.5])
385
- gs = GridSpec(5, 5, height_ratios=[150,75,225,325,50],width_ratios=[1,100,100,100,1])
386
- #### NO FG
387
- ####gs = GridSpec(5, 5, height_ratios=[225,0,225,325,50],width_ratios=[1,100,100,100,1])
388
- #gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,7-len(label_labels)/4,1+len(label_labels)/4])
389
 
390
- gs.update(hspace=0.2, wspace=0.3)
 
 
 
391
 
392
- # Add subplots to the grid
393
- ax0 = fig.add_subplot(gs[0, :])
394
- ax1_table = fig.add_subplot(gs[1, :])
395
- ax2_left = fig.add_subplot(gs[2, 1])
396
- ax2_middle = fig.add_subplot(gs[2, 2])
397
- ax2_right = fig.add_subplot(gs[2, 3])
398
- ax3 = fig.add_subplot(gs[-2, :])
399
- #axfooter = fig.add_subplot(gs[-1, :])
400
 
401
- ax1_table.axis('off')
 
 
 
402
 
403
- sns.set_theme(style="whitegrid", palette=colour_palette)
404
- fig.set_facecolor('white')
 
 
 
 
 
405
 
406
- font_properties = {'family': 'calibi', 'size': 12}
407
- font_properties_titles = {'family': 'calibi', 'size': 20}
408
- font_properties_axes = {'family': 'calibi', 'size': 16}
409
 
410
- # ## FANGRAPHS TABLE ###
411
- # data_pull = psf.fangraphs_scrape(pitcher_id=pitcher_id_select,
412
- # split=input.split_id(),
413
- # start_date=input.date_range_id()[0],
414
- # end_date=input.date_range_id()[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
 
417
- # psf.fangraphs_table(data=data_pull,
418
- # stats=['IP','WHIP','ERA','FIP','TBF','K%','BB%','K-BB%'],
419
- # ax=ax1_table)
420
 
421
- start_date = str(pd.to_datetime(input.date_range_id()[0]).strftime('%m/%d/%Y'))
422
- end_date = str(pd.to_datetime(input.date_range_id()[1]).strftime('%m/%d/%Y'))
423
 
 
 
 
424
 
425
- pitcher_stats_call = requests.get(f'https://statsapi.mlb.com/api/v1/people/{pitcher_id_select}?appContext=minorLeague&hydrate=stats(group=[pitching],type=[byDateRange],sportId=14,startDate={start_date},endDate={end_date})').json()
426
 
427
- pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
428
- pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
429
- pitcher_stats_call_df = pd.DataFrame(data=dict(zip(pitcher_stats_call_header,pitcher_stats_call_values)),index=[0])
430
- pitcher_stats_call_df['k_percent'] = pitcher_stats_call_df['strikeOuts']/pitcher_stats_call_df['battersFaced']
431
- pitcher_stats_call_df['bb_percent'] = pitcher_stats_call_df['baseOnBalls']/pitcher_stats_call_df['battersFaced']
432
- pitcher_stats_call_df['k_bb_percent'] = pitcher_stats_call_df['k_percent']-pitcher_stats_call_df['bb_percent']
433
- pitcher_stats_call_df_small = pitcher_stats_call_df[['inningsPitched','battersFaced','era','whip','k_percent','bb_percent','k_bb_percent']]
434
 
435
- pitcher_stats_call_df_small['k_percent'] = pitcher_stats_call_df_small['k_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
436
- pitcher_stats_call_df_small['bb_percent'] = pitcher_stats_call_df_small['bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
437
- pitcher_stats_call_df_small['k_bb_percent'] = pitcher_stats_call_df_small['k_bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
438
 
439
- table_fg = ax1_table.table(cellText=pitcher_stats_call_df_small.values, colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
440
- bbox=[0.04, 0.2, 0.92, 0.8])
441
 
442
- min_font_size = 20
443
- table_fg.set_fontsize(min_font_size)
444
 
445
 
446
- new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ERA}$','$\\bf{WHIP}$','$\\bf{K\%}$','$\\bf{BB\%}$','$\\bf{K-BB\%}$']
447
- # #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
448
- for i, col_name in enumerate(new_column_names):
449
- table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
450
 
451
- ax1_table.axis('off')
 
 
 
452
 
453
 
 
454
 
455
- for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax2_left,ax2_middle,ax2_right],[1,2,3]):
456
- if x == 'velocity_kde':
457
- psf.velocity_kdes(df=df_plot,ax=y,gs=gs,gs_list=z,fig=fig)
458
- if x == 'rolling_tj_stuff':
459
- psf.tj_stuff_roling(df = df_plot,window = int(input.rolling_window()),ax=y)
460
- if x == 'break_plot':
461
- psf.break_plot(df=df_plot,ax=y)
462
- if x == 'location_lhb':
463
- psf.location_plot(df=df_plot,ax=y,hand='L')
464
- if x == 'location_rhb':
465
- psf.location_plot(df=df_plot,ax=y,hand='R')
466
 
467
- pitches_list = df_plot['pitch_description'].unique()
468
- colour_pitches = [pitch_colours[x] for x in pitches_list]
469
-
470
- # handles, labels = ax2_right.get_legend_handles_labels()
471
 
472
- # # Manually create handles and labels for each pitch-color pair
473
- handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
474
- labels = pitches_list
475
-
476
-
477
 
 
478
 
479
- ### FANGRAPHS TABLE ###
480
- psf.table_summary(df=df_plot.copy(),
481
- pitcher_id=pitcher_id_select,
482
- ax=ax3,
483
- df_group=grouped_ivb.copy(),
484
- df_group_all=grouped_ivb_all.copy(),
485
- statcast_pitch_summary=statcast_pitch_summary.copy())
486
 
 
487
 
488
- # ############ FOOTER ################
489
- # #fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
490
- # axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
491
- # axfooter.text(x=1-0.05,y=1,s='Data: MLB, Fangraphs',ha='right',fontname='Calibri',fontsize=24,va='top')
492
 
 
493
 
494
- # axfooter.text(x=0.5,y=0.8,s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
495
- # ha='center',va='center',fontname='Calibri',fontsize=16)
496
- # axfooter.axis('off')
497
- # #fig.tight_layout()
498
 
499
-
500
 
501
- # Get value counts of the column and sort in descending order
502
- sorted_value_counts = df_plot['pitch_description'].value_counts().sort_values(ascending=False)
503
 
504
- # Get the list of items ordered from most to least frequent
505
- items_in_order = sorted_value_counts.index.tolist()
506
- # Create a dictionary to map names to colors
507
- name_to_color = dict(zip(labels, handles))
508
 
509
- # Order the colors based on the correct order of names
510
- ordered_colors = [name_to_color[name] for name in items_in_order]
511
 
512
 
513
- ax3.legend(ordered_colors, items_in_order, bbox_to_anchor=(0.1, 0.81, 0.8, 0.2), ncol=5,
514
- fancybox=True,loc='lower center',fontsize=20,framealpha=1.0, markerscale=2,prop={'family': 'calibi', 'size': 20})
515
 
 
516
 
517
- ################## Title ##########
518
- title_spot = f'{df_plot.pitcher_name.values[0]}'
519
 
 
520
 
521
- ax0.text(x=0.5,y=0.8,s=title_spot,fontname='Calibri',ha='center',fontsize=56,va='top')
522
- ax0.text(x=0.5,y=0.35,s='A Season Pitching Summary',fontname='Calibri',ha='center',fontsize=40,va='top',fontstyle='italic')
523
 
524
- player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam").json()
525
- #ax0.text(x=0.5,y=0.05,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
526
- ax0.axis('off')
527
- ax0.text(x=0.5,y=0.5,s=f"{ player_bio['people'][0]['pitchHand']['code']}HP, Age: {player_bio['people'][0]['currentAge']}, {player_bio['people'][0]['height']}/{player_bio['people'][0]['weight']}",fontname='Calibri',ha='center',fontsize=24,va='top')
528
 
529
- #ax0.text(x=0.5,y=0.25,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
530
- # ax0.text(x=0.5,y=0.25,s=f'{season_fg} MLB Season',fontname='Calibri',ha='center',fontsize=30,va='top')
531
- # ax0.axis('off')
532
 
 
533
 
534
- ax0.text(x=0.5,y=0.15,s=f'{input.date_range_id()[0]} to {input.date_range_id()[1]}',fontname='Calibri',ha='center',fontsize=30,va='top',fontstyle='italic')
535
-
536
- ax0.text(x=0.5,y=0.0,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
537
- ax0.axis('off')
538
 
539
- from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
540
- import urllib
541
- import urllib.request
542
- import urllib.error
543
- from urllib.error import HTTPError
544
 
545
- try:
546
- url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_180/v1/people/{pitcher_id_select}/headshot/milb/current.png'
547
- test_mage = plt.imread(url)
548
- except urllib.error.HTTPError as err:
549
- url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
550
- test_mage = plt.imread(url)
551
- imagebox = OffsetImage(test_mage, zoom = 0.5)
552
- ab = AnnotationBbox(imagebox, (0.125, 0.4), frameon = False)
553
- ax0.add_artist(ab)
554
 
555
- #player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam").json()
556
 
 
 
 
557
 
 
558
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
- if 'currentTeam' in player_bio['people'][0]:
561
- try:
562
- url = team_logos[team_logos['id'] == team_logo_dict[player_bio['people'][0]['currentTeam']['id']]]['imageLink'].values[0]
 
 
 
563
 
564
- im = plt.imread(url)
565
- # response = requests.get(url)
566
- # im = Image.open(BytesIO(response.content))
567
- # im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
568
- # ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
569
- imagebox = OffsetImage(im, zoom = 0.4)
570
- ab = AnnotationBbox(imagebox, (0.875, 0.40), frameon = False)
571
- ax0.add_artist(ab)
572
- except IndexError:
573
- print()
574
-
575
 
576
- ############ FOOTER ################
577
- #fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
578
- axfooter = fig.add_subplot(gs[-1, :])
579
- axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
580
- axfooter.text(x=1-0.05,y=1,s='Data: MLB',ha='right',fontname='Calibri',fontsize=24,va='top')
581
 
582
 
583
- axfooter.text(x=0.5,y=0.8,s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
584
- ha='center',va='center',fontname='Calibri',fontsize=16)
585
- axfooter.axis('off')
586
- #fig.tight_layout()
587
 
588
- fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
589
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
 
591
- @output
592
- @render.plot
593
- @reactive.event(input.go, ignore_none=False)
594
- def plot_game():
595
- #fig, ax = plt.subplots(3, 2, figsize=(9, 9))
596
 
597
- font_properties = {'family': 'calibi', 'size': 12}
598
- font_properties_titles = {'family': 'calibi', 'size': 20}
599
- font_properties_axes = {'family': 'calibi', 'size': 16}
600
-
601
- if len((input.player_id()))<1:
602
- fig, ax = plt.subplots(1, 1, figsize=(9, 9))
603
- ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
604
- ax.grid('off')
605
- return
606
-
607
- pitcher_id_select = int(input.player_id())
608
-
609
-
610
-
611
-
612
- df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)&(df_2024_update['game_id']==int(input.game_id()))]
613
- df_plot = df_plot[df_plot['batter_hand'].isin(split_dict_hand[input.split_id()])]
614
-
615
- if input.count_id_balls()=='greater' and input.count_id_strikes()=='greater' and int(input.ball_id())==0 and int(input.strike_id())==0:
616
- ball_title = ''
617
- strike_title = ''
618
- else:
619
- if input.count_id_balls()=='exact':
620
- df_plot = df_plot[df_plot['balls']==int(input.ball_id())]
621
- ball_title = str(f'{(input.ball_id())} Ball Count; ')
622
- elif input.count_id_balls()=='greater':
623
- df_plot = df_plot[df_plot['balls']>=int(input.ball_id())]
624
- ball_title = str(f'At Least {(input.ball_id())} Ball Count; ')
625
- elif input.count_id_balls()=='lesser':
626
- df_plot = df_plot[df_plot['balls']<=int(input.ball_id())]
627
- ball_title = str(f'At Most {(input.ball_id())} Ball Count; ')
628
-
629
- if input.count_id_strikes()=='exact':
630
- df_plot = df_plot[df_plot['strikes']==int(input.strike_id())]
631
- strike_title = str(f'{(input.strike_id())} Strike Count; ')
632
- elif input.count_id_strikes()=='greater':
633
- df_plot = df_plot[df_plot['strikes']>=int(input.strike_id())]
634
- strike_title = str(f'At Least {(input.strike_id())} Strike Count; ')
635
- elif input.count_id_strikes()=='lesser':
636
- df_plot = df_plot[df_plot['strikes']<=int(input.strike_id())]
637
- strike_title = str(f'At Most {(input.strike_id())} Strike Count; ')
638
-
639
 
640
-
641
 
 
 
 
 
 
 
 
 
 
 
 
 
642
 
643
- if input.split_id() == 'all':
644
- split_title = ''
645
 
646
- elif input.split_id() == 'left':
647
- split_title = 'vs. LHH'
648
-
649
- elif input.split_id() == 'right':
650
- split_title = 'vs. RHH'
651
 
652
- if len(df_plot)<1:
653
- fig, ax = plt.subplots(1, 1, figsize=(9, 9))
654
- ax.text(x=0.5,y=0.5,s='Please Select\nOther Parameters',fontsize=150,ha='center')
655
- ax.grid('off')
656
- return
657
 
658
 
659
- df_plot['pitch_type_count'] = df_plot.groupby(['pitcher_id'])['pitch_type'].cumcount()+1
660
- df_plot['pitch_type_count_each'] = df_plot.groupby(['pitch_type'])['pitch_type'].cumcount()+1
661
- #df_plot = df_plot.merge(df_2024_update[['tj_stuff_plus','play_id']],left_on=['play_id'],right_on=['play_id'],how='left')
662
- df_plot = df_plot.sort_values(by=['pitch_description'])
663
- df_plot = df_plot.sort_values(by=['start_time'])
664
 
665
- # ax0.text(x=0.5,y=0.30,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
666
- df_plot['game_opp'] = df_plot['game_date'].astype(str) + ' vs ' + df_plot['batter_team'].astype(str)
667
- #print(df_plot['game_opp'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
- #date_dict = pd.concat([df_plot.drop_duplicates(subset=['pitcher_id','game_id','game_opp'])[['game_id','game_opp']]]).set_index('game_id').to_dict()
 
670
 
671
- grouped_ivb = psf.group_ivb_update(df=df_plot,agg_list=['pitcher_id','pitcher_name','pitcher_hand','pitch_type','pitch_description'])
672
- grouped_ivb_all = psf.group_ivb_update(df=df_plot,agg_list=['pitcher_id','pitcher_name','pitcher_hand'])
 
 
 
673
 
 
 
 
 
674
 
 
675
 
676
- from matplotlib.gridspec import GridSpec
677
- plt.rcParams['font.family'] = 'Calibri'
678
- df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")
679
- label_labels = df_plot.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
680
 
681
- #plt.rcParams["figure.figsize"] = [10,10]
682
- fig = plt.figure(figsize=(20, 20))
683
- plt.rcParams.update({'figure.autolayout': True})
684
- fig.set_facecolor('white')
685
- sns.set_theme(style="whitegrid", palette=colour_palette)
686
- print('this is the one plot')
687
- # gs = GridSpec(7, 2, width_ratios=[1,1], height_ratios=[1.5,1,1,1,1,1,2.5])
688
- gs = GridSpec(5, 5, height_ratios=[150,75,225,325,50],width_ratios=[1,100,100,100,1])
689
- #### NO FG
690
- ####gs = GridSpec(5, 5, height_ratios=[225,0,225,325,50],width_ratios=[1,100,100,100,1])
691
- #gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,7-len(label_labels)/4,1+len(label_labels)/4])
692
 
693
- gs.update(hspace=0.2, wspace=0.3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
- # Add subplots to the grid
696
- ax0 = fig.add_subplot(gs[0, :])
697
- ax1_table = fig.add_subplot(gs[1, :])
698
- ax2_left = fig.add_subplot(gs[2, 1])
699
- ax2_middle = fig.add_subplot(gs[2, 2])
700
- ax2_right = fig.add_subplot(gs[2, 3])
701
- ax3 = fig.add_subplot(gs[-2, :])
702
- # axfooter = fig.add_subplot(gs[-1, :])
 
 
 
 
 
 
 
 
703
 
704
- ax1_table.axis('off')
 
705
 
706
- sns.set_theme(style="whitegrid", palette=colour_palette)
707
- fig.set_facecolor('white')
708
 
709
- font_properties = {'family': 'calibi', 'size': 12}
710
- font_properties_titles = {'family': 'calibi', 'size': 20}
711
- font_properties_axes = {'family': 'calibi', 'size': 16}
712
 
713
- print(df_2024_update['game_date'].values[0])
714
- # ## FANGRAPHS TABLE ###
715
- # data_pull = psf.fangraphs_scrape(pitcher_id=pitcher_id_select,
716
- # split=input.split_id(),
717
- # start_date=df_plot['game_date'].values[0],
718
- # end_date=df_plot['game_date'].values[0])
719
 
720
- start_date = str(pd.to_datetime(df_plot['game_date'].values[0]).strftime('%m/%d/%Y'))
721
- end_date = str(pd.to_datetime(df_plot['game_date'].values[0]).strftime('%m/%d/%Y'))
722
 
723
 
724
- pitcher_stats_call = requests.get(f'https://statsapi.mlb.com/api/v1/people/{pitcher_id_select}?appContext=minorLeague&hydrate=stats(group=[pitching],type=[byDateRange],sportId=14,startDate={start_date},endDate={end_date})').json()
725
-
726
- pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
727
- pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
728
- pitcher_stats_call_df = pd.DataFrame(data=dict(zip(pitcher_stats_call_header,pitcher_stats_call_values)),index=[0])
729
- # pitcher_stats_call_df['k_percent'] = pitcher_stats_call_df['strikeOuts']/pitcher_stats_call_df['battersFaced']
730
- # pitcher_stats_call_df['bb_percent'] = pitcher_stats_call_df['baseOnBalls']/pitcher_stats_call_df['battersFaced']
731
- # pitcher_stats_call_df['k_bb_percent'] = pitcher_stats_call_df['k_percent']-pitcher_stats_call_df['bb_percent']
732
- pitcher_stats_call_df_small = pitcher_stats_call_df[['inningsPitched','battersFaced','earnedRuns','hits','strikeOuts','baseOnBalls','hitByPitch','homeRuns']]
733
- pitcher_stats_call_df_small['whiffs'] = int(df_plot['is_whiff'].sum())
734
- # pitcher_stats_call_df_small['k_percent'] = pitcher_stats_call_df_small['k_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
735
- # pitcher_stats_call_df_small['bb_percent'] = pitcher_stats_call_df_small['bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
736
- # pitcher_stats_call_df_small['k_bb_percent'] = pitcher_stats_call_df_small['k_bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
737
-
738
- table_fg = ax1_table.table(cellText=pitcher_stats_call_df_small.values, colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
739
- bbox=[0.04, 0.2, 0.92, 0.8])
740
-
741
- min_font_size = 20
742
- table_fg.set_fontsize(min_font_size)
743
-
744
-
745
- new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ER}$','$\\bf{H}$','$\\bf{K}$','$\\bf{BB}$','$\\bf{HBP}$','$\\bf{HR}$','$\\bf{Whiffs}$']
746
- # #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
747
- for i, col_name in enumerate(new_column_names):
748
- table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
749
-
750
- ax1_table.axis('off')
751
-
752
-
753
- # psf.fangraphs_table(data=data_pull,
754
- # stats=['IP','WHIP','ERA','FIP','TBF','K%','BB%','K-BB%'],
755
- # ax=ax1_table)
756
-
757
-
758
- # psf.velocity_kdes(df=df_plot,
759
- # ax=ax2_loc,
760
- # gs=gs,
761
- # fig=fig)
762
-
763
- # # psf.tj_stuff_roling(df = df_plot,
764
- # # window = 5,
765
- # # ax=ax2_velo)
766
- # psf.location_plot(df=df_plot,ax=ax2_velo,hand='L')
767
-
768
- # psf.location_plot(df=df_plot,ax=ax2_loc,hand='R')
769
- # # # ## Break Plot
770
- # psf.break_plot(df=df_plot,ax=ax2)
771
- for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax2_left,ax2_middle,ax2_right],[1,2,3]):
772
- if x == 'velocity_kde':
773
- psf.velocity_kdes(df=df_plot,ax=y,gs=gs,gs_list=z,fig=fig)
774
- if x == 'rolling_tj_stuff':
775
- psf.tj_stuff_roling(df = df_plot,window = int(input.rolling_window()),ax=y)
776
- if x == 'break_plot':
777
- psf.break_plot(df=df_plot,ax=y)
778
- if x == 'location_lhb':
779
- psf.location_plot(df=df_plot,ax=y,hand='L')
780
- if x == 'location_rhb':
781
- psf.location_plot(df=df_plot,ax=y,hand='R')
782
-
783
- pitches_list = df_plot['pitch_description'].unique()
784
- colour_pitches = [pitch_colours[x] for x in pitches_list]
785
-
786
- # handles, labels = ax2_right.get_legend_handles_labels()
787
 
788
- # # Manually create handles and labels for each pitch-color pair
789
- handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
790
- labels = pitches_list
791
 
 
 
 
 
 
792
 
793
- ### FANGRAPHS TABLE ###
794
- psf.table_summary(df=df_plot.copy(),
795
- pitcher_id=pitcher_id_select,
796
- ax=ax3,
797
- df_group=grouped_ivb.copy(),
798
- df_group_all=grouped_ivb_all.copy(),
799
- statcast_pitch_summary=statcast_pitch_summary.copy())
800
 
 
 
 
 
 
801
 
 
802
 
803
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
804
 
805
- # Get value counts of the column and sort in descending order
806
- sorted_value_counts = df_plot['pitch_description'].value_counts().sort_values(ascending=False)
807
 
808
- # Get the list of items ordered from most to least frequent
809
- items_in_order = sorted_value_counts.index.tolist()
810
- # Create a dictionary to map names to colors
811
- name_to_color = dict(zip(labels, handles))
812
 
813
- # Order the colors based on the correct order of names
814
- ordered_colors = [name_to_color[name] for name in items_in_order]
815
 
 
 
816
 
817
- ax3.legend(ordered_colors, items_in_order, bbox_to_anchor=(0.1, 0.81, 0.8, 0.2), ncol=5,
818
- fancybox=True,loc='lower center',fontsize=20,framealpha=1.0, markerscale=2,prop={'family': 'calibi', 'size': 20})
 
819
 
820
 
821
- ################## Title ##########
822
- title_spot = f'{df_plot.pitcher_name.values[0]}'
 
 
 
823
 
824
 
825
- ax0.text(x=0.5,y=0.8,s=title_spot,fontname='Calibri',ha='center',fontsize=56,va='top')
826
- ax0.text(x=0.5,y=0.35,s='A Game Pitching Summary',fontname='Calibri',ha='center',fontsize=40,va='top',fontstyle='italic')
827
 
 
 
 
 
 
 
828
 
829
- #ax0.text(x=0.5,y=0.25,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
830
- #ax0.text(x=0.5,y=0.25,s=f'{season_fg} MLB Season',fontname='Calibri',ha='center',fontsize=30,va='top')
831
- #ax0.text(x=0.5,y=0.25,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
832
- # ax0.text(x=0.5,y=0.25,s=f'{season_fg} MLB Season',fontname='Calibri',ha='center',fontsize=30,va='top')
 
833
 
834
- ax0.text(x=0.5,y=0.15,s= df_plot['game_opp'].values[0],fontname='Calibri',ha='center',fontstyle='italic',fontsize=30,va='top')
835
-
836
- ax0.text(x=0.5,y=0.00,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
837
- ax0.axis('off')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
838
 
839
 
840
- from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
841
  import urllib
842
  import urllib.request
843
  import urllib.error
844
  from urllib.error import HTTPError
845
 
846
  try:
847
- url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_180/v1/people/{pitcher_id_select}/headshot/milb/current.png'
848
  test_mage = plt.imread(url)
849
  except urllib.error.HTTPError as err:
850
  url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
851
- test_mage = plt.imread(url)
852
- imagebox = OffsetImage(test_mage, zoom = 0.5)
853
- ab = AnnotationBbox(imagebox, (0.125, 0.4), frameon = False)
854
- ax0.add_artist(ab)
855
 
856
- #player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam").json()
857
 
858
- player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam").json()
859
- #ax0.text(x=0.5,y=0.05,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
860
- ax0.axis('off')
861
- ax0.text(x=0.5,y=0.5,s=f"{ player_bio['people'][0]['pitchHand']['code']}HP, Age: {player_bio['people'][0]['currentAge']}, {player_bio['people'][0]['height']}/{player_bio['people'][0]['weight']}",fontname='Calibri',ha='center',fontsize=24,va='top')
862
 
 
 
 
 
 
 
863
 
864
 
865
  if 'currentTeam' in player_bio['people'][0]:
@@ -871,25 +779,24 @@ def server(input, output, session):
871
  # im = Image.open(BytesIO(response.content))
872
  # im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
873
  # ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
874
- imagebox = OffsetImage(im, zoom = 0.4)
875
- ab = AnnotationBbox(imagebox, (0.875, 0.40), frameon = False)
876
- ax0.add_artist(ab)
877
  except IndexError:
878
  print()
879
-
880
- ############ FOOTER ################
881
- #fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
882
- axfooter = fig.add_subplot(gs[-1, :])
883
- axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
884
- axfooter.text(x=1-0.05,y=1,s='Data: MLB',ha='right',fontname='Calibri',fontsize=24,va='top')
885
 
886
 
887
- axfooter.text(x=0.5,y=0.8,s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
888
- ha='center',va='center',fontname='Calibri',fontsize=16)
889
- axfooter.axis('off')
890
- #fig.tight_layout()
891
 
 
 
892
 
893
- fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
 
 
 
 
894
 
895
  app = App(app_ui, server)
 
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
+ #import pitch_summary_functions as psf
6
  import requests
7
  import matplotlib
8
  from api_scraper import MLB_Scrape
 
10
  import shinyswatch
11
 
12
 
13
+ season = 2024
14
+ level = 'mlb'
15
  colour_palette = ['#FFB000','#648FFF','#785EF0',
16
  '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
17
 
18
  import datasets
19
  from datasets import load_dataset
20
  ### Import Datasets
21
+ dataset = load_dataset('nesticot/mlb_data', data_files=[f'{level}_pitch_data_{season}.csv' ])
22
  dataset_train = dataset['train']
23
  df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True).drop_duplicates(subset=['play_id'],keep='last')
24
 
25
+ # df_2024 = pd.read_csv('C:/Users/thoma/Google Drive/Python/Baseball/season_stats/2024/2024_regular_data.csv',index_col=[0])
26
 
 
27
  # ### Import Datasets
28
  # import datasets
29
  # from datasets import load_dataset
 
34
  ### PITCH COLOURS ###
35
  pitch_colours = {
36
  'Four-Seam Fastball':'#FF007D',#BC136F
 
37
  'Sinker':'#98165D',#DC267F
38
  'Cutter':'#BE5FA0',
39
 
 
57
  'Other':'#9C8975',
58
  }
59
 
60
+ import pitcher_update as pu
61
+ df_2024 = pu.df_update(df_2024)
62
+ df_2024['pitch_count_hand'] = df_2024.groupby(['pitcher_id','batter_hand'])['start_speed'].transform('count')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
 
64
 
 
65
 
66
+ # DEFINE STRIKE ZONE
67
+ strike_zone = pd.DataFrame({
68
+ 'PlateLocSide': [-0.9, -0.9, 0.9, 0.9, -0.9],
69
+ 'PlateLocHeight': [1.5, 3.5, 3.5, 1.5, 1.5]
70
+ })
71
 
72
+ ### STRIKE ZONE ###
73
+ def draw_line(axis,alpha_spot=1,catcher_p = True):
74
 
75
+ axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'], color='black', linewidth=1.3,zorder=3,alpha=alpha_spot,)
76
 
77
+ # ax.plot([-0.2833333, -0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
78
+ # ax.plot([0.2833333, 0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
79
+ # ax.plot([-0.85, 0.85], [2.2, 2.2], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
80
+ # ax.plot([-0.85, 0.85], [2.9, 2.9], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
81
+ if catcher_p:
82
+ # Add dashed line
83
+ # Add home plate
84
+ axis.plot([-0.708, 0.708], [0.15, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
85
+ axis.plot([-0.708, -0.708], [0.15, 0.3], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
86
+ axis.plot([-0.708, 0], [0.3, 0.5], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
87
+ axis.plot([0, 0.708], [0.5, 0.3], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
88
+ axis.plot([0.708, 0.708], [0.3, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
89
+ else:
90
+ axis.plot([-0.708, 0.708], [0.4, 0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
91
+ axis.plot([-0.708, -0.9], [0.4, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
92
+ axis.plot([-0.9, 0], [-0.1, -0.35], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
93
+ axis.plot([0, 0.9], [-.35, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
94
+ axis.plot([0.9, 0.708], [-0.1,0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
95
 
96
+ pitcher_dicts = df_2024.set_index('pitcher_id')['pitcher_name'].sort_values().to_dict()
 
 
 
 
97
 
98
  team_logos = pd.read_csv('team_logos.csv')
99
+ cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
100
+ cmap_sum2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFFFFF','#FFB000',])
101
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  from urllib.request import Request, urlopen
104
  from shiny import App, reactive, ui, render
 
120
  shinyswatch.theme.simplex(),
121
  ui.tags.h4("TJStats"),
122
  ui.tags.i("Baseball Analytics and Visualizations"),
123
+ ui.tags.h5("Pitcher Heat Maps"),
124
  ui.row(
125
 
126
 
127
  ui.layout_sidebar(
128
 
129
  ui.panel_sidebar(
 
 
 
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
+ ui.input_select('player_id','Select Player',pitcher_dicts,selectize=True,multiple=False),
133
+ ui.output_ui('game_id_select','Date Range'),
134
+
135
+
136
+ ui.output_ui('pitch_type_select','Select Pitch Type'),
137
+ ui.input_action_button("go", "Generate",class_="btn-primary"),width=2
138
+
139
+
140
+ ),
141
 
142
 
 
143
 
144
 
145
+
146
+
147
+
148
  ui.panel_main(
149
  ui.navset_tab(
150
  # ui.nav("Raw Data",
151
  # ui.output_data_frame("raw_table")),
152
  ui.nav("Season Summary",
153
  ui.output_plot('plot',
154
+ width='1600px',
155
+ height='900px')),id="my_tabs"))))))
 
 
 
 
 
156
 
157
 
158
 
 
167
  def server(input, output, session):
168
 
169
  @render.ui
170
+ def game_id_select():
171
 
172
  # @reactive.Effect
173
  if input.my_tabs() == 'Season Summary':
 
175
  return ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
176
  end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
177
  max=df_2024.game_date.max()),
 
 
 
 
 
178
 
179
+ @render.ui
180
+ def pitch_type_select():
181
+ pitch_dicts = df_2024[(df_2024['pitcher_id']==int(input.player_id()))].set_index('pitch_type')['pitch_description'].sort_values().to_dict()
182
 
183
+ # @reactive.Effect
184
+ return ui.input_select('pitch_type','Select Pitch Type',pitch_dicts,selectize=True,multiple=False)
185
+
186
  @output
187
  @render.plot
188
  @reactive.event(input.go, ignore_none=False)
 
198
  ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
199
  ax.grid('off')
200
  return
 
 
201
 
202
 
203
+ pitcher_input = int(input.player_id())
204
+ pitch_input = input.pitch_type()
205
+
206
+ df_plot_full = df_2024[(df_2024['pitcher_id']==pitcher_input)]
207
+ df_plot_full['h_s_b'] = df_plot_full.groupby(['batter_hand','strikes', 'balls']).transform('count')['pitcher_id']
208
+ df_plot_full['h_s_b_pitch'] = df_plot_full.groupby(['batter_hand','strikes', 'balls','pitch_type']).transform('count')['pitcher_id']
209
+ df_plot_full['h_s_b_pitch_percent'] = df_plot_full['h_s_b_pitch']/df_plot_full['h_s_b']
210
+
211
+
212
+ df_plot = df_plot_full[(df_plot_full['pitch_type']==pitch_input)]
213
  df_plot = df_plot[(pd.to_datetime(df_plot['game_date']).dt.date>=input.date_range_id()[0])&
214
  (pd.to_datetime(df_plot['game_date']).dt.date<=input.date_range_id()[1])]
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
+ print("THIS IS HERE")
218
+ print(df_plot)
219
+ pivot_table_l = df_plot[df_plot['batter_hand'].isin(['L'])].groupby(['batter_hand','strikes', 'balls'])[['h_s_b_pitch_percent']].mean().reset_index().pivot('strikes','balls','h_s_b_pitch_percent')#.fillna(0).style.background_gradient(cmap=cmap_sum2, axis=None).format("{:.0%}")
220
+ # Create a new index and columns range
221
+ new_index = range(3)
222
+ new_columns = range(4)
223
 
224
+ # Reindex the pivot table
225
+ pivot_table_l = pivot_table_l.reindex(index=new_index, columns=new_columns)
226
 
227
+ # Fill any missing values with 0
228
+ pivot_table_l = pivot_table_l.fillna(0)
229
 
230
+ pivot_table_l = df_plot[df_plot['batter_hand']=='L'].groupby(['batter_hand','strikes', 'balls'])[['h_s_b_pitch_percent']].mean().reset_index().pivot('strikes','balls','h_s_b_pitch_percent')#.fillna(0).style.background_gradient(cmap=cmap_sum2, axis=None).format("{:.0%}")
231
+ # Create a new index and columns range
232
+ new_index = range(3)
233
+ new_columns = range(4)
234
 
235
+ # Reindex the pivot table
236
+ pivot_table_l = pivot_table_l.reindex(index=new_index, columns=new_columns)
 
 
237
 
238
+ # Fill any missing values with 0
239
+ pivot_table_l = pivot_table_l.fillna(0)
 
 
 
 
 
 
 
 
 
240
 
241
+ pivot_table_r = df_plot[df_plot['batter_hand']=='R'].groupby(['batter_hand','strikes', 'balls'])[['h_s_b_pitch_percent']].mean().reset_index().pivot('strikes','balls','h_s_b_pitch_percent')#.fillna(0).style.background_gradient(cmap=cmap_sum2, axis=None).format("{:.0%}")
242
+ # Create a new index and columns range
243
+ new_index = range(3)
244
+ new_columns = range(4)
245
 
246
+ # Reindex the pivot table
247
+ pivot_table_r = pivot_table_r.reindex(index=new_index, columns=new_columns)
 
 
 
 
 
 
248
 
249
+ # Fill any missing values with 0
250
+ pivot_table_r = pivot_table_r.fillna(0)
251
+
252
+
253
 
254
+ # Assuming you have a DataFrame called 'df_plot_full' with columns 'pitch_type', 'strikes', and 'balls'
255
+
256
+ # Filter the dataset to include only slider pitches
257
+ # slider_pitches = df_plot_full[df_plot_full['pitch_type'] == 'SL']
258
+
259
+ # Group the filtered dataset by strike and ball counts
260
+ # grouped_counts = slider_pitches.groupby(['pitcher_hand','strikes', 'balls']).size().reset_index(name='total_pitches')
261
 
262
+ # Calculate the proportion of slider pitches for each strike and ball count
263
+ # grouped_counts['proportion'] = grouped_counts['total_pitches'] / grouped_counts['total_pitches'].sum()
 
264
 
265
+ # Print the resulting DataFrame
266
+ df_summ = df_plot.groupby(['batter_hand']).agg(
267
+ pitch_count = ('pitch_count_hand','max'),
268
+ pa = ('pa','sum'),
269
+ ab = ('ab','sum'),
270
+ obp_pa = ('obp','sum'),
271
+ hits = ('hits','sum'),
272
+ on_base = ('on_base','sum'),
273
+ k = ('k','sum'),
274
+ bb = ('bb','sum'),
275
+ bb_minus_k = ('bb_minus_k','sum'),
276
+ csw = ('csw','sum'),
277
+ bip = ('bip','sum'),
278
+ bip_div = ('bip_div','sum'),
279
+ tb = ('tb','sum'),
280
+ woba = ('woba','sum'),
281
+ woba_contact = ('woba_contact','sum'),
282
+ xwoba = ('woba_pred','sum'),
283
+ xwoba_contact = ('woba_pred_contact','sum'),
284
+ woba_codes = ('woba_codes','sum'),
285
+ hard_hit = ('hard_hit','sum'),
286
+ barrel = ('barrel','sum'),
287
+ sweet_spot = ('sweet_spot','sum'),
288
+ max_launch_speed = ('launch_speed','max'),
289
+ launch_speed = ('launch_speed','mean'),
290
+ launch_angle = ('launch_angle','mean'),
291
+ pitches = ('is_pitch','sum'),
292
+ swings = ('swings','sum'),
293
+ in_zone = ('in_zone','sum'),
294
+ out_zone = ('out_zone','sum'),
295
+ whiffs = ('whiffs','sum'),
296
+ zone_swing = ('zone_swing','sum'),
297
+ zone_contact = ('zone_contact','sum'),
298
+ ozone_swing = ('ozone_swing','sum'),
299
+ ozone_contact = ('ozone_contact','sum'),
300
+ ground_ball = ('trajectory_ground_ball','sum'),
301
+ line_drive = ('trajectory_line_drive','sum'),
302
+ fly_ball =('trajectory_fly_ball','sum'),
303
+ pop_up = ('trajectory_popup','sum'),
304
+ attack_zone = ('attack_zone','count'),
305
+ heart = ('heart','sum'),
306
+ shadow = ('shadow','sum'),
307
+ chase = ('chase','sum'),
308
+ waste = ('waste','sum'),
309
+ heart_swing = ('heart_swing','sum'),
310
+ shadow_swing = ('shadow_swing','sum'),
311
+ chase_swing = ('chase_swing','sum'),
312
+ waste_swing = ('waste_swing','sum'),
313
+ heart_whiff = ('heart_whiff','sum'),
314
+ shadow_whiff = ('shadow_whiff','sum'),
315
+ chase_whiff = ('chase_whiff','sum'),
316
+ waste_whiff = ('waste_whiff','sum'),
317
+ ).reset_index()
318
 
319
 
320
+ df_summ['avg'] = [df_summ.hits[x]/df_summ.ab[x] if df_summ.ab[x] != 0 else np.nan for x in range(len(df_summ))]
321
+ df_summ['obp'] = [df_summ.on_base[x]/df_summ.obp_pa[x] if df_summ.obp_pa[x] != 0 else np.nan for x in range(len(df_summ))]
322
+ df_summ['slg'] = [df_summ.tb[x]/df_summ.ab[x] if df_summ.ab[x] != 0 else np.nan for x in range(len(df_summ))]
323
 
324
+ df_summ['ops'] = df_summ['obp']+df_summ['slg']
 
325
 
326
+ df_summ['k_percent'] = [df_summ.k[x]/df_summ.pa[x] if df_summ.pa[x] != 0 else np.nan for x in range(len(df_summ))]
327
+ df_summ['bb_percent'] =[df_summ.bb[x]/df_summ.pa[x] if df_summ.pa[x] != 0 else np.nan for x in range(len(df_summ))]
328
+ df_summ['bb_minus_k_percent'] =[(df_summ.bb_minus_k[x])/df_summ.pa[x] if df_summ.pa[x] != 0 else np.nan for x in range(len(df_summ))]
329
 
330
+ df_summ['bb_over_k_percent'] =[df_summ.bb[x]/df_summ.k[x] if df_summ.k[x] != 0 else np.nan for x in range(len(df_summ))]
331
 
 
 
 
 
 
 
 
332
 
 
 
 
333
 
 
 
334
 
335
+ df_summ['csw_percent'] =[df_summ.csw[x]/df_summ.pitches[x] if df_summ.pitches[x] != 0 else np.nan for x in range(len(df_summ))]
 
336
 
337
 
338
+ df_summ['sweet_spot_percent'] = [df_summ.sweet_spot[x]/df_summ.bip_div[x] if df_summ.bip_div[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
339
 
340
+ df_summ['woba_percent'] = [df_summ.woba[x]/df_summ.woba_codes[x] if df_summ.woba_codes[x] != 0 else np.nan for x in range(len(df_summ))]
341
+ df_summ['woba_percent_contact'] = [df_summ.woba_contact[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
342
+ #df_summ['hard_hit_percent'] = [df_summ.sweet_spot[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
343
+ df_summ['hard_hit_percent'] = [df_summ.hard_hit[x]/df_summ.bip_div[x] if df_summ.bip_div[x] != 0 else np.nan for x in range(len(df_summ))]
344
 
345
 
346
+ df_summ['barrel_percent'] = [df_summ.barrel[x]/df_summ.bip_div[x] if df_summ.bip_div[x] != 0 else np.nan for x in range(len(df_summ))]
347
 
348
+ df_summ['zone_contact_percent'] = [df_summ.zone_contact[x]/df_summ.zone_swing[x] if df_summ.zone_swing[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
 
 
 
 
 
 
 
349
 
350
+ df_summ['zone_swing_percent'] = [df_summ.zone_swing[x]/df_summ.in_zone[x] if df_summ.in_zone[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
351
 
352
+ df_summ['zone_percent'] = [df_summ.in_zone[x]/df_summ.pitches[x] if df_summ.pitches[x] > 0 else np.nan for x in range(len(df_summ))]
 
 
 
 
353
 
354
+ df_summ['chase_percent'] = [df_summ.ozone_swing[x]/(df_summ.pitches[x] - df_summ.in_zone[x]) if (df_summ.pitches[x]- df_summ.in_zone[x]) != 0 else np.nan for x in range(len(df_summ))]
355
 
356
+ df_summ['chase_contact'] = [df_summ.ozone_contact[x]/df_summ.ozone_swing[x] if df_summ.ozone_swing[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
 
 
 
357
 
358
+ df_summ['swing_percent'] = [df_summ.swings[x]/df_summ.pitches[x] if df_summ.pitches[x] > 0 else np.nan for x in range(len(df_summ))]
359
 
360
+ df_summ['whiff_rate'] = [df_summ.whiffs[x]/df_summ.swings[x] if df_summ.swings[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
361
 
362
+ df_summ['swstr_rate'] = [df_summ.whiffs[x]/df_summ.pitches[x] if df_summ.pitches[x] > 0 else np.nan for x in range(len(df_summ))]
363
 
364
+ df_summ['ground_ball_percent'] = [df_summ.ground_ball[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
365
 
366
+ df_summ['line_drive_percent'] = [df_summ.line_drive[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
367
 
368
+ df_summ['fly_ball_percent'] = [df_summ.fly_ball[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
 
369
 
370
+ df_summ['pop_up_percent'] = [df_summ.pop_up[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
371
 
 
 
372
 
373
 
374
+ df_summ['heart_zone_percent'] = [df_summ.heart[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
 
375
 
376
+ df_summ['shadow_zone_percent'] = [df_summ.shadow[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
377
 
378
+ df_summ['chase_zone_percent'] = [df_summ.chase[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
 
379
 
380
+ df_summ['waste_zone_percent'] = [df_summ.waste[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
381
 
 
 
382
 
383
+ df_summ['heart_zone_swing_percent'] = [df_summ.heart_swing[x]/df_summ.heart[x] if df_summ.heart[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
384
 
385
+ df_summ['shadow_zone_swing_percent'] = [df_summ.shadow_swing[x]/df_summ.shadow[x] if df_summ.shadow[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
386
 
387
+ df_summ['chase_zone_swing_percent'] = [df_summ.chase_swing[x]/df_summ.chase[x] if df_summ.chase[x] != 0 else np.nan for x in range(len(df_summ))]
388
 
389
+ df_summ['waste_zone_swing_percent'] = [df_summ.waste_swing[x]/df_summ.waste[x] if df_summ.waste[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
390
 
391
+ df_summ['heart_zone_whiff_percent'] = [df_summ.heart_whiff[x]/df_summ.heart_swing[x] if df_summ.heart_swing[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
 
392
 
393
+ df_summ['shadow_zone_whiff_percent'] = [df_summ.shadow_whiff[x]/df_summ.shadow_swing[x] if df_summ.shadow_swing[x] != 0 else np.nan for x in range(len(df_summ))]
 
 
 
 
 
 
 
 
394
 
395
+ df_summ['chase_zone_whiff_percent'] = [df_summ.chase_whiff[x]/df_summ.chase_swing[x] if df_summ.chase_swing[x] != 0 else np.nan for x in range(len(df_summ))]
396
 
397
+ df_summ['waste_zone_whiff_percent'] = [df_summ.waste_whiff[x]/df_summ.waste_swing[x] if df_summ.waste_swing[x] != 0 else np.nan for x in range(len(df_summ))]
398
+ df_summ['xwoba_percent'] = [df_summ.xwoba[x]/df_summ.woba_codes[x] if df_summ.woba_codes[x] != 0 else np.nan for x in range(len(df_summ))]
399
+ df_summ['xwoba_percent_contact'] = [df_summ.xwoba_contact[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
400
 
401
+ df_summ['pitch_percent'] = [df_summ.pitches[x]/df_summ.pitch_count[x] if df_summ.pitch_count[x] != 0 else np.nan for x in range(len(df_summ))]
402
 
403
+ table_left = df_summ[df_summ['batter_hand']=='L'][['pitch_percent',
404
+ 'pitches',
405
+ 'heart_zone_percent',
406
+ 'shadow_zone_percent',
407
+ 'chase_zone_percent',
408
+ 'waste_zone_percent',
409
+ 'csw_percent',
410
+ 'whiff_rate',
411
+ 'chase_percent',
412
+ 'bip',
413
+ 'xwoba_percent_contact'
414
+ ]]
415
 
416
+ ### GET COLOURS##
417
+ import matplotlib.colors
418
+ import matplotlib.colors as mcolors
419
+ def get_color(value,normalize):
420
+ color = cmap_sum(normalize(value))
421
+ return mcolors.to_hex(color)
422
 
423
+ normalize = mcolors.Normalize(vmin=table_left['pitch_percent']*0.5,
424
+ vmax=table_left['pitch_percent']*1.5) # Define the range of values
 
 
 
 
 
 
 
 
 
425
 
 
 
 
 
 
426
 
427
 
428
+ df_colour_left = pd.DataFrame(data=[[get_color(x,normalize) for x in pivot_table_l.loc[0]],
429
+ [get_color(x,normalize) for x in pivot_table_l.loc[1]],
430
+ [get_color(x,normalize) for x in pivot_table_l.loc[2]]],)
 
431
 
 
432
 
433
+ table_left['pitch_percent'] = table_left['pitch_percent'].map('{:.1%}'.format)
434
+ table_left['pitches'] = table_left['pitches'].astype(int).astype(str)
435
+ # table_left['pa'] = table_left['pa'].astype(int).astype(str)
436
+ # table_left['k_percent'] = table_left['k_percent'].map('{:.1%}'.format)
437
+ # table_left['bb_percent'] = table_left['bb_percent'].map('{:.1%}'.format)
438
+ table_left['heart_zone_percent'] = table_left['heart_zone_percent'].map('{:.1%}'.format)
439
+ table_left['shadow_zone_percent'] = table_left['shadow_zone_percent'].map('{:.1%}'.format)
440
+ table_left['chase_zone_percent'] = table_left['chase_zone_percent'].map('{:.1%}'.format)
441
+ table_left['waste_zone_percent'] = table_left['waste_zone_percent'].map('{:.1%}'.format)
442
+ table_left['csw_percent'] = table_left['csw_percent'].map('{:.1%}'.format)
443
+ table_left['whiff_rate'] = table_left['whiff_rate'].map('{:.1%}'.format)
444
+ table_left['chase_percent'] = table_left['chase_percent'].map('{:.1%}'.format)
445
+ table_left['bip'] = table_left['bip'].astype(int).astype(str)
446
+ table_left['xwoba_percent_contact'] = table_left['xwoba_percent_contact'].map('{:.3f}'.format)
447
+ table_left.columns = ['Usage%','Pitches','Heart%','Shadow%','Chase%','Waste%','CSW%','Whiff%','O-Swing%','BBE','xwOBACON']
448
 
 
 
 
 
 
449
 
450
+ table_left = table_left.replace({'nan%':''})
451
+ table_left = table_left.replace({'nan':''})
452
+ table_left = table_left.T
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
 
 
454
 
455
+ table_right = df_summ[df_summ['batter_hand']=='R'][['pitch_percent',
456
+ 'pitches',
457
+ 'heart_zone_percent',
458
+ 'shadow_zone_percent',
459
+ 'chase_zone_percent',
460
+ 'waste_zone_percent',
461
+ 'csw_percent',
462
+ 'whiff_rate',
463
+ 'chase_percent',
464
+ 'bip',
465
+ 'xwoba_percent_contact'
466
+ ]]
467
 
468
+ normalize = mcolors.Normalize(vmin=table_right['pitch_percent']*0.5,
469
+ vmax=table_right['pitch_percent']*1.5) # Define the range of values
470
 
 
 
 
 
 
471
 
472
+ df_colour_right = pd.DataFrame(data=[[get_color(x,normalize) for x in pivot_table_r.loc[0]],
473
+ [get_color(x,normalize) for x in pivot_table_r.loc[1]],
474
+ [get_color(x,normalize) for x in pivot_table_r.loc[2]]],)
 
 
475
 
476
 
 
 
 
 
 
477
 
478
+ table_right['pitch_percent'] = table_right['pitch_percent'].map('{:.1%}'.format)
479
+ table_right['pitches'] = table_right['pitches'].astype(int).astype(str)
480
+ # table_right['pa'] = table_right['pa'].astype(int).astype(str)
481
+ # table_right['k_percent'] = table_right['k_percent'].map('{:.1%}'.format)
482
+ # table_right['bb_percent'] = table_right['bb_percent'].map('{:.1%}'.format)
483
+ table_right['heart_zone_percent'] = table_right['heart_zone_percent'].map('{:.1%}'.format)
484
+ table_right['shadow_zone_percent'] = table_right['shadow_zone_percent'].map('{:.1%}'.format)
485
+ table_right['chase_zone_percent'] = table_right['chase_zone_percent'].map('{:.1%}'.format)
486
+ table_right['waste_zone_percent'] = table_right['waste_zone_percent'].map('{:.1%}'.format)
487
+ table_right['csw_percent'] = table_right['csw_percent'].map('{:.1%}'.format)
488
+ table_right['whiff_rate'] = table_right['whiff_rate'].map('{:.1%}'.format)
489
+ table_right['chase_percent'] = table_right['chase_percent'].map('{:.1%}'.format)
490
+ table_right['bip'] = table_right['bip'].astype(int).astype(str)
491
+ table_right['xwoba_percent_contact'] = table_right['xwoba_percent_contact'].map('{:.3f}'.format)
492
+ table_right.columns = ['Usage%','Pitches','Heart%','Shadow%','Chase%','Waste%','CSW%','Whiff%','O-Swing%','BBE','xwOBACON']
493
+
494
+
495
+ table_right = table_right.replace({'nan%':'—'})
496
+ table_right = table_right.replace({'nan':'—'})
497
+ table_right = table_right.T
498
+
499
+
500
+ import matplotlib.pyplot as plt
501
+ import seaborn as sns
502
+ import matplotlib.gridspec as gridspec
503
+ from matplotlib.gridspec import GridSpec
504
 
505
+ # Assuming you have a list of pitch locations called 'pitch_locations'
506
+ # where each location is a tuple of (x, y) coordinates
507
 
508
+ fig = plt.figure(figsize=(16, 9))
509
+ fig.set_facecolor('white')
510
+ sns.set_theme(style="whitegrid", palette=colour_palette)
511
+ gs = GridSpec(3, 5, height_ratios=[2,9,1],width_ratios=[2,9,0.5,9,2])
512
+ gs.update(hspace=0.2, wspace=0.2)
513
 
514
+ # Add subplots to the grid
515
+ axheader = fig.add_subplot(gs[0, :])
516
+ ax_left = fig.add_subplot(gs[1, 1])
517
+ ax_right = fig.add_subplot(gs[1, 3])
518
 
519
+ axfooter = fig.add_subplot(gs[-1, :])
520
 
 
 
 
 
521
 
 
 
 
 
 
 
 
 
 
 
 
522
 
523
+ if df_plot[df_plot['batter_hand']=='L'].shape[0] > 3:
524
+ sns.kdeplot(data=df_plot[df_plot['batter_hand']=='L'],
525
+ x='px',
526
+ y='pz',
527
+ cmap=cmap_sum,
528
+ shade=True,
529
+ ax=ax_left,
530
+ thresh=0.3,
531
+ bw_adjust=0.5)
532
+ else:
533
+ sns.scatterplot(data=df_plot[df_plot['batter_hand']=='L'],
534
+ x='px',
535
+ y='pz',
536
+ cmap=cmap_sum,
537
+ ax=ax_left,
538
+ s=125)
539
 
540
+ if df_plot[df_plot['batter_hand']=='R'].shape[0] > 3:
541
+ sns.kdeplot(data=df_plot[df_plot['batter_hand']=='R'],
542
+ x='px',
543
+ y='pz',
544
+ cmap=cmap_sum,
545
+ shade=True,
546
+ ax=ax_right,
547
+ thresh=0.3,
548
+ bw_adjust=0.5)
549
+ else:
550
+ sns.scatterplot(data=df_plot[df_plot['batter_hand']=='R'],
551
+ x='px',
552
+ y='pz',
553
+ cmap=cmap_sum,
554
+ ax=ax_right,
555
+ s=125)
556
 
557
+ draw_line(ax_left,alpha_spot=1,catcher_p = False)
558
+ draw_line(ax_right,alpha_spot=1,catcher_p = False)
559
 
560
+ ax_left.axis('off')
561
+ ax_right.axis('off')
562
 
563
+ ax_left.axis('square')
564
+ ax_right.axis('square')
 
565
 
566
+ ax_left.set_xlim(-2.75,2.75)
567
+ ax_right.set_xlim(-2.75,2.75)
 
 
 
 
568
 
569
+ ax_left.set_ylim(-0.5,5)
570
+ ax_right.set_ylim(-0.5,5)
571
 
572
 
573
+ import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
 
575
+ import matplotlib.image as mpimg
576
+ from matplotlib.offsetbox import OffsetImage, AnnotationBbox
 
577
 
578
+ # Load the image
579
+ img = mpimg.imread('left.png')
580
+ imagebox = OffsetImage(img, zoom=0.7) # adjust zoom as needed
581
+ ab = AnnotationBbox(imagebox, (1.25, -0.5), box_alignment=(0, 0), frameon=False)
582
+ ax_left.add_artist(ab)
583
 
 
 
 
 
 
 
 
584
 
585
+ # Load the image
586
+ img = mpimg.imread('right.png')
587
+ imagebox = OffsetImage(img, zoom=0.7) # adjust zoom as needed
588
+ # Create an AnnotationBbox
589
+ ab = AnnotationBbox(imagebox, (-1.25, -0.5), box_alignment=(1, 0), frameon=False)
590
 
591
+ ax_right.add_artist(ab)
592
 
593
 
594
+ from matplotlib.transforms import Bbox
595
+ # Create a transformation that converts from data coordinates to axes coordinates
596
+ trans = ax_left.transData + ax_left.transAxes.inverted()
597
+
598
+ # Calculate the bbox in axes coordinates
599
+ bbox_data = Bbox.from_bounds(-4.2, -0.5, 2.5, 5) # replace width and height with the desired values
600
+ bbox_axes = trans.transform_bbox(bbox_data)
601
+
602
+
603
+ table_left_plot = ax_left.table(cellText=table_left.reset_index().values,
604
+ loc='right',
605
+ cellLoc='center',
606
+ colWidths=[0.52,0.3],
607
+ bbox=bbox_axes.bounds,zorder=100)
608
+
609
+
610
+ min_font_size = 14
611
+ # Set table properties
612
+ table_left_plot.auto_set_font_size(False)
613
+ #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
614
+ table_left_plot.set_fontsize(min_font_size)
615
+ #table_left_plot.scale(1,3)
616
+ # Calculate the bbox in axes coordinates
617
+ bbox_data = Bbox.from_bounds(-0.75, 5, 2.5, 1) # replace width and height with the desired values
618
+ bbox_axes = trans.transform_bbox(bbox_data)
619
+
620
+ def format_as_percentage(val):
621
+ return f'{val * 100:.0f}%'
622
+
623
+ table_left_plot_pivot = ax_left.table(cellText=[[format_as_percentage(val) for val in row] for row in pivot_table_l.values],
624
+ colLabels =pivot_table_l.columns,
625
+ rowLabels =[' 0 ',' 1 ',' 2 '],
626
+ loc='center',
627
+ cellLoc='center',
628
+ colWidths=[0.3,0.3,0.30,0.3],
629
+ bbox=bbox_axes.bounds,zorder=100,cellColours =df_colour_left.values)
630
+
631
+
632
+ min_font_size = 11
633
+ # Set table properties
634
+ table_left_plot_pivot.auto_set_font_size(False)
635
+ #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
636
+ table_left_plot_pivot.set_fontsize(min_font_size)
637
 
 
 
638
 
 
 
 
 
639
 
 
 
640
 
641
+ # Create a transformation that converts from data coordinates to axes coordinates
642
+ trans = ax_right.transData + ax_right.transAxes.inverted()
643
 
644
+ # Calculate the bbox in axes coordinates
645
+ bbox_data = Bbox.from_bounds(1.7, -0.5, 2.5, 5) # replace width and height with the desired values
646
+ bbox_axes = trans.transform_bbox(bbox_data)
647
 
648
 
649
+ table_right_plot = ax_right.table(cellText=table_right.reset_index().values,
650
+ loc='right',
651
+ cellLoc='center',
652
+ colWidths=[0.52,0.3],
653
+ bbox=bbox_axes.bounds,zorder=100)
654
 
655
 
 
 
656
 
657
+ min_font_size = 14
658
+ # Set table properties
659
+ table_right_plot.auto_set_font_size(False)
660
+ #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
661
+ table_right_plot.set_fontsize(min_font_size)
662
+ table_right_plot.scale(0.5,3)
663
 
664
+ # Calculate the bbox in axes coordinates
665
+ # Create a transformation that converts from data coordinates to axes coordinates
666
+ trans = ax_right.transData + ax_right.transAxes.inverted()
667
+ bbox_data = Bbox.from_bounds(-0.75, 5, 2.5, 1) # replace width and height with the desired values
668
+ bbox_axes = trans.transform_bbox(bbox_data)
669
 
670
+ table_right_plot_pivot = ax_right.table(cellText=[[format_as_percentage(val) for val in row] for row in pivot_table_r.values],
671
+ colLabels =pivot_table_r.columns,
672
+ rowLabels =[' 0 ',' 1 ',' 2 '],
673
+ loc='center',
674
+ cellLoc='center',
675
+ colWidths=[0.3,0.3,0.30,0.3],
676
+ bbox=bbox_axes.bounds,zorder=100,cellColours =df_colour_right.values)
677
+
678
+
679
+ min_font_size = 11
680
+ # Set table properties
681
+ table_right_plot_pivot.auto_set_font_size(False)
682
+ #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
683
+ table_right_plot_pivot.set_fontsize(min_font_size)
684
+
685
+ from matplotlib.cm import ScalarMappable
686
+ from matplotlib.colors import Normalize
687
+ # Create a ScalarMappable with the same colormap and normalization
688
+ sm = ScalarMappable(cmap=cmap_sum, norm=Normalize(vmin=0, vmax=1))
689
+
690
+ #from mpl_toolkits.axes_grid1.inset_locator import inset_axes
691
+ #######################
692
+ # Create a new Subplot object for the colorbar
693
+ # Create a new Axes object for the colorbar at the bottom middle of the figure
694
+ cbar = fig.colorbar(sm, ax=axfooter, orientation='horizontal',aspect=100)
695
+ # cbar.ax.set_aspect(20)
696
+
697
+ # cbar = plt.colorbar(batter_plot, cax=ax12, orientation='vertical',shrink=1, cmap=cmap_hue)
698
+
699
+ # cbar = plt.colorbar(batter_plot, cax=ax12, orientation='vertical',shrink=1)
700
+ cbar.set_ticks([])
701
+ # # Create an inset axes for the colorbar
702
+ # cax = inset_axes(axfooter,
703
+ # width="50%", # width = 50% of parent_bbox width
704
+ # height="100%", # height : 5%
705
+ # loc='center')
706
+
707
+ # # Add the colorbar to the inset axes
708
+ # cbar = fig.colorbar(sm, cax=cax, orientation='horizontal')
709
+ # # Set the labels on the low and high ends of the colorbar
710
+ # # Set the xticks to only include the low and high ends of the colorbar
711
+ cbar.set_ticks([sm.norm.vmin, sm.norm.vmax])
712
+
713
+ # # Set the labels on the low and high ends of the colorbar
714
+ cbar.ax.set_xticklabels(['Least', 'Most'])
715
+ # # Place the xticks on top of the colorbar
716
+ cbar.ax.tick_params(labeltop=True, labelbottom=False, labelsize=14)
717
+
718
+ # # Get the labels
719
+ labels = cbar.ax.get_xticklabels()
720
+
721
+ # # Set the alignment of the labels
722
+ labels[0].set_horizontalalignment('left')
723
+ labels[-1].set_horizontalalignment('right')
724
+ # # Get the labels
725
+ labels = cbar.ax.get_xticklabels()
726
+
727
+ # # Set the font size of the labels
728
+ # for label in labels:
729
+ # label.set_fontsize(16)
730
+
731
+ # # Set the labels
732
+ cbar.ax.set_xticklabels(labels)
733
+ # # Remove the tick lines on the colorbar
734
+ cbar.ax.tick_params(length=0)
735
+
736
+
737
+
738
+ axfooter.text(x=0.02,y=0.5,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=18,va='top')
739
+ axfooter.text(x=1-0.02,y=0.5,s='Data: MLB',ha='right',fontname='Calibri',fontsize=18,va='top')
740
+
741
+ axfooter.axis('off')
742
+
743
+
744
+ axheader.text(x=0.5,y=1.2,s=f"{df_plot['pitcher_name'].values[0]} - {df_plot['pitcher_hand'].values[0]}HP\n{season} {df_plot['pitch_description'].values[0]} Pitch Frequency",ha='center',fontsize=24,va='top')
745
+ axheader.axis('off')
746
 
747
 
 
748
  import urllib
749
  import urllib.request
750
  import urllib.error
751
  from urllib.error import HTTPError
752
 
753
  try:
754
+ url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{df_plot["pitcher_id"].values[0]}/headshot/67/current.png'
755
  test_mage = plt.imread(url)
756
  except urllib.error.HTTPError as err:
757
  url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
758
+ imagebox = OffsetImage(test_mage, zoom = 0.4)
759
+ ab = AnnotationBbox(imagebox, (0.075, 0.4), frameon = False)
760
+ axheader.add_artist(ab)
 
761
 
762
+ player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={df_plot['pitcher_id'].values[0]}&hydrate=currentTeam").json()
763
 
 
 
 
 
764
 
765
+ team_logos = pd.read_csv('team_logos.csv')
766
+
767
+
768
+ mlb_stats = MLB_Scrape()
769
+ teams_df = mlb_stats.get_teams()
770
+ team_logo_dict = teams_df.set_index(['team_id'])['parent_org_id'].to_dict()
771
 
772
 
773
  if 'currentTeam' in player_bio['people'][0]:
 
779
  # im = Image.open(BytesIO(response.content))
780
  # im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
781
  # ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
782
+ imagebox = OffsetImage(im, zoom = 0.3)
783
+ ab = AnnotationBbox(imagebox, (0.925, 0.40), frameon = False)
784
+ axheader.add_artist(ab)
785
  except IndexError:
786
  print()
 
 
 
 
 
 
787
 
788
 
789
+ ax_left.text(s='Against LHH',x=-2.95,y=4.65,fontsize=18,fontweight='bold',ha='center')
790
+ ax_right.text(s='Against RHH',x=2.95,y=4.65,fontsize=18,fontweight='bold',ha='center')
791
+ # Center the labels
 
792
 
793
+ ax_left.text(x=-1.72, y=5.08, s='Strikes', rotation=90,fontweight='bold')
794
+ ax_right.text(x=-1.72, y=5.08, s='Strikes', rotation=90,fontweight='bold')
795
 
796
+ ax_left.text(x=0, y=6.1, s='Balls',ha='center',fontweight='bold')
797
+ ax_right.text(x=0, y=6.1, s='Balls',ha='center',fontweight='bold')
798
+ #cbar.ax.set_xticklabels(cbar.ax.get_xticklabels(), ha='center')
799
+ fig.subplots_adjust(left=0.01, right=0.99, top=0.95, bottom=0.05)
800
+ return
801
 
802
  app = App(app_ui, server)
left.png ADDED
pitcher_update.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ import math
5
+ import pickle
6
+
7
# Pre-trained models, deserialized once when this module is imported.
# The paths are relative, so they resolve against the process's working
# directory.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')  # fills df['barrel'] in df_update
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')  # fills missing df['in_zone']
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')  # fills df['attack_zone']
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')  # predict_proba feeds df['woba_pred']
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')  # back-fills px (and pz) in df_update
# NOTE(review): pz_model is loaded but not referenced by df_update, which
# uses px_model for the pz back-fill as well -- confirm whether that is
# intentional.
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
13
+
14
+
15
def percentile(n):
    """Build a NaN-ignoring percentile aggregator, e.g. for ``groupby().agg``.

    Args:
        n: Percentile (or sequence of percentiles) forwarded to
            ``np.nanpercentile`` as its ``q`` argument.

    Returns:
        A one-argument callable ``f(x)`` returning ``np.nanpercentile(x, n)``.
        Its ``__name__`` is set to ``'percentile_<n>'``.
    """
    def percentile_(x):
        return np.nanpercentile(x, n)

    # An f-string is used instead of ``'%s' % n`` because the %-operator
    # treats a tuple ``n`` as an argument list and raises TypeError.
    percentile_.__name__ = f'percentile_{n}'
    return percentile_
20
+
21
+
22
def df_update(df=pd.DataFrame()):
    """Add derived pitch-level columns to a pitch-by-pitch DataFrame.

    The frame is modified in place and also returned.

    Columns read: ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``y``, ``px``,
    ``pz``, ``event_type``, ``play_description``, ``play_code``,
    ``launch_speed``, ``launch_angle``, ``is_swing``, ``is_pitch`` and
    ``trajectory``.

    Columns written include plate-appearance outcome flags (``hits``, ``ab``,
    ``on_base``, ``obp``, ``bip``, ``k``, ``bb``, ``pa``), batted-ball flags
    (``barrel``, ``sweet_spot``, ``hard_hit``, ``tb``), wOBA values
    (``woba``, ``woba_codes``, ``woba_contact``, ``woba_pred``,
    ``woba_pred_contact``), swing/zone flags, trajectory indicator columns
    and attack-zone flags.

    Args:
        df: Pitch-level DataFrame containing the columns listed above.

    Returns:
        The same DataFrame object with the derived columns added.
    """
    # A strike-zone edge recorded as 0 is treated as missing.
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # Zones 1-9 count as in-zone; a missing or non-positive zone stays NaN.
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Back-fill missing plate locations from the x / y columns.
    px_missing = df['x'].notna() & df['px'].isna()
    if px_missing.any():
        df.loc[px_missing, 'px'] = px_model.predict(df.loc[px_missing, ['x']])
        pz_missing = df['y'].notna() & df['pz'].isna()
        # Guarded so that predict() is never called on zero rows.
        if pz_missing.any():
            # NOTE(review): pz is estimated with px_model plus a 3.2 offset,
            # while a separate pz_model is loaded at module level and never
            # used -- confirm this is intentional.
            df.loc[pz_missing, 'pz'] = px_model.predict(df.loc[pz_missing, ['y']]) + 3.2

    # Fill in_zone with the model where the zone is unknown but the pitch
    # location and strike-zone edges are available.
    zone_features = ['px', 'pz', 'sz_top', 'sz_bot']
    zone_candidates = df['px'].notna() & df['in_zone'].isna() & df['sz_top'].notna()
    if zone_candidates.any():
        print('We found missing data')
        zone_fillable = zone_candidates & df['pz'].notna() & df['sz_bot'].notna()
        # The prediction mask is stricter than the guard above, so check it
        # separately to avoid predicting on an empty array.
        if zone_fillable.any():
            df.loc[zone_fillable, 'in_zone'] = in_zone_model.predict(
                df.loc[zone_fillable, zone_features].values)

    hit_codes = ['single',
                 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    # Boolean outcome flags; rows with a missing event/description are False.
    df['hits'] = df['event_type'].isin(hit_codes)
    df['ab'] = df['event_type'].isin(ab_codes)
    df['on_base'] = df['event_type'].isin(obp_true_codes)
    df['obp'] = df['event_type'].isin(obp_codes)
    df['bip'] = df['play_description'].isin(bip_codes)

    # Denominator for batted-ball rates: rows with a measured exit velocity.
    df['bip_div'] = df['launch_speed'].notna()

    # Barrel flag comes from the model; missing launch data is fed as 0 and
    # blanked out again immediately afterwards.
    df['barrel'] = loaded_model.predict(df[['launch_speed', 'launch_angle']].fillna(0).values)
    df.loc[df['launch_speed'].isna(), 'barrel'] = np.nan

    # np.select picks the first matching condition: a missing launch angle
    # gives 0, an angle of 8-32 gives 1, anything else stays NaN.
    conditions_ss = [
        df['launch_angle'].isna(),
        (df['launch_angle'] >= 8) & (df['launch_angle'] <= 32),
    ]
    df['sweet_spot'] = np.select(conditions_ss, [False, True], default=np.nan)

    # Same scheme: missing exit velocity -> 0, >= 94.5 -> 1, otherwise NaN.
    conditions_hh = [
        df['launch_speed'].isna(),
        df['launch_speed'] >= 94.5,
    ]
    df['hard_hit'] = np.select(conditions_hh, [False, True], default=np.nan)

    # Total bases per hit type; NaN for everything that is not a hit.
    conditions_tb = [
        df['event_type'] == 'single',
        df['event_type'] == 'double',
        df['event_type'] == 'triple',
        df['event_type'] == 'home_run',
    ]
    df['tb'] = np.select(conditions_tb, [1, 2, 3, 4], default=np.nan)

    # Per-event wOBA value; events outside these lists stay NaN.
    conditions_woba = [
        df['event_type'].isin(['strikeout', 'field_out', 'sac_fly', 'force_out',
                               'grounded_into_double_play', 'fielders_choice', 'field_error',
                               'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play',
                               'sac_fly_double_play', 'other_out']),
        df['event_type'] == 'walk',
        df['event_type'] == 'hit_by_pitch',
        df['event_type'] == 'single',
        df['event_type'] == 'double',
        df['event_type'] == 'triple',
        df['event_type'] == 'home_run',
    ]
    choices_woba = [0,
                    0.696,
                    0.726,
                    0.883,
                    1.244,
                    1.569,
                    2.004]
    df['woba'] = np.select(conditions_woba, choices_woba, default=np.nan)

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']

    # 1 for events that count toward the wOBA denominator, NaN otherwise.
    df['woba_codes'] = np.select([df['event_type'].isin(woba_codes)], [1], default=np.nan)

    # wOBA value restricted to balls in play.
    df['woba_contact'] = df['woba'].where(df['bip'] == 1)

    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype('int64')
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype('int64')
    df['swings'] = [1 if x == True else 0 for x in df.is_swing]

    # in_zone can hold True / False / NaN, so compare explicitly rather than
    # negating: NaN rows must be False in every flag below.
    df['out_zone'] = df.in_zone == False
    df['zone_swing'] = (df.in_zone == True) & (df.swings == 1)
    df['zone_contact'] = (df.in_zone == True) & (df.swings == 1) & (df.whiffs == 0)
    df['ozone_swing'] = (df.in_zone == False) & (df.swings == 1)
    df['ozone_contact'] = (df.in_zone == False) & (df.swings == 1) & (df.whiffs == 0)

    # Any event type whose name contains 'strikeout' counts as a strikeout.
    strikeout_events = [e for e in df['event_type'].dropna().unique() if 'strikeout' in e]
    df['k'] = df['event_type'].isin(strikeout_events)
    df['bb'] = df['event_type'].isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A row ends a plate appearance when it carries an event type string.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in df.event_type]
    df['pitches'] = [1 if x else 0 for x in df.is_pitch]

    # Constant key so that the whole frame can be grouped into one row.
    df['average'] = 'average'

    # Fold bunt trajectories into the four main categories; '' means missing.
    trajectory_aliases = {
        'bunt_popup': 'popup',
        'bunt_grounder': 'ground_ball',
        '': np.nan,
        'bunt_line_drive': 'line_drive',
    }
    for raw_value, normalised in trajectory_aliases.items():
        df.loc[df['trajectory'] == raw_value, 'trajectory'] = normalised

    # Reindex to the four expected columns so that a category absent from
    # this data set yields an all-zero column instead of a length mismatch.
    trajectory_columns = ['trajectory_fly_ball', 'trajectory_ground_ball',
                          'trajectory_line_drive', 'trajectory_popup']
    trajectory_dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    df[trajectory_columns] = trajectory_dummies.reindex(columns=trajectory_columns, fill_value=0)

    df['attack_zone'] = np.nan
    attack_ready = df[zone_features].notna().all(axis=1)
    # Guarded so that predict() is never called on zero rows.
    if attack_ready.any():
        df.loc[attack_ready, 'attack_zone'] = attack_zone_model.predict(
            df.loc[attack_ready, zone_features])

    # Model output codes 0-3 map to the four attack zones.
    attack_zones = {'heart': 0, 'shadow': 1, 'chase': 2, 'waste': 3}
    for zone_name, zone_code in attack_zones.items():
        df[zone_name] = df['attack_zone'] == zone_code
    for zone_name, zone_code in attack_zones.items():
        df[f'{zone_name}_swing'] = (df['attack_zone'] == zone_code) & (df['swings'] == 1)
    for zone_name, zone_code in attack_zones.items():
        df[f'{zone_name}_whiff'] = (df['attack_zone'] == zone_code) & (df['whiffs'] == 1)

    df['woba_pred'] = np.nan
    df['woba_pred_contact'] = np.nan

    batted = df[['launch_angle', 'launch_speed']].notna().all(axis=1)
    if batted.any():
        # Expected wOBA on contact: class probabilities weighted by the wOBA
        # value attached to each predicted class, computed once and reused.
        outcome_weights = [0, 0.883, 1.244, 1.569, 2.004]
        class_probabilities = xwoba_model.predict_proba(
            df.loc[batted, ['launch_angle', 'launch_speed']])
        expected_on_contact = [sum(row) for row in class_probabilities * outcome_weights]

        df.loc[batted, 'woba_pred'] = expected_on_contact

        # NOTE(review): these fixed values are only applied when the frame
        # has at least one batted ball -- confirm that is intended.
        ## Assign a value of 0.696 to every walk in the dataset
        df.loc[df['event_type'].isin(['walk']), 'woba_pred'] = 0.696

        ## Assign a value of 0.726 to every hit by pitch in the dataset
        df.loc[df['event_type'].isin(['hit_by_pitch']), 'woba_pred'] = 0.726

        ## Assign a value of 0 to every Strikeout in the dataset
        df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'woba_pred'] = 0

        df.loc[batted, 'woba_pred_contact'] = expected_on_contact

    return df
293
+
294
def df_update_summ(df=pd.DataFrame()):
    """Collapse pitch-level rows into one summary row per pitcher.

    Rows are grouped by ``pitcher_id`` and ``pitcher_name``. Each output
    column is produced by the ``(source column, aggregation)`` pair listed
    below; the grouping keys are returned as ordinary columns.

    Args:
        df: Pitch-level DataFrame carrying the derived columns referenced in
            the aggregation spec.

    Returns:
        DataFrame with one row per (pitcher_id, pitcher_name).
    """
    # Output column -> (input column, aggregation). Dict order is the
    # resulting column order.
    aggregations = {
        'pa': ('pa', 'sum'),
        'ab': ('ab', 'sum'),
        'obp_pa': ('obp', 'sum'),
        'hits': ('hits', 'sum'),
        'on_base': ('on_base', 'sum'),
        'k': ('k', 'sum'),
        'bb': ('bb', 'sum'),
        'bb_minus_k': ('bb_minus_k', 'sum'),
        'csw': ('csw', 'sum'),
        'bip': ('bip', 'sum'),
        'bip_div': ('bip_div', 'sum'),
        'tb': ('tb', 'sum'),
        'woba': ('woba', 'sum'),
        'woba_contact': ('woba_contact', 'sum'),
        'xwoba': ('woba_pred', 'sum'),
        'xwoba_contact': ('woba_pred_contact', 'sum'),
        'woba_codes': ('woba_codes', 'sum'),
        'hard_hit': ('hard_hit', 'sum'),
        'barrel': ('barrel', 'sum'),
        'sweet_spot': ('sweet_spot', 'sum'),
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'pitches': ('is_pitch', 'sum'),
        'swings': ('swings', 'sum'),
        'in_zone': ('in_zone', 'sum'),
        'out_zone': ('out_zone', 'sum'),
        'whiffs': ('whiffs', 'sum'),
        'zone_swing': ('zone_swing', 'sum'),
        'zone_contact': ('zone_contact', 'sum'),
        'ozone_swing': ('ozone_swing', 'sum'),
        'ozone_contact': ('ozone_contact', 'sum'),
        'ground_ball': ('trajectory_ground_ball', 'sum'),
        'line_drive': ('trajectory_line_drive', 'sum'),
        'fly_ball': ('trajectory_fly_ball', 'sum'),
        'pop_up': ('trajectory_popup', 'sum'),
        'attack_zone': ('attack_zone', 'count'),
        'heart': ('heart', 'sum'),
        'shadow': ('shadow', 'sum'),
        'chase': ('chase', 'sum'),
        'waste': ('waste', 'sum'),
        'heart_swing': ('heart_swing', 'sum'),
        'shadow_swing': ('shadow_swing', 'sum'),
        'chase_swing': ('chase_swing', 'sum'),
        'waste_swing': ('waste_swing', 'sum'),
    }
    per_pitcher = df.groupby(['pitcher_id', 'pitcher_name']).agg(**aggregations)
    df_summ = per_pitcher.reset_index()
    return df_summ
344
+
345
def df_update_summ_avg(df=pd.DataFrame()):
    """Collapse pitch-level rows into summary rows keyed by ``average``.

    Calling ``.agg()`` with no aggregations raises ``TypeError``, so this
    function applies the same output-column spec that ``df_update_summ``
    uses per pitcher, grouped by the ``average`` column instead.

    NOTE(review): the aggregation set is assumed to mirror
    ``df_update_summ`` -- confirm against the intended league-average
    definition.

    Args:
        df: Pitch-level DataFrame carrying an ``average`` column and the
            derived columns referenced in the aggregation spec.

    Returns:
        DataFrame with one row per distinct ``average`` value.
    """
    # Output column -> (input column, aggregation). Dict order is the
    # resulting column order.
    aggregations = {
        'pa': ('pa', 'sum'),
        'ab': ('ab', 'sum'),
        'obp_pa': ('obp', 'sum'),
        'hits': ('hits', 'sum'),
        'on_base': ('on_base', 'sum'),
        'k': ('k', 'sum'),
        'bb': ('bb', 'sum'),
        'bb_minus_k': ('bb_minus_k', 'sum'),
        'csw': ('csw', 'sum'),
        'bip': ('bip', 'sum'),
        'bip_div': ('bip_div', 'sum'),
        'tb': ('tb', 'sum'),
        'woba': ('woba', 'sum'),
        'woba_contact': ('woba_contact', 'sum'),
        'xwoba': ('woba_pred', 'sum'),
        'xwoba_contact': ('woba_pred_contact', 'sum'),
        'woba_codes': ('woba_codes', 'sum'),
        'hard_hit': ('hard_hit', 'sum'),
        'barrel': ('barrel', 'sum'),
        'sweet_spot': ('sweet_spot', 'sum'),
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'pitches': ('is_pitch', 'sum'),
        'swings': ('swings', 'sum'),
        'in_zone': ('in_zone', 'sum'),
        'out_zone': ('out_zone', 'sum'),
        'whiffs': ('whiffs', 'sum'),
        'zone_swing': ('zone_swing', 'sum'),
        'zone_contact': ('zone_contact', 'sum'),
        'ozone_swing': ('ozone_swing', 'sum'),
        'ozone_contact': ('ozone_contact', 'sum'),
        'ground_ball': ('trajectory_ground_ball', 'sum'),
        'line_drive': ('trajectory_line_drive', 'sum'),
        'fly_ball': ('trajectory_fly_ball', 'sum'),
        'pop_up': ('trajectory_popup', 'sum'),
        'attack_zone': ('attack_zone', 'count'),
        'heart': ('heart', 'sum'),
        'shadow': ('shadow', 'sum'),
        'chase': ('chase', 'sum'),
        'waste': ('waste', 'sum'),
        'heart_swing': ('heart_swing', 'sum'),
        'shadow_swing': ('shadow_swing', 'sum'),
        'chase_swing': ('chase_swing', 'sum'),
        'waste_swing': ('waste_swing', 'sum'),
    }
    df_summ_avg = df.groupby(['average']).agg(**aggregations).reset_index()
    return df_summ_avg
350
+
351
def _safe_ratio(numerator, denominator, *, require_positive=False):
    """Divide two Series element-wise, giving NaN where the denominator is unusable.

    A denominator of 0 (or, with ``require_positive``, anything not > 0)
    produces NaN instead of a division error or infinity.
    """
    denominator = denominator.astype('float64')
    usable = denominator > 0 if require_positive else denominator != 0
    return numerator.astype('float64') / denominator.where(usable)


def df_summ_changes(df_summ=pd.DataFrame()):
    """Add rate statistics to a summary frame built from counting columns.

    Each rate is ``numerator / denominator`` and is NaN when the denominator
    is zero. The division is vectorized and aligned on the frame's own
    index, so it works for any index, not only a default ``RangeIndex``.
    The new columns are added to ``df_summ`` in place.

    Args:
        df_summ: Summary DataFrame holding the counting columns used below
            (``hits``, ``ab``, ``pa``, ``pitches``, ``bip``, ...).

    Returns:
        ``df_summ`` with the rate columns added and rows whose ``bip`` is
        missing dropped.
    """
    def ratio(numerator, denominator, require_positive=False):
        return _safe_ratio(df_summ[numerator], df_summ[denominator],
                           require_positive=require_positive)

    # Slash line.
    df_summ['avg'] = ratio('hits', 'ab')
    df_summ['obp'] = ratio('on_base', 'obp_pa')
    df_summ['slg'] = ratio('tb', 'ab')
    df_summ['ops'] = df_summ['obp'] + df_summ['slg']

    # Strikeout / walk rates.
    df_summ['k_percent'] = ratio('k', 'pa')
    df_summ['bb_percent'] = ratio('bb', 'pa')
    df_summ['bb_minus_k_percent'] = ratio('bb_minus_k', 'pa')
    df_summ['bb_over_k_percent'] = ratio('bb', 'k')

    df_summ['csw_percent'] = ratio('csw', 'pitches')

    # Batted-ball quality; bip_div counts balls with a measured exit velocity.
    df_summ['sweet_spot_percent'] = ratio('sweet_spot', 'bip_div')
    df_summ['woba_percent'] = ratio('woba', 'woba_codes')
    df_summ['woba_percent_contact'] = ratio('woba_contact', 'bip')
    df_summ['hard_hit_percent'] = ratio('hard_hit', 'bip_div')
    df_summ['barrel_percent'] = ratio('barrel', 'bip_div')

    # Plate discipline.
    df_summ['zone_contact_percent'] = ratio('zone_contact', 'zone_swing')
    df_summ['zone_swing_percent'] = ratio('zone_swing', 'in_zone')
    df_summ['zone_percent'] = ratio('in_zone', 'pitches', require_positive=True)
    # Chase rate uses pitches minus in-zone pitches as the denominator.
    df_summ['chase_percent'] = _safe_ratio(df_summ['ozone_swing'],
                                           df_summ['pitches'] - df_summ['in_zone'])
    df_summ['chase_contact'] = ratio('ozone_contact', 'ozone_swing')
    df_summ['swing_percent'] = ratio('swings', 'pitches', require_positive=True)
    df_summ['whiff_rate'] = ratio('whiffs', 'swings')
    df_summ['swstr_rate'] = ratio('whiffs', 'pitches', require_positive=True)

    # Batted-ball type shares.
    df_summ['ground_ball_percent'] = ratio('ground_ball', 'bip')
    df_summ['line_drive_percent'] = ratio('line_drive', 'bip')
    df_summ['fly_ball_percent'] = ratio('fly_ball', 'bip')
    df_summ['pop_up_percent'] = ratio('pop_up', 'bip')

    # Attack-zone location shares, then swing rates within each zone.
    df_summ['heart_zone_percent'] = ratio('heart', 'attack_zone')
    df_summ['shadow_zone_percent'] = ratio('shadow', 'attack_zone')
    df_summ['chase_zone_percent'] = ratio('chase', 'attack_zone')
    df_summ['waste_zone_percent'] = ratio('waste', 'attack_zone')

    df_summ['heart_zone_swing_percent'] = ratio('heart_swing', 'heart')
    df_summ['shadow_zone_swing_percent'] = ratio('shadow_swing', 'shadow')
    df_summ['chase_zone_swing_percent'] = ratio('chase_swing', 'chase')
    df_summ['waste_zone_swing_percent'] = ratio('waste_swing', 'waste')

    df_summ['xwoba_percent'] = ratio('xwoba', 'woba_codes')
    df_summ['xwoba_percent_contact'] = ratio('xwoba_contact', 'bip')

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
431
+
432
def df_summ_filter_out(df_summ=pd.DataFrame(),batter_select = 0):
    """Build the comparison pool and percentile ranks for one selected player.

    The plate-appearance threshold is the selected player's ``pa`` rounded
    down to a multiple of 10, capped at 500. Rows of ``df_summ`` at or above
    that threshold form the comparison pool.

    Args:
        df_summ: Summary DataFrame with a ``pa`` column and a MultiIndex
            whose first level identifies the player.
        batter_select: First-level index value of the player of interest.

    Returns:
        Tuple ``(pool, pool_pct, player, player_pct)``: the filtered pool,
        its column-wise percentile ranks, the selected player's rows from
        ``df_summ``, and the selected player's rows from the ranks.
    """
    df_summ_player = df_summ.xs(batter_select, level=0)

    # Take the scalar explicitly: passing a one-element Series to math.floor
    # relies on implicit Series-to-float conversion, which pandas deprecates.
    player_pa = df_summ_player['pa'].iloc[0]
    pa_threshold = min(math.floor(player_pa / 10) * 10, 500)

    df_summ_filter = df_summ[df_summ['pa'] >= pa_threshold]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter, df_summ_filter_pct, df_summ_player, df_summ_player_pct
438
+
439
+ def df_summ_batter_pitch_up(df=pd.DataFrame()):
440
+ df_summ_batter_pitch = df.dropna(subset=['pitch_type']).groupby(['pitcher_id','pitcher_name','pitch_type']).agg(
441
+ pa = ('pa','sum'),
442
+ ab = ('ab','sum'),
443
+ obp_pa = ('obp','sum'),
444
+ hits = ('hits','sum'),
445
+ on_base = ('on_base','sum'),
446
+ k = ('k','sum'),
447
+ bb = ('bb','sum'),
448
+ bb_minus_k = ('bb_minus_k','sum'),
449
+ csw = ('csw','sum'),
450
+ bip = ('bip','sum'),
451
+ bip_div = ('bip_div','sum'),
452
+ tb = ('tb','sum'),
453
+ woba = ('woba','sum'),
454
+ woba_contact = ('woba_pred_contact','sum'),
455
+ xwoba = ('woba_pred','sum'),
456
+ xwoba_contact = ('woba_pred','sum'),
457
+ woba_codes = ('woba_codes','sum'),
458
+ hard_hit = ('hard_hit','sum'),
459
+ barrel = ('barrel','sum'),
460
+ sweet_spot = ('sweet_spot','sum'),
461
+ max_launch_speed = ('launch_speed','max'),
462
+ launch_speed_90 = ('launch_speed',percentile(90)),
463
+ launch_speed = ('launch_speed','mean'),
464
+ launch_angle = ('launch_angle','mean'),
465
+ pitches = ('is_pitch','sum'),
466
+ swings = ('swings','sum'),
467
+ in_zone = ('in_zone','sum'),
468
+ out_zone = ('out_zone','sum'),
469
+ whiffs = ('whiffs','sum'),
470
+ zone_swing = ('zone_swing','sum'),
471
+ zone_contact = ('zone_contact','sum'),
472
+ ozone_swing = ('ozone_swing','sum'),
473
+ ozone_contact = ('ozone_contact','sum'),
474
+ ground_ball = ('trajectory_ground_ball','sum'),
475
+ line_drive = ('trajectory_line_drive','sum'),
476
+ fly_ball =('trajectory_fly_ball','sum'),
477
+ pop_up = ('trajectory_popup','sum'),
478
+ attack_zone = ('attack_zone','count'),
479
+ heart = ('heart','sum'),
480
+ shadow = ('shadow','sum'),
481
+ chase = ('chase','sum'),
482
+ waste = ('waste','sum'),
483
+ heart_swing = ('heart_swing','sum'),
484
+ shadow_swing = ('shadow_swing','sum'),
485
+ chase_swing = ('chase_swing','sum'),
486
+ waste_swing = ('waste_swing','sum'),
487
+ ).reset_index()
488
+
489
+ #return df_summ_batter_pitch
490
+ df_summ_batter_pitch['avg'] = [df_summ_batter_pitch.hits[x]/df_summ_batter_pitch.ab[x] if df_summ_batter_pitch.ab[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
491
+ df_summ_batter_pitch['obp'] = [df_summ_batter_pitch.on_base[x]/df_summ_batter_pitch.obp_pa[x] if df_summ_batter_pitch.obp_pa[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
492
+ df_summ_batter_pitch['slg'] = [df_summ_batter_pitch.tb[x]/df_summ_batter_pitch.ab[x] if df_summ_batter_pitch.ab[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
493
+
494
+ df_summ_batter_pitch['ops'] = df_summ_batter_pitch['obp']+df_summ_batter_pitch['slg']
495
+
496
+ df_summ_batter_pitch['k_percent'] = [df_summ_batter_pitch.k[x]/df_summ_batter_pitch.pa[x] if df_summ_batter_pitch.pa[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
497
+ df_summ_batter_pitch['bb_percent'] =[df_summ_batter_pitch.bb[x]/df_summ_batter_pitch.pa[x] if df_summ_batter_pitch.pa[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
498
+ df_summ_batter_pitch['bb_minus_k_percent'] =[(df_summ_batter_pitch.bb_minus_k[x])/df_summ_batter_pitch.pa[x] if df_summ_batter_pitch.pa[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
499
+
500
+ df_summ_batter_pitch['bb_over_k_percent'] =[df_summ_batter_pitch.bb[x]/df_summ_batter_pitch.k[x] if df_summ_batter_pitch.k[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
501
+
502
+
503
+
504
+
505
+ df_summ_batter_pitch['csw_percent'] =[df_summ_batter_pitch.csw[x]/df_summ_batter_pitch.pitches[x] if df_summ_batter_pitch.pitches[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
506
+
507
+
508
+ df_summ_batter_pitch['sweet_spot_percent'] = [df_summ_batter_pitch.sweet_spot[x]/df_summ_batter_pitch.bip_div[x] if df_summ_batter_pitch.bip_div[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
509
+
510
+ df_summ_batter_pitch['woba_percent'] = [df_summ_batter_pitch.woba[x]/df_summ_batter_pitch.woba_codes[x] if df_summ_batter_pitch.woba_codes[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
511
+ df_summ_batter_pitch['woba_percent_contact'] = [df_summ_batter_pitch.woba_contact[x]/df_summ_batter_pitch.bip[x] if df_summ_batter_pitch.bip[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
512
+ #df_summ_batter_pitch['hard_hit_percent'] = [df_summ_batter_pitch.sweet_spot[x]/df_summ_batter_pitch.bip[x] if df_summ_batter_pitch.bip[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
513
+ df_summ_batter_pitch['hard_hit_percent'] = [df_summ_batter_pitch.hard_hit[x]/df_summ_batter_pitch.bip_div[x] if df_summ_batter_pitch.bip_div[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
514
+
515
+
516
+ df_summ_batter_pitch['barrel_percent'] = [df_summ_batter_pitch.barrel[x]/df_summ_batter_pitch.bip_div[x] if df_summ_batter_pitch.bip_div[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
517
+
518
+ df_summ_batter_pitch['zone_contact_percent'] = [df_summ_batter_pitch.zone_contact[x]/df_summ_batter_pitch.zone_swing[x] if df_summ_batter_pitch.zone_swing[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
519
+
520
+ df_summ_batter_pitch['zone_swing_percent'] = [df_summ_batter_pitch.zone_swing[x]/df_summ_batter_pitch.in_zone[x] if df_summ_batter_pitch.in_zone[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
521
+
522
+ df_summ_batter_pitch['zone_percent'] = [df_summ_batter_pitch.in_zone[x]/df_summ_batter_pitch.pitches[x] if df_summ_batter_pitch.pitches[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
523
+
524
+ df_summ_batter_pitch['chase_percent'] = [df_summ_batter_pitch.ozone_swing[x]/(df_summ_batter_pitch.pitches[x] - df_summ_batter_pitch.in_zone[x]) if (df_summ_batter_pitch.pitches[x]- df_summ_batter_pitch.in_zone[x]) != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
525
+
526
+ df_summ_batter_pitch['chase_contact'] = [df_summ_batter_pitch.ozone_contact[x]/df_summ_batter_pitch.ozone_swing[x] if df_summ_batter_pitch.ozone_swing[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
527
+
528
+ df_summ_batter_pitch['swing_percent'] = [df_summ_batter_pitch.swings[x]/df_summ_batter_pitch.pitches[x] if df_summ_batter_pitch.pitches[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
529
+
530
+ df_summ_batter_pitch['whiff_rate'] = [df_summ_batter_pitch.whiffs[x]/df_summ_batter_pitch.swings[x] if df_summ_batter_pitch.swings[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
531
+
532
+ df_summ_batter_pitch['swstr_rate'] = [df_summ_batter_pitch.whiffs[x]/df_summ_batter_pitch.pitches[x] if df_summ_batter_pitch.pitches[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
533
+
534
+ df_summ_batter_pitch['heart_zone_percent'] = [df_summ_batter_pitch.heart[x]/df_summ_batter_pitch.attack_zone[x] if df_summ_batter_pitch.attack_zone[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
535
+
536
+ df_summ_batter_pitch['shadow_zone_percent'] = [df_summ_batter_pitch.shadow[x]/df_summ_batter_pitch.attack_zone[x] if df_summ_batter_pitch.attack_zone[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
537
+
538
+ df_summ_batter_pitch['chase_zone_percent'] = [df_summ_batter_pitch.chase[x]/df_summ_batter_pitch.attack_zone[x] if df_summ_batter_pitch.attack_zone[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
539
+
540
+ df_summ_batter_pitch['waste_zone_percent'] = [df_summ_batter_pitch.waste[x]/df_summ_batter_pitch.attack_zone[x] if df_summ_batter_pitch.attack_zone[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
541
+
542
+
543
+ df_summ_batter_pitch['heart_zone_swing_percent'] = [df_summ_batter_pitch.heart_swing[x]/df_summ_batter_pitch.heart[x] if df_summ_batter_pitch.heart[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
544
+
545
+ df_summ_batter_pitch['shadow_zone_swing_percent'] = [df_summ_batter_pitch.shadow_swing[x]/df_summ_batter_pitch.shadow[x] if df_summ_batter_pitch.shadow[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
546
+
547
+ df_summ_batter_pitch['chase_zone_swing_percent'] = [df_summ_batter_pitch.chase_swing[x]/df_summ_batter_pitch.chase[x] if df_summ_batter_pitch.chase[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
548
+
549
+ df_summ_batter_pitch['waste_zone_swing_percent'] = [df_summ_batter_pitch.waste_swing[x]/df_summ_batter_pitch.waste[x] if df_summ_batter_pitch.waste[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
550
+
551
+
552
+
553
+
554
+ df_summ_batter_pitch['xwoba_percent'] = [df_summ_batter_pitch.xwoba[x]/df_summ_batter_pitch.woba_codes[x] if df_summ_batter_pitch.woba_codes[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
555
+ df_summ_batter_pitch['xwoba_percent_contact'] = [df_summ_batter_pitch.xwoba_contact[x]/df_summ_batter_pitch.bip[x] if df_summ_batter_pitch.bip[x] != 0 else np.nan for x in range(len(df_summ_batter_pitch))]
556
+
557
+
558
+
559
+
560
+ df_summ_batter_pitch['bip'] = df_summ_batter_pitch['bip'].fillna(0)
561
+
562
+ return df_summ_batter_pitch
right.png ADDED