nesticot committed on
Commit
6d0de70
1 Parent(s): 968950a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +468 -660
  2. tjstuff_model_20240123.joblib +3 -0
app.py CHANGED
@@ -1,736 +1,544 @@
1
- from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
2
- import datasets
3
- from datasets import load_dataset
4
  import pandas as pd
 
 
 
5
  import numpy as np
6
- import matplotlib.pyplot as plt
7
- import seaborn as sns
8
- import numpy as np
9
- from scipy.stats import gaussian_kde
10
- import matplotlib
11
- from matplotlib.ticker import MaxNLocator
12
- from matplotlib.gridspec import GridSpec
13
- from scipy.stats import zscore
14
- import math
15
- import matplotlib
16
- from adjustText import adjust_text
17
- import matplotlib.ticker as mtick
18
- from shinywidgets import output_widget, render_widget
19
- import pandas as pd
20
- from configure import base_url
21
- import shinyswatch
22
-
23
### Import Datasets
def _load_pitch_data(data_file, level):
    """Load one pitch-level CSV from the ``nesticot/mlb_data`` dataset.

    Args:
        data_file: Name of the CSV inside the dataset repository.
        level: Label written to the new ``level`` column (e.g. ``'MLB'``).

    Returns:
        A DataFrame of the ``train`` split with its first column dropped and
        a fresh 0..n-1 index.
    """
    split = load_dataset('nesticot/mlb_data', data_files=[data_file])['train']
    # The first column is moved into the index and then discarded by
    # reset_index(drop=True) -- presumably a saved row index; verify against
    # the CSV if that column is ever needed.
    first_column = list(split.features.keys())[0]
    frame = split.to_pandas().set_index(first_column).reset_index(drop=True)
    frame['level'] = level
    return frame


df_2023_mlb = _load_pitch_data('mlb_pitch_data_2023.csv', 'MLB')
df_2023_aaa = _load_pitch_data('aaa_pitch_data_2023.csv', 'AAA')

# Stack both levels; each frame keeps its own 0..n-1 row labels, so labels
# repeat across levels in the combined frame.
df_2023 = pd.concat([df_2023_mlb, df_2023_aaa])
37
-
38
- #print(df_2023)
39
### Load Models and Predict Per-Pitch Values
import joblib
import xgboost as xgb

# Two models restored from disk: one applied to pitches swung at, one applied
# to every other pitch.
swing_model = joblib.load('swing.joblib')
no_swing_model = joblib.load('no_swing.joblib')

# batter_id -> batter_name lookup, built from rows ordered by batter name.
batter_dict = (
    df_2023.sort_values('batter_name')
    .set_index('batter_id')['batter_name']
    .to_dict()
)

## Make Predictions
# Columns fed to both models; rows missing any of them are dropped first.
features = ['px', 'pz', 'strikes', 'balls']

# Pitches without a swing (is_swing != 1), scored by the no-swing model.
df_model_2023_no_swing = (
    df_2023[df_2023.is_swing != 1]
    .dropna(subset=features)
    .assign(y_pred=lambda taken: no_swing_model.predict(xgb.DMatrix(taken[features])))
)

# Pitches with a swing (is_swing == 1), scored by the swing model.
df_model_2023_swing = (
    df_2023[df_2023.is_swing == 1]
    .dropna(subset=features)
    .assign(y_pred=lambda swung: swing_model.predict(xgb.DMatrix(swung[features])))
)

# Every scored pitch, takes first and swings second.
df_model_2023 = pd.concat([df_model_2023_no_swing, df_model_2023_swing])
63
import joblib

# Index levels shared by every per-batter summary table below.
_BATTER_KEYS = ['batter_id', 'batter_name', 'level']


def _summarise_batters(pitches):
    """Return per-batter pitch counts and mean ``y_pred`` for *pitches*.

    ``pitches`` counts non-null ``start_speed`` values; the result is indexed
    by ``batter_id``, ``batter_name`` and ``level``.
    """
    return pitches.groupby(_BATTER_KEYS).agg(
        pitches=('start_speed', 'count'),
        y_pred=('y_pred', 'mean'),
    )


def _scale_20_80(values):
    """Rescale *values* so the mean maps to 50 and one standard deviation to 10."""
    return 50 + zscore(values) * 10


## All pitches: overall decision value per batter
df_model_2023_group = _summarise_batters(df_model_2023)
df_model_2023_group['decision_value'] = _scale_20_80(df_model_2023_group['y_pred'])

# Batter names present in the overall table. No minimum-pitch filter is
# applied to that table, so the isin() checks below currently keep every row.
_known_names = df_model_2023_group.index.get_level_values(1)

## Pitches taken (is_swing != 1)
df_model_2023_group_no_swing = _summarise_batters(df_model_2023[df_model_2023.is_swing != 1])
df_model_2023_group_no_swing = df_model_2023_group_no_swing[
    df_model_2023_group_no_swing.index.get_level_values(1).isin(_known_names)
].copy()
df_model_2023_group_no_swing['iz_awareness'] = _scale_20_80(df_model_2023_group_no_swing['y_pred'])

## Pitches swung at (is_swing == 1)
df_model_2023_group_swing = _summarise_batters(df_model_2023[df_model_2023.is_swing == 1])
df_model_2023_group_swing = df_model_2023_group_swing[
    df_model_2023_group_swing.index.get_level_values(1).isin(_known_names)
].copy()
df_model_2023_group_swing['oz_awareness'] = _scale_20_80(df_model_2023_group_swing['y_pred'])

## Create df for plotting
# Inner-join swings and takes on the batter index; shared column names get
# the `_swing` / `_no_swing` suffixes.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing.merge(
    df_model_2023_group_no_swing,
    left_index=True,
    right_index=True,
    suffixes=['_swing', '_no_swing'],
)
df_model_2023_group_swing_plus_no['pitches'] = (
    df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.pitches_no_swing
)

# Overall mean prediction: swing and take means weighted by their pitch counts.
df_model_2023_group_swing_plus_no['y_pred'] = (
    df_model_2023_group_swing_plus_no.y_pred_swing * df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.y_pred_no_swing * df_model_2023_group_swing_plus_no.pitches_no_swing
) / df_model_2023_group_swing_plus_no.pitches

# Bring in `decision_value`; the overall table's own `pitches` and `y_pred`
# columns arrive with a `_y` suffix.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.merge(
    right=df_model_2023_group,
    left_index=True,
    right_index=True,
    suffixes=['', '_y'],
)

# Attach a team to each batter. tail() keeps the last (up to five) rows per
# batter name; when a batter_id repeats, the later row wins in to_dict().
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.reset_index()
team_dict = (
    df_2023.groupby(['batter_name'])[['batter_id', 'batter_team']]
    .tail()
    .set_index('batter_id')['batter_team']
    .to_dict()
)
df_model_2023_group_swing_plus_no['team'] = df_model_2023_group_swing_plus_no['batter_id'].map(team_dict)
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.set_index(
    ['batter_id', 'batter_name', 'level', 'team'])

# Minimum of 250 combined pitches (swings + takes) to be included.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no[
    df_model_2023_group_swing_plus_no['pitches'] >= 250]
# Untouched copy that the plotting code filters per request.
df_model_2023_group_swing_plus_no_copy = df_model_2023_group_swing_plus_no.copy()
128
import matplotlib
from matplotlib.pyplot import text
import inflect
from scipy.stats import percentileofscore

# Hex colours shared by every plot; the plotting code picks entries by index.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0',
    '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Diverging map: palette[1] -> white -> palette[0].
cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", [colour_palette[1], '#ffffff', colour_palette[0]])
# Sequential map: white -> palette[0].
cmap_hue2 = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['#ffffff', colour_palette[0]])

# inflect engine; the rolling plots call p.ordinal() for percentile labels.
p = inflect.engine()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
 
 
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
 
 
 
144
 
145
def _int_or(value, fallback):
    """Return *value* as an ``int``, or *fallback* when *value* is ``None``."""
    return fallback if value is None else int(value)


def _message_figure(message):
    """Return an otherwise empty 12x12 figure that shows only *message*."""
    fig = plt.figure(figsize=(12, 12))
    fig.text(s=message, x=0.5, y=0.5)
    return fig


def _level_table(level):
    """Return the per-batter summary rows whose third index level equals *level*."""
    table = df_model_2023_group_swing_plus_no_copy
    return table[table.index.get_level_values(2) == level]


def _batter_pitches(batter_id, level):
    """Return one batter's scored pitches at *level*, ordered by game date and start time."""
    rows = df_model_2023[df_model_2023.batter_id == batter_id]
    rows = rows.sort_values(by=['game_date', 'start_time'])
    return rows[rows['level'] == level]


def _snap_down(value, pad=0.0):
    """Multiply *value* by 100 and round down to a multiple of 0.05.

    *pad* is subtracted after *value* has been multiplied by 10,000, which
    pushes the result one step lower when the value sits on a boundary.
    """
    return math.floor((value * 100 * 100 - pad) / 5) * 5 / 100


def _snap_up(value, pad=0.0):
    """Multiply *value* by 100 and round up to a multiple of 0.05 (see ``_snap_down``)."""
    return math.ceil((value * 100 * 100 + pad) / 5) * 5 / 100


def _label_mask(table, selected_ids):
    """Return a boolean array marking the batters that get a text label.

    A batter is labelled when any of ``y_pred_swing``, ``y_pred_no_swing`` or
    ``y_pred`` lies at or beyond the 2nd/98th percentile of *table*, or when
    the batter id (as a string) is in *selected_ids*.
    """
    flagged = table.index.get_level_values(0).astype(str).isin(selected_ids)
    for column in ('y_pred_swing', 'y_pred_no_swing', 'y_pred'):
        values = table[column]
        # Quantiles are computed once per column instead of once per row.
        low, high = values.quantile(0.02), values.quantile(0.98)
        flagged = flagged | ((values <= low) | (values >= high)).to_numpy()
    return flagged


def _scatter_figure(level, plot_min, selected_ids):
    """Build the swing-value vs take-value scatter for every batter at *level*.

    Args:
        level: Value matched against the ``level`` index entry.
        plot_min: Minimum combined pitch count for a batter to be plotted.
        selected_ids: Batter ids (strings) that are always labelled.

    Returns:
        The matplotlib figure, or a message figure when no batter qualifies.
    """
    table = _level_table(level)
    table = table[table.pitches >= plot_min]
    if table.empty:
        # min()/max() would be NaN below and math.floor(NaN) raises.
        return _message_figure('No Batters Meet the Pitch Minimum')

    swing = table['y_pred_swing']
    take = table['y_pred_no_swing']
    overall = table['y_pred']

    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(3, 3, height_ratios=[0.6, 10, 0.2], width_ratios=[0.25, 0.50, 0.25])
    axheader = fig.add_subplot(gs[0, :])
    ax = fig.add_subplot(gs[1, :])
    axfooter1 = fig.add_subplot(gs[-1, 0])
    axfooter2 = fig.add_subplot(gs[-1, 1])
    axfooter3 = fig.add_subplot(gs[-1, 2])

    # Local colormap for this plot; the module-level cmap_hue is not modified.
    point_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
        "", [colour_palette[1], colour_palette[3], colour_palette[0]])
    norm = plt.Normalize(overall.min() * 100, overall.max() * 100)

    sns.scatterplot(
        x=swing * 100,
        y=take * 100,
        hue=overall * 100,
        size=table['pitches_swing'] / table['pitches'],
        palette=point_cmap, ax=ax)

    # The footer colour bar replaces the seaborn legend removed further down.
    sm = plt.cm.ScalarMappable(cmap=point_cmap, norm=norm)
    cbar = plt.colorbar(sm, cax=axfooter2, orientation='horizontal', shrink=1)
    cbar.set_label('Decision Value xRV/100 Pitches', fontsize=12)

    # Dotted cross-hairs at the mean take value and the mean swing value.
    ax.hlines(xmin=_snap_down(swing.min(), 0.01),
              xmax=_snap_up(swing.max(), 0.01),
              y=take.mean() * 100, color='gray', linewidth=3, linestyle='dotted', alpha=0.4)
    ax.vlines(ymin=_snap_down(take.min(), 0.01),
              ymax=_snap_up(take.max(), 0.01),
              x=swing.mean() * 100, color='gray', linewidth=3, linestyle='dotted', alpha=0.4)

    x_lim_min, x_lim_max = _snap_down(swing.min()), _snap_up(swing.max())
    y_lim_min, y_lim_max = _snap_down(take.min()), _snap_up(take.max())
    ax.set_xlim(x_lim_min, x_lim_max)
    ax.set_ylim(y_lim_min, y_lim_max)

    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.set_xlabel('Out-of-Zone Awareness Value xRV/100 Swings', fontsize=16)
    ax.set_ylabel('In-Zone Awareness Value xRV/100 Takes', fontsize=16)
    ax.get_legend().remove()

    names = table.index.get_level_values(1)
    labels = [
        ax.text(x=x, y=y, s=name, fontsize=8)
        for x, y, name, show in zip((swing * 100).to_numpy(), (take * 100).to_numpy(),
                                    names, _label_mask(table, selected_ids))
        if show
    ]

    ax.text(x=x_lim_min + abs(x_lim_min) * 0.02, y=y_lim_max - abs(y_lim_max - y_lim_min) * 0.02,
            s=f'Min. {plot_min} Pitches', fontsize='10', fontstyle='oblique', va='top',
            bbox=dict(facecolor='white', edgecolor='black'))
    ax.text(x=x_lim_min + abs(x_lim_min) * 0.02, y=y_lim_max - abs(y_lim_max - y_lim_min) * 0.06,
            s='Point Size Represents Swing%', fontsize='10', fontstyle='oblique', va='top',
            bbox=dict(facecolor='white', edgecolor='black'))

    adjust_text(labels,
                arrowprops=dict(arrowstyle="-", color=colour_palette[4], lw=1), ax=ax)

    for blank in (axfooter1, axfooter3, axheader):
        blank.axis('off')

    axheader.text(s=f'{level} In-Zone vs Out-of-Zone Awareness Value', fontsize=24,
                  x=0.5, y=0, va='top', ha='center')
    axfooter1.text(0.05, -0.5, "By: Thomas Nestico\n @TJStats", ha='left', va='bottom', fontsize=12)
    axfooter3.text(0.95, -0.5, "Data: MLB", ha='right', va='bottom', fontsize=12)
    fig.subplots_adjust(left=0.01, right=0.99, top=0.975, bottom=0.025)
    return fig


def _rolling_figure(batter_id, level, window, pitches, league, *, yticks, xlabel, metric_title):
    """Build a rolling-mean ``y_pred`` line plot for one batter.

    Args:
        batter_id: Key into ``batter_dict`` for the batter's display name.
        level: Level label used in the title and legend.
        window: Rolling window length in pitches (>= 1).
        pitches: The batter's pitches, already ordered and filtered.
        league: Per-batter mean values at *level* used for the reference
            average, the 10/25/75/90th percentile guides and the batter's
            percentile rank.
        yticks: Tick positions for the y axis.
        xlabel: Label for the x axis.
        metric_title: Metric name inserted into the header text.

    Returns:
        The matplotlib figure, or a message figure when there is no data.
    """
    if pitches.empty or league.empty:
        # The mean would be NaN and int(NaN) raises in the percentile label.
        return _message_figure('No Data for the Selected Batter and Level')

    player_name = batter_dict[batter_id]
    player_mean = pitches.y_pred.mean()
    percentile = int(np.around(percentileofscore(league, player_mean, kind="strict")))
    n_pitches = len(pitches)

    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(3, 3, height_ratios=[0.3, 10, 0.2], width_ratios=[0.01, 2, 0.01])
    axheader = fig.add_subplot(gs[0, :])
    ax10 = fig.add_subplot(gs[1, 0])
    ax = fig.add_subplot(gs[1, 1])
    ax12 = fig.add_subplot(gs[1, 2])
    axfooter1 = fig.add_subplot(gs[-1, :])
    for blank in (axheader, ax10, ax12, axfooter1):
        blank.axis('off')

    # The first full window ends at pitch number `window`, so x starts there.
    rolling = pitches.y_pred.rolling(window=window).mean()
    sns.lineplot(x=range(window, len(rolling) + 1),
                 y=rolling.dropna() * 100,
                 color=colour_palette[0], linewidth=2, ax=ax, zorder=100)

    ax.hlines(y=player_mean * 100, xmin=window, xmax=n_pitches,
              color=colour_palette[0], linestyle='--',
              label=f'{player_name} Average: {player_mean*100:.2f} xRV/100 '
                    f'({p.ordinal(percentile)} Percentile)')
    ax.hlines(y=league.mean() * 100, xmin=window, xmax=n_pitches,
              color=colour_palette[1], linestyle='-.', alpha=1,
              label=f'{level} Average: {league.mean()*100:.2f} xRV/100')
    ax.legend()

    # Percentile guide lines, each with a boxed caption in the same colour.
    guides = ((0.9, '90th %'), (0.75, '75th %'), (0.25, '25th %'), (0.1, '10th %'))
    for offset, (quantile, caption) in enumerate(guides):
        height = league.quantile(quantile) * 100
        colour = colour_palette[2 + offset]
        ax.hlines(y=height, xmin=window, xmax=n_pitches, color=colour,
                  linestyle='dotted', alpha=0.5, zorder=1)
        ax.text(window + window / 1000, height, caption, rotation=0, va='center', ha='left',
                bbox=dict(facecolor='white', alpha=0.7, edgecolor=colour, pad=2), zorder=11)

    ax.set_xlim(window, n_pitches)
    ax.set_yticks(yticks)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Expected Run Value Added per 100 Pitches (xRV/100)')

    axheader.text(s=f'{player_name} - {level} - {window} Pitch Rolling {metric_title} Expected Run Value Added',
                  x=0.5, y=-0.5, ha='center', va='bottom', fontsize=14)
    axfooter1.text(.05, 0.2, "By: Thomas Nestico", ha='left', va='bottom', fontsize=12)
    axfooter1.text(0.95, 0.2, "Data: MLB", ha='right', va='bottom', fontsize=12)

    fig.subplots_adjust(left=0.01, right=0.99, top=0.98, bottom=0.02)
    return fig


def server(input, output, session):
    """Register the app's four plot outputs.

    Outputs ``scatter_plot``, ``dv_plot``, ``iz_plot`` and ``oz_plot`` are each
    tied to the ``go`` action button through ``reactive.event`` (with
    ``ignore_none=False``) and read the sidebar inputs ``batter_id``,
    ``level_list``, ``pitch_min``, ``name_list`` and ``rolling_window``.
    """

    def _rolling_selection():
        """Return ``(batter_id, level, window)``, or ``None`` when no batter is selected."""
        if not input.batter_id():
            return None
        # 100 mirrors the default shown by the rolling-window input.
        window = max(1, _int_or(input.rolling_window(), 100))
        return int(input.batter_id()), input.level_list(), window

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def scatter_plot():
        # Equality/truthiness test; `is ""` compared object identity.
        if not input.batter_id():
            return _message_figure('Please Select a Batter')
        plot_min = max(250, _int_or(input.pitch_min(), 250))
        return _scatter_figure(input.level_list(), plot_min, list(input.name_list() or ()))

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def dv_plot():
        selection = _rolling_selection()
        if selection is None:
            return _message_figure('Please Select a Batter')
        batter_id, level, window = selection
        return _rolling_figure(
            batter_id, level, window,
            _batter_pitches(batter_id, level),
            _level_table(level)['y_pred'],
            yticks=[-1.5, -1, -0.5, 0, 0.5, 1, 1.5],
            xlabel='Pitch',
            metric_title='Swing Decision')

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def iz_plot():
        selection = _rolling_selection()
        if selection is None:
            return _message_figure('Please Select a Batter')
        batter_id, level, window = selection
        pitches = _batter_pitches(batter_id, level)
        return _rolling_figure(
            batter_id, level, window,
            pitches[pitches['is_swing'] != 1],  # takes only
            _level_table(level)['y_pred_no_swing'],
            yticks=[1.0, 1.5, 2.0, 2.5, 3.0],
            xlabel='Takes',
            metric_title='In-Zone Awareness')

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def oz_plot():
        selection = _rolling_selection()
        if selection is None:
            return _message_figure('Please Select a Batter')
        batter_id, level, window = selection
        pitches = _batter_pitches(batter_id, level)
        return _rolling_figure(
            batter_id, level, window,
            pitches[pitches['is_swing'] == 1],  # swings only
            _level_table(level)['y_pred_swing'],
            yticks=[-3.25, -2.75, -2.25, -1.75, -1.25],
            xlabel='Swing',
            metric_title='Out of Zone Awareness')
546
-
547
- app = App(ui.page_fluid(
548
- ui.tags.base(href=base_url),
549
- ui.tags.div(
550
- {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
551
- ui.tags.style(
552
- """
553
- h4 {
554
- margin-top: 1em;font-size:35px;
555
- }
556
- h2{
557
- font-size:25px;
558
- }
559
- """
560
- ),
561
- shinyswatch.theme.simplex(),
562
- ui.tags.h4("TJStats"),
563
- ui.tags.i("Baseball Analytics and Visualizations"),
564
- ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2024 Apps</a><sup>1</sup>"""),
565
- # ui.navset_tab(
566
- # ui.nav_control(
567
- # ui.a(
568
- # "Home",
569
- # href="home/"
570
- # ),
571
- # ),
572
- # ui.nav_menu(
573
- # "Batter Charts",
574
- # ui.nav_control(
575
- # ui.a(
576
- # "Batting Rolling",
577
- # href="rolling_batter/"
578
- # ),
579
- # ui.a(
580
- # "Spray & Damage",
581
- # href="https://nesticot-tjstats-site-spray.hf.space/"
582
- # ),
583
- # ui.a(
584
- # "Decision Value",
585
- # href="decision_value/"
586
- # ),
587
- # # ui.a(
588
- # # "Damage Model",
589
- # # href="damage_model/"
590
- # # ),
591
- # ui.a(
592
- # "Batter Scatter",
593
- # href="batter_scatter/"
594
- # ),
595
- # # ui.a(
596
- # # "EV vs LA Plot",
597
- # # href="ev_angle/"
598
- # # ),
599
- # ui.a(
600
- # "Statcast Compare",
601
- # href="statcast_compare/"
602
- # )
603
- # ),
604
- # ),
605
- # ui.nav_menu(
606
- # "Pitcher Charts",
607
- # ui.nav_control(
608
- # ui.a(
609
- # "Pitcher Rolling",
610
- # href="rolling_pitcher/"
611
- # ),
612
- # ui.a(
613
- # "Pitcher Summary",
614
- # href="pitching_summary_graphic_new/"
615
- # ),
616
- # ui.a(
617
- # "Pitcher Scatter",
618
- # href="pitcher_scatter/"
619
- # )
620
- # ),
621
- # )),
622
- ui.navset_tab(
623
- ui.nav_control(
624
- ui.a(
625
- "Home",
626
- href="home/"
627
- ),
628
- ),
629
- ui.nav_menu(
630
- "Batter Charts",
631
- ui.nav_control(
632
- ui.a(
633
- "Batting Rolling",
634
- href="https://nesticot-tjstats-site-rolling-batter.hf.space/"
635
- ),
636
- ui.a(
637
- "Spray",
638
- href="https://nesticot-tjstats-site-spray.hf.space/"
639
- ),
640
- ui.a(
641
- "Decision Value",
642
- href="https://nesticot-tjstats-site-decision-value.hf.space/"
643
- ),
644
- ui.a(
645
- "Damage Model",
646
- href="https://nesticot-tjstats-site-damage.hf.space/"
647
- ),
648
- ui.a(
649
- "Batter Scatter",
650
- href="https://nesticot-tjstats-site-batter-scatter.hf.space/"
651
- ),
652
- ui.a(
653
- "EV vs LA Plot",
654
- href="https://nesticot-tjstats-site-ev-angle.hf.space/"
655
- ),
656
- ui.a(
657
- "Statcast Compare",
658
- href="https://nesticot-tjstats-site-statcast-compare.hf.space/"
659
- ),
660
- ui.a(
661
- "MLB/MiLB Cards",
662
- href="https://nesticot-tjstats-site-mlb-cards.hf.space/"
663
- )
664
- ),
665
- ),
666
- ui.nav_menu(
667
- "Pitcher Charts",
668
- ui.nav_control(
669
- ui.a(
670
- "Pitcher Rolling",
671
- href="https://nesticot-tjstats-site-rolling-pitcher.hf.space/"
672
- ),
673
- ui.a(
674
- "Pitcher Summary",
675
- href="https://nesticot-tjstats-site-pitching-summary-graphic-new.hf.space/"
676
- ),
677
- ui.a(
678
- "Pitcher Scatter",
679
- href="https://nesticot-tjstats-site-pitcher-scatter.hf.space"
680
- )
681
- ),
682
- )), ui.row(
683
- ui.layout_sidebar(
684
-
685
- ui.panel_sidebar(
686
-
687
-
688
- ui.input_numeric("pitch_min",
689
- "Select Pitch Minimum [min. 250] (Scatter)",
690
- value=500,
691
- min=250),
692
-
693
- ui.input_select("name_list",
694
- "Select Players to List (Scatter)",
695
- batter_dict,
696
- selectize=True,
697
- multiple=True),
698
- ui.input_select("batter_id",
699
- "Select Batter (Rolling)",
700
- batter_dict,
701
- width=1,
702
- size=1,
703
- selectize=True),
704
- ui.input_numeric("rolling_window",
705
- "Select Rolling Window (Rolling)",
706
- value=100,
707
- min=1),
708
-
709
- ui.input_select("level_list",
710
- "Select Level",
711
- ['MLB','AAA'],
712
- selected='MLB'),
713
- ui.input_action_button("go", "Generate",class_="btn-primary"),
714
- ),
715
-
716
- ui.panel_main(
717
- ui.navset_tab(
718
-
719
- ui.nav("Scatter Plot",
720
- ui.output_plot('scatter_plot',
721
- width='1000px',
722
- height='1000px')),
723
- ui.nav("Rolling DV",
724
- ui.output_plot('dv_plot',
725
- width='1000px',
726
- height='1000px')),
727
- ui.nav("Rolling In-Zone",
728
- ui.output_plot('iz_plot',
729
- width='1000px',
730
- height='1000px')),
731
- ui.nav("Rolling Out-of-Zone",
732
- ui.output_plot('oz_plot',
733
- width='1000px',
734
- height='1000px'))
735
- ))
736
- )),)),server)
 
1
+ from shiny import ui, render, App
2
+ import matplotlib.image as mpimg
 
3
  import pandas as pd
4
+ import pygsheets
5
+ import pytz
6
+ from datetime import datetime
7
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import joblib
 
 
 
 
 
 
9
 
10
# ---------------------------------------------------------------------------
# Module-level data loading: spring pitch data, 2023 reference table, model.
# ---------------------------------------------------------------------------
print('Starting')
df_2024 = pd.read_csv('2024_spring_data.csv', index_col=[0])
print('Starting')

# Map each pitcher_id to the team on that pitcher's last row (file order).
spring_teams = (
    df_2024.groupby(['pitcher_id'])
    .tail(1)
    .set_index(['pitcher_id'])['pitcher_team']
    .to_dict()
)

# Derive the final velocity components and the vertical / horizontal
# approach angles (degrees).  Each entry may use the columns created by the
# entries above it, so the order below matters.
# NOTE(review): 17/12 looks like the front of home plate in feet -- confirm.
df_2024 = df_2024.assign(
    vy_f=lambda d: -(d['vy0']**2 - (2 * d['ay'] * (d['y0'] - 17/12)))**0.5,
    t=lambda d: (d['vy_f'] - d['vy0']) / d['ay'],
    vz_f=lambda d: (d['vz0']) + (d['az'] * d['t']),
    vaa=lambda d: -np.arctan(d['vz_f'] / d['vy_f']) * (180 / np.pi),
    vx_f=lambda d: (d['vx0']) + (d['ax'] * d['t']),
    haa=lambda d: -np.arctan(d['vx_f'] / d['vy_f']) * (180 / np.pi),
)

# 2023 per-pitcher / per-pitch-type summary, indexed by CSV columns 0 and 3.
grouped_ivb_2023 = pd.read_csv('2023_pitch_group_data.csv', index_col=[0, 3])

# Pre-trained tjStuff+ model.
model = joblib.load('tjstuff_model_20240123.joblib')
28
+
29
+
30
def percentile(n):
    """Create an aggregation callable that returns the ``n`` quantile.

    ``n`` is handed unchanged to ``quantile`` of whatever object the callable
    receives.  The callable's ``__name__`` is set to ``percentile_XX`` (``XX``
    being ``n * 100`` formatted with at least two digits) so several of them
    can be told apart when used in the same aggregation.
    """
    label = 'percentile_{:02.0f}'.format(n*100)

    def quantile_of(values):
        return values.quantile(n)

    quantile_of.__name__ = label
    return quantile_of
35
+
36
def df_clean(df):
    """Return a cleaned copy of pitch-level data with model feature columns.

    The input frame is not modified.  Steps, in order:

    1. For left-handed pitchers (``pitcher_hand == 'L'``) negate ``hb`` and
       ``x0`` and replace ``spin_direction`` with ``360 - spin_direction``.
    2. Add 0/1 flags ``pitch_l`` and ``bat_l`` for left-handed pitcher/batter.
    3. Drop the excluded pitch types and fold ``FT``/``KC``/``ST``/``SV``
       into ``SI``/``CU``/``SL``/``SL``.
    4. Per pitcher, summarise fastballs (``FF``, ``FC``, ``SI``) and add each
       pitch's difference from those fastball baselines.
    5. Add each pitch's difference from the pitcher's maximum ``start_speed``
       and maximum ``ivb``.
    6. Recompute the final velocity components and approach angles
       (``vaa``/``haa``, in degrees).
    7. Drop rows without a ``pitch_type`` and fill every remaining NaN with 0.

    Args:
        df: DataFrame with the columns ``pitcher_id``, ``pitcher_hand``,
            ``batter_hand``, ``pitch_type``, ``start_speed``, ``ivb``, ``hb``,
            ``x0``, ``spin_direction``, ``vx0``, ``vy0``, ``vz0``, ``ax``,
            ``ay``, ``az`` and ``y0``.

    Returns:
        A new DataFrame containing the surviving rows and the added columns.
    """
    excluded_types = ["EP", "PO", "KN", "FO", "CS", "SC", "FA"]
    fastball_types = ["FF", "FC", "SI"]

    df_copy = df.copy()

    # Mirror left-handed pitchers so movement/release share one orientation.
    lefty = df_copy['pitcher_hand'] == 'L'
    df_copy.loc[lefty, 'hb'] *= -1
    df_copy.loc[lefty, 'x0'] *= -1
    df_copy.loc[lefty, 'spin_direction'] = 360 - df_copy.loc[lefty, 'spin_direction']

    df_copy['pitch_l'] = lefty.astype(int)
    df_copy['bat_l'] = (df_copy['batter_hand'] == 'L').astype(int)

    # Rows with a missing pitch_type survive this filter on purpose; they still
    # contribute to the per-pitcher maxima below and are dropped at the end.
    df_copy = df_copy[~df_copy['pitch_type'].isin(excluded_types)].reset_index(drop=True)
    df_copy['pitch_type'] = df_copy['pitch_type'].replace(
        {'FT': 'SI', 'KC': 'CU', 'ST': 'SL', 'SV': 'SL'})

    # Per-pitcher fastball baselines (mean and 10th/90th percentiles).
    fastballs = df_copy[df_copy['pitch_type'].isin(fastball_types)].groupby('pitcher_id')
    fb_summary = pd.DataFrame({
        'fb_velo': fastballs['start_speed'].mean(),
        'fb_max_ivb': fastballs['ivb'].quantile(0.9),
        'fb_max_x': fastballs['hb'].quantile(0.9),
        'fb_min_x': fastballs['hb'].quantile(0.1),
        'fb_max_velo': fastballs['start_speed'].quantile(0.9),
        'fb_axis': fastballs['spin_direction'].mean(),
    })

    # Left join: pitchers without any fastball get NaN baselines, which the
    # final fillna(0) turns into zero differences.
    df_copy = df_copy.merge(fb_summary, left_on='pitcher_id', right_index=True, how='left')

    # (new column, pitch-level column, fastball baseline column)
    for diff_col, pitch_col, baseline_col in (
        ('fb_velo_diff', 'start_speed', 'fb_velo'),
        ('fb_max_ivb_diff', 'ivb', 'fb_max_ivb'),
        ('fb_max_hb_diff', 'hb', 'fb_max_x'),
        ('fb_min_hb_diff', 'hb', 'fb_min_x'),
        ('fb_max_velo_diff', 'start_speed', 'fb_max_velo'),
        ('fb_axis_diff', 'spin_direction', 'fb_axis'),
    ):
        df_copy[diff_col] = df_copy[pitch_col] - df_copy[baseline_col]

    # Differences from the pitcher's overall maxima (all pitch types).
    by_pitcher = df_copy.groupby(['pitcher_id'])
    df_copy['max_speed'] = by_pitcher['start_speed'].transform('max')
    df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed']
    df_copy['max_ivb'] = by_pitcher['ivb'].transform('max')
    df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb']

    # Final velocity components and approach angles in degrees.
    # NOTE(review): 17/12 looks like the front of home plate in feet -- confirm.
    df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5
    df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
    df_copy['vz_f'] = (df_copy['vz0']) + (df_copy['az'] * df_copy['t'])
    df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi)
    df_copy['vx_f'] = (df_copy['vx0']) + (df_copy['ax'] * df_copy['t'])
    df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi)

    df_copy = df_copy.dropna(subset=['pitch_type']).fillna(0)
    return df_copy
104
 
105
 
106
# Page layout: a sidebar holding the date-range filter and a main panel with
# one tab per output table rendered by ``server``.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.panel_sidebar(
            # The picker defaults to, and is limited to, the span of game
            # dates present in the spring data.
            ui.input_date_range(
                "date_range_id",
                "Date range input",
                start=df_2024['game_date'].min(),
                end=df_2024['game_date'].max(),
                width=2,
                min=df_2024['game_date'].min(),
                max=df_2024['game_date'].max(),
            ),
            width=2,
        ),
        ui.panel_main(
            ui.navset_tab(
                ui.nav(
                    "Pitch Data",
                    ui.output_data_frame("table"),
                ),
                ui.nav(
                    "Pitch Data (Daily)",
                    ui.output_data_frame("table_daily"),
                ),
                ui.nav(
                    "2023 vs Spring",
                    ui.output_data_frame("table_2023"),
                ),
                ui.nav(
                    "2023 vs Spring Difference",
                    ui.output_data_frame("table_difference"),
                ),
                ui.nav(
                    "tjStuff+",
                    ui.output_data_frame("table_stuff"),
                ),
                ui.nav(
                    "tjStuff+ (Daily)",
                    ui.output_data_frame("table_stuff_day"),
                ),
            )
        ),
    )
)
133
+
134
+
135
+ from urllib.request import Request, urlopen
136
+ from shiny import App, reactive, ui
137
+ from shiny.ui import h2, tags
138
+ # importing OpenCV(cv2) module
139
+
140
+
141
+
142
+
143
+ #print(app_ui)
144
# Columns handed to the tjStuff+ model.
# NOTE(review): presumably must match the training column order -- confirm.
_STUFF_FEATURES = ['start_speed', 'spin_rate', 'extension', 'ivb', 'hb', 'x0', 'z0',
                   'fb_max_velo_diff', 'fb_max_ivb_diff', 'fb_max_hb_diff']

# Reference mean / standard deviation of the model's predictions; they centre
# tjStuff+ on 100 with 10 points per standard deviation.
# NOTE(review): origin of these constants is not visible here -- confirm.
_STUFF_RUN_EXP_MEAN = -0.0023964706
_STUFF_RUN_EXP_STD = 0.0057581966

# Minimum number of pitches a group needs to receive a tjStuff+ value.
_STUFF_MIN_PITCHES = 1

# Metrics compared between the 2023 reference table and the spring data.
_COMPARED_METRICS = ['start_speed', 'ivb', 'hb', 'spin_rate', 'vaa', 'haa',
                     'horizontal_release', 'vertical_release', 'extension']


def _filter_by_date(df, date_range):
    """Return the rows of ``df`` whose ``game_date`` lies in the inclusive range."""
    first_day, last_day = date_range[0], date_range[1]
    game_days = pd.to_datetime(df['game_date']).dt.date  # parsed once per call
    return df[(game_days >= first_day) & (game_days <= last_day)]


def _summarise_pitches(df, group_keys):
    """Return pitch counts and mean pitch metrics (1 decimal) per ``group_keys``."""
    return df.groupby(group_keys).agg(
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
    ).round(1).reset_index()


def _merge_with_2023(spring_pitches):
    """Join the spring summary onto the 2023 reference table.

    Every spring (pitcher, pitch type) row is kept; the matching 2023 columns
    get the ``_2023`` suffix and the spring ones ``_spring``.  The result is
    indexed by pitcher id, name, spring team, hand and pitch type.
    """
    spring = _summarise_pitches(
        spring_pitches, ['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type'])
    spring = spring.set_index(['pitcher_id', 'pitch_type'])

    merged = grouped_ivb_2023.merge(right=spring,
                                    left_index=True,
                                    right_index=True,
                                    how='right',
                                    suffixes=['_2023', '_spring']).reset_index()

    # The spring values are always present (right join), so use them as the
    # canonical name / hand / team.
    merged['pitcher_name'] = merged['pitcher_name_spring']
    merged['pitcher_hand'] = merged['pitcher_hand_spring']
    merged['pitcher_team'] = merged['pitcher_id'].map(spring_teams)
    return merged.set_index(
        ['pitcher_id', 'pitcher_name', 'pitcher_team', 'pitcher_hand', 'pitch_type'])


def _score_groups(scored_pitches, group_keys):
    """Return pitch count, mean predicted run value and tjStuff+ per group."""
    grouped = scored_pitches.groupby(group_keys).agg(
        pitches=('y_pred', 'count'),
        run_exp=('y_pred', 'mean'),
    )
    # Explicit copy: a column is added to the filtered frame right below.
    grouped = grouped[grouped.pitches >= _STUFF_MIN_PITCHES].copy()
    grouped['run_exp_mean'] = _STUFF_RUN_EXP_MEAN
    grouped['run_exp_std'] = _STUFF_RUN_EXP_STD
    # Lower predicted run value is better, hence (mean - run_exp).
    grouped['tj_stuff_plus'] = 100 + 10 * (
        (-grouped.run_exp + grouped.run_exp_mean) / grouped.run_exp_std)
    return grouped


def _truncate_to_int(value):
    """Truncate a number to ``int``; missing values are passed through."""
    return value if pd.isna(value) else int(value)


def _stuff_plus_table(selected_pitches, index_keys, sort_levels):
    """Build the wide tjStuff+ table: one column per pitch type plus ``All``.

    Args:
        selected_pitches: raw pitch rows to score.
        index_keys: columns identifying one output row.
        sort_levels: positions within ``index_keys`` to sort the rows by.
    """
    pitches = df_clean(selected_pitches)
    if pitches.empty:
        # Nothing to score; skip the model call, which cannot take zero rows.
        return pd.DataFrame(columns=list(index_keys) + ['All'])

    pitches['y_pred'] = model.predict(pitches[_STUFF_FEATURES])

    overall = _score_groups(pitches, list(index_keys))
    by_pitch_type = _score_groups(pitches, list(index_keys) + ['pitch_type']).reset_index()

    wide = by_pitch_type.pivot(index=list(index_keys),
                               columns='pitch_type',
                               values='tj_stuff_plus')
    wide['All'] = wide.index.map(overall['tj_stuff_plus'].to_dict())
    wide = wide.sort_index(level=sort_levels).rename_axis(columns=None)
    # Values are truncated (not rounded) to whole numbers.
    wide = wide.apply(lambda column: column.map(_truncate_to_int))
    return wide.reset_index()


def _data_grid(frame):
    """Wrap ``frame`` in the DataGrid configuration shared by every tab."""
    return render.DataGrid(
        frame,
        width='fit-content',
        height=750,
        filters=True,
    )


def server(input, output, session):
    """Shiny server function: registers one data-frame output per UI tab.

    Every output is restricted to the pitches whose ``game_date`` falls in the
    range selected through the ``date_range_id`` input.
    """

    def selected_pitches():
        # Reads the input inside the calling render function, so each output
        # re-renders when the date range changes.
        return _filter_by_date(df_2024, input.date_range_id())

    @output
    @render.data_frame
    def table():
        """Mean pitch metrics per pitcher and pitch type."""
        return _data_grid(_summarise_pitches(
            selected_pitches(),
            ['pitcher_id', 'pitcher_name', 'pitcher_team', 'pitcher_hand', 'pitch_type']))

    @output
    @render.data_frame
    def table_daily():
        """Mean pitch metrics per pitcher, pitch type and game date."""
        return _data_grid(_summarise_pitches(
            selected_pitches(),
            ['pitcher_id', 'pitcher_name', 'pitcher_team', 'pitcher_hand', 'pitch_type',
             'game_date']))

    @output
    @render.data_frame
    def table_2023():
        """2023 and spring pitch metrics side by side."""
        merged = _merge_with_2023(selected_pitches())
        columns = [metric + suffix
                   for suffix in ('_2023', '_spring')
                   for metric in ['pitches'] + _COMPARED_METRICS]
        return _data_grid(merged[columns].reset_index())

    @output
    @render.data_frame
    def table_difference():
        """Spring minus 2023 for every compared metric."""
        merged = _merge_with_2023(selected_pitches())
        difference_columns = []
        for metric in _COMPARED_METRICS:
            column = metric + '_difference'
            merged[column] = (merged[metric + '_spring'].values
                              - merged[metric + '_2023'].values)
            difference_columns.append(column)
        return _data_grid(merged[difference_columns].reset_index())

    @output
    @render.data_frame
    def table_stuff():
        """tjStuff+ per pitcher over the selected dates."""
        return _data_grid(_stuff_plus_table(
            selected_pitches(),
            ['pitcher_id', 'pitcher_name', 'pitcher_team'],
            sort_levels=[1]))

    @output
    @render.data_frame
    def table_stuff_day():
        """tjStuff+ per pitcher and game date."""
        return _data_grid(_stuff_plus_table(
            selected_pitches(),
            ['pitcher_id', 'pitcher_name', 'pitcher_team', 'game_date'],
            sort_levels=[1, 3]))
535
 
 
 
 
 
536
 
 
 
 
 
537
 
 
 
 
 
 
538
 
 
 
 
539
 
 
 
540
 
 
 
 
541
 
 
542
 
 
 
 
 
 
 
 
 
 
 
 
543
 
544
# Application object: pairs the page layout with the server logic.
app = App(ui=app_ui, server=server)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tjstuff_model_20240123.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b4a969c59aee26f0ee612edbd5b376fb96f949cbdf00a2a61a67e50b65e606
3
+ size 121729