nesticot committed on
Commit
6e4417c
1 Parent(s): e6a725b

Upload 23 files

Browse files
Dockerfile CHANGED
@@ -1,20 +1,16 @@
1
  FROM python:3.9
2
 
3
- WORKDIR /code
 
 
4
 
5
- COPY ./requirements.txt /code/requirements.txt
 
 
 
6
 
7
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
8
 
9
- # Switch to the "user" user
10
- RUN useradd -m -u 1000 user
11
- USER user
12
- ENV HOME=/home/user \
13
- PATH=/home/user/.local/bin:$PATH
14
-
15
-
16
- COPY . .
17
-
18
- EXPOSE 7860
19
-
20
- CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
 
1
  FROM python:3.9
2
 
3
+ # Install dependencies
4
+ COPY requirements.txt /app/
5
+ RUN pip install -r /app/requirements.txt
6
 
7
+ # Copy app files
8
+ COPY app_name /app/app_name/
9
+ COPY static/ /app/static/
10
+ COPY templates/ /app/templates/
11
 
12
+ # Set working directory
13
+ WORKDIR /app/app_name
14
 
15
+ # Set the command to run the app
16
+ CMD ["python", "app.py"]
 
 
 
 
 
 
 
 
 
 
__pycache__/api_scraper.cpython-39.pyc ADDED
Binary file (22.8 kB). View file
 
__pycache__/app.cpython-39.pyc CHANGED
Binary files a/__pycache__/app.cpython-39.pyc and b/__pycache__/app.cpython-39.pyc differ
 
__pycache__/pitch_summary_functions.cpython-39.pyc ADDED
Binary file (26.7 kB). View file
 
api_scraper.py ADDED
@@ -0,0 +1,747 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ import numpy as np
4
+ from datetime import datetime
5
+ from tqdm import tqdm
6
+ import time
7
+ from pytz import timezone
8
+
9
+
10
+ class MLB_Scrape:
11
+
12
+ # def __init__(self):
13
+ # # Initialize your class here if needed
14
+ # pass
15
+
16
def get_sport_id(self):
    """Fetch the sports list from the MLB Stats API.

    Returns:
        pandas.DataFrame: built from the ``sports`` entry of the JSON
        response, with the ``id`` column used as the index.
    """
    response = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports')
    sports = response.json()['sports']
    return pd.DataFrame(sports).set_index('id')
19
+
20
def get_sport_id_check(self, sport_id):
    """Report whether ``sport_id`` is in the index returned by ``get_sport_id``.

    When the id is not found, a prompt and the full sports table are printed
    so the caller can choose a valid id.

    Args:
        sport_id: Value looked up in the sports table index.

    Returns:
        bool: True when the id is present, otherwise False.
    """
    known_sports = self.get_sport_id()
    if sport_id in known_sports.index:
        return True
    print('Please Select a New Sport ID from the following')
    print(known_sports)
    return False
27
+
28
def get_schedule(self, year_input=2023,
                 sport_id=1,
                 start_date='YYYY-MM-DD',
                 end_date='YYYY-MM-DD',
                 final=True,
                 regular=True,
                 spring=False):
    """Download one season's schedule and return it as a DataFrame.

    Args:
        year_input: Season sent as the ``season`` query parameter.
        sport_id: Sport id sent as ``sportId``; checked with
            ``get_sport_id_check`` before any schedule request is made.
        start_date: Inclusive lower date bound in ``YYYY-MM-DD`` form. The
            literal placeholder ``'YYYY-MM-DD'`` means "no lower bound".
        end_date: Inclusive upper date bound, same conventions as
            ``start_date``.
        final: When true, keep only games whose coded state is ``'F'``.
        regular: When true, request ``gameTypes=R``. Takes precedence over
            ``spring``.
        spring: When true (and ``regular`` is false), request
            ``gameTypes=S``. When both flags are false no game-type filter
            is sent.

    Returns:
        pandas.DataFrame with the columns ``game_id``, ``time``, ``date``,
        ``away``, ``home``, ``state``, ``venue_id`` and ``venue_name``, one
        row per distinct game id.
        ``None`` when ``sport_id`` is rejected.
        A message string when no games remain or when a supplied date does
        not parse as ``YYYY-MM-DD``.
    """
    placeholder = 'YYYY-MM-DD'
    empty_message = 'Schedule Length of 0, please select different parameters.'

    if not self.get_sport_id_check(sport_id=sport_id):
        return

    # Only the gameTypes fragment differs between the three request variants.
    if regular:
        game_type = '&gameTypes=R'
    elif spring:
        print('spring')
        game_type = '&gameTypes=S'
    else:
        game_type = ''
    url = (f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id}'
           f'{game_type}&season={year_input}&hydrate=lineup,players')
    game_call = requests.get(url=url).json()
    if game_type:
        print(url)

    # Flatten the per-date game lists once, then read each field from it.
    games = [game for day in game_call['dates'] for game in day['games']]
    game_df = pd.DataFrame(data={
        'game_id': [game['gamePk'] for game in games],
        'time': [game['gameDate'] for game in games],
        'date': [game['officialDate'] for game in games],
        'away': [game['teams']['away']['team']['name'] for game in games],
        'home': [game['teams']['home']['team']['name'] for game in games],
        'state': [game['status']['codedGameState'] for game in games],
        'venue_id': [game['venue']['id'] for game in games],
        'venue_name': [game['venue']['name'] for game in games],
    })

    if game_df.empty:
        return empty_message

    game_df['date'] = pd.to_datetime(game_df['date']).dt.date
    eastern = timezone('US/Eastern')
    game_df['time'] = pd.to_datetime(game_df['time']).dt.tz_convert(eastern)
    # NOTE(review): the label is the fixed text "EST" even when US/Eastern is
    # on daylight time; kept unchanged because callers may display it as-is.
    game_df['time'] = game_df['time'].dt.strftime("%I:%M %p EST")

    if start_date != placeholder or end_date != placeholder:
        # Parse only the bounds that were actually supplied, so a one-sided
        # range is not rejected for the other side's placeholder text.
        try:
            lower = (None if start_date == placeholder
                     else datetime.strptime(start_date, "%Y-%m-%d").date())
            upper = (None if end_date == placeholder
                     else datetime.strptime(end_date, "%Y-%m-%d").date())
        except ValueError:
            return 'Please use YYYY-MM-DD Format for Start and End Dates'
        if lower is not None:
            game_df = game_df[game_df['date'] >= lower]
        if upper is not None:
            game_df = game_df[game_df['date'] <= upper]

    if final:
        game_df = game_df[game_df['state'] == 'F']

    game_df = game_df.drop_duplicates(subset='game_id').reset_index(drop=True)

    if game_df.empty:
        return empty_message

    return game_df
101
+
102
def get_data(self, game_list_input=None):
    """Download the live-feed JSON for each game id.

    Args:
        game_list_input: Iterable of game ids, each substituted into the
            ``/api/v1.1/game/{id}/feed/live`` URL. When omitted, the single
            game id 748540 is used.

    Returns:
        list: the decoded JSON payloads, in the same order as the input ids.
    """
    # Materialise the ids so tqdm knows the total, and so inputs that are not
    # positionally indexable (e.g. a filtered pandas Series) work too.
    game_ids = [748540] if game_list_input is None else list(game_list_input)

    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')
    data_total = []
    for game_id in tqdm(game_ids, desc="Processing", unit="iteration"):
        r = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live')
        data_total.append(r.json())
    return data_total
114
+
115
def get_data_df(self, data_list):
    """Flatten live-feed game payloads into one row per selected play event.

    A play event produces a row when it is a pitch (``isPitch`` is true) or
    its ``details`` contain a ``call`` entry; otherwise it still produces a
    row when its ``count`` shows four balls. Any field that is absent from
    the payload is recorded as NaN.

    Args:
        data_list: Iterable of decoded live-feed dicts. Each must provide
            ``gamePk``, ``gameData`` (``datetime.officialDate`` and
            ``teams``) and ``liveData.plays.allPlays``.

    Returns:
        pandas.DataFrame: one row per selected event, with the columns
        listed in ``column_names`` below, in that order.
    """
    swing_codes = ('X', 'F', 'S', 'D', 'E', 'T', 'W')
    whiff_codes = ('S', 'T', 'W')

    # Output columns, in the order they appear in the returned DataFrame.
    column_names = (
        'game_id', 'game_date',
        'batter_id', 'batter_name', 'batter_hand', 'batter_team', 'batter_team_id',
        'pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitcher_team', 'pitcher_team_id',
        'play_description', 'play_code', 'in_play', 'is_strike', 'is_swing',
        'is_whiff', 'is_out', 'is_ball', 'is_review',
        'pitch_type', 'pitch_description',
        'strikes', 'balls', 'outs',
        'start_speed', 'end_speed', 'sz_top', 'sz_bot', 'x', 'y',
        'ax', 'ay', 'az', 'pfxx', 'pfxz', 'px', 'pz',
        'vx0', 'vy0', 'vz0', 'x0', 'y0', 'z0',
        'zone', 'type_confidence', 'plate_time', 'extension',
        'spin_rate', 'spin_direction', 'ivb', 'hb',
        'launch_speed', 'launch_angle', 'launch_distance', 'launch_location',
        'trajectory', 'hardness', 'hit_x', 'hit_y',
        'index_play', 'play_id', 'start_time', 'end_time', 'is_pitch', 'type_type',
        'type_ab', 'event', 'event_type', 'rbi', 'away_score', 'home_score',
    )

    # Column name -> key path below a play event's 'pitchData' entry.
    pitch_data_paths = {
        'start_speed': ('startSpeed',),
        'end_speed': ('endSpeed',),
        'sz_top': ('strikeZoneTop',),
        'sz_bot': ('strikeZoneBottom',),
        'x': ('coordinates', 'x'),
        'y': ('coordinates', 'y'),
        'ax': ('coordinates', 'aX'),
        'ay': ('coordinates', 'aY'),
        'az': ('coordinates', 'aZ'),
        'pfxx': ('coordinates', 'pfxX'),
        'pfxz': ('coordinates', 'pfxZ'),
        'px': ('coordinates', 'pX'),
        'pz': ('coordinates', 'pZ'),
        'vx0': ('coordinates', 'vX0'),
        'vy0': ('coordinates', 'vY0'),
        'vz0': ('coordinates', 'vZ0'),
        'x0': ('coordinates', 'x0'),
        'y0': ('coordinates', 'y0'),
        'z0': ('coordinates', 'z0'),
        'zone': ('zone',),
        'type_confidence': ('typeConfidence',),
        'plate_time': ('plateTime',),
        'extension': ('extension',),
        'spin_rate': ('breaks', 'spinRate'),
        'spin_direction': ('breaks', 'spinDirection'),
        'ivb': ('breaks', 'breakVerticalInduced'),
        'hb': ('breaks', 'breakHorizontal'),
    }

    # Column name -> key path below a play event's 'hitData' entry.
    hit_data_paths = {
        'launch_speed': ('launchSpeed',),
        'launch_angle': ('launchAngle',),
        'launch_distance': ('totalDistance',),
        'launch_location': ('location',),
        'trajectory': ('trajectory',),
        'hardness': ('hardness',),
        'hit_x': ('coordinates', 'coordX'),
        'hit_y': ('coordinates', 'coordY'),
    }

    # Column name -> key read directly from the play event.
    event_meta_keys = {
        'index_play': 'index',
        'play_id': 'playId',
        'start_time': 'startTime',
        'end_time': 'endTime',
        'is_pitch': 'isPitch',
        'type_type': 'type',
    }

    # Column name -> key read from the at-bat's 'result' entry.
    result_keys = {
        'type_ab': 'type',
        'event': 'event',
        'event_type': 'eventType',
        'rbi': 'rbi',
        'away_score': 'awayScore',
        'home_score': 'homeScore',
        'is_out': 'isOut',
    }

    def lookup(mapping, *keys):
        """Follow ``keys`` through nested dicts; NaN when any level is missing."""
        current = mapping
        for key in keys:
            if not isinstance(current, dict) or key not in current:
                return np.nan
            current = current[key]
        return current

    def shared_fields(data, ab_list, play_event):
        """Build the game, matchup, team and event-metadata part of a row."""
        if ab_list['about']['isTopInning']:
            batting_side, fielding_side = 'away', 'home'
        else:
            batting_side, fielding_side = 'home', 'away'
        teams = data['gameData']['teams']

        row = {
            'game_id': data['gamePk'],
            'game_date': data['gameData']['datetime']['officialDate'],
            'batter_id': lookup(ab_list, 'matchup', 'batter', 'id'),
            'batter_name': lookup(ab_list, 'matchup', 'batter', 'fullName'),
            'batter_hand': lookup(ab_list, 'matchup', 'batSide', 'code'),
            'pitcher_id': lookup(ab_list, 'matchup', 'pitcher', 'id'),
            'pitcher_name': lookup(ab_list, 'matchup', 'pitcher', 'fullName'),
            'pitcher_hand': lookup(ab_list, 'matchup', 'pitchHand', 'code'),
            'batter_team': lookup(teams, batting_side, 'abbreviation'),
            'batter_team_id': lookup(teams, batting_side, 'id'),
            'pitcher_team': lookup(teams, fielding_side, 'abbreviation'),
            # The pitcher's team id comes from the same side as the pitcher's
            # team abbreviation, not from the batting side.
            'pitcher_team_id': lookup(teams, fielding_side, 'id'),
        }
        for column, key in event_meta_keys.items():
            row[column] = lookup(play_event, key)
        return row

    print('Converting Data to Dataframe.')

    rows = []
    for data in data_list:
        for ab_list in data['liveData']['plays']['allPlays']:
            play_events = ab_list['playEvents']
            last_position = len(play_events) - 1

            for n, play_event in enumerate(play_events):
                details = play_event.get('details') or {}

                if bool(play_event.get('isPitch')) or 'call' in details:
                    row = shared_fields(data, ab_list, play_event)

                    code = lookup(details, 'code')
                    row['play_description'] = lookup(details, 'description')
                    row['play_code'] = code
                    row['in_play'] = lookup(details, 'isInPlay')
                    row['is_strike'] = lookup(details, 'isStrike')
                    row['is_swing'] = True if code in swing_codes else np.nan
                    row['is_whiff'] = True if code in whiff_codes else np.nan
                    # is_ball reads the event's own 'isBall' flag; is_out is
                    # filled from the at-bat result further down.
                    row['is_ball'] = lookup(details, 'isBall')
                    row['is_review'] = lookup(details, 'hasReview')
                    row['pitch_type'] = lookup(details, 'type', 'code')
                    row['pitch_description'] = lookup(details, 'type', 'description')

                    # The count recorded is the one BEFORE this event: zeros
                    # for the first pitch, otherwise the previous event's
                    # count. n == 0 has no previous event, so it is treated
                    # like a first pitch instead of wrapping to the last one.
                    if n == 0 or play_event.get('pitchNumber') == 1:
                        row['strikes'] = 0
                        row['balls'] = 0
                        row['outs'] = 0
                    else:
                        previous = play_events[n - 1]
                        row['strikes'] = lookup(previous, 'count', 'strikes')
                        row['balls'] = lookup(previous, 'count', 'balls')
                        row['outs'] = lookup(previous, 'count', 'outs')

                    for column, path in pitch_data_paths.items():
                        row[column] = lookup(play_event, 'pitchData', *path)
                    for column, path in hit_data_paths.items():
                        row[column] = lookup(play_event, 'hitData', *path)

                    # Only the final event of an at-bat carries its result.
                    if n == last_position:
                        for column, key in result_keys.items():
                            row[column] = lookup(ab_list, 'result', key)

                    rows.append(row)

                elif lookup(play_event, 'count', 'balls') == 4:
                    # Non-pitch event with four balls: keep the result event
                    # and this event's own count; all pitch fields stay NaN.
                    row = shared_fields(data, ab_list, play_event)
                    row['event'] = lookup(ab_list, 'result', 'event')
                    row['event_type'] = lookup(ab_list, 'result', 'eventType')
                    row['strikes'] = lookup(play_event, 'count', 'strikes')
                    row['balls'] = lookup(play_event, 'count', 'balls')
                    row['outs'] = lookup(play_event, 'count', 'outs')
                    rows.append(row)

    # Build per-column lists so the frame is constructed from a dict of
    # equally long lists; fields a row never set default to NaN.
    columns = {name: [] for name in column_names}
    for row in rows:
        for name in column_names:
            columns[name].append(row.get(name, np.nan))

    print({name: len(values) for name, values in columns.items()})

    df = pd.DataFrame(data=columns)
    return df
638
+
639
def get_players(self, sport_id=1):
    """Return a DataFrame with one row per player listed for a sport.

    Fetches ``/api/v1/sports/{sport_id}/players`` from statsapi.mlb.com and
    keeps only the fields that help distinguish players from one another.

    Args:
        sport_id: Value interpolated into the request URL (default ``1``).

    Returns:
        pandas.DataFrame with the columns ``player_id``, ``name``,
        ``position`` (primary position abbreviation), ``team`` (current team
        id) and ``age``.

    Raises:
        KeyError: If the response or any player record lacks a field that is
            read below.
    """
    response = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players')
    payload = response.json()

    # One pass per output column, in the same order the fields are needed.
    names = [person['fullName'] for person in payload['people']]
    ids = [person['id'] for person in payload['people']]
    positions = [person['primaryPosition']['abbreviation'] for person in payload['people']]
    team_ids = [person['currentTeam']['id'] for person in payload['people']]
    ages = [person['currentAge'] for person in payload['people']]

    columns = {
        'player_id': ids,
        'name': names,
        'position': positions,
        'team': team_ids,
        'age': ages,
    }
    return pd.DataFrame(data=columns)
655
+
656
def get_teams(self):
    """Return a DataFrame describing every team returned by the teams endpoint.

    Fetches ``/api/v1/teams/`` from statsapi.mlb.com. Records without a
    ``franchiseName`` key get ``None`` for their id/name fields and are then
    removed by the ``dropna(subset=['team_id'])`` step.

    Returns:
        pandas.DataFrame with the columns ``team_id``, ``city``, ``name``,
        ``franchise``, ``abbreviation``, ``parent_org_id``, ``parent_org``,
        ``league_id``, ``league_name`` and ``parent_org_abbreviation``.
        Teams that report no parent organisation are treated as their own
        parent (``parent_org_id`` = ``team_id``, ``parent_org`` =
        ``franchise``).

    Raises:
        KeyError: If the response has no ``teams`` key.
    """
    teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()

    def franchise_field(team, key):
        # Identity fields are only trusted on records carrying a franchiseName.
        return team.get(key) if 'franchiseName' in team else None

    def league_field(team, key):
        # Tolerate records without a 'league' entry instead of raising.
        return (team.get('league') or {}).get(key)

    records = teams['teams']
    mlb_teams_df = pd.DataFrame(data={
        'team_id': [franchise_field(t, 'id') for t in records],
        # Fixed: 'city' used to be filled with the full franchise name.
        'city': [franchise_field(t, 'franchiseName') for t in records],
        'name': [franchise_field(t, 'teamName') for t in records],
        'franchise': [franchise_field(t, 'name') for t in records],
        'abbreviation': [franchise_field(t, 'abbreviation') for t in records],
        'parent_org_id': [t.get('parentOrgId') for t in records],
        'parent_org': [t.get('parentOrgName') for t in records],
        'league_id': [league_field(t, 'id') for t in records],
        'league_name': [league_field(t, 'name') for t in records],
    }).drop_duplicates().dropna(subset=['team_id']).reset_index(drop=True).sort_values('team_id')

    # A team without a parent organisation is its own parent.
    missing_parent_id = mlb_teams_df['parent_org_id'].isnull()
    mlb_teams_df.loc[missing_parent_id, 'parent_org_id'] = mlb_teams_df.loc[missing_parent_id, 'team_id']
    missing_parent_name = mlb_teams_df['parent_org'].isnull()
    mlb_teams_df.loc[missing_parent_name, 'parent_org'] = mlb_teams_df.loc[missing_parent_name, 'franchise']

    # Look up each parent's abbreviation through its team_id.
    abbreviation_by_id = mlb_teams_df.set_index('team_id')['abbreviation'].to_dict()
    mlb_teams_df['parent_org_abbreviation'] = mlb_teams_df['parent_org_id'].map(abbreviation_by_id)

    return mlb_teams_df
700
+
701
def get_leagues(self):
    """Return a DataFrame of the leagues listed by the leagues endpoint.

    Fetches ``/api/v1/leagues/`` from statsapi.mlb.com. Any field a league
    record does not carry is stored as ``None``.

    Returns:
        pandas.DataFrame with the columns ``league_id``, ``league_name``,
        ``league_abbreviation`` and ``sport_id``.

    Raises:
        KeyError: If the response has no ``leagues`` key, or a record has a
            ``sport`` entry without an ``id``.
    """
    payload = requests.get(url='https://statsapi.mlb.com/api/v1/leagues/').json()

    # The sport id is nested, so it needs an explicit presence check.
    sport_ids = []
    for entry in payload['leagues']:
        if 'sport' in entry:
            sport_ids.append(entry['sport']['id'])
        else:
            sport_ids.append(None)

    ids = [entry.get('id') for entry in payload['leagues']]
    names = [entry.get('name') for entry in payload['leagues']]
    abbreviations = [entry.get('abbreviation') for entry in payload['leagues']]

    columns = {
        'league_id': ids,
        'league_name': names,
        'league_abbreviation': abbreviations,
        'sport_id': sport_ids,
    }
    return pd.DataFrame(data=columns)
719
+
720
def get_player_games_list(self, player_id=691587, season=2023):
    """Return the ``gamePk`` of every game in a player's game log.

    Requests ``/api/v1/people/{player_id}`` from statsapi.mlb.com with a
    ``stats(type=gameLog,season=...)`` hydration and reads the splits of the
    first stats entry.

    Args:
        player_id: Player id interpolated into the URL.
        season: Season interpolated into the hydration. Defaults to 2023,
            the value that was previously hard-coded in the URL.

    Returns:
        list of ``gamePk`` values, one per split; an empty list when the
        response carries no stats entry.

    Raises:
        KeyError / IndexError: If the response has no ``people`` entry or a
            split lacks ``game``/``gamePk``.
    """
    # https for consistency with the other statsapi requests in this file.
    url = (f'https://statsapi.mlb.com/api/v1/people/{player_id}'
           f'?hydrate=stats(type=gameLog,season={season}),hydrations')
    person = requests.get(url=url).json()['people'][0]

    # NOTE(review): assumes a player without a game log comes back with a
    # missing or empty 'stats' list - confirm against the API.
    stats = person.get('stats') or []
    if not stats:
        return []
    return [split['game']['gamePk'] for split in stats[0].get('splits', [])]
723
+
724
def get_team_schedule(self, year=2023, sport_id=1, mlb_team='Toronto Blue Jays'):
    """Return the schedule rows with state ``'F'`` for one organisation's team.

    Args:
        year: Passed to ``self.get_schedule`` as ``year_input``.
        sport_id: Sport id; validated with ``self.get_sport_id_check`` and
            used to filter the merged team table.
        mlb_team: Value matched against the ``parent_org`` column of the
            merged team table.

    Returns:
        ``(schedule_df, teams_df)`` on success: the schedule restricted to
        rows with state ``'F'`` in which the selected team is home or away,
        and the teams/leagues/sports table filtered to ``sport_id``.
        ``(False, False)`` when the sport id is rejected or no team matches
        ``mlb_team``; a message is printed in both cases.
    """
    if not self.get_sport_id_check(sport_id=sport_id):
        print('Please Select a New Sport ID from the following')
        print(self.get_sport_id())
        return False, False

    schedule_df = self.get_schedule(year_input=year, sport_id=sport_id)

    # Teams -> leagues (shared columns) -> sports (sport_id against the index).
    teams_df = self.get_teams().merge(self.get_leagues()).merge(
        self.get_sport_id(),
        left_on=['sport_id'],
        right_index=True,
        suffixes=['', '_sport'],
    )
    teams_df = teams_df[teams_df['sport_id'] == sport_id]

    org_rows = teams_df[teams_df['parent_org'] == mlb_team]
    if org_rows.empty:
        # Previously surfaced as a bare IndexError from `.values[0]`.
        print(f'No team found for parent organization {mlb_team!r} with sport ID {sport_id}')
        return False, False

    team_name_select = org_rows['franchise'].values[0]
    team_involved = (schedule_df.away == team_name_select) | (schedule_df.home == team_name_select)
    # NOTE(review): state 'F' presumably marks finished games - confirm.
    schedule_df = schedule_df[team_involved & (schedule_df.state == 'F')].reset_index(drop=True)
    return schedule_df, teams_df
737
+
738
def get_team_game_data(self, year=2023, sport_id=1, mlb_team='Toronto Blue Jays'):
    """Return the scraped game data for every scheduled game of one team.

    Looks up the schedule through ``self.get_team_schedule``, passes its
    ``game_id`` column to ``self.get_data``, turns the result into a
    DataFrame with ``self.get_data_df`` and tags every row with the
    organisation and level.

    Args:
        year: Forwarded to ``get_team_schedule``.
        sport_id: Forwarded to ``get_team_schedule``.
        mlb_team: Parent organisation name; forwarded to
            ``get_team_schedule`` and matched against ``parent_org``.

    Returns:
        pandas.DataFrame with the added columns ``mlb_team`` (the
        organisation's ``parent_org_abbreviation``) and ``level`` (its
        ``abbreviation_sport``), or ``None`` when ``get_team_schedule``
        does not return a schedule DataFrame.
    """
    schedule_df, teams_df = self.get_team_schedule(year=year, sport_id=sport_id, mlb_team=mlb_team)

    # get_team_schedule signals failure with (False, False). Testing the
    # truthiness of a real DataFrame raises ValueError, so check the type.
    if not isinstance(schedule_df, pd.DataFrame):
        return None

    data = self.get_data(schedule_df['game_id'][:])
    df = self.get_data_df(data_list=data)

    org_rows = teams_df[teams_df['parent_org'] == mlb_team]
    df['mlb_team'] = org_rows['parent_org_abbreviation'].values[0]
    df['level'] = org_rows['abbreviation_sport'].values[0]

    return df
app.py CHANGED
@@ -1,551 +1,779 @@
1
- from shiny import ui, render, App
2
- import matplotlib.image as mpimg
3
  import pandas as pd
4
- import pygsheets
5
- import pytz
6
- from datetime import datetime
7
  import numpy as np
8
- import joblib
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- import datasets
11
- from datasets import load_dataset
12
- dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2024_spring.csv'])
13
- dataset_train = dataset['train']
14
- df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
15
 
 
 
 
 
 
16
 
17
- # print('Starting')
18
- # df_2024 = pd.read_csv('2024_spring_data.csv',index_col=[0])
19
- print('Starting')
20
  spring_teams = df_2024.groupby(['pitcher_id']).tail(1)[['pitcher_id','pitcher_team']].set_index(['pitcher_id'])['pitcher_team'].to_dict()
21
 
 
 
 
 
22
 
23
- df_2024['vy_f'] = -(df_2024['vy0']**2 - (2 * df_2024['ay'] * (df_2024['y0'] - 17/12)))**0.5
24
- df_2024['t'] = (df_2024['vy_f'] - df_2024['vy0']) / df_2024['ay']
25
- df_2024['vz_f'] = (df_2024['vz0']) + (df_2024['az'] * df_2024['t'])
26
- df_2024['vaa'] = -np.arctan(df_2024['vz_f'] / df_2024['vy_f']) * (180 / np.pi)
27
 
28
- #df_2024['vy_f'] = -(df_2024['vy0']**2 - (2 * df_2024['ay'] * (df_2024['y0'] - 17/12)))**0.5
29
- #df_2024['t'] = (df_2024['vy_f'] - df_2024['vy0']) / df_2024['ay']
30
- df_2024['vx_f'] = (df_2024['vx0']) + (df_2024['ax'] * df_2024['t'])
31
- df_2024['haa'] = -np.arctan(df_2024['vx_f'] / df_2024['vy_f']) * (180 / np.pi)
32
- grouped_ivb_2023 = pd.read_csv('2023_pitch_group_data.csv',index_col=[0,3])
33
 
34
- model = joblib.load('tjstuff_model_20240123.joblib')
 
35
 
36
 
37
- def percentile(n):
38
- def percentile_(x):
39
- return x.quantile(n)
40
- percentile_.__name__ = 'percentile_{:02.0f}'.format(n*100)
41
- return percentile_
42
 
43
- def df_clean(df):
44
- df_copy = df.copy()
45
- df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1
46
- df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1
47
- df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction']
48
 
49
- df_copy['pitch_l'] = [1 if x == 'L' else 0 for x in df_copy['pitcher_hand']]
50
- df_copy['bat_l'] = [1 if x == 'L' else 0 for x in df_copy['batter_hand']]
51
- df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "KN", "FO", "CS", "SC", "FA"])].reset_index(drop=True)
52
- df_copy['pitch_type'] = df_copy['pitch_type'].replace({'FT':'SI','KC':'CU','ST':'SL','SV':'SL'})
53
 
54
- # df_copy['des_new'] = df_copy['play_description'].map(des_dict)
55
- # df_copy['ev_new'] = df_copy.loc[df_copy['des_new'] == 'hit_into_play','event_type'].map(ev_dict)
56
- # df_copy.loc[df_copy['des_new']=='hit_into_play','des_new'] = df_copy.loc[df_copy['des_new']=='hit_into_play','ev_new']
57
- # df_copy = df_copy.dropna(subset=['des_new'])
58
- # des_values = df_copy.groupby(['des_new'])['delta_run_exp'].mean()
59
- # df_copy = df_copy.merge(des_values,left_on='des_new',right_on='des_new',suffixes=['','_mean'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- df_copy_fb_sum = df_copy[df_copy.pitch_type.isin(["FF", "FC", "SI"])].groupby(['pitcher_id']).agg(
62
- fb_velo = ('start_speed','mean'),
63
- fb_max_ivb = ('ivb',percentile(0.9)),
64
- fb_max_x = ('hb',percentile(0.9)),
65
- fb_min_x = ('hb',percentile(0.1)),
66
- fb_max_velo = ('start_speed',percentile(0.9)),
67
- fb_axis = ('spin_direction','mean'),
68
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- df_copy = df_copy.merge(df_copy_fb_sum,left_on='pitcher_id',right_index=True,how='left')
71
 
72
- df_copy['fb_velo_diff'] = df_copy['start_speed']- df_copy['fb_velo']
73
- df_copy['fb_max_ivb_diff'] = df_copy['ivb']- df_copy['fb_max_ivb']
74
- df_copy['fb_max_hb_diff'] = df_copy['hb']- df_copy['fb_max_x']
75
- df_copy['fb_min_hb_diff'] = df_copy['hb']- df_copy['fb_min_x']
76
- df_copy['fb_max_velo_diff'] = df_copy['start_speed']- df_copy['fb_max_velo']
77
- df_copy['fb_axis_diff'] = df_copy['spin_direction']- df_copy['fb_axis']
78
 
79
- # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_velo_diff'] = 0
80
- # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_ivb_diff'] = 0
81
- # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_hb_diff'] = 0
82
- # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_min_hb_diff'] = 0
83
- # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_velo_diff'] = 0
84
- # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_axis_diff'] = 0
85
 
86
 
87
- df_copy['max_speed'] = df_copy.groupby(['pitcher_id'])['start_speed'].transform('max')
88
- df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed']
89
 
90
- df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max')
91
- df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb']
92
 
93
- df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5
94
- df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
95
- df_copy['vz_f'] = (df_copy['vz0']) + (df_copy['az'] * df_copy['t'])
96
- df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi)
97
 
98
- #df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5
99
- #df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
100
- df_copy['vx_f'] = (df_copy['vx0']) + (df_copy['ax'] * df_copy['t'])
101
- df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi)
102
 
103
- # df_copy['x_diff'] = df_copy['x0'] - df_copy['px']
104
- # df_copy['z_diff'] = df_copy['z0'] - df_copy['pz']
105
 
106
- # df_copy['vaa'] = np.arctan(df_copy['z_diff'] / df_copy['release_pos_y']) * 360 / np.pi
107
- # df_copy['haa'] = np.arctan(-df_copy['x_diff'] / df_copy['release_pos_y']) * 360 / np.pi
108
 
109
- df_copy = df_copy.dropna(subset=['pitch_type']).fillna(0)
110
- return df_copy
111
 
 
 
112
 
113
- app_ui = ui.page_fluid(
114
- ui.layout_sidebar(
115
-
116
- ui.panel_sidebar(
117
- ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
118
  end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
119
- max=df_2024.game_date.max()),width=2),
120
- ui.panel_main(
121
- ui.navset_tab(
122
- # ui.nav("Raw Data",
123
- # ui.output_data_frame("raw_table")),
124
- ui.nav("Pitch Data",
125
- ui.output_data_frame("table")),
126
- ui.nav("Pitch Data (Daily)",
127
- ui.output_data_frame("table_daily")),
128
- ui.nav("2023 vs Spring",
129
- ui.output_data_frame("table_2023")),
130
- ui.nav("2023 vs Spring Difference",
131
- ui.output_data_frame("table_difference")),
132
- # ui.nav("New Pitches",
133
- # ui.output_data_frame("table_new")),
134
- ui.nav("tjStuff+",
135
- ui.output_data_frame("table_stuff")),
136
- ui.nav("tjStuff+ (Daily)",
137
- ui.output_data_frame("table_stuff_day")),
138
-
139
- ))))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
 
142
- from urllib.request import Request, urlopen
143
- from shiny import App, reactive, ui
144
- from shiny.ui import h2, tags
145
- # importing OpenCV(cv2) module
 
 
 
146
 
147
 
 
 
 
 
148
 
149
 
150
- #print(app_ui)
151
- def server(input, output, session):
 
 
152
 
153
- # @output
154
- # @render.data_frame
155
- # def raw_table():
156
 
157
- # return render.DataGrid(
158
- # df_2024,
159
- # width='fit-content',
160
- # height=750,
161
- # filters=True,
162
- # )
163
 
164
- @output
165
- @render.data_frame
166
- def table():
 
167
 
168
- grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
169
- (pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_team','pitcher_hand','pitch_type']).agg(
170
- pitches = ('start_speed','count'),
171
-
172
- start_speed = ('start_speed','mean'),
173
- ivb = ('ivb','mean'),
174
- hb = ('hb','mean'),
175
- spin_rate = ('spin_rate','mean'),
176
- vaa = ('vaa','mean'),
177
- haa = ('haa','mean'),
178
- horizontal_release = ('x0','mean'),
179
- vertical_release = ('z0','mean'),
180
- extension = ('extension','mean')).round(1).reset_index()
181
- #grouped_ivb = grouped_ivb.set_index(['pitcher_id']).reset_index()
182
- # return grouped_ivb
183
- return render.DataGrid(
184
- grouped_ivb,
185
- width='fit-content',
186
- height=750,
187
- filters=True,
188
- )
189
 
190
- @output
191
- @render.data_frame
192
- def table_daily():
193
 
194
- grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
195
- (pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_team','pitcher_hand','pitch_type','game_date']).agg(
196
- pitches = ('start_speed','count'),
197
-
198
- start_speed = ('start_speed','mean'),
199
- ivb = ('ivb','mean'),
200
- hb = ('hb','mean'),
201
- spin_rate = ('spin_rate','mean'),
202
- vaa = ('vaa','mean'),
203
- haa = ('haa','mean'),
204
- horizontal_release = ('x0','mean'),
205
- vertical_release = ('z0','mean'),
206
- extension = ('extension','mean')).round(1).reset_index()
207
- #grouped_ivb = grouped_ivb.set_index(['pitcher_id']).reset_index()
208
- # return grouped_ivb
209
- return render.DataGrid(
210
- grouped_ivb,
211
- width='fit-content',
212
- height=750,
213
- filters=True,
214
- )
215
-
216
- #return grouped_ivb
217
 
218
- @output
219
- @render.data_frame
220
- def table_2023():
221
- grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
222
- (pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg(
223
- pitches = ('start_speed','count'),
224
-
225
- start_speed = ('start_speed','mean'),
226
- ivb = ('ivb','mean'),
227
- hb = ('hb','mean'),
228
- spin_rate = ('spin_rate','mean'),
229
- vaa = ('vaa','mean'),
230
- haa = ('haa','mean'),
231
- horizontal_release = ('x0','mean'),
232
- vertical_release = ('z0','mean'),
233
- extension = ('extension','mean')).round(1).reset_index()
234
- grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type'])
235
 
236
-
 
237
 
238
- #####
239
- ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb,
240
- left_index=True,
241
- right_index=True,
242
- how='right',suffixes=['_2023','_spring']).reset_index()
243
-
244
- ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring']
245
- ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring']
246
- #ivb_merged['pitch_type'] = ivb_merged['pitch_type_spring']
247
-
248
-
249
- # ivb_merged = ivb_merged[['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
250
- # 'pitches_spring', 'start_speed_spring', 'ivb_spring',
251
- # 'hb_spring', 'spin_rate_spring', 'horizontal_release_spring',
252
- # 'vertical_release_spring', 'extension_spring']]
253
- ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams)
254
- ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
255
-
256
- return render.DataGrid(
257
- ivb_merged[['pitches_2023','start_speed_2023', 'ivb_2023', 'hb_2023',
258
- 'spin_rate_2023', 'vaa_2023','haa_2023', 'horizontal_release_2023', 'vertical_release_2023',
259
- 'extension_2023','pitches_spring','start_speed_spring', 'ivb_spring', 'hb_spring',
260
- 'spin_rate_spring','vaa_spring','haa_spring', 'horizontal_release_spring', 'vertical_release_spring',
261
- 'extension_spring',]].reset_index(),
262
- width='fit-content',
263
- height=750,
264
- filters=True,
265
- )
266
-
267
- @output
268
- @render.data_frame
269
- def table_difference():
270
- grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
271
- (pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg(
272
- pitches = ('start_speed','count'),
273
-
274
- start_speed = ('start_speed','mean'),
275
- ivb = ('ivb','mean'),
276
- hb = ('hb','mean'),
277
- spin_rate = ('spin_rate','mean'),
278
- vaa = ('vaa','mean'),
279
- haa = ('haa','mean'),
280
- horizontal_release = ('x0','mean'),
281
- vertical_release = ('z0','mean'),
282
- extension = ('extension','mean')).round(1).reset_index()
283
- grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type'])
284
 
285
-
 
 
 
 
 
 
286
 
287
- #####
288
- ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb,
289
- left_index=True,
290
- right_index=True,
291
- how='right',suffixes=['_2023','_spring']).reset_index()
292
-
293
- ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring']
294
- ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring']
295
- #ivb_merged['pitch_type'] = ivb_merged['pitch_type_spring']
296
-
297
-
298
- # ivb_merged = ivb_merged[['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
299
- # 'pitches_spring', 'start_speed_spring', 'ivb_spring',
300
- # 'hb_spring', 'spin_rate_spring', 'horizontal_release_spring',
301
- # 'vertical_release_spring', 'extension_spring']]
302
- ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams)
303
- ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
304
-
305
- ivb_merged[['start_speed_difference', 'ivb_difference', 'hb_difference','spin_rate_difference','vaa_difference','haa_difference',
306
- 'horizontal_release_difference', 'vertical_release_difference',
307
- 'extension_difference']] = ivb_merged[['start_speed_spring', 'ivb_spring', 'hb_spring',
308
- 'spin_rate_spring', 'vaa_spring','haa_spring','horizontal_release_spring', 'vertical_release_spring',
309
- 'extension_spring']].values - ivb_merged[['start_speed_2023', 'ivb_2023', 'hb_2023',
310
- 'spin_rate_2023', 'vaa_2023','haa_2023','horizontal_release_2023', 'vertical_release_2023',
311
- 'extension_2023']].values
312
-
313
-
314
- return render.DataGrid(
315
- ivb_merged[['start_speed_difference', 'ivb_difference', 'hb_difference',
316
- 'spin_rate_difference',
317
- 'vaa_difference','haa_difference','horizontal_release_difference', 'vertical_release_difference',
318
- 'extension_difference']].reset_index(),
319
- width='fit-content',
320
- height=750,
321
- filters=True,
322
- )
323
-
324
- # @output
325
- # @render.data_frame
326
- # def table_new():
327
- # grouped_ivb = df_2024.groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg(
328
- # pitches = ('start_speed','count'),
329
 
330
- # start_speed = ('start_speed','mean'),
331
- # ivb = ('ivb','mean'),
332
- # hb = ('hb','mean'),
333
- # spin_rate = ('spin_rate','mean'),
334
- # vaa = ('vaa','mean'),
335
- # haa = ('haa','mean'),
336
- # horizontal_release = ('x0','mean'),
337
- # vertical_release = ('z0','mean'),
338
- # extension = ('extension','mean')).round(1).reset_index()
339
- # grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type'])
340
-
341
- # grouped_ivb_2023 = pd.read_csv('2023_pitch_group_data.csv',index_col=[0,3])
342
-
343
- # #####
344
- # ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb,
345
- # left_index=True,
346
- # right_index=True,
347
- # how='right',suffixes=['_2023','_spring']).reset_index()
348
-
349
- # ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring']
350
- # ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring']
351
- # #ivb_merged['pitch_type'] = ivb_merged['pitch_type_spring']
352
-
353
-
354
- # # ivb_merged = ivb_merged[['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
355
- # # 'pitches_spring', 'start_speed_spring', 'ivb_spring',
356
- # # 'hb_spring', 'spin_rate_spring', 'horizontal_release_spring',
357
- # # 'vertical_release_spring', 'extension_spring']]
358
- # ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams)
359
- # ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
360
-
361
-
362
-
363
-
364
-
365
-
366
- # ivb_merged[['start_speed_difference', 'ivb_difference', 'hb_difference','spin_rate_difference','vaa_difference','haa_difference',
367
- # 'horizontal_release_difference', 'vertical_release_difference',
368
- # 'extension_difference']] = ivb_merged[['start_speed_spring', 'ivb_spring', 'hb_spring',
369
- # 'spin_rate_spring', 'vaa_spring','haa_spring','horizontal_release_spring', 'vertical_release_spring',
370
- # 'extension_spring']].values - ivb_merged[['start_speed_2023', 'ivb_2023', 'hb_2023',
371
- # 'spin_rate_2023', 'vaa_2023','haa_2023','horizontal_release_2023', 'vertical_release_2023',
372
- # 'extension_2023']].values
373
- # ivb_merged_new = ivb_merged.reset_index()
374
-
375
- # ivb_merged_new = ivb_merged_new[
376
- # pd.isnull(ivb_merged_new['pitches_2023']) &
377
- # pd.notnull(ivb_merged_new['pitches_spring']) &
378
- # ivb_merged_new['pitcher_id'].isin(ivb_merged_new[pd.notnull(ivb_merged_new['pitches_2023'])]['pitcher_id'])
379
- # ][
380
-
381
- # ['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
382
- # 'pitches_spring', 'start_speed_spring', 'ivb_spring',
383
- # 'hb_spring', 'spin_rate_spring', 'vaa_spring','haa_spring', 'horizontal_release_spring',
384
- # 'vertical_release_spring', 'extension_spring']
385
- # ]#.reset_index()
386
-
387
- # # ivb_merged_new = ivb_merged.copy().reset_index()
388
- # ivb_merged_new['pitcher_team'] = ivb_merged_new['pitcher_id'].map(spring_teams)
389
- # ivb_merged_new = ivb_merged_new.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
390
- # #ivb_merged_new.to_clipboard(header=False)
391
- # df_2024_date_min = df_2024.groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type','game_date'])[['game_date']].min()
392
- # ivb_merged_new = ivb_merged_new.merge(right=df_2024_date_min,
393
- # left_index=True,
394
- # right_index=True)
395
- # ivb_merged_new = ivb_merged_new.drop(columns=['game_date'])
396
-
397
- # return render.DataGrid(
398
- # ivb_merged_new.reset_index(),
399
- # width='fit-content',
400
- # height=750,
401
- # filters=True,
402
- # )
403
 
404
- @output
405
- @render.data_frame
406
- def table_stuff():
407
-
 
408
 
409
- df_2024_update = df_clean(df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
410
- (pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])])
411
 
412
- features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff']
 
 
 
413
 
414
- targets = ['delta_run_exp_mean']
415
 
416
 
417
- from scipy import stats
418
- df_2024_update['y_pred'] = model.predict(df_2024_update[features])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
 
420
 
421
- y_pred_mean = -0.0023964706
422
- y_pred_std =0.0057581966
423
- # y_pred_mean = -0.0136602735
424
- # y_pred_std = 0.006434487
425
 
426
- ## tjStuff+
427
- df_2024_stuff = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team']).agg(
428
- pitches = ('y_pred','count'),
429
- run_exp = ('y_pred','mean'),)
430
- # run_exp_loc = ('y_pred_loc','mean'))
431
 
432
- df_2024_stuff['run_exp_mean'] = y_pred_mean
433
- df_2024_stuff['run_exp_std'] = y_pred_std
434
 
435
- df_2024_stuff_50 = df_2024_stuff[df_2024_stuff.pitches >= 1]
 
 
 
 
 
 
 
 
 
 
 
 
436
 
437
- df_2024_stuff_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_50.run_exp + df_2024_stuff_50.run_exp_mean) / df_2024_stuff_50.run_exp_std)
 
 
 
 
 
 
 
 
 
438
 
 
439
 
440
- df_2024_stuff_pitch = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','pitch_type']).agg(
441
- pitches = ('y_pred','count'),
442
- run_exp = ('y_pred','mean'),)
443
- # run_exp_loc = ('y_pred_loc','mean'))
444
 
445
- df_2024_stuff_pitch['run_exp_mean'] = y_pred_mean
446
- df_2024_stuff_pitch['run_exp_std'] = y_pred_std
447
 
448
- df_2024_stuff_pitch_50 = df_2024_stuff_pitch[df_2024_stuff_pitch.pitches >= 1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
- df_2024_stuff_pitch_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_pitch_50.run_exp + df_2024_stuff_pitch_50.run_exp_mean) / df_2024_stuff_pitch_50.run_exp_std)
 
 
451
 
452
- df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50.reset_index().pivot(index=['pitcher_id','pitcher_name','pitcher_team'],
453
- columns=['pitch_type'],
454
- values=['tj_stuff_plus'])
455
 
456
- df_2024_stuff_pitch_50_pivot['all'] = df_2024_stuff_pitch_50_pivot.index.map(df_2024_stuff_50['tj_stuff_plus'].to_dict())
457
- ## Difference
458
- print('Sheet6')
459
- df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.sort_index(level=[1])
460
- df_2024_stuff_pitch_50_pivot.columns = df_2024_stuff_pitch_50_pivot.columns.droplevel()
461
- column_list = list(df_2024_stuff_pitch_50_pivot.columns[:-1])
462
- column_list.append('All')
463
- df_2024_stuff_pitch_50_pivot.columns = column_list
464
- df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.applymap(lambda x: int(x) if not pd.isna(x) else x)
465
- df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.reset_index()
466
 
467
- return render.DataGrid(
468
- df_2024_stuff_pitch_50_pivot,
469
- width='fit-content',
470
- height=750,
471
- filters=True)
472
-
473
- @output
474
- @render.data_frame
475
- def table_stuff_day():
476
 
477
 
478
- df_2024_update = df_clean(df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
479
- (pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])])
480
- print('made it here')
481
 
482
- features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff']
483
 
484
- targets = ['delta_run_exp_mean']
 
485
 
 
 
 
 
486
 
487
- from scipy import stats
488
- df_2024_update['y_pred'] = model.predict(df_2024_update[features])
489
 
490
 
491
- y_pred_mean = -0.0023964706
492
- y_pred_std =0.0057581966
493
- # y_pred_mean = -0.0136602735
494
- # y_pred_std = 0.006434487
495
 
496
- ## tjStuff+
497
- df_2024_stuff_daily = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','game_date']).agg(
498
- pitches = ('y_pred','count'),
499
- run_exp = ('y_pred','mean'),)
500
- # run_exp_loc = ('y_pred_loc','mean'))
501
 
502
- df_2024_stuff_daily['run_exp_mean'] = y_pred_mean
503
- df_2024_stuff_daily['run_exp_std'] = y_pred_std
504
 
505
- df_2024_stuff_daily_50 = df_2024_stuff_daily[df_2024_stuff_daily.pitches >= 1]
506
 
507
- df_2024_stuff_daily_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_daily_50.run_exp + df_2024_stuff_daily_50.run_exp_mean) / df_2024_stuff_daily_50.run_exp_std)
 
508
 
509
 
510
- df_2024_stuff_daily_pitch = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','pitch_type','game_date']).agg(
511
- pitches = ('y_pred','count'),
512
- run_exp = ('y_pred','mean'),)
513
- # run_exp_loc = ('y_pred_loc','mean'))
 
 
 
 
 
514
 
515
- df_2024_stuff_daily_pitch['run_exp_mean'] = y_pred_mean
516
- df_2024_stuff_daily_pitch['run_exp_std'] = y_pred_std
517
 
518
- df_2024_stuff_daily_pitch_50 = df_2024_stuff_daily_pitch[df_2024_stuff_daily_pitch.pitches >= 1]
 
 
 
 
519
 
520
- df_2024_stuff_daily_pitch_50['tj_stuff_plus'] = 100 + 10*((-df_2024_stuff_daily_pitch_50.run_exp + df_2024_stuff_daily_pitch_50.run_exp_mean) / df_2024_stuff_daily_pitch_50.run_exp_std)
521
- df_2024_stuff_daily_pitch_50 = df_2024_stuff_daily_pitch_50.reset_index()
522
- df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50.pivot(index=['pitcher_id','pitcher_name','pitcher_team','game_date'],
523
- columns=['pitch_type'],
524
- values=['tj_stuff_plus'])
525
- print('made it here')
526
- df_2024_stuff_daily_pitch_50_pivot['all'] = df_2024_stuff_daily_pitch_50_pivot.index.map(df_2024_stuff_daily_50['tj_stuff_plus'].to_dict())
527
- df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.sort_index(level=[1,3])
528
- print(df_2024_stuff_daily_pitch_50_pivot)
529
 
530
- df_2024_stuff_daily_pitch_50_pivot.columns = df_2024_stuff_daily_pitch_50_pivot.columns.droplevel()
531
- column_list = list(df_2024_stuff_daily_pitch_50_pivot.columns[:-1])
532
- column_list.append('All')
533
- df_2024_stuff_daily_pitch_50_pivot.columns = column_list
534
- df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.applymap(lambda x: int(x) if not pd.isna(x) else x)
535
- df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.reset_index()
536
 
537
- return render.DataGrid(
538
- df_2024_stuff_daily_pitch_50_pivot,
539
- width='fit-content',
540
- height=750,
541
- filters=True)
542
 
543
 
544
 
 
 
 
545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
 
547
 
 
 
 
 
548
 
549
 
 
550
 
551
  app = App(app_ui, server)
 
 
 
1
  import pandas as pd
 
 
 
2
  import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import pitch_summary_functions as psf
6
+ import requests
7
+ import matplotlib
8
+ from api_scraper import MLB_Scrape
9
+
10
+
11
+
12
import os

# Categorical palette handed to seaborn (sns.set_theme) for every figure.
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

# Pitch-level data for the 2024 season.
# The original path only exists on the author's machine, so the location can be
# overridden with the MLB_PITCH_DATA_CSV environment variable (e.g. inside the
# Docker image); when the variable is unset the original path is used unchanged.
_pitch_data_csv = os.environ.get(
    'MLB_PITCH_DATA_CSV',
    'C:/Users/thoma/Google Drive/Python/Baseball/season_stats/2024/2024_regular_data.csv')
df_2024 = pd.read_csv(_pitch_data_csv, index_col=[0])
# A play_id can appear more than once in the file; keep only its last row.
df_2024 = df_2024.drop_duplicates(subset=['play_id'], keep='last')
17
+
18
+ # ### Import Datasets
19
+ # import datasets
20
+ # from datasets import load_dataset
21
+ # dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2020.csv' ])
22
+ # dataset_train = dataset['train']
23
+ # df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
24
+
25
### PITCH COLOURS ###
# Hex colour for every pitch description drawn by the app.
# Blank lines keep the original visual grouping; the trailing comments are the
# alternative shades that were noted next to each entry.
pitch_colours = dict([
    ('Four-Seam Fastball', '#FF007D'),  # BC136F
    ('Sinker', '#98165D'),              # DC267F
    ('Cutter', '#BE5FA0'),

    ('Changeup', '#F79E70'),            # F75233
    ('Splitter', '#FE6100'),            # F75233
    ('Screwball', '#F08223'),
    ('Forkball', '#FFB000'),

    ('Slider', '#67E18D'),              # 1BB999 / 785EF0
    ('Sweeper', '#1BB999'),             # 37CD85 / 904039
    ('Slurve', '#376748'),              # 785EF0 / 549C07 / BEABD8

    ('Knuckle Curve', '#311D8B'),
    ('Curveball', '#3025CE'),
    ('Slow Curve', '#274BFC'),
    ('Eephus', '#648FFF'),

    ('Knuckleball', '#867A08'),

    ('Pitch Out', '#472C30'),
    ('Other', '#9C8975'),
])
50
 
 
 
 
51
# pitcher_id -> pitcher_team taken from the last row of each pitcher in df_2024.
spring_teams = (df_2024.groupby(['pitcher_id']).tail(1)[['pitcher_id', 'pitcher_team']]
                .set_index(['pitcher_id'])['pitcher_team'].to_dict())

season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024

# Fangraphs pitching leaderboard for the season; used below only to build the
# MLBAM id -> name / Fangraphs id lookups.
# A timeout keeps app start-up from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error directly instead of failing later
# while decoding or indexing the response body.
_fg_response = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching',
    timeout=30)
_fg_response.raise_for_status()
chad_fg = _fg_response.json()

chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [x['xMLBAMID'] for x in chad_fg['data']],
    'key_fangraphs': [x['playerid'] for x in chad_fg['data']],
    'Name': [x['PlayerName'] for x in chad_fg['data']],
})

# MLBAM id -> player name, ordered by name (feeds the 'Select Player' dropdown).
pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict()
# MLBAM id -> Fangraphs id, ordered by Fangraphs id.
mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
67
 
68
 
69
import joblib

# Reference table passed to psf.table_summary when the summary table is drawn.
statcast_pitch_summary = pd.read_csv('statcast_pitch_summary.csv')
# Blue -> white -> gold colour map.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['#648FFF', '#FFFFFF', '#FFB000'])

# Pre-process the raw pitch data with the project helpers.
df_2024_codes = psf.df_update_code(df_2024)
df_2024_update = psf.df_clean(df_2024_codes)

# tjStuff+ model and the constants used to standardise its predictions.
model = joblib.load('joblib_model/tjstuff_model_20240318.joblib')
y_pred_mean = 0.0011434511
y_pred_std = 0.006554768

features = [
    'start_speed', 'spin_rate', 'extension', 'ivb', 'hb', 'x0', 'z0',
    'fb_max_velo_diff', 'fb_max_ivb_diff', 'fb_max_hb_diff',
]

targets = ['delta_run_exp_mean']

# Score every pitch, then rescale so that a prediction equal to y_pred_mean maps
# to 100 and each y_pred_std BELOW the mean adds 10 (note the sign flip).
df_2024_update['y_pred'] = model.predict(df_2024_update[features])
df_2024_update['tj_stuff_plus'] = (
    100 + 10*((-df_2024_update['y_pred'] + y_pred_mean) / y_pred_std)
)

# Team logo links plus the team_id -> parent_org_id map used to pick a logo.
team_logos = pd.read_csv('team_logos.csv')
mlb_stats = MLB_Scrape()
teams_df = mlb_stats.get_teams()
team_logo_dict = teams_df.set_index(['team_id'])['parent_org_id'].to_dict()
95
+
96
# Shared font settings. The family was misspelled 'calibi' (not a real font, so
# matplotlib would fall back to its default); the rest of the file uses 'Calibri'.
font_properties, font_properties_titles, font_properties_axes = (
    {'family': 'Calibri', 'size': size} for size in (12, 20, 16)
)

# Module-level placeholders kept from the original script.
df_plot = []
ax2_loc = []
gs = []
fig = []

# Plot key -> label shown in the 'Plot Left/Middle/Right' dropdowns.
function_dict = {
    'velocity_kde': 'Velocity Distributions',
    'break_plot': 'Pitch Movement',
    'rolling_tj_stuff': 'Rolling tjStuff+',
    'location_lhb': 'Locations vs LHB',
    'location_rhb': 'Locations vs RHB',
}

# Split key -> label shown in the 'Select Split' dropdown.
split_dict = {
    'all': 'All',
    'left': 'LHB',
    'right': 'RHB',
}

# Split key -> batter_hand values kept when filtering pitches.
split_dict_hand = {
    'all': ['L', 'R'],
    'left': ['L'],
    'right': ['R'],
}

# Ball / strike choices (value == label) for the count filters.
ball_dict = {str(count): str(count) for count in range(4)}
strike_dict = {str(count): str(count) for count in range(3)}
128
+
129
+ # count_dict = {'0_0':'Through 0-0',
130
+ # '0_1':'Through 0-1',
131
+ # '0_2':'Through 0-2',
132
+ # '1_0':'Through 1-0',
133
+ # '1_1':'Through 1-1',
134
+ # '1_2':'Through 1-2',
135
+ # '2_1':'Through 2-1',
136
+ # '2_0':'Through 2-0',
137
+ # '3_0':'Through 3-0',
138
+ # '3_1':'Through 3-1',
139
+ # '2_2':'Through 2-2',
140
+ # '3_2':'Through 3-2'}
141
+
142
+ # count_dict_fg = {'0_0':'',
143
+ # '0_1':'61',
144
+ # '0_2':'62',
145
+ # '1_0':'63',
146
+ # '1_1':'64',
147
+ # '1_2':'65',
148
+ # '2_1':'66',
149
+ # '2_0':'67',
150
+ # '3_0':'68',
151
+ # '3_1':'69',
152
+ # '2_2':'70',
153
+ # '3_2':'71'}
154
 
155
+ from urllib.request import Request, urlopen
156
+ from shiny import App, reactive, ui, render
157
+ from shiny.ui import h2, tags
158
# UI layout: sidebar with the player/plot/count/split controls, main panel with the two summary tabs
159
# Page layout.
# Sidebar: player picker plus a server-rendered control ('test') that is a date
# range on the Season tab and a game picker on the Game tab, three plot
# selectors, ball/strike count filters, batter-hand split, rolling window and
# the 'Generate' button. Main panel: one large plot per tab; the tab set id
# 'my_tabs' is what the server reads to know which tab is active.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.panel_sidebar(
            ui.row(
                ui.column(
                    6,
                    ui.input_select('player_id', 'Select Player', pitcher_dicts,
                                    selectize=True, multiple=False)),
                # The original passed 'Select Game' as a second positional
                # argument, which output_ui interprets as `inline`, not as a
                # label; the control rendered by the server carries its own label.
                ui.column(6, ui.output_ui('test'))),

            ui.row(
                ui.column(
                    4,
                    ui.input_select('plot_id_1', 'Plot Left', function_dict,
                                    multiple=False, selected='velocity_kde')),
                ui.column(
                    4,
                    ui.input_select('plot_id_2', 'Plot Middle', function_dict,
                                    multiple=False, selected='rolling_tj_stuff')),
                ui.column(
                    4,
                    ui.input_select('plot_id_3', 'Plot Right', function_dict,
                                    multiple=False, selected='break_plot'))),

            # ui.input_select('count_id','Count',count_dict,multiple=True,selectize=True,selected='0_0'),

            ui.row(
                ui.column(
                    6,
                    ui.input_select('ball_id', 'Balls', ball_dict, multiple=False, selected='0'),
                    ui.input_radio_buttons(
                        "count_id_balls",
                        "Count Filter Balls",
                        {
                            "exact": "Exact Balls",
                            "greater": ">= Balls",
                            "lesser": "<= Balls",
                        },
                        selected='greater')),
                ui.column(
                    6,
                    ui.input_select('strike_id', 'Strikes', strike_dict, multiple=False, selected='0'),
                    ui.input_radio_buttons(
                        "count_id_strikes",
                        "Count Filter Strikes",
                        {
                            "exact": "Exact Strikes",
                            "greater": ">= Strikes",
                            "lesser": "<= Strikes",
                        },
                        selected='greater'))),

            ui.row(
                ui.column(
                    6,
                    ui.input_select('split_id', 'Select Split', split_dict, multiple=False)),
                ui.column(
                    6,
                    ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)',
                                     min=1, value=10))),

            ui.input_action_button("go", "Generate", class_="btn-primary"),

            width=4),
        ui.panel_main(
            ui.navset_tab(
                # ui.nav("Raw Data",
                #        ui.output_data_frame("raw_table")),
                ui.nav("Season Summary",
                       ui.output_plot('plot',
                                      width='2000px',
                                      height='2000px')),
                ui.nav("Game Summary",
                       ui.output_plot('plot_game',
                                      width='2000px',
                                      height='2000px')),
                id="my_tabs"))))
225
 
 
226
 
 
 
 
 
 
 
227
 
 
 
 
 
 
 
228
 
229
 
 
 
230
 
 
 
231
 
 
 
 
 
232
 
 
 
 
 
233
 
 
 
234
 
235
+ #print(app_ui)
236
# Player headshot URL template; `player_id` is the MLBAM id.
_HEADSHOT_URL = (
    'https://img.mlbstatic.com/mlb-photos/image/upload/'
    'd_people:generic:headshot:67:current.png/w_213,q_auto:best/'
    'v1/people/{player_id}/headshot/67/current.png'
)


def _message_figure(message):
    """Return a 9x9 figure that only shows *message* (nothing to plot yet)."""
    fig, ax = plt.subplots(1, 1, figsize=(9, 9))
    ax.text(x=0.5, y=0.5, s=message, fontsize=150, ha='center')
    # The original called ax.grid('off'); a non-empty string is truthy, so that
    # switches the grid ON. False is what was meant.
    ax.grid(False)
    return fig


def _filter_by_count(df, column, mode, threshold, noun):
    """Filter *df* on a count column and build the matching title fragment.

    Parameters: `column` is 'balls' or 'strikes'; `mode` is one of 'exact',
    'greater' (>=) or 'lesser' (<=); `threshold` is the selected count and
    `noun` the word used in the title ('Ball' / 'Strike').

    Returns `(filtered_df, title_fragment)`. An unrecognised mode leaves the
    frame untouched and contributes no title text.
    """
    if mode == 'exact':
        return df[df[column] == threshold], f'{threshold} {noun} Count; '
    if mode == 'greater':
        return df[df[column] >= threshold], f'At Least {threshold} {noun} Count; '
    if mode == 'lesser':
        return df[df[column] <= threshold], f'At Most {threshold} {noun} Count; '
    return df, ''


def _add_image(ax, image, position, zoom):
    """Place *image* on *ax* at axes-data *position*, scaled by *zoom*, without a frame."""
    from matplotlib.offsetbox import AnnotationBbox, OffsetImage

    ax.add_artist(AnnotationBbox(OffsetImage(image, zoom=zoom), position, frameon=False))


def server(input, output, session):
    """Shiny server function for the pitching-summary app.

    Registers three outputs:

    * ``test``      - sidebar control that depends on the active tab: a date
                      range on 'Season Summary', a game picker on 'Game Summary'.
    * ``plot``      - season summary figure for the selected pitcher.
    * ``plot_game`` - the same figure restricted to one selected game.

    Both plots are only recomputed when the 'Generate' button (``input.go``)
    fires. They share one selection helper and one drawing helper; the original
    carried two near-identical copies of that code.
    """

    def _select_pitches(game_id=None):
        """Apply the sidebar filters to df_2024_update.

        Returns `(pitcher_id, pitches, filter_title)` where `pitches` is an
        independent copy (so columns can be added without touching the shared
        frame) and `filter_title` is the count/split text shown under the title.
        """
        pitcher_id = int(input.player_id())
        split = input.split_id()

        keep = df_2024_update['pitcher_id'] == pitcher_id
        if game_id is not None:
            keep = keep & (df_2024_update['game_id'] == game_id)
        pitches = df_2024_update[keep]
        pitches = pitches[pitches['batter_hand'].isin(split_dict_hand[split])]

        balls_mode = input.count_id_balls()
        strikes_mode = input.count_id_strikes()
        balls = int(input.ball_id())
        strikes = int(input.strike_id())

        # ">= 0 balls and >= 0 strikes" is the untouched default: no filtering
        # and no count text in the title.
        if balls_mode == 'greater' and strikes_mode == 'greater' and balls == 0 and strikes == 0:
            ball_title = ''
            strike_title = ''
        else:
            pitches, ball_title = _filter_by_count(pitches, 'balls', balls_mode, balls, 'Ball')
            pitches, strike_title = _filter_by_count(pitches, 'strikes', strikes_mode, strikes, 'Strike')

        split_title = {'all': '', 'left': 'vs. LHH', 'right': 'vs. RHH'}[split]
        return pitcher_id, pitches.copy(), f'{ball_title}{strike_title}{split_title}'

    def _draw_summary(df_plot, pitcher_id, subtitle, date_line, fg_start, fg_end, filter_title):
        """Build the 20x20 summary figure and return it.

        Layout (top to bottom): title strip with headshot and team logo,
        Fangraphs stat line, three user-selected plots, pitch summary table
        with the pitch-colour legend, footer.

        `subtitle` and `date_line` are the second and third title lines;
        `fg_start` / `fg_end` bound the Fangraphs query; `filter_title` is the
        count/split description. `df_plot` must be non-empty.
        """
        from matplotlib.gridspec import GridSpec
        import urllib.error

        df_plot['pitch_type_count'] = df_plot.groupby(['pitcher_id'])['pitch_type'].cumcount() + 1
        df_plot['pitch_type_count_each'] = df_plot.groupby(['pitch_type'])['pitch_type'].cumcount() + 1
        df_plot = df_plot.sort_values(by=['pitch_description'])

        grouped_ivb = psf.group_ivb_update(
            df=df_plot,
            agg_list=['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type', 'pitch_description'])
        grouped_ivb_all = psf.group_ivb_update(
            df=df_plot,
            agg_list=['pitcher_id', 'pitcher_name', 'pitcher_hand'])

        plt.rcParams['font.family'] = 'Calibri'
        # Per-pitch-type total of is_pitch, stored on every row.
        df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")

        fig = plt.figure(figsize=(20, 20))
        plt.rcParams.update({'figure.autolayout': True})
        fig.set_facecolor('white')
        sns.set_theme(style="whitegrid", palette=colour_palette)

        gs = GridSpec(5, 5, height_ratios=[150, 75, 225, 325, 50], width_ratios=[1, 100, 100, 100, 1])
        gs.update(hspace=0.2, wspace=0.3)

        ax0 = fig.add_subplot(gs[0, :])
        ax1_table = fig.add_subplot(gs[1, :])
        ax2_left = fig.add_subplot(gs[2, 1])
        ax2_middle = fig.add_subplot(gs[2, 2])
        ax2_right = fig.add_subplot(gs[2, 3])
        ax3 = fig.add_subplot(gs[-2, :])
        ax1_table.axis('off')

        ## FANGRAPHS TABLE ###
        data_pull = psf.fangraphs_scrape(pitcher_id=pitcher_id,
                                         split=input.split_id(),
                                         start_date=fg_start,
                                         end_date=fg_end)
        psf.fangraphs_table(data=data_pull,
                            stats=['IP', 'WHIP', 'ERA', 'FIP', 'TBF', 'K%', 'BB%', 'K-BB%'],
                            ax=ax1_table)

        ### USER-SELECTED PLOTS (left / middle / right) ###
        chosen = [input.plot_id_1(), input.plot_id_2(), input.plot_id_3()]
        for plot_key, axis, slot in zip(chosen, [ax2_left, ax2_middle, ax2_right], [1, 2, 3]):
            if plot_key == 'velocity_kde':
                psf.velocity_kdes(df=df_plot, ax=axis, gs=gs, gs_list=slot, fig=fig)
            elif plot_key == 'rolling_tj_stuff':
                psf.tj_stuff_roling(df=df_plot, window=int(input.rolling_window()), ax=axis)
            elif plot_key == 'break_plot':
                psf.break_plot(df=df_plot, ax=axis)
            elif plot_key == 'location_lhb':
                psf.location_plot(df=df_plot, ax=axis, hand='L')
            elif plot_key == 'location_rhb':
                psf.location_plot(df=df_plot, ax=axis, hand='R')

        # Legend entries, most frequent pitch first. Empty scatters serve as the
        # legend handles; a description missing from pitch_colours falls back to
        # the 'Other' colour instead of raising KeyError.
        items_in_order = (df_plot['pitch_description'].value_counts()
                          .sort_values(ascending=False).index.tolist())
        ordered_colors = [
            plt.scatter([], [], color=pitch_colours.get(name, pitch_colours['Other']), marker='o', s=100)
            for name in items_in_order
        ]

        ### PITCH SUMMARY TABLE ###
        psf.table_summary(df=df_plot.copy(),
                          pitcher_id=pitcher_id,
                          ax=ax3,
                          df_group=grouped_ivb.copy(),
                          df_group_all=grouped_ivb_all.copy(),
                          statcast_pitch_summary=statcast_pitch_summary.copy())

        # Legend font family was misspelled 'calibi'; matches the rest of the figure now.
        ax3.legend(ordered_colors, items_in_order, bbox_to_anchor=(0.1, 0.81, 0.8, 0.2), ncol=5,
                   fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2,
                   prop={'family': 'Calibri', 'size': 20})

        ################## Title ##########
        title_spot = f'{df_plot.pitcher_name.values[0]}'
        ax0.text(x=0.5, y=0.8, s=title_spot, fontname='Calibri', ha='center', fontsize=56, va='top')
        ax0.text(x=0.5, y=0.5, s=subtitle, fontname='Calibri', ha='center', fontsize=40, va='top',
                 fontstyle='italic')
        ax0.text(x=0.5, y=0.25, s=date_line, fontname='Calibri', ha='center', fontsize=30, va='top',
                 fontstyle='italic')
        ax0.text(x=0.5, y=0.05, s=filter_title, fontname='Calibri', ha='center', fontsize=20, va='top')
        ax0.axis('off')

        # Headshot. The original fallback branch only reassigned the URL and never
        # loaded it, so the image variable was unbound and the plot crashed; the
        # fallback image (person id 1) is now actually read.
        try:
            headshot = plt.imread(_HEADSHOT_URL.format(player_id=pitcher_id))
        except urllib.error.HTTPError:
            headshot = plt.imread(_HEADSHOT_URL.format(player_id=1))
        _add_image(ax0, headshot, (0.125, 0.4), zoom=0.5)

        # Team logo (decorative, skipped when the team cannot be resolved).
        player_bio = requests.get(
            url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam",
            timeout=30).json()
        if 'currentTeam' in player_bio['people'][0]:
            try:
                parent_org = team_logo_dict[player_bio['people'][0]['currentTeam']['id']]
                logo_url = team_logos[team_logos['id'] == parent_org]['imageLink'].values[0]
                _add_image(ax0, plt.imread(logo_url), (0.875, 0.40), zoom=0.4)
            except (IndexError, KeyError):
                # Unknown team id or no logo row: leave the logo out rather than fail the plot.
                print(f'No team logo found for pitcher {pitcher_id}')

        ############ FOOTER ################
        axfooter = fig.add_subplot(gs[-1, :])
        axfooter.text(x=0.05, y=1, s='By: Thomas Nestico\n @TJStats', fontname='Calibri', ha='left',
                      fontsize=24, va='top')
        axfooter.text(x=1-0.05, y=1, s='Data: MLB, Fangraphs', ha='right', fontname='Calibri',
                      fontsize=24, va='top')
        axfooter.text(x=0.5, y=0.8, s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
                      ha='center', va='center', fontname='Calibri', fontsize=16)
        axfooter.axis('off')

        fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
        return fig

    @render.ui
    def test():
        """Render the tab-dependent sidebar control (date range or game picker)."""
        active_tab = input.my_tabs()

        if active_tab == 'Season Summary':
            first_day = df_2024.game_date.min()
            last_day = df_2024.game_date.max()
            return ui.input_date_range("date_range_id", "Date range input",
                                       start=first_day, end=last_day, width=2,
                                       min=first_day, max=last_day)

        if active_tab == 'Game Summary':
            if len(input.player_id()) < 1:
                return None
            pitcher_id_select = int(input.player_id())
            games = df_2024_update[df_2024_update['pitcher_id'] == pitcher_id_select].copy()
            games['game_opp'] = games['game_date'].astype(str) + ' vs ' + games['batter_team'].astype(str)
            games = games.drop_duplicates(subset=['pitcher_id', 'game_id', 'game_opp'])
            # Flat {game_id: label} mapping. DataFrame.to_dict() produced
            # {'game_opp': {...}}, which the select renders as an option group
            # headed "game_opp".
            date_dict = games.set_index('game_id')['game_opp'].to_dict()
            return ui.input_select("game_id", "Select Game", date_dict, selectize=True)

        return None

    @output
    @render.plot
    @reactive.event(input.go, ignore_none=False)
    def plot():
        """Season summary for the selected pitcher."""
        if len(input.player_id()) < 1:
            return _message_figure('Please Select\nA Player')

        pitcher_id_select, df_plot, filter_title = _select_pitches()
        if len(df_plot) < 1:
            return _message_figure('Please Select\nOther Parameters')

        # NOTE(review): the date range only bounds the Fangraphs query and the
        # title; the pitch data itself is not filtered by date. Confirm whether
        # df_plot should also be restricted to this range.
        start_date = input.date_range_id()[0]
        end_date = input.date_range_id()[1]

        return _draw_summary(df_plot,
                             pitcher_id=pitcher_id_select,
                             subtitle='Season Pitching Summary',
                             date_line=f'{start_date} to {end_date}',
                             fg_start=start_date,
                             fg_end=end_date,
                             filter_title=filter_title)

    @output
    @render.plot
    @reactive.event(input.go, ignore_none=False)
    def plot_game():
        """Single-game summary for the selected pitcher and game."""
        if len(input.player_id()) < 1:
            return _message_figure('Please Select\nA Player')

        pitcher_id_select, df_plot, filter_title = _select_pitches(game_id=int(input.game_id()))
        if len(df_plot) < 1:
            return _message_figure('Please Select\nOther Parameters')

        df_plot['game_opp'] = df_plot['game_date'].astype(str) + ' vs ' + df_plot['batter_team'].astype(str)
        game_date = df_plot['game_date'].values[0]

        return _draw_summary(df_plot,
                             pitcher_id=pitcher_id_select,
                             subtitle='Game Pitching Summary',
                             date_line=df_plot['game_opp'].values[0],
                             fg_start=game_date,
                             fg_end=game_date,
                             filter_title=filter_title)
778
 
779
  app = App(app_ui, server)
joblib_model/barrel_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9428e89f2a408148377efb3cd169dc8790bcc89df9495cb895b9db5a955e8fb7
3
+ size 11447
joblib_model/in_zone.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5300b15a6ccfb1dd1e79c85bd9ea478a1945c454845e6be31cd8815e4063a3e
3
+ size 54459064
joblib_model/model_attack_zone.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2671d4db2606cfee299dcffba2a94138fce77c1b7ef6ad14695a972a38dda3c8
3
+ size 50570139
joblib_model/no_swing.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3da3e7ab2b513b87d05e90ae30c788ac819dfcaa7cc1cd9943fc13d2958a00f
3
+ size 279409
joblib_model/swing.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fef4a66363e5f3fdc70ae45c5382bd986c800ff8bf9296a1f9b334461e70fd4
3
+ size 262137
joblib_model/tjloc_model_20240311.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3c0bf3adc88f904fedc22117f7aa6802926519e1587729107cd2902bace574
3
+ size 121855
joblib_model/tjstuff_model_20240123.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f5c09c7c37ef8bba7476cd3c864c0060a196aa6c9591850352190eab0458b4
3
+ size 121388
joblib_model/tjstuff_model_20240317.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f5c09c7c37ef8bba7476cd3c864c0060a196aa6c9591850352190eab0458b4
3
+ size 121388
joblib_model/tjstuff_model_20240318.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4a03c6d8877e68b4098ff139292c75ee9fab2f58ee8687cd4484182e15c0ca
3
+ size 432529
joblib_model/xwoba_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
3
+ size 10684246
pitch_summary_functions.py ADDED
@@ -0,0 +1,1005 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ import json
5
+ from matplotlib.ticker import FuncFormatter
6
+ from matplotlib.ticker import MaxNLocator
7
+ import math
8
+ from matplotlib.patches import Ellipse
9
+ import matplotlib.transforms as transforms
10
+ import matplotlib.colors
11
+ import matplotlib.colors as mcolors
12
+ import seaborn as sns
13
+ import matplotlib.pyplot as plt
14
+ import requests
15
+
16
# Shared Matplotlib font dictionaries for tick labels, titles and axis labels.
# The family was previously misspelled 'calibi'; the figure footer in app.py
# requests 'Calibri', so the same family is used here.
font_properties = {'family': 'Calibri', 'size': 12}
font_properties_titles = {'family': 'Calibri', 'size': 20}
font_properties_axes = {'family': 'Calibri', 'size': 16}

# General-purpose colour cycle used by the plotting helpers below.
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

# Default date window and FanGraphs season used by the scrapers.
season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024

# Pitching leaderboard pulled once at import time; it supplies the
# MLBAM id <-> FanGraphs id <-> player name mapping built below.
# A timeout keeps a stalled FanGraphs connection from hanging the import forever.
chad_fg = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching',
    timeout=60,
).json()

# Blue -> white -> orange colour map used for table colour coding.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])

# One row per leaderboard entry: MLBAM id, FanGraphs id and display name.
chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [x['xMLBAMID'] for x in chad_fg['data']],
    'key_fangraphs': [x['playerid'] for x in chad_fg['data']],
    'Name': [x['PlayerName'] for x in chad_fg['data']],
})

# MLBAM id -> player name (ordered by name) and MLBAM id -> FanGraphs id.
pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict()
mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
38
+
39
+
40
+ ### DF UPDATE CODE ###
41
def df_update_code(df):
    """Add approach angles, plate-appearance flags and per-pitch outcome columns.

    The frame is expected to hold one row per pitch and to provide the columns
    read below: ``vy0``, ``ay``, ``y0``, ``vz0``, ``az``, ``vx0``, ``ax``,
    ``event_type``, ``play_id``, ``start_speed``, ``zone``,
    ``play_description``, ``play_code``, ``launch_speed`` and ``launch_angle``.

    Side effect: the kinematic columns (``vy_f``, ``t``, ``vz_f``, ``vaa``,
    ``vx_f``, ``haa``) and the ``pa``/``k``/``bb``/``k_minus_bb`` flags are
    written onto the frame passed in. Everything after that is computed on
    filtered copies.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw pitch-level data.

    Returns
    -------
    pandas.DataFrame
        Rows de-duplicated on ``play_id``, with a non-null ``start_speed`` and
        a ``play_description`` in the recognised pitch-result list, extended
        with batted-ball flags (``bip``, ``barrel``, ``sweet_spot``,
        ``hard_hit``), ``tb``, ``woba``, ``woba_codes`` and the
        swing/whiff/zone indicator columns.
    """
    print('Starting')

    # Velocity components at y = 17/12 (presumably the front of home plate,
    # 17 in expressed in feet -- confirm units), then the vertical and
    # horizontal approach angles in degrees.
    df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5
    df['t'] = (df['vy_f'] - df['vy0']) / df['ay']
    df['vz_f'] = df['vz0'] + (df['az'] * df['t'])
    df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi)
    df['vx_f'] = df['vx0'] + (df['ax'] * df['t'])
    df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi)

    # Event types that end a plate appearance.
    end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                 'double', 'sac_fly', 'force_out', 'home_run',
                 'grounded_into_double_play', 'fielders_choice', 'field_error',
                 'triple', 'sac_bunt', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play', 'catcher_interf', 'other_out']
    df['pa'] = df.event_type.isin(end_codes)

    # Any event whose name contains 'strikeout' / 'walk' counts as a K / BB
    # (e.g. 'strikeout_double_play', 'intent_walk').
    event_values = df.event_type.fillna('None').unique()
    strikeout_events = [event for event in event_values if 'strikeout' in event]
    walk_events = [event for event in event_values if 'walk' in event]
    df['k'] = df.event_type.isin(strikeout_events)
    df['bb'] = df.event_type.isin(walk_events)
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)

    # One row per pitch, and only pitches with a measured release speed.
    df = df.drop_duplicates(subset=['play_id'])
    df = df.dropna(subset=['start_speed'])

    # Pitch results that involve a swing.
    swings_in = ['Swinging Strike', 'In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Swinging Strike (Blocked)',
                 'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']

    # Pitch results kept in the output; anything else is filtered out.
    codes_in = ['In play, out(s)',
                'Swinging Strike',
                'Ball',
                'Foul',
                'In play, no out',
                'Called Strike',
                'Foul Tip',
                'In play, run(s)',
                'Hit By Pitch',
                'Ball In Dirt',
                'Pitchout',
                'Swinging Strike (Blocked)',
                'Foul Bunt',
                'Missed Bunt',
                'Foul Pitchout',
                'Intent Ball',
                'Swinging Pitchout']

    # Zone codes below 10 are treated as inside the strike zone.
    df['in_zone'] = df['zone'] < 10

    df_codes = df[df.play_description.isin(codes_in)].dropna(subset=['in_zone'])

    # Batted-ball flags. np.select with a NaN default yields float columns:
    # 0.0 when the measurement is missing, 1.0 when the rule matches, NaN otherwise.
    df_codes['bip'] = ~df_codes.launch_speed.isna()
    barrel_conditions = [
        df_codes['launch_speed'].isna(),
        (df_codes['launch_speed']*1.5 - df_codes['launch_angle'] >= 117)
        & (df_codes['launch_speed'] + df_codes['launch_angle'] >= 124)
        & (df_codes['launch_speed'] > 98)
        & (df_codes['launch_angle'] >= 8)
        & (df_codes['launch_angle'] <= 50),
    ]
    df_codes['barrel'] = np.select(barrel_conditions, [False, True], default=np.nan)

    sweet_spot_conditions = [
        df_codes['launch_angle'].isna(),
        (df_codes['launch_angle'] >= 8) & (df_codes['launch_angle'] <= 32),
    ]
    df_codes['sweet_spot'] = np.select(sweet_spot_conditions, [False, True], default=np.nan)

    hard_hit_conditions = [
        df_codes['launch_speed'].isna(),
        df_codes['launch_speed'] >= 94.5,
    ]
    df_codes['hard_hit'] = np.select(hard_hit_conditions, [False, True], default=np.nan)

    # Total bases for hits; NaN for every other event.
    total_base_conditions = [
        df_codes['event_type'] == 'single',
        df_codes['event_type'] == 'double',
        df_codes['event_type'] == 'triple',
        df_codes['event_type'] == 'home_run',
    ]
    df_codes['tb'] = np.select(total_base_conditions, [1, 2, 3, 4], default=np.nan)

    # Per-event weights used for wOBA; NaN for events without a weight.
    # NOTE(review): the origin/season of these weights is not shown here.
    woba_conditions = [
        df_codes['event_type'] == 'walk',
        df_codes['event_type'] == 'hit_by_pitch',
        df_codes['event_type'] == 'single',
        df_codes['event_type'] == 'double',
        df_codes['event_type'] == 'triple',
        df_codes['event_type'] == 'home_run',
    ]
    woba_weights = [0.705, 0.688, 0.897, 1.233, 1.612, 2.013]
    df_codes['woba'] = np.select(woba_conditions, woba_weights, default=np.nan)

    # Events flagged with woba_codes = 1 (same as end_codes minus
    # 'intent_walk' and 'catcher_interf'); all others get NaN.
    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']
    df_codes['woba_codes'] = np.select([df_codes['event_type'].isin(woba_codes)], [1], default=np.nan)

    # Per-pitch 0/1 indicators derived from the play code / description.
    df_codes['pitches'] = 1
    df_codes['whiffs'] = df_codes.play_code.isin(['S', 'W', 'T']).astype('int64')
    df_codes['csw'] = df_codes.play_code.isin(['S', 'W', 'T', 'C']).astype('int64')
    df_codes['swings'] = df_codes.play_description.isin(swings_in).astype('int64')

    # Zone / swing / contact combinations.
    df_codes['out_zone'] = df_codes.in_zone == False
    df_codes['zone_swing'] = (df_codes.in_zone == True) & (df_codes.swings == 1)
    df_codes['zone_contact'] = (df_codes.in_zone == True) & (df_codes.swings == 1) & (df_codes.whiffs == 0)
    df_codes['ozone_swing'] = (df_codes.in_zone == False) & (df_codes.swings == 1)
    df_codes['ozone_contact'] = (df_codes.in_zone == False) & (df_codes.swings == 1) & (df_codes.whiffs == 0)

    return df_codes
217
+
218
+ ### GET COLOURS##
219
def get_color(value, normalize, cmap_sum):
    """Return the hex colour that ``cmap_sum`` assigns to ``value``.

    ``normalize`` is called on ``value`` first and its result is passed to the
    colour map; the colour that comes back is converted with
    ``mcolors.to_hex``.
    """
    scaled_value = normalize(value)
    rgba = cmap_sum(scaled_value)
    return mcolors.to_hex(rgba)
222
+
223
+ ### PERCENTILE ###
224
def percentile(n):
    """Build an aggregation callable that returns the ``n`` quantile of its input.

    ``n`` is a fraction (e.g. ``0.9``). The returned function is named
    ``percentile_XX`` (``XX`` = ``n * 100`` rounded, zero-padded to two
    digits) so it gets a readable label when used as an aggregator.
    """
    def percentile_(x):
        return x.quantile(n)

    percentile_.__name__ = f'percentile_{n * 100:02.0f}'
    return percentile_
229
+
230
+ ### TJ STUFF+ DF CLEAN ###
231
def df_clean(df):
    """Return a copy of ``df`` with handedness-normalised and fastball-relative features.

    Steps, in order:

    1. For left-handed pitchers, flip the sign of ``hb`` and ``x0`` and
       reflect ``spin_direction`` (``360 - value``).
    2. Add 0/1 indicator columns ``pitch_l`` and ``bat_l``.
    3. Collapse pitch-type codes ``FT``->``SI``, ``SV``->``SL``, ``FO``->``FS``.
    4. Per pitcher, summarise the FF/FC/SI pitches and add the difference of
       every pitch from that summary (``fb_*_diff`` columns).
    5. Add each pitch's gap to the pitcher's maximum speed and maximum ``ivb``.
    6. Add approach-angle columns (``vaa``, ``haa``) and their intermediates.
    7. Drop rows without a ``pitch_type`` and fill every remaining NaN with 0.

    The input frame is not modified.
    """
    cleaned = df.copy()

    # Mirror left-handers so movement and release side share one orientation.
    is_lefty = cleaned['pitcher_hand'] == 'L'
    cleaned.loc[is_lefty, 'hb'] *= -1
    cleaned.loc[is_lefty, 'x0'] *= -1
    cleaned.loc[is_lefty, 'spin_direction'] = 360 - cleaned.loc[is_lefty, 'spin_direction']

    cleaned['pitch_l'] = is_lefty.astype('int64')
    cleaned['bat_l'] = (cleaned['batter_hand'] == 'L').astype('int64')

    pitch_type_aliases = {'FT': 'SI', 'SV': 'SL', 'FO': 'FS'}
    cleaned['pitch_type'] = cleaned['pitch_type'].replace(pitch_type_aliases)

    # Per-pitcher fastball reference values (FF / FC / SI only).
    fastballs = cleaned[cleaned.pitch_type.isin(["FF", "FC", "SI"])]
    fastball_summary = fastballs.groupby(['pitcher_id']).agg(
        fb_velo=('start_speed', 'mean'),
        fb_max_ivb=('ivb', percentile(0.9)),
        fb_max_x=('hb', percentile(0.9)),
        fb_min_x=('hb', percentile(0.1)),
        fb_max_velo=('start_speed', percentile(0.9)),
        fb_axis=('spin_direction', 'mean'),
    )
    cleaned = cleaned.merge(fastball_summary, left_on='pitcher_id', right_index=True, how='left')

    # Each pitch relative to its pitcher's fastball profile.
    cleaned['fb_velo_diff'] = cleaned['start_speed'] - cleaned['fb_velo']
    cleaned['fb_max_ivb_diff'] = cleaned['ivb'] - cleaned['fb_max_ivb']
    cleaned['fb_max_hb_diff'] = -(cleaned['hb'] - cleaned['fb_max_x']).abs()
    cleaned['fb_min_hb_diff'] = cleaned['hb'] - cleaned['fb_min_x']
    cleaned['fb_max_velo_diff'] = cleaned['start_speed'] - cleaned['fb_max_velo']
    cleaned['fb_axis_diff'] = cleaned['spin_direction'] - cleaned['fb_axis']

    # Each pitch relative to the pitcher's overall maxima.
    by_pitcher = cleaned.groupby(['pitcher_id'])
    cleaned['max_speed'] = by_pitcher['start_speed'].transform('max')
    cleaned['max_speed_diff'] = cleaned['start_speed'] - cleaned['max_speed']

    cleaned['max_ivb'] = cleaned.groupby(['pitcher_id'])['ivb'].transform('max')
    cleaned['max_ivb_diff'] = cleaned['ivb'] - cleaned['max_ivb']

    # Velocity components at y = 17/12 and the resulting approach angles (degrees).
    cleaned['vy_f'] = -(cleaned['vy0']**2 - (2 * cleaned['ay'] * (cleaned['y0'] - 17/12)))**0.5
    cleaned['t'] = (cleaned['vy_f'] - cleaned['vy0']) / cleaned['ay']
    cleaned['vz_f'] = cleaned['vz0'] + (cleaned['az'] * cleaned['t'])
    cleaned['vaa'] = -np.arctan(cleaned['vz_f'] / cleaned['vy_f']) * (180 / np.pi)
    cleaned['vx_f'] = cleaned['vx0'] + (cleaned['ax'] * cleaned['t'])
    cleaned['haa'] = -np.arctan(cleaned['vx_f'] / cleaned['vy_f']) * (180 / np.pi)

    cleaned = cleaned.dropna(subset=['pitch_type'])
    return cleaned.fillna(0)
297
+
298
+ ### PITCH COLOURS ###
299
# Hex colour for every pitch description, grouped by pitch family so related
# pitches share a hue.
pitch_colours = {
    # Fastballs
    'Four-Seam Fastball': '#FF007D',
    'Sinker': '#98165D',
    'Cutter': '#BE5FA0',

    # Offspeed
    'Changeup': '#F79E70',
    'Splitter': '#FE6100',
    'Screwball': '#F08223',
    'Forkball': '#FFB000',

    # Sliders
    'Slider': '#67E18D',
    'Sweeper': '#1BB999',
    'Slurve': '#376748',

    # Curveballs
    'Knuckle Curve': '#311D8B',
    'Curveball': '#3025CE',
    'Slow Curve': '#274BFC',
    'Eephus': '#648FFF',

    # Knuckleball
    'Knuckleball': '#867A08',

    # Everything else
    'Pitch Out': '#472C30',
    'Other': '#9C8975',
}
323
+
324
+ ### PITCH ELLIPSE ###
325
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Draw the covariance confidence ellipse of *x* and *y* on *ax*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data; both must expose ``.size`` and be the same size.

    ax : matplotlib.axes.Axes
        The axes object the ellipse is added to.

    n_std : float
        Number of standard deviations that sets the ellipse's radii.

    facecolor : colour
        Fill colour of the ellipse.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`. The ellipse is always
        drawn with ``linewidth=2`` and ``linestyle='--'``.

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*, or ``None`` when building the ellipse
        raised a ``ValueError``.

    Raises
    ------
    ValueError
        If *x* and *y* differ in size.
    """
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    try:
        covariance = np.cov(x, y)
        variance_x = covariance[0, 0]
        variance_y = covariance[1, 1]
        correlation = covariance[0, 1] / np.sqrt(variance_x * variance_y)

        # For a 2-D dataset the eigenvalues of the correlation matrix are
        # 1 + r and 1 - r, which give the radii of the unit ellipse.
        unit_ellipse = Ellipse(
            (0, 0),
            width=np.sqrt(1 + correlation) * 2,
            height=np.sqrt(1 - correlation) * 2,
            facecolor=facecolor,
            linewidth=2,
            linestyle='--',
            **kwargs,
        )

        # Rotate the unit ellipse, stretch it by n_std standard deviations
        # along each axis, and move it onto the data mean.
        to_data_space = transforms.Affine2D()
        to_data_space = to_data_space.rotate_deg(45)
        to_data_space = to_data_space.scale(np.sqrt(variance_x) * n_std,
                                            np.sqrt(variance_y) * n_std)
        to_data_space = to_data_space.translate(np.mean(x), np.mean(y))

        unit_ellipse.set_transform(to_data_space + ax.transData)
    except ValueError:
        return None

    return ax.add_patch(unit_ellipse)
385
+
386
+ # DEFINE STRIKE ZONE
387
# Closed outline of the strike zone as (side, height) corner points; the first
# corner is repeated at the end so plotting the columns draws a full rectangle.
strike_zone = pd.DataFrame(
    [
        (-0.9, 1.5),
        (-0.9, 3.5),
        (0.9, 3.5),
        (0.9, 1.5),
        (-0.9, 1.5),
    ],
    columns=['PlateLocSide', 'PlateLocHeight'],
)
391
+
392
+ ### STRIKE ZONE ###
393
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """Draw the strike-zone rectangle and a home-plate outline on ``axis``.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        Axes to draw on.
    alpha_spot : float
        Opacity applied to every line.
    catcher_p : bool
        Selects which of the two home-plate outlines is drawn: when true the
        plate's point is at the top (y = 0.5), otherwise at the bottom
        (y = -0.35).
    """
    axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'],
              color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Each entry is one straight edge of the plate: ([x0, x1], [y0, y1]).
    if catcher_p:
        plate_edges = [
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        ]
    else:
        plate_edges = [
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        ]

    for edge_x, edge_y in plate_edges:
        axis.plot(edge_x, edge_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
415
+
416
+
417
+
418
+ ### FANGRAPHS STATS DICT ###
419
# Display metadata for each FanGraphs stat: the bold mathtext column header
# shown in the table and the format spec applied to the value.
# Raw strings keep the mathtext backslashes (\bf, \/, \%) literal; the
# previous non-raw '\/' and '\%' were invalid escape sequences.
# OBP and SLG are rate stats and use three decimals like AVG and wOBA
# (they were '.0f', which rendered e.g. 0.312 as '0').
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
}
449
+
450
+
451
+ ## Fangraphs Table
452
+
453
+ ### FANGRAPHS SPLITS SCRAPE ###
454
# Split name accepted by fangraphs_scrape -> value sent as "strSplitArr"
# in the FanGraphs splits request ('all' sends no split filter).
split_dict = dict(
    all=[],
    left=['5'],
    right=['6'],
)
458
+
459
def _fangraphs_split_payload(fg_player_id, split_codes, stat_type, start_date, end_date):
    """Build the JSON body for one FanGraphs splits-leaders request."""
    return {
        "strPlayerId": fg_player_id,
        "strSplitArr": split_codes,
        "strGroup": "season",
        "strPosition": "P",
        "strType": stat_type,
        "strStartDate": start_date,
        "strEndDate": end_date,
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None
    }


def fangraphs_scrape(pitcher_id=808967,
                     split='all',
                     start_date='2024-03-20',
                     end_date='2024-09-29'):
    """Fetch one pitcher's FanGraphs split stats for a date range.

    Two requests are sent to the splits-leaders endpoint, first with
    ``strType`` "2" and then "1" (presumably the standard and advanced stat
    groups -- confirm against the FanGraphs API), and the first row of each
    response is merged into a single dict. Keys from the second response
    overwrite equal keys from the first.

    Parameters
    ----------
    pitcher_id : int
        MLBAM player id; translated to a FanGraphs id via ``mlb_fg_dicts``.
    split : str
        Key of ``split_dict`` ('all', 'left' or 'right').
    start_date, end_date : str or datetime-like
        Date range; anything ``pandas.to_datetime`` accepts.

    Returns
    -------
    dict
        Stat name -> value for the requested pitcher and split.

    Raises
    ------
    KeyError
        If ``pitcher_id`` is not in ``mlb_fg_dicts`` or ``split`` is not in
        ``split_dict``.
    IndexError
        If FanGraphs returns no rows for the request.
    requests.exceptions.Timeout
        If FanGraphs does not answer within the timeout.
    """
    url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
    headers = {'Content-Type': 'application/json'}
    # Bound each call so a stalled connection cannot hang the app indefinitely.
    timeout_seconds = 30

    fg_player_id = str(mlb_fg_dicts[pitcher_id])
    split_codes = split_dict[split]
    start = str(pd.to_datetime(start_date).strftime('%Y-%m-%d'))
    end = str(pd.to_datetime(end_date).strftime('%Y-%m-%d'))

    data_pull = {}
    for stat_type in ("2", "1"):
        payload = _fangraphs_split_payload(fg_player_id, split_codes, stat_type, start, end)
        response = requests.post(url, data=json.dumps(payload), headers=headers,
                                 timeout=timeout_seconds)
        data_pull.update(response.json()['data'][0])

    return data_pull
521
+
522
+
523
+ ### FANGRAPHS TABLE PLOT ###
524
def fangraphs_table(data,
                    stats,
                    ax):
    """Render a one-row table of FanGraphs stats on ``ax``.

    Parameters
    ----------
    data : dict
        Stat name -> value, e.g. the result of ``fangraphs_scrape``.
    stats : list of str
        Stat names to show, in column order. Each name present in ``data``
        must have an entry in ``fangraphs_stats_dict``.
    ax : matplotlib.axes.Axes
        Axes that receives the table; its axis decorations are turned off.

    Returns
    -------
    matplotlib.table.Table
        The table added to ``ax``.

    Notes
    -----
    A stat missing from ``data`` is shown as '---' in both the header and
    the value cell. A stat whose value is ``None`` keeps its header and shows
    '---' as the value.
    """
    headers = []
    cell_text = []
    # The cells are built as plain strings. Writing formatted strings back
    # into numeric DataFrame columns is deprecated in pandas, and a dict keyed
    # by stat name would silently drop repeated stats.
    for stat in stats:
        if stat not in data:
            headers.append('---')
            cell_text.append('---')
            continue

        headers.append(fangraphs_stats_dict[stat]['table_header'])
        value = data[stat]
        if value is None or (isinstance(value, str) and value == '---'):
            cell_text.append('---')
        else:
            cell_text.append(format(value, fangraphs_stats_dict[stat]['format']))

    table_fg = ax.table(cellText=[cell_text], colLabels=headers, cellLoc='center',
                        bbox=[0.04, 0.2, 0.92, 0.8])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    ax.axis('off')

    return table_fg
549
+
550
+ ### VELOCITY KDES ###
551
def velocity_kdes(df,
                  ax,
                  gs,
                  gs_list,
                  fig):
    """Draw one stacked velocity-distribution panel per pitch type.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch rows with ``pitch_type``, ``pitch_description`` and
        ``start_speed`` columns.
    ax : matplotlib.axes.Axes
        Outer axes; it only carries the title and is otherwise hidden.
    gs : matplotlib.gridspec.GridSpec
        Figure-level grid; the panels are placed inside ``gs[2, gs_list]``.
    gs_list : int or slice
        Column selector into row 2 of ``gs``.
    fig : matplotlib.figure.Figure
        Figure that receives the per-pitch axes.

    Pitch types are stacked from most to least frequent. A pitch type with a
    single distinct speed is drawn as a vertical line, every other one as a
    filled KDE clipped to its observed speed range. Only the bottom panel
    shows x tick labels and the axis label.
    """
    import matplotlib.gridspec as gridspec

    pitch_order = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict=font_properties_titles)

    # Nothing to draw (and a zero-row sub-grid is invalid) without pitches.
    if not pitch_order:
        return

    inner_grid = gridspec.GridSpecFromSubplotSpec(len(pitch_order), 1, subplot_spec=gs[2, gs_list])
    panels = [fig.add_subplot(cell) for cell in inner_grid]

    # Shared x range, snapped outward to multiples of 5 mph.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    last_index = len(pitch_order) - 1

    for index, (pitch, panel) in enumerate(zip(pitch_order, panels)):
        pitch_rows = df[df['pitch_type'] == pitch]
        speeds = pitch_rows['start_speed']
        colour = pitch_colours[pitch_rows['pitch_description'].values[0]]

        distinct_speeds = np.unique(speeds)
        if distinct_speeds.size == 1:
            # A KDE is undefined for a single value; mark it with a line instead.
            panel.plot([distinct_speeds, distinct_speeds], [0, 1], linewidth=4,
                       color=colour, zorder=20)
        else:
            sns.kdeplot(speeds, ax=panel, fill=True,
                        clip=(speeds.min(), speeds.max()),
                        color=colour)

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')

        if index < last_index:
            panel.spines['top'].set_visible(False)
            panel.spines['right'].set_visible(False)
            panel.spines['left'].set_visible(False)
            # Hide x tick marks/labels on every panel except the bottom one.
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(range(x_low, x_high, 5))
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, pitch, transform=panel.transAxes,
                   fontsize=14, va='center', ha='right')

    bottom_panel = panels[-1]
    bottom_panel.spines['top'].set_visible(False)
    bottom_panel.spines['right'].set_visible(False)
    bottom_panel.spines['left'].set_visible(False)

    bottom_panel.set_xticks(list(range(x_low, x_high, 5)))
    bottom_panel.set_xlabel('Velocity (mph)')
609
+
610
+ ### TJ STUFF+ ROLLING ###
611
def tj_stuff_roling(df,
                    window,
                    ax):
    """Plot the rolling mean of tjStuff+ per pitch type on ``ax``.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch rows with ``pitch_type``, ``pitch_description``,
        ``pitch_type_count_each`` and ``tj_stuff_plus`` columns.
    window : int
        Number of pitches in the rolling window. Pitch types whose largest
        ``pitch_type_count_each`` is below ``window`` are not drawn.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    """
    # Most frequently thrown pitch types first.
    pitch_counts = df['pitch_type'].value_counts().sort_values(ascending=False)
    pitch_order = pitch_counts.index.tolist()

    for pitch in pitch_order:
        pitch_rows = df[df['pitch_type'] == pitch]
        pitches_thrown = max(pitch_rows['pitch_type_count_each'])
        if pitches_thrown >= window:
            rolling_stuff = pitch_rows['tj_stuff_plus'].rolling(window).sum() / window
            line_colour = pitch_colours[pitch_rows['pitch_description'].values[0]]
            sns.lineplot(x=range(1, pitches_thrown + 1),
                         y=rolling_stuff,
                         color=line_colour,
                         ax=ax, linewidth=3)

    # The first full window ends at pitch number `window`, so start the axis there.
    ax.set_xlim(window, max(df['pitch_type_count_each']))
    ax.set_ylim(70, 130)
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
638
+
639
+ ### BREAK PLOT ###
640
def break_plot(df,
               ax):
    """Scatter horizontal vs induced vertical break, with an ellipse per pitch.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitches of a single pitcher with ``hb``, ``ivb``,
        ``pitch_description``, ``pitch_type``, ``prop`` and ``pitcher_hand``
        columns. The handedness is read from the first row.
    ax : matplotlib.axes.Axes
        Axes to draw on.

    A 2-standard-deviation confidence ellipse is drawn for every pitch
    description with more than four pitches. For a left-handed pitcher the
    x axis is inverted and the arm-side / glove-side captions swap ends.
    """
    ordered = df.sort_values(by=['prop', 'pitch_type'], ascending=[False, True])
    pitch_labels = ordered.pitch_description.unique()
    throws = df['pitcher_hand'].values[0]

    for label in pitch_labels:
        pitch_rows = df[df['pitch_description'] == label]
        # Too few points give a meaningless covariance estimate.
        if len(pitch_rows) <= 4:
            continue
        try:
            confidence_ellipse(pitch_rows['hb'], pitch_rows['ivb'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=2,
                               facecolor=pitch_colours[label], alpha=0.2)
        except ValueError:
            # A failed ellipse for one pitch should not abort the whole plot.
            continue

    # The same scatter is drawn for both hands; only axis direction and
    # captions differ below.
    sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df.pitch_description,
                    palette=pitch_colours, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    caption_box = dict(facecolor='white', edgecolor='black')
    if throws == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)
    if throws == 'L':
        # With the axis inverted, x = 24.5 sits at the left edge of the plot.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')
    # Show ticks as whole numbers.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
706
+
707
+ ### TABLE SUMMARY ###
708
def table_summary(df,
                  pitcher_id,
                  ax,
                  df_group,
                  df_group_all,
                  statcast_pitch_summary):
    """Render the per-pitch-type summary table for one pitcher on ``ax``.

    One row is produced per pitch description thrown by ``pitcher_id``
    (sorted by pitch count, descending) plus a final "All" row.  Selected
    cells are colour-coded against the reference values in
    ``statcast_pitch_summary`` and every numeric cell is formatted for
    display.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level rows; ``extension``, ``pitcher_id`` and
        ``tj_stuff_plus`` are read for the "All" row.
    pitcher_id
        Value matched against the ``pitcher_id`` column of ``df``,
        ``df_group`` and ``df_group_all``.
    ax : matplotlib.axes.Axes
        Axes the table is drawn on; its axis decorations are switched off.
    df_group : pandas.DataFrame
        Per-pitch-type aggregates.  Must contain ``spin_direction`` and the
        columns selected for the table below.  Not modified.
    df_group_all : pandas.DataFrame
        Per-pitcher aggregates supplying ``zone_percent``,
        ``chase_percent`` and ``whiff_rate`` for the "All" row.
    statcast_pitch_summary : pandas.DataFrame
        Reference values keyed by ``pitch_description`` (including ``All``)
        used to scale the cell colours.

    Returns
    -------
    matplotlib.table.Table
        The table that was added to ``ax``.

    Notes
    -----
    Sets ``plt.rcParams['font.family']`` to ``'Calibri'`` as a global side
    effect.
    """
    placeholder = '—'
    cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])

    ax.axis('off')

    # --- spin axis as a clock-face label ---------------------------------
    # assign() returns a new frame, so the caller's df_group is left untouched.
    df_group = df_group.assign(spin_direction_adj=df_group['spin_direction'] + 180)

    # Select the column before averaging: a frame-wide mean() raises on the
    # string columns in pandas >= 2.
    mean_axis = df_group.groupby('pitch_description')['spin_direction_adj'].mean() % 360
    clock_hours = mean_axis // 30
    # Remainder within the 30-degree hour sector, snapped to quarter hours.
    clock_quarters = ((mean_axis % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4
    clock_time = clock_hours + clock_quarters

    # Encode as H.MM, then patch the text into H:MM.  The replacement order
    # matters: '0:' -> '12:' also rewrites '10:' into '112:', which the last
    # step turns back into '10:'.
    clock_time = (
        (clock_time.astype(int) + clock_time % 1 * 60 / 100)
        .round(2)
        .astype(str)
        .str.replace('.', ':', regex=False)
        .str.replace(':0', ':00', regex=False)
        .str.replace(':3', ':30', regex=False)
        .str.replace('0:', '12:', regex=False)
        .str.replace('112:', '10:', regex=False)
        .to_frame()
    )
    # The right-hand column collides with 'spin_direction_adj' and therefore
    # receives the '_clock' suffix.
    df_group = df_group.merge(right=clock_time, left_on='pitch_description',
                              right_index=True, suffixes=['', '_clock'])

    # --- assemble the table data -----------------------------------------
    table_columns = ['pitch_description', 'pitches', 'pitch_percent', 'start_speed', 'ivb',
                     'hb', 'spin_rate', 'vaa', 'haa', 'vertical_release', 'horizontal_release',
                     'extension', 'spin_direction_adj_clock', 'tj_stuff_plus',
                     'zone_percent', 'chase_percent', 'whiff_rate']

    plot_table = (
        df_group[df_group['pitcher_id'] == pitcher_id]
        .sort_values(by=['pitches'], ascending=False)
        .copy()
    )
    # The sign of the horizontal release is flipped for display.
    plot_table['horizontal_release'] = plot_table['horizontal_release'] * -1
    plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum()
    plot_table = plot_table[table_columns]

    pitcher_totals = df_group_all[df_group_all['pitcher_id'] == pitcher_id]
    plot_table_all = pd.DataFrame(
        data={'pitch_description': 'All',
              'pitches': plot_table['pitches'].sum(),
              'pitch_percent': 1.0,
              'start_speed': placeholder,
              'ivb': placeholder,
              'hb': placeholder,
              'spin_rate': placeholder,
              'vaa': placeholder,
              'haa': placeholder,
              'vertical_release': placeholder,
              'horizontal_release': placeholder,
              # NOTE(review): unlike tj_stuff_plus this mean is not filtered
              # by pitcher_id - confirm df only ever holds one pitcher.
              'extension': df['extension'].mean(),
              'spin_direction_adj_clock': placeholder,
              'tj_stuff_plus': df[df['pitcher_id'] == pitcher_id]['tj_stuff_plus'].mean(),
              'zone_percent': pitcher_totals['zone_percent'].values[0],
              'chase_percent': pitcher_totals['chase_percent'].values[0],
              'whiff_rate': pitcher_totals['whiff_rate'].values[0],
              },
        index=[0],
    )

    plot_table = pd.concat([plot_table, plot_table_all]).fillna(placeholder)
    n_rows = len(plot_table)
    n_cols = len(plot_table.columns)

    # --- draw the table ---------------------------------------------------
    plt.rcParams['font.family'] = 'Calibri'
    table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center',
                     colWidths=[2.3] + [1] * 16, bbox=[0.04, 0, 0.92, 0.8])
    cells = table.get_celld()

    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1, 0.5)

    # Body rows use a larger font than the header row (row 0).
    for row in range(1, n_rows + 1):
        for col in range(n_cols):
            cells[row, col].set_fontsize(20)

    def _shade(row, col, vmin, vmax):
        """Colour one cell by its numeric text, scaled between vmin and vmax."""
        cell = cells[(row, col)]
        raw = cell.get_text().get_text()
        if raw != placeholder:
            normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
            cell.set_facecolor(get_color(float(raw.strip('%')), normalize, cmap_sum))

    # --- colour coding ----------------------------------------------------
    # Runs before number formatting so the cell text is still the raw value.
    for i in range(n_rows):
        row = i + 1
        name_cell = cells[(row, 0)]
        pitch_name = name_cell.get_text().get_text()

        if pitch_name != 'All':
            name_cell.set_facecolor(pitch_colours[pitch_name])
            # These pitch types get black text; every other type gets white.
            if pitch_name in ['Split-Finger', 'Slider', 'Changeup']:
                name_cell.set_text_props(color='#000000', fontweight='bold')
            else:
                name_cell.set_text_props(color='#ffffff', fontweight='bold')
            if pitch_name == 'Four-Seam Fastball':
                name_cell.get_text().set_text('4-Seam Fastball')

        select_df = statcast_pitch_summary[
            statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]]

        # Velocity: reference mean +/- one reference standard deviation.
        speed_mean = select_df['start_speed'].mean()
        speed_std = select_df.pitch_velocity_std.mean()
        _shade(row, 3, speed_mean - speed_std, speed_mean + speed_std)

        # Extension: +/-10 % around the reference mean.
        extension_mean = select_df['extension'].mean()
        _shade(row, 11, extension_mean * 0.9, extension_mean * 1.1)

        # tjStuff+: fixed 80-120 scale.
        _shade(row, 13, 80, 120)

        # Zone%, Chase%, Whiff%: +/-30 % around the reference mean.
        for col, reference in ((14, 'zone_percent'), (15, 'chase_percent'), (16, 'whiff_rate')):
            reference_mean = select_df[reference].mean()
            _shade(row, col, reference_mean * 0.7, reference_mean * 1.3)

    # The last row is the "All" row; its label is bold black.
    cells[(n_rows, 0)].set_text_props(color='#000000', fontweight='bold')

    # --- header labels ----------------------------------------------------
    # Raw strings keep the mathtext backslashes without invalid escapes.
    new_column_names = [r'$\bf{Pitch\ Name}$',
                        r'$\bf{Count}$',
                        r'$\bf{Pitch\%}$',
                        r'$\bf{Velocity}$',
                        r'$\bf{iVB}$',
                        r'$\bf{HB}$',
                        r'$\bf{Spin}$',
                        r'$\bf{VAA}$',
                        r'$\bf{HAA}$',
                        r'$\bf{vRel}$',
                        r'$\bf{hRel}$',
                        r'$\bf{Ext.}$',
                        r'$\bf{Axis}$',
                        r'$\bf{tjStuff+}$',
                        r'$\bf{Zone\%}$',
                        r'$\bf{Chase\%}$',
                        r'$\bf{Whiff\%}$',
                        ]
    for col, col_name in enumerate(new_column_names):
        cells[(0, col)].get_text().set_text(col_name)

    # --- number formatting ------------------------------------------------
    column_formats = (
        ('{:,.1f}', ['start_speed', 'ivb', 'hb', 'vaa', 'haa',
                     'vertical_release', 'horizontal_release', 'extension']),
        ('{:,.1%}', ['pitch_percent', 'zone_percent', 'chase_percent', 'whiff_rate']),
        ('{:,.0f}', ['tj_stuff_plus', 'spin_rate']),
    )
    for number_format, column_names in column_formats:
        for column_name in column_names:
            col = plot_table.columns.get_loc(column_name)
            for row in range(1, n_rows + 1):
                text = cells[(row, col)].get_text()
                raw = text.get_text()
                if raw != placeholder:
                    text.set_text(number_format.format(float(raw.strip('%'))))

    return table
917
+
918
+ ### GROUPED IVB CREATION ###
919
def group_ivb_update(df,
                     agg_list=None):
    """Aggregate pitch-level rows into one summary row per group.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level rows.  Besides the grouping columns it must contain
        ``start_speed``, ``ivb``, ``hb``, ``spin_rate``, ``vaa``, ``haa``,
        ``x0``, ``z0``, ``extension``, ``spin_direction``, ``tj_stuff_plus``
        and the count columns ``swings``, ``in_zone``, ``out_zone``,
        ``whiffs``, ``zone_swing``, ``zone_contact``, ``ozone_swing`` and
        ``ozone_contact``.
    agg_list : list of str, optional
        Columns to group by.  Defaults to ``['pitcher_id', 'pitcher_name',
        'pitcher_hand', 'pitch_type', 'pitch_description']``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the pitch count (``pitches``), the per-group
        means, the per-group sums of the count columns and the derived rate
        columns.  A rate is NaN whenever its denominator is zero.
    """
    if agg_list is None:
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type', 'pitch_description']

    grouped_ivb = df.groupby(agg_list).agg(
        pitches=('start_speed', 'count'),

        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
    ).reset_index()

    def _rate(numerator, denominator):
        """Divide element-wise, yielding NaN wherever the denominator is 0."""
        return numerator / denominator.where(denominator != 0)

    pitches = grouped_ivb['pitches']
    # Out-of-zone pitches are derived as pitches - in_zone, not taken from the
    # summed out_zone column.
    outside_zone = pitches - grouped_ivb['in_zone']

    grouped_ivb['zone_contact_percent'] = _rate(grouped_ivb['zone_contact'], grouped_ivb['zone_swing'])
    # Guarded on in_zone (the actual denominator); guarding on pitches let a
    # zero in_zone through to the division.
    grouped_ivb['zone_swing_percent'] = _rate(grouped_ivb['zone_swing'], grouped_ivb['in_zone'])
    grouped_ivb['zone_percent'] = _rate(grouped_ivb['in_zone'], pitches)
    grouped_ivb['chase_percent'] = _rate(grouped_ivb['ozone_swing'], outside_zone)
    grouped_ivb['chase_contact'] = _rate(grouped_ivb['ozone_contact'], grouped_ivb['ozone_swing'])
    grouped_ivb['swing_percent'] = _rate(grouped_ivb['swings'], pitches)
    grouped_ivb['whiff_rate'] = _rate(grouped_ivb['whiffs'], grouped_ivb['swings'])
    grouped_ivb['swstr_rate'] = _rate(grouped_ivb['whiffs'], pitches)

    return grouped_ivb
964
+
965
+
966
+ ### LOCATION PLOT (BY BATTER HAND) ###
967
def location_plot(df, ax, hand):
    """Draw the pitch-location chart against batters of one handedness.

    For every pitch description with at least five pitches to ``hand``
    batters a 1.5-sigma confidence ellipse of plate location is drawn.  The
    mean location of each pitch description is then plotted as a marker
    whose size scales with that pitch's share of the pitches thrown to
    ``hand`` batters, and the strike-zone outline is added via
    ``draw_line``.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level rows.  Columns read here: ``prop``, ``pitch_type``,
        ``pitch_description``, ``batter_hand``, ``px``, ``pz`` and
        ``start_speed`` (used only to count pitches).
    ax : matplotlib.axes.Axes
        Axes to draw on; modified in place.
    hand : str
        Value matched against ``batter_hand``; it is also interpolated into
        the title as ``"{hand}HB"``.

    Returns
    -------
    None
    """
    ordered_pitches = df.sort_values(
        by=['prop', 'pitch_type'], ascending=[False, True]
    ).pitch_description.unique()

    # Filter by batter hand once instead of once per pitch type.
    versus_hand = df[df['batter_hand'] == hand]

    for label in ordered_pitches:
        subset = versus_hand[versus_hand['pitch_description'] == label]
        # An ellipse is only drawn with at least five pitches.
        if len(subset) >= 5:
            confidence_ellipse(subset['px'], subset['pz'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=1.5,
                               facecolor=pitch_colours[label], alpha=0.3)

    pitch_location_group = versus_hand.groupby(['pitch_description']).agg(
        pitches=('start_speed', 'count'),
        px=('px', 'mean'),
        pz=('pz', 'mean')).reset_index()

    pitch_location_group['pitch_percent'] = (
        pitch_location_group['pitches'] / pitch_location_group['pitches'].sum())

    # One marker per pitch type at its mean location, sized by usage share.
    sns.scatterplot(ax=ax, x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_description'],
                    palette=pitch_colours, ec='black',
                    s=pitch_location_group['pitch_percent'] * 750,
                    linewidth=2,
                    zorder=2)

    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # Only remove the legend when markers were plotted and one exists.
    if len(pitch_location_group['px']) > 0:
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches", fontdict=font_properties_titles)
statcast_pitch_summary.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pitch_description,pitches,start_speed,ivb,hb,spin_rate,vaa,haa,horizontal_release,vertical_release,extension,swings,in_zone,out_zone,whiffs,zone_swing,zone_contact,ozone_swing,ozone_contact,zone_contact_percent,zone_swing_percent,zone_percent,chase_percent,chase_contact,swing_percent,whiff_rate,swstr_rate,pitch_velocity_std
2
+ Changeup,78501,85.45244264,5.696969465,4.756409472,1788.814529,-7.397429532,0.066029801,-0.505147738,5.637483042,6.459312389,39508,30631,47870,12279,23324,18225,16184,9004,0.781383982,0.761450818,0.390198851,0.338082306,0.556351953,0.503280213,0.310797813,0.15641839,3.411101475
3
+ Curveball,51673,79.28082171,-9.353269599,-3.755365471,2529.177413,-9.685324545,1.128810646,-0.615065727,5.935777432,6.356118766,21550,22503,29170,6681,13243,11063,8307,3806,0.835384732,0.588499311,0.435488553,0.284778882,0.45816781,0.417045652,0.310023202,0.129293828,3.845559953
4
+ Cutter,55802,89.23017813,7.67180567,-1.473115659,2387.582408,-6.38378616,1.393012805,-0.885454511,5.73043569,6.364762822,27663,28608,27194,6695,19891,16452,7772,4516,0.827107737,0.695295022,0.512669797,0.285798338,0.581060216,0.49573492,0.242020027,0.119977779,3.290564827
5
+ Eephus,523,47.97782027,16.7292543,-0.099235182,1170.40153,-14.91983588,1.506952393,-1.3219283,7.275302765,4.554736692,227,184,339,7,135,131,92,89,0.97037037,0.733695652,0.351816444,0.271386431,0.967391304,0.434034417,0.030837004,0.013384321,6.869976111
6
+ Fastball,1140,67.09807018,15.87526316,5.559912281,1638.396309,-8.730726579,1.203451128,-1.423913573,6.371107598,4.816106427,573,543,597,41,405,379,168,153,0.935802469,0.745856354,0.476315789,0.281407035,0.910714286,0.502631579,0.071553229,0.035964912,9.112324944
7
+ Forkball,778,82.96773779,1.274164524,7.219151671,1079.151436,-8.98002479,1.264533775,-1.651045178,5.833003132,6.46649016,358,188,590,199,144,93,214,66,0.645833333,0.765957447,0.241645244,0.362711864,0.308411215,0.460154242,0.555865922,0.255784062,1.581383091
8
+ Four-Seam Fastball,230963,94.190831,15.70507094,3.1856687,2282.171475,-4.780780532,0.546832454,-0.72858478,5.693779333,6.512547603,111297,125822,105141,24710,85724,69571,25573,17016,0.811569689,0.681311694,0.54477124,0.243225763,0.665389278,0.481882379,0.222018563,0.106986833,2.47865324
9
+ Knuckle Ball,190,75.68631579,-1.973157895,3.434736842,336.9684211,-8.570861876,1.201227902,-1.463108102,5.626929407,6.28061513,81,93,97,20,63,48,18,13,0.761904762,0.677419355,0.489473684,0.18556701,0.722222222,0.426315789,0.24691358,0.105263158,4.960630412
10
+ Knuckle Curve,12153,81.43644368,-9.543701144,-5.97670534,2469.155167,-9.592522092,1.924129612,-1.025192611,5.916888283,6.36037149,5439,5095,7058,1800,3147,2683,2292,956,0.852557992,0.617664377,0.419238048,0.324737886,0.417102967,0.447543816,0.330943188,0.148111577,3.335305502
11
+ Screwball,74,80.18648649,-4.459459459,9.043243243,2094.554054,-8.648739106,-0.179827995,-1.153713198,6.063270539,6.211306526,34,33,41,5,24,23,10,6,0.958333333,0.727272727,0.445945946,0.243902439,0.6,0.459459459,0.147058824,0.067567568,1.450679985
12
+ Sinker,110889,93.31860599,7.843805066,6.938142647,2149.451219,-5.886868132,0.131331991,-0.761006834,5.525340998,6.420015363,50199,61433,49456,7185,37708,33570,12491,9444,0.890262013,0.613806912,0.554004455,0.252567939,0.756064366,0.452695939,0.143130341,0.064794524,2.892717921
13
+ Slider,126080,85.12149032,1.606287278,-3.060486199,2408.269843,-7.741990027,1.624081206,-0.917878953,5.66131706,6.416260991,60686,56660,69420,20416,38692,31069,21994,9201,0.802982529,0.682880339,0.449397208,0.316825122,0.418341366,0.481329315,0.336420262,0.161928934,3.327382065
14
+ Slow Curve,51,59.1745098,-9.101960784,-0.780392157,2058.285714,-14.41006285,1.056678233,-0.636672072,6.687193223,5.631413586,21,15,36,1,13,13,8,7,1,0.866666667,0.294117647,0.222222222,0.875,0.411764706,0.047619048,0.019607843,11.82586729
15
+ Slurve,2330,82.12420601,-3.000300429,-4.449828326,2523.909406,-8.360925975,1.378365433,-0.669493725,5.578432792,6.027864672,984,1068,1262,273,619,538,365,173,0.86914378,0.579588015,0.458369099,0.289223455,0.473972603,0.422317597,0.277439024,0.117167382,2.558741806
16
+ Splitter,15569,86.61465733,3.256830882,10.44272593,1355.748708,-7.739259105,0.553210998,-1.537501892,5.729877252,6.425062745,8129,5689,9880,2825,4542,3517,3587,1787,0.77432849,0.798382844,0.365405614,0.36305668,0.498187901,0.522127304,0.34752122,0.181450318,3.210384888
17
+ Sweeper,30959,81.87371039,1.235039891,-7.533357021,2573.086585,-7.702769263,2.197097666,-0.997430186,5.419746312,6.442381843,14382,13480,17479,4702,8883,7108,5499,2572,0.800180119,0.658976261,0.435414581,0.314606099,0.467721404,0.464549889,0.326936448,0.151878291,2.916945647
18
+ All,717675,88.99915324,7.084329815,1.49919922,2248.528397,-6.5395245,0.826992658,-0.783178307,5.67100296,6.439312301,341131,352045,365630,87839,236557,194483,104574,58809,0.822140118,0.671951029,0.490535409,0.286010448,0.562367319,0.475327969,0.257493456,0.122393841,6.109454214
team_logos.csv ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id,city,name,franchise,abbreviation,imageAbbreviation,imageLink
2
+ 108,Los Angeles Angels,Angels,Los Angeles Angels,LAA,LAA,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/LAA.png&h=400&w=400
3
+ 109,Arizona Diamondbacks,D-backs,Arizona Diamondbacks,AZ,ARI,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ARI.png&h=400&w=400
4
+ 110,Baltimore Orioles,Orioles,Baltimore Orioles,BAL,BAL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/BAL.png&h=400&w=400
5
+ 111,Boston Red Sox,Red Sox,Boston Red Sox,BOS,BOS,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/BOS.png&h=400&w=400
6
+ 112,Chicago Cubs,Cubs,Chicago Cubs,CHC,CHC,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CHC.png&h=400&w=400
7
+ 113,Cincinnati Reds,Reds,Cincinnati Reds,CIN,CIN,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CIN.png&h=400&w=400
8
+ 114,Cleveland Guardians,Guardians,Cleveland Guardians,CLE,CLE,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CLE.png&h=400&w=400
9
+ 115,Colorado Rockies,Rockies,Colorado Rockies,COL,COL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/COL.png&h=400&w=400
10
+ 116,Detroit Tigers,Tigers,Detroit Tigers,DET,DET,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/DET.png&h=400&w=400
11
+ 117,Houston Astros,Astros,Houston Astros,HOU,HOU,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/HOU.png&h=400&w=400
12
+ 118,Kansas City Royals,Royals,Kansas City Royals,KC,KC,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/KC.png&h=400&w=400
13
+ 119,Los Angeles Dodgers,Dodgers,Los Angeles Dodgers,LAD,LAD,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/LAD.png&h=400&w=400
14
+ 120,Washington Nationals,Nationals,Washington Nationals,WSH,WSH,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/WSH.png&h=400&w=400
15
+ 121,New York Mets,Mets,New York Mets,NYM,NYM,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/NYM.png&h=400&w=400
16
+ 133,Oakland Athletics,Athletics,Oakland Athletics,OAK,OAK,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/OAK.png&h=400&w=400
17
+ 134,Pittsburgh Pirates,Pirates,Pittsburgh Pirates,PIT,PIT,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/PIT.png&h=400&w=400
18
+ 135,San Diego Padres,Padres,San Diego Padres,SD,SD,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/SD.png&h=400&w=400
19
+ 136,Seattle Mariners,Mariners,Seattle Mariners,SEA,SEA,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/SEA.png&h=400&w=400
20
+ 137,San Francisco Giants,Giants,San Francisco Giants,SF,SF,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/SF.png&h=400&w=400
21
+ 138,St. Louis Cardinals,Cardinals,St. Louis Cardinals,STL,STL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/STL.png&h=400&w=400
22
+ 139,Tampa Bay Rays,Rays,Tampa Bay Rays,TB,TB,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/TB.png&h=400&w=400
23
+ 140,Texas Rangers,Rangers,Texas Rangers,TEX,TEX,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/TEX.png&h=400&w=400
24
+ 141,Toronto Blue Jays,Blue Jays,Toronto Blue Jays,TOR,TOR,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/TOR.png&h=400&w=400
25
+ 142,Minnesota Twins,Twins,Minnesota Twins,MIN,MIN,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/MIN.png&h=400&w=400
26
+ 143,Philadelphia Phillies,Phillies,Philadelphia Phillies,PHI,PHI,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/PHI.png&h=400&w=400
27
+ 144,Atlanta Braves,Braves,Atlanta Braves,ATL,ATL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ATL.png&h=400&w=400
28
+ 145,Chicago White Sox,White Sox,Chicago White Sox,CWS,CHW,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CHW.png&h=400&w=400
29
+ 146,Miami Marlins,Marlins,Miami Marlins,MIA,MIA,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/MIA.png&h=400&w=400
30
+ 147,New York Yankees,Yankees,New York Yankees,NYY,NYY,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/NYY.png&h=400&w=400
31
+ 158,Milwaukee Brewers,Brewers,Milwaukee Brewers,MIL,MIL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/MIL.png&h=400&w=400
32
+ 11,MLB,MLB,Free Agent,FA,MLB,https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png?w=400&h=400&transparent=true