print('Running')

import time
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from scipy import stats
import matplotlib.lines as mlines
import matplotlib.transforms as mtransforms
import numpy as np
#import plotly.express as px
#!pip install chart_studio
#import chart_studio.tools as tls
from bs4 import BeautifulSoup
import matplotlib.font_manager as font_manager
from datetime import datetime
import pytz
from matplotlib.ticker import MaxNLocator
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
from matplotlib.gridspec import GridSpec

datetime.now(pytz.timezone('US/Pacific')).strftime('%B %d, %Y')

# Configure Notebook
#%matplotlib inline
plt.style.use('fivethirtyeight')
sns.set_context("notebook")
import warnings
warnings.filterwarnings('ignore')

# import yfpy
# from yfpy.query import YahooFantasySportsQuery
# import yahoo_oauth
import json
import urllib
#import openpyxl
from sklearn import preprocessing
from datetime import timedelta
#import dataframe_image as dfi
# from google.colab import drive


def percentile(n):
    """Return a named aggregation function for the n-th percentile."""
    def percentile_(x):
        return np.percentile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_


import os
import praw
import matplotlib.colors
import matplotlib.colors as mcolors

cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#4285f4", "#FFFFFF", "#F0E442"])

#import pybaseball
import math
import matplotlib.ticker as mtick
import matplotlib.ticker as ticker

colour_palette = ['#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
                  '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

from matplotlib.ticker import FuncFormatter
from matplotlib.font_manager import FontProperties
import matplotlib.patheffects as path_effects


def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data.
    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.
    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.
    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse
    """
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
    # Using a special case to obtain the eigenvalues of this
    # two-dimensional dataset.
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                      facecolor=facecolor, **kwargs)

    # Calculating the standard deviation of x from
    # the squareroot of the variance and multiplying
    # with the given number of standard deviations.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    mean_x = np.mean(x)

    # calculating the standard deviation of y ...
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_y = np.mean(y)

    transf = transforms.Affine2D() \
        .rotate_deg(45) \
        .scale(scale_x, scale_y) \
        .translate(mean_x, mean_y)

    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)


statcast_df = pd.read_csv('2023_statcast_pybaseball_data.csv', index_col=[0])
#player_df = pd.read_csv('player_df_all.csv',index_col=[0])
#player_df = pd.concat([player_df,pd.DataFrame({'player_id':668909,'team_id':114.0,'abbreviation':'CLE'},index=[2000])])

sport_id = 1
teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()

# Select only teams that are at the MLB level
# mlb_teams_city = [x['franchiseName'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_name = [x['teamName'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_franchise = [x['name'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_id = [x['id'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_abb = [x['abbreviation'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
mlb_teams_city = [x['franchiseName'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_name = [x['teamName'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_franchise = [x['name'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_id = [x['id'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_abb = [x['abbreviation'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_parent = [x['parentOrgName'] if 'parentOrgName' in x else None for x in teams['teams']]

# Create a dataframe of all the teams
mlb_teams_df = pd.DataFrame(data={'team_id': mlb_teams_id,
                                  'city': mlb_teams_city,
                                  'name': mlb_teams_name,
                                  'franchise': mlb_teams_franchise,
                                  'abbreviation': mlb_teams_abb,
                                  'parent_org': mlb_teams_parent}).drop_duplicates()

## Create a dataframe of all players in the database
# Make an api call to get a dictionary of all players
player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players').json()

# Select relevant data that will help distinguish players from one another
fullName_list = [x['fullName'] for x in player_data['people']]
id_list = [x['id'] for x in player_data['people']]
position_list = [x['primaryPosition']['abbreviation'] for x in player_data['people']]
team_list = [x['currentTeam']['id'] for x in player_data['people']]

player_list = pd.DataFrame(data={'player_id': id_list, 'full_name': fullName_list,
                                 'position': position_list, 'team_id': team_list})
player_list = player_list.drop_duplicates(subset=['player_id'], keep='last')

player_df_all = player_list.merge(right=mlb_teams_df[['team_id', 'abbreviation']],
                                  left_on='team_id', right_on='team_id', how='left').drop_duplicates(keep='last')

mlb_teams_df = mlb_teams_df.merge(right=mlb_teams_df[['abbreviation', 'franchise']],
                                  left_on='parent_org', right_on='franchise',
                                  how='left').drop_duplicates().reset_index(drop=True)
mlb_teams_df = mlb_teams_df[mlb_teams_df.columns[:-1]]
mlb_teams_df.columns = ['team_id', 'city', 'name', 'franchise', 'abbreviation', 'parent_org', 'parent_org_abb']

statcast_df = statcast_df.merge(right=player_df_all, left_on='batter', right_on='player_id', suffixes=['', '_batter'])
statcast_df = statcast_df.merge(right=player_df_all, left_on='pitcher', right_on='player_id', suffixes=['', '_pitcher'])
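# Illustrative aside (not part of the original pipeline): the /teams/ call above returns
# clubs at every level, which is why each list comprehension guards on 'franchiseName'.
# The Stats API also accepts a sportId query parameter (1 = MLB), so a smaller,
# MLB-only table could be pulled directly. A minimal sketch, assuming only the
# id/name/abbreviation/franchiseName fields used above:
def fetch_mlb_teams(level_id=1):
    """Return a team_id/name/abbreviation/franchise table for one sport level (1 = MLB)."""
    resp = requests.get('https://statsapi.mlb.com/api/v1/teams', params={'sportId': level_id}).json()
    return pd.DataFrame([{'team_id': t['id'],
                          'name': t.get('name'),
                          'abbreviation': t.get('abbreviation'),
                          'franchise': t.get('franchiseName')} for t in resp['teams']])
# e.g. fetch_mlb_teams(1) should come back with roughly 30 rows rather than every affiliate.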
end_codes = ['single', 'strikeout', 'walk', 'field_out', 'grounded_into_double_play',
             'fielders_choice', 'force_out', 'double', 'sac_fly', 'field_error', 'home_run',
             'triple', 'hit_by_pitch', 'sac_bunt', 'double_play', 'intent_walk',
             'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play',
             'catcher_interf', 'other_out', 'triple_play']

pa_df_full_na_codes = statcast_df[statcast_df.events.isin(end_codes)]
pa_df_full_na_codes['pa'] = 1
statcast_df = statcast_df.merge(pa_df_full_na_codes[['pa', 'play_id']], left_on='play_id', right_on='play_id', how='left')

test_df = statcast_df.sort_values(by='full_name_pitcher').drop_duplicates(subset='pitcher').reset_index(drop=True)[['pitcher', 'full_name_pitcher']]  #['pitcher'].to_dict()
test_df = test_df.set_index('pitcher')
# #test_df = test_df[test_df.pitcher == 'Chris Bassitt'].append(test_df[test_df.pitcher != 'Chris Bassitt'])
pitcher_dict = test_df['full_name_pitcher'].to_dict()

statcast_df['game_opp'] = statcast_df['game_date'].astype(str) + ' vs ' + statcast_df['abbreviation'].astype(str)
print(statcast_df['game_opp'])

date_dict = pd.concat([pd.DataFrame(data={'game_pk': 0, 'game_opp': 'Season'}, index=[0]),
                       statcast_df.drop_duplicates(subset=['pitcher', 'game_pk', 'game_opp'])[['game_pk', 'game_opp']]]).set_index('game_pk').to_dict()

chadwick_df_small = pd.read_csv('chadwick_df.csv')
statcast_df = statcast_df.merge(right=chadwick_df_small[['key_mlbam', 'key_fangraphs']],
                                left_on='pitcher', right_on='key_mlbam', how='left')

statcast_df['home_away'] = 'h'
statcast_df.loc[statcast_df.abbreviation_pitcher == statcast_df.away_team, 'home_away'] = 'a'
print('home_away')
print(statcast_df.home_away)

# stuff_plus_season_df = pd.read_csv('stuff_df_melt.csv',index_col=[0])
# loc_plus_season_df = pd.read_csv('loc_df_melt.csv',index_col=[0])
# pitching_plus_season_df = pd.read_csv('pitching_df_melt.csv',index_col=[0])
# stuff_plus_full_df = pd.read_csv('stuff_plus_full.csv',index_col=[0])
# loc_plus_full_df = pd.read_csv('loc_plus_full.csv',index_col=[0])
# pitching_plus_full_df = pd.read_csv('pitching_plus_full.csv',index_col=[0])

types_in = ['hit_into_play', 'ball', 'swinging_strike', 'foul', 'blocked_ball', 'called_strike',
            'foul_tip', 'swinging_strike_blocked', 'hit_by_pitch', 'foul_bunt', 'pitchout',
            'missed_bunt', 'bunt_foul_tip']
whiffs_in = ['swinging_strike', 'foul_tip', 'swinging_strike_blocked', 'missed_bunt', 'bunt_foul_tip']
swing_in = ['foul_bunt', 'foul', 'hit_into_play', 'swinging_strike', 'foul_tip',
            'swinging_strike_blocked', 'missed_bunt', 'bunt_foul_tip']
csw_in = ['swinging_strike', 'called_strike', 'foul_tip', 'swinging_strike_blocked', 'missed_bunt', 'bunt_foul_tip']

conditions_pitch = [
    (statcast_df['description'].isin(types_in)),
]
choices_pitch = [True]
statcast_df['pitch'] = np.select(conditions_pitch, choices_pitch, default=np.nan)

conditions_swings = [
    (statcast_df['description'].isin(swing_in)),
]
choices_swings = [True]
statcast_df['swing'] = np.select(conditions_swings, choices_swings, default=np.nan)

conditions_whiff = [
    (statcast_df['description'].isin(whiffs_in)),
]
choices_whiff = [True]
statcast_df['whiff'] = np.select(conditions_whiff, choices_whiff, default=np.nan)

conditions_csw = [
    (statcast_df['description'].isin(csw_in)),
]
choices_csw = [True]
statcast_df['csw'] = np.select(conditions_csw, choices_csw, default=np.nan)

# conditions_out = [
#     (statcast_df['zone']>9),
# ]
# choices_out = [True]
# statcast_df['outside'] = np.select(conditions_out, choices_out, default=np.nan)

statcast_df['in_zone'] = statcast_df.zone < 10
statcast_df['out_zone'] = statcast_df.zone >= 10

conditions_chase = [
    ((statcast_df['description'].isin(swing_in)) & (statcast_df.out_zone)),
]
choices_chase = [True]
statcast_df['chase'] = np.select(conditions_chase, choices_chase, default=np.nan)

statcast_df = statcast_df[statcast_df.pitch == 1].reset_index(drop=True)
statcast_df.loc[(statcast_df.swing == 1) & (statcast_df.whiff != 1), 'whiff'] = 0
statcast_df.loc[(statcast_df.pitch == 1) & (statcast_df.csw != 1), 'csw'] = 0
statcast_df['cs'] = 0
statcast_df.loc[(statcast_df.csw == 1) & (statcast_df.whiff != 1), 'cs'] = 1

bip_in = ['field_out', 'double', 'single', 'sac_fly', 'home_run', 'grounded_into_double_play',
          'triple', 'force_out', 'field_error', 'double_play', 'fielders_choice_out', 'sac_bunt',
          'fielders_choice', 'sac_fly_double_play', 'other_out']
strikeout_in = ['strikeout', 'strikeout_double_play']
walk_in = ['walk']

conditions_bip = [
    (statcast_df['events'].isin(bip_in)),
]
choices_bip = [True]
statcast_df['bip'] = np.select(conditions_bip, choices_bip, default=np.nan)

conditions_k = [
    (statcast_df['events'].isin(strikeout_in)),
]
choices_k = [True]
statcast_df['k'] = np.select(conditions_k, choices_k, default=np.nan)

conditions_bb = [
    (statcast_df['events'].isin(walk_in)),
]
choices_bb = [True]
statcast_df['bb'] = np.select(conditions_bb, choices_bb, default=np.nan)

statcast_df.game_date = pd.to_datetime(statcast_df.game_date).dt.date

statcast_df_df_pitch = statcast_df[statcast_df['pitch'] == 1].groupby(['pitch_name']).agg(
    pitches = ('pitch', 'sum'),
    swings = ('swing', 'sum'),
    whiff = ('whiff', 'sum'),
    csw = ('csw', 'sum'),
    out_zone = ('out_zone', 'sum'),
    chase = ('chase', 'sum'),
    pitch_velocity = ('release_speed', 'mean'),
    spin_rate = ('release_spin_rate', 'mean'),
    exit_velocity = ('launch_speed', 'mean'),
    pitch_velocity_std = ('release_speed', 'std'),
    spin_rate_std = ('release_spin_rate', 'std'),
    exit_velocity_std = ('launch_speed', 'std'),
    pfx_x = ('pfx_x', 'mean'),
    pfx_z = ('pfx_z', 'mean'),
    extension = ('release_extension', 'mean'),
    release_x = ('release_pos_x', 'mean'),
    release_z = ('release_pos_z', 'mean'),
    zone_percent = ('in_zone', 'mean'),
    xwOBA = ('estimated_woba_using_speedangle', 'mean')
    #pitch_velocity = ('pitch_velocity','mean'),
    # pitch_velocity = ('launch_speed',percentile(95)),
    # launch_speed = ('launch_speed','mean'),
    # launch_angle = ('launch_angle','mean'),
).sort_values(by='pitches', ascending=False).reset_index()

pitches_all_df = statcast_df[statcast_df['pitch'] == 1].groupby('pitch').agg(
    pitches = ('pitch', 'sum'),
    swings = ('swing', 'sum'),
    whiff = ('whiff', 'sum'),
    csw = ('csw', 'sum'),
    chase = ('chase', 'sum'),
    out_zone = ('out_zone', 'sum'),
    pitch_velocity = ('release_speed', 'mean'),
    spin_rate = ('release_spin_rate', 'mean'),
    exit_velocity = ('launch_speed', 'mean'),
    pitch_velocity_std = ('release_speed', 'std'),
    spin_rate_std = ('release_spin_rate', 'std'),
    exit_velocity_std = ('launch_speed', 'std'),
    pfx_x = ('pfx_x', 'mean'),
    pfx_z = ('pfx_z', 'mean'),
    extension = ('release_extension', 'mean'),
    release_x = ('release_pos_x', 'mean'),
    release_z = ('release_pos_z', 'mean'),
    zone_percent = ('in_zone', 'mean'),
    xwOBA = ('estimated_woba_using_speedangle', 'mean')
    #pitch_velocity = ('pitch_velocity','mean'),
    # pitch_velocity = ('launch_speed',percentile(95)),
    # launch_speed = ('launch_speed','mean'),
    # launch_angle = ('launch_angle','mean'),
).sort_values(by='pitches', ascending=False).reset_index()
pitches_all_df['pitch_name'] = 'All'

statcast_df_df_pitch = pd.concat([statcast_df_df_pitch, pitches_all_df]).reset_index(drop=True)
statcast_df_df_pitch['whiff_rate'] = statcast_df_df_pitch['whiff'] / statcast_df_df_pitch['swings']
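# Illustrative aside (mirrors the inline assignments around it): the league-wide table
# above is converted to rate stats with the usual denominators --
#   whiff%  = whiffs / swings
#   CSW%    = (called strikes + whiffs) / pitches
#   chase%  = out-of-zone swings / out-of-zone pitches
#   pitch%  = pitches of this type / all pitches
# Note that the 'All' row has already been concatenated at this point, so the
# pitch_percent computed below sums over that row too (each share comes out halved);
# the per-pitcher table later in the app computes its shares before adding its 'All' row.
# A hypothetical helper bundling the same arithmetic in one place:
def add_rate_columns(df):
    out = df.copy()
    out['whiff_rate'] = out['whiff'] / out['swings']
    out['csw_rate'] = out['csw'] / out['pitches']
    out['chase_percent'] = out['chase'] / out['out_zone']
    out['pitch_percent'] = out['pitches'] / out['pitches'].sum()
    return out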
statcast_df_df_pitch['csw_rate'] = statcast_df_df_pitch['csw'] / statcast_df_df_pitch['pitches']
statcast_df_df_pitch['chase_percent'] = statcast_df_df_pitch['chase'] / statcast_df_df_pitch['out_zone']
statcast_df_df_pitch['pitch_percent'] = statcast_df_df_pitch['pitches'] / statcast_df_df_pitch['pitches'].sum()

statcast_df = statcast_df.merge(statcast_df_df_pitch[['pitch_name', 'whiff_rate', 'csw_rate', 'xwOBA']], left_on='pitch_name', right_on='pitch_name')
statcast_df = statcast_df.rename(columns={'whiff_rate': 'whiff_rate_league', 'csw_rate': 'csw_rate_league'})
statcast_df['whiff_rate_diff'] = statcast_df.whiff - statcast_df.whiff_rate_league
statcast_df['csw_rate_diff'] = statcast_df.csw - statcast_df.csw_rate_league
statcast_df['xwobacon_diff'] = statcast_df.estimated_woba_using_speedangle - statcast_df.xwOBA
statcast_df['whiff_rate_diff_100'] = (statcast_df.whiff / statcast_df.whiff_rate_league) * 100
statcast_df['csw_rate_diff_100'] = (statcast_df.csw / statcast_df.csw_rate_league) * 100
statcast_df['xwobacon_diff_100'] = (statcast_df.estimated_woba_using_speedangle / statcast_df.xwOBA) * 100

print('all df')
print(statcast_df_df_pitch)

pitch_colours = {
    '4-Seam Fastball': '#648FFF',
    'Slider': '#785EF0',
    'Sinker': '#49A71E',
    'Changeup': '#FE6100',
    'Cutter': '#FFB000',
    'Curveball': '#D9E54B',
    'Sweeper': '#904039',
    'Split-Finger': '#79B3FC',
    'Knuckle Curve': '#450C37',
    'Slurve': '#BEABD8',
    'Other': '#9C8975',
    'Forkball': '#F98A6C',
    'Eephus': '#5CD0D2',
    'Screwball': '#D64012',
    'Slow Curve': '#601CF9',
    'Pitch Out': '#6F2F5C',
    'Knuckleball': '#534B26'}

home_away_dict = {
    'a': 'Away',
    'h': 'Home'}

dict_plots = {
    'pitch_heat': {'title': 'Pitch Distribution', 'note': 'Pitches'},
    'whiff_rate': {'stat': 'whiff', 'decimal_format': 'percent_1', 'title': 'Whiff%', 'plus': 'whiff_rate_diff_100', 'note': 'Swings'},
    'csw_rate': {'stat': 'csw', 'decimal_format': 'percent_1', 'title': 'CSW%', 'plus': 'csw_rate_diff_100', 'note': 'Pitches'},
    'xwOBA': {'stat': 'estimated_woba_using_speedangle', 'decimal_format': 'string_3', 'title': 'xwOBACON', 'plus': 'xwobacon_diff_100', 'note': 'Balls In Play'}
}

dict_plots_name = {
    'pitch_heat': 'Pitch Locations',
    'whiff_rate': 'Whiff%',
    'csw_rate': 'CSW%',
    'xwOBA': 'xwOBACON',
}

#stand_list = ['L','R']
cbar_dict = {
    'stat': [0, 1],
}

# def decimal_format_assign(x):
#     if dict_plots[stat_pick]['decimal_format'] == 'percent_1':
#         return mtick.PercentFormatter(1, decimals=1)
#     if dict_plots[stat_pick]['decimal_format'] == 'string_3':
#         return mtick.FormatStrFormatter('%.3f')
#     if dict_plots[stat_pick]['decimal_format'] == 'string_0':
#         return mtick.FormatStrFormatter('%.0f')
#     if dict_plots[stat_pick]['decimal_format'] == 'string_1':
#         return mtick.FormatStrFormatter('%.1f')

# headers = {'User-agent': 'your bot 0.1'}
headers = {'User-agent': 'your bot 0.1'}
fangraphs_table = 7

from shiny import ui, render, App
import matplotlib.image as mpimg

app_ui = ui.page_navbar(
    ui.nav(
        "Pitcher Summary",
        #ui.panel_title("Simulate a normal distribution"),
        ui.layout_sidebar(
            ui.panel_sidebar(
                ui.input_select("id", "Select Pitcher", pitcher_dict, width=1),
                ui.input_select("date_id", "Select Date", date_dict, width=1),
                ui.input_date_range("date_range_id", "Date range input (Set 'Select Date' to 'Season')",
                                    start=statcast_df.game_date.min(), end=statcast_df.game_date.max()),
                ui.input_radio_buttons("radio_id", "Handedness", {"a": "All", "R": "Right", "L": "Left"}),
                ui.input_radio_buttons("home_id", "Setting", {"all": "All", "h": "Home", "a": "Away"}),
                ui.input_radio_buttons("heat_id", "Heat Map Plot (On 2nd Tab)", dict_plots_name),
                width=2
            ),
ui.panel_main( ui.navset_tab( ui.nav('Pitching Summary', ui.output_plot("plot",height = "1400px",width="1400px"),), ui.nav('Heat Maps', ui.output_plot("plot_heat",height = "1400px",width="1400px"),) ), ), ), )) from urllib.request import Request, urlopen from shiny import App, reactive, ui from shiny.ui import h2, tags # importing OpenCV(cv2) module #print(app_ui) def server(input, output, session): @reactive.Effect def _(): print('this guy') print(type(list(statcast_df.pitcher.unique())[0])) print(input.id()) print(statcast_df.pitcher) opts_dict = pd.concat([pd.DataFrame(data={'game_pk':0,'game_opp':'Season'},index=[0]), statcast_df[statcast_df.pitcher == int(input.id())].drop_duplicates(subset=['pitcher','game_pk','game_opp'])[['game_pk','game_opp']].sort_values( by='game_opp')]).set_index('game_pk')['game_opp'].astype(str).to_dict() ui.update_select( "date_id", label="Select Date", choices=opts_dict, ) #@output # @render.text # def txt(): # return f'pitcher_id: "{input.pitcher_id()}"' @output @render.plot(alt="A histogram") def plot(): input_id = input.id() input_date_range_id = input.date_range_id() input_date_id = input.date_id() eury_df = statcast_df[statcast_df.pitcher.astype(int) == int(input_id)].sort_values(by=['game_date','play_id']) #print(input.id()) print(input_date_range_id == '0') print(len(eury_df)) print(str(input_date_id[0])) if input_date_id == '0': if input_date_range_id[0] == statcast_df.game_date.min() and input_date_range_id[1] == statcast_df.game_date.max(): data_df = eury_df.copy() data_df = data_df.reset_index(drop=True) data_df = data_df.dropna(subset=['pitch_name']) else: data_df = eury_df[(eury_df.game_date >= input_date_range_id[0]) & (eury_df.game_date <= input_date_range_id[1])].reset_index(drop=True) data_df = data_df.reset_index(drop=True) data_df = data_df.dropna(subset=['pitch_name']) else: data_df = eury_df[eury_df.game_pk == int(input_date_id)].reset_index(drop=True) data_df = data_df.dropna(subset=['pitch_name']) if len(data_df) < 1: fig, ax = plt.subplots(1, 1, figsize=(16, 16)) ax.text(x=0.5,y=0.5,s='Plot Is Generating',fontsize=32,ha='center') ###return # if input.radio_id() != 'a': data_df = data_df[data_df.stand == input.radio_id()] if input.home_id() != 'all': data_df = data_df[data_df.home_away == input.home_id()] #data_df = data_df.reset_index(drop=True) print('NOWWWW') print(type(eury_df.game_pk.reset_index(drop=True)[0])) print(type(input_date_id)) #time.sleep(5) if input_date_id != '0': time.sleep(5) # stuff_plus_full_df_cut = stuff_plus_full_df[pd.to_datetime(stuff_plus_full_df.date).dt.date == pd.to_datetime(data_df.game_date)[0]] # loc_plus_full_df_cut = loc_plus_full_df[pd.to_datetime(loc_plus_full_df.date).dt.date == pd.to_datetime(data_df.game_date)[0]] # pitching_plus_full_df_cut = pitching_plus_full_df[pd.to_datetime(pitching_plus_full_df.date).dt.date == pd.to_datetime(data_df.game_date)[0]] print('Game Log') try: url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=36&season=2023&month=1000&season1=2023&ind=0&startdate={str(data_df.game_date[0])}&enddate={str(data_df.game_date[0])}&team=0&qual=0&pagenum=1&pageitems=5000' data = requests.get(url, headers = {'User-agent': 'your bot 0.1'}).text soup = BeautifulSoup(data, 'html.parser') stuff_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) stuff_df.columns = stuff_df.columns.droplevel(0) stuff_df = stuff_df.iloc[:-1] stuff_df = stuff_df[stuff_df.columns[1:]] stuff_df.columns = [x.replace('Stf+ ','') for x in stuff_df.columns] stuff_df = 
stuff_df.rename(columns = {'FA':'FF'}) stuff_df['ST'] = stuff_df.SL stuff_df['SV'] = stuff_df.CU # except (KeyError, requests.exceptions.HTTPError) as e: # fig, ax = plt.subplots(1, 1, figsize=(16, 16)) # ax.text(x=0.5,y=0.5,s='Plot Is Generating',fontsize=32,ha='center') # return tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re stuff_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] stuff_plus_full_df_cut = stuff_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) stuff_plus_full_df_cut.fg_id = stuff_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: stuff_plus_full_df_cut = pd.DataFrame() # except: # print('lol') time.sleep(5) try: url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=37&season=2023&month=1000&season1=2023&ind=0&startdate={str(data_df.game_date[0])}&enddate={str(data_df.game_date[0])}&team=0&qual=0&pagenum=1&pageitems=5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') loc_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) loc_df.columns = loc_df.columns.droplevel(0) loc_df = loc_df.iloc[:-1] loc_df = loc_df[loc_df.columns[1:]] loc_df.columns = [x.replace('Loc+ ','') for x in loc_df.columns] loc_df = loc_df.rename(columns = {'FA':'FF'}) loc_df['ST'] = loc_df.SL loc_df['SV'] = loc_df.CU tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re loc_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] loc_plus_full_df_cut = loc_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) loc_plus_full_df_cut.fg_id = loc_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: loc_plus_full_df_cut = pd.DataFrame() time.sleep(5) try: url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=38&season=2023&month=1000&season1=2023&ind=0&startdate={str(data_df.game_date[0])}&enddate={str(data_df.game_date[0])}&team=0&qual=0&pagenum=1&pageitems=5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') pitching_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) pitching_df.columns = pitching_df.columns.droplevel(0) pitching_df = pitching_df.iloc[:-1] pitching_df = pitching_df[pitching_df.columns[1:]] pitching_df.columns = [x.replace('Pit+ ','') for x in pitching_df.columns] pitching_df = pitching_df.rename(columns = {'FA':'FF'}) pitching_df['ST'] = pitching_df.SL pitching_df['SV'] = pitching_df.CU tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re pitching_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] pitching_plus_full_df_cut = pitching_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) pitching_plus_full_df_cut.fg_id = pitching_plus_full_df_cut.fg_id.astype(int) except (KeyError, 
requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: pitching_plus_full_df_cut = pd.DataFrame() else: if input_date_range_id[0] <= statcast_df.game_date.min() and input_date_range_id[1] >= statcast_df.game_date.max(): #time.sleep(5) # stuff_plus_full_df_cut = stuff_plus_season_df.copy() # loc_plus_full_df_cut = loc_plus_season_df.copy() # pitching_plus_full_df_cut = pitching_plus_season_df.copy() print('Running') try: url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&qual=0&type=36&season=2023&month=0&season1=2023&ind=0&team=0&rost=0&age=0&filter=&players=0&startdate=2023-03-30&enddate=2023-12-31&page=1_5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') stuff_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) stuff_df.columns = stuff_df.columns.droplevel(0) stuff_df = stuff_df.iloc[:-1] stuff_df = stuff_df[stuff_df.columns[1:]] stuff_df.columns = [x.replace('Stf+ ','') for x in stuff_df.columns] stuff_df = stuff_df.rename(columns = {'FA':'FF'}) stuff_df['ST'] = stuff_df.SL stuff_df['SV'] = stuff_df.CU tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re stuff_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] stuff_plus_full_df_cut = stuff_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) stuff_plus_full_df_cut.fg_id = stuff_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: stuff_plus_full_df_cut = pd.DataFrame() try: #time.sleep(5) url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&qual=0&type=37&season=2023&month=0&season1=2023&ind=0&team=0&rost=0&age=0&filter=&players=0&startdate=2023-03-30&enddate=2023-12-31&page=1_5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') loc_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) loc_df.columns = loc_df.columns.droplevel(0) loc_df = loc_df.iloc[:-1] loc_df = loc_df[loc_df.columns[1:]] loc_df.columns = [x.replace('Loc+ ','') for x in loc_df.columns] loc_df = loc_df.rename(columns = {'FA':'FF'}) loc_df['ST'] = loc_df.SL loc_df['SV'] = loc_df.CU tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re loc_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] loc_plus_full_df_cut = loc_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) loc_plus_full_df_cut.fg_id = loc_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: loc_plus_full_df_cut = pd.DataFrame() try: #time.sleep(5) url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&qual=0&type=38&season=2023&month=0&season1=2023&ind=0&team=0&rost=0&age=0&filter=&players=0&startdate=2023-03-30&enddate=2023-12-31&page=1_5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') pitching_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) pitching_df.columns = pitching_df.columns.droplevel(0) pitching_df = 
pitching_df.iloc[:-1] pitching_df = pitching_df[pitching_df.columns[1:]] pitching_df.columns = [x.replace('Pit+ ','') for x in pitching_df.columns] pitching_df = pitching_df.rename(columns = {'FA':'FF'}) pitching_df['ST'] = pitching_df.SL pitching_df['SV'] = pitching_df.CU tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re pitching_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] pitching_plus_full_df_cut = pitching_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) pitching_plus_full_df_cut.fg_id = pitching_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: pitching_plus_full_df_cut = pd.DataFrame() else: print('Running') try: #time.sleep(5) url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=36&season=2023&month=1000&season1=2023&ind=0&startdate={str(input.date_range_id()[0])}&enddate={str(input.date_range_id()[1])}&team=0&qual=0&pagenum=1&pageitems=5000&page=1_5000' print(url) data = requests.get(url,headers=headers).text stuff_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) stuff_df.columns = stuff_df.columns.droplevel(0) stuff_df = stuff_df.iloc[:-1] stuff_df = stuff_df[stuff_df.columns[1:]] stuff_df.columns = [x.replace('Stf+ ','') for x in stuff_df.columns] stuff_df = stuff_df.rename(columns = {'FA':'FF'}) stuff_df['ST'] = stuff_df.SL stuff_df['SV'] = stuff_df.CU stuff_df = stuff_df[stuff_df.Name!='No records to display.'] soup = BeautifulSoup(data, 'html.parser') tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re stuff_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] stuff_plus_full_df_cut = stuff_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) stuff_plus_full_df_cut.fg_id = stuff_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: stuff_plus_full_df_cut = pd.DataFrame() try: #time.sleep(5) url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=37&season=2023&month=1000&season1=2023&ind=0&startdate={str(input.date_range_id()[0])}&enddate={str(input.date_range_id()[1])}&team=0&qual=0&pagenum=1&pageitems=5000&page=1_5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') loc_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) loc_df.columns = loc_df.columns.droplevel(0) loc_df = loc_df.iloc[:-1] loc_df = loc_df[loc_df.columns[1:]] loc_df.columns = [x.replace('Loc+ ','') for x in loc_df.columns] loc_df = loc_df.rename(columns = {'FA':'FF'}) loc_df['ST'] = loc_df.SL loc_df['SV'] = loc_df.CU loc_df = loc_df[loc_df.Name!='No records to display.'] # url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=37&season=2023&month=1000&season1=2023&ind=0&startdate={str(input.date_range_id()[0])}&enddate={str(input.date_range_id()[1])}&team=0&qual=0&pagenum=1&pageitems=5000&page=1_5000' # data = requests.get(url).text # soup = BeautifulSoup(data, 'html.parser') tables = soup.find_all('table') # Looking for the table with 
the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re loc_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] loc_plus_full_df_cut = loc_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) loc_plus_full_df_cut.fg_id = loc_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: loc_plus_full_df_cut = pd.DataFrame() try: #time.sleep(5) url = f'https://www.fangraphs.com/leaders-legacy.aspx?pos=all&stats=pit&lg=all&type=38&season=2023&month=1000&season1=2023&ind=0&startdate={str(input.date_range_id()[0])}&enddate={str(input.date_range_id()[1])}&team=0&qual=0&pagenum=1&pageitems=5000&page=1_5000' data = requests.get(url,headers=headers).text soup = BeautifulSoup(data, 'html.parser') pitching_df = pd.read_html(data)[fangraphs_table]#.droplevel(1) pitching_df.columns = pitching_df.columns.droplevel(0) pitching_df = pitching_df.iloc[:-1] pitching_df = pitching_df[pitching_df.columns[1:]] pitching_df.columns = [x.replace('Pit+ ','') for x in pitching_df.columns] pitching_df = pitching_df.rename(columns = {'FA':'FF'}) pitching_df['ST'] = pitching_df.SL pitching_df['SV'] = pitching_df.CU pitching_df = pitching_df[pitching_df.Name!='No records to display.'] tables = soup.find_all('table') # Looking for the table with the classes 'wikitable' and 'sortable' table = soup.find('table', class_='rgMasterTable') import re pitching_df['fg_id'] = [re.findall(r'\d+', x[:10])[0] for x in str(table).split('playerid=')[1:]] pitching_plus_full_df_cut = pitching_df.melt(id_vars=['fg_id','Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True) pitching_plus_full_df_cut.fg_id = pitching_plus_full_df_cut.fg_id.astype(int) except (KeyError, requests.exceptions.HTTPError,ValueError,requests.exceptions.RequestException,urllib.error.HTTPError) as e: pitching_plus_full_df_cut = pd.DataFrame() if len(stuff_plus_full_df_cut) < 1: stuff_plus_full_df_cut = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) if len(loc_plus_full_df_cut) < 1: loc_plus_full_df_cut = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) if len(pitching_plus_full_df_cut) < 1: pitching_plus_full_df_cut = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) print(stuff_plus_full_df_cut) data_df = data_df.merge(right=stuff_plus_full_df_cut,left_on=['key_fangraphs','pitch_type'],right_on=['fg_id','variable'],how='left') data_df = data_df.merge(right=loc_plus_full_df_cut,left_on=['key_fangraphs','pitch_type'],right_on=['fg_id','variable'],how='left',suffixes=['','_loc']) data_df = data_df.merge(right=pitching_plus_full_df_cut,left_on=['key_fangraphs','pitch_type'],right_on=['fg_id','variable'],how='left',suffixes=['','_pitching']) data_df['value'] = data_df['value'].astype(float) data_df['value_loc'] = data_df['value_loc'].astype(float) data_df['value_pitching'] = data_df['value_pitching'].astype(float) data_df['prop'] = data_df.groupby("pitch_name")["pitch"].transform("sum") data_df = data_df.sort_values(by=['prop','value','pitch_name'],ascending=[False,False,True]) if input_date_id == '0': if input_date_range_id[0] == statcast_df.game_date.min() and input_date_range_id[1] == statcast_df.game_date.max(): season_sum = 
requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={str((int(data_df.pitcher.reset_index(drop=True)[0])))}&appContext=majorLeague&" "hydrate=currentTeam,awards," "stats(group=[pitching],type=[yearByYear])").json() print(f"https://statsapi.mlb.com/api/v1/people?personIds={str((int(data_df.pitcher.reset_index(drop=True)[0])))}&appContext=majorLeague&" "hydrate=currentTeam,awards," "stats(group=[pitching],type=[yearByYear])") p_ip = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['inningsPitched'] p_hits = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['whip'] p_er = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['era'] p_pa = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['battersFaced'] p_k = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['strikeOuts'] p_bb = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['baseOnBalls'] summary_df_pitch = pd.DataFrame(data={'ip':p_ip,'hits':p_hits,'er':p_er,'k':p_k,'bb':p_bb,'pitches':p_pa}, index=[0]) summary_df_pitch['k'] = summary_df_pitch['k']/summary_df_pitch['pitches'] summary_df_pitch['bb'] = summary_df_pitch['bb']/summary_df_pitch['pitches'] else: start_date = pd.to_datetime(input_date_range_id[0]).strftime('%m/%d/%Y') end_date = pd.to_datetime(input_date_range_id[1]).strftime('%m/%d/%Y') print(str((int(data_df.pitcher.reset_index(drop=True)[0]))),start_date,end_date) season_sum = requests.get(f"https://statsapi.mlb.com/api/v1/people?personIds={str((int(data_df.pitcher.reset_index(drop=True)[0])))}&hydrate=stats(group=[pitching],type=[byDateRange],startDate={start_date},endDate={end_date},season=2023)").json() print(f"https://statsapi.mlb.com/api/v1/people?personIds={str((int(data_df.pitcher.reset_index(drop=True)[0])))}&hydrate=stats(group=[pitching],type=[byDateRange],startDate={start_date},endDate={end_date},season=2023)") #season_sum = requests.get(f'https://statsapi.mlb.com/api/v1/people?personIds={str((int(data_df.pitcher.reset_index(drop=True)[0])))}&hydrate=stats(group=[hitting],type=[byDateRange],startDate={start_date},endDate={end_date},season=2023)').json() print(season_sum) #test_json['people'][0]['stats'][0]['splits'][0]['stat'] p_ip = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['inningsPitched'] p_hits = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['whip'] p_er = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['era'] p_pa = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['battersFaced'] p_k = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['strikeOuts'] p_bb = season_sum['people'][0]['stats'][0]['splits'][-1]['stat']['baseOnBalls'] summary_df_pitch = pd.DataFrame(data={'ip':p_ip,'hits':p_hits,'er':p_er,'k':p_k,'bb':p_bb,'pitches':p_pa}, index=[0]) summary_df_pitch['k'] = summary_df_pitch['k']/summary_df_pitch['pitches'] summary_df_pitch['bb'] = summary_df_pitch['bb']/summary_df_pitch['pitches'] else: game_sum = requests.get(url='https://statsapi.mlb.com/api/v1.1/game/'+str((int(input_date_id)))+'/feed/live').json() if int(data_df.pitcher.unique()[0]) in game_sum['liveData']['boxscore']['teams']['away']['pitchers']: p_ip = game_sum['liveData']['boxscore']['teams']['away']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['inningsPitched'] p_hits = game_sum['liveData']['boxscore']['teams']['away']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['hits'] p_er = 
game_sum['liveData']['boxscore']['teams']['away']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['earnedRuns'] p_k = game_sum['liveData']['boxscore']['teams']['away']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['strikeOuts'] p_bb = game_sum['liveData']['boxscore']['teams']['away']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['baseOnBalls'] if int(data_df.pitcher.unique()[0]) in game_sum['liveData']['boxscore']['teams']['home']['pitchers']: p_ip = game_sum['liveData']['boxscore']['teams']['home']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['inningsPitched'] p_hits = game_sum['liveData']['boxscore']['teams']['home']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['hits'] p_er = game_sum['liveData']['boxscore']['teams']['home']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['earnedRuns'] p_k = game_sum['liveData']['boxscore']['teams']['home']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['strikeOuts'] p_bb = game_sum['liveData']['boxscore']['teams']['home']['players']['ID'+str(int(data_df.pitcher.unique()[0]))]['stats']['pitching']['baseOnBalls'] summary_df_pitch = pd.DataFrame(data={'ip':p_ip,'hits':p_hits,'er':p_er,'k':p_k,'bb':p_bb}, index=[0]) types_in = ['hit_into_play', 'ball', 'swinging_strike', 'foul', 'blocked_ball', 'called_strike', 'foul_tip', 'swinging_strike_blocked', 'hit_by_pitch', 'foul_bunt', 'pitchout', 'missed_bunt', 'bunt_foul_tip'] whiffs_in = ['swinging_strike', 'foul_tip', 'swinging_strike_blocked','missed_bunt','bunt_foul_tip'] swing_in = ['foul_bunt','foul','hit_into_play','swinging_strike', 'foul_tip', 'swinging_strike_blocked','missed_bunt','bunt_foul_tip'] csw_in = ['swinging_strike', 'called_strike', 'foul_tip', 'swinging_strike_blocked','missed_bunt','bunt_foul_tip'] conditions_pitch = [ (data_df['description'].isin(types_in)), ] choices_pitch = [True] data_df['pitch'] = np.select(conditions_pitch, choices_pitch, default=np.nan) conditions_swings = [ (data_df['description'].isin(swing_in)), ] choices_swings = [True] data_df['swing'] = np.select(conditions_swings, choices_swings, default=np.nan) conditions_whiff = [ (data_df['description'].isin(whiffs_in)), ] choices_whiff = [True] data_df['whiff'] = np.select(conditions_whiff, choices_whiff, default=np.nan) conditions_csw = [ (data_df['description'].isin(csw_in)), ] choices_csw = [True] data_df['csw'] = np.select(conditions_csw, choices_csw, default=np.nan) bip_in = ['field_out', 'double', 'single', 'sac_fly', 'home_run', 'grounded_into_double_play', 'triple', 'force_out', 'field_error', 'double_play', 'fielders_choice_out', 'sac_bunt', 'fielders_choice', 'sac_fly_double_play', 'other_out'] strikeout_in = ['strikeout','strikeout_double_play'] walk_in = ['walk'] conditions_bip = [ (data_df['events'].isin(bip_in)), ] choices_bip = [True] data_df['bip'] = np.select(conditions_bip, choices_bip, default=np.nan) conditions_k = [ (data_df['events'].isin(strikeout_in)), ] choices_k = [True] data_df['k'] = np.select(conditions_k, choices_k, default=np.nan) conditions_bb = [ (data_df['events'].isin(walk_in)), ] choices_bb = [True] data_df['bb'] = np.select(conditions_bb, choices_bb, default=np.nan) data_df.game_date = pd.to_datetime(data_df.game_date).dt.date data_df['in_zone'] = data_df['zone'] < 10 data_df['out_zone'] = data_df['zone'] >= 10 print('OUT OF THE ZONE') print(data_df['chase'].sum()) 
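# Illustrative aside (not part of the original app): Statcast's `zone` field numbers
# the 3x3 strike-zone grid 1-9 and the out-of-zone regions 11-14, which is why
# `zone < 10` flags in-zone pitches above. A "chase" is then a swing at an out-of-zone
# pitch, so chase% = chases / out-of-zone pitches (e.g. 12 chases on 40 out-of-zone
# pitches is a 30% chase rate).
def is_in_zone(zone_code):
    """True for Statcast zone codes 1-9 (inside the rulebook strike zone)."""
    return zone_code < 10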
print(data_df['out_zone'].sum()) conditions_chase = [ ((data_df['description'].isin(swing_in))&(data_df.out_zone)), ] choices_chase = [True] data_df['chase'] = np.select(conditions_chase, choices_chase, default=np.nan) pitch_df_pitch = data_df[data_df['pitch']==1].groupby(['pitch_name']).agg( pitches = ('pitch','sum'), swings = ('swing','sum'), whiff = ('whiff','sum'), csw = ('csw','sum'), out_zone = ('out_zone','sum'), chase = ('chase','sum'), pitch_velocity = ('release_speed','mean'), spin_rate = ('release_spin_rate','mean'), exit_velocity = ('launch_speed','mean'), pfx_x = ('pfx_x','mean'), pfx_z = ('pfx_z','mean'), extension = ('release_extension','mean'), release_x = ('release_pos_x','mean'), release_z = ('release_pos_z','mean'), zone_percent = ('in_zone','mean') , xwOBA = ('estimated_woba_using_speedangle','mean') , stuff_plus = ('value','mean'), loc_plus = ('value_loc','mean'), pitching_plus = ('value_pitching','mean'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_speed = ('launch_speed','mean'), # launch_angle = ('launch_angle','mean'), ).sort_values(by='pitches',ascending=False).reset_index() print('plot df') print(pitch_df_pitch) stuff_plus_all_day_df = stuff_plus_full_df_cut.copy() data_df['spin_axis_pitch'] = [(x + 180) for x in data_df.spin_axis] (((data_df.groupby('pitch_name').mean()[['spin_axis_pitch']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) clock_time = ((data_df.groupby('pitch_name').mean()['spin_axis_pitch']) %360 // 30 )+ (((data_df.groupby('pitch_name').mean()['spin_axis_pitch'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) print('Clocks') print(clock_time) clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').to_frame() #print() pitch_df_pitch = pitch_df_pitch.merge(right=clock_time,left_on='pitch_name',right_index=True) #print(pitch_df_pitch['clock_time']) # if len(stuff_plus_all_day_df) < 1: # stuff_plus_all_day_df = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) # # loc_plus_full_df_cut = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) # # pitching_plus_full_df_cut = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) # if input_date_id != '0': # stuff_plus_all_day_df = stuff_plus_full_df[(stuff_plus_full_df.fg_id == data_df.reset_index(drop=True).key_fangraphs[0]) & # (stuff_plus_full_df.date == str(data_df.reset_index(drop=True).game_date[0]))] # else: # if input_date_range_id[0] == statcast_df.game_date.min() and input_date_range_id[1] == statcast_df.game_date.max(): # stuff_plus_all_day_df = stuff_plus_season_df[(stuff_plus_season_df.fg_id == data_df.reset_index(drop=True).key_fangraphs[0])] # else: # stuff_plus_all_day_df = stuff_plus_full_df_cut[(stuff_plus_full_df_cut.fg_id == data_df.reset_index(drop=True).key_fangraphs[0])] pitch_df_pitch_all = data_df[data_df['pitch']==1].groupby(['pitcher']).agg( pitches = ('pitch','sum'), swings = ('swing','sum'), whiff = ('whiff','sum'), csw = ('csw','sum'), out_zone = ('out_zone','sum'), chase = ('chase','sum'), pitch_velocity = ('release_speed','mean'), spin_rate = ('release_spin_rate','mean'), exit_velocity = ('launch_speed','mean'), pfx_x = ('pfx_x','mean'), pfx_z = ('pfx_z','mean'), extension = ('release_extension','mean'), release_x = ('release_pos_x','mean'), release_z = ('release_pos_z','mean'), zone_percent = ('in_zone','mean') , xwOBA = 
('estimated_woba_using_speedangle','mean') , stuff_plus = ('value','mean'), loc_plus = ('value_loc','mean'), pitching_plus = ('value_pitching','mean'), ).sort_values(by='pitches',ascending=False).reset_index() #print('stff df') #print(stuff_plus_all_day_df) print('Pitch Sum') print(pitch_df_pitch_all) if len(stuff_plus_all_day_df) > 0: stuff_plus_all_day_df = stuff_plus_all_day_df[stuff_plus_all_day_df.fg_id == data_df.key_fangraphs[0]] else: stuff_plus_all_day_df = pd.DataFrame(columns=['fg_id', 'Name', 'Team', 'IP', 'variable', 'value']) pitch_df_pitch_all['pitch_name'] = 'All' if len(stuff_plus_all_day_df) > 0: pitch_df_pitch_all['stuff_plus'] = int(stuff_plus_all_day_df[stuff_plus_all_day_df.variable == 'Stuff+'].reset_index(drop=True)['value'][0]) pitch_df_pitch_all['loc_plus'] = int(stuff_plus_all_day_df[stuff_plus_all_day_df.variable == 'Location+'].reset_index(drop=True)['value'][0]) pitch_df_pitch_all['pitching_plus'] = int(stuff_plus_all_day_df[stuff_plus_all_day_df.variable == 'Pitching+'].reset_index(drop=True)['value'][0]) else: pitch_df_pitch_all['stuff_plus'] = np.nan pitch_df_pitch_all['loc_plus'] = np.nan pitch_df_pitch_all['pitching_plus'] = np.nan print('Pitch Sum') print(pitch_df_pitch_all) if input_date_id != '0': summary_df_pitch['pitcher'] = data_df.full_name_pitcher.unique()[0] summary_df_pitch['pitches'] = pitch_df_pitch.pitches.sum() summary_df_pitch['pitches'] = summary_df_pitch['pitches'].astype(int) summary_df_pitch= summary_df_pitch[['pitcher', 'pitches','ip', 'hits', 'er', 'k', 'bb']] #summary_df_pitch_new.columns = ['Pitcher', 'Pitches','IP', 'Hits', 'ER', 'K', 'BB'] else: summary_df_pitch['pitcher'] = data_df.full_name_pitcher.unique()[0] summary_df_pitch = summary_df_pitch[['pitcher', 'pitches','ip', 'hits', 'er', 'k', 'bb']] pitch_df_pitch['whiff_rate'] = pitch_df_pitch['whiff']/pitch_df_pitch['swings'] pitch_df_pitch['csw_rate'] = pitch_df_pitch['csw']/pitch_df_pitch['pitches'] pitch_df_pitch['chase_percent'] = pitch_df_pitch['chase']/pitch_df_pitch['out_zone'] pitch_df_pitch['pitch_percent'] = pitch_df_pitch['pitches']/pitch_df_pitch['pitches'].sum() pitch_df_pitch_all['whiff_rate'] = pitch_df_pitch_all['whiff']/pitch_df_pitch_all['swings'] pitch_df_pitch_all['csw_rate'] = pitch_df_pitch_all['csw']/pitch_df_pitch_all['pitches'] pitch_df_pitch_all['chase_percent'] = pitch_df_pitch_all['chase']/pitch_df_pitch_all['out_zone'] pitch_df_pitch_all['pitch_percent'] = pitch_df_pitch_all['pitches']/pitch_df_pitch_all['pitches'].sum() pitch_df_pitch_all['spin_axis_pitch'] = '—' pitch_df_pitch = pd.concat([pitch_df_pitch,pitch_df_pitch_all]).reset_index(drop=True) #fig, ax = plt.subplots(3, 2, figsize=(9, 9)) label_labels = data_df.sort_values(by=['prop','value','pitch_name'],ascending=[False,False,True]).pitch_name.unique() #plt.rcParams["figure.figsize"] = [10,10] fig = plt.figure(figsize=(15, 15),dpi=600) plt.rcParams.update({'figure.autolayout': True}) fig.set_facecolor('white') sns.set_theme(style="whitegrid", palette="pastel") # gs = GridSpec(7, 2, width_ratios=[1,1], height_ratios=[1.5,1,1,1,1,1,2.5]) gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,5.5,2.5]) #gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,7-len(label_labels)/4,1+len(label_labels)/4]) gs.update(hspace=0.1, wspace=0.2) #gs.update(hspace=0.1/(len(label_labels)/4), wspace=0.2) # gs.update(left=0.1,right=0.9,top=0.97,bottom=0.03,wspace=0.3,hspace=0.09) # ax1 = plt.subplot(4,1,1) # ax2 = plt.subplot(2,2,2) # ax3 = plt.subplot(2,2,3) # ax4 = plt.subplot(4,1,4) #ax2 = 
plt.subplot(3,3,2) # Add subplots to the grid ax0 = fig.add_subplot(gs[0, :]) #ax1 = fig.add_subplot(gs[2, 0]) ax2 = fig.add_subplot(gs[2, :]) # Subplot at the top-right position ax3 = fig.add_subplot(gs[-1, :]) ax4 = fig.add_subplot(gs[1, :]) # Subplot spanning the entire bottom row # a = {} # k = 0 # while k < len(label_labels): # # dynamically create key # key = f'{k}_plot' # Subplot at the top-left position # # calculate value # value = fig.add_subplot(gs[1+k, 0]) # Subplot at the top-left position # a[key] = value # k += 1 # ax1 = fig.add_subplot(gs[1, 0]) # Subplot at the top-left position # ax3.yaxis.set_visible(False) # ax4.yaxis.set_visible(False) # Customize subplots ax3.tick_params(left = False, right = False , labelleft = False , labelbottom = False, bottom = False) ax4.tick_params(left = False, right = False , labelleft = False , labelbottom = False, bottom = False) ax3.axis('off') ax4.axis('off') ax0.axis('off') # Calculate and set the position of the subplot ax3.set_anchor('C') ax4.set_anchor('C') sns.set_theme(style="whitegrid", palette="pastel") fig.set_facecolor('white') # ax2.set_facecolor('white') ## Legend Plot # sns.scatterplot(ax=ax4,x=data_df.plate_x,y=data_df.plate_z,hue=data_df.pitch_name,palette=colour_palette[:len(data_df.pitch_name.unique())],s=1) # ax4.legend(loc='center',bbox_to_anchor=(0, -0.1, 1, 0.1), # ncol=len(data_df['pitch_name'].unique()), fancybox=True, fontsize=16,facecolor='white',handleheight=2, framealpha=1.0) # # Show the plot # ax4.axis('off') ## Pitch Plot # label_labels = data_df['pitch_name'].unique() # j = 0 # for label in label_labels: # subset = data_df[data_df['pitch_name'] == label] # confidence_ellipse(subset['plate_x'], subset['plate_z'], ax=ax1,edgecolor= colour_palette[j],n_std=1,facecolor= colour_palette[j],alpha=0.2) # j=j+1 font_properties = {'family': 'century gothic', 'size': 16} # n = 0 # ax1.xaxis.set_major_locator(MaxNLocator(integer=True)) # # ax1.set_xlim(1,max(data_df['pitch_count'])) # j = 0 # for label in label_labels: # subset = data_df[data_df['pitch_name'] == label] # if len(subset) >= 1: # print('test',label, len(subset),colour_palette[j]) # confidence_ellipse(subset['release_speed'], subset['release_spin_rate'], ax=ax1,edgecolor= pitch_colours[label],n_std=2,facecolor = pitch_colours[label],alpha=0.2) # j=j+1 # else: # j=j+1 # #sns.kdeplot(data=data_df[data_df.pitch_name == label_labels[n]].release_speed,ax=a[x],color=colour_palette[n],fill=True) # #sns.lineplot(data=data_df[data_df.pitch_name==x],x='pitch_count',y='release_speed',color=colour_palette[n],ax=ax1,zorder=1) # sns.scatterplot(data=data_df,x='release_speed',y='release_spin_rate',hue='pitch_name',palette=pitch_colours,ax=ax1,marker='o',size=50,ec='black',zorder=100,alpha=1) # # ax1.hlines(y=data_df[data_df.pitch_name == label_labels[n]].release_speed.mean(),xmin=-1,xmax=max(data_df['pitch_count']),color=colour_palette[n],linestyles='--',linewidth=1) # # ax1.hlines(y=statcast_df[statcast_df.pitch_name == label_labels[n]].release_speed.mean(),xmin=-1,xmax=max(data_df['pitch_count']),color=colour_palette[n],linestyles='dotted',linewidth=1) # # ax1.text(1.5,statcast_df[statcast_df.pitch_name == label_labels[n]].release_speed.mean(),'League Average', rotation=0, verticalalignment='bottom',ha='left', # # bbox=dict(facecolor='white',alpha=0.5, edgecolor=colour_palette[n], pad=1),fontsize=4) # n = n+1 # ax1.set_xticklabels(ax1.get_xticks(), fontdict=font_properties) # ax1.set_yticklabels(ax1.get_yticks(), fontdict=font_properties) # #a[x].set_ylim(0,1) # 
ax1.set_xlabel('Velocity (mph)', fontdict=font_properties) # ax1.set_ylabel('Spin Rate (rpm)', fontdict=font_properties) # ax1.set_title('Spin Rate vs Velocity',fontdict={'family': 'century gothic', 'size': 12}) # a[x].set_yticks([]) # a[x].vlines(x=data_df[data_df.pitch_name == label_labels[n]].release_speed.mean(),ymin=0,ymax=1,color=colour_palette[n],linestyles='--') # a[x].vlines(x=statcast_df[statcast_df.pitch_name == label_labels[n]].release_speed.mean(),ymin=0,ymax=1,color=colour_palette[n],linestyles='dotted') # sns.scatterplot(ax=ax1,x=data_df.release_pos_x,y=data_df.release_pos_z,hue=data_df.pitch_name,palette=colour_palette[:len(data_df.pitch_name.unique())],s=50,ec='black',alpha=0.7) # ax1.set_xlim(-3.5,3.5) # ax1.set_ylim(0,7) # ax1.hlines(y=statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95),xmin=-17/12/2,xmax=17/12/2,color=colour_palette[8],alpha=0.5,linestyles='-') # ax1.hlines(y=statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),xmin=-17/12/2,xmax=17/12/2,color=colour_palette[8],alpha=0.5,linestyles='-') # ax1.hlines(y=(-statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05)+ # statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95))/3+statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),xmin=-17/12/2,xmax=17/12/2,color=colour_palette[8],alpha=0.5,linestyles='dotted') # ax1.hlines(y=(-statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05)+ # statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95))/3*2+statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),xmin=-17/12/2,xmax=17/12/2,color=colour_palette[8],alpha=0.5,linestyles='dotted') # ax1.vlines(x=-17/12/2,ymin=statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),ymax=statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95),color=colour_palette[8],alpha=0.5,linestyles='-') # ax1.vlines(x=17/12/2,ymin=statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),ymax=statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95),color=colour_palette[8],alpha=0.5,linestyles='-') # ax1.vlines(x=(-17/12/2)+17/12/3,ymin=statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),ymax=statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95),color=colour_palette[8],alpha=0.5,linestyles='dotted') # ax1.vlines(x=(-17/12/2)+17/12*2/3,ymin=statcast_df[statcast_df.zone.isin([7,8,9])].plate_z.quantile(0.05),ymax=statcast_df[statcast_df.zone.isin([1,2,3])].plate_z.quantile(0.95),color=colour_palette[8],alpha=0.5,linestyles='dotted') # ax1.set_xlabel("Catcher's Presepctive (ft)", fontsize=10,fontname='Century Gothic') # ax1.set_ylabel('Vertical Distance From Plate (ft)', fontsize=10,fontname='Century Gothic') ## Break Plot j = 0 for label in label_labels: subset = data_df[data_df['pitch_name'] == label] print(label) if len(subset) > 1: subset['pfx_x'] = subset['pfx_x']*12 subset['pfx_z'] = subset['pfx_z']*12 confidence_ellipse(subset['pfx_x'], subset['pfx_z'], ax=ax2,edgecolor = pitch_colours[label],n_std=2,facecolor= pitch_colours[label],alpha=0.2) j=j+1 else: j=j+1 #data_df = data_df.sort_values(by='prop',ascending=False) sns.scatterplot(ax=ax2,x=data_df.pfx_x*12,y=data_df.pfx_z*12,hue=data_df.pitch_name,palette=pitch_colours,ec='black',alpha=0.7) # ax2.set_xlim(min(-25,-abs(math.floor((data_df['pfx_x'].min()*12-0.01)/5)*5), # -abs(math.floor((data_df['pfx_z'].min()*12-0.01)/5)*5), # -abs(math.ceil((data_df['pfx_x'].max()*12+0.01)/5)*5), # 
-abs(math.ceil((data_df['pfx_z'].max()*12+0.01)/5)*5)), # max(25,abs(math.floor((data_df['pfx_x'].min()*12-0.01)/5)*5), # abs(math.floor((data_df['pfx_z'].min()*12-0.01)/5)*5), # abs(math.ceil((data_df['pfx_x'].max()*12+0.01)/5)*5), # abs(math.ceil((data_df['pfx_z'].max()*12+0.01)/5)*5))) # ax2.set_ylim(min(-25,-abs(math.floor((data_df['pfx_x'].min()*12-0.01)/5)*5), # -abs(math.floor((data_df['pfx_z'].min()*12-0.01)/5)*5), # -abs(math.ceil((data_df['pfx_x'].max()*12+0.01)/5)*5), # -abs(math.ceil((data_df['pfx_z'].max()*12+0.01)/5)*5)), # max(25,abs(math.floor((data_df['pfx_x'].min()*12-0.01)/5)*5), # abs(math.floor((data_df['pfx_z'].min()*12-0.01)/5)*5), # abs(math.ceil((data_df['pfx_x'].max()*12+0.01)/5)*5), # abs(math.ceil((data_df['pfx_z'].max()*12+0.01)/5)*5))) ax2.set_xlim((-25,25)) ax2.set_ylim((-25,25)) ax2.set_title('Pitch Breaks',fontdict={'family': 'century gothic', 'size': 20}) # ax2.set_xlim(math.floor((data_df['pfx_x'].min()*12-0.01)/5)*5,math.ceil((data_df['pfx_x'].max()*12+0.01)/5)*5) # ax2.set_ylim(math.floor((data_df['pfx_z'].min()*12-0.01)/5)*5,math.ceil((data_df['pfx_z'].max()*12+0.01)/5)*5) ax2.hlines(y=0,xmin=-50,xmax=50,color=colour_palette[8],alpha=0.5,linestyles='--') ax2.vlines(x=0,ymin=-50,ymax=50,color=colour_palette[8],alpha=0.5,linestyles='--') ax2.set_xlabel('Horizontal Break (in)', fontsize=14,fontname='Century Gothic') ax2.set_ylabel('Induced Vertical Break (in)', fontsize=14,fontname='Century Gothic') ## Table Plot ## Table Plot df_plot = pitch_df_pitch[['pitch_name','pitches','pitch_percent','pitch_velocity','pfx_z','pfx_x', 'extension','release_z','stuff_plus','loc_plus','whiff_rate','chase_percent','zone_percent','xwOBA','spin_axis_pitch']] df_plot['pitches'] = [int(x) if not math.isnan(x) else np.nan for x in df_plot['pitches']] df_plot['pitch_percent'] = df_plot['pitch_percent'].round(3) df_plot['pitch_velocity'] = df_plot['pitch_velocity'].round(1) df_plot['pfx_z'] = (df_plot['pfx_z']*12).round(1) df_plot['pfx_x'] = (df_plot['pfx_x']*12).round(1) df_plot['extension'] = df_plot['extension'].round(1) df_plot['release_z'] = df_plot['release_z'].round(1) df_plot['stuff_plus'] = [int(x) if not math.isnan(x) else np.nan for x in df_plot['stuff_plus']] df_plot['loc_plus'] = [int(x) if not math.isnan(x) else np.nan for x in df_plot['loc_plus']] # df_plot['pitching_plus'] = [int(x) if not math.isnan(x) else np.nan for x in df_plot['pitching_plus']] df_plot['whiff_rate'] = [round(x,3) if not math.isnan(x) else '—' for x in df_plot['whiff_rate']] df_plot['chase_percent'] = [round(x,3) if not math.isnan(x) else '—' for x in df_plot['chase_percent']] df_plot['zone_percent'] = [round(x,3) if not math.isnan(x) else '—' for x in df_plot['zone_percent']] df_plot['xwOBA'] = [round(x,3) if not math.isnan(x) else '—' for x in df_plot['xwOBA']] #df_plot['spin_axis_pitch'] = [x if not np.nan else '—' for x in df_plot['spin_axis_pitch']] [['pitch_name','pitch_percent','pitch_velocity','pfx_z','pfx_x', 'extension','release_z','stuff_plus','loc_plus','whiff_rate','zone_percent','xwOBA']] plt.rcParams['font.family'] = 'Century Gothic' table = ax3.table(cellText=df_plot.values, colLabels=df_plot.columns, cellLoc='center', colWidths=[0.08,0.04,0.04,.04,0.03, 0.03, 0.05,.08, 0.04,.05, 0.04,.04,0.04, 0.06,0.06], bbox=[0.0, 0, 1, 0.8]) min_font_size = 11 # Set table properties table.auto_set_font_size(False) table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10))) table.scale(1, 0.5) # Customize cell colors #table.get_celld()[(0, 
def get_color(value):
    color = cmap_sum(normalize(value))
    return mcolors.to_hex(color)

up_percent = 1.5
down_percent = 0.5
print(df_plot)
label_labels_plot = df_plot.pitch_name.unique()
for i in range(len(df_plot)):
    if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All':
        # print(float(table.get_celld()[(i+1, 3)].get_text().get_text()))
        # print(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text())))
        # colour_of_pitch
        # Pitch-name cell takes the pitch's own colour; dark text for the lighter pitch colours.
        table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()])
        if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Curveball', 'Split-Finger', 'Slurve', 'Forkball']:
            table.get_celld()[(i+1, 0)].set_text_props(color='#000000', fontweight='bold')
        else:
            table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff', fontweight='bold')
        # table.get_celld()[(i+1, 0)].set_path_effects([path_effects.withStroke(linewidth=2, foreground='black')])
        print(label_labels_plot[i])
        select_df = statcast_df_df_pitch[statcast_df_df_pitch.pitch_name == label_labels_plot[i]]
        print(f'test: {select_df.pitch_velocity_std.mean()}')
        cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000', ])
        print(select_df.pitch_velocity.mean() - select_df.pitch_velocity_std.mean(),
              select_df.pitch_velocity.mean() + select_df.pitch_velocity_std.mean(), )
        # print(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text())))
        # Velocity cell: scale is league mean ± one standard deviation for this pitch type.
        normalize = mcolors.Normalize(vmin=select_df.pitch_velocity.mean() - select_df.pitch_velocity_std.mean(),
                                      vmax=select_df.pitch_velocity.mean() + select_df.pitch_velocity_std.mean())
        if table.get_celld()[(i+1, 3)].get_text().get_text() != '—':
            # print(float(table.get_celld()[(i+1, 3)].get_text().get_text()))
            print(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text())))
            table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text())))
        # Stuff+ and Location+ cells: fixed 70-130 scale.
        cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000', ])
        normalize = mcolors.Normalize(vmin=70, vmax=130)
        table.get_celld()[(i+1, 8)].set_facecolor(get_color(float(table.get_celld()[(i+1, 8)].get_text().get_text())))
        table.get_celld()[(i+1, 9)].set_facecolor(get_color(float(table.get_celld()[(i+1, 9)].get_text().get_text())))
        # table.get_celld()[(i+1, 4)].set_facecolor(get_color(float(table.get_celld()[(i+1, 4)].get_text().get_text())))
        # Whiff%, Chase%, Zone% and xwOBA cells: scale is 50%-150% of the league average for this pitch type.
        normalize = mcolors.Normalize(vmin=select_df.whiff_rate.mean() * down_percent, vmax=select_df.whiff_rate.mean() * up_percent)
        if table.get_celld()[(i+1, 10)].get_text().get_text() != '—':
            table.get_celld()[(i+1, 10)].set_facecolor(get_color(float(table.get_celld()[(i+1, 10)].get_text().get_text())))
        normalize = mcolors.Normalize(vmin=select_df.chase_percent.mean() * down_percent, vmax=select_df.chase_percent.mean() * up_percent)
        if table.get_celld()[(i+1, 11)].get_text().get_text() != '—':
            table.get_celld()[(i+1, 11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text())))
        normalize = mcolors.Normalize(vmin=select_df.zone_percent.mean() * down_percent, vmax=select_df.zone_percent.mean() * up_percent)
        if table.get_celld()[(i+1, 12)].get_text().get_text() != '—':
            table.get_celld()[(i+1, 12)].set_facecolor(get_color(float(table.get_celld()[(i+1, 12)].get_text().get_text())))
        normalize = mcolors.Normalize(vmin=select_df.xwOBA.mean() * down_percent, vmax=select_df.xwOBA.mean() * up_percent)
        if table.get_celld()[(i+1, 13)].get_text().get_text() != '—':
            table.get_celld()[(i+1, 13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text())))
        # normalize = mcolors.Normalize(vmin=select_df[df_plot.columns[2]].mean()*0.9, vmax=select_df[df_plot.columns[2]].mean()*1.1)
        # if table.get_celld()[(i+1, 3)].get_text().get_text() != '—':
        #     table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text())))
        # normalize = mcolors.Normalize(vmin=select_df[df_plot.columns[3]].mean()*down_percent, vmax=select_df[df_plot.columns[3]].mean()*down_percent)
        # if table.get_celld()[(i+1, 3)].get_text().get_text() != '—':
        #     table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text())))
        # normalize = mcolors.Normalize(vmin=select_df[df_plot.columns[4]].mean()*down_percent, vmax=select_df[df_plot.columns[4]].mean()*down_percent)
        # if table.get_celld()[(i+1, 4)].get_text().get_text() != '—':
        #     table.get_celld()[(i+1, 4)].set_facecolor(get_color(float(table.get_celld()[(i+1, 4)].get_text().get_text())))
        # Extension cell: scale is 90%-110% of the league average.
        cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000', ])
        normalize = mcolors.Normalize(vmin=select_df[df_plot.columns[6]].mean() * 0.9, vmax=select_df[df_plot.columns[6]].mean() * 1.1)
        if table.get_celld()[(i+1, 6)].get_text().get_text() != '—':
            table.get_celld()[(i+1, 6)].set_facecolor(get_color(float(table.get_celld()[(i+1, 6)].get_text().get_text())))
        # normalize = mcolors.Normalize(vmin=select_df[df_plot.columns[6]].mean()*down_percent, vmax=[df_plot.columns[6]].mean()*down_percent)
        # if table.get_celld()[(i+1, 6)].get_text().get_text() != '—':
        #     table.get_celld()[(i+1, 6)].set_facecolor(get_color(float(table.get_celld()[(i+1, 6)].get_text().get_text())))
        # xwOBA cell is re-coloured with the reversed colormap (a lower xwOBA is better for the pitcher).
        normalize = mcolors.Normalize(vmin=select_df.xwOBA.mean() * down_percent, vmax=select_df.xwOBA.mean() * up_percent)
        cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF', ])
        if table.get_celld()[(i+1, 13)].get_text().get_text() != '—':
            table.get_celld()[(i+1, 13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text())))
        # normalize = mcolors.Normalize(vmin=select_df.csw_rate.mean()*down_percent, vmax=select_df.csw_rate.mean()*up_percent)  # Define the range of values
        # table.get_celld()[(i+1, 6)].set_facecolor(get_color(float(table.get_celld()[(i+1, 6)].get_text().get_text())))
        # Define the range of values (redundant here: recomputed at the top of the next iteration).
        normalize = mcolors.Normalize(vmin=select_df.pitch_velocity.mean() - select_df.pitch_velocity_std.mean(),
                                      vmax=select_df.pitch_velocity.mean() + select_df.pitch_velocity_std.mean())

# [['pitch_name','pitch_percent','pitch_velocity','pfx_z','pfx_x',
#   'extension','release_z','stuff_plus','loc_plus','whiff_rate','zone_percent','xwOBA']]
new_column_names = ['$\\bf{Pitch\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
                    '$\\bf{HB}$', '$\\bf{Extension}$', '$\\bf{Release\ Height}$', '$\\bf{Stuff+}$', '$\\bf{Location+}$',
                    '$\\bf{Whiff\%}$', '$\\bf{Chase\%}$', '$\\bf{Zone\%}$', '$\\bf{xwOBACON}$', '$\\bf{Spin\ Axis}$']
# new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
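# The '$\bf{...}$' wrappers above are matplotlib mathtext, which renders the
# header labels in bold without a separate FontProperties object; for example,
# ax.set_title('$\\bf{Velocity}$') draws a bold "Velocity". The loop below swaps
# these labels into row 0 of the already-built table.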
for i, col_name in enumerate(new_column_names):
    table.get_celld()[(0, i)].get_text().set_text(col_name)

pitch_col = df_plot['pitch_percent']
for cell in table.get_celld().values():
    if cell.get_text().get_text() in pitch_col.astype(str).values:
        cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))

pitch_col = df_plot['whiff_rate']
for cell in table.get_celld().values():
    if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
        cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))

pitch_col = df_plot['chase_percent']
for cell in table.get_celld().values():
    if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
        cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))

pitch_col = df_plot['zone_percent']
for cell in table.get_celld().values():
    if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
        cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))

pitch_col = df_plot['xwOBA']
for cell in table.get_celld().values():
    if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
        print('xwOBA')
        cell.get_text().set_text('{:,.3f}'.format(float(cell.get_text().get_text())))

float_list = ['pitch_velocity', 'pfx_z', 'pfx_x', 'extension', 'release_z']
for fl in float_list:
    pitch_col = df_plot[fl]
    for cell in table.get_celld().values():
        if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
            cell.get_text().set_text('{:,.1f}'.format(float(cell.get_text().get_text())))

int_list = ['stuff_plus', 'loc_plus', ]
for fl in int_list:
    pitch_col = df_plot[fl]
    for cell in table.get_celld().values():
        if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
            cell.get_text().set_text('{:,.0f}'.format(float(cell.get_text().get_text())))

# pitch_col = df_plot['csw_rate']
# for cell in table.get_celld().values():
#     if cell.get_text().get_text() in pitch_col.astype(str).values:
#         cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))

# for (row, col), cell in table.get_celld().items():
#     if (row == len(df_plot)):
#         cell.set_text_props(fontproperties=FontProperties(weight='bold',style='italic'),fontsize=(min(12,max(12/((len(label_labels)/4)),8))))
# new_column_names = ['$\\bf{'+str(x)+'}$' for x in list(df_plot.loc[len(df_plot)-1])]
# new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
# for i in len(df_plot.columns):
#     table.get_celld()[(len(df_plot), i)].get_text().set_fontweight('bold')

table2 = ax4.table(cellText=summary_df_pitch.values, colLabels=summary_df_pitch.columns, cellLoc='center',
                   colWidths=[0.1, 0.05, .05, .05, 0.05, 0.05, .05, .05], bbox=[0.00, 0.4, 0.955, .80])
# table2 = ax4.table(cellText=summary_df_pitch.values, colLabels=summary_df_pitch.columns, cellLoc='center',
#                    colWidths=[0.1,0.05,.05,.05, 0.05, 0.05,.05,.05], bbox=[0.00, 0.4, 0.955, min(.8,0.8/(len(df_plot)/4))])
# Set table properties
table2.auto_set_font_size(False)
min_font_size = 11
table2.set_fontsize(min(min_font_size, max(min_font_size / ((len(label_labels) / 4)), 10)))
table2.scale(1, 1)
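# Cell text is rewritten in place: any cell whose text matches a value from the
# source column is parsed back to float and reformatted with str.format, e.g.
#     '{:,.1%}'.format(0.312)  -> '31.2%'
#     '{:,.3f}'.format(0.3125) -> '0.312'
# (illustrative numbers; '—' placeholder cells are left untouched). The same
# approach is applied to the season summary table below.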
if input_date_id == '0':
    pitch_col = summary_df_pitch['k']
    for cell in table2.get_celld().values():
        if cell.get_text().get_text() in pitch_col.astype(str).values and cell.get_text().get_text() != '—':
            cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))
    pitch_col = summary_df_pitch['bb']
    for cell in table2.get_celld().values():
        if cell.get_text().get_text() in pitch_col.astype(str).values:
            cell.get_text().set_text('{:,.1%}'.format(float(cell.get_text().get_text())))
    new_column_names = ['$\\bf{Pitcher}$', '$\\bf{PA}$', '$\\bf{IP}$', '$\\bf{WHIP}$', '$\\bf{ERA}$', '$\\bf{K\%}$', '$\\bf{BB\%}$']
else:
    new_column_names = ['$\\bf{Pitcher}$', '$\\bf{Pitches}$', '$\\bf{IP}$', '$\\bf{Hits}$', '$\\bf{ER}$', '$\\bf{K}$', '$\\bf{BB}$']
for i, col_name in enumerate(new_column_names):
    table2.get_celld()[(0, i)].get_text().set_text(col_name)

for (row, col), cell in table.get_celld().items():
    if (row == len(df_plot)):
        cell.set_text_props(fontproperties=FontProperties(weight='bold', style='italic'),
                            fontsize=(min(min_font_size, max(min_font_size / ((len(label_labels) / 4)), 10))))

# table = ax3.table(cellText=pitch_df_pitch[['pitch_name','pitch_percent','spin_rate','exit_velocity','whiff_rate','csw_rate']].values,
#                   colLabels=pitch_df_pitch[['pitch_name','pitch_percent','spin_rate','exit_velocity','whiff_rate','csw_rate']].columns, loc='center')
# Set the table properties
# table.auto_set_font_size(False)
# table.set_fontsize(12)
# table.scale(1.2, 1.2)

# ax1.get_legend().remove()
ax2.get_legend().remove()
# ax1.set_xticklabels(ax1.get_xticks(), fontdict=font_properties)
ax2.set_xticklabels(ax2.get_xticks(), fontdict=font_properties)
# ax1.set_yticklabels(ax1.get_yticks(), fontdict=font_properties)
ax2.set_yticklabels(ax2.get_yticks(), fontdict=font_properties)
# ax1.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
# ax2.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
# ax1.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
# ax2.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
# ax1.set_facecolor('white')
# ax2.set_facecolor('white')
# ax1.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0f'))
# ax2.yaxis.set_major_formatter(decimal_format_assign(x=pitcher_dict_stat[input.stat_y()]))
# ax1.xaxis.set_major_formatter(decimal_format_assign(x=pitcher_dict_stat[input.stat_x()]))
# ax2.yaxis.set_major_formatter(decimal_format_assign(x=pitcher_dict_stat[input.stat_y()]))
# ax1.legend(loc='upper center', bbox_to_anchor=(1, 1.05),
#            ncol=len(label_labels), fancybox=True, shadow=True)
handles, labels = ax2.get_legend_handles_labels()
fig.legend(handles, labels, bbox_to_anchor=(0, 0.775 - 0.005 * len(label_labels), 1, 0.1),
           ncol=len(label_labels), fancybox=True, loc='upper center',
           fontsize=min(max(16 / len(handles) * 5, 7.5), 16 / 3 * 4),
           framealpha=1.0, markerscale=2 * 5 / len(handles))

title_spot = f'{summary_df_pitch.pitcher[0]} Pitching Summary'
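# Subtitle construction below: for a single-game view, line2 reads
# "Team vs Team" at home or "Team @ Team" on the road; for a date-range view it
# reads "2023 Season" or "start to end". Optional ", vs L/R" and home/away
# qualifiers are appended from the batter-side and venue filters.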
if input_date_id != '0':
    if sum(data_df.home_team == data_df.abbreviation_pitcher.reset_index(drop=True)[0]) > 0:
        line2 = f"{mlb_teams_df[mlb_teams_df.team_id == data_df.team_id_pitcher.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]} vs {mlb_teams_df[mlb_teams_df.team_id == data_df.team_id.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]}"
    if sum(data_df.away_team == data_df.abbreviation_pitcher.reset_index(drop=True)[0]) > 0:
        line2 = f"{mlb_teams_df[mlb_teams_df.team_id == data_df.team_id_pitcher.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]} @ {mlb_teams_df[mlb_teams_df.team_id == data_df.team_id.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]}"
    if input.radio_id() != 'a':
        line2 = f"{line2} , vs {input.radio_id()}"
    if input.home_id() != 'all':
        line2 = f"{line2} , {home_away_dict[input.home_id()]}"
    fig.text(x=0.5, y=0.89, s=line2, fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=20)
else:
    if input_date_range_id[0] == statcast_df.game_date.min() and input_date_range_id[1] == statcast_df.game_date.max():
        line2 = '2023 Season'
    else:
        line2 = f'{str(input_date_range_id[0])} to {str(input_date_range_id[1])}'
    if input.radio_id() != 'a':
        line2 = f"{line2} , vs {input.radio_id()}"
    if input.home_id() != 'all':
        line2 = f"{line2} , {home_away_dict[input.home_id()]}"
    fig.text(x=0.5, y=0.88, s=line2, fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=20)

fig.text(x=0.5, y=0.92, s=title_spot, fontname='Century Gothic', ha='center', fontsize=36)
if input_date_id != '0':
    fig.text(x=0.5, y=0.87, s=data_df.game_date[0], fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=16)

# ax1.set_aspect('equal', adjustable='box')
ax2.invert_xaxis()
ax2.set_aspect('equal', adjustable='box')
# ax1.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
ax2.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
ax2.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
# ax1.set_xticklabels(range(1,data_df.pitch_count.max()+1))

fig.text(x=0.05, y=0.03, s='By: @TJStats', fontname='Century Gothic', ha='left', fontsize=16)
fig.text(x=1 - 0.05, y=0.03, s='Data: MLB, Eno Sarris', ha='right', fontname='Century Gothic', fontsize=16)
fig.text(x=0.5, y=0.05, s='Note: Colour Coding Compares to League Average By Pitch', ha='center', fontname='Century Gothic', fontsize=10)
fig.tight_layout()
# fig.set_size_inches(10, 10)
# fig.subplots_adjust(left=0.03, right=0.97, bottom=0.05, top=0.95)
matplotlib.rcParams["figure.dpi"] = 600
# plt.axis('scaled')

@output
@render.plot(alt="heat plot")
def plot_heat(dpi=600):
    print('HEAT')
    input_id = input.id()
    input_date_range_id = input.date_range_id()
    input_date_id = input.date_id()
    eury_df = statcast_df[statcast_df.pitcher.astype(int) == int(input_id)].sort_values(by=['game_date', 'play_id'])
    # print(input.id())
    print(input_date_range_id == '0')
    print(len(eury_df))
    print(str(input_date_id[0]))
    if input_date_id == '0':
        if input_date_range_id[0] == statcast_df.game_date.min() and input_date_range_id[1] == statcast_df.game_date.max():
            data_df = eury_df.copy()
            data_df = data_df.reset_index(drop=True)
            data_df = data_df.dropna(subset=['pitch_name'])
        else:
            data_df = eury_df[(eury_df.game_date >= input_date_range_id[0]) & (eury_df.game_date <= input_date_range_id[1])].reset_index(drop=True)
            data_df = data_df.reset_index(drop=True)
            data_df = data_df.dropna(subset=['pitch_name'])
    else:
        data_df = eury_df[eury_df.game_pk == int(input_date_id)].reset_index(drop=True)
        data_df = data_df.dropna(subset=['pitch_name'])
    if len(data_df) < 1:
        fig, ax = plt.subplots(1, 1, figsize=(16, 16))
        ax.text(x=0.5, y=0.5, s='Plot Is Generating', fontsize=32, ha='center')
        ### return
    #
    if input.radio_id() != 'a':
        data_df = data_df[data_df.stand == input.radio_id()]
    if input.home_id() != 'all':
        data_df = data_df[data_df.home_away == input.home_id()]
    # data_df = data_df.reset_index(drop=True)
    print('NOWWWW')
    print(data_df)
    print(type(eury_df.game_pk.reset_index(drop=True)[0]))
    print(type(input_date_id))
    eury_df['pitch_name'].value_counts(normalize=True)
    eury_df['prop'] = eury_df.groupby("pitch_name")["pitch_name"].transform("count") / len(eury_df)
    eury_df = eury_df.sort_values(by='prop', ascending=False)
    pitch_list = eury_df.pitch_name.unique()
    stat_pick = input.heat_id()
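    # decimal_format_assign (defined next) maps the selected stat's
    # 'decimal_format' key from dict_plots to a matplotlib tick formatter, e.g.
    # 'percent_1' -> mtick.PercentFormatter(1, decimals=1), so a value of 0.312
    # is rendered on the colourbar as '31.2%', while 'string_3' ->
    # mtick.FormatStrFormatter('%.3f') renders it as '0.312'.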
    def decimal_format_assign(x):
        if dict_plots[stat_pick]['decimal_format'] == 'percent_1':
            return mtick.PercentFormatter(1, decimals=1)
        if dict_plots[stat_pick]['decimal_format'] == 'string_3':
            return mtick.FormatStrFormatter('%.3f')
        if dict_plots[stat_pick]['decimal_format'] == 'string_0':
            return mtick.FormatStrFormatter('%.0f')
        if dict_plots[stat_pick]['decimal_format'] == 'string_1':
            return mtick.FormatStrFormatter('%.1f')

    rate_pick = 'rate'
    stand_list = input.radio_id()
    cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#0C7BDC', '#FFFFFF', '#FF0A0A', ])

    # DEFINE STRIKE ZONE
    strike_zone = pd.DataFrame({
        'PlateLocSide': [-0.9, -0.9, 0.9, 0.9, -0.9],
        'PlateLocHeight': [1.5, 3.5, 3.5, 1.5, 1.5]
    })

    # Add strike zone
    def draw_line(axis, alpha_spot=1, catcher_p=True):
        axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'], color='black', linewidth=1.3, zorder=3, alpha=alpha_spot, )
        # ax.plot([-0.2833333, -0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
        # ax.plot([0.2833333, 0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
        # ax.plot([-0.85, 0.85], [2.2, 2.2], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
        # ax.plot([-0.85, 0.85], [2.9, 2.9], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
        if catcher_p:
            # Add dashed line
            # Add home plate
            axis.plot([-0.708, 0.708], [0.15, 0.15], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([-0.708, -0.708], [0.15, 0.3], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([-0.708, 0], [0.3, 0.5], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([0, 0.708], [0.5, 0.3], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([0.708, 0.708], [0.3, 0.15], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
        else:
            axis.plot([-0.708, 0.708], [0.4, 0.4], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([-0.708, -0.9], [0.4, -0.1], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([-0.9, 0], [-0.1, -0.35], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([0, 0.9], [-.35, -0.1], color='black', linewidth=1, alpha=alpha_spot, zorder=1)
            axis.plot([0.9, 0.708], [-0.1, 0.4], color='black', linewidth=1, alpha=alpha_spot, zorder=1)

    # eury_df = eury_df[eury_df.prop > 0.05]
    # pitch_list = eury_df.pitch_name.unique()
    # fig = plt.figure(figsize=(10, 10),dpi=600)
    # plt.rcParams.update({'figure.autolayout': True})
    plt.rcParams['font.family'] = 'Century Gothic'
    sns.set_theme(style="whitegrid", palette="pastel")
    font_plot = 'Arial'
    fig, ax = plt.subplots(math.ceil(len(pitch_list) / 3), 3, figsize=(9, 2 * math.ceil(len(pitch_list) / 3)))
    fig.set_facecolor('white')
    print(stat_pick)
    print(len(ax))
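    # Two plotting modes follow: 'pitch_heat' draws per-pitch location density
    # over the plate (a seaborn KDE when a pitch has enough observations,
    # otherwise a plain scatter), while any other stat_pick aggregates the
    # selected Statcast column into a hexbin heat map with a per-pitch colourbar.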
    if stat_pick == 'pitch_heat':
        data_df = data_df.dropna(subset=['plate_x'])
        # data_df = data_df[data_df.stand.isin(stand_list)]
        data_df['prop'] = data_df.groupby("pitch_name")["pitch_name"].transform("count") / len(eury_df)
        # data_df = data_df.sort_values(by='prop',ascending=False)
        # pitch_list = data_df.pitch_name.unique()
        if len(pitch_list) <= 3:
            # With three or fewer pitch types plt.subplots returns a 1-D axes array; wrap it so ax[i, j] indexing still works.
            ax_list = [[]]
            for a in range(len(ax)):
                ax_list[0].append(ax[a])
            ax = np.array(ax_list)
        # print()
        k = 0
        for i in range(math.ceil(len(pitch_list) / 3)):
            for j in range(3):
                ax[i, j].axis('off')
                if k < len(pitch_list):
                    # sns.kdeplot(data=eury_df_left[(eury_df.pitch_name==pitch_list[k])],x='plate_x',y='plate_z',fill=True,cmap=cmap_sum,ax=ax[i,1],levels=8,bw_adjust=0.75,zorder=2)
                    try:
                        if len(data_df[(data_df.pitch_name == pitch_list[k])]) > 5:
                            sns.kdeplot(data=data_df[(data_df.pitch_name == pitch_list[k])], x='plate_x', y='plate_z',
                                        fill=True, cmap=cmap_sum, ax=ax[i, j], levels=8, bw_adjust=0.7, zorder=2, alpha=0.7)
                        else:
                            sns.scatterplot(data=data_df[(data_df.pitch_name == pitch_list[k])], x='plate_x', y='plate_z',
                                            ax=ax[i, j], color='#FF0A0A', zorder=2, alpha=0.7)
                    except np.linalg.LinAlgError:
                        # The KDE can fail on degenerate samples; fall back to a scatter.
                        sns.scatterplot(data=data_df[(data_df.pitch_name == pitch_list[k])], x='plate_x', y='plate_z',
                                        ax=ax[i, j], color='#FF0A0A', zorder=2, alpha=0.7)
                    # sns.kdeplot(data=eury_df[eury_df.pitch_name==i],x='plate_x',y='plate_z',ax=ax[i,j])
                    draw_line(ax[i, j], alpha_spot=0.75, catcher_p=True)
                    ax[i, j].set_aspect('equal')
                    ax[i, j].set_title(f'\n\n\n\n\n{pitch_list[k]} ({len(data_df[data_df.pitch_name==pitch_list[k]])/len(data_df):.1%})\n{data_df[data_df.pitch_name==pitch_list[k]].release_speed.mean():.1f} mph', fontname=font_plot, fontsize=16)
                    # ax[i,j].set_xlabel('Distance X-Axis',fontname='Century Gothic')
                    # ax[i,j].set_ylabel('Distance Z-Axis',fontname='Century Gothic')
                    ax[i, j].set_xlim((-2.5, 2.5))
                    ax[i, j].set_ylim((-1, 6))
                    ax[i, j].invert_xaxis()
                    if i < math.ceil(len(pitch_list) / 3) - 1:
                        ax[i, j].axhline(-1, color='black', linestyle=':')
                    k = k + 1
        # ax_list.append([ax[0],ax[1],ax[2]])
        # eury_df = eury_df[eury_df.prop > 0.05]
        # pitch_list = eury_df.pitch_name.unique()
        # fig = plt.figure(figsize=(1
        # fig = plt.figure(figsize=(10, 10),dpi=600)
    else:
        # data_df = eury_df[(eury_df.stand.isin(stand_list))].dropna(subset=[dict_plots[stat_pick]['stat']])
        # data_df = data_df[data_df.stand.isin(stand_list)]
        data_df = data_df.dropna(subset=[dict_plots[stat_pick]['stat']])
        data_df['prop'] = data_df.groupby("pitch_name")["pitch_name"].transform("count") / len(eury_df)
        # data_df = data_df.sort_values(by='prop',ascending=False)
        # pitch_list = data_df.pitch_name.unique()
        if len(pitch_list) <= 3:
            ax_list = [[]]
            for a in range(len(ax)):
                ax_list[0].append(ax[a])
            ax = np.array(ax_list)
        # Compute the common extent for both plots
        x_min = -2.5
        x_max = 2.5
        y_min = -1
        y_max = 6
        extent = [x_min, x_max, y_min, y_max]
        k = 0
        for i in range(math.ceil(len(pitch_list) / 3)):
            print(i)
            for j in range(3):
                print(j)
                ax[i, j].axis('off')
                if k < len(pitch_list):
                    # cbar_min = statcast_df_df_pitch[statcast_df_df_pitch.pitch_name==pitch_list[k]]['xwobacon']*0
                    # cbar_max = statcast_df_df_pitch[statcast_df_df_pitch.pitch_name==pitch_list[k]]['xwobacon']*2
                    cbar_min = cbar_dict['stat'][0]
                    cbar_max = cbar_dict['stat'][1]
                    ax[i, j].hexbin(data=data_df[(data_df.pitch_name == pitch_list[k])], x='plate_x', y='plate_z',
                                    cmap=cmap_sum, C=dict_plots[stat_pick]['stat'], vmin=cbar_min, vmax=cbar_max,
                                    gridsize=(15, int(15 / 7 * 5)), extent=extent, edgecolors='black', linewidth=0.5)
                    ax[i, j].set_aspect('equal')
                    ax[i, j].set_xlim((-2.5, 2.5))
                    ax[i, j].set_ylim((-1, 6))
                    ax[i, j].axis('off')
                    draw_line(ax[i, j], alpha_spot=0.75, catcher_p=True)
                    ax[i, j].invert_xaxis()
                    norm = plt.Normalize(cbar_min, cbar_max)
                    sm = plt.cm.ScalarMappable(cmap=cmap_sum, norm=norm)
                    cbar = ax[i, j].figure.colorbar(sm, ax=ax[i, j], orientation='vertical', aspect=15, shrink=0.5,
                                                    format=decimal_format_assign(x=dict_plots[stat_pick]['decimal_format']))
                    cbar.ax.plot([0, 1], [data_df[data_df.pitch_name == pitch_list[k]][dict_plots[stat_pick]['stat']].mean(),
                                          data_df[data_df.pitch_name == pitch_list[k]][dict_plots[stat_pick]["stat"]].mean()], '#000000')
                    cbar.ax.plot([0, 1], [statcast_df_df_pitch[statcast_df_df_pitch['pitch_name'] == pitch_list[k]][stat_pick].values[0],
                                          statcast_df_df_pitch[statcast_df_df_pitch['pitch_name'] == pitch_list[k]][stat_pick].values[0]], '#000000', linestyle='dotted', linewidth='1')
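                    # On each colourbar, the solid line marks this pitcher's average for the
                    # selected stat on that pitch type, and the dotted line marks the league
                    # average from statcast_df_df_pitch, so the hexbin colours can be read
                    # against both references.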
                    ax[i, j].set_title(f'\n\n\n\n\n{pitch_list[k]} ({len(data_df[data_df.pitch_name==pitch_list[k]])/len(data_df):.1%})\n{data_df[data_df.pitch_name==pitch_list[k]][dict_plots[stat_pick]["stat"]].mean():.1%} {dict_plots[stat_pick]["title"]}\n {data_df[data_df.pitch_name==pitch_list[k]][dict_plots[stat_pick]["plus"]].mean():.0f} {dict_plots[stat_pick]["title"]}+', fontname=font_plot, fontsize=16)
                    k = k + 1
                    if i < math.ceil(len(pitch_list) / 3) - 1:
                        ax[i, j].axhline(-1, color='black', linestyle=':')

    # fig.suptitle(f'{name_select} {dict_plots[stat_pick]["title"]} By Pitch',x=0.5,y=1.02,fontsize=24,fontname=font_plot)
    # fig.text(s=f'Colour Scale Compares to League Average xwOBACON+',x=0.5,y=-0.02,fontsize=8,fontname=font_plot,ha='center')
    # fig.text(s=f'Percentage Beside Pitch Name Indicates Proportions of {dict_plots[stat_pick]["note"]}',x=0.5,y=-0.02,fontsize=8,fontname=font_plot,ha='center')
    # fig.text(s=f'By: @TJStats',x=0.05,y=-0.02,fontsize=12,fontname=font_plot,ha='left')
    # fig.text(s=f'Data: MLB',x=1-0.05,y=-0.02,fontsize=12,fontname=font_plot,ha='right')

    title_spot = f'{data_df.full_name_pitcher.values[0]} {dict_plots[stat_pick]["title"]} Heat Map'
    if input_date_id != '0':
        if sum(data_df.home_team == data_df.abbreviation_pitcher.reset_index(drop=True)[0]) > 0:
            line2 = f"{mlb_teams_df[mlb_teams_df.team_id == data_df.team_id_pitcher.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]} vs {mlb_teams_df[mlb_teams_df.team_id == data_df.team_id.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]}"
        if sum(data_df.away_team == data_df.abbreviation_pitcher.reset_index(drop=True)[0]) > 0:
            line2 = f"{mlb_teams_df[mlb_teams_df.team_id == data_df.team_id_pitcher.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]} @ {mlb_teams_df[mlb_teams_df.team_id == data_df.team_id.reset_index(drop=True)[0]].reset_index(drop=True)['franchise'][0]}"
        if input.radio_id() != 'a':
            line2 = f"{line2} , vs {input.radio_id()}"
        if input.home_id() != 'all':
            line2 = f"{line2} , {home_away_dict[input.home_id()]}"
        fig.text(x=0.5, y=0.91, s=line2, fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=20)
    else:
        if input_date_range_id[0] == statcast_df.game_date.min() and input_date_range_id[1] == statcast_df.game_date.max():
            line2 = '2023 Season'
        else:
            line2 = f'{str(input_date_range_id[0])} to {str(input_date_range_id[1])}'
        if input.radio_id() != 'a':
            line2 = f"{line2} , vs {input.radio_id()}"
        if input.home_id() != 'all':
            line2 = f"{line2} , {home_away_dict[input.home_id()]}"
        fig.text(x=0.5, y=.91, s=line2, fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=20)

    fig.suptitle(x=0.5, y=.97, t=title_spot, fontname='Century Gothic', ha='center', fontsize=36)
    if input_date_id != '0':
        try:
            fig.text(x=0.5, y=.89, s=data_df.game_date[0], fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=16)
        except KeyError:
            # Fall back to positional access when index label 0 is missing after filtering.
            fig.text(x=0.5, y=.89, s=data_df.game_date.values[0], fontname='Century Gothic', ha='center', fontstyle='italic', fontsize=16)
    # matplotlib.rcParams["figure.dpi"] = 600

    fig.text(s=f'Percentage Beside Pitch Name Indicates Proportions of {dict_plots[stat_pick]["note"]}', x=0.5, y=0.01, fontsize=10, fontname=font_plot, ha='center')
    fig.text(s='By: @TJStats', x=0.05, y=0.01, fontsize=16, fontname=font_plot, ha='left')
    fig.text(s='Data: MLB', x=1 - 0.05, y=0.01, fontsize=16, fontname=font_plot, ha='right')
    fig.tight_layout()

app = App(app_ui, server)
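# For context, the @output / @render.plot functions above are Shiny for Python
# outputs; `app_ui` and `server` are defined earlier in this file. A minimal,
# illustrative wiring sketch of the same pattern (names other than plot_heat are
# assumptions, not this app's actual layout) would look like:
#
#     from shiny import App, render, ui
#
#     app_ui = ui.page_fluid(ui.output_plot("plot_heat"))
#
#     def server(input, output, session):
#         @output
#         @render.plot(alt="heat plot")
#         def plot_heat():
#             ...
#
#     app = App(app_ui, server)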