| import polars as pl
|
| import joblib
|
|
|
# Fitted model deserialised with joblib (presumably a LightGBM model, going by
# the file name -- confirm against the training code).
model = joblib.load('stuff_model/lgbm_model_2020_2024.joblib')

# target_stats.txt is parsed as two numbers: the first line is the mean and the
# second line the standard deviation used to z-score the model's predictions.
with open('stuff_model/target_stats.txt', 'r') as file:
    lines = file.readlines()

target_mean, target_std = (float(lines[row].strip()) for row in (0, 1))
|
|
|
|
|
# Model input columns. The order matters: stuff_apply selects these columns in
# this order and hands the resulting array to model.predict.
features = [
    'start_speed', 'spin_rate', 'extension',
    'az', 'ax',
    'x0', 'z0',
    'speed_diff', 'az_diff', 'ax_diff',
]
|
|
|
|
|
def stuff_apply(df: pl.DataFrame) -> pl.DataFrame:
    """Score each pitch with the stuff model and grade it within its pitch type.

    The following columns are added to the result:

    * ``target`` -- raw prediction of the module-level ``model`` on the
      ``features`` columns.
    * ``target_zscore`` -- ``(target - target_mean) / target_std``.
    * ``tj_stuff_plus`` -- ``100 - 10 * target_zscore``, so a lower prediction
      yields a higher score.
    * ``pitch_grade`` -- ``tj_stuff_plus`` standardised with the ``mean`` and
      ``std`` columns (expected to come from
      ``stuff_model/tj_stuff_plus_pitch.csv``), rescaled to ``50 + 10 * z``
      and clipped to the range [20, 80].

    Args:
        df: Frame containing every column listed in ``features`` plus
            ``pitch_type``.

    Returns:
        A new frame holding the input columns, the columns above and the
        columns brought in by the CSV join. The join on ``pitch_type`` is
        called without ``how``, i.e. with polars' default strategy (inner per
        the polars docs), so rows whose pitch type is missing from the CSV do
        not appear in the output.
    """
    # Operate on a clone of the input frame.
    scored = df.clone()

    # Predict first, then derive the z-score and the 100-centred score. Each
    # step needs the column created by the previous one, hence the separate
    # with_columns calls.
    predictions = model.predict(scored[features].to_numpy())
    scored = (
        scored
        .with_columns(pl.Series(name="target", values=predictions))
        .with_columns(
            ((pl.col('target') - target_mean) / target_std).alias('target_zscore')
        )
        .with_columns(
            (100 - (pl.col('target_zscore') * 10)).alias('tj_stuff_plus')
        )
    )

    # Per-pitch-type reference statistics, read from disk on every call.
    pitch_type_stats = pl.read_csv('stuff_model/tj_stuff_plus_pitch.csv')
    graded = scored.join(
        pitch_type_stats, left_on='pitch_type', right_on='pitch_type'
    )

    # Standardise within pitch type, then map onto the clipped 20-80 scale.
    within_type_z = (pl.col('tj_stuff_plus') - pl.col('mean')) / pl.col('std')
    graded = graded.with_columns(within_type_z.alias('pitch_grade'))
    return graded.with_columns(
        (pl.col('pitch_grade') * 10 + 50).clip(20, 80).alias('pitch_grade')
    )