Spaces:
Sleeping
Sleeping
rsm-roguchi
committed on
Commit
·
c75151e
0
Parent(s):
Initial clean commit (no binaries, no venv)
Browse files- .dockerignore +6 -0
- .gitattributes +35 -0
- .gitignore +17 -0
- Dockerfile +65 -0
- README.md +19 -0
- app.py +80 -0
- bin/cli.py +92 -0
- build.ipynb +0 -0
- docker-compose.yml +12 -0
- pyproject.toml +25 -0
- requirements.txt +3 -0
- src/__init__.py +0 -0
- src/data.py +28 -0
- src/featurize.py +118 -0
- src/model.py +54 -0
- src/pitchxy.egg-info/PKG-INFO +13 -0
- src/pitchxy.egg-info/SOURCES.txt +7 -0
- src/pitchxy.egg-info/dependency_links.txt +1 -0
- src/pitchxy.egg-info/entry_points.txt +2 -0
- src/pitchxy.egg-info/requires.txt +7 -0
- src/pitchxy.egg-info/top_level.txt +1 -0
- src/plots.py +61 -0
- src/streamlit_app.py +40 -0
- src/tags.py +71 -0
- src/utils.py +9 -0
.dockerignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
data/
|
| 5 |
+
artifacts/
|
| 6 |
+
.git
|
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
|
| 6 |
+
# compiled / native artifacts
|
| 7 |
+
*.so
|
| 8 |
+
*.dll
|
| 9 |
+
*.dylib
|
| 10 |
+
|
| 11 |
+
# local data & outputs
|
| 12 |
+
data/
|
| 13 |
+
artifacts/
|
| 14 |
+
|
| 15 |
+
# misc
|
| 16 |
+
.DS_Store
|
| 17 |
+
EOF
|
Dockerfile
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Resolved merge conflict: the project (multi-stage, app.py) variant is kept;
# the Streamlit template variant (src/streamlit_app.py on port 8501) is dropped.

# ---- base build (dependencies) ----
FROM python:3.11-slim AS base

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# System deps (git for pybaseball, plus basic build tooling if needed)
RUN apt-get update && apt-get install -y --no-install-recommends \
    git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy metadata first to leverage Docker layer caching
COPY pyproject.toml README.md ./
COPY src ./src

# Install package in editable mode (or regular if you prefer)
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e .

# Copy the Streamlit app (optional; used in default CMD)
COPY app.py ./app.py

# Create mount points for persistent cache/artifacts
RUN mkdir -p /app/data/cache /app/artifacts

# ---- runtime image ----
FROM python:3.11-slim AS runtime

# PYTHONPATH: app.py imports data/featurize/model/... as top-level modules that
# live in /app/src (docker-compose.yml sets the same variable).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app/src

WORKDIR /app
COPY --from=base /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=base /usr/local/bin /usr/local/bin
COPY --from=base /app /app

# Expose Streamlit port
EXPOSE 7860

# Default: run Streamlit app (Hugging Face Space style)
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Pitch Dash
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
tags:
|
| 9 |
+
- streamlit
|
| 10 |
+
pinned: false
|
| 11 |
+
short_description: Pitching Data Dashboard
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Welcome to Streamlit!
|
| 15 |
+
|
| 16 |
+
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 17 |
+
|
| 18 |
+
If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
|
| 19 |
+
forums](https://discuss.streamlit.io).
|
app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Streamlit front end for PitchXY: load a Statcast window, cluster pitch
# shapes, and show movement plots, scouting cards and nearest comps.
import os
import sys

# The analysis modules (data, featurize, model, tags, plots) are plain
# top-level modules inside ./src, so that directory must be on sys.path
# BEFORE they are imported (previously the append ran after the imports).
_SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

import pandas as pd
import streamlit as st

from data import load_statcast, default_window
from featurize import infer_ivb_sign, engineer_pitch_features
from model import fit_kmeans, nearest_comps
from tags import xy_cluster_tags
from plots import movement_scatter_xy, radar_quality

st.set_page_config(page_title="PitchXY (Handedness-Aware)", layout="wide")
st.title("⚾ PitchXY — Handedness-Aware Pitch Archetypes & Scouting Cards")

# Sidebar: date window, number of clusters, and cache bypass.
with st.sidebar:
    st.header("Data Window")
    dstart, dend = default_window()
    start = st.text_input("Start YYYY-MM-DD", dstart)
    end = st.text_input("End YYYY-MM-DD", dend)
    k = st.slider("Clusters (k)", 5, 12, 8)
    force = st.checkbox("Force re-download", value=False)

df_raw = load_statcast(start, end, force=force)
if df_raw.empty:
    st.warning("No data for that window.")
    st.stop()

# Pipeline: raw pitches -> per-pitcher/pitch-type features -> clusters -> names.
ivb_sign = infer_ivb_sign(df_raw)
df_feat = engineer_pitch_features(df_raw, ivb_sign)
df_fit, scaler, km, nn = fit_kmeans(df_feat, k=k)
cluster_names = xy_cluster_tags(df_fit)
df_fit["cluster_name"] = df_fit["cluster"].map(cluster_names)

pitcher = st.selectbox("Pitcher", sorted(df_fit["player_name"].unique()))
df_p = df_fit[df_fit["player_name"] == pitcher].sort_values("pitch_type")

tab1, tab2, tab3 = st.tabs(["Movement", "Scouting Card", "Comps"])

with tab1:
    view = st.radio("View", ["Selected pitcher", "All pitchers"], horizontal=True)
    if view == "Selected pitcher":
        st.subheader(f"Movement — {pitcher}")
        st.plotly_chart(
            movement_scatter_xy(df_p, color="pitch_type"), use_container_width=True
        )
    else:
        st.subheader("Movement — All pitchers (cluster context)")
        st.plotly_chart(
            movement_scatter_xy(df_fit, color="cluster_name"), use_container_width=True
        )

with tab2:
    st.subheader(f"Scouting Card — {pitcher}")
    st.dataframe(
        df_p[
            [
                "pitch_type",
                "p_throws",
                "n",
                "velo",
                "ivb_in",
                "hb_as_in",
                "csw",
                "whiff_rate",
                "gb_rate",
                "zone_pct",
                "cluster_name",
            ]
        ]
    )
    # One radar chart per pitch type thrown by the selected pitcher.
    for _, row in df_p.iterrows():
        st.markdown(f"### {row['pitch_type']} — {row['cluster_name']}")
        st.plotly_chart(radar_quality(row), use_container_width=True)

with tab3:
    for _, row in df_p.iterrows():
        st.markdown(f"#### {row['pitch_type']} comps")
        comps = nearest_comps(row, df_fit, scaler, nn, within_pitch_type=True, k=6)
        st.dataframe(comps)
|
bin/cli.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import argparse
|
| 3 |
+
from data import load_statcast, default_window
|
| 4 |
+
from featurize import infer_ivb_sign, engineer_pitch_features
|
| 5 |
+
from model import fit_kmeans, nearest_comps
|
| 6 |
+
from tags import xy_cluster_tags
|
| 7 |
+
from plots import movement_scatter_xy
|
| 8 |
+
from utils import ensure_dirs, ARTIFACTS_DIR
|
| 9 |
+
import plotly.io as pio
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def main():
    """Run the PitchXY pipeline from the command line.

    Downloads (or loads from cache) a Statcast window, engineers per-pitch
    features, clusters them, writes both feature tables to ``ARTIFACTS_DIR``
    as parquet, optionally prints a scouting card plus nearest comps for one
    pitcher, and optionally saves the all-pitcher movement plot as HTML.

    Flags: ``--start`` / ``--end`` (YYYY-MM-DD, each falling back to the
    default window when omitted), ``-k`` (cluster count), ``--pitcher``
    (case-insensitive substring match), ``--save-html``, ``--force``.
    """
    parser = argparse.ArgumentParser(
        description="PitchXY: handedness-aware pitch archetypes"
    )
    parser.add_argument("--start", type=str, help="YYYY-MM-DD")
    parser.add_argument("--end", type=str, help="YYYY-MM-DD")
    parser.add_argument("-k", type=int, default=8)
    parser.add_argument(
        "--pitcher", type=str, help='Filter pitcher by name (e.g. "Cole")'
    )
    parser.add_argument(
        "--save-html", action="store_true", help="Save plots to artifacts/"
    )
    parser.add_argument(
        "--force", action="store_true", help="Force re-download Statcast"
    )
    args = parser.parse_args()

    ensure_dirs()
    # Fill in whichever bound was not supplied; previously a lone --start or
    # --end was silently discarded in favour of the whole default window.
    default_start, default_end = default_window()
    start = args.start or default_start
    end = args.end or default_end
    print(f"Window: {start} → {end}")

    df_raw = load_statcast(start, end, force=args.force)
    if df_raw.empty:
        # Nothing to featurize or cluster; stop before downstream code fails.
        print("No Statcast data for that window.")
        return

    ivb_sign = infer_ivb_sign(df_raw)
    print(f"IVB sign inferred = {ivb_sign} (ride should be positive)")

    df_feat = engineer_pitch_features(df_raw, ivb_sign)
    df_fit, scaler, km, nn = fit_kmeans(df_feat, k=args.k)
    cluster_names = xy_cluster_tags(df_fit)
    df_fit["cluster_name"] = df_fit["cluster"].map(cluster_names)

    # Save artifacts
    feat_p = ARTIFACTS_DIR / "pitch_features.parquet"
    fit_p = ARTIFACTS_DIR / "pitch_features_clusters.parquet"
    df_feat.to_parquet(feat_p, index=False)
    df_fit.to_parquet(fit_p, index=False)
    print(f"Saved: {feat_p}, {fit_p}")

    # Optional pitcher card + comps
    if args.pitcher:
        sub = df_fit[
            df_fit["player_name"].str.contains(args.pitcher, case=False, na=False)
        ]
        if sub.empty:
            print(f"No pitcher matched '{args.pitcher}'")
        else:
            # Several pitchers may match the substring; the first one wins.
            name = sub["player_name"].iloc[0]
            df_p = df_fit[df_fit["player_name"] == name].sort_values("pitch_type")
            print(f"\n=== Scouting Card: {name} ===")
            print(
                df_p[
                    [
                        "pitch_type",
                        "p_throws",
                        "n",
                        "velo",
                        "ivb_in",
                        "hb_as_in",
                        "csw",
                        "whiff_rate",
                        "gb_rate",
                        "zone_pct",
                        "cluster_name",
                    ]
                ].to_string(index=False)
            )
            for _, row in df_p.iterrows():
                comps = nearest_comps(
                    row, df_fit, scaler, nn, within_pitch_type=True, k=6
                )
                print(f"\nNearest comps — {row['pitch_type']} ({row['cluster_name']}):")
                print(comps.to_string(index=False))

    # Movement plot
    fig = movement_scatter_xy(df_fit, color="cluster_name")
    if args.save_html:
        out = ARTIFACTS_DIR / "movement_all.html"
        pio.write_html(fig, file=str(out), auto_open=False, include_plotlyjs="cdn")
        print(f"Saved plot: {out}")


# Without this guard, executing the file defined main() but never called it.
if __name__ == "__main__":
    main()
|
build.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "3.9"
|
| 2 |
+
services:
|
| 3 |
+
pitchxy:
|
| 4 |
+
build: .
|
| 5 |
+
image: pitchxy:latest
|
| 6 |
+
ports:
|
| 7 |
+
- "7860:7860"
|
| 8 |
+
volumes:
|
| 9 |
+
- ./data:/app/data # cache persisted on host
|
| 10 |
+
- ./artifacts:/app/artifacts # outputs on host
|
| 11 |
+
environment:
|
| 12 |
+
- PYTHONPATH=/app/src
|
pyproject.toml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "pitchxy"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Handedness-aware pitch archetypes & scouting cards (XY ride/drop vs arm/glove side)"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.9"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"pandas",
|
| 9 |
+
"numpy",
|
| 10 |
+
"pybaseball",
|
| 11 |
+
"scikit-learn",
|
| 12 |
+
"plotly",
|
| 13 |
+
"pyarrow",
|
| 14 |
+
"streamlit" # needed for HF Space app below
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
[project.scripts]
|
| 18 |
+
pitchxy = "pitchxy.cli:main" # <-- console entry point
|
| 19 |
+
|
| 20 |
+
[build-system]
|
| 21 |
+
requires = ["setuptools>=61"]
|
| 22 |
+
build-backend = "setuptools.build_meta"
|
| 23 |
+
|
| 24 |
+
[tool.setuptools.packages.find]
|
| 25 |
+
where = ["src"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
altair
|
| 2 |
+
pandas
|
| 3 |
+
streamlit
|
src/__init__.py
ADDED
|
File without changes
|
src/data.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from datetime import date
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pybaseball import statcast
|
| 6 |
+
from utils import CACHE_DIR
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def default_window() -> tuple[str, str]:
    """Return ``(start, end)`` as ISO date strings, running from March 1 to today.

    The start is March 1 of the current year, or of the previous year when
    today falls in January or February.
    """
    now = date.today()
    start_year = now.year - 1 if now.month < 3 else now.year
    first_day = date(start_year, 3, 1)
    return first_day.isoformat(), now.isoformat()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _cache_path(start: str, end: str) -> Path:
    """Return the parquet file under ``CACHE_DIR`` used for the given date window."""
    filename = f"statcast_{start}_{end}.parquet"
    return CACHE_DIR / filename
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def load_statcast(start_date: str, end_date: str, force: bool = False) -> pd.DataFrame:
    """Load Statcast pitches for a date window, using a parquet cache on disk.

    Args:
        start_date: First day of the window, passed to ``statcast`` as ``start_dt``.
        end_date: Last day of the window, passed to ``statcast`` as ``end_dt``.
        force: When true, ignore an existing cache file and download again.

    Returns:
        The cached frame if one exists and ``force`` is false; otherwise the
        freshly downloaded frame (rows with a null ``pitch_type`` removed when
        that column is present), which is also written to the cache.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cache_file = _cache_path(start_date, end_date)

    cache_hit = cache_file.exists() and not force
    if cache_hit:
        return pd.read_parquet(cache_file)

    pitches = statcast(start_dt=start_date, end_dt=end_date)
    if "pitch_type" in pitches.columns:
        typed = pitches["pitch_type"].notna()
        pitches = pitches[typed]
    pitches.to_parquet(cache_file, index=False)
    return pitches
|
src/featurize.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
INCHES_PER_FOOT = 12.0
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def infer_ivb_sign(df_raw: pd.DataFrame) -> int:
    """Choose the multiplier (+1 or -1) applied to ``pfx_z`` so 'ride' is positive.

    Only ``df_raw['pfx_z']`` is inspected; no pitch types are hard-coded.

    Returns:
        -1 when the column is missing, has no non-null values, or has a
        negative median; +1 otherwise (including a median of exactly zero).
    """
    if "pfx_z" not in df_raw.columns:
        return -1

    vertical = df_raw["pfx_z"]
    if vertical.dropna().empty:
        return -1

    if vertical.median() < 0:
        return -1
    return +1
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def signed_arm_side(hb_in_raw: pd.Series, p_throws: pd.Series) -> pd.Series:
    """Re-sign horizontal break so one sign means the same side for both hands.

    The raw value is negated for right-handed pitchers and left unchanged for
    everyone else:

        RHP -> -hb_in_raw ; LHP -> +hb_in_raw

    (The previous docstring stated the opposite mapping, which did not match
    the computation.)

    Args:
        hb_in_raw: Raw horizontal break, one value per pitch.
        p_throws: Pitcher handedness per pitch; only the first letter is used,
            case-insensitively. Missing values are treated as "R".

    Returns:
        A Series aligned with ``hb_in_raw`` holding the re-signed break.

    NOTE(review): the result is "arm-side positive" only if the raw value is
    negative for a right-hander's arm-side movement (catcher's-view sign
    convention) — confirm against the Statcast data dictionary.
    """
    handed = p_throws.fillna("R").str.upper().str[0]
    # -1 flips right-handers; anything that is not "R" keeps its raw sign.
    flip = np.where(handed == "R", -1.0, 1.0)
    return hb_in_raw * flip
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _safe_rate(num, den):
|
| 30 |
+
return np.divide(
|
| 31 |
+
num, den, out=np.full_like(num, np.nan, dtype=float), where=den > 0
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def engineer_pitch_features(df: pd.DataFrame, ivb_sign: int) -> pd.DataFrame:
    """Aggregate pitch-level rows into one row per (pitcher, pitch type, hand).

    Args:
        df: Pitch-level frame. ``description``, ``events``, ``pfx_x``,
            ``pfx_z``, ``p_throws``, ``player_name``, ``pitch_type``,
            ``release_speed``, ``release_spin_rate``, ``release_pos_x`` and
            ``release_pos_z`` are read directly; ``bb_type`` and ``zone`` are
            used when present.
        ivb_sign: +1 or -1 multiplier applied to ``pfx_z`` so ride is positive.

    Returns:
        One row per group with: ``n`` (pitch count), mean ``velo``, ``spin``,
        ``ivb_in``, ``hb_as_in``, ``rel_height``, ``rel_side``, and the rates
        ``csw`` ((called strikes + whiffs) / n), ``whiff_rate`` (whiffs /
        swings), ``gb_rate`` (ground balls / balls in play) and ``zone_pct``.
        Rows with a null ``velo``, ``ivb_in`` or ``hb_as_in`` are dropped.

    Changes from the previous version: ground balls were counted from an
    ``events`` list that includes singles, doubles, triples and errors, and
    ``zone_pct`` was (called strikes + balls in play) / n although ``zone``
    was already selected. Both now use the dedicated column when available
    and fall back to the old proxy otherwise.

    NOTE(review): relies on the Statcast conventions ``bb_type ==
    "ground_ball"`` and ``zone`` 1-9 meaning inside the strike zone — confirm
    against the Statcast CSV documentation.
    """
    cols = [
        "pitch_type",
        "player_name",
        "game_date",
        "events",
        "description",
        "p_throws",
        "stand",
        "release_pos_x",
        "release_pos_z",
        "pfx_x",
        "pfx_z",
        "release_speed",
        "release_spin_rate",
        "plate_x",
        "plate_z",
        "zone",
        "bb_type",
    ]
    have = [c for c in cols if c in df.columns]
    df = df[have].copy()

    # outcomes
    desc = df["description"]
    whiff_descs = ["swinging_strike", "swinging_strike_blocked"]
    swing_descs = whiff_descs + ["foul", "hit_into_play"]
    df["is_called_strike"] = (desc == "called_strike").astype(int)
    df["is_swing"] = desc.isin(swing_descs).astype(int)
    df["is_whiff"] = desc.isin(whiff_descs).astype(int)
    df["is_in_play"] = (desc == "hit_into_play").astype(int)

    if "bb_type" in df.columns:
        # Restrict to balls in play so gb_rate can never exceed 1.
        is_grounder = (df["bb_type"] == "ground_ball") & (desc == "hit_into_play")
        df["is_gb"] = is_grounder.fillna(False).astype(int)
    else:
        # Legacy proxy, kept only for frames that lack bb_type.
        df["is_gb"] = (
            df["events"]
            .isin(["groundout", "field_error", "single", "double", "triple"])
            .astype(int)
        )

    has_zone = "zone" in df.columns
    if has_zone:
        zone_num = pd.to_numeric(df["zone"], errors="coerce")
        df["is_in_zone"] = zone_num.between(1, 9).fillna(False).astype(int)
    else:
        # Legacy proxy: called strikes plus balls in play.
        df["is_in_zone"] = df["is_called_strike"] + df["is_in_play"]

    # movement (handedness-aware XY)
    df["hb_in_raw"] = df["pfx_x"] * INCHES_PER_FOOT
    df["ivb_in"] = ivb_sign * df["pfx_z"] * INCHES_PER_FOOT  # + = ride, − = drop
    df["hb_as_in"] = signed_arm_side(df["hb_in_raw"], df.get("p_throws"))

    grp = df.groupby(["player_name", "pitch_type", "p_throws"], as_index=False)
    agg = grp.agg(
        n=("pitch_type", "size"),
        velo=("release_speed", "mean"),
        spin=("release_spin_rate", "mean"),
        ivb_in=("ivb_in", "mean"),
        hb_as_in=("hb_as_in", "mean"),
        rel_height=("release_pos_z", "mean"),
        rel_side=("release_pos_x", "mean"),
        cs=("is_called_strike", "sum"),
        swings=("is_swing", "sum"),
        whiffs=("is_whiff", "sum"),
        inplay=("is_in_play", "sum"),
        gb=("is_gb", "sum"),
        in_zone=("is_in_zone", "sum"),
    )

    agg["csw"] = _safe_rate(agg["cs"] + agg["whiffs"], agg["n"])
    agg["whiff_rate"] = _safe_rate(agg["whiffs"], agg["swings"])
    agg["gb_rate"] = _safe_rate(agg["gb"], agg["inplay"])
    agg["zone_pct"] = _safe_rate(agg["in_zone"], agg["n"])

    keep = [
        "player_name",
        "pitch_type",
        "p_throws",
        "n",
        "velo",
        "spin",
        "ivb_in",
        "hb_as_in",
        "rel_height",
        "rel_side",
        "csw",
        "whiff_rate",
        "gb_rate",
        "zone_pct",
    ]
    return agg[keep].dropna(subset=["velo", "ivb_in", "hb_as_in"])
|
src/model.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sklearn.preprocessing import StandardScaler
|
| 4 |
+
from sklearn.cluster import KMeans
|
| 5 |
+
from sklearn.neighbors import NearestNeighbors
|
| 6 |
+
|
| 7 |
+
# Numeric per-pitch columns used, in this order, as the feature matrix for the
# scaler, KMeans and the nearest-neighbour index. Rows missing any of these
# columns are dropped before fitting.
ARCH_FEATURES = [
    "velo",
    "ivb_in",
    "hb_as_in",
    "rel_height",
    "rel_side",
    "spin",
    "csw",
    "whiff_rate",
    "gb_rate",
    "zone_pct",
]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def fit_kmeans(df_feat: pd.DataFrame, k: int = 8, random_state: int = 42):
    """Standardize the archetype features, cluster them, and index them for lookup.

    Args:
        df_feat: Per-pitch feature table; rows with a null in any
            ``ARCH_FEATURES`` column are excluded.
        k: Number of KMeans clusters.
        random_state: Seed passed to KMeans.

    Returns:
        ``(df, scaler, km, nn)``: a copy of the retained rows with an added
        ``cluster`` label column, the fitted ``StandardScaler``, the fitted
        ``KMeans`` model, and a ``NearestNeighbors`` index fitted on the same
        scaled matrix (row order matches ``df``).
    """
    fitted = df_feat.dropna(subset=ARCH_FEATURES).copy()

    scaler = StandardScaler()
    scaled = scaler.fit_transform(fitted[ARCH_FEATURES].values)

    km = KMeans(n_clusters=k, n_init=20, random_state=random_state)
    fitted["cluster"] = km.fit_predict(scaled)

    nn = NearestNeighbors(n_neighbors=6, metric="euclidean")
    nn.fit(scaled)

    return fitted, scaler, km, nn
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def nearest_comps(
    row: pd.Series, df_fit: pd.DataFrame, scaler, nn, within_pitch_type=True, k=6
):
    """Return up to ``k - 1`` pitches most similar to ``row`` in scaled feature space.

    Args:
        row: One pitch row containing the ``ARCH_FEATURES`` values and
            ``pitch_type`` (``player_name`` / ``p_throws`` are used, when
            present, to recognise the query pitch itself).
        df_fit: The frame ``nn`` was fitted on, in the same row order (looked
            up positionally). Must already carry a ``cluster_name`` column.
        scaler: Fitted scaler used to transform the query.
        nn: Fitted nearest-neighbour index.
        within_pitch_type: Restrict comps to pitches of the same ``pitch_type``.
        k: Neighbourhood size; at most ``k - 1`` comps are returned.

    Returns:
        A frame of comps ordered nearest first, limited to the display columns.

    Two defects of the previous version are fixed here:
      * the query pitch came back as its own nearest comp (distance 0);
      * the pitch-type filter ran after fetching only ``k`` neighbours of any
        type, so the result was often almost empty.
    """
    cols = [
        "player_name",
        "pitch_type",
        "p_throws",
        "velo",
        "ivb_in",
        "hb_as_in",
        "whiff_rate",
        "gb_rate",
        "cluster_name",
    ]
    n_fit = len(df_fit)
    if n_fit == 0:
        return df_fit.reindex(columns=cols).head(0)

    xq = scaler.transform(row[ARCH_FEATURES].values.astype(float).reshape(1, -1))

    # Same-type filtering can discard arbitrarily many neighbours, so rank every
    # fitted row in that case; otherwise k (capped at the sample count) suffices.
    n_query = n_fit if within_pitch_type else min(k, n_fit)
    _, idxs = nn.kneighbors(xq, n_neighbors=n_query)
    comps = df_fit.iloc[idxs[0]].copy()

    same_pitch = comps["pitch_type"] == row["pitch_type"]
    if within_pitch_type:
        comps = comps[same_pitch]
        same_pitch = same_pitch[comps.index]

    # A pitch is identified by (player_name, pitch_type, p_throws); drop the
    # query itself so it is not reported as its own comp.
    is_self = (
        (comps["player_name"] == row.get("player_name"))
        & same_pitch
        & (comps["p_throws"] == row.get("p_throws"))
    )
    comps = comps[~is_self]

    return comps[cols].head(k - 1)
|
src/pitchxy.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: pitchxy
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Handedness-aware pitch archetypes & scouting cards (XY ride/drop vs arm/glove side)
|
| 5 |
+
Requires-Python: >=3.9
|
| 6 |
+
Description-Content-Type: text/markdown
|
| 7 |
+
Requires-Dist: pandas
|
| 8 |
+
Requires-Dist: numpy
|
| 9 |
+
Requires-Dist: pybaseball
|
| 10 |
+
Requires-Dist: scikit-learn
|
| 11 |
+
Requires-Dist: plotly
|
| 12 |
+
Requires-Dist: pyarrow
|
| 13 |
+
Requires-Dist: streamlit
|
src/pitchxy.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyproject.toml
|
| 2 |
+
src/pitchxy.egg-info/PKG-INFO
|
| 3 |
+
src/pitchxy.egg-info/SOURCES.txt
|
| 4 |
+
src/pitchxy.egg-info/dependency_links.txt
|
| 5 |
+
src/pitchxy.egg-info/entry_points.txt
|
| 6 |
+
src/pitchxy.egg-info/requires.txt
|
| 7 |
+
src/pitchxy.egg-info/top_level.txt
|
src/pitchxy.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/pitchxy.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
pitchxy = pitchxy.cli:main
|
src/pitchxy.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
numpy
|
| 3 |
+
pybaseball
|
| 4 |
+
scikit-learn
|
| 5 |
+
plotly
|
| 6 |
+
pyarrow
|
| 7 |
+
streamlit
|
src/pitchxy.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/plots.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
import plotly.graph_objects as go
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def movement_scatter_xy(
    df: pd.DataFrame, color="pitch_type", facet_by_handedness=False
):
    """Scatter ``ivb_in`` (y) against ``hb_as_in`` (x), coloured by ``color``.

    Args:
        df: Frame with the movement columns plus the hover columns listed below.
        color: Column used for point colour and as the legend title.
        facet_by_handedness: When true, split into one panel per ``p_throws``.

    Returns:
        The Plotly figure, with dotted reference lines at x=0 and y=0.
    """
    scatter_args = {
        "x": "hb_as_in",
        "y": "ivb_in",
        "color": color,
        "hover_data": [
            "player_name",
            "pitch_type",
            "p_throws",
            "velo",
            "whiff_rate",
            "gb_rate",
            "csw",
        ],
    }
    if facet_by_handedness:
        scatter_args["facet_col"] = "p_throws"

    # Plot from a copy, as before, so the caller's frame is never touched.
    fig = px.scatter(df.copy(), **scatter_args)

    fig.update_layout(
        xaxis_title="Horizontal: Arm-Side (+) | Glove-Side (−)",
        yaxis_title="Vertical: Ride (+) | Drop (−)",
        legend_title_text=color,
    )
    fig.add_hline(y=0, line_dash="dot")
    fig.add_vline(x=0, line_dash="dot")
    return fig
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def radar_quality(row: pd.Series):
    """Build a filled radar chart of one pitch's four rate stats on a 0-1 axis.

    The axes are ``csw``, ``whiff_rate``, ``gb_rate`` and ``zone_pct``, read
    from ``row`` in that order.
    """
    axes = ["csw", "whiff_rate", "gb_rate", "zone_pct"]
    values = []
    for axis in axes:
        values.append(row[axis])

    trace = go.Scatterpolar(r=values, theta=axes, fill="toself")
    fig = go.Figure(data=trace)

    radial_axis = dict(visible=True, range=[0, 1])
    fig.update_layout(polar=dict(radialaxis=radial_axis), showlegend=False)
    return fig
|
src/streamlit_app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import altair as alt
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# The bare string below is rendered as Markdown by Streamlit.
"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# User controls for the demo spiral.
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)

# Polar coordinates: the radius grows linearly from 0 to 1 along the spiral.
indices = np.linspace(0, 1, num_points)
theta = 2 * np.pi * num_turns * indices
radius = indices

x = radius * np.cos(theta)
y = radius * np.sin(theta)

df = pd.DataFrame({
    "x": x,
    "y": y,
    "idx": indices,
    "rand": np.random.randn(num_points),
})

# Build the chart step by step, then hand it to Streamlit.
base_chart = alt.Chart(df, height=700, width=700)
points = base_chart.mark_point(filled=True)
spiral_chart = points.encode(
    x=alt.X("x", axis=None),
    y=alt.Y("y", axis=None),
    color=alt.Color("idx", legend=None, scale=alt.Scale()),
    size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
)
st.altair_chart(spiral_chart)
|
src/tags.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _mag_label(v, q25, q75, small="Subtle", mid="Moderate", big="Heavy"):
|
| 7 |
+
if v >= q75:
|
| 8 |
+
return big
|
| 9 |
+
if v <= q25:
|
| 10 |
+
return small
|
| 11 |
+
return mid
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _side_label(hb_as):
|
| 15 |
+
return "Arm-Side" if hb_as >= 0 else "Glove-Side"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _vert_label(ivb):
|
| 19 |
+
return "Ride" if ivb >= 0 else "Drop"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def xy_cluster_tags(df_with_clusters: pd.DataFrame) -> dict[int, str]:
    """Build a human-readable name for every cluster label.

    Each name has the form
    ``"<dominant pitch type>: <side> • <magnitude> <Run|Sweep>, <magnitude>
    <Ride|Drop> • <flavors>"``. Movement magnitudes compare the cluster's mean
    absolute break with the 25th/75th percentiles of all rows. Flavors are the
    rate stats whose cluster mean reaches the overall 75th percentile; when
    none does, the single stat furthest above its overall median is used.

    Args:
        df_with_clusters: Frame with ``cluster``, ``pitch_type``, ``ivb_in``,
            ``hb_as_in``, ``whiff_rate``, ``gb_rate`` and ``zone_pct`` columns.

    Returns:
        Mapping from cluster label to its name.
    """
    df = df_with_clusters.copy()

    # Cut points over all rows for absolute vertical / horizontal movement.
    abs_ivb = np.abs(df["ivb_in"])
    abs_hb = np.abs(df["hb_as_in"])
    q_abs_ivb25 = np.nanquantile(abs_ivb, 0.25)
    q_abs_ivb75 = np.nanquantile(abs_ivb, 0.75)
    q_abs_hb25 = np.nanquantile(abs_hb, 0.25)
    q_abs_hb75 = np.nanquantile(abs_hb, 0.75)

    # (flavor label, column) in priority order; the order also breaks ties below.
    flavor_specs = [
        ("Whiff-First", "whiff_rate"),
        ("Grounder-First", "gb_rate"),
        ("Strike-Throwing", "zone_pct"),
    ]
    upper = {col: np.nanquantile(df[col], 0.75) for _, col in flavor_specs}
    median = {col: np.nanquantile(df[col], 0.50) for _, col in flavor_specs}

    tags = {}
    for cluster_id, members in df.groupby("cluster"):
        center = members.mean(numeric_only=True)

        modes = members["pitch_type"].mode()
        dom_pt = "Pitch" if modes.empty else modes.iloc[0]

        side = _side_label(center["hb_as_in"])
        vert = _vert_label(center["ivb_in"])
        mag_side = _mag_label(abs(center["hb_as_in"]), q_abs_hb25, q_abs_hb75)
        mag_vert = _mag_label(abs(center["ivb_in"]), q_abs_ivb25, q_abs_ivb75)

        flavor = [
            label for label, col in flavor_specs if center[col] >= upper[col]
        ]
        if not flavor:
            # Nothing stands out: fall back to the stat furthest above its median.
            diffs = {label: center[col] - median[col] for label, col in flavor_specs}
            flavor.append(max(diffs, key=diffs.get))

        side_noun = "Run" if side == "Arm-Side" else "Sweep"
        vert_noun = "Ride" if vert == "Ride" else "Drop"
        shape = f"{side} • {mag_side} {side_noun}, {mag_vert} {vert_noun}"
        tags[cluster_id] = f"{dom_pt}: {shape} • " + " / ".join(flavor)

    return tags
|
src/utils.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
CACHE_DIR = Path("data/cache")
|
| 4 |
+
ARTIFACTS_DIR = Path("artifacts")
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def ensure_dirs():
    """Create the cache and artifacts directories, with parents, if missing."""
    for directory in (CACHE_DIR, ARTIFACTS_DIR):
        directory.mkdir(parents=True, exist_ok=True)
|