rsm-roguchi committed
Commit c75151e
0 Parent(s)

Initial clean commit (no binaries, no venv)

.dockerignore ADDED
@@ -0,0 +1,6 @@
+ .venv
+ __pycache__/
+ *.pyc
+ data/
+ artifacts/
+ .git
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,16 @@
+ .venv/
+ __pycache__/
+ *.pyc
+ *.pyo
+
+ # compiled / native artifacts
+ *.so
+ *.dll
+ *.dylib
+
+ # local data & outputs
+ data/
+ artifacts/
+
+ # misc
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,65 @@
+ <<<<<<< HEAD
+ FROM python:3.13.5-slim
+
+ WORKDIR /app
+
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     curl \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ COPY requirements.txt ./
+ COPY src/ ./src/
+
+ RUN pip3 install -r requirements.txt
+
+ EXPOSE 8501
+
+ HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+
+ ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+ =======
+ # ---- base build (dependencies) ----
+ FROM python:3.11-slim AS base
+
+ ENV PYTHONDONTWRITEBYTECODE=1 \
+     PYTHONUNBUFFERED=1
+
+ # System deps (git for pybaseball, plus basic build tooling if needed)
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     git && \
+     rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+
+ # Copy metadata first to leverage Docker layer caching
+ COPY pyproject.toml README.md ./
+ COPY src ./src
+
+ # Install package in editable mode (or regular if you prefer)
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -e .
+
+ # Copy the Streamlit app (optional; used in default CMD)
+ COPY app.py ./app.py
+
+ # Create mount points for persistent cache/artifacts
+ RUN mkdir -p /app/data/cache /app/artifacts
+
+ # ---- runtime image ----
+ FROM python:3.11-slim AS runtime
+ ENV PYTHONDONTWRITEBYTECODE=1 \
+     PYTHONUNBUFFERED=1
+
+ WORKDIR /app
+ COPY --from=base /usr/local/lib/python3.11 /usr/local/lib/python3.11
+ COPY --from=base /usr/local/bin /usr/local/bin
+ COPY --from=base /app /app
+
+ # Expose Streamlit port
+ EXPOSE 7860
+
+ # Default: run Streamlit app (Hugging Face Space style)
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
+ >>>>>>> 63696d41 (Initial Commit)
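As committed, the Dockerfile carries both sides of an unresolved merge conflict (`<<<<<<< HEAD`, `=======`, `>>>>>>>`), so `docker build` cannot parse it. Below is a sketch of one possible resolution, assuming the multi-stage variant (port 7860, `app.py`) is the side to keep, since it is the one `docker-compose.yml` and `pyproject.toml` later in this commit line up with; if the Hugging Face template side (port 8501, `src/streamlit_app.py`) is the intended one, keep that block instead and ignore this sketch.

```dockerfile
# Hypothetical resolution, assembled from the multi-stage side of the conflict above.
FROM python:3.11-slim AS base
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# System deps (git for pybaseball)
RUN apt-get update && apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy metadata first so the dependency layer caches independently of app code
COPY pyproject.toml README.md ./
COPY src ./src
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e .
COPY app.py ./app.py
RUN mkdir -p /app/data/cache /app/artifacts

FROM python:3.11-slim AS runtime
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
WORKDIR /app
COPY --from=base /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=base /usr/local/bin /usr/local/bin
COPY --from=base /app /app
EXPOSE 7860
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
```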
README.md ADDED
@@ -0,0 +1,19 @@
+ ---
+ title: Pitch Dash
+ emoji: 🚀
+ colorFrom: red
+ colorTo: red
+ sdk: docker
+ app_port: 8501
+ tags:
+   - streamlit
+ pinned: false
+ short_description: Pitching Data Dashboard
+ ---
+
+ # Welcome to Streamlit!
+
+ Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
+
+ If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
+ forums](https://discuss.streamlit.io).
app.py ADDED
@@ -0,0 +1,82 @@
+ import os, sys
+
+ # Put src/ on the import path before importing the project's flat modules
+ sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
+
+ import streamlit as st
+ import pandas as pd
+ from data import load_statcast, default_window
+ from featurize import infer_ivb_sign, engineer_pitch_features
+ from model import fit_kmeans, nearest_comps
+ from tags import xy_cluster_tags
+ from plots import movement_scatter_xy, radar_quality
+
+ st.set_page_config(page_title="PitchXY (Handedness-Aware)", layout="wide")
+ st.title("⚾ PitchXY — Handedness-Aware Pitch Archetypes & Scouting Cards")
+
+ with st.sidebar:
+     st.header("Data Window")
+     dstart, dend = default_window()
+     start = st.text_input("Start YYYY-MM-DD", dstart)
+     end = st.text_input("End YYYY-MM-DD", dend)
+     k = st.slider("Clusters (k)", 5, 12, 8)
+     force = st.checkbox("Force re-download", value=False)
+
+ df_raw = load_statcast(start, end, force=force)
+ if df_raw.empty:
+     st.warning("No data for that window.")
+     st.stop()
+
+ ivb_sign = infer_ivb_sign(df_raw)
+ df_feat = engineer_pitch_features(df_raw, ivb_sign)
+ df_fit, scaler, km, nn = fit_kmeans(df_feat, k=k)
+ cluster_names = xy_cluster_tags(df_fit)
+ df_fit["cluster_name"] = df_fit["cluster"].map(cluster_names)
+
+ pitcher = st.selectbox("Pitcher", sorted(df_fit["player_name"].unique()))
+ df_p = df_fit[df_fit["player_name"] == pitcher].sort_values("pitch_type")
+
+ tab1, tab2, tab3 = st.tabs(["Movement", "Scouting Card", "Comps"])
+
+ with tab1:
+     view = st.radio("View", ["Selected pitcher", "All pitchers"], horizontal=True)
+     if view == "Selected pitcher":
+         st.subheader(f"Movement — {pitcher}")
+         st.plotly_chart(
+             movement_scatter_xy(df_p, color="pitch_type"), use_container_width=True
+         )
+     else:
+         st.subheader("Movement — All pitchers (cluster context)")
+         st.plotly_chart(
+             movement_scatter_xy(df_fit, color="cluster_name"), use_container_width=True
+         )
+
+
+ with tab2:
+     st.subheader(f"Scouting Card — {pitcher}")
+     st.dataframe(
+         df_p[
+             [
+                 "pitch_type",
+                 "p_throws",
+                 "n",
+                 "velo",
+                 "ivb_in",
+                 "hb_as_in",
+                 "csw",
+                 "whiff_rate",
+                 "gb_rate",
+                 "zone_pct",
+                 "cluster_name",
+             ]
+         ]
+     )
+     for _, row in df_p.iterrows():
+         st.markdown(f"### {row['pitch_type']} — {row['cluster_name']}")
+         st.plotly_chart(radar_quality(row), use_container_width=True)
+
+ with tab3:
+     for _, row in df_p.iterrows():
+         st.markdown(f"#### {row['pitch_type']} comps")
+         comps = nearest_comps(row, df_fit, scaler, nn, within_pitch_type=True, k=6)
+         st.dataframe(comps)
bin/cli.py ADDED
@@ -0,0 +1,96 @@
+ from __future__ import annotations
+ import argparse
+ from data import load_statcast, default_window
+ from featurize import infer_ivb_sign, engineer_pitch_features
+ from model import fit_kmeans, nearest_comps
+ from tags import xy_cluster_tags
+ from plots import movement_scatter_xy
+ from utils import ensure_dirs, ARTIFACTS_DIR
+ import plotly.io as pio
+
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description="PitchXY: handedness-aware pitch archetypes"
+     )
+     parser.add_argument("--start", type=str, help="YYYY-MM-DD")
+     parser.add_argument("--end", type=str, help="YYYY-MM-DD")
+     parser.add_argument("-k", type=int, default=8)
+     parser.add_argument(
+         "--pitcher", type=str, help='Filter pitcher by name (e.g. "Cole")'
+     )
+     parser.add_argument(
+         "--save-html", action="store_true", help="Save plots to artifacts/"
+     )
+     parser.add_argument(
+         "--force", action="store_true", help="Force re-download Statcast"
+     )
+     args = parser.parse_args()
+
+     ensure_dirs()
+     start, end = (
+         (args.start, args.end) if (args.start and args.end) else default_window()
+     )
+     print(f"Window: {start} → {end}")
+
+     df_raw = load_statcast(start, end, force=args.force)
+     ivb_sign = infer_ivb_sign(df_raw)
+     print(f"IVB sign inferred = {ivb_sign} (ride should be positive)")
+
+     df_feat = engineer_pitch_features(df_raw, ivb_sign)
+     df_fit, scaler, km, nn = fit_kmeans(df_feat, k=args.k)
+     cluster_names = xy_cluster_tags(df_fit)
+     df_fit["cluster_name"] = df_fit["cluster"].map(cluster_names)
+
+     # Save artifacts
+     feat_p = ARTIFACTS_DIR / "pitch_features.parquet"
+     fit_p = ARTIFACTS_DIR / "pitch_features_clusters.parquet"
+     df_feat.to_parquet(feat_p, index=False)
+     df_fit.to_parquet(fit_p, index=False)
+     print(f"Saved: {feat_p}, {fit_p}")
+
+     # Optional pitcher card + comps
+     if args.pitcher:
+         sub = df_fit[
+             df_fit["player_name"].str.contains(args.pitcher, case=False, na=False)
+         ]
+         if sub.empty:
+             print(f"No pitcher matched '{args.pitcher}'")
+         else:
+             name = sub["player_name"].iloc[0]
+             df_p = df_fit[df_fit["player_name"] == name].sort_values("pitch_type")
+             print(f"\n=== Scouting Card: {name} ===")
+             print(
+                 df_p[
+                     [
+                         "pitch_type",
+                         "p_throws",
+                         "n",
+                         "velo",
+                         "ivb_in",
+                         "hb_as_in",
+                         "csw",
+                         "whiff_rate",
+                         "gb_rate",
+                         "zone_pct",
+                         "cluster_name",
+                     ]
+                 ].to_string(index=False)
+             )
+             for _, row in df_p.iterrows():
+                 comps = nearest_comps(
+                     row, df_fit, scaler, nn, within_pitch_type=True, k=6
+                 )
+                 print(f"\nNearest comps — {row['pitch_type']} ({row['cluster_name']}):")
+                 print(comps.to_string(index=False))
+
+     # Movement plot
+     fig = movement_scatter_xy(df_fit, color="cluster_name")
+     if args.save_html:
+         out = ARTIFACTS_DIR / "movement_all.html"
+         pio.write_html(fig, file=str(out), auto_open=False, include_plotlyjs="cdn")
+         print(f"Saved plot: {out}")
+
+
+ if __name__ == "__main__":
+     main()
build.ipynb ADDED
The diff for this file is too large to render.
 
docker-compose.yml ADDED
@@ -0,0 +1,12 @@
+ version: "3.9"
+ services:
+   pitchxy:
+     build: .
+     image: pitchxy:latest
+     ports:
+       - "7860:7860"
+     volumes:
+       - ./data:/app/data  # cache persisted on host
+       - ./artifacts:/app/artifacts  # outputs on host
+     environment:
+       - PYTHONPATH=/app/src
pyproject.toml ADDED
@@ -0,0 +1,25 @@
+ [project]
+ name = "pitchxy"
+ version = "0.1.0"
+ description = "Handedness-aware pitch archetypes & scouting cards (XY ride/drop vs arm/glove side)"
+ readme = "README.md"
+ requires-python = ">=3.9"
+ dependencies = [
+     "pandas",
+     "numpy",
+     "pybaseball",
+     "scikit-learn",
+     "plotly",
+     "pyarrow",
+     "streamlit"  # needed for HF Space app below
+ ]
+
+ [project.scripts]
+ pitchxy = "pitchxy.cli:main"  # <-- console entry point
+
+ [build-system]
+ requires = ["setuptools>=61"]
+ build-backend = "setuptools.build_meta"
+
+ [tool.setuptools.packages.find]
+ where = ["src"]
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ altair
+ pandas
+ streamlit
src/__init__.py ADDED
(empty file)
src/data.py ADDED
@@ -0,0 +1,28 @@
+ from __future__ import annotations
+ from datetime import date
+ from pathlib import Path
+ import pandas as pd
+ from pybaseball import statcast
+ from utils import CACHE_DIR
+
+
+ def default_window() -> tuple[str, str]:
+     today = date.today()
+     start = date(today.year if today.month >= 3 else today.year - 1, 3, 1)
+     return start.isoformat(), today.isoformat()
+
+
+ def _cache_path(start: str, end: str) -> Path:
+     return CACHE_DIR / f"statcast_{start}_{end}.parquet"
+
+
+ def load_statcast(start_date: str, end_date: str, force: bool = False) -> pd.DataFrame:
+     CACHE_DIR.mkdir(parents=True, exist_ok=True)
+     cp = _cache_path(start_date, end_date)
+     if cp.exists() and not force:
+         return pd.read_parquet(cp)
+     df = statcast(start_dt=start_date, end_dt=end_date)
+     if "pitch_type" in df.columns:
+         df = df[df["pitch_type"].notna()]
+     df.to_parquet(cp, index=False)
+     return df
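A small usage sketch of the caching behaviour in `load_statcast`: the first call for a window downloads via `pybaseball.statcast` and writes `data/cache/statcast_<start>_<end>.parquet`, repeat calls with the same window read that parquet back, and `force=True` re-downloads. It assumes `src/` is on the import path:

```python
from data import default_window, load_statcast  # assumes src/ is on sys.path or PYTHONPATH

start, end = default_window()                     # March 1 of the current season through today
df = load_statcast(start, end)                    # first call: downloads, then writes the parquet cache
df_cached = load_statcast(start, end)             # same window: read back from data/cache/, no download
df_fresh = load_statcast(start, end, force=True)  # ignore the cache and re-download
```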
src/featurize.py ADDED
@@ -0,0 +1,118 @@
+ from __future__ import annotations
+ import numpy as np
+ import pandas as pd
+
+ INCHES_PER_FOOT = 12.0
+
+
+ def infer_ivb_sign(df_raw: pd.DataFrame) -> int:
+     """
+     Data-driven IVB orientation: pick +1 or -1 so 'ride' is positive.
+     Uses only df_raw['pfx_z'] (no hardcoding of pitch types).
+     """
+     if "pfx_z" not in df_raw.columns or df_raw["pfx_z"].dropna().empty:
+         return -1
+     med = df_raw["pfx_z"].median()
+     return -1 if med < 0 else +1
+
+
+ def signed_arm_side(hb_in_raw: pd.Series, p_throws: pd.Series) -> pd.Series:
+     """
+     Convert Statcast pfx_x (catcher-right +) into 'arm-side positive' regardless of handedness.
+     RHP → −pfx_x is arm-side ; LHP → +pfx_x is arm-side (hence the negation below).
+     """
+     handed = p_throws.fillna("R").str.upper().str[0]
+     sign = np.where(handed == "R", 1.0, -1.0)
+     return -hb_in_raw * sign
+
+
+ def _safe_rate(num, den):
+     return np.divide(
+         num, den, out=np.full_like(num, np.nan, dtype=float), where=den > 0
+     )
+
+
+ def engineer_pitch_features(df: pd.DataFrame, ivb_sign: int) -> pd.DataFrame:
+     cols = [
+         "pitch_type",
+         "player_name",
+         "game_date",
+         "events",
+         "description",
+         "p_throws",
+         "stand",
+         "release_pos_x",
+         "release_pos_z",
+         "pfx_x",
+         "pfx_z",
+         "release_speed",
+         "release_spin_rate",
+         "plate_x",
+         "plate_z",
+         "zone",
+     ]
+     have = [c for c in cols if c in df.columns]
+     df = df[have].copy()
+
+     # outcomes
+     df["is_called_strike"] = (df["description"] == "called_strike").astype(int)
+     df["is_swing"] = (
+         df["description"]
+         .isin(["swinging_strike", "swinging_strike_blocked", "foul", "hit_into_play"])
+         .astype(int)
+     )
+     df["is_whiff"] = (
+         df["description"]
+         .isin(["swinging_strike", "swinging_strike_blocked"])
+         .astype(int)
+     )
+     df["is_in_play"] = (df["description"] == "hit_into_play").astype(int)
+     df["is_gb"] = (  # events-based proxy for ground-ball outcomes (bb_type is not pulled)
+         df["events"]
+         .isin(["groundout", "field_error", "single", "double", "triple"])
+         .astype(int)
+     )
+
+     # movement (handedness-aware XY)
+     df["hb_in_raw"] = df["pfx_x"] * INCHES_PER_FOOT
+     df["ivb_in"] = ivb_sign * df["pfx_z"] * INCHES_PER_FOOT  # + = ride, − = drop
+     df["hb_as_in"] = signed_arm_side(df["hb_in_raw"], df.get("p_throws"))
+
+     grp = df.groupby(["player_name", "pitch_type", "p_throws"], as_index=False)
+     agg = grp.agg(
+         n=("pitch_type", "size"),
+         velo=("release_speed", "mean"),
+         spin=("release_spin_rate", "mean"),
+         ivb_in=("ivb_in", "mean"),
+         hb_as_in=("hb_as_in", "mean"),
+         rel_height=("release_pos_z", "mean"),
+         rel_side=("release_pos_x", "mean"),
+         cs=("is_called_strike", "sum"),
+         swings=("is_swing", "sum"),
+         whiffs=("is_whiff", "sum"),
+         inplay=("is_in_play", "sum"),
+         gb=("is_gb", "sum"),
+     )
+
+     agg["csw"] = _safe_rate(agg["cs"] + agg["whiffs"], agg["n"])
+     agg["whiff_rate"] = _safe_rate(agg["whiffs"], agg["swings"])
+     agg["gb_rate"] = _safe_rate(agg["gb"], agg["inplay"])
+     agg["zone_pct"] = _safe_rate(agg["cs"] + agg["inplay"], agg["n"])
+
+     keep = [
+         "player_name",
+         "pitch_type",
+         "p_throws",
+         "n",
+         "velo",
+         "spin",
+         "ivb_in",
+         "hb_as_in",
+         "rel_height",
+         "rel_side",
+         "csw",
+         "whiff_rate",
+         "gb_rate",
+         "zone_pct",
+     ]
+     return agg[keep].dropna(subset=["velo", "ivb_in", "hb_as_in"])
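A tiny self-contained check of the handedness convention implemented above: `signed_arm_side` flips raw `pfx_x` so arm-side movement comes out positive for both right- and left-handed pitchers, and `infer_ivb_sign` orients `pfx_z` so ride is positive. The numbers are made up for illustration, and `src/` is assumed to be importable:

```python
import pandas as pd
from featurize import infer_ivb_sign, signed_arm_side  # assumes src/ is on the import path

# Made-up horizontal break values in inches (pfx_x * 12): a RHP pitch with
# arm-side run (negative raw pfx_x) and a LHP pitch with arm-side run (positive raw pfx_x).
hb_in_raw = pd.Series([-14.0, 14.0])
p_throws = pd.Series(["R", "L"])
print(signed_arm_side(hb_in_raw, p_throws).tolist())  # [14.0, 14.0] -> arm-side positive for both hands

# infer_ivb_sign only inspects the median of pfx_z; a positive median keeps the sign at +1.
df_demo = pd.DataFrame({"pfx_z": [1.3, 1.1, -0.2]})
print(infer_ivb_sign(df_demo))  # 1
```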
src/model.py ADDED
@@ -0,0 +1,54 @@
+ from __future__ import annotations
+ import pandas as pd
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.cluster import KMeans
+ from sklearn.neighbors import NearestNeighbors
+
+ ARCH_FEATURES = [
+     "velo",
+     "ivb_in",
+     "hb_as_in",
+     "rel_height",
+     "rel_side",
+     "spin",
+     "csw",
+     "whiff_rate",
+     "gb_rate",
+     "zone_pct",
+ ]
+
+
+ def fit_kmeans(df_feat: pd.DataFrame, k: int = 8, random_state: int = 42):
+     df = df_feat.dropna(subset=ARCH_FEATURES).copy()
+     X = df[ARCH_FEATURES].values
+     scaler = StandardScaler()
+     Xs = scaler.fit_transform(X)
+     km = KMeans(n_clusters=k, n_init=20, random_state=random_state)
+     labels = km.fit_predict(Xs)
+     df["cluster"] = labels
+
+     nn = NearestNeighbors(n_neighbors=6, metric="euclidean")
+     nn.fit(Xs)
+     return df, scaler, km, nn
+
+
+ def nearest_comps(
+     row: pd.Series, df_fit: pd.DataFrame, scaler, nn, within_pitch_type=True, k=6
+ ):
+     xq = scaler.transform(row[ARCH_FEATURES].values.reshape(1, -1))
+     dists, idxs = nn.kneighbors(xq, n_neighbors=k)
+     comps = df_fit.iloc[idxs[0]].copy()
+     if within_pitch_type:
+         comps = comps[comps["pitch_type"] == row["pitch_type"]]
+     cols = [
+         "player_name",
+         "pitch_type",
+         "p_throws",
+         "velo",
+         "ivb_in",
+         "hb_as_in",
+         "whiff_rate",
+         "gb_rate",
+         "cluster_name",
+     ]
+     return comps[cols].head(k - 1)
src/pitchxy.egg-info/PKG-INFO ADDED
@@ -0,0 +1,13 @@
+ Metadata-Version: 2.4
+ Name: pitchxy
+ Version: 0.1.0
+ Summary: Handedness-aware pitch archetypes & scouting cards (XY ride/drop vs arm/glove side)
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: pandas
+ Requires-Dist: numpy
+ Requires-Dist: pybaseball
+ Requires-Dist: scikit-learn
+ Requires-Dist: plotly
+ Requires-Dist: pyarrow
+ Requires-Dist: streamlit
src/pitchxy.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,7 @@
+ pyproject.toml
+ src/pitchxy.egg-info/PKG-INFO
+ src/pitchxy.egg-info/SOURCES.txt
+ src/pitchxy.egg-info/dependency_links.txt
+ src/pitchxy.egg-info/entry_points.txt
+ src/pitchxy.egg-info/requires.txt
+ src/pitchxy.egg-info/top_level.txt
src/pitchxy.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
src/pitchxy.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ pitchxy = pitchxy.cli:main
src/pitchxy.egg-info/requires.txt ADDED
@@ -0,0 +1,7 @@
+ pandas
+ numpy
+ pybaseball
+ scikit-learn
+ plotly
+ pyarrow
+ streamlit
src/pitchxy.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+
src/plots.py ADDED
@@ -0,0 +1,61 @@
+ from __future__ import annotations
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+
+
+ def movement_scatter_xy(
+     df: pd.DataFrame, color="pitch_type", facet_by_handedness=False
+ ):
+     dfp = df.copy()
+     if facet_by_handedness:
+         fig = px.scatter(
+             dfp,
+             x="hb_as_in",
+             y="ivb_in",
+             color=color,
+             facet_col="p_throws",
+             hover_data=[
+                 "player_name",
+                 "pitch_type",
+                 "p_throws",
+                 "velo",
+                 "whiff_rate",
+                 "gb_rate",
+                 "csw",
+             ],
+         )
+     else:
+         fig = px.scatter(
+             dfp,
+             x="hb_as_in",
+             y="ivb_in",
+             color=color,
+             hover_data=[
+                 "player_name",
+                 "pitch_type",
+                 "p_throws",
+                 "velo",
+                 "whiff_rate",
+                 "gb_rate",
+                 "csw",
+             ],
+         )
+     fig.update_layout(
+         xaxis_title="Horizontal: Arm-Side (+) | Glove-Side (−)",
+         yaxis_title="Vertical: Ride (+) | Drop (−)",
+         legend_title_text=color,
+     )
+     fig.add_hline(y=0, line_dash="dot")
+     fig.add_vline(x=0, line_dash="dot")
+     return fig
+
+
+ def radar_quality(row: pd.Series):
+     cats = ["csw", "whiff_rate", "gb_rate", "zone_pct"]
+     vals = [row[c] for c in cats]
+     fig = go.Figure(data=go.Scatterpolar(r=vals, theta=cats, fill="toself"))
+     fig.update_layout(
+         polar=dict(radialaxis=dict(visible=True, range=[0, 1])), showlegend=False
+     )
+     return fig
src/streamlit_app.py ADDED
@@ -0,0 +1,40 @@
+ import altair as alt
+ import numpy as np
+ import pandas as pd
+ import streamlit as st
+
+ """
+ # Welcome to Streamlit!
+
+ Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
+ If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
+ forums](https://discuss.streamlit.io).
+
+ In the meantime, below is an example of what you can do with just a few lines of code:
+ """
+
+ num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
+ num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
+
+ indices = np.linspace(0, 1, num_points)
+ theta = 2 * np.pi * num_turns * indices
+ radius = indices
+
+ x = radius * np.cos(theta)
+ y = radius * np.sin(theta)
+
+ df = pd.DataFrame({
+     "x": x,
+     "y": y,
+     "idx": indices,
+     "rand": np.random.randn(num_points),
+ })
+
+ st.altair_chart(alt.Chart(df, height=700, width=700)
+     .mark_point(filled=True)
+     .encode(
+         x=alt.X("x", axis=None),
+         y=alt.Y("y", axis=None),
+         color=alt.Color("idx", legend=None, scale=alt.Scale()),
+         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
+     ))
src/tags.py ADDED
@@ -0,0 +1,71 @@
+ from __future__ import annotations
+ import numpy as np
+ import pandas as pd
+
+
+ def _mag_label(v, q25, q75, small="Subtle", mid="Moderate", big="Heavy"):
+     if v >= q75:
+         return big
+     if v <= q25:
+         return small
+     return mid
+
+
+ def _side_label(hb_as):
+     return "Arm-Side" if hb_as >= 0 else "Glove-Side"
+
+
+ def _vert_label(ivb):
+     return "Ride" if ivb >= 0 else "Drop"
+
+
+ def xy_cluster_tags(df_with_clusters: pd.DataFrame) -> dict[int, str]:
+     df = df_with_clusters.copy()
+
+     q_abs_ivb25 = np.nanquantile(np.abs(df["ivb_in"]), 0.25)
+     q_abs_ivb75 = np.nanquantile(np.abs(df["ivb_in"]), 0.75)
+     q_abs_hb25 = np.nanquantile(np.abs(df["hb_as_in"]), 0.25)
+     q_abs_hb75 = np.nanquantile(np.abs(df["hb_as_in"]), 0.75)
+
+     q_wh75 = np.nanquantile(df["whiff_rate"], 0.75)
+     q_gb75 = np.nanquantile(df["gb_rate"], 0.75)
+     q_zn75 = np.nanquantile(df["zone_pct"], 0.75)
+     q_wh50 = np.nanquantile(df["whiff_rate"], 0.50)
+     q_gb50 = np.nanquantile(df["gb_rate"], 0.50)
+     q_zn50 = np.nanquantile(df["zone_pct"], 0.50)
+
+     tags = {}
+     for c, sub in df.groupby("cluster"):
+         row = sub.mean(numeric_only=True)
+         dom_pt = (
+             sub["pitch_type"].mode().iloc[0]
+             if not sub["pitch_type"].mode().empty
+             else "Pitch"
+         )
+
+         side = _side_label(row["hb_as_in"])
+         vert = _vert_label(row["ivb_in"])
+         mag_side = _mag_label(abs(row["hb_as_in"]), q_abs_hb25, q_abs_hb75)
+         mag_vert = _mag_label(abs(row["ivb_in"]), q_abs_ivb25, q_abs_ivb75)
+
+         flavor = []
+         if row["whiff_rate"] >= q_wh75:
+             flavor.append("Whiff-First")
+         if row["gb_rate"] >= q_gb75:
+             flavor.append("Grounder-First")
+         if row["zone_pct"] >= q_zn75:
+             flavor.append("Strike-Throwing")
+         if not flavor:
+             diffs = {
+                 "Whiff-First": row["whiff_rate"] - q_wh50,
+                 "Grounder-First": row["gb_rate"] - q_gb50,
+                 "Strike-Throwing": row["zone_pct"] - q_zn50,
+             }
+             flavor.append(max(diffs, key=diffs.get))
+
+         side_noun = "Run" if side == "Arm-Side" else "Sweep"
+         vert_noun = "Ride" if vert == "Ride" else "Drop"
+         shape = f"{side} • {mag_side} {side_noun}, {mag_vert} {vert_noun}"
+         tags[c] = f"{dom_pt}: {shape} • " + " / ".join(flavor)
+
+     return tags
src/utils.py ADDED
@@ -0,0 +1,9 @@
+ from pathlib import Path
+
+ CACHE_DIR = Path("data/cache")
+ ARTIFACTS_DIR = Path("artifacts")
+
+
+ def ensure_dirs():
+     CACHE_DIR.mkdir(parents=True, exist_ok=True)
+     ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)