# File: 2026_MLB_Model/models/hr_probability_engine.py (41.1 kB)
# Hosting-page header: "Optimize props load path and reuse modeled state" (2885bcc, Syntrex)
from __future__ import annotations
from typing import Any
import pandas as pd
from models.batter_baseline import build_batter_feature_row, compute_batter_baseline
from models.batter_trend_model import build_batter_trend_row
from models.environment_model import compute_environment_adjustment
from models.opportunity_model import compute_opportunity_adjustment
from models.pitcher_adjustment import build_pitcher_feature_row, compute_pitcher_adjustment
from models.rolling_form_model import (
build_batter_rolling_form_row,
build_pitcher_rolling_form_row,
compute_upcoming_rolling_adjustment,
)
from models.shared_matchup_engine import compose_shared_matchup_context
from models.trajectory_model import build_trajectory_features, compute_trajectory_adjustment
def _clamp(val: float, lo: float, hi: float) -> float:
return max(lo, min(hi, val))
def _safe_float(value: Any, default: float = 0.0) -> float:
try:
if value is None:
return default
text = str(value).strip().lower()
if text in {"", "nan", "none"}:
return default
return float(value)
except Exception:
return default
def _empty_result(player_name: str, mode: str) -> dict[str, Any]:
skipped = [
"live_pitch_telemetry",
"bullpen_transition",
"count_base_out_state",
"live_opportunity_window",
"live_fatigue_degradation",
] if mode == "pregame" else []
return {
"player_name": player_name,
"pitcher_name": "",
"projected_home_pitcher": "",
"projected_away_pitcher": "",
"projected_starter_available": False,
"projected_starter_match_status": "projected_starter_unavailable",
"mode": mode,
"formula_version": "hr_v1_shared_matchup",
"baseline_hr_prob": None,
"adjusted_hr_prob": None,
"raw_hr_prob": None,
"calibrated_hr_prob": None,
"pregame_hr_prob": None,
"bet_ev": None,
"confidence_score": None,
"confidence_bucket": None,
"confidence_reasons": [],
"lineup_slot_used": None,
"lineup_slot_source": "unknown",
"team_total_used": None,
"team_total_source": "unknown",
"expected_pa": None,
"pa_multiplier": None,
"opportunity_mode": None,
"opportunity_reason": None,
"opportunity_hr_adjustment": 0.0,
"pitcher_hr_adjustment": 0.0,
"trend_hr_adjustment": 0.0,
"zone_hr_adjustment": 0.0,
"family_zone_hr_adjustment": 0.0,
"arsenal_hr_adjustment": 0.0,
"pulled_contact_hr_adjustment": 0.0,
"env_hr_adjustment": 0.0,
"park_hr_adjustment": 0.0,
"weather_hr_adjustment": 0.0,
"platoon_hr_adjustment": 0.0,
"trajectory_hr_adjustment": 0.0,
"rolling_hr_adjustment": 0.0,
"applied_layers": "",
"skipped_layers": "|".join(skipped),
"pregame_pitcher_context_adj": None,
"pregame_park_context_adj": None,
"pregame_weather_context_adj": None,
"pregame_context_applied": False,
"matchup_platoon_multiplier": 1.0,
"matchup_platoon_reason": "unknown",
"pitcher_reliability": 0.0,
"pitcher_resolution_status": "pitcher_missing",
"trend_reliability": 0.0,
"zone_reliability": 0.0,
"zone_status": "unavailable",
"zone_store_sample_size": 0,
"family_zone_reliability": 0.0,
"family_zone_status": "unavailable",
"family_zone_batter_sample_size": 0,
"family_zone_pitcher_sample_size": 0,
"arsenal_reliability": 0.0,
"arsenal_status": "unavailable",
"arsenal_batter_sample_size": 0,
"arsenal_pitcher_sample_size": 0,
"pulled_contact_reliability": 0.0,
"environment_reliability": 0.0,
"trajectory_reliability": 0.0,
"rolling_reliability": 0.0,
"opportunity_reliability": 0.0,
"damage_zone_alignment_subscore": None,
"pitch_mix_exposure_subscore": None,
"tunnel_damage_subscore": None,
"count_pattern_damage_subscore": None,
"handedness_damage_subscore": None,
"arsenal_fit_subscore": None,
"environment_amplification_subscore": None,
"hr_opportunity_projection": None,
"matchup_coverage_confidence": None,
"shared_matchup_available": False,
"component_source_map": {},
"telemetry_path_status": "baseline_only",
"hr_model_tier": "baseline_only_degraded",
"modeled_row_available": False,
"modeled_row_missing_reason": "missing_baseline",
"expected_pitch_mix_by_count": {},
"expected_zone_mix_by_count": {},
"expected_pitch_zone_mix_by_count": {},
"tunnel_pair_scores": [],
"predicted_attack_regions": [],
"predicted_damage_regions": [],
"predicted_whiff_regions": [],
"model_voice_reason_candidates": [],
"model_voice_tags": [],
"reason_candidate_count": 0,
}
def _sample_reliability(sample_size: Any, k: float, minimum: float = 0.0) -> float:
    """Turn a sample size into a shrinkage weight ``n / (n + k)``.

    Args:
        sample_size: Observation count; parsed with ``_safe_float`` and
            floored at zero.
        k: Shrinkage constant; values below 1.0 are treated as 1.0.
        minimum: Lower bound applied to a positive-sample weight.

    Returns:
        ``0.0`` when there is no positive sample, otherwise the weight
        clamped to ``[minimum, 1.0]``.
    """
    observations = max(0.0, _safe_float(sample_size, 0.0))
    if observations > 0.0:
        shrinkage = max(1.0, float(k))
        weight = observations / (observations + shrinkage)
        return _clamp(weight, minimum, 1.0)
    # The ``minimum`` floor is deliberately not applied to an empty sample.
    return 0.0
def _apply_reliability(raw_adjustment: float, reliability: float) -> float:
return raw_adjustment * _clamp(reliability, 0.0, 1.0)
def _append_reason_candidate(
    reason_candidates: list[dict[str, Any]],
    *,
    category: str,
    direction: str,
    magnitude: float,
    template_key: str,
    template_inputs: dict[str, Any] | None = None,
) -> None:
    """Append one explanation candidate to ``reason_candidates`` in place.

    Nothing is appended when the parsed magnitude is within ``1e-6`` of zero.
    The stored entry carries both the absolute and the signed magnitude, plus
    a shallow copy of ``template_inputs`` (an empty dict when none is given).
    """
    signed = _safe_float(magnitude, 0.0)
    strength = abs(signed)
    if strength <= 1e-6:
        return
    entry: dict[str, Any] = {
        "category": category,
        "direction": direction,
        "magnitude": strength,
        "signed_magnitude": signed,
        "template_key": template_key,
        "template_inputs": dict(template_inputs or {}),
    }
    reason_candidates.append(entry)
def _compute_environment_reliability(game_row: dict[str, Any], weather_row: dict[str, Any] | None) -> float:
has_venue = bool(str(game_row.get("venue") or "").strip())
weather_row = dict(weather_row or {})
has_weather = any(
weather_row.get(key) is not None and str(weather_row.get(key)).strip() not in {"", "nan", "None"}
for key in ("temperature_f", "wind_speed_mph", "wind_direction_deg")
)
if has_venue and has_weather:
return 1.0
if has_venue:
return 0.82
if has_weather:
return 0.74
return 0.55
def _calibrate_hr_probability(raw_prob: float, baseline_prob: float | None) -> float:
    """Pull ``raw_prob`` toward a baseline anchor and soften the tails.

    The anchor is ``baseline_prob`` (0.045 when missing) limited to
    ``[0.015, 0.12]``. The raw value keeps 90% of its distance from the
    anchor. Values under 0.02 receive a small upward nudge (at most 0.002),
    values over 0.12 a small downward nudge (at most 0.010). The result is
    limited to ``[0.005, 0.25]``.
    """
    anchor = _clamp(_safe_float(baseline_prob, 0.045), 0.015, 0.12)
    shrunk = anchor + (raw_prob - anchor) * 0.90
    # The two tail conditions cannot both hold, so a single chain is enough.
    if raw_prob < 0.02:
        shrunk = shrunk + min(0.002, (0.02 - raw_prob) * 0.10)
    elif raw_prob > 0.12:
        shrunk = shrunk - min(0.010, (raw_prob - 0.12) * 0.25)
    return _clamp(shrunk, 0.005, 0.25)
def _compute_props_confidence(
    *,
    batter_features: dict[str, Any],
    pitcher_row: dict[str, Any],
    result: dict[str, Any],
    applied_layers: list[str],
) -> dict[str, Any]:
    """Score how much trust a modelled HR probability deserves.

    Starts from 52 points and adds or subtracts for sample sizes, pitcher
    resolution, lineup slot, team total, environment completeness, the mean
    of the per-layer reliabilities, the number of applied layers and the
    sanity of the final probability. The score is limited to ``[1, 100]``.

    Args:
        batter_features: Read for ``"plate_appearances"``.
        pitcher_row: Read for ``"sample_size"``.
        result: The in-progress result record; only read, never modified.
        applied_layers: Names of layers that produced a non-zero adjustment.

    Returns:
        A dict with ``confidence_score`` (rounded to one decimal),
        ``confidence_bucket`` ("high" >= 80, "medium" >= 60, else "low") and
        ``confidence_reasons`` (de-duplicated, first-seen order).
    """
    score = 52.0
    reasons: list[str] = []

    # --- sample sizes -------------------------------------------------------
    batter_pa = int(_safe_float(batter_features.get("plate_appearances"), 0.0) or 0.0)
    pitcher_sample = int(_safe_float(pitcher_row.get("sample_size"), 0.0) or 0.0)
    batter_rel = _sample_reliability(batter_pa, 160.0)
    pitcher_rel = _sample_reliability(pitcher_sample, 180.0)

    score += batter_rel * 16.0
    if batter_rel < 0.30:
        reasons.append("Limited batter sample")

    # --- pitcher resolution -------------------------------------------------
    if str(result.get("pitcher_name") or "").strip():
        score += 8.0 + pitcher_rel * 8.0
        if pitcher_rel < 0.30:
            reasons.append("Limited pitcher sample")
    else:
        score -= 12.0
        reasons.append("Pitcher unresolved")

    # --- lineup slot and team total ------------------------------------------
    lineup_slot = result.get("lineup_slot_used")
    lineup_source = str(result.get("lineup_slot_source") or "unknown")
    team_total = result.get("team_total_used")
    if lineup_slot is not None and lineup_source == "confirmed":
        score += 8.0
    elif lineup_slot is not None:
        score += 5.0
        reasons.append("Using projected lineup slot")
    else:
        score -= 4.0
        reasons.append("Lineup slot unavailable")
    if team_total is not None:
        score += 4.0
    else:
        reasons.append("Team total unavailable")

    # --- environment ---------------------------------------------------------
    env_rel = _safe_float(result.get("environment_reliability"), 0.0) or 0.0
    score += env_rel * 6.0
    if env_rel < 0.75:
        reasons.append("Incomplete environment context")

    # --- mean reliability across the model layers ----------------------------
    layer_keys = [
        "pitcher_reliability",
        "trend_reliability",
        "zone_reliability",
        "family_zone_reliability",
        "arsenal_reliability",
        "pulled_contact_reliability",
        "trajectory_reliability",
        "rolling_reliability",
        "opportunity_reliability",
    ]
    layer_vals = [_safe_float(result.get(key), 0.0) or 0.0 for key in layer_keys]
    score += (sum(layer_vals) / len(layer_vals)) * 12.0

    # --- breadth of applied layers -------------------------------------------
    if len(applied_layers) >= 5:
        score += 6.0
    elif len(applied_layers) >= 3:
        score += 3.0
    else:
        reasons.append("Limited context layers active")

    # --- probability sanity ---------------------------------------------------
    # Check for missing values BEFORE coercion: ``_safe_float`` always returns
    # a float, so testing its result against None could never skip this
    # section and a missing probability was judged as if it were 0.0.
    raw_value = result.get("raw_hr_prob")
    calibrated_value = result.get("calibrated_hr_prob")
    if raw_value is not None and calibrated_value is not None:
        raw_prob = _safe_float(raw_value)
        calibrated_prob = _safe_float(calibrated_value)
        if 0.003 <= calibrated_prob <= 0.25:
            score += 5.0
        else:
            reasons.append("Probability outside stable range")
        if abs(calibrated_prob - raw_prob) > 0.025:
            reasons.append("Large calibration delta")

    score = _clamp(score, 1.0, 100.0)
    if score >= 80:
        bucket = "high"
    elif score >= 60:
        bucket = "medium"
    else:
        bucket = "low"

    return {
        "confidence_score": round(score, 1),
        "confidence_bucket": bucket,
        # dict.fromkeys removes duplicates while keeping first-seen order.
        "confidence_reasons": list(dict.fromkeys(reasons)),
    }
def _compute_trend_hr_adjustment(
batter_trend_row: dict[str, Any],
batter_features: dict[str, Any],
) -> float:
trend_delta_ev90 = batter_trend_row.get("trend_delta_ev90")
trend_delta_barrel = batter_trend_row.get("trend_delta_barrel")
xwoba_7d = batter_trend_row.get("xwoba_7d")
xwoba_season = batter_features.get("xwoba")
hot_flag = batter_trend_row.get("batter_hot_flag", False)
cold_flag = batter_trend_row.get("batter_cold_flag", False)
trend_adj_hr = 0.0
if trend_delta_ev90 is not None:
if float(trend_delta_ev90) >= 2.0:
trend_adj_hr += 0.006
elif float(trend_delta_ev90) <= -2.0:
trend_adj_hr -= 0.006
if trend_delta_barrel is not None:
if float(trend_delta_barrel) >= 0.02:
trend_adj_hr += 0.008
elif float(trend_delta_barrel) <= -0.02:
trend_adj_hr -= 0.008
if xwoba_7d is not None and xwoba_season is not None:
xwoba_delta = float(xwoba_7d) - float(xwoba_season)
if xwoba_delta >= 0.030:
trend_adj_hr += 0.002
elif xwoba_delta <= -0.030:
trend_adj_hr -= 0.002
if hot_flag:
trend_adj_hr += 0.003
if cold_flag:
trend_adj_hr -= 0.003
return _clamp(trend_adj_hr, -0.010, 0.010)
def _compute_platoon_adjustment(
batter_features: dict[str, Any],
pitcher_row: dict[str, Any],
) -> tuple[float, float, str]:
batter_stand = str(batter_features.get("batter_stand", "") or "").strip().upper()
p_throws = str(pitcher_row.get("p_throws", "") or "").strip().upper()
if batter_stand not in {"L", "R"} or p_throws not in {"L", "R"}:
return (0.0, 1.0, "unknown")
same_hand = (
(batter_stand == "L" and p_throws == "L")
or (batter_stand == "R" and p_throws == "R")
)
if same_hand:
return (-0.008, 0.92, "same_hand_suppressed")
return (0.007, 1.08, "opposite_hand_enhanced")
def _compute_pulled_contact_adjustment(
batter_features: dict[str, Any],
) -> float:
pulled_barrel_rate = batter_features.get("pulled_barrel_rate")
pulled_hard_air_rate = batter_features.get("pulled_hard_air_rate")
pull_air_rate = batter_features.get("pull_air_rate")
if pulled_barrel_rate is not None:
return max(-0.01, min(0.045, (float(pulled_barrel_rate) - 0.020) * 1.50))
if pulled_hard_air_rate is not None:
return max(-0.008, min(0.030, (float(pulled_hard_air_rate) - 0.040) * 0.55))
if pull_air_rate is not None:
return max(-0.006, min(0.020, (float(pull_air_rate) - 0.10) * 0.18))
return 0.0
def build_hr_probability_result(
    batter_name: str,
    batter_statcast_df: pd.DataFrame | None = None,
    pitcher_statcast_df: pd.DataFrame | None = None,
    statcast_df: pd.DataFrame | None = None,
    pitcher_name: str = "",
    pitcher_id: int | None = None,
    game_row: dict[str, Any] | None = None,
    weather_row: dict[str, Any] | None = None,
    mode: str = "pregame",
    runtime_cache: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the home-run probability record for one batter.

    The record starts as ``_empty_result`` and is returned unchanged (apart
    from the pitcher / projected-starter fields) when there is no usable
    batter data: no frame, an empty frame, a blank name, or zero plate
    appearances. Otherwise additive adjustments are layered onto the baseline
    probability in this order: pitcher, trend, zone / family-zone / arsenal /
    platoon (only when a pitcher name is given), pulled contact, environment,
    trajectory, rolling form, opportunity. Each adjustment is scaled by a
    reliability weight and the running probability is clamped after every
    layer. The final value is calibrated, scored for confidence and tagged
    with a telemetry tier.

    Args:
        batter_name: Batter to model; a blank name short-circuits.
        batter_statcast_df: Batter data; falls back to ``statcast_df``.
        pitcher_statcast_df: Pitcher data; falls back to the batter frame.
        statcast_df: Shared frame used when ``batter_statcast_df`` is None.
        pitcher_name: Opposing pitcher; blank disables the matchup layers.
        pitcher_id: Forwarded to the pitcher, trajectory and rolling builders.
        game_row: Game context. Keys read here: projected starter fields,
            ``venue``, ``game_datetime_utc`` / ``game_date``, ``lineup_slot``,
            ``team_total`` and their ``*_source`` companions.
        weather_row: Forwarded to the environment model.
        mode: ``"pregame"`` or ``"live"``; anything else becomes ``"pregame"``.
        runtime_cache: Forwarded to ``compose_shared_matchup_context``.

    Returns:
        The populated result dict (same key set as ``_empty_result``).
    """
    mode = str(mode or "pregame").strip().lower()
    if mode not in {"pregame", "live"}:
        mode = "pregame"

    # Normalise once so every later ``.get`` works on a private dict.
    game_row = dict(game_row or {})

    result = _empty_result(batter_name, mode)
    result["pitcher_name"] = str(pitcher_name or "").strip()
    result["projected_home_pitcher"] = str(game_row.get("projected_home_pitcher") or "").strip()
    result["projected_away_pitcher"] = str(game_row.get("projected_away_pitcher") or "").strip()
    result["projected_starter_available"] = bool(game_row.get("projected_starter_available"))
    result["projected_starter_match_status"] = str(
        game_row.get("projected_starter_match_status") or "projected_starter_unavailable"
    )

    batter_df = batter_statcast_df if batter_statcast_df is not None else statcast_df
    pitcher_df = pitcher_statcast_df if pitcher_statcast_df is not None else batter_df
    if batter_df is None or batter_df.empty or not batter_name:
        return result

    batter_features = build_batter_feature_row(batter_df, batter_name)
    # Parse plate appearances through ``_safe_float`` so NaN or a numeric
    # string yields a clean "no data" exit instead of an ``int()`` error.
    batter_pa = int(_safe_float(batter_features.get("plate_appearances"), 0.0) or 0.0)
    if batter_pa <= 0:
        return result

    # ------------------------------------------------------------------ baseline
    baseline = compute_batter_baseline(batter_features)
    hr_prob = float(baseline.get("hr_prob_base", 0.0) or 0.0)
    result["baseline_hr_prob"] = hr_prob

    applied_layers: list[str] = []
    skipped_layers = result["skipped_layers"].split("|") if result["skipped_layers"] else []
    reason_candidates: list[dict[str, Any]] = []

    # ------------------------------------------------------------------- pitcher
    pitcher_row = build_pitcher_feature_row(
        statcast_df=pitcher_df,
        pitcher_name=result["pitcher_name"],
        pitcher_id=pitcher_id,
    )
    # Game context is only handed to the pitcher model in live mode.
    context = {"game_row": game_row} if mode == "live" else {}
    pitcher_adj = compute_pitcher_adjustment(
        batter_row=batter_features,
        pitcher_row=pitcher_row,
        context=context,
    )
    pitcher_reliability = _sample_reliability(pitcher_row.get("sample_size"), 180.0)
    result["pitcher_reliability"] = pitcher_reliability
    result["pitcher_resolution_status"] = (
        "resolved" if result["pitcher_name"] and _safe_float(pitcher_row.get("sample_size"), 0.0) > 0 else
        "resolved_no_pitcher_statcast" if result["pitcher_name"] else
        "pitcher_missing"
    )
    result["pitcher_hr_adjustment"] = _apply_reliability(
        _safe_float(pitcher_adj.get("hr_adj")),
        pitcher_reliability,
    )
    result["pregame_pitcher_context_adj"] = result["pitcher_hr_adjustment"]
    hr_prob = _clamp(hr_prob + result["pitcher_hr_adjustment"], 0.005, 0.25)
    if abs(result["pitcher_hr_adjustment"]) > 1e-6:
        applied_layers.append("pitcher")
        _append_reason_candidate(
            reason_candidates,
            category="pitcher",
            direction="supportive" if result["pitcher_hr_adjustment"] > 0 else "caution",
            magnitude=result["pitcher_hr_adjustment"],
            template_key="pitcher_attackable" if result["pitcher_hr_adjustment"] > 0 else "pitcher_suppresses_hr",
            template_inputs={"pitcher_name": result["pitcher_name"]},
        )

    # --------------------------------------------------------------------- trend
    reference_date = game_row.get("game_datetime_utc") or game_row.get("game_date")
    batter_trend_row = build_batter_trend_row(
        statcast_df=batter_df,
        player_name=batter_name,
        reference_date=reference_date,
    )
    result["trend_hr_adjustment"] = _compute_trend_hr_adjustment(
        batter_trend_row=batter_trend_row,
        batter_features=batter_features,
    )
    result["trend_reliability"] = _sample_reliability(batter_pa, 140.0)
    result["trend_hr_adjustment"] = _apply_reliability(
        result["trend_hr_adjustment"],
        result["trend_reliability"],
    )
    hr_prob = _clamp(hr_prob + result["trend_hr_adjustment"], 0.005, 0.25)
    if abs(result["trend_hr_adjustment"]) > 1e-6:
        applied_layers.append("trend")
        _append_reason_candidate(
            reason_candidates,
            category="trend",
            direction="supportive" if result["trend_hr_adjustment"] > 0 else "caution",
            magnitude=result["trend_hr_adjustment"],
            template_key="trend_up" if result["trend_hr_adjustment"] > 0 else "trend_down",
        )

    # ------------------------------------------------- pitcher-dependent matchup
    matchup_multiplier = 1.0
    if result["pitcher_name"]:
        # One shared weight for zone / family-zone / arsenal: the weaker of the
        # batter and pitcher sample reliabilities.
        matchup_reliability = min(
            _sample_reliability(batter_pa, 180.0),
            _sample_reliability(pitcher_row.get("sample_size"), 180.0),
        )

        shared_matchup = {}
        try:
            shared_matchup = compose_shared_matchup_context(
                batter_name=batter_name,
                pitcher_name=result["pitcher_name"],
                batter_statcast_df=batter_df,
                pitcher_statcast_df=pitcher_df,
                batter_features=batter_features,
                pitcher_row=pitcher_row,
                game_row=game_row,
                runtime_cache=runtime_cache,
            )
            result["shared_matchup_available"] = True
        except Exception:
            # Best effort: record the gap and continue with empty context.
            skipped_layers.append("shared_matchup_unavailable")
            result["shared_matchup_available"] = False
            shared_matchup = {}

        result["expected_pitch_mix_by_count"] = shared_matchup.get("expected_pitch_mix_by_count") or {}
        result["expected_zone_mix_by_count"] = shared_matchup.get("expected_zone_mix_by_count") or {}
        result["expected_pitch_zone_mix_by_count"] = shared_matchup.get("expected_pitch_zone_mix_by_count") or {}
        result["tunnel_pair_scores"] = shared_matchup.get("tunnel_pair_scores") or []
        result["predicted_attack_regions"] = shared_matchup.get("predicted_attack_regions") or []
        result["predicted_damage_regions"] = shared_matchup.get("predicted_damage_regions") or []
        result["predicted_whiff_regions"] = shared_matchup.get("predicted_whiff_regions") or []
        result["matchup_coverage_confidence"] = shared_matchup.get("matchup_coverage_confidence")
        result["component_source_map"] = shared_matchup.get("component_source_map") or {}
        # Rows already built by the shared context are reused below.
        component_rows = shared_matchup.get("_component_rows") or {}

        # ---- zone matchup
        zone_eff = 0.0
        # Stays None unless the zone model actually ran; replaces the former
        # ``"zone_matchup_adj" in locals()`` introspection.
        zone_matchup_adj: dict[str, Any] | None = None
        batter_zone_row: dict[str, Any] = dict(component_rows.get("batter_zone_row") or {})
        pitcher_zone_row: dict[str, Any] = dict(component_rows.get("pitcher_zone_row") or {})
        try:
            from models.batter_zone_model import build_batter_zone_feature_row
            from models.pitcher_zone_model import build_pitcher_zone_feature_row
            from models.zone_matchup_model import compute_zone_matchup_adjustment

            if not batter_zone_row:
                batter_zone_row = build_batter_zone_feature_row(batter_df, batter_name)
            if not pitcher_zone_row:
                pitcher_zone_row = build_pitcher_zone_feature_row(pitcher_df, result["pitcher_name"])
            zone_matchup_adj = compute_zone_matchup_adjustment(
                batter_zone_row=batter_zone_row,
                pitcher_zone_row=pitcher_zone_row,
            )
            zone_eff = _safe_float(zone_matchup_adj.get("hr_zone_boost")) * 0.10
            result["zone_store_sample_size"] = int(_safe_float(batter_zone_row.get("zone_sample_size"), 0.0) or 0.0)
        except Exception:
            skipped_layers.append("zone_matchup_unavailable")

        # ---- family-zone matchup
        family_zone_eff = 0.0
        batter_family_zone_row: dict[str, Any] = dict(component_rows.get("batter_family_zone_row") or {})
        pitcher_family_zone_row: dict[str, Any] = dict(component_rows.get("pitcher_family_zone_row") or {})
        try:
            from models.family_zone_profile_store import (
                build_batter_family_zone_feature_row,
                build_pitcher_family_zone_feature_row,
            )
            from models.matchup_model import compute_family_zone_matchup_adjustment

            if not batter_family_zone_row:
                batter_family_zone_row = build_batter_family_zone_feature_row(batter_df, batter_name)
            if not pitcher_family_zone_row:
                pitcher_family_zone_row = build_pitcher_family_zone_feature_row(pitcher_df, result["pitcher_name"])
            family_zone_matchup_adj = compute_family_zone_matchup_adjustment(
                batter_family_zone_row=batter_family_zone_row,
                pitcher_family_zone_row=pitcher_family_zone_row,
            )
            family_zone_eff = _safe_float(
                family_zone_matchup_adj.get("family_zone_hr_boost")
            ) * 0.07
            result["family_zone_batter_sample_size"] = int(_safe_float(batter_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0)
            result["family_zone_pitcher_sample_size"] = int(_safe_float(pitcher_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0)
        except Exception:
            skipped_layers.append("family_zone_db_unavailable")

        # ---- platoon: its multiplier also scales zone / family-zone / arsenal
        platoon_adj, matchup_multiplier, matchup_reason = _compute_platoon_adjustment(
            batter_features=batter_features,
            pitcher_row=pitcher_row,
        )
        result["platoon_hr_adjustment"] = platoon_adj
        result["matchup_platoon_multiplier"] = matchup_multiplier
        result["matchup_platoon_reason"] = matchup_reason

        result["zone_reliability"] = matchup_reliability
        result["family_zone_reliability"] = matchup_reliability
        result["zone_hr_adjustment"] = _apply_reliability(zone_eff * matchup_multiplier, matchup_reliability)
        result["family_zone_hr_adjustment"] = _apply_reliability(
            family_zone_eff * matchup_multiplier,
            matchup_reliability,
        )
        result["zone_status"] = (
            "applied" if abs(result["zone_hr_adjustment"]) > 1e-6 else
            "missing_batter_zone_profile" if int(_safe_float(batter_zone_row.get("zone_sample_size"), 0.0) or 0.0) <= 0 else
            "missing_pitcher_zone_profile" if int(_safe_float(pitcher_zone_row.get("zone_sample_size"), 0.0) or 0.0) <= 0 else
            "available_zero_effect"
        )
        result["damage_zone_alignment_subscore"] = (
            round(_safe_float(zone_matchup_adj.get("hr_zone_boost"), 0.0), 4)
            if zone_matchup_adj is not None
            else 0.0
        )
        result["family_zone_status"] = (
            "applied" if abs(result["family_zone_hr_adjustment"]) > 1e-6 else
            "missing_batter_family_zone_profile" if int(_safe_float(batter_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0) <= 0 else
            "missing_pitcher_family_zone_profile" if int(_safe_float(pitcher_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0) <= 0 else
            "available_zero_effect"
        )
        hr_prob = _clamp(hr_prob + result["zone_hr_adjustment"], 0.005, 0.25)
        hr_prob = _clamp(hr_prob + result["family_zone_hr_adjustment"], 0.005, 0.25)
        if abs(result["zone_hr_adjustment"]) > 1e-6:
            applied_layers.append("zone")
            _append_reason_candidate(
                reason_candidates,
                category="zone",
                direction="supportive" if result["zone_hr_adjustment"] > 0 else "caution",
                magnitude=result["zone_hr_adjustment"],
                template_key="zone_favorable" if result["zone_hr_adjustment"] > 0 else "zone_tough",
            )
        if abs(result["family_zone_hr_adjustment"]) > 1e-6:
            applied_layers.append("family_zone")
            _append_reason_candidate(
                reason_candidates,
                category="family_zone",
                direction="supportive" if result["family_zone_hr_adjustment"] > 0 else "caution",
                magnitude=result["family_zone_hr_adjustment"],
                template_key="family_zone_favorable" if result["family_zone_hr_adjustment"] > 0 else "family_zone_tough",
            )

        # ---- arsenal matchup
        arsenal_eff = 0.0
        # Same sentinel approach as ``zone_matchup_adj`` above.
        arsenal_matchup_adj: dict[str, Any] | None = None
        batter_arsenal_row: dict[str, Any] = dict(component_rows.get("batter_arsenal_row") or {})
        pitcher_arsenal_row: dict[str, Any] = dict(component_rows.get("pitcher_arsenal_row") or {})
        try:
            from models.arsenal_matchup_model import compute_arsenal_matchup_adjustment
            from models.batter_arsenal_model import build_batter_arsenal_feature_row
            from models.pitcher_arsenal_model import build_pitcher_arsenal_feature_row

            if not batter_arsenal_row:
                batter_arsenal_row = build_batter_arsenal_feature_row(batter_df, batter_name)
            if not pitcher_arsenal_row:
                pitcher_arsenal_row = build_pitcher_arsenal_feature_row(pitcher_df, result["pitcher_name"])
            arsenal_matchup_adj = compute_arsenal_matchup_adjustment(
                batter_arsenal_row=batter_arsenal_row,
                pitcher_arsenal_row=pitcher_arsenal_row,
            )
            arsenal_eff = (
                _safe_float(arsenal_matchup_adj.get("arsenal_hr_boost")) * 0.05
            ) * matchup_multiplier
            result["arsenal_batter_sample_size"] = int(_safe_float(batter_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0)
            result["arsenal_pitcher_sample_size"] = int(_safe_float(pitcher_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0)
        except Exception:
            skipped_layers.append("arsenal_matchup_unavailable")

        result["arsenal_reliability"] = matchup_reliability
        result["arsenal_hr_adjustment"] = _apply_reliability(arsenal_eff, matchup_reliability)
        result["arsenal_status"] = (
            "applied" if abs(result["arsenal_hr_adjustment"]) > 1e-6 else
            "missing_batter_arsenal_profile" if int(_safe_float(batter_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0) <= 0 else
            "missing_pitcher_arsenal_profile" if int(_safe_float(pitcher_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0) <= 0 else
            "available_zero_effect"
        )
        # ``or {}`` (not a ``.get`` default) so an explicit None entry is safe.
        result["pitch_mix_exposure_subscore"] = round(
            _safe_float((shared_matchup.get("arsenal_matchup") or {}).get("arsenal_hr_boost"), 0.0),
            4,
        )
        # Sum of the first three predicted damage-region scores.
        result["count_pattern_damage_subscore"] = round(
            sum(float(item.get("score") or 0.0) for item in (result["predicted_damage_regions"] or [])[:3]),
            4,
        )
        result["arsenal_fit_subscore"] = (
            round(_safe_float(arsenal_matchup_adj.get("arsenal_hr_boost"), 0.0), 4)
            if arsenal_matchup_adj is not None
            else 0.0
        )
        hr_prob = _clamp(hr_prob + result["arsenal_hr_adjustment"], 0.005, 0.25)
        if abs(result["arsenal_hr_adjustment"]) > 1e-6:
            applied_layers.append("arsenal")
            _append_reason_candidate(
                reason_candidates,
                category="arsenal",
                direction="supportive" if result["arsenal_hr_adjustment"] > 0 else "caution",
                magnitude=result["arsenal_hr_adjustment"],
                template_key="arsenal_favorable" if result["arsenal_hr_adjustment"] > 0 else "arsenal_tough",
            )

        # ---- platoon additive term (applied without a reliability weight)
        result["handedness_damage_subscore"] = round(_safe_float(platoon_adj, 0.0), 4)
        hr_prob = _clamp(hr_prob + platoon_adj, 0.005, 0.25)
        if abs(platoon_adj) > 1e-6:
            applied_layers.append("platoon")
            _append_reason_candidate(
                reason_candidates,
                category="platoon",
                direction="supportive" if platoon_adj > 0 else "caution",
                magnitude=platoon_adj,
                template_key="platoon_advantage" if platoon_adj > 0 else "platoon_disadvantage",
                template_inputs={"matchup_reason": matchup_reason},
            )
    else:
        # No pitcher identity: every matchup layer is marked as skipped.
        skipped_layers.extend(["pitcher_missing", "zone_matchup_unavailable", "arsenal_matchup_unavailable"])
        result["zone_status"] = "missing_pitcher_identity"
        result["family_zone_status"] = "missing_pitcher_identity"
        result["arsenal_status"] = "missing_pitcher_identity"
        result["shared_matchup_available"] = False

    # NOTE(review): from here on some layers clamp to an upper bound of 0.30
    # while the earlier ones (and trajectory) use 0.25 - confirm that the
    # mixed ceilings are intentional.

    # ------------------------------------------------------------ pulled contact
    result["pulled_contact_reliability"] = _sample_reliability(batter_pa, 155.0)
    result["pulled_contact_hr_adjustment"] = _apply_reliability(
        _compute_pulled_contact_adjustment(batter_features),
        result["pulled_contact_reliability"],
    )
    hr_prob = _clamp(hr_prob + result["pulled_contact_hr_adjustment"], 0.005, 0.30)
    if abs(result["pulled_contact_hr_adjustment"]) > 1e-6:
        applied_layers.append("pulled_contact")
        _append_reason_candidate(
            reason_candidates,
            category="pulled_contact",
            direction="supportive" if result["pulled_contact_hr_adjustment"] > 0 else "caution",
            magnitude=result["pulled_contact_hr_adjustment"],
            template_key="pulled_contact_strength" if result["pulled_contact_hr_adjustment"] > 0 else "pulled_contact_light",
        )

    # --------------------------------------------------------------- environment
    env_adj = compute_environment_adjustment(game_row=game_row, weather_row=weather_row)
    result["environment_reliability"] = _compute_environment_reliability(game_row, weather_row)
    result["env_hr_adjustment"] = _apply_reliability(
        _safe_float(env_adj.get("env_hr_boost")),
        result["environment_reliability"],
    )
    result["park_hr_adjustment"] = _apply_reliability(
        _safe_float(env_adj.get("park_hr_boost")),
        result["environment_reliability"],
    )
    result["weather_hr_adjustment"] = _apply_reliability(
        _safe_float(env_adj.get("weather_hr_boost")),
        result["environment_reliability"],
    )
    result["pregame_park_context_adj"] = result["park_hr_adjustment"]
    result["pregame_weather_context_adj"] = result["weather_hr_adjustment"]
    # Only the combined env term moves the probability; park and weather are
    # stored for reporting.
    hr_prob = _clamp(hr_prob + result["env_hr_adjustment"], 0.005, 0.30)
    if abs(result["env_hr_adjustment"]) > 1e-6:
        applied_layers.append("environment")
        # The larger of weather vs. park (ties go to weather) names the reason.
        weather_dominates = abs(result["weather_hr_adjustment"]) >= abs(result["park_hr_adjustment"])
        dominant_env_key = "weather_supportive" if weather_dominates else "park_supportive"
        dominant_env_tough_key = "weather_suppressive" if weather_dominates else "park_suppressive"
        _append_reason_candidate(
            reason_candidates,
            category="environment",
            direction="supportive" if result["env_hr_adjustment"] > 0 else "caution",
            magnitude=result["env_hr_adjustment"],
            template_key=dominant_env_key if result["env_hr_adjustment"] > 0 else dominant_env_tough_key,
            template_inputs={"venue": game_row.get("venue")},
        )

    # ---------------------------------------------------------------- trajectory
    trajectory_row = build_trajectory_features(
        statcast_df=pitcher_df,
        pitcher_name=result["pitcher_name"],
        pitcher_id=pitcher_id,
    )
    traj_adj = compute_trajectory_adjustment(trajectory_row)
    result["trajectory_reliability"] = _sample_reliability(pitcher_row.get("sample_size"), 200.0)
    result["trajectory_hr_adjustment"] = _apply_reliability(
        _safe_float(traj_adj.get("hr_adj")),
        result["trajectory_reliability"],
    )
    result["tunnel_damage_subscore"] = round(_safe_float(trajectory_row.get("tunnel_score"), 0.0), 4)
    hr_prob = _clamp(hr_prob + result["trajectory_hr_adjustment"], 0.005, 0.25)
    if abs(result["trajectory_hr_adjustment"]) > 1e-6:
        applied_layers.append("trajectory")
        _append_reason_candidate(
            reason_candidates,
            category="trajectory",
            direction="supportive" if result["trajectory_hr_adjustment"] > 0 else "caution",
            magnitude=result["trajectory_hr_adjustment"],
            template_key="trajectory_helpful" if result["trajectory_hr_adjustment"] > 0 else "trajectory_tough",
        )

    # -------------------------------------------------------------- rolling form
    pitcher_rolling_row = build_pitcher_rolling_form_row(
        statcast_df=pitcher_df,
        pitcher_name=result["pitcher_name"],
        pitcher_id=pitcher_id,
        reference_date=reference_date,
    )
    batter_rolling_row = build_batter_rolling_form_row(
        statcast_df=batter_df,
        player_name=batter_name,
        reference_date=reference_date,
    )
    rolling_adj = compute_upcoming_rolling_adjustment(
        batter_roll=batter_rolling_row,
        pitcher_roll=pitcher_rolling_row,
        batter_features=batter_features,
        pitcher_row=pitcher_row,
    )
    # The weaker of the batter-window weight and the pitcher rolling confidence.
    rolling_reliability = min(
        _sample_reliability(batter_rolling_row.get("batter_games_in_window_5g"), 4.0),
        _safe_float(rolling_adj.get("pitcher_rolling_confidence"), 0.0) or 0.0,
    )
    result["rolling_reliability"] = rolling_reliability
    result["rolling_hr_adjustment"] = _apply_reliability(
        _safe_float(rolling_adj.get("rolling_hr_adjustment")),
        rolling_reliability,
    )
    hr_prob = _clamp(hr_prob + result["rolling_hr_adjustment"], 0.005, 0.30)
    if abs(result["rolling_hr_adjustment"]) > 1e-6:
        applied_layers.append("rolling")
        _append_reason_candidate(
            reason_candidates,
            category="rolling",
            direction="supportive" if result["rolling_hr_adjustment"] > 0 else "caution",
            magnitude=result["rolling_hr_adjustment"],
            template_key="rolling_up" if result["rolling_hr_adjustment"] > 0 else "rolling_down",
        )

    # --------------------------------------------------------------- opportunity
    lineup_slot = game_row.get("lineup_slot")
    try:
        lineup_slot = int(lineup_slot) if lineup_slot is not None and str(lineup_slot).strip() not in {"", "nan", "None"} else None
    except Exception:
        lineup_slot = None
    team_total = game_row.get("team_total")
    try:
        team_total = float(team_total) if team_total is not None and str(team_total).strip() not in {"", "nan", "None"} else None
    except Exception:
        team_total = None

    result["lineup_slot_used"] = lineup_slot
    result["lineup_slot_source"] = str(game_row.get("lineup_slot_source") or ("unknown" if lineup_slot is None else "projected"))
    result["team_total_used"] = team_total
    result["team_total_source"] = str(game_row.get("team_total_source") or ("unknown" if team_total is None else "projected"))

    opportunity = compute_opportunity_adjustment(
        lineup_slot=lineup_slot,
        team_total=team_total,
        pitcher_row=pitcher_row,
    )
    result["expected_pa"] = opportunity.get("expected_pa")
    result["pa_multiplier"] = opportunity.get("pa_multiplier")
    result["opportunity_mode"] = opportunity.get("opportunity_mode")
    result["opportunity_reason"] = opportunity.get("opportunity_reason")
    result["hr_opportunity_projection"] = round(_safe_float(opportunity.get("expected_pa"), 0.0), 3)

    if lineup_slot is not None and team_total is not None:
        result["opportunity_reliability"] = 1.0 if result["lineup_slot_source"] == "confirmed" else 0.82
    elif lineup_slot is not None:
        result["opportunity_reliability"] = 0.72 if result["lineup_slot_source"] == "confirmed" else 0.60
    elif team_total is not None:
        result["opportunity_reliability"] = 0.48
    else:
        result["opportunity_reliability"] = 0.0

    # Unlike the other layers this one is proportional to the running probability.
    raw_opportunity_adj = hr_prob * ((_safe_float(opportunity.get("pa_multiplier"), 1.0) or 1.0) - 1.0)
    result["opportunity_hr_adjustment"] = _apply_reliability(
        raw_opportunity_adj,
        result["opportunity_reliability"],
    )
    hr_prob = _clamp(hr_prob + result["opportunity_hr_adjustment"], 0.005, 0.30)
    if abs(result["opportunity_hr_adjustment"]) > 1e-6:
        applied_layers.append("opportunity")
        _append_reason_candidate(
            reason_candidates,
            category="opportunity",
            direction="supportive" if result["opportunity_hr_adjustment"] > 0 else "caution",
            magnitude=result["opportunity_hr_adjustment"],
            template_key="opportunity_strong" if result["opportunity_hr_adjustment"] > 0 else "opportunity_light",
            template_inputs={
                "lineup_slot_used": lineup_slot,
                "lineup_slot_source": result["lineup_slot_source"],
            },
        )

    # ------------------------------------------------- calibration and confidence
    result["raw_hr_prob"] = hr_prob
    result["adjusted_hr_prob"] = hr_prob
    result["calibrated_hr_prob"] = _calibrate_hr_probability(
        raw_prob=hr_prob,
        baseline_prob=result.get("baseline_hr_prob"),
    )
    result["environment_amplification_subscore"] = round(_safe_float(result["env_hr_adjustment"], 0.0), 4)
    # Both modes store the calibrated value here (the former if/else had two
    # identical branches).
    # NOTE(review): confirm live mode is not meant to keep an earlier value.
    result["pregame_hr_prob"] = result["calibrated_hr_prob"]

    confidence = _compute_props_confidence(
        batter_features=batter_features,
        pitcher_row=pitcher_row,
        result=result,
        applied_layers=applied_layers,
    )
    result.update(confidence)

    # Selected confidence caveats are surfaced as fixed-magnitude cautions.
    confidence_reasons = result.get("confidence_reasons", [])
    if "Pitcher unresolved" in confidence_reasons:
        _append_reason_candidate(
            reason_candidates,
            category="confidence",
            direction="caution",
            magnitude=0.004,
            template_key="pitcher_unresolved",
        )
    if "Lineup slot unavailable" in confidence_reasons:
        _append_reason_candidate(
            reason_candidates,
            category="confidence",
            direction="caution",
            magnitude=0.003,
            template_key="lineup_unknown",
        )
    if "Using projected lineup slot" in confidence_reasons:
        _append_reason_candidate(
            reason_candidates,
            category="confidence",
            direction="caution",
            magnitude=0.002,
            template_key="lineup_projected",
        )

    # ------------------------------------------------------------- final summary
    result["applied_layers"] = "|".join(dict.fromkeys(applied_layers))
    result["skipped_layers"] = "|".join(dict.fromkeys([s for s in skipped_layers if s]))

    # Strongest reasons first.
    ranked_reasons = sorted(
        reason_candidates,
        key=lambda item: abs(_safe_float(item.get("signed_magnitude"))),
        reverse=True,
    )
    result["model_voice_reason_candidates"] = ranked_reasons
    result["model_voice_tags"] = [str(item.get("template_key") or "") for item in ranked_reasons if str(item.get("template_key") or "").strip()]
    result["reason_candidate_count"] = len(ranked_reasons)

    result["pregame_context_applied"] = any(
        abs(_safe_float(result.get(key))) > 1e-6
        for key in [
            "pitcher_hr_adjustment",
            "trend_hr_adjustment",
            "zone_hr_adjustment",
            "family_zone_hr_adjustment",
            "arsenal_hr_adjustment",
            "pulled_contact_hr_adjustment",
            "env_hr_adjustment",
            "platoon_hr_adjustment",
            "trajectory_hr_adjustment",
            "rolling_hr_adjustment",
            "opportunity_hr_adjustment",
        ]
    )
    result["modeled_row_available"] = result.get("calibrated_hr_prob") is not None
    result["modeled_row_missing_reason"] = None if result["modeled_row_available"] else "missing_baseline"

    # Telemetry tier: "full" only when all three matchup layers either applied
    # or were available with zero effect.
    if result["pitcher_name"] and result["shared_matchup_available"]:
        telemetry_components = [
            result.get("zone_status"),
            result.get("family_zone_status"),
            result.get("arsenal_status"),
        ]
        if all(str(status or "").startswith(("applied", "available_zero_effect")) for status in telemetry_components):
            result["telemetry_path_status"] = "full_telemetry"
            result["hr_model_tier"] = "full_telemetry"
        else:
            result["telemetry_path_status"] = "partial_telemetry"
            result["hr_model_tier"] = "partial_telemetry"
    elif result["pitcher_name"]:
        result["telemetry_path_status"] = "core_baseline_plus_projected_pitcher"
        result["hr_model_tier"] = "core_baseline_plus_projected_pitcher"
    else:
        result["telemetry_path_status"] = "baseline_only"
        result["hr_model_tier"] = "baseline_only_degraded"

    return result