Spaces:
Running
Fix pitcher resolution: add MLB roster fallback for batter team lookup
Browse files
Root cause: _resolve_batter_team() used historical statcast (2025 season)
to identify a batter's team. Players who changed teams in the offseason
had stale team names that no longer matched either participant in today's
game, causing `batter_team_unresolved` and leaving pitchers unresolved
even when both starters were known.
Changes:
- props_mapper.py: restructure _resolve_batter_team() so statcast early-
exits no longer skip the roster fallback; suppress stale historical-team
returns that don't match the current game (removed old line 349)
- mlb_starters.py: add fetch_mlb_current_roster_map() (MLB Stats API
/api/v1/sports/1/players?season=2026, process-lifetime cached) and
lookup_batter_current_team() which is called as Level 4 fallback
in _resolve_batter_team() when statcast resolution fails
Players now resolve via "mlb_roster_lookup" source for offseason
acquisitions, new players, and anyone not in statcast data.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- analytics/props_mapper.py +36 -32
- data/mlb_starters.py +69 -0
|
@@ -13,7 +13,7 @@ import pandas as pd
|
|
| 13 |
|
| 14 |
from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge
|
| 15 |
from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice
|
| 16 |
-
from data.mlb_starters import lookup_pitchers_for_game
|
| 17 |
from data.odds_name_map import map_odds_name_to_model_name
|
| 18 |
from models.hr_probability_engine import build_hr_probability_result
|
| 19 |
from models.pitcher_adjustment import build_pitcher_feature_row
|
|
@@ -316,37 +316,41 @@ def _resolve_batter_team(
|
|
| 316 |
away_norm = _normalize_team_name(away_team)
|
| 317 |
home_norm = _normalize_team_name(home_team)
|
| 318 |
|
| 319 |
-
|
| 320 |
-
batter_statcast_df is None
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
)
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
return ("", "unresolved")
|
| 352 |
|
|
|
|
| 13 |
|
| 14 |
from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge
|
| 15 |
from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice
|
| 16 |
+
from data.mlb_starters import lookup_pitchers_for_game, lookup_batter_current_team
|
| 17 |
from data.odds_name_map import map_odds_name_to_model_name
|
| 18 |
from models.hr_probability_engine import build_hr_probability_result
|
| 19 |
from models.pitcher_adjustment import build_pitcher_feature_row
|
|
|
|
| 316 |
away_norm = _normalize_team_name(away_team)
|
| 317 |
home_norm = _normalize_team_name(home_team)
|
| 318 |
|
| 319 |
+
statcast_ok = (
|
| 320 |
+
batter_statcast_df is not None
|
| 321 |
+
and not batter_statcast_df.empty
|
| 322 |
+
and batter_name
|
| 323 |
+
and "player_name" in batter_statcast_df.columns
|
| 324 |
+
)
|
| 325 |
+
if statcast_ok:
|
| 326 |
+
normalized_target = _normalize_person_name(batter_name)
|
| 327 |
+
player_rows = batter_statcast_df[
|
| 328 |
+
batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target
|
| 329 |
+
].copy()
|
| 330 |
+
if not player_rows.empty:
|
| 331 |
+
if "source_season" in player_rows.columns:
|
| 332 |
+
current_rows = player_rows[pd.to_numeric(player_rows["source_season"], errors="coerce") == 2026].copy()
|
| 333 |
+
current_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=current_rows)
|
| 334 |
+
if current_team:
|
| 335 |
+
if current_team == away_norm and away_team:
|
| 336 |
+
return (away_team, "current_season_statcast")
|
| 337 |
+
if current_team == home_norm and home_team:
|
| 338 |
+
return (home_team, "current_season_statcast")
|
| 339 |
+
|
| 340 |
+
historical_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=player_rows)
|
| 341 |
+
if historical_team:
|
| 342 |
+
if historical_team == away_norm and away_team:
|
| 343 |
+
return (away_team, "historical_statcast")
|
| 344 |
+
if historical_team == home_norm and home_team:
|
| 345 |
+
return (home_team, "historical_statcast")
|
| 346 |
+
# historical_team doesn't match either current game team (player changed teams);
|
| 347 |
+
# fall through to roster lookup instead of returning a stale team name
|
| 348 |
+
|
| 349 |
+
# Level 4: current-season MLB roster lookup (handles offseason moves and new players)
|
| 350 |
+
if batter_name:
|
| 351 |
+
roster_team = lookup_batter_current_team(batter_name, away_team or "", home_team or "")
|
| 352 |
+
if roster_team:
|
| 353 |
+
return (roster_team, "mlb_roster_lookup")
|
| 354 |
|
| 355 |
return ("", "unresolved")
|
| 356 |
|
|
@@ -465,3 +465,72 @@ def lookup_pitchers_for_game(
|
|
| 465 |
return v
|
| 466 |
|
| 467 |
return {"home_pitcher": None, "away_pitcher": None}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
return v
|
| 466 |
|
| 467 |
return {"home_pitcher": None, "away_pitcher": None}
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
# ---------------------------------------------------------------------------
|
| 471 |
+
# Current-season roster lookup (batter team resolution fallback)
|
| 472 |
+
# ---------------------------------------------------------------------------
|
| 473 |
+
|
| 474 |
+
# Memoized roster map shared by every call in this process; None means nothing has been cached yet.
_ROSTER_MAP_CACHE: dict[str, str] | None = None
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
# Season the map in _ROSTER_MAP_CACHE was built for; None when no tagged map is cached.
_ROSTER_MAP_CACHE_SEASON: int | None = None
# Per-season time.monotonic() deadline before which a failed fetch is not retried.
_ROSTER_FETCH_RETRY_AT: dict[int, float] = {}
# Seconds to wait after a failed or empty fetch before calling the API again.
_ROSTER_FETCH_RETRY_SECONDS = 300.0


def fetch_mlb_current_roster_map(season: int = 2026) -> dict[str, str]:
    """
    Return {normalized_player_name: canonical_team_code} for ``season``.

    The map is built from the MLB Stats API players endpoint
    (``/api/v1/sports/1/players``, ``gameType=R``). Names are normalized with
    ``_normalize_person`` and team names with ``_canonical_team``; entries
    missing either value are skipped.

    Caching:
        * A successfully built, non-empty map is kept in ``_ROSTER_MAP_CACHE``
          and reused for later calls that ask for the same season.
        * A request for a different season triggers a new fetch instead of
          returning the map of another season.
        * A failed or empty fetch is NOT cached permanently. It returns ``{}``
          and is retried once ``_ROSTER_FETCH_RETRY_SECONDS`` have elapsed, so
          a transient network error cannot disable the lookup for the rest of
          the process while repeated calls still avoid hammering the API.

    Args:
        season: Season passed to the API as the ``season`` query parameter.

    Returns:
        The name -> team map, or an empty dict when no roster is available.

    NOTE: the map holds one team per normalized name, so if two players
    normalize to the same name the later API entry overwrites the earlier one.
    """
    import time  # function-scope import: the block does not rely on file-level imports

    global _ROSTER_MAP_CACHE, _ROSTER_MAP_CACHE_SEASON

    # Reuse the cache only for the season it was built for. A populated map
    # without a season tag is trusted as-is so a cache that was primed directly
    # through _ROSTER_MAP_CACHE keeps working.
    if _ROSTER_MAP_CACHE and _ROSTER_MAP_CACHE_SEASON in (None, season):
        return _ROSTER_MAP_CACHE

    # Still inside the cool-down window of a previous failure for this season.
    if time.monotonic() < _ROSTER_FETCH_RETRY_AT.get(season, float("-inf")):
        return {}

    url = "https://statsapi.mlb.com/api/v1/sports/1/players"
    params: dict[str, Any] = {"season": season, "gameType": "R"}
    try:
        r = requests.get(url, params=params, timeout=15)
        r.raise_for_status()
        data = r.json()
    except Exception as exc:
        # Deliberately broad: this is a best-effort fallback and must never
        # break prop resolution. The failure is logged and retried later.
        _log.warning("[mlb_roster] fetch failed: %s", exc)
        _ROSTER_FETCH_RETRY_AT[season] = time.monotonic() + _ROSTER_FETCH_RETRY_SECONDS
        return {}

    people = data.get("people") if isinstance(data, dict) else None
    if not isinstance(people, list):
        people = []

    roster: dict[str, str] = {}
    for person in people:
        # Tolerate malformed entries instead of raising AttributeError.
        if not isinstance(person, dict):
            continue
        team = person.get("currentTeam")
        full_name = str(person.get("fullName", "") or "")
        team_name = str(team.get("name", "") or "") if isinstance(team, dict) else ""
        if not full_name or not team_name:
            continue
        norm_name = _normalize_person(full_name)
        canon_team = _canonical_team(team_name)
        if norm_name and canon_team:
            roster[norm_name] = canon_team

    if not roster:
        # An empty map is useless to callers; treat it like a failed fetch so
        # it is retried rather than pinned for the process lifetime.
        _log.warning("[mlb_roster] no players parsed for season %d", season)
        _ROSTER_FETCH_RETRY_AT[season] = time.monotonic() + _ROSTER_FETCH_RETRY_SECONDS
        return {}

    # Kept at WARNING level as in the original code.
    # NOTE(review): presumably chosen so the line shows under the default log level — confirm.
    _log.warning("[mlb_roster] loaded %d players for season %d", len(roster), season)
    _ROSTER_MAP_CACHE = roster
    _ROSTER_MAP_CACHE_SEASON = season
    _ROSTER_FETCH_RETRY_AT.pop(season, None)
    return _ROSTER_MAP_CACHE
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
def lookup_batter_current_team(
    batter_name: str,
    away_team: str,
    home_team: str,
    season: int = 2026,
) -> str | None:
    """
    Map a batter to one of the two teams playing in a given game.

    The batter's team is read from the map returned by
    ``fetch_mlb_current_roster_map(season)`` using the ``_normalize_person``
    form of ``batter_name``. That team is then compared with the
    ``_canonical_team`` form of ``away_team`` and ``home_team``, in that order.

    Returns:
        ``away_team`` or ``home_team`` exactly as passed in (the first
        non-empty one whose canonical form matches), or ``None`` when the
        roster map is empty, the batter is not in it, or the batter's team is
        neither side of this game.
    """
    roster_map = fetch_mlb_current_roster_map(season)
    # The name is only normalized when there is a roster to search.
    player_team = roster_map.get(_normalize_person(batter_name)) if roster_map else None
    if not player_team:
        return None

    # Away side is checked before home side; an empty display name never matches.
    for display_name in (away_team, home_team):
        if player_team == _canonical_team(display_name) and display_name:
            return display_name
    return None
|