Syntrex Claude Sonnet 4.6 committed on
Commit
f2d8d50
·
1 Parent(s): bd7830d

Fix pitcher resolution: add MLB roster fallback for batter team lookup

Browse files

Root cause: _resolve_batter_team() used historical statcast (2025 season)
to identify a batter's team. Players who changed teams in the offseason
had stale team names that no longer matched either participant in today's
game, causing `batter_team_unresolved` and leaving pitchers unresolved
even when both starters were known.

Changes:
- props_mapper.py: restructure _resolve_batter_team() so statcast early
exits no longer skip the roster fallback; stop returning a stale historical
team that doesn't match either team in the current game (removed old line 349)
- mlb_starters.py: add fetch_mlb_current_roster_map() (MLB Stats API
/api/v1/sports/1/players?season=2026&gameType=R, cached for the process
lifetime) and lookup_batter_current_team(), which is called as the Level 4
fallback in _resolve_batter_team() when statcast resolution fails

Players now resolve via "mlb_roster_lookup" source for offseason
acquisitions, new players, and anyone not in statcast data.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. analytics/props_mapper.py +36 -32
  2. data/mlb_starters.py +69 -0
analytics/props_mapper.py CHANGED
@@ -13,7 +13,7 @@ import pandas as pd
13
 
14
  from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge
15
  from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice
16
- from data.mlb_starters import lookup_pitchers_for_game
17
  from data.odds_name_map import map_odds_name_to_model_name
18
  from models.hr_probability_engine import build_hr_probability_result
19
  from models.pitcher_adjustment import build_pitcher_feature_row
@@ -316,37 +316,41 @@ def _resolve_batter_team(
316
  away_norm = _normalize_team_name(away_team)
317
  home_norm = _normalize_team_name(home_team)
318
 
319
- if (
320
- batter_statcast_df is None
321
- or batter_statcast_df.empty
322
- or not batter_name
323
- or "player_name" not in batter_statcast_df.columns
324
- ):
325
- return ("", "unresolved")
326
-
327
- normalized_target = _normalize_person_name(batter_name)
328
- player_rows = batter_statcast_df[
329
- batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target
330
- ].copy()
331
- if player_rows.empty:
332
- return ("", "unresolved")
333
-
334
- if "source_season" in player_rows.columns:
335
- current_rows = player_rows[pd.to_numeric(player_rows["source_season"], errors="coerce") == 2026].copy()
336
- current_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=current_rows)
337
- if current_team:
338
- if current_team == away_norm and away_team:
339
- return (away_team, "current_season_statcast")
340
- if current_team == home_norm and home_team:
341
- return (home_team, "current_season_statcast")
342
-
343
- historical_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=player_rows)
344
- if historical_team:
345
- if historical_team == away_norm and away_team:
346
- return (away_team, "historical_statcast")
347
- if historical_team == home_norm and home_team:
348
- return (home_team, "historical_statcast")
349
- return (historical_team, "historical_statcast")
 
 
 
 
350
 
351
  return ("", "unresolved")
352
 
 
13
 
14
  from analytics.no_vig_props import american_to_implied_prob, compute_bet_ev, compute_edge
15
  from analytics.model_voice import build_hr_model_voice, build_strikeout_model_voice
16
+ from data.mlb_starters import lookup_pitchers_for_game, lookup_batter_current_team
17
  from data.odds_name_map import map_odds_name_to_model_name
18
  from models.hr_probability_engine import build_hr_probability_result
19
  from models.pitcher_adjustment import build_pitcher_feature_row
 
316
  away_norm = _normalize_team_name(away_team)
317
  home_norm = _normalize_team_name(home_team)
318
 
319
+ statcast_ok = (
320
+ batter_statcast_df is not None
321
+ and not batter_statcast_df.empty
322
+ and batter_name
323
+ and "player_name" in batter_statcast_df.columns
324
+ )
325
+ if statcast_ok:
326
+ normalized_target = _normalize_person_name(batter_name)
327
+ player_rows = batter_statcast_df[
328
+ batter_statcast_df["player_name"].astype(str).map(_normalize_person_name) == normalized_target
329
+ ].copy()
330
+ if not player_rows.empty:
331
+ if "source_season" in player_rows.columns:
332
+ current_rows = player_rows[pd.to_numeric(player_rows["source_season"], errors="coerce") == 2026].copy()
333
+ current_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=current_rows)
334
+ if current_team:
335
+ if current_team == away_norm and away_team:
336
+ return (away_team, "current_season_statcast")
337
+ if current_team == home_norm and home_team:
338
+ return (home_team, "current_season_statcast")
339
+
340
+ historical_team = _infer_batter_team(batter_name=batter_name, batter_statcast_df=player_rows)
341
+ if historical_team:
342
+ if historical_team == away_norm and away_team:
343
+ return (away_team, "historical_statcast")
344
+ if historical_team == home_norm and home_team:
345
+ return (home_team, "historical_statcast")
346
+ # historical_team doesn't match either current game team (player changed teams);
347
+ # fall through to roster lookup instead of returning a stale team name
348
+
349
+ # Level 4: current-season MLB roster lookup (handles offseason moves and new players)
350
+ if batter_name:
351
+ roster_team = lookup_batter_current_team(batter_name, away_team or "", home_team or "")
352
+ if roster_team:
353
+ return (roster_team, "mlb_roster_lookup")
354
 
355
  return ("", "unresolved")
356
 
data/mlb_starters.py CHANGED
@@ -465,3 +465,72 @@ def lookup_pitchers_for_game(
465
  return v
466
 
467
  return {"home_pitcher": None, "away_pitcher": None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  return v
466
 
467
  return {"home_pitcher": None, "away_pitcher": None}
468
+
469
+
470
+ # ---------------------------------------------------------------------------
471
+ # Current-season roster lookup (batter team resolution fallback)
472
+ # ---------------------------------------------------------------------------
473
+
474
+ _ROSTER_MAP_CACHE: dict[str, str] | None = None
475
+
476
+
477
+ def fetch_mlb_current_roster_map(season: int = 2026) -> dict[str, str]:
478
+ """
479
+ Returns {normalized_player_name: canonical_team_code} for all active MLB players.
480
+ Cached for the process lifetime. Uses MLB Stats API players endpoint.
481
+ """
482
+ global _ROSTER_MAP_CACHE
483
+ if _ROSTER_MAP_CACHE is not None:
484
+ return _ROSTER_MAP_CACHE
485
+
486
+ url = "https://statsapi.mlb.com/api/v1/sports/1/players"
487
+ params: dict[str, Any] = {"season": season, "gameType": "R"}
488
+ try:
489
+ r = requests.get(url, params=params, timeout=15)
490
+ r.raise_for_status()
491
+ data = r.json()
492
+ except Exception as exc:
493
+ _log.warning("[mlb_roster] fetch failed: %s", exc)
494
+ _ROSTER_MAP_CACHE = {}
495
+ return _ROSTER_MAP_CACHE
496
+
497
+ people = data.get("people", []) if isinstance(data, dict) else []
498
+ roster: dict[str, str] = {}
499
+ for person in people:
500
+ full_name = str(person.get("fullName", "") or "")
501
+ team_name = str((person.get("currentTeam") or {}).get("name", "") or "")
502
+ if not full_name or not team_name:
503
+ continue
504
+ norm_name = _normalize_person(full_name)
505
+ canon_team = _canonical_team(team_name)
506
+ if norm_name and canon_team:
507
+ roster[norm_name] = canon_team
508
+
509
+ _log.warning("[mlb_roster] loaded %d players for season %d", len(roster), season)
510
+ _ROSTER_MAP_CACHE = roster
511
+ return _ROSTER_MAP_CACHE
512
+
513
+
514
+ def lookup_batter_current_team(
515
+ batter_name: str,
516
+ away_team: str,
517
+ home_team: str,
518
+ season: int = 2026,
519
+ ) -> str | None:
520
+ """
521
+ Returns the display team name (away_team or home_team) for a batter based on
522
+ the current MLB roster. Returns None if the player is not found or is not
523
+ participating in this specific game.
524
+ """
525
+ roster = fetch_mlb_current_roster_map(season)
526
+ if not roster:
527
+ return None
528
+ norm_name = _normalize_person(batter_name)
529
+ canon_team = roster.get(norm_name)
530
+ if not canon_team:
531
+ return None
532
+ if canon_team == _canonical_team(away_team) and away_team:
533
+ return away_team
534
+ if canon_team == _canonical_team(home_team) and home_team:
535
+ return home_team
536
+ return None