Syntrex committed on
Commit
c4ae8c3
·
1 Parent(s): 7b3d14f

Improve load-time caching and baseline snapshots

Browse files
app.py CHANGED
@@ -92,7 +92,7 @@ from utils.dates import current_wbc_date_str
92
  from data.scores import fetch_scores_for_date
93
  from data.odds import fetch_featured_odds
94
  from data.schedule import fetch_schedule_for_date
95
- from data.shared_baseline import build_shared_baseline_bundle
96
  from data.statcast import fetch_statcast_range, fetch_statcast_range_pitcher, normalize_statcast
97
  from data.weather import fetch_weather_for_venue
98
  from database.db import (
@@ -100,8 +100,19 @@ from database.db import (
100
  insert_bet,
101
  next_bet_id,
102
  read_table,
 
 
 
 
 
 
103
  update_bet_result,
104
  upsert_dataframe,
 
 
 
 
 
105
  ensure_recommendation_logs_table,
106
  insert_recommendation_logs,
107
  ensure_recommendation_outcomes_table,
@@ -600,7 +611,21 @@ def load_statcast_previous_season_full_pitcher() -> pd.DataFrame:
600
  def load_probable_starters() -> dict:
601
  """Probable starting pitchers for next 7 days from MLB Stats API."""
602
  from data.mlb_starters import fetch_probable_starters_for_props
603
- return fetch_probable_starters_for_props()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
 
605
 
606
  @st.cache_data(ttl=STATCAST_TTL_SECONDS)
@@ -653,14 +678,63 @@ def _extract_probable_starter_names(probable_starters: dict | None) -> tuple[str
653
  return _coerce_name_tuple(names)
654
 
655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
  @st.cache_data(ttl=STATCAST_TTL_SECONDS, show_spinner=False)
657
  def load_shared_baseline_bundle_cached(
658
  batter_names: tuple[str, ...] = (),
659
  pitcher_names: tuple[str, ...] = (),
660
  ) -> dict:
661
- return build_shared_baseline_bundle(
662
  batter_names=batter_names,
663
  pitcher_names=pitcher_names,
 
 
664
  )
665
 
666
 
@@ -695,7 +769,30 @@ def load_upcoming_hr_props() -> pd.DataFrame:
695
  @st.cache_data(ttl=300, show_spinner=False)
696
  def load_upcoming_hr_props_bundle() -> dict:
697
  try:
698
- return fetch_all_upcoming_hr_props_bundle(sportsbooks=DEFAULT_PROP_BOOKS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  except Exception as exc:
700
  logger.warning("[load_upcoming_hr_props_bundle] failure: %s", exc)
701
  return {
@@ -703,16 +800,43 @@ def load_upcoming_hr_props_bundle() -> dict:
703
  "scraper_raw": pd.DataFrame(),
704
  "merged_props_feed": pd.DataFrame(),
705
  "coverage_summary": pd.DataFrame(),
 
 
706
  }
707
 
708
 
709
  @st.cache_data(ttl=REFRESH_TTL_SECONDS)
710
  def load_odds() -> pd.DataFrame:
711
- return fetch_featured_odds()
 
 
 
 
 
 
 
 
 
 
 
 
712
 
713
  @st.cache_data(ttl=SCHEDULE_TTL_SECONDS)
714
  def load_dashboard_schedule_for_date(date_str: str) -> pd.DataFrame:
715
- return fetch_schedule_for_date(date_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
716
 
717
 
718
  @st.cache_data(ttl=SCORES_TTL_SECONDS)
@@ -742,7 +866,21 @@ def get_stable_scores_for_dashboard_date(date_str: str) -> pd.DataFrame:
742
  return fresh_scores
743
 
744
  def load_weather(venue_name: str) -> pd.DataFrame:
745
- return fetch_weather_for_venue(venue_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
 
747
 
748
  def render_header() -> None:
@@ -2952,13 +3090,6 @@ def render_dashboard() -> None:
2952
  pass
2953
 
2954
  schedule_df = load_dashboard_schedule_for_date(schedule_date_str)
2955
- baseline_bundle = load_shared_baseline_bundle_cached()
2956
- statcast_df = baseline_bundle.get("blended_batter_df", pd.DataFrame())
2957
- pitcher_statcast_df = baseline_bundle.get("blended_pitcher_df", pd.DataFrame())
2958
- if statcast_df.empty:
2959
- statcast_df = load_statcast_recent()
2960
- if pitcher_statcast_df.empty:
2961
- pitcher_statcast_df = statcast_df
2962
 
2963
  live_games, final_games, scheduled_games = split_games_for_scoreboard(
2964
  schedule_df=schedule_df,
@@ -3006,6 +3137,18 @@ def render_dashboard() -> None:
3006
  live_games = recovered_live_games
3007
  final_games = recovered_final_games
3008
 
 
 
 
 
 
 
 
 
 
 
 
 
3009
  filter_option = st.radio(
3010
  "Game Status",
3011
  ["All", "Live", "Final", "Scheduled"],
 
92
  from data.scores import fetch_scores_for_date
93
  from data.odds import fetch_featured_odds
94
  from data.schedule import fetch_schedule_for_date
95
+ from data.shared_baseline import load_or_build_shared_baseline_bundle
96
  from data.statcast import fetch_statcast_range, fetch_statcast_range_pitcher, normalize_statcast
97
  from data.weather import fetch_weather_for_venue
98
  from database.db import (
 
100
  insert_bet,
101
  next_bet_id,
102
  read_table,
103
+ read_cached_odds,
104
+ read_cached_probable_starters,
105
+ read_cached_probable_starters_meta,
106
+ read_cached_schedule_for_date,
107
+ read_cached_upcoming_props_bundle,
108
+ read_cached_weather_for_venue,
109
  update_bet_result,
110
  upsert_dataframe,
111
+ replace_cached_odds,
112
+ replace_cached_probable_starters,
113
+ replace_cached_schedule,
114
+ replace_cached_upcoming_props_bundle,
115
+ replace_cached_weather,
116
  ensure_recommendation_logs_table,
117
  insert_recommendation_logs,
118
  ensure_recommendation_outcomes_table,
 
611
def load_probable_starters() -> dict:
    """Probable starting pitchers for next 7 days from MLB Stats API.

    A database snapshot is returned when its metadata row carries a
    ``fetched_at`` no older than one hour and the cached payload is truthy.
    Otherwise the starters are fetched live and written back to the cache on a
    best-effort basis.

    Returns:
        The probable-starters mapping, from the cache or freshly fetched.
    """
    from data.mlb_starters import fetch_probable_starters_for_props

    max_cache_age_seconds = 60 * 60
    try:
        cached_meta = read_cached_probable_starters_meta(conn)
        if not cached_meta.empty and _is_fetched_at_fresh(
            cached_meta.iloc[0]["fetched_at"], max_cache_age_seconds
        ):
            cached = read_cached_probable_starters(conn)
            if cached:
                return cached
    except Exception as exc:
        # A broken cache must not block the live fetch, but it should be
        # visible in the logs just like a persist failure is.
        logger.warning("[load_probable_starters] cache read failure: %s", exc)

    fresh = fetch_probable_starters_for_props()
    try:
        replace_cached_probable_starters(conn, fresh)
    except Exception as exc:
        logger.warning("[load_probable_starters] cache persist failure: %s", exc)
    return fresh
629
 
630
 
631
  @st.cache_data(ttl=STATCAST_TTL_SECONDS)
 
678
  return _coerce_name_tuple(names)
679
 
680
 
681
+ def _extract_live_dashboard_participants(live_games: pd.DataFrame) -> tuple[tuple[str, ...], tuple[str, ...]]:
682
+ if live_games is None or live_games.empty:
683
+ return tuple(), tuple()
684
+
685
+ batter_names: list[str] = []
686
+ pitcher_names: list[str] = []
687
+ for col in ["on_deck_name", "in_hole_name", "three_away_name", "batter_name"]:
688
+ if col in live_games.columns:
689
+ batter_names.extend(
690
+ [
691
+ str(value).strip()
692
+ for value in live_games[col].dropna().astype(str).tolist()
693
+ if str(value).strip()
694
+ ]
695
+ )
696
+ for col in ["pitcher_name"]:
697
+ if col in live_games.columns:
698
+ pitcher_names.extend(
699
+ [
700
+ str(value).strip()
701
+ for value in live_games[col].dropna().astype(str).tolist()
702
+ if str(value).strip()
703
+ ]
704
+ )
705
+ return _coerce_name_tuple(batter_names), _coerce_name_tuple(pitcher_names)
706
+
707
+
708
+ def _is_fetched_at_fresh(value: object, max_age_seconds: int) -> bool:
709
+ try:
710
+ ts = pd.to_datetime(value, errors="coerce", utc=True)
711
+ if pd.isna(ts):
712
+ return False
713
+ age_seconds = max(0.0, float((pd.Timestamp.now(tz="UTC") - ts).total_seconds()))
714
+ return age_seconds <= float(max_age_seconds)
715
+ except Exception:
716
+ return False
717
+
718
+
719
+ def _latest_fetched_at_from_df(df: pd.DataFrame) -> object:
720
+ if df is None or df.empty or "fetched_at" not in df.columns:
721
+ return None
722
+ try:
723
+ return pd.to_datetime(df["fetched_at"], errors="coerce", utc=True).max()
724
+ except Exception:
725
+ return None
726
+
727
+
728
@st.cache_data(ttl=STATCAST_TTL_SECONDS, show_spinner=False)
def load_shared_baseline_bundle_cached(
    batter_names: tuple[str, ...] = (),
    pitcher_names: tuple[str, ...] = (),
) -> dict:
    """Streamlit-cached entry point for the shared baseline bundle.

    Delegates to ``load_or_build_shared_baseline_bundle`` with runtime-refresh
    persistence enabled. The snapshot age limit is the larger of
    ``STATCAST_TTL_SECONDS`` and one hour.
    """
    snapshot_max_age = max(STATCAST_TTL_SECONDS, 60 * 60)
    bundle = load_or_build_shared_baseline_bundle(
        batter_names=batter_names,
        pitcher_names=pitcher_names,
        max_age_seconds=snapshot_max_age,
        persist_runtime_refresh=True,
    )
    return bundle
739
 
740
 
 
769
  @st.cache_data(ttl=300, show_spinner=False)
770
  def load_upcoming_hr_props_bundle() -> dict:
771
  try:
772
+ cached_bundle = read_cached_upcoming_props_bundle(conn, cache_key="default")
773
+ cache_meta = cached_bundle.get("cache_meta", pd.DataFrame())
774
+ if not cache_meta.empty and _is_fetched_at_fresh(cache_meta.iloc[0]["fetched_at"], 300):
775
+ merged = cached_bundle.get("merged_props_feed", pd.DataFrame())
776
+ coverage = cached_bundle.get("coverage_summary", pd.DataFrame())
777
+ return {
778
+ "odds_api_raw": pd.DataFrame(),
779
+ "scraper_raw": pd.DataFrame(),
780
+ "merged_props_feed": merged if isinstance(merged, pd.DataFrame) else pd.DataFrame(),
781
+ "coverage_summary": coverage if isinstance(coverage, pd.DataFrame) else pd.DataFrame(),
782
+ "cache_meta": cache_meta,
783
+ "cache_source": "db_snapshot",
784
+ }
785
+ except Exception:
786
+ pass
787
+
788
+ try:
789
+ bundle = fetch_all_upcoming_hr_props_bundle(sportsbooks=DEFAULT_PROP_BOOKS)
790
+ try:
791
+ replace_cached_upcoming_props_bundle(conn, bundle, cache_key="default")
792
+ except Exception as exc:
793
+ logger.warning("[load_upcoming_hr_props_bundle] cache persist failure: %s", exc)
794
+ bundle["cache_source"] = "live_fetch"
795
+ return bundle
796
  except Exception as exc:
797
  logger.warning("[load_upcoming_hr_props_bundle] failure: %s", exc)
798
  return {
 
800
  "scraper_raw": pd.DataFrame(),
801
  "merged_props_feed": pd.DataFrame(),
802
  "coverage_summary": pd.DataFrame(),
803
+ "cache_meta": pd.DataFrame(),
804
+ "cache_source": "unavailable",
805
  }
806
 
807
 
808
@st.cache_data(ttl=REFRESH_TTL_SECONDS)
def load_odds() -> pd.DataFrame:
    """Return featured odds, preferring a fresh database snapshot.

    The cached frame is returned when it is non-empty and the ``fetched_at``
    of its first row is within ``REFRESH_TTL_SECONDS``. Otherwise odds are
    fetched live and written back to the cache on a best-effort basis.
    """
    try:
        cached = read_cached_odds(conn)
        if not cached.empty and _is_fetched_at_fresh(cached.iloc[0]["fetched_at"], REFRESH_TTL_SECONDS):
            return cached
    except Exception as exc:
        # Fall through to the live fetch, but surface the cache problem the
        # same way persist failures are surfaced.
        logger.warning("[load_odds] cache read failure: %s", exc)

    fresh = fetch_featured_odds()
    try:
        replace_cached_odds(conn, fresh)
    except Exception as exc:
        logger.warning("[load_odds] cache persist failure: %s", exc)
    return fresh
823
 
824
@st.cache_data(ttl=SCHEDULE_TTL_SECONDS)
def load_dashboard_schedule_for_date(date_str: str) -> pd.DataFrame:
    """Return the schedule for ``date_str``, preferring a fresh database snapshot.

    The cached frame is returned when it is non-empty and its latest
    ``fetched_at`` is within ``SCHEDULE_TTL_SECONDS``. Otherwise the schedule
    is fetched live and written back to the cache on a best-effort basis.
    """
    try:
        cached = read_cached_schedule_for_date(conn, date_str)
        latest_cached_at = _latest_fetched_at_from_df(cached)
        if not cached.empty and _is_fetched_at_fresh(latest_cached_at, SCHEDULE_TTL_SECONDS):
            return cached
    except Exception as exc:
        # Fall through to the live fetch, but surface the cache problem the
        # same way persist failures are surfaced.
        logger.warning("[load_dashboard_schedule_for_date] cache read failure: %s", exc)

    fresh = fetch_schedule_for_date(date_str)
    try:
        replace_cached_schedule(conn, fresh)
    except Exception as exc:
        logger.warning("[load_dashboard_schedule_for_date] cache persist failure: %s", exc)
    return fresh
840
 
841
 
842
  @st.cache_data(ttl=SCORES_TTL_SECONDS)
 
866
  return fresh_scores
867
 
868
def load_weather(venue_name: str) -> pd.DataFrame:
    """Return weather for ``venue_name``, preferring a fresh database snapshot.

    When the cached frame is non-empty and its latest ``fetched_at`` is within
    ``REFRESH_TTL_SECONDS``, only its first row is returned (index reset).
    Otherwise weather is fetched live; a non-empty result is written back to
    the cache on a best-effort basis.
    """
    try:
        cached = read_cached_weather_for_venue(conn, venue_name)
        latest_cached_at = _latest_fetched_at_from_df(cached)
        if not cached.empty and _is_fetched_at_fresh(latest_cached_at, REFRESH_TTL_SECONDS):
            return cached.head(1).reset_index(drop=True)
    except Exception as exc:
        # Fall through to the live fetch, but surface the cache problem the
        # same way persist failures are surfaced.
        logger.warning("[load_weather] cache read failure: %s", exc)

    fresh = fetch_weather_for_venue(venue_name)
    try:
        # Skip empty results so they are not written to the cache.
        if fresh is not None and not fresh.empty:
            replace_cached_weather(conn, fresh)
    except Exception as exc:
        logger.warning("[load_weather] cache persist failure: %s", exc)
    return fresh
884
 
885
 
886
  def render_header() -> None:
 
3090
  pass
3091
 
3092
  schedule_df = load_dashboard_schedule_for_date(schedule_date_str)
 
 
 
 
 
 
 
3093
 
3094
  live_games, final_games, scheduled_games = split_games_for_scoreboard(
3095
  schedule_df=schedule_df,
 
3137
  live_games = recovered_live_games
3138
  final_games = recovered_final_games
3139
 
3140
+ dashboard_batter_names, dashboard_pitcher_names = _extract_live_dashboard_participants(live_games)
3141
+ baseline_bundle = load_shared_baseline_bundle_cached(
3142
+ batter_names=dashboard_batter_names,
3143
+ pitcher_names=dashboard_pitcher_names,
3144
+ ) if not live_games.empty else {}
3145
+ statcast_df = baseline_bundle.get("blended_batter_df", pd.DataFrame()) if isinstance(baseline_bundle, dict) else pd.DataFrame()
3146
+ pitcher_statcast_df = baseline_bundle.get("blended_pitcher_df", pd.DataFrame()) if isinstance(baseline_bundle, dict) else pd.DataFrame()
3147
+ if statcast_df.empty and not live_games.empty:
3148
+ statcast_df = load_statcast_recent()
3149
+ if pitcher_statcast_df.empty and not live_games.empty:
3150
+ pitcher_statcast_df = statcast_df
3151
+
3152
  filter_option = st.radio(
3153
  "Game Status",
3154
  ["All", "Live", "Final", "Scheduled"],
data/shared_baseline.py CHANGED
@@ -1,13 +1,19 @@
1
  from __future__ import annotations
2
 
3
  from collections import defaultdict
 
4
  from typing import Any
5
 
6
  import pandas as pd
7
  from sqlalchemy import text
8
 
9
- from database.db import get_connection
10
  from features.pitch_features import add_pitch_features
 
 
 
 
 
11
  from visualization.cards.player_identity import load_identity_map, normalize_for_matching
12
 
13
  PRIOR_SEASONS = (2021, 2022, 2023, 2024, 2025)
@@ -17,6 +23,8 @@ _HITTER_BLEND_K = 260.0
17
  _PITCHER_BLEND_K = 320.0
18
  _MAX_ROWS_PER_PLAYER = 420
19
  _MIN_CURRENT_ROWS_WHEN_AVAILABLE = 20
 
 
20
  _PRIOR_SEASON_RECENCY_WEIGHTS = {
21
  2025: 1.00,
22
  2024: 0.85,
@@ -189,6 +197,62 @@ def _prepare_frame(df: pd.DataFrame) -> pd.DataFrame:
189
  return add_pitch_features(out)
190
 
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  def _load_current_events(conn, current_season: int) -> pd.DataFrame:
193
  query = text(
194
  """
@@ -600,6 +664,485 @@ def _blend_entity_frames(
600
  return blended_df, metadata_df
601
 
602
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
  def build_shared_baseline_bundle(
604
  batter_names: tuple[str, ...] | None = None,
605
  pitcher_names: tuple[str, ...] | None = None,
 
1
  from __future__ import annotations
2
 
3
  from collections import defaultdict
4
+ import json
5
  from typing import Any
6
 
7
  import pandas as pd
8
  from sqlalchemy import text
9
 
10
+ from database.db import get_connection, replace_table_contents
11
  from features.pitch_features import add_pitch_features
12
+ from models.rolling_form_model import (
13
+ build_batter_rolling_form_row,
14
+ build_pitcher_rolling_form_row,
15
+ )
16
+ from utils.helpers import utc_now_iso
17
  from visualization.cards.player_identity import load_identity_map, normalize_for_matching
18
 
19
  PRIOR_SEASONS = (2021, 2022, 2023, 2024, 2025)
 
23
  _PITCHER_BLEND_K = 320.0
24
  _MAX_ROWS_PER_PLAYER = 420
25
  _MIN_CURRENT_ROWS_WHEN_AVAILABLE = 20
26
+ _SNAPSHOT_VERSION = "shared_baseline_v1"
27
+ _DEFAULT_SNAPSHOT_MAX_AGE_SECONDS = 60 * 30
28
  _PRIOR_SEASON_RECENCY_WEIGHTS = {
29
  2025: 1.00,
30
  2024: 0.85,
 
197
  return add_pitch_features(out)
198
 
199
 
200
+ def _json_default(value: Any) -> Any:
201
+ if isinstance(value, (pd.Timestamp, pd.Timedelta)):
202
+ return str(value)
203
+ return str(value)
204
+
205
+
206
+ def _serialize_payload_frame(df: pd.DataFrame) -> str:
207
+ if df is None or df.empty:
208
+ return "[]"
209
+ out = df.copy()
210
+ if "game_date" in out.columns:
211
+ out["game_date"] = out["game_date"].astype(str)
212
+ return json.dumps(out.where(out.notna(), other=None).to_dict("records"), default=_json_default)
213
+
214
+
215
+ def _deserialize_payload_frame(payload_json: str) -> pd.DataFrame:
216
+ try:
217
+ payload = json.loads(str(payload_json or "[]"))
218
+ except Exception:
219
+ payload = []
220
+ if not payload:
221
+ return pd.DataFrame()
222
+ return _prepare_frame(pd.DataFrame(payload))
223
+
224
+
225
+ def _normalize_names_tuple(values: tuple[str, ...] | None) -> tuple[str, ...]:
226
+ if not values:
227
+ return tuple()
228
+ out = []
229
+ seen = set()
230
+ for value in values:
231
+ cleaned = str(value or "").strip()
232
+ if not cleaned:
233
+ continue
234
+ lowered = cleaned.lower()
235
+ if lowered in seen:
236
+ continue
237
+ seen.add(lowered)
238
+ out.append(cleaned)
239
+ return tuple(sorted(out))
240
+
241
+
242
+ def _is_snapshot_stale(built_at: Any, max_age_seconds: int) -> bool:
243
+ if not built_at:
244
+ return True
245
+ try:
246
+ built_ts = pd.to_datetime(built_at, errors="coerce", utc=True)
247
+ if pd.isna(built_ts):
248
+ return True
249
+ now_ts = pd.Timestamp.now(tz="UTC")
250
+ age_seconds = max(0.0, float((now_ts - built_ts).total_seconds()))
251
+ return age_seconds > float(max_age_seconds)
252
+ except Exception:
253
+ return True
254
+
255
+
256
  def _load_current_events(conn, current_season: int) -> pd.DataFrame:
257
  query = text(
258
  """
 
664
  return blended_df, metadata_df
665
 
666
 
667
+ def _build_snapshot_rows(
668
+ frame: pd.DataFrame,
669
+ built_at: str,
670
+ snapshot_version: str,
671
+ source_status: str,
672
+ ) -> pd.DataFrame:
673
+ rows: list[dict[str, Any]] = []
674
+ if frame is None or frame.empty or "player_name" not in frame.columns:
675
+ return pd.DataFrame(
676
+ columns=[
677
+ "player_name",
678
+ "source_row_count",
679
+ "payload_json",
680
+ "snapshot_built_at",
681
+ "snapshot_version",
682
+ "source_status",
683
+ ]
684
+ )
685
+
686
+ for player_name, player_df in frame.groupby("player_name", dropna=False):
687
+ player_name_str = str(player_name or "").strip()
688
+ if not player_name_str:
689
+ continue
690
+ rows.append(
691
+ {
692
+ "player_name": player_name_str,
693
+ "source_row_count": int(len(player_df)),
694
+ "payload_json": _serialize_payload_frame(player_df.reset_index(drop=True)),
695
+ "snapshot_built_at": built_at,
696
+ "snapshot_version": snapshot_version,
697
+ "source_status": source_status,
698
+ }
699
+ )
700
+ return pd.DataFrame(rows)
701
+
702
+
703
+ def _build_meta_snapshot_rows(
704
+ meta_df: pd.DataFrame,
705
+ built_at: str,
706
+ snapshot_version: str,
707
+ source_status: str,
708
+ ) -> pd.DataFrame:
709
+ if meta_df is None:
710
+ meta_df = pd.DataFrame()
711
+ out = meta_df.copy()
712
+ for col in [
713
+ "player_name",
714
+ "baseline_role",
715
+ "baseline_mode",
716
+ "prior_sample_size",
717
+ "season_2026_sample_size",
718
+ "prior_weight",
719
+ "season_2026_weight",
720
+ "baseline_driver",
721
+ "rolling_overlay_active",
722
+ ]:
723
+ if col not in out.columns:
724
+ out[col] = None
725
+ out["snapshot_built_at"] = built_at
726
+ out["snapshot_version"] = snapshot_version
727
+ out["source_status"] = source_status
728
+ return out[
729
+ [
730
+ "player_name",
731
+ "baseline_role",
732
+ "baseline_mode",
733
+ "prior_sample_size",
734
+ "season_2026_sample_size",
735
+ "prior_weight",
736
+ "season_2026_weight",
737
+ "baseline_driver",
738
+ "rolling_overlay_active",
739
+ "snapshot_built_at",
740
+ "snapshot_version",
741
+ "source_status",
742
+ ]
743
+ ].copy()
744
+
745
+
746
+ def _build_rolling_snapshot_rows(
747
+ frame: pd.DataFrame,
748
+ role_label: str,
749
+ built_at: str,
750
+ snapshot_version: str,
751
+ source_status: str,
752
+ ) -> pd.DataFrame:
753
+ rows: list[dict[str, Any]] = []
754
+ if frame is None or frame.empty or "player_name" not in frame.columns:
755
+ return pd.DataFrame(
756
+ columns=[
757
+ "player_name",
758
+ "source_row_count",
759
+ "payload_json",
760
+ "snapshot_built_at",
761
+ "snapshot_version",
762
+ "source_status",
763
+ ]
764
+ )
765
+
766
+ for player_name, player_df in frame.groupby("player_name", dropna=False):
767
+ player_name_str = str(player_name or "").strip()
768
+ if not player_name_str:
769
+ continue
770
+ reference_date = None
771
+ if "game_date" in player_df.columns:
772
+ try:
773
+ reference_date = pd.to_datetime(
774
+ player_df["game_date"], errors="coerce"
775
+ ).max()
776
+ except Exception:
777
+ reference_date = None
778
+
779
+ if role_label == "batter":
780
+ payload = build_batter_rolling_form_row(
781
+ statcast_df=frame,
782
+ player_name=player_name_str,
783
+ reference_date=reference_date,
784
+ )
785
+ else:
786
+ payload = build_pitcher_rolling_form_row(
787
+ statcast_df=frame,
788
+ pitcher_name=player_name_str,
789
+ reference_date=reference_date,
790
+ )
791
+
792
+ rows.append(
793
+ {
794
+ "player_name": player_name_str,
795
+ "source_row_count": int(len(player_df)),
796
+ "payload_json": json.dumps(payload, default=_json_default),
797
+ "snapshot_built_at": built_at,
798
+ "snapshot_version": snapshot_version,
799
+ "source_status": source_status,
800
+ }
801
+ )
802
+ return pd.DataFrame(rows)
803
+
804
+
805
def _read_snapshot_table(
    conn,
    table_name: str,
    player_names: tuple[str, ...] = (),
) -> pd.DataFrame:
    """Read a snapshot table ordered by ``player_name``.

    With no ``player_names`` the whole table is read. Otherwise rows are
    restricted with an ``IN`` clause whose values are passed as bound
    parameters (``name_0``, ``name_1``, ...). ``table_name`` is interpolated
    into the SQL text directly, so it must come from trusted code.
    """
    if not player_names:
        return pd.read_sql(text(f"SELECT * FROM {table_name} ORDER BY player_name"), conn)

    params: dict[str, Any] = {
        f"name_{position}": str(player_name)
        for position, player_name in enumerate(player_names)
    }
    placeholders = ", ".join(f":{key}" for key in params)
    query = text(
        f"SELECT * FROM {table_name} WHERE player_name IN ({placeholders}) ORDER BY player_name"
    )
    return pd.read_sql(query, conn, params=params)
822
+
823
+
824
+ def _hydrate_snapshot_frame(snapshot_df: pd.DataFrame) -> pd.DataFrame:
825
+ if snapshot_df is None or snapshot_df.empty:
826
+ return pd.DataFrame()
827
+ frames: list[pd.DataFrame] = []
828
+ for _, row in snapshot_df.iterrows():
829
+ frame = _deserialize_payload_frame(row.get("payload_json"))
830
+ if frame.empty:
831
+ continue
832
+ frames.append(frame)
833
+ if not frames:
834
+ return pd.DataFrame()
835
+ return pd.concat(frames, ignore_index=True, sort=False)
836
+
837
+
838
+ def _hydrate_rolling_snapshot_frame(snapshot_df: pd.DataFrame) -> pd.DataFrame:
839
+ if snapshot_df is None or snapshot_df.empty:
840
+ return pd.DataFrame()
841
+ rows: list[dict[str, Any]] = []
842
+ for _, row in snapshot_df.iterrows():
843
+ try:
844
+ payload = json.loads(str(row.get("payload_json") or "{}"))
845
+ except Exception:
846
+ payload = {}
847
+ payload["player_name"] = str(row.get("player_name") or "").strip()
848
+ payload["snapshot_built_at"] = row.get("snapshot_built_at")
849
+ payload["snapshot_version"] = row.get("snapshot_version")
850
+ payload["source_status"] = row.get("source_status")
851
+ rows.append(payload)
852
+ return pd.DataFrame(rows)
853
+
854
+
855
def persist_shared_baseline_snapshots(
    bundle: dict[str, pd.DataFrame],
    source_status: str = "runtime_refreshed",
) -> dict[str, pd.DataFrame]:
    """Write the bundle's baseline, metadata and rolling snapshots to the database.

    Six snapshot frames are built from the bundle (blended hitter/pitcher
    frames, hitter/pitcher metadata, hitter/pitcher rolling form), all stamped
    with the same ``utc_now_iso()`` time and ``_SNAPSHOT_VERSION``. They replace
    the contents of their tables over one connection, which is always closed.

    The passed ``bundle`` is mutated and returned: ``snapshot_status`` gets one
    non-stale row per table, ``snapshot_source_status`` is set to
    ``source_status`` and ``runtime_fallback_used`` is set to ``False``.
    """
    built_at = utc_now_iso()
    stamp = {
        "built_at": built_at,
        "snapshot_version": _SNAPSHOT_VERSION,
        "source_status": source_status,
    }

    # Every frame is built before the connection is opened; list order is the
    # write order and the order of the status rows.
    tables: list[tuple[str, pd.DataFrame]] = [
        (
            "shared_hitter_baseline_snapshot",
            _build_snapshot_rows(bundle.get("blended_batter_df", pd.DataFrame()), **stamp),
        ),
        (
            "shared_pitcher_baseline_snapshot",
            _build_snapshot_rows(bundle.get("blended_pitcher_df", pd.DataFrame()), **stamp),
        ),
        (
            "shared_hitter_baseline_meta",
            _build_meta_snapshot_rows(bundle.get("batter_baseline_meta", pd.DataFrame()), **stamp),
        ),
        (
            "shared_pitcher_baseline_meta",
            _build_meta_snapshot_rows(bundle.get("pitcher_baseline_meta", pd.DataFrame()), **stamp),
        ),
        (
            "shared_hitter_rolling_snapshot",
            _build_rolling_snapshot_rows(
                bundle.get("season_2026_ytd_hitter_df", pd.DataFrame()),
                role_label="batter",
                **stamp,
            ),
        ),
        (
            "shared_pitcher_rolling_snapshot",
            _build_rolling_snapshot_rows(
                bundle.get("season_2026_ytd_pitcher_df", pd.DataFrame()),
                role_label="pitcher",
                **stamp,
            ),
        ),
    ]

    conn = get_connection()
    try:
        for table_name, table_frame in tables:
            replace_table_contents(conn, table_name, table_frame)
    finally:
        try:
            conn.close()
        except Exception:
            pass

    bundle["snapshot_status"] = pd.DataFrame(
        [
            {
                "table_name": table_name,
                "row_count": int(len(table_frame)),
                "snapshot_built_at": built_at,
                "snapshot_version": _SNAPSHOT_VERSION,
                "source_status": source_status,
                "stale": False,
            }
            for table_name, table_frame in tables
        ]
    )
    bundle["snapshot_source_status"] = source_status
    bundle["runtime_fallback_used"] = False
    return bundle
970
+
971
+
972
def load_shared_baseline_bundle_from_snapshots(
    batter_names: tuple[str, ...] = (),
    pitcher_names: tuple[str, ...] = (),
    max_age_seconds: int = _DEFAULT_SNAPSHOT_MAX_AGE_SECONDS,
) -> dict[str, pd.DataFrame]:
    """Assemble a baseline bundle purely from the persisted snapshot tables.

    Args:
        batter_names: Hitters to load; empty means all hitters.
        pitcher_names: Pitchers to load; empty means all pitchers.
        max_age_seconds: Age beyond which a table's snapshot is flagged stale.

    Returns:
        A bundle dict. The prior/year-to-date frames are always empty here; the
        blended and rolling frames are hydrated from the snapshot payloads and
        the metadata frames are returned as read. ``snapshot_status`` holds one
        row per table; a table with no rows or no usable build timestamp is
        reported stale. If any table read fails, every frame is empty and
        ``snapshot_source_status`` is ``"snapshot_unavailable"``.
    """
    batter_names = _normalize_names_tuple(batter_names)
    pitcher_names = _normalize_names_tuple(pitcher_names)

    # Read order and status-row order both follow this list.
    table_requests = [
        ("shared_hitter_baseline_snapshot", batter_names),
        ("shared_pitcher_baseline_snapshot", pitcher_names),
        ("shared_hitter_baseline_meta", batter_names),
        ("shared_pitcher_baseline_meta", pitcher_names),
        ("shared_hitter_rolling_snapshot", batter_names),
        ("shared_pitcher_rolling_snapshot", pitcher_names),
    ]

    conn = get_connection()
    try:
        frames = {
            table_name: _read_snapshot_table(conn, table_name, player_names=names)
            for table_name, names in table_requests
        }
    except Exception:
        return {
            "multi_year_prior_hitter_df": pd.DataFrame(),
            "season_2026_ytd_hitter_df": pd.DataFrame(),
            "multi_year_prior_pitcher_df": pd.DataFrame(),
            "season_2026_ytd_pitcher_df": pd.DataFrame(),
            "blended_batter_df": pd.DataFrame(),
            "blended_pitcher_df": pd.DataFrame(),
            "batter_baseline_meta": pd.DataFrame(),
            "pitcher_baseline_meta": pd.DataFrame(),
            "hitter_rolling_snapshot": pd.DataFrame(),
            "pitcher_rolling_snapshot": pd.DataFrame(),
            "snapshot_status": pd.DataFrame(),
            "snapshot_source_status": "snapshot_unavailable",
            "runtime_fallback_used": False,
        }
    finally:
        try:
            conn.close()
        except Exception:
            pass

    def _first_value(frame: pd.DataFrame, column: str) -> Any:
        # Guard on column presence: calling .iloc[0] on an empty fallback
        # Series would raise IndexError for a table lacking the stamp column.
        if column not in frame.columns:
            return None
        return frame[column].iloc[0]

    snapshot_status_rows: list[dict[str, Any]] = []
    for table_name, frame in frames.items():
        built_at = None
        version = None
        source_status = None
        if isinstance(frame, pd.DataFrame) and not frame.empty:
            built_at = _first_value(frame, "snapshot_built_at")
            version = _first_value(frame, "snapshot_version")
            source_status = _first_value(frame, "source_status")
        snapshot_status_rows.append(
            {
                "table_name": table_name,
                "row_count": 0 if frame is None else int(len(frame)),
                "snapshot_built_at": built_at,
                "snapshot_version": version,
                "source_status": source_status,
                "stale": _is_snapshot_stale(built_at, max_age_seconds),
            }
        )

    return {
        "multi_year_prior_hitter_df": pd.DataFrame(),
        "season_2026_ytd_hitter_df": pd.DataFrame(),
        "multi_year_prior_pitcher_df": pd.DataFrame(),
        "season_2026_ytd_pitcher_df": pd.DataFrame(),
        "blended_batter_df": _hydrate_snapshot_frame(frames["shared_hitter_baseline_snapshot"]),
        "blended_pitcher_df": _hydrate_snapshot_frame(frames["shared_pitcher_baseline_snapshot"]),
        "batter_baseline_meta": frames["shared_hitter_baseline_meta"],
        "pitcher_baseline_meta": frames["shared_pitcher_baseline_meta"],
        "hitter_rolling_snapshot": _hydrate_rolling_snapshot_frame(frames["shared_hitter_rolling_snapshot"]),
        "pitcher_rolling_snapshot": _hydrate_rolling_snapshot_frame(frames["shared_pitcher_rolling_snapshot"]),
        "snapshot_status": pd.DataFrame(snapshot_status_rows),
        "snapshot_source_status": "snapshot",
        "runtime_fallback_used": False,
    }
1076
+
1077
+
1078
def load_or_build_shared_baseline_bundle(
    batter_names: tuple[str, ...] = (),
    pitcher_names: tuple[str, ...] = (),
    max_age_seconds: int = _DEFAULT_SNAPSHOT_MAX_AGE_SECONDS,
    persist_runtime_refresh: bool = True,
) -> dict[str, pd.DataFrame]:
    """Return the snapshot-backed bundle when usable, else rebuild at runtime.

    The snapshot bundle is returned as-is when it has blended data, its
    metadata covers every requested hitter and pitcher (case-insensitively),
    and no table is flagged stale. Otherwise the bundle is rebuilt with
    ``build_shared_baseline_bundle`` and marked as a runtime fallback. The
    rebuilt bundle is persisted only when ``persist_runtime_refresh`` is set
    and no name filter was requested.
    """
    batter_names = _normalize_names_tuple(batter_names)
    pitcher_names = _normalize_names_tuple(pitcher_names)

    snapshot_bundle = load_shared_baseline_bundle_from_snapshots(
        batter_names=batter_names,
        pitcher_names=pitcher_names,
        max_age_seconds=max_age_seconds,
    )
    snapshot_status = snapshot_bundle.get("snapshot_status", pd.DataFrame())

    def _covers(meta_key: str, wanted: tuple[str, ...]) -> bool:
        # An empty request is trivially covered; otherwise every wanted name
        # must appear in the metadata frame's player_name column.
        if not wanted:
            return True
        meta_frame = snapshot_bundle.get(meta_key, pd.DataFrame())
        known = {
            str(name).strip().lower()
            for name in meta_frame.get("player_name", pd.Series(dtype="object"))
            .dropna()
            .astype(str)
            .tolist()
        }
        return all(name.lower() in known for name in wanted)

    requested_hitter_covered = _covers("batter_baseline_meta", batter_names)
    requested_pitcher_covered = _covers("pitcher_baseline_meta", pitcher_names)

    snapshot_has_data = (
        not snapshot_bundle.get("blended_batter_df", pd.DataFrame()).empty
        or not snapshot_bundle.get("blended_pitcher_df", pd.DataFrame()).empty
    )
    snapshot_stale = bool(
        isinstance(snapshot_status, pd.DataFrame)
        and not snapshot_status.empty
        and snapshot_status["stale"].fillna(False).any()
    )

    snapshot_usable = (
        snapshot_has_data
        and requested_hitter_covered
        and requested_pitcher_covered
        and not snapshot_stale
    )
    if snapshot_usable:
        return snapshot_bundle

    runtime_bundle = build_shared_baseline_bundle(
        batter_names=batter_names,
        pitcher_names=pitcher_names,
    )
    runtime_bundle["snapshot_source_status"] = "runtime_fallback"
    runtime_bundle["runtime_fallback_used"] = True

    unfiltered_request = not batter_names and not pitcher_names
    if persist_runtime_refresh and unfiltered_request:
        runtime_bundle = persist_shared_baseline_snapshots(
            runtime_bundle,
            source_status="runtime_refreshed",
        )

    if "snapshot_status" not in runtime_bundle:
        runtime_bundle["snapshot_status"] = snapshot_status
    return runtime_bundle
1144
+
1145
+
1146
  def build_shared_baseline_bundle(
1147
  batter_names: tuple[str, ...] | None = None,
1148
  pitcher_names: tuple[str, ...] | None = None,
data/statcast.py CHANGED
@@ -90,6 +90,8 @@ def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
90
 
91
  rename_map = {
92
  "player_name": "player_name",
 
 
93
  "pitch_type": "pitch_type",
94
  "pitch_name": "pitch_name",
95
  "release_speed": "release_speed",
@@ -114,6 +116,10 @@ def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
114
  "p_throws": "pitcher_hand",
115
  "home_team": "home_team",
116
  "away_team": "away_team",
 
 
 
 
117
  "game_date": "game_date",
118
  "game_pk": "game_pk",
119
  "inning": "inning",
@@ -130,6 +136,11 @@ def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
130
  out = df[keep_cols].copy()
131
  out = out.rename(columns={col: rename_map[col] for col in keep_cols})
132
 
 
 
 
 
 
133
  numeric_cols = [
134
  "release_speed",
135
  "release_spin_rate",
@@ -160,4 +171,4 @@ def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
160
  if "game_date" in out.columns:
161
  out["game_date"] = pd.to_datetime(out["game_date"], errors="coerce")
162
 
163
- return out
 
90
 
91
  rename_map = {
92
  "player_name": "player_name",
93
+ "batter": "batter",
94
+ "pitcher": "pitcher",
95
  "pitch_type": "pitch_type",
96
  "pitch_name": "pitch_name",
97
  "release_speed": "release_speed",
 
116
  "p_throws": "pitcher_hand",
117
  "home_team": "home_team",
118
  "away_team": "away_team",
119
+ "inning_topbot": "inning_topbot",
120
+ "team": "team",
121
+ "batter_team": "batter_team",
122
+ "team_name": "team_name",
123
  "game_date": "game_date",
124
  "game_pk": "game_pk",
125
  "inning": "inning",
 
136
  out = df[keep_cols].copy()
137
  out = out.rename(columns={col: rename_map[col] for col in keep_cols})
138
 
139
+ if "pitcher_hand" in out.columns and "p_throws" not in out.columns:
140
+ out["p_throws"] = out["pitcher_hand"]
141
+ if "batter_stand" in out.columns and "stand" not in out.columns:
142
+ out["stand"] = out["batter_stand"]
143
+
144
  numeric_cols = [
145
  "release_speed",
146
  "release_spin_rate",
 
171
  if "game_date" in out.columns:
172
  out["game_date"] = pd.to_datetime(out["game_date"], errors="coerce")
173
 
174
+ return out
database/db.py CHANGED
@@ -22,6 +22,7 @@ so they can be added later without structural changes.
22
 
23
  from __future__ import annotations
24
 
 
25
  from typing import Any, Iterable, Mapping
26
 
27
  import pandas as pd
@@ -287,6 +288,7 @@ def initialize_schema(conn) -> None:
287
  """
288
  ensure_statcast_core_tables(conn)
289
  ensure_live_pitch_tables(conn)
 
290
 
291
  conn.execute(text(
292
  """
@@ -311,17 +313,42 @@ def initialize_schema(conn) -> None:
311
  CREATE TABLE IF NOT EXISTS cached_schedule (
312
  fetched_at TEXT,
313
  game_id TEXT,
 
314
  game_date TEXT,
315
  status TEXT,
316
  away_team TEXT,
317
  home_team TEXT,
318
  away_score INTEGER,
319
  home_score INTEGER,
320
- venue TEXT
 
 
 
 
 
 
 
 
321
  )
322
  """
323
  ))
324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  conn.execute(text(
326
  """
327
  CREATE TABLE IF NOT EXISTS cached_odds (
@@ -354,6 +381,58 @@ def initialize_schema(conn) -> None:
354
  """
355
  ))
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  conn.execute(text(
358
  "CREATE INDEX IF NOT EXISTS idx_statcast_player_date "
359
  "ON statcast_event_core (player_name, source_season, game_date)"
@@ -386,6 +465,34 @@ def initialize_schema(conn) -> None:
386
  "CREATE INDEX IF NOT EXISTS idx_lbgl_player_date "
387
  "ON live_batter_game_log_2026 (player_name, game_date)"
388
  ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
 
391
  # ---------------------------------------------------------------------------
@@ -405,10 +512,415 @@ def upsert_dataframe(
405
  _bulk_insert(conn, table_name, df)
406
 
407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  def read_table(conn, table_name: str) -> pd.DataFrame:
409
  return pd.read_sql(text(f"SELECT * FROM {table_name}"), conn)
410
 
411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  # ---------------------------------------------------------------------------
413
  # Bets
414
  # ---------------------------------------------------------------------------
@@ -876,6 +1388,113 @@ def read_upcoming_hr_props(conn) -> pd.DataFrame:
876
  )
877
 
878
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
879
  # ---------------------------------------------------------------------------
880
  # Batter prop audit view
881
  # ---------------------------------------------------------------------------
 
22
 
23
  from __future__ import annotations
24
 
25
+ import json
26
  from typing import Any, Iterable, Mapping
27
 
28
  import pandas as pd
 
288
  """
289
  ensure_statcast_core_tables(conn)
290
  ensure_live_pitch_tables(conn)
291
+ ensure_shared_baseline_snapshot_tables(conn)
292
 
293
  conn.execute(text(
294
  """
 
313
  CREATE TABLE IF NOT EXISTS cached_schedule (
314
  fetched_at TEXT,
315
  game_id TEXT,
316
+ game_pk TEXT,
317
  game_date TEXT,
318
  status TEXT,
319
  away_team TEXT,
320
  home_team TEXT,
321
  away_score INTEGER,
322
  home_score INTEGER,
323
+ away_hits INTEGER,
324
+ home_hits INTEGER,
325
+ away_errors INTEGER,
326
+ home_errors INTEGER,
327
+ venue TEXT,
328
+ game_datetime_utc TEXT,
329
+ tv TEXT,
330
+ start_time_et TEXT,
331
+ sport_id INTEGER
332
  )
333
  """
334
  ))
335
 
336
+ for _stmt in [
337
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS game_pk TEXT",
338
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS away_hits INTEGER",
339
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS home_hits INTEGER",
340
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS away_errors INTEGER",
341
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS home_errors INTEGER",
342
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS game_datetime_utc TEXT",
343
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS tv TEXT",
344
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS start_time_et TEXT",
345
+ "ALTER TABLE cached_schedule ADD COLUMN IF NOT EXISTS sport_id INTEGER",
346
+ ]:
347
+ try:
348
+ conn.execute(text(_stmt))
349
+ except Exception:
350
+ pass
351
+
352
  conn.execute(text(
353
  """
354
  CREATE TABLE IF NOT EXISTS cached_odds (
 
381
  """
382
  ))
383
 
384
+ conn.execute(text(
385
+ """
386
+ CREATE TABLE IF NOT EXISTS cached_probable_starters (
387
+ fetched_at TEXT,
388
+ away_team_norm TEXT,
389
+ home_team_norm TEXT,
390
+ away_team_raw TEXT,
391
+ home_team_raw TEXT,
392
+ away_pitcher TEXT,
393
+ home_pitcher TEXT
394
+ )
395
+ """
396
+ ))
397
+
398
+ conn.execute(text(
399
+ """
400
+ CREATE TABLE IF NOT EXISTS cached_projected_lineups (
401
+ fetched_at TEXT,
402
+ team_name_norm TEXT,
403
+ team_name TEXT,
404
+ source TEXT,
405
+ lineup_vs_rhp_json TEXT,
406
+ lineup_vs_lhp_json TEXT,
407
+ rotation_json TEXT
408
+ )
409
+ """
410
+ ))
411
+
412
+ conn.execute(text(
413
+ """
414
+ CREATE TABLE IF NOT EXISTS cached_upcoming_props_feed (
415
+ fetched_at TEXT,
416
+ cache_key TEXT,
417
+ row_count INTEGER,
418
+ payload_json TEXT
419
+ )
420
+ """
421
+ ))
422
+
423
+ conn.execute(text(
424
+ """
425
+ CREATE TABLE IF NOT EXISTS cached_upcoming_props_bundle_meta (
426
+ fetched_at TEXT,
427
+ cache_key TEXT,
428
+ merged_row_count INTEGER,
429
+ odds_api_row_count INTEGER,
430
+ scraper_row_count INTEGER,
431
+ coverage_summary_json TEXT
432
+ )
433
+ """
434
+ ))
435
+
436
  conn.execute(text(
437
  "CREATE INDEX IF NOT EXISTS idx_statcast_player_date "
438
  "ON statcast_event_core (player_name, source_season, game_date)"
 
465
  "CREATE INDEX IF NOT EXISTS idx_lbgl_player_date "
466
  "ON live_batter_game_log_2026 (player_name, game_date)"
467
  ))
468
+ conn.execute(text(
469
+ "CREATE INDEX IF NOT EXISTS idx_cached_schedule_game_date "
470
+ "ON cached_schedule (game_date)"
471
+ ))
472
+ conn.execute(text(
473
+ "CREATE INDEX IF NOT EXISTS idx_cached_odds_fetched_at "
474
+ "ON cached_odds (fetched_at)"
475
+ ))
476
+ conn.execute(text(
477
+ "CREATE INDEX IF NOT EXISTS idx_cached_weather_venue "
478
+ "ON cached_weather (venue_key, fetched_at)"
479
+ ))
480
+ conn.execute(text(
481
+ "CREATE INDEX IF NOT EXISTS idx_cached_probable_starters_matchup "
482
+ "ON cached_probable_starters (away_team_norm, home_team_norm)"
483
+ ))
484
+ conn.execute(text(
485
+ "CREATE INDEX IF NOT EXISTS idx_cached_projected_lineups_team "
486
+ "ON cached_projected_lineups (team_name_norm)"
487
+ ))
488
+ conn.execute(text(
489
+ "CREATE INDEX IF NOT EXISTS idx_cached_upcoming_props_feed_key "
490
+ "ON cached_upcoming_props_feed (cache_key, fetched_at)"
491
+ ))
492
+ conn.execute(text(
493
+ "CREATE INDEX IF NOT EXISTS idx_cached_upcoming_props_bundle_meta_key "
494
+ "ON cached_upcoming_props_bundle_meta (cache_key, fetched_at)"
495
+ ))
496
 
497
 
498
  # ---------------------------------------------------------------------------
 
512
  _bulk_insert(conn, table_name, df)
513
 
514
 
515
def replace_table_contents(
    conn,
    table_name: str,
    df: pd.DataFrame | None,
) -> None:
    """
    Empty ``table_name`` and refill it from ``df``.

    The DELETE always runs. When ``df`` is ``None`` or has no rows the table
    is left empty; otherwise the rows are handed to ``_bulk_insert``.
    """
    # NOTE(review): table_name is interpolated into the SQL text, so callers
    # must only pass trusted, hard-coded identifiers.
    conn.execute(text(f"DELETE FROM {table_name}"))
    has_rows = df is not None and not df.empty
    if has_rows:
        _bulk_insert(conn, table_name, df)
527
+
528
+
529
  def read_table(conn, table_name: str) -> pd.DataFrame:
530
  return pd.read_sql(text(f"SELECT * FROM {table_name}"), conn)
531
 
532
 
533
def _safe_json_dump(value: Any) -> str:
    """Serialize ``value`` to JSON text, stringifying anything json cannot encode."""
    encoded = json.dumps(value, default=str)
    return encoded
535
+
536
+
537
def _safe_json_load(value: Any, default: Any) -> Any:
    """
    Parse ``value`` as JSON, returning ``default`` when that is not possible.

    ``None`` and blank/whitespace-only text yield ``default``; so does any
    error raised while stringifying or decoding the value.
    """
    try:
        is_blank = value is None or str(value).strip() == ""
        return default if is_blank else json.loads(str(value))
    except Exception:
        # Deliberately broad: a corrupt cache payload must never break a read.
        return default
544
+
545
+
546
def _latest_fetched_at(df: pd.DataFrame) -> str:
    """
    Return the newest ``fetched_at`` value in ``df`` as a string.

    Falls back to ``utc_now_iso()`` when the frame is missing, empty, lacks a
    ``fetched_at`` column, contains no parseable timestamp, or parsing fails.
    """
    usable = df is not None and not df.empty and "fetched_at" in df.columns
    if not usable:
        return utc_now_iso()
    try:
        # Unparseable entries become NaT and are ignored by max().
        newest = pd.to_datetime(df["fetched_at"], errors="coerce").max()
        return utc_now_iso() if pd.isna(newest) else str(newest)
    except Exception:
        return utc_now_iso()
556
+
557
+
558
def replace_cached_schedule(conn, df: pd.DataFrame) -> None:
    """
    Refresh cached schedule rows for the game dates present in ``df``.

    Columns missing from ``df`` are added as ``None`` so the insert always
    carries the full ``cached_schedule`` column set. Existing rows for every
    non-blank ``game_date`` in ``df`` are deleted before the new rows are
    inserted; rows for other dates are left untouched.

    A ``None`` or empty frame is a no-op, matching how
    ``replace_table_contents`` skips the insert when there is nothing to write.
    """
    cols = [
        "fetched_at",
        "game_id",
        "game_pk",
        "game_date",
        "status",
        "away_team",
        "home_team",
        "away_score",
        "home_score",
        "away_hits",
        "home_hits",
        "away_errors",
        "home_errors",
        "venue",
        "game_datetime_utc",
        "tv",
        "start_time_et",
        "sport_id",
    ]
    out = pd.DataFrame() if df is None else df.copy()
    for col in cols:
        if col not in out.columns:
            out[col] = None

    # Nothing to delete and nothing to insert: do not touch the connection.
    if out.empty:
        return

    date_values = sorted(
        {
            cleaned
            for cleaned in (
                str(value).strip()
                for value in out["game_date"].dropna().astype(str)
            )
            if cleaned
        }
    )
    if date_values:
        # One bound parameter per date keeps the values out of the SQL text.
        params: dict[str, Any] = {
            f"date_{idx}": value for idx, value in enumerate(date_values)
        }
        placeholders = ", ".join(f":{key}" for key in params)
        conn.execute(
            text(f"DELETE FROM cached_schedule WHERE game_date IN ({placeholders})"),
            params,
        )
    _bulk_insert(conn, "cached_schedule", out[cols])
602
+
603
+
604
def read_cached_schedule_for_date(conn, date_str: str) -> pd.DataFrame:
    """Return cached schedule rows whose ``game_date`` equals ``date_str``, ordered by ``game_id``."""
    query = text("SELECT * FROM cached_schedule WHERE game_date = :date ORDER BY game_id")
    bind = {"date": str(date_str)}
    return pd.read_sql(query, conn, params=bind)
610
+
611
+
612
def replace_cached_odds(conn, df: pd.DataFrame) -> None:
    """
    Replace the whole ``cached_odds`` table with the rows of ``df``.

    Columns absent from ``df`` are filled with ``None``; a ``None`` frame is
    treated as empty, which clears the table via ``replace_table_contents``.
    """
    cols = [
        "fetched_at",
        "event_id",
        "commence_time",
        "home_team",
        "away_team",
        "sportsbook",
        "market_key",
        "outcome_name",
        "price",
        "point",
    ]
    frame = pd.DataFrame() if df is None else df.copy()
    absent = [name for name in cols if name not in frame.columns]
    for name in absent:
        frame[name] = None
    replace_table_contents(conn, "cached_odds", frame[cols])
632
+
633
+
634
def read_cached_odds(conn) -> pd.DataFrame:
    """Return every cached odds row, newest ``fetched_at`` first."""
    query = text("SELECT * FROM cached_odds ORDER BY fetched_at DESC")
    return pd.read_sql(query, conn)
636
+
637
+
638
def replace_cached_weather(conn, df: pd.DataFrame) -> None:
    """
    Refresh cached weather rows for the venues present in ``df``.

    Columns missing from ``df`` are added as ``None``. Existing rows for every
    non-blank ``venue_key`` in ``df`` are deleted before the new rows are
    inserted; other venues keep their cached rows.

    A ``None`` or empty frame is a no-op, matching how
    ``replace_table_contents`` skips the insert when there is nothing to write.
    """
    cols = [
        "fetched_at",
        "venue_key",
        "location_name",
        "temperature_f",
        "humidity",
        "wind_speed_mph",
        "wind_deg",
        "description",
    ]
    out = pd.DataFrame() if df is None else df.copy()
    for col in cols:
        if col not in out.columns:
            out[col] = None

    # Nothing to delete and nothing to insert: do not touch the connection.
    if out.empty:
        return

    venue_values = sorted(
        {
            cleaned
            for cleaned in (
                str(value).strip()
                for value in out["venue_key"].dropna().astype(str)
            )
            if cleaned
        }
    )
    if venue_values:
        # One bound parameter per venue keeps the values out of the SQL text.
        params: dict[str, Any] = {
            f"venue_{idx}": value for idx, value in enumerate(venue_values)
        }
        placeholders = ", ".join(f":{key}" for key in params)
        conn.execute(
            text(f"DELETE FROM cached_weather WHERE venue_key IN ({placeholders})"),
            params,
        )
    _bulk_insert(conn, "cached_weather", out[cols])
672
+
673
+
674
def read_cached_weather_for_venue(conn, venue_key: str) -> pd.DataFrame:
    """Return cached weather rows for ``venue_key``, newest ``fetched_at`` first."""
    query = text(
        """
        SELECT * FROM cached_weather
        WHERE venue_key = :venue
        ORDER BY fetched_at DESC
        """
    )
    bind = {"venue": str(venue_key)}
    return pd.read_sql(query, conn, params=bind)
686
+
687
+
688
def replace_cached_probable_starters(
    conn,
    starters_map: Mapping[tuple[str, str], Mapping[str, Any]] | None,
) -> None:
    """
    Replace the whole ``cached_probable_starters`` table from ``starters_map``.

    Keys must be ``(away_norm, home_norm)`` 2-tuples; any other key is
    skipped. All rows share a single ``fetched_at`` stamp. Blank pitcher names
    are stored as ``None``. An empty or ``None`` map clears the table.
    """

    def _clean(raw: Any) -> str:
        return str(raw or "").strip()

    records: list[dict[str, Any]] = []
    stamp = utc_now_iso()
    for matchup, info in (starters_map or {}).items():
        if not (isinstance(matchup, tuple) and len(matchup) == 2):
            continue
        away_norm, home_norm = matchup
        info = dict(info or {})
        records.append(
            {
                "fetched_at": stamp,
                "away_team_norm": _clean(away_norm),
                "home_team_norm": _clean(home_norm),
                "away_team_raw": _clean(info.get("away_team_raw")),
                "home_team_raw": _clean(info.get("home_team_raw")),
                "away_pitcher": _clean(info.get("away_pitcher")) or None,
                "home_pitcher": _clean(info.get("home_pitcher")) or None,
            }
        )
    replace_table_contents(conn, "cached_probable_starters", pd.DataFrame(records))
711
+
712
+
713
def read_cached_probable_starters(conn) -> dict[tuple[str, str], dict[str, str | None]]:
    """
    Load cached probable starters keyed by ``(away_team_norm, home_team_norm)``.

    Rows with a blank away or home key are ignored. Blank pitcher names come
    back as ``None``. Returns an empty dict when the table has no rows.
    """

    def _clean(raw: Any) -> str:
        return str(raw or "").strip()

    df = pd.read_sql(text("SELECT * FROM cached_probable_starters"), conn)
    if df.empty:
        return {}

    starters: dict[tuple[str, str], dict[str, str | None]] = {}
    for _, row in df.iterrows():
        away_key = _clean(row.get("away_team_norm"))
        home_key = _clean(row.get("home_team_norm"))
        if not (away_key and home_key):
            continue
        starters[(away_key, home_key)] = {
            "away_team_raw": _clean(row.get("away_team_raw")),
            "home_team_raw": _clean(row.get("home_team_raw")),
            "away_pitcher": _clean(row.get("away_pitcher")) or None,
            "home_pitcher": _clean(row.get("home_pitcher")) or None,
        }
    return starters
732
+
733
+
734
def read_cached_probable_starters_meta(conn) -> pd.DataFrame:
    """Return one row per ``fetched_at`` with its ``matchup_count``, newest first."""
    query = text(
        """
        SELECT fetched_at, COUNT(*) AS matchup_count
        FROM cached_probable_starters
        GROUP BY fetched_at
        ORDER BY fetched_at DESC
        """
    )
    return pd.read_sql(query, conn)
746
+
747
+
748
def replace_cached_projected_lineups(
    conn,
    projected_lineups: Mapping[str, Mapping[str, Any]] | None,
) -> None:
    """
    Refresh cached projected lineups for the teams in ``projected_lineups``.

    Each mapping key is a normalized team name. Existing rows for those teams
    are deleted and replaced; teams not mentioned keep their cached rows.
    Lineups and rotation are stored as JSON text.

    Entries whose normalized team name is blank are skipped. The delete below
    only matches non-blank names and the reader ignores blank-keyed rows, so
    inserting them would leave unreachable rows that pile up on every refresh.

    A ``None`` or empty mapping, or one with only blank names, is a no-op.
    """
    entries: list[tuple[str, dict[str, Any]]] = []
    for team_norm, payload in (projected_lineups or {}).items():
        team_key = str(team_norm or "").strip()
        if not team_key:
            continue
        entries.append((team_key, dict(payload or {})))

    if not entries:
        return

    fetched_at = utc_now_iso()
    df = pd.DataFrame(
        [
            {
                "fetched_at": fetched_at,
                "team_name_norm": team_key,
                "team_name": str(payload.get("team_name") or "").strip(),
                "source": str(payload.get("source") or "").strip(),
                "lineup_vs_rhp_json": _safe_json_dump(payload.get("lineup_vs_rhp") or []),
                "lineup_vs_lhp_json": _safe_json_dump(payload.get("lineup_vs_lhp") or []),
                "rotation_json": _safe_json_dump(payload.get("rotation") or []),
            }
            for team_key, payload in entries
        ]
    )

    # One bound parameter per team keeps the names out of the SQL text.
    team_values = sorted({team_key for team_key, _ in entries})
    params: dict[str, Any] = {
        f"team_{idx}": value for idx, value in enumerate(team_values)
    }
    placeholders = ", ".join(f":{key}" for key in params)
    conn.execute(
        text(f"DELETE FROM cached_projected_lineups WHERE team_name_norm IN ({placeholders})"),
        params,
    )
    _bulk_insert(conn, "cached_projected_lineups", df)
788
+
789
+
790
def read_cached_projected_lineups(
    conn,
    team_names_norm: Iterable[str] | None = None,
) -> dict[str, dict[str, Any]]:
    """
    Load cached projected lineups keyed by normalized team name.

    When ``team_names_norm`` has at least one non-blank name, only those teams
    are queried; otherwise the whole table is read. JSON columns are decoded
    back to Python values, falling back to ``[]`` when they cannot be parsed.
    Rows with a blank ``team_name_norm`` are ignored.
    """
    wanted = [
        name
        for name in (str(v or "").strip() for v in (team_names_norm or []))
        if name
    ]
    if wanted:
        params: dict[str, Any] = {
            f"team_{idx}": name for idx, name in enumerate(wanted)
        }
        placeholders = ", ".join(f":{key}" for key in params)
        query = text(
            f"SELECT * FROM cached_projected_lineups WHERE team_name_norm IN ({placeholders})"
        )
        df = pd.read_sql(query, conn, params=params)
    else:
        df = pd.read_sql(text("SELECT * FROM cached_projected_lineups"), conn)

    if df.empty:
        return {}

    lineups: dict[str, dict[str, Any]] = {}
    for _, row in df.iterrows():
        team_key = str(row.get("team_name_norm") or "").strip()
        if team_key:
            lineups[team_key] = {
                "team_name": str(row.get("team_name") or "").strip(),
                "source": str(row.get("source") or "").strip(),
                "lineup_vs_rhp": _safe_json_load(row.get("lineup_vs_rhp_json"), []),
                "lineup_vs_lhp": _safe_json_load(row.get("lineup_vs_lhp_json"), []),
                "rotation": _safe_json_load(row.get("rotation_json"), []),
            }
    return lineups
826
+
827
+
828
def read_cached_projected_lineups_meta(conn) -> pd.DataFrame:
    """Return one row per ``fetched_at`` with its ``team_count``, newest first."""
    query = text(
        """
        SELECT fetched_at, COUNT(*) AS team_count
        FROM cached_projected_lineups
        GROUP BY fetched_at
        ORDER BY fetched_at DESC
        """
    )
    return pd.read_sql(query, conn)
840
+
841
+
842
def replace_cached_upcoming_props_bundle(
    conn,
    bundle: Mapping[str, pd.DataFrame] | None,
    cache_key: str = "default",
) -> None:
    """
    Store one upcoming-props bundle under ``cache_key``.

    Writes a single row to ``cached_upcoming_props_feed`` (the merged feed as
    JSON) and a single row to ``cached_upcoming_props_bundle_meta`` (row counts
    plus the coverage summary as JSON). Bundle entries that are missing or are
    not DataFrames are treated as empty frames.

    Only rows already stored under the same ``cache_key`` are replaced.
    Clearing the whole table would silently drop bundles cached under other
    keys, even though reads are filtered by ``cache_key``.
    """
    bundle = dict(bundle or {})

    def _frame(name: str) -> pd.DataFrame:
        value = bundle.get(name)
        return value if isinstance(value, pd.DataFrame) else pd.DataFrame()

    def _records(frame: pd.DataFrame) -> list[dict[str, Any]]:
        # Cast to object first so missing values become real None (JSON null)
        # instead of being coerced back to NaN in numeric columns.
        return frame.astype(object).where(frame.notna(), None).to_dict("records")

    merged = _frame("merged_props_feed")
    coverage = _frame("coverage_summary")
    odds_api_raw = _frame("odds_api_raw")
    scraper_raw = _frame("scraper_raw")

    fetched_at = _latest_fetched_at(merged)
    feed_df = pd.DataFrame(
        [
            {
                "fetched_at": fetched_at,
                "cache_key": cache_key,
                "row_count": int(len(merged)),
                "payload_json": _safe_json_dump(_records(merged)),
            }
        ]
    )
    meta_df = pd.DataFrame(
        [
            {
                "fetched_at": fetched_at,
                "cache_key": cache_key,
                "merged_row_count": int(len(merged)),
                "odds_api_row_count": int(len(odds_api_raw)),
                "scraper_row_count": int(len(scraper_raw)),
                "coverage_summary_json": _safe_json_dump(_records(coverage)),
            }
        ]
    )

    for table_name, frame in (
        ("cached_upcoming_props_feed", feed_df),
        ("cached_upcoming_props_bundle_meta", meta_df),
    ):
        # Table names are hard-coded above; only cache_key is bound as data.
        conn.execute(
            text(f"DELETE FROM {table_name} WHERE cache_key = :cache_key"),
            {"cache_key": cache_key},
        )
        _bulk_insert(conn, table_name, frame)
884
+
885
+
886
def read_cached_upcoming_props_bundle(
    conn,
    cache_key: str = "default",
) -> dict[str, pd.DataFrame]:
    """
    Load the most recent upcoming-props bundle stored under ``cache_key``.

    Returns a dict with ``merged_props_feed`` and ``coverage_summary`` rebuilt
    from their JSON payloads (empty frames when no row exists or the payload
    cannot be parsed) and ``cache_meta``, the raw newest metadata row.
    """
    bind = {"cache_key": cache_key}

    def _newest_row(table_name: str) -> pd.DataFrame:
        query = text(
            f"""
            SELECT * FROM {table_name}
            WHERE cache_key = :cache_key
            ORDER BY fetched_at DESC
            LIMIT 1
            """
        )
        return pd.read_sql(query, conn, params=bind)

    feed_df = _newest_row("cached_upcoming_props_feed")
    meta_df = _newest_row("cached_upcoming_props_bundle_meta")

    if feed_df.empty:
        merged = pd.DataFrame()
    else:
        merged = pd.DataFrame(_safe_json_load(feed_df.iloc[0]["payload_json"], []))

    if meta_df.empty:
        coverage = pd.DataFrame()
    else:
        coverage = pd.DataFrame(
            _safe_json_load(meta_df.iloc[0]["coverage_summary_json"], [])
        )

    return {
        "merged_props_feed": merged,
        "coverage_summary": coverage,
        "cache_meta": meta_df,
    }
922
+
923
+
924
  # ---------------------------------------------------------------------------
925
  # Bets
926
  # ---------------------------------------------------------------------------
 
1388
  )
1389
 
1390
 
1391
+ # ---------------------------------------------------------------------------
1392
+ # Shared baseline snapshots
1393
+ # ---------------------------------------------------------------------------
1394
+
1395
def ensure_shared_baseline_snapshot_tables(conn) -> None:
    """
    Create the shared baseline snapshot tables and their indexes if missing.

    Six tables are created: four payload tables (hitter/pitcher baseline and
    rolling snapshots) sharing one column layout, and two meta tables
    (hitter/pitcher baseline meta) sharing another. Each table then gets a
    unique index on ``player_name``. Index creation is best-effort: a failure
    is logged and the remaining indexes are still attempted.
    """
    import logging

    payload_columns = """
        player_name TEXT,
        source_row_count INTEGER,
        payload_json TEXT,
        snapshot_built_at TEXT,
        snapshot_version TEXT,
        source_status TEXT
    """
    meta_columns = """
        player_name TEXT,
        baseline_role TEXT,
        baseline_mode TEXT,
        prior_sample_size INTEGER,
        season_2026_sample_size INTEGER,
        prior_weight DOUBLE PRECISION,
        season_2026_weight DOUBLE PRECISION,
        baseline_driver TEXT,
        rolling_overlay_active BOOLEAN,
        snapshot_built_at TEXT,
        snapshot_version TEXT,
        source_status TEXT
    """
    # Insertion order matches the original statement order.
    table_columns = {
        "shared_hitter_baseline_snapshot": payload_columns,
        "shared_pitcher_baseline_snapshot": payload_columns,
        "shared_hitter_baseline_meta": meta_columns,
        "shared_pitcher_baseline_meta": meta_columns,
        "shared_hitter_rolling_snapshot": payload_columns,
        "shared_pitcher_rolling_snapshot": payload_columns,
    }

    for table_name, columns in table_columns.items():
        conn.execute(text(f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})"))

    logger = logging.getLogger(__name__)
    for table_name in table_columns:
        try:
            conn.execute(text(
                f"CREATE UNIQUE INDEX IF NOT EXISTS uq_{table_name}_player_name "
                f"ON {table_name} (player_name)"
            ))
        except Exception:
            # Still best-effort, but no longer silent.
            # NOTE(review): on backends where a failed statement aborts the
            # surrounding transaction, later statements on this connection may
            # fail too; confirm whether a savepoint is needed here.
            logger.warning(
                "Could not create unique player_name index on %s",
                table_name,
                exc_info=True,
            )
1496
+
1497
+
1498
  # ---------------------------------------------------------------------------
1499
  # Batter prop audit view
1500
  # ---------------------------------------------------------------------------
models/hr_probability_engine.py CHANGED
@@ -85,10 +85,19 @@ def _empty_result(player_name: str, mode: str) -> dict[str, Any]:
85
  "matchup_platoon_multiplier": 1.0,
86
  "matchup_platoon_reason": "unknown",
87
  "pitcher_reliability": 0.0,
 
88
  "trend_reliability": 0.0,
89
  "zone_reliability": 0.0,
 
 
90
  "family_zone_reliability": 0.0,
 
 
 
91
  "arsenal_reliability": 0.0,
 
 
 
92
  "pulled_contact_reliability": 0.0,
93
  "environment_reliability": 0.0,
94
  "trajectory_reliability": 0.0,
@@ -96,6 +105,7 @@ def _empty_result(player_name: str, mode: str) -> dict[str, Any]:
96
  "opportunity_reliability": 0.0,
97
  "model_voice_reason_candidates": [],
98
  "model_voice_tags": [],
 
99
  }
100
 
101
 
@@ -387,6 +397,11 @@ def build_hr_probability_result(
387
  )
388
  pitcher_reliability = _sample_reliability(pitcher_row.get("sample_size"), 180.0)
389
  result["pitcher_reliability"] = pitcher_reliability
 
 
 
 
 
390
  result["pitcher_hr_adjustment"] = _apply_reliability(
391
  _safe_float(pitcher_adj.get("hr_adj")),
392
  pitcher_reliability,
@@ -437,6 +452,8 @@ def build_hr_probability_result(
437
  _sample_reliability(pitcher_row.get("sample_size"), 180.0),
438
  )
439
  zone_eff = 0.0
 
 
440
  try:
441
  from models.batter_zone_model import build_batter_zone_feature_row
442
  from models.pitcher_zone_model import build_pitcher_zone_feature_row
@@ -449,10 +466,13 @@ def build_hr_probability_result(
449
  pitcher_zone_row=pitcher_zone_row,
450
  )
451
  zone_eff = _safe_float(zone_matchup_adj.get("hr_zone_boost")) * 0.10
 
452
  except Exception:
453
  skipped_layers.append("zone_matchup_unavailable")
454
 
455
  family_zone_eff = 0.0
 
 
456
  try:
457
  from models.family_zone_profile_store import (
458
  build_batter_family_zone_feature_row,
@@ -469,6 +489,8 @@ def build_hr_probability_result(
469
  family_zone_eff = _safe_float(
470
  family_zone_matchup_adj.get("family_zone_hr_boost")
471
  ) * 0.07
 
 
472
  except Exception:
473
  skipped_layers.append("family_zone_db_unavailable")
474
 
@@ -487,6 +509,18 @@ def build_hr_probability_result(
487
  family_zone_eff * matchup_multiplier,
488
  matchup_reliability,
489
  )
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
  hr_prob = _clamp(hr_prob + result["zone_hr_adjustment"], 0.005, 0.25)
492
  hr_prob = _clamp(hr_prob + result["family_zone_hr_adjustment"], 0.005, 0.25)
@@ -510,6 +544,8 @@ def build_hr_probability_result(
510
  )
511
 
512
  arsenal_eff = 0.0
 
 
513
  try:
514
  from models.arsenal_matchup_model import compute_arsenal_matchup_adjustment
515
  from models.batter_arsenal_model import build_batter_arsenal_feature_row
@@ -524,11 +560,19 @@ def build_hr_probability_result(
524
  arsenal_eff = (
525
  _safe_float(arsenal_matchup_adj.get("arsenal_hr_boost")) * 0.05
526
  ) * matchup_multiplier
 
 
527
  except Exception:
528
  skipped_layers.append("arsenal_matchup_unavailable")
529
 
530
  result["arsenal_reliability"] = matchup_reliability
531
  result["arsenal_hr_adjustment"] = _apply_reliability(arsenal_eff, matchup_reliability)
 
 
 
 
 
 
532
  hr_prob = _clamp(hr_prob + result["arsenal_hr_adjustment"], 0.005, 0.25)
533
  if abs(result["arsenal_hr_adjustment"]) > 1e-6:
534
  applied_layers.append("arsenal")
@@ -554,6 +598,9 @@ def build_hr_probability_result(
554
  )
555
  else:
556
  skipped_layers.extend(["pitcher_missing", "zone_matchup_unavailable", "arsenal_matchup_unavailable"])
 
 
 
557
 
558
  result["pulled_contact_reliability"] = _sample_reliability(batter_pa, 155.0)
559
  result["pulled_contact_hr_adjustment"] = _apply_reliability(
@@ -764,6 +811,7 @@ def build_hr_probability_result(
764
  )
765
  result["model_voice_reason_candidates"] = ranked_reasons
766
  result["model_voice_tags"] = [str(item.get("template_key") or "") for item in ranked_reasons if str(item.get("template_key") or "").strip()]
 
767
  result["pregame_context_applied"] = any(
768
  abs(_safe_float(result.get(key))) > 1e-6
769
  for key in [
 
85
  "matchup_platoon_multiplier": 1.0,
86
  "matchup_platoon_reason": "unknown",
87
  "pitcher_reliability": 0.0,
88
+ "pitcher_resolution_status": "pitcher_missing",
89
  "trend_reliability": 0.0,
90
  "zone_reliability": 0.0,
91
+ "zone_status": "unavailable",
92
+ "zone_store_sample_size": 0,
93
  "family_zone_reliability": 0.0,
94
+ "family_zone_status": "unavailable",
95
+ "family_zone_batter_sample_size": 0,
96
+ "family_zone_pitcher_sample_size": 0,
97
  "arsenal_reliability": 0.0,
98
+ "arsenal_status": "unavailable",
99
+ "arsenal_batter_sample_size": 0,
100
+ "arsenal_pitcher_sample_size": 0,
101
  "pulled_contact_reliability": 0.0,
102
  "environment_reliability": 0.0,
103
  "trajectory_reliability": 0.0,
 
105
  "opportunity_reliability": 0.0,
106
  "model_voice_reason_candidates": [],
107
  "model_voice_tags": [],
108
+ "reason_candidate_count": 0,
109
  }
110
 
111
 
 
397
  )
398
  pitcher_reliability = _sample_reliability(pitcher_row.get("sample_size"), 180.0)
399
  result["pitcher_reliability"] = pitcher_reliability
400
+ result["pitcher_resolution_status"] = (
401
+ "resolved" if result["pitcher_name"] and _safe_float(pitcher_row.get("sample_size"), 0.0) > 0 else
402
+ "resolved_no_pitcher_statcast" if result["pitcher_name"] else
403
+ "pitcher_missing"
404
+ )
405
  result["pitcher_hr_adjustment"] = _apply_reliability(
406
  _safe_float(pitcher_adj.get("hr_adj")),
407
  pitcher_reliability,
 
452
  _sample_reliability(pitcher_row.get("sample_size"), 180.0),
453
  )
454
  zone_eff = 0.0
455
+ batter_zone_row: dict[str, Any] = {}
456
+ pitcher_zone_row: dict[str, Any] = {}
457
  try:
458
  from models.batter_zone_model import build_batter_zone_feature_row
459
  from models.pitcher_zone_model import build_pitcher_zone_feature_row
 
466
  pitcher_zone_row=pitcher_zone_row,
467
  )
468
  zone_eff = _safe_float(zone_matchup_adj.get("hr_zone_boost")) * 0.10
469
+ result["zone_store_sample_size"] = int(_safe_float(batter_zone_row.get("zone_sample_size"), 0.0) or 0.0)
470
  except Exception:
471
  skipped_layers.append("zone_matchup_unavailable")
472
 
473
  family_zone_eff = 0.0
474
+ batter_family_zone_row: dict[str, Any] = {}
475
+ pitcher_family_zone_row: dict[str, Any] = {}
476
  try:
477
  from models.family_zone_profile_store import (
478
  build_batter_family_zone_feature_row,
 
489
  family_zone_eff = _safe_float(
490
  family_zone_matchup_adj.get("family_zone_hr_boost")
491
  ) * 0.07
492
+ result["family_zone_batter_sample_size"] = int(_safe_float(batter_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0)
493
+ result["family_zone_pitcher_sample_size"] = int(_safe_float(pitcher_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0)
494
  except Exception:
495
  skipped_layers.append("family_zone_db_unavailable")
496
 
 
509
  family_zone_eff * matchup_multiplier,
510
  matchup_reliability,
511
  )
512
+ result["zone_status"] = (
513
+ "applied" if abs(result["zone_hr_adjustment"]) > 1e-6 else
514
+ "missing_batter_zone_profile" if int(_safe_float(batter_zone_row.get("zone_sample_size"), 0.0) or 0.0) <= 0 else
515
+ "missing_pitcher_zone_profile" if int(_safe_float(pitcher_zone_row.get("zone_sample_size"), 0.0) or 0.0) <= 0 else
516
+ "available_zero_effect"
517
+ )
518
+ result["family_zone_status"] = (
519
+ "applied" if abs(result["family_zone_hr_adjustment"]) > 1e-6 else
520
+ "missing_batter_family_zone_profile" if int(_safe_float(batter_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0) <= 0 else
521
+ "missing_pitcher_family_zone_profile" if int(_safe_float(pitcher_family_zone_row.get("family_zone_sample_size"), 0.0) or 0.0) <= 0 else
522
+ "available_zero_effect"
523
+ )
524
 
525
  hr_prob = _clamp(hr_prob + result["zone_hr_adjustment"], 0.005, 0.25)
526
  hr_prob = _clamp(hr_prob + result["family_zone_hr_adjustment"], 0.005, 0.25)
 
544
  )
545
 
546
  arsenal_eff = 0.0
547
+ batter_arsenal_row: dict[str, Any] = {}
548
+ pitcher_arsenal_row: dict[str, Any] = {}
549
  try:
550
  from models.arsenal_matchup_model import compute_arsenal_matchup_adjustment
551
  from models.batter_arsenal_model import build_batter_arsenal_feature_row
 
560
  arsenal_eff = (
561
  _safe_float(arsenal_matchup_adj.get("arsenal_hr_boost")) * 0.05
562
  ) * matchup_multiplier
563
+ result["arsenal_batter_sample_size"] = int(_safe_float(batter_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0)
564
+ result["arsenal_pitcher_sample_size"] = int(_safe_float(pitcher_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0)
565
  except Exception:
566
  skipped_layers.append("arsenal_matchup_unavailable")
567
 
568
  result["arsenal_reliability"] = matchup_reliability
569
  result["arsenal_hr_adjustment"] = _apply_reliability(arsenal_eff, matchup_reliability)
570
+ result["arsenal_status"] = (
571
+ "applied" if abs(result["arsenal_hr_adjustment"]) > 1e-6 else
572
+ "missing_batter_arsenal_profile" if int(_safe_float(batter_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0) <= 0 else
573
+ "missing_pitcher_arsenal_profile" if int(_safe_float(pitcher_arsenal_row.get("arsenal_sample_size"), 0.0) or 0.0) <= 0 else
574
+ "available_zero_effect"
575
+ )
576
  hr_prob = _clamp(hr_prob + result["arsenal_hr_adjustment"], 0.005, 0.25)
577
  if abs(result["arsenal_hr_adjustment"]) > 1e-6:
578
  applied_layers.append("arsenal")
 
598
  )
599
  else:
600
  skipped_layers.extend(["pitcher_missing", "zone_matchup_unavailable", "arsenal_matchup_unavailable"])
601
+ result["zone_status"] = "missing_pitcher_identity"
602
+ result["family_zone_status"] = "missing_pitcher_identity"
603
+ result["arsenal_status"] = "missing_pitcher_identity"
604
 
605
  result["pulled_contact_reliability"] = _sample_reliability(batter_pa, 155.0)
606
  result["pulled_contact_hr_adjustment"] = _apply_reliability(
 
811
  )
812
  result["model_voice_reason_candidates"] = ranked_reasons
813
  result["model_voice_tags"] = [str(item.get("template_key") or "") for item in ranked_reasons if str(item.get("template_key") or "").strip()]
814
+ result["reason_candidate_count"] = len(ranked_reasons)
815
  result["pregame_context_applied"] = any(
816
  abs(_safe_float(result.get(key))) > 1e-6
817
  for key in [
visualization/debug_page.py CHANGED
@@ -37,6 +37,11 @@ from analytics.recommendation_engine import build_upcoming_hitter_recommendation
37
  from database.db import (
38
  read_batter_prop_audit_view,
39
  read_batter_prop_outcomes,
 
 
 
 
 
40
  read_game_outcomes,
41
  read_recommendation_audit_view,
42
  read_table,
@@ -845,6 +850,20 @@ def render_debug(
845
  baseline_summary_frames: list[pd.DataFrame] = []
846
  batter_meta = (baseline_bundle or {}).get("batter_baseline_meta", pd.DataFrame())
847
  pitcher_meta = (baseline_bundle or {}).get("pitcher_baseline_meta", pd.DataFrame())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
848
 
849
  if isinstance(batter_meta, pd.DataFrame) and not batter_meta.empty:
850
  batter_display = batter_meta.copy()
@@ -897,6 +916,131 @@ def render_debug(
897
  else:
898
  st.info("Shared baseline metadata is not loaded.")
899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900
  st.subheader("Upcoming Props Feed Diagnostics")
901
 
902
  props_debug = upcoming_props_debug or {}
@@ -904,6 +1048,19 @@ def render_debug(
904
  odds_api_raw_df = props_debug.get("odds_api_raw", pd.DataFrame())
905
  scraper_raw_df = props_debug.get("scraper_raw", pd.DataFrame())
906
  merged_props_df = props_debug.get("merged_props_feed", pd.DataFrame())
 
 
 
 
 
 
 
 
 
 
 
 
 
907
 
908
  if not coverage_summary_df.empty:
909
  st.write("Coverage Summary")
 
37
  from database.db import (
38
  read_batter_prop_audit_view,
39
  read_batter_prop_outcomes,
40
+ read_cached_probable_starters_meta,
41
+ read_cached_projected_lineups_meta,
42
+ read_cached_upcoming_props_bundle,
43
+ read_cached_schedule_for_date,
44
+ read_cached_odds,
45
  read_game_outcomes,
46
  read_recommendation_audit_view,
47
  read_table,
 
850
  baseline_summary_frames: list[pd.DataFrame] = []
851
  batter_meta = (baseline_bundle or {}).get("batter_baseline_meta", pd.DataFrame())
852
  pitcher_meta = (baseline_bundle or {}).get("pitcher_baseline_meta", pd.DataFrame())
853
+ snapshot_status = (baseline_bundle or {}).get("snapshot_status", pd.DataFrame())
854
+ hitter_rolling_snapshot = (baseline_bundle or {}).get("hitter_rolling_snapshot", pd.DataFrame())
855
+ pitcher_rolling_snapshot = (baseline_bundle or {}).get("pitcher_rolling_snapshot", pd.DataFrame())
856
+
857
+ source_status = str((baseline_bundle or {}).get("snapshot_source_status") or "unknown")
858
+ runtime_fallback_used = bool((baseline_bundle or {}).get("runtime_fallback_used"))
859
+
860
+ c1, c2 = st.columns(2)
861
+ c1.metric("Baseline Source", source_status.replace("_", " ").title())
862
+ c2.metric("Runtime Fallback Used", "Yes" if runtime_fallback_used else "No")
863
+
864
+ if isinstance(snapshot_status, pd.DataFrame) and not snapshot_status.empty:
865
+ st.write("Snapshot Freshness")
866
+ st.dataframe(snapshot_status, use_container_width=True, hide_index=True)
867
 
868
  if isinstance(batter_meta, pd.DataFrame) and not batter_meta.empty:
869
  batter_display = batter_meta.copy()
 
916
  else:
917
  st.info("Shared baseline metadata is not loaded.")
918
 
919
+ rolling_summary_frames: list[pd.DataFrame] = []
920
+ if isinstance(hitter_rolling_snapshot, pd.DataFrame) and not hitter_rolling_snapshot.empty:
921
+ hitter_roll = hitter_rolling_snapshot.copy()
922
+ hitter_roll["baseline_role"] = "batter"
923
+ rolling_summary_frames.append(
924
+ hitter_roll[
925
+ [
926
+ c for c in [
927
+ "baseline_role",
928
+ "player_name",
929
+ "batter_games_in_window_5g",
930
+ "batter_games_in_window_10g",
931
+ "batter_recent_form_available",
932
+ "snapshot_built_at",
933
+ "source_status",
934
+ ] if c in hitter_roll.columns
935
+ ]
936
+ ]
937
+ )
938
+
939
+ if isinstance(pitcher_rolling_snapshot, pd.DataFrame) and not pitcher_rolling_snapshot.empty:
940
+ pitcher_roll = pitcher_rolling_snapshot.copy()
941
+ pitcher_roll["baseline_role"] = "pitcher"
942
+ rolling_summary_frames.append(
943
+ pitcher_roll[
944
+ [
945
+ c for c in [
946
+ "baseline_role",
947
+ "player_name",
948
+ "pitcher_games_in_window_5g",
949
+ "pitcher_games_in_window_10g",
950
+ "pitcher_recent_form_available",
951
+ "pitcher_rolling_confidence",
952
+ "snapshot_built_at",
953
+ "source_status",
954
+ ] if c in pitcher_roll.columns
955
+ ]
956
+ ]
957
+ )
958
+
959
+ if rolling_summary_frames:
960
+ st.write("Rolling Snapshot Diagnostics")
961
+ st.dataframe(
962
+ pd.concat(rolling_summary_frames, ignore_index=True),
963
+ use_container_width=True,
964
+ hide_index=True,
965
+ )
966
+
967
+ with st.expander("Cached Source Freshness", expanded=False):
968
+ freshness_rows: list[dict[str, Any]] = []
969
+
970
+ try:
971
+ schedule_cached = read_cached_schedule_for_date(conn, current_wbc_date_str())
972
+ freshness_rows.append(
973
+ {
974
+ "source": "cached_schedule",
975
+ "row_count": int(len(schedule_cached)),
976
+ "latest_fetched_at": (
977
+ pd.to_datetime(schedule_cached["fetched_at"], errors="coerce").max()
978
+ if not schedule_cached.empty and "fetched_at" in schedule_cached.columns
979
+ else None
980
+ ),
981
+ }
982
+ )
983
+ except Exception:
984
+ pass
985
+
986
+ try:
987
+ odds_cached = read_cached_odds(conn)
988
+ freshness_rows.append(
989
+ {
990
+ "source": "cached_odds",
991
+ "row_count": int(len(odds_cached)),
992
+ "latest_fetched_at": (
993
+ pd.to_datetime(odds_cached["fetched_at"], errors="coerce").max()
994
+ if not odds_cached.empty and "fetched_at" in odds_cached.columns
995
+ else None
996
+ ),
997
+ }
998
+ )
999
+ except Exception:
1000
+ pass
1001
+
1002
+ try:
1003
+ starters_meta = read_cached_probable_starters_meta(conn)
1004
+ freshness_rows.append(
1005
+ {
1006
+ "source": "cached_probable_starters",
1007
+ "row_count": int(starters_meta.iloc[0]["matchup_count"]) if not starters_meta.empty else 0,
1008
+ "latest_fetched_at": starters_meta.iloc[0]["fetched_at"] if not starters_meta.empty else None,
1009
+ }
1010
+ )
1011
+ except Exception:
1012
+ pass
1013
+
1014
+ try:
1015
+ lineups_meta = read_cached_projected_lineups_meta(conn)
1016
+ freshness_rows.append(
1017
+ {
1018
+ "source": "cached_projected_lineups",
1019
+ "row_count": int(lineups_meta.iloc[0]["team_count"]) if not lineups_meta.empty else 0,
1020
+ "latest_fetched_at": lineups_meta.iloc[0]["fetched_at"] if not lineups_meta.empty else None,
1021
+ }
1022
+ )
1023
+ except Exception:
1024
+ pass
1025
+
1026
+ try:
1027
+ props_cache = read_cached_upcoming_props_bundle(conn, cache_key="default")
1028
+ props_meta = props_cache.get("cache_meta", pd.DataFrame())
1029
+ freshness_rows.append(
1030
+ {
1031
+ "source": "cached_upcoming_props_bundle",
1032
+ "row_count": int(props_meta.iloc[0]["merged_row_count"]) if not props_meta.empty else 0,
1033
+ "latest_fetched_at": props_meta.iloc[0]["fetched_at"] if not props_meta.empty else None,
1034
+ }
1035
+ )
1036
+ except Exception:
1037
+ pass
1038
+
1039
+ if freshness_rows:
1040
+ st.dataframe(pd.DataFrame(freshness_rows), use_container_width=True, hide_index=True)
1041
+ else:
1042
+ st.info("No cached source freshness rows available.")
1043
+
1044
  st.subheader("Upcoming Props Feed Diagnostics")
1045
 
1046
  props_debug = upcoming_props_debug or {}
 
1048
  odds_api_raw_df = props_debug.get("odds_api_raw", pd.DataFrame())
1049
  scraper_raw_df = props_debug.get("scraper_raw", pd.DataFrame())
1050
  merged_props_df = props_debug.get("merged_props_feed", pd.DataFrame())
1051
+ props_cache_meta = props_debug.get("cache_meta", pd.DataFrame())
1052
+ props_cache_source = str(props_debug.get("cache_source") or "unknown")
1053
+
1054
+ c1, c2 = st.columns(2)
1055
+ c1.metric("Props Cache Source", props_cache_source.replace("_", " ").title())
1056
+ c2.metric(
1057
+ "Props Cached Rows",
1058
+ int(props_cache_meta.iloc[0]["merged_row_count"]) if isinstance(props_cache_meta, pd.DataFrame) and not props_cache_meta.empty and "merged_row_count" in props_cache_meta.columns else int(len(merged_props_df)),
1059
+ )
1060
+
1061
+ if isinstance(props_cache_meta, pd.DataFrame) and not props_cache_meta.empty:
1062
+ st.write("Props Bundle Cache Meta")
1063
+ st.dataframe(props_cache_meta, use_container_width=True, hide_index=True)
1064
 
1065
  if not coverage_summary_df.empty:
1066
  st.write("Coverage Summary")
visualization/props_page.py CHANGED
@@ -20,7 +20,14 @@ from analytics.props_mapper import map_props_to_models
20
  from analytics.props_view_model import build_hr_props_view_model, select_best_lines_per_prop
21
  from config.settings import DEFAULT_PROP_BOOKS
22
  from data.live_prop_odds import fetch_all_upcoming_hr_props
23
- from database.db import ensure_upcoming_hr_props_table, insert_upcoming_hr_props
 
 
 
 
 
 
 
24
  from utils.helpers import utc_now_iso
25
 
26
 
@@ -425,7 +432,38 @@ def _load_projected_lineups_for_props(teams: tuple[str, ...]) -> dict[str, dict[
425
 
426
  if not teams:
427
  return {}
428
- return fetch_projected_lineups_for_teams(list(teams))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
 
431
  def _render_empty_props_state() -> None:
 
20
  from analytics.props_view_model import build_hr_props_view_model, select_best_lines_per_prop
21
  from config.settings import DEFAULT_PROP_BOOKS
22
  from data.live_prop_odds import fetch_all_upcoming_hr_props
23
+ from database.db import (
24
+ ensure_upcoming_hr_props_table,
25
+ get_connection,
26
+ insert_upcoming_hr_props,
27
+ read_cached_projected_lineups,
28
+ read_cached_projected_lineups_meta,
29
+ replace_cached_projected_lineups,
30
+ )
31
  from utils.helpers import utc_now_iso
32
 
33
 
 
432
 
433
  if not teams:
434
  return {}
435
+ team_names = tuple(sorted({str(team or "").strip().lower() for team in teams if str(team or "").strip()}))
436
+
437
+ conn = get_connection()
438
+ try:
439
+ cached_meta = read_cached_projected_lineups_meta(conn)
440
+ if not cached_meta.empty:
441
+ try:
442
+ latest = pd.to_datetime(cached_meta.iloc[0]["fetched_at"], errors="coerce", utc=True)
443
+ if pd.notna(latest):
444
+ age_seconds = float((pd.Timestamp.now(tz="UTC") - latest).total_seconds())
445
+ if age_seconds <= float(60 * 60 * 6):
446
+ cached = read_cached_projected_lineups(conn, team_names_norm=team_names)
447
+ if cached:
448
+ return cached
449
+ except Exception:
450
+ pass
451
+ finally:
452
+ try:
453
+ conn.close()
454
+ except Exception:
455
+ pass
456
+
457
+ fresh = fetch_projected_lineups_for_teams(list(teams))
458
+ try:
459
+ conn = get_connection()
460
+ try:
461
+ replace_cached_projected_lineups(conn, fresh)
462
+ finally:
463
+ conn.close()
464
+ except Exception:
465
+ pass
466
+ return fresh
467
 
468
 
469
  def _render_empty_props_state() -> None: