Dmitry Beresnev committed on
Commit
fc67a49
·
1 Parent(s): 20fa678

Add earnings calendar column to Market Intelligence and fix thread exhaustion

Browse files

- Add 4th column to Market Intelligence showing upcoming earnings reports
for top companies as a scrollable table (date, EPS estimate, urgency
color coding). Data fetched from Alpha Vantage EARNINGS_CALENDAR with
StockAnalysis.com as fallback, no mock data.
- Fix RuntimeError: can't start new thread by reducing ThreadPoolExecutor
workers from 8→4 and adding a sequential fallback when thread creation
fails.
- Harden source count references to use _TOTAL_SOURCES instead of
hardcoded "8 sources".

app/components/news.py CHANGED
@@ -721,3 +721,89 @@ def display_economic_calendar_widget(events_df: pd.DataFrame):
721
  widget_html += "</div>"
722
 
723
  st.markdown(widget_html, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
  widget_html += "</div>"
722
 
723
  st.markdown(widget_html, unsafe_allow_html=True)
724
+
725
+
726
def display_earnings_calendar(entries: list, height: str = "600px"):
    """Display upcoming earnings reports as a scrollable table.

    Renders a styled card: a header with the total report count, a column
    header row, then one row per entry showing ticker/company (with an
    optional BMO/AMC badge), the report date, an urgency-colored "when"
    label, and the EPS estimate.

    Args:
        entries: List of dicts from EarningsCalendarService; each entry has
            keys ticker, company, date (datetime), days_until (int),
            report_time ("BMO", "AMC" or None), eps_estimate (float or None).
        height: CSS height of the scrollable rows container.
    """
    header_html = """<div style="background: linear-gradient(135deg, #2A2E39 0%, #1E222D 100%); border: 1px solid #363A45; border-radius: 8px 8px 0 0; padding: 16px 20px;">
    <div style="display: flex; justify-content: space-between; align-items: center;">
    <div>
    <h3 style="color: #D1D4DC; margin: 0; font-size: 18px; font-weight: 600;">📋 Earnings Calendar</h3>
    <p style="color: #787B86; margin: 4px 0 0 0; font-size: 12px;">Upcoming financial reports · Top companies</p>
    </div>
    <div style="background: rgba(56, 97, 251, 0.15); color: #3861FB; padding: 6px 12px; border-radius: 6px; font-size: 13px; font-weight: 600;">{count} reports</div>
    </div>
    </div>""".format(count=len(entries))

    st.markdown(header_html, unsafe_allow_html=True)

    if not entries:
        st.markdown("""
        <div style="background: #131722; border: 1px solid #2A2E39; border-radius: 0 0 8px 8px; padding: 30px; text-align: center; color: #787B86;">
        No upcoming earnings data available
        </div>""", unsafe_allow_html=True)
        return

    # Column header row
    col_header = """<div style="background: #131722; border-left: 1px solid #2A2E39; border-right: 1px solid #2A2E39; padding: 8px 14px; display: flex; gap: 0;">
    <div style="color: #787B86; font-size: 11px; font-weight: 600; text-transform: uppercase; flex: 2;">Company</div>
    <div style="color: #787B86; font-size: 11px; font-weight: 600; text-transform: uppercase; flex: 2; text-align: center;">Date</div>
    <div style="color: #787B86; font-size: 11px; font-weight: 600; text-transform: uppercase; flex: 1; text-align: center;">When</div>
    <div style="color: #787B86; font-size: 11px; font-weight: 600; text-transform: uppercase; flex: 1; text-align: right;">EPS Est.</div>
    </div>"""

    # Build scrollable rows
    rows_html = ""
    for entry in entries:
        days = entry.get("days_until", 0)
        date: datetime = entry["date"]
        ticker = html_module.escape(entry["ticker"])
        company = html_module.escape(entry["company"])

        # Urgency color. Fix: the fetchers keep reports up to one day in
        # the past (grace window), so days_until can be negative — that
        # previously fell into the `days <= 7` branch and rendered a
        # nonsensical yellow "in -1d".
        if days < 0:
            urgency_color = "#787B86"  # grey — already reported
            when_label = "Reported"
        elif days == 0:
            urgency_color = "#F23645"  # red — today
            when_label = "Today"
        elif days == 1:
            urgency_color = "#FF9800"  # orange — tomorrow
            when_label = "Tomorrow"
        elif days <= 7:
            urgency_color = "#F0B429"  # yellow — this week
            when_label = f"in {days}d"
        else:
            urgency_color = "#787B86"  # grey — further out
            when_label = f"in {days}d"

        date_str = date.strftime("%b %d, %Y")

        report_time = entry.get("report_time")
        time_badge = ""
        if report_time == "BMO":
            time_badge = ' <span style="color:#089981; font-size:10px; background:rgba(8,153,129,0.1); padding:1px 5px; border-radius:3px;">BMO</span>'
        elif report_time == "AMC":
            time_badge = ' <span style="color:#3861FB; font-size:10px; background:rgba(56,97,251,0.1); padding:1px 5px; border-radius:3px;">AMC</span>'

        eps = entry.get("eps_estimate")
        eps_str = f"${eps:.2f}" if eps is not None else "—"

        row = f"""<div style="display: flex; gap: 0; padding: 9px 14px; border-bottom: 1px solid #1E222D; align-items: center;">
        <div style="flex: 2; min-width: 0;">
        <span style="color: #D1D4DC; font-size: 13px; font-weight: 600;">{ticker}</span>
        <span style="color: #787B86; font-size: 11px; margin-left: 5px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{company}</span>
        {time_badge}
        </div>
        <div style="flex: 2; color: #B2B5BE; font-size: 12px; text-align: center;">{date_str}</div>
        <div style="flex: 1; text-align: center;">
        <span style="color: {urgency_color}; font-size: 12px; font-weight: 600;">{when_label}</span>
        </div>
        <div style="flex: 1; color: #D1D4DC; font-size: 13px; font-weight: 500; text-align: right;">{eps_str}</div>
        </div>"""
        rows_html += row

    scroll_container = f"""<div style="background: #131722; border: 1px solid #2A2E39; border-top: none; border-radius: 0 0 8px 8px; overflow-y: auto; height: {height};">
    {col_header}
    {rows_html}
    </div>"""

    st.markdown(scroll_container, unsafe_allow_html=True)
app/pages/05_Dashboard.py CHANGED
@@ -23,7 +23,8 @@ from components.news import (
23
  display_scrollable_news_section,
24
  display_prediction_card,
25
  display_economic_event_card,
26
- display_economic_calendar_widget
 
27
  )
28
  from utils.breaking_news_scorer import get_breaking_news_scorer
29
  from utils.ai_summary_store import init_storage, enqueue_items, fetch_summaries, get_status
@@ -78,6 +79,12 @@ try:
78
  except ImportError:
79
  CALENDAR_AVAILABLE = False
80
 
 
 
 
 
 
 
81
 
82
  # ---- Page Configuration ----
83
  st.set_page_config(
@@ -115,6 +122,12 @@ if 'market_events_monitor' not in st.session_state and EVENTS_AVAILABLE:
115
  if 'economic_calendar_service' not in st.session_state and CALENDAR_AVAILABLE:
116
  st.session_state.economic_calendar_service = EconomicCalendarService()
117
 
 
 
 
 
 
 
118
  rss_monitor = st.session_state.get('rss_monitor')
119
  twitter_monitor = st.session_state.get('twitter_monitor')
120
  reddit_monitor = st.session_state.get('reddit_monitor')
@@ -123,6 +136,7 @@ prediction_markets_monitor = st.session_state.get('prediction_markets_monitor')
123
  sectoral_news_monitor = st.session_state.get('sectoral_news_monitor')
124
  market_events_monitor = st.session_state.get('market_events_monitor')
125
  economic_calendar_service = st.session_state.get('economic_calendar_service')
 
126
 
127
  # Initialize unified cache manager
128
  if 'news_cache_manager' not in st.session_state:
@@ -283,6 +297,7 @@ predictions_df = pd.DataFrame()
283
  sectoral_news_df = pd.DataFrame()
284
  market_events_df = pd.DataFrame()
285
  economic_calendar_df = pd.DataFrame()
 
286
 
287
  def fetch_twitter_news():
288
  """Fetch Twitter/X news via cache manager"""
@@ -445,6 +460,16 @@ def fetch_economic_calendar():
445
  return pd.DataFrame(), f"Economic calendar unavailable: {e}"
446
  return pd.DataFrame(), None
447
 
 
 
 
 
 
 
 
 
 
 
448
  # Progressive loading: Display results as they arrive
449
  # Create a status placeholder to show progress
450
  status_placeholder = st.empty()
@@ -459,36 +484,41 @@ _fetch_tasks = [
459
  (fetch_sectoral_news, 'sectoral_news'),
460
  (fetch_market_events, 'market_events'),
461
  (fetch_economic_calendar, 'economic_calendar'),
 
462
  ]
 
463
 
464
- def _apply_result(source_name, result_df, error):
465
  global twitter_df, reddit_df, rss_all_df, rss_main_df, ai_tech_df
466
  global predictions_df, sectoral_news_df, market_events_df, economic_calendar_df
 
467
  if source_name == 'twitter':
468
- twitter_df = result_df
469
  elif source_name == 'reddit':
470
- reddit_df = result_df
471
  elif source_name == 'rss':
472
- rss_all_df = result_df
473
  if not rss_all_df.empty and 'from_web' in rss_all_df.columns:
474
  rss_main_df = rss_all_df[rss_all_df['from_web'] == True].copy()
475
  elif source_name == 'ai_tech':
476
- ai_tech_df = result_df
477
  elif source_name == 'predictions':
478
- predictions_df = result_df
479
  elif source_name == 'sectoral_news':
480
- sectoral_news_df = result_df
481
  elif source_name == 'market_events':
482
- market_events_df = result_df
483
  elif source_name == 'economic_calendar':
484
- economic_calendar_df = result_df
 
 
485
  if error:
486
  fetch_errors.append(error)
487
 
488
  fetch_errors = []
489
  completed_sources = []
490
 
491
- with st.spinner("Loading news from 8 sources..."):
492
  try:
493
  with ThreadPoolExecutor(max_workers=4) as executor:
494
  futures_map = {executor.submit(fn): name for fn, name in _fetch_tasks}
@@ -496,17 +526,17 @@ with st.spinner("Loading news from 8 sources..."):
496
  for future in as_completed(futures_map, timeout=90):
497
  source_name = futures_map[future]
498
  try:
499
- result_df, error = future.result()
500
  completed_sources.append(source_name)
501
- status_placeholder.info(f"🔍 Loaded {len(completed_sources)}/8 sources ({', '.join(completed_sources)})")
502
- _apply_result(source_name, result_df, error)
503
  except Exception as e:
504
  fetch_errors.append(f"Error fetching {source_name} news: {e}")
505
  completed_sources.append(f"{source_name} (error)")
506
  status_placeholder.warning(f"⚠️ {source_name} failed, continuing with other sources...")
507
  except TimeoutError:
508
  fetch_errors.append("⏱️ Some sources timed out after 90 seconds - displaying available results")
509
- status_placeholder.warning(f"⚠️ {len(completed_sources)}/8 sources loaded (some timed out)")
510
  all_sources = set(futures_map.values())
511
  for source in all_sources - set(completed_sources):
512
  fetch_errors.append(f"{source} timed out - skipped")
@@ -516,21 +546,21 @@ with st.spinner("Loading news from 8 sources..."):
516
  fetch_errors.append("⚠️ Thread limit reached, falling back to sequential fetch")
517
  for fn, name in _fetch_tasks:
518
  try:
519
- result_df, error = fn()
520
  completed_sources.append(name)
521
- status_placeholder.info(f"🔍 Loaded {len(completed_sources)}/8 sources (sequential mode)")
522
- _apply_result(name, result_df, error)
523
  except Exception as e:
524
  fetch_errors.append(f"Error fetching {name} news: {e}")
525
  completed_sources.append(f"{name} (error)")
526
 
527
  # Clear the status message after all sources complete
528
- status_placeholder.success(f"✅ Loaded {len(completed_sources)}/8 sources successfully")
529
 
530
  # Debug logging (console only, not displayed on page)
531
  import logging
532
  logger = logging.getLogger(__name__)
533
- logger.info(f"News Fetch Results: Twitter={len(twitter_df)}, Reddit={len(reddit_df)}, RSS={len(rss_all_df)}, AI/Tech={len(ai_tech_df)}, Predictions={len(predictions_df)}, Sectoral={len(sectoral_news_df)}, Events={len(market_events_df)}, Calendar={len(economic_calendar_df)}")
534
  logger.info(f"Availability: Predictions={PREDICTIONS_AVAILABLE}, Sectoral={SECTORAL_AVAILABLE}, Events={EVENTS_AVAILABLE}, Calendar={CALENDAR_AVAILABLE}")
535
  if fetch_errors:
536
  for err in fetch_errors:
@@ -770,7 +800,7 @@ with col4:
770
  st.markdown("---")
771
  st.markdown("## 📊 Market Intelligence - Predictions, Sectors & Events")
772
 
773
- col5, col6, col7 = st.columns(3)
774
 
775
  with col5:
776
  # Prediction Markets Column
@@ -850,6 +880,10 @@ with col7:
850
  </style>
851
  """, unsafe_allow_html=True)
852
 
 
 
 
 
853
  # Display fetch errors in expander (less intrusive)
854
  if 'fetch_errors' in locals() and fetch_errors:
855
  with st.expander("⚠️ Source Fetch Warnings", expanded=False):
 
23
  display_scrollable_news_section,
24
  display_prediction_card,
25
  display_economic_event_card,
26
+ display_economic_calendar_widget,
27
+ display_earnings_calendar,
28
  )
29
  from utils.breaking_news_scorer import get_breaking_news_scorer
30
  from utils.ai_summary_store import init_storage, enqueue_items, fetch_summaries, get_status
 
79
  except ImportError:
80
  CALENDAR_AVAILABLE = False
81
 
82
+ try:
83
+ from services.earnings_calendar import EarningsCalendarService
84
+ EARNINGS_AVAILABLE = True
85
+ except ImportError:
86
+ EARNINGS_AVAILABLE = False
87
+
88
 
89
  # ---- Page Configuration ----
90
  st.set_page_config(
 
122
  if 'economic_calendar_service' not in st.session_state and CALENDAR_AVAILABLE:
123
  st.session_state.economic_calendar_service = EconomicCalendarService()
124
 
125
+ if 'earnings_calendar_service' not in st.session_state and EARNINGS_AVAILABLE:
126
+ from utils.config import Config
127
+ st.session_state.earnings_calendar_service = EarningsCalendarService(
128
+ api_key=Config.ALPHA_VANTAGE_KEY or None
129
+ )
130
+
131
  rss_monitor = st.session_state.get('rss_monitor')
132
  twitter_monitor = st.session_state.get('twitter_monitor')
133
  reddit_monitor = st.session_state.get('reddit_monitor')
 
136
  sectoral_news_monitor = st.session_state.get('sectoral_news_monitor')
137
  market_events_monitor = st.session_state.get('market_events_monitor')
138
  economic_calendar_service = st.session_state.get('economic_calendar_service')
139
+ earnings_calendar_service = st.session_state.get('earnings_calendar_service')
140
 
141
  # Initialize unified cache manager
142
  if 'news_cache_manager' not in st.session_state:
 
297
  sectoral_news_df = pd.DataFrame()
298
  market_events_df = pd.DataFrame()
299
  economic_calendar_df = pd.DataFrame()
300
+ earnings_data = [] # List[Dict] from EarningsCalendarService
301
 
302
  def fetch_twitter_news():
303
  """Fetch Twitter/X news via cache manager"""
 
460
  return pd.DataFrame(), f"Economic calendar unavailable: {e}"
461
  return pd.DataFrame(), None
462
 
463
def fetch_earnings_calendar():
    """Fetch upcoming earnings reports for top companies.

    Returns a (entries, error) pair: the list from the earnings calendar
    service (empty when the service is unavailable) and an error string
    or None.
    """
    # No service configured (import failed or key missing): silently empty.
    if not earnings_calendar_service:
        return [], None
    try:
        return earnings_calendar_service.get_upcoming_earnings(days_ahead=30), None
    except Exception as e:
        return [], f"Earnings calendar unavailable: {e}"
472
+
473
  # Progressive loading: Display results as they arrive
474
  # Create a status placeholder to show progress
475
  status_placeholder = st.empty()
 
484
  (fetch_sectoral_news, 'sectoral_news'),
485
  (fetch_market_events, 'market_events'),
486
  (fetch_economic_calendar, 'economic_calendar'),
487
+ (fetch_earnings_calendar, 'earnings_calendar'),
488
  ]
489
+ _TOTAL_SOURCES = len(_fetch_tasks)
490
 
491
def _apply_result(source_name, result, error):
    """Store a completed fetch result into the module-level variable that
    belongs to its source, and queue any error message for display.

    The 'rss' source additionally derives rss_main_df as the subset of
    rows flagged from_web.
    """
    global twitter_df, reddit_df, rss_all_df, rss_main_df, ai_tech_df
    global predictions_df, sectoral_news_df, market_events_df, economic_calendar_df
    global earnings_data

    # Map each source name to the module-level variable it populates.
    _targets = {
        'twitter': 'twitter_df',
        'reddit': 'reddit_df',
        'rss': 'rss_all_df',
        'ai_tech': 'ai_tech_df',
        'predictions': 'predictions_df',
        'sectoral_news': 'sectoral_news_df',
        'market_events': 'market_events_df',
        'economic_calendar': 'economic_calendar_df',
        'earnings_calendar': 'earnings_data',
    }
    var_name = _targets.get(source_name)
    if var_name is not None:
        globals()[var_name] = result
        # RSS results also feed the "main" (web-sourced) subset.
        if source_name == 'rss' and not rss_all_df.empty and 'from_web' in rss_all_df.columns:
            rss_main_df = rss_all_df[rss_all_df['from_web'] == True].copy()

    if error:
        fetch_errors.append(error)
517
 
518
  fetch_errors = []
519
  completed_sources = []
520
 
521
+ with st.spinner(f"Loading news from {_TOTAL_SOURCES} sources..."):
522
  try:
523
  with ThreadPoolExecutor(max_workers=4) as executor:
524
  futures_map = {executor.submit(fn): name for fn, name in _fetch_tasks}
 
526
  for future in as_completed(futures_map, timeout=90):
527
  source_name = futures_map[future]
528
  try:
529
+ result, error = future.result()
530
  completed_sources.append(source_name)
531
+ status_placeholder.info(f"🔍 Loaded {len(completed_sources)}/{_TOTAL_SOURCES} sources ({', '.join(completed_sources)})")
532
+ _apply_result(source_name, result, error)
533
  except Exception as e:
534
  fetch_errors.append(f"Error fetching {source_name} news: {e}")
535
  completed_sources.append(f"{source_name} (error)")
536
  status_placeholder.warning(f"⚠️ {source_name} failed, continuing with other sources...")
537
  except TimeoutError:
538
  fetch_errors.append("⏱️ Some sources timed out after 90 seconds - displaying available results")
539
+ status_placeholder.warning(f"⚠️ {len(completed_sources)}/{_TOTAL_SOURCES} sources loaded (some timed out)")
540
  all_sources = set(futures_map.values())
541
  for source in all_sources - set(completed_sources):
542
  fetch_errors.append(f"{source} timed out - skipped")
 
546
  fetch_errors.append("⚠️ Thread limit reached, falling back to sequential fetch")
547
  for fn, name in _fetch_tasks:
548
  try:
549
+ result, error = fn()
550
  completed_sources.append(name)
551
+ status_placeholder.info(f"🔍 Loaded {len(completed_sources)}/{_TOTAL_SOURCES} sources (sequential mode)")
552
+ _apply_result(name, result, error)
553
  except Exception as e:
554
  fetch_errors.append(f"Error fetching {name} news: {e}")
555
  completed_sources.append(f"{name} (error)")
556
 
557
  # Clear the status message after all sources complete
558
+ status_placeholder.success(f"✅ Loaded {len(completed_sources)}/{_TOTAL_SOURCES} sources successfully")
559
 
560
  # Debug logging (console only, not displayed on page)
561
  import logging
562
  logger = logging.getLogger(__name__)
563
+ logger.info(f"News Fetch Results: Twitter={len(twitter_df)}, Reddit={len(reddit_df)}, RSS={len(rss_all_df)}, AI/Tech={len(ai_tech_df)}, Predictions={len(predictions_df)}, Sectoral={len(sectoral_news_df)}, Events={len(market_events_df)}, Calendar={len(economic_calendar_df)}, Earnings={len(earnings_data)}")
564
  logger.info(f"Availability: Predictions={PREDICTIONS_AVAILABLE}, Sectoral={SECTORAL_AVAILABLE}, Events={EVENTS_AVAILABLE}, Calendar={CALENDAR_AVAILABLE}")
565
  if fetch_errors:
566
  for err in fetch_errors:
 
800
  st.markdown("---")
801
  st.markdown("## 📊 Market Intelligence - Predictions, Sectors & Events")
802
 
803
+ col5, col6, col7, col8 = st.columns(4)
804
 
805
  with col5:
806
  # Prediction Markets Column
 
880
  </style>
881
  """, unsafe_allow_html=True)
882
 
883
+ with col8:
884
+ # Earnings Calendar Column
885
+ display_earnings_calendar(earnings_data, height="600px")
886
+
887
  # Display fetch errors in expander (less intrusive)
888
  if 'fetch_errors' in locals() and fetch_errors:
889
  with st.expander("⚠️ Source Fetch Warnings", expanded=False):
app/services/earnings_calendar.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Earnings Calendar Service
3
+ Primary: Alpha Vantage EARNINGS_CALENDAR (free, CSV, uses ALPHA_VANTAGE_KEY or demo key)
4
+ Fallback: StockAnalysis.com JSON scraping
5
+ """
6
+
7
+ import csv
8
+ import io
9
+ import json
10
+ import logging
11
+ import re
12
+ from datetime import datetime, timedelta
13
+ from typing import List, Dict, Optional
14
+
15
+ import requests
16
+ from bs4 import BeautifulSoup
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Top companies to highlight (filter from the full calendar)
21
+ TOP_TICKERS = {
22
+ "AAPL", "MSFT", "NVDA", "AMZN", "GOOGL", "GOOG", "META", "TSLA",
23
+ "AVGO", "JPM", "LLY", "V", "MA", "UNH", "XOM", "COST", "HD",
24
+ "WMT", "PG", "NFLX", "BAC", "ORCL", "CRM", "AMD", "INTC", "QCOM",
25
+ "GS", "MS", "CVX", "ABBV", "MRK", "BRK-B", "BRKB", "KO", "PEP",
26
+ "DIS", "PYPL", "ADBE", "CSCO", "TXN", "HON", "RTX", "CAT", "IBM",
27
+ }
28
+
29
+
30
class EarningsCalendarService:
    """Fetches upcoming earnings report dates without mock data.

    Primary source is the Alpha Vantage EARNINGS_CALENDAR CSV endpoint;
    when it yields nothing (rate limit, bad key, empty horizon) the service
    falls back to scraping the JSON embedded in StockAnalysis.com's
    earnings-calendar page. Results are filtered to TOP_TICKERS.
    """

    ALPHA_VANTAGE_URL = (
        "https://www.alphavantage.co/query"
        "?function=EARNINGS_CALENDAR&horizon=3month&apikey={api_key}"
    )
    STOCKANALYSIS_URL = "https://stockanalysis.com/stocks/earnings-calendar/"

    def __init__(self, api_key: Optional[str] = None):
        # "demo" is accepted by Alpha Vantage but heavily rate-limited.
        self.api_key = api_key or "demo"
        self.session = requests.Session()
        # Browser-like headers so the StockAnalysis fallback isn't blocked.
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            ),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
        })

    def get_upcoming_earnings(self, days_ahead: int = 30) -> List[Dict]:
        """
        Returns upcoming earnings entries sorted by date.
        Each entry: {ticker, company, date, days_until, report_time, eps_estimate}
        """
        entries = self._fetch_alpha_vantage(days_ahead)
        if not entries:
            logger.warning("Alpha Vantage returned no data, trying StockAnalysis fallback")
            entries = self._fetch_stockanalysis(days_ahead)
        return entries

    # ------------------------------------------------------------------
    # Primary: Alpha Vantage
    # ------------------------------------------------------------------

    def _fetch_alpha_vantage(self, days_ahead: int) -> List[Dict]:
        """Parse the EARNINGS_CALENDAR CSV; returns [] on any failure."""
        try:
            url = self.ALPHA_VANTAGE_URL.format(api_key=self.api_key)
            resp = self.session.get(url, timeout=15)
            resp.raise_for_status()

            # Alpha Vantage returns CSV for this endpoint; an error/rate-limit
            # response is JSON and won't contain the "symbol" header.
            content = resp.text
            if not content or "symbol" not in content[:200].lower():
                logger.warning("Alpha Vantage response doesn't look like CSV")
                return []

            reader = csv.DictReader(io.StringIO(content))
            now = datetime.now()
            cutoff = now + timedelta(days=days_ahead)
            entries = []

            for row in reader:
                try:
                    ticker = (row.get("symbol") or "").strip().upper()
                    if not ticker:
                        continue

                    # Filter to top companies only
                    if ticker not in TOP_TICKERS:
                        continue

                    date_str = (row.get("reportDate") or "").strip()
                    if not date_str:
                        continue
                    date = datetime.strptime(date_str, "%Y-%m-%d")

                    # Keep a 1-day grace window for reports from yesterday.
                    if date < now - timedelta(days=1) or date > cutoff:
                        continue

                    days_until = (date.date() - now.date()).days

                    eps_str = (row.get("estimate") or "").strip()
                    eps_estimate = float(eps_str) if eps_str and eps_str != "-" else None

                    fiscal_end = (row.get("fiscalDateEnding") or "").strip()
                    currency = (row.get("currency") or "USD").strip()

                    entries.append({
                        "ticker": ticker,
                        # Fix: DictReader fills missing fields with None, and
                        # row.get("name", ticker) returns that None — the
                        # resulting AttributeError on .strip() escaped the
                        # per-row catch and aborted the whole parse.
                        "company": (row.get("name") or ticker).strip(),
                        "date": date,
                        "days_until": days_until,
                        "report_time": None,  # AV doesn't provide BMO/AMC
                        "eps_estimate": eps_estimate,
                        "fiscal_end": fiscal_end,
                        "currency": currency,
                        "source": "Alpha Vantage",
                    })

                except (ValueError, KeyError, AttributeError) as e:
                    logger.debug(f"Skipping AV row: {e}")
                    continue

            entries.sort(key=lambda x: x["date"])
            logger.info(f"Alpha Vantage: {len(entries)} top-company earnings fetched")
            return entries

        except Exception as e:
            logger.error(f"Alpha Vantage earnings fetch failed: {e}")
            return []

    # ------------------------------------------------------------------
    # Fallback: StockAnalysis.com
    # ------------------------------------------------------------------

    def _fetch_stockanalysis(self, days_ahead: int) -> List[Dict]:
        """Scrape the embedded earnings JSON; returns [] on any failure."""
        try:
            resp = self.session.get(self.STOCKANALYSIS_URL, timeout=15)
            resp.raise_for_status()

            soup = BeautifulSoup(resp.text, "html.parser")

            # StockAnalysis embeds earnings data as JSON in a <script> tag
            script_tag = soup.find("script", string=re.compile(r'"earningsCalendar"'))
            if not script_tag:
                for tag in soup.find_all("script", type="application/json"):
                    text = tag.get_text() or ""
                    if "earningsCalendar" in text:
                        script_tag = tag
                        break

            if not script_tag:
                logger.warning("StockAnalysis: could not locate earnings JSON")
                return []

            # Extract JSON blob
            raw = script_tag.get_text() or ""
            match = re.search(r'"earningsCalendar"\s*:\s*(\[.*?\])', raw, re.DOTALL)
            if not match:
                logger.warning("StockAnalysis: earnings JSON pattern not found")
                return []

            data = json.loads(match.group(1))

            now = datetime.now()
            cutoff = now + timedelta(days=days_ahead)
            entries = []

            for week in data:
                date_str = week.get("date") or week.get("week", "")
                try:
                    date = datetime.strptime(date_str[:10], "%Y-%m-%d")
                except ValueError:
                    continue

                if date < now - timedelta(days=1) or date > cutoff:
                    continue

                days_until = (date.date() - now.date()).days

                for item in week.get("stocks", []):
                    # Fix: parse each stock defensively — previously a single
                    # malformed item (e.g. non-numeric EPS) raised into the
                    # outer except and discarded ALL already-parsed entries.
                    try:
                        ticker = (item.get("s") or "").upper()
                        if ticker not in TOP_TICKERS:
                            continue

                        report_time_raw = item.get("t", "")
                        report_time = (
                            "BMO" if report_time_raw == "bmo"
                            else "AMC" if report_time_raw == "amc"
                            else None
                        )
                        eps_est = item.get("e")

                        entries.append({
                            "ticker": ticker,
                            "company": item.get("n", ticker),
                            "date": date,
                            "days_until": days_until,
                            "report_time": report_time,
                            "eps_estimate": float(eps_est) if eps_est is not None else None,
                            "fiscal_end": None,
                            "currency": "USD",
                            "source": "StockAnalysis",
                        })
                    except (ValueError, TypeError, AttributeError) as e:
                        logger.debug(f"Skipping SA item: {e}")
                        continue

            entries.sort(key=lambda x: x["date"])
            logger.info(f"StockAnalysis: {len(entries)} earnings fetched")
            return entries

        except Exception as e:
            logger.error(f"StockAnalysis earnings fetch failed: {e}")
            return []
requirements.txt CHANGED
@@ -11,3 +11,4 @@ lxml>=5.0.0
11
  ntscraper
12
  playwright>=1.40.0
13
  huggingface_hub>=0.22.2
 
 
11
  ntscraper
12
  playwright>=1.40.0
13
  huggingface_hub>=0.22.2
14
+ yfinance>=0.2.0