Dev Goyal
refactor: improve 8-K filing retrieval logic with Item 2.02 validation and update ingestion script error handling
25d293a | #!/usr/bin/env python3 | |
"""
CLI script to ingest earnings-call transcripts into ChromaDB.

Usage:
    python scripts/ingest_earnings_calls.py --tickers AAPL MSFT --quarters Q4-2024 Q1-2025
    python scripts/ingest_earnings_calls.py --tickers TSLA --quarters Q1-2025

Data sources (tried in order):
    1. Financial Modeling Prep (FMP) (free tier, 250 req/day)
    2. SEC EDGAR 8-K filings (free, always available)
"""
| import argparse | |
| import os | |
| import sys | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| # Ensure project root is on sys.path so `core.*` imports work | |
| sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) | |
| from core.config import Settings | |
| from core.earnings_tools import ingest_earnings_call, parse_quarter | |
def main():
    """Ingest earnings-call transcripts for every ticker/quarter pair on the CLI.

    Parses ``--tickers`` and ``--quarters``, calls ``ingest_earnings_call``
    once per (ticker, quarter) combination, prints a per-item summary, and
    always terminates the process through ``sys.exit``.

    Exit status:
        0: no ingest raised an exception. Items reported with status
           "failed" are listed in the summary but do not fail the run.
        1: a quarter string was rejected by ``parse_quarter`` (nothing is
           ingested in that case), or at least one ingest raised an
           exception and was recorded with status "error".
    """
    parser = argparse.ArgumentParser(
        description="Ingest earnings-call transcripts into ChromaDB."
    )
    parser.add_argument(
        "--tickers",
        nargs="+",
        required=True,
        help="Stock tickers to ingest (e.g. --tickers AAPL MSFT)",
    )
    parser.add_argument(
        "--quarters",
        nargs="+",
        required=True,
        help="Quarters to ingest, format Q<N>-<YYYY> (e.g. --quarters Q4-2024 Q1-2025)",
    )
    args = parser.parse_args()

    # Parse quarters upfront to fail fast on bad formats. This happens before
    # settings are loaded or the storage directory is created, so a malformed
    # argument aborts the run without leaving any side effects behind.
    parsed_quarters: list[tuple[int, int]] = []
    for q_str in args.quarters:
        try:
            q, y = parse_quarter(q_str)
        except ValueError as e:
            print(f"[Error] {e}")
            sys.exit(1)
        parsed_quarters.append((q, y))

    settings = Settings()
    # Prefer the configured key; fall back to the raw environment variable.
    api_key = settings.fmp_api_key or os.getenv("FMP_API_KEY", "")
    chroma_path = settings.earnings_chroma_path
    os.makedirs(chroma_path, exist_ok=True)

    separator = "=" * 50

    results: list[dict] = []
    for raw_ticker in args.tickers:
        ticker = raw_ticker.upper()
        for quarter, year in parsed_quarters:
            print(f"\n{separator}")
            print(f"Ingesting {ticker} Q{quarter}-{year}")
            print(separator)
            try:
                status = ingest_earnings_call(
                    ticker=ticker,
                    quarter=quarter,
                    year=year,
                    api_key=api_key,
                    chroma_path=chroma_path,
                )
            except Exception as e:
                # Batch boundary: one broken ticker/quarter must not abort the
                # remaining ingests. The failure is recorded and turns into a
                # non-zero exit status once the summary has been printed.
                print(f"[Error] Failed to ingest {ticker} Q{quarter}-{year}: {e}")
                status = "error"
            results.append(
                {"ticker": ticker, "quarter": f"Q{quarter}-{year}", "status": status}
            )

    # Status glyphs are written as escapes so the source stays pure ASCII and
    # cannot be damaged by another encoding round-trip.
    # NOTE(review): the glyphs were mis-encoded in the reviewed copy and have
    # been reconstructed from their byte patterns -- confirm against the repo.
    status_icons = {
        "success": "\u2705",       # white heavy check mark
        "partial": "\U0001F7E1",   # large yellow circle
        "failed": "\u274C",        # cross mark
        "exists": "\u23ED\uFE0F",  # next-track button (emoji presentation)
        "error": "\U0001F4A5",     # collision
    }
    unknown_icon = "\u2753"        # question mark, for any unrecognised status

    # Summary
    print(f"\n{separator}")
    print("INGEST SUMMARY")
    print(separator)
    for r in results:
        icon = status_icons.get(r["status"], unknown_icon)
        print(f" {icon} {r['ticker']} {r['quarter']}: {r['status']}")

    errors = [r for r in results if r["status"] == "error"]
    failed = [r for r in results if r["status"] == "failed"]

    if errors:
        print(f"\n[CRITICAL] {len(errors)} ingest(s) hit technical errors. Check logs.")
        sys.exit(1)

    if failed:
        print(f"\n[INFO] {len(failed)} transcript(s) could not be found (likely not yet reported).")
        print("This is not treated as a build failure.")

    print("\nIngestion process completed successfully.")
    sys.exit(0)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()