radextract / tools /rebuild_cache.py
goelak's picture
Initial commit for RadExtract
fab8051
#!/usr/bin/env python3
"""Utility script to rebuild the demonstration cache with current structurer output.

This development tool rebuilds the cache using the current
RadiologyReportStructurer implementation, ensuring that cached results
include the latest features such as raw_prompt data. The script clears the
existing cache, then processes all sample reports from the static JSON file
and caches their structured results for improved demo performance.

The script requires the KEY environment variable to be set with a valid
Gemini API key and optionally accepts MODEL_ID to specify which model
to use for processing.

The sample file is looked up as static/sample_reports.json relative to the
current working directory, so run the script from the repository root.

Usage:
    export KEY=your_gemini_api_key_here
    export MODEL_ID=gemini-2.5-pro  # optional, defaults to gemini-2.5-pro
    python tools/rebuild_cache.py
"""
import json
import os
import sys
from pathlib import Path
# Add parent directory to path to import modules
sys.path.append(str(Path(__file__).parent.parent))
from cache_manager import CacheManager
from structure_report import RadiologyReportStructurer
# --- Configuration and inputs -------------------------------------------------
# Fail fast, before touching the filesystem or the API, when no key is set.
# An empty KEY is treated the same as a missing one.
API_KEY = os.environ.get("KEY")
if not API_KEY:
    sys.exit("KEY environment variable not set. Export KEY before running.")

# Relative path: resolved against the current working directory.
SAMPLES_PATH = Path("static/sample_reports.json")
if not SAMPLES_PATH.exists():
    sys.exit("static/sample_reports.json not found")

# Decode explicitly as UTF-8 rather than the platform's locale encoding, so
# non-ASCII characters in the reports load identically on every system.
samples = json.loads(SAMPLES_PATH.read_text(encoding="utf-8"))["samples"]

# `or` (instead of a .get() default) also covers MODEL_ID being exported but
# empty, which would otherwise pass "" to the structurer as the model id.
MODEL_ID = os.environ.get("MODEL_ID") or "gemini-2.5-pro"
structurer = RadiologyReportStructurer(api_key=API_KEY, model_id=MODEL_ID)
import time
# --- Cache rebuild ------------------------------------------------------------
MAX_ATTEMPTS = 5          # total predict/cache attempts per sample
RETRY_DELAY_SECONDS = 5   # pause between failed attempts on the same sample
THROTTLE_SECONDS = 3      # base pause between samples

cache = CacheManager(cache_dir="cache")
print("Clearing existing cache...")
cache.clear_cache()

print(f"Processing {len(samples)} samples with {MODEL_ID}...")
failed_ids = []  # ids of samples that never succeeded, reported at the end
for sample in samples:
    sample_id = sample["id"]
    report_text = sample["text"]
    print(f" Processing {sample_id}...")
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            result = structurer.predict(report_text)
            cache.cache_result(report_text, result, sample_id=sample_id)
            break
        except Exception as exc:  # best-effort tool: any failure triggers a retry
            if attempt < MAX_ATTEMPTS:
                print(
                    f" Warning: {exc}. Attempt {attempt}/{MAX_ATTEMPTS} failed, "
                    f"retrying in {RETRY_DELAY_SECONDS}s..."
                )
                time.sleep(RETRY_DELAY_SECONDS)
            else:
                # Last attempt: no retry follows, so neither announce one nor wait.
                print(f" Warning: {exc}. Attempt {attempt}/{MAX_ATTEMPTS} failed.")
    else:
        # for/else: reached only when no attempt hit `break`, i.e. all failed.
        print(
            f" Error: Failed to process {sample_id} after "
            f"{MAX_ATTEMPTS} attempts, skipping."
        )
        failed_ids.append(sample_id)
    time.sleep(THROTTLE_SECONDS)  # base throttle

# Only claim success when every sample was cached; otherwise list the skipped
# samples and exit non-zero so callers and CI can detect a partial rebuild.
if failed_ids:
    failed_list = ", ".join(str(sample_id) for sample_id in failed_ids)
    sys.exit(
        f"Cache rebuild finished with {len(failed_ids)} of {len(samples)} "
        f"samples failed: {failed_list}"
    )
print("Cache rebuild completed successfully.")