Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
#!/usr/bin/env python3 | |
"""Utility script to rebuild the demonstration cache with current structurer output. | |
This development tool rebuilds the cache using the current | |
RadiologyReportStructurer implementation, ensuring that cached results | |
include the latest features such as raw_prompt data. The script processes | |
all sample reports from the static JSON file and caches their structured | |
results for improved demo performance. | |
The script requires the KEY environment variable to be set with a valid | |
Gemini API key and optionally accepts MODEL_ID to specify which model | |
to use for processing. | |
Usage: | |
export KEY=your_gemini_api_key_here | |
export MODEL_ID=gemini-2.5-pro # optional, defaults to gemini-2.5-pro | |
python tools/rebuild_cache.py | |
""" | |
import json | |
import os | |
import sys | |
from pathlib import Path | |
# Add parent directory to path to import modules | |
sys.path.append(str(Path(__file__).parent.parent)) | |
from cache_manager import CacheManager | |
from structure_report import RadiologyReportStructurer | |
API_KEY = os.environ.get("KEY") | |
if not API_KEY: | |
sys.exit("KEY environment variable not set. Export KEY before running.") | |
SAMPLES_PATH = Path("static/sample_reports.json") | |
if not SAMPLES_PATH.exists(): | |
sys.exit("static/sample_reports.json not found") | |
samples = json.loads(SAMPLES_PATH.read_text())["samples"] | |
MODEL_ID = os.environ.get("MODEL_ID", "gemini-2.5-pro") | |
structurer = RadiologyReportStructurer(api_key=API_KEY, model_id=MODEL_ID) | |
import time | |
cache = CacheManager(cache_dir="cache") | |
print("Clearing existing cache...") | |
cache.clear_cache() | |
print(f"Processing {len(samples)} samples with {MODEL_ID}...") | |
for s in samples: | |
sid = s["id"] | |
text = s["text"] | |
print(f" Processing {sid}...") | |
retries = 0 | |
while retries < 5: | |
try: | |
result = structurer.predict(text) | |
cache.cache_result(text, result, sample_id=sid) | |
break | |
except Exception as e: | |
retries += 1 | |
print(f" Warning: {e}. Retry {retries}/5...") | |
time.sleep(5) | |
else: | |
print(f" Error: Failed to process {sid} after 5 retries, skipping.") | |
time.sleep(3) # base throttle | |
print("Cache rebuild completed successfully.") | |