| import os |
| import hashlib |
| import json |
| import shutil |
|
|
# Cached results are stored in a "cache" folder that sits next to this module.
_MODULE_DIR = os.path.dirname(__file__)
CACHE_DIR = os.path.join(_MODULE_DIR, "cache")

# Create the folder at import time so the functions below can write into it
# without checking for its existence first.
os.makedirs(CACHE_DIR, exist_ok=True)
|
|
|
|
def _hash_file(path: str) -> str:
    """Return the SHA256 hex digest of the file at *path*.

    The file is opened in binary mode and consumed in 8192-byte reads, so
    the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
def get_cache_path(pdf_path: str) -> str:
    """Return the cache file path that corresponds to *pdf_path*.

    The file name is the content fingerprint produced by ``_hash_file`` with
    a ``.json`` suffix, placed inside ``CACHE_DIR``. Because the name comes
    from the file's contents, the file at *pdf_path* must be readable.
    """
    cache_name = _hash_file(pdf_path) + ".json"
    return os.path.join(CACHE_DIR, cache_name)
|
|
|
|
def save_chunks_to_cache(pdf_path: str, chunks) -> str:
    """Serialize *chunks* as JSON into the cache entry for *pdf_path*.

    The data is first written to a temporary sibling file and then moved
    into place with ``os.replace``. If serialization or writing fails, no
    partially written cache file is left at the final path and any existing
    entry stays untouched.

    Args:
        pdf_path: Path of the source file; its contents determine the
            cache file name (see ``get_cache_path``).
        chunks: JSON-serializable data to store.

    Returns:
        The path of the cache file that was written.

    Raises:
        TypeError: If *chunks* contains values ``json.dump`` cannot encode.
        OSError: If the cache file cannot be written or moved into place.
    """
    path = get_cache_path(pdf_path)
    # The PID in the name keeps concurrent processes from sharing a temp file.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(chunks, f, indent=2, ensure_ascii=False)
        # Publish the finished file in one step; readers never see a
        # half-written entry.
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is gone; it only remains
        # here when something above failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return path
|
|
|
|
def load_chunks_from_cache(pdf_path: str):
    """Load the cached chunks for *pdf_path*, if a usable entry exists.

    Args:
        pdf_path: Path of the source file; its contents determine the
            cache file name (see ``get_cache_path``).

    Returns:
        The decoded JSON data, or ``None`` when there is no cache entry or
        the entry does not contain valid JSON.
    """
    path = get_cache_path(pdf_path)
    try:
        # Open directly instead of checking existence first: the file may
        # disappear between a check and the open (e.g. the cache is cleared).
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except json.JSONDecodeError:
        # A truncated or corrupt entry is treated as a cache miss so the
        # caller can regenerate it rather than failing on every lookup.
        return None
|
|
|
|
def clear_cache() -> bool:
    """Delete the cache directory tree and recreate it empty.

    Removal is best-effort: errors raised while deleting are ignored, so an
    entry that cannot be removed never makes this function raise.

    Returns:
        True if the cache directory is empty afterwards, False if some
        entries could not be removed.
    """
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    os.makedirs(CACHE_DIR, exist_ok=True)
    # rmtree swallowed any failures, so inspect the directory to report the
    # real outcome instead of returning True unconditionally.
    return not os.listdir(CACHE_DIR)
|
|