Spaces:
Sleeping
Sleeping
from __future__ import annotations

import contextlib
import hashlib
import json
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
class CacheManager:
    """SQLite-backed cache for video transcripts and extraction results.

    The database file ``extraction_cache.db`` lives inside ``cache_dir`` and
    holds two tables:

    * ``transcripts`` -- one row per ``video_id``.
    * ``extractions`` -- one row per ``(input_hash, form_type)`` pair, where
      ``input_hash`` is the SHA-256 of the input text.

    Timestamps are written as naive local-time strings in the
    ``YYYY-MM-DD HH:MM:SS[.ffffff]`` layout, so they sort chronologically
    as plain text.
    """

    def __init__(self, cache_dir: str | Path = "cache"):
        """Create ``cache_dir`` if needed and make sure the tables exist."""
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Create SQLite database for structured results
        self.db_path = self.cache_dir / "extraction_cache.db"
        self._init_db()

    @contextlib.contextmanager
    def _connection(self):
        """Yield a connection that is committed/rolled back and then closed.

        ``with sqlite3.connect(...)`` alone only manages the transaction and
        leaves the connection open, so the close is done explicitly here.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commit on success, roll back on exception
                yield conn
        finally:
            conn.close()

    @staticmethod
    def _format_timestamp(moment: datetime) -> str:
        """Render ``moment`` in the text layout used by the timestamp columns."""
        # Binding an explicit string avoids the implicit datetime adapter,
        # which is deprecated since Python 3.12, while producing the same
        # text that adapter wrote into existing databases.
        return moment.isoformat(" ")

    def _init_db(self):
        """Initialize the SQLite database with necessary tables."""
        with self._connection() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS extractions (
                    input_hash TEXT,
                    form_type TEXT,
                    result TEXT,
                    model_name TEXT,
                    timestamp DATETIME,
                    PRIMARY KEY (input_hash, form_type)
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS transcripts (
                    video_id TEXT PRIMARY KEY,
                    transcript TEXT,
                    timestamp DATETIME
                )
            """)

    def _hash_content(self, content: str) -> str:
        """Generate a stable hash (SHA-256 hex digest) for input content."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def get_transcript(self, video_id: str) -> str | None:
        """Return the cached transcript for ``video_id``, or ``None`` on a miss."""
        with self._connection() as conn:
            row = conn.execute(
                "SELECT transcript FROM transcripts WHERE video_id = ?",
                (video_id,),
            ).fetchone()
        return row[0] if row else None

    def store_transcript(self, video_id: str, transcript: str):
        """Insert or overwrite the transcript cached for ``video_id``."""
        with self._connection() as conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO transcripts (video_id, transcript, timestamp)
                VALUES (?, ?, ?)
                """,
                (video_id, transcript, self._format_timestamp(datetime.now())),
            )

    def get_extraction(
        self,
        input_content: str,
        form_type: str,
        model_name: str,
    ) -> dict | None:
        """Return the cached extraction result, or ``None`` on a miss.

        A row matches only if the content hash, ``form_type`` and
        ``model_name`` all agree. The stored JSON text is decoded before it
        is returned.
        """
        input_hash = self._hash_content(input_content)
        with self._connection() as conn:
            row = conn.execute(
                """
                SELECT result FROM extractions
                WHERE input_hash = ? AND form_type = ? AND model_name = ?
                """,
                (input_hash, form_type, model_name),
            ).fetchone()
        return json.loads(row[0]) if row else None

    def store_extraction(
        self,
        input_content: str,
        form_type: str,
        result: dict,
        model_name: str,
    ):
        """Store an extraction result as JSON text.

        NOTE: the table's primary key is ``(input_hash, form_type)`` and does
        not include ``model_name``, so storing a result for the same input
        and form type under a different model replaces the earlier row.
        """
        input_hash = self._hash_content(input_content)
        with self._connection() as conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO extractions
                (input_hash, form_type, result, model_name, timestamp)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    input_hash,
                    form_type,
                    json.dumps(result),
                    model_name,
                    self._format_timestamp(datetime.now()),
                ),
            )

    def clear_cache(self, older_than_days: int | None = None):
        """Clear the cache, optionally only entries older than specified days.

        Args:
            older_than_days: When ``None`` every row of both tables is
                deleted. Otherwise only rows whose timestamp is earlier than
                ``now - older_than_days`` are deleted.

        Raises:
            ValueError: If ``older_than_days`` is negative.
        """
        if older_than_days is None:
            with self._connection() as conn:
                conn.execute("DELETE FROM extractions")
                conn.execute("DELETE FROM transcripts")
            return

        if older_than_days < 0:
            raise ValueError("older_than_days must be non-negative")

        # The cutoff is computed with the same local clock that wrote the
        # timestamps. SQLite's datetime('now', ...) is UTC, which would skew
        # the comparison by the machine's UTC offset.
        cutoff = self._format_timestamp(
            datetime.now() - timedelta(days=older_than_days)
        )
        with self._connection() as conn:
            conn.execute("DELETE FROM extractions WHERE timestamp < ?", (cutoff,))
            conn.execute("DELETE FROM transcripts WHERE timestamp < ?", (cutoff,))

    def cleanup_gradio_cache(self):
        """Delete the ``.gradio`` directory in the current working directory, if present."""
        gradio_cache = Path(".gradio")
        if gradio_cache.exists():
            import shutil

            shutil.rmtree(gradio_cache)
            print("Cleaned up Gradio cache")