# BotSpace / index_local_runtime.py
# profgabrielramos's picture
# Upload folder using huggingface_hub
# 1a0b19c verified
import json
import os
import threading
import time
from pathlib import Path
from typing import Any, Dict, List
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
# Runtime configuration. Every value can be overridden through the
# environment variable of the same name; the literals below are the defaults.

# Name of the sentence-transformers model used to embed queries.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# Root working directory; the index artifacts live in <WORK_DIR>/out/artifacts.
WORK_DIR = Path(os.environ.get("WORK_DIR", "/data/work"))
ART_DIR = WORK_DIR.joinpath("out", "artifacts")

# Minimum number of seconds between two checks for an updated index file.
RELOAD_POLL_SECONDS = int(os.environ.get("RELOAD_POLL_SECONDS", "30"))
class LocalIndexRuntime:
    """Answer similarity queries from a FAISS index stored on local disk.

    The index is read from ``ART_DIR / "faiss.index"`` and its per-vector
    metadata from ``ART_DIR / "meta.json"``. The index is loaded lazily on the
    first search and can be refreshed with :meth:`maybe_reload` when the index
    file's modification time advances.

    All state changes happen under a re-entrant lock, so methods may call one
    another while holding it.
    """

    def __init__(self) -> None:
        """Load the embedding model; the index itself is loaded on demand."""
        self.model = SentenceTransformer(EMBED_MODEL)
        self.index: faiss.Index | None = None
        self.meta: List[Dict[str, Any]] | None = None
        # mtime of the index file observed when it was last loaded.
        self.last_mtime: float | None = None
        # Wall-clock time of the last load or reload check.
        self.last_check = 0.0
        self._lock = threading.RLock()

    def _paths(self) -> tuple[Path, Path]:
        """Return the ``(index file, metadata file)`` paths."""
        return (ART_DIR / "faiss.index", ART_DIR / "meta.json")

    def exists(self) -> bool:
        """Return True when both the index file and the metadata file exist."""
        idx, meta = self._paths()
        return idx.exists() and meta.exists()

    def load(self) -> None:
        """Read the index and metadata from disk and publish them together.

        Both files are parsed into locals first, so a failure while reading
        either one leaves the previously loaded index/metadata pair untouched.

        Raises:
            Whatever ``faiss.read_index``, ``Path.read_text`` or ``json.loads``
            raise when a file is missing or malformed.
        """
        with self._lock:
            idx, meta = self._paths()
            # Take the mtime BEFORE reading: if the file is replaced while it
            # is being read, the newer mtime will still trigger a reload later.
            # A missing file is left for read_index to report, as before.
            try:
                mtime: float | None = idx.stat().st_mtime
            except FileNotFoundError:
                mtime = None
            index = faiss.read_index(str(idx))
            records = json.loads(meta.read_text(encoding="utf-8"))
            # Swap in the new pair only once both parts loaded successfully.
            self.index = index
            self.meta = records
            self.last_mtime = mtime
            self.last_check = time.time()
            print(f"[INDEX] loaded local index from {idx}")

    def ensure_loaded(self) -> None:
        """Load the index if it is not in memory yet.

        Raises:
            RuntimeError: if the index or metadata file is missing on disk.
        """
        with self._lock:
            if not self.exists():
                raise RuntimeError(f"Indice nao existe em {ART_DIR}. Rode reindex primeiro.")
            if self.index is None or self.meta is None:
                self.load()

    def maybe_reload(self) -> None:
        """Reload the index if its file changed since the last load.

        The file is checked at most once every ``RELOAD_POLL_SECONDS``; calls
        made sooner return immediately. A missing index file is ignored.
        """
        # Hold the lock for the whole check so concurrent callers cannot both
        # pass the throttle and reload twice.
        with self._lock:
            now = time.time()
            if now - self.last_check < RELOAD_POLL_SECONDS:
                return
            self.last_check = now
            idx, _ = self._paths()
            # stat() directly instead of exists()+stat(): the file may vanish
            # between the two calls while a reindex is rewriting it.
            try:
                mtime = idx.stat().st_mtime
            except (FileNotFoundError, NotADirectoryError):
                return
            if self.last_mtime is None or mtime > self.last_mtime:
                print("[INDEX] detected updated index; reloading...")
                self.load()

    def search(self, query: str, k: int = 4) -> List[Dict[str, Any]]:
        """Return up to ``k`` metadata records closest to ``query``.

        Args:
            query: Text to embed and look up.
            k: Maximum number of results; values <= 0 yield an empty list.

        Returns:
            Copies of the matching metadata records, in the order returned by
            the index, each with an added ``"score"`` float.

        Raises:
            RuntimeError: if the index files are missing on disk.
        """
        with self._lock:
            # Checked under the same lock as the search so the index/metadata
            # pair cannot change between the check and its use.
            self.ensure_loaded()
            assert self.index is not None
            assert self.meta is not None
            if k <= 0:
                # Nothing was requested; do not hand a non-positive k to the index.
                return []
            qv = self.model.encode([query], normalize_embeddings=True)
            qv = np.asarray(qv, dtype="float32")
            scores, idxs = self.index.search(qv, k)
            out: List[Dict[str, Any]] = []
            for score, i in zip(scores[0], idxs[0]):
                # The index reports "no neighbour" as -1; treating every
                # negative id as missing also prevents wrap-around indexing.
                if i < 0:
                    continue
                item = dict(self.meta[int(i)])  # copy: never mutate stored metadata
                item["score"] = float(score)
                out.append(item)
            return out