Spaces:
Running
Running
| from __future__ import annotations | |
| import os | |
| from pathlib import Path | |
| from threading import Lock | |
| from typing import Any, Dict, Tuple | |
| import chromadb | |
| from sentence_transformers import SentenceTransformer | |
# Lock taken by get_vectorstore_components() while it builds the shared components.
_VECTORSTORE_LOCK = Lock()
# Cached (client, collection, embedder) triple; stays None until
# get_vectorstore_components() has built it.
_VECTORSTORE_SINGLETON: Tuple[Any, Any, SentenceTransformer] | None = None
def _resolve_vectorstore_dir() -> Path:
    """Return the directory that holds the persistent vector store.

    The location is read from the ``CURRICULUM_VECTORSTORE_DIR`` environment
    variable and defaults to ``datasets/vectorstore``. An absolute value is
    returned untouched. A relative value is anchored at the current working
    directory when that candidate already exists or when the working
    directory path ends with ``MATHPULSE-AI``; otherwise it is anchored two
    levels above the directory containing this file.
    """
    configured = Path(os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore"))
    if configured.is_absolute():
        return configured

    working_dir = Path.cwd()
    from_cwd = working_dir / configured
    if from_cwd.exists() or str(working_dir).endswith("MATHPULSE-AI"):
        return from_cwd

    # Fall back to a location derived from where this module lives on disk.
    return Path(__file__).resolve().parents[2] / configured
# Components cached per (collection_name, model_name) request. Entries are
# only added while holding _VECTORSTORE_LOCK.
_VECTORSTORE_CACHE: Dict[Tuple[str, str], Tuple[Any, Any, Any]] = {}


def get_vectorstore_components(
    collection_name: str = "curriculum_chunks",
    model_name: str = "BAAI/bge-small-en-v1.5",
) -> "Tuple[Any, Any, SentenceTransformer]":
    """Return the cached ``(client, collection, embedder)`` triple.

    Components are built lazily on first request and cached per
    ``(collection_name, model_name)`` pair, so a later call with different
    arguments receives the collection and embedder it asked for instead of
    whatever the first caller happened to request.

    Args:
        collection_name: Name of the Chroma collection to get or create.
        model_name: Name passed to ``SentenceTransformer`` to load the
            embedding model.

    Returns:
        A tuple of the persistent Chroma client, the requested collection
        and the embedding model.
    """
    global _VECTORSTORE_SINGLETON

    key = (collection_name, model_name)
    components = _VECTORSTORE_CACHE.get(key)
    if components is not None:
        return components

    with _VECTORSTORE_LOCK:
        # Re-check under the lock: another thread may have built this entry
        # while we were waiting.
        components = _VECTORSTORE_CACHE.get(key)
        if components is None:
            client = None
            embedder = None
            # Reuse the existing client, and an already-loaded embedder for
            # the same model, rather than constructing duplicates.
            for (_, cached_model), (cached_client, _, cached_embedder) in (
                _VECTORSTORE_CACHE.items()
            ):
                client = cached_client
                if cached_model == model_name:
                    embedder = cached_embedder
                    break

            if client is None:
                vectorstore_dir = _resolve_vectorstore_dir()
                vectorstore_dir.mkdir(parents=True, exist_ok=True)
                client = chromadb.PersistentClient(path=str(vectorstore_dir))

            collection = client.get_or_create_collection(name=collection_name)
            if embedder is None:
                embedder = SentenceTransformer(model_name)

            components = (client, collection, embedder)
            _VECTORSTORE_CACHE[key] = components

            # Keep the module-level singleton pointing at the first triple
            # built, for any code that still reads it directly.
            if _VECTORSTORE_SINGLETON is None:
                _VECTORSTORE_SINGLETON = components

    return components
def get_vectorstore_health() -> Dict[str, Any]:
    """Summarise the contents of the default vector store collection.

    Returns:
        A dict with three keys: ``chunkCount`` (number of ids returned by
        the collection), ``subjects`` (count of metadata records per
        subject, with a missing or falsy subject counted as ``"unknown"``)
        and ``vectorstoreDir`` (the resolved storage directory as a string).
    """
    _client, collection, _embedder = get_vectorstore_components()
    snapshot = collection.get(include=["metadatas"])

    per_subject: Dict[str, int] = {}
    for entry in snapshot.get("metadatas") or []:
        # Only dict-shaped metadata records contribute to the tally.
        if isinstance(entry, dict):
            label = str(entry.get("subject") or "unknown")
            per_subject[label] = per_subject.get(label, 0) + 1

    chunk_ids = snapshot.get("ids") or []
    return {
        "chunkCount": len(chunk_ids),
        "subjects": per_subject,
        "vectorstoreDir": str(_resolve_vectorstore_dir()),
    }