Spaces:
Runtime error
Runtime error
from sentence_transformers import SentenceTransformer | |
import faiss | |
import numpy as np | |
import hashlib | |
# Checkpoint name handed to SentenceTransformer for every embedding call.
_MODEL_NAME = 'all-MiniLM-L6-v2'

# Instantiated a single time at import; the VectorStore methods below reuse it.
embedder = SentenceTransformer(_MODEL_NAME)
class VectorStore:
    """In-memory store of de-duplicated texts with a FAISS L2 index over them.

    Attributes:
        texts: Stored texts in insertion order; position ``i`` corresponds to
            the ``i``-th vector added to the index.
        embeddings: One embedding vector per stored text, same order as
            ``texts``.
        index: ``faiss.IndexFlatL2`` holding the embeddings, or ``None`` until
            the first text has been added.
        text_hashes: MD5 hex digests of the stored texts, used only to detect
            duplicates.
    """

    def __init__(self):
        self.texts = []
        self.embeddings = []
        self.index = None
        self.text_hashes = set()

    def add_texts(self, texts):
        """Add texts to the store, skipping any that are already present.

        Duplicates are detected by the MD5 digest of the text, both against
        what is already stored and within ``texts`` itself.

        Args:
            texts: Iterable of strings. A single ``str`` is treated as one
                text instead of being iterated character by character.

        Returns:
            None. Store state is only modified once encoding and indexing of
            the new texts have succeeded.
        """
        if isinstance(texts, str):
            texts = [texts]

        new_texts = []
        new_hashes = set()
        for text in texts:
            text_hash = hashlib.md5(text.encode()).hexdigest()
            if text_hash in self.text_hashes or text_hash in new_hashes:
                continue
            new_hashes.add(text_hash)
            new_texts.append(text)

        if not new_texts:
            return

        # FAISS expects a C-contiguous float32 matrix of shape (n, dim).
        new_embeds = np.ascontiguousarray(
            embedder.encode(new_texts), dtype='float32'
        )

        if self.index is None:
            self.index = faiss.IndexFlatL2(new_embeds.shape[1])

        # Append only the new vectors: a flat index numbers vectors in the
        # order they are added, so ids keep lining up with ``self.texts``
        # without resetting and rebuilding the whole index.
        self.index.add(new_embeds)

        # Commit bookkeeping last so a failure above cannot leave hashes
        # recorded for texts that were never actually stored.
        self.texts.extend(new_texts)
        self.embeddings.extend(new_embeds)
        self.text_hashes.update(new_hashes)

    def retrieve(self, query, top_k=3):
        """Return the stored texts nearest to ``query`` and their positions.

        Args:
            query: Text to embed and search for.
            top_k: Maximum number of results; capped at the number of stored
                texts.

        Returns:
            A ``(texts, indices)`` tuple of two lists, ordered as returned by
            the index search. Both lists are empty when the store is empty or
            ``top_k`` is not positive.
        """
        if self.index is None or not self.texts:
            return [], []

        k = min(top_k, len(self.texts))
        if k <= 0:
            # Nothing was asked for; skip the query encode and the search.
            return [], []

        query_array = np.ascontiguousarray(
            embedder.encode([query]), dtype='float32'
        )
        _distances, indices = self.index.search(query_array, k)

        # FAISS pads missing results with -1; drop those (and anything else
        # out of range) so they cannot alias ``self.texts[-1]``.
        hits = [i for i in indices[0].tolist() if 0 <= i < len(self.texts)]
        return [self.texts[i] for i in hits], hits

    def clear(self):
        """Reset the store to its freshly constructed, empty state."""
        self.texts = []
        self.embeddings = []
        self.index = None
        self.text_hashes = set()