"""
Vector database handler for storing and retrieving text chunks using embeddings.
TODO:
- Replace in-memory store with a persistent backend (e.g., FAISS, Pinecone).
- Customize embedding provider as needed.
"""
from langchain_community.vectorstores import FAISS

from backend.core.embeddings import EmbeddingProvider


class VectorDatabase:
"""
Handles the creation and querying of a vector database using text embeddings.
"""
def __init__(self):
self.db = None
self.embedding_provider = EmbeddingProvider()
async def abuild_from_list(self, chunks):
"""
Build the vector database from a list of text chunks.
Parameters
----------
chunks : list of str
The list of preprocessed text segments.
"""
self.db = FAISS.from_texts(texts=chunks, embedding=self.embedding_provider.model)
    def search_by_text(self, query, k=4):
        """
        Search the vector database for the most relevant chunks based on the query.

        Parameters
        ----------
        query : str
            The user's input question or topic.
        k : int, optional
            The number of top matches to return (default is 4).

        Returns
        -------
        list of tuple
            Pairs of (matched chunk as a Document, score) for the top ``k``
            results; with FAISS's default index the score is a distance, so
            lower values indicate closer matches.
        """
        if self.db is None:
            raise ValueError("Vector database is not initialized.")
        return self.db.similarity_search_with_score(query, k=k)
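

# --- Usage sketch (illustrative; not part of the original module) ---
# Shows how a caller might build and query the store. It assumes that
# EmbeddingProvider.model exposes a LangChain-compatible embeddings object and
# that any credentials it needs are already configured; the sample chunks and
# query below are placeholders.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        chunks = [
            "FAISS stores dense vectors for similarity search.",
            "Embeddings map text into a shared vector space.",
        ]
        vector_db = VectorDatabase()
        # FAISS.from_texts embeds synchronously inside abuild_from_list, so
        # this await blocks the event loop while the index is built.
        await vector_db.abuild_from_list(chunks)
        # Each result is a (Document, score) pair; lower scores are closer
        # matches with the default FAISS index.
        for document, score in vector_db.search_by_text("How does similarity search work?", k=2):
            print(f"{score:.4f}\t{document.page_content}")

    asyncio.run(_demo())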