Spaces:
Sleeping
Sleeping
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_core.vectorstores import InMemoryVectorStore | |
from langchain_community.docstore.document import Document | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from src.utils.exceptions import CustomException | |
from src.utils.functions import getConfig | |
from src.utils.logging import logger | |
class VectorStore:
    """Build in-memory vector stores over raw text and expose them as retrievers.

    All tunables (embedding model, device, chunking, retriever settings) are
    read from ``config.ini`` via ``getConfig``; nothing is hard-coded here.
    """

    def __init__(self) -> None:
        """Initialize the VectorStore with configuration, embeddings, and text splitter."""
        self.config = getConfig(path="config.ini")

        # Embedding model settings come from the [EMBEDDINGS] section.
        self.vectorEmbeddings = HuggingFaceEmbeddings(
            model_name=self.config.get("EMBEDDINGS", "embeddingModel"),
            model_kwargs={"device": self.config.get("EMBEDDINGS", "device")},
            encode_kwargs={
                "normalize_embeddings": self.config.getboolean(
                    "EMBEDDINGS", "normalize_embeddings"
                )
            },
        )

        # Chunking settings come from the [VECTORSTORE] section.
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.config.getint("VECTORSTORE", "chunkSize"),
            chunk_overlap=self.config.getint("VECTORSTORE", "chunkOverlap"),
            add_start_index=self.config.getboolean("VECTORSTORE", "addStartIndex"),
        )

    def setupStore(self, text: str):
        """
        Set up a fresh vector store with the provided text.

        The text is wrapped in a single ``Document``, split into chunks with
        ``self.splitter``, embedded into a new ``InMemoryVectorStore`` and
        exposed through ``as_retriever``. A new store is created on every
        call; nothing is cached on the instance.

        Args:
            text (str): The text to store and process.

        Returns:
            The retriever returned by ``store.as_retriever(...)`` on success,
            or ``None`` if any step raised. Failures are logged and printed,
            not propagated, so callers must handle a ``None`` result.
        """
        try:
            store = InMemoryVectorStore(self.vectorEmbeddings)
            textDocument = Document(page_content=text)
            documents = self.splitter.split_documents([textDocument])
            store.add_documents(documents=documents)
            return store.as_retriever(
                search_type=self.config.get("RETRIEVER", "searchType"),
                search_kwargs={
                    "k": self.config.getint("RETRIEVER", "k"),
                    # NOTE(review): fetch_k is presumably only meaningful for
                    # MMR-style search types -- confirm against the configured
                    # searchType.
                    "fetch_k": self.config.getint("RETRIEVER", "fetchK"),
                },
            )
        except Exception as e:
            # Best-effort by design: report the failure through both the
            # logger and stdout, then signal it to the caller with None.
            error = CustomException(e)
            logger.error(error)
            print(error)
            return None