# NOTE: hosting-page banner captured with this file: "Spaces: Runtime error / Runtime error"
"""
Python Backend API to chat with private data
08/14/2023
D.M. Theekshana Samaradiwakara
"""
import os | |
from dotenv import load_dotenv | |
import glob | |
import torch | |
import pickle | |
import io | |
from langchain.vectorstores import Chroma | |
from langchain.vectorstores import FAISS | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from chromadb.config import Settings | |
load_dotenv() | |
import streamlit as st | |
# Name of the embeddings model, taken from the EMBEDDINGS_MODEL_NAME
# environment variable (load_dotenv() has already run at this point, so a
# value supplied through a .env file is visible here). None when unset.
embeddings_model_name = os.getenv("EMBEDDINGS_MODEL_NAME")

# Shared embedding function handed to every Chroma store built or opened
# by the helpers in this module.
embeddings = HuggingFaceEmbeddings(
    model_name=embeddings_model_name,
)
def does_chroma_vectorstore_exist(persist_directory: str) -> bool:
    """Return True if a persisted Chroma vector store is found in a directory.

    The directory counts as a vector store only when all of these hold:

    * it contains an ``index`` sub-directory,
    * it contains both ``chroma-collections.parquet`` and
      ``chroma-embeddings.parquet``,
    * the ``index`` sub-directory holds more than three ``*.bin`` / ``*.pkl``
      files in total.

    Args:
        persist_directory: Path of the directory to inspect.

    Returns:
        True when every check above passes, otherwise False.
    """
    index_dir = os.path.join(persist_directory, 'index')
    if not os.path.exists(index_dir):
        return False

    required_files = ('chroma-collections.parquet', 'chroma-embeddings.parquet')
    if not all(
        os.path.exists(os.path.join(persist_directory, file_name))
        for file_name in required_files
    ):
        return False

    # Escape the directory part of the pattern: without this, a path that
    # contains glob metacharacters ('[', ']', '?', '*') is interpreted as a
    # pattern and an existing store is reported as missing.
    escaped_index_dir = glob.escape(index_dir)
    index_files = glob.glob(os.path.join(escaped_index_dir, '*.bin'))
    index_files += glob.glob(os.path.join(escaped_index_dir, '*.pkl'))

    # A working vector store is expected to have more than 3 index files.
    return len(index_files) > 3
def load_store(directory: str) -> Chroma:
    """Open the persisted Chroma vector store kept under ``data/<directory>``.

    Args:
        directory: Name of the store; it is resolved to ``data/<directory>``.

    Returns:
        A ``Chroma`` instance bound to the resolved path, configured with the
        ``duckdb+parquet`` backend, telemetry disabled, and the module-level
        ``embeddings`` as its embedding function.

    Raises:
        Exception: If no vector store is found at the resolved path, or if
            building the ``Chroma`` object fails. In the latter case the
            underlying error is chained as ``__cause__``.
    """
    index_path = "data/{0}".format(directory)

    # Refuse early when the directory does not look like a persisted store.
    if not does_chroma_vectorstore_exist(index_path):
        raise Exception(f"A vector store in directory {directory} is not created. Please choose a valid one")

    try:
        chroma_settings = Settings(
            chroma_db_impl='duckdb+parquet',
            persist_directory=index_path,
            anonymized_telemetry=False,
        )
        return Chroma(
            persist_directory=index_path,
            embedding_function=embeddings,
            client_settings=chroma_settings,
        )
    except Exception as e:
        # Chain the original error so its traceback is not lost when the
        # failure is reported with the friendlier message.
        raise Exception(f"Error loading vector store: {e} ") from e
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads pickled torch storages with ``map_location='cpu'``.

    Every global is resolved by the standard ``pickle.Unpickler`` except
    ``torch.storage._load_from_bytes``, which is swapped for a loader that
    calls ``torch.load`` on the raw bytes with ``map_location='cpu'``.

    NOTE: like any unpickler, this should only be fed trusted data.
    """

    def find_class(self, module, name):
        """Resolve a pickled global, intercepting torch's byte-storage loader."""
        if (module, name) != ('torch.storage', '_load_from_bytes'):
            return super().find_class(module, name)

        def _load_on_cpu(raw_bytes):
            # Wrap the bytes in a file-like object, as torch.load expects.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return _load_on_cpu
def create_db(document_splits, persist_directory):
    """Build a new Chroma vector store from document splits.

    Args:
        document_splits: Documents passed to ``Chroma.from_documents``.
        persist_directory: Directory handed to Chroma as its persist location.

    Returns:
        The object returned by ``Chroma.from_documents``, created with the
        module-level ``embeddings`` as the embedding function.
    """
    store_options = {
        "documents": document_splits,
        "embedding": embeddings,
        "persist_directory": persist_directory,
    }
    return Chroma.from_documents(**store_options)
def save_files(persist_directory, document_splits):
    """Save document splits into the vector store under ``data/<persist_directory>``.

    When a vector store already exists at that location the splits are added
    to it; otherwise a new store is created from them. In both cases the
    store is persisted before returning.

    Args:
        persist_directory: Name of the store; it is resolved to
            ``data/<persist_directory>``, the same layout ``load_store`` uses.
        document_splits: Documents to add to (or create) the store.
    """
    print("Saving document splits...")

    # Resolve the path once, up front: both branches need it, and the
    # existence check must look at the same location the store is written to.
    index_path = "data/{0}".format(persist_directory)

    if does_chroma_vectorstore_exist(index_path):
        print("Updating existing vector store. May take some minutes...")
        db = Chroma(
            persist_directory=index_path,
            embedding_function=embeddings,
        )
        # Use the synchronous API: aadd_documents only returns a coroutine,
        # which was never awaited, so nothing was actually added.
        db.add_documents(document_splits)
    else:
        print("Creating new vector store. May take some minutes...")
        db = create_db(document_splits, index_path)

    db.persist()