isayahc committed on
Commit
96f0f38
·
unverified ·
1 Parent(s): a2ede9f

Changed constant PERSIST_DIRECTORY to VECTOR_DATABASE_LOCATION

Browse files
config.py CHANGED
@@ -6,7 +6,7 @@ from langchain_huggingface import HuggingFaceEndpoint
6
  load_dotenv()
7
 
8
  SQLITE_FILE_NAME = os.getenv('SOURCES_CACHE')
9
- PERSIST_DIRECTORY = os.getenv('VECTOR_DATABASE_LOCATION')
10
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
11
  SEVEN_B_LLM_MODEL = os.getenv("SEVEN_B_LLM_MODEL")
12
  BERT_MODEL = os.getenv("BERT_MODEL")
 
6
  load_dotenv()
7
 
8
  SQLITE_FILE_NAME = os.getenv('SOURCES_CACHE')
9
+ VECTOR_DATABASE_LOCATION = os.getenv('VECTOR_DATABASE_LOCATION')
10
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
11
  SEVEN_B_LLM_MODEL = os.getenv("SEVEN_B_LLM_MODEL")
12
  BERT_MODEL = os.getenv("BERT_MODEL")
rag_app/get_db_retriever.py CHANGED
@@ -10,19 +10,52 @@ from langchain.chains import RetrievalQA
10
  # prompt template
11
  from langchain.prompts import PromptTemplate
12
  from langchain.memory import ConversationBufferMemory
13
- from config import EMBEDDING_MODEL
14
 
15
 
16
- def get_db_retriever(vector_db:str=None):
17
- embeddings = HuggingFaceHubEmbeddings(repo_id=EMBEDDING_MODEL)
 
18
 
19
- if not vector_db:
20
- FAISS_INDEX_PATH='./vectorstore/py-faiss-multi-mpnet-500'
21
- else:
22
- FAISS_INDEX_PATH=vector_db
23
- db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
24
 
25
- retriever = db.as_retriever()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  return retriever
28
 
 
10
  # prompt template
11
  from langchain.prompts import PromptTemplate
12
  from langchain.memory import ConversationBufferMemory
13
+ from config import EMBEDDING_MODEL, VECTOR_DATABASE_LOCATION
14
 
15
 
16
+ def get_db_retriever():
17
+ """
18
+ Creates and returns a retriever object based on a FAISS vector database.
19
 
20
+ This function initializes an embedding model and loads a pre-existing FAISS
21
+ vector database from a local location. It then creates a retriever from this
22
+ database.
 
 
23
 
24
+ Returns:
25
+ --------
26
+ retriever : langchain.vectorstores.FAISS.VectorStoreRetriever
27
+ A retriever object that can be used to fetch relevant documents from the
28
+ vector database.
29
+
30
+ Global Variables Used:
31
+ ----------------------
32
+ EMBEDDING_MODEL : str
33
+ The identifier for the Hugging Face Hub embedding model to be used.
34
+ VECTOR_DATABASE_LOCATION : str
35
+ The local path where the FAISS vector database is stored.
36
+
37
+ Dependencies:
38
+ -------------
39
+ - langchain_huggingface.HuggingFaceHubEmbeddings
40
+ - langchain_community.vectorstores.FAISS
41
 
42
+ Note:
43
+ -----
44
+ This function assumes that a FAISS vector database has already been created
45
+ and saved at the location specified by VECTOR_DATABASE_LOCATION.
46
+ """
47
+
48
+ # Initialize the embedding model
49
+ embeddings = HuggingFaceHubEmbeddings(repo_id=EMBEDDING_MODEL)
50
+
51
+ # Load the FAISS vector database from the local storage
52
+ db = FAISS.load_local(
53
+ VECTOR_DATABASE_LOCATION,
54
+ embeddings,
55
+ )
56
+
57
+ # Create and return a retriever from the loaded database
58
+ retriever = db.as_retriever()
59
+
60
  return retriever
61
 
rag_app/structured_tools/structured_tools.py CHANGED
@@ -13,9 +13,9 @@ from rag_app.utils.utils import (
13
  )
14
  import chromadb
15
  import os
16
- from config import db, PERSIST_DIRECTORY, EMBEDDING_MODEL
17
 
18
- if not os.path.exists(PERSIST_DIRECTORY):
19
  get_chroma_vs()
20
 
21
  @tool
@@ -24,7 +24,7 @@ def memory_search(query:str) -> str:
24
  This is your primary source to start your search with checking what you already have learned from the past, before going online."""
25
  # Since we have more than one collections we should change the name of this tool
26
  client = chromadb.PersistentClient(
27
- path=PERSIST_DIRECTORY,
28
  )
29
 
30
  collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
@@ -71,7 +71,7 @@ def knowledgeBase_search(query:str) -> str:
71
  # #collection_name=collection_name,
72
  # embedding_function=embedding_function,
73
  # )
74
- vector_db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embedding_function)
75
  retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={'k':5, 'fetch_k':10})
76
  # This is deprecated, changed to invoke
77
  # LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.
 
13
  )
14
  import chromadb
15
  import os
16
+ from config import db, VECTOR_DATABASE_LOCATION, EMBEDDING_MODEL
17
 
18
+ if not os.path.exists(VECTOR_DATABASE_LOCATION):
19
  get_chroma_vs()
20
 
21
  @tool
 
24
  This is your primary source to start your search with checking what you already have learned from the past, before going online."""
25
  # Since we have more than one collections we should change the name of this tool
26
  client = chromadb.PersistentClient(
27
+ path=VECTOR_DATABASE_LOCATION,
28
  )
29
 
30
  collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
 
71
  # #collection_name=collection_name,
72
  # embedding_function=embedding_function,
73
  # )
74
+ vector_db = Chroma(persist_directory=VECTOR_DATABASE_LOCATION, embedding_function=embedding_function)
75
  retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={'k':5, 'fetch_k':10})
76
  # This is deprecated, changed to invoke
77
  # LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.