PeacePal

Sleeping

App Files Community

SwatGarg

Oritsemisan committed on Mar 27

Commit

c057c78

•

1 Parent(s): 57b7a80

Update retrieverV2.py (#5)

Browse files

- Update retrieverV2.py (76bcde657c2c129ef29a3db2bacc09b542481aae)

Co-authored-by: Meggison <Oritsemisan@users.noreply.huggingface.co>

Files changed (1) hide show

retrieverV2.py +3 -13

retrieverV2.py CHANGED Viewed

@@ -17,17 +17,14 @@ db_path = 'chroma_db'
 def process_pdf_document(file_path_list):
     '''
     Process a PDF document and return the documents and text splitters
     Args:
         file_path (str): The path to the PDF document
         parent_chunk_size (int): The size of the parent chunks
         child_chunk_size (int): The size of the child chunks
     Returns:
         documents (list): The list of documents
         parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
         child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
     '''
     # # Load the PDF document
     # loader = PyMuPDFLoader(file_path)
@@ -46,15 +43,12 @@ def process_pdf_document(file_path_list):
 def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
     '''
     Create the vectorstore and store for the documents
     Args:
         embeddings_model (HuggingFaceEmbeddings): The embeddings model
         documents (list): The list of documents
     Returns:
         vectorstore (Chroma): The vectorstore
         store (InMemoryStore): The store
     '''
     # Initialize the embedding model
@@ -84,13 +78,11 @@ def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
 def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter):
     '''
     Create the retriever for the RAG model
     Args:
         vectorstore (Chroma): The vectorstore
         store (InMemoryStore): The store
         parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
         child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
     Returns:
         retriever (ParentDocumentRetriever): The retriever
@@ -101,12 +93,10 @@ def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter
         docstore=store,
         child_splitter=child_splitter,
         parent_splitter=parent_splitter,
-        # docs=documents
     )
-    retriever.add_documents(documents)
     # retriever = vectorstore.as_retriever()
-    return retriever

 def process_pdf_document(file_path_list):
     '''
     Process a PDF document and return the documents and text splitters
     Args:
         file_path (str): The path to the PDF document
         parent_chunk_size (int): The size of the parent chunks
         child_chunk_size (int): The size of the child chunks
     Returns:
         documents (list): The list of documents
         parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
         child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
     '''
     # # Load the PDF document
     # loader = PyMuPDFLoader(file_path)
 def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
     '''
     Create the vectorstore and store for the documents
     Args:
         embeddings_model (HuggingFaceEmbeddings): The embeddings model
         documents (list): The list of documents
     Returns:
         vectorstore (Chroma): The vectorstore
         store (InMemoryStore): The store
     '''
     # Initialize the embedding model
 def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter):
     '''
     Create the retriever for the RAG model
     Args:
         vectorstore (Chroma): The vectorstore
         store (InMemoryStore): The store
         parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
         child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
     Returns:
         retriever (ParentDocumentRetriever): The retriever
         docstore=store,
         child_splitter=child_splitter,
         parent_splitter=parent_splitter,
+        docs=documents
     )
+    # retriever.add_documents(documents)
     # retriever = vectorstore.as_retriever()
+    return retriever