SwatGarg Oritsemisan committed on
Commit
c057c78
1 Parent(s): 57b7a80

Update retrieverV2.py (#5)

Browse files

- Update retrieverV2.py (76bcde657c2c129ef29a3db2bacc09b542481aae)


Co-authored-by: Meggison <Oritsemisan@users.noreply.huggingface.co>

Files changed (1) hide show
  1. retrieverV2.py +3 -13
retrieverV2.py CHANGED
@@ -17,17 +17,14 @@ db_path = 'chroma_db'
17
  def process_pdf_document(file_path_list):
18
  '''
19
  Process a PDF document and return the documents and text splitters
20
-
21
  Args:
22
  file_path (str): The path to the PDF document
23
  parent_chunk_size (int): The size of the parent chunks
24
  child_chunk_size (int): The size of the child chunks
25
-
26
  Returns:
27
  documents (list): The list of documents
28
  parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
29
  child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
30
-
31
  '''
32
  # # Load the PDF document
33
  # loader = PyMuPDFLoader(file_path)
@@ -46,15 +43,12 @@ def process_pdf_document(file_path_list):
46
  def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
47
  '''
48
  Create the vectorstore and store for the documents
49
-
50
  Args:
51
  embeddings_model (HuggingFaceEmbeddings): The embeddings model
52
  documents (list): The list of documents
53
-
54
  Returns:
55
  vectorstore (Chroma): The vectorstore
56
  store (InMemoryStore): The store
57
-
58
  '''
59
 
60
  # Initialize the embedding model
@@ -84,13 +78,11 @@ def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
84
  def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter):
85
  '''
86
  Create the retriever for the RAG model
87
-
88
  Args:
89
  vectorstore (Chroma): The vectorstore
90
  store (InMemoryStore): The store
91
  parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
92
  child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
93
-
94
  Returns:
95
  retriever (ParentDocumentRetriever): The retriever
96
 
@@ -101,12 +93,10 @@ def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter
101
  docstore=store,
102
  child_splitter=child_splitter,
103
  parent_splitter=parent_splitter,
104
- # docs=documents
105
  )
106
 
107
- retriever.add_documents(documents)
108
  # retriever = vectorstore.as_retriever()
109
 
110
- return retriever
111
-
112
-
 
17
  def process_pdf_document(file_path_list):
18
  '''
19
  Process a PDF document and return the documents and text splitters
 
20
  Args:
21
  file_path (str): The path to the PDF document
22
  parent_chunk_size (int): The size of the parent chunks
23
  child_chunk_size (int): The size of the child chunks
 
24
  Returns:
25
  documents (list): The list of documents
26
  parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
27
  child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
 
28
  '''
29
  # # Load the PDF document
30
  # loader = PyMuPDFLoader(file_path)
 
43
  def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
44
  '''
45
  Create the vectorstore and store for the documents
 
46
  Args:
47
  embeddings_model (HuggingFaceEmbeddings): The embeddings model
48
  documents (list): The list of documents
 
49
  Returns:
50
  vectorstore (Chroma): The vectorstore
51
  store (InMemoryStore): The store
 
52
  '''
53
 
54
  # Initialize the embedding model
 
78
  def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter):
79
  '''
80
  Create the retriever for the RAG model
 
81
  Args:
82
  vectorstore (Chroma): The vectorstore
83
  store (InMemoryStore): The store
84
  parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
85
  child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
 
86
  Returns:
87
  retriever (ParentDocumentRetriever): The retriever
88
 
 
93
  docstore=store,
94
  child_splitter=child_splitter,
95
  parent_splitter=parent_splitter,
96
+ docs=documents
97
  )
98
 
99
+ # retriever.add_documents(documents)
100
  # retriever = vectorstore.as_retriever()
101
 
102
+ return retriever