Oritsemisan
committed on
Commit
•
76bcde6
1
Parent(s):
53fadf1
Update retrieverV2.py
Browse files- retrieverV2.py +3 -13
retrieverV2.py
CHANGED
@@ -17,17 +17,14 @@ db_path = 'chroma_db'
|
|
17 |
def process_pdf_document(file_path_list):
|
18 |
'''
|
19 |
Process a PDF document and return the documents and text splitters
|
20 |
-
|
21 |
Args:
|
22 |
file_path_list (list): The paths to the PDF documents
|
23 |
parent_chunk_size (int): The size of the parent chunks
|
24 |
child_chunk_size (int): The size of the child chunks
|
25 |
-
|
26 |
Returns:
|
27 |
documents (list): The list of documents
|
28 |
parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
|
29 |
child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
|
30 |
-
|
31 |
'''
|
32 |
# # Load the PDF document
|
33 |
# loader = PyMuPDFLoader(file_path)
|
@@ -46,15 +43,12 @@ def process_pdf_document(file_path_list):
|
|
46 |
def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
|
47 |
'''
|
48 |
Create the vectorstore and store for the documents
|
49 |
-
|
50 |
Args:
|
51 |
embeddings_model (str): The name of the embeddings model
|
52 |
documents (list): The list of documents
|
53 |
-
|
54 |
Returns:
|
55 |
vectorstore (Chroma): The vectorstore
|
56 |
store (InMemoryStore): The store
|
57 |
-
|
58 |
'''
|
59 |
|
60 |
# Initialize the embedding model
|
@@ -84,13 +78,11 @@ def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
|
|
84 |
def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter):
|
85 |
'''
|
86 |
Create the retriever for the RAG model
|
87 |
-
|
88 |
Args:
|
89 |
vectorstore (Chroma): The vectorstore
|
90 |
store (InMemoryStore): The store
|
91 |
parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
|
92 |
child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
|
93 |
-
|
94 |
Returns:
|
95 |
retriever (ParentDocumentRetriever): The retriever
|
96 |
|
@@ -101,12 +93,10 @@ def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter
|
|
101 |
docstore=store,
|
102 |
child_splitter=child_splitter,
|
103 |
parent_splitter=parent_splitter,
|
104 |
-
|
105 |
)
|
106 |
|
107 |
-
retriever.add_documents(documents)
|
108 |
# retriever = vectorstore.as_retriever()
|
109 |
|
110 |
-
return retriever
|
111 |
-
|
112 |
-
|
|
|
17 |
def process_pdf_document(file_path_list):
|
18 |
'''
|
19 |
Process a PDF document and return the documents and text splitters
|
|
|
20 |
Args:
|
21 |
file_path_list (list): The paths to the PDF documents
|
22 |
parent_chunk_size (int): The size of the parent chunks
|
23 |
child_chunk_size (int): The size of the child chunks
|
|
|
24 |
Returns:
|
25 |
documents (list): The list of documents
|
26 |
parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
|
27 |
child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
|
|
|
28 |
'''
|
29 |
# # Load the PDF document
|
30 |
# loader = PyMuPDFLoader(file_path)
|
|
|
43 |
def create_vectorstore(embeddings_model="all-MiniLM-L6-v2"):
|
44 |
'''
|
45 |
Create the vectorstore and store for the documents
|
|
|
46 |
Args:
|
47 |
embeddings_model (str): The name of the embeddings model
|
48 |
documents (list): The list of documents
|
|
|
49 |
Returns:
|
50 |
vectorstore (Chroma): The vectorstore
|
51 |
store (InMemoryStore): The store
|
|
|
52 |
'''
|
53 |
|
54 |
# Initialize the embedding model
|
|
|
78 |
def rag_retriever(vectorstore, store, documents, parent_splitter, child_splitter):
|
79 |
'''
|
80 |
Create the retriever for the RAG model
|
|
|
81 |
Args:
|
82 |
vectorstore (Chroma): The vectorstore
|
83 |
store (InMemoryStore): The store
|
84 |
parent_splitter (RecursiveCharacterTextSplitter): The text splitter for the parent documents
|
85 |
child_splitter (RecursiveCharacterTextSplitter): The text splitter for the child documents
|
|
|
86 |
Returns:
|
87 |
retriever (ParentDocumentRetriever): The retriever
|
88 |
|
|
|
93 |
docstore=store,
|
94 |
child_splitter=child_splitter,
|
95 |
parent_splitter=parent_splitter,
|
96 |
+
docs=documents
|
97 |
)
|
98 |
|
99 |
+
# retriever.add_documents(documents)
|
100 |
# retriever = vectorstore.as_retriever()
|
101 |
|
102 |
+
return retriever
|
|
|
|