rag-tool

Sleeping

Chris4K committed on Nov 24, 2023

Commit

f8472cb

•

1 Parent(s): c99439e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,21 +17,24 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
 loader = PyPDFLoader("./new_papers/ReACT.pdf")
 documents = loader.load()
 print("-----------")
-print(documents)
 print("-----------")
 # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
-text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
 # Add these lines before creating the Chroma vector store
 #print("Length of embeddings: %s", len(api_hf_embeddings))
 print("Length of documents: %s", len(documents))
 print("Length of vdocuments: %s", len(vdocuments))
-if vdocuments:
-    first_document_embeddings = vdocuments[0].embeddings
     print("Length of embeddings for the first document: %s", len(first_document_embeddings))
 # Create Chroma vector store for API embeddings
 api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")

 loader = PyPDFLoader("./new_papers/ReACT.pdf")
 documents = loader.load()
 print("-----------")
+print(documents[0])
 print("-----------")
 # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
+text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
 # Add these lines before creating the Chroma vector store
 #print("Length of embeddings: %s", len(api_hf_embeddings))
 print("Length of documents: %s", len(documents))
 print("Length of vdocuments: %s", len(vdocuments))
+# Add these lines before creating the Chroma vector store
+#logger.debug("Length of vdocuments: %s", len(vdocuments))
+if vdocuments and 'embeddings' in vdocuments[0]:
+    first_document_embeddings = vdocuments[0]['embeddings']
     print("Length of embeddings for the first document: %s", len(first_document_embeddings))
 # Create Chroma vector store for API embeddings
 api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")