Chris4K committed on
Commit
f8472cb
1 Parent(s): c99439e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -17,21 +17,24 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
17
  loader = PyPDFLoader("./new_papers/ReACT.pdf")
18
  documents = loader.load()
19
  print("-----------")
20
- print(documents)
21
  print("-----------")
22
 
23
  # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
24
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
25
  vdocuments = text_splitter.split_documents(documents)
26
 
27
  # Add these lines before creating the Chroma vector store
28
  #print("Length of embeddings: %s", len(api_hf_embeddings))
29
  print("Length of documents: %s", len(documents))
30
  print("Length of vdocuments: %s", len(vdocuments))
31
- if vdocuments:
32
- first_document_embeddings = vdocuments[0].embeddings
 
 
33
  print("Length of embeddings for the first document: %s", len(first_document_embeddings))
34
 
 
35
  # Create Chroma vector store for API embeddings
36
  api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
37
 
 
17
  loader = PyPDFLoader("./new_papers/ReACT.pdf")
18
  documents = loader.load()
19
  print("-----------")
20
+ print(documents[0])
21
  print("-----------")
22
 
23
  # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
24
+ text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
25
  vdocuments = text_splitter.split_documents(documents)
26
 
27
  # Add these lines before creating the Chroma vector store
28
  #print("Length of embeddings: %s", len(api_hf_embeddings))
29
  print("Length of documents: %s", len(documents))
30
  print("Length of vdocuments: %s", len(vdocuments))
31
+ # Add these lines before creating the Chroma vector store
32
+ #logger.debug("Length of vdocuments: %s", len(vdocuments))
33
+ if vdocuments and 'embeddings' in vdocuments[0]:
34
+ first_document_embeddings = vdocuments[0]['embeddings']
35
  print("Length of embeddings for the first document: %s", len(first_document_embeddings))
36
 
37
+
38
  # Create Chroma vector store for API embeddings
39
  api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
40