Chris4K committed on
Commit
403222a
1 Parent(s): cbed288

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -19,11 +19,25 @@ print("-----------")
19
  print(documents)
20
  print("-----------")
21
 
 
 
 
 
 
 
 
 
22
 
23
  # Create Chroma vector store for API embeddings
24
- api_db = Chroma.from_documents(documents, api_hf_embeddings, collection_name="api-collection")
25
  #api_db = Chroma.from_texts(documents, api_hf_embeddings, collection_name="api-collection")
26
 
 
 
 
 
 
 
27
  class PDFRetrievalTool:
28
  def __init__(self, retriever):
29
  self.retriever = retriever
 
19
  print(documents)
20
  print("-----------")
21
 
22
+ # Load the document, split it into chunks, embed each chunk and load it into the vector store.
23
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
24
+ vdocuments = text_splitter.split_documents(documents)
25
+
26
+
27
+
28
+
29
+
30
 
31
  # Create Chroma vector store for API embeddings
32
+ api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
33
  #api_db = Chroma.from_texts(documents, api_hf_embeddings, collection_name="api-collection")
34
 
35
+ #Similarity search
36
+ query = "What did the president say about Ketanji Brown Jackson"
37
+ docs = db.similarity_search(query)
38
+ print(docs[0].page_content)
39
+
40
+
41
  class PDFRetrievalTool:
42
  def __init__(self, retriever):
43
  self.retriever = retriever