rag-tool

Sleeping

Chris4K committed on Nov 23, 2023

Commit

403222a

•

1 Parent(s): cbed288

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,11 +19,25 @@ print("-----------")
 print(documents)
 print("-----------")
 # Create Chroma vector store for API embeddings
-api_db = Chroma.from_documents(documents, api_hf_embeddings, collection_name="api-collection")
 #api_db = Chroma.from_texts(documents, api_hf_embeddings, collection_name="api-collection")
 class PDFRetrievalTool:
     def __init__(self, retriever):
         self.retriever = retriever

 print(documents)
 print("-----------")
+# Load the document, split it into chunks, embed each chunk and load it into the vector store.
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+vdocuments = text_splitter.split_documents(documents)
 # Create Chroma vector store for API embeddings
+api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
 #api_db = Chroma.from_texts(documents, api_hf_embeddings, collection_name="api-collection")
+#Similarity search
+query = "What did the president say about Ketanji Brown Jackson"
+docs = db.similarity_search(query)
+print(docs[0].page_content)
 class PDFRetrievalTool:
     def __init__(self, retriever):
         self.retriever = retriever