rag-tool

Sleeping

Chris4K committed on Nov 24, 2023

Commit

68b31c9

•

1 Parent(s): f7493dd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,45 +14,30 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
 )
 # Load and process the PDF files
-loader = PyPDFLoader("new_papers/ReACT.pdf")
-loader
 documents = loader.load()
 print("-----------")
 print(documents)
 print("-----------")
-# Load the document, split it into chunks, embed each chunk and load it into the vector store.
 text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
 # Create Chroma vector store for API embeddings
 api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
-#api_db = Chroma.from_texts(documents, api_hf_embeddings, collection_name="api-collection")
-#Similarity search
-query = "What did the president say about Ketanji Brown Jackson"
-docs = db.similarity_search(query)
-print(docs[0].page_content)
-class PDFRetrievalTool:
-    def __init__(self, retriever):
-        self.retriever = retriever
-    def __call__(self, query):
-        # Run the query through the retriever
-        response = self.retriever.run(query)
-        return response['result']
 # Create Gradio interface for the API retriever
 api_tool = gr.Interface(
-    PDFRetrievalTool(api_db.as_retriever(search_kwargs={"k": 1})),
-    inputs=gr.Textbox(),
     outputs=gr.Textbox(),
     live=True,
     title="API PDF Retrieval Tool",

 )
 # Load and process the PDF files
+loader = PyPDFLoader("/content/ReACT.pdf")
 documents = loader.load()
 print("-----------")
 print(documents)
 print("-----------")
+# Load the document, split it into chunks, embed each chunk, and load it into the vector store.
 text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
 # Create Chroma vector store for API embeddings
 api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
+# Define the PDF retrieval function
+def pdf_retrieval(query):
+    # Run a similarity search for the query directly against the Chroma vector store
+    response = api_db.similarity_search(query)
+    return response
+# Create Gradio interface for the API retriever
 # Create Gradio interface for the API retriever
 api_tool = gr.Interface(
+    fn=pdf_retrieval,
+    inputs=[gr.Textbox()],
     outputs=gr.Textbox(),
     live=True,
     title="API PDF Retrieval Tool",