Chris4K committed on
Commit
018fb30
1 Parent(s): 11501be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -3
app.py CHANGED
@@ -1,4 +1,40 @@
1
- from transformers.tools.base import launch_gradio_demo
2
- from vector_store_retriever import VectorStoreRetrievalTool
 
 
3
 
4
- launch_gradio_demo(VectorStoreRetrievalTool)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain.vectorstores import Chroma
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
5
 
6
# Embedding model used both to index the PDFs and to embed incoming queries.
# The two instruction strings are the prompts configured for documents and
# for queries respectively.
hf = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    embed_instruction="Represent the document for retrieval: ",
    query_instruction="Represent the query for retrieval: ",
)

# Load every PDF found directly inside ./new_papers/.
# PyPDFLoader takes the path of ONE file and has no ``glob`` argument, so the
# directory is expanded here and each file gets its own loader.
from pathlib import Path  # local import: only this setup step needs it

pdf_paths = sorted(Path("./new_papers/").glob("*.pdf"))
if not pdf_paths:
    # Fail early with a readable message instead of building an empty index.
    raise FileNotFoundError("No PDF files found in ./new_papers/")

documents = []
for pdf_path in pdf_paths:
    documents.extend(PyPDFLoader(str(pdf_path)).load())

# Create a Chroma vector store from the PDF documents
db = Chroma.from_documents(documents, hf, collection_name="my-collection")
19
+
20
class PDFRetrievalTool:
    """Callable that returns the indexed PDF text most relevant to a query.

    Wraps a retriever built from the module-level ``db`` vector store so an
    instance can be handed to ``gr.Interface`` as its function.
    """

    def __init__(self):
        # Ask the vector store for a single result per query.
        self.retriever = db.as_retriever(search_kwargs={"k": 1})

    def __call__(self, query):
        """Return the text of the document(s) retrieved for ``query``.

        Args:
            query: Free-text search string.

        Returns:
            The ``page_content`` of every retrieved document, separated by
            blank lines, or an empty string when the query is blank or
            nothing is retrieved.
        """
        # Nothing to search for: skip the embedding/retrieval round trip.
        if not query or not query.strip():
            return ""

        # A retriever has no ``run`` method and does not return a
        # ``{'result': ...}`` mapping; it returns a list of documents.
        documents = self.retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in documents)
28
+
29
# Web UI: one text box for the query, one text box for the retrieved text.
# A PDFRetrievalTool instance is the function Gradio calls with the input.
# live=True asks Gradio to refresh the output as the input changes.
tool = gr.Interface(
    fn=PDFRetrievalTool(),
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    title="PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query.",
    live=True,
)

# Start serving the interface.
tool.launch()