Chris4K committed on
Commit
68b31c9
1 Parent(s): f7493dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -25
app.py CHANGED
@@ -14,45 +14,30 @@ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
14
  )
15
 
16
  # Load and process the PDF files
17
- loader = PyPDFLoader("new_papers/ReACT.pdf")
18
- loader
19
  documents = loader.load()
20
  print("-----------")
21
  print(documents)
22
  print("-----------")
23
 
24
- # Load the document, split it into chunks, embed each chunk and load it into the vector store.
25
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
26
  vdocuments = text_splitter.split_documents(documents)
27
 
28
-
29
-
30
-
31
-
32
-
33
  # Create Chroma vector store for API embeddings
34
  api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
35
- #api_db = Chroma.from_texts(documents, api_hf_embeddings, collection_name="api-collection")
36
 
37
- #Similarity search
38
- query = "What did the president say about Ketanji Brown Jackson"
39
- docs = db.similarity_search(query)
40
- print(docs[0].page_content)
41
-
42
-
43
- class PDFRetrievalTool:
44
- def __init__(self, retriever):
45
- self.retriever = retriever
46
-
47
- def __call__(self, query):
48
- # Run the query through the retriever
49
- response = self.retriever.run(query)
50
- return response['result']
51
 
 
52
  # Create Gradio interface for the API retriever
53
  api_tool = gr.Interface(
54
- PDFRetrievalTool(api_db.as_retriever(search_kwargs={"k": 1})),
55
- inputs=gr.Textbox(),
56
  outputs=gr.Textbox(),
57
  live=True,
58
  title="API PDF Retrieval Tool",
 
14
  )
15
 
16
  # Load and process the PDF files
17
+ loader = PyPDFLoader("/content/ReACT.pdf")
 
18
  documents = loader.load()
19
  print("-----------")
20
  print(documents)
21
  print("-----------")
22
 
23
+ # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
24
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
25
  vdocuments = text_splitter.split_documents(documents)
26
 
 
 
 
 
 
27
  # Create Chroma vector store for API embeddings
28
  api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
 
29
 
30
+ # Define the PDF retrieval function
31
+ def pdf_retrieval(query):
32
+ # Run the query through the retriever
33
+ response = api_db.similarity_search(query)
34
+ return response
 
 
 
 
 
 
 
 
 
35
 
36
+ # Create Gradio interface for the API retriever
37
  # Create Gradio interface for the API retriever
38
  api_tool = gr.Interface(
39
+ fn=pdf_retrieval,
40
+ inputs=[gr.Textbox()],
41
  outputs=gr.Textbox(),
42
  live=True,
43
  title="API PDF Retrieval Tool",