Chris4K committed on
Commit
037c950
1 Parent(s): ae1abcc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -20
app.py CHANGED
@@ -1,44 +1,41 @@
1
  import gradio as gr
 
2
  from langchain.vectorstores import Chroma
3
  from langchain.document_loaders import PyPDFLoader
4
- from langchain.embeddings import HuggingFaceInstructEmbeddings
5
 
6
- # Initialize the HuggingFaceInstructEmbeddings
7
- hf = HuggingFaceInstructEmbeddings(
8
- model_name="gpt2",
9
- embed_instruction="Represent the document for retrieval: ",
10
- query_instruction="Represent the query for retrieval: "
11
  )
12
- # Add a padding token to the tokenizer
13
- hf.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
14
 
15
  # Load and process the PDF files
16
  loader = PyPDFLoader("./new_papers/ReACT.pdf")
17
-
18
- #loader = PyPDFLoader('./new_papers/', glob="./*.pdf")
19
  documents = loader.load()
20
 
21
- # Create a Chroma vector store from the PDF documents
22
- db = Chroma.from_documents(documents, hf, collection_name="my-collection")
23
 
24
  class PDFRetrievalTool:
25
- def __init__(self):
26
- self.retriever = db.as_retriever(search_kwargs={"k": 1})
27
 
28
  def __call__(self, query):
29
  # Run the query through the retriever
30
  response = self.retriever.run(query)
31
  return response['result']
32
 
33
- # Create the Gradio interface using the PDFRetrievalTool
34
- tool = gr.Interface(
35
- PDFRetrievalTool(),
36
  inputs=gr.Textbox(),
37
  outputs=gr.Textbox(),
38
  live=True,
39
- title="PDF Retrieval Tool",
40
- description="This tool indexes PDF documents and retrieves relevant answers based on a given query.",
41
  )
42
 
43
  # Launch the Gradio interface
44
- tool.launch()
 
1
  import gradio as gr
2
+ import os
3
  from langchain.vectorstores import Chroma
4
  from langchain.document_loaders import PyPDFLoader
5
+ from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
6
 
7
+ # Use Hugging Face Inference API embeddings
8
+ inference_api_key = os.environ['hf']
9
+ api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
10
+ api_key=inference_api_key,
11
+ model_name="sentence-transformers/all-MiniLM-l6-v2"
12
  )
 
 
13
 
14
  # Load and process the PDF files
15
  loader = PyPDFLoader("./new_papers/ReACT.pdf")
 
 
16
  documents = loader.load()
17
 
18
+ # Create Chroma vector store for API embeddings
19
+ api_db = Chroma.from_documents(documents, api_hf_embeddings, collection_name="api-collection")
20
 
21
  class PDFRetrievalTool:
22
+ def __init__(self, retriever):
23
+ self.retriever = retriever
24
 
25
  def __call__(self, query):
26
  # Run the query through the retriever
27
  response = self.retriever.run(query)
28
  return response['result']
29
 
30
+ # Create Gradio interface for the API retriever
31
+ api_tool = gr.Interface(
32
+ PDFRetrievalTool(api_db.as_retriever(search_kwargs={"k": 1})),
33
  inputs=gr.Textbox(),
34
  outputs=gr.Textbox(),
35
  live=True,
36
+ title="API PDF Retrieval Tool",
37
+ description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HF Inference API Embeddings).",
38
  )
39
 
40
  # Launch the Gradio interface
41
+ api_tool.launch()