Update app.py
Browse files
app.py
CHANGED
@@ -1,44 +1,41 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
from langchain.vectorstores import Chroma
|
3 |
from langchain.document_loaders import PyPDFLoader
|
4 |
-
from langchain.embeddings import
|
5 |
|
6 |
-
#
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
)
|
12 |
-
# Add a padding token to the tokenizer
|
13 |
-
hf.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
|
14 |
|
15 |
# Load and process the PDF files
|
16 |
loader = PyPDFLoader("./new_papers/ReACT.pdf")
|
17 |
-
|
18 |
-
#loader = PyPDFLoader('./new_papers/', glob="./*.pdf")
|
19 |
documents = loader.load()
|
20 |
|
21 |
-
# Create
|
22 |
-
|
23 |
|
24 |
class PDFRetrievalTool:
|
25 |
-
def __init__(self):
|
26 |
-
self.retriever =
|
27 |
|
28 |
def __call__(self, query):
|
29 |
# Run the query through the retriever
|
30 |
response = self.retriever.run(query)
|
31 |
return response['result']
|
32 |
|
33 |
-
# Create
|
34 |
-
|
35 |
-
PDFRetrievalTool(),
|
36 |
inputs=gr.Textbox(),
|
37 |
outputs=gr.Textbox(),
|
38 |
live=True,
|
39 |
-
title="PDF Retrieval Tool",
|
40 |
-
description="This tool indexes PDF documents and retrieves relevant answers based on a given query.",
|
41 |
)
|
42 |
|
43 |
# Launch the Gradio interface
|
44 |
-
|
|
|
1 |
import gradio as gr
|
2 |
+
import os
|
3 |
from langchain.vectorstores import Chroma
|
4 |
from langchain.document_loaders import PyPDFLoader
|
5 |
+
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
|
6 |
|
7 |
+
# Build the embedding client. The access token is read from the environment
# variable named "hf"; a missing variable raises KeyError at import time.
inference_api_key = os.environ['hf']
api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",
    api_key=inference_api_key,
)

# Read the source PDF into LangChain documents.
loader = PyPDFLoader("./new_papers/ReACT.pdf")
documents = loader.load()

# Index the loaded documents in a Chroma collection using the embedding
# client configured above.
api_db = Chroma.from_documents(
    documents,
    api_hf_embeddings,
    collection_name="api-collection",
)
|
20 |
|
21 |
class PDFRetrievalTool:
    """Callable that turns a text query into retrieved text.

    An instance is handed to ``gr.Interface`` as the prediction function, so
    ``__call__`` receives the textbox contents and returns the string to show.
    """

    def __init__(self, retriever):
        """Store the lookup backend.

        Args:
            retriever: Either an object exposing
                ``get_relevant_documents(query)`` (for example the result of
                ``vector_store.as_retriever()``) or a chain-like object
                exposing ``run(query)``.
        """
        self.retriever = retriever

    def __call__(self, query):
        """Return the text retrieved for *query*.

        Args:
            query: The user's question.

        Returns:
            The ``page_content`` of every retrieved document joined by blank
            lines when the backend is a retriever; otherwise the ``'result'``
            entry (or the string form) of whatever ``run(query)`` returned.
            An empty or whitespace-only query yields ``""``.
        """
        # The interface below is created with live=True, so this can be
        # called with an empty textbox; skip the backend call in that case.
        if not query or not str(query).strip():
            return ""

        # A vector-store retriever has no ``run`` method; use its document
        # lookup API and surface the matched passages as plain text.
        fetch_documents = getattr(self.retriever, "get_relevant_documents", None)
        if fetch_documents is not None:
            return "\n\n".join(doc.page_content for doc in fetch_documents(query))

        # Chain-like backend: keep the original ``run`` call, but accept both
        # a mapping with a 'result' key and a bare string answer.
        response = self.retriever.run(query)
        if isinstance(response, dict):
            return response['result']
        return str(response)
|
29 |
|
30 |
+
# Gradio UI for the API-embeddings index: one textbox in, one textbox out.
# The retriever is requested with search_kwargs k=1 (presumably one matching
# document per query -- confirm against the vector store's retriever docs).
api_tool = gr.Interface(
    fn=PDFRetrievalTool(api_db.as_retriever(search_kwargs={"k": 1})),
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    title="API PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HF Inference API Embeddings).",
    live=True,
)

# Start serving the interface.
api_tool.launch()
|