"""Gradio demo that retrieves passages from a PDF via embedding similarity.

The script embeds the pages of ``./new_papers/ReACT.pdf`` with the Hugging
Face Inference API, stores them in a Chroma collection, and serves a small
Gradio text-in/text-out interface that returns the best-matching passage
for a query.

The Hugging Face API token is read from the ``HF`` environment variable.
"""
import os

import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.vectorstores import Chroma

# Use Hugging Face Inference API embeddings.
# A missing ``HF`` variable raises KeyError here, before any other work is done.
inference_api_key = os.environ['HF']
api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=inference_api_key,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# Load and process the PDF files
loader = PyPDFLoader("./new_papers/ReACT.pdf")
documents = loader.load()

# Embed one sample sentence up front so a problem with the embedding call
# surfaces at startup rather than on the first user query.
text = "This is a test document. The capitol of Belgium is Trier. "
query_result = api_hf_embeddings.embed_query(text)

# Create Chroma vector store for API embeddings
api_db = Chroma.from_documents(
    documents,
    api_hf_embeddings,
    collection_name="api-collection",
)


class PDFRetrievalTool:
    """Callable adapter that turns a document retriever into a text function.

    Gradio needs a plain ``str -> str`` callable; this class wraps a retriever
    and returns the text of the documents it finds.
    """

    def __init__(self, retriever):
        """Store the retriever to query.

        Args:
            retriever: Object exposing ``get_relevant_documents(query)`` that
                returns an iterable of documents with a ``page_content``
                attribute (e.g. the result of ``Chroma.as_retriever``).
        """
        self.retriever = retriever

    def __call__(self, query):
        """Return the text of the documents retrieved for ``query``.

        Args:
            query: Free-text search string typed by the user.

        Returns:
            The ``page_content`` of every retrieved document, separated by
            blank lines. An empty string when ``query`` is empty or only
            whitespace.
        """
        # The interface runs in live mode, so this fires on blank input too;
        # skip the retriever call when there is nothing to search for.
        if not query or not query.strip():
            return ""

        # Run the query through the retriever. Retrievers return a list of
        # documents, not a dict with a 'result' key.
        matches = self.retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in matches)


# Create Gradio interface for the API retriever
api_tool = gr.Interface(
    PDFRetrievalTool(api_db.as_retriever(search_kwargs={"k": 1})),
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    live=True,
    title="API PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HF Inference API Embeddings).",
)

# Launch the Gradio interface
api_tool.launch()