|
import os
import time
from datetime import datetime

import gradio as gr
import httpx
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_parse import LlamaParse
|
|
|
|
|
# --- RAG pipeline setup (runs once at import time) ---

# LLM used to generate answers, served via the Hugging Face Inference API.
llm = HuggingFaceInferenceAPI(model_name="tiiuae/falcon-7b-instruct")

# SECURITY: the API key was hard-coded here — never commit credentials.
# Read it from the environment instead; fail fast with a clear message.
_llama_cloud_key = os.environ.get("LLAMA_CLOUD_API_KEY")
if not _llama_cloud_key:
    raise RuntimeError(
        "Set the LLAMA_CLOUD_API_KEY environment variable with your LlamaParse API key."
    )
parser = LlamaParse(api_key=_llama_cloud_key, result_type='markdown')

# Route .pdf files through LlamaParse; other extensions use the default readers.
file_extractor = {'.pdf': parser}
documents = SimpleDirectoryReader('data/', file_extractor=file_extractor).load_data()

# Local embedding model used both for indexing and for embedding queries.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
query_engine = vector_index.as_query_engine(llm=llm)
|
|
|
|
|
def query_with_retry(query, max_retries=3, wait_time=5):
    """Run *query* against the global query engine, retrying on read timeouts.

    Args:
        query: Natural-language question to send to the query engine.
        max_retries: Maximum number of attempts before giving up.
        wait_time: Seconds to sleep between attempts after a timeout.

    Returns:
        The engine's response object on success, or an error string when a
        non-timeout exception occurs (kept for backward compatibility with
        the Gradio handler, which displays the string directly).

    Raises:
        httpx.ReadTimeout: If every attempt times out.
    """
    for attempt in range(max_retries):
        try:
            start_time = datetime.now()
            response = query_engine.query(query)
            duration = (datetime.now() - start_time).total_seconds()
            # Timing was previously computed and silently discarded (dead
            # code); surface it so slow queries are visible in the logs.
            print(f"Query answered in {duration:.2f}s (attempt {attempt + 1})")
            return response
        except httpx.ReadTimeout:
            # Transient network timeout: back off and retry, unless this was
            # the final attempt, in which case propagate to the caller.
            if attempt < max_retries - 1:
                time.sleep(wait_time)
            else:
                raise
        except Exception as e:
            # Any other failure is reported as a user-visible string rather
            # than crashing the UI callback.
            return f"An error occurred: {e}"
|
|
|
|
|
def gradio_interface(query):
    """Gradio callback: answer *query* and return text for the output box.

    Coerces the result to ``str`` so the Textbox output always receives
    plain text — ``query_with_retry`` returns either a LlamaIndex response
    object (on success) or an error string, and the response object only
    renders as its answer text once stringified.
    """
    response = query_with_retry(query)
    return str(response)
|
|
|
|
|
# Build and launch the web UI.
# NOTE: ``live=True`` was removed — in live mode Gradio re-runs the handler
# on every keystroke, which would fire a full (slow, rate-limited) RAG query
# per character typed. With the default (live=False) the query runs only
# when the user submits.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Ask a question", placeholder="Enter your query here...", lines=2),
    outputs=gr.Textbox(label="Response", interactive=False),
    title="Document-based Question Answering",
    description="Ask questions based on the documents you uploaded. This model answers queries using your uploaded PDFs.",
)

iface.launch()
|
|