# Source: Hugging Face file view of app.py
# Author: kanishka089 - commit 794af9d (verified), message "Update app.py"
# File size: 5.07 kB - virus scan: no virus
import os
import requests
import tempfile
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import pdf
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.embeddings import GPT4AllEmbeddings
# --- Runtime configuration -------------------------------------------------
# Read environment variables (including the Hugging Face token) from the
# local env file before anything below looks them up.
load_dotenv('secret.env')  # remove string if hosting in huggingface
token = os.environ.get('HUGGINGFACE_TOKEN')

# Hosted chat model used to generate the answers.
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct", token=token)

# Settings handed to GPT4AllEmbeddings when the vector store is built.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
# NOTE(review): "false" is a non-empty string and therefore truthy; if the
# embedding backend treats this flag as a boolean, downloads stay enabled.
# Confirm the intended behaviour before switching it to a real bool.
gpt4all_kwargs = {"allow_download": "false"}
# Function to download the PDF from a URL and load documents
def loadAndRetrieveDocuments(
    url: str,
    local_file_path: str,
    timeout: float = 30.0,
) -> VectorStoreRetriever:
    """Build a retriever over the PDF at ``url``, falling back to a local copy.

    The PDF is downloaded into a temporary file; if anything in the download
    step fails, ``local_file_path`` is loaded instead. The pages are split
    into overlapping chunks, embedded with GPT4All and stored in Chroma.

    Args:
        url: Location of the PDF to download.
        local_file_path: PDF used when the download does not succeed.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled server cannot block start-up forever.

    Returns:
        A retriever backed by the freshly built Chroma vector store.
    """
    downloaded_path = None  # set only once a temporary file actually exists
    try:
        # Attempt to download PDF
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Ensure we notice bad responses
        # Save PDF to a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            downloaded_path = temp_file.name
            temp_file.write(response.content)
        pdf_path = downloaded_path
    except Exception as e:
        # Deliberately broad: any download or write failure should fall back
        # to the bundled local file rather than abort start-up.
        print(f"Failed to download PDF from URL: {e}")
        pdf_path = local_file_path

    try:
        # Load the PDF from whichever path was selected above
        documents = pdf.PyPDFLoader(pdf_path).load()
    finally:
        # Remove the temporary file even when loading raises, and also when a
        # partially written download was abandoned in favour of the local file.
        if downloaded_path is not None:
            os.remove(downloaded_path)

    # Process documents
    textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documentSplits = textSplitter.split_documents(documents)
    embedding = GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs)
    vectorStore = Chroma.from_documents(documents=documentSplits, embedding=embedding)
    return vectorStore.as_retriever()
def formatDocuments(documents: list) -> str:
    """Concatenate the ``page_content`` of each document, blank-line separated.

    Args:
        documents: Objects exposing a ``page_content`` string attribute.

    Returns:
        The page texts joined by two newlines; an empty string for no documents.
    """
    page_texts = [doc.page_content for doc in documents]
    separator = "\n\n"
    return separator.join(page_texts)
# Source of the constitution PDF: the remote URL is tried first and the
# local file is what loadAndRetrieveDocuments uses when the download fails.
url = "http://www.parliament.lk/files/pdf/constitution.pdf"
local_file_path = "constitution.pdf"  # Local file path

# Build the retriever once, at import time, from the URL or the local file
retriever = loadAndRetrieveDocuments(url=url, local_file_path=local_file_path)

# Chat history: role/content message dicts appended to by ragChain
chat_history: list = []
def ragChain(question: str, max_history_turns: int = 5) -> str:
    """Answer ``question`` using retrieved context plus recent chat history.

    The question is used to retrieve documents, their text is placed in the
    prompt, and the chat model is asked to answer from that context only.
    Successful exchanges are appended to the module-level ``chat_history``.

    Args:
        question: The user's question.
        max_history_turns: Number of previous question/answer pairs that are
            sent with the request and kept in ``chat_history``. Older turns
            are dropped so the request does not grow without bound.

    Returns:
        The generated answer, ``"No response generated"`` when the model
        returns nothing, or a short hint when the question is blank.

    Note:
        ``chat_history`` is a single module-level list, so it is shared by
        every caller of this function.
    """
    # Nothing useful can be retrieved or answered for a blank question.
    if not question or not question.strip():
        return "Please enter a question."

    retrievedDocuments = retriever.invoke(question)
    formattedContext = formatDocuments(retrievedDocuments)
    formattedPrompt = (f"Question: {question}\n\n"
                       f"Context: {formattedContext}\n\n"
                       f"Please provide a detailed answer based solely on the provided context.")

    # Each turn is two messages (user + assistant). A limit of 0 must yield an
    # empty list explicitly because ``lst[-0:]`` would return the whole list.
    history_limit = 2 * max(max_history_turns, 0)
    recent_history = chat_history[-history_limit:] if history_limit else []
    messages = recent_history + [{"role": "user", "content": formattedPrompt}]

    response = client.chat_completion(
        messages=messages,
        max_tokens=700,
        stream=False
    )

    # Extract the generated response text using dataclass attributes
    generated_text = ""
    if response and response.choices:
        generated_text = response.choices[0].message.content or ""

    # Update chat history only for real answers, so empty assistant messages
    # never end up in later requests. The bare question is stored instead of
    # the context-stuffed prompt: replaying every earlier context on each turn
    # would inflate the request far faster than the conversation itself.
    if generated_text:
        chat_history.append({"role": "user", "content": question})
        chat_history.append({"role": "assistant", "content": generated_text})
        overflow = len(chat_history) - history_limit
        if overflow > 0:
            del chat_history[:overflow]

    return generated_text or "No response generated"
# Gradio interface: question box and buttons in one column, answer box in the other
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            textbox = gr.Textbox(label="Question")
            with gr.Row():
                buttonTerms = gr.Button("Terms of use")
                button = gr.Button("Submit")
        with gr.Column():
            output = gr.Textbox(label="Output", lines=25)

    def on_button_click(question):
        """Return the RAG pipeline's answer for the submitted question."""
        return ragChain(question)

    def on_term_button_click():
        """Return the terms-of-use disclaimer shown in the output box."""
        return ("The information provided by this application is generated using advanced technologies, including "
                "natural language processing models, document retrieval systems, and embeddings-based search "
                "algorithms. While these technologies are designed to offer accurate and relevant information, "
                "they may not always be up-to-date or fully accurate. The owner of this application does not accept "
                "any responsibility for potential inaccuracies, misleading information, or any consequences that may "
                "arise from the use of the application. Users are encouraged to verify the information independently "
                "and consult additional sources when making decisions based on the information provided by this app.")

    # Bind the buttons to their handlers; both write into the output box
    button.click(on_button_click, inputs=textbox, outputs=output)
    buttonTerms.click(on_term_button_click, outputs=output)

demo.launch()