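"""Gradio RAG demo: question answering over the Sri Lankan constitution.

The app downloads the constitution PDF (falling back to a bundled local copy),
chunks and indexes it in Chroma with GPT4All embeddings, and answers questions
with Meta-Llama-3-8B-Instruct via the Hugging Face Inference API.
"""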
import os
import requests
import tempfile
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.vectorstores import VectorStoreRetriever
load_dotenv('secret.env')  # drop the filename argument when hosting on Hugging Face Spaces
token = os.getenv('HUGGINGFACE_TOKEN')
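# Note: Meta-Llama-3-8B-Instruct is a gated repository, so HUGGINGFACE_TOKEN
# must belong to an account that has accepted the model's license.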
client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=token,
)
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
gpt4all_kwargs = {'allow_download': False}  # use the boolean False; the string 'false' is truthy
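# With downloads disabled, the embedding model file must already be present in
# GPT4All's local model directory; otherwise GPT4AllEmbeddings will fail to load.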
# Download the PDF from a URL (with a local fallback) and build a retriever
def loadAndRetrieveDocuments(url: str, local_file_path: str) -> VectorStoreRetriever:
    try:
        # Attempt to download the PDF
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure we notice bad responses
        # Save the PDF to a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(response.content)
            temp_pdf_path = temp_file.name
    except Exception as e:
        print(f"Failed to download PDF from URL: {e}")
        # Fall back to the local copy if the download fails
        temp_pdf_path = local_file_path

    # Load the PDF
    loader = PyPDFLoader(temp_pdf_path)
    documents = loader.load()

    # Clean up the temporary file if one was created
    if temp_pdf_path != local_file_path:
        os.remove(temp_pdf_path)

    # Split the documents into overlapping chunks and index them in Chroma.
    # No persist_directory is set, so the index lives in memory and is rebuilt
    # each time the app starts.
    textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documentSplits = textSplitter.split_documents(documents)
    vectorStore = Chroma.from_documents(
        documents=documentSplits,
        embedding=GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs),
    )
    return vectorStore.as_retriever()
def formatDocuments(documents: list) -> str:
    return "\n\n".join(document.page_content for document in documents)
# Define URL and local file path
url = "http://www.parliament.lk/files/pdf/constitution.pdf"
local_file_path = "constitution.pdf"  # fallback used if the download fails
# Load documents from URL or local file
retriever = loadAndRetrieveDocuments(url, local_file_path)
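# The retriever is built once at startup; re-indexing requires a restart.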
# Shared chat history; each turn appends the full prompt (question plus
# retrieved context), so long sessions grow the request size quickly.
chat_history = []
def ragChain(question: str) -> str:
    global chat_history
    # Retrieve the most relevant chunks and format them as context
    retrievedDocuments = retriever.invoke(question)
    formattedContext = formatDocuments(retrievedDocuments)
    formattedPrompt = (
        f"Question: {question}\n\n"
        f"Context: {formattedContext}\n\n"
        f"Please provide a detailed answer based solely on the provided context."
    )
    messages = chat_history + [{"role": "user", "content": formattedPrompt}]
    response = client.chat_completion(
        messages=messages,
        max_tokens=700,
        stream=False,
    )
    # Extract the generated response text using dataclass attributes
    generated_text = ""
    if response and response.choices:
        generated_text = response.choices[0].message.content
    # Update chat history
    chat_history.append({"role": "user", "content": formattedPrompt})
    chat_history.append({"role": "assistant", "content": generated_text})
    return generated_text or "No response generated"
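# Optional sanity check before wiring up the UI (hypothetical question):
# print(ragChain("What are the official languages of Sri Lanka?"))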
# Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            textbox = gr.Textbox(label="Question")
            with gr.Row():
                buttonTerms = gr.Button("Terms of use")
                button = gr.Button("Submit")
        with gr.Column():
            output = gr.Textbox(label="Output", lines=25)

    def on_button_click(question):
        # Run the RAG chain on the submitted question
        return ragChain(question)

    def on_term_button_click():
        return ("The information provided by this application is generated using advanced technologies, including "
                "natural language processing models, document retrieval systems, and embeddings-based search "
                "algorithms. While these technologies are designed to offer accurate and relevant information, "
                "they may not always be up to date or fully accurate. The owner of this application does not accept "
                "any responsibility for potential inaccuracies, misleading information, or any consequences that may "
                "arise from the use of the application. Users are encouraged to verify the information independently "
                "and consult additional sources when making decisions based on the information provided by this app.")

    # Bind the buttons to their handlers
    button.click(on_button_click, inputs=textbox, outputs=output)
    buttonTerms.click(on_term_button_click, outputs=output)

demo.launch()
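# When testing outside of Spaces, demo.launch(share=True) would also expose a
# temporary public URL; the default launch() is sufficient on Hugging Face.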