import os import requests import tempfile import gradio as gr from dotenv import load_dotenv from huggingface_hub import InferenceClient from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import pdf from langchain_community.embeddings import OllamaEmbeddings from langchain_community.vectorstores import Chroma from langchain_core.vectorstores import VectorStoreRetriever from langchain.embeddings import GPT4AllEmbeddings load_dotenv('secret.env') # remove string if hosting in huggingface token = os.getenv('HUGGINGFACE_TOKEN') client = InferenceClient( "meta-llama/Meta-Llama-3-8B-Instruct", token=token, ) model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf" gpt4all_kwargs = {'allow_download': 'false'} # Function to download the PDF from a URL and load documents def loadAndRetrieveDocuments(url: str, local_file_path: str) -> VectorStoreRetriever: try: # Attempt to download PDF response = requests.get(url) response.raise_for_status() # Ensure we notice bad responses # Save PDF to a temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: temp_file.write(response.content) temp_pdf_path = temp_file.name except Exception as e: print(f"Failed to download PDF from URL: {e}") # Use local file if URL download fails temp_pdf_path = local_file_path # Load the PDF from the temporary file loader = pdf.PyPDFLoader(temp_pdf_path) documents = loader.load() # Clean up temporary file if created if temp_pdf_path != local_file_path: os.remove(temp_pdf_path) # Process documents textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) documentSplits = textSplitter.split_documents(documents) vectorStore = Chroma.from_documents(documents=documentSplits, embedding=GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs)) return vectorStore.as_retriever() def formatDocuments(documents: list) -> str: return "\n\n".join(document.page_content for document in documents) # Define URL and local file path url = "http://www.parliament.lk/files/pdf/constitution.pdf" local_file_path = "constitution.pdf" # Local file path # Load documents from URL or local file retriever = loadAndRetrieveDocuments(url, local_file_path) # Chat history chat_history = [] def ragChain(question: str) -> str: global chat_history retrievedDocuments = retriever.invoke(question) formattedContext = formatDocuments(retrievedDocuments) formattedPrompt = (f"Question: {question}\n\n" f"Context: {formattedContext}\n\n" f"Please provide a detailed answer based solely on the provided context.") messages = chat_history + [{"role": "user", "content": formattedPrompt}] response = client.chat_completion( messages=messages, max_tokens=700, stream=False ) # Extract the generated response text using dataclass attributes generated_text = "" if response and response.choices: generated_text = response.choices[0].message.content # Update chat history chat_history.append({"role": "user", "content": formattedPrompt}) chat_history.append({"role": "assistant", "content": generated_text}) return generated_text or "No response generated" # Gradio interface with gr.Blocks() as demo: with gr.Row(): with gr.Column(): textbox = gr.Textbox(label="Question") with gr.Row(): buttonTerms = gr.Button("Terms of use") button = gr.Button("Submit") with gr.Column(): output = gr.Textbox(label="Output", lines=25) def on_button_click(question): # Call the ragChain function with the question answer = ragChain(question) return answer def on_term_button_click(): return ("The information provided by this application is generated using advanced technologies, including " "natural language processing models, document retrieval systems, and embeddings-based search " "algorithms. While these technologies are designed to offer accurate and relevant information, " "they may not always be up-to-date or fully accurate.The owner of this application does not accept " "any responsibility for potential inaccuracies, misleading information, or any consequences that may " "arise from the use of the application. Users are encouraged to verify the information independently " "and consult additional sources when making decisions based on the information provided by this app.") # Bind the button to the function button.click(on_button_click, inputs=textbox, outputs=output) buttonTerms.click(on_term_button_click, outputs=output) demo.launch()