#!/usr/bin/env python
# coding: utf-8

# # **Q&A system (Using LangChain)**

# ## **Setup**

# ### Importing libraries

# In[ ]:


import os
import time
import urllib.request

import gradio as gr  # wrt UI
import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader  # for loading .pdf file
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch


# ### Utilities

# In[ ]:


def download_pdf(url, output_path):
    """Download the file at ``url`` and save it at ``output_path``.

    Args:
        url: Location of the .pdf file to fetch.
        output_path: Local path the downloaded bytes are written to.

    Raises:
        ValueError: If ``url`` is not a URL ``urllib`` can open.
        OSError: If the download or the local write fails
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    # NOTE(review): ``url`` comes straight from the web form and
    # ``urlretrieve`` also accepts non-HTTP schemes such as ``file://``.
    # Consider restricting the allowed schemes before exposing this publicly.
    urllib.request.urlretrieve(url, output_path)


# ## **UI**

# In[ ]:


def _format_context(source_documents):
    """Render the retrieved chunks as one banner-separated text block."""
    banner = "#" * 50
    sections = []
    for doc in source_documents:
        source_document_path = doc.metadata["source"]
        page_number = doc.metadata["page"]
        sections.append(
            f"\n{banner}\n"
            f"Relevant source text: {source_document_path}, page {page_number}\n"
            f"{banner}\n"
            f"{doc.page_content}\n"
        )
    return "".join(sections)


def get_ans(filename, question, model_to_use, api_key):
    """Answer ``question`` from the PDF at ``filename`` using retrieval QA.

    The PDF is loaded and split, embedded into an in-memory vector store, and
    queried through a "stuff" ``RetrievalQA`` chain.

    Args:
        filename: Path of the PDF on local disk.
        question: Natural-language question to ask about the document.
        model_to_use: Model selector; only ``"GPT-3"`` is supported.
        api_key: OpenAI API key. When empty, LangChain falls back to the
            ``OPENAI_API_KEY`` environment variable.

    Returns:
        A ``(answer, context)`` tuple of strings, where ``context`` lists the
        source path, page number and text of every retrieved chunk.

    Raises:
        ValueError: If ``model_to_use`` is not a supported model.
    """
    if model_to_use != "GPT-3":
        # Previously this fell through and crashed on unbound locals.
        raise ValueError(f"Unsupported model: {model_to_use!r}")

    # Pass the key to the clients directly instead of writing it to
    # os.environ: the environment is shared by every request served by this
    # process, so one user's key would leak into (or clobber) another's.
    key = (api_key or "").strip() or None
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    llm = ChatOpenAI(temperature=0, openai_api_key=key)

    docs = PyPDFLoader(filename).load_and_split()
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)

    qa_stuff = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(),
        return_source_documents=True,
        verbose=True,
    )
    # response keys: 'query', 'result', 'source_documents'
    response = qa_stuff(question)

    answer = response["result"]
    context = _format_context(response["source_documents"])
    return answer, context


def _error_outputs(message):
    """Return ``message`` shaped like the three UI outputs.

    The click handler is bound to (answer, context, exec_time); returning a
    bare string would make Gradio fail on the output count, so the message is
    placed in the answer slot and the other two are left blank.
    """
    return message, "", ""


def question_answer(url, file, question, model_to_use, api_key):
    """Gradio callback: validate the form, obtain the PDF and answer.

    Args:
        url: URL of a PDF to download (may be empty).
        file: Uploaded file object exposing ``.name``, or ``None``.
        question: Question to ask about the document.
        model_to_use: Model selector forwarded to ``get_ans``.
        api_key: OpenAI API key forwarded to ``get_ans``.

    Returns:
        ``(answer, context, exec_time)``. On a validation or download error,
        ``answer`` carries an ``[ERROR]`` message and the other two values
        are empty strings.
    """
    start_time = time.perf_counter()

    has_url = url.strip() != ""
    has_file = file is not None

    if not has_url and not has_file:
        return _error_outputs(
            "[ERROR]: Both URL and PDF is empty. Provide atleast one."
        )
    if has_url and has_file:
        return _error_outputs(
            "[ERROR]: Both URL and PDF is provided. "
            "Please provide only one (eiter URL or PDF)."
        )
    if question.strip() == "":
        return _error_outputs("[ERROR]: Question field is empty")

    if has_url:
        filename = "document.pdf"
        try:
            download_pdf(url, filename)
        except (OSError, ValueError) as exc:
            # Report bad or unreachable URLs in the UI, not as a traceback.
            return _error_outputs(
                f"[ERROR]: Could not download PDF from URL: {exc}"
            )
    else:
        filename = file.name

    answer, context = get_ans(filename, question, model_to_use, api_key)

    exec_time = time.perf_counter() - start_time  # seconds
    return answer, context, exec_time


# In[ ]:


title = "Question & Answering System: Ask a Pdf"
description = (
    " This Q&A System allows you to input an entire document & ask questions"
    " about its contents. This system has ability to add reference to the"
    " specific page number from where the information was found. This adds"
    " credibility to the answers generated & also helps you locate the"
    " relevant information in the document.\n"
    " Disclaimer: This application is only an interface for you to upload"
    " your data & select the relevant model. Please be conscious of using"
    " this responsibly. The data, model, & API key belong to the respective"
    " owners. The application owner doesn't take any responsibility for any"
    " unintended consequence of use of this application."
    " App owner: Ishant A "
)

with gr.Blocks() as demo:
    # NOTE(review): the two short Markdown literals below were split across
    # physical lines in the exported source; any inline HTML they may have
    # carried is not recoverable from it, so only the visible text is kept.
    gr.Markdown(f"\n\n{title}\n\n")
    gr.Markdown(description)

    with gr.Row():
        with gr.Group():
            url = gr.Textbox(
                label="URL (Eg: 'https://clinicaltrials.gov/ProvidedDocs/00/NCT02415400/Prot_000.pdf')"
            )
            gr.Markdown("\nor\n")
            file = gr.File(label='PDF', file_types=['.pdf'])
            question = gr.Textbox(
                label="question (Eg: 'When to perform randomization')"
            )
            model_to_use = gr.Dropdown(
                ["GPT-3"], value="GPT-3", label="model_to_use"
            )
            # Mask the key so it is not shown in clear text on screen.
            api_key = gr.Textbox(
                label="Enter API key (if using GPT-3 to avoid error)",
                type="password",
            )
            btn = gr.Button(value='Submit')
            # NOTE(review): ``.style()`` is deprecated in newer Gradio
            # releases - confirm against the pinned Gradio version.
            btn.style(full_width=True)

        with gr.Group():
            exec_time = gr.Textbox(label="execution time (s)")
            answer = gr.Textbox(label="answer")
            context = gr.Textbox(
                label="Relevant chunks within document (Context)"
            )

    btn.click(
        question_answer,
        inputs=[url, file, question, model_to_use, api_key],
        outputs=[answer, context, exec_time],
    )

demo.queue()
demo.launch()