#!/usr/bin/env python # coding: utf-8 # # **Q&A system (Using LangChain)** # ## **Setup** # ### Importing libraries # In[ ]: import urllib.request from langchain.document_loaders import PyPDFLoader #for loading .pdf file from langchain.vectorstores import DocArrayInMemorySearch import openai #wrt UI import time import gradio as gr # ### Utilities # In[ ]: def download_pdf(url, output_path): """ download .pdf file from URL & save it at output_path """ urllib.request.urlretrieve(url, output_path) # ## **UI** # In[ ]: from langchain.embeddings import OpenAIEmbeddings from langchain.chains import RetrievalQA from langchain.chat_models import ChatOpenAI import os def get_ans(filename, question, model_to_use, api_key): if(model_to_use=="GPT-3"): os.environ["OPENAI_API_KEY"] = api_key embeddings = OpenAIEmbeddings() llm = ChatOpenAI(temperature = 0) pdf_path = filename loader = PyPDFLoader(pdf_path) docs = loader.load_and_split() db = DocArrayInMemorySearch.from_documents( docs, embeddings ) qa_stuff = RetrievalQA.from_chain_type( retriever=db.as_retriever(), return_source_documents = True, chain_type="stuff", llm=llm, verbose=True ) response = qa_stuff(question) #dict_keys(['query', 'result', 'source_documents']) answer = response["result"] context = response["source_documents"] #formatting context context = "" for i in range(len(response["source_documents"])): source_document_path = response["source_documents"][i].metadata["source"] page_number = str(response["source_documents"][i].metadata["page"]) context += "\n" + "#"*50 + "\n" context += f"Relevant source text: {source_document_path}, page {page_number}"+"\n"+"#"*50+"\n" context += response["source_documents"][i].page_content + "\n" return(answer, context) def question_answer(url, file, question, model_to_use, api_key): start_time = time.perf_counter() if url.strip() == "" and file == None: return "[ERROR]: Both URL and PDF is empty. Provide atleast one." if url.strip() != "" and file != None: return "[ERROR]: Both URL and PDF is provided. Please provide only one (eiter URL or PDF)." if question.strip() == "": return "[ERROR]: Question field is empty" if url.strip() != "": glob_url = url download_pdf(glob_url, "document.pdf") filename="document.pdf" else: filename = file.name answer, context = get_ans(filename, question, model_to_use, api_key) end_time = time.perf_counter() exec_time = end_time - start_time #second return(answer, context, exec_time) # In[ ]: title = "Question & Answering System: Ask a Pdf" description = """ This Q&A System allows you to input an entire document & ask questions about its contents. This system has ability to add reference to the specific page number from where the information was found. This adds credibility to the answers generated & also helps you locate the relevant information in the document.\n Disclaimer: This application is only an interface for you to upload your data & select the relevant model. Please be conscious of using this responsibly. The data, model, & API key belong to the respective owners. The application owner doesn't take any responsibility for any unintended consequence of use of this application. App owner: Ishant A """ with gr.Blocks() as demo: gr.Markdown(f"