{title}

#!/usr/bin/env python
# coding: utf-8

# # **Q&A system (Using LangChain)**

# ## **Setup**

# ### Importing libraries

# In[ ]:


import urllib.request

from langchain.document_loaders import PyPDFLoader #for loading .pdf file
from langchain.vectorstores import DocArrayInMemorySearch

import openai

# UI-related imports (request timing and the Gradio front end)
import time
import gradio as gr


# ### Utilities

# In[ ]:


def download_pdf(url, output_path):
    """
    Download the .pdf file at `url` and save it at `output_path`.

    Only http and https URLs are accepted. urllib's default opener also
    understands schemes such as file:// and ftp://, so an unchecked,
    user-supplied URL could be used to copy local files.

    Args:
        url: address of the PDF to fetch (surrounding whitespace is ignored).
        output_path: local path the downloaded bytes are written to.

    Raises:
        ValueError: if `url` does not use the http or https scheme.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import urlparse

    url = url.strip()
    scheme = urlparse(url).scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(
            f"Unsupported URL scheme {scheme!r}: only http(s) URLs can be downloaded."
        )

    urllib.request.urlretrieve(url, output_path)


# ## **UI**

# In[ ]:


from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

import os


def _format_context(source_documents):
    """Render the retrieved source documents as one banner-separated string."""
    banner = "#" * 50
    sections = []
    for document in source_documents:
        source_document_path = document.metadata["source"]
        page_number = document.metadata["page"]
        sections.append(
            f"\n{banner}\n"
            f"Relevant source text: {source_document_path}, page {page_number!s}\n"
            f"{banner}\n"
            f"{document.page_content}\n"
        )
    return "".join(sections)


def get_ans(filename, question, model_to_use, api_key):
    """
    Answer `question` about the PDF at `filename` using a RetrievalQA chain.

    The PDF is loaded and split with PyPDFLoader, indexed in a
    DocArrayInMemorySearch store using OpenAIEmbeddings, and queried through
    a "stuff"-type RetrievalQA chain backed by ChatOpenAI (temperature 0).

    Args:
        filename: path of the PDF file to read.
        question: the question to ask about the document.
        model_to_use: model selector; only "GPT-3" is supported.
        api_key: OpenAI API key, exported as OPENAI_API_KEY.

    Returns:
        (answer, context): the chain's "result" string and a formatted text
        listing the source path, page and content of each returned source
        document.

    Raises:
        ValueError: if `model_to_use` is not "GPT-3". Previously this case
            silently returned None and broke tuple unpacking in the caller.
    """
    if model_to_use != "GPT-3":
        raise ValueError(
            f'Unsupported model {model_to_use!r}; only "GPT-3" is available.'
        )

    # NOTE: os.environ is process-wide, so the key set here stays visible to
    # every later call in this process.
    os.environ["OPENAI_API_KEY"] = api_key

    embeddings = OpenAIEmbeddings()
    llm = ChatOpenAI(temperature=0)

    docs = PyPDFLoader(filename).load_and_split()
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)

    qa_stuff = RetrievalQA.from_chain_type(
        retriever=db.as_retriever(),
        return_source_documents=True,
        chain_type="stuff",
        llm=llm,
        verbose=True,
    )

    # Response keys: 'query', 'result', 'source_documents'.
    response = qa_stuff(question)
    return response["result"], _format_context(response["source_documents"])


def question_answer(url, file, question, model_to_use, api_key):
    """
    UI handler: validate the inputs, obtain the PDF and answer the question.

    Exactly one of `url` / `file` must be provided. A URL is downloaded to
    "document.pdf" in the working directory; an uploaded file is read from
    its `.name` path.

    Args:
        url: URL of a PDF, or an empty string when a file is uploaded.
        file: uploaded file object exposing `.name`, or None.
        question: the question to ask about the document.
        model_to_use: model selector forwarded to `get_ans`.
        api_key: API key forwarded to `get_ans`.

    Returns:
        A 3-tuple (answer, context, exec_time). On invalid input the error
        message is placed in the answer slot and the other two slots are
        empty strings, so the result always has the same shape as a
        successful call. `exec_time` is the elapsed wall time in seconds.
    """
    start_time = time.perf_counter()

    # Treat a missing text value the same as an empty one.
    url = (url or "").strip()
    has_url = url != ""
    has_file = file is not None

    error = None
    if not has_url and not has_file:
        error = "[ERROR]: Both URL and PDF is empty. Provide atleast one."
    elif has_url and has_file:
        error = "[ERROR]: Both URL and PDF is provided. Please provide only one (eiter URL or PDF)."
    elif (question or "").strip() == "":
        error = "[ERROR]: Question field is empty"

    if error is not None:
        # Keep the three-value shape even on failure: a bare string would not
        # line up with the (answer, context, exec_time) outputs.
        return error, "", ""

    if has_url:
        filename = "document.pdf"
        download_pdf(url, filename)
    else:
        filename = file.name

    answer, context = get_ans(filename, question, model_to_use, api_key)

    exec_time = time.perf_counter() - start_time  # seconds

    return answer, context, exec_time


# In[ ]:


# Static text rendered at the top of the page.
title = "Question & Answering System: Ask a Pdf"
description = """
This Q&A System allows you to input an entire document & ask questions about its contents. This system has ability to add reference to the specific page number from where the information was found. This adds credibility to the answers generated & also helps you locate the relevant information in the document.\n
Disclaimer: This application is only an interface for you to upload your data & select the relevant model. Please be conscious of using this responsibly. The data, model, & API key belong to the respective owners. The application owner doesn't take any responsibility for any unintended consequence of use of this application.
App owner: Ishant A
"""

# Page layout: an input group (URL or PDF upload, question, model, API key,
# submit button) followed by an output group (timing, answer, context).
with gr.Blocks() as demo:

    gr.Markdown(f"<center><h1>{title}</h1></center>")
    gr.Markdown(description)

    with gr.Row():

        with gr.Group():
            url = gr.Textbox(label="URL (Eg: 'https://clinicaltrials.gov/ProvidedDocs/00/NCT02415400/Prot_000.pdf')")
            # The heading tag was previously opened twice and never closed.
            gr.Markdown("<center><h6>or</h6></center>")
            file = gr.File(label='PDF', file_types=['.pdf'])
            question = gr.Textbox(label="question (Eg: 'When to perform randomization')")
            model_to_use = gr.Dropdown(["GPT-3"], value="GPT-3", label="model_to_use")
            # type="password" masks the secret instead of echoing it on screen.
            api_key = gr.Textbox(label="Enter API key (if using GPT-3 to avoid error)", type="password")
            btn = gr.Button(value='Submit')
            btn.style(full_width=True)

    with gr.Group():
        exec_time = gr.Textbox(label="execution time (s)")
        answer = gr.Textbox(label="answer")
        context = gr.Textbox(label="Relevant chunks within document (Context)")

    # question_answer receives the five inputs in this order and its return
    # values fill the three outputs in this order.
    btn.click(question_answer, inputs=[url, file, question, model_to_use, api_key], outputs=[answer, context, exec_time])

# Enable Gradio's request queue, then start the app.
demo.queue()

demo.launch()