Spaces:

ADOPLE
/

Adopleai-DocumentQA

Running

App Files Files Community

Karthikeyan committed on Jun 21, 2023

Commit

87b3a2f

1 Parent(s): 18aeb65

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -88

app.py CHANGED Viewed

@@ -1,103 +1,137 @@
-import gradio as gr
 import os
-import time
-from langchain.document_loaders import OnlinePDFLoader
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.llms import OpenAI
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.chains import ConversationalRetrievalChain
-def loading_pdf():
-    return "Loading..."
-def pdf_changes(pdf_doc, open_ai_key):
-    if openai_key is not None:
-        os.environ['OPENAI_API_KEY'] = open_ai_key
-        loader = OnlinePDFLoader(pdf_doc.name)
-        documents = loader.load()
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-        texts = text_splitter.split_documents(documents)
-        embeddings = OpenAIEmbeddings()
-        db = Chroma.from_documents(texts, embeddings)
-        retriever = db.as_retriever()
-        global qa
-        qa = ConversationalRetrievalChain.from_llm(
-            llm=OpenAI(temperature=0.5),
-            retriever=retriever,
-            return_source_documents=False)
-        return "Ready"
     else:
-        return "You forgot OpenAI API key"
-def add_text(history, text):
-    history = history + [(text, None)]
-    return history, ""
-def bot(history):
-    response = infer(history[-1][0], history)
-    history[-1][1] = ""
-    for character in response:
-        history[-1][1] += character
-        time.sleep(0.05)
-        yield history
-def infer(question, history):
-    res = []
-    for human, ai in history[:-1]:
-        pair = (human, ai)
-        res.append(pair)
-    chat_history = res
-    #print(chat_history)
-    query = question
-    result = qa({"question": query, "chat_history": chat_history})
-    #print(result)
-    return result["answer"]
-css="""
-#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
-"""
 title = """
 <div style="text-align: center;max-width: 700px;">
-    <h1>LangChain ChatBot</h1>
-    <p style="text-align: center;">Upload a PDF, click the "Load PDF to LangChain" button, <br /></p>
     <a style="display:inline-block; margin-left: 1em" href="https://www.adople.com"><img src="https://lh6.googleusercontent.com/FQJXx8B6Tbq7SvSE3wvJyXusFZxKcsY92eQaPnZj5pIDdXHVjs10tXXBqWcF0BgC_riSFcje2qUd-XWaiaJByI6dMOkEFdAtpeG7KK8xh7nH8KE3GfSOMrySKPVWXGdEvg=w1280" alt="Adople AI"></a>
 </div>
 """
-with gr.Blocks(css=css,theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML(title)
-        with gr.Column():
-            openai_key = gr.Textbox(label="You OpenAI API key", type="password")
-            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-            with gr.Row():
-                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-                load_pdf = gr.Button("Load pdf to langchain")
-        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
-        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
-        submit_btn = gr.Button("Send Message")
-    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
-    load_pdf.click(pdf_changes, inputs=[pdf_doc, openai_key], outputs=[langchain_status], queue=False)
-    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
-        bot, chatbot, chatbot
-    )
-    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
-        bot, chatbot, chatbot)
-demo.launch()

+from pydantic import NoneStr
 import os
+from langchain.chains.question_answering import load_qa_chain
+from langchain.document_loaders import UnstructuredFileLoader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.llms import OpenAI
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from pypdf import PdfReader
+import mimetypes
+import validators
+import requests
+import tempfile
+import gradio as gr
+import openai
+# Read the OpenAI key from the environment (for example a deployment secret)
+# instead of embedding it in the source: a key committed to a public file is
+# exposed to every reader and has to be revoked.
+openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+if not openai_api_key:
+    # Surface the misconfiguration at startup rather than on the first request.
+    print("OPENAI_API_KEY is not set; embedding and completion requests will fail.")
+def get_empty_state():
+    """Return a fresh session state in which no knowledge base is loaded."""
+    initial_state = dict(knowledge_base=None)
+    return initial_state
+def create_knowledge_base(docs):
+    """Build a FAISS index over ``docs``.
+
+    The documents are split on newlines (chunk_size=500, no overlap), the
+    chunks are embedded with OpenAIEmbeddings, and the resulting FAISS store
+    is returned.
+    """
+    splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=500,
+        chunk_overlap=0,
+        length_function=len,
+    )
+    # Arguments are evaluated left to right: split first, then create the embedder.
+    return FAISS.from_documents(splitter.split_documents(docs), OpenAIEmbeddings())
+def upload_file(file_obj):
+    """Index an uploaded file and return it together with the new session state.
+
+    Args:
+        file_obj: Upload handle; only its ``name`` attribute is read and it is
+            passed to the loader as the file path.
+
+    Returns:
+        A ``(path, state)`` pair. ``state`` is always a dict with a
+        "knowledge_base" entry; the entry is ``None`` when the file could not
+        be loaded or embedded.
+    """
+    try:
+        loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
+        docs = loader.load()
+        knowledge_base = create_knowledge_base(docs)
+    except Exception as exc:
+        # Keep the state a dict. Storing a message string here (as before)
+        # never reached the user, because this output feeds the hidden session
+        # state, and it left the state in a shape the question handler does
+        # not expect.
+        print(f"Could not index {file_obj.name}: {exc}")
+        return file_obj.name, {"knowledge_base": None}
+    return file_obj.name, {"knowledge_base": knowledge_base}
+def upload_via_url(url):
+    """Download a document from ``url``, index it, and return the new state.
+
+    Args:
+        url: Address of the document to fetch.
+
+    Returns:
+        A ``(path, state)`` pair: the path of the temporary file holding the
+        download, and a state dict with the knowledge base built from it.
+
+    Raises:
+        ValueError: If ``url`` is not a valid URL, or the server answers with
+            a status code other than 200.
+    """
+    if validators.url(url):
+        # A timeout keeps a stalled server from blocking the request forever.
+        r = requests.get(url, timeout=30)
+        if r.status_code != 200:
+            raise ValueError(
+                "Check the url of your file; returned status code %s" % r.status_code
+            )
+        # guess_extension() needs a bare MIME type: tolerate a missing header
+        # and drop parameters such as "; charset=utf-8".
+        content_type = r.headers.get("content-type") or ""
+        mime_type = content_type.split(";", 1)[0].strip()
+        file_extension = mimetypes.guess_extension(mime_type) if mime_type else None
+        # Leaving the with-block closes the file, so the bytes are flushed to
+        # disk before the loader opens the path. delete=False keeps the file
+        # around because its path is returned to the caller.
+        with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
+            temp_file.write(r.content)
+            file_path = temp_file.name
+        loader = UnstructuredFileLoader(file_path, strategy="fast")
+        docs = loader.load()
+        knowledge_base = create_knowledge_base(docs)
+        return file_path, {"knowledge_base": knowledge_base}
     else:
+        raise ValueError("Please enter a valid URL")
+def answer_question(question, state):
+    """Answer ``question`` from the document indexed in ``state``.
+
+    Args:
+        question: The user's question text.
+        state: Session state; expected to be a dict with a "knowledge_base"
+            entry. Any other shape is treated as "no document loaded".
+
+    Returns:
+        The answer produced by the QA chain, or the string
+        "Please upload Proper Document" when no knowledge base is available
+        or the lookup fails.
+    """
+    # Handle the expected "nothing uploaded yet" case explicitly instead of
+    # relying on an exception raised further down.
+    knowledge_base = state.get("knowledge_base") if isinstance(state, dict) else None
+    if knowledge_base is None:
+        return "Please upload Proper Document"
+    try:
+        docs = knowledge_base.similarity_search(question)
+        llm = OpenAI(temperature=0.4)
+        chain = load_qa_chain(llm, chain_type="stuff")
+        return chain.run(input_documents=docs, question=question)
+    except Exception:
+        # Narrower than a bare except: KeyboardInterrupt and SystemExit
+        # still propagate.
+        return "Please upload Proper Document"
+# HTML banner shown at the top of the page; it is rendered with gr.HTML(title).
+# NOTE(review): the copy refers to a "Load PDF" button, but the layout in this
+# file only creates an upload button labelled "Browse File" - confirm wording.
 title = """
 <div style="text-align: center;max-width: 700px;">
+    <h1>ADOPLE AI - Document ChatBot</h1>
+    <p style="text-align: center;">Upload a PDF, click the "Load PDF" button, <br /></p>
     <a style="display:inline-block; margin-left: 1em" href="https://www.adople.com"><img src="https://lh6.googleusercontent.com/FQJXx8B6Tbq7SvSE3wvJyXusFZxKcsY92eQaPnZj5pIDdXHVjs10tXXBqWcF0BgC_riSFcje2qUd-XWaiaJByI6dMOkEFdAtpeG7KK8xh7nH8KE3GfSOMrySKPVWXGdEvg=w1280" alt="Adople AI"></a>
 </div>
 """
+# Page layout and event wiring for the document QA demo.
+with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
+    # Session state; starts out with no knowledge base.
+    state = gr.State(get_empty_state())
+    gr.HTML(title)
     with gr.Column(elem_id="col-container"):
+        gr.HTML(
+            """<br>
+            <h1 style="text-align:center;">
+               Adople AI Document QA Model
+              </h1> """
+        )
+        gr.HTML(
+            """<hr style="border-top: 5px solid white;">"""
+            )
+        gr.Markdown("**Upload your file**")
+        with gr.Row(elem_id="row-flex"):
+          with gr.Column(scale=0.85):
+              # The URL box is created hidden (visible=False).
+              # NOTE(review): presumably this makes the upload_via_url path
+              # unreachable from the page - confirm that is intended.
+              file_url = gr.Textbox(
+                  value="",
+                  label="Upload your file",
+                  placeholder="Enter a url",
+                  show_label=False,
+                  visible=False
+              )
+          with gr.Column(scale=0.15, min_width=160):
+              upload_button = gr.UploadButton(
+                  "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
+                )
+        # Receives the path returned by the upload handlers.
+        file_output = gr.File(elem_classes="filenameshow")
+        with gr.Row():
+          with gr.Column(scale=1, min_width=0):
+            user_question = gr.Textbox(value="",label='Question Box :',show_label=True, placeholder="Ask a question about your file:",elem_classes="spaceH")
+        with gr.Row():
+          with gr.Column(scale=1, min_width=0):
+            answer = gr.Textbox(value="",label='Answer Box :',show_label=True, placeholder="",lines=5)
+    # Both upload handlers write the file display and replace the session state;
+    # submitting a question reads that state and fills the answer box.
+    file_url.submit(upload_via_url, file_url, [file_output, state])
+    upload_button.upload(upload_file, upload_button, [file_output,state])
+    user_question.submit(answer_question, [user_question, state], [answer])
+# NOTE(review): share=True presumably requests a public share link - confirm
+# it is wanted for the environment this app is deployed in.
+demo.queue().launch(share=True)