DEV-chat-with-pdf-openai

Sleeping

App Files Community

wholewhale committed on Oct 20, 2023

Commit

cfc65ef

•

1 Parent(s): 2b90e18

revert

Browse files

Files changed (1) hide show

app.py +83 -48

app.py CHANGED Viewed

@@ -35,26 +35,49 @@ summary_state = gr.State(initial_value="pending")
 # PDF summary and query using stuffing
 def pdf_changes(pdf_doc):
     try:
-        if pdf_doc is None:
-            return "No PDF uploaded."
         loader = OnlinePDFLoader(pdf_doc.name)
         documents = loader.load()
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-        texts = text_splitter.split_documents(documents)
         embeddings = OpenAIEmbeddings()
         global db
-        db = Chroma.from_documents(texts, embeddings)
         retriever = db.as_retriever()
         global qa
         qa = ConversationalRetrievalChain.from_llm(
-            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
             retriever=retriever,
             return_source_documents=False
         )
-        return "Ready"
     except Exception as e:
-        return f"Error loading PDF: {e}"
 def clear_data():
@@ -98,23 +121,23 @@ def infer(question, history):
         return f"Error querying chatbot: {str(e)}"
 def auto_clear_data():
-      global qa, db, last_interaction_time
-      if time.time() - last_interaction_time > 1000:
         qa = None
         db = None
         print("Data cleared successfully.")  # Logging
 def periodic_clear():
-      while True:
         auto_clear_data()
         time.sleep(1000)
 threading.Thread(target=periodic_clear).start()
 css = """
 #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
 """
 title = """
 <div style="text-align: center;max-width: 700px;">
     <h1>CauseWriter Chat with PDF • OpenAI</h1>
@@ -123,40 +146,52 @@ title = """
     This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
 </div>
 """
 with gr.Blocks(css=css) as demo:
-        with gr.Column(elem_id="col-container"):
-            gr.HTML(title)
-            with gr.Column():
-                  pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-                  with gr.Row():
-                      langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-                      load_pdf = gr.Button("Convert PDF to Magic AI language")
-                      clear_btn = gr.Button("Clear Data")
-                    # New Textbox to display summary
-                  summary_box = gr.Textbox(
-                      label="Document Summary",
-                      placeholder="Summary will appear here.",
-                      interactive=False,
-                      rows=5,
-                      elem_id="summary_box"  # Set the elem_id to match the state key
-                    )
-            chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
-            question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
-            submit_btn = gr.Button("Send Message")
-            # Step 2 and 3: Put the State object as an input and output
-            load_pdf.click(pdf_changes, inputs=[pdf_doc, summary_state], outputs=[langchain_status, summary_state])
-            clear_btn.click(clear_data, outputs=[langchain_status])
-            question.submit(add_text, [chatbot, question], [chatbot, question]).then(
-                  bot, chatbot, chatbot
-            )
-            submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
-                  bot, chatbot, chatbot
             )
-demo.launch()

 # PDF summary and query using stuffing
 def pdf_changes(pdf_doc):
     try:
+        # Initialize loader and load documents
         loader = OnlinePDFLoader(pdf_doc.name)
         documents = loader.load()
+        # Define the prompt for summarization
+        prompt_template = """Write a concise summary of the following:
+        "{text}"
+        CONCISE SUMMARY:"""
+        prompt = PromptTemplate.from_template(prompt_template)
+        # Define the LLM chain with the specified prompt
+        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
+        llm_chain = LLMChain(llm=llm, prompt=prompt)
+        # Initialize StuffDocumentsChain
+        stuff_chain = StuffDocumentsChain(
+            llm_chain=llm_chain, document_variable_name="text"
+        )
+        # Generate summary using StuffDocumentsChain
+        global full_summary
+        full_summary = stuff_chain.run(documents)
+        # Update the state variable
+        return {summary_state: full_summary}
+        # Other existing logic for Chroma, embeddings, and retrieval
         embeddings = OpenAIEmbeddings()
         global db
+        db = Chroma.from_documents(documents, embeddings)
         retriever = db.as_retriever()
         global qa
         qa = ConversationalRetrievalChain.from_llm(
+            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k", max_tokens=-1, n=2),
             retriever=retriever,
             return_source_documents=False
         )
+        summary_box.set_value(full_summary)
+        return f"Ready. Full Summary loaded."
     except Exception as e:
+        return f"Error processing PDF: {str(e)}"
 def clear_data():
         return f"Error querying chatbot: {str(e)}"
 def auto_clear_data():
+    global qa, db, last_interaction_time
+    if time.time() - last_interaction_time > 1000:
         qa = None
         db = None
         print("Data cleared successfully.")  # Logging
 def periodic_clear():
+    while True:
         auto_clear_data()
         time.sleep(1000)
 threading.Thread(target=periodic_clear).start()
 css = """
 #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
 """
 title = """
 <div style="text-align: center;max-width: 700px;">
     <h1>CauseWriter Chat with PDF • OpenAI</h1>
     This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
 </div>
 """
+# Global variable for tracking last interaction time
+last_interaction_time = 0
+full_summary = ""  # Added global full_summary
+def update_summary_box():
+    global full_summary
+    return {"summary_box": full_summary}
 with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.HTML(title)
+        with gr.Column():
+            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
+            with gr.Row():
+                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
+                load_pdf = gr.Button("Convert PDF to Magic AI language")
+                clear_btn = gr.Button("Clear Data")
+            # New Textbox to display summary
+            summary_box = gr.Textbox(
+              label="Document Summary",
+              placeholder="Summary will appear here.",
+              interactive=False,
+              rows=5,
+              elem_id="summary_box"  # Set the elem_id to match the state key
             )
+        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
+        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
+        submit_btn = gr.Button("Send Message")
+        load_pdf.click(loading_pdf, None, langchain_status, queue=False)
+        load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False).then(
+          update_summary_box
+      )
+    # Then update the summary_box
+    clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
+    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
+        bot, chatbot, chatbot
+    )
+    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
+        bot, chatbot, chatbot
+    )
+demo.launch()