wholewhale committed on
Commit
e9619d2
1 Parent(s): 837d48f

clear data DB

Browse files
Files changed (1) hide show
  1. app.py +33 -15
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import os
3
  import time
 
4
  from langchain.document_loaders import OnlinePDFLoader
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.llms import OpenAI
@@ -10,42 +11,46 @@ from langchain.chains import ConversationalRetrievalChain
10
 
11
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
12
 
 
 
 
13
  def loading_pdf():
14
- return "Working the upload. Also, pondering the usefulness of sporks..."
15
 
16
  def pdf_changes(pdf_doc):
17
  loader = OnlinePDFLoader(pdf_doc.name)
18
  documents = loader.load()
19
- text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
20
  texts = text_splitter.split_documents(documents)
21
  embeddings = OpenAIEmbeddings()
 
22
  db = Chroma.from_documents(texts, embeddings)
23
  retriever = db.as_retriever()
24
  global qa
25
  qa = ConversationalRetrievalChain.from_llm(
26
- llm=OpenAI(temperature=0.5),
27
  retriever=retriever,
28
  return_source_documents=False)
29
  return "Ready"
30
 
31
  def clear_data():
32
- global qa
33
  qa = None
 
34
  return "Data cleared"
35
 
36
  def add_text(history, text):
 
 
37
  history = history + [(text, None)]
38
  return history, ""
39
 
40
  def bot(history):
41
  response = infer(history[-1][0], history)
42
- formatted_response = "**Bot:** \n" + ' \n'.join(response.split('. '))
43
- history[-1][1] = ""
44
-
45
- for character in formatted_response:
46
- history[-1][1] += character
47
- time.sleep(0.05)
48
- yield history
49
 
50
  def infer(question, history):
51
  res = []
@@ -55,9 +60,22 @@ def infer(question, history):
55
 
56
  chat_history = res
57
  query = question
58
- result = qa({"question": query, "chat_history": chat_history})
59
  return result["answer"]
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  css = """
62
  #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
63
  """
@@ -66,8 +84,8 @@ title = """
66
  <div style="text-align: center;max-width: 700px;">
67
  <h1>CauseWriter Chat with PDF • OpenAI</h1>
68
  <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
69
- when everything is ready, you can start asking questions about the pdf ;) <br />
70
- This version is set to store chat history, and uses OpenAI as LLM.</p>
71
  </div>
72
  """
73
 
@@ -82,7 +100,7 @@ with gr.Blocks(css=css) as demo:
82
  load_pdf = gr.Button("Convert PDF to Magic AI language")
83
  clear_btn = gr.Button("Clear Data")
84
 
85
- chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
86
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
87
  submit_btn = gr.Button("Send Message")
88
 
 
1
  import gradio as gr
2
  import os
3
  import time
4
+ import threading
5
  from langchain.document_loaders import OnlinePDFLoader
6
  from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.llms import OpenAI
 
11
 
12
# NOTE(review): the API key is read from an env var literally named
# "Your_API_Key" — confirm that is the intended variable name.
# Default to "" so a missing variable surfaces later as a clear OpenAI auth
# error instead of a TypeError here (os.environ values must be str, not None).
os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key", "")

# Epoch seconds of the most recent user interaction (0 = never).
# Read by auto_clear_data() to decide when to drop the loaded PDF data.
last_interaction_time = 0
16
+
17
def loading_pdf():
    """Status text shown in the UI while the PDF upload is being processed."""
    return "Working on the upload. Also, pondering the usefulness of sporks..."
19
 
20
def pdf_changes(pdf_doc):
    """Index the uploaded PDF and build the conversational QA chain.

    Side effects: rebinds the module globals ``db`` (Chroma vector store)
    and ``qa`` (ConversationalRetrievalChain). Returns "Ready" on success.
    """
    global db, qa
    docs = OnlinePDFLoader(pdf_doc.name).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(docs)
    db = Chroma.from_documents(chunks, OpenAIEmbeddings())
    qa = ConversationalRetrievalChain.from_llm(
        # NOTE(review): "gpt-3.5-turbo" is a chat model; confirm the
        # completion-style OpenAI wrapper (rather than ChatOpenAI) is intended.
        llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
        retriever=db.as_retriever(),
        return_source_documents=False)
    return "Ready"
35
 
36
def clear_data():
    """Drop the QA chain and vector store so the next PDF starts fresh."""
    global qa, db
    qa, db = None, None
    return "Data cleared"
41
 
42
def add_text(history, text):
    """Append the user's message (answer pending) and record activity time."""
    global last_interaction_time
    last_interaction_time = time.time()  # feeds the idle auto-clear check
    return history + [(text, None)], ""
47
 
48
def bot(history):
    """Answer the latest user message and write the formatted reply into history.

    ``history`` is the gradio chat history; the last entry's answer slot is
    filled in place and the updated history returned.
    """
    question = history[-1][0]
    answer = infer(question, history)
    # One sentence per line under a bold "Bot:" header.
    body = ' \n'.join(answer.split('. '))
    history[-1][1] = f"**Bot:**\n\n{body}"
    return history
 
 
 
54
 
55
  def infer(question, history):
56
  res = []
 
60
 
61
  chat_history = res
62
  query = question
63
+ result = qa({"question": query, "chat_history": chat_history, "system:":"This is a world-class summarizing AI, be helpful."})
64
  return result["answer"]
65
 
66
def auto_clear_data():
    """Release the QA chain and vector store once the app has sat idle.

    Compares the module-global ``last_interaction_time`` against now and,
    past the threshold, rebinds ``qa`` and ``db`` to None.
    """
    global qa, db, last_interaction_time
    idle_seconds = time.time() - last_interaction_time
    if idle_seconds > 1000:
        qa, db = None, None
71
+
72
def periodic_clear():
    """Background loop: run the idle-data check every 10 minutes, forever."""
    while True:
        auto_clear_data()
        time.sleep(600)
76
+
77
# Daemon thread: the cleaner loops forever, so it must not keep the
# interpreter alive after the main (gradio) thread exits.
threading.Thread(target=periodic_clear, daemon=True).start()
78
+
79
  css = """
80
  #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
81
  """
 
84
  <div style="text-align: center;max-width: 700px;">
85
  <h1>CauseWriter Chat with PDF • OpenAI</h1>
86
  <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
87
+ when everything is ready, you can start asking questions about the pdf. <br />
88
+ This version is set to store chat history and uses OpenAI as LLM.</p>
89
  </div>
90
  """
91
 
 
100
  load_pdf = gr.Button("Convert PDF to Magic AI language")
101
  clear_btn = gr.Button("Clear Data")
102
 
103
+ chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
104
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
105
  submit_btn = gr.Button("Send Message")
106