wholewhale committed on
Commit cfc65ef
1 Parent(s): 2b90e18
Files changed (1)
  1. app.py +83 -48
app.py CHANGED
@@ -35,26 +35,49 @@ summary_state = gr.State(initial_value="pending")
# PDF summary and query using stuffing
def pdf_changes(pdf_doc):
    try:
-         if pdf_doc is None:
-             return "No PDF uploaded."
-
+         # Initialize loader and load documents
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()
-         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-         texts = text_splitter.split_documents(documents)
+
+         # Define the prompt for summarization
+         prompt_template = """Write a concise summary of the following:
+         "{text}"
+         CONCISE SUMMARY:"""
+         prompt = PromptTemplate.from_template(prompt_template)
+
+         # Define the LLM chain with the specified prompt
+         llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
+         llm_chain = LLMChain(llm=llm, prompt=prompt)
+
+         # Initialize StuffDocumentsChain
+         stuff_chain = StuffDocumentsChain(
+             llm_chain=llm_chain, document_variable_name="text"
+         )
+
+         # Generate summary using StuffDocumentsChain
+         global full_summary
+         full_summary = stuff_chain.run(documents)
+         # Update the state variable
+         return {summary_state: full_summary}
+
+         # Other existing logic for Chroma, embeddings, and retrieval
        embeddings = OpenAIEmbeddings()
        global db
-         db = Chroma.from_documents(texts, embeddings)
+         db = Chroma.from_documents(documents, embeddings)
+
        retriever = db.as_retriever()
        global qa
        qa = ConversationalRetrievalChain.from_llm(
-             llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
+             llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )
-         return "Ready"
+         summary_box.set_value(full_summary)
+         return f"Ready. Full Summary loaded."
+
    except Exception as e:
-         return f"Error loading PDF: {e}"
+         return f"Error processing PDF: {str(e)}"
+


def clear_data():
@@ -98,23 +121,23 @@ def infer(question, history):
        return f"Error querying chatbot: {str(e)}"

def auto_clear_data():
-     global qa, db, last_interaction_time
-     if time.time() - last_interaction_time > 1000:
+     global qa, db, last_interaction_time
+     if time.time() - last_interaction_time > 1000:
        qa = None
        db = None
        print("Data cleared successfully.") # Logging
-
+
def periodic_clear():
-     while True:
+     while True:
        auto_clear_data()
        time.sleep(1000)
-
+
threading.Thread(target=periodic_clear).start()
-
+
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
-
+
title = """
<div style="text-align: center;max-width: 700px;">
<h1>CauseWriter Chat with PDF • OpenAI</h1>
@@ -123,40 +146,52 @@ title = """
This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
</div>
"""
-
+ # Global variable for tracking last interaction time
+ last_interaction_time = 0
+ full_summary = "" # Added global full_summary
+
+ def update_summary_box():
+     global full_summary
+     return {"summary_box": full_summary}
+
with gr.Blocks(css=css) as demo:
-     with gr.Column(elem_id="col-container"):
-         gr.HTML(title)
-
-         with gr.Column():
-             pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-             with gr.Row():
-                 langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-                 load_pdf = gr.Button("Convert PDF to Magic AI language")
-                 clear_btn = gr.Button("Clear Data")
-
-             # New Textbox to display summary
-             summary_box = gr.Textbox(
-                 label="Document Summary",
-                 placeholder="Summary will appear here.",
-                 interactive=False,
-                 rows=5,
-                 elem_id="summary_box" # Set the elem_id to match the state key
-             )
-
-         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
-         question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
-         submit_btn = gr.Button("Send Message")
+     with gr.Column(elem_id="col-container"):
+         gr.HTML(title)

-         # Step 2 and 3: Put the State object as an input and output
-         load_pdf.click(pdf_changes, inputs=[pdf_doc, summary_state], outputs=[langchain_status, summary_state])
-         clear_btn.click(clear_data, outputs=[langchain_status])
-         question.submit(add_text, [chatbot, question], [chatbot, question]).then(
-             bot, chatbot, chatbot
-         )
-         submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
-             bot, chatbot, chatbot
+         with gr.Column():
+             pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
+             with gr.Row():
+                 langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
+                 load_pdf = gr.Button("Convert PDF to Magic AI language")
+                 clear_btn = gr.Button("Clear Data")
+
+             # New Textbox to display summary
+             summary_box = gr.Textbox(
+                 label="Document Summary",
+                 placeholder="Summary will appear here.",
+                 interactive=False,
+                 rows=5,
+                 elem_id="summary_box" # Set the elem_id to match the state key
            )
+

-     demo.launch()
+         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
+         question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
+         submit_btn = gr.Button("Send Message")
+
+         load_pdf.click(loading_pdf, None, langchain_status, queue=False)
+         load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False).then(
+             update_summary_box
+         )
+
+
+         # Then update the summary_box
+         clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
+         question.submit(add_text, [chatbot, question], [chatbot, question]).then(
+             bot, chatbot, chatbot
+         )
+         submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
+             bot, chatbot, chatbot
+         )

+ demo.launch()
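
Note on the summarization path this commit introduces: the reworked pdf_changes() summarizes the whole PDF with a "stuff" chain, i.e. every loaded Document is concatenated into the {text} slot of a single prompt and summarized in one LLM call, which is presumably why the model is also bumped to the 16k-context gpt-3.5-turbo-16k. A minimal, self-contained sketch of that pattern, using the same 2023-era LangChain classes the commit imports; the summarize_pdf helper name is illustrative, not part of app.py:

from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import OnlinePDFLoader
from langchain.prompts import PromptTemplate

def summarize_pdf(pdf_path: str) -> str:
    # Load the PDF into LangChain Document objects
    documents = OnlinePDFLoader(pdf_path).load()

    # One summarization prompt; {text} receives all document contents joined together
    prompt = PromptTemplate.from_template(
        'Write a concise summary of the following:\n"{text}"\nCONCISE SUMMARY:'
    )

    # "Stuff" strategy: a single LLM call over the entire document set
    llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
    stuff_chain = StuffDocumentsChain(
        llm_chain=LLMChain(llm=llm, prompt=prompt),
        document_variable_name="text",
    )
    return stuff_chain.run(documents)

Stuffing keeps the summary grounded in the full text but fails once the PDF exceeds the model's context window; LangChain's map_reduce or refine summarization chains are the usual fallback for longer documents.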
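
On the Gradio side, the commit chains update_summary_box() after pdf_changes() so the summary stored in the full_summary global can be shown in the new Document Summary textbox. A minimal sketch of that event chain under Gradio 3.x Blocks; the explicit outputs= on the .then() step, the plain-string return value, and the stub names are assumptions for illustration rather than a copy of the committed wiring:

import gradio as gr

full_summary = ""  # filled in by the PDF-loading step, as in app.py

def load_pdf_stub(pdf_file):
    # Stand-in for pdf_changes(): load, summarize, store the summary, return a status string
    global full_summary
    full_summary = "summary text goes here"
    return "Ready. Full Summary loaded."

def update_summary_box():
    # Returning a string and declaring summary_box as the output updates the textbox
    return full_summary

with gr.Blocks() as demo:
    pdf_doc = gr.File(label="Load a pdf", file_types=[".pdf"])
    status = gr.Textbox(label="Status", interactive=False)
    summary_box = gr.Textbox(label="Document Summary", interactive=False)
    load_btn = gr.Button("Convert PDF to Magic AI language")

    # First update the status box, then push the stored summary into the summary box
    load_btn.click(load_pdf_stub, inputs=[pdf_doc], outputs=[status], queue=False).then(
        update_summary_box, inputs=None, outputs=[summary_box]
    )

demo.launch()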