Keane Moraes committed
Commit
359769b
1 Parent(s): aec1dec

fix for the key error

Files changed (2)
  1. app.py +13 -6
  2. summary.py +23 -9
app.py CHANGED
@@ -88,13 +88,13 @@ def generate_word_embeddings():
 
 def generate_text_chunks_lib():
 
+    global data_transcription
     global title_entry, text_chunks_lib
     global keywords
     global tldr
     global summary
     global takeaways
     global input_accepted
-    global data_transcription
 
     # For each body of text, create text chunks of a certain token size required for the transformer
     text_df = pd.DataFrame.from_dict({"title": [data_transcription["title"]], "text":[data_transcription["text"]]})
@@ -191,13 +191,20 @@ with st.sidebar:
     thread1.join()
     thread2.join()
 
+    def generate_summary():
+        pass
+
+    def generate_key_takeaways():
+        pass
+
+    threadSum = Thread(target=generate_summary)
+    threadTak = Thread(target=generate_key_takeaways)
+
     # Generate the summary
     if gen_summary == 'Yes':
-        print("\n\nTITLE ENTRY: ", title_entry)
         se = TextSummarizer(title_entry)
         text_transcription = data_transcription['text']
         with st.spinner("Generating summary and TLDR..."):
-            print("\n\nTEXT_CHNK_SUMMARY\n\n", text_chunks_lib)
             summary = se.generate_full_summary(text_chunks_lib)
             summary_list = summary.split("\n\n")
             tldr = se.generate_short_summary(summary_list)
@@ -208,6 +215,9 @@ with st.sidebar:
         takeaways = kt.generate_key_takeaways(text_chunks_lib)
         is_completed_analysis = True
         bar.progress(100)
+
+        with open(f"{folder_name}/data.json", "w") as f:
+            json.dump(data_transcription, f, indent=4)
 
     if is_completed_analysis:
         st.header("Key Takeaways")
@@ -331,9 +341,6 @@ with tab6:
     print("user input is ", user_input)
    print("the folder name at got here 0.5 is ", folder_name)
 
-    # if 'messages' not in st.session_state:
-    #     st.session_state['messages'] = get_initial_message()
-
    if user_input:
        print("got here 1")
        print("the folder name at got here 1.5 is ", folder_name)
summary.py CHANGED
@@ -1,6 +1,6 @@
 import models as md
 import nltk
-
+from threading import Thread
 import openai
 import os
 
@@ -39,17 +39,31 @@ class TextSummarizer:
 
     def generate_full_summary(self, text_chunks_lib:dict) -> str:
         sum_dict = dict()
+
+        chunk_summaries = []
+
+        def generate_chunk_summary(text_chunk:str, i: int) -> str:
+            chunk_summary = md.summarizer_gen(self.summarizer, sequence=text_chunk, maximum_tokens=500, minimum_tokens=100)
+            chunk_summaries[i] = chunk_summary
+
         for _, key in enumerate(text_chunks_lib):
 
-            # for key in text_chunks_lib:
             summary = []
-            for _, text_chunk in enumerate(text_chunks_lib[key]):
-                chunk_summary = md.summarizer_gen(self.summarizer, sequence=text_chunk, maximum_tokens=500, minimum_tokens=100)
-                summary.append(chunk_summary)
-
-            # Combine all the summaries into a list and compress into one document, again
-            final_summary = "\n\n".join(list(summary))
-            sum_dict[key] = [final_summary]
+            threads = []
+
+            # make the chunk summaries in parallel
+            chunk_summaries = [None] * len(text_chunks_lib[key])
+            for i, text_chunk in enumerate(text_chunks_lib[key]):
+                threads.append(Thread(target=generate_chunk_summary, args=(text_chunk, i)))
+
+            for thread in threads:
+                thread.start()
+
+            for thread in threads:
+                thread.join()
+
+            final_summary = "\n\n".join(chunk_summaries)
+            sum_dict[key] = [final_summary]
 
         return sum_dict[self.title][0]
 
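The rewritten generate_full_summary starts one Thread per text chunk and writes each result into chunk_summaries by index, so the joined output keeps the original chunk order. The same fan-out can be written more compactly with concurrent.futures; a sketch assuming md.summarizer_gen keeps the signature used in the diff:

    from concurrent.futures import ThreadPoolExecutor

    def generate_full_summary(self, text_chunks_lib: dict) -> str:
        sum_dict = dict()
        for key in text_chunks_lib:
            # executor.map preserves input order, so no manual index bookkeeping
            with ThreadPoolExecutor() as executor:
                chunk_summaries = list(executor.map(
                    lambda chunk: md.summarizer_gen(self.summarizer, sequence=chunk,
                                                    maximum_tokens=500, minimum_tokens=100),
                    text_chunks_lib[key]))
            sum_dict[key] = ["\n\n".join(chunk_summaries)]
        return sum_dict[self.title][0]

Threads are a reasonable choice here if the summarizer call waits on I/O or releases the GIL; for pure-Python CPU-bound inference a process pool would parallelize better.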