Keane Moraes committed
Commit
359769b
1 Parent(s): aec1dec

fix for the key error

Files changed (2)
  1. app.py +13 -6
  2. summary.py +23 -9
app.py CHANGED
@@ -88,13 +88,13 @@ def generate_word_embeddings():
 
 def generate_text_chunks_lib():
 
+    global data_transcription
     global title_entry, text_chunks_lib
     global keywords
     global tldr
     global summary
     global takeaways
     global input_accepted
-    global data_transcription
 
     # For each body of text, create text chunks of a certain token size required for the transformer
     text_df = pd.DataFrame.from_dict({"title": [data_transcription["title"]], "text":[data_transcription["text"]]})
@@ -191,13 +191,20 @@ with st.sidebar:
     thread1.join()
     thread2.join()
 
+    def generate_summary():
+        pass
+
+    def generate_key_takeaways():
+        pass
+
+    threadSum = Thread(target=generate_summary)
+    threadTak = Thread(target=generate_key_takeaways)
+
     # Generate the summary
     if gen_summary == 'Yes':
-        print("\n\nTITLE ENTRY: ", title_entry)
         se = TextSummarizer(title_entry)
         text_transcription = data_transcription['text']
         with st.spinner("Generating summary and TLDR..."):
-            print("\n\nTEXT_CHNK_SUMMARY\n\n", text_chunks_lib)
             summary = se.generate_full_summary(text_chunks_lib)
             summary_list = summary.split("\n\n")
             tldr = se.generate_short_summary(summary_list)
@@ -208,6 +215,9 @@ with st.sidebar:
         takeaways = kt.generate_key_takeaways(text_chunks_lib)
         is_completed_analysis = True
         bar.progress(100)
+
+        with open(f"{folder_name}/data.json", "w") as f:
+            json.dump(data_transcription, f, indent=4)
 
     if is_completed_analysis:
         st.header("Key Takeaways")
@@ -331,9 +341,6 @@ with tab6:
     print("user input is ", user_input)
    print("the folder name at got here 0.5 is ", folder_name)
 
-    # if 'messages' not in st.session_state:
-    #     st.session_state['messages'] = get_initial_message()
-
    if user_input:
        print("got here 1")
        print("the folder name at got here 1.5 is ", folder_name)
summary.py CHANGED
@@ -1,6 +1,6 @@
 import models as md
 import nltk
-
+from threading import Thread
 import openai
 import os
 
@@ -39,17 +39,31 @@ class TextSummarizer:
 
     def generate_full_summary(self, text_chunks_lib:dict) -> str:
         sum_dict = dict()
+
+        chunk_summaries = []
+
+        def generate_chunk_summary(text_chunk:str, i: int) -> str:
+            chunk_summary = md.summarizer_gen(self.summarizer, sequence=text_chunk, maximum_tokens=500, minimum_tokens=100)
+            chunk_summaries[i] = chunk_summary
+
         for _, key in enumerate(text_chunks_lib):
 
-            # for key in text_chunks_lib:
             summary = []
-            for _, text_chunk in enumerate(text_chunks_lib[key]):
-                chunk_summary = md.summarizer_gen(self.summarizer, sequence=text_chunk, maximum_tokens=500, minimum_tokens=100)
-                summary.append(chunk_summary)
-
-            # Combine all the summaries into a list and compress into one document, again
-            final_summary = "\n\n".join(list(summary))
-            sum_dict[key] = [final_summary]
+            threads = []
+
+            # make the chunk summaries in parallel
+            chunk_summaries = [None] * len(text_chunks_lib[key])
+            for i, text_chunk in enumerate(text_chunks_lib[key]):
+                threads.append(Thread(target=generate_chunk_summary, args=(text_chunk, i)))
+
+            for thread in threads:
+                thread.start()
+
+            for thread in threads:
+                thread.join()
+
+            final_summary = "\n\n".join(chunk_summaries)
+            sum_dict[key] = [final_summary]
 
         return sum_dict[self.title][0]
 
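The rewritten generate_full_summary starts one Thread per text chunk and writes each result into chunk_summaries by index, so the joined output keeps the original chunk order. The same fan-out can be written more compactly with concurrent.futures; a sketch assuming md.summarizer_gen keeps the signature used in the diff:

    from concurrent.futures import ThreadPoolExecutor

    def generate_full_summary(self, text_chunks_lib: dict) -> str:
        sum_dict = dict()
        for key in text_chunks_lib:
            # executor.map preserves input order, so no manual index bookkeeping
            with ThreadPoolExecutor() as executor:
                chunk_summaries = list(executor.map(
                    lambda chunk: md.summarizer_gen(self.summarizer, sequence=chunk,
                                                    maximum_tokens=500, minimum_tokens=100),
                    text_chunks_lib[key]))
            sum_dict[key] = ["\n\n".join(chunk_summaries)]
        return sum_dict[self.title][0]

Threads are a reasonable choice here if the summarizer call waits on I/O or releases the GIL; for pure-Python CPU-bound inference a process pool would parallelize better.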