Paula Leonova committed
Commit 51fcc5c
1 Parent(s): e452a5c

Update spinners and sentence chunking

Files changed (1)
  1. app.py +69 -39
app.py CHANGED
@@ -32,6 +32,11 @@ with st.form(key='my_form'):
     if text_input == display_text:
         text_input = example_text
 
+    gen_keywords = st.radio(
+        "Generate keywords from text?",
+        ('Yes', 'No')
+    )
+
     labels = st.text_input('Enter possible labels (comma-separated):', ex_labels, max_chars=1000)
     labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
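The gen_keywords radio added here is collected with the form, but nothing consumes it yet; the keyword step later in this diff is still commented out. A minimal sketch of how the toggle could eventually gate that step inside the submit handler, where text_input and the md module are in scope; md.keyword_gen is a hypothetical helper, not something this commit defines:

    # Hypothetical wiring for the stubbed keyword feature. md.keyword_gen
    # stands in for whatever keyword extractor eventually backs it.
    if gen_keywords == 'Yes':
        st.markdown("### Top Keywords")
        with st.spinner("Generating keywords from text..."):
            keywords = md.keyword_gen(text_input)  # assumed signature
            st.write(keywords)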
@@ -45,51 +50,76 @@ with st.form(key='my_form'):
     submit_button = st.form_submit_button(label='Submit')
 
 
-with st.spinner('Loading pretrained summarizer mnli model...'):
+
+with st.spinner('Loading pretrained summarizer and classifier mnli model...'):
     start = time.time()
     summarizer = md.load_summary_model()
-    st.success(f'Time taken to load summarizer mnli model: {round(time.time() - start,4)} seconds')
+    s_time = round(time.time() - start,4)
 
-with st.spinner('Loading pretrained classifier mnli model...'):
     start = time.time()
-    classifier = md.load_model()
-    st.success(f'Time taken to load classifier mnli model: {round(time.time() - start,4)} seconds')
+    classifier = md.load_model()
+    c_time = round(time.time() - start,4)
+
+    st.success(f'Time taken to load: summarizer mnli model {s_time}s & classifier mnli model {c_time}s')
 
 
 if submit_button:
-    if len(labels) == 0:
-        st.write('Enter some text and at least one possible topic to see predictions.')
-
-    with st.spinner('Generating summaries and matching labels...'):
-        my_expander = st.expander(label='Expand to see summary generation details')
-        with my_expander:
+    if len(text_input) == 0:
+        st.write("Enter some text to generate a summary")
+    else:
+        with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
             # For each body of text, create text chunks of a certain token size required for the transformer
             nested_sentences = md.create_nest_sentences(document = text_input, token_max_length = 1024)
-
-            summary = []
-            # st.markdown("### Text Chunk & Summaries")
-            st.markdown("_Breaks up the original text into sections with complete sentences totaling \
-            less than 1024 tokens, a requirement for the summarizer. Each block of text is then summarized separately \
-            and then combined at the very end to generate the final summary._")
-
-            # For each chunk of sentences (within the token max), generate a summary
-            for n in range(0, len(nested_sentences)):
-                text_chunk = " ".join(map(str, nested_sentences[n]))
-                st.markdown(f"###### Original Text Chunk {n+1}/{len(nested_sentences)}")
-                st.markdown(text_chunk)
-
-                chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens = 300, minimum_tokens = 20)
-                summary.append(chunk_summary)
-                st.markdown(f"###### Partial Summary {n+1}/{len(nested_sentences)}")
-                st.markdown(chunk_summary)
-            # Combine all the summaries into a list and compress into one document, again
-            final_summary = " \n\n".join(list(summary))
-
-            # final_summary = md.summarizer_gen(summarizer, sequence=text_input, maximum_tokens = 30, minimum_tokens = 100)
-            st.markdown("### Combined Summary")
-            st.markdown(final_summary)
-
-
+            # For each chunk of sentences (within the token max), store the joined text
+            text_chunks = []
+            for n in range(0, len(nested_sentences)):
+                tc = " ".join(map(str, nested_sentences[n]))
+                text_chunks.append(tc)
+
+        with st.spinner('Generating summaries for text chunks...'):
+            my_expander = st.expander(label='Expand to see summary generation details')
+            with my_expander:
+                summary = []
+                st.markdown("### Text Chunk & Summaries")
+
+                for num_chunk, text_chunk in enumerate(text_chunks):
+                    st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}")
+                    st.markdown(text_chunk)
+
+                    chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens = 300, minimum_tokens = 20)
+                    summary.append(chunk_summary)
+                    st.markdown(f"###### Partial Summary {num_chunk+1}/{len(text_chunks)}")
+                    st.markdown(chunk_summary)
+                # Combine all the chunk summaries into one final document
+                final_summary = " \n\n".join(list(summary))
+
+                st.markdown("### Combined Summary")
+                st.markdown(final_summary)
+
+    # if gen_keywords == 'Yes':
+    #     st.markdown("### Top Keywords")
+    #     with st.spinner("Generating keywords from text..."):
+    #         keywords =
+
+    if len(text_input) == 0 or len(labels) == 0:
+        st.write('Enter some text and at least one possible topic to see predictions.')
+    else:
         st.markdown("### Top Label Predictions on Summary & Full Text")
         with st.spinner('Matching labels...'):
             topics, scores = md.classifier_zero(classifier, sequence=final_summary, labels=labels, multi_class=True)
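app.py treats md.create_nest_sentences as a black box that packs whole sentences into chunks that stay under the summarizer's 1024-token cap. A rough sketch of that kind of chunker, assuming an nltk sentence splitter and a BART tokenizer for token counts; the real helper in the md module may differ:

    # Sketch of a sentence-preserving chunker; assumptions noted above.
    # Requires a one-time nltk.download('punkt').
    import nltk
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

    def create_nest_sentences(document, token_max_length=1024):
        nested = []      # completed chunks, each a list of sentences
        chunk = []       # sentences accumulated for the current chunk
        length = 0       # running token count of the current chunk
        for sentence in nltk.sent_tokenize(document):
            n_tokens = len(tokenizer.encode(sentence, add_special_tokens=False))
            if length + n_tokens > token_max_length and chunk:
                nested.append(chunk)   # close the chunk before it overflows
                chunk, length = [], 0
            chunk.append(sentence)
            length += n_tokens
        if chunk:
            nested.append(chunk)       # flush the trailing chunk
        return nested

Joining each returned chunk with " ".join(...) then yields exactly the text_chunks list the app builds above.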
@@ -146,7 +176,7 @@ if submit_button:
     section_header_description = ['Summary Label Performance', 'Original Full Text Label Performance']
     data_headers = ['scores_from_summary', 'scores_from_full_text']
     for i in range(0,2):
-        st.markdown(f"##### {section_header_description[i]}")
+        st.markdown(f"###### {section_header_description[i]}")
         report = classification_report(y_true = data2[['is_true_label']],
                                        y_pred = (data2[[data_headers[i]]] >= threshold_value) * 1.0,
                                        output_dict=True)
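The report binarizes each score column at threshold_value before scoring it against the true labels. A self-contained illustration of that pattern; the column names mirror app.py, but the numbers are invented:

    import pandas as pd
    from sklearn.metrics import classification_report

    data2 = pd.DataFrame({
        'is_true_label':       [1.0, 0.0, 1.0, 0.0],
        'scores_from_summary': [0.91, 0.42, 0.77, 0.55],  # illustrative only
    })
    threshold_value = 0.7

    # Scores at or above the threshold become positive predictions (1.0)
    y_pred = (data2[['scores_from_summary']] >= threshold_value) * 1.0
    report = classification_report(y_true=data2[['is_true_label']],
                                   y_pred=y_pred,
                                   output_dict=True)
    df_report = pd.DataFrame(report).transpose()
    print(df_report)  # per-class precision/recall/f1, as the app displays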
@@ -154,5 +184,5 @@ if submit_button:
     st.markdown(f"Threshold set for: {threshold_value}")
     st.dataframe(df_report)
 
-    st.success('All done!')
-    st.balloons()
+        st.success('All done!')
+        st.balloons()
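md.load_summary_model and md.summarizer_gen live in the md module rather than in this diff. A plausible minimal implementation on top of the transformers summarization pipeline, assuming a BART checkpoint; the real module may wrap a different model or API:

    from transformers import pipeline

    def load_summary_model():
        # facebook/bart-large-cnn is an assumed checkpoint choice
        return pipeline("summarization", model="facebook/bart-large-cnn")

    def summarizer_gen(summarizer, sequence, maximum_tokens=300, minimum_tokens=20):
        output = summarizer(sequence, max_length=maximum_tokens,
                            min_length=minimum_tokens, truncation=True)
        return output[0]['summary_text']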
 
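Likewise, md.load_model and md.classifier_zero are opaque here, but the "mnli model" wording in the status messages suggests a zero-shot pipeline over an MNLI checkpoint. A sketch under that assumption:

    from transformers import pipeline

    def load_model():
        # facebook/bart-large-mnli is an assumed zero-shot checkpoint
        return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

    def classifier_zero(classifier, sequence, labels, multi_class=True):
        # multi_label lets label scores vary independently rather than sum to 1
        out = classifier(sequence, labels, multi_label=multi_class)
        return out['labels'], out['scores']

This matches the call site in the diff: topics, scores = md.classifier_zero(classifier, sequence=final_summary, labels=labels, multi_class=True).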