Paula Leonova committed on
Commit
bd0c13f
1 Parent(s): 42ea8fa

Add section comment headers for easier code navigation

Browse files
Files changed (1) hide show
  1. app.py +44 -5
app.py CHANGED
@@ -18,12 +18,20 @@ ex_long_text = example_long_text_load()
18
 
19
 
20
  # if __name__ == '__main__':
 
 
 
21
  st.markdown("### Long Text Summarization & Multi-Label Classification")
22
  st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli). The keywords are generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT).")
23
  st.write("__Inputs__: User enters their own custom text(s) and labels.")
24
  st.write("__Outputs__: A summary of the text, likelihood match score for each label and a downloadable csv of the results. \
25
  Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
26
 
 
 
 
 
 
27
  example_button = st.button(label='See Example')
28
  if example_button:
29
  example_text = ex_long_text #ex_text
@@ -38,7 +46,11 @@ else:
38
  title_name = 'Submitted Text'
39
 
40
 
 
41
  with st.form(key='my_form'):
 
 
 
42
  st.markdown("##### Step 1: Upload Text")
43
  text_input = st.text_area("Input any text you want to summarize & classify here (keep in mind very long text will take a while to process):", display_text)
44
 
@@ -67,7 +79,9 @@ with st.form(key='my_form'):
67
  ('Yes', 'No')
68
  )
69
 
70
- st.text("\n\n\n")
 
 
71
  st.markdown("##### Step 2: Enter Labels")
72
  labels = st.text_input('Enter possible topic labels, which can be either keywords and/or general themes (comma-separated):',input_labels, max_chars=2000)
73
  labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
@@ -77,7 +91,9 @@ with st.form(key='my_form'):
77
  uploaded_labels_file = st.file_uploader("Choose a CSV file with one column and no header, where each cell is a separate label",
78
  key='labels_uploader')
79
 
80
- st.text("\n\n\n")
 
 
81
  st.markdown("##### Step 3: Provide Ground Truth Labels (_Optional_)")
82
  glabels = st.text_input('If available, enter ground truth topic labels to evaluate results, otherwise leave blank (comma-separated):',input_glabels, max_chars=2000)
83
  glabels = list(set([x.strip() for x in glabels.strip().split(',') if len(x.strip()) > 0]))
@@ -94,7 +110,6 @@ with st.form(key='my_form'):
94
  key = 'multitext_glabels_uploader')
95
 
96
 
97
-
98
  # threshold_value = st.slider(
99
  # 'Select a threshold cutoff for matching percentage (used for ground truth label evaluation)',
100
  # 0.0, 1.0, (0.5))
@@ -103,6 +118,10 @@ with st.form(key='my_form'):
103
 
104
  st.write("_For improvments/suggestions, please file an issue here: https://github.com/pleonova/multi-label-summary-text_")
105
 
 
 
 
 
106
  with st.spinner('Loading pretrained models...'):
107
  start = time.time()
108
  summarizer = md.load_summary_model()
@@ -119,7 +138,11 @@ with st.spinner('Loading pretrained models...'):
119
  st.spinner(f'Time taken to load various models: {k_time}s for KeyBERT model & {s_time}s for BART summarizer mnli model & {c_time}s for BART classifier mnli model.')
120
  # st.success(None)
121
 
 
122
  if submit_button or example_button:
 
 
 
123
  if len(text_input) == 0 and uploaded_text_files is None and uploaded_csv_text_files is None:
124
  st.error("Enter some text to generate a summary")
125
  else:
@@ -157,6 +180,10 @@ if submit_button or example_button:
157
  else:
158
  title_element = ['title']
159
 
 
 
 
 
160
  with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
161
  # For each body of text, create text chunks of a certain token size required for the transformer
162
 
@@ -172,6 +199,10 @@ if submit_button or example_button:
172
  title_entry = text_df['title'][i]
173
  text_chunks_lib[title_entry] = text_chunks
174
 
 
 
 
 
175
  if gen_keywords == 'Yes':
176
  st.markdown("### Top Keywords")
177
  with st.spinner("Generating keywords from text..."):
@@ -201,7 +232,9 @@ if submit_button or example_button:
201
  )
202
 
203
 
204
-
 
 
205
  if gen_summary == 'Yes':
206
  st.markdown("### Summary")
207
  with st.spinner(f'Generating summaries for {len(text_df)} texts consisting of a total of {text_chunk_counter} chunks (this may take a minute)...'):
@@ -235,6 +268,9 @@ if submit_button or example_button:
235
  mime='title_summary/csv',
236
  )
237
 
 
 
 
238
  if ((len(text_input) == 0 and uploaded_text_files is None and uploaded_csv_text_files is None)
239
  or (len(labels) == 0 and uploaded_labels_file is None)):
240
  st.error('Enter some text and at least one possible topic to see label predictions.')
@@ -281,6 +317,9 @@ if submit_button or example_button:
281
  else:
282
  label_match_df = labels_full_df.copy()
283
 
 
 
 
284
  if len(glabels) > 0:
285
  gdata = pd.DataFrame({'label': glabels})
286
  join_list = ['label']
@@ -322,4 +361,4 @@ if submit_button or example_button:
322
  # st.dataframe(df_report)
323
 
324
  st.success('All done!')
325
- st.balloons()
 
18
 
19
 
20
  # if __name__ == '__main__':
21
+ ###################################
22
+ ######## App Description ##########
23
+ ###################################
24
  st.markdown("### Long Text Summarization & Multi-Label Classification")
25
  st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli). The keywords are generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT).")
26
  st.write("__Inputs__: User enters their own custom text(s) and labels.")
27
  st.write("__Outputs__: A summary of the text, likelihood match score for each label and a downloadable csv of the results. \
28
  Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
29
 
30
+
31
+
32
+ ###################################
33
+ ######## Example Input ##########
34
+ ###################################
35
  example_button = st.button(label='See Example')
36
  if example_button:
37
  example_text = ex_long_text #ex_text
 
46
  title_name = 'Submitted Text'
47
 
48
 
49
+
50
  with st.form(key='my_form'):
51
+ ###################################
52
+ ######## Form: Step 1 ##########
53
+ ###################################
54
  st.markdown("##### Step 1: Upload Text")
55
  text_input = st.text_area("Input any text you want to summarize & classify here (keep in mind very long text will take a while to process):", display_text)
56
 
 
79
  ('Yes', 'No')
80
  )
81
 
82
+ ###################################
83
+ ######## Form: Step 2 ##########
84
+ ###################################
85
  st.markdown("##### Step 2: Enter Labels")
86
  labels = st.text_input('Enter possible topic labels, which can be either keywords and/or general themes (comma-separated):',input_labels, max_chars=2000)
87
  labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
 
91
  uploaded_labels_file = st.file_uploader("Choose a CSV file with one column and no header, where each cell is a separate label",
92
  key='labels_uploader')
93
 
94
+ ###################################
95
+ ######## Form: Step 3 ##########
96
+ ###################################
97
  st.markdown("##### Step 3: Provide Ground Truth Labels (_Optional_)")
98
  glabels = st.text_input('If available, enter ground truth topic labels to evaluate results, otherwise leave blank (comma-separated):',input_glabels, max_chars=2000)
99
  glabels = list(set([x.strip() for x in glabels.strip().split(',') if len(x.strip()) > 0]))
 
110
  key = 'multitext_glabels_uploader')
111
 
112
 
 
113
  # threshold_value = st.slider(
114
  # 'Select a threshold cutoff for matching percentage (used for ground truth label evaluation)',
115
  # 0.0, 1.0, (0.5))
 
118
 
119
  st.write("_For improvments/suggestions, please file an issue here: https://github.com/pleonova/multi-label-summary-text_")
120
 
121
+
122
+ ###################################
123
+ ####### Model Load Time #########
124
+ ###################################
125
  with st.spinner('Loading pretrained models...'):
126
  start = time.time()
127
  summarizer = md.load_summary_model()
 
138
  st.spinner(f'Time taken to load various models: {k_time}s for KeyBERT model & {s_time}s for BART summarizer mnli model & {c_time}s for BART classifier mnli model.')
139
  # st.success(None)
140
 
141
+
142
  if submit_button or example_button:
143
+ ###################################
144
+ ######## Load Text Data #######
145
+ ###################################
146
  if len(text_input) == 0 and uploaded_text_files is None and uploaded_csv_text_files is None:
147
  st.error("Enter some text to generate a summary")
148
  else:
 
180
  else:
181
  title_element = ['title']
182
 
183
+
184
+ ###################################
185
+ ######## Text Chunks ##########
186
+ ###################################
187
  with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
188
  # For each body of text, create text chunks of a certain token size required for the transformer
189
 
 
199
  title_entry = text_df['title'][i]
200
  text_chunks_lib[title_entry] = text_chunks
201
 
202
+
203
+ ################################
204
+ ######## Keywords ##########
205
+ ################################
206
  if gen_keywords == 'Yes':
207
  st.markdown("### Top Keywords")
208
  with st.spinner("Generating keywords from text..."):
 
232
  )
233
 
234
 
235
+ ###################################
236
+ ########## Summarize ##########
237
+ ###################################
238
  if gen_summary == 'Yes':
239
  st.markdown("### Summary")
240
  with st.spinner(f'Generating summaries for {len(text_df)} texts consisting of a total of {text_chunk_counter} chunks (this may take a minute)...'):
 
268
  mime='title_summary/csv',
269
  )
270
 
271
+ ###################################
272
+ ########## Classifier #########
273
+ ###################################
274
  if ((len(text_input) == 0 and uploaded_text_files is None and uploaded_csv_text_files is None)
275
  or (len(labels) == 0 and uploaded_labels_file is None)):
276
  st.error('Enter some text and at least one possible topic to see label predictions.')
 
317
  else:
318
  label_match_df = labels_full_df.copy()
319
 
320
+ ###################################
321
+ ####### Ground Truth Labels ######
322
+ ###################################
323
  if len(glabels) > 0:
324
  gdata = pd.DataFrame({'label': glabels})
325
  join_list = ['label']
 
361
  # st.dataframe(df_report)
362
 
363
  st.success('All done!')
364
+ st.balloons()