Paula Leonova committed on
Commit
39c7695
1 Parent(s): 7055ca6

Add back option for single text entry

Browse files
Files changed (1) hide show
  1. app.py +21 -9
app.py CHANGED
@@ -19,8 +19,8 @@ ex_long_text = example_long_text_load()
19
 
20
  # if __name__ == '__main__':
21
  st.markdown("### Long Text Summarization & Multi-Label Classification")
22
- st.write("This app summarizes and then classifies your long text with multiple labels using [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli). The keywords are generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT).")
23
- st.write("__Inputs__: User enters their own custom text and labels.")
24
  st.write("__Outputs__: A summary of the text, likelihood percentages for each label and a downloadable csv of the results. \
25
  Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
26
 
@@ -110,16 +110,19 @@ with st.spinner('Loading pretrained models...'):
110
  kw_model = md.load_keyword_model()
111
  k_time = round(time.time() - start,4)
112
 
113
- st.success(f'Time taken to load various models: {k_time}s for KeyBERT model & {s_time}s for BART summarizer mnli model & {c_time}s for BART classifier mnli model.')
114
-
115
 
116
  if submit_button or example_button:
117
  if len(text_input) == 0 and uploaded_text_files is None and uploaded_csv_text_files is None:
118
  st.error("Enter some text to generate a summary")
119
  else:
120
 
 
 
 
121
  # OPTION A:
122
- if uploaded_text_files is not None:
123
  st.markdown("### Text Inputs")
124
  st.write('Files concatenated into a dataframe:')
125
  file_names = []
@@ -141,6 +144,10 @@ if submit_button or example_button:
141
  # OPTION B: [TO DO: DIRECT CSV UPLOAD INSTEAD]
142
 
143
 
 
 
 
 
144
  with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
145
  # For each body of text, create text chunks of a certain token size required for the transformer
146
 
@@ -165,17 +172,22 @@ if submit_button or example_button:
165
  for text_chunk in text_chunks_lib[key]:
166
  keywords_list = md.keyword_gen(kw_model, text_chunk)
167
  kw_dict[key] = dict(keywords_list)
168
-
169
  kw_df0 = pd.DataFrame.from_dict(kw_dict).reset_index()
170
  kw_df0.rename(columns={'index': 'keyword'}, inplace=True)
171
  kw_df = pd.melt(kw_df0, id_vars=['keyword'], var_name='title', value_name='score').dropna()
172
- kw_df = kw_df[kw_df['score'] > 0.1][['title', 'keyword', 'score']].reset_index().drop(columns='index').sort_values(['title', 'score'], ascending=False)
 
 
 
 
 
173
  st.dataframe(kw_df)
174
  st.download_button(
175
  label="Download data as CSV",
176
  data=kw_df.to_csv().encode('utf-8'),
177
- file_name='title_kewyords.csv',
178
- mime='title_kewyords/csv',
179
  )
180
 
181
 
 
19
 
20
  # if __name__ == '__main__':
21
  st.markdown("### Long Text Summarization & Multi-Label Classification")
22
+ st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli). The keywords are generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT).")
23
+ st.write("__Inputs__: User enters their own custom text(s) and labels.")
24
  st.write("__Outputs__: A summary of the text, likelihood percentages for each label and a downloadable csv of the results. \
25
  Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
26
 
 
110
  kw_model = md.load_keyword_model()
111
  k_time = round(time.time() - start,4)
112
 
113
+ st.spinner(f'Time taken to load various models: {k_time}s for KeyBERT model & {s_time}s for BART summarizer mnli model & {c_time}s for BART classifier mnli model.')
114
+ # st.success(None)
115
 
116
  if submit_button or example_button:
117
  if len(text_input) == 0 and uploaded_text_files is None and uploaded_csv_text_files is None:
118
  st.error("Enter some text to generate a summary")
119
  else:
120
 
121
+ if len(text_input) != 0:
122
+ text_df = pd.DataFrame.from_dict({'title': ['sample'], 'text': [text_input]})
123
+
124
  # OPTION A:
125
+ elif uploaded_text_files is not None:
126
  st.markdown("### Text Inputs")
127
  st.write('Files concatenated into a dataframe:')
128
  file_names = []
 
144
  # OPTION B: [TO DO: DIRECT CSV UPLOAD INSTEAD]
145
 
146
 
147
+ if len(text_input) != 0:
148
+ text_df = pd.DataFrame.from_dict({'title': ['sample'], 'text': [text_input]})
149
+
150
+
151
  with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
152
  # For each body of text, create text chunks of a certain token size required for the transformer
153
 
 
172
  for text_chunk in text_chunks_lib[key]:
173
  keywords_list = md.keyword_gen(kw_model, text_chunk)
174
  kw_dict[key] = dict(keywords_list)
175
+ # Display as a dataframe
176
  kw_df0 = pd.DataFrame.from_dict(kw_dict).reset_index()
177
  kw_df0.rename(columns={'index': 'keyword'}, inplace=True)
178
  kw_df = pd.melt(kw_df0, id_vars=['keyword'], var_name='title', value_name='score').dropna()
179
+ if len(text_input) != 0:
180
+ title_element = []
181
+ else:
182
+ title_element = ['title']
183
+ kw_column_list = ['keyword', 'score']
184
+ kw_df = kw_df[kw_df['score'] > 0.1][title_element + kw_column_list].sort_values(title_element + ['score'], ascending=False).reset_index().drop(columns='index')
185
  st.dataframe(kw_df)
186
  st.download_button(
187
  label="Download data as CSV",
188
  data=kw_df.to_csv().encode('utf-8'),
189
+ file_name='title_keywords.csv',
190
+ mime='title_keywords/csv',
191
  )
192
 
193