Commit 89f1955 by Paula Leonova
Parent: 44ef896

Update summary section to include multiple text inputs

Files changed (1):
  1. app.py +35 -26
app.py CHANGED
@@ -48,7 +48,7 @@ with st.form(key='my_form'):
                                             accept_multiple_files=True, key = 'text_uploader',
                                             type = 'txt')
     st.write("__Option B:__")
-    uploaded_csv_text_files = st.file_uploader(label='Upload a CSV file with columns: "title" and "text"',
+    uploaded_csv_text_files = st.file_uploader(label='Upload a CSV file with two columns: "title" and "text"',
                                             accept_multiple_files=False, key = 'csv_text_uploader',
                                             type = 'csv')
 
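Option B implies each CSV row becomes one document to summarize. A minimal sketch of reading such an upload into title/text records, assuming pandas is imported as pd (the helper name load_csv_texts is hypothetical; app.py's actual handling may differ):

```python
import pandas as pd

def load_csv_texts(uploaded_file):
    # Hypothetical helper: st.file_uploader returns a file-like object,
    # which pd.read_csv can consume directly. Keep only the two expected
    # columns and drop rows where either value is missing.
    df = pd.read_csv(uploaded_file)
    return df[['title', 'text']].dropna()
```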
@@ -145,7 +145,7 @@ if submit_button or example_button:
 
 
     if len(text_input) != 0:
-        text_df = pd.DataFrame.from_dict({'title': ['sample'], 'text': [text_input]})
+        text_df = pd.DataFrame.from_dict({'title': ['Submitted Text'], 'text': [text_input]})
 
 
         with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
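The spinner message describes the chunking step: input text must be split so no chunk exceeds the model's 1024-token limit. A rough sketch of greedy sentence packing under such a budget (a sketch only; the tokenizer argument and the chunk_text name are assumptions, not app.py's actual implementation):

```python
def chunk_text(sentences, tokenizer, max_tokens=1024):
    # Pack whole sentences into chunks, starting a new chunk whenever
    # adding the next sentence would exceed the token budget.
    chunks, current, used = [], [], 0
    for sentence in sentences:
        n_tokens = len(tokenizer.encode(sentence))
        if current and used + n_tokens > max_tokens:
            chunks.append(' '.join(current))
            current, used = [], 0
        current.append(sentence)
        used += n_tokens
    if current:
        chunks.append(' '.join(current))
    return chunks
```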
@@ -185,6 +185,7 @@ if submit_button or example_button:
             title_element = ['title']
             kw_column_list = ['keyword', 'score']
             kw_df = kw_df[kw_df['score'] > 0.25][title_element + kw_column_list].sort_values(title_element + ['score'], ascending=False).reset_index().drop(columns='index')
+
             st.dataframe(kw_df)
             st.download_button(
                 label="Download data as CSV",
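To see what the kw_df expression in this hunk does, here is the same filter-and-sort chain on made-up data (the keywords and scores are illustrative only):

```python
import pandas as pd

kw_df = pd.DataFrame({'title': ['doc1', 'doc1', 'doc2'],
                      'keyword': ['nlp', 'tokens', 'bart'],
                      'score': [0.90, 0.10, 0.60]})
title_element = ['title']
kw_column_list = ['keyword', 'score']
# Drop low-confidence keywords, then sort by title and score, descending.
kw_df = kw_df[kw_df['score'] > 0.25][title_element + kw_column_list] \
    .sort_values(title_element + ['score'], ascending=False) \
    .reset_index().drop(columns='index')
print(kw_df)  # 'tokens' (0.10) is filtered out; doc2 sorts before doc1
```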
@@ -195,30 +196,38 @@ if submit_button or example_button:
 
 
         st.markdown("### Summary")
-        with st.spinner(f'Generating summaries for {text_chunk_counter} text chunks (this may take a minute)...'):
-
-            my_summary_expander = st.expander(label=f'Expand to see intermediate summary generation details for {len(text_chunks)} text chunks')
-            with my_summary_expander:
-                summary = []
-
-                st.markdown("_Once the original text is broken into smaller chunks (totaling no more than 1024 tokens, \
-                with complete sentences), each block of text is then summarized separately using BART NLI \
-                and then combined at the very end to generate the final summary._")
-
-                for num_chunk, text_chunk in enumerate(text_chunks):
-                    st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}")
-                    st.markdown(text_chunk)
-
-                    chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens = 300, minimum_tokens = 20)
-                    summary.append(chunk_summary)
-                    st.markdown(f"###### Partial Summary {num_chunk+1}/{len(text_chunks)}")
-                    st.markdown(chunk_summary)
-                # Combine all the summaries into a list and compress into one document, again
-                final_summary = " \n\n".join(list(summary))
-
-            st.markdown(final_summary)
-
-    if len(text_input) == 0 or len(labels) == 0:
+        with st.spinner(f'Generating summaries for {len(text_df)} texts consisting of a total of {text_chunk_counter} chunks (this may take a minute)...'):
+            sum_dict = dict()
+            for i, key in enumerate(text_chunks_lib):
+                with st.expander(label=f'({i+1}/{len(text_df)}) Expand to see intermediate summary generation details for: {key}', expanded=False):
+                    # for key in text_chunks_lib:
+                    summary = []
+                    for num_chunk, text_chunk in enumerate(text_chunks_lib[key]):
+                        chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens=300, minimum_tokens=20)
+                        summary.append(chunk_summary)
+
+                        st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks_lib[key])}")
+                        st.markdown(text_chunk)
+                        st.markdown(f"###### Partial Summary {num_chunk+1}/{len(text_chunks_lib[key])}")
+                        st.markdown(chunk_summary)
+
+                    # Combine all the partial summaries and compress into one document, again
+                    final_summary = "\n\n".join(summary)
+                    sum_dict[key] = [final_summary]
+
+            sum_df = pd.DataFrame.from_dict(sum_dict).reset_index().T
+            # sum_df.columns = ['title', 'summary_text']
+
+            st.dataframe(sum_df)
+            st.download_button(
+                label="Download data as CSV",
+                data=sum_df.to_csv().encode('utf-8'),
+                file_name='title_summary.csv',
+                mime='text/csv',
+            )
+
+    if (len(text_input) == 0 or len(labels) == 0
+            or uploaded_labels_file is None or uploaded_text_files is None or uploaded_csv_text_files is None):
         st.error('Enter some text and at least one possible topic to see label predictions.')
     else:
         st.markdown("### Top Label Predictions on Summary vs Full Text")
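The reworked block produces one combined summary per input document, keyed by title. A standalone sketch of that aggregation with the summarizer stubbed out (fake_summarize stands in for md.summarizer_gen, which is app.py's own helper; the toy titles and chunks are invented):

```python
import pandas as pd

def fake_summarize(chunk):
    # Placeholder for md.summarizer_gen(summarizer, sequence=chunk, ...).
    return chunk[:40]

text_chunks_lib = {'Doc A': ['first chunk of doc A', 'second chunk of doc A'],
                   'Doc B': ['only chunk of doc B']}

sum_dict = dict()
for key in text_chunks_lib:
    summary = [fake_summarize(chunk) for chunk in text_chunks_lib[key]]
    sum_dict[key] = ["\n\n".join(summary)]

# Same reshaping as in the diff: titles end up as the row index after .T.
sum_df = pd.DataFrame.from_dict(sum_dict).reset_index().T
print(sum_df)
```

Note that the transpose leaves an extra 'index' row and unnamed columns, which may be why the column rename is still commented out in the diff; building the frame as pd.DataFrame({'title': list(sum_dict), 'summary_text': [v[0] for v in sum_dict.values()]}) would yield the named columns directly.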
 