Paula Leonova committed on
Commit
71541e6
1 Parent(s): 32dc2d3

Update summarization to use BART Large CNN

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. models.py +1 -1
app.py CHANGED
@@ -22,7 +22,7 @@ ex_long_text = example_long_text_load()
22
  ######## App Description ##########
23
  ###################################
24
  st.markdown("### Long Text Summarization & Multi-Label Classification")
25
- st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli). The keywords are generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT).")
26
  st.write("__Inputs__: User enters their own custom text(s) and labels.")
27
  st.write("__Outputs__: A summary of the text, likelihood match score for each label and a downloadable csv of the results. \
28
  Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
@@ -246,7 +246,7 @@ if submit_button or example_button:
246
  # for key in text_chunks_lib:
247
  summary = []
248
  for num_chunk, text_chunk in enumerate(text_chunks_lib[key]):
249
- chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens=300, minimum_tokens=20)
250
  summary.append(chunk_summary)
251
 
252
  st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}" )
22
  ######## App Description ##########
23
  ###################################
24
  st.markdown("### Long Text Summarization & Multi-Label Classification")
25
+ st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large CNN](https://huggingface.co/facebook/bart-large-cnn) for the summarization task and [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli) for the multi-labels matching. The keywords are independently generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT) and not used in any downstream tasks.")
26
  st.write("__Inputs__: User enters their own custom text(s) and labels.")
27
  st.write("__Outputs__: A summary of the text, likelihood match score for each label and a downloadable csv of the results. \
28
  Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
246
  # for key in text_chunks_lib:
247
  summary = []
248
  for num_chunk, text_chunk in enumerate(text_chunks_lib[key]):
249
+ chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens=400, minimum_tokens=100)
250
  summary.append(chunk_summary)
251
 
252
  st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}" )
models.py CHANGED
@@ -50,7 +50,7 @@ def keyword_gen(kw_model, sequence:str):
50
  # Reference: https://huggingface.co/facebook/bart-large-mnli
51
  @st.cache(allow_output_mutation=True)
52
  def load_summary_model():
53
- model_name = "facebook/bart-large-mnli"
54
  summarizer = pipeline(task='summarization', model=model_name)
55
  return summarizer
56
 
50
  # Reference: https://huggingface.co/facebook/bart-large-mnli
51
  @st.cache(allow_output_mutation=True)
52
  def load_summary_model():
53
+ model_name = "facebook/bart-large-cnn"
54
  summarizer = pipeline(task='summarization', model=model_name)
55
  return summarizer
56