nickmuchi committed on
Commit 12da133
Parent: b0f397c

Update app.py

Files changed (1): app.py (+17, -15)
app.py CHANGED
@@ -254,11 +254,11 @@ def highlight_entities(article_content,summary_output):
     matched_entities, unmatched_entities = get_and_compare_entities(article_content,summary_output)
 
     for entity in matched_entities:
-        summary_content = summary_output.replace(entity, markdown_start_green + entity + markdown_end)
+        summary_output = summary_output.replace(entity, markdown_start_green + entity + markdown_end)
 
     for entity in unmatched_entities:
-        summary_content = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
-    soup = BeautifulSoup(summary_content, features="html.parser")
+        summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
+    soup = BeautifulSoup(summary_output, features="html.parser")
     return HTML_WRAPPER.format(soup)
 
 
@@ -338,11 +338,12 @@ def schleifer_model():
                           device=0 if torch.cuda.is_available() else -1)
     return summarizer
 
-#@st.experimental_singleton(suppress_st_warning=True)
-#def google_model():
-
-    #summarizer = pipeline('summarization',model='google/pegasus-cnn_dailymail')
-    #return summarizer
+@st.experimental_singleton(suppress_st_warning=True)
+def google_model():
+    model_name = 'google/pegasus-cnn_dailymail'
+    summarizer = pipeline('summarization',model=model_name, tokenizer=model_name,
+                          device=0 if torch.cuda.is_available() else -1)
+    return summarizer
 
 @st.experimental_singleton(suppress_st_warning=True)
 def get_sentence_embedding_model():
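The new google_model mirrors the existing facebook_model and schleifer_model loaders: @st.experimental_singleton caches the constructed pipeline across Streamlit reruns, so the large Pegasus checkpoint is loaded once per process rather than on every widget interaction. A runnable sketch of the same loader outside Streamlit (the model id comes from the diff; the sample call is illustrative):

import torch
from transformers import pipeline

def google_model():
    model_name = 'google/pegasus-cnn_dailymail'
    # pipeline() accepts a Hub model id for both model and tokenizer;
    # device=0 targets the first GPU, -1 stays on CPU.
    return pipeline('summarization', model=model_name, tokenizer=model_name,
                    device=0 if torch.cuda.is_available() else -1)

summarizer = google_model()
result = summarizer("Your long article text goes here ...", max_length=60, min_length=10)
print(result[0]['summary_text'])
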
@@ -450,8 +451,8 @@ if summarize:
             text="Loading Facebook-Bart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
         ):
             summarizer_model = facebook_model()
-            summarized_text = summarizer_model(text_to_summarize.strip().replace("\n", " "), max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True)
-            summarized_text = ' '.join([summ['summary_text'].replace("<n>", " ") for summ in summarized_text])
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
+            summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
 
     elif model_type == "Sshleifer-DistilBart":
         if url_text:
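This hunk and the one below add the same two generation arguments to every model branch: no_repeat_ngram_size=4 forbids the decoder from emitting any 4-token sequence twice, a standard guard against the repetition loops summarization models can fall into, and clean_up_tokenization_spaces=True tidies spacing around punctuation in the decoded text. A hedged illustration of the constrained call (model id and article are placeholders, not from the diff):

from transformers import pipeline

summarizer = pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')
article = ("The city council met on Tuesday to debate the new transit plan. "
           "The plan would add bus lanes on three major avenues and extend "
           "service hours on weekends, officials said.")

# Unconstrained decoding can repeat phrases on hard inputs; with the
# constraint, any 4-gram may appear at most once in the output.
guarded = summarizer(article, max_length=60, min_length=10,
                     clean_up_tokenization_spaces=True, no_repeat_ngram_size=4)
print(guarded[0]['summary_text'])
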
@@ -463,25 +464,26 @@ if summarize:
             text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
         ):
             summarizer_model = schleifer_model()
-            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
             summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
 
     elif model_type == "Google-Pegasus":
         if url_text:
-            text_to_summarize = cleaned_text
+            text_to_summarize = cleaned_text[0]
+
         else:
-            text_to_summarize = cleaned_text
+            text_to_summarize = cleaned_text[0]
 
         with st.spinner(
             text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
         ):
             summarizer_model = google_model()
-            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
             summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
 
     with st.spinner("Calculating and matching entities, this takes a few seconds..."):
 
-        entity_match_html = highlight_entities(' '.join(cleaned_text),summarized_text)
+        entity_match_html = highlight_entities(' '.join(text_to_summarize),summarized_text)
         st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
         st.markdown("####")
 
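A summarization pipeline returns a list with one dict per input, each holding a 'summary_text' key, which is why every branch finishes with the same join. The shape, with placeholder values:

# What summarizer_model(...) hands back (values are placeholders):
summarized_text = [{'summary_text': 'First chunk summary.'},
                   {'summary_text': 'Second chunk summary.'}]
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
print(summarized_text)  # -> First chunk summary. Second chunk summary.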
 