sdhanabal1 committed on
Commit
6977cda
1 Parent(s): 1a9aed5

Fix the extract summary display

Browse files
Files changed (2) hide show
  1. Summarizer.py +10 -8
  2. app.py +11 -13
Summarizer.py CHANGED
@@ -27,20 +27,22 @@ class Summarizer:
27
  summarized_list.append(sentence._text)
28
  return summarized_list
29
 
30
- def __extractive_summary(self, parser: DocumentParser, sentences_count):
31
  summarized_sentences = self.lsa_summarizer(parser.document, sentences_count)
32
  summarized_list = Summarizer.sentence_list(summarized_sentences)
33
- all_sentences_list = Summarizer.sentence_list(parser.document.sentences)
34
- return all_sentences_list, summarized_list
35
 
36
- def extractive_summary_from_text(self, text: str, sentences_count: int) -> (list, list):
37
  parser = PlaintextParser.from_string(text, Tokenizer(Summarizer.DEFAULT_LANGUAGE))
38
  return self.__extractive_summary(parser, sentences_count)
39
 
40
- def extractive_summary_from_url(self, url: str, sentences_count: int) -> (list, list):
41
  parser = HtmlParser.from_url(url, Tokenizer(Summarizer.DEFAULT_LANGUAGE))
42
  return self.__extractive_summary(parser, sentences_count)
43
 
44
- def abstractive_summary(self, summary: str) -> str:
45
- summary_text = " ".join([result['summary_text'] for result in self.pipeline(wrap(summary, 2048))])
46
- return summary_text
 
 
 
 
27
  summarized_list.append(sentence._text)
28
  return summarized_list
29
 
30
+ def __extractive_summary(self, parser: DocumentParser, sentences_count) -> list:
31
  summarized_sentences = self.lsa_summarizer(parser.document, sentences_count)
32
  summarized_list = Summarizer.sentence_list(summarized_sentences)
33
+ return summarized_list
 
34
 
35
+ def extractive_summary_from_text(self, text: str, sentences_count: int) -> list:
36
  parser = PlaintextParser.from_string(text, Tokenizer(Summarizer.DEFAULT_LANGUAGE))
37
  return self.__extractive_summary(parser, sentences_count)
38
 
39
+ def extractive_summary_from_url(self, url: str, sentences_count: int) -> list:
40
  parser = HtmlParser.from_url(url, Tokenizer(Summarizer.DEFAULT_LANGUAGE))
41
  return self.__extractive_summary(parser, sentences_count)
42
 
43
+ def abstractive_summary(self, extract_summary_sentences: list) -> list:
44
+ extract_summary = " ".join([sentence for sentence in extract_summary_sentences])
45
+ abstractive_summary_list = []
46
+ for result in self.pipeline(wrap(extract_summary, 512)):
47
+ abstractive_summary_list.append(result['summary_text'])
48
+ return abstractive_summary_list
app.py CHANGED
@@ -33,19 +33,20 @@ def main() -> None:
33
  )
34
  return terms_and_conditions_pipeline
35
 
36
- def display_abstractive_summary(summary) -> None:
37
  st.subheader("Abstractive Summary")
38
  st.markdown('#####')
39
- st.markdown(summary)
 
40
 
41
- def display_extractive_summary(terms_and_conditions_sentences: list, summary_sentences: list) -> None:
42
  st.subheader("Extractive Summary")
43
  st.markdown('#####')
44
- terms_and_conditions = " ".join(sentence for sentence in terms_and_conditions_sentences)
45
- replaced_text = terms_and_conditions
46
  for sentence in summary_sentences:
47
  replaced_text = replaced_text.replace(sentence,
48
  f"<span style='background-color: #FFFF00'>{sentence}</span>")
 
49
  st.write(replaced_text, unsafe_allow_html=True)
50
 
51
  def is_valid_url(url: str) -> bool:
@@ -82,17 +83,14 @@ def main() -> None:
82
  if submit_button:
83
 
84
  if is_valid_url(tc_text_input):
85
- (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_url(tc_text_input,
86
- sentences_length_input)
87
  else:
88
- (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_text(tc_text_input,
89
- sentences_length_input)
90
 
91
- extract_summary = " ".join([sentence for sentence in extract_summary_sentences])
92
- abstract_summary = summarizer.abstractive_summary(extract_summary)
93
 
94
- display_extractive_summary(all_sentences, extract_summary_sentences)
95
- display_abstractive_summary(abstract_summary)
96
 
97
 
98
  if __name__ == "__main__":
 
33
  )
34
  return terms_and_conditions_pipeline
35
 
36
+ def display_abstractive_summary(summary_sentences: list) -> None:
37
  st.subheader("Abstractive Summary")
38
  st.markdown('#####')
39
+ for sentence in summary_sentences:
40
+ st.markdown(f"- {sentence}", unsafe_allow_html=True)
41
 
42
+ def display_extractive_summary(terms_and_conditions_text: list, summary_sentences: list) -> None:
43
  st.subheader("Extractive Summary")
44
  st.markdown('#####')
45
+ replaced_text = terms_and_conditions_text
 
46
  for sentence in summary_sentences:
47
  replaced_text = replaced_text.replace(sentence,
48
  f"<span style='background-color: #FFFF00'>{sentence}</span>")
49
+ replaced_text = replaced_text.replace('/\r?\n/g', '<br/>')
50
  st.write(replaced_text, unsafe_allow_html=True)
51
 
52
  def is_valid_url(url: str) -> bool:
 
83
  if submit_button:
84
 
85
  if is_valid_url(tc_text_input):
86
+ extract_summary_sentences = summarizer.extractive_summary_from_url(tc_text_input, sentences_length_input)
 
87
  else:
88
+ extract_summary_sentences = summarizer.extractive_summary_from_text(tc_text_input, sentences_length_input)
 
89
 
90
+ abstract_summary_list = summarizer.abstractive_summary(extract_summary_sentences)
 
91
 
92
+ display_abstractive_summary(abstract_summary_list)
93
+ display_extractive_summary(tc_text_input, extract_summary_sentences)
94
 
95
 
96
  if __name__ == "__main__":