sdhanabal1 committed on
Commit
8d4dd5e
1 Parent(s): 90f2ef6

Add extractive summary information using LSA

Browse files
Files changed (2) hide show
  1. app.py +75 -20
  2. requirements.txt +4 -2
app.py CHANGED
@@ -2,13 +2,31 @@ from textwrap import wrap
2
  from transformers import pipeline
3
  import streamlit as st
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  st.markdown('# Terms & conditions abstractive summarization model :pencil:')
6
- st.write('This app summarizes the provided terms & conditions.')
 
7
  st.write('Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')
8
 
9
  st.markdown("""
10
  To use this:
11
- - Copy terms & conditions and hit 'Summarize':point_down:""")
 
 
12
 
13
 
14
  @st.cache(allow_output_mutation=True,
@@ -26,31 +44,68 @@ def load_model():
26
 
27
  tc_pipeline = load_model()
28
 
29
- if 'text' not in st.session_state:
30
- st.session_state['text'] = ""
 
 
 
31
 
32
  st.header("Input")
33
- form = st.form(key='terms-and-conditions')
34
- placeholder = form.empty()
35
- placeholder.empty()
36
- tc_text = placeholder.text_area(
37
- value=st.session_state.text,
38
- label='Terms & conditions text:',
39
- key='tc_text',
40
- height=240
41
-
42
- )
43
- submit_button = form.form_submit_button(label='Summarize')
 
44
 
45
  st.header("Output")
46
 
47
- if submit_button:
48
- base_text = st.session_state.tc_text
49
- output_text = " ".join([result['summary_text'] for result in tc_pipeline(wrap(base_text, 2048))])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  st.markdown('#####')
51
  st.text_area(
52
- value=output_text,
53
- label="Summary",
54
  height=240
55
  )
56
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers import pipeline
3
  import streamlit as st
4
 
5
+ from sumy.parsers.plaintext import PlaintextParser
6
+ from sumy.nlp.tokenizers import Tokenizer
7
+ from sumy.nlp.stemmers import Stemmer
8
+ from sumy.summarizers.lsa import LsaSummarizer
9
+ from sumy.utils import get_stop_words
10
+
11
+ import nltk
12
+ nltk.download('punkt')
13
+
14
+ DEFAULT_LANGUAGE = "english"
15
+ DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10
16
+ stemmer = Stemmer(DEFAULT_LANGUAGE)
17
+ lsa_summarizer = LsaSummarizer(stemmer)
18
+ lsa_summarizer.stop_words = get_stop_words(language=DEFAULT_LANGUAGE)
19
+
20
  st.markdown('# Terms & conditions abstractive summarization model :pencil:')
21
+ st.write('This app provides the abstract summary of the provided terms & conditions. '
22
+ 'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization')
23
  st.write('Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')
24
 
25
  st.markdown("""
26
  To use this:
27
+ - Number of sentences to be extracted is configurable
28
+ - Copy terms & conditions and hit 'Summarize'
29
+ """)
30
 
31
 
32
  @st.cache(allow_output_mutation=True,
 
44
 
45
  tc_pipeline = load_model()
46
 
47
+ if 'tc_text' not in st.session_state:
48
+ st.session_state['tc_text'] = ""
49
+
50
+ if 'sentences_length' not in st.session_state:
51
+ st.session_state['sentences_length'] = DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH
52
 
53
  st.header("Input")
54
+ with st.form(key='terms-and-conditions'):
55
+ sentences_length_input = st.number_input(
56
+ label='Number of sentences to be extracted:',
57
+ min_value=1,
58
+ value=st.session_state.sentences_length
59
+ )
60
+ tc_text_input = st.text_area(
61
+ value=st.session_state.tc_text,
62
+ label='Terms & conditions text:',
63
+ height=240
64
+ )
65
+ submit_button = st.form_submit_button(label='Summarize')
66
 
67
  st.header("Output")
68
 
69
+
70
+ def generate_abstractive_summary(summary) -> str:
71
+ summary_text = " ".join([result['summary_text'] for result in tc_pipeline(wrap(summary, 2048))])
72
+ return summary_text
73
+
74
+
75
+ def generate_extractive_summary(text, sentences_count: int) -> str:
76
+ parser = PlaintextParser.from_string(text, Tokenizer(DEFAULT_LANGUAGE))
77
+ summarized_sentences = lsa_summarizer(parser.document, sentences_count)
78
+ summarized_text = " ".join([sentence._text for sentence in summarized_sentences])
79
+ return summarized_text
80
+
81
+
82
+ def display_abstractive_summary(summary) -> None:
83
+ st.subheader("Abstractive Summary")
84
+ st.markdown('#####')
85
+ st.text_area(
86
+ value=summary,
87
+ label='',
88
+ height=240
89
+ )
90
+
91
+
92
+ def display_extractive_summary(summary) -> None:
93
+ st.subheader("Extractive Summary")
94
  st.markdown('#####')
95
  st.text_area(
96
+ value=summary,
97
+ label='',
98
  height=240
99
  )
100
 
101
+
102
+ if submit_button:
103
+ tc_text = tc_text_input
104
+ sentences_length = sentences_length_input
105
+
106
+ extract_summary = generate_extractive_summary(tc_text, sentences_length)
107
+ abstract_summary = generate_abstractive_summary(extract_summary)
108
+
109
+ display_extractive_summary(extract_summary)
110
+ display_abstractive_summary(abstract_summary)
111
+
requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
  nlpaug==1.1.7
2
- streamlit==1.0.0
3
  torch==1.9.1
4
  torchvision==0.10.1
5
- transformers==4.10.3
 
 
 
1
  nlpaug==1.1.7
2
+ streamlit
3
  torch==1.9.1
4
  torchvision==0.10.1
5
+ transformers
6
+ sumy==0.9.0
7
+ nltk