Spaces:

ml6team
/

distilbart-tos-summarizer-tosdr

Build error

App Files Files Community

distilbart-tos-summarizer-tosdr / app.py

sdhanabal1

Add extractive summary information using LSA

8d4dd5e almost 3 years ago

raw

history blame

3.49 kB

	from textwrap import wrap
	from transformers import pipeline
	import streamlit as st

	from sumy.parsers.plaintext import PlaintextParser
	from sumy.nlp.tokenizers import Tokenizer
	from sumy.nlp.stemmers import Stemmer
	from sumy.summarizers.lsa import LsaSummarizer
	from sumy.utils import get_stop_words

	import nltk
	# Download the NLTK 'punkt' resource each time the script runs
	# (presumably required by the sumy Tokenizer used below — TODO confirm).
	nltk.download('punkt')

	# Language passed to the stemmer, the stop-word lookup and the sumy tokenizer.
	DEFAULT_LANGUAGE = "english"
	# Initial value of the "number of sentences to be extracted" input.
	DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10
	# One module-level LSA summarizer, configured with a stemmer and stop words
	# for DEFAULT_LANGUAGE.
	stemmer = Stemmer(DEFAULT_LANGUAGE)
	lsa_summarizer = LsaSummarizer(stemmer)
	lsa_summarizer.stop_words = get_stop_words(language=DEFAULT_LANGUAGE)

	# Page title, a short description of the two-stage summarization, and a
	# link to the model card.
	st.markdown('# Terms & conditions abstractive summarization model :pencil:')
	st.write('This app provides the abstract summary of the provided terms & conditions. '
	'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization')
	st.write('Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')

	# Usage instructions rendered as markdown.
	st.markdown("""
	To use this:
	- Number of sentences to be extracted is configurable
	- Copy terms & conditions and hit 'Summarize'
	""")


	@st.cache(show_spinner=False,
	    suppress_st_warning=True,
	    allow_output_mutation=True)
	def load_model():
	    """Create the Hugging Face summarization pipeline for the ToS model.

	    The function is wrapped in ``st.cache`` with Streamlit's own spinner
	    disabled; a custom spinner message is shown while the pipeline is built.

	    Returns:
	        The ``transformers`` summarization pipeline, using the same
	        checkpoint for both the model and the tokenizer.
	    """
	    checkpoint = 'ml6team/distilbart-tos-summarizer-tosdr'
	    with st.spinner('Please wait for the model to load...'):
	        return pipeline(
	            task='summarization',
	            model=checkpoint,
	            tokenizer=checkpoint,
	        )


	# Obtain the summarization pipeline via the cached loader.
	tc_pipeline = load_model()

	# Seed the session state with defaults the first time each key is seen.
	for state_key, initial_value in (
	    ('tc_text', ""),
	    ('sentences_length', DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH),
	):
	    if state_key not in st.session_state:
	        st.session_state[state_key] = initial_value

	st.header("Input")
	# Both inputs live in one form, so they are submitted together via the
	# 'Summarize' button.
	input_form = st.form(key='terms-and-conditions')
	with input_form:
	    sentences_length_input = st.number_input(
	        label='Number of sentences to be extracted:',
	        min_value=1,
	        value=st.session_state['sentences_length'],
	    )
	    tc_text_input = st.text_area(
	        label='Terms & conditions text:',
	        value=st.session_state['tc_text'],
	        height=240,
	    )
	    submit_button = st.form_submit_button(label='Summarize')

	st.header("Output")


	def generate_abstractive_summary(summary, max_chunk_chars: int = 2048) -> str:
	    """Summarize ``summary`` with the abstractive model and return the text.

	    The input is split with ``textwrap.wrap`` into chunks of at most
	    ``max_chunk_chars`` characters, every chunk is passed to the module-level
	    ``tc_pipeline``, and the ``summary_text`` of each result is joined with
	    single spaces.

	    Args:
	        summary: Text to summarize (here: the extractive summary).
	        max_chunk_chars: Maximum width of each chunk handed to the pipeline.

	    Returns:
	        The concatenated abstractive summary, or ``""`` when ``summary`` is
	        empty or contains only whitespace.
	    """
	    chunks = wrap(summary, max_chunk_chars)
	    if not chunks:
	        # wrap() yields no chunks for blank input; skip the model call
	        # instead of invoking the pipeline with an empty batch.
	        return ""
	    return " ".join(result['summary_text'] for result in tc_pipeline(chunks))


	def generate_extractive_summary(text, sentences_count: int) -> str:
	    """Pick the most relevant sentences of ``text`` using LSA.

	    The text is parsed with sumy's ``PlaintextParser`` and the module-level
	    ``lsa_summarizer`` selects up to ``sentences_count`` sentences, which are
	    joined with single spaces.

	    Args:
	        text: Raw terms & conditions text.
	        sentences_count: Number of sentences requested from the summarizer.

	    Returns:
	        The selected sentences as one string, or ``""`` when ``text`` is empty
	        or contains only whitespace.
	    """
	    if not text or not text.strip():
	        # Nothing to extract from; avoid building a tokenizer and parser.
	        return ""

	    parser = PlaintextParser.from_string(text, Tokenizer(DEFAULT_LANGUAGE))
	    selected_sentences = lsa_summarizer(parser.document, sentences_count)
	    # str() is the public way to get a sentence's text; it replaces the
	    # previous read of the private ``_text`` attribute.
	    return " ".join(str(sentence) for sentence in selected_sentences)


	def display_abstractive_summary(summary) -> None:
	    """Render ``summary`` in a text area under an "Abstractive Summary" heading."""
	    st.subheader("Abstractive Summary")
	    # Heading marker with no text; presumably used as a vertical spacer.
	    st.markdown('#####')
	    # NOTE(review): this box and the extractive one share label and height;
	    # confirm Streamlit accepts them when both summaries are identical.
	    text_area_options = dict(label='', value=summary, height=240)
	    st.text_area(**text_area_options)


	def display_extractive_summary(summary) -> None:
	    """Render ``summary`` in a text area under an "Extractive Summary" heading."""
	    st.subheader("Extractive Summary")
	    # Heading marker with no text; presumably used as a vertical spacer.
	    st.markdown('#####')
	    # NOTE(review): this box and the abstractive one share label and height;
	    # confirm Streamlit accepts them when both summaries are identical.
	    text_area_options = dict(label='', value=summary, height=240)
	    st.text_area(**text_area_options)


	# Runs only on the script pass triggered by pressing 'Summarize'.
	if submit_button:
	    tc_text = tc_text_input
	    sentences_length = sentences_length_input

	    if not tc_text.strip():
	        # Blank submission: there is nothing to summarize, so skip both
	        # summarizers and tell the user instead of showing two empty boxes.
	        st.warning('Please provide the terms & conditions text to summarize.')
	    else:
	        # Stage 1: LSA picks the requested number of sentences.
	        extract_summary = generate_extractive_summary(tc_text, sentences_length)
	        # Stage 2: the model rewrites the extracted sentences.
	        abstract_summary = generate_abstractive_summary(extract_summary)

	        display_extractive_summary(extract_summary)
	        display_abstractive_summary(abstract_summary)