Spaces:

Pratick
/

Summarizer

Runtime error

Summarizer / app.py

Update app.py

df24c7f almost 2 years ago

2.37 kB

	from sklearn.feature_extraction.text import TfidfVectorizer
	from spacy.lang.en import English
	import numpy as np
	import gradio as gr

	# English() is a blank pipeline: doc.sents only works once a sentencizer
	# has actually been added to it. create_pipe() alone merely constructs the
	# component and leaves the pipeline unchanged.
	nlp = English()
	try:
	    # spaCy v3: components are added by their registered factory name.
	    nlp.add_pipe('sentencizer')
	except ValueError:
	    # spaCy v2: add_pipe expects the component object, not its name.
	    nlp.add_pipe(nlp.create_pipe('sentencizer'))


	def summarizer(text, tokenizer, max_sent_in_summary=5):
	    """Return an extractive summary made of the highest-scoring sentences.

	    Every sentence is treated as one document of a TF-IDF model and scored
	    by the sum of its TF-IDF weights. The best-scoring sentences are then
	    emitted in the order in which they appear in ``text``.

	    Args:
	        text: Raw text to summarize.
	        tokenizer: Unused. Kept so existing callers (the Gradio interface
	            supplies two text inputs) continue to work.
	        max_sent_in_summary: Maximum number of sentences in the summary.

	    Returns:
	        The selected sentences joined by single spaces, or ``""`` when
	        there is nothing to summarize.
	    """
	    # Blank input has no sentences and would only make the vectorizer raise.
	    if not text or not text.strip() or max_sent_in_summary <= 0:
	        return ""
	    # Newlines become spaces so that the last word of one line is not fused
	    # with the first word of the next.
	    doc = nlp(text.replace("\n", " "))
	    sentences = [s for s in (sent.text.strip() for sent in doc.sents) if s]
	    # Nothing to rank when the text already fits in the summary; this also
	    # avoids fitting min_df=2 on too few documents.
	    if len(sentences) <= max_sent_in_summary:
	        return " ".join(sentences)
	    # TF-IDF (Term Frequency - Inverse Document Frequency) model over word
	    # 1- to 3-grams. Flags are real booleans: recent scikit-learn releases
	    # reject integers for boolean parameters.
	    tf_idf_vectorizer = TfidfVectorizer(
	        min_df=2,
	        max_features=None,
	        strip_accents='unicode',
	        analyzer='word',
	        token_pattern=r'\w{1,}',
	        ngram_range=(1, 3),
	        use_idf=True,
	        smooth_idf=True,
	        sublinear_tf=True,
	        stop_words='english',
	    )
	    try:
	        # Fit and vectorize in one pass, one sentence per document.
	        sentence_vectors = tf_idf_vectorizer.fit_transform(sentences)
	    except ValueError:
	        # No term survived stop-word / min_df pruning, so there is nothing
	        # to score with; fall back to the leading sentences.
	        return " ".join(sentences[:max_sent_in_summary])
	    # A sentence's score is the sum of the weights of its terms.
	    sentence_scores = np.asarray(sentence_vectors.sum(axis=1)).ravel()
	    # Rank by position rather than by sentence text, so repeated sentences
	    # keep distinct positions, then restore the original ordering.
	    top_indices = np.argsort(sentence_scores)[::-1][:max_sent_in_summary]
	    return " ".join(sentences[i] for i in sorted(top_indices))

	# Gradio UI: two text inputs are handed to summarizer, one text output
	# displays the returned summary.
	demo = gr.Interface(summarizer, ["text", "text"], "text")
	# debug=True keeps the process attached and surfaces errors in the console.
	demo.launch(debug=True)