Spaces:
Runtime error
Runtime error
from sklearn.feature_extraction.text import TfidfVectorizer | |
from spacy.lang.en import English | |
import numpy as np | |
import gradio as gr | |
# Blank English pipeline: tokenizer only, no statistical model to download.
nlp = English()
# `create_pipe` only builds a component and returns it; the result was being
# discarded, so the pipeline never set sentence boundaries and `doc.sents`
# failed at request time. `add_pipe` (spaCy v3 string form) actually registers
# the rule-based sentence splitter on the pipeline.
nlp.add_pipe("sentencizer")
def summarizer(text, tokenizer, max_sent_in_summary=5):
    """Return an extractive summary made of the highest-scoring sentences.

    The text is split into sentences with the module-level spaCy pipeline
    ``nlp``. Each sentence is treated as one document for a TF-IDF model, and
    a sentence's score is the sum of its TF-IDF weights. The best-scoring
    sentences are returned in the order they appear in the input.

    Args:
        text: The text to summarize.
        tokenizer: Unused. Kept so existing callers that pass two positional
            arguments keep working.
        max_sent_in_summary: Maximum number of sentences in the summary.

    Returns:
        The selected sentences joined by single spaces. An empty string when
        the text contains no sentences or the limit is not positive.
    """
    # Replace newlines with a space (not an empty string) so the last word of
    # one line is not fused with the first word of the next.
    doc = nlp(text.replace("\n", " "))
    sentences = [sent.text.strip() for sent in doc.sents]
    sentences = [sentence for sentence in sentences if sentence]

    limit = max_sent_in_summary
    if not sentences or limit <= 0:
        return ""
    # Nothing to rank: every sentence fits in the summary. This also avoids
    # fitting TF-IDF on tiny inputs, where min_df=2 prunes the vocabulary.
    if len(sentences) <= limit:
        return " ".join(sentences)

    # TF-IDF (Term Frequency - Inverse Document Frequency) over uni- to
    # tri-grams. The boolean options are real booleans rather than 1, which
    # stricter parameter validation in scikit-learn may refuse.
    tf_idf_vectorizer = TfidfVectorizer(
        min_df=2,
        max_features=None,
        strip_accents="unicode",
        analyzer="word",
        token_pattern=r"\w{1,}",
        ngram_range=(1, 3),
        use_idf=True,
        smooth_idf=True,
        sublinear_tf=True,
        stop_words="english",
    )
    try:
        # Each sentence is one document; fit and vectorize in a single pass.
        sentence_vectors = tf_idf_vectorizer.fit_transform(sentences)
    except ValueError:
        # Raised when no term survives pruning (e.g. no term occurs in at
        # least two sentences, or only stop words remain). Without scores the
        # best effort is the leading sentences instead of crashing.
        return " ".join(sentences[:limit])

    # One score per sentence: the sum of its TF-IDF weights.
    sentence_scores = np.asarray(sentence_vectors.sum(axis=1)).ravel()

    # Stable sort on the negated scores: highest score first, ties resolved
    # in favour of the earlier sentence, so the result is deterministic.
    top_indices = np.argsort(-sentence_scores, kind="stable")[:limit]

    # Sorting the indices restores the original reading order. Working with
    # positions instead of a sentence-text -> index dict keeps duplicated
    # sentences at their own positions.
    return " ".join(sentences[index] for index in sorted(top_indices))
# Web UI around `summarizer`: two free-text input boxes and one text output.
# Gradio passes the input components to `fn` positionally, so the two boxes
# feed the function's first two parameters.
demo = gr.Interface(fn=summarizer, inputs=["text", "text"], outputs="text")

# Start the app with Gradio's debug mode enabled.
demo.launch(debug=True)