Pratick committed on
Commit
889674b
1 Parent(s): 6913547

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -49
app.py CHANGED
@@ -1,50 +1,7 @@
1
- from sklearn.feature_extraction.text import TfidfVectorizer
2
- from spacy.lang.en import English
3
- import numpy as np
4
  import gradio as gr
5
-
6
- nlp = English()
7
- nlp.create_pipe('sentencizer')
8
-
9
-
10
- def summarizer(text, tokenizer, max_sent_in_summary=5):
11
- # Create spacy document for further sentence level tokenization
12
- doc = nlp(text.replace("\n", ""))
13
- sentences = [sent.text.strip() for sent in doc.sents]
14
- # Let's create an organizer which will store the sentence ordering to later reorganize the
15
- # scored sentences in their correct order
16
- sentence_organizer = {k:v for v,k in enumerate(sentences)}
17
- # Let's now create a TF-IDF (Term Frequency-Inverse Document Frequency) model
18
- tf_idf_vectorizer = TfidfVectorizer(min_df=2, max_features=None,
19
- strip_accents='unicode',
20
- analyzer='word',
21
- token_pattern=r'\w{1,}',
22
- ngram_range=(1, 3),
23
- use_idf=1,smooth_idf=1,
24
- sublinear_tf=1,
25
- stop_words = 'english')
26
- # Passing our sentences treating each as one document to TF-IDF vectorizer
27
- tf_idf_vectorizer.fit(sentences)
28
- # Transforming our sentences to TF-IDF vectors
29
- sentence_vectors = tf_idf_vectorizer.transform(sentences)
30
- # Getting a score for each sentence
31
- sentence_scores = np.array(sentence_vectors.sum(axis=1)).ravel()
32
- # Getting top-n sentences
33
- N = max_sent_in_summary
34
- top_n_sentences = [sentences[ind] for ind in np.argsort(sentence_scores, axis=0)[::-1][:N]]
35
- # Let's now do the sentence ordering using our prebaked sentence_organizer
36
- # Let's map the scored sentences with their indexes
37
- mapped_top_n_sentences = [(sentence,sentence_organizer[sentence]) for sentence in top_n_sentences]
38
- # Ordering our top-n sentences in their original ordering
39
- mapped_top_n_sentences = sorted(mapped_top_n_sentences, key = lambda x: x[1])
40
- ordered_scored_sentences = [element[0] for element in mapped_top_n_sentences]
41
- # Our final summary
42
- summary = " ".join(ordered_scored_sentences)
43
- return summary
44
-
45
- demo = gr.Interface(
46
- fn=summarizer,
47
- inputs=["text","text"],
48
- outputs="text",
49
- )
50
- demo.launch(debug=True)
 
 
 
1
  import gradio as gr
2
+ title = 'Text Summarization'
3
+ text_ = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
4
+ interface = gr.Interface.load("huggingface/facebook/bart-large-cnn",
5
+ title = title,
6
+ theme = "peach",
7
+ examples = [[text_]]).launch()