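# Wikipedia Text Summarizer (Gradio Space)
#
# Fetches the summary section of a Wikipedia article for a given topic and
# shows it next to two extractive summaries: one scored by lemmatized word
# frequencies, the other by TF-IDF sentence weights.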
import gradio as gr
from gradio.mix import Parallel
import wikipedia
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
import nltk
from nltk.stem import WordNetLemmatizer
from heapq import nlargest
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import warnings

# One-time NLTK data: WordNet for lemmatization, Punkt for tokenization.
nltk.download('wordnet', quiet=True)
nltk.download('punkt', quiet=True)

warnings.filterwarnings("ignore")
def get_wiki_original_text(inp):
    # Fetch the plain-text summary section of the Wikipedia article on `inp`.
    return wikipedia.summary(inp)
def get_wiki_summary_by_lem(inp):
    # Frequency-based extractive summary: score each sentence by the
    # document-wide frequency of its lemmatized, non-stopword tokens.
    text = wikipedia.summary(inp)

    stopwords = list(STOP_WORDS)
    lemmatizer = WordNetLemmatizer()

    # Lowercase and lemmatize every token, dropping punctuation, stopwords,
    # and single-character tokens.
    tokens = [lemmatizer.lemmatize(str(token).lower())
              for token in nltk.word_tokenize(text)
              if str(token) not in punctuation
              and str(token).lower() not in stopwords
              and len(token) > 1]

    # Count how often each lemma occurs in the whole text.
    word_counts = {}
    for token in tokens:
        word_counts[token] = word_counts.get(token, 0) + 1

    # Score each sentence as the sum of its lemmas' counts.
    sentence_scores = {}
    for sentence in nltk.sent_tokenize(text):
        sentence_scores[sentence] = 0
        for wrd in nltk.word_tokenize(sentence):
            lemma = lemmatizer.lemmatize(str(wrd).lower())
            if lemma in word_counts:
                sentence_scores[sentence] += word_counts[lemma]

    # Keep 20% of the sentences for longer texts, 50% for short ones.
    if len(sentence_scores) > 5:
        summary_length = int(len(sentence_scores) * 0.20)
    else:
        summary_length = int(len(sentence_scores) * 0.50)

    # Emit the top-scoring sentences in their original document order.
    top_sentences = nlargest(summary_length, sentence_scores, key=sentence_scores.get)
    summary = ''
    for sentence in nltk.sent_tokenize(text):
        if sentence in top_sentences:
            summary += sentence.replace('\n', '') + ' '

    return summary
def get_wiki_summary_by_tfidf(inp):
    # TF-IDF-based extractive summary: score each sentence by the total
    # TF-IDF weight of its 1- to 3-gram features.
    text = wikipedia.summary(inp)

    all_sentences = [str(sent) for sent in nltk.sent_tokenize(text)]

    # Vectorize sentences with uni-, bi-, and trigram TF-IDF features.
    tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 3))
    sentence_vectors = tfidf_vectorizer.fit_transform(all_sentences)

    # Sum each sentence's TF-IDF weights and flatten the resulting column
    # into a 1-D score vector.
    sentence_scores_vector = np.hstack(np.array(sentence_vectors.sum(axis=1)))
    sentence_scores = dict(zip(all_sentences, sentence_scores_vector))

    # Keep 20% of the sentences for longer texts, 50% for short ones.
    if len(sentence_scores) > 5:
        summary_length = int(len(sentence_scores) * 0.20)
    else:
        summary_length = int(len(sentence_scores) * 0.50)

    # Emit the top-scoring sentences in their original document order.
    top_sentences = nlargest(summary_length, sentence_scores, key=sentence_scores.get)
    summary = ''
    for sentence in all_sentences:
        if sentence in top_sentences:
            summary += sentence.replace('\n', '') + ' '

    return summary
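
# Quick sanity check outside the Gradio UI (hypothetical usage; the topic
# below is just an illustration, taken from the examples list):
#   print(get_wiki_original_text('Great Depression'))
#   print(get_wiki_summary_by_lem('Great Depression'))
#   print(get_wiki_summary_by_tfidf('Great Depression'))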
desc = """This interface summarizes Wikipedia content. Just enter a topic: the app fetches the matching article summary from Wikipedia and condenses it with two different extractive summarization methods. The number of sentences in the output depends on the length of the original text."""
sample = [['Europe'],['Great Depression'],['Crocodile Dundee']]
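
# Parallel runs all three interfaces on the same input and displays the
# original text and the two summaries side by side.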
iface = Parallel(
    gr.Interface(fn=get_wiki_original_text, inputs=gr.inputs.Textbox(label="Text"), outputs="text", description='Original Text'),
    gr.Interface(fn=get_wiki_summary_by_lem, inputs=gr.inputs.Textbox(label="Text"), outputs="text", description='Summary 1'),
    gr.Interface(fn=get_wiki_summary_by_tfidf, inputs=gr.inputs.Textbox(label="Text"), outputs="text", description='Summary 2'),
    title='Text Summarizer',
    description=desc,
    examples=sample,
    inputs=gr.inputs.Textbox(label="Text"))

iface.launch(inline=False)