|
|
|
|
|
|
|
from sumy.parsers.html import HtmlParser |
|
from sumy.parsers.plaintext import PlaintextParser |
|
from sumy.nlp.tokenizers import Tokenizer |
|
from sumy.summarizers.lex_rank import LexRankSummarizer |
|
from sumy.nlp.stemmers import Stemmer |
|
from sumy.utils import get_stop_words |
|
|
|
def getSummary(text, nr_sentences):
    """Return the sentences LexRank selects as a summary of ``text``.

    The input is parsed as English plain text. A LexRank summarizer is
    configured with an English stemmer and the English stop-word list,
    then asked for ``nr_sentences`` sentences.

    Args:
        text: Plain-text string to summarize.
        nr_sentences: Sentence count handed unchanged to the summarizer.

    Returns:
        A list holding the sentence objects produced by the summarizer,
        in the order the summarizer yields them. These are the
        summarizer's own objects, not plain strings.
    """
    language = "english"

    parser = PlaintextParser.from_string(text, Tokenizer(language))

    summarizer = LexRankSummarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)

    # Callers receive a fresh list, exactly as the former append loop built.
    return list(summarizer(parser.document, nr_sentences))
|
|