File size: 890 Bytes
837fdb6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
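# One-time setup: sumy's Tokenizer appears to rely on NLTK's "punkt" sentence
# tokenizer data. If that data is missing, uncomment and run these two lines once.
#import nltk
#nltk.download('punkt')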
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
def getSummary(text, nr_sentences):
    """Summarize *text* with sumy's LexRank summarizer.

    The text is parsed as plain English text, and the summarizer is
    configured with an English stemmer and the English stop-word list.

    Args:
        text: The raw text to summarize, passed to
            ``PlaintextParser.from_string``.
        nr_sentences: The sentence count handed to the summarizer.

    Returns:
        A list holding the sentence objects produced by the summarizer.
    """
    language = "english"

    # Configure the summarizer first: stemming plus stop-word filtering.
    lex_rank = LexRankSummarizer(Stemmer(language))
    lex_rank.stop_words = get_stop_words(language)

    # Parse the input string into a sumy document and summarize it.
    document = PlaintextParser.from_string(text, Tokenizer(language)).document
    return list(lex_rank(document, nr_sentences))
|