Spaces:

wldmr
/

lexrank-gr

Sleeping

lexrank-gr / lexrank.py

lex2

4b8cf81 almost 2 years ago

1.47 kB

	#import nltk
	#nltk.download('punkt')

	from sumy.parsers.html import HtmlParser
	from sumy.parsers.plaintext import PlaintextParser
	from sumy.nlp.tokenizers import Tokenizer
	from sumy.summarizers.lex_rank import LexRankSummarizer
	from sumy.nlp.stemmers import Stemmer
	from sumy.utils import get_stop_words

	def get_Summary(in_text, nr_sentences=1):
	    """Return a LexRank summary of ``in_text`` as one string.

	    Args:
	        in_text: Plain text to summarize.
	        nr_sentences: Number of sentences to request from ``get_Lexrank``.
	            Defaults to 1, the value that was previously hard-coded.

	    Returns:
	        The selected sentences with every '.' stripped from each item and
	        the items joined by '. '. Empty, whitespace-only or ``None`` input
	        yields '' (the same string an empty summary would join to).
	    """
	    # Nothing to rank: answer directly instead of invoking the summarizer.
	    if not in_text or not in_text.strip():
	        return ''

	    list_summary = get_Lexrank(in_text, nr_sentences)

	    # A single LexRank item can consist of several actual sentences that
	    # are separated by full stops, in which case the corresponding
	    # timestamp cannot be found. Therefore the full stops inside every
	    # item are dropped and the items are concatenated with '. ' so the
	    # result can be split on full stops again.
	    return '. '.join(str(item).replace('.', '') for item in list_summary)

	def get_Lexrank(text, nr_sentences, language="english"):
	    """Select the top-ranked sentences of ``text`` with sumy's LexRank.

	    Args:
	        text: Plain text to summarize.
	        nr_sentences: Number of sentences requested from the summarizer.
	        language: Language name passed to the tokenizer, the stemmer and
	            the stop-word lookup. Defaults to "english", the value that
	            was previously hard-coded.

	    Returns:
	        A list with the sentence objects yielded by the summarizer.
	    """
	    parser = PlaintextParser.from_string(text, Tokenizer(language))
	    summarizer = LexRankSummarizer(Stemmer(language))
	    summarizer.stop_words = get_stop_words(language)
	    # The summarizer call yields the chosen sentences; materialize them
	    # so callers keep receiving a list.
	    return list(summarizer(parser.document, nr_sentences))