Spaces:

Cat0125
/

text-generator-v2

Running

text-generator-v2 / classes.py

Cat0125

add train tab, improve quality

8e637c7 almost 2 years ago

2.17 kB

	from random import choice

	import pymorphy3

	# Module-level morphological analyzer, created once when this module is imported and
	# used by Token.__init__ to parse the words of a sentence.
	morph = pymorphy3.MorphAnalyzer()

	# The Token class takes a word, the previous word, the full text, the containing sentence, and two
	# boolean flags (starter and turbo), and creates a token object with attributes such as count,
	# score, and contexts.
	class Token:
		"""
		A word paired with the word that precedes it, together with statistics collected from
		the source text.

		Attributes set by the constructor:
			word, prev_word: the two words of the pair, stored as given.
			count: number of times "prev_word word" occurs in the text (plain substring count).
			score: initialised to 0.
			starter: the starter flag, stored as given.
			contexts: list of context words taken from the sentence.
		"""

		def __init__(self, word: str, prev_word: str, text: str, sentence: str, starter: bool = False, turbo: bool = False) -> None:
			"""
			Initialize a Token from a word, its predecessor and the surrounding text.

			:param word: The current word being analyzed
			:param prev_word: The word that comes before the current word in the text
			:param text: a string containing the entire text to be analyzed
			:param sentence: a string representing a sentence in which the word and prev_word occur
			:param starter: A boolean flag stored on the token and shown in its repr, defaults to
			False (optional)
			:param turbo: A boolean parameter that, when set to True, skips the morphological analysis
			of words in the sentence and simply adds all words to the context list, defaults to
			False (optional)
			"""
			self.word = word
			self.prev_word = prev_word
			# str.count is a substring count: the pair is also matched when it appears inside
			# longer words, and overlapping occurrences are counted once.
			self.count = text.count(prev_word + " " + word)
			self.score = 0
			self.starter = starter

			# split() with no separator already ignores leading and trailing whitespace.
			words = sentence.split()
			if turbo:
				# Turbo mode: every word of the sentence becomes a context, no analysis at all.
				self.contexts = words
				return

			self.contexts = []
			for w in words:
				parses = morph.parse(w)
				if not parses:
					continue
				# Only the first parse returned by the analyzer is considered.
				best = parses[0]
				# Words whose tag carries the LATN grammeme are never used as contexts.
				if 'LATN' in best.tag:
					continue
				if best.tag.POS == 'NOUN':
					# Nouns contribute both the surface form and the normal form.
					self.contexts.append(w)
					self.contexts.append(best.normal_form)

		def __repr__(self) -> str:
			"""
			Return a quoted summary of the token: previous word, current word, an optional
			"starter" marker, the match count and the number of contexts.

			:return: A string representation of a Token.
			"""
			return f"'{self.prev_word} > {self.word} ({'starter, ' if self.starter else ''}{self.count}m, {len(self.contexts)}c)'"