text-generator-v2 / classes.py
Cat0125
add train tab, improve quality
8e637c7
from random import choice
import pymorphy3
# Shared morphological analyzer: built once at import time and reused by every
# Token, which calls morph.parse() on each sentence word when turbo is off.
morph = pymorphy3.MorphAnalyzer()
# The Token class takes a word, the word preceding it, the full text, a sentence, and two boolean
# flags (starter, turbo), and creates a token object with attributes such as count, score, and contexts.
class Token:
    """
    A word paired with the word that precedes it, plus simple statistics about the pair.

    Attributes set by the constructor:
        word, prev_word -- the pair itself
        count           -- how many times "prev_word word" occurs in the text
        score           -- always initialised to 0
        starter         -- flag passed in by the caller
        contexts        -- list of context words taken from the sentence
    """

    def __init__(self, word, prev_word, text, sentence, starter=False, turbo=False):
        """
        This function initializes a Token with various properties related to a given word and its
        context within a sentence.

        :param word: The current word being analyzed
        :param prev_word: The word that comes before the current word in the text
        :param text: a string containing the entire text to be analyzed; it is only used to count
        how often "prev_word word" occurs
        :param sentence: a string whose whitespace-separated words supply the context list
        :param starter: A boolean flag stored on the token and reported by its repr, defaults to
        False (optional)
        :param turbo: A boolean parameter that, when set to True, skips the morphological analysis
        of words in the sentence and simply adds all words to the context list. When False, only
        words whose first parse is a noun without the 'LATN' tag are added, each followed by its
        normal form. Defaults to False (optional)
        """
        self.word = word
        self.prev_word = prev_word
        self.count = self._count_pair(text, prev_word + " " + word)
        self.score = 0
        self.starter = starter
        self.contexts = self._collect_contexts(sentence, turbo)

    @staticmethod
    def _count_pair(text, pair):
        """
        Count the occurrences of ``pair`` in ``text`` that stand on their own.

        A plain ``text.count(pair)`` also matches inside longer words (for example "he is" inside
        "the island"), which inflates the count. Here a match is only counted when the characters
        directly before and after it are not alphanumeric (or the match touches an end of the
        text). The search advances one character at a time so that back-to-back repeats such as
        "a a a" contribute both of their "a a" pairs.

        :param text: The text to search
        :param pair: The "prev_word word" string to look for
        :return: The number of stand-alone occurrences
        """
        total = 0
        size = len(pair)
        text_end = len(text)
        start = text.find(pair)
        while start != -1:
            end = start + size
            clean_left = start == 0 or not text[start - 1].isalnum()
            clean_right = end == text_end or not text[end].isalnum()
            if clean_left and clean_right:
                total += 1
            # Step by one, not by ``size``: a valid match may overlap a rejected or counted one.
            start = text.find(pair, start + 1)
        return total

    @staticmethod
    def _collect_contexts(sentence, turbo):
        """
        Build the context list for a sentence.

        :param sentence: The sentence to split on whitespace
        :param turbo: When True every word is kept and no morphological analysis is performed
        :return: A new list of context words
        """
        words = sentence.strip().split()
        if turbo:
            # The flag does not change between words, so decide once instead of per iteration.
            return words

        contexts = []
        for w in words:
            parses = morph.parse(w)
            if not parses:
                continue
            first = parses[0]  # only the first parse returned by the analyzer is considered
            if 'LATN' in first.tag:
                continue
            if first.tag.POS == 'NOUN':
                # Keep both the surface form and its normal form.
                contexts.append(w)
                contexts.append(first.normal_form)
        return contexts

    def __repr__(self):
        """
        This function returns a string representation of a Token with information about the
        previous word, current word, the starter flag (when set), number of matches, and number of
        contexts.

        :return: A string representation of a Token.
        """
        return f"'{self.prev_word} > {self.word} ({'starter, ' if self.starter else ''}{self.count}m, {len(self.contexts)}c)'"