Spaces:
Running
Running
import re
from random import choice

import pymorphy3
# Single module-level analyzer, built once at import time; Token.__init__ reads it
# for every non-turbo token instead of constructing its own.
morph = pymorphy3.MorphAnalyzer()
# The Token class takes a word, the previous word, the full text, a sentence, and two boolean
# flags (starter, turbo) and creates a token object with attributes such as count, score, and contexts.
class Token:
    """A bigram ``prev_word -> word`` together with its frequency and context words.

    Instance attributes (all set in ``__init__``):

    * ``word`` / ``prev_word`` -- the two words of the bigram, stored as given.
    * ``count`` -- number of whole-word occurrences of ``"prev_word word"`` in the text.
    * ``score`` -- initialised to 0; nothing in this class changes it afterwards.
    * ``starter`` -- the ``starter`` flag passed to the constructor, stored as given.
    * ``contexts`` -- list of context words collected from the sentence.
    """

    def __init__(self, word: str, prev_word: str, text: str, sentence: str,
                 starter: bool = False, turbo: bool = False) -> None:
        """
        Initialize a Token from a word, its predecessor and the surrounding text.

        :param word: The current word being analyzed
        :param prev_word: The word that comes before the current word in the text
        :param text: a string containing the entire text to be analyzed
        :param sentence: a string representing a sentence in which the word and prev_word occur
        :param starter: A boolean flag stored on the token and shown by ``__repr__`` as
        "starter"; it does not affect anything else in this class, defaults to False (optional)
        :param turbo: A boolean parameter that, when set to True, skips the morphological analysis of words
        in the sentence and simply adds all words to the context list. This can be useful for faster
        processing, but may result in less accurate context information, defaults to False (optional)
        """
        self.word = word
        self.prev_word = prev_word
        # Whole-word count: a plain substring count would also find the bigram
        # inside longer words (e.g. "он и" inside "он идёт").
        self.count = self._count_bigram(text, prev_word + " " + word)
        self.score = 0
        self.starter = starter
        self.contexts = []
        for w in sentence.strip().split():
            if turbo:
                # Fast path: keep every whitespace-separated word, no analysis.
                self.contexts.append(w)
                continue
            parses = morph.parse(w)
            if not parses:
                continue
            # Only the first parse returned by the analyzer is considered.
            best = parses[0]
            if 'LATN' in best.tag:
                # Words tagged as Latin-script are never used as context.
                continue
            if best.tag.POS == 'NOUN':
                # Nouns contribute both their surface form and their normal form.
                self.contexts.append(w)
                self.contexts.append(best.normal_form)

    @staticmethod
    def _count_bigram(text: str, bigram: str) -> int:
        """Count non-overlapping occurrences of *bigram* in *text* on word boundaries.

        A boundary check is added only on a side where the bigram itself starts
        or ends with a word character, so bigrams that begin or end with
        punctuation or a space are still matched literally on that side.
        """
        pattern = re.escape(bigram)
        if re.match(r"\w", bigram[0]):
            pattern = r"(?<!\w)" + pattern
        if re.match(r"\w", bigram[-1]):
            pattern += r"(?!\w)"
        return len(re.findall(pattern, text))

    def __repr__(self) -> str:
        """
        Return a short human-readable summary of the Token: previous word, current
        word, an optional "starter" marker, the number of matches and the number
        of contexts.

        :return: A string representation of a Token.
        """
        return f"'{self.prev_word} > {self.word} ({'starter, ' if self.starter else ''}{self.count}m, {len(self.contexts)}c)'"