Spaces:
Runtime error
Runtime error
| from random import choice | |
| import pymorphy3 | |
| morph = pymorphy3.MorphAnalyzer() | |
# The Token class takes a word, the word preceding it, the full text, the
# containing sentence, and two boolean flags (starter, turbo), and builds a
# token object with attributes such as count, score, and contexts.
class Token:
    """A (previous word, word) pair with its frequency and sentence context."""

    def __init__(self, word, prev_word, text, sentence, starter = False, turbo = False):
        """
        Initialize a Token from a word, its predecessor, and the surrounding text.

        :param word: The current word being analyzed
        :param prev_word: The word that comes before the current word in the text
        :param text: a string containing the entire text to be analyzed
        :param sentence: a string representing a sentence in which the word and prev_word occur
        :param starter: A boolean flag stored on the token as-is and shown in its repr,
            defaults to False (optional)
        :param turbo: A boolean parameter that, when set to True, skips the morphological analysis of words
            in the sentence and simply adds all words to the context list. This can be useful for faster
            processing, but may result in less accurate context information, defaults to False (optional)
        """
        self.word = word
        self.prev_word = prev_word
        # NOTE: str.count is a plain substring count, so "prev_word word" is also
        # counted when it appears inside longer words.
        self.count = text.count(prev_word + " " + word)
        self.score = 0
        self.starter = starter

        # split() with no arguments already ignores leading/trailing whitespace.
        sentence_words = sentence.split()

        # Turbo mode: every word of the sentence is context; no morphology needed.
        if turbo:
            self.contexts = sentence_words
            return

        self.contexts = []
        for w in sentence_words:
            parses = morph.parse(w)
            if not parses:
                continue
            # Only the first parse returned by the analyzer is considered.
            best = parses[0]
            # Skip parses tagged 'LATN' and anything that is not a noun.
            if 'LATN' in best.tag or best.tag.POS != 'NOUN':
                continue
            # Keep both the surface form and its normal form as context.
            self.contexts.append(w)
            self.contexts.append(best.normal_form)

    def __repr__(self):
        """
        Return a quoted summary: previous word, current word, an optional
        "starter" marker, the match count ("m") and the number of contexts ("c").

        :return: A string representation of a Token.
        """
        return f"'{self.prev_word} > {self.word} ({'starter, ' if self.starter else ''}{self.count}m, {len(self.contexts)}c)'"