Spaces:
Running
Running
from unittest import skip | |
from gruut import sentences | |
import torch | |
importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt") | |
accentor = importer.load_pickle("uk-accentor", "model") | |
replace_accents = importer.load_pickle("uk-accentor", "replace_accents") | |
# Using GPU | |
# accentor.cuda() | |
# Back to CPU | |
# accentor.cpu() | |
vowels = "аеєиіїоуюя" | |
consonants = "бвгґджзйклмнпрстфхцчшщь" | |
special = "'" | |
alphabet = vowels + consonants + special | |
def accent_word(word): | |
with torch.no_grad(): | |
stressed_words = accentor.predict([word], mode='stress') | |
plused_words = [replace_accents(x) for x in stressed_words] | |
return plused_words[0] | |
def sentence_to_stress(sentence): | |
words = sentence.split() | |
words = sum([[word, " "] for word in words], start=[]) | |
new_list = [] | |
for word in words: | |
first_word_sep = list(map(lambda letter: letter in alphabet, word.lower())) | |
if all(first_word_sep): | |
new_list.append(word) | |
else: | |
current_index = 0 | |
past_index = 0 | |
for letter in first_word_sep: | |
if letter == False: | |
new_list.append(word[past_index:current_index]) | |
new_list.append(word[current_index]) | |
past_index = current_index + 1 | |
current_index += 1 | |
new_list.append(word[past_index:current_index]) | |
#print(list(filter(lambda x: len(x) > 0, new_list))) | |
for word_index in range(0, len(new_list)): | |
element = new_list[word_index] | |
first_word_sep = list(map(lambda letter: letter in alphabet, element.lower())) | |
if not all(first_word_sep) or len(element) == 0: | |
continue | |
else: | |
vowels_in_words = list(map(lambda letter: letter in vowels, new_list[word_index])) | |
if vowels_in_words.count(True) == 0: | |
continue | |
elif vowels_in_words.count(True) == 1: | |
vowel_index = vowels_in_words.index(True) | |
new_list[word_index] = new_list[word_index][0:vowel_index] + "+" + new_list[word_index][vowel_index::] | |
else: | |
new_list[word_index] = accent_word(new_list[word_index]) | |
return "".join(new_list) | |
if __name__ == "__main__": | |
sentence = "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району." | |
print(sentence_to_stress(sentence)) | |
sentence = "Привіт, як тебе звати?" | |
print(sentence_to_stress(sentence)) | |
#test_words1 = ["словотворення", "архаїчний", "програма", "а-ля-фуршет"] | |
stressed_words = accentor.predict(["привіт"], mode='stress') | |
plused_words = [replace_accents(x) for x in stressed_words] | |
print('With stress:', stressed_words) | |
print('With pluses:', plused_words) | |