ukrainian-tts / stress.py
Yurii Paniv
Apply stress model for words with vowel_count > 1
9e96240
raw history blame
No virus
3.09 kB
from unittest import skip
from gruut import sentences
import torch
# Open the packaged accentor archive; the path is relative to the current
# working directory, so the import fails if the file is not there.
importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt")
# Object whose ``predict(words, mode='stress')`` method is used by accent_word().
accentor = importer.load_pickle("uk-accentor", "model")
# Callable applied to each prediction in accent_word(); presumably converts the
# model's stress marks into "+" markers -- confirm against the package.
replace_accents = importer.load_pickle("uk-accentor", "replace_accents")
# To run the model on GPU, uncomment:
# accentor.cuda()
# To move the model back to CPU, uncomment:
# accentor.cpu()
# Lower-case Ukrainian vowel letters; used to count vowels in a word.
vowels = "аеєиіїоуюя"
# Lower-case Ukrainian consonant letters, plus the soft sign "ь".
consonants = "бвгґджзйклмнпрстфхцчшщь"
# The ASCII apostrophe, which occurs inside words (e.g. "Кам'янець").
special = "'"
# Every character that may belong to a word; anything else is a separator.
alphabet = "".join((vowels, consonants, special))
def accent_word(word):
    """Run the accentor model on a single word and return the converted result.

    The word is passed to ``accentor.predict`` as a one-element batch with
    ``mode='stress'``; every prediction is passed through ``replace_accents``
    and the first converted item is returned.
    """
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        predictions = accentor.predict([word], mode='stress')
        converted = list(map(replace_accents, predictions))
    return converted[0]
def _split_into_tokens(text):
    """Split *text* into runs of word characters and single separator characters."""
    tokens = []
    run_start = 0
    for position, char in enumerate(text):
        # Lower-case one character at a time so that ``position`` always
        # indexes into *text*: lower-casing the whole string can change its
        # length (e.g. "İ"), which would misalign the slices below.
        if char.lower() in alphabet:
            continue
        if run_start < position:
            tokens.append(text[run_start:position])
        tokens.append(char)
        run_start = position + 1
    if run_start < len(text):
        tokens.append(text[run_start:])
    return tokens


def _stress_token(token):
    """Return *token* with its stress marked, or unchanged if it has no vowel."""
    # Vowels are matched case-insensitively so capitalised words ("Як",
    # "Ігор") are counted correctly. Separator tokens contain no vowels and
    # therefore fall through unchanged.
    vowel_positions = [
        position for position, char in enumerate(token) if char.lower() in vowels
    ]
    if not vowel_positions:
        return token
    if len(vowel_positions) == 1:
        # Only one possible stress position: put "+" right before the vowel.
        position = vowel_positions[0]
        return token[:position] + "+" + token[position:]
    # Several vowels: let the model decide.
    return accent_word(token)


def sentence_to_stress(sentence):
    """Return *sentence* with stress marked in every word.

    The sentence is split on whitespace and every word is followed by exactly
    one space, so whitespace runs collapse to a single space and a non-empty
    result ends with a trailing space. Characters outside ``alphabet``
    (punctuation, digits, Latin letters, ...) are kept as they are and act as
    word separators.

    For each run of alphabet characters:

    * no vowel -- left unchanged;
    * exactly one vowel -- a "+" is inserted immediately before that vowel;
    * more than one vowel -- the run is replaced by ``accent_word(run)``.

    Args:
        sentence: Text to process.

    Returns:
        The processed text; an empty string if *sentence* has no words.
    """
    # Appending the space after every word (including the last one) keeps the
    # output format that existing callers receive.
    normalized = "".join(word + " " for word in sentence.split())
    return "".join(_stress_token(token) for token in _split_into_tokens(normalized))
if __name__ == "__main__":
    # Manual smoke test: print the stressed form of two sample sentences.
    demo_sentences = (
        "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району.",
        "Привіт, як тебе звати?",
    )
    for demo_sentence in demo_sentences:
        print(sentence_to_stress(demo_sentence))

    # Other words that were used for manual checks:
    # ["словотворення", "архаїчний", "програма", "а-ля-фуршет"]

    # Show the raw model output next to its converted form.
    raw_predictions = accentor.predict(["привіт"], mode='stress')
    converted_predictions = [replace_accents(item) for item in raw_predictions]
    print('With stress:', raw_predictions)
    print('With pluses:', converted_predictions)