Spaces:

robinhad
/

ukrainian-tts

Running

ukrainian-tts / stress.py

Yurii Paniv

Apply stress model for words with vowel_count > 1

9e96240 over 2 years ago

3.09 kB

	from unittest import skip
	from gruut import sentences
	import torch

	# Load the pre-trained accentor that ships as a torch.package archive.
	# This runs at import time, and the archive path is relative to the current
	# working directory, so importing this module from another directory will
	# not find the file.
	# NOTE(review): load_pickle unpickles objects from the archive; only load
	# archives from a trusted source.
	importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt")
	# `accentor` is the object whose .predict(words, mode='stress') is called below.
	accentor = importer.load_pickle("uk-accentor", "model")
	# `replace_accents` is a callable applied to each prediction of the accentor.
	replace_accents = importer.load_pickle("uk-accentor", "replace_accents")

	# To run the model on a GPU, uncomment:
	# accentor.cuda()
	# To move it back to the CPU, uncomment:
	# accentor.cpu()

	# Lowercase letters recognised as part of a word. `consonants` also holds the
	# soft sign "ь", and the apostrophe is kept so that it does not split a word.
	vowels = "аеєиіїоуюя"
	consonants = "бвгґджзйклмнпрстфхцчшщь"
	special = "'"
	# Every character that may appear inside a word, as one lookup string.
	alphabet = "".join((vowels, consonants, special))

	def accent_word(word):
	    """Run a single word through the accentor and return the converted result.

	    The word is passed to ``accentor.predict`` as a one-element batch in
	    ``'stress'`` mode, each prediction is converted with ``replace_accents``,
	    and the first converted item is returned.
	    """
	    # Inference only: gradient tracking is switched off for the whole call.
	    with torch.no_grad():
	        predictions = accentor.predict([word], mode='stress')
	        converted = list(map(replace_accents, predictions))
	        return converted[0]

	def _split_word(word):
	    """Split ``word`` into runs of alphabet letters and single other characters.

	    Every character whose lowercase form is not in ``alphabet`` becomes a piece
	    of its own; the letters between such characters stay together. Empty runs
	    are dropped, so joining the pieces gives back ``word`` unchanged.
	    """
	    pieces = []
	    run_start = 0
	    for index, char in enumerate(word):
	        # Lowercase one character at a time: lowercasing the whole word can
	        # change its length (e.g. "İ"), which would misalign the indices.
	        if char.lower() not in alphabet:
	            if run_start < index:
	                pieces.append(word[run_start:index])
	            pieces.append(char)
	            run_start = index + 1
	    if run_start < len(word):
	        pieces.append(word[run_start:])
	    return pieces


	def _stress_piece(piece):
	    """Return ``piece`` with a stress mark when it is a word containing vowels."""
	    # Punctuation, digits and other foreign characters pass through untouched.
	    if not all(char.lower() in alphabet for char in piece):
	        return piece
	    # Count vowels case-insensitively so capitalised words are handled like
	    # lowercase ones.
	    vowel_positions = [
	        index for index, char in enumerate(piece) if char.lower() in vowels
	    ]
	    if not vowel_positions:
	        return piece
	    if len(vowel_positions) == 1:
	        # A single vowel is necessarily the stressed one; no model call needed.
	        position = vowel_positions[0]
	        return piece[:position] + "+" + piece[position:]
	    # Two or more vowels: let the accentor model decide.
	    return accent_word(piece)


	def sentence_to_stress(sentence):
	    """Return ``sentence`` with stress marks added to its words.

	    The sentence is split on whitespace, and each chunk is further split at
	    every character that is not in ``alphabet`` (compared case-insensitively).
	    Pieces made only of alphabet characters are treated as words:

	    * no vowels        -> left unchanged;
	    * exactly one vowel -> a ``"+"`` is inserted directly before that vowel;
	    * several vowels   -> the word is replaced by ``accent_word(word)``.

	    All other characters are copied through unchanged. Each whitespace-separated
	    chunk is followed by a single space in the result, so runs of whitespace
	    collapse to one space and a non-empty result ends with a space. An empty or
	    whitespace-only sentence yields ``""``.
	    """
	    output = []
	    for word in sentence.split():
	        output.extend(_stress_piece(piece) for piece in _split_word(word))
	        output.append(" ")
	    return "".join(output)


	if __name__ == "__main__":
	    # Demo 1: stress whole sentences, including hyphenated and apostrophised words.
	    for sentence in (
	        "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району.",
	        "Привіт, як тебе звати?",
	    ):
	        print(sentence_to_stress(sentence))
	    #test_words1 = ["словотворення", "архаїчний", "програма", "а-ля-фуршет"]

	    # Demo 2: call the accentor directly and show both output forms.
	    stressed_words = accentor.predict(["привіт"], mode='stress')
	    plused_words = list(map(replace_accents, stressed_words))

	    print('With stress:', stressed_words)
	    print('With pluses:', plused_words)