# Serhiy Stetskovych
# New multispeaker model
# 3d2700d
# raw
# history blame contribute delete
# No virus
# 491 Bytes
from pflow.text.textnormalizer import norm
from ukrainian_word_stress import Stressifier
import regex
import re
from ipa_uk import ipa
# Shared Stressifier instance that ukr_cleaners() applies to the cleaned text.
# It is configured with "ˈ" (U+02C8) as its stress symbol; presumably that mark
# is what it inserts to flag the stressed syllable -- TODO confirm against the
# ukrainian_word_stress documentation.
stressify = Stressifier(stress_symbol="ˈ")
# Matches any run of one or more whitespace characters.
_whitespace_re = re.compile(r"\s+")


def collapse_whitespace(text):
    """Return *text* with every run of whitespace replaced by one space.

    Leading and trailing whitespace is not stripped; each such run is
    reduced to a single space like any other.
    """
    return _whitespace_re.sub(" ", text)
def ukr_cleaners(text):
    """Clean *text* and pass it through the stress and ``ipa`` converters.

    Steps, in order:
      1. squeeze every whitespace run to a single space;
      2. apply ``norm`` (from ``pflow.text.textnormalizer``) and lowercase
         the result;
      3. delete every character that is not the stress mark ``ˈ``, a Unicode
         letter, a Unicode number, one of ``? ! , . - :`` or a space;
      4. run the module-level ``stressify`` callable on the filtered text;
      5. return ``ipa(<stressed text>, False)``.

    NOTE(review): the meaning of the ``False`` flag passed to ``ipa`` and the
    exact form of its return value are not visible here -- confirm against
    the ``ipa_uk`` package.
    """
    squeezed = collapse_whitespace(text)
    lowered = norm(squeezed).lower()
    # Whitelist filter: anything outside the allowed character set is removed.
    filtered = regex.sub(r'[^\ˈ\p{L}\p{N}\?\!\,\.\-\: ]', '', lowered)
    stressed = stressify(filtered)
    return ipa(stressed, False)