""" from https://github.com/keithito/tacotron """ from text import cleaners #from text.symbols import symbols from viphoneme import syms, vi2IPA_split symbols = syms # Mappings from symbol to numeric ID and vice versa: _symbol_to_id = {s: i for i, s in enumerate(symbols)} _id_to_symbol = {i: s for i, s in enumerate(symbols)} def text_to_sequence(text, cleaner_names): sequence = [] text = text.replace('\s+',' ').lower() phon = vi2IPA_split(text,"/") phon = phon.split("/")[1:] eol = -1 for i,p in reversed(list(enumerate(phon))): if p not in ["..",""," ","."," "]: eol = i break phones = phon[:i+1]+[" ","."] phones_id =[] for i in phones: if i in _symbol_to_id: phones_id.append(_symbol_to_id[i]) #phones_id = [_symbol_to_id[i] for i in phones] sequence.extend(phones_id) return sequence def cleaned_text_to_sequence(cleaned_text): sequence = [] phon = cleaned_text.split("/")[1:] eol = -1 for i,p in reversed(list(enumerate(phon))): if p not in ["..",""," ","."," "]: eol = i break phones = phon[:i+1]+[" ","."] phones_id =[] for i in phones: if i in _symbol_to_id: phones_id.append(_symbol_to_id[i]) #phones_id = [_symbol_to_id[i] for i in phones] sequence.extend(phones_id) return sequence def sequence_to_text(sequence): result = '' for symbol_id in sequence: if symbol_id in _id_to_symbol: result += _id_to_symbol[symbol_id] return result def _clean_text(text, cleaner_names): for name in cleaner_names: cleaner = getattr(cleaners, name) if not cleaner: raise Exception("Unknown cleaner: %s" % name) text = cleaner(text) return text