""" from https://github.com/PlayVoice/vits_chinese """ import pypinyin from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin from pypinyin.converter import DefaultConverter from pypinyin.core import Pinyin import numpy as np from vits.bert.prosody_tool import pinyin_dict from vits.bert import TTSProsody class MyConverter(NeutralToneWith5Mixin, DefaultConverter): pass def is_chinese(uchar): if uchar >= u'\u4e00' and uchar <= u'\u9fa5': return True else: return False def clean_chinese(text: str): text = text.strip() text_clean = [] for char in text: if (is_chinese(char)): text_clean.append(char) else: if len(text_clean) > 1 and is_chinese(text_clean[-1]): text_clean.append(',') text_clean = ''.join(text_clean).strip(',') return text_clean class VITS_PinYin: def __init__(self, bert_path, device): self.pinyin_parser = Pinyin(MyConverter()) self.prosody = TTSProsody(bert_path, device) def chinese_to_phonemes(self, text): # 考虑使用g2pw的chinese bert替换原始的pypinyin,目前测试下来运行速度太慢。 # 将标准中文文本符号替换成 bert 符号库中的单符号,以保证bert的效果. text = text.replace("——", "...") \ .replace("—", "...") \ .replace("……", "...") \ .replace("…", "...") \ .replace('“', '"') \ .replace('”', '"') \ .replace("\n", "") tokens = self.prosody.char_model.tokenizer.tokenize(text) text = ''.join(tokens) assert not tokens.count("[UNK]") pinyins = np.reshape(pypinyin.pinyin(text, style=pypinyin.TONE3), (-1)) try: phone_index = 0 phone_items = [] phone_items.append('sil') count_phone = [] count_phone.append(1) temp = "" len_pys = len(tokens) for word in tokens: if is_chinese(word): count_phone.append(2) if (phone_index >= len_pys): print( f"!!!![{text}]plz check ur text whether includes MULTIBYTE symbol.\ (请检查你的文本中是否包含多字节符号)") pinyin = pinyins[phone_index] phone_index = phone_index + 1 if not pinyin[-1].isdigit(): pinyin += "5" if pinyin[:-1] in pinyin_dict: tone = pinyin[-1] a = pinyin[:-1] a1, a2 = pinyin_dict[a] phone_items += [a1, a2 + tone] else: temp += word if temp == pinyins[phone_index]: temp = "" phone_index += 1 count_phone.append(1) phone_items.append('sp') count_phone.append(1) phone_items.append('sil') phone_items_str = ' '.join(phone_items) except IndexError as e: print('except:', e) text = f'[PAD]{text}[PAD]' char_embeds = self.prosody.get_char_embeds(text) char_embeds = self.prosody.expand_for_phone(char_embeds, count_phone) return phone_items_str, char_embeds