vits-simple-api / vits /text /mandarin.py
Artrajz's picture
update
dc13618
raw
history blame
8.93 kB
import config
import re
from pypinyin import lazy_pinyin, BOPOMOFO
import jieba
import cn2an
import logging
# Import-time setup for the jieba segmenter used by chinese_to_bopomofo().
# Raise the threshold of the 'jieba' logger to WARNING.
logging.getLogger('jieba').setLevel(logging.WARNING)
# Point jieba at the dict.txt located under config.ABS_PATH, then initialize
# it right away, while this module is being imported.
jieba.set_dictionary(config.ABS_PATH + '/vits/text/jieba/dict.txt')
jieba.initialize()
# (compiled pattern, bopomofo) rules, one per Latin letter. Each pattern is a
# single letter compiled with re.IGNORECASE, so upper- and lower-case letters
# share the same bopomofo replacement.
_latin_to_bopomofo = [
    (re.compile(letter, re.IGNORECASE), bopomofo)
    for letter, bopomofo in (
        ('a', 'ㄟˉ'),
        ('b', 'ㄅㄧˋ'),
        ('c', 'ㄙㄧˉ'),
        ('d', 'ㄉㄧˋ'),
        ('e', 'ㄧˋ'),
        ('f', 'ㄝˊㄈㄨˋ'),
        ('g', 'ㄐㄧˋ'),
        ('h', 'ㄝˇㄑㄩˋ'),
        ('i', 'ㄞˋ'),
        ('j', 'ㄐㄟˋ'),
        ('k', 'ㄎㄟˋ'),
        ('l', 'ㄝˊㄛˋ'),
        ('m', 'ㄝˊㄇㄨˋ'),
        ('n', 'ㄣˉ'),
        ('o', 'ㄡˉ'),
        ('p', 'ㄆㄧˉ'),
        ('q', 'ㄎㄧㄡˉ'),
        ('r', 'ㄚˋ'),
        ('s', 'ㄝˊㄙˋ'),
        ('t', 'ㄊㄧˋ'),
        ('u', 'ㄧㄡˉ'),
        ('v', 'ㄨㄧˉ'),
        ('w', 'ㄉㄚˋㄅㄨˋㄌㄧㄡˋ'),
        ('x', 'ㄝˉㄎㄨˋㄙˋ'),
        ('y', 'ㄨㄞˋ'),
        ('z', 'ㄗㄟˋ'),
    )
]
# (compiled pattern, romaji) rules, applied in list order by
# bopomofo_to_romaji(). Multi-symbol keys are listed before the single
# symbols they contain, so the longer key is replaced first.
# Keys are literal text, so each one is passed through re.escape() before
# compiling; a bare '?' key is not a valid regular expression and would raise
# re.error when this module is imported.
# NOTE(review): the comma, exclamation-mark and question-mark keys are written
# as full-width punctuation (U+FF0C, U+FF01, U+FF1F) on the assumption that,
# like the neighbouring ideographic full stop and em dash rules, they are
# meant to turn CJK punctuation into ASCII -- confirm against upstream.
_bopomofo_to_romaji = [
    (re.compile(re.escape(bopomofo)), romaji)
    for bopomofo, romaji in (
        ('ㄅㄛ', 'p⁼wo'),
        ('ㄆㄛ', 'pʰwo'),
        ('ㄇㄛ', 'mwo'),
        ('ㄈㄛ', 'fwo'),
        ('ㄅ', 'p⁼'),
        ('ㄆ', 'pʰ'),
        ('ㄇ', 'm'),
        ('ㄈ', 'f'),
        ('ㄉ', 't⁼'),
        ('ㄊ', 'tʰ'),
        ('ㄋ', 'n'),
        ('ㄌ', 'l'),
        ('ㄍ', 'k⁼'),
        ('ㄎ', 'kʰ'),
        ('ㄏ', 'h'),
        ('ㄐ', 'ʧ⁼'),
        ('ㄑ', 'ʧʰ'),
        ('ㄒ', 'ʃ'),
        ('ㄓ', 'ʦ`⁼'),
        ('ㄔ', 'ʦ`ʰ'),
        ('ㄕ', 's`'),
        ('ㄖ', 'ɹ`'),
        ('ㄗ', 'ʦ⁼'),
        ('ㄘ', 'ʦʰ'),
        ('ㄙ', 's'),
        ('ㄚ', 'a'),
        ('ㄛ', 'o'),
        ('ㄜ', 'ə'),
        ('ㄝ', 'e'),
        ('ㄞ', 'ai'),
        ('ㄟ', 'ei'),
        ('ㄠ', 'au'),
        ('ㄡ', 'ou'),
        ('ㄧㄢ', 'yeNN'),
        ('ㄢ', 'aNN'),
        ('ㄧㄣ', 'iNN'),
        ('ㄣ', 'əNN'),
        ('ㄤ', 'aNg'),
        ('ㄧㄥ', 'iNg'),
        ('ㄨㄥ', 'uNg'),
        ('ㄩㄥ', 'yuNg'),
        ('ㄥ', 'əNg'),
        ('ㄦ', 'əɻ'),
        ('ㄧ', 'i'),
        ('ㄨ', 'u'),
        ('ㄩ', 'ɥ'),
        ('ˉ', '→'),
        ('ˊ', '↑'),
        ('ˇ', '↓↑'),
        ('ˋ', '↓'),
        ('˙', ''),
        ('\uff0c', ','),  # FULLWIDTH COMMA
        ('。', '.'),
        ('\uff01', '!'),  # FULLWIDTH EXCLAMATION MARK
        ('\uff1f', '?'),  # FULLWIDTH QUESTION MARK
        ('—', '-'),
    )
]
# (compiled pattern, IPA) rules applied in list order by chinese_to_lazy_ipa()
# to the romaji string. Every pattern is compiled with re.IGNORECASE. The
# three '...y' rules come before the plain 'y' rule so they are tried first.
_romaji_to_ipa = [
    (re.compile(romaji, re.IGNORECASE), ipa)
    for romaji, ipa in (
        ('ʃy', 'ʃ'),
        ('ʧʰy', 'ʧʰ'),
        ('ʧ⁼y', 'ʧ⁼'),
        ('NN', 'n'),
        ('Ng', 'ŋ'),
        ('y', 'j'),
        ('h', 'x'),
    )
]
# (compiled pattern, IPA) rules, applied in list order by bopomofo_to_ipa().
# Multi-symbol keys are listed before the single symbols they contain, so the
# longer key is replaced first.
# Keys are literal text, so each one is passed through re.escape() before
# compiling; a bare '?' key is not a valid regular expression and would raise
# re.error when this module is imported.
# NOTE(review): the comma, exclamation-mark and question-mark keys are written
# as full-width punctuation (U+FF0C, U+FF01, U+FF1F) on the assumption that,
# like the neighbouring ideographic full stop and em dash rules, they are
# meant to turn CJK punctuation into ASCII -- confirm against upstream.
_bopomofo_to_ipa = [
    (re.compile(re.escape(bopomofo)), ipa)
    for bopomofo, ipa in (
        ('ㄅㄛ', 'p⁼wo'),
        ('ㄆㄛ', 'pʰwo'),
        ('ㄇㄛ', 'mwo'),
        ('ㄈㄛ', 'fwo'),
        ('ㄅ', 'p⁼'),
        ('ㄆ', 'pʰ'),
        ('ㄇ', 'm'),
        ('ㄈ', 'f'),
        ('ㄉ', 't⁼'),
        ('ㄊ', 'tʰ'),
        ('ㄋ', 'n'),
        ('ㄌ', 'l'),
        ('ㄍ', 'k⁼'),
        ('ㄎ', 'kʰ'),
        ('ㄏ', 'x'),
        ('ㄐ', 'tʃ⁼'),
        ('ㄑ', 'tʃʰ'),
        ('ㄒ', 'ʃ'),
        ('ㄓ', 'ts`⁼'),
        ('ㄔ', 'ts`ʰ'),
        ('ㄕ', 's`'),
        ('ㄖ', 'ɹ`'),
        ('ㄗ', 'ts⁼'),
        ('ㄘ', 'tsʰ'),
        ('ㄙ', 's'),
        ('ㄚ', 'a'),
        ('ㄛ', 'o'),
        ('ㄜ', 'ə'),
        ('ㄝ', 'ɛ'),
        ('ㄞ', 'aɪ'),
        ('ㄟ', 'eɪ'),
        ('ㄠ', 'ɑʊ'),
        ('ㄡ', 'oʊ'),
        ('ㄧㄢ', 'jɛn'),
        ('ㄩㄢ', 'ɥæn'),
        ('ㄢ', 'an'),
        ('ㄧㄣ', 'in'),
        ('ㄩㄣ', 'ɥn'),
        ('ㄣ', 'ən'),
        ('ㄤ', 'ɑŋ'),
        ('ㄧㄥ', 'iŋ'),
        ('ㄨㄥ', 'ʊŋ'),
        ('ㄩㄥ', 'jʊŋ'),
        ('ㄥ', 'əŋ'),
        ('ㄦ', 'əɻ'),
        ('ㄧ', 'i'),
        ('ㄨ', 'u'),
        ('ㄩ', 'ɥ'),
        ('ˉ', '→'),
        ('ˊ', '↑'),
        ('ˇ', '↓↑'),
        ('ˋ', '↓'),
        ('˙', ''),
        ('\uff0c', ','),  # FULLWIDTH COMMA
        ('。', '.'),
        ('\uff01', '!'),  # FULLWIDTH EXCLAMATION MARK
        ('\uff1f', '?'),  # FULLWIDTH QUESTION MARK
        ('—', '-'),
    )
]
# (compiled pattern, IPA) rules, applied in list order by bopomofo_to_ipa2().
# This variant writes tones with Chao tone letters instead of arrows.
# Multi-symbol keys are listed before the single symbols they contain, so the
# longer key is replaced first.
# Keys are literal text, so each one is passed through re.escape() before
# compiling; a bare '?' key is not a valid regular expression and would raise
# re.error when this module is imported.
# NOTE(review): the comma, exclamation-mark and question-mark keys are written
# as full-width punctuation (U+FF0C, U+FF01, U+FF1F) on the assumption that,
# like the neighbouring ideographic full stop and em dash rules, they are
# meant to turn CJK punctuation into ASCII -- confirm against upstream.
_bopomofo_to_ipa2 = [
    (re.compile(re.escape(bopomofo)), ipa)
    for bopomofo, ipa in (
        ('ㄅㄛ', 'pwo'),
        ('ㄆㄛ', 'pʰwo'),
        ('ㄇㄛ', 'mwo'),
        ('ㄈㄛ', 'fwo'),
        ('ㄅ', 'p'),
        ('ㄆ', 'pʰ'),
        ('ㄇ', 'm'),
        ('ㄈ', 'f'),
        ('ㄉ', 't'),
        ('ㄊ', 'tʰ'),
        ('ㄋ', 'n'),
        ('ㄌ', 'l'),
        ('ㄍ', 'k'),
        ('ㄎ', 'kʰ'),
        ('ㄏ', 'h'),
        ('ㄐ', 'tɕ'),
        ('ㄑ', 'tɕʰ'),
        ('ㄒ', 'ɕ'),
        ('ㄓ', 'tʂ'),
        ('ㄔ', 'tʂʰ'),
        ('ㄕ', 'ʂ'),
        ('ㄖ', 'ɻ'),
        ('ㄗ', 'ts'),
        ('ㄘ', 'tsʰ'),
        ('ㄙ', 's'),
        ('ㄚ', 'a'),
        ('ㄛ', 'o'),
        ('ㄜ', 'ɤ'),
        ('ㄝ', 'ɛ'),
        ('ㄞ', 'aɪ'),
        ('ㄟ', 'eɪ'),
        ('ㄠ', 'ɑʊ'),
        ('ㄡ', 'oʊ'),
        ('ㄧㄢ', 'jɛn'),
        ('ㄩㄢ', 'yæn'),
        ('ㄢ', 'an'),
        ('ㄧㄣ', 'in'),
        ('ㄩㄣ', 'yn'),
        ('ㄣ', 'ən'),
        ('ㄤ', 'ɑŋ'),
        ('ㄧㄥ', 'iŋ'),
        ('ㄨㄥ', 'ʊŋ'),
        ('ㄩㄥ', 'jʊŋ'),
        ('ㄥ', 'ɤŋ'),
        ('ㄦ', 'əɻ'),
        ('ㄧ', 'i'),
        ('ㄨ', 'u'),
        ('ㄩ', 'y'),
        ('ˉ', '˥'),
        ('ˊ', '˧˥'),
        ('ˇ', '˨˩˦'),
        ('ˋ', '˥˩'),
        ('˙', ''),
        ('\uff0c', ','),  # FULLWIDTH COMMA
        ('。', '.'),
        ('\uff01', '!'),  # FULLWIDTH EXCLAMATION MARK
        ('\uff1f', '?'),  # FULLWIDTH QUESTION MARK
        ('—', '-'),
    )
]
# (compiled pattern, replacement template) rules for arithmetic notation,
# applied in list order by symbols_to_chinese(). Patterns are raw strings so
# that regex escapes such as \. \+ \* are not parsed as (invalid) Python
# string escapes. The percent and fraction rules come before the bare
# operator rules because they need the digits around the symbol.
_symbols_to_chinese = [(re.compile(pattern), replacement) for pattern, replacement in [
    (r'([0-9]+(?:\.?[0-9]+)?)%', r'百分之\1'),
    (r'([0-9]+)/([0-9]+)', r'\2分之\1'),
    (r'\+', r'加'),
    (r'([0-9]+)-([0-9]+)', r'\1减\2'),
    (r'×', r'乘以'),
    (r'([0-9]+)x([0-9]+)', r'\1乘以\2'),
    (r'([0-9]+)\*([0-9]+)', r'\1乘以\2'),
    (r'÷', r'除以'),
    (r'=', r'等于'),
    (r'≠', r'不等于'),
]]


def symbols_to_chinese(text):
    """Spell out arithmetic symbols in *text* as Chinese words.

    Every rule in ``_symbols_to_chinese`` is applied to the whole string, one
    after another in table order. The digits themselves are left as they are;
    only the symbols (and, for percentages and fractions, the word order)
    change.

    Args:
        text: Input string.

    Returns:
        The string with all matching symbols replaced.
    """
    for pattern, replacement in _symbols_to_chinese:
        text = pattern.sub(replacement, text)
    return text
def number_to_chinese(text):
    """Replace every Arabic number in *text* with ``cn2an.an2cn`` output.

    A number is a run of ASCII digits that may contain one '.' between
    digits. Each match is converted on its own, in a single left-to-right
    pass over the string.

    Args:
        text: Input string.

    Returns:
        The string with each matched number replaced by its conversion.
    """
    return re.sub(
        r'[0-9]+(?:\.?[0-9]+)?',
        # A callable replacement is inserted literally, exactly like the
        # str.replace() it stands in for.
        lambda match: cn2an.an2cn(match.group()),
        text,
    )
def number_transform_to_chinese(text):
    """Return *text* after running it through ``cn2an.transform`` in "an2cn" mode."""
    return cn2an.transform(text, "an2cn")
def chinese_to_bopomofo(text):
    """Convert the Chinese words in *text* to bopomofo.

    The text is first normalised ('、', ';' and ':' become ','), then split
    into words with jieba. A word with no character in U+4E00-U+9FFF is
    copied to the output unchanged. Any other word is converted with
    pypinyin's BOPOMOFO style; a syllable whose last character is a bopomofo
    letter (U+3105-U+3129) gets 'ˉ' appended (presumably because that tone is
    otherwise left unmarked -- confirm against pypinyin). A converted word is
    preceded by a space unless the output is still empty.

    Args:
        text: Input string.

    Returns:
        The converted string.
    """
    text = text.replace('、', ',').replace(';', ',').replace(':', ',')
    result = ''
    for word in jieba.lcut(text, cut_all=False):
        # Check first, so pypinyin is only called for words that need it.
        if not re.search('[\u4e00-\u9fff]', word):
            result += word
            continue
        syllables = [
            re.sub(r'([\u3105-\u3129])$', r'\1ˉ', syllable)
            for syllable in lazy_pinyin(word, BOPOMOFO)
        ]
        if result:
            result += ' '
        result += ''.join(syllables)
    return result
def latin_to_bopomofo(text):
    """Apply every ``_latin_to_bopomofo`` rule to *text*, in table order.

    Returns the string after the last substitution.
    """
    for pattern, bopomofo in _latin_to_bopomofo:
        text = pattern.sub(bopomofo, text)
    return text
def bopomofo_to_romaji(text):
    """Apply every ``_bopomofo_to_romaji`` rule to *text*, in table order.

    Returns the string after the last substitution.
    """
    for pattern, romaji in _bopomofo_to_romaji:
        text = pattern.sub(romaji, text)
    return text
def bopomofo_to_ipa(text):
    """Apply every ``_bopomofo_to_ipa`` rule to *text*, in table order.

    Returns the string after the last substitution.
    """
    for pattern, ipa in _bopomofo_to_ipa:
        text = pattern.sub(ipa, text)
    return text
def bopomofo_to_ipa2(text):
    """Apply every ``_bopomofo_to_ipa2`` rule to *text*, in table order.

    Returns the string after the last substitution.
    """
    for pattern, ipa in _bopomofo_to_ipa2:
        text = pattern.sub(ipa, text)
    return text
def chinese_to_romaji(text):
    """Run the full text -> romaji pipeline on *text*.

    Stages, in order: symbols_to_chinese, number_transform_to_chinese,
    chinese_to_bopomofo, latin_to_bopomofo, bopomofo_to_romaji, followed by
    the regex clean-ups below.
    """
    bopomofo = latin_to_bopomofo(
        chinese_to_bopomofo(
            number_transform_to_chinese(symbols_to_chinese(text))
        )
    )
    romaji = bopomofo_to_romaji(bopomofo)
    # 'i' / 'u' directly in front of a vowel are rewritten as 'y' / 'w'.
    romaji = re.sub('i([aoe])', r'y\1', romaji)
    romaji = re.sub('u([aoəe])', r'w\1', romaji)
    # A backtick-marked consonant followed straight away by tone arrows,
    # spaces or the end of the string gets 'ɹ`' inserted after it.
    romaji = re.sub('([ʦsɹ]`[⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ`\2', romaji)
    romaji = romaji.replace('ɻ', 'ɹ`')
    # Same for plain 'ʦ' / 's' (optionally with '⁼' or 'ʰ'), which get 'ɹ'.
    return re.sub('([ʦs][⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ\2', romaji)
def chinese_to_lazy_ipa(text):
    """Convert *text* with chinese_to_romaji, then apply the ``_romaji_to_ipa`` rules.

    The rules are applied to the whole romaji string in table order.
    """
    result = chinese_to_romaji(text)
    for pattern, ipa in _romaji_to_ipa:
        result = pattern.sub(ipa, result)
    return result
def chinese_to_ipa(text):
    """Run the full text -> IPA pipeline on *text* (arrow tone marks).

    Stages, in order: symbols_to_chinese, number_transform_to_chinese,
    chinese_to_bopomofo, latin_to_bopomofo, bopomofo_to_ipa, followed by the
    regex clean-ups below.
    """
    bopomofo = latin_to_bopomofo(
        chinese_to_bopomofo(
            number_transform_to_chinese(symbols_to_chinese(text))
        )
    )
    ipa = bopomofo_to_ipa(bopomofo)
    # 'i' / 'u' directly in front of a vowel are rewritten as 'j' / 'w'.
    ipa = re.sub('i([aoe])', r'j\1', ipa)
    ipa = re.sub('u([aoəe])', r'w\1', ipa)
    # A backtick-marked 's' / 'ɹ' followed straight away by tone arrows,
    # spaces or the end of the string gets 'ɹ`' inserted after it.
    ipa = re.sub('([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ`\2', ipa)
    ipa = ipa.replace('ɻ', 'ɹ`')
    # Same for a plain 's' (optionally with '⁼' or 'ʰ'), which gets 'ɹ'.
    return re.sub('([s][⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ\2', ipa)
def chinese_to_ipa2(text):
    """Run the full text -> IPA pipeline on *text* (tone-letter variant).

    Stages, in order: symbols_to_chinese, number_transform_to_chinese,
    chinese_to_bopomofo, latin_to_bopomofo, bopomofo_to_ipa2, followed by the
    regex clean-ups below.
    """
    bopomofo = latin_to_bopomofo(
        chinese_to_bopomofo(
            number_transform_to_chinese(symbols_to_chinese(text))
        )
    )
    ipa = bopomofo_to_ipa2(bopomofo)
    # 'i' / 'u' directly in front of a vowel are rewritten as 'j' / 'w'.
    ipa = re.sub(r'i([aoe])', r'j\1', ipa)
    ipa = re.sub(r'u([aoəe])', r'w\1', ipa)
    # 'ʂ' / 'ɹ' (optionally aspirated) followed straight away by tone letters,
    # spaces or the end of the string gets 'ʅ' inserted after it.
    # NOTE(review): the class lists 'ɹ', while the ipa2 table in this file
    # emits 'ɻ' for ㄖ -- confirm which character is intended.
    ipa = re.sub(r'([ʂɹ]ʰ?)([˩˨˧˦˥ ]+|$)', r'\1ʅ\2', ipa)
    # Same for 's' (optionally aspirated), which gets 'ɿ'.
    return re.sub(r'(sʰ?)([˩˨˧˦˥ ]+|$)', r'\1ɿ\2', ipa)
def VITS_PinYin_model():
    """Create and return a ``VITS_PinYin`` front end.

    torch, config and the VITS_PinYin class are imported inside the function,
    so they are only loaded when it is called. The constructor receives the
    ``vits/bert`` path under ``config.ABS_PATH`` and a torch device: "cuda"
    when ``torch.cuda.is_available()`` is true, otherwise "cpu".
    """
    import torch
    import config
    from vits.text.vits_pinyin import VITS_PinYin

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    # pinyin
    return VITS_PinYin(f"{config.ABS_PATH}/vits/bert", device)