Spaces:

candlend
/

vits-hoshimi

Runtime error

App Files Community

candlend committed on Dec 17, 2022

Commit

0cdb167

•

1 Parent(s): 51a465c

remove opencc

Browse files

Files changed (5) hide show

requirements.txt +0 -1
vits/text/cantonese.py +0 -59
vits/text/cleaners.py +26 -26
vits/text/ngu_dialect.py +0 -29
vits/text/shanghainese.py +0 -64

requirements.txt CHANGED Viewed

@@ -15,7 +15,6 @@ ko-pron==1.3
 inflect==6.0.0
 eng-to-ipa==0.0.2
 num-thai==0.0.5
-opencc==1.1.1
 scikit-maad
 torch
 torchvision

 inflect==6.0.0
 eng-to-ipa==0.0.2
 num-thai==0.0.5
 scikit-maad
 torch
 torchvision

vits/text/cantonese.py DELETED Viewed

@@ -1,59 +0,0 @@
-import re
-import cn2an
-import opencc
-converter = opencc.OpenCC('jyutjyu')
-# List of (Latin alphabet, ipa) pairs:
-_latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
-    ('A', 'ei˥'),
-    ('B', 'biː˥'),
-    ('C', 'siː˥'),
-    ('D', 'tiː˥'),
-    ('E', 'iː˥'),
-    ('F', 'e˥fuː˨˩'),
-    ('G', 'tsiː˥'),
-    ('H', 'ɪk̚˥tsʰyː˨˩'),
-    ('I', 'ɐi˥'),
-    ('J', 'tsei˥'),
-    ('K', 'kʰei˥'),
-    ('L', 'e˥llou˨˩'),
-    ('M', 'ɛːm˥'),
-    ('N', 'ɛːn˥'),
-    ('O', 'ou˥'),
-    ('P', 'pʰiː˥'),
-    ('Q', 'kʰiːu˥'),
-    ('R', 'aː˥lou˨˩'),
-    ('S', 'ɛː˥siː˨˩'),
-    ('T', 'tʰiː˥'),
-    ('U', 'juː˥'),
-    ('V', 'wiː˥'),
-    ('W', 'tʊk̚˥piː˥juː˥'),
-    ('X', 'ɪk̚˥siː˨˩'),
-    ('Y', 'waːi˥'),
-    ('Z', 'iː˨sɛːt̚˥')
-]]
-def number_to_cantonese(text):
-    return re.sub(r'\d+(?:\.?\d+)?', lambda x: cn2an.an2cn(x.group()), text)
-def latin_to_ipa(text):
-    for regex, replacement in _latin_to_ipa:
-        text = re.sub(regex, replacement, text)
-    return text
-def cantonese_to_ipa(text):
-    text = number_to_cantonese(text.upper())
-    text = converter.convert(text).replace('-','').replace('$',' ')
-    text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
-    text = re.sub(r'[、；：]', '，', text)
-    text = re.sub(r'\s*，\s*', ', ', text)
-    text = re.sub(r'\s*。\s*', '. ', text)
-    text = re.sub(r'\s*？\s*', '? ', text)
-    text = re.sub(r'\s*！\s*', '! ', text)
-    text = re.sub(r'\s*$', '', text)
-    return text

vits/text/cleaners.py CHANGED Viewed

@@ -7,7 +7,7 @@ from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_i
 from text.thai import num_to_thai, latin_to_thai
 # from text.shanghainese import shanghainese_to_ipa
 # from text.cantonese import cantonese_to_ipa
-from text.ngu_dialect import ngu_dialect_to_ipa
 def japanese_cleaners(text):
@@ -149,28 +149,28 @@ def thai_cleaners(text):
     return text
-def shanghainese_cleaners(text):
-    text = shanghainese_to_ipa(text)
-    if re.match(r'[^\.,!\?\-…~]', text[-1]):
-        text += '.'
-    return text
-def chinese_dialect_cleaners(text):
-    text = re.sub(r'\[MD\](.*?)\[MD\]',
-                  lambda x: chinese_to_ipa2(x.group(1))+' ', text)
-    text = re.sub(r'\[TW\](.*?)\[TW\]',
-                  lambda x: chinese_to_ipa2(x.group(1), True)+' ', text)
-    text = re.sub(r'\[JA\](.*?)\[JA\]',
-                  lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
-    text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
-                  '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
-    text = re.sub(r'\[GD\](.*?)\[GD\]',
-                  lambda x: cantonese_to_ipa(x.group(1))+' ', text)
-    text = re.sub(r'\[EN\](.*?)\[EN\]',
-                  lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
-    text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
-        1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
-    text = re.sub(r'\s+$', '', text)
-    text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
-    return text

 from text.thai import num_to_thai, latin_to_thai
 # from text.shanghainese import shanghainese_to_ipa
 # from text.cantonese import cantonese_to_ipa
+# from text.ngu_dialect import ngu_dialect_to_ipa
 def japanese_cleaners(text):
     return text
+# def shanghainese_cleaners(text):
+#     text = shanghainese_to_ipa(text)
+#     if re.match(r'[^\.,!\?\-…~]', text[-1]):
+#         text += '.'
+#     return text
+# def chinese_dialect_cleaners(text):
+#     text = re.sub(r'\[MD\](.*?)\[MD\]',
+#                   lambda x: chinese_to_ipa2(x.group(1))+' ', text)
+#     text = re.sub(r'\[TW\](.*?)\[TW\]',
+#                   lambda x: chinese_to_ipa2(x.group(1), True)+' ', text)
+#     text = re.sub(r'\[JA\](.*?)\[JA\]',
+#                   lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
+#     text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
+#                   '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
+#     text = re.sub(r'\[GD\](.*?)\[GD\]',
+#                   lambda x: cantonese_to_ipa(x.group(1))+' ', text)
+#     text = re.sub(r'\[EN\](.*?)\[EN\]',
+#                   lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
+#     text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
+#         1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
+#     text = re.sub(r'\s+$', '', text)
+#     text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
+#     return text

vits/text/ngu_dialect.py DELETED Viewed

@@ -1,29 +0,0 @@
-import re
-import opencc
-dialects = {'SZ': 'suzhou', 'WX': 'wuxi', 'CZ': 'changzhou', 'HZ': 'hangzhou',
-            'SX': 'shaoxing', 'NB': 'ningbo', 'JJ': 'jingjiang', 'YX': 'yixing',
-            'JD': 'jiading', 'ZR': 'zhenru', 'PH': 'pinghu', 'TX': 'tongxiang',
-            'JS': 'jiashan', 'XS': 'xiashi', 'LP': 'linping', 'XS': 'xiaoshan',
-            'FY': 'fuyang', 'RA': 'ruao', 'CX': 'cixi', 'SM': 'sanmen', 'TT': 'tiantai'}
-converters = {}
-for dialect in dialects.values():
-    try:
-        converters[dialect] = opencc.OpenCC(dialect)
-    except:
-        pass
-def ngu_dialect_to_ipa(text, dialect):
-    dialect = dialects[dialect]
-    text = converters[dialect].convert(text).replace('$',' ')
-    text = re.sub(r'[、；：]', '，', text)
-    text = re.sub(r'\s*，\s*', ', ', text)
-    text = re.sub(r'\s*。\s*', '. ', text)
-    text = re.sub(r'\s*？\s*', '? ', text)
-    text = re.sub(r'\s*！\s*', '! ', text)
-    text = re.sub(r'\s*$', '', text)
-    return text

vits/text/shanghainese.py DELETED Viewed

@@ -1,64 +0,0 @@
-import os, sys, re
-import cn2an
-import opencc
-converter = opencc.OpenCC('zaonhe')
-# List of (Latin alphabet, ipa) pairs:
-_latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
-    ('A', 'ᴇ'),
-    ('B', 'bi'),
-    ('C', 'si'),
-    ('D', 'di'),
-    ('E', 'i'),
-    ('F', 'ᴇf'),
-    ('G', 'dʑi'),
-    ('H', 'ᴇtɕʰ'),
-    ('I', 'ᴀi'),
-    ('J', 'dʑᴇ'),
-    ('K', 'kʰᴇ'),
-    ('L', 'ᴇl'),
-    ('M', 'ᴇm'),
-    ('N', 'ᴇn'),
-    ('O', 'o'),
-    ('P', 'pʰi'),
-    ('Q', 'kʰiu'),
-    ('R', 'ᴀl'),
-    ('S', 'ᴇs'),
-    ('T', 'tʰi'),
-    ('U', 'ɦiu'),
-    ('V', 'vi'),
-    ('W', 'dᴀbɤliu'),
-    ('X', 'ᴇks'),
-    ('Y', 'uᴀi'),
-    ('Z', 'zᴇ')
-]]
-def _number_to_shanghainese(num):
-    num = cn2an.an2cn(num).replace('一十','十').replace('二十', '廿').replace('二', '两')
-    return re.sub(r'(?:(?:^|[^三四五六七八九])十|廿)两', lambda x: x.group()[:-1]+'二', num)
-def number_to_shanghainese(text):
-    return re.sub(r'\d+(?:\.?\d+)?', lambda x: _number_to_shanghainese(x.group()), text)
-def latin_to_ipa(text):
-    for regex, replacement in _latin_to_ipa:
-        text = re.sub(regex, replacement, text)
-    return text
-def shanghainese_to_ipa(text):
-    text = number_to_shanghainese(text.upper())
-    text = converter.convert(text).replace('-','').replace('$',' ')
-    text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
-    text = re.sub(r'[、；：]', '，', text)
-    text = re.sub(r'\s*，\s*', ', ', text)
-    text = re.sub(r'\s*。\s*', '. ', text)
-    text = re.sub(r'\s*？\s*', '? ', text)
-    text = re.sub(r'\s*！\s*', '! ', text)
-    text = re.sub(r'\s*$', '', text)
-    return text