Spaces:
Runtime error
Runtime error
candlend
committed on
Commit
•
0cdb167
1
Parent(s):
51a465c
remove opencc
Browse files
- requirements.txt +0 -1
- vits/text/cantonese.py +0 -59
- vits/text/cleaners.py +26 -26
- vits/text/ngu_dialect.py +0 -29
- vits/text/shanghainese.py +0 -64
requirements.txt
CHANGED
@@ -15,7 +15,6 @@ ko-pron==1.3
|
|
15 |
inflect==6.0.0
|
16 |
eng-to-ipa==0.0.2
|
17 |
num-thai==0.0.5
|
18 |
-
opencc==1.1.1
|
19 |
scikit-maad
|
20 |
torch
|
21 |
torchvision
|
|
|
15 |
inflect==6.0.0
|
16 |
eng-to-ipa==0.0.2
|
17 |
num-thai==0.0.5
|
|
|
18 |
scikit-maad
|
19 |
torch
|
20 |
torchvision
|
vits/text/cantonese.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
import cn2an
|
3 |
-
import opencc
|
4 |
-
|
5 |
-
|
6 |
-
converter = opencc.OpenCC('jyutjyu')
|
7 |
-
|
8 |
-
# List of (Latin alphabet, ipa) pairs:
|
9 |
-
_latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
|
10 |
-
('A', 'ei˥'),
|
11 |
-
('B', 'biː˥'),
|
12 |
-
('C', 'siː˥'),
|
13 |
-
('D', 'tiː˥'),
|
14 |
-
('E', 'iː˥'),
|
15 |
-
('F', 'e˥fuː˨˩'),
|
16 |
-
('G', 'tsiː˥'),
|
17 |
-
('H', 'ɪk̚˥tsʰyː˨˩'),
|
18 |
-
('I', 'ɐi˥'),
|
19 |
-
('J', 'tsei˥'),
|
20 |
-
('K', 'kʰei˥'),
|
21 |
-
('L', 'e˥llou˨˩'),
|
22 |
-
('M', 'ɛːm˥'),
|
23 |
-
('N', 'ɛːn˥'),
|
24 |
-
('O', 'ou˥'),
|
25 |
-
('P', 'pʰiː˥'),
|
26 |
-
('Q', 'kʰiːu˥'),
|
27 |
-
('R', 'aː˥lou˨˩'),
|
28 |
-
('S', 'ɛː˥siː˨˩'),
|
29 |
-
('T', 'tʰiː˥'),
|
30 |
-
('U', 'juː˥'),
|
31 |
-
('V', 'wiː˥'),
|
32 |
-
('W', 'tʊk̚˥piː˥juː˥'),
|
33 |
-
('X', 'ɪk̚˥siː˨˩'),
|
34 |
-
('Y', 'waːi˥'),
|
35 |
-
('Z', 'iː˨sɛːt̚˥')
|
36 |
-
]]
|
37 |
-
|
38 |
-
|
39 |
-
def number_to_cantonese(text):
|
40 |
-
return re.sub(r'\d+(?:\.?\d+)?', lambda x: cn2an.an2cn(x.group()), text)
|
41 |
-
|
42 |
-
|
43 |
-
def latin_to_ipa(text):
|
44 |
-
for regex, replacement in _latin_to_ipa:
|
45 |
-
text = re.sub(regex, replacement, text)
|
46 |
-
return text
|
47 |
-
|
48 |
-
|
49 |
-
def cantonese_to_ipa(text):
|
50 |
-
text = number_to_cantonese(text.upper())
|
51 |
-
text = converter.convert(text).replace('-','').replace('$',' ')
|
52 |
-
text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
|
53 |
-
text = re.sub(r'[、;:]', ',', text)
|
54 |
-
text = re.sub(r'\s*,\s*', ', ', text)
|
55 |
-
text = re.sub(r'\s*。\s*', '. ', text)
|
56 |
-
text = re.sub(r'\s*?\s*', '? ', text)
|
57 |
-
text = re.sub(r'\s*!\s*', '! ', text)
|
58 |
-
text = re.sub(r'\s*$', '', text)
|
59 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vits/text/cleaners.py
CHANGED
@@ -7,7 +7,7 @@ from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_i
|
|
7 |
from text.thai import num_to_thai, latin_to_thai
|
8 |
# from text.shanghainese import shanghainese_to_ipa
|
9 |
# from text.cantonese import cantonese_to_ipa
|
10 |
-
from text.ngu_dialect import ngu_dialect_to_ipa
|
11 |
|
12 |
|
13 |
def japanese_cleaners(text):
|
@@ -149,28 +149,28 @@ def thai_cleaners(text):
|
|
149 |
return text
|
150 |
|
151 |
|
152 |
-
def shanghainese_cleaners(text):
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
def chinese_dialect_cleaners(text):
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
7 |
from text.thai import num_to_thai, latin_to_thai
|
8 |
# from text.shanghainese import shanghainese_to_ipa
|
9 |
# from text.cantonese import cantonese_to_ipa
|
10 |
+
# from text.ngu_dialect import ngu_dialect_to_ipa
|
11 |
|
12 |
|
13 |
def japanese_cleaners(text):
|
|
|
149 |
return text
|
150 |
|
151 |
|
152 |
+
# def shanghainese_cleaners(text):
|
153 |
+
# text = shanghainese_to_ipa(text)
|
154 |
+
# if re.match(r'[^\.,!\?\-…~]', text[-1]):
|
155 |
+
# text += '.'
|
156 |
+
# return text
|
157 |
+
|
158 |
+
|
159 |
+
# def chinese_dialect_cleaners(text):
|
160 |
+
# text = re.sub(r'\[MD\](.*?)\[MD\]',
|
161 |
+
# lambda x: chinese_to_ipa2(x.group(1))+' ', text)
|
162 |
+
# text = re.sub(r'\[TW\](.*?)\[TW\]',
|
163 |
+
# lambda x: chinese_to_ipa2(x.group(1), True)+' ', text)
|
164 |
+
# text = re.sub(r'\[JA\](.*?)\[JA\]',
|
165 |
+
# lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
|
166 |
+
# text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
|
167 |
+
# '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
|
168 |
+
# text = re.sub(r'\[GD\](.*?)\[GD\]',
|
169 |
+
# lambda x: cantonese_to_ipa(x.group(1))+' ', text)
|
170 |
+
# text = re.sub(r'\[EN\](.*?)\[EN\]',
|
171 |
+
# lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
|
172 |
+
# text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
|
173 |
+
# 1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
|
174 |
+
# text = re.sub(r'\s+$', '', text)
|
175 |
+
# text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
|
176 |
+
# return text
|
vits/text/ngu_dialect.py
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
import opencc
|
3 |
-
|
4 |
-
|
5 |
-
dialects = {'SZ': 'suzhou', 'WX': 'wuxi', 'CZ': 'changzhou', 'HZ': 'hangzhou',
|
6 |
-
'SX': 'shaoxing', 'NB': 'ningbo', 'JJ': 'jingjiang', 'YX': 'yixing',
|
7 |
-
'JD': 'jiading', 'ZR': 'zhenru', 'PH': 'pinghu', 'TX': 'tongxiang',
|
8 |
-
'JS': 'jiashan', 'XS': 'xiashi', 'LP': 'linping', 'XS': 'xiaoshan',
|
9 |
-
'FY': 'fuyang', 'RA': 'ruao', 'CX': 'cixi', 'SM': 'sanmen', 'TT': 'tiantai'}
|
10 |
-
|
11 |
-
converters = {}
|
12 |
-
|
13 |
-
for dialect in dialects.values():
|
14 |
-
try:
|
15 |
-
converters[dialect] = opencc.OpenCC(dialect)
|
16 |
-
except:
|
17 |
-
pass
|
18 |
-
|
19 |
-
|
20 |
-
def ngu_dialect_to_ipa(text, dialect):
|
21 |
-
dialect = dialects[dialect]
|
22 |
-
text = converters[dialect].convert(text).replace('$',' ')
|
23 |
-
text = re.sub(r'[、;:]', ',', text)
|
24 |
-
text = re.sub(r'\s*,\s*', ', ', text)
|
25 |
-
text = re.sub(r'\s*。\s*', '. ', text)
|
26 |
-
text = re.sub(r'\s*?\s*', '? ', text)
|
27 |
-
text = re.sub(r'\s*!\s*', '! ', text)
|
28 |
-
text = re.sub(r'\s*$', '', text)
|
29 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vits/text/shanghainese.py
DELETED
@@ -1,64 +0,0 @@
|
|
1 |
-
import os, sys, re
|
2 |
-
import cn2an
|
3 |
-
import opencc
|
4 |
-
|
5 |
-
|
6 |
-
converter = opencc.OpenCC('zaonhe')
|
7 |
-
|
8 |
-
# List of (Latin alphabet, ipa) pairs:
|
9 |
-
_latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
|
10 |
-
('A', 'ᴇ'),
|
11 |
-
('B', 'bi'),
|
12 |
-
('C', 'si'),
|
13 |
-
('D', 'di'),
|
14 |
-
('E', 'i'),
|
15 |
-
('F', 'ᴇf'),
|
16 |
-
('G', 'dʑi'),
|
17 |
-
('H', 'ᴇtɕʰ'),
|
18 |
-
('I', 'ᴀi'),
|
19 |
-
('J', 'dʑᴇ'),
|
20 |
-
('K', 'kʰᴇ'),
|
21 |
-
('L', 'ᴇl'),
|
22 |
-
('M', 'ᴇm'),
|
23 |
-
('N', 'ᴇn'),
|
24 |
-
('O', 'o'),
|
25 |
-
('P', 'pʰi'),
|
26 |
-
('Q', 'kʰiu'),
|
27 |
-
('R', 'ᴀl'),
|
28 |
-
('S', 'ᴇs'),
|
29 |
-
('T', 'tʰi'),
|
30 |
-
('U', 'ɦiu'),
|
31 |
-
('V', 'vi'),
|
32 |
-
('W', 'dᴀbɤliu'),
|
33 |
-
('X', 'ᴇks'),
|
34 |
-
('Y', 'uᴀi'),
|
35 |
-
('Z', 'zᴇ')
|
36 |
-
]]
|
37 |
-
|
38 |
-
|
39 |
-
def _number_to_shanghainese(num):
|
40 |
-
num = cn2an.an2cn(num).replace('一十','十').replace('二十', '廿').replace('二', '两')
|
41 |
-
return re.sub(r'(?:(?:^|[^三四五六七八九])十|廿)两', lambda x: x.group()[:-1]+'二', num)
|
42 |
-
|
43 |
-
|
44 |
-
def number_to_shanghainese(text):
|
45 |
-
return re.sub(r'\d+(?:\.?\d+)?', lambda x: _number_to_shanghainese(x.group()), text)
|
46 |
-
|
47 |
-
|
48 |
-
def latin_to_ipa(text):
|
49 |
-
for regex, replacement in _latin_to_ipa:
|
50 |
-
text = re.sub(regex, replacement, text)
|
51 |
-
return text
|
52 |
-
|
53 |
-
|
54 |
-
def shanghainese_to_ipa(text):
|
55 |
-
text = number_to_shanghainese(text.upper())
|
56 |
-
text = converter.convert(text).replace('-','').replace('$',' ')
|
57 |
-
text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
|
58 |
-
text = re.sub(r'[、;:]', ',', text)
|
59 |
-
text = re.sub(r'\s*,\s*', ', ', text)
|
60 |
-
text = re.sub(r'\s*。\s*', '. ', text)
|
61 |
-
text = re.sub(r'\s*?\s*', '? ', text)
|
62 |
-
text = re.sub(r'\s*!\s*', '! ', text)
|
63 |
-
text = re.sub(r'\s*$', '', text)
|
64 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|