candlend committed on
Commit
0cdb167
1 Parent(s): 51a465c

remove opencc

Browse files
requirements.txt CHANGED
@@ -15,7 +15,6 @@ ko-pron==1.3
15
  inflect==6.0.0
16
  eng-to-ipa==0.0.2
17
  num-thai==0.0.5
18
- opencc==1.1.1
19
  scikit-maad
20
  torch
21
  torchvision
 
15
  inflect==6.0.0
16
  eng-to-ipa==0.0.2
17
  num-thai==0.0.5
 
18
  scikit-maad
19
  torch
20
  torchvision
vits/text/cantonese.py DELETED
@@ -1,59 +0,0 @@
1
- import re
2
- import cn2an
3
- import opencc
4
-
5
-
6
- converter = opencc.OpenCC('jyutjyu')
7
-
8
- # List of (Latin alphabet, ipa) pairs:
9
- _latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
10
- ('A', 'ei˥'),
11
- ('B', 'biː˥'),
12
- ('C', 'siː˥'),
13
- ('D', 'tiː˥'),
14
- ('E', 'iː˥'),
15
- ('F', 'e˥fuː˨˩'),
16
- ('G', 'tsiː˥'),
17
- ('H', 'ɪk̚˥tsʰyː˨˩'),
18
- ('I', 'ɐi˥'),
19
- ('J', 'tsei˥'),
20
- ('K', 'kʰei˥'),
21
- ('L', 'e˥llou˨˩'),
22
- ('M', 'ɛːm˥'),
23
- ('N', 'ɛːn˥'),
24
- ('O', 'ou˥'),
25
- ('P', 'pʰiː˥'),
26
- ('Q', 'kʰiːu˥'),
27
- ('R', 'aː˥lou˨˩'),
28
- ('S', 'ɛː˥siː˨˩'),
29
- ('T', 'tʰiː˥'),
30
- ('U', 'juː˥'),
31
- ('V', 'wiː˥'),
32
- ('W', 'tʊk̚˥piː˥juː˥'),
33
- ('X', 'ɪk̚˥siː˨˩'),
34
- ('Y', 'waːi˥'),
35
- ('Z', 'iː˨sɛːt̚˥')
36
- ]]
37
-
38
-
39
- def number_to_cantonese(text):
40
- return re.sub(r'\d+(?:\.?\d+)?', lambda x: cn2an.an2cn(x.group()), text)
41
-
42
-
43
- def latin_to_ipa(text):
44
- for regex, replacement in _latin_to_ipa:
45
- text = re.sub(regex, replacement, text)
46
- return text
47
-
48
-
49
- def cantonese_to_ipa(text):
50
- text = number_to_cantonese(text.upper())
51
- text = converter.convert(text).replace('-','').replace('$',' ')
52
- text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
53
- text = re.sub(r'[、;:]', ',', text)
54
- text = re.sub(r'\s*,\s*', ', ', text)
55
- text = re.sub(r'\s*。\s*', '. ', text)
56
- text = re.sub(r'\s*?\s*', '? ', text)
57
- text = re.sub(r'\s*!\s*', '! ', text)
58
- text = re.sub(r'\s*$', '', text)
59
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vits/text/cleaners.py CHANGED
@@ -7,7 +7,7 @@ from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_i
7
  from text.thai import num_to_thai, latin_to_thai
8
  # from text.shanghainese import shanghainese_to_ipa
9
  # from text.cantonese import cantonese_to_ipa
10
- from text.ngu_dialect import ngu_dialect_to_ipa
11
 
12
 
13
  def japanese_cleaners(text):
@@ -149,28 +149,28 @@ def thai_cleaners(text):
149
  return text
150
 
151
 
152
- def shanghainese_cleaners(text):
153
- text = shanghainese_to_ipa(text)
154
- if re.match(r'[^\.,!\?\-…~]', text[-1]):
155
- text += '.'
156
- return text
157
-
158
-
159
- def chinese_dialect_cleaners(text):
160
- text = re.sub(r'\[MD\](.*?)\[MD\]',
161
- lambda x: chinese_to_ipa2(x.group(1))+' ', text)
162
- text = re.sub(r'\[TW\](.*?)\[TW\]',
163
- lambda x: chinese_to_ipa2(x.group(1), True)+' ', text)
164
- text = re.sub(r'\[JA\](.*?)\[JA\]',
165
- lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
166
- text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
167
- '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
168
- text = re.sub(r'\[GD\](.*?)\[GD\]',
169
- lambda x: cantonese_to_ipa(x.group(1))+' ', text)
170
- text = re.sub(r'\[EN\](.*?)\[EN\]',
171
- lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
172
- text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
173
- 1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
174
- text = re.sub(r'\s+$', '', text)
175
- text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
176
- return text
 
7
  from text.thai import num_to_thai, latin_to_thai
8
  # from text.shanghainese import shanghainese_to_ipa
9
  # from text.cantonese import cantonese_to_ipa
10
+ # from text.ngu_dialect import ngu_dialect_to_ipa
11
 
12
 
13
  def japanese_cleaners(text):
 
149
  return text
150
 
151
 
152
+ # def shanghainese_cleaners(text):
153
+ # text = shanghainese_to_ipa(text)
154
+ # if re.match(r'[^\.,!\?\-…~]', text[-1]):
155
+ # text += '.'
156
+ # return text
157
+
158
+
159
+ # def chinese_dialect_cleaners(text):
160
+ # text = re.sub(r'\[MD\](.*?)\[MD\]',
161
+ # lambda x: chinese_to_ipa2(x.group(1))+' ', text)
162
+ # text = re.sub(r'\[TW\](.*?)\[TW\]',
163
+ # lambda x: chinese_to_ipa2(x.group(1), True)+' ', text)
164
+ # text = re.sub(r'\[JA\](.*?)\[JA\]',
165
+ # lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
166
+ # text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
167
+ # '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
168
+ # text = re.sub(r'\[GD\](.*?)\[GD\]',
169
+ # lambda x: cantonese_to_ipa(x.group(1))+' ', text)
170
+ # text = re.sub(r'\[EN\](.*?)\[EN\]',
171
+ # lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
172
+ # text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
173
+ # 1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
174
+ # text = re.sub(r'\s+$', '', text)
175
+ # text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
176
+ # return text
vits/text/ngu_dialect.py DELETED
@@ -1,29 +0,0 @@
1
- import re
2
- import opencc
3
-
4
-
5
- dialects = {'SZ': 'suzhou', 'WX': 'wuxi', 'CZ': 'changzhou', 'HZ': 'hangzhou',
6
- 'SX': 'shaoxing', 'NB': 'ningbo', 'JJ': 'jingjiang', 'YX': 'yixing',
7
- 'JD': 'jiading', 'ZR': 'zhenru', 'PH': 'pinghu', 'TX': 'tongxiang',
8
- 'JS': 'jiashan', 'XS': 'xiashi', 'LP': 'linping', 'XS': 'xiaoshan',
9
- 'FY': 'fuyang', 'RA': 'ruao', 'CX': 'cixi', 'SM': 'sanmen', 'TT': 'tiantai'}
10
-
11
- converters = {}
12
-
13
- for dialect in dialects.values():
14
- try:
15
- converters[dialect] = opencc.OpenCC(dialect)
16
- except:
17
- pass
18
-
19
-
20
- def ngu_dialect_to_ipa(text, dialect):
21
- dialect = dialects[dialect]
22
- text = converters[dialect].convert(text).replace('$',' ')
23
- text = re.sub(r'[、;:]', ',', text)
24
- text = re.sub(r'\s*,\s*', ', ', text)
25
- text = re.sub(r'\s*。\s*', '. ', text)
26
- text = re.sub(r'\s*?\s*', '? ', text)
27
- text = re.sub(r'\s*!\s*', '! ', text)
28
- text = re.sub(r'\s*$', '', text)
29
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vits/text/shanghainese.py DELETED
@@ -1,64 +0,0 @@
1
- import os, sys, re
2
- import cn2an
3
- import opencc
4
-
5
-
6
- converter = opencc.OpenCC('zaonhe')
7
-
8
- # List of (Latin alphabet, ipa) pairs:
9
- _latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
10
- ('A', 'ᴇ'),
11
- ('B', 'bi'),
12
- ('C', 'si'),
13
- ('D', 'di'),
14
- ('E', 'i'),
15
- ('F', 'ᴇf'),
16
- ('G', 'dʑi'),
17
- ('H', 'ᴇtɕʰ'),
18
- ('I', 'ᴀi'),
19
- ('J', 'dʑᴇ'),
20
- ('K', 'kʰᴇ'),
21
- ('L', 'ᴇl'),
22
- ('M', 'ᴇm'),
23
- ('N', 'ᴇn'),
24
- ('O', 'o'),
25
- ('P', 'pʰi'),
26
- ('Q', 'kʰiu'),
27
- ('R', 'ᴀl'),
28
- ('S', 'ᴇs'),
29
- ('T', 'tʰi'),
30
- ('U', 'ɦiu'),
31
- ('V', 'vi'),
32
- ('W', 'dᴀbɤliu'),
33
- ('X', 'ᴇks'),
34
- ('Y', 'uᴀi'),
35
- ('Z', 'zᴇ')
36
- ]]
37
-
38
-
39
- def _number_to_shanghainese(num):
40
- num = cn2an.an2cn(num).replace('一十','十').replace('二十', '廿').replace('二', '两')
41
- return re.sub(r'(?:(?:^|[^三四五六七八九])十|廿)两', lambda x: x.group()[:-1]+'二', num)
42
-
43
-
44
- def number_to_shanghainese(text):
45
- return re.sub(r'\d+(?:\.?\d+)?', lambda x: _number_to_shanghainese(x.group()), text)
46
-
47
-
48
- def latin_to_ipa(text):
49
- for regex, replacement in _latin_to_ipa:
50
- text = re.sub(regex, replacement, text)
51
- return text
52
-
53
-
54
- def shanghainese_to_ipa(text):
55
- text = number_to_shanghainese(text.upper())
56
- text = converter.convert(text).replace('-','').replace('$',' ')
57
- text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
58
- text = re.sub(r'[、;:]', ',', text)
59
- text = re.sub(r'\s*,\s*', ', ', text)
60
- text = re.sub(r'\s*。\s*', '. ', text)
61
- text = re.sub(r'\s*?\s*', '? ', text)
62
- text = re.sub(r'\s*!\s*', '! ', text)
63
- text = re.sub(r'\s*$', '', text)
64
- return text