kritias committed on
Commit
7973e25
1 Parent(s): aec166e

Fix: Path, Text, Requirements

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numba
2
+ librosa
3
+ matplotlib
4
+ numpy
5
+ phonemizer
6
+ scipy
7
+ tensorboard
8
+ torch
9
+ torchvision
10
+ torchaudio
11
+ Unidecode
12
+ pyopenjtalk
13
+ gradio
resource/.DS_Store ADDED
Binary file (6.15 kB). View file
 
resource/cover.png ADDED
src/text/cleaners.py CHANGED
@@ -1,5 +1,4 @@
1
  import re
2
- from src.text.korean import latin_to_hangul, number_to_hangul, divide_hangul, korean_to_lazy_ipa, korean_to_ipa
3
 
4
  def japanese_cleaners(text):
5
  from text.japanese import japanese_to_romaji_with_accent
@@ -15,13 +14,4 @@ def japanese_cleaners2(text):
15
  .replace('(', '').replace(')', '') \
16
  .replace('[', '').replace(']', '') \
17
  .replace('*', ' ').replace('{', '').replace('}', '')
18
- return text
19
-
20
- def korean_cleaners(text):
21
- text = latin_to_hangul(text)
22
- text = number_to_hangul(text)
23
- #text = divide_hangul(text)
24
- if re.match('[\u3131-\u3163]', text[-1]):
25
- text += '.'
26
- text = korean_to_ipa(text)
27
  return text
 
1
  import re
 
2
 
3
  def japanese_cleaners(text):
4
  from text.japanese import japanese_to_romaji_with_accent
 
14
  .replace('(', '').replace(')', '') \
15
  .replace('[', '').replace(']', '') \
16
  .replace('*', ' ').replace('{', '').replace('}', '')
 
 
 
 
 
 
 
 
 
17
  return text
src/text/korean.py DELETED
@@ -1,210 +0,0 @@
1
- import re
2
- from jamo import h2j, j2hcj
3
- import ko_pron
4
-
5
-
6
- # This is a list of Korean classifiers preceded by pure Korean numerals.
7
- _korean_classifiers = '군데 권 개 그루 닢 대 두 마리 모 모금 뭇 발 발짝 방 번 벌 보루 살 수 술 시 쌈 움큼 정 짝 채 척 첩 축 켤레 톨 통'
8
-
9
- # List of (hangul, hangul divided) pairs:
10
- _hangul_divided = [(re.compile('%s' % x[0]), x[1]) for x in [
11
- ('ㄳ', 'ㄱㅅ'),
12
- ('ㄵ', 'ㄴㅈ'),
13
- ('ㄶ', 'ㄴㅎ'),
14
- ('ㄺ', 'ㄹㄱ'),
15
- ('ㄻ', 'ㄹㅁ'),
16
- ('ㄼ', 'ㄹㅂ'),
17
- ('ㄽ', 'ㄹㅅ'),
18
- ('ㄾ', 'ㄹㅌ'),
19
- ('ㄿ', 'ㄹㅍ'),
20
- ('ㅀ', 'ㄹㅎ'),
21
- ('ㅄ', 'ㅂㅅ'),
22
- ('ㅘ', 'ㅗㅏ'),
23
- ('ㅙ', 'ㅗㅐ'),
24
- ('ㅚ', 'ㅗㅣ'),
25
- ('ㅝ', 'ㅜㅓ'),
26
- ('ㅞ', 'ㅜㅔ'),
27
- ('ㅟ', 'ㅜㅣ'),
28
- ('ㅢ', 'ㅡㅣ'),
29
- ('ㅑ', 'ㅣㅏ'),
30
- ('ㅒ', 'ㅣㅐ'),
31
- ('ㅕ', 'ㅣㅓ'),
32
- ('ㅖ', 'ㅣㅔ'),
33
- ('ㅛ', 'ㅣㅗ'),
34
- ('ㅠ', 'ㅣㅜ')
35
- ]]
36
-
37
- # List of (Latin alphabet, hangul) pairs:
38
- _latin_to_hangul = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
39
- ('a', '에이'),
40
- ('b', '비'),
41
- ('c', '시'),
42
- ('d', '디'),
43
- ('e', '이'),
44
- ('f', '에프'),
45
- ('g', '지'),
46
- ('h', '에이치'),
47
- ('i', '아이'),
48
- ('j', '제이'),
49
- ('k', '케이'),
50
- ('l', '엘'),
51
- ('m', '엠'),
52
- ('n', '엔'),
53
- ('o', '오'),
54
- ('p', '피'),
55
- ('q', '큐'),
56
- ('r', '아르'),
57
- ('s', '에스'),
58
- ('t', '티'),
59
- ('u', '유'),
60
- ('v', '브이'),
61
- ('w', '더블유'),
62
- ('x', '엑스'),
63
- ('y', '와이'),
64
- ('z', '제트')
65
- ]]
66
-
67
- # List of (ipa, lazy ipa) pairs:
68
- _ipa_to_lazy_ipa = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
69
- ('t͡ɕ','ʧ'),
70
- ('d͡ʑ','ʥ'),
71
- ('ɲ','n^'),
72
- ('ɕ','ʃ'),
73
- ('ʷ','w'),
74
- ('ɭ','l`'),
75
- ('ʎ','ɾ'),
76
- ('ɣ','ŋ'),
77
- ('ɰ','ɯ'),
78
- ('ʝ','j'),
79
- ('ʌ','ə'),
80
- ('ɡ','g'),
81
- ('\u031a','#'),
82
- ('\u0348','='),
83
- ('\u031e',''),
84
- ('\u0320',''),
85
- ('\u0339','')
86
- ]]
87
-
88
-
89
- def latin_to_hangul(text):
90
- for regex, replacement in _latin_to_hangul:
91
- text = re.sub(regex, replacement, text)
92
- return text
93
-
94
-
95
- def divide_hangul(text):
96
- text = j2hcj(h2j(text))
97
- for regex, replacement in _hangul_divided:
98
- text = re.sub(regex, replacement, text)
99
- return text
100
-
101
-
102
- def hangul_number(num, sino=True):
103
- '''Reference https://github.com/Kyubyong/g2pK'''
104
- num = re.sub(',', '', num)
105
-
106
- if num == '0':
107
- return '영'
108
- if not sino and num == '20':
109
- return '스무'
110
-
111
- digits = '123456789'
112
- names = '일이삼사오육칠팔구'
113
- digit2name = {d: n for d, n in zip(digits, names)}
114
-
115
- modifiers = '한 두 세 네 다섯 여섯 일곱 여덟 아홉'
116
- decimals = '열 스물 서른 마흔 쉰 예순 일흔 여든 아흔'
117
- digit2mod = {d: mod for d, mod in zip(digits, modifiers.split())}
118
- digit2dec = {d: dec for d, dec in zip(digits, decimals.split())}
119
-
120
- spelledout = []
121
- for i, digit in enumerate(num):
122
- i = len(num) - i - 1
123
- if sino:
124
- if i == 0:
125
- name = digit2name.get(digit, '')
126
- elif i == 1:
127
- name = digit2name.get(digit, '') + '십'
128
- name = name.replace('일십', '십')
129
- else:
130
- if i == 0:
131
- name = digit2mod.get(digit, '')
132
- elif i == 1:
133
- name = digit2dec.get(digit, '')
134
- if digit == '0':
135
- if i % 4 == 0:
136
- last_three = spelledout[-min(3, len(spelledout)):]
137
- if ''.join(last_three) == '':
138
- spelledout.append('')
139
- continue
140
- else:
141
- spelledout.append('')
142
- continue
143
- if i == 2:
144
- name = digit2name.get(digit, '') + '백'
145
- name = name.replace('일백', '백')
146
- elif i == 3:
147
- name = digit2name.get(digit, '') + '천'
148
- name = name.replace('일천', '천')
149
- elif i == 4:
150
- name = digit2name.get(digit, '') + '만'
151
- name = name.replace('일만', '만')
152
- elif i == 5:
153
- name = digit2name.get(digit, '') + '십'
154
- name = name.replace('일십', '십')
155
- elif i == 6:
156
- name = digit2name.get(digit, '') + '백'
157
- name = name.replace('일백', '백')
158
- elif i == 7:
159
- name = digit2name.get(digit, '') + '천'
160
- name = name.replace('일천', '천')
161
- elif i == 8:
162
- name = digit2name.get(digit, '') + '억'
163
- elif i == 9:
164
- name = digit2name.get(digit, '') + '십'
165
- elif i == 10:
166
- name = digit2name.get(digit, '') + '백'
167
- elif i == 11:
168
- name = digit2name.get(digit, '') + '천'
169
- elif i == 12:
170
- name = digit2name.get(digit, '') + '조'
171
- elif i == 13:
172
- name = digit2name.get(digit, '') + '십'
173
- elif i == 14:
174
- name = digit2name.get(digit, '') + '백'
175
- elif i == 15:
176
- name = digit2name.get(digit, '') + '천'
177
- spelledout.append(name)
178
- return ''.join(elem for elem in spelledout)
179
-
180
-
181
- def number_to_hangul(text):
182
- '''Reference https://github.com/Kyubyong/g2pK'''
183
- tokens = set(re.findall(r'(\d[\d,]*)([\uac00-\ud71f]+)', text))
184
- for token in tokens:
185
- num, classifier = token
186
- if classifier[:2] in _korean_classifiers or classifier[0] in _korean_classifiers:
187
- spelledout = hangul_number(num, sino=False)
188
- else:
189
- spelledout = hangul_number(num, sino=True)
190
- text = text.replace(f'{num}{classifier}', f'{spelledout}{classifier}')
191
- # digit by digit for remaining digits
192
- digits = '0123456789'
193
- names = '영일이삼사오육칠팔구'
194
- for d, n in zip(digits, names):
195
- text = text.replace(d, n)
196
- return text
197
-
198
-
199
- def korean_to_lazy_ipa(text):
200
- text = latin_to_hangul(text)
201
- text = number_to_hangul(text)
202
- text=re.sub('[\uac00-\ud7af]+',lambda x:ko_pron.romanise(x.group(0),'ipa').split('] ~ [')[0],text)
203
- for regex, replacement in _ipa_to_lazy_ipa:
204
- text = re.sub(regex, replacement, text)
205
- return text
206
-
207
-
208
- def korean_to_ipa(text):
209
- text = korean_to_lazy_ipa(text)
210
- return text.replace('ʧ','tʃ').replace('ʥ','dʑ')