edit cleaners
Browse files
- saved_model/config.json +2 -2
- text/cleaners.py +12 -35
saved_model/config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:284f7d38e892008e195482b8490359d503f634fbc8b4b92ffa333b56848e6678
|
3 |
+
size 1781
|
text/cleaners.py
CHANGED
@@ -1,40 +1,17 @@
|
|
1 |
import re
|
2 |
-
import pyopenjtalk
|
3 |
-
from unidecode import unidecode
|
4 |
-
from text.japanese import _japanese_marks
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
def japanese_triphone_cleaners(text):
    """Convert Japanese text into space-separated context-dependent labels.

    The input is split on ``_japanese_marks``. Each resulting segment is
    passed through ``pyopenjtalk.g2p(..., kana=False)``, stripped of spaces,
    and normalised (upper-case ``A I U E O`` lowered; ``ch``/``sh``/``cl``
    collapsed to the single symbols ``ʧ``/``ʃ``/``Q``). Every remaining
    character is then labelled with its neighbours in the form
    ``prev-cur+next`` (the missing side is omitted at either end of the
    segment, and a lone character is emitted unchanged).

    After each segment, the mark that followed it in the input is appended
    after ``unidecode`` transliteration with spaces removed.

    Args:
        text: Raw input string.

    Returns:
        The concatenation of all labelled segments and transliterated marks.
    """
    segments = re.split(_japanese_marks, text)
    separators = re.findall(_japanese_marks, text)

    pieces = []
    for idx, segment in enumerate(segments):
        symbols = pyopenjtalk.g2p(segment, kana=False).replace(' ', '')
        # Upper-case vowels in the g2p output are folded to lower case.
        for vowel in 'AIUEO':
            symbols = symbols.replace(vowel, vowel.lower())
        # Collapse two-letter symbols so that one character == one unit.
        for digraph, single in (('ch', 'ʧ'), ('sh', 'ʃ'), ('cl', 'Q')):
            symbols = symbols.replace(digraph, single)

        last = len(symbols) - 1
        labels = []
        for pos, current in enumerate(symbols):
            label = current
            if pos > 0:
                # Left context exists for everything but the first unit.
                label = f'{symbols[pos - 1]}-{label}'
            if pos < last:
                # Right context exists for everything but the last unit.
                label = f'{label}+{symbols[pos + 1]}'
            labels.append(label)
        pieces.append(' '.join(labels))

        # re.split yields one more segment than there are marks, so the
        # final segment has no trailing mark to re-attach.
        if idx < len(separators):
            pieces.append(unidecode(separators[idx]).replace(' ', ''))

    return ''.join(pieces)
|
|
|
1 |
import re
|
|
|
|
|
|
|
2 |
|
3 |
+
def japanese_cleaners(text):
    """Romanise Japanese text and make sure it ends with punctuation.

    The text is converted with ``japanese_to_romaji_with_accent`` (imported
    lazily from ``text.japanese``). If the converted string is empty, or its
    final character is an ASCII letter, a ``.`` is appended.

    Args:
        text: Raw input string.

    Returns:
        The converted string, with a trailing ``.`` added when required.
    """
    from text.japanese import japanese_to_romaji_with_accent

    romaji = japanese_to_romaji_with_accent(text)
    # Empty output is checked first so romaji[-1] is never evaluated on ''.
    if not romaji:
        return romaji + '.'
    if re.match('[A-Za-z]', romaji[-1]):
        return romaji + '.'
    return romaji
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
def japanese_cleaners2(text):
    """Variant of ``japanese_cleaners`` with extra symbol normalisation.

    Before conversion, ``・・・`` becomes ``…`` and any remaining ``・`` becomes
    a space. After ``japanese_cleaners`` has run, ``ts`` is replaced by
    ``ʦ``, ``...`` by ``…``, round/square/curly brackets are removed, and
    ``*`` is replaced by a space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string.
    """
    # Applied in order before conversion; the triple dot must be handled
    # before the single-dot rule would break it apart.
    pre_rules = (('・・・', '…'), ('・', ' '))
    # Applied in order to the output of japanese_cleaners.
    post_rules = (
        ('ts', 'ʦ'),
        ('...', '…'),
        ('(', ''),
        (')', ''),
        ('[', ''),
        (']', ''),
        ('*', ' '),
        ('{', ''),
        ('}', ''),
    )

    for old, new in pre_rules:
        text = text.replace(old, new)

    cleaned = japanese_cleaners(text)
    for old, new in post_rules:
        cleaned = cleaned.replace(old, new)
    return cleaned
|