kdrkdrkdr committed on
Commit
82337c8
1 Parent(s): ce0bb2e

edit cleaners

Browse files
Files changed (2) hide show
  1. saved_model/config.json +2 -2
  2. text/cleaners.py +12 -35
saved_model/config.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4b4c6cae3dad062f31b0d87a4601d9266a24767f9f41949ec2eab3a13824dcd
3
- size 1794
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:284f7d38e892008e195482b8490359d503f634fbc8b4b92ffa333b56848e6678
3
+ size 1781
text/cleaners.py CHANGED
@@ -1,40 +1,17 @@
1
  import re
2
- import pyopenjtalk
3
- from unidecode import unidecode
4
- from text.japanese import _japanese_marks
5
 
 
 
 
 
 
 
6
 
7
- def japanese_triphone_cleaners(text):
8
- sentences = re.split(_japanese_marks, text)
9
- marks = re.findall(_japanese_marks, text)
10
- text = ''
11
- for i, sentence in enumerate(sentences):
12
- phones = pyopenjtalk.g2p(sentence, kana=False)
13
- phones = phones.replace(' ','')
14
- phones = phones.replace('A', 'a').replace('I', 'i').replace('U', 'u').replace('E', 'e').replace('O', 'o')
15
- phones = phones.replace('ch','ʧ').replace('sh','ʃ').replace('cl','Q')
16
- triphones = []
17
- length = len(phones)
18
-
19
- for j, phone in enumerate(phones):
20
- if length == 1:
21
- triphone = phone
22
- else:
23
- if j == 0:
24
- triphone = f'{phone}+{phones[j+1]}'
25
- elif j == length - 1:
26
- triphone = f'{phones[j-1]}-{phone}'
27
- else:
28
- triphone = f'{phones[j-1]}-{phone}+{phones[j+1]}'
29
-
30
- triphones.append(triphone)
31
-
32
- subtext = ' '.join(triphones)
33
- text += subtext
34
- if i < len(marks):
35
- text += unidecode(marks[i]).replace(' ', '')
36
 
37
- if len(text) > 0 and re.match('[A-Za-z]',text[-1]):
38
- text += '.'
39
-
 
 
 
40
  return text
 
1
  import re
 
 
 
2
 
3
+ def japanese_cleaners(text):
4
+ from text.japanese import japanese_to_romaji_with_accent
5
+ text = japanese_to_romaji_with_accent(text)
6
+ if len(text) == 0 or re.match('[A-Za-z]', text[-1]):
7
+ text += '.'
8
+ return text
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ def japanese_cleaners2(text):
12
+ text = text.replace('・・・', '…').replace('・', ' ')
13
+ text = japanese_cleaners(text).replace('ts', 'ʦ').replace('...', '…') \
14
+ .replace('(', '').replace(')', '') \
15
+ .replace('[', '').replace(']', '') \
16
+ .replace('*', ' ').replace('{', '').replace('}', '')
17
  return text