File size: 668 Bytes
bfc486e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import re

def japanese_cleaners(text):
    """Convert *text* with ``japanese_to_romaji_with_accent`` and end it with '.'.

    The converted string gets a trailing period when it is empty or when its
    last character is an ASCII letter; otherwise it is returned unchanged.

    NOTE(review): the converter is imported from ``text.japanese`` and is not
    visible here; judging by its name it presumably yields accented romaji --
    confirm against that module.
    """
    # Imported lazily, at call time, rather than at module import.
    from text.japanese import japanese_to_romaji_with_accent

    romaji = japanese_to_romaji_with_accent(text)

    # Only an empty result or one ending in A-Z / a-z needs a period appended.
    needs_period = not romaji or re.match('[A-Za-z]', romaji[-1]) is not None
    if not needs_period:
        return romaji
    return romaji + '.'


def japanese_cleaners2(text):
    """Run ``japanese_cleaners`` on *text* with extra symbol normalisation.

    Before cleaning: '・・・' becomes '…' and every remaining '・' becomes a
    space. After cleaning: 'ts' becomes 'ʦ', '...' becomes '…', the bracket
    characters ``( ) [ ] { }`` are removed and '*' becomes a space.
    """
    # Collapse the triple middle dot first so the single-dot rule below
    # cannot split it into three spaces.
    prepared = text.replace('・・・', '…')
    prepared = prepared.replace('・', ' ')

    cleaned = japanese_cleaners(prepared)
    cleaned = cleaned.replace('ts', 'ʦ')
    cleaned = cleaned.replace('...', '…')

    # The remaining rules each act on a single character and none of them
    # produces a character another rule consumes, so one pass applies them all.
    symbol_table = str.maketrans({
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '*': ' ',
        '{': '',
        '}': '',
    })
    return cleaned.translate(symbol_table)