File size: 408 Bytes
f73a66e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
import re
from text.japanese import japanese_to_romaji_with_accent

def japanese_cleaners(text: str) -> str:
    """Clean Japanese text by converting it through the romaji helper.

    The input is wrapped in ``[JA]`` markers, each marked span is passed
    through ``japanese_to_romaji_with_accent`` and post-processed
    (``ts`` -> ``ʦ``, ``u`` -> ``ɯ``, ``...`` -> ``…``), trailing whitespace
    is removed, and a final ``.`` is appended when the text does not already
    end in one of ``. , ! ? - … ~``.

    Args:
        text: Raw input text. May span several lines.

    Returns:
        The cleaned text. An empty input yields an empty string.
    """
    def _convert_span(match):
        # Replacement order matters: 'ts' must be folded before 'u' is
        # rewritten, and the ellipsis is collapsed last.
        converted = japanese_to_romaji_with_accent(match.group(1))
        converted = converted.replace('ts', 'ʦ')
        converted = converted.replace('u', 'ɯ')
        converted = converted.replace('...', '…')
        # Each converted span is followed by a single separating space.
        return converted + ' '

    text = f'[JA]{text}[JA]'
    # DOTALL lets '.' cross newlines. Without it, multi-line input never
    # matches the wrapper, the conversion is skipped entirely, and the
    # literal '[JA]' markers leak into the returned text.
    text = re.sub(r'\[JA\](.*?)\[JA\]', _convert_span, text, flags=re.DOTALL)
    # Drop the trailing separator space (and any other trailing whitespace).
    text = re.sub(r'\s+$', '', text)
    # Make sure the utterance ends with terminal punctuation.
    text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
    return text