File size: 1,120 Bytes
64f41a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import re
from text.korean import latin_to_hangul, number_to_hangul, divide_hangul, korean_to_lazy_ipa, korean_to_ipa

def cjke_cleaners2(text):
    chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
    japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
    korean_texts = re.findall(r'\[KO\].*?\[KO\]', text)
    english_texts = re.findall(r'\[EN\].*?\[EN\]', text)
    for chinese_text in chinese_texts:
        cleaned_text = chinese_to_ipa(chinese_text[4:-4])
        text = text.replace(chinese_text, cleaned_text+' ', 1)
    for japanese_text in japanese_texts:
        cleaned_text = japanese_to_ipa2(japanese_text[4:-4])
        text = text.replace(japanese_text, cleaned_text+' ', 1)
    for korean_text in korean_texts:
        cleaned_text = korean_to_ipa(korean_text[4:-4])
        text = text.replace(korean_text, cleaned_text+' ', 1)
    for english_text in english_texts:
        cleaned_text = english_to_ipa2(english_text[4:-4])
        text = text.replace(english_text, cleaned_text+' ', 1)
    #text = text[:-1]
    #if re.match(r'[^\.,!\?\-…~]', text[-1]):
        text += '.'
    return text