import re
from text.korean import latin_to_hangul, number_to_hangul, divide_hangul, korean_to_lazy_ipa, korean_to_ipa


def cjke_cleaners2(text):
    """Replace language-tagged spans in ``text`` with converter output.

    A span is the shortest run delimited by a matching pair of ``[ZH]``,
    ``[JA]``, ``[KO]`` or ``[EN]`` markers (``.`` does not cross newlines, so
    a span never extends over a line break). Each span, markers included, is
    replaced by the result of the corresponding converter applied to the
    enclosed text, followed by a single space. Text outside any span is left
    as it is.

    Args:
        text: Input string, optionally containing tagged spans.

    Returns:
        The string with every matched span substituted.
    """
    # NOTE(review): chinese_to_ipa, japanese_to_ipa2 and english_to_ipa2 are
    # not imported in the visible part of this file -- confirm they are in
    # scope, otherwise the matching tags raise NameError.

    tag_len = 4  # each marker ('[ZH]', '[JA]', '[KO]', '[EN]') is 4 characters

    # All four scans run against the unmodified input, before any substitution.
    zh_spans = re.findall(r'\[ZH\].*?\[ZH\]', text)
    ja_spans = re.findall(r'\[JA\].*?\[JA\]', text)
    ko_spans = re.findall(r'\[KO\].*?\[KO\]', text)
    en_spans = re.findall(r'\[EN\].*?\[EN\]', text)

    def splice(current, span, converted):
        # Only the first remaining occurrence is swapped, so repeated identical
        # spans are consumed one at a time, in match order.
        return current.replace(span, converted + ' ', 1)

    for span in zh_spans:
        text = splice(text, span, chinese_to_ipa(span[tag_len:-tag_len]))

    for span in ja_spans:
        text = splice(text, span, japanese_to_ipa2(span[tag_len:-tag_len]))

    for span in ko_spans:
        text = splice(text, span, korean_to_ipa(span[tag_len:-tag_len]))

    for span in en_spans:
        text = splice(text, span, english_to_ipa2(span[tag_len:-tag_len]))

    # NOTE: dropping the last character and appending '.' when the text does
    # not end in punctuation used to live here as commented-out code; the
    # result therefore keeps the space added after the final converted span.
    return text