Spaces:
Sleeping
Sleeping
| # Convert Japanese text to phonemes that are | |
| # compatible with Julius: https://github.com/julius-speech/segmentation-kit | |
| import re | |
| import unicodedata | |
| from transformers import AutoTokenizer | |
| from . import symbols | |
# Punctuation marks kept by replace_punctuation and passed through verbatim as
# phoneme symbols by g2p.  The horizontal ellipsis is written as an escape so
# the literal cannot be corrupted by an encoding round-trip.
punctuation = ["!", "?", "\u2026", ",", ".", "'", "-"]
| try: | |
| import MeCab | |
| except ImportError as e: | |
| raise ImportError("Japanese requires mecab-python3 and unidic-lite.") from e | |
| from num2words import num2words | |
| _CONVRULES = [ | |
| # Conversion of 2 letters | |
| "ใขใก/ a a", | |
| "ใคใฃ/ i i", | |
| "ใคใง/ i e", | |
| "ใคใฃ/ y a", | |
| "ใฆใฅ/ u:", | |
| "ใจใง/ e e", | |
| "ใชใฉ/ o:", | |
| "ใซใก/ k a:", | |
| "ใญใฃ/ k i:", | |
| "ใฏใฅ/ k u:", | |
| "ใฏใฃ/ ky a", | |
| "ใฏใฅ/ ky u", | |
| "ใฏใง/ ky o", | |
| "ใฑใง/ k e:", | |
| "ใณใฉ/ k o:", | |
| "ใฌใก/ g a:", | |
| "ใฎใฃ/ g i:", | |
| "ใฐใฅ/ g u:", | |
| "ใฐใฃ/ gy a", | |
| "ใฐใฅ/ gy u", | |
| "ใฐใง/ gy o", | |
| "ใฒใง/ g e:", | |
| "ใดใฉ/ g o:", | |
| "ใตใก/ s a:", | |
| "ใทใฃ/ sh i:", | |
| "ในใฅ/ s u:", | |
| "ในใฃ/ sh a", | |
| "ในใฅ/ sh u", | |
| "ในใง/ sh o", | |
| "ใปใง/ s e:", | |
| "ใฝใฉ/ s o:", | |
| "ใถใก/ z a:", | |
| "ใธใฃ/ j i:", | |
| "ใบใฅ/ z u:", | |
| "ใบใฃ/ zy a", | |
| "ใบใฅ/ zy u", | |
| "ใบใง/ zy o", | |
| "ใผใง/ z e:", | |
| "ใพใฉ/ z o:", | |
| "ใฟใก/ t a:", | |
| "ใใฃ/ ch i:", | |
| "ใใก/ ts a", | |
| "ใใฃ/ ts i", | |
| "ใใฅ/ ts u:", | |
| "ใใฃ/ ch a", | |
| "ใใฅ/ ch u", | |
| "ใใง/ ch o", | |
| "ใใง/ ts e", | |
| "ใใฉ/ ts o", | |
| "ใใง/ t e:", | |
| "ใใฉ/ t o:", | |
| "ใใก/ d a:", | |
| "ใใฃ/ j i:", | |
| "ใ ใฅ/ d u:", | |
| "ใ ใฃ/ zy a", | |
| "ใ ใฅ/ zy u", | |
| "ใ ใง/ zy o", | |
| "ใใง/ d e:", | |
| "ใใฉ/ d o:", | |
| "ใใก/ n a:", | |
| "ใใฃ/ n i:", | |
| "ใใฅ/ n u:", | |
| "ใใฃ/ ny a", | |
| "ใใฅ/ ny u", | |
| "ใใง/ ny o", | |
| "ใใง/ n e:", | |
| "ใใฉ/ n o:", | |
| "ใใก/ h a:", | |
| "ใใฃ/ h i:", | |
| "ใใฅ/ f u:", | |
| "ใใฃ/ hy a", | |
| "ใใฅ/ hy u", | |
| "ใใง/ hy o", | |
| "ใใง/ h e:", | |
| "ใใฉ/ h o:", | |
| "ใใก/ b a:", | |
| "ใใฃ/ b i:", | |
| "ใใฅ/ b u:", | |
| "ใใฃ/ hy a", | |
| "ใใฅ/ by u", | |
| "ใใง/ hy o", | |
| "ใใง/ b e:", | |
| "ใใฉ/ b o:", | |
| "ใใก/ p a:", | |
| "ใใฃ/ p i:", | |
| "ใใฅ/ p u:", | |
| "ใใฃ/ py a", | |
| "ใใฅ/ py u", | |
| "ใใง/ py o", | |
| "ใใง/ p e:", | |
| "ใใฉ/ p o:", | |
| "ใใก/ m a:", | |
| "ใใฃ/ m i:", | |
| "ใ ใฅ/ m u:", | |
| "ใ ใฃ/ my a", | |
| "ใ ใฅ/ my u", | |
| "ใ ใง/ my o", | |
| "ใกใง/ m e:", | |
| "ใขใฉ/ m o:", | |
| "ใคใก/ y a:", | |
| "ใฆใฅ/ y u:", | |
| "ใฆใฃ/ y a:", | |
| "ใฆใฅ/ y u:", | |
| "ใฆใง/ y o:", | |
| "ใจใฉ/ y o:", | |
| "ใฉใก/ r a:", | |
| "ใชใฃ/ r i:", | |
| "ใซใฅ/ r u:", | |
| "ใซใฃ/ ry a", | |
| "ใซใฅ/ ry u", | |
| "ใซใง/ ry o", | |
| "ใฌใง/ r e:", | |
| "ใญใฉ/ r o:", | |
| "ใฏใก/ w a:", | |
| "ใฒใฉ/ o:", | |
| "ใใฃ/ d i", | |
| "ใใง/ d e:", | |
| "ใใฃ/ dy a", | |
| "ใใฅ/ dy u", | |
| "ใใง/ dy o", | |
| "ใใฃ/ t i", | |
| "ใใง/ t e:", | |
| "ใใฃ/ ty a", | |
| "ใใฅ/ ty u", | |
| "ใใง/ ty o", | |
| "ในใฃ/ s i", | |
| "ใบใก/ z u a", | |
| "ใบใฃ/ z i", | |
| "ใบใฅ/ z u", | |
| "ใบใฃ/ zy a", | |
| "ใบใฅ/ zy u", | |
| "ใบใง/ zy o", | |
| "ใบใง/ z e", | |
| "ใบใฉ/ z o", | |
| "ใญใฃ/ ky a", | |
| "ใญใฅ/ ky u", | |
| "ใญใง/ ky o", | |
| "ใทใฃ/ sh a", | |
| "ใทใฅ/ sh u", | |
| "ใทใง/ sh e", | |
| "ใทใง/ sh o", | |
| "ใใฃ/ ch a", | |
| "ใใฅ/ ch u", | |
| "ใใง/ ch e", | |
| "ใใง/ ch o", | |
| "ใใฅ/ t u", | |
| "ใใฃ/ ty a", | |
| "ใใฅ/ ty u", | |
| "ใใง/ ty o", | |
| "ใใก/ d o a", | |
| "ใใฅ/ d u", | |
| "ใใฃ/ dy a", | |
| "ใใฅ/ dy u", | |
| "ใใง/ dy o", | |
| "ใใฉ/ d o:", | |
| "ใใฃ/ ny a", | |
| "ใใฅ/ ny u", | |
| "ใใง/ ny o", | |
| "ใใฃ/ hy a", | |
| "ใใฅ/ hy u", | |
| "ใใง/ hy o", | |
| "ใใฃ/ my a", | |
| "ใใฅ/ my u", | |
| "ใใง/ my o", | |
| "ใชใฃ/ ry a", | |
| "ใชใฅ/ ry u", | |
| "ใชใง/ ry o", | |
| "ใฎใฃ/ gy a", | |
| "ใฎใฅ/ gy u", | |
| "ใฎใง/ gy o", | |
| "ใใง/ j e", | |
| "ใใฃ/ j a", | |
| "ใใฅ/ j u", | |
| "ใใง/ j o", | |
| "ใธใง/ j e", | |
| "ใธใฃ/ j a", | |
| "ใธใฅ/ j u", | |
| "ใธใง/ j o", | |
| "ใใฃ/ by a", | |
| "ใใฅ/ by u", | |
| "ใใง/ by o", | |
| "ใใฃ/ py a", | |
| "ใใฅ/ py u", | |
| "ใใง/ py o", | |
| "ใฆใก/ u a", | |
| "ใฆใฃ/ w i", | |
| "ใฆใง/ w e", | |
| "ใฆใฉ/ w o", | |
| "ใใก/ f a", | |
| "ใใฃ/ f i", | |
| "ใใฅ/ f u", | |
| "ใใฃ/ hy a", | |
| "ใใฅ/ hy u", | |
| "ใใง/ hy o", | |
| "ใใง/ f e", | |
| "ใใฉ/ f o", | |
| "ใดใก/ b a", | |
| "ใดใฃ/ b i", | |
| "ใดใง/ b e", | |
| "ใดใฉ/ b o", | |
| "ใดใฅ/ by u", | |
| # Conversion of 1 letter | |
| "ใข/ a", | |
| "ใค/ i", | |
| "ใฆ/ u", | |
| "ใจ/ e", | |
| "ใช/ o", | |
| "ใซ/ k a", | |
| "ใญ/ k i", | |
| "ใฏ/ k u", | |
| "ใฑ/ k e", | |
| "ใณ/ k o", | |
| "ใต/ s a", | |
| "ใท/ sh i", | |
| "ใน/ s u", | |
| "ใป/ s e", | |
| "ใฝ/ s o", | |
| "ใฟ/ t a", | |
| "ใ/ ch i", | |
| "ใ/ ts u", | |
| "ใ/ t e", | |
| "ใ/ t o", | |
| "ใ/ n a", | |
| "ใ/ n i", | |
| "ใ/ n u", | |
| "ใ/ n e", | |
| "ใ/ n o", | |
| "ใ/ h a", | |
| "ใ/ h i", | |
| "ใ/ f u", | |
| "ใ/ h e", | |
| "ใ/ h o", | |
| "ใ/ m a", | |
| "ใ/ m i", | |
| "ใ / m u", | |
| "ใก/ m e", | |
| "ใข/ m o", | |
| "ใฉ/ r a", | |
| "ใช/ r i", | |
| "ใซ/ r u", | |
| "ใฌ/ r e", | |
| "ใญ/ r o", | |
| "ใฌ/ g a", | |
| "ใฎ/ g i", | |
| "ใฐ/ g u", | |
| "ใฒ/ g e", | |
| "ใด/ g o", | |
| "ใถ/ z a", | |
| "ใธ/ j i", | |
| "ใบ/ z u", | |
| "ใผ/ z e", | |
| "ใพ/ z o", | |
| "ใ/ d a", | |
| "ใ/ j i", | |
| "ใ / z u", | |
| "ใ/ d e", | |
| "ใ/ d o", | |
| "ใ/ b a", | |
| "ใ/ b i", | |
| "ใ/ b u", | |
| "ใ/ b e", | |
| "ใ/ b o", | |
| "ใ/ p a", | |
| "ใ/ p i", | |
| "ใ/ p u", | |
| "ใ/ p e", | |
| "ใ/ p o", | |
| "ใค/ y a", | |
| "ใฆ/ y u", | |
| "ใจ/ y o", | |
| "ใฏ/ w a", | |
| "ใฐ/ i", | |
| "ใฑ/ e", | |
| "ใฒ/ o", | |
| "ใณ/ N", | |
| "ใ/ q", | |
| "ใด/ b u", | |
| "ใผ/:", | |
| # Try converting broken text | |
| "ใก/ a", | |
| "ใฃ/ i", | |
| "ใฅ/ u", | |
| "ใง/ e", | |
| "ใฉ/ o", | |
| "ใฎ/ w a", | |
| "ใฉ/ o", | |
| # Try converting broken text | |
| "ใฃ/ y a", | |
| "ใง/ y o", | |
| "ใฅ/ y u", | |
| "็ฆ/ ch i", | |
| "ใถ/ k e", | |
| "้ซ/ t a k a", | |
| "็ / sh y a", | |
| # Symbols | |
| "ใ/ ,", | |
| "ใ/ .", | |
| "๏ผ/ !", | |
| "๏ผ/ ?", | |
| "ใป/ ,", | |
| ] | |
| # Matches a run of one or more ':' characters. Within this file it is only | |
| # referenced from a commented-out line in kata2phoneme. | |
| _COLON_RX = re.compile(":+") | |
| # Matches any single character other than a space, an ASCII letter, ':', ',', | |
| # '.' or '?'. Not referenced elsewhere in the visible source. | |
| _REJECT_RX = re.compile("[^ a-zA-Z:,.?]") | |
def _makerulemap():
    """Split the ``_CONVRULES`` entries into per-key-length lookup tables.

    Every rule is split on ``"/"`` and must yield exactly two parts: the key
    and the text that follows the slash.

    Returns:
        A 2-tuple ``(one_char_map, two_char_map)`` of dicts, holding the rules
        whose key is one character long and two characters long respectively.
        Rules with keys of any other length are left out of both tables.
    """
    pairs = [tuple(rule.split("/")) for rule in _CONVRULES]
    tables = []
    for key_len in (1, 2):
        table = {}
        for key, value in pairs:
            if len(key) == key_len:
                table[key] = value
        tables.append(table)
    return tuple(tables)
| # Build both lookup tables once at import time: _RULEMAP1 holds the rules with | |
| # one-character keys, _RULEMAP2 the rules with two-character keys. | |
| _RULEMAP1, _RULEMAP2 = _makerulemap() | |
def kata2phoneme(text: str) -> list:
    """Convert katakana text to a list of phoneme symbols.

    The text is scanned left to right.  At each position a two-character rule
    from ``_RULEMAP2`` is preferred over a one-character rule from
    ``_RULEMAP1``.  A character matched by neither table is passed through
    unchanged as its own list element.

    Args:
        text: The string to convert; leading and trailing whitespace is
            stripped first.

    Returns:
        The phoneme symbols in order, as a list of strings (not a single
        string).  An empty input yields an empty list.
    """
    text = text.strip()
    phonemes = []
    pos = 0
    end = len(text)
    # Walk the string by index rather than re-slicing it on every step.
    while pos < end:
        rule = None
        width = 1
        if end - pos >= 2:
            rule = _RULEMAP2.get(text[pos:pos + 2])
            if rule is not None:
                width = 2
        if rule is None:
            rule = _RULEMAP1.get(text[pos])
        if rule is None:
            phonemes.append(text[pos])
        else:
            # A rule value is split on single spaces and the first piece is
            # discarded, so " k a" contributes ["k", "a"] while a value that
            # contains no space contributes nothing.
            phonemes.extend(rule.split(" ")[1:])
        pos += width
    return phonemes
# Katakana U+30A1 (small A) .. U+30F3 (N) and the hiragana block at the same
# offsets, U+3041 .. U+3093.  The bounds are given as code points instead of
# character literals: a literal damaged by an encoding round-trip becomes a
# multi-character string and makes ord() raise TypeError at import time.
_KATAKANA = "".join(chr(cp) for cp in range(0x30A1, 0x30F3 + 1))
_HIRAGANA = "".join(chr(cp) for cp in range(0x3041, 0x3093 + 1))
_HIRA2KATATRANS = str.maketrans(_HIRAGANA, _KATAKANA)


def hira2kata(text: str) -> str:
    """Return *text* with hiragana replaced by the corresponding katakana.

    Characters outside U+3041..U+3093 are left unchanged.  After the
    translation, the two-character sequence katakana U (U+30A6) followed by
    the voiced sound mark (U+309B) is collapsed into the single letter VU
    (U+30F4).

    Args:
        text: Any string.

    Returns:
        The converted string.
    """
    text = text.translate(_HIRA2KATATRANS)
    # The translation above has already turned hiragana U into katakana U, so
    # the search string must be the katakana form; a hiragana search string
    # could never match at this point.
    return text.replace("\u30a6\u309b", "\u30f4")
# Tokens that text2kata copies through unchanged when the tagger gives no
# reading: katakana middle dot, ideographic comma, ideographic full stop,
# fullwidth question mark, fullwidth exclamation mark.  Written as escapes so
# the members cannot be corrupted by an encoding round-trip.
_SYMBOL_TOKENS = set("\u30fb\u3001\u3002\uff1f\uff01")
# Tokens that text2kata drops when the tagger gives no reading: corner
# brackets, white corner brackets, horizontal bar, fullwidth parentheses,
# fullwidth square brackets and ASCII square brackets.
_NO_YOMI_TOKENS = set("\u300c\u300d\u300e\u300f\u2015\uff08\uff09\uff3b\uff3d[]")
| # Shared MeCab tagger, created once at import time with default arguments; | |
| # text2kata calls its parse() method. | |
| _TAGGER = MeCab.Tagger() | |
def text2kata(text: str) -> str:
    """Return the katakana reading of *text* using the shared MeCab tagger.

    Each tagger output line is expected to be ``surface<TAB>features``.  The
    reading is taken from the seventh comma-separated field of the feature
    column.  When that field is not available the surface form is handled
    directly: symbol tokens are kept, a small tsu (hiragana or katakana) is
    emitted as katakana small tsu, bracket-like tokens are dropped, and any
    other token is kept as written.

    Args:
        text: Text to analyse.

    Returns:
        The concatenated readings, passed through ``hira2kata``.
    """
    readings = []
    for line in _TAGGER.parse(text).split("\n"):
        if line == "EOS":
            break
        columns = line.split("\t")
        if len(columns) < 2:
            # No feature column (for example a blank line): nothing to read.
            continue
        word, features = columns[0], columns[1]
        if features:
            fields = features.split(",")
            if len(fields) > 6:
                readings.append(fields[6])
                continue
            # Too few feature fields to contain a reading.  Previously this
            # case dropped into an interactive debugger; fall back to the
            # surface-form handling below instead.
        if word in _SYMBOL_TOKENS:
            readings.append(word)
        elif word in ("\u3063", "\u30c3"):
            # Hiragana or katakana small tsu -> katakana small tsu.
            readings.append("\u30c3")
        elif word in _NO_YOMI_TOKENS:
            continue
        else:
            readings.append(word)
    return hira2kata("".join(readings))
| _ALPHASYMBOL_YOMI = { | |
| "#": "ใทใฃใผใ", | |
| "%": "ใใผใปใณใ", | |
| "&": "ใขใณใ", | |
| "+": "ใใฉใน", | |
| "-": "ใใคใใน", | |
| ":": "ใณใญใณ", | |
| ";": "ใปใใณใญใณ", | |
| "<": "ๅฐใชใ", | |
| "=": "ใคใณใผใซ", | |
| ">": "ๅคงใชใ", | |
| "@": "ใขใใ", | |
| "a": "ใจใผ", | |
| "b": "ใใผ", | |
| "c": "ใทใผ", | |
| "d": "ใใฃใผ", | |
| "e": "ใคใผ", | |
| "f": "ใจใ", | |
| "g": "ใธใผ", | |
| "h": "ใจใคใ", | |
| "i": "ใขใค", | |
| "j": "ใธใงใผ", | |
| "k": "ใฑใผ", | |
| "l": "ใจใซ", | |
| "m": "ใจใ ", | |
| "n": "ใจใ", | |
| "o": "ใชใผ", | |
| "p": "ใใผ", | |
| "q": "ใญใฅใผ", | |
| "r": "ใขใผใซ", | |
| "s": "ใจใน", | |
| "t": "ใใฃใผ", | |
| "u": "ใฆใผ", | |
| "v": "ใใค", | |
| "w": "ใใใชใฅใผ", | |
| "x": "ใจใใฏใน", | |
| "y": "ใฏใค", | |
| "z": "ใผใใ", | |
| "ฮฑ": "ใขใซใใก", | |
| "ฮฒ": "ใใผใฟ", | |
| "ฮณ": "ใฌใณใ", | |
| "ฮด": "ใใซใฟ", | |
| "ฮต": "ใคใใทใญใณ", | |
| "ฮถ": "ใผใผใฟ", | |
| "ฮท": "ใคใผใฟ", | |
| "ฮธ": "ใทใผใฟ", | |
| "ฮน": "ใคใชใฟ", | |
| "ฮบ": "ใซใใ", | |
| "ฮป": "ใฉใ ใ", | |
| "ฮผ": "ใใฅใผ", | |
| "ฮฝ": "ใใฅใผ", | |
| "ฮพ": "ใฏใตใค", | |
| "ฮฟ": "ใชใใฏใญใณ", | |
| "ฯ": "ใใค", | |
| "ฯ": "ใญใผ", | |
| "ฯ": "ใทใฐใ", | |
| "ฯ": "ใฟใฆ", | |
| "ฯ ": "ใฆใใทใญใณ", | |
| "ฯ": "ใใกใค", | |
| "ฯ": "ใซใค", | |
| "ฯ": "ใใตใค", | |
| "ฯ": "ใชใกใฌ", | |
| } | |
# An integer written with thousands separators, e.g. "1,234,567".
_NUMBER_WITH_SEPARATOR_RX = re.compile("[0-9]{1,3}(,[0-9]{3})+")
# Currency symbol -> Japanese word appended after the amount.  Non-ASCII text
# is written as escapes so it cannot be corrupted by an encoding round-trip.
_CURRENCY_MAP = {
    "$": "\u30c9\u30eb",  # dollar
    "\u00a5": "\u5186",  # yen
    "\u00a3": "\u30dd\u30f3\u30c9",  # pound
    "\u20ac": "\u30e6\u30fc\u30ed",  # euro
}
# Group 1: one of the currency symbols above; group 2: the amount that follows.
_CURRENCY_RX = re.compile("([$\u00a5\u00a3\u20ac])([0-9.]*[0-9])")
# An unsigned integer or decimal number.
_NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?")
def japanese_convert_numbers_to_words(text: str) -> str:
    """Spell out the numbers in *text* as Japanese words.

    Three substitutions are applied in order:

    1. commas are removed from numbers written with thousands separators;
    2. a currency symbol followed by an amount is rewritten as the amount
       followed by the entry for that symbol in ``_CURRENCY_MAP``;
    3. every remaining number is replaced by ``num2words(..., lang="ja")``.

    Args:
        text: Input text.

    Returns:
        The text with all three substitutions applied.
    """

    def _drop_separators(match):
        return match[0].replace(",", "")

    def _amount_then_currency(match):
        symbol = match[1]
        return match[2] + _CURRENCY_MAP.get(symbol, symbol)

    def _spell_out(match):
        return num2words(match[0], lang="ja")

    without_commas = _NUMBER_WITH_SEPARATOR_RX.sub(_drop_separators, text)
    with_currency_words = _CURRENCY_RX.sub(_amount_then_currency, without_commas)
    return _NUMBER_RX.sub(_spell_out, with_currency_words)
def japanese_convert_alpha_symbols_to_words(text: str) -> str:
    """Replace each character of *text* by its entry in ``_ALPHASYMBOL_YOMI``.

    The text is lower-cased first.  Characters without an entry in the table
    are kept as they are (in their lower-cased form).

    Args:
        text: Input text.

    Returns:
        The converted string.
    """
    pieces = []
    for ch in text.lower():
        pieces.append(_ALPHASYMBOL_YOMI.get(ch, ch))
    return "".join(pieces)
def japanese_text_to_phonemes(text: str) -> str:
    """Run the full text-to-phoneme pipeline on *text*.

    The text is NFKC-normalized and then passed, in order, through
    ``japanese_convert_numbers_to_words``,
    ``japanese_convert_alpha_symbols_to_words``, ``text2kata`` and
    ``kata2phoneme``.

    NOTE(review): the signature is annotated ``-> str``, but the value
    returned is whatever ``kata2phoneme`` returns, which in this file is a
    list of phoneme strings.
    """
    stages = (
        japanese_convert_numbers_to_words,
        japanese_convert_alpha_symbols_to_words,
        text2kata,
        kata2phoneme,
    )
    result = unicodedata.normalize("NFKC", text)
    for stage in stages:
        result = stage(result)
    return result
# Inclusive Unicode code-point ranges treated as Japanese script.  Defined
# once at module level so the table is not rebuilt on every call; the function
# is called once per character of the input text.
_JAPANESE_RANGES = (
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0x4E00, 0x9FFF),  # Kanji (CJK Unified Ideographs)
    (0x3400, 0x4DBF),  # Kanji, CJK Extension A
    (0x20000, 0x2A6DF),  # Kanji, CJK Extension B
    # Further CJK extension ranges can be added here if needed.
)


def is_japanese_character(char):
    """Return True if *char* lies in one of the Japanese script ranges.

    Args:
        char: A single-character string.  It is passed to ``ord()``, so a
            string of any other length raises ``TypeError``.

    Returns:
        ``True`` when the code point of *char* falls inside any range of
        ``_JAPANESE_RANGES`` (bounds inclusive), otherwise ``False``.
    """
    code_point = ord(char)
    return any(low <= code_point <= high for low, high in _JAPANESE_RANGES)
# Punctuation normalization table applied by replace_punctuation.  Non-ASCII
# keys are written as escapes: when the literals are damaged by an encoding
# round-trip several keys collapse into the same string and silently
# overwrite each other.
rep_map = {
    "\uff1a": ",",  # fullwidth colon
    "\uff1b": ",",  # fullwidth semicolon
    "\uff0c": ",",  # fullwidth comma
    "\u3002": ".",  # ideographic full stop
    "\uff01": "!",  # fullwidth exclamation mark
    "\uff1f": "?",  # fullwidth question mark
    "\n": ".",
    "\u00b7": ",",  # middle dot
    "\u3001": ",",  # ideographic comma
    "...": "\u2026",  # three ASCII dots -> horizontal ellipsis
}


def replace_punctuation(text):
    """Normalize punctuation and strip everything that is not Japanese text.

    First every occurrence of a ``rep_map`` key is replaced by its value.
    Then every run of characters that is neither hiragana, katakana, a CJK
    unified ideograph (including Extension A) nor a member of ``punctuation``
    is removed.

    Args:
        text: Input text.

    Returns:
        The cleaned string.
    """
    pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    replaced_text = pattern.sub(lambda match: rep_map[match.group()], text)
    # Escape each mark before splicing it into the character class so that a
    # "-", "]", "^" or "\" in the list can never change the meaning of the class.
    kept_punctuation = "".join(re.escape(mark) for mark in punctuation)
    return re.sub(
        r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF"
        + kept_punctuation
        + r"]+",
        "",
        replaced_text,
    )
| from pykakasi import kakasi | |
| # Instantiate the converter. NOTE: this rebinds the module-level name `kakasi` | |
| # from the imported class to the instance. | |
| kakasi = kakasi() | |
| # Conversion modes: both kanji ("J") and hiragana ("H") are mapped to katakana ("K"). | |
| kakasi.setMode("J", "K") # Kanji to Katakana | |
| kakasi.setMode("H", "K") # Hiragana to Katakana | |
| # Converter object built from the modes above; text_normalize calls conv.do(). | |
| conv = kakasi.getConverter() | |
def text_normalize(text):
    """Normalize raw text into the form expected by ``g2p``.

    Steps, in order: NFKC normalization, spelling out numbers, dropping every
    character for which ``is_japanese_character`` is false,
    ``replace_punctuation``, and finally conversion with ``conv.do``.

    NOTE(review): the character filter runs before ``replace_punctuation``,
    and the ranges in ``is_japanese_character`` contain no punctuation code
    points, so punctuation appears to be removed before it can be mapped.
    Confirm whether that is intended.
    """
    normalized = unicodedata.normalize("NFKC", text)
    spelled_out = japanese_convert_numbers_to_words(normalized)
    japanese_only = "".join(filter(is_japanese_character, spelled_out))
    return conv.do(replace_punctuation(japanese_only))
def distribute_phone(n_phone, n_word):
    """Spread ``n_phone`` phonemes as evenly as possible over ``n_word`` slots.

    The result is what handing out the phonemes one at a time to the first
    slot with the fewest phonemes would produce: every slot gets
    ``n_phone // n_word`` and the first ``n_phone % n_word`` slots get one
    more.  It is computed directly instead of by an
    O(n_phone * n_word) loop.

    Args:
        n_phone: Number of phonemes to distribute; values <= 0 distribute
            nothing.
        n_word: Number of slots.

    Returns:
        A list of ``n_word`` counts (empty when ``n_word`` <= 0) whose sum is
        ``n_phone``.

    Raises:
        ValueError: If there are phonemes to distribute but no slot.
    """
    if n_word <= 0:
        if n_phone > 0:
            raise ValueError("cannot distribute phonemes over zero words")
        return []
    if n_phone <= 0:
        return [0] * n_word
    base, extra = divmod(n_phone, n_word)
    return [base + 1] * extra + [base] * (n_word - extra)
| # tokenizer = AutoTokenizer.from_pretrained('cl-tohoku/bert-base-japanese-v3') | |
| # Identifier of the pretrained tokenizer that g2p uses to split normalized text into tokens. | |
| model_id = 'cl-tohoku/bert-base-japanese-v3' | |
| # Loaded once, at import time. | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
def g2p(norm_text):
    """Convert normalized text into phonemes, tones and a token alignment.

    The text is tokenized with the module-level ``tokenizer``.  A token that
    starts with ``"#"`` is treated as a continuation of the previous token and
    merged into its group with every ``"#"`` removed.  Each group is then
    mapped to phonemes:

    * ``'[UNK]'`` becomes the single phoneme ``'_'``;
    * a member of ``punctuation`` is emitted as itself;
    * anything else goes through ``kata2phoneme`` and its phonemes are spread
      over the tokens of the group with ``distribute_phone``.

    Args:
        norm_text: Text as returned by ``text_normalize``.

    Returns:
        A tuple ``(phones, tones, word2ph)``.  ``phones`` is the phoneme list
        with a ``"_"`` added at both ends, ``tones`` is a list of zeros of the
        same length, and ``word2ph`` holds one phoneme count per token plus a
        ``1`` at both ends for the added ``"_"`` entries.

    Raises:
        AssertionError: If a phoneme is not in ``symbols`` or the alignment
            length does not match the number of tokens.
    """
    tokenized = tokenizer.tokenize(norm_text)

    # Merge continuation pieces into the group opened by the preceding token.
    ph_groups = []
    for token in tokenized:
        if not token.startswith("#"):
            ph_groups.append([token])
        elif ph_groups:
            ph_groups[-1].append(token.replace("#", ""))
        else:
            # A continuation piece with nothing before it used to raise
            # IndexError; let it open its own group instead.
            ph_groups.append([token.replace("#", "")])

    phs = []
    word2ph = []
    for group in ph_groups:
        text = "".join(group)
        if text == '[UNK]':
            phs.append('_')
            word2ph.append(1)
            continue
        if text in punctuation:
            phs.append(text)
            word2ph.append(1)
            continue
        phonemes = kata2phoneme(text)
        for phoneme in phonemes:
            assert phoneme in symbols, (group, norm_text, tokenized, phoneme)
        # One count per token of the group, so word2ph stays aligned with
        # the tokenizer output.
        counts = distribute_phone(len(phonemes), len(group))
        assert len(counts) == len(group)
        word2ph += counts
        phs += phonemes

    phones = ["_"] + phs + ["_"]
    tones = [0] * len(phones)
    word2ph = [1] + word2ph + [1]
    assert len(word2ph) == len(tokenized) + 2
    return phones, tones, word2ph
def get_bert_feature(text, word2ph, device):
    """Delegate to ``text.japanese_bert.get_bert_feature``.

    The import happens inside the function, so ``text.japanese_bert`` is only
    loaded when this function is first called.

    Args:
        text: Forwarded unchanged as the first positional argument.
        word2ph: Forwarded unchanged as the second positional argument.
        device: Forwarded as the ``device`` keyword argument.

    Returns:
        Whatever the delegated function returns.
    """
    from text.japanese_bert import get_bert_feature as _bert_feature

    return _bert_feature(text, word2ph, device=device)
| if __name__ == "__main__": | |
| # tokenizer = AutoTokenizer.from_pretrained("./bert/bert-base-japanese-v3") | |
| text = "ใใใซใกใฏใไธ็๏ผ..." | |
| text = 'ใใใๅใฏใใใชใจ็ณใใพใใใใกใใฎๅฐใใใใในใฏๆๅญใใๆจๆถใ้ ใใฆใใพใใใฟใพใใใใใชใใฎๅใฏ?' | |
| text = 'ใใฎใใๅไปฅๅคใฎใฟใใชใฏใๅ จๅก็ใใฆใใใจ?' | |
| from text.japanese_bert import get_bert_feature | |
| text = text_normalize(text) | |
| print(text) | |
| phones, tones, word2ph = g2p(text) | |
| bert = get_bert_feature(text, word2ph) | |
| print(phones, tones, word2ph, bert.shape) | |
| # if __name__ == '__main__': | |
| # from pykakasi import kakasi | |
| # # Initialize kakasi object | |
| # kakasi = kakasi() | |
| # # Set options for converting Chinese characters to Katakana | |
| # kakasi.setMode("J", "H") # Chinese to Katakana | |
| # kakasi.setMode("K", "H") # Hiragana to Katakana | |
| # # Convert Chinese characters to Katakana | |
| # conv = kakasi.getConverter() | |
| # katakana_text = conv.do('ใใใๅใฏใใใชใจ็ณใใพใใใใกใใฎๅฐใใใใในใฏๆๅญใใๆจๆถใ้ ใใฆใใพใใใฟใพใใใใใชใใฎๅใฏ?') # Replace with your Chinese text | |
| # print(katakana_text) # Output: ใใผใใชใปใซใค |