Spaces:
Runtime error
Runtime error
| # Copyright (c) 2024 Amphion. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import re | |
| import jieba | |
| import cn2an | |
| from pypinyin import lazy_pinyin, BOPOMOFO | |
| from typing import List | |
| from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict | |
| from diffrhythm.g2p.utils.front_utils import * | |
| import os | |
| from huggingface_hub import hf_hub_download | |
| # from g2pw import G2PWConverter | |
# Blank-token insertion level: {0: "none", 1: "char", 2: "word"}
BLANK_LEVEL = 0
# conv = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)


def _require_path(path, description):
    """Return *path* unchanged, aborting the import when it does not exist.

    The message text matches the one this module has always reported.  It is
    passed to ``SystemExit`` so the interpreter stops with a non-zero status
    and the message on stderr, instead of calling the interactive ``exit()``
    helper, which reports success (status 0) and is only present when the
    ``site`` module is loaded.
    """
    if not os.path.exists(path):
        raise SystemExit(
            "Incorrect path for {}: {}, please check...".format(description, path)
        )
    return path


resource_path = r"./diffrhythm/g2p"

# Polyphonic character class list, parsed by generate_poly_lexicon().
poly_all_class_path = _require_path(
    os.path.join(resource_path, "sources", "g2p_chinese_model", "polychar.txt"),
    "polyphonic character class dictionary",
)
poly_dict = generate_poly_lexicon(poly_all_class_path)

# Set up G2PW model parameters
g2pw_poly_model_path = _require_path(
    os.path.join(resource_path, "sources", "g2p_chinese_model"),
    "g2pw polyphonic character model",
)
json_file_path = _require_path(
    os.path.join(resource_path, "sources", "g2p_chinese_model", "polydict.json"),
    "g2pw id to pinyin dictionary",
)
jsonr_file_path = _require_path(
    os.path.join(resource_path, "sources", "g2p_chinese_model", "polydict_r.json"),
    "g2pw pinyin to id dictionary",
)

# Predictor used by chinese_to_bopomofo() for characters listed in poly_dict.
g2pw_poly_predict = BertPolyPredict(
    g2pw_poly_model_path, jsonr_file_path, json_file_path
)
| """ | |
| Text clean time | |
| """ | |
# Substitution table consumed by latin_to_bopomofo(): one
# (compiled case-insensitive pattern, replacement string) pair per Latin
# letter, applied in list order.
# NOTE(review): the replacement strings below look mis-encoded in this copy of
# the file (presumably bopomofo spellings of the letter names decoded with the
# wrong charset) -- restore them from a known-good UTF-8 source before use.
_latin_to_bopomofo = [
    (re.compile("%s" % x[0], re.IGNORECASE), x[1])
    for x in [
        ("a", "γΛ"),
        ("b", "γ γ§Λ"),
        ("c", "γγ§Λ"),
        ("d", "γγ§Λ"),
        ("e", "γ§Λ"),
        ("f", "γΛγγ¨Λ"),
        ("g", "γγ§Λ"),
        ("h", "γΛγγ©Λ"),
        ("i", "γΛ"),
        ("j", "γγΛ"),
        ("k", "γγΛ"),
        ("l", "γΛγΛ"),
        ("m", "γΛγγ¨Λ"),
        ("n", "γ£Λ"),
        ("o", "γ‘Λ"),
        ("p", "γγ§Λ"),
        ("q", "γγ§γ‘Λ"),
        ("r", "γΛ"),
        ("s", "γΛγΛ"),
        ("t", "γγ§Λ"),
        ("u", "γ§γ‘Λ"),
        ("v", "γ¨γ§Λ"),
        ("w", "γγΛγ γ¨Λγγ§γ‘Λ"),
        ("x", "γΛγγ¨ΛγΛ"),
        ("y", "γ¨γΛ"),
        ("z", "γγΛ"),
    ]
]
# Substitution table consumed by bopomofo_to_ipa(): (compiled pattern,
# replacement) pairs applied in list order, so the two-symbol entries at the
# top are tried before the single-symbol entries further down.
# NOTE(review): both the patterns and the replacements look mis-encoded in
# this copy (several distinct entries are now visually identical) -- restore
# them from a known-good UTF-8 source before relying on this table.
_bopomofo_to_ipa = [
    (re.compile("%s" % x[0]), x[1])
    for x in [
        ("γ γ", "pβΌwo"),
        ("γγ", "pΚ°wo"),
        ("γγ", "mwo"),
        ("γγ", "fwo"),
        ("γ§γ’", "|jΙn"),
        ("γ©γ’", "|Ι₯Γ¦n"),
        ("γ§γ£", "|in"),
        ("γ©γ£", "|Ι₯n"),
        ("γ§γ₯", "|iΕ"),
        ("γ¨γ₯", "|ΚΕ"),
        ("γ©γ₯", "|jΚΕ"),
        # Add
        ("γ§γ", "|ia"),
        ("γ§γ", "|iΙ"),
        ("γ§γ ", "|iΙΚ"),
        ("γ§γ‘", "|ioΚ"),
        ("γ§γ€", "|iΙΕ"),
        ("γ¨γ", "|ua"),
        ("γ¨γ", "|uo"),
        ("γ¨γ", "|uaΙͺ"),
        ("γ¨γ", "|ueΙͺ"),
        ("γ¨γ’", "|uan"),
        ("γ¨γ£", "|uΙn"),
        ("γ¨γ€", "|uΙΕ"),
        ("γ©γ", "|Ι₯Ι"),
        # End
        ("γ ", "pβΌ"),
        ("γ", "pΚ°"),
        ("γ", "m"),
        ("γ", "f"),
        ("γ", "tβΌ"),
        ("γ", "tΚ°"),
        ("γ", "n"),
        ("γ", "l"),
        ("γ", "kβΌ"),
        ("γ", "kΚ°"),
        ("γ", "x"),
        ("γ", "tΚβΌ"),
        ("γ", "tΚΚ°"),
        ("γ", "Κ"),
        ("γ", "ts`βΌ"),
        ("γ", "ts`Κ°"),
        ("γ", "s`"),
        ("γ", "ΙΉ`"),
        ("γ", "tsβΌ"),
        ("γ", "tsΚ°"),
        ("γ", "|s"),
        ("γ", "|a"),
        ("γ", "|o"),
        ("γ", "|Ι"),
        ("γ", "|Ι"),
        ("γ", "|aΙͺ"),
        ("γ", "|eΙͺ"),
        ("γ ", "|ΙΚ"),
        ("γ‘", "|oΚ"),
        ("γ’", "|an"),
        ("γ£", "|Ιn"),
        ("γ€", "|ΙΕ"),
        ("γ₯", "|ΙΕ"),
        ("γ¦", "ΙΙΉ"),
        ("γ§", "|i"),
        ("γ¨", "|u"),
        ("γ©", "|Ι₯"),
        ("Λ", "β|"),
        ("Λ", "β|"),
        ("Λ", "ββ|"),
        ("Λ", "β|"),
        ("Λ", "|"),
    ]
]
# Words that er_sandhi() leaves untouched even though they end in its trigger
# character.
# NOTE(review): the entries look mis-encoded in this copy (two of them are now
# visually identical) -- restore from a known-good UTF-8 source.
must_not_er_words = {"ε₯³εΏ", "θεΏ", "η·εΏ", "ε°εΏ", "ε°εΏ"}
# Word -> pinyin lexicon, resolved through hf_hub_download.
chinese_lexicon_path = hf_hub_download(
    repo_id="ASLP-lab/DiffRhythm",
    filename="chinese_lexicon.txt"
)
# Each non-blank line is "<word>\t<pinyin>".
word_pinyin_dict = {}
with open(chinese_lexicon_path, "r", encoding="utf-8") as fread:
    for txt in fread:
        txt = txt.strip()
        if not txt:
            # A blank (e.g. trailing) line used to abort the import with an
            # unpacking ValueError; it carries no entry, so skip it.
            continue
        word, pinyin = txt.split("\t")
        word_pinyin_dict[word] = pinyin
# The ``with`` block has already closed the file; no explicit close() needed.
# Toneless pinyin syllable -> bopomofo spelling; each non-blank line of the
# table is "<pinyin>\t<bopomofo>".
pinyin_2_bopomofo_dict = {}
with open(
    r"./diffrhythm/g2p/sources/pinyin_2_bpmf.txt", "r", encoding="utf-8"
) as fread:
    for txt in fread:
        txt = txt.strip()
        if not txt:
            # A blank (e.g. trailing) line used to abort the import with an
            # unpacking ValueError; it carries no entry, so skip it.
            continue
        pinyin, bopomofo = txt.split("\t")
        pinyin_2_bopomofo_dict[pinyin] = bopomofo
# The ``with`` block has already closed the file; no explicit close() needed.
# Tone digit (the last character of a lexicon pinyin syllable) -> tone mark
# appended to the bopomofo spelling in chinese_to_bopomofo(); tone "1" appends
# nothing.
# NOTE(review): the mark literals look mis-encoded in this copy -- every
# non-empty value is now visually identical. Restore from a known-good UTF-8
# source.
tone_dict = {
    "0": "Λ",
    "5": "Λ",
    "1": "",
    "2": "Λ",
    "3": "Λ",
    "4": "Λ",
}
# Reverse table used by bpmf_to_pinyin(). Each non-blank line is
# "<value>\t<key>": the SECOND column is the lookup key and the first column
# is what it maps to.
bopomofos2pinyin_dict = {}
with open(
    r"./diffrhythm/g2p/sources/bpmf_2_pinyin.txt", "r", encoding="utf-8"
) as fread:
    for txt in fread:
        txt = txt.strip()
        if not txt:
            # A blank (e.g. trailing) line used to abort the import with an
            # unpacking ValueError; it carries no entry, so skip it.
            continue
        v, k = txt.split("\t")
        bopomofos2pinyin_dict[k] = v
# The ``with`` block has already closed the file; no explicit close() needed.
def _respell_pinyin(pinyin):
    """Apply the spelling fix-ups to one syllable that ends in a tone digit.

    The rules run in a fixed order and later rules see the output of earlier
    ones, so the sequence below must not be reordered.
    """
    if pinyin[:-1] == "ve":
        pinyin = "y" + pinyin
    if pinyin[:-1] == "sh":
        pinyin = pinyin[:-1] + "i" + pinyin[-1]
    # (The former `pinyin == "sh"` branch could never fire: a tone digit is
    # always present by the time this helper runs.)
    if pinyin[:-1] == "s":
        pinyin = "si" + pinyin[-1]
    if pinyin[:-1] == "c":
        pinyin = "ci" + pinyin[-1]
    if pinyin[:-1] == "i":
        pinyin = "yi" + pinyin[-1]
    if pinyin[:-1] == "iou":
        pinyin = "you" + pinyin[-1]
    if pinyin[:-1] == "ien":
        pinyin = "yin" + pinyin[-1]
    if pinyin[-4:-1] == "iou":
        pinyin = pinyin[:-4] + "iu" + pinyin[-1]
    if pinyin[-4:-1] == "uei":
        if pinyin[:-1] == "uei":
            pinyin = "wei" + pinyin[-1]
        else:
            pinyin = pinyin[:-4] + "ui" + pinyin[-1]
    if pinyin[-4:-1] == "uen":
        if pinyin[:-1] == "uen":
            pinyin = "wen" + pinyin[-1]
        else:
            # Fix: this branch used to test for "uei" (copy-paste slip), so a
            # final "-uen" after an initial was never contracted to "-un".
            pinyin = pinyin[:-4] + "un" + pinyin[-1]
    if pinyin[-4:-1] == "van":
        if pinyin[:-1] == "van":
            pinyin = "yuan" + pinyin[-1]
        else:
            pinyin = pinyin[:-4] + "uan" + pinyin[-1]
    if pinyin[-5:-1] == "ueng":
        pinyin = pinyin[:-5] + "ong" + pinyin[-1]
    if pinyin[:-1] == "veng":
        pinyin = "yong" + pinyin[-1]
    if pinyin[-5:-1] == "veng":
        pinyin = pinyin[:-5] + "iong" + pinyin[-1]
    if pinyin[:-1] == "ieng":
        pinyin = "ying" + pinyin[-1]
    if pinyin[:-1] == "u":
        pinyin = "wu" + pinyin[-1]
    if pinyin[:-1] == "v":
        pinyin = "yv" + pinyin[-1]
    if pinyin[:-1] == "ing":
        pinyin = "ying" + pinyin[-1]
    if pinyin[:-1] == "z":
        pinyin = "zi" + pinyin[-1]
    if pinyin[:-1] == "zh":
        pinyin = "zhi" + pinyin[-1]
    if pinyin[0] == "u":
        pinyin = "w" + pinyin[1:]
    if pinyin[0] == "i":
        pinyin = "y" + pinyin[1:]
    return pinyin.replace("ien", "in")


def bpmf_to_pinyin(text):
    """Convert a ``|``-separated bopomofo string to space-separated pinyin.

    Every character of each segment is looked up in the module-level
    ``bopomofos2pinyin_dict``; characters without an entry are dropped and
    segments that map to nothing are skipped.  A syllable whose last
    character is not one of ``0``-``4`` gets the tone digit ``1`` appended,
    then the spelling fix-ups in ``_respell_pinyin`` are applied.

    Args:
        text: bopomofo syllables separated by ``|``.

    Returns:
        The converted syllables joined by single spaces (``""`` when nothing
        could be converted).
    """
    pinyin_list = []
    for info in text.split("|"):
        pinyin = "".join(
            bopomofos2pinyin_dict[c] for c in info if c in bopomofos2pinyin_dict
        )
        if not pinyin:
            continue
        if pinyin[-1] not in "01234":
            pinyin += "1"
        pinyin_list.append(_respell_pinyin(pinyin))
    return " ".join(pinyin_list)
# Convert numbers to Chinese pronunciation
def number_to_chinese(text):
    """Return *text* after ``cn2an.transform`` has run over it in ``"an2cn"`` mode."""
    return cn2an.transform(text, "an2cn")
def normalization(text):
    """Normalise punctuation in *text* and strip everything that is not kept.

    Steps, in order: a fixed series of literal punctuation replacements, the
    removal of all whitespace, the removal of every character outside the
    allowed class (CJK range U+4E00-U+9FFF, ``_`` and the listed punctuation),
    and finally a trim of whitespace around punctuation.

    NOTE(review): the non-ASCII literals below look mis-encoded in this copy
    of the file -- several search strings are now visually identical, so the
    individual replacements cannot be told apart here. Restore them from a
    known-good UTF-8 source; until then treat the exact mapping as unverified.
    """
    text = text.replace("οΌ", ",")
    text = text.replace("γ", ".")
    text = text.replace("οΌ", "!")
    text = text.replace("οΌ", "?")
    text = text.replace("οΌ", ";")
    text = text.replace("οΌ", ":")
    text = text.replace("γ", ",")
    text = text.replace("β", "'")
    text = text.replace("β", "'")
    text = text.replace("β―", "β¦")
    text = text.replace("Β·Β·Β·", "β¦")
    text = text.replace("γ»γ»γ»", "β¦")
    text = text.replace("...", "β¦")
    # All whitespace is removed here, so the ``\s`` parts of the two patterns
    # below can no longer match anything.
    text = re.sub(r"\s+", "", text)
    text = re.sub(r"[^\u4e00-\u9fff\s_,\.\?!;:\'β¦]", "", text)
    text = re.sub(r"\s*([,\.\?!;:\'β¦])\s*", r"\1", text)
    return text
def change_tone(bopomofo: str, tone: str) -> str:
    """Return *bopomofo* carrying *tone* as its tone mark.

    When the syllable already ends in one of the recognised tone marks that
    mark is replaced; otherwise *tone* is appended.  An empty syllable has no
    mark to replace, so *tone* is returned on its own (this used to raise
    ``IndexError``).

    NOTE(review): the tone-mark literal below looks mis-encoded in this copy
    of the file; restore it from a known-good UTF-8 source.
    """
    if bopomofo and bopomofo[-1] in "ΛΛΛΛ":
        return bopomofo[:-1] + tone
    return bopomofo + tone
def er_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Re-tone the last syllable of a multi-character word ending in the trigger character.

    Words listed in ``must_not_er_words`` are exempt.  *bopomofos* is
    modified in place and also returned.

    NOTE(review): the character literals look mis-encoded in this copy.
    """
    has_er_suffix = len(word) > 1 and word[-1] == "εΏ"
    if has_er_suffix and word not in must_not_er_words:
        bopomofos[-1] = change_tone(bopomofos[-1], "Λ")
    return bopomofos
def bu_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Adjust the tone of the trigger character inside *word*.

    *bopomofos* holds one spelling per character of *word*; it is modified in
    place and also returned.

    NOTE(review): the character and tone literals below look mis-encoded in
    this copy of the file, so which tone each branch assigns cannot be
    confirmed from here -- restore them from a known-good UTF-8 source.
    """
    valid_char = set(word)
    # A word made only of the trigger character is left as is.
    if len(valid_char) == 1 and "δΈ" in valid_char:
        pass
    # Explicit exception list.
    elif word in ["δΈε"]:
        pass
    # Three-character word with the trigger in the middle: only its tone mark
    # (last character of the spelling) is swapped.
    # NOTE(review): bopomofos[1] is indexed without a length check.
    elif len(word) == 3 and word[1] == "δΈ" and bopomofos[1][:-1] == "γ γ¨":
        bopomofos[1] = bopomofos[1][:-1] + "Λ"
    else:
        for i, char in enumerate(word):
            # Only re-tone when a following, non-empty syllable exists and
            # ends in the tone mark compared against.
            if (
                i + 1 < len(bopomofos)
                and char == "δΈ"
                and i + 1 < len(word)
                and 0 < len(bopomofos[i + 1])
                and bopomofos[i + 1][-1] == "Λ"
            ):
                bopomofos[i] = bopomofos[i][:-1] + "Λ"
    return bopomofos
def yi_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Adjust the tone of the trigger character inside *word*.

    *bopomofos* holds one spelling per character of *word*; it is modified in
    place (through ``change_tone``) and also returned.

    NOTE(review): every non-ASCII literal below looks mis-encoded in this copy
    of the file (the tone marks are visually identical, as are some of the
    numeral entries), so which tone each branch assigns cannot be confirmed
    from here -- restore them from a known-good UTF-8 source.
    """
    punc = "οΌοΌοΌγοΌοΌββββ':,;.?!()οΌοΌ{}γγ[]-~`γ "
    # Branch 1: the word contains the trigger plus at least one other numeric
    # character (str.isnumeric()).
    if word.find("δΈ") != -1 and any(
        [item.isnumeric() for item in word if item != "δΈ"]
    ):
        for i in range(len(word)):
            # Leading trigger whose next character is not in the list below.
            if (
                i == 0
                and word[0] == "δΈ"
                and len(word) > 1
                and word[1]
                not in [
                    "ιΆ",
                    "δΈ",
                    "δΊ",
                    "δΈ",
                    "ε",
                    "δΊ",
                    "ε ",
                    "δΈ",
                    "ε «",
                    "δΉ",
                    "ε",
                ]
            ):
                # NOTE(review): the guard checks the length of bopomofos[0]
                # but the expression then indexes bopomofos[1][-1]; this
                # raises IndexError when bopomofos[1] is missing or empty.
                # The guard was probably meant for bopomofos[1] -- confirm.
                if len(bopomofos[0]) > 0 and bopomofos[1][-1] in ["Λ", "Λ"]:
                    bopomofos[0] = change_tone(bopomofos[0], "Λ")
                else:
                    bopomofos[0] = change_tone(bopomofos[0], "Λ")
            elif word[i] == "δΈ":
                # Any other occurrence: an empty tone mark is applied.
                bopomofos[i] = change_tone(bopomofos[i], "")
        return bopomofos
    # Branch 2: three characters, trigger in the middle, same first and last.
    elif len(word) == 3 and word[1] == "δΈ" and word[0] == word[-1]:
        bopomofos[1] = change_tone(bopomofos[1], "Λ")
    # Branch 3: fixed two-character prefix; the second syllable gets an empty
    # tone mark.
    elif word.startswith("第δΈ"):
        bopomofos[1] = change_tone(bopomofos[1], "")
    # Branch 4: fixed prefixes; the first syllable gets an empty tone mark.
    elif word.startswith("δΈζ") or word.startswith("δΈζ₯") or word.startswith("δΈε·"):
        bopomofos[0] = change_tone(bopomofos[0], "")
    else:
        # General case: the tone depends on the tone mark of the following
        # syllable; a trigger directly followed by punctuation is untouched.
        for i, char in enumerate(word):
            if char == "δΈ" and i + 1 < len(word):
                if (
                    len(bopomofos) > i + 1
                    and len(bopomofos[i + 1]) > 0
                    and bopomofos[i + 1][-1] in {"Λ"}
                ):
                    bopomofos[i] = change_tone(bopomofos[i], "Λ")
                else:
                    if word[i + 1] not in punc:
                        bopomofos[i] = change_tone(bopomofos[i], "Λ")
                    else:
                        pass
    return bopomofos
def merge_bu(seg: List) -> List:
    """Glue each stand-alone particle token onto the word that follows it.

    A particle token is never emitted by itself inside the loop: it is held
    back and prefixed to the next non-particle word.  A particle at the very
    end of *seg* has no following word; it used to be dropped silently and is
    now kept as its own token so no input text is lost.

    Args:
        seg: segmented words, in order.

    Returns:
        A new list; *seg* itself is not modified.
    """
    # NOTE(review): this literal looks mis-encoded in this copy of the file;
    # restore it from a known-good UTF-8 source.
    particle = "δΈ"
    new_seg = []
    last_word = ""
    for word in seg:
        if word != particle:
            if last_word == particle:
                word = last_word + word
            new_seg.append(word)
        last_word = word
    if last_word == particle:
        # Trailing particle with nothing to attach to.
        new_seg.append(last_word)
    return new_seg
def merge_er(seg: List) -> List:
    """Attach every non-leading suffix token to the token emitted before it.

    A suffix token in first position is kept as a token of its own.  Returns
    a new list; *seg* is not modified.

    NOTE(review): the suffix literal looks mis-encoded in this copy.
    """
    merged = []
    for position, token in enumerate(seg):
        if position >= 1 and token == "εΏ":
            merged[-1] = merged[-1] + token
            continue
        merged.append(token)
    return merged
def merge_yi(seg: List) -> List:
    """Merge the trigger token with its neighbours in two passes.

    Pass 1 joins a reduplication ``X, trigger, X`` into the single token
    ``X + trigger + X``.  Pass 2 glues any trigger token that is still on its
    own onto the word that follows it.

    Changes from the previous version:
      * Pass 1 addressed the output list with the *input* index
        (``new_seg[i - 1]``).  After the first merge the two lists no longer
        line up, so every later reduplication in the same input fell through
        to the fallback and was not merged.  The last emitted token is used
        now, guarded by a check that it really is the ``X`` before the
        trigger; when it is not, the old fallback is kept.
      * The intermediate "numeric" pass only copied the list (both branches
        appended the word, the flag was never read) and has been removed.

    Args:
        seg: segmented words, in order.

    Returns:
        A new list; *seg* itself is not modified.
    """
    # NOTE(review): this literal looks mis-encoded in this copy of the file;
    # restore it from a known-good UTF-8 source.
    trigger = "δΈ"

    # Pass 1: X, trigger, X -> "X<trigger>X"
    merged = []
    for i, word in enumerate(seg):
        if (
            i - 1 >= 0
            and word == trigger
            and i + 1 < len(seg)
            and seg[i - 1] == seg[i + 1]
        ):
            if merged and merged[-1] == seg[i - 1]:
                merged[-1] = seg[i - 1] + trigger + seg[i - 1]
            else:
                # The preceding X is not the last emitted token (it was
                # consumed by an earlier merge): emit the pieces unmerged.
                merged.append(word)
                merged.append(seg[i + 1])
        elif i - 2 >= 0 and seg[i - 1] == trigger and seg[i - 2] == word:
            # Second X of a reduplication: already emitted by the branch above.
            continue
        else:
            merged.append(word)

    # Pass 2: a lone trigger token absorbs the following word.
    new_seg = []
    for word in merged:
        if new_seg and new_seg[-1] == trigger:
            new_seg[-1] = new_seg[-1] + word
        else:
            new_seg.append(word)
    return new_seg
# Word Segmentation, and convert Chinese pronunciation to pinyin (bopomofo)
def chinese_to_bopomofo(text_short, sentence):
    """Segment *text_short* and return its bopomofo spelling, ``|``-separated.

    The text is cut with ``jieba.lcut``, the segments are regrouped by
    ``merge_yi`` / ``merge_bu`` / ``merge_er``, each word is spelled, and the
    ``bu_sandhi`` / ``yi_sandhi`` / ``er_sandhi`` adjustments are applied.

    Args:
        text_short: the text to convert; also handed to the polyphone
            predictor together with a character index.
        sentence: not used in this function body.

    Returns:
        One string with syllables (and untouched non-CJK words) joined by
        ``|``.

    NOTE(review): indentation is absent in this copy of the file; the nesting
    below is a reconstruction -- in particular which ``if`` each ``else``
    belongs to -- and should be confirmed against a known-good source. The
    tone-mark literals also look mis-encoded.
    """
    # bopomofos = conv(text_short)
    words = jieba.lcut(text_short, cut_all=False)
    words = merge_yi(words)
    words = merge_bu(words)
    words = merge_er(words)
    text = ""
    # Offset of the current word inside text_short, passed to the predictor.
    # presumably still valid after the merge_* steps -- verify that they never
    # drop characters.
    char_index = 0
    for word in words:
        bopomofos = []
        if word in word_pinyin_dict and word not in poly_dict:
            # Whole word is in the lexicon and is not a polyphone entry:
            # translate its space-separated pinyin syllable by syllable.
            pinyin = word_pinyin_dict[word]
            for py in pinyin.split(" "):
                if py[:-1] in pinyin_2_bopomofo_dict and py[-1] in tone_dict:
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    # NOTE(review): the fallback spells the WHOLE word and runs
                    # once per unmatched syllable, so it can add more entries
                    # than the word has characters -- confirm intent.
                    bopomofos_lazy = lazy_pinyin(word, BOPOMOFO)
                    bopomofos += bopomofos_lazy
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        else:
            # Character-by-character path.
            for i in range(len(word)):
                c = word[i]
                if c in poly_dict:
                    # Polyphone: ask the predictor, addressing the character
                    # by its index in text_short.
                    poly_pinyin = g2pw_poly_predict.predict_process(
                        [text_short, char_index + i]
                    )[0]
                    # Drops the first two and the last character of the
                    # prediction (presumably wrapper characters -- TODO confirm
                    # the predictor's output format).
                    py = poly_pinyin[2:-1]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                elif c in word_pinyin_dict:
                    py = word_pinyin_dict[c]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    # Unknown character: passed through unchanged.
                    bopomofos.append(c)
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        if BLANK_LEVEL == 2:
            bopomofos.append("_")
        char_index += len(word)
        # Tone adjustment for 3- and 2-character words whose syllables all end
        # in the same tone mark.
        # NOTE(review): the 3-character case APPENDS a mark while the
        # 2-character case REPLACES the last character -- confirm which is
        # intended. Both index into bopomofos without checking that the
        # entries exist and are non-empty.
        if (
            len(word) == 3
            and bopomofos[0][-1] == "Λ"
            and bopomofos[1][-1] == "Λ"
            and bopomofos[-1][-1] == "Λ"
        ):
            bopomofos[0] = bopomofos[0] + "Λ"
            bopomofos[1] = bopomofos[1] + "Λ"
        if len(word) == 2 and bopomofos[0][-1] == "Λ" and bopomofos[-1][-1] == "Λ":
            bopomofos[0] = bopomofos[0][:-1] + "Λ"
        bopomofos = bu_sandhi(word, bopomofos)
        bopomofos = yi_sandhi(word, bopomofos)
        bopomofos = er_sandhi(word, bopomofos)
        # Words without any CJK character are emitted verbatim.
        if not re.search("[\u4e00-\u9fff]", word):
            text += "|" + word
            continue
        # A spelling that ends in a bare bopomofo letter (U+3105-U+3129) gets
        # an explicit mark appended.
        for i in range(len(bopomofos)):
            bopomofos[i] = re.sub(r"([\u3105-\u3129])$", r"\1Λ", bopomofos[i])
        if text != "":
            text += "|"
        text += "|".join(bopomofos)
    return text
# Convert latin pronunciation to pinyin (bopomofo)
def latin_to_bopomofo(text):
    """Run every (pattern, replacement) pair of ``_latin_to_bopomofo`` over *text*, in order."""
    for pattern, substitute in _latin_to_bopomofo:
        text = pattern.sub(substitute, text)
    return text
# Convert pinyin (bopomofo) to IPA
def bopomofo_to_ipa(text):
    """Run every (pattern, replacement) pair of ``_bopomofo_to_ipa`` over *text*, in order."""
    for pattern, substitute in _bopomofo_to_ipa:
        text = pattern.sub(substitute, text)
    return text
def _chinese_to_ipa(text, sentence):
    """Convert one Chinese string to a ``|``-separated IPA string.

    Pipeline: strip -> number_to_chinese -> normalization ->
    chinese_to_bopomofo -> latin_to_bopomofo -> bopomofo_to_ipa, followed by
    the regex clean-up below.  *sentence* is only forwarded to
    chinese_to_bopomofo.

    NOTE(review): the non-ASCII characters inside the patterns and
    replacements look mis-encoded in this copy of the file; restore them from
    a known-good UTF-8 source before relying on these substitutions.
    """
    text = number_to_chinese(text.strip())
    text = normalization(text)
    text = chinese_to_bopomofo(text, sentence)
    # pinyin = bpmf_to_pinyin(text)
    text = latin_to_bopomofo(text)
    text = bopomofo_to_ipa(text)
    # Insert a filler symbol after the matched consonant group when it is
    # directly followed by the characters of the second group or by the end
    # of the string (presumably syllabic consonants before a tone mark --
    # confirm once the literals are restored).
    text = re.sub("([sΙΉ]`[βΌΚ°]?)([βββ ]+|$)", r"\1ΙΉ\2", text)
    text = re.sub("([s][βΌΚ°]?)([βββ ]+|$)", r"\1ΙΉ\2", text)
    # Drop a leading "|" and every character outside the allowed class.
    text = re.sub(r"^\||[^\w\s_,\.\?!;:\'β¦\|ββββΌΚ°`]", "", text)
    # Isolate punctuation as its own "|"-delimited token, collapse runs of
    # "|" and trim trailing separators.
    text = re.sub(r"([,\.\?!;:\'β¦])", r"|\1|", text)
    text = re.sub(r"\|+", "|", text)
    text = text.rstrip("|")
    return text
# Convert Chinese to IPA
def chinese_to_ipa(text, sentence, text_tokenizer):
    """Convert a string, or an iterable of strings, to IPA.

    Args:
        text: a single string, or an iterable of strings.
        sentence: forwarded unchanged to ``_chinese_to_ipa``.
        text_tokenizer: not used in this function body; kept so existing
            callers do not break.

    Returns:
        One IPA string when *text* is a string (``str`` subclasses included,
        which the former ``type(text) == str`` test iterated character by
        character), otherwise a list with one IPA string per item.
    """
    # phonemes = text_tokenizer(text.strip())
    if isinstance(text, str):
        return _chinese_to_ipa(text, sentence)
    return [_chinese_to_ipa(t, sentence) for t in text]