"""Symbol inventories, tone counts and language ids for ZH, JP and EN.

Everything here is a module-level constant.  ``SYMBOLS`` is the combined,
ordered symbol table: the padding symbol first, then the sorted union of the
per-language symbols, then the punctuation and special tokens.
"""

# Punctuation marks
PUNCTUATIONS = ["!", "?", "…", ",", ".", "'", "-"]

# Punctuation marks plus the special tokens
PUNCTUATION_SYMBOLS = PUNCTUATIONS + ["SP", "UNK"]

# Padding symbol; it is placed at index 0 of SYMBOLS
PAD = "_"

# Chinese symbols
ZH_SYMBOLS = [
    "E",
    "En",
    "a",
    "ai",
    "an",
    "ang",
    "ao",
    "b",
    "c",
    "ch",
    "d",
    "e",
    "ei",
    "en",
    "eng",
    "er",
    "f",
    "g",
    "h",
    "i",
    "i0",
    "ia",
    "ian",
    "iang",
    "iao",
    "ie",
    "in",
    "ing",
    "iong",
    "ir",
    "iu",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "ong",
    "ou",
    "p",
    "q",
    "r",
    "s",
    "sh",
    "t",
    "u",
    "ua",
    "uai",
    "uan",
    "uang",
    "ui",
    "un",
    "uo",
    "v",
    "van",
    "ve",
    "vn",
    "w",
    "x",
    "y",
    "z",
    "zh",
    "AA",
    "EE",
    "OO",
]
NUM_ZH_TONES = 6

# Japanese symbols
JP_SYMBOLS = [
    "N",
    "a",
    "a:",
    "b",
    "by",
    "ch",
    "d",
    "dy",
    "e",
    "e:",
    "f",
    "g",
    "gy",
    "h",
    "hy",
    "i",
    "i:",
    "j",
    "k",
    "ky",
    "m",
    "my",
    "n",
    "ny",
    "o",
    "o:",
    "p",
    "py",
    "q",
    "r",
    "ry",
    "s",
    "sh",
    "t",
    "ts",
    "ty",
    "u",
    "u:",
    "w",
    "y",
    "z",
    "zy",
]
NUM_JP_TONES = 2

# English symbols
EN_SYMBOLS = [
    "aa",
    "ae",
    "ah",
    "ao",
    "aw",
    "ay",
    "b",
    "ch",
    "d",
    "dh",
    "eh",
    "er",
    "ey",
    "f",
    "g",
    "hh",
    "ih",
    "iy",
    "jh",
    "k",
    "l",
    "m",
    "n",
    "ng",
    "ow",
    "oy",
    "p",
    "r",
    "s",
    "sh",
    "t",
    "th",
    "uh",
    "uw",
    "V",
    "w",
    "y",
    "z",
    "zh",
]
NUM_EN_TONES = 4

# Combine all symbols.  The languages share many symbols, so the union is
# de-duplicated through a set and sorted to give a deterministic order.
NORMAL_SYMBOLS = sorted(set(ZH_SYMBOLS + JP_SYMBOLS + EN_SYMBOLS))
SYMBOLS = [PAD] + NORMAL_SYMBOLS + PUNCTUATION_SYMBOLS

# Positions inside SYMBOLS of every punctuation mark and special token
SIL_PHONEMES_IDS = [SYMBOLS.index(symbol) for symbol in PUNCTUATION_SYMBOLS]

# Combine all tones
NUM_TONES = NUM_ZH_TONES + NUM_JP_TONES + NUM_EN_TONES

# Language maps
LANGUAGE_ID_MAP = {"ZH": 0, "JP": 1, "EN": 2}
NUM_LANGUAGES = len(LANGUAGE_ID_MAP)

# Language tone start map: each language starts at the summed tone counts of
# the languages listed before it (ZH, then JP, then EN).
LANGUAGE_TONE_START_MAP = {
    "ZH": 0,
    "JP": NUM_ZH_TONES,
    "EN": NUM_ZH_TONES + NUM_JP_TONES,
}

if __name__ == "__main__":
    # Debug aid: print the symbols that Chinese and English have in common.
    zh_symbol_set = set(ZH_SYMBOLS)
    en_symbol_set = set(EN_SYMBOLS)
    print(sorted(zh_symbol_set & en_symbol_set))