Spaces:
Sleeping
Sleeping
File size: 1,527 Bytes
e62fb95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# Single-character punctuation marks that are kept as symbols of their own.
punctuation = list("!?…,.'-")
# Punctuation followed by the two special tokens "SP" and "UNK".
pu_symbols = [*punctuation, "SP", "UNK"]
# Padding symbol; it is placed first when the combined symbol table is built.
pad = "_"
# waitau
# Symbol inventory for the "WAITAU" language entry. The entries are kept in
# their original order; rows below are only a visual grouping of that order.
waitau_symbols = [
    "a", "ai", "ak", "am", "an", "ang", "ap", "at", "au",
    "äi", "äk", "äm", "än", "äng", "äp", "ät", "äu",
    "b", "c", "d",
    "e", "ei", "ek", "eng", "öi", "eon", "öt", "eu", "em", "en", "ep", "et",
    "f", "g", "gw", "h",
    "i", "ik", "im", "in", "ing", "ip", "it", "iu",
    "y", "k", "kw", "l", "m", "n", "ng",
    "o", "ö", "ök", "öng", "oi", "ok", "on", "ong", "ot", "ou",
    "p", "s", "t",
    "u", "ui", "uk", "un", "ung", "ut",
    "w",
    "ü", "ün", "üt",
    "oing", "z",
    "ä", "äing", "æ", "æing", "æk", "æng", "ön", "ük", "üng",
]
# Tone count for waitau (presumably the number of distinct tone classes —
# confirm against the tone annotation scheme used by the callers).
num_waitau_tones = 7
# combine all symbols
# Table layout: the pad symbol, then the de-duplicated waitau symbols in
# sorted order, then the punctuation/special symbols.
normal_symbols = sorted(set(waitau_symbols))
# Every waitau symbol is already present through normal_symbols, so nothing
# further needs to be appended after this line.
symbols = [pad] + normal_symbols + pu_symbols
# Positions inside `symbols` of each punctuation/special symbol.
sil_phonemes_ids = [symbols.index(sym) for sym in pu_symbols]

# combine all tones
# Only one language is defined, so the total equals the waitau tone count.
num_tones = num_waitau_tones

# language maps
language_id_map = {"WAITAU": 0}
num_languages = len(language_id_map)
# Per-language starting offset for tone values; the single language starts at 0.
language_tone_start_map = {"WAITAU": 0}
if __name__ == "__main__":
    # When run as a script, print the de-duplicated waitau symbols in sorted order.
    print(sorted(set(waitau_symbols)))
|