e2-tts-hakka-test-concat / tokenizer.json
txya900619's picture
Upload tokenizer.json with huggingface_hub
ef7fa2b verified
raw
history blame
15.2 kB
{
"version": "1.0",
"truncation": null,
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "<pad>"
},
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<sil>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<pad>": 0,
"<sil>": 1,
" ": 2,
"1": 3,
"2": 4,
"3": 5,
"4": 6,
"5": 7,
"_": 8,
"a": 9,
"b": 10,
"d": 11,
"e": 12,
"f": 13,
"h": 14,
"i": 15,
"j": 16,
"k": 17,
"l": 18,
"m": 19,
"n": 20,
"o": 21,
"p": 22,
"s": 23,
"t": 24,
"u": 25,
"v": 26,
"w": 27,
"z": 28,
"æ": 29,
"ð": 30,
"ŋ": 31,
"ɑ": 32,
"ɔ": 33,
"ɕ": 34,
"ə": 35,
"ɚ": 36,
"ɛ": 37,
"ɡ": 38,
"ɨ": 39,
"ɪ": 40,
"ɹ": 41,
"ʃ": 42,
"ʊ": 43,
"ʌ": 44,
"ʒ": 45,
"ʰ": 46,
"̩": 47,
"͡": 48,
"θ": 49,
"_5": 50,
"_55": 51,
"_2": 52,
"_24": 53,
"11": 54,
"_11": 55,
"31": 56,
"_31": 57,
"_55 ": 58,
"t͡": 59,
"_24 ": 60,
"t͡s": 61,
"_11 ": 62,
"i_55": 63,
"_31 ": 64,
"tʰ": 65,
"t_2": 66,
"oŋ": 67,
"i_24": 68,
"en": 69,
"i_11": 70,
"i_55 ": 71,
"e_55 ": 72,
"uŋ": 73,
"in": 74,
"t͡sʰ": 75,
"sɨ": 76,
"i_31": 77,
"t͡ɕ": 78,
"ke_55 ": 79,
"o_55": 80,
"kʰ": 81,
"an": 82,
"k_2": 83,
"t_2 ": 84,
"u_55": 85,
"aŋ": 86,
"t͡ɕʰ": 87,
"a_55": 88,
"_5 ": 89,
"a_24": 90,
"un": 91,
"u_55 ": 92,
"o_11": 93,
"u_24 ": 94,
"u_24": 95,
"am": 96,
"i_11 ": 97,
"pʰ": 98,
"on": 99,
"e_55": 100,
"o_55 ": 101,
"o_55i_55 ": 102,
"u_31": 103,
"i_11en": 104,
"a_31": 105,
"i_2": 106,
"it_2": 107,
"k_2 ": 108,
"i_5": 109,
"et_2 ": 110,
"i_24 ": 111,
"a_24 ": 112,
"o_31 ": 113,
"ŋin": 114,
"t͡sɨ": 115,
"i_55en": 116,
"e_31": 117,
"uŋ_24": 118,
"e_31 ": 119,
"t_5": 120,
"a_11": 121,
"u_31 ": 122,
"e_24": 123,
"o_31": 124,
"t_5 ": 125,
"tʰuŋ": 126,
"o_24": 127,
"ap": 128,
"he_55 ": 129,
"u_11": 130,
"i_24u_24 ": 131,
"i_31 ": 132,
"lo_11": 133,
"i_11en_11 ": 134,
"a_55 ": 135,
"oŋ_24": 136,
"it_2 ": 137,
"et_2": 138,
"t͡ɕʰi_55": 139,
"i_24en": 140,
"a_55i_55": 141,
"k_5": 142,
"im": 143,
"̩_11": 144,
"oŋ_55 ": 145,
"oŋ_31": 146,
"oŋ_31 ": 147,
"e_11": 148,
"k_5 ": 149,
"in_24": 150,
"m̩_11": 151,
"sɨp": 152,
"u_11 ": 153,
"vo_55i_55 ": 154,
"to_55 ": 155,
"han": 156,
"oŋ_24 ": 157,
"ŋin_11 ": 158,
"ti_55 ": 159,
"i_55en_55": 160,
"ki_24": 161,
"lo_11i_11 ": 162,
"i_11en_11": 163,
"ak_2 ": 164,
"uŋ_24 ": 165,
"uk_2": 166,
"to_31 ": 167,
"ke_55": 168,
"oŋ_11 ": 169,
"i_55en_55 ": 170,
"ŋi_5": 171,
"tʰo_11": 172,
"oŋ_55": 173,
"t͡so_55": 174,
"ak_2": 175,
"i_31a_31": 176,
"tʰa_55i_55": 177,
"i_55uŋ": 178,
"sɨn": 179,
"ŋ̩": 180,
"o_24 ": 181,
"t͡sʰɨ": 182,
"a_31 ": 183,
"aŋ_24": 184,
"a_55i_55 ": 185,
"sɨ_55 ": 186,
"en_31 ": 187,
"ok_5": 188,
"ko_55 ": 189,
"sɨp_5": 190,
"ŋin_11": 191,
"ki_31": 192,
"ŋi_55": 193,
"o_55i_55": 194,
"sɨ_55": 195,
"tʰe_11": 196,
"ki_11 ": 197,
"in_55": 198,
"u_55i_55 ": 199,
"t͡sʰu": 200,
"pun": 201,
"am_24": 202,
"in_11": 203,
"han_11": 204,
"ka_24 ": 205,
"an_24 ": 206,
"ku": 207,
"i_24en_24": 208,
"aŋ_11": 209,
"at_2": 210,
"oŋ_11": 211,
"o_11 ": 212,
"i_24en_24 ": 213,
"kʰi_11": 214,
"ŋi_11en_11 ": 215,
"e_24u_24 ": 216,
"i_31oŋ_31": 217,
"tet_2 ": 218,
"e_31u_31": 219,
"tʰo_11i_11": 220,
"ki_55": 221,
"ku_2": 222,
"an_24": 223,
"e_24u_24": 224,
"ŋa_11": 225,
"ka_24": 226,
"kin": 227,
"t͡sɨn": 228,
"aŋ_11 ": 229,
"mo_11": 230,
"et_5 ": 231,
"uk_2 ": 232,
"ip": 233,
"ap_2": 234,
"im_24": 235,
"on_24": 236,
"su_31": 237,
"kuŋ_24": 238,
"ok_2": 239,
"e_55u_55": 240,
"tʰuŋ_11 ": 241,
"tʰi_55": 242,
"on_55": 243,
"un_11": 244,
"hi_55 ": 245,
"aŋ_24 ": 246,
"an_55 ": 247,
"t͡suŋ": 248,
"in_55 ": 249,
"ku_24": 250,
"ŋi_11": 251,
"ok_2 ": 252,
"ok_5 ": 253,
"in_11 ": 254,
"e_55u_55 ": 255
},
"merges": [
[
"_",
"5"
],
[
"_5",
"5"
],
[
"_",
"2"
],
[
"_2",
"4"
],
[
"1",
"1"
],
[
"_",
"11"
],
[
"3",
"1"
],
[
"_",
"31"
],
[
"_55",
" "
],
[
"t",
"͡"
],
[
"_24",
" "
],
[
"t͡",
"s"
],
[
"_11",
" "
],
[
"i",
"_55"
],
[
"_31",
" "
],
[
"t",
"ʰ"
],
[
"t",
"_2"
],
[
"o",
"ŋ"
],
[
"i",
"_24"
],
[
"e",
"n"
],
[
"i",
"_11"
],
[
"i",
"_55 "
],
[
"e",
"_55 "
],
[
"u",
"ŋ"
],
[
"i",
"n"
],
[
"t͡s",
"ʰ"
],
[
"s",
"ɨ"
],
[
"i",
"_31"
],
[
"t͡",
"ɕ"
],
[
"k",
"e_55 "
],
[
"o",
"_55"
],
[
"k",
"ʰ"
],
[
"a",
"n"
],
[
"k",
"_2"
],
[
"t_2",
" "
],
[
"u",
"_55"
],
[
"a",
"ŋ"
],
[
"t͡ɕ",
"ʰ"
],
[
"a",
"_55"
],
[
"_5",
" "
],
[
"a",
"_24"
],
[
"u",
"n"
],
[
"u",
"_55 "
],
[
"o",
"_11"
],
[
"u",
"_24 "
],
[
"u",
"_24"
],
[
"a",
"m"
],
[
"i",
"_11 "
],
[
"p",
"ʰ"
],
[
"o",
"n"
],
[
"e",
"_55"
],
[
"o",
"_55 "
],
[
"o_55",
"i_55 "
],
[
"u",
"_31"
],
[
"i_11",
"en"
],
[
"a",
"_31"
],
[
"i",
"_2"
],
[
"i",
"t_2"
],
[
"k_2",
" "
],
[
"i",
"_5"
],
[
"e",
"t_2 "
],
[
"i",
"_24 "
],
[
"a",
"_24 "
],
[
"o",
"_31 "
],
[
"ŋ",
"in"
],
[
"t͡s",
"ɨ"
],
[
"i_55",
"en"
],
[
"e",
"_31"
],
[
"uŋ",
"_24"
],
[
"e",
"_31 "
],
[
"t",
"_5"
],
[
"a",
"_11"
],
[
"u",
"_31 "
],
[
"e",
"_24"
],
[
"o",
"_31"
],
[
"t",
"_5 "
],
[
"tʰ",
"uŋ"
],
[
"o",
"_24"
],
[
"a",
"p"
],
[
"h",
"e_55 "
],
[
"u",
"_11"
],
[
"i_24",
"u_24 "
],
[
"i",
"_31 "
],
[
"l",
"o_11"
],
[
"i_11en",
"_11 "
],
[
"a",
"_55 "
],
[
"oŋ",
"_24"
],
[
"i",
"t_2 "
],
[
"e",
"t_2"
],
[
"t͡ɕʰ",
"i_55"
],
[
"i_24",
"en"
],
[
"a_55",
"i_55"
],
[
"k",
"_5"
],
[
"i",
"m"
],
[
"̩",
"_11"
],
[
"oŋ",
"_55 "
],
[
"oŋ",
"_31"
],
[
"oŋ",
"_31 "
],
[
"e",
"_11"
],
[
"k",
"_5 "
],
[
"in",
"_24"
],
[
"m",
"̩_11"
],
[
"sɨ",
"p"
],
[
"u",
"_11 "
],
[
"v",
"o_55i_55 "
],
[
"t",
"o_55 "
],
[
"h",
"an"
],
[
"oŋ",
"_24 "
],
[
"ŋin",
"_11 "
],
[
"t",
"i_55 "
],
[
"i_55en",
"_55"
],
[
"k",
"i_24"
],
[
"lo_11",
"i_11 "
],
[
"i_11en",
"_11"
],
[
"a",
"k_2 "
],
[
"uŋ",
"_24 "
],
[
"u",
"k_2"
],
[
"t",
"o_31 "
],
[
"k",
"e_55"
],
[
"oŋ",
"_11 "
],
[
"i_55en",
"_55 "
],
[
"ŋ",
"i_5"
],
[
"tʰ",
"o_11"
],
[
"oŋ",
"_55"
],
[
"t͡s",
"o_55"
],
[
"a",
"k_2"
],
[
"i_31",
"a_31"
],
[
"tʰ",
"a_55i_55"
],
[
"i_55",
"uŋ"
],
[
"sɨ",
"n"
],
[
"ŋ",
"̩"
],
[
"o",
"_24 "
],
[
"t͡sʰ",
"ɨ"
],
[
"a",
"_31 "
],
[
"aŋ",
"_24"
],
[
"a_55",
"i_55 "
],
[
"sɨ",
"_55 "
],
[
"en",
"_31 "
],
[
"o",
"k_5"
],
[
"k",
"o_55 "
],
[
"sɨp",
"_5"
],
[
"ŋin",
"_11"
],
[
"k",
"i_31"
],
[
"ŋ",
"i_55"
],
[
"o_55",
"i_55"
],
[
"sɨ",
"_55"
],
[
"tʰ",
"e_11"
],
[
"k",
"i_11 "
],
[
"in",
"_55"
],
[
"u_55",
"i_55 "
],
[
"t͡sʰ",
"u"
],
[
"p",
"un"
],
[
"am",
"_24"
],
[
"in",
"_11"
],
[
"han",
"_11"
],
[
"k",
"a_24 "
],
[
"an",
"_24 "
],
[
"k",
"u"
],
[
"i_24en",
"_24"
],
[
"aŋ",
"_11"
],
[
"a",
"t_2"
],
[
"oŋ",
"_11"
],
[
"o",
"_11 "
],
[
"i_24en",
"_24 "
],
[
"kʰ",
"i_11"
],
[
"ŋ",
"i_11en_11 "
],
[
"e_24",
"u_24 "
],
[
"i_31",
"oŋ_31"
],
[
"t",
"et_2 "
],
[
"e_31",
"u_31"
],
[
"tʰo_11",
"i_11"
],
[
"k",
"i_55"
],
[
"ku",
"_2"
],
[
"an",
"_24"
],
[
"e_24",
"u_24"
],
[
"ŋ",
"a_11"
],
[
"k",
"a_24"
],
[
"k",
"in"
],
[
"t͡sɨ",
"n"
],
[
"aŋ",
"_11 "
],
[
"m",
"o_11"
],
[
"e",
"t_5 "
],
[
"u",
"k_2 "
],
[
"i",
"p"
],
[
"ap",
"_2"
],
[
"im",
"_24"
],
[
"on",
"_24"
],
[
"s",
"u_31"
],
[
"k",
"uŋ_24"
],
[
"o",
"k_2"
],
[
"e_55",
"u_55"
],
[
"tʰuŋ",
"_11 "
],
[
"tʰ",
"i_55"
],
[
"on",
"_55"
],
[
"un",
"_11"
],
[
"h",
"i_55 "
],
[
"aŋ",
"_24 "
],
[
"an",
"_55 "
],
[
"t͡s",
"uŋ"
],
[
"in",
"_55 "
],
[
"k",
"u_24"
],
[
"ŋ",
"i_11"
],
[
"o",
"k_2 "
],
[
"o",
"k_5 "
],
[
"in",
"_11 "
],
[
"e_55",
"u_55 "
]
]
}
}