{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, " ": 2, "1": 3, "2": 4, "3": 5, "4": 6, "5": 7, "_": 8, "a": 9, "b": 10, "d": 11, "e": 12, "f": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "s": 23, "t": 24, "u": 25, "v": 26, "w": 27, "z": 28, "æ": 29, "ð": 30, "ŋ": 31, "ɑ": 32, "ɔ": 33, "ɕ": 34, "ə": 35, "ɚ": 36, "ɛ": 37, "ɡ": 38, "ɨ": 39, "ɪ": 40, "ɹ": 41, "ʃ": 42, "ʊ": 43, "ʌ": 44, "ʒ": 45, "ʰ": 46, "̩": 47, "͡": 48, "θ": 49, "_5": 50, "_55": 51, "_2": 52, "_24": 53, "11": 54, "_11": 55, "31": 56, "_31": 57, "_55 ": 58, "t͡": 59, "_24 ": 60, "t͡s": 61, "_11 ": 62, "i_55": 63, "_31 ": 64, "tʰ": 65, "t_2": 66, "oŋ": 67, "i_24": 68, "en": 69, "i_11": 70, "i_55 ": 71, "e_55 ": 72, "uŋ": 73, "in": 74, "t͡sʰ": 75, "sɨ": 76, "i_31": 77, "t͡ɕ": 78, "ke_55 ": 79, "o_55": 80, "kʰ": 81, "an": 82, "k_2": 83, "t_2 ": 84, "u_55": 85, "aŋ": 86, "t͡ɕʰ": 87, "a_55": 88, "_5 ": 89, "a_24": 90, "un": 91, "u_55 ": 92, "o_11": 93, "u_24 ": 94, "u_24": 95, "am": 96, "i_11 ": 97, "pʰ": 98, "on": 99, "e_55": 100, "o_55 ": 101, "o_55i_55 ": 102, "u_31": 103, "i_11en": 104, "a_31": 105, "i_2": 106, "it_2": 107, "k_2 ": 108, "i_5": 109, "et_2 ": 110, "i_24 ": 111, "a_24 ": 112, "o_31 ": 113, "ŋin": 114, "t͡sɨ": 115, "i_55en": 116, "e_31": 117, "uŋ_24": 118, "e_31 ": 119, "t_5": 120, "a_11": 121, "u_31 ": 122, "e_24": 123, "o_31": 124, "t_5 ": 125, "tʰuŋ": 126, "o_24": 127, "ap": 128, "he_55 ": 129, "u_11": 130, "i_24u_24 ": 131, "i_31 ": 132, "lo_11": 133, "i_11en_11 ": 134, "a_55 ": 135, "oŋ_24": 136, "it_2 ": 137, "et_2": 138, "t͡ɕʰi_55": 139, "i_24en": 140, "a_55i_55": 141, "k_5": 142, "im": 143, "̩_11": 144, "oŋ_55 ": 145, "oŋ_31": 146, "oŋ_31 ": 147, "e_11": 148, "k_5 ": 149, "in_24": 150, "m̩_11": 151, "sɨp": 152, "u_11 ": 153, "vo_55i_55 ": 154, "to_55 ": 155, "han": 156, "oŋ_24 ": 157, "ŋin_11 ": 158, "ti_55 ": 159, "i_55en_55": 160, "ki_24": 161, "lo_11i_11 ": 162, "i_11en_11": 163, "ak_2 ": 164, "uŋ_24 ": 165, "uk_2": 166, "to_31 ": 167, "ke_55": 168, "oŋ_11 ": 169, "i_55en_55 ": 170, "ŋi_5": 171, "tʰo_11": 172, "oŋ_55": 173, "t͡so_55": 174, "ak_2": 175, "i_31a_31": 176, "tʰa_55i_55": 177, "i_55uŋ": 178, "sɨn": 179, "ŋ̩": 180, "o_24 ": 181, "t͡sʰɨ": 182, "a_31 ": 183, "aŋ_24": 184, "a_55i_55 ": 185, "sɨ_55 ": 186, "en_31 ": 187, "ok_5": 188, "ko_55 ": 189, "sɨp_5": 190, "ŋin_11": 191, "ki_31": 192, "ŋi_55": 193, "o_55i_55": 194, "sɨ_55": 195, "tʰe_11": 196, "ki_11 ": 197, "in_55": 198, "u_55i_55 ": 199, "t͡sʰu": 200, "pun": 201, "am_24": 202, "in_11": 203, "han_11": 204, "ka_24 ": 205, "an_24 ": 206, "ku": 207, "i_24en_24": 208, "aŋ_11": 209, "at_2": 210, "oŋ_11": 211, "o_11 ": 212, "i_24en_24 ": 213, "kʰi_11": 214, "ŋi_11en_11 ": 215, "e_24u_24 ": 216, "i_31oŋ_31": 217, "tet_2 ": 218, "e_31u_31": 219, "tʰo_11i_11": 220, "ki_55": 221, "ku_2": 222, "an_24": 223, "e_24u_24": 224, "ŋa_11": 225, "ka_24": 226, "kin": 227, "t͡sɨn": 228, "aŋ_11 ": 229, "mo_11": 230, "et_5 ": 231, "uk_2 ": 232, "ip": 233, "ap_2": 234, "im_24": 235, "on_24": 236, "su_31": 237, "kuŋ_24": 238, "ok_2": 239, "e_55u_55": 240, "tʰuŋ_11 ": 241, "tʰi_55": 242, "on_55": 243, "un_11": 244, "hi_55 ": 245, "aŋ_24 ": 246, "an_55 ": 247, "t͡suŋ": 248, "in_55 ": 249, "ku_24": 250, "ŋi_11": 251, "ok_2 ": 252, "ok_5 ": 253, "in_11 ": 254, "e_55u_55 ": 255 }, "merges": [ [ "_", "5" ], [ "_5", "5" ], [ "_", "2" ], [ "_2", "4" ], [ "1", "1" ], [ "_", "11" ], [ "3", "1" ], [ "_", "31" ], [ "_55", " " ], [ "t", "͡" ], [ "_24", " " ], [ "t͡", "s" ], [ "_11", " " ], [ "i", "_55" ], [ "_31", " " ], [ "t", "ʰ" ], [ "t", "_2" ], [ "o", "ŋ" ], [ "i", "_24" ], [ "e", "n" ], [ "i", "_11" ], [ "i", "_55 " ], [ "e", "_55 " ], [ "u", "ŋ" ], [ "i", "n" ], [ "t͡s", "ʰ" ], [ "s", "ɨ" ], [ "i", "_31" ], [ "t͡", "ɕ" ], [ "k", "e_55 " ], [ "o", "_55" ], [ "k", "ʰ" ], [ "a", "n" ], [ "k", "_2" ], [ "t_2", " " ], [ "u", "_55" ], [ "a", "ŋ" ], [ "t͡ɕ", "ʰ" ], [ "a", "_55" ], [ "_5", " " ], [ "a", "_24" ], [ "u", "n" ], [ "u", "_55 " ], [ "o", "_11" ], [ "u", "_24 " ], [ "u", "_24" ], [ "a", "m" ], [ "i", "_11 " ], [ "p", "ʰ" ], [ "o", "n" ], [ "e", "_55" ], [ "o", "_55 " ], [ "o_55", "i_55 " ], [ "u", "_31" ], [ "i_11", "en" ], [ "a", "_31" ], [ "i", "_2" ], [ "i", "t_2" ], [ "k_2", " " ], [ "i", "_5" ], [ "e", "t_2 " ], [ "i", "_24 " ], [ "a", "_24 " ], [ "o", "_31 " ], [ "ŋ", "in" ], [ "t͡s", "ɨ" ], [ "i_55", "en" ], [ "e", "_31" ], [ "uŋ", "_24" ], [ "e", "_31 " ], [ "t", "_5" ], [ "a", "_11" ], [ "u", "_31 " ], [ "e", "_24" ], [ "o", "_31" ], [ "t", "_5 " ], [ "tʰ", "uŋ" ], [ "o", "_24" ], [ "a", "p" ], [ "h", "e_55 " ], [ "u", "_11" ], [ "i_24", "u_24 " ], [ "i", "_31 " ], [ "l", "o_11" ], [ "i_11en", "_11 " ], [ "a", "_55 " ], [ "oŋ", "_24" ], [ "i", "t_2 " ], [ "e", "t_2" ], [ "t͡ɕʰ", "i_55" ], [ "i_24", "en" ], [ "a_55", "i_55" ], [ "k", "_5" ], [ "i", "m" ], [ "̩", "_11" ], [ "oŋ", "_55 " ], [ "oŋ", "_31" ], [ "oŋ", "_31 " ], [ "e", "_11" ], [ "k", "_5 " ], [ "in", "_24" ], [ "m", "̩_11" ], [ "sɨ", "p" ], [ "u", "_11 " ], [ "v", "o_55i_55 " ], [ "t", "o_55 " ], [ "h", "an" ], [ "oŋ", "_24 " ], [ "ŋin", "_11 " ], [ "t", "i_55 " ], [ "i_55en", "_55" ], [ "k", "i_24" ], [ "lo_11", "i_11 " ], [ "i_11en", "_11" ], [ "a", "k_2 " ], [ "uŋ", "_24 " ], [ "u", "k_2" ], [ "t", "o_31 " ], [ "k", "e_55" ], [ "oŋ", "_11 " ], [ "i_55en", "_55 " ], [ "ŋ", "i_5" ], [ "tʰ", "o_11" ], [ "oŋ", "_55" ], [ "t͡s", "o_55" ], [ "a", "k_2" ], [ "i_31", "a_31" ], [ "tʰ", "a_55i_55" ], [ "i_55", "uŋ" ], [ "sɨ", "n" ], [ "ŋ", "̩" ], [ "o", "_24 " ], [ "t͡sʰ", "ɨ" ], [ "a", "_31 " ], [ "aŋ", "_24" ], [ "a_55", "i_55 " ], [ "sɨ", "_55 " ], [ "en", "_31 " ], [ "o", "k_5" ], [ "k", "o_55 " ], [ "sɨp", "_5" ], [ "ŋin", "_11" ], [ "k", "i_31" ], [ "ŋ", "i_55" ], [ "o_55", "i_55" ], [ "sɨ", "_55" ], [ "tʰ", "e_11" ], [ "k", "i_11 " ], [ "in", "_55" ], [ "u_55", "i_55 " ], [ "t͡sʰ", "u" ], [ "p", "un" ], [ "am", "_24" ], [ "in", "_11" ], [ "han", "_11" ], [ "k", "a_24 " ], [ "an", "_24 " ], [ "k", "u" ], [ "i_24en", "_24" ], [ "aŋ", "_11" ], [ "a", "t_2" ], [ "oŋ", "_11" ], [ "o", "_11 " ], [ "i_24en", "_24 " ], [ "kʰ", "i_11" ], [ "ŋ", "i_11en_11 " ], [ "e_24", "u_24 " ], [ "i_31", "oŋ_31" ], [ "t", "et_2 " ], [ "e_31", "u_31" ], [ "tʰo_11", "i_11" ], [ "k", "i_55" ], [ "ku", "_2" ], [ "an", "_24" ], [ "e_24", "u_24" ], [ "ŋ", "a_11" ], [ "k", "a_24" ], [ "k", "in" ], [ "t͡sɨ", "n" ], [ "aŋ", "_11 " ], [ "m", "o_11" ], [ "e", "t_5 " ], [ "u", "k_2 " ], [ "i", "p" ], [ "ap", "_2" ], [ "im", "_24" ], [ "on", "_24" ], [ "s", "u_31" ], [ "k", "uŋ_24" ], [ "o", "k_2" ], [ "e_55", "u_55" ], [ "tʰuŋ", "_11 " ], [ "tʰ", "i_55" ], [ "on", "_55" ], [ "un", "_11" ], [ "h", "i_55 " ], [ "aŋ", "_24 " ], [ "an", "_55 " ], [ "t͡s", "uŋ" ], [ "in", "_55 " ], [ "k", "u_24" ], [ "ŋ", "i_11" ], [ "o", "k_2 " ], [ "o", "k_5 " ], [ "in", "_11 " ], [ "e_55", "u_55 " ] ] } }