musiclang-bpe / tokenizer.json
floriangardin's picture
Upload tokenizer
53f85bf verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "NFKC"
},
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"post_processor": null,
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<unk>": 0,
"'": 1,
"(": 2,
")": 3,
"*": 4,
"+": 5,
".": 6,
"1": 7,
"6": 8,
"@": 9,
"C": 10,
"D": 11,
"F": 12,
"S": 13,
"U": 14,
"Z": 15,
"[": 16,
"\\": 17,
"^": 18,
"_": 19,
"`": 20,
"a": 21,
"k": 22,
"l": 23,
"p": 24,
"r": 25,
"~": 26,
"": 27,
"ˆ": 28,
"Š": 29,
"Đ": 30,
"Δ‘": 31,
"▁": 32,
"'.": 33,
"(+": 34,
"(+'.": 35,
"(+'.*": 36,
")(+'.*": 37,
"6(+'.*": 38,
"D(+'.*": 39,
")(+'.*6(+'.*": 40,
"D(+'.*C": 41,
")(+'.*6(+'.*D(+'.*C": 42,
"*)(+'.*6(+'.*D(+'.*C": 43,
"▁1": 44,
"▁@": 45,
"▁F": 46,
"▁S": 47,
"▁U": 48,
"▁Z": 49,
"▁[": 50,
"▁^": 51,
"▁`": 52,
"▁k": 53,
"▁p": 54,
"▁~": 55,
"β–Βˆ": 56,
"▁*)(+'.*6(+'.*D(+'.*C": 57,
"(+'.*)(+'.*6(+'.*D(+'.*C": 58,
"▁*)(+'.*6(+'.*D(+'.*C(+'.*)(+'.*6(+'.*D(+'.*C": 59,
"▁*)(+'.*6(+'.*D(+'.*C(+'.*)(+'.*6(+'.*D(+'.*C(+'.": 60,
"▁\\": 61,
"▁_": 62,
"▁a": 63,
"▁l": 64,
"▁r": 65,
"▁": 66,
"β–ΒŠ": 67
},
"merges": [
"' .",
"( +",
"(+ '.",
"(+'. *",
") (+'.*",
"6 (+'.*",
"D (+'.*",
")(+'.* 6(+'.*",
"D(+'.* C",
")(+'.*6(+'.* D(+'.*C",
"* )(+'.*6(+'.*D(+'.*C",
"▁ 1",
"▁ @",
"▁ F",
"▁ S",
"▁ U",
"▁ Z",
"▁ [",
"▁ ^",
"▁ `",
"▁ k",
"▁ p",
"▁ ~",
"▁ ˆ",
"▁ *)(+'.*6(+'.*D(+'.*C",
"(+'.* )(+'.*6(+'.*D(+'.*C",
"▁*)(+'.*6(+'.*D(+'.*C (+'.*)(+'.*6(+'.*D(+'.*C",
"▁*)(+'.*6(+'.*D(+'.*C(+'.*)(+'.*6(+'.*D(+'.*C (+'.",
"▁ \\",
"▁ _",
"▁ a",
"▁ l",
"▁ r",
"▁ ",
"▁ Š"
]
}
}