unicosys-hypergraph / tokenizer.json
drzo's picture
chore: update unicosys hypergraph model from CI pipeline
e867105 verified
{
"vocab_size": 32000,
"max_length": 128,
"char_to_id": {
"p": 4,
"e": 5,
"t": 6,
"r": 7,
" ": 8,
"a": 9,
"n": 10,
"d": 11,
"w": 12,
"f": 13,
"u": 14,
"c": 15,
"i": 16,
"j": 17,
"q": 18,
"l": 19,
"y": 20,
"o": 21,
"b": 22,
"s": 23,
"k": 24,
"v": 25,
"m": 26,
"h": 27,
"g": 28,
"(": 29,
")": 30,
"\u00e9": 31,
"z": 32,
":": 33,
"4": 34,
"8": 35,
"3": 36,
"1": 37,
"0": 38,
"7": 39,
"6": 40,
"9": 41,
"2": 42,
"5": 43,
"x": 44,
",": 45,
"-": 46,
"/": 47,
"&": 48,
".": 49,
"+": 50,
"%": 51,
"#": 52,
"'": 53,
"@": 54,
"_": 55,
"*": 56,
"=": 57,
"<": 58,
">": 59,
"\n": 60
},
"next_id": 61
}