mindi-backup / final_model /tokenizer_config.json
Mindigenous
Sync latest workspace state: data/scripts updates and archive cleanup
5ae3e12
{
"auto_map": {
"AutoTokenizer": [
"tokenization_mindi.MindiTokenizer",
null
]
},
"backend": "tokenizers",
"bos_token": "<BOS>",
"eos_token": "<EOS>",
"is_local": true,
"model_max_length": 2048,
"pad_token": "<PAD>",
"padding_side": "right",
"tokenizer_class": "MindiTokenizer",
"truncation_side": "right",
"unk_token": "<UNK>",
"vocab": {
"(": 11,
")": 12,
"+": 13,
",": 14,
".": 15,
"0": 16,
"4": 17,
"5": 18,
":": 19,
";": 20,
"<": 21,
"<BOS>": 2,
"<CODE>": 8,
"<DEDENT>": 6,
"<EOS>": 3,
"<INDENT>": 5,
"<JAVASCRIPT>": 10,
"<NL>": 4,
"<PAD>": 0,
"<PROMPT>": 7,
"<PYTHON>": 9,
"<UNK>": 1,
"=": 22,
">": 23,
"A": 24,
"AS": 123,
"ASCR": 162,
"AV": 124,
"C": 25,
"CO": 88,
"CR": 125,
"Cre": 126,
"D": 26,
"DE": 69,
"DE>": 99,
"DENT>": 77,
"E": 27,
"F": 28,
"H": 29,
"HO": 127,
"I": 30,
"IN": 89,
"IPT>": 128,
"J": 31,
"JAV": 130,
"Ja": 129,
"L": 32,
"L>": 72,
"M": 33,
"MPT>": 90,
"N": 34,
"N>": 131,
"NL>": 73,
"NT>": 75,
"O": 35,
"OMPT>": 91,
"P": 36,
"PT>": 79,
"Py": 132,
"R": 37,
"ROMPT>": 92,
"S": 38,
"Sc": 133,
"T": 39,
"T>": 70,
"THO": 134,
"V": 40,
"W": 41,
"Y": 42,
"YTHO": 135,
"_": 43,
"_(": 80,
"_)": 81,
"_+": 106,
"_,": 136,
"_0": 107,
"_4": 137,
"_5": 138,
"_:": 139,
"_;": 93,
"_<": 68,
"_<CO": 97,
"_<CODE>": 104,
"_<DE": 96,
"_<DEDENT>": 103,
"_<IN": 98,
"_<INDENT>": 105,
"_<JAV": 152,
"_<JAVASCR": 167,
"_<JAVASCRIPT>": 170,
"_<NL>": 74,
"_<P": 83,
"_<PROMPT>": 101,
"_<PYTHO": 153,
"_<PYTHON>": 168,
"_Cre": 143,
"_Create": 164,
"_Ja": 144,
"_JavaSc": 165,
"_JavaScript": 169,
"_Py": 145,
"_Python": 166,
"_a": 71,
"_add": 117,
"_b": 94,
"_f": 84,
"_for": 154,
"_funct": 119,
"_function": 121,
"_i": 78,
"_l": 85,
"_lo": 102,
"_log": 158,
"_loop": 159,
"_p": 140,
"_print": 163,
"_re": 108,
"_retu": 120,
"_return": 122,
"_t": 76,
"_th": 118,
"_that": 161,
"_to": 100,
"_{": 141,
"_}": 142,
"a": 44,
"at": 95,
"ate": 157,
"b": 45,
"c": 46,
"ct": 109,
"d": 47,
"dd": 110,
"e": 48,
"f": 49,
"g": 50,
"h": 51,
"hon": 146,
"i": 52,
"ion": 111,
"l": 53,
"m": 54,
"n": 55,
"nct": 112,
"nt": 147,
"o": 56,
"on": 82,
"op": 148,
"or": 149,
"p": 57,
"pt": 150,
"r": 58,
"re": 86,
"ri": 87,
"rint": 155,
"ript": 156,
"rn": 113,
"s": 59,
"t": 60,
"thon": 151,
"tu": 114,
"u": 61,
"unct": 115,
"v": 62,
"va": 116,
"vaSc": 160,
"w": 63,
"x": 64,
"y": 65,
"{": 66,
"}": 67
}
}