CWKSC's picture
Upload tokenizer
ec9489f verified
raw
history blame
418 Bytes
{
"[PAD]": 34,
"[UNK]": 33,
"|": 0,
"а": 1,
"б": 2,
"в": 3,
"г": 4,
"д": 5,
"е": 6,
"ж": 7,
"з": 8,
"и": 9,
"й": 10,
"к": 11,
"л": 12,
"м": 13,
"н": 14,
"о": 15,
"п": 16,
"р": 17,
"с": 18,
"т": 19,
"у": 20,
"х": 21,
"ц": 22,
"ч": 23,
"ш": 24,
"ы": 25,
"ь": 26,
"э": 27,
"ю": 28,
"я": 29,
"ё": 30,
"ү": 31,
"ө": 32
}