Anujgr8's picture
Upload tokenizer
e8278e3 verified
raw
history blame
No virus
1.59 kB
{
"\t": 0,
"\n": 1,
"!": 3,
"\"": 4,
"%": 5,
"'": 6,
",": 7,
"-": 8,
".": 9,
"/": 10,
"0": 11,
"1": 12,
"2": 13,
"3": 14,
"4": 15,
"5": 16,
"6": 17,
"7": 18,
"8": 19,
"9": 20,
":": 21,
"B": 22,
"L": 23,
"T": 24,
"[PAD]": 126,
"[UNK]": 125,
"a": 25,
"b": 26,
"c": 27,
"d": 28,
"e": 29,
"f": 30,
"g": 31,
"i": 32,
"l": 33,
"m": 34,
"n": 35,
"o": 36,
"p": 37,
"r": 38,
"s": 39,
"t": 40,
"u": 41,
"v": 42,
"w": 43,
"z": 44,
"|": 2,
"“": 45,
"”": 46,
"।": 47,
"ঁ": 48,
"ং": 49,
"ঃ": 50,
"অ": 51,
"আ": 52,
"ই": 53,
"ঈ": 54,
"উ": 55,
"ঋ": 56,
"এ": 57,
"ঐ": 58,
"ও": 59,
"ঔ": 60,
"ক": 61,
"খ": 62,
"গ": 63,
"ঘ": 64,
"ঙ": 65,
"চ": 66,
"ছ": 67,
"জ": 68,
"ঝ": 69,
"ঞ": 70,
"ট": 71,
"ঠ": 72,
"ড": 73,
"ঢ": 74,
"ণ": 75,
"ত": 76,
"থ": 77,
"দ": 78,
"ধ": 79,
"ন": 80,
"প": 81,
"ফ": 82,
"ব": 83,
"ভ": 84,
"ম": 85,
"য": 86,
"র": 87,
"ল": 88,
"শ": 89,
"ষ": 90,
"স": 91,
"হ": 92,
"়": 93,
"া": 94,
"ি": 95,
"ী": 96,
"ু": 97,
"ূ": 98,
"ৃ": 99,
"ে": 100,
"ৈ": 101,
"ো": 102,
"ৌ": 103,
"্": 104,
"ৎ": 105,
"ড়": 106,
"য়": 107,
"০": 108,
"১": 109,
"২": 110,
"৩": 111,
"৪": 112,
"৫": 113,
"৬": 114,
"৭": 115,
"৮": 116,
"৯": 117,
"‌": 118,
"‍": 119,
"‘": 120,
"’": 121,
"“": 122,
"”": 123,
"…": 124
}