sqrk's picture
Upload tokenizer
c01901f verified
raw
history blame
1.59 kB
{
"ara": {
"\t": 0,
"!": 2,
"'": 3,
",": 4,
"-": 5,
".": 6,
"0": 7,
"3": 8,
"4": 9,
"=": 10,
">": 11,
"?": 12,
"A": 13,
"B": 14,
"C": 15,
"D": 16,
"E": 17,
"F": 18,
"G": 19,
"H": 20,
"I": 21,
"J": 22,
"K": 23,
"L": 24,
"M": 25,
"N": 26,
"O": 27,
"P": 28,
"R": 29,
"S": 30,
"T": 31,
"U": 32,
"V": 33,
"W": 34,
"X": 35,
"Y": 36,
"Z": 37,
"[": 38,
"[PAD]": 115,
"[UNK]": 114,
"]": 39,
"a": 40,
"b": 41,
"c": 42,
"d": 43,
"e": 44,
"f": 45,
"g": 46,
"h": 47,
"i": 48,
"j": 49,
"k": 50,
"l": 51,
"m": 52,
"n": 53,
"o": 54,
"p": 55,
"q": 56,
"r": 57,
"s": 58,
"t": 59,
"u": 60,
"v": 61,
"w": 62,
"x": 63,
"y": 64,
"z": 65,
"|": 1,
"،": 66,
"؛": 67,
"؟": 68,
"ء": 69,
"آ": 70,
"أ": 71,
"ؤ": 72,
"إ": 73,
"ئ": 74,
"ا": 75,
"ب": 76,
"ة": 77,
"ت": 78,
"ث": 79,
"ج": 80,
"ح": 81,
"خ": 82,
"د": 83,
"ذ": 84,
"ر": 85,
"ز": 86,
"س": 87,
"ش": 88,
"ص": 89,
"ض": 90,
"ط": 91,
"ظ": 92,
"ع": 93,
"غ": 94,
"ف": 95,
"ق": 96,
"ك": 97,
"ل": 98,
"م": 99,
"ن": 100,
"ه": 101,
"و": 102,
"ى": 103,
"ي": 104,
"ً": 105,
"ٍ": 106,
"َ": 107,
"ُ": 108,
"ِ": 109,
"ّ": 110,
"ی": 111,
"–": 112,
"’": 113
}
}