mms-MGB3 / vocab.json
herwoww's picture
Upload tokenizer
f1c722b verified
raw
history blame
1.23 kB
{
"ara": {
"#": 1,
"-": 2,
".": 3,
"A": 4,
"C": 5,
"D": 6,
"E": 7,
"F": 8,
"G": 9,
"H": 10,
"I": 11,
"L": 12,
"M": 13,
"N": 14,
"O": 15,
"P": 16,
"R": 17,
"S": 18,
"T": 19,
"U": 20,
"Y": 21,
"[PAD]": 89,
"[UNK]": 88,
"_": 22,
"a": 23,
"b": 24,
"c": 25,
"d": 26,
"e": 27,
"f": 28,
"g": 29,
"h": 30,
"i": 31,
"j": 32,
"k": 33,
"l": 34,
"m": 35,
"n": 36,
"o": 37,
"p": 38,
"r": 39,
"s": 40,
"t": 41,
"u": 42,
"v": 43,
"w": 44,
"y": 45,
"z": 46,
"|": 0,
"،": 47,
"؟": 48,
"ء": 49,
"آ": 50,
"أ": 51,
"ؤ": 52,
"إ": 53,
"ئ": 54,
"ا": 55,
"ب": 56,
"ة": 57,
"ت": 58,
"ث": 59,
"ج": 60,
"ح": 61,
"خ": 62,
"د": 63,
"ذ": 64,
"ر": 65,
"ز": 66,
"س": 67,
"ش": 68,
"ص": 69,
"ض": 70,
"ط": 71,
"ظ": 72,
"ع": 73,
"غ": 74,
"ف": 75,
"ق": 76,
"ك": 77,
"ل": 78,
"م": 79,
"ن": 80,
"ه": 81,
"و": 82,
"ى": 83,
"ي": 84,
"ً": 85,
"ُ": 86,
"ّ": 87
}
}