EngNada's picture
add tokenizer
88dc1f5
{"ک": 0, "ب": 1, "ٌ": 2, "—": 3, "َ": 4, "ج": 5, "ی": 6, "ز": 7, "ي": 8, "ظ": 9, "ّ": 10, "چ": 11, "ط": 12, "غ": 13, "t": 14, "ُ": 15, "ﺃ": 16, "ك": 17, "«": 18, "ٰ": 19, "»": 20, "خ": 21, "ه": 22, "ث": 23, "ة": 24, "آ": 25, "ض": 26, "ش": 27, "_": 28, "؛": 29, "g": 30, "ۚ": 31, "ِ": 32, "ا": 33, "ً": 34, "ﻻ": 35, "ء": 36, "ذ": 37, "د": 38, "☭": 39, "م": 40, "؟": 41, "ع": 43, "ـ": 44, "ٍ": 45, "ۖ": 46, "ل": 47, "إ": 48, "ى": 49, "ئ": 50, "e": 51, "ر": 52, "ص": 53, "ڨ": 54, "ھ": 55, "أ": 56, "ن": 57, "ق": 58, "ت": 59, "،": 60, "ح": 61, "ف": 62, "و": 63, "س": 64, "ؤ": 65, "ْ": 66, "|": 42, "[UNK]": 67, "[PAD]": 68}