Upload tokenizer.json
8a9a2f7 | { |
| "char_to_id": { |
| "[PAD]": 0, |
| "[UNK]": 1, |
| "[BOS]": 2, |
| "[EOS]": 3, |
| "[MASK]": 4, |
| " ": 5, |
| "ا": 6, |
| "ل": 7, |
| "ي": 8, |
| "م": 9, |
| "و": 10, |
| "ن": 11, |
| "ه": 12, |
| "ب": 13, |
| "ر": 14, |
| "ع": 15, |
| "ف": 16, |
| "أ": 17, |
| "ق": 18, |
| "ت": 19, |
| "د": 20, |
| "ك": 21, |
| "ح": 22, |
| "ة": 23, |
| "س": 24, |
| "ج": 25, |
| "إ": 26, |
| "ص": 27, |
| "ذ": 28, |
| "ى": 29, |
| "خ": 30, |
| "ش": 31, |
| "ث": 32, |
| "ض": 33, |
| "ط": 34, |
| "ز": 35, |
| ":": 36, |
| "غ": 37, |
| "ء": 38, |
| "ئ": 39, |
| "ظ": 40, |
| "؛": 41, |
| "آ": 42, |
| "-": 43, |
| "ؤ": 44, |
| ",": 45, |
| "ٰ": 46, |
| "ۚ": 47, |
| "ۖ": 48, |
| "ۗ": 49 |
| }, |
| "vocab_size": 50, |
| "special_tokens": { |
| "PAD": 0, |
| "UNK": 1, |
| "BOS": 2, |
| "EOS": 3, |
| "MASK": 4, |
| "SPACE": 5 |
| }, |
| "diacritic_classes": [ |
| "NO_DIACRITIC", |
| "FATHA", |
| "FATHATAN", |
| "DAMMA", |
| "DAMMATAN", |
| "KASRA", |
| "KASRATAN", |
| "SUKUN", |
| "SHADDA", |
| "SHADDA_FATHA", |
| "SHADDA_FATHATAN", |
| "SHADDA_DAMMA", |
| "SHADDA_DAMMATAN", |
| "SHADDA_KASRA", |
| "SHADDA_KASRATAN" |
| ], |
| "num_labels": 15 |
| } |