bouim committed on
Commit
41439f2
1 Parent(s): 7348260

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ق": 0, "ِ": 1, "إ": 2, "ي": 3, "ّ": 4, "،": 5, "ب": 6, "ة": 7, "خ": 8, "َ": 9, "ئ": 10, "ه": 11, "ش": 12, "ؤ": 13, "أ": 14, "ى": 15, "ل": 16, "ح": 17, "ن": 18, "ٌ": 19, "ض": 20, "س": 21, "ط": 22, "ث": 23, "ج": 25, "د": 26, "و": 27, "ع": 28, "ظ": 29, "ذ": 30, "ف": 31, "ُ": 32, "ر": 33, "غ": 34, "ص": 35, "ء": 36, "ز": 37, "ٍ": 38, "ً": 39, "ْ": 40, "م": 41, "ك": 42, "آ": 43, "ا": 44, "ت": 45, "|": 24, "[UNK]": 0, "[PAD]": 0}
 
1
+ {"ء": 0, "غ": 1, "ى": 2, "،": 3, "َ": 4, "ه": 5, "أ": 6, "ا": 7, "ض": 8, "د": 9, "ي": 10, "ر": 11, "ث": 12, "ُ": 13, "ذ": 14, "ب": 15, "خ": 16, "ة": 17, "ؤ": 18, "آ": 19, "ف": 20, "ٍ": 21, "ط": 22, "ع": 23, "ئ": 24, "ز": 25, "ن": 26, "م": 27, "إ": 28, "س": 29, "ً": 31, "ج": 32, "ح": 33, "ِ": 34, "ٌ": 35, "ش": 36, "ك": 37, "ّ": 38, "ت": 39, "ق": 40, "ْ": 41, "و": 42, "ص": 43, "ظ": 44, "ل": 45, "|": 30, "[UNK]": 0, "[PAD]": 0}