nadaAlnada committed on
Commit
5f1daa0
1 Parent(s): b04ffe9

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ا": 0, "ھ": 1, "َ": 2, "ٍ": 3, "_": 4, "ﺃ": 5, "،": 6, "ط": 7, "ً": 8, "ّ": 9, "غ": 10, "ء": 11, "س": 13, "ﻻ": 14, "ع": 15, "إ": 16, "ن": 17, "ُ": 18, "ب": 19, "ک": 20, "ز": 21, "ح": 22, "ڨ": 23, "“": 24, "»": 25, "ؤ": 26, "—": 27, "خ": 28, "ج": 29, "ت": 30, "ة": 31, "ۖ": 32, "؛": 33, "ص": 34, "ذ": 35, "ك": 36, "چ": 37, "آ": 38, "ی": 39, "ئ": 40, "ث": 41, "ق": 42, "ش": 43, "ـ": 44, "ٌ": 45, "”": 46, "ظ": 47, "ه": 48, "و": 49, "ٰ": 50, "ِ": 51, "م": 52, "ل": 53, "د": 54, "ض": 55, "«": 56, "ي": 57, "ى": 58, "؟": 59, "ر": 60, "☭": 61, "ۚ": 62, "ف": 63, "أ": 64, "ْ": 65, "|": 12, "[UNK]": 66, "[PAD]": 67}