Umut committed on
Commit
7e8304e
1 Parent(s): 496cf5f

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l": 0, "u": 1, "2": 3, "t": 4, "ć": 5, "'": 6, "1": 7, "p": 8, "9": 9, "_": 10, "k": 11, ")": 12, "ö": 13, "v": 14, "ü": 15, "x": 16, "ş": 17, "4": 18, "7": 19, "b": 20, "m": 21, "û": 22, "â": 23, "ă": 24, "g": 25, "r": 26, "s": 27, "f": 28, "̇": 29, "d": 30, "0": 31, "q": 32, "c": 33, "\n": 34, "o": 35, "5": 36, "a": 37, "ç": 38, "ğ": 39, "j": 40, "ı": 41, "i": 42, "ž": 43, "8": 44, "w": 45, "z": 46, "î": 47, "e": 48, "n": 49, "6": 50, "/": 51, "3": 52, "š": 53, "’": 54, "h": 55, "(": 56, "y": 57, "|": 2, "[UNK]": 58, "[PAD]": 59}