vitouphy committed on
Commit
93a566f
•
1 Parent(s): 0530d4b

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 73, "</s>": 74}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json CHANGED
@@ -1 +1 @@
1
- {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "H": 8, "I": 9, "J": 10, "K": 11, "L": 12, "M": 13, "N": 14, "O": 15, "P": 16, "Q": 17, "R": 18, "S": 19, "T": 20, "U": 21, "V": 22, "W": 23, "X": 24, "Y": 25, "Z": 26, "a": 27, "b": 28, "c": 29, "d": 30, "e": 31, "f": 32, "g": 33, "h": 34, "i": 35, "j": 36, "k": 37, "l": 38, "m": 39, "n": 40, "o": 41, "p": 42, "r": 43, "s": 44, "t": 45, "u": 46, "v": 47, "w": 48, "x": 49, "y": 50, "z": 51, "\u00c7": 52, "\u00d6": 53, "\u00dc": 54, "\u00e2": 55, "\u00e7": 56, "\u00eb": 57, "\u00ee": 58, "\u00f6": 59, "\u00fc": 60, "\u011f": 61, "\u0130": 62, "\u0131": 63, "\u015e": 64, "\u015f": 65, "|": 0, "[UNK]": 67, "[PAD]": 68}
 
1
+ {"ក": 1, "ខ": 2, "គ": 3, "ឃ": 4, "ង": 5, "ច": 6, "ឆ": 7, "ជ": 8, "ឈ": 9, "ញ": 10, "ដ": 11, "ឋ": 12, "ឌ": 13, "ឍ": 14, "ណ": 15, "ត": 16, "ថ": 17, "ទ": 18, "ធ": 19, "ន": 20, "ប": 21, "ផ": 22, "ព": 23, "ភ": 24, "ម": 25, "យ": 26, "រ": 27, "ល": 28, "វ": 29, "ស": 30, "ហ": 31, "ឡ": 32, "អ": 33, "ឥ": 34, "ឧ": 35, "ឪ": 36, "ឫ": 37, "ឬ": 38, "ឭ": 39, "ឮ": 40, "ឯ": 41, "ឱ": 42, "ា": 43, "ិ": 44, "ី": 45, "ឹ": 46, "ឺ": 47, "ុ": 48, "ូ": 49, "ួ": 50, "ើ": 51, "ឿ": 52, "ៀ": 53, "េ": 54, "ែ": 55, "ៃ": 56, "ោ": 57, "ៅ": 58, "ំ": 59, "ះ": 60, "ៈ": 61, "៉": 62, "៊": 63, "់": 64, "៌": 65, "៍": 66, "៎": 67, "៏": 68, "័": 69, "្": 70, "|": 0, "[UNK]": 71, "[PAD]": 72}