metythorn committed on
Commit
81b76bb
•
1 Parent(s): 26d94ad

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 73, "</s>": 74}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ក": 1, "ខ": 2, "គ": 3, "ឃ": 4, "ង": 5, "ច": 6, "ឆ": 7, "ជ": 8, "ឈ": 9, "ញ": 10, "ដ": 11, "ឋ": 12, "ឌ": 13, "ឍ": 14, "ណ": 15, "ត": 16, "ថ": 17, "ទ": 18, "ធ": 19, "ន": 20, "ប": 21, "ផ": 22, "ព": 23, "ភ": 24, "ម": 25, "យ": 26, "រ": 27, "ល": 28, "វ": 29, "ស": 30, "ហ": 31, "ឡ": 32, "អ": 33, "ឥ": 34, "ឧ": 35, "ឪ": 36, "ឫ": 37, "ឬ": 38, "ឭ": 39, "ឮ": 40, "ឯ": 41, "ឱ": 42, "ា": 43, "ិ": 44, "ី": 45, "ឹ": 46, "ឺ": 47, "ុ": 48, "ូ": 49, "ួ": 50, "ើ": 51, "ឿ": 52, "ៀ": 53, "េ": 54, "ែ": 55, "ៃ": 56, "ោ": 57, "ៅ": 58, "ំ": 59, "ះ": 60, "ៈ": 61, "៉": 62, "៊": 63, "់": 64, "៌": 65, "៍": 66, "៎": 67, "៏": 68, "័": 69, "្": 70, "|": 0, "[UNK]": 71, "[PAD]": 72}