hyyoka committed on
Commit
937dcd1
1 Parent(s): fdce186

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<s>": 50, "</s>": 51, "<pad>": 52}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "pad_token_id": 49, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "fleek/wav2vec-large-xlsr-korean", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"ㅞ": 0, "ㅆ": 1, "ㅐ": 2, "ㅋ": 3, "ㅢ": 4, "ㄹ": 5, "ㅓ": 6, "ㄻ": 7, "ㅣ": 8, "ㄼ": 9, "ㅄ": 10, "ㅝ": 11, "ㅇ": 12, "ㅕ": 13, "ㅟ": 14, "ㄱ": 15, "ㅒ": 16, "ㅘ": 17, "ㅛ": 18, "ㄵ": 19, "ㅠ": 20, "ㄶ": 21, "ㅈ": 22, "ㅜ": 23, "ㅀ": 24, "ㅏ": 25, "ㄲ": 26, "ㄷ": 27, "ㅍ": 28, "ㅗ": 29, "ㅎ": 30, "ㅑ": 31, "ㅡ": 32, "ㅃ": 33, "ㅌ": 34, "ㅅ": 35, "ㅙ": 36, "ㅚ": 37, "ㅁ": 38, "ㅖ": 40, "ㅂ": 41, "ㄴ": 42, "ㅉ": 43, "ㄺ": 44, "ㅔ": 45, "ㄸ": 46, "ㅊ": 47, "|": 39, "[UNK]": 48, "[PAD]": 49}