project2you committed on
Commit
c24cb8d
1 Parent(s): 92dc02c

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 71, "</s>": 72}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"๋": 0, "่": 1, "ิ": 2, "ง": 3, "ก": 4, "ท": 5, "ศ": 6, "’": 7, "ฤ": 8, "ร": 9, "ซ": 10, "ฒ": 11, "ด": 12, "ธ": 13, "ฏ": 14, "ั": 15, "ณ": 16, "ฟ": 17, "ห": 18, "ฉ": 19, "ำ": 20, "โ": 21, "ภ": 22, "ม": 23, "ฑ": 24, "ถ": 25, "ใ": 26, "ึ": 27, "ะ": 28, "ุ": 29, "เ": 30, "ฝ": 31, "ี": 32, "ํ": 33, "ช": 34, "ว": 35, "ฬ": 36, "ป": 37, "แ": 38, "ฮ": 39, "จ": 40, "ู": 41, "ๅ": 42, "ผ": 43, "ญ": 44, "อ": 45, "๊": 46, "'": 47, "บ": 48, "็": 49, "ื": 50, "ไ": 51, "ฐ": 52, "์": 53, "ๆ": 54, "ฎ": 55, "ล": 56, "า": 57, "ฆ": 59, "ค": 60, "น": 61, "พ": 62, "ษ": 63, "ส": 64, "ย": 65, "ต": 66, "ข": 67, "้": 68, "|": 58, "[UNK]": 69, "[PAD]": 70}