KISSz commited on
Commit
0c062d5
1 Parent(s): 4f31956

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 70, "</s>": 71}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": "/root/.cache/huggingface/transformers/e2f4de852e6b998b568b7742262188577a6228605e0c618af06f515e567e5345.a21d51735cf8667bcd610f057e88548d5d6a381401f6b4501a8bc6c1a9dc8498", "tokenizer_file": null, "name_or_path": "airesearch/wav2vec2-large-xlsr-53-th", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ฑ": 0, "ๅ": 1, "ก": 2, "ง": 3, "ฒ": 4, "ะ": 5, "๊": 6, "้": 7, "ฌ": 8, "ซ": 9, "ด": 10, "ฯ": 11, "ใ": 12, "ึ": 13, "ญ": 14, "่": 15, "า": 16, "ฤ": 17, "๋": 18, "อ": 19, "ฬ": 20, "ท": 21, "โ": 22, "ภ": 23, "ย": 24, "็": 25, "ล": 26, "ุ": 27, "เ": 28, "ฮ": 29, "ฝ": 30, "ป": 31, "ี": 32, "บ": 33, "ฐ": 34, "ต": 35, "ถ": 36, "ศ": 37, "ฟ": 38, "ณ": 39, "ห": 40, "ร": 41, "พ": 43, "ฆ": 44, "ั": 45, "ค": 46, "ว": 47, "ฏ": 48, "จ": 49, "แ": 50, "ม": 51, "ฎ": 52, "ฉ": 53, "์": 54, "ษ": 55, "ำ": 56, "ผ": 57, "ข": 58, "ไ": 59, "ู": 60, "ื": 61, "น": 62, "ช": 63, "ิ": 64, "ธ": 65, "ฃ": 66, "ส": 67, "|": 42, "[UNK]": 68, "[PAD]": 69}