lilitket committed on
Commit
a1e6d92
1 Parent(s): a36bef9

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 88, "</s>": 89}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"(": 1, ")": 2, ",": 3, ".": 4, "Ա": 5, "Բ": 6, "Գ": 7, "Դ": 8, "Ե": 9, "Զ": 10, "Է": 11, "Ը": 12, "Թ": 13, "Ժ": 14, "Ի": 15, "Լ": 16, "Խ": 17, "Ծ": 18, "Կ": 19, "Հ": 20, "Ձ": 21, "Ղ": 22, "Ճ": 23, "Մ": 24, "Յ": 25, "Ն": 26, "Շ": 27, "Ո": 28, "Չ": 29, "Պ": 30, "Ջ": 31, "Ռ": 32, "Ս": 33, "Վ": 34, "Տ": 35, "Ց": 36, "Ւ": 37, "Փ": 38, "Ք": 39, "Օ": 40, "Ֆ": 41, "՛": 42, "՝": 43, "՞": 44, "ա": 45, "բ": 46, "գ": 47, "դ": 48, "ե": 49, "զ": 50, "է": 51, "ը": 52, "թ": 53, "ժ": 54, "ի": 55, "լ": 56, "խ": 57, "ծ": 58, "կ": 59, "հ": 60, "ձ": 61, "ղ": 62, "ճ": 63, "մ": 64, "յ": 65, "ն": 66, "շ": 67, "ո": 68, "չ": 69, "պ": 70, "ջ": 71, "ռ": 72, "ս": 73, "վ": 74, "տ": 75, "ր": 76, "ց": 77, "ւ": 78, "փ": 79, "ք": 80, "օ": 81, "ֆ": 82, "և": 83, "։": 84, "֊": 85, "|": 0, "[UNK]": 86, "[PAD]": 87}