arampacha committed on
Commit
650f83d
1 Parent(s): 010ba88

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 42, "</s>": 43}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ա": 1, "բ": 2, "գ": 3, "դ": 4, "ե": 5, "զ": 6, "է": 7, "ը": 8, "թ": 9, "ժ": 10, "ի": 11, "լ": 12, "խ": 13, "ծ": 14, "կ": 15, "հ": 16, "ձ": 17, "ղ": 18, "ճ": 19, "մ": 20, "յ": 21, "ն": 22, "շ": 23, "ո": 24, "չ": 25, "պ": 26, "ջ": 27, "ռ": 28, "ս": 29, "վ": 30, "տ": 31, "ր": 32, "ց": 33, "ւ": 34, "փ": 35, "ք": 36, "օ": 37, "ֆ": 38, "և": 39, "|": 0, "[UNK]": 40, "[PAD]": 41}