lilitket committed on
Commit
ecc849a
1 Parent(s): ea5669b

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 89, "</s>": 90}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"(": 1, ")": 2, ",": 3, ".": 4, "Ա": 5, "Բ": 6, "Գ": 7, "Դ": 8, "Ե": 9, "Զ": 10, "Է": 11, "Ը": 12, "Թ": 13, "Ժ": 14, "Ի": 15, "Լ": 16, "Խ": 17, "Ծ": 18, "Կ": 19, "Հ": 20, "Ձ": 21, "Ղ": 22, "Ճ": 23, "Մ": 24, "Յ": 25, "Ն": 26, "Շ": 27, "Ո": 28, "Չ": 29, "Պ": 30, "Ջ": 31, "Ռ": 32, "Ս": 33, "Վ": 34, "Տ": 35, "Ց": 36, "Ւ": 37, "Փ": 38, "Ք": 39, "Օ": 40, "Ֆ": 41, "՚": 42, "՛": 43, "՝": 44, "՞": 45, "ա": 46, "բ": 47, "գ": 48, "դ": 49, "ե": 50, "զ": 51, "է": 52, "ը": 53, "թ": 54, "ժ": 55, "ի": 56, "լ": 57, "խ": 58, "ծ": 59, "կ": 60, "հ": 61, "ձ": 62, "ղ": 63, "ճ": 64, "մ": 65, "յ": 66, "ն": 67, "շ": 68, "ո": 69, "չ": 70, "պ": 71, "ջ": 72, "ռ": 73, "ս": 74, "վ": 75, "տ": 76, "ր": 77, "ց": 78, "ւ": 79, "փ": 80, "ք": 81, "օ": 82, "ֆ": 83, "և": 84, "։": 85, "֊": 86, "|": 0, "[UNK]": 87, "[PAD]": 88}