anuragshas committed on
Commit
50e2ea6
1 Parent(s): aa975ff

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 63, "</s>": 64}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ਂ": 1, "ਅ": 2, "ਆ": 3, "ਇ": 4, "ਈ": 5, "ਉ": 6, "ਊ": 7, "ਏ": 8, "ਐ": 9, "ਓ": 10, "ਔ": 11, "ਕ": 12, "ਖ": 13, "ਗ": 14, "ਘ": 15, "ਚ": 16, "ਛ": 17, "ਜ": 18, "ਝ": 19, "ਟ": 20, "ਠ": 21, "ਡ": 22, "ਢ": 23, "ਣ": 24, "ਤ": 25, "ਥ": 26, "ਦ": 27, "ਧ": 28, "ਨ": 29, "ਪ": 30, "ਫ": 31, "ਬ": 32, "ਭ": 33, "ਮ": 34, "ਯ": 35, "ਰ": 36, "ਲ": 37, "ਲ਼": 38, "ਵ": 39, "ਸ਼": 40, "ਸ": 41, "ਹ": 42, "਼": 43, "ਾ": 44, "ਿ": 45, "ੀ": 46, "ੁ": 47, "ੂ": 48, "ੇ": 49, "ੈ": 50, "ੋ": 51, "ੌ": 52, "੍": 53, "ਖ਼": 54, "ਗ਼": 55, "ਜ਼": 56, "ੜ": 57, "ਫ਼": 58, "ੰ": 59, "ੱ": 60, "|": 0, "[UNK]": 61, "[PAD]": 62}