kingabzpro
committed on
Commit
•
0f7f9d3
1
Parent(s):
2f3bb8f
add tokenizer
Browse files
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 63, "</s>": 64}
|
runs/Jan21_21-07-11_e132e839b4f5/events.out.tfevents.1642799269.e132e839b4f5.72.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c96384b3a2f571bc90dc55626f5092b6e62e4f83c0371fe2beab17b1f2a1f907
|
3 |
+
size 5991
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": "/root/.cache/huggingface/transformers/02c153bf53bcd0a6d3bc7f4a41f5448df74644b94a7137e6471c3cd75a57452f.a21d51735cf8667bcd610f057e88548d5d6a381401f6b4501a8bc6c1a9dc8498", "tokenizer_file": null, "name_or_path": "manandey/wav2vec2-large-xlsr-punjabi", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ਜ": 0, "ਖ": 1, "ਝ": 2, "ਗ਼": 3, "ੁ": 4, "ਰ": 5, "ਸ": 6, "ਢ": 7, "ਦ": 8, "ੋ": 9, "ਠ": 10, "ਣ": 11, "ਸ਼": 12, "ਊ": 13, "ੌ": 14, "ਹ": 15, "ਓ": 16, "ਆ": 17, "ਇ": 18, "ੰ": 19, "ਾ": 20, "ਘ": 21, "ਕ": 23, "ੀ": 24, "ਤ": 25, "ਐ": 26, "ਜ਼": 27, "ਡ": 28, "ਈ": 29, "ਚ": 30, "ਛ": 31, "ਖ਼": 32, "ਧ": 33, "ੜ": 34, "ੱ": 35, "ਥ": 36, "ਫ਼": 37, "ਬ": 38, "ਂ": 39, "ਟ": 40, "ਭ": 41, "ਨ": 42, "ੂ": 43, "ਮ": 44, "੍": 45, "ਪ": 46, "ਵ": 47, "ਉ": 48, "ਫ": 49, "ਏ": 50, "ਔ": 51, "ੇ": 52, "ਲ਼": 53, "਼": 54, "ਯ": 55, "ਗ": 56, "ਅ": 57, "ਿ": 58, "ੈ": 59, "ਲ": 60, "|": 22, "[UNK]": 61, "[PAD]": 62}
|