ndeclarke committed
Commit 7effc69
1 Parent(s): 774231b

Upload tokenizer

Files changed (4)
  1. README.md +3 -3
  2. added_tokens.json +2 -2
  3. tokenizer_config.json +5 -6
  4. vocab.json +56 -0
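This commit moves the tokenizer's target language from Tamil ("tam") to Telugu ("tel") and appends a Telugu character vocabulary, which shifts the special-token ids up by one. A minimal usage sketch follows; the repository id `ndeclarke/wav2vec2-mms-1b-CV17.0` is inferred from the model-index name (not stated in the diff), and the presence of a matching "tel" adapter in the repo is likewise an assumption:

```python
# A minimal sketch of loading the tokenizer uploaded here. The repo id
# and the "tel" adapter are assumptions, not confirmed by the diff.
from transformers import AutoProcessor, Wav2Vec2ForCTC

repo = "ndeclarke/wav2vec2-mms-1b-CV17.0"  # hypothetical repo id

# target_lang="tel" selects the Telugu block added to vocab.json below
processor = AutoProcessor.from_pretrained(repo, target_lang="tel")
model = Wav2Vec2ForCTC.from_pretrained(
    repo,
    target_lang="tel",             # load the per-language adapter weights
    ignore_mismatched_sizes=True,  # LM head size follows the new vocab
)
```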
README.md CHANGED
@@ -1,11 +1,11 @@
  ---
+ base_model: facebook/mms-1b-all
+ datasets:
+ - common_voice_17_0
  library_name: transformers
  license: cc-by-nc-4.0
- base_model: facebook/mms-1b-all
  tags:
  - generated_from_trainer
- datasets:
- - common_voice_17_0
  model-index:
  - name: wav2vec2-mms-1b-CV17.0
    results: []
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
  {
-   "</s>": 54,
-   "<s>": 53
+   "</s>": 55,
+   "<s>": 54
  }
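The +1 shift follows from the vocabulary swap: the "tel" block (see vocab.json below) tops out at id 51 ("్"), [UNK] and [PAD] take 52 and 53, so <s> and </s> land on 54 and 55. A quick consistency check of that invariant, assuming a local clone of the repo files:

```python
# Sketch: special-token ids must continue right after the largest id in
# the active vocabulary block. Paths assume a local clone of the repo.
import json

with open("vocab.json") as f:
    vocab = json.load(f)["tel"]      # ids 0-53, including [UNK]/[PAD]
with open("added_tokens.json") as f:
    added = json.load(f)             # {"</s>": 55, "<s>": 54}

next_id = max(vocab.values()) + 1    # 53 + 1 = 54
assert added["<s>"] == next_id       # 54
assert added["</s>"] == next_id + 1  # 55
```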
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
  {
    "added_tokens_decoder": {
-     "51": {
+     "52": {
        "content": "[UNK]",
        "lstrip": true,
        "normalized": false,
@@ -8,7 +8,7 @@
        "single_word": false,
        "special": false
      },
-     "52": {
+     "53": {
        "content": "[PAD]",
        "lstrip": true,
        "normalized": false,
@@ -16,7 +16,7 @@
        "single_word": false,
        "special": false
      },
-     "53": {
+     "54": {
        "content": "<s>",
        "lstrip": false,
        "normalized": false,
@@ -24,7 +24,7 @@
        "single_word": false,
        "special": true
      },
-     "54": {
+     "55": {
        "content": "</s>",
        "lstrip": false,
        "normalized": false,
@@ -39,9 +39,8 @@
    "eos_token": "</s>",
    "model_max_length": 1000000000000000019884624838656,
    "pad_token": "[PAD]",
-   "processor_class": "Wav2Vec2Processor",
    "replace_word_delimiter_char": " ",
-   "target_lang": "tam",
+   "target_lang": "tel",
    "tokenizer_class": "Wav2Vec2CTCTokenizer",
    "unk_token": "[UNK]",
    "word_delimiter_token": "|"
vocab.json CHANGED
@@ -128,5 +128,61 @@
    "’": 48,
    "●": 49,
    "◯": 50
+   },
+   "tel": {
+     "[PAD]": 53,
+     "[UNK]": 52,
+     "|": 0,
+     "ం": 1,
+     "అ": 2,
+     "ఆ": 3,
+     "ఇ": 4,
+     "ఈ": 5,
+     "ఉ": 6,
+     "ఊ": 7,
+     "ఎ": 8,
+     "ఏ": 9,
+     "ఒ": 10,
+     "క": 11,
+     "ఖ": 12,
+     "గ": 13,
+     "ఘ": 14,
+     "చ": 15,
+     "జ": 16,
+     "ట": 17,
+     "డ": 18,
+     "ణ": 19,
+     "త": 20,
+     "థ": 21,
+     "ద": 22,
+     "ధ": 23,
+     "న": 24,
+     "ప": 25,
+     "ఫ": 26,
+     "బ": 27,
+     "భ": 28,
+     "మ": 29,
+     "య": 30,
+     "ర": 31,
+     "ల": 32,
+     "ళ": 33,
+     "వ": 34,
+     "శ": 35,
+     "ష": 36,
+     "స": 37,
+     "హ": 38,
+     "ా": 39,
+     "ి": 40,
+     "ీ": 41,
+     "ు": 42,
+     "ూ": 43,
+     "ృ": 44,
+     "ె": 45,
+     "ే": 46,
+     "ై": 47,
+     "ొ": 48,
+     "ో": 49,
+     "ౌ": 50,
+     "్": 51
    }
  }
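With the Telugu block active, encoding maps each character to the ids listed above; "|" (id 0) is the word delimiter per `word_delimiter_token`. A small sketch, again under the assumed repo id:

```python
# Sketch: character-to-id mapping under the "tel" block. The repo id is
# the same assumption as above.
from transformers import Wav2Vec2CTCTokenizer

tok = Wav2Vec2CTCTokenizer.from_pretrained(
    "ndeclarke/wav2vec2-mms-1b-CV17.0",
    target_lang="tel",
)
ids = tok("తెలుగు").input_ids
print(ids)              # [20, 45, 32, 42, 13, 42] per the table above
print(tok.decode(ids))  # "తెలుగు"
```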