eonrad
/

whisper-small-dv

@@ -1,41 +1,43 @@
 ---
 library_name: transformers
 license: apache-2.0
-base_model: openai/whisper-tiny
 tags:
 - generated_from_trainer
 datasets:
-- PolyAI/minds14
 metrics:
 - wer
 model-index:
-- name: whisper_tiny-finetuned-minds14
   results:
   - task:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
-      name: PolyAI/minds14
-      type: PolyAI/minds14
-      config: en-US
-      split: train
-      args: en-US
     metrics:
     - name: Wer
       type: wer
-      value: 0.32585596221959856
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# whisper_tiny-finetuned-minds14
-This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the PolyAI/minds14 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7246
-- Wer Ortho: 0.3233
-- Wer: 0.3259
 ## Model description
@@ -68,9 +70,9 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch   | Step | Validation Loss | Wer Ortho | Wer    |
-|:-------------:|:-------:|:----:|:---------------:|:---------:|:------:|
-| 0.0007        | 17.8571 | 500  | 0.7246          | 0.3233    | 0.3259 |
 ### Framework versions

 ---
 library_name: transformers
+language:
+- dv
 license: apache-2.0
+base_model: openai/whisper-small
 tags:
 - generated_from_trainer
 datasets:
+- mozilla-foundation/common_voice_13_0
 metrics:
 - wer
 model-index:
+- name: Whisper Small Dv - Sanchit Gandhi
   results:
   - task:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
+      name: Common Voice 13
+      type: mozilla-foundation/common_voice_13_0
+      config: dv
+      split: test
+      args: dv
     metrics:
     - name: Wer
       type: wer
+      value: 13.504538025524221
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Whisper Small Dv - Sanchit Gandhi
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 13 dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1714
+- Wer Ortho: 62.7829
+- Wer: 13.5045
 ## Model description
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Wer Ortho | Wer     |
+|:-------------:|:------:|:----:|:---------------:|:---------:|:-------:|
+| 0.2436        | 1.6313 | 500  | 0.1714          | 62.7829   | 13.5045 |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "openai/whisper-tiny",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,
@@ -10,17 +10,17 @@
   "begin_suppress_tokens": null,
   "bos_token_id": 50257,
   "classifier_proj_size": 256,
-  "d_model": 384,
-  "decoder_attention_heads": 6,
-  "decoder_ffn_dim": 1536,
   "decoder_layerdrop": 0.0,
-  "decoder_layers": 4,
   "decoder_start_token_id": 50258,
   "dropout": 0.0,
-  "encoder_attention_heads": 6,
-  "encoder_ffn_dim": 1536,
   "encoder_layerdrop": 0.0,
-  "encoder_layers": 4,
   "eos_token_id": 50257,
   "forced_decoder_ids": [
     [
@@ -49,7 +49,7 @@
   "max_target_positions": 448,
   "median_filter_width": 7,
   "model_type": "whisper",
-  "num_hidden_layers": 4,
   "num_mel_bins": 80,
   "pad_token_id": 50257,
   "scale_embedding": false,

 {
+  "_name_or_path": "openai/whisper-small",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,
   "begin_suppress_tokens": null,
   "bos_token_id": 50257,
   "classifier_proj_size": 256,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
   "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
   "decoder_start_token_id": 50258,
   "dropout": 0.0,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
   "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
   "eos_token_id": 50257,
   "forced_decoder_ids": [
     [
   "max_target_positions": 448,
   "median_filter_width": 7,
   "model_type": "whisper",
+  "num_hidden_layers": 12,
   "num_mel_bins": 80,
   "pad_token_id": 50257,
   "scale_embedding": false,

generation_config.json CHANGED Viewed

@@ -1,27 +1,43 @@
 {
   "alignment_heads": [
     [
-      2,
-      2
     ],
     [
-      3,
       0
     ],
     [
-      3,
-      2
     ],
     [
-      3,
-      3
     ],
     [
-      3,
-      4
     ],
     [
-      3,
       5
     ]
   ],
@@ -234,8 +250,6 @@
     49870,
     50254,
     50258,
-    50358,
-    50359,
     50360,
     50361,
     50362

 {
   "alignment_heads": [
     [
+      5,
+      3
+    ],
+    [
+      5,
+      9
     ],
     [
+      8,
       0
     ],
     [
+      8,
+      4
     ],
     [
+      8,
+      7
     ],
     [
+      8,
+      8
+    ],
+    [
+      9,
+      0
+    ],
+    [
+      9,
+      7
+    ],
+    [
+      9,
+      9
     ],
     [
+      10,
       5
     ]
   ],
     49870,
     50254,
     50258,
     50360,
     50361,
     50362

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:220568787aa0e82b780bef3788f299f67af413baa8f39388f218c318e0a2dd67
-size 151061672

 version https://git-lfs.github.com/spec/v1
+oid sha256:c98848fe9f966d34d6861fa5a9c9086d76812d64c473a8f08cf91283aeea8d65
+size 966995080

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30942822f9c0c09daf08569ae0eb85413a6cb2edc01296508247abf1ed8bbb6f
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:40b883be428f81fa724a6591b81844c37352a5c0c9f99a087b8380fa59330d1a
 size 5432