dataprizma
/

whisper-large-v3-turbo

@@ -1,15 +1,30 @@
 ---
 language:
-- hi
-license: apache-2.0
-base_model: openai/whisper-small
 tags:
 - generated_from_trainer
 datasets:
 - mozilla-foundation/common_voice_16_1
 model-index:
 - name: Whisper Large v3 Turbo - Bahriddin Muminov
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -17,7 +32,10 @@ should probably proofread and complete it, then remove this comment. -->
 # Whisper Large v3 Turbo - Bahriddin Muminov
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 16.1 dataset.
 ## Model description
@@ -42,10 +60,21 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 500
-- training_steps: 4000
 - mixed_precision_training: Native AMP
 ### Framework versions
 - Transformers 4.37.2

 ---
 language:
+- uz
+license: mit
+base_model: openai/whisper-large-v3-turbo
 tags:
 - generated_from_trainer
 datasets:
 - mozilla-foundation/common_voice_16_1
+metrics:
+- wer
 model-index:
 - name: Whisper Large v3 Turbo - Bahriddin Muminov
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: Common Voice 16.1
+      type: mozilla-foundation/common_voice_16_1
+      config: uz
+      split: test
+      args: 'config: uz, split: test'
+    metrics:
+    - name: Wer
+      type: wer
+      value: 28.258182136033867
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 # Whisper Large v3 Turbo - Bahriddin Muminov
+This model is a fine-tuned version of [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) on the Uzbek (`uz`) configuration of the Common Voice 16.1 dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.2958
+- Wer: 28.2582
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 1000
+- training_steps: 10000
 - mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Wer     |
+|:-------------:|:-----:|:-----:|:---------------:|:-------:|
+| 0.429         | 0.66  | 2000  | 0.4073          | 38.0018 |
+| 0.2671        | 1.32  | 4000  | 0.3378          | 31.0778 |
+| 0.2511        | 1.98  | 6000  | 0.3102          | 29.2484 |
+| 0.1539        | 2.64  | 8000  | 0.3022          | 30.0763 |
+| 0.111         | 3.3   | 10000 | 0.2958          | 28.2582 |
 ### Framework versions
 - Transformers 4.37.2

generation_config.json CHANGED Viewed

@@ -32,6 +32,20 @@
   "bos_token_id": 50257,
   "decoder_start_token_id": 50258,
   "eos_token_id": 50257,
   "is_multilingual": true,
   "lang_to_id": {
     "<|af|>": 50327,
@@ -135,7 +149,6 @@
     "<|yue|>": 50358,
     "<|zh|>": 50260
   },
-  "language": "uzbek",
   "max_initial_timestamp_index": 50,
   "max_length": 448,
   "no_timestamps_token_id": 50364,
@@ -232,7 +245,6 @@
     50362,
     50363
   ],
-  "task": "transcribe",
   "task_to_id": {
     "transcribe": 50360,
     "translate": 50359

   "bos_token_id": 50257,
   "decoder_start_token_id": 50258,
   "eos_token_id": 50257,
+  "forced_decoder_ids": [
+    [
+      1,
+      50337
+    ],
+    [
+      2,
+      50360
+    ],
+    [
+      3,
+      50364
+    ]
+  ],
   "is_multilingual": true,
   "lang_to_id": {
     "<|af|>": 50327,
     "<|yue|>": 50358,
     "<|zh|>": 50260
   },
   "max_initial_timestamp_index": 50,
   "max_length": 448,
   "no_timestamps_token_id": 50364,
     50362,
     50363
   ],
   "task_to_id": {
     "transcribe": 50360,
     "translate": 50359