|
--- |
|
language: |
|
- fi |
|
license: apache-2.0 |
|
tags: |
|
- whisper-event |
|
- finnish |
|
datasets: |
|
- mozilla-foundation/common_voice_11_0 |
|
- google/fleurs |
|
metrics: |
|
- wer |
|
- cer |
|
model-index: |
|
- name: Whisper Large V3 Finnish |
|
results: |
|
- task: |
|
name: Automatic Speech Recognition |
|
type: automatic-speech-recognition |
|
dataset: |
|
name: Common Voice 11.0 |
|
type: mozilla-foundation/common_voice_11_0 |
|
config: fi |
|
split: test |
|
args: fi |
|
metrics: |
|
- name: Wer |
|
type: wer |
|
value: 8.23 |
|
- name: Cer |
|
type: cer |
|
value: 1.43 |
|
- task: |
|
name: Automatic Speech Recognition |
|
type: automatic-speech-recognition |
|
dataset: |
|
name: FLEURS |
|
type: google/fleurs |
|
config: fi_fi |
|
split: test |
|
args: fi_fi |
|
metrics: |
|
- name: Wer |
|
type: wer |
|
value: 8.21 |
|
- name: Cer |
|
type: cer |
|
value: 3.23 |
|
--- |
|
|
|
<h3>This is our improved Whisper model, now fine-tuned from OpenAI Whisper Large V3</h3> |
|
<p>Compared with our previous fine-tuned V2 model, the results improve as follows:</p> |
|
<p>CV11 WER: 10.42 &rarr; 8.23</p> |
|
<p>Fleurs WER: 10.20 &rarr; 8.21</p> |
|
|
|
|
|
Original Whisper Large V3 (before fine-tuning): |
|
- CV11 |
|
- WER: 14.81 |
|
- WER NORMALIZED: 10.82 |
|
- CER: 2.7 |
|
- CER NORMALIZED: 2.07 |
|
|
|
- Fleurs |
|
- WER: 12.04 |
|
- WER NORMALIZED: 9.63 |
|
- CER: 2.48 |
|
- CER NORMALIZED: 3.64 |
|
|
|
|
|
After fine-tuning V3: |
|
|
|
- @14000 steps |
|
- CV11 |
|
- WER: 11.36 |
|
- WER NORMALIZED: 8.31 |
|
- CER: 1.93 |
|
- CER NORMALIZED: 1.48 |
|
|
|
- Fleurs |
|
- WER: 10.2 |
|
- WER NORMALIZED: 8.56 |
|
- CER: 2.26 |
|
- CER NORMALIZED: 3.54 |
|
|
|
- @32000 steps |
|
- CV11 |
|
- WER: 11.47 |
|
- WER NORMALIZED: 8.23 |
|
- CER: 1.91 |
|
- CER NORMALIZED: 1.43 |
|
|
|
- Fleurs |
|
- WER: 10.1 |
|
- WER NORMALIZED: 8.21 |
|
- CER: 2.2 |
|
- CER NORMALIZED: 3.23 |