fsicoli committed on
Commit
38bbb8d
1 Parent(s): 742a8a7

Upload 17 files

Browse files
Files changed (3) hide show
  1. README.md +18 -19
  2. all_results.json +5 -5
  3. eval_results.json +6 -6
README.md CHANGED
@@ -4,36 +4,34 @@ base_model: openai/whisper-medium
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
- - mozilla-foundation/common_voice_16_1
8
  metrics:
9
  - wer
10
  model-index:
11
- - name: whisper-large-v3-pt-cv16-fleurs
12
  results:
13
  - task:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
- name: mozilla-foundation/common_voice_16_1 pt
18
- type: mozilla-foundation/common_voice_16_1
19
- config: pt
20
- split: test
21
- args: pt
22
  metrics:
23
  - name: Wer
24
  type: wer
25
- value: 0.11905377038591959
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
29
  should probably proofread and complete it, then remove this comment. -->
30
 
31
- # whisper-large-v3-pt-cv16-fleurs
32
 
33
- This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the mozilla-foundation/common_voice_16_1 pt dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.1975
36
- - Wer: 0.1191
37
 
38
  ## Model description
39
 
@@ -58,11 +56,12 @@ The following hyperparameters were used during training:
58
  - seed: 42
59
  - distributed_type: multi-GPU
60
  - num_devices: 2
61
- - total_train_batch_size: 2
 
62
  - total_eval_batch_size: 2
63
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
64
  - lr_scheduler_type: linear
65
- - lr_scheduler_warmup_steps: 2000
66
  - training_steps: 5000
67
  - mixed_precision_training: Native AMP
68
 
@@ -70,11 +69,11 @@ The following hyperparameters were used during training:
70
 
71
  | Training Loss | Epoch | Step | Validation Loss | Wer |
72
  |:-------------:|:-----:|:----:|:---------------:|:------:|
73
- | 0.2614 | 0.06 | 1000 | 0.2986 | 0.1466 |
74
- | 0.2632 | 0.13 | 2000 | 0.2244 | 0.1316 |
75
- | 0.1694 | 0.19 | 3000 | 0.2086 | 0.1234 |
76
- | 0.1658 | 0.26 | 4000 | 0.1987 | 0.1205 |
77
- | 0.1391 | 0.32 | 5000 | 0.1975 | 0.1191 |
78
 
79
 
80
  ### Framework versions
 
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
+ - fsicoli/cv16-fleurs
8
  metrics:
9
  - wer
10
  model-index:
11
+ - name: whisper-medium-pt-cv16-fleurs
12
  results:
13
  - task:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
+ name: fsicoli/cv16-fleurs default
18
+ type: fsicoli/cv16-fleurs
19
+ args: default
 
 
20
  metrics:
21
  - name: Wer
22
  type: wer
23
+ value: 0.09421927983206846
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
27
  should probably proofread and complete it, then remove this comment. -->
28
 
29
+ # whisper-medium-pt-cv16-fleurs
30
 
31
+ This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the fsicoli/cv16-fleurs default dataset.
32
  It achieves the following results on the evaluation set:
33
+ - Loss: 0.1409
34
+ - Wer: 0.0942
35
 
36
  ## Model description
37
 
 
56
  - seed: 42
57
  - distributed_type: multi-GPU
58
  - num_devices: 2
59
+ - gradient_accumulation_steps: 16
60
+ - total_train_batch_size: 32
61
  - total_eval_batch_size: 2
62
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
63
  - lr_scheduler_type: linear
64
+ - lr_scheduler_warmup_steps: 5000
65
  - training_steps: 5000
66
  - mixed_precision_training: Native AMP
67
 
 
69
 
70
  | Training Loss | Epoch | Step | Validation Loss | Wer |
71
  |:-------------:|:-----:|:----:|:---------------:|:------:|
72
+ | 0.2552 | 0.93 | 1000 | 0.2200 | 0.1220 |
73
+ | 0.1928 | 1.87 | 2000 | 0.1645 | 0.1062 |
74
+ | 0.1646 | 2.8 | 3000 | 0.1508 | 0.1016 |
75
+ | 0.1333 | 3.74 | 4000 | 0.1438 | 0.0970 |
76
+ | 0.1027 | 4.67 | 5000 | 0.1409 | 0.0942 |
77
 
78
 
79
  ### Framework versions
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 4.67,
3
- "eval_loss": 0.19753539562225342,
4
- "eval_runtime": 7876.6978,
5
  "eval_samples": 9414,
6
- "eval_samples_per_second": 1.195,
7
- "eval_steps_per_second": 0.598,
8
- "eval_wer": 0.11905377038591959,
9
  "train_loss": 0.2694411336898804,
10
  "train_runtime": 106369.5753,
11
  "train_samples": 34267,
 
1
  {
2
  "epoch": 4.67,
3
+ "eval_loss": 0.14086556434631348,
4
+ "eval_runtime": 7908.2656,
5
  "eval_samples": 9414,
6
+ "eval_samples_per_second": 1.19,
7
+ "eval_steps_per_second": 0.595,
8
+ "eval_wer": 0.09421927983206846,
9
  "train_loss": 0.2694411336898804,
10
  "train_runtime": 106369.5753,
11
  "train_samples": 34267,
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.32,
3
- "eval_loss": 0.19753539562225342,
4
- "eval_runtime": 7876.6978,
5
  "eval_samples": 9414,
6
- "eval_samples_per_second": 1.195,
7
- "eval_steps_per_second": 0.598,
8
- "eval_wer": 0.11905377038591959
9
  }
 
1
  {
2
+ "epoch": 4.67,
3
+ "eval_loss": 0.14086556434631348,
4
+ "eval_runtime": 7908.2656,
5
  "eval_samples": 9414,
6
+ "eval_samples_per_second": 1.19,
7
+ "eval_steps_per_second": 0.595,
8
+ "eval_wer": 0.09421927983206846
9
  }