not-lain committed on
Commit
4b0b268
1 Parent(s): fcb4b06

End of training

Browse files
Files changed (2) hide show
  1. README.md +17 -14
  2. generation_config.json +7 -1
README.md CHANGED
@@ -1,5 +1,6 @@
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
  datasets:
@@ -7,22 +8,21 @@ datasets:
7
  metrics:
8
  - wer
9
  model-index:
10
- - name: whisper-tiny-finetuned-minds14
11
  results:
12
  - task:
13
  name: Automatic Speech Recognition
14
  type: automatic-speech-recognition
15
  dataset:
16
- name: minds14
17
  type: PolyAI/minds14
18
  config: en-US
19
- split: train[450:]
20
  args: en-US
21
  metrics:
22
  - name: Wer
23
  type: wer
24
- value: 26.711906
25
- pipeline_tag: automatic-speech-recognition
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,13 +32,9 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the PolyAI/minds14 dataset.
34
  It achieves the following results on the evaluation set:
35
- - eval_loss: 0.5904
36
- - eval_wer: 26.7119
37
- - eval_runtime: 61.9232
38
- - eval_samples_per_second: 1.825
39
- - eval_steps_per_second: 0.129
40
- - epoch: 17.24
41
- - step: 500
42
 
43
  ## Model description
44
 
@@ -64,12 +60,19 @@ The following hyperparameters were used during training:
64
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
65
  - lr_scheduler_type: constant_with_warmup
66
  - lr_scheduler_warmup_steps: 50
67
- - training_steps: 2500
68
  - mixed_precision_training: Native AMP
69
 
 
 
 
 
 
 
 
70
  ### Framework versions
71
 
72
  - Transformers 4.35.2
73
  - Pytorch 2.1.0+cu121
74
  - Datasets 2.16.1
75
- - Tokenizers 0.15.1
 
1
  ---
2
  license: apache-2.0
3
+ base_model: openai/whisper-tiny
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
8
  metrics:
9
  - wer
10
  model-index:
11
+ - name: whisper-small-dv
12
  results:
13
  - task:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
+ name: PolyAI/minds14
18
  type: PolyAI/minds14
19
  config: en-US
20
+ split: train
21
  args: en-US
22
  metrics:
23
  - name: Wer
24
  type: wer
25
+ value: 27.390791027154666
 
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the PolyAI/minds14 dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5904
36
+ - Wer Ortho: 26.7119
37
+ - Wer: 27.3908
 
 
 
 
38
 
39
  ## Model description
40
 
 
60
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
  - lr_scheduler_type: constant_with_warmup
62
  - lr_scheduler_warmup_steps: 50
63
+ - training_steps: 500
64
  - mixed_precision_training: Native AMP
65
 
66
+ ### Training results
67
+
68
+ | Training Loss | Epoch | Step | Validation Loss | Wer Ortho | Wer |
69
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:-------:|
70
+ | 0.0002 | 17.24 | 500 | 0.5904 | 26.7119 | 27.3908 |
71
+
72
+
73
  ### Framework versions
74
 
75
  - Transformers 4.35.2
76
  - Pytorch 2.1.0+cu121
77
  - Datasets 2.16.1
78
+ - Tokenizers 0.15.1
generation_config.json CHANGED
@@ -51,11 +51,15 @@
51
  "forced_decoder_ids": [
52
  [
53
  1,
54
- null
55
  ],
56
  [
57
  2,
58
  50359
 
 
 
 
59
  ]
60
  ],
61
  "is_multilingual": true,
@@ -160,6 +164,7 @@
160
  "<|yo|>": 50325,
161
  "<|zh|>": 50260
162
  },
 
163
  "max_initial_timestamp_index": 50,
164
  "max_length": 448,
165
  "no_timestamps_token_id": 50363,
@@ -256,6 +261,7 @@
256
  50361,
257
  50362
258
  ],
 
259
  "task_to_id": {
260
  "transcribe": 50359,
261
  "translate": 50358
 
51
  "forced_decoder_ids": [
52
  [
53
  1,
54
+ 50259
55
  ],
56
  [
57
  2,
58
  50359
59
+ ],
60
+ [
61
+ 3,
62
+ 50363
63
  ]
64
  ],
65
  "is_multilingual": true,
 
164
  "<|yo|>": 50325,
165
  "<|zh|>": 50260
166
  },
167
+ "language": "en",
168
  "max_initial_timestamp_index": 50,
169
  "max_length": 448,
170
  "no_timestamps_token_id": 50363,
 
261
  50361,
262
  50362
263
  ],
264
+ "task": "transcribe",
265
  "task_to_id": {
266
  "transcribe": 50359,
267
  "translate": 50358