arampacha committed on
Commit
925711f
1 Parent(s): d6eb9bc
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the /WORKSPACE/DATA/HY/NOIZY_STUDENT_3/ - NA dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.1827
20
- - Wer: 0.2389
21
- - Cer: 0.0427
22
 
23
  ## Model description
24
 
@@ -37,7 +37,7 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 8e-05
41
  - train_batch_size: 16
42
  - eval_batch_size: 64
43
  - seed: 842
@@ -46,34 +46,28 @@ The following hyperparameters were used during training:
46
  - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - lr_scheduler_warmup_ratio: 0.1
49
- - training_steps: 3200
50
  - mixed_precision_training: Native AMP
51
 
52
  ### Training results
53
 
54
- | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
55
- |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|
56
- | 4.0311 | 3.51 | 200 | 0.7943 | 0.8981 | 0.2374 |
57
- | 1.4388 | 7.02 | 400 | 0.2546 | 0.3821 | 0.0658 |
58
- | 1.0949 | 10.53 | 600 | 0.2201 | 0.3216 | 0.0573 |
59
- | 1.0279 | 14.04 | 800 | 0.2250 | 0.3271 | 0.0583 |
60
- | 0.9923 | 17.54 | 1000 | 0.2074 | 0.3111 | 0.0543 |
61
- | 0.972 | 21.05 | 1200 | 0.2165 | 0.2955 | 0.0536 |
62
- | 0.9587 | 24.56 | 1400 | 0.2064 | 0.3017 | 0.0535 |
63
- | 0.9421 | 28.07 | 1600 | 0.2062 | 0.2884 | 0.0519 |
64
- | 0.9189 | 31.58 | 1800 | 0.2014 | 0.2822 | 0.0507 |
65
- | 0.8919 | 35.09 | 2000 | 0.1952 | 0.2689 | 0.0488 |
66
- | 0.8615 | 38.6 | 2200 | 0.2020 | 0.2685 | 0.0480 |
67
- | 0.834 | 42.11 | 2400 | 0.2001 | 0.2654 | 0.0467 |
68
- | 0.8056 | 45.61 | 2600 | 0.1935 | 0.2498 | 0.0448 |
69
- | 0.7888 | 49.12 | 2800 | 0.1892 | 0.2451 | 0.0446 |
70
- | 0.761 | 52.63 | 3000 | 0.1884 | 0.2432 | 0.0441 |
71
- | 0.742 | 56.14 | 3200 | 0.1827 | 0.2389 | 0.0427 |
72
 
73
 
74
  ### Framework versions
75
 
76
  - Transformers 4.17.0.dev0
77
- - Pytorch 1.10.2+cu102
78
- - Datasets 1.18.2.dev0
79
  - Tokenizers 0.11.0
 
16
 
17
  This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the /WORKSPACE/DATA/HY/NOIZY_STUDENT_3/ - NA dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.1726
20
+ - Wer: 0.2260
21
+ - Cer: 0.0405
22
 
23
  ## Model description
24
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
+ - learning_rate: 5e-05
41
  - train_batch_size: 16
42
  - eval_batch_size: 64
43
  - seed: 842
 
46
  - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - lr_scheduler_warmup_ratio: 0.1
49
+ - training_steps: 4000
50
  - mixed_precision_training: Native AMP
51
 
52
  ### Training results
53
 
54
+ | Training Loss | Epoch | Step | Cer | Validation Loss | Wer |
55
+ |:-------------:|:-----:|:----:|:------:|:---------------:|:------:|
56
+ | 1.331 | 7.02 | 400 | 0.0832 | 0.3281 | 0.4617 |
57
+ | 0.9968 | 14.04 | 800 | 0.0528 | 0.2114 | 0.3115 |
58
+ | 0.9367 | 21.05 | 1200 | 0.0481 | 0.1928 | 0.2771 |
59
+ | 0.9066 | 28.07 | 1600 | 0.0477 | 0.1905 | 0.2728 |
60
+ | 0.869 | 35.09 | 2000 | 0.0449 | 0.1817 | 0.2564 |
61
+ | 0.8319 | 42.11 | 2400 | 0.0433 | 0.1810 | 0.2490 |
62
+ | 0.8113 | 49.12 | 2800 | 0.0421 | 0.1769 | 0.2404 |
63
+ | 0.7624 | 56.14 | 3200 | 0.0432 | 0.1838 | 0.2443 |
64
+ | 0.7328 | 63.16 | 3600 | 0.0415 | 0.1756 | 0.2303 |
65
+ | 0.7209 | 70.18 | 4000 | 0.0405 | 0.1726 | 0.2260 |
 
 
 
 
 
 
66
 
67
 
68
  ### Framework versions
69
 
70
  - Transformers 4.17.0.dev0
71
+ - Pytorch 1.10.2
72
+ - Datasets 1.18.4.dev0
73
  - Tokenizers 0.11.0
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 56.14,
3
- "eval_cer": 0.04274152756702074,
4
- "eval_loss": 0.18266724050045013,
5
- "eval_runtime": 15.3733,
6
  "eval_samples": 335,
7
- "eval_samples_per_second": 21.791,
8
- "eval_steps_per_second": 0.39,
9
- "eval_wer": 0.2388758782201405,
10
- "train_loss": 1.1288447761535645,
11
- "train_runtime": 34425.8492,
12
  "train_samples": 7284,
13
- "train_samples_per_second": 11.898,
14
- "train_steps_per_second": 0.093
15
  }
 
1
  {
2
+ "epoch": 70.18,
3
+ "eval_cer": 0.04046535154274153,
4
+ "eval_loss": 0.1725786179304123,
5
+ "eval_runtime": 15.7657,
6
  "eval_samples": 335,
7
+ "eval_samples_per_second": 21.249,
8
+ "eval_steps_per_second": 0.381,
9
+ "eval_wer": 0.2259953161592506,
10
+ "train_loss": 0.07268305778503419,
11
+ "train_runtime": 4050.6325,
12
  "train_samples": 7284,
13
+ "train_samples_per_second": 126.4,
14
+ "train_steps_per_second": 0.988
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 56.14,
3
- "eval_cer": 0.04274152756702074,
4
- "eval_loss": 0.18266724050045013,
5
- "eval_runtime": 15.3733,
6
  "eval_samples": 335,
7
- "eval_samples_per_second": 21.791,
8
- "eval_steps_per_second": 0.39,
9
- "eval_wer": 0.2388758782201405
10
  }
 
1
  {
2
+ "epoch": 70.18,
3
+ "eval_cer": 0.04046535154274153,
4
+ "eval_loss": 0.1725786179304123,
5
+ "eval_runtime": 15.7657,
6
  "eval_samples": 335,
7
+ "eval_samples_per_second": 21.249,
8
+ "eval_steps_per_second": 0.381,
9
+ "eval_wer": 0.2259953161592506
10
  }
mozilla-foundation_common_voice_8_0_hy-AM_test_eval_results.txt CHANGED
@@ -1,2 +1,2 @@
1
- WER: 0.1092896174863388
2
- CER: 0.023773394031360646
 
1
+ WER: 0.1053864168618267
2
+ CER: 0.02159838138593829
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5841178926d310ff9b34929b7a12bd2a6884370660d7b8a36e5e68cad7b398b5
3
  size 3850538161
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d282e451339c6cef472abf37106541b534ada05ced6e7b82c4e05efc17f7ebf
3
  size 3850538161
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 56.14,
3
- "train_loss": 1.1288447761535645,
4
- "train_runtime": 34425.8492,
5
  "train_samples": 7284,
6
- "train_samples_per_second": 11.898,
7
- "train_steps_per_second": 0.093
8
  }
 
1
  {
2
+ "epoch": 70.18,
3
+ "train_loss": 0.07268305778503419,
4
+ "train_runtime": 4050.6325,
5
  "train_samples": 7284,
6
+ "train_samples_per_second": 126.4,
7
+ "train_steps_per_second": 0.988
8
  }
trainer_state.json CHANGED
@@ -1,281 +1,365 @@
1
  {
2
- "best_metric": 0.18266724050045013,
3
- "best_model_checkpoint": "./checkpoint-3200",
4
- "epoch": 56.14035087719298,
5
- "global_step": 3200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 3.51,
12
- "learning_rate": 4.980500000000001e-05,
13
- "loss": 4.0311,
14
- "step": 200
15
  },
16
  {
17
  "epoch": 3.51,
18
- "eval_cer": 0.23737986848760748,
19
- "eval_loss": 0.794314444065094,
20
- "eval_runtime": 15.7,
21
- "eval_samples_per_second": 21.338,
22
- "eval_steps_per_second": 0.382,
23
- "eval_wer": 0.8981264637002342,
24
  "step": 200
25
  },
 
 
 
 
 
 
26
  {
27
  "epoch": 7.02,
28
- "learning_rate": 8e-05,
29
- "loss": 1.4388,
30
  "step": 400
31
  },
32
  {
33
  "epoch": 7.02,
34
- "eval_cer": 0.06575619625695499,
35
- "eval_loss": 0.2545942962169647,
36
- "eval_runtime": 14.891,
37
- "eval_samples_per_second": 22.497,
38
- "eval_steps_per_second": 0.403,
39
- "eval_wer": 0.38212334113973456,
40
  "step": 400
41
  },
42
  {
43
- "epoch": 10.53,
44
- "learning_rate": 8e-05,
45
- "loss": 1.0949,
46
- "step": 600
47
  },
48
  {
49
  "epoch": 10.53,
50
- "eval_cer": 0.057309054122407686,
51
- "eval_loss": 0.22006691992282867,
52
- "eval_runtime": 15.2331,
53
- "eval_samples_per_second": 21.992,
54
- "eval_steps_per_second": 0.394,
55
- "eval_wer": 0.32162373145979706,
56
  "step": 600
57
  },
 
 
 
 
 
 
58
  {
59
  "epoch": 14.04,
60
- "learning_rate": 8e-05,
61
- "loss": 1.0279,
62
  "step": 800
63
  },
64
  {
65
  "epoch": 14.04,
66
- "eval_cer": 0.0582701062215478,
67
- "eval_loss": 0.22504645586013794,
68
- "eval_runtime": 15.1767,
69
- "eval_samples_per_second": 22.073,
70
- "eval_steps_per_second": 0.395,
71
- "eval_wer": 0.32708821233411395,
72
  "step": 800
73
  },
74
  {
75
- "epoch": 17.54,
76
- "learning_rate": 8e-05,
77
- "loss": 0.9923,
78
- "step": 1000
79
  },
80
  {
81
  "epoch": 17.54,
82
- "eval_cer": 0.0543247344461305,
83
- "eval_loss": 0.2073642760515213,
84
- "eval_runtime": 15.1305,
85
- "eval_samples_per_second": 22.141,
86
- "eval_steps_per_second": 0.397,
87
- "eval_wer": 0.3110850897736144,
88
  "step": 1000
89
  },
 
 
 
 
 
 
90
  {
91
  "epoch": 21.05,
92
- "learning_rate": 8e-05,
93
- "loss": 0.972,
94
  "step": 1200
95
  },
96
  {
97
  "epoch": 21.05,
98
- "eval_cer": 0.05361659079413252,
99
- "eval_loss": 0.21649114787578583,
100
- "eval_runtime": 14.9827,
101
- "eval_samples_per_second": 22.359,
102
- "eval_steps_per_second": 0.4,
103
- "eval_wer": 0.29547228727556596,
104
  "step": 1200
105
  },
106
  {
107
- "epoch": 24.56,
108
- "learning_rate": 8e-05,
109
- "loss": 0.9587,
110
- "step": 1400
111
  },
112
  {
113
  "epoch": 24.56,
114
- "eval_cer": 0.05351542741527567,
115
- "eval_loss": 0.2064175009727478,
116
- "eval_runtime": 15.1921,
117
- "eval_samples_per_second": 22.051,
118
- "eval_steps_per_second": 0.395,
119
- "eval_wer": 0.3017174082747853,
120
  "step": 1400
121
  },
 
 
 
 
 
 
122
  {
123
  "epoch": 28.07,
124
- "learning_rate": 8e-05,
125
- "loss": 0.9421,
126
  "step": 1600
127
  },
128
  {
129
  "epoch": 28.07,
130
- "eval_cer": 0.051947395042994435,
131
- "eval_loss": 0.2061864584684372,
132
- "eval_runtime": 15.0418,
133
- "eval_samples_per_second": 22.271,
134
- "eval_steps_per_second": 0.399,
135
- "eval_wer": 0.28844652615144417,
136
  "step": 1600
137
  },
138
  {
139
- "epoch": 31.58,
140
- "learning_rate": 7.059500000000001e-05,
141
- "loss": 0.9189,
142
- "step": 1800
143
  },
144
  {
145
  "epoch": 31.58,
146
- "eval_cer": 0.05073343449671219,
147
- "eval_loss": 0.2014162391424179,
148
- "eval_runtime": 15.125,
149
- "eval_samples_per_second": 22.149,
150
- "eval_steps_per_second": 0.397,
151
- "eval_wer": 0.2822014051522248,
152
  "step": 1800
153
  },
 
 
 
 
 
 
154
  {
155
  "epoch": 35.09,
156
- "learning_rate": 6.109500000000001e-05,
157
- "loss": 0.8919,
158
  "step": 2000
159
  },
160
  {
161
  "epoch": 35.09,
162
- "eval_cer": 0.04881133029843197,
163
- "eval_loss": 0.19518214464187622,
164
- "eval_runtime": 15.0854,
165
- "eval_samples_per_second": 22.207,
166
- "eval_steps_per_second": 0.398,
167
- "eval_wer": 0.2689305230288837,
168
  "step": 2000
169
  },
170
  {
171
- "epoch": 38.6,
172
- "learning_rate": 5.1594999999999996e-05,
173
- "loss": 0.8615,
174
- "step": 2200
175
  },
176
  {
177
  "epoch": 38.6,
178
- "eval_cer": 0.04795144157814871,
179
- "eval_loss": 0.20196911692619324,
180
- "eval_runtime": 15.1604,
181
- "eval_samples_per_second": 22.097,
182
- "eval_steps_per_second": 0.396,
183
- "eval_wer": 0.2685402029664325,
184
  "step": 2200
185
  },
 
 
 
 
 
 
186
  {
187
  "epoch": 42.11,
188
- "learning_rate": 4.2095e-05,
189
- "loss": 0.834,
190
  "step": 2400
191
  },
192
  {
193
  "epoch": 42.11,
194
- "eval_cer": 0.04668689934243804,
195
- "eval_loss": 0.2001034915447235,
196
- "eval_runtime": 15.091,
197
- "eval_samples_per_second": 22.199,
198
- "eval_steps_per_second": 0.398,
199
- "eval_wer": 0.2654176424668228,
200
  "step": 2400
201
  },
202
  {
203
- "epoch": 45.61,
204
- "learning_rate": 3.2595e-05,
205
- "loss": 0.8056,
206
- "step": 2600
207
  },
208
  {
209
  "epoch": 45.61,
210
- "eval_cer": 0.04481537683358624,
211
- "eval_loss": 0.1934908777475357,
212
- "eval_runtime": 15.4173,
213
- "eval_samples_per_second": 21.729,
214
- "eval_steps_per_second": 0.389,
215
- "eval_wer": 0.2498048399687744,
216
  "step": 2600
217
  },
 
 
 
 
 
 
218
  {
219
  "epoch": 49.12,
220
- "learning_rate": 2.3095e-05,
221
- "loss": 0.7888,
222
  "step": 2800
223
  },
224
  {
225
  "epoch": 49.12,
226
- "eval_cer": 0.04461305007587253,
227
- "eval_loss": 0.18915079534053802,
228
- "eval_runtime": 15.29,
229
- "eval_samples_per_second": 21.91,
230
- "eval_steps_per_second": 0.392,
231
- "eval_wer": 0.24512099921935987,
232
  "step": 2800
233
  },
234
  {
235
- "epoch": 52.63,
236
- "learning_rate": 1.3595000000000008e-05,
237
- "loss": 0.761,
238
- "step": 3000
239
  },
240
  {
241
  "epoch": 52.63,
242
- "eval_cer": 0.044056651492159836,
243
- "eval_loss": 0.18836112320423126,
244
- "eval_runtime": 16.204,
245
- "eval_samples_per_second": 20.674,
246
- "eval_steps_per_second": 0.37,
247
- "eval_wer": 0.24316939890710382,
248
  "step": 3000
249
  },
 
 
 
 
 
 
250
  {
251
  "epoch": 56.14,
252
- "learning_rate": 4.095000000000005e-06,
253
- "loss": 0.742,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 56.14,
258
- "eval_cer": 0.04274152756702074,
259
- "eval_loss": 0.18266724050045013,
260
- "eval_runtime": 15.3463,
261
- "eval_samples_per_second": 21.829,
262
- "eval_steps_per_second": 0.391,
263
- "eval_wer": 0.2388758782201405,
264
  "step": 3200
265
  },
266
  {
267
- "epoch": 56.14,
268
- "step": 3200,
269
- "total_flos": 2.6268075931237872e+20,
270
- "train_loss": 1.1288447761535645,
271
- "train_runtime": 34425.8492,
272
- "train_samples_per_second": 11.898,
273
- "train_steps_per_second": 0.093
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  }
275
  ],
276
- "max_steps": 3200,
277
- "num_train_epochs": 57,
278
- "total_flos": 2.6268075931237872e+20,
279
  "trial_name": null,
280
  "trial_params": null
281
  }
 
1
  {
2
+ "best_metric": 0.1725786179304123,
3
+ "best_model_checkpoint": "./checkpoint-4000",
4
+ "epoch": 70.17543859649123,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.75,
12
+ "learning_rate": 1.26275e-05,
13
+ "loss": 6.0202,
14
+ "step": 100
15
  },
16
  {
17
  "epoch": 3.51,
18
+ "learning_rate": 2.5002499999999997e-05,
19
+ "loss": 2.9203,
 
 
 
 
20
  "step": 200
21
  },
22
+ {
23
+ "epoch": 5.26,
24
+ "learning_rate": 3.73775e-05,
25
+ "loss": 1.7768,
26
+ "step": 300
27
+ },
28
  {
29
  "epoch": 7.02,
30
+ "learning_rate": 4.9752499999999995e-05,
31
+ "loss": 1.331,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 7.02,
36
+ "eval_cer": 0.08315629742033384,
37
+ "eval_loss": 0.32805994153022766,
38
+ "eval_runtime": 15.7786,
39
+ "eval_samples_per_second": 21.231,
40
+ "eval_steps_per_second": 0.38,
41
+ "eval_wer": 0.46174863387978143,
42
  "step": 400
43
  },
44
  {
45
+ "epoch": 8.77,
46
+ "learning_rate": 5e-05,
47
+ "loss": 1.165,
48
+ "step": 500
49
  },
50
  {
51
  "epoch": 10.53,
52
+ "learning_rate": 5e-05,
53
+ "loss": 1.0755,
 
 
 
 
54
  "step": 600
55
  },
56
+ {
57
+ "epoch": 12.28,
58
+ "learning_rate": 5e-05,
59
+ "loss": 1.0359,
60
+ "step": 700
61
+ },
62
  {
63
  "epoch": 14.04,
64
+ "learning_rate": 5e-05,
65
+ "loss": 0.9968,
66
  "step": 800
67
  },
68
  {
69
  "epoch": 14.04,
70
+ "eval_cer": 0.05275670207384927,
71
+ "eval_loss": 0.21139617264270782,
72
+ "eval_runtime": 15.2468,
73
+ "eval_samples_per_second": 21.972,
74
+ "eval_steps_per_second": 0.394,
75
+ "eval_wer": 0.3114754098360656,
76
  "step": 800
77
  },
78
  {
79
+ "epoch": 15.79,
80
+ "learning_rate": 5e-05,
81
+ "loss": 0.9746,
82
+ "step": 900
83
  },
84
  {
85
  "epoch": 17.54,
86
+ "learning_rate": 5e-05,
87
+ "loss": 0.9629,
 
 
 
 
88
  "step": 1000
89
  },
90
+ {
91
+ "epoch": 19.3,
92
+ "learning_rate": 5e-05,
93
+ "loss": 0.9639,
94
+ "step": 1100
95
+ },
96
  {
97
  "epoch": 21.05,
98
+ "learning_rate": 5e-05,
99
+ "loss": 0.9367,
100
  "step": 1200
101
  },
102
  {
103
  "epoch": 21.05,
104
+ "eval_cer": 0.048052604957005564,
105
+ "eval_loss": 0.19284144043922424,
106
+ "eval_runtime": 15.2894,
107
+ "eval_samples_per_second": 21.911,
108
+ "eval_steps_per_second": 0.392,
109
+ "eval_wer": 0.27712724434035907,
110
  "step": 1200
111
  },
112
  {
113
+ "epoch": 22.81,
114
+ "learning_rate": 5e-05,
115
+ "loss": 0.9265,
116
+ "step": 1300
117
  },
118
  {
119
  "epoch": 24.56,
120
+ "learning_rate": 5e-05,
121
+ "loss": 0.9093,
 
 
 
 
122
  "step": 1400
123
  },
124
+ {
125
+ "epoch": 26.32,
126
+ "learning_rate": 5e-05,
127
+ "loss": 0.9126,
128
+ "step": 1500
129
+ },
130
  {
131
  "epoch": 28.07,
132
+ "learning_rate": 5e-05,
133
+ "loss": 0.9066,
134
  "step": 1600
135
  },
136
  {
137
  "epoch": 28.07,
138
+ "eval_cer": 0.047698533131006575,
139
+ "eval_loss": 0.19045932590961456,
140
+ "eval_runtime": 15.3651,
141
+ "eval_samples_per_second": 21.803,
142
+ "eval_steps_per_second": 0.39,
143
+ "eval_wer": 0.2728337236533958,
144
  "step": 1600
145
  },
146
  {
147
+ "epoch": 29.82,
148
+ "learning_rate": 5e-05,
149
+ "loss": 0.8954,
150
+ "step": 1700
151
  },
152
  {
153
  "epoch": 31.58,
154
+ "learning_rate": 5e-05,
155
+ "loss": 0.8807,
 
 
 
 
156
  "step": 1800
157
  },
158
+ {
159
+ "epoch": 33.33,
160
+ "learning_rate": 5e-05,
161
+ "loss": 0.8753,
162
+ "step": 1900
163
+ },
164
  {
165
  "epoch": 35.09,
166
+ "learning_rate": 5e-05,
167
+ "loss": 0.869,
168
  "step": 2000
169
  },
170
  {
171
  "epoch": 35.09,
172
+ "eval_cer": 0.04486595852301467,
173
+ "eval_loss": 0.181670144200325,
174
+ "eval_runtime": 15.5069,
175
+ "eval_samples_per_second": 21.603,
176
+ "eval_steps_per_second": 0.387,
177
+ "eval_wer": 0.25644028103044497,
178
  "step": 2000
179
  },
180
  {
181
+ "epoch": 36.84,
182
+ "learning_rate": 4.76725e-05,
183
+ "loss": 0.8637,
184
+ "step": 2100
185
  },
186
  {
187
  "epoch": 38.6,
188
+ "learning_rate": 4.5297500000000005e-05,
189
+ "loss": 0.8557,
 
 
 
 
190
  "step": 2200
191
  },
192
+ {
193
+ "epoch": 40.35,
194
+ "learning_rate": 4.29225e-05,
195
+ "loss": 0.8537,
196
+ "step": 2300
197
+ },
198
  {
199
  "epoch": 42.11,
200
+ "learning_rate": 4.0547500000000004e-05,
201
+ "loss": 0.8319,
202
  "step": 2400
203
  },
204
  {
205
  "epoch": 42.11,
206
+ "eval_cer": 0.04334850784016186,
207
+ "eval_loss": 0.18102943897247314,
208
+ "eval_runtime": 15.5131,
209
+ "eval_samples_per_second": 21.595,
210
+ "eval_steps_per_second": 0.387,
211
+ "eval_wer": 0.24902419984387197,
212
  "step": 2400
213
  },
214
  {
215
+ "epoch": 43.86,
216
+ "learning_rate": 3.81725e-05,
217
+ "loss": 0.837,
218
+ "step": 2500
219
  },
220
  {
221
  "epoch": 45.61,
222
+ "learning_rate": 3.57975e-05,
223
+ "loss": 0.8098,
 
 
 
 
224
  "step": 2600
225
  },
226
+ {
227
+ "epoch": 47.37,
228
+ "learning_rate": 3.34225e-05,
229
+ "loss": 0.8212,
230
+ "step": 2700
231
+ },
232
  {
233
  "epoch": 49.12,
234
+ "learning_rate": 3.1047500000000004e-05,
235
+ "loss": 0.8113,
236
  "step": 2800
237
  },
238
  {
239
  "epoch": 49.12,
240
+ "eval_cer": 0.042134547293879616,
241
+ "eval_loss": 0.17691758275032043,
242
+ "eval_runtime": 15.2685,
243
+ "eval_samples_per_second": 21.941,
244
+ "eval_steps_per_second": 0.393,
245
+ "eval_wer": 0.24043715846994534,
246
  "step": 2800
247
  },
248
  {
249
+ "epoch": 50.88,
250
+ "learning_rate": 2.8672500000000004e-05,
251
+ "loss": 0.7928,
252
+ "step": 2900
253
  },
254
  {
255
  "epoch": 52.63,
256
+ "learning_rate": 2.6297500000000004e-05,
257
+ "loss": 0.7871,
 
 
 
 
258
  "step": 3000
259
  },
260
+ {
261
+ "epoch": 54.39,
262
+ "learning_rate": 2.3922500000000003e-05,
263
+ "loss": 0.777,
264
+ "step": 3100
265
+ },
266
  {
267
  "epoch": 56.14,
268
+ "learning_rate": 2.1571249999999998e-05,
269
+ "loss": 0.7624,
270
  "step": 3200
271
  },
272
  {
273
  "epoch": 56.14,
274
+ "eval_cer": 0.04319676277187658,
275
+ "eval_loss": 0.1837695837020874,
276
+ "eval_runtime": 15.5608,
277
+ "eval_samples_per_second": 21.528,
278
+ "eval_steps_per_second": 0.386,
279
+ "eval_wer": 0.24434035909445745,
280
  "step": 3200
281
  },
282
  {
283
+ "epoch": 57.89,
284
+ "learning_rate": 1.9196249999999998e-05,
285
+ "loss": 0.7624,
286
+ "step": 3300
287
+ },
288
+ {
289
+ "epoch": 59.65,
290
+ "learning_rate": 1.682125e-05,
291
+ "loss": 0.7517,
292
+ "step": 3400
293
+ },
294
+ {
295
+ "epoch": 61.4,
296
+ "learning_rate": 1.444625e-05,
297
+ "loss": 0.7417,
298
+ "step": 3500
299
+ },
300
+ {
301
+ "epoch": 63.16,
302
+ "learning_rate": 1.207125e-05,
303
+ "loss": 0.7328,
304
+ "step": 3600
305
+ },
306
+ {
307
+ "epoch": 63.16,
308
+ "eval_cer": 0.041476985331310064,
309
+ "eval_loss": 0.17556767165660858,
310
+ "eval_runtime": 15.151,
311
+ "eval_samples_per_second": 22.111,
312
+ "eval_steps_per_second": 0.396,
313
+ "eval_wer": 0.2302888368462139,
314
+ "step": 3600
315
+ },
316
+ {
317
+ "epoch": 64.91,
318
+ "learning_rate": 9.69625e-06,
319
+ "loss": 0.7334,
320
+ "step": 3700
321
+ },
322
+ {
323
+ "epoch": 66.67,
324
+ "learning_rate": 7.321250000000002e-06,
325
+ "loss": 0.7261,
326
+ "step": 3800
327
+ },
328
+ {
329
+ "epoch": 68.42,
330
+ "learning_rate": 4.946250000000002e-06,
331
+ "loss": 0.7268,
332
+ "step": 3900
333
+ },
334
+ {
335
+ "epoch": 70.18,
336
+ "learning_rate": 2.5712500000000027e-06,
337
+ "loss": 0.7209,
338
+ "step": 4000
339
+ },
340
+ {
341
+ "epoch": 70.18,
342
+ "eval_cer": 0.04046535154274153,
343
+ "eval_loss": 0.1725786179304123,
344
+ "eval_runtime": 14.417,
345
+ "eval_samples_per_second": 23.236,
346
+ "eval_steps_per_second": 0.416,
347
+ "eval_wer": 0.2259953161592506,
348
+ "step": 4000
349
+ },
350
+ {
351
+ "epoch": 70.18,
352
+ "step": 4000,
353
+ "total_flos": 3.2826266564131357e+20,
354
+ "train_loss": 0.07268305778503419,
355
+ "train_runtime": 4050.6325,
356
+ "train_samples_per_second": 126.4,
357
+ "train_steps_per_second": 0.988
358
  }
359
  ],
360
+ "max_steps": 4000,
361
+ "num_train_epochs": 71,
362
+ "total_flos": 3.2826266564131357e+20,
363
  "trial_name": null,
364
  "trial_params": null
365
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfeb12863ac5de618f2b958c1b03e980705dda909c6bff76a929e0e5bfb2b372
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fb837578da84752ebc49617aed9d2bd5c09891be06d518ff85dd1e449ce2a44
3
  size 3055