nrshoudi committed
Commit 7862b1b · 1 Parent(s): adf52bc

End of training

Files changed (5)
  1. README.md +24 -24
  2. config.json +1 -1
  3. model.safetensors +3 -0
  4. trainer_state.json +189 -189
  5. training_args.bin +2 -2
README.md CHANGED
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.3192
- - Wer: 0.0429
- - Per: 0.0326
+ - Loss: 0.3501
+ - Wer: 0.0432
+ - Per: 0.0327
 
 ## Model description
 
@@ -51,31 +51,31 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Wer | Per |
 |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
- | 8.6256 | 1.0 | 1617 | 3.2158 | 1.0 | 1.0 |
- | 1.3467 | 2.0 | 3234 | 0.3775 | 0.0753 | 0.0605 |
- | 0.1996 | 3.0 | 4851 | 0.3054 | 0.0561 | 0.0421 |
- | 0.1311 | 4.0 | 6468 | 0.3120 | 0.0507 | 0.0387 |
- | 0.1044 | 5.0 | 8085 | 0.3169 | 0.0507 | 0.0386 |
- | 0.0891 | 6.0 | 9702 | 0.3030 | 0.0496 | 0.0372 |
- | 0.0788 | 7.0 | 11319 | 0.3174 | 0.0520 | 0.0391 |
- | 0.0619 | 8.0 | 12936 | 0.3312 | 0.0546 | 0.0417 |
- | 0.0552 | 9.0 | 14553 | 0.3353 | 0.0480 | 0.0358 |
- | 0.0493 | 10.0 | 16170 | 0.2742 | 0.0452 | 0.0336 |
- | 0.0417 | 11.0 | 17787 | 0.2894 | 0.0459 | 0.0355 |
- | 0.0382 | 12.0 | 19404 | 0.2949 | 0.0463 | 0.0350 |
- | 0.0306 | 13.0 | 21021 | 0.3041 | 0.0472 | 0.0348 |
- | 0.03 | 14.0 | 22638 | 0.3109 | 0.0453 | 0.0339 |
- | 0.0244 | 15.0 | 24255 | 0.3234 | 0.0448 | 0.0335 |
- | 0.0215 | 16.0 | 25872 | 0.3321 | 0.0436 | 0.0332 |
- | 0.0182 | 17.0 | 27489 | 0.3367 | 0.0420 | 0.0315 |
- | 0.0171 | 18.0 | 29106 | 0.3317 | 0.0430 | 0.0328 |
- | 0.0155 | 19.0 | 30723 | 0.3249 | 0.0423 | 0.0321 |
- | 0.0132 | 20.0 | 32340 | 0.3192 | 0.0429 | 0.0326 |
+ | 8.1087 | 1.0 | 1637 | 3.1701 | 1.0 | 1.0 |
+ | 1.1869 | 2.0 | 3274 | 0.4045 | 0.0800 | 0.0646 |
+ | 0.2 | 3.0 | 4911 | 0.3260 | 0.0591 | 0.0464 |
+ | 0.1283 | 4.0 | 6548 | 0.3042 | 0.0618 | 0.0475 |
+ | 0.1037 | 5.0 | 8185 | 0.2727 | 0.0531 | 0.0410 |
+ | 0.0844 | 6.0 | 9822 | 0.3184 | 0.0543 | 0.0409 |
+ | 0.0738 | 7.0 | 11459 | 0.2886 | 0.0485 | 0.0366 |
+ | 0.0613 | 8.0 | 13096 | 0.3345 | 0.0488 | 0.0374 |
+ | 0.0573 | 9.0 | 14733 | 0.3469 | 0.0505 | 0.0394 |
+ | 0.0514 | 10.0 | 16370 | 0.3245 | 0.0510 | 0.0386 |
+ | 0.0469 | 11.0 | 18007 | 0.3094 | 0.0492 | 0.0374 |
+ | 0.0375 | 12.0 | 19644 | 0.3656 | 0.0521 | 0.0392 |
+ | 0.0356 | 13.0 | 21281 | 0.3296 | 0.0472 | 0.0356 |
+ | 0.0291 | 14.0 | 22918 | 0.3301 | 0.0448 | 0.0336 |
+ | 0.0242 | 15.0 | 24555 | 0.3575 | 0.0460 | 0.0357 |
+ | 0.0216 | 16.0 | 26192 | 0.3376 | 0.0443 | 0.0335 |
+ | 0.0208 | 17.0 | 27829 | 0.3688 | 0.0436 | 0.0332 |
+ | 0.018 | 18.0 | 29466 | 0.3673 | 0.0445 | 0.0340 |
+ | 0.0164 | 19.0 | 31103 | 0.3576 | 0.0432 | 0.0327 |
+ | 0.0128 | 20.0 | 32740 | 0.3501 | 0.0432 | 0.0327 |
 
 
 ### Framework versions
 
- - Transformers 4.34.1
+ - Transformers 4.35.0
 - Pytorch 2.1.0+cu118
 - Datasets 2.14.6
 - Tokenizers 0.14.1
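
The card above describes a CTC speech model scored with WER (word error rate) and PER (phoneme error rate). Below is a minimal inference sketch, assuming the checkpoint is published under the repo id `nrshoudi/wav2vec_arabic_mdd` (the path that appears in `trainer_state.json` below) with its processor saved alongside the weights, and that input audio is 16 kHz mono.

```python
# Hedged sketch: greedy CTC transcription with this fine-tuned checkpoint.
# Assumptions: repo id "nrshoudi/wav2vec_arabic_mdd" (taken from
# trainer_state.json) and a processor stored in the same repo.
import torch
from transformers import AutoModelForCTC, AutoProcessor

repo_id = "nrshoudi/wav2vec_arabic_mdd"  # assumed repo id
processor = AutoProcessor.from_pretrained(repo_id)
model = AutoModelForCTC.from_pretrained(repo_id).eval()

def transcribe(waveform):
    """waveform: 1-D float array/tensor sampled at 16 kHz."""
    inputs = processor(waveform, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits   # (batch, frames, vocab_size=40)
    pred_ids = logits.argmax(dim=-1)      # greedy CTC path
    return processor.batch_decode(pred_ids)[0]
```

WER/PER figures like those in the table are then typically obtained by comparing such decoded outputs against reference word and phoneme sequences, for example with `jiwer` or the `evaluate` library.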
config.json CHANGED
@@ -102,7 +102,7 @@
   1
 ],
 "torch_dtype": "float32",
- "transformers_version": "4.34.1",
+ "transformers_version": "4.35.0",
 "use_weighted_layer_sum": false,
 "vocab_size": 40,
 "xvector_output_dim": 512
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62a1ad6b551d23fd41826148687406ae9d40d16b74e3ccad082a3defedec9a1b
+ size 1261971480
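
These three lines are a Git LFS pointer rather than the weights themselves: the ~1.26 GB safetensors blob lives in LFS storage and is identified by the SHA-256 `oid` and byte `size` above. A small sketch for verifying a downloaded copy against the pointer:

```python
# Hedged sketch: verify a locally downloaded model.safetensors against the
# LFS pointer shown above (oid = SHA-256 of the blob, size in bytes).
import hashlib
from pathlib import Path

EXPECTED_OID = "62a1ad6b551d23fd41826148687406ae9d40d16b74e3ccad082a3defedec9a1b"
EXPECTED_SIZE = 1_261_971_480

path = Path("model.safetensors")  # assumed local download location
h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert path.stat().st_size == EXPECTED_SIZE, "size mismatch vs. LFS pointer"
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch vs. LFS pointer"
print("model.safetensors matches the LFS pointer")
```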
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
- "best_metric": 0.2741861045360565,
- "best_model_checkpoint": "nrshoudi/wav2vec_arabic_mdd/checkpoint-16170",
+ "best_metric": 0.2726672291755676,
+ "best_model_checkpoint": "nrshoudi/wav2vec_arabic_mdd/checkpoint-8185",
 "epoch": 20.0,
 "eval_steps": 500,
- "global_step": 32340,
+ "global_step": 32740,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -11,338 +11,338 @@
 {
 "epoch": 1.0,
 "learning_rate": 5e-05,
- "loss": 8.6256,
- "step": 1617
+ "loss": 8.1087,
+ "step": 1637
 },
 {
 "epoch": 1.0,
- "eval_loss": 3.215786933898926,
+ "eval_loss": 3.1701228618621826,
 "eval_per": 1.0,
- "eval_runtime": 116.0176,
- "eval_samples_per_second": 7.008,
- "eval_steps_per_second": 3.508,
+ "eval_runtime": 115.1384,
+ "eval_samples_per_second": 7.061,
+ "eval_steps_per_second": 3.535,
 "eval_wer": 1.0,
- "step": 1617
+ "step": 1637
 },
 {
 "epoch": 2.0,
 "learning_rate": 0.0001,
- "loss": 1.3467,
- "step": 3234
+ "loss": 1.1869,
+ "step": 3274
 },
 {
 "epoch": 2.0,
- "eval_loss": 0.37750244140625,
- "eval_per": 0.060468293602185995,
- "eval_runtime": 116.9742,
- "eval_samples_per_second": 6.95,
- "eval_steps_per_second": 3.479,
- "eval_wer": 0.07531146470066725,
- "step": 3234
+ "eval_loss": 0.40448808670043945,
+ "eval_per": 0.06459059643833034,
+ "eval_runtime": 117.2186,
+ "eval_samples_per_second": 6.936,
+ "eval_steps_per_second": 3.472,
+ "eval_wer": 0.07997760253837899,
+ "step": 3274
 },
 {
 "epoch": 3.0,
 "learning_rate": 9.444444444444444e-05,
- "loss": 0.1996,
- "step": 4851
+ "loss": 0.2,
+ "step": 4911
 },
 {
 "epoch": 3.0,
- "eval_loss": 0.30538317561149597,
- "eval_per": 0.042094600961085464,
- "eval_runtime": 117.0156,
- "eval_samples_per_second": 6.948,
- "eval_steps_per_second": 3.478,
- "eval_wer": 0.056133638187672065,
- "step": 4851
+ "eval_loss": 0.3259519040584564,
+ "eval_per": 0.04635823989446905,
+ "eval_runtime": 116.6876,
+ "eval_samples_per_second": 6.967,
+ "eval_steps_per_second": 3.488,
+ "eval_wer": 0.05907330502543045,
+ "step": 4911
 },
 {
 "epoch": 4.0,
 "learning_rate": 8.888888888888889e-05,
- "loss": 0.1311,
- "step": 6468
+ "loss": 0.1283,
+ "step": 6548
 },
 {
 "epoch": 4.0,
- "eval_loss": 0.31199753284454346,
- "eval_per": 0.038655422594930745,
- "eval_runtime": 116.6027,
- "eval_samples_per_second": 6.972,
- "eval_steps_per_second": 3.49,
- "eval_wer": 0.05067425691754934,
- "step": 6468
+ "eval_loss": 0.3042123317718506,
+ "eval_per": 0.04748892867238293,
+ "eval_runtime": 116.8704,
+ "eval_samples_per_second": 6.956,
+ "eval_steps_per_second": 3.482,
+ "eval_wer": 0.06182632634968037,
+ "step": 6548
 },
 {
 "epoch": 5.0,
 "learning_rate": 8.333333333333334e-05,
- "loss": 0.1044,
- "step": 8085
+ "loss": 0.1037,
+ "step": 8185
 },
 {
 "epoch": 5.0,
- "eval_loss": 0.31690752506256104,
- "eval_per": 0.038608310562517664,
- "eval_runtime": 118.0869,
- "eval_samples_per_second": 6.885,
- "eval_steps_per_second": 3.447,
- "eval_wer": 0.05072091829592646,
- "step": 8085
+ "eval_loss": 0.2726672291755676,
+ "eval_per": 0.04101102421558466,
+ "eval_runtime": 117.5633,
+ "eval_samples_per_second": 6.915,
+ "eval_steps_per_second": 3.462,
+ "eval_wer": 0.053147309971536556,
+ "step": 8185
 },
 {
 "epoch": 6.0,
 "learning_rate": 7.777777777777778e-05,
- "loss": 0.0891,
- "step": 9702
+ "loss": 0.0844,
+ "step": 9822
 },
 {
 "epoch": 6.0,
- "eval_loss": 0.30304601788520813,
- "eval_per": 0.03721850560633186,
- "eval_runtime": 118.602,
- "eval_samples_per_second": 6.855,
- "eval_steps_per_second": 3.432,
- "eval_wer": 0.04960104521487565,
- "step": 9702
+ "eval_loss": 0.31839433312416077,
+ "eval_per": 0.040916800150758506,
+ "eval_runtime": 117.5398,
+ "eval_samples_per_second": 6.917,
+ "eval_steps_per_second": 3.463,
+ "eval_wer": 0.05431384443096449,
+ "step": 9822
 },
 {
 "epoch": 7.0,
 "learning_rate": 7.222222222222222e-05,
- "loss": 0.0788,
- "step": 11319
+ "loss": 0.0738,
+ "step": 11459
 },
 {
 "epoch": 7.0,
- "eval_loss": 0.31736186146736145,
- "eval_per": 0.03910298690285499,
- "eval_runtime": 118.2233,
- "eval_samples_per_second": 6.877,
- "eval_steps_per_second": 3.443,
- "eval_wer": 0.052027436890485744,
- "step": 11319
+ "eval_loss": 0.28862160444259644,
+ "eval_per": 0.03655893715254876,
+ "eval_runtime": 117.5297,
+ "eval_samples_per_second": 6.917,
+ "eval_steps_per_second": 3.463,
+ "eval_wer": 0.048527833512201954,
+ "step": 11459
 },
 {
 "epoch": 8.0,
 "learning_rate": 6.666666666666667e-05,
- "loss": 0.0619,
- "step": 12936
+ "loss": 0.0613,
+ "step": 13096
 },
 {
 "epoch": 8.0,
- "eval_loss": 0.33120226860046387,
- "eval_per": 0.0416941486855743,
- "eval_runtime": 118.3706,
- "eval_samples_per_second": 6.868,
- "eval_steps_per_second": 3.438,
- "eval_wer": 0.05459381270122719,
- "step": 12936
+ "eval_loss": 0.3345281481742859,
+ "eval_per": 0.037359841703571095,
+ "eval_runtime": 117.0501,
+ "eval_samples_per_second": 6.946,
+ "eval_steps_per_second": 3.477,
+ "eval_wer": 0.04880780178246465,
+ "step": 13096
 },
 {
 "epoch": 9.0,
 "learning_rate": 6.111111111111112e-05,
- "loss": 0.0552,
- "step": 14553
+ "loss": 0.0573,
+ "step": 14733
 },
 {
 "epoch": 9.0,
- "eval_loss": 0.3352554142475128,
- "eval_per": 0.03575803260152643,
- "eval_runtime": 119.3222,
- "eval_samples_per_second": 6.813,
- "eval_steps_per_second": 3.411,
- "eval_wer": 0.04796789697167654,
- "step": 14553
+ "eval_loss": 0.34689363837242126,
+ "eval_per": 0.03943277112974654,
+ "eval_runtime": 117.6593,
+ "eval_samples_per_second": 6.91,
+ "eval_steps_per_second": 3.459,
+ "eval_wer": 0.05053427278241799,
+ "step": 14733
 },
 {
 "epoch": 10.0,
 "learning_rate": 5.555555555555556e-05,
- "loss": 0.0493,
- "step": 16170
+ "loss": 0.0514,
+ "step": 16370
 },
 {
 "epoch": 10.0,
- "eval_loss": 0.2741861045360565,
- "eval_per": 0.033567323094318285,
- "eval_runtime": 119.9093,
- "eval_samples_per_second": 6.78,
- "eval_steps_per_second": 3.394,
- "eval_wer": 0.04521487564742663,
- "step": 16170
+ "eval_loss": 0.3245474100112915,
+ "eval_per": 0.038608310562517664,
+ "eval_runtime": 117.7221,
+ "eval_samples_per_second": 6.906,
+ "eval_steps_per_second": 3.457,
+ "eval_wer": 0.051047547944566285,
+ "step": 16370
 },
 {
 "epoch": 11.0,
 "learning_rate": 5e-05,
- "loss": 0.0417,
- "step": 17787
+ "loss": 0.0469,
+ "step": 18007
 },
 {
 "epoch": 11.0,
- "eval_loss": 0.28943416476249695,
- "eval_per": 0.03547536040704796,
- "eval_runtime": 120.1333,
- "eval_samples_per_second": 6.767,
- "eval_steps_per_second": 3.388,
- "eval_wer": 0.04586813494470627,
- "step": 17787
+ "eval_loss": 0.30943331122398376,
+ "eval_per": 0.03738339771977763,
+ "eval_runtime": 118.4378,
+ "eval_samples_per_second": 6.864,
+ "eval_steps_per_second": 3.436,
+ "eval_wer": 0.049181092809481596,
+ "step": 18007
 },
 {
 "epoch": 12.0,
 "learning_rate": 4.4444444444444447e-05,
- "loss": 0.0382,
- "step": 19404
+ "loss": 0.0375,
+ "step": 19644
 },
 {
 "epoch": 12.0,
- "eval_loss": 0.29486820101737976,
- "eval_per": 0.03500424008291718,
- "eval_runtime": 119.6619,
- "eval_samples_per_second": 6.794,
- "eval_steps_per_second": 3.401,
- "eval_wer": 0.04633474872847744,
- "step": 19404
+ "eval_loss": 0.36555343866348267,
+ "eval_per": 0.03919721096768115,
+ "eval_runtime": 117.4812,
+ "eval_samples_per_second": 6.92,
+ "eval_steps_per_second": 3.464,
+ "eval_wer": 0.05207409826886286,
+ "step": 19644
 },
 {
 "epoch": 13.0,
 "learning_rate": 3.888888888888889e-05,
- "loss": 0.0306,
- "step": 21021
+ "loss": 0.0356,
+ "step": 21281
 },
 {
 "epoch": 13.0,
- "eval_loss": 0.30409368872642517,
- "eval_per": 0.034792235937058325,
- "eval_runtime": 120.2788,
- "eval_samples_per_second": 6.759,
- "eval_steps_per_second": 3.384,
- "eval_wer": 0.04717465353926555,
- "step": 21021
+ "eval_loss": 0.3295977711677551,
+ "eval_per": 0.03561669650428719,
+ "eval_runtime": 120.3969,
+ "eval_samples_per_second": 6.753,
+ "eval_steps_per_second": 3.38,
+ "eval_wer": 0.04722131491764267,
+ "step": 21281
 },
 {
 "epoch": 14.0,
 "learning_rate": 3.3333333333333335e-05,
- "loss": 0.03,
- "step": 22638
+ "loss": 0.0291,
+ "step": 22918
 },
 {
 "epoch": 14.0,
- "eval_loss": 0.3109176456928253,
- "eval_per": 0.033920663337416376,
- "eval_runtime": 118.5483,
- "eval_samples_per_second": 6.858,
- "eval_steps_per_second": 3.433,
- "eval_wer": 0.045261537025803746,
- "step": 22638
+ "eval_loss": 0.3301165699958801,
+ "eval_per": 0.033637991142937904,
+ "eval_runtime": 120.2853,
+ "eval_samples_per_second": 6.759,
+ "eval_steps_per_second": 3.384,
+ "eval_wer": 0.04479492324203257,
+ "step": 22918
 },
 {
 "epoch": 15.0,
 "learning_rate": 2.777777777777778e-05,
- "loss": 0.0244,
- "step": 24255
+ "loss": 0.0242,
+ "step": 24555
 },
 {
 "epoch": 15.0,
- "eval_loss": 0.3234339952468872,
- "eval_per": 0.03347309902949213,
- "eval_runtime": 118.5882,
- "eval_samples_per_second": 6.856,
- "eval_steps_per_second": 3.432,
- "eval_wer": 0.044841584620409686,
- "step": 24255
+ "eval_loss": 0.3575274646282196,
+ "eval_per": 0.03573447658531989,
+ "eval_runtime": 120.2073,
+ "eval_samples_per_second": 6.763,
+ "eval_steps_per_second": 3.386,
+ "eval_wer": 0.046008119079837616,
+ "step": 24555
 },
 {
 "epoch": 16.0,
 "learning_rate": 2.2222222222222223e-05,
- "loss": 0.0215,
- "step": 25872
+ "loss": 0.0216,
+ "step": 26192
 },
 {
 "epoch": 16.0,
- "eval_loss": 0.33212560415267944,
- "eval_per": 0.03316687081880712,
- "eval_runtime": 118.6022,
- "eval_samples_per_second": 6.855,
- "eval_steps_per_second": 3.432,
- "eval_wer": 0.04362838878260464,
- "step": 25872
+ "eval_loss": 0.3376178741455078,
+ "eval_per": 0.033496655045698674,
+ "eval_runtime": 119.4419,
+ "eval_samples_per_second": 6.807,
+ "eval_steps_per_second": 3.408,
+ "eval_wer": 0.04428164807988428,
+ "step": 26192
 },
 {
 "epoch": 17.0,
 "learning_rate": 1.6666666666666667e-05,
- "loss": 0.0182,
- "step": 27489
+ "loss": 0.0208,
+ "step": 27829
 },
 {
 "epoch": 17.0,
- "eval_loss": 0.3367431163787842,
- "eval_per": 0.03154150570055592,
- "eval_runtime": 118.0708,
- "eval_samples_per_second": 6.886,
- "eval_steps_per_second": 3.447,
- "eval_wer": 0.04204190191778265,
- "step": 27489
+ "eval_loss": 0.3688383400440216,
+ "eval_per": 0.033190426835013664,
+ "eval_runtime": 118.4061,
+ "eval_samples_per_second": 6.866,
+ "eval_steps_per_second": 3.437,
+ "eval_wer": 0.04358172740422752,
+ "step": 27829
 },
 {
 "epoch": 18.0,
 "learning_rate": 1.1111111111111112e-05,
- "loss": 0.0171,
- "step": 29106
+ "loss": 0.018,
+ "step": 29466
 },
 {
 "epoch": 18.0,
- "eval_loss": 0.3317033350467682,
- "eval_per": 0.032766418543295955,
- "eval_runtime": 117.8323,
- "eval_samples_per_second": 6.9,
- "eval_steps_per_second": 3.454,
- "eval_wer": 0.043021790863702114,
- "step": 29106
+ "eval_loss": 0.3673172891139984,
+ "eval_per": 0.03403844341844907,
+ "eval_runtime": 118.1876,
+ "eval_samples_per_second": 6.879,
+ "eval_steps_per_second": 3.444,
+ "eval_wer": 0.04446829359339275,
+ "step": 29466
 },
 {
 "epoch": 19.0,
 "learning_rate": 5.555555555555556e-06,
- "loss": 0.0155,
- "step": 30723
+ "loss": 0.0164,
+ "step": 31103
 },
 {
 "epoch": 19.0,
- "eval_loss": 0.3248533606529236,
- "eval_per": 0.03210685008951286,
- "eval_runtime": 118.4543,
- "eval_samples_per_second": 6.863,
- "eval_steps_per_second": 3.436,
- "eval_wer": 0.04227520880966824,
- "step": 30723
+ "eval_loss": 0.35759395360946655,
+ "eval_per": 0.03271930651088288,
+ "eval_runtime": 119.0412,
+ "eval_samples_per_second": 6.83,
+ "eval_steps_per_second": 3.419,
+ "eval_wer": 0.043208436377210585,
+ "step": 31103
 },
 {
 "epoch": 20.0,
 "learning_rate": 0.0,
- "loss": 0.0132,
- "step": 32340
+ "loss": 0.0128,
+ "step": 32740
 },
 {
 "epoch": 20.0,
- "eval_loss": 0.31921908259391785,
- "eval_per": 0.032577970413643645,
- "eval_runtime": 118.1674,
- "eval_samples_per_second": 6.88,
- "eval_steps_per_second": 3.444,
- "eval_wer": 0.04292846810694788,
- "step": 32340
+ "eval_loss": 0.3501162827014923,
+ "eval_per": 0.03269575049467634,
+ "eval_runtime": 118.2142,
+ "eval_samples_per_second": 6.877,
+ "eval_steps_per_second": 3.443,
+ "eval_wer": 0.04316177499883347,
+ "step": 32740
 },
 {
 "epoch": 20.0,
- "step": 32340,
- "total_flos": 1.0355998175044045e+19,
- "train_loss": 0.54961431122298,
- "train_runtime": 19355.5909,
- "train_samples_per_second": 3.342,
- "train_steps_per_second": 1.671
+ "step": 32740,
+ "total_flos": 1.0473975577487264e+19,
+ "train_loss": 0.5159456469186866,
+ "train_runtime": 19614.1198,
+ "train_samples_per_second": 3.337,
+ "train_steps_per_second": 1.669
 }
 ],
 "logging_steps": 500,
- "max_steps": 32340,
+ "max_steps": 32740,
 "num_train_epochs": 20,
 "save_steps": 500,
- "total_flos": 1.0355998175044045e+19,
+ "total_flos": 1.0473975577487264e+19,
 "trial_name": null,
 "trial_params": null
 }
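
The fields in this diff (`best_metric`, `best_model_checkpoint`, `global_step`, and the per-epoch `eval_*` records) follow the standard Hugging Face `Trainer` layout, so the move of the best checkpoint from `checkpoint-16170` to `checkpoint-8185` can be read back programmatically. A sketch, assuming the records sit under the usual `log_history` key:

```python
# Hedged sketch: inspect a Trainer-produced trainer_state.json.
# Field names are the standard Trainer ones; "log_history" is assumed
# (it is the usual key for the list of records shown in the diff above).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("best eval loss:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

for rec in state.get("log_history", []):
    if "eval_loss" in rec:  # keep only the per-epoch evaluation records
        print(f'epoch {rec["epoch"]:>4.1f}  '
              f'loss {rec["eval_loss"]:.4f}  '
              f'wer {rec["eval_wer"]:.4f}  '
              f'per {rec["eval_per"]:.4f}')
```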
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:84b4a7ac0e1c6b093474175c4fd5e8c724dc3907c9fb9bd69ef08bd309794b0d
- size 4536
+ oid sha256:c84c431828ae995bf9a2d808d39b34e50bb5c6715b86b7684e2870eeb4303e57
+ size 4600
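
Unlike the model weights, `training_args.bin` is a small pickled `TrainingArguments` object (note the 4.6 kB size), so it can be inspected directly with `torch.load`; a sketch, with the usual caveat that pickles should only be loaded from trusted sources:

```python
# Hedged sketch: peek at the pickled TrainingArguments in training_args.bin.
# Requires a compatible transformers install (4.35.0 here, per the diff);
# pickles execute code on load, so only do this with files you trust.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)   # typically "TrainingArguments"
print(args.num_train_epochs) # 20 for this run, per the training logs
print(args.learning_rate, args.per_device_train_batch_size)
```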