stanfous commited on
Commit
71e2448
1 Parent(s): fcedba0
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_name_or_path": "facebook/wav2vec2-xls-r-1b",
3
- "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
  "add_adapter": false,
@@ -49,22 +49,22 @@
49
  "feat_extract_activation": "gelu",
50
  "feat_extract_dropout": 0.0,
51
  "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.1,
53
  "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
  "hidden_act": "gelu",
56
- "hidden_dropout": 0.1,
57
  "hidden_size": 1280,
58
  "initializer_range": 0.02,
59
  "intermediate_size": 5120,
60
  "layer_norm_eps": 1e-05,
61
- "layerdrop": 0.0,
62
- "mask_feature_length": 64,
63
  "mask_feature_min_masks": 0,
64
- "mask_feature_prob": 0.75,
65
  "mask_time_length": 10,
66
  "mask_time_min_masks": 2,
67
- "mask_time_prob": 0.75,
68
  "model_type": "wav2vec2",
69
  "num_adapter_layers": 3,
70
  "num_attention_heads": 16,
 
1
  {
2
  "_name_or_path": "facebook/wav2vec2-xls-r-1b",
3
+ "activation_dropout": 0.05,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
  "add_adapter": false,
 
49
  "feat_extract_activation": "gelu",
50
  "feat_extract_dropout": 0.0,
51
  "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.05,
53
  "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.05,
55
  "hidden_act": "gelu",
56
+ "hidden_dropout": 0.05,
57
  "hidden_size": 1280,
58
  "initializer_range": 0.02,
59
  "intermediate_size": 5120,
60
  "layer_norm_eps": 1e-05,
61
+ "layerdrop": 0.05,
62
+ "mask_feature_length": 10,
63
  "mask_feature_min_masks": 0,
64
+ "mask_feature_prob": 0.0,
65
  "mask_time_length": 10,
66
  "mask_time_min_masks": 2,
67
+ "mask_time_prob": 0.05,
68
  "model_type": "wav2vec2",
69
  "num_adapter_layers": 3,
70
  "num_attention_heads": 16,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0ef01c35610eab6fb41f429a21addceaceee697cf2e36e693a07badd8f05baf
3
  size 3850538161
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb9a8b57162c1c438d0a2b0d1085d13ac7d3fd09f0d51460738c91258f81a1e
3
  size 3850538161
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./output_wav2vec2_1b_model", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./output_wav2vec2_1b_model_v2", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
trainer_state.json CHANGED
@@ -1,1216 +1,736 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.4867677668748143,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.04,
12
- "learning_rate": 4.960000000000001e-06,
13
- "loss": 4.2847,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.04,
18
- "eval_loss": 9.491365432739258,
19
- "eval_runtime": 68.325,
20
- "eval_samples_per_second": 14.636,
21
- "eval_steps_per_second": 1.829,
22
- "eval_wer": 1.0,
23
  "step": 500
24
  },
25
  {
26
  "epoch": 0.07,
27
- "learning_rate": 9.960000000000001e-06,
28
- "loss": 2.9257,
29
  "step": 1000
30
  },
31
  {
32
  "epoch": 0.07,
33
- "eval_loss": 2.919541358947754,
34
- "eval_runtime": 69.2986,
35
- "eval_samples_per_second": 14.43,
36
- "eval_steps_per_second": 1.804,
37
- "eval_wer": 0.9981783220321829,
38
  "step": 1000
39
  },
40
  {
41
  "epoch": 0.11,
42
- "learning_rate": 9.962852007189935e-06,
43
- "loss": 2.1055,
44
  "step": 1500
45
  },
46
  {
47
  "epoch": 0.11,
48
- "eval_loss": 0.6934791207313538,
49
- "eval_runtime": 66.8756,
50
- "eval_samples_per_second": 14.953,
51
- "eval_steps_per_second": 1.869,
52
- "eval_wer": 0.5524744459062848,
53
  "step": 1500
54
  },
55
  {
56
  "epoch": 0.15,
57
- "learning_rate": 9.925479328939486e-06,
58
- "loss": 1.636,
59
  "step": 2000
60
  },
61
  {
62
  "epoch": 0.15,
63
- "eval_loss": 0.5287454724311829,
64
- "eval_runtime": 71.8895,
65
- "eval_samples_per_second": 13.91,
66
- "eval_steps_per_second": 1.739,
67
- "eval_wer": 0.4343689909928145,
68
  "step": 2000
69
  },
70
  {
71
  "epoch": 0.19,
72
- "learning_rate": 9.888031755542241e-06,
73
- "loss": 1.4987,
74
  "step": 2500
75
  },
76
  {
77
  "epoch": 0.19,
78
- "eval_loss": 0.46598005294799805,
79
- "eval_runtime": 66.9977,
80
- "eval_samples_per_second": 14.926,
81
- "eval_steps_per_second": 1.866,
82
- "eval_wer": 0.37212832709239957,
83
  "step": 2500
84
  },
85
  {
86
  "epoch": 0.22,
87
- "learning_rate": 9.850584182144997e-06,
88
- "loss": 1.4193,
89
  "step": 3000
90
  },
91
  {
92
  "epoch": 0.22,
93
- "eval_loss": 0.42630982398986816,
94
- "eval_runtime": 67.5683,
95
- "eval_samples_per_second": 14.8,
96
- "eval_steps_per_second": 1.85,
97
- "eval_wer": 0.3432850926019634,
98
  "step": 3000
99
  },
100
  {
101
  "epoch": 0.26,
102
- "learning_rate": 9.813211503894548e-06,
103
- "loss": 1.3742,
104
  "step": 3500
105
  },
106
  {
107
  "epoch": 0.26,
108
- "eval_loss": 0.39030832052230835,
109
- "eval_runtime": 66.3672,
110
- "eval_samples_per_second": 15.068,
111
- "eval_steps_per_second": 1.883,
112
- "eval_wer": 0.3097864588604392,
113
  "step": 3500
114
  },
115
  {
116
  "epoch": 0.3,
117
- "learning_rate": 9.775763930497305e-06,
118
- "loss": 1.3247,
119
  "step": 4000
120
  },
121
  {
122
  "epoch": 0.3,
123
- "eval_loss": 0.36887359619140625,
124
- "eval_runtime": 66.4986,
125
- "eval_samples_per_second": 15.038,
126
- "eval_steps_per_second": 1.88,
127
- "eval_wer": 0.29986843436899097,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.33,
132
- "learning_rate": 9.738316357100061e-06,
133
- "loss": 1.2996,
134
  "step": 4500
135
  },
136
  {
137
  "epoch": 0.33,
138
- "eval_loss": 0.36502307653427124,
139
- "eval_runtime": 67.8152,
140
- "eval_samples_per_second": 14.746,
141
- "eval_steps_per_second": 1.843,
142
- "eval_wer": 0.29126606618763284,
143
  "step": 4500
144
  },
145
  {
146
  "epoch": 0.37,
147
- "learning_rate": 9.700868783702818e-06,
148
- "loss": 1.2644,
149
  "step": 5000
150
  },
151
  {
152
  "epoch": 0.37,
153
- "eval_loss": 0.360256165266037,
154
- "eval_runtime": 72.128,
155
- "eval_samples_per_second": 13.864,
156
- "eval_steps_per_second": 1.733,
157
- "eval_wer": 0.283270923995547,
158
  "step": 5000
159
  },
160
  {
161
  "epoch": 0.41,
162
- "learning_rate": 9.663421210305572e-06,
163
- "loss": 1.2435,
164
  "step": 5500
165
  },
166
  {
167
  "epoch": 0.41,
168
- "eval_loss": 0.34515851736068726,
169
- "eval_runtime": 69.6418,
170
- "eval_samples_per_second": 14.359,
171
- "eval_steps_per_second": 1.795,
172
- "eval_wer": 0.27659143811355125,
173
  "step": 5500
174
  },
175
  {
176
  "epoch": 0.45,
177
- "learning_rate": 9.625973636908329e-06,
178
- "loss": 1.2311,
179
  "step": 6000
180
  },
181
  {
182
  "epoch": 0.45,
183
- "eval_loss": 0.3393237292766571,
184
- "eval_runtime": 67.6501,
185
- "eval_samples_per_second": 14.782,
186
- "eval_steps_per_second": 1.848,
187
- "eval_wer": 0.27244206052019027,
188
  "step": 6000
189
  },
190
  {
191
  "epoch": 0.48,
192
- "learning_rate": 9.588526063511085e-06,
193
- "loss": 1.2169,
194
  "step": 6500
195
  },
196
  {
197
  "epoch": 0.48,
198
- "eval_loss": 0.33635956048965454,
199
- "eval_runtime": 66.3045,
200
- "eval_samples_per_second": 15.082,
201
- "eval_steps_per_second": 1.885,
202
- "eval_wer": 0.26019633640319806,
203
  "step": 6500
204
  },
205
  {
206
  "epoch": 0.52,
207
- "learning_rate": 9.551078490113842e-06,
208
- "loss": 1.2028,
209
  "step": 7000
210
  },
211
  {
212
  "epoch": 0.52,
213
- "eval_loss": 0.32824915647506714,
214
- "eval_runtime": 65.7731,
215
- "eval_samples_per_second": 15.204,
216
- "eval_steps_per_second": 1.9,
217
- "eval_wer": 0.2574638194514725,
218
  "step": 7000
219
  },
220
  {
221
  "epoch": 0.56,
222
- "learning_rate": 9.513630916716598e-06,
223
- "loss": 1.1934,
224
  "step": 7500
225
  },
226
  {
227
  "epoch": 0.56,
228
- "eval_loss": 0.3112964332103729,
229
- "eval_runtime": 66.2591,
230
- "eval_samples_per_second": 15.092,
231
- "eval_steps_per_second": 1.887,
232
- "eval_wer": 0.25311203319502074,
233
  "step": 7500
234
  },
235
  {
236
  "epoch": 0.59,
237
- "learning_rate": 9.476258238466149e-06,
238
- "loss": 1.1835,
239
  "step": 8000
240
  },
241
  {
242
  "epoch": 0.59,
243
- "eval_loss": 0.3178713917732239,
244
- "eval_runtime": 66.2586,
245
- "eval_samples_per_second": 15.092,
246
- "eval_steps_per_second": 1.887,
247
- "eval_wer": 0.25392166784738385,
248
  "step": 8000
249
  },
250
  {
251
  "epoch": 0.63,
252
- "learning_rate": 9.4388855602157e-06,
253
- "loss": 1.1745,
254
  "step": 8500
255
  },
256
  {
257
  "epoch": 0.63,
258
- "eval_loss": 0.31336280703544617,
259
- "eval_runtime": 67.0368,
260
- "eval_samples_per_second": 14.917,
261
- "eval_steps_per_second": 1.865,
262
- "eval_wer": 0.2527072158688392,
263
  "step": 8500
264
  },
265
  {
266
  "epoch": 0.67,
267
- "learning_rate": 9.401437986818455e-06,
268
- "loss": 1.1649,
269
  "step": 9000
270
  },
271
  {
272
  "epoch": 0.67,
273
- "eval_loss": 0.3034641742706299,
274
- "eval_runtime": 66.9062,
275
- "eval_samples_per_second": 14.946,
276
- "eval_steps_per_second": 1.868,
277
- "eval_wer": 0.24299160004048173,
278
  "step": 9000
279
  },
280
  {
281
  "epoch": 0.71,
282
- "learning_rate": 9.363990413421211e-06,
283
- "loss": 1.1581,
284
  "step": 9500
285
  },
286
  {
287
  "epoch": 0.71,
288
- "eval_loss": 0.296359121799469,
289
- "eval_runtime": 65.5061,
290
- "eval_samples_per_second": 15.266,
291
- "eval_steps_per_second": 1.908,
292
- "eval_wer": 0.24056269608339237,
293
  "step": 9500
294
  },
295
  {
296
  "epoch": 0.74,
297
- "learning_rate": 9.326542840023968e-06,
298
- "loss": 1.1486,
299
  "step": 10000
300
  },
301
  {
302
  "epoch": 0.74,
303
- "eval_loss": 0.30081668496131897,
304
- "eval_runtime": 67.6588,
305
- "eval_samples_per_second": 14.78,
306
- "eval_steps_per_second": 1.848,
307
- "eval_wer": 0.23853860945248456,
308
  "step": 10000
309
  },
310
  {
311
  "epoch": 0.78,
312
- "learning_rate": 9.289095266626722e-06,
313
- "loss": 1.1381,
314
  "step": 10500
315
  },
316
  {
317
  "epoch": 0.78,
318
- "eval_loss": 0.2892506718635559,
319
- "eval_runtime": 67.7871,
320
- "eval_samples_per_second": 14.752,
321
- "eval_steps_per_second": 1.844,
322
- "eval_wer": 0.23772897480012145,
323
  "step": 10500
324
  },
325
  {
326
  "epoch": 0.82,
327
- "learning_rate": 9.25164769322948e-06,
328
- "loss": 1.137,
329
  "step": 11000
330
  },
331
  {
332
  "epoch": 0.82,
333
- "eval_loss": 0.29484400153160095,
334
- "eval_runtime": 66.6708,
335
- "eval_samples_per_second": 14.999,
336
- "eval_steps_per_second": 1.875,
337
- "eval_wer": 0.23550247950612285,
338
  "step": 11000
339
  },
340
  {
341
  "epoch": 0.85,
342
- "learning_rate": 9.214200119832237e-06,
343
- "loss": 1.1356,
344
  "step": 11500
345
  },
346
  {
347
  "epoch": 0.85,
348
- "eval_loss": 0.2855495512485504,
349
- "eval_runtime": 65.5923,
350
- "eval_samples_per_second": 15.246,
351
- "eval_steps_per_second": 1.906,
352
- "eval_wer": 0.2341868231960328,
353
  "step": 11500
354
  },
355
  {
356
  "epoch": 0.89,
357
- "learning_rate": 9.176752546434992e-06,
358
- "loss": 1.1332,
359
  "step": 12000
360
  },
361
  {
362
  "epoch": 0.89,
363
- "eval_loss": 0.28792810440063477,
364
- "eval_runtime": 66.6576,
365
- "eval_samples_per_second": 15.002,
366
- "eval_steps_per_second": 1.875,
367
- "eval_wer": 0.2258880680093108,
368
  "step": 12000
369
  },
370
  {
371
  "epoch": 0.93,
372
- "learning_rate": 9.139304973037748e-06,
373
- "loss": 1.1233,
374
  "step": 12500
375
  },
376
  {
377
  "epoch": 0.93,
378
- "eval_loss": 0.28108343482017517,
379
- "eval_runtime": 65.8778,
380
- "eval_samples_per_second": 15.18,
381
- "eval_steps_per_second": 1.897,
382
- "eval_wer": 0.22912660661876327,
383
  "step": 12500
384
  },
385
  {
386
  "epoch": 0.97,
387
- "learning_rate": 9.101857399640503e-06,
388
- "loss": 1.121,
389
  "step": 13000
390
  },
391
  {
392
  "epoch": 0.97,
393
- "eval_loss": 0.2926315367221832,
394
- "eval_runtime": 66.2723,
395
- "eval_samples_per_second": 15.089,
396
- "eval_steps_per_second": 1.886,
397
- "eval_wer": 0.22619168100394696,
398
  "step": 13000
399
  },
400
  {
401
  "epoch": 1.0,
402
- "learning_rate": 9.06440982624326e-06,
403
- "loss": 1.1116,
404
  "step": 13500
405
  },
406
  {
407
  "epoch": 1.0,
408
- "eval_loss": 0.2861410975456238,
409
- "eval_runtime": 67.533,
410
- "eval_samples_per_second": 14.808,
411
- "eval_steps_per_second": 1.851,
412
- "eval_wer": 0.21829774314340653,
413
  "step": 13500
414
  },
415
  {
416
  "epoch": 1.04,
417
- "learning_rate": 9.026962252846017e-06,
418
- "loss": 1.1098,
419
  "step": 14000
420
  },
421
  {
422
  "epoch": 1.04,
423
- "eval_loss": 0.27931922674179077,
424
- "eval_runtime": 67.0283,
425
- "eval_samples_per_second": 14.919,
426
- "eval_steps_per_second": 1.865,
427
- "eval_wer": 0.22092905576358668,
428
  "step": 14000
429
  },
430
  {
431
- "epoch": 1.04,
432
- "learning_rate": 8.989514679448772e-06,
433
- "loss": 1.1047,
434
  "step": 14500
435
  },
436
  {
437
- "epoch": 1.04,
438
- "eval_loss": 0.27964508533477783,
439
- "eval_runtime": 67.4614,
440
- "eval_samples_per_second": 14.823,
441
- "eval_steps_per_second": 1.853,
442
- "eval_wer": 0.22062544276895052,
443
  "step": 14500
444
  },
445
  {
446
- "epoch": 1.07,
447
- "learning_rate": 8.952067106051529e-06,
448
- "loss": 1.0967,
449
  "step": 15000
450
  },
451
  {
452
- "epoch": 1.07,
453
- "eval_loss": 0.27417540550231934,
454
- "eval_runtime": 68.8465,
455
- "eval_samples_per_second": 14.525,
456
- "eval_steps_per_second": 1.816,
457
- "eval_wer": 0.21779172148567957,
458
  "step": 15000
459
  },
460
  {
461
- "epoch": 1.11,
462
- "learning_rate": 8.91469442780108e-06,
463
- "loss": 1.0979,
464
  "step": 15500
465
  },
466
  {
467
- "epoch": 1.11,
468
- "eval_loss": 0.27622443437576294,
469
- "eval_runtime": 67.9285,
470
- "eval_samples_per_second": 14.721,
471
- "eval_steps_per_second": 1.84,
472
- "eval_wer": 0.21961339945349662,
473
  "step": 15500
474
  },
475
  {
476
- "epoch": 1.15,
477
- "learning_rate": 8.877246854403834e-06,
478
- "loss": 1.0984,
479
  "step": 16000
480
  },
481
  {
482
- "epoch": 1.15,
483
- "eval_loss": 0.27836254239082336,
484
- "eval_runtime": 68.0427,
485
- "eval_samples_per_second": 14.697,
486
- "eval_steps_per_second": 1.837,
487
- "eval_wer": 0.2207266471004959,
488
  "step": 16000
489
  },
490
  {
491
- "epoch": 1.19,
492
- "learning_rate": 8.83979928100659e-06,
493
- "loss": 1.0865,
494
  "step": 16500
495
  },
496
  {
497
- "epoch": 1.19,
498
- "eval_loss": 0.27439314126968384,
499
- "eval_runtime": 67.5315,
500
- "eval_samples_per_second": 14.808,
501
- "eval_steps_per_second": 1.851,
502
- "eval_wer": 0.21799413014877037,
503
  "step": 16500
504
  },
505
  {
506
- "epoch": 1.22,
507
- "learning_rate": 8.802426602756142e-06,
508
- "loss": 1.089,
509
  "step": 17000
510
  },
511
  {
512
- "epoch": 1.22,
513
- "eval_loss": 0.2671995460987091,
514
- "eval_runtime": 67.2515,
515
- "eval_samples_per_second": 14.87,
516
- "eval_steps_per_second": 1.859,
517
- "eval_wer": 0.21586883918631716,
518
  "step": 17000
519
  },
520
  {
521
- "epoch": 1.26,
522
- "learning_rate": 8.764979029358898e-06,
523
- "loss": 1.087,
524
  "step": 17500
525
  },
526
  {
527
- "epoch": 1.26,
528
- "eval_loss": 0.2648696303367615,
529
- "eval_runtime": 67.0017,
530
- "eval_samples_per_second": 14.925,
531
- "eval_steps_per_second": 1.866,
532
- "eval_wer": 0.21941099079040582,
533
  "step": 17500
534
  },
535
  {
536
- "epoch": 1.3,
537
- "learning_rate": 8.727531455961654e-06,
538
- "loss": 1.0798,
539
  "step": 18000
540
  },
541
  {
542
- "epoch": 1.3,
543
- "eval_loss": 0.26909056305885315,
544
- "eval_runtime": 68.173,
545
- "eval_samples_per_second": 14.669,
546
- "eval_steps_per_second": 1.834,
547
- "eval_wer": 0.2134399352292278,
548
  "step": 18000
549
  },
550
  {
551
- "epoch": 1.33,
552
- "learning_rate": 8.690083882564411e-06,
553
- "loss": 1.0761,
554
  "step": 18500
555
  },
556
  {
557
- "epoch": 1.33,
558
- "eval_loss": 0.26856786012649536,
559
- "eval_runtime": 67.7073,
560
- "eval_samples_per_second": 14.769,
561
- "eval_steps_per_second": 1.846,
562
- "eval_wer": 0.21192187025604695,
563
  "step": 18500
564
  },
565
  {
566
- "epoch": 1.37,
567
- "learning_rate": 8.652636309167166e-06,
568
- "loss": 1.0756,
569
  "step": 19000
570
  },
571
  {
572
- "epoch": 1.37,
573
- "eval_loss": 0.259956419467926,
574
- "eval_runtime": 67.7698,
575
- "eval_samples_per_second": 14.756,
576
- "eval_steps_per_second": 1.844,
577
- "eval_wer": 0.20706406234186822,
578
  "step": 19000
579
  },
580
  {
581
- "epoch": 1.41,
582
- "learning_rate": 8.615188735769922e-06,
583
- "loss": 1.0698,
584
  "step": 19500
585
  },
586
  {
587
- "epoch": 1.41,
588
- "eval_loss": 0.2702154517173767,
589
- "eval_runtime": 66.8292,
590
- "eval_samples_per_second": 14.964,
591
- "eval_steps_per_second": 1.87,
592
- "eval_wer": 0.21627365651249875,
593
  "step": 19500
594
  },
595
  {
596
- "epoch": 1.45,
597
- "learning_rate": 8.577741162372678e-06,
598
- "loss": 1.0766,
599
  "step": 20000
600
  },
601
  {
602
- "epoch": 1.45,
603
- "eval_loss": 0.26256585121154785,
604
- "eval_runtime": 66.5305,
605
- "eval_samples_per_second": 15.031,
606
- "eval_steps_per_second": 1.879,
607
- "eval_wer": 0.20888574030968526,
608
  "step": 20000
609
  },
610
  {
611
- "epoch": 1.04,
612
- "learning_rate": 8.540293588975435e-06,
613
- "loss": 1.0656,
614
  "step": 20500
615
  },
616
  {
617
- "epoch": 1.04,
618
- "eval_loss": 0.26638808846473694,
619
- "eval_runtime": 67.2186,
620
- "eval_samples_per_second": 14.877,
621
- "eval_steps_per_second": 1.86,
622
- "eval_wer": 0.2074688796680498,
623
  "step": 20500
624
  },
625
  {
626
- "epoch": 1.07,
627
- "learning_rate": 8.502846015578191e-06,
628
- "loss": 1.0624,
629
  "step": 21000
630
  },
631
  {
632
- "epoch": 1.07,
633
- "eval_loss": 0.25911062955856323,
634
- "eval_runtime": 66.2654,
635
- "eval_samples_per_second": 15.091,
636
- "eval_steps_per_second": 1.886,
637
- "eval_wer": 0.20605201902641432,
638
  "step": 21000
639
  },
640
  {
641
- "epoch": 1.11,
642
- "learning_rate": 8.465398442180948e-06,
643
- "loss": 1.0603,
644
  "step": 21500
645
  },
646
  {
647
- "epoch": 1.11,
648
- "eval_loss": 0.25329792499542236,
649
- "eval_runtime": 67.3851,
650
- "eval_samples_per_second": 14.84,
651
- "eval_steps_per_second": 1.855,
652
- "eval_wer": 0.2115170529298654,
653
  "step": 21500
654
  },
655
  {
656
- "epoch": 1.15,
657
- "learning_rate": 8.427950868783703e-06,
658
- "loss": 1.0665,
659
  "step": 22000
660
  },
661
  {
662
- "epoch": 1.15,
663
- "eval_loss": 0.24775004386901855,
664
- "eval_runtime": 66.795,
665
- "eval_samples_per_second": 14.971,
666
- "eval_steps_per_second": 1.871,
667
- "eval_wer": 0.20483756704786965,
668
  "step": 22000
669
  },
670
  {
671
- "epoch": 1.19,
672
- "learning_rate": 8.390503295386459e-06,
673
- "loss": 1.0617,
674
  "step": 22500
675
  },
676
  {
677
- "epoch": 1.19,
678
- "eval_loss": 0.25084131956100464,
679
- "eval_runtime": 66.9347,
680
- "eval_samples_per_second": 14.94,
681
- "eval_steps_per_second": 1.867,
682
- "eval_wer": 0.2115170529298654,
683
  "step": 22500
684
  },
685
  {
686
- "epoch": 1.22,
687
- "learning_rate": 8.35313061713601e-06,
688
- "loss": 1.062,
689
  "step": 23000
690
  },
691
  {
692
- "epoch": 1.22,
693
- "eval_loss": 0.2520281672477722,
694
- "eval_runtime": 66.385,
695
- "eval_samples_per_second": 15.064,
696
- "eval_steps_per_second": 1.883,
697
- "eval_wer": 0.2063556320210505,
698
  "step": 23000
699
  },
700
  {
701
- "epoch": 1.26,
702
- "learning_rate": 8.315683043738766e-06,
703
- "loss": 1.0648,
704
  "step": 23500
705
  },
706
  {
707
- "epoch": 1.26,
708
- "eval_loss": 0.2521745264530182,
709
- "eval_runtime": 66.9658,
710
- "eval_samples_per_second": 14.933,
711
- "eval_steps_per_second": 1.867,
712
- "eval_wer": 0.2051411800425058,
713
  "step": 23500
714
  },
715
- {
716
- "epoch": 1.3,
717
- "learning_rate": 8.278235470341523e-06,
718
- "loss": 1.0537,
719
- "step": 24000
720
- },
721
- {
722
- "epoch": 1.3,
723
- "eval_loss": 0.25272318720817566,
724
- "eval_runtime": 66.1076,
725
- "eval_samples_per_second": 15.127,
726
- "eval_steps_per_second": 1.891,
727
- "eval_wer": 0.20493877137941505,
728
- "step": 24000
729
- },
730
- {
731
- "epoch": 1.33,
732
- "learning_rate": 8.24078789694428e-06,
733
- "loss": 1.0497,
734
- "step": 24500
735
- },
736
- {
737
- "epoch": 1.33,
738
- "eval_loss": 0.2538837492465973,
739
- "eval_runtime": 66.1852,
740
- "eval_samples_per_second": 15.109,
741
- "eval_steps_per_second": 1.889,
742
- "eval_wer": 0.20159902843841718,
743
- "step": 24500
744
- },
745
- {
746
- "epoch": 1.37,
747
- "learning_rate": 8.20341521869383e-06,
748
- "loss": 1.0497,
749
- "step": 25000
750
- },
751
- {
752
- "epoch": 1.37,
753
- "eval_loss": 0.25169914960861206,
754
- "eval_runtime": 68.2448,
755
- "eval_samples_per_second": 14.653,
756
- "eval_steps_per_second": 1.832,
757
- "eval_wer": 0.20170023276996255,
758
- "step": 25000
759
- },
760
- {
761
- "epoch": 1.41,
762
- "learning_rate": 8.165967645296585e-06,
763
- "loss": 1.0455,
764
- "step": 25500
765
- },
766
- {
767
- "epoch": 1.41,
768
- "eval_loss": 0.24599966406822205,
769
- "eval_runtime": 66.1513,
770
- "eval_samples_per_second": 15.117,
771
- "eval_steps_per_second": 1.89,
772
- "eval_wer": 0.20058698512296327,
773
- "step": 25500
774
- },
775
- {
776
- "epoch": 1.45,
777
- "learning_rate": 8.128520071899341e-06,
778
- "loss": 1.0372,
779
- "step": 26000
780
- },
781
- {
782
- "epoch": 1.45,
783
- "eval_loss": 0.24594834446907043,
784
- "eval_runtime": 66.7808,
785
- "eval_samples_per_second": 14.974,
786
- "eval_steps_per_second": 1.872,
787
- "eval_wer": 0.19623519886651147,
788
- "step": 26000
789
- },
790
- {
791
- "epoch": 1.48,
792
- "learning_rate": 8.091072498502098e-06,
793
- "loss": 1.0413,
794
- "step": 26500
795
- },
796
- {
797
- "epoch": 1.48,
798
- "eval_loss": 0.24532605707645416,
799
- "eval_runtime": 69.6969,
800
- "eval_samples_per_second": 14.348,
801
- "eval_steps_per_second": 1.793,
802
- "eval_wer": 0.20210505009614413,
803
- "step": 26500
804
- },
805
- {
806
- "epoch": 1.52,
807
- "learning_rate": 8.053624925104854e-06,
808
- "loss": 1.0431,
809
- "step": 27000
810
- },
811
- {
812
- "epoch": 1.52,
813
- "eval_loss": 0.24955426156520844,
814
- "eval_runtime": 66.1954,
815
- "eval_samples_per_second": 15.107,
816
- "eval_steps_per_second": 1.888,
817
- "eval_wer": 0.19977735047060013,
818
- "step": 27000
819
- },
820
- {
821
- "epoch": 1.56,
822
- "learning_rate": 8.01617735170761e-06,
823
- "loss": 1.0334,
824
- "step": 27500
825
- },
826
- {
827
- "epoch": 1.56,
828
- "eval_loss": 0.25152114033699036,
829
- "eval_runtime": 65.6055,
830
- "eval_samples_per_second": 15.243,
831
- "eval_steps_per_second": 1.905,
832
- "eval_wer": 0.19805687683432852,
833
- "step": 27500
834
- },
835
- {
836
- "epoch": 1.59,
837
- "learning_rate": 7.978729778310365e-06,
838
- "loss": 1.0431,
839
- "step": 28000
840
- },
841
- {
842
- "epoch": 1.59,
843
- "eval_loss": 0.2484857439994812,
844
- "eval_runtime": 65.8366,
845
- "eval_samples_per_second": 15.189,
846
- "eval_steps_per_second": 1.899,
847
- "eval_wer": 0.19987855480214553,
848
- "step": 28000
849
- },
850
- {
851
- "epoch": 1.63,
852
- "learning_rate": 7.941282204913122e-06,
853
- "loss": 1.0424,
854
- "step": 28500
855
- },
856
- {
857
- "epoch": 1.63,
858
- "eval_loss": 0.246125265955925,
859
- "eval_runtime": 66.1461,
860
- "eval_samples_per_second": 15.118,
861
- "eval_steps_per_second": 1.89,
862
- "eval_wer": 0.19896771581823702,
863
- "step": 28500
864
- },
865
- {
866
- "epoch": 1.67,
867
- "learning_rate": 7.903909526662673e-06,
868
- "loss": 1.0364,
869
- "step": 29000
870
- },
871
- {
872
- "epoch": 1.67,
873
- "eval_loss": 0.25167015194892883,
874
- "eval_runtime": 66.4041,
875
- "eval_samples_per_second": 15.059,
876
- "eval_steps_per_second": 1.882,
877
- "eval_wer": 0.2001821677967817,
878
- "step": 29000
879
- },
880
- {
881
- "epoch": 1.71,
882
- "learning_rate": 7.866461953265429e-06,
883
- "loss": 1.0321,
884
- "step": 29500
885
- },
886
- {
887
- "epoch": 1.71,
888
- "eval_loss": 0.2517380714416504,
889
- "eval_runtime": 67.6899,
890
- "eval_samples_per_second": 14.773,
891
- "eval_steps_per_second": 1.847,
892
- "eval_wer": 0.19552676854569376,
893
- "step": 29500
894
- },
895
- {
896
- "epoch": 1.74,
897
- "learning_rate": 7.829014379868186e-06,
898
- "loss": 1.036,
899
- "step": 30000
900
- },
901
- {
902
- "epoch": 1.74,
903
- "eval_loss": 0.25128865242004395,
904
- "eval_runtime": 67.6941,
905
- "eval_samples_per_second": 14.772,
906
- "eval_steps_per_second": 1.847,
907
- "eval_wer": 0.20068818945450864,
908
- "step": 30000
909
- },
910
  {
911
  "epoch": 1.78,
912
- "learning_rate": 7.791566806470942e-06,
913
- "loss": 1.0245,
914
- "step": 30500
915
  },
916
  {
917
  "epoch": 1.78,
918
- "eval_loss": 0.24514135718345642,
919
- "eval_runtime": 67.3671,
920
- "eval_samples_per_second": 14.844,
921
- "eval_steps_per_second": 1.856,
922
- "eval_wer": 0.19846169416051007,
923
- "step": 30500
924
- },
925
- {
926
- "epoch": 1.82,
927
- "learning_rate": 7.754119233073697e-06,
928
- "loss": 1.0296,
929
- "step": 31000
930
- },
931
- {
932
- "epoch": 1.82,
933
- "eval_loss": 0.24760562181472778,
934
- "eval_runtime": 65.961,
935
- "eval_samples_per_second": 15.16,
936
- "eval_steps_per_second": 1.895,
937
- "eval_wer": 0.19684242485578382,
938
- "step": 31000
939
- },
940
- {
941
- "epoch": 1.85,
942
- "learning_rate": 7.716746554823248e-06,
943
- "loss": 1.0254,
944
- "step": 31500
945
- },
946
- {
947
- "epoch": 1.85,
948
- "eval_loss": 0.23863530158996582,
949
- "eval_runtime": 65.594,
950
- "eval_samples_per_second": 15.245,
951
- "eval_steps_per_second": 1.906,
952
- "eval_wer": 0.19866410282360086,
953
- "step": 31500
954
- },
955
- {
956
- "epoch": 1.89,
957
- "learning_rate": 7.679298981426004e-06,
958
- "loss": 1.0256,
959
- "step": 32000
960
- },
961
- {
962
- "epoch": 1.89,
963
- "eval_loss": 0.24572543799877167,
964
- "eval_runtime": 68.2944,
965
- "eval_samples_per_second": 14.642,
966
- "eval_steps_per_second": 1.83,
967
- "eval_wer": 0.1928954559255136,
968
- "step": 32000
969
- },
970
- {
971
- "epoch": 1.93,
972
- "learning_rate": 7.64185140802876e-06,
973
- "loss": 1.0224,
974
- "step": 32500
975
- },
976
- {
977
- "epoch": 1.93,
978
- "eval_loss": 0.24238397181034088,
979
- "eval_runtime": 65.9244,
980
- "eval_samples_per_second": 15.169,
981
- "eval_steps_per_second": 1.896,
982
- "eval_wer": 0.1928954559255136,
983
- "step": 32500
984
- },
985
- {
986
- "epoch": 1.97,
987
- "learning_rate": 7.604403834631516e-06,
988
- "loss": 1.0176,
989
- "step": 33000
990
- },
991
- {
992
- "epoch": 1.97,
993
- "eval_loss": 0.24562890827655792,
994
- "eval_runtime": 66.3545,
995
- "eval_samples_per_second": 15.071,
996
- "eval_steps_per_second": 1.884,
997
- "eval_wer": 0.19684242485578382,
998
- "step": 33000
999
- },
1000
- {
1001
- "epoch": 2.0,
1002
- "learning_rate": 7.5669562612342726e-06,
1003
- "loss": 1.0143,
1004
- "step": 33500
1005
- },
1006
- {
1007
- "epoch": 2.0,
1008
- "eval_loss": 0.23884166777133942,
1009
- "eval_runtime": 66.6643,
1010
- "eval_samples_per_second": 15.001,
1011
- "eval_steps_per_second": 1.875,
1012
- "eval_wer": 0.192996660257059,
1013
- "step": 33500
1014
- },
1015
- {
1016
- "epoch": 2.04,
1017
- "learning_rate": 7.529508687837028e-06,
1018
- "loss": 1.0156,
1019
- "step": 34000
1020
- },
1021
- {
1022
- "epoch": 2.04,
1023
- "eval_loss": 0.2417694628238678,
1024
- "eval_runtime": 67.1813,
1025
- "eval_samples_per_second": 14.885,
1026
- "eval_steps_per_second": 1.861,
1027
- "eval_wer": 0.19532435988260297,
1028
- "step": 34000
1029
- },
1030
- {
1031
- "epoch": 2.08,
1032
- "learning_rate": 7.492061114439785e-06,
1033
- "loss": 1.0174,
1034
- "step": 34500
1035
- },
1036
- {
1037
- "epoch": 2.08,
1038
- "eval_loss": 0.24063818156719208,
1039
- "eval_runtime": 65.9342,
1040
- "eval_samples_per_second": 15.167,
1041
- "eval_steps_per_second": 1.896,
1042
- "eval_wer": 0.19168100394696894,
1043
- "step": 34500
1044
- },
1045
- {
1046
- "epoch": 2.12,
1047
- "learning_rate": 7.4546884361893355e-06,
1048
- "loss": 1.0161,
1049
- "step": 35000
1050
- },
1051
- {
1052
- "epoch": 2.12,
1053
- "eval_loss": 0.24444276094436646,
1054
- "eval_runtime": 67.5245,
1055
- "eval_samples_per_second": 14.809,
1056
- "eval_steps_per_second": 1.851,
1057
- "eval_wer": 0.19350268191478595,
1058
- "step": 35000
1059
- },
1060
- {
1061
- "epoch": 2.15,
1062
- "learning_rate": 7.417240862792092e-06,
1063
- "loss": 1.0087,
1064
- "step": 35500
1065
- },
1066
- {
1067
- "epoch": 2.15,
1068
- "eval_loss": 0.2432408481836319,
1069
- "eval_runtime": 67.0715,
1070
- "eval_samples_per_second": 14.909,
1071
- "eval_steps_per_second": 1.864,
1072
- "eval_wer": 0.19157979961542354,
1073
- "step": 35500
1074
- },
1075
- {
1076
- "epoch": 2.19,
1077
- "learning_rate": 7.379868184541642e-06,
1078
- "loss": 1.0116,
1079
- "step": 36000
1080
- },
1081
- {
1082
- "epoch": 2.19,
1083
- "eval_loss": 0.2412765473127365,
1084
- "eval_runtime": 69.9743,
1085
- "eval_samples_per_second": 14.291,
1086
- "eval_steps_per_second": 1.786,
1087
- "eval_wer": 0.19330027325169516,
1088
- "step": 36000
1089
- },
1090
- {
1091
- "epoch": 2.23,
1092
- "learning_rate": 7.3424206111443985e-06,
1093
- "loss": 1.0077,
1094
- "step": 36500
1095
- },
1096
- {
1097
- "epoch": 2.23,
1098
- "eval_loss": 0.24223345518112183,
1099
- "eval_runtime": 66.1264,
1100
- "eval_samples_per_second": 15.123,
1101
- "eval_steps_per_second": 1.89,
1102
- "eval_wer": 0.19238943426778665,
1103
- "step": 36500
1104
- },
1105
- {
1106
- "epoch": 2.26,
1107
- "learning_rate": 7.304973037747155e-06,
1108
- "loss": 1.0037,
1109
- "step": 37000
1110
- },
1111
- {
1112
- "epoch": 2.26,
1113
- "eval_loss": 0.24134577810764313,
1114
- "eval_runtime": 66.1667,
1115
- "eval_samples_per_second": 15.113,
1116
- "eval_steps_per_second": 1.889,
1117
- "eval_wer": 0.19279425159396824,
1118
- "step": 37000
1119
- },
1120
- {
1121
- "epoch": 2.3,
1122
- "learning_rate": 7.2675254643499105e-06,
1123
- "loss": 1.0037,
1124
- "step": 37500
1125
- },
1126
- {
1127
- "epoch": 2.3,
1128
- "eval_loss": 0.23671011626720428,
1129
- "eval_runtime": 67.1919,
1130
- "eval_samples_per_second": 14.883,
1131
- "eval_steps_per_second": 1.86,
1132
- "eval_wer": 0.19137739095233275,
1133
- "step": 37500
1134
- },
1135
- {
1136
- "epoch": 2.34,
1137
- "learning_rate": 7.230077890952667e-06,
1138
- "loss": 1.0047,
1139
- "step": 38000
1140
- },
1141
- {
1142
- "epoch": 2.34,
1143
- "eval_loss": 0.23813079297542572,
1144
- "eval_runtime": 65.6673,
1145
- "eval_samples_per_second": 15.228,
1146
- "eval_steps_per_second": 1.904,
1147
- "eval_wer": 0.19117498228924198,
1148
- "step": 38000
1149
- },
1150
- {
1151
- "epoch": 2.38,
1152
- "learning_rate": 7.192630317555423e-06,
1153
- "loss": 1.0009,
1154
- "step": 38500
1155
- },
1156
- {
1157
- "epoch": 2.38,
1158
- "eval_loss": 0.23694893717765808,
1159
- "eval_runtime": 66.8681,
1160
- "eval_samples_per_second": 14.955,
1161
- "eval_steps_per_second": 1.869,
1162
- "eval_wer": 0.18834126100597107,
1163
- "step": 38500
1164
- },
1165
- {
1166
- "epoch": 2.41,
1167
- "learning_rate": 7.155182744158179e-06,
1168
- "loss": 1.0009,
1169
- "step": 39000
1170
- },
1171
- {
1172
- "epoch": 2.41,
1173
- "eval_loss": 0.23513327538967133,
1174
- "eval_runtime": 66.1285,
1175
- "eval_samples_per_second": 15.122,
1176
- "eval_steps_per_second": 1.89,
1177
- "eval_wer": 0.19036534763687885,
1178
- "step": 39000
1179
- },
1180
- {
1181
- "epoch": 2.45,
1182
- "learning_rate": 7.117735170760935e-06,
1183
- "loss": 1.0046,
1184
- "step": 39500
1185
- },
1186
- {
1187
- "epoch": 2.45,
1188
- "eval_loss": 0.2393663376569748,
1189
- "eval_runtime": 65.179,
1190
- "eval_samples_per_second": 15.342,
1191
- "eval_steps_per_second": 1.918,
1192
- "eval_wer": 0.18824005667442567,
1193
- "step": 39500
1194
- },
1195
- {
1196
- "epoch": 2.49,
1197
- "learning_rate": 7.080362492510486e-06,
1198
- "loss": 0.9973,
1199
- "step": 40000
1200
- },
1201
- {
1202
- "epoch": 2.49,
1203
- "eval_loss": 0.23612412810325623,
1204
- "eval_runtime": 66.8117,
1205
- "eval_samples_per_second": 14.967,
1206
- "eval_steps_per_second": 1.871,
1207
- "eval_wer": 0.1909725736261512,
1208
- "step": 40000
1209
  }
1210
  ],
1211
  "max_steps": 134520,
1212
  "num_train_epochs": 10,
1213
- "total_flos": 8.42103488439163e+20,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.784121320249777,
5
+ "global_step": 24000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.04,
12
+ "learning_rate": 4.9000000000000005e-06,
13
+ "loss": 4.5935,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.04,
18
+ "eval_loss": 2.184311866760254,
19
+ "eval_runtime": 62.4666,
20
+ "eval_samples_per_second": 16.009,
21
+ "eval_steps_per_second": 2.001,
22
+ "eval_wer": 0.9602743933654142,
23
  "step": 500
24
  },
25
  {
26
  "epoch": 0.07,
27
+ "learning_rate": 9.9e-06,
28
+ "loss": 1.232,
29
  "step": 1000
30
  },
31
  {
32
  "epoch": 0.07,
33
+ "eval_loss": 0.5929161310195923,
34
+ "eval_runtime": 62.6394,
35
+ "eval_samples_per_second": 15.964,
36
+ "eval_steps_per_second": 1.996,
37
+ "eval_wer": 0.45070134125115185,
38
  "step": 1000
39
  },
40
  {
41
  "epoch": 0.11,
42
+ "learning_rate": 9.963301378070702e-06,
43
+ "loss": 0.7175,
44
  "step": 1500
45
  },
46
  {
47
  "epoch": 0.11,
48
+ "eval_loss": 0.40349480509757996,
49
+ "eval_runtime": 60.7894,
50
+ "eval_samples_per_second": 16.45,
51
+ "eval_steps_per_second": 2.056,
52
+ "eval_wer": 0.3189310945018941,
53
  "step": 1500
54
  },
55
  {
56
  "epoch": 0.15,
57
+ "learning_rate": 9.925853804673457e-06,
58
+ "loss": 0.6209,
59
  "step": 2000
60
  },
61
  {
62
  "epoch": 0.15,
63
+ "eval_loss": 0.34757670760154724,
64
+ "eval_runtime": 61.0833,
65
+ "eval_samples_per_second": 16.371,
66
+ "eval_steps_per_second": 2.046,
67
+ "eval_wer": 0.28278898331114977,
68
  "step": 2000
69
  },
70
  {
71
  "epoch": 0.19,
72
+ "learning_rate": 9.888406231276214e-06,
73
+ "loss": 0.5681,
74
  "step": 2500
75
  },
76
  {
77
  "epoch": 0.19,
78
+ "eval_loss": 0.30338042974472046,
79
+ "eval_runtime": 61.5264,
80
+ "eval_samples_per_second": 16.253,
81
+ "eval_steps_per_second": 2.032,
82
+ "eval_wer": 0.25002559639602745,
83
  "step": 2500
84
  },
85
  {
86
  "epoch": 0.22,
87
+ "learning_rate": 9.85095865787897e-06,
88
+ "loss": 0.5155,
89
  "step": 3000
90
  },
91
  {
92
  "epoch": 0.22,
93
+ "eval_loss": 0.28614458441734314,
94
+ "eval_runtime": 60.5217,
95
+ "eval_samples_per_second": 16.523,
96
+ "eval_steps_per_second": 2.065,
97
+ "eval_wer": 0.22954847957407598,
98
  "step": 3000
99
  },
100
  {
101
  "epoch": 0.26,
102
+ "learning_rate": 9.813585979628521e-06,
103
+ "loss": 0.5228,
104
  "step": 3500
105
  },
106
  {
107
  "epoch": 0.26,
108
+ "eval_loss": 0.26384735107421875,
109
+ "eval_runtime": 60.6845,
110
+ "eval_samples_per_second": 16.479,
111
+ "eval_steps_per_second": 2.06,
112
+ "eval_wer": 0.22401965803214907,
113
  "step": 3500
114
  },
115
  {
116
  "epoch": 0.3,
117
+ "learning_rate": 9.776138406231277e-06,
118
+ "loss": 0.4896,
119
  "step": 4000
120
  },
121
  {
122
  "epoch": 0.3,
123
+ "eval_loss": 0.266423761844635,
124
+ "eval_runtime": 62.3289,
125
+ "eval_samples_per_second": 16.044,
126
+ "eval_steps_per_second": 2.005,
127
+ "eval_wer": 0.2162383536398075,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.33,
132
+ "learning_rate": 9.738690832834034e-06,
133
+ "loss": 0.438,
134
  "step": 4500
135
  },
136
  {
137
  "epoch": 0.33,
138
+ "eval_loss": 0.2519201636314392,
139
+ "eval_runtime": 60.2733,
140
+ "eval_samples_per_second": 16.591,
141
+ "eval_steps_per_second": 2.074,
142
+ "eval_wer": 0.21306440053240502,
143
  "step": 4500
144
  },
145
  {
146
  "epoch": 0.37,
147
+ "learning_rate": 9.701243259436789e-06,
148
+ "loss": 0.3917,
149
  "step": 5000
150
  },
151
  {
152
  "epoch": 0.37,
153
+ "eval_loss": 0.2455732226371765,
154
+ "eval_runtime": 60.2434,
155
+ "eval_samples_per_second": 16.599,
156
+ "eval_steps_per_second": 2.075,
157
+ "eval_wer": 0.20599979522883177,
158
  "step": 5000
159
  },
160
  {
161
  "epoch": 0.41,
162
+ "learning_rate": 9.663795686039545e-06,
163
+ "loss": 0.3597,
164
  "step": 5500
165
  },
166
  {
167
  "epoch": 0.41,
168
+ "eval_loss": 0.2465256154537201,
169
+ "eval_runtime": 60.0373,
170
+ "eval_samples_per_second": 16.656,
171
+ "eval_steps_per_second": 2.082,
172
+ "eval_wer": 0.19862803317292926,
173
  "step": 5500
174
  },
175
  {
176
  "epoch": 0.45,
177
+ "learning_rate": 9.626348112642301e-06,
178
+ "loss": 0.348,
179
  "step": 6000
180
  },
181
  {
182
  "epoch": 0.45,
183
+ "eval_loss": 0.23667797446250916,
184
+ "eval_runtime": 60.4285,
185
+ "eval_samples_per_second": 16.548,
186
+ "eval_steps_per_second": 2.069,
187
+ "eval_wer": 0.19627316473840484,
188
  "step": 6000
189
  },
190
  {
191
  "epoch": 0.48,
192
+ "learning_rate": 9.588900539245058e-06,
193
+ "loss": 0.3356,
194
  "step": 6500
195
  },
196
  {
197
  "epoch": 0.48,
198
+ "eval_loss": 0.23445868492126465,
199
+ "eval_runtime": 60.4449,
200
+ "eval_samples_per_second": 16.544,
201
+ "eval_steps_per_second": 2.068,
202
+ "eval_wer": 0.19432783864031944,
203
  "step": 6500
204
  },
205
  {
206
  "epoch": 0.52,
207
+ "learning_rate": 9.551452965847814e-06,
208
+ "loss": 0.3187,
209
  "step": 7000
210
  },
211
  {
212
  "epoch": 0.52,
213
+ "eval_loss": 0.2273026555776596,
214
+ "eval_runtime": 62.085,
215
+ "eval_samples_per_second": 16.107,
216
+ "eval_steps_per_second": 2.013,
217
+ "eval_wer": 0.18828708917784376,
218
  "step": 7000
219
  },
220
  {
221
  "epoch": 0.56,
222
+ "learning_rate": 9.51400539245057e-06,
223
+ "loss": 0.3175,
224
  "step": 7500
225
  },
226
  {
227
  "epoch": 0.56,
228
+ "eval_loss": 0.2206442505121231,
229
+ "eval_runtime": 61.7597,
230
+ "eval_samples_per_second": 16.192,
231
+ "eval_steps_per_second": 2.024,
232
+ "eval_wer": 0.1840892802293437,
233
  "step": 7500
234
  },
235
  {
236
  "epoch": 0.59,
237
+ "learning_rate": 9.476557819053325e-06,
238
+ "loss": 0.308,
239
  "step": 8000
240
  },
241
  {
242
  "epoch": 0.59,
243
+ "eval_loss": 0.21740911900997162,
244
+ "eval_runtime": 61.9328,
245
+ "eval_samples_per_second": 16.147,
246
+ "eval_steps_per_second": 2.018,
247
+ "eval_wer": 0.18132486945838025,
248
  "step": 8000
249
  },
250
  {
251
  "epoch": 0.63,
252
+ "learning_rate": 9.439185140802876e-06,
253
+ "loss": 0.3015,
254
  "step": 8500
255
  },
256
  {
257
  "epoch": 0.63,
258
+ "eval_loss": 0.21573692560195923,
259
+ "eval_runtime": 61.9764,
260
+ "eval_samples_per_second": 16.135,
261
+ "eval_steps_per_second": 2.017,
262
+ "eval_wer": 0.17948192894440462,
263
  "step": 8500
264
  },
265
  {
266
  "epoch": 0.67,
267
+ "learning_rate": 9.401737567405633e-06,
268
+ "loss": 0.2966,
269
  "step": 9000
270
  },
271
  {
272
  "epoch": 0.67,
273
+ "eval_loss": 0.2121206372976303,
274
+ "eval_runtime": 61.6202,
275
+ "eval_samples_per_second": 16.228,
276
+ "eval_steps_per_second": 2.029,
277
+ "eval_wer": 0.1764103614211119,
278
  "step": 9000
279
  },
280
  {
281
  "epoch": 0.71,
282
+ "learning_rate": 9.36428999400839e-06,
283
+ "loss": 0.2896,
284
  "step": 9500
285
  },
286
  {
287
  "epoch": 0.71,
288
+ "eval_loss": 0.21005117893218994,
289
+ "eval_runtime": 61.7796,
290
+ "eval_samples_per_second": 16.187,
291
+ "eval_steps_per_second": 2.023,
292
+ "eval_wer": 0.17180301013617283,
293
  "step": 9500
294
  },
295
  {
296
  "epoch": 0.74,
297
+ "learning_rate": 9.326842420611146e-06,
298
+ "loss": 0.2895,
299
  "step": 10000
300
  },
301
  {
302
  "epoch": 0.74,
303
+ "eval_loss": 0.20771950483322144,
304
+ "eval_runtime": 62.291,
305
+ "eval_samples_per_second": 16.054,
306
+ "eval_steps_per_second": 2.007,
307
+ "eval_wer": 0.1711886966315143,
308
  "step": 10000
309
  },
310
  {
311
  "epoch": 0.78,
312
+ "learning_rate": 9.289469742360697e-06,
313
+ "loss": 0.2883,
314
  "step": 10500
315
  },
316
  {
317
  "epoch": 0.78,
318
+ "eval_loss": 0.20661789178848267,
319
+ "eval_runtime": 62.3024,
320
+ "eval_samples_per_second": 16.051,
321
+ "eval_steps_per_second": 2.006,
322
+ "eval_wer": 0.17374833623425823,
323
  "step": 10500
324
  },
325
  {
326
  "epoch": 0.82,
327
+ "learning_rate": 9.252022168963451e-06,
328
+ "loss": 0.2781,
329
  "step": 11000
330
  },
331
  {
332
  "epoch": 0.82,
333
+ "eval_loss": 0.20887774229049683,
334
+ "eval_runtime": 61.6124,
335
+ "eval_samples_per_second": 16.23,
336
+ "eval_steps_per_second": 2.029,
337
+ "eval_wer": 0.1744650353230265,
338
  "step": 11000
339
  },
340
  {
341
  "epoch": 0.85,
342
+ "learning_rate": 9.214574595566208e-06,
343
+ "loss": 0.2774,
344
  "step": 11500
345
  },
346
  {
347
  "epoch": 0.85,
348
+ "eval_loss": 0.2101556360721588,
349
+ "eval_runtime": 61.722,
350
+ "eval_samples_per_second": 16.202,
351
+ "eval_steps_per_second": 2.025,
352
+ "eval_wer": 0.17415787857069726,
353
  "step": 11500
354
  },
355
  {
356
  "epoch": 0.89,
357
+ "learning_rate": 9.177127022168964e-06,
358
+ "loss": 0.2728,
359
  "step": 12000
360
  },
361
  {
362
  "epoch": 0.89,
363
+ "eval_loss": 0.21667562425136566,
364
+ "eval_runtime": 61.6295,
365
+ "eval_samples_per_second": 16.226,
366
+ "eval_steps_per_second": 2.028,
367
+ "eval_wer": 0.1680147435241118,
368
  "step": 12000
369
  },
370
  {
371
  "epoch": 0.93,
372
+ "learning_rate": 9.13967944877172e-06,
373
+ "loss": 0.2687,
374
  "step": 12500
375
  },
376
  {
377
  "epoch": 0.93,
378
+ "eval_loss": 0.2106214314699173,
379
+ "eval_runtime": 61.887,
380
+ "eval_samples_per_second": 16.158,
381
+ "eval_steps_per_second": 2.02,
382
+ "eval_wer": 0.16873144261288012,
383
  "step": 12500
384
  },
385
  {
386
  "epoch": 0.97,
387
+ "learning_rate": 9.102231875374477e-06,
388
+ "loss": 0.267,
389
  "step": 13000
390
  },
391
  {
392
  "epoch": 0.97,
393
+ "eval_loss": 0.2063138782978058,
394
+ "eval_runtime": 61.9148,
395
+ "eval_samples_per_second": 16.151,
396
+ "eval_steps_per_second": 2.019,
397
+ "eval_wer": 0.1699600696221972,
398
  "step": 13000
399
  },
400
  {
401
  "epoch": 1.0,
402
+ "learning_rate": 9.064859197124028e-06,
403
+ "loss": 0.2649,
404
  "step": 13500
405
  },
406
  {
407
  "epoch": 1.0,
408
+ "eval_loss": 0.20692496001720428,
409
+ "eval_runtime": 62.6641,
410
+ "eval_samples_per_second": 15.958,
411
+ "eval_steps_per_second": 1.995,
412
+ "eval_wer": 0.16985768403808743,
413
  "step": 13500
414
  },
415
  {
416
  "epoch": 1.04,
417
+ "learning_rate": 9.027411623726783e-06,
418
+ "loss": 0.2535,
419
  "step": 14000
420
  },
421
  {
422
  "epoch": 1.04,
423
+ "eval_loss": 0.20183749496936798,
424
+ "eval_runtime": 62.2731,
425
+ "eval_samples_per_second": 16.058,
426
+ "eval_steps_per_second": 2.007,
427
+ "eval_wer": 0.16504556158492883,
428
  "step": 14000
429
  },
430
  {
431
+ "epoch": 1.08,
432
+ "learning_rate": 8.98996405032954e-06,
433
+ "loss": 0.2517,
434
  "step": 14500
435
  },
436
  {
437
+ "epoch": 1.08,
438
+ "eval_loss": 0.20589770376682281,
439
+ "eval_runtime": 60.5308,
440
+ "eval_samples_per_second": 16.521,
441
+ "eval_steps_per_second": 2.065,
442
+ "eval_wer": 0.16555748950547763,
443
  "step": 14500
444
  },
445
  {
446
+ "epoch": 1.12,
447
+ "learning_rate": 8.952516476932296e-06,
448
+ "loss": 0.2533,
449
  "step": 15000
450
  },
451
  {
452
+ "epoch": 1.12,
453
+ "eval_loss": 0.19426169991493225,
454
+ "eval_runtime": 60.3766,
455
+ "eval_samples_per_second": 16.563,
456
+ "eval_steps_per_second": 2.07,
457
+ "eval_wer": 0.1632026210709532,
458
  "step": 15000
459
  },
460
  {
461
+ "epoch": 1.15,
462
+ "learning_rate": 8.915068903535052e-06,
463
+ "loss": 0.2495,
464
  "step": 15500
465
  },
466
  {
467
+ "epoch": 1.15,
468
+ "eval_loss": 0.198073610663414,
469
+ "eval_runtime": 60.6441,
470
+ "eval_samples_per_second": 16.49,
471
+ "eval_steps_per_second": 2.061,
472
+ "eval_wer": 0.15931196887478244,
473
  "step": 15500
474
  },
475
  {
476
+ "epoch": 1.19,
477
+ "learning_rate": 8.877696225284603e-06,
478
+ "loss": 0.2485,
479
  "step": 16000
480
  },
481
  {
482
+ "epoch": 1.19,
483
+ "eval_loss": 0.194337397813797,
484
+ "eval_runtime": 60.9216,
485
+ "eval_samples_per_second": 16.415,
486
+ "eval_steps_per_second": 2.052,
487
+ "eval_wer": 0.16381693457561175,
488
  "step": 16000
489
  },
490
  {
491
+ "epoch": 1.23,
492
+ "learning_rate": 8.840323547034154e-06,
493
+ "loss": 0.2488,
494
  "step": 16500
495
  },
496
  {
497
+ "epoch": 1.23,
498
+ "eval_loss": 0.19501833617687225,
499
+ "eval_runtime": 60.832,
500
+ "eval_samples_per_second": 16.439,
501
+ "eval_steps_per_second": 2.055,
502
+ "eval_wer": 0.15808334186546535,
503
  "step": 16500
504
  },
505
  {
506
+ "epoch": 1.26,
507
+ "learning_rate": 8.80287597363691e-06,
508
+ "loss": 0.2482,
509
  "step": 17000
510
  },
511
  {
512
+ "epoch": 1.26,
513
+ "eval_loss": 0.19424903392791748,
514
+ "eval_runtime": 62.4224,
515
+ "eval_samples_per_second": 16.02,
516
+ "eval_steps_per_second": 2.002,
517
+ "eval_wer": 0.156957100440258,
518
  "step": 17000
519
  },
520
  {
521
+ "epoch": 1.3,
522
+ "learning_rate": 8.765428400239665e-06,
523
+ "loss": 0.2454,
524
  "step": 17500
525
  },
526
  {
527
+ "epoch": 1.3,
528
+ "eval_loss": 0.18983419239521027,
529
+ "eval_runtime": 62.7598,
530
+ "eval_samples_per_second": 15.934,
531
+ "eval_steps_per_second": 1.992,
532
+ "eval_wer": 0.15521654551039213,
533
  "step": 17500
534
  },
535
  {
536
+ "epoch": 1.34,
537
+ "learning_rate": 8.728055721989216e-06,
538
+ "loss": 0.2423,
539
  "step": 18000
540
  },
541
  {
542
+ "epoch": 1.34,
543
+ "eval_loss": 0.1893950253725052,
544
+ "eval_runtime": 61.9276,
545
+ "eval_samples_per_second": 16.148,
546
+ "eval_steps_per_second": 2.018,
547
+ "eval_wer": 0.15521654551039213,
548
  "step": 18000
549
  },
550
  {
551
+ "epoch": 1.38,
552
+ "learning_rate": 8.690608148591972e-06,
553
+ "loss": 0.2426,
554
  "step": 18500
555
  },
556
  {
557
+ "epoch": 1.38,
558
+ "eval_loss": 0.19094379246234894,
559
+ "eval_runtime": 62.4801,
560
+ "eval_samples_per_second": 16.005,
561
+ "eval_steps_per_second": 2.001,
562
+ "eval_wer": 0.15521654551039213,
563
  "step": 18500
564
  },
565
  {
566
+ "epoch": 1.41,
567
+ "learning_rate": 8.653160575194727e-06,
568
+ "loss": 0.2362,
569
  "step": 19000
570
  },
571
  {
572
+ "epoch": 1.41,
573
+ "eval_loss": 0.18532328307628632,
574
+ "eval_runtime": 62.2339,
575
+ "eval_samples_per_second": 16.068,
576
+ "eval_steps_per_second": 2.009,
577
+ "eval_wer": 0.15449984642162384,
578
  "step": 19000
579
  },
580
  {
581
+ "epoch": 1.45,
582
+ "learning_rate": 8.615787896944278e-06,
583
+ "loss": 0.2411,
584
  "step": 19500
585
  },
586
  {
587
+ "epoch": 1.45,
588
+ "eval_loss": 0.18669460713863373,
589
+ "eval_runtime": 61.7976,
590
+ "eval_samples_per_second": 16.182,
591
+ "eval_steps_per_second": 2.023,
592
+ "eval_wer": 0.15306644824408724,
593
  "step": 19500
594
  },
595
  {
596
+ "epoch": 1.49,
597
+ "learning_rate": 8.578340323547035e-06,
598
+ "loss": 0.2386,
599
  "step": 20000
600
  },
601
  {
602
+ "epoch": 1.49,
603
+ "eval_loss": 0.18574979901313782,
604
+ "eval_runtime": 62.0229,
605
+ "eval_samples_per_second": 16.123,
606
+ "eval_steps_per_second": 2.015,
607
+ "eval_wer": 0.15501177434217261,
608
  "step": 20000
609
  },
610
  {
611
+ "epoch": 1.52,
612
+ "learning_rate": 8.540892750149791e-06,
613
+ "loss": 0.2399,
614
  "step": 20500
615
  },
616
  {
617
+ "epoch": 1.52,
618
+ "eval_loss": 0.18443606793880463,
619
+ "eval_runtime": 62.46,
620
+ "eval_samples_per_second": 16.01,
621
+ "eval_steps_per_second": 2.001,
622
+ "eval_wer": 0.15040442305723353,
623
  "step": 20500
624
  },
625
  {
626
+ "epoch": 1.56,
627
+ "learning_rate": 8.503445176752547e-06,
628
+ "loss": 0.2372,
629
  "step": 21000
630
  },
631
  {
632
+ "epoch": 1.56,
633
+ "eval_loss": 0.1922898292541504,
634
+ "eval_runtime": 62.2693,
635
+ "eval_samples_per_second": 16.059,
636
+ "eval_steps_per_second": 2.007,
637
+ "eval_wer": 0.15654755810381898,
638
  "step": 21000
639
  },
640
  {
641
+ "epoch": 1.6,
642
+ "learning_rate": 8.465997603355304e-06,
643
+ "loss": 0.2329,
644
  "step": 21500
645
  },
646
  {
647
+ "epoch": 1.6,
648
+ "eval_loss": 0.18686270713806152,
649
+ "eval_runtime": 62.3656,
650
+ "eval_samples_per_second": 16.034,
651
+ "eval_steps_per_second": 2.004,
652
+ "eval_wer": 0.1553189310945019,
653
  "step": 21500
654
  },
655
  {
656
+ "epoch": 1.64,
657
+ "learning_rate": 8.428550029958059e-06,
658
+ "loss": 0.239,
659
  "step": 22000
660
  },
661
  {
662
+ "epoch": 1.64,
663
+ "eval_loss": 0.18338114023208618,
664
+ "eval_runtime": 61.5958,
665
+ "eval_samples_per_second": 16.235,
666
+ "eval_steps_per_second": 2.029,
667
+ "eval_wer": 0.15296406265997747,
668
  "step": 22000
669
  },
670
  {
671
+ "epoch": 1.67,
672
+ "learning_rate": 8.391102456560815e-06,
673
+ "loss": 0.2362,
674
  "step": 22500
675
  },
676
  {
677
+ "epoch": 1.67,
678
+ "eval_loss": 0.1865980476140976,
679
+ "eval_runtime": 61.9855,
680
+ "eval_samples_per_second": 16.133,
681
+ "eval_steps_per_second": 2.017,
682
+ "eval_wer": 0.15613801576737996,
683
  "step": 22500
684
  },
685
  {
686
+ "epoch": 1.71,
687
+ "learning_rate": 8.353654883163571e-06,
688
+ "loss": 0.2339,
689
  "step": 23000
690
  },
691
  {
692
+ "epoch": 1.71,
693
+ "eval_loss": 0.18834584951400757,
694
+ "eval_runtime": 62.0192,
695
+ "eval_samples_per_second": 16.124,
696
+ "eval_steps_per_second": 2.016,
697
+ "eval_wer": 0.15275929149175796,
698
  "step": 23000
699
  },
700
  {
701
+ "epoch": 1.75,
702
+ "learning_rate": 8.316207309766328e-06,
703
+ "loss": 0.2244,
704
  "step": 23500
705
  },
706
  {
707
+ "epoch": 1.75,
708
+ "eval_loss": 0.18758971989154816,
709
+ "eval_runtime": 61.7138,
710
+ "eval_samples_per_second": 16.204,
711
+ "eval_steps_per_second": 2.025,
712
+ "eval_wer": 0.15081396539367256,
713
  "step": 23500
714
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
  {
716
  "epoch": 1.78,
717
+ "learning_rate": 8.278759736369084e-06,
718
+ "loss": 0.2305,
719
+ "step": 24000
720
  },
721
  {
722
  "epoch": 1.78,
723
+ "eval_loss": 0.18345214426517487,
724
+ "eval_runtime": 61.5754,
725
+ "eval_samples_per_second": 16.24,
726
+ "eval_steps_per_second": 2.03,
727
+ "eval_wer": 0.14989249513668476,
728
+ "step": 24000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  }
730
  ],
731
  "max_steps": 134520,
732
  "num_train_epochs": 10,
733
+ "total_flos": 5.0257004948378596e+20,
734
  "trial_name": null,
735
  "trial_params": null
736
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef036419d64a2effc8f44a7df7e9650d3b7737500695435b7ef79da8ec68c861
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40bfb6388b81773860d3e86d1ef130690e421d3a9da23a43b855e3f2f27d2f70
3
  size 3055