marinone94 commited on
Commit
4b40d1e
1 Parent(s): 6caeebe

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +12 -12
  2. eval_results.json +7 -7
  3. train_results.json +6 -6
  4. trainer_state.json +364 -961
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 99.99,
3
- "eval_loss": 0.15951618552207947,
4
- "eval_runtime": 182.1421,
5
- "eval_samples": 4843,
6
- "eval_samples_per_second": 26.589,
7
- "eval_steps_per_second": 0.835,
8
- "eval_wer": 0.11995458416122623,
9
- "train_loss": 1.0890738963032818,
10
- "train_runtime": 51321.8258,
11
- "train_samples": 11690,
12
- "train_samples_per_second": 22.778,
13
- "train_steps_per_second": 0.177
14
  }
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.1318359673023224,
4
+ "eval_runtime": 200.408,
5
+ "eval_samples": 5045,
6
+ "eval_samples_per_second": 25.174,
7
+ "eval_steps_per_second": 0.788,
8
+ "eval_wer": 0.11208951988728731,
9
+ "train_loss": 1.3699857131640116,
10
+ "train_runtime": 54887.9396,
11
+ "train_samples": 12307,
12
+ "train_samples_per_second": 22.422,
13
+ "train_steps_per_second": 0.175
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 99.99,
3
- "eval_loss": 0.15951618552207947,
4
- "eval_runtime": 182.1421,
5
- "eval_samples": 4843,
6
- "eval_samples_per_second": 26.589,
7
- "eval_steps_per_second": 0.835,
8
- "eval_wer": 0.11995458416122623
9
  }
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.1318359673023224,
4
+ "eval_runtime": 200.408,
5
+ "eval_samples": 5045,
6
+ "eval_samples_per_second": 25.174,
7
+ "eval_steps_per_second": 0.788,
8
+ "eval_wer": 0.11208951988728731
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 99.99,
3
- "train_loss": 1.0890738963032818,
4
- "train_runtime": 51321.8258,
5
- "train_samples": 11690,
6
- "train_samples_per_second": 22.778,
7
- "train_steps_per_second": 0.177
8
  }
1
  {
2
+ "epoch": 100.0,
3
+ "train_loss": 1.3699857131640116,
4
+ "train_runtime": 54887.9396,
5
+ "train_samples": 12307,
6
+ "train_samples_per_second": 22.422,
7
+ "train_steps_per_second": 0.175
8
  }
trainer_state.json CHANGED
@@ -1,1279 +1,682 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 99.99453551912568,
5
- "global_step": 9100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.55,
12
- "learning_rate": 5.384615384615385e-06,
13
- "loss": 14.944,
14
- "step": 50
15
- },
16
- {
17
- "epoch": 1.1,
18
- "learning_rate": 1.076923076923077e-05,
19
- "loss": 14.3202,
20
  "step": 100
21
  },
22
  {
23
- "epoch": 1.64,
24
- "learning_rate": 1.6263736263736265e-05,
25
- "loss": 8.0014,
26
- "step": 150
27
- },
28
- {
29
- "epoch": 2.2,
30
- "learning_rate": 2.175824175824176e-05,
31
- "loss": 3.9265,
32
  "step": 200
33
  },
34
  {
35
- "epoch": 2.74,
36
- "learning_rate": 2.7252747252747255e-05,
37
- "loss": 3.2842,
38
- "step": 250
39
- },
40
- {
41
- "epoch": 3.3,
42
- "learning_rate": 3.274725274725275e-05,
43
- "loss": 3.1869,
44
  "step": 300
45
  },
46
  {
47
- "epoch": 3.84,
48
- "learning_rate": 3.824175824175824e-05,
49
- "loss": 3.108,
50
- "step": 350
51
- },
52
- {
53
- "epoch": 4.39,
54
- "learning_rate": 4.3736263736263734e-05,
55
- "loss": 3.1175,
56
  "step": 400
57
  },
58
  {
59
- "epoch": 4.94,
60
- "learning_rate": 4.923076923076923e-05,
61
- "loss": 3.0525,
62
- "step": 450
63
- },
64
- {
65
- "epoch": 5.49,
66
- "learning_rate": 5.472527472527473e-05,
67
- "loss": 3.0418,
68
- "step": 500
69
- },
70
- {
71
- "epoch": 5.49,
72
- "eval_loss": 3.0175631046295166,
73
- "eval_runtime": 180.0397,
74
- "eval_samples_per_second": 26.9,
75
- "eval_steps_per_second": 0.844,
76
- "eval_wer": 1.0,
77
  "step": 500
78
  },
79
  {
80
- "epoch": 6.04,
81
- "learning_rate": 6.021978021978022e-05,
82
- "loss": 2.9976,
83
- "step": 550
84
- },
85
- {
86
- "epoch": 6.59,
87
- "learning_rate": 6.571428571428571e-05,
88
- "loss": 2.9326,
89
  "step": 600
90
  },
91
  {
92
- "epoch": 7.14,
93
- "learning_rate": 7.120879120879122e-05,
94
- "loss": 2.9006,
95
- "step": 650
96
- },
97
- {
98
- "epoch": 7.69,
99
- "learning_rate": 7.67032967032967e-05,
100
- "loss": 2.5847,
101
  "step": 700
102
  },
103
  {
104
- "epoch": 8.24,
105
- "learning_rate": 8.219780219780219e-05,
106
- "loss": 1.9329,
107
- "step": 750
108
- },
109
- {
110
- "epoch": 8.79,
111
- "learning_rate": 8.76923076923077e-05,
112
- "loss": 1.5005,
113
  "step": 800
114
  },
115
  {
116
- "epoch": 9.34,
117
- "learning_rate": 9.318681318681319e-05,
118
- "loss": 1.3603,
119
- "step": 850
120
- },
121
- {
122
- "epoch": 9.89,
123
- "learning_rate": 9.868131868131869e-05,
124
- "loss": 1.2882,
125
  "step": 900
126
  },
127
  {
128
- "epoch": 10.44,
129
- "learning_rate": 0.00010417582417582417,
130
- "loss": 1.2259,
131
- "step": 950
132
- },
133
- {
134
- "epoch": 10.98,
135
- "learning_rate": 0.00010967032967032966,
136
- "loss": 1.1819,
137
  "step": 1000
138
  },
139
  {
140
- "epoch": 10.98,
141
- "eval_loss": 0.2561783790588379,
142
- "eval_runtime": 183.3784,
143
- "eval_samples_per_second": 26.41,
144
- "eval_steps_per_second": 0.829,
145
- "eval_wer": 0.21680386034629576,
146
  "step": 1000
147
  },
148
  {
149
- "epoch": 11.54,
150
- "learning_rate": 0.00011516483516483517,
151
- "loss": 1.1526,
152
- "step": 1050
153
- },
154
- {
155
- "epoch": 12.09,
156
- "learning_rate": 0.00012065934065934066,
157
- "loss": 1.1248,
158
  "step": 1100
159
  },
160
  {
161
- "epoch": 12.63,
162
- "learning_rate": 0.00012615384615384615,
163
- "loss": 1.0837,
164
- "step": 1150
165
- },
166
- {
167
- "epoch": 13.19,
168
- "learning_rate": 0.00013164835164835166,
169
- "loss": 1.0713,
170
  "step": 1200
171
  },
172
  {
173
- "epoch": 13.73,
174
- "learning_rate": 0.00013714285714285716,
175
- "loss": 1.0511,
176
- "step": 1250
177
- },
178
- {
179
- "epoch": 14.28,
180
- "learning_rate": 0.00014263736263736264,
181
- "loss": 1.0454,
182
  "step": 1300
183
  },
184
  {
185
- "epoch": 14.83,
186
- "learning_rate": 0.00014813186813186812,
187
- "loss": 1.0157,
188
- "step": 1350
189
- },
190
- {
191
- "epoch": 15.38,
192
- "learning_rate": 0.00015362637362637362,
193
- "loss": 1.0207,
194
  "step": 1400
195
  },
196
  {
197
- "epoch": 15.93,
198
- "learning_rate": 0.00015912087912087913,
199
- "loss": 0.9859,
200
- "step": 1450
201
- },
202
- {
203
- "epoch": 16.48,
204
- "learning_rate": 0.0001646153846153846,
205
- "loss": 1.0032,
206
- "step": 1500
207
- },
208
- {
209
- "epoch": 16.48,
210
- "eval_loss": 0.1746312528848648,
211
- "eval_runtime": 184.5587,
212
- "eval_samples_per_second": 26.241,
213
- "eval_steps_per_second": 0.824,
214
- "eval_wer": 0.15461254612546124,
215
  "step": 1500
216
  },
217
  {
218
- "epoch": 17.03,
219
- "learning_rate": 0.0001701098901098901,
220
- "loss": 0.974,
221
- "step": 1550
222
- },
223
- {
224
- "epoch": 17.58,
225
- "learning_rate": 0.00017560439560439562,
226
- "loss": 0.9528,
227
  "step": 1600
228
  },
229
  {
230
- "epoch": 18.13,
231
- "learning_rate": 0.0001810989010989011,
232
- "loss": 0.9774,
233
- "step": 1650
234
- },
235
- {
236
- "epoch": 18.68,
237
- "learning_rate": 0.0001865934065934066,
238
- "loss": 0.9495,
239
  "step": 1700
240
  },
241
  {
242
- "epoch": 19.23,
243
- "learning_rate": 0.0001920879120879121,
244
- "loss": 0.9525,
245
- "step": 1750
246
- },
247
- {
248
- "epoch": 19.78,
249
- "learning_rate": 0.00019758241758241759,
250
- "loss": 0.9385,
251
  "step": 1800
252
  },
253
  {
254
- "epoch": 20.33,
255
- "learning_rate": 0.00020307692307692306,
256
- "loss": 0.9422,
257
- "step": 1850
258
- },
259
- {
260
- "epoch": 20.87,
261
- "learning_rate": 0.00020857142857142857,
262
- "loss": 0.9028,
263
  "step": 1900
264
  },
265
  {
266
- "epoch": 21.43,
267
- "learning_rate": 0.00021406593406593407,
268
- "loss": 0.9091,
269
- "step": 1950
270
- },
271
- {
272
- "epoch": 21.97,
273
- "learning_rate": 0.00021956043956043955,
274
- "loss": 0.9077,
275
  "step": 2000
276
  },
277
  {
278
- "epoch": 21.97,
279
- "eval_loss": 0.15996481478214264,
280
- "eval_runtime": 182.6325,
281
- "eval_samples_per_second": 26.518,
282
- "eval_steps_per_second": 0.832,
283
- "eval_wer": 0.13391995458416123,
284
  "step": 2000
285
  },
286
  {
287
- "epoch": 22.52,
288
- "learning_rate": 0.00022505494505494506,
289
- "loss": 0.9073,
290
- "step": 2050
291
- },
292
- {
293
- "epoch": 23.08,
294
- "learning_rate": 0.00023054945054945056,
295
- "loss": 0.9046,
296
  "step": 2100
297
  },
298
  {
299
- "epoch": 23.62,
300
- "learning_rate": 0.00023604395604395604,
301
- "loss": 0.8864,
302
- "step": 2150
303
- },
304
- {
305
- "epoch": 24.17,
306
- "learning_rate": 0.00024153846153846155,
307
- "loss": 0.8888,
308
  "step": 2200
309
  },
310
  {
311
- "epoch": 24.72,
312
- "learning_rate": 0.00024703296703296705,
313
- "loss": 0.8757,
314
- "step": 2250
315
- },
316
- {
317
- "epoch": 25.27,
318
- "learning_rate": 0.00024915750915750914,
319
- "loss": 0.8819,
320
  "step": 2300
321
  },
322
  {
323
- "epoch": 25.82,
324
- "learning_rate": 0.00024732600732600734,
325
- "loss": 0.8722,
326
- "step": 2350
327
- },
328
- {
329
- "epoch": 26.37,
330
- "learning_rate": 0.0002454945054945055,
331
- "loss": 0.8757,
332
  "step": 2400
333
  },
334
  {
335
- "epoch": 26.92,
336
- "learning_rate": 0.00024366300366300369,
337
- "loss": 0.8544,
338
- "step": 2450
339
- },
340
- {
341
- "epoch": 27.47,
342
- "learning_rate": 0.00024183150183150186,
343
- "loss": 0.8687,
344
- "step": 2500
345
- },
346
- {
347
- "epoch": 27.47,
348
- "eval_loss": 0.1647317260503769,
349
- "eval_runtime": 181.9124,
350
- "eval_samples_per_second": 26.623,
351
- "eval_steps_per_second": 0.836,
352
- "eval_wer": 0.1378370706783991,
353
  "step": 2500
354
  },
355
  {
356
- "epoch": 28.02,
357
- "learning_rate": 0.00024,
358
- "loss": 0.8661,
359
- "step": 2550
360
- },
361
- {
362
- "epoch": 28.57,
363
- "learning_rate": 0.00023816849816849818,
364
- "loss": 0.8187,
365
  "step": 2600
366
  },
367
  {
368
- "epoch": 29.12,
369
- "learning_rate": 0.00023633699633699635,
370
- "loss": 0.8425,
371
- "step": 2650
372
- },
373
- {
374
- "epoch": 29.67,
375
- "learning_rate": 0.00023450549450549453,
376
- "loss": 0.8455,
377
  "step": 2700
378
  },
379
  {
380
- "epoch": 30.22,
381
- "learning_rate": 0.00023267399267399267,
382
- "loss": 0.8369,
383
- "step": 2750
384
- },
385
- {
386
- "epoch": 30.77,
387
- "learning_rate": 0.00023084249084249085,
388
- "loss": 0.8253,
389
  "step": 2800
390
  },
391
  {
392
- "epoch": 31.32,
393
- "learning_rate": 0.00022901098901098902,
394
- "loss": 0.8213,
395
- "step": 2850
396
- },
397
- {
398
- "epoch": 31.86,
399
- "learning_rate": 0.0002271794871794872,
400
- "loss": 0.808,
401
  "step": 2900
402
  },
403
  {
404
- "epoch": 32.42,
405
- "learning_rate": 0.00022534798534798534,
406
- "loss": 0.8352,
407
- "step": 2950
408
- },
409
- {
410
- "epoch": 32.96,
411
- "learning_rate": 0.00022351648351648352,
412
- "loss": 0.8081,
413
  "step": 3000
414
  },
415
  {
416
- "epoch": 32.96,
417
- "eval_loss": 0.16083544492721558,
418
- "eval_runtime": 184.6287,
419
- "eval_samples_per_second": 26.231,
420
- "eval_steps_per_second": 0.823,
421
- "eval_wer": 0.135310814646608,
422
  "step": 3000
423
  },
424
  {
425
- "epoch": 33.51,
426
- "learning_rate": 0.0002216849816849817,
427
- "loss": 0.8157,
428
- "step": 3050
429
- },
430
- {
431
- "epoch": 34.07,
432
- "learning_rate": 0.00021985347985347986,
433
- "loss": 0.8035,
434
  "step": 3100
435
  },
436
  {
437
- "epoch": 34.61,
438
- "learning_rate": 0.00021802197802197804,
439
- "loss": 0.7844,
440
- "step": 3150
441
- },
442
- {
443
- "epoch": 35.16,
444
- "learning_rate": 0.00021619047619047619,
445
- "loss": 0.7835,
446
  "step": 3200
447
  },
448
  {
449
- "epoch": 35.71,
450
- "learning_rate": 0.00021435897435897436,
451
- "loss": 0.7854,
452
- "step": 3250
453
- },
454
- {
455
- "epoch": 36.26,
456
- "learning_rate": 0.00021252747252747253,
457
- "loss": 0.7845,
458
  "step": 3300
459
  },
460
  {
461
- "epoch": 36.81,
462
- "learning_rate": 0.0002106959706959707,
463
- "loss": 0.7677,
464
- "step": 3350
465
- },
466
- {
467
- "epoch": 37.36,
468
- "learning_rate": 0.00020886446886446885,
469
- "loss": 0.7933,
470
  "step": 3400
471
  },
472
  {
473
- "epoch": 37.91,
474
- "learning_rate": 0.00020703296703296703,
475
- "loss": 0.7781,
476
- "step": 3450
477
- },
478
- {
479
- "epoch": 38.46,
480
- "learning_rate": 0.0002052014652014652,
481
- "loss": 0.7923,
482
  "step": 3500
483
  },
484
  {
485
- "epoch": 38.46,
486
- "eval_loss": 0.15337252616882324,
487
- "eval_runtime": 182.8057,
488
- "eval_samples_per_second": 26.493,
489
- "eval_steps_per_second": 0.831,
490
- "eval_wer": 0.12773204655123474,
491
- "step": 3500
492
- },
493
- {
494
- "epoch": 39.01,
495
- "learning_rate": 0.00020336996336996338,
496
- "loss": 0.7861,
497
- "step": 3550
498
- },
499
- {
500
- "epoch": 39.56,
501
- "learning_rate": 0.00020157509157509157,
502
- "loss": 0.767,
503
  "step": 3600
504
  },
505
  {
506
- "epoch": 40.11,
507
- "learning_rate": 0.00019974358974358974,
508
- "loss": 0.7688,
509
- "step": 3650
510
- },
511
- {
512
- "epoch": 40.66,
513
- "learning_rate": 0.0001979120879120879,
514
- "loss": 0.7515,
515
  "step": 3700
516
  },
517
  {
518
- "epoch": 41.21,
519
- "learning_rate": 0.00019608058608058606,
520
- "loss": 0.7602,
521
- "step": 3750
522
- },
523
- {
524
- "epoch": 41.75,
525
- "learning_rate": 0.00019424908424908423,
526
- "loss": 0.7565,
527
  "step": 3800
528
  },
529
  {
530
- "epoch": 42.31,
531
- "learning_rate": 0.0001924175824175824,
532
- "loss": 0.7646,
533
- "step": 3850
534
- },
535
- {
536
- "epoch": 42.85,
537
- "learning_rate": 0.0001905860805860806,
538
- "loss": 0.7547,
539
  "step": 3900
540
  },
541
  {
542
- "epoch": 43.4,
543
- "learning_rate": 0.00018875457875457878,
544
- "loss": 0.7474,
545
- "step": 3950
546
- },
547
- {
548
- "epoch": 43.95,
549
- "learning_rate": 0.00018692307692307693,
550
- "loss": 0.7349,
551
  "step": 4000
552
  },
553
  {
554
- "epoch": 43.95,
555
- "eval_loss": 0.15459321439266205,
556
- "eval_runtime": 182.8482,
557
- "eval_samples_per_second": 26.486,
558
- "eval_steps_per_second": 0.831,
559
- "eval_wer": 0.13034345728072666,
560
  "step": 4000
561
  },
562
  {
563
- "epoch": 44.5,
564
- "learning_rate": 0.0001850915750915751,
565
- "loss": 0.7447,
566
- "step": 4050
567
- },
568
- {
569
- "epoch": 45.05,
570
- "learning_rate": 0.00018326007326007328,
571
- "loss": 0.7442,
572
  "step": 4100
573
  },
574
  {
575
- "epoch": 45.6,
576
- "learning_rate": 0.00018142857142857145,
577
- "loss": 0.7311,
578
- "step": 4150
579
- },
580
- {
581
- "epoch": 46.15,
582
- "learning_rate": 0.0001795970695970696,
583
- "loss": 0.7432,
584
  "step": 4200
585
  },
586
  {
587
- "epoch": 46.7,
588
- "learning_rate": 0.00017776556776556777,
589
- "loss": 0.7361,
590
- "step": 4250
591
- },
592
- {
593
- "epoch": 47.25,
594
- "learning_rate": 0.00017593406593406595,
595
- "loss": 0.7358,
596
  "step": 4300
597
  },
598
  {
599
- "epoch": 47.8,
600
- "learning_rate": 0.00017410256410256412,
601
- "loss": 0.718,
602
- "step": 4350
603
- },
604
- {
605
- "epoch": 48.35,
606
- "learning_rate": 0.00017227106227106227,
607
- "loss": 0.7327,
608
  "step": 4400
609
  },
610
  {
611
- "epoch": 48.9,
612
- "learning_rate": 0.00017043956043956044,
613
- "loss": 0.7129,
614
- "step": 4450
615
- },
616
- {
617
- "epoch": 49.45,
618
- "learning_rate": 0.00016860805860805861,
619
- "loss": 0.7199,
620
- "step": 4500
621
- },
622
- {
623
- "epoch": 49.45,
624
- "eval_loss": 0.16171683371067047,
625
- "eval_runtime": 181.6699,
626
- "eval_samples_per_second": 26.658,
627
- "eval_steps_per_second": 0.837,
628
- "eval_wer": 0.12770366165200114,
629
  "step": 4500
630
  },
631
  {
632
- "epoch": 49.99,
633
- "learning_rate": 0.0001667765567765568,
634
- "loss": 0.7156,
635
- "step": 4550
636
- },
637
- {
638
- "epoch": 50.55,
639
- "learning_rate": 0.00016494505494505496,
640
- "loss": 0.7265,
641
  "step": 4600
642
  },
643
  {
644
- "epoch": 51.1,
645
- "learning_rate": 0.0001631135531135531,
646
- "loss": 0.7169,
647
- "step": 4650
648
- },
649
- {
650
- "epoch": 51.64,
651
- "learning_rate": 0.00016128205128205128,
652
- "loss": 0.7022,
653
  "step": 4700
654
  },
655
  {
656
- "epoch": 52.2,
657
- "learning_rate": 0.00015945054945054946,
658
- "loss": 0.7182,
659
- "step": 4750
660
- },
661
- {
662
- "epoch": 52.74,
663
- "learning_rate": 0.00015761904761904763,
664
- "loss": 0.7239,
665
  "step": 4800
666
  },
667
  {
668
- "epoch": 53.3,
669
- "learning_rate": 0.00015578754578754578,
670
- "loss": 0.702,
671
- "step": 4850
672
- },
673
- {
674
- "epoch": 53.84,
675
- "learning_rate": 0.00015395604395604395,
676
- "loss": 0.6951,
677
  "step": 4900
678
  },
679
  {
680
- "epoch": 54.39,
681
- "learning_rate": 0.00015212454212454213,
682
- "loss": 0.6972,
683
- "step": 4950
684
- },
685
- {
686
- "epoch": 54.94,
687
- "learning_rate": 0.0001502930402930403,
688
- "loss": 0.7028,
689
  "step": 5000
690
  },
691
  {
692
- "epoch": 54.94,
693
- "eval_loss": 0.15723808109760284,
694
- "eval_runtime": 179.3818,
695
- "eval_samples_per_second": 26.998,
696
- "eval_steps_per_second": 0.847,
697
- "eval_wer": 0.1286687482259438,
698
  "step": 5000
699
  },
700
  {
701
- "epoch": 55.49,
702
- "learning_rate": 0.00014846153846153845,
703
- "loss": 0.6829,
704
- "step": 5050
705
- },
706
- {
707
- "epoch": 56.04,
708
- "learning_rate": 0.00014663003663003662,
709
- "loss": 0.6864,
710
  "step": 5100
711
  },
712
  {
713
- "epoch": 56.59,
714
- "learning_rate": 0.0001447985347985348,
715
- "loss": 0.6854,
716
- "step": 5150
717
- },
718
- {
719
- "epoch": 57.14,
720
- "learning_rate": 0.00014296703296703297,
721
- "loss": 0.6759,
722
  "step": 5200
723
  },
724
  {
725
- "epoch": 57.69,
726
- "learning_rate": 0.00014113553113553112,
727
- "loss": 0.6775,
728
- "step": 5250
729
- },
730
- {
731
- "epoch": 58.24,
732
- "learning_rate": 0.0001393040293040293,
733
- "loss": 0.7114,
734
  "step": 5300
735
  },
736
  {
737
- "epoch": 58.79,
738
- "learning_rate": 0.00013747252747252746,
739
- "loss": 0.6791,
740
- "step": 5350
741
- },
742
- {
743
- "epoch": 59.34,
744
- "learning_rate": 0.00013564102564102566,
745
- "loss": 0.6862,
746
  "step": 5400
747
  },
748
  {
749
- "epoch": 59.89,
750
- "learning_rate": 0.00013380952380952384,
751
- "loss": 0.6716,
752
- "step": 5450
753
- },
754
- {
755
- "epoch": 60.44,
756
- "learning_rate": 0.00013197802197802198,
757
- "loss": 0.6912,
758
  "step": 5500
759
  },
760
  {
761
- "epoch": 60.44,
762
- "eval_loss": 0.15595602989196777,
763
- "eval_runtime": 177.0801,
764
- "eval_samples_per_second": 27.349,
765
- "eval_steps_per_second": 0.858,
766
- "eval_wer": 0.12489355662787398,
767
- "step": 5500
768
- },
769
- {
770
- "epoch": 60.98,
771
- "learning_rate": 0.00013014652014652016,
772
- "loss": 0.6743,
773
- "step": 5550
774
- },
775
- {
776
- "epoch": 61.54,
777
- "learning_rate": 0.00012831501831501833,
778
- "loss": 0.6683,
779
  "step": 5600
780
  },
781
  {
782
- "epoch": 62.09,
783
- "learning_rate": 0.0001264835164835165,
784
- "loss": 0.6654,
785
- "step": 5650
786
- },
787
- {
788
- "epoch": 62.63,
789
- "learning_rate": 0.00012465201465201465,
790
- "loss": 0.6583,
791
  "step": 5700
792
  },
793
  {
794
- "epoch": 63.19,
795
- "learning_rate": 0.00012282051282051283,
796
- "loss": 0.6634,
797
- "step": 5750
798
- },
799
- {
800
- "epoch": 63.73,
801
- "learning_rate": 0.000120989010989011,
802
- "loss": 0.6566,
803
  "step": 5800
804
  },
805
  {
806
- "epoch": 64.28,
807
- "learning_rate": 0.00011915750915750916,
808
- "loss": 0.67,
809
- "step": 5850
810
- },
811
- {
812
- "epoch": 64.83,
813
- "learning_rate": 0.00011732600732600734,
814
- "loss": 0.6524,
815
  "step": 5900
816
  },
817
  {
818
- "epoch": 65.38,
819
- "learning_rate": 0.0001154945054945055,
820
- "loss": 0.6673,
821
- "step": 5950
822
- },
823
- {
824
- "epoch": 65.93,
825
- "learning_rate": 0.00011366300366300367,
826
- "loss": 0.6492,
827
  "step": 6000
828
  },
829
  {
830
- "epoch": 65.93,
831
- "eval_loss": 0.15421651303768158,
832
- "eval_runtime": 182.1682,
833
- "eval_samples_per_second": 26.585,
834
- "eval_steps_per_second": 0.834,
835
- "eval_wer": 0.12600056769798468,
836
  "step": 6000
837
  },
838
  {
839
- "epoch": 66.48,
840
- "learning_rate": 0.00011183150183150183,
841
- "loss": 0.6548,
842
- "step": 6050
843
- },
844
- {
845
- "epoch": 67.03,
846
- "learning_rate": 0.00011,
847
- "loss": 0.6542,
848
  "step": 6100
849
  },
850
  {
851
- "epoch": 67.58,
852
- "learning_rate": 0.00010816849816849816,
853
- "loss": 0.6351,
854
- "step": 6150
855
- },
856
- {
857
- "epoch": 68.13,
858
- "learning_rate": 0.00010633699633699634,
859
- "loss": 0.6513,
860
  "step": 6200
861
  },
862
  {
863
- "epoch": 68.68,
864
- "learning_rate": 0.00010450549450549451,
865
- "loss": 0.6328,
866
- "step": 6250
867
- },
868
- {
869
- "epoch": 69.23,
870
- "learning_rate": 0.00010267399267399267,
871
- "loss": 0.6507,
872
  "step": 6300
873
  },
874
  {
875
- "epoch": 69.78,
876
- "learning_rate": 0.00010084249084249085,
877
- "loss": 0.6389,
878
- "step": 6350
879
- },
880
- {
881
- "epoch": 70.33,
882
- "learning_rate": 9.901098901098901e-05,
883
- "loss": 0.6525,
884
  "step": 6400
885
  },
886
  {
887
- "epoch": 70.87,
888
- "learning_rate": 9.717948717948718e-05,
889
- "loss": 0.6436,
890
- "step": 6450
891
- },
892
- {
893
- "epoch": 71.43,
894
- "learning_rate": 9.534798534798534e-05,
895
- "loss": 0.6407,
896
  "step": 6500
897
  },
898
  {
899
- "epoch": 71.43,
900
- "eval_loss": 0.16047754883766174,
901
- "eval_runtime": 178.7321,
902
- "eval_samples_per_second": 27.096,
903
- "eval_steps_per_second": 0.85,
904
- "eval_wer": 0.12398523985239852,
905
- "step": 6500
906
- },
907
- {
908
- "epoch": 71.97,
909
- "learning_rate": 9.351648351648353e-05,
910
- "loss": 0.6274,
911
- "step": 6550
912
- },
913
- {
914
- "epoch": 72.52,
915
- "learning_rate": 9.168498168498169e-05,
916
- "loss": 0.6338,
917
  "step": 6600
918
  },
919
  {
920
- "epoch": 73.08,
921
- "learning_rate": 8.989010989010989e-05,
922
- "loss": 0.6296,
923
- "step": 6650
924
- },
925
- {
926
- "epoch": 73.62,
927
- "learning_rate": 8.805860805860807e-05,
928
- "loss": 0.6202,
929
  "step": 6700
930
  },
931
  {
932
- "epoch": 74.17,
933
- "learning_rate": 8.622710622710623e-05,
934
- "loss": 0.6332,
935
- "step": 6750
936
- },
937
- {
938
- "epoch": 74.72,
939
- "learning_rate": 8.43956043956044e-05,
940
- "loss": 0.623,
941
  "step": 6800
942
  },
943
  {
944
- "epoch": 75.27,
945
- "learning_rate": 8.256410256410257e-05,
946
- "loss": 0.6406,
947
- "step": 6850
948
- },
949
- {
950
- "epoch": 75.82,
951
- "learning_rate": 8.073260073260073e-05,
952
- "loss": 0.6136,
953
  "step": 6900
954
  },
955
  {
956
- "epoch": 76.37,
957
- "learning_rate": 7.890109890109891e-05,
958
- "loss": 0.6313,
959
- "step": 6950
960
- },
961
- {
962
- "epoch": 76.92,
963
- "learning_rate": 7.706959706959707e-05,
964
- "loss": 0.6222,
965
  "step": 7000
966
  },
967
  {
968
- "epoch": 76.92,
969
- "eval_loss": 0.15765224397182465,
970
- "eval_runtime": 176.2542,
971
- "eval_samples_per_second": 27.477,
972
- "eval_steps_per_second": 0.862,
973
- "eval_wer": 0.12185637240987794,
974
  "step": 7000
975
  },
976
  {
977
- "epoch": 77.47,
978
- "learning_rate": 7.523809523809524e-05,
979
- "loss": 0.6149,
980
- "step": 7050
981
- },
982
- {
983
- "epoch": 78.02,
984
- "learning_rate": 7.34065934065934e-05,
985
- "loss": 0.6338,
986
  "step": 7100
987
  },
988
  {
989
- "epoch": 78.57,
990
- "learning_rate": 7.157509157509158e-05,
991
- "loss": 0.6231,
992
- "step": 7150
993
- },
994
- {
995
- "epoch": 79.12,
996
- "learning_rate": 6.974358974358974e-05,
997
- "loss": 0.6245,
998
  "step": 7200
999
  },
1000
  {
1001
- "epoch": 79.67,
1002
- "learning_rate": 6.791208791208791e-05,
1003
- "loss": 0.6022,
1004
- "step": 7250
1005
- },
1006
- {
1007
- "epoch": 80.22,
1008
- "learning_rate": 6.608058608058607e-05,
1009
- "loss": 0.6138,
1010
  "step": 7300
1011
  },
1012
  {
1013
- "epoch": 80.77,
1014
- "learning_rate": 6.424908424908426e-05,
1015
- "loss": 0.6163,
1016
- "step": 7350
1017
- },
1018
- {
1019
- "epoch": 81.32,
1020
- "learning_rate": 6.241758241758242e-05,
1021
- "loss": 0.6275,
1022
  "step": 7400
1023
  },
1024
  {
1025
- "epoch": 81.86,
1026
- "learning_rate": 6.0586080586080586e-05,
1027
- "loss": 0.6085,
1028
- "step": 7450
1029
- },
1030
- {
1031
- "epoch": 82.42,
1032
- "learning_rate": 5.8754578754578754e-05,
1033
- "loss": 0.6039,
1034
  "step": 7500
1035
  },
1036
  {
1037
- "epoch": 82.42,
1038
- "eval_loss": 0.16446340084075928,
1039
- "eval_runtime": 178.7505,
1040
- "eval_samples_per_second": 27.094,
1041
- "eval_steps_per_second": 0.85,
1042
- "eval_wer": 0.12489355662787398,
1043
- "step": 7500
1044
- },
1045
- {
1046
- "epoch": 82.96,
1047
- "learning_rate": 5.692307692307693e-05,
1048
- "loss": 0.6063,
1049
- "step": 7550
1050
- },
1051
- {
1052
- "epoch": 83.51,
1053
- "learning_rate": 5.5091575091575095e-05,
1054
- "loss": 0.6039,
1055
  "step": 7600
1056
  },
1057
  {
1058
- "epoch": 84.07,
1059
- "learning_rate": 5.326007326007326e-05,
1060
- "loss": 0.6204,
1061
- "step": 7650
1062
- },
1063
- {
1064
- "epoch": 84.61,
1065
- "learning_rate": 5.142857142857143e-05,
1066
- "loss": 0.5922,
1067
  "step": 7700
1068
  },
1069
  {
1070
- "epoch": 85.16,
1071
- "learning_rate": 4.9597069597069596e-05,
1072
- "loss": 0.6031,
1073
- "step": 7750
1074
- },
1075
- {
1076
- "epoch": 85.71,
1077
- "learning_rate": 4.776556776556776e-05,
1078
- "loss": 0.5984,
1079
  "step": 7800
1080
  },
1081
  {
1082
- "epoch": 86.26,
1083
- "learning_rate": 4.593406593406593e-05,
1084
- "loss": 0.6213,
1085
- "step": 7850
1086
- },
1087
- {
1088
- "epoch": 86.81,
1089
- "learning_rate": 4.41025641025641e-05,
1090
- "loss": 0.5927,
1091
  "step": 7900
1092
  },
1093
  {
1094
- "epoch": 87.36,
1095
- "learning_rate": 4.227106227106227e-05,
1096
- "loss": 0.598,
1097
- "step": 7950
1098
- },
1099
- {
1100
- "epoch": 87.91,
1101
- "learning_rate": 4.0439560439560445e-05,
1102
- "loss": 0.5928,
1103
  "step": 8000
1104
  },
1105
  {
1106
- "epoch": 87.91,
1107
- "eval_loss": 0.15899540483951569,
1108
- "eval_runtime": 178.7907,
1109
- "eval_samples_per_second": 27.088,
1110
- "eval_steps_per_second": 0.85,
1111
- "eval_wer": 0.12140221402214021,
1112
  "step": 8000
1113
  },
1114
  {
1115
- "epoch": 88.46,
1116
- "learning_rate": 3.860805860805861e-05,
1117
- "loss": 0.6021,
1118
- "step": 8050
1119
- },
1120
- {
1121
- "epoch": 89.01,
1122
- "learning_rate": 3.677655677655678e-05,
1123
- "loss": 0.5962,
1124
  "step": 8100
1125
  },
1126
  {
1127
- "epoch": 89.56,
1128
- "learning_rate": 3.494505494505495e-05,
1129
- "loss": 0.5798,
1130
- "step": 8150
1131
- },
1132
- {
1133
- "epoch": 90.11,
1134
- "learning_rate": 3.3113553113553114e-05,
1135
- "loss": 0.6024,
1136
  "step": 8200
1137
  },
1138
  {
1139
- "epoch": 90.66,
1140
- "learning_rate": 3.128205128205128e-05,
1141
- "loss": 0.5831,
1142
- "step": 8250
1143
- },
1144
- {
1145
- "epoch": 91.21,
1146
- "learning_rate": 2.945054945054945e-05,
1147
- "loss": 0.6001,
1148
  "step": 8300
1149
  },
1150
  {
1151
- "epoch": 91.75,
1152
- "learning_rate": 2.761904761904762e-05,
1153
- "loss": 0.5901,
1154
- "step": 8350
1155
- },
1156
- {
1157
- "epoch": 92.31,
1158
- "learning_rate": 2.578754578754579e-05,
1159
- "loss": 0.6078,
1160
  "step": 8400
1161
  },
1162
  {
1163
- "epoch": 92.85,
1164
- "learning_rate": 2.3956043956043956e-05,
1165
- "loss": 0.5853,
1166
- "step": 8450
1167
- },
1168
- {
1169
- "epoch": 93.4,
1170
- "learning_rate": 2.2124542124542124e-05,
1171
- "loss": 0.6022,
1172
- "step": 8500
1173
- },
1174
- {
1175
- "epoch": 93.4,
1176
- "eval_loss": 0.1596660017967224,
1177
- "eval_runtime": 178.6421,
1178
- "eval_samples_per_second": 27.11,
1179
- "eval_steps_per_second": 0.851,
1180
- "eval_wer": 0.1213170593244394,
1181
  "step": 8500
1182
  },
1183
  {
1184
- "epoch": 93.95,
1185
- "learning_rate": 2.029304029304029e-05,
1186
- "loss": 0.5721,
1187
- "step": 8550
1188
- },
1189
- {
1190
- "epoch": 94.5,
1191
- "learning_rate": 1.8461538461538465e-05,
1192
- "loss": 0.5839,
1193
  "step": 8600
1194
  },
1195
  {
1196
- "epoch": 95.05,
1197
- "learning_rate": 1.6630036630036632e-05,
1198
- "loss": 0.587,
1199
- "step": 8650
1200
  },
1201
  {
1202
- "epoch": 95.6,
1203
- "learning_rate": 1.4835164835164835e-05,
1204
- "loss": 0.5817,
1205
- "step": 8700
1206
  },
1207
  {
1208
- "epoch": 96.15,
1209
- "learning_rate": 1.3003663003663005e-05,
1210
- "loss": 0.5819,
1211
- "step": 8750
1212
  },
1213
  {
1214
- "epoch": 96.7,
1215
- "learning_rate": 1.1172161172161172e-05,
1216
- "loss": 0.5853,
1217
- "step": 8800
1218
  },
1219
  {
1220
- "epoch": 97.25,
1221
- "learning_rate": 9.340659340659341e-06,
1222
- "loss": 0.5778,
1223
- "step": 8850
 
 
 
1224
  },
1225
  {
1226
- "epoch": 97.8,
1227
- "learning_rate": 7.509157509157509e-06,
1228
- "loss": 0.6038,
1229
- "step": 8900
1230
  },
1231
  {
1232
- "epoch": 98.35,
1233
- "learning_rate": 5.677655677655678e-06,
1234
- "loss": 0.5756,
1235
- "step": 8950
1236
  },
1237
  {
1238
- "epoch": 98.9,
1239
- "learning_rate": 3.846153846153847e-06,
1240
- "loss": 0.5814,
1241
- "step": 9000
1242
  },
1243
  {
1244
- "epoch": 98.9,
1245
- "eval_loss": 0.1598692536354065,
1246
- "eval_runtime": 178.176,
1247
- "eval_samples_per_second": 27.181,
1248
- "eval_steps_per_second": 0.853,
1249
- "eval_wer": 0.11989781436275901,
1250
- "step": 9000
1251
  },
1252
  {
1253
- "epoch": 99.45,
1254
- "learning_rate": 2.0146520146520148e-06,
1255
- "loss": 0.5807,
1256
- "step": 9050
1257
  },
1258
  {
1259
- "epoch": 99.99,
1260
- "learning_rate": 1.8315018315018315e-07,
1261
- "loss": 0.5798,
1262
- "step": 9100
1263
  },
1264
  {
1265
- "epoch": 99.99,
1266
- "step": 9100,
1267
- "total_flos": 1.4065789113067918e+20,
1268
- "train_loss": 1.0890738963032818,
1269
- "train_runtime": 51321.8258,
1270
- "train_samples_per_second": 22.778,
1271
- "train_steps_per_second": 0.177
1272
  }
1273
  ],
1274
- "max_steps": 9100,
1275
  "num_train_epochs": 100,
1276
- "total_flos": 1.4065789113067918e+20,
1277
  "trial_name": null,
1278
  "trial_params": null
1279
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 99.9974025974026,
5
+ "global_step": 9600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.04,
12
+ "learning_rate": 3.828125e-06,
13
+ "loss": 15.104,
 
 
 
 
 
 
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 2.08,
18
+ "learning_rate": 7.6953125e-06,
19
+ "loss": 12.2149,
 
 
 
 
 
 
20
  "step": 200
21
  },
22
  {
23
+ "epoch": 3.12,
24
+ "learning_rate": 1.16015625e-05,
25
+ "loss": 4.9885,
 
 
 
 
 
 
26
  "step": 300
27
  },
28
  {
29
+ "epoch": 4.17,
30
+ "learning_rate": 1.55078125e-05,
31
+ "loss": 3.3788,
 
 
 
 
 
 
32
  "step": 400
33
  },
34
  {
35
+ "epoch": 5.21,
36
+ "learning_rate": 1.94140625e-05,
37
+ "loss": 3.1705,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "step": 500
39
  },
40
  {
41
+ "epoch": 6.25,
42
+ "learning_rate": 2.3320312499999995e-05,
43
+ "loss": 3.11,
 
 
 
 
 
 
44
  "step": 600
45
  },
46
  {
47
+ "epoch": 7.29,
48
+ "learning_rate": 2.72265625e-05,
49
+ "loss": 3.0526,
 
 
 
 
 
 
50
  "step": 700
51
  },
52
  {
53
+ "epoch": 8.33,
54
+ "learning_rate": 3.11328125e-05,
55
+ "loss": 3.0113,
 
 
 
 
 
 
56
  "step": 800
57
  },
58
  {
59
+ "epoch": 9.37,
60
+ "learning_rate": 3.5039062499999995e-05,
61
+ "loss": 2.9601,
 
 
 
 
 
 
62
  "step": 900
63
  },
64
  {
65
+ "epoch": 10.42,
66
+ "learning_rate": 3.89453125e-05,
67
+ "loss": 2.9099,
 
 
 
 
 
 
68
  "step": 1000
69
  },
70
  {
71
+ "epoch": 10.42,
72
+ "eval_loss": 2.8369038105010986,
73
+ "eval_runtime": 198.5866,
74
+ "eval_samples_per_second": 25.405,
75
+ "eval_steps_per_second": 0.796,
76
+ "eval_wer": 1.0,
77
  "step": 1000
78
  },
79
  {
80
+ "epoch": 11.46,
81
+ "learning_rate": 4.28515625e-05,
82
+ "loss": 2.6653,
 
 
 
 
 
 
83
  "step": 1100
84
  },
85
  {
86
+ "epoch": 12.5,
87
+ "learning_rate": 4.675781249999999e-05,
88
+ "loss": 1.8958,
 
 
 
 
 
 
89
  "step": 1200
90
  },
91
  {
92
+ "epoch": 13.54,
93
+ "learning_rate": 5.0664062499999996e-05,
94
+ "loss": 1.4812,
 
 
 
 
 
 
95
  "step": 1300
96
  },
97
  {
98
+ "epoch": 14.58,
99
+ "learning_rate": 5.45703125e-05,
100
+ "loss": 1.3358,
 
 
 
 
 
 
101
  "step": 1400
102
  },
103
  {
104
+ "epoch": 15.62,
105
+ "learning_rate": 5.8476562499999996e-05,
106
+ "loss": 1.2522,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  "step": 1500
108
  },
109
  {
110
+ "epoch": 16.66,
111
+ "learning_rate": 6.238281249999999e-05,
112
+ "loss": 1.1919,
 
 
 
 
 
 
113
  "step": 1600
114
  },
115
  {
116
+ "epoch": 17.71,
117
+ "learning_rate": 6.62890625e-05,
118
+ "loss": 1.1663,
 
 
 
 
 
 
119
  "step": 1700
120
  },
121
  {
122
+ "epoch": 18.75,
123
+ "learning_rate": 7.019531249999999e-05,
124
+ "loss": 1.1289,
 
 
 
 
 
 
125
  "step": 1800
126
  },
127
  {
128
+ "epoch": 19.79,
129
+ "learning_rate": 7.410156249999999e-05,
130
+ "loss": 1.0902,
 
 
 
 
 
 
131
  "step": 1900
132
  },
133
  {
134
+ "epoch": 20.83,
135
+ "learning_rate": 7.4248046875e-05,
136
+ "loss": 1.0745,
 
 
 
 
 
 
137
  "step": 2000
138
  },
139
  {
140
+ "epoch": 20.83,
141
+ "eval_loss": 0.19572903215885162,
142
+ "eval_runtime": 202.06,
143
+ "eval_samples_per_second": 24.968,
144
+ "eval_steps_per_second": 0.782,
145
+ "eval_wer": 0.16725371193237237,
146
  "step": 2000
147
  },
148
  {
149
+ "epoch": 21.87,
150
+ "learning_rate": 7.327148437499999e-05,
151
+ "loss": 1.0485,
 
 
 
 
 
 
152
  "step": 2100
153
  },
154
  {
155
+ "epoch": 22.91,
156
+ "learning_rate": 7.2294921875e-05,
157
+ "loss": 1.0291,
 
 
 
 
 
 
158
  "step": 2200
159
  },
160
  {
161
+ "epoch": 23.96,
162
+ "learning_rate": 7.1318359375e-05,
163
+ "loss": 1.007,
 
 
 
 
 
 
164
  "step": 2300
165
  },
166
  {
167
+ "epoch": 25.0,
168
+ "learning_rate": 7.0341796875e-05,
169
+ "loss": 1.0008,
 
 
 
 
 
 
170
  "step": 2400
171
  },
172
  {
173
+ "epoch": 26.04,
174
+ "learning_rate": 6.9365234375e-05,
175
+ "loss": 0.988,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  "step": 2500
177
  },
178
  {
179
+ "epoch": 27.08,
180
+ "learning_rate": 6.8388671875e-05,
181
+ "loss": 0.9766,
 
 
 
 
 
 
182
  "step": 2600
183
  },
184
  {
185
+ "epoch": 28.12,
186
+ "learning_rate": 6.7412109375e-05,
187
+ "loss": 0.9663,
 
 
 
 
 
 
188
  "step": 2700
189
  },
190
  {
191
+ "epoch": 29.17,
192
+ "learning_rate": 6.6435546875e-05,
193
+ "loss": 0.9539,
 
 
 
 
 
 
194
  "step": 2800
195
  },
196
  {
197
+ "epoch": 30.21,
198
+ "learning_rate": 6.545898437499999e-05,
199
+ "loss": 0.9479,
 
 
 
 
 
 
200
  "step": 2900
201
  },
202
  {
203
+ "epoch": 31.25,
204
+ "learning_rate": 6.448242187499999e-05,
205
+ "loss": 0.934,
 
 
 
 
 
 
206
  "step": 3000
207
  },
208
  {
209
+ "epoch": 31.25,
210
+ "eval_loss": 0.1579357087612152,
211
+ "eval_runtime": 198.7733,
212
+ "eval_samples_per_second": 25.381,
213
+ "eval_steps_per_second": 0.795,
214
+ "eval_wer": 0.1388858784003468,
215
  "step": 3000
216
  },
217
  {
218
+ "epoch": 32.29,
219
+ "learning_rate": 6.3505859375e-05,
220
+ "loss": 0.9285,
 
 
 
 
 
 
221
  "step": 3100
222
  },
223
  {
224
+ "epoch": 33.33,
225
+ "learning_rate": 6.252929687499999e-05,
226
+ "loss": 0.9121,
 
 
 
 
 
 
227
  "step": 3200
228
  },
229
  {
230
+ "epoch": 34.37,
231
+ "learning_rate": 6.155273437499999e-05,
232
+ "loss": 0.9016,
 
 
 
 
 
 
233
  "step": 3300
234
  },
235
  {
236
+ "epoch": 35.42,
237
+ "learning_rate": 6.0576171875e-05,
238
+ "loss": 0.9023,
 
 
 
 
 
 
239
  "step": 3400
240
  },
241
  {
242
+ "epoch": 36.46,
243
+ "learning_rate": 5.9599609374999994e-05,
244
+ "loss": 0.9004,
 
 
 
 
 
 
245
  "step": 3500
246
  },
247
  {
248
+ "epoch": 37.5,
249
+ "learning_rate": 5.862304687499999e-05,
250
+ "loss": 0.8844,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  "step": 3600
252
  },
253
  {
254
+ "epoch": 38.54,
255
+ "learning_rate": 5.7646484375e-05,
256
+ "loss": 0.8771,
 
 
 
 
 
 
257
  "step": 3700
258
  },
259
  {
260
+ "epoch": 39.58,
261
+ "learning_rate": 5.6669921875e-05,
262
+ "loss": 0.876,
 
 
 
 
 
 
263
  "step": 3800
264
  },
265
  {
266
+ "epoch": 40.62,
267
+ "learning_rate": 5.569335937499999e-05,
268
+ "loss": 0.8708,
 
 
 
 
 
 
269
  "step": 3900
270
  },
271
  {
272
+ "epoch": 41.66,
273
+ "learning_rate": 5.4716796874999997e-05,
274
+ "loss": 0.8691,
 
 
 
 
 
 
275
  "step": 4000
276
  },
277
  {
278
+ "epoch": 41.66,
279
+ "eval_loss": 0.14571049809455872,
280
+ "eval_runtime": 195.237,
281
+ "eval_samples_per_second": 25.84,
282
+ "eval_steps_per_second": 0.809,
283
+ "eval_wer": 0.12899642353961202,
284
  "step": 4000
285
  },
286
  {
287
+ "epoch": 42.71,
288
+ "learning_rate": 5.3740234374999996e-05,
289
+ "loss": 0.8624,
 
 
 
 
 
 
290
  "step": 4100
291
  },
292
  {
293
+ "epoch": 43.75,
294
+ "learning_rate": 5.2763671874999995e-05,
295
+ "loss": 0.8556,
 
 
 
 
 
 
296
  "step": 4200
297
  },
298
  {
299
+ "epoch": 44.79,
300
+ "learning_rate": 5.1787109375e-05,
301
+ "loss": 0.8607,
 
 
 
 
 
 
302
  "step": 4300
303
  },
304
  {
305
+ "epoch": 45.83,
306
+ "learning_rate": 5.0810546875e-05,
307
+ "loss": 0.8536,
 
 
 
 
 
 
308
  "step": 4400
309
  },
310
  {
311
+ "epoch": 46.87,
312
+ "learning_rate": 4.983398437499999e-05,
313
+ "loss": 0.8493,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  "step": 4500
315
  },
316
  {
317
+ "epoch": 47.91,
318
+ "learning_rate": 4.8857421875e-05,
319
+ "loss": 0.8456,
 
 
 
 
 
 
320
  "step": 4600
321
  },
322
  {
323
+ "epoch": 48.96,
324
+ "learning_rate": 4.7880859375e-05,
325
+ "loss": 0.8333,
 
 
 
 
 
 
326
  "step": 4700
327
  },
328
  {
329
+ "epoch": 50.0,
330
+ "learning_rate": 4.6904296874999996e-05,
331
+ "loss": 0.8346,
 
 
 
 
 
 
332
  "step": 4800
333
  },
334
  {
335
+ "epoch": 51.04,
336
+ "learning_rate": 4.5927734375e-05,
337
+ "loss": 0.8403,
 
 
 
 
 
 
338
  "step": 4900
339
  },
340
  {
341
+ "epoch": 52.08,
342
+ "learning_rate": 4.4951171874999995e-05,
343
+ "loss": 0.8328,
 
 
 
 
 
 
344
  "step": 5000
345
  },
346
  {
347
+ "epoch": 52.08,
348
+ "eval_loss": 0.14348936080932617,
349
+ "eval_runtime": 197.7739,
350
+ "eval_samples_per_second": 25.509,
351
+ "eval_steps_per_second": 0.799,
352
+ "eval_wer": 0.12054297171344966,
353
  "step": 5000
354
  },
355
  {
356
+ "epoch": 53.12,
357
+ "learning_rate": 4.3974609374999994e-05,
358
+ "loss": 0.8275,
 
 
 
 
 
 
359
  "step": 5100
360
  },
361
  {
362
+ "epoch": 54.17,
363
+ "learning_rate": 4.2998046875e-05,
364
+ "loss": 0.8262,
 
 
 
 
 
 
365
  "step": 5200
366
  },
367
  {
368
+ "epoch": 55.21,
369
+ "learning_rate": 4.2021484375e-05,
370
+ "loss": 0.8167,
 
 
 
 
 
 
371
  "step": 5300
372
  },
373
  {
374
+ "epoch": 56.25,
375
+ "learning_rate": 4.1044921875e-05,
376
+ "loss": 0.8194,
 
 
 
 
 
 
377
  "step": 5400
378
  },
379
  {
380
+ "epoch": 57.29,
381
+ "learning_rate": 4.0068359375e-05,
382
+ "loss": 0.8192,
 
 
 
 
 
 
383
  "step": 5500
384
  },
385
  {
386
+ "epoch": 58.33,
387
+ "learning_rate": 3.9091796874999996e-05,
388
+ "loss": 0.8176,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  "step": 5600
390
  },
391
  {
392
+ "epoch": 59.37,
393
+ "learning_rate": 3.8115234374999995e-05,
394
+ "loss": 0.8115,
 
 
 
 
 
 
395
  "step": 5700
396
  },
397
  {
398
+ "epoch": 60.42,
399
+ "learning_rate": 3.7138671874999994e-05,
400
+ "loss": 0.8129,
 
 
 
 
 
 
401
  "step": 5800
402
  },
403
  {
404
+ "epoch": 61.46,
405
+ "learning_rate": 3.6162109375e-05,
406
+ "loss": 0.8052,
 
 
 
 
 
 
407
  "step": 5900
408
  },
409
  {
410
+ "epoch": 62.5,
411
+ "learning_rate": 3.518554687499999e-05,
412
+ "loss": 0.8068,
 
 
 
 
 
 
413
  "step": 6000
414
  },
415
  {
416
+ "epoch": 62.5,
417
+ "eval_loss": 0.13501569628715515,
418
+ "eval_runtime": 207.5786,
419
+ "eval_samples_per_second": 24.304,
420
+ "eval_steps_per_second": 0.761,
421
+ "eval_wer": 0.11913406307575593,
422
  "step": 6000
423
  },
424
  {
425
+ "epoch": 63.54,
426
+ "learning_rate": 3.4208984375e-05,
427
+ "loss": 0.8004,
 
 
 
 
 
 
428
  "step": 6100
429
  },
430
  {
431
+ "epoch": 64.58,
432
+ "learning_rate": 3.32421875e-05,
433
+ "loss": 0.7904,
 
 
 
 
 
 
434
  "step": 6200
435
  },
436
  {
437
+ "epoch": 65.62,
438
+ "learning_rate": 3.2265625e-05,
439
+ "loss": 0.7947,
 
 
 
 
 
 
440
  "step": 6300
441
  },
442
  {
443
+ "epoch": 66.66,
444
+ "learning_rate": 3.12890625e-05,
445
+ "loss": 0.7981,
 
 
 
 
 
 
446
  "step": 6400
447
  },
448
  {
449
+ "epoch": 67.71,
450
+ "learning_rate": 3.0312499999999998e-05,
451
+ "loss": 0.8018,
 
 
 
 
 
 
452
  "step": 6500
453
  },
454
  {
455
+ "epoch": 68.75,
456
+ "learning_rate": 2.93359375e-05,
457
+ "loss": 0.7922,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  "step": 6600
459
  },
460
  {
461
+ "epoch": 69.79,
462
+ "learning_rate": 2.8359374999999996e-05,
463
+ "loss": 0.796,
 
 
 
 
 
 
464
  "step": 6700
465
  },
466
  {
467
+ "epoch": 70.83,
468
+ "learning_rate": 2.73828125e-05,
469
+ "loss": 0.7887,
 
 
 
 
 
 
470
  "step": 6800
471
  },
472
  {
473
+ "epoch": 71.87,
474
+ "learning_rate": 2.6406249999999998e-05,
475
+ "loss": 0.7796,
 
 
 
 
 
 
476
  "step": 6900
477
  },
478
  {
479
+ "epoch": 72.91,
480
+ "learning_rate": 2.5429687499999997e-05,
481
+ "loss": 0.7822,
 
 
 
 
 
 
482
  "step": 7000
483
  },
484
  {
485
+ "epoch": 72.91,
486
+ "eval_loss": 0.1346774846315384,
487
+ "eval_runtime": 195.6804,
488
+ "eval_samples_per_second": 25.782,
489
+ "eval_steps_per_second": 0.807,
490
+ "eval_wer": 0.11550341389400672,
491
  "step": 7000
492
  },
493
  {
494
+ "epoch": 73.96,
495
+ "learning_rate": 2.4453125e-05,
496
+ "loss": 0.7882,
 
 
 
 
 
 
497
  "step": 7100
498
  },
499
  {
500
+ "epoch": 75.0,
501
+ "learning_rate": 2.34765625e-05,
502
+ "loss": 0.7821,
 
 
 
 
 
 
503
  "step": 7200
504
  },
505
  {
506
+ "epoch": 76.04,
507
+ "learning_rate": 2.2499999999999998e-05,
508
+ "loss": 0.7814,
 
 
 
 
 
 
509
  "step": 7300
510
  },
511
  {
512
+ "epoch": 77.08,
513
+ "learning_rate": 2.1523437499999997e-05,
514
+ "loss": 0.784,
 
 
 
 
 
 
515
  "step": 7400
516
  },
517
  {
518
+ "epoch": 78.12,
519
+ "learning_rate": 2.0546875e-05,
520
+ "loss": 0.7772,
 
 
 
 
 
 
521
  "step": 7500
522
  },
523
  {
524
+ "epoch": 79.17,
525
+ "learning_rate": 1.95703125e-05,
526
+ "loss": 0.7785,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  "step": 7600
528
  },
529
  {
530
+ "epoch": 80.21,
531
+ "learning_rate": 1.8593749999999998e-05,
532
+ "loss": 0.7731,
 
 
 
 
 
 
533
  "step": 7700
534
  },
535
  {
536
+ "epoch": 81.25,
537
+ "learning_rate": 1.7617187499999997e-05,
538
+ "loss": 0.7726,
 
 
 
 
 
 
539
  "step": 7800
540
  },
541
  {
542
+ "epoch": 82.29,
543
+ "learning_rate": 1.6640624999999996e-05,
544
+ "loss": 0.7765,
 
 
 
 
 
 
545
  "step": 7900
546
  },
547
  {
548
+ "epoch": 83.33,
549
+ "learning_rate": 1.56640625e-05,
550
+ "loss": 0.7769,
 
 
 
 
 
 
551
  "step": 8000
552
  },
553
  {
554
+ "epoch": 83.33,
555
+ "eval_loss": 0.1320926398038864,
556
+ "eval_runtime": 197.3103,
557
+ "eval_samples_per_second": 25.569,
558
+ "eval_steps_per_second": 0.801,
559
+ "eval_wer": 0.11306491817492142,
560
  "step": 8000
561
  },
562
  {
563
+ "epoch": 84.37,
564
+ "learning_rate": 1.4687499999999998e-05,
565
+ "loss": 0.7713,
 
 
 
 
 
 
566
  "step": 8100
567
  },
568
  {
569
+ "epoch": 85.42,
570
+ "learning_rate": 1.3710937499999999e-05,
571
+ "loss": 0.773,
 
 
 
 
 
 
572
  "step": 8200
573
  },
574
  {
575
+ "epoch": 86.46,
576
+ "learning_rate": 1.2734375e-05,
577
+ "loss": 0.7668,
 
 
 
 
 
 
578
  "step": 8300
579
  },
580
  {
581
+ "epoch": 87.5,
582
+ "learning_rate": 1.1757812499999999e-05,
583
+ "loss": 0.7766,
 
 
 
 
 
 
584
  "step": 8400
585
  },
586
  {
587
+ "epoch": 88.54,
588
+ "learning_rate": 1.0781249999999998e-05,
589
+ "loss": 0.7596,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  "step": 8500
591
  },
592
  {
593
+ "epoch": 89.58,
594
+ "learning_rate": 9.8046875e-06,
595
+ "loss": 0.7561,
 
 
 
 
 
 
596
  "step": 8600
597
  },
598
  {
599
+ "epoch": 90.62,
600
+ "learning_rate": 8.837890624999999e-06,
601
+ "loss": 0.7585,
602
+ "step": 8700
603
  },
604
  {
605
+ "epoch": 91.66,
606
+ "learning_rate": 7.861328124999998e-06,
607
+ "loss": 0.7524,
608
+ "step": 8800
609
  },
610
  {
611
+ "epoch": 92.71,
612
+ "learning_rate": 6.884765624999999e-06,
613
+ "loss": 0.7591,
614
+ "step": 8900
615
  },
616
  {
617
+ "epoch": 93.75,
618
+ "learning_rate": 5.908203125e-06,
619
+ "loss": 0.7678,
620
+ "step": 9000
621
  },
622
  {
623
+ "epoch": 93.75,
624
+ "eval_loss": 0.13205420970916748,
625
+ "eval_runtime": 199.4472,
626
+ "eval_samples_per_second": 25.295,
627
+ "eval_steps_per_second": 0.792,
628
+ "eval_wer": 0.11146634875907663,
629
+ "step": 9000
630
  },
631
  {
632
+ "epoch": 94.79,
633
+ "learning_rate": 4.931640624999999e-06,
634
+ "loss": 0.7609,
635
+ "step": 9100
636
  },
637
  {
638
+ "epoch": 95.83,
639
+ "learning_rate": 3.955078125e-06,
640
+ "loss": 0.7611,
641
+ "step": 9200
642
  },
643
  {
644
+ "epoch": 96.87,
645
+ "learning_rate": 2.9785156249999998e-06,
646
+ "loss": 0.7692,
647
+ "step": 9300
648
  },
649
  {
650
+ "epoch": 97.91,
651
+ "learning_rate": 2.0019531249999997e-06,
652
+ "loss": 0.7576,
653
+ "step": 9400
 
 
 
654
  },
655
  {
656
+ "epoch": 98.96,
657
+ "learning_rate": 1.025390625e-06,
658
+ "loss": 0.7552,
659
+ "step": 9500
660
  },
661
  {
662
+ "epoch": 100.0,
663
+ "learning_rate": 4.8828124999999996e-08,
664
+ "loss": 0.771,
665
+ "step": 9600
666
  },
667
  {
668
+ "epoch": 100.0,
669
+ "step": 9600,
670
+ "total_flos": 1.4910750173005185e+20,
671
+ "train_loss": 1.3699857131640116,
672
+ "train_runtime": 54887.9396,
673
+ "train_samples_per_second": 22.422,
674
+ "train_steps_per_second": 0.175
675
  }
676
  ],
677
+ "max_steps": 9600,
678
  "num_train_epochs": 100,
679
+ "total_flos": 1.4910750173005185e+20,
680
  "trial_name": null,
681
  "trial_params": null
682
  }