AndrewMcDowell commited on
Commit
4ae5ace
1 Parent(s): eb8fbe4

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +188 -656
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_loss": 1.3742073774337769,
4
- "eval_runtime": 417.2357,
5
  "eval_samples": 10388,
6
- "eval_samples_per_second": 24.897,
7
- "eval_steps_per_second": 3.113,
8
- "eval_wer": 0.9387498381338217,
9
- "train_loss": 1.2195568717304786,
10
- "train_runtime": 26512.2441,
11
  "train_samples": 38209,
12
- "train_samples_per_second": 28.824,
13
- "train_steps_per_second": 0.45
14
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.3699203729629517,
4
+ "eval_runtime": 441.7782,
5
  "eval_samples": 10388,
6
+ "eval_samples_per_second": 23.514,
7
+ "eval_steps_per_second": 2.94,
8
+ "eval_wer": 0.928593891632906,
9
+ "train_loss": 2.4458747799871756,
10
+ "train_runtime": 25997.2696,
11
  "train_samples": 38209,
12
+ "train_samples_per_second": 14.697,
13
+ "train_steps_per_second": 0.23
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_loss": 1.3742073774337769,
4
- "eval_runtime": 417.2357,
5
  "eval_samples": 10388,
6
- "eval_samples_per_second": 24.897,
7
- "eval_steps_per_second": 3.113,
8
- "eval_wer": 0.9387498381338217
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.3699203729629517,
4
+ "eval_runtime": 441.7782,
5
  "eval_samples": 10388,
6
+ "eval_samples_per_second": 23.514,
7
+ "eval_steps_per_second": 2.94,
8
+ "eval_wer": 0.928593891632906
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 20.0,
3
- "train_loss": 1.2195568717304786,
4
- "train_runtime": 26512.2441,
5
  "train_samples": 38209,
6
- "train_samples_per_second": 28.824,
7
- "train_steps_per_second": 0.45
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 2.4458747799871756,
4
+ "train_runtime": 25997.2696,
5
  "train_samples": 38209,
6
+ "train_samples_per_second": 14.697,
7
+ "train_steps_per_second": 0.23
8
  }
trainer_state.json CHANGED
@@ -1,946 +1,478 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.99916317991632,
5
- "global_step": 11940,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.17,
12
- "learning_rate": 0.000194,
13
- "loss": 4.6647,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.33,
18
- "learning_rate": 0.00039400000000000004,
19
- "loss": 3.2495,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.5,
24
- "learning_rate": 0.000594,
25
- "loss": 2.8044,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 0.67,
30
- "learning_rate": 0.0007940000000000001,
31
- "loss": 2.6636,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 0.84,
36
- "learning_rate": 0.000994,
37
- "loss": 2.6638,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.84,
42
- "eval_loss": 2.3851921558380127,
43
- "eval_runtime": 415.3375,
44
- "eval_samples_per_second": 25.011,
45
- "eval_steps_per_second": 3.128,
46
- "eval_wer": 0.9974286401391124,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 1.01,
51
- "learning_rate": 0.0009822669104204754,
52
- "loss": 2.6437,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 1.17,
57
- "learning_rate": 0.0009639853747714809,
58
- "loss": 2.6394,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 1.34,
63
- "learning_rate": 0.0009457038391224862,
64
- "loss": 2.6303,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 1.51,
69
- "learning_rate": 0.0009274223034734918,
70
- "loss": 2.6351,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 1.67,
75
- "learning_rate": 0.0009091407678244972,
76
- "loss": 2.6578,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 1.67,
81
- "eval_loss": 2.2796220779418945,
82
- "eval_runtime": 416.0072,
83
- "eval_samples_per_second": 24.971,
84
- "eval_steps_per_second": 3.123,
85
- "eval_wer": 0.9970586603030135,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 1.84,
90
- "learning_rate": 0.0008908592321755028,
91
- "loss": 2.6462,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 2.01,
96
- "learning_rate": 0.0008725776965265082,
97
- "loss": 2.6498,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 2.18,
102
- "learning_rate": 0.0008542961608775137,
103
- "loss": 2.614,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 2.34,
108
- "learning_rate": 0.0008360146252285192,
109
- "loss": 2.6209,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 2.51,
114
- "learning_rate": 0.0008177330895795247,
115
- "loss": 2.6016,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 2.51,
120
- "eval_loss": 2.004575252532959,
121
- "eval_runtime": 417.037,
122
- "eval_samples_per_second": 24.909,
123
- "eval_steps_per_second": 3.115,
124
- "eval_wer": 0.9960782137373513,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 2.68,
129
- "learning_rate": 0.0007994515539305302,
130
- "loss": 2.5937,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 2.85,
135
- "learning_rate": 0.0007811700182815357,
136
- "loss": 2.5909,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 3.02,
141
- "learning_rate": 0.0007628884826325412,
142
- "loss": 2.5913,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 3.18,
147
- "learning_rate": 0.0007446069469835466,
148
- "loss": 2.5828,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 3.35,
153
- "learning_rate": 0.0007263254113345521,
154
- "loss": 2.5752,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 3.35,
159
- "eval_loss": 1.960595726966858,
160
- "eval_runtime": 416.9162,
161
- "eval_samples_per_second": 24.916,
162
- "eval_steps_per_second": 3.116,
163
- "eval_wer": 0.9961152117209612,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 3.52,
168
- "learning_rate": 0.0007080438756855576,
169
- "loss": 2.5663,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 3.68,
174
- "learning_rate": 0.0006897623400365632,
175
- "loss": 2.5729,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 3.85,
180
- "learning_rate": 0.0006714808043875685,
181
- "loss": 2.5767,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 4.02,
186
- "learning_rate": 0.0006531992687385741,
187
- "loss": 2.5661,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 4.19,
192
- "learning_rate": 0.0006349177330895795,
193
- "loss": 2.539,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 4.19,
198
- "eval_loss": 1.8835679292678833,
199
- "eval_runtime": 417.6452,
200
- "eval_samples_per_second": 24.873,
201
- "eval_steps_per_second": 3.11,
202
- "eval_wer": 0.9939693286715874,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 4.35,
207
- "learning_rate": 0.000616636197440585,
208
- "loss": 2.5237,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 4.52,
213
- "learning_rate": 0.0005983546617915904,
214
- "loss": 2.5464,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 4.69,
219
- "learning_rate": 0.000580073126142596,
220
- "loss": 2.5135,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 4.86,
225
- "learning_rate": 0.0005617915904936015,
226
- "loss": 2.5058,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 5.03,
231
- "learning_rate": 0.0005435100548446069,
232
- "loss": 2.5214,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 5.03,
237
- "eval_loss": 1.859293818473816,
238
- "eval_runtime": 418.88,
239
- "eval_samples_per_second": 24.799,
240
- "eval_steps_per_second": 3.101,
241
- "eval_wer": 0.9933033649666093,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 5.19,
246
- "learning_rate": 0.0005252285191956125,
247
- "loss": 2.4984,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 5.36,
252
- "learning_rate": 0.0005069469835466179,
253
- "loss": 2.4812,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 5.53,
258
- "learning_rate": 0.0004886654478976234,
259
- "loss": 2.4626,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 5.69,
264
- "learning_rate": 0.0004703839122486289,
265
- "loss": 2.476,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 5.86,
270
- "learning_rate": 0.00045210237659963436,
271
- "loss": 2.4684,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 5.86,
276
- "eval_loss": 1.781636357307434,
277
- "eval_runtime": 415.6525,
278
- "eval_samples_per_second": 24.992,
279
- "eval_steps_per_second": 3.125,
280
- "eval_wer": 0.9884566291137133,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 6.03,
285
- "learning_rate": 0.0004338208409506398,
286
- "loss": 2.4739,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 6.2,
291
- "learning_rate": 0.0004155393053016453,
292
- "loss": 2.4494,
293
  "step": 3700
294
  },
295
  {
296
- "epoch": 6.36,
297
- "learning_rate": 0.00039725776965265084,
298
- "loss": 2.4263,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 6.53,
303
- "learning_rate": 0.00037897623400365635,
304
- "loss": 2.4187,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 6.7,
309
- "learning_rate": 0.0003606946983546618,
310
- "loss": 2.4134,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 6.7,
315
- "eval_loss": 1.7167690992355347,
316
- "eval_runtime": 416.8699,
317
- "eval_samples_per_second": 24.919,
318
- "eval_steps_per_second": 3.116,
319
- "eval_wer": 0.9808165454982704,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 6.87,
324
- "learning_rate": 0.0003424131627056673,
325
- "loss": 2.4008,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 7.04,
330
- "learning_rate": 0.00032413162705667277,
331
- "loss": 2.4048,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 7.2,
336
- "learning_rate": 0.0003058500914076783,
337
- "loss": 2.3795,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 7.37,
342
- "learning_rate": 0.00028756855575868374,
343
- "loss": 2.3803,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 7.54,
348
- "learning_rate": 0.0002692870201096892,
349
- "loss": 2.3732,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 7.54,
354
- "eval_loss": 1.6406092643737793,
355
- "eval_runtime": 415.1084,
356
- "eval_samples_per_second": 25.025,
357
- "eval_steps_per_second": 3.129,
358
- "eval_wer": 0.976432284440498,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 7.7,
363
- "learning_rate": 0.0002510054844606947,
364
- "loss": 2.3657,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 7.87,
369
- "learning_rate": 0.0002327239488117002,
370
- "loss": 2.3565,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 8.04,
375
- "learning_rate": 0.00021462522851919562,
376
- "loss": 2.3679,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 8.21,
381
- "learning_rate": 0.0001963436928702011,
382
- "loss": 2.34,
383
  "step": 4900
384
  },
385
  {
386
- "epoch": 8.37,
387
- "learning_rate": 0.00017806215722120658,
388
- "loss": 2.3371,
389
  "step": 5000
390
  },
391
  {
392
- "epoch": 8.37,
393
- "eval_loss": 1.6087424755096436,
394
- "eval_runtime": 417.7716,
395
- "eval_samples_per_second": 24.865,
396
- "eval_steps_per_second": 3.109,
397
- "eval_wer": 0.9739349205468302,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 8.54,
402
- "learning_rate": 0.00015978062157221207,
403
- "loss": 2.3216,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 8.71,
408
- "learning_rate": 0.00014149908592321755,
409
- "loss": 2.3004,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 8.88,
414
- "learning_rate": 0.00012321755027422303,
415
- "loss": 2.3028,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 9.05,
420
- "learning_rate": 0.00010493601462522852,
421
- "loss": 2.3099,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 9.21,
426
- "learning_rate": 8.6654478976234e-05,
427
- "loss": 2.2824,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 9.21,
432
- "eval_loss": 1.5476473569869995,
433
- "eval_runtime": 417.8751,
434
- "eval_samples_per_second": 24.859,
435
- "eval_steps_per_second": 3.109,
436
- "eval_wer": 0.9695691584808628,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 9.38,
441
- "learning_rate": 0.0005545454545454546,
442
- "loss": 2.3577,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 9.55,
447
- "learning_rate": 0.0005458041958041959,
448
- "loss": 2.3723,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 9.71,
453
- "learning_rate": 0.000537062937062937,
454
- "loss": 2.3758,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 9.88,
459
- "learning_rate": 0.0005283216783216783,
460
- "loss": 2.3833,
461
  "step": 5900
462
  },
463
  {
464
- "epoch": 10.05,
465
- "learning_rate": 0.0005195804195804196,
466
- "loss": 2.3771,
467
- "step": 6000
468
- },
469
- {
470
- "epoch": 10.05,
471
- "eval_loss": 1.6468309164047241,
472
- "eval_runtime": 414.4359,
473
- "eval_samples_per_second": 25.065,
474
- "eval_steps_per_second": 3.134,
475
- "eval_wer": 0.9773017370553305,
476
- "step": 6000
477
- },
478
- {
479
- "epoch": 10.22,
480
- "learning_rate": 0.0005108391608391608,
481
- "loss": 2.3673,
482
- "step": 6100
483
- },
484
- {
485
- "epoch": 10.38,
486
- "learning_rate": 0.0005020979020979021,
487
- "loss": 2.3555,
488
- "step": 6200
489
- },
490
- {
491
- "epoch": 10.55,
492
- "learning_rate": 0.0004933566433566434,
493
- "loss": 2.3645,
494
- "step": 6300
495
- },
496
- {
497
- "epoch": 10.72,
498
- "learning_rate": 0.0004846153846153846,
499
- "loss": 2.3617,
500
- "step": 6400
501
- },
502
- {
503
- "epoch": 10.89,
504
- "learning_rate": 0.0004758741258741259,
505
- "loss": 2.3499,
506
- "step": 6500
507
- },
508
- {
509
- "epoch": 10.89,
510
- "eval_loss": 1.6116454601287842,
511
- "eval_runtime": 413.4109,
512
- "eval_samples_per_second": 25.128,
513
- "eval_steps_per_second": 3.142,
514
- "eval_wer": 0.9737314316369757,
515
- "step": 6500
516
- },
517
- {
518
- "epoch": 11.06,
519
- "learning_rate": 0.0004671328671328671,
520
- "loss": 2.3634,
521
- "step": 6600
522
- },
523
- {
524
- "epoch": 11.22,
525
- "learning_rate": 0.0004583916083916084,
526
- "loss": 2.3573,
527
- "step": 6700
528
- },
529
- {
530
- "epoch": 11.39,
531
- "learning_rate": 0.0004496503496503497,
532
- "loss": 2.355,
533
- "step": 6800
534
- },
535
- {
536
- "epoch": 11.56,
537
- "learning_rate": 0.00044090909090909093,
538
- "loss": 2.3543,
539
- "step": 6900
540
- },
541
- {
542
- "epoch": 11.72,
543
- "learning_rate": 0.00043216783216783216,
544
- "loss": 2.3283,
545
- "step": 7000
546
- },
547
- {
548
- "epoch": 11.72,
549
- "eval_loss": 1.6059322357177734,
550
- "eval_runtime": 409.6633,
551
- "eval_samples_per_second": 25.357,
552
- "eval_steps_per_second": 3.171,
553
- "eval_wer": 0.9743973953419539,
554
- "step": 7000
555
- },
556
- {
557
- "epoch": 11.89,
558
- "learning_rate": 0.00042342657342657344,
559
- "loss": 2.3277,
560
- "step": 7100
561
- },
562
- {
563
- "epoch": 12.06,
564
- "learning_rate": 0.0004146853146853147,
565
- "loss": 2.3361,
566
- "step": 7200
567
- },
568
- {
569
- "epoch": 12.23,
570
- "learning_rate": 0.00040594405594405596,
571
- "loss": 2.3199,
572
- "step": 7300
573
- },
574
- {
575
- "epoch": 12.39,
576
- "learning_rate": 0.00039720279720279725,
577
- "loss": 2.3216,
578
- "step": 7400
579
- },
580
- {
581
- "epoch": 12.56,
582
- "learning_rate": 0.0003884615384615385,
583
- "loss": 2.3153,
584
- "step": 7500
585
- },
586
- {
587
- "epoch": 12.56,
588
- "eval_loss": 1.5888867378234863,
589
- "eval_runtime": 416.4655,
590
- "eval_samples_per_second": 24.943,
591
- "eval_steps_per_second": 3.119,
592
- "eval_wer": 0.9758218177109348,
593
- "step": 7500
594
- },
595
- {
596
- "epoch": 12.73,
597
- "learning_rate": 0.0003797202797202797,
598
- "loss": 2.3192,
599
- "step": 7600
600
- },
601
- {
602
- "epoch": 12.9,
603
- "learning_rate": 0.000370979020979021,
604
- "loss": 2.3053,
605
- "step": 7700
606
- },
607
- {
608
- "epoch": 13.07,
609
- "learning_rate": 0.00036223776223776223,
610
- "loss": 2.3185,
611
- "step": 7800
612
- },
613
- {
614
- "epoch": 13.23,
615
- "learning_rate": 0.0003534965034965035,
616
- "loss": 2.3101,
617
- "step": 7900
618
- },
619
- {
620
- "epoch": 13.4,
621
- "learning_rate": 0.0003447552447552448,
622
- "loss": 2.3016,
623
- "step": 8000
624
- },
625
- {
626
- "epoch": 13.4,
627
- "eval_loss": 1.5663487911224365,
628
- "eval_runtime": 415.4876,
629
- "eval_samples_per_second": 25.002,
630
- "eval_steps_per_second": 3.126,
631
- "eval_wer": 0.9727509850713136,
632
- "step": 8000
633
- },
634
- {
635
- "epoch": 13.57,
636
- "learning_rate": 0.00033601398601398603,
637
- "loss": 2.2916,
638
- "step": 8100
639
- },
640
- {
641
- "epoch": 13.73,
642
- "learning_rate": 0.00032727272727272726,
643
- "loss": 2.2904,
644
- "step": 8200
645
- },
646
- {
647
- "epoch": 13.9,
648
- "learning_rate": 0.00031853146853146855,
649
- "loss": 2.2708,
650
- "step": 8300
651
- },
652
- {
653
- "epoch": 14.07,
654
- "learning_rate": 0.0003097902097902098,
655
- "loss": 2.2876,
656
- "step": 8400
657
- },
658
- {
659
- "epoch": 14.24,
660
- "learning_rate": 0.00030104895104895107,
661
- "loss": 2.2731,
662
- "step": 8500
663
- },
664
- {
665
- "epoch": 14.24,
666
- "eval_loss": 1.567448377609253,
667
- "eval_runtime": 407.9046,
668
- "eval_samples_per_second": 25.467,
669
- "eval_steps_per_second": 3.185,
670
- "eval_wer": 0.9626135375622029,
671
- "step": 8500
672
- },
673
- {
674
- "epoch": 14.41,
675
- "learning_rate": 0.00029230769230769235,
676
- "loss": 2.2482,
677
- "step": 8600
678
- },
679
- {
680
- "epoch": 14.57,
681
- "learning_rate": 0.0002835664335664336,
682
- "loss": 2.2559,
683
- "step": 8700
684
- },
685
- {
686
- "epoch": 14.74,
687
- "learning_rate": 0.0002748251748251748,
688
- "loss": 2.2726,
689
- "step": 8800
690
- },
691
- {
692
- "epoch": 14.91,
693
- "learning_rate": 0.0002660839160839161,
694
- "loss": 2.2508,
695
- "step": 8900
696
- },
697
- {
698
- "epoch": 15.08,
699
- "learning_rate": 0.00025743006993006993,
700
- "loss": 2.2617,
701
- "step": 9000
702
- },
703
- {
704
- "epoch": 15.08,
705
- "eval_loss": 1.5032401084899902,
706
- "eval_runtime": 409.6274,
707
- "eval_samples_per_second": 25.36,
708
- "eval_steps_per_second": 3.171,
709
- "eval_wer": 0.9583402704552602,
710
- "step": 9000
711
- },
712
- {
713
- "epoch": 15.24,
714
- "learning_rate": 0.0002486888111888112,
715
- "loss": 2.2396,
716
- "step": 9100
717
- },
718
- {
719
- "epoch": 15.41,
720
- "learning_rate": 0.00023994755244755245,
721
- "loss": 2.2448,
722
- "step": 9200
723
- },
724
- {
725
- "epoch": 15.58,
726
- "learning_rate": 0.0002312062937062937,
727
- "loss": 2.225,
728
- "step": 9300
729
- },
730
- {
731
- "epoch": 15.74,
732
- "learning_rate": 0.00022246503496503497,
733
- "loss": 2.2319,
734
- "step": 9400
735
- },
736
- {
737
- "epoch": 15.91,
738
- "learning_rate": 0.00021372377622377623,
739
- "loss": 2.2252,
740
- "step": 9500
741
- },
742
- {
743
- "epoch": 15.91,
744
- "eval_loss": 1.466213345527649,
745
- "eval_runtime": 409.5049,
746
- "eval_samples_per_second": 25.367,
747
- "eval_steps_per_second": 3.172,
748
- "eval_wer": 0.9516436354218695,
749
- "step": 9500
750
- },
751
- {
752
- "epoch": 16.08,
753
- "learning_rate": 0.00020498251748251749,
754
- "loss": 2.2324,
755
- "step": 9600
756
- },
757
- {
758
- "epoch": 16.25,
759
- "learning_rate": 0.00019624125874125875,
760
- "loss": 2.2197,
761
- "step": 9700
762
- },
763
- {
764
- "epoch": 16.42,
765
- "learning_rate": 0.0001875,
766
- "loss": 2.2061,
767
- "step": 9800
768
- },
769
- {
770
- "epoch": 16.58,
771
- "learning_rate": 0.00017875874125874126,
772
- "loss": 2.2062,
773
- "step": 9900
774
- },
775
- {
776
- "epoch": 16.75,
777
- "learning_rate": 0.00017001748251748252,
778
- "loss": 2.2048,
779
- "step": 10000
780
- },
781
- {
782
- "epoch": 16.75,
783
- "eval_loss": 1.4410929679870605,
784
- "eval_runtime": 408.38,
785
- "eval_samples_per_second": 25.437,
786
- "eval_steps_per_second": 3.181,
787
- "eval_wer": 0.9561018924468616,
788
- "step": 10000
789
- },
790
- {
791
- "epoch": 16.92,
792
- "learning_rate": 0.00016127622377622378,
793
- "loss": 2.1942,
794
- "step": 10100
795
- },
796
- {
797
- "epoch": 17.09,
798
- "learning_rate": 0.00015253496503496504,
799
- "loss": 2.2158,
800
- "step": 10200
801
- },
802
- {
803
- "epoch": 17.25,
804
- "learning_rate": 0.0001437937062937063,
805
- "loss": 2.1851,
806
- "step": 10300
807
- },
808
- {
809
- "epoch": 17.42,
810
- "learning_rate": 0.00013505244755244756,
811
- "loss": 2.1798,
812
- "step": 10400
813
- },
814
- {
815
- "epoch": 17.59,
816
- "learning_rate": 0.00012631118881118882,
817
- "loss": 2.1731,
818
- "step": 10500
819
- },
820
- {
821
- "epoch": 17.59,
822
- "eval_loss": 1.422843337059021,
823
- "eval_runtime": 412.7138,
824
- "eval_samples_per_second": 25.17,
825
- "eval_steps_per_second": 3.147,
826
- "eval_wer": 0.9521061102169932,
827
- "step": 10500
828
- },
829
- {
830
- "epoch": 17.75,
831
- "learning_rate": 0.00011756993006993007,
832
- "loss": 2.1736,
833
- "step": 10600
834
- },
835
- {
836
- "epoch": 17.92,
837
- "learning_rate": 0.00010882867132867133,
838
- "loss": 2.182,
839
- "step": 10700
840
- },
841
- {
842
- "epoch": 18.09,
843
- "learning_rate": 0.00010008741258741259,
844
- "loss": 2.1741,
845
- "step": 10800
846
- },
847
- {
848
- "epoch": 18.26,
849
- "learning_rate": 9.134615384615384e-05,
850
- "loss": 2.1636,
851
- "step": 10900
852
- },
853
- {
854
- "epoch": 18.43,
855
- "learning_rate": 8.260489510489511e-05,
856
- "loss": 2.1732,
857
- "step": 11000
858
- },
859
- {
860
- "epoch": 18.43,
861
- "eval_loss": 1.4052633047103882,
862
- "eval_runtime": 410.158,
863
- "eval_samples_per_second": 25.327,
864
- "eval_steps_per_second": 3.167,
865
- "eval_wer": 0.9428566143145198,
866
- "step": 11000
867
- },
868
- {
869
- "epoch": 18.59,
870
- "learning_rate": 7.386363636363637e-05,
871
- "loss": 2.1666,
872
- "step": 11100
873
- },
874
- {
875
- "epoch": 18.76,
876
- "learning_rate": 6.512237762237761e-05,
877
- "loss": 2.1612,
878
- "step": 11200
879
- },
880
- {
881
- "epoch": 18.93,
882
- "learning_rate": 5.638111888111888e-05,
883
- "loss": 2.1616,
884
- "step": 11300
885
- },
886
- {
887
- "epoch": 19.1,
888
- "learning_rate": 4.763986013986014e-05,
889
- "loss": 2.1752,
890
- "step": 11400
891
- },
892
- {
893
- "epoch": 19.26,
894
- "learning_rate": 3.88986013986014e-05,
895
- "loss": 2.1502,
896
- "step": 11500
897
- },
898
- {
899
- "epoch": 19.26,
900
- "eval_loss": 1.3827834129333496,
901
- "eval_runtime": 410.5205,
902
- "eval_samples_per_second": 25.304,
903
- "eval_steps_per_second": 3.164,
904
- "eval_wer": 0.9399522726011432,
905
- "step": 11500
906
- },
907
- {
908
- "epoch": 19.43,
909
- "learning_rate": 3.0157342657342658e-05,
910
- "loss": 2.1506,
911
- "step": 11600
912
- },
913
- {
914
- "epoch": 19.6,
915
- "learning_rate": 2.1416083916083917e-05,
916
- "loss": 2.1489,
917
- "step": 11700
918
- },
919
- {
920
- "epoch": 19.76,
921
- "learning_rate": 1.2674825174825174e-05,
922
- "loss": 2.1472,
923
- "step": 11800
924
- },
925
- {
926
- "epoch": 19.93,
927
- "learning_rate": 3.933566433566434e-06,
928
- "loss": 2.1453,
929
- "step": 11900
930
- },
931
- {
932
- "epoch": 20.0,
933
- "step": 11940,
934
- "total_flos": 2.027398301943103e+20,
935
- "train_loss": 1.2195568717304786,
936
- "train_runtime": 26512.2441,
937
- "train_samples_per_second": 28.824,
938
- "train_steps_per_second": 0.45
939
  }
940
  ],
941
- "max_steps": 11940,
942
- "num_train_epochs": 20,
943
- "total_flos": 2.027398301943103e+20,
944
  "trial_name": null,
945
  "trial_params": null
946
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.999581414817916,
5
+ "global_step": 5970,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.17,
12
+ "learning_rate": 4.9000000000000005e-05,
13
+ "loss": 5.3488,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.33,
18
+ "learning_rate": 9.900000000000001e-05,
19
+ "loss": 3.2572,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.5,
24
+ "learning_rate": 0.000149,
25
+ "loss": 2.4392,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 0.67,
30
+ "learning_rate": 0.000199,
31
+ "loss": 2.2566,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 0.84,
36
+ "learning_rate": 0.000249,
37
+ "loss": 2.2416,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.84,
42
+ "eval_loss": 1.2866647243499756,
43
+ "eval_runtime": 434.4225,
44
+ "eval_samples_per_second": 23.912,
45
+ "eval_steps_per_second": 2.99,
46
+ "eval_wer": 0.8874521338587047,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 1.01,
51
+ "learning_rate": 0.000299,
52
+ "loss": 2.2596,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 1.17,
57
+ "learning_rate": 0.00034899999999999997,
58
+ "loss": 2.2575,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 1.34,
63
+ "learning_rate": 0.00039900000000000005,
64
+ "loss": 2.2978,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 1.51,
69
+ "learning_rate": 0.000449,
70
+ "loss": 2.2998,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 1.67,
75
+ "learning_rate": 0.000499,
76
+ "loss": 2.3089,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 1.67,
81
+ "eval_loss": 1.8336485624313354,
82
+ "eval_runtime": 430.7741,
83
+ "eval_samples_per_second": 24.115,
84
+ "eval_steps_per_second": 3.016,
85
+ "eval_wer": 0.9547514660451005,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 1.84,
90
+ "learning_rate": 0.000549,
91
+ "loss": 2.3156,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 2.01,
96
+ "learning_rate": 0.000599,
97
+ "loss": 2.3298,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 2.18,
102
+ "learning_rate": 0.0006490000000000001,
103
+ "loss": 2.3174,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 2.34,
108
+ "learning_rate": 0.000699,
109
+ "loss": 2.349,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 2.51,
114
+ "learning_rate": 0.000749,
115
+ "loss": 2.3614,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 2.51,
120
+ "eval_loss": 1.5936506986618042,
121
+ "eval_runtime": 430.1389,
122
+ "eval_samples_per_second": 24.15,
123
+ "eval_steps_per_second": 3.02,
124
+ "eval_wer": 0.9468893945279983,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 2.68,
129
+ "learning_rate": 0.000799,
130
+ "loss": 2.3597,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 2.85,
135
+ "learning_rate": 0.000849,
136
+ "loss": 2.397,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 3.02,
141
+ "learning_rate": 0.0008990000000000001,
142
+ "loss": 2.4454,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 3.18,
147
+ "learning_rate": 0.000949,
148
+ "loss": 2.4806,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 3.35,
153
+ "learning_rate": 0.000999,
154
+ "loss": 2.5234,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 3.35,
159
+ "eval_loss": 1.9764641523361206,
160
+ "eval_runtime": 434.6883,
161
+ "eval_samples_per_second": 23.898,
162
+ "eval_steps_per_second": 2.988,
163
+ "eval_wer": 0.9866992248922434,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 3.52,
168
+ "learning_rate": 0.0009753148614609572,
169
+ "loss": 2.5559,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 3.68,
174
+ "learning_rate": 0.0009501259445843828,
175
+ "loss": 2.5543,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 3.85,
180
+ "learning_rate": 0.0009249370277078086,
181
+ "loss": 2.5819,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 4.02,
186
+ "learning_rate": 0.0008997481108312343,
187
+ "loss": 2.5837,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 4.19,
192
+ "learning_rate": 0.00087455919395466,
193
+ "loss": 2.5373,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 4.19,
198
+ "eval_loss": 1.9062319993972778,
199
+ "eval_runtime": 430.3022,
200
+ "eval_samples_per_second": 24.141,
201
+ "eval_steps_per_second": 3.019,
202
+ "eval_wer": 0.9916014577205542,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 4.35,
207
+ "learning_rate": 0.0008493702770780856,
208
+ "loss": 2.5617,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 4.52,
213
+ "learning_rate": 0.0008241813602015113,
214
+ "loss": 2.5553,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 4.69,
219
+ "learning_rate": 0.0007989924433249371,
220
+ "loss": 2.549,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 4.86,
225
+ "learning_rate": 0.0007738035264483628,
226
+ "loss": 2.5636,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 5.03,
231
+ "learning_rate": 0.0007486146095717884,
232
+ "loss": 2.5703,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 5.03,
237
+ "eval_loss": 1.977164387702942,
238
+ "eval_runtime": 431.1212,
239
+ "eval_samples_per_second": 24.095,
240
+ "eval_steps_per_second": 3.013,
241
+ "eval_wer": 0.9914719647779197,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 5.19,
246
+ "learning_rate": 0.0007234256926952141,
247
+ "loss": 2.5526,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 5.36,
252
+ "learning_rate": 0.0006982367758186398,
253
+ "loss": 2.5277,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 5.53,
258
+ "learning_rate": 0.0006730478589420656,
259
+ "loss": 2.4969,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 5.69,
264
+ "learning_rate": 0.0006478589420654912,
265
+ "loss": 2.4849,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 5.86,
270
+ "learning_rate": 0.0006226700251889169,
271
+ "loss": 2.4656,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 5.86,
276
+ "eval_loss": 1.8083465099334717,
277
+ "eval_runtime": 432.7157,
278
+ "eval_samples_per_second": 24.007,
279
+ "eval_steps_per_second": 3.002,
280
+ "eval_wer": 0.9829069315722293,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 6.03,
285
+ "learning_rate": 0.0005974811083123426,
286
+ "loss": 2.4858,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 6.2,
291
+ "learning_rate": 0.0005722921914357682,
292
+ "loss": 2.4552,
293
  "step": 3700
294
  },
295
  {
296
+ "epoch": 6.37,
297
+ "learning_rate": 0.0005471032745591939,
298
+ "loss": 2.4302,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 6.53,
303
+ "learning_rate": 0.0005219143576826196,
304
+ "loss": 2.4397,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 6.7,
309
+ "learning_rate": 0.0004967254408060454,
310
+ "loss": 2.4339,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 6.7,
315
+ "eval_loss": 1.754757046699524,
316
+ "eval_runtime": 442.6863,
317
+ "eval_samples_per_second": 23.466,
318
+ "eval_steps_per_second": 2.934,
319
+ "eval_wer": 0.9752483489649814,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 6.87,
324
+ "learning_rate": 0.00047153652392947104,
325
+ "loss": 2.4069,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 7.04,
330
+ "learning_rate": 0.0004463476070528967,
331
+ "loss": 2.3863,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 7.2,
336
+ "learning_rate": 0.00042115869017632243,
337
+ "loss": 2.3614,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 7.37,
342
+ "learning_rate": 0.0003959697732997481,
343
+ "loss": 2.3534,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 7.54,
348
+ "learning_rate": 0.00037078085642317383,
349
+ "loss": 2.344,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 7.54,
354
+ "eval_loss": 1.6146422624588013,
355
+ "eval_runtime": 432.0313,
356
+ "eval_samples_per_second": 24.045,
357
+ "eval_steps_per_second": 3.007,
358
+ "eval_wer": 0.9638344710213294,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 7.7,
363
+ "learning_rate": 0.0003455919395465995,
364
+ "loss": 2.3322,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 7.87,
369
+ "learning_rate": 0.0003204030226700252,
370
+ "loss": 2.315,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 8.04,
375
+ "learning_rate": 0.00029521410579345085,
376
+ "loss": 2.3035,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 8.21,
381
+ "learning_rate": 0.0002700251889168766,
382
+ "loss": 2.2715,
383
  "step": 4900
384
  },
385
  {
386
+ "epoch": 8.38,
387
+ "learning_rate": 0.00024483627204030224,
388
+ "loss": 2.2677,
389
  "step": 5000
390
  },
391
  {
392
+ "epoch": 8.38,
393
+ "eval_loss": 1.5104962587356567,
394
+ "eval_runtime": 431.1839,
395
+ "eval_samples_per_second": 24.092,
396
+ "eval_steps_per_second": 3.013,
397
+ "eval_wer": 0.9499232291840095,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 8.54,
402
+ "learning_rate": 0.00021964735516372797,
403
+ "loss": 2.266,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 8.71,
408
+ "learning_rate": 0.00019445843828715364,
409
+ "loss": 2.2473,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 8.88,
414
+ "learning_rate": 0.00016926952141057937,
415
+ "loss": 2.2419,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 9.05,
420
+ "learning_rate": 0.00014408060453400504,
421
+ "loss": 2.2305,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 9.21,
426
+ "learning_rate": 0.00011889168765743074,
427
+ "loss": 2.2074,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 9.21,
432
+ "eval_loss": 1.4190884828567505,
433
+ "eval_runtime": 440.4999,
434
+ "eval_samples_per_second": 23.582,
435
+ "eval_steps_per_second": 2.949,
436
+ "eval_wer": 0.9356790054942006,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 9.38,
441
+ "learning_rate": 9.370277078085642e-05,
442
+ "loss": 2.1936,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 9.55,
447
+ "learning_rate": 6.851385390428212e-05,
448
+ "loss": 2.1796,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 9.71,
453
+ "learning_rate": 4.332493702770781e-05,
454
+ "loss": 2.1748,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 9.88,
459
+ "learning_rate": 1.8136020151133502e-05,
460
+ "loss": 2.1846,
461
  "step": 5900
462
  },
463
  {
464
+ "epoch": 10.0,
465
+ "step": 5970,
466
+ "total_flos": 1.0051413716540667e+20,
467
+ "train_loss": 2.4458747799871756,
468
+ "train_runtime": 25997.2696,
469
+ "train_samples_per_second": 14.697,
470
+ "train_steps_per_second": 0.23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  }
472
  ],
473
+ "max_steps": 5970,
474
+ "num_train_epochs": 10,
475
+ "total_flos": 1.0051413716540667e+20,
476
  "trial_name": null,
477
  "trial_params": null
478
  }