infinitejoy committed on
Commit
07c24cc
1 Parent(s): 75539f4

End of training

Browse files
Files changed (5) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. pytorch_model.bin +1 -1
  4. train_results.json +5 -5
  5. trainer_state.json +1008 -273
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 0.27504467964172363,
4
- "eval_runtime": 100.6356,
5
  "eval_samples": 3038,
6
- "eval_samples_per_second": 30.188,
7
- "eval_steps_per_second": 0.944,
8
- "eval_wer": 0.3574979056129573,
9
- "train_loss": 1.4489057677646853,
10
- "train_runtime": 15709.6404,
11
  "train_samples": 6769,
12
- "train_samples_per_second": 21.544,
13
- "train_steps_per_second": 0.675
14
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.2759484350681305,
4
+ "eval_runtime": 93.8442,
5
  "eval_samples": 3038,
6
+ "eval_samples_per_second": 32.373,
7
+ "eval_steps_per_second": 1.012,
8
+ "eval_wer": 0.3256073722423904,
9
+ "train_loss": 1.235663890838623,
10
+ "train_runtime": 30132.3883,
11
  "train_samples": 6769,
12
+ "train_samples_per_second": 22.464,
13
+ "train_steps_per_second": 0.704
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 0.27504467964172363,
4
- "eval_runtime": 100.6356,
5
  "eval_samples": 3038,
6
- "eval_samples_per_second": 30.188,
7
- "eval_steps_per_second": 0.944,
8
- "eval_wer": 0.3574979056129573
9
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.2759484350681305,
4
+ "eval_runtime": 93.8442,
5
  "eval_samples": 3038,
6
+ "eval_samples_per_second": 32.373,
7
+ "eval_steps_per_second": 1.012,
8
+ "eval_wer": 0.3256073722423904
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:180dd7e2d9f846743174a2be28a566acc006274ae8c4a9ca6be4190b18861f68
3
  size 1262157361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6baa59b6dc263be91cec1bb3d88fc58c0fdba4311b736a9b4b0f11fb378cc36
3
  size 1262157361
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 1.4489057677646853,
4
- "train_runtime": 15709.6404,
5
  "train_samples": 6769,
6
- "train_samples_per_second": 21.544,
7
- "train_steps_per_second": 0.675
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "train_loss": 1.235663890838623,
4
+ "train_runtime": 30132.3883,
5
  "train_samples": 6769,
6
+ "train_samples_per_second": 22.464,
7
+ "train_steps_per_second": 0.704
8
  }
trainer_state.json CHANGED
@@ -1,751 +1,1486 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 50.0,
5
- "global_step": 10600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.47,
12
- "learning_rate": 3.4299999999999998e-06,
13
- "loss": 11.8059,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.94,
18
- "learning_rate": 6.93e-06,
19
- "loss": 6.741,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.42,
24
- "learning_rate": 1.0429999999999998e-05,
25
- "loss": 4.6303,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 1.89,
30
- "learning_rate": 1.3929999999999999e-05,
31
- "loss": 4.0577,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.36,
36
- "learning_rate": 1.7429999999999997e-05,
37
- "loss": 3.6055,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.83,
42
- "learning_rate": 2.0929999999999998e-05,
43
- "loss": 3.2771,
44
  "step": 600
45
  },
46
  {
47
  "epoch": 3.3,
48
- "learning_rate": 2.4429999999999995e-05,
49
- "loss": 3.0775,
50
  "step": 700
51
  },
52
  {
53
  "epoch": 3.77,
54
- "learning_rate": 2.793e-05,
55
- "loss": 2.9768,
56
  "step": 800
57
  },
58
  {
59
  "epoch": 4.25,
60
- "learning_rate": 3.1429999999999996e-05,
61
- "loss": 2.8612,
62
  "step": 900
63
  },
64
  {
65
  "epoch": 4.72,
66
- "learning_rate": 3.493e-05,
67
- "loss": 2.8112,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 4.72,
72
- "eval_loss": 2.778607130050659,
73
- "eval_runtime": 101.5624,
74
- "eval_samples_per_second": 29.913,
75
- "eval_steps_per_second": 0.935,
76
  "eval_wer": 1.0,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.19,
81
- "learning_rate": 3.843e-05,
82
- "loss": 2.7937,
83
  "step": 1100
84
  },
85
  {
86
  "epoch": 5.66,
87
- "learning_rate": 4.192999999999999e-05,
88
- "loss": 2.7598,
89
  "step": 1200
90
  },
91
  {
92
  "epoch": 6.13,
93
- "learning_rate": 4.543e-05,
94
- "loss": 2.5782,
95
  "step": 1300
96
  },
97
  {
98
  "epoch": 6.6,
99
- "learning_rate": 4.8929999999999994e-05,
100
- "loss": 2.0779,
101
  "step": 1400
102
  },
103
  {
104
  "epoch": 7.08,
105
- "learning_rate": 5.243e-05,
106
- "loss": 1.7522,
107
  "step": 1500
108
  },
109
  {
110
  "epoch": 7.55,
111
- "learning_rate": 5.593e-05,
112
- "loss": 1.6003,
113
  "step": 1600
114
  },
115
  {
116
  "epoch": 8.02,
117
- "learning_rate": 5.942999999999999e-05,
118
- "loss": 1.534,
119
  "step": 1700
120
  },
121
  {
122
  "epoch": 8.49,
123
- "learning_rate": 6.293e-05,
124
- "loss": 1.4587,
125
  "step": 1800
126
  },
127
  {
128
  "epoch": 8.96,
129
- "learning_rate": 6.642999999999999e-05,
130
- "loss": 1.4313,
131
  "step": 1900
132
  },
133
  {
134
  "epoch": 9.43,
135
- "learning_rate": 6.992999999999999e-05,
136
- "loss": 1.3803,
137
  "step": 2000
138
  },
139
  {
140
  "epoch": 9.43,
141
- "eval_loss": 0.49710744619369507,
142
- "eval_runtime": 101.1307,
143
- "eval_samples_per_second": 30.04,
144
- "eval_steps_per_second": 0.939,
145
- "eval_wer": 0.5787768779670484,
146
  "step": 2000
147
  },
148
  {
149
  "epoch": 9.91,
150
- "learning_rate": 6.920232558139535e-05,
151
- "loss": 1.3528,
152
  "step": 2100
153
  },
154
  {
155
  "epoch": 10.38,
156
- "learning_rate": 6.838837209302325e-05,
157
- "loss": 1.3382,
158
  "step": 2200
159
  },
160
  {
161
  "epoch": 10.85,
162
- "learning_rate": 6.757441860465116e-05,
163
- "loss": 1.3103,
164
  "step": 2300
165
  },
166
  {
167
  "epoch": 11.32,
168
- "learning_rate": 6.676046511627906e-05,
169
- "loss": 1.2818,
170
  "step": 2400
171
  },
172
  {
173
  "epoch": 11.79,
174
- "learning_rate": 6.594651162790697e-05,
175
- "loss": 1.265,
176
  "step": 2500
177
  },
178
  {
179
  "epoch": 12.26,
180
- "learning_rate": 6.513255813953489e-05,
181
- "loss": 1.2608,
182
  "step": 2600
183
  },
184
  {
185
  "epoch": 12.74,
186
- "learning_rate": 6.431860465116279e-05,
187
- "loss": 1.2263,
188
  "step": 2700
189
  },
190
  {
191
  "epoch": 13.21,
192
- "learning_rate": 6.350465116279069e-05,
193
- "loss": 1.2069,
194
  "step": 2800
195
  },
196
  {
197
  "epoch": 13.68,
198
- "learning_rate": 6.26906976744186e-05,
199
- "loss": 1.2102,
200
  "step": 2900
201
  },
202
  {
203
  "epoch": 14.15,
204
- "learning_rate": 6.18767441860465e-05,
205
- "loss": 1.1894,
206
  "step": 3000
207
  },
208
  {
209
  "epoch": 14.15,
210
- "eval_loss": 0.36050862073898315,
211
- "eval_runtime": 100.2529,
212
- "eval_samples_per_second": 30.303,
213
- "eval_steps_per_second": 0.948,
214
- "eval_wer": 0.44417760402122314,
215
  "step": 3000
216
  },
217
  {
218
  "epoch": 14.62,
219
- "learning_rate": 6.106279069767441e-05,
220
- "loss": 1.1845,
221
  "step": 3100
222
  },
223
  {
224
  "epoch": 15.09,
225
- "learning_rate": 6.024883720930232e-05,
226
- "loss": 1.1819,
227
  "step": 3200
228
  },
229
  {
230
  "epoch": 15.57,
231
- "learning_rate": 5.943488372093023e-05,
232
- "loss": 1.1565,
233
  "step": 3300
234
  },
235
  {
236
  "epoch": 16.04,
237
- "learning_rate": 5.8620930232558136e-05,
238
- "loss": 1.1506,
239
  "step": 3400
240
  },
241
  {
242
  "epoch": 16.51,
243
- "learning_rate": 5.7806976744186036e-05,
244
- "loss": 1.1254,
245
  "step": 3500
246
  },
247
  {
248
  "epoch": 16.98,
249
- "learning_rate": 5.699302325581395e-05,
250
- "loss": 1.1444,
251
  "step": 3600
252
  },
253
  {
254
  "epoch": 17.45,
255
- "learning_rate": 5.6179069767441856e-05,
256
- "loss": 1.1189,
257
  "step": 3700
258
  },
259
  {
260
  "epoch": 17.92,
261
- "learning_rate": 5.536511627906976e-05,
262
- "loss": 1.1219,
263
  "step": 3800
264
  },
265
  {
266
  "epoch": 18.4,
267
- "learning_rate": 5.455116279069767e-05,
268
- "loss": 1.1075,
269
  "step": 3900
270
  },
271
  {
272
  "epoch": 18.87,
273
- "learning_rate": 5.373720930232558e-05,
274
- "loss": 1.1019,
275
  "step": 4000
276
  },
277
  {
278
  "epoch": 18.87,
279
- "eval_loss": 0.3251776695251465,
280
- "eval_runtime": 100.5327,
281
- "eval_samples_per_second": 30.219,
282
- "eval_steps_per_second": 0.945,
283
- "eval_wer": 0.42356883552080427,
284
  "step": 4000
285
  },
286
  {
287
  "epoch": 19.34,
288
- "learning_rate": 5.292325581395348e-05,
289
- "loss": 1.1104,
290
  "step": 4100
291
  },
292
  {
293
  "epoch": 19.81,
294
- "learning_rate": 5.212558139534883e-05,
295
- "loss": 1.0985,
296
  "step": 4200
297
  },
298
  {
299
  "epoch": 20.28,
300
- "learning_rate": 5.1311627906976736e-05,
301
- "loss": 1.0878,
302
  "step": 4300
303
  },
304
  {
305
  "epoch": 20.75,
306
- "learning_rate": 5.049767441860465e-05,
307
- "loss": 1.0766,
308
  "step": 4400
309
  },
310
  {
311
  "epoch": 21.23,
312
- "learning_rate": 4.968372093023256e-05,
313
- "loss": 1.071,
314
  "step": 4500
315
  },
316
  {
317
  "epoch": 21.7,
318
- "learning_rate": 4.8869767441860464e-05,
319
- "loss": 1.0508,
320
  "step": 4600
321
  },
322
  {
323
  "epoch": 22.17,
324
- "learning_rate": 4.8055813953488364e-05,
325
- "loss": 1.0644,
326
  "step": 4700
327
  },
328
  {
329
  "epoch": 22.64,
330
- "learning_rate": 4.7249999999999997e-05,
331
- "loss": 1.0571,
332
  "step": 4800
333
  },
334
  {
335
  "epoch": 23.11,
336
- "learning_rate": 4.6436046511627903e-05,
337
- "loss": 1.0457,
338
  "step": 4900
339
  },
340
  {
341
  "epoch": 23.58,
342
- "learning_rate": 4.562209302325581e-05,
343
- "loss": 1.0366,
344
  "step": 5000
345
  },
346
  {
347
  "epoch": 23.58,
348
- "eval_loss": 0.31269633769989014,
349
- "eval_runtime": 101.0057,
350
- "eval_samples_per_second": 30.078,
351
- "eval_steps_per_second": 0.941,
352
- "eval_wer": 0.40234571348785253,
353
  "step": 5000
354
  },
355
  {
356
  "epoch": 24.06,
357
- "learning_rate": 4.480813953488372e-05,
358
- "loss": 1.0395,
359
  "step": 5100
360
  },
361
  {
362
  "epoch": 24.53,
363
- "learning_rate": 4.3994186046511624e-05,
364
- "loss": 1.0344,
365
  "step": 5200
366
  },
367
  {
368
  "epoch": 25.0,
369
- "learning_rate": 4.318023255813954e-05,
370
- "loss": 1.0318,
371
  "step": 5300
372
  },
373
  {
374
  "epoch": 25.47,
375
- "learning_rate": 4.236627906976744e-05,
376
- "loss": 1.0291,
377
  "step": 5400
378
  },
379
  {
380
  "epoch": 25.94,
381
- "learning_rate": 4.1552325581395345e-05,
382
- "loss": 1.0325,
383
  "step": 5500
384
  },
385
  {
386
  "epoch": 26.42,
387
- "learning_rate": 4.073837209302325e-05,
388
- "loss": 1.0205,
389
  "step": 5600
390
  },
391
  {
392
  "epoch": 26.89,
393
- "learning_rate": 3.992441860465116e-05,
394
- "loss": 1.0276,
395
  "step": 5700
396
  },
397
  {
398
  "epoch": 27.36,
399
- "learning_rate": 3.9110465116279065e-05,
400
- "loss": 1.0122,
401
  "step": 5800
402
  },
403
  {
404
  "epoch": 27.83,
405
- "learning_rate": 3.829651162790698e-05,
406
- "loss": 1.0054,
407
  "step": 5900
408
  },
409
  {
410
  "epoch": 28.3,
411
- "learning_rate": 3.748255813953488e-05,
412
- "loss": 1.0217,
413
  "step": 6000
414
  },
415
  {
416
  "epoch": 28.3,
417
- "eval_loss": 0.3026880621910095,
418
- "eval_runtime": 100.3358,
419
- "eval_samples_per_second": 30.278,
420
- "eval_steps_per_second": 0.947,
421
- "eval_wer": 0.39525272270315553,
422
  "step": 6000
423
  },
424
  {
425
  "epoch": 28.77,
426
- "learning_rate": 3.6668604651162786e-05,
427
- "loss": 1.0134,
428
  "step": 6100
429
  },
430
  {
431
  "epoch": 29.25,
432
- "learning_rate": 3.585465116279069e-05,
433
- "loss": 0.9899,
434
  "step": 6200
435
  },
436
  {
437
  "epoch": 29.72,
438
- "learning_rate": 3.50406976744186e-05,
439
- "loss": 0.9984,
440
  "step": 6300
441
  },
442
  {
443
  "epoch": 30.19,
444
- "learning_rate": 3.422674418604651e-05,
445
- "loss": 0.9952,
446
  "step": 6400
447
  },
448
  {
449
  "epoch": 30.66,
450
- "learning_rate": 3.341279069767442e-05,
451
- "loss": 0.9957,
452
  "step": 6500
453
  },
454
  {
455
  "epoch": 31.13,
456
- "learning_rate": 3.259883720930232e-05,
457
- "loss": 0.9922,
458
  "step": 6600
459
  },
460
  {
461
  "epoch": 31.6,
462
- "learning_rate": 3.178488372093023e-05,
463
- "loss": 0.9707,
464
  "step": 6700
465
  },
466
  {
467
  "epoch": 32.08,
468
- "learning_rate": 3.097093023255814e-05,
469
- "loss": 0.9876,
470
  "step": 6800
471
  },
472
  {
473
  "epoch": 32.55,
474
- "learning_rate": 3.0156976744186045e-05,
475
- "loss": 0.9701,
476
  "step": 6900
477
  },
478
  {
479
  "epoch": 33.02,
480
- "learning_rate": 2.9343023255813948e-05,
481
- "loss": 0.9774,
482
  "step": 7000
483
  },
484
  {
485
  "epoch": 33.02,
486
- "eval_loss": 0.2924236059188843,
487
- "eval_runtime": 101.009,
488
- "eval_samples_per_second": 30.077,
489
- "eval_steps_per_second": 0.941,
490
- "eval_wer": 0.3965372800893605,
491
  "step": 7000
492
  },
493
  {
494
  "epoch": 33.49,
495
- "learning_rate": 2.852906976744186e-05,
496
- "loss": 0.9745,
497
  "step": 7100
498
  },
499
  {
500
  "epoch": 33.96,
501
- "learning_rate": 2.7715116279069765e-05,
502
- "loss": 0.9682,
503
  "step": 7200
504
  },
505
  {
506
  "epoch": 34.43,
507
- "learning_rate": 2.6901162790697676e-05,
508
- "loss": 0.9593,
509
  "step": 7300
510
  },
511
  {
512
  "epoch": 34.91,
513
- "learning_rate": 2.608720930232558e-05,
514
- "loss": 0.9591,
515
  "step": 7400
516
  },
517
  {
518
  "epoch": 35.38,
519
- "learning_rate": 2.5273255813953486e-05,
520
- "loss": 0.9582,
521
  "step": 7500
522
  },
523
  {
524
  "epoch": 35.85,
525
- "learning_rate": 2.4459302325581396e-05,
526
- "loss": 0.9567,
527
  "step": 7600
528
  },
529
  {
530
  "epoch": 36.32,
531
- "learning_rate": 2.36453488372093e-05,
532
- "loss": 0.9476,
533
  "step": 7700
534
  },
535
  {
536
  "epoch": 36.79,
537
- "learning_rate": 2.2831395348837207e-05,
538
- "loss": 0.9534,
539
  "step": 7800
540
  },
541
  {
542
  "epoch": 37.26,
543
- "learning_rate": 2.2017441860465117e-05,
544
- "loss": 0.9473,
545
  "step": 7900
546
  },
547
  {
548
  "epoch": 37.74,
549
- "learning_rate": 2.120348837209302e-05,
550
- "loss": 0.9485,
551
  "step": 8000
552
  },
553
  {
554
  "epoch": 37.74,
555
- "eval_loss": 0.2948923110961914,
556
- "eval_runtime": 99.2137,
557
- "eval_samples_per_second": 30.621,
558
- "eval_steps_per_second": 0.958,
559
- "eval_wer": 0.3755375593409662,
560
  "step": 8000
561
  },
562
  {
563
  "epoch": 38.21,
564
- "learning_rate": 2.0389534883720927e-05,
565
- "loss": 0.9344,
566
  "step": 8100
567
  },
568
  {
569
  "epoch": 38.68,
570
- "learning_rate": 1.9575581395348838e-05,
571
- "loss": 0.9357,
572
  "step": 8200
573
  },
574
  {
575
  "epoch": 39.15,
576
- "learning_rate": 1.876162790697674e-05,
577
- "loss": 0.9545,
578
  "step": 8300
579
  },
580
  {
581
  "epoch": 39.62,
582
- "learning_rate": 1.794767441860465e-05,
583
- "loss": 0.9289,
584
  "step": 8400
585
  },
586
  {
587
  "epoch": 40.09,
588
- "learning_rate": 1.7133720930232558e-05,
589
- "loss": 0.9434,
590
  "step": 8500
591
  },
592
  {
593
  "epoch": 40.57,
594
- "learning_rate": 1.6319767441860465e-05,
595
- "loss": 0.9319,
596
  "step": 8600
597
  },
598
  {
599
  "epoch": 41.04,
600
- "learning_rate": 1.551395348837209e-05,
601
- "loss": 0.9357,
602
  "step": 8700
603
  },
604
  {
605
  "epoch": 41.51,
606
- "learning_rate": 1.4699999999999998e-05,
607
- "loss": 0.9105,
608
  "step": 8800
609
  },
610
  {
611
  "epoch": 41.98,
612
- "learning_rate": 1.3886046511627905e-05,
613
- "loss": 0.9299,
614
  "step": 8900
615
  },
616
  {
617
  "epoch": 42.45,
618
- "learning_rate": 1.3072093023255814e-05,
619
- "loss": 0.9332,
620
  "step": 9000
621
  },
622
  {
623
  "epoch": 42.45,
624
- "eval_loss": 0.28152021765708923,
625
- "eval_runtime": 100.0017,
626
- "eval_samples_per_second": 30.379,
627
- "eval_steps_per_second": 0.95,
628
- "eval_wer": 0.3664898073163921,
629
  "step": 9000
630
  },
631
  {
632
  "epoch": 42.92,
633
- "learning_rate": 1.225813953488372e-05,
634
- "loss": 0.922,
635
  "step": 9100
636
  },
637
  {
638
  "epoch": 43.4,
639
- "learning_rate": 1.1444186046511626e-05,
640
- "loss": 0.9161,
641
  "step": 9200
642
  },
643
  {
644
  "epoch": 43.87,
645
- "learning_rate": 1.0630232558139534e-05,
646
- "loss": 0.9203,
647
  "step": 9300
648
  },
649
  {
650
  "epoch": 44.34,
651
- "learning_rate": 9.816279069767441e-06,
652
- "loss": 0.9221,
653
  "step": 9400
654
  },
655
  {
656
  "epoch": 44.81,
657
- "learning_rate": 9.002325581395346e-06,
658
- "loss": 0.8945,
659
  "step": 9500
660
  },
661
  {
662
  "epoch": 45.28,
663
- "learning_rate": 8.188372093023255e-06,
664
- "loss": 0.9347,
665
  "step": 9600
666
  },
667
  {
668
  "epoch": 45.75,
669
- "learning_rate": 7.374418604651162e-06,
670
- "loss": 0.8967,
671
  "step": 9700
672
  },
673
  {
674
  "epoch": 46.23,
675
- "learning_rate": 6.560465116279069e-06,
676
- "loss": 0.9211,
677
  "step": 9800
678
  },
679
  {
680
  "epoch": 46.7,
681
- "learning_rate": 5.7465116279069765e-06,
682
- "loss": 0.9007,
683
  "step": 9900
684
  },
685
  {
686
  "epoch": 47.17,
687
- "learning_rate": 4.9325581395348825e-06,
688
- "loss": 0.9093,
689
  "step": 10000
690
  },
691
  {
692
  "epoch": 47.17,
693
- "eval_loss": 0.2779529094696045,
694
- "eval_runtime": 102.291,
695
- "eval_samples_per_second": 29.7,
696
- "eval_steps_per_second": 0.929,
697
- "eval_wer": 0.3623010332309411,
698
  "step": 10000
699
  },
700
  {
701
  "epoch": 47.64,
702
- "learning_rate": 4.11860465116279e-06,
703
- "loss": 0.9066,
704
  "step": 10100
705
  },
706
  {
707
  "epoch": 48.11,
708
- "learning_rate": 3.3046511627906976e-06,
709
- "loss": 0.9154,
710
  "step": 10200
711
  },
712
  {
713
  "epoch": 48.58,
714
- "learning_rate": 2.4906976744186045e-06,
715
- "loss": 0.9112,
716
  "step": 10300
717
  },
718
  {
719
  "epoch": 49.06,
720
- "learning_rate": 1.6767441860465114e-06,
721
- "loss": 0.8968,
722
  "step": 10400
723
  },
724
  {
725
  "epoch": 49.53,
726
- "learning_rate": 8.627906976744186e-07,
727
- "loss": 0.9083,
728
  "step": 10500
729
  },
730
  {
731
  "epoch": 50.0,
732
- "learning_rate": 5.697674418604651e-08,
733
- "loss": 0.9076,
734
  "step": 10600
735
  },
736
  {
737
- "epoch": 50.0,
738
- "step": 10600,
739
- "total_flos": 3.899540035775118e+19,
740
- "train_loss": 1.4489057677646853,
741
- "train_runtime": 15709.6404,
742
- "train_samples_per_second": 21.544,
743
- "train_steps_per_second": 0.675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
  }
745
  ],
746
- "max_steps": 10600,
747
- "num_train_epochs": 50,
748
- "total_flos": 3.899540035775118e+19,
749
  "trial_name": null,
750
  "trial_params": null
751
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
+ "global_step": 21200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.47,
12
+ "learning_rate": 1.7149999999999999e-06,
13
+ "loss": 12.1703,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.94,
18
+ "learning_rate": 3.465e-06,
19
+ "loss": 9.0128,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.42,
24
+ "learning_rate": 5.214999999999999e-06,
25
+ "loss": 5.5165,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 1.89,
30
+ "learning_rate": 6.964999999999999e-06,
31
+ "loss": 4.6455,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.36,
36
+ "learning_rate": 8.714999999999999e-06,
37
+ "loss": 4.1839,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.83,
42
+ "learning_rate": 1.0464999999999999e-05,
43
+ "loss": 3.8664,
44
  "step": 600
45
  },
46
  {
47
  "epoch": 3.3,
48
+ "learning_rate": 1.2214999999999997e-05,
49
+ "loss": 3.5691,
50
  "step": 700
51
  },
52
  {
53
  "epoch": 3.77,
54
+ "learning_rate": 1.3965e-05,
55
+ "loss": 3.3224,
56
  "step": 800
57
  },
58
  {
59
  "epoch": 4.25,
60
+ "learning_rate": 1.5714999999999998e-05,
61
+ "loss": 3.1431,
62
  "step": 900
63
  },
64
  {
65
  "epoch": 4.72,
66
+ "learning_rate": 1.7465e-05,
67
+ "loss": 3.0387,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 4.72,
72
+ "eval_loss": 3.0891902446746826,
73
+ "eval_runtime": 96.3286,
74
+ "eval_samples_per_second": 31.538,
75
+ "eval_steps_per_second": 0.986,
76
  "eval_wer": 1.0,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.19,
81
+ "learning_rate": 1.9215e-05,
82
+ "loss": 2.9827,
83
  "step": 1100
84
  },
85
  {
86
  "epoch": 5.66,
87
+ "learning_rate": 2.0964999999999997e-05,
88
+ "loss": 2.8891,
89
  "step": 1200
90
  },
91
  {
92
  "epoch": 6.13,
93
+ "learning_rate": 2.2715e-05,
94
+ "loss": 2.8309,
95
  "step": 1300
96
  },
97
  {
98
  "epoch": 6.6,
99
+ "learning_rate": 2.4464999999999997e-05,
100
+ "loss": 2.8077,
101
  "step": 1400
102
  },
103
  {
104
  "epoch": 7.08,
105
+ "learning_rate": 2.6215e-05,
106
+ "loss": 2.7824,
107
  "step": 1500
108
  },
109
  {
110
  "epoch": 7.55,
111
+ "learning_rate": 2.7965e-05,
112
+ "loss": 2.7682,
113
  "step": 1600
114
  },
115
  {
116
  "epoch": 8.02,
117
+ "learning_rate": 2.9714999999999996e-05,
118
+ "loss": 2.7261,
119
  "step": 1700
120
  },
121
  {
122
  "epoch": 8.49,
123
+ "learning_rate": 3.1465e-05,
124
+ "loss": 2.4865,
125
  "step": 1800
126
  },
127
  {
128
  "epoch": 8.96,
129
+ "learning_rate": 3.321499999999999e-05,
130
+ "loss": 2.0721,
131
  "step": 1900
132
  },
133
  {
134
  "epoch": 9.43,
135
+ "learning_rate": 3.4964999999999995e-05,
136
+ "loss": 1.7911,
137
  "step": 2000
138
  },
139
  {
140
  "epoch": 9.43,
141
+ "eval_loss": 0.845119059085846,
142
+ "eval_runtime": 95.4637,
143
+ "eval_samples_per_second": 31.824,
144
+ "eval_steps_per_second": 0.995,
145
+ "eval_wer": 0.6702038536721586,
146
  "step": 2000
147
  },
148
  {
149
  "epoch": 9.91,
150
+ "learning_rate": 3.6714999999999997e-05,
151
+ "loss": 1.667,
152
  "step": 2100
153
  },
154
  {
155
  "epoch": 10.38,
156
+ "learning_rate": 3.8465e-05,
157
+ "loss": 1.5783,
158
  "step": 2200
159
  },
160
  {
161
  "epoch": 10.85,
162
+ "learning_rate": 4.0215e-05,
163
+ "loss": 1.5165,
164
  "step": 2300
165
  },
166
  {
167
  "epoch": 11.32,
168
+ "learning_rate": 4.1964999999999995e-05,
169
+ "loss": 1.4551,
170
  "step": 2400
171
  },
172
  {
173
  "epoch": 11.79,
174
+ "learning_rate": 4.3715e-05,
175
+ "loss": 1.4246,
176
  "step": 2500
177
  },
178
  {
179
  "epoch": 12.26,
180
+ "learning_rate": 4.546499999999999e-05,
181
+ "loss": 1.3976,
182
  "step": 2600
183
  },
184
  {
185
  "epoch": 12.74,
186
+ "learning_rate": 4.7214999999999994e-05,
187
+ "loss": 1.3508,
188
  "step": 2700
189
  },
190
  {
191
  "epoch": 13.21,
192
+ "learning_rate": 4.8964999999999996e-05,
193
+ "loss": 1.3231,
194
  "step": 2800
195
  },
196
  {
197
  "epoch": 13.68,
198
+ "learning_rate": 5.0715e-05,
199
+ "loss": 1.319,
200
  "step": 2900
201
  },
202
  {
203
  "epoch": 14.15,
204
+ "learning_rate": 5.2465e-05,
205
+ "loss": 1.2826,
206
  "step": 3000
207
  },
208
  {
209
  "epoch": 14.15,
210
+ "eval_loss": 0.4210692048072815,
211
+ "eval_runtime": 96.3761,
212
+ "eval_samples_per_second": 31.522,
213
+ "eval_steps_per_second": 0.986,
214
+ "eval_wer": 0.41658754537838594,
215
  "step": 3000
216
  },
217
  {
218
  "epoch": 14.62,
219
+ "learning_rate": 5.4214999999999995e-05,
220
+ "loss": 1.279,
221
  "step": 3100
222
  },
223
  {
224
  "epoch": 15.09,
225
+ "learning_rate": 5.5965e-05,
226
+ "loss": 1.2713,
227
  "step": 3200
228
  },
229
  {
230
  "epoch": 15.57,
231
+ "learning_rate": 5.7715e-05,
232
+ "loss": 1.2416,
233
  "step": 3300
234
  },
235
  {
236
  "epoch": 16.04,
237
+ "learning_rate": 5.9464999999999994e-05,
238
+ "loss": 1.2412,
239
  "step": 3400
240
  },
241
  {
242
  "epoch": 16.51,
243
+ "learning_rate": 6.1215e-05,
244
+ "loss": 1.2136,
245
  "step": 3500
246
  },
247
  {
248
  "epoch": 16.98,
249
+ "learning_rate": 6.296499999999999e-05,
250
+ "loss": 1.2282,
251
  "step": 3600
252
  },
253
  {
254
  "epoch": 17.45,
255
+ "learning_rate": 6.471499999999999e-05,
256
+ "loss": 1.1977,
257
  "step": 3700
258
  },
259
  {
260
  "epoch": 17.92,
261
+ "learning_rate": 6.6465e-05,
262
+ "loss": 1.204,
263
  "step": 3800
264
  },
265
  {
266
  "epoch": 18.4,
267
+ "learning_rate": 6.8215e-05,
268
+ "loss": 1.1807,
269
  "step": 3900
270
  },
271
  {
272
  "epoch": 18.87,
273
+ "learning_rate": 6.9965e-05,
274
+ "loss": 1.1802,
275
  "step": 4000
276
  },
277
  {
278
  "epoch": 18.87,
279
+ "eval_loss": 0.35076606273651123,
280
+ "eval_runtime": 97.3621,
281
+ "eval_samples_per_second": 31.203,
282
+ "eval_steps_per_second": 0.976,
283
+ "eval_wer": 0.468975146607093,
284
  "step": 4000
285
  },
286
  {
287
  "epoch": 19.34,
288
+ "learning_rate": 6.960116279069767e-05,
289
+ "loss": 1.1843,
290
  "step": 4100
291
  },
292
  {
293
  "epoch": 19.81,
294
+ "learning_rate": 6.919418604651161e-05,
295
+ "loss": 1.1726,
296
  "step": 4200
297
  },
298
  {
299
  "epoch": 20.28,
300
+ "learning_rate": 6.879127906976743e-05,
301
+ "loss": 1.1681,
302
  "step": 4300
303
  },
304
  {
305
  "epoch": 20.75,
306
+ "learning_rate": 6.838430232558139e-05,
307
+ "loss": 1.1553,
308
  "step": 4400
309
  },
310
  {
311
  "epoch": 21.23,
312
+ "learning_rate": 6.79813953488372e-05,
313
+ "loss": 1.1477,
314
  "step": 4500
315
  },
316
  {
317
  "epoch": 21.7,
318
+ "learning_rate": 6.757441860465116e-05,
319
+ "loss": 1.1251,
320
  "step": 4600
321
  },
322
  {
323
  "epoch": 22.17,
324
+ "learning_rate": 6.71674418604651e-05,
325
+ "loss": 1.1271,
326
  "step": 4700
327
  },
328
  {
329
  "epoch": 22.64,
330
+ "learning_rate": 6.676046511627906e-05,
331
+ "loss": 1.1251,
332
  "step": 4800
333
  },
334
  {
335
  "epoch": 23.11,
336
+ "learning_rate": 6.635755813953488e-05,
337
+ "loss": 1.1315,
338
  "step": 4900
339
  },
340
  {
341
  "epoch": 23.58,
342
+ "learning_rate": 6.595058139534883e-05,
343
+ "loss": 1.1065,
344
  "step": 5000
345
  },
346
  {
347
  "epoch": 23.58,
348
+ "eval_loss": 0.3318960666656494,
349
+ "eval_runtime": 96.472,
350
+ "eval_samples_per_second": 31.491,
351
+ "eval_steps_per_second": 0.985,
352
+ "eval_wer": 0.46618263055012565,
353
  "step": 5000
354
  },
355
  {
356
  "epoch": 24.06,
357
+ "learning_rate": 6.554360465116279e-05,
358
+ "loss": 1.1061,
359
  "step": 5100
360
  },
361
  {
362
  "epoch": 24.53,
363
+ "learning_rate": 6.513662790697673e-05,
364
+ "loss": 1.1054,
365
  "step": 5200
366
  },
367
  {
368
  "epoch": 25.0,
369
+ "learning_rate": 6.472965116279069e-05,
370
+ "loss": 1.0993,
371
  "step": 5300
372
  },
373
  {
374
  "epoch": 25.47,
375
+ "learning_rate": 6.432267441860465e-05,
376
+ "loss": 1.1,
377
  "step": 5400
378
  },
379
  {
380
  "epoch": 25.94,
381
+ "learning_rate": 6.39156976744186e-05,
382
+ "loss": 1.1019,
383
  "step": 5500
384
  },
385
  {
386
  "epoch": 26.42,
387
+ "learning_rate": 6.350872093023256e-05,
388
+ "loss": 1.078,
389
  "step": 5600
390
  },
391
  {
392
  "epoch": 26.89,
393
+ "learning_rate": 6.31017441860465e-05,
394
+ "loss": 1.0847,
395
  "step": 5700
396
  },
397
  {
398
  "epoch": 27.36,
399
+ "learning_rate": 6.269476744186046e-05,
400
+ "loss": 1.0744,
401
  "step": 5800
402
  },
403
  {
404
  "epoch": 27.83,
405
+ "learning_rate": 6.228779069767442e-05,
406
+ "loss": 1.0703,
407
  "step": 5900
408
  },
409
  {
410
  "epoch": 28.3,
411
+ "learning_rate": 6.188081395348836e-05,
412
+ "loss": 1.0921,
413
  "step": 6000
414
  },
415
  {
416
  "epoch": 28.3,
417
+ "eval_loss": 0.30558520555496216,
418
+ "eval_runtime": 94.8784,
419
+ "eval_samples_per_second": 32.02,
420
+ "eval_steps_per_second": 1.001,
421
+ "eval_wer": 0.3879921809550405,
422
  "step": 6000
423
  },
424
  {
425
  "epoch": 28.77,
426
+ "learning_rate": 6.147383720930232e-05,
427
+ "loss": 1.0704,
428
  "step": 6100
429
  },
430
  {
431
  "epoch": 29.25,
432
+ "learning_rate": 6.106686046511627e-05,
433
+ "loss": 1.0479,
434
  "step": 6200
435
  },
436
  {
437
  "epoch": 29.72,
438
+ "learning_rate": 6.0659883720930225e-05,
439
+ "loss": 1.0583,
440
  "step": 6300
441
  },
442
  {
443
  "epoch": 30.19,
444
+ "learning_rate": 6.0252906976744175e-05,
445
+ "loss": 1.0515,
446
  "step": 6400
447
  },
448
  {
449
  "epoch": 30.66,
450
+ "learning_rate": 5.984593023255814e-05,
451
+ "loss": 1.0531,
452
  "step": 6500
453
  },
454
  {
455
  "epoch": 31.13,
456
+ "learning_rate": 5.943895348837209e-05,
457
+ "loss": 1.0505,
458
  "step": 6600
459
  },
460
  {
461
  "epoch": 31.6,
462
+ "learning_rate": 5.9031976744186045e-05,
463
+ "loss": 1.0314,
464
  "step": 6700
465
  },
466
  {
467
  "epoch": 32.08,
468
+ "learning_rate": 5.8624999999999995e-05,
469
+ "loss": 1.0473,
470
  "step": 6800
471
  },
472
  {
473
  "epoch": 32.55,
474
+ "learning_rate": 5.821802325581395e-05,
475
+ "loss": 1.0362,
476
  "step": 6900
477
  },
478
  {
479
  "epoch": 33.02,
480
+ "learning_rate": 5.78110465116279e-05,
481
+ "loss": 1.0366,
482
  "step": 7000
483
  },
484
  {
485
  "epoch": 33.02,
486
+ "eval_loss": 0.29966893792152405,
487
+ "eval_runtime": 94.2922,
488
+ "eval_samples_per_second": 32.219,
489
+ "eval_steps_per_second": 1.008,
490
+ "eval_wer": 0.36654565763753144,
491
  "step": 7000
492
  },
493
  {
494
  "epoch": 33.49,
495
+ "learning_rate": 5.740406976744185e-05,
496
+ "loss": 1.0361,
497
  "step": 7100
498
  },
499
  {
500
  "epoch": 33.96,
501
+ "learning_rate": 5.699709302325581e-05,
502
+ "loss": 1.0298,
503
  "step": 7200
504
  },
505
  {
506
  "epoch": 34.43,
507
+ "learning_rate": 5.659011627906976e-05,
508
+ "loss": 1.0185,
509
  "step": 7300
510
  },
511
  {
512
  "epoch": 34.91,
513
+ "learning_rate": 5.6183139534883716e-05,
514
+ "loss": 1.0161,
515
  "step": 7400
516
  },
517
  {
518
  "epoch": 35.38,
519
+ "learning_rate": 5.5776162790697666e-05,
520
+ "loss": 1.0134,
521
  "step": 7500
522
  },
523
  {
524
  "epoch": 35.85,
525
+ "learning_rate": 5.536918604651163e-05,
526
+ "loss": 1.0138,
527
  "step": 7600
528
  },
529
  {
530
  "epoch": 36.32,
531
+ "learning_rate": 5.496220930232558e-05,
532
+ "loss": 1.0076,
533
  "step": 7700
534
  },
535
  {
536
  "epoch": 36.79,
537
+ "learning_rate": 5.455523255813953e-05,
538
+ "loss": 1.0134,
539
  "step": 7800
540
  },
541
  {
542
  "epoch": 37.26,
543
+ "learning_rate": 5.415232558139534e-05,
544
+ "loss": 0.9983,
545
  "step": 7900
546
  },
547
  {
548
  "epoch": 37.74,
549
+ "learning_rate": 5.37453488372093e-05,
550
+ "loss": 0.9988,
551
  "step": 8000
552
  },
553
  {
554
  "epoch": 37.74,
555
+ "eval_loss": 0.2971556782722473,
556
+ "eval_runtime": 94.706,
557
+ "eval_samples_per_second": 32.078,
558
+ "eval_steps_per_second": 1.003,
559
+ "eval_wer": 0.36526110025132646,
560
  "step": 8000
561
  },
562
  {
563
  "epoch": 38.21,
564
+ "learning_rate": 5.333837209302325e-05,
565
+ "loss": 0.9974,
566
  "step": 8100
567
  },
568
  {
569
  "epoch": 38.68,
570
+ "learning_rate": 5.293139534883721e-05,
571
+ "loss": 0.9906,
572
  "step": 8200
573
  },
574
  {
575
  "epoch": 39.15,
576
+ "learning_rate": 5.252441860465116e-05,
577
+ "loss": 1.0178,
578
  "step": 8300
579
  },
580
  {
581
  "epoch": 39.62,
582
+ "learning_rate": 5.211744186046511e-05,
583
+ "loss": 0.99,
584
  "step": 8400
585
  },
586
  {
587
  "epoch": 40.09,
588
+ "learning_rate": 5.171046511627907e-05,
589
+ "loss": 0.9983,
590
  "step": 8500
591
  },
592
  {
593
  "epoch": 40.57,
594
+ "learning_rate": 5.130348837209302e-05,
595
+ "loss": 0.9835,
596
  "step": 8600
597
  },
598
  {
599
  "epoch": 41.04,
600
+ "learning_rate": 5.0896511627906974e-05,
601
+ "loss": 0.9902,
602
  "step": 8700
603
  },
604
  {
605
  "epoch": 41.51,
606
+ "learning_rate": 5.0489534883720924e-05,
607
+ "loss": 0.9604,
608
  "step": 8800
609
  },
610
  {
611
  "epoch": 41.98,
612
+ "learning_rate": 5.0082558139534874e-05,
613
+ "loss": 0.9836,
614
  "step": 8900
615
  },
616
  {
617
  "epoch": 42.45,
618
+ "learning_rate": 4.967558139534883e-05,
619
+ "loss": 0.9864,
620
  "step": 9000
621
  },
622
  {
623
  "epoch": 42.45,
624
+ "eval_loss": 0.2696739435195923,
625
+ "eval_runtime": 94.7296,
626
+ "eval_samples_per_second": 32.07,
627
+ "eval_steps_per_second": 1.003,
628
+ "eval_wer": 0.33705668807595646,
629
  "step": 9000
630
  },
631
  {
632
  "epoch": 42.92,
633
+ "learning_rate": 4.926860465116279e-05,
634
+ "loss": 0.9692,
635
  "step": 9100
636
  },
637
  {
638
  "epoch": 43.4,
639
+ "learning_rate": 4.8861627906976744e-05,
640
+ "loss": 0.9643,
641
  "step": 9200
642
  },
643
  {
644
  "epoch": 43.87,
645
+ "learning_rate": 4.8454651162790694e-05,
646
+ "loss": 0.9682,
647
  "step": 9300
648
  },
649
  {
650
  "epoch": 44.34,
651
+ "learning_rate": 4.804767441860465e-05,
652
+ "loss": 0.9651,
653
  "step": 9400
654
  },
655
  {
656
  "epoch": 44.81,
657
+ "learning_rate": 4.76406976744186e-05,
658
+ "loss": 0.9409,
659
  "step": 9500
660
  },
661
  {
662
  "epoch": 45.28,
663
+ "learning_rate": 4.723372093023255e-05,
664
+ "loss": 0.9792,
665
  "step": 9600
666
  },
667
  {
668
  "epoch": 45.75,
669
+ "learning_rate": 4.682674418604651e-05,
670
+ "loss": 0.9405,
671
  "step": 9700
672
  },
673
  {
674
  "epoch": 46.23,
675
+ "learning_rate": 4.641976744186046e-05,
676
+ "loss": 0.9679,
677
  "step": 9800
678
  },
679
  {
680
  "epoch": 46.7,
681
+ "learning_rate": 4.6012790697674415e-05,
682
+ "loss": 0.9468,
683
  "step": 9900
684
  },
685
  {
686
  "epoch": 47.17,
687
+ "learning_rate": 4.5605813953488365e-05,
688
+ "loss": 0.9558,
689
  "step": 10000
690
  },
691
  {
692
  "epoch": 47.17,
693
+ "eval_loss": 0.27388861775398254,
694
+ "eval_runtime": 94.8478,
695
+ "eval_samples_per_second": 32.03,
696
+ "eval_steps_per_second": 1.002,
697
+ "eval_wer": 0.314102206087685,
698
  "step": 10000
699
  },
700
  {
701
  "epoch": 47.64,
702
+ "learning_rate": 4.519883720930233e-05,
703
+ "loss": 0.95,
704
  "step": 10100
705
  },
706
  {
707
  "epoch": 48.11,
708
+ "learning_rate": 4.479186046511628e-05,
709
+ "loss": 0.9574,
710
  "step": 10200
711
  },
712
  {
713
  "epoch": 48.58,
714
+ "learning_rate": 4.438895348837209e-05,
715
+ "loss": 0.9473,
716
  "step": 10300
717
  },
718
  {
719
  "epoch": 49.06,
720
+ "learning_rate": 4.398197674418604e-05,
721
+ "loss": 0.9319,
722
  "step": 10400
723
  },
724
  {
725
  "epoch": 49.53,
726
+ "learning_rate": 4.3575e-05,
727
+ "loss": 0.9413,
728
  "step": 10500
729
  },
730
  {
731
  "epoch": 50.0,
732
+ "learning_rate": 4.316802325581395e-05,
733
+ "loss": 0.9442,
734
  "step": 10600
735
  },
736
  {
737
+ "epoch": 50.47,
738
+ "learning_rate": 4.27610465116279e-05,
739
+ "loss": 0.9315,
740
+ "step": 10700
741
+ },
742
+ {
743
+ "epoch": 50.94,
744
+ "learning_rate": 4.235406976744186e-05,
745
+ "loss": 0.9334,
746
+ "step": 10800
747
+ },
748
+ {
749
+ "epoch": 51.42,
750
+ "learning_rate": 4.194709302325581e-05,
751
+ "loss": 0.928,
752
+ "step": 10900
753
+ },
754
+ {
755
+ "epoch": 51.89,
756
+ "learning_rate": 4.1540116279069766e-05,
757
+ "loss": 0.9094,
758
+ "step": 11000
759
+ },
760
+ {
761
+ "epoch": 51.89,
762
+ "eval_loss": 0.2657303512096405,
763
+ "eval_runtime": 93.9674,
764
+ "eval_samples_per_second": 32.33,
765
+ "eval_steps_per_second": 1.011,
766
+ "eval_wer": 0.35330913152750626,
767
+ "step": 11000
768
+ },
769
+ {
770
+ "epoch": 52.36,
771
+ "learning_rate": 4.1133139534883716e-05,
772
+ "loss": 0.917,
773
+ "step": 11100
774
+ },
775
+ {
776
+ "epoch": 52.83,
777
+ "learning_rate": 4.072616279069767e-05,
778
+ "loss": 0.9164,
779
+ "step": 11200
780
+ },
781
+ {
782
+ "epoch": 53.3,
783
+ "learning_rate": 4.031918604651162e-05,
784
+ "loss": 0.915,
785
+ "step": 11300
786
+ },
787
+ {
788
+ "epoch": 53.77,
789
+ "learning_rate": 3.991220930232557e-05,
790
+ "loss": 0.905,
791
+ "step": 11400
792
+ },
793
+ {
794
+ "epoch": 54.25,
795
+ "learning_rate": 3.950523255813953e-05,
796
+ "loss": 0.9144,
797
+ "step": 11500
798
+ },
799
+ {
800
+ "epoch": 54.72,
801
+ "learning_rate": 3.9098255813953487e-05,
802
+ "loss": 0.8963,
803
+ "step": 11600
804
+ },
805
+ {
806
+ "epoch": 55.19,
807
+ "learning_rate": 3.8691279069767443e-05,
808
+ "loss": 0.8935,
809
+ "step": 11700
810
+ },
811
+ {
812
+ "epoch": 55.66,
813
+ "learning_rate": 3.8284302325581393e-05,
814
+ "loss": 0.8964,
815
+ "step": 11800
816
+ },
817
+ {
818
+ "epoch": 56.13,
819
+ "learning_rate": 3.7877325581395344e-05,
820
+ "loss": 0.9213,
821
+ "step": 11900
822
+ },
823
+ {
824
+ "epoch": 56.6,
825
+ "learning_rate": 3.74703488372093e-05,
826
+ "loss": 0.9034,
827
+ "step": 12000
828
+ },
829
+ {
830
+ "epoch": 56.6,
831
+ "eval_loss": 0.2699211537837982,
832
+ "eval_runtime": 95.9143,
833
+ "eval_samples_per_second": 31.674,
834
+ "eval_steps_per_second": 0.99,
835
+ "eval_wer": 0.3396816531695057,
836
+ "step": 12000
837
+ },
838
+ {
839
+ "epoch": 57.08,
840
+ "learning_rate": 3.706337209302325e-05,
841
+ "loss": 0.8992,
842
+ "step": 12100
843
+ },
844
+ {
845
+ "epoch": 57.55,
846
+ "learning_rate": 3.665639534883721e-05,
847
+ "loss": 0.8911,
848
+ "step": 12200
849
+ },
850
+ {
851
+ "epoch": 58.02,
852
+ "learning_rate": 3.624941860465116e-05,
853
+ "loss": 0.8907,
854
+ "step": 12300
855
+ },
856
+ {
857
+ "epoch": 58.49,
858
+ "learning_rate": 3.5842441860465114e-05,
859
+ "loss": 0.8957,
860
+ "step": 12400
861
+ },
862
+ {
863
+ "epoch": 58.96,
864
+ "learning_rate": 3.5439534883720924e-05,
865
+ "loss": 0.8888,
866
+ "step": 12500
867
+ },
868
+ {
869
+ "epoch": 59.43,
870
+ "learning_rate": 3.503255813953488e-05,
871
+ "loss": 0.8835,
872
+ "step": 12600
873
+ },
874
+ {
875
+ "epoch": 59.91,
876
+ "learning_rate": 3.462558139534883e-05,
877
+ "loss": 0.8866,
878
+ "step": 12700
879
+ },
880
+ {
881
+ "epoch": 60.38,
882
+ "learning_rate": 3.421860465116279e-05,
883
+ "loss": 0.8887,
884
+ "step": 12800
885
+ },
886
+ {
887
+ "epoch": 60.85,
888
+ "learning_rate": 3.3811627906976744e-05,
889
+ "loss": 0.8919,
890
+ "step": 12900
891
+ },
892
+ {
893
+ "epoch": 61.32,
894
+ "learning_rate": 3.3404651162790694e-05,
895
+ "loss": 0.8907,
896
+ "step": 13000
897
+ },
898
+ {
899
+ "epoch": 61.32,
900
+ "eval_loss": 0.2764558792114258,
901
+ "eval_runtime": 94.1878,
902
+ "eval_samples_per_second": 32.255,
903
+ "eval_steps_per_second": 1.009,
904
+ "eval_wer": 0.3469980452387601,
905
+ "step": 13000
906
+ },
907
+ {
908
+ "epoch": 61.79,
909
+ "learning_rate": 3.2997674418604644e-05,
910
+ "loss": 0.8729,
911
+ "step": 13100
912
+ },
913
+ {
914
+ "epoch": 62.26,
915
+ "learning_rate": 3.25906976744186e-05,
916
+ "loss": 0.8808,
917
+ "step": 13200
918
+ },
919
+ {
920
+ "epoch": 62.74,
921
+ "learning_rate": 3.218372093023256e-05,
922
+ "loss": 0.8845,
923
+ "step": 13300
924
+ },
925
+ {
926
+ "epoch": 63.21,
927
+ "learning_rate": 3.177674418604651e-05,
928
+ "loss": 0.8616,
929
+ "step": 13400
930
+ },
931
+ {
932
+ "epoch": 63.68,
933
+ "learning_rate": 3.1369767441860465e-05,
934
+ "loss": 0.8755,
935
+ "step": 13500
936
+ },
937
+ {
938
+ "epoch": 64.15,
939
+ "learning_rate": 3.0962790697674415e-05,
940
+ "loss": 0.8808,
941
+ "step": 13600
942
+ },
943
+ {
944
+ "epoch": 64.62,
945
+ "learning_rate": 3.055988372093023e-05,
946
+ "loss": 0.8823,
947
+ "step": 13700
948
+ },
949
+ {
950
+ "epoch": 65.09,
951
+ "learning_rate": 3.0152906976744185e-05,
952
+ "loss": 0.8667,
953
+ "step": 13800
954
+ },
955
+ {
956
+ "epoch": 65.57,
957
+ "learning_rate": 2.974593023255814e-05,
958
+ "loss": 0.8628,
959
+ "step": 13900
960
+ },
961
+ {
962
+ "epoch": 66.04,
963
+ "learning_rate": 2.933895348837209e-05,
964
+ "loss": 0.8631,
965
+ "step": 14000
966
+ },
967
+ {
968
+ "epoch": 66.04,
969
+ "eval_loss": 0.27741023898124695,
970
+ "eval_runtime": 93.7633,
971
+ "eval_samples_per_second": 32.401,
972
+ "eval_steps_per_second": 1.013,
973
+ "eval_wer": 0.33459927394582517,
974
+ "step": 14000
975
+ },
976
+ {
977
+ "epoch": 66.51,
978
+ "learning_rate": 2.8931976744186042e-05,
979
+ "loss": 0.865,
980
+ "step": 14100
981
+ },
982
+ {
983
+ "epoch": 66.98,
984
+ "learning_rate": 2.8524999999999995e-05,
985
+ "loss": 0.857,
986
+ "step": 14200
987
+ },
988
+ {
989
+ "epoch": 67.45,
990
+ "learning_rate": 2.8118023255813952e-05,
991
+ "loss": 0.8575,
992
+ "step": 14300
993
+ },
994
+ {
995
+ "epoch": 67.92,
996
+ "learning_rate": 2.7711046511627906e-05,
997
+ "loss": 0.8497,
998
+ "step": 14400
999
+ },
1000
+ {
1001
+ "epoch": 68.4,
1002
+ "learning_rate": 2.730406976744186e-05,
1003
+ "loss": 0.855,
1004
+ "step": 14500
1005
+ },
1006
+ {
1007
+ "epoch": 68.87,
1008
+ "learning_rate": 2.689709302325581e-05,
1009
+ "loss": 0.845,
1010
+ "step": 14600
1011
+ },
1012
+ {
1013
+ "epoch": 69.34,
1014
+ "learning_rate": 2.6490116279069763e-05,
1015
+ "loss": 0.8324,
1016
+ "step": 14700
1017
+ },
1018
+ {
1019
+ "epoch": 69.81,
1020
+ "learning_rate": 2.608313953488372e-05,
1021
+ "loss": 0.844,
1022
+ "step": 14800
1023
+ },
1024
+ {
1025
+ "epoch": 70.28,
1026
+ "learning_rate": 2.5676162790697673e-05,
1027
+ "loss": 0.8483,
1028
+ "step": 14900
1029
+ },
1030
+ {
1031
+ "epoch": 70.75,
1032
+ "learning_rate": 2.5269186046511626e-05,
1033
+ "loss": 0.8389,
1034
+ "step": 15000
1035
+ },
1036
+ {
1037
+ "epoch": 70.75,
1038
+ "eval_loss": 0.2742805778980255,
1039
+ "eval_runtime": 94.8277,
1040
+ "eval_samples_per_second": 32.037,
1041
+ "eval_steps_per_second": 1.002,
1042
+ "eval_wer": 0.33649818486456295,
1043
+ "step": 15000
1044
+ },
1045
+ {
1046
+ "epoch": 71.23,
1047
+ "learning_rate": 2.486220930232558e-05,
1048
+ "loss": 0.8383,
1049
+ "step": 15100
1050
+ },
1051
+ {
1052
+ "epoch": 71.7,
1053
+ "learning_rate": 2.445523255813953e-05,
1054
+ "loss": 0.8468,
1055
+ "step": 15200
1056
+ },
1057
+ {
1058
+ "epoch": 72.17,
1059
+ "learning_rate": 2.4048255813953487e-05,
1060
+ "loss": 0.8511,
1061
+ "step": 15300
1062
+ },
1063
+ {
1064
+ "epoch": 72.64,
1065
+ "learning_rate": 2.364127906976744e-05,
1066
+ "loss": 0.8457,
1067
+ "step": 15400
1068
+ },
1069
+ {
1070
+ "epoch": 73.11,
1071
+ "learning_rate": 2.3234302325581394e-05,
1072
+ "loss": 0.8314,
1073
+ "step": 15500
1074
+ },
1075
+ {
1076
+ "epoch": 73.58,
1077
+ "learning_rate": 2.2827325581395347e-05,
1078
+ "loss": 0.8278,
1079
+ "step": 15600
1080
+ },
1081
+ {
1082
+ "epoch": 74.06,
1083
+ "learning_rate": 2.24203488372093e-05,
1084
+ "loss": 0.8481,
1085
+ "step": 15700
1086
+ },
1087
+ {
1088
+ "epoch": 74.53,
1089
+ "learning_rate": 2.201337209302325e-05,
1090
+ "loss": 0.8158,
1091
+ "step": 15800
1092
+ },
1093
+ {
1094
+ "epoch": 75.0,
1095
+ "learning_rate": 2.1606395348837207e-05,
1096
+ "loss": 0.8356,
1097
+ "step": 15900
1098
+ },
1099
+ {
1100
+ "epoch": 75.47,
1101
+ "learning_rate": 2.119941860465116e-05,
1102
+ "loss": 0.8214,
1103
+ "step": 16000
1104
+ },
1105
+ {
1106
+ "epoch": 75.47,
1107
+ "eval_loss": 0.2778330445289612,
1108
+ "eval_runtime": 94.0407,
1109
+ "eval_samples_per_second": 32.305,
1110
+ "eval_steps_per_second": 1.01,
1111
+ "eval_wer": 0.32013404077073443,
1112
+ "step": 16000
1113
+ },
1114
+ {
1115
+ "epoch": 75.94,
1116
+ "learning_rate": 2.0792441860465114e-05,
1117
+ "loss": 0.8293,
1118
+ "step": 16100
1119
+ },
1120
+ {
1121
+ "epoch": 76.42,
1122
+ "learning_rate": 2.0389534883720927e-05,
1123
+ "loss": 0.834,
1124
+ "step": 16200
1125
+ },
1126
+ {
1127
+ "epoch": 76.89,
1128
+ "learning_rate": 1.9982558139534884e-05,
1129
+ "loss": 0.8308,
1130
+ "step": 16300
1131
+ },
1132
+ {
1133
+ "epoch": 77.36,
1134
+ "learning_rate": 1.9575581395348838e-05,
1135
+ "loss": 0.8292,
1136
+ "step": 16400
1137
+ },
1138
+ {
1139
+ "epoch": 77.83,
1140
+ "learning_rate": 1.9168604651162788e-05,
1141
+ "loss": 0.8224,
1142
+ "step": 16500
1143
+ },
1144
+ {
1145
+ "epoch": 78.3,
1146
+ "learning_rate": 1.876162790697674e-05,
1147
+ "loss": 0.8172,
1148
+ "step": 16600
1149
+ },
1150
+ {
1151
+ "epoch": 78.77,
1152
+ "learning_rate": 1.8354651162790695e-05,
1153
+ "loss": 0.8202,
1154
+ "step": 16700
1155
+ },
1156
+ {
1157
+ "epoch": 79.25,
1158
+ "learning_rate": 1.795174418604651e-05,
1159
+ "loss": 0.8222,
1160
+ "step": 16800
1161
+ },
1162
+ {
1163
+ "epoch": 79.72,
1164
+ "learning_rate": 1.7544767441860464e-05,
1165
+ "loss": 0.811,
1166
+ "step": 16900
1167
+ },
1168
+ {
1169
+ "epoch": 80.19,
1170
+ "learning_rate": 1.7137790697674414e-05,
1171
+ "loss": 0.8195,
1172
+ "step": 17000
1173
+ },
1174
+ {
1175
+ "epoch": 80.19,
1176
+ "eval_loss": 0.2725354731082916,
1177
+ "eval_runtime": 94.0258,
1178
+ "eval_samples_per_second": 32.31,
1179
+ "eval_steps_per_second": 1.01,
1180
+ "eval_wer": 0.32856743926277576,
1181
+ "step": 17000
1182
+ },
1183
+ {
1184
+ "epoch": 80.66,
1185
+ "learning_rate": 1.673081395348837e-05,
1186
+ "loss": 0.8053,
1187
+ "step": 17100
1188
+ },
1189
+ {
1190
+ "epoch": 81.13,
1191
+ "learning_rate": 1.6323837209302325e-05,
1192
+ "loss": 0.8152,
1193
+ "step": 17200
1194
+ },
1195
+ {
1196
+ "epoch": 81.6,
1197
+ "learning_rate": 1.5916860465116278e-05,
1198
+ "loss": 0.8074,
1199
+ "step": 17300
1200
+ },
1201
+ {
1202
+ "epoch": 82.08,
1203
+ "learning_rate": 1.550988372093023e-05,
1204
+ "loss": 0.8179,
1205
+ "step": 17400
1206
+ },
1207
+ {
1208
+ "epoch": 82.55,
1209
+ "learning_rate": 1.5102906976744183e-05,
1210
+ "loss": 0.8078,
1211
+ "step": 17500
1212
+ },
1213
+ {
1214
+ "epoch": 83.02,
1215
+ "learning_rate": 1.4695930232558139e-05,
1216
+ "loss": 0.8078,
1217
+ "step": 17600
1218
+ },
1219
+ {
1220
+ "epoch": 83.49,
1221
+ "learning_rate": 1.4288953488372092e-05,
1222
+ "loss": 0.8061,
1223
+ "step": 17700
1224
+ },
1225
+ {
1226
+ "epoch": 83.96,
1227
+ "learning_rate": 1.3881976744186044e-05,
1228
+ "loss": 0.813,
1229
+ "step": 17800
1230
+ },
1231
+ {
1232
+ "epoch": 84.43,
1233
+ "learning_rate": 1.3474999999999999e-05,
1234
+ "loss": 0.7891,
1235
+ "step": 17900
1236
+ },
1237
+ {
1238
+ "epoch": 84.91,
1239
+ "learning_rate": 1.3068023255813952e-05,
1240
+ "loss": 0.7994,
1241
+ "step": 18000
1242
+ },
1243
+ {
1244
+ "epoch": 84.91,
1245
+ "eval_loss": 0.2781510651111603,
1246
+ "eval_runtime": 92.9473,
1247
+ "eval_samples_per_second": 32.685,
1248
+ "eval_steps_per_second": 1.022,
1249
+ "eval_wer": 0.33147165596202177,
1250
+ "step": 18000
1251
+ },
1252
+ {
1253
+ "epoch": 85.38,
1254
+ "learning_rate": 1.2661046511627907e-05,
1255
+ "loss": 0.8011,
1256
+ "step": 18100
1257
+ },
1258
+ {
1259
+ "epoch": 85.85,
1260
+ "learning_rate": 1.225406976744186e-05,
1261
+ "loss": 0.7886,
1262
+ "step": 18200
1263
+ },
1264
+ {
1265
+ "epoch": 86.32,
1266
+ "learning_rate": 1.1847093023255813e-05,
1267
+ "loss": 0.8107,
1268
+ "step": 18300
1269
+ },
1270
+ {
1271
+ "epoch": 86.79,
1272
+ "learning_rate": 1.1440116279069768e-05,
1273
+ "loss": 0.7976,
1274
+ "step": 18400
1275
+ },
1276
+ {
1277
+ "epoch": 87.26,
1278
+ "learning_rate": 1.103313953488372e-05,
1279
+ "loss": 0.7871,
1280
+ "step": 18500
1281
+ },
1282
+ {
1283
+ "epoch": 87.74,
1284
+ "learning_rate": 1.0626162790697673e-05,
1285
+ "loss": 0.7922,
1286
+ "step": 18600
1287
+ },
1288
+ {
1289
+ "epoch": 88.21,
1290
+ "learning_rate": 1.0219186046511628e-05,
1291
+ "loss": 0.7826,
1292
+ "step": 18700
1293
+ },
1294
+ {
1295
+ "epoch": 88.68,
1296
+ "learning_rate": 9.81220930232558e-06,
1297
+ "loss": 0.7901,
1298
+ "step": 18800
1299
+ },
1300
+ {
1301
+ "epoch": 89.15,
1302
+ "learning_rate": 9.405232558139535e-06,
1303
+ "loss": 0.793,
1304
+ "step": 18900
1305
+ },
1306
+ {
1307
+ "epoch": 89.62,
1308
+ "learning_rate": 8.998255813953488e-06,
1309
+ "loss": 0.7816,
1310
+ "step": 19000
1311
+ },
1312
+ {
1313
+ "epoch": 89.62,
1314
+ "eval_loss": 0.2774805724620819,
1315
+ "eval_runtime": 94.1962,
1316
+ "eval_samples_per_second": 32.252,
1317
+ "eval_steps_per_second": 1.009,
1318
+ "eval_wer": 0.3363306339011449,
1319
+ "step": 19000
1320
+ },
1321
+ {
1322
+ "epoch": 90.09,
1323
+ "learning_rate": 8.591279069767442e-06,
1324
+ "loss": 0.7922,
1325
+ "step": 19100
1326
+ },
1327
+ {
1328
+ "epoch": 90.57,
1329
+ "learning_rate": 8.188372093023255e-06,
1330
+ "loss": 0.7937,
1331
+ "step": 19200
1332
+ },
1333
+ {
1334
+ "epoch": 91.04,
1335
+ "learning_rate": 7.781395348837208e-06,
1336
+ "loss": 0.784,
1337
+ "step": 19300
1338
+ },
1339
+ {
1340
+ "epoch": 91.51,
1341
+ "learning_rate": 7.374418604651162e-06,
1342
+ "loss": 0.7811,
1343
+ "step": 19400
1344
+ },
1345
+ {
1346
+ "epoch": 91.98,
1347
+ "learning_rate": 6.967441860465116e-06,
1348
+ "loss": 0.7784,
1349
+ "step": 19500
1350
+ },
1351
+ {
1352
+ "epoch": 92.45,
1353
+ "learning_rate": 6.560465116279069e-06,
1354
+ "loss": 0.789,
1355
+ "step": 19600
1356
+ },
1357
+ {
1358
+ "epoch": 92.92,
1359
+ "learning_rate": 6.153488372093022e-06,
1360
+ "loss": 0.7785,
1361
+ "step": 19700
1362
+ },
1363
+ {
1364
+ "epoch": 93.4,
1365
+ "learning_rate": 5.7465116279069765e-06,
1366
+ "loss": 0.7799,
1367
+ "step": 19800
1368
+ },
1369
+ {
1370
+ "epoch": 93.87,
1371
+ "learning_rate": 5.33953488372093e-06,
1372
+ "loss": 0.7784,
1373
+ "step": 19900
1374
+ },
1375
+ {
1376
+ "epoch": 94.34,
1377
+ "learning_rate": 4.9325581395348825e-06,
1378
+ "loss": 0.7816,
1379
+ "step": 20000
1380
+ },
1381
+ {
1382
+ "epoch": 94.34,
1383
+ "eval_loss": 0.2731296420097351,
1384
+ "eval_runtime": 94.2664,
1385
+ "eval_samples_per_second": 32.228,
1386
+ "eval_steps_per_second": 1.008,
1387
+ "eval_wer": 0.32784138508796423,
1388
+ "step": 20000
1389
+ },
1390
+ {
1391
+ "epoch": 94.81,
1392
+ "learning_rate": 4.525581395348837e-06,
1393
+ "loss": 0.7788,
1394
+ "step": 20100
1395
+ },
1396
+ {
1397
+ "epoch": 95.28,
1398
+ "learning_rate": 4.11860465116279e-06,
1399
+ "loss": 0.781,
1400
+ "step": 20200
1401
+ },
1402
+ {
1403
+ "epoch": 95.75,
1404
+ "learning_rate": 3.7116279069767437e-06,
1405
+ "loss": 0.785,
1406
+ "step": 20300
1407
+ },
1408
+ {
1409
+ "epoch": 96.23,
1410
+ "learning_rate": 3.3046511627906976e-06,
1411
+ "loss": 0.7664,
1412
+ "step": 20400
1413
+ },
1414
+ {
1415
+ "epoch": 96.7,
1416
+ "learning_rate": 2.8976744186046506e-06,
1417
+ "loss": 0.7771,
1418
+ "step": 20500
1419
+ },
1420
+ {
1421
+ "epoch": 97.17,
1422
+ "learning_rate": 2.4906976744186045e-06,
1423
+ "loss": 0.7848,
1424
+ "step": 20600
1425
+ },
1426
+ {
1427
+ "epoch": 97.64,
1428
+ "learning_rate": 2.083720930232558e-06,
1429
+ "loss": 0.765,
1430
+ "step": 20700
1431
+ },
1432
+ {
1433
+ "epoch": 98.11,
1434
+ "learning_rate": 1.6767441860465114e-06,
1435
+ "loss": 0.7877,
1436
+ "step": 20800
1437
+ },
1438
+ {
1439
+ "epoch": 98.58,
1440
+ "learning_rate": 1.2697674418604648e-06,
1441
+ "loss": 0.7681,
1442
+ "step": 20900
1443
+ },
1444
+ {
1445
+ "epoch": 99.06,
1446
+ "learning_rate": 8.627906976744186e-07,
1447
+ "loss": 0.7635,
1448
+ "step": 21000
1449
+ },
1450
+ {
1451
+ "epoch": 99.06,
1452
+ "eval_loss": 0.276745080947876,
1453
+ "eval_runtime": 95.4293,
1454
+ "eval_samples_per_second": 31.835,
1455
+ "eval_steps_per_second": 0.996,
1456
+ "eval_wer": 0.32594247416922645,
1457
+ "step": 21000
1458
+ },
1459
+ {
1460
+ "epoch": 99.53,
1461
+ "learning_rate": 4.558139534883721e-07,
1462
+ "loss": 0.7818,
1463
+ "step": 21100
1464
+ },
1465
+ {
1466
+ "epoch": 100.0,
1467
+ "learning_rate": 4.883720930232558e-08,
1468
+ "loss": 0.7716,
1469
+ "step": 21200
1470
+ },
1471
+ {
1472
+ "epoch": 100.0,
1473
+ "step": 21200,
1474
+ "total_flos": 7.799992701307658e+19,
1475
+ "train_loss": 1.235663890838623,
1476
+ "train_runtime": 30132.3883,
1477
+ "train_samples_per_second": 22.464,
1478
+ "train_steps_per_second": 0.704
1479
  }
1480
  ],
1481
+ "max_steps": 21200,
1482
+ "num_train_epochs": 100,
1483
+ "total_flos": 7.799992701307658e+19,
1484
  "trial_name": null,
1485
  "trial_params": null
1486
  }