lucio commited on
Commit
e818b32
1 Parent(s): bea1a8e

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.20362737774848938,
4
- "eval_runtime": 146.0593,
5
- "eval_samples": 2744,
6
- "eval_samples_per_second": 18.787,
7
- "eval_steps_per_second": 2.348,
8
- "eval_wer": 0.2976980458560249,
9
- "train_loss": 1.1418190615227881,
10
- "train_runtime": 52190.9896,
11
- "train_samples": 6034,
12
- "train_samples_per_second": 11.561,
13
- "train_steps_per_second": 0.36
14
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.21627500653266907,
4
+ "eval_runtime": 133.3587,
5
+ "eval_samples": 2742,
6
+ "eval_samples_per_second": 20.561,
7
+ "eval_steps_per_second": 2.572,
8
+ "eval_wer": 0.32487632188081517,
9
+ "train_loss": 1.3660302423273476,
10
+ "train_runtime": 26867.6077,
11
+ "train_samples": 3292,
12
+ "train_samples_per_second": 12.253,
13
+ "train_steps_per_second": 0.383
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.20362737774848938,
4
- "eval_runtime": 146.0593,
5
- "eval_samples": 2744,
6
- "eval_samples_per_second": 18.787,
7
- "eval_steps_per_second": 2.348,
8
- "eval_wer": 0.2976980458560249
9
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.21627500653266907,
4
+ "eval_runtime": 133.3587,
5
+ "eval_samples": 2742,
6
+ "eval_samples_per_second": 20.561,
7
+ "eval_steps_per_second": 2.572,
8
+ "eval_wer": 0.32487632188081517
9
  }
runs/Feb03_08-05-51_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643902743.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.626095.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca1948e90401480b87dead8c3b414610af200d57c96040f29802725beeeac19
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 1.1418190615227881,
4
- "train_runtime": 52190.9896,
5
- "train_samples": 6034,
6
- "train_samples_per_second": 11.561,
7
- "train_steps_per_second": 0.36
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 1.3660302423273476,
4
+ "train_runtime": 26867.6077,
5
+ "train_samples": 3292,
6
+ "train_samples_per_second": 12.253,
7
+ "train_steps_per_second": 0.383
8
  }
trainer_state.json CHANGED
@@ -1,1486 +1,823 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 99.99602649006623,
5
- "global_step": 18800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.53,
12
  "learning_rate": 4.950000000000001e-06,
13
- "loss": 13.7083,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 1.06,
18
  "learning_rate": 9.950000000000001e-06,
19
- "loss": 5.8958,
20
  "step": 200
21
  },
22
  {
23
- "epoch": 1.59,
24
  "learning_rate": 1.4950000000000001e-05,
25
- "loss": 4.115,
26
  "step": 300
27
  },
28
  {
29
- "epoch": 2.13,
30
  "learning_rate": 1.995e-05,
31
- "loss": 3.6068,
32
  "step": 400
33
  },
34
  {
35
- "epoch": 2.66,
36
  "learning_rate": 2.495e-05,
37
- "loss": 3.2892,
38
  "step": 500
39
  },
40
  {
41
- "epoch": 2.66,
42
- "eval_loss": 3.241530179977417,
43
- "eval_runtime": 149.1051,
44
- "eval_samples_per_second": 18.403,
45
- "eval_steps_per_second": 2.3,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
- "epoch": 3.19,
51
  "learning_rate": 2.995e-05,
52
- "loss": 3.2316,
53
  "step": 600
54
  },
55
  {
56
- "epoch": 3.72,
57
  "learning_rate": 3.495e-05,
58
- "loss": 3.1529,
59
  "step": 700
60
  },
61
  {
62
- "epoch": 4.25,
63
  "learning_rate": 3.995e-05,
64
- "loss": 3.1279,
65
  "step": 800
66
  },
67
  {
68
- "epoch": 4.78,
69
  "learning_rate": 4.495e-05,
70
- "loss": 3.0647,
71
  "step": 900
72
  },
73
  {
74
- "epoch": 5.32,
75
  "learning_rate": 4.995e-05,
76
- "loss": 2.9206,
77
  "step": 1000
78
  },
79
  {
80
- "epoch": 5.32,
81
- "eval_loss": 2.4381155967712402,
82
- "eval_runtime": 158.7474,
83
- "eval_samples_per_second": 17.285,
84
- "eval_steps_per_second": 2.161,
85
- "eval_wer": 1.0055832685002974,
86
  "step": 1000
87
  },
88
  {
89
- "epoch": 5.85,
90
  "learning_rate": 5.495e-05,
91
- "loss": 2.2804,
92
  "step": 1100
93
  },
94
  {
95
- "epoch": 6.38,
96
  "learning_rate": 5.995000000000001e-05,
97
- "loss": 1.8138,
98
  "step": 1200
99
  },
100
  {
101
- "epoch": 6.91,
102
- "learning_rate": 6.49e-05,
103
- "loss": 1.6251,
104
  "step": 1300
105
  },
106
  {
107
- "epoch": 7.45,
108
- "learning_rate": 6.99e-05,
109
- "loss": 1.5483,
110
  "step": 1400
111
  },
112
  {
113
- "epoch": 7.97,
114
- "learning_rate": 7.49e-05,
115
- "loss": 1.4909,
116
  "step": 1500
117
  },
118
  {
119
- "epoch": 7.97,
120
- "eval_loss": 0.5427731275558472,
121
- "eval_runtime": 150.2181,
122
- "eval_samples_per_second": 18.267,
123
- "eval_steps_per_second": 2.283,
124
- "eval_wer": 0.6704956294906411,
125
  "step": 1500
126
  },
127
  {
128
- "epoch": 8.51,
129
- "learning_rate": 7.99e-05,
130
- "loss": 1.4577,
131
  "step": 1600
132
  },
133
  {
134
- "epoch": 9.04,
135
- "learning_rate": 8.49e-05,
136
- "loss": 1.4197,
137
  "step": 1700
138
  },
139
  {
140
- "epoch": 9.57,
141
- "learning_rate": 8.985e-05,
142
- "loss": 1.3877,
143
  "step": 1800
144
  },
145
  {
146
- "epoch": 10.11,
147
- "learning_rate": 9.485e-05,
148
- "loss": 1.3704,
149
  "step": 1900
150
  },
151
  {
152
- "epoch": 10.64,
153
- "learning_rate": 9.985000000000001e-05,
154
- "loss": 1.3395,
155
  "step": 2000
156
  },
157
  {
158
- "epoch": 10.64,
159
- "eval_loss": 0.4207160472869873,
160
- "eval_runtime": 153.5378,
161
- "eval_samples_per_second": 17.872,
162
- "eval_steps_per_second": 2.234,
163
- "eval_wer": 0.5995148963434168,
164
  "step": 2000
165
  },
166
  {
167
- "epoch": 11.17,
168
- "learning_rate": 9.942261904761904e-05,
169
- "loss": 1.3349,
170
  "step": 2100
171
  },
172
  {
173
- "epoch": 11.7,
174
- "learning_rate": 9.882738095238095e-05,
175
- "loss": 1.3064,
176
  "step": 2200
177
  },
178
  {
179
- "epoch": 12.23,
180
- "learning_rate": 9.823214285714287e-05,
181
- "loss": 1.3132,
182
  "step": 2300
183
  },
184
  {
185
- "epoch": 12.76,
186
- "learning_rate": 9.763690476190477e-05,
187
- "loss": 1.2813,
188
  "step": 2400
189
  },
190
  {
191
- "epoch": 13.3,
192
- "learning_rate": 9.704166666666668e-05,
193
- "loss": 1.2718,
194
  "step": 2500
195
  },
196
  {
197
- "epoch": 13.3,
198
- "eval_loss": 0.37430423498153687,
199
- "eval_runtime": 148.1352,
200
- "eval_samples_per_second": 18.524,
201
- "eval_steps_per_second": 2.315,
202
- "eval_wer": 0.5648254084481259,
203
  "step": 2500
204
  },
205
  {
206
- "epoch": 13.83,
207
- "learning_rate": 9.644642857142857e-05,
208
- "loss": 1.247,
209
  "step": 2600
210
  },
211
  {
212
- "epoch": 14.36,
213
- "learning_rate": 9.585119047619047e-05,
214
- "loss": 1.2446,
215
  "step": 2700
216
  },
217
  {
218
- "epoch": 14.89,
219
- "learning_rate": 9.525595238095239e-05,
220
- "loss": 1.2404,
221
  "step": 2800
222
  },
223
  {
224
- "epoch": 15.42,
225
- "learning_rate": 9.46607142857143e-05,
226
- "loss": 1.234,
227
  "step": 2900
228
  },
229
  {
230
- "epoch": 15.95,
231
- "learning_rate": 9.40654761904762e-05,
232
- "loss": 1.1798,
233
  "step": 3000
234
  },
235
  {
236
- "epoch": 15.95,
237
- "eval_loss": 0.32250717282295227,
238
- "eval_runtime": 150.8327,
239
- "eval_samples_per_second": 18.192,
240
- "eval_steps_per_second": 2.274,
241
- "eval_wer": 0.4927005629032996,
242
  "step": 3000
243
  },
244
  {
245
- "epoch": 16.49,
246
- "learning_rate": 9.34702380952381e-05,
247
- "loss": 1.1849,
248
  "step": 3100
249
  },
250
  {
251
- "epoch": 17.02,
252
- "learning_rate": 9.2875e-05,
253
- "loss": 1.1931,
254
  "step": 3200
255
  },
256
  {
257
- "epoch": 17.55,
258
- "learning_rate": 9.227976190476191e-05,
259
- "loss": 1.1628,
260
  "step": 3300
261
  },
262
  {
263
- "epoch": 18.08,
264
- "learning_rate": 9.168452380952382e-05,
265
- "loss": 1.1743,
266
  "step": 3400
267
  },
268
  {
269
- "epoch": 18.61,
270
- "learning_rate": 9.108928571428572e-05,
271
- "loss": 1.1392,
272
  "step": 3500
273
  },
274
  {
275
- "epoch": 18.61,
276
- "eval_loss": 0.3096984922885895,
277
- "eval_runtime": 151.6787,
278
- "eval_samples_per_second": 18.091,
279
- "eval_steps_per_second": 2.261,
280
- "eval_wer": 0.4626790535902247,
281
  "step": 3500
282
  },
283
  {
284
- "epoch": 19.15,
285
- "learning_rate": 9.049404761904763e-05,
286
- "loss": 1.1458,
287
  "step": 3600
288
  },
289
  {
290
- "epoch": 19.68,
291
- "learning_rate": 8.989880952380953e-05,
292
- "loss": 1.1323,
293
  "step": 3700
294
  },
295
  {
296
- "epoch": 20.21,
297
- "learning_rate": 8.930357142857143e-05,
298
- "loss": 1.1293,
299
  "step": 3800
300
  },
301
  {
302
- "epoch": 20.74,
303
- "learning_rate": 8.870833333333334e-05,
304
- "loss": 1.1179,
305
  "step": 3900
306
  },
307
  {
308
- "epoch": 21.28,
309
- "learning_rate": 8.811309523809524e-05,
310
- "loss": 1.1143,
311
  "step": 4000
312
  },
313
  {
314
- "epoch": 21.28,
315
- "eval_loss": 0.29957136511802673,
316
- "eval_runtime": 147.8067,
317
- "eval_samples_per_second": 18.565,
318
- "eval_steps_per_second": 2.321,
319
- "eval_wer": 0.450459933183836,
320
  "step": 4000
321
  },
322
  {
323
- "epoch": 21.81,
324
- "learning_rate": 8.751785714285715e-05,
325
- "loss": 1.1121,
326
  "step": 4100
327
  },
328
  {
329
- "epoch": 22.34,
330
- "learning_rate": 8.692261904761905e-05,
331
- "loss": 1.1049,
332
  "step": 4200
333
  },
334
  {
335
- "epoch": 22.87,
336
- "learning_rate": 8.632738095238096e-05,
337
- "loss": 1.092,
338
  "step": 4300
339
  },
340
  {
341
- "epoch": 23.4,
342
- "learning_rate": 8.573214285714286e-05,
343
- "loss": 1.0936,
344
  "step": 4400
345
  },
346
  {
347
- "epoch": 23.93,
348
- "learning_rate": 8.513690476190477e-05,
349
- "loss": 1.0923,
350
  "step": 4500
351
  },
352
  {
353
- "epoch": 23.93,
354
- "eval_loss": 0.2841183543205261,
355
- "eval_runtime": 145.4218,
356
- "eval_samples_per_second": 18.869,
357
- "eval_steps_per_second": 2.359,
358
- "eval_wer": 0.4229097066495813,
359
  "step": 4500
360
  },
361
  {
362
- "epoch": 24.47,
363
- "learning_rate": 8.454166666666667e-05,
364
- "loss": 1.0748,
365
  "step": 4600
366
  },
367
  {
368
- "epoch": 25.0,
369
- "learning_rate": 8.395238095238095e-05,
370
- "loss": 1.0786,
371
  "step": 4700
372
  },
373
  {
374
- "epoch": 25.53,
375
- "learning_rate": 8.335714285714286e-05,
376
- "loss": 1.0685,
377
  "step": 4800
378
  },
379
  {
380
- "epoch": 26.06,
381
- "learning_rate": 8.276190476190476e-05,
382
- "loss": 1.0681,
383
  "step": 4900
384
  },
385
  {
386
- "epoch": 26.59,
387
- "learning_rate": 8.216666666666667e-05,
388
- "loss": 1.0516,
389
  "step": 5000
390
  },
391
  {
392
- "epoch": 26.59,
393
- "eval_loss": 0.2705024182796478,
394
- "eval_runtime": 156.97,
395
- "eval_samples_per_second": 17.481,
396
- "eval_steps_per_second": 2.185,
397
- "eval_wer": 0.4113312891858496,
398
  "step": 5000
399
  },
400
  {
401
- "epoch": 27.13,
402
- "learning_rate": 8.157142857142857e-05,
403
- "loss": 1.0631,
404
  "step": 5100
405
  },
406
  {
407
- "epoch": 27.66,
408
- "learning_rate": 8.097619047619049e-05,
409
- "loss": 1.0438,
410
  "step": 5200
411
  },
412
  {
413
- "epoch": 28.19,
414
- "learning_rate": 8.03809523809524e-05,
415
- "loss": 1.0437,
416
  "step": 5300
417
  },
418
  {
419
- "epoch": 28.72,
420
- "learning_rate": 7.978571428571429e-05,
421
- "loss": 1.0334,
422
  "step": 5400
423
  },
424
  {
425
- "epoch": 29.25,
426
- "learning_rate": 7.919047619047619e-05,
427
- "loss": 1.051,
428
  "step": 5500
429
  },
430
  {
431
- "epoch": 29.25,
432
- "eval_loss": 0.26215311884880066,
433
- "eval_runtime": 149.1886,
434
- "eval_samples_per_second": 18.393,
435
- "eval_steps_per_second": 2.299,
436
- "eval_wer": 0.4078074230012356,
437
  "step": 5500
438
  },
439
  {
440
- "epoch": 29.78,
441
- "learning_rate": 7.85952380952381e-05,
442
- "loss": 1.0209,
443
  "step": 5600
444
  },
445
  {
446
- "epoch": 30.32,
447
- "learning_rate": 7.800000000000001e-05,
448
- "loss": 1.0393,
449
  "step": 5700
450
  },
451
  {
452
- "epoch": 30.85,
453
- "learning_rate": 7.740476190476192e-05,
454
- "loss": 1.0175,
455
  "step": 5800
456
  },
457
  {
458
- "epoch": 31.38,
459
- "learning_rate": 7.680952380952381e-05,
460
- "loss": 1.0208,
461
  "step": 5900
462
  },
463
  {
464
- "epoch": 31.91,
465
- "learning_rate": 7.621428571428571e-05,
466
- "loss": 1.021,
467
  "step": 6000
468
  },
469
  {
470
- "epoch": 31.91,
471
- "eval_loss": 0.26111042499542236,
472
- "eval_runtime": 144.8158,
473
- "eval_samples_per_second": 18.948,
474
- "eval_steps_per_second": 2.369,
475
- "eval_wer": 0.40085121962381587,
476
  "step": 6000
477
  },
478
  {
479
- "epoch": 32.45,
480
- "learning_rate": 7.561904761904762e-05,
481
- "loss": 1.0091,
482
  "step": 6100
483
  },
484
  {
485
- "epoch": 32.97,
486
- "learning_rate": 7.502380952380953e-05,
487
- "loss": 1.0007,
488
  "step": 6200
489
  },
490
  {
491
- "epoch": 33.51,
492
- "learning_rate": 7.442857142857144e-05,
493
- "loss": 1.0141,
494
  "step": 6300
495
  },
496
  {
497
- "epoch": 34.04,
498
- "learning_rate": 7.383333333333333e-05,
499
- "loss": 1.0072,
500
  "step": 6400
501
  },
502
  {
503
- "epoch": 34.57,
504
- "learning_rate": 7.323809523809523e-05,
505
- "loss": 0.9886,
506
  "step": 6500
507
  },
508
  {
509
- "epoch": 34.57,
510
- "eval_loss": 0.24984091520309448,
511
- "eval_runtime": 146.0157,
512
- "eval_samples_per_second": 18.792,
513
- "eval_steps_per_second": 2.349,
514
- "eval_wer": 0.3920644364102329,
515
  "step": 6500
516
  },
517
  {
518
- "epoch": 35.11,
519
- "learning_rate": 7.264285714285715e-05,
520
- "loss": 0.9955,
521
  "step": 6600
522
  },
523
  {
524
- "epoch": 35.64,
525
- "learning_rate": 7.205357142857144e-05,
526
- "loss": 0.9811,
527
  "step": 6700
528
  },
529
  {
530
- "epoch": 36.17,
531
- "learning_rate": 7.145833333333334e-05,
532
- "loss": 0.987,
533
  "step": 6800
534
  },
535
  {
536
- "epoch": 36.7,
537
- "learning_rate": 7.086309523809524e-05,
538
- "loss": 0.9776,
539
  "step": 6900
540
  },
541
  {
542
- "epoch": 37.23,
543
- "learning_rate": 7.026785714285714e-05,
544
- "loss": 0.984,
545
  "step": 7000
546
  },
547
  {
548
- "epoch": 37.23,
549
- "eval_loss": 0.25214260816574097,
550
- "eval_runtime": 151.9558,
551
- "eval_samples_per_second": 18.058,
552
- "eval_steps_per_second": 2.257,
553
- "eval_wer": 0.38446753009015605,
554
  "step": 7000
555
  },
556
  {
557
- "epoch": 37.76,
558
- "learning_rate": 6.967261904761905e-05,
559
- "loss": 0.98,
560
  "step": 7100
561
  },
562
  {
563
- "epoch": 38.3,
564
- "learning_rate": 6.907738095238096e-05,
565
- "loss": 0.9762,
566
  "step": 7200
567
  },
568
  {
569
- "epoch": 38.83,
570
- "learning_rate": 6.848214285714286e-05,
571
- "loss": 0.9601,
572
  "step": 7300
573
  },
574
  {
575
- "epoch": 39.36,
576
- "learning_rate": 6.788690476190477e-05,
577
- "loss": 0.9633,
578
  "step": 7400
579
  },
580
  {
581
- "epoch": 39.89,
582
- "learning_rate": 6.729166666666667e-05,
583
- "loss": 0.9631,
584
  "step": 7500
585
  },
586
  {
587
- "epoch": 39.89,
588
- "eval_loss": 0.24125833809375763,
589
- "eval_runtime": 185.0035,
590
- "eval_samples_per_second": 14.832,
591
- "eval_steps_per_second": 1.854,
592
- "eval_wer": 0.3790673195734749,
593
  "step": 7500
594
  },
595
  {
596
- "epoch": 40.42,
597
- "learning_rate": 6.669642857142858e-05,
598
- "loss": 0.9653,
599
  "step": 7600
600
  },
601
  {
602
- "epoch": 40.95,
603
- "learning_rate": 6.610119047619048e-05,
604
- "loss": 0.9482,
605
  "step": 7700
606
  },
607
  {
608
- "epoch": 41.49,
609
- "learning_rate": 6.550595238095238e-05,
610
- "loss": 0.9547,
611
  "step": 7800
612
  },
613
  {
614
- "epoch": 42.02,
615
- "learning_rate": 6.491071428571429e-05,
616
- "loss": 0.9635,
617
  "step": 7900
618
  },
619
  {
620
- "epoch": 42.55,
621
- "learning_rate": 6.432142857142857e-05,
622
- "loss": 0.9353,
623
  "step": 8000
624
  },
625
  {
626
- "epoch": 42.55,
627
- "eval_loss": 0.23913756012916565,
628
- "eval_runtime": 145.0257,
629
- "eval_samples_per_second": 18.921,
630
- "eval_steps_per_second": 2.365,
631
- "eval_wer": 0.3611734016749806,
632
  "step": 8000
633
  },
634
  {
635
- "epoch": 43.08,
636
- "learning_rate": 6.372619047619049e-05,
637
- "loss": 0.9484,
638
  "step": 8100
639
  },
640
  {
641
- "epoch": 43.61,
642
- "learning_rate": 6.313095238095238e-05,
643
- "loss": 0.933,
644
  "step": 8200
645
  },
646
  {
647
- "epoch": 44.15,
648
- "learning_rate": 6.253571428571429e-05,
649
- "loss": 0.9315,
650
  "step": 8300
651
  },
652
  {
653
- "epoch": 44.68,
654
- "learning_rate": 6.194047619047619e-05,
655
- "loss": 0.9337,
656
  "step": 8400
657
  },
658
  {
659
- "epoch": 45.21,
660
- "learning_rate": 6.13452380952381e-05,
661
- "loss": 0.922,
662
  "step": 8500
663
  },
664
  {
665
- "epoch": 45.21,
666
- "eval_loss": 0.2362910658121109,
667
- "eval_runtime": 152.2446,
668
- "eval_samples_per_second": 18.024,
669
- "eval_steps_per_second": 2.253,
670
- "eval_wer": 0.3570545970436136,
671
  "step": 8500
672
  },
673
  {
674
- "epoch": 45.74,
675
- "learning_rate": 6.0750000000000006e-05,
676
- "loss": 0.9274,
677
  "step": 8600
678
  },
679
  {
680
- "epoch": 46.28,
681
- "learning_rate": 6.0154761904761904e-05,
682
- "loss": 0.9243,
683
  "step": 8700
684
  },
685
  {
686
- "epoch": 46.81,
687
- "learning_rate": 5.955952380952381e-05,
688
- "loss": 0.9148,
689
  "step": 8800
690
  },
691
  {
692
- "epoch": 47.34,
693
- "learning_rate": 5.896428571428572e-05,
694
- "loss": 0.9309,
695
  "step": 8900
696
  },
697
  {
698
- "epoch": 47.87,
699
- "learning_rate": 5.8369047619047624e-05,
700
- "loss": 0.9116,
701
  "step": 9000
702
  },
703
  {
704
- "epoch": 47.87,
705
- "eval_loss": 0.2284734547138214,
706
- "eval_runtime": 145.6764,
707
- "eval_samples_per_second": 18.836,
708
- "eval_steps_per_second": 2.355,
709
- "eval_wer": 0.366756670175278,
710
  "step": 9000
711
  },
712
  {
713
- "epoch": 48.4,
714
- "learning_rate": 5.777380952380953e-05,
715
- "loss": 0.908,
716
  "step": 9100
717
  },
718
  {
719
- "epoch": 48.93,
720
- "learning_rate": 5.7178571428571426e-05,
721
- "loss": 0.9045,
722
  "step": 9200
723
  },
724
  {
725
- "epoch": 49.47,
726
- "learning_rate": 5.658333333333333e-05,
727
- "loss": 0.9069,
728
  "step": 9300
729
  },
730
  {
731
- "epoch": 50.0,
732
- "learning_rate": 5.598809523809524e-05,
733
- "loss": 0.886,
734
  "step": 9400
735
  },
736
  {
737
- "epoch": 50.53,
738
- "learning_rate": 5.539285714285715e-05,
739
- "loss": 0.8951,
740
  "step": 9500
741
  },
742
  {
743
- "epoch": 50.53,
744
- "eval_loss": 0.22562462091445923,
745
- "eval_runtime": 146.467,
746
- "eval_samples_per_second": 18.735,
747
- "eval_steps_per_second": 2.342,
748
- "eval_wer": 0.3729348771223285,
749
  "step": 9500
750
  },
751
  {
752
- "epoch": 51.06,
753
- "learning_rate": 5.479761904761905e-05,
754
- "loss": 0.9011,
755
  "step": 9600
756
  },
757
  {
758
- "epoch": 51.59,
759
- "learning_rate": 5.420238095238096e-05,
760
- "loss": 0.8858,
761
  "step": 9700
762
  },
763
  {
764
- "epoch": 52.13,
765
- "learning_rate": 5.360714285714285e-05,
766
- "loss": 0.8955,
767
  "step": 9800
768
  },
769
  {
770
- "epoch": 52.66,
771
- "learning_rate": 5.3011904761904765e-05,
772
- "loss": 0.8747,
773
  "step": 9900
774
  },
775
  {
776
- "epoch": 53.19,
777
- "learning_rate": 5.241666666666667e-05,
778
- "loss": 0.8865,
779
  "step": 10000
780
  },
781
  {
782
- "epoch": 53.19,
783
- "eval_loss": 0.22283457219600677,
784
- "eval_runtime": 147.1995,
785
- "eval_samples_per_second": 18.641,
786
- "eval_steps_per_second": 2.33,
787
- "eval_wer": 0.3663447897121413,
788
  "step": 10000
789
  },
790
  {
791
- "epoch": 53.72,
792
- "learning_rate": 5.1821428571428574e-05,
793
- "loss": 0.8671,
794
  "step": 10100
795
  },
796
  {
797
- "epoch": 54.25,
798
- "learning_rate": 5.1226190476190485e-05,
799
- "loss": 0.8771,
800
  "step": 10200
801
  },
802
- {
803
- "epoch": 54.78,
804
- "learning_rate": 5.063690476190477e-05,
805
- "loss": 0.8777,
806
- "step": 10300
807
- },
808
- {
809
- "epoch": 55.32,
810
- "learning_rate": 5.0041666666666666e-05,
811
- "loss": 0.8805,
812
- "step": 10400
813
- },
814
- {
815
- "epoch": 55.85,
816
- "learning_rate": 4.944642857142857e-05,
817
- "loss": 0.8792,
818
- "step": 10500
819
- },
820
- {
821
- "epoch": 55.85,
822
- "eval_loss": 0.22211238741874695,
823
- "eval_runtime": 149.6455,
824
- "eval_samples_per_second": 18.337,
825
- "eval_steps_per_second": 2.292,
826
- "eval_wer": 0.3656125577776761,
827
- "step": 10500
828
- },
829
- {
830
- "epoch": 56.38,
831
- "learning_rate": 4.885119047619048e-05,
832
- "loss": 0.8805,
833
- "step": 10600
834
- },
835
- {
836
- "epoch": 56.91,
837
- "learning_rate": 4.8255952380952386e-05,
838
- "loss": 0.8653,
839
- "step": 10700
840
- },
841
- {
842
- "epoch": 57.45,
843
- "learning_rate": 4.7660714285714284e-05,
844
- "loss": 0.8749,
845
- "step": 10800
846
- },
847
- {
848
- "epoch": 57.97,
849
- "learning_rate": 4.7065476190476195e-05,
850
- "loss": 0.8713,
851
- "step": 10900
852
- },
853
- {
854
- "epoch": 58.51,
855
- "learning_rate": 4.64702380952381e-05,
856
- "loss": 0.8682,
857
- "step": 11000
858
- },
859
- {
860
- "epoch": 58.51,
861
- "eval_loss": 0.22277939319610596,
862
- "eval_runtime": 153.8771,
863
- "eval_samples_per_second": 17.832,
864
- "eval_steps_per_second": 2.229,
865
- "eval_wer": 0.3322960047595076,
866
- "step": 11000
867
- },
868
- {
869
- "epoch": 59.04,
870
- "learning_rate": 4.5875000000000004e-05,
871
- "loss": 0.8556,
872
- "step": 11100
873
- },
874
- {
875
- "epoch": 59.57,
876
- "learning_rate": 4.528571428571429e-05,
877
- "loss": 0.8508,
878
- "step": 11200
879
- },
880
- {
881
- "epoch": 60.11,
882
- "learning_rate": 4.469047619047619e-05,
883
- "loss": 0.8594,
884
- "step": 11300
885
- },
886
- {
887
- "epoch": 60.64,
888
- "learning_rate": 4.4095238095238096e-05,
889
- "loss": 0.8441,
890
- "step": 11400
891
- },
892
- {
893
- "epoch": 61.17,
894
- "learning_rate": 4.35e-05,
895
- "loss": 0.8492,
896
- "step": 11500
897
- },
898
- {
899
- "epoch": 61.17,
900
- "eval_loss": 0.2166604995727539,
901
- "eval_runtime": 147.2191,
902
- "eval_samples_per_second": 18.639,
903
- "eval_steps_per_second": 2.33,
904
- "eval_wer": 0.3446066541577045,
905
- "step": 11500
906
- },
907
- {
908
- "epoch": 61.7,
909
- "learning_rate": 4.290476190476191e-05,
910
- "loss": 0.8353,
911
- "step": 11600
912
- },
913
- {
914
- "epoch": 62.23,
915
- "learning_rate": 4.230952380952381e-05,
916
- "loss": 0.8407,
917
- "step": 11700
918
- },
919
- {
920
- "epoch": 62.76,
921
- "learning_rate": 4.1714285714285714e-05,
922
- "loss": 0.8335,
923
- "step": 11800
924
- },
925
- {
926
- "epoch": 63.3,
927
- "learning_rate": 4.1119047619047625e-05,
928
- "loss": 0.8596,
929
- "step": 11900
930
- },
931
- {
932
- "epoch": 63.83,
933
- "learning_rate": 4.052380952380952e-05,
934
- "loss": 0.8365,
935
- "step": 12000
936
- },
937
- {
938
- "epoch": 63.83,
939
- "eval_loss": 0.21556589007377625,
940
- "eval_runtime": 144.6076,
941
- "eval_samples_per_second": 18.975,
942
- "eval_steps_per_second": 2.372,
943
- "eval_wer": 0.33211294677589126,
944
- "step": 12000
945
- },
946
- {
947
- "epoch": 64.36,
948
- "learning_rate": 3.9928571428571434e-05,
949
- "loss": 0.8412,
950
- "step": 12100
951
- },
952
- {
953
- "epoch": 64.89,
954
- "learning_rate": 3.933333333333333e-05,
955
- "loss": 0.8301,
956
- "step": 12200
957
- },
958
- {
959
- "epoch": 65.42,
960
- "learning_rate": 3.873809523809524e-05,
961
- "loss": 0.8283,
962
- "step": 12300
963
- },
964
- {
965
- "epoch": 65.95,
966
- "learning_rate": 3.814285714285715e-05,
967
- "loss": 0.8257,
968
- "step": 12400
969
- },
970
- {
971
- "epoch": 66.49,
972
- "learning_rate": 3.7547619047619045e-05,
973
- "loss": 0.8298,
974
- "step": 12500
975
- },
976
- {
977
- "epoch": 66.49,
978
- "eval_loss": 0.21417230367660522,
979
- "eval_runtime": 151.8253,
980
- "eval_samples_per_second": 18.073,
981
- "eval_steps_per_second": 2.259,
982
- "eval_wer": 0.3400302045672967,
983
- "step": 12500
984
- },
985
- {
986
- "epoch": 67.02,
987
- "learning_rate": 3.6952380952380956e-05,
988
- "loss": 0.834,
989
- "step": 12600
990
- },
991
- {
992
- "epoch": 67.55,
993
- "learning_rate": 3.6357142857142854e-05,
994
- "loss": 0.8155,
995
- "step": 12700
996
- },
997
- {
998
- "epoch": 68.08,
999
- "learning_rate": 3.5761904761904765e-05,
1000
- "loss": 0.8157,
1001
- "step": 12800
1002
- },
1003
- {
1004
- "epoch": 68.61,
1005
- "learning_rate": 3.516666666666667e-05,
1006
- "loss": 0.8027,
1007
- "step": 12900
1008
- },
1009
- {
1010
- "epoch": 69.15,
1011
- "learning_rate": 3.4571428571428574e-05,
1012
- "loss": 0.808,
1013
- "step": 13000
1014
- },
1015
- {
1016
- "epoch": 69.15,
1017
- "eval_loss": 0.20793649554252625,
1018
- "eval_runtime": 147.4919,
1019
- "eval_samples_per_second": 18.604,
1020
- "eval_steps_per_second": 2.326,
1021
- "eval_wer": 0.31476820282824586,
1022
- "step": 13000
1023
- },
1024
- {
1025
- "epoch": 69.68,
1026
- "learning_rate": 3.397619047619048e-05,
1027
- "loss": 0.807,
1028
- "step": 13100
1029
- },
1030
- {
1031
- "epoch": 70.21,
1032
- "learning_rate": 3.338095238095238e-05,
1033
- "loss": 0.8164,
1034
- "step": 13200
1035
- },
1036
- {
1037
- "epoch": 70.74,
1038
- "learning_rate": 3.278571428571429e-05,
1039
- "loss": 0.7979,
1040
- "step": 13300
1041
- },
1042
- {
1043
- "epoch": 71.28,
1044
- "learning_rate": 3.219047619047619e-05,
1045
- "loss": 0.815,
1046
- "step": 13400
1047
- },
1048
- {
1049
- "epoch": 71.81,
1050
- "learning_rate": 3.15952380952381e-05,
1051
- "loss": 0.7999,
1052
- "step": 13500
1053
- },
1054
- {
1055
- "epoch": 71.81,
1056
- "eval_loss": 0.21165262162685394,
1057
- "eval_runtime": 145.5259,
1058
- "eval_samples_per_second": 18.856,
1059
- "eval_steps_per_second": 2.357,
1060
- "eval_wer": 0.32254816713193907,
1061
- "step": 13500
1062
- },
1063
- {
1064
- "epoch": 72.34,
1065
- "learning_rate": 3.1e-05,
1066
- "loss": 0.8143,
1067
- "step": 13600
1068
- },
1069
- {
1070
- "epoch": 72.87,
1071
- "learning_rate": 3.040476190476191e-05,
1072
- "loss": 0.7952,
1073
- "step": 13700
1074
- },
1075
- {
1076
- "epoch": 73.4,
1077
- "learning_rate": 2.980952380952381e-05,
1078
- "loss": 0.8075,
1079
- "step": 13800
1080
- },
1081
- {
1082
- "epoch": 73.93,
1083
- "learning_rate": 2.9214285714285715e-05,
1084
- "loss": 0.8021,
1085
- "step": 13900
1086
- },
1087
- {
1088
- "epoch": 74.47,
1089
- "learning_rate": 2.8619047619047623e-05,
1090
- "loss": 0.7871,
1091
- "step": 14000
1092
- },
1093
- {
1094
- "epoch": 74.47,
1095
- "eval_loss": 0.2087966501712799,
1096
- "eval_runtime": 154.8982,
1097
- "eval_samples_per_second": 17.715,
1098
- "eval_steps_per_second": 2.214,
1099
- "eval_wer": 0.31742254359068234,
1100
- "step": 14000
1101
- },
1102
- {
1103
- "epoch": 75.0,
1104
- "learning_rate": 2.8023809523809524e-05,
1105
- "loss": 0.7898,
1106
- "step": 14100
1107
- },
1108
- {
1109
- "epoch": 75.53,
1110
- "learning_rate": 2.742857142857143e-05,
1111
- "loss": 0.7972,
1112
- "step": 14200
1113
- },
1114
- {
1115
- "epoch": 76.06,
1116
- "learning_rate": 2.6833333333333333e-05,
1117
- "loss": 0.801,
1118
- "step": 14300
1119
- },
1120
- {
1121
- "epoch": 76.59,
1122
- "learning_rate": 2.623809523809524e-05,
1123
- "loss": 0.786,
1124
- "step": 14400
1125
- },
1126
- {
1127
- "epoch": 77.13,
1128
- "learning_rate": 2.5642857142857145e-05,
1129
- "loss": 0.7858,
1130
- "step": 14500
1131
- },
1132
- {
1133
- "epoch": 77.13,
1134
- "eval_loss": 0.2059505730867386,
1135
- "eval_runtime": 147.0151,
1136
- "eval_samples_per_second": 18.665,
1137
- "eval_steps_per_second": 2.333,
1138
- "eval_wer": 0.3008100315775022,
1139
- "step": 14500
1140
- },
1141
- {
1142
- "epoch": 77.66,
1143
- "learning_rate": 2.5047619047619046e-05,
1144
- "loss": 0.7753,
1145
- "step": 14600
1146
- },
1147
- {
1148
- "epoch": 78.19,
1149
- "learning_rate": 2.4452380952380954e-05,
1150
- "loss": 0.7794,
1151
- "step": 14700
1152
- },
1153
- {
1154
- "epoch": 78.72,
1155
- "learning_rate": 2.385714285714286e-05,
1156
- "loss": 0.7775,
1157
- "step": 14800
1158
- },
1159
- {
1160
- "epoch": 79.25,
1161
- "learning_rate": 2.3261904761904763e-05,
1162
- "loss": 0.7753,
1163
- "step": 14900
1164
- },
1165
- {
1166
- "epoch": 79.78,
1167
- "learning_rate": 2.2666666666666668e-05,
1168
- "loss": 0.7764,
1169
- "step": 15000
1170
- },
1171
- {
1172
- "epoch": 79.78,
1173
- "eval_loss": 0.2128456085920334,
1174
- "eval_runtime": 150.2974,
1175
- "eval_samples_per_second": 18.257,
1176
- "eval_steps_per_second": 2.282,
1177
- "eval_wer": 0.3145851448446295,
1178
- "step": 15000
1179
- },
1180
- {
1181
- "epoch": 80.32,
1182
- "learning_rate": 2.2071428571428572e-05,
1183
- "loss": 0.7847,
1184
- "step": 15100
1185
- },
1186
- {
1187
- "epoch": 80.85,
1188
- "learning_rate": 2.1476190476190477e-05,
1189
- "loss": 0.774,
1190
- "step": 15200
1191
- },
1192
- {
1193
- "epoch": 81.38,
1194
- "learning_rate": 2.0886904761904763e-05,
1195
- "loss": 0.7739,
1196
- "step": 15300
1197
- },
1198
- {
1199
- "epoch": 81.91,
1200
- "learning_rate": 2.0291666666666667e-05,
1201
- "loss": 0.7579,
1202
- "step": 15400
1203
- },
1204
- {
1205
- "epoch": 82.45,
1206
- "learning_rate": 1.9696428571428572e-05,
1207
- "loss": 0.7684,
1208
- "step": 15500
1209
- },
1210
- {
1211
- "epoch": 82.45,
1212
- "eval_loss": 0.20856936275959015,
1213
- "eval_runtime": 149.2381,
1214
- "eval_samples_per_second": 18.387,
1215
- "eval_steps_per_second": 2.298,
1216
- "eval_wer": 0.31005445975012585,
1217
- "step": 15500
1218
- },
1219
- {
1220
- "epoch": 82.97,
1221
- "learning_rate": 1.9101190476190476e-05,
1222
- "loss": 0.7584,
1223
- "step": 15600
1224
- },
1225
- {
1226
- "epoch": 83.51,
1227
- "learning_rate": 1.850595238095238e-05,
1228
- "loss": 0.7653,
1229
- "step": 15700
1230
- },
1231
- {
1232
- "epoch": 84.04,
1233
- "learning_rate": 1.7910714285714285e-05,
1234
- "loss": 0.7628,
1235
- "step": 15800
1236
- },
1237
- {
1238
- "epoch": 84.57,
1239
- "learning_rate": 1.731547619047619e-05,
1240
- "loss": 0.76,
1241
- "step": 15900
1242
- },
1243
- {
1244
- "epoch": 85.11,
1245
- "learning_rate": 1.6720238095238098e-05,
1246
- "loss": 0.7717,
1247
- "step": 16000
1248
- },
1249
- {
1250
- "epoch": 85.11,
1251
- "eval_loss": 0.20475880801677704,
1252
- "eval_runtime": 151.8607,
1253
- "eval_samples_per_second": 18.069,
1254
- "eval_steps_per_second": 2.259,
1255
- "eval_wer": 0.3068509450368404,
1256
- "step": 16000
1257
- },
1258
- {
1259
- "epoch": 85.64,
1260
- "learning_rate": 1.6125000000000002e-05,
1261
- "loss": 0.7459,
1262
- "step": 16100
1263
- },
1264
- {
1265
- "epoch": 86.17,
1266
- "learning_rate": 1.5529761904761907e-05,
1267
- "loss": 0.7561,
1268
- "step": 16200
1269
- },
1270
- {
1271
- "epoch": 86.7,
1272
- "learning_rate": 1.493452380952381e-05,
1273
- "loss": 0.7457,
1274
- "step": 16300
1275
- },
1276
- {
1277
- "epoch": 87.23,
1278
- "learning_rate": 1.4339285714285716e-05,
1279
- "loss": 0.7599,
1280
- "step": 16400
1281
- },
1282
- {
1283
- "epoch": 87.76,
1284
- "learning_rate": 1.374404761904762e-05,
1285
- "loss": 0.7435,
1286
- "step": 16500
1287
- },
1288
- {
1289
- "epoch": 87.76,
1290
- "eval_loss": 0.20274706184864044,
1291
- "eval_runtime": 146.6164,
1292
- "eval_samples_per_second": 18.716,
1293
- "eval_steps_per_second": 2.339,
1294
- "eval_wer": 0.3054780101597181,
1295
- "step": 16500
1296
- },
1297
- {
1298
- "epoch": 88.3,
1299
- "learning_rate": 1.3148809523809525e-05,
1300
- "loss": 0.7519,
1301
- "step": 16600
1302
- },
1303
- {
1304
- "epoch": 88.83,
1305
- "learning_rate": 1.255357142857143e-05,
1306
- "loss": 0.7483,
1307
- "step": 16700
1308
- },
1309
- {
1310
- "epoch": 89.36,
1311
- "learning_rate": 1.1958333333333334e-05,
1312
- "loss": 0.7483,
1313
- "step": 16800
1314
- },
1315
- {
1316
- "epoch": 89.89,
1317
- "learning_rate": 1.1363095238095238e-05,
1318
- "loss": 0.7353,
1319
- "step": 16900
1320
- },
1321
- {
1322
- "epoch": 90.42,
1323
- "learning_rate": 1.0767857142857143e-05,
1324
- "loss": 0.7378,
1325
- "step": 17000
1326
- },
1327
- {
1328
- "epoch": 90.42,
1329
- "eval_loss": 0.20591045916080475,
1330
- "eval_runtime": 153.244,
1331
- "eval_samples_per_second": 17.906,
1332
- "eval_steps_per_second": 2.238,
1333
- "eval_wer": 0.29925403871676354,
1334
- "step": 17000
1335
- },
1336
- {
1337
- "epoch": 90.95,
1338
- "learning_rate": 1.0172619047619047e-05,
1339
- "loss": 0.7383,
1340
- "step": 17100
1341
- },
1342
- {
1343
- "epoch": 91.49,
1344
- "learning_rate": 9.577380952380953e-06,
1345
- "loss": 0.7466,
1346
- "step": 17200
1347
- },
1348
- {
1349
- "epoch": 92.02,
1350
- "learning_rate": 8.982142857142856e-06,
1351
- "loss": 0.7377,
1352
- "step": 17300
1353
- },
1354
- {
1355
- "epoch": 92.55,
1356
- "learning_rate": 8.386904761904762e-06,
1357
- "loss": 0.7369,
1358
- "step": 17400
1359
- },
1360
- {
1361
- "epoch": 93.08,
1362
- "learning_rate": 7.791666666666667e-06,
1363
- "loss": 0.7406,
1364
- "step": 17500
1365
- },
1366
- {
1367
- "epoch": 93.08,
1368
- "eval_loss": 0.2040216028690338,
1369
- "eval_runtime": 148.029,
1370
- "eval_samples_per_second": 18.537,
1371
- "eval_steps_per_second": 2.317,
1372
- "eval_wer": 0.2966454624502311,
1373
- "step": 17500
1374
- },
1375
- {
1376
- "epoch": 93.61,
1377
- "learning_rate": 7.2023809523809524e-06,
1378
- "loss": 0.7348,
1379
- "step": 17600
1380
- },
1381
- {
1382
- "epoch": 94.15,
1383
- "learning_rate": 6.613095238095239e-06,
1384
- "loss": 0.7356,
1385
- "step": 17700
1386
- },
1387
- {
1388
- "epoch": 94.68,
1389
- "learning_rate": 6.017857142857143e-06,
1390
- "loss": 0.7305,
1391
- "step": 17800
1392
- },
1393
- {
1394
- "epoch": 95.21,
1395
- "learning_rate": 5.422619047619048e-06,
1396
- "loss": 0.741,
1397
- "step": 17900
1398
- },
1399
- {
1400
- "epoch": 95.74,
1401
- "learning_rate": 4.827380952380952e-06,
1402
- "loss": 0.7361,
1403
- "step": 18000
1404
- },
1405
- {
1406
- "epoch": 95.74,
1407
- "eval_loss": 0.20563913881778717,
1408
- "eval_runtime": 145.4837,
1409
- "eval_samples_per_second": 18.861,
1410
- "eval_steps_per_second": 2.358,
1411
- "eval_wer": 0.30003203514713284,
1412
- "step": 18000
1413
- },
1414
- {
1415
- "epoch": 96.28,
1416
- "learning_rate": 4.232142857142858e-06,
1417
- "loss": 0.7359,
1418
- "step": 18100
1419
- },
1420
- {
1421
- "epoch": 96.81,
1422
- "learning_rate": 3.636904761904762e-06,
1423
- "loss": 0.7246,
1424
- "step": 18200
1425
- },
1426
- {
1427
- "epoch": 97.34,
1428
- "learning_rate": 3.041666666666667e-06,
1429
- "loss": 0.7353,
1430
- "step": 18300
1431
- },
1432
- {
1433
- "epoch": 97.87,
1434
- "learning_rate": 2.4464285714285715e-06,
1435
- "loss": 0.7305,
1436
- "step": 18400
1437
- },
1438
- {
1439
- "epoch": 98.4,
1440
- "learning_rate": 1.8511904761904762e-06,
1441
- "loss": 0.7379,
1442
- "step": 18500
1443
- },
1444
- {
1445
- "epoch": 98.4,
1446
- "eval_loss": 0.20308499038219452,
1447
- "eval_runtime": 146.2002,
1448
- "eval_samples_per_second": 18.769,
1449
- "eval_steps_per_second": 2.346,
1450
- "eval_wer": 0.29756075236831264,
1451
- "step": 18500
1452
- },
1453
- {
1454
- "epoch": 98.93,
1455
- "learning_rate": 1.2559523809523812e-06,
1456
- "loss": 0.724,
1457
- "step": 18600
1458
- },
1459
- {
1460
- "epoch": 99.47,
1461
- "learning_rate": 6.607142857142858e-07,
1462
- "loss": 0.7339,
1463
- "step": 18700
1464
- },
1465
  {
1466
  "epoch": 100.0,
1467
- "learning_rate": 6.547619047619047e-08,
1468
- "loss": 0.7277,
1469
- "step": 18800
1470
  },
1471
  {
1472
  "epoch": 100.0,
1473
- "step": 18800,
1474
- "total_flos": 1.0789860816574084e+20,
1475
- "train_loss": 1.1418190615227881,
1476
- "train_runtime": 52190.9896,
1477
- "train_samples_per_second": 11.561,
1478
- "train_steps_per_second": 0.36
1479
  }
1480
  ],
1481
- "max_steps": 18800,
1482
  "num_train_epochs": 100,
1483
- "total_flos": 1.0789860816574084e+20,
1484
  "trial_name": null,
1485
  "trial_params": null
1486
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
+ "global_step": 10300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.97,
12
  "learning_rate": 4.950000000000001e-06,
13
+ "loss": 13.4586,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 1.94,
18
  "learning_rate": 9.950000000000001e-06,
19
+ "loss": 5.8722,
20
  "step": 200
21
  },
22
  {
23
+ "epoch": 2.91,
24
  "learning_rate": 1.4950000000000001e-05,
25
+ "loss": 4.0954,
26
  "step": 300
27
  },
28
  {
29
+ "epoch": 3.88,
30
  "learning_rate": 1.995e-05,
31
+ "loss": 3.572,
32
  "step": 400
33
  },
34
  {
35
+ "epoch": 4.85,
36
  "learning_rate": 2.495e-05,
37
+ "loss": 3.2914,
38
  "step": 500
39
  },
40
  {
41
+ "epoch": 4.85,
42
+ "eval_loss": 3.2282841205596924,
43
+ "eval_runtime": 135.7815,
44
+ "eval_samples_per_second": 20.194,
45
+ "eval_steps_per_second": 2.526,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
+ "epoch": 5.83,
51
  "learning_rate": 2.995e-05,
52
+ "loss": 3.2087,
53
  "step": 600
54
  },
55
  {
56
+ "epoch": 6.8,
57
  "learning_rate": 3.495e-05,
58
+ "loss": 3.1802,
59
  "step": 700
60
  },
61
  {
62
+ "epoch": 7.77,
63
  "learning_rate": 3.995e-05,
64
+ "loss": 3.1307,
65
  "step": 800
66
  },
67
  {
68
+ "epoch": 8.74,
69
  "learning_rate": 4.495e-05,
70
+ "loss": 3.0922,
71
  "step": 900
72
  },
73
  {
74
+ "epoch": 9.71,
75
  "learning_rate": 4.995e-05,
76
+ "loss": 3.0068,
77
  "step": 1000
78
  },
79
  {
80
+ "epoch": 9.71,
81
+ "eval_loss": 2.7939300537109375,
82
+ "eval_runtime": 134.6432,
83
+ "eval_samples_per_second": 20.365,
84
+ "eval_steps_per_second": 2.547,
85
+ "eval_wer": 0.997957609040984,
86
  "step": 1000
87
  },
88
  {
89
+ "epoch": 10.68,
90
  "learning_rate": 5.495e-05,
91
+ "loss": 2.5073,
92
  "step": 1100
93
  },
94
  {
95
+ "epoch": 11.65,
96
  "learning_rate": 5.995000000000001e-05,
97
+ "loss": 1.8326,
98
  "step": 1200
99
  },
100
  {
101
+ "epoch": 12.62,
102
+ "learning_rate": 6.494999999999999e-05,
103
+ "loss": 1.598,
104
  "step": 1300
105
  },
106
  {
107
+ "epoch": 13.59,
108
+ "learning_rate": 6.995e-05,
109
+ "loss": 1.5016,
110
  "step": 1400
111
  },
112
  {
113
+ "epoch": 14.56,
114
+ "learning_rate": 7.495e-05,
115
+ "loss": 1.4306,
116
  "step": 1500
117
  },
118
  {
119
+ "epoch": 14.56,
120
+ "eval_loss": 0.48574715852737427,
121
+ "eval_runtime": 133.5239,
122
+ "eval_samples_per_second": 20.536,
123
+ "eval_steps_per_second": 2.569,
124
+ "eval_wer": 0.6313711251304861,
125
  "step": 1500
126
  },
127
  {
128
+ "epoch": 15.53,
129
+ "learning_rate": 7.995e-05,
130
+ "loss": 1.3756,
131
  "step": 1600
132
  },
133
  {
134
+ "epoch": 16.5,
135
+ "learning_rate": 8.495e-05,
136
+ "loss": 1.3583,
137
  "step": 1700
138
  },
139
  {
140
+ "epoch": 17.48,
141
+ "learning_rate": 8.995e-05,
142
+ "loss": 1.3058,
143
  "step": 1800
144
  },
145
  {
146
+ "epoch": 18.45,
147
+ "learning_rate": 9.495e-05,
148
+ "loss": 1.2949,
149
  "step": 1900
150
  },
151
  {
152
+ "epoch": 19.42,
153
+ "learning_rate": 9.995e-05,
154
+ "loss": 1.2831,
155
  "step": 2000
156
  },
157
  {
158
+ "epoch": 19.42,
159
+ "eval_loss": 0.3678707182407379,
160
+ "eval_runtime": 134.1491,
161
+ "eval_samples_per_second": 20.44,
162
+ "eval_steps_per_second": 2.557,
163
+ "eval_wer": 0.6065901148277584,
164
  "step": 2000
165
  },
166
  {
167
+ "epoch": 20.39,
168
+ "learning_rate": 9.880722891566265e-05,
169
+ "loss": 1.2725,
170
  "step": 2100
171
  },
172
  {
173
+ "epoch": 21.36,
174
+ "learning_rate": 9.76144578313253e-05,
175
+ "loss": 1.2436,
176
  "step": 2200
177
  },
178
  {
179
+ "epoch": 22.33,
180
+ "learning_rate": 9.640963855421687e-05,
181
+ "loss": 1.2363,
182
  "step": 2300
183
  },
184
  {
185
+ "epoch": 23.3,
186
+ "learning_rate": 9.521686746987952e-05,
187
+ "loss": 1.2243,
188
  "step": 2400
189
  },
190
  {
191
+ "epoch": 24.27,
192
+ "learning_rate": 9.402409638554217e-05,
193
+ "loss": 1.2065,
194
  "step": 2500
195
  },
196
  {
197
+ "epoch": 24.27,
198
+ "eval_loss": 0.33028003573417664,
199
+ "eval_runtime": 134.2277,
200
+ "eval_samples_per_second": 20.428,
201
+ "eval_steps_per_second": 2.555,
202
+ "eval_wer": 0.5559842055099169,
203
  "step": 2500
204
  },
205
  {
206
+ "epoch": 25.24,
207
+ "learning_rate": 9.281927710843374e-05,
208
+ "loss": 1.192,
209
  "step": 2600
210
  },
211
  {
212
+ "epoch": 26.21,
213
+ "learning_rate": 9.161445783132531e-05,
214
+ "loss": 1.1816,
215
  "step": 2700
216
  },
217
  {
218
+ "epoch": 27.18,
219
+ "learning_rate": 9.040963855421686e-05,
220
+ "loss": 1.1869,
221
  "step": 2800
222
  },
223
  {
224
+ "epoch": 28.16,
225
+ "learning_rate": 8.920481927710844e-05,
226
+ "loss": 1.1728,
227
  "step": 2900
228
  },
229
  {
230
+ "epoch": 29.13,
231
+ "learning_rate": 8.800000000000001e-05,
232
+ "loss": 1.1449,
233
  "step": 3000
234
  },
235
  {
236
+ "epoch": 29.13,
237
+ "eval_loss": 0.3007894456386566,
238
+ "eval_runtime": 133.5503,
239
+ "eval_samples_per_second": 20.532,
240
+ "eval_steps_per_second": 2.568,
241
+ "eval_wer": 0.46902373712159035,
242
  "step": 3000
243
  },
244
  {
245
+ "epoch": 30.1,
246
+ "learning_rate": 8.679518072289157e-05,
247
+ "loss": 1.1408,
248
  "step": 3100
249
  },
250
  {
251
+ "epoch": 31.07,
252
+ "learning_rate": 8.559036144578315e-05,
253
+ "loss": 1.1319,
254
  "step": 3200
255
  },
256
  {
257
+ "epoch": 32.04,
258
+ "learning_rate": 8.43855421686747e-05,
259
+ "loss": 1.1178,
260
  "step": 3300
261
  },
262
  {
263
+ "epoch": 33.01,
264
+ "learning_rate": 8.318072289156627e-05,
265
+ "loss": 1.1122,
266
  "step": 3400
267
  },
268
  {
269
+ "epoch": 33.98,
270
+ "learning_rate": 8.197590361445784e-05,
271
+ "loss": 1.0926,
272
  "step": 3500
273
  },
274
  {
275
+ "epoch": 33.98,
276
+ "eval_loss": 0.28173714876174927,
277
+ "eval_runtime": 132.429,
278
+ "eval_samples_per_second": 20.705,
279
+ "eval_steps_per_second": 2.59,
280
+ "eval_wer": 0.4618980619979122,
281
  "step": 3500
282
  },
283
  {
284
+ "epoch": 34.95,
285
+ "learning_rate": 8.07710843373494e-05,
286
+ "loss": 1.0935,
287
  "step": 3600
288
  },
289
  {
290
+ "epoch": 35.92,
291
+ "learning_rate": 7.956626506024096e-05,
292
+ "loss": 1.0815,
293
  "step": 3700
294
  },
295
  {
296
+ "epoch": 36.89,
297
+ "learning_rate": 7.836144578313254e-05,
298
+ "loss": 1.0856,
299
  "step": 3800
300
  },
301
  {
302
+ "epoch": 37.86,
303
+ "learning_rate": 7.71566265060241e-05,
304
+ "loss": 1.0732,
305
  "step": 3900
306
  },
307
  {
308
+ "epoch": 38.83,
309
+ "learning_rate": 7.595180722891566e-05,
310
+ "loss": 1.0635,
311
  "step": 4000
312
  },
313
  {
314
+ "epoch": 38.83,
315
+ "eval_loss": 0.2665168046951294,
316
+ "eval_runtime": 133.7977,
317
+ "eval_samples_per_second": 20.494,
318
+ "eval_steps_per_second": 2.564,
319
+ "eval_wer": 0.4391140561884446,
320
  "step": 4000
321
  },
322
  {
323
+ "epoch": 39.81,
324
+ "learning_rate": 7.474698795180723e-05,
325
+ "loss": 1.0614,
326
  "step": 4100
327
  },
328
  {
329
+ "epoch": 40.78,
330
+ "learning_rate": 7.35421686746988e-05,
331
+ "loss": 1.0457,
332
  "step": 4200
333
  },
334
  {
335
+ "epoch": 41.75,
336
+ "learning_rate": 7.233734939759036e-05,
337
+ "loss": 1.039,
338
  "step": 4300
339
  },
340
  {
341
+ "epoch": 42.72,
342
+ "learning_rate": 7.113253012048193e-05,
343
+ "loss": 1.0151,
344
  "step": 4400
345
  },
346
  {
347
+ "epoch": 43.69,
348
+ "learning_rate": 6.99277108433735e-05,
349
+ "loss": 1.029,
350
  "step": 4500
351
  },
352
  {
353
+ "epoch": 43.69,
354
+ "eval_loss": 0.26156488060951233,
355
+ "eval_runtime": 133.7699,
356
+ "eval_samples_per_second": 20.498,
357
+ "eval_steps_per_second": 2.564,
358
+ "eval_wer": 0.4175100984886307,
359
  "step": 4500
360
  },
361
  {
362
+ "epoch": 44.66,
363
+ "learning_rate": 6.873493975903614e-05,
364
+ "loss": 1.0254,
365
  "step": 4600
366
  },
367
  {
368
+ "epoch": 45.63,
369
+ "learning_rate": 6.753012048192771e-05,
370
+ "loss": 1.0328,
371
  "step": 4700
372
  },
373
  {
374
+ "epoch": 46.6,
375
+ "learning_rate": 6.632530120481928e-05,
376
+ "loss": 1.022,
377
  "step": 4800
378
  },
379
  {
380
+ "epoch": 47.57,
381
+ "learning_rate": 6.512048192771085e-05,
382
+ "loss": 1.0021,
383
  "step": 4900
384
  },
385
  {
386
+ "epoch": 48.54,
387
+ "learning_rate": 6.391566265060241e-05,
388
+ "loss": 1.0064,
389
  "step": 5000
390
  },
391
  {
392
+ "epoch": 48.54,
393
+ "eval_loss": 0.24684669077396393,
394
+ "eval_runtime": 133.5,
395
+ "eval_samples_per_second": 20.539,
396
+ "eval_steps_per_second": 2.569,
397
+ "eval_wer": 0.4051195933372668,
398
  "step": 5000
399
  },
400
  {
401
+ "epoch": 49.51,
402
+ "learning_rate": 6.271084337349398e-05,
403
+ "loss": 0.9791,
404
  "step": 5100
405
  },
406
  {
407
+ "epoch": 50.49,
408
+ "learning_rate": 6.150602409638555e-05,
409
+ "loss": 0.9722,
410
  "step": 5200
411
  },
412
  {
413
+ "epoch": 51.46,
414
+ "learning_rate": 6.030120481927711e-05,
415
+ "loss": 0.9815,
416
  "step": 5300
417
  },
418
  {
419
+ "epoch": 52.43,
420
+ "learning_rate": 5.909638554216868e-05,
421
+ "loss": 0.9633,
422
  "step": 5400
423
  },
424
  {
425
+ "epoch": 53.4,
426
+ "learning_rate": 5.789156626506025e-05,
427
+ "loss": 0.9659,
428
  "step": 5500
429
  },
430
  {
431
+ "epoch": 53.4,
432
+ "eval_loss": 0.2394031286239624,
433
+ "eval_runtime": 133.1725,
434
+ "eval_samples_per_second": 20.59,
435
+ "eval_steps_per_second": 2.576,
436
+ "eval_wer": 0.38596650478827216,
437
  "step": 5500
438
  },
439
  {
440
+ "epoch": 54.37,
441
+ "learning_rate": 5.668674698795181e-05,
442
+ "loss": 0.9544,
443
  "step": 5600
444
  },
445
  {
446
+ "epoch": 55.34,
447
+ "learning_rate": 5.5481927710843374e-05,
448
+ "loss": 0.9581,
449
  "step": 5700
450
  },
451
  {
452
+ "epoch": 56.31,
453
+ "learning_rate": 5.427710843373495e-05,
454
+ "loss": 0.9437,
455
  "step": 5800
456
  },
457
  {
458
+ "epoch": 57.28,
459
+ "learning_rate": 5.307228915662651e-05,
460
+ "loss": 0.9378,
461
  "step": 5900
462
  },
463
  {
464
+ "epoch": 58.25,
465
+ "learning_rate": 5.186746987951807e-05,
466
+ "loss": 0.9254,
467
  "step": 6000
468
  },
469
  {
470
+ "epoch": 58.25,
471
+ "eval_loss": 0.2373155653476715,
472
+ "eval_runtime": 133.3175,
473
+ "eval_samples_per_second": 20.567,
474
+ "eval_steps_per_second": 2.573,
475
+ "eval_wer": 0.3688558071982935,
476
  "step": 6000
477
  },
478
  {
479
+ "epoch": 59.22,
480
+ "learning_rate": 5.0662650602409644e-05,
481
+ "loss": 0.9321,
482
  "step": 6100
483
  },
484
  {
485
+ "epoch": 60.19,
486
+ "learning_rate": 4.9457831325301205e-05,
487
+ "loss": 0.9122,
488
  "step": 6200
489
  },
490
  {
491
+ "epoch": 61.17,
492
+ "learning_rate": 4.825301204819277e-05,
493
+ "loss": 0.9148,
494
  "step": 6300
495
  },
496
  {
497
+ "epoch": 62.14,
498
+ "learning_rate": 4.704819277108434e-05,
499
+ "loss": 0.9177,
500
  "step": 6400
501
  },
502
  {
503
+ "epoch": 63.11,
504
+ "learning_rate": 4.584337349397591e-05,
505
+ "loss": 0.9209,
506
  "step": 6500
507
  },
508
  {
509
+ "epoch": 63.11,
510
+ "eval_loss": 0.23466718196868896,
511
+ "eval_runtime": 134.3014,
512
+ "eval_samples_per_second": 20.417,
513
+ "eval_steps_per_second": 2.554,
514
+ "eval_wer": 0.367040348568057,
515
  "step": 6500
516
  },
517
  {
518
+ "epoch": 64.08,
519
+ "learning_rate": 4.4638554216867476e-05,
520
+ "loss": 0.8981,
521
  "step": 6600
522
  },
523
  {
524
+ "epoch": 65.05,
525
+ "learning_rate": 4.344578313253012e-05,
526
+ "loss": 0.8927,
527
  "step": 6700
528
  },
529
  {
530
+ "epoch": 66.02,
531
+ "learning_rate": 4.224096385542169e-05,
532
+ "loss": 0.8986,
533
  "step": 6800
534
  },
535
  {
536
+ "epoch": 66.99,
537
+ "learning_rate": 4.1036144578313255e-05,
538
+ "loss": 0.8867,
539
  "step": 6900
540
  },
541
  {
542
+ "epoch": 67.96,
543
+ "learning_rate": 3.983132530120482e-05,
544
+ "loss": 0.889,
545
  "step": 7000
546
  },
547
  {
548
+ "epoch": 67.96,
549
+ "eval_loss": 0.22911565005779266,
550
+ "eval_runtime": 133.5899,
551
+ "eval_samples_per_second": 20.526,
552
+ "eval_steps_per_second": 2.568,
553
+ "eval_wer": 0.36871964780102573,
554
  "step": 7000
555
  },
556
  {
557
+ "epoch": 68.93,
558
+ "learning_rate": 3.862650602409639e-05,
559
+ "loss": 0.885,
560
  "step": 7100
561
  },
562
  {
563
+ "epoch": 69.9,
564
+ "learning_rate": 3.742168674698796e-05,
565
+ "loss": 0.8772,
566
  "step": 7200
567
  },
568
  {
569
+ "epoch": 70.87,
570
+ "learning_rate": 3.62289156626506e-05,
571
+ "loss": 0.8798,
572
  "step": 7300
573
  },
574
  {
575
+ "epoch": 71.84,
576
+ "learning_rate": 3.502409638554217e-05,
577
+ "loss": 0.8808,
578
  "step": 7400
579
  },
580
  {
581
+ "epoch": 72.82,
582
+ "learning_rate": 3.3819277108433736e-05,
583
+ "loss": 0.8859,
584
  "step": 7500
585
  },
586
  {
587
+ "epoch": 72.82,
588
+ "eval_loss": 0.22717151045799255,
589
+ "eval_runtime": 134.7148,
590
+ "eval_samples_per_second": 20.354,
591
+ "eval_steps_per_second": 2.546,
592
+ "eval_wer": 0.3615939726773476,
593
  "step": 7500
594
  },
595
  {
596
+ "epoch": 73.79,
597
+ "learning_rate": 3.2614457831325304e-05,
598
+ "loss": 0.8713,
599
  "step": 7600
600
  },
601
  {
602
+ "epoch": 74.76,
603
+ "learning_rate": 3.140963855421687e-05,
604
+ "loss": 0.8734,
605
  "step": 7700
606
  },
607
  {
608
+ "epoch": 75.73,
609
+ "learning_rate": 3.0204819277108436e-05,
610
+ "loss": 0.8565,
611
  "step": 7800
612
  },
613
  {
614
+ "epoch": 76.7,
615
+ "learning_rate": 2.9e-05,
616
+ "loss": 0.8492,
617
  "step": 7900
618
  },
619
  {
620
+ "epoch": 77.67,
621
+ "learning_rate": 2.7795180722891568e-05,
622
+ "loss": 0.8441,
623
  "step": 8000
624
  },
625
  {
626
+ "epoch": 77.67,
627
+ "eval_loss": 0.22322185337543488,
628
+ "eval_runtime": 134.4634,
629
+ "eval_samples_per_second": 20.392,
630
+ "eval_steps_per_second": 2.551,
631
+ "eval_wer": 0.35383288703308674,
632
  "step": 8000
633
  },
634
  {
635
+ "epoch": 78.64,
636
+ "learning_rate": 2.6590361445783136e-05,
637
+ "loss": 0.8516,
638
  "step": 8100
639
  },
640
  {
641
+ "epoch": 79.61,
642
+ "learning_rate": 2.5385542168674696e-05,
643
+ "loss": 0.8451,
644
  "step": 8200
645
  },
646
  {
647
+ "epoch": 80.58,
648
+ "learning_rate": 2.4180722891566264e-05,
649
+ "loss": 0.8346,
650
  "step": 8300
651
  },
652
  {
653
+ "epoch": 81.55,
654
+ "learning_rate": 2.2975903614457832e-05,
655
+ "loss": 0.8378,
656
  "step": 8400
657
  },
658
  {
659
+ "epoch": 82.52,
660
+ "learning_rate": 2.17710843373494e-05,
661
+ "loss": 0.8284,
662
  "step": 8500
663
  },
664
  {
665
+ "epoch": 82.52,
666
+ "eval_loss": 0.22235004603862762,
667
+ "eval_runtime": 133.9778,
668
+ "eval_samples_per_second": 20.466,
669
+ "eval_steps_per_second": 2.56,
670
+ "eval_wer": 0.33817455634729726,
671
  "step": 8500
672
  },
673
  {
674
+ "epoch": 83.5,
675
+ "learning_rate": 2.0566265060240967e-05,
676
+ "loss": 0.8269,
677
  "step": 8600
678
  },
679
  {
680
+ "epoch": 84.47,
681
+ "learning_rate": 1.936144578313253e-05,
682
+ "loss": 0.8186,
683
  "step": 8700
684
  },
685
  {
686
+ "epoch": 85.44,
687
+ "learning_rate": 1.8156626506024096e-05,
688
+ "loss": 0.8243,
689
  "step": 8800
690
  },
691
  {
692
+ "epoch": 86.41,
693
+ "learning_rate": 1.6951807228915663e-05,
694
+ "loss": 0.8279,
695
  "step": 8900
696
  },
697
  {
698
+ "epoch": 87.38,
699
+ "learning_rate": 1.574698795180723e-05,
700
+ "loss": 0.8142,
701
  "step": 9000
702
  },
703
  {
704
+ "epoch": 87.38,
705
+ "eval_loss": 0.2192818820476532,
706
+ "eval_runtime": 132.2621,
707
+ "eval_samples_per_second": 20.732,
708
+ "eval_steps_per_second": 2.593,
709
+ "eval_wer": 0.33104888122361914,
710
  "step": 9000
711
  },
712
  {
713
+ "epoch": 88.35,
714
+ "learning_rate": 1.4542168674698795e-05,
715
+ "loss": 0.8071,
716
  "step": 9100
717
  },
718
  {
719
+ "epoch": 89.32,
720
+ "learning_rate": 1.3337349397590363e-05,
721
+ "loss": 0.8075,
722
  "step": 9200
723
  },
724
  {
725
+ "epoch": 90.29,
726
+ "learning_rate": 1.2132530120481929e-05,
727
+ "loss": 0.8042,
728
  "step": 9300
729
  },
730
  {
731
+ "epoch": 91.26,
732
+ "learning_rate": 1.0927710843373493e-05,
733
+ "loss": 0.7916,
734
  "step": 9400
735
  },
736
  {
737
+ "epoch": 92.23,
738
+ "learning_rate": 9.722891566265061e-06,
739
+ "loss": 0.8012,
740
  "step": 9500
741
  },
742
  {
743
+ "epoch": 92.23,
744
+ "eval_loss": 0.21682003140449524,
745
+ "eval_runtime": 133.9404,
746
+ "eval_samples_per_second": 20.472,
747
+ "eval_steps_per_second": 2.561,
748
+ "eval_wer": 0.3276448962919257,
749
  "step": 9500
750
  },
751
  {
752
+ "epoch": 93.2,
753
+ "learning_rate": 8.518072289156627e-06,
754
+ "loss": 0.8055,
755
  "step": 9600
756
  },
757
  {
758
+ "epoch": 94.17,
759
+ "learning_rate": 7.313253012048194e-06,
760
+ "loss": 0.7955,
761
  "step": 9700
762
  },
763
  {
764
+ "epoch": 95.15,
765
+ "learning_rate": 6.108433734939759e-06,
766
+ "loss": 0.7961,
767
  "step": 9800
768
  },
769
  {
770
+ "epoch": 96.12,
771
+ "learning_rate": 4.903614457831326e-06,
772
+ "loss": 0.7843,
773
  "step": 9900
774
  },
775
  {
776
+ "epoch": 97.09,
777
+ "learning_rate": 3.6987951807228917e-06,
778
+ "loss": 0.7781,
779
  "step": 10000
780
  },
781
  {
782
+ "epoch": 97.09,
783
+ "eval_loss": 0.21628263592720032,
784
+ "eval_runtime": 133.5255,
785
+ "eval_samples_per_second": 20.535,
786
+ "eval_steps_per_second": 2.569,
787
+ "eval_wer": 0.3240593654972087,
788
  "step": 10000
789
  },
790
  {
791
+ "epoch": 98.06,
792
+ "learning_rate": 2.493975903614458e-06,
793
+ "loss": 0.7842,
794
  "step": 10100
795
  },
796
  {
797
+ "epoch": 99.03,
798
+ "learning_rate": 1.2891566265060241e-06,
799
+ "loss": 0.7821,
800
  "step": 10200
801
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
802
  {
803
  "epoch": 100.0,
804
+ "learning_rate": 9.638554216867469e-08,
805
+ "loss": 0.7779,
806
+ "step": 10300
807
  },
808
  {
809
  "epoch": 100.0,
810
+ "step": 10300,
811
+ "total_flos": 5.823193156406256e+19,
812
+ "train_loss": 1.3660302423273476,
813
+ "train_runtime": 26867.6077,
814
+ "train_samples_per_second": 12.253,
815
+ "train_steps_per_second": 0.383
816
  }
817
  ],
818
+ "max_steps": 10300,
819
  "num_train_epochs": 100,
820
+ "total_flos": 5.823193156406256e+19,
821
  "trial_name": null,
822
  "trial_params": null
823
  }