marinone94 committed on
Commit
a0cfb9d
‒
1 Parent(s): ff83dcc

Training in progress, step 60

Browse files
checkpoint-3000/trainer_state.json DELETED
@@ -1,1186 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.3810838798757334,
5
- "global_step": 3000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 1.0305343511450381e-05,
13
- "loss": 13.0539,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.02,
18
- "learning_rate": 2.118320610687023e-05,
19
- "loss": 20.9649,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.03,
24
- "learning_rate": 3.206106870229007e-05,
25
- "loss": 21.1001,
26
- "step": 60
27
- },
28
- {
29
- "epoch": 0.04,
30
- "learning_rate": 4.3511450381679383e-05,
31
- "loss": 5.5852,
32
- "step": 80
33
- },
34
- {
35
- "epoch": 0.05,
36
- "learning_rate": 5.438931297709923e-05,
37
- "loss": 5.2002,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 0.05,
42
- "eval_loss": 3.5950469970703125,
43
- "eval_runtime": 194.8685,
44
- "eval_samples_per_second": 24.853,
45
- "eval_steps_per_second": 0.78,
46
- "eval_wer": 1.0,
47
- "step": 100
48
- },
49
- {
50
- "epoch": 0.06,
51
- "learning_rate": 6.583969465648854e-05,
52
- "loss": 3.281,
53
- "step": 120
54
- },
55
- {
56
- "epoch": 0.06,
57
- "learning_rate": 7.495301487862176e-05,
58
- "loss": 3.3442,
59
- "step": 140
60
- },
61
- {
62
- "epoch": 0.07,
63
- "learning_rate": 7.472983555207517e-05,
64
- "loss": 3.5229,
65
- "step": 160
66
- },
67
- {
68
- "epoch": 0.08,
69
- "learning_rate": 7.449490994518402e-05,
70
- "loss": 3.089,
71
- "step": 180
72
- },
73
- {
74
- "epoch": 0.09,
75
- "learning_rate": 7.427173061863743e-05,
76
- "loss": 3.482,
77
- "step": 200
78
- },
79
- {
80
- "epoch": 0.09,
81
- "eval_loss": 3.274627923965454,
82
- "eval_runtime": 194.5649,
83
- "eval_samples_per_second": 24.891,
84
- "eval_steps_per_second": 0.781,
85
- "eval_wer": 1.0,
86
- "step": 200
87
- },
88
- {
89
- "epoch": 0.1,
90
- "learning_rate": 7.403680501174628e-05,
91
- "loss": 3.1517,
92
- "step": 220
93
- },
94
- {
95
- "epoch": 0.11,
96
- "learning_rate": 7.380187940485512e-05,
97
- "loss": 3.1617,
98
- "step": 240
99
- },
100
- {
101
- "epoch": 0.12,
102
- "learning_rate": 7.357870007830852e-05,
103
- "loss": 3.2971,
104
- "step": 260
105
- },
106
- {
107
- "epoch": 0.13,
108
- "learning_rate": 7.334377447141738e-05,
109
- "loss": 3.022,
110
- "step": 280
111
- },
112
- {
113
- "epoch": 0.14,
114
- "learning_rate": 7.312059514487078e-05,
115
- "loss": 3.3322,
116
- "step": 300
117
- },
118
- {
119
- "epoch": 0.14,
120
- "eval_loss": 3.0716073513031006,
121
- "eval_runtime": 196.1674,
122
- "eval_samples_per_second": 24.688,
123
- "eval_steps_per_second": 0.775,
124
- "eval_wer": 1.0,
125
- "step": 300
126
- },
127
- {
128
- "epoch": 0.15,
129
- "learning_rate": 7.288566953797964e-05,
130
- "loss": 2.9964,
131
- "step": 320
132
- },
133
- {
134
- "epoch": 0.16,
135
- "learning_rate": 7.265074393108849e-05,
136
- "loss": 3.0777,
137
- "step": 340
138
- },
139
- {
140
- "epoch": 0.17,
141
- "learning_rate": 7.242756460454189e-05,
142
- "loss": 3.2703,
143
- "step": 360
144
- },
145
- {
146
- "epoch": 0.17,
147
- "learning_rate": 7.219263899765073e-05,
148
- "loss": 3.1161,
149
- "step": 380
150
- },
151
- {
152
- "epoch": 0.18,
153
- "learning_rate": 7.195771339075958e-05,
154
- "loss": 3.2435,
155
- "step": 400
156
- },
157
- {
158
- "epoch": 0.18,
159
- "eval_loss": 3.063333034515381,
160
- "eval_runtime": 190.5938,
161
- "eval_samples_per_second": 25.41,
162
- "eval_steps_per_second": 0.798,
163
- "eval_wer": 1.0,
164
- "step": 400
165
- },
166
- {
167
- "epoch": 0.19,
168
- "learning_rate": 7.172278778386843e-05,
169
- "loss": 2.9993,
170
- "step": 420
171
- },
172
- {
173
- "epoch": 0.2,
174
- "learning_rate": 7.148786217697727e-05,
175
- "loss": 3.0555,
176
- "step": 440
177
- },
178
- {
179
- "epoch": 0.21,
180
- "learning_rate": 7.126468285043069e-05,
181
- "loss": 3.1879,
182
- "step": 460
183
- },
184
- {
185
- "epoch": 0.22,
186
- "learning_rate": 7.102975724353953e-05,
187
- "loss": 2.9861,
188
- "step": 480
189
- },
190
- {
191
- "epoch": 0.23,
192
- "learning_rate": 7.080657791699295e-05,
193
- "loss": 3.0833,
194
- "step": 500
195
- },
196
- {
197
- "epoch": 0.23,
198
- "eval_loss": 2.9769718647003174,
199
- "eval_runtime": 195.2639,
200
- "eval_samples_per_second": 24.802,
201
- "eval_steps_per_second": 0.778,
202
- "eval_wer": 1.0,
203
- "step": 500
204
- },
205
- {
206
- "epoch": 0.24,
207
- "learning_rate": 7.05716523101018e-05,
208
- "loss": 3.1608,
209
- "step": 520
210
- },
211
- {
212
- "epoch": 0.25,
213
- "learning_rate": 7.033672670321064e-05,
214
- "loss": 3.0206,
215
- "step": 540
216
- },
217
- {
218
- "epoch": 0.26,
219
- "learning_rate": 7.01018010963195e-05,
220
- "loss": 3.0523,
221
- "step": 560
222
- },
223
- {
224
- "epoch": 0.27,
225
- "learning_rate": 6.986687548942835e-05,
226
- "loss": 3.0222,
227
- "step": 580
228
- },
229
- {
230
- "epoch": 0.28,
231
- "learning_rate": 6.964369616288175e-05,
232
- "loss": 3.0913,
233
- "step": 600
234
- },
235
- {
236
- "epoch": 0.28,
237
- "eval_loss": 3.052091598510742,
238
- "eval_runtime": 189.8149,
239
- "eval_samples_per_second": 25.514,
240
- "eval_steps_per_second": 0.801,
241
- "eval_wer": 1.0,
242
- "step": 600
243
- },
244
- {
245
- "epoch": 0.29,
246
- "learning_rate": 6.94087705559906e-05,
247
- "loss": 3.1987,
248
- "step": 620
249
- },
250
- {
251
- "epoch": 0.29,
252
- "learning_rate": 6.917384494909944e-05,
253
- "loss": 3.1504,
254
- "step": 640
255
- },
256
- {
257
- "epoch": 0.3,
258
- "learning_rate": 6.893891934220829e-05,
259
- "loss": 3.2406,
260
- "step": 660
261
- },
262
- {
263
- "epoch": 0.31,
264
- "learning_rate": 6.870399373531715e-05,
265
- "loss": 3.0632,
266
- "step": 680
267
- },
268
- {
269
- "epoch": 0.32,
270
- "learning_rate": 6.848081440877055e-05,
271
- "loss": 3.1487,
272
- "step": 700
273
- },
274
- {
275
- "epoch": 0.32,
276
- "eval_loss": 3.1951305866241455,
277
- "eval_runtime": 196.8636,
278
- "eval_samples_per_second": 24.601,
279
- "eval_steps_per_second": 0.772,
280
- "eval_wer": 1.0,
281
- "step": 700
282
- },
283
- {
284
- "epoch": 0.33,
285
- "learning_rate": 6.824588880187941e-05,
286
- "loss": 3.298,
287
- "step": 720
288
- },
289
- {
290
- "epoch": 0.34,
291
- "learning_rate": 6.801096319498825e-05,
292
- "loss": 3.1669,
293
- "step": 740
294
- },
295
- {
296
- "epoch": 0.35,
297
- "learning_rate": 6.778778386844165e-05,
298
- "loss": 3.2651,
299
- "step": 760
300
- },
301
- {
302
- "epoch": 0.36,
303
- "learning_rate": 6.75528582615505e-05,
304
- "loss": 3.1562,
305
- "step": 780
306
- },
307
- {
308
- "epoch": 0.37,
309
- "learning_rate": 6.732967893500391e-05,
310
- "loss": 3.3254,
311
- "step": 800
312
- },
313
- {
314
- "epoch": 0.37,
315
- "eval_loss": 3.0070509910583496,
316
- "eval_runtime": 192.0978,
317
- "eval_samples_per_second": 25.211,
318
- "eval_steps_per_second": 0.791,
319
- "eval_wer": 1.0,
320
- "step": 800
321
- },
322
- {
323
- "epoch": 0.38,
324
- "learning_rate": 6.709475332811276e-05,
325
- "loss": 3.0487,
326
- "step": 820
327
- },
328
- {
329
- "epoch": 0.39,
330
- "learning_rate": 6.68598277212216e-05,
331
- "loss": 3.1936,
332
- "step": 840
333
- },
334
- {
335
- "epoch": 0.4,
336
- "learning_rate": 6.663664839467502e-05,
337
- "loss": 3.323,
338
- "step": 860
339
- },
340
- {
341
- "epoch": 0.41,
342
- "learning_rate": 6.640172278778387e-05,
343
- "loss": 2.998,
344
- "step": 880
345
- },
346
- {
347
- "epoch": 0.41,
348
- "learning_rate": 6.617854346123726e-05,
349
- "loss": 3.4698,
350
- "step": 900
351
- },
352
- {
353
- "epoch": 0.41,
354
- "eval_loss": 2.995044469833374,
355
- "eval_runtime": 195.1464,
356
- "eval_samples_per_second": 24.817,
357
- "eval_steps_per_second": 0.779,
358
- "eval_wer": 1.0,
359
- "step": 900
360
- },
361
- {
362
- "epoch": 0.42,
363
- "learning_rate": 6.594361785434611e-05,
364
- "loss": 3.0048,
365
- "step": 920
366
- },
367
- {
368
- "epoch": 0.43,
369
- "learning_rate": 6.570869224745496e-05,
370
- "loss": 3.1447,
371
- "step": 940
372
- },
373
- {
374
- "epoch": 0.44,
375
- "learning_rate": 6.548551292090837e-05,
376
- "loss": 3.3347,
377
- "step": 960
378
- },
379
- {
380
- "epoch": 0.45,
381
- "learning_rate": 6.525058731401722e-05,
382
- "loss": 2.9795,
383
- "step": 980
384
- },
385
- {
386
- "epoch": 0.46,
387
- "learning_rate": 6.503915426781518e-05,
388
- "loss": 3.5905,
389
- "step": 1000
390
- },
391
- {
392
- "epoch": 0.46,
393
- "eval_loss": 2.9961767196655273,
394
- "eval_runtime": 188.3505,
395
- "eval_samples_per_second": 25.713,
396
- "eval_steps_per_second": 0.807,
397
- "eval_wer": 1.0,
398
- "step": 1000
399
- },
400
- {
401
- "epoch": 0.47,
402
- "learning_rate": 6.480422866092403e-05,
403
- "loss": 2.9884,
404
- "step": 1020
405
- },
406
- {
407
- "epoch": 0.48,
408
- "learning_rate": 6.456930305403288e-05,
409
- "loss": 3.175,
410
- "step": 1040
411
- },
412
- {
413
- "epoch": 0.49,
414
- "learning_rate": 6.434612372748629e-05,
415
- "loss": 3.3163,
416
- "step": 1060
417
- },
418
- {
419
- "epoch": 0.5,
420
- "learning_rate": 6.411119812059513e-05,
421
- "loss": 3.0063,
422
- "step": 1080
423
- },
424
- {
425
- "epoch": 0.51,
426
- "learning_rate": 6.387627251370398e-05,
427
- "loss": 3.4965,
428
- "step": 1100
429
- },
430
- {
431
- "epoch": 0.51,
432
- "eval_loss": 3.0483579635620117,
433
- "eval_runtime": 193.4167,
434
- "eval_samples_per_second": 25.039,
435
- "eval_steps_per_second": 0.786,
436
- "eval_wer": 1.0,
437
- "step": 1100
438
- },
439
- {
440
- "epoch": 0.52,
441
- "learning_rate": 6.364134690681284e-05,
442
- "loss": 2.9967,
443
- "step": 1120
444
- },
445
- {
446
- "epoch": 0.52,
447
- "learning_rate": 6.340642129992169e-05,
448
- "loss": 3.2329,
449
- "step": 1140
450
- },
451
- {
452
- "epoch": 0.53,
453
- "learning_rate": 6.31832419733751e-05,
454
- "loss": 3.3814,
455
- "step": 1160
456
- },
457
- {
458
- "epoch": 0.54,
459
- "learning_rate": 6.294831636648395e-05,
460
- "loss": 3.0108,
461
- "step": 1180
462
- },
463
- {
464
- "epoch": 0.55,
465
- "learning_rate": 6.272513703993735e-05,
466
- "loss": 3.6227,
467
- "step": 1200
468
- },
469
- {
470
- "epoch": 0.55,
471
- "eval_loss": 3.0750181674957275,
472
- "eval_runtime": 193.4538,
473
- "eval_samples_per_second": 25.034,
474
- "eval_steps_per_second": 0.786,
475
- "eval_wer": 1.0,
476
- "step": 1200
477
- },
478
- {
479
- "epoch": 0.56,
480
- "learning_rate": 6.24902114330462e-05,
481
- "loss": 2.9917,
482
- "step": 1220
483
- },
484
- {
485
- "epoch": 0.57,
486
- "learning_rate": 6.225528582615504e-05,
487
- "loss": 3.1386,
488
- "step": 1240
489
- },
490
- {
491
- "epoch": 0.58,
492
- "learning_rate": 6.203210649960845e-05,
493
- "loss": 3.3022,
494
- "step": 1260
495
- },
496
- {
497
- "epoch": 0.59,
498
- "learning_rate": 6.17971808927173e-05,
499
- "loss": 3.0139,
500
- "step": 1280
501
- },
502
- {
503
- "epoch": 0.6,
504
- "learning_rate": 6.157400156617071e-05,
505
- "loss": 3.2921,
506
- "step": 1300
507
- },
508
- {
509
- "epoch": 0.6,
510
- "eval_loss": 3.019115924835205,
511
- "eval_runtime": 194.2188,
512
- "eval_samples_per_second": 24.936,
513
- "eval_steps_per_second": 0.783,
514
- "eval_wer": 1.0,
515
- "step": 1300
516
- },
517
- {
518
- "epoch": 0.61,
519
- "learning_rate": 6.133907595927956e-05,
520
- "loss": 3.0432,
521
- "step": 1320
522
- },
523
- {
524
- "epoch": 0.62,
525
- "learning_rate": 6.11041503523884e-05,
526
- "loss": 3.0951,
527
- "step": 1340
528
- },
529
- {
530
- "epoch": 0.63,
531
- "learning_rate": 6.088097102584181e-05,
532
- "loss": 3.189,
533
- "step": 1360
534
- },
535
- {
536
- "epoch": 0.64,
537
- "learning_rate": 6.064604541895066e-05,
538
- "loss": 3.1296,
539
- "step": 1380
540
- },
541
- {
542
- "epoch": 0.64,
543
- "learning_rate": 6.0422866092404065e-05,
544
- "loss": 3.2907,
545
- "step": 1400
546
- },
547
- {
548
- "epoch": 0.64,
549
- "eval_loss": 3.06713604927063,
550
- "eval_runtime": 192.5311,
551
- "eval_samples_per_second": 25.154,
552
- "eval_steps_per_second": 0.789,
553
- "eval_wer": 1.0,
554
- "step": 1400
555
- },
556
- {
557
- "epoch": 0.65,
558
- "learning_rate": 6.018794048551292e-05,
559
- "loss": 3.1849,
560
- "step": 1420
561
- },
562
- {
563
- "epoch": 0.66,
564
- "learning_rate": 5.9953014878621765e-05,
565
- "loss": 3.1682,
566
- "step": 1440
567
- },
568
- {
569
- "epoch": 0.67,
570
- "learning_rate": 5.9729835552075165e-05,
571
- "loss": 3.2002,
572
- "step": 1460
573
- },
574
- {
575
- "epoch": 0.68,
576
- "learning_rate": 5.949490994518402e-05,
577
- "loss": 3.2207,
578
- "step": 1480
579
- },
580
- {
581
- "epoch": 0.69,
582
- "learning_rate": 5.9271730618637424e-05,
583
- "loss": 3.1716,
584
- "step": 1500
585
- },
586
- {
587
- "epoch": 0.69,
588
- "eval_loss": 3.093482732772827,
589
- "eval_runtime": 193.22,
590
- "eval_samples_per_second": 25.065,
591
- "eval_steps_per_second": 0.787,
592
- "eval_wer": 1.0,
593
- "step": 1500
594
- },
595
- {
596
- "epoch": 0.7,
597
- "learning_rate": 5.903680501174628e-05,
598
- "loss": 3.2105,
599
- "step": 1520
600
- },
601
- {
602
- "epoch": 0.71,
603
- "learning_rate": 5.8801879404855124e-05,
604
- "loss": 3.173,
605
- "step": 1540
606
- },
607
- {
608
- "epoch": 0.72,
609
- "learning_rate": 5.857870007830853e-05,
610
- "loss": 3.2066,
611
- "step": 1560
612
- },
613
- {
614
- "epoch": 0.73,
615
- "learning_rate": 5.8343774471417384e-05,
616
- "loss": 3.2196,
617
- "step": 1580
618
- },
619
- {
620
- "epoch": 0.74,
621
- "learning_rate": 5.812059514487078e-05,
622
- "loss": 3.1788,
623
- "step": 1600
624
- },
625
- {
626
- "epoch": 0.74,
627
- "eval_loss": 3.092198133468628,
628
- "eval_runtime": 194.0448,
629
- "eval_samples_per_second": 24.958,
630
- "eval_steps_per_second": 0.783,
631
- "eval_wer": 1.0,
632
- "step": 1600
633
- },
634
- {
635
- "epoch": 0.75,
636
- "learning_rate": 5.788566953797963e-05,
637
- "loss": 3.2075,
638
- "step": 1620
639
- },
640
- {
641
- "epoch": 0.75,
642
- "learning_rate": 5.765074393108848e-05,
643
- "loss": 3.1772,
644
- "step": 1640
645
- },
646
- {
647
- "epoch": 0.76,
648
- "learning_rate": 5.742756460454189e-05,
649
- "loss": 3.2101,
650
- "step": 1660
651
- },
652
- {
653
- "epoch": 0.77,
654
- "learning_rate": 5.7192638997650736e-05,
655
- "loss": 3.2115,
656
- "step": 1680
657
- },
658
- {
659
- "epoch": 0.78,
660
- "learning_rate": 5.69812059514487e-05,
661
- "loss": 3.2986,
662
- "step": 1700
663
- },
664
- {
665
- "epoch": 0.78,
666
- "eval_loss": 3.092197895050049,
667
- "eval_runtime": 194.3305,
668
- "eval_samples_per_second": 24.921,
669
- "eval_steps_per_second": 0.782,
670
- "eval_wer": 1.0,
671
- "step": 1700
672
- },
673
- {
674
- "epoch": 0.79,
675
- "learning_rate": 5.674628034455755e-05,
676
- "loss": 3.2063,
677
- "step": 1720
678
- },
679
- {
680
- "epoch": 0.8,
681
- "learning_rate": 5.65113547376664e-05,
682
- "loss": 3.1723,
683
- "step": 1740
684
- },
685
- {
686
- "epoch": 0.81,
687
- "learning_rate": 5.627642913077525e-05,
688
- "loss": 3.0945,
689
- "step": 1760
690
- },
691
- {
692
- "epoch": 0.82,
693
- "learning_rate": 5.6041503523884095e-05,
694
- "loss": 3.2153,
695
- "step": 1780
696
- },
697
- {
698
- "epoch": 0.83,
699
- "learning_rate": 5.580657791699295e-05,
700
- "loss": 3.0761,
701
- "step": 1800
702
- },
703
- {
704
- "epoch": 0.83,
705
- "eval_loss": 3.092197895050049,
706
- "eval_runtime": 192.7837,
707
- "eval_samples_per_second": 25.121,
708
- "eval_steps_per_second": 0.788,
709
- "eval_wer": 1.0,
710
- "step": 1800
711
- },
712
- {
713
- "epoch": 0.84,
714
- "learning_rate": 5.5571652310101795e-05,
715
- "loss": 3.2061,
716
- "step": 1820
717
- },
718
- {
719
- "epoch": 0.85,
720
- "learning_rate": 5.533672670321064e-05,
721
- "loss": 3.1768,
722
- "step": 1840
723
- },
724
- {
725
- "epoch": 0.86,
726
- "learning_rate": 5.5101801096319495e-05,
727
- "loss": 3.096,
728
- "step": 1860
729
- },
730
- {
731
- "epoch": 0.87,
732
- "learning_rate": 5.486687548942834e-05,
733
- "loss": 3.213,
734
- "step": 1880
735
- },
736
- {
737
- "epoch": 0.87,
738
- "learning_rate": 5.464369616288175e-05,
739
- "loss": 3.1832,
740
- "step": 1900
741
- },
742
- {
743
- "epoch": 0.87,
744
- "eval_loss": 3.092197895050049,
745
- "eval_runtime": 196.1769,
746
- "eval_samples_per_second": 24.687,
747
- "eval_steps_per_second": 0.775,
748
- "eval_wer": 1.0,
749
- "step": 1900
750
- },
751
- {
752
- "epoch": 0.88,
753
- "learning_rate": 5.44087705559906e-05,
754
- "loss": 3.2046,
755
- "step": 1920
756
- },
757
- {
758
- "epoch": 0.89,
759
- "learning_rate": 5.417384494909945e-05,
760
- "loss": 3.1743,
761
- "step": 1940
762
- },
763
- {
764
- "epoch": 0.9,
765
- "learning_rate": 5.395066562255285e-05,
766
- "loss": 3.2055,
767
- "step": 1960
768
- },
769
- {
770
- "epoch": 0.91,
771
- "learning_rate": 5.37157400156617e-05,
772
- "loss": 3.2152,
773
- "step": 1980
774
- },
775
- {
776
- "epoch": 0.92,
777
- "learning_rate": 5.349256068911511e-05,
778
- "loss": 3.1845,
779
- "step": 2000
780
- },
781
- {
782
- "epoch": 0.92,
783
- "eval_loss": 3.092197895050049,
784
- "eval_runtime": 195.9315,
785
- "eval_samples_per_second": 24.718,
786
- "eval_steps_per_second": 0.776,
787
- "eval_wer": 1.0,
788
- "step": 2000
789
- },
790
- {
791
- "epoch": 0.93,
792
- "learning_rate": 5.3257635082223954e-05,
793
- "loss": 3.2093,
794
- "step": 2020
795
- },
796
- {
797
- "epoch": 0.94,
798
- "learning_rate": 5.302270947533281e-05,
799
- "loss": 3.1748,
800
- "step": 2040
801
- },
802
- {
803
- "epoch": 0.95,
804
- "learning_rate": 5.279953014878621e-05,
805
- "loss": 3.199,
806
- "step": 2060
807
- },
808
- {
809
- "epoch": 0.96,
810
- "learning_rate": 5.2564604541895067e-05,
811
- "loss": 3.2169,
812
- "step": 2080
813
- },
814
- {
815
- "epoch": 0.97,
816
- "learning_rate": 5.2341425215348466e-05,
817
- "loss": 3.1817,
818
- "step": 2100
819
- },
820
- {
821
- "epoch": 0.97,
822
- "eval_loss": 3.092197895050049,
823
- "eval_runtime": 193.8566,
824
- "eval_samples_per_second": 24.982,
825
- "eval_steps_per_second": 0.784,
826
- "eval_wer": 1.0,
827
- "step": 2100
828
- },
829
- {
830
- "epoch": 0.98,
831
- "learning_rate": 5.210649960845731e-05,
832
- "loss": 3.2075,
833
- "step": 2120
834
- },
835
- {
836
- "epoch": 0.98,
837
- "learning_rate": 5.1871574001566166e-05,
838
- "loss": 3.1693,
839
- "step": 2140
840
- },
841
- {
842
- "epoch": 0.99,
843
- "learning_rate": 5.164839467501957e-05,
844
- "loss": 3.2049,
845
- "step": 2160
846
- },
847
- {
848
- "epoch": 1.0,
849
- "learning_rate": 5.1425215348472984e-05,
850
- "loss": 3.3657,
851
- "step": 2180
852
- },
853
- {
854
- "epoch": 1.01,
855
- "learning_rate": 5.119028974158183e-05,
856
- "loss": 3.2209,
857
- "step": 2200
858
- },
859
- {
860
- "epoch": 1.01,
861
- "eval_loss": 3.092197895050049,
862
- "eval_runtime": 194.751,
863
- "eval_samples_per_second": 24.868,
864
- "eval_steps_per_second": 0.78,
865
- "eval_wer": 1.0,
866
- "step": 2200
867
- },
868
- {
869
- "epoch": 1.02,
870
- "learning_rate": 5.095536413469068e-05,
871
- "loss": 3.0961,
872
- "step": 2220
873
- },
874
- {
875
- "epoch": 1.03,
876
- "learning_rate": 5.073218480814408e-05,
877
- "loss": 3.2735,
878
- "step": 2240
879
- },
880
- {
881
- "epoch": 1.04,
882
- "learning_rate": 5.049725920125293e-05,
883
- "loss": 3.1904,
884
- "step": 2260
885
- },
886
- {
887
- "epoch": 1.05,
888
- "learning_rate": 5.0274079874706336e-05,
889
- "loss": 3.1892,
890
- "step": 2280
891
- },
892
- {
893
- "epoch": 1.06,
894
- "learning_rate": 5.003915426781519e-05,
895
- "loss": 3.2138,
896
- "step": 2300
897
- },
898
- {
899
- "epoch": 1.06,
900
- "eval_loss": 3.092197895050049,
901
- "eval_runtime": 195.285,
902
- "eval_samples_per_second": 24.8,
903
- "eval_steps_per_second": 0.778,
904
- "eval_wer": 1.0,
905
- "step": 2300
906
- },
907
- {
908
- "epoch": 1.07,
909
- "learning_rate": 4.980422866092404e-05,
910
- "loss": 3.0819,
911
- "step": 2320
912
- },
913
- {
914
- "epoch": 1.08,
915
- "learning_rate": 4.958104933437744e-05,
916
- "loss": 3.272,
917
- "step": 2340
918
- },
919
- {
920
- "epoch": 1.09,
921
- "learning_rate": 4.9346123727486296e-05,
922
- "loss": 3.2011,
923
- "step": 2360
924
- },
925
- {
926
- "epoch": 1.1,
927
- "learning_rate": 4.9122944400939695e-05,
928
- "loss": 3.1918,
929
- "step": 2380
930
- },
931
- {
932
- "epoch": 1.1,
933
- "learning_rate": 4.888801879404854e-05,
934
- "loss": 3.2154,
935
- "step": 2400
936
- },
937
- {
938
- "epoch": 1.1,
939
- "eval_loss": 3.092197895050049,
940
- "eval_runtime": 196.4218,
941
- "eval_samples_per_second": 24.656,
942
- "eval_steps_per_second": 0.774,
943
- "eval_wer": 1.0,
944
- "step": 2400
945
- },
946
- {
947
- "epoch": 1.11,
948
- "learning_rate": 4.8653093187157396e-05,
949
- "loss": 3.0946,
950
- "step": 2420
951
- },
952
- {
953
- "epoch": 1.12,
954
- "learning_rate": 4.84299138606108e-05,
955
- "loss": 3.2753,
956
- "step": 2440
957
- },
958
- {
959
- "epoch": 1.13,
960
- "learning_rate": 4.8194988253719655e-05,
961
- "loss": 3.1829,
962
- "step": 2460
963
- },
964
- {
965
- "epoch": 1.14,
966
- "learning_rate": 4.797180892717306e-05,
967
- "loss": 3.1839,
968
- "step": 2480
969
- },
970
- {
971
- "epoch": 1.15,
972
- "learning_rate": 4.773688332028191e-05,
973
- "loss": 3.2185,
974
- "step": 2500
975
- },
976
- {
977
- "epoch": 1.15,
978
- "eval_loss": 3.092197895050049,
979
- "eval_runtime": 192.8929,
980
- "eval_samples_per_second": 25.107,
981
- "eval_steps_per_second": 0.788,
982
- "eval_wer": 1.0,
983
- "step": 2500
984
- },
985
- {
986
- "epoch": 1.16,
987
- "learning_rate": 4.750195771339076e-05,
988
- "loss": 3.0971,
989
- "step": 2520
990
- },
991
- {
992
- "epoch": 1.17,
993
- "learning_rate": 4.727877838684416e-05,
994
- "loss": 3.2719,
995
- "step": 2540
996
- },
997
- {
998
- "epoch": 1.18,
999
- "learning_rate": 4.704385277995301e-05,
1000
- "loss": 3.1974,
1001
- "step": 2560
1002
- },
1003
- {
1004
- "epoch": 1.19,
1005
- "learning_rate": 4.680892717306186e-05,
1006
- "loss": 3.0775,
1007
- "step": 2580
1008
- },
1009
- {
1010
- "epoch": 1.2,
1011
- "learning_rate": 4.657400156617071e-05,
1012
- "loss": 3.2201,
1013
- "step": 2600
1014
- },
1015
- {
1016
- "epoch": 1.2,
1017
- "eval_loss": 3.092197895050049,
1018
- "eval_runtime": 192.9488,
1019
- "eval_samples_per_second": 25.1,
1020
- "eval_steps_per_second": 0.788,
1021
- "eval_wer": 1.0,
1022
- "step": 2600
1023
- },
1024
- {
1025
- "epoch": 1.21,
1026
- "learning_rate": 4.6339075959279554e-05,
1027
- "loss": 3.0946,
1028
- "step": 2620
1029
- },
1030
- {
1031
- "epoch": 1.22,
1032
- "learning_rate": 4.611589663273297e-05,
1033
- "loss": 3.2708,
1034
- "step": 2640
1035
- },
1036
- {
1037
- "epoch": 1.22,
1038
- "learning_rate": 4.5880971025841814e-05,
1039
- "loss": 3.1949,
1040
- "step": 2660
1041
- },
1042
- {
1043
- "epoch": 1.23,
1044
- "learning_rate": 4.564604541895066e-05,
1045
- "loss": 3.0763,
1046
- "step": 2680
1047
- },
1048
- {
1049
- "epoch": 1.24,
1050
- "learning_rate": 4.5411119812059514e-05,
1051
- "loss": 3.2184,
1052
- "step": 2700
1053
- },
1054
- {
1055
- "epoch": 1.24,
1056
- "eval_loss": 3.092197895050049,
1057
- "eval_runtime": 189.7646,
1058
- "eval_samples_per_second": 25.521,
1059
- "eval_steps_per_second": 0.801,
1060
- "eval_wer": 1.0,
1061
- "step": 2700
1062
- },
1063
- {
1064
- "epoch": 1.25,
1065
- "learning_rate": 4.517619420516836e-05,
1066
- "loss": 3.0962,
1067
- "step": 2720
1068
- },
1069
- {
1070
- "epoch": 1.26,
1071
- "learning_rate": 4.495301487862176e-05,
1072
- "loss": 3.2795,
1073
- "step": 2740
1074
- },
1075
- {
1076
- "epoch": 1.27,
1077
- "learning_rate": 4.4718089271730614e-05,
1078
- "loss": 3.1901,
1079
- "step": 2760
1080
- },
1081
- {
1082
- "epoch": 1.28,
1083
- "learning_rate": 4.449490994518402e-05,
1084
- "loss": 3.1902,
1085
- "step": 2780
1086
- },
1087
- {
1088
- "epoch": 1.29,
1089
- "learning_rate": 4.425998433829287e-05,
1090
- "loss": 3.2196,
1091
- "step": 2800
1092
- },
1093
- {
1094
- "epoch": 1.29,
1095
- "eval_loss": 3.092197895050049,
1096
- "eval_runtime": 192.1661,
1097
- "eval_samples_per_second": 25.202,
1098
- "eval_steps_per_second": 0.791,
1099
- "eval_wer": 1.0,
1100
- "step": 2800
1101
- },
1102
- {
1103
- "epoch": 1.3,
1104
- "learning_rate": 4.402505873140172e-05,
1105
- "loss": 3.0923,
1106
- "step": 2820
1107
- },
1108
- {
1109
- "epoch": 1.31,
1110
- "learning_rate": 4.3801879404855126e-05,
1111
- "loss": 3.2783,
1112
- "step": 2840
1113
- },
1114
- {
1115
- "epoch": 1.32,
1116
- "learning_rate": 4.356695379796398e-05,
1117
- "loss": 3.195,
1118
- "step": 2860
1119
- },
1120
- {
1121
- "epoch": 1.33,
1122
- "learning_rate": 4.335552075176194e-05,
1123
- "loss": 3.3041,
1124
- "step": 2880
1125
- },
1126
- {
1127
- "epoch": 1.34,
1128
- "learning_rate": 4.3120595144870784e-05,
1129
- "loss": 3.2158,
1130
- "step": 2900
1131
- },
1132
- {
1133
- "epoch": 1.34,
1134
- "eval_loss": 3.092197895050049,
1135
- "eval_runtime": 194.7934,
1136
- "eval_samples_per_second": 24.862,
1137
- "eval_steps_per_second": 0.78,
1138
- "eval_wer": 1.0,
1139
- "step": 2900
1140
- },
1141
- {
1142
- "epoch": 1.34,
1143
- "learning_rate": 4.288566953797964e-05,
1144
- "loss": 3.0976,
1145
- "step": 2920
1146
- },
1147
- {
1148
- "epoch": 1.35,
1149
- "learning_rate": 4.2662490211433043e-05,
1150
- "loss": 3.2737,
1151
- "step": 2940
1152
- },
1153
- {
1154
- "epoch": 1.36,
1155
- "learning_rate": 4.24275646045419e-05,
1156
- "loss": 3.1927,
1157
- "step": 2960
1158
- },
1159
- {
1160
- "epoch": 1.37,
1161
- "learning_rate": 4.2204385277995296e-05,
1162
- "loss": 3.1896,
1163
- "step": 2980
1164
- },
1165
- {
1166
- "epoch": 1.38,
1167
- "learning_rate": 4.196945967110414e-05,
1168
- "loss": 3.2169,
1169
- "step": 3000
1170
- },
1171
- {
1172
- "epoch": 1.38,
1173
- "eval_loss": 3.092197895050049,
1174
- "eval_runtime": 196.5983,
1175
- "eval_samples_per_second": 24.634,
1176
- "eval_steps_per_second": 0.773,
1177
- "eval_wer": 1.0,
1178
- "step": 3000
1179
- }
1180
- ],
1181
- "max_steps": 6516,
1182
- "num_train_epochs": 3,
1183
- "total_flos": 5.730765756456842e+19,
1184
- "trial_name": null,
1185
- "trial_params": null
1186
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{checkpoint-3000 → checkpoint-60}/config.json RENAMED
File without changes
{checkpoint-3000 → checkpoint-60}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:451c860a843d9f24cd86a79754e01529291d9c93ed7ae857fc15b2165971ef2e
3
- size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06df765e9a7d7ea6615404de49841893d9b9ce55d42f0bfd5486fd7105d83c4
3
+ size 2490337361
{checkpoint-3000 → checkpoint-60}/preprocessor_config.json RENAMED
File without changes
{checkpoint-3000 → checkpoint-60}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0052f401fc490352c790ad1c044b6d3276da131bfbf9ec8d804b048cb318fc44
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed09bae03da69e6e7d54c45ff801a7cd8e65f5bb600585b021024798e5fafa4a
3
  size 1262063089
{checkpoint-3000 → checkpoint-60}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b6404ed02f448394a66c9e17203ff3a69646d1570b4570e36ea6110a5d5e330
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5db96c9803a1c8f4051b38118bfa8389597dfc7915de251da9a5e1c69d3bc9
3
  size 14567
{checkpoint-3000 → checkpoint-60}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7c4960c2340a2bf713dd0ea0bbca386f6a864f908028378e9ceac66da9c35c7
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ed4ec308038905cbb975b9b2ae3c7852901f7a7bb341d7393b61de64cbb13f
3
  size 559
{checkpoint-3000 → checkpoint-60}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:477f93ca257d8ff2b5b08a14ea64bd27150efc48fbd9c9ef1895a607c1cff8b5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d173dd827ce26f584cfa95a40de1490d99e5852c0a8f273527eda424dd24205a
3
  size 623
checkpoint-60/trainer_state.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.7947019867549668,
5
+ "global_step": 60,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 4.9999999999999996e-05,
13
+ "loss": 13.2048,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.26,
18
+ "learning_rate": 7.46938775510204e-05,
19
+ "loss": 26.5657,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.26,
24
+ "eval_loss": 17.199691772460938,
25
+ "eval_runtime": 5.248,
26
+ "eval_samples_per_second": 18.674,
27
+ "eval_steps_per_second": 0.762,
28
+ "eval_wer": 1.0,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 0.4,
33
+ "learning_rate": 7.36734693877551e-05,
34
+ "loss": 8.0091,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 0.53,
39
+ "learning_rate": 7.265306122448979e-05,
40
+ "loss": 7.6332,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.53,
45
+ "eval_loss": 4.783994197845459,
46
+ "eval_runtime": 4.9818,
47
+ "eval_samples_per_second": 19.672,
48
+ "eval_steps_per_second": 0.803,
49
+ "eval_wer": 1.0,
50
+ "step": 40
51
+ },
52
+ {
53
+ "epoch": 0.66,
54
+ "learning_rate": 7.163265306122449e-05,
55
+ "loss": 3.8261,
56
+ "step": 50
57
+ },
58
+ {
59
+ "epoch": 0.79,
60
+ "learning_rate": 7.061224489795918e-05,
61
+ "loss": 3.8717,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 0.79,
66
+ "eval_loss": 3.6783852577209473,
67
+ "eval_runtime": 4.9141,
68
+ "eval_samples_per_second": 19.943,
69
+ "eval_steps_per_second": 0.814,
70
+ "eval_wer": 1.0,
71
+ "step": 60
72
+ }
73
+ ],
74
+ "max_steps": 750,
75
+ "num_train_epochs": 10,
76
+ "total_flos": 1.21965615465984e+18,
77
+ "trial_name": null,
78
+ "trial_params": null
79
+ }
{checkpoint-3000 → checkpoint-60}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:962dbb61fb0395a025f0dae3475a50ad9719299753b3a01bede5cb391a4aca78
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:470abf25a211ef5ec3a9821c884f2749839bee5e95f633b168d26f90c1809409
3
  size 3055
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7825b23117146d129b9fbfae78e758d25f4f6e94f91b7f4977dd069a0f502f81
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed09bae03da69e6e7d54c45ff801a7cd8e65f5bb600585b021024798e5fafa4a
3
  size 1262063089