lsb committed
Commit 9351fd8 • 1 Parent(s): 070b684
checkpoint-1000/trainer_state.json DELETED
@@ -1,1234 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.18625442354255914,
5
- "global_step": 1000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 1e-08,
13
- "loss": 24.0683,
14
- "step": 5
15
- },
16
- {
17
- "epoch": 0.0,
18
- "learning_rate": 3.5e-08,
19
- "loss": 17.9067,
20
- "step": 10
21
- },
22
- {
23
- "epoch": 0.0,
24
- "learning_rate": 6.000000000000001e-08,
25
- "loss": 16.9004,
26
- "step": 15
27
- },
28
- {
29
- "epoch": 0.0,
30
- "learning_rate": 8.500000000000001e-08,
31
- "loss": 16.8411,
32
- "step": 20
33
- },
34
- {
35
- "epoch": 0.0,
36
- "learning_rate": 1.1e-07,
37
- "loss": 15.0697,
38
- "step": 25
39
- },
40
- {
41
- "epoch": 0.01,
42
- "learning_rate": 1.35e-07,
43
- "loss": 15.2061,
44
- "step": 30
45
- },
46
- {
47
- "epoch": 0.01,
48
- "learning_rate": 1.6e-07,
49
- "loss": 14.8504,
50
- "step": 35
51
- },
52
- {
53
- "epoch": 0.01,
54
- "learning_rate": 1.85e-07,
55
- "loss": 14.6816,
56
- "step": 40
57
- },
58
- {
59
- "epoch": 0.01,
60
- "learning_rate": 2.1000000000000003e-07,
61
- "loss": 14.9665,
62
- "step": 45
63
- },
64
- {
65
- "epoch": 0.01,
66
- "learning_rate": 2.3500000000000003e-07,
67
- "loss": 14.3427,
68
- "step": 50
69
- },
70
- {
71
- "epoch": 0.01,
72
- "learning_rate": 2.6e-07,
73
- "loss": 18.4574,
74
- "step": 55
75
- },
76
- {
77
- "epoch": 0.01,
78
- "learning_rate": 2.8e-07,
79
- "loss": 18.379,
80
- "step": 60
81
- },
82
- {
83
- "epoch": 0.01,
84
- "learning_rate": 3.0500000000000004e-07,
85
- "loss": 17.7823,
86
- "step": 65
87
- },
88
- {
89
- "epoch": 0.01,
90
- "learning_rate": 3.3e-07,
91
- "loss": 15.2998,
92
- "step": 70
93
- },
94
- {
95
- "epoch": 0.01,
96
- "learning_rate": 3.55e-07,
97
- "loss": 15.2838,
98
- "step": 75
99
- },
100
- {
101
- "epoch": 0.01,
102
- "learning_rate": 3.8e-07,
103
- "loss": 14.8496,
104
- "step": 80
105
- },
106
- {
107
- "epoch": 0.02,
108
- "learning_rate": 4.0500000000000004e-07,
109
- "loss": 14.3004,
110
- "step": 85
111
- },
112
- {
113
- "epoch": 0.02,
114
- "learning_rate": 4.3e-07,
115
- "loss": 14.2005,
116
- "step": 90
117
- },
118
- {
119
- "epoch": 0.02,
120
- "learning_rate": 4.5500000000000004e-07,
121
- "loss": 14.5054,
122
- "step": 95
123
- },
124
- {
125
- "epoch": 0.02,
126
- "learning_rate": 4.800000000000001e-07,
127
- "loss": 14.3888,
128
- "step": 100
129
- },
130
- {
131
- "epoch": 0.02,
132
- "learning_rate": 5.05e-07,
133
- "loss": 17.3825,
134
- "step": 105
135
- },
136
- {
137
- "epoch": 0.02,
138
- "learning_rate": 5.3e-07,
139
- "loss": 17.0812,
140
- "step": 110
141
- },
142
- {
143
- "epoch": 0.02,
144
- "learning_rate": 5.550000000000001e-07,
145
- "loss": 17.0701,
146
- "step": 115
147
- },
148
- {
149
- "epoch": 0.02,
150
- "learning_rate": 5.800000000000001e-07,
151
- "loss": 14.9025,
152
- "step": 120
153
- },
154
- {
155
- "epoch": 0.02,
156
- "learning_rate": 6.05e-07,
157
- "loss": 14.4225,
158
- "step": 125
159
- },
160
- {
161
- "epoch": 0.02,
162
- "learning_rate": 6.3e-07,
163
- "loss": 13.9425,
164
- "step": 130
165
- },
166
- {
167
- "epoch": 0.03,
168
- "learning_rate": 6.550000000000001e-07,
169
- "loss": 13.8037,
170
- "step": 135
171
- },
172
- {
173
- "epoch": 0.03,
174
- "learning_rate": 6.800000000000001e-07,
175
- "loss": 13.4372,
176
- "step": 140
177
- },
178
- {
179
- "epoch": 0.03,
180
- "learning_rate": 7.05e-07,
181
- "loss": 13.6807,
182
- "step": 145
183
- },
184
- {
185
- "epoch": 0.03,
186
- "learning_rate": 7.3e-07,
187
- "loss": 13.742,
188
- "step": 150
189
- },
190
- {
191
- "epoch": 0.03,
192
- "learning_rate": 7.550000000000001e-07,
193
- "loss": 15.1545,
194
- "step": 155
195
- },
196
- {
197
- "epoch": 0.03,
198
- "learning_rate": 7.8e-07,
199
- "loss": 16.1792,
200
- "step": 160
201
- },
202
- {
203
- "epoch": 0.03,
204
- "learning_rate": 8.000000000000001e-07,
205
- "loss": 14.6018,
206
- "step": 165
207
- },
208
- {
209
- "epoch": 0.03,
210
- "learning_rate": 8.250000000000001e-07,
211
- "loss": 12.6032,
212
- "step": 170
213
- },
214
- {
215
- "epoch": 0.03,
216
- "learning_rate": 8.500000000000001e-07,
217
- "loss": 13.0217,
218
- "step": 175
219
- },
220
- {
221
- "epoch": 0.03,
222
- "learning_rate": 8.75e-07,
223
- "loss": 12.2336,
224
- "step": 180
225
- },
226
- {
227
- "epoch": 0.03,
228
- "learning_rate": 9.000000000000001e-07,
229
- "loss": 11.8643,
230
- "step": 185
231
- },
232
- {
233
- "epoch": 0.04,
234
- "learning_rate": 9.25e-07,
235
- "loss": 11.8572,
236
- "step": 190
237
- },
238
- {
239
- "epoch": 0.04,
240
- "learning_rate": 9.500000000000001e-07,
241
- "loss": 10.8976,
242
- "step": 195
243
- },
244
- {
245
- "epoch": 0.04,
246
- "learning_rate": 9.750000000000002e-07,
247
- "loss": 12.2617,
248
- "step": 200
249
- },
250
- {
251
- "epoch": 0.04,
252
- "learning_rate": 1.0000000000000002e-06,
253
- "loss": 13.7753,
254
- "step": 205
255
- },
256
- {
257
- "epoch": 0.04,
258
- "learning_rate": 1.025e-06,
259
- "loss": 11.6137,
260
- "step": 210
261
- },
262
- {
263
- "epoch": 0.04,
264
- "learning_rate": 1.0500000000000001e-06,
265
- "loss": 10.6402,
266
- "step": 215
267
- },
268
- {
269
- "epoch": 0.04,
270
- "learning_rate": 1.075e-06,
271
- "loss": 12.9591,
272
- "step": 220
273
- },
274
- {
275
- "epoch": 0.04,
276
- "learning_rate": 1.1e-06,
277
- "loss": 9.8613,
278
- "step": 225
279
- },
280
- {
281
- "epoch": 0.04,
282
- "learning_rate": 1.125e-06,
283
- "loss": 10.487,
284
- "step": 230
285
- },
286
- {
287
- "epoch": 0.04,
288
- "learning_rate": 1.1500000000000002e-06,
289
- "loss": 9.3773,
290
- "step": 235
291
- },
292
- {
293
- "epoch": 0.04,
294
- "learning_rate": 1.175e-06,
295
- "loss": 9.5665,
296
- "step": 240
297
- },
298
- {
299
- "epoch": 0.05,
300
- "learning_rate": 1.2000000000000002e-06,
301
- "loss": 8.2941,
302
- "step": 245
303
- },
304
- {
305
- "epoch": 0.05,
306
- "learning_rate": 1.2250000000000001e-06,
307
- "loss": 8.5563,
308
- "step": 250
309
- },
310
- {
311
- "epoch": 0.05,
312
- "learning_rate": 1.25e-06,
313
- "loss": 10.4941,
314
- "step": 255
315
- },
316
- {
317
- "epoch": 0.05,
318
- "learning_rate": 1.275e-06,
319
- "loss": 8.4986,
320
- "step": 260
321
- },
322
- {
323
- "epoch": 0.05,
324
- "learning_rate": 1.3e-06,
325
- "loss": 10.2295,
326
- "step": 265
327
- },
328
- {
329
- "epoch": 0.05,
330
- "learning_rate": 1.3250000000000002e-06,
331
- "loss": 8.7026,
332
- "step": 270
333
- },
334
- {
335
- "epoch": 0.05,
336
- "learning_rate": 1.3500000000000002e-06,
337
- "loss": 8.6009,
338
- "step": 275
339
- },
340
- {
341
- "epoch": 0.05,
342
- "learning_rate": 1.3750000000000002e-06,
343
- "loss": 7.7613,
344
- "step": 280
345
- },
346
- {
347
- "epoch": 0.05,
348
- "learning_rate": 1.4000000000000001e-06,
349
- "loss": 7.8609,
350
- "step": 285
351
- },
352
- {
353
- "epoch": 0.05,
354
- "learning_rate": 1.425e-06,
355
- "loss": 7.0097,
356
- "step": 290
357
- },
358
- {
359
- "epoch": 0.05,
360
- "learning_rate": 1.45e-06,
361
- "loss": 5.5692,
362
- "step": 295
363
- },
364
- {
365
- "epoch": 0.06,
366
- "learning_rate": 1.475e-06,
367
- "loss": 5.6402,
368
- "step": 300
369
- },
370
- {
371
- "epoch": 0.06,
372
- "learning_rate": 1.5e-06,
373
- "loss": 9.0815,
374
- "step": 305
375
- },
376
- {
377
- "epoch": 0.06,
378
- "learning_rate": 1.525e-06,
379
- "loss": 8.0803,
380
- "step": 310
381
- },
382
- {
383
- "epoch": 0.06,
384
- "learning_rate": 1.5500000000000002e-06,
385
- "loss": 7.6229,
386
- "step": 315
387
- },
388
- {
389
- "epoch": 0.06,
390
- "learning_rate": 1.5750000000000002e-06,
391
- "loss": 5.5354,
392
- "step": 320
393
- },
394
- {
395
- "epoch": 0.06,
396
- "learning_rate": 1.6000000000000001e-06,
397
- "loss": 8.1564,
398
- "step": 325
399
- },
400
- {
401
- "epoch": 0.06,
402
- "learning_rate": 1.6250000000000001e-06,
403
- "loss": 7.0378,
404
- "step": 330
405
- },
406
- {
407
- "epoch": 0.06,
408
- "learning_rate": 1.6500000000000003e-06,
409
- "loss": 5.3662,
410
- "step": 335
411
- },
412
- {
413
- "epoch": 0.06,
414
- "learning_rate": 1.6750000000000003e-06,
415
- "loss": 7.6436,
416
- "step": 340
417
- },
418
- {
419
- "epoch": 0.06,
420
- "learning_rate": 1.7000000000000002e-06,
421
- "loss": 5.3403,
422
- "step": 345
423
- },
424
- {
425
- "epoch": 0.07,
426
- "learning_rate": 1.725e-06,
427
- "loss": 8.1018,
428
- "step": 350
429
- },
430
- {
431
- "epoch": 0.07,
432
- "learning_rate": 1.745e-06,
433
- "loss": 7.5364,
434
- "step": 355
435
- },
436
- {
437
- "epoch": 0.07,
438
- "learning_rate": 1.77e-06,
439
- "loss": 5.2764,
440
- "step": 360
441
- },
442
- {
443
- "epoch": 0.07,
444
- "learning_rate": 1.7950000000000002e-06,
445
- "loss": 6.4889,
446
- "step": 365
447
- },
448
- {
449
- "epoch": 0.07,
450
- "learning_rate": 1.8200000000000002e-06,
451
- "loss": 5.1625,
452
- "step": 370
453
- },
454
- {
455
- "epoch": 0.07,
456
- "learning_rate": 1.8450000000000001e-06,
457
- "loss": 7.4417,
458
- "step": 375
459
- },
460
- {
461
- "epoch": 0.07,
462
- "learning_rate": 1.87e-06,
463
- "loss": 4.8924,
464
- "step": 380
465
- },
466
- {
467
- "epoch": 0.07,
468
- "learning_rate": 1.895e-06,
469
- "loss": 9.9698,
470
- "step": 385
471
- },
472
- {
473
- "epoch": 0.07,
474
- "learning_rate": 1.9200000000000003e-06,
475
- "loss": 7.4896,
476
- "step": 390
477
- },
478
- {
479
- "epoch": 0.07,
480
- "learning_rate": 1.945e-06,
481
- "loss": 6.7582,
482
- "step": 395
483
- },
484
- {
485
- "epoch": 0.07,
486
- "learning_rate": 1.97e-06,
487
- "loss": 6.0409,
488
- "step": 400
489
- },
490
- {
491
- "epoch": 0.08,
492
- "learning_rate": 1.9950000000000004e-06,
493
- "loss": 7.4142,
494
- "step": 405
495
- },
496
- {
497
- "epoch": 0.08,
498
- "learning_rate": 2.02e-06,
499
- "loss": 7.9007,
500
- "step": 410
501
- },
502
- {
503
- "epoch": 0.08,
504
- "learning_rate": 2.045e-06,
505
- "loss": 5.15,
506
- "step": 415
507
- },
508
- {
509
- "epoch": 0.08,
510
- "learning_rate": 2.07e-06,
511
- "loss": 5.6287,
512
- "step": 420
513
- },
514
- {
515
- "epoch": 0.08,
516
- "learning_rate": 2.0950000000000003e-06,
517
- "loss": 4.7277,
518
- "step": 425
519
- },
520
- {
521
- "epoch": 0.08,
522
- "learning_rate": 2.12e-06,
523
- "loss": 4.9955,
524
- "step": 430
525
- },
526
- {
527
- "epoch": 0.08,
528
- "learning_rate": 2.1450000000000002e-06,
529
- "loss": 6.4111,
530
- "step": 435
531
- },
532
- {
533
- "epoch": 0.08,
534
- "learning_rate": 2.17e-06,
535
- "loss": 5.7243,
536
- "step": 440
537
- },
538
- {
539
- "epoch": 0.08,
540
- "learning_rate": 2.195e-06,
541
- "loss": 5.3146,
542
- "step": 445
543
- },
544
- {
545
- "epoch": 0.08,
546
- "learning_rate": 2.2200000000000003e-06,
547
- "loss": 4.8272,
548
- "step": 450
549
- },
550
- {
551
- "epoch": 0.08,
552
- "learning_rate": 2.245e-06,
553
- "loss": 6.3851,
554
- "step": 455
555
- },
556
- {
557
- "epoch": 0.09,
558
- "learning_rate": 2.2700000000000003e-06,
559
- "loss": 5.0858,
560
- "step": 460
561
- },
562
- {
563
- "epoch": 0.09,
564
- "learning_rate": 2.2950000000000005e-06,
565
- "loss": 13.9773,
566
- "step": 465
567
- },
568
- {
569
- "epoch": 0.09,
570
- "learning_rate": 2.3200000000000002e-06,
571
- "loss": 5.1323,
572
- "step": 470
573
- },
574
- {
575
- "epoch": 0.09,
576
- "learning_rate": 2.345e-06,
577
- "loss": 4.5519,
578
- "step": 475
579
- },
580
- {
581
- "epoch": 0.09,
582
- "learning_rate": 2.37e-06,
583
- "loss": 4.9828,
584
- "step": 480
585
- },
586
- {
587
- "epoch": 0.09,
588
- "learning_rate": 2.395e-06,
589
- "loss": 10.9601,
590
- "step": 485
591
- },
592
- {
593
- "epoch": 0.09,
594
- "learning_rate": 2.42e-06,
595
- "loss": 4.715,
596
- "step": 490
597
- },
598
- {
599
- "epoch": 0.09,
600
- "learning_rate": 2.4450000000000003e-06,
601
- "loss": 6.1112,
602
- "step": 495
603
- },
604
- {
605
- "epoch": 0.09,
606
- "learning_rate": 2.47e-06,
607
- "loss": 4.8677,
608
- "step": 500
609
- },
610
- {
611
- "epoch": 0.09,
612
- "eval_loss": 6.384158611297607,
613
- "eval_runtime": 97.6612,
614
- "eval_samples_per_second": 12.226,
615
- "eval_steps_per_second": 1.536,
616
- "eval_wer": 1.0,
617
- "step": 500
618
- },
619
- {
620
- "epoch": 0.09,
621
- "learning_rate": 2.4950000000000003e-06,
622
- "loss": 5.6367,
623
- "step": 505
624
- },
625
- {
626
- "epoch": 0.09,
627
- "learning_rate": 2.52e-06,
628
- "loss": 4.798,
629
- "step": 510
630
- },
631
- {
632
- "epoch": 0.1,
633
- "learning_rate": 2.545e-06,
634
- "loss": 4.7321,
635
- "step": 515
636
- },
637
- {
638
- "epoch": 0.1,
639
- "learning_rate": 2.5700000000000004e-06,
640
- "loss": 9.0061,
641
- "step": 520
642
- },
643
- {
644
- "epoch": 0.1,
645
- "learning_rate": 2.595e-06,
646
- "loss": 4.5565,
647
- "step": 525
648
- },
649
- {
650
- "epoch": 0.1,
651
- "learning_rate": 2.6200000000000003e-06,
652
- "loss": 4.9119,
653
- "step": 530
654
- },
655
- {
656
- "epoch": 0.1,
657
- "learning_rate": 2.6450000000000005e-06,
658
- "loss": 4.6625,
659
- "step": 535
660
- },
661
- {
662
- "epoch": 0.1,
663
- "learning_rate": 2.6700000000000003e-06,
664
- "loss": 4.4122,
665
- "step": 540
666
- },
667
- {
668
- "epoch": 0.1,
669
- "learning_rate": 2.6950000000000005e-06,
670
- "loss": 6.515,
671
- "step": 545
672
- },
673
- {
674
- "epoch": 0.1,
675
- "learning_rate": 2.7200000000000002e-06,
676
- "loss": 6.2983,
677
- "step": 550
678
- },
679
- {
680
- "epoch": 0.1,
681
- "learning_rate": 2.7450000000000004e-06,
682
- "loss": 7.8164,
683
- "step": 555
684
- },
685
- {
686
- "epoch": 0.1,
687
- "learning_rate": 2.7700000000000006e-06,
688
- "loss": 5.5597,
689
- "step": 560
690
- },
691
- {
692
- "epoch": 0.11,
693
- "learning_rate": 2.7950000000000003e-06,
694
- "loss": 4.1535,
695
- "step": 565
696
- },
697
- {
698
- "epoch": 0.11,
699
- "learning_rate": 2.82e-06,
700
- "loss": 5.9433,
701
- "step": 570
702
- },
703
- {
704
- "epoch": 0.11,
705
- "learning_rate": 2.845e-06,
706
- "loss": 4.8775,
707
- "step": 575
708
- },
709
- {
710
- "epoch": 0.11,
711
- "learning_rate": 2.87e-06,
712
- "loss": 6.3474,
713
- "step": 580
714
- },
715
- {
716
- "epoch": 0.11,
717
- "learning_rate": 2.8950000000000002e-06,
718
- "loss": 8.6641,
719
- "step": 585
720
- },
721
- {
722
- "epoch": 0.11,
723
- "learning_rate": 2.92e-06,
724
- "loss": 6.0309,
725
- "step": 590
726
- },
727
- {
728
- "epoch": 0.11,
729
- "learning_rate": 2.945e-06,
730
- "loss": 4.1472,
731
- "step": 595
732
- },
733
- {
734
- "epoch": 0.11,
735
- "learning_rate": 2.97e-06,
736
- "loss": 4.5871,
737
- "step": 600
738
- },
739
- {
740
- "epoch": 0.11,
741
- "learning_rate": 2.995e-06,
742
- "loss": 5.0069,
743
- "step": 605
744
- },
745
- {
746
- "epoch": 0.11,
747
- "learning_rate": 3.0200000000000003e-06,
748
- "loss": 4.694,
749
- "step": 610
750
- },
751
- {
752
- "epoch": 0.11,
753
- "learning_rate": 3.045e-06,
754
- "loss": 4.502,
755
- "step": 615
756
- },
757
- {
758
- "epoch": 0.12,
759
- "learning_rate": 3.0700000000000003e-06,
760
- "loss": 7.2696,
761
- "step": 620
762
- },
763
- {
764
- "epoch": 0.12,
765
- "learning_rate": 3.0950000000000004e-06,
766
- "loss": 4.5793,
767
- "step": 625
768
- },
769
- {
770
- "epoch": 0.12,
771
- "learning_rate": 3.12e-06,
772
- "loss": 4.1937,
773
- "step": 630
774
- },
775
- {
776
- "epoch": 0.12,
777
- "learning_rate": 3.1450000000000004e-06,
778
- "loss": 6.3539,
779
- "step": 635
780
- },
781
- {
782
- "epoch": 0.12,
783
- "learning_rate": 3.17e-06,
784
- "loss": 4.1621,
785
- "step": 640
786
- },
787
- {
788
- "epoch": 0.12,
789
- "learning_rate": 3.1950000000000003e-06,
790
- "loss": 4.4824,
791
- "step": 645
792
- },
793
- {
794
- "epoch": 0.12,
795
- "learning_rate": 3.2200000000000005e-06,
796
- "loss": 4.4693,
797
- "step": 650
798
- },
799
- {
800
- "epoch": 0.12,
801
- "learning_rate": 3.2450000000000003e-06,
802
- "loss": 5.6584,
803
- "step": 655
804
- },
805
- {
806
- "epoch": 0.12,
807
- "learning_rate": 3.2700000000000005e-06,
808
- "loss": 4.813,
809
- "step": 660
810
- },
811
- {
812
- "epoch": 0.12,
813
- "learning_rate": 3.2950000000000002e-06,
814
- "loss": 6.1425,
815
- "step": 665
816
- },
817
- {
818
- "epoch": 0.12,
819
- "learning_rate": 3.3200000000000004e-06,
820
- "loss": 7.5331,
821
- "step": 670
822
- },
823
- {
824
- "epoch": 0.13,
825
- "learning_rate": 3.3450000000000006e-06,
826
- "loss": 9.772,
827
- "step": 675
828
- },
829
- {
830
- "epoch": 0.13,
831
- "learning_rate": 3.3700000000000003e-06,
832
- "loss": 4.1922,
833
- "step": 680
834
- },
835
- {
836
- "epoch": 0.13,
837
- "learning_rate": 3.3950000000000005e-06,
838
- "loss": 3.8468,
839
- "step": 685
840
- },
841
- {
842
- "epoch": 0.13,
843
- "learning_rate": 3.4200000000000007e-06,
844
- "loss": 3.5808,
845
- "step": 690
846
- },
847
- {
848
- "epoch": 0.13,
849
- "learning_rate": 3.445e-06,
850
- "loss": 5.5091,
851
- "step": 695
852
- },
853
- {
854
- "epoch": 0.13,
855
- "learning_rate": 3.4700000000000002e-06,
856
- "loss": 3.768,
857
- "step": 700
858
- },
859
- {
860
- "epoch": 0.13,
861
- "learning_rate": 3.495e-06,
862
- "loss": 4.9197,
863
- "step": 705
864
- },
865
- {
866
- "epoch": 0.13,
867
- "learning_rate": 3.52e-06,
868
- "loss": 4.1167,
869
- "step": 710
870
- },
871
- {
872
- "epoch": 0.13,
873
- "learning_rate": 3.545e-06,
874
- "loss": 7.1805,
875
- "step": 715
876
- },
877
- {
878
- "epoch": 0.13,
879
- "learning_rate": 3.57e-06,
880
- "loss": 7.7838,
881
- "step": 720
882
- },
883
- {
884
- "epoch": 0.14,
885
- "learning_rate": 3.5950000000000003e-06,
886
- "loss": 6.4851,
887
- "step": 725
888
- },
889
- {
890
- "epoch": 0.14,
891
- "learning_rate": 3.62e-06,
892
- "loss": 4.2357,
893
- "step": 730
894
- },
895
- {
896
- "epoch": 0.14,
897
- "learning_rate": 3.6450000000000003e-06,
898
- "loss": 5.7078,
899
- "step": 735
900
- },
901
- {
902
- "epoch": 0.14,
903
- "learning_rate": 3.6700000000000004e-06,
904
- "loss": 5.5984,
905
- "step": 740
906
- },
907
- {
908
- "epoch": 0.14,
909
- "learning_rate": 3.695e-06,
910
- "loss": 5.8425,
911
- "step": 745
912
- },
913
- {
914
- "epoch": 0.14,
915
- "learning_rate": 3.7200000000000004e-06,
916
- "loss": 3.9582,
917
- "step": 750
918
- },
919
- {
920
- "epoch": 0.14,
921
- "learning_rate": 3.745e-06,
922
- "loss": 6.1284,
923
- "step": 755
924
- },
925
- {
926
- "epoch": 0.14,
927
- "learning_rate": 3.7700000000000003e-06,
928
- "loss": 6.4174,
929
- "step": 760
930
- },
931
- {
932
- "epoch": 0.14,
933
- "learning_rate": 3.7950000000000005e-06,
934
- "loss": 4.0111,
935
- "step": 765
936
- },
937
- {
938
- "epoch": 0.14,
939
- "learning_rate": 3.820000000000001e-06,
940
- "loss": 6.0067,
941
- "step": 770
942
- },
943
- {
944
- "epoch": 0.14,
945
- "learning_rate": 3.8450000000000005e-06,
946
- "loss": 5.5284,
947
- "step": 775
948
- },
949
- {
950
- "epoch": 0.15,
951
- "learning_rate": 3.87e-06,
952
- "loss": 5.7256,
953
- "step": 780
954
- },
955
- {
956
- "epoch": 0.15,
957
- "learning_rate": 3.895000000000001e-06,
958
- "loss": 5.4687,
959
- "step": 785
960
- },
961
- {
962
- "epoch": 0.15,
963
- "learning_rate": 3.920000000000001e-06,
964
- "loss": 5.5711,
965
- "step": 790
966
- },
967
- {
968
- "epoch": 0.15,
969
- "learning_rate": 3.945e-06,
970
- "loss": 3.9495,
971
- "step": 795
972
- },
973
- {
974
- "epoch": 0.15,
975
- "learning_rate": 3.97e-06,
976
- "loss": 7.5433,
977
- "step": 800
978
- },
979
- {
980
- "epoch": 0.15,
981
- "learning_rate": 3.995000000000001e-06,
982
- "loss": 6.6596,
983
- "step": 805
984
- },
985
- {
986
- "epoch": 0.15,
987
- "learning_rate": 4.0200000000000005e-06,
988
- "loss": 3.8816,
989
- "step": 810
990
- },
991
- {
992
- "epoch": 0.15,
993
- "learning_rate": 4.045e-06,
994
- "loss": 6.0984,
995
- "step": 815
996
- },
997
- {
998
- "epoch": 0.15,
999
- "learning_rate": 4.07e-06,
1000
- "loss": 3.6398,
1001
- "step": 820
1002
- },
1003
- {
1004
- "epoch": 0.15,
1005
- "learning_rate": 4.095e-06,
1006
- "loss": 6.2519,
1007
- "step": 825
1008
- },
1009
- {
1010
- "epoch": 0.15,
1011
- "learning_rate": 4.12e-06,
1012
- "loss": 5.4348,
1013
- "step": 830
1014
- },
1015
- {
1016
- "epoch": 0.16,
1017
- "learning_rate": 4.145e-06,
1018
- "loss": 5.7349,
1019
- "step": 835
1020
- },
1021
- {
1022
- "epoch": 0.16,
1023
- "learning_rate": 4.17e-06,
1024
- "loss": 5.6213,
1025
- "step": 840
1026
- },
1027
- {
1028
- "epoch": 0.16,
1029
- "learning_rate": 4.1950000000000005e-06,
1030
- "loss": 3.7084,
1031
- "step": 845
1032
- },
1033
- {
1034
- "epoch": 0.16,
1035
- "learning_rate": 4.22e-06,
1036
- "loss": 7.2329,
1037
- "step": 850
1038
- },
1039
- {
1040
- "epoch": 0.16,
1041
- "learning_rate": 4.245e-06,
1042
- "loss": 5.7584,
1043
- "step": 855
1044
- },
1045
- {
1046
- "epoch": 0.16,
1047
- "learning_rate": 4.270000000000001e-06,
1048
- "loss": 6.9726,
1049
- "step": 860
1050
- },
1051
- {
1052
- "epoch": 0.16,
1053
- "learning_rate": 4.295e-06,
1054
- "loss": 3.7044,
1055
- "step": 865
1056
- },
1057
- {
1058
- "epoch": 0.16,
1059
- "learning_rate": 4.32e-06,
1060
- "loss": 4.0139,
1061
- "step": 870
1062
- },
1063
- {
1064
- "epoch": 0.16,
1065
- "learning_rate": 4.345000000000001e-06,
1066
- "loss": 6.19,
1067
- "step": 875
1068
- },
1069
- {
1070
- "epoch": 0.16,
1071
- "learning_rate": 4.3700000000000005e-06,
1072
- "loss": 6.5308,
1073
- "step": 880
1074
- },
1075
- {
1076
- "epoch": 0.16,
1077
- "learning_rate": 4.395e-06,
1078
- "loss": 5.7261,
1079
- "step": 885
1080
- },
1081
- {
1082
- "epoch": 0.17,
1083
- "learning_rate": 4.42e-06,
1084
- "loss": 3.8616,
1085
- "step": 890
1086
- },
1087
- {
1088
- "epoch": 0.17,
1089
- "learning_rate": 4.445000000000001e-06,
1090
- "loss": 3.4939,
1091
- "step": 895
1092
- },
1093
- {
1094
- "epoch": 0.17,
1095
- "learning_rate": 4.47e-06,
1096
- "loss": 3.5178,
1097
- "step": 900
1098
- },
1099
- {
1100
- "epoch": 0.17,
1101
- "learning_rate": 4.495e-06,
1102
- "loss": 3.6419,
1103
- "step": 905
1104
- },
1105
- {
1106
- "epoch": 0.17,
1107
- "learning_rate": 4.520000000000001e-06,
1108
- "loss": 8.8566,
1109
- "step": 910
1110
- },
1111
- {
1112
- "epoch": 0.17,
1113
- "learning_rate": 4.5450000000000005e-06,
1114
- "loss": 3.7769,
1115
- "step": 915
1116
- },
1117
- {
1118
- "epoch": 0.17,
1119
- "learning_rate": 4.57e-06,
1120
- "loss": 3.6874,
1121
- "step": 920
1122
- },
1123
- {
1124
- "epoch": 0.17,
1125
- "learning_rate": 4.595000000000001e-06,
1126
- "loss": 5.7525,
1127
- "step": 925
1128
- },
1129
- {
1130
- "epoch": 0.17,
1131
- "learning_rate": 4.620000000000001e-06,
1132
- "loss": 5.7784,
1133
- "step": 930
1134
- },
1135
- {
1136
- "epoch": 0.17,
1137
- "learning_rate": 4.645e-06,
1138
- "loss": 5.0083,
1139
- "step": 935
1140
- },
1141
- {
1142
- "epoch": 0.18,
1143
- "learning_rate": 4.670000000000001e-06,
1144
- "loss": 3.4095,
1145
- "step": 940
1146
- },
1147
- {
1148
- "epoch": 0.18,
1149
- "learning_rate": 4.695e-06,
1150
- "loss": 3.212,
1151
- "step": 945
1152
- },
1153
- {
1154
- "epoch": 0.18,
1155
- "learning_rate": 4.7200000000000005e-06,
1156
- "loss": 5.4597,
1157
- "step": 950
1158
- },
1159
- {
1160
- "epoch": 0.18,
1161
- "learning_rate": 4.745e-06,
1162
- "loss": 3.7717,
1163
- "step": 955
1164
- },
1165
- {
1166
- "epoch": 0.18,
1167
- "learning_rate": 4.77e-06,
1168
- "loss": 3.3286,
1169
- "step": 960
1170
- },
1171
- {
1172
- "epoch": 0.18,
1173
- "learning_rate": 4.795e-06,
1174
- "loss": 3.3198,
1175
- "step": 965
1176
- },
1177
- {
1178
- "epoch": 0.18,
1179
- "learning_rate": 4.8200000000000004e-06,
1180
- "loss": 3.6952,
1181
- "step": 970
1182
- },
1183
- {
1184
- "epoch": 0.18,
1185
- "learning_rate": 4.845e-06,
1186
- "loss": 3.3483,
1187
- "step": 975
1188
- },
1189
- {
1190
- "epoch": 0.18,
1191
- "learning_rate": 4.87e-06,
1192
- "loss": 4.8449,
1193
- "step": 980
1194
- },
1195
- {
1196
- "epoch": 0.18,
1197
- "learning_rate": 4.8950000000000006e-06,
1198
- "loss": 3.5239,
1199
- "step": 985
1200
- },
1201
- {
1202
- "epoch": 0.18,
1203
- "learning_rate": 4.92e-06,
1204
- "loss": 5.7172,
1205
- "step": 990
1206
- },
1207
- {
1208
- "epoch": 0.19,
1209
- "learning_rate": 4.945e-06,
1210
- "loss": 3.9009,
1211
- "step": 995
1212
- },
1213
- {
1214
- "epoch": 0.19,
1215
- "learning_rate": 4.970000000000001e-06,
1216
- "loss": 3.293,
1217
- "step": 1000
1218
- },
1219
- {
1220
- "epoch": 0.19,
1221
- "eval_loss": 5.126453876495361,
1222
- "eval_runtime": 138.3538,
1223
- "eval_samples_per_second": 8.63,
1224
- "eval_steps_per_second": 1.084,
1225
- "eval_wer": 1.0,
1226
- "step": 1000
1227
- }
1228
- ],
1229
- "max_steps": 161070,
1230
- "num_train_epochs": 30,
1231
- "total_flos": 9.309229789594176e+16,
1232
- "trial_name": null,
1233
- "trial_params": null
1234
- }
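For reference, the deleted checkpoint-1000/trainer_state.json (like the larger checkpoint-10000 state added further down) follows the Trainer state layout visible above: a log_history list mixing training entries (loss, learning_rate, step) with evaluation entries (eval_loss, eval_wer). A minimal sketch, assuming a local copy of such a file at a hypothetical path, of separating the two:

import json

# Hypothetical local path; any trainer_state.json with the layout shown above works.
with open("checkpoint-10000/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss" / "eval_wer".
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"trained for {state['global_step']} of {state['max_steps']} steps")
print(f"last logged training loss: {train_log[-1]['loss']} at step {train_log[-1]['step']}")
for e in eval_log:
    print(f"step {e['step']}: eval_loss={e['eval_loss']:.3f}, eval_wer={e['eval_wer']:.3f}")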
{checkpoint-1000 → checkpoint-10000}/config.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-10000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:756d616dea6333b020fcf966b9e165b838b91da46642986c3558f9a864af2e52
3
  size 721685265
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974c17bae60a8af69b1dfc86b972b0944c3fcc6217dc8aa5ce245127f3725de7
3
  size 721685265
{checkpoint-1000 → checkpoint-10000}/preprocessor_config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-10000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46317041606caab77c0ad60e704f2cc9a5a3d8fcd5be06e52d61b524fab2f16d
3
  size 377670039
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a4c3b7637f1160a90257ed667413bf6fbdb3abc07a2ffde8d8c69b6a165a74
3
  size 377670039
{checkpoint-1000 → checkpoint-10000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b087b4fc4ab07baf9801f2880dccc210ef12179b734d6c190e30af73f5c44b1c
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48285f1a2a5dbe8af4d92ea822a7b060c4c7506f39cfd07e1367a72cd2774924
3
+ size 14503
{checkpoint-500 → checkpoint-10000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e23af8c1bb1f8592677c1178bd4b2c7bc377906569378fcbd429440c1eb9f6f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9659dd2198192a9ef7113aeaaca50ec77c302792b18f5121d119b183466afdd7
3
  size 559
{checkpoint-1000 → checkpoint-10000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42571f4e0abc1ba5c0353b7a97b263ea72c970799a16a8b489420ffa80a0f6b7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00cecb50e7f9d3453ded24f5442c6f91d31645a98736fbd7ac47c63afc645e48
3
  size 623
checkpoint-10000/trainer_state.json ADDED
The diff for this file is too large to render.
 
{checkpoint-500 → checkpoint-10000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:922e76dc3691c51d5f9343a92f5af52216f5d3cea75030305b0ccd89e28a94a7
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad4f6c4322fc85af69a20adc4630b589ea417dfb95da2589d88f3c258fb8b0d0
3
  size 2991
{checkpoint-500 → checkpoint-10500}/config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-10500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:805063f6f76295565aa2446ad59a4471544db1f20c8e3665c94833fd00aca33a
3
  size 721685265
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40191cc65518139b17215be1cbb9b1bee0de259609e6b3904724bb028d04cdec
3
  size 721685265
{checkpoint-500 → checkpoint-10500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-10500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10c2b84917b1857d55f5a4d2225cefc3ffa1cf416c8b2b1f2b19bb5b6706209e
3
  size 377670039
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8343a47858e8beffda9c9f367d0f1b6648124763843a80fef290981e8fd49ee5
3
  size 377670039
{checkpoint-500 → checkpoint-10500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d175baed4be3fac4c9221252b16cc3e710a9904aa63cdbad7cdc197ffa7a3fcb
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7145e306023eff99b74e97622186354f5d61aa0f2bc714993ec3776c5607656d
3
  size 14567
{checkpoint-1000 → checkpoint-10500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020228eb3ae095194ff32686b7e36027b299ba9209b4ba69c90b4ac3aaf606cd
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec1d4d7bb1223fd066907b3eb7435fb3b67f5c51044839039b9cd3036408e37
3
  size 559
{checkpoint-500 → checkpoint-10500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffd7f7416190754d4a242098b711847951ae6b96ee11f204d1d53a1c9d36e872
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea3fa22f2a861825b32f5123ce426355b146d457768e67f65dfa0a6dbfb05dd
3
  size 623
checkpoint-10500/trainer_state.json ADDED
The diff for this file is too large to render.
 
{checkpoint-1000 → checkpoint-10500}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:922e76dc3691c51d5f9343a92f5af52216f5d3cea75030305b0ccd89e28a94a7
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad4f6c4322fc85af69a20adc4630b589ea417dfb95da2589d88f3c258fb8b0d0
3
  size 2991
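Each RENAMED entry above is a Git LFS pointer file, so the diff only shows the tracked object's sha256 oid and size changing. A minimal sketch, assuming the large files have already been pulled locally, of checking a downloaded file against the oid recorded in its pointer (the path and oid below are copied from the checkpoint-10000/optimizer.pt entry above, purely as an example):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the checkpoint-10000/optimizer.pt pointer above.
expected = "974c17bae60a8af69b1dfc86b972b0944c3fcc6217dc8aa5ce245127f3725de7"
print(sha256_of("checkpoint-10000/optimizer.pt") == expected)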
checkpoint-500/trainer_state.json DELETED
@@ -1,625 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.09312721177127957,
5
- "global_step": 500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 1e-08,
13
- "loss": 24.0683,
14
- "step": 5
15
- },
16
- {
17
- "epoch": 0.0,
18
- "learning_rate": 3.5e-08,
19
- "loss": 17.9067,
20
- "step": 10
21
- },
22
- {
23
- "epoch": 0.0,
24
- "learning_rate": 6.000000000000001e-08,
25
- "loss": 16.9004,
26
- "step": 15
27
- },
28
- {
29
- "epoch": 0.0,
30
- "learning_rate": 8.500000000000001e-08,
31
- "loss": 16.8411,
32
- "step": 20
33
- },
34
- {
35
- "epoch": 0.0,
36
- "learning_rate": 1.1e-07,
37
- "loss": 15.0697,
38
- "step": 25
39
- },
40
- {
41
- "epoch": 0.01,
42
- "learning_rate": 1.35e-07,
43
- "loss": 15.2061,
44
- "step": 30
45
- },
46
- {
47
- "epoch": 0.01,
48
- "learning_rate": 1.6e-07,
49
- "loss": 14.8504,
50
- "step": 35
51
- },
52
- {
53
- "epoch": 0.01,
54
- "learning_rate": 1.85e-07,
55
- "loss": 14.6816,
56
- "step": 40
57
- },
58
- {
59
- "epoch": 0.01,
60
- "learning_rate": 2.1000000000000003e-07,
61
- "loss": 14.9665,
62
- "step": 45
63
- },
64
- {
65
- "epoch": 0.01,
66
- "learning_rate": 2.3500000000000003e-07,
67
- "loss": 14.3427,
68
- "step": 50
69
- },
70
- {
71
- "epoch": 0.01,
72
- "learning_rate": 2.6e-07,
73
- "loss": 18.4574,
74
- "step": 55
75
- },
76
- {
77
- "epoch": 0.01,
78
- "learning_rate": 2.8e-07,
79
- "loss": 18.379,
80
- "step": 60
81
- },
82
- {
83
- "epoch": 0.01,
84
- "learning_rate": 3.0500000000000004e-07,
85
- "loss": 17.7823,
86
- "step": 65
87
- },
88
- {
89
- "epoch": 0.01,
90
- "learning_rate": 3.3e-07,
91
- "loss": 15.2998,
92
- "step": 70
93
- },
94
- {
95
- "epoch": 0.01,
96
- "learning_rate": 3.55e-07,
97
- "loss": 15.2838,
98
- "step": 75
99
- },
100
- {
101
- "epoch": 0.01,
102
- "learning_rate": 3.8e-07,
103
- "loss": 14.8496,
104
- "step": 80
105
- },
106
- {
107
- "epoch": 0.02,
108
- "learning_rate": 4.0500000000000004e-07,
109
- "loss": 14.3004,
110
- "step": 85
111
- },
112
- {
113
- "epoch": 0.02,
114
- "learning_rate": 4.3e-07,
115
- "loss": 14.2005,
116
- "step": 90
117
- },
118
- {
119
- "epoch": 0.02,
120
- "learning_rate": 4.5500000000000004e-07,
121
- "loss": 14.5054,
122
- "step": 95
123
- },
124
- {
125
- "epoch": 0.02,
126
- "learning_rate": 4.800000000000001e-07,
127
- "loss": 14.3888,
128
- "step": 100
129
- },
130
- {
131
- "epoch": 0.02,
132
- "learning_rate": 5.05e-07,
133
- "loss": 17.3825,
134
- "step": 105
135
- },
136
- {
137
- "epoch": 0.02,
138
- "learning_rate": 5.3e-07,
139
- "loss": 17.0812,
140
- "step": 110
141
- },
142
- {
143
- "epoch": 0.02,
144
- "learning_rate": 5.550000000000001e-07,
145
- "loss": 17.0701,
146
- "step": 115
147
- },
148
- {
149
- "epoch": 0.02,
150
- "learning_rate": 5.800000000000001e-07,
151
- "loss": 14.9025,
152
- "step": 120
153
- },
154
- {
155
- "epoch": 0.02,
156
- "learning_rate": 6.05e-07,
157
- "loss": 14.4225,
158
- "step": 125
159
- },
160
- {
161
- "epoch": 0.02,
162
- "learning_rate": 6.3e-07,
163
- "loss": 13.9425,
164
- "step": 130
165
- },
166
- {
167
- "epoch": 0.03,
168
- "learning_rate": 6.550000000000001e-07,
169
- "loss": 13.8037,
170
- "step": 135
171
- },
172
- {
173
- "epoch": 0.03,
174
- "learning_rate": 6.800000000000001e-07,
175
- "loss": 13.4372,
176
- "step": 140
177
- },
178
- {
179
- "epoch": 0.03,
180
- "learning_rate": 7.05e-07,
181
- "loss": 13.6807,
182
- "step": 145
183
- },
184
- {
185
- "epoch": 0.03,
186
- "learning_rate": 7.3e-07,
187
- "loss": 13.742,
188
- "step": 150
189
- },
190
- {
191
- "epoch": 0.03,
192
- "learning_rate": 7.550000000000001e-07,
193
- "loss": 15.1545,
194
- "step": 155
195
- },
196
- {
197
- "epoch": 0.03,
198
- "learning_rate": 7.8e-07,
199
- "loss": 16.1792,
200
- "step": 160
201
- },
202
- {
203
- "epoch": 0.03,
204
- "learning_rate": 8.000000000000001e-07,
205
- "loss": 14.6018,
206
- "step": 165
207
- },
208
- {
209
- "epoch": 0.03,
210
- "learning_rate": 8.250000000000001e-07,
211
- "loss": 12.6032,
212
- "step": 170
213
- },
214
- {
215
- "epoch": 0.03,
216
- "learning_rate": 8.500000000000001e-07,
217
- "loss": 13.0217,
218
- "step": 175
219
- },
220
- {
221
- "epoch": 0.03,
222
- "learning_rate": 8.75e-07,
223
- "loss": 12.2336,
224
- "step": 180
225
- },
226
- {
227
- "epoch": 0.03,
228
- "learning_rate": 9.000000000000001e-07,
229
- "loss": 11.8643,
230
- "step": 185
231
- },
232
- {
233
- "epoch": 0.04,
234
- "learning_rate": 9.25e-07,
235
- "loss": 11.8572,
236
- "step": 190
237
- },
238
- {
239
- "epoch": 0.04,
240
- "learning_rate": 9.500000000000001e-07,
241
- "loss": 10.8976,
242
- "step": 195
243
- },
244
- {
245
- "epoch": 0.04,
246
- "learning_rate": 9.750000000000002e-07,
247
- "loss": 12.2617,
248
- "step": 200
249
- },
250
- {
251
- "epoch": 0.04,
252
- "learning_rate": 1.0000000000000002e-06,
253
- "loss": 13.7753,
254
- "step": 205
255
- },
256
- {
257
- "epoch": 0.04,
258
- "learning_rate": 1.025e-06,
259
- "loss": 11.6137,
260
- "step": 210
261
- },
262
- {
263
- "epoch": 0.04,
264
- "learning_rate": 1.0500000000000001e-06,
265
- "loss": 10.6402,
266
- "step": 215
267
- },
268
- {
269
- "epoch": 0.04,
270
- "learning_rate": 1.075e-06,
271
- "loss": 12.9591,
272
- "step": 220
273
- },
274
- {
275
- "epoch": 0.04,
276
- "learning_rate": 1.1e-06,
277
- "loss": 9.8613,
278
- "step": 225
279
- },
280
- {
281
- "epoch": 0.04,
282
- "learning_rate": 1.125e-06,
283
- "loss": 10.487,
284
- "step": 230
285
- },
286
- {
287
- "epoch": 0.04,
288
- "learning_rate": 1.1500000000000002e-06,
289
- "loss": 9.3773,
290
- "step": 235
291
- },
292
- {
293
- "epoch": 0.04,
294
- "learning_rate": 1.175e-06,
295
- "loss": 9.5665,
296
- "step": 240
297
- },
298
- {
299
- "epoch": 0.05,
300
- "learning_rate": 1.2000000000000002e-06,
301
- "loss": 8.2941,
302
- "step": 245
303
- },
304
- {
305
- "epoch": 0.05,
306
- "learning_rate": 1.2250000000000001e-06,
307
- "loss": 8.5563,
308
- "step": 250
309
- },
310
- {
311
- "epoch": 0.05,
312
- "learning_rate": 1.25e-06,
313
- "loss": 10.4941,
314
- "step": 255
315
- },
316
- {
317
- "epoch": 0.05,
318
- "learning_rate": 1.275e-06,
319
- "loss": 8.4986,
320
- "step": 260
321
- },
322
- {
323
- "epoch": 0.05,
324
- "learning_rate": 1.3e-06,
325
- "loss": 10.2295,
326
- "step": 265
327
- },
328
- {
329
- "epoch": 0.05,
330
- "learning_rate": 1.3250000000000002e-06,
331
- "loss": 8.7026,
332
- "step": 270
333
- },
334
- {
335
- "epoch": 0.05,
336
- "learning_rate": 1.3500000000000002e-06,
337
- "loss": 8.6009,
338
- "step": 275
339
- },
340
- {
341
- "epoch": 0.05,
342
- "learning_rate": 1.3750000000000002e-06,
343
- "loss": 7.7613,
344
- "step": 280
345
- },
346
- {
347
- "epoch": 0.05,
348
- "learning_rate": 1.4000000000000001e-06,
349
- "loss": 7.8609,
350
- "step": 285
351
- },
352
- {
353
- "epoch": 0.05,
354
- "learning_rate": 1.425e-06,
355
- "loss": 7.0097,
356
- "step": 290
357
- },
358
- {
359
- "epoch": 0.05,
360
- "learning_rate": 1.45e-06,
361
- "loss": 5.5692,
362
- "step": 295
363
- },
364
- {
365
- "epoch": 0.06,
366
- "learning_rate": 1.475e-06,
367
- "loss": 5.6402,
368
- "step": 300
369
- },
370
- {
371
- "epoch": 0.06,
372
- "learning_rate": 1.5e-06,
373
- "loss": 9.0815,
374
- "step": 305
375
- },
376
- {
377
- "epoch": 0.06,
378
- "learning_rate": 1.525e-06,
379
- "loss": 8.0803,
380
- "step": 310
381
- },
382
- {
383
- "epoch": 0.06,
384
- "learning_rate": 1.5500000000000002e-06,
385
- "loss": 7.6229,
386
- "step": 315
387
- },
388
- {
389
- "epoch": 0.06,
390
- "learning_rate": 1.5750000000000002e-06,
391
- "loss": 5.5354,
392
- "step": 320
393
- },
394
- {
395
- "epoch": 0.06,
396
- "learning_rate": 1.6000000000000001e-06,
397
- "loss": 8.1564,
398
- "step": 325
399
- },
400
- {
401
- "epoch": 0.06,
402
- "learning_rate": 1.6250000000000001e-06,
403
- "loss": 7.0378,
404
- "step": 330
405
- },
406
- {
407
- "epoch": 0.06,
408
- "learning_rate": 1.6500000000000003e-06,
409
- "loss": 5.3662,
410
- "step": 335
411
- },
412
- {
413
- "epoch": 0.06,
414
- "learning_rate": 1.6750000000000003e-06,
415
- "loss": 7.6436,
416
- "step": 340
417
- },
418
- {
419
- "epoch": 0.06,
420
- "learning_rate": 1.7000000000000002e-06,
421
- "loss": 5.3403,
422
- "step": 345
423
- },
424
- {
425
- "epoch": 0.07,
426
- "learning_rate": 1.725e-06,
427
- "loss": 8.1018,
428
- "step": 350
429
- },
430
- {
431
- "epoch": 0.07,
432
- "learning_rate": 1.745e-06,
433
- "loss": 7.5364,
434
- "step": 355
435
- },
436
- {
437
- "epoch": 0.07,
438
- "learning_rate": 1.77e-06,
439
- "loss": 5.2764,
440
- "step": 360
441
- },
442
- {
443
- "epoch": 0.07,
444
- "learning_rate": 1.7950000000000002e-06,
445
- "loss": 6.4889,
446
- "step": 365
447
- },
448
- {
449
- "epoch": 0.07,
450
- "learning_rate": 1.8200000000000002e-06,
451
- "loss": 5.1625,
452
- "step": 370
453
- },
454
- {
455
- "epoch": 0.07,
456
- "learning_rate": 1.8450000000000001e-06,
457
- "loss": 7.4417,
458
- "step": 375
459
- },
460
- {
461
- "epoch": 0.07,
462
- "learning_rate": 1.87e-06,
463
- "loss": 4.8924,
464
- "step": 380
465
- },
466
- {
467
- "epoch": 0.07,
468
- "learning_rate": 1.895e-06,
469
- "loss": 9.9698,
470
- "step": 385
471
- },
472
- {
473
- "epoch": 0.07,
474
- "learning_rate": 1.9200000000000003e-06,
475
- "loss": 7.4896,
476
- "step": 390
477
- },
478
- {
479
- "epoch": 0.07,
480
- "learning_rate": 1.945e-06,
481
- "loss": 6.7582,
482
- "step": 395
483
- },
484
- {
485
- "epoch": 0.07,
486
- "learning_rate": 1.97e-06,
487
- "loss": 6.0409,
488
- "step": 400
489
- },
490
- {
491
- "epoch": 0.08,
492
- "learning_rate": 1.9950000000000004e-06,
493
- "loss": 7.4142,
494
- "step": 405
495
- },
496
- {
497
- "epoch": 0.08,
498
- "learning_rate": 2.02e-06,
499
- "loss": 7.9007,
500
- "step": 410
501
- },
502
- {
503
- "epoch": 0.08,
504
- "learning_rate": 2.045e-06,
505
- "loss": 5.15,
506
- "step": 415
507
- },
508
- {
509
- "epoch": 0.08,
510
- "learning_rate": 2.07e-06,
511
- "loss": 5.6287,
512
- "step": 420
513
- },
514
- {
515
- "epoch": 0.08,
516
- "learning_rate": 2.0950000000000003e-06,
517
- "loss": 4.7277,
518
- "step": 425
519
- },
520
- {
521
- "epoch": 0.08,
522
- "learning_rate": 2.12e-06,
523
- "loss": 4.9955,
524
- "step": 430
525
- },
526
- {
527
- "epoch": 0.08,
528
- "learning_rate": 2.1450000000000002e-06,
529
- "loss": 6.4111,
530
- "step": 435
531
- },
532
- {
533
- "epoch": 0.08,
534
- "learning_rate": 2.17e-06,
535
- "loss": 5.7243,
536
- "step": 440
537
- },
538
- {
539
- "epoch": 0.08,
540
- "learning_rate": 2.195e-06,
541
- "loss": 5.3146,
542
- "step": 445
543
- },
544
- {
545
- "epoch": 0.08,
546
- "learning_rate": 2.2200000000000003e-06,
547
- "loss": 4.8272,
548
- "step": 450
549
- },
550
- {
551
- "epoch": 0.08,
552
- "learning_rate": 2.245e-06,
553
- "loss": 6.3851,
554
- "step": 455
555
- },
556
- {
557
- "epoch": 0.09,
558
- "learning_rate": 2.2700000000000003e-06,
559
- "loss": 5.0858,
560
- "step": 460
561
- },
562
- {
563
- "epoch": 0.09,
564
- "learning_rate": 2.2950000000000005e-06,
565
- "loss": 13.9773,
566
- "step": 465
567
- },
568
- {
569
- "epoch": 0.09,
570
- "learning_rate": 2.3200000000000002e-06,
571
- "loss": 5.1323,
572
- "step": 470
573
- },
574
- {
575
- "epoch": 0.09,
576
- "learning_rate": 2.345e-06,
577
- "loss": 4.5519,
578
- "step": 475
579
- },
580
- {
581
- "epoch": 0.09,
582
- "learning_rate": 2.37e-06,
583
- "loss": 4.9828,
584
- "step": 480
585
- },
586
- {
587
- "epoch": 0.09,
588
- "learning_rate": 2.395e-06,
589
- "loss": 10.9601,
590
- "step": 485
591
- },
592
- {
593
- "epoch": 0.09,
594
- "learning_rate": 2.42e-06,
595
- "loss": 4.715,
596
- "step": 490
597
- },
598
- {
599
- "epoch": 0.09,
600
- "learning_rate": 2.4450000000000003e-06,
601
- "loss": 6.1112,
602
- "step": 495
603
- },
604
- {
605
- "epoch": 0.09,
606
- "learning_rate": 2.47e-06,
607
- "loss": 4.8677,
608
- "step": 500
609
- },
610
- {
611
- "epoch": 0.09,
612
- "eval_loss": 6.384158611297607,
613
- "eval_runtime": 97.6612,
614
- "eval_samples_per_second": 12.226,
615
- "eval_steps_per_second": 1.536,
616
- "eval_wer": 1.0,
617
- "step": 500
618
- }
619
- ],
620
- "max_steps": 161070,
621
- "num_train_epochs": 30,
622
- "total_flos": 4.640032532404224e+16,
623
- "trial_name": null,
624
- "trial_params": null
625
- }
config.json ADDED
@@ -0,0 +1,109 @@
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-base-it-voxpopuli",
3
+ "activation_dropout": 0.1,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 256,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": false,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": false,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "group",
52
+ "feat_proj_dropout": 0.1,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.1,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_dropout_prob": 0.1,
59
+ "hidden_size": 768,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 3072,
62
+ "layer_norm_eps": 1e-05,
63
+ "layerdrop": 0.1,
64
+ "mask_feature_length": 10,
65
+ "mask_feature_min_masks": 0,
66
+ "mask_feature_prob": 0.0,
67
+ "mask_time_length": 10,
68
+ "mask_time_min_masks": 2,
69
+ "mask_time_prob": 0.05,
70
+ "model_type": "wav2vec2",
71
+ "num_adapter_layers": 3,
72
+ "num_attention_heads": 12,
73
+ "num_codevector_groups": 2,
74
+ "num_codevectors_per_group": 320,
75
+ "num_conv_pos_embedding_groups": 16,
76
+ "num_conv_pos_embeddings": 128,
77
+ "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 12,
79
+ "num_negatives": 100,
80
+ "output_hidden_size": 768,
81
+ "pad_token_id": 25,
82
+ "proj_codevector_dim": 256,
83
+ "tdnn_dilation": [
84
+ 1,
85
+ 2,
86
+ 3,
87
+ 1,
88
+ 1
89
+ ],
90
+ "tdnn_dim": [
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 512,
95
+ 1500
96
+ ],
97
+ "tdnn_kernel": [
98
+ 5,
99
+ 3,
100
+ 3,
101
+ 1,
102
+ 1
103
+ ],
104
+ "torch_dtype": "float32",
105
+ "transformers_version": "4.16.1",
106
+ "use_weighted_layer_sum": false,
107
+ "vocab_size": 32,
108
+ "xvector_output_dim": 512
109
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": false,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305de1aa1ae7a26a327384b3f9b1f352cf0439dc42ca0bcd4d22eaa488a1a39d
3
+ size 377670039
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad4f6c4322fc85af69a20adc4630b589ea417dfb95da2589d88f3c258fb8b0d0
3
+ size 2991
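With config.json, preprocessor_config.json and pytorch_model.bin now added at the repository root, the fine-tuned acoustic model can be loaded directly with transformers. A minimal sketch, assuming a local clone of the repository at a hypothetical path (this commit does not add a tokenizer/vocab file, so only the model and the feature extractor are loaded here):

from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC

repo_dir = "./wav2vec2-base-voxpopuli-finetuned"  # hypothetical local clone of this repo

model = Wav2Vec2ForCTC.from_pretrained(repo_dir)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(repo_dir)

print(model.config.vocab_size)          # 32, per config.json above
print(feature_extractor.sampling_rate)  # 16000, per preprocessor_config.json above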