TenzinGayche committed on
Commit
da8f1b3
•
1 Parent(s): b09b7d8

Training in progress, step 3500

Browse files
{checkpoint-2300 → checkpoint-3400}/config.json RENAMED
File without changes
{checkpoint-2300 → checkpoint-3400}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbad45bff05a119c155307c8c11c1892878504528085f311ec7cc2bfbec75f92
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0155669d8d78dc52747668d0b42eb560e4765834195634a0b4b47be31ac20d8b
3
  size 2490946501
{checkpoint-2300 → checkpoint-3400}/preprocessor_config.json RENAMED
File without changes
{checkpoint-2400 → checkpoint-3400}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66e4f13af4571dc4fc776d37a670bd005d274231a78ff352023ba5c4fadd8614
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32217beff331fa3114f28e0275103916da8ce5a7181c533f9c804cbde37f309b
3
  size 1262344621
{checkpoint-2400 → checkpoint-3400}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89ee801de1abf33cd6ebc4f0b38240fe2dfecaa66947acc7503d6a21ded7f76f
3
  size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c4434b2064b73f9bf293826024571f12f809882a08cda4c28bec84dbe416b42
3
  size 14639
{checkpoint-2400 → checkpoint-3400}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dcc907e7c7cdb7f74446d1478d9321af62972312f69212a9da63fbf093fd591
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517b3951b7b7fa883fc465ac17bdbebbf42c218ce832245b7095ad857dd3ea8b
3
  size 557
{checkpoint-2300 → checkpoint-3400}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cf6461683881fab2828ae13727bd0d098da0dff54d9acaddb978ed9c072d839
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea8e60bb0e12681fe1a9e00ddfc643b756dfc2093beb59f68e1a27419f359de
3
  size 627
{checkpoint-2400 → checkpoint-3400}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.3265306122449,
5
- "global_step": 2400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -510,11 +510,221 @@
510
  "eval_samples_per_second": 23.537,
511
  "eval_steps_per_second": 2.962,
512
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  }
514
  ],
515
  "max_steps": 3675,
516
  "num_train_epochs": 25,
517
- "total_flos": 9.330801216434254e+18,
518
  "trial_name": null,
519
  "trial_params": null
520
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.12925170068027,
5
+ "global_step": 3400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
510
  "eval_samples_per_second": 23.537,
511
  "eval_steps_per_second": 2.962,
512
  "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
+ },
619
+ {
620
+ "epoch": 20.07,
621
+ "learning_rate": 6.878740157480315e-05,
622
+ "loss": 0.1773,
623
+ "step": 2950
624
+ },
625
+ {
626
+ "epoch": 20.41,
627
+ "learning_rate": 6.406299212598424e-05,
628
+ "loss": 0.185,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 20.41,
633
+ "eval_cer": 0.1447578672753954,
634
+ "eval_loss": 0.36554473638534546,
635
+ "eval_runtime": 24.7061,
636
+ "eval_samples_per_second": 23.8,
637
+ "eval_steps_per_second": 2.995,
638
+ "step": 3000
639
+ },
640
+ {
641
+ "epoch": 20.75,
642
+ "learning_rate": 5.933858267716535e-05,
643
+ "loss": 0.1735,
644
+ "step": 3050
645
+ },
646
+ {
647
+ "epoch": 21.09,
648
+ "learning_rate": 5.461417322834645e-05,
649
+ "loss": 0.1599,
650
+ "step": 3100
651
+ },
652
+ {
653
+ "epoch": 21.09,
654
+ "eval_cer": 0.14867112343062122,
655
+ "eval_loss": 0.37344449758529663,
656
+ "eval_runtime": 24.8752,
657
+ "eval_samples_per_second": 23.638,
658
+ "eval_steps_per_second": 2.975,
659
+ "step": 3100
660
+ },
661
+ {
662
+ "epoch": 21.43,
663
+ "learning_rate": 4.9889763779527555e-05,
664
+ "loss": 0.1578,
665
+ "step": 3150
666
+ },
667
+ {
668
+ "epoch": 21.77,
669
+ "learning_rate": 4.516535433070866e-05,
670
+ "loss": 0.1742,
671
+ "step": 3200
672
+ },
673
+ {
674
+ "epoch": 21.77,
675
+ "eval_cer": 0.148638512962661,
676
+ "eval_loss": 0.3685232102870941,
677
+ "eval_runtime": 25.1361,
678
+ "eval_samples_per_second": 23.393,
679
+ "eval_steps_per_second": 2.944,
680
+ "step": 3200
681
+ },
682
+ {
683
+ "epoch": 22.11,
684
+ "learning_rate": 4.0440944881889764e-05,
685
+ "loss": 0.1666,
686
+ "step": 3250
687
+ },
688
+ {
689
+ "epoch": 22.45,
690
+ "learning_rate": 3.571653543307086e-05,
691
+ "loss": 0.1605,
692
+ "step": 3300
693
+ },
694
+ {
695
+ "epoch": 22.45,
696
+ "eval_cer": 0.14746453611609325,
697
+ "eval_loss": 0.3691498637199402,
698
+ "eval_runtime": 24.7916,
699
+ "eval_samples_per_second": 23.718,
700
+ "eval_steps_per_second": 2.985,
701
+ "step": 3300
702
+ },
703
+ {
704
+ "epoch": 22.79,
705
+ "learning_rate": 3.0992125984251966e-05,
706
+ "loss": 0.1576,
707
+ "step": 3350
708
+ },
709
+ {
710
+ "epoch": 23.13,
711
+ "learning_rate": 2.626771653543307e-05,
712
+ "loss": 0.158,
713
+ "step": 3400
714
+ },
715
+ {
716
+ "epoch": 23.13,
717
+ "eval_cer": 0.14792108266753629,
718
+ "eval_loss": 0.3718402683734894,
719
+ "eval_runtime": 25.1969,
720
+ "eval_samples_per_second": 23.336,
721
+ "eval_steps_per_second": 2.937,
722
+ "step": 3400
723
  }
724
  ],
725
  "max_steps": 3675,
726
  "num_train_epochs": 25,
727
+ "total_flos": 1.3212293608860776e+19,
728
  "trial_name": null,
729
  "trial_params": null
730
  }
{checkpoint-2300 → checkpoint-3400}/training_args.bin RENAMED
File without changes
{checkpoint-2400 → checkpoint-3500}/config.json RENAMED
File without changes
{checkpoint-2400 → checkpoint-3500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21ec413dbf6dce3f66f0847998faf0727145082a010278c772b5854d5056f118
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f04041306edbbac18605924f8df50a5e03706a70792d783f76ed32424300e2c4
3
  size 2490946501
{checkpoint-2400 → checkpoint-3500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-2300 → checkpoint-3500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b720f0acc795f0ce8dfdc5369d926dbb4f0c576ab9a917bc26dcfdd9c8bb55f7
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:818fb31b610a3b7a22588d0ae86d69e50589408512a65b1d7fc146223ccf1b1b
3
  size 1262344621
{checkpoint-2300 → checkpoint-3500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c10f001cd94004ed8e139f1abb106e02ae7c2d0fce73d744121e622abf6f82fb
3
- size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dafc311b7e5f564adf4679af0330bb536167c71f804b37ac8f873865b58cfa2b
3
+ size 14575
{checkpoint-2300 → checkpoint-3500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd56176e725920af6b7720880038e0e973fdda3a8cf9ee899ad7b77393e85f3c
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0b7ce33c47aa1ee0ba2ffb842fb2976f29bf5cd6cf51d56b243ce711b00be22
3
  size 557
{checkpoint-2400 → checkpoint-3500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3212d7bddbe681e165c9fd72ab4f4e8de8c824c01389aa4a534800797bb190ca
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99a01f1abc9bc9b7a6d49af45a235647089ddad25aebc02aaf8c80d94ad3699d
3
  size 627
{checkpoint-2300 → checkpoint-3500}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.646258503401361,
5
- "global_step": 2300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -489,11 +489,263 @@
489
  "eval_samples_per_second": 23.932,
490
  "eval_steps_per_second": 3.012,
491
  "step": 2300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  }
493
  ],
494
  "max_steps": 3675,
495
  "num_train_epochs": 25,
496
- "total_flos": 8.938326603459674e+18,
497
  "trial_name": null,
498
  "trial_params": null
499
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.80952380952381,
5
+ "global_step": 3500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
489
  "eval_samples_per_second": 23.932,
490
  "eval_steps_per_second": 3.012,
491
  "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
+ },
619
+ {
620
+ "epoch": 20.07,
621
+ "learning_rate": 6.878740157480315e-05,
622
+ "loss": 0.1773,
623
+ "step": 2950
624
+ },
625
+ {
626
+ "epoch": 20.41,
627
+ "learning_rate": 6.406299212598424e-05,
628
+ "loss": 0.185,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 20.41,
633
+ "eval_cer": 0.1447578672753954,
634
+ "eval_loss": 0.36554473638534546,
635
+ "eval_runtime": 24.7061,
636
+ "eval_samples_per_second": 23.8,
637
+ "eval_steps_per_second": 2.995,
638
+ "step": 3000
639
+ },
640
+ {
641
+ "epoch": 20.75,
642
+ "learning_rate": 5.933858267716535e-05,
643
+ "loss": 0.1735,
644
+ "step": 3050
645
+ },
646
+ {
647
+ "epoch": 21.09,
648
+ "learning_rate": 5.461417322834645e-05,
649
+ "loss": 0.1599,
650
+ "step": 3100
651
+ },
652
+ {
653
+ "epoch": 21.09,
654
+ "eval_cer": 0.14867112343062122,
655
+ "eval_loss": 0.37344449758529663,
656
+ "eval_runtime": 24.8752,
657
+ "eval_samples_per_second": 23.638,
658
+ "eval_steps_per_second": 2.975,
659
+ "step": 3100
660
+ },
661
+ {
662
+ "epoch": 21.43,
663
+ "learning_rate": 4.9889763779527555e-05,
664
+ "loss": 0.1578,
665
+ "step": 3150
666
+ },
667
+ {
668
+ "epoch": 21.77,
669
+ "learning_rate": 4.516535433070866e-05,
670
+ "loss": 0.1742,
671
+ "step": 3200
672
+ },
673
+ {
674
+ "epoch": 21.77,
675
+ "eval_cer": 0.148638512962661,
676
+ "eval_loss": 0.3685232102870941,
677
+ "eval_runtime": 25.1361,
678
+ "eval_samples_per_second": 23.393,
679
+ "eval_steps_per_second": 2.944,
680
+ "step": 3200
681
+ },
682
+ {
683
+ "epoch": 22.11,
684
+ "learning_rate": 4.0440944881889764e-05,
685
+ "loss": 0.1666,
686
+ "step": 3250
687
+ },
688
+ {
689
+ "epoch": 22.45,
690
+ "learning_rate": 3.571653543307086e-05,
691
+ "loss": 0.1605,
692
+ "step": 3300
693
+ },
694
+ {
695
+ "epoch": 22.45,
696
+ "eval_cer": 0.14746453611609325,
697
+ "eval_loss": 0.3691498637199402,
698
+ "eval_runtime": 24.7916,
699
+ "eval_samples_per_second": 23.718,
700
+ "eval_steps_per_second": 2.985,
701
+ "step": 3300
702
+ },
703
+ {
704
+ "epoch": 22.79,
705
+ "learning_rate": 3.0992125984251966e-05,
706
+ "loss": 0.1576,
707
+ "step": 3350
708
+ },
709
+ {
710
+ "epoch": 23.13,
711
+ "learning_rate": 2.626771653543307e-05,
712
+ "loss": 0.158,
713
+ "step": 3400
714
+ },
715
+ {
716
+ "epoch": 23.13,
717
+ "eval_cer": 0.14792108266753629,
718
+ "eval_loss": 0.3718402683734894,
719
+ "eval_runtime": 25.1969,
720
+ "eval_samples_per_second": 23.336,
721
+ "eval_steps_per_second": 2.937,
722
+ "step": 3400
723
+ },
724
+ {
725
+ "epoch": 23.47,
726
+ "learning_rate": 2.1543307086614174e-05,
727
+ "loss": 0.1609,
728
+ "step": 3450
729
+ },
730
+ {
731
+ "epoch": 23.81,
732
+ "learning_rate": 1.6818897637795275e-05,
733
+ "loss": 0.1475,
734
+ "step": 3500
735
+ },
736
+ {
737
+ "epoch": 23.81,
738
+ "eval_cer": 0.14671449535300832,
739
+ "eval_loss": 0.37474170327186584,
740
+ "eval_runtime": 24.7655,
741
+ "eval_samples_per_second": 23.743,
742
+ "eval_steps_per_second": 2.988,
743
+ "step": 3500
744
  }
745
  ],
746
  "max_steps": 3675,
747
  "num_train_epochs": 25,
748
+ "total_flos": 1.3594647658299998e+19,
749
  "trial_name": null,
750
  "trial_params": null
751
  }
{checkpoint-2400 → checkpoint-3500}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71b3268161afb2bcdf8b28a1a1d911f835d6d1a33dd0c929b5d6cf7248b065dc
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:818fb31b610a3b7a22588d0ae86d69e50589408512a65b1d7fc146223ccf1b1b
3
  size 1262344621