marcel committed on
Commit
1c3dcb9
1 Parent(s): a809a2f
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +101 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dc817f3687769e04e797f4a2d65e705085e218a0e605bfbc439368268116963
3
  size 2490339591
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0945f1bf7c98f4ac934097e8ef8ab2919316ce53ea4cc4751119da54d414cda7
3
  size 2490339591
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9fe9ece7c8d596406cb8184e772cd600ac8713a89bd8b7cfcd2b5fcf1aef922
3
  size 1262065048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09990b9efc7ffabb4bdc496b4da095e3488af429a66a70ca846910f231ab2e3
3
  size 1262065048
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b8d971c35266efc366e4ed2b2f0e0bd8cccd0cc38cab1592331781a3b1c85ea
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37249dc8b4dcbc8b9631b65ac84943e7b2fc20fb2b2e37f4921afe4b0f120bdd
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.904374364191252,
5
- "global_step": 17600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -622,11 +622,109 @@
622
  "eval_samples_per_second": 7.935,
623
  "eval_wer": 0.3484029484029484,
624
  "step": 17600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  }
626
  ],
627
  "max_steps": 29490,
628
  "num_train_epochs": 30,
629
- "total_flos": 9.766671614435893e+19,
630
  "trial_name": null,
631
  "trial_params": null
632
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.752797558494404,
5
+ "global_step": 20400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
622
  "eval_samples_per_second": 7.935,
623
  "eval_wer": 0.3484029484029484,
624
  "step": 17600
625
+ },
626
+ {
627
+ "epoch": 18.31,
628
+ "learning_rate": 0.00011890307002414625,
629
+ "loss": 0.0582,
630
+ "step": 18000
631
+ },
632
+ {
633
+ "epoch": 18.31,
634
+ "eval_loss": 0.47386595606803894,
635
+ "eval_runtime": 238.3257,
636
+ "eval_samples_per_second": 7.851,
637
+ "eval_wer": 0.34864864864864864,
638
+ "step": 18000
639
+ },
640
+ {
641
+ "epoch": 18.72,
642
+ "learning_rate": 0.00011476371162469816,
643
+ "loss": 0.0593,
644
+ "step": 18400
645
+ },
646
+ {
647
+ "epoch": 18.72,
648
+ "eval_loss": 0.46009212732315063,
649
+ "eval_runtime": 238.7244,
650
+ "eval_samples_per_second": 7.837,
651
+ "eval_wer": 0.3468673218673219,
652
+ "step": 18400
653
+ },
654
+ {
655
+ "epoch": 19.13,
656
+ "learning_rate": 0.00011062435322525008,
657
+ "loss": 0.0554,
658
+ "step": 18800
659
+ },
660
+ {
661
+ "epoch": 19.13,
662
+ "eval_loss": 0.506970226764679,
663
+ "eval_runtime": 236.9238,
664
+ "eval_samples_per_second": 7.897,
665
+ "eval_wer": 0.34594594594594597,
666
+ "step": 18800
667
+ },
668
+ {
669
+ "epoch": 19.53,
670
+ "learning_rate": 0.00010648499482580198,
671
+ "loss": 0.0567,
672
+ "step": 19200
673
+ },
674
+ {
675
+ "epoch": 19.53,
676
+ "eval_loss": 0.47986453771591187,
677
+ "eval_runtime": 237.1448,
678
+ "eval_samples_per_second": 7.89,
679
+ "eval_wer": 0.3435503685503685,
680
+ "step": 19200
681
+ },
682
+ {
683
+ "epoch": 19.94,
684
+ "learning_rate": 0.00010234563642635392,
685
+ "loss": 0.0554,
686
+ "step": 19600
687
+ },
688
+ {
689
+ "epoch": 19.94,
690
+ "eval_loss": 0.47748732566833496,
691
+ "eval_runtime": 236.6967,
692
+ "eval_samples_per_second": 7.905,
693
+ "eval_wer": 0.34877149877149877,
694
+ "step": 19600
695
+ },
696
+ {
697
+ "epoch": 20.35,
698
+ "learning_rate": 9.820627802690582e-05,
699
+ "loss": 0.0532,
700
+ "step": 20000
701
+ },
702
+ {
703
+ "epoch": 20.35,
704
+ "eval_loss": 0.5038613677024841,
705
+ "eval_runtime": 237.1224,
706
+ "eval_samples_per_second": 7.89,
707
+ "eval_wer": 0.34342751842751845,
708
+ "step": 20000
709
+ },
710
+ {
711
+ "epoch": 20.75,
712
+ "learning_rate": 9.406691962745773e-05,
713
+ "loss": 0.0503,
714
+ "step": 20400
715
+ },
716
+ {
717
+ "epoch": 20.75,
718
+ "eval_loss": 0.48931312561035156,
719
+ "eval_runtime": 236.3616,
720
+ "eval_samples_per_second": 7.916,
721
+ "eval_wer": 0.3406633906633907,
722
+ "step": 20400
723
  }
724
  ],
725
  "max_steps": 29490,
726
  "num_train_epochs": 30,
727
+ "total_flos": 1.1319628335303795e+20,
728
  "trial_name": null,
729
  "trial_params": null
730
  }