nbtpj commited on
Commit
b184666
1 Parent(s): 518c562

Training in progress, step 12500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e26aa9d961b153eb39f40717703226047b1acbf11f069146c5b3e243f1cc9c
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f3c69d772afc58e37ee903b2b7079a268d08467aafd7213c557e89eabc81a1
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f99858e3c21b907dfaa9a430a7827c8a6a05713100d80743af281217de43680f
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87db461c28c81d8c1f329067e24f4856e8aa966760e3915178a00b7c12a98ad9
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:072b62bbf3d9acdacc06abc15a04c205e72d5bda5cf1bf40a5b6dcbb83bd3857
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c498e4129e3e140b55c067c9e46781a5ac9ad4114d9a53b3d323278fa7cab1b
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4e0f6dd04181f2245ca4e861eb942143b0203adeeec2bd8c0e3bf4752a809d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9ff76b4edb8bccd8e7ba583674a071e8b77e61693d6d7c7089e776edb889d9d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5605381165919282,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -606,11 +606,161 @@
606
  "learning_rate": 1.883408071748879e-05,
607
  "loss": 0.6866,
608
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
  "max_steps": 32112,
612
  "num_train_epochs": 2,
613
- "total_flos": 4.620457598754816e+16,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7006726457399103,
5
+ "global_step": 12500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
606
  "learning_rate": 1.883408071748879e-05,
607
  "loss": 0.6866,
608
  "step": 10000
609
+ },
610
+ {
611
+ "epoch": 0.57,
612
+ "learning_rate": 1.8522670652715494e-05,
613
+ "loss": 0.7075,
614
+ "step": 10100
615
+ },
616
+ {
617
+ "epoch": 0.57,
618
+ "learning_rate": 1.82112605879422e-05,
619
+ "loss": 0.7549,
620
+ "step": 10200
621
+ },
622
+ {
623
+ "epoch": 0.58,
624
+ "learning_rate": 1.7899850523168908e-05,
625
+ "loss": 0.7435,
626
+ "step": 10300
627
+ },
628
+ {
629
+ "epoch": 0.58,
630
+ "learning_rate": 1.7588440458395615e-05,
631
+ "loss": 0.7484,
632
+ "step": 10400
633
+ },
634
+ {
635
+ "epoch": 0.59,
636
+ "learning_rate": 1.7277030393622322e-05,
637
+ "loss": 0.7309,
638
+ "step": 10500
639
+ },
640
+ {
641
+ "epoch": 0.59,
642
+ "learning_rate": 1.696562032884903e-05,
643
+ "loss": 0.8038,
644
+ "step": 10600
645
+ },
646
+ {
647
+ "epoch": 0.6,
648
+ "learning_rate": 1.6654210264075736e-05,
649
+ "loss": 0.7695,
650
+ "step": 10700
651
+ },
652
+ {
653
+ "epoch": 0.61,
654
+ "learning_rate": 1.634280019930244e-05,
655
+ "loss": 0.7406,
656
+ "step": 10800
657
+ },
658
+ {
659
+ "epoch": 0.61,
660
+ "learning_rate": 1.6031390134529147e-05,
661
+ "loss": 0.6867,
662
+ "step": 10900
663
+ },
664
+ {
665
+ "epoch": 0.62,
666
+ "learning_rate": 1.5719980069755854e-05,
667
+ "loss": 0.6952,
668
+ "step": 11000
669
+ },
670
+ {
671
+ "epoch": 0.62,
672
+ "learning_rate": 1.540857000498256e-05,
673
+ "loss": 0.6863,
674
+ "step": 11100
675
+ },
676
+ {
677
+ "epoch": 0.63,
678
+ "learning_rate": 1.5097159940209268e-05,
679
+ "loss": 0.7765,
680
+ "step": 11200
681
+ },
682
+ {
683
+ "epoch": 0.63,
684
+ "learning_rate": 1.4785749875435975e-05,
685
+ "loss": 0.7468,
686
+ "step": 11300
687
+ },
688
+ {
689
+ "epoch": 0.64,
690
+ "learning_rate": 1.4474339810662682e-05,
691
+ "loss": 0.8029,
692
+ "step": 11400
693
+ },
694
+ {
695
+ "epoch": 0.64,
696
+ "learning_rate": 1.4162929745889389e-05,
697
+ "loss": 0.7224,
698
+ "step": 11500
699
+ },
700
+ {
701
+ "epoch": 0.65,
702
+ "learning_rate": 1.3851519681116093e-05,
703
+ "loss": 0.7289,
704
+ "step": 11600
705
+ },
706
+ {
707
+ "epoch": 0.66,
708
+ "learning_rate": 1.35401096163428e-05,
709
+ "loss": 0.758,
710
+ "step": 11700
711
+ },
712
+ {
713
+ "epoch": 0.66,
714
+ "learning_rate": 1.3228699551569507e-05,
715
+ "loss": 0.7118,
716
+ "step": 11800
717
+ },
718
+ {
719
+ "epoch": 0.67,
720
+ "learning_rate": 1.2917289486796214e-05,
721
+ "loss": 0.7478,
722
+ "step": 11900
723
+ },
724
+ {
725
+ "epoch": 0.67,
726
+ "learning_rate": 1.260587942202292e-05,
727
+ "loss": 0.7617,
728
+ "step": 12000
729
+ },
730
+ {
731
+ "epoch": 0.68,
732
+ "learning_rate": 1.2294469357249628e-05,
733
+ "loss": 0.7288,
734
+ "step": 12100
735
+ },
736
+ {
737
+ "epoch": 0.68,
738
+ "learning_rate": 1.1983059292476335e-05,
739
+ "loss": 0.7643,
740
+ "step": 12200
741
+ },
742
+ {
743
+ "epoch": 0.69,
744
+ "learning_rate": 1.167164922770304e-05,
745
+ "loss": 0.7059,
746
+ "step": 12300
747
+ },
748
+ {
749
+ "epoch": 0.7,
750
+ "learning_rate": 1.1360239162929747e-05,
751
+ "loss": 0.7185,
752
+ "step": 12400
753
+ },
754
+ {
755
+ "epoch": 0.7,
756
+ "learning_rate": 1.1048829098156454e-05,
757
+ "loss": 0.6894,
758
+ "step": 12500
759
  }
760
  ],
761
  "max_steps": 32112,
762
  "num_train_epochs": 2,
763
+ "total_flos": 5.775039312666624e+16,
764
  "trial_name": null,
765
  "trial_params": null
766
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f99858e3c21b907dfaa9a430a7827c8a6a05713100d80743af281217de43680f
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87db461c28c81d8c1f329067e24f4856e8aa966760e3915178a00b7c12a98ad9
3
  size 557969145
runs/Jan03_01-47-25_21bb7eba274c/events.out.tfevents.1672710460.21bb7eba274c.23.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:676d0297bfea30711b3d71a47eea34d72cd91e97c73b68283c3fbb798b0843dc
3
- size 25339
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb77c9add49418cf9c6164ece1dbd00292b09e7083152c9df2cb409c236cba4
3
+ size 29264