d0rj commited on
Commit
8f257ea
1 Parent(s): 6ae0f58

feat: update to step 1480

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +219 -3
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c56ade3ce8ce7f4cd660c9f56cfd465189c0f3e6cea93876304e1f0a19b386
3
  size 5922605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9095cf091f13fef28be3b337c0de7a461356f4207894f2e9211e1e414b251244
3
  size 5922605
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17e20280f7ac97d48e5623917bbbbf3a30fba6331dd764e6dbcb38e5e5f4373f
3
  size 3282182981
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94103909eef81b12ecf00936e61da446886031622a51849f53753c95eecdeff
3
  size 3282182981
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b9e29cdc3030e4b9c9f1d5b3772b55f7698772a789cb32861fca81f87bb6ae0
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8eabf76fcd8bc7449a6b05ba51e815397e565ade9671197a70e60312fa2c02
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c111d4fd09f2ba45d9ce9a3f9e472c9d29e15e5ddc9015049a18f079a487648d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a33c4cb7d8c9dad3255cdd0b6067ad6b7d77dc9943d79714a9d617088adb67
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.01949769498511016,
5
- "global_step": 1120,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -678,11 +678,227 @@
678
  "learning_rate": 0.0022400000000000002,
679
  "loss": 2.4506,
680
  "step": 1120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  }
682
  ],
683
  "max_steps": 172326,
684
  "num_train_epochs": 3,
685
- "total_flos": 5.5330613854470144e+17,
686
  "trial_name": null,
687
  "trial_params": null
688
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.025764811230324137,
5
+ "global_step": 1480,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
678
  "learning_rate": 0.0022400000000000002,
679
  "loss": 2.4506,
680
  "step": 1120
681
+ },
682
+ {
683
+ "epoch": 0.02,
684
+ "learning_rate": 0.00226,
685
+ "loss": 2.462,
686
+ "step": 1130
687
+ },
688
+ {
689
+ "epoch": 0.02,
690
+ "learning_rate": 0.00228,
691
+ "loss": 2.4535,
692
+ "step": 1140
693
+ },
694
+ {
695
+ "epoch": 0.02,
696
+ "learning_rate": 0.0023,
697
+ "loss": 2.4162,
698
+ "step": 1150
699
+ },
700
+ {
701
+ "epoch": 0.02,
702
+ "learning_rate": 0.00232,
703
+ "loss": 2.3739,
704
+ "step": 1160
705
+ },
706
+ {
707
+ "epoch": 0.02,
708
+ "learning_rate": 0.00234,
709
+ "loss": 2.4125,
710
+ "step": 1170
711
+ },
712
+ {
713
+ "epoch": 0.02,
714
+ "learning_rate": 0.00236,
715
+ "loss": 2.4172,
716
+ "step": 1180
717
+ },
718
+ {
719
+ "epoch": 0.02,
720
+ "learning_rate": 0.0023799999999999997,
721
+ "loss": 2.4006,
722
+ "step": 1190
723
+ },
724
+ {
725
+ "epoch": 0.02,
726
+ "learning_rate": 0.0024,
727
+ "loss": 2.517,
728
+ "step": 1200
729
+ },
730
+ {
731
+ "epoch": 0.02,
732
+ "learning_rate": 0.00242,
733
+ "loss": 2.4768,
734
+ "step": 1210
735
+ },
736
+ {
737
+ "epoch": 0.02,
738
+ "learning_rate": 0.00244,
739
+ "loss": 2.4303,
740
+ "step": 1220
741
+ },
742
+ {
743
+ "epoch": 0.02,
744
+ "learning_rate": 0.00246,
745
+ "loss": 2.4002,
746
+ "step": 1230
747
+ },
748
+ {
749
+ "epoch": 0.02,
750
+ "learning_rate": 0.00248,
751
+ "loss": 2.4263,
752
+ "step": 1240
753
+ },
754
+ {
755
+ "epoch": 0.02,
756
+ "learning_rate": 0.0025,
757
+ "loss": 2.4114,
758
+ "step": 1250
759
+ },
760
+ {
761
+ "epoch": 0.02,
762
+ "learning_rate": 0.00252,
763
+ "loss": 2.4964,
764
+ "step": 1260
765
+ },
766
+ {
767
+ "epoch": 0.02,
768
+ "learning_rate": 0.00254,
769
+ "loss": 2.4425,
770
+ "step": 1270
771
+ },
772
+ {
773
+ "epoch": 0.02,
774
+ "learning_rate": 0.00256,
775
+ "loss": 2.4163,
776
+ "step": 1280
777
+ },
778
+ {
779
+ "epoch": 0.02,
780
+ "learning_rate": 0.0025800000000000003,
781
+ "loss": 2.4091,
782
+ "step": 1290
783
+ },
784
+ {
785
+ "epoch": 0.02,
786
+ "learning_rate": 0.0026000000000000003,
787
+ "loss": 2.4029,
788
+ "step": 1300
789
+ },
790
+ {
791
+ "epoch": 0.02,
792
+ "learning_rate": 0.0026200000000000004,
793
+ "loss": 2.4061,
794
+ "step": 1310
795
+ },
796
+ {
797
+ "epoch": 0.02,
798
+ "learning_rate": 0.00264,
799
+ "loss": 2.3963,
800
+ "step": 1320
801
+ },
802
+ {
803
+ "epoch": 0.02,
804
+ "learning_rate": 0.00266,
805
+ "loss": 2.412,
806
+ "step": 1330
807
+ },
808
+ {
809
+ "epoch": 0.02,
810
+ "learning_rate": 0.00268,
811
+ "loss": 2.3639,
812
+ "step": 1340
813
+ },
814
+ {
815
+ "epoch": 0.02,
816
+ "learning_rate": 0.0027,
817
+ "loss": 2.3454,
818
+ "step": 1350
819
+ },
820
+ {
821
+ "epoch": 0.02,
822
+ "learning_rate": 0.00272,
823
+ "loss": 2.41,
824
+ "step": 1360
825
+ },
826
+ {
827
+ "epoch": 0.02,
828
+ "learning_rate": 0.0027400000000000002,
829
+ "loss": 2.3901,
830
+ "step": 1370
831
+ },
832
+ {
833
+ "epoch": 0.02,
834
+ "learning_rate": 0.00276,
835
+ "loss": 2.352,
836
+ "step": 1380
837
+ },
838
+ {
839
+ "epoch": 0.02,
840
+ "learning_rate": 0.00278,
841
+ "loss": 2.3873,
842
+ "step": 1390
843
+ },
844
+ {
845
+ "epoch": 0.02,
846
+ "learning_rate": 0.0028,
847
+ "loss": 2.4287,
848
+ "step": 1400
849
+ },
850
+ {
851
+ "epoch": 0.02,
852
+ "learning_rate": 0.00282,
853
+ "loss": 2.4252,
854
+ "step": 1410
855
+ },
856
+ {
857
+ "epoch": 0.02,
858
+ "learning_rate": 0.00284,
859
+ "loss": 2.4787,
860
+ "step": 1420
861
+ },
862
+ {
863
+ "epoch": 0.02,
864
+ "learning_rate": 0.00286,
865
+ "loss": 3.0007,
866
+ "step": 1430
867
+ },
868
+ {
869
+ "epoch": 0.03,
870
+ "learning_rate": 0.0028799999999999997,
871
+ "loss": 2.4423,
872
+ "step": 1440
873
+ },
874
+ {
875
+ "epoch": 0.03,
876
+ "learning_rate": 0.0029,
877
+ "loss": 2.4445,
878
+ "step": 1450
879
+ },
880
+ {
881
+ "epoch": 0.03,
882
+ "learning_rate": 0.00292,
883
+ "loss": 2.4369,
884
+ "step": 1460
885
+ },
886
+ {
887
+ "epoch": 0.03,
888
+ "learning_rate": 0.00294,
889
+ "loss": 2.423,
890
+ "step": 1470
891
+ },
892
+ {
893
+ "epoch": 0.03,
894
+ "learning_rate": 0.00296,
895
+ "loss": 3.6744,
896
+ "step": 1480
897
  }
898
  ],
899
  "max_steps": 172326,
900
  "num_train_epochs": 3,
901
+ "total_flos": 8.798474662104269e+17,
902
  "trial_name": null,
903
  "trial_params": null
904
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:958eaa6a0110d49c3a54d902ba1dc985dc8e8f3b8eb2c03dc0e099f08d75886a
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a68bfa120887ec0f5e2ec991631838aa62faba58a52de7e0feea3f2c228567
3
  size 4091