nutorbit committed on
Commit
9a6f4b8
1 Parent(s): 497b3c9

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb976cf7898c7af69200f2113a29b7785b019b770d22039687f7f5c4a96ed7fa
3
  size 72673016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9128c30f8a3b26280560a868afe89cca76661efd2a33ceb3c658eddf190de8bb
3
  size 72673016
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76358ba8b0d5790094202317f145647c6a255e57fe759204944b37c5cf4cd2e5
3
  size 36892116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc10cf500af2626b5d2b4f076a07c678e565cf08a810b0e33e8a52555d5cc40
3
  size 36892116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c28d755d296a8737ed6670f14159a70f28b726a19cbe726d0f62337231c234c4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ece262c534d1b7d1db5a2ce2fd578c733d0164261019904322133288edf8c12
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26645e1de4f1ee60c55cf09276f6d2fe8bef6c9276d818480910d55b43b1d8a1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34561662f8e54ce65c36d4fcca2b1c512c5ba02e6fc1a025cbd92eb43f288458
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.023631723225257587,
5
  "eval_steps": 1000,
6
- "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -757,13 +757,163 @@
757
  "learning_rate": 0.00015524193548387098,
758
  "loss": 1.8046,
759
  "step": 125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  }
761
  ],
762
  "logging_steps": 1,
763
  "max_steps": 501,
764
  "num_train_epochs": 1,
765
  "save_steps": 25,
766
- "total_flos": 6054706182193152.0,
767
  "trial_name": null,
768
  "trial_params": null
769
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.028358067870309103,
5
  "eval_steps": 1000,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
757
  "learning_rate": 0.00015524193548387098,
758
  "loss": 1.8046,
759
  "step": 125
760
+ },
761
+ {
762
+ "epoch": 0.02,
763
+ "learning_rate": 0.00015483870967741937,
764
+ "loss": 1.8889,
765
+ "step": 126
766
+ },
767
+ {
768
+ "epoch": 0.02,
769
+ "learning_rate": 0.00015443548387096776,
770
+ "loss": 1.9183,
771
+ "step": 127
772
+ },
773
+ {
774
+ "epoch": 0.02,
775
+ "learning_rate": 0.00015403225806451615,
776
+ "loss": 1.6254,
777
+ "step": 128
778
+ },
779
+ {
780
+ "epoch": 0.02,
781
+ "learning_rate": 0.00015362903225806454,
782
+ "loss": 1.8339,
783
+ "step": 129
784
+ },
785
+ {
786
+ "epoch": 0.02,
787
+ "learning_rate": 0.0001532258064516129,
788
+ "loss": 1.94,
789
+ "step": 130
790
+ },
791
+ {
792
+ "epoch": 0.02,
793
+ "learning_rate": 0.00015282258064516131,
794
+ "loss": 1.9235,
795
+ "step": 131
796
+ },
797
+ {
798
+ "epoch": 0.02,
799
+ "learning_rate": 0.00015241935483870968,
800
+ "loss": 1.78,
801
+ "step": 132
802
+ },
803
+ {
804
+ "epoch": 0.03,
805
+ "learning_rate": 0.0001520161290322581,
806
+ "loss": 1.7514,
807
+ "step": 133
808
+ },
809
+ {
810
+ "epoch": 0.03,
811
+ "learning_rate": 0.00015161290322580646,
812
+ "loss": 1.8281,
813
+ "step": 134
814
+ },
815
+ {
816
+ "epoch": 0.03,
817
+ "learning_rate": 0.00015120967741935485,
818
+ "loss": 1.8109,
819
+ "step": 135
820
+ },
821
+ {
822
+ "epoch": 0.03,
823
+ "learning_rate": 0.00015080645161290323,
824
+ "loss": 1.9297,
825
+ "step": 136
826
+ },
827
+ {
828
+ "epoch": 0.03,
829
+ "learning_rate": 0.00015040322580645162,
830
+ "loss": 1.7198,
831
+ "step": 137
832
+ },
833
+ {
834
+ "epoch": 0.03,
835
+ "learning_rate": 0.00015000000000000001,
836
+ "loss": 1.7704,
837
+ "step": 138
838
+ },
839
+ {
840
+ "epoch": 0.03,
841
+ "learning_rate": 0.00014959677419354838,
842
+ "loss": 1.8345,
843
+ "step": 139
844
+ },
845
+ {
846
+ "epoch": 0.03,
847
+ "learning_rate": 0.0001491935483870968,
848
+ "loss": 1.7694,
849
+ "step": 140
850
+ },
851
+ {
852
+ "epoch": 0.03,
853
+ "learning_rate": 0.00014879032258064516,
854
+ "loss": 1.8196,
855
+ "step": 141
856
+ },
857
+ {
858
+ "epoch": 0.03,
859
+ "learning_rate": 0.00014838709677419355,
860
+ "loss": 1.8625,
861
+ "step": 142
862
+ },
863
+ {
864
+ "epoch": 0.03,
865
+ "learning_rate": 0.00014798387096774193,
866
+ "loss": 1.9182,
867
+ "step": 143
868
+ },
869
+ {
870
+ "epoch": 0.03,
871
+ "learning_rate": 0.00014758064516129032,
872
+ "loss": 1.4297,
873
+ "step": 144
874
+ },
875
+ {
876
+ "epoch": 0.03,
877
+ "learning_rate": 0.00014717741935483871,
878
+ "loss": 1.929,
879
+ "step": 145
880
+ },
881
+ {
882
+ "epoch": 0.03,
883
+ "learning_rate": 0.0001467741935483871,
884
+ "loss": 1.9118,
885
+ "step": 146
886
+ },
887
+ {
888
+ "epoch": 0.03,
889
+ "learning_rate": 0.0001463709677419355,
890
+ "loss": 2.0179,
891
+ "step": 147
892
+ },
893
+ {
894
+ "epoch": 0.03,
895
+ "learning_rate": 0.00014596774193548388,
896
+ "loss": 1.8938,
897
+ "step": 148
898
+ },
899
+ {
900
+ "epoch": 0.03,
901
+ "learning_rate": 0.00014556451612903224,
902
+ "loss": 1.8689,
903
+ "step": 149
904
+ },
905
+ {
906
+ "epoch": 0.03,
907
+ "learning_rate": 0.00014516129032258066,
908
+ "loss": 1.6793,
909
+ "step": 150
910
  }
911
  ],
912
  "logging_steps": 1,
913
  "max_steps": 501,
914
  "num_train_epochs": 1,
915
  "save_steps": 25,
916
+ "total_flos": 7280753121067008.0,
917
  "trial_name": null,
918
  "trial_params": null
919
  }