Federic committed on
Commit
1bba4a9
1 Parent(s): eb91e70

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90a1b1fb9f8253f6441ef115b4458cba2bdf1fe8cd75328dc3b87e6eec2539fe
3
  size 838904832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dcdb3ff8486c0bfc9625b0f78fcc4eca11f76d799b250082cf4037cbf25bc66
3
  size 838904832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cab906812334647498aa7b82f69356d8d7f86e1c78bb3bc7736fe343d375e991
3
  size 420633876
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8736edb269c9527e777378ba29ed5502e10bb70bbbfd035a3fa179d3f89b031d
3
  size 420633876
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:467f431518071c9f0aa37c15a15a2b2d8e5fa5f9428cf5454723afa2dacb265b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c413d93a4659760569b07138e03c91b39e3bcca23fe1a4355f5bbe3805581647
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efdbfe8676cd24a75fed7e6e38125bb1bb838a4b70c0fbf7469557659d9b1fec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31dc31a119769737d72f3df4c8cdf99522596cafc12bf2eea05a4ff374f599c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5,
5
  "eval_steps": 500,
6
- "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -757,13 +757,163 @@
757
  "learning_rate": 0.0002,
758
  "loss": 0.3803,
759
  "step": 125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  }
761
  ],
762
  "logging_steps": 1,
763
  "max_steps": 250,
764
  "num_train_epochs": 1,
765
  "save_steps": 25,
766
- "total_flos": 1.93042060713984e+16,
767
  "trial_name": null,
768
  "trial_params": null
769
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
757
  "learning_rate": 0.0002,
758
  "loss": 0.3803,
759
  "step": 125
760
+ },
761
+ {
762
+ "epoch": 0.5,
763
+ "learning_rate": 0.0002,
764
+ "loss": 0.3578,
765
+ "step": 126
766
+ },
767
+ {
768
+ "epoch": 0.51,
769
+ "learning_rate": 0.0002,
770
+ "loss": 0.384,
771
+ "step": 127
772
+ },
773
+ {
774
+ "epoch": 0.51,
775
+ "learning_rate": 0.0002,
776
+ "loss": 0.3869,
777
+ "step": 128
778
+ },
779
+ {
780
+ "epoch": 0.52,
781
+ "learning_rate": 0.0002,
782
+ "loss": 0.3881,
783
+ "step": 129
784
+ },
785
+ {
786
+ "epoch": 0.52,
787
+ "learning_rate": 0.0002,
788
+ "loss": 0.3808,
789
+ "step": 130
790
+ },
791
+ {
792
+ "epoch": 0.52,
793
+ "learning_rate": 0.0002,
794
+ "loss": 0.3545,
795
+ "step": 131
796
+ },
797
+ {
798
+ "epoch": 0.53,
799
+ "learning_rate": 0.0002,
800
+ "loss": 0.4008,
801
+ "step": 132
802
+ },
803
+ {
804
+ "epoch": 0.53,
805
+ "learning_rate": 0.0002,
806
+ "loss": 0.3862,
807
+ "step": 133
808
+ },
809
+ {
810
+ "epoch": 0.54,
811
+ "learning_rate": 0.0002,
812
+ "loss": 0.387,
813
+ "step": 134
814
+ },
815
+ {
816
+ "epoch": 0.54,
817
+ "learning_rate": 0.0002,
818
+ "loss": 0.3681,
819
+ "step": 135
820
+ },
821
+ {
822
+ "epoch": 0.54,
823
+ "learning_rate": 0.0002,
824
+ "loss": 0.3414,
825
+ "step": 136
826
+ },
827
+ {
828
+ "epoch": 0.55,
829
+ "learning_rate": 0.0002,
830
+ "loss": 0.3895,
831
+ "step": 137
832
+ },
833
+ {
834
+ "epoch": 0.55,
835
+ "learning_rate": 0.0002,
836
+ "loss": 0.4336,
837
+ "step": 138
838
+ },
839
+ {
840
+ "epoch": 0.56,
841
+ "learning_rate": 0.0002,
842
+ "loss": 0.3338,
843
+ "step": 139
844
+ },
845
+ {
846
+ "epoch": 0.56,
847
+ "learning_rate": 0.0002,
848
+ "loss": 0.3223,
849
+ "step": 140
850
+ },
851
+ {
852
+ "epoch": 0.56,
853
+ "learning_rate": 0.0002,
854
+ "loss": 0.337,
855
+ "step": 141
856
+ },
857
+ {
858
+ "epoch": 0.57,
859
+ "learning_rate": 0.0002,
860
+ "loss": 0.3579,
861
+ "step": 142
862
+ },
863
+ {
864
+ "epoch": 0.57,
865
+ "learning_rate": 0.0002,
866
+ "loss": 0.283,
867
+ "step": 143
868
+ },
869
+ {
870
+ "epoch": 0.58,
871
+ "learning_rate": 0.0002,
872
+ "loss": 0.3138,
873
+ "step": 144
874
+ },
875
+ {
876
+ "epoch": 0.58,
877
+ "learning_rate": 0.0002,
878
+ "loss": 0.3492,
879
+ "step": 145
880
+ },
881
+ {
882
+ "epoch": 0.58,
883
+ "learning_rate": 0.0002,
884
+ "loss": 0.2995,
885
+ "step": 146
886
+ },
887
+ {
888
+ "epoch": 0.59,
889
+ "learning_rate": 0.0002,
890
+ "loss": 0.3199,
891
+ "step": 147
892
+ },
893
+ {
894
+ "epoch": 0.59,
895
+ "learning_rate": 0.0002,
896
+ "loss": 0.2826,
897
+ "step": 148
898
+ },
899
+ {
900
+ "epoch": 0.6,
901
+ "learning_rate": 0.0002,
902
+ "loss": 0.2774,
903
+ "step": 149
904
+ },
905
+ {
906
+ "epoch": 0.6,
907
+ "learning_rate": 0.0002,
908
+ "loss": 0.2919,
909
+ "step": 150
910
  }
911
  ],
912
  "logging_steps": 1,
913
  "max_steps": 250,
914
  "num_train_epochs": 1,
915
  "save_steps": 25,
916
+ "total_flos": 2.259012175896576e+16,
917
  "trial_name": null,
918
  "trial_params": null
919
  }