plip commited on
Commit
a43f978
1 Parent(s): 003ce32

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dff537922713b87383b64684bd79e5a7ae236a98cba1517816ac46551a11ebb
3
  size 50044241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadcb41aea0d881ec7489fc153864312daace23a87556e2dabeb7f00ab20de52
3
  size 50044241
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb889cc10aaf071995df6da342ff4aec3f65438039925fc96cfdce7c89df56fc
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf773b1bedf0e3d4076d93eb3be69d1083c7a6321f77e59758b3adc872017b3a
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315800999724442b576d330985cea2d1b63fff0d8573a95c6cd16e9183cf5350
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39febded125ea43849ae44cc91a2089d4c554f45131ad78886b088e5c989bab9
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7750ccd53e61fd7fcec6ad8e54086c4abb8aa56c6a0781b4ecadd56cbb87c42
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9511edec0a698219a4fec94630f2bf1ec60055a31bda0393e6aadb5a36db13d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.450980392156863,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -806,11 +806,211 @@
806
  "eval_samples_per_second": 746.204,
807
  "eval_steps_per_second": 11.939,
808
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  }
810
  ],
811
  "max_steps": 250000,
812
  "num_train_epochs": 16,
813
- "total_flos": 6.406519904559862e+20,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.063725490196078,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
806
  "eval_samples_per_second": 746.204,
807
  "eval_steps_per_second": 11.939,
808
  "step": 40000
809
+ },
810
+ {
811
+ "epoch": 2.48,
812
+ "learning_rate": 0.0005799963048115559,
813
+ "loss": 0.5353,
814
+ "step": 40500
815
+ },
816
+ {
817
+ "epoch": 2.51,
818
+ "learning_rate": 0.0005792840633370341,
819
+ "loss": 0.5336,
820
+ "step": 41000
821
+ },
822
+ {
823
+ "epoch": 2.51,
824
+ "eval_loss": 0.8631040453910828,
825
+ "eval_runtime": 1.3088,
826
+ "eval_samples_per_second": 764.037,
827
+ "eval_steps_per_second": 12.225,
828
+ "step": 41000
829
+ },
830
+ {
831
+ "epoch": 2.54,
832
+ "learning_rate": 0.0005785598237890247,
833
+ "loss": 0.5327,
834
+ "step": 41500
835
+ },
836
+ {
837
+ "epoch": 2.57,
838
+ "learning_rate": 0.0005778236178481119,
839
+ "loss": 0.5316,
840
+ "step": 42000
841
+ },
842
+ {
843
+ "epoch": 2.57,
844
+ "eval_loss": 0.8605585098266602,
845
+ "eval_runtime": 1.2836,
846
+ "eval_samples_per_second": 779.077,
847
+ "eval_steps_per_second": 12.465,
848
+ "step": 42000
849
+ },
850
+ {
851
+ "epoch": 2.6,
852
+ "learning_rate": 0.0005770754777183285,
853
+ "loss": 0.5306,
854
+ "step": 42500
855
+ },
856
+ {
857
+ "epoch": 2.63,
858
+ "learning_rate": 0.0005763154361257473,
859
+ "loss": 0.5297,
860
+ "step": 43000
861
+ },
862
+ {
863
+ "epoch": 2.63,
864
+ "eval_loss": 0.8589205145835876,
865
+ "eval_runtime": 1.2763,
866
+ "eval_samples_per_second": 783.485,
867
+ "eval_steps_per_second": 12.536,
868
+ "step": 43000
869
+ },
870
+ {
871
+ "epoch": 2.67,
872
+ "learning_rate": 0.0005755435263170498,
873
+ "loss": 0.5287,
874
+ "step": 43500
875
+ },
876
+ {
877
+ "epoch": 2.7,
878
+ "learning_rate": 0.0005747597820580717,
879
+ "loss": 0.5305,
880
+ "step": 44000
881
+ },
882
+ {
883
+ "epoch": 2.7,
884
+ "eval_loss": 0.8569635152816772,
885
+ "eval_runtime": 1.2713,
886
+ "eval_samples_per_second": 786.624,
887
+ "eval_steps_per_second": 12.586,
888
+ "step": 44000
889
+ },
890
+ {
891
+ "epoch": 2.73,
892
+ "learning_rate": 0.000573964237632326,
893
+ "loss": 0.527,
894
+ "step": 44500
895
+ },
896
+ {
897
+ "epoch": 2.76,
898
+ "learning_rate": 0.0005731569278395029,
899
+ "loss": 0.5262,
900
+ "step": 45000
901
+ },
902
+ {
903
+ "epoch": 2.76,
904
+ "eval_loss": 0.8558768033981323,
905
+ "eval_runtime": 1.2738,
906
+ "eval_samples_per_second": 785.051,
907
+ "eval_steps_per_second": 12.561,
908
+ "step": 45000
909
+ },
910
+ {
911
+ "epoch": 2.79,
912
+ "learning_rate": 0.0005723378879939481,
913
+ "loss": 0.5254,
914
+ "step": 45500
915
+ },
916
+ {
917
+ "epoch": 2.82,
918
+ "learning_rate": 0.0005715071539231178,
919
+ "loss": 0.5247,
920
+ "step": 46000
921
+ },
922
+ {
923
+ "epoch": 2.82,
924
+ "eval_loss": 0.8633874654769897,
925
+ "eval_runtime": 1.2747,
926
+ "eval_samples_per_second": 784.504,
927
+ "eval_steps_per_second": 12.552,
928
+ "step": 46000
929
+ },
930
+ {
931
+ "epoch": 2.85,
932
+ "learning_rate": 0.0005706647619660116,
933
+ "loss": 0.5243,
934
+ "step": 46500
935
+ },
936
+ {
937
+ "epoch": 2.88,
938
+ "learning_rate": 0.0005698107489715823,
939
+ "loss": 0.5235,
940
+ "step": 47000
941
+ },
942
+ {
943
+ "epoch": 2.88,
944
+ "eval_loss": 0.8606237769126892,
945
+ "eval_runtime": 1.3283,
946
+ "eval_samples_per_second": 752.838,
947
+ "eval_steps_per_second": 12.045,
948
+ "step": 47000
949
+ },
950
+ {
951
+ "epoch": 2.91,
952
+ "learning_rate": 0.0005689451522971252,
953
+ "loss": 0.5228,
954
+ "step": 47500
955
+ },
956
+ {
957
+ "epoch": 2.94,
958
+ "learning_rate": 0.0005680680098066429,
959
+ "loss": 0.5227,
960
+ "step": 48000
961
+ },
962
+ {
963
+ "epoch": 2.94,
964
+ "eval_loss": 0.8610469698905945,
965
+ "eval_runtime": 1.2783,
966
+ "eval_samples_per_second": 782.274,
967
+ "eval_steps_per_second": 12.516,
968
+ "step": 48000
969
+ },
970
+ {
971
+ "epoch": 2.97,
972
+ "learning_rate": 0.0005671793598691895,
973
+ "loss": 0.5215,
974
+ "step": 48500
975
+ },
976
+ {
977
+ "epoch": 3.0,
978
+ "learning_rate": 0.0005662792413571921,
979
+ "loss": 0.5206,
980
+ "step": 49000
981
+ },
982
+ {
983
+ "epoch": 3.0,
984
+ "eval_loss": 0.8610268235206604,
985
+ "eval_runtime": 1.3555,
986
+ "eval_samples_per_second": 737.72,
987
+ "eval_steps_per_second": 11.804,
988
+ "step": 49000
989
+ },
990
+ {
991
+ "epoch": 3.03,
992
+ "learning_rate": 0.0005653676936447504,
993
+ "loss": 0.5201,
994
+ "step": 49500
995
+ },
996
+ {
997
+ "epoch": 3.06,
998
+ "learning_rate": 0.0005644447566059142,
999
+ "loss": 0.5194,
1000
+ "step": 50000
1001
+ },
1002
+ {
1003
+ "epoch": 3.06,
1004
+ "eval_loss": 0.8611247539520264,
1005
+ "eval_runtime": 1.2902,
1006
+ "eval_samples_per_second": 775.099,
1007
+ "eval_steps_per_second": 12.402,
1008
+ "step": 50000
1009
  }
1010
  ],
1011
  "max_steps": 250000,
1012
  "num_train_epochs": 16,
1013
+ "total_flos": 8.008099828181752e+20,
1014
  "trial_name": null,
1015
  "trial_params": null
1016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb889cc10aaf071995df6da342ff4aec3f65438039925fc96cfdce7c89df56fc
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf773b1bedf0e3d4076d93eb3be69d1083c7a6321f77e59758b3adc872017b3a
3
  size 25761253