Bingsu commited on
Commit
6fb9ac9
1 Parent(s): 88ea5b8

Training in progress, step 570000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:047de29e872fa886935b0856cae368679a5024ec78d3b02056971d0dafe03f46
3
  size 586828837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fffdc57df6cba4aecc5d537199d05d28768deaf925b41240f122bcbc526d6c4d
3
  size 586828837
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2db06e17c94930e25ba4fa153fb1d09bc548975dd61b046eadd4ef82210ad5b
3
  size 146774203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc99a917e1b327405a8f3c276c96d3252b44e706de05260c86fdfb67a8ea2ba1
3
  size 146774203
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6925d15f0ca8b085673c2a9c495fa03dd265589a6d0e5da63276f20be7165697
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb8cd28c207e550a8e102ab438e79bd35b1834dd9eb8b97b0c0f9aab456235f
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1866493c6437f9be9b061bda7fb54561f6f075e18eb8ff9def3d978f033c740
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8cfba3a731feb83ca65973baf77fc04cbf64fea750132892e69c52d95de7113
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f637d7b8ff1440e6b42939ee3d5db1515f248a64a9ccc57bfd7e929c8ce06320
3
  size 733555848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2425d50b58bd1bce863056a07fbed7929c2c0bfeef559ef18326c302aae672a
3
  size 733555848
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.4064596251939134,
5
- "global_step": 560000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16806,11 +16806,311 @@
16806
  "learning_rate": 0.006970335826120932,
16807
  "loss": 8.046,
16808
  "step": 560000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16809
  }
16810
  ],
16811
  "max_steps": 1000000,
16812
  "num_train_epochs": 5,
16813
- "total_flos": 8.925433954352456e+17,
16814
  "trial_name": null,
16815
  "trial_params": null
16816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.4494321185009476,
5
+ "global_step": 570000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16806
  "learning_rate": 0.006970335826120932,
16807
  "loss": 8.046,
16808
  "step": 560000
16809
+ },
16810
+ {
16811
+ "epoch": 2.41,
16812
+ "learning_rate": 0.006966210216011318,
16813
+ "loss": 8.0551,
16814
+ "step": 560200
16815
+ },
16816
+ {
16817
+ "epoch": 2.41,
16818
+ "learning_rate": 0.0069620830217777575,
16819
+ "loss": 8.0548,
16820
+ "step": 560400
16821
+ },
16822
+ {
16823
+ "epoch": 2.41,
16824
+ "learning_rate": 0.006957954246745461,
16825
+ "loss": 8.0485,
16826
+ "step": 560600
16827
+ },
16828
+ {
16829
+ "epoch": 2.41,
16830
+ "learning_rate": 0.006953823894240906,
16831
+ "loss": 8.0603,
16832
+ "step": 560800
16833
+ },
16834
+ {
16835
+ "epoch": 2.41,
16836
+ "learning_rate": 0.0069496919675918435,
16837
+ "loss": 8.051,
16838
+ "step": 561000
16839
+ },
16840
+ {
16841
+ "epoch": 2.41,
16842
+ "learning_rate": 0.006945558470127292,
16843
+ "loss": 8.0569,
16844
+ "step": 561200
16845
+ },
16846
+ {
16847
+ "epoch": 2.41,
16848
+ "learning_rate": 0.006941423405177537,
16849
+ "loss": 8.0389,
16850
+ "step": 561400
16851
+ },
16852
+ {
16853
+ "epoch": 2.41,
16854
+ "learning_rate": 0.0069372867760741225,
16855
+ "loss": 8.0413,
16856
+ "step": 561600
16857
+ },
16858
+ {
16859
+ "epoch": 2.41,
16860
+ "learning_rate": 0.006933148586149858,
16861
+ "loss": 8.0455,
16862
+ "step": 561800
16863
+ },
16864
+ {
16865
+ "epoch": 2.42,
16866
+ "learning_rate": 0.006929008838738809,
16867
+ "loss": 8.0532,
16868
+ "step": 562000
16869
+ },
16870
+ {
16871
+ "epoch": 2.42,
16872
+ "learning_rate": 0.006924867537176294,
16873
+ "loss": 8.0524,
16874
+ "step": 562200
16875
+ },
16876
+ {
16877
+ "epoch": 2.42,
16878
+ "learning_rate": 0.006920724684798886,
16879
+ "loss": 8.0644,
16880
+ "step": 562400
16881
+ },
16882
+ {
16883
+ "epoch": 2.42,
16884
+ "learning_rate": 0.006916580284944404,
16885
+ "loss": 8.0491,
16886
+ "step": 562600
16887
+ },
16888
+ {
16889
+ "epoch": 2.42,
16890
+ "learning_rate": 0.006912434340951918,
16891
+ "loss": 8.0658,
16892
+ "step": 562800
16893
+ },
16894
+ {
16895
+ "epoch": 2.42,
16896
+ "learning_rate": 0.006908286856161741,
16897
+ "loss": 8.0541,
16898
+ "step": 563000
16899
+ },
16900
+ {
16901
+ "epoch": 2.42,
16902
+ "learning_rate": 0.006904137833915425,
16903
+ "loss": 8.0429,
16904
+ "step": 563200
16905
+ },
16906
+ {
16907
+ "epoch": 2.42,
16908
+ "learning_rate": 0.006900008034148137,
16909
+ "loss": 8.047,
16910
+ "step": 563400
16911
+ },
16912
+ {
16913
+ "epoch": 2.42,
16914
+ "learning_rate": 0.006895855954664682,
16915
+ "loss": 8.0523,
16916
+ "step": 563600
16917
+ },
16918
+ {
16919
+ "epoch": 2.42,
16920
+ "learning_rate": 0.006891702347740443,
16921
+ "loss": 8.0611,
16922
+ "step": 563800
16923
+ },
16924
+ {
16925
+ "epoch": 2.42,
16926
+ "learning_rate": 0.0068875472167219025,
16927
+ "loss": 8.0624,
16928
+ "step": 564000
16929
+ },
16930
+ {
16931
+ "epoch": 2.42,
16932
+ "learning_rate": 0.006883390564956777,
16933
+ "loss": 8.0618,
16934
+ "step": 564200
16935
+ },
16936
+ {
16937
+ "epoch": 2.43,
16938
+ "learning_rate": 0.006879232395794005,
16939
+ "loss": 8.0637,
16940
+ "step": 564400
16941
+ },
16942
+ {
16943
+ "epoch": 2.43,
16944
+ "learning_rate": 0.006875072712583748,
16945
+ "loss": 8.0495,
16946
+ "step": 564600
16947
+ },
16948
+ {
16949
+ "epoch": 2.43,
16950
+ "learning_rate": 0.00687091151867739,
16951
+ "loss": 8.0603,
16952
+ "step": 564800
16953
+ },
16954
+ {
16955
+ "epoch": 2.43,
16956
+ "learning_rate": 0.006866748817427526,
16957
+ "loss": 8.0579,
16958
+ "step": 565000
16959
+ },
16960
+ {
16961
+ "epoch": 2.43,
16962
+ "learning_rate": 0.006862584612187971,
16963
+ "loss": 8.0629,
16964
+ "step": 565200
16965
+ },
16966
+ {
16967
+ "epoch": 2.43,
16968
+ "learning_rate": 0.006858439738570398,
16969
+ "loss": 8.0601,
16970
+ "step": 565400
16971
+ },
16972
+ {
16973
+ "epoch": 2.43,
16974
+ "learning_rate": 0.006854293382593129,
16975
+ "loss": 8.0478,
16976
+ "step": 565600
16977
+ },
16978
+ {
16979
+ "epoch": 2.43,
16980
+ "learning_rate": 0.00685012470044207,
16981
+ "loss": 8.06,
16982
+ "step": 565800
16983
+ },
16984
+ {
16985
+ "epoch": 2.43,
16986
+ "learning_rate": 0.006845954527695071,
16987
+ "loss": 8.0508,
16988
+ "step": 566000
16989
+ },
16990
+ {
16991
+ "epoch": 2.43,
16992
+ "learning_rate": 0.006841782867711967,
16993
+ "loss": 8.0748,
16994
+ "step": 566200
16995
+ },
16996
+ {
16997
+ "epoch": 2.43,
16998
+ "learning_rate": 0.006837609723853784,
16999
+ "loss": 8.0635,
17000
+ "step": 566400
17001
+ },
17002
+ {
17003
+ "epoch": 2.43,
17004
+ "learning_rate": 0.0068334350994827524,
17005
+ "loss": 8.0627,
17006
+ "step": 566600
17007
+ },
17008
+ {
17009
+ "epoch": 2.44,
17010
+ "learning_rate": 0.0068292589979622904,
17011
+ "loss": 8.0511,
17012
+ "step": 566800
17013
+ },
17014
+ {
17015
+ "epoch": 2.44,
17016
+ "learning_rate": 0.006825081422657008,
17017
+ "loss": 8.0495,
17018
+ "step": 567000
17019
+ },
17020
+ {
17021
+ "epoch": 2.44,
17022
+ "learning_rate": 0.0068209023769327005,
17023
+ "loss": 8.0555,
17024
+ "step": 567200
17025
+ },
17026
+ {
17027
+ "epoch": 2.44,
17028
+ "learning_rate": 0.006816721864156354,
17029
+ "loss": 8.0548,
17030
+ "step": 567400
17031
+ },
17032
+ {
17033
+ "epoch": 2.44,
17034
+ "learning_rate": 0.006812539887696127,
17035
+ "loss": 8.0487,
17036
+ "step": 567600
17037
+ },
17038
+ {
17039
+ "epoch": 2.44,
17040
+ "learning_rate": 0.006808356450921365,
17041
+ "loss": 8.0457,
17042
+ "step": 567800
17043
+ },
17044
+ {
17045
+ "epoch": 2.44,
17046
+ "learning_rate": 0.0068041715572025865,
17047
+ "loss": 8.0417,
17048
+ "step": 568000
17049
+ },
17050
+ {
17051
+ "epoch": 2.44,
17052
+ "learning_rate": 0.006799985209911487,
17053
+ "loss": 8.0564,
17054
+ "step": 568200
17055
+ },
17056
+ {
17057
+ "epoch": 2.44,
17058
+ "learning_rate": 0.0067957974124209265,
17059
+ "loss": 8.0481,
17060
+ "step": 568400
17061
+ },
17062
+ {
17063
+ "epoch": 2.44,
17064
+ "learning_rate": 0.0067916081681049425,
17065
+ "loss": 8.0318,
17066
+ "step": 568600
17067
+ },
17068
+ {
17069
+ "epoch": 2.44,
17070
+ "learning_rate": 0.00678741748033873,
17071
+ "loss": 8.0717,
17072
+ "step": 568800
17073
+ },
17074
+ {
17075
+ "epoch": 2.45,
17076
+ "learning_rate": 0.006783225352498653,
17077
+ "loss": 8.0506,
17078
+ "step": 569000
17079
+ },
17080
+ {
17081
+ "epoch": 2.45,
17082
+ "learning_rate": 0.0067790317879622315,
17083
+ "loss": 8.0453,
17084
+ "step": 569200
17085
+ },
17086
+ {
17087
+ "epoch": 2.45,
17088
+ "learning_rate": 0.006774836790108145,
17089
+ "loss": 8.0478,
17090
+ "step": 569400
17091
+ },
17092
+ {
17093
+ "epoch": 2.45,
17094
+ "learning_rate": 0.006770661348006565,
17095
+ "loss": 8.0587,
17096
+ "step": 569600
17097
+ },
17098
+ {
17099
+ "epoch": 2.45,
17100
+ "learning_rate": 0.006766463500782177,
17101
+ "loss": 8.0524,
17102
+ "step": 569800
17103
+ },
17104
+ {
17105
+ "epoch": 2.45,
17106
+ "learning_rate": 0.006762285230252838,
17107
+ "loss": 8.0655,
17108
+ "step": 570000
17109
  }
17110
  ],
17111
  "max_steps": 1000000,
17112
  "num_train_epochs": 5,
17113
+ "total_flos": 9.084816952573256e+17,
17114
  "trial_name": null,
17115
  "trial_params": null
17116
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2db06e17c94930e25ba4fa153fb1d09bc548975dd61b046eadd4ef82210ad5b
3
  size 146774203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc99a917e1b327405a8f3c276c96d3252b44e706de05260c86fdfb67a8ea2ba1
3
  size 146774203