Training in progress, step 570000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +303 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 586828837
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fffdc57df6cba4aecc5d537199d05d28768deaf925b41240f122bcbc526d6c4d
|
3 |
size 586828837
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 146774203
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc99a917e1b327405a8f3c276c96d3252b44e706de05260c86fdfb67a8ea2ba1
|
3 |
size 146774203
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9eb8cd28c207e550a8e102ab438e79bd35b1834dd9eb8b97b0c0f9aab456235f
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8cfba3a731feb83ca65973baf77fc04cbf64fea750132892e69c52d95de7113
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 733555848
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2425d50b58bd1bce863056a07fbed7929c2c0bfeef559ef18326c302aae672a
|
3 |
size 733555848
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -16806,11 +16806,311 @@
|
|
16806 |
"learning_rate": 0.006970335826120932,
|
16807 |
"loss": 8.046,
|
16808 |
"step": 560000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16809 |
}
|
16810 |
],
|
16811 |
"max_steps": 1000000,
|
16812 |
"num_train_epochs": 5,
|
16813 |
-
"total_flos":
|
16814 |
"trial_name": null,
|
16815 |
"trial_params": null
|
16816 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.4494321185009476,
|
5 |
+
"global_step": 570000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
16806 |
"learning_rate": 0.006970335826120932,
|
16807 |
"loss": 8.046,
|
16808 |
"step": 560000
|
16809 |
+
},
|
16810 |
+
{
|
16811 |
+
"epoch": 2.41,
|
16812 |
+
"learning_rate": 0.006966210216011318,
|
16813 |
+
"loss": 8.0551,
|
16814 |
+
"step": 560200
|
16815 |
+
},
|
16816 |
+
{
|
16817 |
+
"epoch": 2.41,
|
16818 |
+
"learning_rate": 0.0069620830217777575,
|
16819 |
+
"loss": 8.0548,
|
16820 |
+
"step": 560400
|
16821 |
+
},
|
16822 |
+
{
|
16823 |
+
"epoch": 2.41,
|
16824 |
+
"learning_rate": 0.006957954246745461,
|
16825 |
+
"loss": 8.0485,
|
16826 |
+
"step": 560600
|
16827 |
+
},
|
16828 |
+
{
|
16829 |
+
"epoch": 2.41,
|
16830 |
+
"learning_rate": 0.006953823894240906,
|
16831 |
+
"loss": 8.0603,
|
16832 |
+
"step": 560800
|
16833 |
+
},
|
16834 |
+
{
|
16835 |
+
"epoch": 2.41,
|
16836 |
+
"learning_rate": 0.0069496919675918435,
|
16837 |
+
"loss": 8.051,
|
16838 |
+
"step": 561000
|
16839 |
+
},
|
16840 |
+
{
|
16841 |
+
"epoch": 2.41,
|
16842 |
+
"learning_rate": 0.006945558470127292,
|
16843 |
+
"loss": 8.0569,
|
16844 |
+
"step": 561200
|
16845 |
+
},
|
16846 |
+
{
|
16847 |
+
"epoch": 2.41,
|
16848 |
+
"learning_rate": 0.006941423405177537,
|
16849 |
+
"loss": 8.0389,
|
16850 |
+
"step": 561400
|
16851 |
+
},
|
16852 |
+
{
|
16853 |
+
"epoch": 2.41,
|
16854 |
+
"learning_rate": 0.0069372867760741225,
|
16855 |
+
"loss": 8.0413,
|
16856 |
+
"step": 561600
|
16857 |
+
},
|
16858 |
+
{
|
16859 |
+
"epoch": 2.41,
|
16860 |
+
"learning_rate": 0.006933148586149858,
|
16861 |
+
"loss": 8.0455,
|
16862 |
+
"step": 561800
|
16863 |
+
},
|
16864 |
+
{
|
16865 |
+
"epoch": 2.42,
|
16866 |
+
"learning_rate": 0.006929008838738809,
|
16867 |
+
"loss": 8.0532,
|
16868 |
+
"step": 562000
|
16869 |
+
},
|
16870 |
+
{
|
16871 |
+
"epoch": 2.42,
|
16872 |
+
"learning_rate": 0.006924867537176294,
|
16873 |
+
"loss": 8.0524,
|
16874 |
+
"step": 562200
|
16875 |
+
},
|
16876 |
+
{
|
16877 |
+
"epoch": 2.42,
|
16878 |
+
"learning_rate": 0.006920724684798886,
|
16879 |
+
"loss": 8.0644,
|
16880 |
+
"step": 562400
|
16881 |
+
},
|
16882 |
+
{
|
16883 |
+
"epoch": 2.42,
|
16884 |
+
"learning_rate": 0.006916580284944404,
|
16885 |
+
"loss": 8.0491,
|
16886 |
+
"step": 562600
|
16887 |
+
},
|
16888 |
+
{
|
16889 |
+
"epoch": 2.42,
|
16890 |
+
"learning_rate": 0.006912434340951918,
|
16891 |
+
"loss": 8.0658,
|
16892 |
+
"step": 562800
|
16893 |
+
},
|
16894 |
+
{
|
16895 |
+
"epoch": 2.42,
|
16896 |
+
"learning_rate": 0.006908286856161741,
|
16897 |
+
"loss": 8.0541,
|
16898 |
+
"step": 563000
|
16899 |
+
},
|
16900 |
+
{
|
16901 |
+
"epoch": 2.42,
|
16902 |
+
"learning_rate": 0.006904137833915425,
|
16903 |
+
"loss": 8.0429,
|
16904 |
+
"step": 563200
|
16905 |
+
},
|
16906 |
+
{
|
16907 |
+
"epoch": 2.42,
|
16908 |
+
"learning_rate": 0.006900008034148137,
|
16909 |
+
"loss": 8.047,
|
16910 |
+
"step": 563400
|
16911 |
+
},
|
16912 |
+
{
|
16913 |
+
"epoch": 2.42,
|
16914 |
+
"learning_rate": 0.006895855954664682,
|
16915 |
+
"loss": 8.0523,
|
16916 |
+
"step": 563600
|
16917 |
+
},
|
16918 |
+
{
|
16919 |
+
"epoch": 2.42,
|
16920 |
+
"learning_rate": 0.006891702347740443,
|
16921 |
+
"loss": 8.0611,
|
16922 |
+
"step": 563800
|
16923 |
+
},
|
16924 |
+
{
|
16925 |
+
"epoch": 2.42,
|
16926 |
+
"learning_rate": 0.0068875472167219025,
|
16927 |
+
"loss": 8.0624,
|
16928 |
+
"step": 564000
|
16929 |
+
},
|
16930 |
+
{
|
16931 |
+
"epoch": 2.42,
|
16932 |
+
"learning_rate": 0.006883390564956777,
|
16933 |
+
"loss": 8.0618,
|
16934 |
+
"step": 564200
|
16935 |
+
},
|
16936 |
+
{
|
16937 |
+
"epoch": 2.43,
|
16938 |
+
"learning_rate": 0.006879232395794005,
|
16939 |
+
"loss": 8.0637,
|
16940 |
+
"step": 564400
|
16941 |
+
},
|
16942 |
+
{
|
16943 |
+
"epoch": 2.43,
|
16944 |
+
"learning_rate": 0.006875072712583748,
|
16945 |
+
"loss": 8.0495,
|
16946 |
+
"step": 564600
|
16947 |
+
},
|
16948 |
+
{
|
16949 |
+
"epoch": 2.43,
|
16950 |
+
"learning_rate": 0.00687091151867739,
|
16951 |
+
"loss": 8.0603,
|
16952 |
+
"step": 564800
|
16953 |
+
},
|
16954 |
+
{
|
16955 |
+
"epoch": 2.43,
|
16956 |
+
"learning_rate": 0.006866748817427526,
|
16957 |
+
"loss": 8.0579,
|
16958 |
+
"step": 565000
|
16959 |
+
},
|
16960 |
+
{
|
16961 |
+
"epoch": 2.43,
|
16962 |
+
"learning_rate": 0.006862584612187971,
|
16963 |
+
"loss": 8.0629,
|
16964 |
+
"step": 565200
|
16965 |
+
},
|
16966 |
+
{
|
16967 |
+
"epoch": 2.43,
|
16968 |
+
"learning_rate": 0.006858439738570398,
|
16969 |
+
"loss": 8.0601,
|
16970 |
+
"step": 565400
|
16971 |
+
},
|
16972 |
+
{
|
16973 |
+
"epoch": 2.43,
|
16974 |
+
"learning_rate": 0.006854293382593129,
|
16975 |
+
"loss": 8.0478,
|
16976 |
+
"step": 565600
|
16977 |
+
},
|
16978 |
+
{
|
16979 |
+
"epoch": 2.43,
|
16980 |
+
"learning_rate": 0.00685012470044207,
|
16981 |
+
"loss": 8.06,
|
16982 |
+
"step": 565800
|
16983 |
+
},
|
16984 |
+
{
|
16985 |
+
"epoch": 2.43,
|
16986 |
+
"learning_rate": 0.006845954527695071,
|
16987 |
+
"loss": 8.0508,
|
16988 |
+
"step": 566000
|
16989 |
+
},
|
16990 |
+
{
|
16991 |
+
"epoch": 2.43,
|
16992 |
+
"learning_rate": 0.006841782867711967,
|
16993 |
+
"loss": 8.0748,
|
16994 |
+
"step": 566200
|
16995 |
+
},
|
16996 |
+
{
|
16997 |
+
"epoch": 2.43,
|
16998 |
+
"learning_rate": 0.006837609723853784,
|
16999 |
+
"loss": 8.0635,
|
17000 |
+
"step": 566400
|
17001 |
+
},
|
17002 |
+
{
|
17003 |
+
"epoch": 2.43,
|
17004 |
+
"learning_rate": 0.0068334350994827524,
|
17005 |
+
"loss": 8.0627,
|
17006 |
+
"step": 566600
|
17007 |
+
},
|
17008 |
+
{
|
17009 |
+
"epoch": 2.44,
|
17010 |
+
"learning_rate": 0.0068292589979622904,
|
17011 |
+
"loss": 8.0511,
|
17012 |
+
"step": 566800
|
17013 |
+
},
|
17014 |
+
{
|
17015 |
+
"epoch": 2.44,
|
17016 |
+
"learning_rate": 0.006825081422657008,
|
17017 |
+
"loss": 8.0495,
|
17018 |
+
"step": 567000
|
17019 |
+
},
|
17020 |
+
{
|
17021 |
+
"epoch": 2.44,
|
17022 |
+
"learning_rate": 0.0068209023769327005,
|
17023 |
+
"loss": 8.0555,
|
17024 |
+
"step": 567200
|
17025 |
+
},
|
17026 |
+
{
|
17027 |
+
"epoch": 2.44,
|
17028 |
+
"learning_rate": 0.006816721864156354,
|
17029 |
+
"loss": 8.0548,
|
17030 |
+
"step": 567400
|
17031 |
+
},
|
17032 |
+
{
|
17033 |
+
"epoch": 2.44,
|
17034 |
+
"learning_rate": 0.006812539887696127,
|
17035 |
+
"loss": 8.0487,
|
17036 |
+
"step": 567600
|
17037 |
+
},
|
17038 |
+
{
|
17039 |
+
"epoch": 2.44,
|
17040 |
+
"learning_rate": 0.006808356450921365,
|
17041 |
+
"loss": 8.0457,
|
17042 |
+
"step": 567800
|
17043 |
+
},
|
17044 |
+
{
|
17045 |
+
"epoch": 2.44,
|
17046 |
+
"learning_rate": 0.0068041715572025865,
|
17047 |
+
"loss": 8.0417,
|
17048 |
+
"step": 568000
|
17049 |
+
},
|
17050 |
+
{
|
17051 |
+
"epoch": 2.44,
|
17052 |
+
"learning_rate": 0.006799985209911487,
|
17053 |
+
"loss": 8.0564,
|
17054 |
+
"step": 568200
|
17055 |
+
},
|
17056 |
+
{
|
17057 |
+
"epoch": 2.44,
|
17058 |
+
"learning_rate": 0.0067957974124209265,
|
17059 |
+
"loss": 8.0481,
|
17060 |
+
"step": 568400
|
17061 |
+
},
|
17062 |
+
{
|
17063 |
+
"epoch": 2.44,
|
17064 |
+
"learning_rate": 0.0067916081681049425,
|
17065 |
+
"loss": 8.0318,
|
17066 |
+
"step": 568600
|
17067 |
+
},
|
17068 |
+
{
|
17069 |
+
"epoch": 2.44,
|
17070 |
+
"learning_rate": 0.00678741748033873,
|
17071 |
+
"loss": 8.0717,
|
17072 |
+
"step": 568800
|
17073 |
+
},
|
17074 |
+
{
|
17075 |
+
"epoch": 2.45,
|
17076 |
+
"learning_rate": 0.006783225352498653,
|
17077 |
+
"loss": 8.0506,
|
17078 |
+
"step": 569000
|
17079 |
+
},
|
17080 |
+
{
|
17081 |
+
"epoch": 2.45,
|
17082 |
+
"learning_rate": 0.0067790317879622315,
|
17083 |
+
"loss": 8.0453,
|
17084 |
+
"step": 569200
|
17085 |
+
},
|
17086 |
+
{
|
17087 |
+
"epoch": 2.45,
|
17088 |
+
"learning_rate": 0.006774836790108145,
|
17089 |
+
"loss": 8.0478,
|
17090 |
+
"step": 569400
|
17091 |
+
},
|
17092 |
+
{
|
17093 |
+
"epoch": 2.45,
|
17094 |
+
"learning_rate": 0.006770661348006565,
|
17095 |
+
"loss": 8.0587,
|
17096 |
+
"step": 569600
|
17097 |
+
},
|
17098 |
+
{
|
17099 |
+
"epoch": 2.45,
|
17100 |
+
"learning_rate": 0.006766463500782177,
|
17101 |
+
"loss": 8.0524,
|
17102 |
+
"step": 569800
|
17103 |
+
},
|
17104 |
+
{
|
17105 |
+
"epoch": 2.45,
|
17106 |
+
"learning_rate": 0.006762285230252838,
|
17107 |
+
"loss": 8.0655,
|
17108 |
+
"step": 570000
|
17109 |
}
|
17110 |
],
|
17111 |
"max_steps": 1000000,
|
17112 |
"num_train_epochs": 5,
|
17113 |
+
"total_flos": 9.084816952573256e+17,
|
17114 |
"trial_name": null,
|
17115 |
"trial_params": null
|
17116 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 146774203
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc99a917e1b327405a8f3c276c96d3252b44e706de05260c86fdfb67a8ea2ba1
|
3 |
size 146774203
|