Training in progress, step 81000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 487156538
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:136551a11d925c9b7e277363afe91252d3c90b8dc5e5c43289e61107e3d62773
|
| 3 |
size 487156538
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1059459406
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1982d8643a6b6bd91ce00559255789edb52f59cd150cda1ba9a3365e1689ee9f
|
| 3 |
size 1059459406
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53e04a561bdac90ef1f6c25ff5574afc68c7428cdda288bd54c70b9fc50dd7f9
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7561f948920604751f7cf826d2cf58f5e293444c817431c5d2aa2bead82cc641
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae0860552be41b41d133150b6b246cf17216ad8cec8467e427463a8701a5e9f2
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2910f8dc686565600a07229cef4e507c0697eae3a7e9385ae4e913b2a8f189a5
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38edd545bc4f01de3f608883af1908fbe14efdd33931e3adde347eb4fa00e55f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -28008,6 +28008,356 @@
|
|
| 28008 |
"learning_rate": 0.0004803679802916738,
|
| 28009 |
"loss": 16.5126,
|
| 28010 |
"step": 80000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28011 |
}
|
| 28012 |
],
|
| 28013 |
"logging_steps": 20,
|
|
@@ -28027,7 +28377,7 @@
|
|
| 28027 |
"attributes": {}
|
| 28028 |
}
|
| 28029 |
},
|
| 28030 |
-
"total_flos": 1.
|
| 28031 |
"train_batch_size": 48,
|
| 28032 |
"trial_name": null,
|
| 28033 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.11998649040996866,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 81000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 28008 |
"learning_rate": 0.0004803679802916738,
|
| 28009 |
"loss": 16.5126,
|
| 28010 |
"step": 80000
|
| 28011 |
+
},
|
| 28012 |
+
{
|
| 28013 |
+
"epoch": 0.11853480200747768,
|
| 28014 |
+
"grad_norm": 7.59375,
|
| 28015 |
+
"learning_rate": 0.0004803630413566662,
|
| 28016 |
+
"loss": 16.5302,
|
| 28017 |
+
"step": 80020
|
| 28018 |
+
},
|
| 28019 |
+
{
|
| 28020 |
+
"epoch": 0.11856442830140607,
|
| 28021 |
+
"grad_norm": 7.0625,
|
| 28022 |
+
"learning_rate": 0.0004803581024216586,
|
| 28023 |
+
"loss": 16.4854,
|
| 28024 |
+
"step": 80040
|
| 28025 |
+
},
|
| 28026 |
+
{
|
| 28027 |
+
"epoch": 0.11859405459533445,
|
| 28028 |
+
"grad_norm": 6.90625,
|
| 28029 |
+
"learning_rate": 0.000480353163486651,
|
| 28030 |
+
"loss": 16.4548,
|
| 28031 |
+
"step": 80060
|
| 28032 |
+
},
|
| 28033 |
+
{
|
| 28034 |
+
"epoch": 0.11862368088926284,
|
| 28035 |
+
"grad_norm": 7.03125,
|
| 28036 |
+
"learning_rate": 0.0004803482245516435,
|
| 28037 |
+
"loss": 16.5227,
|
| 28038 |
+
"step": 80080
|
| 28039 |
+
},
|
| 28040 |
+
{
|
| 28041 |
+
"epoch": 0.11865330718319123,
|
| 28042 |
+
"grad_norm": 8.5625,
|
| 28043 |
+
"learning_rate": 0.0004803432856166359,
|
| 28044 |
+
"loss": 16.4541,
|
| 28045 |
+
"step": 80100
|
| 28046 |
+
},
|
| 28047 |
+
{
|
| 28048 |
+
"epoch": 0.11868293347711961,
|
| 28049 |
+
"grad_norm": 6.90625,
|
| 28050 |
+
"learning_rate": 0.00048033834668162836,
|
| 28051 |
+
"loss": 16.4769,
|
| 28052 |
+
"step": 80120
|
| 28053 |
+
},
|
| 28054 |
+
{
|
| 28055 |
+
"epoch": 0.118712559771048,
|
| 28056 |
+
"grad_norm": 8.3125,
|
| 28057 |
+
"learning_rate": 0.00048033340774662075,
|
| 28058 |
+
"loss": 16.433,
|
| 28059 |
+
"step": 80140
|
| 28060 |
+
},
|
| 28061 |
+
{
|
| 28062 |
+
"epoch": 0.11874218606497638,
|
| 28063 |
+
"grad_norm": 10.75,
|
| 28064 |
+
"learning_rate": 0.00048032846881161325,
|
| 28065 |
+
"loss": 16.4778,
|
| 28066 |
+
"step": 80160
|
| 28067 |
+
},
|
| 28068 |
+
{
|
| 28069 |
+
"epoch": 0.11877181235890477,
|
| 28070 |
+
"grad_norm": 8.5,
|
| 28071 |
+
"learning_rate": 0.00048032352987660565,
|
| 28072 |
+
"loss": 16.458,
|
| 28073 |
+
"step": 80180
|
| 28074 |
+
},
|
| 28075 |
+
{
|
| 28076 |
+
"epoch": 0.11880143865283316,
|
| 28077 |
+
"grad_norm": 5.875,
|
| 28078 |
+
"learning_rate": 0.0004803185909415981,
|
| 28079 |
+
"loss": 16.4083,
|
| 28080 |
+
"step": 80200
|
| 28081 |
+
},
|
| 28082 |
+
{
|
| 28083 |
+
"epoch": 0.11883106494676154,
|
| 28084 |
+
"grad_norm": 7.96875,
|
| 28085 |
+
"learning_rate": 0.00048031365200659054,
|
| 28086 |
+
"loss": 16.5132,
|
| 28087 |
+
"step": 80220
|
| 28088 |
+
},
|
| 28089 |
+
{
|
| 28090 |
+
"epoch": 0.11886069124068993,
|
| 28091 |
+
"grad_norm": 7.21875,
|
| 28092 |
+
"learning_rate": 0.000480308713071583,
|
| 28093 |
+
"loss": 16.4829,
|
| 28094 |
+
"step": 80240
|
| 28095 |
+
},
|
| 28096 |
+
{
|
| 28097 |
+
"epoch": 0.11889031753461833,
|
| 28098 |
+
"grad_norm": 8.3125,
|
| 28099 |
+
"learning_rate": 0.0004803037741365754,
|
| 28100 |
+
"loss": 16.4883,
|
| 28101 |
+
"step": 80260
|
| 28102 |
+
},
|
| 28103 |
+
{
|
| 28104 |
+
"epoch": 0.11891994382854672,
|
| 28105 |
+
"grad_norm": 8.3125,
|
| 28106 |
+
"learning_rate": 0.00048029883520156783,
|
| 28107 |
+
"loss": 16.4839,
|
| 28108 |
+
"step": 80280
|
| 28109 |
+
},
|
| 28110 |
+
{
|
| 28111 |
+
"epoch": 0.1189495701224751,
|
| 28112 |
+
"grad_norm": 16.5,
|
| 28113 |
+
"learning_rate": 0.0004802938962665603,
|
| 28114 |
+
"loss": 16.474,
|
| 28115 |
+
"step": 80300
|
| 28116 |
+
},
|
| 28117 |
+
{
|
| 28118 |
+
"epoch": 0.11897919641640349,
|
| 28119 |
+
"grad_norm": 8.5,
|
| 28120 |
+
"learning_rate": 0.0004802889573315527,
|
| 28121 |
+
"loss": 16.5142,
|
| 28122 |
+
"step": 80320
|
| 28123 |
+
},
|
| 28124 |
+
{
|
| 28125 |
+
"epoch": 0.11900882271033188,
|
| 28126 |
+
"grad_norm": 6.71875,
|
| 28127 |
+
"learning_rate": 0.0004802840183965451,
|
| 28128 |
+
"loss": 16.474,
|
| 28129 |
+
"step": 80340
|
| 28130 |
+
},
|
| 28131 |
+
{
|
| 28132 |
+
"epoch": 0.11903844900426026,
|
| 28133 |
+
"grad_norm": 6.9375,
|
| 28134 |
+
"learning_rate": 0.0004802790794615375,
|
| 28135 |
+
"loss": 16.4679,
|
| 28136 |
+
"step": 80360
|
| 28137 |
+
},
|
| 28138 |
+
{
|
| 28139 |
+
"epoch": 0.11906807529818865,
|
| 28140 |
+
"grad_norm": 11.25,
|
| 28141 |
+
"learning_rate": 0.00048027414052653,
|
| 28142 |
+
"loss": 16.4848,
|
| 28143 |
+
"step": 80380
|
| 28144 |
+
},
|
| 28145 |
+
{
|
| 28146 |
+
"epoch": 0.11909770159211704,
|
| 28147 |
+
"grad_norm": 6.75,
|
| 28148 |
+
"learning_rate": 0.0004802692015915224,
|
| 28149 |
+
"loss": 16.4804,
|
| 28150 |
+
"step": 80400
|
| 28151 |
+
},
|
| 28152 |
+
{
|
| 28153 |
+
"epoch": 0.11912732788604542,
|
| 28154 |
+
"grad_norm": 9.625,
|
| 28155 |
+
"learning_rate": 0.00048026426265651486,
|
| 28156 |
+
"loss": 16.4446,
|
| 28157 |
+
"step": 80420
|
| 28158 |
+
},
|
| 28159 |
+
{
|
| 28160 |
+
"epoch": 0.11915695417997381,
|
| 28161 |
+
"grad_norm": 7.65625,
|
| 28162 |
+
"learning_rate": 0.00048025932372150725,
|
| 28163 |
+
"loss": 16.4631,
|
| 28164 |
+
"step": 80440
|
| 28165 |
+
},
|
| 28166 |
+
{
|
| 28167 |
+
"epoch": 0.1191865804739022,
|
| 28168 |
+
"grad_norm": 7.5,
|
| 28169 |
+
"learning_rate": 0.00048025438478649975,
|
| 28170 |
+
"loss": 16.4251,
|
| 28171 |
+
"step": 80460
|
| 28172 |
+
},
|
| 28173 |
+
{
|
| 28174 |
+
"epoch": 0.11921620676783058,
|
| 28175 |
+
"grad_norm": 6.5,
|
| 28176 |
+
"learning_rate": 0.00048024944585149215,
|
| 28177 |
+
"loss": 16.552,
|
| 28178 |
+
"step": 80480
|
| 28179 |
+
},
|
| 28180 |
+
{
|
| 28181 |
+
"epoch": 0.11924583306175897,
|
| 28182 |
+
"grad_norm": 7.40625,
|
| 28183 |
+
"learning_rate": 0.0004802445069164846,
|
| 28184 |
+
"loss": 16.4699,
|
| 28185 |
+
"step": 80500
|
| 28186 |
+
},
|
| 28187 |
+
{
|
| 28188 |
+
"epoch": 0.11927545935568735,
|
| 28189 |
+
"grad_norm": 11.1875,
|
| 28190 |
+
"learning_rate": 0.00048023956798147704,
|
| 28191 |
+
"loss": 16.4617,
|
| 28192 |
+
"step": 80520
|
| 28193 |
+
},
|
| 28194 |
+
{
|
| 28195 |
+
"epoch": 0.11930508564961574,
|
| 28196 |
+
"grad_norm": 8.0625,
|
| 28197 |
+
"learning_rate": 0.0004802346290464695,
|
| 28198 |
+
"loss": 16.4773,
|
| 28199 |
+
"step": 80540
|
| 28200 |
+
},
|
| 28201 |
+
{
|
| 28202 |
+
"epoch": 0.11933471194354413,
|
| 28203 |
+
"grad_norm": 7.53125,
|
| 28204 |
+
"learning_rate": 0.0004802296901114619,
|
| 28205 |
+
"loss": 16.4283,
|
| 28206 |
+
"step": 80560
|
| 28207 |
+
},
|
| 28208 |
+
{
|
| 28209 |
+
"epoch": 0.11936433823747253,
|
| 28210 |
+
"grad_norm": 7.40625,
|
| 28211 |
+
"learning_rate": 0.00048022475117645433,
|
| 28212 |
+
"loss": 16.5017,
|
| 28213 |
+
"step": 80580
|
| 28214 |
+
},
|
| 28215 |
+
{
|
| 28216 |
+
"epoch": 0.11939396453140091,
|
| 28217 |
+
"grad_norm": 7.9375,
|
| 28218 |
+
"learning_rate": 0.0004802198122414468,
|
| 28219 |
+
"loss": 16.4556,
|
| 28220 |
+
"step": 80600
|
| 28221 |
+
},
|
| 28222 |
+
{
|
| 28223 |
+
"epoch": 0.1194235908253293,
|
| 28224 |
+
"grad_norm": 8.5,
|
| 28225 |
+
"learning_rate": 0.00048021487330643923,
|
| 28226 |
+
"loss": 16.448,
|
| 28227 |
+
"step": 80620
|
| 28228 |
+
},
|
| 28229 |
+
{
|
| 28230 |
+
"epoch": 0.11945321711925769,
|
| 28231 |
+
"grad_norm": 8.1875,
|
| 28232 |
+
"learning_rate": 0.0004802099343714316,
|
| 28233 |
+
"loss": 16.4593,
|
| 28234 |
+
"step": 80640
|
| 28235 |
+
},
|
| 28236 |
+
{
|
| 28237 |
+
"epoch": 0.11948284341318607,
|
| 28238 |
+
"grad_norm": 8.4375,
|
| 28239 |
+
"learning_rate": 0.000480204995436424,
|
| 28240 |
+
"loss": 16.5178,
|
| 28241 |
+
"step": 80660
|
| 28242 |
+
},
|
| 28243 |
+
{
|
| 28244 |
+
"epoch": 0.11951246970711446,
|
| 28245 |
+
"grad_norm": 6.59375,
|
| 28246 |
+
"learning_rate": 0.0004802000565014165,
|
| 28247 |
+
"loss": 16.4132,
|
| 28248 |
+
"step": 80680
|
| 28249 |
+
},
|
| 28250 |
+
{
|
| 28251 |
+
"epoch": 0.11954209600104285,
|
| 28252 |
+
"grad_norm": 7.09375,
|
| 28253 |
+
"learning_rate": 0.0004801951175664089,
|
| 28254 |
+
"loss": 16.4124,
|
| 28255 |
+
"step": 80700
|
| 28256 |
+
},
|
| 28257 |
+
{
|
| 28258 |
+
"epoch": 0.11957172229497123,
|
| 28259 |
+
"grad_norm": 8.1875,
|
| 28260 |
+
"learning_rate": 0.00048019017863140136,
|
| 28261 |
+
"loss": 16.4544,
|
| 28262 |
+
"step": 80720
|
| 28263 |
+
},
|
| 28264 |
+
{
|
| 28265 |
+
"epoch": 0.11960134858889962,
|
| 28266 |
+
"grad_norm": 9.8125,
|
| 28267 |
+
"learning_rate": 0.00048018523969639375,
|
| 28268 |
+
"loss": 16.4505,
|
| 28269 |
+
"step": 80740
|
| 28270 |
+
},
|
| 28271 |
+
{
|
| 28272 |
+
"epoch": 0.119630974882828,
|
| 28273 |
+
"grad_norm": 8.0,
|
| 28274 |
+
"learning_rate": 0.00048018030076138626,
|
| 28275 |
+
"loss": 16.4634,
|
| 28276 |
+
"step": 80760
|
| 28277 |
+
},
|
| 28278 |
+
{
|
| 28279 |
+
"epoch": 0.11966060117675639,
|
| 28280 |
+
"grad_norm": 9.8125,
|
| 28281 |
+
"learning_rate": 0.00048017536182637865,
|
| 28282 |
+
"loss": 16.4973,
|
| 28283 |
+
"step": 80780
|
| 28284 |
+
},
|
| 28285 |
+
{
|
| 28286 |
+
"epoch": 0.11969022747068478,
|
| 28287 |
+
"grad_norm": 7.65625,
|
| 28288 |
+
"learning_rate": 0.0004801704228913711,
|
| 28289 |
+
"loss": 16.4454,
|
| 28290 |
+
"step": 80800
|
| 28291 |
+
},
|
| 28292 |
+
{
|
| 28293 |
+
"epoch": 0.11971985376461317,
|
| 28294 |
+
"grad_norm": 9.0,
|
| 28295 |
+
"learning_rate": 0.00048016548395636354,
|
| 28296 |
+
"loss": 16.3708,
|
| 28297 |
+
"step": 80820
|
| 28298 |
+
},
|
| 28299 |
+
{
|
| 28300 |
+
"epoch": 0.11974948005854155,
|
| 28301 |
+
"grad_norm": 7.4375,
|
| 28302 |
+
"learning_rate": 0.000480160545021356,
|
| 28303 |
+
"loss": 16.4656,
|
| 28304 |
+
"step": 80840
|
| 28305 |
+
},
|
| 28306 |
+
{
|
| 28307 |
+
"epoch": 0.11977910635246994,
|
| 28308 |
+
"grad_norm": 7.9375,
|
| 28309 |
+
"learning_rate": 0.0004801556060863484,
|
| 28310 |
+
"loss": 16.4999,
|
| 28311 |
+
"step": 80860
|
| 28312 |
+
},
|
| 28313 |
+
{
|
| 28314 |
+
"epoch": 0.11980873264639832,
|
| 28315 |
+
"grad_norm": 8.125,
|
| 28316 |
+
"learning_rate": 0.00048015066715134083,
|
| 28317 |
+
"loss": 16.3914,
|
| 28318 |
+
"step": 80880
|
| 28319 |
+
},
|
| 28320 |
+
{
|
| 28321 |
+
"epoch": 0.11983835894032673,
|
| 28322 |
+
"grad_norm": 8.5,
|
| 28323 |
+
"learning_rate": 0.0004801457282163333,
|
| 28324 |
+
"loss": 16.4897,
|
| 28325 |
+
"step": 80900
|
| 28326 |
+
},
|
| 28327 |
+
{
|
| 28328 |
+
"epoch": 0.11986798523425511,
|
| 28329 |
+
"grad_norm": 8.4375,
|
| 28330 |
+
"learning_rate": 0.00048014078928132573,
|
| 28331 |
+
"loss": 16.4603,
|
| 28332 |
+
"step": 80920
|
| 28333 |
+
},
|
| 28334 |
+
{
|
| 28335 |
+
"epoch": 0.1198976115281835,
|
| 28336 |
+
"grad_norm": 9.3125,
|
| 28337 |
+
"learning_rate": 0.0004801358503463181,
|
| 28338 |
+
"loss": 16.4485,
|
| 28339 |
+
"step": 80940
|
| 28340 |
+
},
|
| 28341 |
+
{
|
| 28342 |
+
"epoch": 0.11992723782211188,
|
| 28343 |
+
"grad_norm": 7.625,
|
| 28344 |
+
"learning_rate": 0.00048013091141131057,
|
| 28345 |
+
"loss": 16.4565,
|
| 28346 |
+
"step": 80960
|
| 28347 |
+
},
|
| 28348 |
+
{
|
| 28349 |
+
"epoch": 0.11995686411604027,
|
| 28350 |
+
"grad_norm": 9.9375,
|
| 28351 |
+
"learning_rate": 0.000480125972476303,
|
| 28352 |
+
"loss": 16.4224,
|
| 28353 |
+
"step": 80980
|
| 28354 |
+
},
|
| 28355 |
+
{
|
| 28356 |
+
"epoch": 0.11998649040996866,
|
| 28357 |
+
"grad_norm": 13.0625,
|
| 28358 |
+
"learning_rate": 0.0004801210335412954,
|
| 28359 |
+
"loss": 16.473,
|
| 28360 |
+
"step": 81000
|
| 28361 |
}
|
| 28362 |
],
|
| 28363 |
"logging_steps": 20,
|
|
|
|
| 28377 |
"attributes": {}
|
| 28378 |
}
|
| 28379 |
},
|
| 28380 |
+
"total_flos": 1.802706127351395e+20,
|
| 28381 |
"train_batch_size": 48,
|
| 28382 |
"trial_name": null,
|
| 28383 |
"trial_params": null
|