Training in progress, step 7324, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1623800
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c593d71f7557076e37e0b4b9917c8094c81179d69d7d4e534db17f9dedbe2e92
|
3 |
size 1623800
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3255543
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7641b338727bb3540ed834aa0894224334616dfda3481784c4699ee547c005c7
|
3 |
size 3255543
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db01e641880e948c7c9c41c9c379ab112d117b55ad5f878dd649990157c93f3a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:108c408d30709d05ff4c72fd6b80731b42d0c024721a42b380443f36c1af49ff
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -50197,6 +50197,1084 @@
|
|
50197 |
"learning_rate": 1.1208910173183817e-07,
|
50198 |
"loss": 46.0056,
|
50199 |
"step": 7170
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50200 |
}
|
50201 |
],
|
50202 |
"logging_steps": 1,
|
@@ -50211,12 +51289,12 @@
|
|
50211 |
"should_evaluate": false,
|
50212 |
"should_log": false,
|
50213 |
"should_save": true,
|
50214 |
-
"should_training_stop":
|
50215 |
},
|
50216 |
"attributes": {}
|
50217 |
}
|
50218 |
},
|
50219 |
-
"total_flos":
|
50220 |
"train_batch_size": 4,
|
50221 |
"trial_name": null,
|
50222 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999317359546727,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 7324,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
50197 |
"learning_rate": 1.1208910173183817e-07,
|
50198 |
"loss": 46.0056,
|
50199 |
"step": 7170
|
50200 |
+
},
|
50201 |
+
{
|
50202 |
+
"epoch": 0.9790429380845109,
|
50203 |
+
"grad_norm": 0.08908099681138992,
|
50204 |
+
"learning_rate": 1.1063866077125618e-07,
|
50205 |
+
"loss": 46.0037,
|
50206 |
+
"step": 7171
|
50207 |
+
},
|
50208 |
+
{
|
50209 |
+
"epoch": 0.9791794661751655,
|
50210 |
+
"grad_norm": 0.06807133555412292,
|
50211 |
+
"learning_rate": 1.0919765503453195e-07,
|
50212 |
+
"loss": 46.0035,
|
50213 |
+
"step": 7172
|
50214 |
+
},
|
50215 |
+
{
|
50216 |
+
"epoch": 0.9793159942658202,
|
50217 |
+
"grad_norm": 0.08383559435606003,
|
50218 |
+
"learning_rate": 1.0776608479418082e-07,
|
50219 |
+
"loss": 46.0052,
|
50220 |
+
"step": 7173
|
50221 |
+
},
|
50222 |
+
{
|
50223 |
+
"epoch": 0.9794525223564748,
|
50224 |
+
"grad_norm": 0.07002658396959305,
|
50225 |
+
"learning_rate": 1.063439503209529e-07,
|
50226 |
+
"loss": 46.0028,
|
50227 |
+
"step": 7174
|
50228 |
+
},
|
50229 |
+
{
|
50230 |
+
"epoch": 0.9795890504471295,
|
50231 |
+
"grad_norm": 0.0548606738448143,
|
50232 |
+
"learning_rate": 1.0493125188379971e-07,
|
50233 |
+
"loss": 46.0023,
|
50234 |
+
"step": 7175
|
50235 |
+
},
|
50236 |
+
{
|
50237 |
+
"epoch": 0.9797255785377842,
|
50238 |
+
"grad_norm": 0.16602098941802979,
|
50239 |
+
"learning_rate": 1.0352798974990752e-07,
|
50240 |
+
"loss": 46.0117,
|
50241 |
+
"step": 7176
|
50242 |
+
},
|
50243 |
+
{
|
50244 |
+
"epoch": 0.9798621066284388,
|
50245 |
+
"grad_norm": 0.10093922168016434,
|
50246 |
+
"learning_rate": 1.0213416418465294e-07,
|
50247 |
+
"loss": 46.0034,
|
50248 |
+
"step": 7177
|
50249 |
+
},
|
50250 |
+
{
|
50251 |
+
"epoch": 0.9799986347190934,
|
50252 |
+
"grad_norm": 0.08100040256977081,
|
50253 |
+
"learning_rate": 1.0074977545164177e-07,
|
50254 |
+
"loss": 46.011,
|
50255 |
+
"step": 7178
|
50256 |
+
},
|
50257 |
+
{
|
50258 |
+
"epoch": 0.9801351628097481,
|
50259 |
+
"grad_norm": 0.046713173389434814,
|
50260 |
+
"learning_rate": 9.937482381270346e-08,
|
50261 |
+
"loss": 46.0018,
|
50262 |
+
"step": 7179
|
50263 |
+
},
|
50264 |
+
{
|
50265 |
+
"epoch": 0.9802716909004028,
|
50266 |
+
"grad_norm": 0.06428004056215286,
|
50267 |
+
"learning_rate": 9.800930952786336e-08,
|
50268 |
+
"loss": 46.0027,
|
50269 |
+
"step": 7180
|
50270 |
+
},
|
50271 |
+
{
|
50272 |
+
"epoch": 0.9804082189910575,
|
50273 |
+
"grad_norm": 0.09634747356176376,
|
50274 |
+
"learning_rate": 9.665323285537598e-08,
|
50275 |
+
"loss": 46.0029,
|
50276 |
+
"step": 7181
|
50277 |
+
},
|
50278 |
+
{
|
50279 |
+
"epoch": 0.980544747081712,
|
50280 |
+
"grad_norm": 0.10261856019496918,
|
50281 |
+
"learning_rate": 9.530659405169728e-08,
|
50282 |
+
"loss": 46.0024,
|
50283 |
+
"step": 7182
|
50284 |
+
},
|
50285 |
+
{
|
50286 |
+
"epoch": 0.9806812751723667,
|
50287 |
+
"grad_norm": 0.08636260032653809,
|
50288 |
+
"learning_rate": 9.396939337152355e-08,
|
50289 |
+
"loss": 46.016,
|
50290 |
+
"step": 7183
|
50291 |
+
},
|
50292 |
+
{
|
50293 |
+
"epoch": 0.9808178032630214,
|
50294 |
+
"grad_norm": 0.0685448870062828,
|
50295 |
+
"learning_rate": 9.264163106774137e-08,
|
50296 |
+
"loss": 46.0086,
|
50297 |
+
"step": 7184
|
50298 |
+
},
|
50299 |
+
{
|
50300 |
+
"epoch": 0.980954331353676,
|
50301 |
+
"grad_norm": 0.10635057091712952,
|
50302 |
+
"learning_rate": 9.132330739145545e-08,
|
50303 |
+
"loss": 46.0068,
|
50304 |
+
"step": 7185
|
50305 |
+
},
|
50306 |
+
{
|
50307 |
+
"epoch": 0.9810908594443307,
|
50308 |
+
"grad_norm": 0.04230385273694992,
|
50309 |
+
"learning_rate": 9.001442259200521e-08,
|
50310 |
+
"loss": 46.0053,
|
50311 |
+
"step": 7186
|
50312 |
+
},
|
50313 |
+
{
|
50314 |
+
"epoch": 0.9812273875349853,
|
50315 |
+
"grad_norm": 0.07328132539987564,
|
50316 |
+
"learning_rate": 8.871497691691489e-08,
|
50317 |
+
"loss": 46.0056,
|
50318 |
+
"step": 7187
|
50319 |
+
},
|
50320 |
+
{
|
50321 |
+
"epoch": 0.98136391562564,
|
50322 |
+
"grad_norm": 0.09978116303682327,
|
50323 |
+
"learning_rate": 8.742497061195454e-08,
|
50324 |
+
"loss": 46.0054,
|
50325 |
+
"step": 7188
|
50326 |
+
},
|
50327 |
+
{
|
50328 |
+
"epoch": 0.9815004437162946,
|
50329 |
+
"grad_norm": 0.05884576961398125,
|
50330 |
+
"learning_rate": 8.614440392108458e-08,
|
50331 |
+
"loss": 46.0009,
|
50332 |
+
"step": 7189
|
50333 |
+
},
|
50334 |
+
{
|
50335 |
+
"epoch": 0.9816369718069493,
|
50336 |
+
"grad_norm": 0.03834863379597664,
|
50337 |
+
"learning_rate": 8.487327708648907e-08,
|
50338 |
+
"loss": 46.0009,
|
50339 |
+
"step": 7190
|
50340 |
+
},
|
50341 |
+
{
|
50342 |
+
"epoch": 0.981773499897604,
|
50343 |
+
"grad_norm": 0.03978651016950607,
|
50344 |
+
"learning_rate": 8.361159034857569e-08,
|
50345 |
+
"loss": 46.0047,
|
50346 |
+
"step": 7191
|
50347 |
+
},
|
50348 |
+
{
|
50349 |
+
"epoch": 0.9819100279882585,
|
50350 |
+
"grad_norm": 0.07296937704086304,
|
50351 |
+
"learning_rate": 8.235934394594802e-08,
|
50352 |
+
"loss": 46.0005,
|
50353 |
+
"step": 7192
|
50354 |
+
},
|
50355 |
+
{
|
50356 |
+
"epoch": 0.9820465560789132,
|
50357 |
+
"grad_norm": 0.03498697653412819,
|
50358 |
+
"learning_rate": 8.11165381154444e-08,
|
50359 |
+
"loss": 46.0101,
|
50360 |
+
"step": 7193
|
50361 |
+
},
|
50362 |
+
{
|
50363 |
+
"epoch": 0.9821830841695679,
|
50364 |
+
"grad_norm": 0.038850992918014526,
|
50365 |
+
"learning_rate": 7.988317309209902e-08,
|
50366 |
+
"loss": 46.0043,
|
50367 |
+
"step": 7194
|
50368 |
+
},
|
50369 |
+
{
|
50370 |
+
"epoch": 0.9823196122602226,
|
50371 |
+
"grad_norm": 0.10181787610054016,
|
50372 |
+
"learning_rate": 7.865924910916977e-08,
|
50373 |
+
"loss": 46.001,
|
50374 |
+
"step": 7195
|
50375 |
+
},
|
50376 |
+
{
|
50377 |
+
"epoch": 0.9824561403508771,
|
50378 |
+
"grad_norm": 0.4068288803100586,
|
50379 |
+
"learning_rate": 7.744476639813814e-08,
|
50380 |
+
"loss": 46.0063,
|
50381 |
+
"step": 7196
|
50382 |
+
},
|
50383 |
+
{
|
50384 |
+
"epoch": 0.9825926684415318,
|
50385 |
+
"grad_norm": 0.17444883286952972,
|
50386 |
+
"learning_rate": 7.623972518868705e-08,
|
50387 |
+
"loss": 46.006,
|
50388 |
+
"step": 7197
|
50389 |
+
},
|
50390 |
+
{
|
50391 |
+
"epoch": 0.9827291965321865,
|
50392 |
+
"grad_norm": 0.11943278461694717,
|
50393 |
+
"learning_rate": 7.5044125708712e-08,
|
50394 |
+
"loss": 46.0,
|
50395 |
+
"step": 7198
|
50396 |
+
},
|
50397 |
+
{
|
50398 |
+
"epoch": 0.9828657246228412,
|
50399 |
+
"grad_norm": 0.13627932965755463,
|
50400 |
+
"learning_rate": 7.38579681843321e-08,
|
50401 |
+
"loss": 46.0006,
|
50402 |
+
"step": 7199
|
50403 |
+
},
|
50404 |
+
{
|
50405 |
+
"epoch": 0.9830022527134958,
|
50406 |
+
"grad_norm": 0.09473912417888641,
|
50407 |
+
"learning_rate": 7.268125283987348e-08,
|
50408 |
+
"loss": 46.0,
|
50409 |
+
"step": 7200
|
50410 |
+
},
|
50411 |
+
{
|
50412 |
+
"epoch": 0.9831387808041504,
|
50413 |
+
"grad_norm": 0.08604719489812851,
|
50414 |
+
"learning_rate": 7.151397989788588e-08,
|
50415 |
+
"loss": 46.0028,
|
50416 |
+
"step": 7201
|
50417 |
+
},
|
50418 |
+
{
|
50419 |
+
"epoch": 0.9832753088948051,
|
50420 |
+
"grad_norm": 0.03411213681101799,
|
50421 |
+
"learning_rate": 7.035614957912606e-08,
|
50422 |
+
"loss": 46.0001,
|
50423 |
+
"step": 7202
|
50424 |
+
},
|
50425 |
+
{
|
50426 |
+
"epoch": 0.9834118369854598,
|
50427 |
+
"grad_norm": 0.039187826216220856,
|
50428 |
+
"learning_rate": 6.92077621025633e-08,
|
50429 |
+
"loss": 46.0053,
|
50430 |
+
"step": 7203
|
50431 |
+
},
|
50432 |
+
{
|
50433 |
+
"epoch": 0.9835483650761144,
|
50434 |
+
"grad_norm": 0.059648871421813965,
|
50435 |
+
"learning_rate": 6.806881768539052e-08,
|
50436 |
+
"loss": 46.0022,
|
50437 |
+
"step": 7204
|
50438 |
+
},
|
50439 |
+
{
|
50440 |
+
"epoch": 0.9836848931667691,
|
50441 |
+
"grad_norm": 0.041936662048101425,
|
50442 |
+
"learning_rate": 6.693931654299657e-08,
|
50443 |
+
"loss": 46.0009,
|
50444 |
+
"step": 7205
|
50445 |
+
},
|
50446 |
+
{
|
50447 |
+
"epoch": 0.9838214212574237,
|
50448 |
+
"grad_norm": 0.08943246304988861,
|
50449 |
+
"learning_rate": 6.581925888900498e-08,
|
50450 |
+
"loss": 46.0064,
|
50451 |
+
"step": 7206
|
50452 |
+
},
|
50453 |
+
{
|
50454 |
+
"epoch": 0.9839579493480783,
|
50455 |
+
"grad_norm": 0.09488219022750854,
|
50456 |
+
"learning_rate": 6.470864493524075e-08,
|
50457 |
+
"loss": 46.0043,
|
50458 |
+
"step": 7207
|
50459 |
+
},
|
50460 |
+
{
|
50461 |
+
"epoch": 0.984094477438733,
|
50462 |
+
"grad_norm": 0.05033031851053238,
|
50463 |
+
"learning_rate": 6.360747489175256e-08,
|
50464 |
+
"loss": 46.0046,
|
50465 |
+
"step": 7208
|
50466 |
+
},
|
50467 |
+
{
|
50468 |
+
"epoch": 0.9842310055293877,
|
50469 |
+
"grad_norm": 0.1277073323726654,
|
50470 |
+
"learning_rate": 6.251574896679046e-08,
|
50471 |
+
"loss": 46.0052,
|
50472 |
+
"step": 7209
|
50473 |
+
},
|
50474 |
+
{
|
50475 |
+
"epoch": 0.9843675336200424,
|
50476 |
+
"grad_norm": 0.05453884229063988,
|
50477 |
+
"learning_rate": 6.14334673668282e-08,
|
50478 |
+
"loss": 46.0048,
|
50479 |
+
"step": 7210
|
50480 |
+
},
|
50481 |
+
{
|
50482 |
+
"epoch": 0.9845040617106969,
|
50483 |
+
"grad_norm": 0.061872418969869614,
|
50484 |
+
"learning_rate": 6.036063029654649e-08,
|
50485 |
+
"loss": 46.0076,
|
50486 |
+
"step": 7211
|
50487 |
+
},
|
50488 |
+
{
|
50489 |
+
"epoch": 0.9846405898013516,
|
50490 |
+
"grad_norm": 0.053107887506484985,
|
50491 |
+
"learning_rate": 5.929723795884967e-08,
|
50492 |
+
"loss": 46.0111,
|
50493 |
+
"step": 7212
|
50494 |
+
},
|
50495 |
+
{
|
50496 |
+
"epoch": 0.9847771178920063,
|
50497 |
+
"grad_norm": 0.0883575901389122,
|
50498 |
+
"learning_rate": 5.8243290554838014e-08,
|
50499 |
+
"loss": 46.01,
|
50500 |
+
"step": 7213
|
50501 |
+
},
|
50502 |
+
{
|
50503 |
+
"epoch": 0.984913645982661,
|
50504 |
+
"grad_norm": 0.10030897706747055,
|
50505 |
+
"learning_rate": 5.7198788283852057e-08,
|
50506 |
+
"loss": 46.0021,
|
50507 |
+
"step": 7214
|
50508 |
+
},
|
50509 |
+
{
|
50510 |
+
"epoch": 0.9850501740733156,
|
50511 |
+
"grad_norm": 0.07091116905212402,
|
50512 |
+
"learning_rate": 5.6163731343422675e-08,
|
50513 |
+
"loss": 46.0035,
|
50514 |
+
"step": 7215
|
50515 |
+
},
|
50516 |
+
{
|
50517 |
+
"epoch": 0.9851867021639702,
|
50518 |
+
"grad_norm": 0.07437314093112946,
|
50519 |
+
"learning_rate": 5.51381199293044e-08,
|
50520 |
+
"loss": 46.0071,
|
50521 |
+
"step": 7216
|
50522 |
+
},
|
50523 |
+
{
|
50524 |
+
"epoch": 0.9853232302546249,
|
50525 |
+
"grad_norm": 0.055116403847932816,
|
50526 |
+
"learning_rate": 5.412195423545874e-08,
|
50527 |
+
"loss": 46.0039,
|
50528 |
+
"step": 7217
|
50529 |
+
},
|
50530 |
+
{
|
50531 |
+
"epoch": 0.9854597583452795,
|
50532 |
+
"grad_norm": 0.10955941677093506,
|
50533 |
+
"learning_rate": 5.31152344540764e-08,
|
50534 |
+
"loss": 46.0068,
|
50535 |
+
"step": 7218
|
50536 |
+
},
|
50537 |
+
{
|
50538 |
+
"epoch": 0.9855962864359342,
|
50539 |
+
"grad_norm": 0.03393147885799408,
|
50540 |
+
"learning_rate": 5.2117960775543986e-08,
|
50541 |
+
"loss": 46.0066,
|
50542 |
+
"step": 7219
|
50543 |
+
},
|
50544 |
+
{
|
50545 |
+
"epoch": 0.9857328145265889,
|
50546 |
+
"grad_norm": 0.04151192307472229,
|
50547 |
+
"learning_rate": 5.1130133388471724e-08,
|
50548 |
+
"loss": 46.0126,
|
50549 |
+
"step": 7220
|
50550 |
+
},
|
50551 |
+
{
|
50552 |
+
"epoch": 0.9858693426172435,
|
50553 |
+
"grad_norm": 0.10087965428829193,
|
50554 |
+
"learning_rate": 5.015175247967685e-08,
|
50555 |
+
"loss": 46.0049,
|
50556 |
+
"step": 7221
|
50557 |
+
},
|
50558 |
+
{
|
50559 |
+
"epoch": 0.9860058707078981,
|
50560 |
+
"grad_norm": 0.07426264137029648,
|
50561 |
+
"learning_rate": 4.9182818234200235e-08,
|
50562 |
+
"loss": 46.0075,
|
50563 |
+
"step": 7222
|
50564 |
+
},
|
50565 |
+
{
|
50566 |
+
"epoch": 0.9861423987985528,
|
50567 |
+
"grad_norm": 0.059685610234737396,
|
50568 |
+
"learning_rate": 4.8223330835284184e-08,
|
50569 |
+
"loss": 46.0048,
|
50570 |
+
"step": 7223
|
50571 |
+
},
|
50572 |
+
{
|
50573 |
+
"epoch": 0.9862789268892075,
|
50574 |
+
"grad_norm": 0.06613564491271973,
|
50575 |
+
"learning_rate": 4.727329046438911e-08,
|
50576 |
+
"loss": 46.002,
|
50577 |
+
"step": 7224
|
50578 |
+
},
|
50579 |
+
{
|
50580 |
+
"epoch": 0.9864154549798622,
|
50581 |
+
"grad_norm": 0.07357048243284225,
|
50582 |
+
"learning_rate": 4.6332697301193496e-08,
|
50583 |
+
"loss": 46.0064,
|
50584 |
+
"step": 7225
|
50585 |
+
},
|
50586 |
+
{
|
50587 |
+
"epoch": 0.9865519830705167,
|
50588 |
+
"grad_norm": 0.19530758261680603,
|
50589 |
+
"learning_rate": 4.540155152358283e-08,
|
50590 |
+
"loss": 46.0062,
|
50591 |
+
"step": 7226
|
50592 |
+
},
|
50593 |
+
{
|
50594 |
+
"epoch": 0.9866885111611714,
|
50595 |
+
"grad_norm": 0.09539202600717545,
|
50596 |
+
"learning_rate": 4.447985330765514e-08,
|
50597 |
+
"loss": 46.0043,
|
50598 |
+
"step": 7227
|
50599 |
+
},
|
50600 |
+
{
|
50601 |
+
"epoch": 0.9868250392518261,
|
50602 |
+
"grad_norm": 0.04313179850578308,
|
50603 |
+
"learning_rate": 4.356760282773209e-08,
|
50604 |
+
"loss": 46.0006,
|
50605 |
+
"step": 7228
|
50606 |
+
},
|
50607 |
+
{
|
50608 |
+
"epoch": 0.9869615673424808,
|
50609 |
+
"grad_norm": 0.04633820801973343,
|
50610 |
+
"learning_rate": 4.266480025633679e-08,
|
50611 |
+
"loss": 46.0093,
|
50612 |
+
"step": 7229
|
50613 |
+
},
|
50614 |
+
{
|
50615 |
+
"epoch": 0.9870980954331353,
|
50616 |
+
"grad_norm": 0.04100847616791725,
|
50617 |
+
"learning_rate": 4.177144576420489e-08,
|
50618 |
+
"loss": 46.0027,
|
50619 |
+
"step": 7230
|
50620 |
+
},
|
50621 |
+
{
|
50622 |
+
"epoch": 0.98723462352379,
|
50623 |
+
"grad_norm": 0.1251312494277954,
|
50624 |
+
"learning_rate": 4.088753952030122e-08,
|
50625 |
+
"loss": 46.0043,
|
50626 |
+
"step": 7231
|
50627 |
+
},
|
50628 |
+
{
|
50629 |
+
"epoch": 0.9873711516144447,
|
50630 |
+
"grad_norm": 0.04459778219461441,
|
50631 |
+
"learning_rate": 4.0013081691786524e-08,
|
50632 |
+
"loss": 46.0116,
|
50633 |
+
"step": 7232
|
50634 |
+
},
|
50635 |
+
{
|
50636 |
+
"epoch": 0.9875076797050993,
|
50637 |
+
"grad_norm": 0.19127972424030304,
|
50638 |
+
"learning_rate": 3.9148072444039616e-08,
|
50639 |
+
"loss": 46.0086,
|
50640 |
+
"step": 7233
|
50641 |
+
},
|
50642 |
+
{
|
50643 |
+
"epoch": 0.987644207795754,
|
50644 |
+
"grad_norm": 0.11693168431520462,
|
50645 |
+
"learning_rate": 3.8292511940657415e-08,
|
50646 |
+
"loss": 46.0036,
|
50647 |
+
"step": 7234
|
50648 |
+
},
|
50649 |
+
{
|
50650 |
+
"epoch": 0.9877807358864086,
|
50651 |
+
"grad_norm": 0.05080500990152359,
|
50652 |
+
"learning_rate": 3.744640034344382e-08,
|
50653 |
+
"loss": 46.0033,
|
50654 |
+
"step": 7235
|
50655 |
+
},
|
50656 |
+
{
|
50657 |
+
"epoch": 0.9879172639770633,
|
50658 |
+
"grad_norm": 0.18874023854732513,
|
50659 |
+
"learning_rate": 3.660973781242083e-08,
|
50660 |
+
"loss": 46.0089,
|
50661 |
+
"step": 7236
|
50662 |
+
},
|
50663 |
+
{
|
50664 |
+
"epoch": 0.9880537920677179,
|
50665 |
+
"grad_norm": 0.06047139689326286,
|
50666 |
+
"learning_rate": 3.5782524505811876e-08,
|
50667 |
+
"loss": 46.009,
|
50668 |
+
"step": 7237
|
50669 |
+
},
|
50670 |
+
{
|
50671 |
+
"epoch": 0.9881903201583726,
|
50672 |
+
"grad_norm": 0.05019622668623924,
|
50673 |
+
"learning_rate": 3.496476058006959e-08,
|
50674 |
+
"loss": 46.0073,
|
50675 |
+
"step": 7238
|
50676 |
+
},
|
50677 |
+
{
|
50678 |
+
"epoch": 0.9883268482490273,
|
50679 |
+
"grad_norm": 0.05337301269173622,
|
50680 |
+
"learning_rate": 3.415644618985359e-08,
|
50681 |
+
"loss": 46.003,
|
50682 |
+
"step": 7239
|
50683 |
+
},
|
50684 |
+
{
|
50685 |
+
"epoch": 0.9884633763396818,
|
50686 |
+
"grad_norm": 0.06359133124351501,
|
50687 |
+
"learning_rate": 3.3357581488030475e-08,
|
50688 |
+
"loss": 46.0037,
|
50689 |
+
"step": 7240
|
50690 |
+
},
|
50691 |
+
{
|
50692 |
+
"epoch": 0.9885999044303365,
|
50693 |
+
"grad_norm": 0.05159619078040123,
|
50694 |
+
"learning_rate": 3.256816662568496e-08,
|
50695 |
+
"loss": 46.011,
|
50696 |
+
"step": 7241
|
50697 |
+
},
|
50698 |
+
{
|
50699 |
+
"epoch": 0.9887364325209912,
|
50700 |
+
"grad_norm": 0.07998304069042206,
|
50701 |
+
"learning_rate": 3.178820175211428e-08,
|
50702 |
+
"loss": 46.0029,
|
50703 |
+
"step": 7242
|
50704 |
+
},
|
50705 |
+
{
|
50706 |
+
"epoch": 0.9888729606116459,
|
50707 |
+
"grad_norm": 0.03504487872123718,
|
50708 |
+
"learning_rate": 3.1017687014828215e-08,
|
50709 |
+
"loss": 46.004,
|
50710 |
+
"step": 7243
|
50711 |
+
},
|
50712 |
+
{
|
50713 |
+
"epoch": 0.9890094887023005,
|
50714 |
+
"grad_norm": 0.0823051929473877,
|
50715 |
+
"learning_rate": 3.0256622559543536e-08,
|
50716 |
+
"loss": 46.0024,
|
50717 |
+
"step": 7244
|
50718 |
+
},
|
50719 |
+
{
|
50720 |
+
"epoch": 0.9891460167929551,
|
50721 |
+
"grad_norm": 0.422367662191391,
|
50722 |
+
"learning_rate": 2.950500853020066e-08,
|
50723 |
+
"loss": 46.0013,
|
50724 |
+
"step": 7245
|
50725 |
+
},
|
50726 |
+
{
|
50727 |
+
"epoch": 0.9892825448836098,
|
50728 |
+
"grad_norm": 0.1806757152080536,
|
50729 |
+
"learning_rate": 2.8762845068941445e-08,
|
50730 |
+
"loss": 46.0088,
|
50731 |
+
"step": 7246
|
50732 |
+
},
|
50733 |
+
{
|
50734 |
+
"epoch": 0.9894190729742645,
|
50735 |
+
"grad_norm": 0.17935673892498016,
|
50736 |
+
"learning_rate": 2.8030132316136938e-08,
|
50737 |
+
"loss": 46.0132,
|
50738 |
+
"step": 7247
|
50739 |
+
},
|
50740 |
+
{
|
50741 |
+
"epoch": 0.9895556010649191,
|
50742 |
+
"grad_norm": 0.2891080975532532,
|
50743 |
+
"learning_rate": 2.730687041034852e-08,
|
50744 |
+
"loss": 46.0057,
|
50745 |
+
"step": 7248
|
50746 |
+
},
|
50747 |
+
{
|
50748 |
+
"epoch": 0.9896921291555738,
|
50749 |
+
"grad_norm": 0.04798796772956848,
|
50750 |
+
"learning_rate": 2.6593059488366766e-08,
|
50751 |
+
"loss": 46.0012,
|
50752 |
+
"step": 7249
|
50753 |
+
},
|
50754 |
+
{
|
50755 |
+
"epoch": 0.9898286572462284,
|
50756 |
+
"grad_norm": 0.425402969121933,
|
50757 |
+
"learning_rate": 2.5888699685189255e-08,
|
50758 |
+
"loss": 46.0015,
|
50759 |
+
"step": 7250
|
50760 |
+
},
|
50761 |
+
{
|
50762 |
+
"epoch": 0.989965185336883,
|
50763 |
+
"grad_norm": 0.1112116277217865,
|
50764 |
+
"learning_rate": 2.519379113402609e-08,
|
50765 |
+
"loss": 46.0062,
|
50766 |
+
"step": 7251
|
50767 |
+
},
|
50768 |
+
{
|
50769 |
+
"epoch": 0.9901017134275377,
|
50770 |
+
"grad_norm": 0.053855422884225845,
|
50771 |
+
"learning_rate": 2.4508333966305473e-08,
|
50772 |
+
"loss": 46.0038,
|
50773 |
+
"step": 7252
|
50774 |
+
},
|
50775 |
+
{
|
50776 |
+
"epoch": 0.9902382415181924,
|
50777 |
+
"grad_norm": 0.03137766197323799,
|
50778 |
+
"learning_rate": 2.3832328311651496e-08,
|
50779 |
+
"loss": 46.006,
|
50780 |
+
"step": 7253
|
50781 |
+
},
|
50782 |
+
{
|
50783 |
+
"epoch": 0.9903747696088471,
|
50784 |
+
"grad_norm": 0.10431533306837082,
|
50785 |
+
"learning_rate": 2.3165774297922992e-08,
|
50786 |
+
"loss": 46.0022,
|
50787 |
+
"step": 7254
|
50788 |
+
},
|
50789 |
+
{
|
50790 |
+
"epoch": 0.9905112976995016,
|
50791 |
+
"grad_norm": 0.04208254814147949,
|
50792 |
+
"learning_rate": 2.2508672051174685e-08,
|
50793 |
+
"loss": 46.0001,
|
50794 |
+
"step": 7255
|
50795 |
+
},
|
50796 |
+
{
|
50797 |
+
"epoch": 0.9906478257901563,
|
50798 |
+
"grad_norm": 0.045661814510822296,
|
50799 |
+
"learning_rate": 2.1861021695684935e-08,
|
50800 |
+
"loss": 46.0032,
|
50801 |
+
"step": 7256
|
50802 |
+
},
|
50803 |
+
{
|
50804 |
+
"epoch": 0.990784353880811,
|
50805 |
+
"grad_norm": 0.030271239578723907,
|
50806 |
+
"learning_rate": 2.122282335393355e-08,
|
50807 |
+
"loss": 46.0025,
|
50808 |
+
"step": 7257
|
50809 |
+
},
|
50810 |
+
{
|
50811 |
+
"epoch": 0.9909208819714657,
|
50812 |
+
"grad_norm": 0.0907672792673111,
|
50813 |
+
"learning_rate": 2.059407714662398e-08,
|
50814 |
+
"loss": 46.0016,
|
50815 |
+
"step": 7258
|
50816 |
+
},
|
50817 |
+
{
|
50818 |
+
"epoch": 0.9910574100621203,
|
50819 |
+
"grad_norm": 0.08112984895706177,
|
50820 |
+
"learning_rate": 1.9974783192661107e-08,
|
50821 |
+
"loss": 46.0013,
|
50822 |
+
"step": 7259
|
50823 |
+
},
|
50824 |
+
{
|
50825 |
+
"epoch": 0.9911939381527749,
|
50826 |
+
"grad_norm": 0.13127990067005157,
|
50827 |
+
"learning_rate": 1.9364941609167907e-08,
|
50828 |
+
"loss": 46.0027,
|
50829 |
+
"step": 7260
|
50830 |
+
},
|
50831 |
+
{
|
50832 |
+
"epoch": 0.9913304662434296,
|
50833 |
+
"grad_norm": 0.07560381293296814,
|
50834 |
+
"learning_rate": 1.8764552511485457e-08,
|
50835 |
+
"loss": 46.0118,
|
50836 |
+
"step": 7261
|
50837 |
+
},
|
50838 |
+
{
|
50839 |
+
"epoch": 0.9914669943340843,
|
50840 |
+
"grad_norm": 0.03843251243233681,
|
50841 |
+
"learning_rate": 1.817361601315626e-08,
|
50842 |
+
"loss": 46.0048,
|
50843 |
+
"step": 7262
|
50844 |
+
},
|
50845 |
+
{
|
50846 |
+
"epoch": 0.9916035224247389,
|
50847 |
+
"grad_norm": 0.03968435525894165,
|
50848 |
+
"learning_rate": 1.7592132225946468e-08,
|
50849 |
+
"loss": 46.0057,
|
50850 |
+
"step": 7263
|
50851 |
+
},
|
50852 |
+
{
|
50853 |
+
"epoch": 0.9917400505153935,
|
50854 |
+
"grad_norm": 0.054471638053655624,
|
50855 |
+
"learning_rate": 1.702010125981812e-08,
|
50856 |
+
"loss": 46.0006,
|
50857 |
+
"step": 7264
|
50858 |
+
},
|
50859 |
+
{
|
50860 |
+
"epoch": 0.9918765786060482,
|
50861 |
+
"grad_norm": 0.1916276067495346,
|
50862 |
+
"learning_rate": 1.6457523222956907e-08,
|
50863 |
+
"loss": 46.0005,
|
50864 |
+
"step": 7265
|
50865 |
+
},
|
50866 |
+
{
|
50867 |
+
"epoch": 0.9920131066967028,
|
50868 |
+
"grad_norm": 0.2849438786506653,
|
50869 |
+
"learning_rate": 1.5904398221766592e-08,
|
50870 |
+
"loss": 46.0049,
|
50871 |
+
"step": 7266
|
50872 |
+
},
|
50873 |
+
{
|
50874 |
+
"epoch": 0.9921496347873575,
|
50875 |
+
"grad_norm": 0.12652307748794556,
|
50876 |
+
"learning_rate": 1.5360726360852397e-08,
|
50877 |
+
"loss": 46.0085,
|
50878 |
+
"step": 7267
|
50879 |
+
},
|
50880 |
+
{
|
50881 |
+
"epoch": 0.9922861628780122,
|
50882 |
+
"grad_norm": 0.11404263973236084,
|
50883 |
+
"learning_rate": 1.482650774303207e-08,
|
50884 |
+
"loss": 46.0012,
|
50885 |
+
"step": 7268
|
50886 |
+
},
|
50887 |
+
{
|
50888 |
+
"epoch": 0.9924226909686668,
|
50889 |
+
"grad_norm": 0.055355679243803024,
|
50890 |
+
"learning_rate": 1.430174246934146e-08,
|
50891 |
+
"loss": 46.0068,
|
50892 |
+
"step": 7269
|
50893 |
+
},
|
50894 |
+
{
|
50895 |
+
"epoch": 0.9925592190593214,
|
50896 |
+
"grad_norm": 0.09978245943784714,
|
50897 |
+
"learning_rate": 1.3786430639023407e-08,
|
50898 |
+
"loss": 46.0037,
|
50899 |
+
"step": 7270
|
50900 |
+
},
|
50901 |
+
{
|
50902 |
+
"epoch": 0.9926957471499761,
|
50903 |
+
"grad_norm": 0.05256585776805878,
|
50904 |
+
"learning_rate": 1.3280572349538834e-08,
|
50905 |
+
"loss": 46.0084,
|
50906 |
+
"step": 7271
|
50907 |
+
},
|
50908 |
+
{
|
50909 |
+
"epoch": 0.9928322752406308,
|
50910 |
+
"grad_norm": 0.1046704575419426,
|
50911 |
+
"learning_rate": 1.278416769655566e-08,
|
50912 |
+
"loss": 46.0039,
|
50913 |
+
"step": 7272
|
50914 |
+
},
|
50915 |
+
{
|
50916 |
+
"epoch": 0.9929688033312855,
|
50917 |
+
"grad_norm": 0.10620303452014923,
|
50918 |
+
"learning_rate": 1.2297216773954346e-08,
|
50919 |
+
"loss": 46.0041,
|
50920 |
+
"step": 7273
|
50921 |
+
},
|
50922 |
+
{
|
50923 |
+
"epoch": 0.99310533142194,
|
50924 |
+
"grad_norm": 0.11976180225610733,
|
50925 |
+
"learning_rate": 1.1819719673827889e-08,
|
50926 |
+
"loss": 46.0065,
|
50927 |
+
"step": 7274
|
50928 |
+
},
|
50929 |
+
{
|
50930 |
+
"epoch": 0.9932418595125947,
|
50931 |
+
"grad_norm": 0.28400561213493347,
|
50932 |
+
"learning_rate": 1.1351676486487383e-08,
|
50933 |
+
"loss": 46.0087,
|
50934 |
+
"step": 7275
|
50935 |
+
},
|
50936 |
+
{
|
50937 |
+
"epoch": 0.9933783876032494,
|
50938 |
+
"grad_norm": 0.07839032262563705,
|
50939 |
+
"learning_rate": 1.0893087300439809e-08,
|
50940 |
+
"loss": 46.0029,
|
50941 |
+
"step": 7276
|
50942 |
+
},
|
50943 |
+
{
|
50944 |
+
"epoch": 0.993514915693904,
|
50945 |
+
"grad_norm": 0.1288733035326004,
|
50946 |
+
"learning_rate": 1.0443952202426887e-08,
|
50947 |
+
"loss": 46.0086,
|
50948 |
+
"step": 7277
|
50949 |
+
},
|
50950 |
+
{
|
50951 |
+
"epoch": 0.9936514437845587,
|
50952 |
+
"grad_norm": 0.05590146407485008,
|
50953 |
+
"learning_rate": 1.0004271277386234e-08,
|
50954 |
+
"loss": 46.0053,
|
50955 |
+
"step": 7278
|
50956 |
+
},
|
50957 |
+
{
|
50958 |
+
"epoch": 0.9937879718752133,
|
50959 |
+
"grad_norm": 0.04046904668211937,
|
50960 |
+
"learning_rate": 9.574044608468003e-09,
|
50961 |
+
"loss": 46.0029,
|
50962 |
+
"step": 7279
|
50963 |
+
},
|
50964 |
+
{
|
50965 |
+
"epoch": 0.993924499965868,
|
50966 |
+
"grad_norm": 0.05340801179409027,
|
50967 |
+
"learning_rate": 9.153272277040436e-09,
|
50968 |
+
"loss": 46.0055,
|
50969 |
+
"step": 7280
|
50970 |
+
},
|
50971 |
+
{
|
50972 |
+
"epoch": 0.9940610280565226,
|
50973 |
+
"grad_norm": 0.0846848338842392,
|
50974 |
+
"learning_rate": 8.741954362678772e-09,
|
50975 |
+
"loss": 46.0034,
|
50976 |
+
"step": 7281
|
50977 |
+
},
|
50978 |
+
{
|
50979 |
+
"epoch": 0.9941975561471773,
|
50980 |
+
"grad_norm": 0.08063561469316483,
|
50981 |
+
"learning_rate": 8.340090943176338e-09,
|
50982 |
+
"loss": 46.0035,
|
50983 |
+
"step": 7282
|
50984 |
+
},
|
50985 |
+
{
|
50986 |
+
"epoch": 0.994334084237832,
|
50987 |
+
"grad_norm": 0.049741119146347046,
|
50988 |
+
"learning_rate": 7.947682094533449e-09,
|
50989 |
+
"loss": 46.0018,
|
50990 |
+
"step": 7283
|
50991 |
+
},
|
50992 |
+
{
|
50993 |
+
"epoch": 0.9944706123284865,
|
50994 |
+
"grad_norm": 0.0649300292134285,
|
50995 |
+
"learning_rate": 7.564727890968514e-09,
|
50996 |
+
"loss": 46.0059,
|
50997 |
+
"step": 7284
|
50998 |
+
},
|
50999 |
+
{
|
51000 |
+
"epoch": 0.9946071404191412,
|
51001 |
+
"grad_norm": 0.06503751128911972,
|
51002 |
+
"learning_rate": 7.1912284048958336e-09,
|
51003 |
+
"loss": 46.0024,
|
51004 |
+
"step": 7285
|
51005 |
+
},
|
51006 |
+
{
|
51007 |
+
"epoch": 0.9947436685097959,
|
51008 |
+
"grad_norm": 0.08364452421665192,
|
51009 |
+
"learning_rate": 6.8271837069588955e-09,
|
51010 |
+
"loss": 46.0006,
|
51011 |
+
"step": 7286
|
51012 |
+
},
|
51013 |
+
{
|
51014 |
+
"epoch": 0.9948801966004506,
|
51015 |
+
"grad_norm": 0.06732216477394104,
|
51016 |
+
"learning_rate": 6.472593866013732e-09,
|
51017 |
+
"loss": 46.0,
|
51018 |
+
"step": 7287
|
51019 |
+
},
|
51020 |
+
{
|
51021 |
+
"epoch": 0.9950167246911052,
|
51022 |
+
"grad_norm": 0.08693568408489227,
|
51023 |
+
"learning_rate": 6.127458949106713e-09,
|
51024 |
+
"loss": 46.005,
|
51025 |
+
"step": 7288
|
51026 |
+
},
|
51027 |
+
{
|
51028 |
+
"epoch": 0.9951532527817598,
|
51029 |
+
"grad_norm": 0.053449422121047974,
|
51030 |
+
"learning_rate": 5.7917790215245015e-09,
|
51031 |
+
"loss": 46.0021,
|
51032 |
+
"step": 7289
|
51033 |
+
},
|
51034 |
+
{
|
51035 |
+
"epoch": 0.9952897808724145,
|
51036 |
+
"grad_norm": 0.05752718821167946,
|
51037 |
+
"learning_rate": 5.4655541467441006e-09,
|
51038 |
+
"loss": 46.0032,
|
51039 |
+
"step": 7290
|
51040 |
+
},
|
51041 |
+
{
|
51042 |
+
"epoch": 0.9954263089630692,
|
51043 |
+
"grad_norm": 0.04035717621445656,
|
51044 |
+
"learning_rate": 5.148784386460604e-09,
|
51045 |
+
"loss": 46.001,
|
51046 |
+
"step": 7291
|
51047 |
+
},
|
51048 |
+
{
|
51049 |
+
"epoch": 0.9955628370537238,
|
51050 |
+
"grad_norm": 0.05446084216237068,
|
51051 |
+
"learning_rate": 4.841469800592746e-09,
|
51052 |
+
"loss": 46.008,
|
51053 |
+
"step": 7292
|
51054 |
+
},
|
51055 |
+
{
|
51056 |
+
"epoch": 0.9956993651443784,
|
51057 |
+
"grad_norm": 0.05863227695226669,
|
51058 |
+
"learning_rate": 4.543610447249602e-09,
|
51059 |
+
"loss": 46.0042,
|
51060 |
+
"step": 7293
|
51061 |
+
},
|
51062 |
+
{
|
51063 |
+
"epoch": 0.9958358932350331,
|
51064 |
+
"grad_norm": 0.05528897047042847,
|
51065 |
+
"learning_rate": 4.2552063827694386e-09,
|
51066 |
+
"loss": 46.0031,
|
51067 |
+
"step": 7294
|
51068 |
+
},
|
51069 |
+
{
|
51070 |
+
"epoch": 0.9959724213256878,
|
51071 |
+
"grad_norm": 0.07325014472007751,
|
51072 |
+
"learning_rate": 3.976257661691962e-09,
|
51073 |
+
"loss": 46.0055,
|
51074 |
+
"step": 7295
|
51075 |
+
},
|
51076 |
+
{
|
51077 |
+
"epoch": 0.9961089494163424,
|
51078 |
+
"grad_norm": 0.20819850265979767,
|
51079 |
+
"learning_rate": 3.7067643367749705e-09,
|
51080 |
+
"loss": 46.0083,
|
51081 |
+
"step": 7296
|
51082 |
+
},
|
51083 |
+
{
|
51084 |
+
"epoch": 0.9962454775069971,
|
51085 |
+
"grad_norm": 0.16535024344921112,
|
51086 |
+
"learning_rate": 3.446726458988803e-09,
|
51087 |
+
"loss": 46.0058,
|
51088 |
+
"step": 7297
|
51089 |
+
},
|
51090 |
+
{
|
51091 |
+
"epoch": 0.9963820055976517,
|
51092 |
+
"grad_norm": 0.06618311256170273,
|
51093 |
+
"learning_rate": 3.1961440775107878e-09,
|
51094 |
+
"loss": 46.0018,
|
51095 |
+
"step": 7298
|
51096 |
+
},
|
51097 |
+
{
|
51098 |
+
"epoch": 0.9965185336883063,
|
51099 |
+
"grad_norm": 0.20235642790794373,
|
51100 |
+
"learning_rate": 2.9550172397252442e-09,
|
51101 |
+
"loss": 46.0093,
|
51102 |
+
"step": 7299
|
51103 |
+
},
|
51104 |
+
{
|
51105 |
+
"epoch": 0.996655061778961,
|
51106 |
+
"grad_norm": 0.04973575845360756,
|
51107 |
+
"learning_rate": 2.723345991245685e-09,
|
51108 |
+
"loss": 46.0,
|
51109 |
+
"step": 7300
|
51110 |
+
},
|
51111 |
+
{
|
51112 |
+
"epoch": 0.9967915898696157,
|
51113 |
+
"grad_norm": 0.07193689048290253,
|
51114 |
+
"learning_rate": 2.5011303758759596e-09,
|
51115 |
+
"loss": 46.0096,
|
51116 |
+
"step": 7301
|
51117 |
+
},
|
51118 |
+
{
|
51119 |
+
"epoch": 0.9969281179602704,
|
51120 |
+
"grad_norm": 0.057107098400592804,
|
51121 |
+
"learning_rate": 2.288370435654663e-09,
|
51122 |
+
"loss": 46.0006,
|
51123 |
+
"step": 7302
|
51124 |
+
},
|
51125 |
+
{
|
51126 |
+
"epoch": 0.9970646460509249,
|
51127 |
+
"grad_norm": 0.10516843944787979,
|
51128 |
+
"learning_rate": 2.0850662108051755e-09,
|
51129 |
+
"loss": 46.0045,
|
51130 |
+
"step": 7303
|
51131 |
+
},
|
51132 |
+
{
|
51133 |
+
"epoch": 0.9972011741415796,
|
51134 |
+
"grad_norm": 0.15865033864974976,
|
51135 |
+
"learning_rate": 1.8912177397856224e-09,
|
51136 |
+
"loss": 46.0095,
|
51137 |
+
"step": 7304
|
51138 |
+
},
|
51139 |
+
{
|
51140 |
+
"epoch": 0.9973377022322343,
|
51141 |
+
"grad_norm": 0.07561293244361877,
|
51142 |
+
"learning_rate": 1.706825059255568e-09,
|
51143 |
+
"loss": 46.0012,
|
51144 |
+
"step": 7305
|
51145 |
+
},
|
51146 |
+
{
|
51147 |
+
"epoch": 0.997474230322889,
|
51148 |
+
"grad_norm": 0.08446773886680603,
|
51149 |
+
"learning_rate": 1.5318882040926686e-09,
|
51150 |
+
"loss": 46.0098,
|
51151 |
+
"step": 7306
|
51152 |
+
},
|
51153 |
+
{
|
51154 |
+
"epoch": 0.9976107584135436,
|
51155 |
+
"grad_norm": 0.11638659983873367,
|
51156 |
+
"learning_rate": 1.3664072073704681e-09,
|
51157 |
+
"loss": 46.0044,
|
51158 |
+
"step": 7307
|
51159 |
+
},
|
51160 |
+
{
|
51161 |
+
"epoch": 0.9977472865041982,
|
51162 |
+
"grad_norm": 0.049882251769304276,
|
51163 |
+
"learning_rate": 1.210382100397256e-09,
|
51164 |
+
"loss": 46.0102,
|
51165 |
+
"step": 7308
|
51166 |
+
},
|
51167 |
+
{
|
51168 |
+
"epoch": 0.9978838145948529,
|
51169 |
+
"grad_norm": 0.08385684341192245,
|
51170 |
+
"learning_rate": 1.063812912671658e-09,
|
51171 |
+
"loss": 46.0012,
|
51172 |
+
"step": 7309
|
51173 |
+
},
|
51174 |
+
{
|
51175 |
+
"epoch": 0.9980203426855075,
|
51176 |
+
"grad_norm": 0.15993371605873108,
|
51177 |
+
"learning_rate": 9.266996719159426e-10,
|
51178 |
+
"loss": 46.003,
|
51179 |
+
"step": 7310
|
51180 |
+
},
|
51181 |
+
{
|
51182 |
+
"epoch": 0.9981568707761622,
|
51183 |
+
"grad_norm": 0.04674162715673447,
|
51184 |
+
"learning_rate": 7.990424040649202e-10,
|
51185 |
+
"loss": 46.0063,
|
51186 |
+
"step": 7311
|
51187 |
+
},
|
51188 |
+
{
|
51189 |
+
"epoch": 0.9982933988668169,
|
51190 |
+
"grad_norm": 0.17500679194927216,
|
51191 |
+
"learning_rate": 6.808411332548393e-10,
|
51192 |
+
"loss": 46.0085,
|
51193 |
+
"step": 7312
|
51194 |
+
},
|
51195 |
+
{
|
51196 |
+
"epoch": 0.9984299269574715,
|
51197 |
+
"grad_norm": 0.11019917577505112,
|
51198 |
+
"learning_rate": 5.720958818511424e-10,
|
51199 |
+
"loss": 46.0036,
|
51200 |
+
"step": 7313
|
51201 |
+
},
|
51202 |
+
{
|
51203 |
+
"epoch": 0.9985664550481261,
|
51204 |
+
"grad_norm": 0.09337335079908371,
|
51205 |
+
"learning_rate": 4.72806670409609e-10,
|
51206 |
+
"loss": 46.0022,
|
51207 |
+
"step": 7314
|
51208 |
+
},
|
51209 |
+
{
|
51210 |
+
"epoch": 0.9987029831387808,
|
51211 |
+
"grad_norm": 0.09085293859243393,
|
51212 |
+
"learning_rate": 3.829735177096616e-10,
|
51213 |
+
"loss": 46.0003,
|
51214 |
+
"step": 7315
|
51215 |
+
},
|
51216 |
+
{
|
51217 |
+
"epoch": 0.9988395112294355,
|
51218 |
+
"grad_norm": 0.1213827133178711,
|
51219 |
+
"learning_rate": 3.0259644074326353e-10,
|
51220 |
+
"loss": 46.0042,
|
51221 |
+
"step": 7316
|
51222 |
+
},
|
51223 |
+
{
|
51224 |
+
"epoch": 0.9989760393200902,
|
51225 |
+
"grad_norm": 0.07254913449287415,
|
51226 |
+
"learning_rate": 2.3167545471491914e-10,
|
51227 |
+
"loss": 46.0039,
|
51228 |
+
"step": 7317
|
51229 |
+
},
|
51230 |
+
{
|
51231 |
+
"epoch": 0.9991125674107447,
|
51232 |
+
"grad_norm": 0.05589550361037254,
|
51233 |
+
"learning_rate": 1.7021057303057142e-10,
|
51234 |
+
"loss": 46.0002,
|
51235 |
+
"step": 7318
|
51236 |
+
},
|
51237 |
+
{
|
51238 |
+
"epoch": 0.9992490955013994,
|
51239 |
+
"grad_norm": 0.0650186687707901,
|
51240 |
+
"learning_rate": 1.1820180731980656e-10,
|
51241 |
+
"loss": 46.0024,
|
51242 |
+
"step": 7319
|
51243 |
+
},
|
51244 |
+
{
|
51245 |
+
"epoch": 0.9993856235920541,
|
51246 |
+
"grad_norm": 0.07707148045301437,
|
51247 |
+
"learning_rate": 7.564916741364947e-11,
|
51248 |
+
"loss": 46.0048,
|
51249 |
+
"step": 7320
|
51250 |
+
},
|
51251 |
+
{
|
51252 |
+
"epoch": 0.9995221516827087,
|
51253 |
+
"grad_norm": 0.06899863481521606,
|
51254 |
+
"learning_rate": 4.2552661366768164e-11,
|
51255 |
+
"loss": 46.0075,
|
51256 |
+
"step": 7321
|
51257 |
+
},
|
51258 |
+
{
|
51259 |
+
"epoch": 0.9996586797733634,
|
51260 |
+
"grad_norm": 0.13619117438793182,
|
51261 |
+
"learning_rate": 1.8912295429718285e-11,
|
51262 |
+
"loss": 46.0019,
|
51263 |
+
"step": 7322
|
51264 |
+
},
|
51265 |
+
{
|
51266 |
+
"epoch": 0.999795207864018,
|
51267 |
+
"grad_norm": 0.06259801238775253,
|
51268 |
+
"learning_rate": 4.728074082249734e-12,
|
51269 |
+
"loss": 46.0084,
|
51270 |
+
"step": 7323
|
51271 |
+
},
|
51272 |
+
{
|
51273 |
+
"epoch": 0.9999317359546727,
|
51274 |
+
"grad_norm": 0.11005455255508423,
|
51275 |
+
"learning_rate": 0.0,
|
51276 |
+
"loss": 46.0028,
|
51277 |
+
"step": 7324
|
51278 |
}
|
51279 |
],
|
51280 |
"logging_steps": 1,
|
|
|
51289 |
"should_evaluate": false,
|
51290 |
"should_log": false,
|
51291 |
"should_save": true,
|
51292 |
+
"should_training_stop": true
|
51293 |
},
|
51294 |
"attributes": {}
|
51295 |
}
|
51296 |
},
|
51297 |
+
"total_flos": 687479785488384.0,
|
51298 |
"train_batch_size": 4,
|
51299 |
"trial_name": null,
|
51300 |
"trial_params": null
|