Training in progress, step 5736, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1676 -3
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 516802328
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebef31135b45565df6ed5f3402d41965c0085e3579054017540bd9502336d5bc
|
3 |
size 516802328
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9410d6faf0b0b2a20a9f5d7e528ee5c22798ebdce85f3ee5ebc57c74c84b3d40
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2d74ffd31206d3e81b71ea7d00d130021e2a8ddf2f551c8cf2a0016186f7977
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -38486,6 +38486,1679 @@
|
|
38486 |
"learning_rate": 1.4425382207839954e-06,
|
38487 |
"loss": 0.0,
|
38488 |
"step": 5497
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38489 |
}
|
38490 |
],
|
38491 |
"logging_steps": 1,
|
@@ -38505,7 +40178,7 @@
|
|
38505 |
"attributes": {}
|
38506 |
}
|
38507 |
},
|
38508 |
-
"total_flos": 1.
|
38509 |
"train_batch_size": 8,
|
38510 |
"trial_name": null,
|
38511 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9647632663358843,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5736,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
38486 |
"learning_rate": 1.4425382207839954e-06,
|
38487 |
"loss": 0.0,
|
38488 |
"step": 5497
|
38489 |
+
},
|
38490 |
+
{
|
38491 |
+
"epoch": 0.9247329913379867,
|
38492 |
+
"grad_norm": NaN,
|
38493 |
+
"learning_rate": 1.4361364783600895e-06,
|
38494 |
+
"loss": 0.0,
|
38495 |
+
"step": 5498
|
38496 |
+
},
|
38497 |
+
{
|
38498 |
+
"epoch": 0.924901185770751,
|
38499 |
+
"grad_norm": NaN,
|
38500 |
+
"learning_rate": 1.4297487654924002e-06,
|
38501 |
+
"loss": 0.0,
|
38502 |
+
"step": 5499
|
38503 |
+
},
|
38504 |
+
{
|
38505 |
+
"epoch": 0.9250693802035153,
|
38506 |
+
"grad_norm": NaN,
|
38507 |
+
"learning_rate": 1.423375084026285e-06,
|
38508 |
+
"loss": 0.0,
|
38509 |
+
"step": 5500
|
38510 |
+
},
|
38511 |
+
{
|
38512 |
+
"epoch": 0.9252375746362795,
|
38513 |
+
"grad_norm": NaN,
|
38514 |
+
"learning_rate": 1.4170154358030151e-06,
|
38515 |
+
"loss": 0.0,
|
38516 |
+
"step": 5501
|
38517 |
+
},
|
38518 |
+
{
|
38519 |
+
"epoch": 0.9254057690690438,
|
38520 |
+
"grad_norm": NaN,
|
38521 |
+
"learning_rate": 1.410669822659827e-06,
|
38522 |
+
"loss": 0.0,
|
38523 |
+
"step": 5502
|
38524 |
+
},
|
38525 |
+
{
|
38526 |
+
"epoch": 0.9255739635018081,
|
38527 |
+
"grad_norm": NaN,
|
38528 |
+
"learning_rate": 1.404338246429887e-06,
|
38529 |
+
"loss": 0.0,
|
38530 |
+
"step": 5503
|
38531 |
+
},
|
38532 |
+
{
|
38533 |
+
"epoch": 0.9257421579345724,
|
38534 |
+
"grad_norm": NaN,
|
38535 |
+
"learning_rate": 1.3980207089423326e-06,
|
38536 |
+
"loss": 0.0,
|
38537 |
+
"step": 5504
|
38538 |
+
},
|
38539 |
+
{
|
38540 |
+
"epoch": 0.9259103523673367,
|
38541 |
+
"grad_norm": NaN,
|
38542 |
+
"learning_rate": 1.39171721202222e-06,
|
38543 |
+
"loss": 0.0,
|
38544 |
+
"step": 5505
|
38545 |
+
},
|
38546 |
+
{
|
38547 |
+
"epoch": 0.9260785468001009,
|
38548 |
+
"grad_norm": NaN,
|
38549 |
+
"learning_rate": 1.385427757490565e-06,
|
38550 |
+
"loss": 0.0,
|
38551 |
+
"step": 5506
|
38552 |
+
},
|
38553 |
+
{
|
38554 |
+
"epoch": 0.9262467412328652,
|
38555 |
+
"grad_norm": NaN,
|
38556 |
+
"learning_rate": 1.3791523471643141e-06,
|
38557 |
+
"loss": 0.0,
|
38558 |
+
"step": 5507
|
38559 |
+
},
|
38560 |
+
{
|
38561 |
+
"epoch": 0.9264149356656295,
|
38562 |
+
"grad_norm": NaN,
|
38563 |
+
"learning_rate": 1.3728909828563619e-06,
|
38564 |
+
"loss": 0.0,
|
38565 |
+
"step": 5508
|
38566 |
+
},
|
38567 |
+
{
|
38568 |
+
"epoch": 0.9265831300983938,
|
38569 |
+
"grad_norm": NaN,
|
38570 |
+
"learning_rate": 1.3666436663755555e-06,
|
38571 |
+
"loss": 0.0,
|
38572 |
+
"step": 5509
|
38573 |
+
},
|
38574 |
+
{
|
38575 |
+
"epoch": 0.926751324531158,
|
38576 |
+
"grad_norm": NaN,
|
38577 |
+
"learning_rate": 1.3604103995266682e-06,
|
38578 |
+
"loss": 0.0,
|
38579 |
+
"step": 5510
|
38580 |
+
},
|
38581 |
+
{
|
38582 |
+
"epoch": 0.9269195189639223,
|
38583 |
+
"grad_norm": NaN,
|
38584 |
+
"learning_rate": 1.3541911841104149e-06,
|
38585 |
+
"loss": 0.0,
|
38586 |
+
"step": 5511
|
38587 |
+
},
|
38588 |
+
{
|
38589 |
+
"epoch": 0.9270877133966866,
|
38590 |
+
"grad_norm": NaN,
|
38591 |
+
"learning_rate": 1.3479860219234697e-06,
|
38592 |
+
"loss": 0.0,
|
38593 |
+
"step": 5512
|
38594 |
+
},
|
38595 |
+
{
|
38596 |
+
"epoch": 0.9272559078294509,
|
38597 |
+
"grad_norm": NaN,
|
38598 |
+
"learning_rate": 1.3417949147584318e-06,
|
38599 |
+
"loss": 0.0,
|
38600 |
+
"step": 5513
|
38601 |
+
},
|
38602 |
+
{
|
38603 |
+
"epoch": 0.9274241022622152,
|
38604 |
+
"grad_norm": NaN,
|
38605 |
+
"learning_rate": 1.335617864403832e-06,
|
38606 |
+
"loss": 0.0,
|
38607 |
+
"step": 5514
|
38608 |
+
},
|
38609 |
+
{
|
38610 |
+
"epoch": 0.9275922966949794,
|
38611 |
+
"grad_norm": NaN,
|
38612 |
+
"learning_rate": 1.3294548726441592e-06,
|
38613 |
+
"loss": 0.0,
|
38614 |
+
"step": 5515
|
38615 |
+
},
|
38616 |
+
{
|
38617 |
+
"epoch": 0.9277604911277437,
|
38618 |
+
"grad_norm": NaN,
|
38619 |
+
"learning_rate": 1.3233059412598392e-06,
|
38620 |
+
"loss": 0.0,
|
38621 |
+
"step": 5516
|
38622 |
+
},
|
38623 |
+
{
|
38624 |
+
"epoch": 0.9279286855605079,
|
38625 |
+
"grad_norm": NaN,
|
38626 |
+
"learning_rate": 1.3171710720272234e-06,
|
38627 |
+
"loss": 0.0,
|
38628 |
+
"step": 5517
|
38629 |
+
},
|
38630 |
+
{
|
38631 |
+
"epoch": 0.9280968799932722,
|
38632 |
+
"grad_norm": NaN,
|
38633 |
+
"learning_rate": 1.3110502667185997e-06,
|
38634 |
+
"loss": 0.0,
|
38635 |
+
"step": 5518
|
38636 |
+
},
|
38637 |
+
{
|
38638 |
+
"epoch": 0.9282650744260365,
|
38639 |
+
"grad_norm": NaN,
|
38640 |
+
"learning_rate": 1.3049435271022037e-06,
|
38641 |
+
"loss": 0.0,
|
38642 |
+
"step": 5519
|
38643 |
+
},
|
38644 |
+
{
|
38645 |
+
"epoch": 0.9284332688588007,
|
38646 |
+
"grad_norm": NaN,
|
38647 |
+
"learning_rate": 1.298850854942213e-06,
|
38648 |
+
"loss": 0.0,
|
38649 |
+
"step": 5520
|
38650 |
+
},
|
38651 |
+
{
|
38652 |
+
"epoch": 0.928601463291565,
|
38653 |
+
"grad_norm": NaN,
|
38654 |
+
"learning_rate": 1.2927722519987306e-06,
|
38655 |
+
"loss": 0.0,
|
38656 |
+
"step": 5521
|
38657 |
+
},
|
38658 |
+
{
|
38659 |
+
"epoch": 0.9287696577243293,
|
38660 |
+
"grad_norm": NaN,
|
38661 |
+
"learning_rate": 1.2867077200277856e-06,
|
38662 |
+
"loss": 0.0,
|
38663 |
+
"step": 5522
|
38664 |
+
},
|
38665 |
+
{
|
38666 |
+
"epoch": 0.9289378521570936,
|
38667 |
+
"grad_norm": NaN,
|
38668 |
+
"learning_rate": 1.2806572607813649e-06,
|
38669 |
+
"loss": 0.0,
|
38670 |
+
"step": 5523
|
38671 |
+
},
|
38672 |
+
{
|
38673 |
+
"epoch": 0.9291060465898578,
|
38674 |
+
"grad_norm": NaN,
|
38675 |
+
"learning_rate": 1.2746208760073708e-06,
|
38676 |
+
"loss": 0.0,
|
38677 |
+
"step": 5524
|
38678 |
+
},
|
38679 |
+
{
|
38680 |
+
"epoch": 0.9292742410226221,
|
38681 |
+
"grad_norm": NaN,
|
38682 |
+
"learning_rate": 1.268598567449647e-06,
|
38683 |
+
"loss": 0.0,
|
38684 |
+
"step": 5525
|
38685 |
+
},
|
38686 |
+
{
|
38687 |
+
"epoch": 0.9294424354553864,
|
38688 |
+
"grad_norm": NaN,
|
38689 |
+
"learning_rate": 1.2625903368479796e-06,
|
38690 |
+
"loss": 0.0,
|
38691 |
+
"step": 5526
|
38692 |
+
},
|
38693 |
+
{
|
38694 |
+
"epoch": 0.9296106298881507,
|
38695 |
+
"grad_norm": NaN,
|
38696 |
+
"learning_rate": 1.2565961859380693e-06,
|
38697 |
+
"loss": 0.0,
|
38698 |
+
"step": 5527
|
38699 |
+
},
|
38700 |
+
{
|
38701 |
+
"epoch": 0.929778824320915,
|
38702 |
+
"grad_norm": NaN,
|
38703 |
+
"learning_rate": 1.250616116451564e-06,
|
38704 |
+
"loss": 0.0,
|
38705 |
+
"step": 5528
|
38706 |
+
},
|
38707 |
+
{
|
38708 |
+
"epoch": 0.9299470187536792,
|
38709 |
+
"grad_norm": NaN,
|
38710 |
+
"learning_rate": 1.2446501301160374e-06,
|
38711 |
+
"loss": 0.0,
|
38712 |
+
"step": 5529
|
38713 |
+
},
|
38714 |
+
{
|
38715 |
+
"epoch": 0.9301152131864435,
|
38716 |
+
"grad_norm": NaN,
|
38717 |
+
"learning_rate": 1.2386982286549998e-06,
|
38718 |
+
"loss": 0.0,
|
38719 |
+
"step": 5530
|
38720 |
+
},
|
38721 |
+
{
|
38722 |
+
"epoch": 0.9302834076192078,
|
38723 |
+
"grad_norm": NaN,
|
38724 |
+
"learning_rate": 1.2327604137878812e-06,
|
38725 |
+
"loss": 0.0,
|
38726 |
+
"step": 5531
|
38727 |
+
},
|
38728 |
+
{
|
38729 |
+
"epoch": 0.9304516020519721,
|
38730 |
+
"grad_norm": NaN,
|
38731 |
+
"learning_rate": 1.2268366872300597e-06,
|
38732 |
+
"loss": 0.0,
|
38733 |
+
"step": 5532
|
38734 |
+
},
|
38735 |
+
{
|
38736 |
+
"epoch": 0.9306197964847364,
|
38737 |
+
"grad_norm": NaN,
|
38738 |
+
"learning_rate": 1.2209270506928271e-06,
|
38739 |
+
"loss": 0.0,
|
38740 |
+
"step": 5533
|
38741 |
+
},
|
38742 |
+
{
|
38743 |
+
"epoch": 0.9307879909175006,
|
38744 |
+
"grad_norm": NaN,
|
38745 |
+
"learning_rate": 1.2150315058834184e-06,
|
38746 |
+
"loss": 0.0,
|
38747 |
+
"step": 5534
|
38748 |
+
},
|
38749 |
+
{
|
38750 |
+
"epoch": 0.9309561853502649,
|
38751 |
+
"grad_norm": NaN,
|
38752 |
+
"learning_rate": 1.2091500545049706e-06,
|
38753 |
+
"loss": 0.0,
|
38754 |
+
"step": 5535
|
38755 |
+
},
|
38756 |
+
{
|
38757 |
+
"epoch": 0.9311243797830292,
|
38758 |
+
"grad_norm": NaN,
|
38759 |
+
"learning_rate": 1.203282698256597e-06,
|
38760 |
+
"loss": 0.0,
|
38761 |
+
"step": 5536
|
38762 |
+
},
|
38763 |
+
{
|
38764 |
+
"epoch": 0.9312925742157935,
|
38765 |
+
"grad_norm": NaN,
|
38766 |
+
"learning_rate": 1.1974294388332918e-06,
|
38767 |
+
"loss": 0.0,
|
38768 |
+
"step": 5537
|
38769 |
+
},
|
38770 |
+
{
|
38771 |
+
"epoch": 0.9314607686485578,
|
38772 |
+
"grad_norm": NaN,
|
38773 |
+
"learning_rate": 1.1915902779260024e-06,
|
38774 |
+
"loss": 0.0,
|
38775 |
+
"step": 5538
|
38776 |
+
},
|
38777 |
+
{
|
38778 |
+
"epoch": 0.931628963081322,
|
38779 |
+
"grad_norm": NaN,
|
38780 |
+
"learning_rate": 1.1857652172215905e-06,
|
38781 |
+
"loss": 0.0,
|
38782 |
+
"step": 5539
|
38783 |
+
},
|
38784 |
+
{
|
38785 |
+
"epoch": 0.9317971575140863,
|
38786 |
+
"grad_norm": NaN,
|
38787 |
+
"learning_rate": 1.1799542584028656e-06,
|
38788 |
+
"loss": 0.0,
|
38789 |
+
"step": 5540
|
38790 |
+
},
|
38791 |
+
{
|
38792 |
+
"epoch": 0.9319653519468506,
|
38793 |
+
"grad_norm": NaN,
|
38794 |
+
"learning_rate": 1.1741574031485347e-06,
|
38795 |
+
"loss": 0.0,
|
38796 |
+
"step": 5541
|
38797 |
+
},
|
38798 |
+
{
|
38799 |
+
"epoch": 0.9321335463796149,
|
38800 |
+
"grad_norm": NaN,
|
38801 |
+
"learning_rate": 1.1683746531332529e-06,
|
38802 |
+
"loss": 0.0,
|
38803 |
+
"step": 5542
|
38804 |
+
},
|
38805 |
+
{
|
38806 |
+
"epoch": 0.9323017408123792,
|
38807 |
+
"grad_norm": NaN,
|
38808 |
+
"learning_rate": 1.1626060100275837e-06,
|
38809 |
+
"loss": 0.0,
|
38810 |
+
"step": 5543
|
38811 |
+
},
|
38812 |
+
{
|
38813 |
+
"epoch": 0.9324699352451434,
|
38814 |
+
"grad_norm": NaN,
|
38815 |
+
"learning_rate": 1.156851475498033e-06,
|
38816 |
+
"loss": 0.0,
|
38817 |
+
"step": 5544
|
38818 |
+
},
|
38819 |
+
{
|
38820 |
+
"epoch": 0.9326381296779077,
|
38821 |
+
"grad_norm": NaN,
|
38822 |
+
"learning_rate": 1.1511110512070155e-06,
|
38823 |
+
"loss": 0.0,
|
38824 |
+
"step": 5545
|
38825 |
+
},
|
38826 |
+
{
|
38827 |
+
"epoch": 0.932806324110672,
|
38828 |
+
"grad_norm": NaN,
|
38829 |
+
"learning_rate": 1.1453847388128712e-06,
|
38830 |
+
"loss": 0.0,
|
38831 |
+
"step": 5546
|
38832 |
+
},
|
38833 |
+
{
|
38834 |
+
"epoch": 0.9329745185434362,
|
38835 |
+
"grad_norm": NaN,
|
38836 |
+
"learning_rate": 1.1396725399698772e-06,
|
38837 |
+
"loss": 0.0,
|
38838 |
+
"step": 5547
|
38839 |
+
},
|
38840 |
+
{
|
38841 |
+
"epoch": 0.9331427129762004,
|
38842 |
+
"grad_norm": NaN,
|
38843 |
+
"learning_rate": 1.133974456328213e-06,
|
38844 |
+
"loss": 0.0,
|
38845 |
+
"step": 5548
|
38846 |
+
},
|
38847 |
+
{
|
38848 |
+
"epoch": 0.9333109074089647,
|
38849 |
+
"grad_norm": NaN,
|
38850 |
+
"learning_rate": 1.1282904895340064e-06,
|
38851 |
+
"loss": 0.0,
|
38852 |
+
"step": 5549
|
38853 |
+
},
|
38854 |
+
{
|
38855 |
+
"epoch": 0.933479101841729,
|
38856 |
+
"grad_norm": NaN,
|
38857 |
+
"learning_rate": 1.1226206412292773e-06,
|
38858 |
+
"loss": 0.0,
|
38859 |
+
"step": 5550
|
38860 |
+
},
|
38861 |
+
{
|
38862 |
+
"epoch": 0.9336472962744933,
|
38863 |
+
"grad_norm": NaN,
|
38864 |
+
"learning_rate": 1.1169649130519932e-06,
|
38865 |
+
"loss": 0.0,
|
38866 |
+
"step": 5551
|
38867 |
+
},
|
38868 |
+
{
|
38869 |
+
"epoch": 0.9338154907072576,
|
38870 |
+
"grad_norm": NaN,
|
38871 |
+
"learning_rate": 1.111323306636014e-06,
|
38872 |
+
"loss": 0.0,
|
38873 |
+
"step": 5552
|
38874 |
+
},
|
38875 |
+
{
|
38876 |
+
"epoch": 0.9339836851400218,
|
38877 |
+
"grad_norm": NaN,
|
38878 |
+
"learning_rate": 1.1056958236111525e-06,
|
38879 |
+
"loss": 0.0,
|
38880 |
+
"step": 5553
|
38881 |
+
},
|
38882 |
+
{
|
38883 |
+
"epoch": 0.9341518795727861,
|
38884 |
+
"grad_norm": NaN,
|
38885 |
+
"learning_rate": 1.1000824656031195e-06,
|
38886 |
+
"loss": 0.0,
|
38887 |
+
"step": 5554
|
38888 |
+
},
|
38889 |
+
{
|
38890 |
+
"epoch": 0.9343200740055504,
|
38891 |
+
"grad_norm": NaN,
|
38892 |
+
"learning_rate": 1.0944832342335564e-06,
|
38893 |
+
"loss": 0.0,
|
38894 |
+
"step": 5555
|
38895 |
+
},
|
38896 |
+
{
|
38897 |
+
"epoch": 0.9344882684383147,
|
38898 |
+
"grad_norm": NaN,
|
38899 |
+
"learning_rate": 1.0888981311200031e-06,
|
38900 |
+
"loss": 0.0,
|
38901 |
+
"step": 5556
|
38902 |
+
},
|
38903 |
+
{
|
38904 |
+
"epoch": 0.934656462871079,
|
38905 |
+
"grad_norm": NaN,
|
38906 |
+
"learning_rate": 1.083327157875952e-06,
|
38907 |
+
"loss": 0.0,
|
38908 |
+
"step": 5557
|
38909 |
+
},
|
38910 |
+
{
|
38911 |
+
"epoch": 0.9348246573038432,
|
38912 |
+
"grad_norm": NaN,
|
38913 |
+
"learning_rate": 1.0777703161107877e-06,
|
38914 |
+
"loss": 0.0,
|
38915 |
+
"step": 5558
|
38916 |
+
},
|
38917 |
+
{
|
38918 |
+
"epoch": 0.9349928517366075,
|
38919 |
+
"grad_norm": NaN,
|
38920 |
+
"learning_rate": 1.0722276074298154e-06,
|
38921 |
+
"loss": 0.0,
|
38922 |
+
"step": 5559
|
38923 |
+
},
|
38924 |
+
{
|
38925 |
+
"epoch": 0.9351610461693718,
|
38926 |
+
"grad_norm": NaN,
|
38927 |
+
"learning_rate": 1.0666990334342707e-06,
|
38928 |
+
"loss": 0.0,
|
38929 |
+
"step": 5560
|
38930 |
+
},
|
38931 |
+
{
|
38932 |
+
"epoch": 0.9353292406021361,
|
38933 |
+
"grad_norm": NaN,
|
38934 |
+
"learning_rate": 1.0611845957212873e-06,
|
38935 |
+
"loss": 0.0,
|
38936 |
+
"step": 5561
|
38937 |
+
},
|
38938 |
+
{
|
38939 |
+
"epoch": 0.9354974350349003,
|
38940 |
+
"grad_norm": NaN,
|
38941 |
+
"learning_rate": 1.0556842958839242e-06,
|
38942 |
+
"loss": 0.0,
|
38943 |
+
"step": 5562
|
38944 |
+
},
|
38945 |
+
{
|
38946 |
+
"epoch": 0.9356656294676646,
|
38947 |
+
"grad_norm": NaN,
|
38948 |
+
"learning_rate": 1.0501981355111656e-06,
|
38949 |
+
"loss": 0.0,
|
38950 |
+
"step": 5563
|
38951 |
+
},
|
38952 |
+
{
|
38953 |
+
"epoch": 0.9358338239004289,
|
38954 |
+
"grad_norm": NaN,
|
38955 |
+
"learning_rate": 1.0447261161878884e-06,
|
38956 |
+
"loss": 0.0,
|
38957 |
+
"step": 5564
|
38958 |
+
},
|
38959 |
+
{
|
38960 |
+
"epoch": 0.9360020183331932,
|
38961 |
+
"grad_norm": NaN,
|
38962 |
+
"learning_rate": 1.0392682394949116e-06,
|
38963 |
+
"loss": 0.0,
|
38964 |
+
"step": 5565
|
38965 |
+
},
|
38966 |
+
{
|
38967 |
+
"epoch": 0.9361702127659575,
|
38968 |
+
"grad_norm": NaN,
|
38969 |
+
"learning_rate": 1.0338245070089515e-06,
|
38970 |
+
"loss": 0.0,
|
38971 |
+
"step": 5566
|
38972 |
+
},
|
38973 |
+
{
|
38974 |
+
"epoch": 0.9363384071987217,
|
38975 |
+
"grad_norm": NaN,
|
38976 |
+
"learning_rate": 1.0283949203026332e-06,
|
38977 |
+
"loss": 0.0,
|
38978 |
+
"step": 5567
|
38979 |
+
},
|
38980 |
+
{
|
38981 |
+
"epoch": 0.936506601631486,
|
38982 |
+
"grad_norm": NaN,
|
38983 |
+
"learning_rate": 1.0229794809445081e-06,
|
38984 |
+
"loss": 0.0,
|
38985 |
+
"step": 5568
|
38986 |
+
},
|
38987 |
+
{
|
38988 |
+
"epoch": 0.9366747960642503,
|
38989 |
+
"grad_norm": NaN,
|
38990 |
+
"learning_rate": 1.0175781904990412e-06,
|
38991 |
+
"loss": 0.0,
|
38992 |
+
"step": 5569
|
38993 |
+
},
|
38994 |
+
{
|
38995 |
+
"epoch": 0.9368429904970146,
|
38996 |
+
"grad_norm": NaN,
|
38997 |
+
"learning_rate": 1.0121910505266008e-06,
|
38998 |
+
"loss": 0.0,
|
38999 |
+
"step": 5570
|
39000 |
+
},
|
39001 |
+
{
|
39002 |
+
"epoch": 0.9370111849297789,
|
39003 |
+
"grad_norm": NaN,
|
39004 |
+
"learning_rate": 1.00681806258347e-06,
|
39005 |
+
"loss": 0.0,
|
39006 |
+
"step": 5571
|
39007 |
+
},
|
39008 |
+
{
|
39009 |
+
"epoch": 0.9371793793625431,
|
39010 |
+
"grad_norm": NaN,
|
39011 |
+
"learning_rate": 1.0014592282218404e-06,
|
39012 |
+
"loss": 0.0,
|
39013 |
+
"step": 5572
|
39014 |
+
},
|
39015 |
+
{
|
39016 |
+
"epoch": 0.9373475737953074,
|
39017 |
+
"grad_norm": NaN,
|
39018 |
+
"learning_rate": 9.96114548989835e-07,
|
39019 |
+
"loss": 0.0,
|
39020 |
+
"step": 5573
|
39021 |
+
},
|
39022 |
+
{
|
39023 |
+
"epoch": 0.9375157682280717,
|
39024 |
+
"grad_norm": NaN,
|
39025 |
+
"learning_rate": 9.907840264314572e-07,
|
39026 |
+
"loss": 0.0,
|
39027 |
+
"step": 5574
|
39028 |
+
},
|
39029 |
+
{
|
39030 |
+
"epoch": 0.937683962660836,
|
39031 |
+
"grad_norm": NaN,
|
39032 |
+
"learning_rate": 9.854676620866366e-07,
|
39033 |
+
"loss": 0.0,
|
39034 |
+
"step": 5575
|
39035 |
+
},
|
39036 |
+
{
|
39037 |
+
"epoch": 0.9378521570936001,
|
39038 |
+
"grad_norm": NaN,
|
39039 |
+
"learning_rate": 9.80165457491211e-07,
|
39040 |
+
"loss": 0.0,
|
39041 |
+
"step": 5576
|
39042 |
+
},
|
39043 |
+
{
|
39044 |
+
"epoch": 0.9380203515263644,
|
39045 |
+
"grad_norm": NaN,
|
39046 |
+
"learning_rate": 9.74877414176939e-07,
|
39047 |
+
"loss": 0.0,
|
39048 |
+
"step": 5577
|
39049 |
+
},
|
39050 |
+
{
|
39051 |
+
"epoch": 0.9381885459591287,
|
39052 |
+
"grad_norm": NaN,
|
39053 |
+
"learning_rate": 9.696035336714648e-07,
|
39054 |
+
"loss": 0.0,
|
39055 |
+
"step": 5578
|
39056 |
+
},
|
39057 |
+
{
|
39058 |
+
"epoch": 0.938356740391893,
|
39059 |
+
"grad_norm": NaN,
|
39060 |
+
"learning_rate": 9.643438174983589e-07,
|
39061 |
+
"loss": 0.0,
|
39062 |
+
"step": 5579
|
39063 |
+
},
|
39064 |
+
{
|
39065 |
+
"epoch": 0.9385249348246573,
|
39066 |
+
"grad_norm": NaN,
|
39067 |
+
"learning_rate": 9.590982671770943e-07,
|
39068 |
+
"loss": 0.0,
|
39069 |
+
"step": 5580
|
39070 |
+
},
|
39071 |
+
{
|
39072 |
+
"epoch": 0.9386931292574215,
|
39073 |
+
"grad_norm": NaN,
|
39074 |
+
"learning_rate": 9.538668842230537e-07,
|
39075 |
+
"loss": 0.0,
|
39076 |
+
"step": 5581
|
39077 |
+
},
|
39078 |
+
{
|
39079 |
+
"epoch": 0.9388613236901858,
|
39080 |
+
"grad_norm": NaN,
|
39081 |
+
"learning_rate": 9.486496701475167e-07,
|
39082 |
+
"loss": 0.0,
|
39083 |
+
"step": 5582
|
39084 |
+
},
|
39085 |
+
{
|
39086 |
+
"epoch": 0.9390295181229501,
|
39087 |
+
"grad_norm": NaN,
|
39088 |
+
"learning_rate": 9.434466264576892e-07,
|
39089 |
+
"loss": 0.0,
|
39090 |
+
"step": 5583
|
39091 |
+
},
|
39092 |
+
{
|
39093 |
+
"epoch": 0.9391977125557144,
|
39094 |
+
"grad_norm": NaN,
|
39095 |
+
"learning_rate": 9.382577546566574e-07,
|
39096 |
+
"loss": 0.0,
|
39097 |
+
"step": 5584
|
39098 |
+
},
|
39099 |
+
{
|
39100 |
+
"epoch": 0.9393659069884787,
|
39101 |
+
"grad_norm": NaN,
|
39102 |
+
"learning_rate": 9.330830562434445e-07,
|
39103 |
+
"loss": 0.0,
|
39104 |
+
"step": 5585
|
39105 |
+
},
|
39106 |
+
{
|
39107 |
+
"epoch": 0.9395341014212429,
|
39108 |
+
"grad_norm": NaN,
|
39109 |
+
"learning_rate": 9.279225327129548e-07,
|
39110 |
+
"loss": 0.0,
|
39111 |
+
"step": 5586
|
39112 |
+
},
|
39113 |
+
{
|
39114 |
+
"epoch": 0.9397022958540072,
|
39115 |
+
"grad_norm": NaN,
|
39116 |
+
"learning_rate": 9.227761855560069e-07,
|
39117 |
+
"loss": 0.0,
|
39118 |
+
"step": 5587
|
39119 |
+
},
|
39120 |
+
{
|
39121 |
+
"epoch": 0.9398704902867715,
|
39122 |
+
"grad_norm": NaN,
|
39123 |
+
"learning_rate": 9.176440162593169e-07,
|
39124 |
+
"loss": 0.0,
|
39125 |
+
"step": 5588
|
39126 |
+
},
|
39127 |
+
{
|
39128 |
+
"epoch": 0.9400386847195358,
|
39129 |
+
"grad_norm": NaN,
|
39130 |
+
"learning_rate": 9.125260263055213e-07,
|
39131 |
+
"loss": 0.0,
|
39132 |
+
"step": 5589
|
39133 |
+
},
|
39134 |
+
{
|
39135 |
+
"epoch": 0.9402068791523001,
|
39136 |
+
"grad_norm": NaN,
|
39137 |
+
"learning_rate": 9.074222171731427e-07,
|
39138 |
+
"loss": 0.0,
|
39139 |
+
"step": 5590
|
39140 |
+
},
|
39141 |
+
{
|
39142 |
+
"epoch": 0.9403750735850643,
|
39143 |
+
"grad_norm": NaN,
|
39144 |
+
"learning_rate": 9.023325903366242e-07,
|
39145 |
+
"loss": 0.0,
|
39146 |
+
"step": 5591
|
39147 |
+
},
|
39148 |
+
{
|
39149 |
+
"epoch": 0.9405432680178286,
|
39150 |
+
"grad_norm": NaN,
|
39151 |
+
"learning_rate": 8.972571472662838e-07,
|
39152 |
+
"loss": 0.0,
|
39153 |
+
"step": 5592
|
39154 |
+
},
|
39155 |
+
{
|
39156 |
+
"epoch": 0.9407114624505929,
|
39157 |
+
"grad_norm": NaN,
|
39158 |
+
"learning_rate": 8.921958894283767e-07,
|
39159 |
+
"loss": 0.0,
|
39160 |
+
"step": 5593
|
39161 |
+
},
|
39162 |
+
{
|
39163 |
+
"epoch": 0.9408796568833572,
|
39164 |
+
"grad_norm": NaN,
|
39165 |
+
"learning_rate": 8.871488182850441e-07,
|
39166 |
+
"loss": 0.0,
|
39167 |
+
"step": 5594
|
39168 |
+
},
|
39169 |
+
{
|
39170 |
+
"epoch": 0.9410478513161215,
|
39171 |
+
"grad_norm": NaN,
|
39172 |
+
"learning_rate": 8.821159352943143e-07,
|
39173 |
+
"loss": 0.0,
|
39174 |
+
"step": 5595
|
39175 |
+
},
|
39176 |
+
{
|
39177 |
+
"epoch": 0.9412160457488857,
|
39178 |
+
"grad_norm": NaN,
|
39179 |
+
"learning_rate": 8.770972419101464e-07,
|
39180 |
+
"loss": 0.0,
|
39181 |
+
"step": 5596
|
39182 |
+
},
|
39183 |
+
{
|
39184 |
+
"epoch": 0.94138424018165,
|
39185 |
+
"grad_norm": NaN,
|
39186 |
+
"learning_rate": 8.720927395823697e-07,
|
39187 |
+
"loss": 0.0,
|
39188 |
+
"step": 5597
|
39189 |
+
},
|
39190 |
+
{
|
39191 |
+
"epoch": 0.9415524346144143,
|
39192 |
+
"grad_norm": NaN,
|
39193 |
+
"learning_rate": 8.671024297567388e-07,
|
39194 |
+
"loss": 0.0,
|
39195 |
+
"step": 5598
|
39196 |
+
},
|
39197 |
+
{
|
39198 |
+
"epoch": 0.9417206290471786,
|
39199 |
+
"grad_norm": NaN,
|
39200 |
+
"learning_rate": 8.621263138749002e-07,
|
39201 |
+
"loss": 0.0,
|
39202 |
+
"step": 5599
|
39203 |
+
},
|
39204 |
+
{
|
39205 |
+
"epoch": 0.9418888234799428,
|
39206 |
+
"grad_norm": NaN,
|
39207 |
+
"learning_rate": 8.571643933743879e-07,
|
39208 |
+
"loss": 0.0,
|
39209 |
+
"step": 5600
|
39210 |
+
},
|
39211 |
+
{
|
39212 |
+
"epoch": 0.9420570179127071,
|
39213 |
+
"grad_norm": NaN,
|
39214 |
+
"learning_rate": 8.522166696886547e-07,
|
39215 |
+
"loss": 0.0,
|
39216 |
+
"step": 5601
|
39217 |
+
},
|
39218 |
+
{
|
39219 |
+
"epoch": 0.9422252123454714,
|
39220 |
+
"grad_norm": NaN,
|
39221 |
+
"learning_rate": 8.472831442470408e-07,
|
39222 |
+
"loss": 0.0,
|
39223 |
+
"step": 5602
|
39224 |
+
},
|
39225 |
+
{
|
39226 |
+
"epoch": 0.9423934067782357,
|
39227 |
+
"grad_norm": NaN,
|
39228 |
+
"learning_rate": 8.423638184747784e-07,
|
39229 |
+
"loss": 0.0,
|
39230 |
+
"step": 5603
|
39231 |
+
},
|
39232 |
+
{
|
39233 |
+
"epoch": 0.942561601211,
|
39234 |
+
"grad_norm": NaN,
|
39235 |
+
"learning_rate": 8.374586937930196e-07,
|
39236 |
+
"loss": 0.0,
|
39237 |
+
"step": 5604
|
39238 |
+
},
|
39239 |
+
{
|
39240 |
+
"epoch": 0.9427297956437642,
|
39241 |
+
"grad_norm": NaN,
|
39242 |
+
"learning_rate": 8.325677716187807e-07,
|
39243 |
+
"loss": 0.0,
|
39244 |
+
"step": 5605
|
39245 |
+
},
|
39246 |
+
{
|
39247 |
+
"epoch": 0.9428979900765284,
|
39248 |
+
"grad_norm": NaN,
|
39249 |
+
"learning_rate": 8.276910533650151e-07,
|
39250 |
+
"loss": 0.0,
|
39251 |
+
"step": 5606
|
39252 |
+
},
|
39253 |
+
{
|
39254 |
+
"epoch": 0.9430661845092927,
|
39255 |
+
"grad_norm": NaN,
|
39256 |
+
"learning_rate": 8.2282854044054e-07,
|
39257 |
+
"loss": 0.0,
|
39258 |
+
"step": 5607
|
39259 |
+
},
|
39260 |
+
{
|
39261 |
+
"epoch": 0.943234378942057,
|
39262 |
+
"grad_norm": NaN,
|
39263 |
+
"learning_rate": 8.179802342500876e-07,
|
39264 |
+
"loss": 0.0,
|
39265 |
+
"step": 5608
|
39266 |
+
},
|
39267 |
+
{
|
39268 |
+
"epoch": 0.9434025733748213,
|
39269 |
+
"grad_norm": NaN,
|
39270 |
+
"learning_rate": 8.13146136194265e-07,
|
39271 |
+
"loss": 0.0,
|
39272 |
+
"step": 5609
|
39273 |
+
},
|
39274 |
+
{
|
39275 |
+
"epoch": 0.9435707678075855,
|
39276 |
+
"grad_norm": NaN,
|
39277 |
+
"learning_rate": 8.083262476696051e-07,
|
39278 |
+
"loss": 0.0,
|
39279 |
+
"step": 5610
|
39280 |
+
},
|
39281 |
+
{
|
39282 |
+
"epoch": 0.9437389622403498,
|
39283 |
+
"grad_norm": NaN,
|
39284 |
+
"learning_rate": 8.035205700685167e-07,
|
39285 |
+
"loss": 0.0,
|
39286 |
+
"step": 5611
|
39287 |
+
},
|
39288 |
+
{
|
39289 |
+
"epoch": 0.9439071566731141,
|
39290 |
+
"grad_norm": NaN,
|
39291 |
+
"learning_rate": 7.987291047793056e-07,
|
39292 |
+
"loss": 0.0,
|
39293 |
+
"step": 5612
|
39294 |
+
},
|
39295 |
+
{
|
39296 |
+
"epoch": 0.9440753511058784,
|
39297 |
+
"grad_norm": NaN,
|
39298 |
+
"learning_rate": 7.93951853186159e-07,
|
39299 |
+
"loss": 0.0,
|
39300 |
+
"step": 5613
|
39301 |
+
},
|
39302 |
+
{
|
39303 |
+
"epoch": 0.9442435455386426,
|
39304 |
+
"grad_norm": NaN,
|
39305 |
+
"learning_rate": 7.891888166691952e-07,
|
39306 |
+
"loss": 0.0,
|
39307 |
+
"step": 5614
|
39308 |
+
},
|
39309 |
+
{
|
39310 |
+
"epoch": 0.9444117399714069,
|
39311 |
+
"grad_norm": NaN,
|
39312 |
+
"learning_rate": 7.844399966043802e-07,
|
39313 |
+
"loss": 0.0,
|
39314 |
+
"step": 5615
|
39315 |
+
},
|
39316 |
+
{
|
39317 |
+
"epoch": 0.9445799344041712,
|
39318 |
+
"grad_norm": NaN,
|
39319 |
+
"learning_rate": 7.797053943636112e-07,
|
39320 |
+
"loss": 0.0,
|
39321 |
+
"step": 5616
|
39322 |
+
},
|
39323 |
+
{
|
39324 |
+
"epoch": 0.9447481288369355,
|
39325 |
+
"grad_norm": NaN,
|
39326 |
+
"learning_rate": 7.749850113146551e-07,
|
39327 |
+
"loss": 0.0,
|
39328 |
+
"step": 5617
|
39329 |
+
},
|
39330 |
+
{
|
39331 |
+
"epoch": 0.9449163232696998,
|
39332 |
+
"grad_norm": NaN,
|
39333 |
+
"learning_rate": 7.70278848821171e-07,
|
39334 |
+
"loss": 0.0,
|
39335 |
+
"step": 5618
|
39336 |
+
},
|
39337 |
+
{
|
39338 |
+
"epoch": 0.945084517702464,
|
39339 |
+
"grad_norm": NaN,
|
39340 |
+
"learning_rate": 7.655869082427269e-07,
|
39341 |
+
"loss": 0.0,
|
39342 |
+
"step": 5619
|
39343 |
+
},
|
39344 |
+
{
|
39345 |
+
"epoch": 0.9452527121352283,
|
39346 |
+
"grad_norm": NaN,
|
39347 |
+
"learning_rate": 7.609091909347721e-07,
|
39348 |
+
"loss": 0.0,
|
39349 |
+
"step": 5620
|
39350 |
+
},
|
39351 |
+
{
|
39352 |
+
"epoch": 0.9454209065679926,
|
39353 |
+
"grad_norm": NaN,
|
39354 |
+
"learning_rate": 7.56245698248631e-07,
|
39355 |
+
"loss": 0.0,
|
39356 |
+
"step": 5621
|
39357 |
+
},
|
39358 |
+
{
|
39359 |
+
"epoch": 0.9455891010007569,
|
39360 |
+
"grad_norm": NaN,
|
39361 |
+
"learning_rate": 7.515964315315538e-07,
|
39362 |
+
"loss": 0.0,
|
39363 |
+
"step": 5622
|
39364 |
+
},
|
39365 |
+
{
|
39366 |
+
"epoch": 0.9457572954335212,
|
39367 |
+
"grad_norm": NaN,
|
39368 |
+
"learning_rate": 7.46961392126655e-07,
|
39369 |
+
"loss": 0.0,
|
39370 |
+
"step": 5623
|
39371 |
+
},
|
39372 |
+
{
|
39373 |
+
"epoch": 0.9459254898662854,
|
39374 |
+
"grad_norm": NaN,
|
39375 |
+
"learning_rate": 7.423405813729467e-07,
|
39376 |
+
"loss": 0.0,
|
39377 |
+
"step": 5624
|
39378 |
+
},
|
39379 |
+
{
|
39380 |
+
"epoch": 0.9460936842990497,
|
39381 |
+
"grad_norm": NaN,
|
39382 |
+
"learning_rate": 7.377340006053169e-07,
|
39383 |
+
"loss": 0.0,
|
39384 |
+
"step": 5625
|
39385 |
+
},
|
39386 |
+
{
|
39387 |
+
"epoch": 0.946261878731814,
|
39388 |
+
"grad_norm": NaN,
|
39389 |
+
"learning_rate": 7.33141651154573e-07,
|
39390 |
+
"loss": 0.0,
|
39391 |
+
"step": 5626
|
39392 |
+
},
|
39393 |
+
{
|
39394 |
+
"epoch": 0.9464300731645783,
|
39395 |
+
"grad_norm": NaN,
|
39396 |
+
"learning_rate": 7.285635343473818e-07,
|
39397 |
+
"loss": 0.0,
|
39398 |
+
"step": 5627
|
39399 |
+
},
|
39400 |
+
{
|
39401 |
+
"epoch": 0.9465982675973426,
|
39402 |
+
"grad_norm": NaN,
|
39403 |
+
"learning_rate": 7.239996515063186e-07,
|
39404 |
+
"loss": 0.0,
|
39405 |
+
"step": 5628
|
39406 |
+
},
|
39407 |
+
{
|
39408 |
+
"epoch": 0.9467664620301068,
|
39409 |
+
"grad_norm": NaN,
|
39410 |
+
"learning_rate": 7.194500039498286e-07,
|
39411 |
+
"loss": 0.0,
|
39412 |
+
"step": 5629
|
39413 |
+
},
|
39414 |
+
{
|
39415 |
+
"epoch": 0.9469346564628711,
|
39416 |
+
"grad_norm": NaN,
|
39417 |
+
"learning_rate": 7.149145929922607e-07,
|
39418 |
+
"loss": 0.0,
|
39419 |
+
"step": 5630
|
39420 |
+
},
|
39421 |
+
{
|
39422 |
+
"epoch": 0.9471028508956354,
|
39423 |
+
"grad_norm": NaN,
|
39424 |
+
"learning_rate": 7.103934199438444e-07,
|
39425 |
+
"loss": 0.0,
|
39426 |
+
"step": 5631
|
39427 |
+
},
|
39428 |
+
{
|
39429 |
+
"epoch": 0.9472710453283997,
|
39430 |
+
"grad_norm": NaN,
|
39431 |
+
"learning_rate": 7.058864861106907e-07,
|
39432 |
+
"loss": 0.0,
|
39433 |
+
"step": 5632
|
39434 |
+
},
|
39435 |
+
{
|
39436 |
+
"epoch": 0.947439239761164,
|
39437 |
+
"grad_norm": NaN,
|
39438 |
+
"learning_rate": 7.013937927948022e-07,
|
39439 |
+
"loss": 0.0,
|
39440 |
+
"step": 5633
|
39441 |
+
},
|
39442 |
+
{
|
39443 |
+
"epoch": 0.9476074341939282,
|
39444 |
+
"grad_norm": NaN,
|
39445 |
+
"learning_rate": 6.969153412940743e-07,
|
39446 |
+
"loss": 0.0,
|
39447 |
+
"step": 5634
|
39448 |
+
},
|
39449 |
+
{
|
39450 |
+
"epoch": 0.9477756286266924,
|
39451 |
+
"grad_norm": NaN,
|
39452 |
+
"learning_rate": 6.924511329022831e-07,
|
39453 |
+
"loss": 0.0,
|
39454 |
+
"step": 5635
|
39455 |
+
},
|
39456 |
+
{
|
39457 |
+
"epoch": 0.9479438230594567,
|
39458 |
+
"grad_norm": NaN,
|
39459 |
+
"learning_rate": 6.880011689090804e-07,
|
39460 |
+
"loss": 0.0,
|
39461 |
+
"step": 5636
|
39462 |
+
},
|
39463 |
+
{
|
39464 |
+
"epoch": 0.948112017492221,
|
39465 |
+
"grad_norm": NaN,
|
39466 |
+
"learning_rate": 6.8356545060001e-07,
|
39467 |
+
"loss": 0.0,
|
39468 |
+
"step": 5637
|
39469 |
+
},
|
39470 |
+
{
|
39471 |
+
"epoch": 0.9482802119249852,
|
39472 |
+
"grad_norm": NaN,
|
39473 |
+
"learning_rate": 6.79143979256508e-07,
|
39474 |
+
"loss": 0.0,
|
39475 |
+
"step": 5638
|
39476 |
+
},
|
39477 |
+
{
|
39478 |
+
"epoch": 0.9484484063577495,
|
39479 |
+
"grad_norm": NaN,
|
39480 |
+
"learning_rate": 6.747367561558859e-07,
|
39481 |
+
"loss": 0.0,
|
39482 |
+
"step": 5639
|
39483 |
+
},
|
39484 |
+
{
|
39485 |
+
"epoch": 0.9486166007905138,
|
39486 |
+
"grad_norm": NaN,
|
39487 |
+
"learning_rate": 6.703437825713421e-07,
|
39488 |
+
"loss": 0.0,
|
39489 |
+
"step": 5640
|
39490 |
+
},
|
39491 |
+
{
|
39492 |
+
"epoch": 0.9487847952232781,
|
39493 |
+
"grad_norm": NaN,
|
39494 |
+
"learning_rate": 6.659650597719502e-07,
|
39495 |
+
"loss": 0.0,
|
39496 |
+
"step": 5641
|
39497 |
+
},
|
39498 |
+
{
|
39499 |
+
"epoch": 0.9489529896560424,
|
39500 |
+
"grad_norm": NaN,
|
39501 |
+
"learning_rate": 6.616005890226817e-07,
|
39502 |
+
"loss": 0.0,
|
39503 |
+
"step": 5642
|
39504 |
+
},
|
39505 |
+
{
|
39506 |
+
"epoch": 0.9491211840888066,
|
39507 |
+
"grad_norm": NaN,
|
39508 |
+
"learning_rate": 6.572503715843836e-07,
|
39509 |
+
"loss": 0.0,
|
39510 |
+
"step": 5643
|
39511 |
+
},
|
39512 |
+
{
|
39513 |
+
"epoch": 0.9492893785215709,
|
39514 |
+
"grad_norm": NaN,
|
39515 |
+
"learning_rate": 6.52914408713784e-07,
|
39516 |
+
"loss": 0.0,
|
39517 |
+
"step": 5644
|
39518 |
+
},
|
39519 |
+
{
|
39520 |
+
"epoch": 0.9494575729543352,
|
39521 |
+
"grad_norm": NaN,
|
39522 |
+
"learning_rate": 6.485927016634863e-07,
|
39523 |
+
"loss": 0.0,
|
39524 |
+
"step": 5645
|
39525 |
+
},
|
39526 |
+
{
|
39527 |
+
"epoch": 0.9496257673870995,
|
39528 |
+
"grad_norm": NaN,
|
39529 |
+
"learning_rate": 6.44285251681992e-07,
|
39530 |
+
"loss": 0.0,
|
39531 |
+
"step": 5646
|
39532 |
+
},
|
39533 |
+
{
|
39534 |
+
"epoch": 0.9497939618198638,
|
39535 |
+
"grad_norm": NaN,
|
39536 |
+
"learning_rate": 6.399920600136722e-07,
|
39537 |
+
"loss": 0.0,
|
39538 |
+
"step": 5647
|
39539 |
+
},
|
39540 |
+
{
|
39541 |
+
"epoch": 0.949962156252628,
|
39542 |
+
"grad_norm": NaN,
|
39543 |
+
"learning_rate": 6.357131278987849e-07,
|
39544 |
+
"loss": 0.0,
|
39545 |
+
"step": 5648
|
39546 |
+
},
|
39547 |
+
{
|
39548 |
+
"epoch": 0.9501303506853923,
|
39549 |
+
"grad_norm": NaN,
|
39550 |
+
"learning_rate": 6.314484565734636e-07,
|
39551 |
+
"loss": 0.0,
|
39552 |
+
"step": 5649
|
39553 |
+
},
|
39554 |
+
{
|
39555 |
+
"epoch": 0.9502985451181566,
|
39556 |
+
"grad_norm": NaN,
|
39557 |
+
"learning_rate": 6.271980472697225e-07,
|
39558 |
+
"loss": 0.0,
|
39559 |
+
"step": 5650
|
39560 |
+
},
|
39561 |
+
{
|
39562 |
+
"epoch": 0.9504667395509209,
|
39563 |
+
"grad_norm": NaN,
|
39564 |
+
"learning_rate": 6.229619012154575e-07,
|
39565 |
+
"loss": 0.0,
|
39566 |
+
"step": 5651
|
39567 |
+
},
|
39568 |
+
{
|
39569 |
+
"epoch": 0.9506349339836851,
|
39570 |
+
"grad_norm": NaN,
|
39571 |
+
"learning_rate": 6.18740019634445e-07,
|
39572 |
+
"loss": 0.0,
|
39573 |
+
"step": 5652
|
39574 |
+
},
|
39575 |
+
{
|
39576 |
+
"epoch": 0.9508031284164494,
|
39577 |
+
"grad_norm": NaN,
|
39578 |
+
"learning_rate": 6.145324037463429e-07,
|
39579 |
+
"loss": 0.0,
|
39580 |
+
"step": 5653
|
39581 |
+
},
|
39582 |
+
{
|
39583 |
+
"epoch": 0.9509713228492137,
|
39584 |
+
"grad_norm": NaN,
|
39585 |
+
"learning_rate": 6.103390547666788e-07,
|
39586 |
+
"loss": 0.0,
|
39587 |
+
"step": 5654
|
39588 |
+
},
|
39589 |
+
{
|
39590 |
+
"epoch": 0.951139517281978,
|
39591 |
+
"grad_norm": NaN,
|
39592 |
+
"learning_rate": 6.061599739068668e-07,
|
39593 |
+
"loss": 0.0,
|
39594 |
+
"step": 5655
|
39595 |
+
},
|
39596 |
+
{
|
39597 |
+
"epoch": 0.9513077117147423,
|
39598 |
+
"grad_norm": NaN,
|
39599 |
+
"learning_rate": 6.019951623741916e-07,
|
39600 |
+
"loss": 0.0,
|
39601 |
+
"step": 5656
|
39602 |
+
},
|
39603 |
+
{
|
39604 |
+
"epoch": 0.9514759061475065,
|
39605 |
+
"grad_norm": NaN,
|
39606 |
+
"learning_rate": 5.978446213718291e-07,
|
39607 |
+
"loss": 0.0,
|
39608 |
+
"step": 5657
|
39609 |
+
},
|
39610 |
+
{
|
39611 |
+
"epoch": 0.9516441005802708,
|
39612 |
+
"grad_norm": NaN,
|
39613 |
+
"learning_rate": 5.937083520988151e-07,
|
39614 |
+
"loss": 0.0,
|
39615 |
+
"step": 5658
|
39616 |
+
},
|
39617 |
+
{
|
39618 |
+
"epoch": 0.9518122950130351,
|
39619 |
+
"grad_norm": NaN,
|
39620 |
+
"learning_rate": 5.895863557500769e-07,
|
39621 |
+
"loss": 0.0,
|
39622 |
+
"step": 5659
|
39623 |
+
},
|
39624 |
+
{
|
39625 |
+
"epoch": 0.9519804894457994,
|
39626 |
+
"grad_norm": NaN,
|
39627 |
+
"learning_rate": 5.85478633516412e-07,
|
39628 |
+
"loss": 0.0,
|
39629 |
+
"step": 5660
|
39630 |
+
},
|
39631 |
+
{
|
39632 |
+
"epoch": 0.9521486838785637,
|
39633 |
+
"grad_norm": NaN,
|
39634 |
+
"learning_rate": 5.813851865844988e-07,
|
39635 |
+
"loss": 0.0,
|
39636 |
+
"step": 5661
|
39637 |
+
},
|
39638 |
+
{
|
39639 |
+
"epoch": 0.9523168783113279,
|
39640 |
+
"grad_norm": NaN,
|
39641 |
+
"learning_rate": 5.773060161368804e-07,
|
39642 |
+
"loss": 0.0,
|
39643 |
+
"step": 5662
|
39644 |
+
},
|
39645 |
+
{
|
39646 |
+
"epoch": 0.9524850727440922,
|
39647 |
+
"grad_norm": NaN,
|
39648 |
+
"learning_rate": 5.732411233519919e-07,
|
39649 |
+
"loss": 0.0,
|
39650 |
+
"step": 5663
|
39651 |
+
},
|
39652 |
+
{
|
39653 |
+
"epoch": 0.9526532671768565,
|
39654 |
+
"grad_norm": NaN,
|
39655 |
+
"learning_rate": 5.691905094041272e-07,
|
39656 |
+
"loss": 0.0,
|
39657 |
+
"step": 5664
|
39658 |
+
},
|
39659 |
+
{
|
39660 |
+
"epoch": 0.9528214616096207,
|
39661 |
+
"grad_norm": NaN,
|
39662 |
+
"learning_rate": 5.651541754634726e-07,
|
39663 |
+
"loss": 0.0,
|
39664 |
+
"step": 5665
|
39665 |
+
},
|
39666 |
+
{
|
39667 |
+
"epoch": 0.952989656042385,
|
39668 |
+
"grad_norm": NaN,
|
39669 |
+
"learning_rate": 5.611321226960675e-07,
|
39670 |
+
"loss": 0.0,
|
39671 |
+
"step": 5666
|
39672 |
+
},
|
39673 |
+
{
|
39674 |
+
"epoch": 0.9531578504751492,
|
39675 |
+
"grad_norm": NaN,
|
39676 |
+
"learning_rate": 5.571243522638547e-07,
|
39677 |
+
"loss": 0.0,
|
39678 |
+
"step": 5667
|
39679 |
+
},
|
39680 |
+
{
|
39681 |
+
"epoch": 0.9533260449079135,
|
39682 |
+
"grad_norm": NaN,
|
39683 |
+
"learning_rate": 5.531308653246192e-07,
|
39684 |
+
"loss": 0.0,
|
39685 |
+
"step": 5668
|
39686 |
+
},
|
39687 |
+
{
|
39688 |
+
"epoch": 0.9534942393406778,
|
39689 |
+
"grad_norm": NaN,
|
39690 |
+
"learning_rate": 5.491516630320381e-07,
|
39691 |
+
"loss": 0.0,
|
39692 |
+
"step": 5669
|
39693 |
+
},
|
39694 |
+
{
|
39695 |
+
"epoch": 0.9536624337734421,
|
39696 |
+
"grad_norm": NaN,
|
39697 |
+
"learning_rate": 5.451867465356641e-07,
|
39698 |
+
"loss": 0.0,
|
39699 |
+
"step": 5670
|
39700 |
+
},
|
39701 |
+
{
|
39702 |
+
"epoch": 0.9538306282062063,
|
39703 |
+
"grad_norm": NaN,
|
39704 |
+
"learning_rate": 5.412361169809088e-07,
|
39705 |
+
"loss": 0.0,
|
39706 |
+
"step": 5671
|
39707 |
+
},
|
39708 |
+
{
|
39709 |
+
"epoch": 0.9539988226389706,
|
39710 |
+
"grad_norm": NaN,
|
39711 |
+
"learning_rate": 5.372997755090759e-07,
|
39712 |
+
"loss": 0.0,
|
39713 |
+
"step": 5672
|
39714 |
+
},
|
39715 |
+
{
|
39716 |
+
"epoch": 0.9541670170717349,
|
39717 |
+
"grad_norm": NaN,
|
39718 |
+
"learning_rate": 5.333777232573223e-07,
|
39719 |
+
"loss": 0.0,
|
39720 |
+
"step": 5673
|
39721 |
+
},
|
39722 |
+
{
|
39723 |
+
"epoch": 0.9543352115044992,
|
39724 |
+
"grad_norm": NaN,
|
39725 |
+
"learning_rate": 5.294699613586862e-07,
|
39726 |
+
"loss": 0.0,
|
39727 |
+
"step": 5674
|
39728 |
+
},
|
39729 |
+
{
|
39730 |
+
"epoch": 0.9545034059372635,
|
39731 |
+
"grad_norm": NaN,
|
39732 |
+
"learning_rate": 5.255764909420757e-07,
|
39733 |
+
"loss": 0.0,
|
39734 |
+
"step": 5675
|
39735 |
+
},
|
39736 |
+
{
|
39737 |
+
"epoch": 0.9546716003700277,
|
39738 |
+
"grad_norm": NaN,
|
39739 |
+
"learning_rate": 5.216973131322689e-07,
|
39740 |
+
"loss": 0.0,
|
39741 |
+
"step": 5676
|
39742 |
+
},
|
39743 |
+
{
|
39744 |
+
"epoch": 0.954839794802792,
|
39745 |
+
"grad_norm": NaN,
|
39746 |
+
"learning_rate": 5.178324290499248e-07,
|
39747 |
+
"loss": 0.0,
|
39748 |
+
"step": 5677
|
39749 |
+
},
|
39750 |
+
{
|
39751 |
+
"epoch": 0.9550079892355563,
|
39752 |
+
"grad_norm": NaN,
|
39753 |
+
"learning_rate": 5.139818398115559e-07,
|
39754 |
+
"loss": 0.0,
|
39755 |
+
"step": 5678
|
39756 |
+
},
|
39757 |
+
{
|
39758 |
+
"epoch": 0.9551761836683206,
|
39759 |
+
"grad_norm": NaN,
|
39760 |
+
"learning_rate": 5.101455465295557e-07,
|
39761 |
+
"loss": 0.0,
|
39762 |
+
"step": 5679
|
39763 |
+
},
|
39764 |
+
{
|
39765 |
+
"epoch": 0.9553443781010849,
|
39766 |
+
"grad_norm": NaN,
|
39767 |
+
"learning_rate": 5.063235503121933e-07,
|
39768 |
+
"loss": 0.0,
|
39769 |
+
"step": 5680
|
39770 |
+
},
|
39771 |
+
{
|
39772 |
+
"epoch": 0.9555125725338491,
|
39773 |
+
"grad_norm": NaN,
|
39774 |
+
"learning_rate": 5.025158522635964e-07,
|
39775 |
+
"loss": 0.0,
|
39776 |
+
"step": 5681
|
39777 |
+
},
|
39778 |
+
{
|
39779 |
+
"epoch": 0.9556807669666134,
|
39780 |
+
"grad_norm": NaN,
|
39781 |
+
"learning_rate": 4.987224534837631e-07,
|
39782 |
+
"loss": 0.0,
|
39783 |
+
"step": 5682
|
39784 |
+
},
|
39785 |
+
{
|
39786 |
+
"epoch": 0.9558489613993777,
|
39787 |
+
"grad_norm": NaN,
|
39788 |
+
"learning_rate": 4.949433550685722e-07,
|
39789 |
+
"loss": 0.0,
|
39790 |
+
"step": 5683
|
39791 |
+
},
|
39792 |
+
{
|
39793 |
+
"epoch": 0.956017155832142,
|
39794 |
+
"grad_norm": NaN,
|
39795 |
+
"learning_rate": 4.911785581097561e-07,
|
39796 |
+
"loss": 0.0,
|
39797 |
+
"step": 5684
|
39798 |
+
},
|
39799 |
+
{
|
39800 |
+
"epoch": 0.9561853502649063,
|
39801 |
+
"grad_norm": NaN,
|
39802 |
+
"learning_rate": 4.874280636949225e-07,
|
39803 |
+
"loss": 0.0,
|
39804 |
+
"step": 5685
|
39805 |
+
},
|
39806 |
+
{
|
39807 |
+
"epoch": 0.9563535446976705,
|
39808 |
+
"grad_norm": NaN,
|
39809 |
+
"learning_rate": 4.836918729075435e-07,
|
39810 |
+
"loss": 0.0,
|
39811 |
+
"step": 5686
|
39812 |
+
},
|
39813 |
+
{
|
39814 |
+
"epoch": 0.9565217391304348,
|
39815 |
+
"grad_norm": NaN,
|
39816 |
+
"learning_rate": 4.79969986826978e-07,
|
39817 |
+
"loss": 0.0,
|
39818 |
+
"step": 5687
|
39819 |
+
},
|
39820 |
+
{
|
39821 |
+
"epoch": 0.9566899335631991,
|
39822 |
+
"grad_norm": NaN,
|
39823 |
+
"learning_rate": 4.7626240652842155e-07,
|
39824 |
+
"loss": 0.0,
|
39825 |
+
"step": 5688
|
39826 |
+
},
|
39827 |
+
{
|
39828 |
+
"epoch": 0.9568581279959634,
|
39829 |
+
"grad_norm": NaN,
|
39830 |
+
"learning_rate": 4.7256913308295627e-07,
|
39831 |
+
"loss": 0.0,
|
39832 |
+
"step": 5689
|
39833 |
+
},
|
39834 |
+
{
|
39835 |
+
"epoch": 0.9570263224287276,
|
39836 |
+
"grad_norm": NaN,
|
39837 |
+
"learning_rate": 4.688901675575341e-07,
|
39838 |
+
"loss": 0.0,
|
39839 |
+
"step": 5690
|
39840 |
+
},
|
39841 |
+
{
|
39842 |
+
"epoch": 0.9571945168614919,
|
39843 |
+
"grad_norm": NaN,
|
39844 |
+
"learning_rate": 4.6522551101496057e-07,
|
39845 |
+
"loss": 0.0,
|
39846 |
+
"step": 5691
|
39847 |
+
},
|
39848 |
+
{
|
39849 |
+
"epoch": 0.9573627112942562,
|
39850 |
+
"grad_norm": NaN,
|
39851 |
+
"learning_rate": 4.6157516451391656e-07,
|
39852 |
+
"loss": 0.0,
|
39853 |
+
"step": 5692
|
39854 |
+
},
|
39855 |
+
{
|
39856 |
+
"epoch": 0.9575309057270205,
|
39857 |
+
"grad_norm": NaN,
|
39858 |
+
"learning_rate": 4.579391291089419e-07,
|
39859 |
+
"loss": 0.0,
|
39860 |
+
"step": 5693
|
39861 |
+
},
|
39862 |
+
{
|
39863 |
+
"epoch": 0.9576991001597848,
|
39864 |
+
"grad_norm": NaN,
|
39865 |
+
"learning_rate": 4.543174058504518e-07,
|
39866 |
+
"loss": 0.0,
|
39867 |
+
"step": 5694
|
39868 |
+
},
|
39869 |
+
{
|
39870 |
+
"epoch": 0.9578672945925489,
|
39871 |
+
"grad_norm": NaN,
|
39872 |
+
"learning_rate": 4.507099957847205e-07,
|
39873 |
+
"loss": 0.0,
|
39874 |
+
"step": 5695
|
39875 |
+
},
|
39876 |
+
{
|
39877 |
+
"epoch": 0.9580354890253132,
|
39878 |
+
"grad_norm": NaN,
|
39879 |
+
"learning_rate": 4.4711689995389216e-07,
|
39880 |
+
"loss": 0.0,
|
39881 |
+
"step": 5696
|
39882 |
+
},
|
39883 |
+
{
|
39884 |
+
"epoch": 0.9582036834580775,
|
39885 |
+
"grad_norm": NaN,
|
39886 |
+
"learning_rate": 4.435381193959587e-07,
|
39887 |
+
"loss": 0.0,
|
39888 |
+
"step": 5697
|
39889 |
+
},
|
39890 |
+
{
|
39891 |
+
"epoch": 0.9583718778908418,
|
39892 |
+
"grad_norm": NaN,
|
39893 |
+
"learning_rate": 4.3997365514480416e-07,
|
39894 |
+
"loss": 0.0,
|
39895 |
+
"step": 5698
|
39896 |
+
},
|
39897 |
+
{
|
39898 |
+
"epoch": 0.958540072323606,
|
39899 |
+
"grad_norm": NaN,
|
39900 |
+
"learning_rate": 4.364235082301549e-07,
|
39901 |
+
"loss": 0.0,
|
39902 |
+
"step": 5699
|
39903 |
+
},
|
39904 |
+
{
|
39905 |
+
"epoch": 0.9587082667563703,
|
39906 |
+
"grad_norm": NaN,
|
39907 |
+
"learning_rate": 4.3288767967760715e-07,
|
39908 |
+
"loss": 0.0,
|
39909 |
+
"step": 5700
|
39910 |
+
},
|
39911 |
+
{
|
39912 |
+
"epoch": 0.9588764611891346,
|
39913 |
+
"grad_norm": NaN,
|
39914 |
+
"learning_rate": 4.293661705086327e-07,
|
39915 |
+
"loss": 0.0,
|
39916 |
+
"step": 5701
|
39917 |
+
},
|
39918 |
+
{
|
39919 |
+
"epoch": 0.9590446556218989,
|
39920 |
+
"grad_norm": NaN,
|
39921 |
+
"learning_rate": 4.258589817405401e-07,
|
39922 |
+
"loss": 0.0,
|
39923 |
+
"step": 5702
|
39924 |
+
},
|
39925 |
+
{
|
39926 |
+
"epoch": 0.9592128500546632,
|
39927 |
+
"grad_norm": NaN,
|
39928 |
+
"learning_rate": 4.2236611438652986e-07,
|
39929 |
+
"loss": 0.0,
|
39930 |
+
"step": 5703
|
39931 |
+
},
|
39932 |
+
{
|
39933 |
+
"epoch": 0.9593810444874274,
|
39934 |
+
"grad_norm": NaN,
|
39935 |
+
"learning_rate": 4.188875694556449e-07,
|
39936 |
+
"loss": 0.0,
|
39937 |
+
"step": 5704
|
39938 |
+
},
|
39939 |
+
{
|
39940 |
+
"epoch": 0.9595492389201917,
|
39941 |
+
"grad_norm": NaN,
|
39942 |
+
"learning_rate": 4.1542334795280355e-07,
|
39943 |
+
"loss": 0.0,
|
39944 |
+
"step": 5705
|
39945 |
+
},
|
39946 |
+
{
|
39947 |
+
"epoch": 0.959717433352956,
|
39948 |
+
"grad_norm": NaN,
|
39949 |
+
"learning_rate": 4.119734508787776e-07,
|
39950 |
+
"loss": 0.0,
|
39951 |
+
"step": 5706
|
39952 |
+
},
|
39953 |
+
{
|
39954 |
+
"epoch": 0.9598856277857203,
|
39955 |
+
"grad_norm": NaN,
|
39956 |
+
"learning_rate": 4.0853787923020303e-07,
|
39957 |
+
"loss": 0.0,
|
39958 |
+
"step": 5707
|
39959 |
+
},
|
39960 |
+
{
|
39961 |
+
"epoch": 0.9600538222184846,
|
39962 |
+
"grad_norm": NaN,
|
39963 |
+
"learning_rate": 4.0511663399958044e-07,
|
39964 |
+
"loss": 0.0,
|
39965 |
+
"step": 5708
|
39966 |
+
},
|
39967 |
+
{
|
39968 |
+
"epoch": 0.9602220166512488,
|
39969 |
+
"grad_norm": NaN,
|
39970 |
+
"learning_rate": 4.017097161752692e-07,
|
39971 |
+
"loss": 0.0,
|
39972 |
+
"step": 5709
|
39973 |
+
},
|
39974 |
+
{
|
39975 |
+
"epoch": 0.9603902110840131,
|
39976 |
+
"grad_norm": NaN,
|
39977 |
+
"learning_rate": 3.983171267414876e-07,
|
39978 |
+
"loss": 0.0,
|
39979 |
+
"step": 5710
|
39980 |
+
},
|
39981 |
+
{
|
39982 |
+
"epoch": 0.9605584055167774,
|
39983 |
+
"grad_norm": NaN,
|
39984 |
+
"learning_rate": 3.949388666783127e-07,
|
39985 |
+
"loss": 0.0,
|
39986 |
+
"step": 5711
|
39987 |
+
},
|
39988 |
+
{
|
39989 |
+
"epoch": 0.9607265999495417,
|
39990 |
+
"grad_norm": NaN,
|
39991 |
+
"learning_rate": 3.9157493696169724e-07,
|
39992 |
+
"loss": 0.0,
|
39993 |
+
"step": 5712
|
39994 |
+
},
|
39995 |
+
{
|
39996 |
+
"epoch": 0.960894794382306,
|
39997 |
+
"grad_norm": NaN,
|
39998 |
+
"learning_rate": 3.8822533856343044e-07,
|
39999 |
+
"loss": 0.0,
|
40000 |
+
"step": 5713
|
40001 |
+
},
|
40002 |
+
{
|
40003 |
+
"epoch": 0.9610629888150702,
|
40004 |
+
"grad_norm": NaN,
|
40005 |
+
"learning_rate": 3.848900724511828e-07,
|
40006 |
+
"loss": 0.0,
|
40007 |
+
"step": 5714
|
40008 |
+
},
|
40009 |
+
{
|
40010 |
+
"epoch": 0.9612311832478345,
|
40011 |
+
"grad_norm": NaN,
|
40012 |
+
"learning_rate": 3.815691395884724e-07,
|
40013 |
+
"loss": 0.0,
|
40014 |
+
"step": 5715
|
40015 |
+
},
|
40016 |
+
{
|
40017 |
+
"epoch": 0.9613993776805988,
|
40018 |
+
"grad_norm": NaN,
|
40019 |
+
"learning_rate": 3.782625409346763e-07,
|
40020 |
+
"loss": 0.0,
|
40021 |
+
"step": 5716
|
40022 |
+
},
|
40023 |
+
{
|
40024 |
+
"epoch": 0.9615675721133631,
|
40025 |
+
"grad_norm": NaN,
|
40026 |
+
"learning_rate": 3.749702774450414e-07,
|
40027 |
+
"loss": 0.0,
|
40028 |
+
"step": 5717
|
40029 |
+
},
|
40030 |
+
{
|
40031 |
+
"epoch": 0.9617357665461274,
|
40032 |
+
"grad_norm": NaN,
|
40033 |
+
"learning_rate": 3.7169235007065707e-07,
|
40034 |
+
"loss": 0.0,
|
40035 |
+
"step": 5718
|
40036 |
+
},
|
40037 |
+
{
|
40038 |
+
"epoch": 0.9619039609788916,
|
40039 |
+
"grad_norm": NaN,
|
40040 |
+
"learning_rate": 3.684287597584879e-07,
|
40041 |
+
"loss": 0.0,
|
40042 |
+
"step": 5719
|
40043 |
+
},
|
40044 |
+
{
|
40045 |
+
"epoch": 0.9620721554116559,
|
40046 |
+
"grad_norm": NaN,
|
40047 |
+
"learning_rate": 3.651795074513409e-07,
|
40048 |
+
"loss": 0.0,
|
40049 |
+
"step": 5720
|
40050 |
+
},
|
40051 |
+
{
|
40052 |
+
"epoch": 0.9622403498444202,
|
40053 |
+
"grad_norm": NaN,
|
40054 |
+
"learning_rate": 3.619445940878929e-07,
|
40055 |
+
"loss": 0.0,
|
40056 |
+
"step": 5721
|
40057 |
+
},
|
40058 |
+
{
|
40059 |
+
"epoch": 0.9624085442771845,
|
40060 |
+
"grad_norm": NaN,
|
40061 |
+
"learning_rate": 3.587240206026743e-07,
|
40062 |
+
"loss": 0.0,
|
40063 |
+
"step": 5722
|
40064 |
+
},
|
40065 |
+
{
|
40066 |
+
"epoch": 0.9625767387099488,
|
40067 |
+
"grad_norm": NaN,
|
40068 |
+
"learning_rate": 3.555177879260685e-07,
|
40069 |
+
"loss": 0.0,
|
40070 |
+
"step": 5723
|
40071 |
+
},
|
40072 |
+
{
|
40073 |
+
"epoch": 0.9627449331427129,
|
40074 |
+
"grad_norm": NaN,
|
40075 |
+
"learning_rate": 3.5232589698432907e-07,
|
40076 |
+
"loss": 0.0,
|
40077 |
+
"step": 5724
|
40078 |
+
},
|
40079 |
+
{
|
40080 |
+
"epoch": 0.9629131275754772,
|
40081 |
+
"grad_norm": NaN,
|
40082 |
+
"learning_rate": 3.491483486995517e-07,
|
40083 |
+
"loss": 0.0,
|
40084 |
+
"step": 5725
|
40085 |
+
},
|
40086 |
+
{
|
40087 |
+
"epoch": 0.9630813220082415,
|
40088 |
+
"grad_norm": NaN,
|
40089 |
+
"learning_rate": 3.459851439896966e-07,
|
40090 |
+
"loss": 0.0,
|
40091 |
+
"step": 5726
|
40092 |
+
},
|
40093 |
+
{
|
40094 |
+
"epoch": 0.9632495164410058,
|
40095 |
+
"grad_norm": NaN,
|
40096 |
+
"learning_rate": 3.428362837685717e-07,
|
40097 |
+
"loss": 0.0,
|
40098 |
+
"step": 5727
|
40099 |
+
},
|
40100 |
+
{
|
40101 |
+
"epoch": 0.96341771087377,
|
40102 |
+
"grad_norm": NaN,
|
40103 |
+
"learning_rate": 3.397017689458548e-07,
|
40104 |
+
"loss": 0.0,
|
40105 |
+
"step": 5728
|
40106 |
+
},
|
40107 |
+
{
|
40108 |
+
"epoch": 0.9635859053065343,
|
40109 |
+
"grad_norm": NaN,
|
40110 |
+
"learning_rate": 3.365816004270661e-07,
|
40111 |
+
"loss": 0.0,
|
40112 |
+
"step": 5729
|
40113 |
+
},
|
40114 |
+
{
|
40115 |
+
"epoch": 0.9637540997392986,
|
40116 |
+
"grad_norm": NaN,
|
40117 |
+
"learning_rate": 3.334757791135956e-07,
|
40118 |
+
"loss": 0.0,
|
40119 |
+
"step": 5730
|
40120 |
+
},
|
40121 |
+
{
|
40122 |
+
"epoch": 0.9639222941720629,
|
40123 |
+
"grad_norm": NaN,
|
40124 |
+
"learning_rate": 3.303843059026757e-07,
|
40125 |
+
"loss": 0.0,
|
40126 |
+
"step": 5731
|
40127 |
+
},
|
40128 |
+
{
|
40129 |
+
"epoch": 0.9640904886048272,
|
40130 |
+
"grad_norm": NaN,
|
40131 |
+
"learning_rate": 3.273071816873974e-07,
|
40132 |
+
"loss": 0.0,
|
40133 |
+
"step": 5732
|
40134 |
+
},
|
40135 |
+
{
|
40136 |
+
"epoch": 0.9642586830375914,
|
40137 |
+
"grad_norm": NaN,
|
40138 |
+
"learning_rate": 3.2424440735670526e-07,
|
40139 |
+
"loss": 0.0,
|
40140 |
+
"step": 5733
|
40141 |
+
},
|
40142 |
+
{
|
40143 |
+
"epoch": 0.9644268774703557,
|
40144 |
+
"grad_norm": NaN,
|
40145 |
+
"learning_rate": 3.211959837954026e-07,
|
40146 |
+
"loss": 0.0,
|
40147 |
+
"step": 5734
|
40148 |
+
},
|
40149 |
+
{
|
40150 |
+
"epoch": 0.96459507190312,
|
40151 |
+
"grad_norm": NaN,
|
40152 |
+
"learning_rate": 3.1816191188415166e-07,
|
40153 |
+
"loss": 0.0,
|
40154 |
+
"step": 5735
|
40155 |
+
},
|
40156 |
+
{
|
40157 |
+
"epoch": 0.9647632663358843,
|
40158 |
+
"grad_norm": NaN,
|
40159 |
+
"learning_rate": 3.151421924994513e-07,
|
40160 |
+
"loss": 0.0,
|
40161 |
+
"step": 5736
|
40162 |
}
|
40163 |
],
|
40164 |
"logging_steps": 1,
|
|
|
40178 |
"attributes": {}
|
40179 |
}
|
40180 |
},
|
40181 |
+
"total_flos": 1.0797842128699392e+17,
|
40182 |
"train_batch_size": 8,
|
40183 |
"trial_name": null,
|
40184 |
"trial_params": null
|