Training in progress, step 2868, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a96170c4566c1e7efa993538ecb2474b7056c80c01787e9d312ce31d75111369
 size 2145944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:635f9699d60977f7f49fef0fac4d8dd2e9540f390cee9cca0ae8d57bdec47c46
 size 4310020
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a9966c454e255711c675f388ff89a77f36c50534180a271dc2d40e7f43b870bf
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:237b866b1fce3ebf6f30679e5de802610141e1fcea25e04865362188175cac93
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9893066574680924,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2868,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18410,6 +18410,1679 @@
       "learning_rate": 2.2784275177278934e-06,
       "loss": 9.3997,
       "step": 2629
+    },
+    {
+      "epoch": 0.9072093825457054,
+      "grad_norm": 1.3021107912063599,
+      "learning_rate": 2.2617097018509613e-06,
+      "loss": 9.5147,
+      "step": 2630
+    },
+    {
+      "epoch": 0.9075543290789928,
+      "grad_norm": 1.280267357826233,
+      "learning_rate": 2.2450520256057038e-06,
+      "loss": 9.5056,
+      "step": 2631
+    },
+    {
+      "epoch": 0.9078992756122801,
+      "grad_norm": 1.3371871709823608,
+      "learning_rate": 2.22845450997709e-06,
+      "loss": 9.521,
+      "step": 2632
+    },
+    {
+      "epoch": 0.9082442221455674,
+      "grad_norm": 1.3242119550704956,
+      "learning_rate": 2.2119171758743117e-06,
+      "loss": 9.4992,
+      "step": 2633
+    },
+    {
+      "epoch": 0.9085891686788548,
+      "grad_norm": 1.357699990272522,
+      "learning_rate": 2.19544004413072e-06,
+      "loss": 9.4417,
+      "step": 2634
+    },
+    {
+      "epoch": 0.9089341152121421,
+      "grad_norm": 1.3349027633666992,
+      "learning_rate": 2.1790231355038495e-06,
+      "loss": 9.462,
+      "step": 2635
+    },
+    {
+      "epoch": 0.9092790617454295,
+      "grad_norm": 1.4189931154251099,
+      "learning_rate": 2.162666470675334e-06,
+      "loss": 9.4083,
+      "step": 2636
+    },
+    {
+      "epoch": 0.9096240082787168,
+      "grad_norm": 1.5067139863967896,
+      "learning_rate": 2.146370070250958e-06,
+      "loss": 9.4424,
+      "step": 2637
+    },
+    {
+      "epoch": 0.9099689548120041,
+      "grad_norm": 1.5032390356063843,
+      "learning_rate": 2.130133954760538e-06,
+      "loss": 9.4019,
+      "step": 2638
+    },
+    {
+      "epoch": 0.9103139013452914,
+      "grad_norm": 1.3871209621429443,
+      "learning_rate": 2.1139581446580017e-06,
+      "loss": 9.5032,
+      "step": 2639
+    },
+    {
+      "epoch": 0.9106588478785789,
+      "grad_norm": 1.5128953456878662,
+      "learning_rate": 2.097842660321242e-06,
+      "loss": 9.3989,
+      "step": 2640
+    },
+    {
+      "epoch": 0.9110037944118662,
+      "grad_norm": 1.3251484632492065,
+      "learning_rate": 2.081787522052203e-06,
+      "loss": 9.5056,
+      "step": 2641
+    },
+    {
+      "epoch": 0.9113487409451535,
+      "grad_norm": 1.3176518678665161,
+      "learning_rate": 2.0657927500767894e-06,
+      "loss": 9.4636,
+      "step": 2642
+    },
+    {
+      "epoch": 0.9116936874784408,
+      "grad_norm": 1.5699663162231445,
+      "learning_rate": 2.0498583645448487e-06,
+      "loss": 9.4639,
+      "step": 2643
+    },
+    {
+      "epoch": 0.9120386340117281,
+      "grad_norm": 1.4477311372756958,
+      "learning_rate": 2.0339843855301744e-06,
+      "loss": 9.4008,
+      "step": 2644
+    },
+    {
+      "epoch": 0.9123835805450156,
+      "grad_norm": 1.6083184480667114,
+      "learning_rate": 2.018170833030436e-06,
+      "loss": 9.4396,
+      "step": 2645
+    },
+    {
+      "epoch": 0.9127285270783029,
+      "grad_norm": 1.6080248355865479,
+      "learning_rate": 2.0024177269672094e-06,
+      "loss": 9.4432,
+      "step": 2646
+    },
+    {
+      "epoch": 0.9130734736115902,
+      "grad_norm": 1.4727885723114014,
+      "learning_rate": 1.986725087185898e-06,
+      "loss": 9.4662,
+      "step": 2647
+    },
+    {
+      "epoch": 0.9134184201448775,
+      "grad_norm": 1.6938202381134033,
+      "learning_rate": 1.9710929334557484e-06,
+      "loss": 9.332,
+      "step": 2648
+    },
+    {
+      "epoch": 0.9137633666781649,
+      "grad_norm": 1.6540645360946655,
+      "learning_rate": 1.9555212854697803e-06,
+      "loss": 9.3841,
+      "step": 2649
+    },
+    {
+      "epoch": 0.9141083132114523,
+      "grad_norm": 1.7224019765853882,
+      "learning_rate": 1.940010162844824e-06,
+      "loss": 9.3694,
+      "step": 2650
+    },
+    {
+      "epoch": 0.9144532597447396,
+      "grad_norm": 0.7851664423942566,
+      "learning_rate": 1.9245595851214328e-06,
+      "loss": 9.6529,
+      "step": 2651
+    },
+    {
+      "epoch": 0.9147982062780269,
+      "grad_norm": 0.9760227203369141,
+      "learning_rate": 1.909169571763908e-06,
+      "loss": 9.5619,
+      "step": 2652
+    },
+    {
+      "epoch": 0.9151431528113142,
+      "grad_norm": 1.0906972885131836,
+      "learning_rate": 1.8938401421602359e-06,
+      "loss": 9.4804,
+      "step": 2653
+    },
+    {
+      "epoch": 0.9154880993446016,
+      "grad_norm": 1.1999458074569702,
+      "learning_rate": 1.8785713156221018e-06,
+      "loss": 9.527,
+      "step": 2654
+    },
+    {
+      "epoch": 0.9158330458778889,
+      "grad_norm": 1.1160448789596558,
+      "learning_rate": 1.863363111384836e-06,
+      "loss": 9.5069,
+      "step": 2655
+    },
+    {
+      "epoch": 0.9161779924111763,
+      "grad_norm": 1.0647850036621094,
+      "learning_rate": 1.8482155486073739e-06,
+      "loss": 9.5693,
+      "step": 2656
+    },
+    {
+      "epoch": 0.9165229389444636,
+      "grad_norm": 1.2221078872680664,
+      "learning_rate": 1.8331286463722951e-06,
+      "loss": 9.4765,
+      "step": 2657
+    },
+    {
+      "epoch": 0.916867885477751,
+      "grad_norm": 1.1700892448425293,
+      "learning_rate": 1.8181024236857246e-06,
+      "loss": 9.5458,
+      "step": 2658
+    },
+    {
+      "epoch": 0.9172128320110383,
+      "grad_norm": 1.2416577339172363,
+      "learning_rate": 1.8031368994773756e-06,
+      "loss": 9.5379,
+      "step": 2659
+    },
+    {
+      "epoch": 0.9175577785443256,
+      "grad_norm": 1.1829198598861694,
+      "learning_rate": 1.788232092600478e-06,
+      "loss": 9.5421,
+      "step": 2660
+    },
+    {
+      "epoch": 0.917902725077613,
+      "grad_norm": 1.1688567399978638,
+      "learning_rate": 1.7733880218317788e-06,
+      "loss": 9.4909,
+      "step": 2661
+    },
+    {
+      "epoch": 0.9182476716109003,
+      "grad_norm": 1.1145144701004028,
+      "learning_rate": 1.7586047058714972e-06,
+      "loss": 9.5701,
+      "step": 2662
+    },
+    {
+      "epoch": 0.9185926181441877,
+      "grad_norm": 1.1775919198989868,
+      "learning_rate": 1.74388216334333e-06,
+      "loss": 9.5376,
+      "step": 2663
+    },
+    {
+      "epoch": 0.918937564677475,
+      "grad_norm": 1.2331610918045044,
+      "learning_rate": 1.7292204127944134e-06,
+      "loss": 9.4995,
+      "step": 2664
+    },
+    {
+      "epoch": 0.9192825112107623,
+      "grad_norm": 1.2776384353637695,
+      "learning_rate": 1.714619472695278e-06,
+      "loss": 9.5589,
+      "step": 2665
+    },
+    {
+      "epoch": 0.9196274577440496,
+      "grad_norm": 1.1205222606658936,
+      "learning_rate": 1.7000793614398714e-06,
+      "loss": 9.586,
+      "step": 2666
+    },
+    {
+      "epoch": 0.9199724042773371,
+      "grad_norm": 1.172733187675476,
+      "learning_rate": 1.6856000973455022e-06,
+      "loss": 9.5626,
+      "step": 2667
+    },
+    {
+      "epoch": 0.9203173508106244,
+      "grad_norm": 1.1725226640701294,
+      "learning_rate": 1.6711816986528238e-06,
+      "loss": 9.5409,
+      "step": 2668
+    },
+    {
+      "epoch": 0.9206622973439117,
+      "grad_norm": 1.3246572017669678,
+      "learning_rate": 1.6568241835258068e-06,
+      "loss": 9.5028,
+      "step": 2669
+    },
+    {
+      "epoch": 0.921007243877199,
+      "grad_norm": 1.4405431747436523,
+      "learning_rate": 1.6425275700517385e-06,
+      "loss": 9.4395,
+      "step": 2670
+    },
+    {
+      "epoch": 0.9213521904104863,
+      "grad_norm": 1.2822954654693604,
+      "learning_rate": 1.6282918762411614e-06,
+      "loss": 9.5234,
+      "step": 2671
+    },
+    {
+      "epoch": 0.9216971369437738,
+      "grad_norm": 1.3265615701675415,
+      "learning_rate": 1.614117120027886e-06,
+      "loss": 9.4913,
+      "step": 2672
+    },
+    {
+      "epoch": 0.9220420834770611,
+      "grad_norm": 1.2790005207061768,
+      "learning_rate": 1.6000033192689611e-06,
+      "loss": 9.4907,
+      "step": 2673
+    },
+    {
+      "epoch": 0.9223870300103484,
+      "grad_norm": 1.3700817823410034,
+      "learning_rate": 1.5859504917446366e-06,
+      "loss": 9.48,
+      "step": 2674
+    },
+    {
+      "epoch": 0.9227319765436357,
+      "grad_norm": 1.3636354207992554,
+      "learning_rate": 1.5719586551583454e-06,
+      "loss": 9.4867,
+      "step": 2675
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 1.2950481176376343,
+      "learning_rate": 1.5580278271366878e-06,
+      "loss": 9.4936,
+      "step": 2676
+    },
+    {
+      "epoch": 0.9234218696102104,
+      "grad_norm": 1.3804364204406738,
+      "learning_rate": 1.5441580252294253e-06,
+      "loss": 9.4613,
+      "step": 2677
+    },
+    {
+      "epoch": 0.9237668161434978,
+      "grad_norm": 1.369942307472229,
+      "learning_rate": 1.5303492669094089e-06,
+      "loss": 9.5052,
+      "step": 2678
+    },
+    {
+      "epoch": 0.9241117626767851,
+      "grad_norm": 1.2750253677368164,
+      "learning_rate": 1.5166015695726122e-06,
+      "loss": 9.4562,
+      "step": 2679
+    },
+    {
+      "epoch": 0.9244567092100724,
+      "grad_norm": 1.348370909690857,
+      "learning_rate": 1.5029149505380646e-06,
+      "loss": 9.4602,
+      "step": 2680
+    },
+    {
+      "epoch": 0.9248016557433598,
+      "grad_norm": 1.2539187669754028,
+      "learning_rate": 1.4892894270478853e-06,
+      "loss": 9.525,
+      "step": 2681
+    },
+    {
+      "epoch": 0.9251466022766471,
+      "grad_norm": 1.474637508392334,
+      "learning_rate": 1.4757250162671822e-06,
+      "loss": 9.449,
+      "step": 2682
+    },
+    {
+      "epoch": 0.9254915488099345,
+      "grad_norm": 1.484521508216858,
+      "learning_rate": 1.4622217352841138e-06,
+      "loss": 9.3875,
+      "step": 2683
+    },
+    {
+      "epoch": 0.9258364953432218,
+      "grad_norm": 1.2464600801467896,
+      "learning_rate": 1.448779601109801e-06,
+      "loss": 9.4846,
+      "step": 2684
+    },
+    {
+      "epoch": 0.9261814418765092,
+      "grad_norm": 1.404441237449646,
+      "learning_rate": 1.4353986306783418e-06,
+      "loss": 9.516,
+      "step": 2685
+    },
+    {
+      "epoch": 0.9265263884097965,
+      "grad_norm": 1.3945449590682983,
+      "learning_rate": 1.4220788408468021e-06,
+      "loss": 9.4727,
+      "step": 2686
+    },
+    {
+      "epoch": 0.9268713349430838,
+      "grad_norm": 1.3472886085510254,
+      "learning_rate": 1.4088202483951374e-06,
+      "loss": 9.4367,
+      "step": 2687
+    },
+    {
+      "epoch": 0.9272162814763711,
+      "grad_norm": 1.377700686454773,
+      "learning_rate": 1.3956228700262252e-06,
+      "loss": 9.5139,
+      "step": 2688
+    },
+    {
+      "epoch": 0.9275612280096585,
+      "grad_norm": 1.389609456062317,
+      "learning_rate": 1.3824867223658388e-06,
+      "loss": 9.4322,
+      "step": 2689
+    },
+    {
+      "epoch": 0.9279061745429459,
+      "grad_norm": 1.4232558012008667,
+      "learning_rate": 1.3694118219626074e-06,
+      "loss": 9.5109,
+      "step": 2690
+    },
+    {
+      "epoch": 0.9282511210762332,
+      "grad_norm": 1.3743571043014526,
+      "learning_rate": 1.3563981852879827e-06,
+      "loss": 9.4921,
+      "step": 2691
+    },
+    {
+      "epoch": 0.9285960676095205,
+      "grad_norm": 1.4875822067260742,
+      "learning_rate": 1.3434458287362672e-06,
+      "loss": 9.463,
+      "step": 2692
+    },
+    {
+      "epoch": 0.9289410141428078,
+      "grad_norm": 1.3445839881896973,
+      "learning_rate": 1.3305547686245422e-06,
+      "loss": 9.4212,
+      "step": 2693
+    },
+    {
+      "epoch": 0.9292859606760953,
+      "grad_norm": 1.6044926643371582,
+      "learning_rate": 1.3177250211926728e-06,
+      "loss": 9.4379,
+      "step": 2694
+    },
+    {
+      "epoch": 0.9296309072093826,
+      "grad_norm": 1.5688241720199585,
+      "learning_rate": 1.3049566026033023e-06,
+      "loss": 9.4778,
+      "step": 2695
+    },
+    {
+      "epoch": 0.9299758537426699,
+      "grad_norm": 1.6058199405670166,
+      "learning_rate": 1.2922495289417913e-06,
+      "loss": 9.421,
+      "step": 2696
+    },
+    {
+      "epoch": 0.9303208002759572,
+      "grad_norm": 1.693510890007019,
+      "learning_rate": 1.2796038162162239e-06,
+      "loss": 9.354,
+      "step": 2697
+    },
+    {
+      "epoch": 0.9306657468092445,
+      "grad_norm": 1.5772984027862549,
+      "learning_rate": 1.2670194803573954e-06,
+      "loss": 9.3735,
+      "step": 2698
+    },
+    {
+      "epoch": 0.931010693342532,
+      "grad_norm": 1.614292860031128,
+      "learning_rate": 1.2544965372187635e-06,
+      "loss": 9.3702,
+      "step": 2699
+    },
+    {
+      "epoch": 0.9313556398758193,
+      "grad_norm": 1.9061168432235718,
+      "learning_rate": 1.2420350025764528e-06,
+      "loss": 9.2773,
+      "step": 2700
+    },
+    {
+      "epoch": 0.9317005864091066,
+      "grad_norm": 0.8620436787605286,
+      "learning_rate": 1.2296348921292333e-06,
+      "loss": 9.6332,
+      "step": 2701
+    },
+    {
+      "epoch": 0.9320455329423939,
+      "grad_norm": 0.8850879073143005,
+      "learning_rate": 1.2172962214984763e-06,
+      "loss": 9.6099,
+      "step": 2702
+    },
+    {
+      "epoch": 0.9323904794756813,
+      "grad_norm": 1.0009554624557495,
+      "learning_rate": 1.2050190062281752e-06,
+      "loss": 9.5471,
+      "step": 2703
+    },
+    {
+      "epoch": 0.9327354260089686,
+      "grad_norm": 1.0762373208999634,
+      "learning_rate": 1.1928032617848805e-06,
+      "loss": 9.5482,
+      "step": 2704
+    },
+    {
+      "epoch": 0.933080372542256,
+      "grad_norm": 1.0936650037765503,
+      "learning_rate": 1.1806490035577267e-06,
+      "loss": 9.5399,
+      "step": 2705
+    },
+    {
+      "epoch": 0.9334253190755433,
+      "grad_norm": 1.043621301651001,
+      "learning_rate": 1.16855624685836e-06,
+      "loss": 9.5344,
+      "step": 2706
+    },
+    {
+      "epoch": 0.9337702656088306,
+      "grad_norm": 1.2308648824691772,
+      "learning_rate": 1.1565250069209776e-06,
+      "loss": 9.5064,
+      "step": 2707
+    },
+    {
+      "epoch": 0.934115212142118,
+      "grad_norm": 1.162726640701294,
+      "learning_rate": 1.1445552989022668e-06,
+      "loss": 9.5275,
+      "step": 2708
+    },
+    {
+      "epoch": 0.9344601586754053,
+      "grad_norm": 1.1094176769256592,
+      "learning_rate": 1.132647137881393e-06,
+      "loss": 9.5538,
+      "step": 2709
+    },
+    {
+      "epoch": 0.9348051052086926,
+      "grad_norm": 1.255376935005188,
+      "learning_rate": 1.120800538859995e-06,
+      "loss": 9.463,
+      "step": 2710
+    },
+    {
+      "epoch": 0.93515005174198,
+      "grad_norm": 1.1070146560668945,
+      "learning_rate": 1.1090155167621518e-06,
+      "loss": 9.5253,
+      "step": 2711
+    },
+    {
+      "epoch": 0.9354949982752674,
+      "grad_norm": 1.3558119535446167,
+      "learning_rate": 1.0972920864343705e-06,
+      "loss": 9.4959,
+      "step": 2712
+    },
+    {
+      "epoch": 0.9358399448085547,
+      "grad_norm": 1.2252329587936401,
+      "learning_rate": 1.085630262645565e-06,
+      "loss": 9.5362,
+      "step": 2713
+    },
+    {
+      "epoch": 0.936184891341842,
+      "grad_norm": 1.1986970901489258,
+      "learning_rate": 1.07403006008705e-06,
+      "loss": 9.4938,
+      "step": 2714
+    },
+    {
+      "epoch": 0.9365298378751293,
+      "grad_norm": 1.1853275299072266,
+      "learning_rate": 1.062491493372486e-06,
+      "loss": 9.5321,
+      "step": 2715
+    },
+    {
+      "epoch": 0.9368747844084166,
+      "grad_norm": 1.169337511062622,
+      "learning_rate": 1.0510145770379177e-06,
+      "loss": 9.5066,
+      "step": 2716
+    },
+    {
+      "epoch": 0.9372197309417041,
+      "grad_norm": 1.2148215770721436,
+      "learning_rate": 1.0395993255416957e-06,
+      "loss": 9.5333,
+      "step": 2717
+    },
+    {
+      "epoch": 0.9375646774749914,
+      "grad_norm": 1.3632956743240356,
+      "learning_rate": 1.0282457532645119e-06,
+      "loss": 9.4924,
+      "step": 2718
+    },
+    {
+      "epoch": 0.9379096240082787,
+      "grad_norm": 1.1406636238098145,
+      "learning_rate": 1.0169538745093242e-06,
+      "loss": 9.5274,
+      "step": 2719
+    },
+    {
+      "epoch": 0.938254570541566,
+      "grad_norm": 1.1914732456207275,
+      "learning_rate": 1.0057237035014044e-06,
+      "loss": 9.4695,
+      "step": 2720
+    },
+    {
+      "epoch": 0.9385995170748535,
+      "grad_norm": 1.4516353607177734,
+      "learning_rate": 9.945552543882685e-07,
+      "loss": 9.4253,
+      "step": 2721
+    },
+    {
+      "epoch": 0.9389444636081408,
+      "grad_norm": 1.3653576374053955,
+      "learning_rate": 9.834485412396677e-07,
+      "loss": 9.4781,
+      "step": 2722
+    },
+    {
+      "epoch": 0.9392894101414281,
+      "grad_norm": 1.1982650756835938,
+      "learning_rate": 9.724035780476092e-07,
+      "loss": 9.599,
+      "step": 2723
+    },
+    {
+      "epoch": 0.9396343566747154,
+      "grad_norm": 1.244680643081665,
+      "learning_rate": 9.6142037872628e-07,
+      "loss": 9.5415,
+      "step": 2724
+    },
+    {
+      "epoch": 0.9399793032080027,
+      "grad_norm": 1.288313388824463,
+      "learning_rate": 9.504989571120726e-07,
+      "loss": 9.5385,
+      "step": 2725
+    },
+    {
+      "epoch": 0.9403242497412901,
+      "grad_norm": 1.185012698173523,
+      "learning_rate": 9.396393269635484e-07,
+      "loss": 9.5646,
+      "step": 2726
+    },
+    {
+      "epoch": 0.9406691962745775,
+      "grad_norm": 1.2785496711730957,
+      "learning_rate": 9.28841501961425e-07,
+      "loss": 9.496,
+      "step": 2727
+    },
+    {
+      "epoch": 0.9410141428078648,
+      "grad_norm": 1.2989834547042847,
+      "learning_rate": 9.1810549570856e-07,
+      "loss": 9.5301,
+      "step": 2728
+    },
+    {
+      "epoch": 0.9413590893411521,
+      "grad_norm": 1.2136149406433105,
+      "learning_rate": 9.074313217299457e-07,
+      "loss": 9.5197,
+      "step": 2729
+    },
+    {
+      "epoch": 0.9417040358744395,
+      "grad_norm": 1.4056475162506104,
+      "learning_rate": 8.968189934726534e-07,
+      "loss": 9.5294,
+      "step": 2730
+    },
+    {
+      "epoch": 0.9420489824077268,
+      "grad_norm": 1.2778812646865845,
+      "learning_rate": 8.862685243058666e-07,
+      "loss": 9.455,
+      "step": 2731
+    },
+    {
+      "epoch": 0.9423939289410141,
+      "grad_norm": 1.3463318347930908,
+      "learning_rate": 8.757799275208311e-07,
+      "loss": 9.5346,
+      "step": 2732
+    },
+    {
+      "epoch": 0.9427388754743015,
+      "grad_norm": 1.2876229286193848,
+      "learning_rate": 8.653532163308387e-07,
+      "loss": 9.5036,
+      "step": 2733
+    },
+    {
+      "epoch": 0.9430838220075888,
+      "grad_norm": 1.3723164796829224,
+      "learning_rate": 8.549884038712375e-07,
+      "loss": 9.5081,
+      "step": 2734
+    },
+    {
+      "epoch": 0.9434287685408762,
+      "grad_norm": 1.4172887802124023,
+      "learning_rate": 8.446855031993717e-07,
+      "loss": 9.4852,
+      "step": 2735
+    },
+    {
+      "epoch": 0.9437737150741635,
+      "grad_norm": 1.4235061407089233,
+      "learning_rate": 8.344445272946199e-07,
+      "loss": 9.486,
+      "step": 2736
+    },
+    {
+      "epoch": 0.9441186616074508,
+      "grad_norm": 1.449661374092102,
+      "learning_rate": 8.24265489058329e-07,
+      "loss": 9.4349,
+      "step": 2737
+    },
+    {
+      "epoch": 0.9444636081407382,
+      "grad_norm": 1.4169197082519531,
+      "learning_rate": 8.1414840131383e-07,
+      "loss": 9.4753,
+      "step": 2738
+    },
+    {
+      "epoch": 0.9448085546740256,
+      "grad_norm": 1.4273600578308105,
+      "learning_rate": 8.040932768063947e-07,
+      "loss": 9.4446,
+      "step": 2739
+    },
+    {
+      "epoch": 0.9451535012073129,
+      "grad_norm": 1.4585797786712646,
+      "learning_rate": 7.941001282032512e-07,
+      "loss": 9.4683,
+      "step": 2740
+    },
+    {
+      "epoch": 0.9454984477406002,
+      "grad_norm": 1.433382272720337,
+      "learning_rate": 7.841689680935349e-07,
+      "loss": 9.4656,
+      "step": 2741
+    },
+    {
+      "epoch": 0.9458433942738875,
+      "grad_norm": 1.298423409461975,
+      "learning_rate": 7.742998089883102e-07,
+      "loss": 9.4483,
+      "step": 2742
+    },
+    {
+      "epoch": 0.9461883408071748,
+      "grad_norm": 1.4828376770019531,
+      "learning_rate": 7.644926633205208e-07,
+      "loss": 9.4068,
+      "step": 2743
+    },
+    {
+      "epoch": 0.9465332873404623,
+      "grad_norm": 1.4661272764205933,
+      "learning_rate": 7.547475434449835e-07,
+      "loss": 9.4218,
+      "step": 2744
+    },
+    {
+      "epoch": 0.9468782338737496,
+      "grad_norm": 1.4254372119903564,
+      "learning_rate": 7.450644616383951e-07,
+      "loss": 9.4599,
+      "step": 2745
+    },
+    {
+      "epoch": 0.9472231804070369,
+      "grad_norm": 1.6096229553222656,
+      "learning_rate": 7.354434300992752e-07,
+      "loss": 9.4529,
+      "step": 2746
+    },
+    {
+      "epoch": 0.9475681269403242,
+      "grad_norm": 1.6057524681091309,
+      "learning_rate": 7.258844609479953e-07,
+      "loss": 9.3971,
+      "step": 2747
+    },
+    {
+      "epoch": 0.9479130734736116,
+      "grad_norm": 1.5125082731246948,
+      "learning_rate": 7.163875662267117e-07,
+      "loss": 9.417,
+      "step": 2748
+    },
+    {
+      "epoch": 0.948258020006899,
+      "grad_norm": 1.7600480318069458,
+      "learning_rate": 7.069527578994151e-07,
+      "loss": 9.2774,
+      "step": 2749
+    },
+    {
+      "epoch": 0.9486029665401863,
+      "grad_norm": 1.6905750036239624,
+      "learning_rate": 6.975800478518646e-07,
+      "loss": 9.3521,
+      "step": 2750
+    },
+    {
+      "epoch": 0.9489479130734736,
+      "grad_norm": 0.99737948179245,
+      "learning_rate": 6.88269447891593e-07,
+      "loss": 9.533,
+      "step": 2751
+    },
+    {
+      "epoch": 0.9492928596067609,
+      "grad_norm": 0.9953950643539429,
+      "learning_rate": 6.790209697478789e-07,
+      "loss": 9.5993,
+      "step": 2752
+    },
+    {
+      "epoch": 0.9496378061400483,
+      "grad_norm": 1.0281240940093994,
+      "learning_rate": 6.698346250717524e-07,
+      "loss": 9.5267,
+      "step": 2753
+    },
+    {
+      "epoch": 0.9499827526733357,
+      "grad_norm": 1.020925760269165,
+      "learning_rate": 6.607104254359675e-07,
+      "loss": 9.5643,
+      "step": 2754
+    },
+    {
+      "epoch": 0.950327699206623,
+      "grad_norm": 1.0711194276809692,
+      "learning_rate": 6.516483823349795e-07,
+      "loss": 9.5639,
+      "step": 2755
+    },
+    {
+      "epoch": 0.9506726457399103,
+      "grad_norm": 1.0857421159744263,
+      "learning_rate": 6.426485071849564e-07,
+      "loss": 9.4995,
+      "step": 2756
+    },
+    {
+      "epoch": 0.9510175922731977,
+      "grad_norm": 1.262204885482788,
+      "learning_rate": 6.337108113237344e-07,
+      "loss": 9.4864,
+      "step": 2757
+    },
+    {
+      "epoch": 0.951362538806485,
+      "grad_norm": 1.0766891241073608,
+      "learning_rate": 6.248353060108292e-07,
+      "loss": 9.5252,
+      "step": 2758
+    },
+    {
+      "epoch": 0.9517074853397723,
+      "grad_norm": 1.153833031654358,
+      "learning_rate": 6.160220024273966e-07,
+      "loss": 9.5178,
+      "step": 2759
+    },
+    {
+      "epoch": 0.9520524318730597,
+      "grad_norm": 1.1806145906448364,
+      "learning_rate": 6.072709116762442e-07,
+      "loss": 9.538,
+      "step": 2760
+    },
+    {
+      "epoch": 0.952397378406347,
+      "grad_norm": 1.146377682685852,
+      "learning_rate": 5.98582044781798e-07,
+      "loss": 9.534,
+      "step": 2761
+    },
+    {
+      "epoch": 0.9527423249396344,
+      "grad_norm": 1.1945569515228271,
+      "learning_rate": 5.899554126901075e-07,
+      "loss": 9.5208,
+      "step": 2762
+    },
+    {
+      "epoch": 0.9530872714729217,
+      "grad_norm": 1.286086916923523,
+      "learning_rate": 5.813910262687905e-07,
+      "loss": 9.4976,
+      "step": 2763
+    },
+    {
+      "epoch": 0.953432218006209,
+      "grad_norm": 1.2750720977783203,
+      "learning_rate": 5.728888963070945e-07,
+      "loss": 9.5186,
+      "step": 2764
+    },
+    {
+      "epoch": 0.9537771645394963,
+      "grad_norm": 1.2140814065933228,
+      "learning_rate": 5.644490335157959e-07,
+      "loss": 9.4896,
+      "step": 2765
+    },
+    {
+      "epoch": 0.9541221110727838,
+      "grad_norm": 1.1700353622436523,
+      "learning_rate": 5.560714485272512e-07,
+      "loss": 9.6212,
+      "step": 2766
+    },
+    {
+      "epoch": 0.9544670576060711,
+      "grad_norm": 1.3271950483322144,
+      "learning_rate": 5.477561518953566e-07,
+      "loss": 9.4967,
+      "step": 2767
+    },
+    {
+      "epoch": 0.9548120041393584,
+      "grad_norm": 1.1459087133407593,
+      "learning_rate": 5.395031540955275e-07,
+      "loss": 9.5539,
+      "step": 2768
+    },
+    {
+      "epoch": 0.9551569506726457,
+      "grad_norm": 1.122616171836853,
+      "learning_rate": 5.313124655247192e-07,
+      "loss": 9.5894,
+      "step": 2769
+    },
+    {
+      "epoch": 0.955501897205933,
+      "grad_norm": 1.3789931535720825,
+      "learning_rate": 5.231840965013668e-07,
+      "loss": 9.4521,
+      "step": 2770
+    },
+    {
+      "epoch": 0.9558468437392205,
+      "grad_norm": 1.2291425466537476,
+      "learning_rate": 5.151180572654235e-07,
+      "loss": 9.5169,
+      "step": 2771
+    },
+    {
+      "epoch": 0.9561917902725078,
+      "grad_norm": 1.1856980323791504,
+      "learning_rate": 5.071143579782889e-07,
+      "loss": 9.4942,
+      "step": 2772
+    },
+    {
+      "epoch": 0.9565367368057951,
+      "grad_norm": 1.2218326330184937,
+      "learning_rate": 4.99173008722853e-07,
+      "loss": 9.55,
+      "step": 2773
+    },
+    {
+      "epoch": 0.9568816833390824,
+      "grad_norm": 1.2583118677139282,
+      "learning_rate": 4.91294019503441e-07,
+      "loss": 9.538,
+      "step": 2774
+    },
+    {
+      "epoch": 0.9572266298723697,
+      "grad_norm": 1.3726310729980469,
+      "learning_rate": 4.834774002458409e-07,
+      "loss": 9.4831,
+      "step": 2775
+    },
+    {
+      "epoch": 0.9575715764056572,
+      "grad_norm": 1.3480925559997559,
+      "learning_rate": 4.757231607972534e-07,
+      "loss": 9.4674,
+      "step": 2776
+    },
+    {
+      "epoch": 0.9579165229389445,
+      "grad_norm": 1.2078763246536255,
+      "learning_rate": 4.680313109262813e-07,
+      "loss": 9.4641,
+      "step": 2777
+    },
+    {
+      "epoch": 0.9582614694722318,
+      "grad_norm": 1.2556449174880981,
+      "learning_rate": 4.6040186032296206e-07,
+      "loss": 9.4801,
+      "step": 2778
+    },
+    {
+      "epoch": 0.9586064160055191,
+      "grad_norm": 1.2832894325256348,
+      "learning_rate": 4.5283481859869635e-07,
+      "loss": 9.5116,
+      "step": 2779
+    },
+    {
+      "epoch": 0.9589513625388065,
+      "grad_norm": 1.3231313228607178,
+      "learning_rate": 4.4533019528628093e-07,
+      "loss": 9.4913,
+      "step": 2780
+    },
+    {
+      "epoch": 0.9592963090720938,
+      "grad_norm": 1.2611724138259888,
+      "learning_rate": 4.3788799983986997e-07,
+      "loss": 9.521,
+      "step": 2781
+    },
+    {
+      "epoch": 0.9596412556053812,
+      "grad_norm": 1.2805002927780151,
+      "learning_rate": 4.305082416349804e-07,
+      "loss": 9.502,
+      "step": 2782
+    },
+    {
+      "epoch": 0.9599862021386685,
+      "grad_norm": 1.3639451265335083,
+      "learning_rate": 4.231909299684533e-07,
+      "loss": 9.4736,
+      "step": 2783
+    },
+    {
+      "epoch": 0.9603311486719558,
+      "grad_norm": 1.4248766899108887,
+      "learning_rate": 4.159360740584817e-07,
+      "loss": 9.5154,
+      "step": 2784
+    },
+    {
+      "epoch": 0.9606760952052432,
+      "grad_norm": 1.5050346851348877,
+      "learning_rate": 4.0874368304457676e-07,
+      "loss": 9.4806,
+      "step": 2785
+    },
+    {
+      "epoch": 0.9610210417385305,
+      "grad_norm": 1.28929603099823,
+      "learning_rate": 4.016137659875463e-07,
+      "loss": 9.5005,
+      "step": 2786
+    },
+    {
+      "epoch": 0.9613659882718179,
+      "grad_norm": 1.1755974292755127,
+      "learning_rate": 3.945463318695053e-07,
+      "loss": 9.4845,
+      "step": 2787
+    },
+    {
+      "epoch": 0.9617109348051052,
+      "grad_norm": 1.4888191223144531,
+      "learning_rate": 3.8754138959383733e-07,
+      "loss": 9.4333,
+      "step": 2788
+    },
+    {
+      "epoch": 0.9620558813383926,
+      "grad_norm": 1.3838120698928833,
+      "learning_rate": 3.805989479852279e-07,
+      "loss": 9.4622,
+      "step": 2789
+    },
+    {
+      "epoch": 0.9624008278716799,
+      "grad_norm": 1.3965057134628296,
+      "learning_rate": 3.7371901578959756e-07,
+      "loss": 9.4135,
+      "step": 2790
+    },
+    {
+      "epoch": 0.9627457744049672,
+      "grad_norm": 1.3354548215866089,
+      "learning_rate": 3.6690160167413554e-07,
+      "loss": 9.4748,
+      "step": 2791
+    },
+    {
+      "epoch": 0.9630907209382545,
+      "grad_norm": 1.4212675094604492,
+      "learning_rate": 3.6014671422727185e-07,
+      "loss": 9.4848,
+      "step": 2792
+    },
+    {
+      "epoch": 0.9634356674715419,
+      "grad_norm": 1.4828577041625977,
+      "learning_rate": 3.5345436195866053e-07,
+      "loss": 9.4245,
+      "step": 2793
+    },
+    {
+      "epoch": 0.9637806140048293,
+      "grad_norm": 1.4509541988372803,
+      "learning_rate": 3.468245532991743e-07,
+      "loss": 9.4091,
+      "step": 2794
+    },
+    {
+      "epoch": 0.9641255605381166,
+      "grad_norm": 1.5361698865890503,
+      "learning_rate": 3.4025729660089877e-07,
+      "loss": 9.4698,
+      "step": 2795
+    },
+    {
+      "epoch": 0.9644705070714039,
+      "grad_norm": 1.5707025527954102,
+      "learning_rate": 3.3375260013711604e-07,
+      "loss": 9.4206,
+      "step": 2796
+    },
+    {
+      "epoch": 0.9648154536046912,
+      "grad_norm": 1.47184157371521,
+      "learning_rate": 3.273104721023046e-07,
+      "loss": 9.3935,
+      "step": 2797
+    },
+    {
+      "epoch": 0.9651604001379787,
+      "grad_norm": 1.5686029195785522,
+      "learning_rate": 3.209309206121058e-07,
+      "loss": 9.3636,
+      "step": 2798
+    },
+    {
+      "epoch": 0.965505346671266,
+      "grad_norm": 1.5438534021377563,
+      "learning_rate": 3.1461395370334104e-07,
+      "loss": 9.3861,
+      "step": 2799
+    },
+    {
+      "epoch": 0.9658502932045533,
+      "grad_norm": 1.6539479494094849,
+      "learning_rate": 3.0835957933397773e-07,
+      "loss": 9.398,
+      "step": 2800
+    },
+    {
+      "epoch": 0.9661952397378406,
+      "grad_norm": 0.774260401725769,
+      "learning_rate": 3.0216780538314116e-07,
+      "loss": 9.6004,
+      "step": 2801
+    },
+    {
+      "epoch": 0.9665401862711279,
+      "grad_norm": 0.9886148571968079,
+      "learning_rate": 2.960386396510972e-07,
+      "loss": 9.5417,
+      "step": 2802
+    },
+    {
+      "epoch": 0.9668851328044153,
+      "grad_norm": 1.0178803205490112,
+      "learning_rate": 2.8997208985921953e-07,
+      "loss": 9.5666,
+      "step": 2803
+    },
+    {
+      "epoch": 0.9672300793377027,
+      "grad_norm": 1.0676816701889038,
+      "learning_rate": 2.8396816365001687e-07,
+      "loss": 9.499,
+      "step": 2804
+    },
+    {
+      "epoch": 0.96757502587099,
+      "grad_norm": 1.0291966199874878,
+      "learning_rate": 2.7802686858710016e-07,
+      "loss": 9.5364,
+      "step": 2805
+    },
+    {
+      "epoch": 0.9679199724042773,
+      "grad_norm": 1.100340723991394,
+      "learning_rate": 2.7214821215518214e-07,
+      "loss": 9.5105,
+      "step": 2806
+    },
+    {
+      "epoch": 0.9682649189375647,
+      "grad_norm": 1.1685614585876465,
+      "learning_rate": 2.6633220176006667e-07,
+      "loss": 9.5038,
+      "step": 2807
+    },
+    {
+      "epoch": 0.968609865470852,
+      "grad_norm": 1.1775288581848145,
+      "learning_rate": 2.6057884472862617e-07,
+      "loss": 9.5383,
+      "step": 2808
+    },
+    {
+      "epoch": 0.9689548120041394,
+      "grad_norm": 1.2399282455444336,
+      "learning_rate": 2.548881483088128e-07,
+      "loss": 9.4643,
+      "step": 2809
+    },
+    {
+      "epoch": 0.9692997585374267,
+      "grad_norm": 1.2756638526916504,
+      "learning_rate": 2.49260119669642e-07,
+      "loss": 9.4916,
+      "step": 2810
+    },
+    {
+      "epoch": 0.969644705070714,
+      "grad_norm": 1.3724730014801025,
+      "learning_rate": 2.4369476590118123e-07,
+      "loss": 9.4872,
+      "step": 2811
+    },
+    {
+      "epoch": 0.9699896516040014,
+      "grad_norm": 1.1623852252960205,
+      "learning_rate": 2.381920940145277e-07,
+      "loss": 9.5659,
+      "step": 2812
+    },
+    {
+      "epoch": 0.9703345981372887,
+      "grad_norm": 1.3232759237289429,
+      "learning_rate": 2.3275211094183623e-07,
+      "loss": 9.4726,
+      "step": 2813
+    },
+    {
+      "epoch": 0.970679544670576,
+      "grad_norm": 1.105460286140442,
+      "learning_rate": 2.2737482353626937e-07,
+      "loss": 9.5364,
+      "step": 2814
+    },
+    {
+      "epoch": 0.9710244912038634,
+      "grad_norm": 1.130436897277832,
+      "learning_rate": 2.2206023857201385e-07,
+      "loss": 9.5565,
+      "step": 2815
+    },
+    {
+      "epoch": 0.9713694377371508,
+      "grad_norm": 1.2147849798202515,
+      "learning_rate": 2.1680836274426962e-07,
+      "loss": 9.5194,
+      "step": 2816
+    },
+    {
+      "epoch": 0.9717143842704381,
+      "grad_norm": 1.2784674167633057,
+      "learning_rate": 2.1161920266922763e-07,
+      "loss": 9.4549,
+      "step": 2817
+    },
+    {
+      "epoch": 0.9720593308037254,
+      "grad_norm": 1.3010627031326294,
+      "learning_rate": 2.0649276488408086e-07,
+      "loss": 9.4602,
+      "step": 2818
+    },
+    {
+      "epoch": 0.9724042773370127,
+      "grad_norm": 1.2807101011276245,
+      "learning_rate": 2.014290558469911e-07,
+      "loss": 9.5456,
+      "step": 2819
+    },
+    {
+      "epoch": 0.9727492238703,
+      "grad_norm": 1.301635503768921,
+      "learning_rate": 1.964280819371167e-07,
+      "loss": 9.5044,
+      "step": 2820
+    },
+    {
+      "epoch": 0.9730941704035875,
+      "grad_norm": 1.1405683755874634,
+      "learning_rate": 1.914898494545736e-07,
+      "loss": 9.5604,
+      "step": 2821
+    },
+    {
+      "epoch": 0.9734391169368748,
+      "grad_norm": 1.261732816696167,
+      "learning_rate": 1.8661436462042437e-07,
+      "loss": 9.4851,
+      "step": 2822
+    },
+    {
+      "epoch": 0.9737840634701621,
+      "grad_norm": 1.2781423330307007,
+      "learning_rate": 1.8180163357671143e-07,
+      "loss": 9.5201,
+      "step": 2823
+    },
+    {
+      "epoch": 0.9741290100034494,
+      "grad_norm": 1.2555526494979858,
+      "learning_rate": 1.7705166238639047e-07,
+      "loss": 9.4532,
+      "step": 2824
+    },
+    {
+      "epoch": 0.9744739565367369,
+      "grad_norm": 1.2804704904556274,
+      "learning_rate": 1.7236445703338044e-07,
+      "loss": 9.5293,
+      "step": 2825
+    },
+    {
+      "epoch": 0.9748189030700242,
+      "grad_norm": 1.3036941289901733,
+      "learning_rate": 1.677400234225135e-07,
+      "loss": 9.536,
+      "step": 2826
+    },
+    {
+      "epoch": 0.9751638496033115,
+      "grad_norm": 1.2297347784042358,
+      "learning_rate": 1.6317836737955172e-07,
+      "loss": 9.496,
+      "step": 2827
+    },
+    {
+      "epoch": 0.9755087961365988,
+      "grad_norm": 1.2707699537277222,
+      "learning_rate": 1.586794946511594e-07,
+      "loss": 9.4632,
+      "step": 2828
+    },
+    {
+      "epoch": 0.9758537426698861,
+      "grad_norm": 1.3694789409637451,
+      "learning_rate": 1.542434109049251e-07,
+      "loss": 9.4874,
+      "step": 2829
+    },
+    {
+      "epoch": 0.9761986892031735,
+      "grad_norm": 1.3027503490447998,
+      "learning_rate": 1.4987012172932302e-07,
+      "loss": 9.4417,
+      "step": 2830
+    },
+    {
+      "epoch": 0.9765436357364609,
+      "grad_norm": 1.300711750984192,
+      "learning_rate": 1.4555963263372385e-07,
+      "loss": 9.5223,
+      "step": 2831
+    },
+    {
+      "epoch": 0.9768885822697482,
+      "grad_norm": 1.29239821434021,
+      "learning_rate": 1.413119490483894e-07,
+      "loss": 9.4713,
+      "step": 2832
+    },
+    {
+      "epoch": 0.9772335288030355,
+      "grad_norm": 1.3773186206817627,
+      "learning_rate": 1.3712707632445032e-07,
+      "loss": 9.4819,
+      "step": 2833
+    },
+    {
+      "epoch": 0.9775784753363229,
+      "grad_norm": 1.4483076333999634,
+      "learning_rate": 1.3300501973392277e-07,
+      "loss": 9.4047,
+      "step": 2834
+    },
+    {
+      "epoch": 0.9779234218696102,
+      "grad_norm": 1.4170414209365845,
+      "learning_rate": 1.2894578446968065e-07,
+      "loss": 9.5096,
+      "step": 2835
+    },
+    {
+      "epoch": 0.9782683684028975,
+      "grad_norm": 1.5087085962295532,
+      "learning_rate": 1.2494937564545562e-07,
+      "loss": 9.4874,
+      "step": 2836
+    },
+    {
+      "epoch": 0.9786133149361849,
+      "grad_norm": 1.5026702880859375,
+      "learning_rate": 1.2101579829583154e-07,
+      "loss": 9.4319,
+      "step": 2837
+    },
+    {
+      "epoch": 0.9789582614694722,
+      "grad_norm": 1.3315801620483398,
+      "learning_rate": 1.1714505737625004e-07,
+      "loss": 9.4616,
+      "step": 2838
+    },
+    {
+      "epoch": 0.9793032080027596,
+      "grad_norm": 1.5373655557632446,
+      "learning_rate": 1.133371577629716e-07,
+      "loss": 9.4924,
+      "step": 2839
+    },
+    {
+      "epoch": 0.9796481545360469,
+      "grad_norm": 1.4027314186096191,
+      "learning_rate": 1.095921042531145e-07,
+      "loss": 9.3959,
+      "step": 2840
+    },
+    {
+      "epoch": 0.9799931010693342,
+      "grad_norm": 1.4827989339828491,
+      "learning_rate": 1.0590990156461034e-07,
+      "loss": 9.4222,
+      "step": 2841
+    },
+    {
+      "epoch": 0.9803380476026216,
+      "grad_norm": 1.365110158920288,
+      "learning_rate": 1.022905543362096e-07,
+      "loss": 9.4457,
+      "step": 2842
+    },
+    {
+      "epoch": 0.980682994135909,
+      "grad_norm": 1.4800422191619873,
+      "learning_rate": 9.873406712749279e-08,
+      "loss": 9.4845,
+      "step": 2843
+    },
+    {
+      "epoch": 0.9810279406691963,
+      "grad_norm": 1.482553243637085,
+      "learning_rate": 9.524044441883706e-08,
+      "loss": 9.4586,
+      "step": 2844
+    },
+    {
+      "epoch": 0.9813728872024836,
+      "grad_norm": 1.5155751705169678,
+      "learning_rate": 9.180969061143852e-08,
+      "loss": 9.4402,
+      "step": 2845
+    },
+    {
+      "epoch": 0.9817178337357709,
+      "grad_norm": 1.5355010032653809,
+      "learning_rate": 8.844181002727325e-08,
+      "loss": 9.4685,
+      "step": 2846
+    },
+    {
+      "epoch": 0.9820627802690582,
+      "grad_norm": 1.5655204057693481,
+      "learning_rate": 8.513680690913073e-08,
+      "loss": 9.4197,
+      "step": 2847
+    },
+    {
+      "epoch": 0.9824077268023457,
+      "grad_norm": 1.5435409545898438,
+      "learning_rate": 8.189468542057488e-08,
+      "loss": 9.4517,
+      "step": 2848
+    },
+    {
+      "epoch": 0.982752673335633,
+      "grad_norm": 1.6485852003097534,
+      "learning_rate": 7.871544964596633e-08,
+      "loss": 9.4181,
+      "step": 2849
+    },
+    {
+      "epoch": 0.9830976198689203,
+      "grad_norm": 1.6750664710998535,
+      "learning_rate": 7.559910359042355e-08,
+      "loss": 9.361,
+      "step": 2850
+    },
+    {
+      "epoch": 0.9834425664022076,
+      "grad_norm": 0.9129282832145691,
+      "learning_rate": 7.254565117985613e-08,
+      "loss": 9.5804,
+      "step": 2851
+    },
+    {
+      "epoch": 0.983787512935495,
+      "grad_norm": 1.0424609184265137,
+      "learning_rate": 6.955509626093703e-08,
+      "loss": 9.5371,
+      "step": 2852
+    },
+    {
+      "epoch": 0.9841324594687824,
+      "grad_norm": 0.9472970366477966,
+      "learning_rate": 6.662744260109155e-08,
+      "loss": 9.6361,
+      "step": 2853
+    },
+    {
+      "epoch": 0.9844774060020697,
+      "grad_norm": 1.1693047285079956,
+      "learning_rate": 6.376269388852496e-08,
+      "loss": 9.4819,
+      "step": 2854
+    },
+    {
+      "epoch": 0.984822352535357,
+      "grad_norm": 0.99554044008255,
+      "learning_rate": 6.096085373217264e-08,
+      "loss": 9.5079,
+      "step": 2855
+    },
+    {
+      "epoch": 0.9851672990686443,
+      "grad_norm": 0.9872909188270569,
+      "learning_rate": 5.822192566173334e-08,
+      "loss": 9.5455,
+      "step": 2856
+    },
+    {
+      "epoch": 0.9855122456019317,
+      "grad_norm": 1.08110773563385,
+      "learning_rate": 5.554591312765811e-08,
+      "loss": 9.5288,
+      "step": 2857
+    },
+    {
+      "epoch": 0.985857192135219,
+      "grad_norm": 1.1271926164627075,
+      "learning_rate": 5.2932819501111395e-08,
+      "loss": 9.5112,
+      "step": 2858
+    },
+    {
+      "epoch": 0.9862021386685064,
+      "grad_norm": 1.2304562330245972,
+      "learning_rate": 5.038264807402105e-08,
+      "loss": 9.5027,
+      "step": 2859
+    },
+    {
+      "epoch": 0.9865470852017937,
+      "grad_norm": 1.0815626382827759,
+      "learning_rate": 4.789540205902832e-08,
+      "loss": 9.5418,
+      "step": 2860
+    },
+    {
+      "epoch": 0.9868920317350811,
+      "grad_norm": 1.279129147529602,
+      "learning_rate": 4.547108458951566e-08,
+      "loss": 9.5028,
+      "step": 2861
+    },
+    {
+      "epoch": 0.9872369782683684,
+      "grad_norm": 1.1527676582336426,
+      "learning_rate": 4.310969871958448e-08,
+      "loss": 9.541,
+      "step": 2862
+    },
+    {
+      "epoch": 0.9875819248016557,
+      "grad_norm": 1.260764241218567,
+      "learning_rate": 4.0811247424049625e-08,
+      "loss": 9.4835,
+      "step": 2863
+    },
+    {
+      "epoch": 0.9879268713349431,
+      "grad_norm": 1.153011679649353,
+      "learning_rate": 3.857573359845601e-08,
+      "loss": 9.5011,
+      "step": 2864
+    },
+    {
+      "epoch": 0.9882718178682304,
+      "grad_norm": 1.124871015548706,
+      "learning_rate": 3.6403160059050865e-08,
+      "loss": 9.5657,
+      "step": 2865
+    },
+    {
+      "epoch": 0.9886167644015178,
+      "grad_norm": 1.1694023609161377,
+      "learning_rate": 3.4293529542800406e-08,
+      "loss": 9.4759,
+      "step": 2866
+    },
+    {
+      "epoch": 0.9889617109348051,
+      "grad_norm": 1.1712218523025513,
+      "learning_rate": 3.224684470735651e-08,
+      "loss": 9.5351,
+      "step": 2867
+    },
+    {
+      "epoch": 0.9893066574680924,
+      "grad_norm": 1.2313039302825928,
+      "learning_rate": 3.0263108131095566e-08,
+      "loss": 9.5316,
+      "step": 2868
     }
   ],
   "logging_steps": 1,
@@ -18429,7 +20102,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 595349320237056.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null