jonathanjordan21
committed on
Upload folder using huggingface_hub
Browse files
- data/model.safetensors +1 -1
- data/optimizer.pt +1 -1
- data/rng_state.pth +1 -1
- data/scheduler.pt +1 -1
- data/trainer_state.json +2334 -3
data/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3f7fc3027701fc162cacfe1f164f9286c19e2c312c8caa0ac162e5c2466bc3fa
 size 576008736
data/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fb8bef0278b83e5db495bff5d5d0002f9f7c872c67c02a1d8ad135e9b6ab6ca5
 size 1152256984
data/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bdbddfb9ed7da6ee0fba930abf6b54364e831087db6cbd0b392f9d8e545a0fb2
 size 14244
data/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e543c4939980849c5a85545ea803fa092b2cc6c74d0eae335c9796c38c00f741
 size 1064
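Each of the four files above is stored through Git LFS, so the commit only rewrites the three-line pointer file: the spec version, the blob's SHA-256 (`oid`), and its size in bytes. As a hedged illustration (not part of the commit itself), a minimal Python sketch that checks a locally downloaded blob against its pointer; the path and the expected values are taken from the model.safetensors diff above:

```python
import hashlib

# Hypothetical local copy of one of the files in this commit.
PATH = "data/model.safetensors"

# oid and size copied from the new LFS pointer in the diff above.
EXPECTED_OID = "3f7fc3027701fc162cacfe1f164f9286c19e2c312c8caa0ac162e5c2466bc3fa"
EXPECTED_SIZE = 576008736

digest = hashlib.sha256()
size = 0
with open(PATH, "rb") as f:
    # Stream in 1 MiB chunks so the ~576 MB blob never sits in memory at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} != {EXPECTED_SIZE}"
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("blob matches its LFS pointer")
```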
data/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
+"epoch": 0.048683785183771554,
 "eval_steps": 500,
-"global_step":
+"global_step": 25000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -9338,6 +9338,2337 @@
 "learning_rate": 2.9887914128987465e-05,
 "loss": 1.852,
 "step": 19995
+},
+{
+"epoch": 0.03896650166109075,
+"grad_norm": 4.136464595794678,
+"learning_rate": 2.9887746105205264e-05,
+"loss": 1.8378,
+"step": 20010
+},
+{
+"epoch": 0.03899571193220101,
+"grad_norm": 3.3061976432800293,
+"learning_rate": 2.9887577956051263e-05,
+"loss": 1.8702,
+"step": 20025
+},
+{
+"epoch": 0.039024922203311274,
+"grad_norm": 3.5822839736938477,
+"learning_rate": 2.9887409681526876e-05,
+"loss": 1.87,
+"step": 20040
+},
+{
+"epoch": 0.03905413247442154,
+"grad_norm": 4.132532119750977,
+"learning_rate": 2.9887241281633518e-05,
+"loss": 2.051,
+"step": 20055
+},
+{
+"epoch": 0.0390833427455318,
+"grad_norm": 3.9095962047576904,
+"learning_rate": 2.9887072756372606e-05,
+"loss": 1.8271,
+"step": 20070
+},
+{
+"epoch": 0.039112553016642064,
+"grad_norm": 3.028393507003784,
+"learning_rate": 2.988690410574556e-05,
+"loss": 1.825,
+"step": 20085
+},
+{
+"epoch": 0.039141763287752325,
+"grad_norm": 1.7903603315353394,
+"learning_rate": 2.98867353297538e-05,
+"loss": 1.9565,
+"step": 20100
+},
+{
+"epoch": 0.03917097355886259,
+"grad_norm": 2.168895959854126,
+"learning_rate": 2.988656642839875e-05,
+"loss": 1.8483,
+"step": 20115
+},
+{
+"epoch": 0.039200183829972854,
+"grad_norm": 7.789179801940918,
+"learning_rate": 2.988639740168183e-05,
+"loss": 2.0627,
+"step": 20130
+},
+{
+"epoch": 0.039229394101083115,
+"grad_norm": 2.033658981323242,
+"learning_rate": 2.9886228249604464e-05,
+"loss": 1.8905,
+"step": 20145
+},
+{
+"epoch": 0.03925860437219338,
+"grad_norm": 3.6176931858062744,
+"learning_rate": 2.9886058972168076e-05,
+"loss": 1.8981,
+"step": 20160
+},
+{
+"epoch": 0.039287814643303644,
+"grad_norm": 3.776540517807007,
+"learning_rate": 2.9885889569374088e-05,
+"loss": 1.7417,
+"step": 20175
+},
+{
+"epoch": 0.039317024914413905,
+"grad_norm": 2.138796329498291,
+"learning_rate": 2.9885720041223934e-05,
+"loss": 1.8845,
+"step": 20190
+},
+{
+"epoch": 0.039346235185524166,
+"grad_norm": 5.0401482582092285,
+"learning_rate": 2.988555038771904e-05,
+"loss": 1.8966,
+"step": 20205
+},
+{
+"epoch": 0.039375445456634434,
+"grad_norm": 2.2886552810668945,
+"learning_rate": 2.9885380608860827e-05,
+"loss": 1.8898,
+"step": 20220
+},
+{
+"epoch": 0.039404655727744695,
+"grad_norm": 2.286959648132324,
+"learning_rate": 2.9885210704650734e-05,
+"loss": 1.9597,
+"step": 20235
+},
+{
+"epoch": 0.039433865998854956,
+"grad_norm": 2.538752794265747,
+"learning_rate": 2.988504067509019e-05,
+"loss": 1.8451,
+"step": 20250
+},
+{
+"epoch": 0.039463076269965223,
+"grad_norm": 3.4922502040863037,
+"learning_rate": 2.988487052018062e-05,
+"loss": 1.8717,
+"step": 20265
+},
+{
+"epoch": 0.039492286541075484,
+"grad_norm": 3.5919086933135986,
+"learning_rate": 2.9884700239923467e-05,
+"loss": 1.8182,
+"step": 20280
+},
+{
+"epoch": 0.039521496812185745,
+"grad_norm": 1.9253063201904297,
+"learning_rate": 2.988452983432016e-05,
+"loss": 1.8067,
+"step": 20295
+},
+{
+"epoch": 0.039550707083296006,
+"grad_norm": 3.168278217315674,
+"learning_rate": 2.9884359303372127e-05,
+"loss": 1.9619,
+"step": 20310
+},
+{
+"epoch": 0.039579917354406274,
+"grad_norm": 2.574296236038208,
+"learning_rate": 2.9884188647080816e-05,
+"loss": 1.9037,
+"step": 20325
+},
+{
+"epoch": 0.039609127625516535,
+"grad_norm": 3.87908673286438,
+"learning_rate": 2.9884017865447657e-05,
+"loss": 1.8106,
+"step": 20340
+},
+{
+"epoch": 0.039638337896626796,
+"grad_norm": 3.060088872909546,
+"learning_rate": 2.9883846958474093e-05,
+"loss": 1.8878,
+"step": 20355
+},
+{
+"epoch": 0.039667548167737064,
+"grad_norm": 2.6092071533203125,
+"learning_rate": 2.988367592616156e-05,
+"loss": 2.0189,
+"step": 20370
+},
+{
+"epoch": 0.039696758438847325,
+"grad_norm": 2.688831329345703,
+"learning_rate": 2.9883504768511496e-05,
+"loss": 1.9439,
+"step": 20385
+},
+{
+"epoch": 0.039725968709957586,
+"grad_norm": 4.126415252685547,
+"learning_rate": 2.988333348552535e-05,
+"loss": 1.7334,
+"step": 20400
+},
+{
+"epoch": 0.03975517898106785,
+"grad_norm": 2.5536105632781982,
+"learning_rate": 2.988316207720455e-05,
+"loss": 1.8741,
+"step": 20415
+},
+{
+"epoch": 0.039784389252178115,
+"grad_norm": 4.146584510803223,
+"learning_rate": 2.9882990543550557e-05,
+"loss": 1.839,
+"step": 20430
+},
+{
+"epoch": 0.039813599523288376,
+"grad_norm": 4.531203746795654,
+"learning_rate": 2.9882818884564805e-05,
+"loss": 1.801,
+"step": 20445
+},
+{
+"epoch": 0.03984280979439864,
+"grad_norm": 4.3206562995910645,
+"learning_rate": 2.988264710024874e-05,
+"loss": 1.9127,
+"step": 20460
+},
+{
+"epoch": 0.0398720200655089,
+"grad_norm": 2.258627414703369,
+"learning_rate": 2.9882475190603815e-05,
+"loss": 1.9329,
+"step": 20475
+},
+{
+"epoch": 0.039901230336619166,
+"grad_norm": 2.874171018600464,
+"learning_rate": 2.988230315563147e-05,
+"loss": 1.8632,
+"step": 20490
+},
+{
+"epoch": 0.03993044060772943,
+"grad_norm": 2.2248058319091797,
+"learning_rate": 2.988213099533316e-05,
+"loss": 1.7653,
+"step": 20505
+},
+{
+"epoch": 0.03995965087883969,
+"grad_norm": 2.2211756706237793,
+"learning_rate": 2.988195870971033e-05,
+"loss": 2.0817,
+"step": 20520
+},
+{
+"epoch": 0.039988861149949956,
+"grad_norm": 2.266099214553833,
+"learning_rate": 2.9881786298764432e-05,
+"loss": 2.0993,
+"step": 20535
+},
+{
+"epoch": 0.04001807142106022,
+"grad_norm": 3.2420692443847656,
+"learning_rate": 2.988161376249692e-05,
+"loss": 1.8448,
+"step": 20550
+},
+{
+"epoch": 0.04004728169217048,
+"grad_norm": 2.1219029426574707,
+"learning_rate": 2.9881441100909244e-05,
+"loss": 1.9547,
+"step": 20565
+},
+{
+"epoch": 0.04007649196328074,
+"grad_norm": 4.585850238800049,
+"learning_rate": 2.988126831400286e-05,
+"loss": 1.9619,
+"step": 20580
+},
+{
+"epoch": 0.04010570223439101,
+"grad_norm": 3.4670188426971436,
+"learning_rate": 2.9881095401779224e-05,
+"loss": 2.0,
+"step": 20595
+},
+{
+"epoch": 0.04013491250550127,
+"grad_norm": 2.0285186767578125,
+"learning_rate": 2.9880922364239787e-05,
+"loss": 1.9434,
+"step": 20610
+},
+{
+"epoch": 0.04016412277661153,
+"grad_norm": 1.873279094696045,
+"learning_rate": 2.9880749201386014e-05,
+"loss": 1.8546,
+"step": 20625
+},
+{
+"epoch": 0.040193333047721796,
+"grad_norm": 2.1878137588500977,
+"learning_rate": 2.9880575913219354e-05,
+"loss": 1.8624,
+"step": 20640
+},
+{
+"epoch": 0.04022254331883206,
+"grad_norm": 3.9366180896759033,
+"learning_rate": 2.988040249974128e-05,
+"loss": 1.9143,
+"step": 20655
+},
+{
+"epoch": 0.04025175358994232,
+"grad_norm": 2.3610141277313232,
+"learning_rate": 2.9880228960953236e-05,
+"loss": 1.9616,
+"step": 20670
+},
+{
+"epoch": 0.04028096386105258,
+"grad_norm": 2.8658604621887207,
+"learning_rate": 2.9880055296856695e-05,
+"loss": 1.7601,
+"step": 20685
+},
+{
+"epoch": 0.04031017413216285,
+"grad_norm": 4.454057216644287,
+"learning_rate": 2.9879881507453112e-05,
+"loss": 1.7578,
+"step": 20700
+},
+{
+"epoch": 0.04033938440327311,
+"grad_norm": 2.244455575942993,
+"learning_rate": 2.9879707592743957e-05,
+"loss": 1.8334,
+"step": 20715
+},
+{
+"epoch": 0.04036859467438337,
+"grad_norm": 1.61302649974823,
+"learning_rate": 2.987953355273069e-05,
+"loss": 1.9746,
+"step": 20730
+},
+{
+"epoch": 0.04039780494549364,
+"grad_norm": 2.8323678970336914,
+"learning_rate": 2.987935938741478e-05,
+"loss": 2.1251,
+"step": 20745
+},
+{
+"epoch": 0.0404270152166039,
+"grad_norm": 2.418241262435913,
+"learning_rate": 2.987918509679769e-05,
+"loss": 2.0798,
+"step": 20760
+},
+{
+"epoch": 0.04045622548771416,
+"grad_norm": 3.465550661087036,
+"learning_rate": 2.987901068088089e-05,
+"loss": 1.8706,
+"step": 20775
+},
+{
+"epoch": 0.04048543575882442,
+"grad_norm": 4.148263931274414,
+"learning_rate": 2.987883613966585e-05,
+"loss": 1.7925,
+"step": 20790
+},
+{
+"epoch": 0.04051464602993469,
+"grad_norm": 2.4039666652679443,
+"learning_rate": 2.9878661473154037e-05,
+"loss": 1.8672,
+"step": 20805
+},
+{
+"epoch": 0.04054385630104495,
+"grad_norm": 2.12880277633667,
+"learning_rate": 2.9878486681346923e-05,
+"loss": 1.8978,
+"step": 20820
+},
+{
+"epoch": 0.04057306657215521,
+"grad_norm": 1.9612979888916016,
+"learning_rate": 2.987831176424598e-05,
+"loss": 1.9175,
+"step": 20835
+},
+{
+"epoch": 0.04060227684326548,
+"grad_norm": 2.161982297897339,
+"learning_rate": 2.9878136721852682e-05,
+"loss": 1.7752,
+"step": 20850
+},
+{
+"epoch": 0.04063148711437574,
+"grad_norm": 4.4879961013793945,
+"learning_rate": 2.9877961554168498e-05,
+"loss": 2.0857,
+"step": 20865
+},
+{
+"epoch": 0.040660697385486,
+"grad_norm": 4.1571364402771,
+"learning_rate": 2.9877786261194914e-05,
+"loss": 1.949,
+"step": 20880
+},
+{
+"epoch": 0.04068990765659626,
+"grad_norm": 3.3120033740997314,
+"learning_rate": 2.9877610842933397e-05,
+"loss": 1.8585,
+"step": 20895
+},
+{
+"epoch": 0.04071911792770653,
+"grad_norm": 3.193117618560791,
+"learning_rate": 2.9877435299385424e-05,
+"loss": 1.731,
+"step": 20910
+},
+{
+"epoch": 0.04074832819881679,
+"grad_norm": 2.375343084335327,
+"learning_rate": 2.987725963055248e-05,
+"loss": 1.8269,
+"step": 20925
+},
+{
+"epoch": 0.04077753846992705,
+"grad_norm": 2.3607242107391357,
+"learning_rate": 2.9877083836436036e-05,
+"loss": 1.8305,
+"step": 20940
+},
+{
+"epoch": 0.04080674874103731,
+"grad_norm": 3.0205342769622803,
+"learning_rate": 2.987690791703758e-05,
+"loss": 1.8631,
+"step": 20955
+},
+{
+"epoch": 0.04083595901214758,
+"grad_norm": 3.530947685241699,
+"learning_rate": 2.9876731872358585e-05,
+"loss": 1.8431,
+"step": 20970
+},
+{
+"epoch": 0.04086516928325784,
+"grad_norm": 2.8419220447540283,
+"learning_rate": 2.987655570240054e-05,
+"loss": 1.8519,
+"step": 20985
+},
+{
+"epoch": 0.0408943795543681,
+"grad_norm": 2.253532886505127,
+"learning_rate": 2.9876379407164933e-05,
+"loss": 1.8688,
+"step": 21000
+},
+{
+"epoch": 0.04092358982547837,
+"grad_norm": 1.9279251098632812,
+"learning_rate": 2.987620298665324e-05,
+"loss": 1.9111,
+"step": 21015
+},
+{
+"epoch": 0.04095280009658863,
+"grad_norm": 3.0645790100097656,
+"learning_rate": 2.987602644086695e-05,
+"loss": 1.8359,
+"step": 21030
+},
+{
+"epoch": 0.04098201036769889,
+"grad_norm": 2.9384896755218506,
+"learning_rate": 2.9875849769807544e-05,
+"loss": 1.747,
+"step": 21045
+},
+{
+"epoch": 0.04101122063880915,
+"grad_norm": 3.7138500213623047,
+"learning_rate": 2.987567297347652e-05,
+"loss": 1.8528,
+"step": 21060
+},
+{
+"epoch": 0.04104043090991942,
+"grad_norm": 4.440821170806885,
+"learning_rate": 2.987549605187536e-05,
+"loss": 1.9611,
+"step": 21075
+},
+{
+"epoch": 0.04106964118102968,
+"grad_norm": 3.0378854274749756,
+"learning_rate": 2.9875319005005552e-05,
+"loss": 2.012,
+"step": 21090
+},
+{
+"epoch": 0.04109885145213994,
+"grad_norm": 4.378201961517334,
+"learning_rate": 2.9875141832868598e-05,
+"loss": 1.7366,
+"step": 21105
+},
+{
+"epoch": 0.04112806172325021,
+"grad_norm": 4.794327735900879,
+"learning_rate": 2.9874964535465978e-05,
+"loss": 1.8728,
+"step": 21120
+},
+{
+"epoch": 0.04115727199436047,
+"grad_norm": 2.5137903690338135,
+"learning_rate": 2.987478711279919e-05,
+"loss": 1.9813,
+"step": 21135
+},
+{
+"epoch": 0.04118648226547073,
+"grad_norm": 2.2024412155151367,
+"learning_rate": 2.987460956486973e-05,
+"loss": 1.7721,
+"step": 21150
+},
+{
+"epoch": 0.04121569253658099,
+"grad_norm": 3.0043609142303467,
+"learning_rate": 2.987443189167909e-05,
+"loss": 1.9015,
+"step": 21165
+},
+{
+"epoch": 0.04124490280769126,
+"grad_norm": 4.627270698547363,
+"learning_rate": 2.9874254093228763e-05,
+"loss": 1.9487,
+"step": 21180
+},
+{
+"epoch": 0.04127411307880152,
+"grad_norm": 3.208395481109619,
+"learning_rate": 2.987407616952025e-05,
+"loss": 1.6989,
+"step": 21195
+},
+{
+"epoch": 0.04130332334991178,
+"grad_norm": 4.102930545806885,
+"learning_rate": 2.9873898120555055e-05,
+"loss": 2.0639,
+"step": 21210
+},
+{
+"epoch": 0.04133253362102205,
+"grad_norm": 3.846593141555786,
+"learning_rate": 2.987371994633467e-05,
+"loss": 1.7067,
+"step": 21225
+},
+{
+"epoch": 0.04136174389213231,
+"grad_norm": 3.6651105880737305,
+"learning_rate": 2.9873541646860597e-05,
+"loss": 1.8983,
+"step": 21240
+},
+{
+"epoch": 0.04139095416324257,
+"grad_norm": 3.715604543685913,
+"learning_rate": 2.987336322213434e-05,
+"loss": 1.6676,
+"step": 21255
+},
+{
+"epoch": 0.041420164434352834,
+"grad_norm": 3.0780601501464844,
+"learning_rate": 2.9873184672157395e-05,
+"loss": 1.9342,
+"step": 21270
+},
+{
+"epoch": 0.0414493747054631,
+"grad_norm": 3.385103225708008,
+"learning_rate": 2.9873005996931274e-05,
+"loss": 1.9494,
+"step": 21285
+},
+{
+"epoch": 0.04147858497657336,
+"grad_norm": 2.9652836322784424,
+"learning_rate": 2.9872827196457475e-05,
+"loss": 1.8491,
+"step": 21300
+},
+{
+"epoch": 0.041507795247683624,
+"grad_norm": 2.346210479736328,
+"learning_rate": 2.9872648270737507e-05,
+"loss": 1.7948,
+"step": 21315
+},
+{
+"epoch": 0.04153700551879389,
+"grad_norm": 2.0421793460845947,
+"learning_rate": 2.9872469219772877e-05,
+"loss": 1.7642,
+"step": 21330
+},
+{
+"epoch": 0.04156621578990415,
+"grad_norm": 3.2347426414489746,
+"learning_rate": 2.9872290043565094e-05,
+"loss": 1.9741,
+"step": 21345
+},
+{
+"epoch": 0.041595426061014414,
+"grad_norm": 2.8749160766601562,
+"learning_rate": 2.987211074211566e-05,
+"loss": 1.8364,
+"step": 21360
+},
+{
+"epoch": 0.041624636332124675,
+"grad_norm": 3.484539031982422,
+"learning_rate": 2.9871931315426094e-05,
+"loss": 1.7097,
+"step": 21375
+},
+{
+"epoch": 0.04165384660323494,
+"grad_norm": 3.105286121368408,
+"learning_rate": 2.98717517634979e-05,
+"loss": 1.8239,
+"step": 21390
+},
+{
+"epoch": 0.0416830568743452,
+"grad_norm": 3.804901361465454,
+"learning_rate": 2.9871572086332594e-05,
+"loss": 1.7356,
+"step": 21405
+},
+{
+"epoch": 0.041712267145455464,
+"grad_norm": 2.4536283016204834,
+"learning_rate": 2.9871392283931686e-05,
+"loss": 1.8601,
+"step": 21420
+},
+{
+"epoch": 0.04174147741656573,
+"grad_norm": 2.8864688873291016,
+"learning_rate": 2.9871212356296697e-05,
+"loss": 1.802,
+"step": 21435
+},
+{
+"epoch": 0.04177068768767599,
+"grad_norm": 3.4735238552093506,
+"learning_rate": 2.9871032303429133e-05,
+"loss": 1.9557,
+"step": 21450
+},
+{
+"epoch": 0.041799897958786254,
+"grad_norm": 4.21823263168335,
+"learning_rate": 2.9870852125330513e-05,
+"loss": 1.8513,
+"step": 21465
+},
+{
+"epoch": 0.041829108229896515,
+"grad_norm": 1.6568303108215332,
+"learning_rate": 2.9870671822002357e-05,
+"loss": 1.8443,
+"step": 21480
+},
+{
+"epoch": 0.04185831850100678,
+"grad_norm": 3.8471972942352295,
+"learning_rate": 2.9870491393446184e-05,
+"loss": 1.8831,
+"step": 21495
+},
+{
+"epoch": 0.041887528772117044,
+"grad_norm": 4.5181803703308105,
+"learning_rate": 2.987031083966351e-05,
+"loss": 2.0743,
+"step": 21510
+},
+{
+"epoch": 0.041916739043227305,
+"grad_norm": 2.7604262828826904,
+"learning_rate": 2.987013016065586e-05,
+"loss": 1.7647,
+"step": 21525
+},
+{
+"epoch": 0.041945949314337566,
+"grad_norm": 1.6995900869369507,
+"learning_rate": 2.986994935642475e-05,
+"loss": 1.9221,
+"step": 21540
+},
+{
+"epoch": 0.041975159585447834,
+"grad_norm": 3.400073766708374,
+"learning_rate": 2.9869768426971706e-05,
+"loss": 1.7217,
+"step": 21555
+},
+{
+"epoch": 0.042004369856558095,
+"grad_norm": 2.7783281803131104,
+"learning_rate": 2.986958737229825e-05,
+"loss": 1.8149,
+"step": 21570
+},
+{
+"epoch": 0.042033580127668356,
+"grad_norm": 1.9334073066711426,
+"learning_rate": 2.9869406192405904e-05,
+"loss": 1.7657,
+"step": 21585
+},
+{
+"epoch": 0.042062790398778624,
+"grad_norm": 3.244271993637085,
+"learning_rate": 2.9869224887296205e-05,
+"loss": 1.8122,
+"step": 21600
+},
+{
+"epoch": 0.042092000669888885,
+"grad_norm": 3.9582252502441406,
+"learning_rate": 2.9869043456970662e-05,
+"loss": 1.8296,
+"step": 21615
+},
+{
+"epoch": 0.042121210940999146,
+"grad_norm": 3.9118635654449463,
+"learning_rate": 2.986886190143082e-05,
+"loss": 1.9162,
+"step": 21630
+},
+{
+"epoch": 0.04215042121210941,
+"grad_norm": 3.3247733116149902,
+"learning_rate": 2.9868680220678198e-05,
+"loss": 1.9345,
+"step": 21645
+},
+{
+"epoch": 0.042179631483219675,
+"grad_norm": 4.189952373504639,
+"learning_rate": 2.9868498414714332e-05,
+"loss": 1.7394,
+"step": 21660
+},
+{
+"epoch": 0.042208841754329936,
+"grad_norm": 4.380448818206787,
+"learning_rate": 2.986831648354075e-05,
+"loss": 1.8411,
+"step": 21675
+},
+{
+"epoch": 0.0422380520254402,
+"grad_norm": 3.3280911445617676,
+"learning_rate": 2.986813442715898e-05,
+"loss": 1.7338,
+"step": 21690
+},
+{
+"epoch": 0.042267262296550465,
+"grad_norm": 2.147976875305176,
+"learning_rate": 2.9867952245570557e-05,
+"loss": 1.7418,
+"step": 21705
+},
+{
+"epoch": 0.042296472567660726,
+"grad_norm": 4.927828788757324,
+"learning_rate": 2.9867769938777025e-05,
+"loss": 1.9729,
+"step": 21720
+},
+{
+"epoch": 0.04232568283877099,
+"grad_norm": 2.1555793285369873,
+"learning_rate": 2.9867587506779903e-05,
+"loss": 1.9733,
+"step": 21735
+},
+{
+"epoch": 0.04235489310988125,
+"grad_norm": 4.826751232147217,
+"learning_rate": 2.986740494958074e-05,
+"loss": 1.9141,
+"step": 21750
+},
+{
+"epoch": 0.042384103380991515,
+"grad_norm": 2.62713360786438,
+"learning_rate": 2.986722226718107e-05,
+"loss": 1.7623,
+"step": 21765
+},
+{
+"epoch": 0.042413313652101776,
+"grad_norm": 2.9087536334991455,
+"learning_rate": 2.9867039459582422e-05,
+"loss": 2.0298,
+"step": 21780
+},
+{
+"epoch": 0.04244252392321204,
+"grad_norm": 3.2233824729919434,
+"learning_rate": 2.986685652678635e-05,
+"loss": 1.8063,
+"step": 21795
+},
+{
+"epoch": 0.042471734194322305,
+"grad_norm": 3.8226282596588135,
+"learning_rate": 2.9866673468794392e-05,
+"loss": 1.8173,
+"step": 21810
+},
+{
+"epoch": 0.042500944465432566,
+"grad_norm": 2.362210273742676,
+"learning_rate": 2.986649028560808e-05,
+"loss": 2.0107,
+"step": 21825
+},
+{
+"epoch": 0.04253015473654283,
+"grad_norm": 1.4686235189437866,
+"learning_rate": 2.9866306977228964e-05,
+"loss": 1.8339,
+"step": 21840
+},
+{
+"epoch": 0.04255936500765309,
+"grad_norm": 3.2174501419067383,
+"learning_rate": 2.9866123543658585e-05,
+"loss": 1.9372,
+"step": 21855
+},
+{
+"epoch": 0.042588575278763356,
+"grad_norm": 4.215010643005371,
+"learning_rate": 2.9865939984898494e-05,
+"loss": 1.7492,
+"step": 21870
+},
+{
+"epoch": 0.04261778554987362,
+"grad_norm": 6.015155792236328,
+"learning_rate": 2.9865756300950224e-05,
+"loss": 1.8146,
+"step": 21885
+},
+{
+"epoch": 0.04264699582098388,
+"grad_norm": 3.102923631668091,
+"learning_rate": 2.9865572491815336e-05,
+"loss": 1.9206,
+"step": 21900
+},
+{
+"epoch": 0.042676206092094146,
+"grad_norm": 3.5606796741485596,
+"learning_rate": 2.986538855749537e-05,
+"loss": 1.7442,
+"step": 21915
+},
+{
+"epoch": 0.04270541636320441,
+"grad_norm": 3.877696990966797,
+"learning_rate": 2.9865204497991874e-05,
+"loss": 1.8356,
+"step": 21930
+},
+{
+"epoch": 0.04273462663431467,
+"grad_norm": 2.7707462310791016,
+"learning_rate": 2.98650203133064e-05,
+"loss": 2.1114,
+"step": 21945
+},
+{
+"epoch": 0.04276383690542493,
+"grad_norm": 3.8471269607543945,
+"learning_rate": 2.9864836003440496e-05,
+"loss": 1.974,
+"step": 21960
+},
+{
+"epoch": 0.0427930471765352,
+"grad_norm": 4.265331268310547,
+"learning_rate": 2.9864651568395728e-05,
+"loss": 1.7851,
+"step": 21975
+},
+{
+"epoch": 0.04282225744764546,
+"grad_norm": 3.578641414642334,
+"learning_rate": 2.986446700817363e-05,
+"loss": 1.7996,
+"step": 21990
+},
+{
+"epoch": 0.04285146771875572,
+"grad_norm": 3.289726495742798,
+"learning_rate": 2.9864282322775768e-05,
+"loss": 1.7165,
+"step": 22005
+},
+{
+"epoch": 0.04288067798986598,
+"grad_norm": 1.883353590965271,
+"learning_rate": 2.986409751220369e-05,
+"loss": 1.8633,
+"step": 22020
+},
+{
+"epoch": 0.04290988826097625,
+"grad_norm": 1.7199418544769287,
+"learning_rate": 2.9863912576458955e-05,
+"loss": 1.7696,
+"step": 22035
+},
+{
+"epoch": 0.04293909853208651,
+"grad_norm": 3.445349931716919,
+"learning_rate": 2.986372751554313e-05,
+"loss": 1.8349,
+"step": 22050
+},
+{
+"epoch": 0.04296830880319677,
+"grad_norm": 2.827427387237549,
+"learning_rate": 2.986354232945776e-05,
+"loss": 1.8647,
+"step": 22065
+},
+{
+"epoch": 0.04299751907430704,
+"grad_norm": 2.4225332736968994,
+"learning_rate": 2.986335701820441e-05,
+"loss": 2.0951,
+"step": 22080
+},
+{
+"epoch": 0.0430267293454173,
+"grad_norm": 2.9156410694122314,
+"learning_rate": 2.986317158178464e-05,
+"loss": 1.7989,
+"step": 22095
+},
+{
+"epoch": 0.04305593961652756,
+"grad_norm": 2.3038766384124756,
+"learning_rate": 2.986298602020001e-05,
+"loss": 1.8867,
+"step": 22110
+},
+{
+"epoch": 0.04308514988763782,
+"grad_norm": 3.085129976272583,
+"learning_rate": 2.986280033345209e-05,
+"loss": 1.6969,
+"step": 22125
+},
+{
+"epoch": 0.04311436015874809,
+"grad_norm": 4.071490287780762,
+"learning_rate": 2.986261452154243e-05,
+"loss": 1.7865,
+"step": 22140
+},
+{
+"epoch": 0.04314357042985835,
+"grad_norm": 2.2542295455932617,
+"learning_rate": 2.986242858447261e-05,
+"loss": 1.8687,
+"step": 22155
+},
+{
+"epoch": 0.04317278070096861,
+"grad_norm": 3.3869330883026123,
+"learning_rate": 2.9862242522244183e-05,
+"loss": 1.8037,
+"step": 22170
+},
+{
+"epoch": 0.04320199097207888,
+"grad_norm": 3.7993576526641846,
+"learning_rate": 2.9862056334858727e-05,
+"loss": 1.8819,
+"step": 22185
+},
+{
+"epoch": 0.04323120124318914,
+"grad_norm": 3.019289255142212,
+"learning_rate": 2.9861870022317798e-05,
+"loss": 1.8932,
+"step": 22200
+},
+{
+"epoch": 0.0432604115142994,
+"grad_norm": 2.831664562225342,
+"learning_rate": 2.9861683584622976e-05,
+"loss": 1.8813,
+"step": 22215
+},
+{
+"epoch": 0.04328962178540966,
+"grad_norm": 4.9506754875183105,
+"learning_rate": 2.9861497021775825e-05,
+"loss": 1.7917,
+"step": 22230
+},
+{
+"epoch": 0.04331883205651993,
+"grad_norm": 2.384033203125,
+"learning_rate": 2.986131033377792e-05,
+"loss": 2.0226,
+"step": 22245
+},
+{
+"epoch": 0.04334804232763019,
+"grad_norm": 1.8661621809005737,
+"learning_rate": 2.9861123520630828e-05,
+"loss": 1.817,
+"step": 22260
+},
+{
+"epoch": 0.04337725259874045,
+"grad_norm": 3.5283803939819336,
+"learning_rate": 2.9860936582336123e-05,
+"loss": 1.806,
+"step": 22275
+},
+{
+"epoch": 0.04340646286985072,
+"grad_norm": 4.240048408508301,
+"learning_rate": 2.9860749518895386e-05,
+"loss": 1.7773,
+"step": 22290
+},
+{
+"epoch": 0.04343567314096098,
+"grad_norm": 3.9326212406158447,
+"learning_rate": 2.986056233031018e-05,
+"loss": 1.8494,
+"step": 22305
+},
+{
+"epoch": 0.04346488341207124,
+"grad_norm": 4.390437602996826,
+"learning_rate": 2.986037501658209e-05,
+"loss": 1.7608,
+"step": 22320
+},
+{
+"epoch": 0.0434940936831815,
+"grad_norm": 2.5305089950561523,
+"learning_rate": 2.98601875777127e-05,
+"loss": 1.7295,
+"step": 22335
+},
+{
+"epoch": 0.04352330395429177,
+"grad_norm": 2.4276509284973145,
+"learning_rate": 2.9860000013703576e-05,
+"loss": 2.0082,
+"step": 22350
+},
+{
+"epoch": 0.04355251422540203,
+"grad_norm": 3.4239320755004883,
+"learning_rate": 2.9859812324556298e-05,
+"loss": 1.6979,
+"step": 22365
+},
+{
+"epoch": 0.04358172449651229,
+"grad_norm": 2.544837474822998,
+"learning_rate": 2.985962451027245e-05,
+"loss": 1.7844,
+"step": 22380
+},
+{
+"epoch": 0.04361093476762256,
+"grad_norm": 5.094317436218262,
+"learning_rate": 2.985943657085362e-05,
+"loss": 1.8792,
+"step": 22395
+},
+{
+"epoch": 0.04364014503873282,
+"grad_norm": 3.235121250152588,
+"learning_rate": 2.985924850630138e-05,
+"loss": 1.7795,
+"step": 22410
+},
+{
+"epoch": 0.04366935530984308,
+"grad_norm": 4.88664436340332,
+"learning_rate": 2.9859060316617325e-05,
+"loss": 1.9035,
+"step": 22425
+},
+{
+"epoch": 0.04369856558095334,
+"grad_norm": 2.401301622390747,
+"learning_rate": 2.9858872001803025e-05,
+"loss": 1.9182,
+"step": 22440
+},
+{
+"epoch": 0.04372777585206361,
+"grad_norm": 2.24088978767395,
+"learning_rate": 2.9858683561860077e-05,
+"loss": 1.688,
+"step": 22455
+},
+{
+"epoch": 0.04375698612317387,
+"grad_norm": 2.521176338195801,
+"learning_rate": 2.9858494996790065e-05,
+"loss": 1.962,
+"step": 22470
+},
+{
+"epoch": 0.04378619639428413,
+"grad_norm": 2.897905111312866,
+"learning_rate": 2.9858306306594578e-05,
+"loss": 1.7388,
+"step": 22485
+},
+{
+"epoch": 0.0438154066653944,
+"grad_norm": 2.3301055431365967,
+"learning_rate": 2.9858117491275204e-05,
+"loss": 1.8834,
+"step": 22500
+},
+{
+"epoch": 0.04384461693650466,
+"grad_norm": 2.078205108642578,
+"learning_rate": 2.9857928550833533e-05,
+"loss": 1.8959,
+"step": 22515
+},
+{
+"epoch": 0.04387382720761492,
+"grad_norm": 2.8938355445861816,
+"learning_rate": 2.9857739485271153e-05,
+"loss": 1.8529,
+"step": 22530
+},
+{
+"epoch": 0.04390303747872518,
+"grad_norm": 2.0363752841949463,
+"learning_rate": 2.9857550294589663e-05,
+"loss": 1.9974,
+"step": 22545
+},
+{
+"epoch": 0.04393224774983545,
+"grad_norm": 3.58372163772583,
+"learning_rate": 2.9857360978790647e-05,
+"loss": 1.7921,
+"step": 22560
+},
+{
+"epoch": 0.04396145802094571,
+"grad_norm": 3.4086432456970215,
+"learning_rate": 2.985717153787571e-05,
+"loss": 1.8401,
+"step": 22575
+},
+{
+"epoch": 0.04399066829205597,
+"grad_norm": 2.3264150619506836,
+"learning_rate": 2.985698197184644e-05,
+"loss": 1.8789,
+"step": 22590
+},
+{
+"epoch": 0.044019878563166234,
+"grad_norm": 2.727571964263916,
+"learning_rate": 2.9856792280704435e-05,
+"loss": 1.7859,
+"step": 22605
+},
+{
+"epoch": 0.0440490888342765,
+"grad_norm": 3.833146095275879,
+"learning_rate": 2.9856602464451293e-05,
+"loss": 1.9976,
+"step": 22620
+},
+{
+"epoch": 0.04407829910538676,
+"grad_norm": 1.8543720245361328,
+"learning_rate": 2.9856412523088612e-05,
+"loss": 1.8875,
+"step": 22635
+},
+{
+"epoch": 0.044107509376497024,
+"grad_norm": 2.6003103256225586,
+"learning_rate": 2.9856222456617993e-05,
+"loss": 2.0006,
+"step": 22650
+},
+{
+"epoch": 0.04413671964760729,
+"grad_norm": 3.0136611461639404,
+"learning_rate": 2.9856032265041035e-05,
+"loss": 1.8309,
+"step": 22665
+},
+{
+"epoch": 0.04416592991871755,
+"grad_norm": 1.948042631149292,
+"learning_rate": 2.9855841948359337e-05,
+"loss": 1.8721,
+"step": 22680
+},
+{
+"epoch": 0.044195140189827814,
+"grad_norm": 3.1908979415893555,
+"learning_rate": 2.9855651506574507e-05,
+"loss": 1.8373,
+"step": 22695
+},
+{
+"epoch": 0.044224350460938075,
+"grad_norm": 3.519826889038086,
+"learning_rate": 2.985546093968815e-05,
+"loss": 1.8992,
+"step": 22710
+},
+{
+"epoch": 0.04425356073204834,
+"grad_norm": 4.259973526000977,
+"learning_rate": 2.985527024770186e-05,
+"loss": 1.9466,
+"step": 22725
+},
+{
+"epoch": 0.044282771003158604,
+"grad_norm": 2.69942569732666,
+"learning_rate": 2.9855079430617253e-05,
+"loss": 1.6805,
+"step": 22740
+},
+{
+"epoch": 0.044311981274268865,
+"grad_norm": 3.0210318565368652,
+"learning_rate": 2.9854888488435933e-05,
+"loss": 1.8744,
+"step": 22755
+},
+{
+"epoch": 0.04434119154537913,
+"grad_norm": 2.448091745376587,
+"learning_rate": 2.9854697421159505e-05,
+"loss": 1.8449,
+"step": 22770
+},
+{
+"epoch": 0.044370401816489394,
+"grad_norm": 3.8297231197357178,
+"learning_rate": 2.9854506228789586e-05,
+"loss": 1.8071,
+"step": 22785
+},
+{
+"epoch": 0.044399612087599655,
+"grad_norm": 4.065975189208984,
+"learning_rate": 2.9854314911327777e-05,
+"loss": 1.9335,
+"step": 22800
+},
+{
+"epoch": 0.044428822358709916,
+"grad_norm": 2.0296108722686768,
+"learning_rate": 2.9854123468775693e-05,
+"loss": 1.748,
+"step": 22815
+},
+{
+"epoch": 0.044458032629820184,
+"grad_norm": 1.7504347562789917,
+"learning_rate": 2.985393190113495e-05,
+"loss": 1.9084,
+"step": 22830
+},
+{
+"epoch": 0.044487242900930445,
+"grad_norm": 2.8802502155303955,
+"learning_rate": 2.9853740208407152e-05,
+"loss": 1.8649,
+"step": 22845
+},
+{
+"epoch": 0.044516453172040706,
+"grad_norm": 3.5376250743865967,
+"learning_rate": 2.985354839059392e-05,
+"loss": 1.7617,
+"step": 22860
+},
+{
+"epoch": 0.044545663443150973,
+"grad_norm": 4.846216201782227,
+"learning_rate": 2.985335644769687e-05,
+"loss": 1.7531,
+"step": 22875
+},
+{
+"epoch": 0.044574873714261234,
+"grad_norm": 1.9456549882888794,
+"learning_rate": 2.9853164379717615e-05,
+"loss": 1.9782,
+"step": 22890
+},
+{
+"epoch": 0.044604083985371495,
+"grad_norm": 3.7463254928588867,
+"learning_rate": 2.9852972186657774e-05,
+"loss": 1.9347,
+"step": 22905
+},
+{
+"epoch": 0.044633294256481756,
+"grad_norm": 1.9641201496124268,
+"learning_rate": 2.9852779868518967e-05,
+"loss": 1.8366,
+"step": 22920
+},
+{
+"epoch": 0.044662504527592024,
+"grad_norm": 3.9136605262756348,
+"learning_rate": 2.9852587425302812e-05,
+"loss": 2.0271,
+"step": 22935
+},
+{
+"epoch": 0.044691714798702285,
+"grad_norm": 2.161766767501831,
+"learning_rate": 2.9852394857010923e-05,
+"loss": 1.8681,
+"step": 22950
+},
+{
+"epoch": 0.044720925069812546,
+"grad_norm": 3.1569862365722656,
+"learning_rate": 2.9852202163644937e-05,
+"loss": 1.7996,
+"step": 22965
+},
+{
+"epoch": 0.044750135340922814,
+"grad_norm": 2.0586421489715576,
+"learning_rate": 2.9852009345206458e-05,
+"loss": 1.7727,
+"step": 22980
+},
+{
+"epoch": 0.044779345612033075,
+"grad_norm": 5.425686359405518,
+"learning_rate": 2.9851816401697127e-05,
+"loss": 1.9209,
+"step": 22995
+},
+{
+"epoch": 0.044808555883143336,
+"grad_norm": 5.385043621063232,
+"learning_rate": 2.985162333311856e-05,
+"loss": 1.8473,
+"step": 23010
+},
+{
+"epoch": 0.0448377661542536,
+"grad_norm": 3.4577736854553223,
+"learning_rate": 2.985143013947238e-05,
+"loss": 1.848,
+"step": 23025
+},
+{
+"epoch": 0.044866976425363865,
+"grad_norm": 3.0630152225494385,
+"learning_rate": 2.985123682076022e-05,
+"loss": 1.7272,
+"step": 23040
+},
+{
+"epoch": 0.044896186696474126,
+"grad_norm": 3.8338427543640137,
+"learning_rate": 2.985104337698371e-05,
+"loss": 1.9042,
+"step": 23055
+},
+{
+"epoch": 0.04492539696758439,
+"grad_norm": 3.1702969074249268,
+"learning_rate": 2.985084980814447e-05,
+"loss": 1.9389,
+"step": 23070
+},
+{
+"epoch": 0.04495460723869465,
+"grad_norm": 2.758162021636963,
+"learning_rate": 2.985065611424414e-05,
+"loss": 1.8825,
+"step": 23085
+},
+{
+"epoch": 0.044983817509804916,
+"grad_norm": 2.1373486518859863,
+"learning_rate": 2.985046229528434e-05,
+"loss": 1.9451,
+"step": 23100
+},
+{
+"epoch": 0.04501302778091518,
+"grad_norm": 2.1302478313446045,
+"learning_rate": 2.985026835126671e-05,
+"loss": 2.0208,
+"step": 23115
+},
+{
+"epoch": 0.04504223805202544,
+"grad_norm": 3.8777012825012207,
+"learning_rate": 2.985007428219289e-05,
+"loss": 2.0138,
+"step": 23130
+},
+{
+"epoch": 0.045071448323135706,
+"grad_norm": 3.428769111633301,
+"learning_rate": 2.9849880088064497e-05,
+"loss": 1.9868,
+"step": 23145
+},
+{
+"epoch": 0.04510065859424597,
+"grad_norm": 5.036014556884766,
+"learning_rate": 2.9849685768883172e-05,
+"loss": 1.7348,
+"step": 23160
+},
+{
+"epoch": 0.04512986886535623,
+"grad_norm": 3.206932306289673,
+"learning_rate": 2.9849491324650563e-05,
+"loss": 1.9991,
+"step": 23175
+},
+{
+"epoch": 0.04515907913646649,
+"grad_norm": 2.5311954021453857,
+"learning_rate": 2.9849296755368297e-05,
+"loss": 1.7501,
+"step": 23190
+},
+{
+"epoch": 0.04518828940757676,
+"grad_norm": 2.1587252616882324,
+"learning_rate": 2.984910206103801e-05,
+"loss": 1.7981,
+"step": 23205
+},
+{
+"epoch": 0.04521749967868702,
+"grad_norm": 2.198859453201294,
+"learning_rate": 2.984890724166135e-05,
+"loss": 1.9349,
+"step": 23220
+},
+{
+"epoch": 0.04524670994979728,
+"grad_norm": 3.513099193572998,
+"learning_rate": 2.9848712297239955e-05,
+"loss": 1.7639,
+"step": 23235
+},
+{
+"epoch": 0.045275920220907546,
+"grad_norm": 4.66519021987915,
+"learning_rate": 2.984851722777546e-05,
+"loss": 1.6865,
+"step": 23250
+},
+{
+"epoch": 0.04530513049201781,
+"grad_norm": 2.0970771312713623,
+"learning_rate": 2.9848322033269523e-05,
+"loss": 1.8861,
+"step": 23265
+},
+{
+"epoch": 0.04533434076312807,
+"grad_norm": 4.110560417175293,
+"learning_rate": 2.984812671372377e-05,
+"loss": 1.8482,
+"step": 23280
+},
+{
+"epoch": 0.04536355103423833,
+"grad_norm": 3.2896182537078857,
+"learning_rate": 2.9847931269139854e-05,
+"loss": 1.9021,
+"step": 23295
+},
+{
+"epoch": 0.0453927613053486,
+"grad_norm": 1.929777979850769,
+"learning_rate": 2.9847735699519423e-05,
+"loss": 1.9236,
+"step": 23310
+},
+{
+"epoch": 0.04542197157645886,
+"grad_norm": 2.989177703857422,
+"learning_rate": 2.9847540004864115e-05,
+"loss": 1.8256,
+"step": 23325
 }
 ],
 "logging_steps": 15,
@@ -9357,7 +11688,7 @@
 "attributes": {}
 }
 },
-"total_flos": 3.
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null
10895 |
+
},
|
10896 |
+
{
|
10897 |
+
"epoch": 0.04545118184756912,
|
10898 |
+
"grad_norm": 2.735318899154663,
|
10899 |
+
"learning_rate": 2.984734418517559e-05,
|
10900 |
+
"loss": 1.8279,
|
10901 |
+
"step": 23340
|
10902 |
+
},
|
10903 |
+
{
|
10904 |
+
"epoch": 0.04548039211867939,
|
10905 |
+
"grad_norm": 2.597491502761841,
|
10906 |
+
"learning_rate": 2.9847148240455495e-05,
|
10907 |
+
"loss": 1.8952,
|
10908 |
+
"step": 23355
|
10909 |
+
},
|
10910 |
+
{
|
10911 |
+
"epoch": 0.04550960238978965,
|
10912 |
+
"grad_norm": 3.5635156631469727,
|
10913 |
+
"learning_rate": 2.9846952170705473e-05,
|
10914 |
+
"loss": 1.7302,
|
10915 |
+
"step": 23370
|
10916 |
+
},
|
10917 |
+
{
|
10918 |
+
"epoch": 0.04553881266089991,
|
10919 |
+
"grad_norm": 6.853522777557373,
|
10920 |
+
"learning_rate": 2.9846755975927174e-05,
|
10921 |
+
"loss": 2.0572,
|
10922 |
+
"step": 23385
|
10923 |
+
},
|
10924 |
+
{
|
10925 |
+
"epoch": 0.04556802293201017,
|
10926 |
+
"grad_norm": 2.006582498550415,
|
10927 |
+
"learning_rate": 2.984655965612226e-05,
|
10928 |
+
"loss": 1.939,
|
10929 |
+
"step": 23400
|
10930 |
+
},
|
10931 |
+
{
|
10932 |
+
"epoch": 0.04559723320312044,
|
10933 |
+
"grad_norm": 3.6737654209136963,
|
10934 |
+
"learning_rate": 2.984636321129238e-05,
|
10935 |
+
"loss": 1.8714,
|
10936 |
+
"step": 23415
|
10937 |
+
},
|
10938 |
+
{
|
10939 |
+
"epoch": 0.0456264434742307,
|
10940 |
+
"grad_norm": 2.5802881717681885,
|
10941 |
+
"learning_rate": 2.9846166641439185e-05,
|
10942 |
+
"loss": 1.8687,
|
10943 |
+
"step": 23430
|
10944 |
+
},
|
10945 |
+
{
|
10946 |
+
"epoch": 0.04565565374534096,
|
10947 |
+
"grad_norm": 3.748178005218506,
|
10948 |
+
"learning_rate": 2.984596994656433e-05,
|
10949 |
+
"loss": 1.9146,
|
10950 |
+
"step": 23445
|
10951 |
+
},
|
10952 |
+
{
|
10953 |
+
"epoch": 0.04568486401645123,
|
10954 |
+
"grad_norm": 2.5193021297454834,
|
10955 |
+
"learning_rate": 2.9845773126669475e-05,
|
10956 |
+
"loss": 1.8845,
|
10957 |
+
"step": 23460
|
10958 |
+
},
|
10959 |
+
{
|
10960 |
+
"epoch": 0.04571407428756149,
|
10961 |
+
"grad_norm": 2.3217403888702393,
|
10962 |
+
"learning_rate": 2.984557618175628e-05,
|
10963 |
+
"loss": 1.9149,
|
10964 |
+
"step": 23475
|
10965 |
+
},
|
10966 |
+
{
|
10967 |
+
"epoch": 0.04574328455867175,
|
10968 |
+
"grad_norm": 3.9094045162200928,
|
10969 |
+
"learning_rate": 2.98453791118264e-05,
|
10970 |
+
"loss": 1.8615,
|
10971 |
+
"step": 23490
|
10972 |
+
},
|
10973 |
+
{
|
10974 |
+
"epoch": 0.04577249482978201,
|
10975 |
+
"grad_norm": 2.190436840057373,
|
10976 |
+
"learning_rate": 2.9845181916881495e-05,
|
10977 |
+
"loss": 1.8181,
|
10978 |
+
"step": 23505
|
10979 |
+
},
|
10980 |
+
{
|
10981 |
+
"epoch": 0.04580170510089228,
|
10982 |
+
"grad_norm": 3.33785343170166,
|
10983 |
+
"learning_rate": 2.984498459692322e-05,
|
10984 |
+
"loss": 1.9345,
|
10985 |
+
"step": 23520
|
10986 |
+
},
|
10987 |
+
{
|
10988 |
+
"epoch": 0.04583091537200254,
|
10989 |
+
"grad_norm": 4.065570831298828,
|
10990 |
+
"learning_rate": 2.9844787151953242e-05,
|
10991 |
+
"loss": 1.7801,
|
10992 |
+
"step": 23535
|
10993 |
+
},
|
10994 |
+
{
|
10995 |
+
"epoch": 0.0458601256431128,
|
10996 |
+
"grad_norm": 3.4585118293762207,
|
10997 |
+
"learning_rate": 2.984458958197323e-05,
|
10998 |
+
"loss": 1.8181,
|
10999 |
+
"step": 23550
|
11000 |
+
},
|
11001 |
+
{
|
11002 |
+
"epoch": 0.04588933591422307,
|
11003 |
+
"grad_norm": 2.1615355014801025,
|
11004 |
+
"learning_rate": 2.984439188698484e-05,
|
11005 |
+
"loss": 1.8265,
|
11006 |
+
"step": 23565
|
11007 |
+
},
|
11008 |
+
{
|
11009 |
+
"epoch": 0.04591854618533333,
|
11010 |
+
"grad_norm": 3.9168038368225098,
|
11011 |
+
"learning_rate": 2.9844194066989737e-05,
|
11012 |
+
"loss": 1.8508,
|
11013 |
+
"step": 23580
|
11014 |
+
},
|
11015 |
+
{
|
11016 |
+
"epoch": 0.04594775645644359,
|
11017 |
+
"grad_norm": 2.436638593673706,
|
11018 |
+
"learning_rate": 2.9843996121989587e-05,
|
11019 |
+
"loss": 1.9247,
|
11020 |
+
"step": 23595
|
11021 |
+
},
|
11022 |
+
{
|
11023 |
+
"epoch": 0.04597696672755385,
|
11024 |
+
"grad_norm": 3.637157678604126,
|
11025 |
+
"learning_rate": 2.9843798051986053e-05,
|
11026 |
+
"loss": 1.9791,
|
11027 |
+
"step": 23610
|
11028 |
+
},
|
11029 |
+
{
|
11030 |
+
"epoch": 0.04600617699866412,
|
11031 |
+
"grad_norm": 2.9777755737304688,
|
11032 |
+
"learning_rate": 2.9843599856980815e-05,
|
11033 |
+
"loss": 1.8227,
|
11034 |
+
"step": 23625
|
11035 |
+
},
|
11036 |
+
{
|
11037 |
+
"epoch": 0.04603538726977438,
|
11038 |
+
"grad_norm": 5.862057209014893,
|
11039 |
+
"learning_rate": 2.9843401536975533e-05,
|
11040 |
+
"loss": 1.9745,
|
11041 |
+
"step": 23640
|
11042 |
+
},
|
11043 |
+
{
|
11044 |
+
"epoch": 0.04606459754088464,
|
11045 |
+
"grad_norm": 2.937347173690796,
|
11046 |
+
"learning_rate": 2.9843203091971878e-05,
|
11047 |
+
"loss": 1.7575,
|
11048 |
+
"step": 23655
|
11049 |
+
},
|
11050 |
+
{
|
11051 |
+
"epoch": 0.0460938078119949,
|
11052 |
+
"grad_norm": 3.7695438861846924,
|
11053 |
+
"learning_rate": 2.984300452197152e-05,
|
11054 |
+
"loss": 1.9655,
|
11055 |
+
"step": 23670
|
11056 |
+
},
|
11057 |
+
{
|
11058 |
+
"epoch": 0.04612301808310517,
|
11059 |
+
"grad_norm": 3.1122677326202393,
|
11060 |
+
"learning_rate": 2.9842805826976137e-05,
|
11061 |
+
"loss": 2.0459,
|
11062 |
+
"step": 23685
|
11063 |
+
},
|
11064 |
+
{
|
11065 |
+
"epoch": 0.04615222835421543,
|
11066 |
+
"grad_norm": 4.503921031951904,
|
11067 |
+
"learning_rate": 2.98426070069874e-05,
|
11068 |
+
"loss": 1.8319,
|
11069 |
+
"step": 23700
|
11070 |
+
},
|
11071 |
+
{
|
11072 |
+
"epoch": 0.04618143862532569,
|
11073 |
+
"grad_norm": 2.247570514678955,
|
11074 |
+
"learning_rate": 2.9842408062006982e-05,
|
11075 |
+
"loss": 1.765,
|
11076 |
+
"step": 23715
|
11077 |
+
},
|
11078 |
+
{
|
11079 |
+
"epoch": 0.04621064889643596,
|
11080 |
+
"grad_norm": 3.512427806854248,
|
11081 |
+
"learning_rate": 2.9842208992036554e-05,
|
11082 |
+
"loss": 1.8118,
|
11083 |
+
"step": 23730
|
11084 |
+
},
|
11085 |
+
{
|
11086 |
+
"epoch": 0.04623985916754622,
|
11087 |
+
"grad_norm": 3.174893379211426,
|
11088 |
+
"learning_rate": 2.98420097970778e-05,
|
11089 |
+
"loss": 1.8791,
|
11090 |
+
"step": 23745
|
11091 |
+
},
|
11092 |
+
{
|
11093 |
+
"epoch": 0.04626906943865648,
|
11094 |
+
"grad_norm": 5.151320934295654,
|
11095 |
+
"learning_rate": 2.9841810477132392e-05,
|
11096 |
+
"loss": 1.817,
|
11097 |
+
"step": 23760
|
11098 |
+
},
|
11099 |
+
{
|
11100 |
+
"epoch": 0.04629827970976674,
|
11101 |
+
"grad_norm": 2.6584534645080566,
|
11102 |
+
"learning_rate": 2.984161103220201e-05,
|
11103 |
+
"loss": 1.8441,
|
11104 |
+
"step": 23775
|
11105 |
+
},
|
11106 |
+
{
|
11107 |
+
"epoch": 0.04632748998087701,
|
11108 |
+
"grad_norm": 1.967596173286438,
|
11109 |
+
"learning_rate": 2.9841411462288335e-05,
|
11110 |
+
"loss": 2.0349,
|
11111 |
+
"step": 23790
|
11112 |
+
},
|
11113 |
+
{
|
11114 |
+
"epoch": 0.04635670025198727,
|
11115 |
+
"grad_norm": 3.7256250381469727,
|
11116 |
+
"learning_rate": 2.9841211767393048e-05,
|
11117 |
+
"loss": 1.8131,
|
11118 |
+
"step": 23805
|
11119 |
+
},
|
11120 |
+
{
|
11121 |
+
"epoch": 0.04638591052309753,
|
11122 |
+
"grad_norm": 4.081748962402344,
|
11123 |
+
"learning_rate": 2.9841011947517826e-05,
|
11124 |
+
"loss": 1.8664,
|
11125 |
+
"step": 23820
|
11126 |
+
},
|
11127 |
+
{
|
11128 |
+
"epoch": 0.0464151207942078,
|
11129 |
+
"grad_norm": 2.527451276779175,
|
11130 |
+
"learning_rate": 2.984081200266436e-05,
|
11131 |
+
"loss": 1.8916,
|
11132 |
+
"step": 23835
|
11133 |
+
},
|
11134 |
+
{
|
11135 |
+
"epoch": 0.04644433106531806,
|
11136 |
+
"grad_norm": 2.159346103668213,
|
11137 |
+
"learning_rate": 2.9840611932834326e-05,
|
11138 |
+
"loss": 1.9254,
|
11139 |
+
"step": 23850
|
11140 |
+
},
|
11141 |
+
{
|
11142 |
+
"epoch": 0.04647354133642832,
|
11143 |
+
"grad_norm": 2.636519432067871,
|
11144 |
+
"learning_rate": 2.9840411738029412e-05,
|
11145 |
+
"loss": 1.8062,
|
11146 |
+
"step": 23865
|
11147 |
+
},
|
11148 |
+
{
|
11149 |
+
"epoch": 0.046502751607538584,
|
11150 |
+
"grad_norm": 2.3750388622283936,
|
11151 |
+
"learning_rate": 2.9840211418251303e-05,
|
11152 |
+
"loss": 1.7347,
|
11153 |
+
"step": 23880
|
11154 |
+
},
|
11155 |
+
{
|
11156 |
+
"epoch": 0.04653196187864885,
|
11157 |
+
"grad_norm": 2.181845188140869,
|
11158 |
+
"learning_rate": 2.9840010973501685e-05,
|
11159 |
+
"loss": 1.9207,
|
11160 |
+
"step": 23895
|
11161 |
+
},
|
11162 |
+
{
|
11163 |
+
"epoch": 0.04656117214975911,
|
11164 |
+
"grad_norm": 3.098555564880371,
|
11165 |
+
"learning_rate": 2.9839810403782252e-05,
|
11166 |
+
"loss": 1.9566,
|
11167 |
+
"step": 23910
|
11168 |
+
},
|
11169 |
+
{
|
11170 |
+
"epoch": 0.046590382420869374,
|
11171 |
+
"grad_norm": 5.142632484436035,
|
11172 |
+
"learning_rate": 2.9839609709094685e-05,
|
11173 |
+
"loss": 1.8871,
|
11174 |
+
"step": 23925
|
11175 |
+
},
|
11176 |
+
{
|
11177 |
+
"epoch": 0.04661959269197964,
|
11178 |
+
"grad_norm": 1.7162665128707886,
|
11179 |
+
"learning_rate": 2.9839408889440678e-05,
|
11180 |
+
"loss": 1.8784,
|
11181 |
+
"step": 23940
|
11182 |
+
},
|
11183 |
+
{
|
11184 |
+
"epoch": 0.0466488029630899,
|
11185 |
+
"grad_norm": 3.743401050567627,
|
11186 |
+
"learning_rate": 2.9839207944821925e-05,
|
11187 |
+
"loss": 1.9179,
|
11188 |
+
"step": 23955
|
11189 |
+
},
|
11190 |
+
{
|
11191 |
+
"epoch": 0.046678013234200164,
|
11192 |
+
"grad_norm": 2.952817440032959,
|
11193 |
+
"learning_rate": 2.983900687524011e-05,
|
11194 |
+
"loss": 1.9182,
|
11195 |
+
"step": 23970
|
11196 |
+
},
|
11197 |
+
{
|
11198 |
+
"epoch": 0.046707223505310425,
|
11199 |
+
"grad_norm": 2.3484249114990234,
|
11200 |
+
"learning_rate": 2.983880568069693e-05,
|
11201 |
+
"loss": 1.8336,
|
11202 |
+
"step": 23985
|
11203 |
+
},
|
11204 |
+
{
|
11205 |
+
"epoch": 0.04673643377642069,
|
11206 |
+
"grad_norm": 1.931921362876892,
|
11207 |
+
"learning_rate": 2.9838604361194087e-05,
|
11208 |
+
"loss": 1.771,
|
11209 |
+
"step": 24000
|
11210 |
+
},
|
11211 |
+
{
|
11212 |
+
"epoch": 0.04676564404753095,
|
11213 |
+
"grad_norm": 5.197054386138916,
|
11214 |
+
"learning_rate": 2.9838402916733263e-05,
|
11215 |
+
"loss": 1.8558,
|
11216 |
+
"step": 24015
|
11217 |
+
},
|
11218 |
+
{
|
11219 |
+
"epoch": 0.046794854318641214,
|
11220 |
+
"grad_norm": 2.862427234649658,
|
11221 |
+
"learning_rate": 2.9838201347316164e-05,
|
11222 |
+
"loss": 1.6956,
|
11223 |
+
"step": 24030
|
11224 |
+
},
|
11225 |
+
{
|
11226 |
+
"epoch": 0.04682406458975148,
|
11227 |
+
"grad_norm": 2.6003293991088867,
|
11228 |
+
"learning_rate": 2.9837999652944487e-05,
|
11229 |
+
"loss": 2.0473,
|
11230 |
+
"step": 24045
|
11231 |
+
},
|
11232 |
+
{
|
11233 |
+
"epoch": 0.04685327486086174,
|
11234 |
+
"grad_norm": 2.068455934524536,
|
11235 |
+
"learning_rate": 2.9837797833619926e-05,
|
11236 |
+
"loss": 1.9233,
|
11237 |
+
"step": 24060
|
11238 |
+
},
|
11239 |
+
{
|
11240 |
+
"epoch": 0.046882485131972004,
|
11241 |
+
"grad_norm": 3.4722557067871094,
|
11242 |
+
"learning_rate": 2.983759588934418e-05,
|
11243 |
+
"loss": 2.0052,
|
11244 |
+
"step": 24075
|
11245 |
+
},
|
11246 |
+
{
|
11247 |
+
"epoch": 0.046911695403082265,
|
11248 |
+
"grad_norm": 3.198732852935791,
|
11249 |
+
"learning_rate": 2.9837393820118954e-05,
|
11250 |
+
"loss": 1.906,
|
11251 |
+
"step": 24090
|
11252 |
+
},
|
11253 |
+
{
|
11254 |
+
"epoch": 0.04694090567419253,
|
11255 |
+
"grad_norm": 2.5050504207611084,
|
11256 |
+
"learning_rate": 2.983719162594595e-05,
|
11257 |
+
"loss": 1.833,
|
11258 |
+
"step": 24105
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 0.046970115945302794,
|
11262 |
+
"grad_norm": 2.1438305377960205,
|
11263 |
+
"learning_rate": 2.9836989306826866e-05,
|
11264 |
+
"loss": 1.7439,
|
11265 |
+
"step": 24120
|
11266 |
+
},
|
11267 |
+
{
|
11268 |
+
"epoch": 0.046999326216413055,
|
11269 |
+
"grad_norm": 1.5980552434921265,
|
11270 |
+
"learning_rate": 2.983678686276341e-05,
|
11271 |
+
"loss": 1.8018,
|
11272 |
+
"step": 24135
|
11273 |
+
},
|
11274 |
+
{
|
11275 |
+
"epoch": 0.047028536487523316,
|
11276 |
+
"grad_norm": 2.566882610321045,
|
11277 |
+
"learning_rate": 2.9836584293757282e-05,
|
11278 |
+
"loss": 1.8807,
|
11279 |
+
"step": 24150
|
11280 |
+
},
|
11281 |
+
{
|
11282 |
+
"epoch": 0.047057746758633584,
|
11283 |
+
"grad_norm": 4.125818252563477,
|
11284 |
+
"learning_rate": 2.9836381599810196e-05,
|
11285 |
+
"loss": 1.8198,
|
11286 |
+
"step": 24165
|
11287 |
+
},
|
11288 |
+
{
|
11289 |
+
"epoch": 0.047086957029743845,
|
11290 |
+
"grad_norm": 3.1217780113220215,
|
11291 |
+
"learning_rate": 2.983617878092385e-05,
|
11292 |
+
"loss": 2.0202,
|
11293 |
+
"step": 24180
|
11294 |
+
},
|
11295 |
+
{
|
11296 |
+
"epoch": 0.047116167300854106,
|
11297 |
+
"grad_norm": 3.001835823059082,
|
11298 |
+
"learning_rate": 2.9835975837099956e-05,
|
11299 |
+
"loss": 1.851,
|
11300 |
+
"step": 24195
|
11301 |
+
},
|
11302 |
+
{
|
11303 |
+
"epoch": 0.047145377571964374,
|
11304 |
+
"grad_norm": 4.060051918029785,
|
11305 |
+
"learning_rate": 2.9835772768340225e-05,
|
11306 |
+
"loss": 1.8918,
|
11307 |
+
"step": 24210
|
11308 |
+
},
|
11309 |
+
{
|
11310 |
+
"epoch": 0.047174587843074635,
|
11311 |
+
"grad_norm": 4.199372291564941,
|
11312 |
+
"learning_rate": 2.9835569574646363e-05,
|
11313 |
+
"loss": 1.848,
|
11314 |
+
"step": 24225
|
11315 |
+
},
|
11316 |
+
{
|
11317 |
+
"epoch": 0.047203798114184896,
|
11318 |
+
"grad_norm": 3.1068427562713623,
|
11319 |
+
"learning_rate": 2.9835366256020085e-05,
|
11320 |
+
"loss": 1.793,
|
11321 |
+
"step": 24240
|
11322 |
+
},
|
11323 |
+
{
|
11324 |
+
"epoch": 0.04723300838529516,
|
11325 |
+
"grad_norm": 3.30556321144104,
|
11326 |
+
"learning_rate": 2.9835162812463098e-05,
|
11327 |
+
"loss": 1.8914,
|
11328 |
+
"step": 24255
|
11329 |
+
},
|
11330 |
+
{
|
11331 |
+
"epoch": 0.047262218656405425,
|
11332 |
+
"grad_norm": 3.898911476135254,
|
11333 |
+
"learning_rate": 2.9834959243977123e-05,
|
11334 |
+
"loss": 1.929,
|
11335 |
+
"step": 24270
|
11336 |
+
},
|
11337 |
+
{
|
11338 |
+
"epoch": 0.047291428927515686,
|
11339 |
+
"grad_norm": 2.1169817447662354,
|
11340 |
+
"learning_rate": 2.9834755550563865e-05,
|
11341 |
+
"loss": 1.9513,
|
11342 |
+
"step": 24285
|
11343 |
+
},
|
11344 |
+
{
|
11345 |
+
"epoch": 0.04732063919862595,
|
11346 |
+
"grad_norm": 3.0540499687194824,
|
11347 |
+
"learning_rate": 2.9834551732225044e-05,
|
11348 |
+
"loss": 1.8283,
|
11349 |
+
"step": 24300
|
11350 |
+
},
|
11351 |
+
{
|
11352 |
+
"epoch": 0.047349849469736215,
|
11353 |
+
"grad_norm": 2.495234727859497,
|
11354 |
+
"learning_rate": 2.9834347788962383e-05,
|
11355 |
+
"loss": 1.8268,
|
11356 |
+
"step": 24315
|
11357 |
+
},
|
11358 |
+
{
|
11359 |
+
"epoch": 0.047379059740846476,
|
11360 |
+
"grad_norm": 4.254831790924072,
|
11361 |
+
"learning_rate": 2.9834143720777588e-05,
|
11362 |
+
"loss": 1.8933,
|
11363 |
+
"step": 24330
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 0.04740827001195674,
|
11367 |
+
"grad_norm": 5.430483818054199,
|
11368 |
+
"learning_rate": 2.9833939527672384e-05,
|
11369 |
+
"loss": 1.8128,
|
11370 |
+
"step": 24345
|
11371 |
+
},
|
11372 |
+
{
|
11373 |
+
"epoch": 0.047437480283067,
|
11374 |
+
"grad_norm": 3.332515239715576,
|
11375 |
+
"learning_rate": 2.983373520964849e-05,
|
11376 |
+
"loss": 1.9089,
|
11377 |
+
"step": 24360
|
11378 |
+
},
|
11379 |
+
{
|
11380 |
+
"epoch": 0.047466690554177265,
|
11381 |
+
"grad_norm": 2.1588125228881836,
|
11382 |
+
"learning_rate": 2.983353076670762e-05,
|
11383 |
+
"loss": 1.7578,
|
11384 |
+
"step": 24375
|
11385 |
+
},
|
11386 |
+
{
|
11387 |
+
"epoch": 0.047495900825287526,
|
11388 |
+
"grad_norm": 2.6773619651794434,
|
11389 |
+
"learning_rate": 2.9833326198851503e-05,
|
11390 |
+
"loss": 1.9486,
|
11391 |
+
"step": 24390
|
11392 |
+
},
|
11393 |
+
{
|
11394 |
+
"epoch": 0.04752511109639779,
|
11395 |
+
"grad_norm": 2.855078935623169,
|
11396 |
+
"learning_rate": 2.9833121506081862e-05,
|
11397 |
+
"loss": 1.9339,
|
11398 |
+
"step": 24405
|
11399 |
+
},
|
11400 |
+
{
|
11401 |
+
"epoch": 0.047554321367508055,
|
11402 |
+
"grad_norm": 2.539210557937622,
|
11403 |
+
"learning_rate": 2.983291668840042e-05,
|
11404 |
+
"loss": 1.8479,
|
11405 |
+
"step": 24420
|
11406 |
+
},
|
11407 |
+
{
|
11408 |
+
"epoch": 0.047583531638618316,
|
11409 |
+
"grad_norm": 3.0605132579803467,
|
11410 |
+
"learning_rate": 2.9832711745808895e-05,
|
11411 |
+
"loss": 1.8924,
|
11412 |
+
"step": 24435
|
11413 |
+
},
|
11414 |
+
{
|
11415 |
+
"epoch": 0.04761274190972858,
|
11416 |
+
"grad_norm": 2.142998218536377,
|
11417 |
+
"learning_rate": 2.9832506678309025e-05,
|
11418 |
+
"loss": 2.0331,
|
11419 |
+
"step": 24450
|
11420 |
+
},
|
11421 |
+
{
|
11422 |
+
"epoch": 0.04764195218083884,
|
11423 |
+
"grad_norm": 3.082610845565796,
|
11424 |
+
"learning_rate": 2.983230148590253e-05,
|
11425 |
+
"loss": 1.8299,
|
11426 |
+
"step": 24465
|
11427 |
+
},
|
11428 |
+
{
|
11429 |
+
"epoch": 0.047671162451949106,
|
11430 |
+
"grad_norm": 4.011190414428711,
|
11431 |
+
"learning_rate": 2.9832096168591128e-05,
|
11432 |
+
"loss": 1.8591,
|
11433 |
+
"step": 24480
|
11434 |
+
},
|
11435 |
+
{
|
11436 |
+
"epoch": 0.04770037272305937,
|
11437 |
+
"grad_norm": 2.326756238937378,
|
11438 |
+
"learning_rate": 2.983189072637657e-05,
|
11439 |
+
"loss": 1.8687,
|
11440 |
+
"step": 24495
|
11441 |
+
},
|
11442 |
+
{
|
11443 |
+
"epoch": 0.04772958299416963,
|
11444 |
+
"grad_norm": 2.1641993522644043,
|
11445 |
+
"learning_rate": 2.9831685159260568e-05,
|
11446 |
+
"loss": 1.9402,
|
11447 |
+
"step": 24510
|
11448 |
+
},
|
11449 |
+
{
|
11450 |
+
"epoch": 0.047758793265279896,
|
11451 |
+
"grad_norm": 2.614429473876953,
|
11452 |
+
"learning_rate": 2.983147946724486e-05,
|
11453 |
+
"loss": 1.8911,
|
11454 |
+
"step": 24525
|
11455 |
+
},
|
11456 |
+
{
|
11457 |
+
"epoch": 0.04778800353639016,
|
11458 |
+
"grad_norm": 2.163760185241699,
|
11459 |
+
"learning_rate": 2.983127365033118e-05,
|
11460 |
+
"loss": 1.7623,
|
11461 |
+
"step": 24540
|
11462 |
+
},
|
11463 |
+
{
|
11464 |
+
"epoch": 0.04781721380750042,
|
11465 |
+
"grad_norm": 1.837815761566162,
|
11466 |
+
"learning_rate": 2.9831067708521257e-05,
|
11467 |
+
"loss": 2.0103,
|
11468 |
+
"step": 24555
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 0.04784642407861068,
|
11472 |
+
"grad_norm": 4.629458904266357,
|
11473 |
+
"learning_rate": 2.9830861641816826e-05,
|
11474 |
+
"loss": 1.8473,
|
11475 |
+
"step": 24570
|
11476 |
+
},
|
11477 |
+
{
|
11478 |
+
"epoch": 0.04787563434972095,
|
11479 |
+
"grad_norm": 2.741942882537842,
|
11480 |
+
"learning_rate": 2.9830655450219623e-05,
|
11481 |
+
"loss": 1.8219,
|
11482 |
+
"step": 24585
|
11483 |
+
},
|
11484 |
+
{
|
11485 |
+
"epoch": 0.04790484462083121,
|
11486 |
+
"grad_norm": 2.6273906230926514,
|
11487 |
+
"learning_rate": 2.9830449133731387e-05,
|
11488 |
+
"loss": 1.7688,
|
11489 |
+
"step": 24600
|
11490 |
+
},
|
11491 |
+
{
|
11492 |
+
"epoch": 0.04793405489194147,
|
11493 |
+
"grad_norm": 2.2749216556549072,
|
11494 |
+
"learning_rate": 2.983024269235385e-05,
|
11495 |
+
"loss": 1.9809,
|
11496 |
+
"step": 24615
|
11497 |
+
},
|
11498 |
+
{
|
11499 |
+
"epoch": 0.04796326516305174,
|
11500 |
+
"grad_norm": 4.1970367431640625,
|
11501 |
+
"learning_rate": 2.9830036126088754e-05,
|
11502 |
+
"loss": 1.7998,
|
11503 |
+
"step": 24630
|
11504 |
+
},
|
11505 |
+
{
|
11506 |
+
"epoch": 0.047992475434162,
|
11507 |
+
"grad_norm": 2.87906813621521,
|
11508 |
+
"learning_rate": 2.982982943493784e-05,
|
11509 |
+
"loss": 1.8496,
|
11510 |
+
"step": 24645
|
11511 |
+
},
|
11512 |
+
{
|
11513 |
+
"epoch": 0.04802168570527226,
|
11514 |
+
"grad_norm": 1.9954204559326172,
|
11515 |
+
"learning_rate": 2.9829622618902848e-05,
|
11516 |
+
"loss": 1.8782,
|
11517 |
+
"step": 24660
|
11518 |
+
},
|
11519 |
+
{
|
11520 |
+
"epoch": 0.04805089597638252,
|
11521 |
+
"grad_norm": 3.3936777114868164,
|
11522 |
+
"learning_rate": 2.982941567798551e-05,
|
11523 |
+
"loss": 1.7,
|
11524 |
+
"step": 24675
|
11525 |
+
},
|
11526 |
+
{
|
11527 |
+
"epoch": 0.04808010624749279,
|
11528 |
+
"grad_norm": 5.30580997467041,
|
11529 |
+
"learning_rate": 2.9829208612187585e-05,
|
11530 |
+
"loss": 1.8712,
|
11531 |
+
"step": 24690
|
11532 |
+
},
|
11533 |
+
{
|
11534 |
+
"epoch": 0.04810931651860305,
|
11535 |
+
"grad_norm": 1.797080636024475,
|
11536 |
+
"learning_rate": 2.982900142151081e-05,
|
11537 |
+
"loss": 1.8711,
|
11538 |
+
"step": 24705
|
11539 |
+
},
|
11540 |
+
{
|
11541 |
+
"epoch": 0.04813852678971331,
|
11542 |
+
"grad_norm": 1.7338413000106812,
|
11543 |
+
"learning_rate": 2.9828794105956922e-05,
|
11544 |
+
"loss": 1.9125,
|
11545 |
+
"step": 24720
|
11546 |
+
},
|
11547 |
+
{
|
11548 |
+
"epoch": 0.04816773706082357,
|
11549 |
+
"grad_norm": 6.833633899688721,
|
11550 |
+
"learning_rate": 2.9828586665527677e-05,
|
11551 |
+
"loss": 1.8622,
|
11552 |
+
"step": 24735
|
11553 |
+
},
|
11554 |
+
{
|
11555 |
+
"epoch": 0.04819694733193384,
|
11556 |
+
"grad_norm": 2.8855645656585693,
|
11557 |
+
"learning_rate": 2.9828379100224814e-05,
|
11558 |
+
"loss": 1.7596,
|
11559 |
+
"step": 24750
|
11560 |
+
},
|
11561 |
+
{
|
11562 |
+
"epoch": 0.0482261576030441,
|
11563 |
+
"grad_norm": 3.7425286769866943,
|
11564 |
+
"learning_rate": 2.982817141005009e-05,
|
11565 |
+
"loss": 1.8452,
|
11566 |
+
"step": 24765
|
11567 |
+
},
|
11568 |
+
{
|
11569 |
+
"epoch": 0.04825536787415436,
|
11570 |
+
"grad_norm": 4.187405109405518,
|
11571 |
+
"learning_rate": 2.9827963595005248e-05,
|
11572 |
+
"loss": 1.8669,
|
11573 |
+
"step": 24780
|
11574 |
+
},
|
11575 |
+
{
|
11576 |
+
"epoch": 0.04828457814526463,
|
11577 |
+
"grad_norm": 4.476016998291016,
|
11578 |
+
"learning_rate": 2.982775565509204e-05,
|
11579 |
+
"loss": 1.9221,
|
11580 |
+
"step": 24795
|
11581 |
+
},
|
11582 |
+
{
|
11583 |
+
"epoch": 0.04831378841637489,
|
11584 |
+
"grad_norm": 3.6991500854492188,
|
11585 |
+
"learning_rate": 2.9827547590312213e-05,
|
11586 |
+
"loss": 1.5434,
|
11587 |
+
"step": 24810
|
11588 |
+
},
|
11589 |
+
{
|
11590 |
+
"epoch": 0.04834299868748515,
|
11591 |
+
"grad_norm": 3.0707576274871826,
|
11592 |
+
"learning_rate": 2.9827339400667524e-05,
|
11593 |
+
"loss": 1.88,
|
11594 |
+
"step": 24825
|
11595 |
+
},
|
11596 |
+
{
|
11597 |
+
"epoch": 0.04837220895859541,
|
11598 |
+
"grad_norm": 2.604163646697998,
|
11599 |
+
"learning_rate": 2.9827131086159723e-05,
|
11600 |
+
"loss": 1.9946,
|
11601 |
+
"step": 24840
|
11602 |
+
},
|
11603 |
+
{
|
11604 |
+
"epoch": 0.04840141922970568,
|
11605 |
+
"grad_norm": 4.7181549072265625,
|
11606 |
+
"learning_rate": 2.9826922646790568e-05,
|
11607 |
+
"loss": 1.8807,
|
11608 |
+
"step": 24855
|
11609 |
+
},
|
11610 |
+
{
|
11611 |
+
"epoch": 0.04843062950081594,
|
11612 |
+
"grad_norm": 3.983146905899048,
|
11613 |
+
"learning_rate": 2.9826714082561808e-05,
|
11614 |
+
"loss": 1.8593,
|
11615 |
+
"step": 24870
|
11616 |
+
},
|
11617 |
+
{
|
11618 |
+
"epoch": 0.0484598397719262,
|
11619 |
+
"grad_norm": 4.0342912673950195,
|
11620 |
+
"learning_rate": 2.982650539347521e-05,
|
11621 |
+
"loss": 1.9208,
|
11622 |
+
"step": 24885
|
11623 |
+
},
|
11624 |
+
{
|
11625 |
+
"epoch": 0.04848905004303647,
|
11626 |
+
"grad_norm": 3.092644691467285,
|
11627 |
+
"learning_rate": 2.982629657953252e-05,
|
11628 |
+
"loss": 1.9611,
|
11629 |
+
"step": 24900
|
11630 |
+
},
|
11631 |
+
{
|
11632 |
+
"epoch": 0.04851826031414673,
|
11633 |
+
"grad_norm": 2.244311809539795,
|
11634 |
+
"learning_rate": 2.98260876407355e-05,
|
11635 |
+
"loss": 1.7965,
|
11636 |
+
"step": 24915
|
11637 |
+
},
|
11638 |
+
{
|
11639 |
+
"epoch": 0.04854747058525699,
|
11640 |
+
"grad_norm": 4.740140914916992,
|
11641 |
+
"learning_rate": 2.9825878577085917e-05,
|
11642 |
+
"loss": 2.0418,
|
11643 |
+
"step": 24930
|
11644 |
+
},
|
11645 |
+
{
|
11646 |
+
"epoch": 0.04857668085636725,
|
11647 |
+
"grad_norm": 1.6131473779678345,
|
11648 |
+
"learning_rate": 2.9825669388585523e-05,
|
11649 |
+
"loss": 1.7781,
|
11650 |
+
"step": 24945
|
11651 |
+
},
|
11652 |
+
{
|
11653 |
+
"epoch": 0.04860589112747752,
|
11654 |
+
"grad_norm": 3.3024797439575195,
|
11655 |
+
"learning_rate": 2.9825460075236077e-05,
|
11656 |
+
"loss": 1.9176,
|
11657 |
+
"step": 24960
|
11658 |
+
},
|
11659 |
+
{
|
11660 |
+
"epoch": 0.04863510139858778,
|
11661 |
+
"grad_norm": 3.6958138942718506,
|
11662 |
+
"learning_rate": 2.9825250637039348e-05,
|
11663 |
+
"loss": 1.8613,
|
11664 |
+
"step": 24975
|
11665 |
+
},
|
11666 |
+
{
|
11667 |
+
"epoch": 0.04866431166969804,
|
11668 |
+
"grad_norm": 2.277597427368164,
|
11669 |
+
"learning_rate": 2.9825041073997102e-05,
|
11670 |
+
"loss": 1.9942,
|
11671 |
+
"step": 24990
|
11672 |
}
|
11673 |
],
"logging_steps": 15,
…
"attributes": {}
}
},
+"total_flos": 3.955653771590246e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null