gabrielaltay
commited on
Commit
•
1203cbb
1
Parent(s):
2809865
Training in progress, step 18216, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500389884
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da8378f58afdcbb61da5aab016c87b360ba3b97ce5895a57c14de5629f844194
|
3 |
size 500389884
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000900218
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d95db74dc8b3eeeb846cb4c4e9addbe7241924481ae96c22081d519fcd26cbfc
|
3 |
size 1000900218
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12f61f7e51cc603cdd137a87be15b659d00af22f66c4e5d529fd92a34ec1a2fc
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a04bf168981df42daaf3298864d910e1018e869d302c5844d03dbfe8eb4c3e75
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d863b1e8dd7aadad275d56a5330082a7af839c16da6482be7ffc6e9ccd5d7a6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -28343,6 +28343,3548 @@
|
|
28343 |
"learning_rate": 1.0007905528929295e-05,
|
28344 |
"loss": 5.4911,
|
28345 |
"step": 16192
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28346 |
}
|
28347 |
],
|
28348 |
"logging_steps": 4,
|
@@ -28350,7 +31892,7 @@
|
|
28350 |
"num_input_tokens_seen": 0,
|
28351 |
"num_train_epochs": 1,
|
28352 |
"save_steps": 2024,
|
28353 |
-
"total_flos":
|
28354 |
"train_batch_size": 8,
|
28355 |
"trial_name": null,
|
28356 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9000444686002272,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 18216,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
28343 |
"learning_rate": 1.0007905528929295e-05,
|
28344 |
"loss": 5.4911,
|
28345 |
"step": 16192
|
28346 |
+
},
|
28347 |
+
{
|
28348 |
+
"epoch": 0.8,
|
28349 |
+
"grad_norm": 2.1323323249816895,
|
28350 |
+
"learning_rate": 9.998023617767677e-06,
|
28351 |
+
"loss": 5.5276,
|
28352 |
+
"step": 16196
|
28353 |
+
},
|
28354 |
+
{
|
28355 |
+
"epoch": 0.8,
|
28356 |
+
"grad_norm": 2.277160167694092,
|
28357 |
+
"learning_rate": 9.988141706606058e-06,
|
28358 |
+
"loss": 5.4718,
|
28359 |
+
"step": 16200
|
28360 |
+
},
|
28361 |
+
{
|
28362 |
+
"epoch": 0.8,
|
28363 |
+
"grad_norm": 1.8769733905792236,
|
28364 |
+
"learning_rate": 9.97825979544444e-06,
|
28365 |
+
"loss": 5.3622,
|
28366 |
+
"step": 16204
|
28367 |
+
},
|
28368 |
+
{
|
28369 |
+
"epoch": 0.8,
|
28370 |
+
"grad_norm": 1.8731465339660645,
|
28371 |
+
"learning_rate": 9.96837788428282e-06,
|
28372 |
+
"loss": 5.5215,
|
28373 |
+
"step": 16208
|
28374 |
+
},
|
28375 |
+
{
|
28376 |
+
"epoch": 0.8,
|
28377 |
+
"grad_norm": 1.8808999061584473,
|
28378 |
+
"learning_rate": 9.958495973121202e-06,
|
28379 |
+
"loss": 5.3953,
|
28380 |
+
"step": 16212
|
28381 |
+
},
|
28382 |
+
{
|
28383 |
+
"epoch": 0.8,
|
28384 |
+
"grad_norm": 1.8657152652740479,
|
28385 |
+
"learning_rate": 9.948614061959584e-06,
|
28386 |
+
"loss": 5.3316,
|
28387 |
+
"step": 16216
|
28388 |
+
},
|
28389 |
+
{
|
28390 |
+
"epoch": 0.8,
|
28391 |
+
"grad_norm": 1.849173665046692,
|
28392 |
+
"learning_rate": 9.938732150797965e-06,
|
28393 |
+
"loss": 5.4471,
|
28394 |
+
"step": 16220
|
28395 |
+
},
|
28396 |
+
{
|
28397 |
+
"epoch": 0.8,
|
28398 |
+
"grad_norm": 2.220717668533325,
|
28399 |
+
"learning_rate": 9.928850239636347e-06,
|
28400 |
+
"loss": 5.5204,
|
28401 |
+
"step": 16224
|
28402 |
+
},
|
28403 |
+
{
|
28404 |
+
"epoch": 0.8,
|
28405 |
+
"grad_norm": 2.0210342407226562,
|
28406 |
+
"learning_rate": 9.918968328474727e-06,
|
28407 |
+
"loss": 5.3364,
|
28408 |
+
"step": 16228
|
28409 |
+
},
|
28410 |
+
{
|
28411 |
+
"epoch": 0.8,
|
28412 |
+
"grad_norm": 1.9695372581481934,
|
28413 |
+
"learning_rate": 9.909086417313109e-06,
|
28414 |
+
"loss": 5.485,
|
28415 |
+
"step": 16232
|
28416 |
+
},
|
28417 |
+
{
|
28418 |
+
"epoch": 0.8,
|
28419 |
+
"grad_norm": 1.875001311302185,
|
28420 |
+
"learning_rate": 9.89920450615149e-06,
|
28421 |
+
"loss": 5.3977,
|
28422 |
+
"step": 16236
|
28423 |
+
},
|
28424 |
+
{
|
28425 |
+
"epoch": 0.8,
|
28426 |
+
"grad_norm": 2.136852741241455,
|
28427 |
+
"learning_rate": 9.889322594989871e-06,
|
28428 |
+
"loss": 5.5784,
|
28429 |
+
"step": 16240
|
28430 |
+
},
|
28431 |
+
{
|
28432 |
+
"epoch": 0.8,
|
28433 |
+
"grad_norm": 1.8972970247268677,
|
28434 |
+
"learning_rate": 9.879440683828253e-06,
|
28435 |
+
"loss": 5.4973,
|
28436 |
+
"step": 16244
|
28437 |
+
},
|
28438 |
+
{
|
28439 |
+
"epoch": 0.8,
|
28440 |
+
"grad_norm": 2.14034366607666,
|
28441 |
+
"learning_rate": 9.869558772666634e-06,
|
28442 |
+
"loss": 5.546,
|
28443 |
+
"step": 16248
|
28444 |
+
},
|
28445 |
+
{
|
28446 |
+
"epoch": 0.8,
|
28447 |
+
"grad_norm": 2.0028293132781982,
|
28448 |
+
"learning_rate": 9.859676861505016e-06,
|
28449 |
+
"loss": 5.3869,
|
28450 |
+
"step": 16252
|
28451 |
+
},
|
28452 |
+
{
|
28453 |
+
"epoch": 0.8,
|
28454 |
+
"grad_norm": 1.9485490322113037,
|
28455 |
+
"learning_rate": 9.849794950343396e-06,
|
28456 |
+
"loss": 5.4852,
|
28457 |
+
"step": 16256
|
28458 |
+
},
|
28459 |
+
{
|
28460 |
+
"epoch": 0.8,
|
28461 |
+
"grad_norm": 1.9260238409042358,
|
28462 |
+
"learning_rate": 9.839913039181778e-06,
|
28463 |
+
"loss": 5.473,
|
28464 |
+
"step": 16260
|
28465 |
+
},
|
28466 |
+
{
|
28467 |
+
"epoch": 0.8,
|
28468 |
+
"grad_norm": 2.1630001068115234,
|
28469 |
+
"learning_rate": 9.830031128020159e-06,
|
28470 |
+
"loss": 5.4207,
|
28471 |
+
"step": 16264
|
28472 |
+
},
|
28473 |
+
{
|
28474 |
+
"epoch": 0.8,
|
28475 |
+
"grad_norm": 1.9328508377075195,
|
28476 |
+
"learning_rate": 9.82014921685854e-06,
|
28477 |
+
"loss": 5.4433,
|
28478 |
+
"step": 16268
|
28479 |
+
},
|
28480 |
+
{
|
28481 |
+
"epoch": 0.8,
|
28482 |
+
"grad_norm": 1.9832642078399658,
|
28483 |
+
"learning_rate": 9.810267305696923e-06,
|
28484 |
+
"loss": 5.4387,
|
28485 |
+
"step": 16272
|
28486 |
+
},
|
28487 |
+
{
|
28488 |
+
"epoch": 0.8,
|
28489 |
+
"grad_norm": 2.0616111755371094,
|
28490 |
+
"learning_rate": 9.800385394535303e-06,
|
28491 |
+
"loss": 5.3895,
|
28492 |
+
"step": 16276
|
28493 |
+
},
|
28494 |
+
{
|
28495 |
+
"epoch": 0.8,
|
28496 |
+
"grad_norm": 2.1709606647491455,
|
28497 |
+
"learning_rate": 9.790503483373685e-06,
|
28498 |
+
"loss": 5.5639,
|
28499 |
+
"step": 16280
|
28500 |
+
},
|
28501 |
+
{
|
28502 |
+
"epoch": 0.8,
|
28503 |
+
"grad_norm": 2.1553335189819336,
|
28504 |
+
"learning_rate": 9.780621572212066e-06,
|
28505 |
+
"loss": 5.5419,
|
28506 |
+
"step": 16284
|
28507 |
+
},
|
28508 |
+
{
|
28509 |
+
"epoch": 0.8,
|
28510 |
+
"grad_norm": 1.7869793176651,
|
28511 |
+
"learning_rate": 9.770739661050448e-06,
|
28512 |
+
"loss": 5.4425,
|
28513 |
+
"step": 16288
|
28514 |
+
},
|
28515 |
+
{
|
28516 |
+
"epoch": 0.8,
|
28517 |
+
"grad_norm": 1.902388095855713,
|
28518 |
+
"learning_rate": 9.760857749888828e-06,
|
28519 |
+
"loss": 5.3651,
|
28520 |
+
"step": 16292
|
28521 |
+
},
|
28522 |
+
{
|
28523 |
+
"epoch": 0.81,
|
28524 |
+
"grad_norm": 2.135723829269409,
|
28525 |
+
"learning_rate": 9.750975838727212e-06,
|
28526 |
+
"loss": 5.5838,
|
28527 |
+
"step": 16296
|
28528 |
+
},
|
28529 |
+
{
|
28530 |
+
"epoch": 0.81,
|
28531 |
+
"grad_norm": 2.3749701976776123,
|
28532 |
+
"learning_rate": 9.741093927565592e-06,
|
28533 |
+
"loss": 5.5562,
|
28534 |
+
"step": 16300
|
28535 |
+
},
|
28536 |
+
{
|
28537 |
+
"epoch": 0.81,
|
28538 |
+
"grad_norm": 1.9421731233596802,
|
28539 |
+
"learning_rate": 9.731212016403973e-06,
|
28540 |
+
"loss": 5.432,
|
28541 |
+
"step": 16304
|
28542 |
+
},
|
28543 |
+
{
|
28544 |
+
"epoch": 0.81,
|
28545 |
+
"grad_norm": 1.87296462059021,
|
28546 |
+
"learning_rate": 9.721330105242355e-06,
|
28547 |
+
"loss": 5.5429,
|
28548 |
+
"step": 16308
|
28549 |
+
},
|
28550 |
+
{
|
28551 |
+
"epoch": 0.81,
|
28552 |
+
"grad_norm": 2.0159077644348145,
|
28553 |
+
"learning_rate": 9.711448194080735e-06,
|
28554 |
+
"loss": 5.5088,
|
28555 |
+
"step": 16312
|
28556 |
+
},
|
28557 |
+
{
|
28558 |
+
"epoch": 0.81,
|
28559 |
+
"grad_norm": 1.9539657831192017,
|
28560 |
+
"learning_rate": 9.701566282919117e-06,
|
28561 |
+
"loss": 5.49,
|
28562 |
+
"step": 16316
|
28563 |
+
},
|
28564 |
+
{
|
28565 |
+
"epoch": 0.81,
|
28566 |
+
"grad_norm": 2.0840256214141846,
|
28567 |
+
"learning_rate": 9.691684371757497e-06,
|
28568 |
+
"loss": 5.4972,
|
28569 |
+
"step": 16320
|
28570 |
+
},
|
28571 |
+
{
|
28572 |
+
"epoch": 0.81,
|
28573 |
+
"grad_norm": 2.0817222595214844,
|
28574 |
+
"learning_rate": 9.68180246059588e-06,
|
28575 |
+
"loss": 5.5185,
|
28576 |
+
"step": 16324
|
28577 |
+
},
|
28578 |
+
{
|
28579 |
+
"epoch": 0.81,
|
28580 |
+
"grad_norm": 2.172551393508911,
|
28581 |
+
"learning_rate": 9.671920549434262e-06,
|
28582 |
+
"loss": 5.5465,
|
28583 |
+
"step": 16328
|
28584 |
+
},
|
28585 |
+
{
|
28586 |
+
"epoch": 0.81,
|
28587 |
+
"grad_norm": 1.8779146671295166,
|
28588 |
+
"learning_rate": 9.662038638272644e-06,
|
28589 |
+
"loss": 5.5047,
|
28590 |
+
"step": 16332
|
28591 |
+
},
|
28592 |
+
{
|
28593 |
+
"epoch": 0.81,
|
28594 |
+
"grad_norm": 1.9017332792282104,
|
28595 |
+
"learning_rate": 9.652156727111024e-06,
|
28596 |
+
"loss": 5.4705,
|
28597 |
+
"step": 16336
|
28598 |
+
},
|
28599 |
+
{
|
28600 |
+
"epoch": 0.81,
|
28601 |
+
"grad_norm": 2.0198209285736084,
|
28602 |
+
"learning_rate": 9.642274815949404e-06,
|
28603 |
+
"loss": 5.4886,
|
28604 |
+
"step": 16340
|
28605 |
+
},
|
28606 |
+
{
|
28607 |
+
"epoch": 0.81,
|
28608 |
+
"grad_norm": 2.0686516761779785,
|
28609 |
+
"learning_rate": 9.632392904787786e-06,
|
28610 |
+
"loss": 5.5188,
|
28611 |
+
"step": 16344
|
28612 |
+
},
|
28613 |
+
{
|
28614 |
+
"epoch": 0.81,
|
28615 |
+
"grad_norm": 1.962902545928955,
|
28616 |
+
"learning_rate": 9.622510993626167e-06,
|
28617 |
+
"loss": 5.5414,
|
28618 |
+
"step": 16348
|
28619 |
+
},
|
28620 |
+
{
|
28621 |
+
"epoch": 0.81,
|
28622 |
+
"grad_norm": 2.0159122943878174,
|
28623 |
+
"learning_rate": 9.612629082464549e-06,
|
28624 |
+
"loss": 5.422,
|
28625 |
+
"step": 16352
|
28626 |
+
},
|
28627 |
+
{
|
28628 |
+
"epoch": 0.81,
|
28629 |
+
"grad_norm": 2.2383811473846436,
|
28630 |
+
"learning_rate": 9.60274717130293e-06,
|
28631 |
+
"loss": 5.5384,
|
28632 |
+
"step": 16356
|
28633 |
+
},
|
28634 |
+
{
|
28635 |
+
"epoch": 0.81,
|
28636 |
+
"grad_norm": 1.993220567703247,
|
28637 |
+
"learning_rate": 9.592865260141313e-06,
|
28638 |
+
"loss": 5.3551,
|
28639 |
+
"step": 16360
|
28640 |
+
},
|
28641 |
+
{
|
28642 |
+
"epoch": 0.81,
|
28643 |
+
"grad_norm": 2.130995035171509,
|
28644 |
+
"learning_rate": 9.582983348979693e-06,
|
28645 |
+
"loss": 5.4926,
|
28646 |
+
"step": 16364
|
28647 |
+
},
|
28648 |
+
{
|
28649 |
+
"epoch": 0.81,
|
28650 |
+
"grad_norm": 1.9019581079483032,
|
28651 |
+
"learning_rate": 9.573101437818074e-06,
|
28652 |
+
"loss": 5.5623,
|
28653 |
+
"step": 16368
|
28654 |
+
},
|
28655 |
+
{
|
28656 |
+
"epoch": 0.81,
|
28657 |
+
"grad_norm": 2.1175732612609863,
|
28658 |
+
"learning_rate": 9.563219526656456e-06,
|
28659 |
+
"loss": 5.4529,
|
28660 |
+
"step": 16372
|
28661 |
+
},
|
28662 |
+
{
|
28663 |
+
"epoch": 0.81,
|
28664 |
+
"grad_norm": 1.8680050373077393,
|
28665 |
+
"learning_rate": 9.553337615494836e-06,
|
28666 |
+
"loss": 5.4273,
|
28667 |
+
"step": 16376
|
28668 |
+
},
|
28669 |
+
{
|
28670 |
+
"epoch": 0.81,
|
28671 |
+
"grad_norm": 2.0988821983337402,
|
28672 |
+
"learning_rate": 9.543455704333218e-06,
|
28673 |
+
"loss": 5.477,
|
28674 |
+
"step": 16380
|
28675 |
+
},
|
28676 |
+
{
|
28677 |
+
"epoch": 0.81,
|
28678 |
+
"grad_norm": 2.198651075363159,
|
28679 |
+
"learning_rate": 9.5335737931716e-06,
|
28680 |
+
"loss": 5.5427,
|
28681 |
+
"step": 16384
|
28682 |
+
},
|
28683 |
+
{
|
28684 |
+
"epoch": 0.81,
|
28685 |
+
"grad_norm": 2.1054868698120117,
|
28686 |
+
"learning_rate": 9.523691882009982e-06,
|
28687 |
+
"loss": 5.4173,
|
28688 |
+
"step": 16388
|
28689 |
+
},
|
28690 |
+
{
|
28691 |
+
"epoch": 0.81,
|
28692 |
+
"grad_norm": 2.0211637020111084,
|
28693 |
+
"learning_rate": 9.513809970848363e-06,
|
28694 |
+
"loss": 5.6035,
|
28695 |
+
"step": 16392
|
28696 |
+
},
|
28697 |
+
{
|
28698 |
+
"epoch": 0.81,
|
28699 |
+
"grad_norm": 2.0547540187835693,
|
28700 |
+
"learning_rate": 9.503928059686745e-06,
|
28701 |
+
"loss": 5.4983,
|
28702 |
+
"step": 16396
|
28703 |
+
},
|
28704 |
+
{
|
28705 |
+
"epoch": 0.81,
|
28706 |
+
"grad_norm": 1.9957647323608398,
|
28707 |
+
"learning_rate": 9.494046148525125e-06,
|
28708 |
+
"loss": 5.5374,
|
28709 |
+
"step": 16400
|
28710 |
+
},
|
28711 |
+
{
|
28712 |
+
"epoch": 0.81,
|
28713 |
+
"grad_norm": 2.0535998344421387,
|
28714 |
+
"learning_rate": 9.484164237363505e-06,
|
28715 |
+
"loss": 5.5643,
|
28716 |
+
"step": 16404
|
28717 |
+
},
|
28718 |
+
{
|
28719 |
+
"epoch": 0.81,
|
28720 |
+
"grad_norm": 2.1046228408813477,
|
28721 |
+
"learning_rate": 9.474282326201887e-06,
|
28722 |
+
"loss": 5.4727,
|
28723 |
+
"step": 16408
|
28724 |
+
},
|
28725 |
+
{
|
28726 |
+
"epoch": 0.81,
|
28727 |
+
"grad_norm": 2.1698873043060303,
|
28728 |
+
"learning_rate": 9.46440041504027e-06,
|
28729 |
+
"loss": 5.5637,
|
28730 |
+
"step": 16412
|
28731 |
+
},
|
28732 |
+
{
|
28733 |
+
"epoch": 0.81,
|
28734 |
+
"grad_norm": 1.9448730945587158,
|
28735 |
+
"learning_rate": 9.454518503878652e-06,
|
28736 |
+
"loss": 5.4932,
|
28737 |
+
"step": 16416
|
28738 |
+
},
|
28739 |
+
{
|
28740 |
+
"epoch": 0.81,
|
28741 |
+
"grad_norm": 2.0663201808929443,
|
28742 |
+
"learning_rate": 9.444636592717032e-06,
|
28743 |
+
"loss": 5.5067,
|
28744 |
+
"step": 16420
|
28745 |
+
},
|
28746 |
+
{
|
28747 |
+
"epoch": 0.81,
|
28748 |
+
"grad_norm": 1.8125361204147339,
|
28749 |
+
"learning_rate": 9.434754681555414e-06,
|
28750 |
+
"loss": 5.5381,
|
28751 |
+
"step": 16424
|
28752 |
+
},
|
28753 |
+
{
|
28754 |
+
"epoch": 0.81,
|
28755 |
+
"grad_norm": 2.077420473098755,
|
28756 |
+
"learning_rate": 9.424872770393794e-06,
|
28757 |
+
"loss": 5.5642,
|
28758 |
+
"step": 16428
|
28759 |
+
},
|
28760 |
+
{
|
28761 |
+
"epoch": 0.81,
|
28762 |
+
"grad_norm": 1.9312140941619873,
|
28763 |
+
"learning_rate": 9.414990859232176e-06,
|
28764 |
+
"loss": 5.3661,
|
28765 |
+
"step": 16432
|
28766 |
+
},
|
28767 |
+
{
|
28768 |
+
"epoch": 0.81,
|
28769 |
+
"grad_norm": 1.8529763221740723,
|
28770 |
+
"learning_rate": 9.405108948070557e-06,
|
28771 |
+
"loss": 5.4776,
|
28772 |
+
"step": 16436
|
28773 |
+
},
|
28774 |
+
{
|
28775 |
+
"epoch": 0.81,
|
28776 |
+
"grad_norm": 1.8764407634735107,
|
28777 |
+
"learning_rate": 9.395227036908939e-06,
|
28778 |
+
"loss": 5.4928,
|
28779 |
+
"step": 16440
|
28780 |
+
},
|
28781 |
+
{
|
28782 |
+
"epoch": 0.81,
|
28783 |
+
"grad_norm": 2.139594316482544,
|
28784 |
+
"learning_rate": 9.385345125747321e-06,
|
28785 |
+
"loss": 5.4108,
|
28786 |
+
"step": 16444
|
28787 |
+
},
|
28788 |
+
{
|
28789 |
+
"epoch": 0.81,
|
28790 |
+
"grad_norm": 2.1105079650878906,
|
28791 |
+
"learning_rate": 9.375463214585701e-06,
|
28792 |
+
"loss": 5.4876,
|
28793 |
+
"step": 16448
|
28794 |
+
},
|
28795 |
+
{
|
28796 |
+
"epoch": 0.81,
|
28797 |
+
"grad_norm": 1.9484140872955322,
|
28798 |
+
"learning_rate": 9.365581303424083e-06,
|
28799 |
+
"loss": 5.4148,
|
28800 |
+
"step": 16452
|
28801 |
+
},
|
28802 |
+
{
|
28803 |
+
"epoch": 0.81,
|
28804 |
+
"grad_norm": 2.0571391582489014,
|
28805 |
+
"learning_rate": 9.355699392262464e-06,
|
28806 |
+
"loss": 5.6282,
|
28807 |
+
"step": 16456
|
28808 |
+
},
|
28809 |
+
{
|
28810 |
+
"epoch": 0.81,
|
28811 |
+
"grad_norm": 1.7625576257705688,
|
28812 |
+
"learning_rate": 9.345817481100846e-06,
|
28813 |
+
"loss": 5.5073,
|
28814 |
+
"step": 16460
|
28815 |
+
},
|
28816 |
+
{
|
28817 |
+
"epoch": 0.81,
|
28818 |
+
"grad_norm": 2.1183722019195557,
|
28819 |
+
"learning_rate": 9.335935569939226e-06,
|
28820 |
+
"loss": 5.4659,
|
28821 |
+
"step": 16464
|
28822 |
+
},
|
28823 |
+
{
|
28824 |
+
"epoch": 0.81,
|
28825 |
+
"grad_norm": 2.135255813598633,
|
28826 |
+
"learning_rate": 9.326053658777608e-06,
|
28827 |
+
"loss": 5.4262,
|
28828 |
+
"step": 16468
|
28829 |
+
},
|
28830 |
+
{
|
28831 |
+
"epoch": 0.81,
|
28832 |
+
"grad_norm": 1.7497916221618652,
|
28833 |
+
"learning_rate": 9.31617174761599e-06,
|
28834 |
+
"loss": 5.537,
|
28835 |
+
"step": 16472
|
28836 |
+
},
|
28837 |
+
{
|
28838 |
+
"epoch": 0.81,
|
28839 |
+
"grad_norm": 1.9297901391983032,
|
28840 |
+
"learning_rate": 9.30628983645437e-06,
|
28841 |
+
"loss": 5.4848,
|
28842 |
+
"step": 16476
|
28843 |
+
},
|
28844 |
+
{
|
28845 |
+
"epoch": 0.81,
|
28846 |
+
"grad_norm": 1.992133617401123,
|
28847 |
+
"learning_rate": 9.296407925292753e-06,
|
28848 |
+
"loss": 5.5307,
|
28849 |
+
"step": 16480
|
28850 |
+
},
|
28851 |
+
{
|
28852 |
+
"epoch": 0.81,
|
28853 |
+
"grad_norm": 1.8346421718597412,
|
28854 |
+
"learning_rate": 9.286526014131133e-06,
|
28855 |
+
"loss": 5.4071,
|
28856 |
+
"step": 16484
|
28857 |
+
},
|
28858 |
+
{
|
28859 |
+
"epoch": 0.81,
|
28860 |
+
"grad_norm": 1.9083247184753418,
|
28861 |
+
"learning_rate": 9.276644102969515e-06,
|
28862 |
+
"loss": 5.4248,
|
28863 |
+
"step": 16488
|
28864 |
+
},
|
28865 |
+
{
|
28866 |
+
"epoch": 0.81,
|
28867 |
+
"grad_norm": 2.0954606533050537,
|
28868 |
+
"learning_rate": 9.266762191807895e-06,
|
28869 |
+
"loss": 5.5175,
|
28870 |
+
"step": 16492
|
28871 |
+
},
|
28872 |
+
{
|
28873 |
+
"epoch": 0.82,
|
28874 |
+
"grad_norm": 2.101158857345581,
|
28875 |
+
"learning_rate": 9.256880280646278e-06,
|
28876 |
+
"loss": 5.5345,
|
28877 |
+
"step": 16496
|
28878 |
+
},
|
28879 |
+
{
|
28880 |
+
"epoch": 0.82,
|
28881 |
+
"grad_norm": 2.170283794403076,
|
28882 |
+
"learning_rate": 9.24699836948466e-06,
|
28883 |
+
"loss": 5.5719,
|
28884 |
+
"step": 16500
|
28885 |
+
},
|
28886 |
+
{
|
28887 |
+
"epoch": 0.82,
|
28888 |
+
"grad_norm": 2.078697681427002,
|
28889 |
+
"learning_rate": 9.23711645832304e-06,
|
28890 |
+
"loss": 5.4199,
|
28891 |
+
"step": 16504
|
28892 |
+
},
|
28893 |
+
{
|
28894 |
+
"epoch": 0.82,
|
28895 |
+
"grad_norm": 2.1254682540893555,
|
28896 |
+
"learning_rate": 9.227234547161422e-06,
|
28897 |
+
"loss": 5.3647,
|
28898 |
+
"step": 16508
|
28899 |
+
},
|
28900 |
+
{
|
28901 |
+
"epoch": 0.82,
|
28902 |
+
"grad_norm": 2.2468132972717285,
|
28903 |
+
"learning_rate": 9.217352635999802e-06,
|
28904 |
+
"loss": 5.4673,
|
28905 |
+
"step": 16512
|
28906 |
+
},
|
28907 |
+
{
|
28908 |
+
"epoch": 0.82,
|
28909 |
+
"grad_norm": 2.053579330444336,
|
28910 |
+
"learning_rate": 9.207470724838184e-06,
|
28911 |
+
"loss": 5.4083,
|
28912 |
+
"step": 16516
|
28913 |
+
},
|
28914 |
+
{
|
28915 |
+
"epoch": 0.82,
|
28916 |
+
"grad_norm": 2.2088301181793213,
|
28917 |
+
"learning_rate": 9.197588813676565e-06,
|
28918 |
+
"loss": 5.5694,
|
28919 |
+
"step": 16520
|
28920 |
+
},
|
28921 |
+
{
|
28922 |
+
"epoch": 0.82,
|
28923 |
+
"grad_norm": 1.9974719285964966,
|
28924 |
+
"learning_rate": 9.187706902514947e-06,
|
28925 |
+
"loss": 5.4396,
|
28926 |
+
"step": 16524
|
28927 |
+
},
|
28928 |
+
{
|
28929 |
+
"epoch": 0.82,
|
28930 |
+
"grad_norm": 2.066420555114746,
|
28931 |
+
"learning_rate": 9.177824991353329e-06,
|
28932 |
+
"loss": 5.5418,
|
28933 |
+
"step": 16528
|
28934 |
+
},
|
28935 |
+
{
|
28936 |
+
"epoch": 0.82,
|
28937 |
+
"grad_norm": 2.2263917922973633,
|
28938 |
+
"learning_rate": 9.16794308019171e-06,
|
28939 |
+
"loss": 5.4904,
|
28940 |
+
"step": 16532
|
28941 |
+
},
|
28942 |
+
{
|
28943 |
+
"epoch": 0.82,
|
28944 |
+
"grad_norm": 2.0956313610076904,
|
28945 |
+
"learning_rate": 9.158061169030091e-06,
|
28946 |
+
"loss": 5.47,
|
28947 |
+
"step": 16536
|
28948 |
+
},
|
28949 |
+
{
|
28950 |
+
"epoch": 0.82,
|
28951 |
+
"grad_norm": 2.120701551437378,
|
28952 |
+
"learning_rate": 9.148179257868472e-06,
|
28953 |
+
"loss": 5.4498,
|
28954 |
+
"step": 16540
|
28955 |
+
},
|
28956 |
+
{
|
28957 |
+
"epoch": 0.82,
|
28958 |
+
"grad_norm": 2.068040609359741,
|
28959 |
+
"learning_rate": 9.138297346706854e-06,
|
28960 |
+
"loss": 5.5373,
|
28961 |
+
"step": 16544
|
28962 |
+
},
|
28963 |
+
{
|
28964 |
+
"epoch": 0.82,
|
28965 |
+
"grad_norm": 2.125322103500366,
|
28966 |
+
"learning_rate": 9.128415435545234e-06,
|
28967 |
+
"loss": 5.4782,
|
28968 |
+
"step": 16548
|
28969 |
+
},
|
28970 |
+
{
|
28971 |
+
"epoch": 0.82,
|
28972 |
+
"grad_norm": 1.8984034061431885,
|
28973 |
+
"learning_rate": 9.118533524383616e-06,
|
28974 |
+
"loss": 5.5614,
|
28975 |
+
"step": 16552
|
28976 |
+
},
|
28977 |
+
{
|
28978 |
+
"epoch": 0.82,
|
28979 |
+
"grad_norm": 2.038201332092285,
|
28980 |
+
"learning_rate": 9.108651613221998e-06,
|
28981 |
+
"loss": 5.5308,
|
28982 |
+
"step": 16556
|
28983 |
+
},
|
28984 |
+
{
|
28985 |
+
"epoch": 0.82,
|
28986 |
+
"grad_norm": 2.083704948425293,
|
28987 |
+
"learning_rate": 9.098769702060379e-06,
|
28988 |
+
"loss": 5.3986,
|
28989 |
+
"step": 16560
|
28990 |
+
},
|
28991 |
+
{
|
28992 |
+
"epoch": 0.82,
|
28993 |
+
"grad_norm": 2.2222559452056885,
|
28994 |
+
"learning_rate": 9.08888779089876e-06,
|
28995 |
+
"loss": 5.4164,
|
28996 |
+
"step": 16564
|
28997 |
+
},
|
28998 |
+
{
|
28999 |
+
"epoch": 0.82,
|
29000 |
+
"grad_norm": 2.319937229156494,
|
29001 |
+
"learning_rate": 9.079005879737141e-06,
|
29002 |
+
"loss": 5.5564,
|
29003 |
+
"step": 16568
|
29004 |
+
},
|
29005 |
+
{
|
29006 |
+
"epoch": 0.82,
|
29007 |
+
"grad_norm": 1.930305004119873,
|
29008 |
+
"learning_rate": 9.069123968575523e-06,
|
29009 |
+
"loss": 5.5181,
|
29010 |
+
"step": 16572
|
29011 |
+
},
|
29012 |
+
{
|
29013 |
+
"epoch": 0.82,
|
29014 |
+
"grad_norm": 2.175090789794922,
|
29015 |
+
"learning_rate": 9.059242057413904e-06,
|
29016 |
+
"loss": 5.4816,
|
29017 |
+
"step": 16576
|
29018 |
+
},
|
29019 |
+
{
|
29020 |
+
"epoch": 0.82,
|
29021 |
+
"grad_norm": 1.9041398763656616,
|
29022 |
+
"learning_rate": 9.049360146252286e-06,
|
29023 |
+
"loss": 5.4462,
|
29024 |
+
"step": 16580
|
29025 |
+
},
|
29026 |
+
{
|
29027 |
+
"epoch": 0.82,
|
29028 |
+
"grad_norm": 1.92721426486969,
|
29029 |
+
"learning_rate": 9.039478235090668e-06,
|
29030 |
+
"loss": 5.4993,
|
29031 |
+
"step": 16584
|
29032 |
+
},
|
29033 |
+
{
|
29034 |
+
"epoch": 0.82,
|
29035 |
+
"grad_norm": 2.098320484161377,
|
29036 |
+
"learning_rate": 9.029596323929048e-06,
|
29037 |
+
"loss": 5.4334,
|
29038 |
+
"step": 16588
|
29039 |
+
},
|
29040 |
+
{
|
29041 |
+
"epoch": 0.82,
|
29042 |
+
"grad_norm": 1.988296389579773,
|
29043 |
+
"learning_rate": 9.01971441276743e-06,
|
29044 |
+
"loss": 5.5241,
|
29045 |
+
"step": 16592
|
29046 |
+
},
|
29047 |
+
{
|
29048 |
+
"epoch": 0.82,
|
29049 |
+
"grad_norm": 1.9492675065994263,
|
29050 |
+
"learning_rate": 9.00983250160581e-06,
|
29051 |
+
"loss": 5.4024,
|
29052 |
+
"step": 16596
|
29053 |
+
},
|
29054 |
+
{
|
29055 |
+
"epoch": 0.82,
|
29056 |
+
"grad_norm": 2.0381886959075928,
|
29057 |
+
"learning_rate": 8.999950590444192e-06,
|
29058 |
+
"loss": 5.4861,
|
29059 |
+
"step": 16600
|
29060 |
+
},
|
29061 |
+
{
|
29062 |
+
"epoch": 0.82,
|
29063 |
+
"grad_norm": 2.2015647888183594,
|
29064 |
+
"learning_rate": 8.990068679282573e-06,
|
29065 |
+
"loss": 5.4589,
|
29066 |
+
"step": 16604
|
29067 |
+
},
|
29068 |
+
{
|
29069 |
+
"epoch": 0.82,
|
29070 |
+
"grad_norm": 1.970509648323059,
|
29071 |
+
"learning_rate": 8.980186768120955e-06,
|
29072 |
+
"loss": 5.5072,
|
29073 |
+
"step": 16608
|
29074 |
+
},
|
29075 |
+
{
|
29076 |
+
"epoch": 0.82,
|
29077 |
+
"grad_norm": 2.048265218734741,
|
29078 |
+
"learning_rate": 8.970304856959337e-06,
|
29079 |
+
"loss": 5.4781,
|
29080 |
+
"step": 16612
|
29081 |
+
},
|
29082 |
+
{
|
29083 |
+
"epoch": 0.82,
|
29084 |
+
"grad_norm": 2.1781177520751953,
|
29085 |
+
"learning_rate": 8.960422945797719e-06,
|
29086 |
+
"loss": 5.4382,
|
29087 |
+
"step": 16616
|
29088 |
+
},
|
29089 |
+
{
|
29090 |
+
"epoch": 0.82,
|
29091 |
+
"grad_norm": 2.3206918239593506,
|
29092 |
+
"learning_rate": 8.9505410346361e-06,
|
29093 |
+
"loss": 5.5371,
|
29094 |
+
"step": 16620
|
29095 |
+
},
|
29096 |
+
{
|
29097 |
+
"epoch": 0.82,
|
29098 |
+
"grad_norm": 2.129166603088379,
|
29099 |
+
"learning_rate": 8.94065912347448e-06,
|
29100 |
+
"loss": 5.4773,
|
29101 |
+
"step": 16624
|
29102 |
+
},
|
29103 |
+
{
|
29104 |
+
"epoch": 0.82,
|
29105 |
+
"grad_norm": 1.8786160945892334,
|
29106 |
+
"learning_rate": 8.930777212312862e-06,
|
29107 |
+
"loss": 5.3599,
|
29108 |
+
"step": 16628
|
29109 |
+
},
|
29110 |
+
{
|
29111 |
+
"epoch": 0.82,
|
29112 |
+
"grad_norm": 2.114015579223633,
|
29113 |
+
"learning_rate": 8.920895301151242e-06,
|
29114 |
+
"loss": 5.5262,
|
29115 |
+
"step": 16632
|
29116 |
+
},
|
29117 |
+
{
|
29118 |
+
"epoch": 0.82,
|
29119 |
+
"grad_norm": 1.9730268716812134,
|
29120 |
+
"learning_rate": 8.911013389989624e-06,
|
29121 |
+
"loss": 5.3921,
|
29122 |
+
"step": 16636
|
29123 |
+
},
|
29124 |
+
{
|
29125 |
+
"epoch": 0.82,
|
29126 |
+
"grad_norm": 2.007050037384033,
|
29127 |
+
"learning_rate": 8.901131478828005e-06,
|
29128 |
+
"loss": 5.4521,
|
29129 |
+
"step": 16640
|
29130 |
+
},
|
29131 |
+
{
|
29132 |
+
"epoch": 0.82,
|
29133 |
+
"grad_norm": 1.9782698154449463,
|
29134 |
+
"learning_rate": 8.891249567666388e-06,
|
29135 |
+
"loss": 5.4024,
|
29136 |
+
"step": 16644
|
29137 |
+
},
|
29138 |
+
{
|
29139 |
+
"epoch": 0.82,
|
29140 |
+
"grad_norm": 2.118109941482544,
|
29141 |
+
"learning_rate": 8.881367656504769e-06,
|
29142 |
+
"loss": 5.4549,
|
29143 |
+
"step": 16648
|
29144 |
+
},
|
29145 |
+
{
|
29146 |
+
"epoch": 0.82,
|
29147 |
+
"grad_norm": 1.9269788265228271,
|
29148 |
+
"learning_rate": 8.871485745343149e-06,
|
29149 |
+
"loss": 5.3879,
|
29150 |
+
"step": 16652
|
29151 |
+
},
|
29152 |
+
{
|
29153 |
+
"epoch": 0.82,
|
29154 |
+
"grad_norm": 2.0356998443603516,
|
29155 |
+
"learning_rate": 8.861603834181531e-06,
|
29156 |
+
"loss": 5.4729,
|
29157 |
+
"step": 16656
|
29158 |
+
},
|
29159 |
+
{
|
29160 |
+
"epoch": 0.82,
|
29161 |
+
"grad_norm": 1.9945244789123535,
|
29162 |
+
"learning_rate": 8.851721923019912e-06,
|
29163 |
+
"loss": 5.489,
|
29164 |
+
"step": 16660
|
29165 |
+
},
|
29166 |
+
{
|
29167 |
+
"epoch": 0.82,
|
29168 |
+
"grad_norm": 2.1839029788970947,
|
29169 |
+
"learning_rate": 8.841840011858294e-06,
|
29170 |
+
"loss": 5.3626,
|
29171 |
+
"step": 16664
|
29172 |
+
},
|
29173 |
+
{
|
29174 |
+
"epoch": 0.82,
|
29175 |
+
"grad_norm": 1.9056282043457031,
|
29176 |
+
"learning_rate": 8.831958100696674e-06,
|
29177 |
+
"loss": 5.5155,
|
29178 |
+
"step": 16668
|
29179 |
+
},
|
29180 |
+
{
|
29181 |
+
"epoch": 0.82,
|
29182 |
+
"grad_norm": 1.971134066581726,
|
29183 |
+
"learning_rate": 8.822076189535058e-06,
|
29184 |
+
"loss": 5.4582,
|
29185 |
+
"step": 16672
|
29186 |
+
},
|
29187 |
+
{
|
29188 |
+
"epoch": 0.82,
|
29189 |
+
"grad_norm": 2.2608683109283447,
|
29190 |
+
"learning_rate": 8.812194278373438e-06,
|
29191 |
+
"loss": 5.5671,
|
29192 |
+
"step": 16676
|
29193 |
+
},
|
29194 |
+
{
|
29195 |
+
"epoch": 0.82,
|
29196 |
+
"grad_norm": 1.8873885869979858,
|
29197 |
+
"learning_rate": 8.80231236721182e-06,
|
29198 |
+
"loss": 5.5378,
|
29199 |
+
"step": 16680
|
29200 |
+
},
|
29201 |
+
{
|
29202 |
+
"epoch": 0.82,
|
29203 |
+
"grad_norm": 2.1268815994262695,
|
29204 |
+
"learning_rate": 8.7924304560502e-06,
|
29205 |
+
"loss": 5.5464,
|
29206 |
+
"step": 16684
|
29207 |
+
},
|
29208 |
+
{
|
29209 |
+
"epoch": 0.82,
|
29210 |
+
"grad_norm": 2.120333671569824,
|
29211 |
+
"learning_rate": 8.782548544888581e-06,
|
29212 |
+
"loss": 5.4566,
|
29213 |
+
"step": 16688
|
29214 |
+
},
|
29215 |
+
{
|
29216 |
+
"epoch": 0.82,
|
29217 |
+
"grad_norm": 2.202099323272705,
|
29218 |
+
"learning_rate": 8.772666633726963e-06,
|
29219 |
+
"loss": 5.3333,
|
29220 |
+
"step": 16692
|
29221 |
+
},
|
29222 |
+
{
|
29223 |
+
"epoch": 0.82,
|
29224 |
+
"grad_norm": 1.8824645280838013,
|
29225 |
+
"learning_rate": 8.762784722565343e-06,
|
29226 |
+
"loss": 5.5016,
|
29227 |
+
"step": 16696
|
29228 |
+
},
|
29229 |
+
{
|
29230 |
+
"epoch": 0.83,
|
29231 |
+
"grad_norm": 2.0147457122802734,
|
29232 |
+
"learning_rate": 8.752902811403727e-06,
|
29233 |
+
"loss": 5.4586,
|
29234 |
+
"step": 16700
|
29235 |
+
},
|
29236 |
+
{
|
29237 |
+
"epoch": 0.83,
|
29238 |
+
"grad_norm": 2.041895627975464,
|
29239 |
+
"learning_rate": 8.743020900242107e-06,
|
29240 |
+
"loss": 5.5022,
|
29241 |
+
"step": 16704
|
29242 |
+
},
|
29243 |
+
{
|
29244 |
+
"epoch": 0.83,
|
29245 |
+
"grad_norm": 2.077690362930298,
|
29246 |
+
"learning_rate": 8.73313898908049e-06,
|
29247 |
+
"loss": 5.5781,
|
29248 |
+
"step": 16708
|
29249 |
+
},
|
29250 |
+
{
|
29251 |
+
"epoch": 0.83,
|
29252 |
+
"grad_norm": 1.9477964639663696,
|
29253 |
+
"learning_rate": 8.72325707791887e-06,
|
29254 |
+
"loss": 5.5584,
|
29255 |
+
"step": 16712
|
29256 |
+
},
|
29257 |
+
{
|
29258 |
+
"epoch": 0.83,
|
29259 |
+
"grad_norm": 1.8398356437683105,
|
29260 |
+
"learning_rate": 8.713375166757252e-06,
|
29261 |
+
"loss": 5.5032,
|
29262 |
+
"step": 16716
|
29263 |
+
},
|
29264 |
+
{
|
29265 |
+
"epoch": 0.83,
|
29266 |
+
"grad_norm": 1.9973992109298706,
|
29267 |
+
"learning_rate": 8.703493255595632e-06,
|
29268 |
+
"loss": 5.37,
|
29269 |
+
"step": 16720
|
29270 |
+
},
|
29271 |
+
{
|
29272 |
+
"epoch": 0.83,
|
29273 |
+
"grad_norm": 2.0176520347595215,
|
29274 |
+
"learning_rate": 8.693611344434013e-06,
|
29275 |
+
"loss": 5.5138,
|
29276 |
+
"step": 16724
|
29277 |
+
},
|
29278 |
+
{
|
29279 |
+
"epoch": 0.83,
|
29280 |
+
"grad_norm": 2.1837217807769775,
|
29281 |
+
"learning_rate": 8.683729433272396e-06,
|
29282 |
+
"loss": 5.5308,
|
29283 |
+
"step": 16728
|
29284 |
+
},
|
29285 |
+
{
|
29286 |
+
"epoch": 0.83,
|
29287 |
+
"grad_norm": 2.0156595706939697,
|
29288 |
+
"learning_rate": 8.673847522110777e-06,
|
29289 |
+
"loss": 5.5722,
|
29290 |
+
"step": 16732
|
29291 |
+
},
|
29292 |
+
{
|
29293 |
+
"epoch": 0.83,
|
29294 |
+
"grad_norm": 2.3676466941833496,
|
29295 |
+
"learning_rate": 8.663965610949159e-06,
|
29296 |
+
"loss": 5.5775,
|
29297 |
+
"step": 16736
|
29298 |
+
},
|
29299 |
+
{
|
29300 |
+
"epoch": 0.83,
|
29301 |
+
"grad_norm": 2.270716667175293,
|
29302 |
+
"learning_rate": 8.65408369978754e-06,
|
29303 |
+
"loss": 5.4107,
|
29304 |
+
"step": 16740
|
29305 |
+
},
|
29306 |
+
{
|
29307 |
+
"epoch": 0.83,
|
29308 |
+
"grad_norm": 1.9551721811294556,
|
29309 |
+
"learning_rate": 8.644201788625921e-06,
|
29310 |
+
"loss": 5.6022,
|
29311 |
+
"step": 16744
|
29312 |
+
},
|
29313 |
+
{
|
29314 |
+
"epoch": 0.83,
|
29315 |
+
"grad_norm": 1.9827896356582642,
|
29316 |
+
"learning_rate": 8.634319877464302e-06,
|
29317 |
+
"loss": 5.4923,
|
29318 |
+
"step": 16748
|
29319 |
+
},
|
29320 |
+
{
|
29321 |
+
"epoch": 0.83,
|
29322 |
+
"grad_norm": 2.1360533237457275,
|
29323 |
+
"learning_rate": 8.624437966302682e-06,
|
29324 |
+
"loss": 5.4328,
|
29325 |
+
"step": 16752
|
29326 |
+
},
|
29327 |
+
{
|
29328 |
+
"epoch": 0.83,
|
29329 |
+
"grad_norm": 2.094109296798706,
|
29330 |
+
"learning_rate": 8.614556055141066e-06,
|
29331 |
+
"loss": 5.416,
|
29332 |
+
"step": 16756
|
29333 |
+
},
|
29334 |
+
{
|
29335 |
+
"epoch": 0.83,
|
29336 |
+
"grad_norm": 1.9513869285583496,
|
29337 |
+
"learning_rate": 8.604674143979446e-06,
|
29338 |
+
"loss": 5.403,
|
29339 |
+
"step": 16760
|
29340 |
+
},
|
29341 |
+
{
|
29342 |
+
"epoch": 0.83,
|
29343 |
+
"grad_norm": 2.0020523071289062,
|
29344 |
+
"learning_rate": 8.594792232817828e-06,
|
29345 |
+
"loss": 5.4628,
|
29346 |
+
"step": 16764
|
29347 |
+
},
|
29348 |
+
{
|
29349 |
+
"epoch": 0.83,
|
29350 |
+
"grad_norm": 2.405801296234131,
|
29351 |
+
"learning_rate": 8.584910321656209e-06,
|
29352 |
+
"loss": 5.5799,
|
29353 |
+
"step": 16768
|
29354 |
+
},
|
29355 |
+
{
|
29356 |
+
"epoch": 0.83,
|
29357 |
+
"grad_norm": 2.114650249481201,
|
29358 |
+
"learning_rate": 8.57502841049459e-06,
|
29359 |
+
"loss": 5.4151,
|
29360 |
+
"step": 16772
|
29361 |
+
},
|
29362 |
+
{
|
29363 |
+
"epoch": 0.83,
|
29364 |
+
"grad_norm": 1.9269883632659912,
|
29365 |
+
"learning_rate": 8.565146499332971e-06,
|
29366 |
+
"loss": 5.4321,
|
29367 |
+
"step": 16776
|
29368 |
+
},
|
29369 |
+
{
|
29370 |
+
"epoch": 0.83,
|
29371 |
+
"grad_norm": 1.894822597503662,
|
29372 |
+
"learning_rate": 8.555264588171353e-06,
|
29373 |
+
"loss": 5.4419,
|
29374 |
+
"step": 16780
|
29375 |
+
},
|
29376 |
+
{
|
29377 |
+
"epoch": 0.83,
|
29378 |
+
"grad_norm": 2.0186209678649902,
|
29379 |
+
"learning_rate": 8.545382677009735e-06,
|
29380 |
+
"loss": 5.5657,
|
29381 |
+
"step": 16784
|
29382 |
+
},
|
29383 |
+
{
|
29384 |
+
"epoch": 0.83,
|
29385 |
+
"grad_norm": 1.920372486114502,
|
29386 |
+
"learning_rate": 8.535500765848115e-06,
|
29387 |
+
"loss": 5.5334,
|
29388 |
+
"step": 16788
|
29389 |
+
},
|
29390 |
+
{
|
29391 |
+
"epoch": 0.83,
|
29392 |
+
"grad_norm": 2.1055715084075928,
|
29393 |
+
"learning_rate": 8.525618854686497e-06,
|
29394 |
+
"loss": 5.4952,
|
29395 |
+
"step": 16792
|
29396 |
+
},
|
29397 |
+
{
|
29398 |
+
"epoch": 0.83,
|
29399 |
+
"grad_norm": 2.0137712955474854,
|
29400 |
+
"learning_rate": 8.515736943524878e-06,
|
29401 |
+
"loss": 5.5128,
|
29402 |
+
"step": 16796
|
29403 |
+
},
|
29404 |
+
{
|
29405 |
+
"epoch": 0.83,
|
29406 |
+
"grad_norm": 2.0473227500915527,
|
29407 |
+
"learning_rate": 8.50585503236326e-06,
|
29408 |
+
"loss": 5.6793,
|
29409 |
+
"step": 16800
|
29410 |
+
},
|
29411 |
+
{
|
29412 |
+
"epoch": 0.83,
|
29413 |
+
"grad_norm": 2.0757429599761963,
|
29414 |
+
"learning_rate": 8.49597312120164e-06,
|
29415 |
+
"loss": 5.4888,
|
29416 |
+
"step": 16804
|
29417 |
+
},
|
29418 |
+
{
|
29419 |
+
"epoch": 0.83,
|
29420 |
+
"grad_norm": 1.9529187679290771,
|
29421 |
+
"learning_rate": 8.486091210040022e-06,
|
29422 |
+
"loss": 5.2731,
|
29423 |
+
"step": 16808
|
29424 |
+
},
|
29425 |
+
{
|
29426 |
+
"epoch": 0.83,
|
29427 |
+
"grad_norm": 1.897220492362976,
|
29428 |
+
"learning_rate": 8.476209298878403e-06,
|
29429 |
+
"loss": 5.4103,
|
29430 |
+
"step": 16812
|
29431 |
+
},
|
29432 |
+
{
|
29433 |
+
"epoch": 0.83,
|
29434 |
+
"grad_norm": 2.0591204166412354,
|
29435 |
+
"learning_rate": 8.466327387716785e-06,
|
29436 |
+
"loss": 5.5609,
|
29437 |
+
"step": 16816
|
29438 |
+
},
|
29439 |
+
{
|
29440 |
+
"epoch": 0.83,
|
29441 |
+
"grad_norm": 1.8429813385009766,
|
29442 |
+
"learning_rate": 8.456445476555167e-06,
|
29443 |
+
"loss": 5.3453,
|
29444 |
+
"step": 16820
|
29445 |
+
},
|
29446 |
+
{
|
29447 |
+
"epoch": 0.83,
|
29448 |
+
"grad_norm": 1.854067087173462,
|
29449 |
+
"learning_rate": 8.446563565393547e-06,
|
29450 |
+
"loss": 5.4855,
|
29451 |
+
"step": 16824
|
29452 |
+
},
|
29453 |
+
{
|
29454 |
+
"epoch": 0.83,
|
29455 |
+
"grad_norm": 1.87723708152771,
|
29456 |
+
"learning_rate": 8.43668165423193e-06,
|
29457 |
+
"loss": 5.5087,
|
29458 |
+
"step": 16828
|
29459 |
+
},
|
29460 |
+
{
|
29461 |
+
"epoch": 0.83,
|
29462 |
+
"grad_norm": 2.25486159324646,
|
29463 |
+
"learning_rate": 8.42679974307031e-06,
|
29464 |
+
"loss": 5.6419,
|
29465 |
+
"step": 16832
|
29466 |
+
},
|
29467 |
+
{
|
29468 |
+
"epoch": 0.83,
|
29469 |
+
"grad_norm": 2.1054129600524902,
|
29470 |
+
"learning_rate": 8.416917831908692e-06,
|
29471 |
+
"loss": 5.4713,
|
29472 |
+
"step": 16836
|
29473 |
+
},
|
29474 |
+
{
|
29475 |
+
"epoch": 0.83,
|
29476 |
+
"grad_norm": 1.9546363353729248,
|
29477 |
+
"learning_rate": 8.407035920747072e-06,
|
29478 |
+
"loss": 5.51,
|
29479 |
+
"step": 16840
|
29480 |
+
},
|
29481 |
+
{
|
29482 |
+
"epoch": 0.83,
|
29483 |
+
"grad_norm": 1.8574483394622803,
|
29484 |
+
"learning_rate": 8.397154009585454e-06,
|
29485 |
+
"loss": 5.4273,
|
29486 |
+
"step": 16844
|
29487 |
+
},
|
29488 |
+
{
|
29489 |
+
"epoch": 0.83,
|
29490 |
+
"grad_norm": 2.135690927505493,
|
29491 |
+
"learning_rate": 8.387272098423836e-06,
|
29492 |
+
"loss": 5.4408,
|
29493 |
+
"step": 16848
|
29494 |
+
},
|
29495 |
+
{
|
29496 |
+
"epoch": 0.83,
|
29497 |
+
"grad_norm": 1.9898631572723389,
|
29498 |
+
"learning_rate": 8.377390187262217e-06,
|
29499 |
+
"loss": 5.2905,
|
29500 |
+
"step": 16852
|
29501 |
+
},
|
29502 |
+
{
|
29503 |
+
"epoch": 0.83,
|
29504 |
+
"grad_norm": 2.016470193862915,
|
29505 |
+
"learning_rate": 8.367508276100599e-06,
|
29506 |
+
"loss": 5.3266,
|
29507 |
+
"step": 16856
|
29508 |
+
},
|
29509 |
+
{
|
29510 |
+
"epoch": 0.83,
|
29511 |
+
"grad_norm": 1.9973735809326172,
|
29512 |
+
"learning_rate": 8.357626364938979e-06,
|
29513 |
+
"loss": 5.4987,
|
29514 |
+
"step": 16860
|
29515 |
+
},
|
29516 |
+
{
|
29517 |
+
"epoch": 0.83,
|
29518 |
+
"grad_norm": 2.116567373275757,
|
29519 |
+
"learning_rate": 8.347744453777361e-06,
|
29520 |
+
"loss": 5.4306,
|
29521 |
+
"step": 16864
|
29522 |
+
},
|
29523 |
+
{
|
29524 |
+
"epoch": 0.83,
|
29525 |
+
"grad_norm": 2.044475793838501,
|
29526 |
+
"learning_rate": 8.337862542615741e-06,
|
29527 |
+
"loss": 5.4907,
|
29528 |
+
"step": 16868
|
29529 |
+
},
|
29530 |
+
{
|
29531 |
+
"epoch": 0.83,
|
29532 |
+
"grad_norm": 2.090527296066284,
|
29533 |
+
"learning_rate": 8.327980631454123e-06,
|
29534 |
+
"loss": 5.3698,
|
29535 |
+
"step": 16872
|
29536 |
+
},
|
29537 |
+
{
|
29538 |
+
"epoch": 0.83,
|
29539 |
+
"grad_norm": 1.98384428024292,
|
29540 |
+
"learning_rate": 8.318098720292506e-06,
|
29541 |
+
"loss": 5.469,
|
29542 |
+
"step": 16876
|
29543 |
+
},
|
29544 |
+
{
|
29545 |
+
"epoch": 0.83,
|
29546 |
+
"grad_norm": 1.775121808052063,
|
29547 |
+
"learning_rate": 8.308216809130886e-06,
|
29548 |
+
"loss": 5.461,
|
29549 |
+
"step": 16880
|
29550 |
+
},
|
29551 |
+
{
|
29552 |
+
"epoch": 0.83,
|
29553 |
+
"grad_norm": 1.9661427736282349,
|
29554 |
+
"learning_rate": 8.298334897969268e-06,
|
29555 |
+
"loss": 5.494,
|
29556 |
+
"step": 16884
|
29557 |
+
},
|
29558 |
+
{
|
29559 |
+
"epoch": 0.83,
|
29560 |
+
"grad_norm": 2.0031895637512207,
|
29561 |
+
"learning_rate": 8.288452986807648e-06,
|
29562 |
+
"loss": 5.5011,
|
29563 |
+
"step": 16888
|
29564 |
+
},
|
29565 |
+
{
|
29566 |
+
"epoch": 0.83,
|
29567 |
+
"grad_norm": 2.221911907196045,
|
29568 |
+
"learning_rate": 8.27857107564603e-06,
|
29569 |
+
"loss": 5.4296,
|
29570 |
+
"step": 16892
|
29571 |
+
},
|
29572 |
+
{
|
29573 |
+
"epoch": 0.83,
|
29574 |
+
"grad_norm": 2.0504343509674072,
|
29575 |
+
"learning_rate": 8.26868916448441e-06,
|
29576 |
+
"loss": 5.5495,
|
29577 |
+
"step": 16896
|
29578 |
+
},
|
29579 |
+
{
|
29580 |
+
"epoch": 0.84,
|
29581 |
+
"grad_norm": 2.1068339347839355,
|
29582 |
+
"learning_rate": 8.258807253322794e-06,
|
29583 |
+
"loss": 5.408,
|
29584 |
+
"step": 16900
|
29585 |
+
},
|
29586 |
+
{
|
29587 |
+
"epoch": 0.84,
|
29588 |
+
"grad_norm": 2.0044867992401123,
|
29589 |
+
"learning_rate": 8.248925342161175e-06,
|
29590 |
+
"loss": 5.512,
|
29591 |
+
"step": 16904
|
29592 |
+
},
|
29593 |
+
{
|
29594 |
+
"epoch": 0.84,
|
29595 |
+
"grad_norm": 2.3192813396453857,
|
29596 |
+
"learning_rate": 8.239043430999555e-06,
|
29597 |
+
"loss": 5.4185,
|
29598 |
+
"step": 16908
|
29599 |
+
},
|
29600 |
+
{
|
29601 |
+
"epoch": 0.84,
|
29602 |
+
"grad_norm": 1.8410991430282593,
|
29603 |
+
"learning_rate": 8.229161519837937e-06,
|
29604 |
+
"loss": 5.4222,
|
29605 |
+
"step": 16912
|
29606 |
+
},
|
29607 |
+
{
|
29608 |
+
"epoch": 0.84,
|
29609 |
+
"grad_norm": 2.0134191513061523,
|
29610 |
+
"learning_rate": 8.219279608676318e-06,
|
29611 |
+
"loss": 5.4357,
|
29612 |
+
"step": 16916
|
29613 |
+
},
|
29614 |
+
{
|
29615 |
+
"epoch": 0.84,
|
29616 |
+
"grad_norm": 2.0390844345092773,
|
29617 |
+
"learning_rate": 8.2093976975147e-06,
|
29618 |
+
"loss": 5.5363,
|
29619 |
+
"step": 16920
|
29620 |
+
},
|
29621 |
+
{
|
29622 |
+
"epoch": 0.84,
|
29623 |
+
"grad_norm": 2.12786602973938,
|
29624 |
+
"learning_rate": 8.19951578635308e-06,
|
29625 |
+
"loss": 5.31,
|
29626 |
+
"step": 16924
|
29627 |
+
},
|
29628 |
+
{
|
29629 |
+
"epoch": 0.84,
|
29630 |
+
"grad_norm": 1.9766027927398682,
|
29631 |
+
"learning_rate": 8.189633875191464e-06,
|
29632 |
+
"loss": 5.534,
|
29633 |
+
"step": 16928
|
29634 |
+
},
|
29635 |
+
{
|
29636 |
+
"epoch": 0.84,
|
29637 |
+
"grad_norm": 1.7689497470855713,
|
29638 |
+
"learning_rate": 8.179751964029844e-06,
|
29639 |
+
"loss": 5.3465,
|
29640 |
+
"step": 16932
|
29641 |
+
},
|
29642 |
+
{
|
29643 |
+
"epoch": 0.84,
|
29644 |
+
"grad_norm": 2.117271900177002,
|
29645 |
+
"learning_rate": 8.169870052868225e-06,
|
29646 |
+
"loss": 5.4583,
|
29647 |
+
"step": 16936
|
29648 |
+
},
|
29649 |
+
{
|
29650 |
+
"epoch": 0.84,
|
29651 |
+
"grad_norm": 2.0808498859405518,
|
29652 |
+
"learning_rate": 8.159988141706607e-06,
|
29653 |
+
"loss": 5.477,
|
29654 |
+
"step": 16940
|
29655 |
+
},
|
29656 |
+
{
|
29657 |
+
"epoch": 0.84,
|
29658 |
+
"grad_norm": 2.0178062915802,
|
29659 |
+
"learning_rate": 8.150106230544987e-06,
|
29660 |
+
"loss": 5.5178,
|
29661 |
+
"step": 16944
|
29662 |
+
},
|
29663 |
+
{
|
29664 |
+
"epoch": 0.84,
|
29665 |
+
"grad_norm": 1.7878342866897583,
|
29666 |
+
"learning_rate": 8.140224319383369e-06,
|
29667 |
+
"loss": 5.533,
|
29668 |
+
"step": 16948
|
29669 |
+
},
|
29670 |
+
{
|
29671 |
+
"epoch": 0.84,
|
29672 |
+
"grad_norm": 2.0112874507904053,
|
29673 |
+
"learning_rate": 8.13034240822175e-06,
|
29674 |
+
"loss": 5.342,
|
29675 |
+
"step": 16952
|
29676 |
+
},
|
29677 |
+
{
|
29678 |
+
"epoch": 0.84,
|
29679 |
+
"grad_norm": 2.224484443664551,
|
29680 |
+
"learning_rate": 8.120460497060133e-06,
|
29681 |
+
"loss": 5.4574,
|
29682 |
+
"step": 16956
|
29683 |
+
},
|
29684 |
+
{
|
29685 |
+
"epoch": 0.84,
|
29686 |
+
"grad_norm": 2.29886531829834,
|
29687 |
+
"learning_rate": 8.110578585898514e-06,
|
29688 |
+
"loss": 5.5541,
|
29689 |
+
"step": 16960
|
29690 |
+
},
|
29691 |
+
{
|
29692 |
+
"epoch": 0.84,
|
29693 |
+
"grad_norm": 1.8924994468688965,
|
29694 |
+
"learning_rate": 8.100696674736896e-06,
|
29695 |
+
"loss": 5.4196,
|
29696 |
+
"step": 16964
|
29697 |
+
},
|
29698 |
+
{
|
29699 |
+
"epoch": 0.84,
|
29700 |
+
"grad_norm": 2.2159488201141357,
|
29701 |
+
"learning_rate": 8.090814763575276e-06,
|
29702 |
+
"loss": 5.4989,
|
29703 |
+
"step": 16968
|
29704 |
+
},
|
29705 |
+
{
|
29706 |
+
"epoch": 0.84,
|
29707 |
+
"grad_norm": 2.170715570449829,
|
29708 |
+
"learning_rate": 8.080932852413656e-06,
|
29709 |
+
"loss": 5.5074,
|
29710 |
+
"step": 16972
|
29711 |
+
},
|
29712 |
+
{
|
29713 |
+
"epoch": 0.84,
|
29714 |
+
"grad_norm": 2.0515708923339844,
|
29715 |
+
"learning_rate": 8.071050941252038e-06,
|
29716 |
+
"loss": 5.4033,
|
29717 |
+
"step": 16976
|
29718 |
+
},
|
29719 |
+
{
|
29720 |
+
"epoch": 0.84,
|
29721 |
+
"grad_norm": 2.0467865467071533,
|
29722 |
+
"learning_rate": 8.061169030090419e-06,
|
29723 |
+
"loss": 5.4566,
|
29724 |
+
"step": 16980
|
29725 |
+
},
|
29726 |
+
{
|
29727 |
+
"epoch": 0.84,
|
29728 |
+
"grad_norm": 1.9163670539855957,
|
29729 |
+
"learning_rate": 8.0512871189288e-06,
|
29730 |
+
"loss": 5.5335,
|
29731 |
+
"step": 16984
|
29732 |
+
},
|
29733 |
+
{
|
29734 |
+
"epoch": 0.84,
|
29735 |
+
"grad_norm": 2.1390318870544434,
|
29736 |
+
"learning_rate": 8.041405207767183e-06,
|
29737 |
+
"loss": 5.5707,
|
29738 |
+
"step": 16988
|
29739 |
+
},
|
29740 |
+
{
|
29741 |
+
"epoch": 0.84,
|
29742 |
+
"grad_norm": 1.9965319633483887,
|
29743 |
+
"learning_rate": 8.031523296605565e-06,
|
29744 |
+
"loss": 5.4165,
|
29745 |
+
"step": 16992
|
29746 |
+
},
|
29747 |
+
{
|
29748 |
+
"epoch": 0.84,
|
29749 |
+
"grad_norm": 2.137233257293701,
|
29750 |
+
"learning_rate": 8.021641385443945e-06,
|
29751 |
+
"loss": 5.3596,
|
29752 |
+
"step": 16996
|
29753 |
+
},
|
29754 |
+
{
|
29755 |
+
"epoch": 0.84,
|
29756 |
+
"grad_norm": 2.152256727218628,
|
29757 |
+
"learning_rate": 8.011759474282327e-06,
|
29758 |
+
"loss": 5.3396,
|
29759 |
+
"step": 17000
|
29760 |
+
},
|
29761 |
+
{
|
29762 |
+
"epoch": 0.84,
|
29763 |
+
"grad_norm": 2.28680682182312,
|
29764 |
+
"learning_rate": 8.001877563120708e-06,
|
29765 |
+
"loss": 5.5507,
|
29766 |
+
"step": 17004
|
29767 |
+
},
|
29768 |
+
{
|
29769 |
+
"epoch": 0.84,
|
29770 |
+
"grad_norm": 2.26821231842041,
|
29771 |
+
"learning_rate": 7.991995651959088e-06,
|
29772 |
+
"loss": 5.4969,
|
29773 |
+
"step": 17008
|
29774 |
+
},
|
29775 |
+
{
|
29776 |
+
"epoch": 0.84,
|
29777 |
+
"grad_norm": 2.275667428970337,
|
29778 |
+
"learning_rate": 7.98211374079747e-06,
|
29779 |
+
"loss": 5.441,
|
29780 |
+
"step": 17012
|
29781 |
+
},
|
29782 |
+
{
|
29783 |
+
"epoch": 0.84,
|
29784 |
+
"grad_norm": 2.080756902694702,
|
29785 |
+
"learning_rate": 7.972231829635852e-06,
|
29786 |
+
"loss": 5.4398,
|
29787 |
+
"step": 17016
|
29788 |
+
},
|
29789 |
+
{
|
29790 |
+
"epoch": 0.84,
|
29791 |
+
"grad_norm": 2.10422420501709,
|
29792 |
+
"learning_rate": 7.962349918474234e-06,
|
29793 |
+
"loss": 5.437,
|
29794 |
+
"step": 17020
|
29795 |
+
},
|
29796 |
+
{
|
29797 |
+
"epoch": 0.84,
|
29798 |
+
"grad_norm": 1.858323335647583,
|
29799 |
+
"learning_rate": 7.952468007312615e-06,
|
29800 |
+
"loss": 5.5432,
|
29801 |
+
"step": 17024
|
29802 |
+
},
|
29803 |
+
{
|
29804 |
+
"epoch": 0.84,
|
29805 |
+
"grad_norm": 2.4101650714874268,
|
29806 |
+
"learning_rate": 7.942586096150997e-06,
|
29807 |
+
"loss": 5.412,
|
29808 |
+
"step": 17028
|
29809 |
+
},
|
29810 |
+
{
|
29811 |
+
"epoch": 0.84,
|
29812 |
+
"grad_norm": 2.2219436168670654,
|
29813 |
+
"learning_rate": 7.932704184989377e-06,
|
29814 |
+
"loss": 5.5279,
|
29815 |
+
"step": 17032
|
29816 |
+
},
|
29817 |
+
{
|
29818 |
+
"epoch": 0.84,
|
29819 |
+
"grad_norm": 2.182474374771118,
|
29820 |
+
"learning_rate": 7.922822273827757e-06,
|
29821 |
+
"loss": 5.4662,
|
29822 |
+
"step": 17036
|
29823 |
+
},
|
29824 |
+
{
|
29825 |
+
"epoch": 0.84,
|
29826 |
+
"grad_norm": 2.060351610183716,
|
29827 |
+
"learning_rate": 7.91294036266614e-06,
|
29828 |
+
"loss": 5.504,
|
29829 |
+
"step": 17040
|
29830 |
+
},
|
29831 |
+
{
|
29832 |
+
"epoch": 0.84,
|
29833 |
+
"grad_norm": 2.1096701622009277,
|
29834 |
+
"learning_rate": 7.903058451504522e-06,
|
29835 |
+
"loss": 5.4539,
|
29836 |
+
"step": 17044
|
29837 |
+
},
|
29838 |
+
{
|
29839 |
+
"epoch": 0.84,
|
29840 |
+
"grad_norm": 2.0492708683013916,
|
29841 |
+
"learning_rate": 7.893176540342904e-06,
|
29842 |
+
"loss": 5.4398,
|
29843 |
+
"step": 17048
|
29844 |
+
},
|
29845 |
+
{
|
29846 |
+
"epoch": 0.84,
|
29847 |
+
"grad_norm": 2.032947301864624,
|
29848 |
+
"learning_rate": 7.883294629181284e-06,
|
29849 |
+
"loss": 5.5253,
|
29850 |
+
"step": 17052
|
29851 |
+
},
|
29852 |
+
{
|
29853 |
+
"epoch": 0.84,
|
29854 |
+
"grad_norm": 2.0764636993408203,
|
29855 |
+
"learning_rate": 7.873412718019666e-06,
|
29856 |
+
"loss": 5.4655,
|
29857 |
+
"step": 17056
|
29858 |
+
},
|
29859 |
+
{
|
29860 |
+
"epoch": 0.84,
|
29861 |
+
"grad_norm": 2.105656862258911,
|
29862 |
+
"learning_rate": 7.863530806858046e-06,
|
29863 |
+
"loss": 5.4761,
|
29864 |
+
"step": 17060
|
29865 |
+
},
|
29866 |
+
{
|
29867 |
+
"epoch": 0.84,
|
29868 |
+
"grad_norm": 1.975953459739685,
|
29869 |
+
"learning_rate": 7.853648895696428e-06,
|
29870 |
+
"loss": 5.5364,
|
29871 |
+
"step": 17064
|
29872 |
+
},
|
29873 |
+
{
|
29874 |
+
"epoch": 0.84,
|
29875 |
+
"grad_norm": 2.0592944622039795,
|
29876 |
+
"learning_rate": 7.843766984534809e-06,
|
29877 |
+
"loss": 5.4987,
|
29878 |
+
"step": 17068
|
29879 |
+
},
|
29880 |
+
{
|
29881 |
+
"epoch": 0.84,
|
29882 |
+
"grad_norm": 2.1122117042541504,
|
29883 |
+
"learning_rate": 7.833885073373191e-06,
|
29884 |
+
"loss": 5.4162,
|
29885 |
+
"step": 17072
|
29886 |
+
},
|
29887 |
+
{
|
29888 |
+
"epoch": 0.84,
|
29889 |
+
"grad_norm": 2.143172264099121,
|
29890 |
+
"learning_rate": 7.824003162211573e-06,
|
29891 |
+
"loss": 5.4959,
|
29892 |
+
"step": 17076
|
29893 |
+
},
|
29894 |
+
{
|
29895 |
+
"epoch": 0.84,
|
29896 |
+
"grad_norm": 1.9919787645339966,
|
29897 |
+
"learning_rate": 7.814121251049953e-06,
|
29898 |
+
"loss": 5.469,
|
29899 |
+
"step": 17080
|
29900 |
+
},
|
29901 |
+
{
|
29902 |
+
"epoch": 0.84,
|
29903 |
+
"grad_norm": 1.9146004915237427,
|
29904 |
+
"learning_rate": 7.804239339888335e-06,
|
29905 |
+
"loss": 5.4748,
|
29906 |
+
"step": 17084
|
29907 |
+
},
|
29908 |
+
{
|
29909 |
+
"epoch": 0.84,
|
29910 |
+
"grad_norm": 2.3150486946105957,
|
29911 |
+
"learning_rate": 7.794357428726716e-06,
|
29912 |
+
"loss": 5.4056,
|
29913 |
+
"step": 17088
|
29914 |
+
},
|
29915 |
+
{
|
29916 |
+
"epoch": 0.84,
|
29917 |
+
"grad_norm": 2.1717705726623535,
|
29918 |
+
"learning_rate": 7.784475517565098e-06,
|
29919 |
+
"loss": 5.5389,
|
29920 |
+
"step": 17092
|
29921 |
+
},
|
29922 |
+
{
|
29923 |
+
"epoch": 0.84,
|
29924 |
+
"grad_norm": 2.1674489974975586,
|
29925 |
+
"learning_rate": 7.774593606403478e-06,
|
29926 |
+
"loss": 5.3378,
|
29927 |
+
"step": 17096
|
29928 |
+
},
|
29929 |
+
{
|
29930 |
+
"epoch": 0.84,
|
29931 |
+
"grad_norm": 2.17425537109375,
|
29932 |
+
"learning_rate": 7.76471169524186e-06,
|
29933 |
+
"loss": 5.5094,
|
29934 |
+
"step": 17100
|
29935 |
+
},
|
29936 |
+
{
|
29937 |
+
"epoch": 0.85,
|
29938 |
+
"grad_norm": 2.2170867919921875,
|
29939 |
+
"learning_rate": 7.754829784080242e-06,
|
29940 |
+
"loss": 5.4591,
|
29941 |
+
"step": 17104
|
29942 |
+
},
|
29943 |
+
{
|
29944 |
+
"epoch": 0.85,
|
29945 |
+
"grad_norm": 2.0710206031799316,
|
29946 |
+
"learning_rate": 7.744947872918623e-06,
|
29947 |
+
"loss": 5.461,
|
29948 |
+
"step": 17108
|
29949 |
+
},
|
29950 |
+
{
|
29951 |
+
"epoch": 0.85,
|
29952 |
+
"grad_norm": 1.9662617444992065,
|
29953 |
+
"learning_rate": 7.735065961757005e-06,
|
29954 |
+
"loss": 5.5232,
|
29955 |
+
"step": 17112
|
29956 |
+
},
|
29957 |
+
{
|
29958 |
+
"epoch": 0.85,
|
29959 |
+
"grad_norm": 2.1950018405914307,
|
29960 |
+
"learning_rate": 7.725184050595385e-06,
|
29961 |
+
"loss": 5.4845,
|
29962 |
+
"step": 17116
|
29963 |
+
},
|
29964 |
+
{
|
29965 |
+
"epoch": 0.85,
|
29966 |
+
"grad_norm": 2.166281223297119,
|
29967 |
+
"learning_rate": 7.715302139433767e-06,
|
29968 |
+
"loss": 5.4899,
|
29969 |
+
"step": 17120
|
29970 |
+
},
|
29971 |
+
{
|
29972 |
+
"epoch": 0.85,
|
29973 |
+
"grad_norm": 2.0825867652893066,
|
29974 |
+
"learning_rate": 7.705420228272148e-06,
|
29975 |
+
"loss": 5.5889,
|
29976 |
+
"step": 17124
|
29977 |
+
},
|
29978 |
+
{
|
29979 |
+
"epoch": 0.85,
|
29980 |
+
"grad_norm": 2.0458121299743652,
|
29981 |
+
"learning_rate": 7.69553831711053e-06,
|
29982 |
+
"loss": 5.4465,
|
29983 |
+
"step": 17128
|
29984 |
+
},
|
29985 |
+
{
|
29986 |
+
"epoch": 0.85,
|
29987 |
+
"grad_norm": 1.972931146621704,
|
29988 |
+
"learning_rate": 7.685656405948912e-06,
|
29989 |
+
"loss": 5.447,
|
29990 |
+
"step": 17132
|
29991 |
+
},
|
29992 |
+
{
|
29993 |
+
"epoch": 0.85,
|
29994 |
+
"grad_norm": 2.2071616649627686,
|
29995 |
+
"learning_rate": 7.675774494787292e-06,
|
29996 |
+
"loss": 5.5405,
|
29997 |
+
"step": 17136
|
29998 |
+
},
|
29999 |
+
{
|
30000 |
+
"epoch": 0.85,
|
30001 |
+
"grad_norm": 2.24798583984375,
|
30002 |
+
"learning_rate": 7.665892583625674e-06,
|
30003 |
+
"loss": 5.5034,
|
30004 |
+
"step": 17140
|
30005 |
+
},
|
30006 |
+
{
|
30007 |
+
"epoch": 0.85,
|
30008 |
+
"grad_norm": 2.3352463245391846,
|
30009 |
+
"learning_rate": 7.656010672464054e-06,
|
30010 |
+
"loss": 5.5322,
|
30011 |
+
"step": 17144
|
30012 |
+
},
|
30013 |
+
{
|
30014 |
+
"epoch": 0.85,
|
30015 |
+
"grad_norm": 2.1701347827911377,
|
30016 |
+
"learning_rate": 7.646128761302436e-06,
|
30017 |
+
"loss": 5.6042,
|
30018 |
+
"step": 17148
|
30019 |
+
},
|
30020 |
+
{
|
30021 |
+
"epoch": 0.85,
|
30022 |
+
"grad_norm": 2.0654942989349365,
|
30023 |
+
"learning_rate": 7.636246850140817e-06,
|
30024 |
+
"loss": 5.5102,
|
30025 |
+
"step": 17152
|
30026 |
+
},
|
30027 |
+
{
|
30028 |
+
"epoch": 0.85,
|
30029 |
+
"grad_norm": 2.410454273223877,
|
30030 |
+
"learning_rate": 7.626364938979198e-06,
|
30031 |
+
"loss": 5.4408,
|
30032 |
+
"step": 17156
|
30033 |
+
},
|
30034 |
+
{
|
30035 |
+
"epoch": 0.85,
|
30036 |
+
"grad_norm": 2.0221352577209473,
|
30037 |
+
"learning_rate": 7.616483027817581e-06,
|
30038 |
+
"loss": 5.4086,
|
30039 |
+
"step": 17160
|
30040 |
+
},
|
30041 |
+
{
|
30042 |
+
"epoch": 0.85,
|
30043 |
+
"grad_norm": 2.211092233657837,
|
30044 |
+
"learning_rate": 7.606601116655962e-06,
|
30045 |
+
"loss": 5.4589,
|
30046 |
+
"step": 17164
|
30047 |
+
},
|
30048 |
+
{
|
30049 |
+
"epoch": 0.85,
|
30050 |
+
"grad_norm": 1.9467920064926147,
|
30051 |
+
"learning_rate": 7.596719205494343e-06,
|
30052 |
+
"loss": 5.4867,
|
30053 |
+
"step": 17168
|
30054 |
+
},
|
30055 |
+
{
|
30056 |
+
"epoch": 0.85,
|
30057 |
+
"grad_norm": 2.1144025325775146,
|
30058 |
+
"learning_rate": 7.586837294332725e-06,
|
30059 |
+
"loss": 5.5143,
|
30060 |
+
"step": 17172
|
30061 |
+
},
|
30062 |
+
{
|
30063 |
+
"epoch": 0.85,
|
30064 |
+
"grad_norm": 2.1652915477752686,
|
30065 |
+
"learning_rate": 7.576955383171106e-06,
|
30066 |
+
"loss": 5.533,
|
30067 |
+
"step": 17176
|
30068 |
+
},
|
30069 |
+
{
|
30070 |
+
"epoch": 0.85,
|
30071 |
+
"grad_norm": 1.9289984703063965,
|
30072 |
+
"learning_rate": 7.567073472009486e-06,
|
30073 |
+
"loss": 5.5406,
|
30074 |
+
"step": 17180
|
30075 |
+
},
|
30076 |
+
{
|
30077 |
+
"epoch": 0.85,
|
30078 |
+
"grad_norm": 2.028322458267212,
|
30079 |
+
"learning_rate": 7.557191560847867e-06,
|
30080 |
+
"loss": 5.4118,
|
30081 |
+
"step": 17184
|
30082 |
+
},
|
30083 |
+
{
|
30084 |
+
"epoch": 0.85,
|
30085 |
+
"grad_norm": 2.2385053634643555,
|
30086 |
+
"learning_rate": 7.54730964968625e-06,
|
30087 |
+
"loss": 5.5023,
|
30088 |
+
"step": 17188
|
30089 |
+
},
|
30090 |
+
{
|
30091 |
+
"epoch": 0.85,
|
30092 |
+
"grad_norm": 1.8756178617477417,
|
30093 |
+
"learning_rate": 7.5374277385246315e-06,
|
30094 |
+
"loss": 5.4028,
|
30095 |
+
"step": 17192
|
30096 |
+
},
|
30097 |
+
{
|
30098 |
+
"epoch": 0.85,
|
30099 |
+
"grad_norm": 2.0008492469787598,
|
30100 |
+
"learning_rate": 7.527545827363013e-06,
|
30101 |
+
"loss": 5.4572,
|
30102 |
+
"step": 17196
|
30103 |
+
},
|
30104 |
+
{
|
30105 |
+
"epoch": 0.85,
|
30106 |
+
"grad_norm": 1.9606680870056152,
|
30107 |
+
"learning_rate": 7.517663916201394e-06,
|
30108 |
+
"loss": 5.3813,
|
30109 |
+
"step": 17200
|
30110 |
+
},
|
30111 |
+
{
|
30112 |
+
"epoch": 0.85,
|
30113 |
+
"grad_norm": 1.765757441520691,
|
30114 |
+
"learning_rate": 7.507782005039775e-06,
|
30115 |
+
"loss": 5.433,
|
30116 |
+
"step": 17204
|
30117 |
+
},
|
30118 |
+
{
|
30119 |
+
"epoch": 0.85,
|
30120 |
+
"grad_norm": 2.2999327182769775,
|
30121 |
+
"learning_rate": 7.497900093878156e-06,
|
30122 |
+
"loss": 5.4443,
|
30123 |
+
"step": 17208
|
30124 |
+
},
|
30125 |
+
{
|
30126 |
+
"epoch": 0.85,
|
30127 |
+
"grad_norm": 1.831790804862976,
|
30128 |
+
"learning_rate": 7.488018182716537e-06,
|
30129 |
+
"loss": 5.4745,
|
30130 |
+
"step": 17212
|
30131 |
+
},
|
30132 |
+
{
|
30133 |
+
"epoch": 0.85,
|
30134 |
+
"grad_norm": 2.0281448364257812,
|
30135 |
+
"learning_rate": 7.47813627155492e-06,
|
30136 |
+
"loss": 5.3546,
|
30137 |
+
"step": 17216
|
30138 |
+
},
|
30139 |
+
{
|
30140 |
+
"epoch": 0.85,
|
30141 |
+
"grad_norm": 2.163875102996826,
|
30142 |
+
"learning_rate": 7.468254360393301e-06,
|
30143 |
+
"loss": 5.4957,
|
30144 |
+
"step": 17220
|
30145 |
+
},
|
30146 |
+
{
|
30147 |
+
"epoch": 0.85,
|
30148 |
+
"grad_norm": 2.0201468467712402,
|
30149 |
+
"learning_rate": 7.458372449231682e-06,
|
30150 |
+
"loss": 5.3797,
|
30151 |
+
"step": 17224
|
30152 |
+
},
|
30153 |
+
{
|
30154 |
+
"epoch": 0.85,
|
30155 |
+
"grad_norm": 1.9520927667617798,
|
30156 |
+
"learning_rate": 7.448490538070063e-06,
|
30157 |
+
"loss": 5.5663,
|
30158 |
+
"step": 17228
|
30159 |
+
},
|
30160 |
+
{
|
30161 |
+
"epoch": 0.85,
|
30162 |
+
"grad_norm": 1.9706037044525146,
|
30163 |
+
"learning_rate": 7.4386086269084445e-06,
|
30164 |
+
"loss": 5.4462,
|
30165 |
+
"step": 17232
|
30166 |
+
},
|
30167 |
+
{
|
30168 |
+
"epoch": 0.85,
|
30169 |
+
"grad_norm": 2.2334280014038086,
|
30170 |
+
"learning_rate": 7.428726715746826e-06,
|
30171 |
+
"loss": 5.5028,
|
30172 |
+
"step": 17236
|
30173 |
+
},
|
30174 |
+
{
|
30175 |
+
"epoch": 0.85,
|
30176 |
+
"grad_norm": 2.089432716369629,
|
30177 |
+
"learning_rate": 7.418844804585207e-06,
|
30178 |
+
"loss": 5.5537,
|
30179 |
+
"step": 17240
|
30180 |
+
},
|
30181 |
+
{
|
30182 |
+
"epoch": 0.85,
|
30183 |
+
"grad_norm": 2.0354325771331787,
|
30184 |
+
"learning_rate": 7.408962893423589e-06,
|
30185 |
+
"loss": 5.4,
|
30186 |
+
"step": 17244
|
30187 |
+
},
|
30188 |
+
{
|
30189 |
+
"epoch": 0.85,
|
30190 |
+
"grad_norm": 1.9282554388046265,
|
30191 |
+
"learning_rate": 7.39908098226197e-06,
|
30192 |
+
"loss": 5.3915,
|
30193 |
+
"step": 17248
|
30194 |
+
},
|
30195 |
+
{
|
30196 |
+
"epoch": 0.85,
|
30197 |
+
"grad_norm": 2.133868455886841,
|
30198 |
+
"learning_rate": 7.389199071100351e-06,
|
30199 |
+
"loss": 5.4868,
|
30200 |
+
"step": 17252
|
30201 |
+
},
|
30202 |
+
{
|
30203 |
+
"epoch": 0.85,
|
30204 |
+
"grad_norm": 2.0873701572418213,
|
30205 |
+
"learning_rate": 7.379317159938733e-06,
|
30206 |
+
"loss": 5.6375,
|
30207 |
+
"step": 17256
|
30208 |
+
},
|
30209 |
+
{
|
30210 |
+
"epoch": 0.85,
|
30211 |
+
"grad_norm": 1.9751038551330566,
|
30212 |
+
"learning_rate": 7.369435248777114e-06,
|
30213 |
+
"loss": 5.4291,
|
30214 |
+
"step": 17260
|
30215 |
+
},
|
30216 |
+
{
|
30217 |
+
"epoch": 0.85,
|
30218 |
+
"grad_norm": 1.8549004793167114,
|
30219 |
+
"learning_rate": 7.359553337615495e-06,
|
30220 |
+
"loss": 5.4953,
|
30221 |
+
"step": 17264
|
30222 |
+
},
|
30223 |
+
{
|
30224 |
+
"epoch": 0.85,
|
30225 |
+
"grad_norm": 1.9882365465164185,
|
30226 |
+
"learning_rate": 7.349671426453876e-06,
|
30227 |
+
"loss": 5.5191,
|
30228 |
+
"step": 17268
|
30229 |
+
},
|
30230 |
+
{
|
30231 |
+
"epoch": 0.85,
|
30232 |
+
"grad_norm": 2.0008509159088135,
|
30233 |
+
"learning_rate": 7.339789515292258e-06,
|
30234 |
+
"loss": 5.573,
|
30235 |
+
"step": 17272
|
30236 |
+
},
|
30237 |
+
{
|
30238 |
+
"epoch": 0.85,
|
30239 |
+
"grad_norm": 1.9084336757659912,
|
30240 |
+
"learning_rate": 7.3299076041306395e-06,
|
30241 |
+
"loss": 5.5864,
|
30242 |
+
"step": 17276
|
30243 |
+
},
|
30244 |
+
{
|
30245 |
+
"epoch": 0.85,
|
30246 |
+
"grad_norm": 2.3234941959381104,
|
30247 |
+
"learning_rate": 7.320025692969021e-06,
|
30248 |
+
"loss": 5.4648,
|
30249 |
+
"step": 17280
|
30250 |
+
},
|
30251 |
+
{
|
30252 |
+
"epoch": 0.85,
|
30253 |
+
"grad_norm": 2.033445358276367,
|
30254 |
+
"learning_rate": 7.310143781807402e-06,
|
30255 |
+
"loss": 5.5663,
|
30256 |
+
"step": 17284
|
30257 |
+
},
|
30258 |
+
{
|
30259 |
+
"epoch": 0.85,
|
30260 |
+
"grad_norm": 2.036726474761963,
|
30261 |
+
"learning_rate": 7.300261870645783e-06,
|
30262 |
+
"loss": 5.3706,
|
30263 |
+
"step": 17288
|
30264 |
+
},
|
30265 |
+
{
|
30266 |
+
"epoch": 0.85,
|
30267 |
+
"grad_norm": 2.135927677154541,
|
30268 |
+
"learning_rate": 7.290379959484164e-06,
|
30269 |
+
"loss": 5.4365,
|
30270 |
+
"step": 17292
|
30271 |
+
},
|
30272 |
+
{
|
30273 |
+
"epoch": 0.85,
|
30274 |
+
"grad_norm": 2.0434539318084717,
|
30275 |
+
"learning_rate": 7.280498048322546e-06,
|
30276 |
+
"loss": 5.4914,
|
30277 |
+
"step": 17296
|
30278 |
+
},
|
30279 |
+
{
|
30280 |
+
"epoch": 0.85,
|
30281 |
+
"grad_norm": 1.9364794492721558,
|
30282 |
+
"learning_rate": 7.270616137160927e-06,
|
30283 |
+
"loss": 5.358,
|
30284 |
+
"step": 17300
|
30285 |
+
},
|
30286 |
+
{
|
30287 |
+
"epoch": 0.85,
|
30288 |
+
"grad_norm": 2.1555495262145996,
|
30289 |
+
"learning_rate": 7.260734225999309e-06,
|
30290 |
+
"loss": 5.4667,
|
30291 |
+
"step": 17304
|
30292 |
+
},
|
30293 |
+
{
|
30294 |
+
"epoch": 0.86,
|
30295 |
+
"grad_norm": 2.0687687397003174,
|
30296 |
+
"learning_rate": 7.25085231483769e-06,
|
30297 |
+
"loss": 5.5075,
|
30298 |
+
"step": 17308
|
30299 |
+
},
|
30300 |
+
{
|
30301 |
+
"epoch": 0.86,
|
30302 |
+
"grad_norm": 2.2169644832611084,
|
30303 |
+
"learning_rate": 7.240970403676071e-06,
|
30304 |
+
"loss": 5.5186,
|
30305 |
+
"step": 17312
|
30306 |
+
},
|
30307 |
+
{
|
30308 |
+
"epoch": 0.86,
|
30309 |
+
"grad_norm": 2.0690207481384277,
|
30310 |
+
"learning_rate": 7.2310884925144525e-06,
|
30311 |
+
"loss": 5.4944,
|
30312 |
+
"step": 17316
|
30313 |
+
},
|
30314 |
+
{
|
30315 |
+
"epoch": 0.86,
|
30316 |
+
"grad_norm": 2.172851324081421,
|
30317 |
+
"learning_rate": 7.221206581352834e-06,
|
30318 |
+
"loss": 5.5178,
|
30319 |
+
"step": 17320
|
30320 |
+
},
|
30321 |
+
{
|
30322 |
+
"epoch": 0.86,
|
30323 |
+
"grad_norm": 2.178602457046509,
|
30324 |
+
"learning_rate": 7.211324670191215e-06,
|
30325 |
+
"loss": 5.5693,
|
30326 |
+
"step": 17324
|
30327 |
+
},
|
30328 |
+
{
|
30329 |
+
"epoch": 0.86,
|
30330 |
+
"grad_norm": 1.9525049924850464,
|
30331 |
+
"learning_rate": 7.201442759029596e-06,
|
30332 |
+
"loss": 5.3773,
|
30333 |
+
"step": 17328
|
30334 |
+
},
|
30335 |
+
{
|
30336 |
+
"epoch": 0.86,
|
30337 |
+
"grad_norm": 2.0250043869018555,
|
30338 |
+
"learning_rate": 7.191560847867978e-06,
|
30339 |
+
"loss": 5.4672,
|
30340 |
+
"step": 17332
|
30341 |
+
},
|
30342 |
+
{
|
30343 |
+
"epoch": 0.86,
|
30344 |
+
"grad_norm": 2.229799747467041,
|
30345 |
+
"learning_rate": 7.1816789367063594e-06,
|
30346 |
+
"loss": 5.4451,
|
30347 |
+
"step": 17336
|
30348 |
+
},
|
30349 |
+
{
|
30350 |
+
"epoch": 0.86,
|
30351 |
+
"grad_norm": 2.2048773765563965,
|
30352 |
+
"learning_rate": 7.171797025544741e-06,
|
30353 |
+
"loss": 5.5388,
|
30354 |
+
"step": 17340
|
30355 |
+
},
|
30356 |
+
{
|
30357 |
+
"epoch": 0.86,
|
30358 |
+
"grad_norm": 2.1948986053466797,
|
30359 |
+
"learning_rate": 7.161915114383122e-06,
|
30360 |
+
"loss": 5.4121,
|
30361 |
+
"step": 17344
|
30362 |
+
},
|
30363 |
+
{
|
30364 |
+
"epoch": 0.86,
|
30365 |
+
"grad_norm": 2.410446882247925,
|
30366 |
+
"learning_rate": 7.152033203221503e-06,
|
30367 |
+
"loss": 5.5068,
|
30368 |
+
"step": 17348
|
30369 |
+
},
|
30370 |
+
{
|
30371 |
+
"epoch": 0.86,
|
30372 |
+
"grad_norm": 2.0198326110839844,
|
30373 |
+
"learning_rate": 7.142151292059884e-06,
|
30374 |
+
"loss": 5.4786,
|
30375 |
+
"step": 17352
|
30376 |
+
},
|
30377 |
+
{
|
30378 |
+
"epoch": 0.86,
|
30379 |
+
"grad_norm": 2.1943955421447754,
|
30380 |
+
"learning_rate": 7.1322693808982655e-06,
|
30381 |
+
"loss": 5.4957,
|
30382 |
+
"step": 17356
|
30383 |
+
},
|
30384 |
+
{
|
30385 |
+
"epoch": 0.86,
|
30386 |
+
"grad_norm": 2.1132426261901855,
|
30387 |
+
"learning_rate": 7.122387469736648e-06,
|
30388 |
+
"loss": 5.3965,
|
30389 |
+
"step": 17360
|
30390 |
+
},
|
30391 |
+
{
|
30392 |
+
"epoch": 0.86,
|
30393 |
+
"grad_norm": 2.0462357997894287,
|
30394 |
+
"learning_rate": 7.112505558575029e-06,
|
30395 |
+
"loss": 5.5153,
|
30396 |
+
"step": 17364
|
30397 |
+
},
|
30398 |
+
{
|
30399 |
+
"epoch": 0.86,
|
30400 |
+
"grad_norm": 2.0501723289489746,
|
30401 |
+
"learning_rate": 7.10262364741341e-06,
|
30402 |
+
"loss": 5.5062,
|
30403 |
+
"step": 17368
|
30404 |
+
},
|
30405 |
+
{
|
30406 |
+
"epoch": 0.86,
|
30407 |
+
"grad_norm": 2.148674726486206,
|
30408 |
+
"learning_rate": 7.092741736251791e-06,
|
30409 |
+
"loss": 5.2946,
|
30410 |
+
"step": 17372
|
30411 |
+
},
|
30412 |
+
{
|
30413 |
+
"epoch": 0.86,
|
30414 |
+
"grad_norm": 2.0384411811828613,
|
30415 |
+
"learning_rate": 7.082859825090172e-06,
|
30416 |
+
"loss": 5.6131,
|
30417 |
+
"step": 17376
|
30418 |
+
},
|
30419 |
+
{
|
30420 |
+
"epoch": 0.86,
|
30421 |
+
"grad_norm": 2.235848903656006,
|
30422 |
+
"learning_rate": 7.072977913928554e-06,
|
30423 |
+
"loss": 5.3982,
|
30424 |
+
"step": 17380
|
30425 |
+
},
|
30426 |
+
{
|
30427 |
+
"epoch": 0.86,
|
30428 |
+
"grad_norm": 2.0050299167633057,
|
30429 |
+
"learning_rate": 7.063096002766935e-06,
|
30430 |
+
"loss": 5.5681,
|
30431 |
+
"step": 17384
|
30432 |
+
},
|
30433 |
+
{
|
30434 |
+
"epoch": 0.86,
|
30435 |
+
"grad_norm": 1.9482308626174927,
|
30436 |
+
"learning_rate": 7.053214091605318e-06,
|
30437 |
+
"loss": 5.4242,
|
30438 |
+
"step": 17388
|
30439 |
+
},
|
30440 |
+
{
|
30441 |
+
"epoch": 0.86,
|
30442 |
+
"grad_norm": 2.077125072479248,
|
30443 |
+
"learning_rate": 7.043332180443699e-06,
|
30444 |
+
"loss": 5.4092,
|
30445 |
+
"step": 17392
|
30446 |
+
},
|
30447 |
+
{
|
30448 |
+
"epoch": 0.86,
|
30449 |
+
"grad_norm": 2.2242355346679688,
|
30450 |
+
"learning_rate": 7.033450269282079e-06,
|
30451 |
+
"loss": 5.4268,
|
30452 |
+
"step": 17396
|
30453 |
+
},
|
30454 |
+
{
|
30455 |
+
"epoch": 0.86,
|
30456 |
+
"grad_norm": 2.2366597652435303,
|
30457 |
+
"learning_rate": 7.0235683581204605e-06,
|
30458 |
+
"loss": 5.4304,
|
30459 |
+
"step": 17400
|
30460 |
+
},
|
30461 |
+
{
|
30462 |
+
"epoch": 0.86,
|
30463 |
+
"grad_norm": 2.3268561363220215,
|
30464 |
+
"learning_rate": 7.013686446958842e-06,
|
30465 |
+
"loss": 5.4141,
|
30466 |
+
"step": 17404
|
30467 |
+
},
|
30468 |
+
{
|
30469 |
+
"epoch": 0.86,
|
30470 |
+
"grad_norm": 2.1040186882019043,
|
30471 |
+
"learning_rate": 7.003804535797223e-06,
|
30472 |
+
"loss": 5.4129,
|
30473 |
+
"step": 17408
|
30474 |
+
},
|
30475 |
+
{
|
30476 |
+
"epoch": 0.86,
|
30477 |
+
"grad_norm": 2.0050957202911377,
|
30478 |
+
"learning_rate": 6.993922624635604e-06,
|
30479 |
+
"loss": 5.5029,
|
30480 |
+
"step": 17412
|
30481 |
+
},
|
30482 |
+
{
|
30483 |
+
"epoch": 0.86,
|
30484 |
+
"grad_norm": 1.914214849472046,
|
30485 |
+
"learning_rate": 6.984040713473987e-06,
|
30486 |
+
"loss": 5.5394,
|
30487 |
+
"step": 17416
|
30488 |
+
},
|
30489 |
+
{
|
30490 |
+
"epoch": 0.86,
|
30491 |
+
"grad_norm": 2.112946033477783,
|
30492 |
+
"learning_rate": 6.974158802312368e-06,
|
30493 |
+
"loss": 5.4547,
|
30494 |
+
"step": 17420
|
30495 |
+
},
|
30496 |
+
{
|
30497 |
+
"epoch": 0.86,
|
30498 |
+
"grad_norm": 1.980510950088501,
|
30499 |
+
"learning_rate": 6.9642768911507495e-06,
|
30500 |
+
"loss": 5.4997,
|
30501 |
+
"step": 17424
|
30502 |
+
},
|
30503 |
+
{
|
30504 |
+
"epoch": 0.86,
|
30505 |
+
"grad_norm": 1.985985517501831,
|
30506 |
+
"learning_rate": 6.954394979989131e-06,
|
30507 |
+
"loss": 5.3671,
|
30508 |
+
"step": 17428
|
30509 |
+
},
|
30510 |
+
{
|
30511 |
+
"epoch": 0.86,
|
30512 |
+
"grad_norm": 1.897262454032898,
|
30513 |
+
"learning_rate": 6.944513068827511e-06,
|
30514 |
+
"loss": 5.4578,
|
30515 |
+
"step": 17432
|
30516 |
+
},
|
30517 |
+
{
|
30518 |
+
"epoch": 0.86,
|
30519 |
+
"grad_norm": 1.9851828813552856,
|
30520 |
+
"learning_rate": 6.934631157665892e-06,
|
30521 |
+
"loss": 5.4318,
|
30522 |
+
"step": 17436
|
30523 |
+
},
|
30524 |
+
{
|
30525 |
+
"epoch": 0.86,
|
30526 |
+
"grad_norm": 1.8977246284484863,
|
30527 |
+
"learning_rate": 6.9247492465042735e-06,
|
30528 |
+
"loss": 5.4777,
|
30529 |
+
"step": 17440
|
30530 |
+
},
|
30531 |
+
{
|
30532 |
+
"epoch": 0.86,
|
30533 |
+
"grad_norm": 2.1280171871185303,
|
30534 |
+
"learning_rate": 6.9148673353426564e-06,
|
30535 |
+
"loss": 5.583,
|
30536 |
+
"step": 17444
|
30537 |
+
},
|
30538 |
+
{
|
30539 |
+
"epoch": 0.86,
|
30540 |
+
"grad_norm": 2.3764641284942627,
|
30541 |
+
"learning_rate": 6.904985424181038e-06,
|
30542 |
+
"loss": 5.5559,
|
30543 |
+
"step": 17448
|
30544 |
+
},
|
30545 |
+
{
|
30546 |
+
"epoch": 0.86,
|
30547 |
+
"grad_norm": 1.9994136095046997,
|
30548 |
+
"learning_rate": 6.895103513019419e-06,
|
30549 |
+
"loss": 5.5662,
|
30550 |
+
"step": 17452
|
30551 |
+
},
|
30552 |
+
{
|
30553 |
+
"epoch": 0.86,
|
30554 |
+
"grad_norm": 2.108659267425537,
|
30555 |
+
"learning_rate": 6.8852216018578e-06,
|
30556 |
+
"loss": 5.4098,
|
30557 |
+
"step": 17456
|
30558 |
+
},
|
30559 |
+
{
|
30560 |
+
"epoch": 0.86,
|
30561 |
+
"grad_norm": 1.9477959871292114,
|
30562 |
+
"learning_rate": 6.875339690696181e-06,
|
30563 |
+
"loss": 5.4161,
|
30564 |
+
"step": 17460
|
30565 |
+
},
|
30566 |
+
{
|
30567 |
+
"epoch": 0.86,
|
30568 |
+
"grad_norm": 2.2120134830474854,
|
30569 |
+
"learning_rate": 6.865457779534562e-06,
|
30570 |
+
"loss": 5.5344,
|
30571 |
+
"step": 17464
|
30572 |
+
},
|
30573 |
+
{
|
30574 |
+
"epoch": 0.86,
|
30575 |
+
"grad_norm": 1.9351931810379028,
|
30576 |
+
"learning_rate": 6.855575868372943e-06,
|
30577 |
+
"loss": 5.5115,
|
30578 |
+
"step": 17468
|
30579 |
+
},
|
30580 |
+
{
|
30581 |
+
"epoch": 0.86,
|
30582 |
+
"grad_norm": 1.9376587867736816,
|
30583 |
+
"learning_rate": 6.845693957211324e-06,
|
30584 |
+
"loss": 5.4218,
|
30585 |
+
"step": 17472
|
30586 |
+
},
|
30587 |
+
{
|
30588 |
+
"epoch": 0.86,
|
30589 |
+
"grad_norm": 1.8527143001556396,
|
30590 |
+
"learning_rate": 6.835812046049707e-06,
|
30591 |
+
"loss": 5.4781,
|
30592 |
+
"step": 17476
|
30593 |
+
},
|
30594 |
+
{
|
30595 |
+
"epoch": 0.86,
|
30596 |
+
"grad_norm": 1.9370919466018677,
|
30597 |
+
"learning_rate": 6.825930134888088e-06,
|
30598 |
+
"loss": 5.6185,
|
30599 |
+
"step": 17480
|
30600 |
+
},
|
30601 |
+
{
|
30602 |
+
"epoch": 0.86,
|
30603 |
+
"grad_norm": 1.8956094980239868,
|
30604 |
+
"learning_rate": 6.816048223726469e-06,
|
30605 |
+
"loss": 5.5335,
|
30606 |
+
"step": 17484
|
30607 |
+
},
|
30608 |
+
{
|
30609 |
+
"epoch": 0.86,
|
30610 |
+
"grad_norm": 2.1358373165130615,
|
30611 |
+
"learning_rate": 6.806166312564851e-06,
|
30612 |
+
"loss": 5.5501,
|
30613 |
+
"step": 17488
|
30614 |
+
},
|
30615 |
+
{
|
30616 |
+
"epoch": 0.86,
|
30617 |
+
"grad_norm": 1.8500255346298218,
|
30618 |
+
"learning_rate": 6.796284401403232e-06,
|
30619 |
+
"loss": 5.4718,
|
30620 |
+
"step": 17492
|
30621 |
+
},
|
30622 |
+
{
|
30623 |
+
"epoch": 0.86,
|
30624 |
+
"grad_norm": 1.9620647430419922,
|
30625 |
+
"learning_rate": 6.786402490241612e-06,
|
30626 |
+
"loss": 5.4566,
|
30627 |
+
"step": 17496
|
30628 |
+
},
|
30629 |
+
{
|
30630 |
+
"epoch": 0.86,
|
30631 |
+
"grad_norm": 2.0902743339538574,
|
30632 |
+
"learning_rate": 6.776520579079993e-06,
|
30633 |
+
"loss": 5.484,
|
30634 |
+
"step": 17500
|
30635 |
+
},
|
30636 |
+
{
|
30637 |
+
"epoch": 0.86,
|
30638 |
+
"grad_norm": 2.329399824142456,
|
30639 |
+
"learning_rate": 6.766638667918376e-06,
|
30640 |
+
"loss": 5.5337,
|
30641 |
+
"step": 17504
|
30642 |
+
},
|
30643 |
+
{
|
30644 |
+
"epoch": 0.87,
|
30645 |
+
"grad_norm": 1.9751675128936768,
|
30646 |
+
"learning_rate": 6.7567567567567575e-06,
|
30647 |
+
"loss": 5.4638,
|
30648 |
+
"step": 17508
|
30649 |
+
},
|
30650 |
+
{
|
30651 |
+
"epoch": 0.87,
|
30652 |
+
"grad_norm": 2.188885450363159,
|
30653 |
+
"learning_rate": 6.746874845595139e-06,
|
30654 |
+
"loss": 5.5174,
|
30655 |
+
"step": 17512
|
30656 |
+
},
|
30657 |
+
{
|
30658 |
+
"epoch": 0.87,
|
30659 |
+
"grad_norm": 2.1230249404907227,
|
30660 |
+
"learning_rate": 6.73699293443352e-06,
|
30661 |
+
"loss": 5.4525,
|
30662 |
+
"step": 17516
|
30663 |
+
},
|
30664 |
+
{
|
30665 |
+
"epoch": 0.87,
|
30666 |
+
"grad_norm": 2.0748202800750732,
|
30667 |
+
"learning_rate": 6.727111023271901e-06,
|
30668 |
+
"loss": 5.5633,
|
30669 |
+
"step": 17520
|
30670 |
+
},
|
30671 |
+
{
|
30672 |
+
"epoch": 0.87,
|
30673 |
+
"grad_norm": 1.8766546249389648,
|
30674 |
+
"learning_rate": 6.717229112110282e-06,
|
30675 |
+
"loss": 5.5295,
|
30676 |
+
"step": 17524
|
30677 |
+
},
|
30678 |
+
{
|
30679 |
+
"epoch": 0.87,
|
30680 |
+
"grad_norm": 2.1104044914245605,
|
30681 |
+
"learning_rate": 6.707347200948664e-06,
|
30682 |
+
"loss": 5.5265,
|
30683 |
+
"step": 17528
|
30684 |
+
},
|
30685 |
+
{
|
30686 |
+
"epoch": 0.87,
|
30687 |
+
"grad_norm": 1.9020673036575317,
|
30688 |
+
"learning_rate": 6.697465289787046e-06,
|
30689 |
+
"loss": 5.4684,
|
30690 |
+
"step": 17532
|
30691 |
+
},
|
30692 |
+
{
|
30693 |
+
"epoch": 0.87,
|
30694 |
+
"grad_norm": 1.9456652402877808,
|
30695 |
+
"learning_rate": 6.687583378625427e-06,
|
30696 |
+
"loss": 5.3826,
|
30697 |
+
"step": 17536
|
30698 |
+
},
|
30699 |
+
{
|
30700 |
+
"epoch": 0.87,
|
30701 |
+
"grad_norm": 2.117117166519165,
|
30702 |
+
"learning_rate": 6.677701467463808e-06,
|
30703 |
+
"loss": 5.4525,
|
30704 |
+
"step": 17540
|
30705 |
+
},
|
30706 |
+
{
|
30707 |
+
"epoch": 0.87,
|
30708 |
+
"grad_norm": 2.0873782634735107,
|
30709 |
+
"learning_rate": 6.667819556302189e-06,
|
30710 |
+
"loss": 5.5402,
|
30711 |
+
"step": 17544
|
30712 |
+
},
|
30713 |
+
{
|
30714 |
+
"epoch": 0.87,
|
30715 |
+
"grad_norm": 2.0289838314056396,
|
30716 |
+
"learning_rate": 6.6579376451405705e-06,
|
30717 |
+
"loss": 5.5603,
|
30718 |
+
"step": 17548
|
30719 |
+
},
|
30720 |
+
{
|
30721 |
+
"epoch": 0.87,
|
30722 |
+
"grad_norm": 2.2275471687316895,
|
30723 |
+
"learning_rate": 6.648055733978952e-06,
|
30724 |
+
"loss": 5.3792,
|
30725 |
+
"step": 17552
|
30726 |
+
},
|
30727 |
+
{
|
30728 |
+
"epoch": 0.87,
|
30729 |
+
"grad_norm": 1.9133155345916748,
|
30730 |
+
"learning_rate": 6.638173822817333e-06,
|
30731 |
+
"loss": 5.4289,
|
30732 |
+
"step": 17556
|
30733 |
+
},
|
30734 |
+
{
|
30735 |
+
"epoch": 0.87,
|
30736 |
+
"grad_norm": 2.193645477294922,
|
30737 |
+
"learning_rate": 6.628291911655715e-06,
|
30738 |
+
"loss": 5.5224,
|
30739 |
+
"step": 17560
|
30740 |
+
},
|
30741 |
+
{
|
30742 |
+
"epoch": 0.87,
|
30743 |
+
"grad_norm": 2.1608972549438477,
|
30744 |
+
"learning_rate": 6.618410000494096e-06,
|
30745 |
+
"loss": 5.5412,
|
30746 |
+
"step": 17564
|
30747 |
+
},
|
30748 |
+
{
|
30749 |
+
"epoch": 0.87,
|
30750 |
+
"grad_norm": 2.141594648361206,
|
30751 |
+
"learning_rate": 6.6085280893324774e-06,
|
30752 |
+
"loss": 5.4787,
|
30753 |
+
"step": 17568
|
30754 |
+
},
|
30755 |
+
{
|
30756 |
+
"epoch": 0.87,
|
30757 |
+
"grad_norm": 1.9416935443878174,
|
30758 |
+
"learning_rate": 6.598646178170859e-06,
|
30759 |
+
"loss": 5.3674,
|
30760 |
+
"step": 17572
|
30761 |
+
},
|
30762 |
+
{
|
30763 |
+
"epoch": 0.87,
|
30764 |
+
"grad_norm": 2.110677480697632,
|
30765 |
+
"learning_rate": 6.58876426700924e-06,
|
30766 |
+
"loss": 5.4024,
|
30767 |
+
"step": 17576
|
30768 |
+
},
|
30769 |
+
{
|
30770 |
+
"epoch": 0.87,
|
30771 |
+
"grad_norm": 2.2235372066497803,
|
30772 |
+
"learning_rate": 6.578882355847621e-06,
|
30773 |
+
"loss": 5.4458,
|
30774 |
+
"step": 17580
|
30775 |
+
},
|
30776 |
+
{
|
30777 |
+
"epoch": 0.87,
|
30778 |
+
"grad_norm": 2.280282974243164,
|
30779 |
+
"learning_rate": 6.569000444686002e-06,
|
30780 |
+
"loss": 5.5981,
|
30781 |
+
"step": 17584
|
30782 |
+
},
|
30783 |
+
{
|
30784 |
+
"epoch": 0.87,
|
30785 |
+
"grad_norm": 2.1084625720977783,
|
30786 |
+
"learning_rate": 6.559118533524384e-06,
|
30787 |
+
"loss": 5.4475,
|
30788 |
+
"step": 17588
|
30789 |
+
},
|
30790 |
+
{
|
30791 |
+
"epoch": 0.87,
|
30792 |
+
"grad_norm": 2.004232406616211,
|
30793 |
+
"learning_rate": 6.5492366223627656e-06,
|
30794 |
+
"loss": 5.4314,
|
30795 |
+
"step": 17592
|
30796 |
+
},
|
30797 |
+
{
|
30798 |
+
"epoch": 0.87,
|
30799 |
+
"grad_norm": 1.9286199808120728,
|
30800 |
+
"learning_rate": 6.539354711201147e-06,
|
30801 |
+
"loss": 5.3943,
|
30802 |
+
"step": 17596
|
30803 |
+
},
|
30804 |
+
{
|
30805 |
+
"epoch": 0.87,
|
30806 |
+
"grad_norm": 1.9742239713668823,
|
30807 |
+
"learning_rate": 6.529472800039528e-06,
|
30808 |
+
"loss": 5.4633,
|
30809 |
+
"step": 17600
|
30810 |
+
},
|
30811 |
+
{
|
30812 |
+
"epoch": 0.87,
|
30813 |
+
"grad_norm": 2.1503305435180664,
|
30814 |
+
"learning_rate": 6.519590888877909e-06,
|
30815 |
+
"loss": 5.4654,
|
30816 |
+
"step": 17604
|
30817 |
+
},
|
30818 |
+
{
|
30819 |
+
"epoch": 0.87,
|
30820 |
+
"grad_norm": 1.996319055557251,
|
30821 |
+
"learning_rate": 6.50970897771629e-06,
|
30822 |
+
"loss": 5.4479,
|
30823 |
+
"step": 17608
|
30824 |
+
},
|
30825 |
+
{
|
30826 |
+
"epoch": 0.87,
|
30827 |
+
"grad_norm": 2.1689870357513428,
|
30828 |
+
"learning_rate": 6.499827066554672e-06,
|
30829 |
+
"loss": 5.4242,
|
30830 |
+
"step": 17612
|
30831 |
+
},
|
30832 |
+
{
|
30833 |
+
"epoch": 0.87,
|
30834 |
+
"grad_norm": 2.0061464309692383,
|
30835 |
+
"learning_rate": 6.489945155393053e-06,
|
30836 |
+
"loss": 5.3833,
|
30837 |
+
"step": 17616
|
30838 |
+
},
|
30839 |
+
{
|
30840 |
+
"epoch": 0.87,
|
30841 |
+
"grad_norm": 2.1201388835906982,
|
30842 |
+
"learning_rate": 6.480063244231435e-06,
|
30843 |
+
"loss": 5.4374,
|
30844 |
+
"step": 17620
|
30845 |
+
},
|
30846 |
+
{
|
30847 |
+
"epoch": 0.87,
|
30848 |
+
"grad_norm": 2.196545124053955,
|
30849 |
+
"learning_rate": 6.470181333069816e-06,
|
30850 |
+
"loss": 5.3564,
|
30851 |
+
"step": 17624
|
30852 |
+
},
|
30853 |
+
{
|
30854 |
+
"epoch": 0.87,
|
30855 |
+
"grad_norm": 2.073232412338257,
|
30856 |
+
"learning_rate": 6.460299421908197e-06,
|
30857 |
+
"loss": 5.4057,
|
30858 |
+
"step": 17628
|
30859 |
+
},
|
30860 |
+
{
|
30861 |
+
"epoch": 0.87,
|
30862 |
+
"grad_norm": 1.9354524612426758,
|
30863 |
+
"learning_rate": 6.4504175107465785e-06,
|
30864 |
+
"loss": 5.4718,
|
30865 |
+
"step": 17632
|
30866 |
+
},
|
30867 |
+
{
|
30868 |
+
"epoch": 0.87,
|
30869 |
+
"grad_norm": 2.032994508743286,
|
30870 |
+
"learning_rate": 6.44053559958496e-06,
|
30871 |
+
"loss": 5.358,
|
30872 |
+
"step": 17636
|
30873 |
+
},
|
30874 |
+
{
|
30875 |
+
"epoch": 0.87,
|
30876 |
+
"grad_norm": 2.130598545074463,
|
30877 |
+
"learning_rate": 6.430653688423341e-06,
|
30878 |
+
"loss": 5.4658,
|
30879 |
+
"step": 17640
|
30880 |
+
},
|
30881 |
+
{
|
30882 |
+
"epoch": 0.87,
|
30883 |
+
"grad_norm": 1.8692468404769897,
|
30884 |
+
"learning_rate": 6.420771777261722e-06,
|
30885 |
+
"loss": 5.4004,
|
30886 |
+
"step": 17644
|
30887 |
+
},
|
30888 |
+
{
|
30889 |
+
"epoch": 0.87,
|
30890 |
+
"grad_norm": 1.9368531703948975,
|
30891 |
+
"learning_rate": 6.410889866100104e-06,
|
30892 |
+
"loss": 5.4809,
|
30893 |
+
"step": 17648
|
30894 |
+
},
|
30895 |
+
{
|
30896 |
+
"epoch": 0.87,
|
30897 |
+
"grad_norm": 2.235506534576416,
|
30898 |
+
"learning_rate": 6.4010079549384855e-06,
|
30899 |
+
"loss": 5.4532,
|
30900 |
+
"step": 17652
|
30901 |
+
},
|
30902 |
+
{
|
30903 |
+
"epoch": 0.87,
|
30904 |
+
"grad_norm": 1.9980324506759644,
|
30905 |
+
"learning_rate": 6.391126043776867e-06,
|
30906 |
+
"loss": 5.5156,
|
30907 |
+
"step": 17656
|
30908 |
+
},
|
30909 |
+
{
|
30910 |
+
"epoch": 0.87,
|
30911 |
+
"grad_norm": 1.947649598121643,
|
30912 |
+
"learning_rate": 6.381244132615248e-06,
|
30913 |
+
"loss": 5.3676,
|
30914 |
+
"step": 17660
|
30915 |
+
},
|
30916 |
+
{
|
30917 |
+
"epoch": 0.87,
|
30918 |
+
"grad_norm": 2.365041971206665,
|
30919 |
+
"learning_rate": 6.371362221453629e-06,
|
30920 |
+
"loss": 5.4387,
|
30921 |
+
"step": 17664
|
30922 |
+
},
|
30923 |
+
{
|
30924 |
+
"epoch": 0.87,
|
30925 |
+
"grad_norm": 2.2820627689361572,
|
30926 |
+
"learning_rate": 6.36148031029201e-06,
|
30927 |
+
"loss": 5.4541,
|
30928 |
+
"step": 17668
|
30929 |
+
},
|
30930 |
+
{
|
30931 |
+
"epoch": 0.87,
|
30932 |
+
"grad_norm": 2.02691650390625,
|
30933 |
+
"learning_rate": 6.3515983991303915e-06,
|
30934 |
+
"loss": 5.5084,
|
30935 |
+
"step": 17672
|
30936 |
+
},
|
30937 |
+
{
|
30938 |
+
"epoch": 0.87,
|
30939 |
+
"grad_norm": 2.0064783096313477,
|
30940 |
+
"learning_rate": 6.3417164879687744e-06,
|
30941 |
+
"loss": 5.5205,
|
30942 |
+
"step": 17676
|
30943 |
+
},
|
30944 |
+
{
|
30945 |
+
"epoch": 0.87,
|
30946 |
+
"grad_norm": 1.9961150884628296,
|
30947 |
+
"learning_rate": 6.331834576807155e-06,
|
30948 |
+
"loss": 5.3904,
|
30949 |
+
"step": 17680
|
30950 |
+
},
|
30951 |
+
{
|
30952 |
+
"epoch": 0.87,
|
30953 |
+
"grad_norm": 2.2273404598236084,
|
30954 |
+
"learning_rate": 6.321952665645536e-06,
|
30955 |
+
"loss": 5.4756,
|
30956 |
+
"step": 17684
|
30957 |
+
},
|
30958 |
+
{
|
30959 |
+
"epoch": 0.87,
|
30960 |
+
"grad_norm": 2.078472852706909,
|
30961 |
+
"learning_rate": 6.312070754483917e-06,
|
30962 |
+
"loss": 5.5535,
|
30963 |
+
"step": 17688
|
30964 |
+
},
|
30965 |
+
{
|
30966 |
+
"epoch": 0.87,
|
30967 |
+
"grad_norm": 1.9248629808425903,
|
30968 |
+
"learning_rate": 6.3021888433222984e-06,
|
30969 |
+
"loss": 5.4378,
|
30970 |
+
"step": 17692
|
30971 |
+
},
|
30972 |
+
{
|
30973 |
+
"epoch": 0.87,
|
30974 |
+
"grad_norm": 2.1985530853271484,
|
30975 |
+
"learning_rate": 6.29230693216068e-06,
|
30976 |
+
"loss": 5.4345,
|
30977 |
+
"step": 17696
|
30978 |
+
},
|
30979 |
+
{
|
30980 |
+
"epoch": 0.87,
|
30981 |
+
"grad_norm": 2.087536096572876,
|
30982 |
+
"learning_rate": 6.282425020999061e-06,
|
30983 |
+
"loss": 5.5135,
|
30984 |
+
"step": 17700
|
30985 |
+
},
|
30986 |
+
{
|
30987 |
+
"epoch": 0.87,
|
30988 |
+
"grad_norm": 2.0699515342712402,
|
30989 |
+
"learning_rate": 6.272543109837444e-06,
|
30990 |
+
"loss": 5.3821,
|
30991 |
+
"step": 17704
|
30992 |
+
},
|
30993 |
+
{
|
30994 |
+
"epoch": 0.87,
|
30995 |
+
"grad_norm": 2.309680223464966,
|
30996 |
+
"learning_rate": 6.262661198675825e-06,
|
30997 |
+
"loss": 5.3426,
|
30998 |
+
"step": 17708
|
30999 |
+
},
|
31000 |
+
{
|
31001 |
+
"epoch": 0.88,
|
31002 |
+
"grad_norm": 1.9877557754516602,
|
31003 |
+
"learning_rate": 6.252779287514206e-06,
|
31004 |
+
"loss": 5.554,
|
31005 |
+
"step": 17712
|
31006 |
+
},
|
31007 |
+
{
|
31008 |
+
"epoch": 0.88,
|
31009 |
+
"grad_norm": 2.1621484756469727,
|
31010 |
+
"learning_rate": 6.2428973763525866e-06,
|
31011 |
+
"loss": 5.4897,
|
31012 |
+
"step": 17716
|
31013 |
+
},
|
31014 |
+
{
|
31015 |
+
"epoch": 0.88,
|
31016 |
+
"grad_norm": 2.0568161010742188,
|
31017 |
+
"learning_rate": 6.233015465190968e-06,
|
31018 |
+
"loss": 5.4726,
|
31019 |
+
"step": 17720
|
31020 |
+
},
|
31021 |
+
{
|
31022 |
+
"epoch": 0.88,
|
31023 |
+
"grad_norm": 1.8659361600875854,
|
31024 |
+
"learning_rate": 6.22313355402935e-06,
|
31025 |
+
"loss": 5.461,
|
31026 |
+
"step": 17724
|
31027 |
+
},
|
31028 |
+
{
|
31029 |
+
"epoch": 0.88,
|
31030 |
+
"grad_norm": 1.8461517095565796,
|
31031 |
+
"learning_rate": 6.213251642867731e-06,
|
31032 |
+
"loss": 5.3407,
|
31033 |
+
"step": 17728
|
31034 |
+
},
|
31035 |
+
{
|
31036 |
+
"epoch": 0.88,
|
31037 |
+
"grad_norm": 2.2194485664367676,
|
31038 |
+
"learning_rate": 6.203369731706112e-06,
|
31039 |
+
"loss": 5.3863,
|
31040 |
+
"step": 17732
|
31041 |
+
},
|
31042 |
+
{
|
31043 |
+
"epoch": 0.88,
|
31044 |
+
"grad_norm": 2.2594525814056396,
|
31045 |
+
"learning_rate": 6.1934878205444935e-06,
|
31046 |
+
"loss": 5.4737,
|
31047 |
+
"step": 17736
|
31048 |
+
},
|
31049 |
+
{
|
31050 |
+
"epoch": 0.88,
|
31051 |
+
"grad_norm": 2.067777156829834,
|
31052 |
+
"learning_rate": 6.1836059093828755e-06,
|
31053 |
+
"loss": 5.4817,
|
31054 |
+
"step": 17740
|
31055 |
+
},
|
31056 |
+
{
|
31057 |
+
"epoch": 0.88,
|
31058 |
+
"grad_norm": 1.9696800708770752,
|
31059 |
+
"learning_rate": 6.173723998221257e-06,
|
31060 |
+
"loss": 5.5215,
|
31061 |
+
"step": 17744
|
31062 |
+
},
|
31063 |
+
{
|
31064 |
+
"epoch": 0.88,
|
31065 |
+
"grad_norm": 1.9700802564620972,
|
31066 |
+
"learning_rate": 6.163842087059637e-06,
|
31067 |
+
"loss": 5.5821,
|
31068 |
+
"step": 17748
|
31069 |
+
},
|
31070 |
+
{
|
31071 |
+
"epoch": 0.88,
|
31072 |
+
"grad_norm": 2.2519845962524414,
|
31073 |
+
"learning_rate": 6.153960175898018e-06,
|
31074 |
+
"loss": 5.5501,
|
31075 |
+
"step": 17752
|
31076 |
+
},
|
31077 |
+
{
|
31078 |
+
"epoch": 0.88,
|
31079 |
+
"grad_norm": 2.1531550884246826,
|
31080 |
+
"learning_rate": 6.1440782647364e-06,
|
31081 |
+
"loss": 5.354,
|
31082 |
+
"step": 17756
|
31083 |
+
},
|
31084 |
+
{
|
31085 |
+
"epoch": 0.88,
|
31086 |
+
"grad_norm": 2.299639940261841,
|
31087 |
+
"learning_rate": 6.134196353574782e-06,
|
31088 |
+
"loss": 5.5487,
|
31089 |
+
"step": 17760
|
31090 |
+
},
|
31091 |
+
{
|
31092 |
+
"epoch": 0.88,
|
31093 |
+
"grad_norm": 1.9032407999038696,
|
31094 |
+
"learning_rate": 6.124314442413163e-06,
|
31095 |
+
"loss": 5.4551,
|
31096 |
+
"step": 17764
|
31097 |
+
},
|
31098 |
+
{
|
31099 |
+
"epoch": 0.88,
|
31100 |
+
"grad_norm": 2.121720552444458,
|
31101 |
+
"learning_rate": 6.114432531251545e-06,
|
31102 |
+
"loss": 5.4185,
|
31103 |
+
"step": 17768
|
31104 |
+
},
|
31105 |
+
{
|
31106 |
+
"epoch": 0.88,
|
31107 |
+
"grad_norm": 1.955588698387146,
|
31108 |
+
"learning_rate": 6.104550620089926e-06,
|
31109 |
+
"loss": 5.5947,
|
31110 |
+
"step": 17772
|
31111 |
+
},
|
31112 |
+
{
|
31113 |
+
"epoch": 0.88,
|
31114 |
+
"grad_norm": 1.9518580436706543,
|
31115 |
+
"learning_rate": 6.094668708928307e-06,
|
31116 |
+
"loss": 5.4159,
|
31117 |
+
"step": 17776
|
31118 |
+
},
|
31119 |
+
{
|
31120 |
+
"epoch": 0.88,
|
31121 |
+
"grad_norm": 2.2284739017486572,
|
31122 |
+
"learning_rate": 6.084786797766688e-06,
|
31123 |
+
"loss": 5.4806,
|
31124 |
+
"step": 17780
|
31125 |
+
},
|
31126 |
+
{
|
31127 |
+
"epoch": 0.88,
|
31128 |
+
"grad_norm": 1.9473198652267456,
|
31129 |
+
"learning_rate": 6.07490488660507e-06,
|
31130 |
+
"loss": 5.3457,
|
31131 |
+
"step": 17784
|
31132 |
+
},
|
31133 |
+
{
|
31134 |
+
"epoch": 0.88,
|
31135 |
+
"grad_norm": 2.25762939453125,
|
31136 |
+
"learning_rate": 6.065022975443451e-06,
|
31137 |
+
"loss": 5.3905,
|
31138 |
+
"step": 17788
|
31139 |
+
},
|
31140 |
+
{
|
31141 |
+
"epoch": 0.88,
|
31142 |
+
"grad_norm": 2.13055682182312,
|
31143 |
+
"learning_rate": 6.055141064281832e-06,
|
31144 |
+
"loss": 5.4596,
|
31145 |
+
"step": 17792
|
31146 |
+
},
|
31147 |
+
{
|
31148 |
+
"epoch": 0.88,
|
31149 |
+
"grad_norm": 2.078608751296997,
|
31150 |
+
"learning_rate": 6.045259153120214e-06,
|
31151 |
+
"loss": 5.4836,
|
31152 |
+
"step": 17796
|
31153 |
+
},
|
31154 |
+
{
|
31155 |
+
"epoch": 0.88,
|
31156 |
+
"grad_norm": 2.032860040664673,
|
31157 |
+
"learning_rate": 6.0353772419585954e-06,
|
31158 |
+
"loss": 5.3315,
|
31159 |
+
"step": 17800
|
31160 |
+
},
|
31161 |
+
{
|
31162 |
+
"epoch": 0.88,
|
31163 |
+
"grad_norm": 2.18186616897583,
|
31164 |
+
"learning_rate": 6.025495330796977e-06,
|
31165 |
+
"loss": 5.5165,
|
31166 |
+
"step": 17804
|
31167 |
+
},
|
31168 |
+
{
|
31169 |
+
"epoch": 0.88,
|
31170 |
+
"grad_norm": 1.8949894905090332,
|
31171 |
+
"learning_rate": 6.015613419635358e-06,
|
31172 |
+
"loss": 5.4296,
|
31173 |
+
"step": 17808
|
31174 |
+
},
|
31175 |
+
{
|
31176 |
+
"epoch": 0.88,
|
31177 |
+
"grad_norm": 1.9019147157669067,
|
31178 |
+
"learning_rate": 6.005731508473739e-06,
|
31179 |
+
"loss": 5.3742,
|
31180 |
+
"step": 17812
|
31181 |
+
},
|
31182 |
+
{
|
31183 |
+
"epoch": 0.88,
|
31184 |
+
"grad_norm": 1.9749938249588013,
|
31185 |
+
"learning_rate": 5.99584959731212e-06,
|
31186 |
+
"loss": 5.4702,
|
31187 |
+
"step": 17816
|
31188 |
+
},
|
31189 |
+
{
|
31190 |
+
"epoch": 0.88,
|
31191 |
+
"grad_norm": 1.9528026580810547,
|
31192 |
+
"learning_rate": 5.9859676861505015e-06,
|
31193 |
+
"loss": 5.4179,
|
31194 |
+
"step": 17820
|
31195 |
+
},
|
31196 |
+
{
|
31197 |
+
"epoch": 0.88,
|
31198 |
+
"grad_norm": 2.04555082321167,
|
31199 |
+
"learning_rate": 5.976085774988883e-06,
|
31200 |
+
"loss": 5.4807,
|
31201 |
+
"step": 17824
|
31202 |
+
},
|
31203 |
+
{
|
31204 |
+
"epoch": 0.88,
|
31205 |
+
"grad_norm": 2.2078750133514404,
|
31206 |
+
"learning_rate": 5.966203863827265e-06,
|
31207 |
+
"loss": 5.4206,
|
31208 |
+
"step": 17828
|
31209 |
+
},
|
31210 |
+
{
|
31211 |
+
"epoch": 0.88,
|
31212 |
+
"grad_norm": 2.1232731342315674,
|
31213 |
+
"learning_rate": 5.956321952665646e-06,
|
31214 |
+
"loss": 5.5452,
|
31215 |
+
"step": 17832
|
31216 |
+
},
|
31217 |
+
{
|
31218 |
+
"epoch": 0.88,
|
31219 |
+
"grad_norm": 1.724265217781067,
|
31220 |
+
"learning_rate": 5.946440041504027e-06,
|
31221 |
+
"loss": 5.3831,
|
31222 |
+
"step": 17836
|
31223 |
+
},
|
31224 |
+
{
|
31225 |
+
"epoch": 0.88,
|
31226 |
+
"grad_norm": 2.0802602767944336,
|
31227 |
+
"learning_rate": 5.936558130342408e-06,
|
31228 |
+
"loss": 5.3925,
|
31229 |
+
"step": 17840
|
31230 |
+
},
|
31231 |
+
{
|
31232 |
+
"epoch": 0.88,
|
31233 |
+
"grad_norm": 1.913464069366455,
|
31234 |
+
"learning_rate": 5.92667621918079e-06,
|
31235 |
+
"loss": 5.4786,
|
31236 |
+
"step": 17844
|
31237 |
+
},
|
31238 |
+
{
|
31239 |
+
"epoch": 0.88,
|
31240 |
+
"grad_norm": 1.9357552528381348,
|
31241 |
+
"learning_rate": 5.916794308019171e-06,
|
31242 |
+
"loss": 5.4415,
|
31243 |
+
"step": 17848
|
31244 |
+
},
|
31245 |
+
{
|
31246 |
+
"epoch": 0.88,
|
31247 |
+
"grad_norm": 1.9869678020477295,
|
31248 |
+
"learning_rate": 5.906912396857552e-06,
|
31249 |
+
"loss": 5.4731,
|
31250 |
+
"step": 17852
|
31251 |
+
},
|
31252 |
+
{
|
31253 |
+
"epoch": 0.88,
|
31254 |
+
"grad_norm": 2.1964402198791504,
|
31255 |
+
"learning_rate": 5.897030485695934e-06,
|
31256 |
+
"loss": 5.4538,
|
31257 |
+
"step": 17856
|
31258 |
+
},
|
31259 |
+
{
|
31260 |
+
"epoch": 0.88,
|
31261 |
+
"grad_norm": 2.2341887950897217,
|
31262 |
+
"learning_rate": 5.887148574534315e-06,
|
31263 |
+
"loss": 5.4484,
|
31264 |
+
"step": 17860
|
31265 |
+
},
|
31266 |
+
{
|
31267 |
+
"epoch": 0.88,
|
31268 |
+
"grad_norm": 2.2685790061950684,
|
31269 |
+
"learning_rate": 5.8772666633726965e-06,
|
31270 |
+
"loss": 5.4935,
|
31271 |
+
"step": 17864
|
31272 |
+
},
|
31273 |
+
{
|
31274 |
+
"epoch": 0.88,
|
31275 |
+
"grad_norm": 1.9977366924285889,
|
31276 |
+
"learning_rate": 5.867384752211079e-06,
|
31277 |
+
"loss": 5.4866,
|
31278 |
+
"step": 17868
|
31279 |
+
},
|
31280 |
+
{
|
31281 |
+
"epoch": 0.88,
|
31282 |
+
"grad_norm": 1.8025336265563965,
|
31283 |
+
"learning_rate": 5.857502841049459e-06,
|
31284 |
+
"loss": 5.5315,
|
31285 |
+
"step": 17872
|
31286 |
+
},
|
31287 |
+
{
|
31288 |
+
"epoch": 0.88,
|
31289 |
+
"grad_norm": 1.8028703927993774,
|
31290 |
+
"learning_rate": 5.84762092988784e-06,
|
31291 |
+
"loss": 5.4048,
|
31292 |
+
"step": 17876
|
31293 |
+
},
|
31294 |
+
{
|
31295 |
+
"epoch": 0.88,
|
31296 |
+
"grad_norm": 2.0206375122070312,
|
31297 |
+
"learning_rate": 5.837739018726221e-06,
|
31298 |
+
"loss": 5.5057,
|
31299 |
+
"step": 17880
|
31300 |
+
},
|
31301 |
+
{
|
31302 |
+
"epoch": 0.88,
|
31303 |
+
"grad_norm": 2.2146549224853516,
|
31304 |
+
"learning_rate": 5.8278571075646034e-06,
|
31305 |
+
"loss": 5.4375,
|
31306 |
+
"step": 17884
|
31307 |
+
},
|
31308 |
+
{
|
31309 |
+
"epoch": 0.88,
|
31310 |
+
"grad_norm": 2.1485488414764404,
|
31311 |
+
"learning_rate": 5.817975196402985e-06,
|
31312 |
+
"loss": 5.4375,
|
31313 |
+
"step": 17888
|
31314 |
+
},
|
31315 |
+
{
|
31316 |
+
"epoch": 0.88,
|
31317 |
+
"grad_norm": 1.9976389408111572,
|
31318 |
+
"learning_rate": 5.808093285241366e-06,
|
31319 |
+
"loss": 5.5645,
|
31320 |
+
"step": 17892
|
31321 |
+
},
|
31322 |
+
{
|
31323 |
+
"epoch": 0.88,
|
31324 |
+
"grad_norm": 2.1577677726745605,
|
31325 |
+
"learning_rate": 5.798211374079747e-06,
|
31326 |
+
"loss": 5.4917,
|
31327 |
+
"step": 17896
|
31328 |
+
},
|
31329 |
+
{
|
31330 |
+
"epoch": 0.88,
|
31331 |
+
"grad_norm": 2.085784673690796,
|
31332 |
+
"learning_rate": 5.788329462918129e-06,
|
31333 |
+
"loss": 5.5115,
|
31334 |
+
"step": 17900
|
31335 |
+
},
|
31336 |
+
{
|
31337 |
+
"epoch": 0.88,
|
31338 |
+
"grad_norm": 2.250061511993408,
|
31339 |
+
"learning_rate": 5.77844755175651e-06,
|
31340 |
+
"loss": 5.4932,
|
31341 |
+
"step": 17904
|
31342 |
+
},
|
31343 |
+
{
|
31344 |
+
"epoch": 0.88,
|
31345 |
+
"grad_norm": 2.076542854309082,
|
31346 |
+
"learning_rate": 5.768565640594891e-06,
|
31347 |
+
"loss": 5.3858,
|
31348 |
+
"step": 17908
|
31349 |
+
},
|
31350 |
+
{
|
31351 |
+
"epoch": 0.89,
|
31352 |
+
"grad_norm": 2.0458431243896484,
|
31353 |
+
"learning_rate": 5.761154207223677e-06,
|
31354 |
+
"loss": 5.5596,
|
31355 |
+
"step": 17912
|
31356 |
+
},
|
31357 |
+
{
|
31358 |
+
"epoch": 0.89,
|
31359 |
+
"grad_norm": 2.075693130493164,
|
31360 |
+
"learning_rate": 5.7512722960620585e-06,
|
31361 |
+
"loss": 5.4962,
|
31362 |
+
"step": 17916
|
31363 |
+
},
|
31364 |
+
{
|
31365 |
+
"epoch": 0.89,
|
31366 |
+
"grad_norm": 2.09002685546875,
|
31367 |
+
"learning_rate": 5.74139038490044e-06,
|
31368 |
+
"loss": 5.3926,
|
31369 |
+
"step": 17920
|
31370 |
+
},
|
31371 |
+
{
|
31372 |
+
"epoch": 0.89,
|
31373 |
+
"grad_norm": 2.0116419792175293,
|
31374 |
+
"learning_rate": 5.731508473738821e-06,
|
31375 |
+
"loss": 5.4649,
|
31376 |
+
"step": 17924
|
31377 |
+
},
|
31378 |
+
{
|
31379 |
+
"epoch": 0.89,
|
31380 |
+
"grad_norm": 2.0170979499816895,
|
31381 |
+
"learning_rate": 5.721626562577203e-06,
|
31382 |
+
"loss": 5.4906,
|
31383 |
+
"step": 17928
|
31384 |
+
},
|
31385 |
+
{
|
31386 |
+
"epoch": 0.89,
|
31387 |
+
"grad_norm": 2.033344268798828,
|
31388 |
+
"learning_rate": 5.711744651415584e-06,
|
31389 |
+
"loss": 5.5144,
|
31390 |
+
"step": 17932
|
31391 |
+
},
|
31392 |
+
{
|
31393 |
+
"epoch": 0.89,
|
31394 |
+
"grad_norm": 1.9842782020568848,
|
31395 |
+
"learning_rate": 5.701862740253965e-06,
|
31396 |
+
"loss": 5.4478,
|
31397 |
+
"step": 17936
|
31398 |
+
},
|
31399 |
+
{
|
31400 |
+
"epoch": 0.89,
|
31401 |
+
"grad_norm": 2.059737205505371,
|
31402 |
+
"learning_rate": 5.691980829092347e-06,
|
31403 |
+
"loss": 5.4969,
|
31404 |
+
"step": 17940
|
31405 |
+
},
|
31406 |
+
{
|
31407 |
+
"epoch": 0.89,
|
31408 |
+
"grad_norm": 2.116508960723877,
|
31409 |
+
"learning_rate": 5.682098917930728e-06,
|
31410 |
+
"loss": 5.5734,
|
31411 |
+
"step": 17944
|
31412 |
+
},
|
31413 |
+
{
|
31414 |
+
"epoch": 0.89,
|
31415 |
+
"grad_norm": 2.062220573425293,
|
31416 |
+
"learning_rate": 5.672217006769109e-06,
|
31417 |
+
"loss": 5.3472,
|
31418 |
+
"step": 17948
|
31419 |
+
},
|
31420 |
+
{
|
31421 |
+
"epoch": 0.89,
|
31422 |
+
"grad_norm": 2.176339626312256,
|
31423 |
+
"learning_rate": 5.66233509560749e-06,
|
31424 |
+
"loss": 5.3356,
|
31425 |
+
"step": 17952
|
31426 |
+
},
|
31427 |
+
{
|
31428 |
+
"epoch": 0.89,
|
31429 |
+
"grad_norm": 2.2417047023773193,
|
31430 |
+
"learning_rate": 5.652453184445872e-06,
|
31431 |
+
"loss": 5.429,
|
31432 |
+
"step": 17956
|
31433 |
+
},
|
31434 |
+
{
|
31435 |
+
"epoch": 0.89,
|
31436 |
+
"grad_norm": 1.9376815557479858,
|
31437 |
+
"learning_rate": 5.6425712732842535e-06,
|
31438 |
+
"loss": 5.4263,
|
31439 |
+
"step": 17960
|
31440 |
+
},
|
31441 |
+
{
|
31442 |
+
"epoch": 0.89,
|
31443 |
+
"grad_norm": 1.9888767004013062,
|
31444 |
+
"learning_rate": 5.632689362122635e-06,
|
31445 |
+
"loss": 5.5286,
|
31446 |
+
"step": 17964
|
31447 |
+
},
|
31448 |
+
{
|
31449 |
+
"epoch": 0.89,
|
31450 |
+
"grad_norm": 2.1054704189300537,
|
31451 |
+
"learning_rate": 5.622807450961016e-06,
|
31452 |
+
"loss": 5.4473,
|
31453 |
+
"step": 17968
|
31454 |
+
},
|
31455 |
+
{
|
31456 |
+
"epoch": 0.89,
|
31457 |
+
"grad_norm": 2.3070156574249268,
|
31458 |
+
"learning_rate": 5.612925539799398e-06,
|
31459 |
+
"loss": 5.6394,
|
31460 |
+
"step": 17972
|
31461 |
+
},
|
31462 |
+
{
|
31463 |
+
"epoch": 0.89,
|
31464 |
+
"grad_norm": 1.956694483757019,
|
31465 |
+
"learning_rate": 5.603043628637779e-06,
|
31466 |
+
"loss": 5.3773,
|
31467 |
+
"step": 17976
|
31468 |
+
},
|
31469 |
+
{
|
31470 |
+
"epoch": 0.89,
|
31471 |
+
"grad_norm": 2.1512622833251953,
|
31472 |
+
"learning_rate": 5.59316171747616e-06,
|
31473 |
+
"loss": 5.5051,
|
31474 |
+
"step": 17980
|
31475 |
+
},
|
31476 |
+
{
|
31477 |
+
"epoch": 0.89,
|
31478 |
+
"grad_norm": 1.9715803861618042,
|
31479 |
+
"learning_rate": 5.583279806314542e-06,
|
31480 |
+
"loss": 5.4564,
|
31481 |
+
"step": 17984
|
31482 |
+
},
|
31483 |
+
{
|
31484 |
+
"epoch": 0.89,
|
31485 |
+
"grad_norm": 2.07094144821167,
|
31486 |
+
"learning_rate": 5.573397895152923e-06,
|
31487 |
+
"loss": 5.486,
|
31488 |
+
"step": 17988
|
31489 |
+
},
|
31490 |
+
{
|
31491 |
+
"epoch": 0.89,
|
31492 |
+
"grad_norm": 2.0776047706604004,
|
31493 |
+
"learning_rate": 5.563515983991304e-06,
|
31494 |
+
"loss": 5.4101,
|
31495 |
+
"step": 17992
|
31496 |
+
},
|
31497 |
+
{
|
31498 |
+
"epoch": 0.89,
|
31499 |
+
"grad_norm": 2.0641090869903564,
|
31500 |
+
"learning_rate": 5.553634072829685e-06,
|
31501 |
+
"loss": 5.4413,
|
31502 |
+
"step": 17996
|
31503 |
+
},
|
31504 |
+
{
|
31505 |
+
"epoch": 0.89,
|
31506 |
+
"grad_norm": 2.069200038909912,
|
31507 |
+
"learning_rate": 5.543752161668067e-06,
|
31508 |
+
"loss": 5.4239,
|
31509 |
+
"step": 18000
|
31510 |
+
},
|
31511 |
+
{
|
31512 |
+
"epoch": 0.89,
|
31513 |
+
"grad_norm": 1.9378856420516968,
|
31514 |
+
"learning_rate": 5.5338702505064486e-06,
|
31515 |
+
"loss": 5.4994,
|
31516 |
+
"step": 18004
|
31517 |
+
},
|
31518 |
+
{
|
31519 |
+
"epoch": 0.89,
|
31520 |
+
"grad_norm": 1.9170506000518799,
|
31521 |
+
"learning_rate": 5.52398833934483e-06,
|
31522 |
+
"loss": 5.4589,
|
31523 |
+
"step": 18008
|
31524 |
+
},
|
31525 |
+
{
|
31526 |
+
"epoch": 0.89,
|
31527 |
+
"grad_norm": 2.1123738288879395,
|
31528 |
+
"learning_rate": 5.514106428183211e-06,
|
31529 |
+
"loss": 5.4226,
|
31530 |
+
"step": 18012
|
31531 |
+
},
|
31532 |
+
{
|
31533 |
+
"epoch": 0.89,
|
31534 |
+
"grad_norm": 1.9895274639129639,
|
31535 |
+
"learning_rate": 5.504224517021592e-06,
|
31536 |
+
"loss": 5.522,
|
31537 |
+
"step": 18016
|
31538 |
+
},
|
31539 |
+
{
|
31540 |
+
"epoch": 0.89,
|
31541 |
+
"grad_norm": 2.216384172439575,
|
31542 |
+
"learning_rate": 5.494342605859973e-06,
|
31543 |
+
"loss": 5.4323,
|
31544 |
+
"step": 18020
|
31545 |
+
},
|
31546 |
+
{
|
31547 |
+
"epoch": 0.89,
|
31548 |
+
"grad_norm": 2.1332645416259766,
|
31549 |
+
"learning_rate": 5.484460694698355e-06,
|
31550 |
+
"loss": 5.3666,
|
31551 |
+
"step": 18024
|
31552 |
+
},
|
31553 |
+
{
|
31554 |
+
"epoch": 0.89,
|
31555 |
+
"grad_norm": 2.0537400245666504,
|
31556 |
+
"learning_rate": 5.474578783536737e-06,
|
31557 |
+
"loss": 5.4988,
|
31558 |
+
"step": 18028
|
31559 |
+
},
|
31560 |
+
{
|
31561 |
+
"epoch": 0.89,
|
31562 |
+
"grad_norm": 1.9897429943084717,
|
31563 |
+
"learning_rate": 5.464696872375118e-06,
|
31564 |
+
"loss": 5.4833,
|
31565 |
+
"step": 18032
|
31566 |
+
},
|
31567 |
+
{
|
31568 |
+
"epoch": 0.89,
|
31569 |
+
"grad_norm": 2.066513776779175,
|
31570 |
+
"learning_rate": 5.454814961213499e-06,
|
31571 |
+
"loss": 5.4636,
|
31572 |
+
"step": 18036
|
31573 |
+
},
|
31574 |
+
{
|
31575 |
+
"epoch": 0.89,
|
31576 |
+
"grad_norm": 2.251376152038574,
|
31577 |
+
"learning_rate": 5.44493305005188e-06,
|
31578 |
+
"loss": 5.5205,
|
31579 |
+
"step": 18040
|
31580 |
+
},
|
31581 |
+
{
|
31582 |
+
"epoch": 0.89,
|
31583 |
+
"grad_norm": 2.1560051441192627,
|
31584 |
+
"learning_rate": 5.4350511388902615e-06,
|
31585 |
+
"loss": 5.4888,
|
31586 |
+
"step": 18044
|
31587 |
+
},
|
31588 |
+
{
|
31589 |
+
"epoch": 0.89,
|
31590 |
+
"grad_norm": 1.9921746253967285,
|
31591 |
+
"learning_rate": 5.425169227728643e-06,
|
31592 |
+
"loss": 5.503,
|
31593 |
+
"step": 18048
|
31594 |
+
},
|
31595 |
+
{
|
31596 |
+
"epoch": 0.89,
|
31597 |
+
"grad_norm": 1.9806662797927856,
|
31598 |
+
"learning_rate": 5.415287316567024e-06,
|
31599 |
+
"loss": 5.5299,
|
31600 |
+
"step": 18052
|
31601 |
+
},
|
31602 |
+
{
|
31603 |
+
"epoch": 0.89,
|
31604 |
+
"grad_norm": 2.086308479309082,
|
31605 |
+
"learning_rate": 5.405405405405406e-06,
|
31606 |
+
"loss": 5.3775,
|
31607 |
+
"step": 18056
|
31608 |
+
},
|
31609 |
+
{
|
31610 |
+
"epoch": 0.89,
|
31611 |
+
"grad_norm": 1.9652711153030396,
|
31612 |
+
"learning_rate": 5.395523494243787e-06,
|
31613 |
+
"loss": 5.5683,
|
31614 |
+
"step": 18060
|
31615 |
+
},
|
31616 |
+
{
|
31617 |
+
"epoch": 0.89,
|
31618 |
+
"grad_norm": 1.8376736640930176,
|
31619 |
+
"learning_rate": 5.3856415830821685e-06,
|
31620 |
+
"loss": 5.3905,
|
31621 |
+
"step": 18064
|
31622 |
+
},
|
31623 |
+
{
|
31624 |
+
"epoch": 0.89,
|
31625 |
+
"grad_norm": 2.1515750885009766,
|
31626 |
+
"learning_rate": 5.37575967192055e-06,
|
31627 |
+
"loss": 5.421,
|
31628 |
+
"step": 18068
|
31629 |
+
},
|
31630 |
+
{
|
31631 |
+
"epoch": 0.89,
|
31632 |
+
"grad_norm": 2.18635630607605,
|
31633 |
+
"learning_rate": 5.365877760758931e-06,
|
31634 |
+
"loss": 5.4513,
|
31635 |
+
"step": 18072
|
31636 |
+
},
|
31637 |
+
{
|
31638 |
+
"epoch": 0.89,
|
31639 |
+
"grad_norm": 2.2322137355804443,
|
31640 |
+
"learning_rate": 5.355995849597312e-06,
|
31641 |
+
"loss": 5.5696,
|
31642 |
+
"step": 18076
|
31643 |
+
},
|
31644 |
+
{
|
31645 |
+
"epoch": 0.89,
|
31646 |
+
"grad_norm": 2.1388771533966064,
|
31647 |
+
"learning_rate": 5.346113938435693e-06,
|
31648 |
+
"loss": 5.5154,
|
31649 |
+
"step": 18080
|
31650 |
+
},
|
31651 |
+
{
|
31652 |
+
"epoch": 0.89,
|
31653 |
+
"grad_norm": 2.1032564640045166,
|
31654 |
+
"learning_rate": 5.336232027274075e-06,
|
31655 |
+
"loss": 5.4737,
|
31656 |
+
"step": 18084
|
31657 |
+
},
|
31658 |
+
{
|
31659 |
+
"epoch": 0.89,
|
31660 |
+
"grad_norm": 2.2419564723968506,
|
31661 |
+
"learning_rate": 5.326350116112457e-06,
|
31662 |
+
"loss": 5.4804,
|
31663 |
+
"step": 18088
|
31664 |
+
},
|
31665 |
+
{
|
31666 |
+
"epoch": 0.89,
|
31667 |
+
"grad_norm": 2.1092734336853027,
|
31668 |
+
"learning_rate": 5.316468204950838e-06,
|
31669 |
+
"loss": 5.523,
|
31670 |
+
"step": 18092
|
31671 |
+
},
|
31672 |
+
{
|
31673 |
+
"epoch": 0.89,
|
31674 |
+
"grad_norm": 1.9642736911773682,
|
31675 |
+
"learning_rate": 5.306586293789219e-06,
|
31676 |
+
"loss": 5.4615,
|
31677 |
+
"step": 18096
|
31678 |
+
},
|
31679 |
+
{
|
31680 |
+
"epoch": 0.89,
|
31681 |
+
"grad_norm": 2.285712480545044,
|
31682 |
+
"learning_rate": 5.296704382627601e-06,
|
31683 |
+
"loss": 5.4944,
|
31684 |
+
"step": 18100
|
31685 |
+
},
|
31686 |
+
{
|
31687 |
+
"epoch": 0.89,
|
31688 |
+
"grad_norm": 1.8048274517059326,
|
31689 |
+
"learning_rate": 5.2868224714659814e-06,
|
31690 |
+
"loss": 5.3293,
|
31691 |
+
"step": 18104
|
31692 |
+
},
|
31693 |
+
{
|
31694 |
+
"epoch": 0.89,
|
31695 |
+
"grad_norm": 1.9001215696334839,
|
31696 |
+
"learning_rate": 5.276940560304363e-06,
|
31697 |
+
"loss": 5.3799,
|
31698 |
+
"step": 18108
|
31699 |
+
},
|
31700 |
+
{
|
31701 |
+
"epoch": 0.89,
|
31702 |
+
"grad_norm": 2.052248954772949,
|
31703 |
+
"learning_rate": 5.267058649142744e-06,
|
31704 |
+
"loss": 5.54,
|
31705 |
+
"step": 18112
|
31706 |
+
},
|
31707 |
+
{
|
31708 |
+
"epoch": 0.9,
|
31709 |
+
"grad_norm": 1.9618264436721802,
|
31710 |
+
"learning_rate": 5.257176737981126e-06,
|
31711 |
+
"loss": 5.5206,
|
31712 |
+
"step": 18116
|
31713 |
+
},
|
31714 |
+
{
|
31715 |
+
"epoch": 0.9,
|
31716 |
+
"grad_norm": 2.086357355117798,
|
31717 |
+
"learning_rate": 5.247294826819507e-06,
|
31718 |
+
"loss": 5.4713,
|
31719 |
+
"step": 18120
|
31720 |
+
},
|
31721 |
+
{
|
31722 |
+
"epoch": 0.9,
|
31723 |
+
"grad_norm": 1.989790678024292,
|
31724 |
+
"learning_rate": 5.237412915657888e-06,
|
31725 |
+
"loss": 5.4183,
|
31726 |
+
"step": 18124
|
31727 |
+
},
|
31728 |
+
{
|
31729 |
+
"epoch": 0.9,
|
31730 |
+
"grad_norm": 2.4142305850982666,
|
31731 |
+
"learning_rate": 5.22753100449627e-06,
|
31732 |
+
"loss": 5.4867,
|
31733 |
+
"step": 18128
|
31734 |
+
},
|
31735 |
+
{
|
31736 |
+
"epoch": 0.9,
|
31737 |
+
"grad_norm": 1.9405925273895264,
|
31738 |
+
"learning_rate": 5.217649093334652e-06,
|
31739 |
+
"loss": 5.4259,
|
31740 |
+
"step": 18132
|
31741 |
+
},
|
31742 |
+
{
|
31743 |
+
"epoch": 0.9,
|
31744 |
+
"grad_norm": 1.9569774866104126,
|
31745 |
+
"learning_rate": 5.207767182173033e-06,
|
31746 |
+
"loss": 5.4619,
|
31747 |
+
"step": 18136
|
31748 |
+
},
|
31749 |
+
{
|
31750 |
+
"epoch": 0.9,
|
31751 |
+
"grad_norm": 2.1723146438598633,
|
31752 |
+
"learning_rate": 5.197885271011413e-06,
|
31753 |
+
"loss": 5.4719,
|
31754 |
+
"step": 18140
|
31755 |
+
},
|
31756 |
+
{
|
31757 |
+
"epoch": 0.9,
|
31758 |
+
"grad_norm": 2.3420112133026123,
|
31759 |
+
"learning_rate": 5.188003359849795e-06,
|
31760 |
+
"loss": 5.4274,
|
31761 |
+
"step": 18144
|
31762 |
+
},
|
31763 |
+
{
|
31764 |
+
"epoch": 0.9,
|
31765 |
+
"grad_norm": 2.2983791828155518,
|
31766 |
+
"learning_rate": 5.1781214486881765e-06,
|
31767 |
+
"loss": 5.5184,
|
31768 |
+
"step": 18148
|
31769 |
+
},
|
31770 |
+
{
|
31771 |
+
"epoch": 0.9,
|
31772 |
+
"grad_norm": 2.061795711517334,
|
31773 |
+
"learning_rate": 5.168239537526558e-06,
|
31774 |
+
"loss": 5.4416,
|
31775 |
+
"step": 18152
|
31776 |
+
},
|
31777 |
+
{
|
31778 |
+
"epoch": 0.9,
|
31779 |
+
"grad_norm": 2.11879301071167,
|
31780 |
+
"learning_rate": 5.15835762636494e-06,
|
31781 |
+
"loss": 5.3997,
|
31782 |
+
"step": 18156
|
31783 |
+
},
|
31784 |
+
{
|
31785 |
+
"epoch": 0.9,
|
31786 |
+
"grad_norm": 2.072601556777954,
|
31787 |
+
"learning_rate": 5.148475715203321e-06,
|
31788 |
+
"loss": 5.4744,
|
31789 |
+
"step": 18160
|
31790 |
+
},
|
31791 |
+
{
|
31792 |
+
"epoch": 0.9,
|
31793 |
+
"grad_norm": 2.037374258041382,
|
31794 |
+
"learning_rate": 5.138593804041702e-06,
|
31795 |
+
"loss": 5.5549,
|
31796 |
+
"step": 18164
|
31797 |
+
},
|
31798 |
+
{
|
31799 |
+
"epoch": 0.9,
|
31800 |
+
"grad_norm": 2.1012215614318848,
|
31801 |
+
"learning_rate": 5.128711892880083e-06,
|
31802 |
+
"loss": 5.5601,
|
31803 |
+
"step": 18168
|
31804 |
+
},
|
31805 |
+
{
|
31806 |
+
"epoch": 0.9,
|
31807 |
+
"grad_norm": 1.9614689350128174,
|
31808 |
+
"learning_rate": 5.118829981718465e-06,
|
31809 |
+
"loss": 5.3775,
|
31810 |
+
"step": 18172
|
31811 |
+
},
|
31812 |
+
{
|
31813 |
+
"epoch": 0.9,
|
31814 |
+
"grad_norm": 2.3375091552734375,
|
31815 |
+
"learning_rate": 5.108948070556846e-06,
|
31816 |
+
"loss": 5.5373,
|
31817 |
+
"step": 18176
|
31818 |
+
},
|
31819 |
+
{
|
31820 |
+
"epoch": 0.9,
|
31821 |
+
"grad_norm": 1.9838519096374512,
|
31822 |
+
"learning_rate": 5.099066159395227e-06,
|
31823 |
+
"loss": 5.3491,
|
31824 |
+
"step": 18180
|
31825 |
+
},
|
31826 |
+
{
|
31827 |
+
"epoch": 0.9,
|
31828 |
+
"grad_norm": 2.0909204483032227,
|
31829 |
+
"learning_rate": 5.089184248233608e-06,
|
31830 |
+
"loss": 5.3386,
|
31831 |
+
"step": 18184
|
31832 |
+
},
|
31833 |
+
{
|
31834 |
+
"epoch": 0.9,
|
31835 |
+
"grad_norm": 2.396127223968506,
|
31836 |
+
"learning_rate": 5.07930233707199e-06,
|
31837 |
+
"loss": 5.4791,
|
31838 |
+
"step": 18188
|
31839 |
+
},
|
31840 |
+
{
|
31841 |
+
"epoch": 0.9,
|
31842 |
+
"grad_norm": 2.117344379425049,
|
31843 |
+
"learning_rate": 5.0694204259103715e-06,
|
31844 |
+
"loss": 5.3322,
|
31845 |
+
"step": 18192
|
31846 |
+
},
|
31847 |
+
{
|
31848 |
+
"epoch": 0.9,
|
31849 |
+
"grad_norm": 2.0109291076660156,
|
31850 |
+
"learning_rate": 5.059538514748753e-06,
|
31851 |
+
"loss": 5.4152,
|
31852 |
+
"step": 18196
|
31853 |
+
},
|
31854 |
+
{
|
31855 |
+
"epoch": 0.9,
|
31856 |
+
"grad_norm": 2.051154851913452,
|
31857 |
+
"learning_rate": 5.049656603587134e-06,
|
31858 |
+
"loss": 5.504,
|
31859 |
+
"step": 18200
|
31860 |
+
},
|
31861 |
+
{
|
31862 |
+
"epoch": 0.9,
|
31863 |
+
"grad_norm": 1.9939186573028564,
|
31864 |
+
"learning_rate": 5.039774692425515e-06,
|
31865 |
+
"loss": 5.377,
|
31866 |
+
"step": 18204
|
31867 |
+
},
|
31868 |
+
{
|
31869 |
+
"epoch": 0.9,
|
31870 |
+
"grad_norm": 2.0900635719299316,
|
31871 |
+
"learning_rate": 5.029892781263896e-06,
|
31872 |
+
"loss": 5.4448,
|
31873 |
+
"step": 18208
|
31874 |
+
},
|
31875 |
+
{
|
31876 |
+
"epoch": 0.9,
|
31877 |
+
"grad_norm": 1.7526922225952148,
|
31878 |
+
"learning_rate": 5.020010870102278e-06,
|
31879 |
+
"loss": 5.2948,
|
31880 |
+
"step": 18212
|
31881 |
+
},
|
31882 |
+
{
|
31883 |
+
"epoch": 0.9,
|
31884 |
+
"grad_norm": 2.082535982131958,
|
31885 |
+
"learning_rate": 5.01012895894066e-06,
|
31886 |
+
"loss": 5.4976,
|
31887 |
+
"step": 18216
|
31888 |
}
|
31889 |
],
|
31890 |
"logging_steps": 4,
|
|
|
31892 |
"num_input_tokens_seen": 0,
|
31893 |
"num_train_epochs": 1,
|
31894 |
"save_steps": 2024,
|
31895 |
+
"total_flos": 7.67303003626537e+16,
|
31896 |
"train_batch_size": 8,
|
31897 |
"trial_name": null,
|
31898 |
"trial_params": null
|