Uploaded checkpoint-25000
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +3511 -3
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2836579040
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d15715937ea965725264e52ec82a27c5a59d3e2767e72ea004c92451c7c82a2
|
3 |
size 2836579040
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5673376169
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf292371fb20f79bd804dd72ad097ecec4ebbe12ed591d8a91a7c7a700c86e3b
|
3 |
size 5673376169
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46e65e950ab7222c5262501cd7b7e711ba567e3a80af84a6ae9728e309c1152a
|
3 |
+
size 14308
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b027b2afefb0b47380a0742dc93c54ef79a5f7b66b6bd192151dcb4007d1b136
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14039,6 +14039,3514 @@
|
|
14039 |
"eval_samples_per_second": 14.62,
|
14040 |
"eval_steps_per_second": 14.62,
|
14041 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14042 |
}
|
14043 |
],
|
14044 |
"logging_steps": 10,
|
@@ -14046,7 +17554,7 @@
|
|
14046 |
"num_input_tokens_seen": 0,
|
14047 |
"num_train_epochs": 1,
|
14048 |
"save_steps": 5000,
|
14049 |
-
"total_flos":
|
14050 |
"train_batch_size": 1,
|
14051 |
"trial_name": null,
|
14052 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.625,
|
5 |
"eval_steps": 5000,
|
6 |
+
"global_step": 25000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14039 |
"eval_samples_per_second": 14.62,
|
14040 |
"eval_steps_per_second": 14.62,
|
14041 |
"step": 20000
|
14042 |
+
},
|
14043 |
+
{
|
14044 |
+
"epoch": 0.5,
|
14045 |
+
"grad_norm": 69.5,
|
14046 |
+
"learning_rate": 3.3864406779661014e-07,
|
14047 |
+
"loss": 1.5056,
|
14048 |
+
"step": 20010
|
14049 |
+
},
|
14050 |
+
{
|
14051 |
+
"epoch": 0.5,
|
14052 |
+
"grad_norm": 66.5,
|
14053 |
+
"learning_rate": 3.383050847457627e-07,
|
14054 |
+
"loss": 1.5009,
|
14055 |
+
"step": 20020
|
14056 |
+
},
|
14057 |
+
{
|
14058 |
+
"epoch": 0.5,
|
14059 |
+
"grad_norm": 69.5,
|
14060 |
+
"learning_rate": 3.3796610169491525e-07,
|
14061 |
+
"loss": 1.4073,
|
14062 |
+
"step": 20030
|
14063 |
+
},
|
14064 |
+
{
|
14065 |
+
"epoch": 0.5,
|
14066 |
+
"grad_norm": 63.25,
|
14067 |
+
"learning_rate": 3.376271186440678e-07,
|
14068 |
+
"loss": 1.4391,
|
14069 |
+
"step": 20040
|
14070 |
+
},
|
14071 |
+
{
|
14072 |
+
"epoch": 0.5,
|
14073 |
+
"grad_norm": 68.0,
|
14074 |
+
"learning_rate": 3.3728813559322035e-07,
|
14075 |
+
"loss": 1.4502,
|
14076 |
+
"step": 20050
|
14077 |
+
},
|
14078 |
+
{
|
14079 |
+
"epoch": 0.5,
|
14080 |
+
"grad_norm": 64.5,
|
14081 |
+
"learning_rate": 3.369491525423729e-07,
|
14082 |
+
"loss": 1.4458,
|
14083 |
+
"step": 20060
|
14084 |
+
},
|
14085 |
+
{
|
14086 |
+
"epoch": 0.5,
|
14087 |
+
"grad_norm": 68.0,
|
14088 |
+
"learning_rate": 3.366101694915254e-07,
|
14089 |
+
"loss": 1.4603,
|
14090 |
+
"step": 20070
|
14091 |
+
},
|
14092 |
+
{
|
14093 |
+
"epoch": 0.5,
|
14094 |
+
"grad_norm": 66.0,
|
14095 |
+
"learning_rate": 3.36271186440678e-07,
|
14096 |
+
"loss": 1.4461,
|
14097 |
+
"step": 20080
|
14098 |
+
},
|
14099 |
+
{
|
14100 |
+
"epoch": 0.5,
|
14101 |
+
"grad_norm": 67.0,
|
14102 |
+
"learning_rate": 3.3593220338983046e-07,
|
14103 |
+
"loss": 1.4921,
|
14104 |
+
"step": 20090
|
14105 |
+
},
|
14106 |
+
{
|
14107 |
+
"epoch": 0.5,
|
14108 |
+
"grad_norm": 64.0,
|
14109 |
+
"learning_rate": 3.35593220338983e-07,
|
14110 |
+
"loss": 1.4483,
|
14111 |
+
"step": 20100
|
14112 |
+
},
|
14113 |
+
{
|
14114 |
+
"epoch": 0.5,
|
14115 |
+
"grad_norm": 69.0,
|
14116 |
+
"learning_rate": 3.3525423728813557e-07,
|
14117 |
+
"loss": 1.4376,
|
14118 |
+
"step": 20110
|
14119 |
+
},
|
14120 |
+
{
|
14121 |
+
"epoch": 0.5,
|
14122 |
+
"grad_norm": 66.0,
|
14123 |
+
"learning_rate": 3.349152542372881e-07,
|
14124 |
+
"loss": 1.5029,
|
14125 |
+
"step": 20120
|
14126 |
+
},
|
14127 |
+
{
|
14128 |
+
"epoch": 0.5,
|
14129 |
+
"grad_norm": 67.5,
|
14130 |
+
"learning_rate": 3.345762711864407e-07,
|
14131 |
+
"loss": 1.4496,
|
14132 |
+
"step": 20130
|
14133 |
+
},
|
14134 |
+
{
|
14135 |
+
"epoch": 0.5,
|
14136 |
+
"grad_norm": 66.0,
|
14137 |
+
"learning_rate": 3.342372881355932e-07,
|
14138 |
+
"loss": 1.4555,
|
14139 |
+
"step": 20140
|
14140 |
+
},
|
14141 |
+
{
|
14142 |
+
"epoch": 0.5,
|
14143 |
+
"grad_norm": 65.0,
|
14144 |
+
"learning_rate": 3.3389830508474574e-07,
|
14145 |
+
"loss": 1.4612,
|
14146 |
+
"step": 20150
|
14147 |
+
},
|
14148 |
+
{
|
14149 |
+
"epoch": 0.5,
|
14150 |
+
"grad_norm": 64.5,
|
14151 |
+
"learning_rate": 3.335593220338983e-07,
|
14152 |
+
"loss": 1.4043,
|
14153 |
+
"step": 20160
|
14154 |
+
},
|
14155 |
+
{
|
14156 |
+
"epoch": 0.5,
|
14157 |
+
"grad_norm": 66.0,
|
14158 |
+
"learning_rate": 3.3322033898305085e-07,
|
14159 |
+
"loss": 1.4803,
|
14160 |
+
"step": 20170
|
14161 |
+
},
|
14162 |
+
{
|
14163 |
+
"epoch": 0.5,
|
14164 |
+
"grad_norm": 64.0,
|
14165 |
+
"learning_rate": 3.328813559322034e-07,
|
14166 |
+
"loss": 1.4877,
|
14167 |
+
"step": 20180
|
14168 |
+
},
|
14169 |
+
{
|
14170 |
+
"epoch": 0.5,
|
14171 |
+
"grad_norm": 63.5,
|
14172 |
+
"learning_rate": 3.325423728813559e-07,
|
14173 |
+
"loss": 1.4582,
|
14174 |
+
"step": 20190
|
14175 |
+
},
|
14176 |
+
{
|
14177 |
+
"epoch": 0.51,
|
14178 |
+
"grad_norm": 67.5,
|
14179 |
+
"learning_rate": 3.3220338983050843e-07,
|
14180 |
+
"loss": 1.4408,
|
14181 |
+
"step": 20200
|
14182 |
+
},
|
14183 |
+
{
|
14184 |
+
"epoch": 0.51,
|
14185 |
+
"grad_norm": 65.0,
|
14186 |
+
"learning_rate": 3.31864406779661e-07,
|
14187 |
+
"loss": 1.4368,
|
14188 |
+
"step": 20210
|
14189 |
+
},
|
14190 |
+
{
|
14191 |
+
"epoch": 0.51,
|
14192 |
+
"grad_norm": 66.5,
|
14193 |
+
"learning_rate": 3.3152542372881354e-07,
|
14194 |
+
"loss": 1.4513,
|
14195 |
+
"step": 20220
|
14196 |
+
},
|
14197 |
+
{
|
14198 |
+
"epoch": 0.51,
|
14199 |
+
"grad_norm": 65.0,
|
14200 |
+
"learning_rate": 3.3118644067796606e-07,
|
14201 |
+
"loss": 1.4667,
|
14202 |
+
"step": 20230
|
14203 |
+
},
|
14204 |
+
{
|
14205 |
+
"epoch": 0.51,
|
14206 |
+
"grad_norm": 70.5,
|
14207 |
+
"learning_rate": 3.3084745762711864e-07,
|
14208 |
+
"loss": 1.4526,
|
14209 |
+
"step": 20240
|
14210 |
+
},
|
14211 |
+
{
|
14212 |
+
"epoch": 0.51,
|
14213 |
+
"grad_norm": 67.5,
|
14214 |
+
"learning_rate": 3.3050847457627117e-07,
|
14215 |
+
"loss": 1.4661,
|
14216 |
+
"step": 20250
|
14217 |
+
},
|
14218 |
+
{
|
14219 |
+
"epoch": 0.51,
|
14220 |
+
"grad_norm": 67.0,
|
14221 |
+
"learning_rate": 3.3016949152542375e-07,
|
14222 |
+
"loss": 1.4909,
|
14223 |
+
"step": 20260
|
14224 |
+
},
|
14225 |
+
{
|
14226 |
+
"epoch": 0.51,
|
14227 |
+
"grad_norm": 65.5,
|
14228 |
+
"learning_rate": 3.298305084745763e-07,
|
14229 |
+
"loss": 1.4234,
|
14230 |
+
"step": 20270
|
14231 |
+
},
|
14232 |
+
{
|
14233 |
+
"epoch": 0.51,
|
14234 |
+
"grad_norm": 69.0,
|
14235 |
+
"learning_rate": 3.294915254237288e-07,
|
14236 |
+
"loss": 1.482,
|
14237 |
+
"step": 20280
|
14238 |
+
},
|
14239 |
+
{
|
14240 |
+
"epoch": 0.51,
|
14241 |
+
"grad_norm": 71.0,
|
14242 |
+
"learning_rate": 3.2915254237288134e-07,
|
14243 |
+
"loss": 1.4114,
|
14244 |
+
"step": 20290
|
14245 |
+
},
|
14246 |
+
{
|
14247 |
+
"epoch": 0.51,
|
14248 |
+
"grad_norm": 70.5,
|
14249 |
+
"learning_rate": 3.2881355932203386e-07,
|
14250 |
+
"loss": 1.4808,
|
14251 |
+
"step": 20300
|
14252 |
+
},
|
14253 |
+
{
|
14254 |
+
"epoch": 0.51,
|
14255 |
+
"grad_norm": 67.5,
|
14256 |
+
"learning_rate": 3.284745762711864e-07,
|
14257 |
+
"loss": 1.4945,
|
14258 |
+
"step": 20310
|
14259 |
+
},
|
14260 |
+
{
|
14261 |
+
"epoch": 0.51,
|
14262 |
+
"grad_norm": 66.5,
|
14263 |
+
"learning_rate": 3.2813559322033897e-07,
|
14264 |
+
"loss": 1.4487,
|
14265 |
+
"step": 20320
|
14266 |
+
},
|
14267 |
+
{
|
14268 |
+
"epoch": 0.51,
|
14269 |
+
"grad_norm": 65.0,
|
14270 |
+
"learning_rate": 3.277966101694915e-07,
|
14271 |
+
"loss": 1.4687,
|
14272 |
+
"step": 20330
|
14273 |
+
},
|
14274 |
+
{
|
14275 |
+
"epoch": 0.51,
|
14276 |
+
"grad_norm": 61.0,
|
14277 |
+
"learning_rate": 3.2745762711864403e-07,
|
14278 |
+
"loss": 1.4407,
|
14279 |
+
"step": 20340
|
14280 |
+
},
|
14281 |
+
{
|
14282 |
+
"epoch": 0.51,
|
14283 |
+
"grad_norm": 65.0,
|
14284 |
+
"learning_rate": 3.271186440677966e-07,
|
14285 |
+
"loss": 1.4604,
|
14286 |
+
"step": 20350
|
14287 |
+
},
|
14288 |
+
{
|
14289 |
+
"epoch": 0.51,
|
14290 |
+
"grad_norm": 65.0,
|
14291 |
+
"learning_rate": 3.2677966101694914e-07,
|
14292 |
+
"loss": 1.4424,
|
14293 |
+
"step": 20360
|
14294 |
+
},
|
14295 |
+
{
|
14296 |
+
"epoch": 0.51,
|
14297 |
+
"grad_norm": 67.5,
|
14298 |
+
"learning_rate": 3.264406779661017e-07,
|
14299 |
+
"loss": 1.4465,
|
14300 |
+
"step": 20370
|
14301 |
+
},
|
14302 |
+
{
|
14303 |
+
"epoch": 0.51,
|
14304 |
+
"grad_norm": 66.0,
|
14305 |
+
"learning_rate": 3.2610169491525424e-07,
|
14306 |
+
"loss": 1.4652,
|
14307 |
+
"step": 20380
|
14308 |
+
},
|
14309 |
+
{
|
14310 |
+
"epoch": 0.51,
|
14311 |
+
"grad_norm": 65.5,
|
14312 |
+
"learning_rate": 3.2576271186440677e-07,
|
14313 |
+
"loss": 1.436,
|
14314 |
+
"step": 20390
|
14315 |
+
},
|
14316 |
+
{
|
14317 |
+
"epoch": 0.51,
|
14318 |
+
"grad_norm": 67.5,
|
14319 |
+
"learning_rate": 3.254237288135593e-07,
|
14320 |
+
"loss": 1.4117,
|
14321 |
+
"step": 20400
|
14322 |
+
},
|
14323 |
+
{
|
14324 |
+
"epoch": 0.51,
|
14325 |
+
"grad_norm": 65.5,
|
14326 |
+
"learning_rate": 3.2508474576271183e-07,
|
14327 |
+
"loss": 1.4606,
|
14328 |
+
"step": 20410
|
14329 |
+
},
|
14330 |
+
{
|
14331 |
+
"epoch": 0.51,
|
14332 |
+
"grad_norm": 63.75,
|
14333 |
+
"learning_rate": 3.2474576271186435e-07,
|
14334 |
+
"loss": 1.4409,
|
14335 |
+
"step": 20420
|
14336 |
+
},
|
14337 |
+
{
|
14338 |
+
"epoch": 0.51,
|
14339 |
+
"grad_norm": 64.5,
|
14340 |
+
"learning_rate": 3.2440677966101694e-07,
|
14341 |
+
"loss": 1.4309,
|
14342 |
+
"step": 20430
|
14343 |
+
},
|
14344 |
+
{
|
14345 |
+
"epoch": 0.51,
|
14346 |
+
"grad_norm": 66.0,
|
14347 |
+
"learning_rate": 3.2406779661016946e-07,
|
14348 |
+
"loss": 1.4505,
|
14349 |
+
"step": 20440
|
14350 |
+
},
|
14351 |
+
{
|
14352 |
+
"epoch": 0.51,
|
14353 |
+
"grad_norm": 67.0,
|
14354 |
+
"learning_rate": 3.2372881355932204e-07,
|
14355 |
+
"loss": 1.4847,
|
14356 |
+
"step": 20450
|
14357 |
+
},
|
14358 |
+
{
|
14359 |
+
"epoch": 0.51,
|
14360 |
+
"grad_norm": 62.5,
|
14361 |
+
"learning_rate": 3.2338983050847457e-07,
|
14362 |
+
"loss": 1.4916,
|
14363 |
+
"step": 20460
|
14364 |
+
},
|
14365 |
+
{
|
14366 |
+
"epoch": 0.51,
|
14367 |
+
"grad_norm": 66.0,
|
14368 |
+
"learning_rate": 3.230508474576271e-07,
|
14369 |
+
"loss": 1.4501,
|
14370 |
+
"step": 20470
|
14371 |
+
},
|
14372 |
+
{
|
14373 |
+
"epoch": 0.51,
|
14374 |
+
"grad_norm": 66.5,
|
14375 |
+
"learning_rate": 3.227118644067797e-07,
|
14376 |
+
"loss": 1.4273,
|
14377 |
+
"step": 20480
|
14378 |
+
},
|
14379 |
+
{
|
14380 |
+
"epoch": 0.51,
|
14381 |
+
"grad_norm": 69.0,
|
14382 |
+
"learning_rate": 3.223728813559322e-07,
|
14383 |
+
"loss": 1.4616,
|
14384 |
+
"step": 20490
|
14385 |
+
},
|
14386 |
+
{
|
14387 |
+
"epoch": 0.51,
|
14388 |
+
"grad_norm": 67.5,
|
14389 |
+
"learning_rate": 3.220338983050847e-07,
|
14390 |
+
"loss": 1.4818,
|
14391 |
+
"step": 20500
|
14392 |
+
},
|
14393 |
+
{
|
14394 |
+
"epoch": 0.51,
|
14395 |
+
"grad_norm": 67.5,
|
14396 |
+
"learning_rate": 3.2169491525423726e-07,
|
14397 |
+
"loss": 1.468,
|
14398 |
+
"step": 20510
|
14399 |
+
},
|
14400 |
+
{
|
14401 |
+
"epoch": 0.51,
|
14402 |
+
"grad_norm": 66.0,
|
14403 |
+
"learning_rate": 3.213559322033898e-07,
|
14404 |
+
"loss": 1.5071,
|
14405 |
+
"step": 20520
|
14406 |
+
},
|
14407 |
+
{
|
14408 |
+
"epoch": 0.51,
|
14409 |
+
"grad_norm": 66.5,
|
14410 |
+
"learning_rate": 3.2101694915254237e-07,
|
14411 |
+
"loss": 1.4516,
|
14412 |
+
"step": 20530
|
14413 |
+
},
|
14414 |
+
{
|
14415 |
+
"epoch": 0.51,
|
14416 |
+
"grad_norm": 67.0,
|
14417 |
+
"learning_rate": 3.206779661016949e-07,
|
14418 |
+
"loss": 1.4563,
|
14419 |
+
"step": 20540
|
14420 |
+
},
|
14421 |
+
{
|
14422 |
+
"epoch": 0.51,
|
14423 |
+
"grad_norm": 70.5,
|
14424 |
+
"learning_rate": 3.203389830508474e-07,
|
14425 |
+
"loss": 1.4821,
|
14426 |
+
"step": 20550
|
14427 |
+
},
|
14428 |
+
{
|
14429 |
+
"epoch": 0.51,
|
14430 |
+
"grad_norm": 67.5,
|
14431 |
+
"learning_rate": 3.2e-07,
|
14432 |
+
"loss": 1.4666,
|
14433 |
+
"step": 20560
|
14434 |
+
},
|
14435 |
+
{
|
14436 |
+
"epoch": 0.51,
|
14437 |
+
"grad_norm": 65.5,
|
14438 |
+
"learning_rate": 3.1966101694915253e-07,
|
14439 |
+
"loss": 1.4145,
|
14440 |
+
"step": 20570
|
14441 |
+
},
|
14442 |
+
{
|
14443 |
+
"epoch": 0.51,
|
14444 |
+
"grad_norm": 62.75,
|
14445 |
+
"learning_rate": 3.1932203389830506e-07,
|
14446 |
+
"loss": 1.4483,
|
14447 |
+
"step": 20580
|
14448 |
+
},
|
14449 |
+
{
|
14450 |
+
"epoch": 0.51,
|
14451 |
+
"grad_norm": 67.5,
|
14452 |
+
"learning_rate": 3.1898305084745764e-07,
|
14453 |
+
"loss": 1.4599,
|
14454 |
+
"step": 20590
|
14455 |
+
},
|
14456 |
+
{
|
14457 |
+
"epoch": 0.52,
|
14458 |
+
"grad_norm": 66.0,
|
14459 |
+
"learning_rate": 3.186440677966101e-07,
|
14460 |
+
"loss": 1.4915,
|
14461 |
+
"step": 20600
|
14462 |
+
},
|
14463 |
+
{
|
14464 |
+
"epoch": 0.52,
|
14465 |
+
"grad_norm": 67.0,
|
14466 |
+
"learning_rate": 3.183050847457627e-07,
|
14467 |
+
"loss": 1.4934,
|
14468 |
+
"step": 20610
|
14469 |
+
},
|
14470 |
+
{
|
14471 |
+
"epoch": 0.52,
|
14472 |
+
"grad_norm": 70.0,
|
14473 |
+
"learning_rate": 3.179661016949152e-07,
|
14474 |
+
"loss": 1.4333,
|
14475 |
+
"step": 20620
|
14476 |
+
},
|
14477 |
+
{
|
14478 |
+
"epoch": 0.52,
|
14479 |
+
"grad_norm": 64.5,
|
14480 |
+
"learning_rate": 3.1762711864406775e-07,
|
14481 |
+
"loss": 1.5158,
|
14482 |
+
"step": 20630
|
14483 |
+
},
|
14484 |
+
{
|
14485 |
+
"epoch": 0.52,
|
14486 |
+
"grad_norm": 68.5,
|
14487 |
+
"learning_rate": 3.1728813559322033e-07,
|
14488 |
+
"loss": 1.426,
|
14489 |
+
"step": 20640
|
14490 |
+
},
|
14491 |
+
{
|
14492 |
+
"epoch": 0.52,
|
14493 |
+
"grad_norm": 69.5,
|
14494 |
+
"learning_rate": 3.1694915254237286e-07,
|
14495 |
+
"loss": 1.448,
|
14496 |
+
"step": 20650
|
14497 |
+
},
|
14498 |
+
{
|
14499 |
+
"epoch": 0.52,
|
14500 |
+
"grad_norm": 64.5,
|
14501 |
+
"learning_rate": 3.166101694915254e-07,
|
14502 |
+
"loss": 1.4542,
|
14503 |
+
"step": 20660
|
14504 |
+
},
|
14505 |
+
{
|
14506 |
+
"epoch": 0.52,
|
14507 |
+
"grad_norm": 65.5,
|
14508 |
+
"learning_rate": 3.1627118644067797e-07,
|
14509 |
+
"loss": 1.4202,
|
14510 |
+
"step": 20670
|
14511 |
+
},
|
14512 |
+
{
|
14513 |
+
"epoch": 0.52,
|
14514 |
+
"grad_norm": 67.0,
|
14515 |
+
"learning_rate": 3.159322033898305e-07,
|
14516 |
+
"loss": 1.4718,
|
14517 |
+
"step": 20680
|
14518 |
+
},
|
14519 |
+
{
|
14520 |
+
"epoch": 0.52,
|
14521 |
+
"grad_norm": 71.0,
|
14522 |
+
"learning_rate": 3.155932203389831e-07,
|
14523 |
+
"loss": 1.5071,
|
14524 |
+
"step": 20690
|
14525 |
+
},
|
14526 |
+
{
|
14527 |
+
"epoch": 0.52,
|
14528 |
+
"grad_norm": 68.0,
|
14529 |
+
"learning_rate": 3.152542372881356e-07,
|
14530 |
+
"loss": 1.4972,
|
14531 |
+
"step": 20700
|
14532 |
+
},
|
14533 |
+
{
|
14534 |
+
"epoch": 0.52,
|
14535 |
+
"grad_norm": 67.0,
|
14536 |
+
"learning_rate": 3.149152542372881e-07,
|
14537 |
+
"loss": 1.4734,
|
14538 |
+
"step": 20710
|
14539 |
+
},
|
14540 |
+
{
|
14541 |
+
"epoch": 0.52,
|
14542 |
+
"grad_norm": 67.5,
|
14543 |
+
"learning_rate": 3.1457627118644066e-07,
|
14544 |
+
"loss": 1.5047,
|
14545 |
+
"step": 20720
|
14546 |
+
},
|
14547 |
+
{
|
14548 |
+
"epoch": 0.52,
|
14549 |
+
"grad_norm": 70.0,
|
14550 |
+
"learning_rate": 3.142372881355932e-07,
|
14551 |
+
"loss": 1.5067,
|
14552 |
+
"step": 20730
|
14553 |
+
},
|
14554 |
+
{
|
14555 |
+
"epoch": 0.52,
|
14556 |
+
"grad_norm": 67.0,
|
14557 |
+
"learning_rate": 3.138983050847457e-07,
|
14558 |
+
"loss": 1.4898,
|
14559 |
+
"step": 20740
|
14560 |
+
},
|
14561 |
+
{
|
14562 |
+
"epoch": 0.52,
|
14563 |
+
"grad_norm": 65.5,
|
14564 |
+
"learning_rate": 3.135593220338983e-07,
|
14565 |
+
"loss": 1.499,
|
14566 |
+
"step": 20750
|
14567 |
+
},
|
14568 |
+
{
|
14569 |
+
"epoch": 0.52,
|
14570 |
+
"grad_norm": 74.0,
|
14571 |
+
"learning_rate": 3.132203389830508e-07,
|
14572 |
+
"loss": 1.4741,
|
14573 |
+
"step": 20760
|
14574 |
+
},
|
14575 |
+
{
|
14576 |
+
"epoch": 0.52,
|
14577 |
+
"grad_norm": 66.5,
|
14578 |
+
"learning_rate": 3.128813559322034e-07,
|
14579 |
+
"loss": 1.4807,
|
14580 |
+
"step": 20770
|
14581 |
+
},
|
14582 |
+
{
|
14583 |
+
"epoch": 0.52,
|
14584 |
+
"grad_norm": 68.0,
|
14585 |
+
"learning_rate": 3.1254237288135593e-07,
|
14586 |
+
"loss": 1.439,
|
14587 |
+
"step": 20780
|
14588 |
+
},
|
14589 |
+
{
|
14590 |
+
"epoch": 0.52,
|
14591 |
+
"grad_norm": 67.0,
|
14592 |
+
"learning_rate": 3.1220338983050846e-07,
|
14593 |
+
"loss": 1.4547,
|
14594 |
+
"step": 20790
|
14595 |
+
},
|
14596 |
+
{
|
14597 |
+
"epoch": 0.52,
|
14598 |
+
"grad_norm": 66.0,
|
14599 |
+
"learning_rate": 3.1186440677966104e-07,
|
14600 |
+
"loss": 1.5078,
|
14601 |
+
"step": 20800
|
14602 |
+
},
|
14603 |
+
{
|
14604 |
+
"epoch": 0.52,
|
14605 |
+
"grad_norm": 67.0,
|
14606 |
+
"learning_rate": 3.115254237288135e-07,
|
14607 |
+
"loss": 1.4719,
|
14608 |
+
"step": 20810
|
14609 |
+
},
|
14610 |
+
{
|
14611 |
+
"epoch": 0.52,
|
14612 |
+
"grad_norm": 65.5,
|
14613 |
+
"learning_rate": 3.1118644067796604e-07,
|
14614 |
+
"loss": 1.3959,
|
14615 |
+
"step": 20820
|
14616 |
+
},
|
14617 |
+
{
|
14618 |
+
"epoch": 0.52,
|
14619 |
+
"grad_norm": 63.0,
|
14620 |
+
"learning_rate": 3.108474576271186e-07,
|
14621 |
+
"loss": 1.467,
|
14622 |
+
"step": 20830
|
14623 |
+
},
|
14624 |
+
{
|
14625 |
+
"epoch": 0.52,
|
14626 |
+
"grad_norm": 67.5,
|
14627 |
+
"learning_rate": 3.1050847457627115e-07,
|
14628 |
+
"loss": 1.4718,
|
14629 |
+
"step": 20840
|
14630 |
+
},
|
14631 |
+
{
|
14632 |
+
"epoch": 0.52,
|
14633 |
+
"grad_norm": 69.0,
|
14634 |
+
"learning_rate": 3.1016949152542373e-07,
|
14635 |
+
"loss": 1.4672,
|
14636 |
+
"step": 20850
|
14637 |
+
},
|
14638 |
+
{
|
14639 |
+
"epoch": 0.52,
|
14640 |
+
"grad_norm": 66.5,
|
14641 |
+
"learning_rate": 3.0983050847457626e-07,
|
14642 |
+
"loss": 1.5616,
|
14643 |
+
"step": 20860
|
14644 |
+
},
|
14645 |
+
{
|
14646 |
+
"epoch": 0.52,
|
14647 |
+
"grad_norm": 69.5,
|
14648 |
+
"learning_rate": 3.094915254237288e-07,
|
14649 |
+
"loss": 1.494,
|
14650 |
+
"step": 20870
|
14651 |
+
},
|
14652 |
+
{
|
14653 |
+
"epoch": 0.52,
|
14654 |
+
"grad_norm": 67.0,
|
14655 |
+
"learning_rate": 3.0915254237288137e-07,
|
14656 |
+
"loss": 1.5186,
|
14657 |
+
"step": 20880
|
14658 |
+
},
|
14659 |
+
{
|
14660 |
+
"epoch": 0.52,
|
14661 |
+
"grad_norm": 70.5,
|
14662 |
+
"learning_rate": 3.088135593220339e-07,
|
14663 |
+
"loss": 1.488,
|
14664 |
+
"step": 20890
|
14665 |
+
},
|
14666 |
+
{
|
14667 |
+
"epoch": 0.52,
|
14668 |
+
"grad_norm": 68.0,
|
14669 |
+
"learning_rate": 3.084745762711864e-07,
|
14670 |
+
"loss": 1.4227,
|
14671 |
+
"step": 20900
|
14672 |
+
},
|
14673 |
+
{
|
14674 |
+
"epoch": 0.52,
|
14675 |
+
"grad_norm": 63.5,
|
14676 |
+
"learning_rate": 3.08135593220339e-07,
|
14677 |
+
"loss": 1.432,
|
14678 |
+
"step": 20910
|
14679 |
+
},
|
14680 |
+
{
|
14681 |
+
"epoch": 0.52,
|
14682 |
+
"grad_norm": 66.5,
|
14683 |
+
"learning_rate": 3.077966101694915e-07,
|
14684 |
+
"loss": 1.4613,
|
14685 |
+
"step": 20920
|
14686 |
+
},
|
14687 |
+
{
|
14688 |
+
"epoch": 0.52,
|
14689 |
+
"grad_norm": 68.0,
|
14690 |
+
"learning_rate": 3.0745762711864406e-07,
|
14691 |
+
"loss": 1.465,
|
14692 |
+
"step": 20930
|
14693 |
+
},
|
14694 |
+
{
|
14695 |
+
"epoch": 0.52,
|
14696 |
+
"grad_norm": 66.0,
|
14697 |
+
"learning_rate": 3.071186440677966e-07,
|
14698 |
+
"loss": 1.4968,
|
14699 |
+
"step": 20940
|
14700 |
+
},
|
14701 |
+
{
|
14702 |
+
"epoch": 0.52,
|
14703 |
+
"grad_norm": 71.0,
|
14704 |
+
"learning_rate": 3.067796610169491e-07,
|
14705 |
+
"loss": 1.4866,
|
14706 |
+
"step": 20950
|
14707 |
+
},
|
14708 |
+
{
|
14709 |
+
"epoch": 0.52,
|
14710 |
+
"grad_norm": 65.5,
|
14711 |
+
"learning_rate": 3.064406779661017e-07,
|
14712 |
+
"loss": 1.5076,
|
14713 |
+
"step": 20960
|
14714 |
+
},
|
14715 |
+
{
|
14716 |
+
"epoch": 0.52,
|
14717 |
+
"grad_norm": 67.5,
|
14718 |
+
"learning_rate": 3.061016949152542e-07,
|
14719 |
+
"loss": 1.4341,
|
14720 |
+
"step": 20970
|
14721 |
+
},
|
14722 |
+
{
|
14723 |
+
"epoch": 0.52,
|
14724 |
+
"grad_norm": 66.5,
|
14725 |
+
"learning_rate": 3.0576271186440675e-07,
|
14726 |
+
"loss": 1.4862,
|
14727 |
+
"step": 20980
|
14728 |
+
},
|
14729 |
+
{
|
14730 |
+
"epoch": 0.52,
|
14731 |
+
"grad_norm": 67.0,
|
14732 |
+
"learning_rate": 3.0542372881355933e-07,
|
14733 |
+
"loss": 1.5154,
|
14734 |
+
"step": 20990
|
14735 |
+
},
|
14736 |
+
{
|
14737 |
+
"epoch": 0.53,
|
14738 |
+
"grad_norm": 68.0,
|
14739 |
+
"learning_rate": 3.0508474576271186e-07,
|
14740 |
+
"loss": 1.4475,
|
14741 |
+
"step": 21000
|
14742 |
+
},
|
14743 |
+
{
|
14744 |
+
"epoch": 0.53,
|
14745 |
+
"grad_norm": 64.5,
|
14746 |
+
"learning_rate": 3.0474576271186444e-07,
|
14747 |
+
"loss": 1.489,
|
14748 |
+
"step": 21010
|
14749 |
+
},
|
14750 |
+
{
|
14751 |
+
"epoch": 0.53,
|
14752 |
+
"grad_norm": 73.0,
|
14753 |
+
"learning_rate": 3.044067796610169e-07,
|
14754 |
+
"loss": 1.4482,
|
14755 |
+
"step": 21020
|
14756 |
+
},
|
14757 |
+
{
|
14758 |
+
"epoch": 0.53,
|
14759 |
+
"grad_norm": 66.5,
|
14760 |
+
"learning_rate": 3.0406779661016944e-07,
|
14761 |
+
"loss": 1.5017,
|
14762 |
+
"step": 21030
|
14763 |
+
},
|
14764 |
+
{
|
14765 |
+
"epoch": 0.53,
|
14766 |
+
"grad_norm": 65.5,
|
14767 |
+
"learning_rate": 3.03728813559322e-07,
|
14768 |
+
"loss": 1.4573,
|
14769 |
+
"step": 21040
|
14770 |
+
},
|
14771 |
+
{
|
14772 |
+
"epoch": 0.53,
|
14773 |
+
"grad_norm": 66.5,
|
14774 |
+
"learning_rate": 3.0338983050847455e-07,
|
14775 |
+
"loss": 1.4509,
|
14776 |
+
"step": 21050
|
14777 |
+
},
|
14778 |
+
{
|
14779 |
+
"epoch": 0.53,
|
14780 |
+
"grad_norm": 71.0,
|
14781 |
+
"learning_rate": 3.030508474576271e-07,
|
14782 |
+
"loss": 1.4765,
|
14783 |
+
"step": 21060
|
14784 |
+
},
|
14785 |
+
{
|
14786 |
+
"epoch": 0.53,
|
14787 |
+
"grad_norm": 65.0,
|
14788 |
+
"learning_rate": 3.0271186440677966e-07,
|
14789 |
+
"loss": 1.4637,
|
14790 |
+
"step": 21070
|
14791 |
+
},
|
14792 |
+
{
|
14793 |
+
"epoch": 0.53,
|
14794 |
+
"grad_norm": 71.5,
|
14795 |
+
"learning_rate": 3.023728813559322e-07,
|
14796 |
+
"loss": 1.4916,
|
14797 |
+
"step": 21080
|
14798 |
+
},
|
14799 |
+
{
|
14800 |
+
"epoch": 0.53,
|
14801 |
+
"grad_norm": 68.0,
|
14802 |
+
"learning_rate": 3.0203389830508477e-07,
|
14803 |
+
"loss": 1.5076,
|
14804 |
+
"step": 21090
|
14805 |
+
},
|
14806 |
+
{
|
14807 |
+
"epoch": 0.53,
|
14808 |
+
"grad_norm": 68.0,
|
14809 |
+
"learning_rate": 3.016949152542373e-07,
|
14810 |
+
"loss": 1.4416,
|
14811 |
+
"step": 21100
|
14812 |
+
},
|
14813 |
+
{
|
14814 |
+
"epoch": 0.53,
|
14815 |
+
"grad_norm": 66.0,
|
14816 |
+
"learning_rate": 3.013559322033898e-07,
|
14817 |
+
"loss": 1.4528,
|
14818 |
+
"step": 21110
|
14819 |
+
},
|
14820 |
+
{
|
14821 |
+
"epoch": 0.53,
|
14822 |
+
"grad_norm": 67.0,
|
14823 |
+
"learning_rate": 3.0101694915254235e-07,
|
14824 |
+
"loss": 1.4604,
|
14825 |
+
"step": 21120
|
14826 |
+
},
|
14827 |
+
{
|
14828 |
+
"epoch": 0.53,
|
14829 |
+
"grad_norm": 67.5,
|
14830 |
+
"learning_rate": 3.006779661016949e-07,
|
14831 |
+
"loss": 1.5248,
|
14832 |
+
"step": 21130
|
14833 |
+
},
|
14834 |
+
{
|
14835 |
+
"epoch": 0.53,
|
14836 |
+
"grad_norm": 68.5,
|
14837 |
+
"learning_rate": 3.003389830508474e-07,
|
14838 |
+
"loss": 1.5132,
|
14839 |
+
"step": 21140
|
14840 |
+
},
|
14841 |
+
{
|
14842 |
+
"epoch": 0.53,
|
14843 |
+
"grad_norm": 65.5,
|
14844 |
+
"learning_rate": 3e-07,
|
14845 |
+
"loss": 1.4708,
|
14846 |
+
"step": 21150
|
14847 |
+
},
|
14848 |
+
{
|
14849 |
+
"epoch": 0.53,
|
14850 |
+
"grad_norm": 65.0,
|
14851 |
+
"learning_rate": 2.996610169491525e-07,
|
14852 |
+
"loss": 1.4542,
|
14853 |
+
"step": 21160
|
14854 |
+
},
|
14855 |
+
{
|
14856 |
+
"epoch": 0.53,
|
14857 |
+
"grad_norm": 68.5,
|
14858 |
+
"learning_rate": 2.993220338983051e-07,
|
14859 |
+
"loss": 1.4819,
|
14860 |
+
"step": 21170
|
14861 |
+
},
|
14862 |
+
{
|
14863 |
+
"epoch": 0.53,
|
14864 |
+
"grad_norm": 66.0,
|
14865 |
+
"learning_rate": 2.989830508474576e-07,
|
14866 |
+
"loss": 1.4844,
|
14867 |
+
"step": 21180
|
14868 |
+
},
|
14869 |
+
{
|
14870 |
+
"epoch": 0.53,
|
14871 |
+
"grad_norm": 69.0,
|
14872 |
+
"learning_rate": 2.9864406779661015e-07,
|
14873 |
+
"loss": 1.4743,
|
14874 |
+
"step": 21190
|
14875 |
+
},
|
14876 |
+
{
|
14877 |
+
"epoch": 0.53,
|
14878 |
+
"grad_norm": 65.5,
|
14879 |
+
"learning_rate": 2.9830508474576273e-07,
|
14880 |
+
"loss": 1.4118,
|
14881 |
+
"step": 21200
|
14882 |
+
},
|
14883 |
+
{
|
14884 |
+
"epoch": 0.53,
|
14885 |
+
"grad_norm": 68.0,
|
14886 |
+
"learning_rate": 2.9796610169491526e-07,
|
14887 |
+
"loss": 1.4716,
|
14888 |
+
"step": 21210
|
14889 |
+
},
|
14890 |
+
{
|
14891 |
+
"epoch": 0.53,
|
14892 |
+
"grad_norm": 64.5,
|
14893 |
+
"learning_rate": 2.976271186440678e-07,
|
14894 |
+
"loss": 1.469,
|
14895 |
+
"step": 21220
|
14896 |
+
},
|
14897 |
+
{
|
14898 |
+
"epoch": 0.53,
|
14899 |
+
"grad_norm": 66.5,
|
14900 |
+
"learning_rate": 2.972881355932203e-07,
|
14901 |
+
"loss": 1.4809,
|
14902 |
+
"step": 21230
|
14903 |
+
},
|
14904 |
+
{
|
14905 |
+
"epoch": 0.53,
|
14906 |
+
"grad_norm": 64.5,
|
14907 |
+
"learning_rate": 2.9694915254237284e-07,
|
14908 |
+
"loss": 1.4595,
|
14909 |
+
"step": 21240
|
14910 |
+
},
|
14911 |
+
{
|
14912 |
+
"epoch": 0.53,
|
14913 |
+
"grad_norm": 73.0,
|
14914 |
+
"learning_rate": 2.966101694915254e-07,
|
14915 |
+
"loss": 1.4719,
|
14916 |
+
"step": 21250
|
14917 |
+
},
|
14918 |
+
{
|
14919 |
+
"epoch": 0.53,
|
14920 |
+
"grad_norm": 66.0,
|
14921 |
+
"learning_rate": 2.9627118644067795e-07,
|
14922 |
+
"loss": 1.4414,
|
14923 |
+
"step": 21260
|
14924 |
+
},
|
14925 |
+
{
|
14926 |
+
"epoch": 0.53,
|
14927 |
+
"grad_norm": 63.25,
|
14928 |
+
"learning_rate": 2.959322033898305e-07,
|
14929 |
+
"loss": 1.4531,
|
14930 |
+
"step": 21270
|
14931 |
+
},
|
14932 |
+
{
|
14933 |
+
"epoch": 0.53,
|
14934 |
+
"grad_norm": 65.0,
|
14935 |
+
"learning_rate": 2.9559322033898306e-07,
|
14936 |
+
"loss": 1.4457,
|
14937 |
+
"step": 21280
|
14938 |
+
},
|
14939 |
+
{
|
14940 |
+
"epoch": 0.53,
|
14941 |
+
"grad_norm": 67.5,
|
14942 |
+
"learning_rate": 2.952542372881356e-07,
|
14943 |
+
"loss": 1.4854,
|
14944 |
+
"step": 21290
|
14945 |
+
},
|
14946 |
+
{
|
14947 |
+
"epoch": 0.53,
|
14948 |
+
"grad_norm": 67.0,
|
14949 |
+
"learning_rate": 2.949152542372881e-07,
|
14950 |
+
"loss": 1.4871,
|
14951 |
+
"step": 21300
|
14952 |
+
},
|
14953 |
+
{
|
14954 |
+
"epoch": 0.53,
|
14955 |
+
"grad_norm": 68.0,
|
14956 |
+
"learning_rate": 2.945762711864407e-07,
|
14957 |
+
"loss": 1.4342,
|
14958 |
+
"step": 21310
|
14959 |
+
},
|
14960 |
+
{
|
14961 |
+
"epoch": 0.53,
|
14962 |
+
"grad_norm": 65.0,
|
14963 |
+
"learning_rate": 2.942372881355932e-07,
|
14964 |
+
"loss": 1.4727,
|
14965 |
+
"step": 21320
|
14966 |
+
},
|
14967 |
+
{
|
14968 |
+
"epoch": 0.53,
|
14969 |
+
"grad_norm": 71.0,
|
14970 |
+
"learning_rate": 2.9389830508474575e-07,
|
14971 |
+
"loss": 1.4878,
|
14972 |
+
"step": 21330
|
14973 |
+
},
|
14974 |
+
{
|
14975 |
+
"epoch": 0.53,
|
14976 |
+
"grad_norm": 69.0,
|
14977 |
+
"learning_rate": 2.935593220338983e-07,
|
14978 |
+
"loss": 1.4561,
|
14979 |
+
"step": 21340
|
14980 |
+
},
|
14981 |
+
{
|
14982 |
+
"epoch": 0.53,
|
14983 |
+
"grad_norm": 69.0,
|
14984 |
+
"learning_rate": 2.932203389830508e-07,
|
14985 |
+
"loss": 1.414,
|
14986 |
+
"step": 21350
|
14987 |
+
},
|
14988 |
+
{
|
14989 |
+
"epoch": 0.53,
|
14990 |
+
"grad_norm": 66.5,
|
14991 |
+
"learning_rate": 2.928813559322034e-07,
|
14992 |
+
"loss": 1.436,
|
14993 |
+
"step": 21360
|
14994 |
+
},
|
14995 |
+
{
|
14996 |
+
"epoch": 0.53,
|
14997 |
+
"grad_norm": 69.5,
|
14998 |
+
"learning_rate": 2.925423728813559e-07,
|
14999 |
+
"loss": 1.4707,
|
15000 |
+
"step": 21370
|
15001 |
+
},
|
15002 |
+
{
|
15003 |
+
"epoch": 0.53,
|
15004 |
+
"grad_norm": 66.5,
|
15005 |
+
"learning_rate": 2.9220338983050844e-07,
|
15006 |
+
"loss": 1.4839,
|
15007 |
+
"step": 21380
|
15008 |
+
},
|
15009 |
+
{
|
15010 |
+
"epoch": 0.53,
|
15011 |
+
"grad_norm": 70.5,
|
15012 |
+
"learning_rate": 2.91864406779661e-07,
|
15013 |
+
"loss": 1.4895,
|
15014 |
+
"step": 21390
|
15015 |
+
},
|
15016 |
+
{
|
15017 |
+
"epoch": 0.54,
|
15018 |
+
"grad_norm": 67.0,
|
15019 |
+
"learning_rate": 2.9152542372881355e-07,
|
15020 |
+
"loss": 1.4373,
|
15021 |
+
"step": 21400
|
15022 |
+
},
|
15023 |
+
{
|
15024 |
+
"epoch": 0.54,
|
15025 |
+
"grad_norm": 65.0,
|
15026 |
+
"learning_rate": 2.9118644067796613e-07,
|
15027 |
+
"loss": 1.3779,
|
15028 |
+
"step": 21410
|
15029 |
+
},
|
15030 |
+
{
|
15031 |
+
"epoch": 0.54,
|
15032 |
+
"grad_norm": 66.5,
|
15033 |
+
"learning_rate": 2.9084745762711866e-07,
|
15034 |
+
"loss": 1.4942,
|
15035 |
+
"step": 21420
|
15036 |
+
},
|
15037 |
+
{
|
15038 |
+
"epoch": 0.54,
|
15039 |
+
"grad_norm": 67.0,
|
15040 |
+
"learning_rate": 2.905084745762712e-07,
|
15041 |
+
"loss": 1.4563,
|
15042 |
+
"step": 21430
|
15043 |
+
},
|
15044 |
+
{
|
15045 |
+
"epoch": 0.54,
|
15046 |
+
"grad_norm": 63.25,
|
15047 |
+
"learning_rate": 2.901694915254237e-07,
|
15048 |
+
"loss": 1.4411,
|
15049 |
+
"step": 21440
|
15050 |
+
},
|
15051 |
+
{
|
15052 |
+
"epoch": 0.54,
|
15053 |
+
"grad_norm": 68.5,
|
15054 |
+
"learning_rate": 2.8983050847457624e-07,
|
15055 |
+
"loss": 1.4222,
|
15056 |
+
"step": 21450
|
15057 |
+
},
|
15058 |
+
{
|
15059 |
+
"epoch": 0.54,
|
15060 |
+
"grad_norm": 65.5,
|
15061 |
+
"learning_rate": 2.8949152542372877e-07,
|
15062 |
+
"loss": 1.4217,
|
15063 |
+
"step": 21460
|
15064 |
+
},
|
15065 |
+
{
|
15066 |
+
"epoch": 0.54,
|
15067 |
+
"grad_norm": 68.0,
|
15068 |
+
"learning_rate": 2.8915254237288135e-07,
|
15069 |
+
"loss": 1.4356,
|
15070 |
+
"step": 21470
|
15071 |
+
},
|
15072 |
+
{
|
15073 |
+
"epoch": 0.54,
|
15074 |
+
"grad_norm": 67.0,
|
15075 |
+
"learning_rate": 2.888135593220339e-07,
|
15076 |
+
"loss": 1.477,
|
15077 |
+
"step": 21480
|
15078 |
+
},
|
15079 |
+
{
|
15080 |
+
"epoch": 0.54,
|
15081 |
+
"grad_norm": 62.5,
|
15082 |
+
"learning_rate": 2.8847457627118646e-07,
|
15083 |
+
"loss": 1.4594,
|
15084 |
+
"step": 21490
|
15085 |
+
},
|
15086 |
+
{
|
15087 |
+
"epoch": 0.54,
|
15088 |
+
"grad_norm": 65.5,
|
15089 |
+
"learning_rate": 2.88135593220339e-07,
|
15090 |
+
"loss": 1.4453,
|
15091 |
+
"step": 21500
|
15092 |
+
},
|
15093 |
+
{
|
15094 |
+
"epoch": 0.54,
|
15095 |
+
"grad_norm": 62.5,
|
15096 |
+
"learning_rate": 2.877966101694915e-07,
|
15097 |
+
"loss": 1.4637,
|
15098 |
+
"step": 21510
|
15099 |
+
},
|
15100 |
+
{
|
15101 |
+
"epoch": 0.54,
|
15102 |
+
"grad_norm": 69.5,
|
15103 |
+
"learning_rate": 2.874576271186441e-07,
|
15104 |
+
"loss": 1.4335,
|
15105 |
+
"step": 21520
|
15106 |
+
},
|
15107 |
+
{
|
15108 |
+
"epoch": 0.54,
|
15109 |
+
"grad_norm": 68.0,
|
15110 |
+
"learning_rate": 2.871186440677966e-07,
|
15111 |
+
"loss": 1.4404,
|
15112 |
+
"step": 21530
|
15113 |
+
},
|
15114 |
+
{
|
15115 |
+
"epoch": 0.54,
|
15116 |
+
"grad_norm": 66.5,
|
15117 |
+
"learning_rate": 2.867796610169491e-07,
|
15118 |
+
"loss": 1.489,
|
15119 |
+
"step": 21540
|
15120 |
+
},
|
15121 |
+
{
|
15122 |
+
"epoch": 0.54,
|
15123 |
+
"grad_norm": 64.0,
|
15124 |
+
"learning_rate": 2.864406779661017e-07,
|
15125 |
+
"loss": 1.4628,
|
15126 |
+
"step": 21550
|
15127 |
+
},
|
15128 |
+
{
|
15129 |
+
"epoch": 0.54,
|
15130 |
+
"grad_norm": 65.0,
|
15131 |
+
"learning_rate": 2.861016949152542e-07,
|
15132 |
+
"loss": 1.4929,
|
15133 |
+
"step": 21560
|
15134 |
+
},
|
15135 |
+
{
|
15136 |
+
"epoch": 0.54,
|
15137 |
+
"grad_norm": 68.5,
|
15138 |
+
"learning_rate": 2.857627118644068e-07,
|
15139 |
+
"loss": 1.4815,
|
15140 |
+
"step": 21570
|
15141 |
+
},
|
15142 |
+
{
|
15143 |
+
"epoch": 0.54,
|
15144 |
+
"grad_norm": 66.0,
|
15145 |
+
"learning_rate": 2.854237288135593e-07,
|
15146 |
+
"loss": 1.4563,
|
15147 |
+
"step": 21580
|
15148 |
+
},
|
15149 |
+
{
|
15150 |
+
"epoch": 0.54,
|
15151 |
+
"grad_norm": 67.5,
|
15152 |
+
"learning_rate": 2.8508474576271184e-07,
|
15153 |
+
"loss": 1.4754,
|
15154 |
+
"step": 21590
|
15155 |
+
},
|
15156 |
+
{
|
15157 |
+
"epoch": 0.54,
|
15158 |
+
"grad_norm": 67.0,
|
15159 |
+
"learning_rate": 2.847457627118644e-07,
|
15160 |
+
"loss": 1.5266,
|
15161 |
+
"step": 21600
|
15162 |
+
},
|
15163 |
+
{
|
15164 |
+
"epoch": 0.54,
|
15165 |
+
"grad_norm": 65.0,
|
15166 |
+
"learning_rate": 2.8440677966101695e-07,
|
15167 |
+
"loss": 1.4389,
|
15168 |
+
"step": 21610
|
15169 |
+
},
|
15170 |
+
{
|
15171 |
+
"epoch": 0.54,
|
15172 |
+
"grad_norm": 66.5,
|
15173 |
+
"learning_rate": 2.840677966101695e-07,
|
15174 |
+
"loss": 1.4256,
|
15175 |
+
"step": 21620
|
15176 |
+
},
|
15177 |
+
{
|
15178 |
+
"epoch": 0.54,
|
15179 |
+
"grad_norm": 66.0,
|
15180 |
+
"learning_rate": 2.8372881355932206e-07,
|
15181 |
+
"loss": 1.4931,
|
15182 |
+
"step": 21630
|
15183 |
+
},
|
15184 |
+
{
|
15185 |
+
"epoch": 0.54,
|
15186 |
+
"grad_norm": 65.5,
|
15187 |
+
"learning_rate": 2.8338983050847453e-07,
|
15188 |
+
"loss": 1.5427,
|
15189 |
+
"step": 21640
|
15190 |
+
},
|
15191 |
+
{
|
15192 |
+
"epoch": 0.54,
|
15193 |
+
"grad_norm": 66.5,
|
15194 |
+
"learning_rate": 2.8305084745762706e-07,
|
15195 |
+
"loss": 1.4232,
|
15196 |
+
"step": 21650
|
15197 |
+
},
|
15198 |
+
{
|
15199 |
+
"epoch": 0.54,
|
15200 |
+
"grad_norm": 66.0,
|
15201 |
+
"learning_rate": 2.8271186440677964e-07,
|
15202 |
+
"loss": 1.484,
|
15203 |
+
"step": 21660
|
15204 |
+
},
|
15205 |
+
{
|
15206 |
+
"epoch": 0.54,
|
15207 |
+
"grad_norm": 67.0,
|
15208 |
+
"learning_rate": 2.8237288135593217e-07,
|
15209 |
+
"loss": 1.4328,
|
15210 |
+
"step": 21670
|
15211 |
+
},
|
15212 |
+
{
|
15213 |
+
"epoch": 0.54,
|
15214 |
+
"grad_norm": 66.5,
|
15215 |
+
"learning_rate": 2.8203389830508475e-07,
|
15216 |
+
"loss": 1.4981,
|
15217 |
+
"step": 21680
|
15218 |
+
},
|
15219 |
+
{
|
15220 |
+
"epoch": 0.54,
|
15221 |
+
"grad_norm": 68.5,
|
15222 |
+
"learning_rate": 2.816949152542373e-07,
|
15223 |
+
"loss": 1.4774,
|
15224 |
+
"step": 21690
|
15225 |
+
},
|
15226 |
+
{
|
15227 |
+
"epoch": 0.54,
|
15228 |
+
"grad_norm": 66.5,
|
15229 |
+
"learning_rate": 2.813559322033898e-07,
|
15230 |
+
"loss": 1.4202,
|
15231 |
+
"step": 21700
|
15232 |
+
},
|
15233 |
+
{
|
15234 |
+
"epoch": 0.54,
|
15235 |
+
"grad_norm": 66.5,
|
15236 |
+
"learning_rate": 2.810169491525424e-07,
|
15237 |
+
"loss": 1.4588,
|
15238 |
+
"step": 21710
|
15239 |
+
},
|
15240 |
+
{
|
15241 |
+
"epoch": 0.54,
|
15242 |
+
"grad_norm": 69.5,
|
15243 |
+
"learning_rate": 2.806779661016949e-07,
|
15244 |
+
"loss": 1.5449,
|
15245 |
+
"step": 21720
|
15246 |
+
},
|
15247 |
+
{
|
15248 |
+
"epoch": 0.54,
|
15249 |
+
"grad_norm": 70.5,
|
15250 |
+
"learning_rate": 2.803389830508475e-07,
|
15251 |
+
"loss": 1.5002,
|
15252 |
+
"step": 21730
|
15253 |
+
},
|
15254 |
+
{
|
15255 |
+
"epoch": 0.54,
|
15256 |
+
"grad_norm": 68.0,
|
15257 |
+
"learning_rate": 2.8e-07,
|
15258 |
+
"loss": 1.463,
|
15259 |
+
"step": 21740
|
15260 |
+
},
|
15261 |
+
{
|
15262 |
+
"epoch": 0.54,
|
15263 |
+
"grad_norm": 66.5,
|
15264 |
+
"learning_rate": 2.796610169491525e-07,
|
15265 |
+
"loss": 1.467,
|
15266 |
+
"step": 21750
|
15267 |
+
},
|
15268 |
+
{
|
15269 |
+
"epoch": 0.54,
|
15270 |
+
"grad_norm": 69.5,
|
15271 |
+
"learning_rate": 2.793220338983051e-07,
|
15272 |
+
"loss": 1.4127,
|
15273 |
+
"step": 21760
|
15274 |
+
},
|
15275 |
+
{
|
15276 |
+
"epoch": 0.54,
|
15277 |
+
"grad_norm": 71.5,
|
15278 |
+
"learning_rate": 2.789830508474576e-07,
|
15279 |
+
"loss": 1.493,
|
15280 |
+
"step": 21770
|
15281 |
+
},
|
15282 |
+
{
|
15283 |
+
"epoch": 0.54,
|
15284 |
+
"grad_norm": 64.0,
|
15285 |
+
"learning_rate": 2.7864406779661013e-07,
|
15286 |
+
"loss": 1.4866,
|
15287 |
+
"step": 21780
|
15288 |
+
},
|
15289 |
+
{
|
15290 |
+
"epoch": 0.54,
|
15291 |
+
"grad_norm": 69.0,
|
15292 |
+
"learning_rate": 2.783050847457627e-07,
|
15293 |
+
"loss": 1.5279,
|
15294 |
+
"step": 21790
|
15295 |
+
},
|
15296 |
+
{
|
15297 |
+
"epoch": 0.55,
|
15298 |
+
"grad_norm": 69.0,
|
15299 |
+
"learning_rate": 2.7796610169491524e-07,
|
15300 |
+
"loss": 1.5252,
|
15301 |
+
"step": 21800
|
15302 |
+
},
|
15303 |
+
{
|
15304 |
+
"epoch": 0.55,
|
15305 |
+
"grad_norm": 67.0,
|
15306 |
+
"learning_rate": 2.776271186440678e-07,
|
15307 |
+
"loss": 1.4794,
|
15308 |
+
"step": 21810
|
15309 |
+
},
|
15310 |
+
{
|
15311 |
+
"epoch": 0.55,
|
15312 |
+
"grad_norm": 67.5,
|
15313 |
+
"learning_rate": 2.7728813559322035e-07,
|
15314 |
+
"loss": 1.4078,
|
15315 |
+
"step": 21820
|
15316 |
+
},
|
15317 |
+
{
|
15318 |
+
"epoch": 0.55,
|
15319 |
+
"grad_norm": 69.0,
|
15320 |
+
"learning_rate": 2.769491525423729e-07,
|
15321 |
+
"loss": 1.4782,
|
15322 |
+
"step": 21830
|
15323 |
+
},
|
15324 |
+
{
|
15325 |
+
"epoch": 0.55,
|
15326 |
+
"grad_norm": 70.5,
|
15327 |
+
"learning_rate": 2.7661016949152545e-07,
|
15328 |
+
"loss": 1.4411,
|
15329 |
+
"step": 21840
|
15330 |
+
},
|
15331 |
+
{
|
15332 |
+
"epoch": 0.55,
|
15333 |
+
"grad_norm": 63.75,
|
15334 |
+
"learning_rate": 2.7627118644067793e-07,
|
15335 |
+
"loss": 1.393,
|
15336 |
+
"step": 21850
|
15337 |
+
},
|
15338 |
+
{
|
15339 |
+
"epoch": 0.55,
|
15340 |
+
"grad_norm": 67.0,
|
15341 |
+
"learning_rate": 2.7593220338983046e-07,
|
15342 |
+
"loss": 1.4784,
|
15343 |
+
"step": 21860
|
15344 |
+
},
|
15345 |
+
{
|
15346 |
+
"epoch": 0.55,
|
15347 |
+
"grad_norm": 68.0,
|
15348 |
+
"learning_rate": 2.7559322033898304e-07,
|
15349 |
+
"loss": 1.4012,
|
15350 |
+
"step": 21870
|
15351 |
+
},
|
15352 |
+
{
|
15353 |
+
"epoch": 0.55,
|
15354 |
+
"grad_norm": 65.5,
|
15355 |
+
"learning_rate": 2.7525423728813557e-07,
|
15356 |
+
"loss": 1.477,
|
15357 |
+
"step": 21880
|
15358 |
+
},
|
15359 |
+
{
|
15360 |
+
"epoch": 0.55,
|
15361 |
+
"grad_norm": 66.5,
|
15362 |
+
"learning_rate": 2.749152542372881e-07,
|
15363 |
+
"loss": 1.5067,
|
15364 |
+
"step": 21890
|
15365 |
+
},
|
15366 |
+
{
|
15367 |
+
"epoch": 0.55,
|
15368 |
+
"grad_norm": 67.5,
|
15369 |
+
"learning_rate": 2.745762711864407e-07,
|
15370 |
+
"loss": 1.4631,
|
15371 |
+
"step": 21900
|
15372 |
+
},
|
15373 |
+
{
|
15374 |
+
"epoch": 0.55,
|
15375 |
+
"grad_norm": 66.5,
|
15376 |
+
"learning_rate": 2.742372881355932e-07,
|
15377 |
+
"loss": 1.5257,
|
15378 |
+
"step": 21910
|
15379 |
+
},
|
15380 |
+
{
|
15381 |
+
"epoch": 0.55,
|
15382 |
+
"grad_norm": 67.5,
|
15383 |
+
"learning_rate": 2.738983050847458e-07,
|
15384 |
+
"loss": 1.4561,
|
15385 |
+
"step": 21920
|
15386 |
+
},
|
15387 |
+
{
|
15388 |
+
"epoch": 0.55,
|
15389 |
+
"grad_norm": 65.5,
|
15390 |
+
"learning_rate": 2.735593220338983e-07,
|
15391 |
+
"loss": 1.4773,
|
15392 |
+
"step": 21930
|
15393 |
+
},
|
15394 |
+
{
|
15395 |
+
"epoch": 0.55,
|
15396 |
+
"grad_norm": 67.5,
|
15397 |
+
"learning_rate": 2.7322033898305084e-07,
|
15398 |
+
"loss": 1.4732,
|
15399 |
+
"step": 21940
|
15400 |
+
},
|
15401 |
+
{
|
15402 |
+
"epoch": 0.55,
|
15403 |
+
"grad_norm": 66.0,
|
15404 |
+
"learning_rate": 2.7288135593220336e-07,
|
15405 |
+
"loss": 1.4743,
|
15406 |
+
"step": 21950
|
15407 |
+
},
|
15408 |
+
{
|
15409 |
+
"epoch": 0.55,
|
15410 |
+
"grad_norm": 67.0,
|
15411 |
+
"learning_rate": 2.725423728813559e-07,
|
15412 |
+
"loss": 1.4624,
|
15413 |
+
"step": 21960
|
15414 |
+
},
|
15415 |
+
{
|
15416 |
+
"epoch": 0.55,
|
15417 |
+
"grad_norm": 63.5,
|
15418 |
+
"learning_rate": 2.722033898305084e-07,
|
15419 |
+
"loss": 1.4451,
|
15420 |
+
"step": 21970
|
15421 |
+
},
|
15422 |
+
{
|
15423 |
+
"epoch": 0.55,
|
15424 |
+
"grad_norm": 64.5,
|
15425 |
+
"learning_rate": 2.71864406779661e-07,
|
15426 |
+
"loss": 1.4502,
|
15427 |
+
"step": 21980
|
15428 |
+
},
|
15429 |
+
{
|
15430 |
+
"epoch": 0.55,
|
15431 |
+
"grad_norm": 68.0,
|
15432 |
+
"learning_rate": 2.7152542372881353e-07,
|
15433 |
+
"loss": 1.4902,
|
15434 |
+
"step": 21990
|
15435 |
+
},
|
15436 |
+
{
|
15437 |
+
"epoch": 0.55,
|
15438 |
+
"grad_norm": 65.5,
|
15439 |
+
"learning_rate": 2.711864406779661e-07,
|
15440 |
+
"loss": 1.4215,
|
15441 |
+
"step": 22000
|
15442 |
+
},
|
15443 |
+
{
|
15444 |
+
"epoch": 0.55,
|
15445 |
+
"grad_norm": 66.0,
|
15446 |
+
"learning_rate": 2.7084745762711864e-07,
|
15447 |
+
"loss": 1.4646,
|
15448 |
+
"step": 22010
|
15449 |
+
},
|
15450 |
+
{
|
15451 |
+
"epoch": 0.55,
|
15452 |
+
"grad_norm": 65.5,
|
15453 |
+
"learning_rate": 2.7050847457627116e-07,
|
15454 |
+
"loss": 1.4843,
|
15455 |
+
"step": 22020
|
15456 |
+
},
|
15457 |
+
{
|
15458 |
+
"epoch": 0.55,
|
15459 |
+
"grad_norm": 66.5,
|
15460 |
+
"learning_rate": 2.7016949152542375e-07,
|
15461 |
+
"loss": 1.4881,
|
15462 |
+
"step": 22030
|
15463 |
+
},
|
15464 |
+
{
|
15465 |
+
"epoch": 0.55,
|
15466 |
+
"grad_norm": 68.5,
|
15467 |
+
"learning_rate": 2.6983050847457627e-07,
|
15468 |
+
"loss": 1.4136,
|
15469 |
+
"step": 22040
|
15470 |
+
},
|
15471 |
+
{
|
15472 |
+
"epoch": 0.55,
|
15473 |
+
"grad_norm": 63.25,
|
15474 |
+
"learning_rate": 2.6949152542372885e-07,
|
15475 |
+
"loss": 1.4735,
|
15476 |
+
"step": 22050
|
15477 |
+
},
|
15478 |
+
{
|
15479 |
+
"epoch": 0.55,
|
15480 |
+
"grad_norm": 67.5,
|
15481 |
+
"learning_rate": 2.6915254237288133e-07,
|
15482 |
+
"loss": 1.4365,
|
15483 |
+
"step": 22060
|
15484 |
+
},
|
15485 |
+
{
|
15486 |
+
"epoch": 0.55,
|
15487 |
+
"grad_norm": 68.0,
|
15488 |
+
"learning_rate": 2.6881355932203386e-07,
|
15489 |
+
"loss": 1.5033,
|
15490 |
+
"step": 22070
|
15491 |
+
},
|
15492 |
+
{
|
15493 |
+
"epoch": 0.55,
|
15494 |
+
"grad_norm": 65.5,
|
15495 |
+
"learning_rate": 2.6847457627118644e-07,
|
15496 |
+
"loss": 1.4978,
|
15497 |
+
"step": 22080
|
15498 |
+
},
|
15499 |
+
{
|
15500 |
+
"epoch": 0.55,
|
15501 |
+
"grad_norm": 67.0,
|
15502 |
+
"learning_rate": 2.6813559322033896e-07,
|
15503 |
+
"loss": 1.4561,
|
15504 |
+
"step": 22090
|
15505 |
+
},
|
15506 |
+
{
|
15507 |
+
"epoch": 0.55,
|
15508 |
+
"grad_norm": 65.5,
|
15509 |
+
"learning_rate": 2.677966101694915e-07,
|
15510 |
+
"loss": 1.5001,
|
15511 |
+
"step": 22100
|
15512 |
+
},
|
15513 |
+
{
|
15514 |
+
"epoch": 0.55,
|
15515 |
+
"grad_norm": 66.5,
|
15516 |
+
"learning_rate": 2.6745762711864407e-07,
|
15517 |
+
"loss": 1.4332,
|
15518 |
+
"step": 22110
|
15519 |
+
},
|
15520 |
+
{
|
15521 |
+
"epoch": 0.55,
|
15522 |
+
"grad_norm": 67.5,
|
15523 |
+
"learning_rate": 2.671186440677966e-07,
|
15524 |
+
"loss": 1.4629,
|
15525 |
+
"step": 22120
|
15526 |
+
},
|
15527 |
+
{
|
15528 |
+
"epoch": 0.55,
|
15529 |
+
"grad_norm": 67.5,
|
15530 |
+
"learning_rate": 2.6677966101694913e-07,
|
15531 |
+
"loss": 1.5131,
|
15532 |
+
"step": 22130
|
15533 |
+
},
|
15534 |
+
{
|
15535 |
+
"epoch": 0.55,
|
15536 |
+
"grad_norm": 66.0,
|
15537 |
+
"learning_rate": 2.664406779661017e-07,
|
15538 |
+
"loss": 1.4893,
|
15539 |
+
"step": 22140
|
15540 |
+
},
|
15541 |
+
{
|
15542 |
+
"epoch": 0.55,
|
15543 |
+
"grad_norm": 68.0,
|
15544 |
+
"learning_rate": 2.6610169491525424e-07,
|
15545 |
+
"loss": 1.4378,
|
15546 |
+
"step": 22150
|
15547 |
+
},
|
15548 |
+
{
|
15549 |
+
"epoch": 0.55,
|
15550 |
+
"grad_norm": 69.5,
|
15551 |
+
"learning_rate": 2.6576271186440676e-07,
|
15552 |
+
"loss": 1.4219,
|
15553 |
+
"step": 22160
|
15554 |
+
},
|
15555 |
+
{
|
15556 |
+
"epoch": 0.55,
|
15557 |
+
"grad_norm": 66.5,
|
15558 |
+
"learning_rate": 2.654237288135593e-07,
|
15559 |
+
"loss": 1.4722,
|
15560 |
+
"step": 22170
|
15561 |
+
},
|
15562 |
+
{
|
15563 |
+
"epoch": 0.55,
|
15564 |
+
"grad_norm": 68.0,
|
15565 |
+
"learning_rate": 2.650847457627118e-07,
|
15566 |
+
"loss": 1.509,
|
15567 |
+
"step": 22180
|
15568 |
+
},
|
15569 |
+
{
|
15570 |
+
"epoch": 0.55,
|
15571 |
+
"grad_norm": 65.5,
|
15572 |
+
"learning_rate": 2.647457627118644e-07,
|
15573 |
+
"loss": 1.4875,
|
15574 |
+
"step": 22190
|
15575 |
+
},
|
15576 |
+
{
|
15577 |
+
"epoch": 0.56,
|
15578 |
+
"grad_norm": 72.5,
|
15579 |
+
"learning_rate": 2.6440677966101693e-07,
|
15580 |
+
"loss": 1.4397,
|
15581 |
+
"step": 22200
|
15582 |
+
},
|
15583 |
+
{
|
15584 |
+
"epoch": 0.56,
|
15585 |
+
"grad_norm": 67.5,
|
15586 |
+
"learning_rate": 2.6406779661016945e-07,
|
15587 |
+
"loss": 1.4862,
|
15588 |
+
"step": 22210
|
15589 |
+
},
|
15590 |
+
{
|
15591 |
+
"epoch": 0.56,
|
15592 |
+
"grad_norm": 70.0,
|
15593 |
+
"learning_rate": 2.6372881355932204e-07,
|
15594 |
+
"loss": 1.4248,
|
15595 |
+
"step": 22220
|
15596 |
+
},
|
15597 |
+
{
|
15598 |
+
"epoch": 0.56,
|
15599 |
+
"grad_norm": 67.5,
|
15600 |
+
"learning_rate": 2.6338983050847456e-07,
|
15601 |
+
"loss": 1.485,
|
15602 |
+
"step": 22230
|
15603 |
+
},
|
15604 |
+
{
|
15605 |
+
"epoch": 0.56,
|
15606 |
+
"grad_norm": 67.5,
|
15607 |
+
"learning_rate": 2.6305084745762714e-07,
|
15608 |
+
"loss": 1.4798,
|
15609 |
+
"step": 22240
|
15610 |
+
},
|
15611 |
+
{
|
15612 |
+
"epoch": 0.56,
|
15613 |
+
"grad_norm": 64.0,
|
15614 |
+
"learning_rate": 2.6271186440677967e-07,
|
15615 |
+
"loss": 1.4664,
|
15616 |
+
"step": 22250
|
15617 |
+
},
|
15618 |
+
{
|
15619 |
+
"epoch": 0.56,
|
15620 |
+
"grad_norm": 65.5,
|
15621 |
+
"learning_rate": 2.623728813559322e-07,
|
15622 |
+
"loss": 1.4435,
|
15623 |
+
"step": 22260
|
15624 |
+
},
|
15625 |
+
{
|
15626 |
+
"epoch": 0.56,
|
15627 |
+
"grad_norm": 64.0,
|
15628 |
+
"learning_rate": 2.6203389830508473e-07,
|
15629 |
+
"loss": 1.5132,
|
15630 |
+
"step": 22270
|
15631 |
+
},
|
15632 |
+
{
|
15633 |
+
"epoch": 0.56,
|
15634 |
+
"grad_norm": 67.0,
|
15635 |
+
"learning_rate": 2.6169491525423725e-07,
|
15636 |
+
"loss": 1.4555,
|
15637 |
+
"step": 22280
|
15638 |
+
},
|
15639 |
+
{
|
15640 |
+
"epoch": 0.56,
|
15641 |
+
"grad_norm": 69.5,
|
15642 |
+
"learning_rate": 2.613559322033898e-07,
|
15643 |
+
"loss": 1.4874,
|
15644 |
+
"step": 22290
|
15645 |
+
},
|
15646 |
+
{
|
15647 |
+
"epoch": 0.56,
|
15648 |
+
"grad_norm": 67.0,
|
15649 |
+
"learning_rate": 2.6101694915254236e-07,
|
15650 |
+
"loss": 1.4806,
|
15651 |
+
"step": 22300
|
15652 |
+
},
|
15653 |
+
{
|
15654 |
+
"epoch": 0.56,
|
15655 |
+
"grad_norm": 69.5,
|
15656 |
+
"learning_rate": 2.606779661016949e-07,
|
15657 |
+
"loss": 1.4733,
|
15658 |
+
"step": 22310
|
15659 |
+
},
|
15660 |
+
{
|
15661 |
+
"epoch": 0.56,
|
15662 |
+
"grad_norm": 65.5,
|
15663 |
+
"learning_rate": 2.6033898305084747e-07,
|
15664 |
+
"loss": 1.4037,
|
15665 |
+
"step": 22320
|
15666 |
+
},
|
15667 |
+
{
|
15668 |
+
"epoch": 0.56,
|
15669 |
+
"grad_norm": 67.0,
|
15670 |
+
"learning_rate": 2.6e-07,
|
15671 |
+
"loss": 1.4826,
|
15672 |
+
"step": 22330
|
15673 |
+
},
|
15674 |
+
{
|
15675 |
+
"epoch": 0.56,
|
15676 |
+
"grad_norm": 69.0,
|
15677 |
+
"learning_rate": 2.5966101694915253e-07,
|
15678 |
+
"loss": 1.4517,
|
15679 |
+
"step": 22340
|
15680 |
+
},
|
15681 |
+
{
|
15682 |
+
"epoch": 0.56,
|
15683 |
+
"grad_norm": 68.5,
|
15684 |
+
"learning_rate": 2.593220338983051e-07,
|
15685 |
+
"loss": 1.4646,
|
15686 |
+
"step": 22350
|
15687 |
+
},
|
15688 |
+
{
|
15689 |
+
"epoch": 0.56,
|
15690 |
+
"grad_norm": 66.5,
|
15691 |
+
"learning_rate": 2.5898305084745763e-07,
|
15692 |
+
"loss": 1.463,
|
15693 |
+
"step": 22360
|
15694 |
+
},
|
15695 |
+
{
|
15696 |
+
"epoch": 0.56,
|
15697 |
+
"grad_norm": 65.0,
|
15698 |
+
"learning_rate": 2.586440677966101e-07,
|
15699 |
+
"loss": 1.3894,
|
15700 |
+
"step": 22370
|
15701 |
+
},
|
15702 |
+
{
|
15703 |
+
"epoch": 0.56,
|
15704 |
+
"grad_norm": 66.0,
|
15705 |
+
"learning_rate": 2.583050847457627e-07,
|
15706 |
+
"loss": 1.4653,
|
15707 |
+
"step": 22380
|
15708 |
+
},
|
15709 |
+
{
|
15710 |
+
"epoch": 0.56,
|
15711 |
+
"grad_norm": 64.0,
|
15712 |
+
"learning_rate": 2.579661016949152e-07,
|
15713 |
+
"loss": 1.4551,
|
15714 |
+
"step": 22390
|
15715 |
+
},
|
15716 |
+
{
|
15717 |
+
"epoch": 0.56,
|
15718 |
+
"grad_norm": 70.5,
|
15719 |
+
"learning_rate": 2.576271186440678e-07,
|
15720 |
+
"loss": 1.4699,
|
15721 |
+
"step": 22400
|
15722 |
+
},
|
15723 |
+
{
|
15724 |
+
"epoch": 0.56,
|
15725 |
+
"grad_norm": 68.0,
|
15726 |
+
"learning_rate": 2.572881355932203e-07,
|
15727 |
+
"loss": 1.4967,
|
15728 |
+
"step": 22410
|
15729 |
+
},
|
15730 |
+
{
|
15731 |
+
"epoch": 0.56,
|
15732 |
+
"grad_norm": 66.5,
|
15733 |
+
"learning_rate": 2.5694915254237285e-07,
|
15734 |
+
"loss": 1.4465,
|
15735 |
+
"step": 22420
|
15736 |
+
},
|
15737 |
+
{
|
15738 |
+
"epoch": 0.56,
|
15739 |
+
"grad_norm": 71.0,
|
15740 |
+
"learning_rate": 2.5661016949152543e-07,
|
15741 |
+
"loss": 1.5049,
|
15742 |
+
"step": 22430
|
15743 |
+
},
|
15744 |
+
{
|
15745 |
+
"epoch": 0.56,
|
15746 |
+
"grad_norm": 68.0,
|
15747 |
+
"learning_rate": 2.5627118644067796e-07,
|
15748 |
+
"loss": 1.459,
|
15749 |
+
"step": 22440
|
15750 |
+
},
|
15751 |
+
{
|
15752 |
+
"epoch": 0.56,
|
15753 |
+
"grad_norm": 64.5,
|
15754 |
+
"learning_rate": 2.559322033898305e-07,
|
15755 |
+
"loss": 1.4719,
|
15756 |
+
"step": 22450
|
15757 |
+
},
|
15758 |
+
{
|
15759 |
+
"epoch": 0.56,
|
15760 |
+
"grad_norm": 68.5,
|
15761 |
+
"learning_rate": 2.5559322033898307e-07,
|
15762 |
+
"loss": 1.4305,
|
15763 |
+
"step": 22460
|
15764 |
+
},
|
15765 |
+
{
|
15766 |
+
"epoch": 0.56,
|
15767 |
+
"grad_norm": 68.0,
|
15768 |
+
"learning_rate": 2.5525423728813555e-07,
|
15769 |
+
"loss": 1.4761,
|
15770 |
+
"step": 22470
|
15771 |
+
},
|
15772 |
+
{
|
15773 |
+
"epoch": 0.56,
|
15774 |
+
"grad_norm": 66.5,
|
15775 |
+
"learning_rate": 2.549152542372881e-07,
|
15776 |
+
"loss": 1.4834,
|
15777 |
+
"step": 22480
|
15778 |
+
},
|
15779 |
+
{
|
15780 |
+
"epoch": 0.56,
|
15781 |
+
"grad_norm": 68.0,
|
15782 |
+
"learning_rate": 2.5457627118644065e-07,
|
15783 |
+
"loss": 1.4149,
|
15784 |
+
"step": 22490
|
15785 |
+
},
|
15786 |
+
{
|
15787 |
+
"epoch": 0.56,
|
15788 |
+
"grad_norm": 66.0,
|
15789 |
+
"learning_rate": 2.542372881355932e-07,
|
15790 |
+
"loss": 1.4605,
|
15791 |
+
"step": 22500
|
15792 |
+
},
|
15793 |
+
{
|
15794 |
+
"epoch": 0.56,
|
15795 |
+
"grad_norm": 68.5,
|
15796 |
+
"learning_rate": 2.5389830508474576e-07,
|
15797 |
+
"loss": 1.4435,
|
15798 |
+
"step": 22510
|
15799 |
+
},
|
15800 |
+
{
|
15801 |
+
"epoch": 0.56,
|
15802 |
+
"grad_norm": 68.5,
|
15803 |
+
"learning_rate": 2.535593220338983e-07,
|
15804 |
+
"loss": 1.5106,
|
15805 |
+
"step": 22520
|
15806 |
+
},
|
15807 |
+
{
|
15808 |
+
"epoch": 0.56,
|
15809 |
+
"grad_norm": 66.5,
|
15810 |
+
"learning_rate": 2.532203389830508e-07,
|
15811 |
+
"loss": 1.4488,
|
15812 |
+
"step": 22530
|
15813 |
+
},
|
15814 |
+
{
|
15815 |
+
"epoch": 0.56,
|
15816 |
+
"grad_norm": 63.75,
|
15817 |
+
"learning_rate": 2.528813559322034e-07,
|
15818 |
+
"loss": 1.4492,
|
15819 |
+
"step": 22540
|
15820 |
+
},
|
15821 |
+
{
|
15822 |
+
"epoch": 0.56,
|
15823 |
+
"grad_norm": 66.5,
|
15824 |
+
"learning_rate": 2.525423728813559e-07,
|
15825 |
+
"loss": 1.4728,
|
15826 |
+
"step": 22550
|
15827 |
+
},
|
15828 |
+
{
|
15829 |
+
"epoch": 0.56,
|
15830 |
+
"grad_norm": 68.0,
|
15831 |
+
"learning_rate": 2.522033898305085e-07,
|
15832 |
+
"loss": 1.5016,
|
15833 |
+
"step": 22560
|
15834 |
+
},
|
15835 |
+
{
|
15836 |
+
"epoch": 0.56,
|
15837 |
+
"grad_norm": 68.5,
|
15838 |
+
"learning_rate": 2.5186440677966103e-07,
|
15839 |
+
"loss": 1.4636,
|
15840 |
+
"step": 22570
|
15841 |
+
},
|
15842 |
+
{
|
15843 |
+
"epoch": 0.56,
|
15844 |
+
"grad_norm": 66.5,
|
15845 |
+
"learning_rate": 2.515254237288135e-07,
|
15846 |
+
"loss": 1.4976,
|
15847 |
+
"step": 22580
|
15848 |
+
},
|
15849 |
+
{
|
15850 |
+
"epoch": 0.56,
|
15851 |
+
"grad_norm": 70.0,
|
15852 |
+
"learning_rate": 2.511864406779661e-07,
|
15853 |
+
"loss": 1.47,
|
15854 |
+
"step": 22590
|
15855 |
+
},
|
15856 |
+
{
|
15857 |
+
"epoch": 0.56,
|
15858 |
+
"grad_norm": 65.0,
|
15859 |
+
"learning_rate": 2.508474576271186e-07,
|
15860 |
+
"loss": 1.4302,
|
15861 |
+
"step": 22600
|
15862 |
+
},
|
15863 |
+
{
|
15864 |
+
"epoch": 0.57,
|
15865 |
+
"grad_norm": 71.5,
|
15866 |
+
"learning_rate": 2.5050847457627114e-07,
|
15867 |
+
"loss": 1.4743,
|
15868 |
+
"step": 22610
|
15869 |
+
},
|
15870 |
+
{
|
15871 |
+
"epoch": 0.57,
|
15872 |
+
"grad_norm": 72.0,
|
15873 |
+
"learning_rate": 2.501694915254237e-07,
|
15874 |
+
"loss": 1.4998,
|
15875 |
+
"step": 22620
|
15876 |
+
},
|
15877 |
+
{
|
15878 |
+
"epoch": 0.57,
|
15879 |
+
"grad_norm": 66.0,
|
15880 |
+
"learning_rate": 2.4983050847457625e-07,
|
15881 |
+
"loss": 1.4342,
|
15882 |
+
"step": 22630
|
15883 |
+
},
|
15884 |
+
{
|
15885 |
+
"epoch": 0.57,
|
15886 |
+
"grad_norm": 66.0,
|
15887 |
+
"learning_rate": 2.4949152542372883e-07,
|
15888 |
+
"loss": 1.4675,
|
15889 |
+
"step": 22640
|
15890 |
+
},
|
15891 |
+
{
|
15892 |
+
"epoch": 0.57,
|
15893 |
+
"grad_norm": 66.5,
|
15894 |
+
"learning_rate": 2.4915254237288136e-07,
|
15895 |
+
"loss": 1.4961,
|
15896 |
+
"step": 22650
|
15897 |
+
},
|
15898 |
+
{
|
15899 |
+
"epoch": 0.57,
|
15900 |
+
"grad_norm": 67.5,
|
15901 |
+
"learning_rate": 2.488135593220339e-07,
|
15902 |
+
"loss": 1.4907,
|
15903 |
+
"step": 22660
|
15904 |
+
},
|
15905 |
+
{
|
15906 |
+
"epoch": 0.57,
|
15907 |
+
"grad_norm": 68.0,
|
15908 |
+
"learning_rate": 2.484745762711864e-07,
|
15909 |
+
"loss": 1.4894,
|
15910 |
+
"step": 22670
|
15911 |
+
},
|
15912 |
+
{
|
15913 |
+
"epoch": 0.57,
|
15914 |
+
"grad_norm": 66.0,
|
15915 |
+
"learning_rate": 2.48135593220339e-07,
|
15916 |
+
"loss": 1.4661,
|
15917 |
+
"step": 22680
|
15918 |
+
},
|
15919 |
+
{
|
15920 |
+
"epoch": 0.57,
|
15921 |
+
"grad_norm": 65.0,
|
15922 |
+
"learning_rate": 2.477966101694915e-07,
|
15923 |
+
"loss": 1.4717,
|
15924 |
+
"step": 22690
|
15925 |
+
},
|
15926 |
+
{
|
15927 |
+
"epoch": 0.57,
|
15928 |
+
"grad_norm": 63.75,
|
15929 |
+
"learning_rate": 2.4745762711864405e-07,
|
15930 |
+
"loss": 1.4701,
|
15931 |
+
"step": 22700
|
15932 |
+
},
|
15933 |
+
{
|
15934 |
+
"epoch": 0.57,
|
15935 |
+
"grad_norm": 66.0,
|
15936 |
+
"learning_rate": 2.471186440677966e-07,
|
15937 |
+
"loss": 1.4747,
|
15938 |
+
"step": 22710
|
15939 |
+
},
|
15940 |
+
{
|
15941 |
+
"epoch": 0.57,
|
15942 |
+
"grad_norm": 67.0,
|
15943 |
+
"learning_rate": 2.4677966101694916e-07,
|
15944 |
+
"loss": 1.4711,
|
15945 |
+
"step": 22720
|
15946 |
+
},
|
15947 |
+
{
|
15948 |
+
"epoch": 0.57,
|
15949 |
+
"grad_norm": 66.5,
|
15950 |
+
"learning_rate": 2.464406779661017e-07,
|
15951 |
+
"loss": 1.4452,
|
15952 |
+
"step": 22730
|
15953 |
+
},
|
15954 |
+
{
|
15955 |
+
"epoch": 0.57,
|
15956 |
+
"grad_norm": 67.5,
|
15957 |
+
"learning_rate": 2.461016949152542e-07,
|
15958 |
+
"loss": 1.4918,
|
15959 |
+
"step": 22740
|
15960 |
+
},
|
15961 |
+
{
|
15962 |
+
"epoch": 0.57,
|
15963 |
+
"grad_norm": 68.0,
|
15964 |
+
"learning_rate": 2.457627118644068e-07,
|
15965 |
+
"loss": 1.4777,
|
15966 |
+
"step": 22750
|
15967 |
+
},
|
15968 |
+
{
|
15969 |
+
"epoch": 0.57,
|
15970 |
+
"grad_norm": 65.0,
|
15971 |
+
"learning_rate": 2.454237288135593e-07,
|
15972 |
+
"loss": 1.4705,
|
15973 |
+
"step": 22760
|
15974 |
+
},
|
15975 |
+
{
|
15976 |
+
"epoch": 0.57,
|
15977 |
+
"grad_norm": 66.5,
|
15978 |
+
"learning_rate": 2.4508474576271185e-07,
|
15979 |
+
"loss": 1.4481,
|
15980 |
+
"step": 22770
|
15981 |
+
},
|
15982 |
+
{
|
15983 |
+
"epoch": 0.57,
|
15984 |
+
"grad_norm": 67.0,
|
15985 |
+
"learning_rate": 2.447457627118644e-07,
|
15986 |
+
"loss": 1.4624,
|
15987 |
+
"step": 22780
|
15988 |
+
},
|
15989 |
+
{
|
15990 |
+
"epoch": 0.57,
|
15991 |
+
"grad_norm": 65.5,
|
15992 |
+
"learning_rate": 2.4440677966101696e-07,
|
15993 |
+
"loss": 1.4995,
|
15994 |
+
"step": 22790
|
15995 |
+
},
|
15996 |
+
{
|
15997 |
+
"epoch": 0.57,
|
15998 |
+
"grad_norm": 67.0,
|
15999 |
+
"learning_rate": 2.440677966101695e-07,
|
16000 |
+
"loss": 1.4544,
|
16001 |
+
"step": 22800
|
16002 |
+
},
|
16003 |
+
{
|
16004 |
+
"epoch": 0.57,
|
16005 |
+
"grad_norm": 66.0,
|
16006 |
+
"learning_rate": 2.43728813559322e-07,
|
16007 |
+
"loss": 1.4868,
|
16008 |
+
"step": 22810
|
16009 |
+
},
|
16010 |
+
{
|
16011 |
+
"epoch": 0.57,
|
16012 |
+
"grad_norm": 65.5,
|
16013 |
+
"learning_rate": 2.4338983050847454e-07,
|
16014 |
+
"loss": 1.4181,
|
16015 |
+
"step": 22820
|
16016 |
+
},
|
16017 |
+
{
|
16018 |
+
"epoch": 0.57,
|
16019 |
+
"grad_norm": 66.0,
|
16020 |
+
"learning_rate": 2.430508474576271e-07,
|
16021 |
+
"loss": 1.4431,
|
16022 |
+
"step": 22830
|
16023 |
+
},
|
16024 |
+
{
|
16025 |
+
"epoch": 0.57,
|
16026 |
+
"grad_norm": 66.5,
|
16027 |
+
"learning_rate": 2.4271186440677965e-07,
|
16028 |
+
"loss": 1.4813,
|
16029 |
+
"step": 22840
|
16030 |
+
},
|
16031 |
+
{
|
16032 |
+
"epoch": 0.57,
|
16033 |
+
"grad_norm": 66.0,
|
16034 |
+
"learning_rate": 2.423728813559322e-07,
|
16035 |
+
"loss": 1.5211,
|
16036 |
+
"step": 22850
|
16037 |
+
},
|
16038 |
+
{
|
16039 |
+
"epoch": 0.57,
|
16040 |
+
"grad_norm": 65.0,
|
16041 |
+
"learning_rate": 2.420338983050847e-07,
|
16042 |
+
"loss": 1.4331,
|
16043 |
+
"step": 22860
|
16044 |
+
},
|
16045 |
+
{
|
16046 |
+
"epoch": 0.57,
|
16047 |
+
"grad_norm": 66.5,
|
16048 |
+
"learning_rate": 2.416949152542373e-07,
|
16049 |
+
"loss": 1.5019,
|
16050 |
+
"step": 22870
|
16051 |
+
},
|
16052 |
+
{
|
16053 |
+
"epoch": 0.57,
|
16054 |
+
"grad_norm": 68.0,
|
16055 |
+
"learning_rate": 2.413559322033898e-07,
|
16056 |
+
"loss": 1.4635,
|
16057 |
+
"step": 22880
|
16058 |
+
},
|
16059 |
+
{
|
16060 |
+
"epoch": 0.57,
|
16061 |
+
"grad_norm": 67.5,
|
16062 |
+
"learning_rate": 2.4101694915254234e-07,
|
16063 |
+
"loss": 1.4641,
|
16064 |
+
"step": 22890
|
16065 |
+
},
|
16066 |
+
{
|
16067 |
+
"epoch": 0.57,
|
16068 |
+
"grad_norm": 66.0,
|
16069 |
+
"learning_rate": 2.406779661016949e-07,
|
16070 |
+
"loss": 1.4351,
|
16071 |
+
"step": 22900
|
16072 |
+
},
|
16073 |
+
{
|
16074 |
+
"epoch": 0.57,
|
16075 |
+
"grad_norm": 71.0,
|
16076 |
+
"learning_rate": 2.4033898305084745e-07,
|
16077 |
+
"loss": 1.4785,
|
16078 |
+
"step": 22910
|
16079 |
+
},
|
16080 |
+
{
|
16081 |
+
"epoch": 0.57,
|
16082 |
+
"grad_norm": 66.5,
|
16083 |
+
"learning_rate": 2.4e-07,
|
16084 |
+
"loss": 1.4715,
|
16085 |
+
"step": 22920
|
16086 |
+
},
|
16087 |
+
{
|
16088 |
+
"epoch": 0.57,
|
16089 |
+
"grad_norm": 67.5,
|
16090 |
+
"learning_rate": 2.396610169491525e-07,
|
16091 |
+
"loss": 1.4631,
|
16092 |
+
"step": 22930
|
16093 |
+
},
|
16094 |
+
{
|
16095 |
+
"epoch": 0.57,
|
16096 |
+
"grad_norm": 66.5,
|
16097 |
+
"learning_rate": 2.393220338983051e-07,
|
16098 |
+
"loss": 1.5024,
|
16099 |
+
"step": 22940
|
16100 |
+
},
|
16101 |
+
{
|
16102 |
+
"epoch": 0.57,
|
16103 |
+
"grad_norm": 68.0,
|
16104 |
+
"learning_rate": 2.389830508474576e-07,
|
16105 |
+
"loss": 1.4602,
|
16106 |
+
"step": 22950
|
16107 |
+
},
|
16108 |
+
{
|
16109 |
+
"epoch": 0.57,
|
16110 |
+
"grad_norm": 66.5,
|
16111 |
+
"learning_rate": 2.386440677966102e-07,
|
16112 |
+
"loss": 1.3829,
|
16113 |
+
"step": 22960
|
16114 |
+
},
|
16115 |
+
{
|
16116 |
+
"epoch": 0.57,
|
16117 |
+
"grad_norm": 67.0,
|
16118 |
+
"learning_rate": 2.383050847457627e-07,
|
16119 |
+
"loss": 1.4193,
|
16120 |
+
"step": 22970
|
16121 |
+
},
|
16122 |
+
{
|
16123 |
+
"epoch": 0.57,
|
16124 |
+
"grad_norm": 70.5,
|
16125 |
+
"learning_rate": 2.3796610169491525e-07,
|
16126 |
+
"loss": 1.4878,
|
16127 |
+
"step": 22980
|
16128 |
+
},
|
16129 |
+
{
|
16130 |
+
"epoch": 0.57,
|
16131 |
+
"grad_norm": 65.5,
|
16132 |
+
"learning_rate": 2.3762711864406778e-07,
|
16133 |
+
"loss": 1.4852,
|
16134 |
+
"step": 22990
|
16135 |
+
},
|
16136 |
+
{
|
16137 |
+
"epoch": 0.57,
|
16138 |
+
"grad_norm": 70.0,
|
16139 |
+
"learning_rate": 2.3728813559322033e-07,
|
16140 |
+
"loss": 1.4789,
|
16141 |
+
"step": 23000
|
16142 |
+
},
|
16143 |
+
{
|
16144 |
+
"epoch": 0.58,
|
16145 |
+
"grad_norm": 71.0,
|
16146 |
+
"learning_rate": 2.3694915254237289e-07,
|
16147 |
+
"loss": 1.4596,
|
16148 |
+
"step": 23010
|
16149 |
+
},
|
16150 |
+
{
|
16151 |
+
"epoch": 0.58,
|
16152 |
+
"grad_norm": 64.5,
|
16153 |
+
"learning_rate": 2.3661016949152541e-07,
|
16154 |
+
"loss": 1.4706,
|
16155 |
+
"step": 23020
|
16156 |
+
},
|
16157 |
+
{
|
16158 |
+
"epoch": 0.58,
|
16159 |
+
"grad_norm": 68.5,
|
16160 |
+
"learning_rate": 2.3627118644067794e-07,
|
16161 |
+
"loss": 1.4422,
|
16162 |
+
"step": 23030
|
16163 |
+
},
|
16164 |
+
{
|
16165 |
+
"epoch": 0.58,
|
16166 |
+
"grad_norm": 65.0,
|
16167 |
+
"learning_rate": 2.359322033898305e-07,
|
16168 |
+
"loss": 1.441,
|
16169 |
+
"step": 23040
|
16170 |
+
},
|
16171 |
+
{
|
16172 |
+
"epoch": 0.58,
|
16173 |
+
"grad_norm": 67.5,
|
16174 |
+
"learning_rate": 2.3559322033898305e-07,
|
16175 |
+
"loss": 1.4376,
|
16176 |
+
"step": 23050
|
16177 |
+
},
|
16178 |
+
{
|
16179 |
+
"epoch": 0.58,
|
16180 |
+
"grad_norm": 64.5,
|
16181 |
+
"learning_rate": 2.352542372881356e-07,
|
16182 |
+
"loss": 1.4766,
|
16183 |
+
"step": 23060
|
16184 |
+
},
|
16185 |
+
{
|
16186 |
+
"epoch": 0.58,
|
16187 |
+
"grad_norm": 66.0,
|
16188 |
+
"learning_rate": 2.349152542372881e-07,
|
16189 |
+
"loss": 1.482,
|
16190 |
+
"step": 23070
|
16191 |
+
},
|
16192 |
+
{
|
16193 |
+
"epoch": 0.58,
|
16194 |
+
"grad_norm": 65.0,
|
16195 |
+
"learning_rate": 2.3457627118644066e-07,
|
16196 |
+
"loss": 1.4598,
|
16197 |
+
"step": 23080
|
16198 |
+
},
|
16199 |
+
{
|
16200 |
+
"epoch": 0.58,
|
16201 |
+
"grad_norm": 67.5,
|
16202 |
+
"learning_rate": 2.3423728813559321e-07,
|
16203 |
+
"loss": 1.5314,
|
16204 |
+
"step": 23090
|
16205 |
+
},
|
16206 |
+
{
|
16207 |
+
"epoch": 0.58,
|
16208 |
+
"grad_norm": 66.5,
|
16209 |
+
"learning_rate": 2.3389830508474577e-07,
|
16210 |
+
"loss": 1.4375,
|
16211 |
+
"step": 23100
|
16212 |
+
},
|
16213 |
+
{
|
16214 |
+
"epoch": 0.58,
|
16215 |
+
"grad_norm": 68.0,
|
16216 |
+
"learning_rate": 2.335593220338983e-07,
|
16217 |
+
"loss": 1.4928,
|
16218 |
+
"step": 23110
|
16219 |
+
},
|
16220 |
+
{
|
16221 |
+
"epoch": 0.58,
|
16222 |
+
"grad_norm": 67.5,
|
16223 |
+
"learning_rate": 2.3322033898305082e-07,
|
16224 |
+
"loss": 1.4823,
|
16225 |
+
"step": 23120
|
16226 |
+
},
|
16227 |
+
{
|
16228 |
+
"epoch": 0.58,
|
16229 |
+
"grad_norm": 70.0,
|
16230 |
+
"learning_rate": 2.3288135593220338e-07,
|
16231 |
+
"loss": 1.5182,
|
16232 |
+
"step": 23130
|
16233 |
+
},
|
16234 |
+
{
|
16235 |
+
"epoch": 0.58,
|
16236 |
+
"grad_norm": 68.0,
|
16237 |
+
"learning_rate": 2.3254237288135593e-07,
|
16238 |
+
"loss": 1.5249,
|
16239 |
+
"step": 23140
|
16240 |
+
},
|
16241 |
+
{
|
16242 |
+
"epoch": 0.58,
|
16243 |
+
"grad_norm": 67.0,
|
16244 |
+
"learning_rate": 2.3220338983050846e-07,
|
16245 |
+
"loss": 1.4414,
|
16246 |
+
"step": 23150
|
16247 |
+
},
|
16248 |
+
{
|
16249 |
+
"epoch": 0.58,
|
16250 |
+
"grad_norm": 67.0,
|
16251 |
+
"learning_rate": 2.3186440677966101e-07,
|
16252 |
+
"loss": 1.4908,
|
16253 |
+
"step": 23160
|
16254 |
+
},
|
16255 |
+
{
|
16256 |
+
"epoch": 0.58,
|
16257 |
+
"grad_norm": 66.0,
|
16258 |
+
"learning_rate": 2.3152542372881357e-07,
|
16259 |
+
"loss": 1.4741,
|
16260 |
+
"step": 23170
|
16261 |
+
},
|
16262 |
+
{
|
16263 |
+
"epoch": 0.58,
|
16264 |
+
"grad_norm": 64.5,
|
16265 |
+
"learning_rate": 2.311864406779661e-07,
|
16266 |
+
"loss": 1.4474,
|
16267 |
+
"step": 23180
|
16268 |
+
},
|
16269 |
+
{
|
16270 |
+
"epoch": 0.58,
|
16271 |
+
"grad_norm": 65.5,
|
16272 |
+
"learning_rate": 2.3084745762711862e-07,
|
16273 |
+
"loss": 1.498,
|
16274 |
+
"step": 23190
|
16275 |
+
},
|
16276 |
+
{
|
16277 |
+
"epoch": 0.58,
|
16278 |
+
"grad_norm": 66.5,
|
16279 |
+
"learning_rate": 2.3050847457627118e-07,
|
16280 |
+
"loss": 1.4339,
|
16281 |
+
"step": 23200
|
16282 |
+
},
|
16283 |
+
{
|
16284 |
+
"epoch": 0.58,
|
16285 |
+
"grad_norm": 67.5,
|
16286 |
+
"learning_rate": 2.3016949152542373e-07,
|
16287 |
+
"loss": 1.5245,
|
16288 |
+
"step": 23210
|
16289 |
+
},
|
16290 |
+
{
|
16291 |
+
"epoch": 0.58,
|
16292 |
+
"grad_norm": 69.0,
|
16293 |
+
"learning_rate": 2.2983050847457629e-07,
|
16294 |
+
"loss": 1.5048,
|
16295 |
+
"step": 23220
|
16296 |
+
},
|
16297 |
+
{
|
16298 |
+
"epoch": 0.58,
|
16299 |
+
"grad_norm": 65.0,
|
16300 |
+
"learning_rate": 2.2949152542372879e-07,
|
16301 |
+
"loss": 1.47,
|
16302 |
+
"step": 23230
|
16303 |
+
},
|
16304 |
+
{
|
16305 |
+
"epoch": 0.58,
|
16306 |
+
"grad_norm": 65.0,
|
16307 |
+
"learning_rate": 2.2915254237288134e-07,
|
16308 |
+
"loss": 1.4383,
|
16309 |
+
"step": 23240
|
16310 |
+
},
|
16311 |
+
{
|
16312 |
+
"epoch": 0.58,
|
16313 |
+
"grad_norm": 69.0,
|
16314 |
+
"learning_rate": 2.288135593220339e-07,
|
16315 |
+
"loss": 1.5034,
|
16316 |
+
"step": 23250
|
16317 |
+
},
|
16318 |
+
{
|
16319 |
+
"epoch": 0.58,
|
16320 |
+
"grad_norm": 63.0,
|
16321 |
+
"learning_rate": 2.2847457627118645e-07,
|
16322 |
+
"loss": 1.4556,
|
16323 |
+
"step": 23260
|
16324 |
+
},
|
16325 |
+
{
|
16326 |
+
"epoch": 0.58,
|
16327 |
+
"grad_norm": 66.5,
|
16328 |
+
"learning_rate": 2.2813559322033898e-07,
|
16329 |
+
"loss": 1.4525,
|
16330 |
+
"step": 23270
|
16331 |
+
},
|
16332 |
+
{
|
16333 |
+
"epoch": 0.58,
|
16334 |
+
"grad_norm": 65.5,
|
16335 |
+
"learning_rate": 2.277966101694915e-07,
|
16336 |
+
"loss": 1.4466,
|
16337 |
+
"step": 23280
|
16338 |
+
},
|
16339 |
+
{
|
16340 |
+
"epoch": 0.58,
|
16341 |
+
"grad_norm": 70.0,
|
16342 |
+
"learning_rate": 2.2745762711864406e-07,
|
16343 |
+
"loss": 1.3894,
|
16344 |
+
"step": 23290
|
16345 |
+
},
|
16346 |
+
{
|
16347 |
+
"epoch": 0.58,
|
16348 |
+
"grad_norm": 62.5,
|
16349 |
+
"learning_rate": 2.271186440677966e-07,
|
16350 |
+
"loss": 1.356,
|
16351 |
+
"step": 23300
|
16352 |
+
},
|
16353 |
+
{
|
16354 |
+
"epoch": 0.58,
|
16355 |
+
"grad_norm": 67.5,
|
16356 |
+
"learning_rate": 2.2677966101694914e-07,
|
16357 |
+
"loss": 1.5171,
|
16358 |
+
"step": 23310
|
16359 |
+
},
|
16360 |
+
{
|
16361 |
+
"epoch": 0.58,
|
16362 |
+
"grad_norm": 66.5,
|
16363 |
+
"learning_rate": 2.264406779661017e-07,
|
16364 |
+
"loss": 1.4655,
|
16365 |
+
"step": 23320
|
16366 |
+
},
|
16367 |
+
{
|
16368 |
+
"epoch": 0.58,
|
16369 |
+
"grad_norm": 67.5,
|
16370 |
+
"learning_rate": 2.2610169491525422e-07,
|
16371 |
+
"loss": 1.4562,
|
16372 |
+
"step": 23330
|
16373 |
+
},
|
16374 |
+
{
|
16375 |
+
"epoch": 0.58,
|
16376 |
+
"grad_norm": 71.0,
|
16377 |
+
"learning_rate": 2.2576271186440678e-07,
|
16378 |
+
"loss": 1.4748,
|
16379 |
+
"step": 23340
|
16380 |
+
},
|
16381 |
+
{
|
16382 |
+
"epoch": 0.58,
|
16383 |
+
"grad_norm": 65.0,
|
16384 |
+
"learning_rate": 2.254237288135593e-07,
|
16385 |
+
"loss": 1.5118,
|
16386 |
+
"step": 23350
|
16387 |
+
},
|
16388 |
+
{
|
16389 |
+
"epoch": 0.58,
|
16390 |
+
"grad_norm": 73.0,
|
16391 |
+
"learning_rate": 2.2508474576271186e-07,
|
16392 |
+
"loss": 1.4944,
|
16393 |
+
"step": 23360
|
16394 |
+
},
|
16395 |
+
{
|
16396 |
+
"epoch": 0.58,
|
16397 |
+
"grad_norm": 68.5,
|
16398 |
+
"learning_rate": 2.247457627118644e-07,
|
16399 |
+
"loss": 1.4614,
|
16400 |
+
"step": 23370
|
16401 |
+
},
|
16402 |
+
{
|
16403 |
+
"epoch": 0.58,
|
16404 |
+
"grad_norm": 68.0,
|
16405 |
+
"learning_rate": 2.2440677966101691e-07,
|
16406 |
+
"loss": 1.4759,
|
16407 |
+
"step": 23380
|
16408 |
+
},
|
16409 |
+
{
|
16410 |
+
"epoch": 0.58,
|
16411 |
+
"grad_norm": 73.0,
|
16412 |
+
"learning_rate": 2.2406779661016947e-07,
|
16413 |
+
"loss": 1.4913,
|
16414 |
+
"step": 23390
|
16415 |
+
},
|
16416 |
+
{
|
16417 |
+
"epoch": 0.58,
|
16418 |
+
"grad_norm": 63.5,
|
16419 |
+
"learning_rate": 2.2372881355932202e-07,
|
16420 |
+
"loss": 1.4718,
|
16421 |
+
"step": 23400
|
16422 |
+
},
|
16423 |
+
{
|
16424 |
+
"epoch": 0.59,
|
16425 |
+
"grad_norm": 67.5,
|
16426 |
+
"learning_rate": 2.2338983050847458e-07,
|
16427 |
+
"loss": 1.4678,
|
16428 |
+
"step": 23410
|
16429 |
+
},
|
16430 |
+
{
|
16431 |
+
"epoch": 0.59,
|
16432 |
+
"grad_norm": 71.0,
|
16433 |
+
"learning_rate": 2.2305084745762713e-07,
|
16434 |
+
"loss": 1.49,
|
16435 |
+
"step": 23420
|
16436 |
+
},
|
16437 |
+
{
|
16438 |
+
"epoch": 0.59,
|
16439 |
+
"grad_norm": 72.0,
|
16440 |
+
"learning_rate": 2.2271186440677966e-07,
|
16441 |
+
"loss": 1.4582,
|
16442 |
+
"step": 23430
|
16443 |
+
},
|
16444 |
+
{
|
16445 |
+
"epoch": 0.59,
|
16446 |
+
"grad_norm": 65.5,
|
16447 |
+
"learning_rate": 2.2237288135593219e-07,
|
16448 |
+
"loss": 1.4385,
|
16449 |
+
"step": 23440
|
16450 |
+
},
|
16451 |
+
{
|
16452 |
+
"epoch": 0.59,
|
16453 |
+
"grad_norm": 67.0,
|
16454 |
+
"learning_rate": 2.2203389830508474e-07,
|
16455 |
+
"loss": 1.5076,
|
16456 |
+
"step": 23450
|
16457 |
+
},
|
16458 |
+
{
|
16459 |
+
"epoch": 0.59,
|
16460 |
+
"grad_norm": 66.5,
|
16461 |
+
"learning_rate": 2.216949152542373e-07,
|
16462 |
+
"loss": 1.4824,
|
16463 |
+
"step": 23460
|
16464 |
+
},
|
16465 |
+
{
|
16466 |
+
"epoch": 0.59,
|
16467 |
+
"grad_norm": 68.0,
|
16468 |
+
"learning_rate": 2.2135593220338982e-07,
|
16469 |
+
"loss": 1.4799,
|
16470 |
+
"step": 23470
|
16471 |
+
},
|
16472 |
+
{
|
16473 |
+
"epoch": 0.59,
|
16474 |
+
"grad_norm": 67.0,
|
16475 |
+
"learning_rate": 2.2101694915254238e-07,
|
16476 |
+
"loss": 1.4648,
|
16477 |
+
"step": 23480
|
16478 |
+
},
|
16479 |
+
{
|
16480 |
+
"epoch": 0.59,
|
16481 |
+
"grad_norm": 67.0,
|
16482 |
+
"learning_rate": 2.206779661016949e-07,
|
16483 |
+
"loss": 1.4858,
|
16484 |
+
"step": 23490
|
16485 |
+
},
|
16486 |
+
{
|
16487 |
+
"epoch": 0.59,
|
16488 |
+
"grad_norm": 68.0,
|
16489 |
+
"learning_rate": 2.2033898305084743e-07,
|
16490 |
+
"loss": 1.5103,
|
16491 |
+
"step": 23500
|
16492 |
+
},
|
16493 |
+
{
|
16494 |
+
"epoch": 0.59,
|
16495 |
+
"grad_norm": 67.5,
|
16496 |
+
"learning_rate": 2.1999999999999998e-07,
|
16497 |
+
"loss": 1.4786,
|
16498 |
+
"step": 23510
|
16499 |
+
},
|
16500 |
+
{
|
16501 |
+
"epoch": 0.59,
|
16502 |
+
"grad_norm": 66.5,
|
16503 |
+
"learning_rate": 2.1966101694915254e-07,
|
16504 |
+
"loss": 1.3982,
|
16505 |
+
"step": 23520
|
16506 |
+
},
|
16507 |
+
{
|
16508 |
+
"epoch": 0.59,
|
16509 |
+
"grad_norm": 67.0,
|
16510 |
+
"learning_rate": 2.193220338983051e-07,
|
16511 |
+
"loss": 1.4859,
|
16512 |
+
"step": 23530
|
16513 |
+
},
|
16514 |
+
{
|
16515 |
+
"epoch": 0.59,
|
16516 |
+
"grad_norm": 64.5,
|
16517 |
+
"learning_rate": 2.189830508474576e-07,
|
16518 |
+
"loss": 1.4449,
|
16519 |
+
"step": 23540
|
16520 |
+
},
|
16521 |
+
{
|
16522 |
+
"epoch": 0.59,
|
16523 |
+
"grad_norm": 66.0,
|
16524 |
+
"learning_rate": 2.1864406779661015e-07,
|
16525 |
+
"loss": 1.425,
|
16526 |
+
"step": 23550
|
16527 |
+
},
|
16528 |
+
{
|
16529 |
+
"epoch": 0.59,
|
16530 |
+
"grad_norm": 67.0,
|
16531 |
+
"learning_rate": 2.183050847457627e-07,
|
16532 |
+
"loss": 1.4306,
|
16533 |
+
"step": 23560
|
16534 |
+
},
|
16535 |
+
{
|
16536 |
+
"epoch": 0.59,
|
16537 |
+
"grad_norm": 66.0,
|
16538 |
+
"learning_rate": 2.1796610169491526e-07,
|
16539 |
+
"loss": 1.3738,
|
16540 |
+
"step": 23570
|
16541 |
+
},
|
16542 |
+
{
|
16543 |
+
"epoch": 0.59,
|
16544 |
+
"grad_norm": 64.0,
|
16545 |
+
"learning_rate": 2.1762711864406778e-07,
|
16546 |
+
"loss": 1.4323,
|
16547 |
+
"step": 23580
|
16548 |
+
},
|
16549 |
+
{
|
16550 |
+
"epoch": 0.59,
|
16551 |
+
"grad_norm": 69.0,
|
16552 |
+
"learning_rate": 2.172881355932203e-07,
|
16553 |
+
"loss": 1.4396,
|
16554 |
+
"step": 23590
|
16555 |
+
},
|
16556 |
+
{
|
16557 |
+
"epoch": 0.59,
|
16558 |
+
"grad_norm": 68.0,
|
16559 |
+
"learning_rate": 2.1694915254237287e-07,
|
16560 |
+
"loss": 1.4858,
|
16561 |
+
"step": 23600
|
16562 |
+
},
|
16563 |
+
{
|
16564 |
+
"epoch": 0.59,
|
16565 |
+
"grad_norm": 66.5,
|
16566 |
+
"learning_rate": 2.1661016949152542e-07,
|
16567 |
+
"loss": 1.5004,
|
16568 |
+
"step": 23610
|
16569 |
+
},
|
16570 |
+
{
|
16571 |
+
"epoch": 0.59,
|
16572 |
+
"grad_norm": 70.0,
|
16573 |
+
"learning_rate": 2.1627118644067795e-07,
|
16574 |
+
"loss": 1.5145,
|
16575 |
+
"step": 23620
|
16576 |
+
},
|
16577 |
+
{
|
16578 |
+
"epoch": 0.59,
|
16579 |
+
"grad_norm": 65.0,
|
16580 |
+
"learning_rate": 2.159322033898305e-07,
|
16581 |
+
"loss": 1.4797,
|
16582 |
+
"step": 23630
|
16583 |
+
},
|
16584 |
+
{
|
16585 |
+
"epoch": 0.59,
|
16586 |
+
"grad_norm": 66.5,
|
16587 |
+
"learning_rate": 2.1559322033898303e-07,
|
16588 |
+
"loss": 1.4935,
|
16589 |
+
"step": 23640
|
16590 |
+
},
|
16591 |
+
{
|
16592 |
+
"epoch": 0.59,
|
16593 |
+
"grad_norm": 68.5,
|
16594 |
+
"learning_rate": 2.1525423728813558e-07,
|
16595 |
+
"loss": 1.4311,
|
16596 |
+
"step": 23650
|
16597 |
+
},
|
16598 |
+
{
|
16599 |
+
"epoch": 0.59,
|
16600 |
+
"grad_norm": 66.0,
|
16601 |
+
"learning_rate": 2.149152542372881e-07,
|
16602 |
+
"loss": 1.5181,
|
16603 |
+
"step": 23660
|
16604 |
+
},
|
16605 |
+
{
|
16606 |
+
"epoch": 0.59,
|
16607 |
+
"grad_norm": 74.0,
|
16608 |
+
"learning_rate": 2.1457627118644067e-07,
|
16609 |
+
"loss": 1.4418,
|
16610 |
+
"step": 23670
|
16611 |
+
},
|
16612 |
+
{
|
16613 |
+
"epoch": 0.59,
|
16614 |
+
"grad_norm": 66.5,
|
16615 |
+
"learning_rate": 2.1423728813559322e-07,
|
16616 |
+
"loss": 1.4931,
|
16617 |
+
"step": 23680
|
16618 |
+
},
|
16619 |
+
{
|
16620 |
+
"epoch": 0.59,
|
16621 |
+
"grad_norm": 67.5,
|
16622 |
+
"learning_rate": 2.1389830508474575e-07,
|
16623 |
+
"loss": 1.4365,
|
16624 |
+
"step": 23690
|
16625 |
+
},
|
16626 |
+
{
|
16627 |
+
"epoch": 0.59,
|
16628 |
+
"grad_norm": 70.0,
|
16629 |
+
"learning_rate": 2.1355932203389828e-07,
|
16630 |
+
"loss": 1.4576,
|
16631 |
+
"step": 23700
|
16632 |
+
},
|
16633 |
+
{
|
16634 |
+
"epoch": 0.59,
|
16635 |
+
"grad_norm": 66.5,
|
16636 |
+
"learning_rate": 2.1322033898305083e-07,
|
16637 |
+
"loss": 1.4804,
|
16638 |
+
"step": 23710
|
16639 |
+
},
|
16640 |
+
{
|
16641 |
+
"epoch": 0.59,
|
16642 |
+
"grad_norm": 65.5,
|
16643 |
+
"learning_rate": 2.1288135593220338e-07,
|
16644 |
+
"loss": 1.4659,
|
16645 |
+
"step": 23720
|
16646 |
+
},
|
16647 |
+
{
|
16648 |
+
"epoch": 0.59,
|
16649 |
+
"grad_norm": 66.0,
|
16650 |
+
"learning_rate": 2.1254237288135594e-07,
|
16651 |
+
"loss": 1.4558,
|
16652 |
+
"step": 23730
|
16653 |
+
},
|
16654 |
+
{
|
16655 |
+
"epoch": 0.59,
|
16656 |
+
"grad_norm": 63.75,
|
16657 |
+
"learning_rate": 2.1220338983050847e-07,
|
16658 |
+
"loss": 1.4657,
|
16659 |
+
"step": 23740
|
16660 |
+
},
|
16661 |
+
{
|
16662 |
+
"epoch": 0.59,
|
16663 |
+
"grad_norm": 71.5,
|
16664 |
+
"learning_rate": 2.11864406779661e-07,
|
16665 |
+
"loss": 1.4803,
|
16666 |
+
"step": 23750
|
16667 |
+
},
|
16668 |
+
{
|
16669 |
+
"epoch": 0.59,
|
16670 |
+
"grad_norm": 66.5,
|
16671 |
+
"learning_rate": 2.1152542372881355e-07,
|
16672 |
+
"loss": 1.4303,
|
16673 |
+
"step": 23760
|
16674 |
+
},
|
16675 |
+
{
|
16676 |
+
"epoch": 0.59,
|
16677 |
+
"grad_norm": 66.0,
|
16678 |
+
"learning_rate": 2.111864406779661e-07,
|
16679 |
+
"loss": 1.4866,
|
16680 |
+
"step": 23770
|
16681 |
+
},
|
16682 |
+
{
|
16683 |
+
"epoch": 0.59,
|
16684 |
+
"grad_norm": 63.5,
|
16685 |
+
"learning_rate": 2.1084745762711863e-07,
|
16686 |
+
"loss": 1.4306,
|
16687 |
+
"step": 23780
|
16688 |
+
},
|
16689 |
+
{
|
16690 |
+
"epoch": 0.59,
|
16691 |
+
"grad_norm": 67.5,
|
16692 |
+
"learning_rate": 2.1050847457627118e-07,
|
16693 |
+
"loss": 1.4288,
|
16694 |
+
"step": 23790
|
16695 |
+
},
|
16696 |
+
{
|
16697 |
+
"epoch": 0.59,
|
16698 |
+
"grad_norm": 67.0,
|
16699 |
+
"learning_rate": 2.101694915254237e-07,
|
16700 |
+
"loss": 1.4637,
|
16701 |
+
"step": 23800
|
16702 |
+
},
|
16703 |
+
{
|
16704 |
+
"epoch": 0.6,
|
16705 |
+
"grad_norm": 67.5,
|
16706 |
+
"learning_rate": 2.0983050847457626e-07,
|
16707 |
+
"loss": 1.515,
|
16708 |
+
"step": 23810
|
16709 |
+
},
|
16710 |
+
{
|
16711 |
+
"epoch": 0.6,
|
16712 |
+
"grad_norm": 70.0,
|
16713 |
+
"learning_rate": 2.094915254237288e-07,
|
16714 |
+
"loss": 1.4255,
|
16715 |
+
"step": 23820
|
16716 |
+
},
|
16717 |
+
{
|
16718 |
+
"epoch": 0.6,
|
16719 |
+
"grad_norm": 67.5,
|
16720 |
+
"learning_rate": 2.0915254237288135e-07,
|
16721 |
+
"loss": 1.4935,
|
16722 |
+
"step": 23830
|
16723 |
+
},
|
16724 |
+
{
|
16725 |
+
"epoch": 0.6,
|
16726 |
+
"grad_norm": 68.5,
|
16727 |
+
"learning_rate": 2.088135593220339e-07,
|
16728 |
+
"loss": 1.5427,
|
16729 |
+
"step": 23840
|
16730 |
+
},
|
16731 |
+
{
|
16732 |
+
"epoch": 0.6,
|
16733 |
+
"grad_norm": 66.5,
|
16734 |
+
"learning_rate": 2.0847457627118643e-07,
|
16735 |
+
"loss": 1.5009,
|
16736 |
+
"step": 23850
|
16737 |
+
},
|
16738 |
+
{
|
16739 |
+
"epoch": 0.6,
|
16740 |
+
"grad_norm": 69.5,
|
16741 |
+
"learning_rate": 2.0813559322033896e-07,
|
16742 |
+
"loss": 1.5038,
|
16743 |
+
"step": 23860
|
16744 |
+
},
|
16745 |
+
{
|
16746 |
+
"epoch": 0.6,
|
16747 |
+
"grad_norm": 67.0,
|
16748 |
+
"learning_rate": 2.077966101694915e-07,
|
16749 |
+
"loss": 1.4524,
|
16750 |
+
"step": 23870
|
16751 |
+
},
|
16752 |
+
{
|
16753 |
+
"epoch": 0.6,
|
16754 |
+
"grad_norm": 69.0,
|
16755 |
+
"learning_rate": 2.0745762711864406e-07,
|
16756 |
+
"loss": 1.4575,
|
16757 |
+
"step": 23880
|
16758 |
+
},
|
16759 |
+
{
|
16760 |
+
"epoch": 0.6,
|
16761 |
+
"grad_norm": 67.0,
|
16762 |
+
"learning_rate": 2.0711864406779662e-07,
|
16763 |
+
"loss": 1.4309,
|
16764 |
+
"step": 23890
|
16765 |
+
},
|
16766 |
+
{
|
16767 |
+
"epoch": 0.6,
|
16768 |
+
"grad_norm": 70.0,
|
16769 |
+
"learning_rate": 2.0677966101694912e-07,
|
16770 |
+
"loss": 1.4585,
|
16771 |
+
"step": 23900
|
16772 |
+
},
|
16773 |
+
{
|
16774 |
+
"epoch": 0.6,
|
16775 |
+
"grad_norm": 64.0,
|
16776 |
+
"learning_rate": 2.0644067796610167e-07,
|
16777 |
+
"loss": 1.4503,
|
16778 |
+
"step": 23910
|
16779 |
+
},
|
16780 |
+
{
|
16781 |
+
"epoch": 0.6,
|
16782 |
+
"grad_norm": 71.0,
|
16783 |
+
"learning_rate": 2.0610169491525423e-07,
|
16784 |
+
"loss": 1.4801,
|
16785 |
+
"step": 23920
|
16786 |
+
},
|
16787 |
+
{
|
16788 |
+
"epoch": 0.6,
|
16789 |
+
"grad_norm": 67.5,
|
16790 |
+
"learning_rate": 2.0576271186440678e-07,
|
16791 |
+
"loss": 1.4773,
|
16792 |
+
"step": 23930
|
16793 |
+
},
|
16794 |
+
{
|
16795 |
+
"epoch": 0.6,
|
16796 |
+
"grad_norm": 66.0,
|
16797 |
+
"learning_rate": 2.054237288135593e-07,
|
16798 |
+
"loss": 1.4497,
|
16799 |
+
"step": 23940
|
16800 |
+
},
|
16801 |
+
{
|
16802 |
+
"epoch": 0.6,
|
16803 |
+
"grad_norm": 66.0,
|
16804 |
+
"learning_rate": 2.0508474576271184e-07,
|
16805 |
+
"loss": 1.4953,
|
16806 |
+
"step": 23950
|
16807 |
+
},
|
16808 |
+
{
|
16809 |
+
"epoch": 0.6,
|
16810 |
+
"grad_norm": 65.0,
|
16811 |
+
"learning_rate": 2.047457627118644e-07,
|
16812 |
+
"loss": 1.5234,
|
16813 |
+
"step": 23960
|
16814 |
+
},
|
16815 |
+
{
|
16816 |
+
"epoch": 0.6,
|
16817 |
+
"grad_norm": 67.0,
|
16818 |
+
"learning_rate": 2.0440677966101695e-07,
|
16819 |
+
"loss": 1.4482,
|
16820 |
+
"step": 23970
|
16821 |
+
},
|
16822 |
+
{
|
16823 |
+
"epoch": 0.6,
|
16824 |
+
"grad_norm": 68.0,
|
16825 |
+
"learning_rate": 2.0406779661016947e-07,
|
16826 |
+
"loss": 1.4598,
|
16827 |
+
"step": 23980
|
16828 |
+
},
|
16829 |
+
{
|
16830 |
+
"epoch": 0.6,
|
16831 |
+
"grad_norm": 67.0,
|
16832 |
+
"learning_rate": 2.0372881355932203e-07,
|
16833 |
+
"loss": 1.4573,
|
16834 |
+
"step": 23990
|
16835 |
+
},
|
16836 |
+
{
|
16837 |
+
"epoch": 0.6,
|
16838 |
+
"grad_norm": 67.0,
|
16839 |
+
"learning_rate": 2.0338983050847458e-07,
|
16840 |
+
"loss": 1.4849,
|
16841 |
+
"step": 24000
|
16842 |
+
},
|
16843 |
+
{
|
16844 |
+
"epoch": 0.6,
|
16845 |
+
"grad_norm": 68.0,
|
16846 |
+
"learning_rate": 2.030508474576271e-07,
|
16847 |
+
"loss": 1.4329,
|
16848 |
+
"step": 24010
|
16849 |
+
},
|
16850 |
+
{
|
16851 |
+
"epoch": 0.6,
|
16852 |
+
"grad_norm": 67.0,
|
16853 |
+
"learning_rate": 2.0271186440677964e-07,
|
16854 |
+
"loss": 1.481,
|
16855 |
+
"step": 24020
|
16856 |
+
},
|
16857 |
+
{
|
16858 |
+
"epoch": 0.6,
|
16859 |
+
"grad_norm": 65.0,
|
16860 |
+
"learning_rate": 2.023728813559322e-07,
|
16861 |
+
"loss": 1.4037,
|
16862 |
+
"step": 24030
|
16863 |
+
},
|
16864 |
+
{
|
16865 |
+
"epoch": 0.6,
|
16866 |
+
"grad_norm": 68.5,
|
16867 |
+
"learning_rate": 2.0203389830508475e-07,
|
16868 |
+
"loss": 1.4665,
|
16869 |
+
"step": 24040
|
16870 |
+
},
|
16871 |
+
{
|
16872 |
+
"epoch": 0.6,
|
16873 |
+
"grad_norm": 64.5,
|
16874 |
+
"learning_rate": 2.016949152542373e-07,
|
16875 |
+
"loss": 1.4291,
|
16876 |
+
"step": 24050
|
16877 |
+
},
|
16878 |
+
{
|
16879 |
+
"epoch": 0.6,
|
16880 |
+
"grad_norm": 68.5,
|
16881 |
+
"learning_rate": 2.013559322033898e-07,
|
16882 |
+
"loss": 1.4684,
|
16883 |
+
"step": 24060
|
16884 |
+
},
|
16885 |
+
{
|
16886 |
+
"epoch": 0.6,
|
16887 |
+
"grad_norm": 69.5,
|
16888 |
+
"learning_rate": 2.0101694915254235e-07,
|
16889 |
+
"loss": 1.4836,
|
16890 |
+
"step": 24070
|
16891 |
+
},
|
16892 |
+
{
|
16893 |
+
"epoch": 0.6,
|
16894 |
+
"grad_norm": 65.5,
|
16895 |
+
"learning_rate": 2.006779661016949e-07,
|
16896 |
+
"loss": 1.4914,
|
16897 |
+
"step": 24080
|
16898 |
+
},
|
16899 |
+
{
|
16900 |
+
"epoch": 0.6,
|
16901 |
+
"grad_norm": 64.0,
|
16902 |
+
"learning_rate": 2.0033898305084746e-07,
|
16903 |
+
"loss": 1.4268,
|
16904 |
+
"step": 24090
|
16905 |
+
},
|
16906 |
+
{
|
16907 |
+
"epoch": 0.6,
|
16908 |
+
"grad_norm": 66.5,
|
16909 |
+
"learning_rate": 2e-07,
|
16910 |
+
"loss": 1.5189,
|
16911 |
+
"step": 24100
|
16912 |
+
},
|
16913 |
+
{
|
16914 |
+
"epoch": 0.6,
|
16915 |
+
"grad_norm": 69.5,
|
16916 |
+
"learning_rate": 1.9966101694915252e-07,
|
16917 |
+
"loss": 1.4294,
|
16918 |
+
"step": 24110
|
16919 |
+
},
|
16920 |
+
{
|
16921 |
+
"epoch": 0.6,
|
16922 |
+
"grad_norm": 66.0,
|
16923 |
+
"learning_rate": 1.9932203389830507e-07,
|
16924 |
+
"loss": 1.4163,
|
16925 |
+
"step": 24120
|
16926 |
+
},
|
16927 |
+
{
|
16928 |
+
"epoch": 0.6,
|
16929 |
+
"grad_norm": 66.0,
|
16930 |
+
"learning_rate": 1.9898305084745763e-07,
|
16931 |
+
"loss": 1.4475,
|
16932 |
+
"step": 24130
|
16933 |
+
},
|
16934 |
+
{
|
16935 |
+
"epoch": 0.6,
|
16936 |
+
"grad_norm": 67.0,
|
16937 |
+
"learning_rate": 1.9864406779661015e-07,
|
16938 |
+
"loss": 1.4687,
|
16939 |
+
"step": 24140
|
16940 |
+
},
|
16941 |
+
{
|
16942 |
+
"epoch": 0.6,
|
16943 |
+
"grad_norm": 65.0,
|
16944 |
+
"learning_rate": 1.983050847457627e-07,
|
16945 |
+
"loss": 1.4612,
|
16946 |
+
"step": 24150
|
16947 |
+
},
|
16948 |
+
{
|
16949 |
+
"epoch": 0.6,
|
16950 |
+
"grad_norm": 66.5,
|
16951 |
+
"learning_rate": 1.9796610169491524e-07,
|
16952 |
+
"loss": 1.4849,
|
16953 |
+
"step": 24160
|
16954 |
+
},
|
16955 |
+
{
|
16956 |
+
"epoch": 0.6,
|
16957 |
+
"grad_norm": 65.0,
|
16958 |
+
"learning_rate": 1.976271186440678e-07,
|
16959 |
+
"loss": 1.461,
|
16960 |
+
"step": 24170
|
16961 |
+
},
|
16962 |
+
{
|
16963 |
+
"epoch": 0.6,
|
16964 |
+
"grad_norm": 66.0,
|
16965 |
+
"learning_rate": 1.9728813559322032e-07,
|
16966 |
+
"loss": 1.4663,
|
16967 |
+
"step": 24180
|
16968 |
+
},
|
16969 |
+
{
|
16970 |
+
"epoch": 0.6,
|
16971 |
+
"grad_norm": 68.5,
|
16972 |
+
"learning_rate": 1.9694915254237287e-07,
|
16973 |
+
"loss": 1.4987,
|
16974 |
+
"step": 24190
|
16975 |
+
},
|
16976 |
+
{
|
16977 |
+
"epoch": 0.6,
|
16978 |
+
"grad_norm": 74.0,
|
16979 |
+
"learning_rate": 1.9661016949152543e-07,
|
16980 |
+
"loss": 1.47,
|
16981 |
+
"step": 24200
|
16982 |
+
},
|
16983 |
+
{
|
16984 |
+
"epoch": 0.61,
|
16985 |
+
"grad_norm": 66.5,
|
16986 |
+
"learning_rate": 1.9627118644067795e-07,
|
16987 |
+
"loss": 1.4539,
|
16988 |
+
"step": 24210
|
16989 |
+
},
|
16990 |
+
{
|
16991 |
+
"epoch": 0.61,
|
16992 |
+
"grad_norm": 68.5,
|
16993 |
+
"learning_rate": 1.9593220338983048e-07,
|
16994 |
+
"loss": 1.4931,
|
16995 |
+
"step": 24220
|
16996 |
+
},
|
16997 |
+
{
|
16998 |
+
"epoch": 0.61,
|
16999 |
+
"grad_norm": 70.0,
|
17000 |
+
"learning_rate": 1.9559322033898304e-07,
|
17001 |
+
"loss": 1.507,
|
17002 |
+
"step": 24230
|
17003 |
+
},
|
17004 |
+
{
|
17005 |
+
"epoch": 0.61,
|
17006 |
+
"grad_norm": 69.0,
|
17007 |
+
"learning_rate": 1.952542372881356e-07,
|
17008 |
+
"loss": 1.4408,
|
17009 |
+
"step": 24240
|
17010 |
+
},
|
17011 |
+
{
|
17012 |
+
"epoch": 0.61,
|
17013 |
+
"grad_norm": 71.0,
|
17014 |
+
"learning_rate": 1.9491525423728814e-07,
|
17015 |
+
"loss": 1.4734,
|
17016 |
+
"step": 24250
|
17017 |
+
},
|
17018 |
+
{
|
17019 |
+
"epoch": 0.61,
|
17020 |
+
"grad_norm": 64.0,
|
17021 |
+
"learning_rate": 1.9457627118644067e-07,
|
17022 |
+
"loss": 1.4702,
|
17023 |
+
"step": 24260
|
17024 |
+
},
|
17025 |
+
{
|
17026 |
+
"epoch": 0.61,
|
17027 |
+
"grad_norm": 68.0,
|
17028 |
+
"learning_rate": 1.942372881355932e-07,
|
17029 |
+
"loss": 1.4114,
|
17030 |
+
"step": 24270
|
17031 |
+
},
|
17032 |
+
{
|
17033 |
+
"epoch": 0.61,
|
17034 |
+
"grad_norm": 67.0,
|
17035 |
+
"learning_rate": 1.9389830508474575e-07,
|
17036 |
+
"loss": 1.4777,
|
17037 |
+
"step": 24280
|
17038 |
+
},
|
17039 |
+
{
|
17040 |
+
"epoch": 0.61,
|
17041 |
+
"grad_norm": 65.5,
|
17042 |
+
"learning_rate": 1.935593220338983e-07,
|
17043 |
+
"loss": 1.4162,
|
17044 |
+
"step": 24290
|
17045 |
+
},
|
17046 |
+
{
|
17047 |
+
"epoch": 0.61,
|
17048 |
+
"grad_norm": 68.0,
|
17049 |
+
"learning_rate": 1.9322033898305084e-07,
|
17050 |
+
"loss": 1.4881,
|
17051 |
+
"step": 24300
|
17052 |
+
},
|
17053 |
+
{
|
17054 |
+
"epoch": 0.61,
|
17055 |
+
"grad_norm": 65.0,
|
17056 |
+
"learning_rate": 1.928813559322034e-07,
|
17057 |
+
"loss": 1.4465,
|
17058 |
+
"step": 24310
|
17059 |
+
},
|
17060 |
+
{
|
17061 |
+
"epoch": 0.61,
|
17062 |
+
"grad_norm": 65.5,
|
17063 |
+
"learning_rate": 1.9254237288135592e-07,
|
17064 |
+
"loss": 1.4648,
|
17065 |
+
"step": 24320
|
17066 |
+
},
|
17067 |
+
{
|
17068 |
+
"epoch": 0.61,
|
17069 |
+
"grad_norm": 64.5,
|
17070 |
+
"learning_rate": 1.9220338983050847e-07,
|
17071 |
+
"loss": 1.4391,
|
17072 |
+
"step": 24330
|
17073 |
+
},
|
17074 |
+
{
|
17075 |
+
"epoch": 0.61,
|
17076 |
+
"grad_norm": 71.5,
|
17077 |
+
"learning_rate": 1.91864406779661e-07,
|
17078 |
+
"loss": 1.488,
|
17079 |
+
"step": 24340
|
17080 |
+
},
|
17081 |
+
{
|
17082 |
+
"epoch": 0.61,
|
17083 |
+
"grad_norm": 65.5,
|
17084 |
+
"learning_rate": 1.9152542372881355e-07,
|
17085 |
+
"loss": 1.4339,
|
17086 |
+
"step": 24350
|
17087 |
+
},
|
17088 |
+
{
|
17089 |
+
"epoch": 0.61,
|
17090 |
+
"grad_norm": 68.5,
|
17091 |
+
"learning_rate": 1.911864406779661e-07,
|
17092 |
+
"loss": 1.4563,
|
17093 |
+
"step": 24360
|
17094 |
+
},
|
17095 |
+
{
|
17096 |
+
"epoch": 0.61,
|
17097 |
+
"grad_norm": 69.5,
|
17098 |
+
"learning_rate": 1.9084745762711864e-07,
|
17099 |
+
"loss": 1.4788,
|
17100 |
+
"step": 24370
|
17101 |
+
},
|
17102 |
+
{
|
17103 |
+
"epoch": 0.61,
|
17104 |
+
"grad_norm": 66.0,
|
17105 |
+
"learning_rate": 1.9050847457627116e-07,
|
17106 |
+
"loss": 1.4948,
|
17107 |
+
"step": 24380
|
17108 |
+
},
|
17109 |
+
{
|
17110 |
+
"epoch": 0.61,
|
17111 |
+
"grad_norm": 66.5,
|
17112 |
+
"learning_rate": 1.9016949152542372e-07,
|
17113 |
+
"loss": 1.4515,
|
17114 |
+
"step": 24390
|
17115 |
+
},
|
17116 |
+
{
|
17117 |
+
"epoch": 0.61,
|
17118 |
+
"grad_norm": 65.5,
|
17119 |
+
"learning_rate": 1.8983050847457627e-07,
|
17120 |
+
"loss": 1.4362,
|
17121 |
+
"step": 24400
|
17122 |
+
},
|
17123 |
+
{
|
17124 |
+
"epoch": 0.61,
|
17125 |
+
"grad_norm": 69.0,
|
17126 |
+
"learning_rate": 1.8949152542372883e-07,
|
17127 |
+
"loss": 1.4519,
|
17128 |
+
"step": 24410
|
17129 |
+
},
|
17130 |
+
{
|
17131 |
+
"epoch": 0.61,
|
17132 |
+
"grad_norm": 67.5,
|
17133 |
+
"learning_rate": 1.8915254237288133e-07,
|
17134 |
+
"loss": 1.4351,
|
17135 |
+
"step": 24420
|
17136 |
+
},
|
17137 |
+
{
|
17138 |
+
"epoch": 0.61,
|
17139 |
+
"grad_norm": 68.5,
|
17140 |
+
"learning_rate": 1.8881355932203388e-07,
|
17141 |
+
"loss": 1.4866,
|
17142 |
+
"step": 24430
|
17143 |
+
},
|
17144 |
+
{
|
17145 |
+
"epoch": 0.61,
|
17146 |
+
"grad_norm": 66.0,
|
17147 |
+
"learning_rate": 1.8847457627118643e-07,
|
17148 |
+
"loss": 1.4731,
|
17149 |
+
"step": 24440
|
17150 |
+
},
|
17151 |
+
{
|
17152 |
+
"epoch": 0.61,
|
17153 |
+
"grad_norm": 66.0,
|
17154 |
+
"learning_rate": 1.88135593220339e-07,
|
17155 |
+
"loss": 1.4202,
|
17156 |
+
"step": 24450
|
17157 |
+
},
|
17158 |
+
{
|
17159 |
+
"epoch": 0.61,
|
17160 |
+
"grad_norm": 67.0,
|
17161 |
+
"learning_rate": 1.8779661016949152e-07,
|
17162 |
+
"loss": 1.4834,
|
17163 |
+
"step": 24460
|
17164 |
+
},
|
17165 |
+
{
|
17166 |
+
"epoch": 0.61,
|
17167 |
+
"grad_norm": 69.5,
|
17168 |
+
"learning_rate": 1.8745762711864404e-07,
|
17169 |
+
"loss": 1.4601,
|
17170 |
+
"step": 24470
|
17171 |
+
},
|
17172 |
+
{
|
17173 |
+
"epoch": 0.61,
|
17174 |
+
"grad_norm": 69.0,
|
17175 |
+
"learning_rate": 1.871186440677966e-07,
|
17176 |
+
"loss": 1.4693,
|
17177 |
+
"step": 24480
|
17178 |
+
},
|
17179 |
+
{
|
17180 |
+
"epoch": 0.61,
|
17181 |
+
"grad_norm": 65.0,
|
17182 |
+
"learning_rate": 1.8677966101694915e-07,
|
17183 |
+
"loss": 1.4383,
|
17184 |
+
"step": 24490
|
17185 |
+
},
|
17186 |
+
{
|
17187 |
+
"epoch": 0.61,
|
17188 |
+
"grad_norm": 63.75,
|
17189 |
+
"learning_rate": 1.8644067796610168e-07,
|
17190 |
+
"loss": 1.4926,
|
17191 |
+
"step": 24500
|
17192 |
+
},
|
17193 |
+
{
|
17194 |
+
"epoch": 0.61,
|
17195 |
+
"grad_norm": 63.0,
|
17196 |
+
"learning_rate": 1.8610169491525423e-07,
|
17197 |
+
"loss": 1.4355,
|
17198 |
+
"step": 24510
|
17199 |
+
},
|
17200 |
+
{
|
17201 |
+
"epoch": 0.61,
|
17202 |
+
"grad_norm": 64.5,
|
17203 |
+
"learning_rate": 1.857627118644068e-07,
|
17204 |
+
"loss": 1.474,
|
17205 |
+
"step": 24520
|
17206 |
+
},
|
17207 |
+
{
|
17208 |
+
"epoch": 0.61,
|
17209 |
+
"grad_norm": 67.5,
|
17210 |
+
"learning_rate": 1.8542372881355932e-07,
|
17211 |
+
"loss": 1.4319,
|
17212 |
+
"step": 24530
|
17213 |
+
},
|
17214 |
+
{
|
17215 |
+
"epoch": 0.61,
|
17216 |
+
"grad_norm": 68.5,
|
17217 |
+
"learning_rate": 1.8508474576271184e-07,
|
17218 |
+
"loss": 1.469,
|
17219 |
+
"step": 24540
|
17220 |
+
},
|
17221 |
+
{
|
17222 |
+
"epoch": 0.61,
|
17223 |
+
"grad_norm": 66.0,
|
17224 |
+
"learning_rate": 1.847457627118644e-07,
|
17225 |
+
"loss": 1.4818,
|
17226 |
+
"step": 24550
|
17227 |
+
},
|
17228 |
+
{
|
17229 |
+
"epoch": 0.61,
|
17230 |
+
"grad_norm": 72.5,
|
17231 |
+
"learning_rate": 1.8440677966101695e-07,
|
17232 |
+
"loss": 1.4986,
|
17233 |
+
"step": 24560
|
17234 |
+
},
|
17235 |
+
{
|
17236 |
+
"epoch": 0.61,
|
17237 |
+
"grad_norm": 64.5,
|
17238 |
+
"learning_rate": 1.840677966101695e-07,
|
17239 |
+
"loss": 1.4524,
|
17240 |
+
"step": 24570
|
17241 |
+
},
|
17242 |
+
{
|
17243 |
+
"epoch": 0.61,
|
17244 |
+
"grad_norm": 65.0,
|
17245 |
+
"learning_rate": 1.83728813559322e-07,
|
17246 |
+
"loss": 1.5097,
|
17247 |
+
"step": 24580
|
17248 |
+
},
|
17249 |
+
{
|
17250 |
+
"epoch": 0.61,
|
17251 |
+
"grad_norm": 68.5,
|
17252 |
+
"learning_rate": 1.8338983050847456e-07,
|
17253 |
+
"loss": 1.4928,
|
17254 |
+
"step": 24590
|
17255 |
+
},
|
17256 |
+
{
|
17257 |
+
"epoch": 0.61,
|
17258 |
+
"grad_norm": 67.5,
|
17259 |
+
"learning_rate": 1.8305084745762712e-07,
|
17260 |
+
"loss": 1.4628,
|
17261 |
+
"step": 24600
|
17262 |
+
},
|
17263 |
+
{
|
17264 |
+
"epoch": 0.62,
|
17265 |
+
"grad_norm": 62.75,
|
17266 |
+
"learning_rate": 1.8271186440677967e-07,
|
17267 |
+
"loss": 1.4052,
|
17268 |
+
"step": 24610
|
17269 |
+
},
|
17270 |
+
{
|
17271 |
+
"epoch": 0.62,
|
17272 |
+
"grad_norm": 67.0,
|
17273 |
+
"learning_rate": 1.823728813559322e-07,
|
17274 |
+
"loss": 1.4308,
|
17275 |
+
"step": 24620
|
17276 |
+
},
|
17277 |
+
{
|
17278 |
+
"epoch": 0.62,
|
17279 |
+
"grad_norm": 70.5,
|
17280 |
+
"learning_rate": 1.8203389830508473e-07,
|
17281 |
+
"loss": 1.5198,
|
17282 |
+
"step": 24630
|
17283 |
+
},
|
17284 |
+
{
|
17285 |
+
"epoch": 0.62,
|
17286 |
+
"grad_norm": 65.5,
|
17287 |
+
"learning_rate": 1.8169491525423728e-07,
|
17288 |
+
"loss": 1.5095,
|
17289 |
+
"step": 24640
|
17290 |
+
},
|
17291 |
+
{
|
17292 |
+
"epoch": 0.62,
|
17293 |
+
"grad_norm": 67.0,
|
17294 |
+
"learning_rate": 1.8135593220338983e-07,
|
17295 |
+
"loss": 1.4858,
|
17296 |
+
"step": 24650
|
17297 |
+
},
|
17298 |
+
{
|
17299 |
+
"epoch": 0.62,
|
17300 |
+
"grad_norm": 69.0,
|
17301 |
+
"learning_rate": 1.8101694915254236e-07,
|
17302 |
+
"loss": 1.4812,
|
17303 |
+
"step": 24660
|
17304 |
+
},
|
17305 |
+
{
|
17306 |
+
"epoch": 0.62,
|
17307 |
+
"grad_norm": 66.0,
|
17308 |
+
"learning_rate": 1.8067796610169492e-07,
|
17309 |
+
"loss": 1.4465,
|
17310 |
+
"step": 24670
|
17311 |
+
},
|
17312 |
+
{
|
17313 |
+
"epoch": 0.62,
|
17314 |
+
"grad_norm": 65.5,
|
17315 |
+
"learning_rate": 1.8033898305084744e-07,
|
17316 |
+
"loss": 1.4647,
|
17317 |
+
"step": 24680
|
17318 |
+
},
|
17319 |
+
{
|
17320 |
+
"epoch": 0.62,
|
17321 |
+
"grad_norm": 66.5,
|
17322 |
+
"learning_rate": 1.8e-07,
|
17323 |
+
"loss": 1.5012,
|
17324 |
+
"step": 24690
|
17325 |
+
},
|
17326 |
+
{
|
17327 |
+
"epoch": 0.62,
|
17328 |
+
"grad_norm": 68.0,
|
17329 |
+
"learning_rate": 1.7966101694915252e-07,
|
17330 |
+
"loss": 1.4985,
|
17331 |
+
"step": 24700
|
17332 |
+
},
|
17333 |
+
{
|
17334 |
+
"epoch": 0.62,
|
17335 |
+
"grad_norm": 64.5,
|
17336 |
+
"learning_rate": 1.7932203389830508e-07,
|
17337 |
+
"loss": 1.4605,
|
17338 |
+
"step": 24710
|
17339 |
+
},
|
17340 |
+
{
|
17341 |
+
"epoch": 0.62,
|
17342 |
+
"grad_norm": 67.0,
|
17343 |
+
"learning_rate": 1.7898305084745763e-07,
|
17344 |
+
"loss": 1.4714,
|
17345 |
+
"step": 24720
|
17346 |
+
},
|
17347 |
+
{
|
17348 |
+
"epoch": 0.62,
|
17349 |
+
"grad_norm": 68.0,
|
17350 |
+
"learning_rate": 1.7864406779661016e-07,
|
17351 |
+
"loss": 1.45,
|
17352 |
+
"step": 24730
|
17353 |
+
},
|
17354 |
+
{
|
17355 |
+
"epoch": 0.62,
|
17356 |
+
"grad_norm": 67.0,
|
17357 |
+
"learning_rate": 1.783050847457627e-07,
|
17358 |
+
"loss": 1.4958,
|
17359 |
+
"step": 24740
|
17360 |
+
},
|
17361 |
+
{
|
17362 |
+
"epoch": 0.62,
|
17363 |
+
"grad_norm": 69.0,
|
17364 |
+
"learning_rate": 1.7796610169491524e-07,
|
17365 |
+
"loss": 1.4518,
|
17366 |
+
"step": 24750
|
17367 |
+
},
|
17368 |
+
{
|
17369 |
+
"epoch": 0.62,
|
17370 |
+
"grad_norm": 65.5,
|
17371 |
+
"learning_rate": 1.776271186440678e-07,
|
17372 |
+
"loss": 1.4454,
|
17373 |
+
"step": 24760
|
17374 |
+
},
|
17375 |
+
{
|
17376 |
+
"epoch": 0.62,
|
17377 |
+
"grad_norm": 66.5,
|
17378 |
+
"learning_rate": 1.7728813559322035e-07,
|
17379 |
+
"loss": 1.4836,
|
17380 |
+
"step": 24770
|
17381 |
+
},
|
17382 |
+
{
|
17383 |
+
"epoch": 0.62,
|
17384 |
+
"grad_norm": 64.5,
|
17385 |
+
"learning_rate": 1.7694915254237288e-07,
|
17386 |
+
"loss": 1.4561,
|
17387 |
+
"step": 24780
|
17388 |
+
},
|
17389 |
+
{
|
17390 |
+
"epoch": 0.62,
|
17391 |
+
"grad_norm": 66.0,
|
17392 |
+
"learning_rate": 1.766101694915254e-07,
|
17393 |
+
"loss": 1.4442,
|
17394 |
+
"step": 24790
|
17395 |
+
},
|
17396 |
+
{
|
17397 |
+
"epoch": 0.62,
|
17398 |
+
"grad_norm": 64.5,
|
17399 |
+
"learning_rate": 1.7627118644067796e-07,
|
17400 |
+
"loss": 1.4462,
|
17401 |
+
"step": 24800
|
17402 |
+
},
|
17403 |
+
{
|
17404 |
+
"epoch": 0.62,
|
17405 |
+
"grad_norm": 66.0,
|
17406 |
+
"learning_rate": 1.7593220338983051e-07,
|
17407 |
+
"loss": 1.5105,
|
17408 |
+
"step": 24810
|
17409 |
+
},
|
17410 |
+
{
|
17411 |
+
"epoch": 0.62,
|
17412 |
+
"grad_norm": 66.0,
|
17413 |
+
"learning_rate": 1.7559322033898304e-07,
|
17414 |
+
"loss": 1.4328,
|
17415 |
+
"step": 24820
|
17416 |
+
},
|
17417 |
+
{
|
17418 |
+
"epoch": 0.62,
|
17419 |
+
"grad_norm": 66.0,
|
17420 |
+
"learning_rate": 1.752542372881356e-07,
|
17421 |
+
"loss": 1.4657,
|
17422 |
+
"step": 24830
|
17423 |
+
},
|
17424 |
+
{
|
17425 |
+
"epoch": 0.62,
|
17426 |
+
"grad_norm": 68.5,
|
17427 |
+
"learning_rate": 1.7491525423728812e-07,
|
17428 |
+
"loss": 1.4911,
|
17429 |
+
"step": 24840
|
17430 |
+
},
|
17431 |
+
{
|
17432 |
+
"epoch": 0.62,
|
17433 |
+
"grad_norm": 65.0,
|
17434 |
+
"learning_rate": 1.7457627118644068e-07,
|
17435 |
+
"loss": 1.4936,
|
17436 |
+
"step": 24850
|
17437 |
+
},
|
17438 |
+
{
|
17439 |
+
"epoch": 0.62,
|
17440 |
+
"grad_norm": 66.5,
|
17441 |
+
"learning_rate": 1.742372881355932e-07,
|
17442 |
+
"loss": 1.4486,
|
17443 |
+
"step": 24860
|
17444 |
+
},
|
17445 |
+
{
|
17446 |
+
"epoch": 0.62,
|
17447 |
+
"grad_norm": 65.0,
|
17448 |
+
"learning_rate": 1.7389830508474576e-07,
|
17449 |
+
"loss": 1.4143,
|
17450 |
+
"step": 24870
|
17451 |
+
},
|
17452 |
+
{
|
17453 |
+
"epoch": 0.62,
|
17454 |
+
"grad_norm": 67.0,
|
17455 |
+
"learning_rate": 1.7355932203389831e-07,
|
17456 |
+
"loss": 1.4867,
|
17457 |
+
"step": 24880
|
17458 |
+
},
|
17459 |
+
{
|
17460 |
+
"epoch": 0.62,
|
17461 |
+
"grad_norm": 66.5,
|
17462 |
+
"learning_rate": 1.7322033898305084e-07,
|
17463 |
+
"loss": 1.4848,
|
17464 |
+
"step": 24890
|
17465 |
+
},
|
17466 |
+
{
|
17467 |
+
"epoch": 0.62,
|
17468 |
+
"grad_norm": 67.0,
|
17469 |
+
"learning_rate": 1.7288135593220337e-07,
|
17470 |
+
"loss": 1.4092,
|
17471 |
+
"step": 24900
|
17472 |
+
},
|
17473 |
+
{
|
17474 |
+
"epoch": 0.62,
|
17475 |
+
"grad_norm": 66.5,
|
17476 |
+
"learning_rate": 1.7254237288135592e-07,
|
17477 |
+
"loss": 1.4708,
|
17478 |
+
"step": 24910
|
17479 |
+
},
|
17480 |
+
{
|
17481 |
+
"epoch": 0.62,
|
17482 |
+
"grad_norm": 65.5,
|
17483 |
+
"learning_rate": 1.7220338983050848e-07,
|
17484 |
+
"loss": 1.4355,
|
17485 |
+
"step": 24920
|
17486 |
+
},
|
17487 |
+
{
|
17488 |
+
"epoch": 0.62,
|
17489 |
+
"grad_norm": 67.0,
|
17490 |
+
"learning_rate": 1.7186440677966103e-07,
|
17491 |
+
"loss": 1.4986,
|
17492 |
+
"step": 24930
|
17493 |
+
},
|
17494 |
+
{
|
17495 |
+
"epoch": 0.62,
|
17496 |
+
"grad_norm": 68.5,
|
17497 |
+
"learning_rate": 1.7152542372881353e-07,
|
17498 |
+
"loss": 1.5264,
|
17499 |
+
"step": 24940
|
17500 |
+
},
|
17501 |
+
{
|
17502 |
+
"epoch": 0.62,
|
17503 |
+
"grad_norm": 71.5,
|
17504 |
+
"learning_rate": 1.711864406779661e-07,
|
17505 |
+
"loss": 1.5117,
|
17506 |
+
"step": 24950
|
17507 |
+
},
|
17508 |
+
{
|
17509 |
+
"epoch": 0.62,
|
17510 |
+
"grad_norm": 68.5,
|
17511 |
+
"learning_rate": 1.7084745762711864e-07,
|
17512 |
+
"loss": 1.4339,
|
17513 |
+
"step": 24960
|
17514 |
+
},
|
17515 |
+
{
|
17516 |
+
"epoch": 0.62,
|
17517 |
+
"grad_norm": 67.0,
|
17518 |
+
"learning_rate": 1.705084745762712e-07,
|
17519 |
+
"loss": 1.4962,
|
17520 |
+
"step": 24970
|
17521 |
+
},
|
17522 |
+
{
|
17523 |
+
"epoch": 0.62,
|
17524 |
+
"grad_norm": 66.5,
|
17525 |
+
"learning_rate": 1.7016949152542372e-07,
|
17526 |
+
"loss": 1.4068,
|
17527 |
+
"step": 24980
|
17528 |
+
},
|
17529 |
+
{
|
17530 |
+
"epoch": 0.62,
|
17531 |
+
"grad_norm": 67.5,
|
17532 |
+
"learning_rate": 1.6983050847457625e-07,
|
17533 |
+
"loss": 1.4685,
|
17534 |
+
"step": 24990
|
17535 |
+
},
|
17536 |
+
{
|
17537 |
+
"epoch": 0.62,
|
17538 |
+
"grad_norm": 67.0,
|
17539 |
+
"learning_rate": 1.694915254237288e-07,
|
17540 |
+
"loss": 1.4548,
|
17541 |
+
"step": 25000
|
17542 |
+
},
|
17543 |
+
{
|
17544 |
+
"epoch": 0.62,
|
17545 |
+
"eval_loss": 1.466233730316162,
|
17546 |
+
"eval_runtime": 68.2791,
|
17547 |
+
"eval_samples_per_second": 14.646,
|
17548 |
+
"eval_steps_per_second": 14.646,
|
17549 |
+
"step": 25000
|
17550 |
}
|
17551 |
],
|
17552 |
"logging_steps": 10,
|
|
|
17554 |
"num_input_tokens_seen": 0,
|
17555 |
"num_train_epochs": 1,
|
17556 |
"save_steps": 5000,
|
17557 |
+
"total_flos": 4.03480510464e+17,
|
17558 |
"train_batch_size": 1,
|
17559 |
"trial_name": null,
|
17560 |
"trial_params": null
|