Uploaded checkpoint-30000
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +3511 -3
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2836579040
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e59f1e99fbc811694ab92c708424caf65cf19db478e06369ab92e39fb33b4c4
|
3 |
size 2836579040
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5673376169
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12878dcb2569715e88e723f6c456aa41e14c85426fc406438f9d0640a7dfc5dc
|
3 |
size 5673376169
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f6fab5b98c9117992015ae4aa1f5dc6bad25ab862e59672747f31c88ae9efe6
|
3 |
+
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7def393ab2a8c7ec0497fd0cb247710e6273a07ccc7d3c4a47916ec7e85eb0c0
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17547,6 +17547,3514 @@
|
|
17547 |
"eval_samples_per_second": 14.646,
|
17548 |
"eval_steps_per_second": 14.646,
|
17549 |
"step": 25000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17550 |
}
|
17551 |
],
|
17552 |
"logging_steps": 10,
|
@@ -17554,7 +21062,7 @@
|
|
17554 |
"num_input_tokens_seen": 0,
|
17555 |
"num_train_epochs": 1,
|
17556 |
"save_steps": 5000,
|
17557 |
-
"total_flos": 4.
|
17558 |
"train_batch_size": 1,
|
17559 |
"trial_name": null,
|
17560 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.75,
|
5 |
"eval_steps": 5000,
|
6 |
+
"global_step": 30000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17547 |
"eval_samples_per_second": 14.646,
|
17548 |
"eval_steps_per_second": 14.646,
|
17549 |
"step": 25000
|
17550 |
+
},
|
17551 |
+
{
|
17552 |
+
"epoch": 0.63,
|
17553 |
+
"grad_norm": 70.5,
|
17554 |
+
"learning_rate": 1.6915254237288136e-07,
|
17555 |
+
"loss": 1.4549,
|
17556 |
+
"step": 25010
|
17557 |
+
},
|
17558 |
+
{
|
17559 |
+
"epoch": 0.63,
|
17560 |
+
"grad_norm": 66.0,
|
17561 |
+
"learning_rate": 1.688135593220339e-07,
|
17562 |
+
"loss": 1.482,
|
17563 |
+
"step": 25020
|
17564 |
+
},
|
17565 |
+
{
|
17566 |
+
"epoch": 0.63,
|
17567 |
+
"grad_norm": 69.0,
|
17568 |
+
"learning_rate": 1.6847457627118644e-07,
|
17569 |
+
"loss": 1.4466,
|
17570 |
+
"step": 25030
|
17571 |
+
},
|
17572 |
+
{
|
17573 |
+
"epoch": 0.63,
|
17574 |
+
"grad_norm": 68.5,
|
17575 |
+
"learning_rate": 1.68135593220339e-07,
|
17576 |
+
"loss": 1.448,
|
17577 |
+
"step": 25040
|
17578 |
+
},
|
17579 |
+
{
|
17580 |
+
"epoch": 0.63,
|
17581 |
+
"grad_norm": 69.5,
|
17582 |
+
"learning_rate": 1.677966101694915e-07,
|
17583 |
+
"loss": 1.5145,
|
17584 |
+
"step": 25050
|
17585 |
+
},
|
17586 |
+
{
|
17587 |
+
"epoch": 0.63,
|
17588 |
+
"grad_norm": 63.0,
|
17589 |
+
"learning_rate": 1.6745762711864405e-07,
|
17590 |
+
"loss": 1.4911,
|
17591 |
+
"step": 25060
|
17592 |
+
},
|
17593 |
+
{
|
17594 |
+
"epoch": 0.63,
|
17595 |
+
"grad_norm": 70.5,
|
17596 |
+
"learning_rate": 1.671186440677966e-07,
|
17597 |
+
"loss": 1.4467,
|
17598 |
+
"step": 25070
|
17599 |
+
},
|
17600 |
+
{
|
17601 |
+
"epoch": 0.63,
|
17602 |
+
"grad_norm": 67.5,
|
17603 |
+
"learning_rate": 1.6677966101694916e-07,
|
17604 |
+
"loss": 1.4316,
|
17605 |
+
"step": 25080
|
17606 |
+
},
|
17607 |
+
{
|
17608 |
+
"epoch": 0.63,
|
17609 |
+
"grad_norm": 66.0,
|
17610 |
+
"learning_rate": 1.664406779661017e-07,
|
17611 |
+
"loss": 1.4829,
|
17612 |
+
"step": 25090
|
17613 |
+
},
|
17614 |
+
{
|
17615 |
+
"epoch": 0.63,
|
17616 |
+
"grad_norm": 73.5,
|
17617 |
+
"learning_rate": 1.6610169491525421e-07,
|
17618 |
+
"loss": 1.4723,
|
17619 |
+
"step": 25100
|
17620 |
+
},
|
17621 |
+
{
|
17622 |
+
"epoch": 0.63,
|
17623 |
+
"grad_norm": 66.0,
|
17624 |
+
"learning_rate": 1.6576271186440677e-07,
|
17625 |
+
"loss": 1.4702,
|
17626 |
+
"step": 25110
|
17627 |
+
},
|
17628 |
+
{
|
17629 |
+
"epoch": 0.63,
|
17630 |
+
"grad_norm": 65.0,
|
17631 |
+
"learning_rate": 1.6542372881355932e-07,
|
17632 |
+
"loss": 1.4329,
|
17633 |
+
"step": 25120
|
17634 |
+
},
|
17635 |
+
{
|
17636 |
+
"epoch": 0.63,
|
17637 |
+
"grad_norm": 65.5,
|
17638 |
+
"learning_rate": 1.6508474576271188e-07,
|
17639 |
+
"loss": 1.481,
|
17640 |
+
"step": 25130
|
17641 |
+
},
|
17642 |
+
{
|
17643 |
+
"epoch": 0.63,
|
17644 |
+
"grad_norm": 65.0,
|
17645 |
+
"learning_rate": 1.647457627118644e-07,
|
17646 |
+
"loss": 1.4415,
|
17647 |
+
"step": 25140
|
17648 |
+
},
|
17649 |
+
{
|
17650 |
+
"epoch": 0.63,
|
17651 |
+
"grad_norm": 69.0,
|
17652 |
+
"learning_rate": 1.6440677966101693e-07,
|
17653 |
+
"loss": 1.4572,
|
17654 |
+
"step": 25150
|
17655 |
+
},
|
17656 |
+
{
|
17657 |
+
"epoch": 0.63,
|
17658 |
+
"grad_norm": 67.0,
|
17659 |
+
"learning_rate": 1.6406779661016949e-07,
|
17660 |
+
"loss": 1.4667,
|
17661 |
+
"step": 25160
|
17662 |
+
},
|
17663 |
+
{
|
17664 |
+
"epoch": 0.63,
|
17665 |
+
"grad_norm": 68.5,
|
17666 |
+
"learning_rate": 1.6372881355932201e-07,
|
17667 |
+
"loss": 1.4629,
|
17668 |
+
"step": 25170
|
17669 |
+
},
|
17670 |
+
{
|
17671 |
+
"epoch": 0.63,
|
17672 |
+
"grad_norm": 65.5,
|
17673 |
+
"learning_rate": 1.6338983050847457e-07,
|
17674 |
+
"loss": 1.4532,
|
17675 |
+
"step": 25180
|
17676 |
+
},
|
17677 |
+
{
|
17678 |
+
"epoch": 0.63,
|
17679 |
+
"grad_norm": 67.0,
|
17680 |
+
"learning_rate": 1.6305084745762712e-07,
|
17681 |
+
"loss": 1.4474,
|
17682 |
+
"step": 25190
|
17683 |
+
},
|
17684 |
+
{
|
17685 |
+
"epoch": 0.63,
|
17686 |
+
"grad_norm": 67.5,
|
17687 |
+
"learning_rate": 1.6271186440677965e-07,
|
17688 |
+
"loss": 1.4358,
|
17689 |
+
"step": 25200
|
17690 |
+
},
|
17691 |
+
{
|
17692 |
+
"epoch": 0.63,
|
17693 |
+
"grad_norm": 65.5,
|
17694 |
+
"learning_rate": 1.6237288135593218e-07,
|
17695 |
+
"loss": 1.4745,
|
17696 |
+
"step": 25210
|
17697 |
+
},
|
17698 |
+
{
|
17699 |
+
"epoch": 0.63,
|
17700 |
+
"grad_norm": 65.0,
|
17701 |
+
"learning_rate": 1.6203389830508473e-07,
|
17702 |
+
"loss": 1.4517,
|
17703 |
+
"step": 25220
|
17704 |
+
},
|
17705 |
+
{
|
17706 |
+
"epoch": 0.63,
|
17707 |
+
"grad_norm": 65.0,
|
17708 |
+
"learning_rate": 1.6169491525423729e-07,
|
17709 |
+
"loss": 1.4568,
|
17710 |
+
"step": 25230
|
17711 |
+
},
|
17712 |
+
{
|
17713 |
+
"epoch": 0.63,
|
17714 |
+
"grad_norm": 68.5,
|
17715 |
+
"learning_rate": 1.6135593220338984e-07,
|
17716 |
+
"loss": 1.4866,
|
17717 |
+
"step": 25240
|
17718 |
+
},
|
17719 |
+
{
|
17720 |
+
"epoch": 0.63,
|
17721 |
+
"grad_norm": 65.0,
|
17722 |
+
"learning_rate": 1.6101694915254234e-07,
|
17723 |
+
"loss": 1.426,
|
17724 |
+
"step": 25250
|
17725 |
+
},
|
17726 |
+
{
|
17727 |
+
"epoch": 0.63,
|
17728 |
+
"grad_norm": 65.5,
|
17729 |
+
"learning_rate": 1.606779661016949e-07,
|
17730 |
+
"loss": 1.4852,
|
17731 |
+
"step": 25260
|
17732 |
+
},
|
17733 |
+
{
|
17734 |
+
"epoch": 0.63,
|
17735 |
+
"grad_norm": 67.0,
|
17736 |
+
"learning_rate": 1.6033898305084745e-07,
|
17737 |
+
"loss": 1.4411,
|
17738 |
+
"step": 25270
|
17739 |
+
},
|
17740 |
+
{
|
17741 |
+
"epoch": 0.63,
|
17742 |
+
"grad_norm": 68.5,
|
17743 |
+
"learning_rate": 1.6e-07,
|
17744 |
+
"loss": 1.4613,
|
17745 |
+
"step": 25280
|
17746 |
+
},
|
17747 |
+
{
|
17748 |
+
"epoch": 0.63,
|
17749 |
+
"grad_norm": 68.0,
|
17750 |
+
"learning_rate": 1.5966101694915253e-07,
|
17751 |
+
"loss": 1.4734,
|
17752 |
+
"step": 25290
|
17753 |
+
},
|
17754 |
+
{
|
17755 |
+
"epoch": 0.63,
|
17756 |
+
"grad_norm": 67.5,
|
17757 |
+
"learning_rate": 1.5932203389830506e-07,
|
17758 |
+
"loss": 1.5376,
|
17759 |
+
"step": 25300
|
17760 |
+
},
|
17761 |
+
{
|
17762 |
+
"epoch": 0.63,
|
17763 |
+
"grad_norm": 65.0,
|
17764 |
+
"learning_rate": 1.589830508474576e-07,
|
17765 |
+
"loss": 1.4272,
|
17766 |
+
"step": 25310
|
17767 |
+
},
|
17768 |
+
{
|
17769 |
+
"epoch": 0.63,
|
17770 |
+
"grad_norm": 67.5,
|
17771 |
+
"learning_rate": 1.5864406779661017e-07,
|
17772 |
+
"loss": 1.4695,
|
17773 |
+
"step": 25320
|
17774 |
+
},
|
17775 |
+
{
|
17776 |
+
"epoch": 0.63,
|
17777 |
+
"grad_norm": 67.0,
|
17778 |
+
"learning_rate": 1.583050847457627e-07,
|
17779 |
+
"loss": 1.4553,
|
17780 |
+
"step": 25330
|
17781 |
+
},
|
17782 |
+
{
|
17783 |
+
"epoch": 0.63,
|
17784 |
+
"grad_norm": 65.0,
|
17785 |
+
"learning_rate": 1.5796610169491525e-07,
|
17786 |
+
"loss": 1.4733,
|
17787 |
+
"step": 25340
|
17788 |
+
},
|
17789 |
+
{
|
17790 |
+
"epoch": 0.63,
|
17791 |
+
"grad_norm": 65.5,
|
17792 |
+
"learning_rate": 1.576271186440678e-07,
|
17793 |
+
"loss": 1.435,
|
17794 |
+
"step": 25350
|
17795 |
+
},
|
17796 |
+
{
|
17797 |
+
"epoch": 0.63,
|
17798 |
+
"grad_norm": 64.5,
|
17799 |
+
"learning_rate": 1.5728813559322033e-07,
|
17800 |
+
"loss": 1.4751,
|
17801 |
+
"step": 25360
|
17802 |
+
},
|
17803 |
+
{
|
17804 |
+
"epoch": 0.63,
|
17805 |
+
"grad_norm": 66.5,
|
17806 |
+
"learning_rate": 1.5694915254237286e-07,
|
17807 |
+
"loss": 1.4824,
|
17808 |
+
"step": 25370
|
17809 |
+
},
|
17810 |
+
{
|
17811 |
+
"epoch": 0.63,
|
17812 |
+
"grad_norm": 65.0,
|
17813 |
+
"learning_rate": 1.566101694915254e-07,
|
17814 |
+
"loss": 1.4431,
|
17815 |
+
"step": 25380
|
17816 |
+
},
|
17817 |
+
{
|
17818 |
+
"epoch": 0.63,
|
17819 |
+
"grad_norm": 73.5,
|
17820 |
+
"learning_rate": 1.5627118644067797e-07,
|
17821 |
+
"loss": 1.516,
|
17822 |
+
"step": 25390
|
17823 |
+
},
|
17824 |
+
{
|
17825 |
+
"epoch": 0.64,
|
17826 |
+
"grad_norm": 65.5,
|
17827 |
+
"learning_rate": 1.5593220338983052e-07,
|
17828 |
+
"loss": 1.4409,
|
17829 |
+
"step": 25400
|
17830 |
+
},
|
17831 |
+
{
|
17832 |
+
"epoch": 0.64,
|
17833 |
+
"grad_norm": 67.0,
|
17834 |
+
"learning_rate": 1.5559322033898302e-07,
|
17835 |
+
"loss": 1.4274,
|
17836 |
+
"step": 25410
|
17837 |
+
},
|
17838 |
+
{
|
17839 |
+
"epoch": 0.64,
|
17840 |
+
"grad_norm": 66.5,
|
17841 |
+
"learning_rate": 1.5525423728813558e-07,
|
17842 |
+
"loss": 1.48,
|
17843 |
+
"step": 25420
|
17844 |
+
},
|
17845 |
+
{
|
17846 |
+
"epoch": 0.64,
|
17847 |
+
"grad_norm": 65.0,
|
17848 |
+
"learning_rate": 1.5491525423728813e-07,
|
17849 |
+
"loss": 1.4892,
|
17850 |
+
"step": 25430
|
17851 |
+
},
|
17852 |
+
{
|
17853 |
+
"epoch": 0.64,
|
17854 |
+
"grad_norm": 66.0,
|
17855 |
+
"learning_rate": 1.5457627118644068e-07,
|
17856 |
+
"loss": 1.4686,
|
17857 |
+
"step": 25440
|
17858 |
+
},
|
17859 |
+
{
|
17860 |
+
"epoch": 0.64,
|
17861 |
+
"grad_norm": 68.0,
|
17862 |
+
"learning_rate": 1.542372881355932e-07,
|
17863 |
+
"loss": 1.4634,
|
17864 |
+
"step": 25450
|
17865 |
+
},
|
17866 |
+
{
|
17867 |
+
"epoch": 0.64,
|
17868 |
+
"grad_norm": 68.0,
|
17869 |
+
"learning_rate": 1.5389830508474574e-07,
|
17870 |
+
"loss": 1.4937,
|
17871 |
+
"step": 25460
|
17872 |
+
},
|
17873 |
+
{
|
17874 |
+
"epoch": 0.64,
|
17875 |
+
"grad_norm": 63.75,
|
17876 |
+
"learning_rate": 1.535593220338983e-07,
|
17877 |
+
"loss": 1.4743,
|
17878 |
+
"step": 25470
|
17879 |
+
},
|
17880 |
+
{
|
17881 |
+
"epoch": 0.64,
|
17882 |
+
"grad_norm": 67.0,
|
17883 |
+
"learning_rate": 1.5322033898305085e-07,
|
17884 |
+
"loss": 1.4774,
|
17885 |
+
"step": 25480
|
17886 |
+
},
|
17887 |
+
{
|
17888 |
+
"epoch": 0.64,
|
17889 |
+
"grad_norm": 65.0,
|
17890 |
+
"learning_rate": 1.5288135593220338e-07,
|
17891 |
+
"loss": 1.4126,
|
17892 |
+
"step": 25490
|
17893 |
+
},
|
17894 |
+
{
|
17895 |
+
"epoch": 0.64,
|
17896 |
+
"grad_norm": 64.5,
|
17897 |
+
"learning_rate": 1.5254237288135593e-07,
|
17898 |
+
"loss": 1.4223,
|
17899 |
+
"step": 25500
|
17900 |
+
},
|
17901 |
+
{
|
17902 |
+
"epoch": 0.64,
|
17903 |
+
"grad_norm": 66.5,
|
17904 |
+
"learning_rate": 1.5220338983050846e-07,
|
17905 |
+
"loss": 1.4963,
|
17906 |
+
"step": 25510
|
17907 |
+
},
|
17908 |
+
{
|
17909 |
+
"epoch": 0.64,
|
17910 |
+
"grad_norm": 66.5,
|
17911 |
+
"learning_rate": 1.51864406779661e-07,
|
17912 |
+
"loss": 1.4755,
|
17913 |
+
"step": 25520
|
17914 |
+
},
|
17915 |
+
{
|
17916 |
+
"epoch": 0.64,
|
17917 |
+
"grad_norm": 66.0,
|
17918 |
+
"learning_rate": 1.5152542372881354e-07,
|
17919 |
+
"loss": 1.4751,
|
17920 |
+
"step": 25530
|
17921 |
+
},
|
17922 |
+
{
|
17923 |
+
"epoch": 0.64,
|
17924 |
+
"grad_norm": 70.0,
|
17925 |
+
"learning_rate": 1.511864406779661e-07,
|
17926 |
+
"loss": 1.5208,
|
17927 |
+
"step": 25540
|
17928 |
+
},
|
17929 |
+
{
|
17930 |
+
"epoch": 0.64,
|
17931 |
+
"grad_norm": 64.0,
|
17932 |
+
"learning_rate": 1.5084745762711865e-07,
|
17933 |
+
"loss": 1.4453,
|
17934 |
+
"step": 25550
|
17935 |
+
},
|
17936 |
+
{
|
17937 |
+
"epoch": 0.64,
|
17938 |
+
"grad_norm": 65.0,
|
17939 |
+
"learning_rate": 1.5050847457627118e-07,
|
17940 |
+
"loss": 1.4952,
|
17941 |
+
"step": 25560
|
17942 |
+
},
|
17943 |
+
{
|
17944 |
+
"epoch": 0.64,
|
17945 |
+
"grad_norm": 69.5,
|
17946 |
+
"learning_rate": 1.501694915254237e-07,
|
17947 |
+
"loss": 1.4834,
|
17948 |
+
"step": 25570
|
17949 |
+
},
|
17950 |
+
{
|
17951 |
+
"epoch": 0.64,
|
17952 |
+
"grad_norm": 70.0,
|
17953 |
+
"learning_rate": 1.4983050847457626e-07,
|
17954 |
+
"loss": 1.4714,
|
17955 |
+
"step": 25580
|
17956 |
+
},
|
17957 |
+
{
|
17958 |
+
"epoch": 0.64,
|
17959 |
+
"grad_norm": 69.5,
|
17960 |
+
"learning_rate": 1.494915254237288e-07,
|
17961 |
+
"loss": 1.5123,
|
17962 |
+
"step": 25590
|
17963 |
+
},
|
17964 |
+
{
|
17965 |
+
"epoch": 0.64,
|
17966 |
+
"grad_norm": 66.0,
|
17967 |
+
"learning_rate": 1.4915254237288137e-07,
|
17968 |
+
"loss": 1.4964,
|
17969 |
+
"step": 25600
|
17970 |
+
},
|
17971 |
+
{
|
17972 |
+
"epoch": 0.64,
|
17973 |
+
"grad_norm": 67.5,
|
17974 |
+
"learning_rate": 1.488135593220339e-07,
|
17975 |
+
"loss": 1.4979,
|
17976 |
+
"step": 25610
|
17977 |
+
},
|
17978 |
+
{
|
17979 |
+
"epoch": 0.64,
|
17980 |
+
"grad_norm": 68.0,
|
17981 |
+
"learning_rate": 1.4847457627118642e-07,
|
17982 |
+
"loss": 1.4971,
|
17983 |
+
"step": 25620
|
17984 |
+
},
|
17985 |
+
{
|
17986 |
+
"epoch": 0.64,
|
17987 |
+
"grad_norm": 65.5,
|
17988 |
+
"learning_rate": 1.4813559322033897e-07,
|
17989 |
+
"loss": 1.5237,
|
17990 |
+
"step": 25630
|
17991 |
+
},
|
17992 |
+
{
|
17993 |
+
"epoch": 0.64,
|
17994 |
+
"grad_norm": 65.0,
|
17995 |
+
"learning_rate": 1.4779661016949153e-07,
|
17996 |
+
"loss": 1.466,
|
17997 |
+
"step": 25640
|
17998 |
+
},
|
17999 |
+
{
|
18000 |
+
"epoch": 0.64,
|
18001 |
+
"grad_norm": 67.0,
|
18002 |
+
"learning_rate": 1.4745762711864406e-07,
|
18003 |
+
"loss": 1.4765,
|
18004 |
+
"step": 25650
|
18005 |
+
},
|
18006 |
+
{
|
18007 |
+
"epoch": 0.64,
|
18008 |
+
"grad_norm": 62.25,
|
18009 |
+
"learning_rate": 1.471186440677966e-07,
|
18010 |
+
"loss": 1.417,
|
18011 |
+
"step": 25660
|
18012 |
+
},
|
18013 |
+
{
|
18014 |
+
"epoch": 0.64,
|
18015 |
+
"grad_norm": 69.5,
|
18016 |
+
"learning_rate": 1.4677966101694914e-07,
|
18017 |
+
"loss": 1.4432,
|
18018 |
+
"step": 25670
|
18019 |
+
},
|
18020 |
+
{
|
18021 |
+
"epoch": 0.64,
|
18022 |
+
"grad_norm": 67.0,
|
18023 |
+
"learning_rate": 1.464406779661017e-07,
|
18024 |
+
"loss": 1.4499,
|
18025 |
+
"step": 25680
|
18026 |
+
},
|
18027 |
+
{
|
18028 |
+
"epoch": 0.64,
|
18029 |
+
"grad_norm": 73.0,
|
18030 |
+
"learning_rate": 1.4610169491525422e-07,
|
18031 |
+
"loss": 1.4715,
|
18032 |
+
"step": 25690
|
18033 |
+
},
|
18034 |
+
{
|
18035 |
+
"epoch": 0.64,
|
18036 |
+
"grad_norm": 66.0,
|
18037 |
+
"learning_rate": 1.4576271186440677e-07,
|
18038 |
+
"loss": 1.4751,
|
18039 |
+
"step": 25700
|
18040 |
+
},
|
18041 |
+
{
|
18042 |
+
"epoch": 0.64,
|
18043 |
+
"grad_norm": 67.0,
|
18044 |
+
"learning_rate": 1.4542372881355933e-07,
|
18045 |
+
"loss": 1.4418,
|
18046 |
+
"step": 25710
|
18047 |
+
},
|
18048 |
+
{
|
18049 |
+
"epoch": 0.64,
|
18050 |
+
"grad_norm": 65.5,
|
18051 |
+
"learning_rate": 1.4508474576271186e-07,
|
18052 |
+
"loss": 1.4736,
|
18053 |
+
"step": 25720
|
18054 |
+
},
|
18055 |
+
{
|
18056 |
+
"epoch": 0.64,
|
18057 |
+
"grad_norm": 67.0,
|
18058 |
+
"learning_rate": 1.4474576271186438e-07,
|
18059 |
+
"loss": 1.4995,
|
18060 |
+
"step": 25730
|
18061 |
+
},
|
18062 |
+
{
|
18063 |
+
"epoch": 0.64,
|
18064 |
+
"grad_norm": 71.5,
|
18065 |
+
"learning_rate": 1.4440677966101694e-07,
|
18066 |
+
"loss": 1.4869,
|
18067 |
+
"step": 25740
|
18068 |
+
},
|
18069 |
+
{
|
18070 |
+
"epoch": 0.64,
|
18071 |
+
"grad_norm": 68.0,
|
18072 |
+
"learning_rate": 1.440677966101695e-07,
|
18073 |
+
"loss": 1.5158,
|
18074 |
+
"step": 25750
|
18075 |
+
},
|
18076 |
+
{
|
18077 |
+
"epoch": 0.64,
|
18078 |
+
"grad_norm": 69.0,
|
18079 |
+
"learning_rate": 1.4372881355932205e-07,
|
18080 |
+
"loss": 1.5294,
|
18081 |
+
"step": 25760
|
18082 |
+
},
|
18083 |
+
{
|
18084 |
+
"epoch": 0.64,
|
18085 |
+
"grad_norm": 63.75,
|
18086 |
+
"learning_rate": 1.4338983050847455e-07,
|
18087 |
+
"loss": 1.4689,
|
18088 |
+
"step": 25770
|
18089 |
+
},
|
18090 |
+
{
|
18091 |
+
"epoch": 0.64,
|
18092 |
+
"grad_norm": 66.0,
|
18093 |
+
"learning_rate": 1.430508474576271e-07,
|
18094 |
+
"loss": 1.4569,
|
18095 |
+
"step": 25780
|
18096 |
+
},
|
18097 |
+
{
|
18098 |
+
"epoch": 0.64,
|
18099 |
+
"grad_norm": 66.5,
|
18100 |
+
"learning_rate": 1.4271186440677966e-07,
|
18101 |
+
"loss": 1.4544,
|
18102 |
+
"step": 25790
|
18103 |
+
},
|
18104 |
+
{
|
18105 |
+
"epoch": 0.65,
|
18106 |
+
"grad_norm": 66.5,
|
18107 |
+
"learning_rate": 1.423728813559322e-07,
|
18108 |
+
"loss": 1.4838,
|
18109 |
+
"step": 25800
|
18110 |
+
},
|
18111 |
+
{
|
18112 |
+
"epoch": 0.65,
|
18113 |
+
"grad_norm": 66.5,
|
18114 |
+
"learning_rate": 1.4203389830508474e-07,
|
18115 |
+
"loss": 1.4641,
|
18116 |
+
"step": 25810
|
18117 |
+
},
|
18118 |
+
{
|
18119 |
+
"epoch": 0.65,
|
18120 |
+
"grad_norm": 66.0,
|
18121 |
+
"learning_rate": 1.4169491525423727e-07,
|
18122 |
+
"loss": 1.4856,
|
18123 |
+
"step": 25820
|
18124 |
+
},
|
18125 |
+
{
|
18126 |
+
"epoch": 0.65,
|
18127 |
+
"grad_norm": 63.75,
|
18128 |
+
"learning_rate": 1.4135593220338982e-07,
|
18129 |
+
"loss": 1.4885,
|
18130 |
+
"step": 25830
|
18131 |
+
},
|
18132 |
+
{
|
18133 |
+
"epoch": 0.65,
|
18134 |
+
"grad_norm": 66.0,
|
18135 |
+
"learning_rate": 1.4101694915254237e-07,
|
18136 |
+
"loss": 1.4051,
|
18137 |
+
"step": 25840
|
18138 |
+
},
|
18139 |
+
{
|
18140 |
+
"epoch": 0.65,
|
18141 |
+
"grad_norm": 67.0,
|
18142 |
+
"learning_rate": 1.406779661016949e-07,
|
18143 |
+
"loss": 1.4431,
|
18144 |
+
"step": 25850
|
18145 |
+
},
|
18146 |
+
{
|
18147 |
+
"epoch": 0.65,
|
18148 |
+
"grad_norm": 65.5,
|
18149 |
+
"learning_rate": 1.4033898305084746e-07,
|
18150 |
+
"loss": 1.4973,
|
18151 |
+
"step": 25860
|
18152 |
+
},
|
18153 |
+
{
|
18154 |
+
"epoch": 0.65,
|
18155 |
+
"grad_norm": 66.0,
|
18156 |
+
"learning_rate": 1.4e-07,
|
18157 |
+
"loss": 1.4481,
|
18158 |
+
"step": 25870
|
18159 |
+
},
|
18160 |
+
{
|
18161 |
+
"epoch": 0.65,
|
18162 |
+
"grad_norm": 66.5,
|
18163 |
+
"learning_rate": 1.3966101694915254e-07,
|
18164 |
+
"loss": 1.473,
|
18165 |
+
"step": 25880
|
18166 |
+
},
|
18167 |
+
{
|
18168 |
+
"epoch": 0.65,
|
18169 |
+
"grad_norm": 65.5,
|
18170 |
+
"learning_rate": 1.3932203389830506e-07,
|
18171 |
+
"loss": 1.4835,
|
18172 |
+
"step": 25890
|
18173 |
+
},
|
18174 |
+
{
|
18175 |
+
"epoch": 0.65,
|
18176 |
+
"grad_norm": 65.0,
|
18177 |
+
"learning_rate": 1.3898305084745762e-07,
|
18178 |
+
"loss": 1.4437,
|
18179 |
+
"step": 25900
|
18180 |
+
},
|
18181 |
+
{
|
18182 |
+
"epoch": 0.65,
|
18183 |
+
"grad_norm": 69.5,
|
18184 |
+
"learning_rate": 1.3864406779661017e-07,
|
18185 |
+
"loss": 1.5014,
|
18186 |
+
"step": 25910
|
18187 |
+
},
|
18188 |
+
{
|
18189 |
+
"epoch": 0.65,
|
18190 |
+
"grad_norm": 64.5,
|
18191 |
+
"learning_rate": 1.3830508474576273e-07,
|
18192 |
+
"loss": 1.413,
|
18193 |
+
"step": 25920
|
18194 |
+
},
|
18195 |
+
{
|
18196 |
+
"epoch": 0.65,
|
18197 |
+
"grad_norm": 71.5,
|
18198 |
+
"learning_rate": 1.3796610169491523e-07,
|
18199 |
+
"loss": 1.5194,
|
18200 |
+
"step": 25930
|
18201 |
+
},
|
18202 |
+
{
|
18203 |
+
"epoch": 0.65,
|
18204 |
+
"grad_norm": 66.5,
|
18205 |
+
"learning_rate": 1.3762711864406778e-07,
|
18206 |
+
"loss": 1.439,
|
18207 |
+
"step": 25940
|
18208 |
+
},
|
18209 |
+
{
|
18210 |
+
"epoch": 0.65,
|
18211 |
+
"grad_norm": 72.0,
|
18212 |
+
"learning_rate": 1.3728813559322034e-07,
|
18213 |
+
"loss": 1.5017,
|
18214 |
+
"step": 25950
|
18215 |
+
},
|
18216 |
+
{
|
18217 |
+
"epoch": 0.65,
|
18218 |
+
"grad_norm": 65.0,
|
18219 |
+
"learning_rate": 1.369491525423729e-07,
|
18220 |
+
"loss": 1.4643,
|
18221 |
+
"step": 25960
|
18222 |
+
},
|
18223 |
+
{
|
18224 |
+
"epoch": 0.65,
|
18225 |
+
"grad_norm": 65.0,
|
18226 |
+
"learning_rate": 1.3661016949152542e-07,
|
18227 |
+
"loss": 1.4582,
|
18228 |
+
"step": 25970
|
18229 |
+
},
|
18230 |
+
{
|
18231 |
+
"epoch": 0.65,
|
18232 |
+
"grad_norm": 66.0,
|
18233 |
+
"learning_rate": 1.3627118644067795e-07,
|
18234 |
+
"loss": 1.4518,
|
18235 |
+
"step": 25980
|
18236 |
+
},
|
18237 |
+
{
|
18238 |
+
"epoch": 0.65,
|
18239 |
+
"grad_norm": 66.0,
|
18240 |
+
"learning_rate": 1.359322033898305e-07,
|
18241 |
+
"loss": 1.4446,
|
18242 |
+
"step": 25990
|
18243 |
+
},
|
18244 |
+
{
|
18245 |
+
"epoch": 0.65,
|
18246 |
+
"grad_norm": 67.0,
|
18247 |
+
"learning_rate": 1.3559322033898305e-07,
|
18248 |
+
"loss": 1.5096,
|
18249 |
+
"step": 26000
|
18250 |
+
},
|
18251 |
+
{
|
18252 |
+
"epoch": 0.65,
|
18253 |
+
"grad_norm": 66.0,
|
18254 |
+
"learning_rate": 1.3525423728813558e-07,
|
18255 |
+
"loss": 1.385,
|
18256 |
+
"step": 26010
|
18257 |
+
},
|
18258 |
+
{
|
18259 |
+
"epoch": 0.65,
|
18260 |
+
"grad_norm": 66.5,
|
18261 |
+
"learning_rate": 1.3491525423728814e-07,
|
18262 |
+
"loss": 1.4715,
|
18263 |
+
"step": 26020
|
18264 |
+
},
|
18265 |
+
{
|
18266 |
+
"epoch": 0.65,
|
18267 |
+
"grad_norm": 65.5,
|
18268 |
+
"learning_rate": 1.3457627118644066e-07,
|
18269 |
+
"loss": 1.4763,
|
18270 |
+
"step": 26030
|
18271 |
+
},
|
18272 |
+
{
|
18273 |
+
"epoch": 0.65,
|
18274 |
+
"grad_norm": 66.0,
|
18275 |
+
"learning_rate": 1.3423728813559322e-07,
|
18276 |
+
"loss": 1.4966,
|
18277 |
+
"step": 26040
|
18278 |
+
},
|
18279 |
+
{
|
18280 |
+
"epoch": 0.65,
|
18281 |
+
"grad_norm": 69.0,
|
18282 |
+
"learning_rate": 1.3389830508474575e-07,
|
18283 |
+
"loss": 1.4786,
|
18284 |
+
"step": 26050
|
18285 |
+
},
|
18286 |
+
{
|
18287 |
+
"epoch": 0.65,
|
18288 |
+
"grad_norm": 66.5,
|
18289 |
+
"learning_rate": 1.335593220338983e-07,
|
18290 |
+
"loss": 1.4363,
|
18291 |
+
"step": 26060
|
18292 |
+
},
|
18293 |
+
{
|
18294 |
+
"epoch": 0.65,
|
18295 |
+
"grad_norm": 65.5,
|
18296 |
+
"learning_rate": 1.3322033898305085e-07,
|
18297 |
+
"loss": 1.4564,
|
18298 |
+
"step": 26070
|
18299 |
+
},
|
18300 |
+
{
|
18301 |
+
"epoch": 0.65,
|
18302 |
+
"grad_norm": 66.0,
|
18303 |
+
"learning_rate": 1.3288135593220338e-07,
|
18304 |
+
"loss": 1.4775,
|
18305 |
+
"step": 26080
|
18306 |
+
},
|
18307 |
+
{
|
18308 |
+
"epoch": 0.65,
|
18309 |
+
"grad_norm": 68.5,
|
18310 |
+
"learning_rate": 1.325423728813559e-07,
|
18311 |
+
"loss": 1.5025,
|
18312 |
+
"step": 26090
|
18313 |
+
},
|
18314 |
+
{
|
18315 |
+
"epoch": 0.65,
|
18316 |
+
"grad_norm": 67.5,
|
18317 |
+
"learning_rate": 1.3220338983050846e-07,
|
18318 |
+
"loss": 1.4541,
|
18319 |
+
"step": 26100
|
18320 |
+
},
|
18321 |
+
{
|
18322 |
+
"epoch": 0.65,
|
18323 |
+
"grad_norm": 68.5,
|
18324 |
+
"learning_rate": 1.3186440677966102e-07,
|
18325 |
+
"loss": 1.4984,
|
18326 |
+
"step": 26110
|
18327 |
+
},
|
18328 |
+
{
|
18329 |
+
"epoch": 0.65,
|
18330 |
+
"grad_norm": 63.5,
|
18331 |
+
"learning_rate": 1.3152542372881357e-07,
|
18332 |
+
"loss": 1.4578,
|
18333 |
+
"step": 26120
|
18334 |
+
},
|
18335 |
+
{
|
18336 |
+
"epoch": 0.65,
|
18337 |
+
"grad_norm": 68.5,
|
18338 |
+
"learning_rate": 1.311864406779661e-07,
|
18339 |
+
"loss": 1.4594,
|
18340 |
+
"step": 26130
|
18341 |
+
},
|
18342 |
+
{
|
18343 |
+
"epoch": 0.65,
|
18344 |
+
"grad_norm": 68.0,
|
18345 |
+
"learning_rate": 1.3084745762711863e-07,
|
18346 |
+
"loss": 1.4632,
|
18347 |
+
"step": 26140
|
18348 |
+
},
|
18349 |
+
{
|
18350 |
+
"epoch": 0.65,
|
18351 |
+
"grad_norm": 67.0,
|
18352 |
+
"learning_rate": 1.3050847457627118e-07,
|
18353 |
+
"loss": 1.4885,
|
18354 |
+
"step": 26150
|
18355 |
+
},
|
18356 |
+
{
|
18357 |
+
"epoch": 0.65,
|
18358 |
+
"grad_norm": 68.0,
|
18359 |
+
"learning_rate": 1.3016949152542374e-07,
|
18360 |
+
"loss": 1.4458,
|
18361 |
+
"step": 26160
|
18362 |
+
},
|
18363 |
+
{
|
18364 |
+
"epoch": 0.65,
|
18365 |
+
"grad_norm": 66.5,
|
18366 |
+
"learning_rate": 1.2983050847457626e-07,
|
18367 |
+
"loss": 1.4944,
|
18368 |
+
"step": 26170
|
18369 |
+
},
|
18370 |
+
{
|
18371 |
+
"epoch": 0.65,
|
18372 |
+
"grad_norm": 67.5,
|
18373 |
+
"learning_rate": 1.2949152542372882e-07,
|
18374 |
+
"loss": 1.4712,
|
18375 |
+
"step": 26180
|
18376 |
+
},
|
18377 |
+
{
|
18378 |
+
"epoch": 0.65,
|
18379 |
+
"grad_norm": 66.0,
|
18380 |
+
"learning_rate": 1.2915254237288135e-07,
|
18381 |
+
"loss": 1.4466,
|
18382 |
+
"step": 26190
|
18383 |
+
},
|
18384 |
+
{
|
18385 |
+
"epoch": 0.66,
|
18386 |
+
"grad_norm": 65.0,
|
18387 |
+
"learning_rate": 1.288135593220339e-07,
|
18388 |
+
"loss": 1.474,
|
18389 |
+
"step": 26200
|
18390 |
+
},
|
18391 |
+
{
|
18392 |
+
"epoch": 0.66,
|
18393 |
+
"grad_norm": 64.5,
|
18394 |
+
"learning_rate": 1.2847457627118643e-07,
|
18395 |
+
"loss": 1.4741,
|
18396 |
+
"step": 26210
|
18397 |
+
},
|
18398 |
+
{
|
18399 |
+
"epoch": 0.66,
|
18400 |
+
"grad_norm": 68.5,
|
18401 |
+
"learning_rate": 1.2813559322033898e-07,
|
18402 |
+
"loss": 1.4871,
|
18403 |
+
"step": 26220
|
18404 |
+
},
|
18405 |
+
{
|
18406 |
+
"epoch": 0.66,
|
18407 |
+
"grad_norm": 65.5,
|
18408 |
+
"learning_rate": 1.2779661016949154e-07,
|
18409 |
+
"loss": 1.4677,
|
18410 |
+
"step": 26230
|
18411 |
+
},
|
18412 |
+
{
|
18413 |
+
"epoch": 0.66,
|
18414 |
+
"grad_norm": 68.5,
|
18415 |
+
"learning_rate": 1.2745762711864406e-07,
|
18416 |
+
"loss": 1.4899,
|
18417 |
+
"step": 26240
|
18418 |
+
},
|
18419 |
+
{
|
18420 |
+
"epoch": 0.66,
|
18421 |
+
"grad_norm": 72.0,
|
18422 |
+
"learning_rate": 1.271186440677966e-07,
|
18423 |
+
"loss": 1.5358,
|
18424 |
+
"step": 26250
|
18425 |
+
},
|
18426 |
+
{
|
18427 |
+
"epoch": 0.66,
|
18428 |
+
"grad_norm": 65.5,
|
18429 |
+
"learning_rate": 1.2677966101694914e-07,
|
18430 |
+
"loss": 1.4496,
|
18431 |
+
"step": 26260
|
18432 |
+
},
|
18433 |
+
{
|
18434 |
+
"epoch": 0.66,
|
18435 |
+
"grad_norm": 69.0,
|
18436 |
+
"learning_rate": 1.264406779661017e-07,
|
18437 |
+
"loss": 1.5128,
|
18438 |
+
"step": 26270
|
18439 |
+
},
|
18440 |
+
{
|
18441 |
+
"epoch": 0.66,
|
18442 |
+
"grad_norm": 63.5,
|
18443 |
+
"learning_rate": 1.2610169491525425e-07,
|
18444 |
+
"loss": 1.4466,
|
18445 |
+
"step": 26280
|
18446 |
+
},
|
18447 |
+
{
|
18448 |
+
"epoch": 0.66,
|
18449 |
+
"grad_norm": 66.0,
|
18450 |
+
"learning_rate": 1.2576271186440675e-07,
|
18451 |
+
"loss": 1.4976,
|
18452 |
+
"step": 26290
|
18453 |
+
},
|
18454 |
+
{
|
18455 |
+
"epoch": 0.66,
|
18456 |
+
"grad_norm": 69.0,
|
18457 |
+
"learning_rate": 1.254237288135593e-07,
|
18458 |
+
"loss": 1.4673,
|
18459 |
+
"step": 26300
|
18460 |
+
},
|
18461 |
+
{
|
18462 |
+
"epoch": 0.66,
|
18463 |
+
"grad_norm": 63.75,
|
18464 |
+
"learning_rate": 1.2508474576271186e-07,
|
18465 |
+
"loss": 1.4478,
|
18466 |
+
"step": 26310
|
18467 |
+
},
|
18468 |
+
{
|
18469 |
+
"epoch": 0.66,
|
18470 |
+
"grad_norm": 66.0,
|
18471 |
+
"learning_rate": 1.2474576271186442e-07,
|
18472 |
+
"loss": 1.4564,
|
18473 |
+
"step": 26320
|
18474 |
+
},
|
18475 |
+
{
|
18476 |
+
"epoch": 0.66,
|
18477 |
+
"grad_norm": 67.5,
|
18478 |
+
"learning_rate": 1.2440677966101694e-07,
|
18479 |
+
"loss": 1.4362,
|
18480 |
+
"step": 26330
|
18481 |
+
},
|
18482 |
+
{
|
18483 |
+
"epoch": 0.66,
|
18484 |
+
"grad_norm": 68.5,
|
18485 |
+
"learning_rate": 1.240677966101695e-07,
|
18486 |
+
"loss": 1.4838,
|
18487 |
+
"step": 26340
|
18488 |
+
},
|
18489 |
+
{
|
18490 |
+
"epoch": 0.66,
|
18491 |
+
"grad_norm": 67.0,
|
18492 |
+
"learning_rate": 1.2372881355932203e-07,
|
18493 |
+
"loss": 1.4604,
|
18494 |
+
"step": 26350
|
18495 |
+
},
|
18496 |
+
{
|
18497 |
+
"epoch": 0.66,
|
18498 |
+
"grad_norm": 66.0,
|
18499 |
+
"learning_rate": 1.2338983050847458e-07,
|
18500 |
+
"loss": 1.3902,
|
18501 |
+
"step": 26360
|
18502 |
+
},
|
18503 |
+
{
|
18504 |
+
"epoch": 0.66,
|
18505 |
+
"grad_norm": 64.0,
|
18506 |
+
"learning_rate": 1.230508474576271e-07,
|
18507 |
+
"loss": 1.4454,
|
18508 |
+
"step": 26370
|
18509 |
+
},
|
18510 |
+
{
|
18511 |
+
"epoch": 0.66,
|
18512 |
+
"grad_norm": 70.0,
|
18513 |
+
"learning_rate": 1.2271186440677966e-07,
|
18514 |
+
"loss": 1.4411,
|
18515 |
+
"step": 26380
|
18516 |
+
},
|
18517 |
+
{
|
18518 |
+
"epoch": 0.66,
|
18519 |
+
"grad_norm": 67.5,
|
18520 |
+
"learning_rate": 1.223728813559322e-07,
|
18521 |
+
"loss": 1.4241,
|
18522 |
+
"step": 26390
|
18523 |
+
},
|
18524 |
+
{
|
18525 |
+
"epoch": 0.66,
|
18526 |
+
"grad_norm": 67.5,
|
18527 |
+
"learning_rate": 1.2203389830508474e-07,
|
18528 |
+
"loss": 1.4765,
|
18529 |
+
"step": 26400
|
18530 |
+
},
|
18531 |
+
{
|
18532 |
+
"epoch": 0.66,
|
18533 |
+
"grad_norm": 64.0,
|
18534 |
+
"learning_rate": 1.2169491525423727e-07,
|
18535 |
+
"loss": 1.4272,
|
18536 |
+
"step": 26410
|
18537 |
+
},
|
18538 |
+
{
|
18539 |
+
"epoch": 0.66,
|
18540 |
+
"grad_norm": 67.0,
|
18541 |
+
"learning_rate": 1.2135593220338983e-07,
|
18542 |
+
"loss": 1.487,
|
18543 |
+
"step": 26420
|
18544 |
+
},
|
18545 |
+
{
|
18546 |
+
"epoch": 0.66,
|
18547 |
+
"grad_norm": 68.0,
|
18548 |
+
"learning_rate": 1.2101694915254235e-07,
|
18549 |
+
"loss": 1.5069,
|
18550 |
+
"step": 26430
|
18551 |
+
},
|
18552 |
+
{
|
18553 |
+
"epoch": 0.66,
|
18554 |
+
"grad_norm": 66.5,
|
18555 |
+
"learning_rate": 1.206779661016949e-07,
|
18556 |
+
"loss": 1.4431,
|
18557 |
+
"step": 26440
|
18558 |
+
},
|
18559 |
+
{
|
18560 |
+
"epoch": 0.66,
|
18561 |
+
"grad_norm": 67.0,
|
18562 |
+
"learning_rate": 1.2033898305084746e-07,
|
18563 |
+
"loss": 1.4667,
|
18564 |
+
"step": 26450
|
18565 |
+
},
|
18566 |
+
{
|
18567 |
+
"epoch": 0.66,
|
18568 |
+
"grad_norm": 67.5,
|
18569 |
+
"learning_rate": 1.2e-07,
|
18570 |
+
"loss": 1.4413,
|
18571 |
+
"step": 26460
|
18572 |
+
},
|
18573 |
+
{
|
18574 |
+
"epoch": 0.66,
|
18575 |
+
"grad_norm": 64.0,
|
18576 |
+
"learning_rate": 1.1966101694915254e-07,
|
18577 |
+
"loss": 1.442,
|
18578 |
+
"step": 26470
|
18579 |
+
},
|
18580 |
+
{
|
18581 |
+
"epoch": 0.66,
|
18582 |
+
"grad_norm": 67.0,
|
18583 |
+
"learning_rate": 1.193220338983051e-07,
|
18584 |
+
"loss": 1.5008,
|
18585 |
+
"step": 26480
|
18586 |
+
},
|
18587 |
+
{
|
18588 |
+
"epoch": 0.66,
|
18589 |
+
"grad_norm": 67.0,
|
18590 |
+
"learning_rate": 1.1898305084745763e-07,
|
18591 |
+
"loss": 1.4875,
|
18592 |
+
"step": 26490
|
18593 |
+
},
|
18594 |
+
{
|
18595 |
+
"epoch": 0.66,
|
18596 |
+
"grad_norm": 73.0,
|
18597 |
+
"learning_rate": 1.1864406779661017e-07,
|
18598 |
+
"loss": 1.5183,
|
18599 |
+
"step": 26500
|
18600 |
+
},
|
18601 |
+
{
|
18602 |
+
"epoch": 0.66,
|
18603 |
+
"grad_norm": 66.5,
|
18604 |
+
"learning_rate": 1.1830508474576271e-07,
|
18605 |
+
"loss": 1.4863,
|
18606 |
+
"step": 26510
|
18607 |
+
},
|
18608 |
+
{
|
18609 |
+
"epoch": 0.66,
|
18610 |
+
"grad_norm": 65.0,
|
18611 |
+
"learning_rate": 1.1796610169491525e-07,
|
18612 |
+
"loss": 1.456,
|
18613 |
+
"step": 26520
|
18614 |
+
},
|
18615 |
+
{
|
18616 |
+
"epoch": 0.66,
|
18617 |
+
"grad_norm": 69.5,
|
18618 |
+
"learning_rate": 1.176271186440678e-07,
|
18619 |
+
"loss": 1.5094,
|
18620 |
+
"step": 26530
|
18621 |
+
},
|
18622 |
+
{
|
18623 |
+
"epoch": 0.66,
|
18624 |
+
"grad_norm": 71.5,
|
18625 |
+
"learning_rate": 1.1728813559322033e-07,
|
18626 |
+
"loss": 1.4483,
|
18627 |
+
"step": 26540
|
18628 |
+
},
|
18629 |
+
{
|
18630 |
+
"epoch": 0.66,
|
18631 |
+
"grad_norm": 67.5,
|
18632 |
+
"learning_rate": 1.1694915254237288e-07,
|
18633 |
+
"loss": 1.4301,
|
18634 |
+
"step": 26550
|
18635 |
+
},
|
18636 |
+
{
|
18637 |
+
"epoch": 0.66,
|
18638 |
+
"grad_norm": 71.5,
|
18639 |
+
"learning_rate": 1.1661016949152541e-07,
|
18640 |
+
"loss": 1.5129,
|
18641 |
+
"step": 26560
|
18642 |
+
},
|
18643 |
+
{
|
18644 |
+
"epoch": 0.66,
|
18645 |
+
"grad_norm": 63.75,
|
18646 |
+
"learning_rate": 1.1627118644067797e-07,
|
18647 |
+
"loss": 1.3979,
|
18648 |
+
"step": 26570
|
18649 |
+
},
|
18650 |
+
{
|
18651 |
+
"epoch": 0.66,
|
18652 |
+
"grad_norm": 63.0,
|
18653 |
+
"learning_rate": 1.1593220338983051e-07,
|
18654 |
+
"loss": 1.472,
|
18655 |
+
"step": 26580
|
18656 |
+
},
|
18657 |
+
{
|
18658 |
+
"epoch": 0.66,
|
18659 |
+
"grad_norm": 66.5,
|
18660 |
+
"learning_rate": 1.1559322033898305e-07,
|
18661 |
+
"loss": 1.4346,
|
18662 |
+
"step": 26590
|
18663 |
+
},
|
18664 |
+
{
|
18665 |
+
"epoch": 0.67,
|
18666 |
+
"grad_norm": 68.0,
|
18667 |
+
"learning_rate": 1.1525423728813559e-07,
|
18668 |
+
"loss": 1.4705,
|
18669 |
+
"step": 26600
|
18670 |
+
},
|
18671 |
+
{
|
18672 |
+
"epoch": 0.67,
|
18673 |
+
"grad_norm": 68.0,
|
18674 |
+
"learning_rate": 1.1491525423728814e-07,
|
18675 |
+
"loss": 1.4424,
|
18676 |
+
"step": 26610
|
18677 |
+
},
|
18678 |
+
{
|
18679 |
+
"epoch": 0.67,
|
18680 |
+
"grad_norm": 67.0,
|
18681 |
+
"learning_rate": 1.1457627118644067e-07,
|
18682 |
+
"loss": 1.4568,
|
18683 |
+
"step": 26620
|
18684 |
+
},
|
18685 |
+
{
|
18686 |
+
"epoch": 0.67,
|
18687 |
+
"grad_norm": 66.0,
|
18688 |
+
"learning_rate": 1.1423728813559322e-07,
|
18689 |
+
"loss": 1.4427,
|
18690 |
+
"step": 26630
|
18691 |
+
},
|
18692 |
+
{
|
18693 |
+
"epoch": 0.67,
|
18694 |
+
"grad_norm": 63.0,
|
18695 |
+
"learning_rate": 1.1389830508474575e-07,
|
18696 |
+
"loss": 1.4333,
|
18697 |
+
"step": 26640
|
18698 |
+
},
|
18699 |
+
{
|
18700 |
+
"epoch": 0.67,
|
18701 |
+
"grad_norm": 65.5,
|
18702 |
+
"learning_rate": 1.135593220338983e-07,
|
18703 |
+
"loss": 1.465,
|
18704 |
+
"step": 26650
|
18705 |
+
},
|
18706 |
+
{
|
18707 |
+
"epoch": 0.67,
|
18708 |
+
"grad_norm": 64.0,
|
18709 |
+
"learning_rate": 1.1322033898305085e-07,
|
18710 |
+
"loss": 1.4452,
|
18711 |
+
"step": 26660
|
18712 |
+
},
|
18713 |
+
{
|
18714 |
+
"epoch": 0.67,
|
18715 |
+
"grad_norm": 62.75,
|
18716 |
+
"learning_rate": 1.1288135593220339e-07,
|
18717 |
+
"loss": 1.4542,
|
18718 |
+
"step": 26670
|
18719 |
+
},
|
18720 |
+
{
|
18721 |
+
"epoch": 0.67,
|
18722 |
+
"grad_norm": 65.5,
|
18723 |
+
"learning_rate": 1.1254237288135593e-07,
|
18724 |
+
"loss": 1.431,
|
18725 |
+
"step": 26680
|
18726 |
+
},
|
18727 |
+
{
|
18728 |
+
"epoch": 0.67,
|
18729 |
+
"grad_norm": 65.5,
|
18730 |
+
"learning_rate": 1.1220338983050846e-07,
|
18731 |
+
"loss": 1.4976,
|
18732 |
+
"step": 26690
|
18733 |
+
},
|
18734 |
+
{
|
18735 |
+
"epoch": 0.67,
|
18736 |
+
"grad_norm": 66.5,
|
18737 |
+
"learning_rate": 1.1186440677966101e-07,
|
18738 |
+
"loss": 1.4866,
|
18739 |
+
"step": 26700
|
18740 |
+
},
|
18741 |
+
{
|
18742 |
+
"epoch": 0.67,
|
18743 |
+
"grad_norm": 67.0,
|
18744 |
+
"learning_rate": 1.1152542372881356e-07,
|
18745 |
+
"loss": 1.451,
|
18746 |
+
"step": 26710
|
18747 |
+
},
|
18748 |
+
{
|
18749 |
+
"epoch": 0.67,
|
18750 |
+
"grad_norm": 67.0,
|
18751 |
+
"learning_rate": 1.1118644067796609e-07,
|
18752 |
+
"loss": 1.4699,
|
18753 |
+
"step": 26720
|
18754 |
+
},
|
18755 |
+
{
|
18756 |
+
"epoch": 0.67,
|
18757 |
+
"grad_norm": 68.0,
|
18758 |
+
"learning_rate": 1.1084745762711865e-07,
|
18759 |
+
"loss": 1.4611,
|
18760 |
+
"step": 26730
|
18761 |
+
},
|
18762 |
+
{
|
18763 |
+
"epoch": 0.67,
|
18764 |
+
"grad_norm": 69.0,
|
18765 |
+
"learning_rate": 1.1050847457627119e-07,
|
18766 |
+
"loss": 1.4227,
|
18767 |
+
"step": 26740
|
18768 |
+
},
|
18769 |
+
{
|
18770 |
+
"epoch": 0.67,
|
18771 |
+
"grad_norm": 67.5,
|
18772 |
+
"learning_rate": 1.1016949152542372e-07,
|
18773 |
+
"loss": 1.4797,
|
18774 |
+
"step": 26750
|
18775 |
+
},
|
18776 |
+
{
|
18777 |
+
"epoch": 0.67,
|
18778 |
+
"grad_norm": 64.5,
|
18779 |
+
"learning_rate": 1.0983050847457627e-07,
|
18780 |
+
"loss": 1.4143,
|
18781 |
+
"step": 26760
|
18782 |
+
},
|
18783 |
+
{
|
18784 |
+
"epoch": 0.67,
|
18785 |
+
"grad_norm": 66.5,
|
18786 |
+
"learning_rate": 1.094915254237288e-07,
|
18787 |
+
"loss": 1.4982,
|
18788 |
+
"step": 26770
|
18789 |
+
},
|
18790 |
+
{
|
18791 |
+
"epoch": 0.67,
|
18792 |
+
"grad_norm": 64.0,
|
18793 |
+
"learning_rate": 1.0915254237288135e-07,
|
18794 |
+
"loss": 1.3862,
|
18795 |
+
"step": 26780
|
18796 |
+
},
|
18797 |
+
{
|
18798 |
+
"epoch": 0.67,
|
18799 |
+
"grad_norm": 69.0,
|
18800 |
+
"learning_rate": 1.0881355932203389e-07,
|
18801 |
+
"loss": 1.4549,
|
18802 |
+
"step": 26790
|
18803 |
+
},
|
18804 |
+
{
|
18805 |
+
"epoch": 0.67,
|
18806 |
+
"grad_norm": 66.5,
|
18807 |
+
"learning_rate": 1.0847457627118643e-07,
|
18808 |
+
"loss": 1.427,
|
18809 |
+
"step": 26800
|
18810 |
+
},
|
18811 |
+
{
|
18812 |
+
"epoch": 0.67,
|
18813 |
+
"grad_norm": 66.0,
|
18814 |
+
"learning_rate": 1.0813559322033897e-07,
|
18815 |
+
"loss": 1.4717,
|
18816 |
+
"step": 26810
|
18817 |
+
},
|
18818 |
+
{
|
18819 |
+
"epoch": 0.67,
|
18820 |
+
"grad_norm": 64.5,
|
18821 |
+
"learning_rate": 1.0779661016949151e-07,
|
18822 |
+
"loss": 1.4578,
|
18823 |
+
"step": 26820
|
18824 |
+
},
|
18825 |
+
{
|
18826 |
+
"epoch": 0.67,
|
18827 |
+
"grad_norm": 64.5,
|
18828 |
+
"learning_rate": 1.0745762711864406e-07,
|
18829 |
+
"loss": 1.4316,
|
18830 |
+
"step": 26830
|
18831 |
+
},
|
18832 |
+
{
|
18833 |
+
"epoch": 0.67,
|
18834 |
+
"grad_norm": 67.5,
|
18835 |
+
"learning_rate": 1.0711864406779661e-07,
|
18836 |
+
"loss": 1.4959,
|
18837 |
+
"step": 26840
|
18838 |
+
},
|
18839 |
+
{
|
18840 |
+
"epoch": 0.67,
|
18841 |
+
"grad_norm": 69.0,
|
18842 |
+
"learning_rate": 1.0677966101694914e-07,
|
18843 |
+
"loss": 1.4684,
|
18844 |
+
"step": 26850
|
18845 |
+
},
|
18846 |
+
{
|
18847 |
+
"epoch": 0.67,
|
18848 |
+
"grad_norm": 68.5,
|
18849 |
+
"learning_rate": 1.0644067796610169e-07,
|
18850 |
+
"loss": 1.4891,
|
18851 |
+
"step": 26860
|
18852 |
+
},
|
18853 |
+
{
|
18854 |
+
"epoch": 0.67,
|
18855 |
+
"grad_norm": 63.25,
|
18856 |
+
"learning_rate": 1.0610169491525423e-07,
|
18857 |
+
"loss": 1.4411,
|
18858 |
+
"step": 26870
|
18859 |
+
},
|
18860 |
+
{
|
18861 |
+
"epoch": 0.67,
|
18862 |
+
"grad_norm": 67.5,
|
18863 |
+
"learning_rate": 1.0576271186440677e-07,
|
18864 |
+
"loss": 1.4614,
|
18865 |
+
"step": 26880
|
18866 |
+
},
|
18867 |
+
{
|
18868 |
+
"epoch": 0.67,
|
18869 |
+
"grad_norm": 69.5,
|
18870 |
+
"learning_rate": 1.0542372881355931e-07,
|
18871 |
+
"loss": 1.4461,
|
18872 |
+
"step": 26890
|
18873 |
+
},
|
18874 |
+
{
|
18875 |
+
"epoch": 0.67,
|
18876 |
+
"grad_norm": 65.0,
|
18877 |
+
"learning_rate": 1.0508474576271186e-07,
|
18878 |
+
"loss": 1.4477,
|
18879 |
+
"step": 26900
|
18880 |
+
},
|
18881 |
+
{
|
18882 |
+
"epoch": 0.67,
|
18883 |
+
"grad_norm": 68.5,
|
18884 |
+
"learning_rate": 1.047457627118644e-07,
|
18885 |
+
"loss": 1.4497,
|
18886 |
+
"step": 26910
|
18887 |
+
},
|
18888 |
+
{
|
18889 |
+
"epoch": 0.67,
|
18890 |
+
"grad_norm": 66.0,
|
18891 |
+
"learning_rate": 1.0440677966101695e-07,
|
18892 |
+
"loss": 1.4276,
|
18893 |
+
"step": 26920
|
18894 |
+
},
|
18895 |
+
{
|
18896 |
+
"epoch": 0.67,
|
18897 |
+
"grad_norm": 71.5,
|
18898 |
+
"learning_rate": 1.0406779661016948e-07,
|
18899 |
+
"loss": 1.5078,
|
18900 |
+
"step": 26930
|
18901 |
+
},
|
18902 |
+
{
|
18903 |
+
"epoch": 0.67,
|
18904 |
+
"grad_norm": 66.5,
|
18905 |
+
"learning_rate": 1.0372881355932203e-07,
|
18906 |
+
"loss": 1.4674,
|
18907 |
+
"step": 26940
|
18908 |
+
},
|
18909 |
+
{
|
18910 |
+
"epoch": 0.67,
|
18911 |
+
"grad_norm": 67.0,
|
18912 |
+
"learning_rate": 1.0338983050847456e-07,
|
18913 |
+
"loss": 1.4352,
|
18914 |
+
"step": 26950
|
18915 |
+
},
|
18916 |
+
{
|
18917 |
+
"epoch": 0.67,
|
18918 |
+
"grad_norm": 70.5,
|
18919 |
+
"learning_rate": 1.0305084745762711e-07,
|
18920 |
+
"loss": 1.4248,
|
18921 |
+
"step": 26960
|
18922 |
+
},
|
18923 |
+
{
|
18924 |
+
"epoch": 0.67,
|
18925 |
+
"grad_norm": 65.5,
|
18926 |
+
"learning_rate": 1.0271186440677965e-07,
|
18927 |
+
"loss": 1.4597,
|
18928 |
+
"step": 26970
|
18929 |
+
},
|
18930 |
+
{
|
18931 |
+
"epoch": 0.67,
|
18932 |
+
"grad_norm": 65.5,
|
18933 |
+
"learning_rate": 1.023728813559322e-07,
|
18934 |
+
"loss": 1.4352,
|
18935 |
+
"step": 26980
|
18936 |
+
},
|
18937 |
+
{
|
18938 |
+
"epoch": 0.67,
|
18939 |
+
"grad_norm": 67.5,
|
18940 |
+
"learning_rate": 1.0203389830508474e-07,
|
18941 |
+
"loss": 1.4893,
|
18942 |
+
"step": 26990
|
18943 |
+
},
|
18944 |
+
{
|
18945 |
+
"epoch": 0.68,
|
18946 |
+
"grad_norm": 65.0,
|
18947 |
+
"learning_rate": 1.0169491525423729e-07,
|
18948 |
+
"loss": 1.4631,
|
18949 |
+
"step": 27000
|
18950 |
+
},
|
18951 |
+
{
|
18952 |
+
"epoch": 0.68,
|
18953 |
+
"grad_norm": 65.5,
|
18954 |
+
"learning_rate": 1.0135593220338982e-07,
|
18955 |
+
"loss": 1.4241,
|
18956 |
+
"step": 27010
|
18957 |
+
},
|
18958 |
+
{
|
18959 |
+
"epoch": 0.68,
|
18960 |
+
"grad_norm": 68.0,
|
18961 |
+
"learning_rate": 1.0101694915254237e-07,
|
18962 |
+
"loss": 1.4746,
|
18963 |
+
"step": 27020
|
18964 |
+
},
|
18965 |
+
{
|
18966 |
+
"epoch": 0.68,
|
18967 |
+
"grad_norm": 70.5,
|
18968 |
+
"learning_rate": 1.006779661016949e-07,
|
18969 |
+
"loss": 1.4814,
|
18970 |
+
"step": 27030
|
18971 |
+
},
|
18972 |
+
{
|
18973 |
+
"epoch": 0.68,
|
18974 |
+
"grad_norm": 69.5,
|
18975 |
+
"learning_rate": 1.0033898305084745e-07,
|
18976 |
+
"loss": 1.4733,
|
18977 |
+
"step": 27040
|
18978 |
+
},
|
18979 |
+
{
|
18980 |
+
"epoch": 0.68,
|
18981 |
+
"grad_norm": 66.0,
|
18982 |
+
"learning_rate": 1e-07,
|
18983 |
+
"loss": 1.414,
|
18984 |
+
"step": 27050
|
18985 |
+
},
|
18986 |
+
{
|
18987 |
+
"epoch": 0.68,
|
18988 |
+
"grad_norm": 66.5,
|
18989 |
+
"learning_rate": 9.966101694915254e-08,
|
18990 |
+
"loss": 1.4638,
|
18991 |
+
"step": 27060
|
18992 |
+
},
|
18993 |
+
{
|
18994 |
+
"epoch": 0.68,
|
18995 |
+
"grad_norm": 65.5,
|
18996 |
+
"learning_rate": 9.932203389830508e-08,
|
18997 |
+
"loss": 1.4267,
|
18998 |
+
"step": 27070
|
18999 |
+
},
|
19000 |
+
{
|
19001 |
+
"epoch": 0.68,
|
19002 |
+
"grad_norm": 67.0,
|
19003 |
+
"learning_rate": 9.898305084745762e-08,
|
19004 |
+
"loss": 1.4608,
|
19005 |
+
"step": 27080
|
19006 |
+
},
|
19007 |
+
{
|
19008 |
+
"epoch": 0.68,
|
19009 |
+
"grad_norm": 66.0,
|
19010 |
+
"learning_rate": 9.864406779661016e-08,
|
19011 |
+
"loss": 1.4722,
|
19012 |
+
"step": 27090
|
19013 |
+
},
|
19014 |
+
{
|
19015 |
+
"epoch": 0.68,
|
19016 |
+
"grad_norm": 71.5,
|
19017 |
+
"learning_rate": 9.830508474576271e-08,
|
19018 |
+
"loss": 1.4615,
|
19019 |
+
"step": 27100
|
19020 |
+
},
|
19021 |
+
{
|
19022 |
+
"epoch": 0.68,
|
19023 |
+
"grad_norm": 66.0,
|
19024 |
+
"learning_rate": 9.796610169491524e-08,
|
19025 |
+
"loss": 1.4675,
|
19026 |
+
"step": 27110
|
19027 |
+
},
|
19028 |
+
{
|
19029 |
+
"epoch": 0.68,
|
19030 |
+
"grad_norm": 68.5,
|
19031 |
+
"learning_rate": 9.76271186440678e-08,
|
19032 |
+
"loss": 1.4645,
|
19033 |
+
"step": 27120
|
19034 |
+
},
|
19035 |
+
{
|
19036 |
+
"epoch": 0.68,
|
19037 |
+
"grad_norm": 65.0,
|
19038 |
+
"learning_rate": 9.728813559322034e-08,
|
19039 |
+
"loss": 1.4702,
|
19040 |
+
"step": 27130
|
19041 |
+
},
|
19042 |
+
{
|
19043 |
+
"epoch": 0.68,
|
19044 |
+
"grad_norm": 69.0,
|
19045 |
+
"learning_rate": 9.694915254237288e-08,
|
19046 |
+
"loss": 1.4844,
|
19047 |
+
"step": 27140
|
19048 |
+
},
|
19049 |
+
{
|
19050 |
+
"epoch": 0.68,
|
19051 |
+
"grad_norm": 68.5,
|
19052 |
+
"learning_rate": 9.661016949152542e-08,
|
19053 |
+
"loss": 1.4466,
|
19054 |
+
"step": 27150
|
19055 |
+
},
|
19056 |
+
{
|
19057 |
+
"epoch": 0.68,
|
19058 |
+
"grad_norm": 69.0,
|
19059 |
+
"learning_rate": 9.627118644067796e-08,
|
19060 |
+
"loss": 1.4445,
|
19061 |
+
"step": 27160
|
19062 |
+
},
|
19063 |
+
{
|
19064 |
+
"epoch": 0.68,
|
19065 |
+
"grad_norm": 66.5,
|
19066 |
+
"learning_rate": 9.59322033898305e-08,
|
19067 |
+
"loss": 1.4458,
|
19068 |
+
"step": 27170
|
19069 |
+
},
|
19070 |
+
{
|
19071 |
+
"epoch": 0.68,
|
19072 |
+
"grad_norm": 66.5,
|
19073 |
+
"learning_rate": 9.559322033898305e-08,
|
19074 |
+
"loss": 1.4263,
|
19075 |
+
"step": 27180
|
19076 |
+
},
|
19077 |
+
{
|
19078 |
+
"epoch": 0.68,
|
19079 |
+
"grad_norm": 65.5,
|
19080 |
+
"learning_rate": 9.525423728813558e-08,
|
19081 |
+
"loss": 1.4717,
|
19082 |
+
"step": 27190
|
19083 |
+
},
|
19084 |
+
{
|
19085 |
+
"epoch": 0.68,
|
19086 |
+
"grad_norm": 69.0,
|
19087 |
+
"learning_rate": 9.491525423728814e-08,
|
19088 |
+
"loss": 1.4649,
|
19089 |
+
"step": 27200
|
19090 |
+
},
|
19091 |
+
{
|
19092 |
+
"epoch": 0.68,
|
19093 |
+
"grad_norm": 65.5,
|
19094 |
+
"learning_rate": 9.457627118644066e-08,
|
19095 |
+
"loss": 1.4142,
|
19096 |
+
"step": 27210
|
19097 |
+
},
|
19098 |
+
{
|
19099 |
+
"epoch": 0.68,
|
19100 |
+
"grad_norm": 65.5,
|
19101 |
+
"learning_rate": 9.423728813559322e-08,
|
19102 |
+
"loss": 1.4443,
|
19103 |
+
"step": 27220
|
19104 |
+
},
|
19105 |
+
{
|
19106 |
+
"epoch": 0.68,
|
19107 |
+
"grad_norm": 65.5,
|
19108 |
+
"learning_rate": 9.389830508474576e-08,
|
19109 |
+
"loss": 1.4432,
|
19110 |
+
"step": 27230
|
19111 |
+
},
|
19112 |
+
{
|
19113 |
+
"epoch": 0.68,
|
19114 |
+
"grad_norm": 67.0,
|
19115 |
+
"learning_rate": 9.35593220338983e-08,
|
19116 |
+
"loss": 1.4888,
|
19117 |
+
"step": 27240
|
19118 |
+
},
|
19119 |
+
{
|
19120 |
+
"epoch": 0.68,
|
19121 |
+
"grad_norm": 70.0,
|
19122 |
+
"learning_rate": 9.322033898305084e-08,
|
19123 |
+
"loss": 1.4675,
|
19124 |
+
"step": 27250
|
19125 |
+
},
|
19126 |
+
{
|
19127 |
+
"epoch": 0.68,
|
19128 |
+
"grad_norm": 68.0,
|
19129 |
+
"learning_rate": 9.28813559322034e-08,
|
19130 |
+
"loss": 1.4734,
|
19131 |
+
"step": 27260
|
19132 |
+
},
|
19133 |
+
{
|
19134 |
+
"epoch": 0.68,
|
19135 |
+
"grad_norm": 69.5,
|
19136 |
+
"learning_rate": 9.254237288135592e-08,
|
19137 |
+
"loss": 1.4639,
|
19138 |
+
"step": 27270
|
19139 |
+
},
|
19140 |
+
{
|
19141 |
+
"epoch": 0.68,
|
19142 |
+
"grad_norm": 64.5,
|
19143 |
+
"learning_rate": 9.220338983050848e-08,
|
19144 |
+
"loss": 1.4594,
|
19145 |
+
"step": 27280
|
19146 |
+
},
|
19147 |
+
{
|
19148 |
+
"epoch": 0.68,
|
19149 |
+
"grad_norm": 65.5,
|
19150 |
+
"learning_rate": 9.1864406779661e-08,
|
19151 |
+
"loss": 1.4698,
|
19152 |
+
"step": 27290
|
19153 |
+
},
|
19154 |
+
{
|
19155 |
+
"epoch": 0.68,
|
19156 |
+
"grad_norm": 67.0,
|
19157 |
+
"learning_rate": 9.152542372881356e-08,
|
19158 |
+
"loss": 1.5159,
|
19159 |
+
"step": 27300
|
19160 |
+
},
|
19161 |
+
{
|
19162 |
+
"epoch": 0.68,
|
19163 |
+
"grad_norm": 66.5,
|
19164 |
+
"learning_rate": 9.11864406779661e-08,
|
19165 |
+
"loss": 1.4563,
|
19166 |
+
"step": 27310
|
19167 |
+
},
|
19168 |
+
{
|
19169 |
+
"epoch": 0.68,
|
19170 |
+
"grad_norm": 69.0,
|
19171 |
+
"learning_rate": 9.084745762711864e-08,
|
19172 |
+
"loss": 1.4545,
|
19173 |
+
"step": 27320
|
19174 |
+
},
|
19175 |
+
{
|
19176 |
+
"epoch": 0.68,
|
19177 |
+
"grad_norm": 64.5,
|
19178 |
+
"learning_rate": 9.050847457627118e-08,
|
19179 |
+
"loss": 1.4652,
|
19180 |
+
"step": 27330
|
19181 |
+
},
|
19182 |
+
{
|
19183 |
+
"epoch": 0.68,
|
19184 |
+
"grad_norm": 67.5,
|
19185 |
+
"learning_rate": 9.016949152542372e-08,
|
19186 |
+
"loss": 1.4417,
|
19187 |
+
"step": 27340
|
19188 |
+
},
|
19189 |
+
{
|
19190 |
+
"epoch": 0.68,
|
19191 |
+
"grad_norm": 69.5,
|
19192 |
+
"learning_rate": 8.983050847457626e-08,
|
19193 |
+
"loss": 1.456,
|
19194 |
+
"step": 27350
|
19195 |
+
},
|
19196 |
+
{
|
19197 |
+
"epoch": 0.68,
|
19198 |
+
"grad_norm": 67.5,
|
19199 |
+
"learning_rate": 8.949152542372882e-08,
|
19200 |
+
"loss": 1.4768,
|
19201 |
+
"step": 27360
|
19202 |
+
},
|
19203 |
+
{
|
19204 |
+
"epoch": 0.68,
|
19205 |
+
"grad_norm": 72.5,
|
19206 |
+
"learning_rate": 8.915254237288134e-08,
|
19207 |
+
"loss": 1.4866,
|
19208 |
+
"step": 27370
|
19209 |
+
},
|
19210 |
+
{
|
19211 |
+
"epoch": 0.68,
|
19212 |
+
"grad_norm": 67.0,
|
19213 |
+
"learning_rate": 8.88135593220339e-08,
|
19214 |
+
"loss": 1.4376,
|
19215 |
+
"step": 27380
|
19216 |
+
},
|
19217 |
+
{
|
19218 |
+
"epoch": 0.68,
|
19219 |
+
"grad_norm": 65.5,
|
19220 |
+
"learning_rate": 8.847457627118644e-08,
|
19221 |
+
"loss": 1.492,
|
19222 |
+
"step": 27390
|
19223 |
+
},
|
19224 |
+
{
|
19225 |
+
"epoch": 0.69,
|
19226 |
+
"grad_norm": 68.5,
|
19227 |
+
"learning_rate": 8.813559322033898e-08,
|
19228 |
+
"loss": 1.4365,
|
19229 |
+
"step": 27400
|
19230 |
+
},
|
19231 |
+
{
|
19232 |
+
"epoch": 0.69,
|
19233 |
+
"grad_norm": 65.5,
|
19234 |
+
"learning_rate": 8.779661016949152e-08,
|
19235 |
+
"loss": 1.426,
|
19236 |
+
"step": 27410
|
19237 |
+
},
|
19238 |
+
{
|
19239 |
+
"epoch": 0.69,
|
19240 |
+
"grad_norm": 66.5,
|
19241 |
+
"learning_rate": 8.745762711864406e-08,
|
19242 |
+
"loss": 1.4611,
|
19243 |
+
"step": 27420
|
19244 |
+
},
|
19245 |
+
{
|
19246 |
+
"epoch": 0.69,
|
19247 |
+
"grad_norm": 63.25,
|
19248 |
+
"learning_rate": 8.71186440677966e-08,
|
19249 |
+
"loss": 1.4911,
|
19250 |
+
"step": 27430
|
19251 |
+
},
|
19252 |
+
{
|
19253 |
+
"epoch": 0.69,
|
19254 |
+
"grad_norm": 67.0,
|
19255 |
+
"learning_rate": 8.677966101694916e-08,
|
19256 |
+
"loss": 1.4346,
|
19257 |
+
"step": 27440
|
19258 |
+
},
|
19259 |
+
{
|
19260 |
+
"epoch": 0.69,
|
19261 |
+
"grad_norm": 65.5,
|
19262 |
+
"learning_rate": 8.644067796610168e-08,
|
19263 |
+
"loss": 1.4557,
|
19264 |
+
"step": 27450
|
19265 |
+
},
|
19266 |
+
{
|
19267 |
+
"epoch": 0.69,
|
19268 |
+
"grad_norm": 68.5,
|
19269 |
+
"learning_rate": 8.610169491525424e-08,
|
19270 |
+
"loss": 1.4695,
|
19271 |
+
"step": 27460
|
19272 |
+
},
|
19273 |
+
{
|
19274 |
+
"epoch": 0.69,
|
19275 |
+
"grad_norm": 64.5,
|
19276 |
+
"learning_rate": 8.576271186440677e-08,
|
19277 |
+
"loss": 1.4524,
|
19278 |
+
"step": 27470
|
19279 |
+
},
|
19280 |
+
{
|
19281 |
+
"epoch": 0.69,
|
19282 |
+
"grad_norm": 66.5,
|
19283 |
+
"learning_rate": 8.542372881355932e-08,
|
19284 |
+
"loss": 1.4999,
|
19285 |
+
"step": 27480
|
19286 |
+
},
|
19287 |
+
{
|
19288 |
+
"epoch": 0.69,
|
19289 |
+
"grad_norm": 74.5,
|
19290 |
+
"learning_rate": 8.508474576271186e-08,
|
19291 |
+
"loss": 1.4584,
|
19292 |
+
"step": 27490
|
19293 |
+
},
|
19294 |
+
{
|
19295 |
+
"epoch": 0.69,
|
19296 |
+
"grad_norm": 66.0,
|
19297 |
+
"learning_rate": 8.47457627118644e-08,
|
19298 |
+
"loss": 1.4943,
|
19299 |
+
"step": 27500
|
19300 |
+
},
|
19301 |
+
{
|
19302 |
+
"epoch": 0.69,
|
19303 |
+
"grad_norm": 63.25,
|
19304 |
+
"learning_rate": 8.440677966101694e-08,
|
19305 |
+
"loss": 1.4495,
|
19306 |
+
"step": 27510
|
19307 |
+
},
|
19308 |
+
{
|
19309 |
+
"epoch": 0.69,
|
19310 |
+
"grad_norm": 66.5,
|
19311 |
+
"learning_rate": 8.40677966101695e-08,
|
19312 |
+
"loss": 1.4801,
|
19313 |
+
"step": 27520
|
19314 |
+
},
|
19315 |
+
{
|
19316 |
+
"epoch": 0.69,
|
19317 |
+
"grad_norm": 66.0,
|
19318 |
+
"learning_rate": 8.372881355932203e-08,
|
19319 |
+
"loss": 1.4397,
|
19320 |
+
"step": 27530
|
19321 |
+
},
|
19322 |
+
{
|
19323 |
+
"epoch": 0.69,
|
19324 |
+
"grad_norm": 65.5,
|
19325 |
+
"learning_rate": 8.338983050847458e-08,
|
19326 |
+
"loss": 1.4397,
|
19327 |
+
"step": 27540
|
19328 |
+
},
|
19329 |
+
{
|
19330 |
+
"epoch": 0.69,
|
19331 |
+
"grad_norm": 68.5,
|
19332 |
+
"learning_rate": 8.305084745762711e-08,
|
19333 |
+
"loss": 1.4723,
|
19334 |
+
"step": 27550
|
19335 |
+
},
|
19336 |
+
{
|
19337 |
+
"epoch": 0.69,
|
19338 |
+
"grad_norm": 65.5,
|
19339 |
+
"learning_rate": 8.271186440677966e-08,
|
19340 |
+
"loss": 1.4612,
|
19341 |
+
"step": 27560
|
19342 |
+
},
|
19343 |
+
{
|
19344 |
+
"epoch": 0.69,
|
19345 |
+
"grad_norm": 66.5,
|
19346 |
+
"learning_rate": 8.23728813559322e-08,
|
19347 |
+
"loss": 1.4544,
|
19348 |
+
"step": 27570
|
19349 |
+
},
|
19350 |
+
{
|
19351 |
+
"epoch": 0.69,
|
19352 |
+
"grad_norm": 70.5,
|
19353 |
+
"learning_rate": 8.203389830508474e-08,
|
19354 |
+
"loss": 1.5102,
|
19355 |
+
"step": 27580
|
19356 |
+
},
|
19357 |
+
{
|
19358 |
+
"epoch": 0.69,
|
19359 |
+
"grad_norm": 68.5,
|
19360 |
+
"learning_rate": 8.169491525423728e-08,
|
19361 |
+
"loss": 1.4415,
|
19362 |
+
"step": 27590
|
19363 |
+
},
|
19364 |
+
{
|
19365 |
+
"epoch": 0.69,
|
19366 |
+
"grad_norm": 65.0,
|
19367 |
+
"learning_rate": 8.135593220338982e-08,
|
19368 |
+
"loss": 1.3897,
|
19369 |
+
"step": 27600
|
19370 |
+
},
|
19371 |
+
{
|
19372 |
+
"epoch": 0.69,
|
19373 |
+
"grad_norm": 67.5,
|
19374 |
+
"learning_rate": 8.101694915254237e-08,
|
19375 |
+
"loss": 1.5114,
|
19376 |
+
"step": 27610
|
19377 |
+
},
|
19378 |
+
{
|
19379 |
+
"epoch": 0.69,
|
19380 |
+
"grad_norm": 68.0,
|
19381 |
+
"learning_rate": 8.067796610169492e-08,
|
19382 |
+
"loss": 1.5128,
|
19383 |
+
"step": 27620
|
19384 |
+
},
|
19385 |
+
{
|
19386 |
+
"epoch": 0.69,
|
19387 |
+
"grad_norm": 67.0,
|
19388 |
+
"learning_rate": 8.033898305084745e-08,
|
19389 |
+
"loss": 1.4465,
|
19390 |
+
"step": 27630
|
19391 |
+
},
|
19392 |
+
{
|
19393 |
+
"epoch": 0.69,
|
19394 |
+
"grad_norm": 64.5,
|
19395 |
+
"learning_rate": 8e-08,
|
19396 |
+
"loss": 1.442,
|
19397 |
+
"step": 27640
|
19398 |
+
},
|
19399 |
+
{
|
19400 |
+
"epoch": 0.69,
|
19401 |
+
"grad_norm": 64.5,
|
19402 |
+
"learning_rate": 7.966101694915253e-08,
|
19403 |
+
"loss": 1.4609,
|
19404 |
+
"step": 27650
|
19405 |
+
},
|
19406 |
+
{
|
19407 |
+
"epoch": 0.69,
|
19408 |
+
"grad_norm": 65.0,
|
19409 |
+
"learning_rate": 7.932203389830508e-08,
|
19410 |
+
"loss": 1.4393,
|
19411 |
+
"step": 27660
|
19412 |
+
},
|
19413 |
+
{
|
19414 |
+
"epoch": 0.69,
|
19415 |
+
"grad_norm": 68.5,
|
19416 |
+
"learning_rate": 7.898305084745762e-08,
|
19417 |
+
"loss": 1.4636,
|
19418 |
+
"step": 27670
|
19419 |
+
},
|
19420 |
+
{
|
19421 |
+
"epoch": 0.69,
|
19422 |
+
"grad_norm": 64.5,
|
19423 |
+
"learning_rate": 7.864406779661017e-08,
|
19424 |
+
"loss": 1.4555,
|
19425 |
+
"step": 27680
|
19426 |
+
},
|
19427 |
+
{
|
19428 |
+
"epoch": 0.69,
|
19429 |
+
"grad_norm": 64.0,
|
19430 |
+
"learning_rate": 7.83050847457627e-08,
|
19431 |
+
"loss": 1.443,
|
19432 |
+
"step": 27690
|
19433 |
+
},
|
19434 |
+
{
|
19435 |
+
"epoch": 0.69,
|
19436 |
+
"grad_norm": 67.0,
|
19437 |
+
"learning_rate": 7.796610169491526e-08,
|
19438 |
+
"loss": 1.4717,
|
19439 |
+
"step": 27700
|
19440 |
+
},
|
19441 |
+
{
|
19442 |
+
"epoch": 0.69,
|
19443 |
+
"grad_norm": 71.5,
|
19444 |
+
"learning_rate": 7.762711864406779e-08,
|
19445 |
+
"loss": 1.4653,
|
19446 |
+
"step": 27710
|
19447 |
+
},
|
19448 |
+
{
|
19449 |
+
"epoch": 0.69,
|
19450 |
+
"grad_norm": 67.5,
|
19451 |
+
"learning_rate": 7.728813559322034e-08,
|
19452 |
+
"loss": 1.4794,
|
19453 |
+
"step": 27720
|
19454 |
+
},
|
19455 |
+
{
|
19456 |
+
"epoch": 0.69,
|
19457 |
+
"grad_norm": 69.5,
|
19458 |
+
"learning_rate": 7.694915254237287e-08,
|
19459 |
+
"loss": 1.4852,
|
19460 |
+
"step": 27730
|
19461 |
+
},
|
19462 |
+
{
|
19463 |
+
"epoch": 0.69,
|
19464 |
+
"grad_norm": 65.0,
|
19465 |
+
"learning_rate": 7.661016949152542e-08,
|
19466 |
+
"loss": 1.4311,
|
19467 |
+
"step": 27740
|
19468 |
+
},
|
19469 |
+
{
|
19470 |
+
"epoch": 0.69,
|
19471 |
+
"grad_norm": 70.0,
|
19472 |
+
"learning_rate": 7.627118644067796e-08,
|
19473 |
+
"loss": 1.4993,
|
19474 |
+
"step": 27750
|
19475 |
+
},
|
19476 |
+
{
|
19477 |
+
"epoch": 0.69,
|
19478 |
+
"grad_norm": 66.5,
|
19479 |
+
"learning_rate": 7.59322033898305e-08,
|
19480 |
+
"loss": 1.4761,
|
19481 |
+
"step": 27760
|
19482 |
+
},
|
19483 |
+
{
|
19484 |
+
"epoch": 0.69,
|
19485 |
+
"grad_norm": 66.0,
|
19486 |
+
"learning_rate": 7.559322033898305e-08,
|
19487 |
+
"loss": 1.4759,
|
19488 |
+
"step": 27770
|
19489 |
+
},
|
19490 |
+
{
|
19491 |
+
"epoch": 0.69,
|
19492 |
+
"grad_norm": 64.5,
|
19493 |
+
"learning_rate": 7.525423728813559e-08,
|
19494 |
+
"loss": 1.4645,
|
19495 |
+
"step": 27780
|
19496 |
+
},
|
19497 |
+
{
|
19498 |
+
"epoch": 0.69,
|
19499 |
+
"grad_norm": 66.0,
|
19500 |
+
"learning_rate": 7.491525423728813e-08,
|
19501 |
+
"loss": 1.4371,
|
19502 |
+
"step": 27790
|
19503 |
+
},
|
19504 |
+
{
|
19505 |
+
"epoch": 0.69,
|
19506 |
+
"grad_norm": 69.0,
|
19507 |
+
"learning_rate": 7.457627118644068e-08,
|
19508 |
+
"loss": 1.4188,
|
19509 |
+
"step": 27800
|
19510 |
+
},
|
19511 |
+
{
|
19512 |
+
"epoch": 0.7,
|
19513 |
+
"grad_norm": 67.0,
|
19514 |
+
"learning_rate": 7.423728813559321e-08,
|
19515 |
+
"loss": 1.4593,
|
19516 |
+
"step": 27810
|
19517 |
+
},
|
19518 |
+
{
|
19519 |
+
"epoch": 0.7,
|
19520 |
+
"grad_norm": 67.5,
|
19521 |
+
"learning_rate": 7.389830508474576e-08,
|
19522 |
+
"loss": 1.4744,
|
19523 |
+
"step": 27820
|
19524 |
+
},
|
19525 |
+
{
|
19526 |
+
"epoch": 0.7,
|
19527 |
+
"grad_norm": 67.5,
|
19528 |
+
"learning_rate": 7.35593220338983e-08,
|
19529 |
+
"loss": 1.3997,
|
19530 |
+
"step": 27830
|
19531 |
+
},
|
19532 |
+
{
|
19533 |
+
"epoch": 0.7,
|
19534 |
+
"grad_norm": 64.5,
|
19535 |
+
"learning_rate": 7.322033898305085e-08,
|
19536 |
+
"loss": 1.4356,
|
19537 |
+
"step": 27840
|
19538 |
+
},
|
19539 |
+
{
|
19540 |
+
"epoch": 0.7,
|
19541 |
+
"grad_norm": 65.0,
|
19542 |
+
"learning_rate": 7.288135593220339e-08,
|
19543 |
+
"loss": 1.4756,
|
19544 |
+
"step": 27850
|
19545 |
+
},
|
19546 |
+
{
|
19547 |
+
"epoch": 0.7,
|
19548 |
+
"grad_norm": 70.5,
|
19549 |
+
"learning_rate": 7.254237288135593e-08,
|
19550 |
+
"loss": 1.4537,
|
19551 |
+
"step": 27860
|
19552 |
+
},
|
19553 |
+
{
|
19554 |
+
"epoch": 0.7,
|
19555 |
+
"grad_norm": 69.0,
|
19556 |
+
"learning_rate": 7.220338983050847e-08,
|
19557 |
+
"loss": 1.4577,
|
19558 |
+
"step": 27870
|
19559 |
+
},
|
19560 |
+
{
|
19561 |
+
"epoch": 0.7,
|
19562 |
+
"grad_norm": 68.0,
|
19563 |
+
"learning_rate": 7.186440677966102e-08,
|
19564 |
+
"loss": 1.4765,
|
19565 |
+
"step": 27880
|
19566 |
+
},
|
19567 |
+
{
|
19568 |
+
"epoch": 0.7,
|
19569 |
+
"grad_norm": 69.5,
|
19570 |
+
"learning_rate": 7.152542372881355e-08,
|
19571 |
+
"loss": 1.4104,
|
19572 |
+
"step": 27890
|
19573 |
+
},
|
19574 |
+
{
|
19575 |
+
"epoch": 0.7,
|
19576 |
+
"grad_norm": 69.0,
|
19577 |
+
"learning_rate": 7.11864406779661e-08,
|
19578 |
+
"loss": 1.4665,
|
19579 |
+
"step": 27900
|
19580 |
+
},
|
19581 |
+
{
|
19582 |
+
"epoch": 0.7,
|
19583 |
+
"grad_norm": 67.0,
|
19584 |
+
"learning_rate": 7.084745762711863e-08,
|
19585 |
+
"loss": 1.4816,
|
19586 |
+
"step": 27910
|
19587 |
+
},
|
19588 |
+
{
|
19589 |
+
"epoch": 0.7,
|
19590 |
+
"grad_norm": 66.0,
|
19591 |
+
"learning_rate": 7.050847457627119e-08,
|
19592 |
+
"loss": 1.4724,
|
19593 |
+
"step": 27920
|
19594 |
+
},
|
19595 |
+
{
|
19596 |
+
"epoch": 0.7,
|
19597 |
+
"grad_norm": 67.5,
|
19598 |
+
"learning_rate": 7.016949152542373e-08,
|
19599 |
+
"loss": 1.4205,
|
19600 |
+
"step": 27930
|
19601 |
+
},
|
19602 |
+
{
|
19603 |
+
"epoch": 0.7,
|
19604 |
+
"grad_norm": 72.0,
|
19605 |
+
"learning_rate": 6.983050847457627e-08,
|
19606 |
+
"loss": 1.4627,
|
19607 |
+
"step": 27940
|
19608 |
+
},
|
19609 |
+
{
|
19610 |
+
"epoch": 0.7,
|
19611 |
+
"grad_norm": 66.5,
|
19612 |
+
"learning_rate": 6.949152542372881e-08,
|
19613 |
+
"loss": 1.4729,
|
19614 |
+
"step": 27950
|
19615 |
+
},
|
19616 |
+
{
|
19617 |
+
"epoch": 0.7,
|
19618 |
+
"grad_norm": 65.5,
|
19619 |
+
"learning_rate": 6.915254237288136e-08,
|
19620 |
+
"loss": 1.4396,
|
19621 |
+
"step": 27960
|
19622 |
+
},
|
19623 |
+
{
|
19624 |
+
"epoch": 0.7,
|
19625 |
+
"grad_norm": 64.5,
|
19626 |
+
"learning_rate": 6.881355932203389e-08,
|
19627 |
+
"loss": 1.4283,
|
19628 |
+
"step": 27970
|
19629 |
+
},
|
19630 |
+
{
|
19631 |
+
"epoch": 0.7,
|
19632 |
+
"grad_norm": 67.5,
|
19633 |
+
"learning_rate": 6.847457627118645e-08,
|
19634 |
+
"loss": 1.4881,
|
19635 |
+
"step": 27980
|
19636 |
+
},
|
19637 |
+
{
|
19638 |
+
"epoch": 0.7,
|
19639 |
+
"grad_norm": 67.5,
|
19640 |
+
"learning_rate": 6.813559322033897e-08,
|
19641 |
+
"loss": 1.5324,
|
19642 |
+
"step": 27990
|
19643 |
+
},
|
19644 |
+
{
|
19645 |
+
"epoch": 0.7,
|
19646 |
+
"grad_norm": 65.0,
|
19647 |
+
"learning_rate": 6.779661016949153e-08,
|
19648 |
+
"loss": 1.4599,
|
19649 |
+
"step": 28000
|
19650 |
+
},
|
19651 |
+
{
|
19652 |
+
"epoch": 0.7,
|
19653 |
+
"grad_norm": 68.5,
|
19654 |
+
"learning_rate": 6.745762711864407e-08,
|
19655 |
+
"loss": 1.4843,
|
19656 |
+
"step": 28010
|
19657 |
+
},
|
19658 |
+
{
|
19659 |
+
"epoch": 0.7,
|
19660 |
+
"grad_norm": 69.5,
|
19661 |
+
"learning_rate": 6.711864406779661e-08,
|
19662 |
+
"loss": 1.4745,
|
19663 |
+
"step": 28020
|
19664 |
+
},
|
19665 |
+
{
|
19666 |
+
"epoch": 0.7,
|
19667 |
+
"grad_norm": 67.5,
|
19668 |
+
"learning_rate": 6.677966101694915e-08,
|
19669 |
+
"loss": 1.5165,
|
19670 |
+
"step": 28030
|
19671 |
+
},
|
19672 |
+
{
|
19673 |
+
"epoch": 0.7,
|
19674 |
+
"grad_norm": 68.0,
|
19675 |
+
"learning_rate": 6.644067796610169e-08,
|
19676 |
+
"loss": 1.479,
|
19677 |
+
"step": 28040
|
19678 |
+
},
|
19679 |
+
{
|
19680 |
+
"epoch": 0.7,
|
19681 |
+
"grad_norm": 66.5,
|
19682 |
+
"learning_rate": 6.610169491525423e-08,
|
19683 |
+
"loss": 1.4236,
|
19684 |
+
"step": 28050
|
19685 |
+
},
|
19686 |
+
{
|
19687 |
+
"epoch": 0.7,
|
19688 |
+
"grad_norm": 72.0,
|
19689 |
+
"learning_rate": 6.576271186440679e-08,
|
19690 |
+
"loss": 1.4542,
|
19691 |
+
"step": 28060
|
19692 |
+
},
|
19693 |
+
{
|
19694 |
+
"epoch": 0.7,
|
19695 |
+
"grad_norm": 68.0,
|
19696 |
+
"learning_rate": 6.542372881355931e-08,
|
19697 |
+
"loss": 1.466,
|
19698 |
+
"step": 28070
|
19699 |
+
},
|
19700 |
+
{
|
19701 |
+
"epoch": 0.7,
|
19702 |
+
"grad_norm": 69.5,
|
19703 |
+
"learning_rate": 6.508474576271187e-08,
|
19704 |
+
"loss": 1.5094,
|
19705 |
+
"step": 28080
|
19706 |
+
},
|
19707 |
+
{
|
19708 |
+
"epoch": 0.7,
|
19709 |
+
"grad_norm": 68.0,
|
19710 |
+
"learning_rate": 6.474576271186441e-08,
|
19711 |
+
"loss": 1.4899,
|
19712 |
+
"step": 28090
|
19713 |
+
},
|
19714 |
+
{
|
19715 |
+
"epoch": 0.7,
|
19716 |
+
"grad_norm": 70.5,
|
19717 |
+
"learning_rate": 6.440677966101695e-08,
|
19718 |
+
"loss": 1.5057,
|
19719 |
+
"step": 28100
|
19720 |
+
},
|
19721 |
+
{
|
19722 |
+
"epoch": 0.7,
|
19723 |
+
"grad_norm": 67.5,
|
19724 |
+
"learning_rate": 6.406779661016949e-08,
|
19725 |
+
"loss": 1.5005,
|
19726 |
+
"step": 28110
|
19727 |
+
},
|
19728 |
+
{
|
19729 |
+
"epoch": 0.7,
|
19730 |
+
"grad_norm": 65.5,
|
19731 |
+
"learning_rate": 6.372881355932203e-08,
|
19732 |
+
"loss": 1.4355,
|
19733 |
+
"step": 28120
|
19734 |
+
},
|
19735 |
+
{
|
19736 |
+
"epoch": 0.7,
|
19737 |
+
"grad_norm": 67.0,
|
19738 |
+
"learning_rate": 6.338983050847457e-08,
|
19739 |
+
"loss": 1.4489,
|
19740 |
+
"step": 28130
|
19741 |
+
},
|
19742 |
+
{
|
19743 |
+
"epoch": 0.7,
|
19744 |
+
"grad_norm": 69.0,
|
19745 |
+
"learning_rate": 6.305084745762713e-08,
|
19746 |
+
"loss": 1.4299,
|
19747 |
+
"step": 28140
|
19748 |
+
},
|
19749 |
+
{
|
19750 |
+
"epoch": 0.7,
|
19751 |
+
"grad_norm": 64.5,
|
19752 |
+
"learning_rate": 6.271186440677965e-08,
|
19753 |
+
"loss": 1.4528,
|
19754 |
+
"step": 28150
|
19755 |
+
},
|
19756 |
+
{
|
19757 |
+
"epoch": 0.7,
|
19758 |
+
"grad_norm": 65.5,
|
19759 |
+
"learning_rate": 6.237288135593221e-08,
|
19760 |
+
"loss": 1.4766,
|
19761 |
+
"step": 28160
|
19762 |
+
},
|
19763 |
+
{
|
19764 |
+
"epoch": 0.7,
|
19765 |
+
"grad_norm": 64.0,
|
19766 |
+
"learning_rate": 6.203389830508475e-08,
|
19767 |
+
"loss": 1.4354,
|
19768 |
+
"step": 28170
|
19769 |
+
},
|
19770 |
+
{
|
19771 |
+
"epoch": 0.7,
|
19772 |
+
"grad_norm": 62.25,
|
19773 |
+
"learning_rate": 6.169491525423729e-08,
|
19774 |
+
"loss": 1.4779,
|
19775 |
+
"step": 28180
|
19776 |
+
},
|
19777 |
+
{
|
19778 |
+
"epoch": 0.7,
|
19779 |
+
"grad_norm": 65.0,
|
19780 |
+
"learning_rate": 6.135593220338983e-08,
|
19781 |
+
"loss": 1.5016,
|
19782 |
+
"step": 28190
|
19783 |
+
},
|
19784 |
+
{
|
19785 |
+
"epoch": 0.7,
|
19786 |
+
"grad_norm": 64.5,
|
19787 |
+
"learning_rate": 6.101694915254237e-08,
|
19788 |
+
"loss": 1.4592,
|
19789 |
+
"step": 28200
|
19790 |
+
},
|
19791 |
+
{
|
19792 |
+
"epoch": 0.71,
|
19793 |
+
"grad_norm": 67.0,
|
19794 |
+
"learning_rate": 6.067796610169491e-08,
|
19795 |
+
"loss": 1.4624,
|
19796 |
+
"step": 28210
|
19797 |
+
},
|
19798 |
+
{
|
19799 |
+
"epoch": 0.71,
|
19800 |
+
"grad_norm": 65.0,
|
19801 |
+
"learning_rate": 6.033898305084745e-08,
|
19802 |
+
"loss": 1.4414,
|
19803 |
+
"step": 28220
|
19804 |
+
},
|
19805 |
+
{
|
19806 |
+
"epoch": 0.71,
|
19807 |
+
"grad_norm": 70.0,
|
19808 |
+
"learning_rate": 6e-08,
|
19809 |
+
"loss": 1.4958,
|
19810 |
+
"step": 28230
|
19811 |
+
},
|
19812 |
+
{
|
19813 |
+
"epoch": 0.71,
|
19814 |
+
"grad_norm": 69.5,
|
19815 |
+
"learning_rate": 5.966101694915255e-08,
|
19816 |
+
"loss": 1.4772,
|
19817 |
+
"step": 28240
|
19818 |
+
},
|
19819 |
+
{
|
19820 |
+
"epoch": 0.71,
|
19821 |
+
"grad_norm": 68.0,
|
19822 |
+
"learning_rate": 5.932203389830508e-08,
|
19823 |
+
"loss": 1.4262,
|
19824 |
+
"step": 28250
|
19825 |
+
},
|
19826 |
+
{
|
19827 |
+
"epoch": 0.71,
|
19828 |
+
"grad_norm": 67.5,
|
19829 |
+
"learning_rate": 5.8983050847457624e-08,
|
19830 |
+
"loss": 1.4118,
|
19831 |
+
"step": 28260
|
19832 |
+
},
|
19833 |
+
{
|
19834 |
+
"epoch": 0.71,
|
19835 |
+
"grad_norm": 69.5,
|
19836 |
+
"learning_rate": 5.8644067796610165e-08,
|
19837 |
+
"loss": 1.5084,
|
19838 |
+
"step": 28270
|
19839 |
+
},
|
19840 |
+
{
|
19841 |
+
"epoch": 0.71,
|
19842 |
+
"grad_norm": 66.5,
|
19843 |
+
"learning_rate": 5.8305084745762706e-08,
|
19844 |
+
"loss": 1.4377,
|
19845 |
+
"step": 28280
|
19846 |
+
},
|
19847 |
+
{
|
19848 |
+
"epoch": 0.71,
|
19849 |
+
"grad_norm": 72.0,
|
19850 |
+
"learning_rate": 5.7966101694915253e-08,
|
19851 |
+
"loss": 1.4479,
|
19852 |
+
"step": 28290
|
19853 |
+
},
|
19854 |
+
{
|
19855 |
+
"epoch": 0.71,
|
19856 |
+
"grad_norm": 68.0,
|
19857 |
+
"learning_rate": 5.7627118644067794e-08,
|
19858 |
+
"loss": 1.4761,
|
19859 |
+
"step": 28300
|
19860 |
+
},
|
19861 |
+
{
|
19862 |
+
"epoch": 0.71,
|
19863 |
+
"grad_norm": 66.5,
|
19864 |
+
"learning_rate": 5.7288135593220335e-08,
|
19865 |
+
"loss": 1.4407,
|
19866 |
+
"step": 28310
|
19867 |
+
},
|
19868 |
+
{
|
19869 |
+
"epoch": 0.71,
|
19870 |
+
"grad_norm": 69.5,
|
19871 |
+
"learning_rate": 5.6949152542372876e-08,
|
19872 |
+
"loss": 1.4617,
|
19873 |
+
"step": 28320
|
19874 |
+
},
|
19875 |
+
{
|
19876 |
+
"epoch": 0.71,
|
19877 |
+
"grad_norm": 67.5,
|
19878 |
+
"learning_rate": 5.6610169491525424e-08,
|
19879 |
+
"loss": 1.4758,
|
19880 |
+
"step": 28330
|
19881 |
+
},
|
19882 |
+
{
|
19883 |
+
"epoch": 0.71,
|
19884 |
+
"grad_norm": 69.5,
|
19885 |
+
"learning_rate": 5.6271186440677964e-08,
|
19886 |
+
"loss": 1.461,
|
19887 |
+
"step": 28340
|
19888 |
+
},
|
19889 |
+
{
|
19890 |
+
"epoch": 0.71,
|
19891 |
+
"grad_norm": 64.0,
|
19892 |
+
"learning_rate": 5.5932203389830505e-08,
|
19893 |
+
"loss": 1.4355,
|
19894 |
+
"step": 28350
|
19895 |
+
},
|
19896 |
+
{
|
19897 |
+
"epoch": 0.71,
|
19898 |
+
"grad_norm": 66.5,
|
19899 |
+
"learning_rate": 5.5593220338983046e-08,
|
19900 |
+
"loss": 1.4778,
|
19901 |
+
"step": 28360
|
19902 |
+
},
|
19903 |
+
{
|
19904 |
+
"epoch": 0.71,
|
19905 |
+
"grad_norm": 67.5,
|
19906 |
+
"learning_rate": 5.5254237288135594e-08,
|
19907 |
+
"loss": 1.4279,
|
19908 |
+
"step": 28370
|
19909 |
+
},
|
19910 |
+
{
|
19911 |
+
"epoch": 0.71,
|
19912 |
+
"grad_norm": 68.5,
|
19913 |
+
"learning_rate": 5.4915254237288135e-08,
|
19914 |
+
"loss": 1.4686,
|
19915 |
+
"step": 28380
|
19916 |
+
},
|
19917 |
+
{
|
19918 |
+
"epoch": 0.71,
|
19919 |
+
"grad_norm": 63.5,
|
19920 |
+
"learning_rate": 5.4576271186440676e-08,
|
19921 |
+
"loss": 1.4389,
|
19922 |
+
"step": 28390
|
19923 |
+
},
|
19924 |
+
{
|
19925 |
+
"epoch": 0.71,
|
19926 |
+
"grad_norm": 66.5,
|
19927 |
+
"learning_rate": 5.4237288135593217e-08,
|
19928 |
+
"loss": 1.4694,
|
19929 |
+
"step": 28400
|
19930 |
+
},
|
19931 |
+
{
|
19932 |
+
"epoch": 0.71,
|
19933 |
+
"grad_norm": 66.5,
|
19934 |
+
"learning_rate": 5.389830508474576e-08,
|
19935 |
+
"loss": 1.471,
|
19936 |
+
"step": 28410
|
19937 |
+
},
|
19938 |
+
{
|
19939 |
+
"epoch": 0.71,
|
19940 |
+
"grad_norm": 67.0,
|
19941 |
+
"learning_rate": 5.3559322033898305e-08,
|
19942 |
+
"loss": 1.479,
|
19943 |
+
"step": 28420
|
19944 |
+
},
|
19945 |
+
{
|
19946 |
+
"epoch": 0.71,
|
19947 |
+
"grad_norm": 64.0,
|
19948 |
+
"learning_rate": 5.3220338983050846e-08,
|
19949 |
+
"loss": 1.4752,
|
19950 |
+
"step": 28430
|
19951 |
+
},
|
19952 |
+
{
|
19953 |
+
"epoch": 0.71,
|
19954 |
+
"grad_norm": 64.5,
|
19955 |
+
"learning_rate": 5.288135593220339e-08,
|
19956 |
+
"loss": 1.4571,
|
19957 |
+
"step": 28440
|
19958 |
+
},
|
19959 |
+
{
|
19960 |
+
"epoch": 0.71,
|
19961 |
+
"grad_norm": 69.5,
|
19962 |
+
"learning_rate": 5.254237288135593e-08,
|
19963 |
+
"loss": 1.4891,
|
19964 |
+
"step": 28450
|
19965 |
+
},
|
19966 |
+
{
|
19967 |
+
"epoch": 0.71,
|
19968 |
+
"grad_norm": 65.0,
|
19969 |
+
"learning_rate": 5.2203389830508475e-08,
|
19970 |
+
"loss": 1.4618,
|
19971 |
+
"step": 28460
|
19972 |
+
},
|
19973 |
+
{
|
19974 |
+
"epoch": 0.71,
|
19975 |
+
"grad_norm": 70.0,
|
19976 |
+
"learning_rate": 5.1864406779661016e-08,
|
19977 |
+
"loss": 1.5078,
|
19978 |
+
"step": 28470
|
19979 |
+
},
|
19980 |
+
{
|
19981 |
+
"epoch": 0.71,
|
19982 |
+
"grad_norm": 67.5,
|
19983 |
+
"learning_rate": 5.152542372881356e-08,
|
19984 |
+
"loss": 1.4645,
|
19985 |
+
"step": 28480
|
19986 |
+
},
|
19987 |
+
{
|
19988 |
+
"epoch": 0.71,
|
19989 |
+
"grad_norm": 65.5,
|
19990 |
+
"learning_rate": 5.11864406779661e-08,
|
19991 |
+
"loss": 1.4452,
|
19992 |
+
"step": 28490
|
19993 |
+
},
|
19994 |
+
{
|
19995 |
+
"epoch": 0.71,
|
19996 |
+
"grad_norm": 63.0,
|
19997 |
+
"learning_rate": 5.0847457627118645e-08,
|
19998 |
+
"loss": 1.457,
|
19999 |
+
"step": 28500
|
20000 |
+
},
|
20001 |
+
{
|
20002 |
+
"epoch": 0.71,
|
20003 |
+
"grad_norm": 65.5,
|
20004 |
+
"learning_rate": 5.0508474576271186e-08,
|
20005 |
+
"loss": 1.5021,
|
20006 |
+
"step": 28510
|
20007 |
+
},
|
20008 |
+
{
|
20009 |
+
"epoch": 0.71,
|
20010 |
+
"grad_norm": 66.0,
|
20011 |
+
"learning_rate": 5.016949152542373e-08,
|
20012 |
+
"loss": 1.4595,
|
20013 |
+
"step": 28520
|
20014 |
+
},
|
20015 |
+
{
|
20016 |
+
"epoch": 0.71,
|
20017 |
+
"grad_norm": 64.5,
|
20018 |
+
"learning_rate": 4.983050847457627e-08,
|
20019 |
+
"loss": 1.4542,
|
20020 |
+
"step": 28530
|
20021 |
+
},
|
20022 |
+
{
|
20023 |
+
"epoch": 0.71,
|
20024 |
+
"grad_norm": 66.5,
|
20025 |
+
"learning_rate": 4.949152542372881e-08,
|
20026 |
+
"loss": 1.4501,
|
20027 |
+
"step": 28540
|
20028 |
+
},
|
20029 |
+
{
|
20030 |
+
"epoch": 0.71,
|
20031 |
+
"grad_norm": 67.0,
|
20032 |
+
"learning_rate": 4.9152542372881357e-08,
|
20033 |
+
"loss": 1.4917,
|
20034 |
+
"step": 28550
|
20035 |
+
},
|
20036 |
+
{
|
20037 |
+
"epoch": 0.71,
|
20038 |
+
"grad_norm": 68.5,
|
20039 |
+
"learning_rate": 4.88135593220339e-08,
|
20040 |
+
"loss": 1.4516,
|
20041 |
+
"step": 28560
|
20042 |
+
},
|
20043 |
+
{
|
20044 |
+
"epoch": 0.71,
|
20045 |
+
"grad_norm": 65.0,
|
20046 |
+
"learning_rate": 4.847457627118644e-08,
|
20047 |
+
"loss": 1.4666,
|
20048 |
+
"step": 28570
|
20049 |
+
},
|
20050 |
+
{
|
20051 |
+
"epoch": 0.71,
|
20052 |
+
"grad_norm": 69.0,
|
20053 |
+
"learning_rate": 4.813559322033898e-08,
|
20054 |
+
"loss": 1.5123,
|
20055 |
+
"step": 28580
|
20056 |
+
},
|
20057 |
+
{
|
20058 |
+
"epoch": 0.71,
|
20059 |
+
"grad_norm": 69.0,
|
20060 |
+
"learning_rate": 4.779661016949153e-08,
|
20061 |
+
"loss": 1.4659,
|
20062 |
+
"step": 28590
|
20063 |
+
},
|
20064 |
+
{
|
20065 |
+
"epoch": 0.71,
|
20066 |
+
"grad_norm": 69.0,
|
20067 |
+
"learning_rate": 4.745762711864407e-08,
|
20068 |
+
"loss": 1.4839,
|
20069 |
+
"step": 28600
|
20070 |
+
},
|
20071 |
+
{
|
20072 |
+
"epoch": 0.72,
|
20073 |
+
"grad_norm": 67.5,
|
20074 |
+
"learning_rate": 4.711864406779661e-08,
|
20075 |
+
"loss": 1.4728,
|
20076 |
+
"step": 28610
|
20077 |
+
},
|
20078 |
+
{
|
20079 |
+
"epoch": 0.72,
|
20080 |
+
"grad_norm": 67.0,
|
20081 |
+
"learning_rate": 4.677966101694915e-08,
|
20082 |
+
"loss": 1.4596,
|
20083 |
+
"step": 28620
|
20084 |
+
},
|
20085 |
+
{
|
20086 |
+
"epoch": 0.72,
|
20087 |
+
"grad_norm": 64.0,
|
20088 |
+
"learning_rate": 4.64406779661017e-08,
|
20089 |
+
"loss": 1.4157,
|
20090 |
+
"step": 28630
|
20091 |
+
},
|
20092 |
+
{
|
20093 |
+
"epoch": 0.72,
|
20094 |
+
"grad_norm": 68.0,
|
20095 |
+
"learning_rate": 4.610169491525424e-08,
|
20096 |
+
"loss": 1.4506,
|
20097 |
+
"step": 28640
|
20098 |
+
},
|
20099 |
+
{
|
20100 |
+
"epoch": 0.72,
|
20101 |
+
"grad_norm": 63.25,
|
20102 |
+
"learning_rate": 4.576271186440678e-08,
|
20103 |
+
"loss": 1.4663,
|
20104 |
+
"step": 28650
|
20105 |
+
},
|
20106 |
+
{
|
20107 |
+
"epoch": 0.72,
|
20108 |
+
"grad_norm": 66.0,
|
20109 |
+
"learning_rate": 4.542372881355932e-08,
|
20110 |
+
"loss": 1.4602,
|
20111 |
+
"step": 28660
|
20112 |
+
},
|
20113 |
+
{
|
20114 |
+
"epoch": 0.72,
|
20115 |
+
"grad_norm": 66.0,
|
20116 |
+
"learning_rate": 4.508474576271186e-08,
|
20117 |
+
"loss": 1.4337,
|
20118 |
+
"step": 28670
|
20119 |
+
},
|
20120 |
+
{
|
20121 |
+
"epoch": 0.72,
|
20122 |
+
"grad_norm": 66.0,
|
20123 |
+
"learning_rate": 4.474576271186441e-08,
|
20124 |
+
"loss": 1.4953,
|
20125 |
+
"step": 28680
|
20126 |
+
},
|
20127 |
+
{
|
20128 |
+
"epoch": 0.72,
|
20129 |
+
"grad_norm": 67.5,
|
20130 |
+
"learning_rate": 4.440677966101695e-08,
|
20131 |
+
"loss": 1.4918,
|
20132 |
+
"step": 28690
|
20133 |
+
},
|
20134 |
+
{
|
20135 |
+
"epoch": 0.72,
|
20136 |
+
"grad_norm": 67.0,
|
20137 |
+
"learning_rate": 4.406779661016949e-08,
|
20138 |
+
"loss": 1.5065,
|
20139 |
+
"step": 28700
|
20140 |
+
},
|
20141 |
+
{
|
20142 |
+
"epoch": 0.72,
|
20143 |
+
"grad_norm": 69.0,
|
20144 |
+
"learning_rate": 4.372881355932203e-08,
|
20145 |
+
"loss": 1.4844,
|
20146 |
+
"step": 28710
|
20147 |
+
},
|
20148 |
+
{
|
20149 |
+
"epoch": 0.72,
|
20150 |
+
"grad_norm": 68.5,
|
20151 |
+
"learning_rate": 4.338983050847458e-08,
|
20152 |
+
"loss": 1.4525,
|
20153 |
+
"step": 28720
|
20154 |
+
},
|
20155 |
+
{
|
20156 |
+
"epoch": 0.72,
|
20157 |
+
"grad_norm": 67.5,
|
20158 |
+
"learning_rate": 4.305084745762712e-08,
|
20159 |
+
"loss": 1.4415,
|
20160 |
+
"step": 28730
|
20161 |
+
},
|
20162 |
+
{
|
20163 |
+
"epoch": 0.72,
|
20164 |
+
"grad_norm": 66.0,
|
20165 |
+
"learning_rate": 4.271186440677966e-08,
|
20166 |
+
"loss": 1.4609,
|
20167 |
+
"step": 28740
|
20168 |
+
},
|
20169 |
+
{
|
20170 |
+
"epoch": 0.72,
|
20171 |
+
"grad_norm": 68.0,
|
20172 |
+
"learning_rate": 4.23728813559322e-08,
|
20173 |
+
"loss": 1.41,
|
20174 |
+
"step": 28750
|
20175 |
+
},
|
20176 |
+
{
|
20177 |
+
"epoch": 0.72,
|
20178 |
+
"grad_norm": 69.0,
|
20179 |
+
"learning_rate": 4.203389830508475e-08,
|
20180 |
+
"loss": 1.4901,
|
20181 |
+
"step": 28760
|
20182 |
+
},
|
20183 |
+
{
|
20184 |
+
"epoch": 0.72,
|
20185 |
+
"grad_norm": 68.5,
|
20186 |
+
"learning_rate": 4.169491525423729e-08,
|
20187 |
+
"loss": 1.4615,
|
20188 |
+
"step": 28770
|
20189 |
+
},
|
20190 |
+
{
|
20191 |
+
"epoch": 0.72,
|
20192 |
+
"grad_norm": 64.0,
|
20193 |
+
"learning_rate": 4.135593220338983e-08,
|
20194 |
+
"loss": 1.4815,
|
20195 |
+
"step": 28780
|
20196 |
+
},
|
20197 |
+
{
|
20198 |
+
"epoch": 0.72,
|
20199 |
+
"grad_norm": 68.5,
|
20200 |
+
"learning_rate": 4.101694915254237e-08,
|
20201 |
+
"loss": 1.4814,
|
20202 |
+
"step": 28790
|
20203 |
+
},
|
20204 |
+
{
|
20205 |
+
"epoch": 0.72,
|
20206 |
+
"grad_norm": 69.5,
|
20207 |
+
"learning_rate": 4.067796610169491e-08,
|
20208 |
+
"loss": 1.4773,
|
20209 |
+
"step": 28800
|
20210 |
+
},
|
20211 |
+
{
|
20212 |
+
"epoch": 0.72,
|
20213 |
+
"grad_norm": 67.0,
|
20214 |
+
"learning_rate": 4.033898305084746e-08,
|
20215 |
+
"loss": 1.4626,
|
20216 |
+
"step": 28810
|
20217 |
+
},
|
20218 |
+
{
|
20219 |
+
"epoch": 0.72,
|
20220 |
+
"grad_norm": 66.5,
|
20221 |
+
"learning_rate": 4e-08,
|
20222 |
+
"loss": 1.4785,
|
20223 |
+
"step": 28820
|
20224 |
+
},
|
20225 |
+
{
|
20226 |
+
"epoch": 0.72,
|
20227 |
+
"grad_norm": 67.0,
|
20228 |
+
"learning_rate": 3.966101694915254e-08,
|
20229 |
+
"loss": 1.4811,
|
20230 |
+
"step": 28830
|
20231 |
+
},
|
20232 |
+
{
|
20233 |
+
"epoch": 0.72,
|
20234 |
+
"grad_norm": 65.5,
|
20235 |
+
"learning_rate": 3.932203389830508e-08,
|
20236 |
+
"loss": 1.4873,
|
20237 |
+
"step": 28840
|
20238 |
+
},
|
20239 |
+
{
|
20240 |
+
"epoch": 0.72,
|
20241 |
+
"grad_norm": 63.75,
|
20242 |
+
"learning_rate": 3.898305084745763e-08,
|
20243 |
+
"loss": 1.4226,
|
20244 |
+
"step": 28850
|
20245 |
+
},
|
20246 |
+
{
|
20247 |
+
"epoch": 0.72,
|
20248 |
+
"grad_norm": 69.0,
|
20249 |
+
"learning_rate": 3.864406779661017e-08,
|
20250 |
+
"loss": 1.485,
|
20251 |
+
"step": 28860
|
20252 |
+
},
|
20253 |
+
{
|
20254 |
+
"epoch": 0.72,
|
20255 |
+
"grad_norm": 65.0,
|
20256 |
+
"learning_rate": 3.830508474576271e-08,
|
20257 |
+
"loss": 1.4259,
|
20258 |
+
"step": 28870
|
20259 |
+
},
|
20260 |
+
{
|
20261 |
+
"epoch": 0.72,
|
20262 |
+
"grad_norm": 68.5,
|
20263 |
+
"learning_rate": 3.796610169491525e-08,
|
20264 |
+
"loss": 1.4619,
|
20265 |
+
"step": 28880
|
20266 |
+
},
|
20267 |
+
{
|
20268 |
+
"epoch": 0.72,
|
20269 |
+
"grad_norm": 73.0,
|
20270 |
+
"learning_rate": 3.7627118644067794e-08,
|
20271 |
+
"loss": 1.4714,
|
20272 |
+
"step": 28890
|
20273 |
+
},
|
20274 |
+
{
|
20275 |
+
"epoch": 0.72,
|
20276 |
+
"grad_norm": 65.0,
|
20277 |
+
"learning_rate": 3.728813559322034e-08,
|
20278 |
+
"loss": 1.4456,
|
20279 |
+
"step": 28900
|
20280 |
+
},
|
20281 |
+
{
|
20282 |
+
"epoch": 0.72,
|
20283 |
+
"grad_norm": 65.0,
|
20284 |
+
"learning_rate": 3.694915254237288e-08,
|
20285 |
+
"loss": 1.4864,
|
20286 |
+
"step": 28910
|
20287 |
+
},
|
20288 |
+
{
|
20289 |
+
"epoch": 0.72,
|
20290 |
+
"grad_norm": 63.5,
|
20291 |
+
"learning_rate": 3.661016949152542e-08,
|
20292 |
+
"loss": 1.394,
|
20293 |
+
"step": 28920
|
20294 |
+
},
|
20295 |
+
{
|
20296 |
+
"epoch": 0.72,
|
20297 |
+
"grad_norm": 67.0,
|
20298 |
+
"learning_rate": 3.6271186440677964e-08,
|
20299 |
+
"loss": 1.4562,
|
20300 |
+
"step": 28930
|
20301 |
+
},
|
20302 |
+
{
|
20303 |
+
"epoch": 0.72,
|
20304 |
+
"grad_norm": 69.0,
|
20305 |
+
"learning_rate": 3.593220338983051e-08,
|
20306 |
+
"loss": 1.5021,
|
20307 |
+
"step": 28940
|
20308 |
+
},
|
20309 |
+
{
|
20310 |
+
"epoch": 0.72,
|
20311 |
+
"grad_norm": 65.5,
|
20312 |
+
"learning_rate": 3.559322033898305e-08,
|
20313 |
+
"loss": 1.4753,
|
20314 |
+
"step": 28950
|
20315 |
+
},
|
20316 |
+
{
|
20317 |
+
"epoch": 0.72,
|
20318 |
+
"grad_norm": 64.5,
|
20319 |
+
"learning_rate": 3.5254237288135593e-08,
|
20320 |
+
"loss": 1.4206,
|
20321 |
+
"step": 28960
|
20322 |
+
},
|
20323 |
+
{
|
20324 |
+
"epoch": 0.72,
|
20325 |
+
"grad_norm": 66.0,
|
20326 |
+
"learning_rate": 3.4915254237288134e-08,
|
20327 |
+
"loss": 1.4772,
|
20328 |
+
"step": 28970
|
20329 |
+
},
|
20330 |
+
{
|
20331 |
+
"epoch": 0.72,
|
20332 |
+
"grad_norm": 67.5,
|
20333 |
+
"learning_rate": 3.457627118644068e-08,
|
20334 |
+
"loss": 1.4506,
|
20335 |
+
"step": 28980
|
20336 |
+
},
|
20337 |
+
{
|
20338 |
+
"epoch": 0.72,
|
20339 |
+
"grad_norm": 68.0,
|
20340 |
+
"learning_rate": 3.423728813559322e-08,
|
20341 |
+
"loss": 1.5169,
|
20342 |
+
"step": 28990
|
20343 |
+
},
|
20344 |
+
{
|
20345 |
+
"epoch": 0.72,
|
20346 |
+
"grad_norm": 64.5,
|
20347 |
+
"learning_rate": 3.3898305084745764e-08,
|
20348 |
+
"loss": 1.4311,
|
20349 |
+
"step": 29000
|
20350 |
+
},
|
20351 |
+
{
|
20352 |
+
"epoch": 0.73,
|
20353 |
+
"grad_norm": 66.5,
|
20354 |
+
"learning_rate": 3.3559322033898305e-08,
|
20355 |
+
"loss": 1.5013,
|
20356 |
+
"step": 29010
|
20357 |
+
},
|
20358 |
+
{
|
20359 |
+
"epoch": 0.73,
|
20360 |
+
"grad_norm": 66.5,
|
20361 |
+
"learning_rate": 3.3220338983050845e-08,
|
20362 |
+
"loss": 1.3978,
|
20363 |
+
"step": 29020
|
20364 |
+
},
|
20365 |
+
{
|
20366 |
+
"epoch": 0.73,
|
20367 |
+
"grad_norm": 69.5,
|
20368 |
+
"learning_rate": 3.288135593220339e-08,
|
20369 |
+
"loss": 1.5471,
|
20370 |
+
"step": 29030
|
20371 |
+
},
|
20372 |
+
{
|
20373 |
+
"epoch": 0.73,
|
20374 |
+
"grad_norm": 66.0,
|
20375 |
+
"learning_rate": 3.2542372881355934e-08,
|
20376 |
+
"loss": 1.438,
|
20377 |
+
"step": 29040
|
20378 |
+
},
|
20379 |
+
{
|
20380 |
+
"epoch": 0.73,
|
20381 |
+
"grad_norm": 64.5,
|
20382 |
+
"learning_rate": 3.2203389830508475e-08,
|
20383 |
+
"loss": 1.4891,
|
20384 |
+
"step": 29050
|
20385 |
+
},
|
20386 |
+
{
|
20387 |
+
"epoch": 0.73,
|
20388 |
+
"grad_norm": 70.0,
|
20389 |
+
"learning_rate": 3.1864406779661016e-08,
|
20390 |
+
"loss": 1.4335,
|
20391 |
+
"step": 29060
|
20392 |
+
},
|
20393 |
+
{
|
20394 |
+
"epoch": 0.73,
|
20395 |
+
"grad_norm": 66.5,
|
20396 |
+
"learning_rate": 3.152542372881356e-08,
|
20397 |
+
"loss": 1.4434,
|
20398 |
+
"step": 29070
|
20399 |
+
},
|
20400 |
+
{
|
20401 |
+
"epoch": 0.73,
|
20402 |
+
"grad_norm": 66.5,
|
20403 |
+
"learning_rate": 3.1186440677966104e-08,
|
20404 |
+
"loss": 1.461,
|
20405 |
+
"step": 29080
|
20406 |
+
},
|
20407 |
+
{
|
20408 |
+
"epoch": 0.73,
|
20409 |
+
"grad_norm": 68.0,
|
20410 |
+
"learning_rate": 3.0847457627118645e-08,
|
20411 |
+
"loss": 1.5126,
|
20412 |
+
"step": 29090
|
20413 |
+
},
|
20414 |
+
{
|
20415 |
+
"epoch": 0.73,
|
20416 |
+
"grad_norm": 67.0,
|
20417 |
+
"learning_rate": 3.0508474576271186e-08,
|
20418 |
+
"loss": 1.4445,
|
20419 |
+
"step": 29100
|
20420 |
+
},
|
20421 |
+
{
|
20422 |
+
"epoch": 0.73,
|
20423 |
+
"grad_norm": 64.5,
|
20424 |
+
"learning_rate": 3.016949152542373e-08,
|
20425 |
+
"loss": 1.4168,
|
20426 |
+
"step": 29110
|
20427 |
+
},
|
20428 |
+
{
|
20429 |
+
"epoch": 0.73,
|
20430 |
+
"grad_norm": 64.5,
|
20431 |
+
"learning_rate": 2.9830508474576274e-08,
|
20432 |
+
"loss": 1.4507,
|
20433 |
+
"step": 29120
|
20434 |
+
},
|
20435 |
+
{
|
20436 |
+
"epoch": 0.73,
|
20437 |
+
"grad_norm": 65.5,
|
20438 |
+
"learning_rate": 2.9491525423728812e-08,
|
20439 |
+
"loss": 1.458,
|
20440 |
+
"step": 29130
|
20441 |
+
},
|
20442 |
+
{
|
20443 |
+
"epoch": 0.73,
|
20444 |
+
"grad_norm": 69.5,
|
20445 |
+
"learning_rate": 2.9152542372881353e-08,
|
20446 |
+
"loss": 1.4649,
|
20447 |
+
"step": 29140
|
20448 |
+
},
|
20449 |
+
{
|
20450 |
+
"epoch": 0.73,
|
20451 |
+
"grad_norm": 66.0,
|
20452 |
+
"learning_rate": 2.8813559322033897e-08,
|
20453 |
+
"loss": 1.457,
|
20454 |
+
"step": 29150
|
20455 |
+
},
|
20456 |
+
{
|
20457 |
+
"epoch": 0.73,
|
20458 |
+
"grad_norm": 72.5,
|
20459 |
+
"learning_rate": 2.8474576271186438e-08,
|
20460 |
+
"loss": 1.4955,
|
20461 |
+
"step": 29160
|
20462 |
+
},
|
20463 |
+
{
|
20464 |
+
"epoch": 0.73,
|
20465 |
+
"grad_norm": 67.0,
|
20466 |
+
"learning_rate": 2.8135593220338982e-08,
|
20467 |
+
"loss": 1.4841,
|
20468 |
+
"step": 29170
|
20469 |
+
},
|
20470 |
+
{
|
20471 |
+
"epoch": 0.73,
|
20472 |
+
"grad_norm": 70.5,
|
20473 |
+
"learning_rate": 2.7796610169491523e-08,
|
20474 |
+
"loss": 1.4464,
|
20475 |
+
"step": 29180
|
20476 |
+
},
|
20477 |
+
{
|
20478 |
+
"epoch": 0.73,
|
20479 |
+
"grad_norm": 68.0,
|
20480 |
+
"learning_rate": 2.7457627118644067e-08,
|
20481 |
+
"loss": 1.4687,
|
20482 |
+
"step": 29190
|
20483 |
+
},
|
20484 |
+
{
|
20485 |
+
"epoch": 0.73,
|
20486 |
+
"grad_norm": 65.0,
|
20487 |
+
"learning_rate": 2.7118644067796608e-08,
|
20488 |
+
"loss": 1.5202,
|
20489 |
+
"step": 29200
|
20490 |
+
},
|
20491 |
+
{
|
20492 |
+
"epoch": 0.73,
|
20493 |
+
"grad_norm": 68.0,
|
20494 |
+
"learning_rate": 2.6779661016949152e-08,
|
20495 |
+
"loss": 1.449,
|
20496 |
+
"step": 29210
|
20497 |
+
},
|
20498 |
+
{
|
20499 |
+
"epoch": 0.73,
|
20500 |
+
"grad_norm": 70.0,
|
20501 |
+
"learning_rate": 2.6440677966101693e-08,
|
20502 |
+
"loss": 1.5073,
|
20503 |
+
"step": 29220
|
20504 |
+
},
|
20505 |
+
{
|
20506 |
+
"epoch": 0.73,
|
20507 |
+
"grad_norm": 65.0,
|
20508 |
+
"learning_rate": 2.6101694915254238e-08,
|
20509 |
+
"loss": 1.4933,
|
20510 |
+
"step": 29230
|
20511 |
+
},
|
20512 |
+
{
|
20513 |
+
"epoch": 0.73,
|
20514 |
+
"grad_norm": 64.0,
|
20515 |
+
"learning_rate": 2.576271186440678e-08,
|
20516 |
+
"loss": 1.4657,
|
20517 |
+
"step": 29240
|
20518 |
+
},
|
20519 |
+
{
|
20520 |
+
"epoch": 0.73,
|
20521 |
+
"grad_norm": 66.5,
|
20522 |
+
"learning_rate": 2.5423728813559323e-08,
|
20523 |
+
"loss": 1.4532,
|
20524 |
+
"step": 29250
|
20525 |
+
},
|
20526 |
+
{
|
20527 |
+
"epoch": 0.73,
|
20528 |
+
"grad_norm": 69.5,
|
20529 |
+
"learning_rate": 2.5084745762711864e-08,
|
20530 |
+
"loss": 1.4642,
|
20531 |
+
"step": 29260
|
20532 |
+
},
|
20533 |
+
{
|
20534 |
+
"epoch": 0.73,
|
20535 |
+
"grad_norm": 68.0,
|
20536 |
+
"learning_rate": 2.4745762711864405e-08,
|
20537 |
+
"loss": 1.4637,
|
20538 |
+
"step": 29270
|
20539 |
+
},
|
20540 |
+
{
|
20541 |
+
"epoch": 0.73,
|
20542 |
+
"grad_norm": 63.5,
|
20543 |
+
"learning_rate": 2.440677966101695e-08,
|
20544 |
+
"loss": 1.4633,
|
20545 |
+
"step": 29280
|
20546 |
+
},
|
20547 |
+
{
|
20548 |
+
"epoch": 0.73,
|
20549 |
+
"grad_norm": 63.0,
|
20550 |
+
"learning_rate": 2.406779661016949e-08,
|
20551 |
+
"loss": 1.44,
|
20552 |
+
"step": 29290
|
20553 |
+
},
|
20554 |
+
{
|
20555 |
+
"epoch": 0.73,
|
20556 |
+
"grad_norm": 67.5,
|
20557 |
+
"learning_rate": 2.3728813559322034e-08,
|
20558 |
+
"loss": 1.5197,
|
20559 |
+
"step": 29300
|
20560 |
+
},
|
20561 |
+
{
|
20562 |
+
"epoch": 0.73,
|
20563 |
+
"grad_norm": 64.5,
|
20564 |
+
"learning_rate": 2.3389830508474575e-08,
|
20565 |
+
"loss": 1.464,
|
20566 |
+
"step": 29310
|
20567 |
+
},
|
20568 |
+
{
|
20569 |
+
"epoch": 0.73,
|
20570 |
+
"grad_norm": 73.5,
|
20571 |
+
"learning_rate": 2.305084745762712e-08,
|
20572 |
+
"loss": 1.5056,
|
20573 |
+
"step": 29320
|
20574 |
+
},
|
20575 |
+
{
|
20576 |
+
"epoch": 0.73,
|
20577 |
+
"grad_norm": 67.5,
|
20578 |
+
"learning_rate": 2.271186440677966e-08,
|
20579 |
+
"loss": 1.4384,
|
20580 |
+
"step": 29330
|
20581 |
+
},
|
20582 |
+
{
|
20583 |
+
"epoch": 0.73,
|
20584 |
+
"grad_norm": 68.0,
|
20585 |
+
"learning_rate": 2.2372881355932204e-08,
|
20586 |
+
"loss": 1.47,
|
20587 |
+
"step": 29340
|
20588 |
+
},
|
20589 |
+
{
|
20590 |
+
"epoch": 0.73,
|
20591 |
+
"grad_norm": 71.5,
|
20592 |
+
"learning_rate": 2.2033898305084745e-08,
|
20593 |
+
"loss": 1.4637,
|
20594 |
+
"step": 29350
|
20595 |
+
},
|
20596 |
+
{
|
20597 |
+
"epoch": 0.73,
|
20598 |
+
"grad_norm": 65.5,
|
20599 |
+
"learning_rate": 2.169491525423729e-08,
|
20600 |
+
"loss": 1.4459,
|
20601 |
+
"step": 29360
|
20602 |
+
},
|
20603 |
+
{
|
20604 |
+
"epoch": 0.73,
|
20605 |
+
"grad_norm": 66.0,
|
20606 |
+
"learning_rate": 2.135593220338983e-08,
|
20607 |
+
"loss": 1.4714,
|
20608 |
+
"step": 29370
|
20609 |
+
},
|
20610 |
+
{
|
20611 |
+
"epoch": 0.73,
|
20612 |
+
"grad_norm": 64.0,
|
20613 |
+
"learning_rate": 2.1016949152542374e-08,
|
20614 |
+
"loss": 1.4877,
|
20615 |
+
"step": 29380
|
20616 |
+
},
|
20617 |
+
{
|
20618 |
+
"epoch": 0.73,
|
20619 |
+
"grad_norm": 66.0,
|
20620 |
+
"learning_rate": 2.0677966101694915e-08,
|
20621 |
+
"loss": 1.4625,
|
20622 |
+
"step": 29390
|
20623 |
+
},
|
20624 |
+
{
|
20625 |
+
"epoch": 0.73,
|
20626 |
+
"grad_norm": 68.0,
|
20627 |
+
"learning_rate": 2.0338983050847456e-08,
|
20628 |
+
"loss": 1.4429,
|
20629 |
+
"step": 29400
|
20630 |
+
},
|
20631 |
+
{
|
20632 |
+
"epoch": 0.74,
|
20633 |
+
"grad_norm": 66.5,
|
20634 |
+
"learning_rate": 2e-08,
|
20635 |
+
"loss": 1.4736,
|
20636 |
+
"step": 29410
|
20637 |
+
},
|
20638 |
+
{
|
20639 |
+
"epoch": 0.74,
|
20640 |
+
"grad_norm": 71.0,
|
20641 |
+
"learning_rate": 1.966101694915254e-08,
|
20642 |
+
"loss": 1.4478,
|
20643 |
+
"step": 29420
|
20644 |
+
},
|
20645 |
+
{
|
20646 |
+
"epoch": 0.74,
|
20647 |
+
"grad_norm": 68.0,
|
20648 |
+
"learning_rate": 1.9322033898305086e-08,
|
20649 |
+
"loss": 1.4806,
|
20650 |
+
"step": 29430
|
20651 |
+
},
|
20652 |
+
{
|
20653 |
+
"epoch": 0.74,
|
20654 |
+
"grad_norm": 65.0,
|
20655 |
+
"learning_rate": 1.8983050847457626e-08,
|
20656 |
+
"loss": 1.458,
|
20657 |
+
"step": 29440
|
20658 |
+
},
|
20659 |
+
{
|
20660 |
+
"epoch": 0.74,
|
20661 |
+
"grad_norm": 64.5,
|
20662 |
+
"learning_rate": 1.864406779661017e-08,
|
20663 |
+
"loss": 1.4625,
|
20664 |
+
"step": 29450
|
20665 |
+
},
|
20666 |
+
{
|
20667 |
+
"epoch": 0.74,
|
20668 |
+
"grad_norm": 64.5,
|
20669 |
+
"learning_rate": 1.830508474576271e-08,
|
20670 |
+
"loss": 1.4605,
|
20671 |
+
"step": 29460
|
20672 |
+
},
|
20673 |
+
{
|
20674 |
+
"epoch": 0.74,
|
20675 |
+
"grad_norm": 68.5,
|
20676 |
+
"learning_rate": 1.7966101694915256e-08,
|
20677 |
+
"loss": 1.465,
|
20678 |
+
"step": 29470
|
20679 |
+
},
|
20680 |
+
{
|
20681 |
+
"epoch": 0.74,
|
20682 |
+
"grad_norm": 66.5,
|
20683 |
+
"learning_rate": 1.7627118644067797e-08,
|
20684 |
+
"loss": 1.4571,
|
20685 |
+
"step": 29480
|
20686 |
+
},
|
20687 |
+
{
|
20688 |
+
"epoch": 0.74,
|
20689 |
+
"grad_norm": 64.5,
|
20690 |
+
"learning_rate": 1.728813559322034e-08,
|
20691 |
+
"loss": 1.438,
|
20692 |
+
"step": 29490
|
20693 |
+
},
|
20694 |
+
{
|
20695 |
+
"epoch": 0.74,
|
20696 |
+
"grad_norm": 69.5,
|
20697 |
+
"learning_rate": 1.6949152542372882e-08,
|
20698 |
+
"loss": 1.5273,
|
20699 |
+
"step": 29500
|
20700 |
+
},
|
20701 |
+
{
|
20702 |
+
"epoch": 0.74,
|
20703 |
+
"grad_norm": 64.5,
|
20704 |
+
"learning_rate": 1.6610169491525423e-08,
|
20705 |
+
"loss": 1.4298,
|
20706 |
+
"step": 29510
|
20707 |
+
},
|
20708 |
+
{
|
20709 |
+
"epoch": 0.74,
|
20710 |
+
"grad_norm": 65.5,
|
20711 |
+
"learning_rate": 1.6271186440677967e-08,
|
20712 |
+
"loss": 1.4049,
|
20713 |
+
"step": 29520
|
20714 |
+
},
|
20715 |
+
{
|
20716 |
+
"epoch": 0.74,
|
20717 |
+
"grad_norm": 69.0,
|
20718 |
+
"learning_rate": 1.5932203389830508e-08,
|
20719 |
+
"loss": 1.4414,
|
20720 |
+
"step": 29530
|
20721 |
+
},
|
20722 |
+
{
|
20723 |
+
"epoch": 0.74,
|
20724 |
+
"grad_norm": 68.0,
|
20725 |
+
"learning_rate": 1.5593220338983052e-08,
|
20726 |
+
"loss": 1.4952,
|
20727 |
+
"step": 29540
|
20728 |
+
},
|
20729 |
+
{
|
20730 |
+
"epoch": 0.74,
|
20731 |
+
"grad_norm": 65.5,
|
20732 |
+
"learning_rate": 1.5254237288135593e-08,
|
20733 |
+
"loss": 1.4542,
|
20734 |
+
"step": 29550
|
20735 |
+
},
|
20736 |
+
{
|
20737 |
+
"epoch": 0.74,
|
20738 |
+
"grad_norm": 67.5,
|
20739 |
+
"learning_rate": 1.4915254237288137e-08,
|
20740 |
+
"loss": 1.4709,
|
20741 |
+
"step": 29560
|
20742 |
+
},
|
20743 |
+
{
|
20744 |
+
"epoch": 0.74,
|
20745 |
+
"grad_norm": 66.5,
|
20746 |
+
"learning_rate": 1.4576271186440676e-08,
|
20747 |
+
"loss": 1.4756,
|
20748 |
+
"step": 29570
|
20749 |
+
},
|
20750 |
+
{
|
20751 |
+
"epoch": 0.74,
|
20752 |
+
"grad_norm": 63.75,
|
20753 |
+
"learning_rate": 1.4237288135593219e-08,
|
20754 |
+
"loss": 1.4611,
|
20755 |
+
"step": 29580
|
20756 |
+
},
|
20757 |
+
{
|
20758 |
+
"epoch": 0.74,
|
20759 |
+
"grad_norm": 67.0,
|
20760 |
+
"learning_rate": 1.3898305084745762e-08,
|
20761 |
+
"loss": 1.4593,
|
20762 |
+
"step": 29590
|
20763 |
+
},
|
20764 |
+
{
|
20765 |
+
"epoch": 0.74,
|
20766 |
+
"grad_norm": 68.0,
|
20767 |
+
"learning_rate": 1.3559322033898304e-08,
|
20768 |
+
"loss": 1.4649,
|
20769 |
+
"step": 29600
|
20770 |
+
},
|
20771 |
+
{
|
20772 |
+
"epoch": 0.74,
|
20773 |
+
"grad_norm": 66.0,
|
20774 |
+
"learning_rate": 1.3220338983050847e-08,
|
20775 |
+
"loss": 1.4545,
|
20776 |
+
"step": 29610
|
20777 |
+
},
|
20778 |
+
{
|
20779 |
+
"epoch": 0.74,
|
20780 |
+
"grad_norm": 65.0,
|
20781 |
+
"learning_rate": 1.288135593220339e-08,
|
20782 |
+
"loss": 1.4639,
|
20783 |
+
"step": 29620
|
20784 |
+
},
|
20785 |
+
{
|
20786 |
+
"epoch": 0.74,
|
20787 |
+
"grad_norm": 65.5,
|
20788 |
+
"learning_rate": 1.2542372881355932e-08,
|
20789 |
+
"loss": 1.4659,
|
20790 |
+
"step": 29630
|
20791 |
+
},
|
20792 |
+
{
|
20793 |
+
"epoch": 0.74,
|
20794 |
+
"grad_norm": 64.5,
|
20795 |
+
"learning_rate": 1.2203389830508474e-08,
|
20796 |
+
"loss": 1.4277,
|
20797 |
+
"step": 29640
|
20798 |
+
},
|
20799 |
+
{
|
20800 |
+
"epoch": 0.74,
|
20801 |
+
"grad_norm": 69.0,
|
20802 |
+
"learning_rate": 1.1864406779661017e-08,
|
20803 |
+
"loss": 1.473,
|
20804 |
+
"step": 29650
|
20805 |
+
},
|
20806 |
+
{
|
20807 |
+
"epoch": 0.74,
|
20808 |
+
"grad_norm": 68.5,
|
20809 |
+
"learning_rate": 1.152542372881356e-08,
|
20810 |
+
"loss": 1.4905,
|
20811 |
+
"step": 29660
|
20812 |
+
},
|
20813 |
+
{
|
20814 |
+
"epoch": 0.74,
|
20815 |
+
"grad_norm": 67.0,
|
20816 |
+
"learning_rate": 1.1186440677966102e-08,
|
20817 |
+
"loss": 1.4237,
|
20818 |
+
"step": 29670
|
20819 |
+
},
|
20820 |
+
{
|
20821 |
+
"epoch": 0.74,
|
20822 |
+
"grad_norm": 69.5,
|
20823 |
+
"learning_rate": 1.0847457627118645e-08,
|
20824 |
+
"loss": 1.4994,
|
20825 |
+
"step": 29680
|
20826 |
+
},
|
20827 |
+
{
|
20828 |
+
"epoch": 0.74,
|
20829 |
+
"grad_norm": 67.0,
|
20830 |
+
"learning_rate": 1.0508474576271187e-08,
|
20831 |
+
"loss": 1.4373,
|
20832 |
+
"step": 29690
|
20833 |
+
},
|
20834 |
+
{
|
20835 |
+
"epoch": 0.74,
|
20836 |
+
"grad_norm": 65.5,
|
20837 |
+
"learning_rate": 1.0169491525423728e-08,
|
20838 |
+
"loss": 1.4231,
|
20839 |
+
"step": 29700
|
20840 |
+
},
|
20841 |
+
{
|
20842 |
+
"epoch": 0.74,
|
20843 |
+
"grad_norm": 69.0,
|
20844 |
+
"learning_rate": 9.83050847457627e-09,
|
20845 |
+
"loss": 1.4697,
|
20846 |
+
"step": 29710
|
20847 |
+
},
|
20848 |
+
{
|
20849 |
+
"epoch": 0.74,
|
20850 |
+
"grad_norm": 66.0,
|
20851 |
+
"learning_rate": 9.491525423728813e-09,
|
20852 |
+
"loss": 1.5293,
|
20853 |
+
"step": 29720
|
20854 |
+
},
|
20855 |
+
{
|
20856 |
+
"epoch": 0.74,
|
20857 |
+
"grad_norm": 64.5,
|
20858 |
+
"learning_rate": 9.152542372881356e-09,
|
20859 |
+
"loss": 1.4447,
|
20860 |
+
"step": 29730
|
20861 |
+
},
|
20862 |
+
{
|
20863 |
+
"epoch": 0.74,
|
20864 |
+
"grad_norm": 66.0,
|
20865 |
+
"learning_rate": 8.813559322033898e-09,
|
20866 |
+
"loss": 1.4482,
|
20867 |
+
"step": 29740
|
20868 |
+
},
|
20869 |
+
{
|
20870 |
+
"epoch": 0.74,
|
20871 |
+
"grad_norm": 66.0,
|
20872 |
+
"learning_rate": 8.474576271186441e-09,
|
20873 |
+
"loss": 1.4338,
|
20874 |
+
"step": 29750
|
20875 |
+
},
|
20876 |
+
{
|
20877 |
+
"epoch": 0.74,
|
20878 |
+
"grad_norm": 69.5,
|
20879 |
+
"learning_rate": 8.135593220338983e-09,
|
20880 |
+
"loss": 1.5099,
|
20881 |
+
"step": 29760
|
20882 |
+
},
|
20883 |
+
{
|
20884 |
+
"epoch": 0.74,
|
20885 |
+
"grad_norm": 66.5,
|
20886 |
+
"learning_rate": 7.796610169491526e-09,
|
20887 |
+
"loss": 1.5048,
|
20888 |
+
"step": 29770
|
20889 |
+
},
|
20890 |
+
{
|
20891 |
+
"epoch": 0.74,
|
20892 |
+
"grad_norm": 63.25,
|
20893 |
+
"learning_rate": 7.457627118644069e-09,
|
20894 |
+
"loss": 1.4926,
|
20895 |
+
"step": 29780
|
20896 |
+
},
|
20897 |
+
{
|
20898 |
+
"epoch": 0.74,
|
20899 |
+
"grad_norm": 66.0,
|
20900 |
+
"learning_rate": 7.1186440677966095e-09,
|
20901 |
+
"loss": 1.4132,
|
20902 |
+
"step": 29790
|
20903 |
+
},
|
20904 |
+
{
|
20905 |
+
"epoch": 0.74,
|
20906 |
+
"grad_norm": 69.0,
|
20907 |
+
"learning_rate": 6.779661016949152e-09,
|
20908 |
+
"loss": 1.4814,
|
20909 |
+
"step": 29800
|
20910 |
+
},
|
20911 |
+
{
|
20912 |
+
"epoch": 0.75,
|
20913 |
+
"grad_norm": 67.0,
|
20914 |
+
"learning_rate": 6.440677966101695e-09,
|
20915 |
+
"loss": 1.4551,
|
20916 |
+
"step": 29810
|
20917 |
+
},
|
20918 |
+
{
|
20919 |
+
"epoch": 0.75,
|
20920 |
+
"grad_norm": 67.0,
|
20921 |
+
"learning_rate": 6.101694915254237e-09,
|
20922 |
+
"loss": 1.4826,
|
20923 |
+
"step": 29820
|
20924 |
+
},
|
20925 |
+
{
|
20926 |
+
"epoch": 0.75,
|
20927 |
+
"grad_norm": 71.0,
|
20928 |
+
"learning_rate": 5.76271186440678e-09,
|
20929 |
+
"loss": 1.458,
|
20930 |
+
"step": 29830
|
20931 |
+
},
|
20932 |
+
{
|
20933 |
+
"epoch": 0.75,
|
20934 |
+
"grad_norm": 66.5,
|
20935 |
+
"learning_rate": 5.423728813559322e-09,
|
20936 |
+
"loss": 1.5004,
|
20937 |
+
"step": 29840
|
20938 |
+
},
|
20939 |
+
{
|
20940 |
+
"epoch": 0.75,
|
20941 |
+
"grad_norm": 65.5,
|
20942 |
+
"learning_rate": 5.084745762711864e-09,
|
20943 |
+
"loss": 1.4904,
|
20944 |
+
"step": 29850
|
20945 |
+
},
|
20946 |
+
{
|
20947 |
+
"epoch": 0.75,
|
20948 |
+
"grad_norm": 66.5,
|
20949 |
+
"learning_rate": 4.745762711864407e-09,
|
20950 |
+
"loss": 1.4144,
|
20951 |
+
"step": 29860
|
20952 |
+
},
|
20953 |
+
{
|
20954 |
+
"epoch": 0.75,
|
20955 |
+
"grad_norm": 69.5,
|
20956 |
+
"learning_rate": 4.406779661016949e-09,
|
20957 |
+
"loss": 1.4279,
|
20958 |
+
"step": 29870
|
20959 |
+
},
|
20960 |
+
{
|
20961 |
+
"epoch": 0.75,
|
20962 |
+
"grad_norm": 65.0,
|
20963 |
+
"learning_rate": 4.067796610169492e-09,
|
20964 |
+
"loss": 1.4719,
|
20965 |
+
"step": 29880
|
20966 |
+
},
|
20967 |
+
{
|
20968 |
+
"epoch": 0.75,
|
20969 |
+
"grad_norm": 72.5,
|
20970 |
+
"learning_rate": 3.728813559322034e-09,
|
20971 |
+
"loss": 1.5085,
|
20972 |
+
"step": 29890
|
20973 |
+
},
|
20974 |
+
{
|
20975 |
+
"epoch": 0.75,
|
20976 |
+
"grad_norm": 70.5,
|
20977 |
+
"learning_rate": 3.389830508474576e-09,
|
20978 |
+
"loss": 1.4336,
|
20979 |
+
"step": 29900
|
20980 |
+
},
|
20981 |
+
{
|
20982 |
+
"epoch": 0.75,
|
20983 |
+
"grad_norm": 63.25,
|
20984 |
+
"learning_rate": 3.0508474576271186e-09,
|
20985 |
+
"loss": 1.4601,
|
20986 |
+
"step": 29910
|
20987 |
+
},
|
20988 |
+
{
|
20989 |
+
"epoch": 0.75,
|
20990 |
+
"grad_norm": 65.0,
|
20991 |
+
"learning_rate": 2.711864406779661e-09,
|
20992 |
+
"loss": 1.4593,
|
20993 |
+
"step": 29920
|
20994 |
+
},
|
20995 |
+
{
|
20996 |
+
"epoch": 0.75,
|
20997 |
+
"grad_norm": 67.0,
|
20998 |
+
"learning_rate": 2.3728813559322033e-09,
|
20999 |
+
"loss": 1.4755,
|
21000 |
+
"step": 29930
|
21001 |
+
},
|
21002 |
+
{
|
21003 |
+
"epoch": 0.75,
|
21004 |
+
"grad_norm": 60.5,
|
21005 |
+
"learning_rate": 2.033898305084746e-09,
|
21006 |
+
"loss": 1.4302,
|
21007 |
+
"step": 29940
|
21008 |
+
},
|
21009 |
+
{
|
21010 |
+
"epoch": 0.75,
|
21011 |
+
"grad_norm": 65.5,
|
21012 |
+
"learning_rate": 1.694915254237288e-09,
|
21013 |
+
"loss": 1.4244,
|
21014 |
+
"step": 29950
|
21015 |
+
},
|
21016 |
+
{
|
21017 |
+
"epoch": 0.75,
|
21018 |
+
"grad_norm": 67.0,
|
21019 |
+
"learning_rate": 1.3559322033898306e-09,
|
21020 |
+
"loss": 1.4924,
|
21021 |
+
"step": 29960
|
21022 |
+
},
|
21023 |
+
{
|
21024 |
+
"epoch": 0.75,
|
21025 |
+
"grad_norm": 67.5,
|
21026 |
+
"learning_rate": 1.016949152542373e-09,
|
21027 |
+
"loss": 1.4943,
|
21028 |
+
"step": 29970
|
21029 |
+
},
|
21030 |
+
{
|
21031 |
+
"epoch": 0.75,
|
21032 |
+
"grad_norm": 65.5,
|
21033 |
+
"learning_rate": 6.779661016949153e-10,
|
21034 |
+
"loss": 1.4661,
|
21035 |
+
"step": 29980
|
21036 |
+
},
|
21037 |
+
{
|
21038 |
+
"epoch": 0.75,
|
21039 |
+
"grad_norm": 67.0,
|
21040 |
+
"learning_rate": 3.3898305084745764e-10,
|
21041 |
+
"loss": 1.4674,
|
21042 |
+
"step": 29990
|
21043 |
+
},
|
21044 |
+
{
|
21045 |
+
"epoch": 0.75,
|
21046 |
+
"grad_norm": 71.0,
|
21047 |
+
"learning_rate": 0.0,
|
21048 |
+
"loss": 1.4675,
|
21049 |
+
"step": 30000
|
21050 |
+
},
|
21051 |
+
{
|
21052 |
+
"epoch": 0.75,
|
21053 |
+
"eval_loss": 1.463437795639038,
|
21054 |
+
"eval_runtime": 66.0956,
|
21055 |
+
"eval_samples_per_second": 15.13,
|
21056 |
+
"eval_steps_per_second": 15.13,
|
21057 |
+
"step": 30000
|
21058 |
}
|
21059 |
],
|
21060 |
"logging_steps": 10,
|
|
|
21062 |
"num_input_tokens_seen": 0,
|
21063 |
"num_train_epochs": 1,
|
21064 |
"save_steps": 5000,
|
21065 |
+
"total_flos": 4.841766125568e+17,
|
21066 |
"train_batch_size": 1,
|
21067 |
"trial_name": null,
|
21068 |
"trial_params": null
|