Training in progress, step 14400, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aeda08a0b1a7afde6c2627a25d70a74edfe3676e09118bc3e2aef74917d596e0
 size 340808816
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:44ece796e893aaa95ded0f34cf286f442d6b9fbab4365a888dc2b7348e05c34a
 size 173247691
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e5e58437fe7197a9ead7ed420c770654a16b7dd950447cfa811af5eda48bad57
 size 1465
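
The three files above are stored through Git LFS, so the diff only shows each pointer file: a spec-version line, the SHA-256 object id of the blob, and its size in bytes. As an illustration only, here is a minimal Python sketch of checking a downloaded blob against such a pointer; the helper names and file paths are hypothetical, and nothing beyond the three pointer fields shown above is assumed.

import hashlib
import os

def read_lfs_pointer(pointer_path):
    # Parse the "key value" lines of a Git LFS pointer file (version, oid, size).
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    oid_hex = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    size = int(fields["size"])
    return oid_hex, size

def matches_pointer(pointer_path, blob_path):
    # Compare the real file's byte size and SHA-256 digest with the pointer fields.
    oid_hex, size = read_lfs_pointer(pointer_path)
    if os.path.getsize(blob_path) != size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid_hex

# Hypothetical paths; point these at a locally downloaded copy of the checkpoint.
# print(matches_pointer("adapter_model.safetensors.pointer",
#                       "last-checkpoint/adapter_model.safetensors"))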
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 16.
+  "epoch": 16.4011396011396,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 14400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -98708,6 +98708,2106 @@
       "learning_rate": 5.02276187815508e-06,
       "loss": 0.7436,
       "step": 14100
-    }
+    },
+    {
+      "epoch": 16.06039886039886,
+      "grad_norm": 0.16916537284851074,
+      "learning_rate": 5.01996106925548e-06,
+      "loss": 0.6765,
+      "step": 14101
+    },
+    {
+      "epoch": 16.06153846153846,
+      "grad_norm": 0.26116690039634705,
+      "learning_rate": 5.017160954317579e-06,
+      "loss": 0.5046,
+      "step": 14102
+    },
+    {
+      "epoch": 16.062678062678064,
+      "grad_norm": 0.17816010117530823,
+      "learning_rate": 5.01436153343863e-06,
+      "loss": 0.7736,
+      "step": 14103
+    },
[... the remaining appended log entries, one per step from 14104 through 14400, continue in the same format (epoch, grad_norm, learning_rate, loss, step); the rendered diff is cut off partway through the entry for step 14293 ...]
   ],
   "logging_steps": 1,
@@ -98727,7 +100827,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
|
| 100067 |
+
"loss": 0.5586,
|
| 100068 |
+
"step": 14294
|
| 100069 |
+
},
|
| 100070 |
+
{
|
| 100071 |
+
"epoch": 16.28148148148148,
|
| 100072 |
+
"grad_norm": 0.18999363481998444,
|
| 100073 |
+
"learning_rate": 4.489847359758109e-06,
|
| 100074 |
+
"loss": 0.8255,
|
| 100075 |
+
"step": 14295
|
| 100076 |
+
},
|
| 100077 |
+
{
|
| 100078 |
+
"epoch": 16.282621082621084,
|
| 100079 |
+
"grad_norm": 0.20510265231132507,
|
| 100080 |
+
"learning_rate": 4.487183683451096e-06,
|
| 100081 |
+
"loss": 0.6636,
|
| 100082 |
+
"step": 14296
|
| 100083 |
+
},
|
| 100084 |
+
{
|
| 100085 |
+
"epoch": 16.283760683760683,
|
| 100086 |
+
"grad_norm": 0.17222599685192108,
|
| 100087 |
+
"learning_rate": 4.484520719610591e-06,
|
| 100088 |
+
"loss": 0.6504,
|
| 100089 |
+
"step": 14297
|
| 100090 |
+
},
|
| 100091 |
+
{
|
| 100092 |
+
"epoch": 16.284900284900285,
|
| 100093 |
+
"grad_norm": 0.212445929646492,
|
| 100094 |
+
"learning_rate": 4.481858468329106e-06,
|
| 100095 |
+
"loss": 0.6673,
|
| 100096 |
+
"step": 14298
|
| 100097 |
+
},
|
| 100098 |
+
{
|
| 100099 |
+
"epoch": 16.286039886039887,
|
| 100100 |
+
"grad_norm": 0.200743168592453,
|
| 100101 |
+
"learning_rate": 4.479196929699108e-06,
|
| 100102 |
+
"loss": 0.5258,
|
| 100103 |
+
"step": 14299
|
| 100104 |
+
},
|
| 100105 |
+
{
|
| 100106 |
+
"epoch": 16.287179487179486,
|
| 100107 |
+
"grad_norm": 0.19284288585186005,
|
| 100108 |
+
"learning_rate": 4.476536103813026e-06,
|
| 100109 |
+
"loss": 0.6693,
|
| 100110 |
+
"step": 14300
|
| 100111 |
+
},
|
| 100112 |
+
{
|
| 100113 |
+
"epoch": 16.28831908831909,
|
| 100114 |
+
"grad_norm": 0.2084466516971588,
|
| 100115 |
+
"learning_rate": 4.473875990763285e-06,
|
| 100116 |
+
"loss": 0.8346,
|
| 100117 |
+
"step": 14301
|
| 100118 |
+
},
|
| 100119 |
+
{
|
| 100120 |
+
"epoch": 16.28945868945869,
|
| 100121 |
+
"grad_norm": 0.233940988779068,
|
| 100122 |
+
"learning_rate": 4.471216590642277e-06,
|
| 100123 |
+
"loss": 0.5976,
|
| 100124 |
+
"step": 14302
|
| 100125 |
+
},
|
| 100126 |
+
{
|
| 100127 |
+
"epoch": 16.29059829059829,
|
| 100128 |
+
"grad_norm": 0.17546918988227844,
|
| 100129 |
+
"learning_rate": 4.468557903542378e-06,
|
| 100130 |
+
"loss": 0.6661,
|
| 100131 |
+
"step": 14303
|
| 100132 |
+
},
|
| 100133 |
+
{
|
| 100134 |
+
"epoch": 16.291737891737892,
|
| 100135 |
+
"grad_norm": 0.17741990089416504,
|
| 100136 |
+
"learning_rate": 4.4658999295559195e-06,
|
| 100137 |
+
"loss": 0.608,
|
| 100138 |
+
"step": 14304
|
| 100139 |
+
},
|
| 100140 |
+
{
|
| 100141 |
+
"epoch": 16.292877492877494,
|
| 100142 |
+
"grad_norm": 0.23576615750789642,
|
| 100143 |
+
"learning_rate": 4.463242668775225e-06,
|
| 100144 |
+
"loss": 0.6547,
|
| 100145 |
+
"step": 14305
|
| 100146 |
+
},
|
| 100147 |
+
{
|
| 100148 |
+
"epoch": 16.294017094017093,
|
| 100149 |
+
"grad_norm": 0.1998688131570816,
|
| 100150 |
+
"learning_rate": 4.46058612129259e-06,
|
| 100151 |
+
"loss": 0.7048,
|
| 100152 |
+
"step": 14306
|
| 100153 |
+
},
|
| 100154 |
+
{
|
| 100155 |
+
"epoch": 16.295156695156695,
|
| 100156 |
+
"grad_norm": 0.1796422302722931,
|
| 100157 |
+
"learning_rate": 4.457930287200285e-06,
|
| 100158 |
+
"loss": 0.6466,
|
| 100159 |
+
"step": 14307
|
| 100160 |
+
},
|
| 100161 |
+
{
|
| 100162 |
+
"epoch": 16.296296296296298,
|
| 100163 |
+
"grad_norm": 0.22589747607707977,
|
| 100164 |
+
"learning_rate": 4.4552751665905415e-06,
|
| 100165 |
+
"loss": 0.5144,
|
| 100166 |
+
"step": 14308
|
| 100167 |
+
},
|
| 100168 |
+
{
|
| 100169 |
+
"epoch": 16.297435897435896,
|
| 100170 |
+
"grad_norm": 0.19338934123516083,
|
| 100171 |
+
"learning_rate": 4.452620759555598e-06,
|
| 100172 |
+
"loss": 0.3244,
|
| 100173 |
+
"step": 14309
|
| 100174 |
+
},
|
| 100175 |
+
{
|
| 100176 |
+
"epoch": 16.2985754985755,
|
| 100177 |
+
"grad_norm": 0.17849305272102356,
|
| 100178 |
+
"learning_rate": 4.449967066187646e-06,
|
| 100179 |
+
"loss": 0.7224,
|
| 100180 |
+
"step": 14310
|
| 100181 |
+
},
|
| 100182 |
+
{
|
| 100183 |
+
"epoch": 16.2997150997151,
|
| 100184 |
+
"grad_norm": 0.18624725937843323,
|
| 100185 |
+
"learning_rate": 4.447314086578844e-06,
|
| 100186 |
+
"loss": 0.5929,
|
| 100187 |
+
"step": 14311
|
| 100188 |
+
},
|
| 100189 |
+
{
|
| 100190 |
+
"epoch": 16.3008547008547,
|
| 100191 |
+
"grad_norm": 0.21634441614151,
|
| 100192 |
+
"learning_rate": 4.444661820821344e-06,
|
| 100193 |
+
"loss": 0.6098,
|
| 100194 |
+
"step": 14312
|
| 100195 |
+
},
|
| 100196 |
+
{
|
| 100197 |
+
"epoch": 16.301994301994302,
|
| 100198 |
+
"grad_norm": 0.1766114979982376,
|
| 100199 |
+
"learning_rate": 4.442010269007266e-06,
|
| 100200 |
+
"loss": 0.6877,
|
| 100201 |
+
"step": 14313
|
| 100202 |
+
},
|
| 100203 |
+
{
|
| 100204 |
+
"epoch": 16.303133903133904,
|
| 100205 |
+
"grad_norm": 0.2100430727005005,
|
| 100206 |
+
"learning_rate": 4.4393594312287126e-06,
|
| 100207 |
+
"loss": 0.7294,
|
| 100208 |
+
"step": 14314
|
| 100209 |
+
},
|
| 100210 |
+
{
|
| 100211 |
+
"epoch": 16.304273504273503,
|
| 100212 |
+
"grad_norm": 0.2034444361925125,
|
| 100213 |
+
"learning_rate": 4.436709307577741e-06,
|
| 100214 |
+
"loss": 0.5865,
|
| 100215 |
+
"step": 14315
|
| 100216 |
+
},
|
| 100217 |
+
{
|
| 100218 |
+
"epoch": 16.305413105413106,
|
| 100219 |
+
"grad_norm": 0.18770147860050201,
|
| 100220 |
+
"learning_rate": 4.434059898146406e-06,
|
| 100221 |
+
"loss": 0.6676,
|
| 100222 |
+
"step": 14316
|
| 100223 |
+
},
|
| 100224 |
+
{
|
| 100225 |
+
"epoch": 16.306552706552708,
|
| 100226 |
+
"grad_norm": 0.19414395093917847,
|
| 100227 |
+
"learning_rate": 4.431411203026728e-06,
|
| 100228 |
+
"loss": 0.7387,
|
| 100229 |
+
"step": 14317
|
| 100230 |
+
},
|
| 100231 |
+
{
|
| 100232 |
+
"epoch": 16.307692307692307,
|
| 100233 |
+
"grad_norm": 0.1796029955148697,
|
| 100234 |
+
"learning_rate": 4.4287632223107e-06,
|
| 100235 |
+
"loss": 0.5206,
|
| 100236 |
+
"step": 14318
|
| 100237 |
+
},
|
| 100238 |
+
{
|
| 100239 |
+
"epoch": 16.30883190883191,
|
| 100240 |
+
"grad_norm": 0.17089225351810455,
|
| 100241 |
+
"learning_rate": 4.4261159560902985e-06,
|
| 100242 |
+
"loss": 0.6783,
|
| 100243 |
+
"step": 14319
|
| 100244 |
+
},
|
| 100245 |
+
{
|
| 100246 |
+
"epoch": 16.30997150997151,
|
| 100247 |
+
"grad_norm": 0.18960067629814148,
|
| 100248 |
+
"learning_rate": 4.423469404457467e-06,
|
| 100249 |
+
"loss": 0.5921,
|
| 100250 |
+
"step": 14320
|
| 100251 |
+
},
|
| 100252 |
+
{
|
| 100253 |
+
"epoch": 16.31111111111111,
|
| 100254 |
+
"grad_norm": 0.17689311504364014,
|
| 100255 |
+
"learning_rate": 4.420823567504129e-06,
|
| 100256 |
+
"loss": 0.8011,
|
| 100257 |
+
"step": 14321
|
| 100258 |
+
},
|
| 100259 |
+
{
|
| 100260 |
+
"epoch": 16.312250712250712,
|
| 100261 |
+
"grad_norm": 0.20006713271141052,
|
| 100262 |
+
"learning_rate": 4.418178445322188e-06,
|
| 100263 |
+
"loss": 0.663,
|
| 100264 |
+
"step": 14322
|
| 100265 |
+
},
|
| 100266 |
+
{
|
| 100267 |
+
"epoch": 16.313390313390315,
|
| 100268 |
+
"grad_norm": 0.1775377094745636,
|
| 100269 |
+
"learning_rate": 4.4155340380035045e-06,
|
| 100270 |
+
"loss": 0.5213,
|
| 100271 |
+
"step": 14323
|
| 100272 |
+
},
|
| 100273 |
+
{
|
| 100274 |
+
"epoch": 16.314529914529913,
|
| 100275 |
+
"grad_norm": 0.32989034056663513,
|
| 100276 |
+
"learning_rate": 4.412890345639931e-06,
|
| 100277 |
+
"loss": 0.6832,
|
| 100278 |
+
"step": 14324
|
| 100279 |
+
},
|
| 100280 |
+
{
|
| 100281 |
+
"epoch": 16.315669515669516,
|
| 100282 |
+
"grad_norm": 0.21517755091190338,
|
| 100283 |
+
"learning_rate": 4.410247368323289e-06,
|
| 100284 |
+
"loss": 0.6429,
|
| 100285 |
+
"step": 14325
|
| 100286 |
+
},
|
| 100287 |
+
{
|
| 100288 |
+
"epoch": 16.316809116809118,
|
| 100289 |
+
"grad_norm": 0.20794017612934113,
|
| 100290 |
+
"learning_rate": 4.407605106145385e-06,
|
| 100291 |
+
"loss": 0.7617,
|
| 100292 |
+
"step": 14326
|
| 100293 |
+
},
|
| 100294 |
+
{
|
| 100295 |
+
"epoch": 16.317948717948717,
|
| 100296 |
+
"grad_norm": 0.21983087062835693,
|
| 100297 |
+
"learning_rate": 4.40496355919798e-06,
|
| 100298 |
+
"loss": 0.4611,
|
| 100299 |
+
"step": 14327
|
| 100300 |
+
},
|
| 100301 |
+
{
|
| 100302 |
+
"epoch": 16.31908831908832,
|
| 100303 |
+
"grad_norm": 0.19113603234291077,
|
| 100304 |
+
"learning_rate": 4.402322727572825e-06,
|
| 100305 |
+
"loss": 0.7669,
|
| 100306 |
+
"step": 14328
|
| 100307 |
+
},
|
| 100308 |
+
{
|
| 100309 |
+
"epoch": 16.32022792022792,
|
| 100310 |
+
"grad_norm": 0.21938566863536835,
|
| 100311 |
+
"learning_rate": 4.399682611361647e-06,
|
| 100312 |
+
"loss": 0.6253,
|
| 100313 |
+
"step": 14329
|
| 100314 |
+
},
|
| 100315 |
+
{
|
| 100316 |
+
"epoch": 16.32136752136752,
|
| 100317 |
+
"grad_norm": 0.18340864777565002,
|
| 100318 |
+
"learning_rate": 4.397043210656143e-06,
|
| 100319 |
+
"loss": 0.6056,
|
| 100320 |
+
"step": 14330
|
| 100321 |
+
},
|
| 100322 |
+
{
|
| 100323 |
+
"epoch": 16.322507122507123,
|
| 100324 |
+
"grad_norm": 0.2354237288236618,
|
| 100325 |
+
"learning_rate": 4.394404525547985e-06,
|
| 100326 |
+
"loss": 0.5475,
|
| 100327 |
+
"step": 14331
|
| 100328 |
+
},
|
| 100329 |
+
{
|
| 100330 |
+
"epoch": 16.323646723646725,
|
| 100331 |
+
"grad_norm": 0.2733052372932434,
|
| 100332 |
+
"learning_rate": 4.391766556128826e-06,
|
| 100333 |
+
"loss": 0.4954,
|
| 100334 |
+
"step": 14332
|
| 100335 |
+
},
|
| 100336 |
+
{
|
| 100337 |
+
"epoch": 16.324786324786324,
|
| 100338 |
+
"grad_norm": 0.19026166200637817,
|
| 100339 |
+
"learning_rate": 4.389129302490291e-06,
|
| 100340 |
+
"loss": 0.7062,
|
| 100341 |
+
"step": 14333
|
| 100342 |
+
},
|
| 100343 |
+
{
|
| 100344 |
+
"epoch": 16.325925925925926,
|
| 100345 |
+
"grad_norm": 0.225381001830101,
|
| 100346 |
+
"learning_rate": 4.386492764723971e-06,
|
| 100347 |
+
"loss": 0.8039,
|
| 100348 |
+
"step": 14334
|
| 100349 |
+
},
|
| 100350 |
+
{
|
| 100351 |
+
"epoch": 16.32706552706553,
|
| 100352 |
+
"grad_norm": 0.1898542195558548,
|
| 100353 |
+
"learning_rate": 4.3838569429214435e-06,
|
| 100354 |
+
"loss": 0.6213,
|
| 100355 |
+
"step": 14335
|
| 100356 |
+
},
|
| 100357 |
+
{
|
| 100358 |
+
"epoch": 16.328205128205127,
|
| 100359 |
+
"grad_norm": 0.23803776502609253,
|
| 100360 |
+
"learning_rate": 4.38122183717426e-06,
|
| 100361 |
+
"loss": 0.6764,
|
| 100362 |
+
"step": 14336
|
| 100363 |
+
},
|
| 100364 |
+
{
|
| 100365 |
+
"epoch": 16.32934472934473,
|
| 100366 |
+
"grad_norm": 0.1944434940814972,
|
| 100367 |
+
"learning_rate": 4.378587447573951e-06,
|
| 100368 |
+
"loss": 0.7895,
|
| 100369 |
+
"step": 14337
|
| 100370 |
+
},
|
| 100371 |
+
{
|
| 100372 |
+
"epoch": 16.33048433048433,
|
| 100373 |
+
"grad_norm": 0.1940242499113083,
|
| 100374 |
+
"learning_rate": 4.375953774211999e-06,
|
| 100375 |
+
"loss": 0.6543,
|
| 100376 |
+
"step": 14338
|
| 100377 |
+
},
|
| 100378 |
+
{
|
| 100379 |
+
"epoch": 16.33162393162393,
|
| 100380 |
+
"grad_norm": 0.202335387468338,
|
| 100381 |
+
"learning_rate": 4.3733208171798934e-06,
|
| 100382 |
+
"loss": 0.8032,
|
| 100383 |
+
"step": 14339
|
| 100384 |
+
},
|
| 100385 |
+
{
|
| 100386 |
+
"epoch": 16.332763532763533,
|
| 100387 |
+
"grad_norm": 0.22755244374275208,
|
| 100388 |
+
"learning_rate": 4.370688576569076e-06,
|
| 100389 |
+
"loss": 0.6037,
|
| 100390 |
+
"step": 14340
|
| 100391 |
+
},
|
| 100392 |
+
{
|
| 100393 |
+
"epoch": 16.333903133903135,
|
| 100394 |
+
"grad_norm": 0.18921247124671936,
|
| 100395 |
+
"learning_rate": 4.368057052470978e-06,
|
| 100396 |
+
"loss": 0.749,
|
| 100397 |
+
"step": 14341
|
| 100398 |
+
},
|
| 100399 |
+
{
|
| 100400 |
+
"epoch": 16.335042735042734,
|
| 100401 |
+
"grad_norm": 0.17129771411418915,
|
| 100402 |
+
"learning_rate": 4.3654262449769946e-06,
|
| 100403 |
+
"loss": 0.6225,
|
| 100404 |
+
"step": 14342
|
| 100405 |
+
},
|
| 100406 |
+
{
|
| 100407 |
+
"epoch": 16.336182336182336,
|
| 100408 |
+
"grad_norm": 0.1838228851556778,
|
| 100409 |
+
"learning_rate": 4.362796154178503e-06,
|
| 100410 |
+
"loss": 0.8151,
|
| 100411 |
+
"step": 14343
|
| 100412 |
+
},
|
| 100413 |
+
{
|
| 100414 |
+
"epoch": 16.33732193732194,
|
| 100415 |
+
"grad_norm": 0.20149587094783783,
|
| 100416 |
+
"learning_rate": 4.360166780166861e-06,
|
| 100417 |
+
"loss": 0.6324,
|
| 100418 |
+
"step": 14344
|
| 100419 |
+
},
|
| 100420 |
+
{
|
| 100421 |
+
"epoch": 16.338461538461537,
|
| 100422 |
+
"grad_norm": 0.25393611192703247,
|
| 100423 |
+
"learning_rate": 4.357538123033378e-06,
|
| 100424 |
+
"loss": 0.2834,
|
| 100425 |
+
"step": 14345
|
| 100426 |
+
},
|
| 100427 |
+
{
|
| 100428 |
+
"epoch": 16.33960113960114,
|
| 100429 |
+
"grad_norm": 0.18815156817436218,
|
| 100430 |
+
"learning_rate": 4.354910182869365e-06,
|
| 100431 |
+
"loss": 0.8636,
|
| 100432 |
+
"step": 14346
|
| 100433 |
+
},
|
| 100434 |
+
{
|
| 100435 |
+
"epoch": 16.340740740740742,
|
| 100436 |
+
"grad_norm": 0.17681847512722015,
|
| 100437 |
+
"learning_rate": 4.352282959766096e-06,
|
| 100438 |
+
"loss": 0.7832,
|
| 100439 |
+
"step": 14347
|
| 100440 |
+
},
|
| 100441 |
+
{
|
| 100442 |
+
"epoch": 16.34188034188034,
|
| 100443 |
+
"grad_norm": 0.20507217943668365,
|
| 100444 |
+
"learning_rate": 4.349656453814827e-06,
|
| 100445 |
+
"loss": 0.5741,
|
| 100446 |
+
"step": 14348
|
| 100447 |
+
},
|
| 100448 |
+
{
|
| 100449 |
+
"epoch": 16.343019943019943,
|
| 100450 |
+
"grad_norm": 0.2923872768878937,
|
| 100451 |
+
"learning_rate": 4.347030665106772e-06,
|
| 100452 |
+
"loss": 0.4988,
|
| 100453 |
+
"step": 14349
|
| 100454 |
+
},
|
| 100455 |
+
{
|
| 100456 |
+
"epoch": 16.344159544159545,
|
| 100457 |
+
"grad_norm": 0.2339613139629364,
|
| 100458 |
+
"learning_rate": 4.344405593733136e-06,
|
| 100459 |
+
"loss": 0.5456,
|
| 100460 |
+
"step": 14350
|
| 100461 |
+
},
|
| 100462 |
+
{
|
| 100463 |
+
"epoch": 16.345299145299144,
|
| 100464 |
+
"grad_norm": 0.1596457064151764,
|
| 100465 |
+
"learning_rate": 4.341781239785098e-06,
|
| 100466 |
+
"loss": 0.8624,
|
| 100467 |
+
"step": 14351
|
| 100468 |
+
},
|
| 100469 |
+
{
|
| 100470 |
+
"epoch": 16.346438746438746,
|
| 100471 |
+
"grad_norm": 0.19932234287261963,
|
| 100472 |
+
"learning_rate": 4.339157603353808e-06,
|
| 100473 |
+
"loss": 0.8029,
|
| 100474 |
+
"step": 14352
|
| 100475 |
+
},
|
| 100476 |
+
{
|
| 100477 |
+
"epoch": 16.34757834757835,
|
| 100478 |
+
"grad_norm": 0.20804062485694885,
|
| 100479 |
+
"learning_rate": 4.336534684530391e-06,
|
| 100480 |
+
"loss": 0.6707,
|
| 100481 |
+
"step": 14353
|
| 100482 |
+
},
|
| 100483 |
+
{
|
| 100484 |
+
"epoch": 16.348717948717947,
|
| 100485 |
+
"grad_norm": 0.18091614544391632,
|
| 100486 |
+
"learning_rate": 4.33391248340595e-06,
|
| 100487 |
+
"loss": 0.6526,
|
| 100488 |
+
"step": 14354
|
| 100489 |
+
},
|
| 100490 |
+
{
|
| 100491 |
+
"epoch": 16.34985754985755,
|
| 100492 |
+
"grad_norm": 0.23857539892196655,
|
| 100493 |
+
"learning_rate": 4.331291000071561e-06,
|
| 100494 |
+
"loss": 0.8791,
|
| 100495 |
+
"step": 14355
|
| 100496 |
+
},
|
| 100497 |
+
{
|
| 100498 |
+
"epoch": 16.350997150997152,
|
| 100499 |
+
"grad_norm": 0.23884528875350952,
|
| 100500 |
+
"learning_rate": 4.3286702346182815e-06,
|
| 100501 |
+
"loss": 0.6356,
|
| 100502 |
+
"step": 14356
|
| 100503 |
+
},
|
| 100504 |
+
{
|
| 100505 |
+
"epoch": 16.35213675213675,
|
| 100506 |
+
"grad_norm": 0.22342674434185028,
|
| 100507 |
+
"learning_rate": 4.326050187137123e-06,
|
| 100508 |
+
"loss": 0.608,
|
| 100509 |
+
"step": 14357
|
| 100510 |
+
},
|
| 100511 |
+
{
|
| 100512 |
+
"epoch": 16.353276353276353,
|
| 100513 |
+
"grad_norm": 0.2086082547903061,
|
| 100514 |
+
"learning_rate": 4.323430857719097e-06,
|
| 100515 |
+
"loss": 0.666,
|
| 100516 |
+
"step": 14358
|
| 100517 |
+
},
|
| 100518 |
+
{
|
| 100519 |
+
"epoch": 16.354415954415956,
|
| 100520 |
+
"grad_norm": 0.21723128855228424,
|
| 100521 |
+
"learning_rate": 4.320812246455175e-06,
|
| 100522 |
+
"loss": 0.6238,
|
| 100523 |
+
"step": 14359
|
| 100524 |
+
},
|
| 100525 |
+
{
|
| 100526 |
+
"epoch": 16.355555555555554,
|
| 100527 |
+
"grad_norm": 0.2092699557542801,
|
| 100528 |
+
"learning_rate": 4.3181943534363225e-06,
|
| 100529 |
+
"loss": 0.5761,
|
| 100530 |
+
"step": 14360
|
| 100531 |
+
},
|
| 100532 |
+
{
|
| 100533 |
+
"epoch": 16.356695156695157,
|
| 100534 |
+
"grad_norm": 0.22917023301124573,
|
| 100535 |
+
"learning_rate": 4.315577178753444e-06,
|
| 100536 |
+
"loss": 0.6894,
|
| 100537 |
+
"step": 14361
|
| 100538 |
+
},
|
| 100539 |
+
{
|
| 100540 |
+
"epoch": 16.35783475783476,
|
| 100541 |
+
"grad_norm": 0.24297906458377838,
|
| 100542 |
+
"learning_rate": 4.312960722497455e-06,
|
| 100543 |
+
"loss": 0.5083,
|
| 100544 |
+
"step": 14362
|
| 100545 |
+
},
|
| 100546 |
+
{
|
| 100547 |
+
"epoch": 16.358974358974358,
|
| 100548 |
+
"grad_norm": 0.21671253442764282,
|
| 100549 |
+
"learning_rate": 4.310344984759229e-06,
|
| 100550 |
+
"loss": 0.5863,
|
| 100551 |
+
"step": 14363
|
| 100552 |
+
},
|
| 100553 |
+
{
|
| 100554 |
+
"epoch": 16.36011396011396,
|
| 100555 |
+
"grad_norm": 0.19929228723049164,
|
| 100556 |
+
"learning_rate": 4.30772996562962e-06,
|
| 100557 |
+
"loss": 0.6084,
|
| 100558 |
+
"step": 14364
|
| 100559 |
+
},
|
| 100560 |
+
{
|
| 100561 |
+
"epoch": 16.361253561253562,
|
| 100562 |
+
"grad_norm": 0.17057958245277405,
|
| 100563 |
+
"learning_rate": 4.305115665199453e-06,
|
| 100564 |
+
"loss": 0.6095,
|
| 100565 |
+
"step": 14365
|
| 100566 |
+
},
|
| 100567 |
+
{
|
| 100568 |
+
"epoch": 16.36239316239316,
|
| 100569 |
+
"grad_norm": 0.25199905037879944,
|
| 100570 |
+
"learning_rate": 4.302502083559529e-06,
|
| 100571 |
+
"loss": 0.7987,
|
| 100572 |
+
"step": 14366
|
| 100573 |
+
},
|
| 100574 |
+
{
|
| 100575 |
+
"epoch": 16.363532763532763,
|
| 100576 |
+
"grad_norm": 0.2120455950498581,
|
| 100577 |
+
"learning_rate": 4.299889220800632e-06,
|
| 100578 |
+
"loss": 0.7455,
|
| 100579 |
+
"step": 14367
|
| 100580 |
+
},
|
| 100581 |
+
{
|
| 100582 |
+
"epoch": 16.364672364672366,
|
| 100583 |
+
"grad_norm": 0.19496478140354156,
|
| 100584 |
+
"learning_rate": 4.2972770770135035e-06,
|
| 100585 |
+
"loss": 0.6692,
|
| 100586 |
+
"step": 14368
|
| 100587 |
+
},
|
| 100588 |
+
{
|
| 100589 |
+
"epoch": 16.365811965811965,
|
| 100590 |
+
"grad_norm": 0.18825624883174896,
|
| 100591 |
+
"learning_rate": 4.294665652288873e-06,
|
| 100592 |
+
"loss": 0.796,
|
| 100593 |
+
"step": 14369
|
| 100594 |
+
},
|
| 100595 |
+
{
|
| 100596 |
+
"epoch": 16.366951566951567,
|
| 100597 |
+
"grad_norm": 0.21346953511238098,
|
| 100598 |
+
"learning_rate": 4.292054946717444e-06,
|
| 100599 |
+
"loss": 0.5156,
|
| 100600 |
+
"step": 14370
|
| 100601 |
+
},
|
| 100602 |
+
{
|
| 100603 |
+
"epoch": 16.36809116809117,
|
| 100604 |
+
"grad_norm": 0.19936388731002808,
|
| 100605 |
+
"learning_rate": 4.2894449603899015e-06,
|
| 100606 |
+
"loss": 0.6216,
|
| 100607 |
+
"step": 14371
|
| 100608 |
+
},
|
| 100609 |
+
{
|
| 100610 |
+
"epoch": 16.369230769230768,
|
| 100611 |
+
"grad_norm": 0.25102996826171875,
|
| 100612 |
+
"learning_rate": 4.286835693396885e-06,
|
| 100613 |
+
"loss": 0.5496,
|
| 100614 |
+
"step": 14372
|
| 100615 |
+
},
|
| 100616 |
+
{
|
| 100617 |
+
"epoch": 16.37037037037037,
|
| 100618 |
+
"grad_norm": 0.21934431791305542,
|
| 100619 |
+
"learning_rate": 4.284227145829023e-06,
|
| 100620 |
+
"loss": 0.5757,
|
| 100621 |
+
"step": 14373
|
| 100622 |
+
},
|
| 100623 |
+
{
|
| 100624 |
+
"epoch": 16.371509971509973,
|
| 100625 |
+
"grad_norm": 0.16905446350574493,
|
| 100626 |
+
"learning_rate": 4.281619317776922e-06,
|
| 100627 |
+
"loss": 0.9319,
|
| 100628 |
+
"step": 14374
|
| 100629 |
+
},
|
| 100630 |
+
{
|
| 100631 |
+
"epoch": 16.37264957264957,
|
| 100632 |
+
"grad_norm": 0.17735671997070312,
|
| 100633 |
+
"learning_rate": 4.279012209331157e-06,
|
| 100634 |
+
"loss": 0.7828,
|
| 100635 |
+
"step": 14375
|
| 100636 |
+
},
|
| 100637 |
+
{
|
| 100638 |
+
"epoch": 16.373789173789174,
|
| 100639 |
+
"grad_norm": 0.17744825780391693,
|
| 100640 |
+
"learning_rate": 4.276405820582282e-06,
|
| 100641 |
+
"loss": 0.5597,
|
| 100642 |
+
"step": 14376
|
| 100643 |
+
},
|
| 100644 |
+
{
|
| 100645 |
+
"epoch": 16.374928774928776,
|
| 100646 |
+
"grad_norm": 0.1992998868227005,
|
| 100647 |
+
"learning_rate": 4.273800151620824e-06,
|
| 100648 |
+
"loss": 0.8753,
|
| 100649 |
+
"step": 14377
|
| 100650 |
+
},
|
| 100651 |
+
{
|
| 100652 |
+
"epoch": 16.376068376068375,
|
| 100653 |
+
"grad_norm": 0.1772407591342926,
|
| 100654 |
+
"learning_rate": 4.27119520253729e-06,
|
| 100655 |
+
"loss": 0.6356,
|
| 100656 |
+
"step": 14378
|
| 100657 |
+
},
|
| 100658 |
+
{
|
| 100659 |
+
"epoch": 16.377207977207977,
|
| 100660 |
+
"grad_norm": 0.2159920334815979,
|
| 100661 |
+
"learning_rate": 4.268590973422143e-06,
|
| 100662 |
+
"loss": 0.7826,
|
| 100663 |
+
"step": 14379
|
| 100664 |
+
},
|
| 100665 |
+
{
|
| 100666 |
+
"epoch": 16.37834757834758,
|
| 100667 |
+
"grad_norm": 0.24267174303531647,
|
| 100668 |
+
"learning_rate": 4.265987464365842e-06,
|
| 100669 |
+
"loss": 0.5721,
|
| 100670 |
+
"step": 14380
|
| 100671 |
+
},
|
| 100672 |
+
{
|
| 100673 |
+
"epoch": 16.379487179487178,
|
| 100674 |
+
"grad_norm": 0.25559645891189575,
|
| 100675 |
+
"learning_rate": 4.2633846754588175e-06,
|
| 100676 |
+
"loss": 0.5517,
|
| 100677 |
+
"step": 14381
|
| 100678 |
+
},
|
| 100679 |
+
{
|
| 100680 |
+
"epoch": 16.38062678062678,
|
| 100681 |
+
"grad_norm": 0.22765012085437775,
|
| 100682 |
+
"learning_rate": 4.260782606791475e-06,
|
| 100683 |
+
"loss": 0.4509,
|
| 100684 |
+
"step": 14382
|
| 100685 |
+
},
|
| 100686 |
+
{
|
| 100687 |
+
"epoch": 16.381766381766383,
|
| 100688 |
+
"grad_norm": 0.2365788072347641,
|
| 100689 |
+
"learning_rate": 4.2581812584541805e-06,
|
| 100690 |
+
"loss": 0.7347,
|
| 100691 |
+
"step": 14383
|
| 100692 |
+
},
|
| 100693 |
+
{
|
| 100694 |
+
"epoch": 16.38290598290598,
|
| 100695 |
+
"grad_norm": 0.19298528134822845,
|
| 100696 |
+
"learning_rate": 4.25558063053729e-06,
|
| 100697 |
+
"loss": 0.6287,
|
| 100698 |
+
"step": 14384
|
| 100699 |
+
},
|
| 100700 |
+
{
|
| 100701 |
+
"epoch": 16.384045584045584,
|
| 100702 |
+
"grad_norm": 0.16975969076156616,
|
| 100703 |
+
"learning_rate": 4.252980723131131e-06,
|
| 100704 |
+
"loss": 0.9887,
|
| 100705 |
+
"step": 14385
|
| 100706 |
+
},
|
| 100707 |
+
{
|
| 100708 |
+
"epoch": 16.385185185185186,
|
| 100709 |
+
"grad_norm": 0.19433729350566864,
|
| 100710 |
+
"learning_rate": 4.250381536326006e-06,
|
| 100711 |
+
"loss": 0.6136,
|
| 100712 |
+
"step": 14386
|
| 100713 |
+
},
|
| 100714 |
+
{
|
| 100715 |
+
"epoch": 16.386324786324785,
|
| 100716 |
+
"grad_norm": 0.23822815716266632,
|
| 100717 |
+
"learning_rate": 4.247783070212194e-06,
|
| 100718 |
+
"loss": 0.5435,
|
| 100719 |
+
"step": 14387
|
| 100720 |
+
},
|
| 100721 |
+
{
|
| 100722 |
+
"epoch": 16.387464387464387,
|
| 100723 |
+
"grad_norm": 0.16016489267349243,
|
| 100724 |
+
"learning_rate": 4.245185324879941e-06,
|
| 100725 |
+
"loss": 0.6939,
|
| 100726 |
+
"step": 14388
|
| 100727 |
+
},
|
| 100728 |
+
{
|
| 100729 |
+
"epoch": 16.38860398860399,
|
| 100730 |
+
"grad_norm": 0.22926373779773712,
|
| 100731 |
+
"learning_rate": 4.2425883004194865e-06,
|
| 100732 |
+
"loss": 0.5478,
|
| 100733 |
+
"step": 14389
|
| 100734 |
+
},
|
| 100735 |
+
{
|
| 100736 |
+
"epoch": 16.38974358974359,
|
| 100737 |
+
"grad_norm": 0.21683046221733093,
|
| 100738 |
+
"learning_rate": 4.2399919969210164e-06,
|
| 100739 |
+
"loss": 0.7768,
|
| 100740 |
+
"step": 14390
|
| 100741 |
+
},
|
| 100742 |
+
{
|
| 100743 |
+
"epoch": 16.39088319088319,
|
| 100744 |
+
"grad_norm": 0.21870996057987213,
|
| 100745 |
+
"learning_rate": 4.237396414474715e-06,
|
| 100746 |
+
"loss": 0.5079,
|
| 100747 |
+
"step": 14391
|
| 100748 |
+
},
|
| 100749 |
+
{
|
| 100750 |
+
"epoch": 16.392022792022793,
|
| 100751 |
+
"grad_norm": 0.2075919359922409,
|
| 100752 |
+
"learning_rate": 4.234801553170734e-06,
|
| 100753 |
+
"loss": 0.6293,
|
| 100754 |
+
"step": 14392
|
| 100755 |
+
},
|
| 100756 |
+
{
|
| 100757 |
+
"epoch": 16.39316239316239,
|
| 100758 |
+
"grad_norm": 0.20165051519870758,
|
| 100759 |
+
"learning_rate": 4.232207413099204e-06,
|
| 100760 |
+
"loss": 0.6944,
|
| 100761 |
+
"step": 14393
|
| 100762 |
+
},
|
| 100763 |
+
{
|
| 100764 |
+
"epoch": 16.394301994301994,
|
| 100765 |
+
"grad_norm": 0.2192036360502243,
|
| 100766 |
+
"learning_rate": 4.2296139943502175e-06,
|
| 100767 |
+
"loss": 0.4595,
|
| 100768 |
+
"step": 14394
|
| 100769 |
+
},
|
| 100770 |
+
{
|
| 100771 |
+
"epoch": 16.395441595441596,
|
| 100772 |
+
"grad_norm": 0.22861096262931824,
|
| 100773 |
+
"learning_rate": 4.227021297013856e-06,
|
| 100774 |
+
"loss": 0.6534,
|
| 100775 |
+
"step": 14395
|
| 100776 |
+
},
|
| 100777 |
+
{
|
| 100778 |
+
"epoch": 16.396581196581195,
|
| 100779 |
+
"grad_norm": 0.18797266483306885,
|
| 100780 |
+
"learning_rate": 4.22442932118017e-06,
|
| 100781 |
+
"loss": 0.6111,
|
| 100782 |
+
"step": 14396
|
| 100783 |
+
},
|
| 100784 |
+
{
|
| 100785 |
+
"epoch": 16.397720797720797,
|
| 100786 |
+
"grad_norm": 0.1839674562215805,
|
| 100787 |
+
"learning_rate": 4.221838066939187e-06,
|
| 100788 |
+
"loss": 0.6883,
|
| 100789 |
+
"step": 14397
|
| 100790 |
+
},
|
| 100791 |
+
{
|
| 100792 |
+
"epoch": 16.3988603988604,
|
| 100793 |
+
"grad_norm": 0.22609136998653412,
|
| 100794 |
+
"learning_rate": 4.219247534380907e-06,
|
| 100795 |
+
"loss": 0.6929,
|
| 100796 |
+
"step": 14398
|
| 100797 |
+
},
|
| 100798 |
+
{
|
| 100799 |
+
"epoch": 16.4,
|
| 100800 |
+
"grad_norm": 0.18066316843032837,
|
| 100801 |
+
"learning_rate": 4.216657723595311e-06,
|
| 100802 |
+
"loss": 0.6833,
|
| 100803 |
+
"step": 14399
|
| 100804 |
+
},
|
| 100805 |
+
{
|
| 100806 |
+
"epoch": 16.4011396011396,
|
| 100807 |
+
"grad_norm": 0.21688348054885864,
|
| 100808 |
+
"learning_rate": 4.214068634672344e-06,
|
| 100809 |
+
"loss": 0.4891,
|
| 100810 |
+
"step": 14400
|
| 100811 |
}
|
| 100812 |
],
|
| 100813 |
"logging_steps": 1,
|
|
|
|
| 100827 |
"attributes": {}
|
| 100828 |
}
|
| 100829 |
},
|
| 100830 |
+
"total_flos": 8.051358816826049e+19,
|
| 100831 |
"train_batch_size": 8,
|
| 100832 |
"trial_name": null,
|
| 100833 |
"trial_params": null
|
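
The entries added in this checkpoint follow the per-step logging schema visible above (epoch, grad_norm, learning_rate, loss, step), and the trailing fields record logging_steps = 1, the accumulated total_flos, and train_batch_size = 8. For a quick sanity check after pulling the checkpoint, the logged losses can be summarized with a short script. This is only an illustrative sketch, not part of the commit: it assumes the last-checkpoint/ directory from this commit is available locally and that the state file uses the standard "log_history" key; the 100-step averaging window is an arbitrary choice.

import json

# Load the trainer state shipped with this checkpoint.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# One dict per logged training step (logging_steps = 1); eval records, if any, would lack "loss".
train_logs = [entry for entry in state["log_history"] if "loss" in entry]
recent = train_logs[-100:]  # arbitrary window for a rolling average

mean_loss = sum(entry["loss"] for entry in recent) / len(recent)
last = recent[-1]
print(f"step={last['step']}  epoch={last['epoch']:.3f}")
print(f"last lr={last['learning_rate']:.3e}  mean loss over last {len(recent)} steps={mean_loss:.4f}")

Run against this checkpoint, the last entry should report step 14400 with the learning rate still decaying (around 4.21e-06 here), which is a cheap way to confirm the scheduler and logging state were saved consistently with optimizer.pt and scheduler.pt.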