Upload folder using huggingface_hub

Files changed:
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1753 -3
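A commit like this one is typically produced with huggingface_hub's upload_folder, which pushes a local training directory to the Hub as a single commit. A minimal sketch; the folder path and repo id below are hypothetical placeholders, not values taken from this repo:

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./output",        # hypothetical local training directory
    repo_id="user/my-adapter",     # hypothetical target model repo on the Hub
    commit_message="Upload folder using huggingface_hub",
)

Large binaries such as the .safetensors and .pt files below are stored through Git LFS on the Hub, which is why their diffs touch only the pointer files.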
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0fc3080bc8c9d5423bd93b737dad7a79888723a50b70ebcf4c6c61f113338df6
 size 3208536
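Each of these checkpoint binaries is stored through Git LFS, so the diff only changes the three-line pointer file: the spec version, an oid (the SHA-256 digest of the blob's contents), and its size in bytes. A minimal sketch of how that oid is computed, using the file name from this commit:

import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    # Git LFS records the SHA-256 of the raw file contents as the pointer's oid.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

lfs_oid("adapter_model.safetensors")
# -> "0fc3080bc8c9d5423bd93b737dad7a79888723a50b70ebcf4c6c61f113338df6"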
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:04c3614839da41671462f92a3b76cf7997814e78fb2974ee91022d1eb91c3f32
 size 1720890
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:456a5ee388d6bddca5fe294594c1eefd9c1493fdfe252053684e890bf8c56752
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7c26800e6aad79542c71a265990c35470e0f0c258d88e99aef73f16b83c77e16
 size 1064
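optimizer.pt, scheduler.pt, and rng_state.pth are the extra state a transformers Trainer writes into each checkpoint so a run can resume exactly where it stopped (optimizer moments, learning-rate-scheduler position, and RNG state). A hedged sketch; the checkpoint directory name is hypothetical, chosen to match this commit's global_step:

from transformers import Trainer

# trainer = Trainer(model=..., args=..., train_dataset=...)
# Restores optimizer, scheduler, and RNG state along with the weights.
trainer.train(resume_from_checkpoint="checkpoint-30000")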
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 146.4307504575961,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 30000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8757,6 +8757,1756 @@
       "learning_rate": 3.672131147540984e-05,
       "loss": 0.0179,
       "step": 25000
+    },
+    {"epoch": 122.12324588163514, "grad_norm": 0.5783191323280334, "learning_rate": 3.659016393442623e-05, "loss": 0.0159, "step": 25020},
+    {"epoch": 122.22086638194021, "grad_norm": 0.6851247549057007, "learning_rate": 3.6459016393442625e-05, "loss": 0.0167, "step": 25040},
+    {"epoch": 122.31848688224527, "grad_norm": 0.5190562009811401, "learning_rate": 3.632786885245902e-05, "loss": 0.0168, "step": 25060},
+    {"epoch": 122.41610738255034, "grad_norm": 0.8032932281494141, "learning_rate": 3.619672131147541e-05, "loss": 0.0173, "step": 25080},
+    {"epoch": 122.5137278828554, "grad_norm": 1.162681221961975, "learning_rate": 3.6065573770491806e-05, "loss": 0.0175, "step": 25100},
+    {"epoch": 122.61134838316046, "grad_norm": 0.9898841977119446, "learning_rate": 3.59344262295082e-05, "loss": 0.0154, "step": 25120},
+    {"epoch": 122.70896888346553, "grad_norm": 0.7703188061714172, "learning_rate": 3.580327868852459e-05, "loss": 0.0177, "step": 25140},
+    {"epoch": 122.80658938377059, "grad_norm": 0.6557360291481018, "learning_rate": 3.567213114754099e-05, "loss": 0.0187, "step": 25160},
+    {"epoch": 122.90420988407566, "grad_norm": 0.6278268694877625, "learning_rate": 3.554098360655738e-05, "loss": 0.0173, "step": 25180},
+    {"epoch": 123.00183038438072, "grad_norm": 0.5595793128013611, "learning_rate": 3.5409836065573773e-05, "loss": 0.02, "step": 25200},
+    {"epoch": 123.09945088468578, "grad_norm": 0.8069674968719482, "learning_rate": 3.527868852459016e-05, "loss": 0.0157, "step": 25220},
+    {"epoch": 123.19707138499085, "grad_norm": 0.5641182661056519, "learning_rate": 3.514754098360656e-05, "loss": 0.0162, "step": 25240},
+    {"epoch": 123.29469188529592, "grad_norm": 1.641262412071228, "learning_rate": 3.5016393442622955e-05, "loss": 0.0153, "step": 25260},
+    {"epoch": 123.39231238560097, "grad_norm": 0.828906238079071, "learning_rate": 3.4885245901639344e-05, "loss": 0.0165, "step": 25280},
+    {"epoch": 123.48993288590604, "grad_norm": 0.4439915418624878, "learning_rate": 3.475409836065574e-05, "loss": 0.0175, "step": 25300},
+    {"epoch": 123.5875533862111, "grad_norm": 0.5250588059425354, "learning_rate": 3.462295081967214e-05, "loss": 0.016, "step": 25320},
+    {"epoch": 123.68517388651617, "grad_norm": 1.8672527074813843, "learning_rate": 3.4491803278688526e-05, "loss": 0.0168, "step": 25340},
+    {"epoch": 123.78279438682124, "grad_norm": 0.905852735042572, "learning_rate": 3.436065573770492e-05, "loss": 0.0166, "step": 25360},
+    {"epoch": 123.88041488712629, "grad_norm": 1.0820852518081665, "learning_rate": 3.422950819672131e-05, "loss": 0.018, "step": 25380},
+    {"epoch": 123.97803538743136, "grad_norm": 0.901567816734314, "learning_rate": 3.409836065573771e-05, "loss": 0.0175, "step": 25400},
+    {"epoch": 124.07565588773643, "grad_norm": 0.5788626074790955, "learning_rate": 3.39672131147541e-05, "loss": 0.0153, "step": 25420},
+    {"epoch": 124.17327638804149, "grad_norm": 2.4590864181518555, "learning_rate": 3.383606557377049e-05, "loss": 0.0156, "step": 25440},
+    {"epoch": 124.27089688834656, "grad_norm": 0.5568335056304932, "learning_rate": 3.370491803278689e-05, "loss": 0.0145, "step": 25460},
+    {"epoch": 124.36851738865161, "grad_norm": 0.9648571014404297, "learning_rate": 3.357377049180328e-05, "loss": 0.0139, "step": 25480},
+    {"epoch": 124.46613788895668, "grad_norm": 1.7256869077682495, "learning_rate": 3.3442622950819675e-05, "loss": 0.0161, "step": 25500},
+    {"epoch": 124.56375838926175, "grad_norm": 0.7551959753036499, "learning_rate": 3.331147540983607e-05, "loss": 0.0165, "step": 25520},
+    {"epoch": 124.6613788895668, "grad_norm": 0.6856973767280579, "learning_rate": 3.318032786885246e-05, "loss": 0.0167, "step": 25540},
+    {"epoch": 124.75899938987187, "grad_norm": 0.6650362610816956, "learning_rate": 3.3049180327868857e-05, "loss": 0.0164, "step": 25560},
+    {"epoch": 124.85661989017694, "grad_norm": 1.0952746868133545, "learning_rate": 3.291803278688525e-05, "loss": 0.0181, "step": 25580},
+    {"epoch": 124.954240390482, "grad_norm": 0.8695099353790283, "learning_rate": 3.2786885245901635e-05, "loss": 0.0167, "step": 25600},
+    {"epoch": 125.05186089078707, "grad_norm": 0.5697212219238281, "learning_rate": 3.265573770491803e-05, "loss": 0.0166, "step": 25620},
+    {"epoch": 125.14948139109212, "grad_norm": 0.6281394958496094, "learning_rate": 3.252459016393443e-05, "loss": 0.0138, "step": 25640},
+    {"epoch": 125.2471018913972, "grad_norm": 0.7632110118865967, "learning_rate": 3.2393442622950824e-05, "loss": 0.0163, "step": 25660},
+    {"epoch": 125.34472239170226, "grad_norm": 0.587164580821991, "learning_rate": 3.226229508196721e-05, "loss": 0.0169, "step": 25680},
+    {"epoch": 125.44234289200732, "grad_norm": 0.8123992681503296, "learning_rate": 3.213114754098361e-05, "loss": 0.0156, "step": 25700},
+    {"epoch": 125.53996339231239, "grad_norm": 0.7210849523544312, "learning_rate": 3.2000000000000005e-05, "loss": 0.0161, "step": 25720},
+    {"epoch": 125.63758389261746, "grad_norm": 0.6011385917663574, "learning_rate": 3.1868852459016395e-05, "loss": 0.0178, "step": 25740},
+    {"epoch": 125.73520439292251, "grad_norm": 0.8048945665359497, "learning_rate": 3.173770491803279e-05, "loss": 0.0172, "step": 25760},
+    {"epoch": 125.83282489322758, "grad_norm": 0.5456706285476685, "learning_rate": 3.160655737704919e-05, "loss": 0.0188, "step": 25780},
+    {"epoch": 125.93044539353264, "grad_norm": 1.419385313987732, "learning_rate": 3.1475409836065576e-05, "loss": 0.0187, "step": 25800},
+    {"epoch": 126.0280658938377, "grad_norm": 0.8208538293838501, "learning_rate": 3.1344262295081966e-05, "loss": 0.0149, "step": 25820},
+    {"epoch": 126.12568639414278, "grad_norm": 0.45135247707366943, "learning_rate": 3.121311475409836e-05, "loss": 0.0139, "step": 25840},
+    {"epoch": 126.22330689444783, "grad_norm": 0.565280556678772, "learning_rate": 3.108196721311475e-05, "loss": 0.0123, "step": 25860},
+    {"epoch": 126.3209273947529, "grad_norm": 0.742659866809845, "learning_rate": 3.095081967213115e-05, "loss": 0.0151, "step": 25880},
+    {"epoch": 126.41854789505797, "grad_norm": 1.5381386280059814, "learning_rate": 3.0819672131147544e-05, "loss": 0.0157, "step": 25900},
+    {"epoch": 126.51616839536302, "grad_norm": 0.626524031162262, "learning_rate": 3.068852459016393e-05, "loss": 0.0151, "step": 25920},
+    {"epoch": 126.6137888956681, "grad_norm": 0.6463727355003357, "learning_rate": 3.055737704918033e-05, "loss": 0.0174, "step": 25940},
+    {"epoch": 126.71140939597315, "grad_norm": 0.48679399490356445, "learning_rate": 3.0426229508196725e-05, "loss": 0.0154, "step": 25960},
+    {"epoch": 126.80902989627822, "grad_norm": 0.9534430503845215, "learning_rate": 3.0295081967213118e-05, "loss": 0.0185, "step": 25980},
+    {"epoch": 126.90665039658329, "grad_norm": 0.571997344493866, "learning_rate": 3.016393442622951e-05, "loss": 0.0174, "step": 26000},
+    {"epoch": 127.00427089688834, "grad_norm": 0.8983253836631775, "learning_rate": 3.00327868852459e-05, "loss": 0.0183, "step": 26020},
+    {"epoch": 127.10189139719341, "grad_norm": 0.37496012449264526, "learning_rate": 2.9901639344262293e-05, "loss": 0.0135, "step": 26040},
+    {"epoch": 127.19951189749847, "grad_norm": 0.7320930361747742, "learning_rate": 2.977049180327869e-05, "loss": 0.0138, "step": 26060},
+    {"epoch": 127.29713239780354, "grad_norm": 1.5510950088500977, "learning_rate": 2.963934426229508e-05, "loss": 0.0181, "step": 26080},
+    {"epoch": 127.3947528981086, "grad_norm": 0.25900664925575256, "learning_rate": 2.9508196721311478e-05, "loss": 0.0145, "step": 26100},
+    {"epoch": 127.49237339841366, "grad_norm": 0.7860931754112244, "learning_rate": 2.937704918032787e-05, "loss": 0.0149, "step": 26120},
+    {"epoch": 127.58999389871873, "grad_norm": 1.5779728889465332, "learning_rate": 2.9245901639344263e-05, "loss": 0.0168, "step": 26140},
+    {"epoch": 127.6876143990238, "grad_norm": 0.8237743377685547, "learning_rate": 2.911475409836066e-05, "loss": 0.0173, "step": 26160},
+    {"epoch": 127.78523489932886, "grad_norm": 0.5260995626449585, "learning_rate": 2.8983606557377052e-05, "loss": 0.0148, "step": 26180},
+    {"epoch": 127.88285539963393, "grad_norm": 1.158836007118225, "learning_rate": 2.8852459016393445e-05, "loss": 0.0179, "step": 26200},
+    {"epoch": 127.98047589993898, "grad_norm": 1.7822519540786743, "learning_rate": 2.872131147540984e-05, "loss": 0.0153, "step": 26220},
+    {"epoch": 128.07809640024405, "grad_norm": 0.6409000158309937, "learning_rate": 2.8590163934426227e-05, "loss": 0.0178, "step": 26240},
+    {"epoch": 128.1757169005491, "grad_norm": 0.7198218107223511, "learning_rate": 2.8459016393442623e-05, "loss": 0.0143, "step": 26260},
+    {"epoch": 128.2733374008542, "grad_norm": 0.9570964574813843, "learning_rate": 2.8327868852459016e-05, "loss": 0.0132, "step": 26280},
+    {"epoch": 128.37095790115924, "grad_norm": 0.40788573026657104, "learning_rate": 2.819672131147541e-05, "loss": 0.0151, "step": 26300},
+    {"epoch": 128.4685784014643, "grad_norm": 1.0642712116241455, "learning_rate": 2.8065573770491805e-05, "loss": 0.0154, "step": 26320},
+    {"epoch": 128.56619890176938, "grad_norm": 0.5972766280174255, "learning_rate": 2.7934426229508198e-05, "loss": 0.015, "step": 26340},
+    {"epoch": 128.66381940207444, "grad_norm": 0.5974799990653992, "learning_rate": 2.7803278688524594e-05, "loss": 0.0144, "step": 26360},
+    {"epoch": 128.7614399023795, "grad_norm": 0.8131697773933411, "learning_rate": 2.7672131147540987e-05, "loss": 0.0166, "step": 26380},
+    {"epoch": 128.85906040268458, "grad_norm": 0.8219912648200989, "learning_rate": 2.754098360655738e-05, "loss": 0.0186, "step": 26400},
+    {"epoch": 128.95668090298963, "grad_norm": 0.7310410737991333, "learning_rate": 2.7409836065573775e-05, "loss": 0.0158, "step": 26420},
+    {"epoch": 129.0543014032947, "grad_norm": 0.7448714375495911, "learning_rate": 2.7278688524590168e-05, "loss": 0.0161, "step": 26440},
+    {"epoch": 129.15192190359974, "grad_norm": 1.0379338264465332, "learning_rate": 2.7147540983606558e-05, "loss": 0.0129, "step": 26460},
+    {"epoch": 129.24954240390483, "grad_norm": 0.4505363404750824, "learning_rate": 2.701639344262295e-05, "loss": 0.0139, "step": 26480},
+    {"epoch": 129.34716290420988, "grad_norm": 0.49264198541641235, "learning_rate": 2.6885245901639343e-05, "loss": 0.0141, "step": 26500},
+    {"epoch": 129.44478340451494, "grad_norm": 0.38399410247802734, "learning_rate": 2.675409836065574e-05, "loss": 0.0148, "step": 26520},
+    {"epoch": 129.54240390482002, "grad_norm": 0.8914321660995483, "learning_rate": 2.6622950819672132e-05, "loss": 0.0159, "step": 26540},
+    {"epoch": 129.64002440512508, "grad_norm": 0.8293542265892029, "learning_rate": 2.6491803278688525e-05, "loss": 0.0157, "step": 26560},
+    {"epoch": 129.73764490543013, "grad_norm": 0.5534564256668091, "learning_rate": 2.636065573770492e-05, "loss": 0.0158, "step": 26580},
+    {"epoch": 129.8352654057352, "grad_norm": 0.7157993912696838, "learning_rate": 2.6229508196721314e-05, "loss": 0.016, "step": 26600},
+    {"epoch": 129.93288590604027, "grad_norm": 0.7746397256851196, "learning_rate": 2.6098360655737706e-05, "loss": 0.0192, "step": 26620},
+    {"epoch": 130.03050640634532, "grad_norm": 0.7727970480918884, "learning_rate": 2.5967213114754103e-05, "loss": 0.0151, "step": 26640},
+    {"epoch": 130.1281269066504, "grad_norm": 0.514680802822113, "learning_rate": 2.5836065573770492e-05, "loss": 0.0159, "step": 26660},
+    {"epoch": 130.22574740695546, "grad_norm": 0.87467360496521, "learning_rate": 2.5704918032786885e-05, "loss": 0.0137, "step": 26680},
+    {"epoch": 130.32336790726052, "grad_norm": 0.7342318296432495, "learning_rate": 2.5573770491803277e-05, "loss": 0.0164, "step": 26700},
+    {"epoch": 130.4209884075656, "grad_norm": 0.46169203519821167, "learning_rate": 2.5442622950819674e-05, "loss": 0.0148, "step": 26720},
+    {"epoch": 130.51860890787066, "grad_norm": 0.5552070140838623, "learning_rate": 2.5311475409836066e-05, "loss": 0.0146, "step": 26740},
+    {"epoch": 130.6162294081757, "grad_norm": 2.3732874393463135, "learning_rate": 2.518032786885246e-05, "loss": 0.0151, "step": 26760},
+    {"epoch": 130.71384990848077, "grad_norm": 0.7399420142173767, "learning_rate": 2.5049180327868855e-05, "loss": 0.0136, "step": 26780},
+    {"epoch": 130.81147040878585, "grad_norm": 0.7631209492683411, "learning_rate": 2.4918032786885248e-05, "loss": 0.0168, "step": 26800},
+    {"epoch": 130.9090909090909, "grad_norm": 0.4778473675251007, "learning_rate": 2.478688524590164e-05, "loss": 0.0144, "step": 26820},
+    {"epoch": 131.00671140939596, "grad_norm": 0.48981741070747375, "learning_rate": 2.4655737704918033e-05, "loss": 0.0174, "step": 26840},
+    {"epoch": 131.10433190970105, "grad_norm": 0.550786018371582, "learning_rate": 2.4524590163934426e-05, "loss": 0.0144, "step": 26860},
+    {"epoch": 131.2019524100061, "grad_norm": 1.1115200519561768, "learning_rate": 2.4393442622950822e-05, "loss": 0.0137, "step": 26880},
+    {"epoch": 131.29957291031116, "grad_norm": 0.7832316160202026, "learning_rate": 2.4262295081967215e-05, "loss": 0.0138, "step": 26900},
+    {"epoch": 131.39719341061624, "grad_norm": 0.7918095588684082, "learning_rate": 2.4131147540983608e-05, "loss": 0.0153, "step": 26920},
+    {"epoch": 131.4948139109213, "grad_norm": 0.5915355682373047, "learning_rate": 2.4e-05, "loss": 0.0159, "step": 26940},
+    {"epoch": 131.59243441122635, "grad_norm": 0.6909199357032776, "learning_rate": 2.3868852459016393e-05, "loss": 0.0159, "step": 26960},
+    {"epoch": 131.69005491153143, "grad_norm": 1.0566037893295288, "learning_rate": 2.373770491803279e-05, "loss": 0.0147, "step": 26980},
+    {"epoch": 131.7876754118365, "grad_norm": 1.6122446060180664, "learning_rate": 2.3606557377049182e-05, "loss": 0.0141, "step": 27000},
+    {"epoch": 131.88529591214154, "grad_norm": 0.8080132007598877, "learning_rate": 2.3475409836065575e-05, "loss": 0.0155, "step": 27020},
+    {"epoch": 131.98291641244663, "grad_norm": 0.45939984917640686, "learning_rate": 2.3344262295081968e-05, "loss": 0.0166, "step": 27040},
+    {"epoch": 132.08053691275168, "grad_norm": 0.8284308314323425, "learning_rate": 2.321311475409836e-05, "loss": 0.015, "step": 27060},
+    {"epoch": 132.17815741305674, "grad_norm": 0.6223374605178833, "learning_rate": 2.3081967213114757e-05, "loss": 0.0155, "step": 27080},
+    {"epoch": 132.2757779133618, "grad_norm": 1.6535650491714478, "learning_rate": 2.295081967213115e-05, "loss": 0.015, "step": 27100},
+    {"epoch": 132.37339841366688, "grad_norm": 0.6285653710365295, "learning_rate": 2.2819672131147542e-05, "loss": 0.0158, "step": 27120},
+    {"epoch": 132.47101891397193, "grad_norm": 0.6470975279808044, "learning_rate": 2.2688524590163935e-05, "loss": 0.0131, "step": 27140},
+    {"epoch": 132.568639414277, "grad_norm": 0.6603531241416931, "learning_rate": 2.2557377049180328e-05, "loss": 0.0155, "step": 27160},
+    {"epoch": 132.66625991458207, "grad_norm": 0.9789283275604248, "learning_rate": 2.2426229508196724e-05, "loss": 0.014, "step": 27180},
+    {"epoch": 132.76388041488713, "grad_norm": 0.7158600687980652, "learning_rate": 2.2295081967213117e-05, "loss": 0.0149, "step": 27200},
+    {"epoch": 132.86150091519218, "grad_norm": 0.4593288004398346, "learning_rate": 2.216393442622951e-05, "loss": 0.0149, "step": 27220},
+    {"epoch": 132.95912141549726, "grad_norm": 0.7383930087089539, "learning_rate": 2.2032786885245905e-05, "loss": 0.0158, "step": 27240},
+    {"epoch": 133.05674191580232, "grad_norm": 0.8438706398010254, "learning_rate": 2.1901639344262295e-05, "loss": 0.0152, "step": 27260},
+    {"epoch": 133.15436241610738, "grad_norm": 0.3977959156036377, "learning_rate": 2.1770491803278688e-05, "loss": 0.0135, "step": 27280},
+    {"epoch": 133.25198291641246, "grad_norm": 0.5032092332839966, "learning_rate": 2.1639344262295084e-05, "loss": 0.0144, "step": 27300},
+    {"epoch": 133.3496034167175, "grad_norm": 0.8900758028030396, "learning_rate": 2.1508196721311476e-05, "loss": 0.0142, "step": 27320},
+    {"epoch": 133.44722391702257, "grad_norm": 0.6694475412368774, "learning_rate": 2.1377049180327873e-05, "loss": 0.0148, "step": 27340},
+    {"epoch": 133.54484441732765, "grad_norm": 0.6150327920913696, "learning_rate": 2.1245901639344262e-05, "loss": 0.0137, "step": 27360},
+    {"epoch": 133.6424649176327, "grad_norm": 0.3980708718299866, "learning_rate": 2.1114754098360655e-05, "loss": 0.0131, "step": 27380},
+    {"epoch": 133.74008541793776, "grad_norm": 0.556053876876831, "learning_rate": 2.098360655737705e-05, "loss": 0.0173, "step": 27400},
+    {"epoch": 133.83770591824282, "grad_norm": 0.7154746055603027, "learning_rate": 2.0852459016393444e-05, "loss": 0.0142, "step": 27420},
+    {"epoch": 133.9353264185479, "grad_norm": 0.585117757320404, "learning_rate": 2.0721311475409836e-05, "loss": 0.0148, "step": 27440},
+    {"epoch": 134.03294691885296, "grad_norm": 0.4688512682914734, "learning_rate": 2.059016393442623e-05, "loss": 0.0139, "step": 27460},
+    {"epoch": 134.130567419158, "grad_norm": 0.3597017824649811, "learning_rate": 2.0459016393442622e-05, "loss": 0.0125, "step": 27480},
+    {"epoch": 134.2281879194631, "grad_norm": 0.6201938986778259, "learning_rate": 2.0327868852459018e-05, "loss": 0.014, "step": 27500},
+    {"epoch": 134.32580841976815, "grad_norm": 0.6969265341758728, "learning_rate": 2.019672131147541e-05, "loss": 0.0133, "step": 27520},
+    {"epoch": 134.4234289200732, "grad_norm": 0.6457026600837708, "learning_rate": 2.0065573770491804e-05, "loss": 0.0152, "step": 27540},
+    {"epoch": 134.5210494203783, "grad_norm": 0.7583892941474915, "learning_rate": 1.99344262295082e-05, "loss": 0.0148, "step": 27560},
+    {"epoch": 134.61866992068335, "grad_norm": 0.41781967878341675, "learning_rate": 1.980327868852459e-05, "loss": 0.0145, "step": 27580},
+    {"epoch": 134.7162904209884, "grad_norm": 1.2802424430847168, "learning_rate": 1.9672131147540985e-05, "loss": 0.0158, "step": 27600},
+    {"epoch": 134.81391092129348, "grad_norm": 0.3811515271663666, "learning_rate": 1.9540983606557378e-05, "loss": 0.0136, "step": 27620},
+    {"epoch": 134.91153142159854, "grad_norm": 0.41068577766418457, "learning_rate": 1.940983606557377e-05, "loss": 0.0166, "step": 27640},
+    {"epoch": 135.0091519219036, "grad_norm": 0.690075695514679, "learning_rate": 1.9278688524590167e-05, "loss": 0.0152, "step": 27660},
+    {"epoch": 135.10677242220865, "grad_norm": 0.6945540308952332, "learning_rate": 1.9147540983606556e-05, "loss": 0.0125, "step": 27680},
+    {"epoch": 135.20439292251373, "grad_norm": 0.9262276291847229, "learning_rate": 1.9016393442622952e-05, "loss": 0.014, "step": 27700},
+    {"epoch": 135.3020134228188, "grad_norm": 0.5992072224617004, "learning_rate": 1.8885245901639345e-05, "loss": 0.0132, "step": 27720},
+    {"epoch": 135.39963392312384, "grad_norm": 0.6684610247612, "learning_rate": 1.8754098360655738e-05, "loss": 0.0147, "step": 27740},
+    {"epoch": 135.49725442342893, "grad_norm": 0.647719144821167, "learning_rate": 1.8622950819672134e-05, "loss": 0.0168, "step": 27760},
+    {"epoch": 135.59487492373398, "grad_norm": 1.5291879177093506, "learning_rate": 1.8491803278688523e-05, "loss": 0.0159, "step": 27780},
+    {"epoch": 135.69249542403904, "grad_norm": 0.7436932325363159, "learning_rate": 1.836065573770492e-05, "loss": 0.0136, "step": 27800},
+    {"epoch": 135.79011592434412, "grad_norm": 0.38243773579597473, "learning_rate": 1.8229508196721312e-05, "loss": 0.0145, "step": 27820},
+    {"epoch": 135.88773642464918, "grad_norm": 0.6765353679656982, "learning_rate": 1.8098360655737705e-05, "loss": 0.0139, "step": 27840},
+    {"epoch": 135.98535692495423, "grad_norm": 0.3190823495388031, "learning_rate": 1.79672131147541e-05, "loss": 0.0152, "step": 27860},
+    {"epoch": 136.08297742525932, "grad_norm": 2.0219767093658447, "learning_rate": 1.7836065573770494e-05, "loss": 0.0143, "step": 27880},
+    {"epoch": 136.18059792556437, "grad_norm": 0.776849627494812, "learning_rate": 1.7704918032786887e-05, "loss": 0.0135, "step": 27900},
+    {"epoch": 136.27821842586943, "grad_norm": 0.5274736285209656, "learning_rate": 1.757377049180328e-05, "loss": 0.0123, "step": 27920},
+    {"epoch": 136.3758389261745, "grad_norm": 0.886225700378418, "learning_rate": 1.7442622950819672e-05, "loss": 0.0146, "step": 27940},
+    {"epoch": 136.47345942647956, "grad_norm": 0.5282070636749268, "learning_rate": 1.731147540983607e-05, "loss": 0.0137, "step": 27960},
+    {"epoch": 136.57107992678462, "grad_norm": 0.6784070730209351, "learning_rate": 1.718032786885246e-05, "loss": 0.0143, "step": 27980},
+    {"epoch": 136.66870042708968, "grad_norm": 1.7534900903701782, "learning_rate": 1.7049180327868854e-05, "loss": 0.0137, "step": 28000},
+    {"epoch": 136.76632092739476, "grad_norm": 0.40347975492477417, "learning_rate": 1.6918032786885247e-05, "loss": 0.0157, "step": 28020},
+    {"epoch": 136.8639414276998, "grad_norm": 1.0218480825424194, "learning_rate": 1.678688524590164e-05, "loss": 0.0145, "step": 28040},
+    {"epoch": 136.96156192800487, "grad_norm": 0.2875036597251892, "learning_rate": 1.6655737704918036e-05, "loss": 0.014, "step": 28060},
+    {"epoch": 137.05918242830995, "grad_norm": 1.5968719720840454, "learning_rate": 1.6524590163934428e-05, "loss": 0.0132, "step": 28080},
+    {"epoch": 137.156802928615, "grad_norm": 0.39140036702156067, "learning_rate": 1.6393442622950818e-05, "loss": 0.0138, "step": 28100},
+    {"epoch": 137.25442342892006, "grad_norm": 0.36571571230888367, "learning_rate": 1.6262295081967214e-05, "loss": 0.0134, "step": 28120},
+    {"epoch": 137.35204392922515, "grad_norm": 0.6531932950019836, "learning_rate": 1.6131147540983607e-05, "loss": 0.0146, "step": 28140},
+    {"epoch": 137.4496644295302, "grad_norm": 0.46148520708084106, "learning_rate": 1.6000000000000003e-05, "loss": 0.0136, "step": 28160},
+    {"epoch": 137.54728492983526, "grad_norm": 0.5359562635421753, "learning_rate": 1.5868852459016395e-05, "loss": 0.0128, "step": 28180},
+    {"epoch": 137.64490543014034, "grad_norm": 0.5632950663566589, "learning_rate": 1.5737704918032788e-05, "loss": 0.0148, "step": 28200},
+    {"epoch": 137.7425259304454, "grad_norm": 0.7229663729667664, "learning_rate": 1.560655737704918e-05, "loss": 0.0147, "step": 28220},
+    {"epoch": 137.84014643075045, "grad_norm": 0.5531187653541565, "learning_rate": 1.5475409836065574e-05, "loss": 0.014, "step": 28240},
+    {"epoch": 137.93776693105553, "grad_norm": 0.6305696964263916, "learning_rate": 1.5344262295081966e-05, "loss": 0.0157, "step": 28260},
+    {"epoch": 138.0353874313606, "grad_norm": 0.8933548331260681, "learning_rate": 1.5213114754098363e-05, "loss": 0.0156, "step": 28280},
+    {"epoch": 138.13300793166565, "grad_norm": 0.39126649498939514, "learning_rate": 1.5081967213114755e-05, "loss": 0.011, "step": 28300},
+    {"epoch": 138.2306284319707, "grad_norm": 0.6234102249145508, "learning_rate": 1.4950819672131146e-05, "loss": 0.0133, "step": 28320},
+    {"epoch": 138.32824893227578, "grad_norm": 0.5867244005203247, "learning_rate": 1.481967213114754e-05, "loss": 0.0138, "step": 28340},
+    {"epoch": 138.42586943258084, "grad_norm": 0.6564351916313171, "learning_rate": 1.4688524590163935e-05, "loss": 0.014, "step": 28360},
+    {"epoch": 138.5234899328859, "grad_norm": 1.0982993841171265, "learning_rate": 1.455737704918033e-05, "loss": 0.0133, "step": 28380},
+    {"epoch": 138.62111043319098, "grad_norm": 0.8140943646430969, "learning_rate": 1.4426229508196722e-05, "loss": 0.0149, "step": 28400},
+    {"epoch": 138.71873093349603, "grad_norm": 0.7273306846618652, "learning_rate": 1.4295081967213114e-05, "loss": 0.0149, "step": 28420},
+    {"epoch": 138.8163514338011, "grad_norm": 0.46543437242507935, "learning_rate": 1.4163934426229508e-05, "loss": 0.0143, "step": 28440},
+    {"epoch": 138.91397193410617, "grad_norm": 1.823746681213379, "learning_rate": 1.4032786885245902e-05, "loss": 0.0141, "step": 28460},
+    {"epoch": 139.01159243441123, "grad_norm": 0.602825939655304, "learning_rate": 1.3901639344262297e-05, "loss": 0.0129, "step": 28480},
+    {"epoch": 139.10921293471628, "grad_norm": 0.30030643939971924, "learning_rate": 1.377049180327869e-05, "loss": 0.0108, "step": 28500},
+    {"epoch": 139.20683343502137, "grad_norm": 0.7023382186889648, "learning_rate": 1.3639344262295084e-05, "loss": 0.013, "step": 28520},
+    {"epoch": 139.30445393532642, "grad_norm": 0.8771500587463379, "learning_rate": 1.3508196721311475e-05, "loss": 0.0144, "step": 28540},
+    {"epoch": 139.40207443563148, "grad_norm": 0.6988272666931152, "learning_rate": 1.337704918032787e-05, "loss": 0.0125, "step": 28560},
+    {"epoch": 139.49969493593656, "grad_norm": 0.8657557368278503, "learning_rate": 1.3245901639344262e-05, "loss": 0.0138, "step": 28580},
+    {"epoch": 139.59731543624162, "grad_norm": 0.6832662224769592, "learning_rate": 1.3114754098360657e-05, "loss": 0.0127, "step": 28600},
+    {"epoch": 139.69493593654667, "grad_norm": 0.9065951108932495, "learning_rate": 1.2983606557377051e-05, "loss": 0.015, "step": 28620},
+    {"epoch": 139.79255643685173, "grad_norm": 0.9211568236351013, "learning_rate": 1.2852459016393442e-05, "loss": 0.0131, "step": 28640},
+    {"epoch": 139.8901769371568, "grad_norm": 0.6160862445831299, "learning_rate": 1.2721311475409837e-05, "loss": 0.0163, "step": 28660},
+    {"epoch": 139.98779743746186, "grad_norm": 0.8593130111694336, "learning_rate": 1.259016393442623e-05, "loss": 0.0135, "step": 28680},
+    {"epoch": 140.08541793776692, "grad_norm": 0.7746515274047852, "learning_rate": 1.2459016393442624e-05, "loss": 0.0141, "step": 28700},
+    {"epoch": 140.183038438072, "grad_norm": 0.7830790877342224, "learning_rate": 1.2327868852459017e-05, "loss": 0.0126, "step": 28720},
+    {"epoch": 140.28065893837706, "grad_norm": 0.49005040526390076, "learning_rate": 1.2196721311475411e-05, "loss": 0.0127, "step": 28740},
+    {"epoch": 140.3782794386821, "grad_norm": 0.9640679359436035, "learning_rate": 1.2065573770491804e-05, "loss": 0.0125, "step": 28760},
+    {"epoch": 140.4758999389872, "grad_norm": 0.8114829659461975, "learning_rate": 1.1934426229508197e-05, "loss": 0.0138, "step": 28780},
+    {"epoch": 140.57352043929225, "grad_norm": 0.8460706472396851, "learning_rate": 1.1803278688524591e-05, "loss": 0.0148, "step": 28800},
+    {"epoch": 140.6711409395973, "grad_norm": 0.4882986843585968, "learning_rate": 1.1672131147540984e-05, "loss": 0.0141, "step": 28820},
+    {"epoch": 140.7687614399024, "grad_norm": 1.0322729349136353, "learning_rate": 1.1540983606557378e-05, "loss": 0.0148, "step": 28840},
+    {"epoch": 140.86638194020745, "grad_norm": 1.2970582246780396, "learning_rate": 1.1409836065573771e-05, "loss": 0.0144, "step": 28860},
+    {"epoch": 140.9640024405125, "grad_norm": 0.9063767790794373, "learning_rate": 1.1278688524590164e-05, "loss": 0.0123, "step": 28880},
+    {"epoch": 141.06162294081759, "grad_norm": 0.60384202003479, "learning_rate": 1.1147540983606558e-05, "loss": 0.0122, "step": 28900},
+    {"epoch": 141.15924344112264, "grad_norm": 0.5142499804496765, "learning_rate": 1.1016393442622953e-05, "loss": 0.0125, "step": 28920},
+    {"epoch": 141.2568639414277, "grad_norm": 0.6854032874107361, "learning_rate": 1.0885245901639344e-05, "loss": 0.0132, "step": 28940},
+    {"epoch": 141.35448444173275, "grad_norm": 1.138895034790039, "learning_rate": 1.0754098360655738e-05, "loss": 0.0138, "step": 28960},
+    {"epoch": 141.45210494203783, "grad_norm": 0.5815340280532837, "learning_rate": 1.0622950819672131e-05, "loss": 0.0115, "step": 28980},
+    {"epoch": 141.5497254423429, "grad_norm": 0.6024242639541626, "learning_rate": 1.0491803278688525e-05, "loss": 0.0127, "step": 29000},
+    {"epoch": 141.64734594264795, "grad_norm": 0.44016191363334656, "learning_rate": 1.0360655737704918e-05, "loss": 0.0146, "step": 29020},
+    {"epoch": 141.74496644295303, "grad_norm": 2.051720142364502, "learning_rate": 1.0229508196721311e-05, "loss": 0.0151, "step": 29040},
+    {"epoch": 141.84258694325808, "grad_norm": 0.6961409449577332, "learning_rate": 1.0098360655737705e-05, "loss": 0.013, "step": 29060},
+    {"epoch": 141.94020744356314, "grad_norm": 1.1912919282913208, "learning_rate": 9.9672131147541e-06, "loss": 0.0131, "step": 29080},
+    {"epoch": 142.03782794386822, "grad_norm": 0.6203546524047852, "learning_rate": 9.836065573770493e-06, "loss": 0.013, "step": 29100},
+    {"epoch": 142.13544844417328, "grad_norm": 0.5386860966682434, "learning_rate": 9.704918032786885e-06, "loss": 0.0123, "step": 29120},
+    {"epoch": 142.23306894447833, "grad_norm": 0.5639663934707642, "learning_rate": 9.573770491803278e-06, "loss": 0.0123, "step": 29140},
+    {"epoch": 142.33068944478342, "grad_norm": 0.577315628528595, "learning_rate": 9.442622950819673e-06, "loss": 0.0125, "step": 29160},
+    {"epoch": 142.42830994508847, "grad_norm": 0.5142390727996826, "learning_rate": 9.311475409836067e-06, "loss": 0.0133, "step": 29180},
+    {"epoch": 142.52593044539353, "grad_norm": 0.7933589816093445, "learning_rate": 9.18032786885246e-06, "loss": 0.0151, "step": 29200},
+    {"epoch": 142.6235509456986, "grad_norm": 0.8499199151992798, "learning_rate": 9.049180327868853e-06, "loss": 0.0136, "step": 29220},
+    {"epoch": 142.72117144600367, "grad_norm": 0.6795129179954529, "learning_rate": 8.918032786885247e-06, "loss": 0.0136, "step": 29240},
+    {"epoch": 142.81879194630872, "grad_norm": 0.3827701210975647, "learning_rate": 8.78688524590164e-06, "loss": 0.0122, "step": 29260},
+    {"epoch": 142.91641244661378, "grad_norm": 0.6248555779457092, "learning_rate": 8.655737704918034e-06, "loss": 0.0113, "step": 29280},
+    {"epoch": 143.01403294691886, "grad_norm": 0.9943171739578247, "learning_rate": 8.524590163934427e-06, "loss": 0.0145, "step": 29300},
+    {"epoch": 143.11165344722392, "grad_norm": 0.3848264217376709, "learning_rate": 8.39344262295082e-06, "loss": 0.0119, "step": 29320},
+    {"epoch": 143.20927394752897, "grad_norm": 1.02989661693573, "learning_rate": 8.262295081967214e-06, "loss": 0.0123, "step": 29340},
+    {"epoch": 143.30689444783405, "grad_norm": 0.5843254923820496, "learning_rate": 8.131147540983607e-06, "loss": 0.0124, "step": 29360},
+    {"epoch": 143.4045149481391, "grad_norm": 0.5134753584861755, "learning_rate": 8.000000000000001e-06, "loss": 0.0122, "step": 29380},
+    {"epoch": 143.50213544844416, "grad_norm": 0.4464253783226013, "learning_rate": 7.868852459016394e-06, "loss": 0.0116, "step": 29400},
+    {"epoch": 143.59975594874925, "grad_norm": 0.445730060338974, "learning_rate": 7.737704918032787e-06, "loss": 0.0116, "step": 29420},
+    {"epoch": 143.6973764490543, "grad_norm": 0.7831693887710571, "learning_rate": 7.606557377049181e-06, "loss": 0.0122, "step": 29440},
+    {"epoch": 143.79499694935936, "grad_norm": 0.33939194679260254, "learning_rate": 7.475409836065573e-06, "loss": 0.0131, "step": 29460},
+    {"epoch": 143.89261744966444, "grad_norm": 0.36323612928390503, "learning_rate": 7.344262295081968e-06, "loss": 0.0157, "step": 29480},
+    {"epoch": 143.9902379499695, "grad_norm": 0.6487870216369629, "learning_rate": 7.213114754098361e-06, "loss": 0.0157, "step": 29500},
+    {"epoch": 144.08785845027455, "grad_norm": 0.3841145932674408, "learning_rate": 7.081967213114754e-06, "loss": 0.0106, "step": 29520},
+    {"epoch": 144.1854789505796, "grad_norm": 1.0142998695373535, "learning_rate": 6.9508196721311484e-06, "loss": 0.0129, "step": 29540},
+    {"epoch": 144.2830994508847, "grad_norm": 1.5330740213394165, "learning_rate": 6.819672131147542e-06, "loss": 0.0142, "step": 29560},
+    {"epoch": 144.38071995118975, "grad_norm": 2.0231475830078125, "learning_rate": 6.688524590163935e-06, "loss": 0.0115, "step": 29580},
+    {"epoch": 144.4783404514948, "grad_norm": 0.542549192905426, "learning_rate": 6.557377049180328e-06, "loss": 0.0131, "step": 29600},
+    {"epoch": 144.57596095179989, "grad_norm": 0.6942082047462463, "learning_rate": 6.426229508196721e-06, "loss": 0.013, "step": 29620},
+    {"epoch": 144.67358145210494, "grad_norm": 0.4934479296207428, "learning_rate": 6.295081967213115e-06, "loss": 0.0124, "step": 29640},
+    {"epoch": 144.77120195241, "grad_norm": 0.9981206655502319, "learning_rate": 6.163934426229508e-06, "loss": 0.013, "step": 29660},
+    {"epoch": 144.86882245271508, "grad_norm": 0.5263285636901855, "learning_rate": 6.032786885245902e-06, "loss": 0.013, "step": 29680},
+    {"epoch": 144.96644295302013, "grad_norm": 0.4131539762020111, "learning_rate": 5.9016393442622956e-06, "loss": 0.0132, "step": 29700},
+    {"epoch": 145.0640634533252, "grad_norm": 0.9396491646766663, "learning_rate": 5.770491803278689e-06, "loss": 0.012, "step": 29720},
+    {"epoch": 145.16168395363027, "grad_norm": 0.37081795930862427, "learning_rate": 5.639344262295082e-06, "loss": 0.0118, "step": 29740},
+    {"epoch": 145.25930445393533, "grad_norm": 0.5653529167175293, "learning_rate": 5.508196721311476e-06, "loss": 0.0122, "step": 29760},
+    {"epoch": 145.35692495424038, "grad_norm": 0.49712416529655457, "learning_rate": 5.377049180327869e-06, "loss": 0.012, "step": 29780},
+    {"epoch": 145.45454545454547, "grad_norm": 0.6723568439483643, "learning_rate": 5.245901639344263e-06, "loss": 0.0132, "step": 29800},
+    {"epoch": 145.55216595485052, "grad_norm": 0.6191849708557129, "learning_rate": 5.1147540983606555e-06, "loss": 0.0142, "step": 29820},
+    {"epoch": 145.64978645515558, "grad_norm": 0.8201606273651123, "learning_rate": 4.98360655737705e-06, "loss": 0.014, "step": 29840},
+    {"epoch": 145.74740695546063, "grad_norm": 0.4357975423336029, "learning_rate": 4.852459016393443e-06, "loss": 0.0119, "step": 29860},
+    {"epoch": 145.84502745576572, "grad_norm": 0.5062920451164246, "learning_rate": 4.721311475409836e-06, "loss": 0.0112, "step": 29880},
+    {"epoch": 145.94264795607077, "grad_norm": 0.6272954940795898, "learning_rate": 4.59016393442623e-06, "loss": 0.0121, "step": 29900},
+    {"epoch": 146.04026845637583, "grad_norm": 0.3578208088874817, "learning_rate": 4.4590163934426235e-06, "loss": 0.0137, "step": 29920},
+    {"epoch": 146.1378889566809, "grad_norm": 0.4044102132320404, "learning_rate": 4.327868852459017e-06, "loss": 0.0133, "step": 29940},
+    {"epoch": 146.23550945698597, "grad_norm": 0.4162692725658417, "learning_rate": 4.19672131147541e-06, "loss": 0.013, "step": 29960},
+    {"epoch": 146.33312995729102, "grad_norm": 0.6349827647209167, "learning_rate": 4.0655737704918034e-06, "loss": 0.0138, "step": 29980},
+    {"epoch": 146.4307504575961, "grad_norm": 0.6992813348770142, "learning_rate": 3.934426229508197e-06, "loss": 0.0142, "step": 30000
     }
   ],
   "logging_steps": 20,
@@ -8776,7 +10526,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.9434336130018816e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
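The log_history entries added above follow the file's logging_steps: 20 cadence, covering steps 25020 through 30000 (epochs ~122.1 to ~146.4). A minimal sketch of reading the loss curve back out of the updated trainer_state.json:

import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Every logged entry carries epoch, grad_norm, learning_rate, loss, and step.
logged = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logged]
losses = [e["loss"] for e in logged]

print(state["global_step"], state["epoch"])  # 30000 146.4307504575961
print(steps[-1], losses[-1])                 # 30000 0.0142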