Training in progress, step 2000, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6d86a9b6f56e324364790bdda5d43e79cba70c5d7e3fad0083cbcb3356bbe532
 size 167832240
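
The two lines above are Git LFS pointer metadata rather than the adapter weights themselves: "oid sha256:" is the SHA-256 digest of the real file and "size" is its byte count (167832240 bytes here, roughly 160 MB of adapter weights). As a minimal sketch, assuming only a local copy of the file, the pointer fields can be reproduced like this:

import hashlib
from pathlib import Path

def lfs_pointer(path: str) -> str:
    """Build Git LFS pointer text for a local file (a sketch, not the git-lfs CLI)."""
    data = Path(path).read_bytes()
    oid = hashlib.sha256(data).hexdigest()  # matches the "oid sha256:" line
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{oid}\n"
        f"size {len(data)}\n"
    )

# Hypothetical local path; prints a pointer like the one in this diff.
print(lfs_pointer("last-checkpoint/adapter_model.safetensors"))
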
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:10ebdfab380cce47a87ddc82ce0b12618986299320e3c1024e0d5a3d2c7efff6
 size 85723732
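
optimizer.pt holds the Trainer's serialized optimizer state (with AdamW, the per-parameter moment estimates), which is what lets a resumed run continue with warm statistics instead of restarting them. A minimal inspection sketch, assuming only that the file is a torch-saved optimizer state dict:

import torch

# Load the saved optimizer state on CPU and look at its structure.
state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
print(list(state.keys()))                  # typically ['state', 'param_groups']
print(len(state.get("state", {})))         # number of tracked parameter slots
print(state["param_groups"][0].get("lr"))  # learning rate at save time
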
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0a6dc2c1810a293fe20df2a18d045861e1be8accb490c530cbdb055d1a3a674a
 size 1064
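
Together, these files plus trainer_state.json are what transformers needs to pick the run back up exactly where step 2000 left off. A minimal sketch of the mechanism, using a hypothetical tiny model and toy dataset as stand-ins (the diff records neither this run's actual base model nor its data):

from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          Trainer, TrainingArguments)

# Stand-ins only: a tiny public model and a toy dataset to show the mechanism.
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
ids = tokenizer("hello world", return_tensors="pt")["input_ids"][0]
train_dataset = [{"input_ids": ids, "labels": ids} for _ in range(8)]

args = TrainingArguments(output_dir="out", per_device_train_batch_size=2,
                         logging_steps=1, save_steps=500)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# Restores the weights plus optimizer.pt, scheduler.pt and trainer_state.json,
# then continues from the stored global_step instead of step 0.
trainer.train(resume_from_checkpoint="last-checkpoint")
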
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.1732764409019039,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     }
   ],
   "logging_steps": 1,
@@ -10526,7 +14026,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
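
The new epoch and global_step header values are consistent with the per-step epoch increment visible in the log entries added below: consecutive steps differ by about 8.6638e-05 epochs, so one epoch corresponds to roughly 11,542 optimizer steps and step 2000 lands at epoch 0.17328. A quick check using two values from this diff:

# Per-step epoch increment, taken from the step-1501 and step-1502 entries below.
per_step = 0.13013060711732982 - 0.13004396889687886
print(per_step)             # ~8.6638e-05 epochs per optimizer step
print(per_step * 2000)      # ~0.17328, matching the new "epoch" header value
print(round(1 / per_step))  # ~11542 steps per full epoch
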
@@ -10507,6 +10507,3506 @@
       "learning_rate": 0.00017411805495362747,
       "loss": 0.7348,
       "step": 1500
+    },
+    {
+      "epoch": 0.13004396889687886,
+      "grad_norm": 2.5850331783294678,
+      "learning_rate": 0.00017410071942446044,
+      "loss": 0.7476,
+      "step": 1501
+    },
+    {
+      "epoch": 0.13013060711732982,
+      "grad_norm": 2.4636292457580566,
+      "learning_rate": 0.0001740833838952934,
+      "loss": 0.7577,
+      "step": 1502
+    },
+    {
+      "epoch": 0.13021724533778076,
+      "grad_norm": 2.44307804107666,
+      "learning_rate": 0.00017406604836612638,
+      "loss": 0.7806,
+      "step": 1503
+    },
+    {
+      "epoch": 0.13030388355823172,
+      "grad_norm": 2.556407928466797,
+      "learning_rate": 0.00017404871283695935,
+      "loss": 0.8506,
+      "step": 1504
+    },
+    {
+      "epoch": 0.13039052177868266,
+      "grad_norm": 2.1759049892425537,
+      "learning_rate": 0.00017403137730779232,
+      "loss": 0.7545,
+      "step": 1505
+    },
+    {
+      "epoch": 0.13047715999913362,
+      "grad_norm": 1.515861988067627,
+      "learning_rate": 0.0001740140417786253,
+      "loss": 0.6407,
+      "step": 1506
+    },
+    {
+      "epoch": 0.13056379821958458,
+      "grad_norm": 3.348797082901001,
+      "learning_rate": 0.00017399670624945827,
+      "loss": 1.3821,
+      "step": 1507
+    },
+    {
+      "epoch": 0.13065043644003552,
+      "grad_norm": 2.409515142440796,
+      "learning_rate": 0.00017397937072029124,
+      "loss": 0.8388,
+      "step": 1508
+    },
+    {
+      "epoch": 0.13073707466048648,
+      "grad_norm": 3.6599137783050537,
+      "learning_rate": 0.0001739620351911242,
+      "loss": 1.2313,
+      "step": 1509
+    },
+    {
+      "epoch": 0.13082371288093742,
+      "grad_norm": 2.307520627975464,
+      "learning_rate": 0.00017394469966195718,
+      "loss": 0.7984,
+      "step": 1510
+    },
+    {
+      "epoch": 0.13091035110138838,
+      "grad_norm": 2.284538507461548,
+      "learning_rate": 0.00017392736413279015,
+      "loss": 0.8024,
+      "step": 1511
+    },
+    {
+      "epoch": 0.13099698932183934,
+      "grad_norm": 2.672722339630127,
+      "learning_rate": 0.00017391002860362313,
+      "loss": 0.7012,
+      "step": 1512
+    },
+    {
+      "epoch": 0.13108362754229028,
+      "grad_norm": 1.9546332359313965,
+      "learning_rate": 0.0001738926930744561,
+      "loss": 0.5964,
+      "step": 1513
+    },
+    {
+      "epoch": 0.13117026576274124,
+      "grad_norm": 1.7761123180389404,
+      "learning_rate": 0.00017387535754528907,
+      "loss": 0.6787,
+      "step": 1514
+    },
+    {
+      "epoch": 0.13125690398319217,
+      "grad_norm": 2.336682081222534,
+      "learning_rate": 0.00017385802201612204,
+      "loss": 0.7223,
+      "step": 1515
+    },
+    {
+      "epoch": 0.13134354220364314,
+      "grad_norm": 3.807426929473877,
+      "learning_rate": 0.000173840686486955,
+      "loss": 1.359,
+      "step": 1516
+    },
+    {
+      "epoch": 0.1314301804240941,
+      "grad_norm": 2.3983254432678223,
+      "learning_rate": 0.00017382335095778798,
+      "loss": 0.7041,
+      "step": 1517
+    },
+    {
+      "epoch": 0.13151681864454504,
+      "grad_norm": 3.9924490451812744,
+      "learning_rate": 0.00017380601542862096,
+      "loss": 1.4387,
+      "step": 1518
+    },
+    {
+      "epoch": 0.131603456864996,
+      "grad_norm": 3.7196366786956787,
+      "learning_rate": 0.00017378867989945393,
+      "loss": 1.1075,
+      "step": 1519
+    },
+    {
+      "epoch": 0.13169009508544693,
+      "grad_norm": 2.8900740146636963,
+      "learning_rate": 0.0001737713443702869,
+      "loss": 1.2191,
+      "step": 1520
+    },
+    {
+      "epoch": 0.1317767333058979,
+      "grad_norm": 2.511605978012085,
+      "learning_rate": 0.00017375400884111987,
+      "loss": 0.9275,
+      "step": 1521
+    },
+    {
+      "epoch": 0.13186337152634886,
+      "grad_norm": 2.8332509994506836,
+      "learning_rate": 0.00017373667331195284,
+      "loss": 1.2634,
+      "step": 1522
+    },
+    {
+      "epoch": 0.1319500097467998,
+      "grad_norm": 2.474395751953125,
+      "learning_rate": 0.0001737193377827858,
+      "loss": 0.8891,
+      "step": 1523
+    },
+    {
+      "epoch": 0.13203664796725076,
+      "grad_norm": 1.9100064039230347,
+      "learning_rate": 0.00017370200225361878,
+      "loss": 0.6819,
+      "step": 1524
+    },
+    {
+      "epoch": 0.1321232861877017,
+      "grad_norm": 1.8584879636764526,
+      "learning_rate": 0.00017368466672445176,
+      "loss": 0.7188,
+      "step": 1525
+    },
+    {
+      "epoch": 0.13220992440815266,
+      "grad_norm": 2.176227569580078,
+      "learning_rate": 0.00017366733119528476,
+      "loss": 0.7984,
+      "step": 1526
+    },
+    {
+      "epoch": 0.13229656262860362,
+      "grad_norm": 1.7969623804092407,
+      "learning_rate": 0.00017364999566611773,
+      "loss": 0.7156,
+      "step": 1527
+    },
+    {
+      "epoch": 0.13238320084905456,
+      "grad_norm": 1.8679330348968506,
+      "learning_rate": 0.0001736326601369507,
+      "loss": 0.6682,
+      "step": 1528
+    },
+    {
+      "epoch": 0.13246983906950552,
+      "grad_norm": 2.120135545730591,
+      "learning_rate": 0.00017361532460778367,
+      "loss": 0.6567,
+      "step": 1529
+    },
+    {
+      "epoch": 0.13255647728995645,
+      "grad_norm": 1.7221474647521973,
+      "learning_rate": 0.00017359798907861664,
+      "loss": 0.6969,
+      "step": 1530
+    },
+    {
+      "epoch": 0.13264311551040742,
+      "grad_norm": 1.7574238777160645,
+      "learning_rate": 0.0001735806535494496,
+      "loss": 0.7448,
+      "step": 1531
+    },
+    {
+      "epoch": 0.13272975373085838,
+      "grad_norm": 2.1961381435394287,
+      "learning_rate": 0.00017356331802028258,
+      "loss": 0.7367,
+      "step": 1532
+    },
+    {
+      "epoch": 0.13281639195130931,
+      "grad_norm": 2.442263603210449,
+      "learning_rate": 0.00017354598249111556,
+      "loss": 0.776,
+      "step": 1533
+    },
+    {
+      "epoch": 0.13290303017176028,
+      "grad_norm": 2.670750856399536,
+      "learning_rate": 0.00017352864696194853,
+      "loss": 1.4044,
+      "step": 1534
+    },
+    {
+      "epoch": 0.1329896683922112,
+      "grad_norm": 1.8144429922103882,
+      "learning_rate": 0.0001735113114327815,
+      "loss": 0.6764,
+      "step": 1535
+    },
+    {
+      "epoch": 0.13307630661266218,
+      "grad_norm": 3.0238986015319824,
+      "learning_rate": 0.00017349397590361447,
+      "loss": 1.2743,
+      "step": 1536
+    },
+    {
+      "epoch": 0.13316294483311314,
+      "grad_norm": 2.5088775157928467,
+      "learning_rate": 0.00017347664037444744,
+      "loss": 0.8142,
+      "step": 1537
+    },
+    {
+      "epoch": 0.13324958305356407,
+      "grad_norm": 1.8709009885787964,
+      "learning_rate": 0.00017345930484528041,
+      "loss": 0.6745,
+      "step": 1538
+    },
+    {
+      "epoch": 0.13333622127401504,
+      "grad_norm": 3.4287478923797607,
+      "learning_rate": 0.00017344196931611339,
+      "loss": 1.5627,
+      "step": 1539
+    },
+    {
+      "epoch": 0.13342285949446597,
+      "grad_norm": 1.5935015678405762,
+      "learning_rate": 0.00017342463378694636,
+      "loss": 0.6259,
+      "step": 1540
+    },
+    {
+      "epoch": 0.13350949771491694,
+      "grad_norm": 3.7165770530700684,
+      "learning_rate": 0.00017340729825777933,
+      "loss": 1.5128,
+      "step": 1541
+    },
+    {
+      "epoch": 0.1335961359353679,
+      "grad_norm": 3.034597873687744,
+      "learning_rate": 0.0001733899627286123,
+      "loss": 1.2054,
+      "step": 1542
+    },
+    {
+      "epoch": 0.13368277415581883,
+      "grad_norm": 2.5076537132263184,
+      "learning_rate": 0.00017337262719944527,
+      "loss": 0.8369,
+      "step": 1543
+    },
+    {
+      "epoch": 0.1337694123762698,
+      "grad_norm": 1.6949880123138428,
+      "learning_rate": 0.00017335529167027824,
+      "loss": 0.7561,
+      "step": 1544
+    },
+    {
+      "epoch": 0.13385605059672073,
+      "grad_norm": 2.3536059856414795,
+      "learning_rate": 0.00017333795614111122,
+      "loss": 0.7838,
+      "step": 1545
+    },
+    {
+      "epoch": 0.1339426888171717,
+      "grad_norm": 2.0439205169677734,
+      "learning_rate": 0.0001733206206119442,
+      "loss": 0.6545,
+      "step": 1546
+    },
+    {
+      "epoch": 0.13402932703762266,
+      "grad_norm": 2.0284693241119385,
+      "learning_rate": 0.00017330328508277716,
+      "loss": 0.8174,
+      "step": 1547
+    },
+    {
+      "epoch": 0.1341159652580736,
+      "grad_norm": 2.5361385345458984,
+      "learning_rate": 0.00017328594955361013,
+      "loss": 0.7539,
+      "step": 1548
+    },
+    {
+      "epoch": 0.13420260347852456,
+      "grad_norm": 2.3491008281707764,
+      "learning_rate": 0.0001732686140244431,
+      "loss": 1.4804,
+      "step": 1549
+    },
+    {
+      "epoch": 0.1342892416989755,
+      "grad_norm": 2.668773651123047,
+      "learning_rate": 0.00017325127849527607,
+      "loss": 1.1566,
+      "step": 1550
+    },
+    {
+      "epoch": 0.13437587991942646,
+      "grad_norm": 1.8820674419403076,
+      "learning_rate": 0.00017323394296610905,
+      "loss": 0.7424,
+      "step": 1551
+    },
+    {
+      "epoch": 0.13446251813987742,
+      "grad_norm": 2.9547455310821533,
+      "learning_rate": 0.00017321660743694202,
+      "loss": 1.0484,
+      "step": 1552
+    },
+    {
+      "epoch": 0.13454915636032835,
+      "grad_norm": 1.957566261291504,
+      "learning_rate": 0.000173199271907775,
+      "loss": 0.6705,
+      "step": 1553
+    },
+    {
+      "epoch": 0.13463579458077932,
+      "grad_norm": 2.385542631149292,
+      "learning_rate": 0.00017318193637860796,
+      "loss": 0.8964,
+      "step": 1554
+    },
+    {
+      "epoch": 0.13472243280123025,
+      "grad_norm": 2.3893930912017822,
+      "learning_rate": 0.00017316460084944093,
+      "loss": 0.7353,
+      "step": 1555
+    },
+    {
+      "epoch": 0.13480907102168121,
+      "grad_norm": 2.231502056121826,
+      "learning_rate": 0.0001731472653202739,
+      "loss": 0.6707,
+      "step": 1556
+    },
+    {
+      "epoch": 0.13489570924213218,
+      "grad_norm": 1.6075713634490967,
+      "learning_rate": 0.0001731299297911069,
+      "loss": 0.6438,
+      "step": 1557
+    },
+    {
+      "epoch": 0.1349823474625831,
+      "grad_norm": 3.559945583343506,
+      "learning_rate": 0.00017311259426193987,
+      "loss": 1.2579,
+      "step": 1558
+    },
+    {
+      "epoch": 0.13506898568303408,
+      "grad_norm": 2.603102445602417,
+      "learning_rate": 0.00017309525873277285,
+      "loss": 0.8342,
+      "step": 1559
+    },
+    {
+      "epoch": 0.135155623903485,
+      "grad_norm": 1.6666685342788696,
+      "learning_rate": 0.00017307792320360582,
+      "loss": 0.7036,
+      "step": 1560
+    },
+    {
+      "epoch": 0.13524226212393597,
+      "grad_norm": 1.9931856393814087,
+      "learning_rate": 0.0001730605876744388,
+      "loss": 0.7024,
+      "step": 1561
+    },
+    {
+      "epoch": 0.13532890034438694,
+      "grad_norm": 2.227538824081421,
+      "learning_rate": 0.00017304325214527176,
+      "loss": 0.6952,
+      "step": 1562
+    },
+    {
+      "epoch": 0.13541553856483787,
+      "grad_norm": 3.72676944732666,
+      "learning_rate": 0.00017302591661610473,
+      "loss": 1.5303,
+      "step": 1563
+    },
+    {
+      "epoch": 0.13550217678528884,
+      "grad_norm": 2.943199872970581,
+      "learning_rate": 0.0001730085810869377,
+      "loss": 0.7202,
+      "step": 1564
+    },
+    {
+      "epoch": 0.13558881500573977,
+      "grad_norm": 1.9090582132339478,
+      "learning_rate": 0.00017299124555777068,
+      "loss": 0.689,
+      "step": 1565
+    },
+    {
+      "epoch": 0.13567545322619073,
+      "grad_norm": 2.5626332759857178,
+      "learning_rate": 0.00017297391002860365,
+      "loss": 0.7877,
+      "step": 1566
+    },
+    {
+      "epoch": 0.1357620914466417,
+      "grad_norm": 3.0278728008270264,
+      "learning_rate": 0.00017295657449943662,
+      "loss": 0.8433,
+      "step": 1567
+    },
+    {
+      "epoch": 0.13584872966709263,
+      "grad_norm": 1.7632516622543335,
+      "learning_rate": 0.0001729392389702696,
+      "loss": 0.649,
+      "step": 1568
+    },
+    {
+      "epoch": 0.1359353678875436,
+      "grad_norm": 2.0897111892700195,
+      "learning_rate": 0.00017292190344110256,
+      "loss": 0.7204,
+      "step": 1569
+    },
+    {
+      "epoch": 0.13602200610799453,
+      "grad_norm": 4.0717949867248535,
+      "learning_rate": 0.00017290456791193553,
+      "loss": 1.1998,
+      "step": 1570
+    },
+    {
+      "epoch": 0.1361086443284455,
+      "grad_norm": 2.0205678939819336,
+      "learning_rate": 0.0001728872323827685,
+      "loss": 0.7348,
+      "step": 1571
+    },
+    {
+      "epoch": 0.13619528254889646,
+      "grad_norm": 2.433868646621704,
+      "learning_rate": 0.00017286989685360148,
+      "loss": 0.7763,
+      "step": 1572
+    },
+    {
+      "epoch": 0.1362819207693474,
+      "grad_norm": 1.841062068939209,
+      "learning_rate": 0.00017285256132443445,
+      "loss": 0.6328,
+      "step": 1573
+    },
+    {
+      "epoch": 0.13636855898979836,
+      "grad_norm": 2.058187961578369,
+      "learning_rate": 0.00017283522579526742,
+      "loss": 0.8164,
+      "step": 1574
+    },
+    {
+      "epoch": 0.1364551972102493,
+      "grad_norm": 4.376664638519287,
+      "learning_rate": 0.0001728178902661004,
+      "loss": 1.6294,
+      "step": 1575
+    },
+    {
+      "epoch": 0.13654183543070025,
+      "grad_norm": 2.4949536323547363,
+      "learning_rate": 0.00017280055473693336,
+      "loss": 0.7556,
+      "step": 1576
+    },
+    {
+      "epoch": 0.13662847365115122,
+      "grad_norm": 3.496859312057495,
+      "learning_rate": 0.00017278321920776633,
+      "loss": 1.2018,
+      "step": 1577
+    },
+    {
+      "epoch": 0.13671511187160215,
+      "grad_norm": 2.3041200637817383,
+      "learning_rate": 0.0001727658836785993,
+      "loss": 0.8249,
+      "step": 1578
+    },
+    {
+      "epoch": 0.13680175009205311,
+      "grad_norm": 2.196355104446411,
+      "learning_rate": 0.00017274854814943228,
+      "loss": 0.6733,
+      "step": 1579
+    },
+    {
+      "epoch": 0.13688838831250405,
+      "grad_norm": 1.8753951787948608,
+      "learning_rate": 0.00017273121262026525,
+      "loss": 0.6835,
+      "step": 1580
+    },
+    {
+      "epoch": 0.136975026532955,
+      "grad_norm": 2.182445526123047,
+      "learning_rate": 0.00017271387709109822,
+      "loss": 0.7341,
+      "step": 1581
+    },
+    {
+      "epoch": 0.13706166475340598,
+      "grad_norm": 3.20149827003479,
+      "learning_rate": 0.0001726965415619312,
+      "loss": 1.3818,
+      "step": 1582
+    },
+    {
+      "epoch": 0.1371483029738569,
+      "grad_norm": 3.472027063369751,
+      "learning_rate": 0.00017267920603276416,
+      "loss": 1.3843,
+      "step": 1583
+    },
+    {
+      "epoch": 0.13723494119430787,
+      "grad_norm": 1.9237418174743652,
+      "learning_rate": 0.00017266187050359714,
+      "loss": 1.2191,
+      "step": 1584
+    },
+    {
+      "epoch": 0.1373215794147588,
+      "grad_norm": 2.5636589527130127,
+      "learning_rate": 0.0001726445349744301,
+      "loss": 0.8091,
+      "step": 1585
+    },
+    {
+      "epoch": 0.13740821763520977,
+      "grad_norm": 2.726247787475586,
+      "learning_rate": 0.00017262719944526308,
+      "loss": 0.82,
+      "step": 1586
+    },
+    {
+      "epoch": 0.13749485585566074,
+      "grad_norm": 3.47124981880188,
+      "learning_rate": 0.00017260986391609605,
+      "loss": 1.2759,
+      "step": 1587
+    },
+    {
+      "epoch": 0.13758149407611167,
+      "grad_norm": 1.6789535284042358,
+      "learning_rate": 0.00017259252838692902,
+      "loss": 0.6117,
+      "step": 1588
+    },
+    {
+      "epoch": 0.13766813229656263,
+      "grad_norm": 1.8796265125274658,
+      "learning_rate": 0.000172575192857762,
+      "loss": 0.6657,
+      "step": 1589
+    },
+    {
+      "epoch": 0.13775477051701357,
+      "grad_norm": 2.906919240951538,
+      "learning_rate": 0.00017255785732859497,
+      "loss": 1.0996,
+      "step": 1590
+    },
+    {
+      "epoch": 0.13784140873746453,
+      "grad_norm": 3.4165568351745605,
+      "learning_rate": 0.00017254052179942794,
+      "loss": 1.205,
+      "step": 1591
+    },
+    {
+      "epoch": 0.1379280469579155,
+      "grad_norm": 2.392327308654785,
+      "learning_rate": 0.0001725231862702609,
+      "loss": 0.6988,
+      "step": 1592
+    },
+    {
+      "epoch": 0.13801468517836643,
+      "grad_norm": 2.9643502235412598,
+      "learning_rate": 0.00017250585074109388,
+      "loss": 0.8139,
+      "step": 1593
+    },
+    {
+      "epoch": 0.1381013233988174,
+      "grad_norm": 1.7248785495758057,
+      "learning_rate": 0.00017248851521192685,
+      "loss": 0.595,
+      "step": 1594
+    },
+    {
+      "epoch": 0.13818796161926833,
+      "grad_norm": 3.861988067626953,
+      "learning_rate": 0.00017247117968275982,
+      "loss": 1.3043,
+      "step": 1595
+    },
+    {
+      "epoch": 0.1382745998397193,
+      "grad_norm": 2.2599592208862305,
+      "learning_rate": 0.0001724538441535928,
+      "loss": 0.6611,
+      "step": 1596
+    },
+    {
+      "epoch": 0.13836123806017026,
+      "grad_norm": 2.093370199203491,
+      "learning_rate": 0.00017243650862442577,
+      "loss": 0.895,
+      "step": 1597
+    },
+    {
+      "epoch": 0.1384478762806212,
+      "grad_norm": 2.492495536804199,
+      "learning_rate": 0.00017241917309525874,
+      "loss": 0.6792,
+      "step": 1598
+    },
+    {
+      "epoch": 0.13853451450107215,
+      "grad_norm": 1.8195209503173828,
+      "learning_rate": 0.0001724018375660917,
+      "loss": 0.6447,
+      "step": 1599
+    },
+    {
+      "epoch": 0.1386211527215231,
+      "grad_norm": 3.280806064605713,
+      "learning_rate": 0.00017238450203692468,
+      "loss": 0.7211,
+      "step": 1600
+    },
+    {
+      "epoch": 0.13870779094197405,
+      "grad_norm": 2.19403076171875,
+      "learning_rate": 0.00017236716650775765,
+      "loss": 0.7823,
+      "step": 1601
+    },
+    {
+      "epoch": 0.13879442916242501,
+      "grad_norm": 2.3731212615966797,
+      "learning_rate": 0.00017234983097859062,
+      "loss": 0.6725,
+      "step": 1602
+    },
+    {
+      "epoch": 0.13888106738287595,
+      "grad_norm": 1.9377033710479736,
+      "learning_rate": 0.0001723324954494236,
+      "loss": 0.7742,
+      "step": 1603
+    },
+    {
+      "epoch": 0.1389677056033269,
+      "grad_norm": 2.1013801097869873,
+      "learning_rate": 0.00017231515992025657,
+      "loss": 0.8131,
+      "step": 1604
+    },
+    {
+      "epoch": 0.13905434382377785,
+      "grad_norm": 2.090787649154663,
+      "learning_rate": 0.00017229782439108954,
+      "loss": 0.7698,
+      "step": 1605
+    },
+    {
+      "epoch": 0.1391409820442288,
+      "grad_norm": 2.025236129760742,
+      "learning_rate": 0.0001722804888619225,
+      "loss": 0.7863,
+      "step": 1606
+    },
+    {
+      "epoch": 0.13922762026467977,
+      "grad_norm": 1.9263955354690552,
+      "learning_rate": 0.00017226315333275548,
+      "loss": 0.7203,
+      "step": 1607
+    },
+    {
+      "epoch": 0.1393142584851307,
+      "grad_norm": 1.787283182144165,
+      "learning_rate": 0.00017224581780358845,
+      "loss": 0.5773,
+      "step": 1608
+    },
+    {
+      "epoch": 0.13940089670558167,
+      "grad_norm": 2.3564138412475586,
+      "learning_rate": 0.00017222848227442143,
+      "loss": 0.7371,
+      "step": 1609
+    },
+    {
+      "epoch": 0.1394875349260326,
+      "grad_norm": 2.6562459468841553,
+      "learning_rate": 0.0001722111467452544,
+      "loss": 1.0158,
+      "step": 1610
+    },
+    {
+      "epoch": 0.13957417314648357,
+      "grad_norm": 2.0607364177703857,
+      "learning_rate": 0.00017219381121608737,
+      "loss": 0.745,
+      "step": 1611
+    },
+    {
+      "epoch": 0.13966081136693453,
+      "grad_norm": 2.7639501094818115,
+      "learning_rate": 0.00017217647568692034,
+      "loss": 1.5551,
+      "step": 1612
+    },
+    {
+      "epoch": 0.13974744958738547,
+      "grad_norm": 2.708682060241699,
+      "learning_rate": 0.0001721591401577533,
+      "loss": 0.7651,
+      "step": 1613
+    },
+    {
+      "epoch": 0.13983408780783643,
+      "grad_norm": 2.290919303894043,
+      "learning_rate": 0.00017214180462858628,
+      "loss": 0.74,
+      "step": 1614
+    },
+    {
+      "epoch": 0.13992072602828737,
+      "grad_norm": 3.2296056747436523,
+      "learning_rate": 0.00017212446909941926,
+      "loss": 1.1724,
+      "step": 1615
+    },
+    {
+      "epoch": 0.14000736424873833,
+      "grad_norm": 2.4242615699768066,
+      "learning_rate": 0.00017210713357025223,
+      "loss": 0.7549,
+      "step": 1616
+    },
+    {
+      "epoch": 0.1400940024691893,
+      "grad_norm": 2.313873529434204,
+      "learning_rate": 0.0001720897980410852,
+      "loss": 0.9087,
+      "step": 1617
+    },
+    {
+      "epoch": 0.14018064068964023,
+      "grad_norm": 1.5456923246383667,
+      "learning_rate": 0.00017207246251191817,
+      "loss": 0.6511,
+      "step": 1618
+    },
+    {
+      "epoch": 0.1402672789100912,
+      "grad_norm": 3.2778632640838623,
+      "learning_rate": 0.00017205512698275114,
+      "loss": 1.2691,
+      "step": 1619
+    },
+    {
+      "epoch": 0.14035391713054213,
+      "grad_norm": 2.053410530090332,
+      "learning_rate": 0.00017203779145358411,
+      "loss": 0.6909,
+      "step": 1620
+    },
+    {
+      "epoch": 0.1404405553509931,
+      "grad_norm": 2.050614595413208,
+      "learning_rate": 0.0001720204559244171,
+      "loss": 0.7196,
+      "step": 1621
+    },
+    {
+      "epoch": 0.14052719357144405,
+      "grad_norm": 1.9475194215774536,
+      "learning_rate": 0.00017200312039525008,
+      "loss": 0.6709,
+      "step": 1622
+    },
+    {
+      "epoch": 0.140613831791895,
+      "grad_norm": 3.8903300762176514,
+      "learning_rate": 0.00017198578486608306,
+      "loss": 1.2688,
+      "step": 1623
+    },
+    {
+      "epoch": 0.14070047001234595,
+      "grad_norm": 1.5618529319763184,
+      "learning_rate": 0.00017196844933691603,
+      "loss": 0.5948,
+      "step": 1624
+    },
+    {
+      "epoch": 0.1407871082327969,
+      "grad_norm": 2.4682962894439697,
+      "learning_rate": 0.000171951113807749,
+      "loss": 0.7977,
+      "step": 1625
+    },
+    {
+      "epoch": 0.14087374645324785,
+      "grad_norm": 3.0988516807556152,
+      "learning_rate": 0.00017193377827858197,
+      "loss": 0.7089,
+      "step": 1626
+    },
+    {
+      "epoch": 0.1409603846736988,
+      "grad_norm": 3.128073215484619,
+      "learning_rate": 0.00017191644274941494,
+      "loss": 1.4088,
+      "step": 1627
+    },
+    {
+      "epoch": 0.14104702289414975,
+      "grad_norm": 2.1503820419311523,
+      "learning_rate": 0.00017189910722024791,
+      "loss": 0.6885,
+      "step": 1628
+    },
+    {
+      "epoch": 0.1411336611146007,
+      "grad_norm": 2.601734161376953,
+      "learning_rate": 0.00017188177169108089,
+      "loss": 0.748,
+      "step": 1629
+    },
+    {
+      "epoch": 0.14122029933505165,
+      "grad_norm": 3.7015883922576904,
+      "learning_rate": 0.00017186443616191386,
+      "loss": 1.1446,
+      "step": 1630
+    },
+    {
+      "epoch": 0.1413069375555026,
+      "grad_norm": 3.4025862216949463,
+      "learning_rate": 0.00017184710063274683,
+      "loss": 0.987,
+      "step": 1631
+    },
+    {
+      "epoch": 0.14139357577595357,
+      "grad_norm": 3.4777488708496094,
+      "learning_rate": 0.0001718297651035798,
+      "loss": 1.333,
+      "step": 1632
+    },
+    {
+      "epoch": 0.1414802139964045,
+      "grad_norm": 2.13856840133667,
+      "learning_rate": 0.00017181242957441277,
+      "loss": 0.805,
+      "step": 1633
+    },
+    {
+      "epoch": 0.14156685221685547,
+      "grad_norm": 3.1119301319122314,
+      "learning_rate": 0.00017179509404524574,
+      "loss": 1.2745,
+      "step": 1634
+    },
+    {
+      "epoch": 0.1416534904373064,
+      "grad_norm": 2.9013967514038086,
+      "learning_rate": 0.00017177775851607872,
+      "loss": 0.7214,
+      "step": 1635
+    },
+    {
+      "epoch": 0.14174012865775737,
+      "grad_norm": 2.842689037322998,
+      "learning_rate": 0.0001717604229869117,
+      "loss": 0.9115,
+      "step": 1636
+    },
+    {
+      "epoch": 0.14182676687820833,
+      "grad_norm": 2.2313756942749023,
+      "learning_rate": 0.00017174308745774466,
+      "loss": 0.7886,
+      "step": 1637
+    },
+    {
+      "epoch": 0.14191340509865927,
+      "grad_norm": 2.7678327560424805,
+      "learning_rate": 0.00017172575192857763,
+      "loss": 0.8599,
+      "step": 1638
+    },
+    {
+      "epoch": 0.14200004331911023,
+      "grad_norm": 2.236875534057617,
+      "learning_rate": 0.0001717084163994106,
+      "loss": 0.7208,
+      "step": 1639
+    },
+    {
+      "epoch": 0.14208668153956117,
+      "grad_norm": 1.792824149131775,
+      "learning_rate": 0.00017169108087024357,
+      "loss": 0.5996,
+      "step": 1640
+    },
+    {
+      "epoch": 0.14217331976001213,
+      "grad_norm": 2.282302141189575,
+      "learning_rate": 0.00017167374534107654,
+      "loss": 0.7595,
+      "step": 1641
+    },
+    {
+      "epoch": 0.1422599579804631,
+      "grad_norm": 1.7437708377838135,
+      "learning_rate": 0.00017165640981190952,
+      "loss": 0.6601,
+      "step": 1642
+    },
+    {
+      "epoch": 0.14234659620091403,
+      "grad_norm": 2.5336437225341797,
+      "learning_rate": 0.0001716390742827425,
+      "loss": 0.7475,
+      "step": 1643
+    },
+    {
+      "epoch": 0.142433234421365,
+      "grad_norm": 2.582460641860962,
+      "learning_rate": 0.00017162173875357546,
+      "loss": 0.8142,
+      "step": 1644
+    },
+    {
+      "epoch": 0.14251987264181593,
+      "grad_norm": 3.1204535961151123,
+      "learning_rate": 0.00017160440322440843,
+      "loss": 0.9999,
+      "step": 1645
+    },
+    {
+      "epoch": 0.1426065108622669,
+      "grad_norm": 2.7266287803649902,
+      "learning_rate": 0.0001715870676952414,
+      "loss": 1.2484,
+      "step": 1646
+    },
+    {
+      "epoch": 0.14269314908271785,
+      "grad_norm": 2.1008055210113525,
+      "learning_rate": 0.00017156973216607437,
+      "loss": 0.7352,
+      "step": 1647
+    },
+    {
+      "epoch": 0.1427797873031688,
+      "grad_norm": 2.0627474784851074,
+      "learning_rate": 0.00017155239663690735,
+      "loss": 0.7025,
+      "step": 1648
+    },
+    {
+      "epoch": 0.14286642552361975,
+      "grad_norm": 2.3985772132873535,
+      "learning_rate": 0.00017153506110774032,
+      "loss": 0.8061,
+      "step": 1649
+    },
+    {
+      "epoch": 0.14295306374407069,
+      "grad_norm": 3.7956619262695312,
+      "learning_rate": 0.0001715177255785733,
+      "loss": 1.2867,
+      "step": 1650
+    },
+    {
+      "epoch": 0.14303970196452165,
+      "grad_norm": 2.972729444503784,
+      "learning_rate": 0.00017150039004940626,
+      "loss": 0.7314,
+      "step": 1651
+    },
+    {
+      "epoch": 0.1431263401849726,
+      "grad_norm": 2.8962388038635254,
+      "learning_rate": 0.00017148305452023923,
+      "loss": 0.7007,
+      "step": 1652
+    },
+    {
+      "epoch": 0.14321297840542355,
+      "grad_norm": 2.368075370788574,
+      "learning_rate": 0.00017146571899107223,
+      "loss": 0.8282,
+      "step": 1653
+    },
+    {
+      "epoch": 0.1432996166258745,
+      "grad_norm": 2.195873975753784,
+      "learning_rate": 0.0001714483834619052,
+      "loss": 0.7086,
+      "step": 1654
+    },
+    {
+      "epoch": 0.14338625484632544,
+      "grad_norm": 2.4867501258850098,
+      "learning_rate": 0.00017143104793273817,
+      "loss": 0.9421,
+      "step": 1655
+    },
+    {
+      "epoch": 0.1434728930667764,
+      "grad_norm": 2.228952407836914,
+      "learning_rate": 0.00017141371240357115,
+      "loss": 0.828,
+      "step": 1656
+    },
+    {
+      "epoch": 0.14355953128722737,
+      "grad_norm": 2.687671661376953,
+      "learning_rate": 0.00017139637687440412,
+      "loss": 1.1883,
+      "step": 1657
+    },
+    {
+      "epoch": 0.1436461695076783,
+      "grad_norm": 2.08331036567688,
+      "learning_rate": 0.0001713790413452371,
+      "loss": 0.6826,
+      "step": 1658
+    },
+    {
+      "epoch": 0.14373280772812927,
+      "grad_norm": 2.9926743507385254,
+      "learning_rate": 0.00017136170581607006,
+      "loss": 1.4945,
+      "step": 1659
+    },
+    {
+      "epoch": 0.1438194459485802,
+      "grad_norm": 1.9897973537445068,
+      "learning_rate": 0.00017134437028690303,
+      "loss": 0.6657,
+      "step": 1660
+    },
+    {
+      "epoch": 0.14390608416903117,
+      "grad_norm": 1.9877430200576782,
+      "learning_rate": 0.000171327034757736,
+      "loss": 0.7568,
+      "step": 1661
+    },
+    {
+      "epoch": 0.14399272238948213,
+      "grad_norm": 1.7816320657730103,
+      "learning_rate": 0.00017130969922856898,
+      "loss": 0.6237,
+      "step": 1662
+    },
+    {
+      "epoch": 0.14407936060993307,
+      "grad_norm": 2.3789243698120117,
+      "learning_rate": 0.00017129236369940195,
+      "loss": 0.7238,
+      "step": 1663
+    },
+    {
+      "epoch": 0.14416599883038403,
+      "grad_norm": 3.195343017578125,
+      "learning_rate": 0.00017127502817023492,
+      "loss": 1.1631,
+      "step": 1664
+    },
+    {
+      "epoch": 0.14425263705083496,
+      "grad_norm": 1.7618796825408936,
+      "learning_rate": 0.0001712576926410679,
+      "loss": 0.6122,
+      "step": 1665
+    },
+    {
+      "epoch": 0.14433927527128593,
+      "grad_norm": 1.8592466115951538,
+      "learning_rate": 0.00017124035711190086,
+      "loss": 0.6109,
+      "step": 1666
+    },
+    {
+      "epoch": 0.1444259134917369,
+      "grad_norm": 1.7936112880706787,
+      "learning_rate": 0.00017122302158273383,
+      "loss": 0.6588,
+      "step": 1667
+    },
+    {
+      "epoch": 0.14451255171218783,
+      "grad_norm": 2.9144630432128906,
+      "learning_rate": 0.0001712056860535668,
+      "loss": 0.8787,
+      "step": 1668
+    },
+    {
+      "epoch": 0.1445991899326388,
+      "grad_norm": 3.0413758754730225,
+      "learning_rate": 0.00017118835052439978,
+      "loss": 1.4111,
+      "step": 1669
+    },
+    {
+      "epoch": 0.14468582815308972,
+      "grad_norm": 2.5947086811065674,
+      "learning_rate": 0.00017117101499523275,
+      "loss": 1.4285,
+      "step": 1670
+    },
+    {
+      "epoch": 0.1447724663735407,
+      "grad_norm": 3.6176469326019287,
+      "learning_rate": 0.00017115367946606572,
+      "loss": 1.2737,
+      "step": 1671
+    },
+    {
+      "epoch": 0.14485910459399165,
+      "grad_norm": 2.415332555770874,
+      "learning_rate": 0.0001711363439368987,
+      "loss": 0.8033,
+      "step": 1672
+    },
+    {
+      "epoch": 0.14494574281444259,
+      "grad_norm": 2.4042274951934814,
+      "learning_rate": 0.00017111900840773166,
+      "loss": 0.7377,
+      "step": 1673
+    },
+    {
+      "epoch": 0.14503238103489355,
+      "grad_norm": 1.9535367488861084,
+      "learning_rate": 0.00017110167287856464,
+      "loss": 0.6854,
+      "step": 1674
+    },
+    {
+      "epoch": 0.14511901925534448,
+      "grad_norm": 2.505851984024048,
+      "learning_rate": 0.0001710843373493976,
+      "loss": 0.5743,
+      "step": 1675
+    },
+    {
+      "epoch": 0.14520565747579545,
+      "grad_norm": 2.7805051803588867,
+      "learning_rate": 0.00017106700182023058,
+      "loss": 1.2287,
+      "step": 1676
+    },
+    {
+      "epoch": 0.1452922956962464,
+      "grad_norm": 1.95442533493042,
+      "learning_rate": 0.00017104966629106355,
+      "loss": 0.6902,
+      "step": 1677
+    },
+    {
+      "epoch": 0.14537893391669734,
+      "grad_norm": 1.672817349433899,
+      "learning_rate": 0.00017103233076189652,
+      "loss": 0.5887,
+      "step": 1678
+    },
+    {
+      "epoch": 0.1454655721371483,
+      "grad_norm": 1.821891188621521,
+      "learning_rate": 0.0001710149952327295,
+      "loss": 0.6593,
+      "step": 1679
+    },
+    {
+      "epoch": 0.14555221035759924,
+      "grad_norm": 2.1559698581695557,
+      "learning_rate": 0.00017099765970356246,
+      "loss": 0.7672,
+      "step": 1680
+    },
+    {
+      "epoch": 0.1456388485780502,
+      "grad_norm": 1.8534151315689087,
+      "learning_rate": 0.00017098032417439544,
+      "loss": 0.7136,
+      "step": 1681
+    },
+    {
+      "epoch": 0.14572548679850117,
+      "grad_norm": 3.7335124015808105,
+      "learning_rate": 0.0001709629886452284,
+      "loss": 1.4268,
+      "step": 1682
+    },
+    {
+      "epoch": 0.1458121250189521,
+      "grad_norm": 1.8088746070861816,
+      "learning_rate": 0.00017094565311606138,
+      "loss": 0.6658,
+      "step": 1683
+    },
+    {
+      "epoch": 0.14589876323940307,
+      "grad_norm": 2.42153000831604,
+      "learning_rate": 0.00017092831758689432,
+      "loss": 0.8545,
+      "step": 1684
+    },
+    {
+      "epoch": 0.145985401459854,
+      "grad_norm": 1.8992491960525513,
+      "learning_rate": 0.00017091098205772732,
+      "loss": 0.7,
+      "step": 1685
+    },
+    {
+      "epoch": 0.14607203968030497,
+      "grad_norm": 3.7487382888793945,
+      "learning_rate": 0.0001708936465285603,
+      "loss": 1.4693,
+      "step": 1686
+    },
+    {
+      "epoch": 0.14615867790075593,
+      "grad_norm": 4.550262928009033,
+      "learning_rate": 0.00017087631099939327,
+      "loss": 1.0104,
+      "step": 1687
+    },
+    {
+      "epoch": 0.14624531612120686,
+      "grad_norm": 3.737248182296753,
+      "learning_rate": 0.00017085897547022624,
+      "loss": 1.5394,
+      "step": 1688
+    },
+    {
+      "epoch": 0.14633195434165783,
+      "grad_norm": 1.9069528579711914,
+      "learning_rate": 0.0001708416399410592,
+      "loss": 0.7532,
+      "step": 1689
+    },
+    {
+      "epoch": 0.14641859256210876,
+      "grad_norm": 1.7625000476837158,
+      "learning_rate": 0.00017082430441189218,
+      "loss": 0.7417,
+      "step": 1690
+    },
+    {
+      "epoch": 0.14650523078255973,
+      "grad_norm": 3.8548965454101562,
+      "learning_rate": 0.00017080696888272515,
+      "loss": 1.393,
+      "step": 1691
+    },
+    {
+      "epoch": 0.1465918690030107,
+      "grad_norm": 2.1203994750976562,
+      "learning_rate": 0.00017078963335355812,
+      "loss": 0.7264,
+      "step": 1692
+    },
+    {
+      "epoch": 0.14667850722346162,
+      "grad_norm": 2.462294340133667,
+      "learning_rate": 0.0001707722978243911,
+      "loss": 0.7237,
+      "step": 1693
+    },
+    {
+      "epoch": 0.1467651454439126,
+      "grad_norm": 2.781794309616089,
+      "learning_rate": 0.00017075496229522407,
+      "loss": 1.292,
+      "step": 1694
+    },
+    {
+      "epoch": 0.14685178366436352,
+      "grad_norm": 1.7303260564804077,
+      "learning_rate": 0.00017073762676605704,
+      "loss": 0.6188,
+      "step": 1695
+    },
+    {
+      "epoch": 0.14693842188481449,
+      "grad_norm": 1.9921581745147705,
+      "learning_rate": 0.00017072029123689,
+      "loss": 0.5657,
+      "step": 1696
+    },
+    {
+      "epoch": 0.14702506010526545,
+      "grad_norm": 2.3421790599823,
+      "learning_rate": 0.00017070295570772298,
+      "loss": 0.7629,
+      "step": 1697
+    },
+    {
+      "epoch": 0.14711169832571638,
+      "grad_norm": 1.648913860321045,
+      "learning_rate": 0.00017068562017855595,
+      "loss": 0.6006,
+      "step": 1698
+    },
+    {
+      "epoch": 0.14719833654616735,
+      "grad_norm": 2.2114713191986084,
+      "learning_rate": 0.00017066828464938893,
+      "loss": 0.6381,
+      "step": 1699
+    },
+    {
+      "epoch": 0.14728497476661828,
+      "grad_norm": 1.968942642211914,
+      "learning_rate": 0.0001706509491202219,
+      "loss": 0.8233,
+      "step": 1700
+    },
+    {
+      "epoch": 0.14737161298706924,
+      "grad_norm": 3.240399122238159,
+      "learning_rate": 0.00017063361359105487,
+      "loss": 1.289,
+      "step": 1701
+    },
+
},
|
11918 |
+
{
|
11919 |
+
"epoch": 0.1474582512075202,
|
11920 |
+
"grad_norm": 2.444948196411133,
|
11921 |
+
"learning_rate": 0.00017061627806188784,
|
11922 |
+
"loss": 0.8681,
|
11923 |
+
"step": 1702
|
11924 |
+
},
|
11925 |
+
{
|
11926 |
+
"epoch": 0.14754488942797114,
|
11927 |
+
"grad_norm": 3.0092973709106445,
|
11928 |
+
"learning_rate": 0.0001705989425327208,
|
11929 |
+
"loss": 0.8336,
|
11930 |
+
"step": 1703
|
11931 |
+
},
|
11932 |
+
{
|
11933 |
+
"epoch": 0.1476315276484221,
|
11934 |
+
"grad_norm": 2.308622360229492,
|
11935 |
+
"learning_rate": 0.00017058160700355378,
|
11936 |
+
"loss": 0.6849,
|
11937 |
+
"step": 1704
|
11938 |
+
},
|
11939 |
+
{
|
11940 |
+
"epoch": 0.14771816586887304,
|
11941 |
+
"grad_norm": 2.1733205318450928,
|
11942 |
+
"learning_rate": 0.00017056427147438676,
|
11943 |
+
"loss": 0.7589,
|
11944 |
+
"step": 1705
|
11945 |
+
},
|
11946 |
+
{
|
11947 |
+
"epoch": 0.147804804089324,
|
11948 |
+
"grad_norm": 3.428966999053955,
|
11949 |
+
"learning_rate": 0.00017054693594521973,
|
11950 |
+
"loss": 1.3943,
|
11951 |
+
"step": 1706
|
11952 |
+
},
|
11953 |
+
{
|
11954 |
+
"epoch": 0.14789144230977497,
|
11955 |
+
"grad_norm": 1.6916941404342651,
|
11956 |
+
"learning_rate": 0.0001705296004160527,
|
11957 |
+
"loss": 0.7166,
|
11958 |
+
"step": 1707
|
11959 |
+
},
|
11960 |
+
{
|
11961 |
+
"epoch": 0.1479780805302259,
|
11962 |
+
"grad_norm": 3.7473268508911133,
|
11963 |
+
"learning_rate": 0.00017051226488688567,
|
11964 |
+
"loss": 1.0922,
|
11965 |
+
"step": 1708
|
11966 |
+
},
|
11967 |
+
{
|
11968 |
+
"epoch": 0.14806471875067687,
|
11969 |
+
"grad_norm": 1.8072813749313354,
|
11970 |
+
"learning_rate": 0.00017049492935771864,
|
11971 |
+
"loss": 0.726,
|
11972 |
+
"step": 1709
|
11973 |
+
},
|
11974 |
+
{
|
11975 |
+
"epoch": 0.1481513569711278,
|
11976 |
+
"grad_norm": 1.856656789779663,
|
11977 |
+
"learning_rate": 0.0001704775938285516,
|
11978 |
+
"loss": 0.6827,
|
11979 |
+
"step": 1710
|
11980 |
+
},
|
11981 |
+
{
|
11982 |
+
"epoch": 0.14823799519157876,
|
11983 |
+
"grad_norm": 1.9279342889785767,
|
11984 |
+
"learning_rate": 0.00017046025829938458,
|
11985 |
+
"loss": 0.6826,
|
11986 |
+
"step": 1711
|
11987 |
+
},
|
11988 |
+
{
|
11989 |
+
"epoch": 0.14832463341202973,
|
11990 |
+
"grad_norm": 1.6791661977767944,
|
11991 |
+
"learning_rate": 0.00017044292277021756,
|
11992 |
+
"loss": 0.6901,
|
11993 |
+
"step": 1712
|
11994 |
+
},
|
11995 |
+
{
|
11996 |
+
"epoch": 0.14841127163248066,
|
11997 |
+
"grad_norm": 2.066715717315674,
|
11998 |
+
"learning_rate": 0.00017042558724105053,
|
11999 |
+
"loss": 0.8535,
|
12000 |
+
"step": 1713
|
12001 |
+
},
|
12002 |
+
{
|
12003 |
+
"epoch": 0.14849790985293163,
|
12004 |
+
"grad_norm": 1.943643569946289,
|
12005 |
+
"learning_rate": 0.0001704082517118835,
|
12006 |
+
"loss": 0.6195,
|
12007 |
+
"step": 1714
|
12008 |
+
},
|
12009 |
+
{
|
12010 |
+
"epoch": 0.14858454807338256,
|
12011 |
+
"grad_norm": 2.405428409576416,
|
12012 |
+
"learning_rate": 0.00017039091618271647,
|
12013 |
+
"loss": 0.8231,
|
12014 |
+
"step": 1715
|
12015 |
+
},
|
12016 |
+
{
|
12017 |
+
"epoch": 0.14867118629383352,
|
12018 |
+
"grad_norm": 1.9496991634368896,
|
12019 |
+
"learning_rate": 0.00017037358065354944,
|
12020 |
+
"loss": 0.8091,
|
12021 |
+
"step": 1716
|
12022 |
+
},
|
12023 |
+
{
|
12024 |
+
"epoch": 0.1487578245142845,
|
12025 |
+
"grad_norm": 2.0480973720550537,
|
12026 |
+
"learning_rate": 0.00017035624512438244,
|
12027 |
+
"loss": 0.749,
|
12028 |
+
"step": 1717
|
12029 |
+
},
|
12030 |
+
{
|
12031 |
+
"epoch": 0.14884446273473542,
|
12032 |
+
"grad_norm": 1.9998536109924316,
|
12033 |
+
"learning_rate": 0.0001703389095952154,
|
12034 |
+
"loss": 0.689,
|
12035 |
+
"step": 1718
|
12036 |
+
},
|
12037 |
+
{
|
12038 |
+
"epoch": 0.14893110095518639,
|
12039 |
+
"grad_norm": 2.0259885787963867,
|
12040 |
+
"learning_rate": 0.00017032157406604838,
|
12041 |
+
"loss": 0.731,
|
12042 |
+
"step": 1719
|
12043 |
+
},
|
12044 |
+
{
|
12045 |
+
"epoch": 0.14901773917563732,
|
12046 |
+
"grad_norm": 4.064607620239258,
|
12047 |
+
"learning_rate": 0.00017030423853688136,
|
12048 |
+
"loss": 1.0877,
|
12049 |
+
"step": 1720
|
12050 |
+
},
|
12051 |
+
{
|
12052 |
+
"epoch": 0.14910437739608828,
|
12053 |
+
"grad_norm": 2.8392817974090576,
|
12054 |
+
"learning_rate": 0.00017028690300771433,
|
12055 |
+
"loss": 0.7399,
|
12056 |
+
"step": 1721
|
12057 |
+
},
|
12058 |
+
{
|
12059 |
+
"epoch": 0.14919101561653925,
|
12060 |
+
"grad_norm": 2.179415464401245,
|
12061 |
+
"learning_rate": 0.0001702695674785473,
|
12062 |
+
"loss": 0.6812,
|
12063 |
+
"step": 1722
|
12064 |
+
},
|
12065 |
+
{
|
12066 |
+
"epoch": 0.14927765383699018,
|
12067 |
+
"grad_norm": 2.63527512550354,
|
12068 |
+
"learning_rate": 0.00017025223194938027,
|
12069 |
+
"loss": 0.6938,
|
12070 |
+
"step": 1723
|
12071 |
+
},
|
12072 |
+
{
|
12073 |
+
"epoch": 0.14936429205744114,
|
12074 |
+
"grad_norm": 3.320754289627075,
|
12075 |
+
"learning_rate": 0.00017023489642021324,
|
12076 |
+
"loss": 1.3676,
|
12077 |
+
"step": 1724
|
12078 |
+
},
|
12079 |
+
{
|
12080 |
+
"epoch": 0.14945093027789208,
|
12081 |
+
"grad_norm": 3.677220106124878,
|
12082 |
+
"learning_rate": 0.00017021756089104621,
|
12083 |
+
"loss": 0.8076,
|
12084 |
+
"step": 1725
|
12085 |
+
},
|
12086 |
+
{
|
12087 |
+
"epoch": 0.14953756849834304,
|
12088 |
+
"grad_norm": 1.9516993761062622,
|
12089 |
+
"learning_rate": 0.00017020022536187919,
|
12090 |
+
"loss": 0.6601,
|
12091 |
+
"step": 1726
|
12092 |
+
},
|
12093 |
+
{
|
12094 |
+
"epoch": 0.149624206718794,
|
12095 |
+
"grad_norm": 1.625036358833313,
|
12096 |
+
"learning_rate": 0.00017018288983271216,
|
12097 |
+
"loss": 0.4996,
|
12098 |
+
"step": 1727
|
12099 |
+
},
|
12100 |
+
{
|
12101 |
+
"epoch": 0.14971084493924494,
|
12102 |
+
"grad_norm": 1.8334696292877197,
|
12103 |
+
"learning_rate": 0.00017016555430354513,
|
12104 |
+
"loss": 0.646,
|
12105 |
+
"step": 1728
|
12106 |
+
},
|
12107 |
+
{
|
12108 |
+
"epoch": 0.1497974831596959,
|
12109 |
+
"grad_norm": 2.091923952102661,
|
12110 |
+
"learning_rate": 0.0001701482187743781,
|
12111 |
+
"loss": 0.8509,
|
12112 |
+
"step": 1729
|
12113 |
+
},
|
12114 |
+
{
|
12115 |
+
"epoch": 0.14988412138014684,
|
12116 |
+
"grad_norm": 3.424103260040283,
|
12117 |
+
"learning_rate": 0.00017013088324521107,
|
12118 |
+
"loss": 1.3961,
|
12119 |
+
"step": 1730
|
12120 |
+
},
|
12121 |
+
{
|
12122 |
+
"epoch": 0.1499707596005978,
|
12123 |
+
"grad_norm": 3.841977596282959,
|
12124 |
+
"learning_rate": 0.00017011354771604404,
|
12125 |
+
"loss": 1.2245,
|
12126 |
+
"step": 1731
|
12127 |
+
},
|
12128 |
+
{
|
12129 |
+
"epoch": 0.15005739782104877,
|
12130 |
+
"grad_norm": 2.0216140747070312,
|
12131 |
+
"learning_rate": 0.00017009621218687702,
|
12132 |
+
"loss": 0.721,
|
12133 |
+
"step": 1732
|
12134 |
+
},
|
12135 |
+
{
|
12136 |
+
"epoch": 0.1501440360414997,
|
12137 |
+
"grad_norm": 1.8720191717147827,
|
12138 |
+
"learning_rate": 0.00017007887665771,
|
12139 |
+
"loss": 0.7794,
|
12140 |
+
"step": 1733
|
12141 |
+
},
|
12142 |
+
{
|
12143 |
+
"epoch": 0.15023067426195066,
|
12144 |
+
"grad_norm": 2.76397442817688,
|
12145 |
+
"learning_rate": 0.00017006154112854296,
|
12146 |
+
"loss": 1.4476,
|
12147 |
+
"step": 1734
|
12148 |
+
},
|
12149 |
+
{
|
12150 |
+
"epoch": 0.1503173124824016,
|
12151 |
+
"grad_norm": 1.554577112197876,
|
12152 |
+
"learning_rate": 0.00017004420559937593,
|
12153 |
+
"loss": 0.6097,
|
12154 |
+
"step": 1735
|
12155 |
+
},
|
12156 |
+
{
|
12157 |
+
"epoch": 0.15040395070285256,
|
12158 |
+
"grad_norm": 1.6091926097869873,
|
12159 |
+
"learning_rate": 0.0001700268700702089,
|
12160 |
+
"loss": 0.6423,
|
12161 |
+
"step": 1736
|
12162 |
+
},
|
12163 |
+
{
|
12164 |
+
"epoch": 0.15049058892330353,
|
12165 |
+
"grad_norm": 3.766949415206909,
|
12166 |
+
"learning_rate": 0.00017000953454104187,
|
12167 |
+
"loss": 1.1055,
|
12168 |
+
"step": 1737
|
12169 |
+
},
|
12170 |
+
{
|
12171 |
+
"epoch": 0.15057722714375446,
|
12172 |
+
"grad_norm": 2.207890510559082,
|
12173 |
+
"learning_rate": 0.00016999219901187485,
|
12174 |
+
"loss": 0.7142,
|
12175 |
+
"step": 1738
|
12176 |
+
},
|
12177 |
+
{
|
12178 |
+
"epoch": 0.15066386536420542,
|
12179 |
+
"grad_norm": 1.5540878772735596,
|
12180 |
+
"learning_rate": 0.00016997486348270782,
|
12181 |
+
"loss": 0.6366,
|
12182 |
+
"step": 1739
|
12183 |
+
},
|
12184 |
+
{
|
12185 |
+
"epoch": 0.15075050358465636,
|
12186 |
+
"grad_norm": 2.3206419944763184,
|
12187 |
+
"learning_rate": 0.0001699575279535408,
|
12188 |
+
"loss": 0.6924,
|
12189 |
+
"step": 1740
|
12190 |
+
},
|
12191 |
+
{
|
12192 |
+
"epoch": 0.15083714180510732,
|
12193 |
+
"grad_norm": 3.838148593902588,
|
12194 |
+
"learning_rate": 0.00016994019242437376,
|
12195 |
+
"loss": 0.7725,
|
12196 |
+
"step": 1741
|
12197 |
+
},
|
12198 |
+
{
|
12199 |
+
"epoch": 0.15092378002555829,
|
12200 |
+
"grad_norm": 3.434176445007324,
|
12201 |
+
"learning_rate": 0.00016992285689520673,
|
12202 |
+
"loss": 1.3823,
|
12203 |
+
"step": 1742
|
12204 |
+
},
|
12205 |
+
{
|
12206 |
+
"epoch": 0.15101041824600922,
|
12207 |
+
"grad_norm": 4.0474042892456055,
|
12208 |
+
"learning_rate": 0.0001699055213660397,
|
12209 |
+
"loss": 1.0463,
|
12210 |
+
"step": 1743
|
12211 |
+
},
|
12212 |
+
{
|
12213 |
+
"epoch": 0.15109705646646018,
|
12214 |
+
"grad_norm": 1.9551926851272583,
|
12215 |
+
"learning_rate": 0.00016988818583687268,
|
12216 |
+
"loss": 0.6454,
|
12217 |
+
"step": 1744
|
12218 |
+
},
|
12219 |
+
{
|
12220 |
+
"epoch": 0.15118369468691112,
|
12221 |
+
"grad_norm": 2.045931100845337,
|
12222 |
+
"learning_rate": 0.00016987085030770565,
|
12223 |
+
"loss": 0.6641,
|
12224 |
+
"step": 1745
|
12225 |
+
},
|
12226 |
+
{
|
12227 |
+
"epoch": 0.15127033290736208,
|
12228 |
+
"grad_norm": 2.059396266937256,
|
12229 |
+
"learning_rate": 0.00016985351477853862,
|
12230 |
+
"loss": 0.7461,
|
12231 |
+
"step": 1746
|
12232 |
+
},
|
12233 |
+
{
|
12234 |
+
"epoch": 0.15135697112781304,
|
12235 |
+
"grad_norm": 3.623816728591919,
|
12236 |
+
"learning_rate": 0.0001698361792493716,
|
12237 |
+
"loss": 1.1414,
|
12238 |
+
"step": 1747
|
12239 |
+
},
|
12240 |
+
{
|
12241 |
+
"epoch": 0.15144360934826398,
|
12242 |
+
"grad_norm": 3.8433258533477783,
|
12243 |
+
"learning_rate": 0.00016981884372020456,
|
12244 |
+
"loss": 1.2567,
|
12245 |
+
"step": 1748
|
12246 |
+
},
|
12247 |
+
{
|
12248 |
+
"epoch": 0.15153024756871494,
|
12249 |
+
"grad_norm": 1.7676864862442017,
|
12250 |
+
"learning_rate": 0.00016980150819103756,
|
12251 |
+
"loss": 0.7186,
|
12252 |
+
"step": 1749
|
12253 |
+
},
|
12254 |
+
{
|
12255 |
+
"epoch": 0.15161688578916588,
|
12256 |
+
"grad_norm": 3.0051074028015137,
|
12257 |
+
"learning_rate": 0.00016978417266187053,
|
12258 |
+
"loss": 1.2334,
|
12259 |
+
"step": 1750
|
12260 |
+
},
|
12261 |
+
{
|
12262 |
+
"epoch": 0.15170352400961684,
|
12263 |
+
"grad_norm": 1.7863881587982178,
|
12264 |
+
"learning_rate": 0.0001697668371327035,
|
12265 |
+
"loss": 0.6959,
|
12266 |
+
"step": 1751
|
12267 |
+
},
|
12268 |
+
{
|
12269 |
+
"epoch": 0.1517901622300678,
|
12270 |
+
"grad_norm": 2.93080997467041,
|
12271 |
+
"learning_rate": 0.00016974950160353648,
|
12272 |
+
"loss": 1.3504,
|
12273 |
+
"step": 1752
|
12274 |
+
},
|
12275 |
+
{
|
12276 |
+
"epoch": 0.15187680045051874,
|
12277 |
+
"grad_norm": 2.809011220932007,
|
12278 |
+
"learning_rate": 0.00016973216607436945,
|
12279 |
+
"loss": 1.3585,
|
12280 |
+
"step": 1753
|
12281 |
+
},
|
12282 |
+
{
|
12283 |
+
"epoch": 0.1519634386709697,
|
12284 |
+
"grad_norm": 1.2620030641555786,
|
12285 |
+
"learning_rate": 0.00016971483054520242,
|
12286 |
+
"loss": 0.6976,
|
12287 |
+
"step": 1754
|
12288 |
+
},
|
12289 |
+
{
|
12290 |
+
"epoch": 0.15205007689142064,
|
12291 |
+
"grad_norm": 2.436025381088257,
|
12292 |
+
"learning_rate": 0.0001696974950160354,
|
12293 |
+
"loss": 0.7185,
|
12294 |
+
"step": 1755
|
12295 |
+
},
|
12296 |
+
{
|
12297 |
+
"epoch": 0.1521367151118716,
|
12298 |
+
"grad_norm": 2.8672831058502197,
|
12299 |
+
"learning_rate": 0.00016968015948686836,
|
12300 |
+
"loss": 1.2531,
|
12301 |
+
"step": 1756
|
12302 |
+
},
|
12303 |
+
{
|
12304 |
+
"epoch": 0.15222335333232256,
|
12305 |
+
"grad_norm": 2.9081544876098633,
|
12306 |
+
"learning_rate": 0.00016966282395770133,
|
12307 |
+
"loss": 1.1994,
|
12308 |
+
"step": 1757
|
12309 |
+
},
|
12310 |
+
{
|
12311 |
+
"epoch": 0.1523099915527735,
|
12312 |
+
"grad_norm": 3.3374688625335693,
|
12313 |
+
"learning_rate": 0.0001696454884285343,
|
12314 |
+
"loss": 1.3973,
|
12315 |
+
"step": 1758
|
12316 |
+
},
|
12317 |
+
{
|
12318 |
+
"epoch": 0.15239662977322446,
|
12319 |
+
"grad_norm": 1.6591036319732666,
|
12320 |
+
"learning_rate": 0.00016962815289936728,
|
12321 |
+
"loss": 0.7326,
|
12322 |
+
"step": 1759
|
12323 |
+
},
|
12324 |
+
{
|
12325 |
+
"epoch": 0.1524832679936754,
|
12326 |
+
"grad_norm": 2.110149383544922,
|
12327 |
+
"learning_rate": 0.00016961081737020025,
|
12328 |
+
"loss": 0.717,
|
12329 |
+
"step": 1760
|
12330 |
+
},
|
12331 |
+
{
|
12332 |
+
"epoch": 0.15256990621412636,
|
12333 |
+
"grad_norm": 2.1149117946624756,
|
12334 |
+
"learning_rate": 0.00016959348184103322,
|
12335 |
+
"loss": 0.7261,
|
12336 |
+
"step": 1761
|
12337 |
+
},
|
12338 |
+
{
|
12339 |
+
"epoch": 0.15265654443457732,
|
12340 |
+
"grad_norm": 1.6482665538787842,
|
12341 |
+
"learning_rate": 0.0001695761463118662,
|
12342 |
+
"loss": 0.6924,
|
12343 |
+
"step": 1762
|
12344 |
+
},
|
12345 |
+
{
|
12346 |
+
"epoch": 0.15274318265502826,
|
12347 |
+
"grad_norm": 1.8392702341079712,
|
12348 |
+
"learning_rate": 0.00016955881078269916,
|
12349 |
+
"loss": 0.5869,
|
12350 |
+
"step": 1763
|
12351 |
+
},
|
12352 |
+
{
|
12353 |
+
"epoch": 0.15282982087547922,
|
12354 |
+
"grad_norm": 2.8814315795898438,
|
12355 |
+
"learning_rate": 0.00016954147525353213,
|
12356 |
+
"loss": 0.7349,
|
12357 |
+
"step": 1764
|
12358 |
+
},
|
12359 |
+
{
|
12360 |
+
"epoch": 0.15291645909593016,
|
12361 |
+
"grad_norm": 2.3000972270965576,
|
12362 |
+
"learning_rate": 0.0001695241397243651,
|
12363 |
+
"loss": 0.76,
|
12364 |
+
"step": 1765
|
12365 |
+
},
|
12366 |
+
{
|
12367 |
+
"epoch": 0.15300309731638112,
|
12368 |
+
"grad_norm": 3.1703596115112305,
|
12369 |
+
"learning_rate": 0.00016950680419519808,
|
12370 |
+
"loss": 1.4504,
|
12371 |
+
"step": 1766
|
12372 |
+
},
|
12373 |
+
{
|
12374 |
+
"epoch": 0.15308973553683208,
|
12375 |
+
"grad_norm": 1.816587209701538,
|
12376 |
+
"learning_rate": 0.00016948946866603105,
|
12377 |
+
"loss": 0.6954,
|
12378 |
+
"step": 1767
|
12379 |
+
},
|
12380 |
+
{
|
12381 |
+
"epoch": 0.15317637375728302,
|
12382 |
+
"grad_norm": 2.2006471157073975,
|
12383 |
+
"learning_rate": 0.00016947213313686402,
|
12384 |
+
"loss": 0.6981,
|
12385 |
+
"step": 1768
|
12386 |
+
},
|
12387 |
+
{
|
12388 |
+
"epoch": 0.15326301197773398,
|
12389 |
+
"grad_norm": 2.1972858905792236,
|
12390 |
+
"learning_rate": 0.000169454797607697,
|
12391 |
+
"loss": 0.6711,
|
12392 |
+
"step": 1769
|
12393 |
+
},
|
12394 |
+
{
|
12395 |
+
"epoch": 0.15334965019818492,
|
12396 |
+
"grad_norm": 3.571471929550171,
|
12397 |
+
"learning_rate": 0.00016943746207852996,
|
12398 |
+
"loss": 1.3881,
|
12399 |
+
"step": 1770
|
12400 |
+
},
|
12401 |
+
{
|
12402 |
+
"epoch": 0.15343628841863588,
|
12403 |
+
"grad_norm": 4.164616107940674,
|
12404 |
+
"learning_rate": 0.00016942012654936294,
|
12405 |
+
"loss": 1.2625,
|
12406 |
+
"step": 1771
|
12407 |
+
},
|
12408 |
+
{
|
12409 |
+
"epoch": 0.15352292663908684,
|
12410 |
+
"grad_norm": 4.141618251800537,
|
12411 |
+
"learning_rate": 0.0001694027910201959,
|
12412 |
+
"loss": 0.626,
|
12413 |
+
"step": 1772
|
12414 |
+
},
|
12415 |
+
{
|
12416 |
+
"epoch": 0.15360956485953778,
|
12417 |
+
"grad_norm": 2.4178426265716553,
|
12418 |
+
"learning_rate": 0.00016938545549102888,
|
12419 |
+
"loss": 0.8067,
|
12420 |
+
"step": 1773
|
12421 |
+
},
|
12422 |
+
{
|
12423 |
+
"epoch": 0.15369620307998874,
|
12424 |
+
"grad_norm": 1.7022314071655273,
|
12425 |
+
"learning_rate": 0.00016936811996186185,
|
12426 |
+
"loss": 0.6043,
|
12427 |
+
"step": 1774
|
12428 |
+
},
|
12429 |
+
{
|
12430 |
+
"epoch": 0.15378284130043968,
|
12431 |
+
"grad_norm": 3.7896087169647217,
|
12432 |
+
"learning_rate": 0.00016935078443269482,
|
12433 |
+
"loss": 0.9447,
|
12434 |
+
"step": 1775
|
12435 |
+
},
|
12436 |
+
{
|
12437 |
+
"epoch": 0.15386947952089064,
|
12438 |
+
"grad_norm": 5.354947090148926,
|
12439 |
+
"learning_rate": 0.0001693334489035278,
|
12440 |
+
"loss": 1.4251,
|
12441 |
+
"step": 1776
|
12442 |
+
},
|
12443 |
+
{
|
12444 |
+
"epoch": 0.1539561177413416,
|
12445 |
+
"grad_norm": 3.51838755607605,
|
12446 |
+
"learning_rate": 0.00016931611337436077,
|
12447 |
+
"loss": 2.1263,
|
12448 |
+
"step": 1777
|
12449 |
+
},
|
12450 |
+
{
|
12451 |
+
"epoch": 0.15404275596179254,
|
12452 |
+
"grad_norm": 2.3795011043548584,
|
12453 |
+
"learning_rate": 0.00016929877784519374,
|
12454 |
+
"loss": 0.8083,
|
12455 |
+
"step": 1778
|
12456 |
+
},
|
12457 |
+
{
|
12458 |
+
"epoch": 0.1541293941822435,
|
12459 |
+
"grad_norm": 3.283785581588745,
|
12460 |
+
"learning_rate": 0.00016928144231602668,
|
12461 |
+
"loss": 1.2155,
|
12462 |
+
"step": 1779
|
12463 |
+
},
|
12464 |
+
{
|
12465 |
+
"epoch": 0.15421603240269444,
|
12466 |
+
"grad_norm": 2.0556414127349854,
|
12467 |
+
"learning_rate": 0.00016926410678685965,
|
12468 |
+
"loss": 0.6918,
|
12469 |
+
"step": 1780
|
12470 |
+
},
|
12471 |
+
{
|
12472 |
+
"epoch": 0.1543026706231454,
|
12473 |
+
"grad_norm": 2.129504680633545,
|
12474 |
+
"learning_rate": 0.00016924677125769265,
|
12475 |
+
"loss": 0.608,
|
12476 |
+
"step": 1781
|
12477 |
+
},
|
12478 |
+
{
|
12479 |
+
"epoch": 0.15438930884359636,
|
12480 |
+
"grad_norm": 3.599369764328003,
|
12481 |
+
"learning_rate": 0.00016922943572852562,
|
12482 |
+
"loss": 1.5986,
|
12483 |
+
"step": 1782
|
12484 |
+
},
|
12485 |
+
{
|
12486 |
+
"epoch": 0.1544759470640473,
|
12487 |
+
"grad_norm": 4.643326759338379,
|
12488 |
+
"learning_rate": 0.0001692121001993586,
|
12489 |
+
"loss": 1.3064,
|
12490 |
+
"step": 1783
|
12491 |
+
},
|
12492 |
+
{
|
12493 |
+
"epoch": 0.15456258528449826,
|
12494 |
+
"grad_norm": 2.29093861579895,
|
12495 |
+
"learning_rate": 0.00016919476467019157,
|
12496 |
+
"loss": 0.7386,
|
12497 |
+
"step": 1784
|
12498 |
+
},
|
12499 |
+
{
|
12500 |
+
"epoch": 0.1546492235049492,
|
12501 |
+
"grad_norm": 2.5402650833129883,
|
12502 |
+
"learning_rate": 0.00016917742914102454,
|
12503 |
+
"loss": 0.6427,
|
12504 |
+
"step": 1785
|
12505 |
+
},
|
12506 |
+
{
|
12507 |
+
"epoch": 0.15473586172540016,
|
12508 |
+
"grad_norm": 2.1897969245910645,
|
12509 |
+
"learning_rate": 0.0001691600936118575,
|
12510 |
+
"loss": 0.6332,
|
12511 |
+
"step": 1786
|
12512 |
+
},
|
12513 |
+
{
|
12514 |
+
"epoch": 0.15482249994585112,
|
12515 |
+
"grad_norm": 1.439754605293274,
|
12516 |
+
"learning_rate": 0.00016914275808269048,
|
12517 |
+
"loss": 0.555,
|
12518 |
+
"step": 1787
|
12519 |
+
},
|
12520 |
+
{
|
12521 |
+
"epoch": 0.15490913816630206,
|
12522 |
+
"grad_norm": 2.317779064178467,
|
12523 |
+
"learning_rate": 0.00016912542255352345,
|
12524 |
+
"loss": 0.8091,
|
12525 |
+
"step": 1788
|
12526 |
+
},
|
12527 |
+
{
|
12528 |
+
"epoch": 0.15499577638675302,
|
12529 |
+
"grad_norm": 4.258970737457275,
|
12530 |
+
"learning_rate": 0.00016910808702435642,
|
12531 |
+
"loss": 0.7707,
|
12532 |
+
"step": 1789
|
12533 |
+
},
|
12534 |
+
{
|
12535 |
+
"epoch": 0.15508241460720396,
|
12536 |
+
"grad_norm": 2.3234448432922363,
|
12537 |
+
"learning_rate": 0.0001690907514951894,
|
12538 |
+
"loss": 0.7787,
|
12539 |
+
"step": 1790
|
12540 |
+
},
|
12541 |
+
{
|
12542 |
+
"epoch": 0.15516905282765492,
|
12543 |
+
"grad_norm": 2.196978807449341,
|
12544 |
+
"learning_rate": 0.00016907341596602237,
|
12545 |
+
"loss": 0.766,
|
12546 |
+
"step": 1791
|
12547 |
+
},
|
12548 |
+
{
|
12549 |
+
"epoch": 0.15525569104810588,
|
12550 |
+
"grad_norm": 1.9291595220565796,
|
12551 |
+
"learning_rate": 0.00016905608043685534,
|
12552 |
+
"loss": 0.7703,
|
12553 |
+
"step": 1792
|
12554 |
+
},
|
12555 |
+
{
|
12556 |
+
"epoch": 0.15534232926855682,
|
12557 |
+
"grad_norm": 1.68526029586792,
|
12558 |
+
"learning_rate": 0.0001690387449076883,
|
12559 |
+
"loss": 0.5876,
|
12560 |
+
"step": 1793
|
12561 |
+
},
|
12562 |
+
{
|
12563 |
+
"epoch": 0.15542896748900778,
|
12564 |
+
"grad_norm": 2.023825168609619,
|
12565 |
+
"learning_rate": 0.00016902140937852128,
|
12566 |
+
"loss": 0.6832,
|
12567 |
+
"step": 1794
|
12568 |
+
},
|
12569 |
+
{
|
12570 |
+
"epoch": 0.15551560570945872,
|
12571 |
+
"grad_norm": 4.695090293884277,
|
12572 |
+
"learning_rate": 0.00016900407384935425,
|
12573 |
+
"loss": 1.4986,
|
12574 |
+
"step": 1795
|
12575 |
+
},
|
12576 |
+
{
|
12577 |
+
"epoch": 0.15560224392990968,
|
12578 |
+
"grad_norm": 2.251880407333374,
|
12579 |
+
"learning_rate": 0.00016898673832018723,
|
12580 |
+
"loss": 0.6553,
|
12581 |
+
"step": 1796
|
12582 |
+
},
|
12583 |
+
{
|
12584 |
+
"epoch": 0.15568888215036064,
|
12585 |
+
"grad_norm": 1.7865068912506104,
|
12586 |
+
"learning_rate": 0.0001689694027910202,
|
12587 |
+
"loss": 0.6466,
|
12588 |
+
"step": 1797
|
12589 |
+
},
|
12590 |
+
{
|
12591 |
+
"epoch": 0.15577552037081158,
|
12592 |
+
"grad_norm": 2.233473300933838,
|
12593 |
+
"learning_rate": 0.00016895206726185317,
|
12594 |
+
"loss": 0.7024,
|
12595 |
+
"step": 1798
|
12596 |
+
},
|
12597 |
+
{
|
12598 |
+
"epoch": 0.15586215859126254,
|
12599 |
+
"grad_norm": 2.202174186706543,
|
12600 |
+
"learning_rate": 0.00016893473173268614,
|
12601 |
+
"loss": 0.6965,
|
12602 |
+
"step": 1799
|
12603 |
+
},
|
12604 |
+
{
|
12605 |
+
"epoch": 0.15594879681171347,
|
12606 |
+
"grad_norm": 2.558868169784546,
|
12607 |
+
"learning_rate": 0.0001689173962035191,
|
12608 |
+
"loss": 0.7657,
|
12609 |
+
"step": 1800
|
12610 |
+
},
|
12611 |
+
{
|
12612 |
+
"epoch": 0.15603543503216444,
|
12613 |
+
"grad_norm": 2.2319934368133545,
|
12614 |
+
"learning_rate": 0.00016890006067435208,
|
12615 |
+
"loss": 0.8032,
|
12616 |
+
"step": 1801
|
12617 |
+
},
|
12618 |
+
{
|
12619 |
+
"epoch": 0.1561220732526154,
|
12620 |
+
"grad_norm": 2.634031057357788,
|
12621 |
+
"learning_rate": 0.00016888272514518506,
|
12622 |
+
"loss": 0.9199,
|
12623 |
+
"step": 1802
|
12624 |
+
},
|
12625 |
+
{
|
12626 |
+
"epoch": 0.15620871147306634,
|
12627 |
+
"grad_norm": 3.378466844558716,
|
12628 |
+
"learning_rate": 0.00016886538961601803,
|
12629 |
+
"loss": 1.1971,
|
12630 |
+
"step": 1803
|
12631 |
+
},
|
12632 |
+
{
|
12633 |
+
"epoch": 0.1562953496935173,
|
12634 |
+
"grad_norm": 1.9781187772750854,
|
12635 |
+
"learning_rate": 0.000168848054086851,
|
12636 |
+
"loss": 0.7252,
|
12637 |
+
"step": 1804
|
12638 |
+
},
|
12639 |
+
{
|
12640 |
+
"epoch": 0.15638198791396823,
|
12641 |
+
"grad_norm": 1.6695417165756226,
|
12642 |
+
"learning_rate": 0.00016883071855768397,
|
12643 |
+
"loss": 0.6223,
|
12644 |
+
"step": 1805
|
12645 |
+
},
|
12646 |
+
{
|
12647 |
+
"epoch": 0.1564686261344192,
|
12648 |
+
"grad_norm": 1.8212687969207764,
|
12649 |
+
"learning_rate": 0.00016881338302851694,
|
12650 |
+
"loss": 0.6357,
|
12651 |
+
"step": 1806
|
12652 |
+
},
|
12653 |
+
{
|
12654 |
+
"epoch": 0.15655526435487016,
|
12655 |
+
"grad_norm": 2.042590856552124,
|
12656 |
+
"learning_rate": 0.00016879604749934991,
|
12657 |
+
"loss": 0.707,
|
12658 |
+
"step": 1807
|
12659 |
+
},
|
12660 |
+
{
|
12661 |
+
"epoch": 0.1566419025753211,
|
12662 |
+
"grad_norm": 2.0267105102539062,
|
12663 |
+
"learning_rate": 0.00016877871197018289,
|
12664 |
+
"loss": 0.7026,
|
12665 |
+
"step": 1808
|
12666 |
+
},
|
12667 |
+
{
|
12668 |
+
"epoch": 0.15672854079577206,
|
12669 |
+
"grad_norm": 1.8397223949432373,
|
12670 |
+
"learning_rate": 0.00016876137644101586,
|
12671 |
+
"loss": 0.6449,
|
12672 |
+
"step": 1809
|
12673 |
+
},
|
12674 |
+
{
|
12675 |
+
"epoch": 0.156815179016223,
|
12676 |
+
"grad_norm": 2.7385518550872803,
|
12677 |
+
"learning_rate": 0.00016874404091184883,
|
12678 |
+
"loss": 0.9185,
|
12679 |
+
"step": 1810
|
12680 |
+
},
|
12681 |
+
{
|
12682 |
+
"epoch": 0.15690181723667396,
|
12683 |
+
"grad_norm": 3.0502769947052,
|
12684 |
+
"learning_rate": 0.0001687267053826818,
|
12685 |
+
"loss": 1.1663,
|
12686 |
+
"step": 1811
|
12687 |
+
},
|
12688 |
+
{
|
12689 |
+
"epoch": 0.15698845545712492,
|
12690 |
+
"grad_norm": 3.3484129905700684,
|
12691 |
+
"learning_rate": 0.00016870936985351477,
|
12692 |
+
"loss": 1.0417,
|
12693 |
+
"step": 1812
|
12694 |
+
},
|
12695 |
+
{
|
12696 |
+
"epoch": 0.15707509367757586,
|
12697 |
+
"grad_norm": 3.5042994022369385,
|
12698 |
+
"learning_rate": 0.00016869203432434777,
|
12699 |
+
"loss": 1.0092,
|
12700 |
+
"step": 1813
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 0.15716173189802682,
|
12704 |
+
"grad_norm": 3.389727830886841,
|
12705 |
+
"learning_rate": 0.00016867469879518074,
|
12706 |
+
"loss": 1.1811,
|
12707 |
+
"step": 1814
|
12708 |
+
},
|
12709 |
+
{
|
12710 |
+
"epoch": 0.15724837011847775,
|
12711 |
+
"grad_norm": 1.8766400814056396,
|
12712 |
+
"learning_rate": 0.00016865736326601371,
|
12713 |
+
"loss": 0.6952,
|
12714 |
+
"step": 1815
|
12715 |
+
},
|
12716 |
+
{
|
12717 |
+
"epoch": 0.15733500833892872,
|
12718 |
+
"grad_norm": 3.8636491298675537,
|
12719 |
+
"learning_rate": 0.00016864002773684669,
|
12720 |
+
"loss": 1.2584,
|
12721 |
+
"step": 1816
|
12722 |
+
},
|
12723 |
+
{
|
12724 |
+
"epoch": 0.15742164655937968,
|
12725 |
+
"grad_norm": 2.288451910018921,
|
12726 |
+
"learning_rate": 0.00016862269220767966,
|
12727 |
+
"loss": 0.7653,
|
12728 |
+
"step": 1817
|
12729 |
+
},
|
12730 |
+
{
|
12731 |
+
"epoch": 0.15750828477983062,
|
12732 |
+
"grad_norm": 2.9387903213500977,
|
12733 |
+
"learning_rate": 0.00016860535667851263,
|
12734 |
+
"loss": 0.9876,
|
12735 |
+
"step": 1818
|
12736 |
+
},
|
12737 |
+
{
|
12738 |
+
"epoch": 0.15759492300028158,
|
12739 |
+
"grad_norm": 2.2507741451263428,
|
12740 |
+
"learning_rate": 0.0001685880211493456,
|
12741 |
+
"loss": 0.7271,
|
12742 |
+
"step": 1819
|
12743 |
+
},
|
12744 |
+
{
|
12745 |
+
"epoch": 0.1576815612207325,
|
12746 |
+
"grad_norm": 3.723278284072876,
|
12747 |
+
"learning_rate": 0.00016857068562017857,
|
12748 |
+
"loss": 1.4557,
|
12749 |
+
"step": 1820
|
12750 |
+
},
|
12751 |
+
{
|
12752 |
+
"epoch": 0.15776819944118348,
|
12753 |
+
"grad_norm": 2.301093816757202,
|
12754 |
+
"learning_rate": 0.00016855335009101154,
|
12755 |
+
"loss": 0.7645,
|
12756 |
+
"step": 1821
|
12757 |
+
},
|
12758 |
+
{
|
12759 |
+
"epoch": 0.15785483766163444,
|
12760 |
+
"grad_norm": 2.5900495052337646,
|
12761 |
+
"learning_rate": 0.00016853601456184451,
|
12762 |
+
"loss": 0.9459,
|
12763 |
+
"step": 1822
|
12764 |
+
},
|
12765 |
+
{
|
12766 |
+
"epoch": 0.15794147588208537,
|
12767 |
+
"grad_norm": 2.217280149459839,
|
12768 |
+
"learning_rate": 0.0001685186790326775,
|
12769 |
+
"loss": 0.7377,
|
12770 |
+
"step": 1823
|
12771 |
+
},
|
12772 |
+
{
|
12773 |
+
"epoch": 0.15802811410253634,
|
12774 |
+
"grad_norm": 4.1909918785095215,
|
12775 |
+
"learning_rate": 0.00016850134350351046,
|
12776 |
+
"loss": 1.7084,
|
12777 |
+
"step": 1824
|
12778 |
+
},
|
12779 |
+
{
|
12780 |
+
"epoch": 0.15811475232298727,
|
12781 |
+
"grad_norm": 3.2604100704193115,
|
12782 |
+
"learning_rate": 0.00016848400797434343,
|
12783 |
+
"loss": 1.3074,
|
12784 |
+
"step": 1825
|
12785 |
+
},
|
12786 |
+
{
|
12787 |
+
"epoch": 0.15820139054343824,
|
12788 |
+
"grad_norm": 1.4537760019302368,
|
12789 |
+
"learning_rate": 0.0001684666724451764,
|
12790 |
+
"loss": 0.5581,
|
12791 |
+
"step": 1826
|
12792 |
+
},
|
12793 |
+
{
|
12794 |
+
"epoch": 0.1582880287638892,
|
12795 |
+
"grad_norm": 2.1009860038757324,
|
12796 |
+
"learning_rate": 0.00016844933691600937,
|
12797 |
+
"loss": 0.7139,
|
12798 |
+
"step": 1827
|
12799 |
+
},
|
12800 |
+
{
|
12801 |
+
"epoch": 0.15837466698434013,
|
12802 |
+
"grad_norm": 2.6467387676239014,
|
12803 |
+
"learning_rate": 0.00016843200138684234,
|
12804 |
+
"loss": 0.7999,
|
12805 |
+
"step": 1828
|
12806 |
+
},
|
12807 |
+
{
|
12808 |
+
"epoch": 0.1584613052047911,
|
12809 |
+
"grad_norm": 2.213688611984253,
|
12810 |
+
"learning_rate": 0.00016841466585767532,
|
12811 |
+
"loss": 0.7933,
|
12812 |
+
"step": 1829
|
12813 |
+
},
|
12814 |
+
{
|
12815 |
+
"epoch": 0.15854794342524203,
|
12816 |
+
"grad_norm": 3.8144092559814453,
|
12817 |
+
"learning_rate": 0.0001683973303285083,
|
12818 |
+
"loss": 1.2431,
|
12819 |
+
"step": 1830
|
12820 |
+
},
|
12821 |
+
{
|
12822 |
+
"epoch": 0.158634581645693,
|
12823 |
+
"grad_norm": 2.3899083137512207,
|
12824 |
+
"learning_rate": 0.00016837999479934126,
|
12825 |
+
"loss": 0.7783,
|
12826 |
+
"step": 1831
|
12827 |
+
},
|
12828 |
+
{
|
12829 |
+
"epoch": 0.15872121986614396,
|
12830 |
+
"grad_norm": 2.191710948944092,
|
12831 |
+
"learning_rate": 0.00016836265927017423,
|
12832 |
+
"loss": 0.7444,
|
12833 |
+
"step": 1832
|
12834 |
+
},
|
12835 |
+
{
|
12836 |
+
"epoch": 0.1588078580865949,
|
12837 |
+
"grad_norm": 2.8994059562683105,
|
12838 |
+
"learning_rate": 0.0001683453237410072,
|
12839 |
+
"loss": 1.3632,
|
12840 |
+
"step": 1833
|
12841 |
+
},
|
12842 |
+
{
|
12843 |
+
"epoch": 0.15889449630704586,
|
12844 |
+
"grad_norm": 2.2981436252593994,
|
12845 |
+
"learning_rate": 0.00016832798821184017,
|
12846 |
+
"loss": 0.7151,
|
12847 |
+
"step": 1834
|
12848 |
+
},
|
12849 |
+
{
|
12850 |
+
"epoch": 0.1589811345274968,
|
12851 |
+
"grad_norm": 3.271723508834839,
|
12852 |
+
"learning_rate": 0.00016831065268267315,
|
12853 |
+
"loss": 1.3908,
|
12854 |
+
"step": 1835
|
12855 |
+
},
|
12856 |
+
{
|
12857 |
+
"epoch": 0.15906777274794776,
|
12858 |
+
"grad_norm": 2.743417978286743,
|
12859 |
+
"learning_rate": 0.00016829331715350612,
|
12860 |
+
"loss": 0.802,
|
12861 |
+
"step": 1836
|
12862 |
+
},
|
12863 |
+
{
|
12864 |
+
"epoch": 0.15915441096839872,
|
12865 |
+
"grad_norm": 1.8981465101242065,
|
12866 |
+
"learning_rate": 0.0001682759816243391,
|
12867 |
+
"loss": 0.5853,
|
12868 |
+
"step": 1837
|
12869 |
+
},
|
12870 |
+
{
|
12871 |
+
"epoch": 0.15924104918884965,
|
12872 |
+
"grad_norm": 3.2041971683502197,
|
12873 |
+
"learning_rate": 0.00016825864609517206,
|
12874 |
+
"loss": 0.9764,
|
12875 |
+
"step": 1838
|
12876 |
+
},
|
12877 |
+
{
|
12878 |
+
"epoch": 0.15932768740930062,
|
12879 |
+
"grad_norm": 2.1467788219451904,
|
12880 |
+
"learning_rate": 0.00016824131056600503,
|
12881 |
+
"loss": 0.7174,
|
12882 |
+
"step": 1839
|
12883 |
+
},
|
12884 |
+
{
|
12885 |
+
"epoch": 0.15941432562975155,
|
12886 |
+
"grad_norm": 2.008119583129883,
|
12887 |
+
"learning_rate": 0.000168223975036838,
|
12888 |
+
"loss": 0.7753,
|
12889 |
+
"step": 1840
|
12890 |
+
},
|
12891 |
+
{
|
12892 |
+
"epoch": 0.15950096385020252,
|
12893 |
+
"grad_norm": 3.5105156898498535,
|
12894 |
+
"learning_rate": 0.00016820663950767098,
|
12895 |
+
"loss": 1.2352,
|
12896 |
+
"step": 1841
|
12897 |
+
},
|
12898 |
+
{
|
12899 |
+
"epoch": 0.15958760207065348,
|
12900 |
+
"grad_norm": 2.0866858959198,
|
12901 |
+
"learning_rate": 0.00016818930397850395,
|
12902 |
+
"loss": 0.7283,
|
12903 |
+
"step": 1842
|
12904 |
+
},
|
12905 |
+
{
|
12906 |
+
"epoch": 0.1596742402911044,
|
12907 |
+
"grad_norm": 3.6560466289520264,
|
12908 |
+
"learning_rate": 0.00016817196844933692,
|
12909 |
+
"loss": 1.357,
|
12910 |
+
"step": 1843
|
12911 |
+
},
|
12912 |
+
{
|
12913 |
+
"epoch": 0.15976087851155538,
|
12914 |
+
"grad_norm": 2.019174098968506,
|
12915 |
+
"learning_rate": 0.0001681546329201699,
|
12916 |
+
"loss": 0.7174,
|
12917 |
+
"step": 1844
|
12918 |
+
},
|
12919 |
+
{
|
12920 |
+
"epoch": 0.1598475167320063,
|
12921 |
+
"grad_norm": 2.8867852687835693,
|
12922 |
+
"learning_rate": 0.0001681372973910029,
|
12923 |
+
"loss": 1.3516,
|
12924 |
+
"step": 1845
|
12925 |
+
},
|
12926 |
+
{
|
12927 |
+
"epoch": 0.15993415495245727,
|
12928 |
+
"grad_norm": 1.8973006010055542,
|
12929 |
+
"learning_rate": 0.00016811996186183586,
|
12930 |
+
"loss": 0.7153,
|
12931 |
+
"step": 1846
|
12932 |
+
},
|
12933 |
+
{
|
12934 |
+
"epoch": 0.16002079317290824,
|
12935 |
+
"grad_norm": 2.8100392818450928,
|
12936 |
+
"learning_rate": 0.00016810262633266883,
|
12937 |
+
"loss": 1.5585,
|
12938 |
+
"step": 1847
|
12939 |
+
},
|
12940 |
+
{
|
12941 |
+
"epoch": 0.16010743139335917,
|
12942 |
+
"grad_norm": 2.069488763809204,
|
12943 |
+
"learning_rate": 0.0001680852908035018,
|
12944 |
+
"loss": 0.7012,
|
12945 |
+
"step": 1848
|
12946 |
+
},
|
12947 |
+
{
|
12948 |
+
"epoch": 0.16019406961381014,
|
12949 |
+
"grad_norm": 1.8301706314086914,
|
12950 |
+
"learning_rate": 0.00016806795527433478,
|
12951 |
+
"loss": 0.7514,
|
12952 |
+
"step": 1849
|
12953 |
+
},
|
12954 |
+
{
|
12955 |
+
"epoch": 0.16028070783426107,
|
12956 |
+
"grad_norm": 3.1384923458099365,
|
12957 |
+
"learning_rate": 0.00016805061974516775,
|
12958 |
+
"loss": 1.1052,
|
12959 |
+
"step": 1850
|
12960 |
+
},
|
12961 |
+
{
|
12962 |
+
"epoch": 0.16036734605471203,
|
12963 |
+
"grad_norm": 1.8604960441589355,
|
12964 |
+
"learning_rate": 0.00016803328421600072,
|
12965 |
+
"loss": 0.7039,
|
12966 |
+
"step": 1851
|
12967 |
+
},
|
12968 |
+
{
|
12969 |
+
"epoch": 0.160453984275163,
|
12970 |
+
"grad_norm": 2.5062098503112793,
|
12971 |
+
"learning_rate": 0.0001680159486868337,
|
12972 |
+
"loss": 0.684,
|
12973 |
+
"step": 1852
|
12974 |
+
},
|
12975 |
+
{
|
12976 |
+
"epoch": 0.16054062249561393,
|
12977 |
+
"grad_norm": 4.232210636138916,
|
12978 |
+
"learning_rate": 0.00016799861315766666,
|
12979 |
+
"loss": 1.2851,
|
12980 |
+
"step": 1853
|
12981 |
+
},
|
12982 |
+
{
|
12983 |
+
"epoch": 0.1606272607160649,
|
12984 |
+
"grad_norm": 1.7405468225479126,
|
12985 |
+
"learning_rate": 0.00016798127762849963,
|
12986 |
+
"loss": 0.6392,
|
12987 |
+
"step": 1854
|
12988 |
+
},
|
12989 |
+
{
|
12990 |
+
"epoch": 0.16071389893651583,
|
12991 |
+
"grad_norm": 2.894181251525879,
|
12992 |
+
"learning_rate": 0.0001679639420993326,
|
12993 |
+
"loss": 1.0375,
|
12994 |
+
"step": 1855
|
12995 |
+
},
|
12996 |
+
{
|
12997 |
+
"epoch": 0.1608005371569668,
|
12998 |
+
"grad_norm": 3.2232768535614014,
|
12999 |
+
"learning_rate": 0.00016794660657016558,
|
13000 |
+
"loss": 1.4509,
|
13001 |
+
"step": 1856
|
13002 |
+
},
|
13003 |
+
{
|
13004 |
+
"epoch": 0.16088717537741776,
|
13005 |
+
"grad_norm": 1.8261209726333618,
|
13006 |
+
"learning_rate": 0.00016792927104099855,
|
13007 |
+
"loss": 0.7366,
|
13008 |
+
"step": 1857
|
13009 |
+
},
|
13010 |
+
{
|
13011 |
+
"epoch": 0.1609738135978687,
|
13012 |
+
"grad_norm": 1.3260220289230347,
|
13013 |
+
"learning_rate": 0.00016791193551183152,
|
13014 |
+
"loss": 0.564,
|
13015 |
+
"step": 1858
|
13016 |
+
},
|
13017 |
+
{
|
13018 |
+
"epoch": 0.16106045181831966,
|
13019 |
+
"grad_norm": 2.1255099773406982,
|
13020 |
+
"learning_rate": 0.0001678945999826645,
|
13021 |
+
"loss": 0.688,
|
13022 |
+
"step": 1859
|
13023 |
+
},
|
13024 |
+
{
|
13025 |
+
"epoch": 0.1611470900387706,
|
13026 |
+
"grad_norm": 1.9587273597717285,
|
13027 |
+
"learning_rate": 0.00016787726445349746,
|
13028 |
+
"loss": 0.6464,
|
13029 |
+
"step": 1860
|
13030 |
+
},
|
13031 |
+
{
|
13032 |
+
"epoch": 0.16123372825922155,
|
13033 |
+
"grad_norm": 3.3253912925720215,
|
13034 |
+
"learning_rate": 0.00016785992892433043,
|
13035 |
+
"loss": 1.2779,
|
13036 |
+
"step": 1861
|
13037 |
+
},
|
13038 |
+
{
|
13039 |
+
"epoch": 0.16132036647967252,
|
13040 |
+
"grad_norm": 2.8991646766662598,
|
13041 |
+
"learning_rate": 0.0001678425933951634,
|
13042 |
+
"loss": 1.0444,
|
13043 |
+
"step": 1862
|
13044 |
+
},
|
13045 |
+
{
|
13046 |
+
"epoch": 0.16140700470012345,
|
13047 |
+
"grad_norm": 1.9616948366165161,
|
13048 |
+
"learning_rate": 0.00016782525786599638,
|
13049 |
+
"loss": 0.6819,
|
13050 |
+
"step": 1863
|
13051 |
+
},
|
13052 |
+
{
|
13053 |
+
"epoch": 0.16149364292057442,
|
13054 |
+
"grad_norm": 1.6658324003219604,
|
13055 |
+
"learning_rate": 0.00016780792233682935,
|
13056 |
+
"loss": 0.6422,
|
13057 |
+
"step": 1864
|
13058 |
+
},
|
13059 |
+
{
|
13060 |
+
"epoch": 0.16158028114102535,
|
13061 |
+
"grad_norm": 1.3089711666107178,
|
13062 |
+
"learning_rate": 0.00016779058680766232,
|
13063 |
+
"loss": 0.5879,
|
13064 |
+
"step": 1865
|
13065 |
+
},
|
13066 |
+
{
|
13067 |
+
"epoch": 0.1616669193614763,
|
13068 |
+
"grad_norm": 2.3709726333618164,
|
13069 |
+
"learning_rate": 0.0001677732512784953,
|
13070 |
+
"loss": 0.767,
|
13071 |
+
"step": 1866
|
13072 |
+
},
|
13073 |
+
{
|
13074 |
+
"epoch": 0.16175355758192728,
|
13075 |
+
"grad_norm": 2.0764918327331543,
|
13076 |
+
"learning_rate": 0.00016775591574932826,
|
13077 |
+
"loss": 0.7081,
|
13078 |
+
"step": 1867
|
13079 |
+
},
|
13080 |
+
{
|
13081 |
+
"epoch": 0.1618401958023782,
|
13082 |
+
"grad_norm": 3.3698534965515137,
|
13083 |
+
"learning_rate": 0.00016773858022016124,
|
13084 |
+
"loss": 1.2156,
|
13085 |
+
"step": 1868
|
13086 |
+
},
|
13087 |
+
{
|
13088 |
+
"epoch": 0.16192683402282917,
|
13089 |
+
"grad_norm": 3.6001696586608887,
|
13090 |
+
"learning_rate": 0.0001677212446909942,
|
13091 |
+
"loss": 1.1197,
|
13092 |
+
"step": 1869
|
13093 |
+
},
|
13094 |
+
{
|
13095 |
+
"epoch": 0.1620134722432801,
|
13096 |
+
"grad_norm": 2.576395034790039,
|
13097 |
+
"learning_rate": 0.00016770390916182718,
|
13098 |
+
"loss": 0.6482,
|
13099 |
+
"step": 1870
|
13100 |
+
},
|
13101 |
+
{
|
13102 |
+
"epoch": 0.16210011046373107,
|
13103 |
+
"grad_norm": 3.471985340118408,
|
13104 |
+
"learning_rate": 0.00016768657363266015,
|
13105 |
+
"loss": 1.284,
|
13106 |
+
"step": 1871
|
13107 |
+
},
|
13108 |
+
{
|
13109 |
+
"epoch": 0.16218674868418204,
|
13110 |
+
"grad_norm": 1.9664011001586914,
|
13111 |
+
"learning_rate": 0.00016766923810349312,
|
13112 |
+
"loss": 0.6408,
|
13113 |
+
"step": 1872
|
13114 |
+
},
|
13115 |
+
{
|
13116 |
+
"epoch": 0.16227338690463297,
|
13117 |
+
"grad_norm": 2.1498782634735107,
|
13118 |
+
"learning_rate": 0.00016765190257432607,
|
13119 |
+
"loss": 0.7813,
|
13120 |
+
"step": 1873
|
13121 |
+
},
|
13122 |
+
{
|
13123 |
+
"epoch": 0.16236002512508393,
|
13124 |
+
"grad_norm": 2.6741929054260254,
|
13125 |
+
"learning_rate": 0.00016763456704515904,
|
13126 |
+
"loss": 0.8467,
|
13127 |
+
"step": 1874
|
13128 |
+
},
|
13129 |
+
{
|
13130 |
+
"epoch": 0.16244666334553487,
|
13131 |
+
"grad_norm": 1.8352891206741333,
|
13132 |
+
"learning_rate": 0.000167617231515992,
|
13133 |
+
"loss": 0.7746,
|
13134 |
+
"step": 1875
|
13135 |
+
},
|
13136 |
+
{
|
13137 |
+
"epoch": 0.16253330156598583,
|
13138 |
+
"grad_norm": 3.2602646350860596,
|
13139 |
+
"learning_rate": 0.00016759989598682498,
|
13140 |
+
"loss": 0.8091,
|
13141 |
+
"step": 1876
|
13142 |
+
},
|
13143 |
+
{
|
13144 |
+
"epoch": 0.1626199397864368,
|
13145 |
+
"grad_norm": 2.223757266998291,
|
13146 |
+
"learning_rate": 0.00016758256045765798,
|
13147 |
+
"loss": 0.8002,
|
13148 |
+
"step": 1877
|
13149 |
+
},
|
13150 |
+
{
|
13151 |
+
"epoch": 0.16270657800688773,
|
13152 |
+
"grad_norm": 2.9596664905548096,
|
13153 |
+
"learning_rate": 0.00016756522492849095,
|
13154 |
+
"loss": 1.1954,
|
13155 |
+
"step": 1878
|
13156 |
+
},
|
13157 |
+
{
|
13158 |
+
"epoch": 0.1627932162273387,
|
13159 |
+
"grad_norm": 3.6357240676879883,
|
13160 |
+
"learning_rate": 0.00016754788939932392,
|
13161 |
+
"loss": 1.2972,
|
13162 |
+
"step": 1879
|
13163 |
+
},
|
13164 |
+
{
|
13165 |
+
"epoch": 0.16287985444778963,
|
13166 |
+
"grad_norm": 2.1043758392333984,
|
13167 |
+
"learning_rate": 0.0001675305538701569,
|
13168 |
+
"loss": 0.8508,
|
13169 |
+
"step": 1880
|
13170 |
+
},
|
13171 |
+
{
|
13172 |
+
"epoch": 0.1629664926682406,
|
13173 |
+
"grad_norm": 4.1881842613220215,
|
13174 |
+
"learning_rate": 0.00016751321834098987,
|
13175 |
+
"loss": 1.1804,
|
13176 |
+
"step": 1881
|
13177 |
+
},
|
13178 |
+
{
|
13179 |
+
"epoch": 0.16305313088869156,
|
13180 |
+
"grad_norm": 3.396552801132202,
|
13181 |
+
"learning_rate": 0.00016749588281182284,
|
13182 |
+
"loss": 1.2794,
|
13183 |
+
"step": 1882
|
13184 |
+
},
|
13185 |
+
{
|
13186 |
+
"epoch": 0.1631397691091425,
|
13187 |
+
"grad_norm": 1.4036118984222412,
|
13188 |
+
"learning_rate": 0.0001674785472826558,
|
13189 |
+
"loss": 0.6308,
|
13190 |
+
"step": 1883
|
13191 |
+
},
|
13192 |
+
{
|
13193 |
+
"epoch": 0.16322640732959345,
|
13194 |
+
"grad_norm": 1.6631494760513306,
|
13195 |
+
"learning_rate": 0.00016746121175348878,
|
13196 |
+
"loss": 0.6111,
|
13197 |
+
"step": 1884
|
13198 |
+
},
|
13199 |
+
{
|
13200 |
+
"epoch": 0.1633130455500444,
|
13201 |
+
"grad_norm": 3.6630938053131104,
|
13202 |
+
"learning_rate": 0.00016744387622432175,
|
13203 |
+
"loss": 1.1099,
|
13204 |
+
"step": 1885
|
13205 |
+
},
|
13206 |
+
{
|
13207 |
+
"epoch": 0.16339968377049535,
|
13208 |
+
"grad_norm": 4.1862874031066895,
|
13209 |
+
"learning_rate": 0.00016742654069515473,
|
13210 |
+
"loss": 1.4206,
|
13211 |
+
"step": 1886
|
13212 |
+
},
|
13213 |
+
{
|
13214 |
+
"epoch": 0.16348632199094632,
|
13215 |
+
"grad_norm": 2.0839083194732666,
|
13216 |
+
"learning_rate": 0.0001674092051659877,
|
13217 |
+
"loss": 0.7446,
|
13218 |
+
"step": 1887
|
13219 |
+
},
|
13220 |
+
{
|
13221 |
+
"epoch": 0.16357296021139725,
|
13222 |
+
"grad_norm": 3.600731134414673,
|
13223 |
+
"learning_rate": 0.00016739186963682067,
|
13224 |
+
"loss": 1.4344,
|
13225 |
+
"step": 1888
|
13226 |
+
},
|
13227 |
+
{
|
13228 |
+
"epoch": 0.1636595984318482,
|
13229 |
+
"grad_norm": 2.0269649028778076,
|
13230 |
+
"learning_rate": 0.00016737453410765364,
|
13231 |
+
"loss": 0.6072,
|
13232 |
+
"step": 1889
|
13233 |
+
},
|
13234 |
+
{
|
13235 |
+
"epoch": 0.16374623665229915,
|
13236 |
+
"grad_norm": 2.5664050579071045,
|
13237 |
+
"learning_rate": 0.0001673571985784866,
|
13238 |
+
"loss": 1.0184,
|
13239 |
+
"step": 1890
|
13240 |
+
},
|
13241 |
+
{
|
13242 |
+
"epoch": 0.1638328748727501,
|
13243 |
+
"grad_norm": 2.2857000827789307,
|
13244 |
+
"learning_rate": 0.00016733986304931958,
|
13245 |
+
"loss": 0.6962,
|
13246 |
+
"step": 1891
|
13247 |
+
},
|
13248 |
+
{
|
13249 |
+
"epoch": 0.16391951309320107,
|
13250 |
+
"grad_norm": 3.5362954139709473,
|
13251 |
+
"learning_rate": 0.00016732252752015255,
|
13252 |
+
"loss": 1.0255,
|
13253 |
+
"step": 1892
|
13254 |
+
},
|
13255 |
+
{
|
13256 |
+
"epoch": 0.164006151313652,
|
13257 |
+
"grad_norm": 2.645073413848877,
|
13258 |
+
"learning_rate": 0.00016730519199098553,
|
13259 |
+
"loss": 1.5127,
|
13260 |
+
"step": 1893
|
13261 |
+
},
|
13262 |
+
{
|
13263 |
+
"epoch": 0.16409278953410297,
|
13264 |
+
"grad_norm": 1.9250407218933105,
|
13265 |
+
"learning_rate": 0.0001672878564618185,
|
13266 |
+
"loss": 0.7112,
|
13267 |
+
"step": 1894
|
13268 |
+
},
|
13269 |
+
{
|
13270 |
+
"epoch": 0.1641794277545539,
|
13271 |
+
"grad_norm": 2.780883312225342,
|
13272 |
+
"learning_rate": 0.00016727052093265147,
|
13273 |
+
"loss": 1.2273,
|
13274 |
+
"step": 1895
|
13275 |
+
},
|
13276 |
+
{
|
13277 |
+
"epoch": 0.16426606597500487,
|
13278 |
+
"grad_norm": 2.1424574851989746,
|
13279 |
+
"learning_rate": 0.00016725318540348444,
|
13280 |
+
"loss": 0.6137,
|
13281 |
+
"step": 1896
|
13282 |
+
},
|
13283 |
+
{
|
13284 |
+
"epoch": 0.16435270419545583,
|
13285 |
+
"grad_norm": 2.8967080116271973,
|
13286 |
+
"learning_rate": 0.0001672358498743174,
|
13287 |
+
"loss": 1.3952,
|
13288 |
+
"step": 1897
|
13289 |
+
},
|
13290 |
+
{
|
13291 |
+
"epoch": 0.16443934241590677,
|
13292 |
+
"grad_norm": 3.9579975605010986,
|
13293 |
+
"learning_rate": 0.00016721851434515038,
|
13294 |
+
"loss": 1.4253,
|
13295 |
+
"step": 1898
|
13296 |
+
},
|
13297 |
+
{
|
13298 |
+
"epoch": 0.16452598063635773,
|
13299 |
+
"grad_norm": 2.759655714035034,
|
13300 |
+
"learning_rate": 0.00016720117881598336,
|
13301 |
+
"loss": 0.7778,
|
13302 |
+
"step": 1899
|
13303 |
+
},
|
13304 |
+
{
|
13305 |
+
"epoch": 0.16461261885680867,
|
13306 |
+
"grad_norm": 1.7057126760482788,
|
13307 |
+
"learning_rate": 0.00016718384328681633,
|
13308 |
+
"loss": 0.6089,
|
13309 |
+
"step": 1900
|
13310 |
+
},
|
13311 |
+
{
|
13312 |
+
"epoch": 0.16469925707725963,
|
13313 |
+
"grad_norm": 2.1132805347442627,
|
13314 |
+
"learning_rate": 0.0001671665077576493,
|
13315 |
+
"loss": 0.8122,
|
13316 |
+
"step": 1901
|
13317 |
+
},
|
13318 |
+
{
|
13319 |
+
"epoch": 0.1647858952977106,
|
13320 |
+
"grad_norm": 2.0771095752716064,
|
13321 |
+
"learning_rate": 0.00016714917222848227,
|
13322 |
+
"loss": 0.6464,
|
13323 |
+
"step": 1902
|
13324 |
+
},
|
13325 |
+
{
|
13326 |
+
"epoch": 0.16487253351816153,
|
13327 |
+
"grad_norm": 2.250375270843506,
|
13328 |
+
"learning_rate": 0.00016713183669931524,
|
13329 |
+
"loss": 1.3111,
|
13330 |
+
"step": 1903
|
13331 |
+
},
|
13332 |
+
{
|
13333 |
+
"epoch": 0.1649591717386125,
|
13334 |
+
"grad_norm": 3.50311279296875,
|
13335 |
+
"learning_rate": 0.00016711450117014821,
|
13336 |
+
"loss": 1.5869,
|
13337 |
+
"step": 1904
|
13338 |
+
},
|
13339 |
+
{
|
13340 |
+
"epoch": 0.16504580995906343,
|
13341 |
+
"grad_norm": 2.48093581199646,
|
13342 |
+
"learning_rate": 0.00016709716564098119,
|
13343 |
+
"loss": 0.7445,
|
13344 |
+
"step": 1905
|
13345 |
+
},
|
13346 |
+
{
|
13347 |
+
"epoch": 0.1651324481795144,
|
13348 |
+
"grad_norm": 2.7426469326019287,
|
13349 |
+
"learning_rate": 0.00016707983011181416,
|
13350 |
+
"loss": 1.3606,
|
13351 |
+
"step": 1906
|
13352 |
+
},
|
13353 |
+
{
|
13354 |
+
"epoch": 0.16521908639996535,
|
13355 |
+
"grad_norm": 2.7575409412384033,
|
13356 |
+
"learning_rate": 0.00016706249458264713,
|
13357 |
+
"loss": 1.8804,
|
13358 |
+
"step": 1907
|
13359 |
+
},
|
13360 |
+
{
|
13361 |
+
"epoch": 0.1653057246204163,
|
13362 |
+
"grad_norm": 2.152456521987915,
|
13363 |
+
"learning_rate": 0.0001670451590534801,
|
13364 |
+
"loss": 0.7494,
|
13365 |
+
"step": 1908
|
13366 |
+
},
|
13367 |
+
{
|
13368 |
+
"epoch": 0.16539236284086725,
|
13369 |
+
"grad_norm": 3.2211177349090576,
|
13370 |
+
"learning_rate": 0.0001670278235243131,
|
13371 |
+
"loss": 1.1857,
|
13372 |
+
"step": 1909
|
13373 |
+
},
|
13374 |
+
{
|
13375 |
+
"epoch": 0.1654790010613182,
|
13376 |
+
"grad_norm": 2.4371657371520996,
|
13377 |
+
"learning_rate": 0.00016701048799514607,
|
13378 |
+
"loss": 0.8392,
|
13379 |
+
"step": 1910
|
13380 |
+
},
|
13381 |
+
{
|
13382 |
+
"epoch": 0.16556563928176915,
|
13383 |
+
"grad_norm": 2.109482526779175,
|
13384 |
+
"learning_rate": 0.00016699315246597904,
|
13385 |
+
"loss": 0.7437,
|
13386 |
+
"step": 1911
|
13387 |
+
},
|
13388 |
+
{
|
13389 |
+
"epoch": 0.1656522775022201,
|
13390 |
+
"grad_norm": 2.00662899017334,
|
13391 |
+
"learning_rate": 0.00016697581693681201,
|
13392 |
+
"loss": 0.6836,
|
13393 |
+
"step": 1912
|
13394 |
+
},
|
13395 |
+
{
|
13396 |
+
"epoch": 0.16573891572267105,
|
13397 |
+
"grad_norm": 1.6882870197296143,
|
13398 |
+
"learning_rate": 0.00016695848140764499,
|
13399 |
+
"loss": 0.6289,
|
13400 |
+
"step": 1913
|
13401 |
+
},
|
13402 |
+
{
|
13403 |
+
"epoch": 0.165825553943122,
|
13404 |
+
"grad_norm": 1.8132233619689941,
|
13405 |
+
"learning_rate": 0.00016694114587847796,
|
13406 |
+
"loss": 0.6837,
|
13407 |
+
"step": 1914
|
13408 |
+
},
|
13409 |
+
{
|
13410 |
+
"epoch": 0.16591219216357295,
|
13411 |
+
"grad_norm": 2.0409324169158936,
|
13412 |
+
"learning_rate": 0.00016692381034931093,
|
13413 |
+
"loss": 0.6665,
|
13414 |
+
"step": 1915
|
13415 |
+
},
|
13416 |
+
{
|
13417 |
+
"epoch": 0.1659988303840239,
|
13418 |
+
"grad_norm": 2.9584858417510986,
|
13419 |
+
"learning_rate": 0.0001669064748201439,
|
13420 |
+
"loss": 1.0933,
|
13421 |
+
"step": 1916
|
13422 |
+
},
|
13423 |
+
{
|
13424 |
+
"epoch": 0.16608546860447487,
|
13425 |
+
"grad_norm": 2.232454299926758,
|
13426 |
+
"learning_rate": 0.00016688913929097687,
|
13427 |
+
"loss": 0.9112,
|
13428 |
+
"step": 1917
|
13429 |
+
},
|
13430 |
+
{
|
13431 |
+
"epoch": 0.1661721068249258,
|
13432 |
+
"grad_norm": 1.7857698202133179,
|
13433 |
+
"learning_rate": 0.00016687180376180984,
|
13434 |
+
"loss": 0.6247,
|
13435 |
+
"step": 1918
|
13436 |
+
},
|
13437 |
+
{
|
13438 |
+
"epoch": 0.16625874504537677,
|
13439 |
+
"grad_norm": 1.5558698177337646,
|
13440 |
+
"learning_rate": 0.00016685446823264282,
|
13441 |
+
"loss": 0.5683,
|
13442 |
+
"step": 1919
|
13443 |
+
},
|
13444 |
+
{
|
13445 |
+
"epoch": 0.1663453832658277,
|
13446 |
+
"grad_norm": 1.949049472808838,
|
13447 |
+
"learning_rate": 0.0001668371327034758,
|
13448 |
+
"loss": 0.6647,
|
13449 |
+
"step": 1920
|
13450 |
+
},
|
13451 |
+
{
|
13452 |
+
"epoch": 0.16643202148627867,
|
13453 |
+
"grad_norm": 3.2460975646972656,
|
13454 |
+
"learning_rate": 0.00016681979717430876,
|
13455 |
+
"loss": 1.1478,
|
13456 |
+
"step": 1921
|
13457 |
+
},
|
13458 |
+
{
|
13459 |
+
"epoch": 0.16651865970672963,
|
13460 |
+
"grad_norm": 3.3453333377838135,
|
13461 |
+
"learning_rate": 0.00016680246164514173,
|
13462 |
+
"loss": 1.2167,
|
13463 |
+
"step": 1922
|
13464 |
+
},
|
13465 |
+
{
|
13466 |
+
"epoch": 0.16660529792718057,
|
13467 |
+
"grad_norm": 2.664114475250244,
|
13468 |
+
"learning_rate": 0.0001667851261159747,
|
13469 |
+
"loss": 1.4783,
|
13470 |
+
"step": 1923
|
13471 |
+
},
|
13472 |
+
{
|
13473 |
+
"epoch": 0.16669193614763153,
|
13474 |
+
"grad_norm": 3.62066388130188,
|
13475 |
+
"learning_rate": 0.00016676779058680767,
|
13476 |
+
"loss": 1.0352,
|
13477 |
+
"step": 1924
|
13478 |
+
},
|
13479 |
+
{
|
13480 |
+
"epoch": 0.16677857436808247,
|
13481 |
+
"grad_norm": 1.897339105606079,
|
13482 |
+
"learning_rate": 0.00016675045505764065,
|
13483 |
+
"loss": 0.6765,
|
13484 |
+
"step": 1925
|
13485 |
+
},
|
13486 |
+
{
|
13487 |
+
"epoch": 0.16686521258853343,
|
13488 |
+
"grad_norm": 2.7117483615875244,
|
13489 |
+
"learning_rate": 0.00016673311952847362,
|
13490 |
+
"loss": 0.8275,
|
13491 |
+
"step": 1926
|
13492 |
+
},
|
13493 |
+
{
|
13494 |
+
"epoch": 0.1669518508089844,
|
13495 |
+
"grad_norm": 2.2300453186035156,
|
13496 |
+
"learning_rate": 0.0001667157839993066,
|
13497 |
+
"loss": 0.7969,
|
13498 |
+
"step": 1927
|
13499 |
+
},
|
13500 |
+
{
|
13501 |
+
"epoch": 0.16703848902943533,
|
13502 |
+
"grad_norm": 2.0223610401153564,
|
13503 |
+
"learning_rate": 0.00016669844847013956,
|
13504 |
+
"loss": 0.6976,
|
13505 |
+
"step": 1928
|
13506 |
+
},
|
13507 |
+
{
|
13508 |
+
"epoch": 0.1671251272498863,
|
13509 |
+
"grad_norm": 2.88665509223938,
|
13510 |
+
"learning_rate": 0.00016668111294097253,
|
13511 |
+
"loss": 1.5507,
|
13512 |
+
"step": 1929
|
13513 |
+
},
|
13514 |
+
{
|
13515 |
+
"epoch": 0.16721176547033723,
|
13516 |
+
"grad_norm": 1.8440682888031006,
|
13517 |
+
"learning_rate": 0.0001666637774118055,
|
13518 |
+
"loss": 0.7916,
|
13519 |
+
"step": 1930
|
13520 |
+
},
|
13521 |
+
{
|
13522 |
+
"epoch": 0.1672984036907882,
|
13523 |
+
"grad_norm": 1.956483006477356,
|
13524 |
+
"learning_rate": 0.00016664644188263847,
|
13525 |
+
"loss": 0.6654,
|
13526 |
+
"step": 1931
|
13527 |
+
},
|
13528 |
+
{
|
13529 |
+
"epoch": 0.16738504191123915,
|
13530 |
+
"grad_norm": 2.1320438385009766,
|
13531 |
+
"learning_rate": 0.00016662910635347145,
|
13532 |
+
"loss": 0.6513,
|
13533 |
+
"step": 1932
|
13534 |
+
},
|
13535 |
+
{
|
13536 |
+
"epoch": 0.1674716801316901,
|
13537 |
+
"grad_norm": 3.3617520332336426,
|
13538 |
+
"learning_rate": 0.00016661177082430442,
|
13539 |
+
"loss": 1.6435,
|
13540 |
+
"step": 1933
|
13541 |
+
},
|
13542 |
+
{
|
13543 |
+
"epoch": 0.16755831835214105,
|
13544 |
+
"grad_norm": 2.2873220443725586,
|
13545 |
+
"learning_rate": 0.0001665944352951374,
|
13546 |
+
"loss": 0.6769,
|
13547 |
+
"step": 1934
|
13548 |
+
},
|
13549 |
+
{
|
13550 |
+
"epoch": 0.16764495657259199,
|
13551 |
+
"grad_norm": 1.912039875984192,
|
13552 |
+
"learning_rate": 0.00016657709976597036,
|
13553 |
+
"loss": 0.6901,
|
13554 |
+
"step": 1935
|
13555 |
+
},
|
13556 |
+
{
|
13557 |
+
"epoch": 0.16773159479304295,
|
13558 |
+
"grad_norm": 1.831507921218872,
|
13559 |
+
"learning_rate": 0.00016655976423680333,
|
13560 |
+
"loss": 0.5683,
|
13561 |
+
"step": 1936
|
13562 |
+
},
|
13563 |
+
{
|
13564 |
+
"epoch": 0.1678182330134939,
|
13565 |
+
"grad_norm": 2.721996545791626,
|
13566 |
+
"learning_rate": 0.0001665424287076363,
|
13567 |
+
"loss": 1.3806,
|
13568 |
+
"step": 1937
|
13569 |
+
},
|
13570 |
+
{
|
13571 |
+
"epoch": 0.16790487123394485,
|
13572 |
+
"grad_norm": 1.8874839544296265,
|
13573 |
+
"learning_rate": 0.00016652509317846928,
|
13574 |
+
"loss": 0.6857,
|
13575 |
+
"step": 1938
|
13576 |
+
},
|
13577 |
+
{
|
13578 |
+
"epoch": 0.1679915094543958,
|
13579 |
+
"grad_norm": 2.900580883026123,
|
13580 |
+
"learning_rate": 0.00016650775764930225,
|
13581 |
+
"loss": 1.3001,
|
13582 |
+
"step": 1939
|
13583 |
+
},
|
13584 |
+
{
|
13585 |
+
"epoch": 0.16807814767484675,
|
13586 |
+
"grad_norm": 3.024061679840088,
|
13587 |
+
"learning_rate": 0.00016649042212013522,
|
13588 |
+
"loss": 1.2226,
|
13589 |
+
"step": 1940
|
13590 |
+
},
|
13591 |
+
{
|
13592 |
+
"epoch": 0.1681647858952977,
|
13593 |
+
"grad_norm": 2.2423317432403564,
|
13594 |
+
"learning_rate": 0.00016647308659096822,
|
13595 |
+
"loss": 0.7904,
|
13596 |
+
"step": 1941
|
13597 |
+
},
|
13598 |
+
{
|
13599 |
+
"epoch": 0.16825142411574867,
|
13600 |
+
"grad_norm": 2.0588197708129883,
|
13601 |
+
"learning_rate": 0.0001664557510618012,
|
13602 |
+
"loss": 0.6943,
|
13603 |
+
"step": 1942
|
13604 |
+
},
|
13605 |
+
{
|
13606 |
+
"epoch": 0.1683380623361996,
|
13607 |
+
"grad_norm": 3.367280960083008,
|
13608 |
+
"learning_rate": 0.00016643841553263416,
|
13609 |
+
"loss": 1.3288,
|
13610 |
+
"step": 1943
|
13611 |
+
},
|
13612 |
+
{
|
13613 |
+
"epoch": 0.16842470055665057,
|
13614 |
+
"grad_norm": 2.6702747344970703,
|
13615 |
+
"learning_rate": 0.00016642108000346713,
|
13616 |
+
"loss": 0.8267,
|
13617 |
+
"step": 1944
|
13618 |
+
},
|
13619 |
+
{
|
13620 |
+
"epoch": 0.1685113387771015,
|
13621 |
+
"grad_norm": 2.179940938949585,
|
13622 |
+
"learning_rate": 0.0001664037444743001,
|
13623 |
+
"loss": 0.8863,
|
13624 |
+
"step": 1945
|
13625 |
+
},
|
13626 |
+
{
|
13627 |
+
"epoch": 0.16859797699755247,
|
13628 |
+
"grad_norm": 1.2475664615631104,
|
13629 |
+
"learning_rate": 0.00016638640894513308,
|
13630 |
+
"loss": 0.4869,
|
13631 |
+
"step": 1946
|
13632 |
+
},
|
13633 |
+
{
|
13634 |
+
"epoch": 0.16868461521800343,
|
13635 |
+
"grad_norm": 2.4083683490753174,
|
13636 |
+
"learning_rate": 0.00016636907341596605,
|
13637 |
+
"loss": 0.8179,
|
13638 |
+
"step": 1947
|
13639 |
+
},
|
13640 |
+
{
|
13641 |
+
"epoch": 0.16877125343845437,
|
13642 |
+
"grad_norm": 2.456315517425537,
|
13643 |
+
"learning_rate": 0.00016635173788679902,
|
13644 |
+
"loss": 0.763,
|
13645 |
+
"step": 1948
|
13646 |
+
},
|
13647 |
+
{
|
13648 |
+
"epoch": 0.16885789165890533,
|
13649 |
+
"grad_norm": 1.5172966718673706,
|
13650 |
+
"learning_rate": 0.000166334402357632,
|
13651 |
+
"loss": 0.7327,
|
13652 |
+
"step": 1949
|
13653 |
+
},
|
13654 |
+
{
|
13655 |
+
"epoch": 0.16894452987935626,
|
13656 |
+
"grad_norm": 1.639425277709961,
|
13657 |
+
"learning_rate": 0.00016631706682846496,
|
13658 |
+
"loss": 0.6647,
|
13659 |
+
"step": 1950
|
13660 |
+
},
|
13661 |
+
{
|
13662 |
+
"epoch": 0.16903116809980723,
|
13663 |
+
"grad_norm": 2.421419620513916,
|
13664 |
+
"learning_rate": 0.00016629973129929793,
|
13665 |
+
"loss": 0.8074,
|
13666 |
+
"step": 1951
|
13667 |
+
},
|
13668 |
+
{
|
13669 |
+
"epoch": 0.1691178063202582,
|
13670 |
+
"grad_norm": 2.0199437141418457,
|
13671 |
+
"learning_rate": 0.0001662823957701309,
|
13672 |
+
"loss": 0.8627,
|
13673 |
+
"step": 1952
|
13674 |
+
},
|
13675 |
+
{
|
13676 |
+
"epoch": 0.16920444454070913,
|
13677 |
+
"grad_norm": 3.442995071411133,
|
13678 |
+
"learning_rate": 0.00016626506024096388,
|
13679 |
+
"loss": 1.1713,
|
13680 |
+
"step": 1953
|
13681 |
+
},
|
13682 |
+
{
|
13683 |
+
"epoch": 0.1692910827611601,
|
13684 |
+
"grad_norm": 2.858927011489868,
|
13685 |
+
"learning_rate": 0.00016624772471179685,
|
13686 |
+
"loss": 0.7123,
|
13687 |
+
"step": 1954
|
13688 |
+
},
|
13689 |
+
{
|
13690 |
+
"epoch": 0.16937772098161102,
|
13691 |
+
"grad_norm": 2.7540299892425537,
|
13692 |
+
"learning_rate": 0.00016623038918262982,
|
13693 |
+
"loss": 0.8941,
|
13694 |
+
"step": 1955
|
13695 |
+
},
|
13696 |
+
{
|
13697 |
+
"epoch": 0.169464359202062,
|
13698 |
+
"grad_norm": 2.427382469177246,
|
13699 |
+
"learning_rate": 0.0001662130536534628,
|
13700 |
+
"loss": 0.8401,
|
13701 |
+
"step": 1956
|
13702 |
+
},
|
13703 |
+
{
|
13704 |
+
"epoch": 0.16955099742251295,
|
13705 |
+
"grad_norm": 2.4466335773468018,
|
13706 |
+
"learning_rate": 0.00016619571812429576,
|
13707 |
+
"loss": 0.8022,
|
13708 |
+
"step": 1957
|
13709 |
+
},
|
13710 |
+
{
|
13711 |
+
"epoch": 0.16963763564296389,
|
13712 |
+
"grad_norm": 1.835880994796753,
|
13713 |
+
"learning_rate": 0.00016617838259512874,
|
13714 |
+
"loss": 0.6049,
|
13715 |
+
"step": 1958
|
13716 |
+
},
|
13717 |
+
{
|
13718 |
+
"epoch": 0.16972427386341485,
|
13719 |
+
"grad_norm": 4.6314826011657715,
|
13720 |
+
"learning_rate": 0.0001661610470659617,
|
13721 |
+
"loss": 1.5288,
|
13722 |
+
"step": 1959
|
13723 |
+
},
|
13724 |
+
{
|
13725 |
+
"epoch": 0.16981091208386578,
|
13726 |
+
"grad_norm": 1.9439544677734375,
|
13727 |
+
"learning_rate": 0.00016614371153679468,
|
13728 |
+
"loss": 0.7296,
|
13729 |
+
"step": 1960
|
13730 |
+
},
|
13731 |
+
{
|
13732 |
+
"epoch": 0.16989755030431675,
|
13733 |
+
"grad_norm": 2.271732807159424,
|
13734 |
+
"learning_rate": 0.00016612637600762765,
|
13735 |
+
"loss": 0.7481,
|
13736 |
+
"step": 1961
|
13737 |
+
},
|
13738 |
+
{
|
13739 |
+
"epoch": 0.1699841885247677,
|
13740 |
+
"grad_norm": 2.5288052558898926,
|
13741 |
+
"learning_rate": 0.00016610904047846062,
|
13742 |
+
"loss": 0.6788,
|
13743 |
+
"step": 1962
|
13744 |
+
},
|
13745 |
+
{
|
13746 |
+
"epoch": 0.17007082674521865,
|
13747 |
+
"grad_norm": 3.5051944255828857,
|
13748 |
+
"learning_rate": 0.0001660917049492936,
|
13749 |
+
"loss": 1.0534,
|
13750 |
+
"step": 1963
|
13751 |
+
},
|
13752 |
+
{
|
13753 |
+
"epoch": 0.1701574649656696,
|
13754 |
+
"grad_norm": 3.456272602081299,
|
13755 |
+
"learning_rate": 0.00016607436942012657,
|
13756 |
+
"loss": 0.7357,
|
13757 |
+
"step": 1964
|
13758 |
+
},
|
13759 |
+
{
|
13760 |
+
"epoch": 0.17024410318612054,
|
13761 |
+
"grad_norm": 3.1805496215820312,
|
13762 |
+
"learning_rate": 0.00016605703389095954,
|
13763 |
+
"loss": 1.1865,
|
13764 |
+
"step": 1965
|
13765 |
+
},
|
13766 |
+
{
|
13767 |
+
"epoch": 0.1703307414065715,
|
13768 |
+
"grad_norm": 3.216212272644043,
|
13769 |
+
"learning_rate": 0.0001660396983617925,
|
13770 |
+
"loss": 1.2175,
|
13771 |
+
"step": 1966
|
13772 |
+
},
|
13773 |
+
{
|
13774 |
+
"epoch": 0.17041737962702247,
|
13775 |
+
"grad_norm": 1.612253189086914,
|
13776 |
+
"learning_rate": 0.00016602236283262548,
|
13777 |
+
"loss": 0.6032,
|
13778 |
+
"step": 1967
|
13779 |
+
},
|
13780 |
+
{
|
13781 |
+
"epoch": 0.1705040178474734,
|
13782 |
+
"grad_norm": 2.18098783493042,
|
13783 |
+
"learning_rate": 0.00016600502730345842,
|
13784 |
+
"loss": 0.6651,
|
13785 |
+
"step": 1968
|
13786 |
+
},
|
13787 |
+
{
|
13788 |
+
"epoch": 0.17059065606792437,
|
13789 |
+
"grad_norm": 2.0245211124420166,
|
13790 |
+
"learning_rate": 0.0001659876917742914,
|
13791 |
+
"loss": 0.7908,
|
13792 |
+
"step": 1969
|
13793 |
+
},
|
13794 |
+
{
|
13795 |
+
"epoch": 0.1706772942883753,
|
13796 |
+
"grad_norm": 3.282804250717163,
|
13797 |
+
"learning_rate": 0.00016597035624512437,
|
13798 |
+
"loss": 1.0258,
|
13799 |
+
"step": 1970
|
13800 |
+
},
|
13801 |
+
{
|
13802 |
+
"epoch": 0.17076393250882627,
|
13803 |
+
"grad_norm": 3.046577215194702,
|
13804 |
+
"learning_rate": 0.00016595302071595734,
|
13805 |
+
"loss": 1.4314,
|
13806 |
+
"step": 1971
|
13807 |
+
},
|
13808 |
+
{
|
13809 |
+
"epoch": 0.17085057072927723,
|
13810 |
+
"grad_norm": 3.9332213401794434,
|
13811 |
+
"learning_rate": 0.0001659356851867903,
|
13812 |
+
"loss": 1.0513,
|
13813 |
+
"step": 1972
|
13814 |
+
},
|
13815 |
+
{
|
13816 |
+
"epoch": 0.17093720894972816,
|
13817 |
+
"grad_norm": 2.1859307289123535,
|
13818 |
+
"learning_rate": 0.0001659183496576233,
|
13819 |
+
"loss": 0.8592,
|
13820 |
+
"step": 1973
|
13821 |
+
},
|
13822 |
+
{
|
13823 |
+
"epoch": 0.17102384717017913,
|
13824 |
+
"grad_norm": 2.1134324073791504,
|
13825 |
+
"learning_rate": 0.00016590101412845628,
|
13826 |
+
"loss": 0.7403,
|
13827 |
+
"step": 1974
|
13828 |
+
},
|
13829 |
+
{
|
13830 |
+
"epoch": 0.17111048539063006,
|
13831 |
+
"grad_norm": 1.9346601963043213,
|
13832 |
+
"learning_rate": 0.00016588367859928925,
|
13833 |
+
"loss": 0.8007,
|
13834 |
+
"step": 1975
|
13835 |
+
},
|
13836 |
+
{
|
13837 |
+
"epoch": 0.17119712361108103,
|
13838 |
+
"grad_norm": 3.205371379852295,
|
13839 |
+
"learning_rate": 0.00016586634307012222,
|
13840 |
+
"loss": 1.4424,
|
13841 |
+
"step": 1976
|
13842 |
+
},
|
13843 |
+
{
|
13844 |
+
"epoch": 0.171283761831532,
|
13845 |
+
"grad_norm": 1.9553583860397339,
|
13846 |
+
"learning_rate": 0.0001658490075409552,
|
13847 |
+
"loss": 0.7433,
|
13848 |
+
"step": 1977
|
13849 |
+
},
|
13850 |
+
{
|
13851 |
+
"epoch": 0.17137040005198292,
|
13852 |
+
"grad_norm": 3.8189244270324707,
|
13853 |
+
"learning_rate": 0.00016583167201178817,
|
13854 |
+
"loss": 1.4445,
|
13855 |
+
"step": 1978
|
13856 |
+
},
|
13857 |
+
{
|
13858 |
+
"epoch": 0.1714570382724339,
|
13859 |
+
"grad_norm": 1.8212469816207886,
|
13860 |
+
"learning_rate": 0.00016581433648262114,
|
13861 |
+
"loss": 0.6635,
|
13862 |
+
"step": 1979
|
13863 |
+
},
|
13864 |
+
{
|
13865 |
+
"epoch": 0.17154367649288482,
|
13866 |
+
"grad_norm": 3.460909366607666,
|
13867 |
+
"learning_rate": 0.0001657970009534541,
|
13868 |
+
"loss": 1.0919,
|
13869 |
+
"step": 1980
|
13870 |
+
},
|
13871 |
+
{
|
13872 |
+
"epoch": 0.17163031471333579,
|
13873 |
+
"grad_norm": 1.7859045267105103,
|
13874 |
+
"learning_rate": 0.00016577966542428708,
|
13875 |
+
"loss": 0.7707,
|
13876 |
+
"step": 1981
|
13877 |
+
},
|
13878 |
+
{
|
13879 |
+
"epoch": 0.17171695293378675,
|
13880 |
+
"grad_norm": 2.140434741973877,
|
13881 |
+
"learning_rate": 0.00016576232989512005,
|
13882 |
+
"loss": 0.7682,
|
13883 |
+
"step": 1982
|
13884 |
+
},
|
13885 |
+
{
|
13886 |
+
"epoch": 0.17180359115423768,
|
13887 |
+
"grad_norm": 3.248626708984375,
|
13888 |
+
"learning_rate": 0.00016574499436595303,
|
13889 |
+
"loss": 1.3366,
|
13890 |
+
"step": 1983
|
13891 |
+
},
|
13892 |
+
{
|
13893 |
+
"epoch": 0.17189022937468865,
|
13894 |
+
"grad_norm": 2.4410815238952637,
|
13895 |
+
"learning_rate": 0.000165727658836786,
|
13896 |
+
"loss": 0.7295,
|
13897 |
+
"step": 1984
|
13898 |
+
},
|
13899 |
+
{
|
13900 |
+
"epoch": 0.17197686759513958,
|
13901 |
+
"grad_norm": 2.5292134284973145,
|
13902 |
+
"learning_rate": 0.00016571032330761897,
|
13903 |
+
"loss": 0.8679,
|
13904 |
+
"step": 1985
|
13905 |
+
},
|
13906 |
+
{
|
13907 |
+
"epoch": 0.17206350581559055,
|
13908 |
+
"grad_norm": 1.506658911705017,
|
13909 |
+
"learning_rate": 0.00016569298777845194,
|
13910 |
+
"loss": 0.5845,
|
13911 |
+
"step": 1986
|
13912 |
+
},
|
13913 |
+
{
|
13914 |
+
"epoch": 0.1721501440360415,
|
13915 |
+
"grad_norm": 2.7821598052978516,
|
13916 |
+
"learning_rate": 0.0001656756522492849,
|
13917 |
+
"loss": 0.7041,
|
13918 |
+
"step": 1987
|
13919 |
+
},
|
13920 |
+
{
|
13921 |
+
"epoch": 0.17223678225649244,
|
13922 |
+
"grad_norm": 1.7464864253997803,
|
13923 |
+
"learning_rate": 0.00016565831672011788,
|
13924 |
+
"loss": 0.5727,
|
13925 |
+
"step": 1988
|
13926 |
+
},
|
13927 |
+
{
|
13928 |
+
"epoch": 0.1723234204769434,
|
13929 |
+
"grad_norm": 1.803768515586853,
|
13930 |
+
"learning_rate": 0.00016564098119095086,
|
13931 |
+
"loss": 0.6957,
|
13932 |
+
"step": 1989
|
13933 |
+
},
|
13934 |
+
{
|
13935 |
+
"epoch": 0.17241005869739434,
|
13936 |
+
"grad_norm": 2.0977275371551514,
|
13937 |
+
"learning_rate": 0.00016562364566178383,
|
13938 |
+
"loss": 0.6639,
|
13939 |
+
"step": 1990
|
13940 |
+
},
|
13941 |
+
{
|
13942 |
+
"epoch": 0.1724966969178453,
|
13943 |
+
"grad_norm": 3.541428565979004,
|
13944 |
+
"learning_rate": 0.0001656063101326168,
|
13945 |
+
"loss": 1.2943,
|
13946 |
+
"step": 1991
|
13947 |
+
},
|
13948 |
+
{
|
13949 |
+
"epoch": 0.17258333513829627,
|
13950 |
+
"grad_norm": 2.0628700256347656,
|
13951 |
+
"learning_rate": 0.00016558897460344977,
|
13952 |
+
"loss": 0.6475,
|
13953 |
+
"step": 1992
|
13954 |
+
},
|
13955 |
+
{
|
13956 |
+
"epoch": 0.1726699733587472,
|
13957 |
+
"grad_norm": 2.4118738174438477,
|
13958 |
+
"learning_rate": 0.00016557163907428274,
|
13959 |
+
"loss": 0.7043,
|
13960 |
+
"step": 1993
|
13961 |
+
},
|
13962 |
+
{
|
13963 |
+
"epoch": 0.17275661157919817,
|
13964 |
+
"grad_norm": 2.431439161300659,
|
13965 |
+
"learning_rate": 0.0001655543035451157,
|
13966 |
+
"loss": 0.6876,
|
13967 |
+
"step": 1994
|
13968 |
+
},
|
13969 |
+
{
|
13970 |
+
"epoch": 0.17284324979964913,
|
13971 |
+
"grad_norm": 2.8489978313446045,
|
13972 |
+
"learning_rate": 0.00016553696801594869,
|
13973 |
+
"loss": 1.524,
|
13974 |
+
"step": 1995
|
13975 |
+
},
|
13976 |
+
{
|
13977 |
+
"epoch": 0.17292988802010006,
|
13978 |
+
"grad_norm": 3.068870782852173,
|
13979 |
+
"learning_rate": 0.00016551963248678166,
|
13980 |
+
"loss": 0.6626,
|
13981 |
+
"step": 1996
|
13982 |
+
},
|
13983 |
+
{
|
13984 |
+
"epoch": 0.17301652624055103,
|
13985 |
+
"grad_norm": 2.357598304748535,
|
13986 |
+
"learning_rate": 0.00016550229695761463,
|
13987 |
+
"loss": 0.695,
|
13988 |
+
"step": 1997
|
13989 |
+
},
|
13990 |
+
{
|
13991 |
+
"epoch": 0.17310316446100196,
|
13992 |
+
"grad_norm": 2.8419971466064453,
|
13993 |
+
"learning_rate": 0.0001654849614284476,
|
13994 |
+
"loss": 1.1585,
|
13995 |
+
"step": 1998
|
13996 |
+
},
|
13997 |
+
{
|
13998 |
+
"epoch": 0.17318980268145293,
|
13999 |
+
"grad_norm": 2.7539398670196533,
|
14000 |
+
"learning_rate": 0.00016546762589928057,
|
14001 |
+
"loss": 1.3085,
|
14002 |
+
"step": 1999
|
14003 |
+
},
|
14004 |
+
{
|
14005 |
+
"epoch": 0.1732764409019039,
|
14006 |
+
"grad_norm": 2.707667589187622,
|
14007 |
+
"learning_rate": 0.00016545029037011354,
|
14008 |
+
"loss": 1.3235,
|
14009 |
+
"step": 2000
|
14010 |
}
|
14011 |
],
|
14012 |
"logging_steps": 1,
|
... (lines 14013-14025 unchanged)
14026         "attributes": {}
14027       }
14028     },
14029 +   "total_flos": 5.9201609521152e+16,
14030     "train_batch_size": 2,
14031     "trial_name": null,
14032     "trial_params": null