JacobLinCool
commited on
Commit
•
b1dc8ae
1
Parent(s):
dbee3f5
Training in progress, epoch 9, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 111475752
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38542b6c83d8c7d2da3b86252ff580b5ebd36f122cd13821f913186de64bba44
|
3 |
size 111475752
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 223212738
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f89505f22034dcc89eb0994064d493a07e7412902e41199ae10247e7769acc82
|
3 |
size 223212738
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2eef67b93c52295f6dcc2fc00269e8329efe3971b5729d18d6d7546a1698d13
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:709b689cdaa470cfc575e643ff39b4e6938bba001e0031274a8872524520efb6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 39.83118971061093,
|
3 |
"best_model_checkpoint": "./exp/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2/checkpoint-3395",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -23882,6 +23882,2631 @@
|
|
23882 |
"eval_steps_per_second": 2.87,
|
23883 |
"eval_wer": 39.83118971061093,
|
23884 |
"step": 3395
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23885 |
}
|
23886 |
],
|
23887 |
"logging_steps": 1,
|
@@ -23896,12 +26521,12 @@
|
|
23896 |
"should_evaluate": false,
|
23897 |
"should_log": false,
|
23898 |
"should_save": true,
|
23899 |
-
"should_training_stop":
|
23900 |
},
|
23901 |
"attributes": {}
|
23902 |
}
|
23903 |
},
|
23904 |
-
"total_flos":
|
23905 |
"train_batch_size": 8,
|
23906 |
"trial_name": null,
|
23907 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 39.83118971061093,
|
3 |
"best_model_checkpoint": "./exp/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2/checkpoint-3395",
|
4 |
+
"epoch": 9.992715231788079,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3770,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
23882 |
"eval_steps_per_second": 2.87,
|
23883 |
"eval_wer": 39.83118971061093,
|
23884 |
"step": 3395
|
23885 |
+
},
|
23886 |
+
{
|
23887 |
+
"epoch": 9.001986754966888,
|
23888 |
+
"grad_norm": 0.3046058416366577,
|
23889 |
+
"learning_rate": 4.960212201591512e-05,
|
23890 |
+
"loss": 0.0045,
|
23891 |
+
"step": 3396
|
23892 |
+
},
|
23893 |
+
{
|
23894 |
+
"epoch": 9.004635761589403,
|
23895 |
+
"grad_norm": 0.2195853292942047,
|
23896 |
+
"learning_rate": 4.946949602122016e-05,
|
23897 |
+
"loss": 0.0069,
|
23898 |
+
"step": 3397
|
23899 |
+
},
|
23900 |
+
{
|
23901 |
+
"epoch": 9.007284768211921,
|
23902 |
+
"grad_norm": 0.07569844275712967,
|
23903 |
+
"learning_rate": 4.93368700265252e-05,
|
23904 |
+
"loss": 0.0051,
|
23905 |
+
"step": 3398
|
23906 |
+
},
|
23907 |
+
{
|
23908 |
+
"epoch": 9.009933774834437,
|
23909 |
+
"grad_norm": 0.10955400764942169,
|
23910 |
+
"learning_rate": 4.9204244031830244e-05,
|
23911 |
+
"loss": 0.0048,
|
23912 |
+
"step": 3399
|
23913 |
+
},
|
23914 |
+
{
|
23915 |
+
"epoch": 9.012582781456954,
|
23916 |
+
"grad_norm": 0.0918859988451004,
|
23917 |
+
"learning_rate": 4.907161803713528e-05,
|
23918 |
+
"loss": 0.0046,
|
23919 |
+
"step": 3400
|
23920 |
+
},
|
23921 |
+
{
|
23922 |
+
"epoch": 9.01523178807947,
|
23923 |
+
"grad_norm": 0.05276548117399216,
|
23924 |
+
"learning_rate": 4.893899204244032e-05,
|
23925 |
+
"loss": 0.0042,
|
23926 |
+
"step": 3401
|
23927 |
+
},
|
23928 |
+
{
|
23929 |
+
"epoch": 9.017880794701986,
|
23930 |
+
"grad_norm": 0.04799486696720123,
|
23931 |
+
"learning_rate": 4.880636604774536e-05,
|
23932 |
+
"loss": 0.0031,
|
23933 |
+
"step": 3402
|
23934 |
+
},
|
23935 |
+
{
|
23936 |
+
"epoch": 9.020529801324503,
|
23937 |
+
"grad_norm": 1.0612272024154663,
|
23938 |
+
"learning_rate": 4.8673740053050396e-05,
|
23939 |
+
"loss": 0.055,
|
23940 |
+
"step": 3403
|
23941 |
+
},
|
23942 |
+
{
|
23943 |
+
"epoch": 9.02317880794702,
|
23944 |
+
"grad_norm": 0.9669842720031738,
|
23945 |
+
"learning_rate": 4.8541114058355434e-05,
|
23946 |
+
"loss": 0.0145,
|
23947 |
+
"step": 3404
|
23948 |
+
},
|
23949 |
+
{
|
23950 |
+
"epoch": 9.025827814569537,
|
23951 |
+
"grad_norm": 0.05631180480122566,
|
23952 |
+
"learning_rate": 4.840848806366048e-05,
|
23953 |
+
"loss": 0.0038,
|
23954 |
+
"step": 3405
|
23955 |
+
},
|
23956 |
+
{
|
23957 |
+
"epoch": 9.028476821192053,
|
23958 |
+
"grad_norm": 0.08740442246198654,
|
23959 |
+
"learning_rate": 4.8275862068965517e-05,
|
23960 |
+
"loss": 0.0047,
|
23961 |
+
"step": 3406
|
23962 |
+
},
|
23963 |
+
{
|
23964 |
+
"epoch": 9.03112582781457,
|
23965 |
+
"grad_norm": 0.04892691969871521,
|
23966 |
+
"learning_rate": 4.8143236074270555e-05,
|
23967 |
+
"loss": 0.0031,
|
23968 |
+
"step": 3407
|
23969 |
+
},
|
23970 |
+
{
|
23971 |
+
"epoch": 9.033774834437086,
|
23972 |
+
"grad_norm": 0.13646160066127777,
|
23973 |
+
"learning_rate": 4.80106100795756e-05,
|
23974 |
+
"loss": 0.005,
|
23975 |
+
"step": 3408
|
23976 |
+
},
|
23977 |
+
{
|
23978 |
+
"epoch": 9.036423841059603,
|
23979 |
+
"grad_norm": 0.5149220824241638,
|
23980 |
+
"learning_rate": 4.787798408488064e-05,
|
23981 |
+
"loss": 0.02,
|
23982 |
+
"step": 3409
|
23983 |
+
},
|
23984 |
+
{
|
23985 |
+
"epoch": 9.03907284768212,
|
23986 |
+
"grad_norm": 0.11811016499996185,
|
23987 |
+
"learning_rate": 4.7745358090185675e-05,
|
23988 |
+
"loss": 0.0047,
|
23989 |
+
"step": 3410
|
23990 |
+
},
|
23991 |
+
{
|
23992 |
+
"epoch": 9.041721854304635,
|
23993 |
+
"grad_norm": 0.1897936463356018,
|
23994 |
+
"learning_rate": 4.761273209549072e-05,
|
23995 |
+
"loss": 0.0076,
|
23996 |
+
"step": 3411
|
23997 |
+
},
|
23998 |
+
{
|
23999 |
+
"epoch": 9.044370860927152,
|
24000 |
+
"grad_norm": 0.05969947203993797,
|
24001 |
+
"learning_rate": 4.748010610079576e-05,
|
24002 |
+
"loss": 0.0031,
|
24003 |
+
"step": 3412
|
24004 |
+
},
|
24005 |
+
{
|
24006 |
+
"epoch": 9.047019867549668,
|
24007 |
+
"grad_norm": 0.09473060071468353,
|
24008 |
+
"learning_rate": 4.73474801061008e-05,
|
24009 |
+
"loss": 0.0054,
|
24010 |
+
"step": 3413
|
24011 |
+
},
|
24012 |
+
{
|
24013 |
+
"epoch": 9.049668874172186,
|
24014 |
+
"grad_norm": 0.2807403802871704,
|
24015 |
+
"learning_rate": 4.721485411140584e-05,
|
24016 |
+
"loss": 0.008,
|
24017 |
+
"step": 3414
|
24018 |
+
},
|
24019 |
+
{
|
24020 |
+
"epoch": 9.052317880794702,
|
24021 |
+
"grad_norm": 0.06429164111614227,
|
24022 |
+
"learning_rate": 4.708222811671087e-05,
|
24023 |
+
"loss": 0.0037,
|
24024 |
+
"step": 3415
|
24025 |
+
},
|
24026 |
+
{
|
24027 |
+
"epoch": 9.05496688741722,
|
24028 |
+
"grad_norm": 0.36819136142730713,
|
24029 |
+
"learning_rate": 4.694960212201592e-05,
|
24030 |
+
"loss": 0.0048,
|
24031 |
+
"step": 3416
|
24032 |
+
},
|
24033 |
+
{
|
24034 |
+
"epoch": 9.057615894039735,
|
24035 |
+
"grad_norm": 0.10270120948553085,
|
24036 |
+
"learning_rate": 4.6816976127320955e-05,
|
24037 |
+
"loss": 0.0055,
|
24038 |
+
"step": 3417
|
24039 |
+
},
|
24040 |
+
{
|
24041 |
+
"epoch": 9.060264900662252,
|
24042 |
+
"grad_norm": 0.06311608850955963,
|
24043 |
+
"learning_rate": 4.668435013262599e-05,
|
24044 |
+
"loss": 0.0042,
|
24045 |
+
"step": 3418
|
24046 |
+
},
|
24047 |
+
{
|
24048 |
+
"epoch": 9.062913907284768,
|
24049 |
+
"grad_norm": 0.0841260626912117,
|
24050 |
+
"learning_rate": 4.655172413793104e-05,
|
24051 |
+
"loss": 0.0046,
|
24052 |
+
"step": 3419
|
24053 |
+
},
|
24054 |
+
{
|
24055 |
+
"epoch": 9.065562913907284,
|
24056 |
+
"grad_norm": 0.11077243834733963,
|
24057 |
+
"learning_rate": 4.6419098143236075e-05,
|
24058 |
+
"loss": 0.0041,
|
24059 |
+
"step": 3420
|
24060 |
+
},
|
24061 |
+
{
|
24062 |
+
"epoch": 9.068211920529802,
|
24063 |
+
"grad_norm": 0.3600999414920807,
|
24064 |
+
"learning_rate": 4.628647214854111e-05,
|
24065 |
+
"loss": 0.0129,
|
24066 |
+
"step": 3421
|
24067 |
+
},
|
24068 |
+
{
|
24069 |
+
"epoch": 9.070860927152317,
|
24070 |
+
"grad_norm": 0.07916560024023056,
|
24071 |
+
"learning_rate": 4.615384615384616e-05,
|
24072 |
+
"loss": 0.0051,
|
24073 |
+
"step": 3422
|
24074 |
+
},
|
24075 |
+
{
|
24076 |
+
"epoch": 9.073509933774835,
|
24077 |
+
"grad_norm": 0.06836560368537903,
|
24078 |
+
"learning_rate": 4.6021220159151196e-05,
|
24079 |
+
"loss": 0.0035,
|
24080 |
+
"step": 3423
|
24081 |
+
},
|
24082 |
+
{
|
24083 |
+
"epoch": 9.07615894039735,
|
24084 |
+
"grad_norm": 0.0479905940592289,
|
24085 |
+
"learning_rate": 4.5888594164456234e-05,
|
24086 |
+
"loss": 0.0031,
|
24087 |
+
"step": 3424
|
24088 |
+
},
|
24089 |
+
{
|
24090 |
+
"epoch": 9.078807947019868,
|
24091 |
+
"grad_norm": 0.08379294723272324,
|
24092 |
+
"learning_rate": 4.575596816976128e-05,
|
24093 |
+
"loss": 0.0038,
|
24094 |
+
"step": 3425
|
24095 |
+
},
|
24096 |
+
{
|
24097 |
+
"epoch": 9.081456953642384,
|
24098 |
+
"grad_norm": 0.6001594066619873,
|
24099 |
+
"learning_rate": 4.562334217506632e-05,
|
24100 |
+
"loss": 0.0066,
|
24101 |
+
"step": 3426
|
24102 |
+
},
|
24103 |
+
{
|
24104 |
+
"epoch": 9.084105960264901,
|
24105 |
+
"grad_norm": 1.2279647588729858,
|
24106 |
+
"learning_rate": 4.549071618037135e-05,
|
24107 |
+
"loss": 0.0283,
|
24108 |
+
"step": 3427
|
24109 |
+
},
|
24110 |
+
{
|
24111 |
+
"epoch": 9.086754966887417,
|
24112 |
+
"grad_norm": 0.09612450748682022,
|
24113 |
+
"learning_rate": 4.535809018567639e-05,
|
24114 |
+
"loss": 0.0047,
|
24115 |
+
"step": 3428
|
24116 |
+
},
|
24117 |
+
{
|
24118 |
+
"epoch": 9.089403973509933,
|
24119 |
+
"grad_norm": 0.049450572580099106,
|
24120 |
+
"learning_rate": 4.522546419098143e-05,
|
24121 |
+
"loss": 0.0031,
|
24122 |
+
"step": 3429
|
24123 |
+
},
|
24124 |
+
{
|
24125 |
+
"epoch": 9.09205298013245,
|
24126 |
+
"grad_norm": 0.7725188136100769,
|
24127 |
+
"learning_rate": 4.509283819628647e-05,
|
24128 |
+
"loss": 0.007,
|
24129 |
+
"step": 3430
|
24130 |
+
},
|
24131 |
+
{
|
24132 |
+
"epoch": 9.094701986754966,
|
24133 |
+
"grad_norm": 0.08635123074054718,
|
24134 |
+
"learning_rate": 4.4960212201591514e-05,
|
24135 |
+
"loss": 0.004,
|
24136 |
+
"step": 3431
|
24137 |
+
},
|
24138 |
+
{
|
24139 |
+
"epoch": 9.097350993377484,
|
24140 |
+
"grad_norm": 0.07909909635782242,
|
24141 |
+
"learning_rate": 4.482758620689655e-05,
|
24142 |
+
"loss": 0.0048,
|
24143 |
+
"step": 3432
|
24144 |
+
},
|
24145 |
+
{
|
24146 |
+
"epoch": 9.1,
|
24147 |
+
"grad_norm": 0.06404121965169907,
|
24148 |
+
"learning_rate": 4.469496021220159e-05,
|
24149 |
+
"loss": 0.0042,
|
24150 |
+
"step": 3433
|
24151 |
+
},
|
24152 |
+
{
|
24153 |
+
"epoch": 9.102649006622517,
|
24154 |
+
"grad_norm": 0.08604049682617188,
|
24155 |
+
"learning_rate": 4.4562334217506634e-05,
|
24156 |
+
"loss": 0.006,
|
24157 |
+
"step": 3434
|
24158 |
+
},
|
24159 |
+
{
|
24160 |
+
"epoch": 9.105298013245033,
|
24161 |
+
"grad_norm": 0.09343012422323227,
|
24162 |
+
"learning_rate": 4.442970822281167e-05,
|
24163 |
+
"loss": 0.0043,
|
24164 |
+
"step": 3435
|
24165 |
+
},
|
24166 |
+
{
|
24167 |
+
"epoch": 9.10794701986755,
|
24168 |
+
"grad_norm": 1.6587215662002563,
|
24169 |
+
"learning_rate": 4.429708222811671e-05,
|
24170 |
+
"loss": 0.026,
|
24171 |
+
"step": 3436
|
24172 |
+
},
|
24173 |
+
{
|
24174 |
+
"epoch": 9.110596026490066,
|
24175 |
+
"grad_norm": 0.13061711192131042,
|
24176 |
+
"learning_rate": 4.4164456233421755e-05,
|
24177 |
+
"loss": 0.0051,
|
24178 |
+
"step": 3437
|
24179 |
+
},
|
24180 |
+
{
|
24181 |
+
"epoch": 9.113245033112582,
|
24182 |
+
"grad_norm": 0.3710346519947052,
|
24183 |
+
"learning_rate": 4.403183023872679e-05,
|
24184 |
+
"loss": 0.0056,
|
24185 |
+
"step": 3438
|
24186 |
+
},
|
24187 |
+
{
|
24188 |
+
"epoch": 9.1158940397351,
|
24189 |
+
"grad_norm": 0.186061292886734,
|
24190 |
+
"learning_rate": 4.389920424403184e-05,
|
24191 |
+
"loss": 0.0033,
|
24192 |
+
"step": 3439
|
24193 |
+
},
|
24194 |
+
{
|
24195 |
+
"epoch": 9.118543046357615,
|
24196 |
+
"grad_norm": 0.31838712096214294,
|
24197 |
+
"learning_rate": 4.376657824933687e-05,
|
24198 |
+
"loss": 0.0079,
|
24199 |
+
"step": 3440
|
24200 |
+
},
|
24201 |
+
{
|
24202 |
+
"epoch": 9.121192052980133,
|
24203 |
+
"grad_norm": 0.05291552469134331,
|
24204 |
+
"learning_rate": 4.363395225464191e-05,
|
24205 |
+
"loss": 0.0031,
|
24206 |
+
"step": 3441
|
24207 |
+
},
|
24208 |
+
{
|
24209 |
+
"epoch": 9.123841059602649,
|
24210 |
+
"grad_norm": 0.12245338410139084,
|
24211 |
+
"learning_rate": 4.350132625994695e-05,
|
24212 |
+
"loss": 0.0057,
|
24213 |
+
"step": 3442
|
24214 |
+
},
|
24215 |
+
{
|
24216 |
+
"epoch": 9.126490066225166,
|
24217 |
+
"grad_norm": 0.19992384314537048,
|
24218 |
+
"learning_rate": 4.336870026525199e-05,
|
24219 |
+
"loss": 0.0057,
|
24220 |
+
"step": 3443
|
24221 |
+
},
|
24222 |
+
{
|
24223 |
+
"epoch": 9.129139072847682,
|
24224 |
+
"grad_norm": 0.04947689175605774,
|
24225 |
+
"learning_rate": 4.323607427055703e-05,
|
24226 |
+
"loss": 0.0039,
|
24227 |
+
"step": 3444
|
24228 |
+
},
|
24229 |
+
{
|
24230 |
+
"epoch": 9.1317880794702,
|
24231 |
+
"grad_norm": 0.12987762689590454,
|
24232 |
+
"learning_rate": 4.310344827586207e-05,
|
24233 |
+
"loss": 0.0048,
|
24234 |
+
"step": 3445
|
24235 |
+
},
|
24236 |
+
{
|
24237 |
+
"epoch": 9.134437086092715,
|
24238 |
+
"grad_norm": 0.9384921789169312,
|
24239 |
+
"learning_rate": 4.297082228116711e-05,
|
24240 |
+
"loss": 0.0067,
|
24241 |
+
"step": 3446
|
24242 |
+
},
|
24243 |
+
{
|
24244 |
+
"epoch": 9.137086092715231,
|
24245 |
+
"grad_norm": 0.0664202943444252,
|
24246 |
+
"learning_rate": 4.283819628647215e-05,
|
24247 |
+
"loss": 0.005,
|
24248 |
+
"step": 3447
|
24249 |
+
},
|
24250 |
+
{
|
24251 |
+
"epoch": 9.139735099337749,
|
24252 |
+
"grad_norm": 0.2273460328578949,
|
24253 |
+
"learning_rate": 4.270557029177719e-05,
|
24254 |
+
"loss": 0.0086,
|
24255 |
+
"step": 3448
|
24256 |
+
},
|
24257 |
+
{
|
24258 |
+
"epoch": 9.142384105960264,
|
24259 |
+
"grad_norm": 0.06090230494737625,
|
24260 |
+
"learning_rate": 4.257294429708223e-05,
|
24261 |
+
"loss": 0.0037,
|
24262 |
+
"step": 3449
|
24263 |
+
},
|
24264 |
+
{
|
24265 |
+
"epoch": 9.145033112582782,
|
24266 |
+
"grad_norm": 0.0633263811469078,
|
24267 |
+
"learning_rate": 4.244031830238727e-05,
|
24268 |
+
"loss": 0.004,
|
24269 |
+
"step": 3450
|
24270 |
+
},
|
24271 |
+
{
|
24272 |
+
"epoch": 9.147682119205298,
|
24273 |
+
"grad_norm": 0.06657669693231583,
|
24274 |
+
"learning_rate": 4.2307692307692314e-05,
|
24275 |
+
"loss": 0.0045,
|
24276 |
+
"step": 3451
|
24277 |
+
},
|
24278 |
+
{
|
24279 |
+
"epoch": 9.150331125827815,
|
24280 |
+
"grad_norm": 0.0977824255824089,
|
24281 |
+
"learning_rate": 4.2175066312997345e-05,
|
24282 |
+
"loss": 0.0037,
|
24283 |
+
"step": 3452
|
24284 |
+
},
|
24285 |
+
{
|
24286 |
+
"epoch": 9.152980132450331,
|
24287 |
+
"grad_norm": 0.1436937004327774,
|
24288 |
+
"learning_rate": 4.204244031830238e-05,
|
24289 |
+
"loss": 0.0056,
|
24290 |
+
"step": 3453
|
24291 |
+
},
|
24292 |
+
{
|
24293 |
+
"epoch": 9.155629139072847,
|
24294 |
+
"grad_norm": 0.1385282725095749,
|
24295 |
+
"learning_rate": 4.190981432360743e-05,
|
24296 |
+
"loss": 0.0051,
|
24297 |
+
"step": 3454
|
24298 |
+
},
|
24299 |
+
{
|
24300 |
+
"epoch": 9.158278145695364,
|
24301 |
+
"grad_norm": 0.05507650971412659,
|
24302 |
+
"learning_rate": 4.1777188328912466e-05,
|
24303 |
+
"loss": 0.0036,
|
24304 |
+
"step": 3455
|
24305 |
+
},
|
24306 |
+
{
|
24307 |
+
"epoch": 9.16092715231788,
|
24308 |
+
"grad_norm": 0.16013847291469574,
|
24309 |
+
"learning_rate": 4.1644562334217504e-05,
|
24310 |
+
"loss": 0.0061,
|
24311 |
+
"step": 3456
|
24312 |
+
},
|
24313 |
+
{
|
24314 |
+
"epoch": 9.163576158940398,
|
24315 |
+
"grad_norm": 0.05230237916111946,
|
24316 |
+
"learning_rate": 4.151193633952255e-05,
|
24317 |
+
"loss": 0.003,
|
24318 |
+
"step": 3457
|
24319 |
+
},
|
24320 |
+
{
|
24321 |
+
"epoch": 9.166225165562913,
|
24322 |
+
"grad_norm": 0.042716044932603836,
|
24323 |
+
"learning_rate": 4.1379310344827587e-05,
|
24324 |
+
"loss": 0.0029,
|
24325 |
+
"step": 3458
|
24326 |
+
},
|
24327 |
+
{
|
24328 |
+
"epoch": 9.168874172185431,
|
24329 |
+
"grad_norm": 0.07502096891403198,
|
24330 |
+
"learning_rate": 4.1246684350132625e-05,
|
24331 |
+
"loss": 0.005,
|
24332 |
+
"step": 3459
|
24333 |
+
},
|
24334 |
+
{
|
24335 |
+
"epoch": 9.171523178807947,
|
24336 |
+
"grad_norm": 0.09310350567102432,
|
24337 |
+
"learning_rate": 4.111405835543767e-05,
|
24338 |
+
"loss": 0.0038,
|
24339 |
+
"step": 3460
|
24340 |
+
},
|
24341 |
+
{
|
24342 |
+
"epoch": 9.174172185430464,
|
24343 |
+
"grad_norm": 0.07771538943052292,
|
24344 |
+
"learning_rate": 4.098143236074271e-05,
|
24345 |
+
"loss": 0.0047,
|
24346 |
+
"step": 3461
|
24347 |
+
},
|
24348 |
+
{
|
24349 |
+
"epoch": 9.17682119205298,
|
24350 |
+
"grad_norm": 0.08970572799444199,
|
24351 |
+
"learning_rate": 4.0848806366047745e-05,
|
24352 |
+
"loss": 0.0041,
|
24353 |
+
"step": 3462
|
24354 |
+
},
|
24355 |
+
{
|
24356 |
+
"epoch": 9.179470198675496,
|
24357 |
+
"grad_norm": 0.14392562210559845,
|
24358 |
+
"learning_rate": 4.071618037135279e-05,
|
24359 |
+
"loss": 0.0044,
|
24360 |
+
"step": 3463
|
24361 |
+
},
|
24362 |
+
{
|
24363 |
+
"epoch": 9.182119205298013,
|
24364 |
+
"grad_norm": 0.7532879114151001,
|
24365 |
+
"learning_rate": 4.058355437665783e-05,
|
24366 |
+
"loss": 0.0089,
|
24367 |
+
"step": 3464
|
24368 |
+
},
|
24369 |
+
{
|
24370 |
+
"epoch": 9.18476821192053,
|
24371 |
+
"grad_norm": 0.14453311264514923,
|
24372 |
+
"learning_rate": 4.0450928381962866e-05,
|
24373 |
+
"loss": 0.0058,
|
24374 |
+
"step": 3465
|
24375 |
+
},
|
24376 |
+
{
|
24377 |
+
"epoch": 9.187417218543047,
|
24378 |
+
"grad_norm": 0.09528756886720657,
|
24379 |
+
"learning_rate": 4.0318302387267904e-05,
|
24380 |
+
"loss": 0.004,
|
24381 |
+
"step": 3466
|
24382 |
+
},
|
24383 |
+
{
|
24384 |
+
"epoch": 9.190066225165562,
|
24385 |
+
"grad_norm": 0.05848846584558487,
|
24386 |
+
"learning_rate": 4.018567639257294e-05,
|
24387 |
+
"loss": 0.0038,
|
24388 |
+
"step": 3467
|
24389 |
+
},
|
24390 |
+
{
|
24391 |
+
"epoch": 9.19271523178808,
|
24392 |
+
"grad_norm": 0.06267541646957397,
|
24393 |
+
"learning_rate": 4.005305039787799e-05,
|
24394 |
+
"loss": 0.0045,
|
24395 |
+
"step": 3468
|
24396 |
+
},
|
24397 |
+
{
|
24398 |
+
"epoch": 9.195364238410596,
|
24399 |
+
"grad_norm": 0.5184205770492554,
|
24400 |
+
"learning_rate": 3.9920424403183025e-05,
|
24401 |
+
"loss": 0.0131,
|
24402 |
+
"step": 3469
|
24403 |
+
},
|
24404 |
+
{
|
24405 |
+
"epoch": 9.198013245033113,
|
24406 |
+
"grad_norm": 0.05579211190342903,
|
24407 |
+
"learning_rate": 3.978779840848806e-05,
|
24408 |
+
"loss": 0.0029,
|
24409 |
+
"step": 3470
|
24410 |
+
},
|
24411 |
+
{
|
24412 |
+
"epoch": 9.20066225165563,
|
24413 |
+
"grad_norm": 0.1841682344675064,
|
24414 |
+
"learning_rate": 3.965517241379311e-05,
|
24415 |
+
"loss": 0.0066,
|
24416 |
+
"step": 3471
|
24417 |
+
},
|
24418 |
+
{
|
24419 |
+
"epoch": 9.203311258278145,
|
24420 |
+
"grad_norm": 0.11529310047626495,
|
24421 |
+
"learning_rate": 3.9522546419098145e-05,
|
24422 |
+
"loss": 0.005,
|
24423 |
+
"step": 3472
|
24424 |
+
},
|
24425 |
+
{
|
24426 |
+
"epoch": 9.205960264900662,
|
24427 |
+
"grad_norm": 0.09236898273229599,
|
24428 |
+
"learning_rate": 3.9389920424403183e-05,
|
24429 |
+
"loss": 0.0044,
|
24430 |
+
"step": 3473
|
24431 |
+
},
|
24432 |
+
{
|
24433 |
+
"epoch": 9.208609271523178,
|
24434 |
+
"grad_norm": 0.07809614390134811,
|
24435 |
+
"learning_rate": 3.925729442970823e-05,
|
24436 |
+
"loss": 0.0033,
|
24437 |
+
"step": 3474
|
24438 |
+
},
|
24439 |
+
{
|
24440 |
+
"epoch": 9.211258278145696,
|
24441 |
+
"grad_norm": 0.06222685053944588,
|
24442 |
+
"learning_rate": 3.9124668435013266e-05,
|
24443 |
+
"loss": 0.0034,
|
24444 |
+
"step": 3475
|
24445 |
+
},
|
24446 |
+
{
|
24447 |
+
"epoch": 9.213907284768212,
|
24448 |
+
"grad_norm": 0.11090411990880966,
|
24449 |
+
"learning_rate": 3.8992042440318304e-05,
|
24450 |
+
"loss": 0.0038,
|
24451 |
+
"step": 3476
|
24452 |
+
},
|
24453 |
+
{
|
24454 |
+
"epoch": 9.216556291390729,
|
24455 |
+
"grad_norm": 0.07948897033929825,
|
24456 |
+
"learning_rate": 3.885941644562334e-05,
|
24457 |
+
"loss": 0.0062,
|
24458 |
+
"step": 3477
|
24459 |
+
},
|
24460 |
+
{
|
24461 |
+
"epoch": 9.219205298013245,
|
24462 |
+
"grad_norm": 0.12640348076820374,
|
24463 |
+
"learning_rate": 3.872679045092838e-05,
|
24464 |
+
"loss": 0.004,
|
24465 |
+
"step": 3478
|
24466 |
+
},
|
24467 |
+
{
|
24468 |
+
"epoch": 9.221854304635762,
|
24469 |
+
"grad_norm": 0.1517658829689026,
|
24470 |
+
"learning_rate": 3.859416445623342e-05,
|
24471 |
+
"loss": 0.0052,
|
24472 |
+
"step": 3479
|
24473 |
+
},
|
24474 |
+
{
|
24475 |
+
"epoch": 9.224503311258278,
|
24476 |
+
"grad_norm": 0.06037524342536926,
|
24477 |
+
"learning_rate": 3.846153846153846e-05,
|
24478 |
+
"loss": 0.0041,
|
24479 |
+
"step": 3480
|
24480 |
+
},
|
24481 |
+
{
|
24482 |
+
"epoch": 9.227152317880794,
|
24483 |
+
"grad_norm": 5.191507816314697,
|
24484 |
+
"learning_rate": 3.83289124668435e-05,
|
24485 |
+
"loss": 0.0851,
|
24486 |
+
"step": 3481
|
24487 |
+
},
|
24488 |
+
{
|
24489 |
+
"epoch": 9.229801324503311,
|
24490 |
+
"grad_norm": 0.06474646925926208,
|
24491 |
+
"learning_rate": 3.819628647214854e-05,
|
24492 |
+
"loss": 0.0036,
|
24493 |
+
"step": 3482
|
24494 |
+
},
|
24495 |
+
{
|
24496 |
+
"epoch": 9.232450331125827,
|
24497 |
+
"grad_norm": 0.044177595525979996,
|
24498 |
+
"learning_rate": 3.8063660477453584e-05,
|
24499 |
+
"loss": 0.0037,
|
24500 |
+
"step": 3483
|
24501 |
+
},
|
24502 |
+
{
|
24503 |
+
"epoch": 9.235099337748345,
|
24504 |
+
"grad_norm": 0.38126340508461,
|
24505 |
+
"learning_rate": 3.793103448275862e-05,
|
24506 |
+
"loss": 0.007,
|
24507 |
+
"step": 3484
|
24508 |
+
},
|
24509 |
+
{
|
24510 |
+
"epoch": 9.23774834437086,
|
24511 |
+
"grad_norm": 0.1678951382637024,
|
24512 |
+
"learning_rate": 3.779840848806366e-05,
|
24513 |
+
"loss": 0.0065,
|
24514 |
+
"step": 3485
|
24515 |
+
},
|
24516 |
+
{
|
24517 |
+
"epoch": 9.240397350993378,
|
24518 |
+
"grad_norm": 0.0644679069519043,
|
24519 |
+
"learning_rate": 3.7665782493368704e-05,
|
24520 |
+
"loss": 0.0039,
|
24521 |
+
"step": 3486
|
24522 |
+
},
|
24523 |
+
{
|
24524 |
+
"epoch": 9.243046357615894,
|
24525 |
+
"grad_norm": 0.09104061126708984,
|
24526 |
+
"learning_rate": 3.753315649867374e-05,
|
24527 |
+
"loss": 0.0033,
|
24528 |
+
"step": 3487
|
24529 |
+
},
|
24530 |
+
{
|
24531 |
+
"epoch": 9.245695364238411,
|
24532 |
+
"grad_norm": 0.055731672793626785,
|
24533 |
+
"learning_rate": 3.740053050397878e-05,
|
24534 |
+
"loss": 0.0034,
|
24535 |
+
"step": 3488
|
24536 |
+
},
|
24537 |
+
{
|
24538 |
+
"epoch": 9.248344370860927,
|
24539 |
+
"grad_norm": 2.4842283725738525,
|
24540 |
+
"learning_rate": 3.7267904509283825e-05,
|
24541 |
+
"loss": 0.089,
|
24542 |
+
"step": 3489
|
24543 |
+
},
|
24544 |
+
{
|
24545 |
+
"epoch": 9.250993377483443,
|
24546 |
+
"grad_norm": 0.06148075684905052,
|
24547 |
+
"learning_rate": 3.7135278514588856e-05,
|
24548 |
+
"loss": 0.0033,
|
24549 |
+
"step": 3490
|
24550 |
+
},
|
24551 |
+
{
|
24552 |
+
"epoch": 9.25364238410596,
|
24553 |
+
"grad_norm": 0.07563995569944382,
|
24554 |
+
"learning_rate": 3.7002652519893894e-05,
|
24555 |
+
"loss": 0.0046,
|
24556 |
+
"step": 3491
|
24557 |
+
},
|
24558 |
+
{
|
24559 |
+
"epoch": 9.256291390728476,
|
24560 |
+
"grad_norm": 0.09950581192970276,
|
24561 |
+
"learning_rate": 3.687002652519894e-05,
|
24562 |
+
"loss": 0.0048,
|
24563 |
+
"step": 3492
|
24564 |
+
},
|
24565 |
+
{
|
24566 |
+
"epoch": 9.258940397350994,
|
24567 |
+
"grad_norm": 0.07033694535493851,
|
24568 |
+
"learning_rate": 3.673740053050398e-05,
|
24569 |
+
"loss": 0.0044,
|
24570 |
+
"step": 3493
|
24571 |
+
},
|
24572 |
+
{
|
24573 |
+
"epoch": 9.26158940397351,
|
24574 |
+
"grad_norm": 0.5478590726852417,
|
24575 |
+
"learning_rate": 3.660477453580902e-05,
|
24576 |
+
"loss": 0.0148,
|
24577 |
+
"step": 3494
|
24578 |
+
},
|
24579 |
+
{
|
24580 |
+
"epoch": 9.264238410596027,
|
24581 |
+
"grad_norm": 0.04912034422159195,
|
24582 |
+
"learning_rate": 3.647214854111406e-05,
|
24583 |
+
"loss": 0.0038,
|
24584 |
+
"step": 3495
|
24585 |
+
},
|
24586 |
+
{
|
24587 |
+
"epoch": 9.266887417218543,
|
24588 |
+
"grad_norm": 0.0955716073513031,
|
24589 |
+
"learning_rate": 3.63395225464191e-05,
|
24590 |
+
"loss": 0.0045,
|
24591 |
+
"step": 3496
|
24592 |
+
},
|
24593 |
+
{
|
24594 |
+
"epoch": 9.26953642384106,
|
24595 |
+
"grad_norm": 0.04738667607307434,
|
24596 |
+
"learning_rate": 3.620689655172414e-05,
|
24597 |
+
"loss": 0.0035,
|
24598 |
+
"step": 3497
|
24599 |
+
},
|
24600 |
+
{
|
24601 |
+
"epoch": 9.272185430463576,
|
24602 |
+
"grad_norm": 0.25836634635925293,
|
24603 |
+
"learning_rate": 3.607427055702918e-05,
|
24604 |
+
"loss": 0.0071,
|
24605 |
+
"step": 3498
|
24606 |
+
},
|
24607 |
+
{
|
24608 |
+
"epoch": 9.274834437086092,
|
24609 |
+
"grad_norm": 0.08777689188718796,
|
24610 |
+
"learning_rate": 3.594164456233422e-05,
|
24611 |
+
"loss": 0.0034,
|
24612 |
+
"step": 3499
|
24613 |
+
},
|
24614 |
+
{
|
24615 |
+
"epoch": 9.27748344370861,
|
24616 |
+
"grad_norm": 0.2181941419839859,
|
24617 |
+
"learning_rate": 3.580901856763926e-05,
|
24618 |
+
"loss": 0.0074,
|
24619 |
+
"step": 3500
|
24620 |
+
},
|
24621 |
+
{
|
24622 |
+
"epoch": 9.280132450331125,
|
24623 |
+
"grad_norm": 0.11127748340368271,
|
24624 |
+
"learning_rate": 3.56763925729443e-05,
|
24625 |
+
"loss": 0.005,
|
24626 |
+
"step": 3501
|
24627 |
+
},
|
24628 |
+
{
|
24629 |
+
"epoch": 9.282781456953643,
|
24630 |
+
"grad_norm": 0.0854862704873085,
|
24631 |
+
"learning_rate": 3.554376657824933e-05,
|
24632 |
+
"loss": 0.0037,
|
24633 |
+
"step": 3502
|
24634 |
+
},
|
24635 |
+
{
|
24636 |
+
"epoch": 9.285430463576159,
|
24637 |
+
"grad_norm": 0.04827234521508217,
|
24638 |
+
"learning_rate": 3.541114058355438e-05,
|
24639 |
+
"loss": 0.0035,
|
24640 |
+
"step": 3503
|
24641 |
+
},
|
24642 |
+
{
|
24643 |
+
"epoch": 9.288079470198676,
|
24644 |
+
"grad_norm": 0.04491313174366951,
|
24645 |
+
"learning_rate": 3.5278514588859415e-05,
|
24646 |
+
"loss": 0.0031,
|
24647 |
+
"step": 3504
|
24648 |
+
},
|
24649 |
+
{
|
24650 |
+
"epoch": 9.290728476821192,
|
24651 |
+
"grad_norm": 0.06774583458900452,
|
24652 |
+
"learning_rate": 3.514588859416445e-05,
|
24653 |
+
"loss": 0.0038,
|
24654 |
+
"step": 3505
|
24655 |
+
},
|
24656 |
+
{
|
24657 |
+
"epoch": 9.29337748344371,
|
24658 |
+
"grad_norm": 0.07155600935220718,
|
24659 |
+
"learning_rate": 3.50132625994695e-05,
|
24660 |
+
"loss": 0.0041,
|
24661 |
+
"step": 3506
|
24662 |
+
},
|
24663 |
+
{
|
24664 |
+
"epoch": 9.296026490066225,
|
24665 |
+
"grad_norm": 0.06741708517074585,
|
24666 |
+
"learning_rate": 3.4880636604774536e-05,
|
24667 |
+
"loss": 0.0039,
|
24668 |
+
"step": 3507
|
24669 |
+
},
|
24670 |
+
{
|
24671 |
+
"epoch": 9.298675496688741,
|
24672 |
+
"grad_norm": 0.609828770160675,
|
24673 |
+
"learning_rate": 3.4748010610079574e-05,
|
24674 |
+
"loss": 0.0165,
|
24675 |
+
"step": 3508
|
24676 |
+
},
|
24677 |
+
{
|
24678 |
+
"epoch": 9.301324503311259,
|
24679 |
+
"grad_norm": 0.06398732215166092,
|
24680 |
+
"learning_rate": 3.461538461538462e-05,
|
24681 |
+
"loss": 0.0037,
|
24682 |
+
"step": 3509
|
24683 |
+
},
|
24684 |
+
{
|
24685 |
+
"epoch": 9.303973509933774,
|
24686 |
+
"grad_norm": 0.06178402900695801,
|
24687 |
+
"learning_rate": 3.4482758620689657e-05,
|
24688 |
+
"loss": 0.0035,
|
24689 |
+
"step": 3510
|
24690 |
+
},
|
24691 |
+
{
|
24692 |
+
"epoch": 9.306622516556292,
|
24693 |
+
"grad_norm": 0.04404199495911598,
|
24694 |
+
"learning_rate": 3.4350132625994695e-05,
|
24695 |
+
"loss": 0.0029,
|
24696 |
+
"step": 3511
|
24697 |
+
},
|
24698 |
+
{
|
24699 |
+
"epoch": 9.309271523178808,
|
24700 |
+
"grad_norm": 0.06852854043245316,
|
24701 |
+
"learning_rate": 3.421750663129974e-05,
|
24702 |
+
"loss": 0.0035,
|
24703 |
+
"step": 3512
|
24704 |
+
},
|
24705 |
+
{
|
24706 |
+
"epoch": 9.311920529801325,
|
24707 |
+
"grad_norm": 0.10176531225442886,
|
24708 |
+
"learning_rate": 3.408488063660478e-05,
|
24709 |
+
"loss": 0.0045,
|
24710 |
+
"step": 3513
|
24711 |
+
},
|
24712 |
+
{
|
24713 |
+
"epoch": 9.314569536423841,
|
24714 |
+
"grad_norm": 0.0930376946926117,
|
24715 |
+
"learning_rate": 3.395225464190981e-05,
|
24716 |
+
"loss": 0.0045,
|
24717 |
+
"step": 3514
|
24718 |
+
},
|
24719 |
+
{
|
24720 |
+
"epoch": 9.317218543046357,
|
24721 |
+
"grad_norm": 0.07334122061729431,
|
24722 |
+
"learning_rate": 3.381962864721485e-05,
|
24723 |
+
"loss": 0.0045,
|
24724 |
+
"step": 3515
|
24725 |
+
},
|
24726 |
+
{
|
24727 |
+
"epoch": 9.319867549668874,
|
24728 |
+
"grad_norm": 0.060344111174345016,
|
24729 |
+
"learning_rate": 3.368700265251989e-05,
|
24730 |
+
"loss": 0.0035,
|
24731 |
+
"step": 3516
|
24732 |
+
},
|
24733 |
+
{
|
24734 |
+
"epoch": 9.32251655629139,
|
24735 |
+
"grad_norm": 0.08247268199920654,
|
24736 |
+
"learning_rate": 3.355437665782493e-05,
|
24737 |
+
"loss": 0.0041,
|
24738 |
+
"step": 3517
|
24739 |
+
},
|
24740 |
+
{
|
24741 |
+
"epoch": 9.325165562913908,
|
24742 |
+
"grad_norm": 0.15309296548366547,
|
24743 |
+
"learning_rate": 3.3421750663129974e-05,
|
24744 |
+
"loss": 0.0049,
|
24745 |
+
"step": 3518
|
24746 |
+
},
|
24747 |
+
{
|
24748 |
+
"epoch": 9.327814569536423,
|
24749 |
+
"grad_norm": 0.31503990292549133,
|
24750 |
+
"learning_rate": 3.328912466843501e-05,
|
24751 |
+
"loss": 0.0089,
|
24752 |
+
"step": 3519
|
24753 |
+
},
|
24754 |
+
{
|
24755 |
+
"epoch": 9.330463576158941,
|
24756 |
+
"grad_norm": 0.04644061625003815,
|
24757 |
+
"learning_rate": 3.315649867374006e-05,
|
24758 |
+
"loss": 0.0027,
|
24759 |
+
"step": 3520
|
24760 |
+
},
|
24761 |
+
{
|
24762 |
+
"epoch": 9.333112582781457,
|
24763 |
+
"grad_norm": 0.4016202986240387,
|
24764 |
+
"learning_rate": 3.3023872679045095e-05,
|
24765 |
+
"loss": 0.0078,
|
24766 |
+
"step": 3521
|
24767 |
+
},
|
24768 |
+
{
|
24769 |
+
"epoch": 9.335761589403974,
|
24770 |
+
"grad_norm": 0.4134283661842346,
|
24771 |
+
"learning_rate": 3.289124668435013e-05,
|
24772 |
+
"loss": 0.0062,
|
24773 |
+
"step": 3522
|
24774 |
+
},
|
24775 |
+
{
|
24776 |
+
"epoch": 9.33841059602649,
|
24777 |
+
"grad_norm": 0.106345534324646,
|
24778 |
+
"learning_rate": 3.275862068965518e-05,
|
24779 |
+
"loss": 0.0049,
|
24780 |
+
"step": 3523
|
24781 |
+
},
|
24782 |
+
{
|
24783 |
+
"epoch": 9.341059602649006,
|
24784 |
+
"grad_norm": 0.1517058163881302,
|
24785 |
+
"learning_rate": 3.2625994694960215e-05,
|
24786 |
+
"loss": 0.0057,
|
24787 |
+
"step": 3524
|
24788 |
+
},
|
24789 |
+
{
|
24790 |
+
"epoch": 9.343708609271523,
|
24791 |
+
"grad_norm": 0.06866603344678879,
|
24792 |
+
"learning_rate": 3.2493368700265253e-05,
|
24793 |
+
"loss": 0.0038,
|
24794 |
+
"step": 3525
|
24795 |
+
},
|
24796 |
+
{
|
24797 |
+
"epoch": 9.34635761589404,
|
24798 |
+
"grad_norm": 0.07904902845621109,
|
24799 |
+
"learning_rate": 3.23607427055703e-05,
|
24800 |
+
"loss": 0.0041,
|
24801 |
+
"step": 3526
|
24802 |
+
},
|
24803 |
+
{
|
24804 |
+
"epoch": 9.349006622516557,
|
24805 |
+
"grad_norm": 0.09016682952642441,
|
24806 |
+
"learning_rate": 3.222811671087533e-05,
|
24807 |
+
"loss": 0.0045,
|
24808 |
+
"step": 3527
|
24809 |
+
},
|
24810 |
+
{
|
24811 |
+
"epoch": 9.351655629139072,
|
24812 |
+
"grad_norm": 0.04925484582781792,
|
24813 |
+
"learning_rate": 3.209549071618037e-05,
|
24814 |
+
"loss": 0.0033,
|
24815 |
+
"step": 3528
|
24816 |
+
},
|
24817 |
+
{
|
24818 |
+
"epoch": 9.35430463576159,
|
24819 |
+
"grad_norm": 0.04000997170805931,
|
24820 |
+
"learning_rate": 3.196286472148541e-05,
|
24821 |
+
"loss": 0.0029,
|
24822 |
+
"step": 3529
|
24823 |
+
},
|
24824 |
+
{
|
24825 |
+
"epoch": 9.356953642384106,
|
24826 |
+
"grad_norm": 0.17996680736541748,
|
24827 |
+
"learning_rate": 3.183023872679045e-05,
|
24828 |
+
"loss": 0.0055,
|
24829 |
+
"step": 3530
|
24830 |
+
},
|
24831 |
+
{
|
24832 |
+
"epoch": 9.359602649006623,
|
24833 |
+
"grad_norm": 0.19719724357128143,
|
24834 |
+
"learning_rate": 3.169761273209549e-05,
|
24835 |
+
"loss": 0.0065,
|
24836 |
+
"step": 3531
|
24837 |
+
},
|
24838 |
+
{
|
24839 |
+
"epoch": 9.362251655629139,
|
24840 |
+
"grad_norm": 0.05968065187335014,
|
24841 |
+
"learning_rate": 3.156498673740053e-05,
|
24842 |
+
"loss": 0.0038,
|
24843 |
+
"step": 3532
|
24844 |
+
},
|
24845 |
+
{
|
24846 |
+
"epoch": 9.364900662251655,
|
24847 |
+
"grad_norm": 0.09736710041761398,
|
24848 |
+
"learning_rate": 3.143236074270557e-05,
|
24849 |
+
"loss": 0.0046,
|
24850 |
+
"step": 3533
|
24851 |
+
},
|
24852 |
+
{
|
24853 |
+
"epoch": 9.367549668874172,
|
24854 |
+
"grad_norm": 0.08279327303171158,
|
24855 |
+
"learning_rate": 3.129973474801061e-05,
|
24856 |
+
"loss": 0.0037,
|
24857 |
+
"step": 3534
|
24858 |
+
},
|
24859 |
+
{
|
24860 |
+
"epoch": 9.370198675496688,
|
24861 |
+
"grad_norm": 0.072016641497612,
|
24862 |
+
"learning_rate": 3.116710875331565e-05,
|
24863 |
+
"loss": 0.0041,
|
24864 |
+
"step": 3535
|
24865 |
+
},
|
24866 |
+
{
|
24867 |
+
"epoch": 9.372847682119206,
|
24868 |
+
"grad_norm": 0.06081625074148178,
|
24869 |
+
"learning_rate": 3.103448275862069e-05,
|
24870 |
+
"loss": 0.0034,
|
24871 |
+
"step": 3536
|
24872 |
+
},
|
24873 |
+
{
|
24874 |
+
"epoch": 9.375496688741721,
|
24875 |
+
"grad_norm": 0.07212834060192108,
|
24876 |
+
"learning_rate": 3.090185676392573e-05,
|
24877 |
+
"loss": 0.004,
|
24878 |
+
"step": 3537
|
24879 |
+
},
|
24880 |
+
{
|
24881 |
+
"epoch": 9.378145695364239,
|
24882 |
+
"grad_norm": 0.07869751751422882,
|
24883 |
+
"learning_rate": 3.0769230769230774e-05,
|
24884 |
+
"loss": 0.0034,
|
24885 |
+
"step": 3538
|
24886 |
+
},
|
24887 |
+
{
|
24888 |
+
"epoch": 9.380794701986755,
|
24889 |
+
"grad_norm": 0.049005139619112015,
|
24890 |
+
"learning_rate": 3.063660477453581e-05,
|
24891 |
+
"loss": 0.0035,
|
24892 |
+
"step": 3539
|
24893 |
+
},
|
24894 |
+
{
|
24895 |
+
"epoch": 9.383443708609272,
|
24896 |
+
"grad_norm": 0.05128003656864166,
|
24897 |
+
"learning_rate": 3.050397877984085e-05,
|
24898 |
+
"loss": 0.0036,
|
24899 |
+
"step": 3540
|
24900 |
+
},
|
24901 |
+
{
|
24902 |
+
"epoch": 9.386092715231788,
|
24903 |
+
"grad_norm": 0.11874201148748398,
|
24904 |
+
"learning_rate": 3.0371352785145892e-05,
|
24905 |
+
"loss": 0.0052,
|
24906 |
+
"step": 3541
|
24907 |
+
},
|
24908 |
+
{
|
24909 |
+
"epoch": 9.388741721854304,
|
24910 |
+
"grad_norm": 0.2354889214038849,
|
24911 |
+
"learning_rate": 3.023872679045093e-05,
|
24912 |
+
"loss": 0.0068,
|
24913 |
+
"step": 3542
|
24914 |
+
},
|
24915 |
+
{
|
24916 |
+
"epoch": 9.391390728476821,
|
24917 |
+
"grad_norm": 0.19837160408496857,
|
24918 |
+
"learning_rate": 3.0106100795755968e-05,
|
24919 |
+
"loss": 0.0055,
|
24920 |
+
"step": 3543
|
24921 |
+
},
|
24922 |
+
{
|
24923 |
+
"epoch": 9.394039735099337,
|
24924 |
+
"grad_norm": 0.13990575075149536,
|
24925 |
+
"learning_rate": 2.997347480106101e-05,
|
24926 |
+
"loss": 0.005,
|
24927 |
+
"step": 3544
|
24928 |
+
},
|
24929 |
+
{
|
24930 |
+
"epoch": 9.396688741721855,
|
24931 |
+
"grad_norm": 0.08963897079229355,
|
24932 |
+
"learning_rate": 2.9840848806366047e-05,
|
24933 |
+
"loss": 0.0048,
|
24934 |
+
"step": 3545
|
24935 |
+
},
|
24936 |
+
{
|
24937 |
+
"epoch": 9.39933774834437,
|
24938 |
+
"grad_norm": 0.40259331464767456,
|
24939 |
+
"learning_rate": 2.970822281167109e-05,
|
24940 |
+
"loss": 0.0072,
|
24941 |
+
"step": 3546
|
24942 |
+
},
|
24943 |
+
{
|
24944 |
+
"epoch": 9.401986754966888,
|
24945 |
+
"grad_norm": 0.11104849725961685,
|
24946 |
+
"learning_rate": 2.957559681697613e-05,
|
24947 |
+
"loss": 0.004,
|
24948 |
+
"step": 3547
|
24949 |
+
},
|
24950 |
+
{
|
24951 |
+
"epoch": 9.404635761589404,
|
24952 |
+
"grad_norm": 0.054640308022499084,
|
24953 |
+
"learning_rate": 2.9442970822281168e-05,
|
24954 |
+
"loss": 0.0035,
|
24955 |
+
"step": 3548
|
24956 |
+
},
|
24957 |
+
{
|
24958 |
+
"epoch": 9.407284768211921,
|
24959 |
+
"grad_norm": 0.05192907899618149,
|
24960 |
+
"learning_rate": 2.9310344827586206e-05,
|
24961 |
+
"loss": 0.0042,
|
24962 |
+
"step": 3549
|
24963 |
+
},
|
24964 |
+
{
|
24965 |
+
"epoch": 9.409933774834437,
|
24966 |
+
"grad_norm": 0.04460398480296135,
|
24967 |
+
"learning_rate": 2.9177718832891247e-05,
|
24968 |
+
"loss": 0.0026,
|
24969 |
+
"step": 3550
|
24970 |
+
},
|
24971 |
+
{
|
24972 |
+
"epoch": 9.412582781456953,
|
24973 |
+
"grad_norm": 0.0791364312171936,
|
24974 |
+
"learning_rate": 2.904509283819629e-05,
|
24975 |
+
"loss": 0.0042,
|
24976 |
+
"step": 3551
|
24977 |
+
},
|
24978 |
+
{
|
24979 |
+
"epoch": 9.41523178807947,
|
24980 |
+
"grad_norm": 0.16054631769657135,
|
24981 |
+
"learning_rate": 2.8912466843501326e-05,
|
24982 |
+
"loss": 0.0043,
|
24983 |
+
"step": 3552
|
24984 |
+
},
|
24985 |
+
{
|
24986 |
+
"epoch": 9.417880794701986,
|
24987 |
+
"grad_norm": 0.041672658175230026,
|
24988 |
+
"learning_rate": 2.8779840848806368e-05,
|
24989 |
+
"loss": 0.0032,
|
24990 |
+
"step": 3553
|
24991 |
+
},
|
24992 |
+
{
|
24993 |
+
"epoch": 9.420529801324504,
|
24994 |
+
"grad_norm": 0.04349193349480629,
|
24995 |
+
"learning_rate": 2.864721485411141e-05,
|
24996 |
+
"loss": 0.003,
|
24997 |
+
"step": 3554
|
24998 |
+
},
|
24999 |
+
{
|
25000 |
+
"epoch": 9.42317880794702,
|
25001 |
+
"grad_norm": 0.04152658209204674,
|
25002 |
+
"learning_rate": 2.8514588859416444e-05,
|
25003 |
+
"loss": 0.003,
|
25004 |
+
"step": 3555
|
25005 |
+
},
|
25006 |
+
{
|
25007 |
+
"epoch": 9.425827814569537,
|
25008 |
+
"grad_norm": 0.059349656105041504,
|
25009 |
+
"learning_rate": 2.8381962864721485e-05,
|
25010 |
+
"loss": 0.0033,
|
25011 |
+
"step": 3556
|
25012 |
+
},
|
25013 |
+
{
|
25014 |
+
"epoch": 9.428476821192053,
|
25015 |
+
"grad_norm": 0.06577035784721375,
|
25016 |
+
"learning_rate": 2.8249336870026527e-05,
|
25017 |
+
"loss": 0.0039,
|
25018 |
+
"step": 3557
|
25019 |
+
},
|
25020 |
+
{
|
25021 |
+
"epoch": 9.431125827814569,
|
25022 |
+
"grad_norm": 0.051312051713466644,
|
25023 |
+
"learning_rate": 2.8116710875331565e-05,
|
25024 |
+
"loss": 0.003,
|
25025 |
+
"step": 3558
|
25026 |
+
},
|
25027 |
+
{
|
25028 |
+
"epoch": 9.433774834437086,
|
25029 |
+
"grad_norm": 0.12479935586452484,
|
25030 |
+
"learning_rate": 2.7984084880636606e-05,
|
25031 |
+
"loss": 0.0046,
|
25032 |
+
"step": 3559
|
25033 |
+
},
|
25034 |
+
{
|
25035 |
+
"epoch": 9.436423841059602,
|
25036 |
+
"grad_norm": 0.17824234068393707,
|
25037 |
+
"learning_rate": 2.7851458885941647e-05,
|
25038 |
+
"loss": 0.0053,
|
25039 |
+
"step": 3560
|
25040 |
+
},
|
25041 |
+
{
|
25042 |
+
"epoch": 9.43907284768212,
|
25043 |
+
"grad_norm": 0.2889949679374695,
|
25044 |
+
"learning_rate": 2.7718832891246682e-05,
|
25045 |
+
"loss": 0.0065,
|
25046 |
+
"step": 3561
|
25047 |
+
},
|
25048 |
+
{
|
25049 |
+
"epoch": 9.441721854304635,
|
25050 |
+
"grad_norm": 2.6025192737579346,
|
25051 |
+
"learning_rate": 2.7586206896551723e-05,
|
25052 |
+
"loss": 0.0161,
|
25053 |
+
"step": 3562
|
25054 |
+
},
|
25055 |
+
{
|
25056 |
+
"epoch": 9.444370860927153,
|
25057 |
+
"grad_norm": 0.0740509107708931,
|
25058 |
+
"learning_rate": 2.7453580901856765e-05,
|
25059 |
+
"loss": 0.0045,
|
25060 |
+
"step": 3563
|
25061 |
+
},
|
25062 |
+
{
|
25063 |
+
"epoch": 9.447019867549669,
|
25064 |
+
"grad_norm": 0.06827876716852188,
|
25065 |
+
"learning_rate": 2.7320954907161803e-05,
|
25066 |
+
"loss": 0.0033,
|
25067 |
+
"step": 3564
|
25068 |
+
},
|
25069 |
+
{
|
25070 |
+
"epoch": 9.449668874172186,
|
25071 |
+
"grad_norm": 0.07209566235542297,
|
25072 |
+
"learning_rate": 2.7188328912466844e-05,
|
25073 |
+
"loss": 0.0041,
|
25074 |
+
"step": 3565
|
25075 |
+
},
|
25076 |
+
{
|
25077 |
+
"epoch": 9.452317880794702,
|
25078 |
+
"grad_norm": 0.32902833819389343,
|
25079 |
+
"learning_rate": 2.7055702917771885e-05,
|
25080 |
+
"loss": 0.0092,
|
25081 |
+
"step": 3566
|
25082 |
+
},
|
25083 |
+
{
|
25084 |
+
"epoch": 9.454966887417218,
|
25085 |
+
"grad_norm": 0.07957999408245087,
|
25086 |
+
"learning_rate": 2.6923076923076927e-05,
|
25087 |
+
"loss": 0.0043,
|
25088 |
+
"step": 3567
|
25089 |
+
},
|
25090 |
+
{
|
25091 |
+
"epoch": 9.457615894039735,
|
25092 |
+
"grad_norm": 0.10473563522100449,
|
25093 |
+
"learning_rate": 2.679045092838196e-05,
|
25094 |
+
"loss": 0.0047,
|
25095 |
+
"step": 3568
|
25096 |
+
},
|
25097 |
+
{
|
25098 |
+
"epoch": 9.460264900662251,
|
25099 |
+
"grad_norm": 0.07355739176273346,
|
25100 |
+
"learning_rate": 2.6657824933687003e-05,
|
25101 |
+
"loss": 0.0033,
|
25102 |
+
"step": 3569
|
25103 |
+
},
|
25104 |
+
{
|
25105 |
+
"epoch": 9.462913907284769,
|
25106 |
+
"grad_norm": 0.11686844378709793,
|
25107 |
+
"learning_rate": 2.6525198938992044e-05,
|
25108 |
+
"loss": 0.004,
|
25109 |
+
"step": 3570
|
25110 |
+
},
|
25111 |
+
{
|
25112 |
+
"epoch": 9.465562913907284,
|
25113 |
+
"grad_norm": 0.08648373931646347,
|
25114 |
+
"learning_rate": 2.6392572944297082e-05,
|
25115 |
+
"loss": 0.0047,
|
25116 |
+
"step": 3571
|
25117 |
+
},
|
25118 |
+
{
|
25119 |
+
"epoch": 9.468211920529802,
|
25120 |
+
"grad_norm": 0.24334782361984253,
|
25121 |
+
"learning_rate": 2.6259946949602123e-05,
|
25122 |
+
"loss": 0.0074,
|
25123 |
+
"step": 3572
|
25124 |
+
},
|
25125 |
+
{
|
25126 |
+
"epoch": 9.470860927152318,
|
25127 |
+
"grad_norm": 0.9272996783256531,
|
25128 |
+
"learning_rate": 2.6127320954907165e-05,
|
25129 |
+
"loss": 0.0771,
|
25130 |
+
"step": 3573
|
25131 |
+
},
|
25132 |
+
{
|
25133 |
+
"epoch": 9.473509933774835,
|
25134 |
+
"grad_norm": 0.08901096880435944,
|
25135 |
+
"learning_rate": 2.59946949602122e-05,
|
25136 |
+
"loss": 0.0049,
|
25137 |
+
"step": 3574
|
25138 |
+
},
|
25139 |
+
{
|
25140 |
+
"epoch": 9.476158940397351,
|
25141 |
+
"grad_norm": 0.27438682317733765,
|
25142 |
+
"learning_rate": 2.586206896551724e-05,
|
25143 |
+
"loss": 0.0076,
|
25144 |
+
"step": 3575
|
25145 |
+
},
|
25146 |
+
{
|
25147 |
+
"epoch": 9.478807947019867,
|
25148 |
+
"grad_norm": 0.4318556785583496,
|
25149 |
+
"learning_rate": 2.5729442970822282e-05,
|
25150 |
+
"loss": 0.0067,
|
25151 |
+
"step": 3576
|
25152 |
+
},
|
25153 |
+
{
|
25154 |
+
"epoch": 9.481456953642384,
|
25155 |
+
"grad_norm": 0.049917444586753845,
|
25156 |
+
"learning_rate": 2.559681697612732e-05,
|
25157 |
+
"loss": 0.0038,
|
25158 |
+
"step": 3577
|
25159 |
+
},
|
25160 |
+
{
|
25161 |
+
"epoch": 9.4841059602649,
|
25162 |
+
"grad_norm": 0.0731232762336731,
|
25163 |
+
"learning_rate": 2.546419098143236e-05,
|
25164 |
+
"loss": 0.0046,
|
25165 |
+
"step": 3578
|
25166 |
+
},
|
25167 |
+
{
|
25168 |
+
"epoch": 9.486754966887418,
|
25169 |
+
"grad_norm": 0.058809563517570496,
|
25170 |
+
"learning_rate": 2.5331564986737403e-05,
|
25171 |
+
"loss": 0.0036,
|
25172 |
+
"step": 3579
|
25173 |
+
},
|
25174 |
+
{
|
25175 |
+
"epoch": 9.489403973509933,
|
25176 |
+
"grad_norm": 0.0966796875,
|
25177 |
+
"learning_rate": 2.519893899204244e-05,
|
25178 |
+
"loss": 0.0046,
|
25179 |
+
"step": 3580
|
25180 |
+
},
|
25181 |
+
{
|
25182 |
+
"epoch": 9.492052980132451,
|
25183 |
+
"grad_norm": 0.09159884601831436,
|
25184 |
+
"learning_rate": 2.506631299734748e-05,
|
25185 |
+
"loss": 0.0041,
|
25186 |
+
"step": 3581
|
25187 |
+
},
|
25188 |
+
{
|
25189 |
+
"epoch": 9.494701986754967,
|
25190 |
+
"grad_norm": 0.17658160626888275,
|
25191 |
+
"learning_rate": 2.493368700265252e-05,
|
25192 |
+
"loss": 0.0053,
|
25193 |
+
"step": 3582
|
25194 |
+
},
|
25195 |
+
{
|
25196 |
+
"epoch": 9.497350993377484,
|
25197 |
+
"grad_norm": 0.0842135101556778,
|
25198 |
+
"learning_rate": 2.480106100795756e-05,
|
25199 |
+
"loss": 0.0041,
|
25200 |
+
"step": 3583
|
25201 |
+
},
|
25202 |
+
{
|
25203 |
+
"epoch": 9.5,
|
25204 |
+
"grad_norm": 0.1723368614912033,
|
25205 |
+
"learning_rate": 2.46684350132626e-05,
|
25206 |
+
"loss": 0.0054,
|
25207 |
+
"step": 3584
|
25208 |
+
},
|
25209 |
+
{
|
25210 |
+
"epoch": 9.502649006622516,
|
25211 |
+
"grad_norm": 0.07558996230363846,
|
25212 |
+
"learning_rate": 2.453580901856764e-05,
|
25213 |
+
"loss": 0.0034,
|
25214 |
+
"step": 3585
|
25215 |
+
},
|
25216 |
+
{
|
25217 |
+
"epoch": 9.505298013245033,
|
25218 |
+
"grad_norm": 0.14581862092018127,
|
25219 |
+
"learning_rate": 2.440318302387268e-05,
|
25220 |
+
"loss": 0.0052,
|
25221 |
+
"step": 3586
|
25222 |
+
},
|
25223 |
+
{
|
25224 |
+
"epoch": 9.507947019867549,
|
25225 |
+
"grad_norm": 0.04708685725927353,
|
25226 |
+
"learning_rate": 2.4270557029177717e-05,
|
25227 |
+
"loss": 0.0031,
|
25228 |
+
"step": 3587
|
25229 |
+
},
|
25230 |
+
{
|
25231 |
+
"epoch": 9.510596026490067,
|
25232 |
+
"grad_norm": 0.3849869668483734,
|
25233 |
+
"learning_rate": 2.4137931034482758e-05,
|
25234 |
+
"loss": 0.01,
|
25235 |
+
"step": 3588
|
25236 |
+
},
|
25237 |
+
{
|
25238 |
+
"epoch": 9.513245033112582,
|
25239 |
+
"grad_norm": 0.26843276619911194,
|
25240 |
+
"learning_rate": 2.40053050397878e-05,
|
25241 |
+
"loss": 0.0084,
|
25242 |
+
"step": 3589
|
25243 |
+
},
|
25244 |
+
{
|
25245 |
+
"epoch": 9.5158940397351,
|
25246 |
+
"grad_norm": 0.05932989344000816,
|
25247 |
+
"learning_rate": 2.3872679045092838e-05,
|
25248 |
+
"loss": 0.0036,
|
25249 |
+
"step": 3590
|
25250 |
+
},
|
25251 |
+
{
|
25252 |
+
"epoch": 9.518543046357616,
|
25253 |
+
"grad_norm": 0.10128777474164963,
|
25254 |
+
"learning_rate": 2.374005305039788e-05,
|
25255 |
+
"loss": 0.0038,
|
25256 |
+
"step": 3591
|
25257 |
+
},
|
25258 |
+
{
|
25259 |
+
"epoch": 9.521192052980133,
|
25260 |
+
"grad_norm": 0.4299328923225403,
|
25261 |
+
"learning_rate": 2.360742705570292e-05,
|
25262 |
+
"loss": 0.0071,
|
25263 |
+
"step": 3592
|
25264 |
+
},
|
25265 |
+
{
|
25266 |
+
"epoch": 9.523841059602649,
|
25267 |
+
"grad_norm": 0.1375584602355957,
|
25268 |
+
"learning_rate": 2.347480106100796e-05,
|
25269 |
+
"loss": 0.0033,
|
25270 |
+
"step": 3593
|
25271 |
+
},
|
25272 |
+
{
|
25273 |
+
"epoch": 9.526490066225165,
|
25274 |
+
"grad_norm": 0.054577238857746124,
|
25275 |
+
"learning_rate": 2.3342175066312996e-05,
|
25276 |
+
"loss": 0.0031,
|
25277 |
+
"step": 3594
|
25278 |
+
},
|
25279 |
+
{
|
25280 |
+
"epoch": 9.529139072847682,
|
25281 |
+
"grad_norm": 0.036791156977415085,
|
25282 |
+
"learning_rate": 2.3209549071618038e-05,
|
25283 |
+
"loss": 0.0026,
|
25284 |
+
"step": 3595
|
25285 |
+
},
|
25286 |
+
{
|
25287 |
+
"epoch": 9.531788079470198,
|
25288 |
+
"grad_norm": 0.05285593122243881,
|
25289 |
+
"learning_rate": 2.307692307692308e-05,
|
25290 |
+
"loss": 0.0032,
|
25291 |
+
"step": 3596
|
25292 |
+
},
|
25293 |
+
{
|
25294 |
+
"epoch": 9.534437086092716,
|
25295 |
+
"grad_norm": 0.03877342492341995,
|
25296 |
+
"learning_rate": 2.2944297082228117e-05,
|
25297 |
+
"loss": 0.0026,
|
25298 |
+
"step": 3597
|
25299 |
+
},
|
25300 |
+
{
|
25301 |
+
"epoch": 9.537086092715231,
|
25302 |
+
"grad_norm": 0.06022324413061142,
|
25303 |
+
"learning_rate": 2.281167108753316e-05,
|
25304 |
+
"loss": 0.0039,
|
25305 |
+
"step": 3598
|
25306 |
+
},
|
25307 |
+
{
|
25308 |
+
"epoch": 9.539735099337749,
|
25309 |
+
"grad_norm": 0.04812643304467201,
|
25310 |
+
"learning_rate": 2.2679045092838196e-05,
|
25311 |
+
"loss": 0.0024,
|
25312 |
+
"step": 3599
|
25313 |
+
},
|
25314 |
+
{
|
25315 |
+
"epoch": 9.542384105960265,
|
25316 |
+
"grad_norm": 0.1394866406917572,
|
25317 |
+
"learning_rate": 2.2546419098143234e-05,
|
25318 |
+
"loss": 0.0051,
|
25319 |
+
"step": 3600
|
25320 |
+
},
|
25321 |
+
{
|
25322 |
+
"epoch": 9.545033112582782,
|
25323 |
+
"grad_norm": 0.0539911687374115,
|
25324 |
+
"learning_rate": 2.2413793103448276e-05,
|
25325 |
+
"loss": 0.0036,
|
25326 |
+
"step": 3601
|
25327 |
+
},
|
25328 |
+
{
|
25329 |
+
"epoch": 9.547682119205298,
|
25330 |
+
"grad_norm": 0.04176941514015198,
|
25331 |
+
"learning_rate": 2.2281167108753317e-05,
|
25332 |
+
"loss": 0.0026,
|
25333 |
+
"step": 3602
|
25334 |
+
},
|
25335 |
+
{
|
25336 |
+
"epoch": 9.550331125827814,
|
25337 |
+
"grad_norm": 0.04899568855762482,
|
25338 |
+
"learning_rate": 2.2148541114058355e-05,
|
25339 |
+
"loss": 0.003,
|
25340 |
+
"step": 3603
|
25341 |
+
},
|
25342 |
+
{
|
25343 |
+
"epoch": 9.552980132450331,
|
25344 |
+
"grad_norm": 0.0828976109623909,
|
25345 |
+
"learning_rate": 2.2015915119363396e-05,
|
25346 |
+
"loss": 0.0036,
|
25347 |
+
"step": 3604
|
25348 |
+
},
|
25349 |
+
{
|
25350 |
+
"epoch": 9.555629139072847,
|
25351 |
+
"grad_norm": 0.09971681237220764,
|
25352 |
+
"learning_rate": 2.1883289124668434e-05,
|
25353 |
+
"loss": 0.0039,
|
25354 |
+
"step": 3605
|
25355 |
+
},
|
25356 |
+
{
|
25357 |
+
"epoch": 9.558278145695365,
|
25358 |
+
"grad_norm": 0.05361773073673248,
|
25359 |
+
"learning_rate": 2.1750663129973476e-05,
|
25360 |
+
"loss": 0.0029,
|
25361 |
+
"step": 3606
|
25362 |
+
},
|
25363 |
+
{
|
25364 |
+
"epoch": 9.56092715231788,
|
25365 |
+
"grad_norm": 0.9215199947357178,
|
25366 |
+
"learning_rate": 2.1618037135278514e-05,
|
25367 |
+
"loss": 0.0091,
|
25368 |
+
"step": 3607
|
25369 |
+
},
|
25370 |
+
{
|
25371 |
+
"epoch": 9.563576158940398,
|
25372 |
+
"grad_norm": 0.06320016086101532,
|
25373 |
+
"learning_rate": 2.1485411140583555e-05,
|
25374 |
+
"loss": 0.0034,
|
25375 |
+
"step": 3608
|
25376 |
+
},
|
25377 |
+
{
|
25378 |
+
"epoch": 9.566225165562914,
|
25379 |
+
"grad_norm": 0.06845584511756897,
|
25380 |
+
"learning_rate": 2.1352785145888597e-05,
|
25381 |
+
"loss": 0.0045,
|
25382 |
+
"step": 3609
|
25383 |
+
},
|
25384 |
+
{
|
25385 |
+
"epoch": 9.568874172185431,
|
25386 |
+
"grad_norm": 0.2629742920398712,
|
25387 |
+
"learning_rate": 2.1220159151193635e-05,
|
25388 |
+
"loss": 0.0046,
|
25389 |
+
"step": 3610
|
25390 |
+
},
|
25391 |
+
{
|
25392 |
+
"epoch": 9.571523178807947,
|
25393 |
+
"grad_norm": 0.20390111207962036,
|
25394 |
+
"learning_rate": 2.1087533156498673e-05,
|
25395 |
+
"loss": 0.0072,
|
25396 |
+
"step": 3611
|
25397 |
+
},
|
25398 |
+
{
|
25399 |
+
"epoch": 9.574172185430463,
|
25400 |
+
"grad_norm": 0.09552650153636932,
|
25401 |
+
"learning_rate": 2.0954907161803714e-05,
|
25402 |
+
"loss": 0.0042,
|
25403 |
+
"step": 3612
|
25404 |
+
},
|
25405 |
+
{
|
25406 |
+
"epoch": 9.57682119205298,
|
25407 |
+
"grad_norm": 0.1757827252149582,
|
25408 |
+
"learning_rate": 2.0822281167108752e-05,
|
25409 |
+
"loss": 0.0051,
|
25410 |
+
"step": 3613
|
25411 |
+
},
|
25412 |
+
{
|
25413 |
+
"epoch": 9.579470198675496,
|
25414 |
+
"grad_norm": 0.06707777082920074,
|
25415 |
+
"learning_rate": 2.0689655172413793e-05,
|
25416 |
+
"loss": 0.0033,
|
25417 |
+
"step": 3614
|
25418 |
+
},
|
25419 |
+
{
|
25420 |
+
"epoch": 9.582119205298014,
|
25421 |
+
"grad_norm": 0.06632061302661896,
|
25422 |
+
"learning_rate": 2.0557029177718835e-05,
|
25423 |
+
"loss": 0.0049,
|
25424 |
+
"step": 3615
|
25425 |
+
},
|
25426 |
+
{
|
25427 |
+
"epoch": 9.58476821192053,
|
25428 |
+
"grad_norm": 0.13070222735404968,
|
25429 |
+
"learning_rate": 2.0424403183023873e-05,
|
25430 |
+
"loss": 0.0053,
|
25431 |
+
"step": 3616
|
25432 |
+
},
|
25433 |
+
{
|
25434 |
+
"epoch": 9.587417218543047,
|
25435 |
+
"grad_norm": 0.14693477749824524,
|
25436 |
+
"learning_rate": 2.0291777188328914e-05,
|
25437 |
+
"loss": 0.0059,
|
25438 |
+
"step": 3617
|
25439 |
+
},
|
25440 |
+
{
|
25441 |
+
"epoch": 9.590066225165563,
|
25442 |
+
"grad_norm": 0.06070803850889206,
|
25443 |
+
"learning_rate": 2.0159151193633952e-05,
|
25444 |
+
"loss": 0.0031,
|
25445 |
+
"step": 3618
|
25446 |
+
},
|
25447 |
+
{
|
25448 |
+
"epoch": 9.59271523178808,
|
25449 |
+
"grad_norm": 0.05214730277657509,
|
25450 |
+
"learning_rate": 2.0026525198938993e-05,
|
25451 |
+
"loss": 0.0031,
|
25452 |
+
"step": 3619
|
25453 |
+
},
|
25454 |
+
{
|
25455 |
+
"epoch": 9.595364238410596,
|
25456 |
+
"grad_norm": 0.05979718267917633,
|
25457 |
+
"learning_rate": 1.989389920424403e-05,
|
25458 |
+
"loss": 0.0036,
|
25459 |
+
"step": 3620
|
25460 |
+
},
|
25461 |
+
{
|
25462 |
+
"epoch": 9.598013245033112,
|
25463 |
+
"grad_norm": 0.0396120548248291,
|
25464 |
+
"learning_rate": 1.9761273209549073e-05,
|
25465 |
+
"loss": 0.003,
|
25466 |
+
"step": 3621
|
25467 |
+
},
|
25468 |
+
{
|
25469 |
+
"epoch": 9.60066225165563,
|
25470 |
+
"grad_norm": 0.06984266638755798,
|
25471 |
+
"learning_rate": 1.9628647214854114e-05,
|
25472 |
+
"loss": 0.0038,
|
25473 |
+
"step": 3622
|
25474 |
+
},
|
25475 |
+
{
|
25476 |
+
"epoch": 9.603311258278145,
|
25477 |
+
"grad_norm": 0.13146281242370605,
|
25478 |
+
"learning_rate": 1.9496021220159152e-05,
|
25479 |
+
"loss": 0.0042,
|
25480 |
+
"step": 3623
|
25481 |
+
},
|
25482 |
+
{
|
25483 |
+
"epoch": 9.605960264900663,
|
25484 |
+
"grad_norm": 0.03734046220779419,
|
25485 |
+
"learning_rate": 1.936339522546419e-05,
|
25486 |
+
"loss": 0.0027,
|
25487 |
+
"step": 3624
|
25488 |
+
},
|
25489 |
+
{
|
25490 |
+
"epoch": 9.608609271523179,
|
25491 |
+
"grad_norm": 0.08083772659301758,
|
25492 |
+
"learning_rate": 1.923076923076923e-05,
|
25493 |
+
"loss": 0.0039,
|
25494 |
+
"step": 3625
|
25495 |
+
},
|
25496 |
+
{
|
25497 |
+
"epoch": 9.611258278145696,
|
25498 |
+
"grad_norm": 0.06173601746559143,
|
25499 |
+
"learning_rate": 1.909814323607427e-05,
|
25500 |
+
"loss": 0.0036,
|
25501 |
+
"step": 3626
|
25502 |
+
},
|
25503 |
+
{
|
25504 |
+
"epoch": 9.613907284768212,
|
25505 |
+
"grad_norm": 0.13348834216594696,
|
25506 |
+
"learning_rate": 1.896551724137931e-05,
|
25507 |
+
"loss": 0.0065,
|
25508 |
+
"step": 3627
|
25509 |
+
},
|
25510 |
+
{
|
25511 |
+
"epoch": 9.616556291390728,
|
25512 |
+
"grad_norm": 0.053203944116830826,
|
25513 |
+
"learning_rate": 1.8832891246684352e-05,
|
25514 |
+
"loss": 0.0035,
|
25515 |
+
"step": 3628
|
25516 |
+
},
|
25517 |
+
{
|
25518 |
+
"epoch": 9.619205298013245,
|
25519 |
+
"grad_norm": 0.14915244281291962,
|
25520 |
+
"learning_rate": 1.870026525198939e-05,
|
25521 |
+
"loss": 0.0031,
|
25522 |
+
"step": 3629
|
25523 |
+
},
|
25524 |
+
{
|
25525 |
+
"epoch": 9.621854304635761,
|
25526 |
+
"grad_norm": 0.34291309118270874,
|
25527 |
+
"learning_rate": 1.8567639257294428e-05,
|
25528 |
+
"loss": 0.0076,
|
25529 |
+
"step": 3630
|
25530 |
+
},
|
25531 |
+
{
|
25532 |
+
"epoch": 9.624503311258279,
|
25533 |
+
"grad_norm": 0.04914240911602974,
|
25534 |
+
"learning_rate": 1.843501326259947e-05,
|
25535 |
+
"loss": 0.0032,
|
25536 |
+
"step": 3631
|
25537 |
+
},
|
25538 |
+
{
|
25539 |
+
"epoch": 9.627152317880794,
|
25540 |
+
"grad_norm": 0.0698951929807663,
|
25541 |
+
"learning_rate": 1.830238726790451e-05,
|
25542 |
+
"loss": 0.0036,
|
25543 |
+
"step": 3632
|
25544 |
+
},
|
25545 |
+
{
|
25546 |
+
"epoch": 9.629801324503312,
|
25547 |
+
"grad_norm": 0.06013815850019455,
|
25548 |
+
"learning_rate": 1.816976127320955e-05,
|
25549 |
+
"loss": 0.0041,
|
25550 |
+
"step": 3633
|
25551 |
+
},
|
25552 |
+
{
|
25553 |
+
"epoch": 9.632450331125828,
|
25554 |
+
"grad_norm": 0.13527649641036987,
|
25555 |
+
"learning_rate": 1.803713527851459e-05,
|
25556 |
+
"loss": 0.0059,
|
25557 |
+
"step": 3634
|
25558 |
+
},
|
25559 |
+
{
|
25560 |
+
"epoch": 9.635099337748345,
|
25561 |
+
"grad_norm": 0.10634315013885498,
|
25562 |
+
"learning_rate": 1.790450928381963e-05,
|
25563 |
+
"loss": 0.0039,
|
25564 |
+
"step": 3635
|
25565 |
+
},
|
25566 |
+
{
|
25567 |
+
"epoch": 9.637748344370861,
|
25568 |
+
"grad_norm": 0.09149981290102005,
|
25569 |
+
"learning_rate": 1.7771883289124666e-05,
|
25570 |
+
"loss": 0.0041,
|
25571 |
+
"step": 3636
|
25572 |
+
},
|
25573 |
+
{
|
25574 |
+
"epoch": 9.640397350993377,
|
25575 |
+
"grad_norm": 0.063264399766922,
|
25576 |
+
"learning_rate": 1.7639257294429708e-05,
|
25577 |
+
"loss": 0.0036,
|
25578 |
+
"step": 3637
|
25579 |
+
},
|
25580 |
+
{
|
25581 |
+
"epoch": 9.643046357615894,
|
25582 |
+
"grad_norm": 0.03329646959900856,
|
25583 |
+
"learning_rate": 1.750663129973475e-05,
|
25584 |
+
"loss": 0.0024,
|
25585 |
+
"step": 3638
|
25586 |
+
},
|
25587 |
+
{
|
25588 |
+
"epoch": 9.64569536423841,
|
25589 |
+
"grad_norm": 0.06810608506202698,
|
25590 |
+
"learning_rate": 1.7374005305039787e-05,
|
25591 |
+
"loss": 0.0034,
|
25592 |
+
"step": 3639
|
25593 |
+
},
|
25594 |
+
{
|
25595 |
+
"epoch": 9.648344370860928,
|
25596 |
+
"grad_norm": 0.24302716553211212,
|
25597 |
+
"learning_rate": 1.7241379310344828e-05,
|
25598 |
+
"loss": 0.0044,
|
25599 |
+
"step": 3640
|
25600 |
+
},
|
25601 |
+
{
|
25602 |
+
"epoch": 9.650993377483443,
|
25603 |
+
"grad_norm": 0.040287893265485764,
|
25604 |
+
"learning_rate": 1.710875331564987e-05,
|
25605 |
+
"loss": 0.0026,
|
25606 |
+
"step": 3641
|
25607 |
+
},
|
25608 |
+
{
|
25609 |
+
"epoch": 9.65364238410596,
|
25610 |
+
"grad_norm": 0.04351738840341568,
|
25611 |
+
"learning_rate": 1.6976127320954904e-05,
|
25612 |
+
"loss": 0.0032,
|
25613 |
+
"step": 3642
|
25614 |
+
},
|
25615 |
+
{
|
25616 |
+
"epoch": 9.656291390728477,
|
25617 |
+
"grad_norm": 0.06122462451457977,
|
25618 |
+
"learning_rate": 1.6843501326259946e-05,
|
25619 |
+
"loss": 0.0037,
|
25620 |
+
"step": 3643
|
25621 |
+
},
|
25622 |
+
{
|
25623 |
+
"epoch": 9.658940397350994,
|
25624 |
+
"grad_norm": 0.040622591972351074,
|
25625 |
+
"learning_rate": 1.6710875331564987e-05,
|
25626 |
+
"loss": 0.0028,
|
25627 |
+
"step": 3644
|
25628 |
+
},
|
25629 |
+
{
|
25630 |
+
"epoch": 9.66158940397351,
|
25631 |
+
"grad_norm": 0.36340340971946716,
|
25632 |
+
"learning_rate": 1.657824933687003e-05,
|
25633 |
+
"loss": 0.0091,
|
25634 |
+
"step": 3645
|
25635 |
+
},
|
25636 |
+
{
|
25637 |
+
"epoch": 9.664238410596026,
|
25638 |
+
"grad_norm": 3.7831473350524902,
|
25639 |
+
"learning_rate": 1.6445623342175066e-05,
|
25640 |
+
"loss": 0.0612,
|
25641 |
+
"step": 3646
|
25642 |
+
},
|
25643 |
+
{
|
25644 |
+
"epoch": 9.666887417218543,
|
25645 |
+
"grad_norm": 0.10211093723773956,
|
25646 |
+
"learning_rate": 1.6312997347480108e-05,
|
25647 |
+
"loss": 0.0046,
|
25648 |
+
"step": 3647
|
25649 |
+
},
|
25650 |
+
{
|
25651 |
+
"epoch": 9.669536423841059,
|
25652 |
+
"grad_norm": 0.06653808057308197,
|
25653 |
+
"learning_rate": 1.618037135278515e-05,
|
25654 |
+
"loss": 0.004,
|
25655 |
+
"step": 3648
|
25656 |
+
},
|
25657 |
+
{
|
25658 |
+
"epoch": 9.672185430463577,
|
25659 |
+
"grad_norm": 0.15289029479026794,
|
25660 |
+
"learning_rate": 1.6047745358090184e-05,
|
25661 |
+
"loss": 0.0041,
|
25662 |
+
"step": 3649
|
25663 |
+
},
|
25664 |
+
{
|
25665 |
+
"epoch": 9.674834437086092,
|
25666 |
+
"grad_norm": 0.07520382106304169,
|
25667 |
+
"learning_rate": 1.5915119363395225e-05,
|
25668 |
+
"loss": 0.0045,
|
25669 |
+
"step": 3650
|
25670 |
+
},
|
25671 |
+
{
|
25672 |
+
"epoch": 9.67748344370861,
|
25673 |
+
"grad_norm": 0.052842143923044205,
|
25674 |
+
"learning_rate": 1.5782493368700266e-05,
|
25675 |
+
"loss": 0.0029,
|
25676 |
+
"step": 3651
|
25677 |
+
},
|
25678 |
+
{
|
25679 |
+
"epoch": 9.680132450331126,
|
25680 |
+
"grad_norm": 0.5506959557533264,
|
25681 |
+
"learning_rate": 1.5649867374005304e-05,
|
25682 |
+
"loss": 0.0068,
|
25683 |
+
"step": 3652
|
25684 |
+
},
|
25685 |
+
{
|
25686 |
+
"epoch": 9.682781456953641,
|
25687 |
+
"grad_norm": 0.05929022654891014,
|
25688 |
+
"learning_rate": 1.5517241379310346e-05,
|
25689 |
+
"loss": 0.0038,
|
25690 |
+
"step": 3653
|
25691 |
+
},
|
25692 |
+
{
|
25693 |
+
"epoch": 9.685430463576159,
|
25694 |
+
"grad_norm": 0.06791640818119049,
|
25695 |
+
"learning_rate": 1.5384615384615387e-05,
|
25696 |
+
"loss": 0.0035,
|
25697 |
+
"step": 3654
|
25698 |
+
},
|
25699 |
+
{
|
25700 |
+
"epoch": 9.688079470198675,
|
25701 |
+
"grad_norm": 0.04293552041053772,
|
25702 |
+
"learning_rate": 1.5251989389920425e-05,
|
25703 |
+
"loss": 0.0028,
|
25704 |
+
"step": 3655
|
25705 |
+
},
|
25706 |
+
{
|
25707 |
+
"epoch": 9.690728476821192,
|
25708 |
+
"grad_norm": 0.06772167980670929,
|
25709 |
+
"learning_rate": 1.5119363395225465e-05,
|
25710 |
+
"loss": 0.0041,
|
25711 |
+
"step": 3656
|
25712 |
+
},
|
25713 |
+
{
|
25714 |
+
"epoch": 9.693377483443708,
|
25715 |
+
"grad_norm": 0.055073171854019165,
|
25716 |
+
"learning_rate": 1.4986737400530505e-05,
|
25717 |
+
"loss": 0.0037,
|
25718 |
+
"step": 3657
|
25719 |
+
},
|
25720 |
+
{
|
25721 |
+
"epoch": 9.696026490066226,
|
25722 |
+
"grad_norm": 0.07414185255765915,
|
25723 |
+
"learning_rate": 1.4854111405835544e-05,
|
25724 |
+
"loss": 0.0048,
|
25725 |
+
"step": 3658
|
25726 |
+
},
|
25727 |
+
{
|
25728 |
+
"epoch": 9.698675496688741,
|
25729 |
+
"grad_norm": 0.1557607650756836,
|
25730 |
+
"learning_rate": 1.4721485411140584e-05,
|
25731 |
+
"loss": 0.005,
|
25732 |
+
"step": 3659
|
25733 |
+
},
|
25734 |
+
{
|
25735 |
+
"epoch": 9.701324503311259,
|
25736 |
+
"grad_norm": 0.04627758264541626,
|
25737 |
+
"learning_rate": 1.4588859416445624e-05,
|
25738 |
+
"loss": 0.0024,
|
25739 |
+
"step": 3660
|
25740 |
+
},
|
25741 |
+
{
|
25742 |
+
"epoch": 9.703973509933775,
|
25743 |
+
"grad_norm": 0.12415965646505356,
|
25744 |
+
"learning_rate": 1.4456233421750663e-05,
|
25745 |
+
"loss": 0.003,
|
25746 |
+
"step": 3661
|
25747 |
+
},
|
25748 |
+
{
|
25749 |
+
"epoch": 9.70662251655629,
|
25750 |
+
"grad_norm": 0.19970257580280304,
|
25751 |
+
"learning_rate": 1.4323607427055705e-05,
|
25752 |
+
"loss": 0.0053,
|
25753 |
+
"step": 3662
|
25754 |
+
},
|
25755 |
+
{
|
25756 |
+
"epoch": 9.709271523178808,
|
25757 |
+
"grad_norm": 0.03533896803855896,
|
25758 |
+
"learning_rate": 1.4190981432360743e-05,
|
25759 |
+
"loss": 0.0026,
|
25760 |
+
"step": 3663
|
25761 |
+
},
|
25762 |
+
{
|
25763 |
+
"epoch": 9.711920529801324,
|
25764 |
+
"grad_norm": 0.09190724790096283,
|
25765 |
+
"learning_rate": 1.4058355437665782e-05,
|
25766 |
+
"loss": 0.0041,
|
25767 |
+
"step": 3664
|
25768 |
+
},
|
25769 |
+
{
|
25770 |
+
"epoch": 9.714569536423841,
|
25771 |
+
"grad_norm": 0.11670997738838196,
|
25772 |
+
"learning_rate": 1.3925729442970824e-05,
|
25773 |
+
"loss": 0.0045,
|
25774 |
+
"step": 3665
|
25775 |
+
},
|
25776 |
+
{
|
25777 |
+
"epoch": 9.717218543046357,
|
25778 |
+
"grad_norm": 0.05072174221277237,
|
25779 |
+
"learning_rate": 1.3793103448275862e-05,
|
25780 |
+
"loss": 0.0031,
|
25781 |
+
"step": 3666
|
25782 |
+
},
|
25783 |
+
{
|
25784 |
+
"epoch": 9.719867549668875,
|
25785 |
+
"grad_norm": 0.08154340088367462,
|
25786 |
+
"learning_rate": 1.3660477453580901e-05,
|
25787 |
+
"loss": 0.0042,
|
25788 |
+
"step": 3667
|
25789 |
+
},
|
25790 |
+
{
|
25791 |
+
"epoch": 9.72251655629139,
|
25792 |
+
"grad_norm": 0.074842169880867,
|
25793 |
+
"learning_rate": 1.3527851458885943e-05,
|
25794 |
+
"loss": 0.0041,
|
25795 |
+
"step": 3668
|
25796 |
+
},
|
25797 |
+
{
|
25798 |
+
"epoch": 9.725165562913908,
|
25799 |
+
"grad_norm": 2.423093557357788,
|
25800 |
+
"learning_rate": 1.339522546419098e-05,
|
25801 |
+
"loss": 0.0857,
|
25802 |
+
"step": 3669
|
25803 |
+
},
|
25804 |
+
{
|
25805 |
+
"epoch": 9.727814569536424,
|
25806 |
+
"grad_norm": 0.06404265761375427,
|
25807 |
+
"learning_rate": 1.3262599469496022e-05,
|
25808 |
+
"loss": 0.0036,
|
25809 |
+
"step": 3670
|
25810 |
+
},
|
25811 |
+
{
|
25812 |
+
"epoch": 9.73046357615894,
|
25813 |
+
"grad_norm": 0.04228491336107254,
|
25814 |
+
"learning_rate": 1.3129973474801062e-05,
|
25815 |
+
"loss": 0.0025,
|
25816 |
+
"step": 3671
|
25817 |
+
},
|
25818 |
+
{
|
25819 |
+
"epoch": 9.733112582781457,
|
25820 |
+
"grad_norm": 0.0822591632604599,
|
25821 |
+
"learning_rate": 1.29973474801061e-05,
|
25822 |
+
"loss": 0.0042,
|
25823 |
+
"step": 3672
|
25824 |
+
},
|
25825 |
+
{
|
25826 |
+
"epoch": 9.735761589403973,
|
25827 |
+
"grad_norm": 0.25835084915161133,
|
25828 |
+
"learning_rate": 1.2864721485411141e-05,
|
25829 |
+
"loss": 0.0066,
|
25830 |
+
"step": 3673
|
25831 |
+
},
|
25832 |
+
{
|
25833 |
+
"epoch": 9.73841059602649,
|
25834 |
+
"grad_norm": 0.18618929386138916,
|
25835 |
+
"learning_rate": 1.273209549071618e-05,
|
25836 |
+
"loss": 0.0059,
|
25837 |
+
"step": 3674
|
25838 |
+
},
|
25839 |
+
{
|
25840 |
+
"epoch": 9.741059602649006,
|
25841 |
+
"grad_norm": 0.0757196694612503,
|
25842 |
+
"learning_rate": 1.259946949602122e-05,
|
25843 |
+
"loss": 0.0036,
|
25844 |
+
"step": 3675
|
25845 |
+
},
|
25846 |
+
{
|
25847 |
+
"epoch": 9.743708609271524,
|
25848 |
+
"grad_norm": 0.05270601063966751,
|
25849 |
+
"learning_rate": 1.246684350132626e-05,
|
25850 |
+
"loss": 0.0036,
|
25851 |
+
"step": 3676
|
25852 |
+
},
|
25853 |
+
{
|
25854 |
+
"epoch": 9.74635761589404,
|
25855 |
+
"grad_norm": 0.05169150233268738,
|
25856 |
+
"learning_rate": 1.23342175066313e-05,
|
25857 |
+
"loss": 0.0038,
|
25858 |
+
"step": 3677
|
25859 |
+
},
|
25860 |
+
{
|
25861 |
+
"epoch": 9.749006622516557,
|
25862 |
+
"grad_norm": 0.06097572669386864,
|
25863 |
+
"learning_rate": 1.220159151193634e-05,
|
25864 |
+
"loss": 0.0034,
|
25865 |
+
"step": 3678
|
25866 |
+
},
|
25867 |
+
{
|
25868 |
+
"epoch": 9.751655629139073,
|
25869 |
+
"grad_norm": 0.06827215105295181,
|
25870 |
+
"learning_rate": 1.2068965517241379e-05,
|
25871 |
+
"loss": 0.0038,
|
25872 |
+
"step": 3679
|
25873 |
+
},
|
25874 |
+
{
|
25875 |
+
"epoch": 9.754304635761589,
|
25876 |
+
"grad_norm": 0.06542674452066422,
|
25877 |
+
"learning_rate": 1.1936339522546419e-05,
|
25878 |
+
"loss": 0.0039,
|
25879 |
+
"step": 3680
|
25880 |
+
},
|
25881 |
+
{
|
25882 |
+
"epoch": 9.756953642384106,
|
25883 |
+
"grad_norm": 0.04209192842245102,
|
25884 |
+
"learning_rate": 1.180371352785146e-05,
|
25885 |
+
"loss": 0.0027,
|
25886 |
+
"step": 3681
|
25887 |
+
},
|
25888 |
+
{
|
25889 |
+
"epoch": 9.759602649006622,
|
25890 |
+
"grad_norm": 0.1020483449101448,
|
25891 |
+
"learning_rate": 1.1671087533156498e-05,
|
25892 |
+
"loss": 0.0043,
|
25893 |
+
"step": 3682
|
25894 |
+
},
|
25895 |
+
{
|
25896 |
+
"epoch": 9.76225165562914,
|
25897 |
+
"grad_norm": 0.11834131181240082,
|
25898 |
+
"learning_rate": 1.153846153846154e-05,
|
25899 |
+
"loss": 0.0039,
|
25900 |
+
"step": 3683
|
25901 |
+
},
|
25902 |
+
{
|
25903 |
+
"epoch": 9.764900662251655,
|
25904 |
+
"grad_norm": 0.205880805850029,
|
25905 |
+
"learning_rate": 1.140583554376658e-05,
|
25906 |
+
"loss": 0.0052,
|
25907 |
+
"step": 3684
|
25908 |
+
},
|
25909 |
+
{
|
25910 |
+
"epoch": 9.767549668874173,
|
25911 |
+
"grad_norm": 0.06291143596172333,
|
25912 |
+
"learning_rate": 1.1273209549071617e-05,
|
25913 |
+
"loss": 0.004,
|
25914 |
+
"step": 3685
|
25915 |
+
},
|
25916 |
+
{
|
25917 |
+
"epoch": 9.770198675496689,
|
25918 |
+
"grad_norm": 1.7837027311325073,
|
25919 |
+
"learning_rate": 1.1140583554376659e-05,
|
25920 |
+
"loss": 0.1601,
|
25921 |
+
"step": 3686
|
25922 |
+
},
|
25923 |
+
{
|
25924 |
+
"epoch": 9.772847682119206,
|
25925 |
+
"grad_norm": 0.06051596626639366,
|
25926 |
+
"learning_rate": 1.1007957559681698e-05,
|
25927 |
+
"loss": 0.004,
|
25928 |
+
"step": 3687
|
25929 |
+
},
|
25930 |
+
{
|
25931 |
+
"epoch": 9.775496688741722,
|
25932 |
+
"grad_norm": 0.03722044825553894,
|
25933 |
+
"learning_rate": 1.0875331564986738e-05,
|
25934 |
+
"loss": 0.0025,
|
25935 |
+
"step": 3688
|
25936 |
+
},
|
25937 |
+
{
|
25938 |
+
"epoch": 9.778145695364238,
|
25939 |
+
"grad_norm": 0.042234018445014954,
|
25940 |
+
"learning_rate": 1.0742705570291778e-05,
|
25941 |
+
"loss": 0.0034,
|
25942 |
+
"step": 3689
|
25943 |
+
},
|
25944 |
+
{
|
25945 |
+
"epoch": 9.780794701986755,
|
25946 |
+
"grad_norm": 0.13030007481575012,
|
25947 |
+
"learning_rate": 1.0610079575596817e-05,
|
25948 |
+
"loss": 0.0054,
|
25949 |
+
"step": 3690
|
25950 |
+
},
|
25951 |
+
{
|
25952 |
+
"epoch": 9.783443708609271,
|
25953 |
+
"grad_norm": 0.06755171716213226,
|
25954 |
+
"learning_rate": 1.0477453580901857e-05,
|
25955 |
+
"loss": 0.0032,
|
25956 |
+
"step": 3691
|
25957 |
+
},
|
25958 |
+
{
|
25959 |
+
"epoch": 9.786092715231788,
|
25960 |
+
"grad_norm": 0.47778379917144775,
|
25961 |
+
"learning_rate": 1.0344827586206897e-05,
|
25962 |
+
"loss": 0.0104,
|
25963 |
+
"step": 3692
|
25964 |
+
},
|
25965 |
+
{
|
25966 |
+
"epoch": 9.788741721854304,
|
25967 |
+
"grad_norm": 0.07621069997549057,
|
25968 |
+
"learning_rate": 1.0212201591511936e-05,
|
25969 |
+
"loss": 0.0045,
|
25970 |
+
"step": 3693
|
25971 |
+
},
|
25972 |
+
{
|
25973 |
+
"epoch": 9.791390728476822,
|
25974 |
+
"grad_norm": 0.0634002536535263,
|
25975 |
+
"learning_rate": 1.0079575596816976e-05,
|
25976 |
+
"loss": 0.0041,
|
25977 |
+
"step": 3694
|
25978 |
+
},
|
25979 |
+
{
|
25980 |
+
"epoch": 9.794039735099338,
|
25981 |
+
"grad_norm": 0.12741287052631378,
|
25982 |
+
"learning_rate": 9.946949602122016e-06,
|
25983 |
+
"loss": 0.0043,
|
25984 |
+
"step": 3695
|
25985 |
+
},
|
25986 |
+
{
|
25987 |
+
"epoch": 9.796688741721855,
|
25988 |
+
"grad_norm": 0.06733301281929016,
|
25989 |
+
"learning_rate": 9.814323607427057e-06,
|
25990 |
+
"loss": 0.0041,
|
25991 |
+
"step": 3696
|
25992 |
+
},
|
25993 |
+
{
|
25994 |
+
"epoch": 9.79933774834437,
|
25995 |
+
"grad_norm": 0.17157508432865143,
|
25996 |
+
"learning_rate": 9.681697612732095e-06,
|
25997 |
+
"loss": 0.0057,
|
25998 |
+
"step": 3697
|
25999 |
+
},
|
26000 |
+
{
|
26001 |
+
"epoch": 9.801986754966887,
|
26002 |
+
"grad_norm": 0.04867894574999809,
|
26003 |
+
"learning_rate": 9.549071618037135e-06,
|
26004 |
+
"loss": 0.0035,
|
26005 |
+
"step": 3698
|
26006 |
+
},
|
26007 |
+
{
|
26008 |
+
"epoch": 9.804635761589404,
|
26009 |
+
"grad_norm": 0.06411290913820267,
|
26010 |
+
"learning_rate": 9.416445623342176e-06,
|
26011 |
+
"loss": 0.0032,
|
26012 |
+
"step": 3699
|
26013 |
+
},
|
26014 |
+
{
|
26015 |
+
"epoch": 9.80728476821192,
|
26016 |
+
"grad_norm": 0.05204178765416145,
|
26017 |
+
"learning_rate": 9.283819628647214e-06,
|
26018 |
+
"loss": 0.0024,
|
26019 |
+
"step": 3700
|
26020 |
+
},
|
26021 |
+
{
|
26022 |
+
"epoch": 9.809933774834438,
|
26023 |
+
"grad_norm": 0.06606651842594147,
|
26024 |
+
"learning_rate": 9.151193633952255e-06,
|
26025 |
+
"loss": 0.0036,
|
26026 |
+
"step": 3701
|
26027 |
+
},
|
26028 |
+
{
|
26029 |
+
"epoch": 9.812582781456953,
|
26030 |
+
"grad_norm": 0.0978202372789383,
|
26031 |
+
"learning_rate": 9.018567639257295e-06,
|
26032 |
+
"loss": 0.0045,
|
26033 |
+
"step": 3702
|
26034 |
+
},
|
26035 |
+
{
|
26036 |
+
"epoch": 9.81523178807947,
|
26037 |
+
"grad_norm": 0.06358205527067184,
|
26038 |
+
"learning_rate": 8.885941644562333e-06,
|
26039 |
+
"loss": 0.003,
|
26040 |
+
"step": 3703
|
26041 |
+
},
|
26042 |
+
{
|
26043 |
+
"epoch": 9.817880794701987,
|
26044 |
+
"grad_norm": 0.07305984944105148,
|
26045 |
+
"learning_rate": 8.753315649867374e-06,
|
26046 |
+
"loss": 0.0041,
|
26047 |
+
"step": 3704
|
26048 |
+
},
|
26049 |
+
{
|
26050 |
+
"epoch": 9.820529801324504,
|
26051 |
+
"grad_norm": 0.04805733636021614,
|
26052 |
+
"learning_rate": 8.620689655172414e-06,
|
26053 |
+
"loss": 0.0033,
|
26054 |
+
"step": 3705
|
26055 |
+
},
|
26056 |
+
{
|
26057 |
+
"epoch": 9.82317880794702,
|
26058 |
+
"grad_norm": 0.04050121083855629,
|
26059 |
+
"learning_rate": 8.488063660477452e-06,
|
26060 |
+
"loss": 0.0026,
|
26061 |
+
"step": 3706
|
26062 |
+
},
|
26063 |
+
{
|
26064 |
+
"epoch": 9.825827814569536,
|
26065 |
+
"grad_norm": 0.3714807629585266,
|
26066 |
+
"learning_rate": 8.355437665782494e-06,
|
26067 |
+
"loss": 0.0055,
|
26068 |
+
"step": 3707
|
26069 |
+
},
|
26070 |
+
{
|
26071 |
+
"epoch": 9.828476821192053,
|
26072 |
+
"grad_norm": 0.09462448954582214,
|
26073 |
+
"learning_rate": 8.222811671087533e-06,
|
26074 |
+
"loss": 0.0044,
|
26075 |
+
"step": 3708
|
26076 |
+
},
|
26077 |
+
{
|
26078 |
+
"epoch": 9.831125827814569,
|
26079 |
+
"grad_norm": 0.09604524075984955,
|
26080 |
+
"learning_rate": 8.090185676392575e-06,
|
26081 |
+
"loss": 0.0036,
|
26082 |
+
"step": 3709
|
26083 |
+
},
|
26084 |
+
{
|
26085 |
+
"epoch": 9.833774834437087,
|
26086 |
+
"grad_norm": 0.06740278005599976,
|
26087 |
+
"learning_rate": 7.957559681697613e-06,
|
26088 |
+
"loss": 0.0037,
|
26089 |
+
"step": 3710
|
26090 |
+
},
|
26091 |
+
{
|
26092 |
+
"epoch": 9.836423841059602,
|
26093 |
+
"grad_norm": 0.10372499376535416,
|
26094 |
+
"learning_rate": 7.824933687002652e-06,
|
26095 |
+
"loss": 0.0038,
|
26096 |
+
"step": 3711
|
26097 |
+
},
|
26098 |
+
{
|
26099 |
+
"epoch": 9.83907284768212,
|
26100 |
+
"grad_norm": 0.05925479903817177,
|
26101 |
+
"learning_rate": 7.692307692307694e-06,
|
26102 |
+
"loss": 0.0035,
|
26103 |
+
"step": 3712
|
26104 |
+
},
|
26105 |
+
{
|
26106 |
+
"epoch": 9.841721854304636,
|
26107 |
+
"grad_norm": 0.08519647270441055,
|
26108 |
+
"learning_rate": 7.559681697612732e-06,
|
26109 |
+
"loss": 0.0046,
|
26110 |
+
"step": 3713
|
26111 |
+
},
|
26112 |
+
{
|
26113 |
+
"epoch": 9.844370860927153,
|
26114 |
+
"grad_norm": 0.3678317070007324,
|
26115 |
+
"learning_rate": 7.427055702917772e-06,
|
26116 |
+
"loss": 0.0056,
|
26117 |
+
"step": 3714
|
26118 |
+
},
|
26119 |
+
{
|
26120 |
+
"epoch": 9.847019867549669,
|
26121 |
+
"grad_norm": 0.066609226167202,
|
26122 |
+
"learning_rate": 7.294429708222812e-06,
|
26123 |
+
"loss": 0.0029,
|
26124 |
+
"step": 3715
|
26125 |
+
},
|
26126 |
+
{
|
26127 |
+
"epoch": 9.849668874172185,
|
26128 |
+
"grad_norm": 0.1516285091638565,
|
26129 |
+
"learning_rate": 7.161803713527852e-06,
|
26130 |
+
"loss": 0.008,
|
26131 |
+
"step": 3716
|
26132 |
+
},
|
26133 |
+
{
|
26134 |
+
"epoch": 9.852317880794702,
|
26135 |
+
"grad_norm": 0.04973870515823364,
|
26136 |
+
"learning_rate": 7.029177718832891e-06,
|
26137 |
+
"loss": 0.0034,
|
26138 |
+
"step": 3717
|
26139 |
+
},
|
26140 |
+
{
|
26141 |
+
"epoch": 9.854966887417218,
|
26142 |
+
"grad_norm": 0.07179173082113266,
|
26143 |
+
"learning_rate": 6.896551724137931e-06,
|
26144 |
+
"loss": 0.0039,
|
26145 |
+
"step": 3718
|
26146 |
+
},
|
26147 |
+
{
|
26148 |
+
"epoch": 9.857615894039736,
|
26149 |
+
"grad_norm": 0.09722237288951874,
|
26150 |
+
"learning_rate": 6.763925729442971e-06,
|
26151 |
+
"loss": 0.0047,
|
26152 |
+
"step": 3719
|
26153 |
+
},
|
26154 |
+
{
|
26155 |
+
"epoch": 9.860264900662251,
|
26156 |
+
"grad_norm": 0.04187830165028572,
|
26157 |
+
"learning_rate": 6.631299734748011e-06,
|
26158 |
+
"loss": 0.0029,
|
26159 |
+
"step": 3720
|
26160 |
+
},
|
26161 |
+
{
|
26162 |
+
"epoch": 9.862913907284769,
|
26163 |
+
"grad_norm": 0.07614066451787949,
|
26164 |
+
"learning_rate": 6.49867374005305e-06,
|
26165 |
+
"loss": 0.0037,
|
26166 |
+
"step": 3721
|
26167 |
+
},
|
26168 |
+
{
|
26169 |
+
"epoch": 9.865562913907285,
|
26170 |
+
"grad_norm": 0.34172070026397705,
|
26171 |
+
"learning_rate": 6.36604774535809e-06,
|
26172 |
+
"loss": 0.0096,
|
26173 |
+
"step": 3722
|
26174 |
+
},
|
26175 |
+
{
|
26176 |
+
"epoch": 9.868211920529802,
|
26177 |
+
"grad_norm": 0.08820667862892151,
|
26178 |
+
"learning_rate": 6.23342175066313e-06,
|
26179 |
+
"loss": 0.0036,
|
26180 |
+
"step": 3723
|
26181 |
+
},
|
26182 |
+
{
|
26183 |
+
"epoch": 9.870860927152318,
|
26184 |
+
"grad_norm": 0.12403719127178192,
|
26185 |
+
"learning_rate": 6.10079575596817e-06,
|
26186 |
+
"loss": 0.0048,
|
26187 |
+
"step": 3724
|
26188 |
+
},
|
26189 |
+
{
|
26190 |
+
"epoch": 9.873509933774834,
|
26191 |
+
"grad_norm": 0.06692824512720108,
|
26192 |
+
"learning_rate": 5.968169761273209e-06,
|
26193 |
+
"loss": 0.0039,
|
26194 |
+
"step": 3725
|
26195 |
+
},
|
26196 |
+
{
|
26197 |
+
"epoch": 9.876158940397351,
|
26198 |
+
"grad_norm": 0.7185715436935425,
|
26199 |
+
"learning_rate": 5.835543766578249e-06,
|
26200 |
+
"loss": 0.0082,
|
26201 |
+
"step": 3726
|
26202 |
+
},
|
26203 |
+
{
|
26204 |
+
"epoch": 9.878807947019867,
|
26205 |
+
"grad_norm": 0.057163890451192856,
|
26206 |
+
"learning_rate": 5.70291777188329e-06,
|
26207 |
+
"loss": 0.0038,
|
26208 |
+
"step": 3727
|
26209 |
+
},
|
26210 |
+
{
|
26211 |
+
"epoch": 9.881456953642385,
|
26212 |
+
"grad_norm": 0.0958801805973053,
|
26213 |
+
"learning_rate": 5.570291777188329e-06,
|
26214 |
+
"loss": 0.0054,
|
26215 |
+
"step": 3728
|
26216 |
+
},
|
26217 |
+
{
|
26218 |
+
"epoch": 9.8841059602649,
|
26219 |
+
"grad_norm": 0.0668308287858963,
|
26220 |
+
"learning_rate": 5.437665782493369e-06,
|
26221 |
+
"loss": 0.0037,
|
26222 |
+
"step": 3729
|
26223 |
+
},
|
26224 |
+
{
|
26225 |
+
"epoch": 9.886754966887418,
|
26226 |
+
"grad_norm": 0.5264093279838562,
|
26227 |
+
"learning_rate": 5.305039787798409e-06,
|
26228 |
+
"loss": 0.0119,
|
26229 |
+
"step": 3730
|
26230 |
+
},
|
26231 |
+
{
|
26232 |
+
"epoch": 9.889403973509934,
|
26233 |
+
"grad_norm": 0.07461276650428772,
|
26234 |
+
"learning_rate": 5.172413793103448e-06,
|
26235 |
+
"loss": 0.004,
|
26236 |
+
"step": 3731
|
26237 |
+
},
|
26238 |
+
{
|
26239 |
+
"epoch": 9.892052980132451,
|
26240 |
+
"grad_norm": 0.064832404255867,
|
26241 |
+
"learning_rate": 5.039787798408488e-06,
|
26242 |
+
"loss": 0.0036,
|
26243 |
+
"step": 3732
|
26244 |
+
},
|
26245 |
+
{
|
26246 |
+
"epoch": 9.894701986754967,
|
26247 |
+
"grad_norm": 0.05493270978331566,
|
26248 |
+
"learning_rate": 4.9071618037135285e-06,
|
26249 |
+
"loss": 0.004,
|
26250 |
+
"step": 3733
|
26251 |
+
},
|
26252 |
+
{
|
26253 |
+
"epoch": 9.897350993377483,
|
26254 |
+
"grad_norm": 0.05123838037252426,
|
26255 |
+
"learning_rate": 4.774535809018567e-06,
|
26256 |
+
"loss": 0.0034,
|
26257 |
+
"step": 3734
|
26258 |
+
},
|
26259 |
+
{
|
26260 |
+
"epoch": 9.9,
|
26261 |
+
"grad_norm": 0.08637768775224686,
|
26262 |
+
"learning_rate": 4.641909814323607e-06,
|
26263 |
+
"loss": 0.004,
|
26264 |
+
"step": 3735
|
26265 |
+
},
|
26266 |
+
{
|
26267 |
+
"epoch": 9.902649006622516,
|
26268 |
+
"grad_norm": 0.17615444958209991,
|
26269 |
+
"learning_rate": 4.5092838196286476e-06,
|
26270 |
+
"loss": 0.0054,
|
26271 |
+
"step": 3736
|
26272 |
+
},
|
26273 |
+
{
|
26274 |
+
"epoch": 9.905298013245034,
|
26275 |
+
"grad_norm": 0.06976252794265747,
|
26276 |
+
"learning_rate": 4.376657824933687e-06,
|
26277 |
+
"loss": 0.0045,
|
26278 |
+
"step": 3737
|
26279 |
+
},
|
26280 |
+
{
|
26281 |
+
"epoch": 9.90794701986755,
|
26282 |
+
"grad_norm": 0.06455208361148834,
|
26283 |
+
"learning_rate": 4.244031830238726e-06,
|
26284 |
+
"loss": 0.0031,
|
26285 |
+
"step": 3738
|
26286 |
+
},
|
26287 |
+
{
|
26288 |
+
"epoch": 9.910596026490067,
|
26289 |
+
"grad_norm": 0.08497379720211029,
|
26290 |
+
"learning_rate": 4.111405835543767e-06,
|
26291 |
+
"loss": 0.0041,
|
26292 |
+
"step": 3739
|
26293 |
+
},
|
26294 |
+
{
|
26295 |
+
"epoch": 9.913245033112583,
|
26296 |
+
"grad_norm": 0.07254844903945923,
|
26297 |
+
"learning_rate": 3.978779840848806e-06,
|
26298 |
+
"loss": 0.0042,
|
26299 |
+
"step": 3740
|
26300 |
+
},
|
26301 |
+
{
|
26302 |
+
"epoch": 9.915894039735099,
|
26303 |
+
"grad_norm": 0.051268093287944794,
|
26304 |
+
"learning_rate": 3.846153846153847e-06,
|
26305 |
+
"loss": 0.0041,
|
26306 |
+
"step": 3741
|
26307 |
+
},
|
26308 |
+
{
|
26309 |
+
"epoch": 9.918543046357616,
|
26310 |
+
"grad_norm": 0.11015918850898743,
|
26311 |
+
"learning_rate": 3.713527851458886e-06,
|
26312 |
+
"loss": 0.0037,
|
26313 |
+
"step": 3742
|
26314 |
+
},
|
26315 |
+
{
|
26316 |
+
"epoch": 9.921192052980132,
|
26317 |
+
"grad_norm": 0.08197928965091705,
|
26318 |
+
"learning_rate": 3.580901856763926e-06,
|
26319 |
+
"loss": 0.0038,
|
26320 |
+
"step": 3743
|
26321 |
+
},
|
26322 |
+
{
|
26323 |
+
"epoch": 9.92384105960265,
|
26324 |
+
"grad_norm": 0.2536082863807678,
|
26325 |
+
"learning_rate": 3.4482758620689654e-06,
|
26326 |
+
"loss": 0.0086,
|
26327 |
+
"step": 3744
|
26328 |
+
},
|
26329 |
+
{
|
26330 |
+
"epoch": 9.926490066225165,
|
26331 |
+
"grad_norm": 0.07558843493461609,
|
26332 |
+
"learning_rate": 3.3156498673740055e-06,
|
26333 |
+
"loss": 0.0034,
|
26334 |
+
"step": 3745
|
26335 |
+
},
|
26336 |
+
{
|
26337 |
+
"epoch": 9.929139072847683,
|
26338 |
+
"grad_norm": 1.6272495985031128,
|
26339 |
+
"learning_rate": 3.183023872679045e-06,
|
26340 |
+
"loss": 0.0432,
|
26341 |
+
"step": 3746
|
26342 |
+
},
|
26343 |
+
{
|
26344 |
+
"epoch": 9.931788079470198,
|
26345 |
+
"grad_norm": 0.061038896441459656,
|
26346 |
+
"learning_rate": 3.050397877984085e-06,
|
26347 |
+
"loss": 0.0037,
|
26348 |
+
"step": 3747
|
26349 |
+
},
|
26350 |
+
{
|
26351 |
+
"epoch": 9.934437086092716,
|
26352 |
+
"grad_norm": 0.05754661560058594,
|
26353 |
+
"learning_rate": 2.9177718832891245e-06,
|
26354 |
+
"loss": 0.0028,
|
26355 |
+
"step": 3748
|
26356 |
+
},
|
26357 |
+
{
|
26358 |
+
"epoch": 9.937086092715232,
|
26359 |
+
"grad_norm": 0.12635254859924316,
|
26360 |
+
"learning_rate": 2.7851458885941646e-06,
|
26361 |
+
"loss": 0.0032,
|
26362 |
+
"step": 3749
|
26363 |
+
},
|
26364 |
+
{
|
26365 |
+
"epoch": 9.939735099337748,
|
26366 |
+
"grad_norm": 0.05356074497103691,
|
26367 |
+
"learning_rate": 2.6525198938992043e-06,
|
26368 |
+
"loss": 0.004,
|
26369 |
+
"step": 3750
|
26370 |
+
},
|
26371 |
+
{
|
26372 |
+
"epoch": 9.942384105960265,
|
26373 |
+
"grad_norm": 0.0652216300368309,
|
26374 |
+
"learning_rate": 2.519893899204244e-06,
|
26375 |
+
"loss": 0.0045,
|
26376 |
+
"step": 3751
|
26377 |
+
},
|
26378 |
+
{
|
26379 |
+
"epoch": 9.94503311258278,
|
26380 |
+
"grad_norm": 0.08353260904550552,
|
26381 |
+
"learning_rate": 2.3872679045092837e-06,
|
26382 |
+
"loss": 0.0039,
|
26383 |
+
"step": 3752
|
26384 |
+
},
|
26385 |
+
{
|
26386 |
+
"epoch": 9.947682119205298,
|
26387 |
+
"grad_norm": 0.07516485452651978,
|
26388 |
+
"learning_rate": 2.2546419098143238e-06,
|
26389 |
+
"loss": 0.0042,
|
26390 |
+
"step": 3753
|
26391 |
+
},
|
26392 |
+
{
|
26393 |
+
"epoch": 9.950331125827814,
|
26394 |
+
"grad_norm": 0.060415808111429214,
|
26395 |
+
"learning_rate": 2.122015915119363e-06,
|
26396 |
+
"loss": 0.0037,
|
26397 |
+
"step": 3754
|
26398 |
+
},
|
26399 |
+
{
|
26400 |
+
"epoch": 9.952980132450332,
|
26401 |
+
"grad_norm": 0.18446998298168182,
|
26402 |
+
"learning_rate": 1.989389920424403e-06,
|
26403 |
+
"loss": 0.0047,
|
26404 |
+
"step": 3755
|
26405 |
+
},
|
26406 |
+
{
|
26407 |
+
"epoch": 9.955629139072848,
|
26408 |
+
"grad_norm": 0.6835467219352722,
|
26409 |
+
"learning_rate": 1.856763925729443e-06,
|
26410 |
+
"loss": 0.0208,
|
26411 |
+
"step": 3756
|
26412 |
+
},
|
26413 |
+
{
|
26414 |
+
"epoch": 9.958278145695365,
|
26415 |
+
"grad_norm": 0.0653797909617424,
|
26416 |
+
"learning_rate": 1.7241379310344827e-06,
|
26417 |
+
"loss": 0.0037,
|
26418 |
+
"step": 3757
|
26419 |
+
},
|
26420 |
+
{
|
26421 |
+
"epoch": 9.96092715231788,
|
26422 |
+
"grad_norm": 0.7988371253013611,
|
26423 |
+
"learning_rate": 1.5915119363395226e-06,
|
26424 |
+
"loss": 0.0091,
|
26425 |
+
"step": 3758
|
26426 |
+
},
|
26427 |
+
{
|
26428 |
+
"epoch": 9.963576158940397,
|
26429 |
+
"grad_norm": 0.06156877055764198,
|
26430 |
+
"learning_rate": 1.4588859416445623e-06,
|
26431 |
+
"loss": 0.0036,
|
26432 |
+
"step": 3759
|
26433 |
+
},
|
26434 |
+
{
|
26435 |
+
"epoch": 9.966225165562914,
|
26436 |
+
"grad_norm": 3.7909204959869385,
|
26437 |
+
"learning_rate": 1.3262599469496022e-06,
|
26438 |
+
"loss": 0.0233,
|
26439 |
+
"step": 3760
|
26440 |
+
},
|
26441 |
+
{
|
26442 |
+
"epoch": 9.96887417218543,
|
26443 |
+
"grad_norm": 0.046591877937316895,
|
26444 |
+
"learning_rate": 1.1936339522546418e-06,
|
26445 |
+
"loss": 0.0031,
|
26446 |
+
"step": 3761
|
26447 |
+
},
|
26448 |
+
{
|
26449 |
+
"epoch": 9.971523178807947,
|
26450 |
+
"grad_norm": 0.14124387502670288,
|
26451 |
+
"learning_rate": 1.0610079575596815e-06,
|
26452 |
+
"loss": 0.006,
|
26453 |
+
"step": 3762
|
26454 |
+
},
|
26455 |
+
{
|
26456 |
+
"epoch": 9.974172185430463,
|
26457 |
+
"grad_norm": 0.035212110728025436,
|
26458 |
+
"learning_rate": 9.283819628647215e-07,
|
26459 |
+
"loss": 0.0024,
|
26460 |
+
"step": 3763
|
26461 |
+
},
|
26462 |
+
{
|
26463 |
+
"epoch": 9.97682119205298,
|
26464 |
+
"grad_norm": 0.04375937581062317,
|
26465 |
+
"learning_rate": 7.957559681697613e-07,
|
26466 |
+
"loss": 0.0026,
|
26467 |
+
"step": 3764
|
26468 |
+
},
|
26469 |
+
{
|
26470 |
+
"epoch": 9.979470198675497,
|
26471 |
+
"grad_norm": 0.1499042958021164,
|
26472 |
+
"learning_rate": 6.631299734748011e-07,
|
26473 |
+
"loss": 0.0051,
|
26474 |
+
"step": 3765
|
26475 |
+
},
|
26476 |
+
{
|
26477 |
+
"epoch": 9.982119205298012,
|
26478 |
+
"grad_norm": 0.0520511195063591,
|
26479 |
+
"learning_rate": 5.305039787798408e-07,
|
26480 |
+
"loss": 0.0029,
|
26481 |
+
"step": 3766
|
26482 |
+
},
|
26483 |
+
{
|
26484 |
+
"epoch": 9.98476821192053,
|
26485 |
+
"grad_norm": 0.05783466994762421,
|
26486 |
+
"learning_rate": 3.9787798408488065e-07,
|
26487 |
+
"loss": 0.0037,
|
26488 |
+
"step": 3767
|
26489 |
+
},
|
26490 |
+
{
|
26491 |
+
"epoch": 9.987417218543046,
|
26492 |
+
"grad_norm": 0.04889446124434471,
|
26493 |
+
"learning_rate": 2.652519893899204e-07,
|
26494 |
+
"loss": 0.0029,
|
26495 |
+
"step": 3768
|
26496 |
+
},
|
26497 |
+
{
|
26498 |
+
"epoch": 9.990066225165563,
|
26499 |
+
"grad_norm": 1.5146942138671875,
|
26500 |
+
"learning_rate": 1.326259946949602e-07,
|
26501 |
+
"loss": 0.0124,
|
26502 |
+
"step": 3769
|
26503 |
+
},
|
26504 |
+
{
|
26505 |
+
"epoch": 9.992715231788079,
|
26506 |
+
"grad_norm": 0.13475480675697327,
|
26507 |
+
"learning_rate": 0.0,
|
26508 |
+
"loss": 0.0056,
|
26509 |
+
"step": 3770
|
26510 |
}
|
26511 |
],
|
26512 |
"logging_steps": 1,
|
|
|
26521 |
"should_evaluate": false,
|
26522 |
"should_log": false,
|
26523 |
"should_save": true,
|
26524 |
+
"should_training_stop": true
|
26525 |
},
|
26526 |
"attributes": {}
|
26527 |
}
|
26528 |
},
|
26529 |
+
"total_flos": 2.134271198232576e+20,
|
26530 |
"train_batch_size": 8,
|
26531 |
"trial_name": null,
|
26532 |
"trial_params": null
|