JacobLinCool
commited on
Training in progress, epoch 9, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 111475752
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d51ecd70bd6abe98049e3903569c3e9e0c225f8ca47ca6925715bbb4525a07f
|
3 |
size 111475752
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 223212738
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1015cd357231c8969ba9b399558b4e4e768018a6be01973d55dd3f1a4a7117d
|
3 |
size 223212738
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a9f98afb0211ede3e469085bcd7ffb48239431e6596713b1e711384e5b986da
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:709b689cdaa470cfc575e643ff39b4e6938bba001e0031274a8872524520efb6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 40.896302250803856,
|
3 |
"best_model_checkpoint": "./exp/whisper-large-v3-turbo-zh-TW-clean-1/checkpoint-3397",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -23896,6 +23896,2617 @@
|
|
23896 |
"eval_steps_per_second": 3.762,
|
23897 |
"eval_wer": 40.896302250803856,
|
23898 |
"step": 3397
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23899 |
}
|
23900 |
],
|
23901 |
"logging_steps": 1,
|
@@ -23910,12 +26521,12 @@
|
|
23910 |
"should_evaluate": false,
|
23911 |
"should_log": false,
|
23912 |
"should_save": true,
|
23913 |
-
"should_training_stop":
|
23914 |
},
|
23915 |
"attributes": {}
|
23916 |
}
|
23917 |
},
|
23918 |
-
"total_flos":
|
23919 |
"train_batch_size": 4,
|
23920 |
"trial_name": null,
|
23921 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 40.896302250803856,
|
3 |
"best_model_checkpoint": "./exp/whisper-large-v3-turbo-zh-TW-clean-1/checkpoint-3397",
|
4 |
+
"epoch": 9.986754966887418,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3770,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
23896 |
"eval_steps_per_second": 3.762,
|
23897 |
"eval_wer": 40.896302250803856,
|
23898 |
"step": 3397
|
23899 |
+
},
|
23900 |
+
{
|
23901 |
+
"epoch": 9.001324503311258,
|
23902 |
+
"grad_norm": 1.3489973545074463,
|
23903 |
+
"learning_rate": 4.93368700265252e-05,
|
23904 |
+
"loss": 0.0367,
|
23905 |
+
"step": 3398
|
23906 |
+
},
|
23907 |
+
{
|
23908 |
+
"epoch": 9.003973509933775,
|
23909 |
+
"grad_norm": 0.38989582657814026,
|
23910 |
+
"learning_rate": 4.9204244031830244e-05,
|
23911 |
+
"loss": 0.0104,
|
23912 |
+
"step": 3399
|
23913 |
+
},
|
23914 |
+
{
|
23915 |
+
"epoch": 9.006622516556291,
|
23916 |
+
"grad_norm": 0.1691805124282837,
|
23917 |
+
"learning_rate": 4.907161803713528e-05,
|
23918 |
+
"loss": 0.0097,
|
23919 |
+
"step": 3400
|
23920 |
+
},
|
23921 |
+
{
|
23922 |
+
"epoch": 9.009271523178809,
|
23923 |
+
"grad_norm": 0.10428066551685333,
|
23924 |
+
"learning_rate": 4.893899204244032e-05,
|
23925 |
+
"loss": 0.0078,
|
23926 |
+
"step": 3401
|
23927 |
+
},
|
23928 |
+
{
|
23929 |
+
"epoch": 9.011920529801325,
|
23930 |
+
"grad_norm": 0.17672701179981232,
|
23931 |
+
"learning_rate": 4.880636604774536e-05,
|
23932 |
+
"loss": 0.008,
|
23933 |
+
"step": 3402
|
23934 |
+
},
|
23935 |
+
{
|
23936 |
+
"epoch": 9.01456953642384,
|
23937 |
+
"grad_norm": 0.31466829776763916,
|
23938 |
+
"learning_rate": 4.8673740053050396e-05,
|
23939 |
+
"loss": 0.0098,
|
23940 |
+
"step": 3403
|
23941 |
+
},
|
23942 |
+
{
|
23943 |
+
"epoch": 9.017218543046358,
|
23944 |
+
"grad_norm": 0.223367378115654,
|
23945 |
+
"learning_rate": 4.8541114058355434e-05,
|
23946 |
+
"loss": 0.0088,
|
23947 |
+
"step": 3404
|
23948 |
+
},
|
23949 |
+
{
|
23950 |
+
"epoch": 9.019867549668874,
|
23951 |
+
"grad_norm": 0.4342547357082367,
|
23952 |
+
"learning_rate": 4.840848806366048e-05,
|
23953 |
+
"loss": 0.0139,
|
23954 |
+
"step": 3405
|
23955 |
+
},
|
23956 |
+
{
|
23957 |
+
"epoch": 9.022516556291391,
|
23958 |
+
"grad_norm": 6.883068561553955,
|
23959 |
+
"learning_rate": 4.8275862068965517e-05,
|
23960 |
+
"loss": 0.2748,
|
23961 |
+
"step": 3406
|
23962 |
+
},
|
23963 |
+
{
|
23964 |
+
"epoch": 9.025165562913907,
|
23965 |
+
"grad_norm": 0.10145234316587448,
|
23966 |
+
"learning_rate": 4.8143236074270555e-05,
|
23967 |
+
"loss": 0.0066,
|
23968 |
+
"step": 3407
|
23969 |
+
},
|
23970 |
+
{
|
23971 |
+
"epoch": 9.027814569536424,
|
23972 |
+
"grad_norm": 0.10756523162126541,
|
23973 |
+
"learning_rate": 4.80106100795756e-05,
|
23974 |
+
"loss": 0.0075,
|
23975 |
+
"step": 3408
|
23976 |
+
},
|
23977 |
+
{
|
23978 |
+
"epoch": 9.03046357615894,
|
23979 |
+
"grad_norm": 0.12273053824901581,
|
23980 |
+
"learning_rate": 4.787798408488064e-05,
|
23981 |
+
"loss": 0.0067,
|
23982 |
+
"step": 3409
|
23983 |
+
},
|
23984 |
+
{
|
23985 |
+
"epoch": 9.033112582781458,
|
23986 |
+
"grad_norm": 0.07955110818147659,
|
23987 |
+
"learning_rate": 4.7745358090185675e-05,
|
23988 |
+
"loss": 0.0049,
|
23989 |
+
"step": 3410
|
23990 |
+
},
|
23991 |
+
{
|
23992 |
+
"epoch": 9.035761589403974,
|
23993 |
+
"grad_norm": 0.38133883476257324,
|
23994 |
+
"learning_rate": 4.761273209549072e-05,
|
23995 |
+
"loss": 0.0162,
|
23996 |
+
"step": 3411
|
23997 |
+
},
|
23998 |
+
{
|
23999 |
+
"epoch": 9.03841059602649,
|
24000 |
+
"grad_norm": 0.2501557171344757,
|
24001 |
+
"learning_rate": 4.748010610079576e-05,
|
24002 |
+
"loss": 0.0089,
|
24003 |
+
"step": 3412
|
24004 |
+
},
|
24005 |
+
{
|
24006 |
+
"epoch": 9.041059602649007,
|
24007 |
+
"grad_norm": 0.12085811793804169,
|
24008 |
+
"learning_rate": 4.73474801061008e-05,
|
24009 |
+
"loss": 0.0064,
|
24010 |
+
"step": 3413
|
24011 |
+
},
|
24012 |
+
{
|
24013 |
+
"epoch": 9.043708609271523,
|
24014 |
+
"grad_norm": 0.36201950907707214,
|
24015 |
+
"learning_rate": 4.721485411140584e-05,
|
24016 |
+
"loss": 0.0102,
|
24017 |
+
"step": 3414
|
24018 |
+
},
|
24019 |
+
{
|
24020 |
+
"epoch": 9.04635761589404,
|
24021 |
+
"grad_norm": 0.09906315058469772,
|
24022 |
+
"learning_rate": 4.708222811671087e-05,
|
24023 |
+
"loss": 0.0056,
|
24024 |
+
"step": 3415
|
24025 |
+
},
|
24026 |
+
{
|
24027 |
+
"epoch": 9.049006622516556,
|
24028 |
+
"grad_norm": 0.26838064193725586,
|
24029 |
+
"learning_rate": 4.694960212201592e-05,
|
24030 |
+
"loss": 0.0104,
|
24031 |
+
"step": 3416
|
24032 |
+
},
|
24033 |
+
{
|
24034 |
+
"epoch": 9.051655629139074,
|
24035 |
+
"grad_norm": 0.11671532690525055,
|
24036 |
+
"learning_rate": 4.6816976127320955e-05,
|
24037 |
+
"loss": 0.0071,
|
24038 |
+
"step": 3417
|
24039 |
+
},
|
24040 |
+
{
|
24041 |
+
"epoch": 9.05430463576159,
|
24042 |
+
"grad_norm": 0.11996608972549438,
|
24043 |
+
"learning_rate": 4.668435013262599e-05,
|
24044 |
+
"loss": 0.0064,
|
24045 |
+
"step": 3418
|
24046 |
+
},
|
24047 |
+
{
|
24048 |
+
"epoch": 9.056953642384107,
|
24049 |
+
"grad_norm": 0.37756839394569397,
|
24050 |
+
"learning_rate": 4.655172413793104e-05,
|
24051 |
+
"loss": 0.0128,
|
24052 |
+
"step": 3419
|
24053 |
+
},
|
24054 |
+
{
|
24055 |
+
"epoch": 9.059602649006623,
|
24056 |
+
"grad_norm": 0.1943897008895874,
|
24057 |
+
"learning_rate": 4.6419098143236075e-05,
|
24058 |
+
"loss": 0.0107,
|
24059 |
+
"step": 3420
|
24060 |
+
},
|
24061 |
+
{
|
24062 |
+
"epoch": 9.062251655629138,
|
24063 |
+
"grad_norm": 0.19497939944267273,
|
24064 |
+
"learning_rate": 4.628647214854111e-05,
|
24065 |
+
"loss": 0.0092,
|
24066 |
+
"step": 3421
|
24067 |
+
},
|
24068 |
+
{
|
24069 |
+
"epoch": 9.064900662251656,
|
24070 |
+
"grad_norm": 2.2906646728515625,
|
24071 |
+
"learning_rate": 4.615384615384616e-05,
|
24072 |
+
"loss": 0.0198,
|
24073 |
+
"step": 3422
|
24074 |
+
},
|
24075 |
+
{
|
24076 |
+
"epoch": 9.067549668874172,
|
24077 |
+
"grad_norm": 2.465967893600464,
|
24078 |
+
"learning_rate": 4.6021220159151196e-05,
|
24079 |
+
"loss": 0.0727,
|
24080 |
+
"step": 3423
|
24081 |
+
},
|
24082 |
+
{
|
24083 |
+
"epoch": 9.07019867549669,
|
24084 |
+
"grad_norm": 0.15241271257400513,
|
24085 |
+
"learning_rate": 4.5888594164456234e-05,
|
24086 |
+
"loss": 0.0072,
|
24087 |
+
"step": 3424
|
24088 |
+
},
|
24089 |
+
{
|
24090 |
+
"epoch": 9.072847682119205,
|
24091 |
+
"grad_norm": 0.15346211194992065,
|
24092 |
+
"learning_rate": 4.575596816976128e-05,
|
24093 |
+
"loss": 0.0088,
|
24094 |
+
"step": 3425
|
24095 |
+
},
|
24096 |
+
{
|
24097 |
+
"epoch": 9.075496688741723,
|
24098 |
+
"grad_norm": 0.3105780780315399,
|
24099 |
+
"learning_rate": 4.562334217506632e-05,
|
24100 |
+
"loss": 0.0074,
|
24101 |
+
"step": 3426
|
24102 |
+
},
|
24103 |
+
{
|
24104 |
+
"epoch": 9.078145695364238,
|
24105 |
+
"grad_norm": 0.28642329573631287,
|
24106 |
+
"learning_rate": 4.549071618037135e-05,
|
24107 |
+
"loss": 0.0071,
|
24108 |
+
"step": 3427
|
24109 |
+
},
|
24110 |
+
{
|
24111 |
+
"epoch": 9.080794701986754,
|
24112 |
+
"grad_norm": 0.17047229409217834,
|
24113 |
+
"learning_rate": 4.535809018567639e-05,
|
24114 |
+
"loss": 0.0074,
|
24115 |
+
"step": 3428
|
24116 |
+
},
|
24117 |
+
{
|
24118 |
+
"epoch": 9.083443708609272,
|
24119 |
+
"grad_norm": 1.974877953529358,
|
24120 |
+
"learning_rate": 4.522546419098143e-05,
|
24121 |
+
"loss": 0.1462,
|
24122 |
+
"step": 3429
|
24123 |
+
},
|
24124 |
+
{
|
24125 |
+
"epoch": 9.086092715231787,
|
24126 |
+
"grad_norm": 0.09645556658506393,
|
24127 |
+
"learning_rate": 4.509283819628647e-05,
|
24128 |
+
"loss": 0.0061,
|
24129 |
+
"step": 3430
|
24130 |
+
},
|
24131 |
+
{
|
24132 |
+
"epoch": 9.088741721854305,
|
24133 |
+
"grad_norm": 1.6515982151031494,
|
24134 |
+
"learning_rate": 4.4960212201591514e-05,
|
24135 |
+
"loss": 0.0304,
|
24136 |
+
"step": 3431
|
24137 |
+
},
|
24138 |
+
{
|
24139 |
+
"epoch": 9.09139072847682,
|
24140 |
+
"grad_norm": 0.12649770081043243,
|
24141 |
+
"learning_rate": 4.482758620689655e-05,
|
24142 |
+
"loss": 0.006,
|
24143 |
+
"step": 3432
|
24144 |
+
},
|
24145 |
+
{
|
24146 |
+
"epoch": 9.094039735099338,
|
24147 |
+
"grad_norm": 0.8409780263900757,
|
24148 |
+
"learning_rate": 4.469496021220159e-05,
|
24149 |
+
"loss": 0.0139,
|
24150 |
+
"step": 3433
|
24151 |
+
},
|
24152 |
+
{
|
24153 |
+
"epoch": 9.096688741721854,
|
24154 |
+
"grad_norm": 0.10516829788684845,
|
24155 |
+
"learning_rate": 4.4562334217506634e-05,
|
24156 |
+
"loss": 0.0068,
|
24157 |
+
"step": 3434
|
24158 |
+
},
|
24159 |
+
{
|
24160 |
+
"epoch": 9.099337748344372,
|
24161 |
+
"grad_norm": 0.12136232107877731,
|
24162 |
+
"learning_rate": 4.442970822281167e-05,
|
24163 |
+
"loss": 0.0067,
|
24164 |
+
"step": 3435
|
24165 |
+
},
|
24166 |
+
{
|
24167 |
+
"epoch": 9.101986754966887,
|
24168 |
+
"grad_norm": 0.1334480494260788,
|
24169 |
+
"learning_rate": 4.429708222811671e-05,
|
24170 |
+
"loss": 0.0085,
|
24171 |
+
"step": 3436
|
24172 |
+
},
|
24173 |
+
{
|
24174 |
+
"epoch": 9.104635761589403,
|
24175 |
+
"grad_norm": 0.09830499440431595,
|
24176 |
+
"learning_rate": 4.4164456233421755e-05,
|
24177 |
+
"loss": 0.0072,
|
24178 |
+
"step": 3437
|
24179 |
+
},
|
24180 |
+
{
|
24181 |
+
"epoch": 9.10728476821192,
|
24182 |
+
"grad_norm": 0.07183544337749481,
|
24183 |
+
"learning_rate": 4.403183023872679e-05,
|
24184 |
+
"loss": 0.0059,
|
24185 |
+
"step": 3438
|
24186 |
+
},
|
24187 |
+
{
|
24188 |
+
"epoch": 9.109933774834436,
|
24189 |
+
"grad_norm": 0.9209287762641907,
|
24190 |
+
"learning_rate": 4.389920424403184e-05,
|
24191 |
+
"loss": 0.024,
|
24192 |
+
"step": 3439
|
24193 |
+
},
|
24194 |
+
{
|
24195 |
+
"epoch": 9.112582781456954,
|
24196 |
+
"grad_norm": 0.5843676328659058,
|
24197 |
+
"learning_rate": 4.376657824933687e-05,
|
24198 |
+
"loss": 0.0088,
|
24199 |
+
"step": 3440
|
24200 |
+
},
|
24201 |
+
{
|
24202 |
+
"epoch": 9.11523178807947,
|
24203 |
+
"grad_norm": 0.24383290112018585,
|
24204 |
+
"learning_rate": 4.363395225464191e-05,
|
24205 |
+
"loss": 0.0077,
|
24206 |
+
"step": 3441
|
24207 |
+
},
|
24208 |
+
{
|
24209 |
+
"epoch": 9.117880794701987,
|
24210 |
+
"grad_norm": 0.3678019344806671,
|
24211 |
+
"learning_rate": 4.350132625994695e-05,
|
24212 |
+
"loss": 0.0101,
|
24213 |
+
"step": 3442
|
24214 |
+
},
|
24215 |
+
{
|
24216 |
+
"epoch": 9.120529801324503,
|
24217 |
+
"grad_norm": 0.1334732174873352,
|
24218 |
+
"learning_rate": 4.336870026525199e-05,
|
24219 |
+
"loss": 0.0056,
|
24220 |
+
"step": 3443
|
24221 |
+
},
|
24222 |
+
{
|
24223 |
+
"epoch": 9.12317880794702,
|
24224 |
+
"grad_norm": 0.24420872330665588,
|
24225 |
+
"learning_rate": 4.323607427055703e-05,
|
24226 |
+
"loss": 0.0079,
|
24227 |
+
"step": 3444
|
24228 |
+
},
|
24229 |
+
{
|
24230 |
+
"epoch": 9.125827814569536,
|
24231 |
+
"grad_norm": 0.2850385308265686,
|
24232 |
+
"learning_rate": 4.310344827586207e-05,
|
24233 |
+
"loss": 0.0114,
|
24234 |
+
"step": 3445
|
24235 |
+
},
|
24236 |
+
{
|
24237 |
+
"epoch": 9.128476821192052,
|
24238 |
+
"grad_norm": 0.08591492474079132,
|
24239 |
+
"learning_rate": 4.297082228116711e-05,
|
24240 |
+
"loss": 0.0054,
|
24241 |
+
"step": 3446
|
24242 |
+
},
|
24243 |
+
{
|
24244 |
+
"epoch": 9.13112582781457,
|
24245 |
+
"grad_norm": 0.09644327312707901,
|
24246 |
+
"learning_rate": 4.283819628647215e-05,
|
24247 |
+
"loss": 0.006,
|
24248 |
+
"step": 3447
|
24249 |
+
},
|
24250 |
+
{
|
24251 |
+
"epoch": 9.133774834437085,
|
24252 |
+
"grad_norm": 0.2626118063926697,
|
24253 |
+
"learning_rate": 4.270557029177719e-05,
|
24254 |
+
"loss": 0.0091,
|
24255 |
+
"step": 3448
|
24256 |
+
},
|
24257 |
+
{
|
24258 |
+
"epoch": 9.136423841059603,
|
24259 |
+
"grad_norm": 0.3831780254840851,
|
24260 |
+
"learning_rate": 4.257294429708223e-05,
|
24261 |
+
"loss": 0.0089,
|
24262 |
+
"step": 3449
|
24263 |
+
},
|
24264 |
+
{
|
24265 |
+
"epoch": 9.139072847682119,
|
24266 |
+
"grad_norm": 1.390022873878479,
|
24267 |
+
"learning_rate": 4.244031830238727e-05,
|
24268 |
+
"loss": 0.0714,
|
24269 |
+
"step": 3450
|
24270 |
+
},
|
24271 |
+
{
|
24272 |
+
"epoch": 9.141721854304636,
|
24273 |
+
"grad_norm": 0.4582221508026123,
|
24274 |
+
"learning_rate": 4.2307692307692314e-05,
|
24275 |
+
"loss": 0.0113,
|
24276 |
+
"step": 3451
|
24277 |
+
},
|
24278 |
+
{
|
24279 |
+
"epoch": 9.144370860927152,
|
24280 |
+
"grad_norm": 0.11480309814214706,
|
24281 |
+
"learning_rate": 4.2175066312997345e-05,
|
24282 |
+
"loss": 0.0073,
|
24283 |
+
"step": 3452
|
24284 |
+
},
|
24285 |
+
{
|
24286 |
+
"epoch": 9.14701986754967,
|
24287 |
+
"grad_norm": 0.08496708422899246,
|
24288 |
+
"learning_rate": 4.204244031830238e-05,
|
24289 |
+
"loss": 0.0059,
|
24290 |
+
"step": 3453
|
24291 |
+
},
|
24292 |
+
{
|
24293 |
+
"epoch": 9.149668874172185,
|
24294 |
+
"grad_norm": 0.09760905802249908,
|
24295 |
+
"learning_rate": 4.190981432360743e-05,
|
24296 |
+
"loss": 0.0069,
|
24297 |
+
"step": 3454
|
24298 |
+
},
|
24299 |
+
{
|
24300 |
+
"epoch": 9.152317880794701,
|
24301 |
+
"grad_norm": 0.5061994194984436,
|
24302 |
+
"learning_rate": 4.1777188328912466e-05,
|
24303 |
+
"loss": 0.0104,
|
24304 |
+
"step": 3455
|
24305 |
+
},
|
24306 |
+
{
|
24307 |
+
"epoch": 9.154966887417219,
|
24308 |
+
"grad_norm": 0.2912754714488983,
|
24309 |
+
"learning_rate": 4.1644562334217504e-05,
|
24310 |
+
"loss": 0.0085,
|
24311 |
+
"step": 3456
|
24312 |
+
},
|
24313 |
+
{
|
24314 |
+
"epoch": 9.157615894039735,
|
24315 |
+
"grad_norm": 0.06553248316049576,
|
24316 |
+
"learning_rate": 4.151193633952255e-05,
|
24317 |
+
"loss": 0.0049,
|
24318 |
+
"step": 3457
|
24319 |
+
},
|
24320 |
+
{
|
24321 |
+
"epoch": 9.160264900662252,
|
24322 |
+
"grad_norm": 0.1199113130569458,
|
24323 |
+
"learning_rate": 4.1379310344827587e-05,
|
24324 |
+
"loss": 0.0071,
|
24325 |
+
"step": 3458
|
24326 |
+
},
|
24327 |
+
{
|
24328 |
+
"epoch": 9.162913907284768,
|
24329 |
+
"grad_norm": 0.13050176203250885,
|
24330 |
+
"learning_rate": 4.1246684350132625e-05,
|
24331 |
+
"loss": 0.0065,
|
24332 |
+
"step": 3459
|
24333 |
+
},
|
24334 |
+
{
|
24335 |
+
"epoch": 9.165562913907285,
|
24336 |
+
"grad_norm": 0.15488766133785248,
|
24337 |
+
"learning_rate": 4.111405835543767e-05,
|
24338 |
+
"loss": 0.0059,
|
24339 |
+
"step": 3460
|
24340 |
+
},
|
24341 |
+
{
|
24342 |
+
"epoch": 9.168211920529801,
|
24343 |
+
"grad_norm": 0.1280549168586731,
|
24344 |
+
"learning_rate": 4.098143236074271e-05,
|
24345 |
+
"loss": 0.0088,
|
24346 |
+
"step": 3461
|
24347 |
+
},
|
24348 |
+
{
|
24349 |
+
"epoch": 9.170860927152319,
|
24350 |
+
"grad_norm": 0.08911632001399994,
|
24351 |
+
"learning_rate": 4.0848806366047745e-05,
|
24352 |
+
"loss": 0.006,
|
24353 |
+
"step": 3462
|
24354 |
+
},
|
24355 |
+
{
|
24356 |
+
"epoch": 9.173509933774834,
|
24357 |
+
"grad_norm": 0.4611297845840454,
|
24358 |
+
"learning_rate": 4.071618037135279e-05,
|
24359 |
+
"loss": 0.0095,
|
24360 |
+
"step": 3463
|
24361 |
+
},
|
24362 |
+
{
|
24363 |
+
"epoch": 9.17615894039735,
|
24364 |
+
"grad_norm": 0.12303514778614044,
|
24365 |
+
"learning_rate": 4.058355437665783e-05,
|
24366 |
+
"loss": 0.0059,
|
24367 |
+
"step": 3464
|
24368 |
+
},
|
24369 |
+
{
|
24370 |
+
"epoch": 9.178807947019868,
|
24371 |
+
"grad_norm": 0.3407684862613678,
|
24372 |
+
"learning_rate": 4.0450928381962866e-05,
|
24373 |
+
"loss": 0.0072,
|
24374 |
+
"step": 3465
|
24375 |
+
},
|
24376 |
+
{
|
24377 |
+
"epoch": 9.181456953642384,
|
24378 |
+
"grad_norm": 0.7087451815605164,
|
24379 |
+
"learning_rate": 4.0318302387267904e-05,
|
24380 |
+
"loss": 0.0122,
|
24381 |
+
"step": 3466
|
24382 |
+
},
|
24383 |
+
{
|
24384 |
+
"epoch": 9.184105960264901,
|
24385 |
+
"grad_norm": 0.5841720700263977,
|
24386 |
+
"learning_rate": 4.018567639257294e-05,
|
24387 |
+
"loss": 0.0155,
|
24388 |
+
"step": 3467
|
24389 |
+
},
|
24390 |
+
{
|
24391 |
+
"epoch": 9.186754966887417,
|
24392 |
+
"grad_norm": 0.08858039975166321,
|
24393 |
+
"learning_rate": 4.005305039787799e-05,
|
24394 |
+
"loss": 0.0048,
|
24395 |
+
"step": 3468
|
24396 |
+
},
|
24397 |
+
{
|
24398 |
+
"epoch": 9.189403973509934,
|
24399 |
+
"grad_norm": 0.13346530497074127,
|
24400 |
+
"learning_rate": 3.9920424403183025e-05,
|
24401 |
+
"loss": 0.0065,
|
24402 |
+
"step": 3469
|
24403 |
+
},
|
24404 |
+
{
|
24405 |
+
"epoch": 9.19205298013245,
|
24406 |
+
"grad_norm": 0.09984955191612244,
|
24407 |
+
"learning_rate": 3.978779840848806e-05,
|
24408 |
+
"loss": 0.006,
|
24409 |
+
"step": 3470
|
24410 |
+
},
|
24411 |
+
{
|
24412 |
+
"epoch": 9.194701986754968,
|
24413 |
+
"grad_norm": 0.1724618822336197,
|
24414 |
+
"learning_rate": 3.965517241379311e-05,
|
24415 |
+
"loss": 0.0082,
|
24416 |
+
"step": 3471
|
24417 |
+
},
|
24418 |
+
{
|
24419 |
+
"epoch": 9.197350993377484,
|
24420 |
+
"grad_norm": 0.204783096909523,
|
24421 |
+
"learning_rate": 3.9522546419098145e-05,
|
24422 |
+
"loss": 0.007,
|
24423 |
+
"step": 3472
|
24424 |
+
},
|
24425 |
+
{
|
24426 |
+
"epoch": 9.2,
|
24427 |
+
"grad_norm": 0.6199121475219727,
|
24428 |
+
"learning_rate": 3.9389920424403183e-05,
|
24429 |
+
"loss": 0.0107,
|
24430 |
+
"step": 3473
|
24431 |
+
},
|
24432 |
+
{
|
24433 |
+
"epoch": 9.202649006622517,
|
24434 |
+
"grad_norm": 0.7197751998901367,
|
24435 |
+
"learning_rate": 3.925729442970823e-05,
|
24436 |
+
"loss": 0.0162,
|
24437 |
+
"step": 3474
|
24438 |
+
},
|
24439 |
+
{
|
24440 |
+
"epoch": 9.205298013245033,
|
24441 |
+
"grad_norm": 0.1388215869665146,
|
24442 |
+
"learning_rate": 3.9124668435013266e-05,
|
24443 |
+
"loss": 0.0077,
|
24444 |
+
"step": 3475
|
24445 |
+
},
|
24446 |
+
{
|
24447 |
+
"epoch": 9.20794701986755,
|
24448 |
+
"grad_norm": 0.1174258440732956,
|
24449 |
+
"learning_rate": 3.8992042440318304e-05,
|
24450 |
+
"loss": 0.0054,
|
24451 |
+
"step": 3476
|
24452 |
+
},
|
24453 |
+
{
|
24454 |
+
"epoch": 9.210596026490066,
|
24455 |
+
"grad_norm": 4.751803398132324,
|
24456 |
+
"learning_rate": 3.885941644562334e-05,
|
24457 |
+
"loss": 0.041,
|
24458 |
+
"step": 3477
|
24459 |
+
},
|
24460 |
+
{
|
24461 |
+
"epoch": 9.213245033112583,
|
24462 |
+
"grad_norm": 0.09883010387420654,
|
24463 |
+
"learning_rate": 3.872679045092838e-05,
|
24464 |
+
"loss": 0.0067,
|
24465 |
+
"step": 3478
|
24466 |
+
},
|
24467 |
+
{
|
24468 |
+
"epoch": 9.2158940397351,
|
24469 |
+
"grad_norm": 0.12656521797180176,
|
24470 |
+
"learning_rate": 3.859416445623342e-05,
|
24471 |
+
"loss": 0.0084,
|
24472 |
+
"step": 3479
|
24473 |
+
},
|
24474 |
+
{
|
24475 |
+
"epoch": 9.218543046357617,
|
24476 |
+
"grad_norm": 0.16607128083705902,
|
24477 |
+
"learning_rate": 3.846153846153846e-05,
|
24478 |
+
"loss": 0.0068,
|
24479 |
+
"step": 3480
|
24480 |
+
},
|
24481 |
+
{
|
24482 |
+
"epoch": 9.221192052980133,
|
24483 |
+
"grad_norm": 0.22199641168117523,
|
24484 |
+
"learning_rate": 3.83289124668435e-05,
|
24485 |
+
"loss": 0.0073,
|
24486 |
+
"step": 3481
|
24487 |
+
},
|
24488 |
+
{
|
24489 |
+
"epoch": 9.223841059602648,
|
24490 |
+
"grad_norm": 0.09226368367671967,
|
24491 |
+
"learning_rate": 3.819628647214854e-05,
|
24492 |
+
"loss": 0.0055,
|
24493 |
+
"step": 3482
|
24494 |
+
},
|
24495 |
+
{
|
24496 |
+
"epoch": 9.226490066225166,
|
24497 |
+
"grad_norm": 2.1479151248931885,
|
24498 |
+
"learning_rate": 3.8063660477453584e-05,
|
24499 |
+
"loss": 0.0295,
|
24500 |
+
"step": 3483
|
24501 |
+
},
|
24502 |
+
{
|
24503 |
+
"epoch": 9.229139072847682,
|
24504 |
+
"grad_norm": 0.11172802746295929,
|
24505 |
+
"learning_rate": 3.793103448275862e-05,
|
24506 |
+
"loss": 0.0053,
|
24507 |
+
"step": 3484
|
24508 |
+
},
|
24509 |
+
{
|
24510 |
+
"epoch": 9.2317880794702,
|
24511 |
+
"grad_norm": 0.07896125316619873,
|
24512 |
+
"learning_rate": 3.779840848806366e-05,
|
24513 |
+
"loss": 0.0059,
|
24514 |
+
"step": 3485
|
24515 |
+
},
|
24516 |
+
{
|
24517 |
+
"epoch": 9.234437086092715,
|
24518 |
+
"grad_norm": 0.10223594307899475,
|
24519 |
+
"learning_rate": 3.7665782493368704e-05,
|
24520 |
+
"loss": 0.0052,
|
24521 |
+
"step": 3486
|
24522 |
+
},
|
24523 |
+
{
|
24524 |
+
"epoch": 9.237086092715233,
|
24525 |
+
"grad_norm": 0.17130863666534424,
|
24526 |
+
"learning_rate": 3.753315649867374e-05,
|
24527 |
+
"loss": 0.0074,
|
24528 |
+
"step": 3487
|
24529 |
+
},
|
24530 |
+
{
|
24531 |
+
"epoch": 9.239735099337748,
|
24532 |
+
"grad_norm": 0.1503896713256836,
|
24533 |
+
"learning_rate": 3.740053050397878e-05,
|
24534 |
+
"loss": 0.007,
|
24535 |
+
"step": 3488
|
24536 |
+
},
|
24537 |
+
{
|
24538 |
+
"epoch": 9.242384105960264,
|
24539 |
+
"grad_norm": 0.4115447998046875,
|
24540 |
+
"learning_rate": 3.7267904509283825e-05,
|
24541 |
+
"loss": 0.0081,
|
24542 |
+
"step": 3489
|
24543 |
+
},
|
24544 |
+
{
|
24545 |
+
"epoch": 9.245033112582782,
|
24546 |
+
"grad_norm": 1.1161892414093018,
|
24547 |
+
"learning_rate": 3.7135278514588856e-05,
|
24548 |
+
"loss": 0.0149,
|
24549 |
+
"step": 3490
|
24550 |
+
},
|
24551 |
+
{
|
24552 |
+
"epoch": 9.247682119205297,
|
24553 |
+
"grad_norm": 0.08194896578788757,
|
24554 |
+
"learning_rate": 3.7002652519893894e-05,
|
24555 |
+
"loss": 0.0048,
|
24556 |
+
"step": 3491
|
24557 |
+
},
|
24558 |
+
{
|
24559 |
+
"epoch": 9.250331125827815,
|
24560 |
+
"grad_norm": 0.28096774220466614,
|
24561 |
+
"learning_rate": 3.687002652519894e-05,
|
24562 |
+
"loss": 0.0086,
|
24563 |
+
"step": 3492
|
24564 |
+
},
|
24565 |
+
{
|
24566 |
+
"epoch": 9.25298013245033,
|
24567 |
+
"grad_norm": 0.25251340866088867,
|
24568 |
+
"learning_rate": 3.673740053050398e-05,
|
24569 |
+
"loss": 0.0098,
|
24570 |
+
"step": 3493
|
24571 |
+
},
|
24572 |
+
{
|
24573 |
+
"epoch": 9.255629139072848,
|
24574 |
+
"grad_norm": 0.11859967559576035,
|
24575 |
+
"learning_rate": 3.660477453580902e-05,
|
24576 |
+
"loss": 0.0074,
|
24577 |
+
"step": 3494
|
24578 |
+
},
|
24579 |
+
{
|
24580 |
+
"epoch": 9.258278145695364,
|
24581 |
+
"grad_norm": 0.17372949421405792,
|
24582 |
+
"learning_rate": 3.647214854111406e-05,
|
24583 |
+
"loss": 0.0065,
|
24584 |
+
"step": 3495
|
24585 |
+
},
|
24586 |
+
{
|
24587 |
+
"epoch": 9.260927152317882,
|
24588 |
+
"grad_norm": 0.1542670577764511,
|
24589 |
+
"learning_rate": 3.63395225464191e-05,
|
24590 |
+
"loss": 0.0071,
|
24591 |
+
"step": 3496
|
24592 |
+
},
|
24593 |
+
{
|
24594 |
+
"epoch": 9.263576158940397,
|
24595 |
+
"grad_norm": 1.8768460750579834,
|
24596 |
+
"learning_rate": 3.620689655172414e-05,
|
24597 |
+
"loss": 0.042,
|
24598 |
+
"step": 3497
|
24599 |
+
},
|
24600 |
+
{
|
24601 |
+
"epoch": 9.266225165562913,
|
24602 |
+
"grad_norm": 0.11673364043235779,
|
24603 |
+
"learning_rate": 3.607427055702918e-05,
|
24604 |
+
"loss": 0.0066,
|
24605 |
+
"step": 3498
|
24606 |
+
},
|
24607 |
+
{
|
24608 |
+
"epoch": 9.26887417218543,
|
24609 |
+
"grad_norm": 0.0539066381752491,
|
24610 |
+
"learning_rate": 3.594164456233422e-05,
|
24611 |
+
"loss": 0.0048,
|
24612 |
+
"step": 3499
|
24613 |
+
},
|
24614 |
+
{
|
24615 |
+
"epoch": 9.271523178807946,
|
24616 |
+
"grad_norm": 0.11874030530452728,
|
24617 |
+
"learning_rate": 3.580901856763926e-05,
|
24618 |
+
"loss": 0.0055,
|
24619 |
+
"step": 3500
|
24620 |
+
},
|
24621 |
+
{
|
24622 |
+
"epoch": 9.274172185430464,
|
24623 |
+
"grad_norm": 0.8672187328338623,
|
24624 |
+
"learning_rate": 3.56763925729443e-05,
|
24625 |
+
"loss": 0.0218,
|
24626 |
+
"step": 3501
|
24627 |
+
},
|
24628 |
+
{
|
24629 |
+
"epoch": 9.27682119205298,
|
24630 |
+
"grad_norm": 0.21986471116542816,
|
24631 |
+
"learning_rate": 3.554376657824933e-05,
|
24632 |
+
"loss": 0.0072,
|
24633 |
+
"step": 3502
|
24634 |
+
},
|
24635 |
+
{
|
24636 |
+
"epoch": 9.279470198675497,
|
24637 |
+
"grad_norm": 0.19156527519226074,
|
24638 |
+
"learning_rate": 3.541114058355438e-05,
|
24639 |
+
"loss": 0.0086,
|
24640 |
+
"step": 3503
|
24641 |
+
},
|
24642 |
+
{
|
24643 |
+
"epoch": 9.282119205298013,
|
24644 |
+
"grad_norm": 0.07234346866607666,
|
24645 |
+
"learning_rate": 3.5278514588859415e-05,
|
24646 |
+
"loss": 0.0045,
|
24647 |
+
"step": 3504
|
24648 |
+
},
|
24649 |
+
{
|
24650 |
+
"epoch": 9.28476821192053,
|
24651 |
+
"grad_norm": 0.10020241886377335,
|
24652 |
+
"learning_rate": 3.514588859416445e-05,
|
24653 |
+
"loss": 0.0053,
|
24654 |
+
"step": 3505
|
24655 |
+
},
|
24656 |
+
{
|
24657 |
+
"epoch": 9.287417218543046,
|
24658 |
+
"grad_norm": 0.11927499622106552,
|
24659 |
+
"learning_rate": 3.50132625994695e-05,
|
24660 |
+
"loss": 0.0068,
|
24661 |
+
"step": 3506
|
24662 |
+
},
|
24663 |
+
{
|
24664 |
+
"epoch": 9.290066225165562,
|
24665 |
+
"grad_norm": 0.3162318766117096,
|
24666 |
+
"learning_rate": 3.4880636604774536e-05,
|
24667 |
+
"loss": 0.0079,
|
24668 |
+
"step": 3507
|
24669 |
+
},
|
24670 |
+
{
|
24671 |
+
"epoch": 9.29271523178808,
|
24672 |
+
"grad_norm": 0.0676041841506958,
|
24673 |
+
"learning_rate": 3.4748010610079574e-05,
|
24674 |
+
"loss": 0.0052,
|
24675 |
+
"step": 3508
|
24676 |
+
},
|
24677 |
+
{
|
24678 |
+
"epoch": 9.295364238410595,
|
24679 |
+
"grad_norm": 0.1660802811384201,
|
24680 |
+
"learning_rate": 3.461538461538462e-05,
|
24681 |
+
"loss": 0.0063,
|
24682 |
+
"step": 3509
|
24683 |
+
},
|
24684 |
+
{
|
24685 |
+
"epoch": 9.298013245033113,
|
24686 |
+
"grad_norm": 0.22426308691501617,
|
24687 |
+
"learning_rate": 3.4482758620689657e-05,
|
24688 |
+
"loss": 0.0087,
|
24689 |
+
"step": 3510
|
24690 |
+
},
|
24691 |
+
{
|
24692 |
+
"epoch": 9.300662251655629,
|
24693 |
+
"grad_norm": 0.3508667051792145,
|
24694 |
+
"learning_rate": 3.4350132625994695e-05,
|
24695 |
+
"loss": 0.0063,
|
24696 |
+
"step": 3511
|
24697 |
+
},
|
24698 |
+
{
|
24699 |
+
"epoch": 9.303311258278146,
|
24700 |
+
"grad_norm": 0.08600914478302002,
|
24701 |
+
"learning_rate": 3.421750663129974e-05,
|
24702 |
+
"loss": 0.0059,
|
24703 |
+
"step": 3512
|
24704 |
+
},
|
24705 |
+
{
|
24706 |
+
"epoch": 9.305960264900662,
|
24707 |
+
"grad_norm": 0.19650253653526306,
|
24708 |
+
"learning_rate": 3.408488063660478e-05,
|
24709 |
+
"loss": 0.0072,
|
24710 |
+
"step": 3513
|
24711 |
+
},
|
24712 |
+
{
|
24713 |
+
"epoch": 9.30860927152318,
|
24714 |
+
"grad_norm": 1.2427210807800293,
|
24715 |
+
"learning_rate": 3.395225464190981e-05,
|
24716 |
+
"loss": 0.0161,
|
24717 |
+
"step": 3514
|
24718 |
+
},
|
24719 |
+
{
|
24720 |
+
"epoch": 9.311258278145695,
|
24721 |
+
"grad_norm": 0.1413085013628006,
|
24722 |
+
"learning_rate": 3.381962864721485e-05,
|
24723 |
+
"loss": 0.0061,
|
24724 |
+
"step": 3515
|
24725 |
+
},
|
24726 |
+
{
|
24727 |
+
"epoch": 9.313907284768211,
|
24728 |
+
"grad_norm": 0.08845118433237076,
|
24729 |
+
"learning_rate": 3.368700265251989e-05,
|
24730 |
+
"loss": 0.0056,
|
24731 |
+
"step": 3516
|
24732 |
+
},
|
24733 |
+
{
|
24734 |
+
"epoch": 9.316556291390729,
|
24735 |
+
"grad_norm": 0.09792495518922806,
|
24736 |
+
"learning_rate": 3.355437665782493e-05,
|
24737 |
+
"loss": 0.0061,
|
24738 |
+
"step": 3517
|
24739 |
+
},
|
24740 |
+
{
|
24741 |
+
"epoch": 9.319205298013244,
|
24742 |
+
"grad_norm": 0.10987572371959686,
|
24743 |
+
"learning_rate": 3.3421750663129974e-05,
|
24744 |
+
"loss": 0.0061,
|
24745 |
+
"step": 3518
|
24746 |
+
},
|
24747 |
+
{
|
24748 |
+
"epoch": 9.321854304635762,
|
24749 |
+
"grad_norm": 0.846443235874176,
|
24750 |
+
"learning_rate": 3.328912466843501e-05,
|
24751 |
+
"loss": 0.0147,
|
24752 |
+
"step": 3519
|
24753 |
+
},
|
24754 |
+
{
|
24755 |
+
"epoch": 9.324503311258278,
|
24756 |
+
"grad_norm": 0.07839330285787582,
|
24757 |
+
"learning_rate": 3.315649867374006e-05,
|
24758 |
+
"loss": 0.0047,
|
24759 |
+
"step": 3520
|
24760 |
+
},
|
24761 |
+
{
|
24762 |
+
"epoch": 9.327152317880795,
|
24763 |
+
"grad_norm": 0.11738426238298416,
|
24764 |
+
"learning_rate": 3.3023872679045095e-05,
|
24765 |
+
"loss": 0.0065,
|
24766 |
+
"step": 3521
|
24767 |
+
},
|
24768 |
+
{
|
24769 |
+
"epoch": 9.329801324503311,
|
24770 |
+
"grad_norm": 0.13772623240947723,
|
24771 |
+
"learning_rate": 3.289124668435013e-05,
|
24772 |
+
"loss": 0.0071,
|
24773 |
+
"step": 3522
|
24774 |
+
},
|
24775 |
+
{
|
24776 |
+
"epoch": 9.332450331125829,
|
24777 |
+
"grad_norm": 1.1634302139282227,
|
24778 |
+
"learning_rate": 3.275862068965518e-05,
|
24779 |
+
"loss": 0.0238,
|
24780 |
+
"step": 3523
|
24781 |
+
},
|
24782 |
+
{
|
24783 |
+
"epoch": 9.335099337748344,
|
24784 |
+
"grad_norm": 2.1623897552490234,
|
24785 |
+
"learning_rate": 3.2625994694960215e-05,
|
24786 |
+
"loss": 0.0999,
|
24787 |
+
"step": 3524
|
24788 |
+
},
|
24789 |
+
{
|
24790 |
+
"epoch": 9.33774834437086,
|
24791 |
+
"grad_norm": 0.19755753874778748,
|
24792 |
+
"learning_rate": 3.2493368700265253e-05,
|
24793 |
+
"loss": 0.0075,
|
24794 |
+
"step": 3525
|
24795 |
+
},
|
24796 |
+
{
|
24797 |
+
"epoch": 9.340397350993378,
|
24798 |
+
"grad_norm": 0.539291262626648,
|
24799 |
+
"learning_rate": 3.23607427055703e-05,
|
24800 |
+
"loss": 0.0131,
|
24801 |
+
"step": 3526
|
24802 |
+
},
|
24803 |
+
{
|
24804 |
+
"epoch": 9.343046357615894,
|
24805 |
+
"grad_norm": 0.1200319305062294,
|
24806 |
+
"learning_rate": 3.222811671087533e-05,
|
24807 |
+
"loss": 0.0064,
|
24808 |
+
"step": 3527
|
24809 |
+
},
|
24810 |
+
{
|
24811 |
+
"epoch": 9.345695364238411,
|
24812 |
+
"grad_norm": 0.10444632917642593,
|
24813 |
+
"learning_rate": 3.209549071618037e-05,
|
24814 |
+
"loss": 0.0061,
|
24815 |
+
"step": 3528
|
24816 |
+
},
|
24817 |
+
{
|
24818 |
+
"epoch": 9.348344370860927,
|
24819 |
+
"grad_norm": 0.09037845581769943,
|
24820 |
+
"learning_rate": 3.196286472148541e-05,
|
24821 |
+
"loss": 0.0054,
|
24822 |
+
"step": 3529
|
24823 |
+
},
|
24824 |
+
{
|
24825 |
+
"epoch": 9.350993377483444,
|
24826 |
+
"grad_norm": 0.09533439576625824,
|
24827 |
+
"learning_rate": 3.183023872679045e-05,
|
24828 |
+
"loss": 0.0066,
|
24829 |
+
"step": 3530
|
24830 |
+
},
|
24831 |
+
{
|
24832 |
+
"epoch": 9.35364238410596,
|
24833 |
+
"grad_norm": 0.07484545558691025,
|
24834 |
+
"learning_rate": 3.169761273209549e-05,
|
24835 |
+
"loss": 0.0052,
|
24836 |
+
"step": 3531
|
24837 |
+
},
|
24838 |
+
{
|
24839 |
+
"epoch": 9.356291390728476,
|
24840 |
+
"grad_norm": 0.13758543133735657,
|
24841 |
+
"learning_rate": 3.156498673740053e-05,
|
24842 |
+
"loss": 0.006,
|
24843 |
+
"step": 3532
|
24844 |
+
},
|
24845 |
+
{
|
24846 |
+
"epoch": 9.358940397350993,
|
24847 |
+
"grad_norm": 3.194049119949341,
|
24848 |
+
"learning_rate": 3.143236074270557e-05,
|
24849 |
+
"loss": 0.0414,
|
24850 |
+
"step": 3533
|
24851 |
+
},
|
24852 |
+
{
|
24853 |
+
"epoch": 9.36158940397351,
|
24854 |
+
"grad_norm": 0.4014771282672882,
|
24855 |
+
"learning_rate": 3.129973474801061e-05,
|
24856 |
+
"loss": 0.0117,
|
24857 |
+
"step": 3534
|
24858 |
+
},
|
24859 |
+
{
|
24860 |
+
"epoch": 9.364238410596027,
|
24861 |
+
"grad_norm": 1.8910866975784302,
|
24862 |
+
"learning_rate": 3.116710875331565e-05,
|
24863 |
+
"loss": 0.0437,
|
24864 |
+
"step": 3535
|
24865 |
+
},
|
24866 |
+
{
|
24867 |
+
"epoch": 9.366887417218543,
|
24868 |
+
"grad_norm": 0.1309318244457245,
|
24869 |
+
"learning_rate": 3.103448275862069e-05,
|
24870 |
+
"loss": 0.0067,
|
24871 |
+
"step": 3536
|
24872 |
+
},
|
24873 |
+
{
|
24874 |
+
"epoch": 9.36953642384106,
|
24875 |
+
"grad_norm": 0.2526959776878357,
|
24876 |
+
"learning_rate": 3.090185676392573e-05,
|
24877 |
+
"loss": 0.0094,
|
24878 |
+
"step": 3537
|
24879 |
+
},
|
24880 |
+
{
|
24881 |
+
"epoch": 9.372185430463576,
|
24882 |
+
"grad_norm": 0.17184001207351685,
|
24883 |
+
"learning_rate": 3.0769230769230774e-05,
|
24884 |
+
"loss": 0.0074,
|
24885 |
+
"step": 3538
|
24886 |
+
},
|
24887 |
+
{
|
24888 |
+
"epoch": 9.374834437086093,
|
24889 |
+
"grad_norm": 0.26467862725257874,
|
24890 |
+
"learning_rate": 3.063660477453581e-05,
|
24891 |
+
"loss": 0.0094,
|
24892 |
+
"step": 3539
|
24893 |
+
},
|
24894 |
+
{
|
24895 |
+
"epoch": 9.37748344370861,
|
24896 |
+
"grad_norm": 0.05459669604897499,
|
24897 |
+
"learning_rate": 3.050397877984085e-05,
|
24898 |
+
"loss": 0.0043,
|
24899 |
+
"step": 3540
|
24900 |
+
},
|
24901 |
+
{
|
24902 |
+
"epoch": 9.380132450331125,
|
24903 |
+
"grad_norm": 0.1171836405992508,
|
24904 |
+
"learning_rate": 3.0371352785145892e-05,
|
24905 |
+
"loss": 0.0067,
|
24906 |
+
"step": 3541
|
24907 |
+
},
|
24908 |
+
{
|
24909 |
+
"epoch": 9.382781456953643,
|
24910 |
+
"grad_norm": 0.10178942233324051,
|
24911 |
+
"learning_rate": 3.023872679045093e-05,
|
24912 |
+
"loss": 0.007,
|
24913 |
+
"step": 3542
|
24914 |
+
},
|
24915 |
+
{
|
24916 |
+
"epoch": 9.385430463576158,
|
24917 |
+
"grad_norm": 1.3025785684585571,
|
24918 |
+
"learning_rate": 3.0106100795755968e-05,
|
24919 |
+
"loss": 0.0344,
|
24920 |
+
"step": 3543
|
24921 |
+
},
|
24922 |
+
{
|
24923 |
+
"epoch": 9.388079470198676,
|
24924 |
+
"grad_norm": 1.7906361818313599,
|
24925 |
+
"learning_rate": 2.997347480106101e-05,
|
24926 |
+
"loss": 0.0824,
|
24927 |
+
"step": 3544
|
24928 |
+
},
|
24929 |
+
{
|
24930 |
+
"epoch": 9.390728476821192,
|
24931 |
+
"grad_norm": 0.10218120366334915,
|
24932 |
+
"learning_rate": 2.9840848806366047e-05,
|
24933 |
+
"loss": 0.0066,
|
24934 |
+
"step": 3545
|
24935 |
+
},
|
24936 |
+
{
|
24937 |
+
"epoch": 9.39337748344371,
|
24938 |
+
"grad_norm": 0.11341088265180588,
|
24939 |
+
"learning_rate": 2.970822281167109e-05,
|
24940 |
+
"loss": 0.0074,
|
24941 |
+
"step": 3546
|
24942 |
+
},
|
24943 |
+
{
|
24944 |
+
"epoch": 9.396026490066225,
|
24945 |
+
"grad_norm": 0.128928080201149,
|
24946 |
+
"learning_rate": 2.957559681697613e-05,
|
24947 |
+
"loss": 0.0063,
|
24948 |
+
"step": 3547
|
24949 |
+
},
|
24950 |
+
{
|
24951 |
+
"epoch": 9.398675496688742,
|
24952 |
+
"grad_norm": 0.09247614443302155,
|
24953 |
+
"learning_rate": 2.9442970822281168e-05,
|
24954 |
+
"loss": 0.0057,
|
24955 |
+
"step": 3548
|
24956 |
+
},
|
24957 |
+
{
|
24958 |
+
"epoch": 9.401324503311258,
|
24959 |
+
"grad_norm": 0.9803311824798584,
|
24960 |
+
"learning_rate": 2.9310344827586206e-05,
|
24961 |
+
"loss": 0.0182,
|
24962 |
+
"step": 3549
|
24963 |
+
},
|
24964 |
+
{
|
24965 |
+
"epoch": 9.403973509933774,
|
24966 |
+
"grad_norm": 0.23505902290344238,
|
24967 |
+
"learning_rate": 2.9177718832891247e-05,
|
24968 |
+
"loss": 0.0071,
|
24969 |
+
"step": 3550
|
24970 |
+
},
|
24971 |
+
{
|
24972 |
+
"epoch": 9.406622516556292,
|
24973 |
+
"grad_norm": 0.2330973744392395,
|
24974 |
+
"learning_rate": 2.904509283819629e-05,
|
24975 |
+
"loss": 0.011,
|
24976 |
+
"step": 3551
|
24977 |
+
},
|
24978 |
+
{
|
24979 |
+
"epoch": 9.409271523178807,
|
24980 |
+
"grad_norm": 0.11561492830514908,
|
24981 |
+
"learning_rate": 2.8912466843501326e-05,
|
24982 |
+
"loss": 0.0064,
|
24983 |
+
"step": 3552
|
24984 |
+
},
|
24985 |
+
{
|
24986 |
+
"epoch": 9.411920529801325,
|
24987 |
+
"grad_norm": 0.2049504816532135,
|
24988 |
+
"learning_rate": 2.8779840848806368e-05,
|
24989 |
+
"loss": 0.0082,
|
24990 |
+
"step": 3553
|
24991 |
+
},
|
24992 |
+
{
|
24993 |
+
"epoch": 9.41456953642384,
|
24994 |
+
"grad_norm": 0.08177812397480011,
|
24995 |
+
"learning_rate": 2.864721485411141e-05,
|
24996 |
+
"loss": 0.0052,
|
24997 |
+
"step": 3554
|
24998 |
+
},
|
24999 |
+
{
|
25000 |
+
"epoch": 9.417218543046358,
|
25001 |
+
"grad_norm": 0.12092561274766922,
|
25002 |
+
"learning_rate": 2.8514588859416444e-05,
|
25003 |
+
"loss": 0.0061,
|
25004 |
+
"step": 3555
|
25005 |
+
},
|
25006 |
+
{
|
25007 |
+
"epoch": 9.419867549668874,
|
25008 |
+
"grad_norm": 0.08898591995239258,
|
25009 |
+
"learning_rate": 2.8381962864721485e-05,
|
25010 |
+
"loss": 0.0061,
|
25011 |
+
"step": 3556
|
25012 |
+
},
|
25013 |
+
{
|
25014 |
+
"epoch": 9.422516556291392,
|
25015 |
+
"grad_norm": 0.15374480187892914,
|
25016 |
+
"learning_rate": 2.8249336870026527e-05,
|
25017 |
+
"loss": 0.0058,
|
25018 |
+
"step": 3557
|
25019 |
+
},
|
25020 |
+
{
|
25021 |
+
"epoch": 9.425165562913907,
|
25022 |
+
"grad_norm": 0.1372290402650833,
|
25023 |
+
"learning_rate": 2.8116710875331565e-05,
|
25024 |
+
"loss": 0.0071,
|
25025 |
+
"step": 3558
|
25026 |
+
},
|
25027 |
+
{
|
25028 |
+
"epoch": 9.427814569536423,
|
25029 |
+
"grad_norm": 0.11643172055482864,
|
25030 |
+
"learning_rate": 2.7984084880636606e-05,
|
25031 |
+
"loss": 0.0068,
|
25032 |
+
"step": 3559
|
25033 |
+
},
|
25034 |
+
{
|
25035 |
+
"epoch": 9.43046357615894,
|
25036 |
+
"grad_norm": 0.09352966398000717,
|
25037 |
+
"learning_rate": 2.7851458885941647e-05,
|
25038 |
+
"loss": 0.006,
|
25039 |
+
"step": 3560
|
25040 |
+
},
|
25041 |
+
{
|
25042 |
+
"epoch": 9.433112582781456,
|
25043 |
+
"grad_norm": 0.20532579720020294,
|
25044 |
+
"learning_rate": 2.7718832891246682e-05,
|
25045 |
+
"loss": 0.0071,
|
25046 |
+
"step": 3561
|
25047 |
+
},
|
25048 |
+
{
|
25049 |
+
"epoch": 9.435761589403974,
|
25050 |
+
"grad_norm": 1.9546130895614624,
|
25051 |
+
"learning_rate": 2.7586206896551723e-05,
|
25052 |
+
"loss": 0.0191,
|
25053 |
+
"step": 3562
|
25054 |
+
},
|
25055 |
+
{
|
25056 |
+
"epoch": 9.43841059602649,
|
25057 |
+
"grad_norm": 0.16163243353366852,
|
25058 |
+
"learning_rate": 2.7453580901856765e-05,
|
25059 |
+
"loss": 0.007,
|
25060 |
+
"step": 3563
|
25061 |
+
},
|
25062 |
+
{
|
25063 |
+
"epoch": 9.441059602649007,
|
25064 |
+
"grad_norm": 0.18948762118816376,
|
25065 |
+
"learning_rate": 2.7320954907161803e-05,
|
25066 |
+
"loss": 0.0079,
|
25067 |
+
"step": 3564
|
25068 |
+
},
|
25069 |
+
{
|
25070 |
+
"epoch": 9.443708609271523,
|
25071 |
+
"grad_norm": 0.2582177519798279,
|
25072 |
+
"learning_rate": 2.7188328912466844e-05,
|
25073 |
+
"loss": 0.0093,
|
25074 |
+
"step": 3565
|
25075 |
+
},
|
25076 |
+
{
|
25077 |
+
"epoch": 9.44635761589404,
|
25078 |
+
"grad_norm": 0.20868119597434998,
|
25079 |
+
"learning_rate": 2.7055702917771885e-05,
|
25080 |
+
"loss": 0.0081,
|
25081 |
+
"step": 3566
|
25082 |
+
},
|
25083 |
+
{
|
25084 |
+
"epoch": 9.449006622516556,
|
25085 |
+
"grad_norm": 1.0529993772506714,
|
25086 |
+
"learning_rate": 2.6923076923076927e-05,
|
25087 |
+
"loss": 0.0158,
|
25088 |
+
"step": 3567
|
25089 |
+
},
|
25090 |
+
{
|
25091 |
+
"epoch": 9.451655629139072,
|
25092 |
+
"grad_norm": 0.2311079204082489,
|
25093 |
+
"learning_rate": 2.679045092838196e-05,
|
25094 |
+
"loss": 0.0088,
|
25095 |
+
"step": 3568
|
25096 |
+
},
|
25097 |
+
{
|
25098 |
+
"epoch": 9.45430463576159,
|
25099 |
+
"grad_norm": 0.4316157400608063,
|
25100 |
+
"learning_rate": 2.6657824933687003e-05,
|
25101 |
+
"loss": 0.0106,
|
25102 |
+
"step": 3569
|
25103 |
+
},
|
25104 |
+
{
|
25105 |
+
"epoch": 9.456953642384105,
|
25106 |
+
"grad_norm": 0.2940143048763275,
|
25107 |
+
"learning_rate": 2.6525198938992044e-05,
|
25108 |
+
"loss": 0.0091,
|
25109 |
+
"step": 3570
|
25110 |
+
},
|
25111 |
+
{
|
25112 |
+
"epoch": 9.459602649006623,
|
25113 |
+
"grad_norm": 0.10199405252933502,
|
25114 |
+
"learning_rate": 2.6392572944297082e-05,
|
25115 |
+
"loss": 0.0061,
|
25116 |
+
"step": 3571
|
25117 |
+
},
|
25118 |
+
{
|
25119 |
+
"epoch": 9.462251655629139,
|
25120 |
+
"grad_norm": 0.09311787784099579,
|
25121 |
+
"learning_rate": 2.6259946949602123e-05,
|
25122 |
+
"loss": 0.0066,
|
25123 |
+
"step": 3572
|
25124 |
+
},
|
25125 |
+
{
|
25126 |
+
"epoch": 9.464900662251656,
|
25127 |
+
"grad_norm": 0.11943931877613068,
|
25128 |
+
"learning_rate": 2.6127320954907165e-05,
|
25129 |
+
"loss": 0.0067,
|
25130 |
+
"step": 3573
|
25131 |
+
},
|
25132 |
+
{
|
25133 |
+
"epoch": 9.467549668874172,
|
25134 |
+
"grad_norm": 0.08641272783279419,
|
25135 |
+
"learning_rate": 2.59946949602122e-05,
|
25136 |
+
"loss": 0.0064,
|
25137 |
+
"step": 3574
|
25138 |
+
},
|
25139 |
+
{
|
25140 |
+
"epoch": 9.47019867549669,
|
25141 |
+
"grad_norm": 2.144976854324341,
|
25142 |
+
"learning_rate": 2.586206896551724e-05,
|
25143 |
+
"loss": 0.0777,
|
25144 |
+
"step": 3575
|
25145 |
+
},
|
25146 |
+
{
|
25147 |
+
"epoch": 9.472847682119205,
|
25148 |
+
"grad_norm": 2.3750195503234863,
|
25149 |
+
"learning_rate": 2.5729442970822282e-05,
|
25150 |
+
"loss": 0.03,
|
25151 |
+
"step": 3576
|
25152 |
+
},
|
25153 |
+
{
|
25154 |
+
"epoch": 9.475496688741721,
|
25155 |
+
"grad_norm": 0.3863673210144043,
|
25156 |
+
"learning_rate": 2.559681697612732e-05,
|
25157 |
+
"loss": 0.0133,
|
25158 |
+
"step": 3577
|
25159 |
+
},
|
25160 |
+
{
|
25161 |
+
"epoch": 9.478145695364239,
|
25162 |
+
"grad_norm": 0.18488571047782898,
|
25163 |
+
"learning_rate": 2.546419098143236e-05,
|
25164 |
+
"loss": 0.0101,
|
25165 |
+
"step": 3578
|
25166 |
+
},
|
25167 |
+
{
|
25168 |
+
"epoch": 9.480794701986754,
|
25169 |
+
"grad_norm": 0.14614498615264893,
|
25170 |
+
"learning_rate": 2.5331564986737403e-05,
|
25171 |
+
"loss": 0.0078,
|
25172 |
+
"step": 3579
|
25173 |
+
},
|
25174 |
+
{
|
25175 |
+
"epoch": 9.483443708609272,
|
25176 |
+
"grad_norm": 0.502761721611023,
|
25177 |
+
"learning_rate": 2.519893899204244e-05,
|
25178 |
+
"loss": 0.0098,
|
25179 |
+
"step": 3580
|
25180 |
+
},
|
25181 |
+
{
|
25182 |
+
"epoch": 9.486092715231788,
|
25183 |
+
"grad_norm": 0.12372557818889618,
|
25184 |
+
"learning_rate": 2.506631299734748e-05,
|
25185 |
+
"loss": 0.0064,
|
25186 |
+
"step": 3581
|
25187 |
+
},
|
25188 |
+
{
|
25189 |
+
"epoch": 9.488741721854305,
|
25190 |
+
"grad_norm": 0.06779627501964569,
|
25191 |
+
"learning_rate": 2.493368700265252e-05,
|
25192 |
+
"loss": 0.0045,
|
25193 |
+
"step": 3582
|
25194 |
+
},
|
25195 |
+
{
|
25196 |
+
"epoch": 9.491390728476821,
|
25197 |
+
"grad_norm": 0.20525391399860382,
|
25198 |
+
"learning_rate": 2.480106100795756e-05,
|
25199 |
+
"loss": 0.009,
|
25200 |
+
"step": 3583
|
25201 |
+
},
|
25202 |
+
{
|
25203 |
+
"epoch": 9.494039735099339,
|
25204 |
+
"grad_norm": 0.12155789136886597,
|
25205 |
+
"learning_rate": 2.46684350132626e-05,
|
25206 |
+
"loss": 0.0063,
|
25207 |
+
"step": 3584
|
25208 |
+
},
|
25209 |
+
{
|
25210 |
+
"epoch": 9.496688741721854,
|
25211 |
+
"grad_norm": 0.10192214697599411,
|
25212 |
+
"learning_rate": 2.453580901856764e-05,
|
25213 |
+
"loss": 0.0057,
|
25214 |
+
"step": 3585
|
25215 |
+
},
|
25216 |
+
{
|
25217 |
+
"epoch": 9.49933774834437,
|
25218 |
+
"grad_norm": 0.987741231918335,
|
25219 |
+
"learning_rate": 2.440318302387268e-05,
|
25220 |
+
"loss": 0.0233,
|
25221 |
+
"step": 3586
|
25222 |
+
},
|
25223 |
+
{
|
25224 |
+
"epoch": 9.501986754966888,
|
25225 |
+
"grad_norm": 0.39217236638069153,
|
25226 |
+
"learning_rate": 2.4270557029177717e-05,
|
25227 |
+
"loss": 0.0098,
|
25228 |
+
"step": 3587
|
25229 |
+
},
|
25230 |
+
{
|
25231 |
+
"epoch": 9.504635761589403,
|
25232 |
+
"grad_norm": 0.11593826860189438,
|
25233 |
+
"learning_rate": 2.4137931034482758e-05,
|
25234 |
+
"loss": 0.0055,
|
25235 |
+
"step": 3588
|
25236 |
+
},
|
25237 |
+
{
|
25238 |
+
"epoch": 9.507284768211921,
|
25239 |
+
"grad_norm": 4.071221828460693,
|
25240 |
+
"learning_rate": 2.40053050397878e-05,
|
25241 |
+
"loss": 0.0562,
|
25242 |
+
"step": 3589
|
25243 |
+
},
|
25244 |
+
{
|
25245 |
+
"epoch": 9.509933774834437,
|
25246 |
+
"grad_norm": 2.431532382965088,
|
25247 |
+
"learning_rate": 2.3872679045092838e-05,
|
25248 |
+
"loss": 0.2236,
|
25249 |
+
"step": 3590
|
25250 |
+
},
|
25251 |
+
{
|
25252 |
+
"epoch": 9.512582781456954,
|
25253 |
+
"grad_norm": 0.9646773934364319,
|
25254 |
+
"learning_rate": 2.374005305039788e-05,
|
25255 |
+
"loss": 0.0297,
|
25256 |
+
"step": 3591
|
25257 |
+
},
|
25258 |
+
{
|
25259 |
+
"epoch": 9.51523178807947,
|
25260 |
+
"grad_norm": 0.9836496710777283,
|
25261 |
+
"learning_rate": 2.360742705570292e-05,
|
25262 |
+
"loss": 0.0152,
|
25263 |
+
"step": 3592
|
25264 |
+
},
|
25265 |
+
{
|
25266 |
+
"epoch": 9.517880794701988,
|
25267 |
+
"grad_norm": 0.08301722258329391,
|
25268 |
+
"learning_rate": 2.347480106100796e-05,
|
25269 |
+
"loss": 0.0054,
|
25270 |
+
"step": 3593
|
25271 |
+
},
|
25272 |
+
{
|
25273 |
+
"epoch": 9.520529801324503,
|
25274 |
+
"grad_norm": 0.3062174618244171,
|
25275 |
+
"learning_rate": 2.3342175066312996e-05,
|
25276 |
+
"loss": 0.0075,
|
25277 |
+
"step": 3594
|
25278 |
+
},
|
25279 |
+
{
|
25280 |
+
"epoch": 9.52317880794702,
|
25281 |
+
"grad_norm": 1.9968500137329102,
|
25282 |
+
"learning_rate": 2.3209549071618038e-05,
|
25283 |
+
"loss": 0.022,
|
25284 |
+
"step": 3595
|
25285 |
+
},
|
25286 |
+
{
|
25287 |
+
"epoch": 9.525827814569537,
|
25288 |
+
"grad_norm": 0.047811876982450485,
|
25289 |
+
"learning_rate": 2.307692307692308e-05,
|
25290 |
+
"loss": 0.0038,
|
25291 |
+
"step": 3596
|
25292 |
+
},
|
25293 |
+
{
|
25294 |
+
"epoch": 9.528476821192053,
|
25295 |
+
"grad_norm": 0.08924803137779236,
|
25296 |
+
"learning_rate": 2.2944297082228117e-05,
|
25297 |
+
"loss": 0.0063,
|
25298 |
+
"step": 3597
|
25299 |
+
},
|
25300 |
+
{
|
25301 |
+
"epoch": 9.53112582781457,
|
25302 |
+
"grad_norm": 0.16977474093437195,
|
25303 |
+
"learning_rate": 2.281167108753316e-05,
|
25304 |
+
"loss": 0.0068,
|
25305 |
+
"step": 3598
|
25306 |
+
},
|
25307 |
+
{
|
25308 |
+
"epoch": 9.533774834437086,
|
25309 |
+
"grad_norm": 0.06967270374298096,
|
25310 |
+
"learning_rate": 2.2679045092838196e-05,
|
25311 |
+
"loss": 0.0038,
|
25312 |
+
"step": 3599
|
25313 |
+
},
|
25314 |
+
{
|
25315 |
+
"epoch": 9.536423841059603,
|
25316 |
+
"grad_norm": 0.10326126962900162,
|
25317 |
+
"learning_rate": 2.2546419098143234e-05,
|
25318 |
+
"loss": 0.007,
|
25319 |
+
"step": 3600
|
25320 |
+
},
|
25321 |
+
{
|
25322 |
+
"epoch": 9.53907284768212,
|
25323 |
+
"grad_norm": 0.0956234410405159,
|
25324 |
+
"learning_rate": 2.2413793103448276e-05,
|
25325 |
+
"loss": 0.0048,
|
25326 |
+
"step": 3601
|
25327 |
+
},
|
25328 |
+
{
|
25329 |
+
"epoch": 9.541721854304635,
|
25330 |
+
"grad_norm": 0.11128674447536469,
|
25331 |
+
"learning_rate": 2.2281167108753317e-05,
|
25332 |
+
"loss": 0.0073,
|
25333 |
+
"step": 3602
|
25334 |
+
},
|
25335 |
+
{
|
25336 |
+
"epoch": 9.544370860927152,
|
25337 |
+
"grad_norm": 0.34343084692955017,
|
25338 |
+
"learning_rate": 2.2148541114058355e-05,
|
25339 |
+
"loss": 0.0094,
|
25340 |
+
"step": 3603
|
25341 |
+
},
|
25342 |
+
{
|
25343 |
+
"epoch": 9.547019867549668,
|
25344 |
+
"grad_norm": 0.10045126080513,
|
25345 |
+
"learning_rate": 2.2015915119363396e-05,
|
25346 |
+
"loss": 0.0058,
|
25347 |
+
"step": 3604
|
25348 |
+
},
|
25349 |
+
{
|
25350 |
+
"epoch": 9.549668874172186,
|
25351 |
+
"grad_norm": 0.20021073520183563,
|
25352 |
+
"learning_rate": 2.1883289124668434e-05,
|
25353 |
+
"loss": 0.0067,
|
25354 |
+
"step": 3605
|
25355 |
+
},
|
25356 |
+
{
|
25357 |
+
"epoch": 9.552317880794702,
|
25358 |
+
"grad_norm": 0.16547444462776184,
|
25359 |
+
"learning_rate": 2.1750663129973476e-05,
|
25360 |
+
"loss": 0.0066,
|
25361 |
+
"step": 3606
|
25362 |
+
},
|
25363 |
+
{
|
25364 |
+
"epoch": 9.55496688741722,
|
25365 |
+
"grad_norm": 0.6814205050468445,
|
25366 |
+
"learning_rate": 2.1618037135278514e-05,
|
25367 |
+
"loss": 0.0097,
|
25368 |
+
"step": 3607
|
25369 |
+
},
|
25370 |
+
{
|
25371 |
+
"epoch": 9.557615894039735,
|
25372 |
+
"grad_norm": 0.15485073626041412,
|
25373 |
+
"learning_rate": 2.1485411140583555e-05,
|
25374 |
+
"loss": 0.0066,
|
25375 |
+
"step": 3608
|
25376 |
+
},
|
25377 |
+
{
|
25378 |
+
"epoch": 9.560264900662252,
|
25379 |
+
"grad_norm": 0.3462410867214203,
|
25380 |
+
"learning_rate": 2.1352785145888597e-05,
|
25381 |
+
"loss": 0.0099,
|
25382 |
+
"step": 3609
|
25383 |
+
},
|
25384 |
+
{
|
25385 |
+
"epoch": 9.562913907284768,
|
25386 |
+
"grad_norm": 1.7099510431289673,
|
25387 |
+
"learning_rate": 2.1220159151193635e-05,
|
25388 |
+
"loss": 0.0234,
|
25389 |
+
"step": 3610
|
25390 |
+
},
|
25391 |
+
{
|
25392 |
+
"epoch": 9.565562913907284,
|
25393 |
+
"grad_norm": 0.18412642180919647,
|
25394 |
+
"learning_rate": 2.1087533156498673e-05,
|
25395 |
+
"loss": 0.0065,
|
25396 |
+
"step": 3611
|
25397 |
+
},
|
25398 |
+
{
|
25399 |
+
"epoch": 9.568211920529802,
|
25400 |
+
"grad_norm": 0.10271494835615158,
|
25401 |
+
"learning_rate": 2.0954907161803714e-05,
|
25402 |
+
"loss": 0.0054,
|
25403 |
+
"step": 3612
|
25404 |
+
},
|
25405 |
+
{
|
25406 |
+
"epoch": 9.570860927152317,
|
25407 |
+
"grad_norm": 0.1996956765651703,
|
25408 |
+
"learning_rate": 2.0822281167108752e-05,
|
25409 |
+
"loss": 0.0094,
|
25410 |
+
"step": 3613
|
25411 |
+
},
|
25412 |
+
{
|
25413 |
+
"epoch": 9.573509933774835,
|
25414 |
+
"grad_norm": 0.215884268283844,
|
25415 |
+
"learning_rate": 2.0689655172413793e-05,
|
25416 |
+
"loss": 0.008,
|
25417 |
+
"step": 3614
|
25418 |
+
},
|
25419 |
+
{
|
25420 |
+
"epoch": 9.57615894039735,
|
25421 |
+
"grad_norm": 0.40959951281547546,
|
25422 |
+
"learning_rate": 2.0557029177718835e-05,
|
25423 |
+
"loss": 0.0104,
|
25424 |
+
"step": 3615
|
25425 |
+
},
|
25426 |
+
{
|
25427 |
+
"epoch": 9.578807947019868,
|
25428 |
+
"grad_norm": 0.2678709626197815,
|
25429 |
+
"learning_rate": 2.0424403183023873e-05,
|
25430 |
+
"loss": 0.0095,
|
25431 |
+
"step": 3616
|
25432 |
+
},
|
25433 |
+
{
|
25434 |
+
"epoch": 9.581456953642384,
|
25435 |
+
"grad_norm": 0.6541901230812073,
|
25436 |
+
"learning_rate": 2.0291777188328914e-05,
|
25437 |
+
"loss": 0.0136,
|
25438 |
+
"step": 3617
|
25439 |
+
},
|
25440 |
+
{
|
25441 |
+
"epoch": 9.584105960264901,
|
25442 |
+
"grad_norm": 0.14791490137577057,
|
25443 |
+
"learning_rate": 2.0159151193633952e-05,
|
25444 |
+
"loss": 0.0065,
|
25445 |
+
"step": 3618
|
25446 |
+
},
|
25447 |
+
{
|
25448 |
+
"epoch": 9.586754966887417,
|
25449 |
+
"grad_norm": 0.2216922789812088,
|
25450 |
+
"learning_rate": 2.0026525198938993e-05,
|
25451 |
+
"loss": 0.0071,
|
25452 |
+
"step": 3619
|
25453 |
+
},
|
25454 |
+
{
|
25455 |
+
"epoch": 9.589403973509933,
|
25456 |
+
"grad_norm": 0.08360443264245987,
|
25457 |
+
"learning_rate": 1.989389920424403e-05,
|
25458 |
+
"loss": 0.0057,
|
25459 |
+
"step": 3620
|
25460 |
+
},
|
25461 |
+
{
|
25462 |
+
"epoch": 9.59205298013245,
|
25463 |
+
"grad_norm": 0.21307526528835297,
|
25464 |
+
"learning_rate": 1.9761273209549073e-05,
|
25465 |
+
"loss": 0.0078,
|
25466 |
+
"step": 3621
|
25467 |
+
},
|
25468 |
+
{
|
25469 |
+
"epoch": 9.594701986754966,
|
25470 |
+
"grad_norm": 0.16253237426280975,
|
25471 |
+
"learning_rate": 1.9628647214854114e-05,
|
25472 |
+
"loss": 0.0067,
|
25473 |
+
"step": 3622
|
25474 |
+
},
|
25475 |
+
{
|
25476 |
+
"epoch": 9.597350993377484,
|
25477 |
+
"grad_norm": 0.07736147940158844,
|
25478 |
+
"learning_rate": 1.9496021220159152e-05,
|
25479 |
+
"loss": 0.0049,
|
25480 |
+
"step": 3623
|
25481 |
+
},
|
25482 |
+
{
|
25483 |
+
"epoch": 9.6,
|
25484 |
+
"grad_norm": 0.18379540741443634,
|
25485 |
+
"learning_rate": 1.936339522546419e-05,
|
25486 |
+
"loss": 0.0089,
|
25487 |
+
"step": 3624
|
25488 |
+
},
|
25489 |
+
{
|
25490 |
+
"epoch": 9.602649006622517,
|
25491 |
+
"grad_norm": 0.07767052948474884,
|
25492 |
+
"learning_rate": 1.923076923076923e-05,
|
25493 |
+
"loss": 0.0053,
|
25494 |
+
"step": 3625
|
25495 |
+
},
|
25496 |
+
{
|
25497 |
+
"epoch": 9.605298013245033,
|
25498 |
+
"grad_norm": 0.15335260331630707,
|
25499 |
+
"learning_rate": 1.909814323607427e-05,
|
25500 |
+
"loss": 0.0066,
|
25501 |
+
"step": 3626
|
25502 |
+
},
|
25503 |
+
{
|
25504 |
+
"epoch": 9.607947019867549,
|
25505 |
+
"grad_norm": 0.1968509703874588,
|
25506 |
+
"learning_rate": 1.896551724137931e-05,
|
25507 |
+
"loss": 0.0076,
|
25508 |
+
"step": 3627
|
25509 |
+
},
|
25510 |
+
{
|
25511 |
+
"epoch": 9.610596026490066,
|
25512 |
+
"grad_norm": 0.06846405565738678,
|
25513 |
+
"learning_rate": 1.8832891246684352e-05,
|
25514 |
+
"loss": 0.0048,
|
25515 |
+
"step": 3628
|
25516 |
+
},
|
25517 |
+
{
|
25518 |
+
"epoch": 9.613245033112582,
|
25519 |
+
"grad_norm": 0.12656621634960175,
|
25520 |
+
"learning_rate": 1.870026525198939e-05,
|
25521 |
+
"loss": 0.0079,
|
25522 |
+
"step": 3629
|
25523 |
+
},
|
25524 |
+
{
|
25525 |
+
"epoch": 9.6158940397351,
|
25526 |
+
"grad_norm": 0.0959506556391716,
|
25527 |
+
"learning_rate": 1.8567639257294428e-05,
|
25528 |
+
"loss": 0.0058,
|
25529 |
+
"step": 3630
|
25530 |
+
},
|
25531 |
+
{
|
25532 |
+
"epoch": 9.618543046357615,
|
25533 |
+
"grad_norm": 0.2598751187324524,
|
25534 |
+
"learning_rate": 1.843501326259947e-05,
|
25535 |
+
"loss": 0.0072,
|
25536 |
+
"step": 3631
|
25537 |
+
},
|
25538 |
+
{
|
25539 |
+
"epoch": 9.621192052980133,
|
25540 |
+
"grad_norm": 0.16255703568458557,
|
25541 |
+
"learning_rate": 1.830238726790451e-05,
|
25542 |
+
"loss": 0.0068,
|
25543 |
+
"step": 3632
|
25544 |
+
},
|
25545 |
+
{
|
25546 |
+
"epoch": 9.623841059602649,
|
25547 |
+
"grad_norm": 0.14356957376003265,
|
25548 |
+
"learning_rate": 1.816976127320955e-05,
|
25549 |
+
"loss": 0.0071,
|
25550 |
+
"step": 3633
|
25551 |
+
},
|
25552 |
+
{
|
25553 |
+
"epoch": 9.626490066225166,
|
25554 |
+
"grad_norm": 0.11708579212427139,
|
25555 |
+
"learning_rate": 1.803713527851459e-05,
|
25556 |
+
"loss": 0.0067,
|
25557 |
+
"step": 3634
|
25558 |
+
},
|
25559 |
+
{
|
25560 |
+
"epoch": 9.629139072847682,
|
25561 |
+
"grad_norm": 0.11293361335992813,
|
25562 |
+
"learning_rate": 1.790450928381963e-05,
|
25563 |
+
"loss": 0.0063,
|
25564 |
+
"step": 3635
|
25565 |
+
},
|
25566 |
+
{
|
25567 |
+
"epoch": 9.631788079470198,
|
25568 |
+
"grad_norm": 0.38313353061676025,
|
25569 |
+
"learning_rate": 1.7771883289124666e-05,
|
25570 |
+
"loss": 0.0118,
|
25571 |
+
"step": 3636
|
25572 |
+
},
|
25573 |
+
{
|
25574 |
+
"epoch": 9.634437086092715,
|
25575 |
+
"grad_norm": 0.13401620090007782,
|
25576 |
+
"learning_rate": 1.7639257294429708e-05,
|
25577 |
+
"loss": 0.0067,
|
25578 |
+
"step": 3637
|
25579 |
+
},
|
25580 |
+
{
|
25581 |
+
"epoch": 9.637086092715231,
|
25582 |
+
"grad_norm": 0.23206515610218048,
|
25583 |
+
"learning_rate": 1.750663129973475e-05,
|
25584 |
+
"loss": 0.0069,
|
25585 |
+
"step": 3638
|
25586 |
+
},
|
25587 |
+
{
|
25588 |
+
"epoch": 9.639735099337749,
|
25589 |
+
"grad_norm": 0.19798971712589264,
|
25590 |
+
"learning_rate": 1.7374005305039787e-05,
|
25591 |
+
"loss": 0.0062,
|
25592 |
+
"step": 3639
|
25593 |
+
},
|
25594 |
+
{
|
25595 |
+
"epoch": 9.642384105960264,
|
25596 |
+
"grad_norm": 0.09165431559085846,
|
25597 |
+
"learning_rate": 1.7241379310344828e-05,
|
25598 |
+
"loss": 0.0058,
|
25599 |
+
"step": 3640
|
25600 |
+
},
|
25601 |
+
{
|
25602 |
+
"epoch": 9.645033112582782,
|
25603 |
+
"grad_norm": 0.616093099117279,
|
25604 |
+
"learning_rate": 1.710875331564987e-05,
|
25605 |
+
"loss": 0.0127,
|
25606 |
+
"step": 3641
|
25607 |
+
},
|
25608 |
+
{
|
25609 |
+
"epoch": 9.647682119205298,
|
25610 |
+
"grad_norm": 1.1477009057998657,
|
25611 |
+
"learning_rate": 1.6976127320954904e-05,
|
25612 |
+
"loss": 0.0116,
|
25613 |
+
"step": 3642
|
25614 |
+
},
|
25615 |
+
{
|
25616 |
+
"epoch": 9.650331125827815,
|
25617 |
+
"grad_norm": 0.13470661640167236,
|
25618 |
+
"learning_rate": 1.6843501326259946e-05,
|
25619 |
+
"loss": 0.0062,
|
25620 |
+
"step": 3643
|
25621 |
+
},
|
25622 |
+
{
|
25623 |
+
"epoch": 9.652980132450331,
|
25624 |
+
"grad_norm": 0.11247550696134567,
|
25625 |
+
"learning_rate": 1.6710875331564987e-05,
|
25626 |
+
"loss": 0.0052,
|
25627 |
+
"step": 3644
|
25628 |
+
},
|
25629 |
+
{
|
25630 |
+
"epoch": 9.655629139072847,
|
25631 |
+
"grad_norm": 0.5074613094329834,
|
25632 |
+
"learning_rate": 1.657824933687003e-05,
|
25633 |
+
"loss": 0.0084,
|
25634 |
+
"step": 3645
|
25635 |
+
},
|
25636 |
+
{
|
25637 |
+
"epoch": 9.658278145695364,
|
25638 |
+
"grad_norm": 0.19383744895458221,
|
25639 |
+
"learning_rate": 1.6445623342175066e-05,
|
25640 |
+
"loss": 0.0078,
|
25641 |
+
"step": 3646
|
25642 |
+
},
|
25643 |
+
{
|
25644 |
+
"epoch": 9.66092715231788,
|
25645 |
+
"grad_norm": 1.3290162086486816,
|
25646 |
+
"learning_rate": 1.6312997347480108e-05,
|
25647 |
+
"loss": 0.0304,
|
25648 |
+
"step": 3647
|
25649 |
+
},
|
25650 |
+
{
|
25651 |
+
"epoch": 9.663576158940398,
|
25652 |
+
"grad_norm": 0.3068723678588867,
|
25653 |
+
"learning_rate": 1.618037135278515e-05,
|
25654 |
+
"loss": 0.0083,
|
25655 |
+
"step": 3648
|
25656 |
+
},
|
25657 |
+
{
|
25658 |
+
"epoch": 9.666225165562913,
|
25659 |
+
"grad_norm": 0.16725531220436096,
|
25660 |
+
"learning_rate": 1.6047745358090184e-05,
|
25661 |
+
"loss": 0.0082,
|
25662 |
+
"step": 3649
|
25663 |
+
},
|
25664 |
+
{
|
25665 |
+
"epoch": 9.668874172185431,
|
25666 |
+
"grad_norm": 0.09185387194156647,
|
25667 |
+
"learning_rate": 1.5915119363395225e-05,
|
25668 |
+
"loss": 0.0058,
|
25669 |
+
"step": 3650
|
25670 |
+
},
|
25671 |
+
{
|
25672 |
+
"epoch": 9.671523178807947,
|
25673 |
+
"grad_norm": 0.20803192257881165,
|
25674 |
+
"learning_rate": 1.5782493368700266e-05,
|
25675 |
+
"loss": 0.0062,
|
25676 |
+
"step": 3651
|
25677 |
+
},
|
25678 |
+
{
|
25679 |
+
"epoch": 9.674172185430464,
|
25680 |
+
"grad_norm": 0.11356912553310394,
|
25681 |
+
"learning_rate": 1.5649867374005304e-05,
|
25682 |
+
"loss": 0.007,
|
25683 |
+
"step": 3652
|
25684 |
+
},
|
25685 |
+
{
|
25686 |
+
"epoch": 9.67682119205298,
|
25687 |
+
"grad_norm": 0.12919805943965912,
|
25688 |
+
"learning_rate": 1.5517241379310346e-05,
|
25689 |
+
"loss": 0.0063,
|
25690 |
+
"step": 3653
|
25691 |
+
},
|
25692 |
+
{
|
25693 |
+
"epoch": 9.679470198675496,
|
25694 |
+
"grad_norm": 0.08865955471992493,
|
25695 |
+
"learning_rate": 1.5384615384615387e-05,
|
25696 |
+
"loss": 0.0057,
|
25697 |
+
"step": 3654
|
25698 |
+
},
|
25699 |
+
{
|
25700 |
+
"epoch": 9.682119205298013,
|
25701 |
+
"grad_norm": 0.21951282024383545,
|
25702 |
+
"learning_rate": 1.5251989389920425e-05,
|
25703 |
+
"loss": 0.0076,
|
25704 |
+
"step": 3655
|
25705 |
+
},
|
25706 |
+
{
|
25707 |
+
"epoch": 9.68476821192053,
|
25708 |
+
"grad_norm": 0.2228957563638687,
|
25709 |
+
"learning_rate": 1.5119363395225465e-05,
|
25710 |
+
"loss": 0.0071,
|
25711 |
+
"step": 3656
|
25712 |
+
},
|
25713 |
+
{
|
25714 |
+
"epoch": 9.687417218543047,
|
25715 |
+
"grad_norm": 0.49982765316963196,
|
25716 |
+
"learning_rate": 1.4986737400530505e-05,
|
25717 |
+
"loss": 0.0086,
|
25718 |
+
"step": 3657
|
25719 |
+
},
|
25720 |
+
{
|
25721 |
+
"epoch": 9.690066225165562,
|
25722 |
+
"grad_norm": 0.17828427255153656,
|
25723 |
+
"learning_rate": 1.4854111405835544e-05,
|
25724 |
+
"loss": 0.007,
|
25725 |
+
"step": 3658
|
25726 |
+
},
|
25727 |
+
{
|
25728 |
+
"epoch": 9.69271523178808,
|
25729 |
+
"grad_norm": 0.07209718227386475,
|
25730 |
+
"learning_rate": 1.4721485411140584e-05,
|
25731 |
+
"loss": 0.0051,
|
25732 |
+
"step": 3659
|
25733 |
+
},
|
25734 |
+
{
|
25735 |
+
"epoch": 9.695364238410596,
|
25736 |
+
"grad_norm": 0.9999616146087646,
|
25737 |
+
"learning_rate": 1.4588859416445624e-05,
|
25738 |
+
"loss": 0.0124,
|
25739 |
+
"step": 3660
|
25740 |
+
},
|
25741 |
+
{
|
25742 |
+
"epoch": 9.698013245033113,
|
25743 |
+
"grad_norm": 0.4745708703994751,
|
25744 |
+
"learning_rate": 1.4456233421750663e-05,
|
25745 |
+
"loss": 0.0128,
|
25746 |
+
"step": 3661
|
25747 |
+
},
|
25748 |
+
{
|
25749 |
+
"epoch": 9.70066225165563,
|
25750 |
+
"grad_norm": 0.05109310522675514,
|
25751 |
+
"learning_rate": 1.4323607427055705e-05,
|
25752 |
+
"loss": 0.0033,
|
25753 |
+
"step": 3662
|
25754 |
+
},
|
25755 |
+
{
|
25756 |
+
"epoch": 9.703311258278145,
|
25757 |
+
"grad_norm": 0.05594147741794586,
|
25758 |
+
"learning_rate": 1.4190981432360743e-05,
|
25759 |
+
"loss": 0.0042,
|
25760 |
+
"step": 3663
|
25761 |
+
},
|
25762 |
+
{
|
25763 |
+
"epoch": 9.705960264900662,
|
25764 |
+
"grad_norm": 0.11659505218267441,
|
25765 |
+
"learning_rate": 1.4058355437665782e-05,
|
25766 |
+
"loss": 0.0064,
|
25767 |
+
"step": 3664
|
25768 |
+
},
|
25769 |
+
{
|
25770 |
+
"epoch": 9.708609271523178,
|
25771 |
+
"grad_norm": 0.1137813925743103,
|
25772 |
+
"learning_rate": 1.3925729442970824e-05,
|
25773 |
+
"loss": 0.0051,
|
25774 |
+
"step": 3665
|
25775 |
+
},
|
25776 |
+
{
|
25777 |
+
"epoch": 9.711258278145696,
|
25778 |
+
"grad_norm": 0.48468494415283203,
|
25779 |
+
"learning_rate": 1.3793103448275862e-05,
|
25780 |
+
"loss": 0.0081,
|
25781 |
+
"step": 3666
|
25782 |
+
},
|
25783 |
+
{
|
25784 |
+
"epoch": 9.713907284768212,
|
25785 |
+
"grad_norm": 0.08497241139411926,
|
25786 |
+
"learning_rate": 1.3660477453580901e-05,
|
25787 |
+
"loss": 0.0047,
|
25788 |
+
"step": 3667
|
25789 |
+
},
|
25790 |
+
{
|
25791 |
+
"epoch": 9.716556291390729,
|
25792 |
+
"grad_norm": 0.923937976360321,
|
25793 |
+
"learning_rate": 1.3527851458885943e-05,
|
25794 |
+
"loss": 0.0116,
|
25795 |
+
"step": 3668
|
25796 |
+
},
|
25797 |
+
{
|
25798 |
+
"epoch": 9.719205298013245,
|
25799 |
+
"grad_norm": 0.09511914104223251,
|
25800 |
+
"learning_rate": 1.339522546419098e-05,
|
25801 |
+
"loss": 0.0051,
|
25802 |
+
"step": 3669
|
25803 |
+
},
|
25804 |
+
{
|
25805 |
+
"epoch": 9.721854304635762,
|
25806 |
+
"grad_norm": 0.1370173543691635,
|
25807 |
+
"learning_rate": 1.3262599469496022e-05,
|
25808 |
+
"loss": 0.0069,
|
25809 |
+
"step": 3670
|
25810 |
+
},
|
25811 |
+
{
|
25812 |
+
"epoch": 9.724503311258278,
|
25813 |
+
"grad_norm": 0.7186203002929688,
|
25814 |
+
"learning_rate": 1.3129973474801062e-05,
|
25815 |
+
"loss": 0.0094,
|
25816 |
+
"step": 3671
|
25817 |
+
},
|
25818 |
+
{
|
25819 |
+
"epoch": 9.727152317880794,
|
25820 |
+
"grad_norm": 0.11266762018203735,
|
25821 |
+
"learning_rate": 1.29973474801061e-05,
|
25822 |
+
"loss": 0.0067,
|
25823 |
+
"step": 3672
|
25824 |
+
},
|
25825 |
+
{
|
25826 |
+
"epoch": 9.729801324503311,
|
25827 |
+
"grad_norm": 0.08314207941293716,
|
25828 |
+
"learning_rate": 1.2864721485411141e-05,
|
25829 |
+
"loss": 0.004,
|
25830 |
+
"step": 3673
|
25831 |
+
},
|
25832 |
+
{
|
25833 |
+
"epoch": 9.732450331125827,
|
25834 |
+
"grad_norm": 0.18078316748142242,
|
25835 |
+
"learning_rate": 1.273209549071618e-05,
|
25836 |
+
"loss": 0.0062,
|
25837 |
+
"step": 3674
|
25838 |
+
},
|
25839 |
+
{
|
25840 |
+
"epoch": 9.735099337748345,
|
25841 |
+
"grad_norm": 0.3765209913253784,
|
25842 |
+
"learning_rate": 1.259946949602122e-05,
|
25843 |
+
"loss": 0.0091,
|
25844 |
+
"step": 3675
|
25845 |
+
},
|
25846 |
+
{
|
25847 |
+
"epoch": 9.73774834437086,
|
25848 |
+
"grad_norm": 0.12933093309402466,
|
25849 |
+
"learning_rate": 1.246684350132626e-05,
|
25850 |
+
"loss": 0.007,
|
25851 |
+
"step": 3676
|
25852 |
+
},
|
25853 |
+
{
|
25854 |
+
"epoch": 9.740397350993378,
|
25855 |
+
"grad_norm": 0.08696829527616501,
|
25856 |
+
"learning_rate": 1.23342175066313e-05,
|
25857 |
+
"loss": 0.0056,
|
25858 |
+
"step": 3677
|
25859 |
+
},
|
25860 |
+
{
|
25861 |
+
"epoch": 9.743046357615894,
|
25862 |
+
"grad_norm": 0.11974237114191055,
|
25863 |
+
"learning_rate": 1.220159151193634e-05,
|
25864 |
+
"loss": 0.0066,
|
25865 |
+
"step": 3678
|
25866 |
+
},
|
25867 |
+
{
|
25868 |
+
"epoch": 9.745695364238411,
|
25869 |
+
"grad_norm": 0.0988985225558281,
|
25870 |
+
"learning_rate": 1.2068965517241379e-05,
|
25871 |
+
"loss": 0.0062,
|
25872 |
+
"step": 3679
|
25873 |
+
},
|
25874 |
+
{
|
25875 |
+
"epoch": 9.748344370860927,
|
25876 |
+
"grad_norm": 0.08422379940748215,
|
25877 |
+
"learning_rate": 1.1936339522546419e-05,
|
25878 |
+
"loss": 0.0056,
|
25879 |
+
"step": 3680
|
25880 |
+
},
|
25881 |
+
{
|
25882 |
+
"epoch": 9.750993377483443,
|
25883 |
+
"grad_norm": 0.10629460215568542,
|
25884 |
+
"learning_rate": 1.180371352785146e-05,
|
25885 |
+
"loss": 0.0063,
|
25886 |
+
"step": 3681
|
25887 |
+
},
|
25888 |
+
{
|
25889 |
+
"epoch": 9.75364238410596,
|
25890 |
+
"grad_norm": 0.08831437677145004,
|
25891 |
+
"learning_rate": 1.1671087533156498e-05,
|
25892 |
+
"loss": 0.0054,
|
25893 |
+
"step": 3682
|
25894 |
+
},
|
25895 |
+
{
|
25896 |
+
"epoch": 9.756291390728476,
|
25897 |
+
"grad_norm": 0.06336377561092377,
|
25898 |
+
"learning_rate": 1.153846153846154e-05,
|
25899 |
+
"loss": 0.0048,
|
25900 |
+
"step": 3683
|
25901 |
+
},
|
25902 |
+
{
|
25903 |
+
"epoch": 9.758940397350994,
|
25904 |
+
"grad_norm": 0.08550837635993958,
|
25905 |
+
"learning_rate": 1.140583554376658e-05,
|
25906 |
+
"loss": 0.0059,
|
25907 |
+
"step": 3684
|
25908 |
+
},
|
25909 |
+
{
|
25910 |
+
"epoch": 9.76158940397351,
|
25911 |
+
"grad_norm": 0.20795460045337677,
|
25912 |
+
"learning_rate": 1.1273209549071617e-05,
|
25913 |
+
"loss": 0.0083,
|
25914 |
+
"step": 3685
|
25915 |
+
},
|
25916 |
+
{
|
25917 |
+
"epoch": 9.764238410596027,
|
25918 |
+
"grad_norm": 0.06397215276956558,
|
25919 |
+
"learning_rate": 1.1140583554376659e-05,
|
25920 |
+
"loss": 0.0045,
|
25921 |
+
"step": 3686
|
25922 |
+
},
|
25923 |
+
{
|
25924 |
+
"epoch": 9.766887417218543,
|
25925 |
+
"grad_norm": 0.2030840516090393,
|
25926 |
+
"learning_rate": 1.1007957559681698e-05,
|
25927 |
+
"loss": 0.008,
|
25928 |
+
"step": 3687
|
25929 |
+
},
|
25930 |
+
{
|
25931 |
+
"epoch": 9.76953642384106,
|
25932 |
+
"grad_norm": 0.09845580160617828,
|
25933 |
+
"learning_rate": 1.0875331564986738e-05,
|
25934 |
+
"loss": 0.0056,
|
25935 |
+
"step": 3688
|
25936 |
+
},
|
25937 |
+
{
|
25938 |
+
"epoch": 9.772185430463576,
|
25939 |
+
"grad_norm": 0.0654703751206398,
|
25940 |
+
"learning_rate": 1.0742705570291778e-05,
|
25941 |
+
"loss": 0.0054,
|
25942 |
+
"step": 3689
|
25943 |
+
},
|
25944 |
+
{
|
25945 |
+
"epoch": 9.774834437086092,
|
25946 |
+
"grad_norm": 0.08135940134525299,
|
25947 |
+
"learning_rate": 1.0610079575596817e-05,
|
25948 |
+
"loss": 0.0048,
|
25949 |
+
"step": 3690
|
25950 |
+
},
|
25951 |
+
{
|
25952 |
+
"epoch": 9.77748344370861,
|
25953 |
+
"grad_norm": 0.13394837081432343,
|
25954 |
+
"learning_rate": 1.0477453580901857e-05,
|
25955 |
+
"loss": 0.0069,
|
25956 |
+
"step": 3691
|
25957 |
+
},
|
25958 |
+
{
|
25959 |
+
"epoch": 9.780132450331125,
|
25960 |
+
"grad_norm": 0.2942659258842468,
|
25961 |
+
"learning_rate": 1.0344827586206897e-05,
|
25962 |
+
"loss": 0.009,
|
25963 |
+
"step": 3692
|
25964 |
+
},
|
25965 |
+
{
|
25966 |
+
"epoch": 9.782781456953643,
|
25967 |
+
"grad_norm": 1.7077428102493286,
|
25968 |
+
"learning_rate": 1.0212201591511936e-05,
|
25969 |
+
"loss": 0.024,
|
25970 |
+
"step": 3693
|
25971 |
+
},
|
25972 |
+
{
|
25973 |
+
"epoch": 9.785430463576159,
|
25974 |
+
"grad_norm": 0.05311325937509537,
|
25975 |
+
"learning_rate": 1.0079575596816976e-05,
|
25976 |
+
"loss": 0.004,
|
25977 |
+
"step": 3694
|
25978 |
+
},
|
25979 |
+
{
|
25980 |
+
"epoch": 9.788079470198676,
|
25981 |
+
"grad_norm": 0.15471872687339783,
|
25982 |
+
"learning_rate": 9.946949602122016e-06,
|
25983 |
+
"loss": 0.0077,
|
25984 |
+
"step": 3695
|
25985 |
+
},
|
25986 |
+
{
|
25987 |
+
"epoch": 9.790728476821192,
|
25988 |
+
"grad_norm": 0.11402935534715652,
|
25989 |
+
"learning_rate": 9.814323607427057e-06,
|
25990 |
+
"loss": 0.0063,
|
25991 |
+
"step": 3696
|
25992 |
+
},
|
25993 |
+
{
|
25994 |
+
"epoch": 9.79337748344371,
|
25995 |
+
"grad_norm": 0.07030440121889114,
|
25996 |
+
"learning_rate": 9.681697612732095e-06,
|
25997 |
+
"loss": 0.0049,
|
25998 |
+
"step": 3697
|
25999 |
+
},
|
26000 |
+
{
|
26001 |
+
"epoch": 9.796026490066225,
|
26002 |
+
"grad_norm": 0.10101749002933502,
|
26003 |
+
"learning_rate": 9.549071618037135e-06,
|
26004 |
+
"loss": 0.0053,
|
26005 |
+
"step": 3698
|
26006 |
+
},
|
26007 |
+
{
|
26008 |
+
"epoch": 9.798675496688741,
|
26009 |
+
"grad_norm": 0.46287235617637634,
|
26010 |
+
"learning_rate": 9.416445623342176e-06,
|
26011 |
+
"loss": 0.0124,
|
26012 |
+
"step": 3699
|
26013 |
+
},
|
26014 |
+
{
|
26015 |
+
"epoch": 9.801324503311259,
|
26016 |
+
"grad_norm": 0.2550599277019501,
|
26017 |
+
"learning_rate": 9.283819628647214e-06,
|
26018 |
+
"loss": 0.0069,
|
26019 |
+
"step": 3700
|
26020 |
+
},
|
26021 |
+
{
|
26022 |
+
"epoch": 9.803973509933774,
|
26023 |
+
"grad_norm": 0.24267533421516418,
|
26024 |
+
"learning_rate": 9.151193633952255e-06,
|
26025 |
+
"loss": 0.0084,
|
26026 |
+
"step": 3701
|
26027 |
+
},
|
26028 |
+
{
|
26029 |
+
"epoch": 9.806622516556292,
|
26030 |
+
"grad_norm": 0.08424295485019684,
|
26031 |
+
"learning_rate": 9.018567639257295e-06,
|
26032 |
+
"loss": 0.0043,
|
26033 |
+
"step": 3702
|
26034 |
+
},
|
26035 |
+
{
|
26036 |
+
"epoch": 9.809271523178808,
|
26037 |
+
"grad_norm": 0.11411385238170624,
|
26038 |
+
"learning_rate": 8.885941644562333e-06,
|
26039 |
+
"loss": 0.0047,
|
26040 |
+
"step": 3703
|
26041 |
+
},
|
26042 |
+
{
|
26043 |
+
"epoch": 9.811920529801325,
|
26044 |
+
"grad_norm": 0.08485867083072662,
|
26045 |
+
"learning_rate": 8.753315649867374e-06,
|
26046 |
+
"loss": 0.0049,
|
26047 |
+
"step": 3704
|
26048 |
+
},
|
26049 |
+
{
|
26050 |
+
"epoch": 9.814569536423841,
|
26051 |
+
"grad_norm": 0.3491503596305847,
|
26052 |
+
"learning_rate": 8.620689655172414e-06,
|
26053 |
+
"loss": 0.0098,
|
26054 |
+
"step": 3705
|
26055 |
+
},
|
26056 |
+
{
|
26057 |
+
"epoch": 9.817218543046359,
|
26058 |
+
"grad_norm": 0.0885666087269783,
|
26059 |
+
"learning_rate": 8.488063660477452e-06,
|
26060 |
+
"loss": 0.0056,
|
26061 |
+
"step": 3706
|
26062 |
+
},
|
26063 |
+
{
|
26064 |
+
"epoch": 9.819867549668874,
|
26065 |
+
"grad_norm": 0.6458684206008911,
|
26066 |
+
"learning_rate": 8.355437665782494e-06,
|
26067 |
+
"loss": 0.0129,
|
26068 |
+
"step": 3707
|
26069 |
+
},
|
26070 |
+
{
|
26071 |
+
"epoch": 9.82251655629139,
|
26072 |
+
"grad_norm": 0.10896582156419754,
|
26073 |
+
"learning_rate": 8.222811671087533e-06,
|
26074 |
+
"loss": 0.0058,
|
26075 |
+
"step": 3708
|
26076 |
+
},
|
26077 |
+
{
|
26078 |
+
"epoch": 9.825165562913908,
|
26079 |
+
"grad_norm": 0.06155227869749069,
|
26080 |
+
"learning_rate": 8.090185676392575e-06,
|
26081 |
+
"loss": 0.0041,
|
26082 |
+
"step": 3709
|
26083 |
+
},
|
26084 |
+
{
|
26085 |
+
"epoch": 9.827814569536423,
|
26086 |
+
"grad_norm": 0.12445490807294846,
|
26087 |
+
"learning_rate": 7.957559681697613e-06,
|
26088 |
+
"loss": 0.0076,
|
26089 |
+
"step": 3710
|
26090 |
+
},
|
26091 |
+
{
|
26092 |
+
"epoch": 9.830463576158941,
|
26093 |
+
"grad_norm": 0.1290195882320404,
|
26094 |
+
"learning_rate": 7.824933687002652e-06,
|
26095 |
+
"loss": 0.0052,
|
26096 |
+
"step": 3711
|
26097 |
+
},
|
26098 |
+
{
|
26099 |
+
"epoch": 9.833112582781457,
|
26100 |
+
"grad_norm": 0.26849737763404846,
|
26101 |
+
"learning_rate": 7.692307692307694e-06,
|
26102 |
+
"loss": 0.0091,
|
26103 |
+
"step": 3712
|
26104 |
+
},
|
26105 |
+
{
|
26106 |
+
"epoch": 9.835761589403974,
|
26107 |
+
"grad_norm": 0.08209025114774704,
|
26108 |
+
"learning_rate": 7.559681697612732e-06,
|
26109 |
+
"loss": 0.0054,
|
26110 |
+
"step": 3713
|
26111 |
+
},
|
26112 |
+
{
|
26113 |
+
"epoch": 9.83841059602649,
|
26114 |
+
"grad_norm": 0.06356347352266312,
|
26115 |
+
"learning_rate": 7.427055702917772e-06,
|
26116 |
+
"loss": 0.0038,
|
26117 |
+
"step": 3714
|
26118 |
+
},
|
26119 |
+
{
|
26120 |
+
"epoch": 9.841059602649006,
|
26121 |
+
"grad_norm": 0.16247440874576569,
|
26122 |
+
"learning_rate": 7.294429708222812e-06,
|
26123 |
+
"loss": 0.0084,
|
26124 |
+
"step": 3715
|
26125 |
+
},
|
26126 |
+
{
|
26127 |
+
"epoch": 9.843708609271523,
|
26128 |
+
"grad_norm": 0.175484761595726,
|
26129 |
+
"learning_rate": 7.161803713527852e-06,
|
26130 |
+
"loss": 0.0071,
|
26131 |
+
"step": 3716
|
26132 |
+
},
|
26133 |
+
{
|
26134 |
+
"epoch": 9.84635761589404,
|
26135 |
+
"grad_norm": 0.16009952127933502,
|
26136 |
+
"learning_rate": 7.029177718832891e-06,
|
26137 |
+
"loss": 0.0054,
|
26138 |
+
"step": 3717
|
26139 |
+
},
|
26140 |
+
{
|
26141 |
+
"epoch": 9.849006622516557,
|
26142 |
+
"grad_norm": 0.824354887008667,
|
26143 |
+
"learning_rate": 6.896551724137931e-06,
|
26144 |
+
"loss": 0.0225,
|
26145 |
+
"step": 3718
|
26146 |
+
},
|
26147 |
+
{
|
26148 |
+
"epoch": 9.851655629139072,
|
26149 |
+
"grad_norm": 1.2063053846359253,
|
26150 |
+
"learning_rate": 6.763925729442971e-06,
|
26151 |
+
"loss": 0.0186,
|
26152 |
+
"step": 3719
|
26153 |
+
},
|
26154 |
+
{
|
26155 |
+
"epoch": 9.85430463576159,
|
26156 |
+
"grad_norm": 0.1102658361196518,
|
26157 |
+
"learning_rate": 6.631299734748011e-06,
|
26158 |
+
"loss": 0.0068,
|
26159 |
+
"step": 3720
|
26160 |
+
},
|
26161 |
+
{
|
26162 |
+
"epoch": 9.856953642384106,
|
26163 |
+
"grad_norm": 0.6607574820518494,
|
26164 |
+
"learning_rate": 6.49867374005305e-06,
|
26165 |
+
"loss": 0.0163,
|
26166 |
+
"step": 3721
|
26167 |
+
},
|
26168 |
+
{
|
26169 |
+
"epoch": 9.859602649006623,
|
26170 |
+
"grad_norm": 0.05733625218272209,
|
26171 |
+
"learning_rate": 6.36604774535809e-06,
|
26172 |
+
"loss": 0.0043,
|
26173 |
+
"step": 3722
|
26174 |
+
},
|
26175 |
+
{
|
26176 |
+
"epoch": 9.862251655629139,
|
26177 |
+
"grad_norm": 1.854579210281372,
|
26178 |
+
"learning_rate": 6.23342175066313e-06,
|
26179 |
+
"loss": 0.0532,
|
26180 |
+
"step": 3723
|
26181 |
+
},
|
26182 |
+
{
|
26183 |
+
"epoch": 9.864900662251655,
|
26184 |
+
"grad_norm": 0.4233485460281372,
|
26185 |
+
"learning_rate": 6.10079575596817e-06,
|
26186 |
+
"loss": 0.0096,
|
26187 |
+
"step": 3724
|
26188 |
+
},
|
26189 |
+
{
|
26190 |
+
"epoch": 9.867549668874172,
|
26191 |
+
"grad_norm": 0.20195017755031586,
|
26192 |
+
"learning_rate": 5.968169761273209e-06,
|
26193 |
+
"loss": 0.0076,
|
26194 |
+
"step": 3725
|
26195 |
+
},
|
26196 |
+
{
|
26197 |
+
"epoch": 9.870198675496688,
|
26198 |
+
"grad_norm": 0.07918886095285416,
|
26199 |
+
"learning_rate": 5.835543766578249e-06,
|
26200 |
+
"loss": 0.0055,
|
26201 |
+
"step": 3726
|
26202 |
+
},
|
26203 |
+
{
|
26204 |
+
"epoch": 9.872847682119206,
|
26205 |
+
"grad_norm": 0.13845008611679077,
|
26206 |
+
"learning_rate": 5.70291777188329e-06,
|
26207 |
+
"loss": 0.0085,
|
26208 |
+
"step": 3727
|
26209 |
+
},
|
26210 |
+
{
|
26211 |
+
"epoch": 9.875496688741721,
|
26212 |
+
"grad_norm": 3.525977611541748,
|
26213 |
+
"learning_rate": 5.570291777188329e-06,
|
26214 |
+
"loss": 0.074,
|
26215 |
+
"step": 3728
|
26216 |
+
},
|
26217 |
+
{
|
26218 |
+
"epoch": 9.878145695364239,
|
26219 |
+
"grad_norm": 1.5265491008758545,
|
26220 |
+
"learning_rate": 5.437665782493369e-06,
|
26221 |
+
"loss": 0.048,
|
26222 |
+
"step": 3729
|
26223 |
+
},
|
26224 |
+
{
|
26225 |
+
"epoch": 9.880794701986755,
|
26226 |
+
"grad_norm": 1.900270700454712,
|
26227 |
+
"learning_rate": 5.305039787798409e-06,
|
26228 |
+
"loss": 0.0921,
|
26229 |
+
"step": 3730
|
26230 |
+
},
|
26231 |
+
{
|
26232 |
+
"epoch": 9.883443708609272,
|
26233 |
+
"grad_norm": 0.14628323912620544,
|
26234 |
+
"learning_rate": 5.172413793103448e-06,
|
26235 |
+
"loss": 0.0082,
|
26236 |
+
"step": 3731
|
26237 |
+
},
|
26238 |
+
{
|
26239 |
+
"epoch": 9.886092715231788,
|
26240 |
+
"grad_norm": 0.0840618833899498,
|
26241 |
+
"learning_rate": 5.039787798408488e-06,
|
26242 |
+
"loss": 0.005,
|
26243 |
+
"step": 3732
|
26244 |
+
},
|
26245 |
+
{
|
26246 |
+
"epoch": 9.888741721854304,
|
26247 |
+
"grad_norm": 0.0762125700712204,
|
26248 |
+
"learning_rate": 4.9071618037135285e-06,
|
26249 |
+
"loss": 0.0047,
|
26250 |
+
"step": 3733
|
26251 |
+
},
|
26252 |
+
{
|
26253 |
+
"epoch": 9.891390728476821,
|
26254 |
+
"grad_norm": 0.11019699275493622,
|
26255 |
+
"learning_rate": 4.774535809018567e-06,
|
26256 |
+
"loss": 0.0062,
|
26257 |
+
"step": 3734
|
26258 |
+
},
|
26259 |
+
{
|
26260 |
+
"epoch": 9.894039735099337,
|
26261 |
+
"grad_norm": 0.12956224381923676,
|
26262 |
+
"learning_rate": 4.641909814323607e-06,
|
26263 |
+
"loss": 0.0065,
|
26264 |
+
"step": 3735
|
26265 |
+
},
|
26266 |
+
{
|
26267 |
+
"epoch": 9.896688741721855,
|
26268 |
+
"grad_norm": 0.14278213679790497,
|
26269 |
+
"learning_rate": 4.5092838196286476e-06,
|
26270 |
+
"loss": 0.01,
|
26271 |
+
"step": 3736
|
26272 |
+
},
|
26273 |
+
{
|
26274 |
+
"epoch": 9.89933774834437,
|
26275 |
+
"grad_norm": 0.14741727709770203,
|
26276 |
+
"learning_rate": 4.376657824933687e-06,
|
26277 |
+
"loss": 0.0059,
|
26278 |
+
"step": 3737
|
26279 |
+
},
|
26280 |
+
{
|
26281 |
+
"epoch": 9.901986754966888,
|
26282 |
+
"grad_norm": 0.11628686636686325,
|
26283 |
+
"learning_rate": 4.244031830238726e-06,
|
26284 |
+
"loss": 0.007,
|
26285 |
+
"step": 3738
|
26286 |
+
},
|
26287 |
+
{
|
26288 |
+
"epoch": 9.904635761589404,
|
26289 |
+
"grad_norm": 0.080344058573246,
|
26290 |
+
"learning_rate": 4.111405835543767e-06,
|
26291 |
+
"loss": 0.0059,
|
26292 |
+
"step": 3739
|
26293 |
+
},
|
26294 |
+
{
|
26295 |
+
"epoch": 9.90728476821192,
|
26296 |
+
"grad_norm": 0.18567059934139252,
|
26297 |
+
"learning_rate": 3.978779840848806e-06,
|
26298 |
+
"loss": 0.0078,
|
26299 |
+
"step": 3740
|
26300 |
+
},
|
26301 |
+
{
|
26302 |
+
"epoch": 9.909933774834437,
|
26303 |
+
"grad_norm": 0.6573330163955688,
|
26304 |
+
"learning_rate": 3.846153846153847e-06,
|
26305 |
+
"loss": 0.0089,
|
26306 |
+
"step": 3741
|
26307 |
+
},
|
26308 |
+
{
|
26309 |
+
"epoch": 9.912582781456953,
|
26310 |
+
"grad_norm": 0.0857321098446846,
|
26311 |
+
"learning_rate": 3.713527851458886e-06,
|
26312 |
+
"loss": 0.0054,
|
26313 |
+
"step": 3742
|
26314 |
+
},
|
26315 |
+
{
|
26316 |
+
"epoch": 9.91523178807947,
|
26317 |
+
"grad_norm": 0.16797839105129242,
|
26318 |
+
"learning_rate": 3.580901856763926e-06,
|
26319 |
+
"loss": 0.0093,
|
26320 |
+
"step": 3743
|
26321 |
+
},
|
26322 |
+
{
|
26323 |
+
"epoch": 9.917880794701986,
|
26324 |
+
"grad_norm": 0.06998272985219955,
|
26325 |
+
"learning_rate": 3.4482758620689654e-06,
|
26326 |
+
"loss": 0.0047,
|
26327 |
+
"step": 3744
|
26328 |
+
},
|
26329 |
+
{
|
26330 |
+
"epoch": 9.920529801324504,
|
26331 |
+
"grad_norm": 0.0836014598608017,
|
26332 |
+
"learning_rate": 3.3156498673740055e-06,
|
26333 |
+
"loss": 0.0046,
|
26334 |
+
"step": 3745
|
26335 |
+
},
|
26336 |
+
{
|
26337 |
+
"epoch": 9.92317880794702,
|
26338 |
+
"grad_norm": 0.141579270362854,
|
26339 |
+
"learning_rate": 3.183023872679045e-06,
|
26340 |
+
"loss": 0.0083,
|
26341 |
+
"step": 3746
|
26342 |
+
},
|
26343 |
+
{
|
26344 |
+
"epoch": 9.925827814569537,
|
26345 |
+
"grad_norm": 0.0892159640789032,
|
26346 |
+
"learning_rate": 3.050397877984085e-06,
|
26347 |
+
"loss": 0.0048,
|
26348 |
+
"step": 3747
|
26349 |
+
},
|
26350 |
+
{
|
26351 |
+
"epoch": 9.928476821192053,
|
26352 |
+
"grad_norm": 0.09989528357982635,
|
26353 |
+
"learning_rate": 2.9177718832891245e-06,
|
26354 |
+
"loss": 0.005,
|
26355 |
+
"step": 3748
|
26356 |
+
},
|
26357 |
+
{
|
26358 |
+
"epoch": 9.931125827814569,
|
26359 |
+
"grad_norm": 0.13360846042633057,
|
26360 |
+
"learning_rate": 2.7851458885941646e-06,
|
26361 |
+
"loss": 0.0064,
|
26362 |
+
"step": 3749
|
26363 |
+
},
|
26364 |
+
{
|
26365 |
+
"epoch": 9.933774834437086,
|
26366 |
+
"grad_norm": 0.19383981823921204,
|
26367 |
+
"learning_rate": 2.6525198938992043e-06,
|
26368 |
+
"loss": 0.007,
|
26369 |
+
"step": 3750
|
26370 |
+
},
|
26371 |
+
{
|
26372 |
+
"epoch": 9.936423841059602,
|
26373 |
+
"grad_norm": 0.19157814979553223,
|
26374 |
+
"learning_rate": 2.519893899204244e-06,
|
26375 |
+
"loss": 0.0055,
|
26376 |
+
"step": 3751
|
26377 |
+
},
|
26378 |
+
{
|
26379 |
+
"epoch": 9.93907284768212,
|
26380 |
+
"grad_norm": 0.06832128018140793,
|
26381 |
+
"learning_rate": 2.3872679045092837e-06,
|
26382 |
+
"loss": 0.0053,
|
26383 |
+
"step": 3752
|
26384 |
+
},
|
26385 |
+
{
|
26386 |
+
"epoch": 9.941721854304635,
|
26387 |
+
"grad_norm": 0.5007702708244324,
|
26388 |
+
"learning_rate": 2.2546419098143238e-06,
|
26389 |
+
"loss": 0.0115,
|
26390 |
+
"step": 3753
|
26391 |
+
},
|
26392 |
+
{
|
26393 |
+
"epoch": 9.944370860927153,
|
26394 |
+
"grad_norm": 0.18187777698040009,
|
26395 |
+
"learning_rate": 2.122015915119363e-06,
|
26396 |
+
"loss": 0.0075,
|
26397 |
+
"step": 3754
|
26398 |
+
},
|
26399 |
+
{
|
26400 |
+
"epoch": 9.947019867549669,
|
26401 |
+
"grad_norm": 0.1455528289079666,
|
26402 |
+
"learning_rate": 1.989389920424403e-06,
|
26403 |
+
"loss": 0.0075,
|
26404 |
+
"step": 3755
|
26405 |
+
},
|
26406 |
+
{
|
26407 |
+
"epoch": 9.949668874172186,
|
26408 |
+
"grad_norm": 0.11611378937959671,
|
26409 |
+
"learning_rate": 1.856763925729443e-06,
|
26410 |
+
"loss": 0.0058,
|
26411 |
+
"step": 3756
|
26412 |
+
},
|
26413 |
+
{
|
26414 |
+
"epoch": 9.952317880794702,
|
26415 |
+
"grad_norm": 0.08630287647247314,
|
26416 |
+
"learning_rate": 1.7241379310344827e-06,
|
26417 |
+
"loss": 0.0054,
|
26418 |
+
"step": 3757
|
26419 |
+
},
|
26420 |
+
{
|
26421 |
+
"epoch": 9.954966887417218,
|
26422 |
+
"grad_norm": 0.37754523754119873,
|
26423 |
+
"learning_rate": 1.5915119363395226e-06,
|
26424 |
+
"loss": 0.0111,
|
26425 |
+
"step": 3758
|
26426 |
+
},
|
26427 |
+
{
|
26428 |
+
"epoch": 9.957615894039735,
|
26429 |
+
"grad_norm": 2.650604248046875,
|
26430 |
+
"learning_rate": 1.4588859416445623e-06,
|
26431 |
+
"loss": 0.0598,
|
26432 |
+
"step": 3759
|
26433 |
+
},
|
26434 |
+
{
|
26435 |
+
"epoch": 9.960264900662251,
|
26436 |
+
"grad_norm": 1.2378737926483154,
|
26437 |
+
"learning_rate": 1.3262599469496022e-06,
|
26438 |
+
"loss": 0.0196,
|
26439 |
+
"step": 3760
|
26440 |
+
},
|
26441 |
+
{
|
26442 |
+
"epoch": 9.962913907284769,
|
26443 |
+
"grad_norm": 0.4178251624107361,
|
26444 |
+
"learning_rate": 1.1936339522546418e-06,
|
26445 |
+
"loss": 0.0102,
|
26446 |
+
"step": 3761
|
26447 |
+
},
|
26448 |
+
{
|
26449 |
+
"epoch": 9.965562913907284,
|
26450 |
+
"grad_norm": 0.5158873796463013,
|
26451 |
+
"learning_rate": 1.0610079575596815e-06,
|
26452 |
+
"loss": 0.0058,
|
26453 |
+
"step": 3762
|
26454 |
+
},
|
26455 |
+
{
|
26456 |
+
"epoch": 9.968211920529802,
|
26457 |
+
"grad_norm": 0.0933571457862854,
|
26458 |
+
"learning_rate": 9.283819628647215e-07,
|
26459 |
+
"loss": 0.006,
|
26460 |
+
"step": 3763
|
26461 |
+
},
|
26462 |
+
{
|
26463 |
+
"epoch": 9.970860927152318,
|
26464 |
+
"grad_norm": 0.207793727517128,
|
26465 |
+
"learning_rate": 7.957559681697613e-07,
|
26466 |
+
"loss": 0.0098,
|
26467 |
+
"step": 3764
|
26468 |
+
},
|
26469 |
+
{
|
26470 |
+
"epoch": 9.973509933774835,
|
26471 |
+
"grad_norm": 0.11516987532377243,
|
26472 |
+
"learning_rate": 6.631299734748011e-07,
|
26473 |
+
"loss": 0.0063,
|
26474 |
+
"step": 3765
|
26475 |
+
},
|
26476 |
+
{
|
26477 |
+
"epoch": 9.976158940397351,
|
26478 |
+
"grad_norm": 4.050484657287598,
|
26479 |
+
"learning_rate": 5.305039787798408e-07,
|
26480 |
+
"loss": 0.0562,
|
26481 |
+
"step": 3766
|
26482 |
+
},
|
26483 |
+
{
|
26484 |
+
"epoch": 9.978807947019867,
|
26485 |
+
"grad_norm": 0.06329333037137985,
|
26486 |
+
"learning_rate": 3.9787798408488065e-07,
|
26487 |
+
"loss": 0.0044,
|
26488 |
+
"step": 3767
|
26489 |
+
},
|
26490 |
+
{
|
26491 |
+
"epoch": 9.981456953642384,
|
26492 |
+
"grad_norm": 0.5716582536697388,
|
26493 |
+
"learning_rate": 2.652519893899204e-07,
|
26494 |
+
"loss": 0.0068,
|
26495 |
+
"step": 3768
|
26496 |
+
},
|
26497 |
+
{
|
26498 |
+
"epoch": 9.9841059602649,
|
26499 |
+
"grad_norm": 0.15470373630523682,
|
26500 |
+
"learning_rate": 1.326259946949602e-07,
|
26501 |
+
"loss": 0.0072,
|
26502 |
+
"step": 3769
|
26503 |
+
},
|
26504 |
+
{
|
26505 |
+
"epoch": 9.986754966887418,
|
26506 |
+
"grad_norm": 0.05631321296095848,
|
26507 |
+
"learning_rate": 0.0,
|
26508 |
+
"loss": 0.004,
|
26509 |
+
"step": 3770
|
26510 |
}
|
26511 |
],
|
26512 |
"logging_steps": 1,
|
|
|
26521 |
"should_evaluate": false,
|
26522 |
"should_log": false,
|
26523 |
"should_save": true,
|
26524 |
+
"should_training_stop": true
|
26525 |
},
|
26526 |
"attributes": {}
|
26527 |
}
|
26528 |
},
|
26529 |
+
"total_flos": 2.1337935347515392e+20,
|
26530 |
"train_batch_size": 4,
|
26531 |
"trial_name": null,
|
26532 |
"trial_params": null
|