amazingvince
commited on
Commit
•
b5c9cfb
1
Parent(s):
b990fda
Upload folder using huggingface_hub
Browse files- latest +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- trainer_state.json +1939 -3
latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step22800
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4944210912
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d91441d23aa02635dee209f09f6e93410b26201584e9f1bce97ab412c82b1fc3
|
3 |
size 4944210912
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fc539e11e01c3090c81c8f8c3950abb87a747cdcf9f383afbc11d521b923257
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4541564920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04ec6cd21adbfddc4a5169e1fd7db882972c50ca419d233852ce3302a3496cf6
|
3 |
size 4541564920
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 800,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -25661,6 +25661,1942 @@
|
|
25661 |
"learning_rate": 9.555310253116467e-08,
|
25662 |
"loss": 0.6784,
|
25663 |
"step": 21200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25664 |
}
|
25665 |
],
|
25666 |
"logging_steps": 5,
|
@@ -25668,7 +27604,7 @@
|
|
25668 |
"num_input_tokens_seen": 0,
|
25669 |
"num_train_epochs": 1,
|
25670 |
"save_steps": 400,
|
25671 |
-
"total_flos":
|
25672 |
"trial_name": null,
|
25673 |
"trial_params": null
|
25674 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.926099870528801,
|
5 |
"eval_steps": 800,
|
6 |
+
"global_step": 22800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
25661 |
"learning_rate": 9.555310253116467e-08,
|
25662 |
"loss": 0.6784,
|
25663 |
"step": 21200
|
25664 |
+
},
|
25665 |
+
{
|
25666 |
+
"epoch": 0.86,
|
25667 |
+
"learning_rate": 9.527835191329392e-08,
|
25668 |
+
"loss": 0.6565,
|
25669 |
+
"step": 21205
|
25670 |
+
},
|
25671 |
+
{
|
25672 |
+
"epoch": 0.86,
|
25673 |
+
"learning_rate": 9.500397710892816e-08,
|
25674 |
+
"loss": 0.6305,
|
25675 |
+
"step": 21210
|
25676 |
+
},
|
25677 |
+
{
|
25678 |
+
"epoch": 0.86,
|
25679 |
+
"learning_rate": 9.472997823203999e-08,
|
25680 |
+
"loss": 0.6524,
|
25681 |
+
"step": 21215
|
25682 |
+
},
|
25683 |
+
{
|
25684 |
+
"epoch": 0.86,
|
25685 |
+
"learning_rate": 9.445635539644615e-08,
|
25686 |
+
"loss": 0.6717,
|
25687 |
+
"step": 21220
|
25688 |
+
},
|
25689 |
+
{
|
25690 |
+
"epoch": 0.86,
|
25691 |
+
"learning_rate": 9.418310871580737e-08,
|
25692 |
+
"loss": 0.6429,
|
25693 |
+
"step": 21225
|
25694 |
+
},
|
25695 |
+
{
|
25696 |
+
"epoch": 0.86,
|
25697 |
+
"learning_rate": 9.391023830362799e-08,
|
25698 |
+
"loss": 0.6434,
|
25699 |
+
"step": 21230
|
25700 |
+
},
|
25701 |
+
{
|
25702 |
+
"epoch": 0.86,
|
25703 |
+
"learning_rate": 9.363774427325577e-08,
|
25704 |
+
"loss": 0.6648,
|
25705 |
+
"step": 21235
|
25706 |
+
},
|
25707 |
+
{
|
25708 |
+
"epoch": 0.86,
|
25709 |
+
"learning_rate": 9.336562673788228e-08,
|
25710 |
+
"loss": 0.636,
|
25711 |
+
"step": 21240
|
25712 |
+
},
|
25713 |
+
{
|
25714 |
+
"epoch": 0.86,
|
25715 |
+
"learning_rate": 9.309388581054322e-08,
|
25716 |
+
"loss": 0.6771,
|
25717 |
+
"step": 21245
|
25718 |
+
},
|
25719 |
+
{
|
25720 |
+
"epoch": 0.86,
|
25721 |
+
"learning_rate": 9.282252160411719e-08,
|
25722 |
+
"loss": 0.6502,
|
25723 |
+
"step": 21250
|
25724 |
+
},
|
25725 |
+
{
|
25726 |
+
"epoch": 0.86,
|
25727 |
+
"learning_rate": 9.255153423132622e-08,
|
25728 |
+
"loss": 0.6437,
|
25729 |
+
"step": 21255
|
25730 |
+
},
|
25731 |
+
{
|
25732 |
+
"epoch": 0.86,
|
25733 |
+
"learning_rate": 9.22809238047365e-08,
|
25734 |
+
"loss": 0.6704,
|
25735 |
+
"step": 21260
|
25736 |
+
},
|
25737 |
+
{
|
25738 |
+
"epoch": 0.86,
|
25739 |
+
"learning_rate": 9.201069043675724e-08,
|
25740 |
+
"loss": 0.6404,
|
25741 |
+
"step": 21265
|
25742 |
+
},
|
25743 |
+
{
|
25744 |
+
"epoch": 0.86,
|
25745 |
+
"learning_rate": 9.174083423964062e-08,
|
25746 |
+
"loss": 0.6834,
|
25747 |
+
"step": 21270
|
25748 |
+
},
|
25749 |
+
{
|
25750 |
+
"epoch": 0.86,
|
25751 |
+
"learning_rate": 9.147135532548311e-08,
|
25752 |
+
"loss": 0.6516,
|
25753 |
+
"step": 21275
|
25754 |
+
},
|
25755 |
+
{
|
25756 |
+
"epoch": 0.86,
|
25757 |
+
"learning_rate": 9.120225380622371e-08,
|
25758 |
+
"loss": 0.671,
|
25759 |
+
"step": 21280
|
25760 |
+
},
|
25761 |
+
{
|
25762 |
+
"epoch": 0.86,
|
25763 |
+
"learning_rate": 9.093352979364466e-08,
|
25764 |
+
"loss": 0.6583,
|
25765 |
+
"step": 21285
|
25766 |
+
},
|
25767 |
+
{
|
25768 |
+
"epoch": 0.86,
|
25769 |
+
"learning_rate": 9.066518339937157e-08,
|
25770 |
+
"loss": 0.6467,
|
25771 |
+
"step": 21290
|
25772 |
+
},
|
25773 |
+
{
|
25774 |
+
"epoch": 0.86,
|
25775 |
+
"learning_rate": 9.03972147348735e-08,
|
25776 |
+
"loss": 0.5999,
|
25777 |
+
"step": 21295
|
25778 |
+
},
|
25779 |
+
{
|
25780 |
+
"epoch": 0.87,
|
25781 |
+
"learning_rate": 9.012962391146217e-08,
|
25782 |
+
"loss": 0.6589,
|
25783 |
+
"step": 21300
|
25784 |
+
},
|
25785 |
+
{
|
25786 |
+
"epoch": 0.87,
|
25787 |
+
"learning_rate": 8.986241104029224e-08,
|
25788 |
+
"loss": 0.647,
|
25789 |
+
"step": 21305
|
25790 |
+
},
|
25791 |
+
{
|
25792 |
+
"epoch": 0.87,
|
25793 |
+
"learning_rate": 8.959557623236202e-08,
|
25794 |
+
"loss": 0.6199,
|
25795 |
+
"step": 21310
|
25796 |
+
},
|
25797 |
+
{
|
25798 |
+
"epoch": 0.87,
|
25799 |
+
"learning_rate": 8.93291195985122e-08,
|
25800 |
+
"loss": 0.6762,
|
25801 |
+
"step": 21315
|
25802 |
+
},
|
25803 |
+
{
|
25804 |
+
"epoch": 0.87,
|
25805 |
+
"learning_rate": 8.906304124942632e-08,
|
25806 |
+
"loss": 0.6446,
|
25807 |
+
"step": 21320
|
25808 |
+
},
|
25809 |
+
{
|
25810 |
+
"epoch": 0.87,
|
25811 |
+
"learning_rate": 8.879734129563132e-08,
|
25812 |
+
"loss": 0.6504,
|
25813 |
+
"step": 21325
|
25814 |
+
},
|
25815 |
+
{
|
25816 |
+
"epoch": 0.87,
|
25817 |
+
"learning_rate": 8.853201984749658e-08,
|
25818 |
+
"loss": 0.6898,
|
25819 |
+
"step": 21330
|
25820 |
+
},
|
25821 |
+
{
|
25822 |
+
"epoch": 0.87,
|
25823 |
+
"learning_rate": 8.826707701523428e-08,
|
25824 |
+
"loss": 0.6575,
|
25825 |
+
"step": 21335
|
25826 |
+
},
|
25827 |
+
{
|
25828 |
+
"epoch": 0.87,
|
25829 |
+
"learning_rate": 8.800251290889927e-08,
|
25830 |
+
"loss": 0.6208,
|
25831 |
+
"step": 21340
|
25832 |
+
},
|
25833 |
+
{
|
25834 |
+
"epoch": 0.87,
|
25835 |
+
"learning_rate": 8.773832763838939e-08,
|
25836 |
+
"loss": 0.6662,
|
25837 |
+
"step": 21345
|
25838 |
+
},
|
25839 |
+
{
|
25840 |
+
"epoch": 0.87,
|
25841 |
+
"learning_rate": 8.74745213134448e-08,
|
25842 |
+
"loss": 0.6218,
|
25843 |
+
"step": 21350
|
25844 |
+
},
|
25845 |
+
{
|
25846 |
+
"epoch": 0.87,
|
25847 |
+
"learning_rate": 8.721109404364812e-08,
|
25848 |
+
"loss": 0.6747,
|
25849 |
+
"step": 21355
|
25850 |
+
},
|
25851 |
+
{
|
25852 |
+
"epoch": 0.87,
|
25853 |
+
"learning_rate": 8.694804593842519e-08,
|
25854 |
+
"loss": 0.693,
|
25855 |
+
"step": 21360
|
25856 |
+
},
|
25857 |
+
{
|
25858 |
+
"epoch": 0.87,
|
25859 |
+
"learning_rate": 8.668537710704371e-08,
|
25860 |
+
"loss": 0.6482,
|
25861 |
+
"step": 21365
|
25862 |
+
},
|
25863 |
+
{
|
25864 |
+
"epoch": 0.87,
|
25865 |
+
"learning_rate": 8.642308765861406e-08,
|
25866 |
+
"loss": 0.6946,
|
25867 |
+
"step": 21370
|
25868 |
+
},
|
25869 |
+
{
|
25870 |
+
"epoch": 0.87,
|
25871 |
+
"learning_rate": 8.616117770208864e-08,
|
25872 |
+
"loss": 0.655,
|
25873 |
+
"step": 21375
|
25874 |
+
},
|
25875 |
+
{
|
25876 |
+
"epoch": 0.87,
|
25877 |
+
"learning_rate": 8.58996473462631e-08,
|
25878 |
+
"loss": 0.6549,
|
25879 |
+
"step": 21380
|
25880 |
+
},
|
25881 |
+
{
|
25882 |
+
"epoch": 0.87,
|
25883 |
+
"learning_rate": 8.563849669977463e-08,
|
25884 |
+
"loss": 0.6444,
|
25885 |
+
"step": 21385
|
25886 |
+
},
|
25887 |
+
{
|
25888 |
+
"epoch": 0.87,
|
25889 |
+
"learning_rate": 8.537772587110281e-08,
|
25890 |
+
"loss": 0.646,
|
25891 |
+
"step": 21390
|
25892 |
+
},
|
25893 |
+
{
|
25894 |
+
"epoch": 0.87,
|
25895 |
+
"learning_rate": 8.511733496856999e-08,
|
25896 |
+
"loss": 0.6792,
|
25897 |
+
"step": 21395
|
25898 |
+
},
|
25899 |
+
{
|
25900 |
+
"epoch": 0.87,
|
25901 |
+
"learning_rate": 8.485732410033985e-08,
|
25902 |
+
"loss": 0.6037,
|
25903 |
+
"step": 21400
|
25904 |
+
},
|
25905 |
+
{
|
25906 |
+
"epoch": 0.87,
|
25907 |
+
"learning_rate": 8.459769337441868e-08,
|
25908 |
+
"loss": 0.6055,
|
25909 |
+
"step": 21405
|
25910 |
+
},
|
25911 |
+
{
|
25912 |
+
"epoch": 0.87,
|
25913 |
+
"learning_rate": 8.433844289865521e-08,
|
25914 |
+
"loss": 0.6427,
|
25915 |
+
"step": 21410
|
25916 |
+
},
|
25917 |
+
{
|
25918 |
+
"epoch": 0.87,
|
25919 |
+
"learning_rate": 8.407957278073952e-08,
|
25920 |
+
"loss": 0.6628,
|
25921 |
+
"step": 21415
|
25922 |
+
},
|
25923 |
+
{
|
25924 |
+
"epoch": 0.87,
|
25925 |
+
"learning_rate": 8.382108312820401e-08,
|
25926 |
+
"loss": 0.6569,
|
25927 |
+
"step": 21420
|
25928 |
+
},
|
25929 |
+
{
|
25930 |
+
"epoch": 0.87,
|
25931 |
+
"learning_rate": 8.356297404842305e-08,
|
25932 |
+
"loss": 0.659,
|
25933 |
+
"step": 21425
|
25934 |
+
},
|
25935 |
+
{
|
25936 |
+
"epoch": 0.87,
|
25937 |
+
"learning_rate": 8.330524564861297e-08,
|
25938 |
+
"loss": 0.6279,
|
25939 |
+
"step": 21430
|
25940 |
+
},
|
25941 |
+
{
|
25942 |
+
"epoch": 0.87,
|
25943 |
+
"learning_rate": 8.304789803583201e-08,
|
25944 |
+
"loss": 0.6281,
|
25945 |
+
"step": 21435
|
25946 |
+
},
|
25947 |
+
{
|
25948 |
+
"epoch": 0.87,
|
25949 |
+
"learning_rate": 8.279093131697968e-08,
|
25950 |
+
"loss": 0.6327,
|
25951 |
+
"step": 21440
|
25952 |
+
},
|
25953 |
+
{
|
25954 |
+
"epoch": 0.87,
|
25955 |
+
"learning_rate": 8.253434559879835e-08,
|
25956 |
+
"loss": 0.6402,
|
25957 |
+
"step": 21445
|
25958 |
+
},
|
25959 |
+
{
|
25960 |
+
"epoch": 0.87,
|
25961 |
+
"learning_rate": 8.227814098787111e-08,
|
25962 |
+
"loss": 0.6601,
|
25963 |
+
"step": 21450
|
25964 |
+
},
|
25965 |
+
{
|
25966 |
+
"epoch": 0.87,
|
25967 |
+
"learning_rate": 8.202231759062305e-08,
|
25968 |
+
"loss": 0.6355,
|
25969 |
+
"step": 21455
|
25970 |
+
},
|
25971 |
+
{
|
25972 |
+
"epoch": 0.87,
|
25973 |
+
"learning_rate": 8.17668755133214e-08,
|
25974 |
+
"loss": 0.663,
|
25975 |
+
"step": 21460
|
25976 |
+
},
|
25977 |
+
{
|
25978 |
+
"epoch": 0.87,
|
25979 |
+
"learning_rate": 8.151181486207414e-08,
|
25980 |
+
"loss": 0.6715,
|
25981 |
+
"step": 21465
|
25982 |
+
},
|
25983 |
+
{
|
25984 |
+
"epoch": 0.87,
|
25985 |
+
"learning_rate": 8.125713574283155e-08,
|
25986 |
+
"loss": 0.6456,
|
25987 |
+
"step": 21470
|
25988 |
+
},
|
25989 |
+
{
|
25990 |
+
"epoch": 0.87,
|
25991 |
+
"learning_rate": 8.100283826138477e-08,
|
25992 |
+
"loss": 0.6243,
|
25993 |
+
"step": 21475
|
25994 |
+
},
|
25995 |
+
{
|
25996 |
+
"epoch": 0.87,
|
25997 |
+
"learning_rate": 8.074892252336718e-08,
|
25998 |
+
"loss": 0.6273,
|
25999 |
+
"step": 21480
|
26000 |
+
},
|
26001 |
+
{
|
26002 |
+
"epoch": 0.87,
|
26003 |
+
"learning_rate": 8.049538863425298e-08,
|
26004 |
+
"loss": 0.6379,
|
26005 |
+
"step": 21485
|
26006 |
+
},
|
26007 |
+
{
|
26008 |
+
"epoch": 0.87,
|
26009 |
+
"learning_rate": 8.024223669935782e-08,
|
26010 |
+
"loss": 0.6303,
|
26011 |
+
"step": 21490
|
26012 |
+
},
|
26013 |
+
{
|
26014 |
+
"epoch": 0.87,
|
26015 |
+
"learning_rate": 7.9989466823839e-08,
|
26016 |
+
"loss": 0.6826,
|
26017 |
+
"step": 21495
|
26018 |
+
},
|
26019 |
+
{
|
26020 |
+
"epoch": 0.87,
|
26021 |
+
"learning_rate": 7.973707911269489e-08,
|
26022 |
+
"loss": 0.6236,
|
26023 |
+
"step": 21500
|
26024 |
+
},
|
26025 |
+
{
|
26026 |
+
"epoch": 0.87,
|
26027 |
+
"learning_rate": 7.948507367076518e-08,
|
26028 |
+
"loss": 0.6341,
|
26029 |
+
"step": 21505
|
26030 |
+
},
|
26031 |
+
{
|
26032 |
+
"epoch": 0.87,
|
26033 |
+
"learning_rate": 7.923345060273046e-08,
|
26034 |
+
"loss": 0.6677,
|
26035 |
+
"step": 21510
|
26036 |
+
},
|
26037 |
+
{
|
26038 |
+
"epoch": 0.87,
|
26039 |
+
"learning_rate": 7.898221001311312e-08,
|
26040 |
+
"loss": 0.6299,
|
26041 |
+
"step": 21515
|
26042 |
+
},
|
26043 |
+
{
|
26044 |
+
"epoch": 0.87,
|
26045 |
+
"learning_rate": 7.873135200627623e-08,
|
26046 |
+
"loss": 0.6272,
|
26047 |
+
"step": 21520
|
26048 |
+
},
|
26049 |
+
{
|
26050 |
+
"epoch": 0.87,
|
26051 |
+
"learning_rate": 7.848087668642377e-08,
|
26052 |
+
"loss": 0.6455,
|
26053 |
+
"step": 21525
|
26054 |
+
},
|
26055 |
+
{
|
26056 |
+
"epoch": 0.87,
|
26057 |
+
"learning_rate": 7.823078415760143e-08,
|
26058 |
+
"loss": 0.6406,
|
26059 |
+
"step": 21530
|
26060 |
+
},
|
26061 |
+
{
|
26062 |
+
"epoch": 0.87,
|
26063 |
+
"learning_rate": 7.798107452369517e-08,
|
26064 |
+
"loss": 0.7099,
|
26065 |
+
"step": 21535
|
26066 |
+
},
|
26067 |
+
{
|
26068 |
+
"epoch": 0.87,
|
26069 |
+
"learning_rate": 7.773174788843218e-08,
|
26070 |
+
"loss": 0.6831,
|
26071 |
+
"step": 21540
|
26072 |
+
},
|
26073 |
+
{
|
26074 |
+
"epoch": 0.88,
|
26075 |
+
"learning_rate": 7.74828043553808e-08,
|
26076 |
+
"loss": 0.6205,
|
26077 |
+
"step": 21545
|
26078 |
+
},
|
26079 |
+
{
|
26080 |
+
"epoch": 0.88,
|
26081 |
+
"learning_rate": 7.723424402794998e-08,
|
26082 |
+
"loss": 0.649,
|
26083 |
+
"step": 21550
|
26084 |
+
},
|
26085 |
+
{
|
26086 |
+
"epoch": 0.88,
|
26087 |
+
"learning_rate": 7.698606700938936e-08,
|
26088 |
+
"loss": 0.6636,
|
26089 |
+
"step": 21555
|
26090 |
+
},
|
26091 |
+
{
|
26092 |
+
"epoch": 0.88,
|
26093 |
+
"learning_rate": 7.673827340278937e-08,
|
26094 |
+
"loss": 0.6314,
|
26095 |
+
"step": 21560
|
26096 |
+
},
|
26097 |
+
{
|
26098 |
+
"epoch": 0.88,
|
26099 |
+
"learning_rate": 7.649086331108178e-08,
|
26100 |
+
"loss": 0.6969,
|
26101 |
+
"step": 21565
|
26102 |
+
},
|
26103 |
+
{
|
26104 |
+
"epoch": 0.88,
|
26105 |
+
"learning_rate": 7.624383683703839e-08,
|
26106 |
+
"loss": 0.6516,
|
26107 |
+
"step": 21570
|
26108 |
+
},
|
26109 |
+
{
|
26110 |
+
"epoch": 0.88,
|
26111 |
+
"learning_rate": 7.599719408327155e-08,
|
26112 |
+
"loss": 0.7174,
|
26113 |
+
"step": 21575
|
26114 |
+
},
|
26115 |
+
{
|
26116 |
+
"epoch": 0.88,
|
26117 |
+
"learning_rate": 7.575093515223496e-08,
|
26118 |
+
"loss": 0.6436,
|
26119 |
+
"step": 21580
|
26120 |
+
},
|
26121 |
+
{
|
26122 |
+
"epoch": 0.88,
|
26123 |
+
"learning_rate": 7.550506014622215e-08,
|
26124 |
+
"loss": 0.6571,
|
26125 |
+
"step": 21585
|
26126 |
+
},
|
26127 |
+
{
|
26128 |
+
"epoch": 0.88,
|
26129 |
+
"learning_rate": 7.525956916736753e-08,
|
26130 |
+
"loss": 0.6919,
|
26131 |
+
"step": 21590
|
26132 |
+
},
|
26133 |
+
{
|
26134 |
+
"epoch": 0.88,
|
26135 |
+
"learning_rate": 7.501446231764607e-08,
|
26136 |
+
"loss": 0.6461,
|
26137 |
+
"step": 21595
|
26138 |
+
},
|
26139 |
+
{
|
26140 |
+
"epoch": 0.88,
|
26141 |
+
"learning_rate": 7.47697396988729e-08,
|
26142 |
+
"loss": 0.6169,
|
26143 |
+
"step": 21600
|
26144 |
+
},
|
26145 |
+
{
|
26146 |
+
"epoch": 0.88,
|
26147 |
+
"eval_loss": 0.6164625883102417,
|
26148 |
+
"eval_runtime": 140.1585,
|
26149 |
+
"eval_samples_per_second": 16.881,
|
26150 |
+
"eval_steps_per_second": 2.818,
|
26151 |
+
"step": 21600
|
26152 |
+
},
|
26153 |
+
{
|
26154 |
+
"epoch": 0.88,
|
26155 |
+
"learning_rate": 7.452540141270358e-08,
|
26156 |
+
"loss": 0.643,
|
26157 |
+
"step": 21605
|
26158 |
+
},
|
26159 |
+
{
|
26160 |
+
"epoch": 0.88,
|
26161 |
+
"learning_rate": 7.428144756063415e-08,
|
26162 |
+
"loss": 0.6571,
|
26163 |
+
"step": 21610
|
26164 |
+
},
|
26165 |
+
{
|
26166 |
+
"epoch": 0.88,
|
26167 |
+
"learning_rate": 7.403787824400098e-08,
|
26168 |
+
"loss": 0.6473,
|
26169 |
+
"step": 21615
|
26170 |
+
},
|
26171 |
+
{
|
26172 |
+
"epoch": 0.88,
|
26173 |
+
"learning_rate": 7.379469356398072e-08,
|
26174 |
+
"loss": 0.6089,
|
26175 |
+
"step": 21620
|
26176 |
+
},
|
26177 |
+
{
|
26178 |
+
"epoch": 0.88,
|
26179 |
+
"learning_rate": 7.355189362158997e-08,
|
26180 |
+
"loss": 0.6356,
|
26181 |
+
"step": 21625
|
26182 |
+
},
|
26183 |
+
{
|
26184 |
+
"epoch": 0.88,
|
26185 |
+
"learning_rate": 7.330947851768588e-08,
|
26186 |
+
"loss": 0.6338,
|
26187 |
+
"step": 21630
|
26188 |
+
},
|
26189 |
+
{
|
26190 |
+
"epoch": 0.88,
|
26191 |
+
"learning_rate": 7.306744835296563e-08,
|
26192 |
+
"loss": 0.6296,
|
26193 |
+
"step": 21635
|
26194 |
+
},
|
26195 |
+
{
|
26196 |
+
"epoch": 0.88,
|
26197 |
+
"learning_rate": 7.282580322796606e-08,
|
26198 |
+
"loss": 0.6403,
|
26199 |
+
"step": 21640
|
26200 |
+
},
|
26201 |
+
{
|
26202 |
+
"epoch": 0.88,
|
26203 |
+
"learning_rate": 7.258454324306495e-08,
|
26204 |
+
"loss": 0.649,
|
26205 |
+
"step": 21645
|
26206 |
+
},
|
26207 |
+
{
|
26208 |
+
"epoch": 0.88,
|
26209 |
+
"learning_rate": 7.23436684984794e-08,
|
26210 |
+
"loss": 0.6433,
|
26211 |
+
"step": 21650
|
26212 |
+
},
|
26213 |
+
{
|
26214 |
+
"epoch": 0.88,
|
26215 |
+
"learning_rate": 7.210317909426656e-08,
|
26216 |
+
"loss": 0.6741,
|
26217 |
+
"step": 21655
|
26218 |
+
},
|
26219 |
+
{
|
26220 |
+
"epoch": 0.88,
|
26221 |
+
"learning_rate": 7.186307513032364e-08,
|
26222 |
+
"loss": 0.6607,
|
26223 |
+
"step": 21660
|
26224 |
+
},
|
26225 |
+
{
|
26226 |
+
"epoch": 0.88,
|
26227 |
+
"learning_rate": 7.162335670638797e-08,
|
26228 |
+
"loss": 0.6845,
|
26229 |
+
"step": 21665
|
26230 |
+
},
|
26231 |
+
{
|
26232 |
+
"epoch": 0.88,
|
26233 |
+
"learning_rate": 7.138402392203646e-08,
|
26234 |
+
"loss": 0.5908,
|
26235 |
+
"step": 21670
|
26236 |
+
},
|
26237 |
+
{
|
26238 |
+
"epoch": 0.88,
|
26239 |
+
"learning_rate": 7.114507687668559e-08,
|
26240 |
+
"loss": 0.6756,
|
26241 |
+
"step": 21675
|
26242 |
+
},
|
26243 |
+
{
|
26244 |
+
"epoch": 0.88,
|
26245 |
+
"learning_rate": 7.090651566959216e-08,
|
26246 |
+
"loss": 0.6435,
|
26247 |
+
"step": 21680
|
26248 |
+
},
|
26249 |
+
{
|
26250 |
+
"epoch": 0.88,
|
26251 |
+
"learning_rate": 7.066834039985237e-08,
|
26252 |
+
"loss": 0.6275,
|
26253 |
+
"step": 21685
|
26254 |
+
},
|
26255 |
+
{
|
26256 |
+
"epoch": 0.88,
|
26257 |
+
"learning_rate": 7.043055116640206e-08,
|
26258 |
+
"loss": 0.6286,
|
26259 |
+
"step": 21690
|
26260 |
+
},
|
26261 |
+
{
|
26262 |
+
"epoch": 0.88,
|
26263 |
+
"learning_rate": 7.019314806801679e-08,
|
26264 |
+
"loss": 0.6561,
|
26265 |
+
"step": 21695
|
26266 |
+
},
|
26267 |
+
{
|
26268 |
+
"epoch": 0.88,
|
26269 |
+
"learning_rate": 6.99561312033119e-08,
|
26270 |
+
"loss": 0.6653,
|
26271 |
+
"step": 21700
|
26272 |
+
},
|
26273 |
+
{
|
26274 |
+
"epoch": 0.88,
|
26275 |
+
"learning_rate": 6.971950067074206e-08,
|
26276 |
+
"loss": 0.6333,
|
26277 |
+
"step": 21705
|
26278 |
+
},
|
26279 |
+
{
|
26280 |
+
"epoch": 0.88,
|
26281 |
+
"learning_rate": 6.948325656860143e-08,
|
26282 |
+
"loss": 0.6574,
|
26283 |
+
"step": 21710
|
26284 |
+
},
|
26285 |
+
{
|
26286 |
+
"epoch": 0.88,
|
26287 |
+
"learning_rate": 6.924739899502396e-08,
|
26288 |
+
"loss": 0.6581,
|
26289 |
+
"step": 21715
|
26290 |
+
},
|
26291 |
+
{
|
26292 |
+
"epoch": 0.88,
|
26293 |
+
"learning_rate": 6.901192804798272e-08,
|
26294 |
+
"loss": 0.6574,
|
26295 |
+
"step": 21720
|
26296 |
+
},
|
26297 |
+
{
|
26298 |
+
"epoch": 0.88,
|
26299 |
+
"learning_rate": 6.877684382529025e-08,
|
26300 |
+
"loss": 0.6292,
|
26301 |
+
"step": 21725
|
26302 |
+
},
|
26303 |
+
{
|
26304 |
+
"epoch": 0.88,
|
26305 |
+
"learning_rate": 6.854214642459855e-08,
|
26306 |
+
"loss": 0.6288,
|
26307 |
+
"step": 21730
|
26308 |
+
},
|
26309 |
+
{
|
26310 |
+
"epoch": 0.88,
|
26311 |
+
"learning_rate": 6.830783594339895e-08,
|
26312 |
+
"loss": 0.6242,
|
26313 |
+
"step": 21735
|
26314 |
+
},
|
26315 |
+
{
|
26316 |
+
"epoch": 0.88,
|
26317 |
+
"learning_rate": 6.807391247902195e-08,
|
26318 |
+
"loss": 0.6551,
|
26319 |
+
"step": 21740
|
26320 |
+
},
|
26321 |
+
{
|
26322 |
+
"epoch": 0.88,
|
26323 |
+
"learning_rate": 6.784037612863702e-08,
|
26324 |
+
"loss": 0.6485,
|
26325 |
+
"step": 21745
|
26326 |
+
},
|
26327 |
+
{
|
26328 |
+
"epoch": 0.88,
|
26329 |
+
"learning_rate": 6.760722698925358e-08,
|
26330 |
+
"loss": 0.6398,
|
26331 |
+
"step": 21750
|
26332 |
+
},
|
26333 |
+
{
|
26334 |
+
"epoch": 0.88,
|
26335 |
+
"learning_rate": 6.737446515771961e-08,
|
26336 |
+
"loss": 0.6063,
|
26337 |
+
"step": 21755
|
26338 |
+
},
|
26339 |
+
{
|
26340 |
+
"epoch": 0.88,
|
26341 |
+
"learning_rate": 6.714209073072218e-08,
|
26342 |
+
"loss": 0.6095,
|
26343 |
+
"step": 21760
|
26344 |
+
},
|
26345 |
+
{
|
26346 |
+
"epoch": 0.88,
|
26347 |
+
"learning_rate": 6.691010380478779e-08,
|
26348 |
+
"loss": 0.6306,
|
26349 |
+
"step": 21765
|
26350 |
+
},
|
26351 |
+
{
|
26352 |
+
"epoch": 0.88,
|
26353 |
+
"learning_rate": 6.667850447628175e-08,
|
26354 |
+
"loss": 0.5991,
|
26355 |
+
"step": 21770
|
26356 |
+
},
|
26357 |
+
{
|
26358 |
+
"epoch": 0.88,
|
26359 |
+
"learning_rate": 6.644729284140826e-08,
|
26360 |
+
"loss": 0.6475,
|
26361 |
+
"step": 21775
|
26362 |
+
},
|
26363 |
+
{
|
26364 |
+
"epoch": 0.88,
|
26365 |
+
"learning_rate": 6.621646899621091e-08,
|
26366 |
+
"loss": 0.6737,
|
26367 |
+
"step": 21780
|
26368 |
+
},
|
26369 |
+
{
|
26370 |
+
"epoch": 0.88,
|
26371 |
+
"learning_rate": 6.598603303657179e-08,
|
26372 |
+
"loss": 0.6395,
|
26373 |
+
"step": 21785
|
26374 |
+
},
|
26375 |
+
{
|
26376 |
+
"epoch": 0.89,
|
26377 |
+
"learning_rate": 6.5755985058212e-08,
|
26378 |
+
"loss": 0.6428,
|
26379 |
+
"step": 21790
|
26380 |
+
},
|
26381 |
+
{
|
26382 |
+
"epoch": 0.89,
|
26383 |
+
"learning_rate": 6.552632515669121e-08,
|
26384 |
+
"loss": 0.6312,
|
26385 |
+
"step": 21795
|
26386 |
+
},
|
26387 |
+
{
|
26388 |
+
"epoch": 0.89,
|
26389 |
+
"learning_rate": 6.529705342740843e-08,
|
26390 |
+
"loss": 0.6315,
|
26391 |
+
"step": 21800
|
26392 |
+
},
|
26393 |
+
{
|
26394 |
+
"epoch": 0.89,
|
26395 |
+
"learning_rate": 6.506816996560127e-08,
|
26396 |
+
"loss": 0.6268,
|
26397 |
+
"step": 21805
|
26398 |
+
},
|
26399 |
+
{
|
26400 |
+
"epoch": 0.89,
|
26401 |
+
"learning_rate": 6.483967486634546e-08,
|
26402 |
+
"loss": 0.664,
|
26403 |
+
"step": 21810
|
26404 |
+
},
|
26405 |
+
{
|
26406 |
+
"epoch": 0.89,
|
26407 |
+
"learning_rate": 6.461156822455638e-08,
|
26408 |
+
"loss": 0.6397,
|
26409 |
+
"step": 21815
|
26410 |
+
},
|
26411 |
+
{
|
26412 |
+
"epoch": 0.89,
|
26413 |
+
"learning_rate": 6.438385013498726e-08,
|
26414 |
+
"loss": 0.6273,
|
26415 |
+
"step": 21820
|
26416 |
+
},
|
26417 |
+
{
|
26418 |
+
"epoch": 0.89,
|
26419 |
+
"learning_rate": 6.415652069223032e-08,
|
26420 |
+
"loss": 0.6245,
|
26421 |
+
"step": 21825
|
26422 |
+
},
|
26423 |
+
{
|
26424 |
+
"epoch": 0.89,
|
26425 |
+
"learning_rate": 6.392957999071602e-08,
|
26426 |
+
"loss": 0.6921,
|
26427 |
+
"step": 21830
|
26428 |
+
},
|
26429 |
+
{
|
26430 |
+
"epoch": 0.89,
|
26431 |
+
"learning_rate": 6.370302812471384e-08,
|
26432 |
+
"loss": 0.6685,
|
26433 |
+
"step": 21835
|
26434 |
+
},
|
26435 |
+
{
|
26436 |
+
"epoch": 0.89,
|
26437 |
+
"learning_rate": 6.34768651883314e-08,
|
26438 |
+
"loss": 0.6563,
|
26439 |
+
"step": 21840
|
26440 |
+
},
|
26441 |
+
{
|
26442 |
+
"epoch": 0.89,
|
26443 |
+
"learning_rate": 6.325109127551465e-08,
|
26444 |
+
"loss": 0.6676,
|
26445 |
+
"step": 21845
|
26446 |
+
},
|
26447 |
+
{
|
26448 |
+
"epoch": 0.89,
|
26449 |
+
"learning_rate": 6.302570648004834e-08,
|
26450 |
+
"loss": 0.682,
|
26451 |
+
"step": 21850
|
26452 |
+
},
|
26453 |
+
{
|
26454 |
+
"epoch": 0.89,
|
26455 |
+
"learning_rate": 6.280071089555516e-08,
|
26456 |
+
"loss": 0.63,
|
26457 |
+
"step": 21855
|
26458 |
+
},
|
26459 |
+
{
|
26460 |
+
"epoch": 0.89,
|
26461 |
+
"learning_rate": 6.257610461549634e-08,
|
26462 |
+
"loss": 0.6781,
|
26463 |
+
"step": 21860
|
26464 |
+
},
|
26465 |
+
{
|
26466 |
+
"epoch": 0.89,
|
26467 |
+
"learning_rate": 6.235188773317146e-08,
|
26468 |
+
"loss": 0.6647,
|
26469 |
+
"step": 21865
|
26470 |
+
},
|
26471 |
+
{
|
26472 |
+
"epoch": 0.89,
|
26473 |
+
"learning_rate": 6.212806034171836e-08,
|
26474 |
+
"loss": 0.6611,
|
26475 |
+
"step": 21870
|
26476 |
+
},
|
26477 |
+
{
|
26478 |
+
"epoch": 0.89,
|
26479 |
+
"learning_rate": 6.190462253411277e-08,
|
26480 |
+
"loss": 0.658,
|
26481 |
+
"step": 21875
|
26482 |
+
},
|
26483 |
+
{
|
26484 |
+
"epoch": 0.89,
|
26485 |
+
"learning_rate": 6.16815744031688e-08,
|
26486 |
+
"loss": 0.6362,
|
26487 |
+
"step": 21880
|
26488 |
+
},
|
26489 |
+
{
|
26490 |
+
"epoch": 0.89,
|
26491 |
+
"learning_rate": 6.145891604153886e-08,
|
26492 |
+
"loss": 0.64,
|
26493 |
+
"step": 21885
|
26494 |
+
},
|
26495 |
+
{
|
26496 |
+
"epoch": 0.89,
|
26497 |
+
"learning_rate": 6.123664754171331e-08,
|
26498 |
+
"loss": 0.6428,
|
26499 |
+
"step": 21890
|
26500 |
+
},
|
26501 |
+
{
|
26502 |
+
"epoch": 0.89,
|
26503 |
+
"learning_rate": 6.101476899602043e-08,
|
26504 |
+
"loss": 0.6626,
|
26505 |
+
"step": 21895
|
26506 |
+
},
|
26507 |
+
{
|
26508 |
+
"epoch": 0.89,
|
26509 |
+
"learning_rate": 6.079328049662668e-08,
|
26510 |
+
"loss": 0.6502,
|
26511 |
+
"step": 21900
|
26512 |
+
},
|
26513 |
+
{
|
26514 |
+
"epoch": 0.89,
|
26515 |
+
"learning_rate": 6.057218213553661e-08,
|
26516 |
+
"loss": 0.6694,
|
26517 |
+
"step": 21905
|
26518 |
+
},
|
26519 |
+
{
|
26520 |
+
"epoch": 0.89,
|
26521 |
+
"learning_rate": 6.035147400459217e-08,
|
26522 |
+
"loss": 0.642,
|
26523 |
+
"step": 21910
|
26524 |
+
},
|
26525 |
+
{
|
26526 |
+
"epoch": 0.89,
|
26527 |
+
"learning_rate": 6.013115619547404e-08,
|
26528 |
+
"loss": 0.6864,
|
26529 |
+
"step": 21915
|
26530 |
+
},
|
26531 |
+
{
|
26532 |
+
"epoch": 0.89,
|
26533 |
+
"learning_rate": 5.991122879970012e-08,
|
26534 |
+
"loss": 0.636,
|
26535 |
+
"step": 21920
|
26536 |
+
},
|
26537 |
+
{
|
26538 |
+
"epoch": 0.89,
|
26539 |
+
"learning_rate": 5.969169190862644e-08,
|
26540 |
+
"loss": 0.6338,
|
26541 |
+
"step": 21925
|
26542 |
+
},
|
26543 |
+
{
|
26544 |
+
"epoch": 0.89,
|
26545 |
+
"learning_rate": 5.947254561344628e-08,
|
26546 |
+
"loss": 0.6647,
|
26547 |
+
"step": 21930
|
26548 |
+
},
|
26549 |
+
{
|
26550 |
+
"epoch": 0.89,
|
26551 |
+
"learning_rate": 5.9253790005191705e-08,
|
26552 |
+
"loss": 0.65,
|
26553 |
+
"step": 21935
|
26554 |
+
},
|
26555 |
+
{
|
26556 |
+
"epoch": 0.89,
|
26557 |
+
"learning_rate": 5.90354251747317e-08,
|
26558 |
+
"loss": 0.6274,
|
26559 |
+
"step": 21940
|
26560 |
+
},
|
26561 |
+
{
|
26562 |
+
"epoch": 0.89,
|
26563 |
+
"learning_rate": 5.8817451212772815e-08,
|
26564 |
+
"loss": 0.6559,
|
26565 |
+
"step": 21945
|
26566 |
+
},
|
26567 |
+
{
|
26568 |
+
"epoch": 0.89,
|
26569 |
+
"learning_rate": 5.859986820985985e-08,
|
26570 |
+
"loss": 0.6318,
|
26571 |
+
"step": 21950
|
26572 |
+
},
|
26573 |
+
{
|
26574 |
+
"epoch": 0.89,
|
26575 |
+
"learning_rate": 5.838267625637494e-08,
|
26576 |
+
"loss": 0.6755,
|
26577 |
+
"step": 21955
|
26578 |
+
},
|
26579 |
+
{
|
26580 |
+
"epoch": 0.89,
|
26581 |
+
"learning_rate": 5.8165875442537594e-08,
|
26582 |
+
"loss": 0.6342,
|
26583 |
+
"step": 21960
|
26584 |
+
},
|
26585 |
+
{
|
26586 |
+
"epoch": 0.89,
|
26587 |
+
"learning_rate": 5.7949465858404766e-08,
|
26588 |
+
"loss": 0.6707,
|
26589 |
+
"step": 21965
|
26590 |
+
},
|
26591 |
+
{
|
26592 |
+
"epoch": 0.89,
|
26593 |
+
"learning_rate": 5.773344759387155e-08,
|
26594 |
+
"loss": 0.6339,
|
26595 |
+
"step": 21970
|
26596 |
+
},
|
26597 |
+
{
|
26598 |
+
"epoch": 0.89,
|
26599 |
+
"learning_rate": 5.751782073866984e-08,
|
26600 |
+
"loss": 0.6389,
|
26601 |
+
"step": 21975
|
26602 |
+
},
|
26603 |
+
{
|
26604 |
+
"epoch": 0.89,
|
26605 |
+
"learning_rate": 5.730258538236909e-08,
|
26606 |
+
"loss": 0.6287,
|
26607 |
+
"step": 21980
|
26608 |
+
},
|
26609 |
+
{
|
26610 |
+
"epoch": 0.89,
|
26611 |
+
"learning_rate": 5.708774161437635e-08,
|
26612 |
+
"loss": 0.6844,
|
26613 |
+
"step": 21985
|
26614 |
+
},
|
26615 |
+
{
|
26616 |
+
"epoch": 0.89,
|
26617 |
+
"learning_rate": 5.6873289523935775e-08,
|
26618 |
+
"loss": 0.6497,
|
26619 |
+
"step": 21990
|
26620 |
+
},
|
26621 |
+
{
|
26622 |
+
"epoch": 0.89,
|
26623 |
+
"learning_rate": 5.665922920012878e-08,
|
26624 |
+
"loss": 0.7022,
|
26625 |
+
"step": 21995
|
26626 |
+
},
|
26627 |
+
{
|
26628 |
+
"epoch": 0.89,
|
26629 |
+
"learning_rate": 5.644556073187445e-08,
|
26630 |
+
"loss": 0.6279,
|
26631 |
+
"step": 22000
|
26632 |
+
},
|
26633 |
+
{
|
26634 |
+
"epoch": 0.89,
|
26635 |
+
"learning_rate": 5.6232284207928584e-08,
|
26636 |
+
"loss": 0.6533,
|
26637 |
+
"step": 22005
|
26638 |
+
},
|
26639 |
+
{
|
26640 |
+
"epoch": 0.89,
|
26641 |
+
"learning_rate": 5.601939971688452e-08,
|
26642 |
+
"loss": 0.635,
|
26643 |
+
"step": 22010
|
26644 |
+
},
|
26645 |
+
{
|
26646 |
+
"epoch": 0.89,
|
26647 |
+
"learning_rate": 5.580690734717241e-08,
|
26648 |
+
"loss": 0.6474,
|
26649 |
+
"step": 22015
|
26650 |
+
},
|
26651 |
+
{
|
26652 |
+
"epoch": 0.89,
|
26653 |
+
"learning_rate": 5.559480718706e-08,
|
26654 |
+
"loss": 0.6732,
|
26655 |
+
"step": 22020
|
26656 |
+
},
|
26657 |
+
{
|
26658 |
+
"epoch": 0.89,
|
26659 |
+
"learning_rate": 5.5383099324651684e-08,
|
26660 |
+
"loss": 0.6351,
|
26661 |
+
"step": 22025
|
26662 |
+
},
|
26663 |
+
{
|
26664 |
+
"epoch": 0.89,
|
26665 |
+
"learning_rate": 5.5171783847889006e-08,
|
26666 |
+
"loss": 0.6832,
|
26667 |
+
"step": 22030
|
26668 |
+
},
|
26669 |
+
{
|
26670 |
+
"epoch": 0.9,
|
26671 |
+
"learning_rate": 5.496086084455087e-08,
|
26672 |
+
"loss": 0.6403,
|
26673 |
+
"step": 22035
|
26674 |
+
},
|
26675 |
+
{
|
26676 |
+
"epoch": 0.9,
|
26677 |
+
"learning_rate": 5.475033040225274e-08,
|
26678 |
+
"loss": 0.6231,
|
26679 |
+
"step": 22040
|
26680 |
+
},
|
26681 |
+
{
|
26682 |
+
"epoch": 0.9,
|
26683 |
+
"learning_rate": 5.454019260844678e-08,
|
26684 |
+
"loss": 0.6585,
|
26685 |
+
"step": 22045
|
26686 |
+
},
|
26687 |
+
{
|
26688 |
+
"epoch": 0.9,
|
26689 |
+
"learning_rate": 5.433044755042293e-08,
|
26690 |
+
"loss": 0.6478,
|
26691 |
+
"step": 22050
|
26692 |
+
},
|
26693 |
+
{
|
26694 |
+
"epoch": 0.9,
|
26695 |
+
"learning_rate": 5.4121095315307173e-08,
|
26696 |
+
"loss": 0.627,
|
26697 |
+
"step": 22055
|
26698 |
+
},
|
26699 |
+
{
|
26700 |
+
"epoch": 0.9,
|
26701 |
+
"learning_rate": 5.3912135990062726e-08,
|
26702 |
+
"loss": 0.6431,
|
26703 |
+
"step": 22060
|
26704 |
+
},
|
26705 |
+
{
|
26706 |
+
"epoch": 0.9,
|
26707 |
+
"learning_rate": 5.370356966148914e-08,
|
26708 |
+
"loss": 0.6224,
|
26709 |
+
"step": 22065
|
26710 |
+
},
|
26711 |
+
{
|
26712 |
+
"epoch": 0.9,
|
26713 |
+
"learning_rate": 5.3495396416223584e-08,
|
26714 |
+
"loss": 0.6851,
|
26715 |
+
"step": 22070
|
26716 |
+
},
|
26717 |
+
{
|
26718 |
+
"epoch": 0.9,
|
26719 |
+
"learning_rate": 5.3287616340739084e-08,
|
26720 |
+
"loss": 0.6471,
|
26721 |
+
"step": 22075
|
26722 |
+
},
|
26723 |
+
{
|
26724 |
+
"epoch": 0.9,
|
26725 |
+
"learning_rate": 5.308022952134561e-08,
|
26726 |
+
"loss": 0.6231,
|
26727 |
+
"step": 22080
|
26728 |
+
},
|
26729 |
+
{
|
26730 |
+
"epoch": 0.9,
|
26731 |
+
"learning_rate": 5.287323604419014e-08,
|
26732 |
+
"loss": 0.6358,
|
26733 |
+
"step": 22085
|
26734 |
+
},
|
26735 |
+
{
|
26736 |
+
"epoch": 0.9,
|
26737 |
+
"learning_rate": 5.266663599525578e-08,
|
26738 |
+
"loss": 0.6723,
|
26739 |
+
"step": 22090
|
26740 |
+
},
|
26741 |
+
{
|
26742 |
+
"epoch": 0.9,
|
26743 |
+
"learning_rate": 5.246042946036244e-08,
|
26744 |
+
"loss": 0.674,
|
26745 |
+
"step": 22095
|
26746 |
+
},
|
26747 |
+
{
|
26748 |
+
"epoch": 0.9,
|
26749 |
+
"learning_rate": 5.225461652516639e-08,
|
26750 |
+
"loss": 0.6099,
|
26751 |
+
"step": 22100
|
26752 |
+
},
|
26753 |
+
{
|
26754 |
+
"epoch": 0.9,
|
26755 |
+
"learning_rate": 5.204919727516066e-08,
|
26756 |
+
"loss": 0.6175,
|
26757 |
+
"step": 22105
|
26758 |
+
},
|
26759 |
+
{
|
26760 |
+
"epoch": 0.9,
|
26761 |
+
"learning_rate": 5.184417179567468e-08,
|
26762 |
+
"loss": 0.6383,
|
26763 |
+
"step": 22110
|
26764 |
+
},
|
26765 |
+
{
|
26766 |
+
"epoch": 0.9,
|
26767 |
+
"learning_rate": 5.163954017187399e-08,
|
26768 |
+
"loss": 0.6796,
|
26769 |
+
"step": 22115
|
26770 |
+
},
|
26771 |
+
{
|
26772 |
+
"epoch": 0.9,
|
26773 |
+
"learning_rate": 5.143530248876116e-08,
|
26774 |
+
"loss": 0.6428,
|
26775 |
+
"step": 22120
|
26776 |
+
},
|
26777 |
+
{
|
26778 |
+
"epoch": 0.9,
|
26779 |
+
"learning_rate": 5.123145883117452e-08,
|
26780 |
+
"loss": 0.7068,
|
26781 |
+
"step": 22125
|
26782 |
+
},
|
26783 |
+
{
|
26784 |
+
"epoch": 0.9,
|
26785 |
+
"learning_rate": 5.102800928378881e-08,
|
26786 |
+
"loss": 0.6485,
|
26787 |
+
"step": 22130
|
26788 |
+
},
|
26789 |
+
{
|
26790 |
+
"epoch": 0.9,
|
26791 |
+
"learning_rate": 5.082495393111563e-08,
|
26792 |
+
"loss": 0.6488,
|
26793 |
+
"step": 22135
|
26794 |
+
},
|
26795 |
+
{
|
26796 |
+
"epoch": 0.9,
|
26797 |
+
"learning_rate": 5.062229285750208e-08,
|
26798 |
+
"loss": 0.639,
|
26799 |
+
"step": 22140
|
26800 |
+
},
|
26801 |
+
{
|
26802 |
+
"epoch": 0.9,
|
26803 |
+
"learning_rate": 5.0420026147131925e-08,
|
26804 |
+
"loss": 0.6629,
|
26805 |
+
"step": 22145
|
26806 |
+
},
|
26807 |
+
{
|
26808 |
+
"epoch": 0.9,
|
26809 |
+
"learning_rate": 5.021815388402473e-08,
|
26810 |
+
"loss": 0.6624,
|
26811 |
+
"step": 22150
|
26812 |
+
},
|
26813 |
+
{
|
26814 |
+
"epoch": 0.9,
|
26815 |
+
"learning_rate": 5.0016676152036974e-08,
|
26816 |
+
"loss": 0.6282,
|
26817 |
+
"step": 22155
|
26818 |
+
},
|
26819 |
+
{
|
26820 |
+
"epoch": 0.9,
|
26821 |
+
"learning_rate": 4.981559303486038e-08,
|
26822 |
+
"loss": 0.6319,
|
26823 |
+
"step": 22160
|
26824 |
+
},
|
26825 |
+
{
|
26826 |
+
"epoch": 0.9,
|
26827 |
+
"learning_rate": 4.9614904616023134e-08,
|
26828 |
+
"loss": 0.6655,
|
26829 |
+
"step": 22165
|
26830 |
+
},
|
26831 |
+
{
|
26832 |
+
"epoch": 0.9,
|
26833 |
+
"learning_rate": 4.941461097888966e-08,
|
26834 |
+
"loss": 0.6915,
|
26835 |
+
"step": 22170
|
26836 |
+
},
|
26837 |
+
{
|
26838 |
+
"epoch": 0.9,
|
26839 |
+
"learning_rate": 4.921471220666018e-08,
|
26840 |
+
"loss": 0.5924,
|
26841 |
+
"step": 22175
|
26842 |
+
},
|
26843 |
+
{
|
26844 |
+
"epoch": 0.9,
|
26845 |
+
"learning_rate": 4.901520838237061e-08,
|
26846 |
+
"loss": 0.6699,
|
26847 |
+
"step": 22180
|
26848 |
+
},
|
26849 |
+
{
|
26850 |
+
"epoch": 0.9,
|
26851 |
+
"learning_rate": 4.8816099588893436e-08,
|
26852 |
+
"loss": 0.635,
|
26853 |
+
"step": 22185
|
26854 |
+
},
|
26855 |
+
{
|
26856 |
+
"epoch": 0.9,
|
26857 |
+
"learning_rate": 4.86173859089366e-08,
|
26858 |
+
"loss": 0.6572,
|
26859 |
+
"step": 22190
|
26860 |
+
},
|
26861 |
+
{
|
26862 |
+
"epoch": 0.9,
|
26863 |
+
"learning_rate": 4.8419067425044094e-08,
|
26864 |
+
"loss": 0.6296,
|
26865 |
+
"step": 22195
|
26866 |
+
},
|
26867 |
+
{
|
26868 |
+
"epoch": 0.9,
|
26869 |
+
"learning_rate": 4.822114421959545e-08,
|
26870 |
+
"loss": 0.6598,
|
26871 |
+
"step": 22200
|
26872 |
+
},
|
26873 |
+
{
|
26874 |
+
"epoch": 0.9,
|
26875 |
+
"learning_rate": 4.8023616374806564e-08,
|
26876 |
+
"loss": 0.6451,
|
26877 |
+
"step": 22205
|
26878 |
+
},
|
26879 |
+
{
|
26880 |
+
"epoch": 0.9,
|
26881 |
+
"learning_rate": 4.782648397272859e-08,
|
26882 |
+
"loss": 0.6616,
|
26883 |
+
"step": 22210
|
26884 |
+
},
|
26885 |
+
{
|
26886 |
+
"epoch": 0.9,
|
26887 |
+
"learning_rate": 4.762974709524858e-08,
|
26888 |
+
"loss": 0.6583,
|
26889 |
+
"step": 22215
|
26890 |
+
},
|
26891 |
+
{
|
26892 |
+
"epoch": 0.9,
|
26893 |
+
"learning_rate": 4.743340582408961e-08,
|
26894 |
+
"loss": 0.66,
|
26895 |
+
"step": 22220
|
26896 |
+
},
|
26897 |
+
{
|
26898 |
+
"epoch": 0.9,
|
26899 |
+
"learning_rate": 4.723746024080988e-08,
|
26900 |
+
"loss": 0.6213,
|
26901 |
+
"step": 22225
|
26902 |
+
},
|
26903 |
+
{
|
26904 |
+
"epoch": 0.9,
|
26905 |
+
"learning_rate": 4.70419104268035e-08,
|
26906 |
+
"loss": 0.6316,
|
26907 |
+
"step": 22230
|
26908 |
+
},
|
26909 |
+
{
|
26910 |
+
"epoch": 0.9,
|
26911 |
+
"learning_rate": 4.6846756463300054e-08,
|
26912 |
+
"loss": 0.6505,
|
26913 |
+
"step": 22235
|
26914 |
+
},
|
26915 |
+
{
|
26916 |
+
"epoch": 0.9,
|
26917 |
+
"learning_rate": 4.665199843136513e-08,
|
26918 |
+
"loss": 0.6645,
|
26919 |
+
"step": 22240
|
26920 |
+
},
|
26921 |
+
{
|
26922 |
+
"epoch": 0.9,
|
26923 |
+
"learning_rate": 4.645763641189937e-08,
|
26924 |
+
"loss": 0.6523,
|
26925 |
+
"step": 22245
|
26926 |
+
},
|
26927 |
+
{
|
26928 |
+
"epoch": 0.9,
|
26929 |
+
"learning_rate": 4.626367048563884e-08,
|
26930 |
+
"loss": 0.6516,
|
26931 |
+
"step": 22250
|
26932 |
+
},
|
26933 |
+
{
|
26934 |
+
"epoch": 0.9,
|
26935 |
+
"learning_rate": 4.607010073315565e-08,
|
26936 |
+
"loss": 0.638,
|
26937 |
+
"step": 22255
|
26938 |
+
},
|
26939 |
+
{
|
26940 |
+
"epoch": 0.9,
|
26941 |
+
"learning_rate": 4.587692723485681e-08,
|
26942 |
+
"loss": 0.5924,
|
26943 |
+
"step": 22260
|
26944 |
+
},
|
26945 |
+
{
|
26946 |
+
"epoch": 0.9,
|
26947 |
+
"learning_rate": 4.5684150070984804e-08,
|
26948 |
+
"loss": 0.6316,
|
26949 |
+
"step": 22265
|
26950 |
+
},
|
26951 |
+
{
|
26952 |
+
"epoch": 0.9,
|
26953 |
+
"learning_rate": 4.549176932161791e-08,
|
26954 |
+
"loss": 0.6585,
|
26955 |
+
"step": 22270
|
26956 |
+
},
|
26957 |
+
{
|
26958 |
+
"epoch": 0.9,
|
26959 |
+
"learning_rate": 4.5299785066669205e-08,
|
26960 |
+
"loss": 0.6547,
|
26961 |
+
"step": 22275
|
26962 |
+
},
|
26963 |
+
{
|
26964 |
+
"epoch": 0.9,
|
26965 |
+
"learning_rate": 4.5108197385887335e-08,
|
26966 |
+
"loss": 0.6432,
|
26967 |
+
"step": 22280
|
26968 |
+
},
|
26969 |
+
{
|
26970 |
+
"epoch": 0.91,
|
26971 |
+
"learning_rate": 4.491700635885598e-08,
|
26972 |
+
"loss": 0.6075,
|
26973 |
+
"step": 22285
|
26974 |
+
},
|
26975 |
+
{
|
26976 |
+
"epoch": 0.91,
|
26977 |
+
"learning_rate": 4.4726212064994493e-08,
|
26978 |
+
"loss": 0.5892,
|
26979 |
+
"step": 22290
|
26980 |
+
},
|
26981 |
+
{
|
26982 |
+
"epoch": 0.91,
|
26983 |
+
"learning_rate": 4.453581458355704e-08,
|
26984 |
+
"loss": 0.6202,
|
26985 |
+
"step": 22295
|
26986 |
+
},
|
26987 |
+
{
|
26988 |
+
"epoch": 0.91,
|
26989 |
+
"learning_rate": 4.4345813993632905e-08,
|
26990 |
+
"loss": 0.6405,
|
26991 |
+
"step": 22300
|
26992 |
+
},
|
26993 |
+
{
|
26994 |
+
"epoch": 0.91,
|
26995 |
+
"learning_rate": 4.4156210374147075e-08,
|
26996 |
+
"loss": 0.6393,
|
26997 |
+
"step": 22305
|
26998 |
+
},
|
26999 |
+
{
|
27000 |
+
"epoch": 0.91,
|
27001 |
+
"learning_rate": 4.396700380385898e-08,
|
27002 |
+
"loss": 0.6548,
|
27003 |
+
"step": 22310
|
27004 |
+
},
|
27005 |
+
{
|
27006 |
+
"epoch": 0.91,
|
27007 |
+
"learning_rate": 4.377819436136332e-08,
|
27008 |
+
"loss": 0.6813,
|
27009 |
+
"step": 22315
|
27010 |
+
},
|
27011 |
+
{
|
27012 |
+
"epoch": 0.91,
|
27013 |
+
"learning_rate": 4.358978212509012e-08,
|
27014 |
+
"loss": 0.6689,
|
27015 |
+
"step": 22320
|
27016 |
+
},
|
27017 |
+
{
|
27018 |
+
"epoch": 0.91,
|
27019 |
+
"learning_rate": 4.340176717330413e-08,
|
27020 |
+
"loss": 0.6631,
|
27021 |
+
"step": 22325
|
27022 |
+
},
|
27023 |
+
{
|
27024 |
+
"epoch": 0.91,
|
27025 |
+
"learning_rate": 4.3214149584105076e-08,
|
27026 |
+
"loss": 0.6586,
|
27027 |
+
"step": 22330
|
27028 |
+
},
|
27029 |
+
{
|
27030 |
+
"epoch": 0.91,
|
27031 |
+
"learning_rate": 4.3026929435427516e-08,
|
27032 |
+
"loss": 0.6643,
|
27033 |
+
"step": 22335
|
27034 |
+
},
|
27035 |
+
{
|
27036 |
+
"epoch": 0.91,
|
27037 |
+
"learning_rate": 4.2840106805041354e-08,
|
27038 |
+
"loss": 0.654,
|
27039 |
+
"step": 22340
|
27040 |
+
},
|
27041 |
+
{
|
27042 |
+
"epoch": 0.91,
|
27043 |
+
"learning_rate": 4.2653681770550955e-08,
|
27044 |
+
"loss": 0.6688,
|
27045 |
+
"step": 22345
|
27046 |
+
},
|
27047 |
+
{
|
27048 |
+
"epoch": 0.91,
|
27049 |
+
"learning_rate": 4.2467654409395484e-08,
|
27050 |
+
"loss": 0.6761,
|
27051 |
+
"step": 22350
|
27052 |
+
},
|
27053 |
+
{
|
27054 |
+
"epoch": 0.91,
|
27055 |
+
"learning_rate": 4.228202479884946e-08,
|
27056 |
+
"loss": 0.6667,
|
27057 |
+
"step": 22355
|
27058 |
+
},
|
27059 |
+
{
|
27060 |
+
"epoch": 0.91,
|
27061 |
+
"learning_rate": 4.209679301602165e-08,
|
27062 |
+
"loss": 0.6749,
|
27063 |
+
"step": 22360
|
27064 |
+
},
|
27065 |
+
{
|
27066 |
+
"epoch": 0.91,
|
27067 |
+
"learning_rate": 4.191195913785561e-08,
|
27068 |
+
"loss": 0.6396,
|
27069 |
+
"step": 22365
|
27070 |
+
},
|
27071 |
+
{
|
27072 |
+
"epoch": 0.91,
|
27073 |
+
"learning_rate": 4.1727523241129606e-08,
|
27074 |
+
"loss": 0.6696,
|
27075 |
+
"step": 22370
|
27076 |
+
},
|
27077 |
+
{
|
27078 |
+
"epoch": 0.91,
|
27079 |
+
"learning_rate": 4.154348540245711e-08,
|
27080 |
+
"loss": 0.6364,
|
27081 |
+
"step": 22375
|
27082 |
+
},
|
27083 |
+
{
|
27084 |
+
"epoch": 0.91,
|
27085 |
+
"learning_rate": 4.135984569828566e-08,
|
27086 |
+
"loss": 0.6495,
|
27087 |
+
"step": 22380
|
27088 |
+
},
|
27089 |
+
{
|
27090 |
+
"epoch": 0.91,
|
27091 |
+
"learning_rate": 4.1176604204897434e-08,
|
27092 |
+
"loss": 0.6496,
|
27093 |
+
"step": 22385
|
27094 |
+
},
|
27095 |
+
{
|
27096 |
+
"epoch": 0.91,
|
27097 |
+
"learning_rate": 4.099376099840968e-08,
|
27098 |
+
"loss": 0.6268,
|
27099 |
+
"step": 22390
|
27100 |
+
},
|
27101 |
+
{
|
27102 |
+
"epoch": 0.91,
|
27103 |
+
"learning_rate": 4.0811316154773515e-08,
|
27104 |
+
"loss": 0.6527,
|
27105 |
+
"step": 22395
|
27106 |
+
},
|
27107 |
+
{
|
27108 |
+
"epoch": 0.91,
|
27109 |
+
"learning_rate": 4.06292697497751e-08,
|
27110 |
+
"loss": 0.651,
|
27111 |
+
"step": 22400
|
27112 |
+
},
|
27113 |
+
{
|
27114 |
+
"epoch": 0.91,
|
27115 |
+
"eval_loss": 0.6161190867424011,
|
27116 |
+
"eval_runtime": 139.4449,
|
27117 |
+
"eval_samples_per_second": 16.967,
|
27118 |
+
"eval_steps_per_second": 2.833,
|
27119 |
+
"step": 22400
|
27120 |
+
},
|
27121 |
+
{
|
27122 |
+
"epoch": 0.91,
|
27123 |
+
"learning_rate": 4.044762185903494e-08,
|
27124 |
+
"loss": 0.6551,
|
27125 |
+
"step": 22405
|
27126 |
+
},
|
27127 |
+
{
|
27128 |
+
"epoch": 0.91,
|
27129 |
+
"learning_rate": 4.026637255800813e-08,
|
27130 |
+
"loss": 0.6677,
|
27131 |
+
"step": 22410
|
27132 |
+
},
|
27133 |
+
{
|
27134 |
+
"epoch": 0.91,
|
27135 |
+
"learning_rate": 4.008552192198378e-08,
|
27136 |
+
"loss": 0.6738,
|
27137 |
+
"step": 22415
|
27138 |
+
},
|
27139 |
+
{
|
27140 |
+
"epoch": 0.91,
|
27141 |
+
"learning_rate": 3.9905070026085784e-08,
|
27142 |
+
"loss": 0.6642,
|
27143 |
+
"step": 22420
|
27144 |
+
},
|
27145 |
+
{
|
27146 |
+
"epoch": 0.91,
|
27147 |
+
"learning_rate": 3.9725016945272416e-08,
|
27148 |
+
"loss": 0.6399,
|
27149 |
+
"step": 22425
|
27150 |
+
},
|
27151 |
+
{
|
27152 |
+
"epoch": 0.91,
|
27153 |
+
"learning_rate": 3.9545362754335955e-08,
|
27154 |
+
"loss": 0.6706,
|
27155 |
+
"step": 22430
|
27156 |
+
},
|
27157 |
+
{
|
27158 |
+
"epoch": 0.91,
|
27159 |
+
"learning_rate": 3.936610752790326e-08,
|
27160 |
+
"loss": 0.686,
|
27161 |
+
"step": 22435
|
27162 |
+
},
|
27163 |
+
{
|
27164 |
+
"epoch": 0.91,
|
27165 |
+
"learning_rate": 3.9187251340435653e-08,
|
27166 |
+
"loss": 0.5905,
|
27167 |
+
"step": 22440
|
27168 |
+
},
|
27169 |
+
{
|
27170 |
+
"epoch": 0.91,
|
27171 |
+
"learning_rate": 3.900879426622794e-08,
|
27172 |
+
"loss": 0.5991,
|
27173 |
+
"step": 22445
|
27174 |
+
},
|
27175 |
+
{
|
27176 |
+
"epoch": 0.91,
|
27177 |
+
"learning_rate": 3.8830736379409814e-08,
|
27178 |
+
"loss": 0.6265,
|
27179 |
+
"step": 22450
|
27180 |
+
},
|
27181 |
+
{
|
27182 |
+
"epoch": 0.91,
|
27183 |
+
"learning_rate": 3.865307775394533e-08,
|
27184 |
+
"loss": 0.6525,
|
27185 |
+
"step": 22455
|
27186 |
+
},
|
27187 |
+
{
|
27188 |
+
"epoch": 0.91,
|
27189 |
+
"learning_rate": 3.84758184636319e-08,
|
27190 |
+
"loss": 0.6228,
|
27191 |
+
"step": 22460
|
27192 |
+
},
|
27193 |
+
{
|
27194 |
+
"epoch": 0.91,
|
27195 |
+
"learning_rate": 3.829895858210186e-08,
|
27196 |
+
"loss": 0.6736,
|
27197 |
+
"step": 22465
|
27198 |
+
},
|
27199 |
+
{
|
27200 |
+
"epoch": 0.91,
|
27201 |
+
"learning_rate": 3.812249818282076e-08,
|
27202 |
+
"loss": 0.6303,
|
27203 |
+
"step": 22470
|
27204 |
+
},
|
27205 |
+
{
|
27206 |
+
"epoch": 0.91,
|
27207 |
+
"learning_rate": 3.79464373390892e-08,
|
27208 |
+
"loss": 0.6367,
|
27209 |
+
"step": 22475
|
27210 |
+
},
|
27211 |
+
{
|
27212 |
+
"epoch": 0.91,
|
27213 |
+
"learning_rate": 3.777077612404123e-08,
|
27214 |
+
"loss": 0.6334,
|
27215 |
+
"step": 22480
|
27216 |
+
},
|
27217 |
+
{
|
27218 |
+
"epoch": 0.91,
|
27219 |
+
"learning_rate": 3.75955146106447e-08,
|
27220 |
+
"loss": 0.6403,
|
27221 |
+
"step": 22485
|
27222 |
+
},
|
27223 |
+
{
|
27224 |
+
"epoch": 0.91,
|
27225 |
+
"learning_rate": 3.742065287170215e-08,
|
27226 |
+
"loss": 0.6089,
|
27227 |
+
"step": 22490
|
27228 |
+
},
|
27229 |
+
{
|
27230 |
+
"epoch": 0.91,
|
27231 |
+
"learning_rate": 3.724619097984916e-08,
|
27232 |
+
"loss": 0.6471,
|
27233 |
+
"step": 22495
|
27234 |
+
},
|
27235 |
+
{
|
27236 |
+
"epoch": 0.91,
|
27237 |
+
"learning_rate": 3.707212900755608e-08,
|
27238 |
+
"loss": 0.6479,
|
27239 |
+
"step": 22500
|
27240 |
+
},
|
27241 |
+
{
|
27242 |
+
"epoch": 0.91,
|
27243 |
+
"learning_rate": 3.689846702712651e-08,
|
27244 |
+
"loss": 0.6429,
|
27245 |
+
"step": 22505
|
27246 |
+
},
|
27247 |
+
{
|
27248 |
+
"epoch": 0.91,
|
27249 |
+
"learning_rate": 3.672520511069821e-08,
|
27250 |
+
"loss": 0.6175,
|
27251 |
+
"step": 22510
|
27252 |
+
},
|
27253 |
+
{
|
27254 |
+
"epoch": 0.91,
|
27255 |
+
"learning_rate": 3.655234333024271e-08,
|
27256 |
+
"loss": 0.6745,
|
27257 |
+
"step": 22515
|
27258 |
+
},
|
27259 |
+
{
|
27260 |
+
"epoch": 0.91,
|
27261 |
+
"learning_rate": 3.637988175756512e-08,
|
27262 |
+
"loss": 0.6604,
|
27263 |
+
"step": 22520
|
27264 |
+
},
|
27265 |
+
{
|
27266 |
+
"epoch": 0.91,
|
27267 |
+
"learning_rate": 3.6207820464304814e-08,
|
27268 |
+
"loss": 0.6285,
|
27269 |
+
"step": 22525
|
27270 |
+
},
|
27271 |
+
{
|
27272 |
+
"epoch": 0.92,
|
27273 |
+
"learning_rate": 3.603615952193417e-08,
|
27274 |
+
"loss": 0.6314,
|
27275 |
+
"step": 22530
|
27276 |
+
},
|
27277 |
+
{
|
27278 |
+
"epoch": 0.92,
|
27279 |
+
"learning_rate": 3.5864899001759706e-08,
|
27280 |
+
"loss": 0.6703,
|
27281 |
+
"step": 22535
|
27282 |
+
},
|
27283 |
+
{
|
27284 |
+
"epoch": 0.92,
|
27285 |
+
"learning_rate": 3.569403897492185e-08,
|
27286 |
+
"loss": 0.6586,
|
27287 |
+
"step": 22540
|
27288 |
+
},
|
27289 |
+
{
|
27290 |
+
"epoch": 0.92,
|
27291 |
+
"learning_rate": 3.552357951239427e-08,
|
27292 |
+
"loss": 0.6588,
|
27293 |
+
"step": 22545
|
27294 |
+
},
|
27295 |
+
{
|
27296 |
+
"epoch": 0.92,
|
27297 |
+
"learning_rate": 3.5353520684984096e-08,
|
27298 |
+
"loss": 0.6623,
|
27299 |
+
"step": 22550
|
27300 |
+
},
|
27301 |
+
{
|
27302 |
+
"epoch": 0.92,
|
27303 |
+
"learning_rate": 3.51838625633325e-08,
|
27304 |
+
"loss": 0.6619,
|
27305 |
+
"step": 22555
|
27306 |
+
},
|
27307 |
+
{
|
27308 |
+
"epoch": 0.92,
|
27309 |
+
"learning_rate": 3.501460521791399e-08,
|
27310 |
+
"loss": 0.7056,
|
27311 |
+
"step": 22560
|
27312 |
+
},
|
27313 |
+
{
|
27314 |
+
"epoch": 0.92,
|
27315 |
+
"learning_rate": 3.484574871903656e-08,
|
27316 |
+
"loss": 0.6647,
|
27317 |
+
"step": 22565
|
27318 |
+
},
|
27319 |
+
{
|
27320 |
+
"epoch": 0.92,
|
27321 |
+
"learning_rate": 3.467729313684153e-08,
|
27322 |
+
"loss": 0.6277,
|
27323 |
+
"step": 22570
|
27324 |
+
},
|
27325 |
+
{
|
27326 |
+
"epoch": 0.92,
|
27327 |
+
"learning_rate": 3.4509238541304384e-08,
|
27328 |
+
"loss": 0.6705,
|
27329 |
+
"step": 22575
|
27330 |
+
},
|
27331 |
+
{
|
27332 |
+
"epoch": 0.92,
|
27333 |
+
"learning_rate": 3.4341585002232945e-08,
|
27334 |
+
"loss": 0.654,
|
27335 |
+
"step": 22580
|
27336 |
+
},
|
27337 |
+
{
|
27338 |
+
"epoch": 0.92,
|
27339 |
+
"learning_rate": 3.4174332589269385e-08,
|
27340 |
+
"loss": 0.6352,
|
27341 |
+
"step": 22585
|
27342 |
+
},
|
27343 |
+
{
|
27344 |
+
"epoch": 0.92,
|
27345 |
+
"learning_rate": 3.4007481371888915e-08,
|
27346 |
+
"loss": 0.6517,
|
27347 |
+
"step": 22590
|
27348 |
+
},
|
27349 |
+
{
|
27350 |
+
"epoch": 0.92,
|
27351 |
+
"learning_rate": 3.384103141940009e-08,
|
27352 |
+
"loss": 0.6898,
|
27353 |
+
"step": 22595
|
27354 |
+
},
|
27355 |
+
{
|
27356 |
+
"epoch": 0.92,
|
27357 |
+
"learning_rate": 3.3674982800944604e-08,
|
27358 |
+
"loss": 0.6067,
|
27359 |
+
"step": 22600
|
27360 |
+
},
|
27361 |
+
{
|
27362 |
+
"epoch": 0.92,
|
27363 |
+
"learning_rate": 3.350933558549751e-08,
|
27364 |
+
"loss": 0.6752,
|
27365 |
+
"step": 22605
|
27366 |
+
},
|
27367 |
+
{
|
27368 |
+
"epoch": 0.92,
|
27369 |
+
"learning_rate": 3.334408984186765e-08,
|
27370 |
+
"loss": 0.664,
|
27371 |
+
"step": 22610
|
27372 |
+
},
|
27373 |
+
{
|
27374 |
+
"epoch": 0.92,
|
27375 |
+
"learning_rate": 3.317924563869634e-08,
|
27376 |
+
"loss": 0.6458,
|
27377 |
+
"step": 22615
|
27378 |
+
},
|
27379 |
+
{
|
27380 |
+
"epoch": 0.92,
|
27381 |
+
"learning_rate": 3.301480304445836e-08,
|
27382 |
+
"loss": 0.7108,
|
27383 |
+
"step": 22620
|
27384 |
+
},
|
27385 |
+
{
|
27386 |
+
"epoch": 0.92,
|
27387 |
+
"learning_rate": 3.2850762127462184e-08,
|
27388 |
+
"loss": 0.6561,
|
27389 |
+
"step": 22625
|
27390 |
+
},
|
27391 |
+
{
|
27392 |
+
"epoch": 0.92,
|
27393 |
+
"learning_rate": 3.268712295584841e-08,
|
27394 |
+
"loss": 0.7372,
|
27395 |
+
"step": 22630
|
27396 |
+
},
|
27397 |
+
{
|
27398 |
+
"epoch": 0.92,
|
27399 |
+
"learning_rate": 3.252388559759156e-08,
|
27400 |
+
"loss": 0.6515,
|
27401 |
+
"step": 22635
|
27402 |
+
},
|
27403 |
+
{
|
27404 |
+
"epoch": 0.92,
|
27405 |
+
"learning_rate": 3.2361050120499275e-08,
|
27406 |
+
"loss": 0.6417,
|
27407 |
+
"step": 22640
|
27408 |
+
},
|
27409 |
+
{
|
27410 |
+
"epoch": 0.92,
|
27411 |
+
"learning_rate": 3.219861659221168e-08,
|
27412 |
+
"loss": 0.6537,
|
27413 |
+
"step": 22645
|
27414 |
+
},
|
27415 |
+
{
|
27416 |
+
"epoch": 0.92,
|
27417 |
+
"learning_rate": 3.203658508020235e-08,
|
27418 |
+
"loss": 0.6385,
|
27419 |
+
"step": 22650
|
27420 |
+
},
|
27421 |
+
{
|
27422 |
+
"epoch": 0.92,
|
27423 |
+
"learning_rate": 3.1874955651777667e-08,
|
27424 |
+
"loss": 0.6112,
|
27425 |
+
"step": 22655
|
27426 |
+
},
|
27427 |
+
{
|
27428 |
+
"epoch": 0.92,
|
27429 |
+
"learning_rate": 3.171372837407738e-08,
|
27430 |
+
"loss": 0.6437,
|
27431 |
+
"step": 22660
|
27432 |
+
},
|
27433 |
+
{
|
27434 |
+
"epoch": 0.92,
|
27435 |
+
"learning_rate": 3.155290331407357e-08,
|
27436 |
+
"loss": 0.6222,
|
27437 |
+
"step": 22665
|
27438 |
+
},
|
27439 |
+
{
|
27440 |
+
"epoch": 0.92,
|
27441 |
+
"learning_rate": 3.1392480538571574e-08,
|
27442 |
+
"loss": 0.6439,
|
27443 |
+
"step": 22670
|
27444 |
+
},
|
27445 |
+
{
|
27446 |
+
"epoch": 0.92,
|
27447 |
+
"learning_rate": 3.123246011420999e-08,
|
27448 |
+
"loss": 0.6528,
|
27449 |
+
"step": 22675
|
27450 |
+
},
|
27451 |
+
{
|
27452 |
+
"epoch": 0.92,
|
27453 |
+
"learning_rate": 3.107284210745953e-08,
|
27454 |
+
"loss": 0.6654,
|
27455 |
+
"step": 22680
|
27456 |
+
},
|
27457 |
+
{
|
27458 |
+
"epoch": 0.92,
|
27459 |
+
"learning_rate": 3.0913626584624266e-08,
|
27460 |
+
"loss": 0.6474,
|
27461 |
+
"step": 22685
|
27462 |
+
},
|
27463 |
+
{
|
27464 |
+
"epoch": 0.92,
|
27465 |
+
"learning_rate": 3.0754813611840846e-08,
|
27466 |
+
"loss": 0.6528,
|
27467 |
+
"step": 22690
|
27468 |
+
},
|
27469 |
+
{
|
27470 |
+
"epoch": 0.92,
|
27471 |
+
"learning_rate": 3.0596403255078954e-08,
|
27472 |
+
"loss": 0.6708,
|
27473 |
+
"step": 22695
|
27474 |
+
},
|
27475 |
+
{
|
27476 |
+
"epoch": 0.92,
|
27477 |
+
"learning_rate": 3.043839558014083e-08,
|
27478 |
+
"loss": 0.6365,
|
27479 |
+
"step": 22700
|
27480 |
+
},
|
27481 |
+
{
|
27482 |
+
"epoch": 0.92,
|
27483 |
+
"learning_rate": 3.028079065266142e-08,
|
27484 |
+
"loss": 0.6687,
|
27485 |
+
"step": 22705
|
27486 |
+
},
|
27487 |
+
{
|
27488 |
+
"epoch": 0.92,
|
27489 |
+
"learning_rate": 3.012358853810859e-08,
|
27490 |
+
"loss": 0.6645,
|
27491 |
+
"step": 22710
|
27492 |
+
},
|
27493 |
+
{
|
27494 |
+
"epoch": 0.92,
|
27495 |
+
"learning_rate": 2.9966789301782535e-08,
|
27496 |
+
"loss": 0.6366,
|
27497 |
+
"step": 22715
|
27498 |
+
},
|
27499 |
+
{
|
27500 |
+
"epoch": 0.92,
|
27501 |
+
"learning_rate": 2.981039300881627e-08,
|
27502 |
+
"loss": 0.6575,
|
27503 |
+
"step": 22720
|
27504 |
+
},
|
27505 |
+
{
|
27506 |
+
"epoch": 0.92,
|
27507 |
+
"learning_rate": 2.9654399724175828e-08,
|
27508 |
+
"loss": 0.6966,
|
27509 |
+
"step": 22725
|
27510 |
+
},
|
27511 |
+
{
|
27512 |
+
"epoch": 0.92,
|
27513 |
+
"learning_rate": 2.949880951265904e-08,
|
27514 |
+
"loss": 0.6842,
|
27515 |
+
"step": 22730
|
27516 |
+
},
|
27517 |
+
{
|
27518 |
+
"epoch": 0.92,
|
27519 |
+
"learning_rate": 2.9343622438896875e-08,
|
27520 |
+
"loss": 0.6586,
|
27521 |
+
"step": 22735
|
27522 |
+
},
|
27523 |
+
{
|
27524 |
+
"epoch": 0.92,
|
27525 |
+
"learning_rate": 2.918883856735277e-08,
|
27526 |
+
"loss": 0.6629,
|
27527 |
+
"step": 22740
|
27528 |
+
},
|
27529 |
+
{
|
27530 |
+
"epoch": 0.92,
|
27531 |
+
"learning_rate": 2.9034457962322513e-08,
|
27532 |
+
"loss": 0.6283,
|
27533 |
+
"step": 22745
|
27534 |
+
},
|
27535 |
+
{
|
27536 |
+
"epoch": 0.92,
|
27537 |
+
"learning_rate": 2.8880480687934473e-08,
|
27538 |
+
"loss": 0.6272,
|
27539 |
+
"step": 22750
|
27540 |
+
},
|
27541 |
+
{
|
27542 |
+
"epoch": 0.92,
|
27543 |
+
"learning_rate": 2.8726906808149486e-08,
|
27544 |
+
"loss": 0.616,
|
27545 |
+
"step": 22755
|
27546 |
+
},
|
27547 |
+
{
|
27548 |
+
"epoch": 0.92,
|
27549 |
+
"learning_rate": 2.857373638676097e-08,
|
27550 |
+
"loss": 0.6605,
|
27551 |
+
"step": 22760
|
27552 |
+
},
|
27553 |
+
{
|
27554 |
+
"epoch": 0.92,
|
27555 |
+
"learning_rate": 2.8420969487394143e-08,
|
27556 |
+
"loss": 0.6212,
|
27557 |
+
"step": 22765
|
27558 |
+
},
|
27559 |
+
{
|
27560 |
+
"epoch": 0.92,
|
27561 |
+
"learning_rate": 2.826860617350746e-08,
|
27562 |
+
"loss": 0.6844,
|
27563 |
+
"step": 22770
|
27564 |
+
},
|
27565 |
+
{
|
27566 |
+
"epoch": 0.93,
|
27567 |
+
"learning_rate": 2.8116646508391183e-08,
|
27568 |
+
"loss": 0.645,
|
27569 |
+
"step": 22775
|
27570 |
+
},
|
27571 |
+
{
|
27572 |
+
"epoch": 0.93,
|
27573 |
+
"learning_rate": 2.7965090555168047e-08,
|
27574 |
+
"loss": 0.6497,
|
27575 |
+
"step": 22780
|
27576 |
+
},
|
27577 |
+
{
|
27578 |
+
"epoch": 0.93,
|
27579 |
+
"learning_rate": 2.7813938376793134e-08,
|
27580 |
+
"loss": 0.6361,
|
27581 |
+
"step": 22785
|
27582 |
+
},
|
27583 |
+
{
|
27584 |
+
"epoch": 0.93,
|
27585 |
+
"learning_rate": 2.7663190036053552e-08,
|
27586 |
+
"loss": 0.5859,
|
27587 |
+
"step": 22790
|
27588 |
+
},
|
27589 |
+
{
|
27590 |
+
"epoch": 0.93,
|
27591 |
+
"learning_rate": 2.75128455955691e-08,
|
27592 |
+
"loss": 0.6026,
|
27593 |
+
"step": 22795
|
27594 |
+
},
|
27595 |
+
{
|
27596 |
+
"epoch": 0.93,
|
27597 |
+
"learning_rate": 2.7362905117791268e-08,
|
27598 |
+
"loss": 0.6694,
|
27599 |
+
"step": 22800
|
27600 |
}
|
27601 |
],
|
27602 |
"logging_steps": 5,
|
|
|
27604 |
"num_input_tokens_seen": 0,
|
27605 |
"num_train_epochs": 1,
|
27606 |
"save_steps": 400,
|
27607 |
+
"total_flos": 3187222391169024.0,
|
27608 |
"trial_name": null,
|
27609 |
"trial_params": null
|
27610 |
}
|