Upload folder using huggingface_hub
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1221 -5
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421709600
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a4dc01a62180b671235df605bd949d91ee5e79a277541e196862797a12cb410
|
3 |
size 1421709600
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 712762106
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa571c3b6266b8d7489d60262f29cb4d0642227fe03ea07f48f0ec877638e5f
|
3 |
size 712762106
|
rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33cecfafe0d11e62c18063727af52cc3be05e3a270099438df514f675a702b94
|
3 |
size 14512
|
rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:988254c6cd2ce48fc90f3031675157ff19bb6013dae9b99c16a8a82f7dba3fd7
|
3 |
size 14512
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c34cc6321076d6d0f8c53d44b0a1cde5a1527d5c424cf5e2d4fffd6d91186ef
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "checkpoints-mistral-300M/checkpoint-
|
4 |
-
"epoch": 13.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -61415,13 +61415,1229 @@
|
|
61415 |
"eval_samples_per_second": 59.929,
|
61416 |
"eval_steps_per_second": 4.994,
|
61417 |
"step": 101000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61418 |
}
|
61419 |
],
|
61420 |
"logging_steps": 10,
|
61421 |
"max_steps": 111060,
|
61422 |
"num_train_epochs": 15,
|
61423 |
"save_steps": 1000,
|
61424 |
-
"total_flos": 3.
|
61425 |
"trial_name": null,
|
61426 |
"trial_params": null
|
61427 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.39996337890625,
|
3 |
+
"best_model_checkpoint": "checkpoints-mistral-300M/checkpoint-103000",
|
4 |
+
"epoch": 13.910818886206721,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 103000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
61415 |
"eval_samples_per_second": 59.929,
|
61416 |
"eval_steps_per_second": 4.994,
|
61417 |
"step": 101000
|
61418 |
+
},
|
61419 |
+
{
|
61420 |
+
"epoch": 13.64,
|
61421 |
+
"learning_rate": 6.052167733471941e-06,
|
61422 |
+
"loss": 2.2055,
|
61423 |
+
"step": 101010
|
61424 |
+
},
|
61425 |
+
{
|
61426 |
+
"epoch": 13.64,
|
61427 |
+
"learning_rate": 6.0402418995578684e-06,
|
61428 |
+
"loss": 2.1925,
|
61429 |
+
"step": 101020
|
61430 |
+
},
|
61431 |
+
{
|
61432 |
+
"epoch": 13.64,
|
61433 |
+
"learning_rate": 6.028327585758325e-06,
|
61434 |
+
"loss": 2.1854,
|
61435 |
+
"step": 101030
|
61436 |
+
},
|
61437 |
+
{
|
61438 |
+
"epoch": 13.65,
|
61439 |
+
"learning_rate": 6.016424793026714e-06,
|
61440 |
+
"loss": 2.1818,
|
61441 |
+
"step": 101040
|
61442 |
+
},
|
61443 |
+
{
|
61444 |
+
"epoch": 13.65,
|
61445 |
+
"learning_rate": 6.004533522315558e-06,
|
61446 |
+
"loss": 2.2056,
|
61447 |
+
"step": 101050
|
61448 |
+
},
|
61449 |
+
{
|
61450 |
+
"epoch": 13.65,
|
61451 |
+
"learning_rate": 5.9926537745764115e-06,
|
61452 |
+
"loss": 2.2048,
|
61453 |
+
"step": 101060
|
61454 |
+
},
|
61455 |
+
{
|
61456 |
+
"epoch": 13.65,
|
61457 |
+
"learning_rate": 5.980785550759965e-06,
|
61458 |
+
"loss": 2.1758,
|
61459 |
+
"step": 101070
|
61460 |
+
},
|
61461 |
+
{
|
61462 |
+
"epoch": 13.65,
|
61463 |
+
"learning_rate": 5.968928851815907e-06,
|
61464 |
+
"loss": 2.2104,
|
61465 |
+
"step": 101080
|
61466 |
+
},
|
61467 |
+
{
|
61468 |
+
"epoch": 13.65,
|
61469 |
+
"learning_rate": 5.957083678693081e-06,
|
61470 |
+
"loss": 2.197,
|
61471 |
+
"step": 101090
|
61472 |
+
},
|
61473 |
+
{
|
61474 |
+
"epoch": 13.65,
|
61475 |
+
"learning_rate": 5.94525003233936e-06,
|
61476 |
+
"loss": 2.197,
|
61477 |
+
"step": 101100
|
61478 |
+
},
|
61479 |
+
{
|
61480 |
+
"epoch": 13.66,
|
61481 |
+
"learning_rate": 5.933427913701722e-06,
|
61482 |
+
"loss": 2.2029,
|
61483 |
+
"step": 101110
|
61484 |
+
},
|
61485 |
+
{
|
61486 |
+
"epoch": 13.66,
|
61487 |
+
"learning_rate": 5.9216173237262075e-06,
|
61488 |
+
"loss": 2.1813,
|
61489 |
+
"step": 101120
|
61490 |
+
},
|
61491 |
+
{
|
61492 |
+
"epoch": 13.66,
|
61493 |
+
"learning_rate": 5.9098182633579125e-06,
|
61494 |
+
"loss": 2.2079,
|
61495 |
+
"step": 101130
|
61496 |
+
},
|
61497 |
+
{
|
61498 |
+
"epoch": 13.66,
|
61499 |
+
"learning_rate": 5.898030733541065e-06,
|
61500 |
+
"loss": 2.233,
|
61501 |
+
"step": 101140
|
61502 |
+
},
|
61503 |
+
{
|
61504 |
+
"epoch": 13.66,
|
61505 |
+
"learning_rate": 5.8862547352189424e-06,
|
61506 |
+
"loss": 2.1991,
|
61507 |
+
"step": 101150
|
61508 |
+
},
|
61509 |
+
{
|
61510 |
+
"epoch": 13.66,
|
61511 |
+
"learning_rate": 5.874490269333859e-06,
|
61512 |
+
"loss": 2.2204,
|
61513 |
+
"step": 101160
|
61514 |
+
},
|
61515 |
+
{
|
61516 |
+
"epoch": 13.66,
|
61517 |
+
"learning_rate": 5.862737336827294e-06,
|
61518 |
+
"loss": 2.1827,
|
61519 |
+
"step": 101170
|
61520 |
+
},
|
61521 |
+
{
|
61522 |
+
"epoch": 13.67,
|
61523 |
+
"learning_rate": 5.850995938639697e-06,
|
61524 |
+
"loss": 2.1891,
|
61525 |
+
"step": 101180
|
61526 |
+
},
|
61527 |
+
{
|
61528 |
+
"epoch": 13.67,
|
61529 |
+
"learning_rate": 5.839266075710747e-06,
|
61530 |
+
"loss": 2.2242,
|
61531 |
+
"step": 101190
|
61532 |
+
},
|
61533 |
+
{
|
61534 |
+
"epoch": 13.67,
|
61535 |
+
"learning_rate": 5.8275477489789944e-06,
|
61536 |
+
"loss": 2.1684,
|
61537 |
+
"step": 101200
|
61538 |
+
},
|
61539 |
+
{
|
61540 |
+
"epoch": 13.67,
|
61541 |
+
"learning_rate": 5.815840959382223e-06,
|
61542 |
+
"loss": 2.232,
|
61543 |
+
"step": 101210
|
61544 |
+
},
|
61545 |
+
{
|
61546 |
+
"epoch": 13.67,
|
61547 |
+
"learning_rate": 5.804145707857266e-06,
|
61548 |
+
"loss": 2.1859,
|
61549 |
+
"step": 101220
|
61550 |
+
},
|
61551 |
+
{
|
61552 |
+
"epoch": 13.67,
|
61553 |
+
"learning_rate": 5.792461995340009e-06,
|
61554 |
+
"loss": 2.1793,
|
61555 |
+
"step": 101230
|
61556 |
+
},
|
61557 |
+
{
|
61558 |
+
"epoch": 13.67,
|
61559 |
+
"learning_rate": 5.780789822765402e-06,
|
61560 |
+
"loss": 2.201,
|
61561 |
+
"step": 101240
|
61562 |
+
},
|
61563 |
+
{
|
61564 |
+
"epoch": 13.67,
|
61565 |
+
"learning_rate": 5.769129191067467e-06,
|
61566 |
+
"loss": 2.2156,
|
61567 |
+
"step": 101250
|
61568 |
+
},
|
61569 |
+
{
|
61570 |
+
"epoch": 13.68,
|
61571 |
+
"learning_rate": 5.757480101179391e-06,
|
61572 |
+
"loss": 2.1748,
|
61573 |
+
"step": 101260
|
61574 |
+
},
|
61575 |
+
{
|
61576 |
+
"epoch": 13.68,
|
61577 |
+
"learning_rate": 5.745842554033309e-06,
|
61578 |
+
"loss": 2.1969,
|
61579 |
+
"step": 101270
|
61580 |
+
},
|
61581 |
+
{
|
61582 |
+
"epoch": 13.68,
|
61583 |
+
"learning_rate": 5.734216550560528e-06,
|
61584 |
+
"loss": 2.2004,
|
61585 |
+
"step": 101280
|
61586 |
+
},
|
61587 |
+
{
|
61588 |
+
"epoch": 13.68,
|
61589 |
+
"learning_rate": 5.7226020916913865e-06,
|
61590 |
+
"loss": 2.2341,
|
61591 |
+
"step": 101290
|
61592 |
+
},
|
61593 |
+
{
|
61594 |
+
"epoch": 13.68,
|
61595 |
+
"learning_rate": 5.710999178355291e-06,
|
61596 |
+
"loss": 2.2203,
|
61597 |
+
"step": 101300
|
61598 |
+
},
|
61599 |
+
{
|
61600 |
+
"epoch": 13.68,
|
61601 |
+
"learning_rate": 5.699407811480799e-06,
|
61602 |
+
"loss": 2.2237,
|
61603 |
+
"step": 101310
|
61604 |
+
},
|
61605 |
+
{
|
61606 |
+
"epoch": 13.68,
|
61607 |
+
"learning_rate": 5.687827991995436e-06,
|
61608 |
+
"loss": 2.2007,
|
61609 |
+
"step": 101320
|
61610 |
+
},
|
61611 |
+
{
|
61612 |
+
"epoch": 13.69,
|
61613 |
+
"learning_rate": 5.676259720825876e-06,
|
61614 |
+
"loss": 2.2128,
|
61615 |
+
"step": 101330
|
61616 |
+
},
|
61617 |
+
{
|
61618 |
+
"epoch": 13.69,
|
61619 |
+
"learning_rate": 5.664702998897847e-06,
|
61620 |
+
"loss": 2.2224,
|
61621 |
+
"step": 101340
|
61622 |
+
},
|
61623 |
+
{
|
61624 |
+
"epoch": 13.69,
|
61625 |
+
"learning_rate": 5.653157827136156e-06,
|
61626 |
+
"loss": 2.1993,
|
61627 |
+
"step": 101350
|
61628 |
+
},
|
61629 |
+
{
|
61630 |
+
"epoch": 13.69,
|
61631 |
+
"learning_rate": 5.641624206464668e-06,
|
61632 |
+
"loss": 2.1912,
|
61633 |
+
"step": 101360
|
61634 |
+
},
|
61635 |
+
{
|
61636 |
+
"epoch": 13.69,
|
61637 |
+
"learning_rate": 5.6301021378063586e-06,
|
61638 |
+
"loss": 2.217,
|
61639 |
+
"step": 101370
|
61640 |
+
},
|
61641 |
+
{
|
61642 |
+
"epoch": 13.69,
|
61643 |
+
"learning_rate": 5.618591622083258e-06,
|
61644 |
+
"loss": 2.2013,
|
61645 |
+
"step": 101380
|
61646 |
+
},
|
61647 |
+
{
|
61648 |
+
"epoch": 13.69,
|
61649 |
+
"learning_rate": 5.607092660216478e-06,
|
61650 |
+
"loss": 2.2333,
|
61651 |
+
"step": 101390
|
61652 |
+
},
|
61653 |
+
{
|
61654 |
+
"epoch": 13.69,
|
61655 |
+
"learning_rate": 5.595605253126184e-06,
|
61656 |
+
"loss": 2.2003,
|
61657 |
+
"step": 101400
|
61658 |
+
},
|
61659 |
+
{
|
61660 |
+
"epoch": 13.7,
|
61661 |
+
"learning_rate": 5.584129401731641e-06,
|
61662 |
+
"loss": 2.2048,
|
61663 |
+
"step": 101410
|
61664 |
+
},
|
61665 |
+
{
|
61666 |
+
"epoch": 13.7,
|
61667 |
+
"learning_rate": 5.572665106951179e-06,
|
61668 |
+
"loss": 2.1934,
|
61669 |
+
"step": 101420
|
61670 |
+
},
|
61671 |
+
{
|
61672 |
+
"epoch": 13.7,
|
61673 |
+
"learning_rate": 5.561212369702234e-06,
|
61674 |
+
"loss": 2.213,
|
61675 |
+
"step": 101430
|
61676 |
+
},
|
61677 |
+
{
|
61678 |
+
"epoch": 13.7,
|
61679 |
+
"learning_rate": 5.54977119090127e-06,
|
61680 |
+
"loss": 2.2165,
|
61681 |
+
"step": 101440
|
61682 |
+
},
|
61683 |
+
{
|
61684 |
+
"epoch": 13.7,
|
61685 |
+
"learning_rate": 5.538341571463839e-06,
|
61686 |
+
"loss": 2.2274,
|
61687 |
+
"step": 101450
|
61688 |
+
},
|
61689 |
+
{
|
61690 |
+
"epoch": 13.7,
|
61691 |
+
"learning_rate": 5.526923512304593e-06,
|
61692 |
+
"loss": 2.1762,
|
61693 |
+
"step": 101460
|
61694 |
+
},
|
61695 |
+
{
|
61696 |
+
"epoch": 13.7,
|
61697 |
+
"learning_rate": 5.5155170143372174e-06,
|
61698 |
+
"loss": 2.2149,
|
61699 |
+
"step": 101470
|
61700 |
+
},
|
61701 |
+
{
|
61702 |
+
"epoch": 13.71,
|
61703 |
+
"learning_rate": 5.504122078474516e-06,
|
61704 |
+
"loss": 2.2082,
|
61705 |
+
"step": 101480
|
61706 |
+
},
|
61707 |
+
{
|
61708 |
+
"epoch": 13.71,
|
61709 |
+
"learning_rate": 5.492738705628341e-06,
|
61710 |
+
"loss": 2.2212,
|
61711 |
+
"step": 101490
|
61712 |
+
},
|
61713 |
+
{
|
61714 |
+
"epoch": 13.71,
|
61715 |
+
"learning_rate": 5.481366896709616e-06,
|
61716 |
+
"loss": 2.1909,
|
61717 |
+
"step": 101500
|
61718 |
+
},
|
61719 |
+
{
|
61720 |
+
"epoch": 13.71,
|
61721 |
+
"learning_rate": 5.470006652628378e-06,
|
61722 |
+
"loss": 2.179,
|
61723 |
+
"step": 101510
|
61724 |
+
},
|
61725 |
+
{
|
61726 |
+
"epoch": 13.71,
|
61727 |
+
"learning_rate": 5.458657974293701e-06,
|
61728 |
+
"loss": 2.2055,
|
61729 |
+
"step": 101520
|
61730 |
+
},
|
61731 |
+
{
|
61732 |
+
"epoch": 13.71,
|
61733 |
+
"learning_rate": 5.4473208626137076e-06,
|
61734 |
+
"loss": 2.2205,
|
61735 |
+
"step": 101530
|
61736 |
+
},
|
61737 |
+
{
|
61738 |
+
"epoch": 13.71,
|
61739 |
+
"learning_rate": 5.435995318495656e-06,
|
61740 |
+
"loss": 2.1771,
|
61741 |
+
"step": 101540
|
61742 |
+
},
|
61743 |
+
{
|
61744 |
+
"epoch": 13.72,
|
61745 |
+
"learning_rate": 5.424681342845871e-06,
|
61746 |
+
"loss": 2.2074,
|
61747 |
+
"step": 101550
|
61748 |
+
},
|
61749 |
+
{
|
61750 |
+
"epoch": 13.72,
|
61751 |
+
"learning_rate": 5.413378936569695e-06,
|
61752 |
+
"loss": 2.1829,
|
61753 |
+
"step": 101560
|
61754 |
+
},
|
61755 |
+
{
|
61756 |
+
"epoch": 13.72,
|
61757 |
+
"learning_rate": 5.402088100571606e-06,
|
61758 |
+
"loss": 2.1937,
|
61759 |
+
"step": 101570
|
61760 |
+
},
|
61761 |
+
{
|
61762 |
+
"epoch": 13.72,
|
61763 |
+
"learning_rate": 5.390808835755111e-06,
|
61764 |
+
"loss": 2.2218,
|
61765 |
+
"step": 101580
|
61766 |
+
},
|
61767 |
+
{
|
61768 |
+
"epoch": 13.72,
|
61769 |
+
"learning_rate": 5.379541143022859e-06,
|
61770 |
+
"loss": 2.2359,
|
61771 |
+
"step": 101590
|
61772 |
+
},
|
61773 |
+
{
|
61774 |
+
"epoch": 13.72,
|
61775 |
+
"learning_rate": 5.368285023276458e-06,
|
61776 |
+
"loss": 2.1684,
|
61777 |
+
"step": 101600
|
61778 |
+
},
|
61779 |
+
{
|
61780 |
+
"epoch": 13.72,
|
61781 |
+
"learning_rate": 5.357040477416724e-06,
|
61782 |
+
"loss": 2.1749,
|
61783 |
+
"step": 101610
|
61784 |
+
},
|
61785 |
+
{
|
61786 |
+
"epoch": 13.72,
|
61787 |
+
"learning_rate": 5.345807506343419e-06,
|
61788 |
+
"loss": 2.1799,
|
61789 |
+
"step": 101620
|
61790 |
+
},
|
61791 |
+
{
|
61792 |
+
"epoch": 13.73,
|
61793 |
+
"learning_rate": 5.334586110955508e-06,
|
61794 |
+
"loss": 2.2011,
|
61795 |
+
"step": 101630
|
61796 |
+
},
|
61797 |
+
{
|
61798 |
+
"epoch": 13.73,
|
61799 |
+
"learning_rate": 5.3233762921509215e-06,
|
61800 |
+
"loss": 2.2168,
|
61801 |
+
"step": 101640
|
61802 |
+
},
|
61803 |
+
{
|
61804 |
+
"epoch": 13.73,
|
61805 |
+
"learning_rate": 5.3121780508266766e-06,
|
61806 |
+
"loss": 2.2133,
|
61807 |
+
"step": 101650
|
61808 |
+
},
|
61809 |
+
{
|
61810 |
+
"epoch": 13.73,
|
61811 |
+
"learning_rate": 5.300991387878972e-06,
|
61812 |
+
"loss": 2.22,
|
61813 |
+
"step": 101660
|
61814 |
+
},
|
61815 |
+
{
|
61816 |
+
"epoch": 13.73,
|
61817 |
+
"learning_rate": 5.289816304202927e-06,
|
61818 |
+
"loss": 2.2098,
|
61819 |
+
"step": 101670
|
61820 |
+
},
|
61821 |
+
{
|
61822 |
+
"epoch": 13.73,
|
61823 |
+
"learning_rate": 5.27865280069284e-06,
|
61824 |
+
"loss": 2.2029,
|
61825 |
+
"step": 101680
|
61826 |
+
},
|
61827 |
+
{
|
61828 |
+
"epoch": 13.73,
|
61829 |
+
"learning_rate": 5.267500878242048e-06,
|
61830 |
+
"loss": 2.1947,
|
61831 |
+
"step": 101690
|
61832 |
+
},
|
61833 |
+
{
|
61834 |
+
"epoch": 13.74,
|
61835 |
+
"learning_rate": 5.256360537742937e-06,
|
61836 |
+
"loss": 2.2075,
|
61837 |
+
"step": 101700
|
61838 |
+
},
|
61839 |
+
{
|
61840 |
+
"epoch": 13.74,
|
61841 |
+
"learning_rate": 5.245231780087044e-06,
|
61842 |
+
"loss": 2.201,
|
61843 |
+
"step": 101710
|
61844 |
+
},
|
61845 |
+
{
|
61846 |
+
"epoch": 13.74,
|
61847 |
+
"learning_rate": 5.2341146061648905e-06,
|
61848 |
+
"loss": 2.184,
|
61849 |
+
"step": 101720
|
61850 |
+
},
|
61851 |
+
{
|
61852 |
+
"epoch": 13.74,
|
61853 |
+
"learning_rate": 5.223009016866114e-06,
|
61854 |
+
"loss": 2.2246,
|
61855 |
+
"step": 101730
|
61856 |
+
},
|
61857 |
+
{
|
61858 |
+
"epoch": 13.74,
|
61859 |
+
"learning_rate": 5.2119150130794205e-06,
|
61860 |
+
"loss": 2.2135,
|
61861 |
+
"step": 101740
|
61862 |
+
},
|
61863 |
+
{
|
61864 |
+
"epoch": 13.74,
|
61865 |
+
"learning_rate": 5.200832595692583e-06,
|
61866 |
+
"loss": 2.2184,
|
61867 |
+
"step": 101750
|
61868 |
+
},
|
61869 |
+
{
|
61870 |
+
"epoch": 13.74,
|
61871 |
+
"learning_rate": 5.189761765592443e-06,
|
61872 |
+
"loss": 2.2135,
|
61873 |
+
"step": 101760
|
61874 |
+
},
|
61875 |
+
{
|
61876 |
+
"epoch": 13.74,
|
61877 |
+
"learning_rate": 5.178702523664957e-06,
|
61878 |
+
"loss": 2.1781,
|
61879 |
+
"step": 101770
|
61880 |
+
},
|
61881 |
+
{
|
61882 |
+
"epoch": 13.75,
|
61883 |
+
"learning_rate": 5.167654870795085e-06,
|
61884 |
+
"loss": 2.2232,
|
61885 |
+
"step": 101780
|
61886 |
+
},
|
61887 |
+
{
|
61888 |
+
"epoch": 13.75,
|
61889 |
+
"learning_rate": 5.156618807866936e-06,
|
61890 |
+
"loss": 2.2125,
|
61891 |
+
"step": 101790
|
61892 |
+
},
|
61893 |
+
{
|
61894 |
+
"epoch": 13.75,
|
61895 |
+
"learning_rate": 5.1455943357635864e-06,
|
61896 |
+
"loss": 2.2199,
|
61897 |
+
"step": 101800
|
61898 |
+
},
|
61899 |
+
{
|
61900 |
+
"epoch": 13.75,
|
61901 |
+
"learning_rate": 5.134581455367315e-06,
|
61902 |
+
"loss": 2.209,
|
61903 |
+
"step": 101810
|
61904 |
+
},
|
61905 |
+
{
|
61906 |
+
"epoch": 13.75,
|
61907 |
+
"learning_rate": 5.123580167559349e-06,
|
61908 |
+
"loss": 2.2137,
|
61909 |
+
"step": 101820
|
61910 |
+
},
|
61911 |
+
{
|
61912 |
+
"epoch": 13.75,
|
61913 |
+
"learning_rate": 5.112590473220102e-06,
|
61914 |
+
"loss": 2.203,
|
61915 |
+
"step": 101830
|
61916 |
+
},
|
61917 |
+
{
|
61918 |
+
"epoch": 13.75,
|
61919 |
+
"learning_rate": 5.101612373228969e-06,
|
61920 |
+
"loss": 2.1788,
|
61921 |
+
"step": 101840
|
61922 |
+
},
|
61923 |
+
{
|
61924 |
+
"epoch": 13.76,
|
61925 |
+
"learning_rate": 5.090645868464449e-06,
|
61926 |
+
"loss": 2.2026,
|
61927 |
+
"step": 101850
|
61928 |
+
},
|
61929 |
+
{
|
61930 |
+
"epoch": 13.76,
|
61931 |
+
"learning_rate": 5.079690959804156e-06,
|
61932 |
+
"loss": 2.2249,
|
61933 |
+
"step": 101860
|
61934 |
+
},
|
61935 |
+
{
|
61936 |
+
"epoch": 13.76,
|
61937 |
+
"learning_rate": 5.069841457403523e-06,
|
61938 |
+
"loss": 2.1968,
|
61939 |
+
"step": 101870
|
61940 |
+
},
|
61941 |
+
{
|
61942 |
+
"epoch": 13.76,
|
61943 |
+
"learning_rate": 5.058908583755594e-06,
|
61944 |
+
"loss": 2.2105,
|
61945 |
+
"step": 101880
|
61946 |
+
},
|
61947 |
+
{
|
61948 |
+
"epoch": 13.76,
|
61949 |
+
"learning_rate": 5.047987308751561e-06,
|
61950 |
+
"loss": 2.1889,
|
61951 |
+
"step": 101890
|
61952 |
+
},
|
61953 |
+
{
|
61954 |
+
"epoch": 13.76,
|
61955 |
+
"learning_rate": 5.037077633265424e-06,
|
61956 |
+
"loss": 2.2238,
|
61957 |
+
"step": 101900
|
61958 |
+
},
|
61959 |
+
{
|
61960 |
+
"epoch": 13.76,
|
61961 |
+
"learning_rate": 5.026179558170185e-06,
|
61962 |
+
"loss": 2.203,
|
61963 |
+
"step": 101910
|
61964 |
+
},
|
61965 |
+
{
|
61966 |
+
"epoch": 13.76,
|
61967 |
+
"learning_rate": 5.015293084337929e-06,
|
61968 |
+
"loss": 2.2099,
|
61969 |
+
"step": 101920
|
61970 |
+
},
|
61971 |
+
{
|
61972 |
+
"epoch": 13.77,
|
61973 |
+
"learning_rate": 5.004418212639844e-06,
|
61974 |
+
"loss": 2.2041,
|
61975 |
+
"step": 101930
|
61976 |
+
},
|
61977 |
+
{
|
61978 |
+
"epoch": 13.77,
|
61979 |
+
"learning_rate": 4.993554943946182e-06,
|
61980 |
+
"loss": 2.219,
|
61981 |
+
"step": 101940
|
61982 |
+
},
|
61983 |
+
{
|
61984 |
+
"epoch": 13.77,
|
61985 |
+
"learning_rate": 4.982703279126232e-06,
|
61986 |
+
"loss": 2.1971,
|
61987 |
+
"step": 101950
|
61988 |
+
},
|
61989 |
+
{
|
61990 |
+
"epoch": 13.77,
|
61991 |
+
"learning_rate": 4.971863219048383e-06,
|
61992 |
+
"loss": 2.1991,
|
61993 |
+
"step": 101960
|
61994 |
+
},
|
61995 |
+
{
|
61996 |
+
"epoch": 13.77,
|
61997 |
+
"learning_rate": 4.9610347645800904e-06,
|
61998 |
+
"loss": 2.2002,
|
61999 |
+
"step": 101970
|
62000 |
+
},
|
62001 |
+
{
|
62002 |
+
"epoch": 13.77,
|
62003 |
+
"learning_rate": 4.950217916587912e-06,
|
62004 |
+
"loss": 2.1817,
|
62005 |
+
"step": 101980
|
62006 |
+
},
|
62007 |
+
{
|
62008 |
+
"epoch": 13.77,
|
62009 |
+
"learning_rate": 4.939412675937405e-06,
|
62010 |
+
"loss": 2.2307,
|
62011 |
+
"step": 101990
|
62012 |
+
},
|
62013 |
+
{
|
62014 |
+
"epoch": 13.78,
|
62015 |
+
"learning_rate": 4.928619043493259e-06,
|
62016 |
+
"loss": 2.2212,
|
62017 |
+
"step": 102000
|
62018 |
+
},
|
62019 |
+
{
|
62020 |
+
"epoch": 13.78,
|
62021 |
+
"eval_loss": 2.400679111480713,
|
62022 |
+
"eval_runtime": 1953.3809,
|
62023 |
+
"eval_samples_per_second": 38.976,
|
62024 |
+
"eval_steps_per_second": 3.248,
|
62025 |
+
"step": 102000
|
62026 |
+
},
|
62027 |
+
{
|
62028 |
+
"epoch": 13.78,
|
62029 |
+
"learning_rate": 4.917837020119203e-06,
|
62030 |
+
"loss": 2.2046,
|
62031 |
+
"step": 102010
|
62032 |
+
},
|
62033 |
+
{
|
62034 |
+
"epoch": 13.78,
|
62035 |
+
"learning_rate": 4.907066606678062e-06,
|
62036 |
+
"loss": 2.2169,
|
62037 |
+
"step": 102020
|
62038 |
+
},
|
62039 |
+
{
|
62040 |
+
"epoch": 13.78,
|
62041 |
+
"learning_rate": 4.896307804031713e-06,
|
62042 |
+
"loss": 2.2054,
|
62043 |
+
"step": 102030
|
62044 |
+
},
|
62045 |
+
{
|
62046 |
+
"epoch": 13.78,
|
62047 |
+
"learning_rate": 4.885560613041101e-06,
|
62048 |
+
"loss": 2.2078,
|
62049 |
+
"step": 102040
|
62050 |
+
},
|
62051 |
+
{
|
62052 |
+
"epoch": 13.78,
|
62053 |
+
"learning_rate": 4.874825034566271e-06,
|
62054 |
+
"loss": 2.2067,
|
62055 |
+
"step": 102050
|
62056 |
+
},
|
62057 |
+
{
|
62058 |
+
"epoch": 13.78,
|
62059 |
+
"learning_rate": 4.86410106946632e-06,
|
62060 |
+
"loss": 2.234,
|
62061 |
+
"step": 102060
|
62062 |
+
},
|
62063 |
+
{
|
62064 |
+
"epoch": 13.79,
|
62065 |
+
"learning_rate": 4.8533887185993604e-06,
|
62066 |
+
"loss": 2.2144,
|
62067 |
+
"step": 102070
|
62068 |
+
},
|
62069 |
+
{
|
62070 |
+
"epoch": 13.79,
|
62071 |
+
"learning_rate": 4.842687982822691e-06,
|
62072 |
+
"loss": 2.2134,
|
62073 |
+
"step": 102080
|
62074 |
+
},
|
62075 |
+
{
|
62076 |
+
"epoch": 13.79,
|
62077 |
+
"learning_rate": 4.831998862992592e-06,
|
62078 |
+
"loss": 2.2034,
|
62079 |
+
"step": 102090
|
62080 |
+
},
|
62081 |
+
{
|
62082 |
+
"epoch": 13.79,
|
62083 |
+
"learning_rate": 4.821321359964447e-06,
|
62084 |
+
"loss": 2.1982,
|
62085 |
+
"step": 102100
|
62086 |
+
},
|
62087 |
+
{
|
62088 |
+
"epoch": 13.79,
|
62089 |
+
"learning_rate": 4.81065547459269e-06,
|
62090 |
+
"loss": 2.2427,
|
62091 |
+
"step": 102110
|
62092 |
+
},
|
62093 |
+
{
|
62094 |
+
"epoch": 13.79,
|
62095 |
+
"learning_rate": 4.800001207730836e-06,
|
62096 |
+
"loss": 2.234,
|
62097 |
+
"step": 102120
|
62098 |
+
},
|
62099 |
+
{
|
62100 |
+
"epoch": 13.79,
|
62101 |
+
"learning_rate": 4.789358560231521e-06,
|
62102 |
+
"loss": 2.2076,
|
62103 |
+
"step": 102130
|
62104 |
+
},
|
62105 |
+
{
|
62106 |
+
"epoch": 13.79,
|
62107 |
+
"learning_rate": 4.778727532946347e-06,
|
62108 |
+
"loss": 2.2022,
|
62109 |
+
"step": 102140
|
62110 |
+
},
|
62111 |
+
{
|
62112 |
+
"epoch": 13.8,
|
62113 |
+
"learning_rate": 4.768108126726067e-06,
|
62114 |
+
"loss": 2.2359,
|
62115 |
+
"step": 102150
|
62116 |
+
},
|
62117 |
+
{
|
62118 |
+
"epoch": 13.8,
|
62119 |
+
"learning_rate": 4.7575003424204675e-06,
|
62120 |
+
"loss": 2.2241,
|
62121 |
+
"step": 102160
|
62122 |
+
},
|
62123 |
+
{
|
62124 |
+
"epoch": 13.8,
|
62125 |
+
"learning_rate": 4.7469041808784195e-06,
|
62126 |
+
"loss": 2.211,
|
62127 |
+
"step": 102170
|
62128 |
+
},
|
62129 |
+
{
|
62130 |
+
"epoch": 13.8,
|
62131 |
+
"learning_rate": 4.736319642947895e-06,
|
62132 |
+
"loss": 2.2132,
|
62133 |
+
"step": 102180
|
62134 |
+
},
|
62135 |
+
{
|
62136 |
+
"epoch": 13.8,
|
62137 |
+
"learning_rate": 4.725746729475849e-06,
|
62138 |
+
"loss": 2.1927,
|
62139 |
+
"step": 102190
|
62140 |
+
},
|
62141 |
+
{
|
62142 |
+
"epoch": 13.8,
|
62143 |
+
"learning_rate": 4.715185441308389e-06,
|
62144 |
+
"loss": 2.215,
|
62145 |
+
"step": 102200
|
62146 |
+
},
|
62147 |
+
{
|
62148 |
+
"epoch": 13.8,
|
62149 |
+
"learning_rate": 4.704635779290655e-06,
|
62150 |
+
"loss": 2.1973,
|
62151 |
+
"step": 102210
|
62152 |
+
},
|
62153 |
+
{
|
62154 |
+
"epoch": 13.81,
|
62155 |
+
"learning_rate": 4.694097744266873e-06,
|
62156 |
+
"loss": 2.1824,
|
62157 |
+
"step": 102220
|
62158 |
+
},
|
62159 |
+
{
|
62160 |
+
"epoch": 13.81,
|
62161 |
+
"learning_rate": 4.683571337080333e-06,
|
62162 |
+
"loss": 2.2286,
|
62163 |
+
"step": 102230
|
62164 |
+
},
|
62165 |
+
{
|
62166 |
+
"epoch": 13.81,
|
62167 |
+
"learning_rate": 4.673056558573346e-06,
|
62168 |
+
"loss": 2.2037,
|
62169 |
+
"step": 102240
|
62170 |
+
},
|
62171 |
+
{
|
62172 |
+
"epoch": 13.81,
|
62173 |
+
"learning_rate": 4.662553409587405e-06,
|
62174 |
+
"loss": 2.2007,
|
62175 |
+
"step": 102250
|
62176 |
+
},
|
62177 |
+
{
|
62178 |
+
"epoch": 13.81,
|
62179 |
+
"learning_rate": 4.652061890962988e-06,
|
62180 |
+
"loss": 2.2184,
|
62181 |
+
"step": 102260
|
62182 |
+
},
|
62183 |
+
{
|
62184 |
+
"epoch": 13.81,
|
62185 |
+
"learning_rate": 4.641582003539606e-06,
|
62186 |
+
"loss": 2.197,
|
62187 |
+
"step": 102270
|
62188 |
+
},
|
62189 |
+
{
|
62190 |
+
"epoch": 13.81,
|
62191 |
+
"learning_rate": 4.6311137481559565e-06,
|
62192 |
+
"loss": 2.2,
|
62193 |
+
"step": 102280
|
62194 |
+
},
|
62195 |
+
{
|
62196 |
+
"epoch": 13.81,
|
62197 |
+
"learning_rate": 4.620657125649701e-06,
|
62198 |
+
"loss": 2.2164,
|
62199 |
+
"step": 102290
|
62200 |
+
},
|
62201 |
+
{
|
62202 |
+
"epoch": 13.82,
|
62203 |
+
"learning_rate": 4.610212136857672e-06,
|
62204 |
+
"loss": 2.1976,
|
62205 |
+
"step": 102300
|
62206 |
+
},
|
62207 |
+
{
|
62208 |
+
"epoch": 13.82,
|
62209 |
+
"learning_rate": 4.599778782615632e-06,
|
62210 |
+
"loss": 2.2099,
|
62211 |
+
"step": 102310
|
62212 |
+
},
|
62213 |
+
{
|
62214 |
+
"epoch": 13.82,
|
62215 |
+
"learning_rate": 4.589357063758514e-06,
|
62216 |
+
"loss": 2.2143,
|
62217 |
+
"step": 102320
|
62218 |
+
},
|
62219 |
+
{
|
62220 |
+
"epoch": 13.82,
|
62221 |
+
"learning_rate": 4.578946981120335e-06,
|
62222 |
+
"loss": 2.1881,
|
62223 |
+
"step": 102330
|
62224 |
+
},
|
62225 |
+
{
|
62226 |
+
"epoch": 13.82,
|
62227 |
+
"learning_rate": 4.568548535534111e-06,
|
62228 |
+
"loss": 2.2009,
|
62229 |
+
"step": 102340
|
62230 |
+
},
|
62231 |
+
{
|
62232 |
+
"epoch": 13.82,
|
62233 |
+
"learning_rate": 4.5581617278319604e-06,
|
62234 |
+
"loss": 2.2198,
|
62235 |
+
"step": 102350
|
62236 |
+
},
|
62237 |
+
{
|
62238 |
+
"epoch": 13.82,
|
62239 |
+
"learning_rate": 4.547786558845068e-06,
|
62240 |
+
"loss": 2.2031,
|
62241 |
+
"step": 102360
|
62242 |
+
},
|
62243 |
+
{
|
62244 |
+
"epoch": 13.83,
|
62245 |
+
"learning_rate": 4.537423029403686e-06,
|
62246 |
+
"loss": 2.1882,
|
62247 |
+
"step": 102370
|
62248 |
+
},
|
62249 |
+
{
|
62250 |
+
"epoch": 13.83,
|
62251 |
+
"learning_rate": 4.527071140337185e-06,
|
62252 |
+
"loss": 2.195,
|
62253 |
+
"step": 102380
|
62254 |
+
},
|
62255 |
+
{
|
62256 |
+
"epoch": 13.83,
|
62257 |
+
"learning_rate": 4.5167308924738685e-06,
|
62258 |
+
"loss": 2.205,
|
62259 |
+
"step": 102390
|
62260 |
+
},
|
62261 |
+
{
|
62262 |
+
"epoch": 13.83,
|
62263 |
+
"learning_rate": 4.506402286641259e-06,
|
62264 |
+
"loss": 2.1698,
|
62265 |
+
"step": 102400
|
62266 |
+
},
|
62267 |
+
{
|
62268 |
+
"epoch": 13.83,
|
62269 |
+
"learning_rate": 4.4960853236658435e-06,
|
62270 |
+
"loss": 2.1919,
|
62271 |
+
"step": 102410
|
62272 |
+
},
|
62273 |
+
{
|
62274 |
+
"epoch": 13.83,
|
62275 |
+
"learning_rate": 4.485780004373263e-06,
|
62276 |
+
"loss": 2.2087,
|
62277 |
+
"step": 102420
|
62278 |
+
},
|
62279 |
+
{
|
62280 |
+
"epoch": 13.83,
|
62281 |
+
"learning_rate": 4.475486329588157e-06,
|
62282 |
+
"loss": 2.202,
|
62283 |
+
"step": 102430
|
62284 |
+
},
|
62285 |
+
{
|
62286 |
+
"epoch": 13.84,
|
62287 |
+
"learning_rate": 4.4652043001342505e-06,
|
62288 |
+
"loss": 2.2202,
|
62289 |
+
"step": 102440
|
62290 |
+
},
|
62291 |
+
{
|
62292 |
+
"epoch": 13.84,
|
62293 |
+
"learning_rate": 4.454933916834369e-06,
|
62294 |
+
"loss": 2.172,
|
62295 |
+
"step": 102450
|
62296 |
+
},
|
62297 |
+
{
|
62298 |
+
"epoch": 13.84,
|
62299 |
+
"learning_rate": 4.4446751805103544e-06,
|
62300 |
+
"loss": 2.1928,
|
62301 |
+
"step": 102460
|
62302 |
+
},
|
62303 |
+
{
|
62304 |
+
"epoch": 13.84,
|
62305 |
+
"learning_rate": 4.43442809198315e-06,
|
62306 |
+
"loss": 2.2236,
|
62307 |
+
"step": 102470
|
62308 |
+
},
|
62309 |
+
{
|
62310 |
+
"epoch": 13.84,
|
62311 |
+
"learning_rate": 4.424192652072767e-06,
|
62312 |
+
"loss": 2.2181,
|
62313 |
+
"step": 102480
|
62314 |
+
},
|
62315 |
+
{
|
62316 |
+
"epoch": 13.84,
|
62317 |
+
"learning_rate": 4.413968861598266e-06,
|
62318 |
+
"loss": 2.2153,
|
62319 |
+
"step": 102490
|
62320 |
+
},
|
62321 |
+
{
|
62322 |
+
"epoch": 13.84,
|
62323 |
+
"learning_rate": 4.403756721377827e-06,
|
62324 |
+
"loss": 2.2056,
|
62325 |
+
"step": 102500
|
62326 |
+
},
|
62327 |
+
{
|
62328 |
+
"epoch": 13.84,
|
62329 |
+
"learning_rate": 4.393556232228596e-06,
|
62330 |
+
"loss": 2.1954,
|
62331 |
+
"step": 102510
|
62332 |
+
},
|
62333 |
+
{
|
62334 |
+
"epoch": 13.85,
|
62335 |
+
"learning_rate": 4.383367394966886e-06,
|
62336 |
+
"loss": 2.1726,
|
62337 |
+
"step": 102520
|
62338 |
+
},
|
62339 |
+
{
|
62340 |
+
"epoch": 13.85,
|
62341 |
+
"learning_rate": 4.373190210408045e-06,
|
62342 |
+
"loss": 2.1764,
|
62343 |
+
"step": 102530
|
62344 |
+
},
|
62345 |
+
{
|
62346 |
+
"epoch": 13.85,
|
62347 |
+
"learning_rate": 4.363024679366456e-06,
|
62348 |
+
"loss": 2.2037,
|
62349 |
+
"step": 102540
|
62350 |
+
},
|
62351 |
+
{
|
62352 |
+
"epoch": 13.85,
|
62353 |
+
"learning_rate": 4.352870802655633e-06,
|
62354 |
+
"loss": 2.1961,
|
62355 |
+
"step": 102550
|
62356 |
+
},
|
62357 |
+
{
|
62358 |
+
"epoch": 13.85,
|
62359 |
+
"learning_rate": 4.342728581088079e-06,
|
62360 |
+
"loss": 2.1842,
|
62361 |
+
"step": 102560
|
62362 |
+
},
|
62363 |
+
{
|
62364 |
+
"epoch": 13.85,
|
62365 |
+
"learning_rate": 4.332598015475425e-06,
|
62366 |
+
"loss": 2.1952,
|
62367 |
+
"step": 102570
|
62368 |
+
},
|
62369 |
+
{
|
62370 |
+
"epoch": 13.85,
|
62371 |
+
"learning_rate": 4.322479106628374e-06,
|
62372 |
+
"loss": 2.1948,
|
62373 |
+
"step": 102580
|
62374 |
+
},
|
62375 |
+
{
|
62376 |
+
"epoch": 13.86,
|
62377 |
+
"learning_rate": 4.3123718553566446e-06,
|
62378 |
+
"loss": 2.1408,
|
62379 |
+
"step": 102590
|
62380 |
+
},
|
62381 |
+
{
|
62382 |
+
"epoch": 13.86,
|
62383 |
+
"learning_rate": 4.302276262469056e-06,
|
62384 |
+
"loss": 2.2133,
|
62385 |
+
"step": 102600
|
62386 |
+
},
|
62387 |
+
{
|
62388 |
+
"epoch": 13.86,
|
62389 |
+
"learning_rate": 4.29219232877348e-06,
|
62390 |
+
"loss": 2.1878,
|
62391 |
+
"step": 102610
|
62392 |
+
},
|
62393 |
+
{
|
62394 |
+
"epoch": 13.86,
|
62395 |
+
"learning_rate": 4.282120055076904e-06,
|
62396 |
+
"loss": 2.2225,
|
62397 |
+
"step": 102620
|
62398 |
+
},
|
62399 |
+
{
|
62400 |
+
"epoch": 13.86,
|
62401 |
+
"learning_rate": 4.272059442185316e-06,
|
62402 |
+
"loss": 2.2082,
|
62403 |
+
"step": 102630
|
62404 |
+
},
|
62405 |
+
{
|
62406 |
+
"epoch": 13.86,
|
62407 |
+
"learning_rate": 4.262010490903772e-06,
|
62408 |
+
"loss": 2.1941,
|
62409 |
+
"step": 102640
|
62410 |
+
},
|
62411 |
+
{
|
62412 |
+
"epoch": 13.86,
|
62413 |
+
"learning_rate": 4.251973202036462e-06,
|
62414 |
+
"loss": 2.2062,
|
62415 |
+
"step": 102650
|
62416 |
+
},
|
62417 |
+
{
|
62418 |
+
"epoch": 13.86,
|
62419 |
+
"learning_rate": 4.241947576386595e-06,
|
62420 |
+
"loss": 2.1949,
|
62421 |
+
"step": 102660
|
62422 |
+
},
|
62423 |
+
{
|
62424 |
+
"epoch": 13.87,
|
62425 |
+
"learning_rate": 4.231933614756444e-06,
|
62426 |
+
"loss": 2.2056,
|
62427 |
+
"step": 102670
|
62428 |
+
},
|
62429 |
+
{
|
62430 |
+
"epoch": 13.87,
|
62431 |
+
"learning_rate": 4.221931317947352e-06,
|
62432 |
+
"loss": 2.2287,
|
62433 |
+
"step": 102680
|
62434 |
+
},
|
62435 |
+
{
|
62436 |
+
"epoch": 13.87,
|
62437 |
+
"learning_rate": 4.211940686759729e-06,
|
62438 |
+
"loss": 2.1932,
|
62439 |
+
"step": 102690
|
62440 |
+
},
|
62441 |
+
{
|
62442 |
+
"epoch": 13.87,
|
62443 |
+
"learning_rate": 4.201961721993119e-06,
|
62444 |
+
"loss": 2.196,
|
62445 |
+
"step": 102700
|
62446 |
+
},
|
62447 |
+
{
|
62448 |
+
"epoch": 13.87,
|
62449 |
+
"learning_rate": 4.191994424445966e-06,
|
62450 |
+
"loss": 2.2051,
|
62451 |
+
"step": 102710
|
62452 |
+
},
|
62453 |
+
{
|
62454 |
+
"epoch": 13.87,
|
62455 |
+
"learning_rate": 4.182038794915948e-06,
|
62456 |
+
"loss": 2.2149,
|
62457 |
+
"step": 102720
|
62458 |
+
},
|
62459 |
+
{
|
62460 |
+
"epoch": 13.87,
|
62461 |
+
"learning_rate": 4.1720948341997625e-06,
|
62462 |
+
"loss": 2.1769,
|
62463 |
+
"step": 102730
|
62464 |
+
},
|
62465 |
+
{
|
62466 |
+
"epoch": 13.88,
|
62467 |
+
"learning_rate": 4.162162543093123e-06,
|
62468 |
+
"loss": 2.2089,
|
62469 |
+
"step": 102740
|
62470 |
+
},
|
62471 |
+
{
|
62472 |
+
"epoch": 13.88,
|
62473 |
+
"learning_rate": 4.152241922390858e-06,
|
62474 |
+
"loss": 2.2152,
|
62475 |
+
"step": 102750
|
62476 |
+
},
|
62477 |
+
{
|
62478 |
+
"epoch": 13.88,
|
62479 |
+
"learning_rate": 4.14233297288682e-06,
|
62480 |
+
"loss": 2.1864,
|
62481 |
+
"step": 102760
|
62482 |
+
},
|
62483 |
+
{
|
62484 |
+
"epoch": 13.88,
|
62485 |
+
"learning_rate": 4.13243569537397e-06,
|
62486 |
+
"loss": 2.2047,
|
62487 |
+
"step": 102770
|
62488 |
+
},
|
62489 |
+
{
|
62490 |
+
"epoch": 13.88,
|
62491 |
+
"learning_rate": 4.122550090644361e-06,
|
62492 |
+
"loss": 2.2095,
|
62493 |
+
"step": 102780
|
62494 |
+
},
|
62495 |
+
{
|
62496 |
+
"epoch": 13.88,
|
62497 |
+
"learning_rate": 4.112676159488992e-06,
|
62498 |
+
"loss": 2.1928,
|
62499 |
+
"step": 102790
|
62500 |
+
},
|
62501 |
+
{
|
62502 |
+
"epoch": 13.88,
|
62503 |
+
"learning_rate": 4.102813902698082e-06,
|
62504 |
+
"loss": 2.2041,
|
62505 |
+
"step": 102800
|
62506 |
+
},
|
62507 |
+
{
|
62508 |
+
"epoch": 13.89,
|
62509 |
+
"learning_rate": 4.092963321060766e-06,
|
62510 |
+
"loss": 2.2078,
|
62511 |
+
"step": 102810
|
62512 |
+
},
|
62513 |
+
{
|
62514 |
+
"epoch": 13.89,
|
62515 |
+
"learning_rate": 4.0831244153653965e-06,
|
62516 |
+
"loss": 2.1855,
|
62517 |
+
"step": 102820
|
62518 |
+
},
|
62519 |
+
{
|
62520 |
+
"epoch": 13.89,
|
62521 |
+
"learning_rate": 4.07329718639926e-06,
|
62522 |
+
"loss": 2.2114,
|
62523 |
+
"step": 102830
|
62524 |
+
},
|
62525 |
+
{
|
62526 |
+
"epoch": 13.89,
|
62527 |
+
"learning_rate": 4.063481634948779e-06,
|
62528 |
+
"loss": 2.2252,
|
62529 |
+
"step": 102840
|
62530 |
+
},
|
62531 |
+
{
|
62532 |
+
"epoch": 13.89,
|
62533 |
+
"learning_rate": 4.053677761799423e-06,
|
62534 |
+
"loss": 2.1712,
|
62535 |
+
"step": 102850
|
62536 |
+
},
|
62537 |
+
{
|
62538 |
+
"epoch": 13.89,
|
62539 |
+
"learning_rate": 4.043885567735733e-06,
|
62540 |
+
"loss": 2.1821,
|
62541 |
+
"step": 102860
|
62542 |
+
},
|
62543 |
+
{
|
62544 |
+
"epoch": 13.89,
|
62545 |
+
"learning_rate": 4.034105053541298e-06,
|
62546 |
+
"loss": 2.1961,
|
62547 |
+
"step": 102870
|
62548 |
+
},
|
62549 |
+
{
|
62550 |
+
"epoch": 13.89,
|
62551 |
+
"learning_rate": 4.0243362199988085e-06,
|
62552 |
+
"loss": 2.2207,
|
62553 |
+
"step": 102880
|
62554 |
+
},
|
62555 |
+
{
|
62556 |
+
"epoch": 13.9,
|
62557 |
+
"learning_rate": 4.0145790678899555e-06,
|
62558 |
+
"loss": 2.2167,
|
62559 |
+
"step": 102890
|
62560 |
+
},
|
62561 |
+
{
|
62562 |
+
"epoch": 13.9,
|
62563 |
+
"learning_rate": 4.004833597995616e-06,
|
62564 |
+
"loss": 2.2107,
|
62565 |
+
"step": 102900
|
62566 |
+
},
|
62567 |
+
{
|
62568 |
+
"epoch": 13.9,
|
62569 |
+
"learning_rate": 3.995099811095564e-06,
|
62570 |
+
"loss": 2.1945,
|
62571 |
+
"step": 102910
|
62572 |
+
},
|
62573 |
+
{
|
62574 |
+
"epoch": 13.9,
|
62575 |
+
"learning_rate": 3.985377707968778e-06,
|
62576 |
+
"loss": 2.2154,
|
62577 |
+
"step": 102920
|
62578 |
+
},
|
62579 |
+
{
|
62580 |
+
"epoch": 13.9,
|
62581 |
+
"learning_rate": 3.975667289393236e-06,
|
62582 |
+
"loss": 2.2188,
|
62583 |
+
"step": 102930
|
62584 |
+
},
|
62585 |
+
{
|
62586 |
+
"epoch": 13.9,
|
62587 |
+
"learning_rate": 3.9659685561460154e-06,
|
62588 |
+
"loss": 2.2029,
|
62589 |
+
"step": 102940
|
62590 |
+
},
|
62591 |
+
{
|
62592 |
+
"epoch": 13.9,
|
62593 |
+
"learning_rate": 3.956281509003228e-06,
|
62594 |
+
"loss": 2.1711,
|
62595 |
+
"step": 102950
|
62596 |
+
},
|
62597 |
+
{
|
62598 |
+
"epoch": 13.91,
|
62599 |
+
"learning_rate": 3.9466061487400395e-06,
|
62600 |
+
"loss": 2.1978,
|
62601 |
+
"step": 102960
|
62602 |
+
},
|
62603 |
+
{
|
62604 |
+
"epoch": 13.91,
|
62605 |
+
"learning_rate": 3.9369424761307115e-06,
|
62606 |
+
"loss": 2.2257,
|
62607 |
+
"step": 102970
|
62608 |
+
},
|
62609 |
+
{
|
62610 |
+
"epoch": 13.91,
|
62611 |
+
"learning_rate": 3.927290491948609e-06,
|
62612 |
+
"loss": 2.2007,
|
62613 |
+
"step": 102980
|
62614 |
+
},
|
62615 |
+
{
|
62616 |
+
"epoch": 13.91,
|
62617 |
+
"learning_rate": 3.917650196966049e-06,
|
62618 |
+
"loss": 2.1983,
|
62619 |
+
"step": 102990
|
62620 |
+
},
|
62621 |
+
{
|
62622 |
+
"epoch": 13.91,
|
62623 |
+
"learning_rate": 3.90802159195453e-06,
|
62624 |
+
"loss": 2.2049,
|
62625 |
+
"step": 103000
|
62626 |
+
},
|
62627 |
+
{
|
62628 |
+
"epoch": 13.91,
|
62629 |
+
"eval_loss": 2.39996337890625,
|
62630 |
+
"eval_runtime": 1269.0782,
|
62631 |
+
"eval_samples_per_second": 59.992,
|
62632 |
+
"eval_steps_per_second": 5.0,
|
62633 |
+
"step": 103000
|
62634 |
}
|
62635 |
],
|
62636 |
"logging_steps": 10,
|
62637 |
"max_steps": 111060,
|
62638 |
"num_train_epochs": 15,
|
62639 |
"save_steps": 1000,
|
62640 |
+
"total_flos": 3.250975421969203e+19,
|
62641 |
"trial_name": null,
|
62642 |
"trial_params": null
|
62643 |
}
|