Training in progress, step 1600
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1218 -5
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4736616809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d827ca908186ff4d1eeac2cd097d10d72152f52a20d876b710c6ab92928f405b
|
3 |
size 4736616809
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2368281769
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:728b6dfaa1c989be32093514baf4c7d88f3ff72d43aa5c614b342b108779fd92
|
3 |
size 2368281769
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:204741013cff7ad3367b61fa8ef614471a5423dec181a95b8549965efd787d9c
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:504d7c79830240f80701a38d5e784c442b3685f4ab42e22f30cf40ef402b0086
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "output/checkpoint-
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -8497,11 +8497,1224 @@
|
|
8497 |
"eval_samples_per_second": 0.054,
|
8498 |
"eval_steps_per_second": 0.054,
|
8499 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8500 |
}
|
8501 |
],
|
8502 |
"max_steps": 2000,
|
8503 |
"num_train_epochs": 9223372036854775807,
|
8504 |
-
"total_flos": 1.
|
8505 |
"trial_name": null,
|
8506 |
"trial_params": null
|
8507 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.1070616245269775,
|
3 |
+
"best_model_checkpoint": "output/checkpoint-1600",
|
4 |
+
"epoch": 1.18650390625,
|
5 |
+
"global_step": 1600,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
8497 |
"eval_samples_per_second": 0.054,
|
8498 |
"eval_steps_per_second": 0.054,
|
8499 |
"step": 1400
|
8500 |
+
},
|
8501 |
+
{
|
8502 |
+
"epoch": 1.09,
|
8503 |
+
"learning_rate": 0.0002662222222222222,
|
8504 |
+
"loss": 2.0898,
|
8505 |
+
"step": 1401
|
8506 |
+
},
|
8507 |
+
{
|
8508 |
+
"epoch": 1.09,
|
8509 |
+
"learning_rate": 0.00026577777777777776,
|
8510 |
+
"loss": 2.1313,
|
8511 |
+
"step": 1402
|
8512 |
+
},
|
8513 |
+
{
|
8514 |
+
"epoch": 1.09,
|
8515 |
+
"learning_rate": 0.00026533333333333335,
|
8516 |
+
"loss": 2.1198,
|
8517 |
+
"step": 1403
|
8518 |
+
},
|
8519 |
+
{
|
8520 |
+
"epoch": 1.09,
|
8521 |
+
"learning_rate": 0.00026488888888888894,
|
8522 |
+
"loss": 2.1396,
|
8523 |
+
"step": 1404
|
8524 |
+
},
|
8525 |
+
{
|
8526 |
+
"epoch": 1.09,
|
8527 |
+
"learning_rate": 0.00026444444444444443,
|
8528 |
+
"loss": 2.1515,
|
8529 |
+
"step": 1405
|
8530 |
+
},
|
8531 |
+
{
|
8532 |
+
"epoch": 1.09,
|
8533 |
+
"learning_rate": 0.000264,
|
8534 |
+
"loss": 2.1336,
|
8535 |
+
"step": 1406
|
8536 |
+
},
|
8537 |
+
{
|
8538 |
+
"epoch": 1.09,
|
8539 |
+
"learning_rate": 0.00026355555555555556,
|
8540 |
+
"loss": 2.1756,
|
8541 |
+
"step": 1407
|
8542 |
+
},
|
8543 |
+
{
|
8544 |
+
"epoch": 1.09,
|
8545 |
+
"learning_rate": 0.00026311111111111115,
|
8546 |
+
"loss": 2.1563,
|
8547 |
+
"step": 1408
|
8548 |
+
},
|
8549 |
+
{
|
8550 |
+
"epoch": 1.09,
|
8551 |
+
"learning_rate": 0.00026266666666666664,
|
8552 |
+
"loss": 2.1252,
|
8553 |
+
"step": 1409
|
8554 |
+
},
|
8555 |
+
{
|
8556 |
+
"epoch": 1.09,
|
8557 |
+
"learning_rate": 0.00026222222222222223,
|
8558 |
+
"loss": 2.118,
|
8559 |
+
"step": 1410
|
8560 |
+
},
|
8561 |
+
{
|
8562 |
+
"epoch": 1.09,
|
8563 |
+
"learning_rate": 0.0002617777777777778,
|
8564 |
+
"loss": 2.142,
|
8565 |
+
"step": 1411
|
8566 |
+
},
|
8567 |
+
{
|
8568 |
+
"epoch": 1.09,
|
8569 |
+
"learning_rate": 0.00026133333333333336,
|
8570 |
+
"loss": 2.2093,
|
8571 |
+
"step": 1412
|
8572 |
+
},
|
8573 |
+
{
|
8574 |
+
"epoch": 1.09,
|
8575 |
+
"learning_rate": 0.0002608888888888889,
|
8576 |
+
"loss": 2.1199,
|
8577 |
+
"step": 1413
|
8578 |
+
},
|
8579 |
+
{
|
8580 |
+
"epoch": 1.09,
|
8581 |
+
"learning_rate": 0.00026044444444444444,
|
8582 |
+
"loss": 2.0734,
|
8583 |
+
"step": 1414
|
8584 |
+
},
|
8585 |
+
{
|
8586 |
+
"epoch": 1.09,
|
8587 |
+
"learning_rate": 0.00026000000000000003,
|
8588 |
+
"loss": 2.0976,
|
8589 |
+
"step": 1415
|
8590 |
+
},
|
8591 |
+
{
|
8592 |
+
"epoch": 1.09,
|
8593 |
+
"learning_rate": 0.00025955555555555557,
|
8594 |
+
"loss": 2.0571,
|
8595 |
+
"step": 1416
|
8596 |
+
},
|
8597 |
+
{
|
8598 |
+
"epoch": 1.1,
|
8599 |
+
"learning_rate": 0.0002591111111111111,
|
8600 |
+
"loss": 2.1415,
|
8601 |
+
"step": 1417
|
8602 |
+
},
|
8603 |
+
{
|
8604 |
+
"epoch": 1.1,
|
8605 |
+
"learning_rate": 0.00025866666666666665,
|
8606 |
+
"loss": 2.0729,
|
8607 |
+
"step": 1418
|
8608 |
+
},
|
8609 |
+
{
|
8610 |
+
"epoch": 1.1,
|
8611 |
+
"learning_rate": 0.00025822222222222224,
|
8612 |
+
"loss": 2.1535,
|
8613 |
+
"step": 1419
|
8614 |
+
},
|
8615 |
+
{
|
8616 |
+
"epoch": 1.1,
|
8617 |
+
"learning_rate": 0.00025777777777777783,
|
8618 |
+
"loss": 2.1454,
|
8619 |
+
"step": 1420
|
8620 |
+
},
|
8621 |
+
{
|
8622 |
+
"epoch": 1.1,
|
8623 |
+
"learning_rate": 0.0002573333333333333,
|
8624 |
+
"loss": 2.1955,
|
8625 |
+
"step": 1421
|
8626 |
+
},
|
8627 |
+
{
|
8628 |
+
"epoch": 1.1,
|
8629 |
+
"learning_rate": 0.0002568888888888889,
|
8630 |
+
"loss": 2.1006,
|
8631 |
+
"step": 1422
|
8632 |
+
},
|
8633 |
+
{
|
8634 |
+
"epoch": 1.1,
|
8635 |
+
"learning_rate": 0.00025644444444444445,
|
8636 |
+
"loss": 2.0917,
|
8637 |
+
"step": 1423
|
8638 |
+
},
|
8639 |
+
{
|
8640 |
+
"epoch": 1.1,
|
8641 |
+
"learning_rate": 0.00025600000000000004,
|
8642 |
+
"loss": 2.0921,
|
8643 |
+
"step": 1424
|
8644 |
+
},
|
8645 |
+
{
|
8646 |
+
"epoch": 1.1,
|
8647 |
+
"learning_rate": 0.00025555555555555553,
|
8648 |
+
"loss": 2.1228,
|
8649 |
+
"step": 1425
|
8650 |
+
},
|
8651 |
+
{
|
8652 |
+
"epoch": 1.1,
|
8653 |
+
"learning_rate": 0.0002551111111111111,
|
8654 |
+
"loss": 2.1433,
|
8655 |
+
"step": 1426
|
8656 |
+
},
|
8657 |
+
{
|
8658 |
+
"epoch": 1.1,
|
8659 |
+
"learning_rate": 0.0002546666666666667,
|
8660 |
+
"loss": 2.1351,
|
8661 |
+
"step": 1427
|
8662 |
+
},
|
8663 |
+
{
|
8664 |
+
"epoch": 1.1,
|
8665 |
+
"learning_rate": 0.00025422222222222225,
|
8666 |
+
"loss": 2.1113,
|
8667 |
+
"step": 1428
|
8668 |
+
},
|
8669 |
+
{
|
8670 |
+
"epoch": 1.1,
|
8671 |
+
"learning_rate": 0.0002537777777777778,
|
8672 |
+
"loss": 2.0421,
|
8673 |
+
"step": 1429
|
8674 |
+
},
|
8675 |
+
{
|
8676 |
+
"epoch": 1.1,
|
8677 |
+
"learning_rate": 0.00025333333333333333,
|
8678 |
+
"loss": 2.0756,
|
8679 |
+
"step": 1430
|
8680 |
+
},
|
8681 |
+
{
|
8682 |
+
"epoch": 1.1,
|
8683 |
+
"learning_rate": 0.0002528888888888889,
|
8684 |
+
"loss": 2.1356,
|
8685 |
+
"step": 1431
|
8686 |
+
},
|
8687 |
+
{
|
8688 |
+
"epoch": 1.1,
|
8689 |
+
"learning_rate": 0.00025244444444444446,
|
8690 |
+
"loss": 2.1675,
|
8691 |
+
"step": 1432
|
8692 |
+
},
|
8693 |
+
{
|
8694 |
+
"epoch": 1.1,
|
8695 |
+
"learning_rate": 0.000252,
|
8696 |
+
"loss": 2.1583,
|
8697 |
+
"step": 1433
|
8698 |
+
},
|
8699 |
+
{
|
8700 |
+
"epoch": 1.1,
|
8701 |
+
"learning_rate": 0.0002515555555555556,
|
8702 |
+
"loss": 2.1534,
|
8703 |
+
"step": 1434
|
8704 |
+
},
|
8705 |
+
{
|
8706 |
+
"epoch": 1.1,
|
8707 |
+
"learning_rate": 0.00025111111111111113,
|
8708 |
+
"loss": 2.1404,
|
8709 |
+
"step": 1435
|
8710 |
+
},
|
8711 |
+
{
|
8712 |
+
"epoch": 1.1,
|
8713 |
+
"learning_rate": 0.00025066666666666667,
|
8714 |
+
"loss": 2.1038,
|
8715 |
+
"step": 1436
|
8716 |
+
},
|
8717 |
+
{
|
8718 |
+
"epoch": 1.11,
|
8719 |
+
"learning_rate": 0.0002502222222222222,
|
8720 |
+
"loss": 2.1705,
|
8721 |
+
"step": 1437
|
8722 |
+
},
|
8723 |
+
{
|
8724 |
+
"epoch": 1.11,
|
8725 |
+
"learning_rate": 0.0002497777777777778,
|
8726 |
+
"loss": 2.1683,
|
8727 |
+
"step": 1438
|
8728 |
+
},
|
8729 |
+
{
|
8730 |
+
"epoch": 1.11,
|
8731 |
+
"learning_rate": 0.00024933333333333334,
|
8732 |
+
"loss": 2.1288,
|
8733 |
+
"step": 1439
|
8734 |
+
},
|
8735 |
+
{
|
8736 |
+
"epoch": 1.11,
|
8737 |
+
"learning_rate": 0.0002488888888888889,
|
8738 |
+
"loss": 2.1502,
|
8739 |
+
"step": 1440
|
8740 |
+
},
|
8741 |
+
{
|
8742 |
+
"epoch": 1.11,
|
8743 |
+
"learning_rate": 0.0002484444444444444,
|
8744 |
+
"loss": 2.1077,
|
8745 |
+
"step": 1441
|
8746 |
+
},
|
8747 |
+
{
|
8748 |
+
"epoch": 1.11,
|
8749 |
+
"learning_rate": 0.000248,
|
8750 |
+
"loss": 2.1261,
|
8751 |
+
"step": 1442
|
8752 |
+
},
|
8753 |
+
{
|
8754 |
+
"epoch": 1.11,
|
8755 |
+
"learning_rate": 0.0002475555555555556,
|
8756 |
+
"loss": 2.1351,
|
8757 |
+
"step": 1443
|
8758 |
+
},
|
8759 |
+
{
|
8760 |
+
"epoch": 1.11,
|
8761 |
+
"learning_rate": 0.00024711111111111114,
|
8762 |
+
"loss": 2.1047,
|
8763 |
+
"step": 1444
|
8764 |
+
},
|
8765 |
+
{
|
8766 |
+
"epoch": 1.11,
|
8767 |
+
"learning_rate": 0.0002466666666666667,
|
8768 |
+
"loss": 2.1223,
|
8769 |
+
"step": 1445
|
8770 |
+
},
|
8771 |
+
{
|
8772 |
+
"epoch": 1.11,
|
8773 |
+
"learning_rate": 0.0002462222222222222,
|
8774 |
+
"loss": 2.0726,
|
8775 |
+
"step": 1446
|
8776 |
+
},
|
8777 |
+
{
|
8778 |
+
"epoch": 1.11,
|
8779 |
+
"learning_rate": 0.0002457777777777778,
|
8780 |
+
"loss": 2.1112,
|
8781 |
+
"step": 1447
|
8782 |
+
},
|
8783 |
+
{
|
8784 |
+
"epoch": 1.11,
|
8785 |
+
"learning_rate": 0.00024533333333333335,
|
8786 |
+
"loss": 2.1283,
|
8787 |
+
"step": 1448
|
8788 |
+
},
|
8789 |
+
{
|
8790 |
+
"epoch": 1.11,
|
8791 |
+
"learning_rate": 0.0002448888888888889,
|
8792 |
+
"loss": 2.1205,
|
8793 |
+
"step": 1449
|
8794 |
+
},
|
8795 |
+
{
|
8796 |
+
"epoch": 1.11,
|
8797 |
+
"learning_rate": 0.0002444444444444445,
|
8798 |
+
"loss": 2.1715,
|
8799 |
+
"step": 1450
|
8800 |
+
},
|
8801 |
+
{
|
8802 |
+
"epoch": 1.11,
|
8803 |
+
"learning_rate": 0.000244,
|
8804 |
+
"loss": 2.1517,
|
8805 |
+
"step": 1451
|
8806 |
+
},
|
8807 |
+
{
|
8808 |
+
"epoch": 1.11,
|
8809 |
+
"learning_rate": 0.0002435555555555556,
|
8810 |
+
"loss": 2.1007,
|
8811 |
+
"step": 1452
|
8812 |
+
},
|
8813 |
+
{
|
8814 |
+
"epoch": 1.11,
|
8815 |
+
"learning_rate": 0.0002431111111111111,
|
8816 |
+
"loss": 2.1804,
|
8817 |
+
"step": 1453
|
8818 |
+
},
|
8819 |
+
{
|
8820 |
+
"epoch": 1.11,
|
8821 |
+
"learning_rate": 0.0002426666666666667,
|
8822 |
+
"loss": 2.1121,
|
8823 |
+
"step": 1454
|
8824 |
+
},
|
8825 |
+
{
|
8826 |
+
"epoch": 1.11,
|
8827 |
+
"learning_rate": 0.0002422222222222222,
|
8828 |
+
"loss": 2.1229,
|
8829 |
+
"step": 1455
|
8830 |
+
},
|
8831 |
+
{
|
8832 |
+
"epoch": 1.11,
|
8833 |
+
"learning_rate": 0.0002417777777777778,
|
8834 |
+
"loss": 2.0234,
|
8835 |
+
"step": 1456
|
8836 |
+
},
|
8837 |
+
{
|
8838 |
+
"epoch": 1.12,
|
8839 |
+
"learning_rate": 0.00024133333333333336,
|
8840 |
+
"loss": 2.0615,
|
8841 |
+
"step": 1457
|
8842 |
+
},
|
8843 |
+
{
|
8844 |
+
"epoch": 1.12,
|
8845 |
+
"learning_rate": 0.0002408888888888889,
|
8846 |
+
"loss": 2.1373,
|
8847 |
+
"step": 1458
|
8848 |
+
},
|
8849 |
+
{
|
8850 |
+
"epoch": 1.12,
|
8851 |
+
"learning_rate": 0.00024044444444444447,
|
8852 |
+
"loss": 2.1106,
|
8853 |
+
"step": 1459
|
8854 |
+
},
|
8855 |
+
{
|
8856 |
+
"epoch": 1.12,
|
8857 |
+
"learning_rate": 0.00024,
|
8858 |
+
"loss": 2.1396,
|
8859 |
+
"step": 1460
|
8860 |
+
},
|
8861 |
+
{
|
8862 |
+
"epoch": 1.12,
|
8863 |
+
"learning_rate": 0.00023955555555555557,
|
8864 |
+
"loss": 2.1163,
|
8865 |
+
"step": 1461
|
8866 |
+
},
|
8867 |
+
{
|
8868 |
+
"epoch": 1.12,
|
8869 |
+
"learning_rate": 0.0002391111111111111,
|
8870 |
+
"loss": 2.0869,
|
8871 |
+
"step": 1462
|
8872 |
+
},
|
8873 |
+
{
|
8874 |
+
"epoch": 1.12,
|
8875 |
+
"learning_rate": 0.00023866666666666668,
|
8876 |
+
"loss": 2.1535,
|
8877 |
+
"step": 1463
|
8878 |
+
},
|
8879 |
+
{
|
8880 |
+
"epoch": 1.12,
|
8881 |
+
"learning_rate": 0.00023822222222222222,
|
8882 |
+
"loss": 2.0669,
|
8883 |
+
"step": 1464
|
8884 |
+
},
|
8885 |
+
{
|
8886 |
+
"epoch": 1.12,
|
8887 |
+
"learning_rate": 0.00023777777777777778,
|
8888 |
+
"loss": 2.1315,
|
8889 |
+
"step": 1465
|
8890 |
+
},
|
8891 |
+
{
|
8892 |
+
"epoch": 1.12,
|
8893 |
+
"learning_rate": 0.00023733333333333337,
|
8894 |
+
"loss": 2.1385,
|
8895 |
+
"step": 1466
|
8896 |
+
},
|
8897 |
+
{
|
8898 |
+
"epoch": 1.12,
|
8899 |
+
"learning_rate": 0.00023688888888888889,
|
8900 |
+
"loss": 2.1548,
|
8901 |
+
"step": 1467
|
8902 |
+
},
|
8903 |
+
{
|
8904 |
+
"epoch": 1.12,
|
8905 |
+
"learning_rate": 0.00023644444444444448,
|
8906 |
+
"loss": 2.1021,
|
8907 |
+
"step": 1468
|
8908 |
+
},
|
8909 |
+
{
|
8910 |
+
"epoch": 1.12,
|
8911 |
+
"learning_rate": 0.000236,
|
8912 |
+
"loss": 2.1829,
|
8913 |
+
"step": 1469
|
8914 |
+
},
|
8915 |
+
{
|
8916 |
+
"epoch": 1.12,
|
8917 |
+
"learning_rate": 0.00023555555555555558,
|
8918 |
+
"loss": 2.1002,
|
8919 |
+
"step": 1470
|
8920 |
+
},
|
8921 |
+
{
|
8922 |
+
"epoch": 1.12,
|
8923 |
+
"learning_rate": 0.0002351111111111111,
|
8924 |
+
"loss": 2.162,
|
8925 |
+
"step": 1471
|
8926 |
+
},
|
8927 |
+
{
|
8928 |
+
"epoch": 1.12,
|
8929 |
+
"learning_rate": 0.0002346666666666667,
|
8930 |
+
"loss": 2.099,
|
8931 |
+
"step": 1472
|
8932 |
+
},
|
8933 |
+
{
|
8934 |
+
"epoch": 1.12,
|
8935 |
+
"learning_rate": 0.00023422222222222225,
|
8936 |
+
"loss": 2.1477,
|
8937 |
+
"step": 1473
|
8938 |
+
},
|
8939 |
+
{
|
8940 |
+
"epoch": 1.12,
|
8941 |
+
"learning_rate": 0.0002337777777777778,
|
8942 |
+
"loss": 2.1612,
|
8943 |
+
"step": 1474
|
8944 |
+
},
|
8945 |
+
{
|
8946 |
+
"epoch": 1.12,
|
8947 |
+
"learning_rate": 0.00023333333333333336,
|
8948 |
+
"loss": 2.0684,
|
8949 |
+
"step": 1475
|
8950 |
+
},
|
8951 |
+
{
|
8952 |
+
"epoch": 1.12,
|
8953 |
+
"learning_rate": 0.0002328888888888889,
|
8954 |
+
"loss": 2.1182,
|
8955 |
+
"step": 1476
|
8956 |
+
},
|
8957 |
+
{
|
8958 |
+
"epoch": 1.13,
|
8959 |
+
"learning_rate": 0.00023244444444444446,
|
8960 |
+
"loss": 2.1298,
|
8961 |
+
"step": 1477
|
8962 |
+
},
|
8963 |
+
{
|
8964 |
+
"epoch": 1.13,
|
8965 |
+
"learning_rate": 0.000232,
|
8966 |
+
"loss": 2.0761,
|
8967 |
+
"step": 1478
|
8968 |
+
},
|
8969 |
+
{
|
8970 |
+
"epoch": 1.13,
|
8971 |
+
"learning_rate": 0.00023155555555555557,
|
8972 |
+
"loss": 2.1133,
|
8973 |
+
"step": 1479
|
8974 |
+
},
|
8975 |
+
{
|
8976 |
+
"epoch": 1.13,
|
8977 |
+
"learning_rate": 0.0002311111111111111,
|
8978 |
+
"loss": 2.1749,
|
8979 |
+
"step": 1480
|
8980 |
+
},
|
8981 |
+
{
|
8982 |
+
"epoch": 1.13,
|
8983 |
+
"learning_rate": 0.00023066666666666667,
|
8984 |
+
"loss": 2.1653,
|
8985 |
+
"step": 1481
|
8986 |
+
},
|
8987 |
+
{
|
8988 |
+
"epoch": 1.13,
|
8989 |
+
"learning_rate": 0.00023022222222222224,
|
8990 |
+
"loss": 2.1304,
|
8991 |
+
"step": 1482
|
8992 |
+
},
|
8993 |
+
{
|
8994 |
+
"epoch": 1.13,
|
8995 |
+
"learning_rate": 0.00022977777777777778,
|
8996 |
+
"loss": 2.0739,
|
8997 |
+
"step": 1483
|
8998 |
+
},
|
8999 |
+
{
|
9000 |
+
"epoch": 1.13,
|
9001 |
+
"learning_rate": 0.00022933333333333334,
|
9002 |
+
"loss": 2.0529,
|
9003 |
+
"step": 1484
|
9004 |
+
},
|
9005 |
+
{
|
9006 |
+
"epoch": 1.13,
|
9007 |
+
"learning_rate": 0.00022888888888888888,
|
9008 |
+
"loss": 2.1136,
|
9009 |
+
"step": 1485
|
9010 |
+
},
|
9011 |
+
{
|
9012 |
+
"epoch": 1.13,
|
9013 |
+
"learning_rate": 0.00022844444444444447,
|
9014 |
+
"loss": 2.11,
|
9015 |
+
"step": 1486
|
9016 |
+
},
|
9017 |
+
{
|
9018 |
+
"epoch": 1.13,
|
9019 |
+
"learning_rate": 0.00022799999999999999,
|
9020 |
+
"loss": 2.1093,
|
9021 |
+
"step": 1487
|
9022 |
+
},
|
9023 |
+
{
|
9024 |
+
"epoch": 1.13,
|
9025 |
+
"learning_rate": 0.00022755555555555558,
|
9026 |
+
"loss": 2.1102,
|
9027 |
+
"step": 1488
|
9028 |
+
},
|
9029 |
+
{
|
9030 |
+
"epoch": 1.13,
|
9031 |
+
"learning_rate": 0.00022711111111111114,
|
9032 |
+
"loss": 2.1033,
|
9033 |
+
"step": 1489
|
9034 |
+
},
|
9035 |
+
{
|
9036 |
+
"epoch": 1.13,
|
9037 |
+
"learning_rate": 0.00022666666666666668,
|
9038 |
+
"loss": 2.1908,
|
9039 |
+
"step": 1490
|
9040 |
+
},
|
9041 |
+
{
|
9042 |
+
"epoch": 1.13,
|
9043 |
+
"learning_rate": 0.00022622222222222225,
|
9044 |
+
"loss": 2.0719,
|
9045 |
+
"step": 1491
|
9046 |
+
},
|
9047 |
+
{
|
9048 |
+
"epoch": 1.13,
|
9049 |
+
"learning_rate": 0.0002257777777777778,
|
9050 |
+
"loss": 2.0688,
|
9051 |
+
"step": 1492
|
9052 |
+
},
|
9053 |
+
{
|
9054 |
+
"epoch": 1.13,
|
9055 |
+
"learning_rate": 0.00022533333333333335,
|
9056 |
+
"loss": 2.1859,
|
9057 |
+
"step": 1493
|
9058 |
+
},
|
9059 |
+
{
|
9060 |
+
"epoch": 1.13,
|
9061 |
+
"learning_rate": 0.0002248888888888889,
|
9062 |
+
"loss": 2.1219,
|
9063 |
+
"step": 1494
|
9064 |
+
},
|
9065 |
+
{
|
9066 |
+
"epoch": 1.13,
|
9067 |
+
"learning_rate": 0.00022444444444444446,
|
9068 |
+
"loss": 2.0813,
|
9069 |
+
"step": 1495
|
9070 |
+
},
|
9071 |
+
{
|
9072 |
+
"epoch": 1.13,
|
9073 |
+
"learning_rate": 0.00022400000000000002,
|
9074 |
+
"loss": 2.133,
|
9075 |
+
"step": 1496
|
9076 |
+
},
|
9077 |
+
{
|
9078 |
+
"epoch": 1.14,
|
9079 |
+
"learning_rate": 0.00022355555555555556,
|
9080 |
+
"loss": 2.2231,
|
9081 |
+
"step": 1497
|
9082 |
+
},
|
9083 |
+
{
|
9084 |
+
"epoch": 1.14,
|
9085 |
+
"learning_rate": 0.00022311111111111113,
|
9086 |
+
"loss": 2.1299,
|
9087 |
+
"step": 1498
|
9088 |
+
},
|
9089 |
+
{
|
9090 |
+
"epoch": 1.14,
|
9091 |
+
"learning_rate": 0.00022266666666666667,
|
9092 |
+
"loss": 2.1343,
|
9093 |
+
"step": 1499
|
9094 |
+
},
|
9095 |
+
{
|
9096 |
+
"epoch": 1.14,
|
9097 |
+
"learning_rate": 0.00022222222222222223,
|
9098 |
+
"loss": 2.1702,
|
9099 |
+
"step": 1500
|
9100 |
+
},
|
9101 |
+
{
|
9102 |
+
"epoch": 1.14,
|
9103 |
+
"learning_rate": 0.00022177777777777777,
|
9104 |
+
"loss": 2.1268,
|
9105 |
+
"step": 1501
|
9106 |
+
},
|
9107 |
+
{
|
9108 |
+
"epoch": 1.14,
|
9109 |
+
"learning_rate": 0.00022133333333333334,
|
9110 |
+
"loss": 2.1472,
|
9111 |
+
"step": 1502
|
9112 |
+
},
|
9113 |
+
{
|
9114 |
+
"epoch": 1.14,
|
9115 |
+
"learning_rate": 0.00022088888888888888,
|
9116 |
+
"loss": 2.1203,
|
9117 |
+
"step": 1503
|
9118 |
+
},
|
9119 |
+
{
|
9120 |
+
"epoch": 1.14,
|
9121 |
+
"learning_rate": 0.00022044444444444444,
|
9122 |
+
"loss": 2.1968,
|
9123 |
+
"step": 1504
|
9124 |
+
},
|
9125 |
+
{
|
9126 |
+
"epoch": 1.14,
|
9127 |
+
"learning_rate": 0.00022000000000000003,
|
9128 |
+
"loss": 2.0619,
|
9129 |
+
"step": 1505
|
9130 |
+
},
|
9131 |
+
{
|
9132 |
+
"epoch": 1.14,
|
9133 |
+
"learning_rate": 0.00021955555555555555,
|
9134 |
+
"loss": 2.1274,
|
9135 |
+
"step": 1506
|
9136 |
+
},
|
9137 |
+
{
|
9138 |
+
"epoch": 1.14,
|
9139 |
+
"learning_rate": 0.00021911111111111114,
|
9140 |
+
"loss": 2.0527,
|
9141 |
+
"step": 1507
|
9142 |
+
},
|
9143 |
+
{
|
9144 |
+
"epoch": 1.14,
|
9145 |
+
"learning_rate": 0.00021866666666666665,
|
9146 |
+
"loss": 2.0104,
|
9147 |
+
"step": 1508
|
9148 |
+
},
|
9149 |
+
{
|
9150 |
+
"epoch": 1.14,
|
9151 |
+
"learning_rate": 0.00021822222222222224,
|
9152 |
+
"loss": 2.1593,
|
9153 |
+
"step": 1509
|
9154 |
+
},
|
9155 |
+
{
|
9156 |
+
"epoch": 1.14,
|
9157 |
+
"learning_rate": 0.00021777777777777778,
|
9158 |
+
"loss": 2.0624,
|
9159 |
+
"step": 1510
|
9160 |
+
},
|
9161 |
+
{
|
9162 |
+
"epoch": 1.14,
|
9163 |
+
"learning_rate": 0.00021733333333333335,
|
9164 |
+
"loss": 2.1088,
|
9165 |
+
"step": 1511
|
9166 |
+
},
|
9167 |
+
{
|
9168 |
+
"epoch": 1.14,
|
9169 |
+
"learning_rate": 0.00021688888888888891,
|
9170 |
+
"loss": 2.0986,
|
9171 |
+
"step": 1512
|
9172 |
+
},
|
9173 |
+
{
|
9174 |
+
"epoch": 1.14,
|
9175 |
+
"learning_rate": 0.00021644444444444445,
|
9176 |
+
"loss": 2.0843,
|
9177 |
+
"step": 1513
|
9178 |
+
},
|
9179 |
+
{
|
9180 |
+
"epoch": 1.14,
|
9181 |
+
"learning_rate": 0.00021600000000000002,
|
9182 |
+
"loss": 2.1256,
|
9183 |
+
"step": 1514
|
9184 |
+
},
|
9185 |
+
{
|
9186 |
+
"epoch": 1.14,
|
9187 |
+
"learning_rate": 0.00021555555555555556,
|
9188 |
+
"loss": 2.1612,
|
9189 |
+
"step": 1515
|
9190 |
+
},
|
9191 |
+
{
|
9192 |
+
"epoch": 1.14,
|
9193 |
+
"learning_rate": 0.00021511111111111112,
|
9194 |
+
"loss": 2.1048,
|
9195 |
+
"step": 1516
|
9196 |
+
},
|
9197 |
+
{
|
9198 |
+
"epoch": 1.15,
|
9199 |
+
"learning_rate": 0.00021466666666666666,
|
9200 |
+
"loss": 2.1749,
|
9201 |
+
"step": 1517
|
9202 |
+
},
|
9203 |
+
{
|
9204 |
+
"epoch": 1.15,
|
9205 |
+
"learning_rate": 0.00021422222222222223,
|
9206 |
+
"loss": 2.13,
|
9207 |
+
"step": 1518
|
9208 |
+
},
|
9209 |
+
{
|
9210 |
+
"epoch": 1.15,
|
9211 |
+
"learning_rate": 0.00021377777777777782,
|
9212 |
+
"loss": 2.1937,
|
9213 |
+
"step": 1519
|
9214 |
+
},
|
9215 |
+
{
|
9216 |
+
"epoch": 1.15,
|
9217 |
+
"learning_rate": 0.00021333333333333333,
|
9218 |
+
"loss": 2.1608,
|
9219 |
+
"step": 1520
|
9220 |
+
},
|
9221 |
+
{
|
9222 |
+
"epoch": 1.15,
|
9223 |
+
"learning_rate": 0.00021288888888888893,
|
9224 |
+
"loss": 2.1284,
|
9225 |
+
"step": 1521
|
9226 |
+
},
|
9227 |
+
{
|
9228 |
+
"epoch": 1.15,
|
9229 |
+
"learning_rate": 0.00021244444444444444,
|
9230 |
+
"loss": 2.1542,
|
9231 |
+
"step": 1522
|
9232 |
+
},
|
9233 |
+
{
|
9234 |
+
"epoch": 1.15,
|
9235 |
+
"learning_rate": 0.00021200000000000003,
|
9236 |
+
"loss": 2.1237,
|
9237 |
+
"step": 1523
|
9238 |
+
},
|
9239 |
+
{
|
9240 |
+
"epoch": 1.15,
|
9241 |
+
"learning_rate": 0.00021155555555555554,
|
9242 |
+
"loss": 2.1512,
|
9243 |
+
"step": 1524
|
9244 |
+
},
|
9245 |
+
{
|
9246 |
+
"epoch": 1.15,
|
9247 |
+
"learning_rate": 0.00021111111111111113,
|
9248 |
+
"loss": 2.0976,
|
9249 |
+
"step": 1525
|
9250 |
+
},
|
9251 |
+
{
|
9252 |
+
"epoch": 1.15,
|
9253 |
+
"learning_rate": 0.00021066666666666665,
|
9254 |
+
"loss": 2.135,
|
9255 |
+
"step": 1526
|
9256 |
+
},
|
9257 |
+
{
|
9258 |
+
"epoch": 1.15,
|
9259 |
+
"learning_rate": 0.00021022222222222224,
|
9260 |
+
"loss": 2.1227,
|
9261 |
+
"step": 1527
|
9262 |
+
},
|
9263 |
+
{
|
9264 |
+
"epoch": 1.15,
|
9265 |
+
"learning_rate": 0.0002097777777777778,
|
9266 |
+
"loss": 2.114,
|
9267 |
+
"step": 1528
|
9268 |
+
},
|
9269 |
+
{
|
9270 |
+
"epoch": 1.15,
|
9271 |
+
"learning_rate": 0.00020933333333333334,
|
9272 |
+
"loss": 2.141,
|
9273 |
+
"step": 1529
|
9274 |
+
},
|
9275 |
+
{
|
9276 |
+
"epoch": 1.15,
|
9277 |
+
"learning_rate": 0.0002088888888888889,
|
9278 |
+
"loss": 2.1855,
|
9279 |
+
"step": 1530
|
9280 |
+
},
|
9281 |
+
{
|
9282 |
+
"epoch": 1.15,
|
9283 |
+
"learning_rate": 0.00020844444444444445,
|
9284 |
+
"loss": 2.1343,
|
9285 |
+
"step": 1531
|
9286 |
+
},
|
9287 |
+
{
|
9288 |
+
"epoch": 1.15,
|
9289 |
+
"learning_rate": 0.00020800000000000001,
|
9290 |
+
"loss": 2.0732,
|
9291 |
+
"step": 1532
|
9292 |
+
},
|
9293 |
+
{
|
9294 |
+
"epoch": 1.15,
|
9295 |
+
"learning_rate": 0.00020755555555555555,
|
9296 |
+
"loss": 2.1155,
|
9297 |
+
"step": 1533
|
9298 |
+
},
|
9299 |
+
{
|
9300 |
+
"epoch": 1.15,
|
9301 |
+
"learning_rate": 0.00020711111111111112,
|
9302 |
+
"loss": 2.1146,
|
9303 |
+
"step": 1534
|
9304 |
+
},
|
9305 |
+
{
|
9306 |
+
"epoch": 1.15,
|
9307 |
+
"learning_rate": 0.00020666666666666668,
|
9308 |
+
"loss": 2.1464,
|
9309 |
+
"step": 1535
|
9310 |
+
},
|
9311 |
+
{
|
9312 |
+
"epoch": 1.15,
|
9313 |
+
"learning_rate": 0.00020622222222222222,
|
9314 |
+
"loss": 2.1327,
|
9315 |
+
"step": 1536
|
9316 |
+
},
|
9317 |
+
{
|
9318 |
+
"epoch": 1.16,
|
9319 |
+
"learning_rate": 0.0002057777777777778,
|
9320 |
+
"loss": 2.1498,
|
9321 |
+
"step": 1537
|
9322 |
+
},
|
9323 |
+
{
|
9324 |
+
"epoch": 1.16,
|
9325 |
+
"learning_rate": 0.00020533333333333333,
|
9326 |
+
"loss": 2.133,
|
9327 |
+
"step": 1538
|
9328 |
+
},
|
9329 |
+
{
|
9330 |
+
"epoch": 1.16,
|
9331 |
+
"learning_rate": 0.00020488888888888892,
|
9332 |
+
"loss": 2.1969,
|
9333 |
+
"step": 1539
|
9334 |
+
},
|
9335 |
+
{
|
9336 |
+
"epoch": 1.16,
|
9337 |
+
"learning_rate": 0.00020444444444444443,
|
9338 |
+
"loss": 2.1602,
|
9339 |
+
"step": 1540
|
9340 |
+
},
|
9341 |
+
{
|
9342 |
+
"epoch": 1.16,
|
9343 |
+
"learning_rate": 0.00020400000000000003,
|
9344 |
+
"loss": 2.1356,
|
9345 |
+
"step": 1541
|
9346 |
+
},
|
9347 |
+
{
|
9348 |
+
"epoch": 1.16,
|
9349 |
+
"learning_rate": 0.00020355555555555554,
|
9350 |
+
"loss": 2.1506,
|
9351 |
+
"step": 1542
|
9352 |
+
},
|
9353 |
+
{
|
9354 |
+
"epoch": 1.16,
|
9355 |
+
"learning_rate": 0.00020311111111111113,
|
9356 |
+
"loss": 2.0696,
|
9357 |
+
"step": 1543
|
9358 |
+
},
|
9359 |
+
{
|
9360 |
+
"epoch": 1.16,
|
9361 |
+
"learning_rate": 0.0002026666666666667,
|
9362 |
+
"loss": 2.1018,
|
9363 |
+
"step": 1544
|
9364 |
+
},
|
9365 |
+
{
|
9366 |
+
"epoch": 1.16,
|
9367 |
+
"learning_rate": 0.00020222222222222223,
|
9368 |
+
"loss": 2.1302,
|
9369 |
+
"step": 1545
|
9370 |
+
},
|
9371 |
+
{
|
9372 |
+
"epoch": 1.16,
|
9373 |
+
"learning_rate": 0.0002017777777777778,
|
9374 |
+
"loss": 2.1233,
|
9375 |
+
"step": 1546
|
9376 |
+
},
|
9377 |
+
{
|
9378 |
+
"epoch": 1.16,
|
9379 |
+
"learning_rate": 0.00020133333333333334,
|
9380 |
+
"loss": 2.1112,
|
9381 |
+
"step": 1547
|
9382 |
+
},
|
9383 |
+
{
|
9384 |
+
"epoch": 1.16,
|
9385 |
+
"learning_rate": 0.0002008888888888889,
|
9386 |
+
"loss": 2.219,
|
9387 |
+
"step": 1548
|
9388 |
+
},
|
9389 |
+
{
|
9390 |
+
"epoch": 1.16,
|
9391 |
+
"learning_rate": 0.00020044444444444444,
|
9392 |
+
"loss": 2.1709,
|
9393 |
+
"step": 1549
|
9394 |
+
},
|
9395 |
+
{
|
9396 |
+
"epoch": 1.16,
|
9397 |
+
"learning_rate": 0.0002,
|
9398 |
+
"loss": 2.077,
|
9399 |
+
"step": 1550
|
9400 |
+
},
|
9401 |
+
{
|
9402 |
+
"epoch": 1.16,
|
9403 |
+
"learning_rate": 0.00019955555555555558,
|
9404 |
+
"loss": 2.167,
|
9405 |
+
"step": 1551
|
9406 |
+
},
|
9407 |
+
{
|
9408 |
+
"epoch": 1.16,
|
9409 |
+
"learning_rate": 0.00019911111111111111,
|
9410 |
+
"loss": 2.0899,
|
9411 |
+
"step": 1552
|
9412 |
+
},
|
9413 |
+
{
|
9414 |
+
"epoch": 1.16,
|
9415 |
+
"learning_rate": 0.00019866666666666668,
|
9416 |
+
"loss": 2.1202,
|
9417 |
+
"step": 1553
|
9418 |
+
},
|
9419 |
+
{
|
9420 |
+
"epoch": 1.16,
|
9421 |
+
"learning_rate": 0.00019822222222222225,
|
9422 |
+
"loss": 2.1057,
|
9423 |
+
"step": 1554
|
9424 |
+
},
|
9425 |
+
{
|
9426 |
+
"epoch": 1.16,
|
9427 |
+
"learning_rate": 0.00019777777777777778,
|
9428 |
+
"loss": 2.0946,
|
9429 |
+
"step": 1555
|
9430 |
+
},
|
9431 |
+
{
|
9432 |
+
"epoch": 1.16,
|
9433 |
+
"learning_rate": 0.00019733333333333335,
|
9434 |
+
"loss": 2.1357,
|
9435 |
+
"step": 1556
|
9436 |
+
},
|
9437 |
+
{
|
9438 |
+
"epoch": 1.17,
|
9439 |
+
"learning_rate": 0.0001968888888888889,
|
9440 |
+
"loss": 2.1182,
|
9441 |
+
"step": 1557
|
9442 |
+
},
|
9443 |
+
{
|
9444 |
+
"epoch": 1.17,
|
9445 |
+
"learning_rate": 0.00019644444444444445,
|
9446 |
+
"loss": 2.112,
|
9447 |
+
"step": 1558
|
9448 |
+
},
|
9449 |
+
{
|
9450 |
+
"epoch": 1.17,
|
9451 |
+
"learning_rate": 0.000196,
|
9452 |
+
"loss": 2.1319,
|
9453 |
+
"step": 1559
|
9454 |
+
},
|
9455 |
+
{
|
9456 |
+
"epoch": 1.17,
|
9457 |
+
"learning_rate": 0.00019555555555555556,
|
9458 |
+
"loss": 2.1783,
|
9459 |
+
"step": 1560
|
9460 |
+
},
|
9461 |
+
{
|
9462 |
+
"epoch": 1.17,
|
9463 |
+
"learning_rate": 0.0001951111111111111,
|
9464 |
+
"loss": 2.1374,
|
9465 |
+
"step": 1561
|
9466 |
+
},
|
9467 |
+
{
|
9468 |
+
"epoch": 1.17,
|
9469 |
+
"learning_rate": 0.0001946666666666667,
|
9470 |
+
"loss": 2.1862,
|
9471 |
+
"step": 1562
|
9472 |
+
},
|
9473 |
+
{
|
9474 |
+
"epoch": 1.17,
|
9475 |
+
"learning_rate": 0.00019422222222222223,
|
9476 |
+
"loss": 2.1993,
|
9477 |
+
"step": 1563
|
9478 |
+
},
|
9479 |
+
{
|
9480 |
+
"epoch": 1.17,
|
9481 |
+
"learning_rate": 0.0001937777777777778,
|
9482 |
+
"loss": 2.0878,
|
9483 |
+
"step": 1564
|
9484 |
+
},
|
9485 |
+
{
|
9486 |
+
"epoch": 1.17,
|
9487 |
+
"learning_rate": 0.00019333333333333333,
|
9488 |
+
"loss": 2.1082,
|
9489 |
+
"step": 1565
|
9490 |
+
},
|
9491 |
+
{
|
9492 |
+
"epoch": 1.17,
|
9493 |
+
"learning_rate": 0.0001928888888888889,
|
9494 |
+
"loss": 2.1007,
|
9495 |
+
"step": 1566
|
9496 |
+
},
|
9497 |
+
{
|
9498 |
+
"epoch": 1.17,
|
9499 |
+
"learning_rate": 0.00019244444444444444,
|
9500 |
+
"loss": 2.1888,
|
9501 |
+
"step": 1567
|
9502 |
+
},
|
9503 |
+
{
|
9504 |
+
"epoch": 1.17,
|
9505 |
+
"learning_rate": 0.000192,
|
9506 |
+
"loss": 2.1479,
|
9507 |
+
"step": 1568
|
9508 |
+
},
|
9509 |
+
{
|
9510 |
+
"epoch": 1.17,
|
9511 |
+
"learning_rate": 0.00019155555555555554,
|
9512 |
+
"loss": 2.235,
|
9513 |
+
"step": 1569
|
9514 |
+
},
|
9515 |
+
{
|
9516 |
+
"epoch": 1.17,
|
9517 |
+
"learning_rate": 0.00019111111111111114,
|
9518 |
+
"loss": 2.1963,
|
9519 |
+
"step": 1570
|
9520 |
+
},
|
9521 |
+
{
|
9522 |
+
"epoch": 1.17,
|
9523 |
+
"learning_rate": 0.00019066666666666668,
|
9524 |
+
"loss": 2.0638,
|
9525 |
+
"step": 1571
|
9526 |
+
},
|
9527 |
+
{
|
9528 |
+
"epoch": 1.17,
|
9529 |
+
"learning_rate": 0.00019022222222222224,
|
9530 |
+
"loss": 2.1389,
|
9531 |
+
"step": 1572
|
9532 |
+
},
|
9533 |
+
{
|
9534 |
+
"epoch": 1.17,
|
9535 |
+
"learning_rate": 0.00018977777777777778,
|
9536 |
+
"loss": 2.0982,
|
9537 |
+
"step": 1573
|
9538 |
+
},
|
9539 |
+
{
|
9540 |
+
"epoch": 1.17,
|
9541 |
+
"learning_rate": 0.00018933333333333335,
|
9542 |
+
"loss": 2.1129,
|
9543 |
+
"step": 1574
|
9544 |
+
},
|
9545 |
+
{
|
9546 |
+
"epoch": 1.17,
|
9547 |
+
"learning_rate": 0.00018888888888888888,
|
9548 |
+
"loss": 2.1199,
|
9549 |
+
"step": 1575
|
9550 |
+
},
|
9551 |
+
{
|
9552 |
+
"epoch": 1.17,
|
9553 |
+
"learning_rate": 0.00018844444444444445,
|
9554 |
+
"loss": 2.0823,
|
9555 |
+
"step": 1576
|
9556 |
+
},
|
9557 |
+
{
|
9558 |
+
"epoch": 1.18,
|
9559 |
+
"learning_rate": 0.000188,
|
9560 |
+
"loss": 2.1113,
|
9561 |
+
"step": 1577
|
9562 |
+
},
|
9563 |
+
{
|
9564 |
+
"epoch": 1.18,
|
9565 |
+
"learning_rate": 0.00018755555555555558,
|
9566 |
+
"loss": 2.1034,
|
9567 |
+
"step": 1578
|
9568 |
+
},
|
9569 |
+
{
|
9570 |
+
"epoch": 1.18,
|
9571 |
+
"learning_rate": 0.00018711111111111112,
|
9572 |
+
"loss": 2.0663,
|
9573 |
+
"step": 1579
|
9574 |
+
},
|
9575 |
+
{
|
9576 |
+
"epoch": 1.18,
|
9577 |
+
"learning_rate": 0.0001866666666666667,
|
9578 |
+
"loss": 2.0847,
|
9579 |
+
"step": 1580
|
9580 |
+
},
|
9581 |
+
{
|
9582 |
+
"epoch": 1.18,
|
9583 |
+
"learning_rate": 0.00018622222222222223,
|
9584 |
+
"loss": 2.155,
|
9585 |
+
"step": 1581
|
9586 |
+
},
|
9587 |
+
{
|
9588 |
+
"epoch": 1.18,
|
9589 |
+
"learning_rate": 0.0001857777777777778,
|
9590 |
+
"loss": 2.1369,
|
9591 |
+
"step": 1582
|
9592 |
+
},
|
9593 |
+
{
|
9594 |
+
"epoch": 1.18,
|
9595 |
+
"learning_rate": 0.00018533333333333333,
|
9596 |
+
"loss": 2.0877,
|
9597 |
+
"step": 1583
|
9598 |
+
},
|
9599 |
+
{
|
9600 |
+
"epoch": 1.18,
|
9601 |
+
"learning_rate": 0.0001848888888888889,
|
9602 |
+
"loss": 2.0629,
|
9603 |
+
"step": 1584
|
9604 |
+
},
|
9605 |
+
{
|
9606 |
+
"epoch": 1.18,
|
9607 |
+
"learning_rate": 0.00018444444444444446,
|
9608 |
+
"loss": 2.1677,
|
9609 |
+
"step": 1585
|
9610 |
+
},
|
9611 |
+
{
|
9612 |
+
"epoch": 1.18,
|
9613 |
+
"learning_rate": 0.00018400000000000003,
|
9614 |
+
"loss": 2.0936,
|
9615 |
+
"step": 1586
|
9616 |
+
},
|
9617 |
+
{
|
9618 |
+
"epoch": 1.18,
|
9619 |
+
"learning_rate": 0.00018355555555555557,
|
9620 |
+
"loss": 2.0795,
|
9621 |
+
"step": 1587
|
9622 |
+
},
|
9623 |
+
{
|
9624 |
+
"epoch": 1.18,
|
9625 |
+
"learning_rate": 0.00018311111111111113,
|
9626 |
+
"loss": 2.0966,
|
9627 |
+
"step": 1588
|
9628 |
+
},
|
9629 |
+
{
|
9630 |
+
"epoch": 1.18,
|
9631 |
+
"learning_rate": 0.00018266666666666667,
|
9632 |
+
"loss": 2.1501,
|
9633 |
+
"step": 1589
|
9634 |
+
},
|
9635 |
+
{
|
9636 |
+
"epoch": 1.18,
|
9637 |
+
"learning_rate": 0.00018222222222222224,
|
9638 |
+
"loss": 2.1039,
|
9639 |
+
"step": 1590
|
9640 |
+
},
|
9641 |
+
{
|
9642 |
+
"epoch": 1.18,
|
9643 |
+
"learning_rate": 0.00018177777777777778,
|
9644 |
+
"loss": 2.1578,
|
9645 |
+
"step": 1591
|
9646 |
+
},
|
9647 |
+
{
|
9648 |
+
"epoch": 1.18,
|
9649 |
+
"learning_rate": 0.00018133333333333334,
|
9650 |
+
"loss": 2.0771,
|
9651 |
+
"step": 1592
|
9652 |
+
},
|
9653 |
+
{
|
9654 |
+
"epoch": 1.18,
|
9655 |
+
"learning_rate": 0.0001808888888888889,
|
9656 |
+
"loss": 2.126,
|
9657 |
+
"step": 1593
|
9658 |
+
},
|
9659 |
+
{
|
9660 |
+
"epoch": 1.18,
|
9661 |
+
"learning_rate": 0.00018044444444444447,
|
9662 |
+
"loss": 2.0727,
|
9663 |
+
"step": 1594
|
9664 |
+
},
|
9665 |
+
{
|
9666 |
+
"epoch": 1.18,
|
9667 |
+
"learning_rate": 0.00018,
|
9668 |
+
"loss": 2.1178,
|
9669 |
+
"step": 1595
|
9670 |
+
},
|
9671 |
+
{
|
9672 |
+
"epoch": 1.18,
|
9673 |
+
"learning_rate": 0.00017955555555555558,
|
9674 |
+
"loss": 2.1327,
|
9675 |
+
"step": 1596
|
9676 |
+
},
|
9677 |
+
{
|
9678 |
+
"epoch": 1.19,
|
9679 |
+
"learning_rate": 0.00017911111111111112,
|
9680 |
+
"loss": 2.1465,
|
9681 |
+
"step": 1597
|
9682 |
+
},
|
9683 |
+
{
|
9684 |
+
"epoch": 1.19,
|
9685 |
+
"learning_rate": 0.00017866666666666668,
|
9686 |
+
"loss": 2.1755,
|
9687 |
+
"step": 1598
|
9688 |
+
},
|
9689 |
+
{
|
9690 |
+
"epoch": 1.19,
|
9691 |
+
"learning_rate": 0.00017822222222222222,
|
9692 |
+
"loss": 2.1052,
|
9693 |
+
"step": 1599
|
9694 |
+
},
|
9695 |
+
{
|
9696 |
+
"epoch": 1.19,
|
9697 |
+
"learning_rate": 0.00017777777777777779,
|
9698 |
+
"loss": 2.1484,
|
9699 |
+
"step": 1600
|
9700 |
+
},
|
9701 |
+
{
|
9702 |
+
"epoch": 1.19,
|
9703 |
+
"eval_gen_len": 1023.0,
|
9704 |
+
"eval_loss": 2.1070616245269775,
|
9705 |
+
"eval_rouge1": 14.2289,
|
9706 |
+
"eval_rouge2": 4.7872,
|
9707 |
+
"eval_rougeL": 8.9958,
|
9708 |
+
"eval_rougeLsum": 9.0075,
|
9709 |
+
"eval_runtime": 9037.4832,
|
9710 |
+
"eval_samples_per_second": 0.055,
|
9711 |
+
"eval_steps_per_second": 0.055,
|
9712 |
+
"step": 1600
|
9713 |
}
|
9714 |
],
|
9715 |
"max_steps": 2000,
|
9716 |
"num_train_epochs": 9223372036854775807,
|
9717 |
+
"total_flos": 1.2852794184310794e+18,
|
9718 |
"trial_name": null,
|
9719 |
"trial_params": null
|
9720 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2368281769
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:728b6dfaa1c989be32093514baf4c7d88f3ff72d43aa5c614b342b108779fd92
|
3 |
size 2368281769
|