Training in progress, step 9500, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:feeb8af86d4228c031ab0303150253b8e59c08c82f4f8aa78a75fae604e120a1
 size 328277848
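This diff (like the three below) touches only a Git LFS pointer file: the actual weights live in LFS storage, and the pointer records the blob's SHA-256 (`oid`) and byte size. A downloaded file can therefore be checked against the pointer. A minimal sketch, assuming the path has already been resolved to the real blob rather than the pointer text:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so multi-hundred-MB checkpoints stream through."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the pointer diff above; the local path is illustrative.
expected = "feeb8af86d4228c031ab0303150253b8e59c08c82f4f8aa78a75fae604e120a1"
assert sha256_of("last-checkpoint/model.safetensors") == expected
```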
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5d5fa1cbde1c469de32a370ba5361ae4e7744a119f98350fc2511f131db06a4e
 size 318646859
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ccd074c7b8f0b016dc440e87123ddc293303707dc1fa944c0ab62d0b20aa48bd
 size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4784f3b1ac308d4093c525f58ebfb1ed5c4e7ca17828bd58e2e6a8e2baed20b5
 size 1465
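Together these four binaries make up a resumable Trainer checkpoint: model weights (safetensors), optimizer state, LR-scheduler state, and an RNG snapshot for reproducible resumption. A minimal sketch of inspecting each piece by hand, assuming `torch` and `safetensors` are installed and the directory layout matches this commit; `weights_only=False` is an assumption needed because the `.pt`/`.pth` files contain pickled non-tensor Python objects:

```python
import torch
from safetensors.torch import load_file

ckpt = "last-checkpoint"  # directory uploaded in this commit

# Model weights: safetensors stores a flat dict of tensor name -> tensor.
state_dict = load_file(f"{ckpt}/model.safetensors")
print(sum(t.numel() for t in state_dict.values()), "parameters")

# Optimizer, scheduler, and RNG snapshots as saved by transformers' Trainer.
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load(f"{ckpt}/rng_state.pth", map_location="cpu", weights_only=False)
```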
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.6050008447372868,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 9500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6459,6 +6459,364 @@
       "eval_samples_per_second": 269.07,
       "eval_steps_per_second": 5.65,
       "step": 9000
+    },
+    {
+      "epoch": 1.522216590640311,
+      "grad_norm": 0.4886947572231293,
+      "learning_rate": 5.715753870066455e-05,
+      "loss": 4.321556472778321,
+      "step": 9010
+    },
+    {
+      "epoch": 1.5239060652137186,
+      "grad_norm": 0.4844423532485962,
+      "learning_rate": 5.67817924971296e-05,
+      "loss": 4.319805908203125,
+      "step": 9020
+    },
+    {
+      "epoch": 1.5255955397871261,
+      "grad_norm": 0.5226190686225891,
+      "learning_rate": 5.6406996868811885e-05,
+      "loss": 4.334315490722656,
+      "step": 9030
+    },
+    {
+      "epoch": 1.5272850143605339,
+      "grad_norm": 0.4798380136489868,
+      "learning_rate": 5.60331556376197e-05,
+      "loss": 4.332364654541015,
+      "step": 9040
+    },
+    {
+      "epoch": 1.5289744889339416,
+      "grad_norm": 0.4943060874938965,
+      "learning_rate": 5.566027261572907e-05,
+      "loss": 4.315433502197266,
+      "step": 9050
+    },
+    {
+      "epoch": 1.5306639635073491,
+      "grad_norm": 0.4950096011161804,
+      "learning_rate": 5.528835160554475e-05,
+      "loss": 4.333520126342774,
+      "step": 9060
+    },
+    {
+      "epoch": 1.5323534380807569,
+      "grad_norm": 0.49306342005729675,
+      "learning_rate": 5.491739639966153e-05,
+      "loss": 4.341754531860351,
+      "step": 9070
+    },
+    {
+      "epoch": 1.5340429126541646,
+      "grad_norm": 0.5156424641609192,
+      "learning_rate": 5.454741078082578e-05,
+      "loss": 4.341635894775391,
+      "step": 9080
+    },
+    {
+      "epoch": 1.535732387227572,
+      "grad_norm": 0.4766913652420044,
+      "learning_rate": 5.417839852189653e-05,
+      "loss": 4.368686294555664,
+      "step": 9090
+    },
+    {
+      "epoch": 1.53742186180098,
+      "grad_norm": 0.4723012447357178,
+      "learning_rate": 5.381036338580718e-05,
+      "loss": 4.3226276397705075,
+      "step": 9100
+    },
+    {
+      "epoch": 1.5391113363743876,
+      "grad_norm": 0.48478707671165466,
+      "learning_rate": 5.344330912552703e-05,
+      "loss": 4.324138259887695,
+      "step": 9110
+    },
+    {
+      "epoch": 1.540800810947795,
+      "grad_norm": 0.4678701162338257,
+      "learning_rate": 5.3077239484023385e-05,
+      "loss": 4.336772918701172,
+      "step": 9120
+    },
+    {
+      "epoch": 1.542490285521203,
+      "grad_norm": 0.5012118220329285,
+      "learning_rate": 5.271215819422277e-05,
+      "loss": 4.340796661376953,
+      "step": 9130
+    },
+    {
+      "epoch": 1.5441797600946106,
+      "grad_norm": 0.46702662110328674,
+      "learning_rate": 5.234806897897328e-05,
+      "loss": 4.351072692871094,
+      "step": 9140
+    },
+    {
+      "epoch": 1.5458692346680183,
+      "grad_norm": 0.4755004346370697,
+      "learning_rate": 5.1984975551006434e-05,
+      "loss": 4.333198165893554,
+      "step": 9150
+    },
+    {
+      "epoch": 1.547558709241426,
+      "grad_norm": 0.49710944294929504,
+      "learning_rate": 5.1622881612899635e-05,
+      "loss": 4.329468536376953,
+      "step": 9160
+    },
+    {
+      "epoch": 1.5492481838148335,
+      "grad_norm": 0.47669288516044617,
+      "learning_rate": 5.126179085703794e-05,
+      "loss": 4.306584167480469,
+      "step": 9170
+    },
+    {
+      "epoch": 1.5509376583882413,
+      "grad_norm": 0.4755394458770752,
+      "learning_rate": 5.090170696557667e-05,
+      "loss": 4.332014083862305,
+      "step": 9180
+    },
+    {
+      "epoch": 1.552627132961649,
+      "grad_norm": 0.4676371216773987,
+      "learning_rate": 5.054263361040395e-05,
+      "loss": 4.323485565185547,
+      "step": 9190
+    },
+    {
+      "epoch": 1.5543166075350565,
+      "grad_norm": 0.478369802236557,
+      "learning_rate": 5.018457445310313e-05,
+      "loss": 4.330324935913086,
+      "step": 9200
+    },
+    {
+      "epoch": 1.5560060821084643,
+      "grad_norm": 0.47149577736854553,
+      "learning_rate": 4.9827533144915384e-05,
+      "loss": 4.3147937774658205,
+      "step": 9210
+    },
+    {
+      "epoch": 1.557695556681872,
+      "grad_norm": 0.46445849537849426,
+      "learning_rate": 4.9471513326702544e-05,
+      "loss": 4.3321784973144535,
+      "step": 9220
+    },
+    {
+      "epoch": 1.5593850312552795,
+      "grad_norm": 0.4738256335258484,
+      "learning_rate": 4.911651862891014e-05,
+      "loss": 4.331890487670899,
+      "step": 9230
+    },
+    {
+      "epoch": 1.5610745058286872,
+      "grad_norm": 0.461725115776062,
+      "learning_rate": 4.876255267153011e-05,
+      "loss": 4.33313217163086,
+      "step": 9240
+    },
+    {
+      "epoch": 1.562763980402095,
+      "grad_norm": 0.47477617859840393,
+      "learning_rate": 4.8409619064063965e-05,
+      "loss": 4.322317504882813,
+      "step": 9250
+    },
+    {
+      "epoch": 1.5644534549755025,
+      "grad_norm": 0.5020060539245605,
+      "learning_rate": 4.805772140548613e-05,
+      "loss": 4.332529067993164,
+      "step": 9260
+    },
+    {
+      "epoch": 1.5661429295489104,
+      "grad_norm": 0.49133333563804626,
+      "learning_rate": 4.770686328420713e-05,
+      "loss": 4.309441375732422,
+      "step": 9270
+    },
+    {
+      "epoch": 1.567832404122318,
+      "grad_norm": 0.4712921380996704,
+      "learning_rate": 4.7357048278036944e-05,
+      "loss": 4.33348503112793,
+      "step": 9280
+    },
+    {
+      "epoch": 1.5695218786957257,
+      "grad_norm": 0.4794677495956421,
+      "learning_rate": 4.700827995414853e-05,
+      "loss": 4.318268203735352,
+      "step": 9290
+    },
+    {
+      "epoch": 1.5712113532691334,
+      "grad_norm": 0.48898905515670776,
+      "learning_rate": 4.666056186904168e-05,
+      "loss": 4.350247955322265,
+      "step": 9300
+    },
+    {
+      "epoch": 1.572900827842541,
+      "grad_norm": 0.4803585112094879,
+      "learning_rate": 4.63138975685064e-05,
+      "loss": 4.3233489990234375,
+      "step": 9310
+    },
+    {
+      "epoch": 1.5745903024159487,
+      "grad_norm": 0.46920374035835266,
+      "learning_rate": 4.596829058758694e-05,
+      "loss": 4.339992904663086,
+      "step": 9320
+    },
+    {
+      "epoch": 1.5762797769893564,
+      "grad_norm": 0.483453631401062,
+      "learning_rate": 4.5623744450545846e-05,
+      "loss": 4.3560230255126955,
+      "step": 9330
+    },
+    {
+      "epoch": 1.577969251562764,
+      "grad_norm": 0.4820483326911926,
+      "learning_rate": 4.528026267082786e-05,
+      "loss": 4.344028091430664,
+      "step": 9340
+    },
+    {
+      "epoch": 1.5796587261361716,
+      "grad_norm": 0.47812727093696594,
+      "learning_rate": 4.493784875102409e-05,
+      "loss": 4.326963806152344,
+      "step": 9350
+    },
+    {
+      "epoch": 1.5813482007095794,
+      "grad_norm": 0.49543893337249756,
+      "learning_rate": 4.45965061828363e-05,
+      "loss": 4.336210632324219,
+      "step": 9360
+    },
+    {
+      "epoch": 1.583037675282987,
+      "grad_norm": 0.4648870825767517,
+      "learning_rate": 4.4256238447041556e-05,
+      "loss": 4.335990905761719,
+      "step": 9370
+    },
+    {
+      "epoch": 1.5847271498563946,
+      "grad_norm": 0.4952487051486969,
+      "learning_rate": 4.39170490134563e-05,
+      "loss": 4.324835968017578,
+      "step": 9380
+    },
+    {
+      "epoch": 1.5864166244298024,
+      "grad_norm": 0.4667623043060303,
+      "learning_rate": 4.3578941340901274e-05,
+      "loss": 4.3118232727050785,
+      "step": 9390
+    },
+    {
+      "epoch": 1.5881060990032099,
+      "grad_norm": 0.4628295600414276,
+      "learning_rate": 4.324191887716612e-05,
+      "loss": 4.319614028930664,
+      "step": 9400
+    },
+    {
+      "epoch": 1.5897955735766178,
+      "grad_norm": 0.47781363129615784,
+      "learning_rate": 4.290598505897439e-05,
+      "loss": 4.33368148803711,
+      "step": 9410
+    },
+    {
+      "epoch": 1.5914850481500253,
+      "grad_norm": 0.47368741035461426,
+      "learning_rate": 4.25711433119483e-05,
+      "loss": 4.343207550048828,
+      "step": 9420
+    },
+    {
+      "epoch": 1.5931745227234329,
+      "grad_norm": 0.48697587847709656,
+      "learning_rate": 4.223739705057384e-05,
+      "loss": 4.332680130004883,
+      "step": 9430
+    },
+    {
+      "epoch": 1.5948639972968408,
+      "grad_norm": 0.4794331192970276,
+      "learning_rate": 4.1904749678165965e-05,
+      "loss": 4.318676376342774,
+      "step": 9440
+    },
+    {
+      "epoch": 1.5965534718702483,
+      "grad_norm": 0.4642111361026764,
+      "learning_rate": 4.157320458683409e-05,
+      "loss": 4.281653976440429,
+      "step": 9450
+    },
+    {
+      "epoch": 1.598242946443656,
+      "grad_norm": 0.48844289779663086,
+      "learning_rate": 4.124276515744713e-05,
+      "loss": 4.316392135620117,
+      "step": 9460
+    },
+    {
+      "epoch": 1.5999324210170638,
+      "grad_norm": 0.4724809229373932,
+      "learning_rate": 4.091343475959928e-05,
+      "loss": 4.30932502746582,
+      "step": 9470
+    },
+    {
+      "epoch": 1.6016218955904713,
+      "grad_norm": 0.463838666677475,
+      "learning_rate": 4.058521675157563e-05,
+      "loss": 4.312925338745117,
+      "step": 9480
+    },
+    {
+      "epoch": 1.603311370163879,
+      "grad_norm": 0.48553308844566345,
+      "learning_rate": 4.025811448031792e-05,
+      "loss": 4.316752624511719,
+      "step": 9490
+    },
+    {
+      "epoch": 1.6050008447372868,
+      "grad_norm": 0.4514022171497345,
+      "learning_rate": 3.993213128139027e-05,
+      "loss": 4.320250701904297,
+      "step": 9500
+    },
+    {
+      "epoch": 1.6050008447372868,
+      "eval_loss": 4.275903224945068,
+      "eval_runtime": 3.5803,
+      "eval_samples_per_second": 279.306,
+      "eval_steps_per_second": 5.865,
+      "step": 9500
     }
   ],
   "logging_steps": 10,
@@ -6478,7 +6836,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.177318894608056e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
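trainer_state.json itself is plain JSON, so the loss curve logged above (a training entry every 10 steps, an eval every 500) can be inspected without touching the weights. A minimal sketch, with the path taken from this commit:

```python
import json

# Read the trainer state written alongside the checkpoint weights.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 9500 1.6050008447372868

# Entries with "loss" are training logs; entries with "eval_loss" are evals.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
print("last train loss:", train_logs[-1]["loss"])     # 4.320250701904297
print("last eval loss:", eval_logs[-1]["eval_loss"])  # 4.275903224945068
```

To continue the run rather than just inspect it, a transformers Trainer configured to match the original run can restore every file in this commit at once via `trainer.train(resume_from_checkpoint="last-checkpoint")`.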