Training in progress, step 560000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +303 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 586828837
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:047de29e872fa886935b0856cae368679a5024ec78d3b02056971d0dafe03f46
|
3 |
size 586828837
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 146774203
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2db06e17c94930e25ba4fa153fb1d09bc548975dd61b046eadd4ef82210ad5b
|
3 |
size 146774203
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6925d15f0ca8b085673c2a9c495fa03dd265589a6d0e5da63276f20be7165697
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1866493c6437f9be9b061bda7fb54561f6f075e18eb8ff9def3d978f033c740
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 733555848
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f637d7b8ff1440e6b42939ee3d5db1515f248a64a9ccc57bfd7e929c8ce06320
|
3 |
size 733555848
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -16506,11 +16506,311 @@
|
|
16506 |
"learning_rate": 0.007174402227596965,
|
16507 |
"loss": 8.0567,
|
16508 |
"step": 550000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16509 |
}
|
16510 |
],
|
16511 |
"max_steps": 1000000,
|
16512 |
"num_train_epochs": 5,
|
16513 |
-
"total_flos": 8.
|
16514 |
"trial_name": null,
|
16515 |
"trial_params": null
|
16516 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.4064596251939134,
|
5 |
+
"global_step": 560000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
16506 |
"learning_rate": 0.007174402227596965,
|
16507 |
"loss": 8.0567,
|
16508 |
"step": 550000
|
16509 |
+
},
|
16510 |
+
{
|
16511 |
+
"epoch": 2.36,
|
16512 |
+
"learning_rate": 0.007170359981885019,
|
16513 |
+
"loss": 8.0657,
|
16514 |
+
"step": 550200
|
16515 |
+
},
|
16516 |
+
{
|
16517 |
+
"epoch": 2.37,
|
16518 |
+
"learning_rate": 0.007166315987569351,
|
16519 |
+
"loss": 8.0443,
|
16520 |
+
"step": 550400
|
16521 |
+
},
|
16522 |
+
{
|
16523 |
+
"epoch": 2.37,
|
16524 |
+
"learning_rate": 0.007162270247908135,
|
16525 |
+
"loss": 8.0611,
|
16526 |
+
"step": 550600
|
16527 |
+
},
|
16528 |
+
{
|
16529 |
+
"epoch": 2.37,
|
16530 |
+
"learning_rate": 0.00715824300789773,
|
16531 |
+
"loss": 8.061,
|
16532 |
+
"step": 550800
|
16533 |
+
},
|
16534 |
+
{
|
16535 |
+
"epoch": 2.37,
|
16536 |
+
"learning_rate": 0.007154193796011567,
|
16537 |
+
"loss": 8.0521,
|
16538 |
+
"step": 551000
|
16539 |
+
},
|
16540 |
+
{
|
16541 |
+
"epoch": 2.37,
|
16542 |
+
"learning_rate": 0.007150142848546487,
|
16543 |
+
"loss": 8.057,
|
16544 |
+
"step": 551200
|
16545 |
+
},
|
16546 |
+
{
|
16547 |
+
"epoch": 2.37,
|
16548 |
+
"learning_rate": 0.007146090168766263,
|
16549 |
+
"loss": 8.0605,
|
16550 |
+
"step": 551400
|
16551 |
+
},
|
16552 |
+
{
|
16553 |
+
"epoch": 2.37,
|
16554 |
+
"learning_rate": 0.007142035759936066,
|
16555 |
+
"loss": 8.0472,
|
16556 |
+
"step": 551600
|
16557 |
+
},
|
16558 |
+
{
|
16559 |
+
"epoch": 2.37,
|
16560 |
+
"learning_rate": 0.007137979625322462,
|
16561 |
+
"loss": 8.0535,
|
16562 |
+
"step": 551800
|
16563 |
+
},
|
16564 |
+
{
|
16565 |
+
"epoch": 2.37,
|
16566 |
+
"learning_rate": 0.0071339217681934035,
|
16567 |
+
"loss": 8.0519,
|
16568 |
+
"step": 552000
|
16569 |
+
},
|
16570 |
+
{
|
16571 |
+
"epoch": 2.37,
|
16572 |
+
"learning_rate": 0.007129862191818233,
|
16573 |
+
"loss": 8.0626,
|
16574 |
+
"step": 552200
|
16575 |
+
},
|
16576 |
+
{
|
16577 |
+
"epoch": 2.37,
|
16578 |
+
"learning_rate": 0.007125800899467677,
|
16579 |
+
"loss": 8.0612,
|
16580 |
+
"step": 552400
|
16581 |
+
},
|
16582 |
+
{
|
16583 |
+
"epoch": 2.37,
|
16584 |
+
"learning_rate": 0.007121737894413849,
|
16585 |
+
"loss": 8.051,
|
16586 |
+
"step": 552600
|
16587 |
+
},
|
16588 |
+
{
|
16589 |
+
"epoch": 2.38,
|
16590 |
+
"learning_rate": 0.007117693507749444,
|
16591 |
+
"loss": 8.0404,
|
16592 |
+
"step": 552800
|
16593 |
+
},
|
16594 |
+
{
|
16595 |
+
"epoch": 2.38,
|
16596 |
+
"learning_rate": 0.0071136270956335375,
|
16597 |
+
"loss": 8.0493,
|
16598 |
+
"step": 553000
|
16599 |
+
},
|
16600 |
+
{
|
16601 |
+
"epoch": 2.38,
|
16602 |
+
"learning_rate": 0.007109558980622571,
|
16603 |
+
"loss": 8.0652,
|
16604 |
+
"step": 553200
|
16605 |
+
},
|
16606 |
+
{
|
16607 |
+
"epoch": 2.38,
|
16608 |
+
"learning_rate": 0.007105489165994151,
|
16609 |
+
"loss": 8.0499,
|
16610 |
+
"step": 553400
|
16611 |
+
},
|
16612 |
+
{
|
16613 |
+
"epoch": 2.38,
|
16614 |
+
"learning_rate": 0.007101417655027253,
|
16615 |
+
"loss": 8.0657,
|
16616 |
+
"step": 553600
|
16617 |
+
},
|
16618 |
+
{
|
16619 |
+
"epoch": 2.38,
|
16620 |
+
"learning_rate": 0.007097344451002222,
|
16621 |
+
"loss": 8.0415,
|
16622 |
+
"step": 553800
|
16623 |
+
},
|
16624 |
+
{
|
16625 |
+
"epoch": 2.38,
|
16626 |
+
"learning_rate": 0.0070932695572007605,
|
16627 |
+
"loss": 8.0431,
|
16628 |
+
"step": 554000
|
16629 |
+
},
|
16630 |
+
{
|
16631 |
+
"epoch": 2.38,
|
16632 |
+
"learning_rate": 0.007089192976905942,
|
16633 |
+
"loss": 8.0563,
|
16634 |
+
"step": 554200
|
16635 |
+
},
|
16636 |
+
{
|
16637 |
+
"epoch": 2.38,
|
16638 |
+
"learning_rate": 0.007085114713402188,
|
16639 |
+
"loss": 8.0635,
|
16640 |
+
"step": 554400
|
16641 |
+
},
|
16642 |
+
{
|
16643 |
+
"epoch": 2.38,
|
16644 |
+
"learning_rate": 0.007081034769975286,
|
16645 |
+
"loss": 8.0628,
|
16646 |
+
"step": 554600
|
16647 |
+
},
|
16648 |
+
{
|
16649 |
+
"epoch": 2.38,
|
16650 |
+
"learning_rate": 0.00707697356217788,
|
16651 |
+
"loss": 8.0692,
|
16652 |
+
"step": 554800
|
16653 |
+
},
|
16654 |
+
{
|
16655 |
+
"epoch": 2.38,
|
16656 |
+
"learning_rate": 0.007072890277125996,
|
16657 |
+
"loss": 8.0654,
|
16658 |
+
"step": 555000
|
16659 |
+
},
|
16660 |
+
{
|
16661 |
+
"epoch": 2.39,
|
16662 |
+
"learning_rate": 0.00706880532199997,
|
16663 |
+
"loss": 8.0595,
|
16664 |
+
"step": 555200
|
16665 |
+
},
|
16666 |
+
{
|
16667 |
+
"epoch": 2.39,
|
16668 |
+
"learning_rate": 0.007064718700090976,
|
16669 |
+
"loss": 8.0514,
|
16670 |
+
"step": 555400
|
16671 |
+
},
|
16672 |
+
{
|
16673 |
+
"epoch": 2.39,
|
16674 |
+
"learning_rate": 0.007060630414691535,
|
16675 |
+
"loss": 8.0607,
|
16676 |
+
"step": 555600
|
16677 |
+
},
|
16678 |
+
{
|
16679 |
+
"epoch": 2.39,
|
16680 |
+
"learning_rate": 0.0070565404690954995,
|
16681 |
+
"loss": 8.0671,
|
16682 |
+
"step": 555800
|
16683 |
+
},
|
16684 |
+
{
|
16685 |
+
"epoch": 2.39,
|
16686 |
+
"learning_rate": 0.007052448866598068,
|
16687 |
+
"loss": 8.0563,
|
16688 |
+
"step": 556000
|
16689 |
+
},
|
16690 |
+
{
|
16691 |
+
"epoch": 2.39,
|
16692 |
+
"learning_rate": 0.00704835561049577,
|
16693 |
+
"loss": 8.0695,
|
16694 |
+
"step": 556200
|
16695 |
+
},
|
16696 |
+
{
|
16697 |
+
"epoch": 2.39,
|
16698 |
+
"learning_rate": 0.007044260704086468,
|
16699 |
+
"loss": 8.0473,
|
16700 |
+
"step": 556400
|
16701 |
+
},
|
16702 |
+
{
|
16703 |
+
"epoch": 2.39,
|
16704 |
+
"learning_rate": 0.007040164150669354,
|
16705 |
+
"loss": 8.0468,
|
16706 |
+
"step": 556600
|
16707 |
+
},
|
16708 |
+
{
|
16709 |
+
"epoch": 2.39,
|
16710 |
+
"learning_rate": 0.007036086448613831,
|
16711 |
+
"loss": 8.0611,
|
16712 |
+
"step": 556800
|
16713 |
+
},
|
16714 |
+
{
|
16715 |
+
"epoch": 2.39,
|
16716 |
+
"learning_rate": 0.007031986619277786,
|
16717 |
+
"loss": 8.0643,
|
16718 |
+
"step": 557000
|
16719 |
+
},
|
16720 |
+
{
|
16721 |
+
"epoch": 2.39,
|
16722 |
+
"learning_rate": 0.0070278851528229385,
|
16723 |
+
"loss": 8.0717,
|
16724 |
+
"step": 557200
|
16725 |
+
},
|
16726 |
+
{
|
16727 |
+
"epoch": 2.4,
|
16728 |
+
"learning_rate": 0.0070237820525537635,
|
16729 |
+
"loss": 8.0483,
|
16730 |
+
"step": 557400
|
16731 |
+
},
|
16732 |
+
{
|
16733 |
+
"epoch": 2.4,
|
16734 |
+
"learning_rate": 0.007019677321776058,
|
16735 |
+
"loss": 8.0599,
|
16736 |
+
"step": 557600
|
16737 |
+
},
|
16738 |
+
{
|
16739 |
+
"epoch": 2.4,
|
16740 |
+
"learning_rate": 0.007015570963796927,
|
16741 |
+
"loss": 8.0636,
|
16742 |
+
"step": 557800
|
16743 |
+
},
|
16744 |
+
{
|
16745 |
+
"epoch": 2.4,
|
16746 |
+
"learning_rate": 0.00701146298192479,
|
16747 |
+
"loss": 8.0634,
|
16748 |
+
"step": 558000
|
16749 |
+
},
|
16750 |
+
{
|
16751 |
+
"epoch": 2.4,
|
16752 |
+
"learning_rate": 0.007007353379469375,
|
16753 |
+
"loss": 8.0698,
|
16754 |
+
"step": 558200
|
16755 |
+
},
|
16756 |
+
{
|
16757 |
+
"epoch": 2.4,
|
16758 |
+
"learning_rate": 0.007003242159741711,
|
16759 |
+
"loss": 8.0581,
|
16760 |
+
"step": 558400
|
16761 |
+
},
|
16762 |
+
{
|
16763 |
+
"epoch": 2.4,
|
16764 |
+
"learning_rate": 0.0069991293260541374,
|
16765 |
+
"loss": 8.0534,
|
16766 |
+
"step": 558600
|
16767 |
+
},
|
16768 |
+
{
|
16769 |
+
"epoch": 2.4,
|
16770 |
+
"learning_rate": 0.006995035457942955,
|
16771 |
+
"loss": 8.0641,
|
16772 |
+
"step": 558800
|
16773 |
+
},
|
16774 |
+
{
|
16775 |
+
"epoch": 2.4,
|
16776 |
+
"learning_rate": 0.006990919414306169,
|
16777 |
+
"loss": 8.0571,
|
16778 |
+
"step": 559000
|
16779 |
+
},
|
16780 |
+
{
|
16781 |
+
"epoch": 2.4,
|
16782 |
+
"learning_rate": 0.0069868017666376864,
|
16783 |
+
"loss": 8.0577,
|
16784 |
+
"step": 559200
|
16785 |
+
},
|
16786 |
+
{
|
16787 |
+
"epoch": 2.4,
|
16788 |
+
"learning_rate": 0.006982703118473221,
|
16789 |
+
"loss": 8.0753,
|
16790 |
+
"step": 559400
|
16791 |
+
},
|
16792 |
+
{
|
16793 |
+
"epoch": 2.4,
|
16794 |
+
"learning_rate": 0.006978582280673894,
|
16795 |
+
"loss": 8.0559,
|
16796 |
+
"step": 559600
|
16797 |
+
},
|
16798 |
+
{
|
16799 |
+
"epoch": 2.41,
|
16800 |
+
"learning_rate": 0.006974459848782675,
|
16801 |
+
"loss": 8.0529,
|
16802 |
+
"step": 559800
|
16803 |
+
},
|
16804 |
+
{
|
16805 |
+
"epoch": 2.41,
|
16806 |
+
"learning_rate": 0.006970335826120932,
|
16807 |
+
"loss": 8.046,
|
16808 |
+
"step": 560000
|
16809 |
}
|
16810 |
],
|
16811 |
"max_steps": 1000000,
|
16812 |
"num_train_epochs": 5,
|
16813 |
+
"total_flos": 8.925433954352456e+17,
|
16814 |
"trial_name": null,
|
16815 |
"trial_params": null
|
16816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 146774203
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2db06e17c94930e25ba4fa153fb1d09bc548975dd61b046eadd4ef82210ad5b
|
3 |
size 146774203
|