Training in progress, step 990000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb4cd9e789adbd1802119018bcfc4f0b6dba2541ced8918776537c19936d2aa3
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21e636c80ed6aaf4e2b5d21598685c1a08b0a8d8edf7041e56552898357162ca
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2358905887cd0ce80c53b6e8a0174e039c4c5bd62c6c91c86f0312f9b46fcf7
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -19606,11 +19606,211 @@
|
|
19606 |
"eval_samples_per_second": 887.915,
|
19607 |
"eval_steps_per_second": 13.916,
|
19608 |
"step": 980000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19609 |
}
|
19610 |
],
|
19611 |
"max_steps": 1000000,
|
19612 |
"num_train_epochs": 12,
|
19613 |
-
"total_flos": 6.
|
19614 |
"trial_name": null,
|
19615 |
"trial_params": null
|
19616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.041343698069438,
|
5 |
+
"global_step": 990000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
19606 |
"eval_samples_per_second": 887.915,
|
19607 |
"eval_steps_per_second": 13.916,
|
19608 |
"step": 980000
|
19609 |
+
},
|
19610 |
+
{
|
19611 |
+
"epoch": 10.94,
|
19612 |
+
"learning_rate": 1.0145492062574731e-05,
|
19613 |
+
"loss": 0.1798,
|
19614 |
+
"step": 980500
|
19615 |
+
},
|
19616 |
+
{
|
19617 |
+
"epoch": 10.94,
|
19618 |
+
"learning_rate": 1.0138129010020992e-05,
|
19619 |
+
"loss": 0.1797,
|
19620 |
+
"step": 981000
|
19621 |
+
},
|
19622 |
+
{
|
19623 |
+
"epoch": 10.94,
|
19624 |
+
"eval_loss": 0.17310407757759094,
|
19625 |
+
"eval_runtime": 2.575,
|
19626 |
+
"eval_samples_per_second": 892.044,
|
19627 |
+
"eval_steps_per_second": 13.981,
|
19628 |
+
"step": 981000
|
19629 |
+
},
|
19630 |
+
{
|
19631 |
+
"epoch": 10.95,
|
19632 |
+
"learning_rate": 1.0130956957154867e-05,
|
19633 |
+
"loss": 0.1796,
|
19634 |
+
"step": 981500
|
19635 |
+
},
|
19636 |
+
{
|
19637 |
+
"epoch": 10.95,
|
19638 |
+
"learning_rate": 1.0123975923584488e-05,
|
19639 |
+
"loss": 0.1795,
|
19640 |
+
"step": 982000
|
19641 |
+
},
|
19642 |
+
{
|
19643 |
+
"epoch": 10.95,
|
19644 |
+
"eval_loss": 0.17133940756320953,
|
19645 |
+
"eval_runtime": 2.6289,
|
19646 |
+
"eval_samples_per_second": 873.748,
|
19647 |
+
"eval_steps_per_second": 13.694,
|
19648 |
+
"step": 982000
|
19649 |
+
},
|
19650 |
+
{
|
19651 |
+
"epoch": 10.96,
|
19652 |
+
"learning_rate": 1.0117185928395721e-05,
|
19653 |
+
"loss": 0.1797,
|
19654 |
+
"step": 982500
|
19655 |
+
},
|
19656 |
+
{
|
19657 |
+
"epoch": 10.96,
|
19658 |
+
"learning_rate": 1.0110586990152152e-05,
|
19659 |
+
"loss": 0.1796,
|
19660 |
+
"step": 983000
|
19661 |
+
},
|
19662 |
+
{
|
19663 |
+
"epoch": 10.96,
|
19664 |
+
"eval_loss": 0.17200584709644318,
|
19665 |
+
"eval_runtime": 2.6007,
|
19666 |
+
"eval_samples_per_second": 883.236,
|
19667 |
+
"eval_steps_per_second": 13.843,
|
19668 |
+
"step": 983000
|
19669 |
+
},
|
19670 |
+
{
|
19671 |
+
"epoch": 10.97,
|
19672 |
+
"learning_rate": 1.0104179126895039e-05,
|
19673 |
+
"loss": 0.1797,
|
19674 |
+
"step": 983500
|
19675 |
+
},
|
19676 |
+
{
|
19677 |
+
"epoch": 10.97,
|
19678 |
+
"learning_rate": 1.0097962356143219e-05,
|
19679 |
+
"loss": 0.1797,
|
19680 |
+
"step": 984000
|
19681 |
+
},
|
19682 |
+
{
|
19683 |
+
"epoch": 10.97,
|
19684 |
+
"eval_loss": 0.17025373876094818,
|
19685 |
+
"eval_runtime": 2.5803,
|
19686 |
+
"eval_samples_per_second": 890.197,
|
19687 |
+
"eval_steps_per_second": 13.952,
|
19688 |
+
"step": 984000
|
19689 |
+
},
|
19690 |
+
{
|
19691 |
+
"epoch": 10.98,
|
19692 |
+
"learning_rate": 1.009193669489312e-05,
|
19693 |
+
"loss": 0.1797,
|
19694 |
+
"step": 984500
|
19695 |
+
},
|
19696 |
+
{
|
19697 |
+
"epoch": 10.99,
|
19698 |
+
"learning_rate": 1.0086102159618668e-05,
|
19699 |
+
"loss": 0.1796,
|
19700 |
+
"step": 985000
|
19701 |
+
},
|
19702 |
+
{
|
19703 |
+
"epoch": 10.99,
|
19704 |
+
"eval_loss": 0.17083962261676788,
|
19705 |
+
"eval_runtime": 2.5712,
|
19706 |
+
"eval_samples_per_second": 893.348,
|
19707 |
+
"eval_steps_per_second": 14.001,
|
19708 |
+
"step": 985000
|
19709 |
+
},
|
19710 |
+
{
|
19711 |
+
"epoch": 10.99,
|
19712 |
+
"learning_rate": 1.0080458766271252e-05,
|
19713 |
+
"loss": 0.1798,
|
19714 |
+
"step": 985500
|
19715 |
+
},
|
19716 |
+
{
|
19717 |
+
"epoch": 11.0,
|
19718 |
+
"learning_rate": 1.0075006530279694e-05,
|
19719 |
+
"loss": 0.1797,
|
19720 |
+
"step": 986000
|
19721 |
+
},
|
19722 |
+
{
|
19723 |
+
"epoch": 11.0,
|
19724 |
+
"eval_loss": 0.1690717339515686,
|
19725 |
+
"eval_runtime": 2.588,
|
19726 |
+
"eval_samples_per_second": 887.569,
|
19727 |
+
"eval_steps_per_second": 13.911,
|
19728 |
+
"step": 986000
|
19729 |
+
},
|
19730 |
+
{
|
19731 |
+
"epoch": 11.0,
|
19732 |
+
"learning_rate": 1.0069745466550205e-05,
|
19733 |
+
"loss": 0.1794,
|
19734 |
+
"step": 986500
|
19735 |
+
},
|
19736 |
+
{
|
19737 |
+
"epoch": 11.01,
|
19738 |
+
"learning_rate": 1.0064675589466339e-05,
|
19739 |
+
"loss": 0.1796,
|
19740 |
+
"step": 987000
|
19741 |
+
},
|
19742 |
+
{
|
19743 |
+
"epoch": 11.01,
|
19744 |
+
"eval_loss": 0.16997100412845612,
|
19745 |
+
"eval_runtime": 2.5938,
|
19746 |
+
"eval_samples_per_second": 885.56,
|
19747 |
+
"eval_steps_per_second": 13.879,
|
19748 |
+
"step": 987000
|
19749 |
+
},
|
19750 |
+
{
|
19751 |
+
"epoch": 11.01,
|
19752 |
+
"learning_rate": 1.005979691288893e-05,
|
19753 |
+
"loss": 0.1795,
|
19754 |
+
"step": 987500
|
19755 |
+
},
|
19756 |
+
{
|
19757 |
+
"epoch": 11.02,
|
19758 |
+
"learning_rate": 1.0055109450156098e-05,
|
19759 |
+
"loss": 0.1791,
|
19760 |
+
"step": 988000
|
19761 |
+
},
|
19762 |
+
{
|
19763 |
+
"epoch": 11.02,
|
19764 |
+
"eval_loss": 0.1697554588317871,
|
19765 |
+
"eval_runtime": 2.5898,
|
19766 |
+
"eval_samples_per_second": 886.931,
|
19767 |
+
"eval_steps_per_second": 13.901,
|
19768 |
+
"step": 988000
|
19769 |
+
},
|
19770 |
+
{
|
19771 |
+
"epoch": 11.02,
|
19772 |
+
"learning_rate": 1.0050613214083197e-05,
|
19773 |
+
"loss": 0.1797,
|
19774 |
+
"step": 988500
|
19775 |
+
},
|
19776 |
+
{
|
19777 |
+
"epoch": 11.03,
|
19778 |
+
"learning_rate": 1.0046308216962759e-05,
|
19779 |
+
"loss": 0.1795,
|
19780 |
+
"step": 989000
|
19781 |
+
},
|
19782 |
+
{
|
19783 |
+
"epoch": 11.03,
|
19784 |
+
"eval_loss": 0.1698392927646637,
|
19785 |
+
"eval_runtime": 2.6586,
|
19786 |
+
"eval_samples_per_second": 863.992,
|
19787 |
+
"eval_steps_per_second": 13.541,
|
19788 |
+
"step": 989000
|
19789 |
+
},
|
19790 |
+
{
|
19791 |
+
"epoch": 11.04,
|
19792 |
+
"learning_rate": 1.0042194470564472e-05,
|
19793 |
+
"loss": 0.1796,
|
19794 |
+
"step": 989500
|
19795 |
+
},
|
19796 |
+
{
|
19797 |
+
"epoch": 11.04,
|
19798 |
+
"learning_rate": 1.0038271986135177e-05,
|
19799 |
+
"loss": 0.1799,
|
19800 |
+
"step": 990000
|
19801 |
+
},
|
19802 |
+
{
|
19803 |
+
"epoch": 11.04,
|
19804 |
+
"eval_loss": 0.16946464776992798,
|
19805 |
+
"eval_runtime": 2.6152,
|
19806 |
+
"eval_samples_per_second": 878.327,
|
19807 |
+
"eval_steps_per_second": 13.766,
|
19808 |
+
"step": 990000
|
19809 |
}
|
19810 |
],
|
19811 |
"max_steps": 1000000,
|
19812 |
"num_train_epochs": 12,
|
19813 |
+
"total_flos": 6.9398656010816955e+22,
|
19814 |
"trial_name": null,
|
19815 |
"trial_params": null
|
19816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21e636c80ed6aaf4e2b5d21598685c1a08b0a8d8edf7041e56552898357162ca
|
3 |
size 449471589
|