Training in progress, step 440000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d13dee40bf739e07daa5e776dbbdc46c2654fbf14746060e2ac946fb44dbe8e5
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61e3683717b89ff6ecb486f42da196e91a7d9315fab8b8963d03c3872a54d691
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -8606,11 +8606,211 @@
|
|
8606 |
"eval_samples_per_second": 1535.576,
|
8607 |
"eval_steps_per_second": 24.452,
|
8608 |
"step": 430000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8609 |
}
|
8610 |
],
|
8611 |
"max_steps": 500000,
|
8612 |
"num_train_epochs": 12,
|
8613 |
-
"total_flos": 1.
|
8614 |
"trial_name": null,
|
8615 |
"trial_params": null
|
8616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.251869801253523,
|
5 |
+
"global_step": 440000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
8606 |
"eval_samples_per_second": 1535.576,
|
8607 |
"eval_steps_per_second": 24.452,
|
8608 |
"step": 430000
|
8609 |
+
},
|
8610 |
+
{
|
8611 |
+
"epoch": 10.03,
|
8612 |
+
"learning_rate": 2.5050805238106804e-05,
|
8613 |
+
"loss": 0.253,
|
8614 |
+
"step": 430500
|
8615 |
+
},
|
8616 |
+
{
|
8617 |
+
"epoch": 10.04,
|
8618 |
+
"learning_rate": 2.4838784403798542e-05,
|
8619 |
+
"loss": 0.2529,
|
8620 |
+
"step": 431000
|
8621 |
+
},
|
8622 |
+
{
|
8623 |
+
"epoch": 10.04,
|
8624 |
+
"eval_loss": 0.23809103667736053,
|
8625 |
+
"eval_runtime": 1.4751,
|
8626 |
+
"eval_samples_per_second": 1490.054,
|
8627 |
+
"eval_steps_per_second": 23.727,
|
8628 |
+
"step": 431000
|
8629 |
+
},
|
8630 |
+
{
|
8631 |
+
"epoch": 10.05,
|
8632 |
+
"learning_rate": 2.4628186991690346e-05,
|
8633 |
+
"loss": 0.2528,
|
8634 |
+
"step": 431500
|
8635 |
+
},
|
8636 |
+
{
|
8637 |
+
"epoch": 10.07,
|
8638 |
+
"learning_rate": 2.4419015304841797e-05,
|
8639 |
+
"loss": 0.2532,
|
8640 |
+
"step": 432000
|
8641 |
+
},
|
8642 |
+
{
|
8643 |
+
"epoch": 10.07,
|
8644 |
+
"eval_loss": 0.23703138530254364,
|
8645 |
+
"eval_runtime": 1.4419,
|
8646 |
+
"eval_samples_per_second": 1524.361,
|
8647 |
+
"eval_steps_per_second": 24.273,
|
8648 |
+
"step": 432000
|
8649 |
+
},
|
8650 |
+
{
|
8651 |
+
"epoch": 10.08,
|
8652 |
+
"learning_rate": 2.4211271630720957e-05,
|
8653 |
+
"loss": 0.2532,
|
8654 |
+
"step": 432500
|
8655 |
+
},
|
8656 |
+
{
|
8657 |
+
"epoch": 10.09,
|
8658 |
+
"learning_rate": 2.4004958241179347e-05,
|
8659 |
+
"loss": 0.253,
|
8660 |
+
"step": 433000
|
8661 |
+
},
|
8662 |
+
{
|
8663 |
+
"epoch": 10.09,
|
8664 |
+
"eval_loss": 0.2378602772951126,
|
8665 |
+
"eval_runtime": 1.476,
|
8666 |
+
"eval_samples_per_second": 1489.124,
|
8667 |
+
"eval_steps_per_second": 23.712,
|
8668 |
+
"step": 433000
|
8669 |
+
},
|
8670 |
+
{
|
8671 |
+
"epoch": 10.1,
|
8672 |
+
"learning_rate": 2.3800077392427193e-05,
|
8673 |
+
"loss": 0.2533,
|
8674 |
+
"step": 433500
|
8675 |
+
},
|
8676 |
+
{
|
8677 |
+
"epoch": 10.11,
|
8678 |
+
"learning_rate": 2.3596631325008536e-05,
|
8679 |
+
"loss": 0.2531,
|
8680 |
+
"step": 434000
|
8681 |
+
},
|
8682 |
+
{
|
8683 |
+
"epoch": 10.11,
|
8684 |
+
"eval_loss": 0.23943325877189636,
|
8685 |
+
"eval_runtime": 1.4371,
|
8686 |
+
"eval_samples_per_second": 1529.446,
|
8687 |
+
"eval_steps_per_second": 24.354,
|
8688 |
+
"step": 434000
|
8689 |
+
},
|
8690 |
+
{
|
8691 |
+
"epoch": 10.12,
|
8692 |
+
"learning_rate": 2.3394622263777042e-05,
|
8693 |
+
"loss": 0.2529,
|
8694 |
+
"step": 434500
|
8695 |
+
},
|
8696 |
+
{
|
8697 |
+
"epoch": 10.14,
|
8698 |
+
"learning_rate": 2.3194052417871433e-05,
|
8699 |
+
"loss": 0.2528,
|
8700 |
+
"step": 435000
|
8701 |
+
},
|
8702 |
+
{
|
8703 |
+
"epoch": 10.14,
|
8704 |
+
"eval_loss": 0.2356664538383484,
|
8705 |
+
"eval_runtime": 1.4719,
|
8706 |
+
"eval_samples_per_second": 1493.299,
|
8707 |
+
"eval_steps_per_second": 23.779,
|
8708 |
+
"step": 435000
|
8709 |
+
},
|
8710 |
+
{
|
8711 |
+
"epoch": 10.15,
|
8712 |
+
"learning_rate": 2.2994923980691425e-05,
|
8713 |
+
"loss": 0.253,
|
8714 |
+
"step": 435500
|
8715 |
+
},
|
8716 |
+
{
|
8717 |
+
"epoch": 10.16,
|
8718 |
+
"learning_rate": 2.279723912987365e-05,
|
8719 |
+
"loss": 0.253,
|
8720 |
+
"step": 436000
|
8721 |
+
},
|
8722 |
+
{
|
8723 |
+
"epoch": 10.16,
|
8724 |
+
"eval_loss": 0.23836100101470947,
|
8725 |
+
"eval_runtime": 1.4427,
|
8726 |
+
"eval_samples_per_second": 1523.571,
|
8727 |
+
"eval_steps_per_second": 24.261,
|
8728 |
+
"step": 436000
|
8729 |
+
},
|
8730 |
+
{
|
8731 |
+
"epoch": 10.17,
|
8732 |
+
"learning_rate": 2.2601000027268006e-05,
|
8733 |
+
"loss": 0.2526,
|
8734 |
+
"step": 436500
|
8735 |
+
},
|
8736 |
+
{
|
8737 |
+
"epoch": 10.18,
|
8738 |
+
"learning_rate": 2.2406208818913857e-05,
|
8739 |
+
"loss": 0.2525,
|
8740 |
+
"step": 437000
|
8741 |
+
},
|
8742 |
+
{
|
8743 |
+
"epoch": 10.18,
|
8744 |
+
"eval_loss": 0.23866206407546997,
|
8745 |
+
"eval_runtime": 1.4308,
|
8746 |
+
"eval_samples_per_second": 1536.237,
|
8747 |
+
"eval_steps_per_second": 24.462,
|
8748 |
+
"step": 437000
|
8749 |
+
},
|
8750 |
+
{
|
8751 |
+
"epoch": 10.19,
|
8752 |
+
"learning_rate": 2.221286763501666e-05,
|
8753 |
+
"loss": 0.2527,
|
8754 |
+
"step": 437500
|
8755 |
+
},
|
8756 |
+
{
|
8757 |
+
"epoch": 10.21,
|
8758 |
+
"learning_rate": 2.2020978589924673e-05,
|
8759 |
+
"loss": 0.2526,
|
8760 |
+
"step": 438000
|
8761 |
+
},
|
8762 |
+
{
|
8763 |
+
"epoch": 10.21,
|
8764 |
+
"eval_loss": 0.23435133695602417,
|
8765 |
+
"eval_runtime": 1.417,
|
8766 |
+
"eval_samples_per_second": 1551.208,
|
8767 |
+
"eval_steps_per_second": 24.701,
|
8768 |
+
"step": 438000
|
8769 |
+
},
|
8770 |
+
{
|
8771 |
+
"epoch": 10.22,
|
8772 |
+
"learning_rate": 2.1830543782105647e-05,
|
8773 |
+
"loss": 0.2528,
|
8774 |
+
"step": 438500
|
8775 |
+
},
|
8776 |
+
{
|
8777 |
+
"epoch": 10.23,
|
8778 |
+
"learning_rate": 2.1641565294124206e-05,
|
8779 |
+
"loss": 0.2527,
|
8780 |
+
"step": 439000
|
8781 |
+
},
|
8782 |
+
{
|
8783 |
+
"epoch": 10.23,
|
8784 |
+
"eval_loss": 0.23615118861198425,
|
8785 |
+
"eval_runtime": 1.4413,
|
8786 |
+
"eval_samples_per_second": 1524.993,
|
8787 |
+
"eval_steps_per_second": 24.283,
|
8788 |
+
"step": 439000
|
8789 |
+
},
|
8790 |
+
{
|
8791 |
+
"epoch": 10.24,
|
8792 |
+
"learning_rate": 2.1454045192618794e-05,
|
8793 |
+
"loss": 0.2525,
|
8794 |
+
"step": 439500
|
8795 |
+
},
|
8796 |
+
{
|
8797 |
+
"epoch": 10.25,
|
8798 |
+
"learning_rate": 2.1267985528279212e-05,
|
8799 |
+
"loss": 0.2528,
|
8800 |
+
"step": 440000
|
8801 |
+
},
|
8802 |
+
{
|
8803 |
+
"epoch": 10.25,
|
8804 |
+
"eval_loss": 0.23707140982151031,
|
8805 |
+
"eval_runtime": 1.4358,
|
8806 |
+
"eval_samples_per_second": 1530.896,
|
8807 |
+
"eval_steps_per_second": 24.377,
|
8808 |
+
"step": 440000
|
8809 |
}
|
8810 |
],
|
8811 |
"max_steps": 500000,
|
8812 |
"num_train_epochs": 12,
|
8813 |
+
"total_flos": 1.4057409136168932e+22,
|
8814 |
"trial_name": null,
|
8815 |
"trial_params": null
|
8816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61e3683717b89ff6ecb486f42da196e91a7d9315fab8b8963d03c3872a54d691
|
3 |
size 102501541
|