Training in progress, step 500000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c9d46c0928f6304f48bd62fa6e71bd8d578d23bad4ca4e988379cbeabeca88e
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:236aaee0ef2e2b221aabbd9376533af723e290eee22b162dcb3dc545c9bc0456
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e47c014bce57a678aa8e29889fa3baa2aec11726dcca68d912de21df35c6be0
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53a074c65f43dfe683b5f0e988de79e3365c939ebb4b13c8f9ce84b59bdb64a7
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9806,11 +9806,211 @@
|
|
9806 |
"eval_samples_per_second": 1543.227,
|
9807 |
"eval_steps_per_second": 24.574,
|
9808 |
"step": 490000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9809 |
}
|
9810 |
],
|
9811 |
"max_steps": 500000,
|
9812 |
"num_train_epochs": 12,
|
9813 |
-
"total_flos": 1.
|
9814 |
"trial_name": null,
|
9815 |
"trial_params": null
|
9816 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.649852046879005,
|
5 |
+
"global_step": 500000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9806 |
"eval_samples_per_second": 1543.227,
|
9807 |
"eval_steps_per_second": 24.574,
|
9808 |
"step": 490000
|
9809 |
+
},
|
9810 |
+
{
|
9811 |
+
"epoch": 11.43,
|
9812 |
+
"learning_rate": 1.0286124377900624e-05,
|
9813 |
+
"loss": 0.2504,
|
9814 |
+
"step": 490500
|
9815 |
+
},
|
9816 |
+
{
|
9817 |
+
"epoch": 11.44,
|
9818 |
+
"learning_rate": 1.0256807270282153e-05,
|
9819 |
+
"loss": 0.2503,
|
9820 |
+
"step": 491000
|
9821 |
+
},
|
9822 |
+
{
|
9823 |
+
"epoch": 11.44,
|
9824 |
+
"eval_loss": 0.2337212711572647,
|
9825 |
+
"eval_runtime": 1.4623,
|
9826 |
+
"eval_samples_per_second": 1503.072,
|
9827 |
+
"eval_steps_per_second": 23.934,
|
9828 |
+
"step": 491000
|
9829 |
+
},
|
9830 |
+
{
|
9831 |
+
"epoch": 11.45,
|
9832 |
+
"learning_rate": 1.0229073051029455e-05,
|
9833 |
+
"loss": 0.25,
|
9834 |
+
"step": 491500
|
9835 |
+
},
|
9836 |
+
{
|
9837 |
+
"epoch": 11.46,
|
9838 |
+
"learning_rate": 1.020292202343952e-05,
|
9839 |
+
"loss": 0.2501,
|
9840 |
+
"step": 492000
|
9841 |
+
},
|
9842 |
+
{
|
9843 |
+
"epoch": 11.46,
|
9844 |
+
"eval_loss": 0.23177069425582886,
|
9845 |
+
"eval_runtime": 1.4821,
|
9846 |
+
"eval_samples_per_second": 1482.991,
|
9847 |
+
"eval_steps_per_second": 23.615,
|
9848 |
+
"step": 492000
|
9849 |
+
},
|
9850 |
+
{
|
9851 |
+
"epoch": 11.48,
|
9852 |
+
"learning_rate": 1.0178354473495813e-05,
|
9853 |
+
"loss": 0.2503,
|
9854 |
+
"step": 492500
|
9855 |
+
},
|
9856 |
+
{
|
9857 |
+
"epoch": 11.49,
|
9858 |
+
"learning_rate": 1.0155370669865077e-05,
|
9859 |
+
"loss": 0.2505,
|
9860 |
+
"step": 493000
|
9861 |
+
},
|
9862 |
+
{
|
9863 |
+
"epoch": 11.49,
|
9864 |
+
"eval_loss": 0.23253387212753296,
|
9865 |
+
"eval_runtime": 1.4831,
|
9866 |
+
"eval_samples_per_second": 1482.001,
|
9867 |
+
"eval_steps_per_second": 23.599,
|
9868 |
+
"step": 493000
|
9869 |
+
},
|
9870 |
+
{
|
9871 |
+
"epoch": 11.5,
|
9872 |
+
"learning_rate": 1.0133970863894557e-05,
|
9873 |
+
"loss": 0.2502,
|
9874 |
+
"step": 493500
|
9875 |
+
},
|
9876 |
+
{
|
9877 |
+
"epoch": 11.51,
|
9878 |
+
"learning_rate": 1.0114155289609061e-05,
|
9879 |
+
"loss": 0.2504,
|
9880 |
+
"step": 494000
|
9881 |
+
},
|
9882 |
+
{
|
9883 |
+
"epoch": 11.51,
|
9884 |
+
"eval_loss": 0.23376800119876862,
|
9885 |
+
"eval_runtime": 1.4253,
|
9886 |
+
"eval_samples_per_second": 1542.116,
|
9887 |
+
"eval_steps_per_second": 24.556,
|
9888 |
+
"step": 494000
|
9889 |
+
},
|
9890 |
+
{
|
9891 |
+
"epoch": 11.52,
|
9892 |
+
"learning_rate": 1.0095924163708572e-05,
|
9893 |
+
"loss": 0.2503,
|
9894 |
+
"step": 494500
|
9895 |
+
},
|
9896 |
+
{
|
9897 |
+
"epoch": 11.53,
|
9898 |
+
"learning_rate": 1.0079277685565724e-05,
|
9899 |
+
"loss": 0.2503,
|
9900 |
+
"step": 495000
|
9901 |
+
},
|
9902 |
+
{
|
9903 |
+
"epoch": 11.53,
|
9904 |
+
"eval_loss": 0.23174437880516052,
|
9905 |
+
"eval_runtime": 1.4318,
|
9906 |
+
"eval_samples_per_second": 1535.107,
|
9907 |
+
"eval_steps_per_second": 24.444,
|
9908 |
+
"step": 495000
|
9909 |
+
},
|
9910 |
+
{
|
9911 |
+
"epoch": 11.55,
|
9912 |
+
"learning_rate": 1.0064216037223772e-05,
|
9913 |
+
"loss": 0.2501,
|
9914 |
+
"step": 495500
|
9915 |
+
},
|
9916 |
+
{
|
9917 |
+
"epoch": 11.56,
|
9918 |
+
"learning_rate": 1.0050739383394454e-05,
|
9919 |
+
"loss": 0.2502,
|
9920 |
+
"step": 496000
|
9921 |
+
},
|
9922 |
+
{
|
9923 |
+
"epoch": 11.56,
|
9924 |
+
"eval_loss": 0.23404622077941895,
|
9925 |
+
"eval_runtime": 1.4342,
|
9926 |
+
"eval_samples_per_second": 1532.524,
|
9927 |
+
"eval_steps_per_second": 24.403,
|
9928 |
+
"step": 496000
|
9929 |
+
},
|
9930 |
+
{
|
9931 |
+
"epoch": 11.57,
|
9932 |
+
"learning_rate": 1.003884787145633e-05,
|
9933 |
+
"loss": 0.2505,
|
9934 |
+
"step": 496500
|
9935 |
+
},
|
9936 |
+
{
|
9937 |
+
"epoch": 11.58,
|
9938 |
+
"learning_rate": 1.002854163145305e-05,
|
9939 |
+
"loss": 0.25,
|
9940 |
+
"step": 497000
|
9941 |
+
},
|
9942 |
+
{
|
9943 |
+
"epoch": 11.58,
|
9944 |
+
"eval_loss": 0.2340567409992218,
|
9945 |
+
"eval_runtime": 1.4613,
|
9946 |
+
"eval_samples_per_second": 1504.132,
|
9947 |
+
"eval_steps_per_second": 23.951,
|
9948 |
+
"step": 497000
|
9949 |
+
},
|
9950 |
+
{
|
9951 |
+
"epoch": 11.59,
|
9952 |
+
"learning_rate": 1.0019820776091995e-05,
|
9953 |
+
"loss": 0.2503,
|
9954 |
+
"step": 497500
|
9955 |
+
},
|
9956 |
+
{
|
9957 |
+
"epoch": 11.6,
|
9958 |
+
"learning_rate": 1.0012685400743077e-05,
|
9959 |
+
"loss": 0.2502,
|
9960 |
+
"step": 498000
|
9961 |
+
},
|
9962 |
+
{
|
9963 |
+
"epoch": 11.6,
|
9964 |
+
"eval_loss": 0.2324497401714325,
|
9965 |
+
"eval_runtime": 1.4433,
|
9966 |
+
"eval_samples_per_second": 1522.91,
|
9967 |
+
"eval_steps_per_second": 24.25,
|
9968 |
+
"step": 498000
|
9969 |
+
},
|
9970 |
+
{
|
9971 |
+
"epoch": 11.61,
|
9972 |
+
"learning_rate": 1.0007135583437572e-05,
|
9973 |
+
"loss": 0.25,
|
9974 |
+
"step": 498500
|
9975 |
+
},
|
9976 |
+
{
|
9977 |
+
"epoch": 11.63,
|
9978 |
+
"learning_rate": 1.0003171384867436e-05,
|
9979 |
+
"loss": 0.2504,
|
9980 |
+
"step": 499000
|
9981 |
+
},
|
9982 |
+
{
|
9983 |
+
"epoch": 11.63,
|
9984 |
+
"eval_loss": 0.2359778881072998,
|
9985 |
+
"eval_runtime": 1.4253,
|
9986 |
+
"eval_samples_per_second": 1542.145,
|
9987 |
+
"eval_steps_per_second": 24.556,
|
9988 |
+
"step": 499000
|
9989 |
+
},
|
9990 |
+
{
|
9991 |
+
"epoch": 11.64,
|
9992 |
+
"learning_rate": 1.0000792848384467e-05,
|
9993 |
+
"loss": 0.2501,
|
9994 |
+
"step": 499500
|
9995 |
+
},
|
9996 |
+
{
|
9997 |
+
"epoch": 11.65,
|
9998 |
+
"learning_rate": 1e-05,
|
9999 |
+
"loss": 0.2501,
|
10000 |
+
"step": 500000
|
10001 |
+
},
|
10002 |
+
{
|
10003 |
+
"epoch": 11.65,
|
10004 |
+
"eval_loss": 0.23462386429309845,
|
10005 |
+
"eval_runtime": 1.444,
|
10006 |
+
"eval_samples_per_second": 1522.115,
|
10007 |
+
"eval_steps_per_second": 24.238,
|
10008 |
+
"step": 500000
|
10009 |
}
|
10010 |
],
|
10011 |
"max_steps": 500000,
|
10012 |
"num_train_epochs": 12,
|
10013 |
+
"total_flos": 1.5974329471825026e+22,
|
10014 |
"trial_name": null,
|
10015 |
"trial_params": null
|
10016 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:236aaee0ef2e2b221aabbd9376533af723e290eee22b162dcb3dc545c9bc0456
|
3 |
size 102501541
|