Training in progress, step 410000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92fdc000d4fafbbd2e91584a02c4c2fb962834ee1637306eeaaf0dd292216cd4
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1fa79c529616ca16d42da8e006a51df8140abfed26cff6e5cdad9c739308e68
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9a8f077a1c70700ffbfa5a0e85deb8c464d1d13ef25197649c92ff96bba8de6
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ec5481805755dac4cdd9cf08a6aa8694c0c1977d1745d95ec408164c6e121d6
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b929aa620a562d3388e495ece447a5fb5f09e20c1a47581e86d9dca02f8026db
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3a936c5ca740e050908abddcbed8d1d61194968ca4f9033807ade5b0f47a64e
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47fd3beeec6665e3f21e3103a71e599c6fc8aac864bb1094b5f55aa514eebecd
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed6c7ad97c29f9f9e30d0579ba32ff06f3d615e80dfaa268969429dbd7edd1ad
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4866,6 +4866,66 @@
|
|
4866 |
"learning_rate": 2.2142953486438546e-05,
|
4867 |
"loss": 0.32,
|
4868 |
"step": 405000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4869 |
}
|
4870 |
],
|
4871 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.978715074765321,
|
5 |
+
"global_step": 410000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4866 |
"learning_rate": 2.2142953486438546e-05,
|
4867 |
"loss": 0.32,
|
4868 |
"step": 405000
|
4869 |
+
},
|
4870 |
+
{
|
4871 |
+
"epoch": 6.9,
|
4872 |
+
"learning_rate": 2.201945239256808e-05,
|
4873 |
+
"loss": 0.3199,
|
4874 |
+
"step": 405500
|
4875 |
+
},
|
4876 |
+
{
|
4877 |
+
"epoch": 6.91,
|
4878 |
+
"learning_rate": 2.1896523543294826e-05,
|
4879 |
+
"loss": 0.3198,
|
4880 |
+
"step": 406000
|
4881 |
+
},
|
4882 |
+
{
|
4883 |
+
"epoch": 6.92,
|
4884 |
+
"learning_rate": 2.177465642951213e-05,
|
4885 |
+
"loss": 0.3199,
|
4886 |
+
"step": 406500
|
4887 |
+
},
|
4888 |
+
{
|
4889 |
+
"epoch": 6.93,
|
4890 |
+
"learning_rate": 2.165287340248795e-05,
|
4891 |
+
"loss": 0.3197,
|
4892 |
+
"step": 407000
|
4893 |
+
},
|
4894 |
+
{
|
4895 |
+
"epoch": 6.94,
|
4896 |
+
"learning_rate": 2.1531666238047615e-05,
|
4897 |
+
"loss": 0.32,
|
4898 |
+
"step": 407500
|
4899 |
+
},
|
4900 |
+
{
|
4901 |
+
"epoch": 6.94,
|
4902 |
+
"learning_rate": 2.141103613245686e-05,
|
4903 |
+
"loss": 0.32,
|
4904 |
+
"step": 408000
|
4905 |
+
},
|
4906 |
+
{
|
4907 |
+
"epoch": 6.95,
|
4908 |
+
"learning_rate": 2.129098427628619e-05,
|
4909 |
+
"loss": 0.3199,
|
4910 |
+
"step": 408500
|
4911 |
+
},
|
4912 |
+
{
|
4913 |
+
"epoch": 6.96,
|
4914 |
+
"learning_rate": 2.1171511854398904e-05,
|
4915 |
+
"loss": 0.3197,
|
4916 |
+
"step": 409000
|
4917 |
+
},
|
4918 |
+
{
|
4919 |
+
"epoch": 6.97,
|
4920 |
+
"learning_rate": 2.1052620045939608e-05,
|
4921 |
+
"loss": 0.32,
|
4922 |
+
"step": 409500
|
4923 |
+
},
|
4924 |
+
{
|
4925 |
+
"epoch": 6.98,
|
4926 |
+
"learning_rate": 2.0934546062965312e-05,
|
4927 |
+
"loss": 0.32,
|
4928 |
+
"step": 410000
|
4929 |
}
|
4930 |
],
|
4931 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1fa79c529616ca16d42da8e006a51df8140abfed26cff6e5cdad9c739308e68
|
3 |
size 201355195
|