Training in progress, step 495000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84af0d18833e41d1a10681188880ec8ba4f30596fd8f7cde082ff46ab45cbdbe
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a637602bf8a77f2410da25ab5d173ee31a6be30a49806bbec5d80d168cc5d7d
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12ae54c41aaca6d65f26d1e6bbd2f6203cb961f99f7b84be560e3bbd2396b2e3
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84ff6c7712aa546d5132d185aba96a5a4249dab2b9a289b4552ba71d6df59a7d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4843dae073ec4305f57d7c7321f9c3c4b37572653eef93fdcfec1b0443124ae
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a7ddaa7c379f07ffb65846eedd96b8e39986c95cb5ff903d85ebfaa34b6adae
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d359d67c34eab31fb94ce6951c370a8f3e7fc3440bc514b0b92658e55af5bafb
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b13ec93cdbe5490bb95db6f38adcdfd8751f641d8a226eeb4226b8700039209
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5886,6 +5886,66 @@
|
|
5886 |
"learning_rate": 1.0143397827427121e-05,
|
5887 |
"loss": 0.3183,
|
5888 |
"step": 490000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5889 |
}
|
5890 |
],
|
5891 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.425528293376226,
|
5 |
+
"global_step": 495000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5886 |
"learning_rate": 1.0143397827427121e-05,
|
5887 |
"loss": 0.3183,
|
5888 |
"step": 490000
|
5889 |
+
},
|
5890 |
+
{
|
5891 |
+
"epoch": 8.35,
|
5892 |
+
"learning_rate": 1.0129673597743943e-05,
|
5893 |
+
"loss": 0.318,
|
5894 |
+
"step": 490500
|
5895 |
+
},
|
5896 |
+
{
|
5897 |
+
"epoch": 8.36,
|
5898 |
+
"learning_rate": 1.0116638959974567e-05,
|
5899 |
+
"loss": 0.3183,
|
5900 |
+
"step": 491000
|
5901 |
+
},
|
5902 |
+
{
|
5903 |
+
"epoch": 8.37,
|
5904 |
+
"learning_rate": 1.0104294042765599e-05,
|
5905 |
+
"loss": 0.3184,
|
5906 |
+
"step": 491500
|
5907 |
+
},
|
5908 |
+
{
|
5909 |
+
"epoch": 8.37,
|
5910 |
+
"learning_rate": 1.009263896795639e-05,
|
5911 |
+
"loss": 0.3185,
|
5912 |
+
"step": 492000
|
5913 |
+
},
|
5914 |
+
{
|
5915 |
+
"epoch": 8.38,
|
5916 |
+
"learning_rate": 1.008167385057782e-05,
|
5917 |
+
"loss": 0.3184,
|
5918 |
+
"step": 492500
|
5919 |
+
},
|
5920 |
+
{
|
5921 |
+
"epoch": 8.39,
|
5922 |
+
"learning_rate": 1.007139879885118e-05,
|
5923 |
+
"loss": 0.3184,
|
5924 |
+
"step": 493000
|
5925 |
+
},
|
5926 |
+
{
|
5927 |
+
"epoch": 8.4,
|
5928 |
+
"learning_rate": 1.0061832395105667e-05,
|
5929 |
+
"loss": 0.3182,
|
5930 |
+
"step": 493500
|
5931 |
+
},
|
5932 |
+
{
|
5933 |
+
"epoch": 8.41,
|
5934 |
+
"learning_rate": 1.0052936391489836e-05,
|
5935 |
+
"loss": 0.3184,
|
5936 |
+
"step": 494000
|
5937 |
+
},
|
5938 |
+
{
|
5939 |
+
"epoch": 8.42,
|
5940 |
+
"learning_rate": 1.0044730737153053e-05,
|
5941 |
+
"loss": 0.3181,
|
5942 |
+
"step": 494500
|
5943 |
+
},
|
5944 |
+
{
|
5945 |
+
"epoch": 8.43,
|
5946 |
+
"learning_rate": 1.0037215513081807e-05,
|
5947 |
+
"loss": 0.3182,
|
5948 |
+
"step": 495000
|
5949 |
}
|
5950 |
],
|
5951 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a637602bf8a77f2410da25ab5d173ee31a6be30a49806bbec5d80d168cc5d7d
|
3 |
size 201355195
|