Training in progress, step 435000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:615b67beaebd9fc11db06c2376748df967a3e0597412b5fbc3de1df84085bf30
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef4ded1c570c6515d92814a413bd59666ae763eb46a809315444972ca8266201
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c23b9ae5a4f78c49e5f3efe50a5ebccd6e5966ba4b0b8d5764d0403ecb74aac4
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbaa614a2f99741bfcaecef4c4718ba56e8945228cbbf85c2b8009964d63bd85
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a37573b1c526ec4918e05947745d8dab3ff6f75434a218b31f02401e2c40760f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e554c436f1253409d772e9d5282829d58b498573460fd583fd0185782170b927
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d4cd1ca0fe0185565e4959b8efebca361d6e5c0d3a4ff3c3b51faf577175581
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:142284cd18825de16ea2c5e9e3b90ce2a14b040ec54c9e7c11ab653d2cc9c8bb
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5166,6 +5166,66 @@
|
|
5166 |
"learning_rate": 1.6693414864285253e-05,
|
5167 |
"loss": 0.3193,
|
5168 |
"step": 430000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5169 |
}
|
5170 |
],
|
5171 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.404251878707416,
|
5 |
+
"global_step": 435000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5166 |
"learning_rate": 1.6693414864285253e-05,
|
5167 |
"loss": 0.3193,
|
5168 |
"step": 430000
|
5169 |
+
},
|
5170 |
+
{
|
5171 |
+
"epoch": 7.33,
|
5172 |
+
"learning_rate": 1.6599884928778023e-05,
|
5173 |
+
"loss": 0.3191,
|
5174 |
+
"step": 430500
|
5175 |
+
},
|
5176 |
+
{
|
5177 |
+
"epoch": 7.34,
|
5178 |
+
"learning_rate": 1.650698072681089e-05,
|
5179 |
+
"loss": 0.3191,
|
5180 |
+
"step": 431000
|
5181 |
+
},
|
5182 |
+
{
|
5183 |
+
"epoch": 7.34,
|
5184 |
+
"learning_rate": 1.6414703175310808e-05,
|
5185 |
+
"loss": 0.3194,
|
5186 |
+
"step": 431500
|
5187 |
+
},
|
5188 |
+
{
|
5189 |
+
"epoch": 7.35,
|
5190 |
+
"learning_rate": 1.632305318501997e-05,
|
5191 |
+
"loss": 0.3191,
|
5192 |
+
"step": 432000
|
5193 |
+
},
|
5194 |
+
{
|
5195 |
+
"epoch": 7.36,
|
5196 |
+
"learning_rate": 1.623221307572887e-05,
|
5197 |
+
"loss": 0.3194,
|
5198 |
+
"step": 432500
|
5199 |
+
},
|
5200 |
+
{
|
5201 |
+
"epoch": 7.37,
|
5202 |
+
"learning_rate": 1.6141819655678393e-05,
|
5203 |
+
"loss": 0.3191,
|
5204 |
+
"step": 433000
|
5205 |
+
},
|
5206 |
+
{
|
5207 |
+
"epoch": 7.38,
|
5208 |
+
"learning_rate": 1.6052056490087305e-05,
|
5209 |
+
"loss": 0.3192,
|
5210 |
+
"step": 433500
|
5211 |
+
},
|
5212 |
+
{
|
5213 |
+
"epoch": 7.39,
|
5214 |
+
"learning_rate": 1.5962924464881844e-05,
|
5215 |
+
"loss": 0.3192,
|
5216 |
+
"step": 434000
|
5217 |
+
},
|
5218 |
+
{
|
5219 |
+
"epoch": 7.4,
|
5220 |
+
"learning_rate": 1.5874424459759086e-05,
|
5221 |
+
"loss": 0.3194,
|
5222 |
+
"step": 434500
|
5223 |
+
},
|
5224 |
+
{
|
5225 |
+
"epoch": 7.4,
|
5226 |
+
"learning_rate": 1.578655734817838e-05,
|
5227 |
+
"loss": 0.3192,
|
5228 |
+
"step": 435000
|
5229 |
}
|
5230 |
],
|
5231 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef4ded1c570c6515d92814a413bd59666ae763eb46a809315444972ca8266201
|
3 |
size 201355195
|