Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9bb647830011f1f0c4939bdfd58513ca8d1b6b369f0f905ba5d1b59dff0d694
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7697a15ef9673439312fb036c35de00fc97c3f08924c4d9a382535bc3e9dd969
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aac6ab5786efe14d88a6dfd5c47656ff2718062b841d7abdc16c78094b2cc8d5
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c94c7c084b365de19772f29fe1d2ffdd18e2bd3f744bc288dfa9981c05f383e
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:867e91d51677fc0001645b808eefef44c8c1a86f4c9210d1d3576b946cebac35
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:434def7e802ff1b8a0587a049f8cb9ff64f00a3dc17caa9f58fe716c7c9ce5bd
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1e4bec7cc11f69c6b820a428063a4e9aab696285a5c1947c10e8d9e21010fca
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74433998881e570fe5797566e28e4a779b8ade1b81209fbf1fa056d46d071355
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4266,6 +4266,66 @@
|
|
4266 |
"learning_rate": 3.7145176802452735e-05,
|
4267 |
"loss": 0.3223,
|
4268 |
"step": 355000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4269 |
}
|
4270 |
],
|
4271 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.127658488012868,
|
5 |
+
"global_step": 360000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4266 |
"learning_rate": 3.7145176802452735e-05,
|
4267 |
"loss": 0.3223,
|
4268 |
"step": 355000
|
4269 |
+
},
|
4270 |
+
{
|
4271 |
+
"epoch": 6.05,
|
4272 |
+
"learning_rate": 3.697150590953425e-05,
|
4273 |
+
"loss": 0.322,
|
4274 |
+
"step": 355500
|
4275 |
+
},
|
4276 |
+
{
|
4277 |
+
"epoch": 6.06,
|
4278 |
+
"learning_rate": 3.679860575795753e-05,
|
4279 |
+
"loss": 0.3219,
|
4280 |
+
"step": 356000
|
4281 |
+
},
|
4282 |
+
{
|
4283 |
+
"epoch": 6.07,
|
4284 |
+
"learning_rate": 3.6625785068168896e-05,
|
4285 |
+
"loss": 0.3218,
|
4286 |
+
"step": 356500
|
4287 |
+
},
|
4288 |
+
{
|
4289 |
+
"epoch": 6.08,
|
4290 |
+
"learning_rate": 3.645339246437073e-05,
|
4291 |
+
"loss": 0.3218,
|
4292 |
+
"step": 357000
|
4293 |
+
},
|
4294 |
+
{
|
4295 |
+
"epoch": 6.09,
|
4296 |
+
"learning_rate": 3.6281429648008496e-05,
|
4297 |
+
"loss": 0.3216,
|
4298 |
+
"step": 357500
|
4299 |
+
},
|
4300 |
+
{
|
4301 |
+
"epoch": 6.09,
|
4302 |
+
"learning_rate": 3.610989831628571e-05,
|
4303 |
+
"loss": 0.322,
|
4304 |
+
"step": 358000
|
4305 |
+
},
|
4306 |
+
{
|
4307 |
+
"epoch": 6.1,
|
4308 |
+
"learning_rate": 3.593880016214741e-05,
|
4309 |
+
"loss": 0.3217,
|
4310 |
+
"step": 358500
|
4311 |
+
},
|
4312 |
+
{
|
4313 |
+
"epoch": 6.11,
|
4314 |
+
"learning_rate": 3.576813687426327e-05,
|
4315 |
+
"loss": 0.3218,
|
4316 |
+
"step": 359000
|
4317 |
+
},
|
4318 |
+
{
|
4319 |
+
"epoch": 6.12,
|
4320 |
+
"learning_rate": 3.5598250153690636e-05,
|
4321 |
+
"loss": 0.3217,
|
4322 |
+
"step": 359500
|
4323 |
+
},
|
4324 |
+
{
|
4325 |
+
"epoch": 6.13,
|
4326 |
+
"learning_rate": 3.5428460769004264e-05,
|
4327 |
+
"loss": 0.3216,
|
4328 |
+
"step": 360000
|
4329 |
}
|
4330 |
],
|
4331 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7697a15ef9673439312fb036c35de00fc97c3f08924c4d9a382535bc3e9dd969
|
3 |
size 201355195
|