Training in progress, step 370000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3d07d7a8f0aa3b3ddeda127b02dec391d7890a63ff5df5342ede01bb8f32c9f
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b92ea3bc6c893e4c7575c9088a3c4ed5b91dbfc3ebb18cdc9b74dbb39cdc78bc
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:398196715a9effbed0e7685942b87f397219fd2651bcc6534ef5961d088d105b
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f6809c63049ae6865caa8fb9be2583b6b41d8333f9ac7d098842b30725d0392
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8b1377295a61eb6ca9b47c70e5ac1e3d9bcc276b8d909f3eeeca65bfeaffbc8
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3a2e0da128344a3ef2758897faf6fbc195c0f5daee70e87863ec5cd1022e1ce
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc8031035ba0d95a5e0e17b97165fff2dcdd5abaaf6fa2ece0b778a65a4ed011
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54d3e471d80a359e1d9ff95554d35f657f28d0de979f0697e31bbee04063233e
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4386,6 +4386,66 @@
|
|
4386 |
"learning_rate": 3.37553661059407e-05,
|
4387 |
"loss": 0.3218,
|
4388 |
"step": 365000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4389 |
}
|
4390 |
],
|
4391 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.297869805363359,
|
5 |
+
"global_step": 370000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4386 |
"learning_rate": 3.37553661059407e-05,
|
4387 |
"loss": 0.3218,
|
4388 |
"step": 365000
|
4389 |
+
},
|
4390 |
+
{
|
4391 |
+
"epoch": 6.22,
|
4392 |
+
"learning_rate": 3.359050594396775e-05,
|
4393 |
+
"loss": 0.3216,
|
4394 |
+
"step": 365500
|
4395 |
+
},
|
4396 |
+
{
|
4397 |
+
"epoch": 6.23,
|
4398 |
+
"learning_rate": 3.342610382496483e-05,
|
4399 |
+
"loss": 0.3217,
|
4400 |
+
"step": 366000
|
4401 |
+
},
|
4402 |
+
{
|
4403 |
+
"epoch": 6.24,
|
4404 |
+
"learning_rate": 3.326216137151454e-05,
|
4405 |
+
"loss": 0.3217,
|
4406 |
+
"step": 366500
|
4407 |
+
},
|
4408 |
+
{
|
4409 |
+
"epoch": 6.25,
|
4410 |
+
"learning_rate": 3.309868020166266e-05,
|
4411 |
+
"loss": 0.3215,
|
4412 |
+
"step": 367000
|
4413 |
+
},
|
4414 |
+
{
|
4415 |
+
"epoch": 6.26,
|
4416 |
+
"learning_rate": 3.293566192890239e-05,
|
4417 |
+
"loss": 0.3215,
|
4418 |
+
"step": 367500
|
4419 |
+
},
|
4420 |
+
{
|
4421 |
+
"epoch": 6.26,
|
4422 |
+
"learning_rate": 3.277310816215822e-05,
|
4423 |
+
"loss": 0.3215,
|
4424 |
+
"step": 368000
|
4425 |
+
},
|
4426 |
+
{
|
4427 |
+
"epoch": 6.27,
|
4428 |
+
"learning_rate": 3.261134421484081e-05,
|
4429 |
+
"loss": 0.3215,
|
4430 |
+
"step": 368500
|
4431 |
+
},
|
4432 |
+
{
|
4433 |
+
"epoch": 6.28,
|
4434 |
+
"learning_rate": 3.244972333153504e-05,
|
4435 |
+
"loss": 0.3213,
|
4436 |
+
"step": 369000
|
4437 |
+
},
|
4438 |
+
{
|
4439 |
+
"epoch": 6.29,
|
4440 |
+
"learning_rate": 3.228857175026319e-05,
|
4441 |
+
"loss": 0.3213,
|
4442 |
+
"step": 369500
|
4443 |
+
},
|
4444 |
+
{
|
4445 |
+
"epoch": 6.3,
|
4446 |
+
"learning_rate": 3.212789106152626e-05,
|
4447 |
+
"loss": 0.3215,
|
4448 |
+
"step": 370000
|
4449 |
}
|
4450 |
],
|
4451 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b92ea3bc6c893e4c7575c9088a3c4ed5b91dbfc3ebb18cdc9b74dbb39cdc78bc
|
3 |
size 201355195
|