Training in progress, step 295000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3750ebe1ae2da9f01607daef1cb133b33e971a37e9ce8e1d24eb674160fe4ca0
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ac4829037cd81943e3821ef74a6801cc24dc1fa1d45cc60ec3587e504efaac1
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3100e1f748b9c8003b83deeae985130a3ecb14b2cdba4bc463eb75df5150d5f3
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f63bff5bb77e4674cbdfea1305ba8b8680e73189bd93865f89ea62c792263704
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b7134061263ea454a3d6008a2221f1728c1d3633dff4b7eeed6b2beff5d4c9f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8ed60e7003523f0c6427f971a8de5ddba4b001d4fd30e1ced45db1a7d82e7e5
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13e1aa031e346cfe6be393d14a7fbbf53792c8beb4d208dd0478a21af9fb3ce4
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13084d5229415d51bec58a80f9ece1ab058e32847d6e3120c6b36d493ed74470
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3486,6 +3486,66 @@
|
|
3486 |
"learning_rate": 6.264070302791827e-05,
|
3487 |
"loss": 0.3263,
|
3488 |
"step": 290000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3489 |
}
|
3490 |
],
|
3491 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.021276414668812,
|
5 |
+
"global_step": 295000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3486 |
"learning_rate": 6.264070302791827e-05,
|
3487 |
"loss": 0.3263,
|
3488 |
"step": 290000
|
3489 |
+
},
|
3490 |
+
{
|
3491 |
+
"epoch": 4.94,
|
3492 |
+
"learning_rate": 6.242774701957516e-05,
|
3493 |
+
"loss": 0.3263,
|
3494 |
+
"step": 290500
|
3495 |
+
},
|
3496 |
+
{
|
3497 |
+
"epoch": 4.95,
|
3498 |
+
"learning_rate": 6.221496444227476e-05,
|
3499 |
+
"loss": 0.3261,
|
3500 |
+
"step": 291000
|
3501 |
+
},
|
3502 |
+
{
|
3503 |
+
"epoch": 4.96,
|
3504 |
+
"learning_rate": 6.200235739609522e-05,
|
3505 |
+
"loss": 0.3261,
|
3506 |
+
"step": 291500
|
3507 |
+
},
|
3508 |
+
{
|
3509 |
+
"epoch": 4.97,
|
3510 |
+
"learning_rate": 6.179035265954763e-05,
|
3511 |
+
"loss": 0.326,
|
3512 |
+
"step": 292000
|
3513 |
+
},
|
3514 |
+
{
|
3515 |
+
"epoch": 4.98,
|
3516 |
+
"learning_rate": 6.157810260735043e-05,
|
3517 |
+
"loss": 0.3261,
|
3518 |
+
"step": 292500
|
3519 |
+
},
|
3520 |
+
{
|
3521 |
+
"epoch": 4.99,
|
3522 |
+
"learning_rate": 6.136603437184328e-05,
|
3523 |
+
"loss": 0.3259,
|
3524 |
+
"step": 293000
|
3525 |
+
},
|
3526 |
+
{
|
3527 |
+
"epoch": 5.0,
|
3528 |
+
"learning_rate": 6.115457362977345e-05,
|
3529 |
+
"loss": 0.3258,
|
3530 |
+
"step": 293500
|
3531 |
+
},
|
3532 |
+
{
|
3533 |
+
"epoch": 5.0,
|
3534 |
+
"learning_rate": 6.0942874930827204e-05,
|
3535 |
+
"loss": 0.326,
|
3536 |
+
"step": 294000
|
3537 |
+
},
|
3538 |
+
{
|
3539 |
+
"epoch": 5.01,
|
3540 |
+
"learning_rate": 6.0731364318011724e-05,
|
3541 |
+
"loss": 0.3256,
|
3542 |
+
"step": 294500
|
3543 |
+
},
|
3544 |
+
{
|
3545 |
+
"epoch": 5.02,
|
3546 |
+
"learning_rate": 6.052004387885136e-05,
|
3547 |
+
"loss": 0.3258,
|
3548 |
+
"step": 295000
|
3549 |
}
|
3550 |
],
|
3551 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ac4829037cd81943e3821ef74a6801cc24dc1fa1d45cc60ec3587e504efaac1
|
3 |
size 201355195
|