Training in progress, step 300000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e0fdaa95ab3769d07df2ec51220f48b4f0841e4213a25b8b108ae9d6226afa4
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:443e5aa7e996d90b484593947423fcad9e8622bdc24cf5219c910bf081fc9e4d
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00949b307e65df8e70eaa37baf0faf7e700ae38e102f65265d748363d1e1e7c4
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9d6ac70ad3c1fe3d8c8884698b46b9244004811940695c46fa1538a5a5b31d7
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a5e462f009c7a4ee06c1f9147b1621f035a4b94b586dd31adf28879a9b99385
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:458d1568c66c2b6e998ac534b4eaa23acfd7b3a602f1f5dabbd94a34b322a8fb
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36dbb5a5f2343166fdcd4b09f2c157c64ef7e0b97c386ee5912cc0d77f2607ba
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671bcba81e7f4b7ad88fc97a70ccffd73844d7a00c4684cf55f5df6cd399eaa3
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3546,6 +3546,66 @@
|
|
3546 |
"learning_rate": 6.052004387885136e-05,
|
3547 |
"loss": 0.3258,
|
3548 |
"step": 295000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3549 |
}
|
3550 |
],
|
3551 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.106382073344057,
|
5 |
+
"global_step": 300000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3546 |
"learning_rate": 6.052004387885136e-05,
|
3547 |
"loss": 0.3258,
|
3548 |
"step": 295000
|
3549 |
+
},
|
3550 |
+
{
|
3551 |
+
"epoch": 5.03,
|
3552 |
+
"learning_rate": 6.030891569899353e-05,
|
3553 |
+
"loss": 0.3259,
|
3554 |
+
"step": 295500
|
3555 |
+
},
|
3556 |
+
{
|
3557 |
+
"epoch": 5.04,
|
3558 |
+
"learning_rate": 6.009798186218815e-05,
|
3559 |
+
"loss": 0.3256,
|
3560 |
+
"step": 296000
|
3561 |
+
},
|
3562 |
+
{
|
3563 |
+
"epoch": 5.05,
|
3564 |
+
"learning_rate": 5.988724445026701e-05,
|
3565 |
+
"loss": 0.3255,
|
3566 |
+
"step": 296500
|
3567 |
+
},
|
3568 |
+
{
|
3569 |
+
"epoch": 5.06,
|
3570 |
+
"learning_rate": 5.96767055431233e-05,
|
3571 |
+
"loss": 0.3255,
|
3572 |
+
"step": 297000
|
3573 |
+
},
|
3574 |
+
{
|
3575 |
+
"epoch": 5.06,
|
3576 |
+
"learning_rate": 5.946636721869104e-05,
|
3577 |
+
"loss": 0.3255,
|
3578 |
+
"step": 297500
|
3579 |
+
},
|
3580 |
+
{
|
3581 |
+
"epoch": 5.07,
|
3582 |
+
"learning_rate": 5.925665162062394e-05,
|
3583 |
+
"loss": 0.3254,
|
3584 |
+
"step": 298000
|
3585 |
+
},
|
3586 |
+
{
|
3587 |
+
"epoch": 5.08,
|
3588 |
+
"learning_rate": 5.904672027594373e-05,
|
3589 |
+
"loss": 0.3254,
|
3590 |
+
"step": 298500
|
3591 |
+
},
|
3592 |
+
{
|
3593 |
+
"epoch": 5.09,
|
3594 |
+
"learning_rate": 5.8836995731675224e-05,
|
3595 |
+
"loss": 0.3254,
|
3596 |
+
"step": 299000
|
3597 |
+
},
|
3598 |
+
{
|
3599 |
+
"epoch": 5.1,
|
3600 |
+
"learning_rate": 5.862748005771498e-05,
|
3601 |
+
"loss": 0.3254,
|
3602 |
+
"step": 299500
|
3603 |
+
},
|
3604 |
+
{
|
3605 |
+
"epoch": 5.11,
|
3606 |
+
"learning_rate": 5.841817532189814e-05,
|
3607 |
+
"loss": 0.3252,
|
3608 |
+
"step": 300000
|
3609 |
}
|
3610 |
],
|
3611 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:443e5aa7e996d90b484593947423fcad9e8622bdc24cf5219c910bf081fc9e4d
|
3 |
size 201355195
|