Training in progress, step 310000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11ba1af3ca07f05b59a0f9d045b8502a890e39d6d713b1a68a79774487c538c4
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33044061cdae69282e0841e3fa8fb5cc7bb7ba2c335ee94c43ec527fde5de60a
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e955485a8b6c2204c1cd05e7146bc3d8d4aea199220bdeb0763bf9b3fe4990
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a40eb8bd77540d5f51aa2626d7dc28a426fa540c25aaff397ab33d9c2cfc9ca6
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aedd0ee2fba46776b8e9a3d54ff8ed40a6879de49804fa15d3dcd9b89d1ec8a0
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbedddb259fec8d39a25799576be00b42005bed9171320d734f89be1d9f42f86
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:513b23ca2b61699867f7e4f0e83f6e0bf3f9836045ea9437165be2c511401531
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1db0c9d6f53ee9862fd806add4b35c4edb1edf9c5abd3a62a1824f925a04677b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3666,6 +3666,66 @@
|
|
3666 |
"learning_rate": 5.633842476876674e-05,
|
3667 |
"loss": 0.3248,
|
3668 |
"step": 305000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3669 |
}
|
3670 |
],
|
3671 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.276593390694547,
|
5 |
+
"global_step": 310000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3666 |
"learning_rate": 5.633842476876674e-05,
|
3667 |
"loss": 0.3248,
|
3668 |
"step": 305000
|
3669 |
+
},
|
3670 |
+
{
|
3671 |
+
"epoch": 5.2,
|
3672 |
+
"learning_rate": 5.6131574812333625e-05,
|
3673 |
+
"loss": 0.3251,
|
3674 |
+
"step": 305500
|
3675 |
+
},
|
3676 |
+
{
|
3677 |
+
"epoch": 5.21,
|
3678 |
+
"learning_rate": 5.592496042762104e-05,
|
3679 |
+
"loss": 0.3251,
|
3680 |
+
"step": 306000
|
3681 |
+
},
|
3682 |
+
{
|
3683 |
+
"epoch": 5.22,
|
3684 |
+
"learning_rate": 5.571858365382955e-05,
|
3685 |
+
"loss": 0.3251,
|
3686 |
+
"step": 306500
|
3687 |
+
},
|
3688 |
+
{
|
3689 |
+
"epoch": 5.23,
|
3690 |
+
"learning_rate": 5.5512446527814595e-05,
|
3691 |
+
"loss": 0.325,
|
3692 |
+
"step": 307000
|
3693 |
+
},
|
3694 |
+
{
|
3695 |
+
"epoch": 5.23,
|
3696 |
+
"learning_rate": 5.530655108406638e-05,
|
3697 |
+
"loss": 0.3248,
|
3698 |
+
"step": 307500
|
3699 |
+
},
|
3700 |
+
{
|
3701 |
+
"epoch": 5.24,
|
3702 |
+
"learning_rate": 5.5100899354689826e-05,
|
3703 |
+
"loss": 0.3249,
|
3704 |
+
"step": 308000
|
3705 |
+
},
|
3706 |
+
{
|
3707 |
+
"epoch": 5.25,
|
3708 |
+
"learning_rate": 5.4895903934754626e-05,
|
3709 |
+
"loss": 0.3249,
|
3710 |
+
"step": 308500
|
3711 |
+
},
|
3712 |
+
{
|
3713 |
+
"epoch": 5.26,
|
3714 |
+
"learning_rate": 5.469074522323032e-05,
|
3715 |
+
"loss": 0.3249,
|
3716 |
+
"step": 309000
|
3717 |
+
},
|
3718 |
+
{
|
3719 |
+
"epoch": 5.27,
|
3720 |
+
"learning_rate": 5.448624587103454e-05,
|
3721 |
+
"loss": 0.325,
|
3722 |
+
"step": 309500
|
3723 |
+
},
|
3724 |
+
{
|
3725 |
+
"epoch": 5.28,
|
3726 |
+
"learning_rate": 5.428158826048664e-05,
|
3727 |
+
"loss": 0.3249,
|
3728 |
+
"step": 310000
|
3729 |
}
|
3730 |
],
|
3731 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33044061cdae69282e0841e3fa8fb5cc7bb7ba2c335ee94c43ec527fde5de60a
|
3 |
size 201355195
|