Training in progress, step 315000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:986fd930859453f0bbb0a52eeb57549a8b1316f1d1780247ecfbfe0f9a13119a
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eaf07203b519520199a41cc0c67ab543d94d3933f85b837c948f43b77677714
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24be0beac655ccefbd3a84f5d3c8a9b96e97783841928199a33ad24a6531cc6e
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9256796205b256efab4335c109c6da5a97f9254e1c45b4078531f232bcc477d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4f5cd324a541d3750857dce2626abd84585cc9b30b3c32ed627ee4167a9d0ba
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ab7c0f0bc621de2c5e939fcd33f672573ad2f2ec8475c89ffb269bf1acc13d9
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfae2ab72f777b3d01d09d07dc27793ce81bf079fbb253f8b977c5c09b6be3aa
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2d58cc217d81f79958e9cab10c1ba79a5a9ada49617d3342ddc9e114493bf73
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3726,6 +3726,66 @@
|
|
3726 |
"learning_rate": 5.428158826048664e-05,
|
3727 |
"loss": 0.3249,
|
3728 |
"step": 310000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3729 |
}
|
3730 |
],
|
3731 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.3616990493697925,
|
5 |
+
"global_step": 315000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3726 |
"learning_rate": 5.428158826048664e-05,
|
3727 |
"loss": 0.3249,
|
3728 |
"step": 310000
|
3729 |
+
},
|
3730 |
+
{
|
3731 |
+
"epoch": 5.29,
|
3732 |
+
"learning_rate": 5.407718448027968e-05,
|
3733 |
+
"loss": 0.3248,
|
3734 |
+
"step": 310500
|
3735 |
+
},
|
3736 |
+
{
|
3737 |
+
"epoch": 5.29,
|
3738 |
+
"learning_rate": 5.387303654779643e-05,
|
3739 |
+
"loss": 0.3245,
|
3740 |
+
"step": 311000
|
3741 |
+
},
|
3742 |
+
{
|
3743 |
+
"epoch": 5.3,
|
3744 |
+
"learning_rate": 5.3669146477894576e-05,
|
3745 |
+
"loss": 0.3245,
|
3746 |
+
"step": 311500
|
3747 |
+
},
|
3748 |
+
{
|
3749 |
+
"epoch": 5.31,
|
3750 |
+
"learning_rate": 5.3465516282886794e-05,
|
3751 |
+
"loss": 0.3245,
|
3752 |
+
"step": 312000
|
3753 |
+
},
|
3754 |
+
{
|
3755 |
+
"epoch": 5.32,
|
3756 |
+
"learning_rate": 5.32621479725209e-05,
|
3757 |
+
"loss": 0.3245,
|
3758 |
+
"step": 312500
|
3759 |
+
},
|
3760 |
+
{
|
3761 |
+
"epoch": 5.33,
|
3762 |
+
"learning_rate": 5.305904355396001e-05,
|
3763 |
+
"loss": 0.3244,
|
3764 |
+
"step": 313000
|
3765 |
+
},
|
3766 |
+
{
|
3767 |
+
"epoch": 5.34,
|
3768 |
+
"learning_rate": 5.285620503176271e-05,
|
3769 |
+
"loss": 0.3243,
|
3770 |
+
"step": 313500
|
3771 |
+
},
|
3772 |
+
{
|
3773 |
+
"epoch": 5.34,
|
3774 |
+
"learning_rate": 5.265363440786338e-05,
|
3775 |
+
"loss": 0.3244,
|
3776 |
+
"step": 314000
|
3777 |
+
},
|
3778 |
+
{
|
3779 |
+
"epoch": 5.35,
|
3780 |
+
"learning_rate": 5.2451333681552255e-05,
|
3781 |
+
"loss": 0.3241,
|
3782 |
+
"step": 314500
|
3783 |
+
},
|
3784 |
+
{
|
3785 |
+
"epoch": 5.36,
|
3786 |
+
"learning_rate": 5.2249304849455853e-05,
|
3787 |
+
"loss": 0.3243,
|
3788 |
+
"step": 315000
|
3789 |
}
|
3790 |
],
|
3791 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eaf07203b519520199a41cc0c67ab543d94d3933f85b837c948f43b77677714
|
3 |
size 201355195
|