Training in progress, step 330000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93c386d61a4b6ef81bbfcb895bf31c8912994a5d8d326aea019e1ed65d741296
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d237c084114006c5f33d8e180c7dd892faa0c663e6711cc98e83c92531e4a9ae
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cefdfcff28f636aad12436d5ff497346817f4d094e600a7df6bfb170c76276d7
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20bb0c8a84a659fe6e934f689d267fbe623cbcfd8dbdfb676c4f75156049a619
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fb63874d7d3d5ac08ffad64465ec2fbfa345ac503e455335d667a26323bb2cc
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9667eb53f8fae1f918d3e0d457d7c34063204af804980f96316823e35c6471c8
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06288cf6f41b2bddb76e843dba8b3dabddbcaf6a803daeb605d0c871fbf06be6
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cadbb633c7a5781675b6107c9161ef4941fe852de8b07b05a702996516fb85b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3906,6 +3906,66 @@
|
|
3906 |
"learning_rate": 4.827044454306512e-05,
|
3907 |
"loss": 0.3237,
|
3908 |
"step": 325000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3909 |
}
|
3910 |
],
|
3911 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.617016025395529,
|
5 |
+
"global_step": 330000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3906 |
"learning_rate": 4.827044454306512e-05,
|
3907 |
"loss": 0.3237,
|
3908 |
"step": 325000
|
3909 |
+
},
|
3910 |
+
{
|
3911 |
+
"epoch": 5.54,
|
3912 |
+
"learning_rate": 4.807457927133891e-05,
|
3913 |
+
"loss": 0.3234,
|
3914 |
+
"step": 325500
|
3915 |
+
},
|
3916 |
+
{
|
3917 |
+
"epoch": 5.55,
|
3918 |
+
"learning_rate": 4.7879419875243765e-05,
|
3919 |
+
"loss": 0.3236,
|
3920 |
+
"step": 326000
|
3921 |
+
},
|
3922 |
+
{
|
3923 |
+
"epoch": 5.56,
|
3924 |
+
"learning_rate": 4.768418607958038e-05,
|
3925 |
+
"loss": 0.3236,
|
3926 |
+
"step": 326500
|
3927 |
+
},
|
3928 |
+
{
|
3929 |
+
"epoch": 5.57,
|
3930 |
+
"learning_rate": 4.748927122795397e-05,
|
3931 |
+
"loss": 0.3234,
|
3932 |
+
"step": 327000
|
3933 |
+
},
|
3934 |
+
{
|
3935 |
+
"epoch": 5.57,
|
3936 |
+
"learning_rate": 4.729467724409539e-05,
|
3937 |
+
"loss": 0.3233,
|
3938 |
+
"step": 327500
|
3939 |
+
},
|
3940 |
+
{
|
3941 |
+
"epoch": 5.58,
|
3942 |
+
"learning_rate": 4.710040604856878e-05,
|
3943 |
+
"loss": 0.3236,
|
3944 |
+
"step": 328000
|
3945 |
+
},
|
3946 |
+
{
|
3947 |
+
"epoch": 5.59,
|
3948 |
+
"learning_rate": 4.690723469535981e-05,
|
3949 |
+
"loss": 0.3235,
|
3950 |
+
"step": 328500
|
3951 |
+
},
|
3952 |
+
{
|
3953 |
+
"epoch": 5.6,
|
3954 |
+
"learning_rate": 4.6713613515138837e-05,
|
3955 |
+
"loss": 0.3234,
|
3956 |
+
"step": 329000
|
3957 |
+
},
|
3958 |
+
{
|
3959 |
+
"epoch": 5.61,
|
3960 |
+
"learning_rate": 4.652032085811418e-05,
|
3961 |
+
"loss": 0.3234,
|
3962 |
+
"step": 329500
|
3963 |
+
},
|
3964 |
+
{
|
3965 |
+
"epoch": 5.62,
|
3966 |
+
"learning_rate": 4.63273586320064e-05,
|
3967 |
+
"loss": 0.3235,
|
3968 |
+
"step": 330000
|
3969 |
}
|
3970 |
],
|
3971 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d237c084114006c5f33d8e180c7dd892faa0c663e6711cc98e83c92531e4a9ae
|
3 |
size 201355195
|