Training in progress, step 335000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:939d248a527eb497165f1bf7060d53f43322c7933a1ba71de5100c7e404ce85a
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d649d0c6ecfcc65c828c103d2a888b212e5dbe03b91f9ccd0b4dfb9c64351b2
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56e84016321a01e0368546b323377a45ad0805d65d9112522d3c528db3adebf7
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d9a4f19ed98bc52de3d9f825e4cc563b4cde6045cdd45620e227104c4272941
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aafab39ea8a348fd9f1b8a4af6531230422ffb8726dce19b15d2e52b47fcbe9a
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29833b46f079abd1ae1687af5eaf0fb3f0c523c72cb6eb7d6dee4f081703eacd
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcee40751dfd949836000607c46de013a7c1680e8c9d348fd9f5d7a4177a5fff
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c39e93187239f8eb7faa42ef30f45c99619a534ecb50ec81e9bc0704a6cd1e6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3966,6 +3966,66 @@
|
|
3966 |
"learning_rate": 4.63273586320064e-05,
|
3967 |
"loss": 0.3235,
|
3968 |
"step": 330000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3969 |
}
|
3970 |
],
|
3971 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.702121684070774,
|
5 |
+
"global_step": 335000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3966 |
"learning_rate": 4.63273586320064e-05,
|
3967 |
"loss": 0.3235,
|
3968 |
"step": 330000
|
3969 |
+
},
|
3970 |
+
{
|
3971 |
+
"epoch": 5.63,
|
3972 |
+
"learning_rate": 4.613472874127472e-05,
|
3973 |
+
"loss": 0.3233,
|
3974 |
+
"step": 330500
|
3975 |
+
},
|
3976 |
+
{
|
3977 |
+
"epoch": 5.63,
|
3978 |
+
"learning_rate": 4.594243308709837e-05,
|
3979 |
+
"loss": 0.3231,
|
3980 |
+
"step": 331000
|
3981 |
+
},
|
3982 |
+
{
|
3983 |
+
"epoch": 5.64,
|
3984 |
+
"learning_rate": 4.575047356735788e-05,
|
3985 |
+
"loss": 0.3231,
|
3986 |
+
"step": 331500
|
3987 |
+
},
|
3988 |
+
{
|
3989 |
+
"epoch": 5.65,
|
3990 |
+
"learning_rate": 4.5558852076616174e-05,
|
3991 |
+
"loss": 0.3234,
|
3992 |
+
"step": 332000
|
3993 |
+
},
|
3994 |
+
{
|
3995 |
+
"epoch": 5.66,
|
3996 |
+
"learning_rate": 4.536757050610003e-05,
|
3997 |
+
"loss": 0.3231,
|
3998 |
+
"step": 332500
|
3999 |
+
},
|
4000 |
+
{
|
4001 |
+
"epoch": 5.67,
|
4002 |
+
"learning_rate": 4.5176630743681286e-05,
|
4003 |
+
"loss": 0.323,
|
4004 |
+
"step": 333000
|
4005 |
+
},
|
4006 |
+
{
|
4007 |
+
"epoch": 5.68,
|
4008 |
+
"learning_rate": 4.498603467385838e-05,
|
4009 |
+
"loss": 0.3229,
|
4010 |
+
"step": 333500
|
4011 |
+
},
|
4012 |
+
{
|
4013 |
+
"epoch": 5.69,
|
4014 |
+
"learning_rate": 4.4795784177737496e-05,
|
4015 |
+
"loss": 0.323,
|
4016 |
+
"step": 334000
|
4017 |
+
},
|
4018 |
+
{
|
4019 |
+
"epoch": 5.69,
|
4020 |
+
"learning_rate": 4.460588113301429e-05,
|
4021 |
+
"loss": 0.3232,
|
4022 |
+
"step": 334500
|
4023 |
+
},
|
4024 |
+
{
|
4025 |
+
"epoch": 5.7,
|
4026 |
+
"learning_rate": 4.4416327413955116e-05,
|
4027 |
+
"loss": 0.3231,
|
4028 |
+
"step": 335000
|
4029 |
}
|
4030 |
],
|
4031 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d649d0c6ecfcc65c828c103d2a888b212e5dbe03b91f9ccd0b4dfb9c64351b2
|
3 |
size 201355195
|