Training in progress, step 190000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:baaab21925e93460d24e02cac40a21109c4d19dc3a9c2c641547d7a31b0e2e34
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2f77d6be3df78a76dc41f7ace05e7c297fca237b33e8a1a52bc11437ea6ea34
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca591252c1ef13b4a89d0485dc650e04b8acd26b162d76f5d6e7715cb8ec1d9a
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14d7cea85b5746ba7ba206fef3febbbcb3fada188d2b42064507d0774d655bdf
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e728e7b4508f9b187d0b866da280fc06ff39068ea40e4f441c4315272f00f50
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e960b9fbdf27b87b2539486d0e91a8e405b6cc30b052f608dcadd5f555f9606b
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:434679b868ab4977ed91cda292df1daebee137047ea8180e875efa6af98860b3
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:692456d5c45157319246b6580538cab100d84ec4a09da8043159c331a645e1a5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2226,6 +2226,66 @@
|
|
2226 |
"learning_rate": 0.0001078286051242751,
|
2227 |
"loss": 0.3361,
|
2228 |
"step": 185000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2229 |
}
|
2230 |
],
|
2231 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.2340425531914896,
|
5 |
+
"global_step": 190000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2226 |
"learning_rate": 0.0001078286051242751,
|
2227 |
"loss": 0.3361,
|
2228 |
"step": 185000
|
2229 |
+
},
|
2230 |
+
{
|
2231 |
+
"epoch": 3.16,
|
2232 |
+
"learning_rate": 0.00010762668177686892,
|
2233 |
+
"loss": 0.3355,
|
2234 |
+
"step": 185500
|
2235 |
+
},
|
2236 |
+
{
|
2237 |
+
"epoch": 3.17,
|
2238 |
+
"learning_rate": 0.0001074244857652669,
|
2239 |
+
"loss": 0.3352,
|
2240 |
+
"step": 186000
|
2241 |
+
},
|
2242 |
+
{
|
2243 |
+
"epoch": 3.17,
|
2244 |
+
"learning_rate": 0.00010722201908506211,
|
2245 |
+
"loss": 0.3364,
|
2246 |
+
"step": 186500
|
2247 |
+
},
|
2248 |
+
{
|
2249 |
+
"epoch": 3.18,
|
2250 |
+
"learning_rate": 0.00010701968947202353,
|
2251 |
+
"loss": 0.337,
|
2252 |
+
"step": 187000
|
2253 |
+
},
|
2254 |
+
{
|
2255 |
+
"epoch": 3.19,
|
2256 |
+
"learning_rate": 0.00010681668798339819,
|
2257 |
+
"loss": 0.3363,
|
2258 |
+
"step": 187500
|
2259 |
+
},
|
2260 |
+
{
|
2261 |
+
"epoch": 3.2,
|
2262 |
+
"learning_rate": 0.00010661342182488881,
|
2263 |
+
"loss": 0.3359,
|
2264 |
+
"step": 188000
|
2265 |
+
},
|
2266 |
+
{
|
2267 |
+
"epoch": 3.21,
|
2268 |
+
"learning_rate": 0.00010640989300265028,
|
2269 |
+
"loss": 0.3348,
|
2270 |
+
"step": 188500
|
2271 |
+
},
|
2272 |
+
{
|
2273 |
+
"epoch": 3.22,
|
2274 |
+
"learning_rate": 0.00010620610352542991,
|
2275 |
+
"loss": 0.3344,
|
2276 |
+
"step": 189000
|
2277 |
+
},
|
2278 |
+
{
|
2279 |
+
"epoch": 3.23,
|
2280 |
+
"learning_rate": 0.00010600205540454759,
|
2281 |
+
"loss": 0.3343,
|
2282 |
+
"step": 189500
|
2283 |
+
},
|
2284 |
+
{
|
2285 |
+
"epoch": 3.23,
|
2286 |
+
"learning_rate": 0.00010579775065387587,
|
2287 |
+
"loss": 0.3347,
|
2288 |
+
"step": 190000
|
2289 |
}
|
2290 |
],
|
2291 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2f77d6be3df78a76dc41f7ace05e7c297fca237b33e8a1a52bc11437ea6ea34
|
3 |
size 201355195
|