Commit
•
ae398a9
1
Parent(s):
c8065c4
Training in progress, step 310000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:588b078274eaf2af1b331e1cd70a9be61bfe018939162cd4041561fac059b8c0
|
3 |
size 893441530
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 454197066
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9ba1aac81f5af9078e69f687c5c373f267f4525e96f7d4ff79814a56c1216cb
|
3 |
size 454197066
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1483cb709e4146d94296c64449eb045d3f6821657b2815235b2d1e5f2c693e9
|
3 |
+
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:746b61609316d06785a028aa9340563cf7c6dcbeb2edf456b62699aedb8c3a07
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2226,11 +2226,85 @@
|
|
2226 |
"eval_samples_per_second": 108.487,
|
2227 |
"eval_steps_per_second": 13.561,
|
2228 |
"step": 300000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2229 |
}
|
2230 |
],
|
2231 |
"max_steps": 1000000,
|
2232 |
"num_train_epochs": 9223372036854775807,
|
2233 |
-
"total_flos": 2.
|
2234 |
"trial_name": null,
|
2235 |
"trial_params": null
|
2236 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.31,
|
5 |
+
"global_step": 310000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2226 |
"eval_samples_per_second": 108.487,
|
2227 |
"eval_steps_per_second": 13.561,
|
2228 |
"step": 300000
|
2229 |
+
},
|
2230 |
+
{
|
2231 |
+
"epoch": 0.3,
|
2232 |
+
"learning_rate": 1.732744634202854e-05,
|
2233 |
+
"loss": 0.3579,
|
2234 |
+
"step": 301000
|
2235 |
+
},
|
2236 |
+
{
|
2237 |
+
"epoch": 0.3,
|
2238 |
+
"learning_rate": 1.731675352071429e-05,
|
2239 |
+
"loss": 0.3569,
|
2240 |
+
"step": 302000
|
2241 |
+
},
|
2242 |
+
{
|
2243 |
+
"epoch": 0.3,
|
2244 |
+
"learning_rate": 1.7306028528854846e-05,
|
2245 |
+
"loss": 0.3561,
|
2246 |
+
"step": 303000
|
2247 |
+
},
|
2248 |
+
{
|
2249 |
+
"epoch": 0.3,
|
2250 |
+
"learning_rate": 1.7295271483737004e-05,
|
2251 |
+
"loss": 0.3563,
|
2252 |
+
"step": 304000
|
2253 |
+
},
|
2254 |
+
{
|
2255 |
+
"epoch": 0.3,
|
2256 |
+
"learning_rate": 1.7284482502998086e-05,
|
2257 |
+
"loss": 0.3567,
|
2258 |
+
"step": 305000
|
2259 |
+
},
|
2260 |
+
{
|
2261 |
+
"epoch": 0.3,
|
2262 |
+
"eval_runtime": 3353.5685,
|
2263 |
+
"eval_samples_per_second": 101.281,
|
2264 |
+
"eval_steps_per_second": 12.66,
|
2265 |
+
"step": 305000
|
2266 |
+
},
|
2267 |
+
{
|
2268 |
+
"epoch": 0.31,
|
2269 |
+
"learning_rate": 1.7273661704624656e-05,
|
2270 |
+
"loss": 0.3559,
|
2271 |
+
"step": 306000
|
2272 |
+
},
|
2273 |
+
{
|
2274 |
+
"epoch": 0.31,
|
2275 |
+
"learning_rate": 1.7262809206951228e-05,
|
2276 |
+
"loss": 0.355,
|
2277 |
+
"step": 307000
|
2278 |
+
},
|
2279 |
+
{
|
2280 |
+
"epoch": 0.31,
|
2281 |
+
"learning_rate": 1.725192512865898e-05,
|
2282 |
+
"loss": 0.3576,
|
2283 |
+
"step": 308000
|
2284 |
+
},
|
2285 |
+
{
|
2286 |
+
"epoch": 0.31,
|
2287 |
+
"learning_rate": 1.7241009588774453e-05,
|
2288 |
+
"loss": 0.3555,
|
2289 |
+
"step": 309000
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 0.31,
|
2293 |
+
"learning_rate": 1.7230062706668237e-05,
|
2294 |
+
"loss": 0.3523,
|
2295 |
+
"step": 310000
|
2296 |
+
},
|
2297 |
+
{
|
2298 |
+
"epoch": 0.31,
|
2299 |
+
"eval_runtime": 3333.1448,
|
2300 |
+
"eval_samples_per_second": 101.901,
|
2301 |
+
"eval_steps_per_second": 12.738,
|
2302 |
+
"step": 310000
|
2303 |
}
|
2304 |
],
|
2305 |
"max_steps": 1000000,
|
2306 |
"num_train_epochs": 9223372036854775807,
|
2307 |
+
"total_flos": 2.744929153701642e+21,
|
2308 |
"trial_name": null,
|
2309 |
"trial_params": null
|
2310 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 454197066
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9ba1aac81f5af9078e69f687c5c373f267f4525e96f7d4ff79814a56c1216cb
|
3 |
size 454197066
|