Training in progress, step 400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 72673016
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:710ed4ddb4c3c53e36700b43bf3a2d9a8da8dfe9323c0765ab7342c3703a85ec
|
3 |
size 72673016
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 36892564
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ac250ac4ff5d7c0c9620858af3c4d4431e4adedcc34529beefb554d7d915fb8
|
3 |
size 36892564
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b95251c50450ce5a4d7f4a82b9d2109ed98fb70bfda44ff1a6b473c0ce9cba43
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53069960eacae4f0d791eb39bd795814bb2120f9a4ba2ee8514aef805ac12eec
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2257,13 +2257,163 @@
|
|
2257 |
"learning_rate": 5.443548387096774e-05,
|
2258 |
"loss": 1.8827,
|
2259 |
"step": 375
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2260 |
}
|
2261 |
],
|
2262 |
"logging_steps": 1,
|
2263 |
"max_steps": 501,
|
2264 |
"num_train_epochs": 1,
|
2265 |
"save_steps": 25,
|
2266 |
-
"total_flos": 1.
|
2267 |
"trial_name": null,
|
2268 |
"trial_params": null
|
2269 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.07562151432082427,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2257 |
"learning_rate": 5.443548387096774e-05,
|
2258 |
"loss": 1.8827,
|
2259 |
"step": 375
|
2260 |
+
},
|
2261 |
+
{
|
2262 |
+
"epoch": 0.07,
|
2263 |
+
"learning_rate": 5.403225806451613e-05,
|
2264 |
+
"loss": 1.8262,
|
2265 |
+
"step": 376
|
2266 |
+
},
|
2267 |
+
{
|
2268 |
+
"epoch": 0.07,
|
2269 |
+
"learning_rate": 5.362903225806452e-05,
|
2270 |
+
"loss": 1.7789,
|
2271 |
+
"step": 377
|
2272 |
+
},
|
2273 |
+
{
|
2274 |
+
"epoch": 0.07,
|
2275 |
+
"learning_rate": 5.32258064516129e-05,
|
2276 |
+
"loss": 1.7937,
|
2277 |
+
"step": 378
|
2278 |
+
},
|
2279 |
+
{
|
2280 |
+
"epoch": 0.07,
|
2281 |
+
"learning_rate": 5.282258064516129e-05,
|
2282 |
+
"loss": 1.8751,
|
2283 |
+
"step": 379
|
2284 |
+
},
|
2285 |
+
{
|
2286 |
+
"epoch": 0.07,
|
2287 |
+
"learning_rate": 5.241935483870968e-05,
|
2288 |
+
"loss": 1.7343,
|
2289 |
+
"step": 380
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 0.07,
|
2293 |
+
"learning_rate": 5.201612903225807e-05,
|
2294 |
+
"loss": 1.68,
|
2295 |
+
"step": 381
|
2296 |
+
},
|
2297 |
+
{
|
2298 |
+
"epoch": 0.07,
|
2299 |
+
"learning_rate": 5.161290322580645e-05,
|
2300 |
+
"loss": 1.7465,
|
2301 |
+
"step": 382
|
2302 |
+
},
|
2303 |
+
{
|
2304 |
+
"epoch": 0.07,
|
2305 |
+
"learning_rate": 5.120967741935484e-05,
|
2306 |
+
"loss": 1.8655,
|
2307 |
+
"step": 383
|
2308 |
+
},
|
2309 |
+
{
|
2310 |
+
"epoch": 0.07,
|
2311 |
+
"learning_rate": 5.080645161290323e-05,
|
2312 |
+
"loss": 1.7398,
|
2313 |
+
"step": 384
|
2314 |
+
},
|
2315 |
+
{
|
2316 |
+
"epoch": 0.07,
|
2317 |
+
"learning_rate": 5.040322580645161e-05,
|
2318 |
+
"loss": 1.8772,
|
2319 |
+
"step": 385
|
2320 |
+
},
|
2321 |
+
{
|
2322 |
+
"epoch": 0.07,
|
2323 |
+
"learning_rate": 5e-05,
|
2324 |
+
"loss": 1.8151,
|
2325 |
+
"step": 386
|
2326 |
+
},
|
2327 |
+
{
|
2328 |
+
"epoch": 0.07,
|
2329 |
+
"learning_rate": 4.959677419354839e-05,
|
2330 |
+
"loss": 1.8336,
|
2331 |
+
"step": 387
|
2332 |
+
},
|
2333 |
+
{
|
2334 |
+
"epoch": 0.07,
|
2335 |
+
"learning_rate": 4.9193548387096775e-05,
|
2336 |
+
"loss": 1.8974,
|
2337 |
+
"step": 388
|
2338 |
+
},
|
2339 |
+
{
|
2340 |
+
"epoch": 0.07,
|
2341 |
+
"learning_rate": 4.8790322580645164e-05,
|
2342 |
+
"loss": 1.8635,
|
2343 |
+
"step": 389
|
2344 |
+
},
|
2345 |
+
{
|
2346 |
+
"epoch": 0.07,
|
2347 |
+
"learning_rate": 4.8387096774193554e-05,
|
2348 |
+
"loss": 1.8573,
|
2349 |
+
"step": 390
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 0.07,
|
2353 |
+
"learning_rate": 4.7983870967741937e-05,
|
2354 |
+
"loss": 1.8567,
|
2355 |
+
"step": 391
|
2356 |
+
},
|
2357 |
+
{
|
2358 |
+
"epoch": 0.07,
|
2359 |
+
"learning_rate": 4.7580645161290326e-05,
|
2360 |
+
"loss": 1.7087,
|
2361 |
+
"step": 392
|
2362 |
+
},
|
2363 |
+
{
|
2364 |
+
"epoch": 0.07,
|
2365 |
+
"learning_rate": 4.7177419354838716e-05,
|
2366 |
+
"loss": 1.9176,
|
2367 |
+
"step": 393
|
2368 |
+
},
|
2369 |
+
{
|
2370 |
+
"epoch": 0.07,
|
2371 |
+
"learning_rate": 4.67741935483871e-05,
|
2372 |
+
"loss": 1.7583,
|
2373 |
+
"step": 394
|
2374 |
+
},
|
2375 |
+
{
|
2376 |
+
"epoch": 0.07,
|
2377 |
+
"learning_rate": 4.637096774193548e-05,
|
2378 |
+
"loss": 1.9632,
|
2379 |
+
"step": 395
|
2380 |
+
},
|
2381 |
+
{
|
2382 |
+
"epoch": 0.07,
|
2383 |
+
"learning_rate": 4.596774193548387e-05,
|
2384 |
+
"loss": 1.9195,
|
2385 |
+
"step": 396
|
2386 |
+
},
|
2387 |
+
{
|
2388 |
+
"epoch": 0.08,
|
2389 |
+
"learning_rate": 4.556451612903226e-05,
|
2390 |
+
"loss": 1.3862,
|
2391 |
+
"step": 397
|
2392 |
+
},
|
2393 |
+
{
|
2394 |
+
"epoch": 0.08,
|
2395 |
+
"learning_rate": 4.516129032258064e-05,
|
2396 |
+
"loss": 2.0477,
|
2397 |
+
"step": 398
|
2398 |
+
},
|
2399 |
+
{
|
2400 |
+
"epoch": 0.08,
|
2401 |
+
"learning_rate": 4.475806451612903e-05,
|
2402 |
+
"loss": 1.8659,
|
2403 |
+
"step": 399
|
2404 |
+
},
|
2405 |
+
{
|
2406 |
+
"epoch": 0.08,
|
2407 |
+
"learning_rate": 4.435483870967742e-05,
|
2408 |
+
"loss": 1.7193,
|
2409 |
+
"step": 400
|
2410 |
}
|
2411 |
],
|
2412 |
"logging_steps": 1,
|
2413 |
"max_steps": 501,
|
2414 |
"num_train_epochs": 1,
|
2415 |
"save_steps": 25,
|
2416 |
+
"total_flos": 1.9304515314991104e+16,
|
2417 |
"trial_name": null,
|
2418 |
"trial_params": null
|
2419 |
}
|