pakawadeep
commited on
Commit
•
e2e3c2f
1
Parent(s):
fdb9bf8
Training in progress, step 500, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +178 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40036488
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edb1b8a303fc9b3efcf849062236f63c083e832b9046d873cf9992c715843b93
|
3 |
size 40036488
|
last-checkpoint/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abd7d6494f445634ed0c5199eae72b6d115d61e4207ed306563079f7f118a58a
|
3 |
+
size 239914192
|
last-checkpoint/global_step500/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:439671f20c83645773f281ff7dc11fd332dece646507f0f9ceb23efecbbe4c75
|
3 |
+
size 143772857
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step500
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3480915ffec0a67b3b569a0849907362275927ef32fee988b0a6bace7e965c6
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3332,6 +3332,181 @@
|
|
3332 |
"learning_rate": 1.09979633401222e-05,
|
3333 |
"loss": 1.3586,
|
3334 |
"step": 475
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3335 |
}
|
3336 |
],
|
3337 |
"logging_steps": 1,
|
@@ -3339,7 +3514,7 @@
|
|
3339 |
"num_input_tokens_seen": 0,
|
3340 |
"num_train_epochs": 1,
|
3341 |
"save_steps": 25,
|
3342 |
-
"total_flos":
|
3343 |
"train_batch_size": 2,
|
3344 |
"trial_name": null,
|
3345 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.23579344494223062,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3332 |
"learning_rate": 1.09979633401222e-05,
|
3333 |
"loss": 1.3586,
|
3334 |
"step": 475
|
3335 |
+
},
|
3336 |
+
{
|
3337 |
+
"epoch": 0.22447535958500353,
|
3338 |
+
"grad_norm": 1.565455350231205,
|
3339 |
+
"learning_rate": 1.059063136456212e-05,
|
3340 |
+
"loss": 1.2235,
|
3341 |
+
"step": 476
|
3342 |
+
},
|
3343 |
+
{
|
3344 |
+
"epoch": 0.224946946474888,
|
3345 |
+
"grad_norm": 1.4820242487973796,
|
3346 |
+
"learning_rate": 1.0183299389002038e-05,
|
3347 |
+
"loss": 0.731,
|
3348 |
+
"step": 477
|
3349 |
+
},
|
3350 |
+
{
|
3351 |
+
"epoch": 0.22541853336477247,
|
3352 |
+
"grad_norm": 1.5511126800670054,
|
3353 |
+
"learning_rate": 9.775967413441956e-06,
|
3354 |
+
"loss": 1.1143,
|
3355 |
+
"step": 478
|
3356 |
+
},
|
3357 |
+
{
|
3358 |
+
"epoch": 0.22589012025465693,
|
3359 |
+
"grad_norm": 1.9312597502922166,
|
3360 |
+
"learning_rate": 9.368635437881874e-06,
|
3361 |
+
"loss": 1.8651,
|
3362 |
+
"step": 479
|
3363 |
+
},
|
3364 |
+
{
|
3365 |
+
"epoch": 0.22636170714454137,
|
3366 |
+
"grad_norm": 2.332740151981935,
|
3367 |
+
"learning_rate": 8.961303462321792e-06,
|
3368 |
+
"loss": 1.5795,
|
3369 |
+
"step": 480
|
3370 |
+
},
|
3371 |
+
{
|
3372 |
+
"epoch": 0.22683329403442584,
|
3373 |
+
"grad_norm": 1.6625740665773265,
|
3374 |
+
"learning_rate": 8.55397148676171e-06,
|
3375 |
+
"loss": 0.8778,
|
3376 |
+
"step": 481
|
3377 |
+
},
|
3378 |
+
{
|
3379 |
+
"epoch": 0.2273048809243103,
|
3380 |
+
"grad_norm": 2.0164940904700503,
|
3381 |
+
"learning_rate": 8.14663951120163e-06,
|
3382 |
+
"loss": 1.2201,
|
3383 |
+
"step": 482
|
3384 |
+
},
|
3385 |
+
{
|
3386 |
+
"epoch": 0.22777646781419478,
|
3387 |
+
"grad_norm": 2.308672133920739,
|
3388 |
+
"learning_rate": 7.739307535641548e-06,
|
3389 |
+
"loss": 1.6128,
|
3390 |
+
"step": 483
|
3391 |
+
},
|
3392 |
+
{
|
3393 |
+
"epoch": 0.22824805470407922,
|
3394 |
+
"grad_norm": 3.6223058225453437,
|
3395 |
+
"learning_rate": 7.3319755600814665e-06,
|
3396 |
+
"loss": 1.1935,
|
3397 |
+
"step": 484
|
3398 |
+
},
|
3399 |
+
{
|
3400 |
+
"epoch": 0.22871964159396369,
|
3401 |
+
"grad_norm": 1.898440262906297,
|
3402 |
+
"learning_rate": 6.9246435845213855e-06,
|
3403 |
+
"loss": 1.3363,
|
3404 |
+
"step": 485
|
3405 |
+
},
|
3406 |
+
{
|
3407 |
+
"epoch": 0.22919122848384815,
|
3408 |
+
"grad_norm": 1.6065163269115639,
|
3409 |
+
"learning_rate": 6.517311608961303e-06,
|
3410 |
+
"loss": 1.0665,
|
3411 |
+
"step": 486
|
3412 |
+
},
|
3413 |
+
{
|
3414 |
+
"epoch": 0.22966281537373262,
|
3415 |
+
"grad_norm": 1.8701305737526783,
|
3416 |
+
"learning_rate": 6.109979633401222e-06,
|
3417 |
+
"loss": 1.6185,
|
3418 |
+
"step": 487
|
3419 |
+
},
|
3420 |
+
{
|
3421 |
+
"epoch": 0.23013440226361706,
|
3422 |
+
"grad_norm": 1.6728117270550722,
|
3423 |
+
"learning_rate": 5.702647657841141e-06,
|
3424 |
+
"loss": 1.4654,
|
3425 |
+
"step": 488
|
3426 |
+
},
|
3427 |
+
{
|
3428 |
+
"epoch": 0.23060598915350153,
|
3429 |
+
"grad_norm": 1.7046091455062304,
|
3430 |
+
"learning_rate": 5.29531568228106e-06,
|
3431 |
+
"loss": 1.0988,
|
3432 |
+
"step": 489
|
3433 |
+
},
|
3434 |
+
{
|
3435 |
+
"epoch": 0.231077576043386,
|
3436 |
+
"grad_norm": 1.6534105108434365,
|
3437 |
+
"learning_rate": 4.887983706720978e-06,
|
3438 |
+
"loss": 1.1103,
|
3439 |
+
"step": 490
|
3440 |
+
},
|
3441 |
+
{
|
3442 |
+
"epoch": 0.23154916293327046,
|
3443 |
+
"grad_norm": 1.4043753102008933,
|
3444 |
+
"learning_rate": 4.480651731160896e-06,
|
3445 |
+
"loss": 0.656,
|
3446 |
+
"step": 491
|
3447 |
+
},
|
3448 |
+
{
|
3449 |
+
"epoch": 0.2320207498231549,
|
3450 |
+
"grad_norm": 1.9403853927340866,
|
3451 |
+
"learning_rate": 4.073319755600815e-06,
|
3452 |
+
"loss": 1.6598,
|
3453 |
+
"step": 492
|
3454 |
+
},
|
3455 |
+
{
|
3456 |
+
"epoch": 0.23249233671303937,
|
3457 |
+
"grad_norm": 1.5035036811542513,
|
3458 |
+
"learning_rate": 3.6659877800407332e-06,
|
3459 |
+
"loss": 1.1763,
|
3460 |
+
"step": 493
|
3461 |
+
},
|
3462 |
+
{
|
3463 |
+
"epoch": 0.23296392360292384,
|
3464 |
+
"grad_norm": 2.0180415371872273,
|
3465 |
+
"learning_rate": 3.2586558044806514e-06,
|
3466 |
+
"loss": 1.2816,
|
3467 |
+
"step": 494
|
3468 |
+
},
|
3469 |
+
{
|
3470 |
+
"epoch": 0.2334355104928083,
|
3471 |
+
"grad_norm": 1.9388592781834748,
|
3472 |
+
"learning_rate": 2.8513238289205704e-06,
|
3473 |
+
"loss": 1.4855,
|
3474 |
+
"step": 495
|
3475 |
+
},
|
3476 |
+
{
|
3477 |
+
"epoch": 0.23390709738269277,
|
3478 |
+
"grad_norm": 2.3566969268057516,
|
3479 |
+
"learning_rate": 2.443991853360489e-06,
|
3480 |
+
"loss": 1.3763,
|
3481 |
+
"step": 496
|
3482 |
+
},
|
3483 |
+
{
|
3484 |
+
"epoch": 0.23437868427257721,
|
3485 |
+
"grad_norm": 1.506615118830032,
|
3486 |
+
"learning_rate": 2.0366598778004075e-06,
|
3487 |
+
"loss": 0.9294,
|
3488 |
+
"step": 497
|
3489 |
+
},
|
3490 |
+
{
|
3491 |
+
"epoch": 0.23485027116246168,
|
3492 |
+
"grad_norm": 1.9049222731064772,
|
3493 |
+
"learning_rate": 1.6293279022403257e-06,
|
3494 |
+
"loss": 1.5868,
|
3495 |
+
"step": 498
|
3496 |
+
},
|
3497 |
+
{
|
3498 |
+
"epoch": 0.23532185805234615,
|
3499 |
+
"grad_norm": 2.0393885793885573,
|
3500 |
+
"learning_rate": 1.2219959266802445e-06,
|
3501 |
+
"loss": 1.3604,
|
3502 |
+
"step": 499
|
3503 |
+
},
|
3504 |
+
{
|
3505 |
+
"epoch": 0.23579344494223062,
|
3506 |
+
"grad_norm": 1.53222897433984,
|
3507 |
+
"learning_rate": 8.146639511201628e-07,
|
3508 |
+
"loss": 1.0879,
|
3509 |
+
"step": 500
|
3510 |
}
|
3511 |
],
|
3512 |
"logging_steps": 1,
|
|
|
3514 |
"num_input_tokens_seen": 0,
|
3515 |
"num_train_epochs": 1,
|
3516 |
"save_steps": 25,
|
3517 |
+
"total_flos": 5267949826867200.0,
|
3518 |
"train_batch_size": 2,
|
3519 |
"trial_name": null,
|
3520 |
"trial_params": null
|