ardaspear's picture
Training in progress, step 50, checkpoint
599f81f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.11286681715575621,
"eval_steps": 5,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002257336343115124,
"eval_loss": 10.37696647644043,
"eval_runtime": 1.3605,
"eval_samples_per_second": 549.072,
"eval_steps_per_second": 69.093,
"step": 1
},
{
"epoch": 0.006772009029345372,
"grad_norm": 0.5311530232429504,
"learning_rate": 3e-05,
"loss": 10.3708,
"step": 3
},
{
"epoch": 0.011286681715575621,
"eval_loss": 10.375223159790039,
"eval_runtime": 1.3445,
"eval_samples_per_second": 555.583,
"eval_steps_per_second": 69.913,
"step": 5
},
{
"epoch": 0.013544018058690745,
"grad_norm": 0.4600737690925598,
"learning_rate": 6e-05,
"loss": 10.3742,
"step": 6
},
{
"epoch": 0.020316027088036117,
"grad_norm": 0.4388561248779297,
"learning_rate": 9e-05,
"loss": 10.3715,
"step": 9
},
{
"epoch": 0.022573363431151242,
"eval_loss": 10.36904525756836,
"eval_runtime": 1.3711,
"eval_samples_per_second": 544.833,
"eval_steps_per_second": 68.56,
"step": 10
},
{
"epoch": 0.02708803611738149,
"grad_norm": 0.39423269033432007,
"learning_rate": 9.938441702975689e-05,
"loss": 10.3707,
"step": 12
},
{
"epoch": 0.033860045146726865,
"grad_norm": 0.45872196555137634,
"learning_rate": 9.619397662556435e-05,
"loss": 10.3518,
"step": 15
},
{
"epoch": 0.033860045146726865,
"eval_loss": 10.359448432922363,
"eval_runtime": 1.4011,
"eval_samples_per_second": 533.148,
"eval_steps_per_second": 67.09,
"step": 15
},
{
"epoch": 0.040632054176072234,
"grad_norm": 0.5480523705482483,
"learning_rate": 9.045084971874738e-05,
"loss": 10.3586,
"step": 18
},
{
"epoch": 0.045146726862302484,
"eval_loss": 10.348822593688965,
"eval_runtime": 1.4092,
"eval_samples_per_second": 530.083,
"eval_steps_per_second": 66.704,
"step": 20
},
{
"epoch": 0.04740406320541761,
"grad_norm": 0.481340229511261,
"learning_rate": 8.247240241650918e-05,
"loss": 10.3482,
"step": 21
},
{
"epoch": 0.05417607223476298,
"grad_norm": 0.6495785713195801,
"learning_rate": 7.269952498697734e-05,
"loss": 10.3488,
"step": 24
},
{
"epoch": 0.056433408577878104,
"eval_loss": 10.337669372558594,
"eval_runtime": 1.3982,
"eval_samples_per_second": 534.277,
"eval_steps_per_second": 67.232,
"step": 25
},
{
"epoch": 0.060948081264108354,
"grad_norm": 0.7056164145469666,
"learning_rate": 6.167226819279528e-05,
"loss": 10.3473,
"step": 27
},
{
"epoch": 0.06772009029345373,
"grad_norm": 0.7669053673744202,
"learning_rate": 5e-05,
"loss": 10.3337,
"step": 30
},
{
"epoch": 0.06772009029345373,
"eval_loss": 10.328136444091797,
"eval_runtime": 1.3693,
"eval_samples_per_second": 545.526,
"eval_steps_per_second": 68.647,
"step": 30
},
{
"epoch": 0.0744920993227991,
"grad_norm": 0.6908026337623596,
"learning_rate": 3.832773180720475e-05,
"loss": 10.338,
"step": 33
},
{
"epoch": 0.07900677200902935,
"eval_loss": 10.320672035217285,
"eval_runtime": 1.3491,
"eval_samples_per_second": 553.69,
"eval_steps_per_second": 69.675,
"step": 35
},
{
"epoch": 0.08126410835214447,
"grad_norm": 0.54636549949646,
"learning_rate": 2.7300475013022663e-05,
"loss": 10.3207,
"step": 36
},
{
"epoch": 0.08803611738148984,
"grad_norm": 0.703861653804779,
"learning_rate": 1.7527597583490822e-05,
"loss": 10.3198,
"step": 39
},
{
"epoch": 0.09029345372460497,
"eval_loss": 10.316521644592285,
"eval_runtime": 1.3669,
"eval_samples_per_second": 546.511,
"eval_steps_per_second": 68.771,
"step": 40
},
{
"epoch": 0.09480812641083522,
"grad_norm": 0.5754713416099548,
"learning_rate": 9.549150281252633e-06,
"loss": 10.3213,
"step": 42
},
{
"epoch": 0.10158013544018059,
"grad_norm": 0.7273910641670227,
"learning_rate": 3.8060233744356633e-06,
"loss": 10.3121,
"step": 45
},
{
"epoch": 0.10158013544018059,
"eval_loss": 10.314825057983398,
"eval_runtime": 1.405,
"eval_samples_per_second": 531.663,
"eval_steps_per_second": 66.903,
"step": 45
},
{
"epoch": 0.10835214446952596,
"grad_norm": 0.7388740181922913,
"learning_rate": 6.15582970243117e-07,
"loss": 10.3121,
"step": 48
},
{
"epoch": 0.11286681715575621,
"eval_loss": 10.314505577087402,
"eval_runtime": 1.3781,
"eval_samples_per_second": 542.057,
"eval_steps_per_second": 68.211,
"step": 50
}
],
"logging_steps": 3,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5577533030400.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}