sentiment-lora-r16a0d0.2-0 / trainer_state.json
apwic's picture
End of training
7df1e63 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.218796253204346,
"learning_rate": 4.75e-05,
"loss": 0.5604,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7268170426065163,
"eval_f1": 0.6626643397324026,
"eval_loss": 0.4996059536933899,
"eval_precision": 0.6671439480717831,
"eval_recall": 0.6592107655937443,
"eval_runtime": 5.1871,
"eval_samples_per_second": 76.921,
"eval_steps_per_second": 9.639,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 4.024437427520752,
"learning_rate": 4.5e-05,
"loss": 0.4842,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7543859649122807,
"eval_f1": 0.7247113489157984,
"eval_loss": 0.4520498514175415,
"eval_precision": 0.7169408246101261,
"eval_recall": 0.7462265866521185,
"eval_runtime": 5.0789,
"eval_samples_per_second": 78.561,
"eval_steps_per_second": 9.845,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.7485127449035645,
"learning_rate": 4.25e-05,
"loss": 0.4079,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.800852224051491,
"eval_loss": 0.37486591935157776,
"eval_precision": 0.7962609970674487,
"eval_recall": 0.8061920349154392,
"eval_runtime": 5.055,
"eval_samples_per_second": 78.932,
"eval_steps_per_second": 9.891,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.6576170921325684,
"learning_rate": 4e-05,
"loss": 0.3378,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8122237052238519,
"eval_loss": 0.36237815022468567,
"eval_precision": 0.8184491978609625,
"eval_recall": 0.8068285142753229,
"eval_runtime": 5.0874,
"eval_samples_per_second": 78.43,
"eval_steps_per_second": 9.828,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 3.3436532020568848,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.3146,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8234833375639119,
"eval_loss": 0.36204174160957336,
"eval_precision": 0.8130172220979647,
"eval_recall": 0.8393344244408074,
"eval_runtime": 5.0836,
"eval_samples_per_second": 78.488,
"eval_steps_per_second": 9.836,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 4.493219375610352,
"learning_rate": 3.5e-05,
"loss": 0.2935,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8252554744525548,
"eval_loss": 0.35182783007621765,
"eval_precision": 0.8157894736842105,
"eval_recall": 0.8386070194580832,
"eval_runtime": 5.0469,
"eval_samples_per_second": 79.059,
"eval_steps_per_second": 9.907,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.3236638307571411,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2842,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8391129032258065,
"eval_loss": 0.3307338058948517,
"eval_precision": 0.8345705196182396,
"eval_recall": 0.8442444080741953,
"eval_runtime": 5.0617,
"eval_samples_per_second": 78.827,
"eval_steps_per_second": 9.878,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 2.9696807861328125,
"learning_rate": 3e-05,
"loss": 0.267,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8341332527115377,
"eval_loss": 0.3191296458244324,
"eval_precision": 0.8333132275770553,
"eval_recall": 0.8349699945444626,
"eval_runtime": 5.1096,
"eval_samples_per_second": 78.089,
"eval_steps_per_second": 9.786,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.35373067855835,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2598,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8401647707947546,
"eval_loss": 0.31739577651023865,
"eval_precision": 0.8393298751432535,
"eval_recall": 0.8410165484633569,
"eval_runtime": 5.1627,
"eval_samples_per_second": 77.285,
"eval_steps_per_second": 9.685,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 4.5958967208862305,
"learning_rate": 2.5e-05,
"loss": 0.2557,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8367430441898527,
"eval_loss": 0.30755287408828735,
"eval_precision": 0.8367430441898527,
"eval_recall": 0.8367430441898527,
"eval_runtime": 5.0513,
"eval_samples_per_second": 78.99,
"eval_steps_per_second": 9.898,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 1.4262058734893799,
"learning_rate": 2.25e-05,
"loss": 0.2341,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.844327731092437,
"eval_loss": 0.31438612937927246,
"eval_precision": 0.8411320530352577,
"eval_recall": 0.8477905073649754,
"eval_runtime": 5.0545,
"eval_samples_per_second": 78.939,
"eval_steps_per_second": 9.892,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 10.927000999450684,
"learning_rate": 2e-05,
"loss": 0.2352,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8409485773595975,
"eval_loss": 0.31350696086883545,
"eval_precision": 0.8385304659498208,
"eval_recall": 0.8435170030914712,
"eval_runtime": 5.0621,
"eval_samples_per_second": 78.821,
"eval_steps_per_second": 9.877,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.771910309791565,
"learning_rate": 1.75e-05,
"loss": 0.2335,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8721804511278195,
"eval_f1": 0.8469505178365937,
"eval_loss": 0.30345895886421204,
"eval_precision": 0.844489247311828,
"eval_recall": 0.8495635570103655,
"eval_runtime": 5.0626,
"eval_samples_per_second": 78.814,
"eval_steps_per_second": 9.876,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 11.13768482208252,
"learning_rate": 1.5e-05,
"loss": 0.232,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8450716845878137,
"eval_loss": 0.3012339174747467,
"eval_precision": 0.8404471544715447,
"eval_recall": 0.8502909619930896,
"eval_runtime": 5.0472,
"eval_samples_per_second": 79.054,
"eval_steps_per_second": 9.906,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.7109107971191406,
"learning_rate": 1.25e-05,
"loss": 0.221,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8424651921601347,
"eval_loss": 0.3049696981906891,
"eval_precision": 0.8372140762463343,
"eval_recall": 0.8485179123476996,
"eval_runtime": 5.0811,
"eval_samples_per_second": 78.526,
"eval_steps_per_second": 9.84,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 6.484745979309082,
"learning_rate": 1e-05,
"loss": 0.216,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8457993935430168,
"eval_loss": 0.3015521466732025,
"eval_precision": 0.8398540145985401,
"eval_recall": 0.8527914166212038,
"eval_runtime": 5.0477,
"eval_samples_per_second": 79.046,
"eval_steps_per_second": 9.906,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 10.693771362304688,
"learning_rate": 7.5e-06,
"loss": 0.2096,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8435671632788467,
"eval_loss": 0.2880638837814331,
"eval_precision": 0.8419117647058824,
"eval_recall": 0.8452900527368612,
"eval_runtime": 5.1498,
"eval_samples_per_second": 77.478,
"eval_steps_per_second": 9.709,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 5.451054096221924,
"learning_rate": 5e-06,
"loss": 0.2184,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8465112150085814,
"eval_loss": 0.29656729102134705,
"eval_precision": 0.8393498147290412,
"eval_recall": 0.855291871249318,
"eval_runtime": 5.0876,
"eval_samples_per_second": 78.426,
"eval_steps_per_second": 9.828,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 3.6623899936676025,
"learning_rate": 2.5e-06,
"loss": 0.2134,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8409485773595975,
"eval_loss": 0.2884305715560913,
"eval_precision": 0.8385304659498208,
"eval_recall": 0.8435170030914712,
"eval_runtime": 5.0715,
"eval_samples_per_second": 78.675,
"eval_steps_per_second": 9.859,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 10.805831909179688,
"learning_rate": 0.0,
"loss": 0.2077,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8383403361344538,
"eval_loss": 0.289480060338974,
"eval_precision": 0.8352012604792199,
"eval_recall": 0.8417439534460811,
"eval_runtime": 5.0647,
"eval_samples_per_second": 78.78,
"eval_steps_per_second": 9.872,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7635661149264000.0,
"train_loss": 0.28429694019380164,
"train_runtime": 1955.8264,
"train_samples_per_second": 37.202,
"train_steps_per_second": 1.248
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7635661149264000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}