sentiment-lora-r4a2d0.1-0 / trainer_state.json
apwic's picture
End of training
acac7f8 verified
raw
history blame contribute delete
No virus
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.2719526290893555,
"learning_rate": 4.75e-05,
"loss": 0.5617,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7192982456140351,
"eval_f1": 0.6543102914784331,
"eval_loss": 0.511669397354126,
"eval_precision": 0.6580196140461879,
"eval_recall": 0.6513911620294599,
"eval_runtime": 5.1581,
"eval_samples_per_second": 77.354,
"eval_steps_per_second": 9.694,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.8476994037628174,
"learning_rate": 4.5e-05,
"loss": 0.5046,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7418546365914787,
"eval_f1": 0.7111966887091448,
"eval_loss": 0.49168047308921814,
"eval_precision": 0.7042004048582996,
"eval_recall": 0.7323604291689398,
"eval_runtime": 5.0878,
"eval_samples_per_second": 78.423,
"eval_steps_per_second": 9.827,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.1617326736450195,
"learning_rate": 4.25e-05,
"loss": 0.4798,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7593984962406015,
"eval_f1": 0.7179215270413574,
"eval_loss": 0.4465886950492859,
"eval_precision": 0.7129198966408269,
"eval_recall": 0.7247681396617567,
"eval_runtime": 5.0585,
"eval_samples_per_second": 78.877,
"eval_steps_per_second": 9.884,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.705305337905884,
"learning_rate": 4e-05,
"loss": 0.4374,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8195488721804511,
"eval_f1": 0.7740798993394149,
"eval_loss": 0.3993551731109619,
"eval_precision": 0.7865881658357387,
"eval_recall": 0.7648208765230042,
"eval_runtime": 5.0615,
"eval_samples_per_second": 78.831,
"eval_steps_per_second": 9.879,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 0.9225200414657593,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4037,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7844611528822055,
"eval_f1": 0.7575187969924813,
"eval_loss": 0.41500648856163025,
"eval_precision": 0.748013422818792,
"eval_recall": 0.7800054555373704,
"eval_runtime": 5.0773,
"eval_samples_per_second": 78.585,
"eval_steps_per_second": 9.848,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.181605339050293,
"learning_rate": 3.5e-05,
"loss": 0.3741,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8049369344976196,
"eval_loss": 0.3736521899700165,
"eval_precision": 0.8027777777777778,
"eval_recall": 0.8072376795781051,
"eval_runtime": 5.0871,
"eval_samples_per_second": 78.434,
"eval_steps_per_second": 9.829,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.7816860675811768,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3574,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8220551378446115,
"eval_f1": 0.790906742443813,
"eval_loss": 0.3775876462459564,
"eval_precision": 0.7844931964944649,
"eval_recall": 0.7990998363338788,
"eval_runtime": 5.071,
"eval_samples_per_second": 78.683,
"eval_steps_per_second": 9.86,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 5.030299663543701,
"learning_rate": 3e-05,
"loss": 0.3387,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8134839254478557,
"eval_loss": 0.3653636872768402,
"eval_precision": 0.8119747899159664,
"eval_recall": 0.8150572831423895,
"eval_runtime": 5.0666,
"eval_samples_per_second": 78.75,
"eval_steps_per_second": 9.868,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.920233726501465,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3293,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8067969337812972,
"eval_loss": 0.3626542389392853,
"eval_precision": 0.8021114369501466,
"eval_recall": 0.8122385888343335,
"eval_runtime": 5.0555,
"eval_samples_per_second": 78.924,
"eval_steps_per_second": 9.89,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 6.010580539703369,
"learning_rate": 2.5e-05,
"loss": 0.3209,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8039756622954537,
"eval_loss": 0.35534289479255676,
"eval_precision": 0.8032299897460643,
"eval_recall": 0.8047372249499909,
"eval_runtime": 5.0991,
"eval_samples_per_second": 78.249,
"eval_steps_per_second": 9.806,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.613595485687256,
"learning_rate": 2.25e-05,
"loss": 0.2967,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.8051873113570456,
"eval_loss": 0.3674112856388092,
"eval_precision": 0.7989231125521075,
"eval_recall": 0.8129659938170577,
"eval_runtime": 5.0545,
"eval_samples_per_second": 78.94,
"eval_steps_per_second": 9.892,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 11.547273635864258,
"learning_rate": 2e-05,
"loss": 0.2928,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.8026866442779643,
"eval_loss": 0.37071213126182556,
"eval_precision": 0.795995733394834,
"eval_recall": 0.8111929441716675,
"eval_runtime": 5.0514,
"eval_samples_per_second": 78.988,
"eval_steps_per_second": 9.898,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.8734816312789917,
"learning_rate": 1.75e-05,
"loss": 0.2967,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8160386984618873,
"eval_loss": 0.3514226973056793,
"eval_precision": 0.8152632848784607,
"eval_recall": 0.8168303327877796,
"eval_runtime": 5.0516,
"eval_samples_per_second": 78.985,
"eval_steps_per_second": 9.898,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 11.493008613586426,
"learning_rate": 1.5e-05,
"loss": 0.2934,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8118502107020158,
"eval_loss": 0.3507131040096283,
"eval_precision": 0.8083091673078061,
"eval_recall": 0.8157846881251136,
"eval_runtime": 5.1625,
"eval_samples_per_second": 77.288,
"eval_steps_per_second": 9.685,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.0511583089828491,
"learning_rate": 1.25e-05,
"loss": 0.2811,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.8042838456507522,
"eval_loss": 0.35527709126472473,
"eval_precision": 0.7990802919708029,
"eval_recall": 0.8104655391889435,
"eval_runtime": 5.0748,
"eval_samples_per_second": 78.624,
"eval_steps_per_second": 9.853,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 7.683447360992432,
"learning_rate": 1e-05,
"loss": 0.2738,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8136136136136136,
"eval_loss": 0.35545966029167175,
"eval_precision": 0.8076923076923077,
"eval_recall": 0.820785597381342,
"eval_runtime": 5.0459,
"eval_samples_per_second": 79.075,
"eval_steps_per_second": 9.909,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 5.83898401260376,
"learning_rate": 7.5e-06,
"loss": 0.2717,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8203781512605042,
"eval_loss": 0.34679991006851196,
"eval_precision": 0.8174088828111065,
"eval_recall": 0.823604291689398,
"eval_runtime": 5.07,
"eval_samples_per_second": 78.698,
"eval_steps_per_second": 9.862,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 4.768757343292236,
"learning_rate": 5e-06,
"loss": 0.278,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8127416435111035,
"eval_loss": 0.3509637117385864,
"eval_precision": 0.8079618768328446,
"eval_recall": 0.8182851427532278,
"eval_runtime": 5.0492,
"eval_samples_per_second": 79.022,
"eval_steps_per_second": 9.903,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.281126022338867,
"learning_rate": 2.5e-06,
"loss": 0.2701,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8178232198860786,
"eval_loss": 0.34708452224731445,
"eval_precision": 0.8142125821151684,
"eval_recall": 0.8218312420440079,
"eval_runtime": 5.0554,
"eval_samples_per_second": 78.926,
"eval_steps_per_second": 9.891,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 7.043084621429443,
"learning_rate": 0.0,
"loss": 0.2722,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8152777777777778,
"eval_loss": 0.34833839535713196,
"eval_precision": 0.8110639802050195,
"eval_recall": 0.8200581923986179,
"eval_runtime": 5.0627,
"eval_samples_per_second": 78.812,
"eval_steps_per_second": 9.876,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7597037114448000.0,
"train_loss": 0.34670459247026286,
"train_runtime": 1954.2035,
"train_samples_per_second": 37.233,
"train_steps_per_second": 1.249
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7597037114448000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}