sentiment-lora-r2a0d0.05-1 / trainer_state.json
apwic's picture
End of training
4fd590e verified
raw
history blame contribute delete
No virus
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.168577671051025,
"learning_rate": 4.75e-05,
"loss": 0.5663,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7293233082706767,
"eval_f1": 0.6572164948453608,
"eval_loss": 0.5216349363327026,
"eval_precision": 0.6677272727272727,
"eval_recall": 0.6509819967266777,
"eval_runtime": 4.6427,
"eval_samples_per_second": 85.941,
"eval_steps_per_second": 10.769,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 4.071716785430908,
"learning_rate": 4.5e-05,
"loss": 0.5149,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.681013431013431,
"eval_loss": 0.5133972764015198,
"eval_precision": 0.6758417508417509,
"eval_recall": 0.6899436261138389,
"eval_runtime": 5.1009,
"eval_samples_per_second": 78.221,
"eval_steps_per_second": 9.802,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.960922956466675,
"learning_rate": 4.25e-05,
"loss": 0.4925,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7568922305764411,
"eval_f1": 0.7014049083067808,
"eval_loss": 0.48209038376808167,
"eval_precision": 0.70548914753067,
"eval_recall": 0.6979905437352246,
"eval_runtime": 5.0731,
"eval_samples_per_second": 78.65,
"eval_steps_per_second": 9.856,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.4121310710906982,
"learning_rate": 4e-05,
"loss": 0.4608,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7644110275689223,
"eval_f1": 0.7114035087719299,
"eval_loss": 0.46544694900512695,
"eval_precision": 0.7149962462462462,
"eval_recall": 0.7083106019276232,
"eval_runtime": 5.1025,
"eval_samples_per_second": 78.197,
"eval_steps_per_second": 9.799,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.662163734436035,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4493,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7568922305764411,
"eval_f1": 0.7193095695688436,
"eval_loss": 0.46004626154899597,
"eval_precision": 0.7126202191903377,
"eval_recall": 0.7304964539007093,
"eval_runtime": 5.0734,
"eval_samples_per_second": 78.645,
"eval_steps_per_second": 9.855,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.3096864223480225,
"learning_rate": 3.5e-05,
"loss": 0.4257,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.7869674185463659,
"eval_f1": 0.7369400814426992,
"eval_loss": 0.4306720495223999,
"eval_precision": 0.7433180603283696,
"eval_recall": 0.7317694126204765,
"eval_runtime": 5.1042,
"eval_samples_per_second": 78.17,
"eval_steps_per_second": 9.796,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.605820894241333,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.4178,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.7969924812030075,
"eval_f1": 0.7580931280454488,
"eval_loss": 0.4180676341056824,
"eval_precision": 0.7551784340415459,
"eval_recall": 0.7613657028550646,
"eval_runtime": 5.0532,
"eval_samples_per_second": 78.959,
"eval_steps_per_second": 9.895,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 5.129829406738281,
"learning_rate": 3e-05,
"loss": 0.3977,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8070175438596491,
"eval_f1": 0.7616986620127981,
"eval_loss": 0.3971773684024811,
"eval_precision": 0.7687094310805651,
"eval_recall": 0.7559556282960538,
"eval_runtime": 5.047,
"eval_samples_per_second": 79.057,
"eval_steps_per_second": 9.907,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 5.125335693359375,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3946,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8145363408521303,
"eval_f1": 0.7715978711553932,
"eval_loss": 0.3936789035797119,
"eval_precision": 0.7779341980385954,
"eval_recall": 0.7662756864884525,
"eval_runtime": 5.0565,
"eval_samples_per_second": 78.909,
"eval_steps_per_second": 9.888,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 9.01931095123291,
"learning_rate": 2.5e-05,
"loss": 0.3762,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8245614035087719,
"eval_f1": 0.7738095238095237,
"eval_loss": 0.38743722438812256,
"eval_precision": 0.7995467004005903,
"eval_recall": 0.7583651573013275,
"eval_runtime": 5.1497,
"eval_samples_per_second": 77.481,
"eval_steps_per_second": 9.709,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.317044734954834,
"learning_rate": 2.25e-05,
"loss": 0.3727,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.7915149151881459,
"eval_loss": 0.3787190616130829,
"eval_precision": 0.801371627277996,
"eval_recall": 0.7836879432624113,
"eval_runtime": 5.0708,
"eval_samples_per_second": 78.687,
"eval_steps_per_second": 9.86,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 9.624013900756836,
"learning_rate": 2e-05,
"loss": 0.3626,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.7999105055663995,
"eval_loss": 0.37497109174728394,
"eval_precision": 0.8058980811575966,
"eval_recall": 0.794735406437534,
"eval_runtime": 5.0526,
"eval_samples_per_second": 78.97,
"eval_steps_per_second": 9.896,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 4.469719886779785,
"learning_rate": 1.75e-05,
"loss": 0.359,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.7802721088435374,
"eval_loss": 0.37280407547950745,
"eval_precision": 0.8065570314147164,
"eval_recall": 0.7644117112202218,
"eval_runtime": 5.0466,
"eval_samples_per_second": 79.063,
"eval_steps_per_second": 9.908,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 6.554767608642578,
"learning_rate": 1.5e-05,
"loss": 0.3488,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.7816021636937343,
"eval_loss": 0.370919793844223,
"eval_precision": 0.8049342105263158,
"eval_recall": 0.7669121658483361,
"eval_runtime": 5.0529,
"eval_samples_per_second": 78.965,
"eval_steps_per_second": 9.895,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.5811327695846558,
"learning_rate": 1.25e-05,
"loss": 0.3445,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8050261780104713,
"eval_loss": 0.3667002022266388,
"eval_precision": 0.8131443298969072,
"eval_recall": 0.7982815057283142,
"eval_runtime": 5.0467,
"eval_samples_per_second": 79.061,
"eval_steps_per_second": 9.907,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 5.522835731506348,
"learning_rate": 1e-05,
"loss": 0.3344,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8039617859231819,
"eval_loss": 0.3655822277069092,
"eval_precision": 0.8142185588254234,
"eval_recall": 0.7957810511002,
"eval_runtime": 5.05,
"eval_samples_per_second": 79.01,
"eval_steps_per_second": 9.901,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.577361583709717,
"learning_rate": 7.5e-06,
"loss": 0.3339,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.7991821327461466,
"eval_loss": 0.36540165543556213,
"eval_precision": 0.8127623983206507,
"eval_recall": 0.7890070921985816,
"eval_runtime": 5.0524,
"eval_samples_per_second": 78.973,
"eval_steps_per_second": 9.896,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 2.5822412967681885,
"learning_rate": 5e-06,
"loss": 0.3357,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8028733423778791,
"eval_loss": 0.36383578181266785,
"eval_precision": 0.8154009126466754,
"eval_recall": 0.7932805964720859,
"eval_runtime": 5.0893,
"eval_samples_per_second": 78.399,
"eval_steps_per_second": 9.824,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.345070838928223,
"learning_rate": 2.5e-06,
"loss": 0.3357,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8028733423778791,
"eval_loss": 0.36459243297576904,
"eval_precision": 0.8154009126466754,
"eval_recall": 0.7932805964720859,
"eval_runtime": 5.0505,
"eval_samples_per_second": 79.002,
"eval_steps_per_second": 9.9,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 6.21327018737793,
"learning_rate": 0.0,
"loss": 0.3359,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8054576910978295,
"eval_loss": 0.3638042211532593,
"eval_precision": 0.8193059564418788,
"eval_recall": 0.795053646117476,
"eval_runtime": 5.0736,
"eval_samples_per_second": 78.643,
"eval_steps_per_second": 9.855,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7590599775312000.0,
"train_loss": 0.39794730983796667,
"train_runtime": 1936.5815,
"train_samples_per_second": 37.571,
"train_steps_per_second": 1.26
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7590599775312000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}