sentiment-lora-r2a2d0.05-1 / trainer_state.json
apwic's picture
End of training
d6c3892 verified
raw
history blame contribute delete
No virus
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.168577671051025,
"learning_rate": 4.75e-05,
"loss": 0.5663,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7293233082706767,
"eval_f1": 0.6572164948453608,
"eval_loss": 0.5216349363327026,
"eval_precision": 0.6677272727272727,
"eval_recall": 0.6509819967266777,
"eval_runtime": 5.1733,
"eval_samples_per_second": 77.127,
"eval_steps_per_second": 9.665,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 4.071716785430908,
"learning_rate": 4.5e-05,
"loss": 0.5149,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.681013431013431,
"eval_loss": 0.5133972764015198,
"eval_precision": 0.6758417508417509,
"eval_recall": 0.6899436261138389,
"eval_runtime": 5.07,
"eval_samples_per_second": 78.698,
"eval_steps_per_second": 9.862,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.960922956466675,
"learning_rate": 4.25e-05,
"loss": 0.4925,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7568922305764411,
"eval_f1": 0.7014049083067808,
"eval_loss": 0.48209038376808167,
"eval_precision": 0.70548914753067,
"eval_recall": 0.6979905437352246,
"eval_runtime": 5.0486,
"eval_samples_per_second": 79.032,
"eval_steps_per_second": 9.904,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.4121310710906982,
"learning_rate": 4e-05,
"loss": 0.4608,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7644110275689223,
"eval_f1": 0.7114035087719299,
"eval_loss": 0.46544694900512695,
"eval_precision": 0.7149962462462462,
"eval_recall": 0.7083106019276232,
"eval_runtime": 5.0489,
"eval_samples_per_second": 79.027,
"eval_steps_per_second": 9.903,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.662163734436035,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4493,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7568922305764411,
"eval_f1": 0.7193095695688436,
"eval_loss": 0.46004626154899597,
"eval_precision": 0.7126202191903377,
"eval_recall": 0.7304964539007093,
"eval_runtime": 5.0662,
"eval_samples_per_second": 78.757,
"eval_steps_per_second": 9.869,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.3096864223480225,
"learning_rate": 3.5e-05,
"loss": 0.4257,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.7869674185463659,
"eval_f1": 0.7369400814426992,
"eval_loss": 0.4306720495223999,
"eval_precision": 0.7433180603283696,
"eval_recall": 0.7317694126204765,
"eval_runtime": 5.0512,
"eval_samples_per_second": 78.992,
"eval_steps_per_second": 9.899,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.605820894241333,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.4178,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.7969924812030075,
"eval_f1": 0.7580931280454488,
"eval_loss": 0.4180676341056824,
"eval_precision": 0.7551784340415459,
"eval_recall": 0.7613657028550646,
"eval_runtime": 5.0493,
"eval_samples_per_second": 79.021,
"eval_steps_per_second": 9.902,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 5.129829406738281,
"learning_rate": 3e-05,
"loss": 0.3977,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8070175438596491,
"eval_f1": 0.7616986620127981,
"eval_loss": 0.3971773684024811,
"eval_precision": 0.7687094310805651,
"eval_recall": 0.7559556282960538,
"eval_runtime": 5.054,
"eval_samples_per_second": 78.947,
"eval_steps_per_second": 9.893,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 5.125335693359375,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3946,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8145363408521303,
"eval_f1": 0.7715978711553932,
"eval_loss": 0.3936789035797119,
"eval_precision": 0.7779341980385954,
"eval_recall": 0.7662756864884525,
"eval_runtime": 5.0992,
"eval_samples_per_second": 78.247,
"eval_steps_per_second": 9.805,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 9.01931095123291,
"learning_rate": 2.5e-05,
"loss": 0.3762,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8245614035087719,
"eval_f1": 0.7738095238095237,
"eval_loss": 0.38743722438812256,
"eval_precision": 0.7995467004005903,
"eval_recall": 0.7583651573013275,
"eval_runtime": 5.0574,
"eval_samples_per_second": 78.895,
"eval_steps_per_second": 9.887,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.317044734954834,
"learning_rate": 2.25e-05,
"loss": 0.3727,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.7915149151881459,
"eval_loss": 0.3787190616130829,
"eval_precision": 0.801371627277996,
"eval_recall": 0.7836879432624113,
"eval_runtime": 5.081,
"eval_samples_per_second": 78.528,
"eval_steps_per_second": 9.841,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 9.624013900756836,
"learning_rate": 2e-05,
"loss": 0.3626,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.7999105055663995,
"eval_loss": 0.37497109174728394,
"eval_precision": 0.8058980811575966,
"eval_recall": 0.794735406437534,
"eval_runtime": 5.0584,
"eval_samples_per_second": 78.879,
"eval_steps_per_second": 9.885,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 4.469719886779785,
"learning_rate": 1.75e-05,
"loss": 0.359,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.7802721088435374,
"eval_loss": 0.37280407547950745,
"eval_precision": 0.8065570314147164,
"eval_recall": 0.7644117112202218,
"eval_runtime": 5.0511,
"eval_samples_per_second": 78.993,
"eval_steps_per_second": 9.899,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 6.554767608642578,
"learning_rate": 1.5e-05,
"loss": 0.3488,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.7816021636937343,
"eval_loss": 0.370919793844223,
"eval_precision": 0.8049342105263158,
"eval_recall": 0.7669121658483361,
"eval_runtime": 5.0593,
"eval_samples_per_second": 78.865,
"eval_steps_per_second": 9.883,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.5811327695846558,
"learning_rate": 1.25e-05,
"loss": 0.3445,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8050261780104713,
"eval_loss": 0.3667002022266388,
"eval_precision": 0.8131443298969072,
"eval_recall": 0.7982815057283142,
"eval_runtime": 5.0728,
"eval_samples_per_second": 78.655,
"eval_steps_per_second": 9.856,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 5.522835731506348,
"learning_rate": 1e-05,
"loss": 0.3344,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8039617859231819,
"eval_loss": 0.3655822277069092,
"eval_precision": 0.8142185588254234,
"eval_recall": 0.7957810511002,
"eval_runtime": 5.0487,
"eval_samples_per_second": 79.029,
"eval_steps_per_second": 9.903,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.577361583709717,
"learning_rate": 7.5e-06,
"loss": 0.3339,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.7991821327461466,
"eval_loss": 0.36540165543556213,
"eval_precision": 0.8127623983206507,
"eval_recall": 0.7890070921985816,
"eval_runtime": 5.0986,
"eval_samples_per_second": 78.256,
"eval_steps_per_second": 9.807,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 2.5822412967681885,
"learning_rate": 5e-06,
"loss": 0.3357,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8028733423778791,
"eval_loss": 0.36383578181266785,
"eval_precision": 0.8154009126466754,
"eval_recall": 0.7932805964720859,
"eval_runtime": 5.0494,
"eval_samples_per_second": 79.02,
"eval_steps_per_second": 9.902,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.345070838928223,
"learning_rate": 2.5e-06,
"loss": 0.3357,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8028733423778791,
"eval_loss": 0.36459243297576904,
"eval_precision": 0.8154009126466754,
"eval_recall": 0.7932805964720859,
"eval_runtime": 5.0572,
"eval_samples_per_second": 78.898,
"eval_steps_per_second": 9.887,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 6.21327018737793,
"learning_rate": 0.0,
"loss": 0.3359,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8054576910978295,
"eval_loss": 0.3638042211532593,
"eval_precision": 0.8193059564418788,
"eval_recall": 0.795053646117476,
"eval_runtime": 5.0499,
"eval_samples_per_second": 79.011,
"eval_steps_per_second": 9.901,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7590599775312000.0,
"train_loss": 0.39794730983796667,
"train_runtime": 1950.6296,
"train_samples_per_second": 37.301,
"train_steps_per_second": 1.251
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7590599775312000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}