sentiment-lora-r4a2d0.15-0 / trainer_state.json
apwic's picture
End of training
abb446f verified
raw
history blame contribute delete
No virus
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.0601301193237305,
"learning_rate": 4.75e-05,
"loss": 0.5621,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7218045112781954,
"eval_f1": 0.6526816032372154,
"eval_loss": 0.5100470185279846,
"eval_precision": 0.6593383311603651,
"eval_recall": 0.6481633024186215,
"eval_runtime": 5.1816,
"eval_samples_per_second": 77.003,
"eval_steps_per_second": 9.65,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.8218886852264404,
"learning_rate": 4.5e-05,
"loss": 0.5049,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7343358395989975,
"eval_f1": 0.7011278195488722,
"eval_loss": 0.48896074295043945,
"eval_precision": 0.6944563758389262,
"eval_recall": 0.7195399163484271,
"eval_runtime": 5.1599,
"eval_samples_per_second": 77.327,
"eval_steps_per_second": 9.69,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.400880813598633,
"learning_rate": 4.25e-05,
"loss": 0.4776,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7593984962406015,
"eval_f1": 0.7216117216117216,
"eval_loss": 0.4479529559612274,
"eval_precision": 0.7150252525252525,
"eval_recall": 0.7322695035460993,
"eval_runtime": 5.0706,
"eval_samples_per_second": 78.689,
"eval_steps_per_second": 9.861,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.5400054454803467,
"learning_rate": 4e-05,
"loss": 0.4422,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7944862155388471,
"eval_f1": 0.7482456140350877,
"eval_loss": 0.4104432165622711,
"eval_precision": 0.75243993993994,
"eval_recall": 0.7445899254409893,
"eval_runtime": 5.0735,
"eval_samples_per_second": 78.644,
"eval_steps_per_second": 9.855,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 1.0803650617599487,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4146,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7593984962406015,
"eval_f1": 0.7282941777323799,
"eval_loss": 0.42569735646247864,
"eval_precision": 0.7202380952380952,
"eval_recall": 0.7472722313147845,
"eval_runtime": 5.0563,
"eval_samples_per_second": 78.912,
"eval_steps_per_second": 9.889,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 1.867180585861206,
"learning_rate": 3.5e-05,
"loss": 0.3828,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8245614035087719,
"eval_f1": 0.7894173351830629,
"eval_loss": 0.3869302272796631,
"eval_precision": 0.7880252100840336,
"eval_recall": 0.7908710674668122,
"eval_runtime": 5.0525,
"eval_samples_per_second": 78.971,
"eval_steps_per_second": 9.896,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.7762138843536377,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3697,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8145363408521303,
"eval_f1": 0.7854090354090355,
"eval_loss": 0.3959118127822876,
"eval_precision": 0.7765993265993265,
"eval_recall": 0.7987815966539371,
"eval_runtime": 5.0493,
"eval_samples_per_second": 79.021,
"eval_steps_per_second": 9.902,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 5.161904335021973,
"learning_rate": 3e-05,
"loss": 0.3486,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.801779557335113,
"eval_loss": 0.38075554370880127,
"eval_precision": 0.7960927960927962,
"eval_recall": 0.8086924895435534,
"eval_runtime": 5.0702,
"eval_samples_per_second": 78.695,
"eval_steps_per_second": 9.861,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 4.554713249206543,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3437,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8270676691729323,
"eval_f1": 0.7949075143216848,
"eval_loss": 0.3737644851207733,
"eval_precision": 0.7904105571847508,
"eval_recall": 0.8001454809965449,
"eval_runtime": 5.0614,
"eval_samples_per_second": 78.832,
"eval_steps_per_second": 9.879,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 6.5807719230651855,
"learning_rate": 2.5e-05,
"loss": 0.3317,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8151164115613962,
"eval_loss": 0.3643290102481842,
"eval_precision": 0.8159193371512123,
"eval_recall": 0.8143298781596654,
"eval_runtime": 5.1324,
"eval_samples_per_second": 77.742,
"eval_steps_per_second": 9.742,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.961609363555908,
"learning_rate": 2.25e-05,
"loss": 0.3114,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8270676691729323,
"eval_f1": 0.7967966933608887,
"eval_loss": 0.36825552582740784,
"eval_precision": 0.7902444649446494,
"eval_recall": 0.8051463902527732,
"eval_runtime": 5.054,
"eval_samples_per_second": 78.948,
"eval_steps_per_second": 9.893,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 10.724159240722656,
"learning_rate": 2e-05,
"loss": 0.3035,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.8060710498409331,
"eval_loss": 0.36597177386283875,
"eval_precision": 0.7988372093023256,
"eval_recall": 0.8154664484451719,
"eval_runtime": 5.0587,
"eval_samples_per_second": 78.874,
"eval_steps_per_second": 9.884,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.6210856437683105,
"learning_rate": 1.75e-05,
"loss": 0.3117,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8141734808401475,
"eval_loss": 0.3518490195274353,
"eval_precision": 0.8166666666666667,
"eval_recall": 0.8118294235315512,
"eval_runtime": 5.0476,
"eval_samples_per_second": 79.048,
"eval_steps_per_second": 9.906,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 12.59556770324707,
"learning_rate": 1.5e-05,
"loss": 0.3048,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.814390756302521,
"eval_loss": 0.3533446788787842,
"eval_precision": 0.8114780902550687,
"eval_recall": 0.8175577377705037,
"eval_runtime": 5.0775,
"eval_samples_per_second": 78.581,
"eval_steps_per_second": 9.847,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.0728732347488403,
"learning_rate": 1.25e-05,
"loss": 0.2916,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8118502107020158,
"eval_loss": 0.35696789622306824,
"eval_precision": 0.8083091673078061,
"eval_recall": 0.8157846881251136,
"eval_runtime": 5.079,
"eval_samples_per_second": 78.558,
"eval_steps_per_second": 9.844,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 6.875555992126465,
"learning_rate": 1e-05,
"loss": 0.2832,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8186863532409097,
"eval_loss": 0.35785549879074097,
"eval_precision": 0.8138123167155425,
"eval_recall": 0.8243316966721222,
"eval_runtime": 5.0553,
"eval_samples_per_second": 78.928,
"eval_steps_per_second": 9.891,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.91969108581543,
"learning_rate": 7.5e-06,
"loss": 0.284,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8160386984618873,
"eval_loss": 0.3496108949184418,
"eval_precision": 0.8152632848784607,
"eval_recall": 0.8168303327877796,
"eval_runtime": 5.0785,
"eval_samples_per_second": 78.566,
"eval_steps_per_second": 9.845,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 3.59561824798584,
"learning_rate": 5e-06,
"loss": 0.2906,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8152777777777778,
"eval_loss": 0.35370829701423645,
"eval_precision": 0.8110639802050195,
"eval_recall": 0.8200581923986179,
"eval_runtime": 5.1122,
"eval_samples_per_second": 78.049,
"eval_steps_per_second": 9.781,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.449235916137695,
"learning_rate": 2.5e-06,
"loss": 0.2805,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8186033824331697,
"eval_loss": 0.3505256772041321,
"eval_precision": 0.8186033824331697,
"eval_recall": 0.8186033824331697,
"eval_runtime": 5.0584,
"eval_samples_per_second": 78.878,
"eval_steps_per_second": 9.884,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 8.314062118530273,
"learning_rate": 0.0,
"loss": 0.2815,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8169408154516122,
"eval_loss": 0.35131508111953735,
"eval_precision": 0.814695340501792,
"eval_recall": 0.8193307874158937,
"eval_runtime": 5.0622,
"eval_samples_per_second": 78.82,
"eval_steps_per_second": 9.877,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7597037114448000.0,
"train_loss": 0.3560234429406338,
"train_runtime": 1955.3923,
"train_samples_per_second": 37.21,
"train_steps_per_second": 1.248
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7597037114448000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}