sentiment-lora-r4a2d0.05-1 / trainer_state.json
apwic's picture
End of training
78ff675 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.044961452484131,
"learning_rate": 4.75e-05,
"loss": 0.5657,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.6488125720138267,
"eval_loss": 0.518221914768219,
"eval_precision": 0.660425343073667,
"eval_recall": 0.642434988179669,
"eval_runtime": 5.1435,
"eval_samples_per_second": 77.573,
"eval_steps_per_second": 9.721,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.9827260971069336,
"learning_rate": 4.5e-05,
"loss": 0.5109,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.6796350364963504,
"eval_loss": 0.5051248073577881,
"eval_precision": 0.674812030075188,
"eval_recall": 0.6874431714857246,
"eval_runtime": 5.053,
"eval_samples_per_second": 78.963,
"eval_steps_per_second": 9.895,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.8286046981811523,
"learning_rate": 4.25e-05,
"loss": 0.48,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7568922305764411,
"eval_f1": 0.6947737005228665,
"eval_loss": 0.4642585515975952,
"eval_precision": 0.704743513567043,
"eval_recall": 0.6879887252227678,
"eval_runtime": 5.0487,
"eval_samples_per_second": 79.031,
"eval_steps_per_second": 9.904,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.501376152038574,
"learning_rate": 4e-05,
"loss": 0.434,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7919799498746867,
"eval_f1": 0.7431297265852239,
"eval_loss": 0.4281364977359772,
"eval_precision": 0.7496659030164186,
"eval_recall": 0.7378159665393708,
"eval_runtime": 5.1077,
"eval_samples_per_second": 78.117,
"eval_steps_per_second": 9.789,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.9612770080566406,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4106,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7919799498746867,
"eval_f1": 0.761811604105382,
"eval_loss": 0.4194311499595642,
"eval_precision": 0.7527992277992278,
"eval_recall": 0.777823240589198,
"eval_runtime": 5.0979,
"eval_samples_per_second": 78.267,
"eval_steps_per_second": 9.808,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.141845941543579,
"learning_rate": 3.5e-05,
"loss": 0.3812,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.785416007592534,
"eval_loss": 0.3935754895210266,
"eval_precision": 0.8008173300551531,
"eval_recall": 0.7744135297326786,
"eval_runtime": 5.0723,
"eval_samples_per_second": 78.662,
"eval_steps_per_second": 9.857,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.9107286930084229,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3689,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8211781685593832,
"eval_loss": 0.37001386284828186,
"eval_precision": 0.8219964664310955,
"eval_recall": 0.8203764320785598,
"eval_runtime": 5.0582,
"eval_samples_per_second": 78.882,
"eval_steps_per_second": 9.885,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 4.303086280822754,
"learning_rate": 3e-05,
"loss": 0.3489,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.7905211912943871,
"eval_loss": 0.3656045198440552,
"eval_precision": 0.8087878787878788,
"eval_recall": 0.7779596290234588,
"eval_runtime": 5.1093,
"eval_samples_per_second": 78.093,
"eval_steps_per_second": 9.786,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 4.8105268478393555,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3502,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.7954669127215085,
"eval_loss": 0.3640279769897461,
"eval_precision": 0.8101109130520895,
"eval_recall": 0.7847335879250773,
"eval_runtime": 5.0578,
"eval_samples_per_second": 78.888,
"eval_steps_per_second": 9.886,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 7.1581597328186035,
"learning_rate": 2.5e-05,
"loss": 0.3349,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.7917273014868713,
"eval_loss": 0.3607986867427826,
"eval_precision": 0.8074456774536514,
"eval_recall": 0.780460083651573,
"eval_runtime": 5.0779,
"eval_samples_per_second": 78.576,
"eval_steps_per_second": 9.847,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.5321431159973145,
"learning_rate": 2.25e-05,
"loss": 0.3189,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.7991821327461466,
"eval_loss": 0.3574356436729431,
"eval_precision": 0.8127623983206507,
"eval_recall": 0.7890070921985816,
"eval_runtime": 5.0826,
"eval_samples_per_second": 78.504,
"eval_steps_per_second": 9.838,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 10.805797576904297,
"learning_rate": 2e-05,
"loss": 0.3121,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.813209415123445,
"eval_loss": 0.3547250032424927,
"eval_precision": 0.8175087108013936,
"eval_recall": 0.809328968903437,
"eval_runtime": 5.0982,
"eval_samples_per_second": 78.263,
"eval_steps_per_second": 9.807,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 4.36875057220459,
"learning_rate": 1.75e-05,
"loss": 0.3181,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8121903546212454,
"eval_loss": 0.347785085439682,
"eval_precision": 0.8331751305173232,
"eval_recall": 0.7978723404255319,
"eval_runtime": 5.0654,
"eval_samples_per_second": 78.769,
"eval_steps_per_second": 9.871,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 10.049259185791016,
"learning_rate": 1.5e-05,
"loss": 0.3092,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8252627627627628,
"eval_loss": 0.34348130226135254,
"eval_precision": 0.8374149659863945,
"eval_recall": 0.8156937625022731,
"eval_runtime": 5.0614,
"eval_samples_per_second": 78.833,
"eval_steps_per_second": 9.879,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.0126718282699585,
"learning_rate": 1.25e-05,
"loss": 0.3018,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8200130662020906,
"eval_loss": 0.34661754965782166,
"eval_precision": 0.8296312892075278,
"eval_recall": 0.812147663211493,
"eval_runtime": 5.0618,
"eval_samples_per_second": 78.825,
"eval_steps_per_second": 9.878,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 7.444075584411621,
"learning_rate": 1e-05,
"loss": 0.2955,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8271551457392166,
"eval_loss": 0.33646759390830994,
"eval_precision": 0.8347358430876305,
"eval_recall": 0.8206946717585015,
"eval_runtime": 5.2151,
"eval_samples_per_second": 76.508,
"eval_steps_per_second": 9.588,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.367713451385498,
"learning_rate": 7.5e-06,
"loss": 0.2917,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8252627627627628,
"eval_loss": 0.33527326583862305,
"eval_precision": 0.8374149659863945,
"eval_recall": 0.8156937625022731,
"eval_runtime": 5.0724,
"eval_samples_per_second": 78.661,
"eval_steps_per_second": 9.857,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 3.2525553703308105,
"learning_rate": 5e-06,
"loss": 0.2956,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8262195121951219,
"eval_loss": 0.3378874957561493,
"eval_precision": 0.8360165151709128,
"eval_recall": 0.8181942171303873,
"eval_runtime": 5.0484,
"eval_samples_per_second": 79.035,
"eval_steps_per_second": 9.904,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.5347208976745605,
"learning_rate": 2.5e-06,
"loss": 0.2899,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8305599245045612,
"eval_loss": 0.3353268504142761,
"eval_precision": 0.8454801889267909,
"eval_recall": 0.8192398617930533,
"eval_runtime": 5.0542,
"eval_samples_per_second": 78.945,
"eval_steps_per_second": 9.893,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 8.835315704345703,
"learning_rate": 0.0,
"loss": 0.2885,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8288555273932541,
"eval_loss": 0.3355979323387146,
"eval_precision": 0.8399124219202783,
"eval_recall": 0.8199672667757774,
"eval_runtime": 5.1038,
"eval_samples_per_second": 78.178,
"eval_steps_per_second": 9.797,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7597037114448000.0,
"train_loss": 0.3603187435963115,
"train_runtime": 1953.0761,
"train_samples_per_second": 37.254,
"train_steps_per_second": 1.249
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7597037114448000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}