sentiment-lora-r4a1d0.05-1 / trainer_state.json
apwic's picture
End of training
741e2db verified
raw
history blame contribute delete
No virus
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.044961452484131,
"learning_rate": 4.75e-05,
"loss": 0.5657,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.6488125720138267,
"eval_loss": 0.518221914768219,
"eval_precision": 0.660425343073667,
"eval_recall": 0.642434988179669,
"eval_runtime": 5.1317,
"eval_samples_per_second": 77.752,
"eval_steps_per_second": 9.743,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.9827260971069336,
"learning_rate": 4.5e-05,
"loss": 0.5109,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.6796350364963504,
"eval_loss": 0.5051248073577881,
"eval_precision": 0.674812030075188,
"eval_recall": 0.6874431714857246,
"eval_runtime": 5.0559,
"eval_samples_per_second": 78.917,
"eval_steps_per_second": 9.889,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.8286046981811523,
"learning_rate": 4.25e-05,
"loss": 0.48,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7568922305764411,
"eval_f1": 0.6947737005228665,
"eval_loss": 0.4642585515975952,
"eval_precision": 0.704743513567043,
"eval_recall": 0.6879887252227678,
"eval_runtime": 5.0508,
"eval_samples_per_second": 78.997,
"eval_steps_per_second": 9.899,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.501376152038574,
"learning_rate": 4e-05,
"loss": 0.434,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7919799498746867,
"eval_f1": 0.7431297265852239,
"eval_loss": 0.4281364977359772,
"eval_precision": 0.7496659030164186,
"eval_recall": 0.7378159665393708,
"eval_runtime": 5.1744,
"eval_samples_per_second": 77.111,
"eval_steps_per_second": 9.663,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.9612770080566406,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4106,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7919799498746867,
"eval_f1": 0.761811604105382,
"eval_loss": 0.4194311499595642,
"eval_precision": 0.7527992277992278,
"eval_recall": 0.777823240589198,
"eval_runtime": 5.0693,
"eval_samples_per_second": 78.709,
"eval_steps_per_second": 9.863,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.141845941543579,
"learning_rate": 3.5e-05,
"loss": 0.3812,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.785416007592534,
"eval_loss": 0.3935754895210266,
"eval_precision": 0.8008173300551531,
"eval_recall": 0.7744135297326786,
"eval_runtime": 5.0513,
"eval_samples_per_second": 78.989,
"eval_steps_per_second": 9.898,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.9107286930084229,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3689,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8211781685593832,
"eval_loss": 0.37001386284828186,
"eval_precision": 0.8219964664310955,
"eval_recall": 0.8203764320785598,
"eval_runtime": 5.0579,
"eval_samples_per_second": 78.886,
"eval_steps_per_second": 9.885,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 4.303086280822754,
"learning_rate": 3e-05,
"loss": 0.3489,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.7905211912943871,
"eval_loss": 0.3656045198440552,
"eval_precision": 0.8087878787878788,
"eval_recall": 0.7779596290234588,
"eval_runtime": 5.1317,
"eval_samples_per_second": 77.752,
"eval_steps_per_second": 9.743,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 4.8105268478393555,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3502,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.7954669127215085,
"eval_loss": 0.3640279769897461,
"eval_precision": 0.8101109130520895,
"eval_recall": 0.7847335879250773,
"eval_runtime": 5.0641,
"eval_samples_per_second": 78.79,
"eval_steps_per_second": 9.873,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 7.1581597328186035,
"learning_rate": 2.5e-05,
"loss": 0.3349,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.7917273014868713,
"eval_loss": 0.3607986867427826,
"eval_precision": 0.8074456774536514,
"eval_recall": 0.780460083651573,
"eval_runtime": 5.0646,
"eval_samples_per_second": 78.782,
"eval_steps_per_second": 9.872,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.5321431159973145,
"learning_rate": 2.25e-05,
"loss": 0.3189,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.7991821327461466,
"eval_loss": 0.3574356436729431,
"eval_precision": 0.8127623983206507,
"eval_recall": 0.7890070921985816,
"eval_runtime": 5.0619,
"eval_samples_per_second": 78.824,
"eval_steps_per_second": 9.878,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 10.805797576904297,
"learning_rate": 2e-05,
"loss": 0.3121,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.813209415123445,
"eval_loss": 0.3547250032424927,
"eval_precision": 0.8175087108013936,
"eval_recall": 0.809328968903437,
"eval_runtime": 5.0575,
"eval_samples_per_second": 78.893,
"eval_steps_per_second": 9.886,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 4.36875057220459,
"learning_rate": 1.75e-05,
"loss": 0.3181,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8121903546212454,
"eval_loss": 0.347785085439682,
"eval_precision": 0.8331751305173232,
"eval_recall": 0.7978723404255319,
"eval_runtime": 5.0847,
"eval_samples_per_second": 78.471,
"eval_steps_per_second": 9.833,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 10.049259185791016,
"learning_rate": 1.5e-05,
"loss": 0.3092,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8252627627627628,
"eval_loss": 0.34348130226135254,
"eval_precision": 0.8374149659863945,
"eval_recall": 0.8156937625022731,
"eval_runtime": 5.0603,
"eval_samples_per_second": 78.849,
"eval_steps_per_second": 9.881,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.0126718282699585,
"learning_rate": 1.25e-05,
"loss": 0.3018,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8200130662020906,
"eval_loss": 0.34661754965782166,
"eval_precision": 0.8296312892075278,
"eval_recall": 0.812147663211493,
"eval_runtime": 5.0762,
"eval_samples_per_second": 78.603,
"eval_steps_per_second": 9.85,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 7.444075584411621,
"learning_rate": 1e-05,
"loss": 0.2955,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8271551457392166,
"eval_loss": 0.33646759390830994,
"eval_precision": 0.8347358430876305,
"eval_recall": 0.8206946717585015,
"eval_runtime": 5.138,
"eval_samples_per_second": 77.657,
"eval_steps_per_second": 9.731,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.367713451385498,
"learning_rate": 7.5e-06,
"loss": 0.2917,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8252627627627628,
"eval_loss": 0.33527326583862305,
"eval_precision": 0.8374149659863945,
"eval_recall": 0.8156937625022731,
"eval_runtime": 5.05,
"eval_samples_per_second": 79.01,
"eval_steps_per_second": 9.901,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 3.2525553703308105,
"learning_rate": 5e-06,
"loss": 0.2956,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8262195121951219,
"eval_loss": 0.3378874957561493,
"eval_precision": 0.8360165151709128,
"eval_recall": 0.8181942171303873,
"eval_runtime": 5.0525,
"eval_samples_per_second": 78.971,
"eval_steps_per_second": 9.896,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.5347208976745605,
"learning_rate": 2.5e-06,
"loss": 0.2899,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8305599245045612,
"eval_loss": 0.3353268504142761,
"eval_precision": 0.8454801889267909,
"eval_recall": 0.8192398617930533,
"eval_runtime": 5.0467,
"eval_samples_per_second": 79.061,
"eval_steps_per_second": 9.907,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 8.835315704345703,
"learning_rate": 0.0,
"loss": 0.2885,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8288555273932541,
"eval_loss": 0.3355979323387146,
"eval_precision": 0.8399124219202783,
"eval_recall": 0.8199672667757774,
"eval_runtime": 5.0576,
"eval_samples_per_second": 78.891,
"eval_steps_per_second": 9.886,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7597037114448000.0,
"train_loss": 0.3603187435963115,
"train_runtime": 1953.3721,
"train_samples_per_second": 37.248,
"train_steps_per_second": 1.249
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7597037114448000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}