sentiment-base-2 / trainer_state.json
apwic's picture
End of training
b6f82ef verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 20.148401260375977,
"learning_rate": 4.75e-05,
"loss": 0.3808,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8186363636363636,
"eval_loss": 0.379351943731308,
"eval_precision": 0.8736897274633124,
"eval_recall": 0.7917348608837971,
"eval_runtime": 1.6409,
"eval_samples_per_second": 243.152,
"eval_steps_per_second": 30.47,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 21.554689407348633,
"learning_rate": 4.5e-05,
"loss": 0.221,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.8721804511278195,
"eval_f1": 0.8395201930584144,
"eval_loss": 0.2850644886493683,
"eval_precision": 0.8562091503267973,
"eval_recall": 0.8270594653573378,
"eval_runtime": 1.6504,
"eval_samples_per_second": 241.765,
"eval_steps_per_second": 30.296,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 0.09420396387577057,
"learning_rate": 4.25e-05,
"loss": 0.1363,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8717238211879976,
"eval_loss": 0.38322028517723083,
"eval_precision": 0.8757194133300328,
"eval_recall": 0.8680214584469903,
"eval_runtime": 1.6524,
"eval_samples_per_second": 241.46,
"eval_steps_per_second": 30.258,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 7.119666576385498,
"learning_rate": 4e-05,
"loss": 0.099,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8717112228173498,
"eval_loss": 0.4968295693397522,
"eval_precision": 0.8869295958279009,
"eval_recall": 0.8597926895799237,
"eval_runtime": 1.6515,
"eval_samples_per_second": 241.593,
"eval_steps_per_second": 30.275,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 0.1547642946243286,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0702,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8377439939939939,
"eval_loss": 0.5204734802246094,
"eval_precision": 0.8503401360544218,
"eval_recall": 0.8277868703400618,
"eval_runtime": 1.6524,
"eval_samples_per_second": 241.469,
"eval_steps_per_second": 30.259,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 0.08600271493196487,
"learning_rate": 3.5e-05,
"loss": 0.0469,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8448388501742161,
"eval_loss": 0.5740100741386414,
"eval_precision": 0.8551721930610677,
"eval_recall": 0.8363338788870704,
"eval_runtime": 1.6555,
"eval_samples_per_second": 241.009,
"eval_steps_per_second": 30.202,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.024254148826003075,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0328,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8629480286738351,
"eval_loss": 0.6011895537376404,
"eval_precision": 0.8580770590314599,
"eval_recall": 0.8684306237497728,
"eval_runtime": 1.6578,
"eval_samples_per_second": 240.677,
"eval_steps_per_second": 30.16,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 0.03784336522221565,
"learning_rate": 3e-05,
"loss": 0.0284,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8737897035111135,
"eval_loss": 0.5402500033378601,
"eval_precision": 0.8812047813777917,
"eval_recall": 0.8672940534642661,
"eval_runtime": 1.6746,
"eval_samples_per_second": 238.262,
"eval_steps_per_second": 29.857,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 0.014071076177060604,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.019,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8727838950061173,
"eval_loss": 0.5908846259117126,
"eval_precision": 0.8656898656898657,
"eval_recall": 0.8812511365702855,
"eval_runtime": 1.6539,
"eval_samples_per_second": 241.244,
"eval_steps_per_second": 30.231,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 0.037436336278915405,
"learning_rate": 2.5e-05,
"loss": 0.016,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8521068445832446,
"eval_loss": 0.8931390047073364,
"eval_precision": 0.8693800752624282,
"eval_recall": 0.8391525731951264,
"eval_runtime": 1.6526,
"eval_samples_per_second": 241.431,
"eval_steps_per_second": 30.254,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 0.01795610599219799,
"learning_rate": 2.25e-05,
"loss": 0.0167,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8751002084335417,
"eval_loss": 0.6617795825004578,
"eval_precision": 0.8780701754385964,
"eval_recall": 0.8722949627204946,
"eval_runtime": 1.6571,
"eval_samples_per_second": 240.783,
"eval_steps_per_second": 30.173,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 0.007873360067605972,
"learning_rate": 2e-05,
"loss": 0.0168,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.8811928811928812,
"eval_loss": 0.7512642741203308,
"eval_precision": 0.8842105263157894,
"eval_recall": 0.878341516639389,
"eval_runtime": 1.6587,
"eval_samples_per_second": 240.556,
"eval_steps_per_second": 30.145,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 0.0045745461247861385,
"learning_rate": 1.75e-05,
"loss": 0.0064,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8778322106552358,
"eval_loss": 0.751264750957489,
"eval_precision": 0.8818924438393465,
"eval_recall": 0.8740680123658847,
"eval_runtime": 1.6656,
"eval_samples_per_second": 239.555,
"eval_steps_per_second": 30.019,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 0.002741220872849226,
"learning_rate": 1.5e-05,
"loss": 0.0078,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8703663593044124,
"eval_loss": 0.8151593208312988,
"eval_precision": 0.8789149003479912,
"eval_recall": 0.8630205491907619,
"eval_runtime": 1.6585,
"eval_samples_per_second": 240.577,
"eval_steps_per_second": 30.147,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.004927061963826418,
"learning_rate": 1.25e-05,
"loss": 0.0064,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.879667048676036,
"eval_loss": 0.7460238337516785,
"eval_precision": 0.8778361344537815,
"eval_recall": 0.8815693762502272,
"eval_runtime": 1.6712,
"eval_samples_per_second": 238.744,
"eval_steps_per_second": 29.918,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 0.0015839393017813563,
"learning_rate": 1e-05,
"loss": 0.0055,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8683279483657071,
"eval_loss": 0.8232345581054688,
"eval_precision": 0.873366724738676,
"eval_recall": 0.863747954173486,
"eval_runtime": 1.6703,
"eval_samples_per_second": 238.876,
"eval_steps_per_second": 29.934,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 0.0020133736543357372,
"learning_rate": 7.5e-06,
"loss": 0.006,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8717238211879976,
"eval_loss": 0.8420803546905518,
"eval_precision": 0.8757194133300328,
"eval_recall": 0.8680214584469903,
"eval_runtime": 1.6698,
"eval_samples_per_second": 238.949,
"eval_steps_per_second": 29.943,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 0.0020168637856841087,
"learning_rate": 5e-06,
"loss": 0.0052,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8649563392675828,
"eval_loss": 0.8441980481147766,
"eval_precision": 0.8623655913978494,
"eval_recall": 0.8677032187670486,
"eval_runtime": 1.6705,
"eval_samples_per_second": 238.849,
"eval_steps_per_second": 29.931,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 0.0013460558839142323,
"learning_rate": 2.5e-06,
"loss": 0.0035,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8663031558425733,
"eval_loss": 0.8841463923454285,
"eval_precision": 0.8682026944274341,
"eval_recall": 0.8644753591562102,
"eval_runtime": 1.6699,
"eval_samples_per_second": 238.937,
"eval_steps_per_second": 29.942,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 0.0018115871353074908,
"learning_rate": 0.0,
"loss": 0.0013,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8690075356742023,
"eval_loss": 0.8886067867279053,
"eval_precision": 0.8719298245614036,
"eval_recall": 0.8662484088016003,
"eval_runtime": 1.6565,
"eval_samples_per_second": 240.872,
"eval_steps_per_second": 30.184,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7584162436176000.0,
"train_loss": 0.05631163200271911,
"train_runtime": 865.617,
"train_samples_per_second": 84.056,
"train_steps_per_second": 2.819
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7584162436176000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}