|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 20.148401260375977, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3808, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8186363636363636, |
|
"eval_loss": 0.379351943731308, |
|
"eval_precision": 0.8736897274633124, |
|
"eval_recall": 0.7917348608837971, |
|
"eval_runtime": 1.6409, |
|
"eval_samples_per_second": 243.152, |
|
"eval_steps_per_second": 30.47, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 21.554689407348633, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.221, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8395201930584144, |
|
"eval_loss": 0.2850644886493683, |
|
"eval_precision": 0.8562091503267973, |
|
"eval_recall": 0.8270594653573378, |
|
"eval_runtime": 1.6504, |
|
"eval_samples_per_second": 241.765, |
|
"eval_steps_per_second": 30.296, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.09420396387577057, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1363, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8717238211879976, |
|
"eval_loss": 0.38322028517723083, |
|
"eval_precision": 0.8757194133300328, |
|
"eval_recall": 0.8680214584469903, |
|
"eval_runtime": 1.6524, |
|
"eval_samples_per_second": 241.46, |
|
"eval_steps_per_second": 30.258, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 7.119666576385498, |
|
"learning_rate": 4e-05, |
|
"loss": 0.099, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8717112228173498, |
|
"eval_loss": 0.4968295693397522, |
|
"eval_precision": 0.8869295958279009, |
|
"eval_recall": 0.8597926895799237, |
|
"eval_runtime": 1.6515, |
|
"eval_samples_per_second": 241.593, |
|
"eval_steps_per_second": 30.275, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.1547642946243286, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0702, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8377439939939939, |
|
"eval_loss": 0.5204734802246094, |
|
"eval_precision": 0.8503401360544218, |
|
"eval_recall": 0.8277868703400618, |
|
"eval_runtime": 1.6524, |
|
"eval_samples_per_second": 241.469, |
|
"eval_steps_per_second": 30.259, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.08600271493196487, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0469, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8448388501742161, |
|
"eval_loss": 0.5740100741386414, |
|
"eval_precision": 0.8551721930610677, |
|
"eval_recall": 0.8363338788870704, |
|
"eval_runtime": 1.6555, |
|
"eval_samples_per_second": 241.009, |
|
"eval_steps_per_second": 30.202, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.024254148826003075, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0328, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8629480286738351, |
|
"eval_loss": 0.6011895537376404, |
|
"eval_precision": 0.8580770590314599, |
|
"eval_recall": 0.8684306237497728, |
|
"eval_runtime": 1.6578, |
|
"eval_samples_per_second": 240.677, |
|
"eval_steps_per_second": 30.16, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.03784336522221565, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0284, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8737897035111135, |
|
"eval_loss": 0.5402500033378601, |
|
"eval_precision": 0.8812047813777917, |
|
"eval_recall": 0.8672940534642661, |
|
"eval_runtime": 1.6746, |
|
"eval_samples_per_second": 238.262, |
|
"eval_steps_per_second": 29.857, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.014071076177060604, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.019, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8727838950061173, |
|
"eval_loss": 0.5908846259117126, |
|
"eval_precision": 0.8656898656898657, |
|
"eval_recall": 0.8812511365702855, |
|
"eval_runtime": 1.6539, |
|
"eval_samples_per_second": 241.244, |
|
"eval_steps_per_second": 30.231, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.037436336278915405, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.016, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8521068445832446, |
|
"eval_loss": 0.8931390047073364, |
|
"eval_precision": 0.8693800752624282, |
|
"eval_recall": 0.8391525731951264, |
|
"eval_runtime": 1.6526, |
|
"eval_samples_per_second": 241.431, |
|
"eval_steps_per_second": 30.254, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.01795610599219799, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0167, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8751002084335417, |
|
"eval_loss": 0.6617795825004578, |
|
"eval_precision": 0.8780701754385964, |
|
"eval_recall": 0.8722949627204946, |
|
"eval_runtime": 1.6571, |
|
"eval_samples_per_second": 240.783, |
|
"eval_steps_per_second": 30.173, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.007873360067605972, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0168, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8811928811928812, |
|
"eval_loss": 0.7512642741203308, |
|
"eval_precision": 0.8842105263157894, |
|
"eval_recall": 0.878341516639389, |
|
"eval_runtime": 1.6587, |
|
"eval_samples_per_second": 240.556, |
|
"eval_steps_per_second": 30.145, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.0045745461247861385, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0064, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8778322106552358, |
|
"eval_loss": 0.751264750957489, |
|
"eval_precision": 0.8818924438393465, |
|
"eval_recall": 0.8740680123658847, |
|
"eval_runtime": 1.6656, |
|
"eval_samples_per_second": 239.555, |
|
"eval_steps_per_second": 30.019, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.002741220872849226, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0078, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8703663593044124, |
|
"eval_loss": 0.8151593208312988, |
|
"eval_precision": 0.8789149003479912, |
|
"eval_recall": 0.8630205491907619, |
|
"eval_runtime": 1.6585, |
|
"eval_samples_per_second": 240.577, |
|
"eval_steps_per_second": 30.147, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.004927061963826418, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0064, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.879667048676036, |
|
"eval_loss": 0.7460238337516785, |
|
"eval_precision": 0.8778361344537815, |
|
"eval_recall": 0.8815693762502272, |
|
"eval_runtime": 1.6712, |
|
"eval_samples_per_second": 238.744, |
|
"eval_steps_per_second": 29.918, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0015839393017813563, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0055, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8683279483657071, |
|
"eval_loss": 0.8232345581054688, |
|
"eval_precision": 0.873366724738676, |
|
"eval_recall": 0.863747954173486, |
|
"eval_runtime": 1.6703, |
|
"eval_samples_per_second": 238.876, |
|
"eval_steps_per_second": 29.934, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.0020133736543357372, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.006, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8717238211879976, |
|
"eval_loss": 0.8420803546905518, |
|
"eval_precision": 0.8757194133300328, |
|
"eval_recall": 0.8680214584469903, |
|
"eval_runtime": 1.6698, |
|
"eval_samples_per_second": 238.949, |
|
"eval_steps_per_second": 29.943, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0020168637856841087, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0052, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8649563392675828, |
|
"eval_loss": 0.8441980481147766, |
|
"eval_precision": 0.8623655913978494, |
|
"eval_recall": 0.8677032187670486, |
|
"eval_runtime": 1.6705, |
|
"eval_samples_per_second": 238.849, |
|
"eval_steps_per_second": 29.931, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0013460558839142323, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0035, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8663031558425733, |
|
"eval_loss": 0.8841463923454285, |
|
"eval_precision": 0.8682026944274341, |
|
"eval_recall": 0.8644753591562102, |
|
"eval_runtime": 1.6699, |
|
"eval_samples_per_second": 238.937, |
|
"eval_steps_per_second": 29.942, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0018115871353074908, |
|
"learning_rate": 0.0, |
|
"loss": 0.0013, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8690075356742023, |
|
"eval_loss": 0.8886067867279053, |
|
"eval_precision": 0.8719298245614036, |
|
"eval_recall": 0.8662484088016003, |
|
"eval_runtime": 1.6565, |
|
"eval_samples_per_second": 240.872, |
|
"eval_steps_per_second": 30.184, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7584162436176000.0, |
|
"train_loss": 0.05631163200271911, |
|
"train_runtime": 865.617, |
|
"train_samples_per_second": 84.056, |
|
"train_steps_per_second": 2.819 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7584162436176000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|