{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 20.148401260375977, "learning_rate": 4.75e-05, "loss": 0.3808, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8186363636363636, "eval_loss": 0.379351943731308, "eval_precision": 0.8736897274633124, "eval_recall": 0.7917348608837971, "eval_runtime": 1.6409, "eval_samples_per_second": 243.152, "eval_steps_per_second": 30.47, "step": 122 }, { "epoch": 2.0, "grad_norm": 21.554689407348633, "learning_rate": 4.5e-05, "loss": 0.221, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8395201930584144, "eval_loss": 0.2850644886493683, "eval_precision": 0.8562091503267973, "eval_recall": 0.8270594653573378, "eval_runtime": 1.6504, "eval_samples_per_second": 241.765, "eval_steps_per_second": 30.296, "step": 244 }, { "epoch": 3.0, "grad_norm": 0.09420396387577057, "learning_rate": 4.25e-05, "loss": 0.1363, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8717238211879976, "eval_loss": 0.38322028517723083, "eval_precision": 0.8757194133300328, "eval_recall": 0.8680214584469903, "eval_runtime": 1.6524, "eval_samples_per_second": 241.46, "eval_steps_per_second": 30.258, "step": 366 }, { "epoch": 4.0, "grad_norm": 7.119666576385498, "learning_rate": 4e-05, "loss": 0.099, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8717112228173498, "eval_loss": 0.4968295693397522, "eval_precision": 0.8869295958279009, "eval_recall": 0.8597926895799237, "eval_runtime": 1.6515, "eval_samples_per_second": 241.593, "eval_steps_per_second": 30.275, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.1547642946243286, "learning_rate": 3.7500000000000003e-05, "loss": 0.0702, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8377439939939939, "eval_loss": 0.5204734802246094, "eval_precision": 0.8503401360544218, "eval_recall": 0.8277868703400618, "eval_runtime": 1.6524, "eval_samples_per_second": 241.469, "eval_steps_per_second": 30.259, "step": 610 }, { "epoch": 6.0, "grad_norm": 0.08600271493196487, "learning_rate": 3.5e-05, "loss": 0.0469, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8448388501742161, "eval_loss": 0.5740100741386414, "eval_precision": 0.8551721930610677, "eval_recall": 0.8363338788870704, "eval_runtime": 1.6555, "eval_samples_per_second": 241.009, "eval_steps_per_second": 30.202, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.024254148826003075, "learning_rate": 3.2500000000000004e-05, "loss": 0.0328, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8629480286738351, "eval_loss": 0.6011895537376404, "eval_precision": 0.8580770590314599, "eval_recall": 0.8684306237497728, "eval_runtime": 1.6578, "eval_samples_per_second": 240.677, "eval_steps_per_second": 30.16, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.03784336522221565, "learning_rate": 3e-05, "loss": 0.0284, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8737897035111135, "eval_loss": 0.5402500033378601, "eval_precision": 0.8812047813777917, "eval_recall": 0.8672940534642661, "eval_runtime": 1.6746, "eval_samples_per_second": 238.262, "eval_steps_per_second": 29.857, "step": 976 }, { "epoch": 9.0, "grad_norm": 0.014071076177060604, "learning_rate": 2.7500000000000004e-05, "loss": 0.019, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8727838950061173, "eval_loss": 0.5908846259117126, "eval_precision": 0.8656898656898657, "eval_recall": 0.8812511365702855, "eval_runtime": 1.6539, "eval_samples_per_second": 241.244, "eval_steps_per_second": 30.231, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.037436336278915405, "learning_rate": 2.5e-05, "loss": 0.016, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8521068445832446, "eval_loss": 0.8931390047073364, "eval_precision": 0.8693800752624282, "eval_recall": 0.8391525731951264, "eval_runtime": 1.6526, "eval_samples_per_second": 241.431, "eval_steps_per_second": 30.254, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.01795610599219799, "learning_rate": 2.25e-05, "loss": 0.0167, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8751002084335417, "eval_loss": 0.6617795825004578, "eval_precision": 0.8780701754385964, "eval_recall": 0.8722949627204946, "eval_runtime": 1.6571, "eval_samples_per_second": 240.783, "eval_steps_per_second": 30.173, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.007873360067605972, "learning_rate": 2e-05, "loss": 0.0168, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8811928811928812, "eval_loss": 0.7512642741203308, "eval_precision": 0.8842105263157894, "eval_recall": 0.878341516639389, "eval_runtime": 1.6587, "eval_samples_per_second": 240.556, "eval_steps_per_second": 30.145, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.0045745461247861385, "learning_rate": 1.75e-05, "loss": 0.0064, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8778322106552358, "eval_loss": 0.751264750957489, "eval_precision": 0.8818924438393465, "eval_recall": 0.8740680123658847, "eval_runtime": 1.6656, "eval_samples_per_second": 239.555, "eval_steps_per_second": 30.019, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.002741220872849226, "learning_rate": 1.5e-05, "loss": 0.0078, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8703663593044124, "eval_loss": 0.8151593208312988, "eval_precision": 0.8789149003479912, "eval_recall": 0.8630205491907619, "eval_runtime": 1.6585, "eval_samples_per_second": 240.577, "eval_steps_per_second": 30.147, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.004927061963826418, "learning_rate": 1.25e-05, "loss": 0.0064, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.879667048676036, "eval_loss": 0.7460238337516785, "eval_precision": 0.8778361344537815, "eval_recall": 0.8815693762502272, "eval_runtime": 1.6712, "eval_samples_per_second": 238.744, "eval_steps_per_second": 29.918, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0015839393017813563, "learning_rate": 1e-05, "loss": 0.0055, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8683279483657071, "eval_loss": 0.8232345581054688, "eval_precision": 0.873366724738676, "eval_recall": 0.863747954173486, "eval_runtime": 1.6703, "eval_samples_per_second": 238.876, "eval_steps_per_second": 29.934, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.0020133736543357372, "learning_rate": 7.5e-06, "loss": 0.006, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8717238211879976, "eval_loss": 0.8420803546905518, "eval_precision": 0.8757194133300328, "eval_recall": 0.8680214584469903, "eval_runtime": 1.6698, "eval_samples_per_second": 238.949, "eval_steps_per_second": 29.943, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0020168637856841087, "learning_rate": 5e-06, "loss": 0.0052, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8649563392675828, "eval_loss": 0.8441980481147766, "eval_precision": 0.8623655913978494, "eval_recall": 0.8677032187670486, "eval_runtime": 1.6705, "eval_samples_per_second": 238.849, "eval_steps_per_second": 29.931, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0013460558839142323, "learning_rate": 2.5e-06, "loss": 0.0035, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8663031558425733, "eval_loss": 0.8841463923454285, "eval_precision": 0.8682026944274341, "eval_recall": 0.8644753591562102, "eval_runtime": 1.6699, "eval_samples_per_second": 238.937, "eval_steps_per_second": 29.942, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.0018115871353074908, "learning_rate": 0.0, "loss": 0.0013, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8690075356742023, "eval_loss": 0.8886067867279053, "eval_precision": 0.8719298245614036, "eval_recall": 0.8662484088016003, "eval_runtime": 1.6565, "eval_samples_per_second": 240.872, "eval_steps_per_second": 30.184, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7584162436176000.0, "train_loss": 0.05631163200271911, "train_runtime": 865.617, "train_samples_per_second": 84.056, "train_steps_per_second": 2.819 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7584162436176000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }