{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 6250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.9648939348670451, "eval_f1": 0.8973252804141502, "eval_loss": 0.189955934882164, "eval_precision": 0.8587943848059455, "eval_recall": 0.9394760614272809, "eval_runtime": 2.5262, "eval_samples_per_second": 395.851, "eval_steps_per_second": 24.939, "step": 625 }, { "epoch": 1.6, "grad_norm": 5.951064109802246, "learning_rate": 1.6800000000000002e-05, "loss": 0.0567, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.97169106662683, "eval_f1": 0.9429726088908846, "eval_loss": 0.1852019727230072, "eval_precision": 0.9375, "eval_recall": 0.948509485094851, "eval_runtime": 2.561, "eval_samples_per_second": 390.465, "eval_steps_per_second": 24.599, "step": 1250 }, { "epoch": 3.0, "eval_accuracy": 0.9731102479832686, "eval_f1": 0.9533213644524237, "eval_loss": 0.17272309958934784, "eval_precision": 0.9473684210526315, "eval_recall": 0.959349593495935, "eval_runtime": 3.904, "eval_samples_per_second": 256.151, "eval_steps_per_second": 16.137, "step": 1875 }, { "epoch": 3.2, "grad_norm": 0.4564518630504608, "learning_rate": 1.3600000000000002e-05, "loss": 0.0338, "step": 2000 }, { "epoch": 4.0, "eval_accuracy": 0.9777412608305945, "eval_f1": 0.9547288211564321, "eval_loss": 0.15044009685516357, "eval_precision": 0.947508896797153, "eval_recall": 0.962059620596206, "eval_runtime": 2.4284, "eval_samples_per_second": 411.796, "eval_steps_per_second": 25.943, "step": 2500 }, { "epoch": 4.8, "grad_norm": 0.006230680737644434, "learning_rate": 1.04e-05, "loss": 0.0318, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.9756498356737376, "eval_f1": 0.9563259792886086, "eval_loss": 0.17377431690692902, "eval_precision": 0.9533213644524237, "eval_recall": 0.959349593495935, "eval_runtime": 2.4667, "eval_samples_per_second": 405.403, "eval_steps_per_second": 25.54, "step": 3125 }, { "epoch": 6.0, "eval_accuracy": 0.9788616671646251, "eval_f1": 0.9630297565374211, "eval_loss": 0.1647682785987854, "eval_precision": 0.9612961296129613, "eval_recall": 0.964769647696477, "eval_runtime": 3.1374, "eval_samples_per_second": 318.737, "eval_steps_per_second": 20.08, "step": 3750 }, { "epoch": 6.4, "grad_norm": 0.0044134571217000484, "learning_rate": 7.2000000000000005e-06, "loss": 0.02, "step": 4000 }, { "epoch": 7.0, "eval_accuracy": 0.9770690170301762, "eval_f1": 0.9643340857787811, "eval_loss": 0.17450480163097382, "eval_precision": 0.9638989169675091, "eval_recall": 0.964769647696477, "eval_runtime": 2.53, "eval_samples_per_second": 395.256, "eval_steps_per_second": 24.901, "step": 4375 }, { "epoch": 8.0, "grad_norm": 0.0022473218850791454, "learning_rate": 4.000000000000001e-06, "loss": 0.0063, "step": 5000 }, { "epoch": 8.0, "eval_accuracy": 0.9793845234538393, "eval_f1": 0.9652370203160271, "eval_loss": 0.16244609653949738, "eval_precision": 0.9648014440433214, "eval_recall": 0.9656729900632339, "eval_runtime": 2.5135, "eval_samples_per_second": 397.859, "eval_steps_per_second": 25.065, "step": 5000 }, { "epoch": 9.0, "eval_accuracy": 0.98125186734389, "eval_f1": 0.9643340857787811, "eval_loss": 0.16350433230400085, "eval_precision": 0.9638989169675091, "eval_recall": 0.964769647696477, "eval_runtime": 2.4878, "eval_samples_per_second": 401.967, "eval_steps_per_second": 25.324, "step": 5625 }, { "epoch": 9.6, "grad_norm": 0.0036331522278487682, "learning_rate": 8.000000000000001e-07, "loss": 0.0036, "step": 6000 }, { "epoch": 10.0, "eval_accuracy": 0.9799820734986555, "eval_f1": 0.9643662607126748, "eval_loss": 0.16758297383785248, "eval_precision": 0.9630630630630631, "eval_recall": 0.9656729900632339, "eval_runtime": 2.4417, "eval_samples_per_second": 409.55, "eval_steps_per_second": 25.802, "step": 6250 } ], "logging_steps": 1000, "max_steps": 6250, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1559350717084512.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }