{ "best_metric": 0.7496885984237682, "best_model_checkpoint": "training_dir/checkpoint-7000", "epoch": 1.4093329157532102, "eval_steps": 1000, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15659254619480112, "grad_norm": 70.82987976074219, "learning_rate": 9.92108585858586e-06, "loss": 0.7494, "step": 1000 }, { "epoch": 0.15659254619480112, "eval_accuracy": 0.7027972027972028, "eval_f1": 0.6892915130770362, "eval_loss": 1.232393503189087, "eval_precision": 0.7022461657552481, "eval_recall": 0.7027972027972028, "eval_runtime": 45.3319, "eval_samples_per_second": 50.472, "eval_steps_per_second": 6.309, "step": 1000 }, { "epoch": 0.31318509238960224, "grad_norm": 38.00064468383789, "learning_rate": 9.763257575757577e-06, "loss": 0.4999, "step": 2000 }, { "epoch": 0.31318509238960224, "eval_accuracy": 0.7434440559440559, "eval_f1": 0.7341503634182242, "eval_loss": 0.9058426022529602, "eval_precision": 0.7427664697660185, "eval_recall": 0.7434440559440559, "eval_runtime": 44.9882, "eval_samples_per_second": 50.858, "eval_steps_per_second": 6.357, "step": 2000 }, { "epoch": 0.46977763858440336, "grad_norm": 19.2281494140625, "learning_rate": 9.605429292929293e-06, "loss": 0.4599, "step": 3000 }, { "epoch": 0.46977763858440336, "eval_accuracy": 0.729458041958042, "eval_f1": 0.7119098504970799, "eval_loss": 0.9390596747398376, "eval_precision": 0.7324985005723318, "eval_recall": 0.729458041958042, "eval_runtime": 45.3391, "eval_samples_per_second": 50.464, "eval_steps_per_second": 6.308, "step": 3000 }, { "epoch": 0.6263701847792045, "grad_norm": 22.899824142456055, "learning_rate": 9.44760101010101e-06, "loss": 0.4571, "step": 4000 }, { "epoch": 0.6263701847792045, "eval_accuracy": 0.743006993006993, "eval_f1": 0.724237846207803, "eval_loss": 0.9197003841400146, "eval_precision": 0.7509386418640936, "eval_recall": 0.743006993006993, "eval_runtime": 45.2447, "eval_samples_per_second": 50.569, "eval_steps_per_second": 6.321, "step": 4000 }, { "epoch": 0.7829627309740056, "grad_norm": 44.1195182800293, "learning_rate": 9.289772727272728e-06, "loss": 0.4542, "step": 5000 }, { "epoch": 0.7829627309740056, "eval_accuracy": 0.7517482517482518, "eval_f1": 0.7410787462847774, "eval_loss": 0.8051876425743103, "eval_precision": 0.750223869921944, "eval_recall": 0.7517482517482518, "eval_runtime": 49.8353, "eval_samples_per_second": 45.911, "eval_steps_per_second": 5.739, "step": 5000 }, { "epoch": 0.9395552771688067, "grad_norm": 120.46759796142578, "learning_rate": 9.131944444444445e-06, "loss": 0.4268, "step": 6000 }, { "epoch": 0.9395552771688067, "eval_accuracy": 0.7556818181818182, "eval_f1": 0.748088378029472, "eval_loss": 0.9902251958847046, "eval_precision": 0.757243064686842, "eval_recall": 0.7556818181818182, "eval_runtime": 49.224, "eval_samples_per_second": 46.481, "eval_steps_per_second": 5.81, "step": 6000 }, { "epoch": 1.096147823363608, "grad_norm": 60.6891975402832, "learning_rate": 8.974116161616161e-06, "loss": 0.4014, "step": 7000 }, { "epoch": 1.096147823363608, "eval_accuracy": 0.7574300699300699, "eval_f1": 0.7496885984237682, "eval_loss": 0.9940707087516785, "eval_precision": 0.7543068859180688, "eval_recall": 0.7574300699300699, "eval_runtime": 49.3422, "eval_samples_per_second": 46.37, "eval_steps_per_second": 5.796, "step": 7000 }, { "epoch": 1.252740369558409, "grad_norm": 43.56020736694336, "learning_rate": 8.816287878787879e-06, "loss": 0.4071, "step": 8000 }, { "epoch": 1.252740369558409, "eval_accuracy": 0.7390734265734266, "eval_f1": 0.7219289922874507, "eval_loss": 0.9887688755989075, "eval_precision": 0.738468925570974, "eval_recall": 0.7390734265734266, "eval_runtime": 49.3611, "eval_samples_per_second": 46.352, "eval_steps_per_second": 5.794, "step": 8000 }, { "epoch": 1.4093329157532102, "grad_norm": 11.064676284790039, "learning_rate": 8.658459595959596e-06, "loss": 0.392, "step": 9000 }, { "epoch": 1.4093329157532102, "eval_accuracy": 0.7460664335664335, "eval_f1": 0.7321628808235685, "eval_loss": 0.9361704587936401, "eval_precision": 0.746771852734093, "eval_recall": 0.7460664335664335, "eval_runtime": 49.422, "eval_samples_per_second": 46.295, "eval_steps_per_second": 5.787, "step": 9000 } ], "logging_steps": 1000, "max_steps": 63860, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8433656156522096e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }