{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.659090909090908, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.57, "learning_rate": 1.8863636363636366e-05, "loss": 0.9603, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.917317101584343, "eval_f1": 0.780907047467936, "eval_loss": 0.3360126316547394, "eval_precision": 0.7684100962789487, "eval_recall": 0.7938172043010753, "eval_runtime": 4.4585, "eval_samples_per_second": 211.507, "eval_steps_per_second": 42.391, "step": 880 }, { "epoch": 1.14, "learning_rate": 1.772727272727273e-05, "loss": 0.4145, "step": 1000 }, { "epoch": 1.7, "learning_rate": 1.6590909090909094e-05, "loss": 0.2846, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.940004659832246, "eval_f1": 0.8470136913817051, "eval_loss": 0.23408983647823334, "eval_precision": 0.8338325302773799, "eval_recall": 0.8606182795698925, "eval_runtime": 5.0803, "eval_samples_per_second": 185.62, "eval_steps_per_second": 37.203, "step": 1760 }, { "epoch": 2.27, "learning_rate": 1.5454545454545454e-05, "loss": 0.2114, "step": 2000 }, { "epoch": 2.84, "learning_rate": 1.431818181818182e-05, "loss": 0.1634, "step": 2500 }, { "epoch": 3.0, "eval_accuracy": 0.9476351351351351, "eval_f1": 0.8742690058479533, "eval_loss": 0.20282697677612305, "eval_precision": 0.8646161934805467, "eval_recall": 0.8841397849462366, "eval_runtime": 3.3011, "eval_samples_per_second": 285.658, "eval_steps_per_second": 57.253, "step": 2640 }, { "epoch": 3.41, "learning_rate": 1.3181818181818183e-05, "loss": 0.1243, "step": 3000 }, { "epoch": 3.98, "learning_rate": 1.2045454545454547e-05, "loss": 0.1158, "step": 3500 }, { "epoch": 4.0, "eval_accuracy": 0.9500232991612302, "eval_f1": 0.8811803278688525, "eval_loss": 0.1980859786272049, "eval_precision": 0.8603072983354674, "eval_recall": 0.9030913978494624, "eval_runtime": 4.9698, "eval_samples_per_second": 189.745, "eval_steps_per_second": 38.03, "step": 3520 }, { "epoch": 4.55, "learning_rate": 1.0909090909090909e-05, "loss": 0.0808, "step": 4000 }, { "epoch": 5.0, "eval_accuracy": 0.9508970177073626, "eval_f1": 0.8861842105263158, "eval_loss": 0.19827169179916382, "eval_precision": 0.8679123711340206, "eval_recall": 0.905241935483871, "eval_runtime": 4.2259, "eval_samples_per_second": 223.15, "eval_steps_per_second": 44.725, "step": 4400 }, { "epoch": 5.11, "learning_rate": 9.772727272727273e-06, "loss": 0.0809, "step": 4500 }, { "epoch": 5.68, "learning_rate": 8.636363636363637e-06, "loss": 0.0679, "step": 5000 }, { "epoch": 6.0, "eval_accuracy": 0.952673578751165, "eval_f1": 0.89337822671156, "eval_loss": 0.1982535719871521, "eval_precision": 0.8779032048786817, "eval_recall": 0.9094086021505376, "eval_runtime": 4.536, "eval_samples_per_second": 207.892, "eval_steps_per_second": 41.667, "step": 5280 }, { "epoch": 6.25, "learning_rate": 7.500000000000001e-06, "loss": 0.0582, "step": 5500 }, { "epoch": 6.82, "learning_rate": 6.363636363636364e-06, "loss": 0.0468, "step": 6000 }, { "epoch": 7.0, "eval_accuracy": 0.9559354613233924, "eval_f1": 0.9001193792280144, "eval_loss": 0.20358432829380035, "eval_precision": 0.8884524744697565, "eval_recall": 0.9120967741935484, "eval_runtime": 4.9544, "eval_samples_per_second": 190.337, "eval_steps_per_second": 38.148, "step": 6160 }, { "epoch": 7.39, "learning_rate": 5.2272727272727274e-06, "loss": 0.0436, "step": 6500 }, { "epoch": 7.95, "learning_rate": 4.0909090909090915e-06, "loss": 0.0403, "step": 7000 }, { "epoch": 8.0, "eval_accuracy": 0.955527726001864, "eval_f1": 0.8995240613432047, "eval_loss": 0.20606616139411926, "eval_precision": 0.8850156087408949, "eval_recall": 0.9145161290322581, "eval_runtime": 3.9742, "eval_samples_per_second": 237.282, "eval_steps_per_second": 47.557, "step": 7040 }, { "epoch": 8.52, "learning_rate": 2.954545454545455e-06, "loss": 0.0336, "step": 7500 }, { "epoch": 9.0, "eval_accuracy": 0.9554694780987885, "eval_f1": 0.9019062748212867, "eval_loss": 0.21168170869350433, "eval_precision": 0.8884976525821596, "eval_recall": 0.915725806451613, "eval_runtime": 2.986, "eval_samples_per_second": 315.802, "eval_steps_per_second": 63.294, "step": 7920 }, { "epoch": 9.09, "learning_rate": 1.8181818181818183e-06, "loss": 0.0328, "step": 8000 }, { "epoch": 9.66, "learning_rate": 6.818181818181818e-07, "loss": 0.0312, "step": 8500 } ], "max_steps": 8800, "num_train_epochs": 10, "total_flos": 408432261397380.0, "trial_name": null, "trial_params": null }