{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.303664207458496, "learning_rate": 4.75e-05, "loss": 0.5535, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7293233082706767, "eval_f1": 0.6372727272727272, "eval_loss": 0.49923330545425415, "eval_precision": 0.6645702306079665, "eval_recall": 0.6284779050736498, "eval_runtime": 5.2819, "eval_samples_per_second": 75.541, "eval_steps_per_second": 9.466, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.080423831939697, "learning_rate": 4.5e-05, "loss": 0.444, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.7960536910871955, "eval_loss": 0.4052737355232239, "eval_precision": 0.7846938775510204, "eval_recall": 0.8255591925804692, "eval_runtime": 5.5544, "eval_samples_per_second": 71.835, "eval_steps_per_second": 9.002, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.110426425933838, "learning_rate": 4.25e-05, "loss": 0.3464, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.7905197629940748, "eval_loss": 0.3424628674983978, "eval_precision": 0.8345238095238094, "eval_recall": 0.7682760501909438, "eval_runtime": 5.6127, "eval_samples_per_second": 71.089, "eval_steps_per_second": 8.908, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.6437464952468872, "learning_rate": 4e-05, "loss": 0.2852, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8469505178365937, "eval_loss": 0.3135569393634796, "eval_precision": 0.844489247311828, "eval_recall": 0.8495635570103655, "eval_runtime": 5.5998, "eval_samples_per_second": 71.253, "eval_steps_per_second": 8.929, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.6121034622192383, "learning_rate": 3.7500000000000003e-05, "loss": 0.2608, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8469505178365937, "eval_loss": 0.3060314953327179, "eval_precision": 0.844489247311828, "eval_recall": 0.8495635570103655, "eval_runtime": 5.6686, "eval_samples_per_second": 70.388, "eval_steps_per_second": 8.821, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.0154571533203125, "learning_rate": 3.5e-05, "loss": 0.2415, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8447157518450185, "eval_loss": 0.3100413978099823, "eval_precision": 0.8325401217487549, "eval_recall": 0.864248045099109, "eval_runtime": 5.5541, "eval_samples_per_second": 71.838, "eval_steps_per_second": 9.002, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.5025161504745483, "learning_rate": 3.2500000000000004e-05, "loss": 0.2329, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8642214594306682, "eval_loss": 0.28597915172576904, "eval_precision": 0.8566755442334414, "eval_recall": 0.8734315330060011, "eval_runtime": 5.5629, "eval_samples_per_second": 71.725, "eval_steps_per_second": 8.988, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.4278862476348877, "learning_rate": 3e-05, "loss": 0.199, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.2878971993923187, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 5.6077, "eval_samples_per_second": 71.152, "eval_steps_per_second": 8.916, "step": 976 }, { "epoch": 9.0, "grad_norm": 8.504670143127441, "learning_rate": 2.7500000000000004e-05, "loss": 0.1939, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8676337535436396, "eval_loss": 0.28258949518203735, "eval_precision": 0.8658613445378152, "eval_recall": 0.8694762684124386, "eval_runtime": 5.5938, "eval_samples_per_second": 71.329, "eval_steps_per_second": 8.939, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.462061882019043, "learning_rate": 2.5e-05, "loss": 0.1806, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8439374185136896, "eval_loss": 0.2981988787651062, "eval_precision": 0.8794955044955045, "eval_recall": 0.822376795781051, "eval_runtime": 5.5576, "eval_samples_per_second": 71.793, "eval_steps_per_second": 8.997, "step": 1220 }, { "epoch": 11.0, "grad_norm": 1.0077548027038574, "learning_rate": 2.25e-05, "loss": 0.1674, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8730223677032187, "eval_loss": 0.2734816372394562, "eval_precision": 0.8730223677032187, "eval_recall": 0.8730223677032187, "eval_runtime": 5.5924, "eval_samples_per_second": 71.346, "eval_steps_per_second": 8.941, "step": 1342 }, { "epoch": 12.0, "grad_norm": 3.9673709869384766, "learning_rate": 2e-05, "loss": 0.1553, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8717238211879976, "eval_loss": 0.2753015458583832, "eval_precision": 0.8757194133300328, "eval_recall": 0.8680214584469903, "eval_runtime": 5.5661, "eval_samples_per_second": 71.684, "eval_steps_per_second": 8.983, "step": 1464 }, { "epoch": 13.0, "grad_norm": 3.968949794769287, "learning_rate": 1.75e-05, "loss": 0.1431, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8661961395983623, "eval_loss": 0.2937251627445221, "eval_precision": 0.8784532165625604, "eval_recall": 0.8562465902891435, "eval_runtime": 5.5699, "eval_samples_per_second": 71.635, "eval_steps_per_second": 8.977, "step": 1586 }, { "epoch": 14.0, "grad_norm": 8.566404342651367, "learning_rate": 1.5e-05, "loss": 0.1417, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8910359080340997, "eval_loss": 0.29110613465309143, "eval_precision": 0.8822647601476015, "eval_recall": 0.9018912529550827, "eval_runtime": 5.5858, "eval_samples_per_second": 71.432, "eval_steps_per_second": 8.951, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.1758796125650406, "learning_rate": 1.25e-05, "loss": 0.1236, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8817957385392532, "eval_loss": 0.2955999970436096, "eval_precision": 0.8827677592299257, "eval_recall": 0.8808419712675032, "eval_runtime": 5.6052, "eval_samples_per_second": 71.184, "eval_steps_per_second": 8.92, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.7015694975852966, "learning_rate": 1e-05, "loss": 0.1304, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.884617951284618, "eval_loss": 0.3010990023612976, "eval_precision": 0.8772893772893773, "eval_recall": 0.8933442444080741, "eval_runtime": 5.6368, "eval_samples_per_second": 70.785, "eval_steps_per_second": 8.87, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.19915825128555298, "learning_rate": 7.5e-06, "loss": 0.1164, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.879667048676036, "eval_loss": 0.29428762197494507, "eval_precision": 0.8778361344537815, "eval_recall": 0.8815693762502272, "eval_runtime": 5.5793, "eval_samples_per_second": 71.514, "eval_steps_per_second": 8.962, "step": 2074 }, { "epoch": 18.0, "grad_norm": 9.03445816040039, "learning_rate": 5e-06, "loss": 0.1144, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8775533117267087, "eval_loss": 0.2937219738960266, "eval_precision": 0.873246730188791, "eval_recall": 0.8822967812329514, "eval_runtime": 5.5648, "eval_samples_per_second": 71.7, "eval_steps_per_second": 8.985, "step": 2196 }, { "epoch": 19.0, "grad_norm": 5.45957612991333, "learning_rate": 2.5e-06, "loss": 0.1198, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8737897035111135, "eval_loss": 0.29848915338516235, "eval_precision": 0.8812047813777917, "eval_recall": 0.8672940534642661, "eval_runtime": 5.5642, "eval_samples_per_second": 71.709, "eval_steps_per_second": 8.986, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.5000033378601074, "learning_rate": 0.0, "loss": 0.1104, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8811928811928812, "eval_loss": 0.29284632205963135, "eval_precision": 0.8842105263157894, "eval_recall": 0.878341516639389, "eval_runtime": 5.5868, "eval_samples_per_second": 71.418, "eval_steps_per_second": 8.95, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8551203605328000.0, "train_loss": 0.2130153269064231, "train_runtime": 2280.4037, "train_samples_per_second": 31.907, "train_steps_per_second": 1.07 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8551203605328000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }