[ { "loss": 0.9641, "grad_norm": 12.902933120727539, "learning_rate": 1.8e-05, "epoch": 1.0, "step": 250 }, { "eval_loss": 0.6194158792495728, "eval_accuracy": 0.792, "eval_f1_score": 0.7818567350710909, "eval_runtime": 4.3481, "eval_samples_per_second": 459.97, "eval_steps_per_second": 7.36, "epoch": 1.0, "step": 250 }, { "loss": 0.4398, "grad_norm": 8.209343910217285, "learning_rate": 1.6000000000000003e-05, "epoch": 2.0, "step": 500 }, { "eval_loss": 0.33885374665260315, "eval_accuracy": 0.883, "eval_f1_score": 0.8825473908240207, "eval_runtime": 4.3879, "eval_samples_per_second": 455.804, "eval_steps_per_second": 7.293, "epoch": 2.0, "step": 500 }, { "loss": 0.258, "grad_norm": 7.1059184074401855, "learning_rate": 1.4e-05, "epoch": 3.0, "step": 750 }, { "eval_loss": 0.2948116958141327, "eval_accuracy": 0.8945, "eval_f1_score": 0.8950761079856652, "eval_runtime": 4.3338, "eval_samples_per_second": 461.493, "eval_steps_per_second": 7.384, "epoch": 3.0, "step": 750 }, { "loss": 0.1744, "grad_norm": 14.858270645141602, "learning_rate": 1.2e-05, "epoch": 4.0, "step": 1000 }, { "eval_loss": 0.28408634662628174, "eval_accuracy": 0.9035, "eval_f1_score": 0.9038343567745306, "eval_runtime": 4.2528, "eval_samples_per_second": 470.273, "eval_steps_per_second": 7.524, "epoch": 4.0, "step": 1000 }, { "loss": 0.132, "grad_norm": 10.98675537109375, "learning_rate": 1e-05, "epoch": 5.0, "step": 1250 }, { "eval_loss": 0.29371485114097595, "eval_accuracy": 0.8985, "eval_f1_score": 0.8982752344158218, "eval_runtime": 4.368, "eval_samples_per_second": 457.874, "eval_steps_per_second": 7.326, "epoch": 5.0, "step": 1250 }, { "loss": 0.1078, "grad_norm": 1.496416449546814, "learning_rate": 8.000000000000001e-06, "epoch": 6.0, "step": 1500 }, { "eval_loss": 0.27695581316947937, "eval_accuracy": 0.9055, "eval_f1_score": 0.9053682374738501, "eval_runtime": 4.2619, "eval_samples_per_second": 469.274, "eval_steps_per_second": 7.508, "epoch": 6.0, "step": 1500 }, { "loss": 0.0888, "grad_norm": 2.4278478622436523, "learning_rate": 6e-06, "epoch": 7.0, "step": 1750 }, { "eval_loss": 0.3017047643661499, "eval_accuracy": 0.903, "eval_f1_score": 0.9027803797533116, "eval_runtime": 4.3556, "eval_samples_per_second": 459.177, "eval_steps_per_second": 7.347, "epoch": 7.0, "step": 1750 }, { "loss": 0.0739, "grad_norm": 2.9510045051574707, "learning_rate": 4.000000000000001e-06, "epoch": 8.0, "step": 2000 }, { "eval_loss": 0.28290167450904846, "eval_accuracy": 0.9095, "eval_f1_score": 0.9096314060750578, "eval_runtime": 4.3333, "eval_samples_per_second": 461.542, "eval_steps_per_second": 7.385, "epoch": 8.0, "step": 2000 }, { "loss": 0.0611, "grad_norm": 9.680631637573242, "learning_rate": 2.0000000000000003e-06, "epoch": 9.0, "step": 2250 }, { "eval_loss": 0.306204229593277, "eval_accuracy": 0.91, "eval_f1_score": 0.9101550513849552, "eval_runtime": 4.3144, "eval_samples_per_second": 463.564, "eval_steps_per_second": 7.417, "epoch": 9.0, "step": 2250 }, { "loss": 0.0506, "grad_norm": 3.266329050064087, "learning_rate": 0.0, "epoch": 10.0, "step": 2500 }, { "eval_loss": 0.3068402111530304, "eval_accuracy": 0.9085, "eval_f1_score": 0.9086081968965166, "eval_runtime": 4.2835, "eval_samples_per_second": 466.913, "eval_steps_per_second": 7.471, "epoch": 10.0, "step": 2500 }, { "train_runtime": 1334.5966, "train_samples_per_second": 119.886, "train_steps_per_second": 1.873, "total_flos": 3891507413760000.0, "train_loss": 0.23506236724853516, "epoch": 10.0, "step": 2500 }, { "eval_loss": 0.3068402111530304, "eval_accuracy": 0.9085, "eval_f1_score": 0.9086081968965166, "eval_runtime": 4.7892, "eval_samples_per_second": 417.606, "eval_steps_per_second": 6.682, "epoch": 10.0, "step": 2500 } ]