{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.997442455242968, "global_step": 3900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.9110807180404663, "eval_loss": 0.2603251338005066, "eval_runtime": 3.7587, "eval_samples_per_second": 194.484, "eval_steps_per_second": 24.477, "step": 195 }, { "epoch": 2.0, "eval_accuracy": 0.9179206490516663, "eval_loss": 0.2944748103618622, "eval_runtime": 3.8108, "eval_samples_per_second": 191.821, "eval_steps_per_second": 24.142, "step": 390 }, { "epoch": 2.56, "learning_rate": 3.282996735800617e-05, "loss": 0.1807, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.9083447456359863, "eval_loss": 0.43401873111724854, "eval_runtime": 3.7691, "eval_samples_per_second": 193.947, "eval_steps_per_second": 24.409, "step": 585 }, { "epoch": 4.0, "eval_accuracy": 0.9124487042427063, "eval_loss": 0.39779114723205566, "eval_runtime": 3.7531, "eval_samples_per_second": 194.771, "eval_steps_per_second": 24.513, "step": 780 }, { "epoch": 5.0, "eval_accuracy": 0.9042407870292664, "eval_loss": 0.6419674754142761, "eval_runtime": 3.6686, "eval_samples_per_second": 199.26, "eval_steps_per_second": 25.078, "step": 975 }, { "epoch": 5.13, "learning_rate": 2.800203098182879e-05, "loss": 0.0523, "step": 1000 }, { "epoch": 6.0, "eval_accuracy": 0.9069767594337463, "eval_loss": 0.6490611433982849, "eval_runtime": 3.596, "eval_samples_per_second": 203.28, "eval_steps_per_second": 25.584, "step": 1170 }, { "epoch": 7.0, "eval_accuracy": 0.9124487042427063, "eval_loss": 0.6302383542060852, "eval_runtime": 3.7728, "eval_samples_per_second": 193.754, "eval_steps_per_second": 24.385, "step": 1365 }, { "epoch": 7.69, "learning_rate": 2.3174094605651413e-05, "loss": 0.0146, "step": 1500 }, { "epoch": 8.0, "eval_accuracy": 0.9097127318382263, "eval_loss": 0.728517472743988, "eval_runtime": 3.574, "eval_samples_per_second": 204.533, "eval_steps_per_second": 25.742, "step": 1560 }, { "epoch": 9.0, "eval_accuracy": 0.9206566214561462, "eval_loss": 0.6614168882369995, "eval_runtime": 3.5326, "eval_samples_per_second": 206.93, "eval_steps_per_second": 26.043, "step": 1755 }, { "epoch": 10.0, "eval_accuracy": 0.9151846766471863, "eval_loss": 0.7393134236335754, "eval_runtime": 3.6341, "eval_samples_per_second": 201.15, "eval_steps_per_second": 25.316, "step": 1950 }, { "epoch": 10.26, "learning_rate": 1.8346158229474035e-05, "loss": 0.0063, "step": 2000 }, { "epoch": 11.0, "eval_accuracy": 0.9165526628494263, "eval_loss": 0.7556606531143188, "eval_runtime": 3.6289, "eval_samples_per_second": 201.437, "eval_steps_per_second": 25.352, "step": 2145 }, { "epoch": 12.0, "eval_accuracy": 0.9151846766471863, "eval_loss": 0.7709615230560303, "eval_runtime": 3.5866, "eval_samples_per_second": 203.815, "eval_steps_per_second": 25.651, "step": 2340 }, { "epoch": 12.82, "learning_rate": 1.3518221853296657e-05, "loss": 0.0037, "step": 2500 }, { "epoch": 13.0, "eval_accuracy": 0.9192886352539062, "eval_loss": 0.7879533171653748, "eval_runtime": 3.5996, "eval_samples_per_second": 203.079, "eval_steps_per_second": 25.558, "step": 2535 }, { "epoch": 14.0, "eval_accuracy": 0.9124487042427063, "eval_loss": 0.8315412402153015, "eval_runtime": 3.763, "eval_samples_per_second": 194.257, "eval_steps_per_second": 24.448, "step": 2730 }, { "epoch": 15.0, "eval_accuracy": 0.9179206490516663, "eval_loss": 0.8191958069801331, "eval_runtime": 3.6714, "eval_samples_per_second": 199.108, "eval_steps_per_second": 25.059, "step": 2925 }, { "epoch": 15.38, "learning_rate": 8.690285477119281e-06, "loss": 0.0001, "step": 3000 }, { "epoch": 16.0, "eval_accuracy": 0.9192886352539062, "eval_loss": 0.8122023940086365, "eval_runtime": 3.7565, "eval_samples_per_second": 194.598, "eval_steps_per_second": 24.491, "step": 3120 }, { "epoch": 17.0, "eval_accuracy": 0.9179206490516663, "eval_loss": 0.8226170539855957, "eval_runtime": 3.6589, "eval_samples_per_second": 199.788, "eval_steps_per_second": 25.144, "step": 3315 }, { "epoch": 17.95, "learning_rate": 3.862349100941902e-06, "loss": 0.0001, "step": 3500 }, { "epoch": 18.0, "eval_accuracy": 0.9179206490516663, "eval_loss": 0.8273718953132629, "eval_runtime": 3.7025, "eval_samples_per_second": 197.433, "eval_steps_per_second": 24.848, "step": 3510 }, { "epoch": 19.0, "eval_accuracy": 0.9179206490516663, "eval_loss": 0.8303136825561523, "eval_runtime": 3.6587, "eval_samples_per_second": 199.797, "eval_steps_per_second": 25.145, "step": 3705 }, { "epoch": 20.0, "eval_accuracy": 0.9179206490516663, "eval_loss": 0.8313552737236023, "eval_runtime": 3.6572, "eval_samples_per_second": 199.879, "eval_steps_per_second": 25.156, "step": 3900 }, { "epoch": 20.0, "step": 3900, "total_flos": 4114530683719680.0, "train_loss": 0.033037299305582656, "train_runtime": 1549.9583, "train_samples_per_second": 40.362, "train_steps_per_second": 2.516 } ], "max_steps": 3900, "num_train_epochs": 20, "total_flos": 4114530683719680.0, "trial_name": null, "trial_params": null }