{ "best_metric": 1.0628538131713867, "best_model_checkpoint": "bygpt5_quatags_con/medium/en/checkpoint-90832", "epoch": 7.999955964595535, "global_step": 90832, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.948830567446131e-05, "loss": 1.2434, "step": 11354 }, { "epoch": 1.0, "eval_loss": 1.1300886869430542, "eval_runtime": 441.3582, "eval_samples_per_second": 45.736, "eval_steps_per_second": 5.719, "step": 11354 }, { "epoch": 2.0, "learning_rate": 3.704453340788571e-05, "loss": 1.1308, "step": 22708 }, { "epoch": 2.0, "eval_loss": 1.1001691818237305, "eval_runtime": 440.6446, "eval_samples_per_second": 45.81, "eval_steps_per_second": 5.728, "step": 22708 }, { "epoch": 3.0, "learning_rate": 3.282841708036157e-05, "loss": 1.0962, "step": 34062 }, { "epoch": 3.0, "eval_loss": 1.082809329032898, "eval_runtime": 440.424, "eval_samples_per_second": 45.833, "eval_steps_per_second": 5.731, "step": 34062 }, { "epoch": 4.0, "learning_rate": 2.727836169568317e-05, "loss": 1.0715, "step": 45416 }, { "epoch": 4.0, "eval_loss": 1.073394536972046, "eval_runtime": 440.7618, "eval_samples_per_second": 45.798, "eval_steps_per_second": 5.726, "step": 45416 }, { "epoch": 5.0, "learning_rate": 2.0971479426039337e-05, "loss": 1.0522, "step": 56770 }, { "epoch": 5.0, "eval_loss": 1.0667953491210938, "eval_runtime": 440.157, "eval_samples_per_second": 45.861, "eval_steps_per_second": 5.734, "step": 56770 }, { "epoch": 6.0, "learning_rate": 1.4563579677660773e-05, "loss": 1.0372, "step": 68124 }, { "epoch": 6.0, "eval_loss": 1.0641292333602905, "eval_runtime": 439.9553, "eval_samples_per_second": 45.882, "eval_steps_per_second": 5.737, "step": 68124 }, { "epoch": 7.0, "learning_rate": 8.720975971159198e-06, "loss": 1.0258, "step": 79478 }, { "epoch": 7.0, "eval_loss": 1.0637840032577515, "eval_runtime": 440.5762, "eval_samples_per_second": 45.817, "eval_steps_per_second": 5.729, "step": 79478 }, { "epoch": 8.0, "learning_rate": 4.051200571096512e-06, "loss": 1.0178, "step": 90832 }, { "epoch": 8.0, "eval_loss": 1.0628538131713867, "eval_runtime": 440.6394, "eval_samples_per_second": 45.811, "eval_steps_per_second": 5.728, "step": 90832 } ], "max_steps": 113540, "num_train_epochs": 10, "total_flos": 5.170056667435008e+17, "trial_name": null, "trial_params": null }