{ "best_metric": 0.3033078610897064, "best_model_checkpoint": "weights/checkpoint-2400", "epoch": 2.05477107402653, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "learning_rate": 0.0008858447488584475, "loss": 1.0868, "step": 400 }, { "epoch": 0.34, "eval_loss": 0.3964288830757141, "eval_rouge1": 0.1061059158446104, "eval_rouge2": 0.03465244917613476, "eval_rougeL": 0.10627894663827464, "eval_rougeLsum": 0.10587803730968609, "eval_runtime": 213.0846, "eval_samples_per_second": 11.667, "eval_steps_per_second": 1.46, "step": 400 }, { "epoch": 0.68, "learning_rate": 0.000771689497716895, "loss": 0.4378, "step": 800 }, { "epoch": 0.68, "eval_loss": 0.3599437475204468, "eval_rouge1": 0.10699235686427112, "eval_rouge2": 0.034402581194616595, "eval_rougeL": 0.10726814102163426, "eval_rougeLsum": 0.10683004020509843, "eval_runtime": 209.0997, "eval_samples_per_second": 11.889, "eval_steps_per_second": 1.487, "step": 800 }, { "epoch": 1.03, "learning_rate": 0.0006575342465753425, "loss": 0.3695, "step": 1200 }, { "epoch": 1.03, "eval_loss": 0.3147580027580261, "eval_rouge1": 0.10717208698017014, "eval_rouge2": 0.03477288244690299, "eval_rougeL": 0.10748345500504963, "eval_rougeLsum": 0.10712451901224819, "eval_runtime": 208.2509, "eval_samples_per_second": 11.938, "eval_steps_per_second": 1.493, "step": 1200 }, { "epoch": 1.37, "learning_rate": 0.00054337899543379, "loss": 0.2645, "step": 1600 }, { "epoch": 1.37, "eval_loss": 0.31476476788520813, "eval_rouge1": 0.10597239490642546, "eval_rouge2": 0.033266358974880464, "eval_rougeL": 0.10614402559092825, "eval_rougeLsum": 0.10592368490095762, "eval_runtime": 208.6832, "eval_samples_per_second": 11.913, "eval_steps_per_second": 1.49, "step": 1600 }, { "epoch": 1.71, "learning_rate": 0.00042922374429223744, "loss": 0.2503, "step": 2000 }, { "epoch": 1.71, "eval_loss": 0.3020365834236145, "eval_rouge1": 0.10566329888936485, "eval_rouge2": 0.03209767038592902, "eval_rougeL": 0.10595888780924984, "eval_rougeLsum": 0.10570986122575018, "eval_runtime": 207.8626, "eval_samples_per_second": 11.96, "eval_steps_per_second": 1.496, "step": 2000 }, { "epoch": 2.05, "learning_rate": 0.00031506849315068495, "loss": 0.2264, "step": 2400 }, { "epoch": 2.05, "eval_loss": 0.3033078610897064, "eval_rouge1": 0.10519725760071695, "eval_rouge2": 0.03199324082356017, "eval_rougeL": 0.10550921669284499, "eval_rougeLsum": 0.10514026310687613, "eval_runtime": 207.427, "eval_samples_per_second": 11.985, "eval_steps_per_second": 1.499, "step": 2400 } ], "max_steps": 3504, "num_train_epochs": 3, "total_flos": 5666207247866880.0, "trial_name": null, "trial_params": null }