{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 397, "global_step": 2384, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_gen_len": 81.6102, "eval_loss": 1.3396695852279663, "eval_rouge1": 52.6908, "eval_rouge2": 34.3367, "eval_rougeL": 43.9351, "eval_rougeLsum": 44.0153, "eval_runtime": 41.4692, "eval_samples_per_second": 1.423, "eval_steps_per_second": 0.723, "step": 397 }, { "epoch": 0.21, "grad_norm": 6.220447063446045, "learning_rate": 1.895763422818792e-05, "loss": 0.719, "step": 500 }, { "epoch": 0.33, "eval_gen_len": 84.2203, "eval_loss": 1.363059639930725, "eval_rouge1": 54.543, "eval_rouge2": 36.4199, "eval_rougeL": 45.8273, "eval_rougeLsum": 45.7925, "eval_runtime": 42.2281, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.71, "step": 794 }, { "epoch": 0.42, "grad_norm": 8.490495681762695, "learning_rate": 1.7908976510067115e-05, "loss": 0.7459, "step": 1000 }, { "epoch": 0.5, "eval_gen_len": 85.2373, "eval_loss": 1.3582559823989868, "eval_rouge1": 53.2262, "eval_rouge2": 34.8889, "eval_rougeL": 44.1043, "eval_rougeLsum": 44.0998, "eval_runtime": 42.6328, "eval_samples_per_second": 1.384, "eval_steps_per_second": 0.704, "step": 1191 }, { "epoch": 0.63, "grad_norm": 6.794929027557373, "learning_rate": 1.686031879194631e-05, "loss": 0.7154, "step": 1500 }, { "epoch": 0.67, "eval_gen_len": 83.7797, "eval_loss": 1.3886514902114868, "eval_rouge1": 54.9928, "eval_rouge2": 37.1125, "eval_rougeL": 46.4105, "eval_rougeLsum": 46.4044, "eval_runtime": 42.3464, "eval_samples_per_second": 1.393, "eval_steps_per_second": 0.708, "step": 1588 }, { "epoch": 0.83, "eval_gen_len": 85.8814, "eval_loss": 1.3405011892318726, "eval_rouge1": 52.5543, "eval_rouge2": 33.702, "eval_rougeL": 42.9428, "eval_rougeLsum": 43.0015, "eval_runtime": 43.1199, "eval_samples_per_second": 1.368, "eval_steps_per_second": 0.696, "step": 1985 }, { "epoch": 0.84, "grad_norm": 8.691970825195312, "learning_rate": 1.5811661073825504e-05, "loss": 0.7507, "step": 2000 }, { "epoch": 1.0, "eval_gen_len": 81.7797, "eval_loss": 1.3399206399917603, "eval_rouge1": 52.4327, "eval_rouge2": 34.1158, "eval_rougeL": 43.2742, "eval_rougeLsum": 43.1693, "eval_runtime": 41.935, "eval_samples_per_second": 1.407, "eval_steps_per_second": 0.715, "step": 2382 } ], "logging_steps": 500, "max_steps": 9536, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1192, "total_flos": 5.17811143698432e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }