{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8094089264173703, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6031363088057901, "grad_norm": 1.831475853919983, "learning_rate": 1.3968636911942099e-05, "loss": 2.0688, "step": 500 }, { "epoch": 1.0, "eval_gen_len": 16.3908, "eval_loss": 1.8676584959030151, "eval_rouge1": 39.017, "eval_rouge2": 16.1999, "eval_rougeL": 32.1156, "eval_rougeLsum": 35.7767, "eval_runtime": 165.5803, "eval_samples_per_second": 8.902, "eval_steps_per_second": 0.562, "step": 829 }, { "epoch": 1.2062726176115803, "grad_norm": 2.035099744796753, "learning_rate": 7.937273823884198e-06, "loss": 2.0394, "step": 1000 }, { "epoch": 1.8094089264173703, "grad_norm": 2.077877998352051, "learning_rate": 1.9059107358262969e-06, "loss": 2.0146, "step": 1500 } ], "logging_steps": 500, "max_steps": 1658, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3137860544888832.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }