{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2248995983935744, "global_step": 69, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0008709677419354839, "loss": 2.7331, "step": 4 }, { "epoch": 0.26, "learning_rate": 0.0007419354838709678, "loss": 2.6157, "step": 8 }, { "epoch": 0.39, "learning_rate": 0.0006129032258064516, "loss": 2.566, "step": 12 }, { "epoch": 0.51, "learning_rate": 0.0004838709677419355, "loss": 2.48, "step": 16 }, { "epoch": 0.64, "learning_rate": 0.0003548387096774194, "loss": 2.3692, "step": 20 }, { "epoch": 0.77, "learning_rate": 0.00022580645161290321, "loss": 2.2791, "step": 24 }, { "epoch": 0.9, "learning_rate": 0.0006989247311827958, "loss": 2.2783, "step": 28 }, { "epoch": 1.0, "eval_gen_len": 20.55, "eval_loss": 2.021012306213379, "eval_rouge1": 32.3793, "eval_rouge2": 12.6427, "eval_rougeL": 27.4027, "eval_rougeLsum": 27.3, "eval_runtime": 3629.2928, "eval_samples_per_second": 0.028, "eval_steps_per_second": 0.009, "step": 31 }, { "epoch": 1.03, "learning_rate": 0.0006559139784946236, "loss": 2.2677, "step": 32 }, { "epoch": 1.16, "learning_rate": 0.0006129032258064516, "loss": 1.8913, "step": 36 }, { "epoch": 1.29, "learning_rate": 0.0005698924731182796, "loss": 1.9234, "step": 40 }, { "epoch": 1.42, "learning_rate": 0.0005268817204301075, "loss": 1.8524, "step": 44 }, { "epoch": 1.55, "learning_rate": 0.0004838709677419355, "loss": 1.8549, "step": 48 }, { "epoch": 1.67, "learning_rate": 0.00044086021505376343, "loss": 1.9571, "step": 52 }, { "epoch": 1.8, "learning_rate": 0.0003978494623655914, "loss": 1.8606, "step": 56 }, { "epoch": 1.93, "learning_rate": 0.0003548387096774194, "loss": 1.791, "step": 60 }, { "epoch": 2.0, "eval_gen_len": 21.6, "eval_loss": 1.9703446626663208, "eval_rouge1": 34.5495, "eval_rouge2": 12.6789, "eval_rougeL": 28.3741, "eval_rougeLsum": 28.4386, "eval_runtime": 3780.972, "eval_samples_per_second": 0.026, "eval_steps_per_second": 0.009, "step": 62 }, { "epoch": 2.06, "learning_rate": 0.0003118279569892473, "loss": 1.7687, "step": 64 }, { "epoch": 2.19, "learning_rate": 0.00026881720430107527, "loss": 1.5525, "step": 68 } ], "max_steps": 93, "num_train_epochs": 3, "total_flos": 1.813836532618199e+17, "trial_name": null, "trial_params": null }