{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.6, "eval_steps": 75, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.6603230237960815, "learning_rate": 0.00020833333333333335, "loss": 2.7781, "step": 625 }, { "epoch": 1.0, "eval_loss": 1.839109182357788, "eval_rouge1": 28.5024, "eval_rouge2": 11.2717, "eval_rougeL": 22.108, "eval_rougeLsum": 22.4361, "eval_runtime": 722.0927, "eval_samples_per_second": 13.849, "eval_steps_per_second": 0.866, "step": 625 }, { "epoch": 2.0, "grad_norm": 1.18350088596344, "learning_rate": 0.0004166666666666667, "loss": 2.0622, "step": 1250 }, { "epoch": 2.0, "eval_loss": 1.7575578689575195, "eval_rouge1": 28.0245, "eval_rouge2": 10.6112, "eval_rougeL": 21.7353, "eval_rougeLsum": 22.0685, "eval_runtime": 728.2015, "eval_samples_per_second": 13.732, "eval_steps_per_second": 0.858, "step": 1250 }, { "epoch": 3.0, "grad_norm": 0.9635187983512878, "learning_rate": 0.000625, "loss": 1.8636, "step": 1875 }, { "epoch": 3.0, "eval_loss": 1.617906928062439, "eval_rouge1": 27.353, "eval_rouge2": 10.6238, "eval_rougeL": 21.4686, "eval_rougeLsum": 21.7512, "eval_runtime": 730.8407, "eval_samples_per_second": 13.683, "eval_steps_per_second": 0.855, "step": 1875 }, { "epoch": 4.0, "grad_norm": 1.1745034456253052, "learning_rate": 0.0008333333333333334, "loss": 1.7408, "step": 2500 }, { "epoch": 4.0, "eval_loss": 1.6142878532409668, "eval_rouge1": 28.0928, "eval_rouge2": 11.2857, "eval_rougeL": 22.06, "eval_rougeLsum": 22.3629, "eval_runtime": 802.3401, "eval_samples_per_second": 12.464, "eval_steps_per_second": 0.779, "step": 2500 }, { "epoch": 5.0, "grad_norm": 0.7837355136871338, "learning_rate": 0.0009615384615384616, "loss": 1.6492, "step": 3125 }, { "epoch": 5.0, "eval_loss": 1.5411357879638672, "eval_rouge1": 27.8209, "eval_rouge2": 10.9184, "eval_rougeL": 21.6819, "eval_rougeLsum": 21.9773, "eval_runtime": 711.0964, "eval_samples_per_second": 14.063, "eval_steps_per_second": 0.879, "step": 3125 }, { "epoch": 6.0, "grad_norm": 0.5993546843528748, "learning_rate": 0.0007692307692307693, "loss": 1.5448, "step": 3750 }, { "epoch": 6.0, "eval_loss": 1.4802035093307495, "eval_rouge1": 28.0433, "eval_rouge2": 11.4232, "eval_rougeL": 22.0696, "eval_rougeLsum": 22.373, "eval_runtime": 728.4308, "eval_samples_per_second": 13.728, "eval_steps_per_second": 0.858, "step": 3750 }, { "epoch": 7.0, "grad_norm": 0.7141011357307434, "learning_rate": 0.0005769230769230769, "loss": 1.4454, "step": 4375 }, { "epoch": 7.0, "eval_loss": 1.462142825126648, "eval_rouge1": 27.8552, "eval_rouge2": 11.1708, "eval_rougeL": 21.8958, "eval_rougeLsum": 22.1949, "eval_runtime": 640.7723, "eval_samples_per_second": 15.606, "eval_steps_per_second": 0.975, "step": 4375 }, { "epoch": 8.0, "grad_norm": 0.6400988101959229, "learning_rate": 0.00038461538461538467, "loss": 1.3636, "step": 5000 }, { "epoch": 8.0, "eval_loss": 1.4522408246994019, "eval_rouge1": 28.3264, "eval_rouge2": 11.7945, "eval_rougeL": 22.3563, "eval_rougeLsum": 22.6524, "eval_runtime": 715.9922, "eval_samples_per_second": 13.967, "eval_steps_per_second": 0.873, "step": 5000 }, { "epoch": 9.0, "grad_norm": 0.6958301067352295, "learning_rate": 0.00019230769230769233, "loss": 1.2978, "step": 5625 }, { "epoch": 9.0, "eval_loss": 1.4346853494644165, "eval_rouge1": 28.444, "eval_rouge2": 11.9388, "eval_rougeL": 22.4279, "eval_rougeLsum": 22.7344, "eval_runtime": 652.829, "eval_samples_per_second": 15.318, "eval_steps_per_second": 0.957, "step": 5625 } ], "logging_steps": 625, "max_steps": 6250, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.075997032448e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }