{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 1.6000000000000003e-05, "loss": 2.0887, "step": 500 }, { "epoch": 1.0, "eval_f1": 0.9023, "eval_gen_len": 31.24818181818182, "eval_loss": 1.7361507415771484, "eval_precision": 0.9035, "eval_recall": 0.9015, "eval_rouge1": 0.4326, "eval_rouge2": 0.1871, "eval_rougeL": 0.3375, "eval_rougeLsum": 0.3373, "eval_runtime": 386.0456, "eval_samples_per_second": 2.849, "eval_steps_per_second": 0.357, "step": 625 }, { "epoch": 1.6, "learning_rate": 1.2e-05, "loss": 1.8362, "step": 1000 }, { "epoch": 2.0, "eval_f1": 0.905, "eval_gen_len": 30.303636363636365, "eval_loss": 1.6843606233596802, "eval_precision": 0.9071, "eval_recall": 0.9032, "eval_rouge1": 0.4466, "eval_rouge2": 0.1942, "eval_rougeL": 0.3511, "eval_rougeLsum": 0.3507, "eval_runtime": 351.8932, "eval_samples_per_second": 3.126, "eval_steps_per_second": 0.392, "step": 1250 }, { "epoch": 2.4, "learning_rate": 8.000000000000001e-06, "loss": 1.7784, "step": 1500 }, { "epoch": 3.0, "eval_f1": 0.9056, "eval_gen_len": 30.79909090909091, "eval_loss": 1.6666187047958374, "eval_precision": 0.907, "eval_recall": 0.9045, "eval_rouge1": 0.451, "eval_rouge2": 0.1992, "eval_rougeL": 0.3554, "eval_rougeLsum": 0.3551, "eval_runtime": 352.5825, "eval_samples_per_second": 3.12, "eval_steps_per_second": 0.391, "step": 1875 }, { "epoch": 3.2, "learning_rate": 4.000000000000001e-06, "loss": 1.7543, "step": 2000 }, { "epoch": 4.0, "learning_rate": 0.0, "loss": 1.7261, "step": 2500 }, { "epoch": 4.0, "eval_f1": 0.9064, "eval_gen_len": 30.85090909090909, "eval_loss": 1.6605653762817383, "eval_precision": 0.9078, "eval_recall": 0.9053, "eval_rouge1": 0.4557, "eval_rouge2": 0.2019, "eval_rougeL": 0.3603, "eval_rougeLsum": 0.3597, "eval_runtime": 359.5712, "eval_samples_per_second": 3.059, "eval_steps_per_second": 0.384, "step": 2500 }, { "epoch": 4.0, "step": 2500, "total_flos": 1.9957763220524237e+17, "train_loss": 1.8367231689453125, "train_runtime": 11495.8338, "train_samples_per_second": 6.959, "train_steps_per_second": 0.217 } ], "logging_steps": 500, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 1.9957763220524237e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }