{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9967417865870214, "eval_steps": 500, "global_step": 3680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.54, "learning_rate": 1.731521739130435e-05, "loss": 1.9542, "step": 500 }, { "epoch": 1.0, "eval_f1": 0.9149, "eval_gen_len": 18.56723716381418, "eval_loss": 1.534995198249817, "eval_precision": 0.9229, "eval_recall": 0.9074, "eval_rouge1": 0.4928, "eval_rouge2": 0.2436, "eval_rougeL": 0.4085, "eval_rougeLsum": 0.4086, "eval_runtime": 246.8012, "eval_samples_per_second": 3.314, "eval_steps_per_second": 0.831, "step": 920 }, { "epoch": 1.09, "learning_rate": 1.4603260869565218e-05, "loss": 1.7174, "step": 1000 }, { "epoch": 1.63, "learning_rate": 1.1885869565217392e-05, "loss": 1.6331, "step": 1500 }, { "epoch": 2.0, "eval_f1": 0.9166, "eval_gen_len": 18.815403422982886, "eval_loss": 1.491409182548523, "eval_precision": 0.9246, "eval_recall": 0.9092, "eval_rouge1": 0.5037, "eval_rouge2": 0.257, "eval_rougeL": 0.4202, "eval_rougeLsum": 0.4206, "eval_runtime": 247.8681, "eval_samples_per_second": 3.3, "eval_steps_per_second": 0.827, "step": 1841 }, { "epoch": 2.17, "learning_rate": 9.179347826086958e-06, "loss": 1.5996, "step": 2000 }, { "epoch": 2.72, "learning_rate": 6.472826086956522e-06, "loss": 1.5694, "step": 2500 }, { "epoch": 3.0, "eval_f1": 0.917, "eval_gen_len": 19.448655256723715, "eval_loss": 1.4760992527008057, "eval_precision": 0.9241, "eval_recall": 0.9103, "eval_rouge1": 0.5071, "eval_rouge2": 0.259, "eval_rougeL": 0.4212, "eval_rougeLsum": 0.4214, "eval_runtime": 249.775, "eval_samples_per_second": 3.275, "eval_steps_per_second": 0.821, "step": 2762 }, { "epoch": 3.26, "learning_rate": 3.7554347826086963e-06, "loss": 1.5609, "step": 3000 }, { "epoch": 3.8, "learning_rate": 1.048913043478261e-06, "loss": 1.5374, "step": 3500 }, { "epoch": 4.0, "eval_f1": 0.917, "eval_gen_len": 19.1479217603912, "eval_loss": 1.4708906412124634, "eval_precision": 0.9247, "eval_recall": 0.9099, "eval_rouge1": 0.5072, "eval_rouge2": 0.2631, "eval_rougeL": 0.4243, "eval_rougeLsum": 0.4244, "eval_runtime": 248.4801, "eval_samples_per_second": 3.292, "eval_steps_per_second": 0.825, "step": 3680 }, { "epoch": 4.0, "step": 3680, "total_flos": 3.997817815616717e+16, "train_loss": 1.6477281238721764, "train_runtime": 4221.2735, "train_samples_per_second": 13.96, "train_steps_per_second": 0.872 } ], "logging_steps": 500, "max_steps": 3680, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 3.997817815616717e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }