{ "best_metric": 26.4112, "best_model_checkpoint": "my-model/checkpoint-6112", "epoch": 4.0, "eval_steps": 500, "global_step": 6112, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32722513089005234, "grad_norm": 1.6467379331588745, "learning_rate": 4.5909685863874345e-05, "loss": 0.0168, "step": 500 }, { "epoch": 0.6544502617801047, "grad_norm": 1.3730037212371826, "learning_rate": 4.181937172774869e-05, "loss": 0.0246, "step": 1000 }, { "epoch": 0.981675392670157, "grad_norm": 1.1599562168121338, "learning_rate": 3.7729057591623044e-05, "loss": 0.0308, "step": 1500 }, { "epoch": 1.0, "eval_loss": 5.495022296905518, "eval_rouge1": 24.8013, "eval_rouge2": 9.5895, "eval_rougeL": 21.5936, "eval_runtime": 434.7617, "eval_samples_per_second": 5.049, "eval_steps_per_second": 0.506, "step": 1528 }, { "epoch": 1.3089005235602094, "grad_norm": 3.42366361618042, "learning_rate": 3.3638743455497386e-05, "loss": 0.0539, "step": 2000 }, { "epoch": 1.6361256544502618, "grad_norm": 3.776289701461792, "learning_rate": 2.954842931937173e-05, "loss": 0.0727, "step": 2500 }, { "epoch": 1.9633507853403143, "grad_norm": 5.072182655334473, "learning_rate": 2.545811518324607e-05, "loss": 0.1352, "step": 3000 }, { "epoch": 2.0, "eval_loss": 5.219298839569092, "eval_rouge1": 24.8743, "eval_rouge2": 9.8439, "eval_rougeL": 21.6411, "eval_runtime": 433.0859, "eval_samples_per_second": 5.068, "eval_steps_per_second": 0.508, "step": 3056 }, { "epoch": 2.2905759162303667, "grad_norm": 6.1271209716796875, "learning_rate": 2.136780104712042e-05, "loss": 0.4727, "step": 3500 }, { "epoch": 2.6178010471204187, "grad_norm": 5.5732316970825195, "learning_rate": 1.7277486910994763e-05, "loss": 0.6959, "step": 4000 }, { "epoch": 2.945026178010471, "grad_norm": 5.73837423324585, "learning_rate": 1.3187172774869111e-05, "loss": 0.9382, "step": 4500 }, { "epoch": 3.0, "eval_loss": 3.44022798538208, "eval_rouge1": 26.0341, "eval_rouge2": 10.6222, "eval_rougeL": 22.7685, "eval_runtime": 417.9997, "eval_samples_per_second": 5.251, "eval_steps_per_second": 0.526, "step": 4584 }, { "epoch": 3.2722513089005236, "grad_norm": 5.353453159332275, "learning_rate": 9.096858638743457e-06, "loss": 0.9969, "step": 5000 }, { "epoch": 3.599476439790576, "grad_norm": 5.323329925537109, "learning_rate": 5.006544502617801e-06, "loss": 1.0924, "step": 5500 }, { "epoch": 3.9267015706806285, "grad_norm": 6.1474175453186035, "learning_rate": 9.162303664921465e-07, "loss": 1.2208, "step": 6000 }, { "epoch": 4.0, "eval_loss": 3.21282958984375, "eval_rouge1": 26.4112, "eval_rouge2": 10.9605, "eval_rougeL": 23.0258, "eval_runtime": 417.2936, "eval_samples_per_second": 5.26, "eval_steps_per_second": 0.527, "step": 6112 } ], "logging_steps": 500, "max_steps": 6112, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.930549272296653e+16, "train_batch_size": 10, "trial_name": null, "trial_params": null }