{ "best_metric": 1.2010900974273682, "best_model_checkpoint": "./checkpoints/pegasus-large/checkpoint-11412", "epoch": 12.0, "global_step": 11412, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 5e-06, "loss": 8.7673, "step": 256 }, { "epoch": 0.54, "learning_rate": 1e-05, "loss": 7.6286, "step": 512 }, { "epoch": 0.81, "learning_rate": 9.93178426774675e-06, "loss": 7.0271, "step": 768 }, { "epoch": 1.0, "eval_loss": 6.360767364501953, "eval_runtime": 8.5969, "eval_samples_per_second": 58.16, "eval_steps_per_second": 7.328, "step": 951 }, { "epoch": 1.08, "learning_rate": 9.863568535493498e-06, "loss": 6.6826, "step": 1024 }, { "epoch": 1.35, "learning_rate": 9.795352803240248e-06, "loss": 6.0794, "step": 1280 }, { "epoch": 1.62, "learning_rate": 9.727137070986997e-06, "loss": 4.7646, "step": 1536 }, { "epoch": 1.88, "learning_rate": 9.658921338733747e-06, "loss": 2.6262, "step": 1792 }, { "epoch": 2.0, "eval_loss": 1.3431365489959717, "eval_runtime": 8.7742, "eval_samples_per_second": 56.985, "eval_steps_per_second": 7.18, "step": 1902 }, { "epoch": 2.15, "learning_rate": 9.590705606480494e-06, "loss": 1.5925, "step": 2048 }, { "epoch": 2.42, "learning_rate": 9.522489874227244e-06, "loss": 1.4186, "step": 2304 }, { "epoch": 2.69, "learning_rate": 9.454274141973993e-06, "loss": 1.4248, "step": 2560 }, { "epoch": 2.96, "learning_rate": 9.386058409720743e-06, "loss": 1.4015, "step": 2816 }, { "epoch": 3.0, "eval_loss": 1.2660728693008423, "eval_runtime": 8.6854, "eval_samples_per_second": 57.568, "eval_steps_per_second": 7.254, "step": 2853 }, { "epoch": 3.23, "learning_rate": 9.31784267746749e-06, "loss": 1.3516, "step": 3072 }, { "epoch": 3.5, "learning_rate": 9.24962694521424e-06, "loss": 1.2862, "step": 3328 }, { "epoch": 3.77, "learning_rate": 9.18141121296099e-06, "loss": 1.2847, "step": 3584 }, { "epoch": 4.0, "eval_loss": 1.238619327545166, "eval_runtime": 8.6335, "eval_samples_per_second": 57.914, "eval_steps_per_second": 7.297, "step": 3804 }, { "epoch": 4.04, "learning_rate": 9.11319548070774e-06, "loss": 1.3335, "step": 3840 }, { "epoch": 4.31, "learning_rate": 9.044979748454487e-06, "loss": 1.3177, "step": 4096 }, { "epoch": 4.58, "learning_rate": 8.976764016201236e-06, "loss": 1.2456, "step": 4352 }, { "epoch": 4.85, "learning_rate": 8.908548283947986e-06, "loss": 1.269, "step": 4608 }, { "epoch": 5.0, "eval_loss": 1.2249763011932373, "eval_runtime": 8.6401, "eval_samples_per_second": 57.87, "eval_steps_per_second": 7.292, "step": 4755 }, { "epoch": 5.11, "learning_rate": 8.840332551694735e-06, "loss": 1.2381, "step": 4864 }, { "epoch": 5.38, "learning_rate": 8.772116819441483e-06, "loss": 1.224, "step": 5120 }, { "epoch": 5.65, "learning_rate": 8.703901087188233e-06, "loss": 1.2753, "step": 5376 }, { "epoch": 5.92, "learning_rate": 8.635685354934982e-06, "loss": 1.2279, "step": 5632 }, { "epoch": 6.0, "eval_loss": 1.2167253494262695, "eval_runtime": 8.6769, "eval_samples_per_second": 57.625, "eval_steps_per_second": 7.261, "step": 5706 }, { "epoch": 6.19, "learning_rate": 8.567469622681732e-06, "loss": 1.2545, "step": 5888 }, { "epoch": 6.46, "learning_rate": 8.499253890428481e-06, "loss": 1.1907, "step": 6144 }, { "epoch": 6.73, "learning_rate": 8.431038158175229e-06, "loss": 1.2113, "step": 6400 }, { "epoch": 7.0, "learning_rate": 8.362822425921979e-06, "loss": 1.1956, "step": 6656 }, { "epoch": 7.0, "eval_loss": 1.210858941078186, "eval_runtime": 8.6349, "eval_samples_per_second": 57.904, "eval_steps_per_second": 7.296, "step": 6657 }, { "epoch": 7.27, "learning_rate": 8.294606693668728e-06, "loss": 1.1814, "step": 6912 }, { "epoch": 7.54, "learning_rate": 8.226390961415478e-06, "loss": 1.1784, "step": 7168 }, { "epoch": 7.81, "learning_rate": 8.158175229162227e-06, "loss": 1.179, "step": 7424 }, { "epoch": 8.0, "eval_loss": 1.2075146436691284, "eval_runtime": 8.6767, "eval_samples_per_second": 57.625, "eval_steps_per_second": 7.261, "step": 7608 }, { "epoch": 8.08, "learning_rate": 8.089959496908975e-06, "loss": 1.2127, "step": 7680 }, { "epoch": 8.34, "learning_rate": 8.021743764655724e-06, "loss": 1.1417, "step": 7936 }, { "epoch": 8.61, "learning_rate": 7.953528032402474e-06, "loss": 1.1988, "step": 8192 }, { "epoch": 8.88, "learning_rate": 7.885312300149223e-06, "loss": 1.183, "step": 8448 }, { "epoch": 9.0, "eval_loss": 1.2050586938858032, "eval_runtime": 8.769, "eval_samples_per_second": 57.019, "eval_steps_per_second": 7.184, "step": 8559 }, { "epoch": 9.15, "learning_rate": 7.817096567895973e-06, "loss": 1.1557, "step": 8704 }, { "epoch": 9.42, "learning_rate": 7.74888083564272e-06, "loss": 1.149, "step": 8960 }, { "epoch": 9.69, "learning_rate": 7.68066510338947e-06, "loss": 1.1279, "step": 9216 }, { "epoch": 9.96, "learning_rate": 7.612449371136219e-06, "loss": 1.1471, "step": 9472 }, { "epoch": 10.0, "eval_loss": 1.203889012336731, "eval_runtime": 8.6047, "eval_samples_per_second": 58.108, "eval_steps_per_second": 7.322, "step": 9510 }, { "epoch": 10.23, "learning_rate": 7.544233638882968e-06, "loss": 1.1345, "step": 9728 }, { "epoch": 10.5, "learning_rate": 7.476017906629717e-06, "loss": 1.1573, "step": 9984 }, { "epoch": 10.77, "learning_rate": 7.4078021743764664e-06, "loss": 1.1148, "step": 10240 }, { "epoch": 11.0, "eval_loss": 1.2023481130599976, "eval_runtime": 8.7109, "eval_samples_per_second": 57.4, "eval_steps_per_second": 7.232, "step": 10461 }, { "epoch": 11.04, "learning_rate": 7.339586442123215e-06, "loss": 1.1417, "step": 10496 }, { "epoch": 11.31, "learning_rate": 7.2713707098699646e-06, "loss": 1.1067, "step": 10752 }, { "epoch": 11.58, "learning_rate": 7.203154977616713e-06, "loss": 1.1289, "step": 11008 }, { "epoch": 11.84, "learning_rate": 7.134939245363463e-06, "loss": 1.1112, "step": 11264 }, { "epoch": 12.0, "eval_loss": 1.2010900974273682, "eval_runtime": 8.6876, "eval_samples_per_second": 57.553, "eval_steps_per_second": 7.252, "step": 11412 } ], "max_steps": 38040, "num_train_epochs": 40, "total_flos": 2.5573624673245594e+17, "trial_name": null, "trial_params": null }