{ "best_metric": 11.716, "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-4332", "epoch": 4.0, "global_step": 17328, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.8845798707294554e-05, "loss": 2.1617, "step": 500 }, { "epoch": 0.23, "learning_rate": 4.7691597414589107e-05, "loss": 1.346, "step": 1000 }, { "epoch": 0.35, "learning_rate": 4.653739612188366e-05, "loss": 0.96, "step": 1500 }, { "epoch": 0.46, "learning_rate": 4.538319482917821e-05, "loss": 0.7184, "step": 2000 }, { "epoch": 0.58, "learning_rate": 4.422899353647276e-05, "loss": 0.5936, "step": 2500 }, { "epoch": 0.69, "learning_rate": 4.3074792243767315e-05, "loss": 0.4745, "step": 3000 }, { "epoch": 0.81, "learning_rate": 4.192059095106187e-05, "loss": 0.389, "step": 3500 }, { "epoch": 0.92, "learning_rate": 4.076638965835642e-05, "loss": 0.298, "step": 4000 }, { "epoch": 1.0, "eval_gen_len": 52.15822784810127, "eval_loss": 2.7362189292907715, "eval_rouge1": 11.716, "eval_rouge2": 1.7738, "eval_rougeL": 7.7212, "eval_rougeLsum": 10.0982, "eval_runtime": 282.218, "eval_samples_per_second": 2.239, "eval_steps_per_second": 2.239, "step": 4332 }, { "epoch": 1.04, "learning_rate": 3.961218836565097e-05, "loss": 0.2931, "step": 4500 }, { "epoch": 1.15, "learning_rate": 3.845798707294552e-05, "loss": 0.27, "step": 5000 }, { "epoch": 1.27, "learning_rate": 3.7303785780240075e-05, "loss": 0.2255, "step": 5500 }, { "epoch": 1.39, "learning_rate": 3.614958448753463e-05, "loss": 0.1818, "step": 6000 }, { "epoch": 1.5, "learning_rate": 3.499538319482918e-05, "loss": 0.1736, "step": 6500 }, { "epoch": 1.62, "learning_rate": 3.384118190212373e-05, "loss": 0.1894, "step": 7000 }, { "epoch": 1.73, "learning_rate": 3.2686980609418284e-05, "loss": 0.1642, "step": 7500 }, { "epoch": 1.85, "learning_rate": 3.1532779316712836e-05, "loss": 0.1262, "step": 8000 }, { "epoch": 1.96, "learning_rate": 3.0378578024007388e-05, "loss": 0.1596, "step": 8500 }, { "epoch": 2.0, "eval_gen_len": 42.75791139240506, "eval_loss": 3.1079843044281006, "eval_rouge1": 11.6339, "eval_rouge2": 1.9589, "eval_rougeL": 8.2226, "eval_rougeLsum": 9.5881, "eval_runtime": 452.5754, "eval_samples_per_second": 1.396, "eval_steps_per_second": 1.396, "step": 8664 }, { "epoch": 2.08, "learning_rate": 2.922437673130194e-05, "loss": 0.1249, "step": 9000 }, { "epoch": 2.19, "learning_rate": 2.8070175438596492e-05, "loss": 0.1056, "step": 9500 }, { "epoch": 2.31, "learning_rate": 2.6915974145891044e-05, "loss": 0.0981, "step": 10000 }, { "epoch": 2.42, "learning_rate": 2.5761772853185596e-05, "loss": 0.1025, "step": 10500 }, { "epoch": 2.54, "learning_rate": 2.460757156048015e-05, "loss": 0.0777, "step": 11000 }, { "epoch": 2.65, "learning_rate": 2.3453370267774704e-05, "loss": 0.0849, "step": 11500 }, { "epoch": 2.77, "learning_rate": 2.2299168975069256e-05, "loss": 0.095, "step": 12000 }, { "epoch": 2.89, "learning_rate": 2.1144967682363804e-05, "loss": 0.0805, "step": 12500 }, { "epoch": 3.0, "eval_gen_len": 43.24683544303797, "eval_loss": 3.2877912521362305, "eval_rouge1": 11.3717, "eval_rouge2": 2.0083, "eval_rougeL": 7.8703, "eval_rougeLsum": 9.6383, "eval_runtime": 236.6502, "eval_samples_per_second": 2.671, "eval_steps_per_second": 2.671, "step": 12996 }, { "epoch": 3.0, "learning_rate": 1.9990766389658356e-05, "loss": 0.081, "step": 13000 }, { "epoch": 3.12, "learning_rate": 1.883656509695291e-05, "loss": 0.0562, "step": 13500 }, { "epoch": 3.23, "learning_rate": 1.768236380424746e-05, "loss": 0.0659, "step": 14000 }, { "epoch": 3.35, "learning_rate": 1.6528162511542013e-05, "loss": 0.0641, "step": 14500 }, { "epoch": 3.46, "learning_rate": 1.5373961218836565e-05, "loss": 0.0621, "step": 15000 }, { "epoch": 3.58, "learning_rate": 1.4219759926131118e-05, "loss": 0.0495, "step": 15500 }, { "epoch": 3.69, "learning_rate": 1.306555863342567e-05, "loss": 0.052, "step": 16000 }, { "epoch": 3.81, "learning_rate": 1.1911357340720223e-05, "loss": 0.0624, "step": 16500 }, { "epoch": 3.92, "learning_rate": 1.0757156048014775e-05, "loss": 0.0576, "step": 17000 }, { "epoch": 4.0, "eval_gen_len": 43.36867088607595, "eval_loss": 3.3865511417388916, "eval_rouge1": 11.5579, "eval_rouge2": 2.3617, "eval_rougeL": 8.299, "eval_rougeLsum": 9.8507, "eval_runtime": 456.2669, "eval_samples_per_second": 1.385, "eval_steps_per_second": 1.385, "step": 17328 }, { "epoch": 4.0, "step": 17328, "total_flos": 2855176963080192.0, "train_loss": 0.29059598513036994, "train_runtime": 2646.0496, "train_samples_per_second": 8.186, "train_steps_per_second": 8.186 } ], "max_steps": 21660, "num_train_epochs": 5, "total_flos": 2855176963080192.0, "trial_name": null, "trial_params": null }