{ "best_metric": 13.7182, "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-8664", "epoch": 5.0, "global_step": 21660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.8845798707294554e-05, "loss": 2.1628, "step": 500 }, { "epoch": 0.23, "learning_rate": 4.7691597414589107e-05, "loss": 1.3515, "step": 1000 }, { "epoch": 0.35, "learning_rate": 4.653739612188366e-05, "loss": 0.9577, "step": 1500 }, { "epoch": 0.46, "learning_rate": 4.538319482917821e-05, "loss": 0.7042, "step": 2000 }, { "epoch": 0.58, "learning_rate": 4.422899353647276e-05, "loss": 0.5935, "step": 2500 }, { "epoch": 0.69, "learning_rate": 4.3074792243767315e-05, "loss": 0.4658, "step": 3000 }, { "epoch": 0.81, "learning_rate": 4.192059095106187e-05, "loss": 0.3854, "step": 3500 }, { "epoch": 0.92, "learning_rate": 4.076638965835642e-05, "loss": 0.2994, "step": 4000 }, { "epoch": 1.0, "eval_gen_len": 55.063291139240505, "eval_loss": 2.7883288860321045, "eval_rouge1": 11.1611, "eval_rouge2": 1.7768, "eval_rougeL": 7.5158, "eval_rougeLsum": 9.6222, "eval_runtime": 352.9978, "eval_samples_per_second": 1.79, "eval_steps_per_second": 1.79, "step": 4332 }, { "epoch": 1.04, "learning_rate": 3.961218836565097e-05, "loss": 0.2806, "step": 4500 }, { "epoch": 1.15, "learning_rate": 3.845798707294552e-05, "loss": 0.263, "step": 5000 }, { "epoch": 1.27, "learning_rate": 3.7303785780240075e-05, "loss": 0.2187, "step": 5500 }, { "epoch": 1.39, "learning_rate": 3.614958448753463e-05, "loss": 0.1761, "step": 6000 }, { "epoch": 1.5, "learning_rate": 3.499538319482918e-05, "loss": 0.1664, "step": 6500 }, { "epoch": 1.62, "learning_rate": 3.384118190212373e-05, "loss": 0.1896, "step": 7000 }, { "epoch": 1.73, "learning_rate": 3.2686980609418284e-05, "loss": 0.1627, "step": 7500 }, { "epoch": 1.85, "learning_rate": 3.1532779316712836e-05, "loss": 0.1146, "step": 8000 }, { "epoch": 1.96, "learning_rate": 3.0378578024007388e-05, "loss": 0.1513, "step": 8500 }, { "epoch": 2.0, "eval_gen_len": 57.35284810126582, "eval_loss": 3.128607988357544, "eval_rouge1": 13.7182, "eval_rouge2": 2.311, "eval_rougeL": 9.1726, "eval_rougeLsum": 11.5058, "eval_runtime": 358.8793, "eval_samples_per_second": 1.761, "eval_steps_per_second": 1.761, "step": 8664 }, { "epoch": 2.08, "learning_rate": 2.922437673130194e-05, "loss": 0.125, "step": 9000 }, { "epoch": 2.19, "learning_rate": 2.8070175438596492e-05, "loss": 0.1028, "step": 9500 }, { "epoch": 2.31, "learning_rate": 2.6915974145891044e-05, "loss": 0.0934, "step": 10000 }, { "epoch": 2.42, "learning_rate": 2.5761772853185596e-05, "loss": 0.0993, "step": 10500 }, { "epoch": 2.54, "learning_rate": 2.460757156048015e-05, "loss": 0.074, "step": 11000 }, { "epoch": 2.65, "learning_rate": 2.3453370267774704e-05, "loss": 0.0857, "step": 11500 }, { "epoch": 2.77, "learning_rate": 2.2299168975069256e-05, "loss": 0.0823, "step": 12000 }, { "epoch": 2.89, "learning_rate": 2.1144967682363804e-05, "loss": 0.0778, "step": 12500 }, { "epoch": 3.0, "eval_gen_len": 48.70886075949367, "eval_loss": 3.3238439559936523, "eval_rouge1": 12.1173, "eval_rouge2": 1.88, "eval_rougeL": 8.1156, "eval_rougeLsum": 10.1187, "eval_runtime": 315.5777, "eval_samples_per_second": 2.003, "eval_steps_per_second": 2.003, "step": 12996 }, { "epoch": 3.0, "learning_rate": 1.9990766389658356e-05, "loss": 0.078, "step": 13000 }, { "epoch": 3.12, "learning_rate": 1.883656509695291e-05, "loss": 0.0546, "step": 13500 }, { "epoch": 3.23, "learning_rate": 1.768236380424746e-05, "loss": 0.062, "step": 14000 }, { "epoch": 3.35, "learning_rate": 1.6528162511542013e-05, "loss": 0.0656, "step": 14500 }, { "epoch": 3.46, "learning_rate": 1.5373961218836565e-05, "loss": 0.0555, "step": 15000 }, { "epoch": 3.58, "learning_rate": 1.4219759926131118e-05, "loss": 0.0437, "step": 15500 }, { "epoch": 3.69, "learning_rate": 1.306555863342567e-05, "loss": 0.05, "step": 16000 }, { "epoch": 3.81, "learning_rate": 1.1911357340720223e-05, "loss": 0.0577, "step": 16500 }, { "epoch": 3.92, "learning_rate": 1.0757156048014775e-05, "loss": 0.056, "step": 17000 }, { "epoch": 4.0, "eval_gen_len": 50.7373417721519, "eval_loss": 3.4031858444213867, "eval_rouge1": 11.9555, "eval_rouge2": 2.0536, "eval_rougeL": 8.2185, "eval_rougeLsum": 10.0656, "eval_runtime": 343.5063, "eval_samples_per_second": 1.84, "eval_steps_per_second": 1.84, "step": 17328 }, { "epoch": 4.04, "learning_rate": 9.602954755309327e-06, "loss": 0.0416, "step": 17500 }, { "epoch": 4.16, "learning_rate": 8.448753462603879e-06, "loss": 0.0393, "step": 18000 }, { "epoch": 4.27, "learning_rate": 7.29455216989843e-06, "loss": 0.0368, "step": 18500 }, { "epoch": 4.39, "learning_rate": 6.140350877192982e-06, "loss": 0.0433, "step": 19000 }, { "epoch": 4.5, "learning_rate": 4.986149584487535e-06, "loss": 0.0421, "step": 19500 }, { "epoch": 4.62, "learning_rate": 3.831948291782087e-06, "loss": 0.038, "step": 20000 }, { "epoch": 4.73, "learning_rate": 2.6777469990766392e-06, "loss": 0.0285, "step": 20500 }, { "epoch": 4.85, "learning_rate": 1.5235457063711913e-06, "loss": 0.0343, "step": 21000 }, { "epoch": 4.96, "learning_rate": 3.693444136657433e-07, "loss": 0.0364, "step": 21500 }, { "epoch": 5.0, "eval_gen_len": 50.44462025316456, "eval_loss": 3.525162696838379, "eval_rouge1": 11.814, "eval_rouge2": 1.7965, "eval_rougeL": 8.0177, "eval_rougeLsum": 9.7342, "eval_runtime": 338.0819, "eval_samples_per_second": 1.869, "eval_steps_per_second": 1.869, "step": 21660 }, { "epoch": 5.0, "step": 21660, "total_flos": 3568971203850240.0, "train_loss": 0.23670565184904047, "train_runtime": 3830.6771, "train_samples_per_second": 5.654, "train_steps_per_second": 5.654 } ], "max_steps": 21660, "num_train_epochs": 5, "total_flos": 3568971203850240.0, "trial_name": null, "trial_params": null }