{ "best_metric": 1.4880632162094116, "best_model_checkpoint": "./zhko_mbartLarge_19p_run1/checkpoint-5000", "epoch": 8.973438621679827, "eval_steps": 5000, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 1.6666666666666667e-05, "loss": 2.4329, "step": 500 }, { "epoch": 0.36, "learning_rate": 3.3333333333333335e-05, "loss": 1.9318, "step": 1000 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 1.8063, "step": 1500 }, { "epoch": 0.72, "learning_rate": 4.9379498634897e-05, "loss": 1.7429, "step": 2000 }, { "epoch": 0.9, "learning_rate": 4.875899726979399e-05, "loss": 1.6586, "step": 2500 }, { "epoch": 1.08, "learning_rate": 4.8138495904690995e-05, "loss": 1.5945, "step": 3000 }, { "epoch": 1.26, "learning_rate": 4.751799453958799e-05, "loss": 1.4888, "step": 3500 }, { "epoch": 1.44, "learning_rate": 4.689749317448498e-05, "loss": 1.3603, "step": 4000 }, { "epoch": 1.62, "learning_rate": 4.627699180938198e-05, "loss": 1.2398, "step": 4500 }, { "epoch": 1.79, "learning_rate": 4.5656490444278984e-05, "loss": 1.1911, "step": 5000 }, { "epoch": 1.79, "eval_bleu": 9.7977, "eval_gen_len": 14.6128, "eval_loss": 1.4880632162094116, "eval_runtime": 656.7417, "eval_samples_per_second": 16.966, "eval_steps_per_second": 1.061, "step": 5000 }, { "epoch": 1.97, "learning_rate": 4.5035989079175975e-05, "loss": 1.1667, "step": 5500 }, { "epoch": 2.15, "learning_rate": 4.441548771407297e-05, "loss": 1.129, "step": 6000 }, { "epoch": 2.33, "learning_rate": 4.379498634896997e-05, "loss": 1.048, "step": 6500 }, { "epoch": 2.51, "learning_rate": 4.317448498386697e-05, "loss": 0.9477, "step": 7000 }, { "epoch": 2.69, "learning_rate": 4.2553983618763964e-05, "loss": 0.8805, "step": 7500 }, { "epoch": 2.87, "learning_rate": 4.193348225366096e-05, "loss": 0.8654, "step": 8000 }, { "epoch": 3.05, "learning_rate": 4.131298088855796e-05, "loss": 0.8357, "step": 8500 }, { "epoch": 3.23, "learning_rate": 4.069247952345495e-05, "loss": 0.7993, "step": 9000 }, { "epoch": 3.41, "learning_rate": 4.0071978158351954e-05, "loss": 0.7272, "step": 9500 }, { "epoch": 3.59, "learning_rate": 3.945147679324895e-05, "loss": 0.6536, "step": 10000 }, { "epoch": 3.59, "eval_bleu": 13.3897, "eval_gen_len": 14.9179, "eval_loss": 1.6061058044433594, "eval_runtime": 661.3374, "eval_samples_per_second": 16.848, "eval_steps_per_second": 1.054, "step": 10000 }, { "epoch": 3.77, "learning_rate": 3.883097542814594e-05, "loss": 0.6277, "step": 10500 }, { "epoch": 3.95, "learning_rate": 3.821047406304294e-05, "loss": 0.6063, "step": 11000 }, { "epoch": 4.13, "learning_rate": 3.758997269793994e-05, "loss": 0.589, "step": 11500 }, { "epoch": 4.31, "learning_rate": 3.6969471332836934e-05, "loss": 0.5473, "step": 12000 }, { "epoch": 4.49, "learning_rate": 3.634896996773393e-05, "loss": 0.4878, "step": 12500 }, { "epoch": 4.67, "learning_rate": 3.572846860263093e-05, "loss": 0.4455, "step": 13000 }, { "epoch": 4.85, "learning_rate": 3.510796723752792e-05, "loss": 0.4362, "step": 13500 }, { "epoch": 5.03, "learning_rate": 3.4487465872424924e-05, "loss": 0.4156, "step": 14000 }, { "epoch": 5.2, "learning_rate": 3.386696450732192e-05, "loss": 0.4034, "step": 14500 }, { "epoch": 5.38, "learning_rate": 3.324646314221891e-05, "loss": 0.3665, "step": 15000 }, { "epoch": 5.38, "eval_bleu": 14.0018, "eval_gen_len": 15.2051, "eval_loss": 1.7928513288497925, "eval_runtime": 662.4842, "eval_samples_per_second": 16.819, "eval_steps_per_second": 1.052, "step": 15000 }, { "epoch": 5.56, "learning_rate": 3.262596177711591e-05, "loss": 0.3223, "step": 15500 }, { "epoch": 5.74, "learning_rate": 3.200546041201291e-05, "loss": 0.3039, "step": 16000 }, { "epoch": 5.92, "learning_rate": 3.1384959046909904e-05, "loss": 0.2964, "step": 16500 }, { "epoch": 6.1, "learning_rate": 3.07644576818069e-05, "loss": 0.2834, "step": 17000 }, { "epoch": 6.28, "learning_rate": 3.01439563167039e-05, "loss": 0.2662, "step": 17500 }, { "epoch": 6.46, "learning_rate": 2.9523454951600893e-05, "loss": 0.2376, "step": 18000 }, { "epoch": 6.64, "learning_rate": 2.8902953586497894e-05, "loss": 0.2152, "step": 18500 }, { "epoch": 6.82, "learning_rate": 2.828245222139489e-05, "loss": 0.2076, "step": 19000 }, { "epoch": 7.0, "learning_rate": 2.7661950856291885e-05, "loss": 0.1995, "step": 19500 }, { "epoch": 7.18, "learning_rate": 2.704144949118888e-05, "loss": 0.194, "step": 20000 }, { "epoch": 7.18, "eval_bleu": 14.7102, "eval_gen_len": 14.7308, "eval_loss": 1.9398521184921265, "eval_runtime": 638.6619, "eval_samples_per_second": 17.446, "eval_steps_per_second": 1.091, "step": 20000 }, { "epoch": 7.36, "learning_rate": 2.6420948126085876e-05, "loss": 0.1763, "step": 20500 }, { "epoch": 7.54, "learning_rate": 2.5800446760982877e-05, "loss": 0.1566, "step": 21000 }, { "epoch": 7.72, "learning_rate": 2.517994539587987e-05, "loss": 0.1471, "step": 21500 }, { "epoch": 7.9, "learning_rate": 2.455944403077687e-05, "loss": 0.1462, "step": 22000 }, { "epoch": 8.08, "learning_rate": 2.3938942665673866e-05, "loss": 0.1364, "step": 22500 }, { "epoch": 8.26, "learning_rate": 2.3318441300570863e-05, "loss": 0.1297, "step": 23000 }, { "epoch": 8.44, "learning_rate": 2.269793993546786e-05, "loss": 0.1174, "step": 23500 }, { "epoch": 8.61, "learning_rate": 2.2077438570364855e-05, "loss": 0.1075, "step": 24000 }, { "epoch": 8.79, "learning_rate": 2.1456937205261852e-05, "loss": 0.1041, "step": 24500 }, { "epoch": 8.97, "learning_rate": 2.083643584015885e-05, "loss": 0.1004, "step": 25000 }, { "epoch": 8.97, "eval_bleu": 14.9684, "eval_gen_len": 14.8811, "eval_loss": 2.0678608417510986, "eval_runtime": 646.6568, "eval_samples_per_second": 17.23, "eval_steps_per_second": 1.078, "step": 25000 }, { "epoch": 8.97, "step": 25000, "total_flos": 1.7337038143488e+18, "train_loss": 0.6854593217468262, "train_runtime": 45689.3015, "train_samples_per_second": 29.267, "train_steps_per_second": 0.915 } ], "logging_steps": 500, "max_steps": 41790, "num_train_epochs": 15, "save_steps": 5000, "total_flos": 1.7337038143488e+18, "trial_name": null, "trial_params": null }