{ "best_metric": 41.2618, "best_model_checkpoint": "checkpoint/kobigbirdbart_tapt_ep3_bs16_pre_noam_LB/checkpoint-34340", "epoch": 2.9999708802888674, "global_step": 51510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 0.0001, "loss": 4.0417, "step": 1000 }, { "epoch": 0.12, "learning_rate": 7.072834474681765e-05, "loss": 3.6688, "step": 2000 }, { "epoch": 0.17, "learning_rate": 5.775426231077362e-05, "loss": 3.5243, "step": 3000 }, { "epoch": 0.23, "learning_rate": 5.001874180112048e-05, "loss": 3.4178, "step": 4000 }, { "epoch": 0.29, "learning_rate": 4.473924094197383e-05, "loss": 3.367, "step": 5000 }, { "epoch": 0.35, "learning_rate": 4.084183301607125e-05, "loss": 3.3013, "step": 6000 }, { "epoch": 0.41, "learning_rate": 3.7812639995975144e-05, "loss": 3.2593, "step": 7000 }, { "epoch": 0.47, "learning_rate": 3.537080170562066e-05, "loss": 3.212, "step": 8000 }, { "epoch": 0.52, "learning_rate": 3.3348143212252787e-05, "loss": 3.1631, "step": 9000 }, { "epoch": 0.58, "learning_rate": 3.16370022285456e-05, "loss": 3.1368, "step": 10000 }, { "epoch": 0.64, "learning_rate": 3.0164835160286395e-05, "loss": 3.106, "step": 11000 }, { "epoch": 0.7, "learning_rate": 2.8880740270466042e-05, "loss": 3.0851, "step": 12000 }, { "epoch": 0.76, "learning_rate": 2.774780664821333e-05, "loss": 3.0639, "step": 13000 }, { "epoch": 0.82, "learning_rate": 2.6738528983830193e-05, "loss": 3.0412, "step": 14000 }, { "epoch": 0.87, "learning_rate": 2.583193464319257e-05, "loss": 3.0153, "step": 15000 }, { "epoch": 0.93, "learning_rate": 2.5011715272671623e-05, "loss": 2.9905, "step": 16000 }, { "epoch": 0.99, "learning_rate": 2.426497258953522e-05, "loss": 2.9887, "step": 17000 }, { "epoch": 1.0, "eval_gen_len": 20.0, "eval_loss": 3.2666966915130615, "eval_rouge1": 43.6697, "eval_rouge2": 28.3814, "eval_rougeL": 40.2632, "eval_rougeLsum": 40.2565, "eval_runtime": 2924.633, "eval_samples_per_second": 31.314, "eval_steps_per_second": 1.957, "step": 17170 }, { "epoch": 1.05, "learning_rate": 2.3581353179274502e-05, "loss": 2.7893, "step": 18000 }, { "epoch": 1.11, "learning_rate": 2.2952437303925296e-05, "loss": 2.724, "step": 19000 }, { "epoch": 1.16, "learning_rate": 2.2371298045736806e-05, "loss": 2.722, "step": 20000 }, { "epoch": 1.22, "learning_rate": 2.183217738419129e-05, "loss": 2.7157, "step": 21000 }, { "epoch": 1.28, "learning_rate": 2.1330244234932196e-05, "loss": 2.7097, "step": 22000 }, { "epoch": 1.34, "learning_rate": 2.0861411017276734e-05, "loss": 2.6939, "step": 23000 }, { "epoch": 1.4, "learning_rate": 2.0422192722261335e-05, "loss": 2.7026, "step": 24000 }, { "epoch": 1.46, "learning_rate": 2.0009597313304793e-05, "loss": 2.6871, "step": 25000 }, { "epoch": 1.51, "learning_rate": 1.9621039546293084e-05, "loss": 2.6776, "step": 26000 }, { "epoch": 1.57, "learning_rate": 1.925427251574924e-05, "loss": 2.6799, "step": 27000 }, { "epoch": 1.63, "learning_rate": 1.8907332773261164e-05, "loss": 2.6613, "step": 28000 }, { "epoch": 1.69, "learning_rate": 1.8578495948447844e-05, "loss": 2.6756, "step": 29000 }, { "epoch": 1.75, "learning_rate": 1.8266240576954328e-05, "loss": 2.6459, "step": 30000 }, { "epoch": 1.81, "learning_rate": 1.796921840006843e-05, "loss": 2.6352, "step": 31000 }, { "epoch": 1.86, "learning_rate": 1.768622981064213e-05, "loss": 2.6379, "step": 32000 }, { "epoch": 1.92, "learning_rate": 1.7416203423613194e-05, "loss": 2.6138, "step": 33000 }, { "epoch": 1.98, "learning_rate": 1.7158178976540783e-05, "loss": 2.6193, "step": 34000 }, { "epoch": 2.0, "eval_gen_len": 20.0, "eval_loss": 3.2624921798706055, "eval_rouge1": 44.7484, "eval_rouge2": 29.1819, "eval_rougeL": 41.2553, "eval_rougeLsum": 41.2618, "eval_runtime": 2925.4113, "eval_samples_per_second": 31.306, "eval_steps_per_second": 1.957, "step": 34340 }, { "epoch": 2.04, "learning_rate": 1.6911292937114278e-05, "loss": 2.5111, "step": 35000 }, { "epoch": 2.1, "learning_rate": 1.6674766325339214e-05, "loss": 2.4361, "step": 36000 }, { "epoch": 2.15, "learning_rate": 1.644789435859962e-05, "loss": 2.4523, "step": 37000 }, { "epoch": 2.21, "learning_rate": 1.6230037605656824e-05, "loss": 2.4485, "step": 38000 }, { "epoch": 2.27, "learning_rate": 1.6020614396421562e-05, "loss": 2.443, "step": 39000 }, { "epoch": 2.33, "learning_rate": 1.58190942821137e-05, "loss": 2.4408, "step": 40000 }, { "epoch": 2.39, "learning_rate": 1.562499237823282e-05, "loss": 2.4376, "step": 41000 }, { "epoch": 2.45, "learning_rate": 1.5437864452872526e-05, "loss": 2.4237, "step": 42000 }, { "epoch": 2.5, "learning_rate": 1.5257302647033036e-05, "loss": 2.4302, "step": 43000 }, { "epoch": 2.56, "learning_rate": 1.508293173302068e-05, "loss": 2.4272, "step": 44000 }, { "epoch": 2.62, "learning_rate": 1.4914405832764997e-05, "loss": 2.4191, "step": 45000 }, { "epoch": 2.68, "learning_rate": 1.4751405530700888e-05, "loss": 2.415, "step": 46000 }, { "epoch": 2.74, "learning_rate": 1.4593635326349197e-05, "loss": 2.4139, "step": 47000 }, { "epoch": 2.8, "learning_rate": 1.4440821380348212e-05, "loss": 2.409, "step": 48000 }, { "epoch": 2.85, "learning_rate": 1.4292709514804612e-05, "loss": 2.4043, "step": 49000 }, { "epoch": 2.91, "learning_rate": 1.4149063434732295e-05, "loss": 2.3984, "step": 50000 }, { "epoch": 2.97, "learning_rate": 1.4009663142259388e-05, "loss": 2.4011, "step": 51000 }, { "epoch": 3.0, "eval_gen_len": 20.0, "eval_loss": 3.302915573120117, "eval_rouge1": 44.3235, "eval_rouge2": 28.8361, "eval_rougeL": 40.7694, "eval_rougeLsum": 40.7674, "eval_runtime": 2928.7847, "eval_samples_per_second": 31.27, "eval_steps_per_second": 1.954, "step": 51510 }, { "epoch": 3.0, "step": 51510, "total_flos": 3.55488261007809e+17, "train_loss": 2.7859744007345357, "train_runtime": 40305.1206, "train_samples_per_second": 20.449, "train_steps_per_second": 1.278 } ], "max_steps": 51510, "num_train_epochs": 3, "total_flos": 3.55488261007809e+17, "trial_name": null, "trial_params": null }