{ "best_metric": 1.4883581399917603, "best_model_checkpoint": "ccdv_pegasus_xsum_summarization/checkpoint-13500", "epoch": 2.0012007204322595, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.944411091099104e-05, "loss": 1.3482, "step": 500 }, { "epoch": 0.03, "eval_gen_len": 190.7041, "eval_loss": 1.5671061277389526, "eval_rouge1": 43.9725, "eval_rouge2": 20.8852, "eval_rougeL": 29.6036, "eval_rougeLsum": 39.2595, "eval_runtime": 10745.9844, "eval_samples_per_second": 0.617, "eval_steps_per_second": 0.077, "step": 500 }, { "epoch": 0.07, "learning_rate": 4.8888221821982085e-05, "loss": 1.0335, "step": 1000 }, { "epoch": 0.07, "eval_gen_len": 159.8545, "eval_loss": 1.5465657711029053, "eval_rouge1": 44.9236, "eval_rouge2": 21.1853, "eval_rougeL": 30.4447, "eval_rougeLsum": 39.9918, "eval_runtime": 10231.1208, "eval_samples_per_second": 0.648, "eval_steps_per_second": 0.081, "step": 1000 }, { "epoch": 0.1, "learning_rate": 4.8332332732973116e-05, "loss": 1.0184, "step": 1500 }, { "epoch": 0.1, "eval_gen_len": 143.5453, "eval_loss": 1.5334348678588867, "eval_rouge1": 44.9483, "eval_rouge2": 20.9962, "eval_rougeL": 30.5328, "eval_rougeLsum": 40.0531, "eval_runtime": 8769.3615, "eval_samples_per_second": 0.756, "eval_steps_per_second": 0.095, "step": 1500 }, { "epoch": 0.13, "learning_rate": 4.777644364396416e-05, "loss": 1.0015, "step": 2000 }, { "epoch": 0.13, "eval_gen_len": 137.856, "eval_loss": 1.529853343963623, "eval_rouge1": 45.9034, "eval_rouge2": 21.784, "eval_rougeL": 31.4025, "eval_rougeLsum": 40.8983, "eval_runtime": 7582.4229, "eval_samples_per_second": 0.875, "eval_steps_per_second": 0.109, "step": 2000 }, { "epoch": 0.17, "learning_rate": 4.72205545549552e-05, "loss": 1.0101, "step": 2500 }, { "epoch": 0.17, "eval_gen_len": 134.5485, "eval_loss": 1.5291049480438232, "eval_rouge1": 45.6738, "eval_rouge2": 21.5853, "eval_rougeL": 31.1439, "eval_rougeLsum": 40.7442, "eval_runtime": 6945.0608, "eval_samples_per_second": 0.955, "eval_steps_per_second": 0.12, "step": 2500 }, { "epoch": 0.2, "learning_rate": 4.6664665465946236e-05, "loss": 0.9973, "step": 3000 }, { "epoch": 0.2, "eval_gen_len": 131.2587, "eval_loss": 1.523977518081665, "eval_rouge1": 45.5052, "eval_rouge2": 21.4202, "eval_rougeL": 31.1499, "eval_rougeLsum": 40.5736, "eval_runtime": 6458.4712, "eval_samples_per_second": 1.027, "eval_steps_per_second": 0.129, "step": 3000 }, { "epoch": 0.23, "learning_rate": 4.6108776376937274e-05, "loss": 0.9855, "step": 3500 }, { "epoch": 0.23, "eval_gen_len": 131.0582, "eval_loss": 1.5234577655792236, "eval_rouge1": 45.8336, "eval_rouge2": 21.7072, "eval_rougeL": 31.439, "eval_rougeLsum": 40.9387, "eval_runtime": 6345.3859, "eval_samples_per_second": 1.045, "eval_steps_per_second": 0.131, "step": 3500 }, { "epoch": 0.27, "learning_rate": 4.555288728792831e-05, "loss": 0.9868, "step": 4000 }, { "epoch": 0.27, "eval_gen_len": 127.9753, "eval_loss": 1.5183237791061401, "eval_rouge1": 45.6348, "eval_rouge2": 21.5462, "eval_rougeL": 31.3009, "eval_rougeLsum": 40.6469, "eval_runtime": 6091.2782, "eval_samples_per_second": 1.089, "eval_steps_per_second": 0.136, "step": 4000 }, { "epoch": 0.3, "learning_rate": 4.4996998198919356e-05, "loss": 0.9802, "step": 4500 }, { "epoch": 0.3, "eval_gen_len": 127.78, "eval_loss": 1.5132805109024048, "eval_rouge1": 45.4357, "eval_rouge2": 21.3339, "eval_rougeL": 31.1304, "eval_rougeLsum": 40.531, "eval_runtime": 5970.8563, "eval_samples_per_second": 1.111, "eval_steps_per_second": 0.139, "step": 4500 }, { "epoch": 0.33, "learning_rate": 4.4441109109910394e-05, "loss": 0.9743, "step": 5000 }, { "epoch": 0.33, "eval_gen_len": 126.9619, "eval_loss": 1.5101301670074463, "eval_rouge1": 45.4845, "eval_rouge2": 21.4302, "eval_rougeL": 31.2033, "eval_rougeLsum": 40.5934, "eval_runtime": 5820.9525, "eval_samples_per_second": 1.14, "eval_steps_per_second": 0.143, "step": 5000 }, { "epoch": 0.37, "learning_rate": 4.388522002090143e-05, "loss": 0.972, "step": 5500 }, { "epoch": 0.37, "eval_gen_len": 127.1796, "eval_loss": 1.5053614377975464, "eval_rouge1": 45.196, "eval_rouge2": 21.1882, "eval_rougeL": 30.9407, "eval_rougeLsum": 40.2648, "eval_runtime": 5768.324, "eval_samples_per_second": 1.15, "eval_steps_per_second": 0.144, "step": 5500 }, { "epoch": 0.4, "learning_rate": 4.332933093189247e-05, "loss": 0.9651, "step": 6000 }, { "epoch": 0.4, "eval_gen_len": 126.9254, "eval_loss": 1.5030862092971802, "eval_rouge1": 45.4822, "eval_rouge2": 21.4363, "eval_rougeL": 31.1422, "eval_rougeLsum": 40.5397, "eval_runtime": 5665.8916, "eval_samples_per_second": 1.171, "eval_steps_per_second": 0.146, "step": 6000 }, { "epoch": 0.43, "learning_rate": 4.277344184288351e-05, "loss": 0.9758, "step": 6500 }, { "epoch": 0.43, "eval_gen_len": 126.4933, "eval_loss": 1.495548963546753, "eval_rouge1": 45.299, "eval_rouge2": 21.346, "eval_rougeL": 31.0361, "eval_rougeLsum": 40.3325, "eval_runtime": 5589.2093, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.149, "step": 6500 }, { "epoch": 0.47, "learning_rate": 4.221755275387455e-05, "loss": 0.9652, "step": 7000 }, { "epoch": 0.47, "eval_gen_len": 126.0859, "eval_loss": 1.4975615739822388, "eval_rouge1": 45.4694, "eval_rouge2": 21.5044, "eval_rougeL": 31.1786, "eval_rougeLsum": 40.5032, "eval_runtime": 5569.8623, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.149, "step": 7000 }, { "epoch": 0.5, "learning_rate": 4.166166366486558e-05, "loss": 0.9601, "step": 7500 }, { "epoch": 0.5, "eval_gen_len": 126.8815, "eval_loss": 1.4945002794265747, "eval_rouge1": 45.1971, "eval_rouge2": 21.2682, "eval_rougeL": 30.9321, "eval_rougeLsum": 40.2959, "eval_runtime": 5557.7856, "eval_samples_per_second": 1.193, "eval_steps_per_second": 0.149, "step": 7500 }, { "epoch": 0.53, "learning_rate": 4.110577457585663e-05, "loss": 0.9502, "step": 8000 }, { "epoch": 0.53, "eval_gen_len": 126.4628, "eval_loss": 1.49406898021698, "eval_rouge1": 45.5653, "eval_rouge2": 21.5655, "eval_rougeL": 31.2703, "eval_rougeLsum": 40.5622, "eval_runtime": 5535.3927, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 8000 }, { "epoch": 0.57, "learning_rate": 4.0549885486847665e-05, "loss": 0.9537, "step": 8500 }, { "epoch": 0.57, "eval_gen_len": 126.5709, "eval_loss": 1.4941043853759766, "eval_rouge1": 45.2806, "eval_rouge2": 21.2587, "eval_rougeL": 30.93, "eval_rougeLsum": 40.354, "eval_runtime": 5533.5879, "eval_samples_per_second": 1.199, "eval_steps_per_second": 0.15, "step": 8500 }, { "epoch": 0.6, "learning_rate": 3.999399639783871e-05, "loss": 0.9629, "step": 9000 }, { "epoch": 0.6, "eval_gen_len": 126.547, "eval_loss": 1.4939745664596558, "eval_rouge1": 45.2474, "eval_rouge2": 21.275, "eval_rougeL": 30.9302, "eval_rougeLsum": 40.3377, "eval_runtime": 5530.7272, "eval_samples_per_second": 1.199, "eval_steps_per_second": 0.15, "step": 9000 }, { "epoch": 0.63, "learning_rate": 3.943810730882974e-05, "loss": 0.9528, "step": 9500 }, { "epoch": 0.63, "eval_gen_len": 126.768, "eval_loss": 1.4947481155395508, "eval_rouge1": 45.3619, "eval_rouge2": 21.3754, "eval_rougeL": 31.0723, "eval_rougeLsum": 40.4162, "eval_runtime": 5524.5717, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.15, "step": 9500 }, { "epoch": 0.67, "learning_rate": 3.888221821982078e-05, "loss": 0.9532, "step": 10000 }, { "epoch": 0.67, "eval_gen_len": 126.5323, "eval_loss": 1.4923893213272095, "eval_rouge1": 45.5763, "eval_rouge2": 21.6469, "eval_rougeL": 31.2585, "eval_rougeLsum": 40.5722, "eval_runtime": 5518.2912, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.15, "step": 10000 }, { "epoch": 0.7, "learning_rate": 3.832632913081182e-05, "loss": 0.945, "step": 10500 }, { "epoch": 0.7, "eval_gen_len": 126.69, "eval_loss": 1.4898710250854492, "eval_rouge1": 45.2629, "eval_rouge2": 21.3471, "eval_rougeL": 31.0405, "eval_rougeLsum": 40.3211, "eval_runtime": 6184.8714, "eval_samples_per_second": 1.072, "eval_steps_per_second": 0.134, "step": 10500 }, { "epoch": 0.73, "learning_rate": 3.777044004180286e-05, "loss": 0.9464, "step": 11000 }, { "epoch": 0.73, "eval_gen_len": 126.9052, "eval_loss": 1.489205002784729, "eval_rouge1": 45.3769, "eval_rouge2": 21.3457, "eval_rougeL": 30.9968, "eval_rougeLsum": 40.388, "eval_runtime": 5520.5499, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.15, "step": 11000 }, { "epoch": 0.77, "learning_rate": 3.7214550952793906e-05, "loss": 0.9544, "step": 11500 }, { "epoch": 0.77, "eval_gen_len": 126.5739, "eval_loss": 1.4892535209655762, "eval_rouge1": 45.411, "eval_rouge2": 21.3852, "eval_rougeL": 31.0295, "eval_rougeLsum": 40.4881, "eval_runtime": 5521.4271, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.15, "step": 11500 }, { "epoch": 0.8, "learning_rate": 3.6658661863784937e-05, "loss": 0.9467, "step": 12000 }, { "epoch": 0.8, "eval_gen_len": 126.7315, "eval_loss": 1.4929231405258179, "eval_rouge1": 45.4345, "eval_rouge2": 21.4378, "eval_rougeL": 31.1163, "eval_rougeLsum": 40.4393, "eval_runtime": 5524.2145, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.15, "step": 12000 }, { "epoch": 0.83, "learning_rate": 3.610277277477598e-05, "loss": 0.9517, "step": 12500 }, { "epoch": 0.83, "eval_gen_len": 126.58, "eval_loss": 1.4917516708374023, "eval_rouge1": 45.3614, "eval_rouge2": 21.3396, "eval_rougeL": 30.9925, "eval_rougeLsum": 40.3636, "eval_runtime": 5514.201, "eval_samples_per_second": 1.203, "eval_steps_per_second": 0.151, "step": 12500 }, { "epoch": 0.87, "learning_rate": 3.554688368576702e-05, "loss": 0.9497, "step": 13000 }, { "epoch": 0.87, "eval_gen_len": 126.7977, "eval_loss": 1.4918133020401, "eval_rouge1": 45.2485, "eval_rouge2": 21.2367, "eval_rougeL": 30.9282, "eval_rougeLsum": 40.3438, "eval_runtime": 6509.3818, "eval_samples_per_second": 1.019, "eval_steps_per_second": 0.128, "step": 13000 }, { "epoch": 0.9, "learning_rate": 3.499099459675806e-05, "loss": 0.9386, "step": 13500 }, { "epoch": 0.9, "eval_gen_len": 126.7524, "eval_loss": 1.4883581399917603, "eval_rouge1": 45.5038, "eval_rouge2": 21.5064, "eval_rougeL": 31.2132, "eval_rougeLsum": 40.5696, "eval_runtime": 5529.57, "eval_samples_per_second": 1.2, "eval_steps_per_second": 0.15, "step": 13500 }, { "epoch": 0.93, "learning_rate": 3.4435105507749095e-05, "loss": 0.9473, "step": 14000 }, { "epoch": 0.93, "eval_gen_len": 126.6534, "eval_loss": 1.4918317794799805, "eval_rouge1": 45.2367, "eval_rouge2": 21.2615, "eval_rougeL": 30.9179, "eval_rougeLsum": 40.2548, "eval_runtime": 5515.735, "eval_samples_per_second": 1.203, "eval_steps_per_second": 0.15, "step": 14000 }, { "epoch": 0.97, "learning_rate": 3.387921641874013e-05, "loss": 0.9235, "step": 14500 }, { "epoch": 0.97, "eval_gen_len": 126.5972, "eval_loss": 1.4897193908691406, "eval_rouge1": 45.8027, "eval_rouge2": 21.7228, "eval_rougeL": 31.3946, "eval_rougeLsum": 40.764, "eval_runtime": 5518.2889, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.15, "step": 14500 }, { "epoch": 1.0, "learning_rate": 3.332332732973118e-05, "loss": 0.9344, "step": 15000 }, { "epoch": 1.0, "eval_gen_len": 126.9212, "eval_loss": 1.4973394870758057, "eval_rouge1": 44.8773, "eval_rouge2": 20.9475, "eval_rougeL": 30.5827, "eval_rougeLsum": 39.9489, "eval_runtime": 5510.2549, "eval_samples_per_second": 1.204, "eval_steps_per_second": 0.151, "step": 15000 }, { "epoch": 1.03, "learning_rate": 3.2767438240722215e-05, "loss": 0.9139, "step": 15500 }, { "epoch": 1.03, "eval_gen_len": 126.692, "eval_loss": 1.5064738988876343, "eval_rouge1": 45.4207, "eval_rouge2": 21.3856, "eval_rougeL": 31.0837, "eval_rougeLsum": 40.4414, "eval_runtime": 5526.017, "eval_samples_per_second": 1.2, "eval_steps_per_second": 0.15, "step": 15500 }, { "epoch": 1.07, "learning_rate": 3.221154915171325e-05, "loss": 0.8939, "step": 16000 }, { "epoch": 1.07, "eval_gen_len": 126.5179, "eval_loss": 1.508902668952942, "eval_rouge1": 45.5575, "eval_rouge2": 21.5153, "eval_rougeL": 31.2115, "eval_rougeLsum": 40.5517, "eval_runtime": 5698.8075, "eval_samples_per_second": 1.164, "eval_steps_per_second": 0.146, "step": 16000 }, { "epoch": 1.1, "learning_rate": 3.165566006270429e-05, "loss": 0.8968, "step": 16500 }, { "epoch": 1.1, "eval_gen_len": 126.5447, "eval_loss": 1.5106098651885986, "eval_rouge1": 45.4574, "eval_rouge2": 21.4786, "eval_rougeL": 31.1065, "eval_rougeLsum": 40.495, "eval_runtime": 5606.9538, "eval_samples_per_second": 1.183, "eval_steps_per_second": 0.148, "step": 16500 }, { "epoch": 1.13, "learning_rate": 3.109977097369533e-05, "loss": 0.8999, "step": 17000 }, { "epoch": 1.13, "eval_gen_len": 126.6894, "eval_loss": 1.5100876092910767, "eval_rouge1": 45.4805, "eval_rouge2": 21.4579, "eval_rougeL": 31.1062, "eval_rougeLsum": 40.5138, "eval_runtime": 5594.355, "eval_samples_per_second": 1.186, "eval_steps_per_second": 0.148, "step": 17000 }, { "epoch": 1.17, "learning_rate": 3.054388188468637e-05, "loss": 0.903, "step": 17500 }, { "epoch": 1.17, "eval_gen_len": 126.5988, "eval_loss": 1.5103389024734497, "eval_rouge1": 45.495, "eval_rouge2": 21.4395, "eval_rougeL": 31.1445, "eval_rougeLsum": 40.4949, "eval_runtime": 5586.6059, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.149, "step": 17500 }, { "epoch": 1.2, "learning_rate": 2.9987992795677407e-05, "loss": 0.8988, "step": 18000 }, { "epoch": 1.2, "eval_gen_len": 126.5643, "eval_loss": 1.5120760202407837, "eval_rouge1": 45.2764, "eval_rouge2": 21.2652, "eval_rougeL": 30.944, "eval_rougeLsum": 40.3249, "eval_runtime": 5558.8098, "eval_samples_per_second": 1.193, "eval_steps_per_second": 0.149, "step": 18000 }, { "epoch": 1.23, "learning_rate": 2.9432103706668445e-05, "loss": 0.9027, "step": 18500 }, { "epoch": 1.23, "eval_gen_len": 126.8441, "eval_loss": 1.5092076063156128, "eval_rouge1": 45.4884, "eval_rouge2": 21.4334, "eval_rougeL": 31.0499, "eval_rougeLsum": 40.4796, "eval_runtime": 5536.9856, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 18500 }, { "epoch": 1.27, "learning_rate": 2.8876214617659486e-05, "loss": 0.9044, "step": 19000 }, { "epoch": 1.27, "eval_gen_len": 126.8737, "eval_loss": 1.5079020261764526, "eval_rouge1": 45.5708, "eval_rouge2": 21.5358, "eval_rougeL": 31.1862, "eval_rougeLsum": 40.594, "eval_runtime": 5524.867, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.15, "step": 19000 }, { "epoch": 1.3, "learning_rate": 2.8320325528650527e-05, "loss": 0.906, "step": 19500 }, { "epoch": 1.3, "eval_gen_len": 126.8627, "eval_loss": 1.5116254091262817, "eval_rouge1": 45.4542, "eval_rouge2": 21.4172, "eval_rougeL": 31.0754, "eval_rougeLsum": 40.439, "eval_runtime": 5524.341, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.15, "step": 19500 }, { "epoch": 1.33, "learning_rate": 2.776443643964157e-05, "loss": 0.8994, "step": 20000 }, { "epoch": 1.33, "eval_gen_len": 126.8206, "eval_loss": 1.5085355043411255, "eval_rouge1": 45.5424, "eval_rouge2": 21.5009, "eval_rougeL": 31.1428, "eval_rougeLsum": 40.5667, "eval_runtime": 5528.1375, "eval_samples_per_second": 1.2, "eval_steps_per_second": 0.15, "step": 20000 }, { "epoch": 1.37, "learning_rate": 2.7208547350632603e-05, "loss": 0.9088, "step": 20500 }, { "epoch": 1.37, "eval_gen_len": 126.7414, "eval_loss": 1.5124515295028687, "eval_rouge1": 45.3129, "eval_rouge2": 21.2629, "eval_rougeL": 30.9461, "eval_rougeLsum": 40.3271, "eval_runtime": 5534.3419, "eval_samples_per_second": 1.199, "eval_steps_per_second": 0.15, "step": 20500 }, { "epoch": 1.4, "learning_rate": 2.665265826162364e-05, "loss": 0.8983, "step": 21000 }, { "epoch": 1.4, "eval_gen_len": 126.357, "eval_loss": 1.5135449171066284, "eval_rouge1": 45.6846, "eval_rouge2": 21.6282, "eval_rougeL": 31.2929, "eval_rougeLsum": 40.6821, "eval_runtime": 5538.2932, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 21000 }, { "epoch": 1.43, "learning_rate": 2.6096769172614682e-05, "loss": 0.907, "step": 21500 }, { "epoch": 1.43, "eval_gen_len": 127.0029, "eval_loss": 1.5076923370361328, "eval_rouge1": 45.4873, "eval_rouge2": 21.455, "eval_rougeL": 31.1193, "eval_rougeLsum": 40.5128, "eval_runtime": 5539.9922, "eval_samples_per_second": 1.197, "eval_steps_per_second": 0.15, "step": 21500 }, { "epoch": 1.47, "learning_rate": 2.5540880083605723e-05, "loss": 0.9097, "step": 22000 }, { "epoch": 1.47, "eval_gen_len": 126.8553, "eval_loss": 1.5052434206008911, "eval_rouge1": 45.5988, "eval_rouge2": 21.6134, "eval_rougeL": 31.247, "eval_rougeLsum": 40.58, "eval_runtime": 5539.7468, "eval_samples_per_second": 1.197, "eval_steps_per_second": 0.15, "step": 22000 }, { "epoch": 1.5, "learning_rate": 2.498499099459676e-05, "loss": 0.9033, "step": 22500 }, { "epoch": 1.5, "eval_gen_len": 127.0048, "eval_loss": 1.5133850574493408, "eval_rouge1": 45.3223, "eval_rouge2": 21.2968, "eval_rougeL": 30.9357, "eval_rougeLsum": 40.3813, "eval_runtime": 9404.5, "eval_samples_per_second": 0.705, "eval_steps_per_second": 0.088, "step": 22500 }, { "epoch": 1.53, "learning_rate": 2.44291019055878e-05, "loss": 0.8925, "step": 23000 }, { "epoch": 1.53, "eval_gen_len": 126.7316, "eval_loss": 1.510839819908142, "eval_rouge1": 45.6747, "eval_rouge2": 21.6374, "eval_rougeL": 31.31, "eval_rougeLsum": 40.7015, "eval_runtime": 22732.2519, "eval_samples_per_second": 0.292, "eval_steps_per_second": 0.037, "step": 23000 }, { "epoch": 1.57, "learning_rate": 2.387321281657884e-05, "loss": 0.8913, "step": 23500 }, { "epoch": 1.57, "eval_gen_len": 126.6869, "eval_loss": 1.5129714012145996, "eval_rouge1": 45.6531, "eval_rouge2": 21.6354, "eval_rougeL": 31.2956, "eval_rougeLsum": 40.6555, "eval_runtime": 6945.8776, "eval_samples_per_second": 0.955, "eval_steps_per_second": 0.119, "step": 23500 }, { "epoch": 1.6, "learning_rate": 2.3317323727569874e-05, "loss": 0.8931, "step": 24000 }, { "epoch": 1.6, "eval_gen_len": 126.4862, "eval_loss": 1.5111068487167358, "eval_rouge1": 45.7876, "eval_rouge2": 21.7115, "eval_rougeL": 31.3274, "eval_rougeLsum": 40.7579, "eval_runtime": 5539.5619, "eval_samples_per_second": 1.197, "eval_steps_per_second": 0.15, "step": 24000 }, { "epoch": 1.63, "learning_rate": 2.2761434638560915e-05, "loss": 0.9009, "step": 24500 }, { "epoch": 1.63, "eval_gen_len": 126.6229, "eval_loss": 1.5083845853805542, "eval_rouge1": 45.6359, "eval_rouge2": 21.583, "eval_rougeL": 31.2775, "eval_rougeLsum": 40.6351, "eval_runtime": 5545.0209, "eval_samples_per_second": 1.196, "eval_steps_per_second": 0.15, "step": 24500 }, { "epoch": 1.67, "learning_rate": 2.2205545549551953e-05, "loss": 0.8925, "step": 25000 }, { "epoch": 1.67, "eval_gen_len": 126.8396, "eval_loss": 1.5094473361968994, "eval_rouge1": 45.397, "eval_rouge2": 21.4266, "eval_rougeL": 31.082, "eval_rougeLsum": 40.4261, "eval_runtime": 5534.6802, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 25000 }, { "epoch": 1.7, "learning_rate": 2.1649656460542994e-05, "loss": 0.8991, "step": 25500 }, { "epoch": 1.7, "eval_gen_len": 126.722, "eval_loss": 1.512014627456665, "eval_rouge1": 45.2851, "eval_rouge2": 21.2798, "eval_rougeL": 30.8973, "eval_rougeLsum": 40.2787, "eval_runtime": 5538.1327, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 25500 }, { "epoch": 1.73, "learning_rate": 2.1093767371534032e-05, "loss": 0.9019, "step": 26000 }, { "epoch": 1.73, "eval_gen_len": 126.8048, "eval_loss": 1.510252833366394, "eval_rouge1": 45.2905, "eval_rouge2": 21.2992, "eval_rougeL": 30.9204, "eval_rougeLsum": 40.3262, "eval_runtime": 5535.5354, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 26000 }, { "epoch": 1.77, "learning_rate": 2.0537878282525073e-05, "loss": 0.891, "step": 26500 }, { "epoch": 1.77, "eval_gen_len": 126.2902, "eval_loss": 1.5112383365631104, "eval_rouge1": 45.7091, "eval_rouge2": 21.6159, "eval_rougeL": 31.2889, "eval_rougeLsum": 40.6986, "eval_runtime": 5537.5343, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.15, "step": 26500 }, { "epoch": 1.8, "learning_rate": 1.998198919351611e-05, "loss": 0.898, "step": 27000 }, { "epoch": 1.8, "eval_gen_len": 126.5218, "eval_loss": 1.5084278583526611, "eval_rouge1": 45.4964, "eval_rouge2": 21.4702, "eval_rougeL": 31.177, "eval_rougeLsum": 40.5432, "eval_runtime": 5530.5865, "eval_samples_per_second": 1.199, "eval_steps_per_second": 0.15, "step": 27000 }, { "epoch": 1.83, "learning_rate": 1.942610010450715e-05, "loss": 0.8839, "step": 27500 }, { "epoch": 1.83, "eval_gen_len": 126.8648, "eval_loss": 1.5090144872665405, "eval_rouge1": 45.6279, "eval_rouge2": 21.5346, "eval_rougeL": 31.252, "eval_rougeLsum": 40.6096, "eval_runtime": 5522.5033, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.15, "step": 27500 }, { "epoch": 1.87, "learning_rate": 1.8870211015498187e-05, "loss": 0.8899, "step": 28000 }, { "epoch": 1.87, "eval_gen_len": 126.8498, "eval_loss": 1.5073039531707764, "eval_rouge1": 45.6406, "eval_rouge2": 21.5551, "eval_rougeL": 31.2519, "eval_rougeLsum": 40.6425, "eval_runtime": 5520.5026, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.15, "step": 28000 }, { "epoch": 1.9, "learning_rate": 1.8314321926489228e-05, "loss": 0.8904, "step": 28500 }, { "epoch": 1.9, "eval_gen_len": 126.689, "eval_loss": 1.5086652040481567, "eval_rouge1": 45.7334, "eval_rouge2": 21.7071, "eval_rougeL": 31.3069, "eval_rougeLsum": 40.6992, "eval_runtime": 5519.0861, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.15, "step": 28500 }, { "epoch": 1.93, "learning_rate": 1.7758432837480266e-05, "loss": 0.8958, "step": 29000 }, { "epoch": 1.93, "eval_gen_len": 126.9157, "eval_loss": 1.5112992525100708, "eval_rouge1": 45.4618, "eval_rouge2": 21.4623, "eval_rougeL": 31.0914, "eval_rougeLsum": 40.4648, "eval_runtime": 5520.0088, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.15, "step": 29000 }, { "epoch": 1.97, "learning_rate": 1.7202543748471307e-05, "loss": 0.8991, "step": 29500 }, { "epoch": 1.97, "eval_gen_len": 126.7855, "eval_loss": 1.5126971006393433, "eval_rouge1": 45.6364, "eval_rouge2": 21.5467, "eval_rougeL": 31.2001, "eval_rougeLsum": 40.5946, "eval_runtime": 5532.4138, "eval_samples_per_second": 1.199, "eval_steps_per_second": 0.15, "step": 29500 }, { "epoch": 2.0, "learning_rate": 1.6646654659462345e-05, "loss": 0.889, "step": 30000 }, { "epoch": 2.0, "eval_gen_len": 126.6989, "eval_loss": 1.5128982067108154, "eval_rouge1": 45.3668, "eval_rouge2": 21.3563, "eval_rougeL": 30.998, "eval_rougeLsum": 40.3714, "eval_runtime": 5736.9272, "eval_samples_per_second": 1.156, "eval_steps_per_second": 0.145, "step": 30000 } ], "max_steps": 44973, "num_train_epochs": 3, "total_flos": 7.801293866564321e+17, "trial_name": null, "trial_params": null }