{ "best_metric": 2.63411021232605, "best_model_checkpoint": "model/best_model_test_0423_small/checkpoint-55000", "epoch": 3.0, "global_step": 57807, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.827010569654195e-05, "loss": 5.8165, "step": 1000 }, { "epoch": 0.05, "eval_gen_len": 18.0056, "eval_loss": 3.6540932655334473, "eval_rouge1": 11.6734, "eval_rouge2": 3.9865, "eval_rougeL": 11.5734, "eval_rougeLsum": 11.5375, "eval_runtime": 831.6458, "eval_samples_per_second": 5.793, "eval_steps_per_second": 2.897, "step": 1000 }, { "epoch": 0.1, "learning_rate": 9.654021139308389e-05, "loss": 4.306, "step": 2000 }, { "epoch": 0.1, "eval_gen_len": 16.8948, "eval_loss": 3.4290754795074463, "eval_rouge1": 12.0417, "eval_rouge2": 3.8419, "eval_rougeL": 11.9231, "eval_rougeLsum": 11.9223, "eval_runtime": 769.6461, "eval_samples_per_second": 6.26, "eval_steps_per_second": 3.13, "step": 2000 }, { "epoch": 0.16, "learning_rate": 9.481031708962583e-05, "loss": 4.1091, "step": 3000 }, { "epoch": 0.16, "eval_gen_len": 19.4016, "eval_loss": 3.364335060119629, "eval_rouge1": 13.661, "eval_rouge2": 4.5171, "eval_rougeL": 13.5123, "eval_rougeLsum": 13.5076, "eval_runtime": 875.4047, "eval_samples_per_second": 5.504, "eval_steps_per_second": 2.752, "step": 3000 }, { "epoch": 0.21, "learning_rate": 9.308042278616777e-05, "loss": 3.9637, "step": 4000 }, { "epoch": 0.21, "eval_gen_len": 18.4288, "eval_loss": 3.2573604583740234, "eval_rouge1": 13.8443, "eval_rouge2": 4.1761, "eval_rougeL": 13.689, "eval_rougeLsum": 13.6927, "eval_runtime": 771.3843, "eval_samples_per_second": 6.246, "eval_steps_per_second": 3.123, "step": 4000 }, { "epoch": 0.26, "learning_rate": 9.135052848270971e-05, "loss": 3.8205, "step": 5000 }, { "epoch": 0.26, "eval_gen_len": 21.5776, "eval_loss": 3.2433691024780273, "eval_rouge1": 13.5371, "eval_rouge2": 4.3639, "eval_rougeL": 13.3551, "eval_rougeLsum": 13.3552, "eval_runtime": 903.4907, "eval_samples_per_second": 5.333, "eval_steps_per_second": 2.666, "step": 5000 }, { "epoch": 0.31, "learning_rate": 8.962063417925165e-05, "loss": 3.7262, "step": 6000 }, { "epoch": 0.31, "eval_gen_len": 21.5548, "eval_loss": 3.1689953804016113, "eval_rouge1": 14.3668, "eval_rouge2": 4.8048, "eval_rougeL": 14.2191, "eval_rougeLsum": 14.1906, "eval_runtime": 870.3487, "eval_samples_per_second": 5.536, "eval_steps_per_second": 2.768, "step": 6000 }, { "epoch": 0.36, "learning_rate": 8.78907398757936e-05, "loss": 3.6887, "step": 7000 }, { "epoch": 0.36, "eval_gen_len": 20.89, "eval_loss": 3.0656516551971436, "eval_rouge1": 14.3265, "eval_rouge2": 4.436, "eval_rougeL": 14.212, "eval_rougeLsum": 14.205, "eval_runtime": 840.9965, "eval_samples_per_second": 5.729, "eval_steps_per_second": 2.864, "step": 7000 }, { "epoch": 0.42, "learning_rate": 8.616084557233554e-05, "loss": 3.6337, "step": 8000 }, { "epoch": 0.42, "eval_gen_len": 20.3651, "eval_loss": 3.031759262084961, "eval_rouge1": 14.6809, "eval_rouge2": 4.8345, "eval_rougeL": 14.5378, "eval_rougeLsum": 14.5331, "eval_runtime": 836.4852, "eval_samples_per_second": 5.76, "eval_steps_per_second": 2.88, "step": 8000 }, { "epoch": 0.47, "learning_rate": 8.443095126887747e-05, "loss": 3.5443, "step": 9000 }, { "epoch": 0.47, "eval_gen_len": 21.7742, "eval_loss": 3.0553905963897705, "eval_rouge1": 15.3372, "eval_rouge2": 4.9163, "eval_rougeL": 15.1794, "eval_rougeLsum": 15.1781, "eval_runtime": 893.8221, "eval_samples_per_second": 5.39, "eval_steps_per_second": 2.695, "step": 9000 }, { "epoch": 0.52, "learning_rate": 8.270105696541942e-05, "loss": 3.5203, "step": 10000 }, { "epoch": 0.52, "eval_gen_len": 20.8113, "eval_loss": 2.9792585372924805, "eval_rouge1": 14.9278, "eval_rouge2": 4.9656, "eval_rougeL": 14.7491, "eval_rougeLsum": 14.743, "eval_runtime": 848.3297, "eval_samples_per_second": 5.679, "eval_steps_per_second": 2.84, "step": 10000 }, { "epoch": 0.57, "learning_rate": 8.097116266196136e-05, "loss": 3.4936, "step": 11000 }, { "epoch": 0.57, "eval_gen_len": 23.4274, "eval_loss": 3.0078511238098145, "eval_rouge1": 15.7705, "eval_rouge2": 5.1453, "eval_rougeL": 15.5582, "eval_rougeLsum": 15.5756, "eval_runtime": 944.685, "eval_samples_per_second": 5.1, "eval_steps_per_second": 2.55, "step": 11000 }, { "epoch": 0.62, "learning_rate": 7.92412683585033e-05, "loss": 3.4592, "step": 12000 }, { "epoch": 0.62, "eval_gen_len": 22.7007, "eval_loss": 2.972140312194824, "eval_rouge1": 15.0201, "eval_rouge2": 5.1612, "eval_rougeL": 14.8508, "eval_rougeLsum": 14.8198, "eval_runtime": 914.5833, "eval_samples_per_second": 5.268, "eval_steps_per_second": 2.634, "step": 12000 }, { "epoch": 0.67, "learning_rate": 7.751137405504524e-05, "loss": 3.377, "step": 13000 }, { "epoch": 0.67, "eval_gen_len": 23.4427, "eval_loss": 3.01123309135437, "eval_rouge1": 15.9595, "eval_rouge2": 5.1133, "eval_rougeL": 15.78, "eval_rougeLsum": 15.7774, "eval_runtime": 950.6422, "eval_samples_per_second": 5.068, "eval_steps_per_second": 2.534, "step": 13000 }, { "epoch": 0.73, "learning_rate": 7.578147975158718e-05, "loss": 3.4158, "step": 14000 }, { "epoch": 0.73, "eval_gen_len": 21.6009, "eval_loss": 2.9238853454589844, "eval_rouge1": 14.7984, "eval_rouge2": 5.051, "eval_rougeL": 14.6943, "eval_rougeLsum": 14.6581, "eval_runtime": 878.6968, "eval_samples_per_second": 5.483, "eval_steps_per_second": 2.742, "step": 14000 }, { "epoch": 0.78, "learning_rate": 7.405158544812913e-05, "loss": 3.378, "step": 15000 }, { "epoch": 0.78, "eval_gen_len": 22.0828, "eval_loss": 2.889694929122925, "eval_rouge1": 16.5128, "eval_rouge2": 5.1923, "eval_rougeL": 16.3523, "eval_rougeLsum": 16.3265, "eval_runtime": 902.1756, "eval_samples_per_second": 5.34, "eval_steps_per_second": 2.67, "step": 15000 }, { "epoch": 0.83, "learning_rate": 7.232169114467106e-05, "loss": 3.3231, "step": 16000 }, { "epoch": 0.83, "eval_gen_len": 22.5807, "eval_loss": 2.9346752166748047, "eval_rouge1": 16.9997, "eval_rouge2": 5.5524, "eval_rougeL": 16.8534, "eval_rougeLsum": 16.8737, "eval_runtime": 895.2014, "eval_samples_per_second": 5.382, "eval_steps_per_second": 2.691, "step": 16000 }, { "epoch": 0.88, "learning_rate": 7.059179684121301e-05, "loss": 3.3268, "step": 17000 }, { "epoch": 0.88, "eval_gen_len": 23.6988, "eval_loss": 2.911571741104126, "eval_rouge1": 16.0261, "eval_rouge2": 5.4226, "eval_rougeL": 15.9234, "eval_rougeLsum": 15.914, "eval_runtime": 962.7416, "eval_samples_per_second": 5.004, "eval_steps_per_second": 2.502, "step": 17000 }, { "epoch": 0.93, "learning_rate": 6.886190253775494e-05, "loss": 3.3127, "step": 18000 }, { "epoch": 0.93, "eval_gen_len": 22.9481, "eval_loss": 2.861004590988159, "eval_rouge1": 16.6255, "eval_rouge2": 5.3554, "eval_rougeL": 16.4729, "eval_rougeLsum": 16.4569, "eval_runtime": 922.0145, "eval_samples_per_second": 5.226, "eval_steps_per_second": 2.613, "step": 18000 }, { "epoch": 0.99, "learning_rate": 6.713200823429688e-05, "loss": 3.2664, "step": 19000 }, { "epoch": 0.99, "eval_gen_len": 23.4423, "eval_loss": 2.860574722290039, "eval_rouge1": 17.7703, "eval_rouge2": 5.9475, "eval_rougeL": 17.6229, "eval_rougeLsum": 17.6259, "eval_runtime": 936.5594, "eval_samples_per_second": 5.144, "eval_steps_per_second": 2.572, "step": 19000 }, { "epoch": 1.04, "learning_rate": 6.540211393083882e-05, "loss": 3.1718, "step": 20000 }, { "epoch": 1.04, "eval_gen_len": 23.0093, "eval_loss": 2.8764114379882812, "eval_rouge1": 17.301, "eval_rouge2": 5.6262, "eval_rougeL": 17.122, "eval_rougeLsum": 17.1104, "eval_runtime": 908.2265, "eval_samples_per_second": 5.305, "eval_steps_per_second": 2.652, "step": 20000 }, { "epoch": 1.09, "learning_rate": 6.367221962738077e-05, "loss": 3.0987, "step": 21000 }, { "epoch": 1.09, "eval_gen_len": 20.9697, "eval_loss": 2.82820200920105, "eval_rouge1": 16.4718, "eval_rouge2": 5.2077, "eval_rougeL": 16.3394, "eval_rougeLsum": 16.3401, "eval_runtime": 831.2333, "eval_samples_per_second": 5.796, "eval_steps_per_second": 2.898, "step": 21000 }, { "epoch": 1.14, "learning_rate": 6.19423253239227e-05, "loss": 3.1486, "step": 22000 }, { "epoch": 1.14, "eval_gen_len": 22.7291, "eval_loss": 2.823465347290039, "eval_rouge1": 18.5594, "eval_rouge2": 5.9469, "eval_rougeL": 18.3882, "eval_rougeLsum": 18.3799, "eval_runtime": 901.4834, "eval_samples_per_second": 5.345, "eval_steps_per_second": 2.672, "step": 22000 }, { "epoch": 1.19, "learning_rate": 6.021243102046465e-05, "loss": 3.1435, "step": 23000 }, { "epoch": 1.19, "eval_gen_len": 22.9612, "eval_loss": 2.826120615005493, "eval_rouge1": 18.111, "eval_rouge2": 6.0309, "eval_rougeL": 17.9593, "eval_rougeLsum": 17.9613, "eval_runtime": 912.4414, "eval_samples_per_second": 5.28, "eval_steps_per_second": 2.64, "step": 23000 }, { "epoch": 1.25, "learning_rate": 5.84825367170066e-05, "loss": 3.1049, "step": 24000 }, { "epoch": 1.25, "eval_gen_len": 22.5558, "eval_loss": 2.8067939281463623, "eval_rouge1": 17.124, "eval_rouge2": 5.5675, "eval_rougeL": 16.9714, "eval_rougeLsum": 16.9876, "eval_runtime": 903.448, "eval_samples_per_second": 5.333, "eval_steps_per_second": 2.666, "step": 24000 }, { "epoch": 1.3, "learning_rate": 5.675264241354854e-05, "loss": 3.1357, "step": 25000 }, { "epoch": 1.3, "eval_gen_len": 23.0075, "eval_loss": 2.801447629928589, "eval_rouge1": 17.3916, "eval_rouge2": 5.8671, "eval_rougeL": 17.2148, "eval_rougeLsum": 17.2502, "eval_runtime": 917.374, "eval_samples_per_second": 5.252, "eval_steps_per_second": 2.626, "step": 25000 }, { "epoch": 1.35, "learning_rate": 5.5022748110090474e-05, "loss": 3.0904, "step": 26000 }, { "epoch": 1.35, "eval_gen_len": 22.1492, "eval_loss": 2.7790260314941406, "eval_rouge1": 17.419, "eval_rouge2": 5.6689, "eval_rougeL": 17.3125, "eval_rougeLsum": 17.3058, "eval_runtime": 879.4764, "eval_samples_per_second": 5.478, "eval_steps_per_second": 2.739, "step": 26000 }, { "epoch": 1.4, "learning_rate": 5.3292853806632414e-05, "loss": 3.0877, "step": 27000 }, { "epoch": 1.4, "eval_gen_len": 21.7522, "eval_loss": 2.7462034225463867, "eval_rouge1": 17.0605, "eval_rouge2": 5.4735, "eval_rougeL": 16.9414, "eval_rougeLsum": 16.9378, "eval_runtime": 878.5335, "eval_samples_per_second": 5.484, "eval_steps_per_second": 2.742, "step": 27000 }, { "epoch": 1.45, "learning_rate": 5.1562959503174354e-05, "loss": 3.0694, "step": 28000 }, { "epoch": 1.45, "eval_gen_len": 23.2005, "eval_loss": 2.75631046295166, "eval_rouge1": 17.752, "eval_rouge2": 5.8889, "eval_rougeL": 17.5967, "eval_rougeLsum": 17.619, "eval_runtime": 928.0873, "eval_samples_per_second": 5.191, "eval_steps_per_second": 2.596, "step": 28000 }, { "epoch": 1.51, "learning_rate": 4.98330651997163e-05, "loss": 3.0498, "step": 29000 }, { "epoch": 1.51, "eval_gen_len": 21.9369, "eval_loss": 2.752108335494995, "eval_rouge1": 17.9056, "eval_rouge2": 5.7754, "eval_rougeL": 17.7624, "eval_rougeLsum": 17.7836, "eval_runtime": 872.1773, "eval_samples_per_second": 5.524, "eval_steps_per_second": 2.762, "step": 29000 }, { "epoch": 1.56, "learning_rate": 4.810317089625824e-05, "loss": 3.0566, "step": 30000 }, { "epoch": 1.56, "eval_gen_len": 22.2358, "eval_loss": 2.7468161582946777, "eval_rouge1": 18.6531, "eval_rouge2": 6.0538, "eval_rougeL": 18.5397, "eval_rougeLsum": 18.5038, "eval_runtime": 878.1686, "eval_samples_per_second": 5.486, "eval_steps_per_second": 2.743, "step": 30000 }, { "epoch": 1.61, "learning_rate": 4.637327659280018e-05, "loss": 3.0489, "step": 31000 }, { "epoch": 1.61, "eval_gen_len": 22.0108, "eval_loss": 2.7450203895568848, "eval_rouge1": 18.4869, "eval_rouge2": 5.9297, "eval_rougeL": 18.3139, "eval_rougeLsum": 18.3169, "eval_runtime": 856.3376, "eval_samples_per_second": 5.626, "eval_steps_per_second": 2.813, "step": 31000 }, { "epoch": 1.66, "learning_rate": 4.464338228934212e-05, "loss": 3.0247, "step": 32000 }, { "epoch": 1.66, "eval_gen_len": 22.2071, "eval_loss": 2.744947671890259, "eval_rouge1": 18.5192, "eval_rouge2": 5.9966, "eval_rougeL": 18.3721, "eval_rougeLsum": 18.3569, "eval_runtime": 887.9355, "eval_samples_per_second": 5.426, "eval_steps_per_second": 2.713, "step": 32000 }, { "epoch": 1.71, "learning_rate": 4.291348798588406e-05, "loss": 2.9877, "step": 33000 }, { "epoch": 1.71, "eval_gen_len": 21.4595, "eval_loss": 2.7159626483917236, "eval_rouge1": 18.1655, "eval_rouge2": 5.9294, "eval_rougeL": 18.0304, "eval_rougeLsum": 18.0836, "eval_runtime": 847.8313, "eval_samples_per_second": 5.683, "eval_steps_per_second": 2.841, "step": 33000 }, { "epoch": 1.76, "learning_rate": 4.118359368242601e-05, "loss": 3.0383, "step": 34000 }, { "epoch": 1.76, "eval_gen_len": 22.9732, "eval_loss": 2.720228433609009, "eval_rouge1": 18.4959, "eval_rouge2": 6.2413, "eval_rougeL": 18.3363, "eval_rougeLsum": 18.3431, "eval_runtime": 911.6184, "eval_samples_per_second": 5.285, "eval_steps_per_second": 2.643, "step": 34000 }, { "epoch": 1.82, "learning_rate": 3.945369937896794e-05, "loss": 3.041, "step": 35000 }, { "epoch": 1.82, "eval_gen_len": 21.9435, "eval_loss": 2.6947648525238037, "eval_rouge1": 17.5306, "eval_rouge2": 5.8119, "eval_rougeL": 17.4011, "eval_rougeLsum": 17.4149, "eval_runtime": 881.522, "eval_samples_per_second": 5.466, "eval_steps_per_second": 2.733, "step": 35000 }, { "epoch": 1.87, "learning_rate": 3.772380507550989e-05, "loss": 2.9285, "step": 36000 }, { "epoch": 1.87, "eval_gen_len": 22.5174, "eval_loss": 2.6956820487976074, "eval_rouge1": 18.6418, "eval_rouge2": 6.1394, "eval_rougeL": 18.514, "eval_rougeLsum": 18.4823, "eval_runtime": 891.2207, "eval_samples_per_second": 5.406, "eval_steps_per_second": 2.703, "step": 36000 }, { "epoch": 1.92, "learning_rate": 3.599391077205183e-05, "loss": 3.0556, "step": 37000 }, { "epoch": 1.92, "eval_gen_len": 22.9315, "eval_loss": 2.7000110149383545, "eval_rouge1": 18.7387, "eval_rouge2": 6.0585, "eval_rougeL": 18.5761, "eval_rougeLsum": 18.574, "eval_runtime": 896.5509, "eval_samples_per_second": 5.374, "eval_steps_per_second": 2.687, "step": 37000 }, { "epoch": 1.97, "learning_rate": 3.426401646859377e-05, "loss": 3.0033, "step": 38000 }, { "epoch": 1.97, "eval_gen_len": 22.4726, "eval_loss": 2.697437047958374, "eval_rouge1": 17.9387, "eval_rouge2": 6.1387, "eval_rougeL": 17.8271, "eval_rougeLsum": 17.8111, "eval_runtime": 892.312, "eval_samples_per_second": 5.399, "eval_steps_per_second": 2.7, "step": 38000 }, { "epoch": 2.02, "learning_rate": 3.253412216513571e-05, "loss": 2.9207, "step": 39000 }, { "epoch": 2.02, "eval_gen_len": 23.0274, "eval_loss": 2.699842929840088, "eval_rouge1": 18.6073, "eval_rouge2": 6.1906, "eval_rougeL": 18.3891, "eval_rougeLsum": 18.4103, "eval_runtime": 911.8188, "eval_samples_per_second": 5.284, "eval_steps_per_second": 2.642, "step": 39000 }, { "epoch": 2.08, "learning_rate": 3.080422786167765e-05, "loss": 2.8922, "step": 40000 }, { "epoch": 2.08, "eval_gen_len": 22.0697, "eval_loss": 2.67978572845459, "eval_rouge1": 18.4017, "eval_rouge2": 6.2244, "eval_rougeL": 18.2321, "eval_rougeLsum": 18.2296, "eval_runtime": 869.5208, "eval_samples_per_second": 5.541, "eval_steps_per_second": 2.77, "step": 40000 }, { "epoch": 2.13, "learning_rate": 2.9074333558219595e-05, "loss": 2.8938, "step": 41000 }, { "epoch": 2.13, "eval_gen_len": 21.7017, "eval_loss": 2.666600227355957, "eval_rouge1": 18.8016, "eval_rouge2": 6.2066, "eval_rougeL": 18.6411, "eval_rougeLsum": 18.6353, "eval_runtime": 845.165, "eval_samples_per_second": 5.701, "eval_steps_per_second": 2.85, "step": 41000 }, { "epoch": 2.18, "learning_rate": 2.7344439254761532e-05, "loss": 2.9124, "step": 42000 }, { "epoch": 2.18, "eval_gen_len": 21.4303, "eval_loss": 2.6605563163757324, "eval_rouge1": 18.7544, "eval_rouge2": 6.3533, "eval_rougeL": 18.5923, "eval_rougeLsum": 18.5739, "eval_runtime": 843.6756, "eval_samples_per_second": 5.711, "eval_steps_per_second": 2.855, "step": 42000 }, { "epoch": 2.23, "learning_rate": 2.5614544951303476e-05, "loss": 2.8597, "step": 43000 }, { "epoch": 2.23, "eval_gen_len": 22.3352, "eval_loss": 2.694676399230957, "eval_rouge1": 18.8672, "eval_rouge2": 6.4526, "eval_rougeL": 18.7416, "eval_rougeLsum": 18.7482, "eval_runtime": 884.9439, "eval_samples_per_second": 5.444, "eval_steps_per_second": 2.722, "step": 43000 }, { "epoch": 2.28, "learning_rate": 2.388465064784542e-05, "loss": 2.8435, "step": 44000 }, { "epoch": 2.28, "eval_gen_len": 21.9081, "eval_loss": 2.6738336086273193, "eval_rouge1": 18.9405, "eval_rouge2": 6.356, "eval_rougeL": 18.7791, "eval_rougeLsum": 18.7729, "eval_runtime": 862.6512, "eval_samples_per_second": 5.585, "eval_steps_per_second": 2.793, "step": 44000 }, { "epoch": 2.34, "learning_rate": 2.215475634438736e-05, "loss": 2.8672, "step": 45000 }, { "epoch": 2.34, "eval_gen_len": 21.8869, "eval_loss": 2.6733603477478027, "eval_rouge1": 18.7509, "eval_rouge2": 6.3991, "eval_rougeL": 18.6175, "eval_rougeLsum": 18.5828, "eval_runtime": 863.0328, "eval_samples_per_second": 5.583, "eval_steps_per_second": 2.791, "step": 45000 }, { "epoch": 2.39, "learning_rate": 2.04248620409293e-05, "loss": 2.899, "step": 46000 }, { "epoch": 2.39, "eval_gen_len": 21.7694, "eval_loss": 2.6575164794921875, "eval_rouge1": 18.5529, "eval_rouge2": 6.3489, "eval_rougeL": 18.4139, "eval_rougeLsum": 18.401, "eval_runtime": 860.1836, "eval_samples_per_second": 5.601, "eval_steps_per_second": 2.801, "step": 46000 }, { "epoch": 2.44, "learning_rate": 1.869496773747124e-05, "loss": 2.8616, "step": 47000 }, { "epoch": 2.44, "eval_gen_len": 21.5685, "eval_loss": 2.6484768390655518, "eval_rouge1": 18.7563, "eval_rouge2": 6.268, "eval_rougeL": 18.6368, "eval_rougeLsum": 18.6253, "eval_runtime": 854.7636, "eval_samples_per_second": 5.637, "eval_steps_per_second": 2.818, "step": 47000 }, { "epoch": 2.49, "learning_rate": 1.696507343401318e-05, "loss": 2.8937, "step": 48000 }, { "epoch": 2.49, "eval_gen_len": 22.3337, "eval_loss": 2.648592472076416, "eval_rouge1": 18.6525, "eval_rouge2": 6.3426, "eval_rougeL": 18.5184, "eval_rougeLsum": 18.5129, "eval_runtime": 882.9047, "eval_samples_per_second": 5.457, "eval_steps_per_second": 2.728, "step": 48000 }, { "epoch": 2.54, "learning_rate": 1.5235179130555125e-05, "loss": 2.8446, "step": 49000 }, { "epoch": 2.54, "eval_gen_len": 22.3331, "eval_loss": 2.657186508178711, "eval_rouge1": 18.6529, "eval_rouge2": 6.2655, "eval_rougeL": 18.4915, "eval_rougeLsum": 18.4764, "eval_runtime": 873.4651, "eval_samples_per_second": 5.516, "eval_steps_per_second": 2.758, "step": 49000 }, { "epoch": 2.59, "learning_rate": 1.3505284827097065e-05, "loss": 2.8676, "step": 50000 }, { "epoch": 2.59, "eval_gen_len": 22.132, "eval_loss": 2.6608123779296875, "eval_rouge1": 19.0913, "eval_rouge2": 6.494, "eval_rougeL": 18.929, "eval_rougeLsum": 18.9233, "eval_runtime": 867.8739, "eval_samples_per_second": 5.551, "eval_steps_per_second": 2.776, "step": 50000 }, { "epoch": 2.65, "learning_rate": 1.1775390523639005e-05, "loss": 2.8794, "step": 51000 }, { "epoch": 2.65, "eval_gen_len": 22.2414, "eval_loss": 2.6582980155944824, "eval_rouge1": 18.7648, "eval_rouge2": 6.459, "eval_rougeL": 18.6276, "eval_rougeLsum": 18.6125, "eval_runtime": 877.0489, "eval_samples_per_second": 5.493, "eval_steps_per_second": 2.747, "step": 51000 }, { "epoch": 2.7, "learning_rate": 1.0045496220180947e-05, "loss": 2.8836, "step": 52000 }, { "epoch": 2.7, "eval_gen_len": 22.2551, "eval_loss": 2.6512138843536377, "eval_rouge1": 18.7243, "eval_rouge2": 6.3865, "eval_rougeL": 18.5848, "eval_rougeLsum": 18.5763, "eval_runtime": 879.8072, "eval_samples_per_second": 5.476, "eval_steps_per_second": 2.738, "step": 52000 }, { "epoch": 2.75, "learning_rate": 8.315601916722888e-06, "loss": 2.8174, "step": 53000 }, { "epoch": 2.75, "eval_gen_len": 22.1243, "eval_loss": 2.640946865081787, "eval_rouge1": 18.9393, "eval_rouge2": 6.3914, "eval_rougeL": 18.7733, "eval_rougeLsum": 18.7715, "eval_runtime": 881.5366, "eval_samples_per_second": 5.465, "eval_steps_per_second": 2.733, "step": 53000 }, { "epoch": 2.8, "learning_rate": 6.58570761326483e-06, "loss": 2.8494, "step": 54000 }, { "epoch": 2.8, "eval_gen_len": 21.7638, "eval_loss": 2.639634132385254, "eval_rouge1": 18.6126, "eval_rouge2": 6.4389, "eval_rougeL": 18.4673, "eval_rougeLsum": 18.4516, "eval_runtime": 860.8517, "eval_samples_per_second": 5.597, "eval_steps_per_second": 2.798, "step": 54000 }, { "epoch": 2.85, "learning_rate": 4.855813309806771e-06, "loss": 2.9025, "step": 55000 }, { "epoch": 2.85, "eval_gen_len": 22.1086, "eval_loss": 2.63411021232605, "eval_rouge1": 18.7681, "eval_rouge2": 6.3762, "eval_rougeL": 18.6081, "eval_rougeLsum": 18.6173, "eval_runtime": 872.623, "eval_samples_per_second": 5.521, "eval_steps_per_second": 2.761, "step": 55000 }, { "epoch": 2.91, "learning_rate": 3.1259190063487117e-06, "loss": 2.8754, "step": 56000 }, { "epoch": 2.91, "eval_gen_len": 22.3497, "eval_loss": 2.638812780380249, "eval_rouge1": 19.0828, "eval_rouge2": 6.5203, "eval_rougeL": 18.9334, "eval_rougeLsum": 18.9285, "eval_runtime": 879.763, "eval_samples_per_second": 5.476, "eval_steps_per_second": 2.738, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.3960247028906535e-06, "loss": 2.8489, "step": 57000 }, { "epoch": 2.96, "eval_gen_len": 21.9321, "eval_loss": 2.637495756149292, "eval_rouge1": 18.9219, "eval_rouge2": 6.4922, "eval_rougeL": 18.763, "eval_rougeLsum": 18.7437, "eval_runtime": 865.2523, "eval_samples_per_second": 5.568, "eval_steps_per_second": 2.784, "step": 57000 }, { "epoch": 3.0, "step": 57807, "total_flos": 8.129568206380646e+16, "train_loss": 3.210809704903007, "train_runtime": 55943.4873, "train_samples_per_second": 2.067, "train_steps_per_second": 1.033 } ], "max_steps": 57807, "num_train_epochs": 3, "total_flos": 8.129568206380646e+16, "trial_name": null, "trial_params": null }