{ "best_metric": 31.7626, "best_model_checkpoint": "/content/drive/Shareddrives/UCM_SHARED/TFM_ESG/Ejemplo Oficial de T5-Summarization de HF/t5-small_adafactor/checkpoint-8100", "epoch": 0.9880028228652082, "global_step": 8400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0009823570924488356, "loss": 2.9361, "step": 150 }, { "epoch": 0.02, "eval_gen_len": 18.8845, "eval_loss": 2.6215693950653076, "eval_rouge1": 26.8542, "eval_rouge2": 6.8667, "eval_rougeL": 21.1484, "eval_rougeLsum": 21.1563, "eval_runtime": 342.0879, "eval_samples_per_second": 33.126, "eval_steps_per_second": 1.383, "step": 150 }, { "epoch": 0.04, "learning_rate": 0.0009648318042813455, "loss": 2.8543, "step": 300 }, { "epoch": 0.04, "eval_gen_len": 18.8097, "eval_loss": 2.5835769176483154, "eval_rouge1": 27.2234, "eval_rouge2": 7.1753, "eval_rougeL": 21.5276, "eval_rougeLsum": 21.5308, "eval_runtime": 342.5643, "eval_samples_per_second": 33.08, "eval_steps_per_second": 1.381, "step": 300 }, { "epoch": 0.05, "learning_rate": 0.0009471888967301812, "loss": 2.814, "step": 450 }, { "epoch": 0.05, "eval_gen_len": 18.8294, "eval_loss": 2.564615488052368, "eval_rouge1": 28.1695, "eval_rouge2": 7.7873, "eval_rougeL": 22.2229, "eval_rougeLsum": 22.2251, "eval_runtime": 341.6846, "eval_samples_per_second": 33.165, "eval_steps_per_second": 1.384, "step": 450 }, { "epoch": 0.07, "learning_rate": 0.0009295459891790166, "loss": 2.7861, "step": 600 }, { "epoch": 0.07, "eval_gen_len": 18.7867, "eval_loss": 2.5409207344055176, "eval_rouge1": 28.5349, "eval_rouge2": 7.9732, "eval_rougeL": 22.6959, "eval_rougeLsum": 22.7078, "eval_runtime": 341.6917, "eval_samples_per_second": 33.164, "eval_steps_per_second": 1.384, "step": 600 }, { "epoch": 0.09, "learning_rate": 0.0009119030816278523, "loss": 2.76, "step": 750 }, { "epoch": 0.09, "eval_gen_len": 18.7526, "eval_loss": 2.5160539150238037, "eval_rouge1": 28.5629, "eval_rouge2": 7.9485, "eval_rougeL": 22.6545, "eval_rougeLsum": 22.6617, "eval_runtime": 341.8755, "eval_samples_per_second": 33.147, "eval_steps_per_second": 1.384, "step": 750 }, { "epoch": 0.11, "learning_rate": 0.0008942601740766878, "loss": 2.7545, "step": 900 }, { "epoch": 0.11, "eval_gen_len": 18.7481, "eval_loss": 2.5028903484344482, "eval_rouge1": 29.1851, "eval_rouge2": 8.3586, "eval_rougeL": 23.1781, "eval_rougeLsum": 23.1815, "eval_runtime": 341.7767, "eval_samples_per_second": 33.156, "eval_steps_per_second": 1.384, "step": 900 }, { "epoch": 0.12, "learning_rate": 0.0008767348859091978, "loss": 2.7294, "step": 1050 }, { "epoch": 0.12, "eval_gen_len": 18.6948, "eval_loss": 2.4874510765075684, "eval_rouge1": 28.7611, "eval_rouge2": 8.0309, "eval_rougeL": 22.8289, "eval_rougeLsum": 22.8364, "eval_runtime": 341.7005, "eval_samples_per_second": 33.164, "eval_steps_per_second": 1.384, "step": 1050 }, { "epoch": 0.14, "learning_rate": 0.0008590919783580335, "loss": 2.7005, "step": 1200 }, { "epoch": 0.14, "eval_gen_len": 18.7843, "eval_loss": 2.4769885540008545, "eval_rouge1": 29.1072, "eval_rouge2": 8.2713, "eval_rougeL": 23.0568, "eval_rougeLsum": 23.0602, "eval_runtime": 341.9693, "eval_samples_per_second": 33.137, "eval_steps_per_second": 1.383, "step": 1200 }, { "epoch": 0.16, "learning_rate": 0.0008414490708068689, "loss": 2.7057, "step": 1350 }, { "epoch": 0.16, "eval_gen_len": 18.6973, "eval_loss": 2.46754789352417, "eval_rouge1": 29.0607, "eval_rouge2": 8.4308, "eval_rougeL": 23.1094, "eval_rougeLsum": 23.1125, "eval_runtime": 341.5228, "eval_samples_per_second": 33.181, "eval_steps_per_second": 1.385, "step": 1350 }, { "epoch": 0.18, "learning_rate": 0.0008238061632557046, "loss": 2.6779, "step": 1500 }, { "epoch": 0.18, "eval_gen_len": 18.7927, "eval_loss": 2.4461910724639893, "eval_rouge1": 29.4815, "eval_rouge2": 8.6203, "eval_rougeL": 23.3742, "eval_rougeLsum": 23.3745, "eval_runtime": 341.6546, "eval_samples_per_second": 33.168, "eval_steps_per_second": 1.384, "step": 1500 }, { "epoch": 0.19, "learning_rate": 0.00080616325570454, "loss": 2.6944, "step": 1650 }, { "epoch": 0.19, "eval_gen_len": 18.8219, "eval_loss": 2.4397881031036377, "eval_rouge1": 29.3817, "eval_rouge2": 8.5247, "eval_rougeL": 23.3088, "eval_rougeLsum": 23.3137, "eval_runtime": 341.8781, "eval_samples_per_second": 33.146, "eval_steps_per_second": 1.384, "step": 1650 }, { "epoch": 0.21, "learning_rate": 0.0007885203481533757, "loss": 2.6589, "step": 1800 }, { "epoch": 0.21, "eval_gen_len": 18.7917, "eval_loss": 2.429356813430786, "eval_rouge1": 29.1914, "eval_rouge2": 8.5565, "eval_rougeL": 23.2017, "eval_rougeLsum": 23.2039, "eval_runtime": 341.5952, "eval_samples_per_second": 33.174, "eval_steps_per_second": 1.385, "step": 1800 }, { "epoch": 0.23, "learning_rate": 0.0007708774406022112, "loss": 2.6473, "step": 1950 }, { "epoch": 0.23, "eval_gen_len": 18.7605, "eval_loss": 2.417249917984009, "eval_rouge1": 29.9482, "eval_rouge2": 8.8321, "eval_rougeL": 23.7848, "eval_rougeLsum": 23.7954, "eval_runtime": 341.816, "eval_samples_per_second": 33.152, "eval_steps_per_second": 1.384, "step": 1950 }, { "epoch": 0.25, "learning_rate": 0.0007532345330510469, "loss": 2.6498, "step": 2100 }, { "epoch": 0.25, "eval_gen_len": 18.8079, "eval_loss": 2.4158124923706055, "eval_rouge1": 29.5843, "eval_rouge2": 8.9229, "eval_rougeL": 23.6193, "eval_rougeLsum": 23.6255, "eval_runtime": 341.8291, "eval_samples_per_second": 33.151, "eval_steps_per_second": 1.384, "step": 2100 }, { "epoch": 0.26, "learning_rate": 0.0007355916254998823, "loss": 2.6321, "step": 2250 }, { "epoch": 0.26, "eval_gen_len": 18.7639, "eval_loss": 2.4022669792175293, "eval_rouge1": 29.9147, "eval_rouge2": 9.0085, "eval_rougeL": 23.8569, "eval_rougeLsum": 23.8709, "eval_runtime": 341.59, "eval_samples_per_second": 33.174, "eval_steps_per_second": 1.385, "step": 2250 }, { "epoch": 0.28, "learning_rate": 0.000717948717948718, "loss": 2.614, "step": 2400 }, { "epoch": 0.28, "eval_gen_len": 18.6922, "eval_loss": 2.3971035480499268, "eval_rouge1": 29.711, "eval_rouge2": 9.0017, "eval_rougeL": 23.761, "eval_rougeLsum": 23.7742, "eval_runtime": 341.4293, "eval_samples_per_second": 33.19, "eval_steps_per_second": 1.385, "step": 2400 }, { "epoch": 0.3, "learning_rate": 0.0007003058103975535, "loss": 2.6212, "step": 2550 }, { "epoch": 0.3, "eval_gen_len": 18.7082, "eval_loss": 2.390623092651367, "eval_rouge1": 29.498, "eval_rouge2": 8.6483, "eval_rougeL": 23.5006, "eval_rougeLsum": 23.5112, "eval_runtime": 341.8905, "eval_samples_per_second": 33.145, "eval_steps_per_second": 1.383, "step": 2550 }, { "epoch": 0.32, "learning_rate": 0.0006826629028463891, "loss": 2.6151, "step": 2700 }, { "epoch": 0.32, "eval_gen_len": 18.8923, "eval_loss": 2.3796188831329346, "eval_rouge1": 29.8502, "eval_rouge2": 8.9427, "eval_rougeL": 23.6665, "eval_rougeLsum": 23.6781, "eval_runtime": 341.6707, "eval_samples_per_second": 33.166, "eval_steps_per_second": 1.384, "step": 2700 }, { "epoch": 0.34, "learning_rate": 0.0006650199952952247, "loss": 2.6123, "step": 2850 }, { "epoch": 0.34, "eval_gen_len": 18.7541, "eval_loss": 2.371220588684082, "eval_rouge1": 30.0211, "eval_rouge2": 9.2412, "eval_rougeL": 24.0388, "eval_rougeLsum": 24.045, "eval_runtime": 341.6315, "eval_samples_per_second": 33.17, "eval_steps_per_second": 1.385, "step": 2850 }, { "epoch": 0.35, "learning_rate": 0.0006473770877440603, "loss": 2.5928, "step": 3000 }, { "epoch": 0.35, "eval_gen_len": 18.7963, "eval_loss": 2.359456777572632, "eval_rouge1": 30.3185, "eval_rouge2": 9.2796, "eval_rougeL": 24.2004, "eval_rougeLsum": 24.2101, "eval_runtime": 341.8833, "eval_samples_per_second": 33.146, "eval_steps_per_second": 1.384, "step": 3000 }, { "epoch": 0.37, "learning_rate": 0.0006297341801928958, "loss": 2.5746, "step": 3150 }, { "epoch": 0.37, "eval_gen_len": 18.8229, "eval_loss": 2.366935968399048, "eval_rouge1": 29.8303, "eval_rouge2": 9.1665, "eval_rougeL": 23.8553, "eval_rougeLsum": 23.8672, "eval_runtime": 341.7148, "eval_samples_per_second": 33.162, "eval_steps_per_second": 1.384, "step": 3150 }, { "epoch": 0.39, "learning_rate": 0.0006120912726417314, "loss": 2.5619, "step": 3300 }, { "epoch": 0.39, "eval_gen_len": 18.8087, "eval_loss": 2.3527944087982178, "eval_rouge1": 30.4684, "eval_rouge2": 9.4451, "eval_rougeL": 24.2867, "eval_rougeLsum": 24.2939, "eval_runtime": 341.8038, "eval_samples_per_second": 33.154, "eval_steps_per_second": 1.384, "step": 3300 }, { "epoch": 0.41, "learning_rate": 0.000594448365090567, "loss": 2.5677, "step": 3450 }, { "epoch": 0.41, "eval_gen_len": 18.8491, "eval_loss": 2.343312978744507, "eval_rouge1": 30.496, "eval_rouge2": 9.4356, "eval_rougeL": 24.31, "eval_rougeLsum": 24.3125, "eval_runtime": 342.0548, "eval_samples_per_second": 33.129, "eval_steps_per_second": 1.383, "step": 3450 }, { "epoch": 0.42, "learning_rate": 0.0005768054575394025, "loss": 2.5456, "step": 3600 }, { "epoch": 0.42, "eval_gen_len": 18.809, "eval_loss": 2.3360698223114014, "eval_rouge1": 30.5752, "eval_rouge2": 9.4803, "eval_rougeL": 24.3462, "eval_rougeLsum": 24.3559, "eval_runtime": 341.7402, "eval_samples_per_second": 33.16, "eval_steps_per_second": 1.384, "step": 3600 }, { "epoch": 0.44, "learning_rate": 0.0005591625499882381, "loss": 2.5669, "step": 3750 }, { "epoch": 0.44, "eval_gen_len": 18.8287, "eval_loss": 2.327768325805664, "eval_rouge1": 30.0561, "eval_rouge2": 9.2402, "eval_rougeL": 24.0535, "eval_rougeLsum": 24.0713, "eval_runtime": 341.71, "eval_samples_per_second": 33.163, "eval_steps_per_second": 1.384, "step": 3750 }, { "epoch": 0.46, "learning_rate": 0.0005415196424370737, "loss": 2.5446, "step": 3900 }, { "epoch": 0.46, "eval_gen_len": 18.8118, "eval_loss": 2.3254404067993164, "eval_rouge1": 30.3142, "eval_rouge2": 9.4929, "eval_rougeL": 24.335, "eval_rougeLsum": 24.3432, "eval_runtime": 341.8271, "eval_samples_per_second": 33.151, "eval_steps_per_second": 1.384, "step": 3900 }, { "epoch": 0.48, "learning_rate": 0.0005238767348859092, "loss": 2.5536, "step": 4050 }, { "epoch": 0.48, "eval_gen_len": 18.9035, "eval_loss": 2.3251070976257324, "eval_rouge1": 30.2523, "eval_rouge2": 9.3005, "eval_rougeL": 24.163, "eval_rougeLsum": 24.1711, "eval_runtime": 341.8151, "eval_samples_per_second": 33.152, "eval_steps_per_second": 1.384, "step": 4050 }, { "epoch": 0.49, "learning_rate": 0.0005062338273347448, "loss": 2.5154, "step": 4200 }, { "epoch": 0.49, "eval_gen_len": 18.7458, "eval_loss": 2.313904047012329, "eval_rouge1": 30.7718, "eval_rouge2": 9.8676, "eval_rougeL": 24.7111, "eval_rougeLsum": 24.7266, "eval_runtime": 341.6852, "eval_samples_per_second": 33.165, "eval_steps_per_second": 1.384, "step": 4200 }, { "epoch": 0.51, "learning_rate": 0.0004885909197835804, "loss": 2.5176, "step": 4350 }, { "epoch": 0.51, "eval_gen_len": 18.7983, "eval_loss": 2.309589385986328, "eval_rouge1": 30.5617, "eval_rouge2": 9.6562, "eval_rougeL": 24.4754, "eval_rougeLsum": 24.4862, "eval_runtime": 341.8284, "eval_samples_per_second": 33.151, "eval_steps_per_second": 1.384, "step": 4350 }, { "epoch": 0.53, "learning_rate": 0.0004709480122324159, "loss": 2.5307, "step": 4500 }, { "epoch": 0.53, "eval_gen_len": 18.8217, "eval_loss": 2.3089160919189453, "eval_rouge1": 30.601, "eval_rouge2": 9.6672, "eval_rougeL": 24.4465, "eval_rougeLsum": 24.4567, "eval_runtime": 341.6213, "eval_samples_per_second": 33.171, "eval_steps_per_second": 1.385, "step": 4500 }, { "epoch": 0.55, "learning_rate": 0.00045330510468125144, "loss": 2.515, "step": 4650 }, { "epoch": 0.55, "eval_gen_len": 18.8096, "eval_loss": 2.2991859912872314, "eval_rouge1": 31.1807, "eval_rouge2": 10.0105, "eval_rougeL": 24.9947, "eval_rougeLsum": 25.0096, "eval_runtime": 341.7452, "eval_samples_per_second": 33.159, "eval_steps_per_second": 1.384, "step": 4650 }, { "epoch": 0.56, "learning_rate": 0.000435662197130087, "loss": 2.5168, "step": 4800 }, { "epoch": 0.56, "eval_gen_len": 18.8254, "eval_loss": 2.292003631591797, "eval_rouge1": 30.8999, "eval_rouge2": 9.8832, "eval_rougeL": 24.7713, "eval_rougeLsum": 24.7823, "eval_runtime": 341.8542, "eval_samples_per_second": 33.149, "eval_steps_per_second": 1.384, "step": 4800 }, { "epoch": 0.58, "learning_rate": 0.0004180192895789226, "loss": 2.5021, "step": 4950 }, { "epoch": 0.58, "eval_gen_len": 18.8787, "eval_loss": 2.285507917404175, "eval_rouge1": 31.0569, "eval_rouge2": 9.8752, "eval_rougeL": 24.791, "eval_rougeLsum": 24.8033, "eval_runtime": 341.7747, "eval_samples_per_second": 33.156, "eval_steps_per_second": 1.384, "step": 4950 }, { "epoch": 0.6, "learning_rate": 0.00040037638202775815, "loss": 2.501, "step": 5100 }, { "epoch": 0.6, "eval_gen_len": 18.8315, "eval_loss": 2.2868235111236572, "eval_rouge1": 30.7132, "eval_rouge2": 9.8654, "eval_rougeL": 24.6084, "eval_rougeLsum": 24.6244, "eval_runtime": 341.9154, "eval_samples_per_second": 33.143, "eval_steps_per_second": 1.383, "step": 5100 }, { "epoch": 0.62, "learning_rate": 0.0003827334744765937, "loss": 2.4849, "step": 5250 }, { "epoch": 0.62, "eval_gen_len": 18.8196, "eval_loss": 2.2783043384552, "eval_rouge1": 31.3434, "eval_rouge2": 10.206, "eval_rougeL": 25.0954, "eval_rougeLsum": 25.1114, "eval_runtime": 341.9076, "eval_samples_per_second": 33.143, "eval_steps_per_second": 1.383, "step": 5250 }, { "epoch": 0.64, "learning_rate": 0.0003650905669254293, "loss": 2.4939, "step": 5400 }, { "epoch": 0.64, "eval_gen_len": 18.8579, "eval_loss": 2.275907039642334, "eval_rouge1": 31.1467, "eval_rouge2": 10.0457, "eval_rougeL": 24.964, "eval_rougeLsum": 24.9793, "eval_runtime": 341.6475, "eval_samples_per_second": 33.169, "eval_steps_per_second": 1.384, "step": 5400 }, { "epoch": 0.65, "learning_rate": 0.00034744765937426485, "loss": 2.4624, "step": 5550 }, { "epoch": 0.65, "eval_gen_len": 18.8098, "eval_loss": 2.2712931632995605, "eval_rouge1": 31.4288, "eval_rouge2": 10.1719, "eval_rougeL": 25.1096, "eval_rougeLsum": 25.1276, "eval_runtime": 341.723, "eval_samples_per_second": 33.161, "eval_steps_per_second": 1.384, "step": 5550 }, { "epoch": 0.67, "learning_rate": 0.0003298047518231004, "loss": 2.456, "step": 5700 }, { "epoch": 0.67, "eval_gen_len": 18.8262, "eval_loss": 2.2674217224121094, "eval_rouge1": 31.1515, "eval_rouge2": 10.1208, "eval_rougeL": 25.0456, "eval_rougeLsum": 25.0532, "eval_runtime": 341.9008, "eval_samples_per_second": 33.144, "eval_steps_per_second": 1.383, "step": 5700 }, { "epoch": 0.69, "learning_rate": 0.000312161844271936, "loss": 2.4667, "step": 5850 }, { "epoch": 0.69, "eval_gen_len": 18.7816, "eval_loss": 2.262035608291626, "eval_rouge1": 31.3741, "eval_rouge2": 10.1733, "eval_rougeL": 25.1421, "eval_rougeLsum": 25.159, "eval_runtime": 341.6066, "eval_samples_per_second": 33.173, "eval_steps_per_second": 1.385, "step": 5850 }, { "epoch": 0.71, "learning_rate": 0.000294636556104446, "loss": 2.4658, "step": 6000 }, { "epoch": 0.71, "eval_gen_len": 18.8, "eval_loss": 2.258843183517456, "eval_rouge1": 31.3913, "eval_rouge2": 10.2645, "eval_rougeL": 25.1746, "eval_rougeLsum": 25.188, "eval_runtime": 341.9247, "eval_samples_per_second": 33.142, "eval_steps_per_second": 1.383, "step": 6000 }, { "epoch": 0.72, "learning_rate": 0.00027699364855328156, "loss": 2.4943, "step": 6150 }, { "epoch": 0.72, "eval_gen_len": 18.8074, "eval_loss": 2.2533156871795654, "eval_rouge1": 31.3905, "eval_rouge2": 10.164, "eval_rougeL": 25.093, "eval_rougeLsum": 25.107, "eval_runtime": 341.8317, "eval_samples_per_second": 33.151, "eval_steps_per_second": 1.384, "step": 6150 }, { "epoch": 0.74, "learning_rate": 0.00025935074100211713, "loss": 2.473, "step": 6300 }, { "epoch": 0.74, "eval_gen_len": 18.7439, "eval_loss": 2.2551848888397217, "eval_rouge1": 31.1105, "eval_rouge2": 10.1939, "eval_rougeL": 24.9214, "eval_rougeLsum": 24.9321, "eval_runtime": 341.6034, "eval_samples_per_second": 33.173, "eval_steps_per_second": 1.385, "step": 6300 }, { "epoch": 0.76, "learning_rate": 0.00024170783345095273, "loss": 2.4687, "step": 6450 }, { "epoch": 0.76, "eval_gen_len": 18.7709, "eval_loss": 2.247098684310913, "eval_rouge1": 31.3387, "eval_rouge2": 10.2446, "eval_rougeL": 25.0746, "eval_rougeLsum": 25.0936, "eval_runtime": 341.7347, "eval_samples_per_second": 33.16, "eval_steps_per_second": 1.384, "step": 6450 }, { "epoch": 0.78, "learning_rate": 0.0002240649258997883, "loss": 2.4286, "step": 6600 }, { "epoch": 0.78, "eval_gen_len": 18.8313, "eval_loss": 2.2467916011810303, "eval_rouge1": 31.1953, "eval_rouge2": 10.1277, "eval_rougeL": 24.9535, "eval_rougeLsum": 24.9714, "eval_runtime": 341.8485, "eval_samples_per_second": 33.149, "eval_steps_per_second": 1.384, "step": 6600 }, { "epoch": 0.79, "learning_rate": 0.00020642201834862386, "loss": 2.4492, "step": 6750 }, { "epoch": 0.79, "eval_gen_len": 18.7977, "eval_loss": 2.2422139644622803, "eval_rouge1": 31.7203, "eval_rouge2": 10.4934, "eval_rougeL": 25.3862, "eval_rougeLsum": 25.3946, "eval_runtime": 341.6909, "eval_samples_per_second": 33.164, "eval_steps_per_second": 1.384, "step": 6750 }, { "epoch": 0.81, "learning_rate": 0.00018877911079745943, "loss": 2.4427, "step": 6900 }, { "epoch": 0.81, "eval_gen_len": 18.8144, "eval_loss": 2.2402756214141846, "eval_rouge1": 31.5498, "eval_rouge2": 10.4086, "eval_rougeL": 25.2384, "eval_rougeLsum": 25.2593, "eval_runtime": 341.7269, "eval_samples_per_second": 33.161, "eval_steps_per_second": 1.384, "step": 6900 }, { "epoch": 0.83, "learning_rate": 0.000171136203246295, "loss": 2.4641, "step": 7050 }, { "epoch": 0.83, "eval_gen_len": 18.7966, "eval_loss": 2.2365546226501465, "eval_rouge1": 31.4038, "eval_rouge2": 10.3691, "eval_rougeL": 25.1786, "eval_rougeLsum": 25.195, "eval_runtime": 341.8284, "eval_samples_per_second": 33.151, "eval_steps_per_second": 1.384, "step": 7050 }, { "epoch": 0.85, "learning_rate": 0.00015349329569513056, "loss": 2.4276, "step": 7200 }, { "epoch": 0.85, "eval_gen_len": 18.7789, "eval_loss": 2.2344412803649902, "eval_rouge1": 31.5076, "eval_rouge2": 10.4403, "eval_rougeL": 25.299, "eval_rougeLsum": 25.3201, "eval_runtime": 342.6233, "eval_samples_per_second": 33.074, "eval_steps_per_second": 1.381, "step": 7200 }, { "epoch": 0.86, "learning_rate": 0.00013585038814396613, "loss": 2.4402, "step": 7350 }, { "epoch": 0.86, "eval_gen_len": 18.7713, "eval_loss": 2.2317440509796143, "eval_rouge1": 31.5216, "eval_rouge2": 10.3901, "eval_rougeL": 25.276, "eval_rougeLsum": 25.2943, "eval_runtime": 345.4971, "eval_samples_per_second": 32.799, "eval_steps_per_second": 1.369, "step": 7350 }, { "epoch": 0.88, "learning_rate": 0.00011820748059280171, "loss": 2.44, "step": 7500 }, { "epoch": 0.88, "eval_gen_len": 18.7671, "eval_loss": 2.2292771339416504, "eval_rouge1": 31.4244, "eval_rouge2": 10.4211, "eval_rougeL": 25.2592, "eval_rougeLsum": 25.2735, "eval_runtime": 342.0133, "eval_samples_per_second": 33.133, "eval_steps_per_second": 1.383, "step": 7500 }, { "epoch": 0.9, "learning_rate": 0.00010056457304163728, "loss": 2.4251, "step": 7650 }, { "epoch": 0.9, "eval_gen_len": 18.7972, "eval_loss": 2.226907968521118, "eval_rouge1": 31.4887, "eval_rouge2": 10.3959, "eval_rougeL": 25.2335, "eval_rougeLsum": 25.2545, "eval_runtime": 341.7402, "eval_samples_per_second": 33.16, "eval_steps_per_second": 1.384, "step": 7650 }, { "epoch": 0.92, "learning_rate": 8.292166549047284e-05, "loss": 2.456, "step": 7800 }, { "epoch": 0.92, "eval_gen_len": 18.786, "eval_loss": 2.224229097366333, "eval_rouge1": 31.4508, "eval_rouge2": 10.4079, "eval_rougeL": 25.2328, "eval_rougeLsum": 25.2564, "eval_runtime": 341.8379, "eval_samples_per_second": 33.15, "eval_steps_per_second": 1.384, "step": 7800 }, { "epoch": 0.94, "learning_rate": 6.527875793930841e-05, "loss": 2.4181, "step": 7950 }, { "epoch": 0.94, "eval_gen_len": 18.8012, "eval_loss": 2.2223522663116455, "eval_rouge1": 31.6181, "eval_rouge2": 10.5558, "eval_rougeL": 25.3867, "eval_rougeLsum": 25.4042, "eval_runtime": 342.1259, "eval_samples_per_second": 33.122, "eval_steps_per_second": 1.383, "step": 7950 }, { "epoch": 0.95, "learning_rate": 4.763585038814397e-05, "loss": 2.4288, "step": 8100 }, { "epoch": 0.95, "eval_gen_len": 18.7953, "eval_loss": 2.22170352935791, "eval_rouge1": 31.7626, "eval_rouge2": 10.6059, "eval_rougeL": 25.4827, "eval_rougeLsum": 25.4958, "eval_runtime": 341.8728, "eval_samples_per_second": 33.147, "eval_steps_per_second": 1.384, "step": 8100 }, { "epoch": 0.97, "learning_rate": 2.9992942836979537e-05, "loss": 2.4327, "step": 8250 }, { "epoch": 0.97, "eval_gen_len": 18.7827, "eval_loss": 2.220174789428711, "eval_rouge1": 31.6839, "eval_rouge2": 10.5615, "eval_rougeL": 25.4137, "eval_rougeLsum": 25.433, "eval_runtime": 342.1089, "eval_samples_per_second": 33.124, "eval_steps_per_second": 1.383, "step": 8250 }, { "epoch": 0.99, "learning_rate": 1.2350035285815103e-05, "loss": 2.4118, "step": 8400 }, { "epoch": 0.99, "eval_gen_len": 18.7979, "eval_loss": 2.2196593284606934, "eval_rouge1": 31.6519, "eval_rouge2": 10.4949, "eval_rougeL": 25.3751, "eval_rougeLsum": 25.3984, "eval_runtime": 342.1132, "eval_samples_per_second": 33.124, "eval_steps_per_second": 1.383, "step": 8400 } ], "max_steps": 8502, "num_train_epochs": 1, "total_flos": 4.261943991730176e+16, "trial_name": null, "trial_params": null }