| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 608742, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9917863396972776e-05, | |
| "loss": 2.5388, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.983572679394555e-05, | |
| "loss": 2.4585, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.975359019091832e-05, | |
| "loss": 2.3898, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9671453587891096e-05, | |
| "loss": 2.374, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.958931698486387e-05, | |
| "loss": 2.3447, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.950718038183664e-05, | |
| "loss": 2.3258, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.942504377880942e-05, | |
| "loss": 2.3434, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.934290717578219e-05, | |
| "loss": 2.3269, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9260770572754964e-05, | |
| "loss": 2.311, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.917863396972773e-05, | |
| "loss": 2.3291, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 2.1906206607818604, | |
| "eval_rouge1": 18.6571, | |
| "eval_rouge2": 7.1341, | |
| "eval_rougeL": 14.8347, | |
| "eval_rougeLsum": 16.9545, | |
| "eval_runtime": 1520.2929, | |
| "eval_samples_per_second": 4.233, | |
| "eval_steps_per_second": 4.233, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.909649736670051e-05, | |
| "loss": 2.2924, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9014360763673284e-05, | |
| "loss": 2.3032, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.893222416064606e-05, | |
| "loss": 2.2866, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.885008755761883e-05, | |
| "loss": 2.3188, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.8767950954591604e-05, | |
| "loss": 2.2888, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.868581435156437e-05, | |
| "loss": 2.2398, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.860367774853715e-05, | |
| "loss": 2.2625, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8521541145509925e-05, | |
| "loss": 2.2503, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.84394045424827e-05, | |
| "loss": 2.2602, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.835726793945547e-05, | |
| "loss": 2.2454, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 2.154853105545044, | |
| "eval_rouge1": 18.5037, | |
| "eval_rouge2": 7.1908, | |
| "eval_rougeL": 14.7141, | |
| "eval_rougeLsum": 16.8233, | |
| "eval_runtime": 1517.1744, | |
| "eval_samples_per_second": 4.242, | |
| "eval_steps_per_second": 4.242, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8275131336428245e-05, | |
| "loss": 2.2419, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.819299473340101e-05, | |
| "loss": 2.2383, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.8110858130373785e-05, | |
| "loss": 2.2164, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.8028721527346566e-05, | |
| "loss": 2.2459, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.794658492431934e-05, | |
| "loss": 2.183, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.786444832129211e-05, | |
| "loss": 2.2468, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.7782311718264886e-05, | |
| "loss": 2.2021, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.770017511523765e-05, | |
| "loss": 2.2307, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.7618038512210426e-05, | |
| "loss": 2.2125, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.75359019091832e-05, | |
| "loss": 2.2107, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 2.1013174057006836, | |
| "eval_rouge1": 18.7638, | |
| "eval_rouge2": 7.326, | |
| "eval_rougeL": 14.9437, | |
| "eval_rougeLsum": 17.072, | |
| "eval_runtime": 1516.9221, | |
| "eval_samples_per_second": 4.243, | |
| "eval_steps_per_second": 4.243, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.745376530615598e-05, | |
| "loss": 2.2083, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.7371628703128753e-05, | |
| "loss": 2.1939, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.728949210010152e-05, | |
| "loss": 2.2193, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.7207355497074294e-05, | |
| "loss": 2.1945, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.712521889404707e-05, | |
| "loss": 2.2031, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.704308229101984e-05, | |
| "loss": 2.183, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.696094568799262e-05, | |
| "loss": 2.1772, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.6878809084965394e-05, | |
| "loss": 2.142, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.679667248193816e-05, | |
| "loss": 2.1948, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.6714535878910934e-05, | |
| "loss": 2.1486, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 2.0845487117767334, | |
| "eval_rouge1": 18.6879, | |
| "eval_rouge2": 7.2441, | |
| "eval_rougeL": 14.8835, | |
| "eval_rougeLsum": 16.983, | |
| "eval_runtime": 1521.4222, | |
| "eval_samples_per_second": 4.23, | |
| "eval_steps_per_second": 4.23, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.663239927588371e-05, | |
| "loss": 2.1557, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.655026267285648e-05, | |
| "loss": 2.1982, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6468126069829255e-05, | |
| "loss": 2.2035, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.6385989466802035e-05, | |
| "loss": 2.1822, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.63038528637748e-05, | |
| "loss": 2.1492, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.6221716260747575e-05, | |
| "loss": 2.1874, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.613957965772035e-05, | |
| "loss": 2.1865, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.605744305469312e-05, | |
| "loss": 2.1276, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.5975306451665896e-05, | |
| "loss": 2.1465, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.589316984863867e-05, | |
| "loss": 2.158, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 2.0698649883270264, | |
| "eval_rouge1": 18.8314, | |
| "eval_rouge2": 7.3712, | |
| "eval_rougeL": 15.0166, | |
| "eval_rougeLsum": 17.1215, | |
| "eval_runtime": 1517.8163, | |
| "eval_samples_per_second": 4.24, | |
| "eval_steps_per_second": 4.24, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.581103324561144e-05, | |
| "loss": 2.1378, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.5728896642584216e-05, | |
| "loss": 2.1079, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.564676003955699e-05, | |
| "loss": 2.1627, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.556462343652976e-05, | |
| "loss": 2.1117, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5482486833502536e-05, | |
| "loss": 2.128, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.540035023047531e-05, | |
| "loss": 2.1649, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5318213627448083e-05, | |
| "loss": 2.1666, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.523607702442086e-05, | |
| "loss": 2.145, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.515394042139363e-05, | |
| "loss": 2.136, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.5071803818366404e-05, | |
| "loss": 2.1476, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_gen_len": 18.99813548788067, | |
| "eval_loss": 2.042433500289917, | |
| "eval_rouge1": 18.9783, | |
| "eval_rouge2": 7.4138, | |
| "eval_rougeL": 15.1121, | |
| "eval_rougeLsum": 17.2778, | |
| "eval_runtime": 1522.9324, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 4.226, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.498966721533918e-05, | |
| "loss": 2.1453, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.490753061231195e-05, | |
| "loss": 2.1368, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.4825394009284724e-05, | |
| "loss": 2.1279, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.47432574062575e-05, | |
| "loss": 2.1521, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.466112080323027e-05, | |
| "loss": 2.113, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.4578984200203045e-05, | |
| "loss": 2.1298, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.449684759717582e-05, | |
| "loss": 2.1251, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.441471099414859e-05, | |
| "loss": 2.1407, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.4332574391121365e-05, | |
| "loss": 2.1299, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.425043778809413e-05, | |
| "loss": 2.1164, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 2.0348894596099854, | |
| "eval_rouge1": 18.9257, | |
| "eval_rouge2": 7.4649, | |
| "eval_rougeL": 15.0335, | |
| "eval_rougeLsum": 17.1819, | |
| "eval_runtime": 1523.2756, | |
| "eval_samples_per_second": 4.225, | |
| "eval_steps_per_second": 4.225, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.416830118506691e-05, | |
| "loss": 2.1055, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.4086164582039685e-05, | |
| "loss": 2.1239, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.400402797901246e-05, | |
| "loss": 2.1438, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.392189137598523e-05, | |
| "loss": 2.1106, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.3839754772958006e-05, | |
| "loss": 2.1359, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.375761816993077e-05, | |
| "loss": 2.0959, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.367548156690355e-05, | |
| "loss": 2.0987, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.3593344963876326e-05, | |
| "loss": 2.0794, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.35112083608491e-05, | |
| "loss": 2.13, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.342907175782187e-05, | |
| "loss": 2.079, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_gen_len": 18.999378495960222, | |
| "eval_loss": 2.020799160003662, | |
| "eval_rouge1": 18.643, | |
| "eval_rouge2": 7.4096, | |
| "eval_rougeL": 14.8927, | |
| "eval_rougeLsum": 16.9786, | |
| "eval_runtime": 1525.4414, | |
| "eval_samples_per_second": 4.219, | |
| "eval_steps_per_second": 4.219, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.334693515479465e-05, | |
| "loss": 2.0789, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.3264798551767413e-05, | |
| "loss": 2.1264, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.318266194874019e-05, | |
| "loss": 2.1415, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.310052534571297e-05, | |
| "loss": 2.0918, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.301838874268574e-05, | |
| "loss": 2.0827, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.2936252139658514e-05, | |
| "loss": 2.1044, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.285411553663128e-05, | |
| "loss": 2.1145, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.2771978933604054e-05, | |
| "loss": 2.1269, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.268984233057683e-05, | |
| "loss": 2.1013, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.26077057275496e-05, | |
| "loss": 2.101, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 2.0113208293914795, | |
| "eval_rouge1": 19.3881, | |
| "eval_rouge2": 7.7012, | |
| "eval_rougeL": 15.3981, | |
| "eval_rougeLsum": 17.6516, | |
| "eval_runtime": 1521.5665, | |
| "eval_samples_per_second": 4.23, | |
| "eval_steps_per_second": 4.23, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.252556912452238e-05, | |
| "loss": 2.116, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.2443432521495155e-05, | |
| "loss": 2.1321, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.236129591846792e-05, | |
| "loss": 2.0796, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.2279159315440695e-05, | |
| "loss": 2.0948, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.219702271241347e-05, | |
| "loss": 2.0966, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.211488610938624e-05, | |
| "loss": 2.0932, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.203274950635902e-05, | |
| "loss": 2.0807, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.1950612903331796e-05, | |
| "loss": 2.0852, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.186847630030456e-05, | |
| "loss": 2.1021, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.1786339697277336e-05, | |
| "loss": 2.0576, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_gen_len": 18.99922311995028, | |
| "eval_loss": 2.0021867752075195, | |
| "eval_rouge1": 18.9985, | |
| "eval_rouge2": 7.542, | |
| "eval_rougeL": 15.1157, | |
| "eval_rougeLsum": 17.2972, | |
| "eval_runtime": 1519.2175, | |
| "eval_samples_per_second": 4.236, | |
| "eval_steps_per_second": 4.236, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.170420309425011e-05, | |
| "loss": 2.0574, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.162206649122288e-05, | |
| "loss": 2.0968, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.1539929888195656e-05, | |
| "loss": 2.0973, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.1457793285168437e-05, | |
| "loss": 2.0847, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.13756566821412e-05, | |
| "loss": 2.1221, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.129352007911398e-05, | |
| "loss": 2.0739, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.121138347608675e-05, | |
| "loss": 2.0731, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.1129246873059524e-05, | |
| "loss": 2.0845, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.10471102700323e-05, | |
| "loss": 2.0975, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.096497366700507e-05, | |
| "loss": 2.0983, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9940693378448486, | |
| "eval_rouge1": 18.7691, | |
| "eval_rouge2": 7.4625, | |
| "eval_rougeL": 15.0256, | |
| "eval_rougeLsum": 17.1146, | |
| "eval_runtime": 1517.5066, | |
| "eval_samples_per_second": 4.241, | |
| "eval_steps_per_second": 4.241, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.0882837063977844e-05, | |
| "loss": 2.0693, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.080070046095062e-05, | |
| "loss": 2.083, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.071856385792339e-05, | |
| "loss": 2.0648, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.0636427254896164e-05, | |
| "loss": 2.0948, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.055429065186894e-05, | |
| "loss": 2.0841, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.047215404884171e-05, | |
| "loss": 2.0567, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0390017445814485e-05, | |
| "loss": 2.071, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.030788084278726e-05, | |
| "loss": 2.0915, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.022574423976003e-05, | |
| "loss": 2.085, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.0143607636732805e-05, | |
| "loss": 2.053, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9855321645736694, | |
| "eval_rouge1": 19.002, | |
| "eval_rouge2": 7.5602, | |
| "eval_rougeL": 15.1497, | |
| "eval_rougeLsum": 17.2963, | |
| "eval_runtime": 1520.207, | |
| "eval_samples_per_second": 4.234, | |
| "eval_steps_per_second": 4.234, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.006147103370558e-05, | |
| "loss": 2.0593, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.997933443067835e-05, | |
| "loss": 2.0821, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9897197827651126e-05, | |
| "loss": 2.0344, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.98150612246239e-05, | |
| "loss": 2.07, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.973292462159667e-05, | |
| "loss": 2.0605, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.9650788018569446e-05, | |
| "loss": 2.0728, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.956865141554222e-05, | |
| "loss": 2.0501, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.948651481251499e-05, | |
| "loss": 2.0895, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.9404378209487767e-05, | |
| "loss": 2.0706, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.932224160646053e-05, | |
| "loss": 2.0434, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_gen_len": 18.999378495960222, | |
| "eval_loss": 1.9785783290863037, | |
| "eval_rouge1": 19.2385, | |
| "eval_rouge2": 7.6533, | |
| "eval_rougeL": 15.3094, | |
| "eval_rougeLsum": 17.5439, | |
| "eval_runtime": 1520.5032, | |
| "eval_samples_per_second": 4.233, | |
| "eval_steps_per_second": 4.233, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.9240105003433313e-05, | |
| "loss": 2.0722, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.915796840040609e-05, | |
| "loss": 2.0524, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.907583179737886e-05, | |
| "loss": 2.0397, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.8993695194351634e-05, | |
| "loss": 2.0448, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.891155859132441e-05, | |
| "loss": 2.0676, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.8829421988297174e-05, | |
| "loss": 2.0729, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.8747285385269954e-05, | |
| "loss": 2.0462, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.866514878224273e-05, | |
| "loss": 2.0333, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.85830121792155e-05, | |
| "loss": 2.0849, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.8500875576188275e-05, | |
| "loss": 2.0354, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_gen_len": 18.99922311995028, | |
| "eval_loss": 1.9746133089065552, | |
| "eval_rouge1": 19.184, | |
| "eval_rouge2": 7.7307, | |
| "eval_rougeL": 15.2897, | |
| "eval_rougeLsum": 17.491, | |
| "eval_runtime": 1520.584, | |
| "eval_samples_per_second": 4.233, | |
| "eval_steps_per_second": 4.233, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.841873897316104e-05, | |
| "loss": 2.0346, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.8336602370133815e-05, | |
| "loss": 2.0341, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.825446576710659e-05, | |
| "loss": 2.0639, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.817232916407937e-05, | |
| "loss": 2.0725, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.809019256105214e-05, | |
| "loss": 2.0797, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.8008055958024916e-05, | |
| "loss": 2.0611, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.792591935499768e-05, | |
| "loss": 2.052, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.7843782751970456e-05, | |
| "loss": 2.0785, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.776164614894323e-05, | |
| "loss": 2.0294, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.7679509545916e-05, | |
| "loss": 2.0347, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9638867378234863, | |
| "eval_rouge1": 19.2408, | |
| "eval_rouge2": 7.693, | |
| "eval_rougeL": 15.3357, | |
| "eval_rougeLsum": 17.5297, | |
| "eval_runtime": 1520.3586, | |
| "eval_samples_per_second": 4.233, | |
| "eval_steps_per_second": 4.233, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.759737294288878e-05, | |
| "loss": 2.0491, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.7515236339861556e-05, | |
| "loss": 2.0138, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.743309973683432e-05, | |
| "loss": 2.0463, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.7350963133807097e-05, | |
| "loss": 2.0256, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.726882653077987e-05, | |
| "loss": 2.0326, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.7186689927752643e-05, | |
| "loss": 2.0803, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.7104553324725424e-05, | |
| "loss": 2.0262, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.70224167216982e-05, | |
| "loss": 2.0451, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.6940280118670964e-05, | |
| "loss": 2.0626, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.685814351564374e-05, | |
| "loss": 2.0236, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.959045648574829, | |
| "eval_rouge1": 19.0781, | |
| "eval_rouge2": 7.6256, | |
| "eval_rougeL": 15.1932, | |
| "eval_rougeLsum": 17.3486, | |
| "eval_runtime": 1523.0964, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 4.226, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.677600691261651e-05, | |
| "loss": 2.0515, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.6693870309589284e-05, | |
| "loss": 2.035, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.661173370656206e-05, | |
| "loss": 2.0183, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.652959710353484e-05, | |
| "loss": 2.0334, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.6447460500507605e-05, | |
| "loss": 2.0461, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.636532389748038e-05, | |
| "loss": 2.0543, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.628318729445315e-05, | |
| "loss": 2.0217, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.6201050691425925e-05, | |
| "loss": 2.0564, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.61189140883987e-05, | |
| "loss": 2.0536, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.603677748537147e-05, | |
| "loss": 2.0187, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.953194499015808, | |
| "eval_rouge1": 19.0343, | |
| "eval_rouge2": 7.6792, | |
| "eval_rougeL": 15.1884, | |
| "eval_rougeLsum": 17.3519, | |
| "eval_runtime": 1525.8532, | |
| "eval_samples_per_second": 4.218, | |
| "eval_steps_per_second": 4.218, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.5954640882344246e-05, | |
| "loss": 1.9995, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.587250427931702e-05, | |
| "loss": 2.0071, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.579036767628979e-05, | |
| "loss": 2.0403, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5708231073262566e-05, | |
| "loss": 2.019, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.562609447023534e-05, | |
| "loss": 2.0402, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.554395786720811e-05, | |
| "loss": 2.0302, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.5461821264180886e-05, | |
| "loss": 2.0411, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.537968466115366e-05, | |
| "loss": 2.0053, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.529754805812643e-05, | |
| "loss": 2.0221, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.521541145509921e-05, | |
| "loss": 1.9939, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.9485050439834595, | |
| "eval_rouge1": 18.8247, | |
| "eval_rouge2": 7.5005, | |
| "eval_rougeL": 15.0246, | |
| "eval_rougeLsum": 17.1485, | |
| "eval_runtime": 1524.2531, | |
| "eval_samples_per_second": 4.222, | |
| "eval_steps_per_second": 4.222, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.513327485207198e-05, | |
| "loss": 2.0132, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.5051138249044754e-05, | |
| "loss": 2.0426, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.496900164601753e-05, | |
| "loss": 2.0541, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.48868650429903e-05, | |
| "loss": 2.0383, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.4804728439963074e-05, | |
| "loss": 2.0174, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.472259183693585e-05, | |
| "loss": 2.0684, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.464045523390862e-05, | |
| "loss": 2.0326, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.4558318630881395e-05, | |
| "loss": 2.0231, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.447618202785417e-05, | |
| "loss": 2.0611, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.4394045424826935e-05, | |
| "loss": 1.9961, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9503637552261353, | |
| "eval_rouge1": 19.0695, | |
| "eval_rouge2": 7.6559, | |
| "eval_rougeL": 15.2139, | |
| "eval_rougeLsum": 17.3814, | |
| "eval_runtime": 1527.0226, | |
| "eval_samples_per_second": 4.215, | |
| "eval_steps_per_second": 4.215, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.4311908821799715e-05, | |
| "loss": 2.031, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.422977221877249e-05, | |
| "loss": 2.0354, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.414763561574526e-05, | |
| "loss": 2.0262, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.4065499012718035e-05, | |
| "loss": 2.0298, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.39833624096908e-05, | |
| "loss": 2.0177, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.3901225806663576e-05, | |
| "loss": 2.0344, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.3819089203636356e-05, | |
| "loss": 2.0248, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.373695260060913e-05, | |
| "loss": 2.0045, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.36548159975819e-05, | |
| "loss": 1.9958, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.3572679394554676e-05, | |
| "loss": 2.0197, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_gen_len": 18.998756991920448, | |
| "eval_loss": 1.939936876296997, | |
| "eval_rouge1": 19.2821, | |
| "eval_rouge2": 7.6685, | |
| "eval_rougeL": 15.3029, | |
| "eval_rougeLsum": 17.5374, | |
| "eval_runtime": 1521.4259, | |
| "eval_samples_per_second": 4.23, | |
| "eval_steps_per_second": 4.23, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.349054279152744e-05, | |
| "loss": 2.0371, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.3408406188500216e-05, | |
| "loss": 2.037, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.332626958547299e-05, | |
| "loss": 2.0002, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.324413298244577e-05, | |
| "loss": 1.9706, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.3161996379418544e-05, | |
| "loss": 1.9593, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.307985977639132e-05, | |
| "loss": 1.9641, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.2997723173364084e-05, | |
| "loss": 1.9221, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.291558657033686e-05, | |
| "loss": 1.9641, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.283344996730963e-05, | |
| "loss": 1.9549, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.2751313364282404e-05, | |
| "loss": 1.9457, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9349710941314697, | |
| "eval_rouge1": 19.053, | |
| "eval_rouge2": 7.6502, | |
| "eval_rougeL": 15.2123, | |
| "eval_rougeLsum": 17.3793, | |
| "eval_runtime": 1523.492, | |
| "eval_samples_per_second": 4.225, | |
| "eval_steps_per_second": 4.225, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.2669176761255184e-05, | |
| "loss": 1.953, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.258704015822796e-05, | |
| "loss": 1.9894, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.2504903555200725e-05, | |
| "loss": 1.9697, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.24227669521735e-05, | |
| "loss": 1.9755, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.234063034914627e-05, | |
| "loss": 1.9679, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2258493746119045e-05, | |
| "loss": 1.9684, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.2176357143091825e-05, | |
| "loss": 1.9486, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.20942205400646e-05, | |
| "loss": 1.9681, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.2012083937037365e-05, | |
| "loss": 1.9626, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.192994733401014e-05, | |
| "loss": 1.9552, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.9317119121551514, | |
| "eval_rouge1": 19.1878, | |
| "eval_rouge2": 7.7235, | |
| "eval_rougeL": 15.3272, | |
| "eval_rougeLsum": 17.5252, | |
| "eval_runtime": 1525.5733, | |
| "eval_samples_per_second": 4.219, | |
| "eval_steps_per_second": 4.219, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.184781073098291e-05, | |
| "loss": 1.9579, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.1765674127955686e-05, | |
| "loss": 1.9793, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.168353752492846e-05, | |
| "loss": 1.96, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.160140092190124e-05, | |
| "loss": 1.9263, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.1519264318874006e-05, | |
| "loss": 1.9776, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.143712771584678e-05, | |
| "loss": 1.971, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.135499111281955e-05, | |
| "loss": 1.9795, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.1272854509792327e-05, | |
| "loss": 1.9563, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.11907179067651e-05, | |
| "loss": 1.9581, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.1108581303737874e-05, | |
| "loss": 1.9772, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.930535078048706, | |
| "eval_rouge1": 19.0855, | |
| "eval_rouge2": 7.6303, | |
| "eval_rougeL": 15.1943, | |
| "eval_rougeLsum": 17.3942, | |
| "eval_runtime": 1526.3538, | |
| "eval_samples_per_second": 4.217, | |
| "eval_steps_per_second": 4.217, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.102644470071065e-05, | |
| "loss": 1.9223, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.094430809768342e-05, | |
| "loss": 1.9549, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.0862171494656194e-05, | |
| "loss": 1.9634, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.078003489162897e-05, | |
| "loss": 1.9451, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.069789828860174e-05, | |
| "loss": 1.9573, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.0615761685574514e-05, | |
| "loss": 1.9482, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.053362508254729e-05, | |
| "loss": 1.9451, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.045148847952006e-05, | |
| "loss": 1.9727, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0369351876492835e-05, | |
| "loss": 1.9723, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0287215273465608e-05, | |
| "loss": 1.9171, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_gen_len": 18.99953387197017, | |
| "eval_loss": 1.9290984869003296, | |
| "eval_rouge1": 19.0711, | |
| "eval_rouge2": 7.6437, | |
| "eval_rougeL": 15.2175, | |
| "eval_rougeLsum": 17.3893, | |
| "eval_runtime": 1522.9939, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 4.226, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.020507867043838e-05, | |
| "loss": 1.9618, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0122942067411152e-05, | |
| "loss": 1.9429, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.0040805464383925e-05, | |
| "loss": 1.9298, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.9958668861356702e-05, | |
| "loss": 1.9083, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9876532258329476e-05, | |
| "loss": 1.9438, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.979439565530225e-05, | |
| "loss": 1.9818, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9712259052275023e-05, | |
| "loss": 1.9651, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9630122449247793e-05, | |
| "loss": 1.9426, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9547985846220566e-05, | |
| "loss": 1.9581, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.946584924319334e-05, | |
| "loss": 1.9393, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_gen_len": 18.99953387197017, | |
| "eval_loss": 1.922972321510315, | |
| "eval_rouge1": 19.276, | |
| "eval_rouge2": 7.725, | |
| "eval_rougeL": 15.3826, | |
| "eval_rougeLsum": 17.586, | |
| "eval_runtime": 1527.3556, | |
| "eval_samples_per_second": 4.214, | |
| "eval_steps_per_second": 4.214, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9383712640166116e-05, | |
| "loss": 1.9286, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.930157603713889e-05, | |
| "loss": 1.973, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9219439434111663e-05, | |
| "loss": 1.959, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9137302831084433e-05, | |
| "loss": 1.9155, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.9055166228057207e-05, | |
| "loss": 1.9591, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.897302962502998e-05, | |
| "loss": 1.9599, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.889089302200275e-05, | |
| "loss": 1.9633, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.880875641897553e-05, | |
| "loss": 1.9539, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.87266198159483e-05, | |
| "loss": 1.9385, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8644483212921074e-05, | |
| "loss": 1.9295, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_gen_len": 18.997513983840896, | |
| "eval_loss": 1.9197078943252563, | |
| "eval_rouge1": 19.2999, | |
| "eval_rouge2": 7.7958, | |
| "eval_rougeL": 15.3961, | |
| "eval_rougeLsum": 17.6056, | |
| "eval_runtime": 1519.5533, | |
| "eval_samples_per_second": 4.235, | |
| "eval_steps_per_second": 4.235, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8562346609893848e-05, | |
| "loss": 1.9237, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.848021000686662e-05, | |
| "loss": 1.9593, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.839807340383939e-05, | |
| "loss": 1.9675, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.831593680081217e-05, | |
| "loss": 1.9673, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.823380019778494e-05, | |
| "loss": 1.9418, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.8151663594757715e-05, | |
| "loss": 1.9407, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.806952699173049e-05, | |
| "loss": 1.9354, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.7987390388703262e-05, | |
| "loss": 1.9619, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7905253785676032e-05, | |
| "loss": 1.9648, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7823117182648806e-05, | |
| "loss": 1.9725, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9172613620758057, | |
| "eval_rouge1": 19.2958, | |
| "eval_rouge2": 7.7121, | |
| "eval_rougeL": 15.3659, | |
| "eval_rougeLsum": 17.584, | |
| "eval_runtime": 1521.8661, | |
| "eval_samples_per_second": 4.229, | |
| "eval_steps_per_second": 4.229, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7740980579621582e-05, | |
| "loss": 1.9281, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7658843976594356e-05, | |
| "loss": 1.9511, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.757670737356713e-05, | |
| "loss": 1.9482, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7494570770539903e-05, | |
| "loss": 1.9245, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7412434167512673e-05, | |
| "loss": 1.9207, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7330297564485446e-05, | |
| "loss": 1.963, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.724816096145822e-05, | |
| "loss": 1.9451, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7166024358430997e-05, | |
| "loss": 1.9208, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.708388775540377e-05, | |
| "loss": 1.9598, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.7001751152376544e-05, | |
| "loss": 1.9668, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.9129323959350586, | |
| "eval_rouge1": 19.089, | |
| "eval_rouge2": 7.6846, | |
| "eval_rougeL": 15.2395, | |
| "eval_rougeLsum": 17.3879, | |
| "eval_runtime": 1526.0917, | |
| "eval_samples_per_second": 4.217, | |
| "eval_steps_per_second": 4.217, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.6919614549349314e-05, | |
| "loss": 1.9597, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6837477946322087e-05, | |
| "loss": 1.954, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.675534134329486e-05, | |
| "loss": 1.9465, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6673204740267638e-05, | |
| "loss": 1.9369, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.659106813724041e-05, | |
| "loss": 1.9787, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.650893153421318e-05, | |
| "loss": 1.9549, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.6426794931185955e-05, | |
| "loss": 1.9481, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6344658328158728e-05, | |
| "loss": 1.9494, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.62625217251315e-05, | |
| "loss": 1.9459, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.618038512210427e-05, | |
| "loss": 1.941, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_gen_len": 18.99953387197017, | |
| "eval_loss": 1.9131526947021484, | |
| "eval_rouge1": 19.2127, | |
| "eval_rouge2": 7.7336, | |
| "eval_rougeL": 15.311, | |
| "eval_rougeLsum": 17.4742, | |
| "eval_runtime": 1524.4495, | |
| "eval_samples_per_second": 4.222, | |
| "eval_steps_per_second": 4.222, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6098248519077052e-05, | |
| "loss": 1.93, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.6016111916049822e-05, | |
| "loss": 1.9407, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.5933975313022595e-05, | |
| "loss": 1.9342, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.585183870999537e-05, | |
| "loss": 1.9183, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5769702106968142e-05, | |
| "loss": 1.9416, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5687565503940912e-05, | |
| "loss": 1.9474, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5605428900913686e-05, | |
| "loss": 1.9462, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5523292297886463e-05, | |
| "loss": 1.9486, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5441155694859236e-05, | |
| "loss": 1.9575, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.535901909183201e-05, | |
| "loss": 1.9427, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.9108291864395142, | |
| "eval_rouge1": 19.217, | |
| "eval_rouge2": 7.7591, | |
| "eval_rougeL": 15.334, | |
| "eval_rougeLsum": 17.53, | |
| "eval_runtime": 1523.73, | |
| "eval_samples_per_second": 4.224, | |
| "eval_steps_per_second": 4.224, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5276882488804783e-05, | |
| "loss": 1.949, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5194745885777553e-05, | |
| "loss": 1.9544, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5112609282750327e-05, | |
| "loss": 1.9579, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.5030472679723104e-05, | |
| "loss": 1.9288, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.4948336076695874e-05, | |
| "loss": 1.9233, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.486619947366865e-05, | |
| "loss": 1.9387, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.4784062870641424e-05, | |
| "loss": 1.941, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4701926267614194e-05, | |
| "loss": 1.9528, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.461978966458697e-05, | |
| "loss": 1.9589, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.4537653061559744e-05, | |
| "loss": 1.9521, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9041023254394531, | |
| "eval_rouge1": 19.1285, | |
| "eval_rouge2": 7.6736, | |
| "eval_rougeL": 15.2625, | |
| "eval_rougeLsum": 17.458, | |
| "eval_runtime": 1526.8471, | |
| "eval_samples_per_second": 4.215, | |
| "eval_steps_per_second": 4.215, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.4455516458532514e-05, | |
| "loss": 1.928, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4373379855505288e-05, | |
| "loss": 1.9481, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4291243252478065e-05, | |
| "loss": 1.921, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4209106649450835e-05, | |
| "loss": 1.9489, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.412697004642361e-05, | |
| "loss": 1.9416, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.4044833443396382e-05, | |
| "loss": 1.9482, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.3962696840369155e-05, | |
| "loss": 1.9555, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.388056023734193e-05, | |
| "loss": 1.9128, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3798423634314702e-05, | |
| "loss": 1.9256, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3716287031287476e-05, | |
| "loss": 1.9352, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_gen_len": 18.999067743940337, | |
| "eval_loss": 1.9040871858596802, | |
| "eval_rouge1": 19.1656, | |
| "eval_rouge2": 7.723, | |
| "eval_rougeL": 15.3035, | |
| "eval_rougeLsum": 17.4818, | |
| "eval_runtime": 1524.041, | |
| "eval_samples_per_second": 4.223, | |
| "eval_steps_per_second": 4.223, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.363415042826025e-05, | |
| "loss": 1.9509, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3552013825233023e-05, | |
| "loss": 1.9452, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3469877222205796e-05, | |
| "loss": 1.9343, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.338774061917857e-05, | |
| "loss": 1.943, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3305604016151343e-05, | |
| "loss": 1.9381, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3223467413124117e-05, | |
| "loss": 1.929, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.314133081009689e-05, | |
| "loss": 1.9389, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3059194207069663e-05, | |
| "loss": 1.9373, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.2977057604042437e-05, | |
| "loss": 1.9295, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.289492100101521e-05, | |
| "loss": 1.9342, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.9004182815551758, | |
| "eval_rouge1": 19.2573, | |
| "eval_rouge2": 7.7766, | |
| "eval_rougeL": 15.3558, | |
| "eval_rougeLsum": 17.5382, | |
| "eval_runtime": 1526.2765, | |
| "eval_samples_per_second": 4.217, | |
| "eval_steps_per_second": 4.217, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2812784397987984e-05, | |
| "loss": 1.9029, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2730647794960754e-05, | |
| "loss": 1.9177, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.264851119193353e-05, | |
| "loss": 1.9567, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2566374588906304e-05, | |
| "loss": 1.9195, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2484237985879074e-05, | |
| "loss": 1.9185, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.240210138285185e-05, | |
| "loss": 1.9538, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2319964779824625e-05, | |
| "loss": 1.9224, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2237828176797395e-05, | |
| "loss": 1.9223, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.215569157377017e-05, | |
| "loss": 1.9206, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2073554970742945e-05, | |
| "loss": 1.9631, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.8978149890899658, | |
| "eval_rouge1": 19.236, | |
| "eval_rouge2": 7.7584, | |
| "eval_rougeL": 15.3408, | |
| "eval_rougeLsum": 17.4993, | |
| "eval_runtime": 1526.0914, | |
| "eval_samples_per_second": 4.217, | |
| "eval_steps_per_second": 4.217, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.1991418367715715e-05, | |
| "loss": 1.9234, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.190928176468849e-05, | |
| "loss": 1.9284, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.1827145161661262e-05, | |
| "loss": 1.9425, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1745008558634036e-05, | |
| "loss": 1.9478, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.166287195560681e-05, | |
| "loss": 1.9446, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1580735352579583e-05, | |
| "loss": 1.9225, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1498598749552356e-05, | |
| "loss": 1.8992, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.141646214652513e-05, | |
| "loss": 1.927, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1334325543497903e-05, | |
| "loss": 1.945, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1252188940470676e-05, | |
| "loss": 1.8987, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8967584371566772, | |
| "eval_rouge1": 19.1716, | |
| "eval_rouge2": 7.7231, | |
| "eval_rougeL": 15.2836, | |
| "eval_rougeLsum": 17.4655, | |
| "eval_runtime": 1526.2768, | |
| "eval_samples_per_second": 4.217, | |
| "eval_steps_per_second": 4.217, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.117005233744345e-05, | |
| "loss": 1.9248, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1087915734416223e-05, | |
| "loss": 1.9194, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1005779131388997e-05, | |
| "loss": 1.9362, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.092364252836177e-05, | |
| "loss": 1.9331, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0841505925334544e-05, | |
| "loss": 1.9351, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0759369322307317e-05, | |
| "loss": 1.9365, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.067723271928009e-05, | |
| "loss": 1.9589, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0595096116252864e-05, | |
| "loss": 1.9137, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0512959513225638e-05, | |
| "loss": 1.9057, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.043082291019841e-05, | |
| "loss": 1.9433, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.892448902130127, | |
| "eval_rouge1": 19.2644, | |
| "eval_rouge2": 7.8294, | |
| "eval_rougeL": 15.4018, | |
| "eval_rougeLsum": 17.5808, | |
| "eval_runtime": 1517.481, | |
| "eval_samples_per_second": 4.241, | |
| "eval_steps_per_second": 4.241, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0348686307171185e-05, | |
| "loss": 1.9228, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0266549704143955e-05, | |
| "loss": 1.9367, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.018441310111673e-05, | |
| "loss": 1.9048, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0102276498089505e-05, | |
| "loss": 1.9205, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0020139895062275e-05, | |
| "loss": 1.9375, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.9938003292035052e-05, | |
| "loss": 1.9294, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9855866689007825e-05, | |
| "loss": 1.9433, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9773730085980595e-05, | |
| "loss": 1.9233, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9691593482953372e-05, | |
| "loss": 1.9244, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9609456879926142e-05, | |
| "loss": 1.9159, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_gen_len": 18.99953387197017, | |
| "eval_loss": 1.891238808631897, | |
| "eval_rouge1": 19.1833, | |
| "eval_rouge2": 7.8267, | |
| "eval_rougeL": 15.3175, | |
| "eval_rougeLsum": 17.4918, | |
| "eval_runtime": 1523.1153, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 4.226, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9527320276898916e-05, | |
| "loss": 1.8884, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.944518367387169e-05, | |
| "loss": 1.9227, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9363047070844463e-05, | |
| "loss": 1.9329, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9280910467817236e-05, | |
| "loss": 1.9357, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.919877386479001e-05, | |
| "loss": 1.9376, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9116637261762783e-05, | |
| "loss": 1.9475, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9034500658735557e-05, | |
| "loss": 1.9127, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.895236405570833e-05, | |
| "loss": 1.9409, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8870227452681104e-05, | |
| "loss": 1.9132, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8788090849653877e-05, | |
| "loss": 1.9516, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.8856322765350342, | |
| "eval_rouge1": 19.3077, | |
| "eval_rouge2": 7.7432, | |
| "eval_rougeL": 15.3723, | |
| "eval_rougeLsum": 17.6115, | |
| "eval_runtime": 1522.416, | |
| "eval_samples_per_second": 4.227, | |
| "eval_steps_per_second": 4.227, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.870595424662665e-05, | |
| "loss": 1.9165, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8623817643599424e-05, | |
| "loss": 1.9477, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8541681040572198e-05, | |
| "loss": 1.9414, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.845954443754497e-05, | |
| "loss": 1.9261, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8377407834517744e-05, | |
| "loss": 1.9134, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8295271231490518e-05, | |
| "loss": 1.9261, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.821313462846329e-05, | |
| "loss": 1.9378, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8130998025436065e-05, | |
| "loss": 1.9315, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.804886142240884e-05, | |
| "loss": 1.9237, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7966724819381612e-05, | |
| "loss": 1.9218, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_gen_len": 18.999378495960222, | |
| "eval_loss": 1.8880378007888794, | |
| "eval_rouge1": 19.2668, | |
| "eval_rouge2": 7.8231, | |
| "eval_rougeL": 15.3834, | |
| "eval_rougeLsum": 17.5701, | |
| "eval_runtime": 1523.1133, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 4.226, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7884588216354385e-05, | |
| "loss": 1.9352, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7802451613327155e-05, | |
| "loss": 1.9194, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7720315010299932e-05, | |
| "loss": 1.9406, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7638178407272706e-05, | |
| "loss": 1.8828, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7556041804245476e-05, | |
| "loss": 1.9586, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7473905201218253e-05, | |
| "loss": 1.9326, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7391768598191023e-05, | |
| "loss": 1.9263, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7309631995163796e-05, | |
| "loss": 1.9454, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7227495392136573e-05, | |
| "loss": 1.9489, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7145358789109343e-05, | |
| "loss": 1.9159, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8860150575637817, | |
| "eval_rouge1": 19.2224, | |
| "eval_rouge2": 7.7903, | |
| "eval_rougeL": 15.3488, | |
| "eval_rougeLsum": 17.4992, | |
| "eval_runtime": 1523.0673, | |
| "eval_samples_per_second": 4.226, | |
| "eval_steps_per_second": 4.226, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7063222186082117e-05, | |
| "loss": 1.9111, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.698108558305489e-05, | |
| "loss": 1.945, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6898948980027664e-05, | |
| "loss": 1.9172, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6816812377000437e-05, | |
| "loss": 1.922, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.673467577397321e-05, | |
| "loss": 1.9482, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6652539170945984e-05, | |
| "loss": 1.9079, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.6570402567918757e-05, | |
| "loss": 1.8751, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.648826596489153e-05, | |
| "loss": 1.8752, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6406129361864304e-05, | |
| "loss": 1.8631, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6323992758837078e-05, | |
| "loss": 1.8741, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.8853929042816162, | |
| "eval_rouge1": 19.2572, | |
| "eval_rouge2": 7.741, | |
| "eval_rougeL": 15.3405, | |
| "eval_rougeLsum": 17.5351, | |
| "eval_runtime": 1525.4271, | |
| "eval_samples_per_second": 4.219, | |
| "eval_steps_per_second": 4.219, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.624185615580985e-05, | |
| "loss": 1.894, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6159719552782625e-05, | |
| "loss": 1.8928, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6077582949755398e-05, | |
| "loss": 1.8793, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5995446346728172e-05, | |
| "loss": 1.8638, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5913309743700945e-05, | |
| "loss": 1.8688, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.583117314067372e-05, | |
| "loss": 1.8954, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5749036537646492e-05, | |
| "loss": 1.8855, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5666899934619266e-05, | |
| "loss": 1.878, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.558476333159204e-05, | |
| "loss": 1.8841, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5502626728564813e-05, | |
| "loss": 1.8668, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_gen_len": 18.99953387197017, | |
| "eval_loss": 1.8854323625564575, | |
| "eval_rouge1": 19.3658, | |
| "eval_rouge2": 7.8593, | |
| "eval_rougeL": 15.4418, | |
| "eval_rougeLsum": 17.656, | |
| "eval_runtime": 1525.8366, | |
| "eval_samples_per_second": 4.218, | |
| "eval_steps_per_second": 4.218, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5420490125537586e-05, | |
| "loss": 1.8813, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5338353522510356e-05, | |
| "loss": 1.8512, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5256216919483133e-05, | |
| "loss": 1.8336, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5174080316455905e-05, | |
| "loss": 1.8732, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5091943713428677e-05, | |
| "loss": 1.8875, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.5009807110401453e-05, | |
| "loss": 1.8944, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4927670507374225e-05, | |
| "loss": 1.8778, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4845533904346997e-05, | |
| "loss": 1.8787, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4763397301319772e-05, | |
| "loss": 1.8692, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4681260698292546e-05, | |
| "loss": 1.8638, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8830713033676147, | |
| "eval_rouge1": 19.305, | |
| "eval_rouge2": 7.8218, | |
| "eval_rougeL": 15.3843, | |
| "eval_rougeLsum": 17.5861, | |
| "eval_runtime": 1521.2349, | |
| "eval_samples_per_second": 4.231, | |
| "eval_steps_per_second": 4.231, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4599124095265317e-05, | |
| "loss": 1.8725, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.451698749223809e-05, | |
| "loss": 1.8709, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4434850889210866e-05, | |
| "loss": 1.8559, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4352714286183638e-05, | |
| "loss": 1.8811, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4270577683156411e-05, | |
| "loss": 1.881, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4188441080129186e-05, | |
| "loss": 1.908, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4106304477101958e-05, | |
| "loss": 1.8907, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4024167874074732e-05, | |
| "loss": 1.8883, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.3942031271047507e-05, | |
| "loss": 1.8982, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3859894668020279e-05, | |
| "loss": 1.8334, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_gen_len": 18.999378495960222, | |
| "eval_loss": 1.8817341327667236, | |
| "eval_rouge1": 19.3269, | |
| "eval_rouge2": 7.8249, | |
| "eval_rougeL": 15.4231, | |
| "eval_rougeLsum": 17.5958, | |
| "eval_runtime": 1524.3688, | |
| "eval_samples_per_second": 4.222, | |
| "eval_steps_per_second": 4.222, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3777758064993052e-05, | |
| "loss": 1.8756, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3695621461965824e-05, | |
| "loss": 1.9022, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3613484858938599e-05, | |
| "loss": 1.8977, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3531348255911372e-05, | |
| "loss": 1.8735, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3449211652884144e-05, | |
| "loss": 1.8932, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.336707504985692e-05, | |
| "loss": 1.8564, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3284938446829693e-05, | |
| "loss": 1.8739, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3202801843802465e-05, | |
| "loss": 1.8806, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.312066524077524e-05, | |
| "loss": 1.88, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.3038528637748013e-05, | |
| "loss": 1.8893, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.880333423614502, | |
| "eval_rouge1": 19.2949, | |
| "eval_rouge2": 7.7885, | |
| "eval_rougeL": 15.3947, | |
| "eval_rougeLsum": 17.585, | |
| "eval_runtime": 1522.1741, | |
| "eval_samples_per_second": 4.228, | |
| "eval_steps_per_second": 4.228, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.2956392034720785e-05, | |
| "loss": 1.9098, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2874255431693557e-05, | |
| "loss": 1.8729, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2792118828666334e-05, | |
| "loss": 1.8631, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2709982225639105e-05, | |
| "loss": 1.8496, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2627845622611877e-05, | |
| "loss": 1.904, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2545709019584654e-05, | |
| "loss": 1.846, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2463572416557426e-05, | |
| "loss": 1.873, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2381435813530198e-05, | |
| "loss": 1.8765, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2299299210502973e-05, | |
| "loss": 1.8701, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2217162607475746e-05, | |
| "loss": 1.8929, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8782871961593628, | |
| "eval_rouge1": 19.291, | |
| "eval_rouge2": 7.8346, | |
| "eval_rougeL": 15.428, | |
| "eval_rougeLsum": 17.5797, | |
| "eval_runtime": 1523.9631, | |
| "eval_samples_per_second": 4.223, | |
| "eval_steps_per_second": 4.223, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2135026004448518e-05, | |
| "loss": 1.8712, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.2052889401421293e-05, | |
| "loss": 1.8656, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.1970752798394065e-05, | |
| "loss": 1.8611, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1888616195366839e-05, | |
| "loss": 1.8746, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1806479592339614e-05, | |
| "loss": 1.8663, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1724342989312385e-05, | |
| "loss": 1.8654, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1642206386285159e-05, | |
| "loss": 1.8758, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1560069783257932e-05, | |
| "loss": 1.8621, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1477933180230706e-05, | |
| "loss": 1.8706, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.139579657720348e-05, | |
| "loss": 1.861, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8765554428100586, | |
| "eval_rouge1": 19.4284, | |
| "eval_rouge2": 7.8832, | |
| "eval_rougeL": 15.4746, | |
| "eval_rougeLsum": 17.6946, | |
| "eval_runtime": 1527.0295, | |
| "eval_samples_per_second": 4.215, | |
| "eval_steps_per_second": 4.215, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1313659974176253e-05, | |
| "loss": 1.8747, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1231523371149026e-05, | |
| "loss": 1.8785, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1149386768121798e-05, | |
| "loss": 1.8707, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1067250165094573e-05, | |
| "loss": 1.8429, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0985113562067347e-05, | |
| "loss": 1.868, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0902976959040118e-05, | |
| "loss": 1.8845, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0820840356012894e-05, | |
| "loss": 1.8619, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0738703752985665e-05, | |
| "loss": 1.8816, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0656567149958439e-05, | |
| "loss": 1.853, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0574430546931214e-05, | |
| "loss": 1.8719, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.8751263618469238, | |
| "eval_rouge1": 19.1525, | |
| "eval_rouge2": 7.7641, | |
| "eval_rougeL": 15.3348, | |
| "eval_rougeLsum": 17.47, | |
| "eval_runtime": 1525.1874, | |
| "eval_samples_per_second": 4.22, | |
| "eval_steps_per_second": 4.22, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0492293943903986e-05, | |
| "loss": 1.8718, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.041015734087676e-05, | |
| "loss": 1.8682, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0328020737849533e-05, | |
| "loss": 1.8624, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0245884134822306e-05, | |
| "loss": 1.8749, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.016374753179508e-05, | |
| "loss": 1.8722, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.0081610928767853e-05, | |
| "loss": 1.8726, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 9.999474325740627e-06, | |
| "loss": 1.8983, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 9.917337722713398e-06, | |
| "loss": 1.8685, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.835201119686174e-06, | |
| "loss": 1.8877, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.753064516658947e-06, | |
| "loss": 1.8889, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.8741868734359741, | |
| "eval_rouge1": 19.1743, | |
| "eval_rouge2": 7.768, | |
| "eval_rougeL": 15.3292, | |
| "eval_rougeLsum": 17.4665, | |
| "eval_runtime": 1525.0328, | |
| "eval_samples_per_second": 4.22, | |
| "eval_steps_per_second": 4.22, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.670927913631719e-06, | |
| "loss": 1.8785, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.588791310604494e-06, | |
| "loss": 1.8707, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.506654707577266e-06, | |
| "loss": 1.864, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.42451810455004e-06, | |
| "loss": 1.8999, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.342381501522814e-06, | |
| "loss": 1.8596, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.260244898495586e-06, | |
| "loss": 1.8775, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.17810829546836e-06, | |
| "loss": 1.8408, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.095971692441133e-06, | |
| "loss": 1.8536, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.013835089413907e-06, | |
| "loss": 1.8566, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 8.93169848638668e-06, | |
| "loss": 1.8834, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.8723037242889404, | |
| "eval_rouge1": 19.3069, | |
| "eval_rouge2": 7.7935, | |
| "eval_rougeL": 15.3987, | |
| "eval_rougeLsum": 17.5913, | |
| "eval_runtime": 1524.9886, | |
| "eval_samples_per_second": 4.22, | |
| "eval_steps_per_second": 4.22, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.849561883359454e-06, | |
| "loss": 1.8751, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.767425280332227e-06, | |
| "loss": 1.8635, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.685288677304999e-06, | |
| "loss": 1.8827, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.603152074277774e-06, | |
| "loss": 1.8651, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.521015471250547e-06, | |
| "loss": 1.8554, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.43887886822332e-06, | |
| "loss": 1.8786, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.356742265196094e-06, | |
| "loss": 1.8857, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.274605662168866e-06, | |
| "loss": 1.8659, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.19246905914164e-06, | |
| "loss": 1.8536, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.110332456114415e-06, | |
| "loss": 1.8564, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.869491457939148, | |
| "eval_rouge1": 19.3217, | |
| "eval_rouge2": 7.8292, | |
| "eval_rougeL": 15.4063, | |
| "eval_rougeLsum": 17.6081, | |
| "eval_runtime": 1522.6506, | |
| "eval_samples_per_second": 4.227, | |
| "eval_steps_per_second": 4.227, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.028195853087187e-06, | |
| "loss": 1.8666, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.94605925005996e-06, | |
| "loss": 1.8539, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.863922647032733e-06, | |
| "loss": 1.8671, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.781786044005507e-06, | |
| "loss": 1.8613, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.69964944097828e-06, | |
| "loss": 1.8565, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.617512837951053e-06, | |
| "loss": 1.8802, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.535376234923827e-06, | |
| "loss": 1.8819, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.4532396318966e-06, | |
| "loss": 1.8422, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.3711030288693734e-06, | |
| "loss": 1.8632, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.288966425842148e-06, | |
| "loss": 1.8706, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_gen_len": 18.999844623990057, | |
| "eval_loss": 1.8696507215499878, | |
| "eval_rouge1": 19.294, | |
| "eval_rouge2": 7.8217, | |
| "eval_rougeL": 15.3964, | |
| "eval_rougeLsum": 17.581, | |
| "eval_runtime": 1525.9573, | |
| "eval_samples_per_second": 4.218, | |
| "eval_steps_per_second": 4.218, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.20682982281492e-06, | |
| "loss": 1.8729, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.124693219787694e-06, | |
| "loss": 1.8797, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.0425566167604665e-06, | |
| "loss": 1.8958, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.96042001373324e-06, | |
| "loss": 1.8556, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.878283410706014e-06, | |
| "loss": 1.8432, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.796146807678787e-06, | |
| "loss": 1.8349, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.71401020465156e-06, | |
| "loss": 1.8624, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.631873601624333e-06, | |
| "loss": 1.8717, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.549736998597107e-06, | |
| "loss": 1.8695, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.467600395569881e-06, | |
| "loss": 1.883, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_gen_len": 18.99953387197017, | |
| "eval_loss": 1.870278000831604, | |
| "eval_rouge1": 19.2784, | |
| "eval_rouge2": 7.8634, | |
| "eval_rougeL": 15.404, | |
| "eval_rougeLsum": 17.5942, | |
| "eval_runtime": 1520.2984, | |
| "eval_samples_per_second": 4.233, | |
| "eval_steps_per_second": 4.233, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.385463792542653e-06, | |
| "loss": 1.8673, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.303327189515428e-06, | |
| "loss": 1.883, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.2211905864882e-06, | |
| "loss": 1.889, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.139053983460974e-06, | |
| "loss": 1.8596, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.056917380433747e-06, | |
| "loss": 1.8711, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.974780777406521e-06, | |
| "loss": 1.8736, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.892644174379294e-06, | |
| "loss": 1.8853, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.810507571352068e-06, | |
| "loss": 1.8678, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.72837096832484e-06, | |
| "loss": 1.8685, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.646234365297614e-06, | |
| "loss": 1.8622, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_gen_len": 18.998756991920448, | |
| "eval_loss": 1.867693543434143, | |
| "eval_rouge1": 19.3165, | |
| "eval_rouge2": 7.8378, | |
| "eval_rougeL": 15.4259, | |
| "eval_rougeLsum": 17.6064, | |
| "eval_runtime": 1521.5883, | |
| "eval_samples_per_second": 4.23, | |
| "eval_steps_per_second": 4.23, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.564097762270387e-06, | |
| "loss": 1.8691, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.481961159243161e-06, | |
| "loss": 1.8412, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.399824556215934e-06, | |
| "loss": 1.8664, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.317687953188708e-06, | |
| "loss": 1.8751, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.23555135016148e-06, | |
| "loss": 1.8644, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.153414747134254e-06, | |
| "loss": 1.8644, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.071278144107028e-06, | |
| "loss": 1.8272, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.989141541079801e-06, | |
| "loss": 1.8476, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.907004938052574e-06, | |
| "loss": 1.8451, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.824868335025348e-06, | |
| "loss": 1.8781, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.8676103353500366, | |
| "eval_rouge1": 19.3237, | |
| "eval_rouge2": 7.7954, | |
| "eval_rougeL": 15.3995, | |
| "eval_rougeLsum": 17.6008, | |
| "eval_runtime": 1523.5294, | |
| "eval_samples_per_second": 4.224, | |
| "eval_steps_per_second": 4.224, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.742731731998121e-06, | |
| "loss": 1.8494, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.660595128970895e-06, | |
| "loss": 1.88, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.578458525943668e-06, | |
| "loss": 1.895, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.496321922916441e-06, | |
| "loss": 1.8533, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.414185319889214e-06, | |
| "loss": 1.8587, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.332048716861988e-06, | |
| "loss": 1.8437, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.249912113834761e-06, | |
| "loss": 1.8736, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.1677755108075346e-06, | |
| "loss": 1.8518, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.085638907780308e-06, | |
| "loss": 1.844, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.003502304753081e-06, | |
| "loss": 1.8793, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8684626817703247, | |
| "eval_rouge1": 19.2141, | |
| "eval_rouge2": 7.7605, | |
| "eval_rougeL": 15.3345, | |
| "eval_rougeLsum": 17.5268, | |
| "eval_runtime": 1523.3316, | |
| "eval_samples_per_second": 4.225, | |
| "eval_steps_per_second": 4.225, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.921365701725854e-06, | |
| "loss": 1.8743, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.8392290986986284e-06, | |
| "loss": 1.8847, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.7570924956714015e-06, | |
| "loss": 1.8667, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6749558926441745e-06, | |
| "loss": 1.8621, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.592819289616948e-06, | |
| "loss": 1.8736, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.510682686589721e-06, | |
| "loss": 1.8665, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.428546083562495e-06, | |
| "loss": 1.8334, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.346409480535268e-06, | |
| "loss": 1.8538, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.2642728775080415e-06, | |
| "loss": 1.8867, | |
| "step": 569000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.1821362744808145e-06, | |
| "loss": 1.8795, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.8675283193588257, | |
| "eval_rouge1": 19.2694, | |
| "eval_rouge2": 7.8082, | |
| "eval_rougeL": 15.3996, | |
| "eval_rougeLsum": 17.5831, | |
| "eval_runtime": 1527.2295, | |
| "eval_samples_per_second": 4.214, | |
| "eval_steps_per_second": 4.214, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.099999671453588e-06, | |
| "loss": 1.8677, | |
| "step": 571000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.0178630684263615e-06, | |
| "loss": 1.8988, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9357264653991345e-06, | |
| "loss": 1.855, | |
| "step": 573000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.853589862371908e-06, | |
| "loss": 1.8923, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.7714532593446814e-06, | |
| "loss": 1.8652, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.689316656317455e-06, | |
| "loss": 1.8533, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.607180053290228e-06, | |
| "loss": 1.856, | |
| "step": 577000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.5250434502630014e-06, | |
| "loss": 1.8816, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.442906847235775e-06, | |
| "loss": 1.8648, | |
| "step": 579000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.3607702442085484e-06, | |
| "loss": 1.8425, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.8659350872039795, | |
| "eval_rouge1": 19.2886, | |
| "eval_rouge2": 7.7987, | |
| "eval_rougeL": 15.4005, | |
| "eval_rougeLsum": 17.5859, | |
| "eval_runtime": 1522.679, | |
| "eval_samples_per_second": 4.227, | |
| "eval_steps_per_second": 4.227, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.278633641181322e-06, | |
| "loss": 1.857, | |
| "step": 581000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.196497038154095e-06, | |
| "loss": 1.8913, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.1143604351268684e-06, | |
| "loss": 1.8634, | |
| "step": 583000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.032223832099642e-06, | |
| "loss": 1.8572, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.950087229072415e-06, | |
| "loss": 1.8534, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.8679506260451886e-06, | |
| "loss": 1.8754, | |
| "step": 586000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.7858140230179618e-06, | |
| "loss": 1.8696, | |
| "step": 587000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7036774199907349e-06, | |
| "loss": 1.8459, | |
| "step": 588000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6215408169635085e-06, | |
| "loss": 1.8807, | |
| "step": 589000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5394042139362818e-06, | |
| "loss": 1.8605, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_gen_len": 18.99968924798011, | |
| "eval_loss": 1.865015983581543, | |
| "eval_rouge1": 19.2778, | |
| "eval_rouge2": 7.7934, | |
| "eval_rougeL": 15.3931, | |
| "eval_rougeLsum": 17.5809, | |
| "eval_runtime": 1522.2459, | |
| "eval_samples_per_second": 4.228, | |
| "eval_steps_per_second": 4.228, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.457267610909055e-06, | |
| "loss": 1.8694, | |
| "step": 591000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.3751310078818285e-06, | |
| "loss": 1.8429, | |
| "step": 592000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.292994404854602e-06, | |
| "loss": 1.8635, | |
| "step": 593000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.210857801827375e-06, | |
| "loss": 1.8677, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1287211988001485e-06, | |
| "loss": 1.8495, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.046584595772922e-06, | |
| "loss": 1.8476, | |
| "step": 596000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.644479927456953e-07, | |
| "loss": 1.8705, | |
| "step": 597000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.823113897184687e-07, | |
| "loss": 1.8624, | |
| "step": 598000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.001747866912419e-07, | |
| "loss": 1.8715, | |
| "step": 599000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.180381836640152e-07, | |
| "loss": 1.8448, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.8655472993850708, | |
| "eval_rouge1": 19.2884, | |
| "eval_rouge2": 7.8087, | |
| "eval_rougeL": 15.4025, | |
| "eval_rougeLsum": 17.5856, | |
| "eval_runtime": 1525.333, | |
| "eval_samples_per_second": 4.219, | |
| "eval_steps_per_second": 4.219, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.359015806367887e-07, | |
| "loss": 1.8687, | |
| "step": 601000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.537649776095621e-07, | |
| "loss": 1.8574, | |
| "step": 602000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.716283745823354e-07, | |
| "loss": 1.841, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.8949177155510875e-07, | |
| "loss": 1.8731, | |
| "step": 604000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.073551685278821e-07, | |
| "loss": 1.8377, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.2521856550065546e-07, | |
| "loss": 1.8636, | |
| "step": 606000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.4308196247342882e-07, | |
| "loss": 1.8636, | |
| "step": 607000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 6.094535944620217e-08, | |
| "loss": 1.8456, | |
| "step": 608000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 608742, | |
| "total_flos": 7.356226256463053e+17, | |
| "train_loss": 1.974796563636501, | |
| "train_runtime": 174966.5466, | |
| "train_samples_per_second": 3.479, | |
| "train_steps_per_second": 3.479 | |
| } | |
| ], | |
| "max_steps": 608742, | |
| "num_train_epochs": 3, | |
| "total_flos": 7.356226256463053e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |