{ "best_metric": 0.3075002644877919, "best_model_checkpoint": "ru_t5_logs/checkpoint-7200", "epoch": 1.21786719081204, "global_step": 7900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.555555555555555e-05, "loss": 8.8426, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0001111111111111111, "loss": 2.5171, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00016666666666666666, "loss": 1.4573, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.0002222222222222222, "loss": 1.1602, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.0002777777777777778, "loss": 0.9913, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0003333333333333333, "loss": 1.0849, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.0003888888888888889, "loss": 1.1257, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.0004444444444444444, "loss": 1.1418, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 1.0913, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.0004999996711179958, "loss": 1.147, "step": 100 }, { "epoch": 0.02, "eval_bleu": 0.057150905355919934, "eval_loss": 0.9198915958404541, "eval_meteor": 0.11030404785434701, "eval_rouge1": 0.279545903055327, "eval_rouge2": 0.1375545207790398, "eval_rougeL": 0.22993057523222138, "eval_rougeLsum": 0.23002986644843249, "eval_runtime": 1292.7346, "eval_samples_per_second": 1.128, "eval_steps_per_second": 0.188, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0004999986844728487, "loss": 1.067, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.0004999970400671544, "loss": 1.1341, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.0004999947379052395, "loss": 0.9852, "step": 130 }, { "epoch": 0.02, "learning_rate": 0.0004999917779931613, "loss": 1.0095, "step": 140 }, { "epoch": 0.02, "learning_rate": 0.0004999881603387073, "loss": 1.0118, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.0004999838849513957, "loss": 1.0513, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0004999789518424755, "loss": 1.009, "step": 170 }, { "epoch": 0.03, "learning_rate": 0.0004999733610249258, "loss": 0.9138, "step": 180 }, { "epoch": 0.03, "learning_rate": 0.0004999671125134564, "loss": 0.9989, "step": 190 }, { "epoch": 0.03, "learning_rate": 0.0004999602063245074, "loss": 0.9443, "step": 200 }, { "epoch": 0.03, "eval_bleu": 0.08174242542801959, "eval_loss": 0.8470357656478882, "eval_meteor": 0.13334461127658528, "eval_rouge1": 0.3118630390437005, "eval_rouge2": 0.17036843477123215, "eval_rougeL": 0.26155329349275835, "eval_rougeLsum": 0.26141341054848355, "eval_runtime": 1098.495, "eval_samples_per_second": 1.327, "eval_steps_per_second": 0.221, "step": 200 }, { "epoch": 0.03, "learning_rate": 0.0004999526424762496, "loss": 0.9839, "step": 210 }, { "epoch": 0.03, "learning_rate": 0.0004999444209885838, "loss": 0.9479, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.0004999355418831412, "loss": 0.917, "step": 230 }, { "epoch": 0.04, "learning_rate": 0.0004999260051832831, "loss": 0.87, "step": 240 }, { "epoch": 0.04, "learning_rate": 0.0004999158109141012, "loss": 0.9722, "step": 250 }, { "epoch": 0.04, "learning_rate": 0.0004999049591024172, "loss": 0.9619, "step": 260 }, { "epoch": 0.04, "learning_rate": 0.0004998934497767828, "loss": 1.0447, "step": 270 }, { "epoch": 0.04, "learning_rate": 0.0004998812829674797, "loss": 0.8547, "step": 280 }, { "epoch": 0.04, "learning_rate": 0.0004998684587065195, "loss": 0.8972, "step": 290 }, { "epoch": 0.05, "learning_rate": 0.0004998549770276435, "loss": 0.8871, "step": 300 }, { "epoch": 0.05, "eval_bleu": 0.046252384714512554, "eval_loss": 0.8526527881622314, "eval_meteor": 0.09538529454387626, "eval_rouge1": 0.27543511716574076, "eval_rouge2": 0.1605961208091074, "eval_rougeL": 0.24354530581569278, "eval_rougeLsum": 0.24344913233941234, "eval_runtime": 882.6979, "eval_samples_per_second": 1.652, "eval_steps_per_second": 0.275, "step": 300 }, { "epoch": 0.05, "learning_rate": 0.0004998408379663226, "loss": 0.8772, "step": 310 }, { "epoch": 0.05, "learning_rate": 0.0004998260415597578, "loss": 1.0016, "step": 320 }, { "epoch": 0.05, "learning_rate": 0.000499810587846879, "loss": 0.9394, "step": 330 }, { "epoch": 0.05, "learning_rate": 0.0004997944768683458, "loss": 0.8853, "step": 340 }, { "epoch": 0.05, "learning_rate": 0.0004997777086665473, "loss": 0.9091, "step": 350 }, { "epoch": 0.06, "learning_rate": 0.0004997602832856013, "loss": 0.8932, "step": 360 }, { "epoch": 0.06, "learning_rate": 0.0004997422007713551, "loss": 0.8659, "step": 370 }, { "epoch": 0.06, "learning_rate": 0.0004997234611713849, "loss": 0.9325, "step": 380 }, { "epoch": 0.06, "learning_rate": 0.0004997040645349955, "loss": 0.9674, "step": 390 }, { "epoch": 0.06, "learning_rate": 0.0004996840109132205, "loss": 0.9608, "step": 400 }, { "epoch": 0.06, "eval_bleu": 0.08586731335447921, "eval_loss": 0.8303579092025757, "eval_meteor": 0.1430676871634322, "eval_rouge1": 0.31709190550852295, "eval_rouge2": 0.17041273297322634, "eval_rougeL": 0.267869089287303, "eval_rougeLsum": 0.2677306207885424, "eval_runtime": 1072.7177, "eval_samples_per_second": 1.359, "eval_steps_per_second": 0.227, "step": 400 }, { "epoch": 0.06, "learning_rate": 0.0004996633003588222, "loss": 0.9616, "step": 410 }, { "epoch": 0.06, "learning_rate": 0.0004996419329262913, "loss": 0.9675, "step": 420 }, { "epoch": 0.07, "learning_rate": 0.0004996199086718466, "loss": 0.9418, "step": 430 }, { "epoch": 0.07, "learning_rate": 0.0004995972276534351, "loss": 0.9631, "step": 440 }, { "epoch": 0.07, "learning_rate": 0.0004995738899307319, "loss": 0.9218, "step": 450 }, { "epoch": 0.07, "learning_rate": 0.0004995498955651399, "loss": 0.984, "step": 460 }, { "epoch": 0.07, "learning_rate": 0.0004995252446197894, "loss": 0.9286, "step": 470 }, { "epoch": 0.07, "learning_rate": 0.0004994999371595388, "loss": 0.8722, "step": 480 }, { "epoch": 0.08, "learning_rate": 0.0004994739732509731, "loss": 0.8834, "step": 490 }, { "epoch": 0.08, "learning_rate": 0.000499447352962405, "loss": 0.953, "step": 500 }, { "epoch": 0.08, "eval_bleu": 0.09861710848144628, "eval_loss": 0.8263402581214905, "eval_meteor": 0.15312912847569377, "eval_rouge1": 0.3263458627968157, "eval_rouge2": 0.17504244009075126, "eval_rougeL": 0.2714073081979089, "eval_rougeLsum": 0.27118688987783984, "eval_runtime": 1146.0503, "eval_samples_per_second": 1.272, "eval_steps_per_second": 0.212, "step": 500 }, { "epoch": 0.08, "learning_rate": 0.0004994200763638739, "loss": 0.8363, "step": 510 }, { "epoch": 0.08, "learning_rate": 0.000499392143527146, "loss": 0.9203, "step": 520 }, { "epoch": 0.08, "learning_rate": 0.0004993635545257144, "loss": 0.9375, "step": 530 }, { "epoch": 0.08, "learning_rate": 0.0004993343094347979, "loss": 0.9548, "step": 540 }, { "epoch": 0.08, "learning_rate": 0.0004993044083313425, "loss": 0.9221, "step": 550 }, { "epoch": 0.09, "learning_rate": 0.0004992738512940194, "loss": 0.8666, "step": 560 }, { "epoch": 0.09, "learning_rate": 0.0004992426384032258, "loss": 0.8917, "step": 570 }, { "epoch": 0.09, "learning_rate": 0.0004992107697410848, "loss": 0.9237, "step": 580 }, { "epoch": 0.09, "learning_rate": 0.0004991782453914444, "loss": 0.8657, "step": 590 }, { "epoch": 0.09, "learning_rate": 0.0004991450654398782, "loss": 0.9489, "step": 600 }, { "epoch": 0.09, "eval_bleu": 0.10113643092041803, "eval_loss": 0.8359497785568237, "eval_meteor": 0.1654018684728887, "eval_rouge1": 0.33698613363555824, "eval_rouge2": 0.17694734353424882, "eval_rougeL": 0.2749185603408262, "eval_rougeLsum": 0.2750113209557301, "eval_runtime": 1213.3811, "eval_samples_per_second": 1.202, "eval_steps_per_second": 0.2, "step": 600 }, { "epoch": 0.09, "learning_rate": 0.0004991112299736844, "loss": 0.9289, "step": 610 }, { "epoch": 0.1, "learning_rate": 0.000499076739081886, "loss": 0.9123, "step": 620 }, { "epoch": 0.1, "learning_rate": 0.0004990415928552305, "loss": 0.9329, "step": 630 }, { "epoch": 0.1, "learning_rate": 0.0004990057913861896, "loss": 0.8874, "step": 640 }, { "epoch": 0.1, "learning_rate": 0.0004989693347689589, "loss": 0.9698, "step": 650 }, { "epoch": 0.1, "learning_rate": 0.000498932223099458, "loss": 0.831, "step": 660 }, { "epoch": 0.1, "learning_rate": 0.0004988944564753295, "loss": 0.8701, "step": 670 }, { "epoch": 0.1, "learning_rate": 0.0004988560349959396, "loss": 0.8953, "step": 680 }, { "epoch": 0.11, "learning_rate": 0.0004988169587623776, "loss": 0.9621, "step": 690 }, { "epoch": 0.11, "learning_rate": 0.0004987772278774548, "loss": 0.8948, "step": 700 }, { "epoch": 0.11, "eval_bleu": 0.04255087666728896, "eval_loss": 0.8361812233924866, "eval_meteor": 0.099997885358938, "eval_rouge1": 0.28420928207646434, "eval_rouge2": 0.1653214913198871, "eval_rougeL": 0.25267504887077563, "eval_rougeLsum": 0.25262418214939675, "eval_runtime": 754.6649, "eval_samples_per_second": 1.932, "eval_steps_per_second": 0.322, "step": 700 }, { "epoch": 0.11, "learning_rate": 0.0004987368424457058, "loss": 0.9126, "step": 710 }, { "epoch": 0.11, "learning_rate": 0.0004986958025733868, "loss": 0.9425, "step": 720 }, { "epoch": 0.11, "learning_rate": 0.000498654108368476, "loss": 0.8944, "step": 730 }, { "epoch": 0.11, "learning_rate": 0.0004986117599406733, "loss": 0.8977, "step": 740 }, { "epoch": 0.12, "learning_rate": 0.0004985687574013994, "loss": 0.8851, "step": 750 }, { "epoch": 0.12, "learning_rate": 0.0004985251008637968, "loss": 0.8542, "step": 760 }, { "epoch": 0.12, "learning_rate": 0.0004984807904427281, "loss": 0.8809, "step": 770 }, { "epoch": 0.12, "learning_rate": 0.0004984358262547766, "loss": 0.8334, "step": 780 }, { "epoch": 0.12, "learning_rate": 0.0004983902084182455, "loss": 0.8488, "step": 790 }, { "epoch": 0.12, "learning_rate": 0.000498343937053158, "loss": 0.8409, "step": 800 }, { "epoch": 0.12, "eval_bleu": 0.09329201888155486, "eval_loss": 0.8163634538650513, "eval_meteor": 0.15770696776351786, "eval_rouge1": 0.3315883123187934, "eval_rouge2": 0.1767418040045381, "eval_rougeL": 0.27737567013849995, "eval_rougeLsum": 0.27738229938080694, "eval_runtime": 1063.2636, "eval_samples_per_second": 1.371, "eval_steps_per_second": 0.229, "step": 800 }, { "epoch": 0.12, "learning_rate": 0.0004982970122812566, "loss": 0.7996, "step": 810 }, { "epoch": 0.13, "learning_rate": 0.0004982494342260029, "loss": 0.8564, "step": 820 }, { "epoch": 0.13, "learning_rate": 0.0004982012030125775, "loss": 0.9214, "step": 830 }, { "epoch": 0.13, "learning_rate": 0.0004981523187678796, "loss": 0.8301, "step": 840 }, { "epoch": 0.13, "learning_rate": 0.0004981027816205262, "loss": 0.8948, "step": 850 }, { "epoch": 0.13, "learning_rate": 0.0004980525917008523, "loss": 0.8467, "step": 860 }, { "epoch": 0.13, "learning_rate": 0.0004980017491409103, "loss": 0.9733, "step": 870 }, { "epoch": 0.14, "learning_rate": 0.0004979502540744702, "loss": 0.9432, "step": 880 }, { "epoch": 0.14, "learning_rate": 0.000497898106637018, "loss": 0.9256, "step": 890 }, { "epoch": 0.14, "learning_rate": 0.0004978453069657568, "loss": 0.8505, "step": 900 }, { "epoch": 0.14, "eval_bleu": 0.08288033858994784, "eval_loss": 0.8515655398368835, "eval_meteor": 0.13825343548644084, "eval_rouge1": 0.31117223601763094, "eval_rouge2": 0.17049205940823398, "eval_rougeL": 0.26282109139798354, "eval_rougeLsum": 0.26267587400999515, "eval_runtime": 1043.0682, "eval_samples_per_second": 1.398, "eval_steps_per_second": 0.233, "step": 900 }, { "epoch": 0.14, "learning_rate": 0.0004977918551996054, "loss": 0.8471, "step": 910 }, { "epoch": 0.14, "learning_rate": 0.0004977377514791983, "loss": 0.9285, "step": 920 }, { "epoch": 0.14, "learning_rate": 0.0004976829959468855, "loss": 0.8993, "step": 930 }, { "epoch": 0.14, "learning_rate": 0.0004976275887467319, "loss": 0.9278, "step": 940 }, { "epoch": 0.15, "learning_rate": 0.0004975715300245169, "loss": 0.8716, "step": 950 }, { "epoch": 0.15, "learning_rate": 0.0004975148199277342, "loss": 0.8836, "step": 960 }, { "epoch": 0.15, "learning_rate": 0.0004974574586055912, "loss": 0.9705, "step": 970 }, { "epoch": 0.15, "learning_rate": 0.0004973994462090088, "loss": 0.8766, "step": 980 }, { "epoch": 0.15, "learning_rate": 0.0004973407828906207, "loss": 0.8644, "step": 990 }, { "epoch": 0.15, "learning_rate": 0.0004972814688047736, "loss": 0.8516, "step": 1000 }, { "epoch": 0.15, "eval_bleu": 0.11086631103897832, "eval_loss": 0.8234091401100159, "eval_meteor": 0.1702736355204243, "eval_rouge1": 0.33339211687613324, "eval_rouge2": 0.17689852127575206, "eval_rougeL": 0.2749015362723488, "eval_rougeLsum": 0.2749567892668668, "eval_runtime": 1362.5888, "eval_samples_per_second": 1.07, "eval_steps_per_second": 0.178, "step": 1000 }, { "epoch": 0.16, "learning_rate": 0.0004972215041075261, "loss": 0.9137, "step": 1010 }, { "epoch": 0.16, "learning_rate": 0.0004971608889566486, "loss": 0.9558, "step": 1020 }, { "epoch": 0.16, "learning_rate": 0.0004970996235116231, "loss": 0.9403, "step": 1030 }, { "epoch": 0.16, "learning_rate": 0.0004970377079336422, "loss": 0.9096, "step": 1040 }, { "epoch": 0.16, "learning_rate": 0.0004969751423856095, "loss": 0.9792, "step": 1050 }, { "epoch": 0.16, "learning_rate": 0.0004969119270321383, "loss": 0.8368, "step": 1060 }, { "epoch": 0.16, "learning_rate": 0.0004968480620395519, "loss": 0.9117, "step": 1070 }, { "epoch": 0.17, "learning_rate": 0.0004967835475758825, "loss": 0.91, "step": 1080 }, { "epoch": 0.17, "learning_rate": 0.0004967183838108713, "loss": 0.897, "step": 1090 }, { "epoch": 0.17, "learning_rate": 0.0004966525709159679, "loss": 0.876, "step": 1100 }, { "epoch": 0.17, "eval_bleu": 0.08012098475489943, "eval_loss": 0.810808539390564, "eval_meteor": 0.13737464649721876, "eval_rouge1": 0.31919458917681476, "eval_rouge2": 0.17969717787411127, "eval_rougeL": 0.2725775781741208, "eval_rougeLsum": 0.27236890457104335, "eval_runtime": 942.8581, "eval_samples_per_second": 1.546, "eval_steps_per_second": 0.258, "step": 1100 }, { "epoch": 0.17, "learning_rate": 0.0004965861090643296, "loss": 0.9041, "step": 1110 }, { "epoch": 0.17, "learning_rate": 0.0004965189984308215, "loss": 0.9139, "step": 1120 }, { "epoch": 0.17, "learning_rate": 0.0004964512391920151, "loss": 0.8988, "step": 1130 }, { "epoch": 0.18, "learning_rate": 0.000496382831526189, "loss": 0.8624, "step": 1140 }, { "epoch": 0.18, "learning_rate": 0.0004963137756133274, "loss": 0.8518, "step": 1150 }, { "epoch": 0.18, "learning_rate": 0.0004962440716351205, "loss": 0.9235, "step": 1160 }, { "epoch": 0.18, "learning_rate": 0.0004961737197749633, "loss": 0.8071, "step": 1170 }, { "epoch": 0.18, "learning_rate": 0.0004961027202179554, "loss": 0.894, "step": 1180 }, { "epoch": 0.18, "learning_rate": 0.0004960310731509007, "loss": 0.9255, "step": 1190 }, { "epoch": 0.18, "learning_rate": 0.0004959587787623065, "loss": 0.8109, "step": 1200 }, { "epoch": 0.18, "eval_bleu": 0.09517651862521773, "eval_loss": 0.8174175024032593, "eval_meteor": 0.16180826590180308, "eval_rouge1": 0.33582745913845957, "eval_rouge2": 0.1811654909375417, "eval_rougeL": 0.281565966212259, "eval_rougeLsum": 0.2813838288792657, "eval_runtime": 1002.5623, "eval_samples_per_second": 1.454, "eval_steps_per_second": 0.242, "step": 1200 }, { "epoch": 0.19, "learning_rate": 0.0004958858372423837, "loss": 0.8624, "step": 1210 }, { "epoch": 0.19, "learning_rate": 0.000495812248783045, "loss": 0.8696, "step": 1220 }, { "epoch": 0.19, "learning_rate": 0.0004957380135779064, "loss": 0.859, "step": 1230 }, { "epoch": 0.19, "learning_rate": 0.0004956631318222846, "loss": 0.8144, "step": 1240 }, { "epoch": 0.19, "learning_rate": 0.0004955876037131974, "loss": 0.87, "step": 1250 }, { "epoch": 0.19, "learning_rate": 0.0004955114294493639, "loss": 0.8819, "step": 1260 }, { "epoch": 0.2, "learning_rate": 0.0004954346092312026, "loss": 0.8935, "step": 1270 }, { "epoch": 0.2, "learning_rate": 0.0004953571432608321, "loss": 0.8716, "step": 1280 }, { "epoch": 0.2, "learning_rate": 0.0004952790317420694, "loss": 0.8911, "step": 1290 }, { "epoch": 0.2, "learning_rate": 0.0004952002748804304, "loss": 0.8183, "step": 1300 }, { "epoch": 0.2, "eval_bleu": 0.10596849766835054, "eval_loss": 0.8043612837791443, "eval_meteor": 0.1691677125903321, "eval_rouge1": 0.3389925011791456, "eval_rouge2": 0.18348093472072707, "eval_rougeL": 0.28213366266002454, "eval_rougeLsum": 0.28213090498762367, "eval_runtime": 1141.1899, "eval_samples_per_second": 1.278, "eval_steps_per_second": 0.213, "step": 1300 }, { "epoch": 0.2, "learning_rate": 0.0004951208728831289, "loss": 0.8433, "step": 1310 }, { "epoch": 0.2, "learning_rate": 0.0004950408259590757, "loss": 0.8268, "step": 1320 }, { "epoch": 0.21, "learning_rate": 0.0004949601343188792, "loss": 0.911, "step": 1330 }, { "epoch": 0.21, "learning_rate": 0.0004948787981748433, "loss": 0.8638, "step": 1340 }, { "epoch": 0.21, "learning_rate": 0.0004947968177409681, "loss": 0.8691, "step": 1350 }, { "epoch": 0.21, "learning_rate": 0.0004947141932329486, "loss": 0.8499, "step": 1360 }, { "epoch": 0.21, "learning_rate": 0.0004946309248681745, "loss": 0.8832, "step": 1370 }, { "epoch": 0.21, "learning_rate": 0.0004945470128657297, "loss": 0.8744, "step": 1380 }, { "epoch": 0.21, "learning_rate": 0.0004944624574463913, "loss": 0.8219, "step": 1390 }, { "epoch": 0.22, "learning_rate": 0.0004943772588326292, "loss": 0.8407, "step": 1400 }, { "epoch": 0.22, "eval_bleu": 0.11433808067218891, "eval_loss": 0.8030869960784912, "eval_meteor": 0.17630048064575726, "eval_rouge1": 0.34423855201642795, "eval_rouge2": 0.18361911351356364, "eval_rougeL": 0.2837522876067602, "eval_rougeLsum": 0.28364725151298015, "eval_runtime": 1232.5857, "eval_samples_per_second": 1.183, "eval_steps_per_second": 0.197, "step": 1400 }, { "epoch": 0.22, "learning_rate": 0.0004942914172486059, "loss": 0.8932, "step": 1410 }, { "epoch": 0.22, "learning_rate": 0.0004942049329201754, "loss": 0.8544, "step": 1420 }, { "epoch": 0.22, "learning_rate": 0.0004941178060748829, "loss": 0.7995, "step": 1430 }, { "epoch": 0.22, "learning_rate": 0.0004940300369419637, "loss": 0.8576, "step": 1440 }, { "epoch": 0.22, "learning_rate": 0.0004939416257523436, "loss": 0.8654, "step": 1450 }, { "epoch": 0.23, "learning_rate": 0.0004938525727386373, "loss": 0.8559, "step": 1460 }, { "epoch": 0.23, "learning_rate": 0.0004937628781351483, "loss": 0.8738, "step": 1470 }, { "epoch": 0.23, "learning_rate": 0.000493672542177868, "loss": 0.9433, "step": 1480 }, { "epoch": 0.23, "learning_rate": 0.0004935815651044755, "loss": 0.8125, "step": 1490 }, { "epoch": 0.23, "learning_rate": 0.0004934899471543366, "loss": 0.886, "step": 1500 }, { "epoch": 0.23, "eval_bleu": 0.12742687031814315, "eval_loss": 0.805793821811676, "eval_meteor": 0.19296014590447547, "eval_rouge1": 0.35221171953589286, "eval_rouge2": 0.1846768871884069, "eval_rougeL": 0.2848280618153002, "eval_rougeLsum": 0.28493716966830007, "eval_runtime": 1452.5375, "eval_samples_per_second": 1.004, "eval_steps_per_second": 0.167, "step": 1500 }, { "epoch": 0.23, "learning_rate": 0.0004933976885685031, "loss": 0.8315, "step": 1510 }, { "epoch": 0.23, "learning_rate": 0.0004933047895897127, "loss": 0.8242, "step": 1520 }, { "epoch": 0.24, "learning_rate": 0.0004932112504623876, "loss": 0.8638, "step": 1530 }, { "epoch": 0.24, "learning_rate": 0.0004931170714326347, "loss": 0.831, "step": 1540 }, { "epoch": 0.24, "learning_rate": 0.0004930222527482442, "loss": 0.8754, "step": 1550 }, { "epoch": 0.24, "learning_rate": 0.0004929267946586894, "loss": 0.817, "step": 1560 }, { "epoch": 0.24, "learning_rate": 0.0004928306974151258, "loss": 0.906, "step": 1570 }, { "epoch": 0.24, "learning_rate": 0.0004927339612703908, "loss": 0.8899, "step": 1580 }, { "epoch": 0.25, "learning_rate": 0.0004926365864790025, "loss": 0.8845, "step": 1590 }, { "epoch": 0.25, "learning_rate": 0.0004925385732971595, "loss": 0.8761, "step": 1600 }, { "epoch": 0.25, "eval_bleu": 0.10770304213160696, "eval_loss": 0.8112803101539612, "eval_meteor": 0.17046581268631894, "eval_rouge1": 0.3402358184654584, "eval_rouge2": 0.1827631633093852, "eval_rougeL": 0.28122066588095507, "eval_rougeLsum": 0.28114478510696167, "eval_runtime": 1067.7239, "eval_samples_per_second": 1.366, "eval_steps_per_second": 0.228, "step": 1600 }, { "epoch": 0.25, "learning_rate": 0.0004924399219827398, "loss": 0.9299, "step": 1610 }, { "epoch": 0.25, "learning_rate": 0.0004923406327953007, "loss": 0.8943, "step": 1620 }, { "epoch": 0.25, "learning_rate": 0.0004922407059960776, "loss": 0.9165, "step": 1630 }, { "epoch": 0.25, "learning_rate": 0.0004921401418479834, "loss": 0.8499, "step": 1640 }, { "epoch": 0.25, "learning_rate": 0.0004920389406156082, "loss": 0.8766, "step": 1650 }, { "epoch": 0.26, "learning_rate": 0.000491937102565218, "loss": 0.8453, "step": 1660 }, { "epoch": 0.26, "learning_rate": 0.0004918346279647544, "loss": 0.8943, "step": 1670 }, { "epoch": 0.26, "learning_rate": 0.0004917315170838339, "loss": 0.8835, "step": 1680 }, { "epoch": 0.26, "learning_rate": 0.0004916277701937468, "loss": 0.8386, "step": 1690 }, { "epoch": 0.26, "learning_rate": 0.0004915233875674572, "loss": 0.872, "step": 1700 }, { "epoch": 0.26, "eval_bleu": 0.11788189676776079, "eval_loss": 0.802962601184845, "eval_meteor": 0.18209545205592362, "eval_rouge1": 0.3438678638236826, "eval_rouge2": 0.18232469842121968, "eval_rougeL": 0.28305359100970484, "eval_rougeLsum": 0.2829156060905057, "eval_runtime": 1203.8235, "eval_samples_per_second": 1.211, "eval_steps_per_second": 0.202, "step": 1700 }, { "epoch": 0.26, "learning_rate": 0.0004914183694796016, "loss": 0.8319, "step": 1710 }, { "epoch": 0.27, "learning_rate": 0.0004913127162064885, "loss": 0.8564, "step": 1720 }, { "epoch": 0.27, "learning_rate": 0.0004912064280260974, "loss": 0.8453, "step": 1730 }, { "epoch": 0.27, "learning_rate": 0.0004910995052180786, "loss": 0.8255, "step": 1740 }, { "epoch": 0.27, "learning_rate": 0.0004909919480637519, "loss": 0.8831, "step": 1750 }, { "epoch": 0.27, "learning_rate": 0.0004908837568461064, "loss": 0.8273, "step": 1760 }, { "epoch": 0.27, "learning_rate": 0.0004907749318497991, "loss": 0.8494, "step": 1770 }, { "epoch": 0.27, "learning_rate": 0.0004906654733611547, "loss": 0.8869, "step": 1780 }, { "epoch": 0.28, "learning_rate": 0.0004905553816681646, "loss": 0.778, "step": 1790 }, { "epoch": 0.28, "learning_rate": 0.0004904446570604862, "loss": 0.9082, "step": 1800 }, { "epoch": 0.28, "eval_bleu": 0.11523676534243875, "eval_loss": 0.8040370345115662, "eval_meteor": 0.17806856469850704, "eval_rouge1": 0.34321948648974265, "eval_rouge2": 0.18492135732773402, "eval_rougeL": 0.2827187054731721, "eval_rougeLsum": 0.2825956342144159, "eval_runtime": 1223.1433, "eval_samples_per_second": 1.192, "eval_steps_per_second": 0.199, "step": 1800 }, { "epoch": 0.28, "learning_rate": 0.0004903332998294422, "loss": 0.8266, "step": 1810 }, { "epoch": 0.28, "learning_rate": 0.0004902213102680197, "loss": 0.85, "step": 1820 }, { "epoch": 0.28, "learning_rate": 0.0004901086886708695, "loss": 0.8447, "step": 1830 }, { "epoch": 0.28, "learning_rate": 0.0004899954353343053, "loss": 0.8803, "step": 1840 }, { "epoch": 0.29, "learning_rate": 0.000489881550556303, "loss": 0.7804, "step": 1850 }, { "epoch": 0.29, "learning_rate": 0.0004897670346364998, "loss": 0.9184, "step": 1860 }, { "epoch": 0.29, "learning_rate": 0.0004896518878761937, "loss": 0.8075, "step": 1870 }, { "epoch": 0.29, "learning_rate": 0.000489536110578342, "loss": 0.8086, "step": 1880 }, { "epoch": 0.29, "learning_rate": 0.0004894197030475614, "loss": 0.8357, "step": 1890 }, { "epoch": 0.29, "learning_rate": 0.0004893026655901266, "loss": 0.818, "step": 1900 }, { "epoch": 0.29, "eval_bleu": 0.1152261234460638, "eval_loss": 0.7954283356666565, "eval_meteor": 0.18005815233416442, "eval_rouge1": 0.3480841484526223, "eval_rouge2": 0.1878809645185771, "eval_rougeL": 0.2866495022876356, "eval_rougeLsum": 0.2865319164852567, "eval_runtime": 1150.8093, "eval_samples_per_second": 1.267, "eval_steps_per_second": 0.211, "step": 1900 }, { "epoch": 0.29, "learning_rate": 0.0004891849985139697, "loss": 0.919, "step": 1910 }, { "epoch": 0.3, "learning_rate": 0.0004890667021286794, "loss": 0.8702, "step": 1920 }, { "epoch": 0.3, "learning_rate": 0.0004889477767455002, "loss": 0.8223, "step": 1930 }, { "epoch": 0.3, "learning_rate": 0.0004888282226773313, "loss": 0.8881, "step": 1940 }, { "epoch": 0.3, "learning_rate": 0.0004887080402387262, "loss": 0.8326, "step": 1950 }, { "epoch": 0.3, "learning_rate": 0.0004885872297458915, "loss": 0.7896, "step": 1960 }, { "epoch": 0.3, "learning_rate": 0.0004884657915166867, "loss": 0.8202, "step": 1970 }, { "epoch": 0.31, "learning_rate": 0.0004883437258706224, "loss": 0.8457, "step": 1980 }, { "epoch": 0.31, "learning_rate": 0.0004882210331288601, "loss": 0.8648, "step": 1990 }, { "epoch": 0.31, "learning_rate": 0.0004880977136142113, "loss": 0.7916, "step": 2000 }, { "epoch": 0.31, "eval_bleu": 0.102031392794918, "eval_loss": 0.7973849773406982, "eval_meteor": 0.1697416518292726, "eval_rouge1": 0.3407404763360161, "eval_rouge2": 0.18420618651745257, "eval_rougeL": 0.2837111287248353, "eval_rougeLsum": 0.28358533147000164, "eval_runtime": 1060.7578, "eval_samples_per_second": 1.374, "eval_steps_per_second": 0.229, "step": 2000 }, { "epoch": 0.31, "learning_rate": 0.00048797376765113667, "loss": 0.8344, "step": 2010 }, { "epoch": 0.31, "learning_rate": 0.0004878491955657448, "loss": 0.8599, "step": 2020 }, { "epoch": 0.31, "learning_rate": 0.000487723997685792, "loss": 0.8316, "step": 2030 }, { "epoch": 0.31, "learning_rate": 0.00048759817434068084, "loss": 0.7967, "step": 2040 }, { "epoch": 0.32, "learning_rate": 0.00048747172586145954, "loss": 0.8272, "step": 2050 }, { "epoch": 0.32, "learning_rate": 0.0004873446525808212, "loss": 0.8879, "step": 2060 }, { "epoch": 0.32, "learning_rate": 0.0004872169548331027, "loss": 0.8938, "step": 2070 }, { "epoch": 0.32, "learning_rate": 0.0004870886329542841, "loss": 0.9446, "step": 2080 }, { "epoch": 0.32, "learning_rate": 0.00048695968728198726, "loss": 0.8329, "step": 2090 }, { "epoch": 0.32, "learning_rate": 0.00048683011815547553, "loss": 0.8701, "step": 2100 }, { "epoch": 0.32, "eval_bleu": 0.10951609270697767, "eval_loss": 0.7971030473709106, "eval_meteor": 0.1724449245874015, "eval_rouge1": 0.34523527799298537, "eval_rouge2": 0.18728792305872893, "eval_rougeL": 0.2875415995221111, "eval_rougeLsum": 0.2878305472355617, "eval_runtime": 1081.3597, "eval_samples_per_second": 1.348, "eval_steps_per_second": 0.225, "step": 2100 }, { "epoch": 0.33, "learning_rate": 0.0004866999259156526, "loss": 0.856, "step": 2110 }, { "epoch": 0.33, "learning_rate": 0.0004865691109050615, "loss": 0.8573, "step": 2120 }, { "epoch": 0.33, "learning_rate": 0.0004864376734678839, "loss": 0.813, "step": 2130 }, { "epoch": 0.33, "learning_rate": 0.000486305613949939, "loss": 0.9175, "step": 2140 }, { "epoch": 0.33, "learning_rate": 0.00048617293269868277, "loss": 0.8486, "step": 2150 }, { "epoch": 0.33, "learning_rate": 0.0004860396300632072, "loss": 0.8752, "step": 2160 }, { "epoch": 0.33, "learning_rate": 0.0004859057063942387, "loss": 0.8291, "step": 2170 }, { "epoch": 0.34, "learning_rate": 0.00048577116204413817, "loss": 0.8263, "step": 2180 }, { "epoch": 0.34, "learning_rate": 0.00048563599736689935, "loss": 0.8634, "step": 2190 }, { "epoch": 0.34, "learning_rate": 0.00048550021271814793, "loss": 0.813, "step": 2200 }, { "epoch": 0.34, "eval_bleu": 0.11357786165382955, "eval_loss": 0.7968371510505676, "eval_meteor": 0.17812534934300692, "eval_rouge1": 0.34547989314882344, "eval_rouge2": 0.18647144671781885, "eval_rougeL": 0.28578813253772484, "eval_rougeLsum": 0.28593828002638405, "eval_runtime": 1224.0946, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.199, "step": 2200 }, { "epoch": 0.34, "learning_rate": 0.000485363808455141, "loss": 0.8431, "step": 2210 }, { "epoch": 0.34, "learning_rate": 0.0004852267849367659, "loss": 0.9313, "step": 2220 }, { "epoch": 0.34, "learning_rate": 0.000485089142523539, "loss": 0.8619, "step": 2230 }, { "epoch": 0.35, "learning_rate": 0.00048495088157760535, "loss": 0.8258, "step": 2240 }, { "epoch": 0.35, "learning_rate": 0.00048481200246273715, "loss": 0.8584, "step": 2250 }, { "epoch": 0.35, "learning_rate": 0.0004846725055443333, "loss": 0.8279, "step": 2260 }, { "epoch": 0.35, "learning_rate": 0.0004845323911894178, "loss": 0.847, "step": 2270 }, { "epoch": 0.35, "learning_rate": 0.00048439165976663947, "loss": 0.837, "step": 2280 }, { "epoch": 0.35, "learning_rate": 0.00048425031164627056, "loss": 0.8753, "step": 2290 }, { "epoch": 0.35, "learning_rate": 0.0004841083472002059, "loss": 0.8525, "step": 2300 }, { "epoch": 0.35, "eval_bleu": 0.11550228990610252, "eval_loss": 0.7897738218307495, "eval_meteor": 0.17967220096769168, "eval_rouge1": 0.34880707864872973, "eval_rouge2": 0.18938272334418443, "eval_rougeL": 0.28769647914513974, "eval_rougeLsum": 0.2877285709755628, "eval_runtime": 1181.2131, "eval_samples_per_second": 1.234, "eval_steps_per_second": 0.206, "step": 2300 }, { "epoch": 0.36, "learning_rate": 0.0004839657668019619, "loss": 0.831, "step": 2310 }, { "epoch": 0.36, "learning_rate": 0.00048382257082667566, "loss": 0.8446, "step": 2320 }, { "epoch": 0.36, "learning_rate": 0.00048367875965110366, "loss": 0.8041, "step": 2330 }, { "epoch": 0.36, "learning_rate": 0.0004835343336536212, "loss": 0.8464, "step": 2340 }, { "epoch": 0.36, "learning_rate": 0.0004833892932142213, "loss": 0.8198, "step": 2350 }, { "epoch": 0.36, "learning_rate": 0.00048324363871451325, "loss": 0.9116, "step": 2360 }, { "epoch": 0.37, "learning_rate": 0.00048309737053772245, "loss": 0.8471, "step": 2370 }, { "epoch": 0.37, "learning_rate": 0.00048295048906868854, "loss": 0.8676, "step": 2380 }, { "epoch": 0.37, "learning_rate": 0.00048280299469386493, "loss": 0.8034, "step": 2390 }, { "epoch": 0.37, "learning_rate": 0.0004826548878013175, "loss": 0.8494, "step": 2400 }, { "epoch": 0.37, "eval_bleu": 0.11484040536071449, "eval_loss": 0.7928580641746521, "eval_meteor": 0.17927183932418322, "eval_rouge1": 0.353015756939868, "eval_rouge2": 0.1937422671781578, "eval_rougeL": 0.29412424979719143, "eval_rougeLsum": 0.29416263018885086, "eval_runtime": 1151.8853, "eval_samples_per_second": 1.266, "eval_steps_per_second": 0.211, "step": 2400 }, { "epoch": 0.37, "learning_rate": 0.00048250616878072383, "loss": 0.8746, "step": 2410 }, { "epoch": 0.37, "learning_rate": 0.0004823568380233721, "loss": 0.8028, "step": 2420 }, { "epoch": 0.37, "learning_rate": 0.0004822068959221598, "loss": 0.9032, "step": 2430 }, { "epoch": 0.38, "learning_rate": 0.000482056342871593, "loss": 0.8321, "step": 2440 }, { "epoch": 0.38, "learning_rate": 0.0004819051792677852, "loss": 0.854, "step": 2450 }, { "epoch": 0.38, "learning_rate": 0.00048175340550845637, "loss": 0.9002, "step": 2460 }, { "epoch": 0.38, "learning_rate": 0.00048160102199293174, "loss": 0.8416, "step": 2470 }, { "epoch": 0.38, "learning_rate": 0.00048144802912214094, "loss": 0.879, "step": 2480 }, { "epoch": 0.38, "learning_rate": 0.00048129442729861663, "loss": 0.8377, "step": 2490 }, { "epoch": 0.39, "learning_rate": 0.00048114021692649404, "loss": 0.8439, "step": 2500 }, { "epoch": 0.39, "eval_bleu": 0.1110213869118398, "eval_loss": 0.7995119094848633, "eval_meteor": 0.17509915967969378, "eval_rouge1": 0.34284195131985784, "eval_rouge2": 0.1868890431147761, "eval_rougeL": 0.2857996409683133, "eval_rougeLsum": 0.28592281635680744, "eval_runtime": 1147.2498, "eval_samples_per_second": 1.271, "eval_steps_per_second": 0.212, "step": 2500 }, { "epoch": 0.39, "learning_rate": 0.000480985398411509, "loss": 0.8648, "step": 2510 }, { "epoch": 0.39, "learning_rate": 0.00048082997216099797, "loss": 0.8771, "step": 2520 }, { "epoch": 0.39, "learning_rate": 0.0004806739385838961, "loss": 0.8275, "step": 2530 }, { "epoch": 0.39, "learning_rate": 0.0004805172980907363, "loss": 0.8215, "step": 2540 }, { "epoch": 0.39, "learning_rate": 0.00048036005109364856, "loss": 0.8678, "step": 2550 }, { "epoch": 0.39, "learning_rate": 0.0004802021980063586, "loss": 0.8408, "step": 2560 }, { "epoch": 0.4, "learning_rate": 0.00048004373924418674, "loss": 0.8536, "step": 2570 }, { "epoch": 0.4, "learning_rate": 0.0004798846752240468, "loss": 0.8302, "step": 2580 }, { "epoch": 0.4, "learning_rate": 0.0004797250063644452, "loss": 0.8429, "step": 2590 }, { "epoch": 0.4, "learning_rate": 0.0004795647330854795, "loss": 0.8562, "step": 2600 }, { "epoch": 0.4, "eval_bleu": 0.10131964675585854, "eval_loss": 0.7919116616249084, "eval_meteor": 0.1612163387444336, "eval_rouge1": 0.3392932133690917, "eval_rouge2": 0.18896904080765833, "eval_rougeL": 0.2874520136930931, "eval_rougeLsum": 0.287545512675921, "eval_runtime": 998.4776, "eval_samples_per_second": 1.46, "eval_steps_per_second": 0.243, "step": 2600 }, { "epoch": 0.4, "learning_rate": 0.00047940385580883785, "loss": 0.8855, "step": 2610 }, { "epoch": 0.4, "learning_rate": 0.00047924237495779734, "loss": 0.845, "step": 2620 }, { "epoch": 0.41, "learning_rate": 0.00047908029095722305, "loss": 0.8403, "step": 2630 }, { "epoch": 0.41, "learning_rate": 0.00047891760423356724, "loss": 0.8222, "step": 2640 }, { "epoch": 0.41, "learning_rate": 0.00047875431521486757, "loss": 0.8677, "step": 2650 }, { "epoch": 0.41, "learning_rate": 0.0004785904243307468, "loss": 0.8145, "step": 2660 }, { "epoch": 0.41, "learning_rate": 0.0004784259320124109, "loss": 0.8303, "step": 2670 }, { "epoch": 0.41, "learning_rate": 0.00047826083869264847, "loss": 0.8224, "step": 2680 }, { "epoch": 0.41, "learning_rate": 0.00047809514480582916, "loss": 0.816, "step": 2690 }, { "epoch": 0.42, "learning_rate": 0.00047792885078790304, "loss": 0.7636, "step": 2700 }, { "epoch": 0.42, "eval_bleu": 0.10954714255898276, "eval_loss": 0.7920675277709961, "eval_meteor": 0.17267822313892012, "eval_rouge1": 0.34506677355407445, "eval_rouge2": 0.18817280144902515, "eval_rougeL": 0.2890166522888482, "eval_rougeLsum": 0.2890120291145137, "eval_runtime": 1093.769, "eval_samples_per_second": 1.333, "eval_steps_per_second": 0.222, "step": 2700 }, { "epoch": 0.42, "learning_rate": 0.0004777619570763988, "loss": 0.8926, "step": 2710 }, { "epoch": 0.42, "learning_rate": 0.00047759446411042335, "loss": 0.934, "step": 2720 }, { "epoch": 0.42, "learning_rate": 0.0004774263723306599, "loss": 0.8923, "step": 2730 }, { "epoch": 0.42, "learning_rate": 0.0004772576821793674, "loss": 0.8999, "step": 2740 }, { "epoch": 0.42, "learning_rate": 0.00047708839410037914, "loss": 0.8344, "step": 2750 }, { "epoch": 0.43, "learning_rate": 0.00047691850853910146, "loss": 0.8274, "step": 2760 }, { "epoch": 0.43, "learning_rate": 0.0004767480259425128, "loss": 0.7697, "step": 2770 }, { "epoch": 0.43, "learning_rate": 0.00047657694675916254, "loss": 0.8455, "step": 2780 }, { "epoch": 0.43, "learning_rate": 0.00047640527143916943, "loss": 0.8216, "step": 2790 }, { "epoch": 0.43, "learning_rate": 0.0004762330004342209, "loss": 0.8509, "step": 2800 }, { "epoch": 0.43, "eval_bleu": 0.11026373004464625, "eval_loss": 0.7961094379425049, "eval_meteor": 0.17107804416084108, "eval_rouge1": 0.34419235974247625, "eval_rouge2": 0.1879446304753386, "eval_rougeL": 0.2879382324644244, "eval_rougeLsum": 0.28804849261741966, "eval_runtime": 1114.3068, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.218, "step": 2800 }, { "epoch": 0.43, "learning_rate": 0.0004760601341975718, "loss": 0.832, "step": 2810 }, { "epoch": 0.43, "learning_rate": 0.0004758866731840426, "loss": 0.8718, "step": 2820 }, { "epoch": 0.44, "learning_rate": 0.00047571261785001913, "loss": 0.8075, "step": 2830 }, { "epoch": 0.44, "learning_rate": 0.0004755379686534507, "loss": 0.8044, "step": 2840 }, { "epoch": 0.44, "learning_rate": 0.00047536272605384905, "loss": 0.8582, "step": 2850 }, { "epoch": 0.44, "learning_rate": 0.00047518689051228734, "loss": 0.7933, "step": 2860 }, { "epoch": 0.44, "learning_rate": 0.00047501046249139885, "loss": 0.8387, "step": 2870 }, { "epoch": 0.44, "learning_rate": 0.0004748334424553754, "loss": 0.8743, "step": 2880 }, { "epoch": 0.45, "learning_rate": 0.0004746558308699667, "loss": 0.8453, "step": 2890 }, { "epoch": 0.45, "learning_rate": 0.00047447762820247876, "loss": 0.7834, "step": 2900 }, { "epoch": 0.45, "eval_bleu": 0.10144013679758371, "eval_loss": 0.7860347628593445, "eval_meteor": 0.16863044336346464, "eval_rouge1": 0.3483945653742756, "eval_rouge2": 0.1906512981948328, "eval_rougeL": 0.2928307931237276, "eval_rougeLsum": 0.292886082360032, "eval_runtime": 1008.318, "eval_samples_per_second": 1.446, "eval_steps_per_second": 0.241, "step": 2900 }, { "epoch": 0.45, "learning_rate": 0.00047429883492177284, "loss": 0.8794, "step": 2910 }, { "epoch": 0.45, "learning_rate": 0.00047411945149826397, "loss": 0.8534, "step": 2920 }, { "epoch": 0.45, "learning_rate": 0.00047393947840392015, "loss": 0.8286, "step": 2930 }, { "epoch": 0.45, "learning_rate": 0.0004737589161122605, "loss": 0.9363, "step": 2940 }, { "epoch": 0.45, "learning_rate": 0.0004735777650983547, "loss": 0.8218, "step": 2950 }, { "epoch": 0.46, "learning_rate": 0.00047339602583882105, "loss": 0.7604, "step": 2960 }, { "epoch": 0.46, "learning_rate": 0.00047321369881182584, "loss": 0.9392, "step": 2970 }, { "epoch": 0.46, "learning_rate": 0.0004730307844970817, "loss": 0.8586, "step": 2980 }, { "epoch": 0.46, "learning_rate": 0.00047284728337584637, "loss": 0.8175, "step": 2990 }, { "epoch": 0.46, "learning_rate": 0.00047266319593092167, "loss": 0.8156, "step": 3000 }, { "epoch": 0.46, "eval_bleu": 0.11205014855700891, "eval_loss": 0.7846682071685791, "eval_meteor": 0.1751311712892055, "eval_rouge1": 0.35172070697143076, "eval_rouge2": 0.1931095449214266, "eval_rougeL": 0.29234436972165456, "eval_rougeLsum": 0.29205838030588194, "eval_runtime": 1104.847, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.22, "step": 3000 }, { "epoch": 0.46, "learning_rate": 0.00047247852264665184, "loss": 0.821, "step": 3010 }, { "epoch": 0.47, "learning_rate": 0.0004722932640089228, "loss": 0.8089, "step": 3020 }, { "epoch": 0.47, "learning_rate": 0.0004721074205051603, "loss": 0.8205, "step": 3030 }, { "epoch": 0.47, "learning_rate": 0.000471920992624329, "loss": 0.8564, "step": 3040 }, { "epoch": 0.47, "learning_rate": 0.0004717339808569312, "loss": 0.8746, "step": 3050 }, { "epoch": 0.47, "learning_rate": 0.00047154638569500527, "loss": 0.7676, "step": 3060 }, { "epoch": 0.47, "learning_rate": 0.00047135820763212466, "loss": 0.7534, "step": 3070 }, { "epoch": 0.47, "learning_rate": 0.0004711694471633963, "loss": 0.8063, "step": 3080 }, { "epoch": 0.48, "learning_rate": 0.0004709801047854596, "loss": 0.8258, "step": 3090 }, { "epoch": 0.48, "learning_rate": 0.00047079018099648495, "loss": 0.8006, "step": 3100 }, { "epoch": 0.48, "eval_bleu": 0.10544844410608596, "eval_loss": 0.779005765914917, "eval_meteor": 0.16928172352068147, "eval_rouge1": 0.34521811226055105, "eval_rouge2": 0.188414735386506, "eval_rougeL": 0.2883152848683356, "eval_rougeLsum": 0.28845113954285684, "eval_runtime": 1093.1651, "eval_samples_per_second": 1.334, "eval_steps_per_second": 0.222, "step": 3100 }, { "epoch": 0.48, "learning_rate": 0.00047059967629617253, "loss": 0.8696, "step": 3110 }, { "epoch": 0.48, "learning_rate": 0.00047040859118575087, "loss": 0.8342, "step": 3120 }, { "epoch": 0.48, "learning_rate": 0.0004702169261679755, "loss": 0.8407, "step": 3130 }, { "epoch": 0.48, "learning_rate": 0.000470024681747128, "loss": 0.8277, "step": 3140 }, { "epoch": 0.49, "learning_rate": 0.0004698318584290141, "loss": 0.861, "step": 3150 }, { "epoch": 0.49, "learning_rate": 0.0004696384567209628, "loss": 0.8629, "step": 3160 }, { "epoch": 0.49, "learning_rate": 0.00046944447713182473, "loss": 0.7462, "step": 3170 }, { "epoch": 0.49, "learning_rate": 0.0004692499201719712, "loss": 0.8569, "step": 3180 }, { "epoch": 0.49, "learning_rate": 0.0004690547863532924, "loss": 0.7479, "step": 3190 }, { "epoch": 0.49, "learning_rate": 0.0004688590761891963, "loss": 0.7449, "step": 3200 }, { "epoch": 0.49, "eval_bleu": 0.11320699643880565, "eval_loss": 0.7808765769004822, "eval_meteor": 0.17517461553603783, "eval_rouge1": 0.34801418474464885, "eval_rouge2": 0.1926526264127003, "eval_rougeL": 0.2935871923613115, "eval_rougeLsum": 0.29356108231725353, "eval_runtime": 1147.8505, "eval_samples_per_second": 1.27, "eval_steps_per_second": 0.212, "step": 3200 }, { "epoch": 0.49, "learning_rate": 0.0004686627901946074, "loss": 0.8372, "step": 3210 }, { "epoch": 0.5, "learning_rate": 0.00046846592888596505, "loss": 0.8033, "step": 3220 }, { "epoch": 0.5, "learning_rate": 0.0004682684927812225, "loss": 0.8315, "step": 3230 }, { "epoch": 0.5, "learning_rate": 0.0004680704823998452, "loss": 0.7874, "step": 3240 }, { "epoch": 0.5, "learning_rate": 0.0004678718982628094, "loss": 0.8616, "step": 3250 }, { "epoch": 0.5, "learning_rate": 0.0004676727408926012, "loss": 0.8039, "step": 3260 }, { "epoch": 0.5, "learning_rate": 0.0004674730108132148, "loss": 0.8342, "step": 3270 }, { "epoch": 0.51, "learning_rate": 0.00046727270855015124, "loss": 0.8528, "step": 3280 }, { "epoch": 0.51, "learning_rate": 0.000467071834630417, "loss": 0.8195, "step": 3290 }, { "epoch": 0.51, "learning_rate": 0.0004668703895825226, "loss": 0.8123, "step": 3300 }, { "epoch": 0.51, "eval_bleu": 0.12139040928235188, "eval_loss": 0.7802536487579346, "eval_meteor": 0.18649658941096503, "eval_rouge1": 0.35447872684757437, "eval_rouge2": 0.19104999366552095, "eval_rougeL": 0.29335305776164255, "eval_rougeLsum": 0.29324750486448675, "eval_runtime": 1188.3453, "eval_samples_per_second": 1.227, "eval_steps_per_second": 0.204, "step": 3300 }, { "epoch": 0.51, "learning_rate": 0.0004666683739364812, "loss": 0.8181, "step": 3310 }, { "epoch": 0.51, "learning_rate": 0.0004664657882238074, "loss": 0.8842, "step": 3320 }, { "epoch": 0.51, "learning_rate": 0.00046626263297751546, "loss": 0.8528, "step": 3330 }, { "epoch": 0.51, "learning_rate": 0.0004660589087321183, "loss": 0.7764, "step": 3340 }, { "epoch": 0.52, "learning_rate": 0.0004658546160236257, "loss": 0.8313, "step": 3350 }, { "epoch": 0.52, "learning_rate": 0.00046564975538954334, "loss": 0.8438, "step": 3360 }, { "epoch": 0.52, "learning_rate": 0.00046544432736887097, "loss": 0.8519, "step": 3370 }, { "epoch": 0.52, "learning_rate": 0.00046523833250210135, "loss": 0.809, "step": 3380 }, { "epoch": 0.52, "learning_rate": 0.0004650317713312183, "loss": 0.8335, "step": 3390 }, { "epoch": 0.52, "learning_rate": 0.00046482464439969595, "loss": 0.8428, "step": 3400 }, { "epoch": 0.52, "eval_bleu": 0.1250886172570181, "eval_loss": 0.7814038991928101, "eval_meteor": 0.19055488234703907, "eval_rouge1": 0.3612439239393179, "eval_rouge2": 0.1958006760878207, "eval_rougeL": 0.2964134143411231, "eval_rougeLsum": 0.296116576418878, "eval_runtime": 1216.6365, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.2, "step": 3400 }, { "epoch": 0.53, "learning_rate": 0.0004646169522524969, "loss": 0.8177, "step": 3410 }, { "epoch": 0.53, "learning_rate": 0.0004644086954360708, "loss": 0.8482, "step": 3420 }, { "epoch": 0.53, "learning_rate": 0.0004641998744983529, "loss": 0.86, "step": 3430 }, { "epoch": 0.53, "learning_rate": 0.0004639904899887629, "loss": 0.8508, "step": 3440 }, { "epoch": 0.53, "learning_rate": 0.0004637805424582032, "loss": 0.9085, "step": 3450 }, { "epoch": 0.53, "learning_rate": 0.0004635700324590574, "loss": 0.8397, "step": 3460 }, { "epoch": 0.53, "learning_rate": 0.0004633589605451892, "loss": 0.8734, "step": 3470 }, { "epoch": 0.54, "learning_rate": 0.00046314732727194063, "loss": 0.7649, "step": 3480 }, { "epoch": 0.54, "learning_rate": 0.00046293513319613065, "loss": 0.8471, "step": 3490 }, { "epoch": 0.54, "learning_rate": 0.00046272237887605384, "loss": 0.832, "step": 3500 }, { "epoch": 0.54, "eval_bleu": 0.10486572244619506, "eval_loss": 0.7807884812355042, "eval_meteor": 0.16818865401790847, "eval_rouge1": 0.34705793027937726, "eval_rouge2": 0.19113964280370677, "eval_rougeL": 0.29299029802431953, "eval_rougeLsum": 0.29317886290919454, "eval_runtime": 1105.6756, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.22, "step": 3500 }, { "epoch": 0.54, "learning_rate": 0.0004625090648714786, "loss": 0.8337, "step": 3510 }, { "epoch": 0.54, "learning_rate": 0.00046229519174364607, "loss": 0.8794, "step": 3520 }, { "epoch": 0.54, "learning_rate": 0.0004620807600552686, "loss": 0.7694, "step": 3530 }, { "epoch": 0.55, "learning_rate": 0.0004618657703705277, "loss": 0.8027, "step": 3540 }, { "epoch": 0.55, "learning_rate": 0.0004616502232550734, "loss": 0.8519, "step": 3550 }, { "epoch": 0.55, "learning_rate": 0.0004614341192760224, "loss": 0.8001, "step": 3560 }, { "epoch": 0.55, "learning_rate": 0.0004612174590019562, "loss": 0.8368, "step": 3570 }, { "epoch": 0.55, "learning_rate": 0.0004610002430029201, "loss": 0.8555, "step": 3580 }, { "epoch": 0.55, "learning_rate": 0.00046078247185042177, "loss": 0.7932, "step": 3590 }, { "epoch": 0.55, "learning_rate": 0.00046056414611742903, "loss": 0.7795, "step": 3600 }, { "epoch": 0.55, "eval_bleu": 0.12987137116221253, "eval_loss": 0.7785532474517822, "eval_meteor": 0.1977638908170833, "eval_rouge1": 0.3563200854587399, "eval_rouge2": 0.19216790450914428, "eval_rougeL": 0.2927497382733434, "eval_rougeLsum": 0.2929236969907393, "eval_runtime": 1319.027, "eval_samples_per_second": 1.105, "eval_steps_per_second": 0.184, "step": 3600 }, { "epoch": 0.56, "learning_rate": 0.00046034526637836926, "loss": 0.7853, "step": 3610 }, { "epoch": 0.56, "learning_rate": 0.0004601258332091274, "loss": 0.7442, "step": 3620 }, { "epoch": 0.56, "learning_rate": 0.0004599058471870443, "loss": 0.8214, "step": 3630 }, { "epoch": 0.56, "learning_rate": 0.00045968530889091555, "loss": 0.7751, "step": 3640 }, { "epoch": 0.56, "learning_rate": 0.00045946421890098965, "loss": 0.8645, "step": 3650 }, { "epoch": 0.56, "learning_rate": 0.00045924257779896693, "loss": 0.8341, "step": 3660 }, { "epoch": 0.57, "learning_rate": 0.00045902038616799746, "loss": 0.8099, "step": 3670 }, { "epoch": 0.57, "learning_rate": 0.0004587976445926799, "loss": 0.8532, "step": 3680 }, { "epoch": 0.57, "learning_rate": 0.0004585743536590599, "loss": 0.851, "step": 3690 }, { "epoch": 0.57, "learning_rate": 0.0004583505139546281, "loss": 0.8155, "step": 3700 }, { "epoch": 0.57, "eval_bleu": 0.1275949150703291, "eval_loss": 0.7744527459144592, "eval_meteor": 0.19542313704697203, "eval_rouge1": 0.36221871637002456, "eval_rouge2": 0.19723981570527915, "eval_rougeL": 0.29817828224087256, "eval_rougeLsum": 0.29819760162358966, "eval_runtime": 1204.787, "eval_samples_per_second": 1.21, "eval_steps_per_second": 0.202, "step": 3700 }, { "epoch": 0.57, "learning_rate": 0.00045812612606831974, "loss": 0.7528, "step": 3710 }, { "epoch": 0.57, "learning_rate": 0.00045790119059051156, "loss": 0.8188, "step": 3720 }, { "epoch": 0.58, "learning_rate": 0.0004576757081130216, "loss": 0.8529, "step": 3730 }, { "epoch": 0.58, "learning_rate": 0.00045744967922910684, "loss": 0.7864, "step": 3740 }, { "epoch": 0.58, "learning_rate": 0.00045722310453346195, "loss": 0.78, "step": 3750 }, { "epoch": 0.58, "learning_rate": 0.00045699598462221766, "loss": 0.813, "step": 3760 }, { "epoch": 0.58, "learning_rate": 0.0004567683200929391, "loss": 0.8402, "step": 3770 }, { "epoch": 0.58, "learning_rate": 0.0004565401115446246, "loss": 0.8541, "step": 3780 }, { "epoch": 0.58, "learning_rate": 0.00045631135957770343, "loss": 0.7645, "step": 3790 }, { "epoch": 0.59, "learning_rate": 0.00045608206479403484, "loss": 0.8419, "step": 3800 }, { "epoch": 0.59, "eval_bleu": 0.11288874484370615, "eval_loss": 0.7737380862236023, "eval_meteor": 0.1795823568139638, "eval_rouge1": 0.3517171303500152, "eval_rouge2": 0.19480929623517923, "eval_rougeL": 0.29489583256807006, "eval_rougeLsum": 0.2948638738211926, "eval_runtime": 1083.0127, "eval_samples_per_second": 1.346, "eval_steps_per_second": 0.224, "step": 3800 }, { "epoch": 0.59, "learning_rate": 0.00045585222779690636, "loss": 0.7908, "step": 3810 }, { "epoch": 0.59, "learning_rate": 0.0004556218491910321, "loss": 0.7799, "step": 3820 }, { "epoch": 0.59, "learning_rate": 0.0004553909295825508, "loss": 0.7822, "step": 3830 }, { "epoch": 0.59, "learning_rate": 0.0004551594695790251, "loss": 0.817, "step": 3840 }, { "epoch": 0.59, "learning_rate": 0.0004549274697894392, "loss": 0.7824, "step": 3850 }, { "epoch": 0.6, "learning_rate": 0.00045469493082419757, "loss": 0.8274, "step": 3860 }, { "epoch": 0.6, "learning_rate": 0.0004544618532951231, "loss": 0.7928, "step": 3870 }, { "epoch": 0.6, "learning_rate": 0.00045422823781545596, "loss": 0.8542, "step": 3880 }, { "epoch": 0.6, "learning_rate": 0.0004539940849998516, "loss": 0.8367, "step": 3890 }, { "epoch": 0.6, "learning_rate": 0.00045375939546437916, "loss": 0.8581, "step": 3900 }, { "epoch": 0.6, "eval_bleu": 0.11781251984515774, "eval_loss": 0.777377724647522, "eval_meteor": 0.1829209829854384, "eval_rouge1": 0.35563054870017097, "eval_rouge2": 0.195963399617126, "eval_rougeL": 0.2979095627621663, "eval_rougeLsum": 0.2980344526869577, "eval_runtime": 1135.666, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.214, "step": 3900 }, { "epoch": 0.6, "learning_rate": 0.0004535241698265199, "loss": 0.8475, "step": 3910 }, { "epoch": 0.6, "learning_rate": 0.0004532884087051657, "loss": 0.8985, "step": 3920 }, { "epoch": 0.61, "learning_rate": 0.0004530521127206173, "loss": 0.8487, "step": 3930 }, { "epoch": 0.61, "learning_rate": 0.0004528152824945827, "loss": 0.7998, "step": 3940 }, { "epoch": 0.61, "learning_rate": 0.00045257791865017537, "loss": 0.7846, "step": 3950 }, { "epoch": 0.61, "learning_rate": 0.00045234002181191303, "loss": 0.7838, "step": 3960 }, { "epoch": 0.61, "learning_rate": 0.00045210159260571553, "loss": 0.8362, "step": 3970 }, { "epoch": 0.61, "learning_rate": 0.00045186263165890344, "loss": 0.8134, "step": 3980 }, { "epoch": 0.62, "learning_rate": 0.0004516231396001965, "loss": 0.7644, "step": 3990 }, { "epoch": 0.62, "learning_rate": 0.00045138311705971156, "loss": 0.8646, "step": 4000 }, { "epoch": 0.62, "eval_bleu": 0.13005061015510616, "eval_loss": 0.7740051746368408, "eval_meteor": 0.19733944533403236, "eval_rouge1": 0.3588018183491992, "eval_rouge2": 0.192617974264134, "eval_rougeL": 0.2937254663710055, "eval_rougeLsum": 0.2938043972565847, "eval_runtime": 1241.046, "eval_samples_per_second": 1.175, "eval_steps_per_second": 0.196, "step": 4000 }, { "epoch": 0.62, "learning_rate": 0.0004511425646689615, "loss": 0.7807, "step": 4010 }, { "epoch": 0.62, "learning_rate": 0.0004509014830608532, "loss": 0.8442, "step": 4020 }, { "epoch": 0.62, "learning_rate": 0.0004506598728696858, "loss": 0.8019, "step": 4030 }, { "epoch": 0.62, "learning_rate": 0.0004504177347311492, "loss": 0.7976, "step": 4040 }, { "epoch": 0.62, "learning_rate": 0.0004501750692823224, "loss": 0.9046, "step": 4050 }, { "epoch": 0.63, "learning_rate": 0.00044993187716167195, "loss": 0.7559, "step": 4060 }, { "epoch": 0.63, "learning_rate": 0.0004496881590090498, "loss": 0.8358, "step": 4070 }, { "epoch": 0.63, "learning_rate": 0.00044944391546569213, "loss": 0.791, "step": 4080 }, { "epoch": 0.63, "learning_rate": 0.00044919914717421737, "loss": 0.8007, "step": 4090 }, { "epoch": 0.63, "learning_rate": 0.0004489538547786246, "loss": 0.7515, "step": 4100 }, { "epoch": 0.63, "eval_bleu": 0.10412061373178255, "eval_loss": 0.7685180902481079, "eval_meteor": 0.1663022168419246, "eval_rouge1": 0.34510339415285696, "eval_rouge2": 0.19279130187913826, "eval_rougeL": 0.2909396669204617, "eval_rougeLsum": 0.29102359815063095, "eval_runtime": 1022.5977, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.238, "step": 4100 }, { "epoch": 0.63, "learning_rate": 0.00044870803892429193, "loss": 0.8091, "step": 4110 }, { "epoch": 0.64, "learning_rate": 0.0004484617002579745, "loss": 0.827, "step": 4120 }, { "epoch": 0.64, "learning_rate": 0.0004482148394278033, "loss": 0.8435, "step": 4130 }, { "epoch": 0.64, "learning_rate": 0.00044796745708328297, "loss": 0.7423, "step": 4140 }, { "epoch": 0.64, "learning_rate": 0.0004477195538752902, "loss": 0.8248, "step": 4150 }, { "epoch": 0.64, "learning_rate": 0.00044747113045607234, "loss": 0.8593, "step": 4160 }, { "epoch": 0.64, "learning_rate": 0.0004472221874792454, "loss": 0.8262, "step": 4170 }, { "epoch": 0.64, "learning_rate": 0.00044697272559979207, "loss": 0.7762, "step": 4180 }, { "epoch": 0.65, "learning_rate": 0.00044672274547406067, "loss": 0.8237, "step": 4190 }, { "epoch": 0.65, "learning_rate": 0.0004464722477597629, "loss": 0.8205, "step": 4200 }, { "epoch": 0.65, "eval_bleu": 0.1236377868366298, "eval_loss": 0.769066572189331, "eval_meteor": 0.1904404203843731, "eval_rouge1": 0.36051125596648215, "eval_rouge2": 0.19601074427606005, "eval_rougeL": 0.2983201969348075, "eval_rougeLsum": 0.2983845195227759, "eval_runtime": 1142.7885, "eval_samples_per_second": 1.276, "eval_steps_per_second": 0.213, "step": 4200 }, { "epoch": 0.65, "learning_rate": 0.0004462212331159724, "loss": 0.8109, "step": 4210 }, { "epoch": 0.65, "learning_rate": 0.0004459697022031225, "loss": 0.7642, "step": 4220 }, { "epoch": 0.65, "learning_rate": 0.0004457176556830054, "loss": 0.7603, "step": 4230 }, { "epoch": 0.65, "learning_rate": 0.0004454650942187695, "loss": 0.8168, "step": 4240 }, { "epoch": 0.66, "learning_rate": 0.0004452120184749181, "loss": 0.8137, "step": 4250 }, { "epoch": 0.66, "learning_rate": 0.00044495842911730773, "loss": 0.8485, "step": 4260 }, { "epoch": 0.66, "learning_rate": 0.0004447043268131462, "loss": 0.8846, "step": 4270 }, { "epoch": 0.66, "learning_rate": 0.0004444497122309909, "loss": 0.7891, "step": 4280 }, { "epoch": 0.66, "learning_rate": 0.0004441945860407471, "loss": 0.8096, "step": 4290 }, { "epoch": 0.66, "learning_rate": 0.000443938948913666, "loss": 0.7932, "step": 4300 }, { "epoch": 0.66, "eval_bleu": 0.11741021582498118, "eval_loss": 0.7680177688598633, "eval_meteor": 0.18724966148417066, "eval_rouge1": 0.3538289045097152, "eval_rouge2": 0.1926089993689462, "eval_rougeL": 0.2952244077253912, "eval_rougeLsum": 0.2950938047080252, "eval_runtime": 1113.7012, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.218, "step": 4300 }, { "epoch": 0.66, "learning_rate": 0.00044368280152234333, "loss": 0.7672, "step": 4310 }, { "epoch": 0.67, "learning_rate": 0.00044342614454071714, "loss": 0.7621, "step": 4320 }, { "epoch": 0.67, "learning_rate": 0.0004431689786440664, "loss": 0.8101, "step": 4330 }, { "epoch": 0.67, "learning_rate": 0.000442911304509009, "loss": 0.8431, "step": 4340 }, { "epoch": 0.67, "learning_rate": 0.0004426531228134999, "loss": 0.8133, "step": 4350 }, { "epoch": 0.67, "learning_rate": 0.0004423944342368297, "loss": 0.8458, "step": 4360 }, { "epoch": 0.67, "learning_rate": 0.0004421352394596225, "loss": 0.8306, "step": 4370 }, { "epoch": 0.68, "learning_rate": 0.00044187553916383445, "loss": 0.8032, "step": 4380 }, { "epoch": 0.68, "learning_rate": 0.00044161533403275135, "loss": 0.8051, "step": 4390 }, { "epoch": 0.68, "learning_rate": 0.0004413546247509875, "loss": 0.8578, "step": 4400 }, { "epoch": 0.68, "eval_bleu": 0.12595726943541374, "eval_loss": 0.7692683339118958, "eval_meteor": 0.19222266255963855, "eval_rouge1": 0.3581310742460724, "eval_rouge2": 0.19531037225008183, "eval_rougeL": 0.2956186541319774, "eval_rougeLsum": 0.2956367500630852, "eval_runtime": 1160.1163, "eval_samples_per_second": 1.257, "eval_steps_per_second": 0.209, "step": 4400 }, { "epoch": 0.68, "learning_rate": 0.00044109341200448385, "loss": 0.7249, "step": 4410 }, { "epoch": 0.68, "learning_rate": 0.0004408316964805056, "loss": 0.8155, "step": 4420 }, { "epoch": 0.68, "learning_rate": 0.000440569478867641, "loss": 0.8433, "step": 4430 }, { "epoch": 0.68, "learning_rate": 0.00044030675985579917, "loss": 0.7484, "step": 4440 }, { "epoch": 0.69, "learning_rate": 0.00044004354013620875, "loss": 0.8086, "step": 4450 }, { "epoch": 0.69, "learning_rate": 0.0004397798204014154, "loss": 0.8796, "step": 4460 }, { "epoch": 0.69, "learning_rate": 0.00043951560134528056, "loss": 0.8485, "step": 4470 }, { "epoch": 0.69, "learning_rate": 0.0004392508836629795, "loss": 0.7362, "step": 4480 }, { "epoch": 0.69, "learning_rate": 0.0004389856680509991, "loss": 0.8347, "step": 4490 }, { "epoch": 0.69, "learning_rate": 0.0004387199552071366, "loss": 0.8119, "step": 4500 }, { "epoch": 0.69, "eval_bleu": 0.12373981502065873, "eval_loss": 0.7634089589118958, "eval_meteor": 0.18792913443871737, "eval_rouge1": 0.3586570378567951, "eval_rouge2": 0.1957026657950927, "eval_rougeL": 0.29818979034251414, "eval_rougeLsum": 0.2982401703305406, "eval_runtime": 1151.9743, "eval_samples_per_second": 1.266, "eval_steps_per_second": 0.211, "step": 4500 }, { "epoch": 0.7, "learning_rate": 0.00043845374583049735, "loss": 0.7577, "step": 4510 }, { "epoch": 0.7, "learning_rate": 0.0004381870406214932, "loss": 0.7928, "step": 4520 }, { "epoch": 0.7, "learning_rate": 0.0004379198402818403, "loss": 0.7664, "step": 4530 }, { "epoch": 0.7, "learning_rate": 0.00043765214551455794, "loss": 0.7189, "step": 4540 }, { "epoch": 0.7, "learning_rate": 0.00043738395702396594, "loss": 0.8276, "step": 4550 }, { "epoch": 0.7, "learning_rate": 0.0004371152755156833, "loss": 0.7872, "step": 4560 }, { "epoch": 0.7, "learning_rate": 0.00043684610169662607, "loss": 0.8111, "step": 4570 }, { "epoch": 0.71, "learning_rate": 0.00043657643627500575, "loss": 0.8056, "step": 4580 }, { "epoch": 0.71, "learning_rate": 0.0004363062799603271, "loss": 0.7623, "step": 4590 }, { "epoch": 0.71, "learning_rate": 0.00043603563346338644, "loss": 0.8661, "step": 4600 }, { "epoch": 0.71, "eval_bleu": 0.11089234547528978, "eval_loss": 0.7632281184196472, "eval_meteor": 0.17341941602705138, "eval_rouge1": 0.34942191982099435, "eval_rouge2": 0.19591049653677217, "eval_rougeL": 0.29526297170998683, "eval_rougeLsum": 0.2952619744332252, "eval_runtime": 1071.5418, "eval_samples_per_second": 1.361, "eval_steps_per_second": 0.227, "step": 4600 }, { "epoch": 0.71, "learning_rate": 0.00043576449749627, "loss": 0.7433, "step": 4610 }, { "epoch": 0.71, "learning_rate": 0.0004354928727723516, "loss": 0.7855, "step": 4620 }, { "epoch": 0.71, "learning_rate": 0.00043522076000629124, "loss": 0.7527, "step": 4630 }, { "epoch": 0.72, "learning_rate": 0.00043494815991403275, "loss": 0.8015, "step": 4640 }, { "epoch": 0.72, "learning_rate": 0.0004346750732128023, "loss": 0.7345, "step": 4650 }, { "epoch": 0.72, "learning_rate": 0.0004344015006211062, "loss": 0.7952, "step": 4660 }, { "epoch": 0.72, "learning_rate": 0.0004341274428587294, "loss": 0.8057, "step": 4670 }, { "epoch": 0.72, "learning_rate": 0.00043385290064673317, "loss": 0.8136, "step": 4680 }, { "epoch": 0.72, "learning_rate": 0.0004335778747074535, "loss": 0.8069, "step": 4690 }, { "epoch": 0.72, "learning_rate": 0.00043330236576449887, "loss": 0.8397, "step": 4700 }, { "epoch": 0.72, "eval_bleu": 0.11830285193176951, "eval_loss": 0.7616626620292664, "eval_meteor": 0.1821488344842372, "eval_rouge1": 0.3558580945132578, "eval_rouge2": 0.197915707595695, "eval_rougeL": 0.2981723775850291, "eval_rougeLsum": 0.29833413809671927, "eval_runtime": 1132.5137, "eval_samples_per_second": 1.287, "eval_steps_per_second": 0.215, "step": 4700 }, { "epoch": 0.73, "learning_rate": 0.000433026374542749, "loss": 0.7386, "step": 4710 }, { "epoch": 0.73, "learning_rate": 0.00043274990176835217, "loss": 0.7961, "step": 4720 }, { "epoch": 0.73, "learning_rate": 0.00043247294816872365, "loss": 0.8104, "step": 4730 }, { "epoch": 0.73, "learning_rate": 0.0004321955144725439, "loss": 0.8091, "step": 4740 }, { "epoch": 0.73, "learning_rate": 0.00043191760140975666, "loss": 0.7693, "step": 4750 }, { "epoch": 0.73, "learning_rate": 0.0004316392097115666, "loss": 0.8092, "step": 4760 }, { "epoch": 0.74, "learning_rate": 0.000431360340110438, "loss": 0.8053, "step": 4770 }, { "epoch": 0.74, "learning_rate": 0.00043108099334009234, "loss": 0.7646, "step": 4780 }, { "epoch": 0.74, "learning_rate": 0.0004308011701355066, "loss": 0.8395, "step": 4790 }, { "epoch": 0.74, "learning_rate": 0.0004305208712329114, "loss": 0.7852, "step": 4800 }, { "epoch": 0.74, "eval_bleu": 0.12483561089578614, "eval_loss": 0.7647390365600586, "eval_meteor": 0.19032935944350426, "eval_rouge1": 0.35835454697825203, "eval_rouge2": 0.19542840978745862, "eval_rougeL": 0.29801385574610495, "eval_rougeLsum": 0.29793876372769, "eval_runtime": 1120.9038, "eval_samples_per_second": 1.301, "eval_steps_per_second": 0.217, "step": 4800 }, { "epoch": 0.74, "learning_rate": 0.0004302400973697888, "loss": 0.7485, "step": 4810 }, { "epoch": 0.74, "learning_rate": 0.00042995884928487054, "loss": 0.7812, "step": 4820 }, { "epoch": 0.74, "learning_rate": 0.00042967712771813614, "loss": 0.7857, "step": 4830 }, { "epoch": 0.75, "learning_rate": 0.00042939493341081087, "loss": 0.8019, "step": 4840 }, { "epoch": 0.75, "learning_rate": 0.00042911226710536365, "loss": 0.8257, "step": 4850 }, { "epoch": 0.75, "learning_rate": 0.00042882912954550544, "loss": 0.7601, "step": 4860 }, { "epoch": 0.75, "learning_rate": 0.00042854552147618706, "loss": 0.7856, "step": 4870 }, { "epoch": 0.75, "learning_rate": 0.0004282614436435972, "loss": 0.8138, "step": 4880 }, { "epoch": 0.75, "learning_rate": 0.0004279768967951605, "loss": 0.7765, "step": 4890 }, { "epoch": 0.76, "learning_rate": 0.00042769188167953565, "loss": 0.767, "step": 4900 }, { "epoch": 0.76, "eval_bleu": 0.1302305276945029, "eval_loss": 0.7597366571426392, "eval_meteor": 0.19752698525972517, "eval_rouge1": 0.36296173703809864, "eval_rouge2": 0.19816465507239917, "eval_rougeL": 0.3000226808734052, "eval_rougeLsum": 0.3000833989034842, "eval_runtime": 1189.0731, "eval_samples_per_second": 1.226, "eval_steps_per_second": 0.204, "step": 4900 }, { "epoch": 0.76, "learning_rate": 0.0004274063990466135, "loss": 0.8156, "step": 4910 }, { "epoch": 0.76, "learning_rate": 0.0004271204496475148, "loss": 0.7648, "step": 4920 }, { "epoch": 0.76, "learning_rate": 0.00042683403423458843, "loss": 0.7364, "step": 4930 }, { "epoch": 0.76, "learning_rate": 0.00042654715356140946, "loss": 0.8329, "step": 4940 }, { "epoch": 0.76, "learning_rate": 0.0004262598083827769, "loss": 0.8443, "step": 4950 }, { "epoch": 0.76, "learning_rate": 0.000425971999454712, "loss": 0.8809, "step": 4960 }, { "epoch": 0.77, "learning_rate": 0.0004256837275344564, "loss": 0.7959, "step": 4970 }, { "epoch": 0.77, "learning_rate": 0.0004253949933804694, "loss": 0.82, "step": 4980 }, { "epoch": 0.77, "learning_rate": 0.00042510579775242684, "loss": 0.8249, "step": 4990 }, { "epoch": 0.77, "learning_rate": 0.00042481614141121873, "loss": 0.8284, "step": 5000 }, { "epoch": 0.77, "eval_bleu": 0.13696974043564947, "eval_loss": 0.7628008127212524, "eval_meteor": 0.20833444182082805, "eval_rouge1": 0.367375191425503, "eval_rouge2": 0.1978131466130248, "eval_rougeL": 0.29990090210288556, "eval_rougeLsum": 0.3001498394981842, "eval_runtime": 1199.5655, "eval_samples_per_second": 1.215, "eval_steps_per_second": 0.203, "step": 5000 }, { "epoch": 0.77, "learning_rate": 0.000424526025118947, "loss": 0.7842, "step": 5010 }, { "epoch": 0.77, "learning_rate": 0.00042423544963892393, "loss": 0.8718, "step": 5020 }, { "epoch": 0.78, "learning_rate": 0.0004239444157356699, "loss": 0.8612, "step": 5030 }, { "epoch": 0.78, "learning_rate": 0.00042365292417491135, "loss": 0.7878, "step": 5040 }, { "epoch": 0.78, "learning_rate": 0.000423360975723579, "loss": 0.8274, "step": 5050 }, { "epoch": 0.78, "learning_rate": 0.0004230685711498055, "loss": 0.8017, "step": 5060 }, { "epoch": 0.78, "learning_rate": 0.0004227757112229237, "loss": 0.8154, "step": 5070 }, { "epoch": 0.78, "learning_rate": 0.00042248239671346455, "loss": 0.7849, "step": 5080 }, { "epoch": 0.78, "learning_rate": 0.0004221886283931549, "loss": 0.8234, "step": 5090 }, { "epoch": 0.79, "learning_rate": 0.00042189440703491556, "loss": 0.7984, "step": 5100 }, { "epoch": 0.79, "eval_bleu": 0.11530197031936106, "eval_loss": 0.755507230758667, "eval_meteor": 0.1806513827098456, "eval_rouge1": 0.3555621488323981, "eval_rouge2": 0.19846639016470374, "eval_rougeL": 0.29831273382603013, "eval_rougeLsum": 0.2980805463936066, "eval_runtime": 1059.4931, "eval_samples_per_second": 1.376, "eval_steps_per_second": 0.229, "step": 5100 }, { "epoch": 0.79, "learning_rate": 0.0004215997334128595, "loss": 0.8037, "step": 5110 }, { "epoch": 0.79, "learning_rate": 0.0004213046083022896, "loss": 0.7687, "step": 5120 }, { "epoch": 0.79, "learning_rate": 0.00042100903247969647, "loss": 0.7573, "step": 5130 }, { "epoch": 0.79, "learning_rate": 0.00042071300672275676, "loss": 0.8173, "step": 5140 }, { "epoch": 0.79, "learning_rate": 0.0004204165318103307, "loss": 0.8508, "step": 5150 }, { "epoch": 0.8, "learning_rate": 0.00042011960852246044, "loss": 0.8763, "step": 5160 }, { "epoch": 0.8, "learning_rate": 0.0004198222376403678, "loss": 0.8561, "step": 5170 }, { "epoch": 0.8, "learning_rate": 0.00041952441994645224, "loss": 0.8034, "step": 5180 }, { "epoch": 0.8, "learning_rate": 0.00041922615622428885, "loss": 0.7624, "step": 5190 }, { "epoch": 0.8, "learning_rate": 0.000418927447258626, "loss": 0.8129, "step": 5200 }, { "epoch": 0.8, "eval_bleu": 0.1280088834881739, "eval_loss": 0.7529436945915222, "eval_meteor": 0.19464046676396524, "eval_rouge1": 0.3620965211772262, "eval_rouge2": 0.19866324113960265, "eval_rougeL": 0.29920122666998356, "eval_rougeLsum": 0.2993664973235719, "eval_runtime": 1160.682, "eval_samples_per_second": 1.256, "eval_steps_per_second": 0.209, "step": 5200 }, { "epoch": 0.8, "learning_rate": 0.00041862829383538397, "loss": 0.8095, "step": 5210 }, { "epoch": 0.8, "learning_rate": 0.00041832869674165204, "loss": 0.7788, "step": 5220 }, { "epoch": 0.81, "learning_rate": 0.00041802865676568695, "loss": 0.8048, "step": 5230 }, { "epoch": 0.81, "learning_rate": 0.0004177281746969107, "loss": 0.8296, "step": 5240 }, { "epoch": 0.81, "learning_rate": 0.00041742725132590854, "loss": 0.7797, "step": 5250 }, { "epoch": 0.81, "learning_rate": 0.0004171258874444266, "loss": 0.8777, "step": 5260 }, { "epoch": 0.81, "learning_rate": 0.0004168240838453702, "loss": 0.7669, "step": 5270 }, { "epoch": 0.81, "learning_rate": 0.00041652184132280146, "loss": 0.831, "step": 5280 }, { "epoch": 0.82, "learning_rate": 0.00041621916067193746, "loss": 0.7852, "step": 5290 }, { "epoch": 0.82, "learning_rate": 0.00041591604268914796, "loss": 0.7811, "step": 5300 }, { "epoch": 0.82, "eval_bleu": 0.12320084852539886, "eval_loss": 0.7549387216567993, "eval_meteor": 0.18792325044373648, "eval_rouge1": 0.35864728570941573, "eval_rouge2": 0.19706396904795415, "eval_rougeL": 0.29758291424649863, "eval_rougeLsum": 0.29778392714680746, "eval_runtime": 1106.6121, "eval_samples_per_second": 1.318, "eval_steps_per_second": 0.22, "step": 5300 }, { "epoch": 0.82, "learning_rate": 0.0004156124881719533, "loss": 0.7769, "step": 5310 }, { "epoch": 0.82, "learning_rate": 0.0004153084979190224, "loss": 0.7397, "step": 5320 }, { "epoch": 0.82, "learning_rate": 0.00041500407273017075, "loss": 0.7779, "step": 5330 }, { "epoch": 0.82, "learning_rate": 0.0004146992134063581, "loss": 0.7955, "step": 5340 }, { "epoch": 0.82, "learning_rate": 0.00041439392074968617, "loss": 0.7659, "step": 5350 }, { "epoch": 0.83, "learning_rate": 0.00041408819556339735, "loss": 0.8533, "step": 5360 }, { "epoch": 0.83, "learning_rate": 0.00041378203865187154, "loss": 0.7967, "step": 5370 }, { "epoch": 0.83, "learning_rate": 0.00041347545082062476, "loss": 0.7941, "step": 5380 }, { "epoch": 0.83, "learning_rate": 0.0004131684328763069, "loss": 0.849, "step": 5390 }, { "epoch": 0.83, "learning_rate": 0.00041286098562669926, "loss": 0.836, "step": 5400 }, { "epoch": 0.83, "eval_bleu": 0.12588415215553295, "eval_loss": 0.75471031665802, "eval_meteor": 0.1968519512568269, "eval_rouge1": 0.36628943428680916, "eval_rouge2": 0.2000519092857415, "eval_rougeL": 0.30313942317590103, "eval_rougeLsum": 0.3031091247198662, "eval_runtime": 1092.5631, "eval_samples_per_second": 1.334, "eval_steps_per_second": 0.222, "step": 5400 }, { "epoch": 0.83, "learning_rate": 0.00041255310988071284, "loss": 0.7849, "step": 5410 }, { "epoch": 0.84, "learning_rate": 0.00041224480644838586, "loss": 0.7259, "step": 5420 }, { "epoch": 0.84, "learning_rate": 0.000411936076140882, "loss": 0.8354, "step": 5430 }, { "epoch": 0.84, "learning_rate": 0.0004116269197704881, "loss": 0.7819, "step": 5440 }, { "epoch": 0.84, "learning_rate": 0.0004113173381506117, "loss": 0.8633, "step": 5450 }, { "epoch": 0.84, "learning_rate": 0.0004110073320957795, "loss": 0.8141, "step": 5460 }, { "epoch": 0.84, "learning_rate": 0.0004106969024216348, "loss": 0.7929, "step": 5470 }, { "epoch": 0.84, "learning_rate": 0.0004103860499449355, "loss": 0.7972, "step": 5480 }, { "epoch": 0.85, "learning_rate": 0.0004100747754835518, "loss": 0.8356, "step": 5490 }, { "epoch": 0.85, "learning_rate": 0.0004097630798564643, "loss": 0.8168, "step": 5500 }, { "epoch": 0.85, "eval_bleu": 0.12183344025510169, "eval_loss": 0.7511031627655029, "eval_meteor": 0.18681450779014622, "eval_rouge1": 0.35671979001980275, "eval_rouge2": 0.1960218610645066, "eval_rougeL": 0.29562632322337584, "eval_rougeLsum": 0.2957310907035756, "eval_runtime": 1123.9062, "eval_samples_per_second": 1.297, "eval_steps_per_second": 0.216, "step": 5500 }, { "epoch": 0.85, "learning_rate": 0.0004094509638837617, "loss": 0.7949, "step": 5510 }, { "epoch": 0.85, "learning_rate": 0.0004091384283866385, "loss": 0.8108, "step": 5520 }, { "epoch": 0.85, "learning_rate": 0.00040882547418739316, "loss": 0.6972, "step": 5530 }, { "epoch": 0.85, "learning_rate": 0.00040851210210942577, "loss": 0.7515, "step": 5540 }, { "epoch": 0.86, "learning_rate": 0.00040819831297723573, "loss": 0.7821, "step": 5550 }, { "epoch": 0.86, "learning_rate": 0.0004078841076164199, "loss": 0.7728, "step": 5560 }, { "epoch": 0.86, "learning_rate": 0.0004075694868536701, "loss": 0.7493, "step": 5570 }, { "epoch": 0.86, "learning_rate": 0.00040725445151677136, "loss": 0.8138, "step": 5580 }, { "epoch": 0.86, "learning_rate": 0.0004069390024345991, "loss": 0.8215, "step": 5590 }, { "epoch": 0.86, "learning_rate": 0.0004066231404371177, "loss": 0.8057, "step": 5600 }, { "epoch": 0.86, "eval_bleu": 0.12330222084393866, "eval_loss": 0.7514679431915283, "eval_meteor": 0.19033581377995815, "eval_rouge1": 0.3581105465101981, "eval_rouge2": 0.19665944172196212, "eval_rougeL": 0.2981881930811607, "eval_rougeLsum": 0.2979884824891669, "eval_runtime": 1170.8391, "eval_samples_per_second": 1.245, "eval_steps_per_second": 0.208, "step": 5600 }, { "epoch": 0.86, "learning_rate": 0.00040630686635537773, "loss": 0.7275, "step": 5610 }, { "epoch": 0.87, "learning_rate": 0.000405990181021514, "loss": 0.7879, "step": 5620 }, { "epoch": 0.87, "learning_rate": 0.00040567308526874324, "loss": 0.7324, "step": 5630 }, { "epoch": 0.87, "learning_rate": 0.00040535557993136236, "loss": 0.7797, "step": 5640 }, { "epoch": 0.87, "learning_rate": 0.0004050376658447456, "loss": 0.7792, "step": 5650 }, { "epoch": 0.87, "learning_rate": 0.0004047193438453427, "loss": 0.7735, "step": 5660 }, { "epoch": 0.87, "learning_rate": 0.0004044006147706767, "loss": 0.7758, "step": 5670 }, { "epoch": 0.88, "learning_rate": 0.00040408147945934173, "loss": 0.8429, "step": 5680 }, { "epoch": 0.88, "learning_rate": 0.00040376193875100053, "loss": 0.7891, "step": 5690 }, { "epoch": 0.88, "learning_rate": 0.0004034419934863828, "loss": 0.8045, "step": 5700 }, { "epoch": 0.88, "eval_bleu": 0.12057087168942168, "eval_loss": 0.7541698217391968, "eval_meteor": 0.1864159375566591, "eval_rouge1": 0.3593783361406444, "eval_rouge2": 0.19683759603742187, "eval_rougeL": 0.29803496391685336, "eval_rougeLsum": 0.29788976506923015, "eval_runtime": 1127.7837, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.215, "step": 5700 }, { "epoch": 0.88, "learning_rate": 0.0004031216445072822, "loss": 0.7893, "step": 5710 }, { "epoch": 0.88, "learning_rate": 0.0004028008926565551, "loss": 0.8821, "step": 5720 }, { "epoch": 0.88, "learning_rate": 0.0004024797387781175, "loss": 0.8032, "step": 5730 }, { "epoch": 0.88, "learning_rate": 0.0004021581837169432, "loss": 0.7978, "step": 5740 }, { "epoch": 0.89, "learning_rate": 0.00040183622831906166, "loss": 0.8345, "step": 5750 }, { "epoch": 0.89, "learning_rate": 0.0004015138734315554, "loss": 0.7948, "step": 5760 }, { "epoch": 0.89, "learning_rate": 0.0004011911199025584, "loss": 0.7712, "step": 5770 }, { "epoch": 0.89, "learning_rate": 0.00040086796858125324, "loss": 0.8137, "step": 5780 }, { "epoch": 0.89, "learning_rate": 0.00040054442031786907, "loss": 0.7523, "step": 5790 }, { "epoch": 0.89, "learning_rate": 0.0004002204759636796, "loss": 0.7927, "step": 5800 }, { "epoch": 0.89, "eval_bleu": 0.12228391385106198, "eval_loss": 0.7471486926078796, "eval_meteor": 0.19151605381653838, "eval_rouge1": 0.3620636405755351, "eval_rouge2": 0.19861702778304668, "eval_rougeL": 0.30207238821110516, "eval_rougeLsum": 0.3019676001231871, "eval_runtime": 1147.7894, "eval_samples_per_second": 1.27, "eval_steps_per_second": 0.212, "step": 5800 }, { "epoch": 0.9, "learning_rate": 0.00039989613637100055, "loss": 0.7512, "step": 5810 }, { "epoch": 0.9, "learning_rate": 0.00039957140239318744, "loss": 0.7385, "step": 5820 }, { "epoch": 0.9, "learning_rate": 0.00039924627488463374, "loss": 0.8469, "step": 5830 }, { "epoch": 0.9, "learning_rate": 0.00039892075470076795, "loss": 0.72, "step": 5840 }, { "epoch": 0.9, "learning_rate": 0.0003985948426980521, "loss": 0.797, "step": 5850 }, { "epoch": 0.9, "learning_rate": 0.0003982685397339789, "loss": 0.7778, "step": 5860 }, { "epoch": 0.9, "learning_rate": 0.00039794184666706964, "loss": 0.7285, "step": 5870 }, { "epoch": 0.91, "learning_rate": 0.0003976147643568721, "loss": 0.7779, "step": 5880 }, { "epoch": 0.91, "learning_rate": 0.00039728729366395824, "loss": 0.7841, "step": 5890 }, { "epoch": 0.91, "learning_rate": 0.00039695943544992173, "loss": 0.8402, "step": 5900 }, { "epoch": 0.91, "eval_bleu": 0.11653429141819567, "eval_loss": 0.7500145435333252, "eval_meteor": 0.18259693460048834, "eval_rouge1": 0.35693896022311644, "eval_rouge2": 0.19481212920926488, "eval_rougeL": 0.2974158389948098, "eval_rougeLsum": 0.2972789083405306, "eval_runtime": 1127.326, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.216, "step": 5900 }, { "epoch": 0.91, "learning_rate": 0.000396631190577376, "loss": 0.8434, "step": 5910 }, { "epoch": 0.91, "learning_rate": 0.0003963025599099516, "loss": 0.8225, "step": 5920 }, { "epoch": 0.91, "learning_rate": 0.0003959735443122943, "loss": 0.7828, "step": 5930 }, { "epoch": 0.92, "learning_rate": 0.00039564414465006244, "loss": 0.7987, "step": 5940 }, { "epoch": 0.92, "learning_rate": 0.00039531436178992513, "loss": 0.7857, "step": 5950 }, { "epoch": 0.92, "learning_rate": 0.0003949841965995595, "loss": 0.7992, "step": 5960 }, { "epoch": 0.92, "learning_rate": 0.0003946536499476487, "loss": 0.788, "step": 5970 }, { "epoch": 0.92, "learning_rate": 0.00039432272270387955, "loss": 0.769, "step": 5980 }, { "epoch": 0.92, "learning_rate": 0.00039399141573893997, "loss": 0.8262, "step": 5990 }, { "epoch": 0.92, "learning_rate": 0.00039365972992451735, "loss": 0.7963, "step": 6000 }, { "epoch": 0.92, "eval_bleu": 0.12318553450668913, "eval_loss": 0.7483591437339783, "eval_meteor": 0.1913410867293855, "eval_rouge1": 0.3654868855873549, "eval_rouge2": 0.20050423202844517, "eval_rougeL": 0.30447787352072553, "eval_rougeLsum": 0.30429425068099136, "eval_runtime": 1124.7101, "eval_samples_per_second": 1.296, "eval_steps_per_second": 0.216, "step": 6000 }, { "epoch": 0.93, "learning_rate": 0.0003933276661332955, "loss": 0.7798, "step": 6010 }, { "epoch": 0.93, "learning_rate": 0.00039299522523895296, "loss": 0.8611, "step": 6020 }, { "epoch": 0.93, "learning_rate": 0.0003926624081161604, "loss": 0.8131, "step": 6030 }, { "epoch": 0.93, "learning_rate": 0.0003923292156405781, "loss": 0.7202, "step": 6040 }, { "epoch": 0.93, "learning_rate": 0.0003919956486888544, "loss": 0.7797, "step": 6050 }, { "epoch": 0.93, "learning_rate": 0.0003916617081386225, "loss": 0.7561, "step": 6060 }, { "epoch": 0.94, "learning_rate": 0.0003913273948684987, "loss": 0.71, "step": 6070 }, { "epoch": 0.94, "learning_rate": 0.00039099270975808, "loss": 0.7608, "step": 6080 }, { "epoch": 0.94, "learning_rate": 0.0003906576536879416, "loss": 0.8031, "step": 6090 }, { "epoch": 0.94, "learning_rate": 0.00039032222753963483, "loss": 0.8034, "step": 6100 }, { "epoch": 0.94, "eval_bleu": 0.11720116971140243, "eval_loss": 0.7478321194648743, "eval_meteor": 0.1819934943700329, "eval_rouge1": 0.35727692353329465, "eval_rouge2": 0.19816847975598717, "eval_rougeL": 0.29895230165351805, "eval_rougeLsum": 0.29907502151518195, "eval_runtime": 1070.5188, "eval_samples_per_second": 1.362, "eval_steps_per_second": 0.227, "step": 6100 }, { "epoch": 0.94, "learning_rate": 0.00038998643219568467, "loss": 0.7886, "step": 6110 }, { "epoch": 0.94, "learning_rate": 0.00038965026853958755, "loss": 0.7854, "step": 6120 }, { "epoch": 0.95, "learning_rate": 0.00038931373745580884, "loss": 0.7956, "step": 6130 }, { "epoch": 0.95, "learning_rate": 0.0003889768398297807, "loss": 0.7957, "step": 6140 }, { "epoch": 0.95, "learning_rate": 0.00038863957654789957, "loss": 0.7563, "step": 6150 }, { "epoch": 0.95, "learning_rate": 0.0003883019484975241, "loss": 0.7558, "step": 6160 }, { "epoch": 0.95, "learning_rate": 0.00038796395656697267, "loss": 0.797, "step": 6170 }, { "epoch": 0.95, "learning_rate": 0.00038762560164552095, "loss": 0.7864, "step": 6180 }, { "epoch": 0.95, "learning_rate": 0.0003872868846233997, "loss": 0.7932, "step": 6190 }, { "epoch": 0.96, "learning_rate": 0.0003869478063917924, "loss": 0.7569, "step": 6200 }, { "epoch": 0.96, "eval_bleu": 0.12021270355030027, "eval_loss": 0.7468777298927307, "eval_meteor": 0.18865042542151908, "eval_rouge1": 0.36340810125388445, "eval_rouge2": 0.20321855929268942, "eval_rougeL": 0.3042800348780287, "eval_rougeLsum": 0.3041477067076571, "eval_runtime": 1060.9151, "eval_samples_per_second": 1.374, "eval_steps_per_second": 0.229, "step": 6200 }, { "epoch": 0.96, "learning_rate": 0.0003866083678428328, "loss": 0.7893, "step": 6210 }, { "epoch": 0.96, "learning_rate": 0.0003862685698696028, "loss": 0.7841, "step": 6220 }, { "epoch": 0.96, "learning_rate": 0.0003859284133661299, "loss": 0.7696, "step": 6230 }, { "epoch": 0.96, "learning_rate": 0.0003855878992273849, "loss": 0.7964, "step": 6240 }, { "epoch": 0.96, "learning_rate": 0.0003852470283492796, "loss": 0.7731, "step": 6250 }, { "epoch": 0.97, "learning_rate": 0.0003849058016286644, "loss": 0.7562, "step": 6260 }, { "epoch": 0.97, "learning_rate": 0.00038456421996332593, "loss": 0.7756, "step": 6270 }, { "epoch": 0.97, "learning_rate": 0.00038422228425198456, "loss": 0.7327, "step": 6280 }, { "epoch": 0.97, "learning_rate": 0.00038387999539429255, "loss": 0.7831, "step": 6290 }, { "epoch": 0.97, "learning_rate": 0.0003835373542908308, "loss": 0.7728, "step": 6300 }, { "epoch": 0.97, "eval_bleu": 0.13571042313085763, "eval_loss": 0.7441371083259583, "eval_meteor": 0.20429787752537404, "eval_rouge1": 0.36910530156190763, "eval_rouge2": 0.20076171169403834, "eval_rougeL": 0.3028160316079058, "eval_rougeLsum": 0.3028887886618019, "eval_runtime": 1240.9106, "eval_samples_per_second": 1.175, "eval_steps_per_second": 0.196, "step": 6300 }, { "epoch": 0.97, "learning_rate": 0.0003831943618431074, "loss": 0.8109, "step": 6310 }, { "epoch": 0.97, "learning_rate": 0.0003828510189535548, "loss": 0.7687, "step": 6320 }, { "epoch": 0.98, "learning_rate": 0.00038250732652552713, "loss": 0.7796, "step": 6330 }, { "epoch": 0.98, "learning_rate": 0.00038216328546329854, "loss": 0.7713, "step": 6340 }, { "epoch": 0.98, "learning_rate": 0.00038181889667206036, "loss": 0.8039, "step": 6350 }, { "epoch": 0.98, "learning_rate": 0.0003814741610579189, "loss": 0.7761, "step": 6360 }, { "epoch": 0.98, "learning_rate": 0.00038112907952789264, "loss": 0.7536, "step": 6370 }, { "epoch": 0.98, "learning_rate": 0.0003807836529899106, "loss": 0.7478, "step": 6380 }, { "epoch": 0.99, "learning_rate": 0.00038043788235280927, "loss": 0.7639, "step": 6390 }, { "epoch": 0.99, "learning_rate": 0.0003800917685263307, "loss": 0.7624, "step": 6400 }, { "epoch": 0.99, "eval_bleu": 0.13360665201533722, "eval_loss": 0.743972659111023, "eval_meteor": 0.19919552001100382, "eval_rouge1": 0.3659102912435709, "eval_rouge2": 0.19789641111146775, "eval_rougeL": 0.3016512273674288, "eval_rougeLsum": 0.3015437367125981, "eval_runtime": 1272.3138, "eval_samples_per_second": 1.146, "eval_steps_per_second": 0.191, "step": 6400 }, { "epoch": 0.99, "learning_rate": 0.0003797453124211196, "loss": 0.7455, "step": 6410 }, { "epoch": 0.99, "learning_rate": 0.0003793985149487215, "loss": 0.7817, "step": 6420 }, { "epoch": 0.99, "learning_rate": 0.00037905137702158, "loss": 0.7936, "step": 6430 }, { "epoch": 0.99, "learning_rate": 0.00037870389955303426, "loss": 0.7884, "step": 6440 }, { "epoch": 0.99, "learning_rate": 0.00037835608345731717, "loss": 0.7477, "step": 6450 }, { "epoch": 1.0, "learning_rate": 0.0003780079296495523, "loss": 0.7333, "step": 6460 }, { "epoch": 1.0, "learning_rate": 0.0003776594390457517, "loss": 0.7712, "step": 6470 }, { "epoch": 1.0, "learning_rate": 0.00037731061256281395, "loss": 0.8028, "step": 6480 }, { "epoch": 1.0, "learning_rate": 0.0003769614511185209, "loss": 0.836, "step": 6490 }, { "epoch": 1.0, "learning_rate": 0.00037661195563153577, "loss": 0.7102, "step": 6500 }, { "epoch": 1.0, "eval_bleu": 0.13683765315402233, "eval_loss": 0.7432180643081665, "eval_meteor": 0.20768677295384516, "eval_rouge1": 0.3735959078332925, "eval_rouge2": 0.20419374346780084, "eval_rougeL": 0.30712118478863093, "eval_rougeLsum": 0.30707788341285575, "eval_runtime": 1270.0874, "eval_samples_per_second": 1.148, "eval_steps_per_second": 0.191, "step": 6500 }, { "epoch": 1.0, "learning_rate": 0.000376262127021401, "loss": 0.7216, "step": 6510 }, { "epoch": 1.01, "learning_rate": 0.00037591196620853515, "loss": 0.7167, "step": 6520 }, { "epoch": 1.01, "learning_rate": 0.0003755614741142309, "loss": 0.7174, "step": 6530 }, { "epoch": 1.01, "learning_rate": 0.0003752106516606526, "loss": 0.7206, "step": 6540 }, { "epoch": 1.01, "learning_rate": 0.0003748594997708339, "loss": 0.7271, "step": 6550 }, { "epoch": 1.01, "learning_rate": 0.00037450801936867497, "loss": 0.7166, "step": 6560 }, { "epoch": 1.01, "learning_rate": 0.0003741562113789405, "loss": 0.6894, "step": 6570 }, { "epoch": 1.01, "learning_rate": 0.000373804076727257, "loss": 0.7399, "step": 6580 }, { "epoch": 1.02, "learning_rate": 0.0003734516163401105, "loss": 0.7341, "step": 6590 }, { "epoch": 1.02, "learning_rate": 0.00037309883114484407, "loss": 0.6979, "step": 6600 }, { "epoch": 1.02, "eval_bleu": 0.11959061229637678, "eval_loss": 0.7399081587791443, "eval_meteor": 0.18578293382867828, "eval_rouge1": 0.35998311194622934, "eval_rouge2": 0.2008245839204704, "eval_rougeL": 0.30212159744533995, "eval_rougeLsum": 0.30208186381396035, "eval_runtime": 1117.6606, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.217, "step": 6600 }, { "epoch": 1.02, "learning_rate": 0.00037274572206965516, "loss": 0.695, "step": 6610 }, { "epoch": 1.02, "learning_rate": 0.0003723922900435937, "loss": 0.7373, "step": 6620 }, { "epoch": 1.02, "learning_rate": 0.00037203853599655914, "loss": 0.7002, "step": 6630 }, { "epoch": 1.02, "learning_rate": 0.0003716844608592981, "loss": 0.7566, "step": 6640 }, { "epoch": 1.03, "learning_rate": 0.00037133006556340216, "loss": 0.7111, "step": 6650 }, { "epoch": 1.03, "learning_rate": 0.0003709753510413052, "loss": 0.745, "step": 6660 }, { "epoch": 1.03, "learning_rate": 0.00037062031822628094, "loss": 0.6765, "step": 6670 }, { "epoch": 1.03, "learning_rate": 0.0003702649680524408, "loss": 0.7619, "step": 6680 }, { "epoch": 1.03, "learning_rate": 0.00036990930145473083, "loss": 0.6821, "step": 6690 }, { "epoch": 1.03, "learning_rate": 0.0003695533193689298, "loss": 0.7149, "step": 6700 }, { "epoch": 1.03, "eval_bleu": 0.12635236721625973, "eval_loss": 0.739450216293335, "eval_meteor": 0.19553725175716402, "eval_rouge1": 0.365661266915583, "eval_rouge2": 0.20178360342416046, "eval_rougeL": 0.3026326239453274, "eval_rougeLsum": 0.30259399461990677, "eval_runtime": 1155.7274, "eval_samples_per_second": 1.262, "eval_steps_per_second": 0.21, "step": 6700 }, { "epoch": 1.03, "learning_rate": 0.00036919702273164657, "loss": 0.7377, "step": 6710 }, { "epoch": 1.04, "learning_rate": 0.00036884041248031753, "loss": 0.7444, "step": 6720 }, { "epoch": 1.04, "learning_rate": 0.0003684834895532042, "loss": 0.7286, "step": 6730 }, { "epoch": 1.04, "learning_rate": 0.0003681262548893909, "loss": 0.7449, "step": 6740 }, { "epoch": 1.04, "learning_rate": 0.0003677687094287819, "loss": 0.6915, "step": 6750 }, { "epoch": 1.04, "learning_rate": 0.0003674108541120995, "loss": 0.7031, "step": 6760 }, { "epoch": 1.04, "learning_rate": 0.00036705268988088103, "loss": 0.7142, "step": 6770 }, { "epoch": 1.05, "learning_rate": 0.00036669421767747656, "loss": 0.7086, "step": 6780 }, { "epoch": 1.05, "learning_rate": 0.0003663354384450467, "loss": 0.7481, "step": 6790 }, { "epoch": 1.05, "learning_rate": 0.00036597635312755954, "loss": 0.6722, "step": 6800 }, { "epoch": 1.05, "eval_bleu": 0.11414956111209436, "eval_loss": 0.7422959804534912, "eval_meteor": 0.18158717314624995, "eval_rouge1": 0.35938872641078123, "eval_rouge2": 0.20238339161949742, "eval_rougeL": 0.3036388613445834, "eval_rougeLsum": 0.30368724785496093, "eval_runtime": 1059.4123, "eval_samples_per_second": 1.376, "eval_steps_per_second": 0.229, "step": 6800 }, { "epoch": 1.05, "learning_rate": 0.0003656169626697889, "loss": 0.6965, "step": 6810 }, { "epoch": 1.05, "learning_rate": 0.000365257268017311, "loss": 0.7239, "step": 6820 }, { "epoch": 1.05, "learning_rate": 0.0003648972701165027, "loss": 0.7147, "step": 6830 }, { "epoch": 1.05, "learning_rate": 0.00036453696991453865, "loss": 0.6588, "step": 6840 }, { "epoch": 1.06, "learning_rate": 0.0003641763683593889, "loss": 0.6452, "step": 6850 }, { "epoch": 1.06, "learning_rate": 0.0003638154663998163, "loss": 0.7578, "step": 6860 }, { "epoch": 1.06, "learning_rate": 0.00036345426498537417, "loss": 0.6807, "step": 6870 }, { "epoch": 1.06, "learning_rate": 0.00036309276506640365, "loss": 0.7922, "step": 6880 }, { "epoch": 1.06, "learning_rate": 0.00036273096759403123, "loss": 0.6959, "step": 6890 }, { "epoch": 1.06, "learning_rate": 0.0003623688735201664, "loss": 0.7319, "step": 6900 }, { "epoch": 1.06, "eval_bleu": 0.13311801380318097, "eval_loss": 0.739512026309967, "eval_meteor": 0.20301601278830728, "eval_rouge1": 0.3697671294885042, "eval_rouge2": 0.2040980609334162, "eval_rougeL": 0.30591621894549137, "eval_rougeLsum": 0.30575850009870087, "eval_runtime": 1168.6213, "eval_samples_per_second": 1.248, "eval_steps_per_second": 0.208, "step": 6900 }, { "epoch": 1.07, "learning_rate": 0.00036200648379749903, "loss": 0.7169, "step": 6910 }, { "epoch": 1.07, "learning_rate": 0.00036164379937949666, "loss": 0.7035, "step": 6920 }, { "epoch": 1.07, "learning_rate": 0.00036128082122040224, "loss": 0.6929, "step": 6930 }, { "epoch": 1.07, "learning_rate": 0.0003609175502752319, "loss": 0.7502, "step": 6940 }, { "epoch": 1.07, "learning_rate": 0.0003605539874997716, "loss": 0.729, "step": 6950 }, { "epoch": 1.07, "learning_rate": 0.00036019013385057557, "loss": 0.6907, "step": 6960 }, { "epoch": 1.07, "learning_rate": 0.00035982599028496306, "loss": 0.6899, "step": 6970 }, { "epoch": 1.08, "learning_rate": 0.00035946155776101613, "loss": 0.7194, "step": 6980 }, { "epoch": 1.08, "learning_rate": 0.0003590968372375774, "loss": 0.6805, "step": 6990 }, { "epoch": 1.08, "learning_rate": 0.00035873182967424667, "loss": 0.6992, "step": 7000 }, { "epoch": 1.08, "eval_bleu": 0.11900569290924122, "eval_loss": 0.7383832335472107, "eval_meteor": 0.18448493712506533, "eval_rouge1": 0.35725738552943453, "eval_rouge2": 0.19755022515559825, "eval_rougeL": 0.2990729972948073, "eval_rougeLsum": 0.2989527020663407, "eval_runtime": 1155.0098, "eval_samples_per_second": 1.262, "eval_steps_per_second": 0.21, "step": 7000 }, { "epoch": 1.08, "learning_rate": 0.00035836653603137954, "loss": 0.6816, "step": 7010 }, { "epoch": 1.08, "learning_rate": 0.000358000957270084, "loss": 0.707, "step": 7020 }, { "epoch": 1.08, "learning_rate": 0.0003576350943522182, "loss": 0.6911, "step": 7030 }, { "epoch": 1.09, "learning_rate": 0.000357268948240388, "loss": 0.6851, "step": 7040 }, { "epoch": 1.09, "learning_rate": 0.00035690251989794444, "loss": 0.742, "step": 7050 }, { "epoch": 1.09, "learning_rate": 0.0003565358102889809, "loss": 0.7222, "step": 7060 }, { "epoch": 1.09, "learning_rate": 0.00035616882037833083, "loss": 0.6707, "step": 7070 }, { "epoch": 1.09, "learning_rate": 0.00035580155113156545, "loss": 0.717, "step": 7080 }, { "epoch": 1.09, "learning_rate": 0.0003554340035149906, "loss": 0.6809, "step": 7090 }, { "epoch": 1.09, "learning_rate": 0.0003550661784956447, "loss": 0.699, "step": 7100 }, { "epoch": 1.09, "eval_bleu": 0.13124721878731666, "eval_loss": 0.7341772317886353, "eval_meteor": 0.20086043758202302, "eval_rouge1": 0.3665359469102716, "eval_rouge2": 0.20045162880972417, "eval_rougeL": 0.30219876970116155, "eval_rougeLsum": 0.30216060698118885, "eval_runtime": 1217.1151, "eval_samples_per_second": 1.198, "eval_steps_per_second": 0.2, "step": 7100 }, { "epoch": 1.1, "learning_rate": 0.00035469807704129595, "loss": 0.7358, "step": 7110 }, { "epoch": 1.1, "learning_rate": 0.00035432970012044005, "loss": 0.7044, "step": 7120 }, { "epoch": 1.1, "learning_rate": 0.00035396104870229705, "loss": 0.7466, "step": 7130 }, { "epoch": 1.1, "learning_rate": 0.0003535921237568097, "loss": 0.7178, "step": 7140 }, { "epoch": 1.1, "learning_rate": 0.00035322292625464014, "loss": 0.7379, "step": 7150 }, { "epoch": 1.1, "learning_rate": 0.0003528534571671677, "loss": 0.6904, "step": 7160 }, { "epoch": 1.11, "learning_rate": 0.00035248371746648624, "loss": 0.7317, "step": 7170 }, { "epoch": 1.11, "learning_rate": 0.0003521137081254016, "loss": 0.7052, "step": 7180 }, { "epoch": 1.11, "learning_rate": 0.00035174343011742915, "loss": 0.756, "step": 7190 }, { "epoch": 1.11, "learning_rate": 0.000351372884416791, "loss": 0.7159, "step": 7200 }, { "epoch": 1.11, "eval_bleu": 0.13246869374366876, "eval_loss": 0.7347835898399353, "eval_meteor": 0.20244538384279492, "eval_rouge1": 0.37153124241895075, "eval_rouge2": 0.20420649618044395, "eval_rougeL": 0.3075002644877919, "eval_rougeLsum": 0.3076795818578708, "eval_runtime": 1225.5167, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.198, "step": 7200 }, { "epoch": 1.11, "learning_rate": 0.00035100207199841374, "loss": 0.6935, "step": 7210 }, { "epoch": 1.11, "learning_rate": 0.0003506309938379255, "loss": 0.7689, "step": 7220 }, { "epoch": 1.11, "learning_rate": 0.00035025965091165385, "loss": 0.7423, "step": 7230 }, { "epoch": 1.12, "learning_rate": 0.0003498880441966228, "loss": 0.6649, "step": 7240 }, { "epoch": 1.12, "learning_rate": 0.0003495161746705503, "loss": 0.7144, "step": 7250 }, { "epoch": 1.12, "learning_rate": 0.0003491440433118462, "loss": 0.6854, "step": 7260 }, { "epoch": 1.12, "learning_rate": 0.00034877165109960863, "loss": 0.739, "step": 7270 }, { "epoch": 1.12, "learning_rate": 0.0003483989990136226, "loss": 0.6962, "step": 7280 }, { "epoch": 1.12, "learning_rate": 0.0003480260880343565, "loss": 0.7414, "step": 7290 }, { "epoch": 1.13, "learning_rate": 0.0003476529191429601, "loss": 0.7418, "step": 7300 }, { "epoch": 1.13, "eval_bleu": 0.12629642603617014, "eval_loss": 0.7300452589988708, "eval_meteor": 0.19607433639658048, "eval_rouge1": 0.36940732143709704, "eval_rouge2": 0.20324909716054756, "eval_rougeL": 0.30713128476276175, "eval_rougeLsum": 0.3072824822901492, "eval_runtime": 1148.1498, "eval_samples_per_second": 1.27, "eval_steps_per_second": 0.212, "step": 7300 }, { "epoch": 1.13, "learning_rate": 0.0003472794933212616, "loss": 0.7181, "step": 7310 }, { "epoch": 1.13, "learning_rate": 0.0003469058115517652, "loss": 0.7546, "step": 7320 }, { "epoch": 1.13, "learning_rate": 0.00034653187481764873, "loss": 0.736, "step": 7330 }, { "epoch": 1.13, "learning_rate": 0.00034615768410276065, "loss": 0.744, "step": 7340 }, { "epoch": 1.13, "learning_rate": 0.0003457832403916177, "loss": 0.7563, "step": 7350 }, { "epoch": 1.13, "learning_rate": 0.00034540854466940215, "loss": 0.6738, "step": 7360 }, { "epoch": 1.14, "learning_rate": 0.0003450335979219595, "loss": 0.7102, "step": 7370 }, { "epoch": 1.14, "learning_rate": 0.0003446584011357957, "loss": 0.7419, "step": 7380 }, { "epoch": 1.14, "learning_rate": 0.0003442829552980746, "loss": 0.7521, "step": 7390 }, { "epoch": 1.14, "learning_rate": 0.000343907261396615, "loss": 0.6713, "step": 7400 }, { "epoch": 1.14, "eval_bleu": 0.12662344706646492, "eval_loss": 0.7302644848823547, "eval_meteor": 0.19680753344212212, "eval_rouge1": 0.370672782958349, "eval_rouge2": 0.2051175852415017, "eval_rougeL": 0.30716206152120107, "eval_rougeLsum": 0.30713312439209517, "eval_runtime": 1127.3029, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.216, "step": 7400 }, { "epoch": 1.14, "learning_rate": 0.00034353132041988876, "loss": 0.7622, "step": 7410 }, { "epoch": 1.14, "learning_rate": 0.00034315513335701764, "loss": 0.6964, "step": 7420 }, { "epoch": 1.15, "learning_rate": 0.0003427787011977709, "loss": 0.7532, "step": 7430 }, { "epoch": 1.15, "learning_rate": 0.00034240202493256264, "loss": 0.6931, "step": 7440 }, { "epoch": 1.15, "learning_rate": 0.0003420251055524491, "loss": 0.7325, "step": 7450 }, { "epoch": 1.15, "learning_rate": 0.0003416479440491264, "loss": 0.6884, "step": 7460 }, { "epoch": 1.15, "learning_rate": 0.00034127054141492756, "loss": 0.7377, "step": 7470 }, { "epoch": 1.15, "learning_rate": 0.0003408928986428202, "loss": 0.7091, "step": 7480 }, { "epoch": 1.15, "learning_rate": 0.0003405150167264034, "loss": 0.7379, "step": 7490 }, { "epoch": 1.16, "learning_rate": 0.0003401368966599057, "loss": 0.704, "step": 7500 }, { "epoch": 1.16, "eval_bleu": 0.1258064652767695, "eval_loss": 0.7285297513008118, "eval_meteor": 0.19691865175723794, "eval_rouge1": 0.36778390805748723, "eval_rouge2": 0.20311221027278986, "eval_rougeL": 0.3054394126025268, "eval_rougeLsum": 0.3054229081295555, "eval_runtime": 1127.1968, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.216, "step": 7500 }, { "epoch": 1.16, "learning_rate": 0.00033975853943818223, "loss": 0.7004, "step": 7510 }, { "epoch": 1.16, "learning_rate": 0.00033937994605671214, "loss": 0.7505, "step": 7520 }, { "epoch": 1.16, "learning_rate": 0.0003390011175115956, "loss": 0.7212, "step": 7530 }, { "epoch": 1.16, "learning_rate": 0.0003386220547995519, "loss": 0.7163, "step": 7540 }, { "epoch": 1.16, "learning_rate": 0.00033824275891791624, "loss": 0.7683, "step": 7550 }, { "epoch": 1.17, "learning_rate": 0.00033786323086463734, "loss": 0.6846, "step": 7560 }, { "epoch": 1.17, "learning_rate": 0.0003374834716382748, "loss": 0.7276, "step": 7570 }, { "epoch": 1.17, "learning_rate": 0.00033710348223799634, "loss": 0.7359, "step": 7580 }, { "epoch": 1.17, "learning_rate": 0.00033672326366357544, "loss": 0.7125, "step": 7590 }, { "epoch": 1.17, "learning_rate": 0.00033634281691538847, "loss": 0.7155, "step": 7600 }, { "epoch": 1.17, "eval_bleu": 0.12742902408441137, "eval_loss": 0.7300394773483276, "eval_meteor": 0.19972564219243125, "eval_rouge1": 0.36676091746300093, "eval_rouge2": 0.20014907900892553, "eval_rougeL": 0.302702557140773, "eval_rougeLsum": 0.30281701017902063, "eval_runtime": 1175.8896, "eval_samples_per_second": 1.24, "eval_steps_per_second": 0.207, "step": 7600 }, { "epoch": 1.17, "learning_rate": 0.00033596214299441213, "loss": 0.6816, "step": 7610 }, { "epoch": 1.17, "learning_rate": 0.0003355812429022208, "loss": 0.656, "step": 7620 }, { "epoch": 1.18, "learning_rate": 0.000335200117640984, "loss": 0.7309, "step": 7630 }, { "epoch": 1.18, "learning_rate": 0.00033481876821346367, "loss": 0.7137, "step": 7640 }, { "epoch": 1.18, "learning_rate": 0.0003344371956230114, "loss": 0.7229, "step": 7650 }, { "epoch": 1.18, "learning_rate": 0.0003340554008735663, "loss": 0.7312, "step": 7660 }, { "epoch": 1.18, "learning_rate": 0.0003336733849696516, "loss": 0.6824, "step": 7670 }, { "epoch": 1.18, "learning_rate": 0.00033329114891637244, "loss": 0.7157, "step": 7680 }, { "epoch": 1.19, "learning_rate": 0.00033290869371941343, "loss": 0.7378, "step": 7690 }, { "epoch": 1.19, "learning_rate": 0.0003325260203850357, "loss": 0.7284, "step": 7700 }, { "epoch": 1.19, "eval_bleu": 0.1279752717045045, "eval_loss": 0.7327857613563538, "eval_meteor": 0.1978933380981099, "eval_rouge1": 0.3660675412873057, "eval_rouge2": 0.2007737061001636, "eval_rougeL": 0.3036859494669802, "eval_rougeLsum": 0.3035956514223758, "eval_runtime": 1203.7754, "eval_samples_per_second": 1.211, "eval_steps_per_second": 0.202, "step": 7700 }, { "epoch": 1.19, "learning_rate": 0.000332143129920074, "loss": 0.7286, "step": 7710 }, { "epoch": 1.19, "learning_rate": 0.00033176002333193475, "loss": 0.7142, "step": 7720 }, { "epoch": 1.19, "learning_rate": 0.0003313767016285929, "loss": 0.7226, "step": 7730 }, { "epoch": 1.19, "learning_rate": 0.00033099316581858924, "loss": 0.6984, "step": 7740 }, { "epoch": 1.19, "learning_rate": 0.000330609416911028, "loss": 0.7486, "step": 7750 }, { "epoch": 1.2, "learning_rate": 0.0003302254559155741, "loss": 0.6951, "step": 7760 }, { "epoch": 1.2, "learning_rate": 0.0003298412838424503, "loss": 0.6734, "step": 7770 }, { "epoch": 1.2, "learning_rate": 0.00032945690170243494, "loss": 0.7295, "step": 7780 }, { "epoch": 1.2, "learning_rate": 0.000329072310506859, "loss": 0.686, "step": 7790 }, { "epoch": 1.2, "learning_rate": 0.0003286875112676035, "loss": 0.6969, "step": 7800 }, { "epoch": 1.2, "eval_bleu": 0.12667328365779612, "eval_loss": 0.730004072189331, "eval_meteor": 0.19859259473423635, "eval_rouge1": 0.36608475297722565, "eval_rouge2": 0.20181171212849097, "eval_rougeL": 0.3048158401257285, "eval_rougeLsum": 0.30473273024993836, "eval_runtime": 1121.2619, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.217, "step": 7800 }, { "epoch": 1.2, "learning_rate": 0.0003283025049970967, "loss": 0.7053, "step": 7810 }, { "epoch": 1.21, "learning_rate": 0.0003279172927083117, "loss": 0.7112, "step": 7820 }, { "epoch": 1.21, "learning_rate": 0.00032753187541476357, "loss": 0.7294, "step": 7830 }, { "epoch": 1.21, "learning_rate": 0.0003271462541305069, "loss": 0.7703, "step": 7840 }, { "epoch": 1.21, "learning_rate": 0.00032676042987013287, "loss": 0.7219, "step": 7850 }, { "epoch": 1.21, "learning_rate": 0.0003263744036487667, "loss": 0.7527, "step": 7860 }, { "epoch": 1.21, "learning_rate": 0.000325988176482065, "loss": 0.7469, "step": 7870 }, { "epoch": 1.21, "learning_rate": 0.00032560174938621326, "loss": 0.7235, "step": 7880 }, { "epoch": 1.22, "learning_rate": 0.00032521512337792247, "loss": 0.7821, "step": 7890 }, { "epoch": 1.22, "learning_rate": 0.0003248282994744276, "loss": 0.7279, "step": 7900 }, { "epoch": 1.22, "eval_bleu": 0.13693040140777551, "eval_loss": 0.728911280632019, "eval_meteor": 0.20888724574067633, "eval_rouge1": 0.3680729526895363, "eval_rouge2": 0.2008207536043628, "eval_rougeL": 0.3017495392967735, "eval_rougeLsum": 0.30193972403551483, "eval_runtime": 1263.8659, "eval_samples_per_second": 1.154, "eval_steps_per_second": 0.192, "step": 7900 } ], "max_steps": 19458, "num_train_epochs": 3, "total_flos": 2.3091196043722752e+17, "trial_name": null, "trial_params": null }