{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 5656890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.991162104972874e-05, "loss": 5.8101, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.982326861579419e-05, "loss": 4.147, "step": 20000 }, { "epoch": 0.05, "learning_rate": 4.973490734308074e-05, "loss": 3.446, "step": 30000 }, { "epoch": 0.07, "learning_rate": 4.964655490914619e-05, "loss": 3.0968, "step": 40000 }, { "epoch": 0.09, "learning_rate": 4.9558193636432744e-05, "loss": 2.8867, "step": 50000 }, { "epoch": 0.11, "learning_rate": 4.94698500412771e-05, "loss": 2.7374, "step": 60000 }, { "epoch": 0.12, "learning_rate": 4.938149760734255e-05, "loss": 2.6213, "step": 70000 }, { "epoch": 0.14, "learning_rate": 4.92931363346291e-05, "loss": 2.5359, "step": 80000 }, { "epoch": 0.16, "learning_rate": 4.920478390069455e-05, "loss": 2.4665, "step": 90000 }, { "epoch": 0.18, "learning_rate": 4.91164226279811e-05, "loss": 2.4067, "step": 100000 }, { "epoch": 0.19, "learning_rate": 4.902807019404656e-05, "loss": 2.3518, "step": 110000 }, { "epoch": 0.21, "learning_rate": 4.893971776011201e-05, "loss": 2.311, "step": 120000 }, { "epoch": 0.23, "learning_rate": 4.885134764861965e-05, "loss": 2.2728, "step": 130000 }, { "epoch": 0.25, "learning_rate": 4.87629952146851e-05, "loss": 2.2382, "step": 140000 }, { "epoch": 0.27, "learning_rate": 4.867464278075056e-05, "loss": 2.2096, "step": 150000 }, { "epoch": 0.28, "learning_rate": 4.858629034681601e-05, "loss": 2.178, "step": 160000 }, { "epoch": 0.3, "learning_rate": 4.849793791288146e-05, "loss": 2.1548, "step": 170000 }, { "epoch": 0.32, "learning_rate": 4.840958547894692e-05, "loss": 2.1315, "step": 180000 }, { "epoch": 0.34, "learning_rate": 4.8321233045012365e-05, "loss": 2.1091, "step": 190000 }, { "epoch": 0.35, "learning_rate": 4.823288061107782e-05, "loss": 2.0882, "step": 200000 }, { "epoch": 0.37, "learning_rate": 4.814452817714327e-05, "loss": 2.0706, "step": 210000 }, { "epoch": 0.39, "learning_rate": 4.805618458198763e-05, "loss": 2.0587, "step": 220000 }, { "epoch": 0.41, "learning_rate": 4.7967832148053085e-05, "loss": 2.0406, "step": 230000 }, { "epoch": 0.42, "learning_rate": 4.787947971411853e-05, "loss": 2.0289, "step": 240000 }, { "epoch": 0.44, "learning_rate": 4.779112728018399e-05, "loss": 2.013, "step": 250000 }, { "epoch": 0.46, "learning_rate": 4.770276600747054e-05, "loss": 1.9989, "step": 260000 }, { "epoch": 0.48, "learning_rate": 4.7614413573535995e-05, "loss": 1.9863, "step": 270000 }, { "epoch": 0.49, "learning_rate": 4.752606997838035e-05, "loss": 1.9783, "step": 280000 }, { "epoch": 0.51, "learning_rate": 4.7437717544445804e-05, "loss": 1.9657, "step": 290000 }, { "epoch": 0.53, "learning_rate": 4.734935627173235e-05, "loss": 1.9566, "step": 300000 }, { "epoch": 0.55, "learning_rate": 4.72610038377978e-05, "loss": 1.9453, "step": 310000 }, { "epoch": 0.57, "learning_rate": 4.7172660242642156e-05, "loss": 1.9367, "step": 320000 }, { "epoch": 0.58, "learning_rate": 4.708430780870761e-05, "loss": 1.9306, "step": 330000 }, { "epoch": 0.6, "learning_rate": 4.699595537477307e-05, "loss": 1.9191, "step": 340000 }, { "epoch": 0.62, "learning_rate": 4.690761177961742e-05, "loss": 1.9128, "step": 350000 }, { "epoch": 0.64, "learning_rate": 4.6819259345682876e-05, "loss": 1.9027, "step": 360000 }, { "epoch": 0.65, "learning_rate": 4.673090691174833e-05, "loss": 1.8961, "step": 370000 }, { "epoch": 0.67, "learning_rate": 4.664254563903488e-05, "loss": 1.8879, "step": 380000 }, { "epoch": 0.69, "learning_rate": 4.6554202043879235e-05, "loss": 1.8775, "step": 390000 }, { "epoch": 0.71, "learning_rate": 4.646584960994469e-05, "loss": 1.8793, "step": 400000 }, { "epoch": 0.72, "learning_rate": 4.6377506014789044e-05, "loss": 1.8719, "step": 410000 }, { "epoch": 0.74, "learning_rate": 4.6289171258412315e-05, "loss": 1.8633, "step": 420000 }, { "epoch": 0.76, "learning_rate": 4.6200818824477763e-05, "loss": 1.8573, "step": 430000 }, { "epoch": 0.78, "learning_rate": 4.611245755176431e-05, "loss": 1.8464, "step": 440000 }, { "epoch": 0.8, "learning_rate": 4.602410511782976e-05, "loss": 1.8467, "step": 450000 }, { "epoch": 0.81, "learning_rate": 4.593575268389522e-05, "loss": 1.8403, "step": 460000 }, { "epoch": 0.83, "learning_rate": 4.584740024996067e-05, "loss": 1.836, "step": 470000 }, { "epoch": 0.85, "learning_rate": 4.575906549358393e-05, "loss": 1.8332, "step": 480000 }, { "epoch": 0.87, "learning_rate": 4.567071305964939e-05, "loss": 1.8271, "step": 490000 }, { "epoch": 0.88, "learning_rate": 4.5582360625714835e-05, "loss": 1.8172, "step": 500000 }, { "epoch": 0.9, "learning_rate": 4.5494017030559195e-05, "loss": 1.8169, "step": 510000 }, { "epoch": 0.92, "learning_rate": 4.540566459662465e-05, "loss": 1.8168, "step": 520000 }, { "epoch": 0.94, "learning_rate": 4.53173121626901e-05, "loss": 1.8088, "step": 530000 }, { "epoch": 0.95, "learning_rate": 4.522896856753446e-05, "loss": 1.801, "step": 540000 }, { "epoch": 0.97, "learning_rate": 4.514062497237882e-05, "loss": 1.8026, "step": 550000 }, { "epoch": 0.99, "learning_rate": 4.5052272538444275e-05, "loss": 1.7986, "step": 560000 }, { "epoch": 1.0, "eval_bleu": 28.7509, "eval_gen_len": 66.1132, "eval_loss": 1.8565547466278076, "eval_runtime": 2948.752, "eval_samples_per_second": 5.821, "eval_steps_per_second": 0.364, "step": 565689 }, { "epoch": 1.01, "learning_rate": 4.496392894328863e-05, "loss": 1.7781, "step": 570000 }, { "epoch": 1.03, "learning_rate": 4.487557650935408e-05, "loss": 1.7622, "step": 580000 }, { "epoch": 1.04, "learning_rate": 4.478722407541954e-05, "loss": 1.7623, "step": 590000 }, { "epoch": 1.06, "learning_rate": 4.469887164148499e-05, "loss": 1.7613, "step": 600000 }, { "epoch": 1.08, "learning_rate": 4.461052804632935e-05, "loss": 1.7595, "step": 610000 }, { "epoch": 1.1, "learning_rate": 4.45221756123948e-05, "loss": 1.7563, "step": 620000 }, { "epoch": 1.11, "learning_rate": 4.443382317846025e-05, "loss": 1.7571, "step": 630000 }, { "epoch": 1.13, "learning_rate": 4.434547958330461e-05, "loss": 1.7521, "step": 640000 }, { "epoch": 1.15, "learning_rate": 4.4257127149370066e-05, "loss": 1.7513, "step": 650000 }, { "epoch": 1.17, "learning_rate": 4.4168774715435515e-05, "loss": 1.7471, "step": 660000 }, { "epoch": 1.18, "learning_rate": 4.4080422281500964e-05, "loss": 1.7532, "step": 670000 }, { "epoch": 1.2, "learning_rate": 4.399207868634533e-05, "loss": 1.7469, "step": 680000 }, { "epoch": 1.22, "learning_rate": 4.390372625241078e-05, "loss": 1.7447, "step": 690000 }, { "epoch": 1.24, "learning_rate": 4.381537381847623e-05, "loss": 1.7385, "step": 700000 }, { "epoch": 1.26, "learning_rate": 4.372702138454168e-05, "loss": 1.7426, "step": 710000 }, { "epoch": 1.27, "learning_rate": 4.363867778938604e-05, "loss": 1.7381, "step": 720000 }, { "epoch": 1.29, "learning_rate": 4.355032535545149e-05, "loss": 1.7338, "step": 730000 }, { "epoch": 1.31, "learning_rate": 4.346197292151695e-05, "loss": 1.7307, "step": 740000 }, { "epoch": 1.33, "learning_rate": 4.33736204875824e-05, "loss": 1.7319, "step": 750000 }, { "epoch": 1.34, "learning_rate": 4.328525921486895e-05, "loss": 1.7279, "step": 760000 }, { "epoch": 1.36, "learning_rate": 4.3196915619713307e-05, "loss": 1.729, "step": 770000 }, { "epoch": 1.38, "learning_rate": 4.310856318577876e-05, "loss": 1.7254, "step": 780000 }, { "epoch": 1.4, "learning_rate": 4.3020201913065306e-05, "loss": 1.7207, "step": 790000 }, { "epoch": 1.41, "learning_rate": 4.293184947913076e-05, "loss": 1.7183, "step": 800000 }, { "epoch": 1.43, "learning_rate": 4.284350588397512e-05, "loss": 1.7184, "step": 810000 }, { "epoch": 1.45, "learning_rate": 4.275515345004057e-05, "loss": 1.7139, "step": 820000 }, { "epoch": 1.47, "learning_rate": 4.266679217732712e-05, "loss": 1.7155, "step": 830000 }, { "epoch": 1.48, "learning_rate": 4.257843090461367e-05, "loss": 1.7122, "step": 840000 }, { "epoch": 1.5, "learning_rate": 4.2490087309458025e-05, "loss": 1.7111, "step": 850000 }, { "epoch": 1.52, "learning_rate": 4.2401717197965665e-05, "loss": 1.7106, "step": 860000 }, { "epoch": 1.54, "learning_rate": 4.2313373602810025e-05, "loss": 1.7071, "step": 870000 }, { "epoch": 1.56, "learning_rate": 4.222502116887548e-05, "loss": 1.7081, "step": 880000 }, { "epoch": 1.57, "learning_rate": 4.213666873494093e-05, "loss": 1.7053, "step": 890000 }, { "epoch": 1.59, "learning_rate": 4.2048316301006384e-05, "loss": 1.7023, "step": 900000 }, { "epoch": 1.61, "learning_rate": 4.195996386707184e-05, "loss": 1.7017, "step": 910000 }, { "epoch": 1.63, "learning_rate": 4.1871611433137295e-05, "loss": 1.6979, "step": 920000 }, { "epoch": 1.64, "learning_rate": 4.178325016042384e-05, "loss": 1.6953, "step": 930000 }, { "epoch": 1.66, "learning_rate": 4.169489772648929e-05, "loss": 1.693, "step": 940000 }, { "epoch": 1.68, "learning_rate": 4.160653645377584e-05, "loss": 1.6934, "step": 950000 }, { "epoch": 1.7, "learning_rate": 4.1518184019841294e-05, "loss": 1.6899, "step": 960000 }, { "epoch": 1.71, "learning_rate": 4.142983158590675e-05, "loss": 1.6913, "step": 970000 }, { "epoch": 1.73, "learning_rate": 4.1341470313193294e-05, "loss": 1.691, "step": 980000 }, { "epoch": 1.75, "learning_rate": 4.125311787925875e-05, "loss": 1.6888, "step": 990000 }, { "epoch": 1.77, "learning_rate": 4.11647742841031e-05, "loss": 1.6888, "step": 1000000 }, { "epoch": 1.79, "learning_rate": 4.107641301138965e-05, "loss": 1.6857, "step": 1010000 }, { "epoch": 1.8, "learning_rate": 4.098806057745511e-05, "loss": 1.6854, "step": 1020000 }, { "epoch": 1.82, "learning_rate": 4.089970814352056e-05, "loss": 1.679, "step": 1030000 }, { "epoch": 1.84, "learning_rate": 4.08113468708071e-05, "loss": 1.68, "step": 1040000 }, { "epoch": 1.86, "learning_rate": 4.072299443687256e-05, "loss": 1.681, "step": 1050000 }, { "epoch": 1.87, "learning_rate": 4.063464200293801e-05, "loss": 1.6783, "step": 1060000 }, { "epoch": 1.89, "learning_rate": 4.0546298407782365e-05, "loss": 1.6766, "step": 1070000 }, { "epoch": 1.91, "learning_rate": 4.0457937135068916e-05, "loss": 1.6762, "step": 1080000 }, { "epoch": 1.93, "learning_rate": 4.036957586235546e-05, "loss": 1.6753, "step": 1090000 }, { "epoch": 1.94, "learning_rate": 4.0281223428420916e-05, "loss": 1.671, "step": 1100000 }, { "epoch": 1.96, "learning_rate": 4.019287099448637e-05, "loss": 1.668, "step": 1110000 }, { "epoch": 1.98, "learning_rate": 4.0104527399330724e-05, "loss": 1.6676, "step": 1120000 }, { "epoch": 2.0, "learning_rate": 4.0016166126617275e-05, "loss": 1.6695, "step": 1130000 }, { "epoch": 2.0, "eval_bleu": 29.525, "eval_gen_len": 66.2651, "eval_loss": 1.7653018236160278, "eval_runtime": 3037.2133, "eval_samples_per_second": 5.652, "eval_steps_per_second": 0.353, "step": 1131378 }, { "epoch": 2.02, "learning_rate": 3.992781369268273e-05, "loss": 1.6357, "step": 1140000 }, { "epoch": 2.03, "learning_rate": 3.9839461258748186e-05, "loss": 1.6309, "step": 1150000 }, { "epoch": 2.05, "learning_rate": 3.975109998603473e-05, "loss": 1.6355, "step": 1160000 }, { "epoch": 2.07, "learning_rate": 3.966275639087909e-05, "loss": 1.6321, "step": 1170000 }, { "epoch": 2.09, "learning_rate": 3.9574395118165634e-05, "loss": 1.6344, "step": 1180000 }, { "epoch": 2.1, "learning_rate": 3.948604268423109e-05, "loss": 1.6312, "step": 1190000 }, { "epoch": 2.12, "learning_rate": 3.939768141151764e-05, "loss": 1.6324, "step": 1200000 }, { "epoch": 2.14, "learning_rate": 3.9309328977583096e-05, "loss": 1.6357, "step": 1210000 }, { "epoch": 2.16, "learning_rate": 3.922097654364854e-05, "loss": 1.6325, "step": 1220000 }, { "epoch": 2.17, "learning_rate": 3.913262410971399e-05, "loss": 1.6327, "step": 1230000 }, { "epoch": 2.19, "learning_rate": 3.9044262837000544e-05, "loss": 1.632, "step": 1240000 }, { "epoch": 2.21, "learning_rate": 3.8955910403066e-05, "loss": 1.6315, "step": 1250000 }, { "epoch": 2.23, "learning_rate": 3.8867557969131455e-05, "loss": 1.6322, "step": 1260000 }, { "epoch": 2.25, "learning_rate": 3.877921437397581e-05, "loss": 1.6314, "step": 1270000 }, { "epoch": 2.26, "learning_rate": 3.8690861940041264e-05, "loss": 1.6317, "step": 1280000 }, { "epoch": 2.28, "learning_rate": 3.860250066732781e-05, "loss": 1.6275, "step": 1290000 }, { "epoch": 2.3, "learning_rate": 3.851415707217217e-05, "loss": 1.6296, "step": 1300000 }, { "epoch": 2.32, "learning_rate": 3.842579579945871e-05, "loss": 1.6305, "step": 1310000 }, { "epoch": 2.33, "learning_rate": 3.833743452674526e-05, "loss": 1.6266, "step": 1320000 }, { "epoch": 2.35, "learning_rate": 3.824908209281072e-05, "loss": 1.6273, "step": 1330000 }, { "epoch": 2.37, "learning_rate": 3.8160729658876174e-05, "loss": 1.6283, "step": 1340000 }, { "epoch": 2.39, "learning_rate": 3.807237722494162e-05, "loss": 1.629, "step": 1350000 }, { "epoch": 2.4, "learning_rate": 3.798403362978598e-05, "loss": 1.6269, "step": 1360000 }, { "epoch": 2.42, "learning_rate": 3.7895672357072526e-05, "loss": 1.6252, "step": 1370000 }, { "epoch": 2.44, "learning_rate": 3.7807328761916886e-05, "loss": 1.6221, "step": 1380000 }, { "epoch": 2.46, "learning_rate": 3.7718976327982335e-05, "loss": 1.6229, "step": 1390000 }, { "epoch": 2.47, "learning_rate": 3.7630615055268886e-05, "loss": 1.6226, "step": 1400000 }, { "epoch": 2.49, "learning_rate": 3.754226262133434e-05, "loss": 1.6224, "step": 1410000 }, { "epoch": 2.51, "learning_rate": 3.74539101873998e-05, "loss": 1.6207, "step": 1420000 }, { "epoch": 2.53, "learning_rate": 3.7365557753465245e-05, "loss": 1.6204, "step": 1430000 }, { "epoch": 2.55, "learning_rate": 3.72772053195307e-05, "loss": 1.6183, "step": 1440000 }, { "epoch": 2.56, "learning_rate": 3.7188861724375054e-05, "loss": 1.6235, "step": 1450000 }, { "epoch": 2.58, "learning_rate": 3.710050929044051e-05, "loss": 1.6207, "step": 1460000 }, { "epoch": 2.6, "learning_rate": 3.7012156856505965e-05, "loss": 1.618, "step": 1470000 }, { "epoch": 2.62, "learning_rate": 3.692381326135032e-05, "loss": 1.6162, "step": 1480000 }, { "epoch": 2.63, "learning_rate": 3.683546082741577e-05, "loss": 1.6194, "step": 1490000 }, { "epoch": 2.65, "learning_rate": 3.674711723226013e-05, "loss": 1.6154, "step": 1500000 }, { "epoch": 2.67, "learning_rate": 3.6658755959546684e-05, "loss": 1.6156, "step": 1510000 }, { "epoch": 2.69, "learning_rate": 3.6570403525612126e-05, "loss": 1.6144, "step": 1520000 }, { "epoch": 2.7, "learning_rate": 3.648205993045649e-05, "loss": 1.6125, "step": 1530000 }, { "epoch": 2.72, "learning_rate": 3.639369865774304e-05, "loss": 1.6134, "step": 1540000 }, { "epoch": 2.74, "learning_rate": 3.630534622380849e-05, "loss": 1.6102, "step": 1550000 }, { "epoch": 2.76, "learning_rate": 3.6217002628652845e-05, "loss": 1.6065, "step": 1560000 }, { "epoch": 2.78, "learning_rate": 3.6128641355939396e-05, "loss": 1.6073, "step": 1570000 }, { "epoch": 2.79, "learning_rate": 3.604028008322594e-05, "loss": 1.6071, "step": 1580000 }, { "epoch": 2.81, "learning_rate": 3.59519364880703e-05, "loss": 1.6046, "step": 1590000 }, { "epoch": 2.83, "learning_rate": 3.586359289291466e-05, "loss": 1.607, "step": 1600000 }, { "epoch": 2.85, "learning_rate": 3.5775231620201204e-05, "loss": 1.6069, "step": 1610000 }, { "epoch": 2.86, "learning_rate": 3.5686888025045564e-05, "loss": 1.6058, "step": 1620000 }, { "epoch": 2.88, "learning_rate": 3.5598526752332115e-05, "loss": 1.6031, "step": 1630000 }, { "epoch": 2.9, "learning_rate": 3.551018315717647e-05, "loss": 1.6047, "step": 1640000 }, { "epoch": 2.92, "learning_rate": 3.542182188446302e-05, "loss": 1.6042, "step": 1650000 }, { "epoch": 2.93, "learning_rate": 3.5333469450528475e-05, "loss": 1.6025, "step": 1660000 }, { "epoch": 2.95, "learning_rate": 3.524511701659392e-05, "loss": 1.6036, "step": 1670000 }, { "epoch": 2.97, "learning_rate": 3.515677342143828e-05, "loss": 1.6011, "step": 1680000 }, { "epoch": 2.99, "learning_rate": 3.506842098750374e-05, "loss": 1.6038, "step": 1690000 }, { "epoch": 3.0, "eval_bleu": 29.8841, "eval_gen_len": 66.1849, "eval_loss": 1.7081401348114014, "eval_runtime": 2996.6763, "eval_samples_per_second": 5.728, "eval_steps_per_second": 0.358, "step": 1697067 }, { "epoch": 3.01, "learning_rate": 3.498005971479029e-05, "loss": 1.5881, "step": 1700000 }, { "epoch": 3.02, "learning_rate": 3.489170728085573e-05, "loss": 1.5596, "step": 1710000 }, { "epoch": 3.04, "learning_rate": 3.48033636857001e-05, "loss": 1.5643, "step": 1720000 }, { "epoch": 3.06, "learning_rate": 3.471502009054445e-05, "loss": 1.563, "step": 1730000 }, { "epoch": 3.08, "learning_rate": 3.462667649538881e-05, "loss": 1.5633, "step": 1740000 }, { "epoch": 3.09, "learning_rate": 3.4538324061454266e-05, "loss": 1.5678, "step": 1750000 }, { "epoch": 3.11, "learning_rate": 3.444996278874081e-05, "loss": 1.5677, "step": 1760000 }, { "epoch": 3.13, "learning_rate": 3.436161919358517e-05, "loss": 1.5664, "step": 1770000 }, { "epoch": 3.15, "learning_rate": 3.427327559842953e-05, "loss": 1.57, "step": 1780000 }, { "epoch": 3.16, "learning_rate": 3.418492316449498e-05, "loss": 1.5695, "step": 1790000 }, { "epoch": 3.18, "learning_rate": 3.4096570730560434e-05, "loss": 1.5693, "step": 1800000 }, { "epoch": 3.2, "learning_rate": 3.4008227135404794e-05, "loss": 1.5669, "step": 1810000 }, { "epoch": 3.22, "learning_rate": 3.391987470147024e-05, "loss": 1.5677, "step": 1820000 }, { "epoch": 3.23, "learning_rate": 3.38315222675357e-05, "loss": 1.5689, "step": 1830000 }, { "epoch": 3.25, "learning_rate": 3.3743169833601154e-05, "loss": 1.5711, "step": 1840000 }, { "epoch": 3.27, "learning_rate": 3.36548173996666e-05, "loss": 1.5679, "step": 1850000 }, { "epoch": 3.29, "learning_rate": 3.3566456126953147e-05, "loss": 1.5705, "step": 1860000 }, { "epoch": 3.31, "learning_rate": 3.3478112531797506e-05, "loss": 1.5689, "step": 1870000 }, { "epoch": 3.32, "learning_rate": 3.338976009786296e-05, "loss": 1.5657, "step": 1880000 }, { "epoch": 3.34, "learning_rate": 3.330139882514951e-05, "loss": 1.5658, "step": 1890000 }, { "epoch": 3.36, "learning_rate": 3.3213055229993866e-05, "loss": 1.5675, "step": 1900000 }, { "epoch": 3.38, "learning_rate": 3.312469395728042e-05, "loss": 1.5664, "step": 1910000 }, { "epoch": 3.39, "learning_rate": 3.303635036212478e-05, "loss": 1.5668, "step": 1920000 }, { "epoch": 3.41, "learning_rate": 3.2947997928190225e-05, "loss": 1.5653, "step": 1930000 }, { "epoch": 3.43, "learning_rate": 3.285963665547677e-05, "loss": 1.5677, "step": 1940000 }, { "epoch": 3.45, "learning_rate": 3.2771293060321136e-05, "loss": 1.5633, "step": 1950000 }, { "epoch": 3.46, "learning_rate": 3.2682940626386585e-05, "loss": 1.5638, "step": 1960000 }, { "epoch": 3.48, "learning_rate": 3.2594579353673136e-05, "loss": 1.5635, "step": 1970000 }, { "epoch": 3.5, "learning_rate": 3.2506226919738584e-05, "loss": 1.566, "step": 1980000 }, { "epoch": 3.52, "learning_rate": 3.241787448580404e-05, "loss": 1.5641, "step": 1990000 }, { "epoch": 3.54, "learning_rate": 3.232950437431168e-05, "loss": 1.5634, "step": 2000000 }, { "epoch": 3.55, "learning_rate": 3.224116077915604e-05, "loss": 1.5658, "step": 2010000 }, { "epoch": 3.57, "learning_rate": 3.215280834522149e-05, "loss": 1.5644, "step": 2020000 }, { "epoch": 3.59, "learning_rate": 3.2064455911286943e-05, "loss": 1.5625, "step": 2030000 }, { "epoch": 3.61, "learning_rate": 3.19761034773524e-05, "loss": 1.562, "step": 2040000 }, { "epoch": 3.62, "learning_rate": 3.1887751043417854e-05, "loss": 1.5634, "step": 2050000 }, { "epoch": 3.64, "learning_rate": 3.179940744826221e-05, "loss": 1.5595, "step": 2060000 }, { "epoch": 3.66, "learning_rate": 3.171104617554876e-05, "loss": 1.5594, "step": 2070000 }, { "epoch": 3.68, "learning_rate": 3.1622693741614214e-05, "loss": 1.5609, "step": 2080000 }, { "epoch": 3.69, "learning_rate": 3.153434130767966e-05, "loss": 1.5606, "step": 2090000 }, { "epoch": 3.71, "learning_rate": 3.144598003496621e-05, "loss": 1.562, "step": 2100000 }, { "epoch": 3.73, "learning_rate": 3.135762760103166e-05, "loss": 1.5601, "step": 2110000 }, { "epoch": 3.75, "learning_rate": 3.126927516709712e-05, "loss": 1.5574, "step": 2120000 }, { "epoch": 3.77, "learning_rate": 3.118092273316257e-05, "loss": 1.557, "step": 2130000 }, { "epoch": 3.78, "learning_rate": 3.109257029922802e-05, "loss": 1.558, "step": 2140000 }, { "epoch": 3.8, "learning_rate": 3.100420902651457e-05, "loss": 1.5589, "step": 2150000 }, { "epoch": 3.82, "learning_rate": 3.0915865431358926e-05, "loss": 1.5563, "step": 2160000 }, { "epoch": 3.84, "learning_rate": 3.082751299742438e-05, "loss": 1.557, "step": 2170000 }, { "epoch": 3.85, "learning_rate": 3.073915172471093e-05, "loss": 1.5517, "step": 2180000 }, { "epoch": 3.87, "learning_rate": 3.0650808129555285e-05, "loss": 1.5569, "step": 2190000 }, { "epoch": 3.89, "learning_rate": 3.0562438018062925e-05, "loss": 1.5561, "step": 2200000 }, { "epoch": 3.91, "learning_rate": 3.047409442290729e-05, "loss": 1.5536, "step": 2210000 }, { "epoch": 3.92, "learning_rate": 3.0385733150193836e-05, "loss": 1.5567, "step": 2220000 }, { "epoch": 3.94, "learning_rate": 3.0297371877480386e-05, "loss": 1.5514, "step": 2230000 }, { "epoch": 3.96, "learning_rate": 3.0209028282324743e-05, "loss": 1.5542, "step": 2240000 }, { "epoch": 3.98, "learning_rate": 3.0120675848390195e-05, "loss": 1.5515, "step": 2250000 }, { "epoch": 4.0, "learning_rate": 3.0032323414455647e-05, "loss": 1.5515, "step": 2260000 }, { "epoch": 4.0, "eval_bleu": 30.588, "eval_gen_len": 65.9093, "eval_loss": 1.6601390838623047, "eval_runtime": 3100.6175, "eval_samples_per_second": 5.536, "eval_steps_per_second": 0.346, "step": 2262756 }, { "epoch": 4.01, "learning_rate": 2.9943970980521102e-05, "loss": 1.5183, "step": 2270000 }, { "epoch": 4.03, "learning_rate": 2.9855609707807647e-05, "loss": 1.5123, "step": 2280000 }, { "epoch": 4.05, "learning_rate": 2.9767257273873102e-05, "loss": 1.5146, "step": 2290000 }, { "epoch": 4.07, "learning_rate": 2.9678904839938554e-05, "loss": 1.5144, "step": 2300000 }, { "epoch": 4.08, "learning_rate": 2.9590552406004006e-05, "loss": 1.5173, "step": 2310000 }, { "epoch": 4.1, "learning_rate": 2.9502191133290557e-05, "loss": 1.5188, "step": 2320000 }, { "epoch": 4.12, "learning_rate": 2.9413838699356006e-05, "loss": 1.5195, "step": 2330000 }, { "epoch": 4.14, "learning_rate": 2.932549510420037e-05, "loss": 1.5181, "step": 2340000 }, { "epoch": 4.15, "learning_rate": 2.923714267026582e-05, "loss": 1.5199, "step": 2350000 }, { "epoch": 4.17, "learning_rate": 2.9148799075110177e-05, "loss": 1.5206, "step": 2360000 }, { "epoch": 4.19, "learning_rate": 2.9060446641175633e-05, "loss": 1.5207, "step": 2370000 }, { "epoch": 4.21, "learning_rate": 2.8972094207241085e-05, "loss": 1.5209, "step": 2380000 }, { "epoch": 4.22, "learning_rate": 2.8883741773306534e-05, "loss": 1.5192, "step": 2390000 }, { "epoch": 4.24, "learning_rate": 2.8795380500593084e-05, "loss": 1.5231, "step": 2400000 }, { "epoch": 4.26, "learning_rate": 2.870703690543744e-05, "loss": 1.5201, "step": 2410000 }, { "epoch": 4.28, "learning_rate": 2.8618684471502893e-05, "loss": 1.5204, "step": 2420000 }, { "epoch": 4.3, "learning_rate": 2.853033203756835e-05, "loss": 1.5224, "step": 2430000 }, { "epoch": 4.31, "learning_rate": 2.84419796036338e-05, "loss": 1.5221, "step": 2440000 }, { "epoch": 4.33, "learning_rate": 2.8353627169699252e-05, "loss": 1.52, "step": 2450000 }, { "epoch": 4.35, "learning_rate": 2.8265274735764708e-05, "loss": 1.5212, "step": 2460000 }, { "epoch": 4.37, "learning_rate": 2.8176931140609064e-05, "loss": 1.5241, "step": 2470000 }, { "epoch": 4.38, "learning_rate": 2.8088578706674516e-05, "loss": 1.5224, "step": 2480000 }, { "epoch": 4.4, "learning_rate": 2.8000235111518873e-05, "loss": 1.5185, "step": 2490000 }, { "epoch": 4.42, "learning_rate": 2.791188267758433e-05, "loss": 1.5184, "step": 2500000 }, { "epoch": 4.44, "learning_rate": 2.7823539082428685e-05, "loss": 1.5199, "step": 2510000 }, { "epoch": 4.45, "learning_rate": 2.7735186648494137e-05, "loss": 1.5198, "step": 2520000 }, { "epoch": 4.47, "learning_rate": 2.7646834214559592e-05, "loss": 1.5195, "step": 2530000 }, { "epoch": 4.49, "learning_rate": 2.7558481780625044e-05, "loss": 1.522, "step": 2540000 }, { "epoch": 4.51, "learning_rate": 2.74701293466905e-05, "loss": 1.5223, "step": 2550000 }, { "epoch": 4.53, "learning_rate": 2.7381776912755952e-05, "loss": 1.5183, "step": 2560000 }, { "epoch": 4.54, "learning_rate": 2.7293433317600308e-05, "loss": 1.5225, "step": 2570000 }, { "epoch": 4.56, "learning_rate": 2.7205080883665764e-05, "loss": 1.5198, "step": 2580000 }, { "epoch": 4.58, "learning_rate": 2.7116728449731216e-05, "loss": 1.5201, "step": 2590000 }, { "epoch": 4.6, "learning_rate": 2.7028384854575572e-05, "loss": 1.5195, "step": 2600000 }, { "epoch": 4.61, "learning_rate": 2.6940032420641028e-05, "loss": 1.52, "step": 2610000 }, { "epoch": 4.63, "learning_rate": 2.6851688825485384e-05, "loss": 1.5202, "step": 2620000 }, { "epoch": 4.65, "learning_rate": 2.6763336391550836e-05, "loss": 1.5192, "step": 2630000 }, { "epoch": 4.67, "learning_rate": 2.6674983957616288e-05, "loss": 1.5176, "step": 2640000 }, { "epoch": 4.68, "learning_rate": 2.6586631523681744e-05, "loss": 1.5153, "step": 2650000 }, { "epoch": 4.7, "learning_rate": 2.64982879285261e-05, "loss": 1.5156, "step": 2660000 }, { "epoch": 4.72, "learning_rate": 2.640992665581265e-05, "loss": 1.5191, "step": 2670000 }, { "epoch": 4.74, "learning_rate": 2.6321583060657008e-05, "loss": 1.5147, "step": 2680000 }, { "epoch": 4.76, "learning_rate": 2.6233239465501364e-05, "loss": 1.5169, "step": 2690000 }, { "epoch": 4.77, "learning_rate": 2.614489587034572e-05, "loss": 1.5174, "step": 2700000 }, { "epoch": 4.79, "learning_rate": 2.605653459763227e-05, "loss": 1.5164, "step": 2710000 }, { "epoch": 4.81, "learning_rate": 2.5968182163697724e-05, "loss": 1.5134, "step": 2720000 }, { "epoch": 4.83, "learning_rate": 2.587983856854208e-05, "loss": 1.5166, "step": 2730000 }, { "epoch": 4.84, "learning_rate": 2.5791486134607535e-05, "loss": 1.5162, "step": 2740000 }, { "epoch": 4.86, "learning_rate": 2.5703142539451892e-05, "loss": 1.5122, "step": 2750000 }, { "epoch": 4.88, "learning_rate": 2.5614781266738443e-05, "loss": 1.5134, "step": 2760000 }, { "epoch": 4.9, "learning_rate": 2.55264376715828e-05, "loss": 1.5135, "step": 2770000 }, { "epoch": 4.91, "learning_rate": 2.543808523764825e-05, "loss": 1.511, "step": 2780000 }, { "epoch": 4.93, "learning_rate": 2.5349741642492608e-05, "loss": 1.5089, "step": 2790000 }, { "epoch": 4.95, "learning_rate": 2.5261389208558063e-05, "loss": 1.5149, "step": 2800000 }, { "epoch": 4.97, "learning_rate": 2.517304561340242e-05, "loss": 1.5121, "step": 2810000 }, { "epoch": 4.99, "learning_rate": 2.5084693179467872e-05, "loss": 1.5115, "step": 2820000 }, { "epoch": 5.0, "eval_bleu": 30.9726, "eval_gen_len": 66.2171, "eval_loss": 1.6359007358551025, "eval_runtime": 3064.0636, "eval_samples_per_second": 5.602, "eval_steps_per_second": 0.35, "step": 2828445 }, { "epoch": 5.0, "learning_rate": 2.4996340745533324e-05, "loss": 1.5045, "step": 2830000 }, { "epoch": 5.02, "learning_rate": 2.4907988311598776e-05, "loss": 1.4705, "step": 2840000 }, { "epoch": 5.04, "learning_rate": 2.4819635877664228e-05, "loss": 1.4722, "step": 2850000 }, { "epoch": 5.06, "learning_rate": 2.4731292282508588e-05, "loss": 1.4756, "step": 2860000 }, { "epoch": 5.07, "learning_rate": 2.464293984857404e-05, "loss": 1.473, "step": 2870000 }, { "epoch": 5.09, "learning_rate": 2.45545962534184e-05, "loss": 1.4754, "step": 2880000 }, { "epoch": 5.11, "learning_rate": 2.4466252658262756e-05, "loss": 1.4775, "step": 2890000 }, { "epoch": 5.13, "learning_rate": 2.4377909063107116e-05, "loss": 1.4779, "step": 2900000 }, { "epoch": 5.14, "learning_rate": 2.4289556629172568e-05, "loss": 1.4802, "step": 2910000 }, { "epoch": 5.16, "learning_rate": 2.420120419523802e-05, "loss": 1.479, "step": 2920000 }, { "epoch": 5.18, "learning_rate": 2.4112851761303472e-05, "loss": 1.4802, "step": 2930000 }, { "epoch": 5.2, "learning_rate": 2.4024499327368928e-05, "loss": 1.4796, "step": 2940000 }, { "epoch": 5.21, "learning_rate": 2.3936155732213284e-05, "loss": 1.4798, "step": 2950000 }, { "epoch": 5.23, "learning_rate": 2.3847812137057644e-05, "loss": 1.4808, "step": 2960000 }, { "epoch": 5.25, "learning_rate": 2.3759459703123096e-05, "loss": 1.4811, "step": 2970000 }, { "epoch": 5.27, "learning_rate": 2.3671107269188548e-05, "loss": 1.4795, "step": 2980000 }, { "epoch": 5.29, "learning_rate": 2.3582763674032908e-05, "loss": 1.4812, "step": 2990000 }, { "epoch": 5.3, "learning_rate": 2.349441124009836e-05, "loss": 1.4828, "step": 3000000 }, { "epoch": 5.32, "learning_rate": 2.3406067644942717e-05, "loss": 1.4809, "step": 3010000 }, { "epoch": 5.34, "learning_rate": 2.3317715211008172e-05, "loss": 1.4843, "step": 3020000 }, { "epoch": 5.36, "learning_rate": 2.3229362777073624e-05, "loss": 1.4821, "step": 3030000 }, { "epoch": 5.37, "learning_rate": 2.3141010343139076e-05, "loss": 1.4808, "step": 3040000 }, { "epoch": 5.39, "learning_rate": 2.3052666747983432e-05, "loss": 1.4815, "step": 3050000 }, { "epoch": 5.41, "learning_rate": 2.2964323152827792e-05, "loss": 1.4796, "step": 3060000 }, { "epoch": 5.43, "learning_rate": 2.2875970718893244e-05, "loss": 1.4823, "step": 3070000 }, { "epoch": 5.44, "learning_rate": 2.2787618284958696e-05, "loss": 1.4802, "step": 3080000 }, { "epoch": 5.46, "learning_rate": 2.2699265851024152e-05, "loss": 1.4814, "step": 3090000 }, { "epoch": 5.48, "learning_rate": 2.26109134170896e-05, "loss": 1.482, "step": 3100000 }, { "epoch": 5.5, "learning_rate": 2.2522560983155056e-05, "loss": 1.4808, "step": 3110000 }, { "epoch": 5.52, "learning_rate": 2.2434208549220508e-05, "loss": 1.4823, "step": 3120000 }, { "epoch": 5.53, "learning_rate": 2.2345856115285963e-05, "loss": 1.4807, "step": 3130000 }, { "epoch": 5.55, "learning_rate": 2.225751252013032e-05, "loss": 1.4821, "step": 3140000 }, { "epoch": 5.57, "learning_rate": 2.216916892497468e-05, "loss": 1.4789, "step": 3150000 }, { "epoch": 5.59, "learning_rate": 2.2080825329819036e-05, "loss": 1.4809, "step": 3160000 }, { "epoch": 5.6, "learning_rate": 2.199245521832668e-05, "loss": 1.478, "step": 3170000 }, { "epoch": 5.62, "learning_rate": 2.1904111623171036e-05, "loss": 1.4817, "step": 3180000 }, { "epoch": 5.64, "learning_rate": 2.1815768028015396e-05, "loss": 1.4811, "step": 3190000 }, { "epoch": 5.66, "learning_rate": 2.1727415594080848e-05, "loss": 1.4791, "step": 3200000 }, { "epoch": 5.67, "learning_rate": 2.16390631601463e-05, "loss": 1.4789, "step": 3210000 }, { "epoch": 5.69, "learning_rate": 2.155071956499066e-05, "loss": 1.4778, "step": 3220000 }, { "epoch": 5.71, "learning_rate": 2.1462358292277207e-05, "loss": 1.4785, "step": 3230000 }, { "epoch": 5.73, "learning_rate": 2.137400585834266e-05, "loss": 1.4769, "step": 3240000 }, { "epoch": 5.75, "learning_rate": 2.128565342440811e-05, "loss": 1.4783, "step": 3250000 }, { "epoch": 5.76, "learning_rate": 2.119730982925247e-05, "loss": 1.4793, "step": 3260000 }, { "epoch": 5.78, "learning_rate": 2.1108966234096828e-05, "loss": 1.48, "step": 3270000 }, { "epoch": 5.8, "learning_rate": 2.102061380016228e-05, "loss": 1.4794, "step": 3280000 }, { "epoch": 5.82, "learning_rate": 2.0932261366227735e-05, "loss": 1.4788, "step": 3290000 }, { "epoch": 5.83, "learning_rate": 2.084391777107209e-05, "loss": 1.4791, "step": 3300000 }, { "epoch": 5.85, "learning_rate": 2.075557417591645e-05, "loss": 1.4779, "step": 3310000 }, { "epoch": 5.87, "learning_rate": 2.06672217419819e-05, "loss": 1.4751, "step": 3320000 }, { "epoch": 5.89, "learning_rate": 2.0578869308047356e-05, "loss": 1.4763, "step": 3330000 }, { "epoch": 5.9, "learning_rate": 2.0490508035333903e-05, "loss": 1.4751, "step": 3340000 }, { "epoch": 5.92, "learning_rate": 2.0402164440178263e-05, "loss": 1.4753, "step": 3350000 }, { "epoch": 5.94, "learning_rate": 2.0313812006243715e-05, "loss": 1.4737, "step": 3360000 }, { "epoch": 5.96, "learning_rate": 2.022546841108807e-05, "loss": 1.4755, "step": 3370000 }, { "epoch": 5.98, "learning_rate": 2.013710713837462e-05, "loss": 1.4756, "step": 3380000 }, { "epoch": 5.99, "learning_rate": 2.004876354321898e-05, "loss": 1.474, "step": 3390000 }, { "epoch": 6.0, "eval_bleu": 31.3244, "eval_gen_len": 66.1843, "eval_loss": 1.6097419261932373, "eval_runtime": 3209.3116, "eval_samples_per_second": 5.348, "eval_steps_per_second": 0.334, "step": 3394134 }, { "epoch": 6.01, "learning_rate": 1.9960402270505527e-05, "loss": 1.4509, "step": 3400000 }, { "epoch": 6.03, "learning_rate": 1.9872058675349883e-05, "loss": 1.4364, "step": 3410000 }, { "epoch": 6.05, "learning_rate": 1.978370624141534e-05, "loss": 1.4356, "step": 3420000 }, { "epoch": 6.06, "learning_rate": 1.969535380748079e-05, "loss": 1.4374, "step": 3430000 }, { "epoch": 6.08, "learning_rate": 1.9606992534767338e-05, "loss": 1.4387, "step": 3440000 }, { "epoch": 6.1, "learning_rate": 1.951864010083279e-05, "loss": 1.4395, "step": 3450000 }, { "epoch": 6.12, "learning_rate": 1.9430278828119338e-05, "loss": 1.4417, "step": 3460000 }, { "epoch": 6.13, "learning_rate": 1.9341935232963694e-05, "loss": 1.4411, "step": 3470000 }, { "epoch": 6.15, "learning_rate": 1.9253573960250245e-05, "loss": 1.4406, "step": 3480000 }, { "epoch": 6.17, "learning_rate": 1.9165221526315697e-05, "loss": 1.4435, "step": 3490000 }, { "epoch": 6.19, "learning_rate": 1.907686909238115e-05, "loss": 1.4451, "step": 3500000 }, { "epoch": 6.2, "learning_rate": 1.8988516658446605e-05, "loss": 1.4467, "step": 3510000 }, { "epoch": 6.22, "learning_rate": 1.8900164224512057e-05, "loss": 1.443, "step": 3520000 }, { "epoch": 6.24, "learning_rate": 1.8811811790577512e-05, "loss": 1.4465, "step": 3530000 }, { "epoch": 6.26, "learning_rate": 1.872345051786406e-05, "loss": 1.4401, "step": 3540000 }, { "epoch": 6.28, "learning_rate": 1.8635098083929508e-05, "loss": 1.4423, "step": 3550000 }, { "epoch": 6.29, "learning_rate": 1.8546745649994964e-05, "loss": 1.4443, "step": 3560000 }, { "epoch": 6.31, "learning_rate": 1.8458393216060416e-05, "loss": 1.4441, "step": 3570000 }, { "epoch": 6.33, "learning_rate": 1.8370040782125868e-05, "loss": 1.4448, "step": 3580000 }, { "epoch": 6.35, "learning_rate": 1.8281688348191323e-05, "loss": 1.4438, "step": 3590000 }, { "epoch": 6.36, "learning_rate": 1.819333591425677e-05, "loss": 1.4479, "step": 3600000 }, { "epoch": 6.38, "learning_rate": 1.8104983480322227e-05, "loss": 1.4444, "step": 3610000 }, { "epoch": 6.4, "learning_rate": 1.8016622207608775e-05, "loss": 1.4464, "step": 3620000 }, { "epoch": 6.42, "learning_rate": 1.7928278612453135e-05, "loss": 1.4453, "step": 3630000 }, { "epoch": 6.43, "learning_rate": 1.7839917339739682e-05, "loss": 1.4437, "step": 3640000 }, { "epoch": 6.45, "learning_rate": 1.775157374458404e-05, "loss": 1.4461, "step": 3650000 }, { "epoch": 6.47, "learning_rate": 1.766322131064949e-05, "loss": 1.4461, "step": 3660000 }, { "epoch": 6.49, "learning_rate": 1.7574868876714946e-05, "loss": 1.4452, "step": 3670000 }, { "epoch": 6.51, "learning_rate": 1.7486516442780398e-05, "loss": 1.4465, "step": 3680000 }, { "epoch": 6.52, "learning_rate": 1.7398164008845853e-05, "loss": 1.4484, "step": 3690000 }, { "epoch": 6.54, "learning_rate": 1.7309793897353493e-05, "loss": 1.4447, "step": 3700000 }, { "epoch": 6.56, "learning_rate": 1.7221450302197853e-05, "loss": 1.4449, "step": 3710000 }, { "epoch": 6.58, "learning_rate": 1.71330890294844e-05, "loss": 1.4437, "step": 3720000 }, { "epoch": 6.59, "learning_rate": 1.7044736595549853e-05, "loss": 1.4435, "step": 3730000 }, { "epoch": 6.61, "learning_rate": 1.695639300039421e-05, "loss": 1.4453, "step": 3740000 }, { "epoch": 6.63, "learning_rate": 1.6868031727680757e-05, "loss": 1.4469, "step": 3750000 }, { "epoch": 6.65, "learning_rate": 1.6779688132525113e-05, "loss": 1.4446, "step": 3760000 }, { "epoch": 6.66, "learning_rate": 1.6691326859811664e-05, "loss": 1.4432, "step": 3770000 }, { "epoch": 6.68, "learning_rate": 1.6602974425877116e-05, "loss": 1.4411, "step": 3780000 }, { "epoch": 6.7, "learning_rate": 1.6514621991942568e-05, "loss": 1.4424, "step": 3790000 }, { "epoch": 6.72, "learning_rate": 1.6426260719229116e-05, "loss": 1.4423, "step": 3800000 }, { "epoch": 6.74, "learning_rate": 1.633790828529457e-05, "loss": 1.4437, "step": 3810000 }, { "epoch": 6.75, "learning_rate": 1.6249555851360023e-05, "loss": 1.4468, "step": 3820000 }, { "epoch": 6.77, "learning_rate": 1.616119457864657e-05, "loss": 1.4434, "step": 3830000 }, { "epoch": 6.79, "learning_rate": 1.607285098349093e-05, "loss": 1.4394, "step": 3840000 }, { "epoch": 6.81, "learning_rate": 1.5984498549556383e-05, "loss": 1.4409, "step": 3850000 }, { "epoch": 6.82, "learning_rate": 1.5896146115621835e-05, "loss": 1.4404, "step": 3860000 }, { "epoch": 6.84, "learning_rate": 1.580779368168729e-05, "loss": 1.4402, "step": 3870000 }, { "epoch": 6.86, "learning_rate": 1.5719441247752742e-05, "loss": 1.4409, "step": 3880000 }, { "epoch": 6.88, "learning_rate": 1.56310976525971e-05, "loss": 1.4401, "step": 3890000 }, { "epoch": 6.89, "learning_rate": 1.5542736379883646e-05, "loss": 1.4392, "step": 3900000 }, { "epoch": 6.91, "learning_rate": 1.5454392784728006e-05, "loss": 1.4408, "step": 3910000 }, { "epoch": 6.93, "learning_rate": 1.5366031512014554e-05, "loss": 1.4408, "step": 3920000 }, { "epoch": 6.95, "learning_rate": 1.5277679078080006e-05, "loss": 1.4398, "step": 3930000 }, { "epoch": 6.96, "learning_rate": 1.5189326644145458e-05, "loss": 1.4415, "step": 3940000 }, { "epoch": 6.98, "learning_rate": 1.5100974210210911e-05, "loss": 1.4425, "step": 3950000 }, { "epoch": 7.0, "eval_bleu": 31.557, "eval_gen_len": 66.1481, "eval_loss": 1.5914360284805298, "eval_runtime": 3218.9621, "eval_samples_per_second": 5.332, "eval_steps_per_second": 0.333, "step": 3959823 }, { "epoch": 7.0, "learning_rate": 1.5012621776276365e-05, "loss": 1.4396, "step": 3960000 }, { "epoch": 7.02, "learning_rate": 1.4924269342341817e-05, "loss": 1.4025, "step": 3970000 }, { "epoch": 7.04, "learning_rate": 1.4835916908407271e-05, "loss": 1.4013, "step": 3980000 }, { "epoch": 7.05, "learning_rate": 1.4747564474472723e-05, "loss": 1.4035, "step": 3990000 }, { "epoch": 7.07, "learning_rate": 1.4659212040538175e-05, "loss": 1.4054, "step": 4000000 }, { "epoch": 7.09, "learning_rate": 1.4570850767824722e-05, "loss": 1.4067, "step": 4010000 }, { "epoch": 7.11, "learning_rate": 1.4482498333890176e-05, "loss": 1.4044, "step": 4020000 }, { "epoch": 7.12, "learning_rate": 1.439414589995563e-05, "loss": 1.4081, "step": 4030000 }, { "epoch": 7.14, "learning_rate": 1.4305802304799986e-05, "loss": 1.4049, "step": 4040000 }, { "epoch": 7.16, "learning_rate": 1.4217441032086537e-05, "loss": 1.4072, "step": 4050000 }, { "epoch": 7.18, "learning_rate": 1.4129079759373085e-05, "loss": 1.4085, "step": 4060000 }, { "epoch": 7.19, "learning_rate": 1.4040727325438539e-05, "loss": 1.4078, "step": 4070000 }, { "epoch": 7.21, "learning_rate": 1.3952383730282895e-05, "loss": 1.4066, "step": 4080000 }, { "epoch": 7.23, "learning_rate": 1.3864031296348349e-05, "loss": 1.4105, "step": 4090000 }, { "epoch": 7.25, "learning_rate": 1.3775678862413801e-05, "loss": 1.4116, "step": 4100000 }, { "epoch": 7.27, "learning_rate": 1.3687326428479253e-05, "loss": 1.4095, "step": 4110000 }, { "epoch": 7.28, "learning_rate": 1.3598973994544707e-05, "loss": 1.4104, "step": 4120000 }, { "epoch": 7.3, "learning_rate": 1.3510621560610159e-05, "loss": 1.4105, "step": 4130000 }, { "epoch": 7.32, "learning_rate": 1.3422269126675612e-05, "loss": 1.4104, "step": 4140000 }, { "epoch": 7.34, "learning_rate": 1.3333916692741066e-05, "loss": 1.4106, "step": 4150000 }, { "epoch": 7.35, "learning_rate": 1.3245564258806516e-05, "loss": 1.4114, "step": 4160000 }, { "epoch": 7.37, "learning_rate": 1.315721182487197e-05, "loss": 1.4108, "step": 4170000 }, { "epoch": 7.39, "learning_rate": 1.306886822971633e-05, "loss": 1.409, "step": 4180000 }, { "epoch": 7.41, "learning_rate": 1.298051579578178e-05, "loss": 1.4106, "step": 4190000 }, { "epoch": 7.42, "learning_rate": 1.2892163361847234e-05, "loss": 1.4118, "step": 4200000 }, { "epoch": 7.44, "learning_rate": 1.2803819766691594e-05, "loss": 1.4115, "step": 4210000 }, { "epoch": 7.46, "learning_rate": 1.2715467332757044e-05, "loss": 1.4135, "step": 4220000 }, { "epoch": 7.48, "learning_rate": 1.2627123737601404e-05, "loss": 1.4104, "step": 4230000 }, { "epoch": 7.5, "learning_rate": 1.2538771303666858e-05, "loss": 1.4124, "step": 4240000 }, { "epoch": 7.51, "learning_rate": 1.2450427708511214e-05, "loss": 1.4094, "step": 4250000 }, { "epoch": 7.53, "learning_rate": 1.2362075274576668e-05, "loss": 1.4058, "step": 4260000 }, { "epoch": 7.55, "learning_rate": 1.227372284064212e-05, "loss": 1.4108, "step": 4270000 }, { "epoch": 7.57, "learning_rate": 1.2185379245486478e-05, "loss": 1.411, "step": 4280000 }, { "epoch": 7.58, "learning_rate": 1.2097035650330837e-05, "loss": 1.4102, "step": 4290000 }, { "epoch": 7.6, "learning_rate": 1.200868321639629e-05, "loss": 1.4126, "step": 4300000 }, { "epoch": 7.62, "learning_rate": 1.1920330782461742e-05, "loss": 1.4104, "step": 4310000 }, { "epoch": 7.64, "learning_rate": 1.18319871873061e-05, "loss": 1.4127, "step": 4320000 }, { "epoch": 7.65, "learning_rate": 1.1743634753371553e-05, "loss": 1.4094, "step": 4330000 }, { "epoch": 7.67, "learning_rate": 1.165529115821591e-05, "loss": 1.4097, "step": 4340000 }, { "epoch": 7.69, "learning_rate": 1.1566938724281363e-05, "loss": 1.4095, "step": 4350000 }, { "epoch": 7.71, "learning_rate": 1.1478595129125721e-05, "loss": 1.4079, "step": 4360000 }, { "epoch": 7.73, "learning_rate": 1.1390242695191175e-05, "loss": 1.4127, "step": 4370000 }, { "epoch": 7.74, "learning_rate": 1.1301890261256627e-05, "loss": 1.4079, "step": 4380000 }, { "epoch": 7.76, "learning_rate": 1.1213546666100985e-05, "loss": 1.4065, "step": 4390000 }, { "epoch": 7.78, "learning_rate": 1.1125203070945343e-05, "loss": 1.4098, "step": 4400000 }, { "epoch": 7.8, "learning_rate": 1.1036850637010797e-05, "loss": 1.4123, "step": 4410000 }, { "epoch": 7.81, "learning_rate": 1.0948507041855153e-05, "loss": 1.409, "step": 4420000 }, { "epoch": 7.83, "learning_rate": 1.0860163446699512e-05, "loss": 1.4045, "step": 4430000 }, { "epoch": 7.85, "learning_rate": 1.0771811012764965e-05, "loss": 1.4102, "step": 4440000 }, { "epoch": 7.87, "learning_rate": 1.0683467417609323e-05, "loss": 1.4085, "step": 4450000 }, { "epoch": 7.88, "learning_rate": 1.0595114983674775e-05, "loss": 1.4038, "step": 4460000 }, { "epoch": 7.9, "learning_rate": 1.0506771388519134e-05, "loss": 1.4052, "step": 4470000 }, { "epoch": 7.92, "learning_rate": 1.0418427793363492e-05, "loss": 1.4094, "step": 4480000 }, { "epoch": 7.94, "learning_rate": 1.033008419820785e-05, "loss": 1.4071, "step": 4490000 }, { "epoch": 7.95, "learning_rate": 1.0241731764273304e-05, "loss": 1.4075, "step": 4500000 }, { "epoch": 7.97, "learning_rate": 1.0153379330338756e-05, "loss": 1.4047, "step": 4510000 }, { "epoch": 7.99, "learning_rate": 1.0065035735183114e-05, "loss": 1.4063, "step": 4520000 }, { "epoch": 8.0, "eval_bleu": 32.0886, "eval_gen_len": 65.8595, "eval_loss": 1.5665596723556519, "eval_runtime": 3002.5617, "eval_samples_per_second": 5.717, "eval_steps_per_second": 0.357, "step": 4525512 }, { "epoch": 8.01, "learning_rate": 9.976692140027472e-06, "loss": 1.3896, "step": 4530000 }, { "epoch": 8.03, "learning_rate": 9.88834854487183e-06, "loss": 1.3736, "step": 4540000 }, { "epoch": 8.04, "learning_rate": 9.799996110937282e-06, "loss": 1.3741, "step": 4550000 }, { "epoch": 8.06, "learning_rate": 9.71165251578164e-06, "loss": 1.3717, "step": 4560000 }, { "epoch": 8.08, "learning_rate": 9.623308920625997e-06, "loss": 1.3731, "step": 4570000 }, { "epoch": 8.1, "learning_rate": 9.534965325470355e-06, "loss": 1.375, "step": 4580000 }, { "epoch": 8.11, "learning_rate": 9.446612891535809e-06, "loss": 1.3764, "step": 4590000 }, { "epoch": 8.13, "learning_rate": 9.358269296380166e-06, "loss": 1.3752, "step": 4600000 }, { "epoch": 8.15, "learning_rate": 9.26991686244562e-06, "loss": 1.3768, "step": 4610000 }, { "epoch": 8.17, "learning_rate": 9.181573267289978e-06, "loss": 1.3767, "step": 4620000 }, { "epoch": 8.18, "learning_rate": 9.093229672134336e-06, "loss": 1.3769, "step": 4630000 }, { "epoch": 8.2, "learning_rate": 9.004886076978694e-06, "loss": 1.3772, "step": 4640000 }, { "epoch": 8.22, "learning_rate": 8.916533643044146e-06, "loss": 1.3766, "step": 4650000 }, { "epoch": 8.24, "learning_rate": 8.828190047888504e-06, "loss": 1.38, "step": 4660000 }, { "epoch": 8.26, "learning_rate": 8.739846452732862e-06, "loss": 1.3764, "step": 4670000 }, { "epoch": 8.27, "learning_rate": 8.65150285757722e-06, "loss": 1.3765, "step": 4680000 }, { "epoch": 8.29, "learning_rate": 8.563150423642673e-06, "loss": 1.3765, "step": 4690000 }, { "epoch": 8.31, "learning_rate": 8.47480682848703e-06, "loss": 1.3756, "step": 4700000 }, { "epoch": 8.33, "learning_rate": 8.386463233331389e-06, "loss": 1.3781, "step": 4710000 }, { "epoch": 8.34, "learning_rate": 8.298110799396843e-06, "loss": 1.3788, "step": 4720000 }, { "epoch": 8.36, "learning_rate": 8.209767204241201e-06, "loss": 1.3759, "step": 4730000 }, { "epoch": 8.38, "learning_rate": 8.121423609085559e-06, "loss": 1.3783, "step": 4740000 }, { "epoch": 8.4, "learning_rate": 8.033080013929916e-06, "loss": 1.3782, "step": 4750000 }, { "epoch": 8.41, "learning_rate": 7.944736418774274e-06, "loss": 1.3792, "step": 4760000 }, { "epoch": 8.43, "learning_rate": 7.856383984839727e-06, "loss": 1.3775, "step": 4770000 }, { "epoch": 8.45, "learning_rate": 7.768040389684086e-06, "loss": 1.3779, "step": 4780000 }, { "epoch": 8.47, "learning_rate": 7.679696794528444e-06, "loss": 1.3797, "step": 4790000 }, { "epoch": 8.49, "learning_rate": 7.591353199372801e-06, "loss": 1.3761, "step": 4800000 }, { "epoch": 8.5, "learning_rate": 7.503009604217158e-06, "loss": 1.3784, "step": 4810000 }, { "epoch": 8.52, "learning_rate": 7.414666009061516e-06, "loss": 1.3769, "step": 4820000 }, { "epoch": 8.54, "learning_rate": 7.32631357512697e-06, "loss": 1.3764, "step": 4830000 }, { "epoch": 8.56, "learning_rate": 7.237969979971328e-06, "loss": 1.3818, "step": 4840000 }, { "epoch": 8.57, "learning_rate": 7.149626384815686e-06, "loss": 1.3787, "step": 4850000 }, { "epoch": 8.59, "learning_rate": 7.061282789660044e-06, "loss": 1.3762, "step": 4860000 }, { "epoch": 8.61, "learning_rate": 6.972939194504401e-06, "loss": 1.3788, "step": 4870000 }, { "epoch": 8.63, "learning_rate": 6.884595599348759e-06, "loss": 1.3752, "step": 4880000 }, { "epoch": 8.64, "learning_rate": 6.796243165414212e-06, "loss": 1.3771, "step": 4890000 }, { "epoch": 8.66, "learning_rate": 6.70789957025857e-06, "loss": 1.3785, "step": 4900000 }, { "epoch": 8.68, "learning_rate": 6.619555975102928e-06, "loss": 1.3746, "step": 4910000 }, { "epoch": 8.7, "learning_rate": 6.531212379947286e-06, "loss": 1.3769, "step": 4920000 }, { "epoch": 8.72, "learning_rate": 6.4428599460127385e-06, "loss": 1.3781, "step": 4930000 }, { "epoch": 8.73, "learning_rate": 6.354516350857097e-06, "loss": 1.3756, "step": 4940000 }, { "epoch": 8.75, "learning_rate": 6.266172755701455e-06, "loss": 1.3761, "step": 4950000 }, { "epoch": 8.77, "learning_rate": 6.177829160545812e-06, "loss": 1.3762, "step": 4960000 }, { "epoch": 8.79, "learning_rate": 6.089476726611266e-06, "loss": 1.3754, "step": 4970000 }, { "epoch": 8.8, "learning_rate": 6.001133131455623e-06, "loss": 1.3728, "step": 4980000 }, { "epoch": 8.82, "learning_rate": 5.9127895362999815e-06, "loss": 1.3759, "step": 4990000 }, { "epoch": 8.84, "learning_rate": 5.82444594114434e-06, "loss": 1.374, "step": 5000000 }, { "epoch": 8.86, "learning_rate": 5.736102345988698e-06, "loss": 1.3737, "step": 5010000 }, { "epoch": 8.87, "learning_rate": 5.647758750833055e-06, "loss": 1.3749, "step": 5020000 }, { "epoch": 8.89, "learning_rate": 5.559406316898508e-06, "loss": 1.3757, "step": 5030000 }, { "epoch": 8.91, "learning_rate": 5.4710627217428655e-06, "loss": 1.376, "step": 5040000 }, { "epoch": 8.93, "learning_rate": 5.382719126587224e-06, "loss": 1.3753, "step": 5050000 }, { "epoch": 8.94, "learning_rate": 5.294375531431582e-06, "loss": 1.3763, "step": 5060000 }, { "epoch": 8.96, "learning_rate": 5.206031936275939e-06, "loss": 1.3731, "step": 5070000 }, { "epoch": 8.98, "learning_rate": 5.117679502341393e-06, "loss": 1.3711, "step": 5080000 }, { "epoch": 9.0, "learning_rate": 5.02933590718575e-06, "loss": 1.3724, "step": 5090000 }, { "epoch": 9.0, "eval_bleu": 32.3644, "eval_gen_len": 66.1648, "eval_loss": 1.5537199974060059, "eval_runtime": 3034.8877, "eval_samples_per_second": 5.656, "eval_steps_per_second": 0.354, "step": 5091201 }, { "epoch": 9.02, "learning_rate": 4.9409923120301085e-06, "loss": 1.3485, "step": 5100000 }, { "epoch": 9.03, "learning_rate": 4.852648716874467e-06, "loss": 1.3453, "step": 5110000 }, { "epoch": 9.05, "learning_rate": 4.764305121718825e-06, "loss": 1.3439, "step": 5120000 }, { "epoch": 9.07, "learning_rate": 4.675961526563182e-06, "loss": 1.3475, "step": 5130000 }, { "epoch": 9.09, "learning_rate": 4.5876179314075404e-06, "loss": 1.3443, "step": 5140000 }, { "epoch": 9.1, "learning_rate": 4.499265497472993e-06, "loss": 1.3456, "step": 5150000 }, { "epoch": 9.12, "learning_rate": 4.4109219023173515e-06, "loss": 1.3474, "step": 5160000 }, { "epoch": 9.14, "learning_rate": 4.32257830716171e-06, "loss": 1.3491, "step": 5170000 }, { "epoch": 9.16, "learning_rate": 4.234234712006067e-06, "loss": 1.3491, "step": 5180000 }, { "epoch": 9.17, "learning_rate": 4.14588227807152e-06, "loss": 1.3493, "step": 5190000 }, { "epoch": 9.19, "learning_rate": 4.057538682915878e-06, "loss": 1.3485, "step": 5200000 }, { "epoch": 9.21, "learning_rate": 3.9691950877602355e-06, "loss": 1.3506, "step": 5210000 }, { "epoch": 9.23, "learning_rate": 3.880842653825688e-06, "loss": 1.3498, "step": 5220000 }, { "epoch": 9.25, "learning_rate": 3.7924990586700466e-06, "loss": 1.3453, "step": 5230000 }, { "epoch": 9.26, "learning_rate": 3.7041554635144048e-06, "loss": 1.3475, "step": 5240000 }, { "epoch": 9.28, "learning_rate": 3.615811868358763e-06, "loss": 1.3469, "step": 5250000 }, { "epoch": 9.3, "learning_rate": 3.5274682732031203e-06, "loss": 1.3477, "step": 5260000 }, { "epoch": 9.32, "learning_rate": 3.4391246780474785e-06, "loss": 1.3443, "step": 5270000 }, { "epoch": 9.33, "learning_rate": 3.3507810828918367e-06, "loss": 1.3478, "step": 5280000 }, { "epoch": 9.35, "learning_rate": 3.2624374877361945e-06, "loss": 1.3473, "step": 5290000 }, { "epoch": 9.37, "learning_rate": 3.1740850538016474e-06, "loss": 1.349, "step": 5300000 }, { "epoch": 9.39, "learning_rate": 3.085741458646005e-06, "loss": 1.3485, "step": 5310000 }, { "epoch": 9.4, "learning_rate": 2.9973978634903633e-06, "loss": 1.349, "step": 5320000 }, { "epoch": 9.42, "learning_rate": 2.909054268334721e-06, "loss": 1.3478, "step": 5330000 }, { "epoch": 9.44, "learning_rate": 2.8207106731790793e-06, "loss": 1.3489, "step": 5340000 }, { "epoch": 9.46, "learning_rate": 2.732367078023437e-06, "loss": 1.3446, "step": 5350000 }, { "epoch": 9.48, "learning_rate": 2.644023482867795e-06, "loss": 1.3478, "step": 5360000 }, { "epoch": 9.49, "learning_rate": 2.5556798877121526e-06, "loss": 1.3491, "step": 5370000 }, { "epoch": 9.51, "learning_rate": 2.467336292556511e-06, "loss": 1.3467, "step": 5380000 }, { "epoch": 9.53, "learning_rate": 2.3789926974008686e-06, "loss": 1.3492, "step": 5390000 }, { "epoch": 9.55, "learning_rate": 2.290640263466322e-06, "loss": 1.3472, "step": 5400000 }, { "epoch": 9.56, "learning_rate": 2.2022966683106797e-06, "loss": 1.3439, "step": 5410000 }, { "epoch": 9.58, "learning_rate": 2.113953073155038e-06, "loss": 1.3474, "step": 5420000 }, { "epoch": 9.6, "learning_rate": 2.0256094779993956e-06, "loss": 1.3459, "step": 5430000 }, { "epoch": 9.62, "learning_rate": 1.9372658828437534e-06, "loss": 1.3485, "step": 5440000 }, { "epoch": 9.63, "learning_rate": 1.8489134489092063e-06, "loss": 1.3438, "step": 5450000 }, { "epoch": 9.65, "learning_rate": 1.7605698537535645e-06, "loss": 1.3449, "step": 5460000 }, { "epoch": 9.67, "learning_rate": 1.6722262585979223e-06, "loss": 1.3472, "step": 5470000 }, { "epoch": 9.69, "learning_rate": 1.5838826634422803e-06, "loss": 1.3476, "step": 5480000 }, { "epoch": 9.7, "learning_rate": 1.495539068286638e-06, "loss": 1.3462, "step": 5490000 }, { "epoch": 9.72, "learning_rate": 1.407195473130996e-06, "loss": 1.3479, "step": 5500000 }, { "epoch": 9.74, "learning_rate": 1.318851877975354e-06, "loss": 1.3441, "step": 5510000 }, { "epoch": 9.76, "learning_rate": 1.2304994440408069e-06, "loss": 1.3439, "step": 5520000 }, { "epoch": 9.78, "learning_rate": 1.1421558488851649e-06, "loss": 1.3459, "step": 5530000 }, { "epoch": 9.79, "learning_rate": 1.0538122537295229e-06, "loss": 1.3443, "step": 5540000 }, { "epoch": 9.81, "learning_rate": 9.654686585738808e-07, "loss": 1.3455, "step": 5550000 }, { "epoch": 9.83, "learning_rate": 8.771162246393337e-07, "loss": 1.3448, "step": 5560000 }, { "epoch": 9.85, "learning_rate": 7.887726294836917e-07, "loss": 1.3432, "step": 5570000 }, { "epoch": 9.86, "learning_rate": 7.004290343280496e-07, "loss": 1.3444, "step": 5580000 }, { "epoch": 9.88, "learning_rate": 6.120854391724075e-07, "loss": 1.3432, "step": 5590000 }, { "epoch": 9.9, "learning_rate": 5.237418440167655e-07, "loss": 1.3454, "step": 5600000 }, { "epoch": 9.92, "learning_rate": 4.353894100822184e-07, "loss": 1.3437, "step": 5610000 }, { "epoch": 9.93, "learning_rate": 3.4704581492657626e-07, "loss": 1.3447, "step": 5620000 }, { "epoch": 9.95, "learning_rate": 2.587022197709342e-07, "loss": 1.3441, "step": 5630000 }, { "epoch": 9.97, "learning_rate": 1.7035862461529215e-07, "loss": 1.3426, "step": 5640000 }, { "epoch": 9.99, "learning_rate": 8.201502945965009e-08, "loss": 1.3452, "step": 5650000 }, { "epoch": 10.0, "eval_bleu": 32.4724, "eval_gen_len": 66.1539, "eval_loss": 1.5473366975784302, "eval_runtime": 3064.956, "eval_samples_per_second": 5.6, "eval_steps_per_second": 0.35, "step": 5656890 }, { "epoch": 10.0, "step": 5656890, "total_flos": 1.8656478019360383e+19, "train_loss": 1.5652431253911159, "train_runtime": 1147827.3858, "train_samples_per_second": 78.854, "train_steps_per_second": 4.928 } ], "max_steps": 5656890, "num_train_epochs": 10, "total_flos": 1.8656478019360383e+19, "trial_name": null, "trial_params": null }