{ "best_metric": 0.15202689170837402, "best_model_checkpoint": "checkpoints_commonvoice/checkpoint-131848", "epoch": 40.0, "global_step": 659240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9984831017535344e-05, "loss": 1.727, "step": 500 }, { "epoch": 0.06, "learning_rate": 1.996969237303562e-05, "loss": 0.9353, "step": 1000 }, { "epoch": 0.09, "learning_rate": 1.995452339057096e-05, "loss": 0.7454, "step": 1500 }, { "epoch": 0.12, "learning_rate": 1.9939354408106307e-05, "loss": 0.6372, "step": 2000 }, { "epoch": 0.15, "learning_rate": 1.992418542564165e-05, "loss": 0.5621, "step": 2500 }, { "epoch": 0.18, "learning_rate": 1.9909016443176995e-05, "loss": 0.5135, "step": 3000 }, { "epoch": 0.21, "learning_rate": 1.9893847460712338e-05, "loss": 0.4771, "step": 3500 }, { "epoch": 0.24, "learning_rate": 1.987867847824768e-05, "loss": 0.4487, "step": 4000 }, { "epoch": 0.27, "learning_rate": 1.9863509495783026e-05, "loss": 0.4204, "step": 4500 }, { "epoch": 0.3, "learning_rate": 1.984834051331837e-05, "loss": 0.4087, "step": 5000 }, { "epoch": 0.33, "learning_rate": 1.983317153085371e-05, "loss": 0.3839, "step": 5500 }, { "epoch": 0.36, "learning_rate": 1.9818032886353985e-05, "loss": 0.3703, "step": 6000 }, { "epoch": 0.39, "learning_rate": 1.980289424185426e-05, "loss": 0.3623, "step": 6500 }, { "epoch": 0.42, "learning_rate": 1.9787725259389602e-05, "loss": 0.3517, "step": 7000 }, { "epoch": 0.46, "learning_rate": 1.9772556276924944e-05, "loss": 0.3371, "step": 7500 }, { "epoch": 0.49, "learning_rate": 1.9757387294460287e-05, "loss": 0.3307, "step": 8000 }, { "epoch": 0.52, "learning_rate": 1.9742218311995633e-05, "loss": 0.3152, "step": 8500 }, { "epoch": 0.55, "learning_rate": 1.9727049329530975e-05, "loss": 0.317, "step": 9000 }, { "epoch": 0.58, "learning_rate": 1.971188034706632e-05, "loss": 0.3083, "step": 9500 }, { "epoch": 0.61, "learning_rate": 1.9696741702566592e-05, "loss": 0.3004, "step": 10000 }, { "epoch": 0.64, "learning_rate": 1.9681572720101938e-05, "loss": 0.2944, "step": 10500 }, { "epoch": 0.67, "learning_rate": 1.966640373763728e-05, "loss": 0.2949, "step": 11000 }, { "epoch": 0.7, "learning_rate": 1.9651234755172626e-05, "loss": 0.2863, "step": 11500 }, { "epoch": 0.73, "learning_rate": 1.963606577270797e-05, "loss": 0.2824, "step": 12000 }, { "epoch": 0.76, "learning_rate": 1.962089679024331e-05, "loss": 0.279, "step": 12500 }, { "epoch": 0.79, "learning_rate": 1.9605727807778657e-05, "loss": 0.2754, "step": 13000 }, { "epoch": 0.82, "learning_rate": 1.9590558825314e-05, "loss": 0.269, "step": 13500 }, { "epoch": 0.85, "learning_rate": 1.9575389842849345e-05, "loss": 0.2711, "step": 14000 }, { "epoch": 0.88, "learning_rate": 1.9560220860384688e-05, "loss": 0.2646, "step": 14500 }, { "epoch": 0.91, "learning_rate": 1.9545082215884962e-05, "loss": 0.2578, "step": 15000 }, { "epoch": 0.94, "learning_rate": 1.9529913233420305e-05, "loss": 0.2546, "step": 15500 }, { "epoch": 0.97, "learning_rate": 1.9514744250955647e-05, "loss": 0.2549, "step": 16000 }, { "epoch": 1.0, "eval_bleu": 83.5257, "eval_gen_len": 14.5799, "eval_loss": 0.22406013309955597, "eval_runtime": 225.5638, "eval_samples_per_second": 71.483, "eval_steps_per_second": 2.234, "step": 16481 }, { "epoch": 1.0, "learning_rate": 1.949957526849099e-05, "loss": 0.253, "step": 16500 }, { "epoch": 1.03, "learning_rate": 1.9484406286026335e-05, "loss": 0.2352, "step": 17000 }, { "epoch": 1.06, "learning_rate": 1.946926764152661e-05, "loss": 0.2366, "step": 17500 }, { "epoch": 1.09, "learning_rate": 1.9454098659061952e-05, "loss": 0.2336, "step": 18000 }, { "epoch": 1.12, "learning_rate": 1.9438929676597295e-05, "loss": 0.2336, "step": 18500 }, { "epoch": 1.15, "learning_rate": 1.9423760694132637e-05, "loss": 0.2314, "step": 19000 }, { "epoch": 1.18, "learning_rate": 1.940862204963291e-05, "loss": 0.2258, "step": 19500 }, { "epoch": 1.21, "learning_rate": 1.9393453067168254e-05, "loss": 0.2252, "step": 20000 }, { "epoch": 1.24, "learning_rate": 1.93782840847036e-05, "loss": 0.2236, "step": 20500 }, { "epoch": 1.27, "learning_rate": 1.9363115102238942e-05, "loss": 0.2223, "step": 21000 }, { "epoch": 1.3, "learning_rate": 1.9347946119774288e-05, "loss": 0.2214, "step": 21500 }, { "epoch": 1.33, "learning_rate": 1.933277713730963e-05, "loss": 0.218, "step": 22000 }, { "epoch": 1.37, "learning_rate": 1.9317608154844976e-05, "loss": 0.22, "step": 22500 }, { "epoch": 1.4, "learning_rate": 1.930243917238032e-05, "loss": 0.2198, "step": 23000 }, { "epoch": 1.43, "learning_rate": 1.928727018991566e-05, "loss": 0.2183, "step": 23500 }, { "epoch": 1.46, "learning_rate": 1.9272131545415936e-05, "loss": 0.2176, "step": 24000 }, { "epoch": 1.49, "learning_rate": 1.9256962562951278e-05, "loss": 0.212, "step": 24500 }, { "epoch": 1.52, "learning_rate": 1.9241823918451552e-05, "loss": 0.2111, "step": 25000 }, { "epoch": 1.55, "learning_rate": 1.9226654935986895e-05, "loss": 0.2132, "step": 25500 }, { "epoch": 1.58, "learning_rate": 1.921148595352224e-05, "loss": 0.2081, "step": 26000 }, { "epoch": 1.61, "learning_rate": 1.9196316971057583e-05, "loss": 0.2108, "step": 26500 }, { "epoch": 1.64, "learning_rate": 1.918114798859293e-05, "loss": 0.2078, "step": 27000 }, { "epoch": 1.67, "learning_rate": 1.916597900612827e-05, "loss": 0.2063, "step": 27500 }, { "epoch": 1.7, "learning_rate": 1.9150810023663614e-05, "loss": 0.2087, "step": 28000 }, { "epoch": 1.73, "learning_rate": 1.9135641041198957e-05, "loss": 0.2068, "step": 28500 }, { "epoch": 1.76, "learning_rate": 1.91204720587343e-05, "loss": 0.2031, "step": 29000 }, { "epoch": 1.79, "learning_rate": 1.9105303076269645e-05, "loss": 0.2018, "step": 29500 }, { "epoch": 1.82, "learning_rate": 1.9090134093804987e-05, "loss": 0.2015, "step": 30000 }, { "epoch": 1.85, "learning_rate": 1.9074965111340333e-05, "loss": 0.2039, "step": 30500 }, { "epoch": 1.88, "learning_rate": 1.9059796128875676e-05, "loss": 0.2015, "step": 31000 }, { "epoch": 1.91, "learning_rate": 1.904465748437595e-05, "loss": 0.2013, "step": 31500 }, { "epoch": 1.94, "learning_rate": 1.9029488501911292e-05, "loss": 0.1983, "step": 32000 }, { "epoch": 1.97, "learning_rate": 1.9014349857411567e-05, "loss": 0.2012, "step": 32500 }, { "epoch": 2.0, "eval_bleu": 85.8326, "eval_gen_len": 14.5856, "eval_loss": 0.18299731612205505, "eval_runtime": 219.7356, "eval_samples_per_second": 73.379, "eval_steps_per_second": 2.294, "step": 32962 }, { "epoch": 2.0, "learning_rate": 1.899918087494691e-05, "loss": 0.1988, "step": 33000 }, { "epoch": 2.03, "learning_rate": 1.8984011892482255e-05, "loss": 0.1824, "step": 33500 }, { "epoch": 2.06, "learning_rate": 1.8968873247982526e-05, "loss": 0.1835, "step": 34000 }, { "epoch": 2.09, "learning_rate": 1.8953704265517872e-05, "loss": 0.1822, "step": 34500 }, { "epoch": 2.12, "learning_rate": 1.8938535283053214e-05, "loss": 0.1818, "step": 35000 }, { "epoch": 2.15, "learning_rate": 1.8923366300588557e-05, "loss": 0.1792, "step": 35500 }, { "epoch": 2.18, "learning_rate": 1.8908197318123903e-05, "loss": 0.1818, "step": 36000 }, { "epoch": 2.21, "learning_rate": 1.8893058673624173e-05, "loss": 0.1797, "step": 36500 }, { "epoch": 2.25, "learning_rate": 1.887788969115952e-05, "loss": 0.178, "step": 37000 }, { "epoch": 2.28, "learning_rate": 1.8862720708694862e-05, "loss": 0.1795, "step": 37500 }, { "epoch": 2.31, "learning_rate": 1.8847551726230208e-05, "loss": 0.1804, "step": 38000 }, { "epoch": 2.34, "learning_rate": 1.883238274376555e-05, "loss": 0.1778, "step": 38500 }, { "epoch": 2.37, "learning_rate": 1.8817213761300893e-05, "loss": 0.182, "step": 39000 }, { "epoch": 2.4, "learning_rate": 1.880204477883624e-05, "loss": 0.1773, "step": 39500 }, { "epoch": 2.43, "learning_rate": 1.878687579637158e-05, "loss": 0.1775, "step": 40000 }, { "epoch": 2.46, "learning_rate": 1.8771706813906924e-05, "loss": 0.1777, "step": 40500 }, { "epoch": 2.49, "learning_rate": 1.8756537831442266e-05, "loss": 0.1766, "step": 41000 }, { "epoch": 2.52, "learning_rate": 1.874139918694254e-05, "loss": 0.177, "step": 41500 }, { "epoch": 2.55, "learning_rate": 1.8726230204477883e-05, "loss": 0.1757, "step": 42000 }, { "epoch": 2.58, "learning_rate": 1.8711091559978157e-05, "loss": 0.1761, "step": 42500 }, { "epoch": 2.61, "learning_rate": 1.8695922577513503e-05, "loss": 0.1747, "step": 43000 }, { "epoch": 2.64, "learning_rate": 1.8680753595048845e-05, "loss": 0.1756, "step": 43500 }, { "epoch": 2.67, "learning_rate": 1.8665584612584188e-05, "loss": 0.1775, "step": 44000 }, { "epoch": 2.7, "learning_rate": 1.8650415630119534e-05, "loss": 0.1745, "step": 44500 }, { "epoch": 2.73, "learning_rate": 1.8635246647654876e-05, "loss": 0.1721, "step": 45000 }, { "epoch": 2.76, "learning_rate": 1.8620077665190222e-05, "loss": 0.1742, "step": 45500 }, { "epoch": 2.79, "learning_rate": 1.8604908682725565e-05, "loss": 0.1721, "step": 46000 }, { "epoch": 2.82, "learning_rate": 1.8589739700260907e-05, "loss": 0.1734, "step": 46500 }, { "epoch": 2.85, "learning_rate": 1.8574570717796253e-05, "loss": 0.1729, "step": 47000 }, { "epoch": 2.88, "learning_rate": 1.8559401735331595e-05, "loss": 0.1719, "step": 47500 }, { "epoch": 2.91, "learning_rate": 1.854423275286694e-05, "loss": 0.1708, "step": 48000 }, { "epoch": 2.94, "learning_rate": 1.8529094108367212e-05, "loss": 0.1728, "step": 48500 }, { "epoch": 2.97, "learning_rate": 1.8513925125902558e-05, "loss": 0.1747, "step": 49000 }, { "epoch": 3.0, "eval_bleu": 86.3628, "eval_gen_len": 14.5561, "eval_loss": 0.16949382424354553, "eval_runtime": 217.2797, "eval_samples_per_second": 74.208, "eval_steps_per_second": 2.32, "step": 49443 }, { "epoch": 3.0, "learning_rate": 1.849878648140283e-05, "loss": 0.1677, "step": 49500 }, { "epoch": 3.03, "learning_rate": 1.8483617498938175e-05, "loss": 0.1542, "step": 50000 }, { "epoch": 3.06, "learning_rate": 1.8468448516473517e-05, "loss": 0.1569, "step": 50500 }, { "epoch": 3.09, "learning_rate": 1.845327953400886e-05, "loss": 0.1591, "step": 51000 }, { "epoch": 3.12, "learning_rate": 1.8438110551544206e-05, "loss": 0.1543, "step": 51500 }, { "epoch": 3.16, "learning_rate": 1.8422971907044476e-05, "loss": 0.1557, "step": 52000 }, { "epoch": 3.19, "learning_rate": 1.8407802924579822e-05, "loss": 0.1572, "step": 52500 }, { "epoch": 3.22, "learning_rate": 1.8392633942115165e-05, "loss": 0.1582, "step": 53000 }, { "epoch": 3.25, "learning_rate": 1.8377464959650507e-05, "loss": 0.1608, "step": 53500 }, { "epoch": 3.28, "learning_rate": 1.836229597718585e-05, "loss": 0.1535, "step": 54000 }, { "epoch": 3.31, "learning_rate": 1.8347126994721196e-05, "loss": 0.1571, "step": 54500 }, { "epoch": 3.34, "learning_rate": 1.8331958012256538e-05, "loss": 0.1548, "step": 55000 }, { "epoch": 3.37, "learning_rate": 1.8316789029791884e-05, "loss": 0.154, "step": 55500 }, { "epoch": 3.4, "learning_rate": 1.8301650385292155e-05, "loss": 0.1587, "step": 56000 }, { "epoch": 3.43, "learning_rate": 1.82864814028275e-05, "loss": 0.1552, "step": 56500 }, { "epoch": 3.46, "learning_rate": 1.8271312420362843e-05, "loss": 0.1499, "step": 57000 }, { "epoch": 3.49, "learning_rate": 1.8256173775863117e-05, "loss": 0.1564, "step": 57500 }, { "epoch": 3.52, "learning_rate": 1.824100479339846e-05, "loss": 0.1576, "step": 58000 }, { "epoch": 3.55, "learning_rate": 1.8225835810933802e-05, "loss": 0.1568, "step": 58500 }, { "epoch": 3.58, "learning_rate": 1.8210666828469148e-05, "loss": 0.1549, "step": 59000 }, { "epoch": 3.61, "learning_rate": 1.819549784600449e-05, "loss": 0.1531, "step": 59500 }, { "epoch": 3.64, "learning_rate": 1.8180328863539837e-05, "loss": 0.1563, "step": 60000 }, { "epoch": 3.67, "learning_rate": 1.816515988107518e-05, "loss": 0.1555, "step": 60500 }, { "epoch": 3.7, "learning_rate": 1.8149990898610525e-05, "loss": 0.1539, "step": 61000 }, { "epoch": 3.73, "learning_rate": 1.8134882592075724e-05, "loss": 0.1519, "step": 61500 }, { "epoch": 3.76, "learning_rate": 1.811971360961107e-05, "loss": 0.1552, "step": 62000 }, { "epoch": 3.79, "learning_rate": 1.8104544627146412e-05, "loss": 0.1545, "step": 62500 }, { "epoch": 3.82, "learning_rate": 1.808937564468176e-05, "loss": 0.1505, "step": 63000 }, { "epoch": 3.85, "learning_rate": 1.80742066622171e-05, "loss": 0.1551, "step": 63500 }, { "epoch": 3.88, "learning_rate": 1.8059037679752443e-05, "loss": 0.1553, "step": 64000 }, { "epoch": 3.91, "learning_rate": 1.8043868697287786e-05, "loss": 0.1524, "step": 64500 }, { "epoch": 3.94, "learning_rate": 1.802873005278806e-05, "loss": 0.1552, "step": 65000 }, { "epoch": 3.97, "learning_rate": 1.8013561070323406e-05, "loss": 0.1528, "step": 65500 }, { "epoch": 4.0, "eval_bleu": 86.9553, "eval_gen_len": 14.6311, "eval_loss": 0.16092169284820557, "eval_runtime": 219.7454, "eval_samples_per_second": 73.376, "eval_steps_per_second": 2.294, "step": 65924 }, { "epoch": 4.0, "learning_rate": 1.799839208785875e-05, "loss": 0.1528, "step": 66000 }, { "epoch": 4.03, "learning_rate": 1.798322310539409e-05, "loss": 0.1406, "step": 66500 }, { "epoch": 4.07, "learning_rate": 1.7968054122929433e-05, "loss": 0.1436, "step": 67000 }, { "epoch": 4.1, "learning_rate": 1.795288514046478e-05, "loss": 0.1407, "step": 67500 }, { "epoch": 4.13, "learning_rate": 1.7937716158000122e-05, "loss": 0.137, "step": 68000 }, { "epoch": 4.16, "learning_rate": 1.7922547175535468e-05, "loss": 0.1403, "step": 68500 }, { "epoch": 4.19, "learning_rate": 1.790740853103574e-05, "loss": 0.1405, "step": 69000 }, { "epoch": 4.22, "learning_rate": 1.7892269886536013e-05, "loss": 0.1408, "step": 69500 }, { "epoch": 4.25, "learning_rate": 1.7877100904071355e-05, "loss": 0.1354, "step": 70000 }, { "epoch": 4.28, "learning_rate": 1.786196225957163e-05, "loss": 0.1426, "step": 70500 }, { "epoch": 4.31, "learning_rate": 1.7846793277106972e-05, "loss": 0.1411, "step": 71000 }, { "epoch": 4.34, "learning_rate": 1.7831624294642318e-05, "loss": 0.1415, "step": 71500 }, { "epoch": 4.37, "learning_rate": 1.781645531217766e-05, "loss": 0.1398, "step": 72000 }, { "epoch": 4.4, "learning_rate": 1.7801286329713003e-05, "loss": 0.1398, "step": 72500 }, { "epoch": 4.43, "learning_rate": 1.778611734724835e-05, "loss": 0.139, "step": 73000 }, { "epoch": 4.46, "learning_rate": 1.777094836478369e-05, "loss": 0.142, "step": 73500 }, { "epoch": 4.49, "learning_rate": 1.7755779382319037e-05, "loss": 0.1423, "step": 74000 }, { "epoch": 4.52, "learning_rate": 1.774061039985438e-05, "loss": 0.1399, "step": 74500 }, { "epoch": 4.55, "learning_rate": 1.7725441417389725e-05, "loss": 0.1417, "step": 75000 }, { "epoch": 4.58, "learning_rate": 1.7710302772889996e-05, "loss": 0.141, "step": 75500 }, { "epoch": 4.61, "learning_rate": 1.7695133790425342e-05, "loss": 0.1434, "step": 76000 }, { "epoch": 4.64, "learning_rate": 1.7679964807960684e-05, "loss": 0.1372, "step": 76500 }, { "epoch": 4.67, "learning_rate": 1.7664795825496027e-05, "loss": 0.1399, "step": 77000 }, { "epoch": 4.7, "learning_rate": 1.764962684303137e-05, "loss": 0.1399, "step": 77500 }, { "epoch": 4.73, "learning_rate": 1.7634457860566715e-05, "loss": 0.1388, "step": 78000 }, { "epoch": 4.76, "learning_rate": 1.7619288878102058e-05, "loss": 0.1399, "step": 78500 }, { "epoch": 4.79, "learning_rate": 1.7604150233602332e-05, "loss": 0.1402, "step": 79000 }, { "epoch": 4.82, "learning_rate": 1.7588981251137675e-05, "loss": 0.1379, "step": 79500 }, { "epoch": 4.85, "learning_rate": 1.7573812268673017e-05, "loss": 0.1392, "step": 80000 }, { "epoch": 4.88, "learning_rate": 1.7558643286208363e-05, "loss": 0.1379, "step": 80500 }, { "epoch": 4.91, "learning_rate": 1.7543474303743705e-05, "loss": 0.1392, "step": 81000 }, { "epoch": 4.95, "learning_rate": 1.752830532127905e-05, "loss": 0.1397, "step": 81500 }, { "epoch": 4.98, "learning_rate": 1.7513166676779322e-05, "loss": 0.1398, "step": 82000 }, { "epoch": 5.0, "eval_bleu": 87.362, "eval_gen_len": 14.6795, "eval_loss": 0.15578030049800873, "eval_runtime": 221.0197, "eval_samples_per_second": 72.953, "eval_steps_per_second": 2.28, "step": 82405 }, { "epoch": 5.01, "learning_rate": 1.7497997694314668e-05, "loss": 0.1368, "step": 82500 }, { "epoch": 5.04, "learning_rate": 1.748282871185001e-05, "loss": 0.1257, "step": 83000 }, { "epoch": 5.07, "learning_rate": 1.7467659729385353e-05, "loss": 0.1253, "step": 83500 }, { "epoch": 5.1, "learning_rate": 1.7452521084885627e-05, "loss": 0.1295, "step": 84000 }, { "epoch": 5.13, "learning_rate": 1.743735210242097e-05, "loss": 0.1284, "step": 84500 }, { "epoch": 5.16, "learning_rate": 1.7422183119956316e-05, "loss": 0.1255, "step": 85000 }, { "epoch": 5.19, "learning_rate": 1.7407014137491658e-05, "loss": 0.1285, "step": 85500 }, { "epoch": 5.22, "learning_rate": 1.7391845155027004e-05, "loss": 0.129, "step": 86000 }, { "epoch": 5.25, "learning_rate": 1.7376676172562346e-05, "loss": 0.1286, "step": 86500 }, { "epoch": 5.28, "learning_rate": 1.736150719009769e-05, "loss": 0.1287, "step": 87000 }, { "epoch": 5.31, "learning_rate": 1.7346368545597963e-05, "loss": 0.1313, "step": 87500 }, { "epoch": 5.34, "learning_rate": 1.733119956313331e-05, "loss": 0.1312, "step": 88000 }, { "epoch": 5.37, "learning_rate": 1.731603058066865e-05, "loss": 0.129, "step": 88500 }, { "epoch": 5.4, "learning_rate": 1.7300861598203994e-05, "loss": 0.1262, "step": 89000 }, { "epoch": 5.43, "learning_rate": 1.7285692615739336e-05, "loss": 0.128, "step": 89500 }, { "epoch": 5.46, "learning_rate": 1.727052363327468e-05, "loss": 0.1298, "step": 90000 }, { "epoch": 5.49, "learning_rate": 1.7255354650810025e-05, "loss": 0.1299, "step": 90500 }, { "epoch": 5.52, "learning_rate": 1.7240185668345367e-05, "loss": 0.1291, "step": 91000 }, { "epoch": 5.55, "learning_rate": 1.7225016685880713e-05, "loss": 0.1287, "step": 91500 }, { "epoch": 5.58, "learning_rate": 1.7209847703416056e-05, "loss": 0.1252, "step": 92000 }, { "epoch": 5.61, "learning_rate": 1.71946787209514e-05, "loss": 0.1277, "step": 92500 }, { "epoch": 5.64, "learning_rate": 1.7179509738486744e-05, "loss": 0.1244, "step": 93000 }, { "epoch": 5.67, "learning_rate": 1.7164401431951947e-05, "loss": 0.1278, "step": 93500 }, { "epoch": 5.7, "learning_rate": 1.714923244948729e-05, "loss": 0.1299, "step": 94000 }, { "epoch": 5.73, "learning_rate": 1.713406346702263e-05, "loss": 0.1284, "step": 94500 }, { "epoch": 5.76, "learning_rate": 1.7118894484557977e-05, "loss": 0.1273, "step": 95000 }, { "epoch": 5.79, "learning_rate": 1.7103755840058248e-05, "loss": 0.1281, "step": 95500 }, { "epoch": 5.82, "learning_rate": 1.7088586857593594e-05, "loss": 0.1287, "step": 96000 }, { "epoch": 5.86, "learning_rate": 1.7073417875128937e-05, "loss": 0.1273, "step": 96500 }, { "epoch": 5.89, "learning_rate": 1.7058248892664283e-05, "loss": 0.1291, "step": 97000 }, { "epoch": 5.92, "learning_rate": 1.7043079910199625e-05, "loss": 0.1281, "step": 97500 }, { "epoch": 5.95, "learning_rate": 1.702791092773497e-05, "loss": 0.1251, "step": 98000 }, { "epoch": 5.98, "learning_rate": 1.7012772283235242e-05, "loss": 0.1252, "step": 98500 }, { "epoch": 6.0, "eval_bleu": 87.4518, "eval_gen_len": 14.6618, "eval_loss": 0.1534184217453003, "eval_runtime": 219.098, "eval_samples_per_second": 73.593, "eval_steps_per_second": 2.3, "step": 98886 }, { "epoch": 6.01, "learning_rate": 1.6997603300770588e-05, "loss": 0.122, "step": 99000 }, { "epoch": 6.04, "learning_rate": 1.698243431830593e-05, "loss": 0.1178, "step": 99500 }, { "epoch": 6.07, "learning_rate": 1.6967265335841273e-05, "loss": 0.1152, "step": 100000 }, { "epoch": 6.1, "learning_rate": 1.6952126691341547e-05, "loss": 0.117, "step": 100500 }, { "epoch": 6.13, "learning_rate": 1.693695770887689e-05, "loss": 0.1158, "step": 101000 }, { "epoch": 6.16, "learning_rate": 1.6921788726412235e-05, "loss": 0.1169, "step": 101500 }, { "epoch": 6.19, "learning_rate": 1.6906619743947578e-05, "loss": 0.1164, "step": 102000 }, { "epoch": 6.22, "learning_rate": 1.689145076148292e-05, "loss": 0.1191, "step": 102500 }, { "epoch": 6.25, "learning_rate": 1.6876281779018263e-05, "loss": 0.1186, "step": 103000 }, { "epoch": 6.28, "learning_rate": 1.686111279655361e-05, "loss": 0.1184, "step": 103500 }, { "epoch": 6.31, "learning_rate": 1.684594381408895e-05, "loss": 0.1162, "step": 104000 }, { "epoch": 6.34, "learning_rate": 1.6830774831624297e-05, "loss": 0.1172, "step": 104500 }, { "epoch": 6.37, "learning_rate": 1.681560584915964e-05, "loss": 0.1188, "step": 105000 }, { "epoch": 6.4, "learning_rate": 1.6800467204659914e-05, "loss": 0.1174, "step": 105500 }, { "epoch": 6.43, "learning_rate": 1.6785298222195256e-05, "loss": 0.1194, "step": 106000 }, { "epoch": 6.46, "learning_rate": 1.67701292397306e-05, "loss": 0.1187, "step": 106500 }, { "epoch": 6.49, "learning_rate": 1.6754960257265944e-05, "loss": 0.1172, "step": 107000 }, { "epoch": 6.52, "learning_rate": 1.6739821612766215e-05, "loss": 0.1185, "step": 107500 }, { "epoch": 6.55, "learning_rate": 1.672465263030156e-05, "loss": 0.1184, "step": 108000 }, { "epoch": 6.58, "learning_rate": 1.6709483647836904e-05, "loss": 0.1182, "step": 108500 }, { "epoch": 6.61, "learning_rate": 1.669431466537225e-05, "loss": 0.1174, "step": 109000 }, { "epoch": 6.64, "learning_rate": 1.6679145682907592e-05, "loss": 0.1183, "step": 109500 }, { "epoch": 6.67, "learning_rate": 1.6663976700442938e-05, "loss": 0.1186, "step": 110000 }, { "epoch": 6.7, "learning_rate": 1.664880771797828e-05, "loss": 0.1161, "step": 110500 }, { "epoch": 6.74, "learning_rate": 1.6633638735513623e-05, "loss": 0.1186, "step": 111000 }, { "epoch": 6.77, "learning_rate": 1.6618469753048965e-05, "loss": 0.1172, "step": 111500 }, { "epoch": 6.8, "learning_rate": 1.660333110854924e-05, "loss": 0.118, "step": 112000 }, { "epoch": 6.83, "learning_rate": 1.6588162126084585e-05, "loss": 0.1171, "step": 112500 }, { "epoch": 6.86, "learning_rate": 1.6572993143619928e-05, "loss": 0.1166, "step": 113000 }, { "epoch": 6.89, "learning_rate": 1.655782416115527e-05, "loss": 0.1189, "step": 113500 }, { "epoch": 6.92, "learning_rate": 1.6542685516655545e-05, "loss": 0.1186, "step": 114000 }, { "epoch": 6.95, "learning_rate": 1.6527516534190887e-05, "loss": 0.1187, "step": 114500 }, { "epoch": 6.98, "learning_rate": 1.651234755172623e-05, "loss": 0.1173, "step": 115000 }, { "epoch": 7.0, "eval_bleu": 87.3802, "eval_gen_len": 14.6933, "eval_loss": 0.15458865463733673, "eval_runtime": 219.3387, "eval_samples_per_second": 73.512, "eval_steps_per_second": 2.298, "step": 115367 }, { "epoch": 7.01, "learning_rate": 1.6497178569261575e-05, "loss": 0.1161, "step": 115500 }, { "epoch": 7.04, "learning_rate": 1.6482009586796918e-05, "loss": 0.1073, "step": 116000 }, { "epoch": 7.07, "learning_rate": 1.6466840604332264e-05, "loss": 0.1058, "step": 116500 }, { "epoch": 7.1, "learning_rate": 1.6451671621867606e-05, "loss": 0.1068, "step": 117000 }, { "epoch": 7.13, "learning_rate": 1.643653297736788e-05, "loss": 0.1093, "step": 117500 }, { "epoch": 7.16, "learning_rate": 1.6421363994903223e-05, "loss": 0.1076, "step": 118000 }, { "epoch": 7.19, "learning_rate": 1.6406195012438566e-05, "loss": 0.1089, "step": 118500 }, { "epoch": 7.22, "learning_rate": 1.639102602997391e-05, "loss": 0.1094, "step": 119000 }, { "epoch": 7.25, "learning_rate": 1.6375857047509254e-05, "loss": 0.1074, "step": 119500 }, { "epoch": 7.28, "learning_rate": 1.63606880650446e-05, "loss": 0.1098, "step": 120000 }, { "epoch": 7.31, "learning_rate": 1.6345519082579942e-05, "loss": 0.1083, "step": 120500 }, { "epoch": 7.34, "learning_rate": 1.6330380438080216e-05, "loss": 0.1083, "step": 121000 }, { "epoch": 7.37, "learning_rate": 1.631521145561556e-05, "loss": 0.1098, "step": 121500 }, { "epoch": 7.4, "learning_rate": 1.6300042473150905e-05, "loss": 0.1066, "step": 122000 }, { "epoch": 7.43, "learning_rate": 1.6284873490686247e-05, "loss": 0.1075, "step": 122500 }, { "epoch": 7.46, "learning_rate": 1.626970450822159e-05, "loss": 0.1086, "step": 123000 }, { "epoch": 7.49, "learning_rate": 1.6254565863721864e-05, "loss": 0.1078, "step": 123500 }, { "epoch": 7.52, "learning_rate": 1.6239396881257207e-05, "loss": 0.1119, "step": 124000 }, { "epoch": 7.55, "learning_rate": 1.622422789879255e-05, "loss": 0.1079, "step": 124500 }, { "epoch": 7.58, "learning_rate": 1.6209058916327895e-05, "loss": 0.1086, "step": 125000 }, { "epoch": 7.61, "learning_rate": 1.6193889933863237e-05, "loss": 0.1091, "step": 125500 }, { "epoch": 7.65, "learning_rate": 1.617875128936351e-05, "loss": 0.1075, "step": 126000 }, { "epoch": 7.68, "learning_rate": 1.6163582306898854e-05, "loss": 0.1102, "step": 126500 }, { "epoch": 7.71, "learning_rate": 1.6148413324434197e-05, "loss": 0.1097, "step": 127000 }, { "epoch": 7.74, "learning_rate": 1.6133244341969542e-05, "loss": 0.1077, "step": 127500 }, { "epoch": 7.77, "learning_rate": 1.6118075359504885e-05, "loss": 0.1106, "step": 128000 }, { "epoch": 7.8, "learning_rate": 1.610290637704023e-05, "loss": 0.1095, "step": 128500 }, { "epoch": 7.83, "learning_rate": 1.6087737394575573e-05, "loss": 0.1104, "step": 129000 }, { "epoch": 7.86, "learning_rate": 1.6072598750075844e-05, "loss": 0.1097, "step": 129500 }, { "epoch": 7.89, "learning_rate": 1.605742976761119e-05, "loss": 0.1085, "step": 130000 }, { "epoch": 7.92, "learning_rate": 1.6042260785146533e-05, "loss": 0.1091, "step": 130500 }, { "epoch": 7.95, "learning_rate": 1.602709180268188e-05, "loss": 0.1088, "step": 131000 }, { "epoch": 7.98, "learning_rate": 1.601192282021722e-05, "loss": 0.1092, "step": 131500 }, { "epoch": 8.0, "eval_bleu": 87.7655, "eval_gen_len": 14.7622, "eval_loss": 0.15202689170837402, "eval_runtime": 220.0905, "eval_samples_per_second": 73.261, "eval_steps_per_second": 2.29, "step": 131848 }, { "epoch": 8.01, "learning_rate": 1.5996784175717495e-05, "loss": 0.1078, "step": 132000 }, { "epoch": 8.04, "learning_rate": 1.5981615193252838e-05, "loss": 0.0986, "step": 132500 }, { "epoch": 8.07, "learning_rate": 1.5966446210788183e-05, "loss": 0.0993, "step": 133000 }, { "epoch": 8.1, "learning_rate": 1.5951277228323526e-05, "loss": 0.097, "step": 133500 }, { "epoch": 8.13, "learning_rate": 1.593610824585887e-05, "loss": 0.0987, "step": 134000 }, { "epoch": 8.16, "learning_rate": 1.5920969601359143e-05, "loss": 0.0996, "step": 134500 }, { "epoch": 8.19, "learning_rate": 1.590580061889449e-05, "loss": 0.0987, "step": 135000 }, { "epoch": 8.22, "learning_rate": 1.589063163642983e-05, "loss": 0.0987, "step": 135500 }, { "epoch": 8.25, "learning_rate": 1.5875462653965174e-05, "loss": 0.1005, "step": 136000 }, { "epoch": 8.28, "learning_rate": 1.5860293671500516e-05, "loss": 0.0987, "step": 136500 }, { "epoch": 8.31, "learning_rate": 1.584512468903586e-05, "loss": 0.1009, "step": 137000 }, { "epoch": 8.34, "learning_rate": 1.5829986044536133e-05, "loss": 0.1018, "step": 137500 }, { "epoch": 8.37, "learning_rate": 1.581481706207148e-05, "loss": 0.0992, "step": 138000 }, { "epoch": 8.4, "learning_rate": 1.579964807960682e-05, "loss": 0.1013, "step": 138500 }, { "epoch": 8.43, "learning_rate": 1.5784479097142164e-05, "loss": 0.0987, "step": 139000 }, { "epoch": 8.46, "learning_rate": 1.5769340452642438e-05, "loss": 0.1006, "step": 139500 }, { "epoch": 8.49, "learning_rate": 1.575417147017778e-05, "loss": 0.1, "step": 140000 }, { "epoch": 8.52, "learning_rate": 1.5739002487713126e-05, "loss": 0.1023, "step": 140500 }, { "epoch": 8.56, "learning_rate": 1.572383350524847e-05, "loss": 0.1011, "step": 141000 }, { "epoch": 8.59, "learning_rate": 1.570866452278381e-05, "loss": 0.1019, "step": 141500 }, { "epoch": 8.62, "learning_rate": 1.5693525878284085e-05, "loss": 0.0995, "step": 142000 }, { "epoch": 8.65, "learning_rate": 1.567838723378436e-05, "loss": 0.1015, "step": 142500 }, { "epoch": 8.68, "learning_rate": 1.5663218251319702e-05, "loss": 0.1, "step": 143000 }, { "epoch": 8.71, "learning_rate": 1.5648049268855045e-05, "loss": 0.1012, "step": 143500 }, { "epoch": 8.74, "learning_rate": 1.563288028639039e-05, "loss": 0.1012, "step": 144000 }, { "epoch": 8.77, "learning_rate": 1.5617711303925733e-05, "loss": 0.103, "step": 144500 }, { "epoch": 8.8, "learning_rate": 1.560254232146108e-05, "loss": 0.1021, "step": 145000 }, { "epoch": 8.83, "learning_rate": 1.558737333899642e-05, "loss": 0.103, "step": 145500 }, { "epoch": 8.86, "learning_rate": 1.5572204356531767e-05, "loss": 0.1016, "step": 146000 }, { "epoch": 8.89, "learning_rate": 1.555703537406711e-05, "loss": 0.1013, "step": 146500 }, { "epoch": 8.92, "learning_rate": 1.5541866391602452e-05, "loss": 0.1009, "step": 147000 }, { "epoch": 8.95, "learning_rate": 1.5526727747102726e-05, "loss": 0.1024, "step": 147500 }, { "epoch": 8.98, "learning_rate": 1.551155876463807e-05, "loss": 0.1019, "step": 148000 }, { "epoch": 9.0, "eval_bleu": 87.7707, "eval_gen_len": 14.676, "eval_loss": 0.15273568034172058, "eval_runtime": 220.9435, "eval_samples_per_second": 72.978, "eval_steps_per_second": 2.281, "step": 148329 }, { "epoch": 9.01, "learning_rate": 1.5496389782173415e-05, "loss": 0.0979, "step": 148500 }, { "epoch": 9.04, "learning_rate": 1.5481220799708757e-05, "loss": 0.0906, "step": 149000 }, { "epoch": 9.07, "learning_rate": 1.54660518172441e-05, "loss": 0.0902, "step": 149500 }, { "epoch": 9.1, "learning_rate": 1.5450882834779442e-05, "loss": 0.093, "step": 150000 }, { "epoch": 9.13, "learning_rate": 1.5435744190279716e-05, "loss": 0.0926, "step": 150500 }, { "epoch": 9.16, "learning_rate": 1.542057520781506e-05, "loss": 0.0905, "step": 151000 }, { "epoch": 9.19, "learning_rate": 1.5405406225350405e-05, "loss": 0.0915, "step": 151500 }, { "epoch": 9.22, "learning_rate": 1.5390237242885747e-05, "loss": 0.092, "step": 152000 }, { "epoch": 9.25, "learning_rate": 1.5375068260421093e-05, "loss": 0.0922, "step": 152500 }, { "epoch": 9.28, "learning_rate": 1.5359899277956436e-05, "loss": 0.0913, "step": 153000 }, { "epoch": 9.31, "learning_rate": 1.5344730295491778e-05, "loss": 0.0932, "step": 153500 }, { "epoch": 9.34, "learning_rate": 1.5329591650992052e-05, "loss": 0.0925, "step": 154000 }, { "epoch": 9.37, "learning_rate": 1.5314422668527395e-05, "loss": 0.0927, "step": 154500 }, { "epoch": 9.4, "learning_rate": 1.529925368606274e-05, "loss": 0.0933, "step": 155000 }, { "epoch": 9.44, "learning_rate": 1.5284084703598083e-05, "loss": 0.0928, "step": 155500 }, { "epoch": 9.47, "learning_rate": 1.5268946059098357e-05, "loss": 0.094, "step": 156000 }, { "epoch": 9.5, "learning_rate": 1.52537770766337e-05, "loss": 0.0925, "step": 156500 }, { "epoch": 9.53, "learning_rate": 1.5238608094169046e-05, "loss": 0.093, "step": 157000 }, { "epoch": 9.56, "learning_rate": 1.5223439111704388e-05, "loss": 0.0936, "step": 157500 }, { "epoch": 9.59, "learning_rate": 1.5208270129239732e-05, "loss": 0.0935, "step": 158000 }, { "epoch": 9.62, "learning_rate": 1.5193101146775075e-05, "loss": 0.0913, "step": 158500 }, { "epoch": 9.65, "learning_rate": 1.5177932164310417e-05, "loss": 0.0943, "step": 159000 }, { "epoch": 9.68, "learning_rate": 1.5162763181845763e-05, "loss": 0.093, "step": 159500 }, { "epoch": 9.71, "learning_rate": 1.5147624537346036e-05, "loss": 0.0943, "step": 160000 }, { "epoch": 9.74, "learning_rate": 1.513248589284631e-05, "loss": 0.0952, "step": 160500 }, { "epoch": 9.77, "learning_rate": 1.5117316910381653e-05, "loss": 0.0926, "step": 161000 }, { "epoch": 9.8, "learning_rate": 1.5102147927916997e-05, "loss": 0.0939, "step": 161500 }, { "epoch": 9.83, "learning_rate": 1.508697894545234e-05, "loss": 0.0964, "step": 162000 }, { "epoch": 9.86, "learning_rate": 1.5071809962987685e-05, "loss": 0.0959, "step": 162500 }, { "epoch": 9.89, "learning_rate": 1.5056640980523028e-05, "loss": 0.0936, "step": 163000 }, { "epoch": 9.92, "learning_rate": 1.5041502336023302e-05, "loss": 0.0937, "step": 163500 }, { "epoch": 9.95, "learning_rate": 1.5026333353558644e-05, "loss": 0.094, "step": 164000 }, { "epoch": 9.98, "learning_rate": 1.5011164371093988e-05, "loss": 0.0965, "step": 164500 }, { "epoch": 10.0, "eval_bleu": 87.6567, "eval_gen_len": 14.6305, "eval_loss": 0.1554132103919983, "eval_runtime": 219.4742, "eval_samples_per_second": 73.467, "eval_steps_per_second": 2.296, "step": 164810 }, { "epoch": 10.01, "learning_rate": 1.4995995388629333e-05, "loss": 0.0908, "step": 165000 }, { "epoch": 10.04, "learning_rate": 1.4980826406164675e-05, "loss": 0.0832, "step": 165500 }, { "epoch": 10.07, "learning_rate": 1.496568776166495e-05, "loss": 0.0833, "step": 166000 }, { "epoch": 10.1, "learning_rate": 1.4950518779200292e-05, "loss": 0.0843, "step": 166500 }, { "epoch": 10.13, "learning_rate": 1.4935349796735636e-05, "loss": 0.0844, "step": 167000 }, { "epoch": 10.16, "learning_rate": 1.4920180814270979e-05, "loss": 0.0831, "step": 167500 }, { "epoch": 10.19, "learning_rate": 1.4905011831806324e-05, "loss": 0.0843, "step": 168000 }, { "epoch": 10.22, "learning_rate": 1.4889842849341667e-05, "loss": 0.0856, "step": 168500 }, { "epoch": 10.25, "learning_rate": 1.4874673866877011e-05, "loss": 0.0856, "step": 169000 }, { "epoch": 10.28, "learning_rate": 1.4859504884412355e-05, "loss": 0.0867, "step": 169500 }, { "epoch": 10.31, "learning_rate": 1.4844396577877558e-05, "loss": 0.0856, "step": 170000 }, { "epoch": 10.35, "learning_rate": 1.48292275954129e-05, "loss": 0.0866, "step": 170500 }, { "epoch": 10.38, "learning_rate": 1.4814058612948246e-05, "loss": 0.0857, "step": 171000 }, { "epoch": 10.41, "learning_rate": 1.4798889630483589e-05, "loss": 0.0862, "step": 171500 }, { "epoch": 10.44, "learning_rate": 1.4783720648018931e-05, "loss": 0.0866, "step": 172000 }, { "epoch": 10.47, "learning_rate": 1.4768551665554275e-05, "loss": 0.0874, "step": 172500 }, { "epoch": 10.5, "learning_rate": 1.4753413021054548e-05, "loss": 0.0869, "step": 173000 }, { "epoch": 10.53, "learning_rate": 1.4738244038589892e-05, "loss": 0.0876, "step": 173500 }, { "epoch": 10.56, "learning_rate": 1.4723075056125236e-05, "loss": 0.0891, "step": 174000 }, { "epoch": 10.59, "learning_rate": 1.470790607366058e-05, "loss": 0.0866, "step": 174500 }, { "epoch": 10.62, "learning_rate": 1.4692737091195923e-05, "loss": 0.088, "step": 175000 }, { "epoch": 10.65, "learning_rate": 1.4677598446696197e-05, "loss": 0.0868, "step": 175500 }, { "epoch": 10.68, "learning_rate": 1.466245980219647e-05, "loss": 0.0873, "step": 176000 }, { "epoch": 10.71, "learning_rate": 1.4647290819731814e-05, "loss": 0.0836, "step": 176500 }, { "epoch": 10.74, "learning_rate": 1.4632121837267156e-05, "loss": 0.0886, "step": 177000 }, { "epoch": 10.77, "learning_rate": 1.4616952854802502e-05, "loss": 0.0858, "step": 177500 }, { "epoch": 10.8, "learning_rate": 1.4601814210302773e-05, "loss": 0.0876, "step": 178000 }, { "epoch": 10.83, "learning_rate": 1.4586645227838119e-05, "loss": 0.0869, "step": 178500 }, { "epoch": 10.86, "learning_rate": 1.4571476245373461e-05, "loss": 0.0865, "step": 179000 }, { "epoch": 10.89, "learning_rate": 1.4556307262908804e-05, "loss": 0.0877, "step": 179500 }, { "epoch": 10.92, "learning_rate": 1.454113828044415e-05, "loss": 0.089, "step": 180000 }, { "epoch": 10.95, "learning_rate": 1.4525969297979492e-05, "loss": 0.0883, "step": 180500 }, { "epoch": 10.98, "learning_rate": 1.4510800315514836e-05, "loss": 0.0875, "step": 181000 }, { "epoch": 11.0, "eval_bleu": 87.8907, "eval_gen_len": 14.7111, "eval_loss": 0.15501651167869568, "eval_runtime": 220.1923, "eval_samples_per_second": 73.227, "eval_steps_per_second": 2.289, "step": 181291 }, { "epoch": 11.01, "learning_rate": 1.4495631333050179e-05, "loss": 0.0855, "step": 181500 }, { "epoch": 11.04, "learning_rate": 1.4480462350585525e-05, "loss": 0.0761, "step": 182000 }, { "epoch": 11.07, "learning_rate": 1.4465293368120867e-05, "loss": 0.0785, "step": 182500 }, { "epoch": 11.1, "learning_rate": 1.4450124385656213e-05, "loss": 0.0783, "step": 183000 }, { "epoch": 11.13, "learning_rate": 1.4434985741156484e-05, "loss": 0.0792, "step": 183500 }, { "epoch": 11.16, "learning_rate": 1.441981675869183e-05, "loss": 0.0777, "step": 184000 }, { "epoch": 11.19, "learning_rate": 1.4404647776227172e-05, "loss": 0.0782, "step": 184500 }, { "epoch": 11.23, "learning_rate": 1.4389478793762515e-05, "loss": 0.0771, "step": 185000 }, { "epoch": 11.26, "learning_rate": 1.4374309811297859e-05, "loss": 0.0802, "step": 185500 }, { "epoch": 11.29, "learning_rate": 1.4359140828833203e-05, "loss": 0.079, "step": 186000 }, { "epoch": 11.32, "learning_rate": 1.4343971846368547e-05, "loss": 0.0791, "step": 186500 }, { "epoch": 11.35, "learning_rate": 1.432883320186882e-05, "loss": 0.0788, "step": 187000 }, { "epoch": 11.38, "learning_rate": 1.4313694557369094e-05, "loss": 0.0784, "step": 187500 }, { "epoch": 11.41, "learning_rate": 1.4298525574904437e-05, "loss": 0.0806, "step": 188000 }, { "epoch": 11.44, "learning_rate": 1.428335659243978e-05, "loss": 0.0805, "step": 188500 }, { "epoch": 11.47, "learning_rate": 1.4268187609975123e-05, "loss": 0.0805, "step": 189000 }, { "epoch": 11.5, "learning_rate": 1.4253018627510469e-05, "loss": 0.0807, "step": 189500 }, { "epoch": 11.53, "learning_rate": 1.4237849645045812e-05, "loss": 0.0814, "step": 190000 }, { "epoch": 11.56, "learning_rate": 1.4222680662581154e-05, "loss": 0.079, "step": 190500 }, { "epoch": 11.59, "learning_rate": 1.4207542018081428e-05, "loss": 0.0796, "step": 191000 }, { "epoch": 11.62, "learning_rate": 1.419237303561677e-05, "loss": 0.0796, "step": 191500 }, { "epoch": 11.65, "learning_rate": 1.4177204053152117e-05, "loss": 0.0789, "step": 192000 }, { "epoch": 11.68, "learning_rate": 1.416203507068746e-05, "loss": 0.0836, "step": 192500 }, { "epoch": 11.71, "learning_rate": 1.4146866088222803e-05, "loss": 0.0803, "step": 193000 }, { "epoch": 11.74, "learning_rate": 1.4131697105758146e-05, "loss": 0.08, "step": 193500 }, { "epoch": 11.77, "learning_rate": 1.4116528123293492e-05, "loss": 0.0828, "step": 194000 }, { "epoch": 11.8, "learning_rate": 1.4101359140828834e-05, "loss": 0.0817, "step": 194500 }, { "epoch": 11.83, "learning_rate": 1.4086190158364178e-05, "loss": 0.0807, "step": 195000 }, { "epoch": 11.86, "learning_rate": 1.4071021175899523e-05, "loss": 0.0815, "step": 195500 }, { "epoch": 11.89, "learning_rate": 1.4055852193434865e-05, "loss": 0.08, "step": 196000 }, { "epoch": 11.92, "learning_rate": 1.404068321097021e-05, "loss": 0.0815, "step": 196500 }, { "epoch": 11.95, "learning_rate": 1.4025544566470482e-05, "loss": 0.0817, "step": 197000 }, { "epoch": 11.98, "learning_rate": 1.4010375584005826e-05, "loss": 0.0822, "step": 197500 }, { "epoch": 12.0, "eval_bleu": 87.654, "eval_gen_len": 14.6811, "eval_loss": 0.15846888720989227, "eval_runtime": 218.8023, "eval_samples_per_second": 73.692, "eval_steps_per_second": 2.303, "step": 197772 }, { "epoch": 12.01, "learning_rate": 1.3995236939506098e-05, "loss": 0.0786, "step": 198000 }, { "epoch": 12.04, "learning_rate": 1.3980067957041443e-05, "loss": 0.0711, "step": 198500 }, { "epoch": 12.07, "learning_rate": 1.3964898974576785e-05, "loss": 0.0716, "step": 199000 }, { "epoch": 12.1, "learning_rate": 1.3949729992112131e-05, "loss": 0.0719, "step": 199500 }, { "epoch": 12.14, "learning_rate": 1.3934591347612404e-05, "loss": 0.0736, "step": 200000 }, { "epoch": 12.17, "learning_rate": 1.3919422365147748e-05, "loss": 0.0722, "step": 200500 }, { "epoch": 12.2, "learning_rate": 1.390425338268309e-05, "loss": 0.0746, "step": 201000 }, { "epoch": 12.23, "learning_rate": 1.3889084400218436e-05, "loss": 0.0748, "step": 201500 }, { "epoch": 12.26, "learning_rate": 1.3873945755718707e-05, "loss": 0.0723, "step": 202000 }, { "epoch": 12.29, "learning_rate": 1.3858776773254053e-05, "loss": 0.073, "step": 202500 }, { "epoch": 12.32, "learning_rate": 1.3843607790789395e-05, "loss": 0.0722, "step": 203000 }, { "epoch": 12.35, "learning_rate": 1.3828438808324738e-05, "loss": 0.0731, "step": 203500 }, { "epoch": 12.38, "learning_rate": 1.3813269825860082e-05, "loss": 0.0726, "step": 204000 }, { "epoch": 12.41, "learning_rate": 1.3798100843395426e-05, "loss": 0.0761, "step": 204500 }, { "epoch": 12.44, "learning_rate": 1.378293186093077e-05, "loss": 0.0744, "step": 205000 }, { "epoch": 12.47, "learning_rate": 1.3767762878466113e-05, "loss": 0.074, "step": 205500 }, { "epoch": 12.5, "learning_rate": 1.3752593896001459e-05, "loss": 0.0741, "step": 206000 }, { "epoch": 12.53, "learning_rate": 1.3737424913536801e-05, "loss": 0.0734, "step": 206500 }, { "epoch": 12.56, "learning_rate": 1.3722255931072144e-05, "loss": 0.0745, "step": 207000 }, { "epoch": 12.59, "learning_rate": 1.3707086948607488e-05, "loss": 0.0745, "step": 207500 }, { "epoch": 12.62, "learning_rate": 1.369194830410776e-05, "loss": 0.0745, "step": 208000 }, { "epoch": 12.65, "learning_rate": 1.3676779321643106e-05, "loss": 0.0745, "step": 208500 }, { "epoch": 12.68, "learning_rate": 1.3661610339178449e-05, "loss": 0.0747, "step": 209000 }, { "epoch": 12.71, "learning_rate": 1.3646441356713793e-05, "loss": 0.0748, "step": 209500 }, { "epoch": 12.74, "learning_rate": 1.3631272374249135e-05, "loss": 0.0755, "step": 210000 }, { "epoch": 12.77, "learning_rate": 1.3616103391784481e-05, "loss": 0.0735, "step": 210500 }, { "epoch": 12.8, "learning_rate": 1.3600934409319824e-05, "loss": 0.0738, "step": 211000 }, { "epoch": 12.83, "learning_rate": 1.3585765426855168e-05, "loss": 0.0748, "step": 211500 }, { "epoch": 12.86, "learning_rate": 1.357062678235544e-05, "loss": 0.0752, "step": 212000 }, { "epoch": 12.89, "learning_rate": 1.3555457799890785e-05, "loss": 0.0749, "step": 212500 }, { "epoch": 12.92, "learning_rate": 1.3540288817426129e-05, "loss": 0.0745, "step": 213000 }, { "epoch": 12.95, "learning_rate": 1.3525119834961471e-05, "loss": 0.0752, "step": 213500 }, { "epoch": 12.98, "learning_rate": 1.3509981190461746e-05, "loss": 0.077, "step": 214000 }, { "epoch": 13.0, "eval_bleu": 87.6388, "eval_gen_len": 14.7814, "eval_loss": 0.16246692836284637, "eval_runtime": 220.1042, "eval_samples_per_second": 73.256, "eval_steps_per_second": 2.29, "step": 214253 }, { "epoch": 13.01, "learning_rate": 1.3494812207997088e-05, "loss": 0.0717, "step": 214500 }, { "epoch": 13.05, "learning_rate": 1.3479643225532432e-05, "loss": 0.0665, "step": 215000 }, { "epoch": 13.08, "learning_rate": 1.3464474243067775e-05, "loss": 0.066, "step": 215500 }, { "epoch": 13.11, "learning_rate": 1.3449335598568049e-05, "loss": 0.0663, "step": 216000 }, { "epoch": 13.14, "learning_rate": 1.3434166616103393e-05, "loss": 0.0675, "step": 216500 }, { "epoch": 13.17, "learning_rate": 1.3418997633638737e-05, "loss": 0.067, "step": 217000 }, { "epoch": 13.2, "learning_rate": 1.340382865117408e-05, "loss": 0.0684, "step": 217500 }, { "epoch": 13.23, "learning_rate": 1.3388690006674354e-05, "loss": 0.067, "step": 218000 }, { "epoch": 13.26, "learning_rate": 1.3373521024209697e-05, "loss": 0.0678, "step": 218500 }, { "epoch": 13.29, "learning_rate": 1.3358352041745042e-05, "loss": 0.0658, "step": 219000 }, { "epoch": 13.32, "learning_rate": 1.3343183059280385e-05, "loss": 0.0657, "step": 219500 }, { "epoch": 13.35, "learning_rate": 1.3328014076815727e-05, "loss": 0.0687, "step": 220000 }, { "epoch": 13.38, "learning_rate": 1.3312845094351072e-05, "loss": 0.0667, "step": 220500 }, { "epoch": 13.41, "learning_rate": 1.3297676111886416e-05, "loss": 0.068, "step": 221000 }, { "epoch": 13.44, "learning_rate": 1.328253746738669e-05, "loss": 0.0692, "step": 221500 }, { "epoch": 13.47, "learning_rate": 1.3267368484922032e-05, "loss": 0.0688, "step": 222000 }, { "epoch": 13.5, "learning_rate": 1.3252199502457377e-05, "loss": 0.0675, "step": 222500 }, { "epoch": 13.53, "learning_rate": 1.3237030519992719e-05, "loss": 0.0688, "step": 223000 }, { "epoch": 13.56, "learning_rate": 1.3221861537528065e-05, "loss": 0.0676, "step": 223500 }, { "epoch": 13.59, "learning_rate": 1.3206692555063408e-05, "loss": 0.0681, "step": 224000 }, { "epoch": 13.62, "learning_rate": 1.3191553910563682e-05, "loss": 0.0676, "step": 224500 }, { "epoch": 13.65, "learning_rate": 1.3176384928099024e-05, "loss": 0.0709, "step": 225000 }, { "epoch": 13.68, "learning_rate": 1.3161215945634367e-05, "loss": 0.0693, "step": 225500 }, { "epoch": 13.71, "learning_rate": 1.3146046963169713e-05, "loss": 0.0686, "step": 226000 }, { "epoch": 13.74, "learning_rate": 1.3130877980705055e-05, "loss": 0.0693, "step": 226500 }, { "epoch": 13.77, "learning_rate": 1.31157089982404e-05, "loss": 0.0705, "step": 227000 }, { "epoch": 13.8, "learning_rate": 1.3100540015775742e-05, "loss": 0.0695, "step": 227500 }, { "epoch": 13.83, "learning_rate": 1.3085401371276016e-05, "loss": 0.0698, "step": 228000 }, { "epoch": 13.86, "learning_rate": 1.3070232388811358e-05, "loss": 0.0695, "step": 228500 }, { "epoch": 13.89, "learning_rate": 1.3055063406346704e-05, "loss": 0.0688, "step": 229000 }, { "epoch": 13.93, "learning_rate": 1.3039894423882047e-05, "loss": 0.0718, "step": 229500 }, { "epoch": 13.96, "learning_rate": 1.3024725441417391e-05, "loss": 0.0704, "step": 230000 }, { "epoch": 13.99, "learning_rate": 1.3009556458952735e-05, "loss": 0.0734, "step": 230500 }, { "epoch": 14.0, "eval_bleu": 87.771, "eval_gen_len": 14.7707, "eval_loss": 0.16568207740783691, "eval_runtime": 221.3957, "eval_samples_per_second": 72.829, "eval_steps_per_second": 2.276, "step": 230734 }, { "epoch": 14.02, "learning_rate": 1.299441781445301e-05, "loss": 0.066, "step": 231000 }, { "epoch": 14.05, "learning_rate": 1.2979248831988352e-05, "loss": 0.0608, "step": 231500 }, { "epoch": 14.08, "learning_rate": 1.2964079849523694e-05, "loss": 0.0603, "step": 232000 }, { "epoch": 14.11, "learning_rate": 1.2948910867059039e-05, "loss": 0.0606, "step": 232500 }, { "epoch": 14.14, "learning_rate": 1.2933741884594381e-05, "loss": 0.0612, "step": 233000 }, { "epoch": 14.17, "learning_rate": 1.2918572902129727e-05, "loss": 0.0618, "step": 233500 }, { "epoch": 14.2, "learning_rate": 1.290340391966507e-05, "loss": 0.0614, "step": 234000 }, { "epoch": 14.23, "learning_rate": 1.2888234937200415e-05, "loss": 0.0614, "step": 234500 }, { "epoch": 14.26, "learning_rate": 1.2873096292700686e-05, "loss": 0.061, "step": 235000 }, { "epoch": 14.29, "learning_rate": 1.2857927310236032e-05, "loss": 0.0638, "step": 235500 }, { "epoch": 14.32, "learning_rate": 1.2842758327771374e-05, "loss": 0.0615, "step": 236000 }, { "epoch": 14.35, "learning_rate": 1.2827589345306717e-05, "loss": 0.0637, "step": 236500 }, { "epoch": 14.38, "learning_rate": 1.2812420362842061e-05, "loss": 0.063, "step": 237000 }, { "epoch": 14.41, "learning_rate": 1.2797281718342334e-05, "loss": 0.0623, "step": 237500 }, { "epoch": 14.44, "learning_rate": 1.2782112735877678e-05, "loss": 0.0627, "step": 238000 }, { "epoch": 14.47, "learning_rate": 1.2766943753413022e-05, "loss": 0.0649, "step": 238500 }, { "epoch": 14.5, "learning_rate": 1.2751774770948366e-05, "loss": 0.0639, "step": 239000 }, { "epoch": 14.53, "learning_rate": 1.2736605788483709e-05, "loss": 0.0632, "step": 239500 }, { "epoch": 14.56, "learning_rate": 1.2721467143983983e-05, "loss": 0.064, "step": 240000 }, { "epoch": 14.59, "learning_rate": 1.2706298161519325e-05, "loss": 0.0641, "step": 240500 }, { "epoch": 14.62, "learning_rate": 1.2691129179054671e-05, "loss": 0.0639, "step": 241000 }, { "epoch": 14.65, "learning_rate": 1.2675960196590014e-05, "loss": 0.0645, "step": 241500 }, { "epoch": 14.68, "learning_rate": 1.2660821552090288e-05, "loss": 0.065, "step": 242000 }, { "epoch": 14.71, "learning_rate": 1.264565256962563e-05, "loss": 0.0653, "step": 242500 }, { "epoch": 14.74, "learning_rate": 1.2630483587160973e-05, "loss": 0.0646, "step": 243000 }, { "epoch": 14.77, "learning_rate": 1.2615314604696319e-05, "loss": 0.0658, "step": 243500 }, { "epoch": 14.8, "learning_rate": 1.260017596019659e-05, "loss": 0.0647, "step": 244000 }, { "epoch": 14.84, "learning_rate": 1.2585006977731936e-05, "loss": 0.0627, "step": 244500 }, { "epoch": 14.87, "learning_rate": 1.2569837995267278e-05, "loss": 0.0635, "step": 245000 }, { "epoch": 14.9, "learning_rate": 1.2554669012802622e-05, "loss": 0.0649, "step": 245500 }, { "epoch": 14.93, "learning_rate": 1.2539500030337965e-05, "loss": 0.0638, "step": 246000 }, { "epoch": 14.96, "learning_rate": 1.252433104787331e-05, "loss": 0.0652, "step": 246500 }, { "epoch": 14.99, "learning_rate": 1.2509162065408653e-05, "loss": 0.0649, "step": 247000 }, { "epoch": 15.0, "eval_bleu": 87.6289, "eval_gen_len": 14.7254, "eval_loss": 0.1687079817056656, "eval_runtime": 219.5287, "eval_samples_per_second": 73.448, "eval_steps_per_second": 2.296, "step": 247215 }, { "epoch": 15.02, "learning_rate": 1.2494023420908927e-05, "loss": 0.0556, "step": 247500 }, { "epoch": 15.05, "learning_rate": 1.247885443844427e-05, "loss": 0.0569, "step": 248000 }, { "epoch": 15.08, "learning_rate": 1.2463685455979616e-05, "loss": 0.0562, "step": 248500 }, { "epoch": 15.11, "learning_rate": 1.2448516473514958e-05, "loss": 0.0566, "step": 249000 }, { "epoch": 15.14, "learning_rate": 1.24333474910503e-05, "loss": 0.0575, "step": 249500 }, { "epoch": 15.17, "learning_rate": 1.2418178508585645e-05, "loss": 0.0576, "step": 250000 }, { "epoch": 15.2, "learning_rate": 1.2403039864085917e-05, "loss": 0.0577, "step": 250500 }, { "epoch": 15.23, "learning_rate": 1.2387870881621262e-05, "loss": 0.0562, "step": 251000 }, { "epoch": 15.26, "learning_rate": 1.2372701899156606e-05, "loss": 0.0566, "step": 251500 }, { "epoch": 15.29, "learning_rate": 1.235753291669195e-05, "loss": 0.0582, "step": 252000 }, { "epoch": 15.32, "learning_rate": 1.2342363934227292e-05, "loss": 0.0582, "step": 252500 }, { "epoch": 15.35, "learning_rate": 1.2327194951762638e-05, "loss": 0.0575, "step": 253000 }, { "epoch": 15.38, "learning_rate": 1.231202596929798e-05, "loss": 0.0593, "step": 253500 }, { "epoch": 15.41, "learning_rate": 1.2296856986833323e-05, "loss": 0.0584, "step": 254000 }, { "epoch": 15.44, "learning_rate": 1.2281688004368667e-05, "loss": 0.0583, "step": 254500 }, { "epoch": 15.47, "learning_rate": 1.226654935986894e-05, "loss": 0.058, "step": 255000 }, { "epoch": 15.5, "learning_rate": 1.2251380377404284e-05, "loss": 0.0594, "step": 255500 }, { "epoch": 15.53, "learning_rate": 1.2236211394939628e-05, "loss": 0.0601, "step": 256000 }, { "epoch": 15.56, "learning_rate": 1.2221072750439903e-05, "loss": 0.0582, "step": 256500 }, { "epoch": 15.59, "learning_rate": 1.2205903767975245e-05, "loss": 0.059, "step": 257000 }, { "epoch": 15.62, "learning_rate": 1.219073478551059e-05, "loss": 0.0574, "step": 257500 }, { "epoch": 15.65, "learning_rate": 1.2175565803045932e-05, "loss": 0.0589, "step": 258000 }, { "epoch": 15.68, "learning_rate": 1.2160396820581278e-05, "loss": 0.0586, "step": 258500 }, { "epoch": 15.72, "learning_rate": 1.214522783811662e-05, "loss": 0.0578, "step": 259000 }, { "epoch": 15.75, "learning_rate": 1.2130058855651964e-05, "loss": 0.0604, "step": 259500 }, { "epoch": 15.78, "learning_rate": 1.2114889873187308e-05, "loss": 0.059, "step": 260000 }, { "epoch": 15.81, "learning_rate": 1.209975122868758e-05, "loss": 0.0606, "step": 260500 }, { "epoch": 15.84, "learning_rate": 1.2084582246222925e-05, "loss": 0.0605, "step": 261000 }, { "epoch": 15.87, "learning_rate": 1.2069443601723196e-05, "loss": 0.0604, "step": 261500 }, { "epoch": 15.9, "learning_rate": 1.2054274619258542e-05, "loss": 0.0593, "step": 262000 }, { "epoch": 15.93, "learning_rate": 1.2039105636793884e-05, "loss": 0.0614, "step": 262500 }, { "epoch": 15.96, "learning_rate": 1.2023936654329229e-05, "loss": 0.0588, "step": 263000 }, { "epoch": 15.99, "learning_rate": 1.2008767671864571e-05, "loss": 0.0604, "step": 263500 }, { "epoch": 16.0, "eval_bleu": 87.8903, "eval_gen_len": 14.7974, "eval_loss": 0.1746528446674347, "eval_runtime": 222.3549, "eval_samples_per_second": 72.515, "eval_steps_per_second": 2.267, "step": 263696 }, { "epoch": 16.02, "learning_rate": 1.1993598689399917e-05, "loss": 0.0544, "step": 264000 }, { "epoch": 16.05, "learning_rate": 1.197846004490019e-05, "loss": 0.051, "step": 264500 }, { "epoch": 16.08, "learning_rate": 1.1963291062435534e-05, "loss": 0.0513, "step": 265000 }, { "epoch": 16.11, "learning_rate": 1.1948122079970876e-05, "loss": 0.0521, "step": 265500 }, { "epoch": 16.14, "learning_rate": 1.1932953097506222e-05, "loss": 0.0526, "step": 266000 }, { "epoch": 16.17, "learning_rate": 1.1917784115041564e-05, "loss": 0.0524, "step": 266500 }, { "epoch": 16.2, "learning_rate": 1.1902615132576907e-05, "loss": 0.0531, "step": 267000 }, { "epoch": 16.23, "learning_rate": 1.1887446150112251e-05, "loss": 0.0537, "step": 267500 }, { "epoch": 16.26, "learning_rate": 1.1872277167647595e-05, "loss": 0.0541, "step": 268000 }, { "epoch": 16.29, "learning_rate": 1.1857168861112798e-05, "loss": 0.0543, "step": 268500 }, { "epoch": 16.32, "learning_rate": 1.184199987864814e-05, "loss": 0.0537, "step": 269000 }, { "epoch": 16.35, "learning_rate": 1.1826830896183486e-05, "loss": 0.0534, "step": 269500 }, { "epoch": 16.38, "learning_rate": 1.1811661913718829e-05, "loss": 0.0527, "step": 270000 }, { "epoch": 16.41, "learning_rate": 1.1796492931254173e-05, "loss": 0.0535, "step": 270500 }, { "epoch": 16.44, "learning_rate": 1.1781323948789515e-05, "loss": 0.0545, "step": 271000 }, { "epoch": 16.47, "learning_rate": 1.1766154966324861e-05, "loss": 0.0537, "step": 271500 }, { "epoch": 16.5, "learning_rate": 1.1750985983860204e-05, "loss": 0.0538, "step": 272000 }, { "epoch": 16.53, "learning_rate": 1.1735847339360478e-05, "loss": 0.054, "step": 272500 }, { "epoch": 16.56, "learning_rate": 1.1720708694860749e-05, "loss": 0.055, "step": 273000 }, { "epoch": 16.59, "learning_rate": 1.1705539712396093e-05, "loss": 0.0544, "step": 273500 }, { "epoch": 16.63, "learning_rate": 1.1690370729931437e-05, "loss": 0.0557, "step": 274000 }, { "epoch": 16.66, "learning_rate": 1.167520174746678e-05, "loss": 0.0535, "step": 274500 }, { "epoch": 16.69, "learning_rate": 1.1660032765002126e-05, "loss": 0.0546, "step": 275000 }, { "epoch": 16.72, "learning_rate": 1.1644894120502396e-05, "loss": 0.0555, "step": 275500 }, { "epoch": 16.75, "learning_rate": 1.1629725138037742e-05, "loss": 0.0549, "step": 276000 }, { "epoch": 16.78, "learning_rate": 1.1614556155573085e-05, "loss": 0.0553, "step": 276500 }, { "epoch": 16.81, "learning_rate": 1.1599387173108429e-05, "loss": 0.0562, "step": 277000 }, { "epoch": 16.84, "learning_rate": 1.1584218190643773e-05, "loss": 0.0557, "step": 277500 }, { "epoch": 16.87, "learning_rate": 1.1569049208179117e-05, "loss": 0.0555, "step": 278000 }, { "epoch": 16.9, "learning_rate": 1.155388022571446e-05, "loss": 0.0566, "step": 278500 }, { "epoch": 16.93, "learning_rate": 1.1538711243249802e-05, "loss": 0.0571, "step": 279000 }, { "epoch": 16.96, "learning_rate": 1.1523572598750076e-05, "loss": 0.0558, "step": 279500 }, { "epoch": 16.99, "learning_rate": 1.1508403616285419e-05, "loss": 0.0555, "step": 280000 }, { "epoch": 17.0, "eval_bleu": 87.7539, "eval_gen_len": 14.7936, "eval_loss": 0.17870613932609558, "eval_runtime": 227.9238, "eval_samples_per_second": 70.743, "eval_steps_per_second": 2.211, "step": 280177 }, { "epoch": 17.02, "learning_rate": 1.1493234633820765e-05, "loss": 0.0499, "step": 280500 }, { "epoch": 17.05, "learning_rate": 1.1478065651356107e-05, "loss": 0.0476, "step": 281000 }, { "epoch": 17.08, "learning_rate": 1.1462896668891452e-05, "loss": 0.0495, "step": 281500 }, { "epoch": 17.11, "learning_rate": 1.1447727686426796e-05, "loss": 0.0482, "step": 282000 }, { "epoch": 17.14, "learning_rate": 1.143258904192707e-05, "loss": 0.0475, "step": 282500 }, { "epoch": 17.17, "learning_rate": 1.1417420059462412e-05, "loss": 0.0499, "step": 283000 }, { "epoch": 17.2, "learning_rate": 1.1402251076997757e-05, "loss": 0.049, "step": 283500 }, { "epoch": 17.23, "learning_rate": 1.1387082094533099e-05, "loss": 0.0481, "step": 284000 }, { "epoch": 17.26, "learning_rate": 1.1371913112068442e-05, "loss": 0.0479, "step": 284500 }, { "epoch": 17.29, "learning_rate": 1.1356774467568716e-05, "loss": 0.0486, "step": 285000 }, { "epoch": 17.32, "learning_rate": 1.134163582306899e-05, "loss": 0.0501, "step": 285500 }, { "epoch": 17.35, "learning_rate": 1.1326466840604332e-05, "loss": 0.051, "step": 286000 }, { "epoch": 17.38, "learning_rate": 1.1311297858139677e-05, "loss": 0.0487, "step": 286500 }, { "epoch": 17.41, "learning_rate": 1.1296128875675021e-05, "loss": 0.0504, "step": 287000 }, { "epoch": 17.44, "learning_rate": 1.1280959893210363e-05, "loss": 0.0503, "step": 287500 }, { "epoch": 17.47, "learning_rate": 1.126579091074571e-05, "loss": 0.0499, "step": 288000 }, { "epoch": 17.51, "learning_rate": 1.1250621928281052e-05, "loss": 0.0517, "step": 288500 }, { "epoch": 17.54, "learning_rate": 1.1235452945816396e-05, "loss": 0.0504, "step": 289000 }, { "epoch": 17.57, "learning_rate": 1.1220283963351738e-05, "loss": 0.0499, "step": 289500 }, { "epoch": 17.6, "learning_rate": 1.1205114980887084e-05, "loss": 0.0525, "step": 290000 }, { "epoch": 17.63, "learning_rate": 1.1189945998422427e-05, "loss": 0.0496, "step": 290500 }, { "epoch": 17.66, "learning_rate": 1.1174807353922701e-05, "loss": 0.0498, "step": 291000 }, { "epoch": 17.69, "learning_rate": 1.1159638371458043e-05, "loss": 0.0513, "step": 291500 }, { "epoch": 17.72, "learning_rate": 1.1144469388993386e-05, "loss": 0.0501, "step": 292000 }, { "epoch": 17.75, "learning_rate": 1.1129300406528732e-05, "loss": 0.0511, "step": 292500 }, { "epoch": 17.78, "learning_rate": 1.1114131424064074e-05, "loss": 0.0503, "step": 293000 }, { "epoch": 17.81, "learning_rate": 1.1098962441599419e-05, "loss": 0.0515, "step": 293500 }, { "epoch": 17.84, "learning_rate": 1.1083793459134761e-05, "loss": 0.0527, "step": 294000 }, { "epoch": 17.87, "learning_rate": 1.1068624476670107e-05, "loss": 0.0493, "step": 294500 }, { "epoch": 17.9, "learning_rate": 1.105348583217038e-05, "loss": 0.0503, "step": 295000 }, { "epoch": 17.93, "learning_rate": 1.1038316849705724e-05, "loss": 0.0506, "step": 295500 }, { "epoch": 17.96, "learning_rate": 1.1023208543170924e-05, "loss": 0.0508, "step": 296000 }, { "epoch": 17.99, "learning_rate": 1.100803956070627e-05, "loss": 0.051, "step": 296500 }, { "epoch": 18.0, "eval_bleu": 87.7359, "eval_gen_len": 14.7867, "eval_loss": 0.18140269815921783, "eval_runtime": 217.161, "eval_samples_per_second": 74.249, "eval_steps_per_second": 2.321, "step": 296658 }, { "epoch": 18.02, "learning_rate": 1.0992870578241613e-05, "loss": 0.0467, "step": 297000 }, { "epoch": 18.05, "learning_rate": 1.0977701595776957e-05, "loss": 0.0441, "step": 297500 }, { "epoch": 18.08, "learning_rate": 1.09625326133123e-05, "loss": 0.0444, "step": 298000 }, { "epoch": 18.11, "learning_rate": 1.0947363630847642e-05, "loss": 0.0444, "step": 298500 }, { "epoch": 18.14, "learning_rate": 1.0932194648382988e-05, "loss": 0.0448, "step": 299000 }, { "epoch": 18.17, "learning_rate": 1.091702566591833e-05, "loss": 0.0446, "step": 299500 }, { "epoch": 18.2, "learning_rate": 1.0901856683453676e-05, "loss": 0.0451, "step": 300000 }, { "epoch": 18.23, "learning_rate": 1.0886687700989019e-05, "loss": 0.0452, "step": 300500 }, { "epoch": 18.26, "learning_rate": 1.0871518718524363e-05, "loss": 0.0457, "step": 301000 }, { "epoch": 18.29, "learning_rate": 1.0856349736059705e-05, "loss": 0.0463, "step": 301500 }, { "epoch": 18.32, "learning_rate": 1.0841180753595048e-05, "loss": 0.0451, "step": 302000 }, { "epoch": 18.35, "learning_rate": 1.082610278502518e-05, "loss": 0.0457, "step": 302500 }, { "epoch": 18.38, "learning_rate": 1.0810933802560526e-05, "loss": 0.0462, "step": 303000 }, { "epoch": 18.42, "learning_rate": 1.0795764820095869e-05, "loss": 0.0456, "step": 303500 }, { "epoch": 18.45, "learning_rate": 1.0780595837631213e-05, "loss": 0.0459, "step": 304000 }, { "epoch": 18.48, "learning_rate": 1.0765457193131485e-05, "loss": 0.0475, "step": 304500 }, { "epoch": 18.51, "learning_rate": 1.0750288210666828e-05, "loss": 0.047, "step": 305000 }, { "epoch": 18.54, "learning_rate": 1.0735119228202174e-05, "loss": 0.0458, "step": 305500 }, { "epoch": 18.57, "learning_rate": 1.0719950245737516e-05, "loss": 0.0471, "step": 306000 }, { "epoch": 18.6, "learning_rate": 1.070478126327286e-05, "loss": 0.0464, "step": 306500 }, { "epoch": 18.63, "learning_rate": 1.0689612280808203e-05, "loss": 0.0472, "step": 307000 }, { "epoch": 18.66, "learning_rate": 1.0674443298343549e-05, "loss": 0.0468, "step": 307500 }, { "epoch": 18.69, "learning_rate": 1.0659274315878891e-05, "loss": 0.0463, "step": 308000 }, { "epoch": 18.72, "learning_rate": 1.0644135671379166e-05, "loss": 0.0473, "step": 308500 }, { "epoch": 18.75, "learning_rate": 1.0628966688914508e-05, "loss": 0.0472, "step": 309000 }, { "epoch": 18.78, "learning_rate": 1.0613797706449854e-05, "loss": 0.0456, "step": 309500 }, { "epoch": 18.81, "learning_rate": 1.0598628723985196e-05, "loss": 0.0474, "step": 310000 }, { "epoch": 18.84, "learning_rate": 1.0583459741520539e-05, "loss": 0.0466, "step": 310500 }, { "epoch": 18.87, "learning_rate": 1.0568290759055883e-05, "loss": 0.0471, "step": 311000 }, { "epoch": 18.9, "learning_rate": 1.0553121776591226e-05, "loss": 0.0477, "step": 311500 }, { "epoch": 18.93, "learning_rate": 1.0537952794126572e-05, "loss": 0.0472, "step": 312000 }, { "epoch": 18.96, "learning_rate": 1.0522783811661914e-05, "loss": 0.0469, "step": 312500 }, { "epoch": 18.99, "learning_rate": 1.0507645167162188e-05, "loss": 0.0466, "step": 313000 }, { "epoch": 19.0, "eval_bleu": 87.7646, "eval_gen_len": 14.7869, "eval_loss": 0.18415296077728271, "eval_runtime": 217.4372, "eval_samples_per_second": 74.155, "eval_steps_per_second": 2.318, "step": 313139 }, { "epoch": 19.02, "learning_rate": 1.049247618469753e-05, "loss": 0.0435, "step": 313500 }, { "epoch": 19.05, "learning_rate": 1.0477307202232877e-05, "loss": 0.0413, "step": 314000 }, { "epoch": 19.08, "learning_rate": 1.0462168557733147e-05, "loss": 0.0409, "step": 314500 }, { "epoch": 19.11, "learning_rate": 1.0446999575268493e-05, "loss": 0.0408, "step": 315000 }, { "epoch": 19.14, "learning_rate": 1.0431830592803836e-05, "loss": 0.0414, "step": 315500 }, { "epoch": 19.17, "learning_rate": 1.0416661610339178e-05, "loss": 0.0416, "step": 316000 }, { "epoch": 19.2, "learning_rate": 1.0401492627874522e-05, "loss": 0.0417, "step": 316500 }, { "epoch": 19.23, "learning_rate": 1.0386323645409867e-05, "loss": 0.0422, "step": 317000 }, { "epoch": 19.26, "learning_rate": 1.037115466294521e-05, "loss": 0.0424, "step": 317500 }, { "epoch": 19.29, "learning_rate": 1.0355985680480553e-05, "loss": 0.0412, "step": 318000 }, { "epoch": 19.33, "learning_rate": 1.03408166980159e-05, "loss": 0.0422, "step": 318500 }, { "epoch": 19.36, "learning_rate": 1.0325647715551242e-05, "loss": 0.0401, "step": 319000 }, { "epoch": 19.39, "learning_rate": 1.0310478733086586e-05, "loss": 0.0411, "step": 319500 }, { "epoch": 19.42, "learning_rate": 1.0295340088586858e-05, "loss": 0.0428, "step": 320000 }, { "epoch": 19.45, "learning_rate": 1.0280171106122203e-05, "loss": 0.0443, "step": 320500 }, { "epoch": 19.48, "learning_rate": 1.0265002123657545e-05, "loss": 0.0435, "step": 321000 }, { "epoch": 19.51, "learning_rate": 1.024983314119289e-05, "loss": 0.0412, "step": 321500 }, { "epoch": 19.54, "learning_rate": 1.0234664158728233e-05, "loss": 0.0433, "step": 322000 }, { "epoch": 19.57, "learning_rate": 1.0219525514228506e-05, "loss": 0.0425, "step": 322500 }, { "epoch": 19.6, "learning_rate": 1.020435653176385e-05, "loss": 0.0428, "step": 323000 }, { "epoch": 19.63, "learning_rate": 1.0189187549299193e-05, "loss": 0.0432, "step": 323500 }, { "epoch": 19.66, "learning_rate": 1.0174018566834538e-05, "loss": 0.0428, "step": 324000 }, { "epoch": 19.69, "learning_rate": 1.0158849584369881e-05, "loss": 0.0435, "step": 324500 }, { "epoch": 19.72, "learning_rate": 1.0143680601905225e-05, "loss": 0.0433, "step": 325000 }, { "epoch": 19.75, "learning_rate": 1.012851161944057e-05, "loss": 0.0433, "step": 325500 }, { "epoch": 19.78, "learning_rate": 1.0113342636975914e-05, "loss": 0.0441, "step": 326000 }, { "epoch": 19.81, "learning_rate": 1.0098203992476186e-05, "loss": 0.0434, "step": 326500 }, { "epoch": 19.84, "learning_rate": 1.0083035010011529e-05, "loss": 0.0425, "step": 327000 }, { "epoch": 19.87, "learning_rate": 1.0067866027546873e-05, "loss": 0.0442, "step": 327500 }, { "epoch": 19.9, "learning_rate": 1.0052697045082215e-05, "loss": 0.0438, "step": 328000 }, { "epoch": 19.93, "learning_rate": 1.0037528062617561e-05, "loss": 0.0435, "step": 328500 }, { "epoch": 19.96, "learning_rate": 1.0022359080152904e-05, "loss": 0.0436, "step": 329000 }, { "epoch": 19.99, "learning_rate": 1.0007220435653178e-05, "loss": 0.046, "step": 329500 }, { "epoch": 20.0, "eval_bleu": 87.8274, "eval_gen_len": 14.7548, "eval_loss": 0.1875508427619934, "eval_runtime": 216.5355, "eval_samples_per_second": 74.464, "eval_steps_per_second": 2.328, "step": 329620 }, { "epoch": 20.02, "learning_rate": 9.99205145318852e-06, "loss": 0.0389, "step": 330000 }, { "epoch": 20.05, "learning_rate": 9.976882470723864e-06, "loss": 0.0373, "step": 330500 }, { "epoch": 20.08, "learning_rate": 9.961713488259209e-06, "loss": 0.0381, "step": 331000 }, { "epoch": 20.11, "learning_rate": 9.946544505794551e-06, "loss": 0.0383, "step": 331500 }, { "epoch": 20.14, "learning_rate": 9.931375523329895e-06, "loss": 0.039, "step": 332000 }, { "epoch": 20.17, "learning_rate": 9.91623687883017e-06, "loss": 0.039, "step": 332500 }, { "epoch": 20.21, "learning_rate": 9.901067896365512e-06, "loss": 0.0385, "step": 333000 }, { "epoch": 20.24, "learning_rate": 9.885898913900856e-06, "loss": 0.0387, "step": 333500 }, { "epoch": 20.27, "learning_rate": 9.8707299314362e-06, "loss": 0.0384, "step": 334000 }, { "epoch": 20.3, "learning_rate": 9.855560948971545e-06, "loss": 0.0393, "step": 334500 }, { "epoch": 20.33, "learning_rate": 9.840391966506887e-06, "loss": 0.0393, "step": 335000 }, { "epoch": 20.36, "learning_rate": 9.825222984042231e-06, "loss": 0.038, "step": 335500 }, { "epoch": 20.39, "learning_rate": 9.810084339542504e-06, "loss": 0.04, "step": 336000 }, { "epoch": 20.42, "learning_rate": 9.794915357077848e-06, "loss": 0.04, "step": 336500 }, { "epoch": 20.45, "learning_rate": 9.779746374613192e-06, "loss": 0.0397, "step": 337000 }, { "epoch": 20.48, "learning_rate": 9.764577392148535e-06, "loss": 0.0396, "step": 337500 }, { "epoch": 20.51, "learning_rate": 9.749408409683879e-06, "loss": 0.0386, "step": 338000 }, { "epoch": 20.54, "learning_rate": 9.734239427219223e-06, "loss": 0.0391, "step": 338500 }, { "epoch": 20.57, "learning_rate": 9.719070444754567e-06, "loss": 0.0404, "step": 339000 }, { "epoch": 20.6, "learning_rate": 9.703901462289911e-06, "loss": 0.04, "step": 339500 }, { "epoch": 20.63, "learning_rate": 9.688762817790184e-06, "loss": 0.0407, "step": 340000 }, { "epoch": 20.66, "learning_rate": 9.673593835325528e-06, "loss": 0.04, "step": 340500 }, { "epoch": 20.69, "learning_rate": 9.65842485286087e-06, "loss": 0.0399, "step": 341000 }, { "epoch": 20.72, "learning_rate": 9.643255870396215e-06, "loss": 0.0381, "step": 341500 }, { "epoch": 20.75, "learning_rate": 9.628117225896487e-06, "loss": 0.0395, "step": 342000 }, { "epoch": 20.78, "learning_rate": 9.612948243431831e-06, "loss": 0.0397, "step": 342500 }, { "epoch": 20.81, "learning_rate": 9.597809598932104e-06, "loss": 0.0407, "step": 343000 }, { "epoch": 20.84, "learning_rate": 9.582640616467448e-06, "loss": 0.04, "step": 343500 }, { "epoch": 20.87, "learning_rate": 9.567471634002792e-06, "loss": 0.0411, "step": 344000 }, { "epoch": 20.9, "learning_rate": 9.552302651538135e-06, "loss": 0.0412, "step": 344500 }, { "epoch": 20.93, "learning_rate": 9.537133669073479e-06, "loss": 0.0396, "step": 345000 }, { "epoch": 20.96, "learning_rate": 9.521964686608823e-06, "loss": 0.0407, "step": 345500 }, { "epoch": 20.99, "learning_rate": 9.506826042109096e-06, "loss": 0.0402, "step": 346000 }, { "epoch": 21.0, "eval_bleu": 87.6913, "eval_gen_len": 14.761, "eval_loss": 0.19335225224494934, "eval_runtime": 217.2279, "eval_samples_per_second": 74.226, "eval_steps_per_second": 2.32, "step": 346101 }, { "epoch": 21.02, "learning_rate": 9.49168739760937e-06, "loss": 0.0358, "step": 346500 }, { "epoch": 21.05, "learning_rate": 9.476518415144712e-06, "loss": 0.034, "step": 347000 }, { "epoch": 21.08, "learning_rate": 9.461349432680057e-06, "loss": 0.0357, "step": 347500 }, { "epoch": 21.12, "learning_rate": 9.4461804502154e-06, "loss": 0.0359, "step": 348000 }, { "epoch": 21.15, "learning_rate": 9.431011467750743e-06, "loss": 0.0361, "step": 348500 }, { "epoch": 21.18, "learning_rate": 9.415842485286087e-06, "loss": 0.0361, "step": 349000 }, { "epoch": 21.21, "learning_rate": 9.400673502821432e-06, "loss": 0.0359, "step": 349500 }, { "epoch": 21.24, "learning_rate": 9.385504520356776e-06, "loss": 0.0349, "step": 350000 }, { "epoch": 21.27, "learning_rate": 9.370335537892118e-06, "loss": 0.0363, "step": 350500 }, { "epoch": 21.3, "learning_rate": 9.355166555427463e-06, "loss": 0.0351, "step": 351000 }, { "epoch": 21.33, "learning_rate": 9.340027910927735e-06, "loss": 0.0365, "step": 351500 }, { "epoch": 21.36, "learning_rate": 9.32488926642801e-06, "loss": 0.0361, "step": 352000 }, { "epoch": 21.39, "learning_rate": 9.309720283963353e-06, "loss": 0.0367, "step": 352500 }, { "epoch": 21.42, "learning_rate": 9.294551301498696e-06, "loss": 0.0355, "step": 353000 }, { "epoch": 21.45, "learning_rate": 9.27938231903404e-06, "loss": 0.0364, "step": 353500 }, { "epoch": 21.48, "learning_rate": 9.264213336569383e-06, "loss": 0.0361, "step": 354000 }, { "epoch": 21.51, "learning_rate": 9.249044354104727e-06, "loss": 0.0367, "step": 354500 }, { "epoch": 21.54, "learning_rate": 9.233905709605e-06, "loss": 0.0372, "step": 355000 }, { "epoch": 21.57, "learning_rate": 9.218736727140343e-06, "loss": 0.0371, "step": 355500 }, { "epoch": 21.6, "learning_rate": 9.203567744675688e-06, "loss": 0.037, "step": 356000 }, { "epoch": 21.63, "learning_rate": 9.188398762211032e-06, "loss": 0.0376, "step": 356500 }, { "epoch": 21.66, "learning_rate": 9.173229779746376e-06, "loss": 0.0369, "step": 357000 }, { "epoch": 21.69, "learning_rate": 9.158060797281719e-06, "loss": 0.0379, "step": 357500 }, { "epoch": 21.72, "learning_rate": 9.142891814817063e-06, "loss": 0.0372, "step": 358000 }, { "epoch": 21.75, "learning_rate": 9.127722832352407e-06, "loss": 0.0375, "step": 358500 }, { "epoch": 21.78, "learning_rate": 9.112553849887751e-06, "loss": 0.038, "step": 359000 }, { "epoch": 21.81, "learning_rate": 9.097384867423094e-06, "loss": 0.0376, "step": 359500 }, { "epoch": 21.84, "learning_rate": 9.082215884958438e-06, "loss": 0.0366, "step": 360000 }, { "epoch": 21.87, "learning_rate": 9.067046902493782e-06, "loss": 0.038, "step": 360500 }, { "epoch": 21.9, "learning_rate": 9.051908257994054e-06, "loss": 0.0375, "step": 361000 }, { "epoch": 21.93, "learning_rate": 9.036739275529399e-06, "loss": 0.0363, "step": 361500 }, { "epoch": 21.96, "learning_rate": 9.021570293064741e-06, "loss": 0.0369, "step": 362000 }, { "epoch": 22.0, "learning_rate": 9.006401310600085e-06, "loss": 0.0374, "step": 362500 }, { "epoch": 22.0, "eval_bleu": 87.7892, "eval_gen_len": 14.7636, "eval_loss": 0.19496643543243408, "eval_runtime": 216.9414, "eval_samples_per_second": 74.324, "eval_steps_per_second": 2.323, "step": 362582 }, { "epoch": 22.03, "learning_rate": 8.99123232813543e-06, "loss": 0.0331, "step": 363000 }, { "epoch": 22.06, "learning_rate": 8.976093683635702e-06, "loss": 0.0317, "step": 363500 }, { "epoch": 22.09, "learning_rate": 8.960924701171046e-06, "loss": 0.0319, "step": 364000 }, { "epoch": 22.12, "learning_rate": 8.94575571870639e-06, "loss": 0.0338, "step": 364500 }, { "epoch": 22.15, "learning_rate": 8.930586736241733e-06, "loss": 0.0331, "step": 365000 }, { "epoch": 22.18, "learning_rate": 8.915417753777077e-06, "loss": 0.0339, "step": 365500 }, { "epoch": 22.21, "learning_rate": 8.90030944724228e-06, "loss": 0.0336, "step": 366000 }, { "epoch": 22.24, "learning_rate": 8.885140464777622e-06, "loss": 0.0325, "step": 366500 }, { "epoch": 22.27, "learning_rate": 8.869971482312966e-06, "loss": 0.0324, "step": 367000 }, { "epoch": 22.3, "learning_rate": 8.85480249984831e-06, "loss": 0.0336, "step": 367500 }, { "epoch": 22.33, "learning_rate": 8.839633517383655e-06, "loss": 0.0336, "step": 368000 }, { "epoch": 22.36, "learning_rate": 8.824464534918999e-06, "loss": 0.0333, "step": 368500 }, { "epoch": 22.39, "learning_rate": 8.809295552454343e-06, "loss": 0.0335, "step": 369000 }, { "epoch": 22.42, "learning_rate": 8.794126569989686e-06, "loss": 0.0327, "step": 369500 }, { "epoch": 22.45, "learning_rate": 8.77895758752503e-06, "loss": 0.0348, "step": 370000 }, { "epoch": 22.48, "learning_rate": 8.763788605060374e-06, "loss": 0.0348, "step": 370500 }, { "epoch": 22.51, "learning_rate": 8.748649960560646e-06, "loss": 0.0343, "step": 371000 }, { "epoch": 22.54, "learning_rate": 8.733480978095989e-06, "loss": 0.0345, "step": 371500 }, { "epoch": 22.57, "learning_rate": 8.718311995631333e-06, "loss": 0.0334, "step": 372000 }, { "epoch": 22.6, "learning_rate": 8.703143013166677e-06, "loss": 0.0341, "step": 372500 }, { "epoch": 22.63, "learning_rate": 8.687974030702021e-06, "loss": 0.0342, "step": 373000 }, { "epoch": 22.66, "learning_rate": 8.672805048237366e-06, "loss": 0.0336, "step": 373500 }, { "epoch": 22.69, "learning_rate": 8.657636065772708e-06, "loss": 0.0343, "step": 374000 }, { "epoch": 22.72, "learning_rate": 8.642467083308052e-06, "loss": 0.0341, "step": 374500 }, { "epoch": 22.75, "learning_rate": 8.627298100843396e-06, "loss": 0.0339, "step": 375000 }, { "epoch": 22.78, "learning_rate": 8.612159456343669e-06, "loss": 0.0345, "step": 375500 }, { "epoch": 22.81, "learning_rate": 8.596990473879013e-06, "loss": 0.036, "step": 376000 }, { "epoch": 22.84, "learning_rate": 8.581821491414357e-06, "loss": 0.0348, "step": 376500 }, { "epoch": 22.87, "learning_rate": 8.5666525089497e-06, "loss": 0.0348, "step": 377000 }, { "epoch": 22.91, "learning_rate": 8.551513864449972e-06, "loss": 0.0343, "step": 377500 }, { "epoch": 22.94, "learning_rate": 8.536344881985317e-06, "loss": 0.036, "step": 378000 }, { "epoch": 22.97, "learning_rate": 8.52117589952066e-06, "loss": 0.0356, "step": 378500 }, { "epoch": 23.0, "learning_rate": 8.506006917056005e-06, "loss": 0.035, "step": 379000 }, { "epoch": 23.0, "eval_bleu": 87.8202, "eval_gen_len": 14.7439, "eval_loss": 0.1992015838623047, "eval_runtime": 216.913, "eval_samples_per_second": 74.334, "eval_steps_per_second": 2.324, "step": 379063 }, { "epoch": 23.03, "learning_rate": 8.490837934591349e-06, "loss": 0.0311, "step": 379500 }, { "epoch": 23.06, "learning_rate": 8.475699290091622e-06, "loss": 0.0301, "step": 380000 }, { "epoch": 23.09, "learning_rate": 8.460530307626966e-06, "loss": 0.0302, "step": 380500 }, { "epoch": 23.12, "learning_rate": 8.445361325162308e-06, "loss": 0.0295, "step": 381000 }, { "epoch": 23.15, "learning_rate": 8.430192342697652e-06, "loss": 0.0303, "step": 381500 }, { "epoch": 23.18, "learning_rate": 8.415023360232997e-06, "loss": 0.0308, "step": 382000 }, { "epoch": 23.21, "learning_rate": 8.39988471573327e-06, "loss": 0.0304, "step": 382500 }, { "epoch": 23.24, "learning_rate": 8.384715733268613e-06, "loss": 0.0309, "step": 383000 }, { "epoch": 23.27, "learning_rate": 8.369577088768886e-06, "loss": 0.031, "step": 383500 }, { "epoch": 23.3, "learning_rate": 8.35443844426916e-06, "loss": 0.0306, "step": 384000 }, { "epoch": 23.33, "learning_rate": 8.339269461804503e-06, "loss": 0.0315, "step": 384500 }, { "epoch": 23.36, "learning_rate": 8.324100479339847e-06, "loss": 0.0323, "step": 385000 }, { "epoch": 23.39, "learning_rate": 8.30893149687519e-06, "loss": 0.0324, "step": 385500 }, { "epoch": 23.42, "learning_rate": 8.293762514410533e-06, "loss": 0.032, "step": 386000 }, { "epoch": 23.45, "learning_rate": 8.278593531945878e-06, "loss": 0.0312, "step": 386500 }, { "epoch": 23.48, "learning_rate": 8.263424549481222e-06, "loss": 0.0318, "step": 387000 }, { "epoch": 23.51, "learning_rate": 8.248255567016566e-06, "loss": 0.032, "step": 387500 }, { "epoch": 23.54, "learning_rate": 8.233086584551908e-06, "loss": 0.0317, "step": 388000 }, { "epoch": 23.57, "learning_rate": 8.217917602087253e-06, "loss": 0.0319, "step": 388500 }, { "epoch": 23.6, "learning_rate": 8.202748619622597e-06, "loss": 0.0317, "step": 389000 }, { "epoch": 23.63, "learning_rate": 8.18760997512287e-06, "loss": 0.0311, "step": 389500 }, { "epoch": 23.66, "learning_rate": 8.172440992658212e-06, "loss": 0.0323, "step": 390000 }, { "epoch": 23.69, "learning_rate": 8.157272010193556e-06, "loss": 0.0316, "step": 390500 }, { "epoch": 23.72, "learning_rate": 8.1421030277289e-06, "loss": 0.0304, "step": 391000 }, { "epoch": 23.75, "learning_rate": 8.126964383229173e-06, "loss": 0.0315, "step": 391500 }, { "epoch": 23.78, "learning_rate": 8.111795400764517e-06, "loss": 0.0322, "step": 392000 }, { "epoch": 23.82, "learning_rate": 8.096626418299861e-06, "loss": 0.0328, "step": 392500 }, { "epoch": 23.85, "learning_rate": 8.081457435835205e-06, "loss": 0.032, "step": 393000 }, { "epoch": 23.88, "learning_rate": 8.06628845337055e-06, "loss": 0.0315, "step": 393500 }, { "epoch": 23.91, "learning_rate": 8.051119470905892e-06, "loss": 0.0326, "step": 394000 }, { "epoch": 23.94, "learning_rate": 8.035950488441236e-06, "loss": 0.0317, "step": 394500 }, { "epoch": 23.97, "learning_rate": 8.020781505976579e-06, "loss": 0.0318, "step": 395000 }, { "epoch": 24.0, "learning_rate": 8.005612523511923e-06, "loss": 0.0333, "step": 395500 }, { "epoch": 24.0, "eval_bleu": 87.6814, "eval_gen_len": 14.7444, "eval_loss": 0.20281654596328735, "eval_runtime": 216.1907, "eval_samples_per_second": 74.582, "eval_steps_per_second": 2.331, "step": 395544 }, { "epoch": 24.03, "learning_rate": 7.990473879012195e-06, "loss": 0.0286, "step": 396000 }, { "epoch": 24.06, "learning_rate": 7.97530489654754e-06, "loss": 0.0284, "step": 396500 }, { "epoch": 24.09, "learning_rate": 7.960135914082884e-06, "loss": 0.0281, "step": 397000 }, { "epoch": 24.12, "learning_rate": 7.944966931618228e-06, "loss": 0.0278, "step": 397500 }, { "epoch": 24.15, "learning_rate": 7.9298282871185e-06, "loss": 0.0283, "step": 398000 }, { "epoch": 24.18, "learning_rate": 7.914659304653845e-06, "loss": 0.0279, "step": 398500 }, { "epoch": 24.21, "learning_rate": 7.899490322189189e-06, "loss": 0.0286, "step": 399000 }, { "epoch": 24.24, "learning_rate": 7.884321339724533e-06, "loss": 0.0283, "step": 399500 }, { "epoch": 24.27, "learning_rate": 7.869182695224805e-06, "loss": 0.0296, "step": 400000 }, { "epoch": 24.3, "learning_rate": 7.85401371276015e-06, "loss": 0.0285, "step": 400500 }, { "epoch": 24.33, "learning_rate": 7.838844730295492e-06, "loss": 0.0292, "step": 401000 }, { "epoch": 24.36, "learning_rate": 7.823675747830836e-06, "loss": 0.0288, "step": 401500 }, { "epoch": 24.39, "learning_rate": 7.808506765366179e-06, "loss": 0.0286, "step": 402000 }, { "epoch": 24.42, "learning_rate": 7.793337782901523e-06, "loss": 0.03, "step": 402500 }, { "epoch": 24.45, "learning_rate": 7.778168800436867e-06, "loss": 0.03, "step": 403000 }, { "epoch": 24.48, "learning_rate": 7.762999817972211e-06, "loss": 0.0294, "step": 403500 }, { "epoch": 24.51, "learning_rate": 7.747830835507556e-06, "loss": 0.0288, "step": 404000 }, { "epoch": 24.54, "learning_rate": 7.732661853042898e-06, "loss": 0.0287, "step": 404500 }, { "epoch": 24.57, "learning_rate": 7.717492870578242e-06, "loss": 0.0304, "step": 405000 }, { "epoch": 24.6, "learning_rate": 7.702323888113586e-06, "loss": 0.0306, "step": 405500 }, { "epoch": 24.63, "learning_rate": 7.687154905648929e-06, "loss": 0.0298, "step": 406000 }, { "epoch": 24.66, "learning_rate": 7.672016261149203e-06, "loss": 0.0304, "step": 406500 }, { "epoch": 24.7, "learning_rate": 7.656847278684546e-06, "loss": 0.0292, "step": 407000 }, { "epoch": 24.73, "learning_rate": 7.64170863418482e-06, "loss": 0.0302, "step": 407500 }, { "epoch": 24.76, "learning_rate": 7.626539651720163e-06, "loss": 0.0308, "step": 408000 }, { "epoch": 24.79, "learning_rate": 7.6113706692555065e-06, "loss": 0.0296, "step": 408500 }, { "epoch": 24.82, "learning_rate": 7.596201686790851e-06, "loss": 0.0294, "step": 409000 }, { "epoch": 24.85, "learning_rate": 7.581032704326194e-06, "loss": 0.0297, "step": 409500 }, { "epoch": 24.88, "learning_rate": 7.565894059826467e-06, "loss": 0.0302, "step": 410000 }, { "epoch": 24.91, "learning_rate": 7.550725077361812e-06, "loss": 0.0288, "step": 410500 }, { "epoch": 24.94, "learning_rate": 7.535556094897155e-06, "loss": 0.0303, "step": 411000 }, { "epoch": 24.97, "learning_rate": 7.520387112432499e-06, "loss": 0.03, "step": 411500 }, { "epoch": 25.0, "learning_rate": 7.5052181299678425e-06, "loss": 0.0293, "step": 412000 }, { "epoch": 25.0, "eval_bleu": 87.6001, "eval_gen_len": 14.8399, "eval_loss": 0.2072879821062088, "eval_runtime": 218.5505, "eval_samples_per_second": 73.777, "eval_steps_per_second": 2.306, "step": 412025 }, { "epoch": 25.03, "learning_rate": 7.490079485468116e-06, "loss": 0.0256, "step": 412500 }, { "epoch": 25.06, "learning_rate": 7.47491050300346e-06, "loss": 0.0272, "step": 413000 }, { "epoch": 25.09, "learning_rate": 7.4597415205388025e-06, "loss": 0.0268, "step": 413500 }, { "epoch": 25.12, "learning_rate": 7.444572538074147e-06, "loss": 0.0263, "step": 414000 }, { "epoch": 25.15, "learning_rate": 7.42940355560949e-06, "loss": 0.0268, "step": 414500 }, { "epoch": 25.18, "learning_rate": 7.414234573144834e-06, "loss": 0.0277, "step": 415000 }, { "epoch": 25.21, "learning_rate": 7.3990655906801775e-06, "loss": 0.0261, "step": 415500 }, { "epoch": 25.24, "learning_rate": 7.383896608215522e-06, "loss": 0.0269, "step": 416000 }, { "epoch": 25.27, "learning_rate": 7.368757963715794e-06, "loss": 0.0267, "step": 416500 }, { "epoch": 25.3, "learning_rate": 7.3535889812511384e-06, "loss": 0.0281, "step": 417000 }, { "epoch": 25.33, "learning_rate": 7.338419998786483e-06, "loss": 0.0279, "step": 417500 }, { "epoch": 25.36, "learning_rate": 7.323281354286755e-06, "loss": 0.0275, "step": 418000 }, { "epoch": 25.39, "learning_rate": 7.308112371822099e-06, "loss": 0.0264, "step": 418500 }, { "epoch": 25.42, "learning_rate": 7.292943389357442e-06, "loss": 0.0279, "step": 419000 }, { "epoch": 25.45, "learning_rate": 7.277774406892786e-06, "loss": 0.0276, "step": 419500 }, { "epoch": 25.48, "learning_rate": 7.262605424428129e-06, "loss": 0.0277, "step": 420000 }, { "epoch": 25.51, "learning_rate": 7.2474364419634735e-06, "loss": 0.0266, "step": 420500 }, { "epoch": 25.54, "learning_rate": 7.232267459498818e-06, "loss": 0.0277, "step": 421000 }, { "epoch": 25.57, "learning_rate": 7.217098477034161e-06, "loss": 0.0282, "step": 421500 }, { "epoch": 25.61, "learning_rate": 7.201959832534434e-06, "loss": 0.0269, "step": 422000 }, { "epoch": 25.64, "learning_rate": 7.186790850069778e-06, "loss": 0.0274, "step": 422500 }, { "epoch": 25.67, "learning_rate": 7.171652205570051e-06, "loss": 0.0279, "step": 423000 }, { "epoch": 25.7, "learning_rate": 7.156483223105395e-06, "loss": 0.028, "step": 423500 }, { "epoch": 25.73, "learning_rate": 7.141314240640739e-06, "loss": 0.0262, "step": 424000 }, { "epoch": 25.76, "learning_rate": 7.126175596141012e-06, "loss": 0.0279, "step": 424500 }, { "epoch": 25.79, "learning_rate": 7.111006613676355e-06, "loss": 0.0274, "step": 425000 }, { "epoch": 25.82, "learning_rate": 7.0958376312116995e-06, "loss": 0.0277, "step": 425500 }, { "epoch": 25.85, "learning_rate": 7.080668648747042e-06, "loss": 0.028, "step": 426000 }, { "epoch": 25.88, "learning_rate": 7.065499666282386e-06, "loss": 0.0278, "step": 426500 }, { "epoch": 25.91, "learning_rate": 7.0503306838177295e-06, "loss": 0.0282, "step": 427000 }, { "epoch": 25.94, "learning_rate": 7.035161701353074e-06, "loss": 0.0277, "step": 427500 }, { "epoch": 25.97, "learning_rate": 7.019992718888418e-06, "loss": 0.0287, "step": 428000 }, { "epoch": 26.0, "learning_rate": 7.004823736423761e-06, "loss": 0.0277, "step": 428500 }, { "epoch": 26.0, "eval_bleu": 87.7954, "eval_gen_len": 14.789, "eval_loss": 0.2085237354040146, "eval_runtime": 217.0143, "eval_samples_per_second": 74.299, "eval_steps_per_second": 2.322, "step": 428506 }, { "epoch": 26.03, "learning_rate": 6.989685091924035e-06, "loss": 0.025, "step": 429000 }, { "epoch": 26.06, "learning_rate": 6.974516109459378e-06, "loss": 0.0242, "step": 429500 }, { "epoch": 26.09, "learning_rate": 6.959347126994722e-06, "loss": 0.0256, "step": 430000 }, { "epoch": 26.12, "learning_rate": 6.944178144530066e-06, "loss": 0.0255, "step": 430500 }, { "epoch": 26.15, "learning_rate": 6.929039500030339e-06, "loss": 0.0246, "step": 431000 }, { "epoch": 26.18, "learning_rate": 6.913870517565682e-06, "loss": 0.0251, "step": 431500 }, { "epoch": 26.21, "learning_rate": 6.8987015351010255e-06, "loss": 0.0253, "step": 432000 }, { "epoch": 26.24, "learning_rate": 6.88353255263637e-06, "loss": 0.0257, "step": 432500 }, { "epoch": 26.27, "learning_rate": 6.868363570171713e-06, "loss": 0.0255, "step": 433000 }, { "epoch": 26.3, "learning_rate": 6.853194587707057e-06, "loss": 0.0255, "step": 433500 }, { "epoch": 26.33, "learning_rate": 6.838025605242401e-06, "loss": 0.0253, "step": 434000 }, { "epoch": 26.36, "learning_rate": 6.822856622777745e-06, "loss": 0.0256, "step": 434500 }, { "epoch": 26.39, "learning_rate": 6.807717978278018e-06, "loss": 0.0251, "step": 435000 }, { "epoch": 26.42, "learning_rate": 6.792548995813361e-06, "loss": 0.0248, "step": 435500 }, { "epoch": 26.45, "learning_rate": 6.777380013348706e-06, "loss": 0.0256, "step": 436000 }, { "epoch": 26.49, "learning_rate": 6.76221103088405e-06, "loss": 0.0256, "step": 436500 }, { "epoch": 26.52, "learning_rate": 6.747042048419392e-06, "loss": 0.0257, "step": 437000 }, { "epoch": 26.55, "learning_rate": 6.731873065954736e-06, "loss": 0.0257, "step": 437500 }, { "epoch": 26.58, "learning_rate": 6.716734421455009e-06, "loss": 0.025, "step": 438000 }, { "epoch": 26.61, "learning_rate": 6.701565438990353e-06, "loss": 0.0265, "step": 438500 }, { "epoch": 26.64, "learning_rate": 6.6863964565256965e-06, "loss": 0.0257, "step": 439000 }, { "epoch": 26.67, "learning_rate": 6.671227474061041e-06, "loss": 0.026, "step": 439500 }, { "epoch": 26.7, "learning_rate": 6.656088829561313e-06, "loss": 0.0249, "step": 440000 }, { "epoch": 26.73, "learning_rate": 6.640919847096657e-06, "loss": 0.0249, "step": 440500 }, { "epoch": 26.76, "learning_rate": 6.6257508646320016e-06, "loss": 0.0264, "step": 441000 }, { "epoch": 26.79, "learning_rate": 6.610581882167345e-06, "loss": 0.0254, "step": 441500 }, { "epoch": 26.82, "learning_rate": 6.595412899702689e-06, "loss": 0.0263, "step": 442000 }, { "epoch": 26.85, "learning_rate": 6.580243917238032e-06, "loss": 0.0263, "step": 442500 }, { "epoch": 26.88, "learning_rate": 6.565074934773376e-06, "loss": 0.0259, "step": 443000 }, { "epoch": 26.91, "learning_rate": 6.549936290273648e-06, "loss": 0.0257, "step": 443500 }, { "epoch": 26.94, "learning_rate": 6.5347673078089925e-06, "loss": 0.0264, "step": 444000 }, { "epoch": 26.97, "learning_rate": 6.519598325344337e-06, "loss": 0.0266, "step": 444500 }, { "epoch": 27.0, "eval_bleu": 87.6557, "eval_gen_len": 14.8303, "eval_loss": 0.21119491755962372, "eval_runtime": 217.2258, "eval_samples_per_second": 74.227, "eval_steps_per_second": 2.32, "step": 444987 }, { "epoch": 27.0, "learning_rate": 6.50442934287968e-06, "loss": 0.0266, "step": 445000 }, { "epoch": 27.03, "learning_rate": 6.489260360415024e-06, "loss": 0.0232, "step": 445500 }, { "epoch": 27.06, "learning_rate": 6.4740913779503675e-06, "loss": 0.0232, "step": 446000 }, { "epoch": 27.09, "learning_rate": 6.458952733450641e-06, "loss": 0.0232, "step": 446500 }, { "epoch": 27.12, "learning_rate": 6.443783750985984e-06, "loss": 0.0231, "step": 447000 }, { "epoch": 27.15, "learning_rate": 6.4286451064862576e-06, "loss": 0.0233, "step": 447500 }, { "epoch": 27.18, "learning_rate": 6.413476124021602e-06, "loss": 0.0233, "step": 448000 }, { "epoch": 27.21, "learning_rate": 6.398307141556945e-06, "loss": 0.0235, "step": 448500 }, { "epoch": 27.24, "learning_rate": 6.3831381590922884e-06, "loss": 0.024, "step": 449000 }, { "epoch": 27.27, "learning_rate": 6.367969176627632e-06, "loss": 0.0238, "step": 449500 }, { "epoch": 27.3, "learning_rate": 6.352800194162976e-06, "loss": 0.0239, "step": 450000 }, { "epoch": 27.33, "learning_rate": 6.337631211698319e-06, "loss": 0.0237, "step": 450500 }, { "epoch": 27.36, "learning_rate": 6.3224622292336635e-06, "loss": 0.0233, "step": 451000 }, { "epoch": 27.4, "learning_rate": 6.307293246769008e-06, "loss": 0.024, "step": 451500 }, { "epoch": 27.43, "learning_rate": 6.292124264304351e-06, "loss": 0.024, "step": 452000 }, { "epoch": 27.46, "learning_rate": 6.276955281839695e-06, "loss": 0.0241, "step": 452500 }, { "epoch": 27.49, "learning_rate": 6.2617862993750385e-06, "loss": 0.0244, "step": 453000 }, { "epoch": 27.52, "learning_rate": 6.246617316910382e-06, "loss": 0.0244, "step": 453500 }, { "epoch": 27.55, "learning_rate": 6.231478672410656e-06, "loss": 0.0243, "step": 454000 }, { "epoch": 27.58, "learning_rate": 6.216340027910929e-06, "loss": 0.0242, "step": 454500 }, { "epoch": 27.61, "learning_rate": 6.201171045446271e-06, "loss": 0.0228, "step": 455000 }, { "epoch": 27.64, "learning_rate": 6.186002062981615e-06, "loss": 0.0243, "step": 455500 }, { "epoch": 27.67, "learning_rate": 6.1708330805169595e-06, "loss": 0.0242, "step": 456000 }, { "epoch": 27.7, "learning_rate": 6.155664098052303e-06, "loss": 0.0238, "step": 456500 }, { "epoch": 27.73, "learning_rate": 6.140495115587647e-06, "loss": 0.0237, "step": 457000 }, { "epoch": 27.76, "learning_rate": 6.1253564710879195e-06, "loss": 0.0255, "step": 457500 }, { "epoch": 27.79, "learning_rate": 6.110217826588193e-06, "loss": 0.025, "step": 458000 }, { "epoch": 27.82, "learning_rate": 6.095048844123537e-06, "loss": 0.0244, "step": 458500 }, { "epoch": 27.85, "learning_rate": 6.07987986165888e-06, "loss": 0.0248, "step": 459000 }, { "epoch": 27.88, "learning_rate": 6.0647108791942246e-06, "loss": 0.0251, "step": 459500 }, { "epoch": 27.91, "learning_rate": 6.049541896729568e-06, "loss": 0.0249, "step": 460000 }, { "epoch": 27.94, "learning_rate": 6.034372914264912e-06, "loss": 0.0251, "step": 460500 }, { "epoch": 27.97, "learning_rate": 6.0192039318002546e-06, "loss": 0.0247, "step": 461000 }, { "epoch": 28.0, "eval_bleu": 87.8108, "eval_gen_len": 14.797, "eval_loss": 0.21366077661514282, "eval_runtime": 216.9055, "eval_samples_per_second": 74.337, "eval_steps_per_second": 2.324, "step": 461468 }, { "epoch": 28.0, "learning_rate": 6.004034949335599e-06, "loss": 0.0241, "step": 461500 }, { "epoch": 28.03, "learning_rate": 5.988865966870943e-06, "loss": 0.0218, "step": 462000 }, { "epoch": 28.06, "learning_rate": 5.973696984406286e-06, "loss": 0.0221, "step": 462500 }, { "epoch": 28.09, "learning_rate": 5.95855833990656e-06, "loss": 0.0213, "step": 463000 }, { "epoch": 28.12, "learning_rate": 5.943389357441903e-06, "loss": 0.0219, "step": 463500 }, { "epoch": 28.15, "learning_rate": 5.928220374977247e-06, "loss": 0.0223, "step": 464000 }, { "epoch": 28.18, "learning_rate": 5.913051392512591e-06, "loss": 0.0222, "step": 464500 }, { "epoch": 28.21, "learning_rate": 5.897882410047935e-06, "loss": 0.0222, "step": 465000 }, { "epoch": 28.24, "learning_rate": 5.882743765548208e-06, "loss": 0.0226, "step": 465500 }, { "epoch": 28.27, "learning_rate": 5.867574783083551e-06, "loss": 0.0224, "step": 466000 }, { "epoch": 28.31, "learning_rate": 5.8524058006188956e-06, "loss": 0.0224, "step": 466500 }, { "epoch": 28.34, "learning_rate": 5.837236818154238e-06, "loss": 0.0229, "step": 467000 }, { "epoch": 28.37, "learning_rate": 5.822098173654511e-06, "loss": 0.0222, "step": 467500 }, { "epoch": 28.4, "learning_rate": 5.806929191189855e-06, "loss": 0.0232, "step": 468000 }, { "epoch": 28.43, "learning_rate": 5.791760208725199e-06, "loss": 0.0227, "step": 468500 }, { "epoch": 28.46, "learning_rate": 5.776591226260543e-06, "loss": 0.0231, "step": 469000 }, { "epoch": 28.49, "learning_rate": 5.761452581760816e-06, "loss": 0.0218, "step": 469500 }, { "epoch": 28.52, "learning_rate": 5.74628359929616e-06, "loss": 0.0228, "step": 470000 }, { "epoch": 28.55, "learning_rate": 5.731114616831503e-06, "loss": 0.0228, "step": 470500 }, { "epoch": 28.58, "learning_rate": 5.715945634366847e-06, "loss": 0.023, "step": 471000 }, { "epoch": 28.61, "learning_rate": 5.7007766519021915e-06, "loss": 0.0224, "step": 471500 }, { "epoch": 28.64, "learning_rate": 5.685607669437535e-06, "loss": 0.0225, "step": 472000 }, { "epoch": 28.67, "learning_rate": 5.670438686972878e-06, "loss": 0.0227, "step": 472500 }, { "epoch": 28.7, "learning_rate": 5.6552697045082216e-06, "loss": 0.0232, "step": 473000 }, { "epoch": 28.73, "learning_rate": 5.640100722043566e-06, "loss": 0.0237, "step": 473500 }, { "epoch": 28.76, "learning_rate": 5.624931739578909e-06, "loss": 0.0222, "step": 474000 }, { "epoch": 28.79, "learning_rate": 5.6097930950791824e-06, "loss": 0.0224, "step": 474500 }, { "epoch": 28.82, "learning_rate": 5.594624112614527e-06, "loss": 0.0226, "step": 475000 }, { "epoch": 28.85, "learning_rate": 5.57945513014987e-06, "loss": 0.0223, "step": 475500 }, { "epoch": 28.88, "learning_rate": 5.564286147685214e-06, "loss": 0.0227, "step": 476000 }, { "epoch": 28.91, "learning_rate": 5.5491171652205575e-06, "loss": 0.0225, "step": 476500 }, { "epoch": 28.94, "learning_rate": 5.533948182755902e-06, "loss": 0.0226, "step": 477000 }, { "epoch": 28.97, "learning_rate": 5.518779200291244e-06, "loss": 0.0232, "step": 477500 }, { "epoch": 29.0, "eval_bleu": 87.6826, "eval_gen_len": 14.7702, "eval_loss": 0.21583835780620575, "eval_runtime": 217.0792, "eval_samples_per_second": 74.277, "eval_steps_per_second": 2.322, "step": 477949 }, { "epoch": 29.0, "learning_rate": 5.503640555791518e-06, "loss": 0.0228, "step": 478000 }, { "epoch": 29.03, "learning_rate": 5.488471573326861e-06, "loss": 0.0209, "step": 478500 }, { "epoch": 29.06, "learning_rate": 5.473302590862205e-06, "loss": 0.0209, "step": 479000 }, { "epoch": 29.09, "learning_rate": 5.458133608397549e-06, "loss": 0.021, "step": 479500 }, { "epoch": 29.12, "learning_rate": 5.4429646259328926e-06, "loss": 0.0199, "step": 480000 }, { "epoch": 29.15, "learning_rate": 5.427795643468237e-06, "loss": 0.0207, "step": 480500 }, { "epoch": 29.19, "learning_rate": 5.412656998968509e-06, "loss": 0.0213, "step": 481000 }, { "epoch": 29.22, "learning_rate": 5.3974880165038535e-06, "loss": 0.0207, "step": 481500 }, { "epoch": 29.25, "learning_rate": 5.382319034039198e-06, "loss": 0.0212, "step": 482000 }, { "epoch": 29.28, "learning_rate": 5.367150051574541e-06, "loss": 0.0211, "step": 482500 }, { "epoch": 29.31, "learning_rate": 5.351981069109885e-06, "loss": 0.0218, "step": 483000 }, { "epoch": 29.34, "learning_rate": 5.336842424610158e-06, "loss": 0.0207, "step": 483500 }, { "epoch": 29.37, "learning_rate": 5.321673442145502e-06, "loss": 0.0205, "step": 484000 }, { "epoch": 29.4, "learning_rate": 5.306504459680844e-06, "loss": 0.0205, "step": 484500 }, { "epoch": 29.43, "learning_rate": 5.2913354772161885e-06, "loss": 0.0213, "step": 485000 }, { "epoch": 29.46, "learning_rate": 5.276166494751533e-06, "loss": 0.022, "step": 485500 }, { "epoch": 29.49, "learning_rate": 5.260997512286876e-06, "loss": 0.0212, "step": 486000 }, { "epoch": 29.52, "learning_rate": 5.24582852982222e-06, "loss": 0.0213, "step": 486500 }, { "epoch": 29.55, "learning_rate": 5.230659547357564e-06, "loss": 0.021, "step": 487000 }, { "epoch": 29.58, "learning_rate": 5.215490564892908e-06, "loss": 0.0213, "step": 487500 }, { "epoch": 29.61, "learning_rate": 5.20035192039318e-06, "loss": 0.0212, "step": 488000 }, { "epoch": 29.64, "learning_rate": 5.1851829379285245e-06, "loss": 0.0213, "step": 488500 }, { "epoch": 29.67, "learning_rate": 5.170013955463869e-06, "loss": 0.0216, "step": 489000 }, { "epoch": 29.7, "learning_rate": 5.154844972999211e-06, "loss": 0.0215, "step": 489500 }, { "epoch": 29.73, "learning_rate": 5.139675990534555e-06, "loss": 0.0211, "step": 490000 }, { "epoch": 29.76, "learning_rate": 5.124537346034828e-06, "loss": 0.0213, "step": 490500 }, { "epoch": 29.79, "learning_rate": 5.109398701535101e-06, "loss": 0.0213, "step": 491000 }, { "epoch": 29.82, "learning_rate": 5.0942297190704445e-06, "loss": 0.0207, "step": 491500 }, { "epoch": 29.85, "learning_rate": 5.079060736605789e-06, "loss": 0.0217, "step": 492000 }, { "epoch": 29.88, "learning_rate": 5.063891754141133e-06, "loss": 0.0222, "step": 492500 }, { "epoch": 29.91, "learning_rate": 5.048783447606335e-06, "loss": 0.0215, "step": 493000 }, { "epoch": 29.94, "learning_rate": 5.033614465141679e-06, "loss": 0.022, "step": 493500 }, { "epoch": 29.97, "learning_rate": 5.018445482677022e-06, "loss": 0.0225, "step": 494000 }, { "epoch": 30.0, "eval_bleu": 87.759, "eval_gen_len": 14.7979, "eval_loss": 0.21940629184246063, "eval_runtime": 217.8188, "eval_samples_per_second": 74.025, "eval_steps_per_second": 2.314, "step": 494430 }, { "epoch": 30.0, "learning_rate": 5.003276500212366e-06, "loss": 0.0203, "step": 494500 }, { "epoch": 30.03, "learning_rate": 4.98810751774771e-06, "loss": 0.0189, "step": 495000 }, { "epoch": 30.06, "learning_rate": 4.972938535283053e-06, "loss": 0.0198, "step": 495500 }, { "epoch": 30.1, "learning_rate": 4.957769552818397e-06, "loss": 0.0201, "step": 496000 }, { "epoch": 30.13, "learning_rate": 4.942600570353741e-06, "loss": 0.0194, "step": 496500 }, { "epoch": 30.16, "learning_rate": 4.927431587889085e-06, "loss": 0.0199, "step": 497000 }, { "epoch": 30.19, "learning_rate": 4.912262605424429e-06, "loss": 0.02, "step": 497500 }, { "epoch": 30.22, "learning_rate": 4.897093622959772e-06, "loss": 0.0194, "step": 498000 }, { "epoch": 30.25, "learning_rate": 4.8819246404951156e-06, "loss": 0.0199, "step": 498500 }, { "epoch": 30.28, "learning_rate": 4.86675565803046e-06, "loss": 0.02, "step": 499000 }, { "epoch": 30.31, "learning_rate": 4.851586675565804e-06, "loss": 0.0198, "step": 499500 }, { "epoch": 30.34, "learning_rate": 4.836417693101147e-06, "loss": 0.0195, "step": 500000 }, { "epoch": 30.37, "learning_rate": 4.821248710636491e-06, "loss": 0.0194, "step": 500500 }, { "epoch": 30.4, "learning_rate": 4.806079728171835e-06, "loss": 0.0192, "step": 501000 }, { "epoch": 30.43, "learning_rate": 4.790941083672107e-06, "loss": 0.0193, "step": 501500 }, { "epoch": 30.46, "learning_rate": 4.775802439172381e-06, "loss": 0.0201, "step": 502000 }, { "epoch": 30.49, "learning_rate": 4.760633456707725e-06, "loss": 0.0201, "step": 502500 }, { "epoch": 30.52, "learning_rate": 4.745464474243068e-06, "loss": 0.0209, "step": 503000 }, { "epoch": 30.55, "learning_rate": 4.7302954917784115e-06, "loss": 0.0202, "step": 503500 }, { "epoch": 30.58, "learning_rate": 4.715156847278685e-06, "loss": 0.0209, "step": 504000 }, { "epoch": 30.61, "learning_rate": 4.699987864814028e-06, "loss": 0.02, "step": 504500 }, { "epoch": 30.64, "learning_rate": 4.684818882349372e-06, "loss": 0.0206, "step": 505000 }, { "epoch": 30.67, "learning_rate": 4.669649899884717e-06, "loss": 0.02, "step": 505500 }, { "epoch": 30.7, "learning_rate": 4.65448091742006e-06, "loss": 0.0211, "step": 506000 }, { "epoch": 30.73, "learning_rate": 4.639311934955403e-06, "loss": 0.0203, "step": 506500 }, { "epoch": 30.76, "learning_rate": 4.6241429524907475e-06, "loss": 0.0202, "step": 507000 }, { "epoch": 30.79, "learning_rate": 4.608973970026091e-06, "loss": 0.0207, "step": 507500 }, { "epoch": 30.82, "learning_rate": 4.593804987561435e-06, "loss": 0.0211, "step": 508000 }, { "epoch": 30.85, "learning_rate": 4.5786663430617075e-06, "loss": 0.0206, "step": 508500 }, { "epoch": 30.88, "learning_rate": 4.563497360597052e-06, "loss": 0.0204, "step": 509000 }, { "epoch": 30.91, "learning_rate": 4.548328378132395e-06, "loss": 0.0204, "step": 509500 }, { "epoch": 30.94, "learning_rate": 4.533159395667739e-06, "loss": 0.0195, "step": 510000 }, { "epoch": 30.98, "learning_rate": 4.5179904132030825e-06, "loss": 0.0205, "step": 510500 }, { "epoch": 31.0, "eval_bleu": 87.805, "eval_gen_len": 14.8214, "eval_loss": 0.22064241766929626, "eval_runtime": 221.1914, "eval_samples_per_second": 72.896, "eval_steps_per_second": 2.279, "step": 510911 }, { "epoch": 31.01, "learning_rate": 4.502821430738427e-06, "loss": 0.0203, "step": 511000 }, { "epoch": 31.04, "learning_rate": 4.487682786238699e-06, "loss": 0.0182, "step": 511500 }, { "epoch": 31.07, "learning_rate": 4.4725138037740434e-06, "loss": 0.0175, "step": 512000 }, { "epoch": 31.1, "learning_rate": 4.457344821309387e-06, "loss": 0.0187, "step": 512500 }, { "epoch": 31.13, "learning_rate": 4.442175838844731e-06, "loss": 0.0184, "step": 513000 }, { "epoch": 31.16, "learning_rate": 4.427006856380074e-06, "loss": 0.0183, "step": 513500 }, { "epoch": 31.19, "learning_rate": 4.411868211880348e-06, "loss": 0.019, "step": 514000 }, { "epoch": 31.22, "learning_rate": 4.396699229415691e-06, "loss": 0.0191, "step": 514500 }, { "epoch": 31.25, "learning_rate": 4.381530246951035e-06, "loss": 0.0189, "step": 515000 }, { "epoch": 31.28, "learning_rate": 4.3663916024513085e-06, "loss": 0.018, "step": 515500 }, { "epoch": 31.31, "learning_rate": 4.351222619986651e-06, "loss": 0.0192, "step": 516000 }, { "epoch": 31.34, "learning_rate": 4.336053637521995e-06, "loss": 0.0195, "step": 516500 }, { "epoch": 31.37, "learning_rate": 4.320884655057339e-06, "loss": 0.0189, "step": 517000 }, { "epoch": 31.4, "learning_rate": 4.305715672592683e-06, "loss": 0.0186, "step": 517500 }, { "epoch": 31.43, "learning_rate": 4.290577028092956e-06, "loss": 0.0186, "step": 518000 }, { "epoch": 31.46, "learning_rate": 4.2754080456282994e-06, "loss": 0.0196, "step": 518500 }, { "epoch": 31.49, "learning_rate": 4.260239063163643e-06, "loss": 0.0197, "step": 519000 }, { "epoch": 31.52, "learning_rate": 4.245070080698987e-06, "loss": 0.0191, "step": 519500 }, { "epoch": 31.55, "learning_rate": 4.229901098234331e-06, "loss": 0.0191, "step": 520000 }, { "epoch": 31.58, "learning_rate": 4.2147321157696745e-06, "loss": 0.0195, "step": 520500 }, { "epoch": 31.61, "learning_rate": 4.199563133305018e-06, "loss": 0.0193, "step": 521000 }, { "epoch": 31.64, "learning_rate": 4.184394150840362e-06, "loss": 0.0183, "step": 521500 }, { "epoch": 31.67, "learning_rate": 4.169225168375705e-06, "loss": 0.0195, "step": 522000 }, { "epoch": 31.7, "learning_rate": 4.1540561859110495e-06, "loss": 0.0186, "step": 522500 }, { "epoch": 31.73, "learning_rate": 4.138887203446394e-06, "loss": 0.0185, "step": 523000 }, { "epoch": 31.76, "learning_rate": 4.123748558946666e-06, "loss": 0.0189, "step": 523500 }, { "epoch": 31.79, "learning_rate": 4.1085795764820096e-06, "loss": 0.0194, "step": 524000 }, { "epoch": 31.82, "learning_rate": 4.093410594017354e-06, "loss": 0.0198, "step": 524500 }, { "epoch": 31.85, "learning_rate": 4.078241611552697e-06, "loss": 0.0197, "step": 525000 }, { "epoch": 31.89, "learning_rate": 4.0631029670529704e-06, "loss": 0.0193, "step": 525500 }, { "epoch": 31.92, "learning_rate": 4.047933984588315e-06, "loss": 0.0197, "step": 526000 }, { "epoch": 31.95, "learning_rate": 4.032795340088587e-06, "loss": 0.0202, "step": 526500 }, { "epoch": 31.98, "learning_rate": 4.017626357623931e-06, "loss": 0.0191, "step": 527000 }, { "epoch": 32.0, "eval_bleu": 87.9338, "eval_gen_len": 14.778, "eval_loss": 0.22409668564796448, "eval_runtime": 218.5886, "eval_samples_per_second": 73.764, "eval_steps_per_second": 2.306, "step": 527392 }, { "epoch": 32.01, "learning_rate": 4.002457375159275e-06, "loss": 0.019, "step": 527500 }, { "epoch": 32.04, "learning_rate": 3.987288392694618e-06, "loss": 0.0174, "step": 528000 }, { "epoch": 32.07, "learning_rate": 3.972119410229962e-06, "loss": 0.0174, "step": 528500 }, { "epoch": 32.1, "learning_rate": 3.9569504277653055e-06, "loss": 0.0175, "step": 529000 }, { "epoch": 32.13, "learning_rate": 3.94178144530065e-06, "loss": 0.0179, "step": 529500 }, { "epoch": 32.16, "learning_rate": 3.926612462835993e-06, "loss": 0.0178, "step": 530000 }, { "epoch": 32.19, "learning_rate": 3.911443480371337e-06, "loss": 0.0178, "step": 530500 }, { "epoch": 32.22, "learning_rate": 3.89630483587161e-06, "loss": 0.0185, "step": 531000 }, { "epoch": 32.25, "learning_rate": 3.881135853406954e-06, "loss": 0.0188, "step": 531500 }, { "epoch": 32.28, "learning_rate": 3.865966870942297e-06, "loss": 0.0178, "step": 532000 }, { "epoch": 32.31, "learning_rate": 3.8507978884776415e-06, "loss": 0.0183, "step": 532500 }, { "epoch": 32.34, "learning_rate": 3.835628906012985e-06, "loss": 0.0178, "step": 533000 }, { "epoch": 32.37, "learning_rate": 3.820490261513258e-06, "loss": 0.0178, "step": 533500 }, { "epoch": 32.4, "learning_rate": 3.8053212790486015e-06, "loss": 0.0184, "step": 534000 }, { "epoch": 32.43, "learning_rate": 3.7901522965839453e-06, "loss": 0.0183, "step": 534500 }, { "epoch": 32.46, "learning_rate": 3.7750136520842186e-06, "loss": 0.0181, "step": 535000 }, { "epoch": 32.49, "learning_rate": 3.7598446696195624e-06, "loss": 0.0178, "step": 535500 }, { "epoch": 32.52, "learning_rate": 3.744675687154906e-06, "loss": 0.0182, "step": 536000 }, { "epoch": 32.55, "learning_rate": 3.7295067046902495e-06, "loss": 0.0185, "step": 536500 }, { "epoch": 32.58, "learning_rate": 3.7143377222255932e-06, "loss": 0.0182, "step": 537000 }, { "epoch": 32.61, "learning_rate": 3.699168739760937e-06, "loss": 0.0185, "step": 537500 }, { "epoch": 32.64, "learning_rate": 3.683999757296281e-06, "loss": 0.0183, "step": 538000 }, { "epoch": 32.67, "learning_rate": 3.668830774831625e-06, "loss": 0.0176, "step": 538500 }, { "epoch": 32.7, "learning_rate": 3.6536617923669683e-06, "loss": 0.0186, "step": 539000 }, { "epoch": 32.73, "learning_rate": 3.638492809902312e-06, "loss": 0.0181, "step": 539500 }, { "epoch": 32.77, "learning_rate": 3.623354165402585e-06, "loss": 0.018, "step": 540000 }, { "epoch": 32.8, "learning_rate": 3.6081851829379288e-06, "loss": 0.0183, "step": 540500 }, { "epoch": 32.83, "learning_rate": 3.5930162004732725e-06, "loss": 0.0184, "step": 541000 }, { "epoch": 32.86, "learning_rate": 3.577847218008616e-06, "loss": 0.0182, "step": 541500 }, { "epoch": 32.89, "learning_rate": 3.5627085735088896e-06, "loss": 0.0181, "step": 542000 }, { "epoch": 32.92, "learning_rate": 3.5475699290091626e-06, "loss": 0.0187, "step": 542500 }, { "epoch": 32.95, "learning_rate": 3.532400946544506e-06, "loss": 0.0191, "step": 543000 }, { "epoch": 32.98, "learning_rate": 3.5172319640798497e-06, "loss": 0.0185, "step": 543500 }, { "epoch": 33.0, "eval_bleu": 87.8214, "eval_gen_len": 14.792, "eval_loss": 0.22567500174045563, "eval_runtime": 224.8959, "eval_samples_per_second": 71.695, "eval_steps_per_second": 2.241, "step": 543873 }, { "epoch": 33.01, "learning_rate": 3.5020629816151934e-06, "loss": 0.0179, "step": 544000 }, { "epoch": 33.04, "learning_rate": 3.486893999150537e-06, "loss": 0.0169, "step": 544500 }, { "epoch": 33.07, "learning_rate": 3.4717250166858814e-06, "loss": 0.0163, "step": 545000 }, { "epoch": 33.1, "learning_rate": 3.4565560342212247e-06, "loss": 0.0168, "step": 545500 }, { "epoch": 33.13, "learning_rate": 3.4414173897214977e-06, "loss": 0.0174, "step": 546000 }, { "epoch": 33.16, "learning_rate": 3.4262484072568414e-06, "loss": 0.017, "step": 546500 }, { "epoch": 33.19, "learning_rate": 3.411079424792185e-06, "loss": 0.0174, "step": 547000 }, { "epoch": 33.22, "learning_rate": 3.395910442327529e-06, "loss": 0.0174, "step": 547500 }, { "epoch": 33.25, "learning_rate": 3.380741459862873e-06, "loss": 0.0168, "step": 548000 }, { "epoch": 33.28, "learning_rate": 3.365572477398216e-06, "loss": 0.0176, "step": 548500 }, { "epoch": 33.31, "learning_rate": 3.3504034949335602e-06, "loss": 0.017, "step": 549000 }, { "epoch": 33.34, "learning_rate": 3.335234512468904e-06, "loss": 0.0174, "step": 549500 }, { "epoch": 33.37, "learning_rate": 3.320095867969177e-06, "loss": 0.0174, "step": 550000 }, { "epoch": 33.4, "learning_rate": 3.3049268855045207e-06, "loss": 0.0178, "step": 550500 }, { "epoch": 33.43, "learning_rate": 3.289757903039864e-06, "loss": 0.0174, "step": 551000 }, { "epoch": 33.46, "learning_rate": 3.2745889205752078e-06, "loss": 0.0171, "step": 551500 }, { "epoch": 33.49, "learning_rate": 3.259419938110552e-06, "loss": 0.0169, "step": 552000 }, { "epoch": 33.52, "learning_rate": 3.2442509556458957e-06, "loss": 0.017, "step": 552500 }, { "epoch": 33.55, "learning_rate": 3.2291123111461687e-06, "loss": 0.0187, "step": 553000 }, { "epoch": 33.58, "learning_rate": 3.2139736666464416e-06, "loss": 0.0176, "step": 553500 }, { "epoch": 33.61, "learning_rate": 3.1988046841817854e-06, "loss": 0.0174, "step": 554000 }, { "epoch": 33.64, "learning_rate": 3.183635701717129e-06, "loss": 0.0174, "step": 554500 }, { "epoch": 33.68, "learning_rate": 3.1684667192524725e-06, "loss": 0.0172, "step": 555000 }, { "epoch": 33.71, "learning_rate": 3.1532977367878167e-06, "loss": 0.0174, "step": 555500 }, { "epoch": 33.74, "learning_rate": 3.1381287543231604e-06, "loss": 0.017, "step": 556000 }, { "epoch": 33.77, "learning_rate": 3.122959771858504e-06, "loss": 0.0181, "step": 556500 }, { "epoch": 33.8, "learning_rate": 3.1077907893938475e-06, "loss": 0.0175, "step": 557000 }, { "epoch": 33.83, "learning_rate": 3.0926218069291913e-06, "loss": 0.0167, "step": 557500 }, { "epoch": 33.86, "learning_rate": 3.0774831624294642e-06, "loss": 0.018, "step": 558000 }, { "epoch": 33.89, "learning_rate": 3.062314179964808e-06, "loss": 0.0176, "step": 558500 }, { "epoch": 33.92, "learning_rate": 3.047145197500152e-06, "loss": 0.018, "step": 559000 }, { "epoch": 33.95, "learning_rate": 3.031976215035496e-06, "loss": 0.0174, "step": 559500 }, { "epoch": 33.98, "learning_rate": 3.016837570535769e-06, "loss": 0.0177, "step": 560000 }, { "epoch": 34.0, "eval_bleu": 87.793, "eval_gen_len": 14.7892, "eval_loss": 0.22562597692012787, "eval_runtime": 226.3249, "eval_samples_per_second": 71.243, "eval_steps_per_second": 2.227, "step": 560354 }, { "epoch": 34.01, "learning_rate": 3.001668588071112e-06, "loss": 0.0177, "step": 560500 }, { "epoch": 34.04, "learning_rate": 2.986499605606456e-06, "loss": 0.0161, "step": 561000 }, { "epoch": 34.07, "learning_rate": 2.9713306231417997e-06, "loss": 0.0154, "step": 561500 }, { "epoch": 34.1, "learning_rate": 2.9561919786420727e-06, "loss": 0.017, "step": 562000 }, { "epoch": 34.13, "learning_rate": 2.941053334142346e-06, "loss": 0.0161, "step": 562500 }, { "epoch": 34.16, "learning_rate": 2.9258843516776898e-06, "loss": 0.0164, "step": 563000 }, { "epoch": 34.19, "learning_rate": 2.9107153692130335e-06, "loss": 0.016, "step": 563500 }, { "epoch": 34.22, "learning_rate": 2.8955463867483773e-06, "loss": 0.0172, "step": 564000 }, { "epoch": 34.25, "learning_rate": 2.8803774042837206e-06, "loss": 0.0156, "step": 564500 }, { "epoch": 34.28, "learning_rate": 2.8652084218190644e-06, "loss": 0.0169, "step": 565000 }, { "epoch": 34.31, "learning_rate": 2.8500394393544086e-06, "loss": 0.0171, "step": 565500 }, { "epoch": 34.34, "learning_rate": 2.8349007948546815e-06, "loss": 0.0161, "step": 566000 }, { "epoch": 34.37, "learning_rate": 2.8197318123900253e-06, "loss": 0.0168, "step": 566500 }, { "epoch": 34.4, "learning_rate": 2.804562829925369e-06, "loss": 0.0164, "step": 567000 }, { "epoch": 34.43, "learning_rate": 2.7893938474607124e-06, "loss": 0.0166, "step": 567500 }, { "epoch": 34.46, "learning_rate": 2.7742552029609853e-06, "loss": 0.0164, "step": 568000 }, { "epoch": 34.49, "learning_rate": 2.759086220496329e-06, "loss": 0.0169, "step": 568500 }, { "epoch": 34.52, "learning_rate": 2.7439172380316733e-06, "loss": 0.0168, "step": 569000 }, { "epoch": 34.55, "learning_rate": 2.728748255567017e-06, "loss": 0.0164, "step": 569500 }, { "epoch": 34.59, "learning_rate": 2.7135792731023604e-06, "loss": 0.0164, "step": 570000 }, { "epoch": 34.62, "learning_rate": 2.6984406286026337e-06, "loss": 0.0166, "step": 570500 }, { "epoch": 34.65, "learning_rate": 2.683271646137977e-06, "loss": 0.0164, "step": 571000 }, { "epoch": 34.68, "learning_rate": 2.668102663673321e-06, "loss": 0.0167, "step": 571500 }, { "epoch": 34.71, "learning_rate": 2.6529336812086646e-06, "loss": 0.0172, "step": 572000 }, { "epoch": 34.74, "learning_rate": 2.6377646987440088e-06, "loss": 0.0172, "step": 572500 }, { "epoch": 34.77, "learning_rate": 2.622595716279352e-06, "loss": 0.0166, "step": 573000 }, { "epoch": 34.8, "learning_rate": 2.607426733814696e-06, "loss": 0.0166, "step": 573500 }, { "epoch": 34.83, "learning_rate": 2.5922577513500396e-06, "loss": 0.017, "step": 574000 }, { "epoch": 34.86, "learning_rate": 2.5770887688853834e-06, "loss": 0.0164, "step": 574500 }, { "epoch": 34.89, "learning_rate": 2.5619501243856563e-06, "loss": 0.0174, "step": 575000 }, { "epoch": 34.92, "learning_rate": 2.5467811419210005e-06, "loss": 0.0167, "step": 575500 }, { "epoch": 34.95, "learning_rate": 2.5316424974212735e-06, "loss": 0.0169, "step": 576000 }, { "epoch": 34.98, "learning_rate": 2.5164735149566172e-06, "loss": 0.0172, "step": 576500 }, { "epoch": 35.0, "eval_bleu": 87.8156, "eval_gen_len": 14.7718, "eval_loss": 0.22844606637954712, "eval_runtime": 225.457, "eval_samples_per_second": 71.517, "eval_steps_per_second": 2.235, "step": 576835 }, { "epoch": 35.01, "learning_rate": 2.5013045324919606e-06, "loss": 0.0161, "step": 577000 }, { "epoch": 35.04, "learning_rate": 2.4861355500273043e-06, "loss": 0.0152, "step": 577500 }, { "epoch": 35.07, "learning_rate": 2.470966567562648e-06, "loss": 0.0162, "step": 578000 }, { "epoch": 35.1, "learning_rate": 2.455827923062921e-06, "loss": 0.0156, "step": 578500 }, { "epoch": 35.13, "learning_rate": 2.4406589405982648e-06, "loss": 0.0154, "step": 579000 }, { "epoch": 35.16, "learning_rate": 2.4254899581336085e-06, "loss": 0.0161, "step": 579500 }, { "epoch": 35.19, "learning_rate": 2.4103209756689523e-06, "loss": 0.0156, "step": 580000 }, { "epoch": 35.22, "learning_rate": 2.395151993204296e-06, "loss": 0.0159, "step": 580500 }, { "epoch": 35.25, "learning_rate": 2.380013348704569e-06, "loss": 0.0163, "step": 581000 }, { "epoch": 35.28, "learning_rate": 2.3648443662399128e-06, "loss": 0.0162, "step": 581500 }, { "epoch": 35.31, "learning_rate": 2.3496753837752565e-06, "loss": 0.016, "step": 582000 }, { "epoch": 35.34, "learning_rate": 2.3345064013106003e-06, "loss": 0.0156, "step": 582500 }, { "epoch": 35.37, "learning_rate": 2.3193677568108732e-06, "loss": 0.0159, "step": 583000 }, { "epoch": 35.4, "learning_rate": 2.304198774346217e-06, "loss": 0.0158, "step": 583500 }, { "epoch": 35.43, "learning_rate": 2.2890297918815607e-06, "loss": 0.016, "step": 584000 }, { "epoch": 35.47, "learning_rate": 2.2738608094169045e-06, "loss": 0.0162, "step": 584500 }, { "epoch": 35.5, "learning_rate": 2.2586918269522483e-06, "loss": 0.0156, "step": 585000 }, { "epoch": 35.53, "learning_rate": 2.243522844487592e-06, "loss": 0.0158, "step": 585500 }, { "epoch": 35.56, "learning_rate": 2.228384199987865e-06, "loss": 0.0167, "step": 586000 }, { "epoch": 35.59, "learning_rate": 2.2132152175232087e-06, "loss": 0.0155, "step": 586500 }, { "epoch": 35.62, "learning_rate": 2.1980462350585525e-06, "loss": 0.0168, "step": 587000 }, { "epoch": 35.65, "learning_rate": 2.1828772525938963e-06, "loss": 0.0161, "step": 587500 }, { "epoch": 35.68, "learning_rate": 2.16770827012924e-06, "loss": 0.0156, "step": 588000 }, { "epoch": 35.71, "learning_rate": 2.1525392876645838e-06, "loss": 0.0162, "step": 588500 }, { "epoch": 35.74, "learning_rate": 2.137370305199927e-06, "loss": 0.0152, "step": 589000 }, { "epoch": 35.77, "learning_rate": 2.1222013227352713e-06, "loss": 0.0161, "step": 589500 }, { "epoch": 35.8, "learning_rate": 2.1070323402706146e-06, "loss": 0.0163, "step": 590000 }, { "epoch": 35.83, "learning_rate": 2.091893695770888e-06, "loss": 0.0161, "step": 590500 }, { "epoch": 35.86, "learning_rate": 2.0767247133062313e-06, "loss": 0.0158, "step": 591000 }, { "epoch": 35.89, "learning_rate": 2.0615860688065047e-06, "loss": 0.0164, "step": 591500 }, { "epoch": 35.92, "learning_rate": 2.0464170863418485e-06, "loss": 0.0159, "step": 592000 }, { "epoch": 35.95, "learning_rate": 2.031248103877192e-06, "loss": 0.0158, "step": 592500 }, { "epoch": 35.98, "learning_rate": 2.016079121412536e-06, "loss": 0.0162, "step": 593000 }, { "epoch": 36.0, "eval_bleu": 87.7375, "eval_gen_len": 14.7845, "eval_loss": 0.22915604710578918, "eval_runtime": 219.2242, "eval_samples_per_second": 73.55, "eval_steps_per_second": 2.299, "step": 593316 }, { "epoch": 36.01, "learning_rate": 2.0009101389478797e-06, "loss": 0.0157, "step": 593500 }, { "epoch": 36.04, "learning_rate": 1.985741156483223e-06, "loss": 0.0153, "step": 594000 }, { "epoch": 36.07, "learning_rate": 1.9705721740185673e-06, "loss": 0.0148, "step": 594500 }, { "epoch": 36.1, "learning_rate": 1.9554031915539106e-06, "loss": 0.0149, "step": 595000 }, { "epoch": 36.13, "learning_rate": 1.9402645470541835e-06, "loss": 0.0156, "step": 595500 }, { "epoch": 36.16, "learning_rate": 1.9250955645895273e-06, "loss": 0.0153, "step": 596000 }, { "epoch": 36.19, "learning_rate": 1.909926582124871e-06, "loss": 0.0156, "step": 596500 }, { "epoch": 36.22, "learning_rate": 1.894757599660215e-06, "loss": 0.0156, "step": 597000 }, { "epoch": 36.25, "learning_rate": 1.8795886171955588e-06, "loss": 0.0158, "step": 597500 }, { "epoch": 36.28, "learning_rate": 1.8644196347309024e-06, "loss": 0.0152, "step": 598000 }, { "epoch": 36.31, "learning_rate": 1.8492506522662461e-06, "loss": 0.0151, "step": 598500 }, { "epoch": 36.34, "learning_rate": 1.8340816698015899e-06, "loss": 0.0154, "step": 599000 }, { "epoch": 36.38, "learning_rate": 1.8189126873369336e-06, "loss": 0.0158, "step": 599500 }, { "epoch": 36.41, "learning_rate": 1.8037740428372066e-06, "loss": 0.0153, "step": 600000 }, { "epoch": 36.44, "learning_rate": 1.7886050603725501e-06, "loss": 0.0154, "step": 600500 }, { "epoch": 36.47, "learning_rate": 1.773436077907894e-06, "loss": 0.0148, "step": 601000 }, { "epoch": 36.5, "learning_rate": 1.758297433408167e-06, "loss": 0.0154, "step": 601500 }, { "epoch": 36.53, "learning_rate": 1.74315878890844e-06, "loss": 0.0156, "step": 602000 }, { "epoch": 36.56, "learning_rate": 1.727989806443784e-06, "loss": 0.0152, "step": 602500 }, { "epoch": 36.59, "learning_rate": 1.7128511619440569e-06, "loss": 0.015, "step": 603000 }, { "epoch": 36.62, "learning_rate": 1.6976821794794006e-06, "loss": 0.0149, "step": 603500 }, { "epoch": 36.65, "learning_rate": 1.6825131970147444e-06, "loss": 0.0152, "step": 604000 }, { "epoch": 36.68, "learning_rate": 1.6673442145500882e-06, "loss": 0.015, "step": 604500 }, { "epoch": 36.71, "learning_rate": 1.6521752320854317e-06, "loss": 0.015, "step": 605000 }, { "epoch": 36.74, "learning_rate": 1.6370062496207757e-06, "loss": 0.0157, "step": 605500 }, { "epoch": 36.77, "learning_rate": 1.6218372671561192e-06, "loss": 0.0158, "step": 606000 }, { "epoch": 36.8, "learning_rate": 1.606668284691463e-06, "loss": 0.0157, "step": 606500 }, { "epoch": 36.83, "learning_rate": 1.5914993022268068e-06, "loss": 0.0154, "step": 607000 }, { "epoch": 36.86, "learning_rate": 1.5763303197621505e-06, "loss": 0.0157, "step": 607500 }, { "epoch": 36.89, "learning_rate": 1.5611916752624235e-06, "loss": 0.0155, "step": 608000 }, { "epoch": 36.92, "learning_rate": 1.5460226927977672e-06, "loss": 0.0153, "step": 608500 }, { "epoch": 36.95, "learning_rate": 1.5308537103331108e-06, "loss": 0.016, "step": 609000 }, { "epoch": 36.98, "learning_rate": 1.5156847278684548e-06, "loss": 0.0157, "step": 609500 }, { "epoch": 37.0, "eval_bleu": 87.8714, "eval_gen_len": 14.8, "eval_loss": 0.23112483322620392, "eval_runtime": 219.854, "eval_samples_per_second": 73.34, "eval_steps_per_second": 2.292, "step": 609797 }, { "epoch": 37.01, "learning_rate": 1.5005157454037985e-06, "loss": 0.0144, "step": 610000 }, { "epoch": 37.04, "learning_rate": 1.485346762939142e-06, "loss": 0.0146, "step": 610500 }, { "epoch": 37.07, "learning_rate": 1.470177780474486e-06, "loss": 0.0145, "step": 611000 }, { "epoch": 37.1, "learning_rate": 1.4550087980098296e-06, "loss": 0.015, "step": 611500 }, { "epoch": 37.13, "learning_rate": 1.4398701535101025e-06, "loss": 0.0146, "step": 612000 }, { "epoch": 37.16, "learning_rate": 1.4247011710454465e-06, "loss": 0.0143, "step": 612500 }, { "epoch": 37.19, "learning_rate": 1.40953218858079e-06, "loss": 0.0152, "step": 613000 }, { "epoch": 37.22, "learning_rate": 1.3943632061161338e-06, "loss": 0.0153, "step": 613500 }, { "epoch": 37.26, "learning_rate": 1.3791942236514778e-06, "loss": 0.0146, "step": 614000 }, { "epoch": 37.29, "learning_rate": 1.3640555791517507e-06, "loss": 0.015, "step": 614500 }, { "epoch": 37.32, "learning_rate": 1.3488865966870943e-06, "loss": 0.0148, "step": 615000 }, { "epoch": 37.35, "learning_rate": 1.3337479521873672e-06, "loss": 0.0149, "step": 615500 }, { "epoch": 37.38, "learning_rate": 1.3185789697227112e-06, "loss": 0.0147, "step": 616000 }, { "epoch": 37.41, "learning_rate": 1.303409987258055e-06, "loss": 0.0153, "step": 616500 }, { "epoch": 37.44, "learning_rate": 1.2882410047933985e-06, "loss": 0.0139, "step": 617000 }, { "epoch": 37.47, "learning_rate": 1.2730720223287425e-06, "loss": 0.0161, "step": 617500 }, { "epoch": 37.5, "learning_rate": 1.257903039864086e-06, "loss": 0.0153, "step": 618000 }, { "epoch": 37.53, "learning_rate": 1.2427340573994298e-06, "loss": 0.0153, "step": 618500 }, { "epoch": 37.56, "learning_rate": 1.2275954128997027e-06, "loss": 0.0152, "step": 619000 }, { "epoch": 37.59, "learning_rate": 1.2124264304350465e-06, "loss": 0.015, "step": 619500 }, { "epoch": 37.62, "learning_rate": 1.1972574479703902e-06, "loss": 0.0147, "step": 620000 }, { "epoch": 37.65, "learning_rate": 1.182088465505734e-06, "loss": 0.0154, "step": 620500 }, { "epoch": 37.68, "learning_rate": 1.1669194830410778e-06, "loss": 0.0144, "step": 621000 }, { "epoch": 37.71, "learning_rate": 1.1517505005764213e-06, "loss": 0.015, "step": 621500 }, { "epoch": 37.74, "learning_rate": 1.136581518111765e-06, "loss": 0.0155, "step": 622000 }, { "epoch": 37.77, "learning_rate": 1.121412535647109e-06, "loss": 0.0147, "step": 622500 }, { "epoch": 37.8, "learning_rate": 1.1062435531824526e-06, "loss": 0.0148, "step": 623000 }, { "epoch": 37.83, "learning_rate": 1.0910745707177964e-06, "loss": 0.0149, "step": 623500 }, { "epoch": 37.86, "learning_rate": 1.0759055882531401e-06, "loss": 0.0148, "step": 624000 }, { "epoch": 37.89, "learning_rate": 1.0607366057884839e-06, "loss": 0.0151, "step": 624500 }, { "epoch": 37.92, "learning_rate": 1.0455979612887568e-06, "loss": 0.0152, "step": 625000 }, { "epoch": 37.95, "learning_rate": 1.0304289788241006e-06, "loss": 0.0146, "step": 625500 }, { "epoch": 37.98, "learning_rate": 1.0152599963594443e-06, "loss": 0.0153, "step": 626000 }, { "epoch": 38.0, "eval_bleu": 87.8712, "eval_gen_len": 14.799, "eval_loss": 0.23195865750312805, "eval_runtime": 218.5324, "eval_samples_per_second": 73.783, "eval_steps_per_second": 2.306, "step": 626278 }, { "epoch": 38.01, "learning_rate": 1.0000910138947881e-06, "loss": 0.0147, "step": 626500 }, { "epoch": 38.04, "learning_rate": 9.849220314301319e-07, "loss": 0.0142, "step": 627000 }, { "epoch": 38.07, "learning_rate": 9.697530489654754e-07, "loss": 0.0151, "step": 627500 }, { "epoch": 38.1, "learning_rate": 9.545840665008192e-07, "loss": 0.0151, "step": 628000 }, { "epoch": 38.13, "learning_rate": 9.394454220010922e-07, "loss": 0.0143, "step": 628500 }, { "epoch": 38.17, "learning_rate": 9.242764395364359e-07, "loss": 0.0147, "step": 629000 }, { "epoch": 38.2, "learning_rate": 9.091074570717796e-07, "loss": 0.0144, "step": 629500 }, { "epoch": 38.23, "learning_rate": 8.939384746071235e-07, "loss": 0.0148, "step": 630000 }, { "epoch": 38.26, "learning_rate": 8.787998301073964e-07, "loss": 0.015, "step": 630500 }, { "epoch": 38.29, "learning_rate": 8.636308476427402e-07, "loss": 0.0138, "step": 631000 }, { "epoch": 38.32, "learning_rate": 8.484618651780839e-07, "loss": 0.0146, "step": 631500 }, { "epoch": 38.35, "learning_rate": 8.332928827134276e-07, "loss": 0.0144, "step": 632000 }, { "epoch": 38.38, "learning_rate": 8.181239002487714e-07, "loss": 0.0144, "step": 632500 }, { "epoch": 38.41, "learning_rate": 8.02954917784115e-07, "loss": 0.0142, "step": 633000 }, { "epoch": 38.44, "learning_rate": 7.878466112493175e-07, "loss": 0.0145, "step": 633500 }, { "epoch": 38.47, "learning_rate": 7.726776287846611e-07, "loss": 0.015, "step": 634000 }, { "epoch": 38.5, "learning_rate": 7.575086463200049e-07, "loss": 0.0147, "step": 634500 }, { "epoch": 38.53, "learning_rate": 7.423396638553485e-07, "loss": 0.0153, "step": 635000 }, { "epoch": 38.56, "learning_rate": 7.271706813906924e-07, "loss": 0.0151, "step": 635500 }, { "epoch": 38.59, "learning_rate": 7.120016989260362e-07, "loss": 0.0143, "step": 636000 }, { "epoch": 38.62, "learning_rate": 6.968327164613798e-07, "loss": 0.0143, "step": 636500 }, { "epoch": 38.65, "learning_rate": 6.816637339967236e-07, "loss": 0.015, "step": 637000 }, { "epoch": 38.68, "learning_rate": 6.664947515320672e-07, "loss": 0.0145, "step": 637500 }, { "epoch": 38.71, "learning_rate": 6.51325769067411e-07, "loss": 0.0142, "step": 638000 }, { "epoch": 38.74, "learning_rate": 6.361871245676841e-07, "loss": 0.0149, "step": 638500 }, { "epoch": 38.77, "learning_rate": 6.210181421030278e-07, "loss": 0.0146, "step": 639000 }, { "epoch": 38.8, "learning_rate": 6.058491596383715e-07, "loss": 0.015, "step": 639500 }, { "epoch": 38.83, "learning_rate": 5.906801771737152e-07, "loss": 0.0147, "step": 640000 }, { "epoch": 38.86, "learning_rate": 5.755415326739883e-07, "loss": 0.0146, "step": 640500 }, { "epoch": 38.89, "learning_rate": 5.603725502093319e-07, "loss": 0.0147, "step": 641000 }, { "epoch": 38.92, "learning_rate": 5.452035677446757e-07, "loss": 0.0147, "step": 641500 }, { "epoch": 38.95, "learning_rate": 5.300345852800195e-07, "loss": 0.0149, "step": 642000 }, { "epoch": 38.98, "learning_rate": 5.148656028153632e-07, "loss": 0.0149, "step": 642500 }, { "epoch": 39.0, "eval_bleu": 87.8655, "eval_gen_len": 14.7899, "eval_loss": 0.2320941537618637, "eval_runtime": 218.6389, "eval_samples_per_second": 73.747, "eval_steps_per_second": 2.305, "step": 642759 }, { "epoch": 39.01, "learning_rate": 4.997269583156363e-07, "loss": 0.0142, "step": 643000 }, { "epoch": 39.04, "learning_rate": 4.845579758509799e-07, "loss": 0.0143, "step": 643500 }, { "epoch": 39.08, "learning_rate": 4.693889933863237e-07, "loss": 0.0145, "step": 644000 }, { "epoch": 39.11, "learning_rate": 4.542200109216674e-07, "loss": 0.0145, "step": 644500 }, { "epoch": 39.14, "learning_rate": 4.390510284570111e-07, "loss": 0.0142, "step": 645000 }, { "epoch": 39.17, "learning_rate": 4.238820459923549e-07, "loss": 0.0145, "step": 645500 }, { "epoch": 39.2, "learning_rate": 4.087130635276986e-07, "loss": 0.0141, "step": 646000 }, { "epoch": 39.23, "learning_rate": 3.9354408106304233e-07, "loss": 0.0143, "step": 646500 }, { "epoch": 39.26, "learning_rate": 3.7840543656331537e-07, "loss": 0.0143, "step": 647000 }, { "epoch": 39.29, "learning_rate": 3.632364540986591e-07, "loss": 0.0149, "step": 647500 }, { "epoch": 39.32, "learning_rate": 3.480674716340028e-07, "loss": 0.0146, "step": 648000 }, { "epoch": 39.35, "learning_rate": 3.3289848916934655e-07, "loss": 0.0142, "step": 648500 }, { "epoch": 39.38, "learning_rate": 3.1772950670469026e-07, "loss": 0.0145, "step": 649000 }, { "epoch": 39.41, "learning_rate": 3.0256052424003397e-07, "loss": 0.0145, "step": 649500 }, { "epoch": 39.44, "learning_rate": 2.8739154177537773e-07, "loss": 0.0141, "step": 650000 }, { "epoch": 39.47, "learning_rate": 2.7225289727565077e-07, "loss": 0.0146, "step": 650500 }, { "epoch": 39.5, "learning_rate": 2.5708391481099453e-07, "loss": 0.0146, "step": 651000 }, { "epoch": 39.53, "learning_rate": 2.4191493234633824e-07, "loss": 0.0141, "step": 651500 }, { "epoch": 39.56, "learning_rate": 2.2674594988168195e-07, "loss": 0.0146, "step": 652000 }, { "epoch": 39.59, "learning_rate": 2.115769674170257e-07, "loss": 0.0147, "step": 652500 }, { "epoch": 39.62, "learning_rate": 1.964383229172987e-07, "loss": 0.014, "step": 653000 }, { "epoch": 39.65, "learning_rate": 1.8129967841757177e-07, "loss": 0.0138, "step": 653500 }, { "epoch": 39.68, "learning_rate": 1.6613069595291548e-07, "loss": 0.0148, "step": 654000 }, { "epoch": 39.71, "learning_rate": 1.5096171348825922e-07, "loss": 0.0145, "step": 654500 }, { "epoch": 39.74, "learning_rate": 1.3579273102360295e-07, "loss": 0.0142, "step": 655000 }, { "epoch": 39.77, "learning_rate": 1.2062374855894669e-07, "loss": 0.0146, "step": 655500 }, { "epoch": 39.8, "learning_rate": 1.0545476609429041e-07, "loss": 0.0144, "step": 656000 }, { "epoch": 39.83, "learning_rate": 9.028578362963412e-08, "loss": 0.014, "step": 656500 }, { "epoch": 39.86, "learning_rate": 7.514713912990717e-08, "loss": 0.0141, "step": 657000 }, { "epoch": 39.89, "learning_rate": 5.99781566652509e-08, "loss": 0.0139, "step": 657500 }, { "epoch": 39.92, "learning_rate": 4.4809174200594624e-08, "loss": 0.0147, "step": 658000 }, { "epoch": 39.96, "learning_rate": 2.9640191735938357e-08, "loss": 0.0144, "step": 658500 }, { "epoch": 39.99, "learning_rate": 1.4471209271282084e-08, "loss": 0.0144, "step": 659000 }, { "epoch": 40.0, "eval_bleu": 87.8962, "eval_gen_len": 14.792, "eval_loss": 0.23238389194011688, "eval_runtime": 218.3118, "eval_samples_per_second": 73.858, "eval_steps_per_second": 2.309, "step": 659240 } ], "max_steps": 659240, "num_train_epochs": 40, "total_flos": 1.3263324878544568e+18, "trial_name": null, "trial_params": null }