{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.18433058885567705, "global_step": 1249500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9998148198352554e-05, "loss": 2.5633, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.99962963967051e-05, "loss": 2.4025, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999444459505765e-05, "loss": 2.4857, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.9992592793410196e-05, "loss": 2.4036, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.999074099176275e-05, "loss": 2.4167, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.998888919011529e-05, "loss": 2.4199, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.998703738846784e-05, "loss": 2.3777, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.998518558682039e-05, "loss": 2.3574, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.9983333785172935e-05, "loss": 2.3653, "step": 4500 }, { "epoch": 0.0, "learning_rate": 4.998148198352549e-05, "loss": 2.3203, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.997963018187803e-05, "loss": 2.3002, "step": 5500 }, { "epoch": 0.0, "learning_rate": 4.9977778380230584e-05, "loss": 2.2327, "step": 6000 }, { "epoch": 0.0, "learning_rate": 4.997592657858313e-05, "loss": 2.3823, "step": 6500 }, { "epoch": 0.0, "learning_rate": 4.997407477693568e-05, "loss": 2.3095, "step": 7000 }, { "epoch": 0.0, "learning_rate": 4.997222297528823e-05, "loss": 2.3012, "step": 7500 }, { "epoch": 0.0, "learning_rate": 4.997037117364078e-05, "loss": 2.2502, "step": 8000 }, { "epoch": 0.0, "learning_rate": 4.996851937199332e-05, "loss": 2.3248, "step": 8500 }, { "epoch": 0.0, "learning_rate": 4.996666757034587e-05, "loss": 2.3523, "step": 9000 }, { "epoch": 0.0, "learning_rate": 4.996481576869842e-05, "loss": 2.2897, "step": 9500 }, { "epoch": 0.0, "learning_rate": 4.996296396705097e-05, "loss": 2.2968, "step": 10000 }, { "epoch": 0.0, "learning_rate": 4.996111169011484e-05, "loss": 2.2047, "step": 10500 }, { "epoch": 0.0, "learning_rate": 4.995925986583459e-05, "loss": 2.3323, "step": 11000 }, { "epoch": 0.0, "learning_rate": 4.995740804155435e-05, "loss": 2.2564, "step": 11500 }, { "epoch": 0.0, "learning_rate": 4.99555562172741e-05, "loss": 2.3009, "step": 12000 }, { "epoch": 0.0, "learning_rate": 4.995370439299386e-05, "loss": 2.2419, "step": 12500 }, { "epoch": 0.0, "learning_rate": 4.995185256871361e-05, "loss": 2.2346, "step": 13000 }, { "epoch": 0.0, "learning_rate": 4.995000074443336e-05, "loss": 2.304, "step": 13500 }, { "epoch": 0.0, "learning_rate": 4.9948148920153116e-05, "loss": 2.223, "step": 14000 }, { "epoch": 0.0, "learning_rate": 4.994629709587287e-05, "loss": 2.2424, "step": 14500 }, { "epoch": 0.0, "learning_rate": 4.994444527159263e-05, "loss": 2.2479, "step": 15000 }, { "epoch": 0.0, "learning_rate": 4.994259344731238e-05, "loss": 2.2607, "step": 15500 }, { "epoch": 0.0, "learning_rate": 4.9940741623032137e-05, "loss": 2.2446, "step": 16000 }, { "epoch": 0.0, "learning_rate": 4.993888979875189e-05, "loss": 2.1891, "step": 16500 }, { "epoch": 0.0, "learning_rate": 4.993703797447164e-05, "loss": 2.2201, "step": 17000 }, { "epoch": 0.0, "learning_rate": 4.9935186150191394e-05, "loss": 2.2409, "step": 17500 }, { "epoch": 0.0, "learning_rate": 4.993333432591115e-05, "loss": 2.2319, "step": 18000 }, { "epoch": 0.0, "learning_rate": 4.993148250163091e-05, "loss": 2.2554, "step": 18500 }, { "epoch": 0.0, "learning_rate": 4.992963067735066e-05, "loss": 2.2191, "step": 19000 }, { "epoch": 0.0, "learning_rate": 4.9927778853070415e-05, "loss": 2.1847, "step": 19500 }, { "epoch": 0.0, "learning_rate": 4.9925927028790165e-05, "loss": 2.1872, "step": 20000 }, { "epoch": 0.0, "learning_rate": 4.9924075204509915e-05, "loss": 2.2293, "step": 20500 }, { "epoch": 0.0, "learning_rate": 4.992222338022968e-05, "loss": 2.2537, "step": 21000 }, { "epoch": 0.0, "learning_rate": 4.992037155594943e-05, "loss": 2.2332, "step": 21500 }, { "epoch": 0.0, "learning_rate": 4.9918519731669186e-05, "loss": 2.2963, "step": 22000 }, { "epoch": 0.0, "learning_rate": 4.9916667907388936e-05, "loss": 2.2041, "step": 22500 }, { "epoch": 0.01, "learning_rate": 4.9914816083108686e-05, "loss": 2.1912, "step": 23000 }, { "epoch": 0.01, "learning_rate": 4.991296425882844e-05, "loss": 2.1984, "step": 23500 }, { "epoch": 0.01, "learning_rate": 4.99111124345482e-05, "loss": 2.2336, "step": 24000 }, { "epoch": 0.01, "learning_rate": 4.990926061026796e-05, "loss": 2.1302, "step": 24500 }, { "epoch": 0.01, "learning_rate": 4.990740878598771e-05, "loss": 2.2171, "step": 25000 }, { "epoch": 0.01, "learning_rate": 4.9905556961707464e-05, "loss": 2.1807, "step": 25500 }, { "epoch": 0.01, "learning_rate": 4.9903705137427215e-05, "loss": 2.1588, "step": 26000 }, { "epoch": 0.01, "learning_rate": 4.9901853313146965e-05, "loss": 2.1912, "step": 26500 }, { "epoch": 0.01, "learning_rate": 4.990000148886672e-05, "loss": 2.2455, "step": 27000 }, { "epoch": 0.01, "learning_rate": 4.989814966458648e-05, "loss": 2.1608, "step": 27500 }, { "epoch": 0.01, "learning_rate": 4.9896297840306236e-05, "loss": 2.1814, "step": 28000 }, { "epoch": 0.01, "learning_rate": 4.9894446016025986e-05, "loss": 2.0651, "step": 28500 }, { "epoch": 0.01, "learning_rate": 4.989259419174574e-05, "loss": 2.1137, "step": 29000 }, { "epoch": 0.01, "learning_rate": 4.989074236746549e-05, "loss": 2.2112, "step": 29500 }, { "epoch": 0.01, "learning_rate": 4.988889054318524e-05, "loss": 2.1663, "step": 30000 }, { "epoch": 0.01, "learning_rate": 4.988703871890501e-05, "loss": 2.1839, "step": 30500 }, { "epoch": 0.01, "learning_rate": 4.988518689462476e-05, "loss": 2.1793, "step": 31000 }, { "epoch": 0.01, "learning_rate": 4.9883335070344514e-05, "loss": 2.1851, "step": 31500 }, { "epoch": 0.01, "learning_rate": 4.9881483246064264e-05, "loss": 2.1889, "step": 32000 }, { "epoch": 0.01, "learning_rate": 4.987963142178402e-05, "loss": 2.0989, "step": 32500 }, { "epoch": 0.01, "learning_rate": 4.987777959750377e-05, "loss": 2.102, "step": 33000 }, { "epoch": 0.01, "learning_rate": 4.987592777322353e-05, "loss": 2.0911, "step": 33500 }, { "epoch": 0.01, "learning_rate": 4.9874075948943285e-05, "loss": 2.2145, "step": 34000 }, { "epoch": 0.01, "learning_rate": 4.9872224124663035e-05, "loss": 2.1246, "step": 34500 }, { "epoch": 0.01, "learning_rate": 4.987037230038279e-05, "loss": 2.1473, "step": 35000 }, { "epoch": 0.01, "learning_rate": 4.986852047610254e-05, "loss": 2.1508, "step": 35500 }, { "epoch": 0.01, "learning_rate": 4.986666865182229e-05, "loss": 2.1553, "step": 36000 }, { "epoch": 0.01, "learning_rate": 4.986481682754205e-05, "loss": 2.1734, "step": 36500 }, { "epoch": 0.01, "learning_rate": 4.9862965003261806e-05, "loss": 2.2153, "step": 37000 }, { "epoch": 0.01, "learning_rate": 4.986111317898156e-05, "loss": 2.1443, "step": 37500 }, { "epoch": 0.01, "learning_rate": 4.9859261354701314e-05, "loss": 2.2392, "step": 38000 }, { "epoch": 0.01, "learning_rate": 4.985740953042107e-05, "loss": 2.117, "step": 38500 }, { "epoch": 0.01, "learning_rate": 4.985555770614082e-05, "loss": 2.1762, "step": 39000 }, { "epoch": 0.01, "learning_rate": 4.985370588186057e-05, "loss": 2.1363, "step": 39500 }, { "epoch": 0.01, "learning_rate": 4.9851854057580335e-05, "loss": 2.1806, "step": 40000 }, { "epoch": 0.01, "learning_rate": 4.9850002233300085e-05, "loss": 2.1484, "step": 40500 }, { "epoch": 0.01, "learning_rate": 4.984815040901984e-05, "loss": 2.1418, "step": 41000 }, { "epoch": 0.01, "learning_rate": 4.984629858473959e-05, "loss": 2.1191, "step": 41500 }, { "epoch": 0.01, "learning_rate": 4.984444676045935e-05, "loss": 2.2352, "step": 42000 }, { "epoch": 0.01, "learning_rate": 4.98425949361791e-05, "loss": 2.1433, "step": 42500 }, { "epoch": 0.01, "learning_rate": 4.984074311189885e-05, "loss": 2.1621, "step": 43000 }, { "epoch": 0.01, "learning_rate": 4.983889128761861e-05, "loss": 2.0863, "step": 43500 }, { "epoch": 0.01, "learning_rate": 4.983703946333836e-05, "loss": 2.201, "step": 44000 }, { "epoch": 0.01, "learning_rate": 4.983518763905812e-05, "loss": 2.1628, "step": 44500 }, { "epoch": 0.01, "learning_rate": 4.983333581477787e-05, "loss": 2.1656, "step": 45000 }, { "epoch": 0.01, "learning_rate": 4.983148399049763e-05, "loss": 2.154, "step": 45500 }, { "epoch": 0.01, "learning_rate": 4.982963216621738e-05, "loss": 2.1882, "step": 46000 }, { "epoch": 0.01, "learning_rate": 4.9827780341937134e-05, "loss": 2.1192, "step": 46500 }, { "epoch": 0.01, "learning_rate": 4.982592851765689e-05, "loss": 2.0503, "step": 47000 }, { "epoch": 0.01, "learning_rate": 4.982407669337664e-05, "loss": 2.1834, "step": 47500 }, { "epoch": 0.01, "learning_rate": 4.98222248690964e-05, "loss": 2.1964, "step": 48000 }, { "epoch": 0.01, "learning_rate": 4.982037304481615e-05, "loss": 2.1844, "step": 48500 }, { "epoch": 0.01, "learning_rate": 4.9818521220535905e-05, "loss": 2.15, "step": 49000 }, { "epoch": 0.01, "learning_rate": 4.981666939625566e-05, "loss": 2.1419, "step": 49500 }, { "epoch": 0.01, "learning_rate": 4.981481757197541e-05, "loss": 2.1146, "step": 50000 }, { "epoch": 0.01, "learning_rate": 4.981296574769517e-05, "loss": 2.1196, "step": 50500 }, { "epoch": 0.01, "learning_rate": 4.981111392341492e-05, "loss": 2.0982, "step": 51000 }, { "epoch": 0.01, "learning_rate": 4.9809262099134677e-05, "loss": 2.1951, "step": 51500 }, { "epoch": 0.01, "learning_rate": 4.980741027485443e-05, "loss": 2.0885, "step": 52000 }, { "epoch": 0.01, "learning_rate": 4.980555845057418e-05, "loss": 2.0822, "step": 52500 }, { "epoch": 0.01, "learning_rate": 4.980370662629394e-05, "loss": 2.107, "step": 53000 }, { "epoch": 0.01, "learning_rate": 4.980185480201369e-05, "loss": 2.0801, "step": 53500 }, { "epoch": 0.01, "learning_rate": 4.980000297773345e-05, "loss": 2.1561, "step": 54000 }, { "epoch": 0.01, "learning_rate": 4.97981511534532e-05, "loss": 2.0842, "step": 54500 }, { "epoch": 0.01, "learning_rate": 4.9796299329172955e-05, "loss": 2.0803, "step": 55000 }, { "epoch": 0.01, "learning_rate": 4.9794447504892705e-05, "loss": 2.2168, "step": 55500 }, { "epoch": 0.01, "learning_rate": 4.979259568061246e-05, "loss": 2.114, "step": 56000 }, { "epoch": 0.01, "learning_rate": 4.979074385633222e-05, "loss": 2.1965, "step": 56500 }, { "epoch": 0.01, "learning_rate": 4.978889203205197e-05, "loss": 2.1872, "step": 57000 }, { "epoch": 0.01, "learning_rate": 4.9787040207771726e-05, "loss": 2.1307, "step": 57500 }, { "epoch": 0.01, "learning_rate": 4.9785188383491476e-05, "loss": 2.1543, "step": 58000 }, { "epoch": 0.01, "learning_rate": 4.978333655921123e-05, "loss": 2.1638, "step": 58500 }, { "epoch": 0.01, "learning_rate": 4.978148473493099e-05, "loss": 2.152, "step": 59000 }, { "epoch": 0.01, "learning_rate": 4.977963291065074e-05, "loss": 2.185, "step": 59500 }, { "epoch": 0.01, "learning_rate": 4.97777810863705e-05, "loss": 2.083, "step": 60000 }, { "epoch": 0.01, "learning_rate": 4.977592926209025e-05, "loss": 2.1583, "step": 60500 }, { "epoch": 0.01, "learning_rate": 4.9774077437810004e-05, "loss": 2.1324, "step": 61000 }, { "epoch": 0.01, "learning_rate": 4.9772225613529754e-05, "loss": 2.1022, "step": 61500 }, { "epoch": 0.01, "learning_rate": 4.977037378924951e-05, "loss": 2.1016, "step": 62000 }, { "epoch": 0.01, "learning_rate": 4.976852196496927e-05, "loss": 2.2154, "step": 62500 }, { "epoch": 0.01, "learning_rate": 4.976667014068902e-05, "loss": 2.1377, "step": 63000 }, { "epoch": 0.01, "learning_rate": 4.9764818316408775e-05, "loss": 2.1611, "step": 63500 }, { "epoch": 0.01, "learning_rate": 4.9762966492128526e-05, "loss": 2.1962, "step": 64000 }, { "epoch": 0.01, "learning_rate": 4.976111466784828e-05, "loss": 2.1179, "step": 64500 }, { "epoch": 0.01, "learning_rate": 4.975926284356803e-05, "loss": 2.136, "step": 65000 }, { "epoch": 0.01, "learning_rate": 4.975741101928779e-05, "loss": 2.0581, "step": 65500 }, { "epoch": 0.01, "learning_rate": 4.975555919500755e-05, "loss": 2.1673, "step": 66000 }, { "epoch": 0.01, "learning_rate": 4.97537073707273e-05, "loss": 2.1709, "step": 66500 }, { "epoch": 0.01, "learning_rate": 4.9751855546447054e-05, "loss": 2.1291, "step": 67000 }, { "epoch": 0.01, "learning_rate": 4.9750003722166804e-05, "loss": 2.1081, "step": 67500 }, { "epoch": 0.02, "learning_rate": 4.974815189788656e-05, "loss": 2.0995, "step": 68000 }, { "epoch": 0.02, "learning_rate": 4.974630007360631e-05, "loss": 2.1093, "step": 68500 }, { "epoch": 0.02, "learning_rate": 4.974444824932607e-05, "loss": 2.0995, "step": 69000 }, { "epoch": 0.02, "learning_rate": 4.9742596425045825e-05, "loss": 2.3111, "step": 69500 }, { "epoch": 0.02, "learning_rate": 4.9740744600765575e-05, "loss": 2.2349, "step": 70000 }, { "epoch": 0.02, "learning_rate": 4.973889277648533e-05, "loss": 2.0986, "step": 70500 }, { "epoch": 0.02, "learning_rate": 4.973704095220508e-05, "loss": 2.1087, "step": 71000 }, { "epoch": 0.02, "learning_rate": 4.973518912792484e-05, "loss": 2.1279, "step": 71500 }, { "epoch": 0.02, "learning_rate": 4.9733337303644596e-05, "loss": 2.1361, "step": 72000 }, { "epoch": 0.02, "learning_rate": 4.9731485479364346e-05, "loss": 2.1142, "step": 72500 }, { "epoch": 0.02, "learning_rate": 4.97296336550841e-05, "loss": 2.1393, "step": 73000 }, { "epoch": 0.02, "learning_rate": 4.9727781830803853e-05, "loss": 2.0751, "step": 73500 }, { "epoch": 0.02, "learning_rate": 4.972593000652361e-05, "loss": 2.1786, "step": 74000 }, { "epoch": 0.02, "learning_rate": 4.972407818224336e-05, "loss": 2.1415, "step": 74500 }, { "epoch": 0.02, "learning_rate": 4.972222635796312e-05, "loss": 2.094, "step": 75000 }, { "epoch": 0.02, "learning_rate": 4.9720374533682874e-05, "loss": 2.158, "step": 75500 }, { "epoch": 0.02, "learning_rate": 4.9718522709402625e-05, "loss": 2.0468, "step": 76000 }, { "epoch": 0.02, "learning_rate": 4.971667088512238e-05, "loss": 2.082, "step": 76500 }, { "epoch": 0.02, "learning_rate": 4.971481906084213e-05, "loss": 2.1283, "step": 77000 }, { "epoch": 0.02, "learning_rate": 4.971296723656189e-05, "loss": 2.1468, "step": 77500 }, { "epoch": 0.02, "learning_rate": 4.971111541228164e-05, "loss": 2.1154, "step": 78000 }, { "epoch": 0.02, "learning_rate": 4.9709263588001396e-05, "loss": 2.1036, "step": 78500 }, { "epoch": 0.02, "learning_rate": 4.970741176372115e-05, "loss": 2.1031, "step": 79000 }, { "epoch": 0.02, "learning_rate": 4.97055599394409e-05, "loss": 2.0857, "step": 79500 }, { "epoch": 0.02, "learning_rate": 4.970370811516066e-05, "loss": 2.1558, "step": 80000 }, { "epoch": 0.02, "learning_rate": 4.970185629088041e-05, "loss": 2.0955, "step": 80500 }, { "epoch": 0.02, "learning_rate": 4.970000446660017e-05, "loss": 2.0567, "step": 81000 }, { "epoch": 0.02, "learning_rate": 4.9698152642319924e-05, "loss": 2.1498, "step": 81500 }, { "epoch": 0.02, "learning_rate": 4.9696300818039674e-05, "loss": 2.1512, "step": 82000 }, { "epoch": 0.02, "learning_rate": 4.969444899375943e-05, "loss": 2.098, "step": 82500 }, { "epoch": 0.02, "learning_rate": 4.969259716947918e-05, "loss": 2.1491, "step": 83000 }, { "epoch": 0.02, "learning_rate": 4.969074534519894e-05, "loss": 2.1296, "step": 83500 }, { "epoch": 0.02, "learning_rate": 4.968889352091869e-05, "loss": 2.064, "step": 84000 }, { "epoch": 0.02, "learning_rate": 4.9687041696638445e-05, "loss": 2.148, "step": 84500 }, { "epoch": 0.02, "learning_rate": 4.96851898723582e-05, "loss": 2.1478, "step": 85000 }, { "epoch": 0.02, "learning_rate": 4.968333804807795e-05, "loss": 2.1299, "step": 85500 }, { "epoch": 0.02, "learning_rate": 4.968148622379771e-05, "loss": 2.1889, "step": 86000 }, { "epoch": 0.02, "learning_rate": 4.967963439951746e-05, "loss": 2.0996, "step": 86500 }, { "epoch": 0.02, "learning_rate": 4.9677782575237216e-05, "loss": 2.1575, "step": 87000 }, { "epoch": 0.02, "learning_rate": 4.967593075095697e-05, "loss": 2.12, "step": 87500 }, { "epoch": 0.02, "learning_rate": 4.9674078926676724e-05, "loss": 2.094, "step": 88000 }, { "epoch": 0.02, "learning_rate": 4.967222710239648e-05, "loss": 2.1591, "step": 88500 }, { "epoch": 0.02, "learning_rate": 4.967037527811623e-05, "loss": 2.1463, "step": 89000 }, { "epoch": 0.02, "learning_rate": 4.966852345383599e-05, "loss": 2.0924, "step": 89500 }, { "epoch": 0.02, "learning_rate": 4.966667162955574e-05, "loss": 2.1121, "step": 90000 }, { "epoch": 0.02, "learning_rate": 4.9664819805275495e-05, "loss": 2.1229, "step": 90500 }, { "epoch": 0.02, "learning_rate": 4.966296798099525e-05, "loss": 2.1183, "step": 91000 }, { "epoch": 0.02, "learning_rate": 4.9661116156715e-05, "loss": 2.0884, "step": 91500 }, { "epoch": 0.02, "learning_rate": 4.965926433243476e-05, "loss": 2.1724, "step": 92000 }, { "epoch": 0.02, "learning_rate": 4.965741250815451e-05, "loss": 2.1064, "step": 92500 }, { "epoch": 0.02, "learning_rate": 4.9655560683874266e-05, "loss": 2.1236, "step": 93000 }, { "epoch": 0.02, "learning_rate": 4.9653708859594016e-05, "loss": 2.159, "step": 93500 }, { "epoch": 0.02, "learning_rate": 4.965185703531377e-05, "loss": 2.0801, "step": 94000 }, { "epoch": 0.02, "learning_rate": 4.965000521103353e-05, "loss": 2.1024, "step": 94500 }, { "epoch": 0.02, "learning_rate": 4.964815338675328e-05, "loss": 2.1097, "step": 95000 }, { "epoch": 0.02, "learning_rate": 4.964630156247304e-05, "loss": 2.1249, "step": 95500 }, { "epoch": 0.02, "learning_rate": 4.964444973819279e-05, "loss": 2.0557, "step": 96000 }, { "epoch": 0.02, "learning_rate": 4.9642597913912544e-05, "loss": 2.0868, "step": 96500 }, { "epoch": 0.02, "learning_rate": 4.9640746089632294e-05, "loss": 2.1668, "step": 97000 }, { "epoch": 0.02, "learning_rate": 4.963889426535205e-05, "loss": 2.1173, "step": 97500 }, { "epoch": 0.02, "learning_rate": 4.963704244107181e-05, "loss": 2.1173, "step": 98000 }, { "epoch": 0.02, "learning_rate": 4.963519061679156e-05, "loss": 2.0945, "step": 98500 }, { "epoch": 0.02, "learning_rate": 4.9633338792511315e-05, "loss": 2.0548, "step": 99000 }, { "epoch": 0.02, "learning_rate": 4.9631486968231066e-05, "loss": 2.0306, "step": 99500 }, { "epoch": 0.02, "learning_rate": 4.962963514395082e-05, "loss": 2.0759, "step": 100000 }, { "epoch": 0.02, "learning_rate": 4.962778331967058e-05, "loss": 2.1732, "step": 100500 }, { "epoch": 0.02, "learning_rate": 4.962593149539033e-05, "loss": 2.1157, "step": 101000 }, { "epoch": 0.02, "learning_rate": 4.9624079671110087e-05, "loss": 2.148, "step": 101500 }, { "epoch": 0.02, "learning_rate": 4.962222784682984e-05, "loss": 2.1079, "step": 102000 }, { "epoch": 0.02, "learning_rate": 4.9620376022549594e-05, "loss": 2.127, "step": 102500 }, { "epoch": 0.02, "learning_rate": 4.9618524198269344e-05, "loss": 2.1, "step": 103000 }, { "epoch": 0.02, "learning_rate": 4.96166723739891e-05, "loss": 2.071, "step": 103500 }, { "epoch": 0.02, "learning_rate": 4.961482054970886e-05, "loss": 2.1219, "step": 104000 }, { "epoch": 0.02, "learning_rate": 4.961296872542861e-05, "loss": 2.1071, "step": 104500 }, { "epoch": 0.02, "learning_rate": 4.9611116901148365e-05, "loss": 2.0968, "step": 105000 }, { "epoch": 0.02, "learning_rate": 4.9609265076868115e-05, "loss": 2.1153, "step": 105500 }, { "epoch": 0.02, "learning_rate": 4.960741325258787e-05, "loss": 2.0603, "step": 106000 }, { "epoch": 0.02, "learning_rate": 4.960556142830762e-05, "loss": 2.1243, "step": 106500 }, { "epoch": 0.02, "learning_rate": 4.960370960402738e-05, "loss": 2.1658, "step": 107000 }, { "epoch": 0.02, "learning_rate": 4.9601857779747136e-05, "loss": 2.0951, "step": 107500 }, { "epoch": 0.02, "learning_rate": 4.9600005955466886e-05, "loss": 2.1128, "step": 108000 }, { "epoch": 0.02, "learning_rate": 4.959815413118664e-05, "loss": 2.1391, "step": 108500 }, { "epoch": 0.02, "learning_rate": 4.959630230690639e-05, "loss": 2.1156, "step": 109000 }, { "epoch": 0.02, "learning_rate": 4.959445048262615e-05, "loss": 2.0878, "step": 109500 }, { "epoch": 0.02, "learning_rate": 4.959259865834591e-05, "loss": 2.1156, "step": 110000 }, { "epoch": 0.02, "learning_rate": 4.959074683406566e-05, "loss": 2.1387, "step": 110500 }, { "epoch": 0.02, "learning_rate": 4.9588895009785414e-05, "loss": 2.1429, "step": 111000 }, { "epoch": 0.02, "learning_rate": 4.9587043185505165e-05, "loss": 2.1445, "step": 111500 }, { "epoch": 0.02, "learning_rate": 4.958519136122492e-05, "loss": 2.1135, "step": 112000 }, { "epoch": 0.02, "learning_rate": 4.958333953694467e-05, "loss": 2.1144, "step": 112500 }, { "epoch": 0.03, "learning_rate": 4.958148771266443e-05, "loss": 2.1433, "step": 113000 }, { "epoch": 0.03, "learning_rate": 4.9579635888384186e-05, "loss": 2.1211, "step": 113500 }, { "epoch": 0.03, "learning_rate": 4.9577784064103936e-05, "loss": 2.1933, "step": 114000 }, { "epoch": 0.03, "learning_rate": 4.957593223982369e-05, "loss": 2.1649, "step": 114500 }, { "epoch": 0.03, "learning_rate": 4.957408041554344e-05, "loss": 2.0976, "step": 115000 }, { "epoch": 0.03, "learning_rate": 4.95722285912632e-05, "loss": 2.116, "step": 115500 }, { "epoch": 0.03, "learning_rate": 4.957037676698295e-05, "loss": 2.0661, "step": 116000 }, { "epoch": 0.03, "learning_rate": 4.956852494270271e-05, "loss": 2.1022, "step": 116500 }, { "epoch": 0.03, "learning_rate": 4.9566673118422464e-05, "loss": 2.1366, "step": 117000 }, { "epoch": 0.03, "learning_rate": 4.9564821294142214e-05, "loss": 2.1154, "step": 117500 }, { "epoch": 0.03, "learning_rate": 4.956296946986197e-05, "loss": 2.0719, "step": 118000 }, { "epoch": 0.03, "learning_rate": 4.956111764558172e-05, "loss": 2.1059, "step": 118500 }, { "epoch": 0.03, "learning_rate": 4.955926582130148e-05, "loss": 2.0687, "step": 119000 }, { "epoch": 0.03, "learning_rate": 4.955741399702123e-05, "loss": 2.1175, "step": 119500 }, { "epoch": 0.03, "learning_rate": 4.9555562172740985e-05, "loss": 2.1016, "step": 120000 }, { "epoch": 0.03, "learning_rate": 4.955371034846074e-05, "loss": 2.091, "step": 120500 }, { "epoch": 0.03, "learning_rate": 4.955185852418049e-05, "loss": 2.1525, "step": 121000 }, { "epoch": 0.03, "learning_rate": 4.955000669990025e-05, "loss": 2.1353, "step": 121500 }, { "epoch": 0.03, "learning_rate": 4.954815487562e-05, "loss": 2.1404, "step": 122000 }, { "epoch": 0.03, "learning_rate": 4.9546303051339756e-05, "loss": 2.1124, "step": 122500 }, { "epoch": 0.03, "learning_rate": 4.954445122705951e-05, "loss": 2.0695, "step": 123000 }, { "epoch": 0.03, "learning_rate": 4.9542599402779263e-05, "loss": 2.0679, "step": 123500 }, { "epoch": 0.03, "learning_rate": 4.954074757849902e-05, "loss": 2.0789, "step": 124000 }, { "epoch": 0.03, "learning_rate": 4.953889575421877e-05, "loss": 2.1837, "step": 124500 }, { "epoch": 0.03, "learning_rate": 4.953704392993853e-05, "loss": 2.0892, "step": 125000 }, { "epoch": 0.03, "learning_rate": 4.953519210565828e-05, "loss": 2.1429, "step": 125500 }, { "epoch": 0.03, "learning_rate": 4.9533340281378035e-05, "loss": 2.1141, "step": 126000 }, { "epoch": 0.03, "learning_rate": 4.953148845709779e-05, "loss": 2.1018, "step": 126500 }, { "epoch": 0.03, "learning_rate": 4.952963663281754e-05, "loss": 2.0873, "step": 127000 }, { "epoch": 0.03, "learning_rate": 4.95277848085373e-05, "loss": 2.0736, "step": 127500 }, { "epoch": 0.03, "learning_rate": 4.952593298425705e-05, "loss": 2.0752, "step": 128000 }, { "epoch": 0.03, "learning_rate": 4.9524081159976806e-05, "loss": 2.102, "step": 128500 }, { "epoch": 0.03, "learning_rate": 4.9522229335696556e-05, "loss": 2.132, "step": 129000 }, { "epoch": 0.03, "learning_rate": 4.952037751141631e-05, "loss": 2.1201, "step": 129500 }, { "epoch": 0.03, "learning_rate": 4.951852568713607e-05, "loss": 2.1781, "step": 130000 }, { "epoch": 0.03, "learning_rate": 4.951667386285582e-05, "loss": 2.0607, "step": 130500 }, { "epoch": 0.03, "learning_rate": 4.951482203857558e-05, "loss": 2.1332, "step": 131000 }, { "epoch": 0.03, "learning_rate": 4.951297021429533e-05, "loss": 2.0999, "step": 131500 }, { "epoch": 0.03, "learning_rate": 4.9511118390015084e-05, "loss": 2.1018, "step": 132000 }, { "epoch": 0.03, "learning_rate": 4.950926656573484e-05, "loss": 2.0752, "step": 132500 }, { "epoch": 0.03, "learning_rate": 4.950741474145459e-05, "loss": 2.1125, "step": 133000 }, { "epoch": 0.03, "learning_rate": 4.950556291717435e-05, "loss": 2.0988, "step": 133500 }, { "epoch": 0.03, "learning_rate": 4.95037110928941e-05, "loss": 2.0444, "step": 134000 }, { "epoch": 0.03, "learning_rate": 4.9501859268613855e-05, "loss": 2.0619, "step": 134500 }, { "epoch": 0.03, "learning_rate": 4.9500007444333606e-05, "loss": 2.1532, "step": 135000 }, { "epoch": 0.03, "learning_rate": 4.949815562005336e-05, "loss": 2.1091, "step": 135500 }, { "epoch": 0.03, "learning_rate": 4.949630379577312e-05, "loss": 2.0799, "step": 136000 }, { "epoch": 0.03, "learning_rate": 4.949445197149287e-05, "loss": 2.0619, "step": 136500 }, { "epoch": 0.03, "learning_rate": 4.9492600147212627e-05, "loss": 2.0687, "step": 137000 }, { "epoch": 0.03, "learning_rate": 4.949074832293238e-05, "loss": 2.133, "step": 137500 }, { "epoch": 0.03, "learning_rate": 4.9488896498652134e-05, "loss": 2.0766, "step": 138000 }, { "epoch": 0.03, "learning_rate": 4.9487044674371884e-05, "loss": 2.1147, "step": 138500 }, { "epoch": 0.03, "learning_rate": 4.948519285009164e-05, "loss": 2.1444, "step": 139000 }, { "epoch": 0.03, "learning_rate": 4.94833410258114e-05, "loss": 2.138, "step": 139500 }, { "epoch": 0.03, "learning_rate": 4.948148920153115e-05, "loss": 2.0898, "step": 140000 }, { "epoch": 0.03, "learning_rate": 4.9479637377250905e-05, "loss": 2.0965, "step": 140500 }, { "epoch": 0.03, "learning_rate": 4.9477785552970655e-05, "loss": 2.0402, "step": 141000 }, { "epoch": 0.03, "learning_rate": 4.947593372869041e-05, "loss": 2.0965, "step": 141500 }, { "epoch": 0.03, "learning_rate": 4.947408190441017e-05, "loss": 2.1203, "step": 142000 }, { "epoch": 0.03, "learning_rate": 4.947223008012992e-05, "loss": 2.0735, "step": 142500 }, { "epoch": 0.03, "learning_rate": 4.9470378255849676e-05, "loss": 2.0774, "step": 143000 }, { "epoch": 0.03, "learning_rate": 4.9468526431569426e-05, "loss": 2.0433, "step": 143500 }, { "epoch": 0.03, "learning_rate": 4.946667460728918e-05, "loss": 2.0941, "step": 144000 }, { "epoch": 0.03, "learning_rate": 4.946482278300893e-05, "loss": 2.0883, "step": 144500 }, { "epoch": 0.03, "learning_rate": 4.946297095872869e-05, "loss": 2.0986, "step": 145000 }, { "epoch": 0.03, "learning_rate": 4.946111913444845e-05, "loss": 2.1054, "step": 145500 }, { "epoch": 0.03, "learning_rate": 4.94592673101682e-05, "loss": 2.095, "step": 146000 }, { "epoch": 0.03, "learning_rate": 4.9457415485887954e-05, "loss": 2.048, "step": 146500 }, { "epoch": 0.03, "learning_rate": 4.9455563661607704e-05, "loss": 2.1582, "step": 147000 }, { "epoch": 0.03, "learning_rate": 4.945371183732746e-05, "loss": 2.0933, "step": 147500 }, { "epoch": 0.03, "learning_rate": 4.945186001304721e-05, "loss": 2.0986, "step": 148000 }, { "epoch": 0.03, "learning_rate": 4.9450008188766975e-05, "loss": 2.0879, "step": 148500 }, { "epoch": 0.03, "learning_rate": 4.9448156364486725e-05, "loss": 2.1448, "step": 149000 }, { "epoch": 0.03, "learning_rate": 4.9446304540206476e-05, "loss": 2.0606, "step": 149500 }, { "epoch": 0.03, "learning_rate": 4.944445271592623e-05, "loss": 2.1036, "step": 150000 }, { "epoch": 0.03, "learning_rate": 4.944260089164598e-05, "loss": 2.0317, "step": 150500 }, { "epoch": 0.03, "learning_rate": 4.944074906736574e-05, "loss": 2.1222, "step": 151000 }, { "epoch": 0.03, "learning_rate": 4.94388972430855e-05, "loss": 2.0757, "step": 151500 }, { "epoch": 0.03, "learning_rate": 4.943704541880525e-05, "loss": 2.066, "step": 152000 }, { "epoch": 0.03, "learning_rate": 4.9435193594525004e-05, "loss": 2.1251, "step": 152500 }, { "epoch": 0.03, "learning_rate": 4.9433341770244754e-05, "loss": 2.0925, "step": 153000 }, { "epoch": 0.03, "learning_rate": 4.943148994596451e-05, "loss": 2.0619, "step": 153500 }, { "epoch": 0.03, "learning_rate": 4.942963812168426e-05, "loss": 2.0371, "step": 154000 }, { "epoch": 0.03, "learning_rate": 4.942778629740402e-05, "loss": 2.1089, "step": 154500 }, { "epoch": 0.03, "learning_rate": 4.9425934473123775e-05, "loss": 2.0807, "step": 155000 }, { "epoch": 0.03, "learning_rate": 4.9424082648843525e-05, "loss": 2.0697, "step": 155500 }, { "epoch": 0.03, "learning_rate": 4.942223082456328e-05, "loss": 2.0802, "step": 156000 }, { "epoch": 0.03, "learning_rate": 4.942037900028303e-05, "loss": 2.0421, "step": 156500 }, { "epoch": 0.03, "learning_rate": 4.941852717600279e-05, "loss": 2.0534, "step": 157000 }, { "epoch": 0.03, "learning_rate": 4.941667535172254e-05, "loss": 2.0713, "step": 157500 }, { "epoch": 0.04, "learning_rate": 4.94148235274423e-05, "loss": 2.0668, "step": 158000 }, { "epoch": 0.04, "learning_rate": 4.941297170316205e-05, "loss": 2.0462, "step": 158500 }, { "epoch": 0.04, "learning_rate": 4.9411119878881803e-05, "loss": 2.0347, "step": 159000 }, { "epoch": 0.04, "learning_rate": 4.940926805460156e-05, "loss": 2.1034, "step": 159500 }, { "epoch": 0.04, "learning_rate": 4.940741623032131e-05, "loss": 2.0372, "step": 160000 }, { "epoch": 0.04, "learning_rate": 4.940556440604107e-05, "loss": 2.0649, "step": 160500 }, { "epoch": 0.04, "learning_rate": 4.9403712581760824e-05, "loss": 2.104, "step": 161000 }, { "epoch": 0.04, "learning_rate": 4.940186075748058e-05, "loss": 2.1171, "step": 161500 }, { "epoch": 0.04, "learning_rate": 4.940000893320033e-05, "loss": 2.0524, "step": 162000 }, { "epoch": 0.04, "learning_rate": 4.939815710892008e-05, "loss": 2.0812, "step": 162500 }, { "epoch": 0.04, "learning_rate": 4.939630528463984e-05, "loss": 2.0633, "step": 163000 }, { "epoch": 0.04, "learning_rate": 4.939445346035959e-05, "loss": 2.0875, "step": 163500 }, { "epoch": 0.04, "learning_rate": 4.9392601636079346e-05, "loss": 2.0515, "step": 164000 }, { "epoch": 0.04, "learning_rate": 4.93907498117991e-05, "loss": 2.0684, "step": 164500 }, { "epoch": 0.04, "learning_rate": 4.938889798751885e-05, "loss": 2.1043, "step": 165000 }, { "epoch": 0.04, "learning_rate": 4.938704616323861e-05, "loss": 2.0895, "step": 165500 }, { "epoch": 0.04, "learning_rate": 4.938519433895836e-05, "loss": 2.1472, "step": 166000 }, { "epoch": 0.04, "learning_rate": 4.938334251467812e-05, "loss": 2.1056, "step": 166500 }, { "epoch": 0.04, "learning_rate": 4.938149069039787e-05, "loss": 2.1839, "step": 167000 }, { "epoch": 0.04, "learning_rate": 4.937963886611763e-05, "loss": 2.1721, "step": 167500 }, { "epoch": 0.04, "learning_rate": 4.937778704183738e-05, "loss": 2.0804, "step": 168000 }, { "epoch": 0.04, "learning_rate": 4.937593521755713e-05, "loss": 2.1136, "step": 168500 }, { "epoch": 0.04, "learning_rate": 4.937408339327689e-05, "loss": 2.1256, "step": 169000 }, { "epoch": 0.04, "learning_rate": 4.937223156899664e-05, "loss": 2.1167, "step": 169500 }, { "epoch": 0.04, "learning_rate": 4.9370379744716395e-05, "loss": 2.0798, "step": 170000 }, { "epoch": 0.04, "learning_rate": 4.9368527920436145e-05, "loss": 2.0936, "step": 170500 }, { "epoch": 0.04, "learning_rate": 4.936667609615591e-05, "loss": 2.0949, "step": 171000 }, { "epoch": 0.04, "learning_rate": 4.936482427187566e-05, "loss": 2.0507, "step": 171500 }, { "epoch": 0.04, "learning_rate": 4.936297244759541e-05, "loss": 2.1179, "step": 172000 }, { "epoch": 0.04, "learning_rate": 4.9361120623315166e-05, "loss": 2.122, "step": 172500 }, { "epoch": 0.04, "learning_rate": 4.9359268799034917e-05, "loss": 2.0264, "step": 173000 }, { "epoch": 0.04, "learning_rate": 4.9357416974754674e-05, "loss": 2.0497, "step": 173500 }, { "epoch": 0.04, "learning_rate": 4.935556515047443e-05, "loss": 2.0554, "step": 174000 }, { "epoch": 0.04, "learning_rate": 4.935371332619419e-05, "loss": 2.1613, "step": 174500 }, { "epoch": 0.04, "learning_rate": 4.935186150191394e-05, "loss": 2.0881, "step": 175000 }, { "epoch": 0.04, "learning_rate": 4.935000967763369e-05, "loss": 2.037, "step": 175500 }, { "epoch": 0.04, "learning_rate": 4.9348157853353445e-05, "loss": 2.0376, "step": 176000 }, { "epoch": 0.04, "learning_rate": 4.9346306029073195e-05, "loss": 2.044, "step": 176500 }, { "epoch": 0.04, "learning_rate": 4.934445420479296e-05, "loss": 2.0583, "step": 177000 }, { "epoch": 0.04, "learning_rate": 4.934260238051271e-05, "loss": 2.1067, "step": 177500 }, { "epoch": 0.04, "learning_rate": 4.934075055623246e-05, "loss": 2.0444, "step": 178000 }, { "epoch": 0.04, "learning_rate": 4.9338898731952216e-05, "loss": 2.0586, "step": 178500 }, { "epoch": 0.04, "learning_rate": 4.9337046907671966e-05, "loss": 2.0813, "step": 179000 }, { "epoch": 0.04, "learning_rate": 4.933519508339172e-05, "loss": 2.077, "step": 179500 }, { "epoch": 0.04, "learning_rate": 4.933334325911147e-05, "loss": 2.0301, "step": 180000 }, { "epoch": 0.04, "learning_rate": 4.933149143483124e-05, "loss": 2.0865, "step": 180500 }, { "epoch": 0.04, "learning_rate": 4.932963961055099e-05, "loss": 2.1051, "step": 181000 }, { "epoch": 0.04, "learning_rate": 4.932778778627074e-05, "loss": 2.1441, "step": 181500 }, { "epoch": 0.04, "learning_rate": 4.9325935961990494e-05, "loss": 2.0732, "step": 182000 }, { "epoch": 0.04, "learning_rate": 4.9324084137710244e-05, "loss": 2.1316, "step": 182500 }, { "epoch": 0.04, "learning_rate": 4.932223231343e-05, "loss": 2.1063, "step": 183000 }, { "epoch": 0.04, "learning_rate": 4.932038048914976e-05, "loss": 2.1785, "step": 183500 }, { "epoch": 0.04, "learning_rate": 4.9318528664869515e-05, "loss": 2.1108, "step": 184000 }, { "epoch": 0.04, "learning_rate": 4.9316676840589265e-05, "loss": 2.0622, "step": 184500 }, { "epoch": 0.04, "learning_rate": 4.9314825016309016e-05, "loss": 2.0539, "step": 185000 }, { "epoch": 0.04, "learning_rate": 4.931297319202877e-05, "loss": 2.0998, "step": 185500 }, { "epoch": 0.04, "learning_rate": 4.931112136774852e-05, "loss": 2.0329, "step": 186000 }, { "epoch": 0.04, "learning_rate": 4.9309269543468286e-05, "loss": 2.1002, "step": 186500 }, { "epoch": 0.04, "learning_rate": 4.9307417719188037e-05, "loss": 2.1119, "step": 187000 }, { "epoch": 0.04, "learning_rate": 4.9305565894907794e-05, "loss": 2.1018, "step": 187500 }, { "epoch": 0.04, "learning_rate": 4.9303714070627544e-05, "loss": 2.0742, "step": 188000 }, { "epoch": 0.04, "learning_rate": 4.9301862246347294e-05, "loss": 2.0729, "step": 188500 }, { "epoch": 0.04, "learning_rate": 4.930001042206705e-05, "loss": 2.0995, "step": 189000 }, { "epoch": 0.04, "learning_rate": 4.92981585977868e-05, "loss": 2.1262, "step": 189500 }, { "epoch": 0.04, "learning_rate": 4.9296306773506565e-05, "loss": 2.1104, "step": 190000 }, { "epoch": 0.04, "learning_rate": 4.9294454949226315e-05, "loss": 2.0692, "step": 190500 }, { "epoch": 0.04, "learning_rate": 4.929260312494607e-05, "loss": 2.0884, "step": 191000 }, { "epoch": 0.04, "learning_rate": 4.929075130066582e-05, "loss": 2.047, "step": 191500 }, { "epoch": 0.04, "learning_rate": 4.928889947638557e-05, "loss": 2.0389, "step": 192000 }, { "epoch": 0.04, "learning_rate": 4.928704765210533e-05, "loss": 2.091, "step": 192500 }, { "epoch": 0.04, "learning_rate": 4.9285195827825086e-05, "loss": 2.0903, "step": 193000 }, { "epoch": 0.04, "learning_rate": 4.928334400354484e-05, "loss": 2.0842, "step": 193500 }, { "epoch": 0.04, "learning_rate": 4.928149217926459e-05, "loss": 2.11, "step": 194000 }, { "epoch": 0.04, "learning_rate": 4.927964035498434e-05, "loss": 2.1346, "step": 194500 }, { "epoch": 0.04, "learning_rate": 4.92777885307041e-05, "loss": 2.0544, "step": 195000 }, { "epoch": 0.04, "learning_rate": 4.927593670642385e-05, "loss": 2.1018, "step": 195500 }, { "epoch": 0.04, "learning_rate": 4.927408488214361e-05, "loss": 2.1567, "step": 196000 }, { "epoch": 0.04, "learning_rate": 4.9272233057863364e-05, "loss": 2.038, "step": 196500 }, { "epoch": 0.04, "learning_rate": 4.927038123358312e-05, "loss": 2.1098, "step": 197000 }, { "epoch": 0.04, "learning_rate": 4.926852940930287e-05, "loss": 2.0882, "step": 197500 }, { "epoch": 0.04, "learning_rate": 4.926667758502262e-05, "loss": 2.1377, "step": 198000 }, { "epoch": 0.04, "learning_rate": 4.926482576074238e-05, "loss": 2.0804, "step": 198500 }, { "epoch": 0.04, "learning_rate": 4.926297393646213e-05, "loss": 2.0966, "step": 199000 }, { "epoch": 0.04, "learning_rate": 4.926112211218189e-05, "loss": 2.0618, "step": 199500 }, { "epoch": 0.04, "learning_rate": 4.925927028790164e-05, "loss": 2.0894, "step": 200000 }, { "epoch": 0.04, "learning_rate": 4.92574184636214e-05, "loss": 2.1082, "step": 200500 }, { "epoch": 0.04, "learning_rate": 4.925556663934115e-05, "loss": 2.0714, "step": 201000 }, { "epoch": 0.04, "learning_rate": 4.92537148150609e-05, "loss": 2.0899, "step": 201500 }, { "epoch": 0.04, "learning_rate": 4.925186299078066e-05, "loss": 2.1127, "step": 202000 }, { "epoch": 0.04, "learning_rate": 4.9250011166500414e-05, "loss": 2.0379, "step": 202500 }, { "epoch": 0.05, "learning_rate": 4.924815934222017e-05, "loss": 2.1386, "step": 203000 }, { "epoch": 0.05, "learning_rate": 4.924630751793992e-05, "loss": 2.1004, "step": 203500 }, { "epoch": 0.05, "learning_rate": 4.924445569365968e-05, "loss": 2.1193, "step": 204000 }, { "epoch": 0.05, "learning_rate": 4.924260386937943e-05, "loss": 2.0352, "step": 204500 }, { "epoch": 0.05, "learning_rate": 4.924075204509918e-05, "loss": 1.9945, "step": 205000 }, { "epoch": 0.05, "learning_rate": 4.9238900220818935e-05, "loss": 2.048, "step": 205500 }, { "epoch": 0.05, "learning_rate": 4.923704839653869e-05, "loss": 2.1062, "step": 206000 }, { "epoch": 0.05, "learning_rate": 4.923519657225845e-05, "loss": 2.1527, "step": 206500 }, { "epoch": 0.05, "learning_rate": 4.92333447479782e-05, "loss": 2.0849, "step": 207000 }, { "epoch": 0.05, "learning_rate": 4.923149292369795e-05, "loss": 2.1391, "step": 207500 }, { "epoch": 0.05, "learning_rate": 4.9229641099417706e-05, "loss": 2.1056, "step": 208000 }, { "epoch": 0.05, "learning_rate": 4.9227789275137457e-05, "loss": 2.0838, "step": 208500 }, { "epoch": 0.05, "learning_rate": 4.922593745085722e-05, "loss": 2.0591, "step": 209000 }, { "epoch": 0.05, "learning_rate": 4.922408562657697e-05, "loss": 2.0663, "step": 209500 }, { "epoch": 0.05, "learning_rate": 4.922223380229673e-05, "loss": 2.0937, "step": 210000 }, { "epoch": 0.05, "learning_rate": 4.922038197801648e-05, "loss": 2.069, "step": 210500 }, { "epoch": 0.05, "learning_rate": 4.921853015373623e-05, "loss": 2.1053, "step": 211000 }, { "epoch": 0.05, "learning_rate": 4.9216678329455985e-05, "loss": 2.1391, "step": 211500 }, { "epoch": 0.05, "learning_rate": 4.9214826505175735e-05, "loss": 2.1385, "step": 212000 }, { "epoch": 0.05, "learning_rate": 4.92129746808955e-05, "loss": 2.0814, "step": 212500 }, { "epoch": 0.05, "learning_rate": 4.921112285661525e-05, "loss": 2.0214, "step": 213000 }, { "epoch": 0.05, "learning_rate": 4.9209271032335006e-05, "loss": 2.0682, "step": 213500 }, { "epoch": 0.05, "learning_rate": 4.9207419208054756e-05, "loss": 2.0033, "step": 214000 }, { "epoch": 0.05, "learning_rate": 4.9205567383774506e-05, "loss": 2.1075, "step": 214500 }, { "epoch": 0.05, "learning_rate": 4.920371555949426e-05, "loss": 2.1303, "step": 215000 }, { "epoch": 0.05, "learning_rate": 4.920186373521402e-05, "loss": 2.0722, "step": 215500 }, { "epoch": 0.05, "learning_rate": 4.920001191093378e-05, "loss": 2.0589, "step": 216000 }, { "epoch": 0.05, "learning_rate": 4.919816008665353e-05, "loss": 2.1009, "step": 216500 }, { "epoch": 0.05, "learning_rate": 4.9196308262373284e-05, "loss": 2.0362, "step": 217000 }, { "epoch": 0.05, "learning_rate": 4.9194456438093034e-05, "loss": 2.0887, "step": 217500 }, { "epoch": 0.05, "learning_rate": 4.9192604613812784e-05, "loss": 2.0315, "step": 218000 }, { "epoch": 0.05, "learning_rate": 4.919075278953255e-05, "loss": 2.0048, "step": 218500 }, { "epoch": 0.05, "learning_rate": 4.91889009652523e-05, "loss": 2.0886, "step": 219000 }, { "epoch": 0.05, "learning_rate": 4.9187049140972055e-05, "loss": 2.1068, "step": 219500 }, { "epoch": 0.05, "learning_rate": 4.9185197316691805e-05, "loss": 2.0436, "step": 220000 }, { "epoch": 0.05, "learning_rate": 4.9183345492411555e-05, "loss": 2.1176, "step": 220500 }, { "epoch": 0.05, "learning_rate": 4.918149366813131e-05, "loss": 2.0312, "step": 221000 }, { "epoch": 0.05, "learning_rate": 4.917964184385106e-05, "loss": 2.0521, "step": 221500 }, { "epoch": 0.05, "learning_rate": 4.9177790019570826e-05, "loss": 2.1194, "step": 222000 }, { "epoch": 0.05, "learning_rate": 4.9175938195290577e-05, "loss": 2.1554, "step": 222500 }, { "epoch": 0.05, "learning_rate": 4.9174086371010333e-05, "loss": 2.0491, "step": 223000 }, { "epoch": 0.05, "learning_rate": 4.9172234546730084e-05, "loss": 2.1222, "step": 223500 }, { "epoch": 0.05, "learning_rate": 4.9170382722449834e-05, "loss": 2.1292, "step": 224000 }, { "epoch": 0.05, "learning_rate": 4.916853089816959e-05, "loss": 2.1109, "step": 224500 }, { "epoch": 0.05, "learning_rate": 4.916667907388935e-05, "loss": 2.1183, "step": 225000 }, { "epoch": 0.05, "learning_rate": 4.9164827249609105e-05, "loss": 2.0613, "step": 225500 }, { "epoch": 0.05, "learning_rate": 4.9162975425328855e-05, "loss": 2.1023, "step": 226000 }, { "epoch": 0.05, "learning_rate": 4.916112360104861e-05, "loss": 2.1123, "step": 226500 }, { "epoch": 0.05, "learning_rate": 4.915927177676836e-05, "loss": 2.0664, "step": 227000 }, { "epoch": 0.05, "learning_rate": 4.915741995248811e-05, "loss": 2.0816, "step": 227500 }, { "epoch": 0.05, "learning_rate": 4.9155568128207876e-05, "loss": 2.0749, "step": 228000 }, { "epoch": 0.05, "learning_rate": 4.9153716303927626e-05, "loss": 2.0996, "step": 228500 }, { "epoch": 0.05, "learning_rate": 4.915186447964738e-05, "loss": 2.0893, "step": 229000 }, { "epoch": 0.05, "learning_rate": 4.915001265536713e-05, "loss": 2.1056, "step": 229500 }, { "epoch": 0.05, "learning_rate": 4.914816083108689e-05, "loss": 2.0729, "step": 230000 }, { "epoch": 0.05, "learning_rate": 4.914630900680664e-05, "loss": 2.0735, "step": 230500 }, { "epoch": 0.05, "learning_rate": 4.914445718252639e-05, "loss": 2.1005, "step": 231000 }, { "epoch": 0.05, "learning_rate": 4.9142605358246154e-05, "loss": 2.144, "step": 231500 }, { "epoch": 0.05, "learning_rate": 4.9140753533965904e-05, "loss": 2.1338, "step": 232000 }, { "epoch": 0.05, "learning_rate": 4.913890170968566e-05, "loss": 2.0836, "step": 232500 }, { "epoch": 0.05, "learning_rate": 4.913704988540541e-05, "loss": 2.1221, "step": 233000 }, { "epoch": 0.05, "learning_rate": 4.913519806112517e-05, "loss": 2.0788, "step": 233500 }, { "epoch": 0.05, "learning_rate": 4.913334623684492e-05, "loss": 2.0521, "step": 234000 }, { "epoch": 0.05, "learning_rate": 4.9131494412564675e-05, "loss": 2.1534, "step": 234500 }, { "epoch": 0.05, "learning_rate": 4.912964258828443e-05, "loss": 2.1214, "step": 235000 }, { "epoch": 0.05, "learning_rate": 4.912779076400418e-05, "loss": 2.1256, "step": 235500 }, { "epoch": 0.05, "learning_rate": 4.912593893972394e-05, "loss": 2.1058, "step": 236000 }, { "epoch": 0.05, "learning_rate": 4.912408711544369e-05, "loss": 2.023, "step": 236500 }, { "epoch": 0.05, "learning_rate": 4.912223529116344e-05, "loss": 2.092, "step": 237000 }, { "epoch": 0.05, "learning_rate": 4.91203834668832e-05, "loss": 2.0712, "step": 237500 }, { "epoch": 0.05, "learning_rate": 4.9118531642602954e-05, "loss": 2.0955, "step": 238000 }, { "epoch": 0.05, "learning_rate": 4.911667981832271e-05, "loss": 2.088, "step": 238500 }, { "epoch": 0.05, "learning_rate": 4.911482799404246e-05, "loss": 2.0495, "step": 239000 }, { "epoch": 0.05, "learning_rate": 4.911297616976222e-05, "loss": 2.0736, "step": 239500 }, { "epoch": 0.05, "learning_rate": 4.911112434548197e-05, "loss": 2.0218, "step": 240000 }, { "epoch": 0.05, "learning_rate": 4.910927252120172e-05, "loss": 2.1898, "step": 240500 }, { "epoch": 0.05, "learning_rate": 4.910742069692148e-05, "loss": 2.1209, "step": 241000 }, { "epoch": 0.05, "learning_rate": 4.910556887264123e-05, "loss": 2.1103, "step": 241500 }, { "epoch": 0.05, "learning_rate": 4.910371704836099e-05, "loss": 2.1283, "step": 242000 }, { "epoch": 0.05, "learning_rate": 4.910186522408074e-05, "loss": 2.1047, "step": 242500 }, { "epoch": 0.05, "learning_rate": 4.9100013399800496e-05, "loss": 2.0582, "step": 243000 }, { "epoch": 0.05, "learning_rate": 4.9098161575520246e-05, "loss": 2.051, "step": 243500 }, { "epoch": 0.05, "learning_rate": 4.909630975124e-05, "loss": 2.1267, "step": 244000 }, { "epoch": 0.05, "learning_rate": 4.909445792695976e-05, "loss": 2.0628, "step": 244500 }, { "epoch": 0.05, "learning_rate": 4.909260610267951e-05, "loss": 2.0915, "step": 245000 }, { "epoch": 0.05, "learning_rate": 4.909075427839927e-05, "loss": 2.0771, "step": 245500 }, { "epoch": 0.05, "learning_rate": 4.908890245411902e-05, "loss": 2.0893, "step": 246000 }, { "epoch": 0.05, "learning_rate": 4.9087050629838774e-05, "loss": 2.0784, "step": 246500 }, { "epoch": 0.05, "learning_rate": 4.9085198805558525e-05, "loss": 2.0892, "step": 247000 }, { "epoch": 0.05, "learning_rate": 4.908334698127828e-05, "loss": 2.0572, "step": 247500 }, { "epoch": 0.06, "learning_rate": 4.908149515699804e-05, "loss": 2.0948, "step": 248000 }, { "epoch": 0.06, "learning_rate": 4.907964333271779e-05, "loss": 2.1058, "step": 248500 }, { "epoch": 0.06, "learning_rate": 4.9077791508437546e-05, "loss": 2.0597, "step": 249000 }, { "epoch": 0.06, "learning_rate": 4.9075939684157296e-05, "loss": 2.0909, "step": 249500 }, { "epoch": 0.06, "learning_rate": 4.9074087859877046e-05, "loss": 2.0595, "step": 250000 }, { "epoch": 0.06, "learning_rate": 4.907223603559681e-05, "loss": 2.0492, "step": 250500 }, { "epoch": 0.06, "learning_rate": 4.907038421131656e-05, "loss": 2.0546, "step": 251000 }, { "epoch": 0.06, "learning_rate": 4.906853238703632e-05, "loss": 2.1159, "step": 251500 }, { "epoch": 0.06, "learning_rate": 4.906668056275607e-05, "loss": 2.1385, "step": 252000 }, { "epoch": 0.06, "learning_rate": 4.9064828738475824e-05, "loss": 2.0528, "step": 252500 }, { "epoch": 0.06, "learning_rate": 4.9062976914195574e-05, "loss": 2.0894, "step": 253000 }, { "epoch": 0.06, "learning_rate": 4.906112508991533e-05, "loss": 2.0223, "step": 253500 }, { "epoch": 0.06, "learning_rate": 4.905927326563509e-05, "loss": 2.0927, "step": 254000 }, { "epoch": 0.06, "learning_rate": 4.905742144135484e-05, "loss": 2.0777, "step": 254500 }, { "epoch": 0.06, "learning_rate": 4.9055569617074595e-05, "loss": 2.0878, "step": 255000 }, { "epoch": 0.06, "learning_rate": 4.9053717792794345e-05, "loss": 2.1174, "step": 255500 }, { "epoch": 0.06, "learning_rate": 4.90518659685141e-05, "loss": 2.0705, "step": 256000 }, { "epoch": 0.06, "learning_rate": 4.905001414423385e-05, "loss": 2.0456, "step": 256500 }, { "epoch": 0.06, "learning_rate": 4.904816231995361e-05, "loss": 2.0628, "step": 257000 }, { "epoch": 0.06, "learning_rate": 4.9046310495673366e-05, "loss": 2.1184, "step": 257500 }, { "epoch": 0.06, "learning_rate": 4.9044458671393116e-05, "loss": 2.1294, "step": 258000 }, { "epoch": 0.06, "learning_rate": 4.904260684711287e-05, "loss": 2.078, "step": 258500 }, { "epoch": 0.06, "learning_rate": 4.9040755022832624e-05, "loss": 2.0868, "step": 259000 }, { "epoch": 0.06, "learning_rate": 4.903890319855238e-05, "loss": 2.0613, "step": 259500 }, { "epoch": 0.06, "learning_rate": 4.903705137427214e-05, "loss": 2.0518, "step": 260000 }, { "epoch": 0.06, "learning_rate": 4.903519954999189e-05, "loss": 2.0432, "step": 260500 }, { "epoch": 0.06, "learning_rate": 4.9033347725711645e-05, "loss": 2.0214, "step": 261000 }, { "epoch": 0.06, "learning_rate": 4.9031495901431395e-05, "loss": 2.0357, "step": 261500 }, { "epoch": 0.06, "learning_rate": 4.902964407715115e-05, "loss": 2.0896, "step": 262000 }, { "epoch": 0.06, "learning_rate": 4.90277922528709e-05, "loss": 2.0427, "step": 262500 }, { "epoch": 0.06, "learning_rate": 4.902594042859065e-05, "loss": 2.0874, "step": 263000 }, { "epoch": 0.06, "learning_rate": 4.9024088604310416e-05, "loss": 2.0956, "step": 263500 }, { "epoch": 0.06, "learning_rate": 4.9022236780030166e-05, "loss": 2.089, "step": 264000 }, { "epoch": 0.06, "learning_rate": 4.902038495574992e-05, "loss": 2.1201, "step": 264500 }, { "epoch": 0.06, "learning_rate": 4.901853313146967e-05, "loss": 2.0422, "step": 265000 }, { "epoch": 0.06, "learning_rate": 4.901668130718943e-05, "loss": 2.0893, "step": 265500 }, { "epoch": 0.06, "learning_rate": 4.901482948290918e-05, "loss": 2.0884, "step": 266000 }, { "epoch": 0.06, "learning_rate": 4.901297765862894e-05, "loss": 2.0242, "step": 266500 }, { "epoch": 0.06, "learning_rate": 4.9011125834348694e-05, "loss": 2.0888, "step": 267000 }, { "epoch": 0.06, "learning_rate": 4.9009274010068444e-05, "loss": 2.108, "step": 267500 }, { "epoch": 0.06, "learning_rate": 4.90074221857882e-05, "loss": 2.0993, "step": 268000 }, { "epoch": 0.06, "learning_rate": 4.900557036150795e-05, "loss": 2.0372, "step": 268500 }, { "epoch": 0.06, "learning_rate": 4.900371853722771e-05, "loss": 2.0594, "step": 269000 }, { "epoch": 0.06, "learning_rate": 4.9001866712947465e-05, "loss": 1.9995, "step": 269500 }, { "epoch": 0.06, "learning_rate": 4.9000014888667215e-05, "loss": 2.1141, "step": 270000 }, { "epoch": 0.06, "learning_rate": 4.899816306438697e-05, "loss": 2.0906, "step": 270500 }, { "epoch": 0.06, "learning_rate": 4.899631124010672e-05, "loss": 2.0663, "step": 271000 }, { "epoch": 0.06, "learning_rate": 4.899445941582648e-05, "loss": 2.0266, "step": 271500 }, { "epoch": 0.06, "learning_rate": 4.899260759154623e-05, "loss": 2.0186, "step": 272000 }, { "epoch": 0.06, "learning_rate": 4.8990755767265987e-05, "loss": 2.0313, "step": 272500 }, { "epoch": 0.06, "learning_rate": 4.8988903942985744e-05, "loss": 2.0928, "step": 273000 }, { "epoch": 0.06, "learning_rate": 4.8987052118705494e-05, "loss": 2.0998, "step": 273500 }, { "epoch": 0.06, "learning_rate": 4.898520029442525e-05, "loss": 2.0823, "step": 274000 }, { "epoch": 0.06, "learning_rate": 4.8983348470145e-05, "loss": 2.1264, "step": 274500 }, { "epoch": 0.06, "learning_rate": 4.898149664586476e-05, "loss": 2.1021, "step": 275000 }, { "epoch": 0.06, "learning_rate": 4.897964482158451e-05, "loss": 2.1273, "step": 275500 }, { "epoch": 0.06, "learning_rate": 4.8977792997304265e-05, "loss": 2.0928, "step": 276000 }, { "epoch": 0.06, "learning_rate": 4.897594117302402e-05, "loss": 2.0685, "step": 276500 }, { "epoch": 0.06, "learning_rate": 4.897408934874377e-05, "loss": 2.0811, "step": 277000 }, { "epoch": 0.06, "learning_rate": 4.897223752446353e-05, "loss": 2.081, "step": 277500 }, { "epoch": 0.06, "learning_rate": 4.897038570018328e-05, "loss": 2.0601, "step": 278000 }, { "epoch": 0.06, "learning_rate": 4.8968533875903036e-05, "loss": 2.1053, "step": 278500 }, { "epoch": 0.06, "learning_rate": 4.896668205162279e-05, "loss": 2.1081, "step": 279000 }, { "epoch": 0.06, "learning_rate": 4.896483022734254e-05, "loss": 2.098, "step": 279500 }, { "epoch": 0.06, "learning_rate": 4.89629784030623e-05, "loss": 2.1769, "step": 280000 }, { "epoch": 0.06, "learning_rate": 4.896112657878205e-05, "loss": 2.083, "step": 280500 }, { "epoch": 0.06, "learning_rate": 4.895927475450181e-05, "loss": 2.0933, "step": 281000 }, { "epoch": 0.06, "learning_rate": 4.895742293022156e-05, "loss": 2.1113, "step": 281500 }, { "epoch": 0.06, "learning_rate": 4.8955571105941314e-05, "loss": 2.1, "step": 282000 }, { "epoch": 0.06, "learning_rate": 4.895371928166107e-05, "loss": 2.062, "step": 282500 }, { "epoch": 0.06, "learning_rate": 4.895186745738082e-05, "loss": 2.1055, "step": 283000 }, { "epoch": 0.06, "learning_rate": 4.895001563310058e-05, "loss": 2.018, "step": 283500 }, { "epoch": 0.06, "learning_rate": 4.894816380882033e-05, "loss": 2.0741, "step": 284000 }, { "epoch": 0.06, "learning_rate": 4.8946311984540086e-05, "loss": 2.1163, "step": 284500 }, { "epoch": 0.06, "learning_rate": 4.8944460160259836e-05, "loss": 2.0718, "step": 285000 }, { "epoch": 0.06, "learning_rate": 4.894260833597959e-05, "loss": 2.0572, "step": 285500 }, { "epoch": 0.06, "learning_rate": 4.894075651169935e-05, "loss": 2.0786, "step": 286000 }, { "epoch": 0.06, "learning_rate": 4.89389046874191e-05, "loss": 2.1539, "step": 286500 }, { "epoch": 0.06, "learning_rate": 4.893705286313886e-05, "loss": 2.0523, "step": 287000 }, { "epoch": 0.06, "learning_rate": 4.893520103885861e-05, "loss": 2.0635, "step": 287500 }, { "epoch": 0.06, "learning_rate": 4.8933349214578364e-05, "loss": 2.0962, "step": 288000 }, { "epoch": 0.06, "learning_rate": 4.8931497390298114e-05, "loss": 2.1501, "step": 288500 }, { "epoch": 0.06, "learning_rate": 4.892964556601787e-05, "loss": 2.0864, "step": 289000 }, { "epoch": 0.06, "learning_rate": 4.892779374173763e-05, "loss": 2.0495, "step": 289500 }, { "epoch": 0.06, "learning_rate": 4.892594191745738e-05, "loss": 2.0418, "step": 290000 }, { "epoch": 0.06, "learning_rate": 4.8924090093177135e-05, "loss": 2.1161, "step": 290500 }, { "epoch": 0.06, "learning_rate": 4.8922238268896885e-05, "loss": 2.0743, "step": 291000 }, { "epoch": 0.06, "learning_rate": 4.892038644461664e-05, "loss": 2.1056, "step": 291500 }, { "epoch": 0.06, "learning_rate": 4.89185346203364e-05, "loss": 2.0449, "step": 292000 }, { "epoch": 0.06, "learning_rate": 4.891668279605615e-05, "loss": 2.1395, "step": 292500 }, { "epoch": 0.07, "learning_rate": 4.8914830971775906e-05, "loss": 2.1422, "step": 293000 }, { "epoch": 0.07, "learning_rate": 4.8912979147495656e-05, "loss": 2.0949, "step": 293500 }, { "epoch": 0.07, "learning_rate": 4.891112732321541e-05, "loss": 2.0853, "step": 294000 }, { "epoch": 0.07, "learning_rate": 4.8909275498935163e-05, "loss": 2.0809, "step": 294500 }, { "epoch": 0.07, "learning_rate": 4.890742367465492e-05, "loss": 2.0908, "step": 295000 }, { "epoch": 0.07, "learning_rate": 4.890557185037468e-05, "loss": 1.9894, "step": 295500 }, { "epoch": 0.07, "learning_rate": 4.890372002609443e-05, "loss": 2.1431, "step": 296000 }, { "epoch": 0.07, "learning_rate": 4.8901868201814184e-05, "loss": 2.0502, "step": 296500 }, { "epoch": 0.07, "learning_rate": 4.8900016377533935e-05, "loss": 2.0828, "step": 297000 }, { "epoch": 0.07, "learning_rate": 4.889816455325369e-05, "loss": 1.9749, "step": 297500 }, { "epoch": 0.07, "learning_rate": 4.889631272897344e-05, "loss": 2.091, "step": 298000 }, { "epoch": 0.07, "learning_rate": 4.88944609046932e-05, "loss": 2.0509, "step": 298500 }, { "epoch": 0.07, "learning_rate": 4.8892609080412956e-05, "loss": 2.0141, "step": 299000 }, { "epoch": 0.07, "learning_rate": 4.8890757256132706e-05, "loss": 2.1212, "step": 299500 }, { "epoch": 0.07, "learning_rate": 4.888890543185246e-05, "loss": 2.1005, "step": 300000 }, { "epoch": 0.07, "learning_rate": 4.888705360757221e-05, "loss": 2.1135, "step": 300500 }, { "epoch": 0.07, "learning_rate": 4.888520178329197e-05, "loss": 2.0971, "step": 301000 }, { "epoch": 0.07, "learning_rate": 4.888334995901173e-05, "loss": 2.0434, "step": 301500 }, { "epoch": 0.07, "learning_rate": 4.888149813473148e-05, "loss": 2.0563, "step": 302000 }, { "epoch": 0.07, "learning_rate": 4.8879646310451234e-05, "loss": 2.0624, "step": 302500 }, { "epoch": 0.07, "learning_rate": 4.8877794486170984e-05, "loss": 2.0478, "step": 303000 }, { "epoch": 0.07, "learning_rate": 4.887594266189074e-05, "loss": 2.058, "step": 303500 }, { "epoch": 0.07, "learning_rate": 4.887409083761049e-05, "loss": 2.0822, "step": 304000 }, { "epoch": 0.07, "learning_rate": 4.887223901333025e-05, "loss": 2.0673, "step": 304500 }, { "epoch": 0.07, "learning_rate": 4.8870387189050005e-05, "loss": 2.0657, "step": 305000 }, { "epoch": 0.07, "learning_rate": 4.8868535364769755e-05, "loss": 2.0843, "step": 305500 }, { "epoch": 0.07, "learning_rate": 4.886668354048951e-05, "loss": 2.068, "step": 306000 }, { "epoch": 0.07, "learning_rate": 4.886483171620926e-05, "loss": 2.0716, "step": 306500 }, { "epoch": 0.07, "learning_rate": 4.886297989192902e-05, "loss": 2.0293, "step": 307000 }, { "epoch": 0.07, "learning_rate": 4.886112806764877e-05, "loss": 2.0616, "step": 307500 }, { "epoch": 0.07, "learning_rate": 4.8859276243368526e-05, "loss": 2.0941, "step": 308000 }, { "epoch": 0.07, "learning_rate": 4.8857424419088283e-05, "loss": 1.9884, "step": 308500 }, { "epoch": 0.07, "learning_rate": 4.8855572594808034e-05, "loss": 2.0897, "step": 309000 }, { "epoch": 0.07, "learning_rate": 4.885372077052779e-05, "loss": 2.0541, "step": 309500 }, { "epoch": 0.07, "learning_rate": 4.885186894624754e-05, "loss": 1.9981, "step": 310000 }, { "epoch": 0.07, "learning_rate": 4.88500171219673e-05, "loss": 2.0618, "step": 310500 }, { "epoch": 0.07, "learning_rate": 4.8848165297687055e-05, "loss": 2.0483, "step": 311000 }, { "epoch": 0.07, "learning_rate": 4.8846313473406805e-05, "loss": 2.1106, "step": 311500 }, { "epoch": 0.07, "learning_rate": 4.884446164912656e-05, "loss": 2.0483, "step": 312000 }, { "epoch": 0.07, "learning_rate": 4.884260982484631e-05, "loss": 2.0277, "step": 312500 }, { "epoch": 0.07, "learning_rate": 4.884075800056607e-05, "loss": 1.9978, "step": 313000 }, { "epoch": 0.07, "learning_rate": 4.883890617628582e-05, "loss": 2.0311, "step": 313500 }, { "epoch": 0.07, "learning_rate": 4.8837054352005576e-05, "loss": 2.059, "step": 314000 }, { "epoch": 0.07, "learning_rate": 4.883520252772533e-05, "loss": 2.0999, "step": 314500 }, { "epoch": 0.07, "learning_rate": 4.883335070344508e-05, "loss": 2.0588, "step": 315000 }, { "epoch": 0.07, "learning_rate": 4.883149887916484e-05, "loss": 2.0898, "step": 315500 }, { "epoch": 0.07, "learning_rate": 4.882964705488459e-05, "loss": 2.1084, "step": 316000 }, { "epoch": 0.07, "learning_rate": 4.882779523060435e-05, "loss": 2.1465, "step": 316500 }, { "epoch": 0.07, "learning_rate": 4.88259434063241e-05, "loss": 2.105, "step": 317000 }, { "epoch": 0.07, "learning_rate": 4.8824091582043854e-05, "loss": 2.0882, "step": 317500 }, { "epoch": 0.07, "learning_rate": 4.882223975776361e-05, "loss": 2.0737, "step": 318000 }, { "epoch": 0.07, "learning_rate": 4.882038793348336e-05, "loss": 2.1274, "step": 318500 }, { "epoch": 0.07, "learning_rate": 4.881853610920312e-05, "loss": 2.0419, "step": 319000 }, { "epoch": 0.07, "learning_rate": 4.881668428492287e-05, "loss": 2.0014, "step": 319500 }, { "epoch": 0.07, "learning_rate": 4.8814832460642625e-05, "loss": 2.0236, "step": 320000 }, { "epoch": 0.07, "learning_rate": 4.881298063636238e-05, "loss": 2.1488, "step": 320500 }, { "epoch": 0.07, "learning_rate": 4.881112881208213e-05, "loss": 2.0795, "step": 321000 }, { "epoch": 0.07, "learning_rate": 4.880927698780189e-05, "loss": 2.0669, "step": 321500 }, { "epoch": 0.07, "learning_rate": 4.880742516352164e-05, "loss": 2.1564, "step": 322000 }, { "epoch": 0.07, "learning_rate": 4.88055733392414e-05, "loss": 2.0505, "step": 322500 }, { "epoch": 0.07, "learning_rate": 4.880372151496115e-05, "loss": 2.0582, "step": 323000 }, { "epoch": 0.07, "learning_rate": 4.8801869690680904e-05, "loss": 2.1198, "step": 323500 }, { "epoch": 0.07, "learning_rate": 4.880001786640066e-05, "loss": 2.0779, "step": 324000 }, { "epoch": 0.07, "learning_rate": 4.879816604212041e-05, "loss": 2.098, "step": 324500 }, { "epoch": 0.07, "learning_rate": 4.879631421784017e-05, "loss": 2.068, "step": 325000 }, { "epoch": 0.07, "learning_rate": 4.879446239355992e-05, "loss": 2.0426, "step": 325500 }, { "epoch": 0.07, "learning_rate": 4.8792610569279675e-05, "loss": 2.0374, "step": 326000 }, { "epoch": 0.07, "learning_rate": 4.8790758744999425e-05, "loss": 2.0615, "step": 326500 }, { "epoch": 0.07, "learning_rate": 4.878890692071918e-05, "loss": 2.1002, "step": 327000 }, { "epoch": 0.07, "learning_rate": 4.878705509643894e-05, "loss": 2.1018, "step": 327500 }, { "epoch": 0.07, "learning_rate": 4.878520327215869e-05, "loss": 2.0349, "step": 328000 }, { "epoch": 0.07, "learning_rate": 4.8783351447878446e-05, "loss": 2.0651, "step": 328500 }, { "epoch": 0.07, "learning_rate": 4.8781499623598196e-05, "loss": 1.9743, "step": 329000 }, { "epoch": 0.07, "learning_rate": 4.877964779931795e-05, "loss": 2.0576, "step": 329500 }, { "epoch": 0.07, "learning_rate": 4.877779597503771e-05, "loss": 2.0568, "step": 330000 }, { "epoch": 0.07, "learning_rate": 4.877594415075746e-05, "loss": 2.0538, "step": 330500 }, { "epoch": 0.07, "learning_rate": 4.877409232647722e-05, "loss": 2.0727, "step": 331000 }, { "epoch": 0.07, "learning_rate": 4.877224050219697e-05, "loss": 2.1174, "step": 331500 }, { "epoch": 0.07, "learning_rate": 4.8770388677916724e-05, "loss": 2.1466, "step": 332000 }, { "epoch": 0.07, "learning_rate": 4.8768536853636475e-05, "loss": 2.1125, "step": 332500 }, { "epoch": 0.07, "learning_rate": 4.876668502935623e-05, "loss": 2.0779, "step": 333000 }, { "epoch": 0.07, "learning_rate": 4.876483320507599e-05, "loss": 2.065, "step": 333500 }, { "epoch": 0.07, "learning_rate": 4.876298138079574e-05, "loss": 2.0613, "step": 334000 }, { "epoch": 0.07, "learning_rate": 4.8761129556515496e-05, "loss": 2.1109, "step": 334500 }, { "epoch": 0.07, "learning_rate": 4.8759277732235246e-05, "loss": 2.0144, "step": 335000 }, { "epoch": 0.07, "learning_rate": 4.8757425907955e-05, "loss": 2.1222, "step": 335500 }, { "epoch": 0.07, "learning_rate": 4.875557408367475e-05, "loss": 2.0914, "step": 336000 }, { "epoch": 0.07, "learning_rate": 4.875372225939451e-05, "loss": 2.0208, "step": 336500 }, { "epoch": 0.07, "learning_rate": 4.875187043511427e-05, "loss": 2.0179, "step": 337000 }, { "epoch": 0.07, "learning_rate": 4.875001861083402e-05, "loss": 2.0414, "step": 337500 }, { "epoch": 0.08, "learning_rate": 4.8748166786553774e-05, "loss": 2.1039, "step": 338000 }, { "epoch": 0.08, "learning_rate": 4.8746314962273524e-05, "loss": 2.0596, "step": 338500 }, { "epoch": 0.08, "learning_rate": 4.874446313799328e-05, "loss": 2.0743, "step": 339000 }, { "epoch": 0.08, "learning_rate": 4.874261131371303e-05, "loss": 2.0327, "step": 339500 }, { "epoch": 0.08, "learning_rate": 4.874075948943279e-05, "loss": 2.0576, "step": 340000 }, { "epoch": 0.08, "learning_rate": 4.8738907665152545e-05, "loss": 2.0559, "step": 340500 }, { "epoch": 0.08, "learning_rate": 4.8737055840872295e-05, "loss": 2.042, "step": 341000 }, { "epoch": 0.08, "learning_rate": 4.873520401659205e-05, "loss": 2.0752, "step": 341500 }, { "epoch": 0.08, "learning_rate": 4.87333521923118e-05, "loss": 2.0516, "step": 342000 }, { "epoch": 0.08, "learning_rate": 4.873150036803156e-05, "loss": 2.1037, "step": 342500 }, { "epoch": 0.08, "learning_rate": 4.8729648543751316e-05, "loss": 2.1216, "step": 343000 }, { "epoch": 0.08, "learning_rate": 4.8727796719471066e-05, "loss": 2.0398, "step": 343500 }, { "epoch": 0.08, "learning_rate": 4.872594489519082e-05, "loss": 2.0446, "step": 344000 }, { "epoch": 0.08, "learning_rate": 4.8724093070910574e-05, "loss": 2.0327, "step": 344500 }, { "epoch": 0.08, "learning_rate": 4.872224124663033e-05, "loss": 2.0941, "step": 345000 }, { "epoch": 0.08, "learning_rate": 4.872038942235008e-05, "loss": 1.995, "step": 345500 }, { "epoch": 0.08, "learning_rate": 4.8718537598069844e-05, "loss": 2.0341, "step": 346000 }, { "epoch": 0.08, "learning_rate": 4.8716685773789595e-05, "loss": 1.9983, "step": 346500 }, { "epoch": 0.08, "learning_rate": 4.8714833949509345e-05, "loss": 2.0519, "step": 347000 }, { "epoch": 0.08, "learning_rate": 4.87129821252291e-05, "loss": 2.0699, "step": 347500 }, { "epoch": 0.08, "learning_rate": 4.871113030094885e-05, "loss": 2.0541, "step": 348000 }, { "epoch": 0.08, "learning_rate": 4.870927847666861e-05, "loss": 2.1049, "step": 348500 }, { "epoch": 0.08, "learning_rate": 4.870742665238836e-05, "loss": 2.0521, "step": 349000 }, { "epoch": 0.08, "learning_rate": 4.8705574828108116e-05, "loss": 2.0555, "step": 349500 }, { "epoch": 0.08, "learning_rate": 4.870372300382787e-05, "loss": 2.0386, "step": 350000 }, { "epoch": 0.08, "learning_rate": 4.870187117954762e-05, "loss": 2.0703, "step": 350500 }, { "epoch": 0.08, "learning_rate": 4.870001935526738e-05, "loss": 2.1147, "step": 351000 }, { "epoch": 0.08, "learning_rate": 4.869816753098713e-05, "loss": 2.025, "step": 351500 }, { "epoch": 0.08, "learning_rate": 4.869631570670689e-05, "loss": 2.1052, "step": 352000 }, { "epoch": 0.08, "learning_rate": 4.8694463882426644e-05, "loss": 2.1232, "step": 352500 }, { "epoch": 0.08, "learning_rate": 4.8692612058146394e-05, "loss": 2.0589, "step": 353000 }, { "epoch": 0.08, "learning_rate": 4.869076023386615e-05, "loss": 2.0957, "step": 353500 }, { "epoch": 0.08, "learning_rate": 4.86889084095859e-05, "loss": 2.1189, "step": 354000 }, { "epoch": 0.08, "learning_rate": 4.868705658530566e-05, "loss": 2.0901, "step": 354500 }, { "epoch": 0.08, "learning_rate": 4.868520476102541e-05, "loss": 2.0272, "step": 355000 }, { "epoch": 0.08, "learning_rate": 4.868335293674517e-05, "loss": 2.0451, "step": 355500 }, { "epoch": 0.08, "learning_rate": 4.868150111246492e-05, "loss": 2.0311, "step": 356000 }, { "epoch": 0.08, "learning_rate": 4.867964928818467e-05, "loss": 2.0617, "step": 356500 }, { "epoch": 0.0, "learning_rate": 4.867779746390443e-05, "loss": 2.0693, "step": 357000 }, { "epoch": 0.0, "learning_rate": 4.867594563962418e-05, "loss": 2.1253, "step": 357500 }, { "epoch": 0.0, "learning_rate": 4.8674093815343937e-05, "loss": 2.0701, "step": 358000 }, { "epoch": 0.0, "learning_rate": 4.867224199106369e-05, "loss": 2.0521, "step": 358500 }, { "epoch": 0.0, "learning_rate": 4.867039016678345e-05, "loss": 2.0298, "step": 359000 }, { "epoch": 0.0, "learning_rate": 4.86685383425032e-05, "loss": 2.0321, "step": 359500 }, { "epoch": 0.0, "learning_rate": 4.866668651822295e-05, "loss": 2.0373, "step": 360000 }, { "epoch": 0.0, "learning_rate": 4.866483469394271e-05, "loss": 2.0453, "step": 360500 }, { "epoch": 0.0, "learning_rate": 4.866298286966246e-05, "loss": 2.0423, "step": 361000 }, { "epoch": 0.0, "learning_rate": 4.8661131045382215e-05, "loss": 2.0792, "step": 361500 }, { "epoch": 0.0, "learning_rate": 4.865927922110197e-05, "loss": 2.1337, "step": 362000 }, { "epoch": 0.0, "learning_rate": 4.865742739682172e-05, "loss": 1.9964, "step": 362500 }, { "epoch": 0.0, "learning_rate": 4.865557557254148e-05, "loss": 2.0639, "step": 363000 }, { "epoch": 0.0, "learning_rate": 4.865372374826123e-05, "loss": 2.0159, "step": 363500 }, { "epoch": 0.0, "learning_rate": 4.8651871923980986e-05, "loss": 2.0956, "step": 364000 }, { "epoch": 0.0, "learning_rate": 4.8650020099700736e-05, "loss": 2.0309, "step": 364500 }, { "epoch": 0.0, "learning_rate": 4.864816827542049e-05, "loss": 2.0339, "step": 365000 }, { "epoch": 0.0, "learning_rate": 4.864631645114025e-05, "loss": 2.0868, "step": 365500 }, { "epoch": 0.0, "learning_rate": 4.864446462686e-05, "loss": 1.9837, "step": 366000 }, { "epoch": 0.0, "learning_rate": 4.864261280257976e-05, "loss": 2.0658, "step": 366500 }, { "epoch": 0.0, "learning_rate": 4.864076097829951e-05, "loss": 1.9948, "step": 367000 }, { "epoch": 0.0, "learning_rate": 4.8638909154019264e-05, "loss": 2.0789, "step": 367500 }, { "epoch": 0.0, "learning_rate": 4.8637057329739015e-05, "loss": 2.0611, "step": 368000 }, { "epoch": 0.0, "learning_rate": 4.863520550545878e-05, "loss": 2.0874, "step": 368500 }, { "epoch": 0.0, "learning_rate": 4.863335368117853e-05, "loss": 2.0583, "step": 369000 }, { "epoch": 0.0, "learning_rate": 4.863150185689828e-05, "loss": 2.0309, "step": 369500 }, { "epoch": 0.0, "learning_rate": 4.8629650032618036e-05, "loss": 2.0677, "step": 370000 }, { "epoch": 0.0, "learning_rate": 4.8627798208337786e-05, "loss": 1.9741, "step": 370500 }, { "epoch": 0.0, "learning_rate": 4.862594638405754e-05, "loss": 2.0508, "step": 371000 }, { "epoch": 0.0, "learning_rate": 4.86240945597773e-05, "loss": 2.0375, "step": 371500 }, { "epoch": 0.0, "learning_rate": 4.8622242735497057e-05, "loss": 2.0502, "step": 372000 }, { "epoch": 0.0, "learning_rate": 4.862039091121681e-05, "loss": 2.0519, "step": 372500 }, { "epoch": 0.0, "learning_rate": 4.861853908693656e-05, "loss": 1.9897, "step": 373000 }, { "epoch": 0.0, "learning_rate": 4.8616687262656314e-05, "loss": 2.0186, "step": 373500 }, { "epoch": 0.0, "learning_rate": 4.8614835438376064e-05, "loss": 2.031, "step": 374000 }, { "epoch": 0.0, "learning_rate": 4.861298361409582e-05, "loss": 2.0009, "step": 374500 }, { "epoch": 0.0, "learning_rate": 4.861113178981558e-05, "loss": 2.0465, "step": 375000 }, { "epoch": 0.0, "learning_rate": 4.8609279965535335e-05, "loss": 2.0583, "step": 375500 }, { "epoch": 0.0, "learning_rate": 4.8607428141255085e-05, "loss": 1.9984, "step": 376000 }, { "epoch": 0.0, "learning_rate": 4.8605576316974835e-05, "loss": 2.0159, "step": 376500 }, { "epoch": 0.0, "learning_rate": 4.860372449269459e-05, "loss": 2.0457, "step": 377000 }, { "epoch": 0.0, "learning_rate": 4.860187266841434e-05, "loss": 2.0298, "step": 377500 }, { "epoch": 0.0, "learning_rate": 4.8600020844134106e-05, "loss": 2.027, "step": 378000 }, { "epoch": 0.0, "learning_rate": 4.8598169019853856e-05, "loss": 2.1008, "step": 378500 }, { "epoch": 0.0, "learning_rate": 4.8596317195573606e-05, "loss": 2.0358, "step": 379000 }, { "epoch": 0.01, "learning_rate": 4.859446537129336e-05, "loss": 2.0131, "step": 379500 }, { "epoch": 0.01, "learning_rate": 4.8592613547013113e-05, "loss": 2.0221, "step": 380000 }, { "epoch": 0.01, "learning_rate": 4.859076172273287e-05, "loss": 2.0612, "step": 380500 }, { "epoch": 0.01, "learning_rate": 4.858890989845263e-05, "loss": 1.9445, "step": 381000 }, { "epoch": 0.01, "learning_rate": 4.8587058074172384e-05, "loss": 2.0688, "step": 381500 }, { "epoch": 0.01, "learning_rate": 4.8585206249892134e-05, "loss": 2.0329, "step": 382000 }, { "epoch": 0.01, "learning_rate": 4.8583354425611885e-05, "loss": 1.9824, "step": 382500 }, { "epoch": 0.01, "learning_rate": 4.858150260133164e-05, "loss": 2.0359, "step": 383000 }, { "epoch": 0.01, "learning_rate": 4.857965077705139e-05, "loss": 2.0619, "step": 383500 }, { "epoch": 0.01, "learning_rate": 4.857779895277115e-05, "loss": 1.9951, "step": 384000 }, { "epoch": 0.01, "learning_rate": 4.8575947128490906e-05, "loss": 2.0312, "step": 384500 }, { "epoch": 0.01, "learning_rate": 4.857409530421066e-05, "loss": 1.8873, "step": 385000 }, { "epoch": 0.01, "learning_rate": 4.857224347993041e-05, "loss": 1.9459, "step": 385500 }, { "epoch": 0.01, "learning_rate": 4.857039165565016e-05, "loss": 2.0782, "step": 386000 }, { "epoch": 0.01, "learning_rate": 4.856853983136992e-05, "loss": 1.974, "step": 386500 }, { "epoch": 0.01, "learning_rate": 4.856668800708967e-05, "loss": 2.0018, "step": 387000 }, { "epoch": 0.01, "learning_rate": 4.8564836182809434e-05, "loss": 2.0126, "step": 387500 }, { "epoch": 0.01, "learning_rate": 4.8562984358529184e-05, "loss": 1.999, "step": 388000 }, { "epoch": 0.0, "learning_rate": 4.856113253424894e-05, "loss": 1.5514, "step": 388500 }, { "epoch": 0.0, "learning_rate": 4.855928070996869e-05, "loss": 1.5872, "step": 389000 }, { "epoch": 0.0, "learning_rate": 4.855742888568844e-05, "loss": 1.5182, "step": 389500 }, { "epoch": 0.0, "learning_rate": 4.85555770614082e-05, "loss": 1.4703, "step": 390000 }, { "epoch": 0.0, "learning_rate": 4.855372523712795e-05, "loss": 1.454, "step": 390500 }, { "epoch": 0.0, "learning_rate": 4.855187341284771e-05, "loss": 1.4345, "step": 391000 }, { "epoch": 0.0, "learning_rate": 4.855002158856746e-05, "loss": 1.433, "step": 391500 }, { "epoch": 0.0, "learning_rate": 4.854816976428721e-05, "loss": 1.4309, "step": 392000 }, { "epoch": 0.0, "learning_rate": 4.854631794000697e-05, "loss": 1.3785, "step": 392500 }, { "epoch": 0.0, "learning_rate": 4.854446611572672e-05, "loss": 1.4492, "step": 393000 }, { "epoch": 0.0, "learning_rate": 4.8542614291446476e-05, "loss": 1.4585, "step": 393500 }, { "epoch": 0.0, "learning_rate": 4.8540762467166233e-05, "loss": 1.3263, "step": 394000 }, { "epoch": 0.0, "learning_rate": 4.853891064288599e-05, "loss": 1.411, "step": 394500 }, { "epoch": 0.0, "learning_rate": 4.853705881860574e-05, "loss": 1.3492, "step": 395000 }, { "epoch": 0.0, "learning_rate": 4.853520699432549e-05, "loss": 1.4024, "step": 395500 }, { "epoch": 0.0, "learning_rate": 4.853335517004525e-05, "loss": 1.3816, "step": 396000 }, { "epoch": 0.0, "learning_rate": 4.8531503345765e-05, "loss": 1.3385, "step": 396500 }, { "epoch": 0.0, "learning_rate": 4.852965152148476e-05, "loss": 1.4043, "step": 397000 }, { "epoch": 0.0, "learning_rate": 4.852779969720451e-05, "loss": 1.3078, "step": 397500 }, { "epoch": 0.0, "learning_rate": 4.852594787292427e-05, "loss": 1.3505, "step": 398000 }, { "epoch": 0.0, "learning_rate": 4.852409604864402e-05, "loss": 1.3205, "step": 398500 }, { "epoch": 0.0, "learning_rate": 4.852224422436377e-05, "loss": 1.3744, "step": 399000 }, { "epoch": 0.0, "learning_rate": 4.8520392400083526e-05, "loss": 1.3697, "step": 399500 }, { "epoch": 0.0, "learning_rate": 4.8518540575803276e-05, "loss": 1.3913, "step": 400000 }, { "epoch": 0.0, "learning_rate": 4.851668875152304e-05, "loss": 1.3664, "step": 400500 }, { "epoch": 0.0, "learning_rate": 4.851483692724279e-05, "loss": 1.3093, "step": 401000 }, { "epoch": 0.0, "learning_rate": 4.851298510296255e-05, "loss": 1.3701, "step": 401500 }, { "epoch": 0.0, "learning_rate": 4.85111332786823e-05, "loss": 1.2745, "step": 402000 }, { "epoch": 0.0, "learning_rate": 4.850928145440205e-05, "loss": 1.3443, "step": 402500 }, { "epoch": 0.0, "learning_rate": 4.8507429630121804e-05, "loss": 1.3313, "step": 403000 }, { "epoch": 0.0, "learning_rate": 4.850557780584156e-05, "loss": 1.3441, "step": 403500 }, { "epoch": 0.0, "learning_rate": 4.850372598156132e-05, "loss": 1.3171, "step": 404000 }, { "epoch": 0.0, "learning_rate": 4.850187415728107e-05, "loss": 1.2825, "step": 404500 }, { "epoch": 0.0, "learning_rate": 4.850002233300082e-05, "loss": 1.317, "step": 405000 }, { "epoch": 0.0, "learning_rate": 4.8498170508720575e-05, "loss": 1.313, "step": 405500 }, { "epoch": 0.0, "learning_rate": 4.8496318684440326e-05, "loss": 1.2906, "step": 406000 }, { "epoch": 0.0, "learning_rate": 4.849446686016009e-05, "loss": 1.3497, "step": 406500 }, { "epoch": 0.0, "learning_rate": 4.849261503587984e-05, "loss": 1.352, "step": 407000 }, { "epoch": 0.0, "learning_rate": 4.8490763211599596e-05, "loss": 1.3175, "step": 407500 }, { "epoch": 0.0, "learning_rate": 4.848891138731935e-05, "loss": 1.336, "step": 408000 }, { "epoch": 0.0, "learning_rate": 4.84870595630391e-05, "loss": 1.3383, "step": 408500 }, { "epoch": 0.0, "learning_rate": 4.8485207738758854e-05, "loss": 1.3373, "step": 409000 }, { "epoch": 0.0, "learning_rate": 4.8483355914478604e-05, "loss": 1.3319, "step": 409500 }, { "epoch": 0.0, "learning_rate": 4.848150409019837e-05, "loss": 1.386, "step": 410000 }, { "epoch": 0.0, "learning_rate": 4.847965226591812e-05, "loss": 1.3308, "step": 410500 }, { "epoch": 0.01, "learning_rate": 4.8477800441637875e-05, "loss": 1.306, "step": 411000 }, { "epoch": 0.01, "learning_rate": 4.8475948617357625e-05, "loss": 1.3129, "step": 411500 }, { "epoch": 0.01, "learning_rate": 4.8474096793077375e-05, "loss": 1.3699, "step": 412000 }, { "epoch": 0.01, "learning_rate": 4.847224496879713e-05, "loss": 1.2743, "step": 412500 }, { "epoch": 0.01, "learning_rate": 4.847039314451689e-05, "loss": 1.3443, "step": 413000 }, { "epoch": 0.01, "learning_rate": 4.8468541320236646e-05, "loss": 1.355, "step": 413500 }, { "epoch": 0.01, "learning_rate": 4.8466689495956396e-05, "loss": 1.2891, "step": 414000 }, { "epoch": 0.01, "learning_rate": 4.846483767167615e-05, "loss": 1.3233, "step": 414500 }, { "epoch": 0.01, "learning_rate": 4.84629858473959e-05, "loss": 1.3748, "step": 415000 }, { "epoch": 0.01, "learning_rate": 4.846113402311565e-05, "loss": 1.3159, "step": 415500 }, { "epoch": 0.01, "learning_rate": 4.845928219883541e-05, "loss": 1.3326, "step": 416000 }, { "epoch": 0.01, "learning_rate": 4.845743037455517e-05, "loss": 1.2145, "step": 416500 }, { "epoch": 0.01, "learning_rate": 4.8455578550274924e-05, "loss": 1.2815, "step": 417000 }, { "epoch": 0.01, "learning_rate": 4.8453726725994674e-05, "loss": 1.4068, "step": 417500 }, { "epoch": 0.01, "learning_rate": 4.845187490171443e-05, "loss": 1.3169, "step": 418000 }, { "epoch": 0.01, "learning_rate": 4.845002307743418e-05, "loss": 1.32, "step": 418500 }, { "epoch": 0.01, "learning_rate": 4.844817125315393e-05, "loss": 1.3116, "step": 419000 }, { "epoch": 0.01, "learning_rate": 4.8446319428873695e-05, "loss": 1.3248, "step": 419500 }, { "epoch": 0.0, "learning_rate": 4.8444467604593446e-05, "loss": 0.9912, "step": 420000 }, { "epoch": 0.0, "learning_rate": 4.84426157803132e-05, "loss": 0.2414, "step": 420500 }, { "epoch": 0.0, "learning_rate": 4.844076395603295e-05, "loss": 1.1361, "step": 421000 }, { "epoch": 0.0, "learning_rate": 4.84389121317527e-05, "loss": 1.0437, "step": 421500 }, { "epoch": 0.0, "learning_rate": 4.843706030747246e-05, "loss": 1.0156, "step": 422000 }, { "epoch": 0.0, "learning_rate": 4.843520848319222e-05, "loss": 0.9771, "step": 422500 }, { "epoch": 0.0, "learning_rate": 4.8433356658911974e-05, "loss": 0.947, "step": 423000 }, { "epoch": 0.0, "learning_rate": 4.8431504834631724e-05, "loss": 0.9558, "step": 423500 }, { "epoch": 0.0, "learning_rate": 4.842965301035148e-05, "loss": 0.9735, "step": 424000 }, { "epoch": 0.0, "learning_rate": 4.842780118607123e-05, "loss": 0.9152, "step": 424500 }, { "epoch": 0.0, "learning_rate": 4.842594936179098e-05, "loss": 0.9708, "step": 425000 }, { "epoch": 0.0, "learning_rate": 4.842409753751074e-05, "loss": 0.9948, "step": 425500 }, { "epoch": 0.0, "learning_rate": 4.8422245713230495e-05, "loss": 0.9122, "step": 426000 }, { "epoch": 0.0, "learning_rate": 4.842039388895025e-05, "loss": 0.9536, "step": 426500 }, { "epoch": 0.0, "learning_rate": 4.841854206467e-05, "loss": 0.8994, "step": 427000 }, { "epoch": 0.0, "learning_rate": 4.841669024038976e-05, "loss": 0.9621, "step": 427500 }, { "epoch": 0.0, "learning_rate": 4.841483841610951e-05, "loss": 0.9441, "step": 428000 }, { "epoch": 0.0, "learning_rate": 4.841298659182926e-05, "loss": 0.885, "step": 428500 }, { "epoch": 0.0, "learning_rate": 4.841113476754902e-05, "loss": 0.9768, "step": 429000 }, { "epoch": 0.0, "learning_rate": 4.840928294326877e-05, "loss": 0.8844, "step": 429500 }, { "epoch": 0.0, "learning_rate": 4.840743111898853e-05, "loss": 0.9265, "step": 430000 }, { "epoch": 0.0, "learning_rate": 4.840557929470828e-05, "loss": 0.8965, "step": 430500 }, { "epoch": 0.0, "learning_rate": 4.840372747042804e-05, "loss": 0.9333, "step": 431000 }, { "epoch": 0.0, "learning_rate": 4.840187564614779e-05, "loss": 0.9553, "step": 431500 }, { "epoch": 0.0, "learning_rate": 4.8400023821867545e-05, "loss": 0.9454, "step": 432000 }, { "epoch": 0.0, "learning_rate": 4.83981719975873e-05, "loss": 0.9002, "step": 432500 }, { "epoch": 0.0, "learning_rate": 4.839632017330705e-05, "loss": 0.869, "step": 433000 }, { "epoch": 0.0, "learning_rate": 4.839446834902681e-05, "loss": 0.9202, "step": 433500 }, { "epoch": 0.0, "learning_rate": 4.839261652474656e-05, "loss": 0.881, "step": 434000 }, { "epoch": 0.0, "learning_rate": 4.839076470046631e-05, "loss": 0.906, "step": 434500 }, { "epoch": 0.0, "learning_rate": 4.8388912876186066e-05, "loss": 0.9354, "step": 435000 }, { "epoch": 0.0, "learning_rate": 4.838706105190582e-05, "loss": 0.9224, "step": 435500 }, { "epoch": 0.0, "learning_rate": 4.838520922762558e-05, "loss": 0.9339, "step": 436000 }, { "epoch": 0.0, "learning_rate": 4.838335740334533e-05, "loss": 0.8621, "step": 436500 }, { "epoch": 0.0, "learning_rate": 4.838150557906509e-05, "loss": 0.9027, "step": 437000 }, { "epoch": 0.0, "learning_rate": 4.837965375478484e-05, "loss": 0.8946, "step": 437500 }, { "epoch": 0.0, "learning_rate": 4.837780193050459e-05, "loss": 0.8645, "step": 438000 }, { "epoch": 0.0, "learning_rate": 4.837595010622435e-05, "loss": 0.9124, "step": 438500 }, { "epoch": 0.0, "learning_rate": 4.83740982819441e-05, "loss": 0.9027, "step": 439000 }, { "epoch": 0.0, "learning_rate": 4.837224645766386e-05, "loss": 0.9064, "step": 439500 }, { "epoch": 0.0, "learning_rate": 4.837039463338361e-05, "loss": 0.9091, "step": 440000 }, { "epoch": 0.0, "learning_rate": 4.8368542809103365e-05, "loss": 0.9159, "step": 440500 }, { "epoch": 0.0, "learning_rate": 4.8366690984823115e-05, "loss": 0.907, "step": 441000 }, { "epoch": 0.0, "learning_rate": 4.8364839160542866e-05, "loss": 0.8951, "step": 441500 }, { "epoch": 0.0, "learning_rate": 4.836298733626263e-05, "loss": 0.9397, "step": 442000 }, { "epoch": 0.0, "learning_rate": 4.836113551198238e-05, "loss": 0.8823, "step": 442500 }, { "epoch": 0.01, "learning_rate": 4.8359283687702136e-05, "loss": 0.8911, "step": 443000 }, { "epoch": 0.01, "learning_rate": 4.8357431863421887e-05, "loss": 0.8936, "step": 443500 }, { "epoch": 0.01, "learning_rate": 4.8355580039141643e-05, "loss": 0.9403, "step": 444000 }, { "epoch": 0.01, "learning_rate": 4.8353728214861394e-05, "loss": 0.8754, "step": 444500 }, { "epoch": 0.01, "learning_rate": 4.835187639058115e-05, "loss": 0.9071, "step": 445000 }, { "epoch": 0.01, "learning_rate": 4.835002456630091e-05, "loss": 0.9121, "step": 445500 }, { "epoch": 0.01, "learning_rate": 4.834817274202066e-05, "loss": 0.8832, "step": 446000 }, { "epoch": 0.01, "learning_rate": 4.8346320917740415e-05, "loss": 0.8992, "step": 446500 }, { "epoch": 0.01, "learning_rate": 4.8344469093460165e-05, "loss": 0.8978, "step": 447000 }, { "epoch": 0.01, "learning_rate": 4.834261726917992e-05, "loss": 0.8992, "step": 447500 }, { "epoch": 0.01, "learning_rate": 4.834076544489968e-05, "loss": 0.8995, "step": 448000 }, { "epoch": 0.01, "learning_rate": 4.833891362061943e-05, "loss": 0.8194, "step": 448500 }, { "epoch": 0.01, "learning_rate": 4.8337061796339186e-05, "loss": 0.8614, "step": 449000 }, { "epoch": 0.01, "learning_rate": 4.8335209972058936e-05, "loss": 0.9629, "step": 449500 }, { "epoch": 0.01, "learning_rate": 4.833335814777869e-05, "loss": 0.8858, "step": 450000 }, { "epoch": 0.01, "learning_rate": 4.833150632349844e-05, "loss": 0.8869, "step": 450500 }, { "epoch": 0.01, "learning_rate": 4.832965449921819e-05, "loss": 0.9027, "step": 451000 }, { "epoch": 0.01, "learning_rate": 4.832780267493796e-05, "loss": 0.9113, "step": 451500 }, { "epoch": 0.01, "learning_rate": 4.832595085065771e-05, "loss": 2.3254, "step": 452000 }, { "epoch": 0.01, "learning_rate": 4.8324099026377464e-05, "loss": 2.1624, "step": 452500 }, { "epoch": 0.01, "learning_rate": 4.8322247202097214e-05, "loss": 2.1429, "step": 453000 }, { "epoch": 0.01, "learning_rate": 4.832039537781697e-05, "loss": 2.1301, "step": 453500 }, { "epoch": 0.01, "learning_rate": 4.831854355353672e-05, "loss": 2.2401, "step": 454000 }, { "epoch": 0.01, "learning_rate": 4.831669172925648e-05, "loss": 2.1386, "step": 454500 }, { "epoch": 0.01, "learning_rate": 4.8314839904976235e-05, "loss": 2.178, "step": 455000 }, { "epoch": 0.01, "learning_rate": 4.8312988080695985e-05, "loss": 2.1548, "step": 455500 }, { "epoch": 0.01, "learning_rate": 4.831113625641574e-05, "loss": 2.1627, "step": 456000 }, { "epoch": 0.01, "learning_rate": 4.830928443213549e-05, "loss": 2.2119, "step": 456500 }, { "epoch": 0.01, "learning_rate": 4.830743260785525e-05, "loss": 2.212, "step": 457000 }, { "epoch": 0.01, "learning_rate": 4.8305580783575007e-05, "loss": 2.1224, "step": 457500 }, { "epoch": 0.01, "learning_rate": 4.830372895929476e-05, "loss": 2.2152, "step": 458000 }, { "epoch": 0.01, "learning_rate": 4.8301877135014514e-05, "loss": 2.1278, "step": 458500 }, { "epoch": 0.01, "learning_rate": 4.8300025310734264e-05, "loss": 2.1791, "step": 459000 }, { "epoch": 0.01, "learning_rate": 4.829817348645402e-05, "loss": 2.137, "step": 459500 }, { "epoch": 0.01, "learning_rate": 4.829632166217377e-05, "loss": 2.177, "step": 460000 }, { "epoch": 0.01, "learning_rate": 4.829446983789353e-05, "loss": 2.1272, "step": 460500 }, { "epoch": 0.01, "learning_rate": 4.8292618013613285e-05, "loss": 2.1177, "step": 461000 }, { "epoch": 0.01, "learning_rate": 4.8290766189333035e-05, "loss": 2.106, "step": 461500 }, { "epoch": 0.01, "learning_rate": 4.828891436505279e-05, "loss": 2.2041, "step": 462000 }, { "epoch": 0.01, "learning_rate": 4.828706254077254e-05, "loss": 2.1161, "step": 462500 }, { "epoch": 0.01, "learning_rate": 4.82852107164923e-05, "loss": 2.1244, "step": 463000 }, { "epoch": 0.01, "learning_rate": 4.828335889221205e-05, "loss": 2.0704, "step": 463500 }, { "epoch": 0.01, "learning_rate": 4.8281507067931806e-05, "loss": 2.1964, "step": 464000 }, { "epoch": 0.01, "learning_rate": 4.827965524365156e-05, "loss": 2.1661, "step": 464500 }, { "epoch": 0.01, "learning_rate": 4.827780341937131e-05, "loss": 2.1416, "step": 465000 }, { "epoch": 0.01, "learning_rate": 4.827595159509107e-05, "loss": 2.1467, "step": 465500 }, { "epoch": 0.01, "learning_rate": 4.827409977081082e-05, "loss": 2.1394, "step": 466000 }, { "epoch": 0.01, "learning_rate": 4.827224794653058e-05, "loss": 2.079, "step": 466500 }, { "epoch": 0.01, "learning_rate": 4.827039612225033e-05, "loss": 2.0361, "step": 467000 }, { "epoch": 0.01, "learning_rate": 4.8268544297970084e-05, "loss": 2.15, "step": 467500 }, { "epoch": 0.01, "learning_rate": 4.826669247368984e-05, "loss": 2.1647, "step": 468000 }, { "epoch": 0.01, "learning_rate": 4.826484064940959e-05, "loss": 2.1629, "step": 468500 }, { "epoch": 0.01, "learning_rate": 4.826298882512935e-05, "loss": 2.1334, "step": 469000 }, { "epoch": 0.01, "learning_rate": 4.82611370008491e-05, "loss": 2.1717, "step": 469500 }, { "epoch": 0.01, "learning_rate": 4.8259285176568856e-05, "loss": 2.1106, "step": 470000 }, { "epoch": 0.01, "learning_rate": 4.825743335228861e-05, "loss": 2.0979, "step": 470500 }, { "epoch": 0.01, "learning_rate": 4.825558152800836e-05, "loss": 2.0478, "step": 471000 }, { "epoch": 0.01, "learning_rate": 4.825372970372812e-05, "loss": 2.1778, "step": 471500 }, { "epoch": 0.01, "learning_rate": 4.825187787944787e-05, "loss": 2.1486, "step": 472000 }, { "epoch": 0.01, "learning_rate": 4.825002605516763e-05, "loss": 2.0521, "step": 472500 }, { "epoch": 0.01, "learning_rate": 4.824817423088738e-05, "loss": 2.0744, "step": 473000 }, { "epoch": 0.01, "learning_rate": 4.8246322406607134e-05, "loss": 2.027, "step": 473500 }, { "epoch": 0.01, "learning_rate": 4.824447058232689e-05, "loss": 2.1347, "step": 474000 }, { "epoch": 0.01, "learning_rate": 4.824261875804664e-05, "loss": 2.0729, "step": 474500 }, { "epoch": 0.01, "learning_rate": 4.82407669337664e-05, "loss": 2.0448, "step": 475000 }, { "epoch": 0.01, "learning_rate": 4.823891510948615e-05, "loss": 2.1762, "step": 475500 }, { "epoch": 0.01, "learning_rate": 4.8237063285205905e-05, "loss": 2.0928, "step": 476000 }, { "epoch": 0.01, "learning_rate": 4.8235211460925655e-05, "loss": 2.1397, "step": 476500 }, { "epoch": 0.01, "learning_rate": 4.823335963664541e-05, "loss": 2.1395, "step": 477000 }, { "epoch": 0.01, "learning_rate": 4.823150781236517e-05, "loss": 2.0817, "step": 477500 }, { "epoch": 0.01, "learning_rate": 4.822965598808492e-05, "loss": 2.1291, "step": 478000 }, { "epoch": 0.01, "learning_rate": 4.8227804163804676e-05, "loss": 2.1392, "step": 478500 }, { "epoch": 0.01, "learning_rate": 4.8225952339524426e-05, "loss": 2.1123, "step": 479000 }, { "epoch": 0.01, "learning_rate": 4.8224100515244183e-05, "loss": 2.1467, "step": 479500 }, { "epoch": 0.01, "learning_rate": 4.822224869096394e-05, "loss": 2.0869, "step": 480000 }, { "epoch": 0.01, "learning_rate": 4.822039686668369e-05, "loss": 2.1907, "step": 480500 }, { "epoch": 0.01, "learning_rate": 4.821854504240345e-05, "loss": 2.1319, "step": 481000 }, { "epoch": 0.01, "learning_rate": 4.82166932181232e-05, "loss": 2.0684, "step": 481500 }, { "epoch": 0.01, "learning_rate": 4.8214841393842955e-05, "loss": 2.0638, "step": 482000 }, { "epoch": 0.01, "learning_rate": 4.8212989569562705e-05, "loss": 2.17, "step": 482500 }, { "epoch": 0.01, "learning_rate": 4.821113774528246e-05, "loss": 2.0686, "step": 483000 }, { "epoch": 0.01, "learning_rate": 4.820928592100222e-05, "loss": 2.1202, "step": 483500 }, { "epoch": 0.01, "learning_rate": 4.820743409672197e-05, "loss": 2.1897, "step": 484000 }, { "epoch": 0.01, "learning_rate": 4.8205582272441726e-05, "loss": 2.0982, "step": 484500 }, { "epoch": 0.01, "learning_rate": 4.8203730448161476e-05, "loss": 2.1002, "step": 485000 }, { "epoch": 0.01, "learning_rate": 4.820187862388123e-05, "loss": 2.0226, "step": 485500 }, { "epoch": 0.01, "learning_rate": 4.820002679960098e-05, "loss": 2.1291, "step": 486000 }, { "epoch": 0.01, "learning_rate": 4.819817497532074e-05, "loss": 2.1365, "step": 486500 }, { "epoch": 0.01, "learning_rate": 4.81963231510405e-05, "loss": 2.0817, "step": 487000 }, { "epoch": 0.01, "learning_rate": 4.819447132676025e-05, "loss": 2.0787, "step": 487500 }, { "epoch": 0.02, "learning_rate": 4.8192619502480004e-05, "loss": 2.0578, "step": 488000 }, { "epoch": 0.02, "learning_rate": 4.8190767678199754e-05, "loss": 2.0518, "step": 488500 }, { "epoch": 0.02, "learning_rate": 4.818891585391951e-05, "loss": 2.0799, "step": 489000 }, { "epoch": 0.02, "learning_rate": 4.818706402963927e-05, "loss": 2.1135, "step": 489500 }, { "epoch": 0.02, "learning_rate": 4.818521220535902e-05, "loss": 2.1702, "step": 490000 }, { "epoch": 0.02, "learning_rate": 4.8183360381078775e-05, "loss": 2.0636, "step": 490500 }, { "epoch": 0.02, "learning_rate": 4.8181508556798525e-05, "loss": 2.0613, "step": 491000 }, { "epoch": 0.02, "learning_rate": 4.817965673251828e-05, "loss": 2.0866, "step": 491500 }, { "epoch": 0.02, "learning_rate": 4.817780490823803e-05, "loss": 2.0757, "step": 492000 }, { "epoch": 0.02, "learning_rate": 4.817595308395779e-05, "loss": 2.071, "step": 492500 }, { "epoch": 0.02, "learning_rate": 4.8174101259677546e-05, "loss": 2.0989, "step": 493000 }, { "epoch": 0.02, "learning_rate": 4.8172249435397297e-05, "loss": 2.0395, "step": 493500 }, { "epoch": 0.02, "learning_rate": 4.8170397611117054e-05, "loss": 2.121, "step": 494000 }, { "epoch": 0.02, "learning_rate": 4.8168545786836804e-05, "loss": 2.1112, "step": 494500 }, { "epoch": 0.02, "learning_rate": 4.816669396255656e-05, "loss": 2.0351, "step": 495000 }, { "epoch": 0.02, "learning_rate": 4.816484213827631e-05, "loss": 2.139, "step": 495500 }, { "epoch": 0.02, "learning_rate": 4.816299031399607e-05, "loss": 2.0029, "step": 496000 }, { "epoch": 0.02, "learning_rate": 4.8161138489715825e-05, "loss": 2.05, "step": 496500 }, { "epoch": 0.02, "learning_rate": 4.8159286665435575e-05, "loss": 2.0657, "step": 497000 }, { "epoch": 0.02, "learning_rate": 4.815743484115533e-05, "loss": 2.106, "step": 497500 }, { "epoch": 0.02, "learning_rate": 4.815558301687508e-05, "loss": 2.0543, "step": 498000 }, { "epoch": 0.02, "learning_rate": 4.815373119259484e-05, "loss": 2.0514, "step": 498500 }, { "epoch": 0.02, "learning_rate": 4.8151879368314596e-05, "loss": 2.0788, "step": 499000 }, { "epoch": 0.02, "learning_rate": 4.8150027544034346e-05, "loss": 2.0402, "step": 499500 }, { "epoch": 0.02, "learning_rate": 4.81481757197541e-05, "loss": 2.1085, "step": 500000 }, { "epoch": 0.02, "learning_rate": 4.814632389547385e-05, "loss": 2.0492, "step": 500500 }, { "epoch": 0.02, "learning_rate": 4.814447207119361e-05, "loss": 2.0417, "step": 501000 }, { "epoch": 0.02, "learning_rate": 4.814262024691336e-05, "loss": 2.1058, "step": 501500 }, { "epoch": 0.02, "learning_rate": 4.814076842263312e-05, "loss": 2.0954, "step": 502000 }, { "epoch": 0.02, "learning_rate": 4.8138916598352874e-05, "loss": 2.0358, "step": 502500 }, { "epoch": 0.02, "learning_rate": 4.8137064774072624e-05, "loss": 2.1121, "step": 503000 }, { "epoch": 0.02, "learning_rate": 4.813521294979238e-05, "loss": 2.064, "step": 503500 }, { "epoch": 0.02, "learning_rate": 4.813336112551213e-05, "loss": 2.0141, "step": 504000 }, { "epoch": 0.02, "learning_rate": 4.813150930123189e-05, "loss": 2.0963, "step": 504500 }, { "epoch": 0.02, "learning_rate": 4.812965747695164e-05, "loss": 2.092, "step": 505000 }, { "epoch": 0.02, "learning_rate": 4.8127805652671396e-05, "loss": 2.0702, "step": 505500 }, { "epoch": 0.02, "learning_rate": 4.812595382839115e-05, "loss": 2.132, "step": 506000 }, { "epoch": 0.02, "learning_rate": 4.81241020041109e-05, "loss": 2.0766, "step": 506500 }, { "epoch": 0.02, "learning_rate": 4.812225017983066e-05, "loss": 2.1155, "step": 507000 }, { "epoch": 0.02, "learning_rate": 4.812039835555041e-05, "loss": 2.0564, "step": 507500 }, { "epoch": 0.02, "learning_rate": 4.811854653127017e-05, "loss": 2.0596, "step": 508000 }, { "epoch": 0.02, "learning_rate": 4.8116694706989924e-05, "loss": 2.1147, "step": 508500 }, { "epoch": 0.02, "learning_rate": 4.8114842882709674e-05, "loss": 2.1054, "step": 509000 }, { "epoch": 0.02, "learning_rate": 4.811299105842943e-05, "loss": 2.0307, "step": 509500 }, { "epoch": 0.02, "learning_rate": 4.811113923414918e-05, "loss": 2.0596, "step": 510000 }, { "epoch": 0.02, "learning_rate": 4.810928740986894e-05, "loss": 2.0605, "step": 510500 }, { "epoch": 0.02, "learning_rate": 4.810743558558869e-05, "loss": 2.056, "step": 511000 }, { "epoch": 0.02, "learning_rate": 4.8105583761308445e-05, "loss": 2.0342, "step": 511500 }, { "epoch": 0.02, "learning_rate": 4.81037319370282e-05, "loss": 2.1145, "step": 512000 }, { "epoch": 0.02, "learning_rate": 4.810188011274795e-05, "loss": 2.0648, "step": 512500 }, { "epoch": 0.02, "learning_rate": 4.810002828846771e-05, "loss": 2.1499, "step": 513000 }, { "epoch": 0.02, "learning_rate": 4.809817646418746e-05, "loss": 2.1408, "step": 513500 }, { "epoch": 0.02, "learning_rate": 4.8096324639907216e-05, "loss": 2.0036, "step": 514000 }, { "epoch": 0.02, "learning_rate": 4.8094472815626966e-05, "loss": 2.0102, "step": 514500 }, { "epoch": 0.02, "learning_rate": 4.809262099134672e-05, "loss": 2.0586, "step": 515000 }, { "epoch": 0.02, "learning_rate": 4.809076916706648e-05, "loss": 2.094, "step": 515500 }, { "epoch": 0.02, "learning_rate": 4.808891734278623e-05, "loss": 2.0283, "step": 516000 }, { "epoch": 0.02, "learning_rate": 4.808706551850599e-05, "loss": 2.0327, "step": 516500 }, { "epoch": 0.02, "learning_rate": 4.808521369422574e-05, "loss": 2.1024, "step": 517000 }, { "epoch": 0.02, "learning_rate": 4.8083361869945495e-05, "loss": 2.0714, "step": 517500 }, { "epoch": 0.02, "learning_rate": 4.8081510045665245e-05, "loss": 2.0896, "step": 518000 }, { "epoch": 0.02, "learning_rate": 4.8079658221385e-05, "loss": 2.0503, "step": 518500 }, { "epoch": 0.02, "learning_rate": 4.807780639710476e-05, "loss": 2.0088, "step": 519000 }, { "epoch": 0.02, "learning_rate": 4.807595457282451e-05, "loss": 1.9966, "step": 519500 }, { "epoch": 0.02, "learning_rate": 4.8074102748544266e-05, "loss": 2.0258, "step": 520000 }, { "epoch": 0.02, "learning_rate": 4.8072250924264016e-05, "loss": 2.1098, "step": 520500 }, { "epoch": 0.02, "learning_rate": 4.807039909998377e-05, "loss": 2.0957, "step": 521000 }, { "epoch": 0.02, "learning_rate": 4.806854727570353e-05, "loss": 2.2702, "step": 521500 }, { "epoch": 0.02, "learning_rate": 4.806669545142328e-05, "loss": 2.0737, "step": 522000 }, { "epoch": 0.02, "learning_rate": 4.806484362714304e-05, "loss": 2.0648, "step": 522500 }, { "epoch": 0.02, "learning_rate": 4.806299180286279e-05, "loss": 2.0431, "step": 523000 }, { "epoch": 0.02, "learning_rate": 4.8061139978582544e-05, "loss": 2.0472, "step": 523500 }, { "epoch": 0.02, "learning_rate": 4.8059288154302294e-05, "loss": 2.0653, "step": 524000 }, { "epoch": 0.02, "learning_rate": 4.805743633002205e-05, "loss": 2.0622, "step": 524500 }, { "epoch": 0.02, "learning_rate": 4.805558450574181e-05, "loss": 2.0607, "step": 525000 }, { "epoch": 0.02, "learning_rate": 4.805373268146156e-05, "loss": 2.0656, "step": 525500 }, { "epoch": 0.02, "learning_rate": 4.8051880857181315e-05, "loss": 2.0377, "step": 526000 }, { "epoch": 0.02, "learning_rate": 4.8050029032901065e-05, "loss": 2.0661, "step": 526500 }, { "epoch": 0.02, "learning_rate": 4.804817720862082e-05, "loss": 2.086, "step": 527000 }, { "epoch": 0.02, "learning_rate": 4.804632538434057e-05, "loss": 2.029, "step": 527500 }, { "epoch": 0.02, "learning_rate": 4.804447356006033e-05, "loss": 2.0559, "step": 528000 }, { "epoch": 0.02, "learning_rate": 4.8042621735780086e-05, "loss": 2.0894, "step": 528500 }, { "epoch": 0.02, "learning_rate": 4.8040769911499837e-05, "loss": 2.0668, "step": 529000 }, { "epoch": 0.02, "learning_rate": 4.8038918087219593e-05, "loss": 2.0502, "step": 529500 }, { "epoch": 0.02, "learning_rate": 4.8037066262939344e-05, "loss": 2.0705, "step": 530000 }, { "epoch": 0.02, "learning_rate": 4.80352144386591e-05, "loss": 2.0961, "step": 530500 }, { "epoch": 0.02, "learning_rate": 4.803336261437886e-05, "loss": 2.0796, "step": 531000 }, { "epoch": 0.02, "learning_rate": 4.803151079009861e-05, "loss": 2.0869, "step": 531500 }, { "epoch": 0.02, "learning_rate": 4.8029658965818365e-05, "loss": 2.0623, "step": 532000 }, { "epoch": 0.02, "learning_rate": 4.8027807141538115e-05, "loss": 2.0733, "step": 532500 }, { "epoch": 0.03, "learning_rate": 4.802595531725787e-05, "loss": 2.1174, "step": 533000 }, { "epoch": 0.03, "learning_rate": 4.802410349297762e-05, "loss": 2.0586, "step": 533500 }, { "epoch": 0.03, "learning_rate": 4.802225166869738e-05, "loss": 2.1289, "step": 534000 }, { "epoch": 0.03, "learning_rate": 4.8020399844417136e-05, "loss": 2.1147, "step": 534500 }, { "epoch": 0.03, "learning_rate": 4.8018548020136886e-05, "loss": 2.0457, "step": 535000 }, { "epoch": 0.03, "learning_rate": 4.801669619585664e-05, "loss": 2.0674, "step": 535500 }, { "epoch": 0.03, "learning_rate": 4.801484437157639e-05, "loss": 2.011, "step": 536000 }, { "epoch": 0.03, "learning_rate": 4.801299254729615e-05, "loss": 2.0423, "step": 536500 }, { "epoch": 0.03, "learning_rate": 4.80111407230159e-05, "loss": 2.0702, "step": 537000 }, { "epoch": 0.03, "learning_rate": 4.800928889873566e-05, "loss": 2.0714, "step": 537500 }, { "epoch": 0.03, "learning_rate": 4.8007437074455414e-05, "loss": 2.0333, "step": 538000 }, { "epoch": 0.03, "learning_rate": 4.8005585250175164e-05, "loss": 2.0615, "step": 538500 }, { "epoch": 0.03, "learning_rate": 4.800373342589492e-05, "loss": 2.0282, "step": 539000 }, { "epoch": 0.03, "learning_rate": 4.800188160161467e-05, "loss": 2.0782, "step": 539500 }, { "epoch": 0.03, "learning_rate": 4.800002977733443e-05, "loss": 2.0383, "step": 540000 }, { "epoch": 0.03, "learning_rate": 4.7998177953054185e-05, "loss": 2.0591, "step": 540500 }, { "epoch": 0.03, "learning_rate": 4.7996326128773935e-05, "loss": 2.0856, "step": 541000 }, { "epoch": 0.03, "learning_rate": 4.799447430449369e-05, "loss": 2.0869, "step": 541500 }, { "epoch": 0.03, "learning_rate": 4.799262248021344e-05, "loss": 2.0858, "step": 542000 }, { "epoch": 0.03, "learning_rate": 4.79907706559332e-05, "loss": 2.0886, "step": 542500 }, { "epoch": 0.03, "learning_rate": 4.798891883165295e-05, "loss": 2.0282, "step": 543000 }, { "epoch": 0.03, "learning_rate": 4.798706700737271e-05, "loss": 2.0458, "step": 543500 }, { "epoch": 0.03, "learning_rate": 4.7985215183092464e-05, "loss": 1.9995, "step": 544000 }, { "epoch": 0.03, "learning_rate": 4.7983363358812214e-05, "loss": 2.1449, "step": 544500 }, { "epoch": 0.03, "learning_rate": 4.798151153453197e-05, "loss": 2.0537, "step": 545000 }, { "epoch": 0.03, "learning_rate": 4.797965971025172e-05, "loss": 2.089, "step": 545500 }, { "epoch": 0.03, "learning_rate": 4.797780788597148e-05, "loss": 2.0663, "step": 546000 }, { "epoch": 0.03, "learning_rate": 4.797595606169123e-05, "loss": 2.0505, "step": 546500 }, { "epoch": 0.03, "learning_rate": 4.797410423741099e-05, "loss": 2.0557, "step": 547000 }, { "epoch": 0.03, "learning_rate": 4.797225241313074e-05, "loss": 2.0147, "step": 547500 }, { "epoch": 0.03, "learning_rate": 4.797040058885049e-05, "loss": 2.0047, "step": 548000 }, { "epoch": 0.03, "learning_rate": 4.796854876457025e-05, "loss": 2.0574, "step": 548500 }, { "epoch": 0.03, "learning_rate": 4.796669694029e-05, "loss": 2.0718, "step": 549000 }, { "epoch": 0.03, "learning_rate": 4.7964845116009756e-05, "loss": 2.0654, "step": 549500 }, { "epoch": 0.03, "learning_rate": 4.796299329172951e-05, "loss": 2.1151, "step": 550000 }, { "epoch": 0.03, "learning_rate": 4.796114146744926e-05, "loss": 2.0348, "step": 550500 }, { "epoch": 0.03, "learning_rate": 4.795928964316902e-05, "loss": 2.0774, "step": 551000 }, { "epoch": 0.03, "learning_rate": 4.795743781888877e-05, "loss": 2.0468, "step": 551500 }, { "epoch": 0.03, "learning_rate": 4.795558599460853e-05, "loss": 2.0486, "step": 552000 }, { "epoch": 0.03, "learning_rate": 4.795373417032828e-05, "loss": 2.0349, "step": 552500 }, { "epoch": 0.03, "learning_rate": 4.7951882346048034e-05, "loss": 2.0806, "step": 553000 }, { "epoch": 0.03, "learning_rate": 4.795003052176779e-05, "loss": 2.0708, "step": 553500 }, { "epoch": 0.03, "learning_rate": 4.794817869748754e-05, "loss": 1.9739, "step": 554000 }, { "epoch": 0.03, "learning_rate": 4.79463268732073e-05, "loss": 2.0079, "step": 554500 }, { "epoch": 0.03, "learning_rate": 4.794447504892705e-05, "loss": 2.0891, "step": 555000 }, { "epoch": 0.03, "learning_rate": 4.7942623224646806e-05, "loss": 2.0833, "step": 555500 }, { "epoch": 0.03, "learning_rate": 4.7940771400366556e-05, "loss": 2.0084, "step": 556000 }, { "epoch": 0.03, "learning_rate": 4.793891957608632e-05, "loss": 2.0073, "step": 556500 }, { "epoch": 0.03, "learning_rate": 4.793706775180607e-05, "loss": 2.0094, "step": 557000 }, { "epoch": 0.03, "learning_rate": 4.793521592752582e-05, "loss": 2.0845, "step": 557500 }, { "epoch": 0.03, "learning_rate": 4.793336410324558e-05, "loss": 2.019, "step": 558000 }, { "epoch": 0.03, "learning_rate": 4.793151227896533e-05, "loss": 2.0751, "step": 558500 }, { "epoch": 0.03, "learning_rate": 4.7929660454685084e-05, "loss": 2.0965, "step": 559000 }, { "epoch": 0.03, "learning_rate": 4.792780863040484e-05, "loss": 2.0792, "step": 559500 }, { "epoch": 0.03, "learning_rate": 4.79259568061246e-05, "loss": 2.0228, "step": 560000 }, { "epoch": 0.03, "learning_rate": 4.792410498184435e-05, "loss": 2.0399, "step": 560500 }, { "epoch": 0.03, "learning_rate": 4.79222531575641e-05, "loss": 2.003, "step": 561000 }, { "epoch": 0.03, "learning_rate": 4.7920401333283855e-05, "loss": 2.0385, "step": 561500 }, { "epoch": 0.03, "learning_rate": 4.7918549509003605e-05, "loss": 2.0809, "step": 562000 }, { "epoch": 0.03, "learning_rate": 4.791669768472336e-05, "loss": 2.0491, "step": 562500 }, { "epoch": 0.03, "learning_rate": 4.791484586044312e-05, "loss": 2.0152, "step": 563000 }, { "epoch": 0.03, "learning_rate": 4.791299403616287e-05, "loss": 1.9715, "step": 563500 }, { "epoch": 0.03, "learning_rate": 4.7911142211882626e-05, "loss": 2.0302, "step": 564000 }, { "epoch": 0.03, "learning_rate": 4.7909290387602376e-05, "loss": 2.0566, "step": 564500 }, { "epoch": 0.03, "learning_rate": 4.7907438563322133e-05, "loss": 2.0745, "step": 565000 }, { "epoch": 0.03, "learning_rate": 4.7905586739041884e-05, "loss": 2.045, "step": 565500 }, { "epoch": 0.03, "learning_rate": 4.790373491476165e-05, "loss": 2.0369, "step": 566000 }, { "epoch": 0.03, "learning_rate": 4.79018830904814e-05, "loss": 1.9952, "step": 566500 }, { "epoch": 0.03, "learning_rate": 4.790003126620115e-05, "loss": 2.1119, "step": 567000 }, { "epoch": 0.03, "learning_rate": 4.7898179441920905e-05, "loss": 2.046, "step": 567500 }, { "epoch": 0.03, "learning_rate": 4.7896327617640655e-05, "loss": 2.0293, "step": 568000 }, { "epoch": 0.03, "learning_rate": 4.789447579336041e-05, "loss": 2.0215, "step": 568500 }, { "epoch": 0.03, "learning_rate": 4.789262396908016e-05, "loss": 2.0929, "step": 569000 }, { "epoch": 0.03, "learning_rate": 4.7890772144799926e-05, "loss": 2.012, "step": 569500 }, { "epoch": 0.03, "learning_rate": 4.7888920320519676e-05, "loss": 2.0962, "step": 570000 }, { "epoch": 0.03, "learning_rate": 4.7887068496239426e-05, "loss": 1.9749, "step": 570500 }, { "epoch": 0.03, "learning_rate": 4.788521667195918e-05, "loss": 2.0525, "step": 571000 }, { "epoch": 0.03, "learning_rate": 4.788336484767893e-05, "loss": 2.0129, "step": 571500 }, { "epoch": 0.03, "learning_rate": 4.788151302339869e-05, "loss": 2.025, "step": 572000 }, { "epoch": 0.03, "learning_rate": 4.787966119911845e-05, "loss": 2.0897, "step": 572500 }, { "epoch": 0.03, "learning_rate": 4.7877809374838204e-05, "loss": 2.0451, "step": 573000 }, { "epoch": 0.03, "learning_rate": 4.7875957550557954e-05, "loss": 1.9899, "step": 573500 }, { "epoch": 0.03, "learning_rate": 4.7874105726277704e-05, "loss": 2.0091, "step": 574000 }, { "epoch": 0.03, "learning_rate": 4.787225390199746e-05, "loss": 2.0697, "step": 574500 }, { "epoch": 0.03, "learning_rate": 4.787040207771721e-05, "loss": 2.0429, "step": 575000 }, { "epoch": 0.03, "learning_rate": 4.7868550253436975e-05, "loss": 2.0134, "step": 575500 }, { "epoch": 0.03, "learning_rate": 4.7866698429156725e-05, "loss": 2.0047, "step": 576000 }, { "epoch": 0.03, "learning_rate": 4.7864846604876475e-05, "loss": 1.9744, "step": 576500 }, { "epoch": 0.03, "learning_rate": 4.786299478059623e-05, "loss": 2.0071, "step": 577000 }, { "epoch": 0.03, "learning_rate": 4.786114295631598e-05, "loss": 2.0124, "step": 577500 }, { "epoch": 0.04, "learning_rate": 4.785929113203574e-05, "loss": 2.0256, "step": 578000 }, { "epoch": 0.04, "learning_rate": 4.785743930775549e-05, "loss": 1.9891, "step": 578500 }, { "epoch": 0.04, "learning_rate": 4.785558748347525e-05, "loss": 2.004, "step": 579000 }, { "epoch": 0.04, "learning_rate": 4.7853735659195004e-05, "loss": 2.0468, "step": 579500 }, { "epoch": 0.04, "learning_rate": 4.7851883834914754e-05, "loss": 1.9861, "step": 580000 }, { "epoch": 0.04, "learning_rate": 4.785003201063451e-05, "loss": 2.022, "step": 580500 }, { "epoch": 0.04, "learning_rate": 4.784818018635426e-05, "loss": 2.0494, "step": 581000 }, { "epoch": 0.04, "learning_rate": 4.784632836207402e-05, "loss": 2.0752, "step": 581500 }, { "epoch": 0.04, "learning_rate": 4.7844476537793775e-05, "loss": 2.0183, "step": 582000 }, { "epoch": 0.04, "learning_rate": 4.784262471351353e-05, "loss": 2.0207, "step": 582500 }, { "epoch": 0.04, "learning_rate": 4.784077288923328e-05, "loss": 2.0097, "step": 583000 }, { "epoch": 0.04, "learning_rate": 4.783892106495303e-05, "loss": 2.0166, "step": 583500 }, { "epoch": 0.04, "learning_rate": 4.783706924067279e-05, "loss": 1.9941, "step": 584000 }, { "epoch": 0.04, "learning_rate": 4.783521741639254e-05, "loss": 2.0063, "step": 584500 }, { "epoch": 0.04, "learning_rate": 4.7833365592112296e-05, "loss": 2.0428, "step": 585000 }, { "epoch": 0.04, "learning_rate": 4.783151376783205e-05, "loss": 2.0325, "step": 585500 }, { "epoch": 0.04, "learning_rate": 4.782966194355181e-05, "loss": 2.0902, "step": 586000 }, { "epoch": 0.04, "learning_rate": 4.782781011927156e-05, "loss": 2.0336, "step": 586500 }, { "epoch": 0.04, "learning_rate": 4.782595829499131e-05, "loss": 2.0924, "step": 587000 }, { "epoch": 0.04, "learning_rate": 4.782410647071107e-05, "loss": 2.1075, "step": 587500 }, { "epoch": 0.04, "learning_rate": 4.782225464643082e-05, "loss": 2.033, "step": 588000 }, { "epoch": 0.04, "learning_rate": 4.782040282215058e-05, "loss": 2.0679, "step": 588500 }, { "epoch": 0.04, "learning_rate": 4.781855099787033e-05, "loss": 2.0452, "step": 589000 }, { "epoch": 0.04, "learning_rate": 4.781669917359009e-05, "loss": 2.017, "step": 589500 }, { "epoch": 0.04, "learning_rate": 4.781484734930984e-05, "loss": 2.0043, "step": 590000 }, { "epoch": 0.04, "learning_rate": 4.781299552502959e-05, "loss": 2.0344, "step": 590500 }, { "epoch": 0.04, "learning_rate": 4.7811143700749346e-05, "loss": 2.0212, "step": 591000 }, { "epoch": 0.04, "learning_rate": 4.78092918764691e-05, "loss": 1.9791, "step": 591500 }, { "epoch": 0.04, "learning_rate": 4.780744005218886e-05, "loss": 2.0494, "step": 592000 }, { "epoch": 0.04, "learning_rate": 4.780558822790861e-05, "loss": 2.0608, "step": 592500 }, { "epoch": 0.04, "learning_rate": 4.780373640362836e-05, "loss": 1.9911, "step": 593000 }, { "epoch": 0.04, "learning_rate": 4.780188457934812e-05, "loss": 1.9922, "step": 593500 }, { "epoch": 0.04, "learning_rate": 4.780003275506787e-05, "loss": 2.0017, "step": 594000 }, { "epoch": 0.04, "learning_rate": 4.7798180930787624e-05, "loss": 2.1381, "step": 594500 }, { "epoch": 0.04, "learning_rate": 4.779632910650738e-05, "loss": 2.0281, "step": 595000 }, { "epoch": 0.04, "learning_rate": 4.779447728222714e-05, "loss": 1.9434, "step": 595500 }, { "epoch": 0.04, "learning_rate": 4.779262545794689e-05, "loss": 1.9834, "step": 596000 }, { "epoch": 0.04, "learning_rate": 4.779077363366664e-05, "loss": 1.9982, "step": 596500 }, { "epoch": 0.04, "learning_rate": 4.7788921809386395e-05, "loss": 2.035, "step": 597000 }, { "epoch": 0.04, "learning_rate": 4.7787069985106145e-05, "loss": 2.0292, "step": 597500 }, { "epoch": 0.04, "learning_rate": 4.778521816082591e-05, "loss": 2.019, "step": 598000 }, { "epoch": 0.04, "learning_rate": 4.778336633654566e-05, "loss": 2.0064, "step": 598500 }, { "epoch": 0.04, "learning_rate": 4.7781514512265416e-05, "loss": 2.041, "step": 599000 }, { "epoch": 0.04, "learning_rate": 4.7779662687985166e-05, "loss": 2.0466, "step": 599500 }, { "epoch": 0.04, "learning_rate": 4.7777810863704916e-05, "loss": 1.9773, "step": 600000 }, { "epoch": 0.04, "learning_rate": 4.777595903942467e-05, "loss": 2.0342, "step": 600500 }, { "epoch": 0.04, "learning_rate": 4.777410721514443e-05, "loss": 2.0407, "step": 601000 }, { "epoch": 0.04, "learning_rate": 4.777225539086419e-05, "loss": 2.0732, "step": 601500 }, { "epoch": 0.04, "learning_rate": 4.777040356658394e-05, "loss": 2.0047, "step": 602000 }, { "epoch": 0.04, "learning_rate": 4.7768551742303694e-05, "loss": 2.0766, "step": 602500 }, { "epoch": 0.04, "learning_rate": 4.7766699918023445e-05, "loss": 2.0555, "step": 603000 }, { "epoch": 0.04, "learning_rate": 4.7764848093743195e-05, "loss": 2.1317, "step": 603500 }, { "epoch": 0.04, "learning_rate": 4.776299626946295e-05, "loss": 2.0506, "step": 604000 }, { "epoch": 0.04, "learning_rate": 4.776114444518271e-05, "loss": 2.0278, "step": 604500 }, { "epoch": 0.04, "learning_rate": 4.7759292620902466e-05, "loss": 2.0139, "step": 605000 }, { "epoch": 0.04, "learning_rate": 4.7757440796622216e-05, "loss": 2.0501, "step": 605500 }, { "epoch": 0.04, "learning_rate": 4.7755588972341966e-05, "loss": 2.0084, "step": 606000 }, { "epoch": 0.04, "learning_rate": 4.775373714806172e-05, "loss": 2.0657, "step": 606500 }, { "epoch": 0.04, "learning_rate": 4.775188532378147e-05, "loss": 2.0552, "step": 607000 }, { "epoch": 0.04, "learning_rate": 4.775003349950124e-05, "loss": 2.0163, "step": 607500 }, { "epoch": 0.04, "learning_rate": 4.774818167522099e-05, "loss": 2.0418, "step": 608000 }, { "epoch": 0.04, "learning_rate": 4.7746329850940744e-05, "loss": 2.0153, "step": 608500 }, { "epoch": 0.04, "learning_rate": 4.7744478026660494e-05, "loss": 2.0354, "step": 609000 }, { "epoch": 0.04, "learning_rate": 4.7742626202380244e-05, "loss": 2.0825, "step": 609500 }, { "epoch": 0.04, "learning_rate": 4.77407743781e-05, "loss": 2.0415, "step": 610000 }, { "epoch": 0.04, "learning_rate": 4.773892255381975e-05, "loss": 2.0278, "step": 610500 }, { "epoch": 0.04, "learning_rate": 4.7737070729539515e-05, "loss": 2.0174, "step": 611000 }, { "epoch": 0.04, "learning_rate": 4.7735218905259265e-05, "loss": 1.9517, "step": 611500 }, { "epoch": 0.04, "learning_rate": 4.773336708097902e-05, "loss": 1.9906, "step": 612000 }, { "epoch": 0.04, "learning_rate": 4.773151525669877e-05, "loss": 2.0513, "step": 612500 }, { "epoch": 0.04, "learning_rate": 4.772966343241852e-05, "loss": 2.0269, "step": 613000 }, { "epoch": 0.04, "learning_rate": 4.772781160813828e-05, "loss": 2.0175, "step": 613500 }, { "epoch": 0.04, "learning_rate": 4.7725959783858036e-05, "loss": 2.0251, "step": 614000 }, { "epoch": 0.04, "learning_rate": 4.772410795957779e-05, "loss": 2.0621, "step": 614500 }, { "epoch": 0.04, "learning_rate": 4.7722256135297543e-05, "loss": 1.9919, "step": 615000 }, { "epoch": 0.04, "learning_rate": 4.77204043110173e-05, "loss": 2.043, "step": 615500 }, { "epoch": 0.04, "learning_rate": 4.771855248673705e-05, "loss": 2.081, "step": 616000 }, { "epoch": 0.04, "learning_rate": 4.77167006624568e-05, "loss": 1.9483, "step": 616500 }, { "epoch": 0.04, "learning_rate": 4.7714848838176564e-05, "loss": 2.0136, "step": 617000 }, { "epoch": 0.04, "learning_rate": 4.7712997013896315e-05, "loss": 2.0351, "step": 617500 }, { "epoch": 0.04, "learning_rate": 4.771114518961607e-05, "loss": 2.0785, "step": 618000 }, { "epoch": 0.04, "learning_rate": 4.770929336533582e-05, "loss": 2.0317, "step": 618500 }, { "epoch": 0.04, "learning_rate": 4.770744154105557e-05, "loss": 2.0219, "step": 619000 }, { "epoch": 0.04, "learning_rate": 4.770558971677533e-05, "loss": 1.9949, "step": 619500 }, { "epoch": 0.04, "learning_rate": 4.770373789249508e-05, "loss": 2.0794, "step": 620000 }, { "epoch": 0.04, "learning_rate": 4.770188606821484e-05, "loss": 2.0506, "step": 620500 }, { "epoch": 0.04, "learning_rate": 4.770003424393459e-05, "loss": 2.0202, "step": 621000 }, { "epoch": 0.04, "learning_rate": 4.769818241965435e-05, "loss": 2.0131, "step": 621500 }, { "epoch": 0.04, "learning_rate": 4.76963305953741e-05, "loss": 2.0382, "step": 622000 }, { "epoch": 0.04, "learning_rate": 4.769447877109385e-05, "loss": 1.9874, "step": 622500 }, { "epoch": 0.05, "learning_rate": 4.769262694681361e-05, "loss": 2.0775, "step": 623000 }, { "epoch": 0.05, "learning_rate": 4.7690775122533364e-05, "loss": 2.0465, "step": 623500 }, { "epoch": 0.05, "learning_rate": 4.768892329825312e-05, "loss": 2.0227, "step": 624000 }, { "epoch": 0.05, "learning_rate": 4.768707147397287e-05, "loss": 1.9616, "step": 624500 }, { "epoch": 0.05, "learning_rate": 4.768521964969263e-05, "loss": 1.9414, "step": 625000 }, { "epoch": 0.05, "learning_rate": 4.768336782541238e-05, "loss": 2.0177, "step": 625500 }, { "epoch": 0.05, "learning_rate": 4.768151600113213e-05, "loss": 2.1022, "step": 626000 }, { "epoch": 0.05, "learning_rate": 4.767966417685189e-05, "loss": 2.0752, "step": 626500 }, { "epoch": 0.05, "learning_rate": 4.767781235257164e-05, "loss": 2.0178, "step": 627000 }, { "epoch": 0.05, "learning_rate": 4.76759605282914e-05, "loss": 2.0748, "step": 627500 }, { "epoch": 0.05, "learning_rate": 4.767410870401115e-05, "loss": 2.0444, "step": 628000 }, { "epoch": 0.05, "learning_rate": 4.7672256879730906e-05, "loss": 2.0182, "step": 628500 }, { "epoch": 0.05, "learning_rate": 4.767040505545066e-05, "loss": 1.9869, "step": 629000 }, { "epoch": 0.05, "learning_rate": 4.766855323117041e-05, "loss": 2.0076, "step": 629500 }, { "epoch": 0.05, "learning_rate": 4.766670140689017e-05, "loss": 2.0308, "step": 630000 }, { "epoch": 0.05, "learning_rate": 4.766484958260992e-05, "loss": 2.0345, "step": 630500 }, { "epoch": 0.05, "learning_rate": 4.766299775832968e-05, "loss": 2.0501, "step": 631000 }, { "epoch": 0.05, "learning_rate": 4.766114593404943e-05, "loss": 2.0931, "step": 631500 }, { "epoch": 0.05, "learning_rate": 4.7659294109769185e-05, "loss": 2.0988, "step": 632000 }, { "epoch": 0.05, "learning_rate": 4.7657442285488935e-05, "loss": 2.0448, "step": 632500 }, { "epoch": 0.05, "learning_rate": 4.765559046120869e-05, "loss": 1.9611, "step": 633000 }, { "epoch": 0.05, "learning_rate": 4.765373863692845e-05, "loss": 2.0259, "step": 633500 }, { "epoch": 0.05, "learning_rate": 4.76518868126482e-05, "loss": 1.9305, "step": 634000 }, { "epoch": 0.05, "learning_rate": 4.7650034988367956e-05, "loss": 2.0294, "step": 634500 }, { "epoch": 0.05, "learning_rate": 4.7648183164087706e-05, "loss": 2.0582, "step": 635000 }, { "epoch": 0.05, "learning_rate": 4.7646331339807456e-05, "loss": 2.0443, "step": 635500 }, { "epoch": 0.05, "learning_rate": 4.764447951552721e-05, "loss": 2.0119, "step": 636000 }, { "epoch": 0.05, "learning_rate": 4.764262769124697e-05, "loss": 2.0091, "step": 636500 }, { "epoch": 0.05, "learning_rate": 4.764077586696673e-05, "loss": 1.9696, "step": 637000 }, { "epoch": 0.05, "learning_rate": 4.763892404268648e-05, "loss": 2.0121, "step": 637500 }, { "epoch": 0.05, "learning_rate": 4.7637072218406234e-05, "loss": 1.9961, "step": 638000 }, { "epoch": 0.05, "learning_rate": 4.7635220394125984e-05, "loss": 1.9558, "step": 638500 }, { "epoch": 0.05, "learning_rate": 4.7633368569845735e-05, "loss": 2.021, "step": 639000 }, { "epoch": 0.05, "learning_rate": 4.76315167455655e-05, "loss": 2.0348, "step": 639500 }, { "epoch": 0.05, "learning_rate": 4.762966492128525e-05, "loss": 2.019, "step": 640000 }, { "epoch": 0.05, "learning_rate": 4.7627813097005005e-05, "loss": 2.0637, "step": 640500 }, { "epoch": 0.05, "learning_rate": 4.7625961272724756e-05, "loss": 1.9666, "step": 641000 }, { "epoch": 0.05, "learning_rate": 4.762410944844451e-05, "loss": 2.0059, "step": 641500 }, { "epoch": 0.05, "learning_rate": 4.762225762416426e-05, "loss": 2.0479, "step": 642000 }, { "epoch": 0.05, "learning_rate": 4.762040579988402e-05, "loss": 2.0932, "step": 642500 }, { "epoch": 0.05, "learning_rate": 4.761855397560378e-05, "loss": 1.9762, "step": 643000 }, { "epoch": 0.05, "learning_rate": 4.761670215132353e-05, "loss": 2.0326, "step": 643500 }, { "epoch": 0.05, "learning_rate": 4.7614850327043284e-05, "loss": 2.0609, "step": 644000 }, { "epoch": 0.05, "learning_rate": 4.7612998502763034e-05, "loss": 2.0542, "step": 644500 }, { "epoch": 0.05, "learning_rate": 4.761114667848279e-05, "loss": 2.0496, "step": 645000 }, { "epoch": 0.05, "learning_rate": 4.760929485420254e-05, "loss": 2.0059, "step": 645500 }, { "epoch": 0.05, "learning_rate": 4.76074430299223e-05, "loss": 2.0539, "step": 646000 }, { "epoch": 0.05, "learning_rate": 4.7605591205642055e-05, "loss": 2.0313, "step": 646500 }, { "epoch": 0.05, "learning_rate": 4.7603739381361805e-05, "loss": 2.0165, "step": 647000 }, { "epoch": 0.05, "learning_rate": 4.760188755708156e-05, "loss": 2.014, "step": 647500 }, { "epoch": 0.05, "learning_rate": 4.760003573280131e-05, "loss": 2.0117, "step": 648000 }, { "epoch": 0.05, "learning_rate": 4.759818390852106e-05, "loss": 2.0378, "step": 648500 }, { "epoch": 0.05, "learning_rate": 4.7596332084240826e-05, "loss": 2.0072, "step": 649000 }, { "epoch": 0.05, "learning_rate": 4.7594480259960576e-05, "loss": 2.0517, "step": 649500 }, { "epoch": 0.05, "learning_rate": 4.759262843568033e-05, "loss": 2.0016, "step": 650000 }, { "epoch": 0.05, "learning_rate": 4.7590776611400083e-05, "loss": 2.018, "step": 650500 }, { "epoch": 0.05, "learning_rate": 4.758892478711984e-05, "loss": 2.0559, "step": 651000 }, { "epoch": 0.05, "learning_rate": 4.758707296283959e-05, "loss": 2.0775, "step": 651500 }, { "epoch": 0.05, "learning_rate": 4.758522113855935e-05, "loss": 2.0608, "step": 652000 }, { "epoch": 0.05, "learning_rate": 4.7583369314279104e-05, "loss": 2.0145, "step": 652500 }, { "epoch": 0.05, "learning_rate": 4.7581517489998855e-05, "loss": 2.0347, "step": 653000 }, { "epoch": 0.05, "learning_rate": 4.757966566571861e-05, "loss": 2.0073, "step": 653500 }, { "epoch": 0.05, "learning_rate": 4.757781384143836e-05, "loss": 1.9822, "step": 654000 }, { "epoch": 0.05, "learning_rate": 4.757596201715812e-05, "loss": 2.0566, "step": 654500 }, { "epoch": 0.05, "learning_rate": 4.757411019287787e-05, "loss": 2.0311, "step": 655000 }, { "epoch": 0.05, "learning_rate": 4.7572258368597626e-05, "loss": 2.0328, "step": 655500 }, { "epoch": 0.05, "learning_rate": 4.757040654431738e-05, "loss": 2.0276, "step": 656000 }, { "epoch": 0.05, "learning_rate": 4.756855472003713e-05, "loss": 1.9355, "step": 656500 }, { "epoch": 0.05, "learning_rate": 4.756670289575689e-05, "loss": 2.0245, "step": 657000 }, { "epoch": 0.05, "learning_rate": 4.756485107147664e-05, "loss": 2.0109, "step": 657500 }, { "epoch": 0.05, "learning_rate": 4.75629992471964e-05, "loss": 2.0427, "step": 658000 }, { "epoch": 0.05, "learning_rate": 4.7561147422916154e-05, "loss": 2.0446, "step": 658500 }, { "epoch": 0.05, "learning_rate": 4.7559295598635904e-05, "loss": 1.9809, "step": 659000 }, { "epoch": 0.05, "learning_rate": 4.755744377435566e-05, "loss": 2.0091, "step": 659500 }, { "epoch": 0.05, "learning_rate": 4.755559195007541e-05, "loss": 1.9967, "step": 660000 }, { "epoch": 0.05, "learning_rate": 4.755374012579517e-05, "loss": 2.1323, "step": 660500 }, { "epoch": 0.05, "learning_rate": 4.755188830151492e-05, "loss": 2.0253, "step": 661000 }, { "epoch": 0.05, "learning_rate": 4.755003647723467e-05, "loss": 2.0328, "step": 661500 }, { "epoch": 0.05, "learning_rate": 4.754818465295443e-05, "loss": 2.0563, "step": 662000 }, { "epoch": 0.05, "learning_rate": 4.754633282867418e-05, "loss": 2.0115, "step": 662500 }, { "epoch": 0.05, "learning_rate": 4.754448100439394e-05, "loss": 1.9977, "step": 663000 }, { "epoch": 0.05, "learning_rate": 4.754262918011369e-05, "loss": 1.9745, "step": 663500 }, { "epoch": 0.05, "learning_rate": 4.7540777355833446e-05, "loss": 2.043, "step": 664000 }, { "epoch": 0.05, "learning_rate": 4.7538925531553197e-05, "loss": 2.0064, "step": 664500 }, { "epoch": 0.05, "learning_rate": 4.7537073707272954e-05, "loss": 2.0269, "step": 665000 }, { "epoch": 0.05, "learning_rate": 4.753522188299271e-05, "loss": 1.9855, "step": 665500 }, { "epoch": 0.05, "learning_rate": 4.753337005871246e-05, "loss": 2.0638, "step": 666000 }, { "epoch": 0.05, "learning_rate": 4.753151823443222e-05, "loss": 2.0233, "step": 666500 }, { "epoch": 0.05, "learning_rate": 4.752966641015197e-05, "loss": 2.0178, "step": 667000 }, { "epoch": 0.05, "learning_rate": 4.7527814585871725e-05, "loss": 2.0099, "step": 667500 }, { "epoch": 0.06, "learning_rate": 4.752596276159148e-05, "loss": 2.0214, "step": 668000 }, { "epoch": 0.06, "learning_rate": 4.752411093731123e-05, "loss": 2.0505, "step": 668500 }, { "epoch": 0.06, "learning_rate": 4.752225911303099e-05, "loss": 2.0089, "step": 669000 }, { "epoch": 0.06, "learning_rate": 4.752040728875074e-05, "loss": 2.0446, "step": 669500 }, { "epoch": 0.06, "learning_rate": 4.7518555464470496e-05, "loss": 1.991, "step": 670000 }, { "epoch": 0.06, "learning_rate": 4.7516703640190246e-05, "loss": 1.974, "step": 670500 }, { "epoch": 0.06, "learning_rate": 4.751485181591e-05, "loss": 1.9898, "step": 671000 }, { "epoch": 0.06, "learning_rate": 4.751299999162976e-05, "loss": 2.0428, "step": 671500 }, { "epoch": 0.06, "learning_rate": 4.751114816734951e-05, "loss": 2.0752, "step": 672000 }, { "epoch": 0.06, "learning_rate": 4.750929634306927e-05, "loss": 1.9674, "step": 672500 }, { "epoch": 0.06, "learning_rate": 4.750744451878902e-05, "loss": 2.0534, "step": 673000 }, { "epoch": 0.06, "learning_rate": 4.7505592694508774e-05, "loss": 1.9736, "step": 673500 }, { "epoch": 0.06, "learning_rate": 4.7503740870228524e-05, "loss": 2.0287, "step": 674000 }, { "epoch": 0.06, "learning_rate": 4.750188904594828e-05, "loss": 2.0177, "step": 674500 }, { "epoch": 0.06, "learning_rate": 4.750003722166804e-05, "loss": 2.0582, "step": 675000 }, { "epoch": 0.06, "learning_rate": 4.749818539738779e-05, "loss": 2.0747, "step": 675500 }, { "epoch": 0.06, "learning_rate": 4.7496333573107545e-05, "loss": 2.0076, "step": 676000 }, { "epoch": 0.06, "learning_rate": 4.7494481748827296e-05, "loss": 1.9876, "step": 676500 }, { "epoch": 0.06, "learning_rate": 4.749262992454705e-05, "loss": 1.9917, "step": 677000 }, { "epoch": 0.06, "learning_rate": 4.749077810026681e-05, "loss": 2.0341, "step": 677500 }, { "epoch": 0.06, "learning_rate": 4.748892627598656e-05, "loss": 2.0301, "step": 678000 }, { "epoch": 0.06, "learning_rate": 4.7487074451706317e-05, "loss": 2.0224, "step": 678500 }, { "epoch": 0.06, "learning_rate": 4.748522262742607e-05, "loss": 2.03, "step": 679000 }, { "epoch": 0.06, "learning_rate": 4.7483370803145824e-05, "loss": 1.9859, "step": 679500 }, { "epoch": 0.06, "learning_rate": 4.7481518978865574e-05, "loss": 1.9941, "step": 680000 }, { "epoch": 0.06, "learning_rate": 4.747966715458533e-05, "loss": 1.9824, "step": 680500 }, { "epoch": 0.06, "learning_rate": 4.747781533030509e-05, "loss": 1.98, "step": 681000 }, { "epoch": 0.06, "learning_rate": 4.747596350602484e-05, "loss": 1.9798, "step": 681500 }, { "epoch": 0.06, "learning_rate": 4.7474111681744595e-05, "loss": 2.0378, "step": 682000 }, { "epoch": 0.06, "learning_rate": 4.7472259857464345e-05, "loss": 1.9793, "step": 682500 }, { "epoch": 0.06, "learning_rate": 4.74704080331841e-05, "loss": 2.0431, "step": 683000 }, { "epoch": 0.06, "learning_rate": 4.746855620890385e-05, "loss": 2.0299, "step": 683500 }, { "epoch": 0.06, "learning_rate": 4.746670438462361e-05, "loss": 2.0331, "step": 684000 }, { "epoch": 0.06, "learning_rate": 4.7464852560343366e-05, "loss": 2.0741, "step": 684500 }, { "epoch": 0.06, "learning_rate": 4.7463000736063116e-05, "loss": 1.9826, "step": 685000 }, { "epoch": 0.06, "learning_rate": 4.746114891178287e-05, "loss": 2.0529, "step": 685500 }, { "epoch": 0.06, "learning_rate": 4.745929708750262e-05, "loss": 2.0303, "step": 686000 }, { "epoch": 0.06, "learning_rate": 4.745744526322238e-05, "loss": 1.9617, "step": 686500 }, { "epoch": 0.06, "learning_rate": 4.745559343894213e-05, "loss": 2.0406, "step": 687000 }, { "epoch": 0.06, "learning_rate": 4.745374161466189e-05, "loss": 2.0287, "step": 687500 }, { "epoch": 0.06, "learning_rate": 4.7451889790381644e-05, "loss": 2.0281, "step": 688000 }, { "epoch": 0.06, "learning_rate": 4.7450037966101394e-05, "loss": 1.9655, "step": 688500 }, { "epoch": 0.06, "learning_rate": 4.744818614182115e-05, "loss": 1.9935, "step": 689000 }, { "epoch": 0.06, "learning_rate": 4.74463343175409e-05, "loss": 1.9347, "step": 689500 }, { "epoch": 0.06, "learning_rate": 4.744448249326066e-05, "loss": 2.0455, "step": 690000 }, { "epoch": 0.06, "learning_rate": 4.7442630668980416e-05, "loss": 2.0223, "step": 690500 }, { "epoch": 0.06, "learning_rate": 4.7440778844700166e-05, "loss": 1.9986, "step": 691000 }, { "epoch": 0.06, "learning_rate": 4.743892702041992e-05, "loss": 1.9804, "step": 691500 }, { "epoch": 0.06, "learning_rate": 4.743707519613967e-05, "loss": 1.9666, "step": 692000 }, { "epoch": 0.06, "learning_rate": 4.743522337185943e-05, "loss": 2.0033, "step": 692500 }, { "epoch": 0.06, "learning_rate": 4.743337154757918e-05, "loss": 2.0306, "step": 693000 }, { "epoch": 0.06, "learning_rate": 4.743151972329894e-05, "loss": 2.0487, "step": 693500 }, { "epoch": 0.06, "learning_rate": 4.7429667899018694e-05, "loss": 2.014, "step": 694000 }, { "epoch": 0.06, "learning_rate": 4.7427816074738444e-05, "loss": 2.1064, "step": 694500 }, { "epoch": 0.06, "learning_rate": 4.74259642504582e-05, "loss": 2.0571, "step": 695000 }, { "epoch": 0.06, "learning_rate": 4.742411242617795e-05, "loss": 2.0911, "step": 695500 }, { "epoch": 0.06, "learning_rate": 4.742226060189771e-05, "loss": 2.04, "step": 696000 }, { "epoch": 0.06, "learning_rate": 4.742040877761746e-05, "loss": 2.0139, "step": 696500 }, { "epoch": 0.06, "learning_rate": 4.7418556953337215e-05, "loss": 1.9906, "step": 697000 }, { "epoch": 0.06, "learning_rate": 4.741670512905697e-05, "loss": 2.0458, "step": 697500 }, { "epoch": 0.06, "learning_rate": 4.741485330477672e-05, "loss": 2.0134, "step": 698000 }, { "epoch": 0.06, "learning_rate": 4.741300148049648e-05, "loss": 2.0296, "step": 698500 }, { "epoch": 0.06, "learning_rate": 4.741114965621623e-05, "loss": 2.0438, "step": 699000 }, { "epoch": 0.06, "learning_rate": 4.7409297831935986e-05, "loss": 2.0439, "step": 699500 }, { "epoch": 0.06, "learning_rate": 4.740744600765574e-05, "loss": 2.12, "step": 700000 }, { "epoch": 0.06, "learning_rate": 4.7405594183375493e-05, "loss": 2.0337, "step": 700500 }, { "epoch": 0.06, "learning_rate": 4.740374235909525e-05, "loss": 2.0302, "step": 701000 }, { "epoch": 0.06, "learning_rate": 4.7401890534815e-05, "loss": 2.0418, "step": 701500 }, { "epoch": 0.06, "learning_rate": 4.740003871053476e-05, "loss": 2.0212, "step": 702000 }, { "epoch": 0.06, "learning_rate": 4.739818688625451e-05, "loss": 1.9915, "step": 702500 }, { "epoch": 0.06, "learning_rate": 4.7396335061974265e-05, "loss": 2.0892, "step": 703000 }, { "epoch": 0.06, "learning_rate": 4.739448323769402e-05, "loss": 1.9904, "step": 703500 }, { "epoch": 0.06, "learning_rate": 4.739263141341377e-05, "loss": 2.1306, "step": 704000 }, { "epoch": 0.06, "learning_rate": 4.739077958913353e-05, "loss": 2.1016, "step": 704500 }, { "epoch": 0.06, "learning_rate": 4.738892776485328e-05, "loss": 2.0034, "step": 705000 }, { "epoch": 0.06, "learning_rate": 4.7387075940573036e-05, "loss": 1.9966, "step": 705500 }, { "epoch": 0.06, "learning_rate": 4.7385224116292786e-05, "loss": 2.0254, "step": 706000 }, { "epoch": 0.06, "learning_rate": 4.738337229201254e-05, "loss": 2.0833, "step": 706500 }, { "epoch": 0.06, "learning_rate": 4.73815204677323e-05, "loss": 2.0133, "step": 707000 }, { "epoch": 0.06, "learning_rate": 4.737966864345205e-05, "loss": 1.9995, "step": 707500 }, { "epoch": 0.06, "learning_rate": 4.737781681917181e-05, "loss": 2.0442, "step": 708000 }, { "epoch": 0.06, "learning_rate": 4.737596499489156e-05, "loss": 2.071, "step": 708500 }, { "epoch": 0.06, "learning_rate": 4.7374113170611314e-05, "loss": 2.0073, "step": 709000 }, { "epoch": 0.06, "learning_rate": 4.737226134633107e-05, "loss": 1.9698, "step": 709500 }, { "epoch": 0.06, "learning_rate": 4.737040952205082e-05, "loss": 1.9787, "step": 710000 }, { "epoch": 0.06, "learning_rate": 4.736855769777058e-05, "loss": 2.0202, "step": 710500 }, { "epoch": 0.06, "learning_rate": 4.736670587349033e-05, "loss": 2.0006, "step": 711000 }, { "epoch": 0.06, "learning_rate": 4.7364854049210085e-05, "loss": 2.0398, "step": 711500 }, { "epoch": 0.06, "learning_rate": 4.7363002224929835e-05, "loss": 1.9721, "step": 712000 }, { "epoch": 0.06, "learning_rate": 4.736115040064959e-05, "loss": 2.0857, "step": 712500 }, { "epoch": 0.07, "learning_rate": 4.735929857636935e-05, "loss": 2.0813, "step": 713000 }, { "epoch": 0.07, "learning_rate": 4.73574467520891e-05, "loss": 2.0385, "step": 713500 }, { "epoch": 0.07, "learning_rate": 4.7355594927808856e-05, "loss": 2.0053, "step": 714000 }, { "epoch": 0.07, "learning_rate": 4.735374310352861e-05, "loss": 2.0042, "step": 714500 }, { "epoch": 0.07, "learning_rate": 4.7351891279248364e-05, "loss": 2.027, "step": 715000 }, { "epoch": 0.07, "learning_rate": 4.7350039454968114e-05, "loss": 1.933, "step": 715500 }, { "epoch": 0.07, "learning_rate": 4.734818763068787e-05, "loss": 2.0986, "step": 716000 }, { "epoch": 0.07, "learning_rate": 4.734633580640763e-05, "loss": 1.9804, "step": 716500 }, { "epoch": 0.07, "learning_rate": 4.734448398212738e-05, "loss": 2.0332, "step": 717000 }, { "epoch": 0.07, "learning_rate": 4.7342632157847135e-05, "loss": 1.9209, "step": 717500 }, { "epoch": 0.07, "learning_rate": 4.7340780333566885e-05, "loss": 2.0463, "step": 718000 }, { "epoch": 0.07, "learning_rate": 4.733892850928664e-05, "loss": 2.003, "step": 718500 }, { "epoch": 0.07, "learning_rate": 4.73370766850064e-05, "loss": 1.9589, "step": 719000 }, { "epoch": 0.07, "learning_rate": 4.733522486072615e-05, "loss": 2.066, "step": 719500 }, { "epoch": 0.07, "learning_rate": 4.7333373036445906e-05, "loss": 2.0514, "step": 720000 }, { "epoch": 0.07, "learning_rate": 4.7331521212165656e-05, "loss": 2.042, "step": 720500 }, { "epoch": 0.07, "learning_rate": 4.732966938788541e-05, "loss": 2.0349, "step": 721000 }, { "epoch": 0.07, "learning_rate": 4.732781756360516e-05, "loss": 1.9522, "step": 721500 }, { "epoch": 0.07, "learning_rate": 4.732596573932492e-05, "loss": 2.0051, "step": 722000 }, { "epoch": 0.07, "learning_rate": 4.732411391504468e-05, "loss": 1.9622, "step": 722500 }, { "epoch": 0.07, "learning_rate": 4.732226209076443e-05, "loss": 1.9827, "step": 723000 }, { "epoch": 0.07, "learning_rate": 4.7320410266484184e-05, "loss": 2.0155, "step": 723500 }, { "epoch": 0.07, "learning_rate": 4.7318558442203934e-05, "loss": 2.016, "step": 724000 }, { "epoch": 0.07, "learning_rate": 4.731670661792369e-05, "loss": 1.9934, "step": 724500 }, { "epoch": 0.07, "learning_rate": 4.731485479364344e-05, "loss": 2.0207, "step": 725000 }, { "epoch": 0.07, "learning_rate": 4.73130029693632e-05, "loss": 2.0133, "step": 725500 }, { "epoch": 0.07, "learning_rate": 4.7311151145082955e-05, "loss": 2.0223, "step": 726000 }, { "epoch": 0.07, "learning_rate": 4.7309299320802706e-05, "loss": 2.0113, "step": 726500 }, { "epoch": 0.07, "learning_rate": 4.730744749652246e-05, "loss": 1.9765, "step": 727000 }, { "epoch": 0.07, "learning_rate": 4.730559567224221e-05, "loss": 2.0156, "step": 727500 }, { "epoch": 0.07, "learning_rate": 4.730374384796197e-05, "loss": 2.0305, "step": 728000 }, { "epoch": 0.07, "learning_rate": 4.730189202368173e-05, "loss": 1.9378, "step": 728500 }, { "epoch": 0.07, "learning_rate": 4.730004019940148e-05, "loss": 2.0449, "step": 729000 }, { "epoch": 0.07, "learning_rate": 4.7298188375121234e-05, "loss": 1.9851, "step": 729500 }, { "epoch": 0.07, "learning_rate": 4.7296336550840984e-05, "loss": 1.9278, "step": 730000 }, { "epoch": 0.07, "learning_rate": 4.729448472656074e-05, "loss": 1.9945, "step": 730500 }, { "epoch": 0.07, "learning_rate": 4.729263290228049e-05, "loss": 1.9841, "step": 731000 }, { "epoch": 0.07, "learning_rate": 4.729078107800025e-05, "loss": 2.0696, "step": 731500 }, { "epoch": 0.07, "learning_rate": 4.7288929253720005e-05, "loss": 1.9924, "step": 732000 }, { "epoch": 0.07, "learning_rate": 4.7287077429439755e-05, "loss": 1.954, "step": 732500 }, { "epoch": 0.07, "learning_rate": 4.728522560515951e-05, "loss": 1.9555, "step": 733000 }, { "epoch": 0.07, "learning_rate": 4.728337378087926e-05, "loss": 1.9455, "step": 733500 }, { "epoch": 0.07, "learning_rate": 4.728152195659902e-05, "loss": 2.0125, "step": 734000 }, { "epoch": 0.07, "learning_rate": 4.727967013231877e-05, "loss": 2.0331, "step": 734500 }, { "epoch": 0.07, "learning_rate": 4.7277818308038526e-05, "loss": 1.9452, "step": 735000 }, { "epoch": 0.07, "learning_rate": 4.727596648375828e-05, "loss": 2.0267, "step": 735500 }, { "epoch": 0.07, "learning_rate": 4.727411465947803e-05, "loss": 2.0673, "step": 736000 }, { "epoch": 0.07, "learning_rate": 4.727226283519779e-05, "loss": 2.0823, "step": 736500 }, { "epoch": 0.07, "learning_rate": 4.727041101091754e-05, "loss": 2.0305, "step": 737000 }, { "epoch": 0.07, "learning_rate": 4.72685591866373e-05, "loss": 2.025, "step": 737500 }, { "epoch": 0.07, "learning_rate": 4.726670736235705e-05, "loss": 2.0237, "step": 738000 }, { "epoch": 0.07, "learning_rate": 4.7264855538076805e-05, "loss": 2.0442, "step": 738500 }, { "epoch": 0.07, "learning_rate": 4.726300371379656e-05, "loss": 1.998, "step": 739000 }, { "epoch": 0.07, "learning_rate": 4.726115188951631e-05, "loss": 1.9642, "step": 739500 }, { "epoch": 0.07, "learning_rate": 4.725930006523607e-05, "loss": 1.9426, "step": 740000 }, { "epoch": 0.07, "learning_rate": 4.725744824095582e-05, "loss": 2.0887, "step": 740500 }, { "epoch": 0.07, "learning_rate": 4.7255596416675576e-05, "loss": 2.0098, "step": 741000 }, { "epoch": 0.07, "learning_rate": 4.725374459239533e-05, "loss": 1.9978, "step": 741500 }, { "epoch": 0.07, "learning_rate": 4.725189276811508e-05, "loss": 2.0871, "step": 742000 }, { "epoch": 0.07, "learning_rate": 4.725004094383484e-05, "loss": 1.9859, "step": 742500 }, { "epoch": 0.07, "learning_rate": 4.724818911955459e-05, "loss": 2.0032, "step": 743000 }, { "epoch": 0.07, "learning_rate": 4.724633729527435e-05, "loss": 2.057, "step": 743500 }, { "epoch": 0.07, "learning_rate": 4.72444854709941e-05, "loss": 2.0079, "step": 744000 }, { "epoch": 0.07, "learning_rate": 4.724263364671386e-05, "loss": 2.0698, "step": 744500 }, { "epoch": 0.07, "learning_rate": 4.724078182243361e-05, "loss": 1.9859, "step": 745000 }, { "epoch": 0.07, "learning_rate": 4.723892999815336e-05, "loss": 1.9798, "step": 745500 }, { "epoch": 0.07, "learning_rate": 4.723707817387312e-05, "loss": 1.9868, "step": 746000 }, { "epoch": 0.07, "learning_rate": 4.723522634959287e-05, "loss": 1.9978, "step": 746500 }, { "epoch": 0.07, "learning_rate": 4.7233374525312625e-05, "loss": 2.0636, "step": 747000 }, { "epoch": 0.07, "learning_rate": 4.7231522701032375e-05, "loss": 2.0346, "step": 747500 }, { "epoch": 0.07, "learning_rate": 4.722967087675213e-05, "loss": 1.9638, "step": 748000 }, { "epoch": 0.07, "learning_rate": 4.722781905247189e-05, "loss": 1.9988, "step": 748500 }, { "epoch": 0.07, "learning_rate": 4.722596722819164e-05, "loss": 1.9167, "step": 749000 }, { "epoch": 0.07, "learning_rate": 4.7224115403911396e-05, "loss": 1.9686, "step": 749500 }, { "epoch": 0.07, "learning_rate": 4.7222263579631147e-05, "loss": 2.0086, "step": 750000 }, { "epoch": 0.07, "learning_rate": 4.7220411755350904e-05, "loss": 1.9991, "step": 750500 }, { "epoch": 0.07, "learning_rate": 4.721855993107066e-05, "loss": 2.0061, "step": 751000 }, { "epoch": 0.07, "learning_rate": 4.721670810679041e-05, "loss": 2.0566, "step": 751500 }, { "epoch": 0.07, "learning_rate": 4.721485628251017e-05, "loss": 2.0546, "step": 752000 }, { "epoch": 0.07, "learning_rate": 4.721300445822992e-05, "loss": 2.0482, "step": 752500 }, { "epoch": 0.07, "learning_rate": 4.7211152633949675e-05, "loss": 2.0236, "step": 753000 }, { "epoch": 0.07, "learning_rate": 4.7209300809669425e-05, "loss": 1.9986, "step": 753500 }, { "epoch": 0.07, "learning_rate": 4.720744898538919e-05, "loss": 2.0081, "step": 754000 }, { "epoch": 0.07, "learning_rate": 4.720559716110894e-05, "loss": 2.0366, "step": 754500 }, { "epoch": 0.07, "learning_rate": 4.720374533682869e-05, "loss": 1.9745, "step": 755000 }, { "epoch": 0.07, "learning_rate": 4.7201893512548446e-05, "loss": 2.0764, "step": 755500 }, { "epoch": 0.07, "learning_rate": 4.7200041688268196e-05, "loss": 2.0012, "step": 756000 }, { "epoch": 0.07, "learning_rate": 4.719818986398795e-05, "loss": 1.9298, "step": 756500 }, { "epoch": 0.07, "learning_rate": 4.71963380397077e-05, "loss": 1.9376, "step": 757000 }, { "epoch": 0.07, "learning_rate": 4.719448621542747e-05, "loss": 1.9849, "step": 757500 }, { "epoch": 0.08, "learning_rate": 4.719263439114722e-05, "loss": 2.0167, "step": 758000 }, { "epoch": 0.08, "learning_rate": 4.719078256686697e-05, "loss": 1.9952, "step": 758500 }, { "epoch": 0.08, "learning_rate": 4.7188930742586724e-05, "loss": 2.0323, "step": 759000 }, { "epoch": 0.08, "learning_rate": 4.7187078918306474e-05, "loss": 1.9649, "step": 759500 }, { "epoch": 0.08, "learning_rate": 4.718522709402623e-05, "loss": 1.9798, "step": 760000 }, { "epoch": 0.08, "learning_rate": 4.718337526974599e-05, "loss": 1.9652, "step": 760500 }, { "epoch": 0.08, "learning_rate": 4.718152344546574e-05, "loss": 1.9687, "step": 761000 }, { "epoch": 0.08, "learning_rate": 4.7179671621185495e-05, "loss": 2.0004, "step": 761500 }, { "epoch": 0.08, "learning_rate": 4.7177819796905246e-05, "loss": 1.9736, "step": 762000 }, { "epoch": 0.08, "learning_rate": 4.7175967972625e-05, "loss": 2.0267, "step": 762500 }, { "epoch": 0.08, "learning_rate": 4.717411614834475e-05, "loss": 2.0713, "step": 763000 }, { "epoch": 0.08, "learning_rate": 4.717226432406451e-05, "loss": 1.9763, "step": 763500 }, { "epoch": 0.08, "learning_rate": 4.7170412499784267e-05, "loss": 1.9805, "step": 764000 }, { "epoch": 0.08, "learning_rate": 4.716856067550402e-05, "loss": 1.9565, "step": 764500 }, { "epoch": 0.08, "learning_rate": 4.7166708851223774e-05, "loss": 2.0273, "step": 765000 }, { "epoch": 0.08, "learning_rate": 4.7164857026943524e-05, "loss": 1.9555, "step": 765500 }, { "epoch": 0.08, "learning_rate": 4.716300520266328e-05, "loss": 1.9901, "step": 766000 }, { "epoch": 0.08, "learning_rate": 4.716115337838303e-05, "loss": 1.9322, "step": 766500 }, { "epoch": 0.08, "learning_rate": 4.7159301554102795e-05, "loss": 1.9818, "step": 767000 }, { "epoch": 0.08, "learning_rate": 4.7157449729822545e-05, "loss": 1.991, "step": 767500 }, { "epoch": 0.08, "learning_rate": 4.7155597905542295e-05, "loss": 1.9674, "step": 768000 }, { "epoch": 0.08, "learning_rate": 4.715374608126205e-05, "loss": 2.051, "step": 768500 }, { "epoch": 0.08, "learning_rate": 4.71518942569818e-05, "loss": 1.978, "step": 769000 }, { "epoch": 0.08, "learning_rate": 4.715004243270156e-05, "loss": 1.9921, "step": 769500 }, { "epoch": 0.08, "learning_rate": 4.7148190608421316e-05, "loss": 1.9828, "step": 770000 }, { "epoch": 0.08, "learning_rate": 4.714633878414107e-05, "loss": 1.9944, "step": 770500 }, { "epoch": 0.08, "learning_rate": 4.714448695986082e-05, "loss": 2.0489, "step": 771000 }, { "epoch": 0.08, "learning_rate": 4.714263513558057e-05, "loss": 1.9693, "step": 771500 }, { "epoch": 0.08, "learning_rate": 4.714078331130033e-05, "loss": 2.0434, "step": 772000 }, { "epoch": 0.08, "learning_rate": 4.713893148702008e-05, "loss": 2.0486, "step": 772500 }, { "epoch": 0.08, "learning_rate": 4.713707966273984e-05, "loss": 1.9808, "step": 773000 }, { "epoch": 0.08, "learning_rate": 4.7135227838459594e-05, "loss": 2.0136, "step": 773500 }, { "epoch": 0.08, "learning_rate": 4.713337601417935e-05, "loss": 2.0469, "step": 774000 }, { "epoch": 0.08, "learning_rate": 4.71315241898991e-05, "loss": 2.0264, "step": 774500 }, { "epoch": 0.08, "learning_rate": 4.712967236561885e-05, "loss": 1.9639, "step": 775000 }, { "epoch": 0.08, "learning_rate": 4.712782054133861e-05, "loss": 1.9759, "step": 775500 }, { "epoch": 0.08, "learning_rate": 4.712596871705836e-05, "loss": 2.0106, "step": 776000 }, { "epoch": 0.08, "learning_rate": 4.712411689277812e-05, "loss": 2.008, "step": 776500 }, { "epoch": 0.08, "learning_rate": 4.712226506849787e-05, "loss": 2.0776, "step": 777000 }, { "epoch": 0.08, "learning_rate": 4.712041324421762e-05, "loss": 2.1293, "step": 777500 }, { "epoch": 0.08, "learning_rate": 4.711856141993738e-05, "loss": 2.0102, "step": 778000 }, { "epoch": 0.08, "learning_rate": 4.711670959565713e-05, "loss": 2.0072, "step": 778500 }, { "epoch": 0.08, "learning_rate": 4.711485777137689e-05, "loss": 2.1087, "step": 779000 }, { "epoch": 0.08, "learning_rate": 4.7113005947096644e-05, "loss": 1.9805, "step": 779500 }, { "epoch": 0.08, "learning_rate": 4.71111541228164e-05, "loss": 2.0418, "step": 780000 }, { "epoch": 0.08, "learning_rate": 4.710930229853615e-05, "loss": 2.0132, "step": 780500 }, { "epoch": 0.08, "learning_rate": 4.71074504742559e-05, "loss": 2.0929, "step": 781000 }, { "epoch": 0.08, "learning_rate": 4.710559864997566e-05, "loss": 2.0832, "step": 781500 }, { "epoch": 0.08, "learning_rate": 4.710374682569541e-05, "loss": 2.0905, "step": 782000 }, { "epoch": 0.08, "learning_rate": 4.7101895001415165e-05, "loss": 1.9694, "step": 782500 }, { "epoch": 0.08, "learning_rate": 4.710004317713492e-05, "loss": 2.0284, "step": 783000 }, { "epoch": 0.08, "learning_rate": 4.709819135285468e-05, "loss": 2.0593, "step": 783500 }, { "epoch": 0.08, "learning_rate": 4.709633952857443e-05, "loss": 2.0327, "step": 784000 }, { "epoch": 0.08, "learning_rate": 4.709448770429418e-05, "loss": 2.0788, "step": 784500 }, { "epoch": 0.08, "learning_rate": 4.7092635880013936e-05, "loss": 2.0705, "step": 785000 }, { "epoch": 0.08, "learning_rate": 4.7090784055733686e-05, "loss": 2.0316, "step": 785500 }, { "epoch": 0.08, "learning_rate": 4.708893223145345e-05, "loss": 2.0512, "step": 786000 }, { "epoch": 0.08, "learning_rate": 4.70870804071732e-05, "loss": 2.1205, "step": 786500 }, { "epoch": 0.08, "learning_rate": 4.708522858289296e-05, "loss": 2.0344, "step": 787000 }, { "epoch": 0.08, "learning_rate": 4.708337675861271e-05, "loss": 2.0452, "step": 787500 }, { "epoch": 0.08, "learning_rate": 4.708152493433246e-05, "loss": 2.0642, "step": 788000 }, { "epoch": 0.08, "learning_rate": 4.7079673110052215e-05, "loss": 2.0969, "step": 788500 }, { "epoch": 0.08, "learning_rate": 4.7077821285771965e-05, "loss": 2.0998, "step": 789000 }, { "epoch": 0.08, "learning_rate": 4.707596946149173e-05, "loss": 2.0191, "step": 789500 }, { "epoch": 0.08, "learning_rate": 4.707411763721148e-05, "loss": 2.0373, "step": 790000 }, { "epoch": 0.08, "learning_rate": 4.707226581293123e-05, "loss": 2.0556, "step": 790500 }, { "epoch": 0.08, "learning_rate": 4.7070413988650986e-05, "loss": 2.0802, "step": 791000 }, { "epoch": 0.08, "learning_rate": 4.7068562164370736e-05, "loss": 2.0375, "step": 791500 }, { "epoch": 0.08, "learning_rate": 4.706671034009049e-05, "loss": 2.0806, "step": 792000 }, { "epoch": 0.08, "learning_rate": 4.706485851581025e-05, "loss": 2.1036, "step": 792500 }, { "epoch": 0.08, "learning_rate": 4.706300669153001e-05, "loss": 2.0088, "step": 793000 }, { "epoch": 0.08, "learning_rate": 4.706115486724976e-05, "loss": 2.0511, "step": 793500 }, { "epoch": 0.08, "learning_rate": 4.705930304296951e-05, "loss": 2.0661, "step": 794000 }, { "epoch": 0.08, "learning_rate": 4.7057451218689264e-05, "loss": 2.0636, "step": 794500 }, { "epoch": 0.08, "learning_rate": 4.7055599394409014e-05, "loss": 2.0155, "step": 795000 }, { "epoch": 0.08, "learning_rate": 4.705374757012878e-05, "loss": 2.0469, "step": 795500 }, { "epoch": 0.08, "learning_rate": 4.705189574584853e-05, "loss": 2.0243, "step": 796000 }, { "epoch": 0.08, "learning_rate": 4.7050043921568285e-05, "loss": 1.9999, "step": 796500 }, { "epoch": 0.08, "learning_rate": 4.7048192097288035e-05, "loss": 2.0995, "step": 797000 }, { "epoch": 0.08, "learning_rate": 4.7046340273007785e-05, "loss": 1.9903, "step": 797500 }, { "epoch": 0.08, "learning_rate": 4.704448844872754e-05, "loss": 2.0851, "step": 798000 }, { "epoch": 0.08, "learning_rate": 4.704263662444729e-05, "loss": 2.0155, "step": 798500 }, { "epoch": 0.08, "learning_rate": 4.7040784800167056e-05, "loss": 2.0831, "step": 799000 }, { "epoch": 0.08, "learning_rate": 4.7038932975886806e-05, "loss": 2.0779, "step": 799500 }, { "epoch": 0.08, "learning_rate": 4.7037081151606563e-05, "loss": 2.0864, "step": 800000 }, { "epoch": 0.08, "learning_rate": 4.7035229327326314e-05, "loss": 2.0051, "step": 800500 }, { "epoch": 0.08, "learning_rate": 4.7033377503046064e-05, "loss": 2.0414, "step": 801000 }, { "epoch": 0.08, "learning_rate": 4.703152567876582e-05, "loss": 2.027, "step": 801500 }, { "epoch": 0.08, "learning_rate": 4.702967385448558e-05, "loss": 2.0134, "step": 802000 }, { "epoch": 0.08, "learning_rate": 4.7027822030205335e-05, "loss": 2.0272, "step": 802500 }, { "epoch": 0.09, "learning_rate": 4.7025970205925085e-05, "loss": 2.0365, "step": 803000 }, { "epoch": 0.09, "learning_rate": 4.7024118381644835e-05, "loss": 2.086, "step": 803500 }, { "epoch": 0.09, "learning_rate": 4.702226655736459e-05, "loss": 2.0551, "step": 804000 }, { "epoch": 0.09, "learning_rate": 4.702041473308434e-05, "loss": 2.1284, "step": 804500 }, { "epoch": 0.09, "learning_rate": 4.7018562908804106e-05, "loss": 2.0559, "step": 805000 }, { "epoch": 0.09, "learning_rate": 4.7016711084523856e-05, "loss": 2.0499, "step": 805500 }, { "epoch": 0.09, "learning_rate": 4.701485926024361e-05, "loss": 2.0513, "step": 806000 }, { "epoch": 0.09, "learning_rate": 4.701300743596336e-05, "loss": 2.0434, "step": 806500 }, { "epoch": 0.09, "learning_rate": 4.701115561168311e-05, "loss": 2.1082, "step": 807000 }, { "epoch": 0.09, "learning_rate": 4.700930378740287e-05, "loss": 2.0744, "step": 807500 }, { "epoch": 0.09, "learning_rate": 4.700745196312262e-05, "loss": 2.0394, "step": 808000 }, { "epoch": 0.09, "learning_rate": 4.7005600138842384e-05, "loss": 2.0527, "step": 808500 }, { "epoch": 0.09, "learning_rate": 4.7003748314562134e-05, "loss": 2.0863, "step": 809000 }, { "epoch": 0.09, "learning_rate": 4.700189649028189e-05, "loss": 2.0699, "step": 809500 }, { "epoch": 0.09, "learning_rate": 4.700004466600164e-05, "loss": 2.017, "step": 810000 }, { "epoch": 0.09, "learning_rate": 4.699819284172139e-05, "loss": 2.06, "step": 810500 }, { "epoch": 0.09, "learning_rate": 4.699634101744115e-05, "loss": 2.1166, "step": 811000 }, { "epoch": 0.09, "learning_rate": 4.6994489193160905e-05, "loss": 2.1231, "step": 811500 }, { "epoch": 0.09, "learning_rate": 4.699263736888066e-05, "loss": 2.0806, "step": 812000 }, { "epoch": 0.09, "learning_rate": 4.699078554460041e-05, "loss": 2.0696, "step": 812500 }, { "epoch": 0.09, "learning_rate": 4.698893372032017e-05, "loss": 2.054, "step": 813000 }, { "epoch": 0.09, "learning_rate": 4.698708189603992e-05, "loss": 1.9506, "step": 813500 }, { "epoch": 0.09, "learning_rate": 4.698523007175967e-05, "loss": 2.1082, "step": 814000 }, { "epoch": 0.09, "learning_rate": 4.698337824747943e-05, "loss": 2.0141, "step": 814500 }, { "epoch": 0.09, "learning_rate": 4.6981526423199184e-05, "loss": 2.1588, "step": 815000 }, { "epoch": 0.09, "learning_rate": 4.697967459891894e-05, "loss": 2.0799, "step": 815500 }, { "epoch": 0.09, "learning_rate": 4.697782277463869e-05, "loss": 2.0682, "step": 816000 }, { "epoch": 0.09, "learning_rate": 4.697597095035845e-05, "loss": 2.0533, "step": 816500 }, { "epoch": 0.09, "learning_rate": 4.69741191260782e-05, "loss": 2.0551, "step": 817000 }, { "epoch": 0.09, "learning_rate": 4.697226730179795e-05, "loss": 2.094, "step": 817500 }, { "epoch": 0.09, "learning_rate": 4.697041547751771e-05, "loss": 2.0754, "step": 818000 }, { "epoch": 0.09, "learning_rate": 4.696856365323746e-05, "loss": 2.0422, "step": 818500 }, { "epoch": 0.09, "learning_rate": 4.696671182895722e-05, "loss": 2.0126, "step": 819000 }, { "epoch": 0.09, "learning_rate": 4.696486000467697e-05, "loss": 2.1076, "step": 819500 }, { "epoch": 0.09, "learning_rate": 4.696300818039672e-05, "loss": 2.0516, "step": 820000 }, { "epoch": 0.09, "learning_rate": 4.6961156356116476e-05, "loss": 2.0432, "step": 820500 }, { "epoch": 0.09, "learning_rate": 4.695930453183623e-05, "loss": 2.0257, "step": 821000 }, { "epoch": 0.09, "learning_rate": 4.695745270755599e-05, "loss": 2.0407, "step": 821500 }, { "epoch": 0.09, "learning_rate": 4.695560088327574e-05, "loss": 2.074, "step": 822000 }, { "epoch": 0.09, "learning_rate": 4.69537490589955e-05, "loss": 2.1457, "step": 822500 }, { "epoch": 0.09, "learning_rate": 4.695189723471525e-05, "loss": 2.0851, "step": 823000 }, { "epoch": 0.09, "learning_rate": 4.6950045410435e-05, "loss": 2.1232, "step": 823500 }, { "epoch": 0.09, "learning_rate": 4.6948193586154755e-05, "loss": 2.0155, "step": 824000 }, { "epoch": 0.09, "learning_rate": 4.694634176187451e-05, "loss": 2.0658, "step": 824500 }, { "epoch": 0.09, "learning_rate": 4.694448993759427e-05, "loss": 2.0657, "step": 825000 }, { "epoch": 0.09, "learning_rate": 4.694263811331402e-05, "loss": 2.0418, "step": 825500 }, { "epoch": 0.09, "learning_rate": 4.6940786289033776e-05, "loss": 2.0449, "step": 826000 }, { "epoch": 0.09, "learning_rate": 4.6938934464753526e-05, "loss": 2.0938, "step": 826500 }, { "epoch": 0.09, "learning_rate": 4.6937082640473276e-05, "loss": 2.0486, "step": 827000 }, { "epoch": 0.09, "learning_rate": 4.693523081619304e-05, "loss": 2.0815, "step": 827500 }, { "epoch": 0.09, "learning_rate": 4.693337899191279e-05, "loss": 2.0487, "step": 828000 }, { "epoch": 0.09, "learning_rate": 4.693152716763255e-05, "loss": 2.0896, "step": 828500 }, { "epoch": 0.09, "learning_rate": 4.69296753433523e-05, "loss": 1.9657, "step": 829000 }, { "epoch": 0.09, "learning_rate": 4.6927823519072054e-05, "loss": 2.0729, "step": 829500 }, { "epoch": 0.09, "learning_rate": 4.6925971694791804e-05, "loss": 2.0866, "step": 830000 }, { "epoch": 0.09, "learning_rate": 4.692411987051156e-05, "loss": 2.055, "step": 830500 }, { "epoch": 0.09, "learning_rate": 4.692226804623132e-05, "loss": 1.9861, "step": 831000 }, { "epoch": 0.09, "learning_rate": 4.692041622195107e-05, "loss": 2.0604, "step": 831500 }, { "epoch": 0.09, "learning_rate": 4.6918564397670825e-05, "loss": 2.0937, "step": 832000 }, { "epoch": 0.09, "learning_rate": 4.6916712573390575e-05, "loss": 2.0143, "step": 832500 }, { "epoch": 0.09, "learning_rate": 4.6914860749110325e-05, "loss": 2.0175, "step": 833000 }, { "epoch": 0.09, "learning_rate": 4.691300892483008e-05, "loss": 2.0605, "step": 833500 }, { "epoch": 0.09, "learning_rate": 4.691115710054984e-05, "loss": 2.093, "step": 834000 }, { "epoch": 0.09, "learning_rate": 4.6909305276269596e-05, "loss": 2.103, "step": 834500 }, { "epoch": 0.09, "learning_rate": 4.6907453451989346e-05, "loss": 2.0349, "step": 835000 }, { "epoch": 0.09, "learning_rate": 4.69056016277091e-05, "loss": 2.0606, "step": 835500 }, { "epoch": 0.09, "learning_rate": 4.6903749803428854e-05, "loss": 2.062, "step": 836000 }, { "epoch": 0.09, "learning_rate": 4.6901897979148604e-05, "loss": 2.008, "step": 836500 }, { "epoch": 0.09, "learning_rate": 4.690004615486837e-05, "loss": 2.0876, "step": 837000 }, { "epoch": 0.09, "learning_rate": 4.689819433058812e-05, "loss": 2.051, "step": 837500 }, { "epoch": 0.09, "learning_rate": 4.6896342506307875e-05, "loss": 2.0967, "step": 838000 }, { "epoch": 0.09, "learning_rate": 4.6894490682027625e-05, "loss": 2.0893, "step": 838500 }, { "epoch": 0.09, "learning_rate": 4.689263885774738e-05, "loss": 2.0889, "step": 839000 }, { "epoch": 0.09, "learning_rate": 4.689078703346713e-05, "loss": 2.0079, "step": 839500 }, { "epoch": 0.09, "learning_rate": 4.688893520918688e-05, "loss": 2.0384, "step": 840000 }, { "epoch": 0.09, "learning_rate": 4.6887083384906646e-05, "loss": 2.1172, "step": 840500 }, { "epoch": 0.09, "learning_rate": 4.6885231560626396e-05, "loss": 2.0889, "step": 841000 }, { "epoch": 0.09, "learning_rate": 4.688337973634615e-05, "loss": 1.9858, "step": 841500 }, { "epoch": 0.09, "learning_rate": 4.68815279120659e-05, "loss": 1.9447, "step": 842000 }, { "epoch": 0.09, "learning_rate": 4.687967608778566e-05, "loss": 2.0305, "step": 842500 }, { "epoch": 0.09, "learning_rate": 4.687782426350541e-05, "loss": 2.0814, "step": 843000 }, { "epoch": 0.09, "learning_rate": 4.687597243922517e-05, "loss": 2.055, "step": 843500 }, { "epoch": 0.09, "learning_rate": 4.6874120614944924e-05, "loss": 2.0451, "step": 844000 }, { "epoch": 0.09, "learning_rate": 4.6872268790664674e-05, "loss": 2.0967, "step": 844500 }, { "epoch": 0.09, "learning_rate": 4.687041696638443e-05, "loss": 2.0854, "step": 845000 }, { "epoch": 0.09, "learning_rate": 4.686856514210418e-05, "loss": 2.0563, "step": 845500 }, { "epoch": 0.09, "learning_rate": 4.686671331782393e-05, "loss": 2.0294, "step": 846000 }, { "epoch": 0.09, "learning_rate": 4.6864861493543695e-05, "loss": 2.0123, "step": 846500 }, { "epoch": 0.09, "learning_rate": 4.6863009669263445e-05, "loss": 2.0806, "step": 847000 }, { "epoch": 0.09, "learning_rate": 4.68611578449832e-05, "loss": 2.0407, "step": 847500 }, { "epoch": 0.1, "learning_rate": 4.685930602070295e-05, "loss": 2.0924, "step": 848000 }, { "epoch": 0.1, "learning_rate": 4.685745419642271e-05, "loss": 2.1218, "step": 848500 }, { "epoch": 0.1, "learning_rate": 4.685560237214246e-05, "loss": 2.0243, "step": 849000 }, { "epoch": 0.1, "learning_rate": 4.685375054786221e-05, "loss": 2.1099, "step": 849500 }, { "epoch": 0.1, "learning_rate": 4.6851898723581973e-05, "loss": 2.0467, "step": 850000 }, { "epoch": 0.1, "learning_rate": 4.6850046899301724e-05, "loss": 2.0296, "step": 850500 }, { "epoch": 0.1, "learning_rate": 4.684819507502148e-05, "loss": 2.0885, "step": 851000 }, { "epoch": 0.1, "learning_rate": 4.684634325074123e-05, "loss": 2.0313, "step": 851500 }, { "epoch": 0.1, "learning_rate": 4.684449142646099e-05, "loss": 2.0465, "step": 852000 }, { "epoch": 0.1, "learning_rate": 4.684263960218074e-05, "loss": 2.0468, "step": 852500 }, { "epoch": 0.1, "learning_rate": 4.6840787777900495e-05, "loss": 2.0439, "step": 853000 }, { "epoch": 0.1, "learning_rate": 4.683893595362025e-05, "loss": 2.0157, "step": 853500 }, { "epoch": 0.1, "learning_rate": 4.683708412934e-05, "loss": 2.028, "step": 854000 }, { "epoch": 0.1, "learning_rate": 4.683523230505976e-05, "loss": 2.0658, "step": 854500 }, { "epoch": 0.1, "learning_rate": 4.683338048077951e-05, "loss": 2.083, "step": 855000 }, { "epoch": 0.1, "learning_rate": 4.6831528656499266e-05, "loss": 2.1119, "step": 855500 }, { "epoch": 0.1, "learning_rate": 4.682967683221902e-05, "loss": 2.1112, "step": 856000 }, { "epoch": 0.1, "learning_rate": 4.682782500793877e-05, "loss": 2.052, "step": 856500 }, { "epoch": 0.1, "learning_rate": 4.682597318365853e-05, "loss": 1.993, "step": 857000 }, { "epoch": 0.1, "learning_rate": 4.682412135937828e-05, "loss": 2.0158, "step": 857500 }, { "epoch": 0.1, "learning_rate": 4.682226953509804e-05, "loss": 1.9846, "step": 858000 }, { "epoch": 0.1, "learning_rate": 4.682041771081779e-05, "loss": 2.0556, "step": 858500 }, { "epoch": 0.1, "learning_rate": 4.6818565886537544e-05, "loss": 2.0791, "step": 859000 }, { "epoch": 0.1, "learning_rate": 4.68167140622573e-05, "loss": 2.0545, "step": 859500 }, { "epoch": 0.1, "learning_rate": 4.681486223797705e-05, "loss": 2.0347, "step": 860000 }, { "epoch": 0.1, "learning_rate": 4.681301041369681e-05, "loss": 2.0501, "step": 860500 }, { "epoch": 0.1, "learning_rate": 4.681115858941656e-05, "loss": 2.0396, "step": 861000 }, { "epoch": 0.1, "learning_rate": 4.6809306765136315e-05, "loss": 2.04, "step": 861500 }, { "epoch": 0.1, "learning_rate": 4.6807454940856066e-05, "loss": 2.0529, "step": 862000 }, { "epoch": 0.1, "learning_rate": 4.680560311657582e-05, "loss": 2.0652, "step": 862500 }, { "epoch": 0.1, "learning_rate": 4.680375129229558e-05, "loss": 2.1191, "step": 863000 }, { "epoch": 0.1, "learning_rate": 4.680189946801533e-05, "loss": 2.0805, "step": 863500 }, { "epoch": 0.1, "learning_rate": 4.680004764373509e-05, "loss": 2.0712, "step": 864000 }, { "epoch": 0.1, "learning_rate": 4.679819581945484e-05, "loss": 2.0654, "step": 864500 }, { "epoch": 0.1, "learning_rate": 4.6796343995174594e-05, "loss": 2.027, "step": 865000 }, { "epoch": 0.1, "learning_rate": 4.6794492170894344e-05, "loss": 2.0291, "step": 865500 }, { "epoch": 0.1, "learning_rate": 4.67926403466141e-05, "loss": 2.007, "step": 866000 }, { "epoch": 0.1, "learning_rate": 4.679078852233386e-05, "loss": 2.0595, "step": 866500 }, { "epoch": 0.1, "learning_rate": 4.678893669805361e-05, "loss": 2.0951, "step": 867000 }, { "epoch": 0.1, "learning_rate": 4.6787084873773365e-05, "loss": 2.0557, "step": 867500 }, { "epoch": 0.1, "learning_rate": 4.6785233049493115e-05, "loss": 2.0327, "step": 868000 }, { "epoch": 0.1, "learning_rate": 4.678338122521287e-05, "loss": 2.0578, "step": 868500 }, { "epoch": 0.1, "learning_rate": 4.678152940093263e-05, "loss": 2.0355, "step": 869000 }, { "epoch": 0.1, "learning_rate": 4.677967757665238e-05, "loss": 2.0443, "step": 869500 }, { "epoch": 0.1, "learning_rate": 4.6777825752372136e-05, "loss": 2.1054, "step": 870000 }, { "epoch": 0.1, "learning_rate": 4.6775973928091886e-05, "loss": 2.0842, "step": 870500 }, { "epoch": 0.1, "learning_rate": 4.677412210381164e-05, "loss": 2.0282, "step": 871000 }, { "epoch": 0.1, "learning_rate": 4.6772270279531393e-05, "loss": 2.1102, "step": 871500 }, { "epoch": 0.1, "learning_rate": 4.677041845525115e-05, "loss": 2.1004, "step": 872000 }, { "epoch": 0.1, "learning_rate": 4.676856663097091e-05, "loss": 2.043, "step": 872500 }, { "epoch": 0.1, "learning_rate": 4.676671480669066e-05, "loss": 2.0572, "step": 873000 }, { "epoch": 0.1, "learning_rate": 4.6764862982410414e-05, "loss": 2.0051, "step": 873500 }, { "epoch": 0.1, "learning_rate": 4.6763011158130165e-05, "loss": 2.0985, "step": 874000 }, { "epoch": 0.1, "learning_rate": 4.676115933384992e-05, "loss": 2.0952, "step": 874500 }, { "epoch": 0.1, "learning_rate": 4.675930750956967e-05, "loss": 2.0283, "step": 875000 }, { "epoch": 0.1, "learning_rate": 4.675745568528943e-05, "loss": 2.0579, "step": 875500 }, { "epoch": 0.1, "learning_rate": 4.6755603861009186e-05, "loss": 2.0514, "step": 876000 }, { "epoch": 0.1, "learning_rate": 4.6753752036728936e-05, "loss": 2.1338, "step": 876500 }, { "epoch": 0.1, "learning_rate": 4.675190021244869e-05, "loss": 2.1004, "step": 877000 }, { "epoch": 0.1, "learning_rate": 4.675004838816844e-05, "loss": 2.0008, "step": 877500 }, { "epoch": 0.1, "learning_rate": 4.67481965638882e-05, "loss": 2.0826, "step": 878000 }, { "epoch": 0.1, "learning_rate": 4.674634473960796e-05, "loss": 2.0655, "step": 878500 }, { "epoch": 0.1, "learning_rate": 4.674449291532771e-05, "loss": 2.0472, "step": 879000 }, { "epoch": 0.1, "learning_rate": 4.6742641091047464e-05, "loss": 1.9664, "step": 879500 }, { "epoch": 0.1, "learning_rate": 4.6740789266767214e-05, "loss": 2.0571, "step": 880000 }, { "epoch": 0.1, "learning_rate": 4.673893744248697e-05, "loss": 2.0605, "step": 880500 }, { "epoch": 0.1, "learning_rate": 4.673708561820672e-05, "loss": 2.0526, "step": 881000 }, { "epoch": 0.1, "learning_rate": 4.673523379392648e-05, "loss": 2.0348, "step": 881500 }, { "epoch": 0.1, "learning_rate": 4.6733381969646235e-05, "loss": 2.0136, "step": 882000 }, { "epoch": 0.1, "learning_rate": 4.6731530145365985e-05, "loss": 2.1102, "step": 882500 }, { "epoch": 0.1, "learning_rate": 4.672967832108574e-05, "loss": 2.0325, "step": 883000 }, { "epoch": 0.1, "learning_rate": 4.672782649680549e-05, "loss": 2.0209, "step": 883500 }, { "epoch": 0.1, "learning_rate": 4.672597467252525e-05, "loss": 2.078, "step": 884000 }, { "epoch": 0.1, "learning_rate": 4.6724122848245e-05, "loss": 2.0908, "step": 884500 }, { "epoch": 0.1, "learning_rate": 4.6722271023964756e-05, "loss": 1.9744, "step": 885000 }, { "epoch": 0.1, "learning_rate": 4.6720419199684513e-05, "loss": 2.0464, "step": 885500 }, { "epoch": 0.1, "learning_rate": 4.6718567375404264e-05, "loss": 2.0477, "step": 886000 }, { "epoch": 0.1, "learning_rate": 4.671671555112402e-05, "loss": 2.0451, "step": 886500 }, { "epoch": 0.1, "learning_rate": 4.671486372684377e-05, "loss": 1.9843, "step": 887000 }, { "epoch": 0.1, "learning_rate": 4.671301190256353e-05, "loss": 2.012, "step": 887500 }, { "epoch": 0.1, "learning_rate": 4.6711160078283285e-05, "loss": 2.0533, "step": 888000 }, { "epoch": 0.1, "learning_rate": 4.6709308254003035e-05, "loss": 2.0422, "step": 888500 }, { "epoch": 0.1, "learning_rate": 4.670745642972279e-05, "loss": 2.0168, "step": 889000 }, { "epoch": 0.1, "learning_rate": 4.670560460544254e-05, "loss": 2.0118, "step": 889500 }, { "epoch": 0.1, "learning_rate": 4.67037527811623e-05, "loss": 2.0773, "step": 890000 }, { "epoch": 0.1, "learning_rate": 4.670190095688205e-05, "loss": 2.133, "step": 890500 }, { "epoch": 0.1, "learning_rate": 4.6700049132601806e-05, "loss": 2.0453, "step": 891000 }, { "epoch": 0.1, "learning_rate": 4.669819730832156e-05, "loss": 2.0374, "step": 891500 }, { "epoch": 0.1, "learning_rate": 4.669634548404131e-05, "loss": 2.0671, "step": 892000 }, { "epoch": 0.1, "learning_rate": 4.669449365976107e-05, "loss": 2.0917, "step": 892500 }, { "epoch": 0.11, "learning_rate": 4.669264183548082e-05, "loss": 2.0369, "step": 893000 }, { "epoch": 0.11, "learning_rate": 4.669079001120058e-05, "loss": 2.0395, "step": 893500 }, { "epoch": 0.11, "learning_rate": 4.668893818692033e-05, "loss": 2.056, "step": 894000 }, { "epoch": 0.11, "learning_rate": 4.6687086362640084e-05, "loss": 2.047, "step": 894500 }, { "epoch": 0.11, "learning_rate": 4.668523453835984e-05, "loss": 2.0453, "step": 895000 }, { "epoch": 0.11, "learning_rate": 4.668338271407959e-05, "loss": 2.0838, "step": 895500 }, { "epoch": 0.11, "learning_rate": 4.668153088979935e-05, "loss": 2.0307, "step": 896000 }, { "epoch": 0.11, "learning_rate": 4.66796790655191e-05, "loss": 2.01, "step": 896500 }, { "epoch": 0.11, "learning_rate": 4.6677827241238855e-05, "loss": 2.0041, "step": 897000 }, { "epoch": 0.11, "learning_rate": 4.667597541695861e-05, "loss": 2.0933, "step": 897500 }, { "epoch": 0.11, "learning_rate": 4.667412359267836e-05, "loss": 2.1238, "step": 898000 }, { "epoch": 0.11, "learning_rate": 4.667227176839812e-05, "loss": 2.0307, "step": 898500 }, { "epoch": 0.11, "learning_rate": 4.667041994411787e-05, "loss": 2.0666, "step": 899000 }, { "epoch": 0.11, "learning_rate": 4.6668568119837627e-05, "loss": 2.1073, "step": 899500 }, { "epoch": 0.11, "learning_rate": 4.666671629555738e-05, "loss": 2.044, "step": 900000 }, { "epoch": 0.11, "learning_rate": 4.6664864471277134e-05, "loss": 2.1487, "step": 900500 }, { "epoch": 0.11, "learning_rate": 4.666301264699689e-05, "loss": 2.008, "step": 901000 }, { "epoch": 0.11, "learning_rate": 4.666116082271664e-05, "loss": 2.0978, "step": 901500 }, { "epoch": 0.11, "learning_rate": 4.66593089984364e-05, "loss": 2.0229, "step": 902000 }, { "epoch": 0.11, "learning_rate": 4.665745717415615e-05, "loss": 2.0439, "step": 902500 }, { "epoch": 0.11, "learning_rate": 4.6655605349875905e-05, "loss": 1.985, "step": 903000 }, { "epoch": 0.11, "learning_rate": 4.6653753525595655e-05, "loss": 2.0497, "step": 903500 }, { "epoch": 0.11, "learning_rate": 4.665190170131541e-05, "loss": 2.0531, "step": 904000 }, { "epoch": 0.11, "learning_rate": 4.665004987703517e-05, "loss": 2.059, "step": 904500 }, { "epoch": 0.11, "learning_rate": 4.664819805275492e-05, "loss": 2.056, "step": 905000 }, { "epoch": 0.11, "learning_rate": 4.6646346228474676e-05, "loss": 1.9899, "step": 905500 }, { "epoch": 0.11, "learning_rate": 4.6644494404194426e-05, "loss": 1.9882, "step": 906000 }, { "epoch": 0.11, "learning_rate": 4.664264257991418e-05, "loss": 2.0522, "step": 906500 }, { "epoch": 0.11, "learning_rate": 4.664079075563394e-05, "loss": 2.0068, "step": 907000 }, { "epoch": 0.11, "learning_rate": 4.663893893135369e-05, "loss": 2.0886, "step": 907500 }, { "epoch": 0.11, "learning_rate": 4.663708710707345e-05, "loss": 2.1016, "step": 908000 }, { "epoch": 0.11, "learning_rate": 4.66352352827932e-05, "loss": 2.0114, "step": 908500 }, { "epoch": 0.11, "learning_rate": 4.6633383458512954e-05, "loss": 1.9802, "step": 909000 }, { "epoch": 0.11, "learning_rate": 4.6631531634232705e-05, "loss": 2.0844, "step": 909500 }, { "epoch": 0.11, "learning_rate": 4.662967980995246e-05, "loss": 2.0211, "step": 910000 }, { "epoch": 0.11, "learning_rate": 4.662782798567222e-05, "loss": 2.0286, "step": 910500 }, { "epoch": 0.11, "learning_rate": 4.662597616139197e-05, "loss": 2.0299, "step": 911000 }, { "epoch": 0.11, "learning_rate": 4.6624124337111726e-05, "loss": 2.0454, "step": 911500 }, { "epoch": 0.11, "learning_rate": 4.6622272512831476e-05, "loss": 2.0932, "step": 912000 }, { "epoch": 0.11, "learning_rate": 4.662042068855123e-05, "loss": 2.0509, "step": 912500 }, { "epoch": 0.11, "learning_rate": 4.661856886427098e-05, "loss": 2.0061, "step": 913000 }, { "epoch": 0.11, "learning_rate": 4.661671703999074e-05, "loss": 1.9975, "step": 913500 }, { "epoch": 0.11, "learning_rate": 4.66148652157105e-05, "loss": 1.991, "step": 914000 }, { "epoch": 0.11, "learning_rate": 4.661301339143025e-05, "loss": 2.1047, "step": 914500 }, { "epoch": 0.11, "learning_rate": 4.6611161567150004e-05, "loss": 2.0214, "step": 915000 }, { "epoch": 0.11, "learning_rate": 4.6609309742869754e-05, "loss": 2.0069, "step": 915500 }, { "epoch": 0.11, "learning_rate": 4.660745791858951e-05, "loss": 2.0557, "step": 916000 }, { "epoch": 0.11, "learning_rate": 4.660560609430926e-05, "loss": 2.0242, "step": 916500 }, { "epoch": 0.11, "learning_rate": 4.660375427002902e-05, "loss": 2.0493, "step": 917000 }, { "epoch": 0.11, "learning_rate": 4.6601902445748775e-05, "loss": 1.9199, "step": 917500 }, { "epoch": 0.11, "learning_rate": 4.6600050621468525e-05, "loss": 2.0903, "step": 918000 }, { "epoch": 0.11, "learning_rate": 4.659819879718828e-05, "loss": 2.0927, "step": 918500 }, { "epoch": 0.11, "learning_rate": 4.659634697290803e-05, "loss": 2.022, "step": 919000 }, { "epoch": 0.11, "learning_rate": 4.659449514862779e-05, "loss": 2.0474, "step": 919500 }, { "epoch": 0.11, "learning_rate": 4.6592643324347546e-05, "loss": 2.0682, "step": 920000 }, { "epoch": 0.11, "learning_rate": 4.6590791500067296e-05, "loss": 1.9685, "step": 920500 }, { "epoch": 0.11, "learning_rate": 4.658893967578705e-05, "loss": 2.0483, "step": 921000 }, { "epoch": 0.11, "learning_rate": 4.6587087851506803e-05, "loss": 2.0165, "step": 921500 }, { "epoch": 0.11, "learning_rate": 4.658523602722656e-05, "loss": 2.0368, "step": 922000 }, { "epoch": 0.11, "learning_rate": 4.658338420294631e-05, "loss": 2.0493, "step": 922500 }, { "epoch": 0.11, "learning_rate": 4.658153237866607e-05, "loss": 1.9734, "step": 923000 }, { "epoch": 0.11, "learning_rate": 4.6579680554385825e-05, "loss": 2.058, "step": 923500 }, { "epoch": 0.11, "learning_rate": 4.6577828730105575e-05, "loss": 2.0363, "step": 924000 }, { "epoch": 0.11, "learning_rate": 4.657597690582533e-05, "loss": 2.0874, "step": 924500 }, { "epoch": 0.11, "learning_rate": 4.657412508154508e-05, "loss": 2.0214, "step": 925000 }, { "epoch": 0.11, "learning_rate": 4.657227325726484e-05, "loss": 2.0202, "step": 925500 }, { "epoch": 0.11, "learning_rate": 4.657042143298459e-05, "loss": 2.0941, "step": 926000 }, { "epoch": 0.11, "learning_rate": 4.6568569608704346e-05, "loss": 2.1147, "step": 926500 }, { "epoch": 0.11, "learning_rate": 4.65667177844241e-05, "loss": 2.013, "step": 927000 }, { "epoch": 0.11, "learning_rate": 4.656486596014385e-05, "loss": 2.0582, "step": 927500 }, { "epoch": 0.11, "learning_rate": 4.656301413586361e-05, "loss": 1.9939, "step": 928000 }, { "epoch": 0.11, "learning_rate": 4.656116231158336e-05, "loss": 2.0514, "step": 928500 }, { "epoch": 0.11, "learning_rate": 4.655931048730312e-05, "loss": 2.0541, "step": 929000 }, { "epoch": 0.11, "learning_rate": 4.6557458663022874e-05, "loss": 2.0357, "step": 929500 }, { "epoch": 0.11, "learning_rate": 4.6555606838742624e-05, "loss": 2.0714, "step": 930000 }, { "epoch": 0.11, "learning_rate": 4.655375501446238e-05, "loss": 2.0783, "step": 930500 }, { "epoch": 0.11, "learning_rate": 4.655190319018213e-05, "loss": 1.9661, "step": 931000 }, { "epoch": 0.11, "learning_rate": 4.655005136590189e-05, "loss": 2.0796, "step": 931500 }, { "epoch": 0.11, "learning_rate": 4.654819954162164e-05, "loss": 2.115, "step": 932000 }, { "epoch": 0.11, "learning_rate": 4.6546347717341395e-05, "loss": 2.0506, "step": 932500 }, { "epoch": 0.11, "learning_rate": 4.654449589306115e-05, "loss": 2.1206, "step": 933000 }, { "epoch": 0.11, "learning_rate": 4.65426440687809e-05, "loss": 2.0115, "step": 933500 }, { "epoch": 0.11, "learning_rate": 4.654079224450066e-05, "loss": 1.9922, "step": 934000 }, { "epoch": 0.11, "learning_rate": 4.653894042022041e-05, "loss": 1.9956, "step": 934500 }, { "epoch": 0.11, "learning_rate": 4.6537088595940167e-05, "loss": 2.0415, "step": 935000 }, { "epoch": 0.11, "learning_rate": 4.653523677165992e-05, "loss": 2.0342, "step": 935500 }, { "epoch": 0.11, "learning_rate": 4.6533384947379674e-05, "loss": 2.0324, "step": 936000 }, { "epoch": 0.11, "learning_rate": 4.653153312309943e-05, "loss": 2.0401, "step": 936500 }, { "epoch": 0.11, "learning_rate": 4.652968129881918e-05, "loss": 2.0224, "step": 937000 }, { "epoch": 0.11, "learning_rate": 4.652782947453894e-05, "loss": 2.0999, "step": 937500 }, { "epoch": 0.12, "learning_rate": 4.652597765025869e-05, "loss": 2.0523, "step": 938000 }, { "epoch": 0.12, "learning_rate": 4.6524125825978445e-05, "loss": 2.0315, "step": 938500 }, { "epoch": 0.12, "learning_rate": 4.65222740016982e-05, "loss": 1.9957, "step": 939000 }, { "epoch": 0.12, "learning_rate": 4.652042217741795e-05, "loss": 2.073, "step": 939500 }, { "epoch": 0.12, "learning_rate": 4.651857035313771e-05, "loss": 1.9975, "step": 940000 }, { "epoch": 0.12, "learning_rate": 4.651671852885746e-05, "loss": 1.9802, "step": 940500 }, { "epoch": 0.12, "learning_rate": 4.6514866704577216e-05, "loss": 2.0256, "step": 941000 }, { "epoch": 0.12, "learning_rate": 4.6513014880296966e-05, "loss": 2.0479, "step": 941500 }, { "epoch": 0.12, "learning_rate": 4.651116305601672e-05, "loss": 2.0764, "step": 942000 }, { "epoch": 0.12, "learning_rate": 4.650931123173648e-05, "loss": 1.9871, "step": 942500 }, { "epoch": 0.12, "learning_rate": 4.650745940745623e-05, "loss": 2.0974, "step": 943000 }, { "epoch": 0.12, "learning_rate": 4.650560758317599e-05, "loss": 1.9827, "step": 943500 }, { "epoch": 0.12, "learning_rate": 4.650375575889574e-05, "loss": 2.076, "step": 944000 }, { "epoch": 0.12, "learning_rate": 4.6501903934615494e-05, "loss": 2.0276, "step": 944500 }, { "epoch": 0.12, "learning_rate": 4.6500052110335244e-05, "loss": 2.0284, "step": 945000 }, { "epoch": 0.12, "learning_rate": 4.6498200286055e-05, "loss": 2.0503, "step": 945500 }, { "epoch": 0.12, "learning_rate": 4.649634846177476e-05, "loss": 2.117, "step": 946000 }, { "epoch": 0.12, "learning_rate": 4.649449663749451e-05, "loss": 2.0124, "step": 946500 }, { "epoch": 0.12, "learning_rate": 4.6492644813214265e-05, "loss": 1.9898, "step": 947000 }, { "epoch": 0.12, "learning_rate": 4.6490792988934016e-05, "loss": 2.0763, "step": 947500 }, { "epoch": 0.12, "learning_rate": 4.648894116465377e-05, "loss": 2.0705, "step": 948000 }, { "epoch": 0.12, "learning_rate": 4.648708934037353e-05, "loss": 2.0323, "step": 948500 }, { "epoch": 0.12, "learning_rate": 4.648523751609328e-05, "loss": 2.0466, "step": 949000 }, { "epoch": 0.12, "learning_rate": 4.648338569181304e-05, "loss": 2.001, "step": 949500 }, { "epoch": 0.12, "learning_rate": 4.648153386753279e-05, "loss": 2.0487, "step": 950000 }, { "epoch": 0.12, "learning_rate": 4.6479682043252544e-05, "loss": 2.086, "step": 950500 }, { "epoch": 0.12, "learning_rate": 4.6477830218972294e-05, "loss": 1.9899, "step": 951000 }, { "epoch": 0.12, "learning_rate": 4.647597839469205e-05, "loss": 2.0867, "step": 951500 }, { "epoch": 0.12, "learning_rate": 4.647412657041181e-05, "loss": 2.0617, "step": 952000 }, { "epoch": 0.12, "learning_rate": 4.647227474613156e-05, "loss": 1.9912, "step": 952500 }, { "epoch": 0.12, "learning_rate": 4.6470422921851315e-05, "loss": 2.0193, "step": 953000 }, { "epoch": 0.12, "learning_rate": 4.6468571097571065e-05, "loss": 2.0106, "step": 953500 }, { "epoch": 0.12, "learning_rate": 4.646671927329082e-05, "loss": 2.0216, "step": 954000 }, { "epoch": 0.12, "learning_rate": 4.646486744901057e-05, "loss": 2.0525, "step": 954500 }, { "epoch": 0.12, "learning_rate": 4.6463015624730336e-05, "loss": 2.0356, "step": 955000 }, { "epoch": 0.12, "learning_rate": 4.6461163800450086e-05, "loss": 2.1147, "step": 955500 }, { "epoch": 0.12, "learning_rate": 4.6459311976169836e-05, "loss": 2.0354, "step": 956000 }, { "epoch": 0.12, "learning_rate": 4.645746015188959e-05, "loss": 2.0831, "step": 956500 }, { "epoch": 0.12, "learning_rate": 4.6455608327609343e-05, "loss": 2.0503, "step": 957000 }, { "epoch": 0.12, "learning_rate": 4.64537565033291e-05, "loss": 1.9702, "step": 957500 }, { "epoch": 0.12, "learning_rate": 4.645190467904886e-05, "loss": 1.9823, "step": 958000 }, { "epoch": 0.12, "learning_rate": 4.6450052854768614e-05, "loss": 2.0705, "step": 958500 }, { "epoch": 0.12, "learning_rate": 4.6448201030488364e-05, "loss": 2.0601, "step": 959000 }, { "epoch": 0.12, "learning_rate": 4.6446349206208115e-05, "loss": 2.0164, "step": 959500 }, { "epoch": 0.12, "learning_rate": 4.644449738192787e-05, "loss": 2.1266, "step": 960000 }, { "epoch": 0.12, "learning_rate": 4.644264555764762e-05, "loss": 2.0423, "step": 960500 }, { "epoch": 0.12, "learning_rate": 4.644079373336738e-05, "loss": 2.0309, "step": 961000 }, { "epoch": 0.12, "learning_rate": 4.6438941909087136e-05, "loss": 2.0068, "step": 961500 }, { "epoch": 0.12, "learning_rate": 4.6437090084806886e-05, "loss": 2.0237, "step": 962000 }, { "epoch": 0.12, "learning_rate": 4.643523826052664e-05, "loss": 2.0418, "step": 962500 }, { "epoch": 0.12, "learning_rate": 4.643338643624639e-05, "loss": 2.0352, "step": 963000 }, { "epoch": 0.12, "learning_rate": 4.643153461196615e-05, "loss": 2.0765, "step": 963500 }, { "epoch": 0.12, "learning_rate": 4.64296827876859e-05, "loss": 2.092, "step": 964000 }, { "epoch": 0.12, "learning_rate": 4.6427830963405664e-05, "loss": 2.0081, "step": 964500 }, { "epoch": 0.12, "learning_rate": 4.6425979139125414e-05, "loss": 2.01, "step": 965000 }, { "epoch": 0.12, "learning_rate": 4.6424127314845164e-05, "loss": 2.0473, "step": 965500 }, { "epoch": 0.12, "learning_rate": 4.642227549056492e-05, "loss": 2.0386, "step": 966000 }, { "epoch": 0.12, "learning_rate": 4.642042366628467e-05, "loss": 2.0354, "step": 966500 }, { "epoch": 0.12, "learning_rate": 4.641857184200443e-05, "loss": 2.0138, "step": 967000 }, { "epoch": 0.12, "learning_rate": 4.641672001772418e-05, "loss": 2.0468, "step": 967500 }, { "epoch": 0.12, "learning_rate": 4.641486819344394e-05, "loss": 1.9797, "step": 968000 }, { "epoch": 0.12, "learning_rate": 4.641301636916369e-05, "loss": 2.1049, "step": 968500 }, { "epoch": 0.12, "learning_rate": 4.641116454488344e-05, "loss": 2.0446, "step": 969000 }, { "epoch": 0.12, "learning_rate": 4.64093127206032e-05, "loss": 2.0752, "step": 969500 }, { "epoch": 0.12, "learning_rate": 4.640746089632295e-05, "loss": 2.0608, "step": 970000 }, { "epoch": 0.12, "learning_rate": 4.6405609072042706e-05, "loss": 2.0546, "step": 970500 }, { "epoch": 0.12, "learning_rate": 4.6403757247762463e-05, "loss": 2.0337, "step": 971000 }, { "epoch": 0.12, "learning_rate": 4.640190542348222e-05, "loss": 2.0205, "step": 971500 }, { "epoch": 0.12, "learning_rate": 4.640005359920197e-05, "loss": 2.1151, "step": 972000 }, { "epoch": 0.12, "learning_rate": 4.639820177492172e-05, "loss": 2.0644, "step": 972500 }, { "epoch": 0.12, "learning_rate": 4.639634995064148e-05, "loss": 2.0661, "step": 973000 }, { "epoch": 0.12, "learning_rate": 4.639449812636123e-05, "loss": 1.9976, "step": 973500 }, { "epoch": 0.12, "learning_rate": 4.639264630208099e-05, "loss": 2.0132, "step": 974000 }, { "epoch": 0.12, "learning_rate": 4.639079447780074e-05, "loss": 2.0034, "step": 974500 }, { "epoch": 0.12, "learning_rate": 4.638894265352049e-05, "loss": 1.9839, "step": 975000 }, { "epoch": 0.12, "learning_rate": 4.638709082924025e-05, "loss": 2.1059, "step": 975500 }, { "epoch": 0.12, "learning_rate": 4.638523900496e-05, "loss": 1.9894, "step": 976000 }, { "epoch": 0.12, "learning_rate": 4.6383387180679756e-05, "loss": 2.047, "step": 976500 }, { "epoch": 0.12, "learning_rate": 4.6381535356399506e-05, "loss": 1.9589, "step": 977000 }, { "epoch": 0.12, "learning_rate": 4.637968353211927e-05, "loss": 2.0083, "step": 977500 }, { "epoch": 0.12, "learning_rate": 4.637783170783902e-05, "loss": 2.0254, "step": 978000 }, { "epoch": 0.12, "learning_rate": 4.637597988355877e-05, "loss": 2.0772, "step": 978500 }, { "epoch": 0.12, "learning_rate": 4.637412805927853e-05, "loss": 2.0772, "step": 979000 }, { "epoch": 0.12, "learning_rate": 4.637227623499828e-05, "loss": 2.0938, "step": 979500 }, { "epoch": 0.12, "learning_rate": 4.6370424410718034e-05, "loss": 2.0982, "step": 980000 }, { "epoch": 0.12, "learning_rate": 4.636857258643779e-05, "loss": 2.0493, "step": 980500 }, { "epoch": 0.12, "learning_rate": 4.636672076215755e-05, "loss": 2.0379, "step": 981000 }, { "epoch": 0.12, "learning_rate": 4.63648689378773e-05, "loss": 2.0638, "step": 981500 }, { "epoch": 0.12, "learning_rate": 4.636301711359705e-05, "loss": 2.1105, "step": 982000 }, { "epoch": 0.12, "learning_rate": 4.6361165289316805e-05, "loss": 2.0282, "step": 982500 }, { "epoch": 0.13, "learning_rate": 4.6359313465036556e-05, "loss": 1.988, "step": 983000 }, { "epoch": 0.13, "learning_rate": 4.635746164075631e-05, "loss": 2.0554, "step": 983500 }, { "epoch": 0.13, "learning_rate": 4.635560981647607e-05, "loss": 2.1075, "step": 984000 }, { "epoch": 0.13, "learning_rate": 4.6353757992195826e-05, "loss": 2.0504, "step": 984500 }, { "epoch": 0.13, "learning_rate": 4.6351906167915577e-05, "loss": 2.0456, "step": 985000 }, { "epoch": 0.13, "learning_rate": 4.635005434363533e-05, "loss": 2.0108, "step": 985500 }, { "epoch": 0.13, "learning_rate": 4.6348202519355084e-05, "loss": 2.0372, "step": 986000 }, { "epoch": 0.13, "learning_rate": 4.6346350695074834e-05, "loss": 2.0026, "step": 986500 }, { "epoch": 0.13, "learning_rate": 4.63444988707946e-05, "loss": 1.9993, "step": 987000 }, { "epoch": 0.13, "learning_rate": 4.634264704651435e-05, "loss": 2.0701, "step": 987500 }, { "epoch": 0.13, "learning_rate": 4.63407952222341e-05, "loss": 2.0102, "step": 988000 }, { "epoch": 0.13, "learning_rate": 4.6338943397953855e-05, "loss": 1.982, "step": 988500 }, { "epoch": 0.13, "learning_rate": 4.6337091573673605e-05, "loss": 2.0209, "step": 989000 }, { "epoch": 0.13, "learning_rate": 4.633523974939336e-05, "loss": 2.0488, "step": 989500 }, { "epoch": 0.13, "learning_rate": 4.633338792511312e-05, "loss": 2.0136, "step": 990000 }, { "epoch": 0.13, "learning_rate": 4.6331536100832876e-05, "loss": 2.1251, "step": 990500 }, { "epoch": 0.13, "learning_rate": 4.6329684276552626e-05, "loss": 2.0328, "step": 991000 }, { "epoch": 0.13, "learning_rate": 4.6327832452272376e-05, "loss": 1.9825, "step": 991500 }, { "epoch": 0.13, "learning_rate": 4.632598062799213e-05, "loss": 2.06, "step": 992000 }, { "epoch": 0.13, "learning_rate": 4.632412880371188e-05, "loss": 2.0039, "step": 992500 }, { "epoch": 0.13, "learning_rate": 4.632227697943164e-05, "loss": 1.9938, "step": 993000 }, { "epoch": 0.13, "learning_rate": 4.63204251551514e-05, "loss": 2.0274, "step": 993500 }, { "epoch": 0.13, "learning_rate": 4.6318573330871154e-05, "loss": 2.0711, "step": 994000 }, { "epoch": 0.13, "learning_rate": 4.6316721506590904e-05, "loss": 1.9769, "step": 994500 }, { "epoch": 0.13, "learning_rate": 4.6314869682310655e-05, "loss": 1.9745, "step": 995000 }, { "epoch": 0.13, "learning_rate": 4.631301785803041e-05, "loss": 1.992, "step": 995500 }, { "epoch": 0.13, "learning_rate": 4.631116603375016e-05, "loss": 2.0302, "step": 996000 }, { "epoch": 0.13, "learning_rate": 4.6309314209469925e-05, "loss": 2.0624, "step": 996500 }, { "epoch": 0.13, "learning_rate": 4.6307462385189676e-05, "loss": 1.9892, "step": 997000 }, { "epoch": 0.13, "learning_rate": 4.630561056090943e-05, "loss": 2.0339, "step": 997500 }, { "epoch": 0.13, "learning_rate": 4.630375873662918e-05, "loss": 2.0717, "step": 998000 }, { "epoch": 0.13, "learning_rate": 4.630190691234893e-05, "loss": 2.0768, "step": 998500 }, { "epoch": 0.13, "learning_rate": 4.630005508806869e-05, "loss": 2.0493, "step": 999000 }, { "epoch": 0.13, "learning_rate": 4.629820326378845e-05, "loss": 2.0204, "step": 999500 }, { "epoch": 0.13, "learning_rate": 4.6296351439508204e-05, "loss": 2.0824, "step": 1000000 }, { "epoch": 0.13, "learning_rate": 4.6294499615227954e-05, "loss": 2.1043, "step": 1000500 }, { "epoch": 0.13, "learning_rate": 4.629264779094771e-05, "loss": 2.0029, "step": 1001000 }, { "epoch": 0.13, "learning_rate": 4.629079596666746e-05, "loss": 2.0412, "step": 1001500 }, { "epoch": 0.13, "learning_rate": 4.628894414238721e-05, "loss": 2.0522, "step": 1002000 }, { "epoch": 0.13, "learning_rate": 4.628709231810697e-05, "loss": 2.0814, "step": 1002500 }, { "epoch": 0.13, "learning_rate": 4.6285240493826725e-05, "loss": 2.1019, "step": 1003000 }, { "epoch": 0.13, "learning_rate": 4.628338866954648e-05, "loss": 2.0045, "step": 1003500 }, { "epoch": 0.13, "learning_rate": 4.628153684526623e-05, "loss": 1.9963, "step": 1004000 }, { "epoch": 0.13, "learning_rate": 4.627968502098598e-05, "loss": 2.0839, "step": 1004500 }, { "epoch": 0.13, "learning_rate": 4.627783319670574e-05, "loss": 2.0057, "step": 1005000 }, { "epoch": 0.13, "learning_rate": 4.627598137242549e-05, "loss": 1.9783, "step": 1005500 }, { "epoch": 0.13, "learning_rate": 4.627412954814525e-05, "loss": 2.0617, "step": 1006000 }, { "epoch": 0.13, "learning_rate": 4.6272277723865e-05, "loss": 2.0306, "step": 1006500 }, { "epoch": 0.13, "learning_rate": 4.627042589958476e-05, "loss": 2.0356, "step": 1007000 }, { "epoch": 0.13, "learning_rate": 4.626857407530451e-05, "loss": 1.9083, "step": 1007500 }, { "epoch": 0.13, "learning_rate": 4.626672225102426e-05, "loss": 2.0926, "step": 1008000 }, { "epoch": 0.13, "learning_rate": 4.626487042674402e-05, "loss": 2.1131, "step": 1008500 }, { "epoch": 0.13, "learning_rate": 4.626301860246377e-05, "loss": 2.0052, "step": 1009000 }, { "epoch": 0.13, "learning_rate": 4.626116677818353e-05, "loss": 2.1092, "step": 1009500 }, { "epoch": 0.13, "learning_rate": 4.625931495390328e-05, "loss": 1.9954, "step": 1010000 }, { "epoch": 0.13, "learning_rate": 4.625746312962304e-05, "loss": 1.9873, "step": 1010500 }, { "epoch": 0.13, "learning_rate": 4.625561130534279e-05, "loss": 2.0889, "step": 1011000 }, { "epoch": 0.13, "learning_rate": 4.625375948106254e-05, "loss": 2.0882, "step": 1011500 }, { "epoch": 0.13, "learning_rate": 4.6251907656782296e-05, "loss": 2.044, "step": 1012000 }, { "epoch": 0.13, "learning_rate": 4.625005583250205e-05, "loss": 1.9891, "step": 1012500 }, { "epoch": 0.13, "learning_rate": 4.624820400822181e-05, "loss": 2.0041, "step": 1013000 }, { "epoch": 0.13, "learning_rate": 4.624635218394156e-05, "loss": 2.0249, "step": 1013500 }, { "epoch": 0.13, "learning_rate": 4.624450035966132e-05, "loss": 2.0084, "step": 1014000 }, { "epoch": 0.13, "learning_rate": 4.624264853538107e-05, "loss": 2.0522, "step": 1014500 }, { "epoch": 0.13, "learning_rate": 4.624079671110082e-05, "loss": 2.0631, "step": 1015000 }, { "epoch": 0.13, "learning_rate": 4.623894488682058e-05, "loss": 2.1273, "step": 1015500 }, { "epoch": 0.13, "learning_rate": 4.623709306254033e-05, "loss": 2.0269, "step": 1016000 }, { "epoch": 0.13, "learning_rate": 4.623524123826009e-05, "loss": 1.9982, "step": 1016500 }, { "epoch": 0.13, "learning_rate": 4.623338941397984e-05, "loss": 2.0475, "step": 1017000 }, { "epoch": 0.13, "learning_rate": 4.623153758969959e-05, "loss": 2.0393, "step": 1017500 }, { "epoch": 0.13, "learning_rate": 4.6229685765419345e-05, "loss": 2.0585, "step": 1018000 }, { "epoch": 0.13, "learning_rate": 4.6227833941139095e-05, "loss": 2.05, "step": 1018500 }, { "epoch": 0.13, "learning_rate": 4.622598211685886e-05, "loss": 1.9668, "step": 1019000 }, { "epoch": 0.13, "learning_rate": 4.622413029257861e-05, "loss": 2.0707, "step": 1019500 }, { "epoch": 0.13, "learning_rate": 4.6222278468298366e-05, "loss": 2.0517, "step": 1020000 }, { "epoch": 0.13, "learning_rate": 4.6220426644018117e-05, "loss": 2.1031, "step": 1020500 }, { "epoch": 0.13, "learning_rate": 4.621857481973787e-05, "loss": 2.0798, "step": 1021000 }, { "epoch": 0.13, "learning_rate": 4.6216722995457624e-05, "loss": 2.0894, "step": 1021500 }, { "epoch": 0.13, "learning_rate": 4.621487117117738e-05, "loss": 2.0638, "step": 1022000 }, { "epoch": 0.13, "learning_rate": 4.621301934689714e-05, "loss": 2.0821, "step": 1022500 }, { "epoch": 0.13, "learning_rate": 4.621116752261689e-05, "loss": 2.0366, "step": 1023000 }, { "epoch": 0.13, "learning_rate": 4.6209315698336645e-05, "loss": 2.042, "step": 1023500 }, { "epoch": 0.13, "learning_rate": 4.6207463874056395e-05, "loss": 1.9892, "step": 1024000 }, { "epoch": 0.13, "learning_rate": 4.6205612049776145e-05, "loss": 2.0839, "step": 1024500 }, { "epoch": 0.13, "learning_rate": 4.620376022549591e-05, "loss": 2.0717, "step": 1025000 }, { "epoch": 0.13, "learning_rate": 4.620190840121566e-05, "loss": 2.0489, "step": 1025500 }, { "epoch": 0.13, "learning_rate": 4.6200056576935416e-05, "loss": 2.1028, "step": 1026000 }, { "epoch": 0.13, "learning_rate": 4.6198204752655166e-05, "loss": 2.0154, "step": 1026500 }, { "epoch": 0.13, "learning_rate": 4.619635292837492e-05, "loss": 2.1096, "step": 1027000 }, { "epoch": 0.13, "learning_rate": 4.619450110409467e-05, "loss": 2.0815, "step": 1027500 }, { "epoch": 0.14, "learning_rate": 4.619264927981442e-05, "loss": 2.0281, "step": 1028000 }, { "epoch": 0.14, "learning_rate": 4.619079745553419e-05, "loss": 2.1375, "step": 1028500 }, { "epoch": 0.14, "learning_rate": 4.618894563125394e-05, "loss": 2.0621, "step": 1029000 }, { "epoch": 0.14, "learning_rate": 4.6187093806973694e-05, "loss": 1.9994, "step": 1029500 }, { "epoch": 0.14, "learning_rate": 4.6185241982693444e-05, "loss": 2.0178, "step": 1030000 }, { "epoch": 0.14, "learning_rate": 4.6183390158413194e-05, "loss": 2.0268, "step": 1030500 }, { "epoch": 0.14, "learning_rate": 4.618153833413295e-05, "loss": 2.0808, "step": 1031000 }, { "epoch": 0.14, "learning_rate": 4.617968650985271e-05, "loss": 2.0078, "step": 1031500 }, { "epoch": 0.14, "learning_rate": 4.6177834685572465e-05, "loss": 2.0562, "step": 1032000 }, { "epoch": 0.14, "learning_rate": 4.6175982861292215e-05, "loss": 2.0047, "step": 1032500 }, { "epoch": 0.14, "learning_rate": 4.617413103701197e-05, "loss": 2.0494, "step": 1033000 }, { "epoch": 0.14, "learning_rate": 4.617227921273172e-05, "loss": 2.0175, "step": 1033500 }, { "epoch": 0.14, "learning_rate": 4.617042738845147e-05, "loss": 2.0532, "step": 1034000 }, { "epoch": 0.14, "learning_rate": 4.616857556417123e-05, "loss": 2.085, "step": 1034500 }, { "epoch": 0.14, "learning_rate": 4.616672373989099e-05, "loss": 2.0665, "step": 1035000 }, { "epoch": 0.14, "learning_rate": 4.6164871915610744e-05, "loss": 1.9793, "step": 1035500 }, { "epoch": 0.14, "learning_rate": 4.6163020091330494e-05, "loss": 2.0211, "step": 1036000 }, { "epoch": 0.14, "learning_rate": 4.616116826705025e-05, "loss": 2.0065, "step": 1036500 }, { "epoch": 0.14, "learning_rate": 4.615931644277e-05, "loss": 2.0266, "step": 1037000 }, { "epoch": 0.14, "learning_rate": 4.615746461848975e-05, "loss": 2.049, "step": 1037500 }, { "epoch": 0.14, "learning_rate": 4.6155612794209515e-05, "loss": 2.0853, "step": 1038000 }, { "epoch": 0.14, "learning_rate": 4.6153760969929265e-05, "loss": 2.0657, "step": 1038500 }, { "epoch": 0.14, "learning_rate": 4.615190914564902e-05, "loss": 2.0305, "step": 1039000 }, { "epoch": 0.14, "learning_rate": 4.615005732136877e-05, "loss": 2.0104, "step": 1039500 }, { "epoch": 0.14, "learning_rate": 4.614820549708853e-05, "loss": 2.0452, "step": 1040000 }, { "epoch": 0.14, "learning_rate": 4.614635367280828e-05, "loss": 2.0034, "step": 1040500 }, { "epoch": 0.14, "learning_rate": 4.6144501848528036e-05, "loss": 2.0114, "step": 1041000 }, { "epoch": 0.14, "learning_rate": 4.614265002424779e-05, "loss": 2.0384, "step": 1041500 }, { "epoch": 0.14, "learning_rate": 4.614079819996754e-05, "loss": 2.0609, "step": 1042000 }, { "epoch": 0.14, "learning_rate": 4.61389463756873e-05, "loss": 2.0699, "step": 1042500 }, { "epoch": 0.14, "learning_rate": 4.613709455140705e-05, "loss": 2.0296, "step": 1043000 }, { "epoch": 0.14, "learning_rate": 4.613524272712681e-05, "loss": 2.0341, "step": 1043500 }, { "epoch": 0.14, "learning_rate": 4.613339090284656e-05, "loss": 2.0044, "step": 1044000 }, { "epoch": 0.14, "learning_rate": 4.6131539078566314e-05, "loss": 2.0148, "step": 1044500 }, { "epoch": 0.14, "learning_rate": 4.612968725428607e-05, "loss": 1.9761, "step": 1045000 }, { "epoch": 0.14, "learning_rate": 4.612783543000582e-05, "loss": 1.9635, "step": 1045500 }, { "epoch": 0.14, "learning_rate": 4.612598360572558e-05, "loss": 2.0579, "step": 1046000 }, { "epoch": 0.14, "learning_rate": 4.612413178144533e-05, "loss": 2.0461, "step": 1046500 }, { "epoch": 0.14, "learning_rate": 4.612227995716508e-05, "loss": 2.0131, "step": 1047000 }, { "epoch": 0.14, "learning_rate": 4.612042813288484e-05, "loss": 2.0641, "step": 1047500 }, { "epoch": 0.14, "learning_rate": 4.611857630860459e-05, "loss": 2.0182, "step": 1048000 }, { "epoch": 0.14, "learning_rate": 4.611672448432435e-05, "loss": 2.0754, "step": 1048500 }, { "epoch": 0.14, "learning_rate": 4.61148726600441e-05, "loss": 2.0843, "step": 1049000 }, { "epoch": 0.14, "learning_rate": 4.611302083576386e-05, "loss": 2.0939, "step": 1049500 }, { "epoch": 0.14, "learning_rate": 4.611116901148361e-05, "loss": 2.0775, "step": 1050000 }, { "epoch": 0.14, "learning_rate": 4.6109317187203364e-05, "loss": 2.0064, "step": 1050500 }, { "epoch": 0.14, "learning_rate": 4.610746536292312e-05, "loss": 1.9742, "step": 1051000 }, { "epoch": 0.14, "learning_rate": 4.610561353864287e-05, "loss": 2.0307, "step": 1051500 }, { "epoch": 0.14, "learning_rate": 4.610376171436263e-05, "loss": 2.0393, "step": 1052000 }, { "epoch": 0.14, "learning_rate": 4.610190989008238e-05, "loss": 2.0434, "step": 1052500 }, { "epoch": 0.14, "learning_rate": 4.6100058065802135e-05, "loss": 1.9794, "step": 1053000 }, { "epoch": 0.14, "learning_rate": 4.6098206241521885e-05, "loss": 2.0144, "step": 1053500 }, { "epoch": 0.14, "learning_rate": 4.609635441724164e-05, "loss": 2.1097, "step": 1054000 }, { "epoch": 0.14, "learning_rate": 4.60945025929614e-05, "loss": 2.0711, "step": 1054500 }, { "epoch": 0.14, "learning_rate": 4.609265076868115e-05, "loss": 2.0481, "step": 1055000 }, { "epoch": 0.14, "learning_rate": 4.6090798944400906e-05, "loss": 1.982, "step": 1055500 }, { "epoch": 0.14, "learning_rate": 4.6088947120120656e-05, "loss": 1.9887, "step": 1056000 }, { "epoch": 0.14, "learning_rate": 4.608709529584041e-05, "loss": 2.0213, "step": 1056500 }, { "epoch": 0.14, "learning_rate": 4.608524347156017e-05, "loss": 2.0595, "step": 1057000 }, { "epoch": 0.14, "learning_rate": 4.608339164727992e-05, "loss": 2.0202, "step": 1057500 }, { "epoch": 0.14, "learning_rate": 4.608153982299968e-05, "loss": 2.0724, "step": 1058000 }, { "epoch": 0.14, "learning_rate": 4.607968799871943e-05, "loss": 2.0174, "step": 1058500 }, { "epoch": 0.14, "learning_rate": 4.6077836174439185e-05, "loss": 2.0645, "step": 1059000 }, { "epoch": 0.14, "learning_rate": 4.6075984350158935e-05, "loss": 2.0285, "step": 1059500 }, { "epoch": 0.14, "learning_rate": 4.6074132525878685e-05, "loss": 2.0194, "step": 1060000 }, { "epoch": 0.14, "learning_rate": 4.607228070159845e-05, "loss": 2.0411, "step": 1060500 }, { "epoch": 0.14, "learning_rate": 4.60704288773182e-05, "loss": 2.0141, "step": 1061000 }, { "epoch": 0.14, "learning_rate": 4.6068577053037956e-05, "loss": 2.0023, "step": 1061500 }, { "epoch": 0.14, "learning_rate": 4.6066725228757706e-05, "loss": 2.0146, "step": 1062000 }, { "epoch": 0.14, "learning_rate": 4.606487340447746e-05, "loss": 2.0538, "step": 1062500 }, { "epoch": 0.14, "learning_rate": 4.606302158019721e-05, "loss": 2.0315, "step": 1063000 }, { "epoch": 0.14, "learning_rate": 4.606116975591697e-05, "loss": 2.0389, "step": 1063500 }, { "epoch": 0.14, "learning_rate": 4.605931793163673e-05, "loss": 2.0816, "step": 1064000 }, { "epoch": 0.14, "learning_rate": 4.605746610735648e-05, "loss": 2.0201, "step": 1064500 }, { "epoch": 0.14, "learning_rate": 4.6055614283076234e-05, "loss": 2.0379, "step": 1065000 }, { "epoch": 0.14, "learning_rate": 4.6053762458795984e-05, "loss": 1.9517, "step": 1065500 }, { "epoch": 0.14, "learning_rate": 4.605191063451574e-05, "loss": 2.0394, "step": 1066000 }, { "epoch": 0.14, "learning_rate": 4.60500588102355e-05, "loss": 2.0043, "step": 1066500 }, { "epoch": 0.14, "learning_rate": 4.604820698595525e-05, "loss": 2.0323, "step": 1067000 }, { "epoch": 0.14, "learning_rate": 4.6046355161675005e-05, "loss": 2.0322, "step": 1067500 }, { "epoch": 0.14, "learning_rate": 4.6044503337394755e-05, "loss": 1.9481, "step": 1068000 }, { "epoch": 0.14, "learning_rate": 4.604265151311451e-05, "loss": 2.0834, "step": 1068500 }, { "epoch": 0.14, "learning_rate": 4.604079968883426e-05, "loss": 2.0398, "step": 1069000 }, { "epoch": 0.14, "learning_rate": 4.603894786455402e-05, "loss": 2.0324, "step": 1069500 }, { "epoch": 0.14, "learning_rate": 4.6037096040273776e-05, "loss": 2.0576, "step": 1070000 }, { "epoch": 0.14, "learning_rate": 4.6035244215993527e-05, "loss": 1.9908, "step": 1070500 }, { "epoch": 0.14, "learning_rate": 4.6033392391713284e-05, "loss": 2.004, "step": 1071000 }, { "epoch": 0.14, "learning_rate": 4.6031540567433034e-05, "loss": 2.0846, "step": 1071500 }, { "epoch": 0.14, "learning_rate": 4.602968874315279e-05, "loss": 1.9894, "step": 1072000 }, { "epoch": 0.14, "learning_rate": 4.602783691887254e-05, "loss": 2.0524, "step": 1072500 }, { "epoch": 0.15, "learning_rate": 4.60259850945923e-05, "loss": 2.0706, "step": 1073000 }, { "epoch": 0.15, "learning_rate": 4.6024133270312055e-05, "loss": 1.9906, "step": 1073500 }, { "epoch": 0.15, "learning_rate": 4.6022281446031805e-05, "loss": 1.9503, "step": 1074000 }, { "epoch": 0.15, "learning_rate": 4.602042962175156e-05, "loss": 2.0304, "step": 1074500 }, { "epoch": 0.15, "learning_rate": 4.601857779747131e-05, "loss": 2.0718, "step": 1075000 }, { "epoch": 0.15, "learning_rate": 4.601672597319107e-05, "loss": 2.036, "step": 1075500 }, { "epoch": 0.15, "learning_rate": 4.6014874148910826e-05, "loss": 2.0283, "step": 1076000 }, { "epoch": 0.15, "learning_rate": 4.6013022324630576e-05, "loss": 2.0526, "step": 1076500 }, { "epoch": 0.15, "learning_rate": 4.601117050035033e-05, "loss": 2.0656, "step": 1077000 }, { "epoch": 0.15, "learning_rate": 4.600931867607008e-05, "loss": 1.9943, "step": 1077500 }, { "epoch": 0.15, "learning_rate": 4.600746685178984e-05, "loss": 2.0667, "step": 1078000 }, { "epoch": 0.15, "learning_rate": 4.600561502750959e-05, "loss": 2.0688, "step": 1078500 }, { "epoch": 0.15, "learning_rate": 4.600376320322935e-05, "loss": 2.0692, "step": 1079000 }, { "epoch": 0.15, "learning_rate": 4.6001911378949104e-05, "loss": 2.0125, "step": 1079500 }, { "epoch": 0.15, "learning_rate": 4.6000059554668854e-05, "loss": 2.0157, "step": 1080000 }, { "epoch": 0.15, "learning_rate": 4.599820773038861e-05, "loss": 2.0318, "step": 1080500 }, { "epoch": 0.15, "learning_rate": 4.599635590610836e-05, "loss": 2.0082, "step": 1081000 }, { "epoch": 0.15, "learning_rate": 4.599450408182812e-05, "loss": 2.0442, "step": 1081500 }, { "epoch": 0.15, "learning_rate": 4.599265225754787e-05, "loss": 1.9515, "step": 1082000 }, { "epoch": 0.15, "learning_rate": 4.5990800433267626e-05, "loss": 2.03, "step": 1082500 }, { "epoch": 0.15, "learning_rate": 4.598894860898738e-05, "loss": 2.007, "step": 1083000 }, { "epoch": 0.15, "learning_rate": 4.598709678470713e-05, "loss": 2.0143, "step": 1083500 }, { "epoch": 0.15, "learning_rate": 4.598524496042689e-05, "loss": 2.045, "step": 1084000 }, { "epoch": 0.15, "learning_rate": 4.598339313614664e-05, "loss": 2.0811, "step": 1084500 }, { "epoch": 0.15, "learning_rate": 4.59815413118664e-05, "loss": 2.0096, "step": 1085000 }, { "epoch": 0.15, "learning_rate": 4.597968948758615e-05, "loss": 1.973, "step": 1085500 }, { "epoch": 0.15, "learning_rate": 4.5977837663305904e-05, "loss": 1.9465, "step": 1086000 }, { "epoch": 0.15, "learning_rate": 4.597598583902566e-05, "loss": 1.9726, "step": 1086500 }, { "epoch": 0.15, "learning_rate": 4.597413401474541e-05, "loss": 2.0709, "step": 1087000 }, { "epoch": 0.15, "learning_rate": 4.597228219046517e-05, "loss": 2.0036, "step": 1087500 }, { "epoch": 0.15, "learning_rate": 4.597043036618492e-05, "loss": 1.9962, "step": 1088000 }, { "epoch": 0.15, "learning_rate": 4.5968578541904675e-05, "loss": 1.9791, "step": 1088500 }, { "epoch": 0.15, "learning_rate": 4.596672671762443e-05, "loss": 2.0552, "step": 1089000 }, { "epoch": 0.15, "learning_rate": 4.596487489334418e-05, "loss": 1.9897, "step": 1089500 }, { "epoch": 0.15, "learning_rate": 4.596302306906394e-05, "loss": 2.0258, "step": 1090000 }, { "epoch": 0.15, "learning_rate": 4.596117124478369e-05, "loss": 2.01, "step": 1090500 }, { "epoch": 0.15, "learning_rate": 4.5959319420503446e-05, "loss": 2.0384, "step": 1091000 }, { "epoch": 0.15, "learning_rate": 4.5957467596223196e-05, "loss": 2.0046, "step": 1091500 }, { "epoch": 0.15, "learning_rate": 4.595561577194295e-05, "loss": 2.0059, "step": 1092000 }, { "epoch": 0.15, "learning_rate": 4.595376394766271e-05, "loss": 2.0942, "step": 1092500 }, { "epoch": 0.15, "learning_rate": 4.595191212338246e-05, "loss": 2.0283, "step": 1093000 }, { "epoch": 0.15, "learning_rate": 4.595006029910222e-05, "loss": 2.0263, "step": 1093500 }, { "epoch": 0.15, "learning_rate": 4.594820847482197e-05, "loss": 2.0925, "step": 1094000 }, { "epoch": 0.15, "learning_rate": 4.5946356650541724e-05, "loss": 1.9677, "step": 1094500 }, { "epoch": 0.15, "learning_rate": 4.5944504826261475e-05, "loss": 2.0579, "step": 1095000 }, { "epoch": 0.15, "learning_rate": 4.594265300198123e-05, "loss": 2.0022, "step": 1095500 }, { "epoch": 0.15, "learning_rate": 4.594080117770099e-05, "loss": 2.0642, "step": 1096000 }, { "epoch": 0.15, "learning_rate": 4.593894935342074e-05, "loss": 2.0249, "step": 1096500 }, { "epoch": 0.15, "learning_rate": 4.5937097529140496e-05, "loss": 2.0108, "step": 1097000 }, { "epoch": 0.15, "learning_rate": 4.5935245704860246e-05, "loss": 2.0411, "step": 1097500 }, { "epoch": 0.15, "learning_rate": 4.593339388058e-05, "loss": 2.0254, "step": 1098000 }, { "epoch": 0.15, "learning_rate": 4.593154205629976e-05, "loss": 2.0359, "step": 1098500 }, { "epoch": 0.15, "learning_rate": 4.592969023201951e-05, "loss": 1.9545, "step": 1099000 }, { "epoch": 0.15, "learning_rate": 4.592783840773927e-05, "loss": 2.0384, "step": 1099500 }, { "epoch": 0.15, "learning_rate": 4.592598658345902e-05, "loss": 2.0318, "step": 1100000 }, { "epoch": 0.15, "learning_rate": 4.5924134759178774e-05, "loss": 2.0491, "step": 1100500 }, { "epoch": 0.15, "learning_rate": 4.5922282934898524e-05, "loss": 2.0369, "step": 1101000 }, { "epoch": 0.15, "learning_rate": 4.592043111061828e-05, "loss": 1.9809, "step": 1101500 }, { "epoch": 0.15, "learning_rate": 4.591857928633804e-05, "loss": 2.0405, "step": 1102000 }, { "epoch": 0.15, "learning_rate": 4.591672746205779e-05, "loss": 2.0773, "step": 1102500 }, { "epoch": 0.15, "learning_rate": 4.5914875637777545e-05, "loss": 1.9896, "step": 1103000 }, { "epoch": 0.15, "learning_rate": 4.5913023813497295e-05, "loss": 2.0292, "step": 1103500 }, { "epoch": 0.15, "learning_rate": 4.591117198921705e-05, "loss": 2.0313, "step": 1104000 }, { "epoch": 0.15, "learning_rate": 4.59093201649368e-05, "loss": 2.051, "step": 1104500 }, { "epoch": 0.15, "learning_rate": 4.590746834065656e-05, "loss": 2.0128, "step": 1105000 }, { "epoch": 0.15, "learning_rate": 4.5905616516376316e-05, "loss": 2.006, "step": 1105500 }, { "epoch": 0.15, "learning_rate": 4.5903764692096066e-05, "loss": 2.0581, "step": 1106000 }, { "epoch": 0.15, "learning_rate": 4.5901912867815823e-05, "loss": 1.9873, "step": 1106500 }, { "epoch": 0.15, "learning_rate": 4.5900061043535574e-05, "loss": 2.0416, "step": 1107000 }, { "epoch": 0.15, "learning_rate": 4.589820921925533e-05, "loss": 2.0782, "step": 1107500 }, { "epoch": 0.15, "learning_rate": 4.589635739497509e-05, "loss": 2.0184, "step": 1108000 }, { "epoch": 0.15, "learning_rate": 4.589450557069484e-05, "loss": 2.0644, "step": 1108500 }, { "epoch": 0.15, "learning_rate": 4.5892653746414595e-05, "loss": 1.9821, "step": 1109000 }, { "epoch": 0.15, "learning_rate": 4.5890801922134345e-05, "loss": 2.0233, "step": 1109500 }, { "epoch": 0.15, "learning_rate": 4.58889500978541e-05, "loss": 2.0325, "step": 1110000 }, { "epoch": 0.15, "learning_rate": 4.588709827357385e-05, "loss": 2.0265, "step": 1110500 }, { "epoch": 0.15, "learning_rate": 4.588524644929361e-05, "loss": 2.0334, "step": 1111000 }, { "epoch": 0.15, "learning_rate": 4.5883394625013366e-05, "loss": 2.0002, "step": 1111500 }, { "epoch": 0.15, "learning_rate": 4.5881542800733116e-05, "loss": 1.9944, "step": 1112000 }, { "epoch": 0.15, "learning_rate": 4.587969097645287e-05, "loss": 2.071, "step": 1112500 }, { "epoch": 0.15, "learning_rate": 4.587783915217262e-05, "loss": 1.9979, "step": 1113000 }, { "epoch": 0.15, "learning_rate": 4.587598732789238e-05, "loss": 2.0582, "step": 1113500 }, { "epoch": 0.15, "learning_rate": 4.587413550361213e-05, "loss": 1.9804, "step": 1114000 }, { "epoch": 0.15, "learning_rate": 4.587228367933189e-05, "loss": 2.0089, "step": 1114500 }, { "epoch": 0.15, "learning_rate": 4.5870431855051644e-05, "loss": 2.0825, "step": 1115000 }, { "epoch": 0.15, "learning_rate": 4.5868580030771394e-05, "loss": 2.0297, "step": 1115500 }, { "epoch": 0.15, "learning_rate": 4.586672820649115e-05, "loss": 2.0135, "step": 1116000 }, { "epoch": 0.15, "learning_rate": 4.58648763822109e-05, "loss": 2.0259, "step": 1116500 }, { "epoch": 0.15, "learning_rate": 4.586302455793066e-05, "loss": 2.095, "step": 1117000 }, { "epoch": 0.15, "learning_rate": 4.5861172733650415e-05, "loss": 1.9724, "step": 1117500 }, { "epoch": 0.16, "learning_rate": 4.5859320909370165e-05, "loss": 1.9995, "step": 1118000 }, { "epoch": 0.16, "learning_rate": 4.585746908508992e-05, "loss": 2.0823, "step": 1118500 }, { "epoch": 0.16, "learning_rate": 4.585561726080967e-05, "loss": 1.9989, "step": 1119000 }, { "epoch": 0.16, "learning_rate": 4.585376543652943e-05, "loss": 1.9797, "step": 1119500 }, { "epoch": 0.16, "learning_rate": 4.585191361224918e-05, "loss": 2.0385, "step": 1120000 }, { "epoch": 0.16, "learning_rate": 4.585006178796894e-05, "loss": 2.088, "step": 1120500 }, { "epoch": 0.16, "learning_rate": 4.5848209963688694e-05, "loss": 2.0721, "step": 1121000 }, { "epoch": 0.16, "learning_rate": 4.5846358139408444e-05, "loss": 1.9574, "step": 1121500 }, { "epoch": 0.16, "learning_rate": 4.58445063151282e-05, "loss": 2.036, "step": 1122000 }, { "epoch": 0.16, "learning_rate": 4.584265449084795e-05, "loss": 1.9909, "step": 1122500 }, { "epoch": 0.16, "learning_rate": 4.584080266656771e-05, "loss": 1.9879, "step": 1123000 }, { "epoch": 0.16, "learning_rate": 4.583895084228746e-05, "loss": 2.089, "step": 1123500 }, { "epoch": 0.16, "learning_rate": 4.5837099018007215e-05, "loss": 1.9891, "step": 1124000 }, { "epoch": 0.16, "learning_rate": 4.583524719372697e-05, "loss": 2.0456, "step": 1124500 }, { "epoch": 0.16, "learning_rate": 4.583339536944672e-05, "loss": 1.9962, "step": 1125000 }, { "epoch": 0.16, "learning_rate": 4.583154354516648e-05, "loss": 2.003, "step": 1125500 }, { "epoch": 0.16, "learning_rate": 4.582969172088623e-05, "loss": 2.0341, "step": 1126000 }, { "epoch": 0.16, "learning_rate": 4.5827839896605986e-05, "loss": 2.0192, "step": 1126500 }, { "epoch": 0.16, "learning_rate": 4.582598807232574e-05, "loss": 1.9658, "step": 1127000 }, { "epoch": 0.16, "learning_rate": 4.582413624804549e-05, "loss": 1.9976, "step": 1127500 }, { "epoch": 0.16, "learning_rate": 4.582228442376525e-05, "loss": 2.0131, "step": 1128000 }, { "epoch": 0.16, "learning_rate": 4.5820432599485e-05, "loss": 2.0462, "step": 1128500 }, { "epoch": 0.16, "learning_rate": 4.581858077520476e-05, "loss": 2.0675, "step": 1129000 }, { "epoch": 0.16, "learning_rate": 4.581672895092451e-05, "loss": 2.0386, "step": 1129500 }, { "epoch": 0.16, "learning_rate": 4.5814877126644264e-05, "loss": 2.0208, "step": 1130000 }, { "epoch": 0.16, "learning_rate": 4.581302530236402e-05, "loss": 1.9804, "step": 1130500 }, { "epoch": 0.16, "learning_rate": 4.581117347808377e-05, "loss": 1.9951, "step": 1131000 }, { "epoch": 0.16, "learning_rate": 4.580932165380353e-05, "loss": 1.9944, "step": 1131500 }, { "epoch": 0.16, "learning_rate": 4.580746982952328e-05, "loss": 1.9934, "step": 1132000 }, { "epoch": 0.16, "learning_rate": 4.5805618005243036e-05, "loss": 2.0437, "step": 1132500 }, { "epoch": 0.16, "learning_rate": 4.5803766180962786e-05, "loss": 2.0452, "step": 1133000 }, { "epoch": 0.16, "learning_rate": 4.580191435668254e-05, "loss": 2.0159, "step": 1133500 }, { "epoch": 0.16, "learning_rate": 4.58000625324023e-05, "loss": 1.9983, "step": 1134000 }, { "epoch": 0.16, "learning_rate": 4.579821070812205e-05, "loss": 2.0513, "step": 1134500 }, { "epoch": 0.16, "learning_rate": 4.579635888384181e-05, "loss": 2.0089, "step": 1135000 }, { "epoch": 0.16, "learning_rate": 4.579450705956156e-05, "loss": 2.0429, "step": 1135500 }, { "epoch": 0.16, "learning_rate": 4.5792655235281314e-05, "loss": 2.0428, "step": 1136000 }, { "epoch": 0.16, "learning_rate": 4.5790803411001064e-05, "loss": 2.0082, "step": 1136500 }, { "epoch": 0.16, "learning_rate": 4.578895158672082e-05, "loss": 2.0299, "step": 1137000 }, { "epoch": 0.16, "learning_rate": 4.578709976244058e-05, "loss": 1.9961, "step": 1137500 }, { "epoch": 0.16, "learning_rate": 4.578524793816033e-05, "loss": 2.04, "step": 1138000 }, { "epoch": 0.16, "learning_rate": 4.5783396113880085e-05, "loss": 2.0125, "step": 1138500 }, { "epoch": 0.16, "learning_rate": 4.5781544289599835e-05, "loss": 2.0359, "step": 1139000 }, { "epoch": 0.16, "learning_rate": 4.577969246531959e-05, "loss": 2.0513, "step": 1139500 }, { "epoch": 0.16, "learning_rate": 4.577784064103935e-05, "loss": 1.9749, "step": 1140000 }, { "epoch": 0.16, "learning_rate": 4.57759888167591e-05, "loss": 1.9861, "step": 1140500 }, { "epoch": 0.16, "learning_rate": 4.5774136992478856e-05, "loss": 2.0236, "step": 1141000 }, { "epoch": 0.16, "learning_rate": 4.5772285168198606e-05, "loss": 2.0338, "step": 1141500 }, { "epoch": 0.16, "learning_rate": 4.577043334391836e-05, "loss": 2.0213, "step": 1142000 }, { "epoch": 0.16, "learning_rate": 4.5768581519638114e-05, "loss": 2.0148, "step": 1142500 }, { "epoch": 0.16, "learning_rate": 4.576672969535788e-05, "loss": 1.9878, "step": 1143000 }, { "epoch": 0.16, "learning_rate": 4.576487787107763e-05, "loss": 1.9863, "step": 1143500 }, { "epoch": 0.16, "learning_rate": 4.576302604679738e-05, "loss": 2.0854, "step": 1144000 }, { "epoch": 0.16, "learning_rate": 4.5761174222517135e-05, "loss": 2.0522, "step": 1144500 }, { "epoch": 0.16, "learning_rate": 4.5759322398236885e-05, "loss": 1.9886, "step": 1145000 }, { "epoch": 0.16, "learning_rate": 4.575747057395664e-05, "loss": 2.0324, "step": 1145500 }, { "epoch": 0.16, "learning_rate": 4.575561874967639e-05, "loss": 2.0437, "step": 1146000 }, { "epoch": 0.16, "learning_rate": 4.575376692539615e-05, "loss": 1.9871, "step": 1146500 }, { "epoch": 0.16, "learning_rate": 4.5751915101115906e-05, "loss": 2.0339, "step": 1147000 }, { "epoch": 0.16, "learning_rate": 4.5750063276835656e-05, "loss": 1.9999, "step": 1147500 }, { "epoch": 0.16, "learning_rate": 4.574821145255541e-05, "loss": 2.0565, "step": 1148000 }, { "epoch": 0.16, "learning_rate": 4.574635962827516e-05, "loss": 1.9668, "step": 1148500 }, { "epoch": 0.16, "learning_rate": 4.574450780399492e-05, "loss": 1.9387, "step": 1149000 }, { "epoch": 0.16, "learning_rate": 4.574265597971468e-05, "loss": 2.0382, "step": 1149500 }, { "epoch": 0.16, "learning_rate": 4.574080415543443e-05, "loss": 2.018, "step": 1150000 }, { "epoch": 0.16, "learning_rate": 4.5738952331154184e-05, "loss": 2.0086, "step": 1150500 }, { "epoch": 0.16, "learning_rate": 4.5737100506873934e-05, "loss": 2.0138, "step": 1151000 }, { "epoch": 0.16, "learning_rate": 4.573524868259369e-05, "loss": 2.0495, "step": 1151500 }, { "epoch": 0.16, "learning_rate": 4.573339685831344e-05, "loss": 2.0011, "step": 1152000 }, { "epoch": 0.16, "learning_rate": 4.5731545034033205e-05, "loss": 2.0153, "step": 1152500 }, { "epoch": 0.16, "learning_rate": 4.5729693209752955e-05, "loss": 1.9924, "step": 1153000 }, { "epoch": 0.16, "learning_rate": 4.5727841385472705e-05, "loss": 2.0114, "step": 1153500 }, { "epoch": 0.16, "learning_rate": 4.572598956119246e-05, "loss": 1.9851, "step": 1154000 }, { "epoch": 0.16, "learning_rate": 4.572413773691221e-05, "loss": 2.0088, "step": 1154500 }, { "epoch": 0.16, "learning_rate": 4.572228591263197e-05, "loss": 2.0583, "step": 1155000 }, { "epoch": 0.16, "learning_rate": 4.572043408835172e-05, "loss": 2.0373, "step": 1155500 }, { "epoch": 0.16, "learning_rate": 4.571858226407148e-05, "loss": 2.0252, "step": 1156000 }, { "epoch": 0.16, "learning_rate": 4.5716730439791234e-05, "loss": 2.0274, "step": 1156500 }, { "epoch": 0.16, "learning_rate": 4.5714878615510984e-05, "loss": 2.0626, "step": 1157000 }, { "epoch": 0.16, "learning_rate": 4.571302679123074e-05, "loss": 2.0646, "step": 1157500 }, { "epoch": 0.16, "learning_rate": 4.571117496695049e-05, "loss": 2.0683, "step": 1158000 }, { "epoch": 0.16, "learning_rate": 4.570932314267025e-05, "loss": 2.0225, "step": 1158500 }, { "epoch": 0.16, "learning_rate": 4.5707471318390005e-05, "loss": 1.9791, "step": 1159000 }, { "epoch": 0.16, "learning_rate": 4.5705619494109755e-05, "loss": 2.0097, "step": 1159500 }, { "epoch": 0.16, "learning_rate": 4.570376766982951e-05, "loss": 2.01, "step": 1160000 }, { "epoch": 0.16, "learning_rate": 4.570191584554926e-05, "loss": 1.9769, "step": 1160500 }, { "epoch": 0.16, "learning_rate": 4.570006402126902e-05, "loss": 2.0588, "step": 1161000 }, { "epoch": 0.16, "learning_rate": 4.569821219698877e-05, "loss": 2.0773, "step": 1161500 }, { "epoch": 0.16, "learning_rate": 4.5696360372708526e-05, "loss": 2.0109, "step": 1162000 }, { "epoch": 0.16, "learning_rate": 4.569450854842828e-05, "loss": 2.0382, "step": 1162500 }, { "epoch": 0.17, "learning_rate": 4.569265672414803e-05, "loss": 2.022, "step": 1163000 }, { "epoch": 0.17, "learning_rate": 4.569080489986779e-05, "loss": 1.9878, "step": 1163500 }, { "epoch": 0.17, "learning_rate": 4.568895307558754e-05, "loss": 1.9953, "step": 1164000 }, { "epoch": 0.17, "learning_rate": 4.56871012513073e-05, "loss": 2.0324, "step": 1164500 }, { "epoch": 0.17, "learning_rate": 4.568524942702705e-05, "loss": 1.9929, "step": 1165000 }, { "epoch": 0.17, "learning_rate": 4.568339760274681e-05, "loss": 2.0366, "step": 1165500 }, { "epoch": 0.17, "learning_rate": 4.568154577846656e-05, "loss": 1.9813, "step": 1166000 }, { "epoch": 0.17, "learning_rate": 4.567969395418631e-05, "loss": 1.9828, "step": 1166500 }, { "epoch": 0.17, "learning_rate": 4.567784212990607e-05, "loss": 2.0847, "step": 1167000 }, { "epoch": 0.17, "learning_rate": 4.567599030562582e-05, "loss": 1.9774, "step": 1167500 }, { "epoch": 0.17, "learning_rate": 4.5674138481345576e-05, "loss": 2.0894, "step": 1168000 }, { "epoch": 0.17, "learning_rate": 4.567228665706533e-05, "loss": 1.9716, "step": 1168500 }, { "epoch": 0.17, "learning_rate": 4.567043483278509e-05, "loss": 2.0528, "step": 1169000 }, { "epoch": 0.17, "learning_rate": 4.566858300850484e-05, "loss": 1.9761, "step": 1169500 }, { "epoch": 0.17, "learning_rate": 4.566673118422459e-05, "loss": 2.0017, "step": 1170000 }, { "epoch": 0.17, "learning_rate": 4.566487935994435e-05, "loss": 2.0189, "step": 1170500 }, { "epoch": 0.17, "learning_rate": 4.56630275356641e-05, "loss": 1.9752, "step": 1171000 }, { "epoch": 0.17, "learning_rate": 4.5661175711383854e-05, "loss": 1.9397, "step": 1171500 }, { "epoch": 0.17, "learning_rate": 4.565932388710361e-05, "loss": 2.0487, "step": 1172000 }, { "epoch": 0.17, "learning_rate": 4.565747206282336e-05, "loss": 2.0155, "step": 1172500 }, { "epoch": 0.17, "learning_rate": 4.565562023854312e-05, "loss": 1.9776, "step": 1173000 }, { "epoch": 0.17, "learning_rate": 4.565376841426287e-05, "loss": 2.0014, "step": 1173500 }, { "epoch": 0.17, "learning_rate": 4.5651916589982625e-05, "loss": 1.9584, "step": 1174000 }, { "epoch": 0.17, "learning_rate": 4.5650064765702375e-05, "loss": 1.9828, "step": 1174500 }, { "epoch": 0.17, "learning_rate": 4.564821294142214e-05, "loss": 2.0071, "step": 1175000 }, { "epoch": 0.17, "learning_rate": 4.564636111714189e-05, "loss": 1.9823, "step": 1175500 }, { "epoch": 0.17, "learning_rate": 4.564450929286164e-05, "loss": 1.9643, "step": 1176000 }, { "epoch": 0.17, "learning_rate": 4.5642657468581396e-05, "loss": 2.0379, "step": 1176500 }, { "epoch": 0.17, "learning_rate": 4.5640805644301146e-05, "loss": 2.0049, "step": 1177000 }, { "epoch": 0.17, "learning_rate": 4.56389538200209e-05, "loss": 2.0408, "step": 1177500 }, { "epoch": 0.17, "learning_rate": 4.563710199574066e-05, "loss": 2.0529, "step": 1178000 }, { "epoch": 0.17, "learning_rate": 4.563525017146042e-05, "loss": 2.0234, "step": 1178500 }, { "epoch": 0.17, "learning_rate": 4.563339834718017e-05, "loss": 2.046, "step": 1179000 }, { "epoch": 0.17, "learning_rate": 4.563154652289992e-05, "loss": 2.0445, "step": 1179500 }, { "epoch": 0.17, "learning_rate": 4.5629694698619674e-05, "loss": 2.0227, "step": 1180000 }, { "epoch": 0.17, "learning_rate": 4.5627842874339425e-05, "loss": 2.0697, "step": 1180500 }, { "epoch": 0.17, "learning_rate": 4.562599105005918e-05, "loss": 2.0044, "step": 1181000 }, { "epoch": 0.17, "learning_rate": 4.562413922577894e-05, "loss": 1.9942, "step": 1181500 }, { "epoch": 0.17, "learning_rate": 4.5622287401498695e-05, "loss": 1.9858, "step": 1182000 }, { "epoch": 0.17, "learning_rate": 4.5620435577218446e-05, "loss": 2.0483, "step": 1182500 }, { "epoch": 0.17, "learning_rate": 4.5618583752938196e-05, "loss": 2.1029, "step": 1183000 }, { "epoch": 0.17, "learning_rate": 4.561673192865795e-05, "loss": 2.034, "step": 1183500 }, { "epoch": 0.17, "learning_rate": 4.56148801043777e-05, "loss": 2.0263, "step": 1184000 }, { "epoch": 0.17, "learning_rate": 4.561302828009747e-05, "loss": 2.0165, "step": 1184500 }, { "epoch": 0.17, "learning_rate": 4.561117645581722e-05, "loss": 2.1276, "step": 1185000 }, { "epoch": 0.17, "learning_rate": 4.5609324631536974e-05, "loss": 2.0112, "step": 1185500 }, { "epoch": 0.17, "learning_rate": 4.5607472807256724e-05, "loss": 2.0514, "step": 1186000 }, { "epoch": 0.17, "learning_rate": 4.5605620982976474e-05, "loss": 2.0065, "step": 1186500 }, { "epoch": 0.17, "learning_rate": 4.560376915869623e-05, "loss": 2.0716, "step": 1187000 }, { "epoch": 0.17, "learning_rate": 4.560191733441598e-05, "loss": 2.0466, "step": 1187500 }, { "epoch": 0.17, "learning_rate": 4.5600065510135745e-05, "loss": 2.0075, "step": 1188000 }, { "epoch": 0.17, "learning_rate": 4.5598213685855495e-05, "loss": 1.9578, "step": 1188500 }, { "epoch": 0.17, "learning_rate": 4.5596361861575245e-05, "loss": 2.063, "step": 1189000 }, { "epoch": 0.17, "learning_rate": 4.5594510037295e-05, "loss": 2.0231, "step": 1189500 }, { "epoch": 0.17, "learning_rate": 4.559265821301475e-05, "loss": 2.0299, "step": 1190000 }, { "epoch": 0.17, "learning_rate": 4.559080638873451e-05, "loss": 2.1112, "step": 1190500 }, { "epoch": 0.17, "learning_rate": 4.5588954564454266e-05, "loss": 2.0467, "step": 1191000 }, { "epoch": 0.17, "learning_rate": 4.558710274017402e-05, "loss": 1.9726, "step": 1191500 }, { "epoch": 0.17, "learning_rate": 4.5585250915893773e-05, "loss": 2.0669, "step": 1192000 }, { "epoch": 0.17, "learning_rate": 4.5583399091613524e-05, "loss": 1.9855, "step": 1192500 }, { "epoch": 0.17, "learning_rate": 4.558154726733328e-05, "loss": 2.0615, "step": 1193000 }, { "epoch": 0.17, "learning_rate": 4.557969544305303e-05, "loss": 2.026, "step": 1193500 }, { "epoch": 0.17, "learning_rate": 4.5577843618772794e-05, "loss": 1.9829, "step": 1194000 }, { "epoch": 0.17, "learning_rate": 4.5575991794492545e-05, "loss": 1.9632, "step": 1194500 }, { "epoch": 0.17, "learning_rate": 4.55741399702123e-05, "loss": 2.0398, "step": 1195000 }, { "epoch": 0.17, "learning_rate": 4.557228814593205e-05, "loss": 2.0325, "step": 1195500 }, { "epoch": 0.17, "learning_rate": 4.55704363216518e-05, "loss": 2.0186, "step": 1196000 }, { "epoch": 0.17, "learning_rate": 4.556858449737156e-05, "loss": 1.9944, "step": 1196500 }, { "epoch": 0.17, "learning_rate": 4.556673267309131e-05, "loss": 2.0366, "step": 1197000 }, { "epoch": 0.17, "learning_rate": 4.556488084881107e-05, "loss": 1.9559, "step": 1197500 }, { "epoch": 0.17, "learning_rate": 4.556302902453082e-05, "loss": 2.0433, "step": 1198000 }, { "epoch": 0.17, "learning_rate": 4.556117720025058e-05, "loss": 2.0049, "step": 1198500 }, { "epoch": 0.17, "learning_rate": 4.555932537597033e-05, "loss": 1.9842, "step": 1199000 }, { "epoch": 0.17, "learning_rate": 4.555747355169008e-05, "loss": 1.9653, "step": 1199500 }, { "epoch": 0.17, "learning_rate": 4.555562172740984e-05, "loss": 2.0843, "step": 1200000 }, { "epoch": 0.17, "learning_rate": 4.5553769903129594e-05, "loss": 2.0587, "step": 1200500 }, { "epoch": 0.17, "learning_rate": 4.555191807884935e-05, "loss": 2.0026, "step": 1201000 }, { "epoch": 0.17, "learning_rate": 4.55500662545691e-05, "loss": 2.0253, "step": 1201500 }, { "epoch": 0.17, "learning_rate": 4.554821443028885e-05, "loss": 1.9794, "step": 1202000 }, { "epoch": 0.17, "learning_rate": 4.554636260600861e-05, "loss": 2.0672, "step": 1202500 }, { "epoch": 0.17, "learning_rate": 4.554451078172836e-05, "loss": 1.9861, "step": 1203000 }, { "epoch": 0.17, "learning_rate": 4.554265895744812e-05, "loss": 2.0267, "step": 1203500 }, { "epoch": 0.17, "learning_rate": 4.554080713316787e-05, "loss": 2.0156, "step": 1204000 }, { "epoch": 0.17, "learning_rate": 4.553895530888763e-05, "loss": 1.9506, "step": 1204500 }, { "epoch": 0.17, "learning_rate": 4.553710348460738e-05, "loss": 2.0643, "step": 1205000 }, { "epoch": 0.17, "learning_rate": 4.553525166032713e-05, "loss": 1.9997, "step": 1205500 }, { "epoch": 0.17, "learning_rate": 4.553339983604689e-05, "loss": 2.0217, "step": 1206000 }, { "epoch": 0.17, "learning_rate": 4.553154801176664e-05, "loss": 2.061, "step": 1206500 }, { "epoch": 0.17, "learning_rate": 4.55296961874864e-05, "loss": 1.9712, "step": 1207000 }, { "epoch": 0.17, "learning_rate": 4.552784436320615e-05, "loss": 1.9848, "step": 1207500 }, { "epoch": 0.18, "learning_rate": 4.552599253892591e-05, "loss": 1.9508, "step": 1208000 }, { "epoch": 0.18, "learning_rate": 4.552414071464566e-05, "loss": 2.0733, "step": 1208500 }, { "epoch": 0.18, "learning_rate": 4.552228889036541e-05, "loss": 2.0766, "step": 1209000 }, { "epoch": 0.18, "learning_rate": 4.5520437066085165e-05, "loss": 2.0268, "step": 1209500 }, { "epoch": 0.18, "learning_rate": 4.551858524180492e-05, "loss": 2.0696, "step": 1210000 }, { "epoch": 0.18, "learning_rate": 4.551673341752468e-05, "loss": 2.0019, "step": 1210500 }, { "epoch": 0.18, "learning_rate": 4.551488159324443e-05, "loss": 2.0083, "step": 1211000 }, { "epoch": 0.18, "learning_rate": 4.5513029768964186e-05, "loss": 2.0026, "step": 1211500 }, { "epoch": 0.18, "learning_rate": 4.5511177944683936e-05, "loss": 2.0136, "step": 1212000 }, { "epoch": 0.18, "learning_rate": 4.5509326120403686e-05, "loss": 2.0361, "step": 1212500 }, { "epoch": 0.18, "learning_rate": 4.550747429612344e-05, "loss": 2.0245, "step": 1213000 }, { "epoch": 0.18, "learning_rate": 4.55056224718432e-05, "loss": 2.0176, "step": 1213500 }, { "epoch": 0.18, "learning_rate": 4.550377064756296e-05, "loss": 2.0951, "step": 1214000 }, { "epoch": 0.18, "learning_rate": 4.550191882328271e-05, "loss": 2.0029, "step": 1214500 }, { "epoch": 0.18, "learning_rate": 4.550006699900246e-05, "loss": 2.0388, "step": 1215000 }, { "epoch": 0.18, "learning_rate": 4.5498215174722214e-05, "loss": 1.9956, "step": 1215500 }, { "epoch": 0.18, "learning_rate": 4.5496363350441965e-05, "loss": 2.0941, "step": 1216000 }, { "epoch": 0.18, "learning_rate": 4.549451152616173e-05, "loss": 2.0493, "step": 1216500 }, { "epoch": 0.18, "learning_rate": 4.549265970188148e-05, "loss": 1.9668, "step": 1217000 }, { "epoch": 0.18, "learning_rate": 4.5490807877601235e-05, "loss": 2.0112, "step": 1217500 }, { "epoch": 0.18, "learning_rate": 4.5488956053320986e-05, "loss": 2.01, "step": 1218000 }, { "epoch": 0.18, "learning_rate": 4.5487104229040736e-05, "loss": 2.0454, "step": 1218500 }, { "epoch": 0.18, "learning_rate": 4.548525240476049e-05, "loss": 1.9673, "step": 1219000 }, { "epoch": 0.18, "learning_rate": 4.548340058048025e-05, "loss": 2.0913, "step": 1219500 }, { "epoch": 0.18, "learning_rate": 4.5481548756200007e-05, "loss": 2.0534, "step": 1220000 }, { "epoch": 0.18, "learning_rate": 4.547969693191976e-05, "loss": 1.996, "step": 1220500 }, { "epoch": 0.18, "learning_rate": 4.5477845107639514e-05, "loss": 2.0159, "step": 1221000 }, { "epoch": 0.18, "learning_rate": 4.5475993283359264e-05, "loss": 2.0308, "step": 1221500 }, { "epoch": 0.18, "learning_rate": 4.5474141459079014e-05, "loss": 2.0111, "step": 1222000 }, { "epoch": 0.18, "learning_rate": 4.547228963479877e-05, "loss": 1.9588, "step": 1222500 }, { "epoch": 0.18, "learning_rate": 4.547043781051853e-05, "loss": 1.9907, "step": 1223000 }, { "epoch": 0.18, "learning_rate": 4.5468585986238285e-05, "loss": 1.9381, "step": 1223500 }, { "epoch": 0.18, "learning_rate": 4.5466734161958035e-05, "loss": 2.0461, "step": 1224000 }, { "epoch": 0.18, "learning_rate": 4.546488233767779e-05, "loss": 2.0119, "step": 1224500 }, { "epoch": 0.18, "learning_rate": 4.546303051339754e-05, "loss": 2.0473, "step": 1225000 }, { "epoch": 0.18, "learning_rate": 4.546117868911729e-05, "loss": 2.0373, "step": 1225500 }, { "epoch": 0.18, "learning_rate": 4.5459326864837056e-05, "loss": 1.948, "step": 1226000 }, { "epoch": 0.18, "learning_rate": 4.5457475040556806e-05, "loss": 1.9465, "step": 1226500 }, { "epoch": 0.18, "learning_rate": 4.545562321627656e-05, "loss": 2.0531, "step": 1227000 }, { "epoch": 0.18, "learning_rate": 4.545377139199631e-05, "loss": 2.0145, "step": 1227500 }, { "epoch": 0.18, "learning_rate": 4.545191956771607e-05, "loss": 1.9744, "step": 1228000 }, { "epoch": 0.18, "learning_rate": 4.545006774343582e-05, "loss": 2.0182, "step": 1228500 }, { "epoch": 0.18, "learning_rate": 4.544821591915558e-05, "loss": 2.0322, "step": 1229000 }, { "epoch": 0.18, "learning_rate": 4.5446364094875334e-05, "loss": 2.0257, "step": 1229500 }, { "epoch": 0.18, "learning_rate": 4.5444512270595085e-05, "loss": 2.0792, "step": 1230000 }, { "epoch": 0.18, "learning_rate": 4.544266044631484e-05, "loss": 1.97, "step": 1230500 }, { "epoch": 0.18, "learning_rate": 4.544080862203459e-05, "loss": 1.9821, "step": 1231000 }, { "epoch": 0.18, "learning_rate": 4.543895679775434e-05, "loss": 2.0477, "step": 1231500 }, { "epoch": 0.18, "learning_rate": 4.54371049734741e-05, "loss": 1.9921, "step": 1232000 }, { "epoch": 0.18, "learning_rate": 4.5435253149193856e-05, "loss": 1.9702, "step": 1232500 }, { "epoch": 0.18, "learning_rate": 4.543340132491361e-05, "loss": 2.0535, "step": 1233000 }, { "epoch": 0.18, "learning_rate": 4.543154950063336e-05, "loss": 2.0334, "step": 1233500 }, { "epoch": 0.18, "learning_rate": 4.542969767635312e-05, "loss": 1.9955, "step": 1234000 }, { "epoch": 0.18, "learning_rate": 4.542784585207287e-05, "loss": 2.0129, "step": 1234500 }, { "epoch": 0.18, "learning_rate": 4.542599402779262e-05, "loss": 2.0197, "step": 1235000 }, { "epoch": 0.18, "learning_rate": 4.5424142203512384e-05, "loss": 2.0355, "step": 1235500 }, { "epoch": 0.18, "learning_rate": 4.5422290379232134e-05, "loss": 1.9437, "step": 1236000 }, { "epoch": 0.18, "learning_rate": 4.542043855495189e-05, "loss": 2.0292, "step": 1236500 }, { "epoch": 0.18, "learning_rate": 4.541858673067164e-05, "loss": 2.0148, "step": 1237000 }, { "epoch": 0.18, "learning_rate": 4.54167349063914e-05, "loss": 1.999, "step": 1237500 }, { "epoch": 0.18, "learning_rate": 4.541488308211115e-05, "loss": 1.9867, "step": 1238000 }, { "epoch": 0.18, "learning_rate": 4.54130312578309e-05, "loss": 2.0071, "step": 1238500 }, { "epoch": 0.18, "learning_rate": 4.541117943355066e-05, "loss": 1.9873, "step": 1239000 }, { "epoch": 0.18, "learning_rate": 4.540932760927041e-05, "loss": 2.0363, "step": 1239500 }, { "epoch": 0.18, "learning_rate": 4.540747578499017e-05, "loss": 2.0003, "step": 1240000 }, { "epoch": 0.18, "learning_rate": 4.540562396070992e-05, "loss": 2.0571, "step": 1240500 }, { "epoch": 0.18, "learning_rate": 4.5403772136429676e-05, "loss": 2.04, "step": 1241000 }, { "epoch": 0.18, "learning_rate": 4.5401920312149427e-05, "loss": 2.0291, "step": 1241500 }, { "epoch": 0.18, "learning_rate": 4.5400068487869183e-05, "loss": 2.0359, "step": 1242000 }, { "epoch": 0.18, "learning_rate": 4.539821666358894e-05, "loss": 1.9683, "step": 1242500 }, { "epoch": 0.18, "learning_rate": 4.539636483930869e-05, "loss": 2.0222, "step": 1243000 }, { "epoch": 0.18, "learning_rate": 4.539451301502845e-05, "loss": 1.9784, "step": 1243500 }, { "epoch": 0.18, "learning_rate": 4.53926611907482e-05, "loss": 2.0293, "step": 1244000 }, { "epoch": 0.18, "learning_rate": 4.539080936646795e-05, "loss": 2.0801, "step": 1244500 }, { "epoch": 0.18, "learning_rate": 4.538895754218771e-05, "loss": 2.0896, "step": 1245000 }, { "epoch": 0.18, "learning_rate": 4.538710571790746e-05, "loss": 2.0129, "step": 1245500 }, { "epoch": 0.18, "learning_rate": 4.538525389362722e-05, "loss": 1.9908, "step": 1246000 }, { "epoch": 0.18, "learning_rate": 4.538340206934697e-05, "loss": 2.0363, "step": 1246500 }, { "epoch": 0.18, "learning_rate": 4.5381550245066726e-05, "loss": 2.009, "step": 1247000 }, { "epoch": 0.18, "learning_rate": 4.5379698420786476e-05, "loss": 2.0021, "step": 1247500 }, { "epoch": 0.18, "learning_rate": 4.5377846596506226e-05, "loss": 2.0504, "step": 1248000 }, { "epoch": 0.18, "learning_rate": 4.537599477222599e-05, "loss": 1.942, "step": 1248500 }, { "epoch": 0.18, "learning_rate": 4.537414294794574e-05, "loss": 2.0592, "step": 1249000 }, { "epoch": 0.18, "learning_rate": 4.53722911236655e-05, "loss": 1.9323, "step": 1249500 } ], "max_steps": 13500201, "num_train_epochs": 3, "total_flos": 1.594877582673861e+17, "trial_name": null, "trial_params": null }