{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99994058582378, "global_step": 420750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 2.3624480095068333e-07, "loss": 2.5928, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.7391562685680333e-07, "loss": 1.753, "step": 1000 }, { "epoch": 0.18, "learning_rate": 7.115864527629234e-07, "loss": 1.5245, "step": 1500 }, { "epoch": 0.24, "learning_rate": 9.492572786690435e-07, "loss": 1.428, "step": 2000 }, { "epoch": 0.3, "learning_rate": 1.1869281045751634e-06, "loss": 1.3632, "step": 2500 }, { "epoch": 0.36, "learning_rate": 1.4245989304812835e-06, "loss": 1.3351, "step": 3000 }, { "epoch": 0.42, "learning_rate": 1.6622697563874036e-06, "loss": 1.3066, "step": 3500 }, { "epoch": 0.48, "learning_rate": 1.8999405822935234e-06, "loss": 1.266, "step": 4000 }, { "epoch": 0.53, "learning_rate": 2.137611408199644e-06, "loss": 1.2228, "step": 4500 }, { "epoch": 0.59, "learning_rate": 2.375282234105764e-06, "loss": 1.2047, "step": 5000 }, { "epoch": 0.65, "learning_rate": 2.6129530600118837e-06, "loss": 1.193, "step": 5500 }, { "epoch": 0.71, "learning_rate": 2.8501485442661915e-06, "loss": 1.169, "step": 6000 }, { "epoch": 0.77, "learning_rate": 3.0878193701723116e-06, "loss": 1.1512, "step": 6500 }, { "epoch": 0.83, "learning_rate": 3.325490196078431e-06, "loss": 1.1434, "step": 7000 }, { "epoch": 0.89, "learning_rate": 3.563161021984552e-06, "loss": 1.1305, "step": 7500 }, { "epoch": 0.95, "learning_rate": 3.8008318478906718e-06, "loss": 1.1059, "step": 8000 }, { "epoch": 1.01, "learning_rate": 4.038502673796792e-06, "loss": 1.1072, "step": 8500 }, { "epoch": 1.07, "learning_rate": 4.276173499702912e-06, "loss": 1.0856, "step": 9000 }, { "epoch": 1.13, "learning_rate": 4.5138443256090316e-06, "loss": 1.0605, "step": 9500 }, { "epoch": 1.19, "learning_rate": 4.751515151515152e-06, "loss": 1.0481, "step": 10000 }, { "epoch": 1.25, "learning_rate": 4.989185977421272e-06, "loss": 1.0203, "step": 10500 }, { "epoch": 1.31, "learning_rate": 5.226381461675579e-06, "loss": 0.9944, "step": 11000 }, { "epoch": 1.37, "learning_rate": 5.464052287581699e-06, "loss": 0.9667, "step": 11500 }, { "epoch": 1.43, "learning_rate": 5.7012477718360074e-06, "loss": 0.9407, "step": 12000 }, { "epoch": 1.49, "learning_rate": 5.938443256090315e-06, "loss": 0.9194, "step": 12500 }, { "epoch": 1.54, "learning_rate": 6.176114081996435e-06, "loss": 0.891, "step": 13000 }, { "epoch": 1.6, "learning_rate": 6.413784907902556e-06, "loss": 0.8853, "step": 13500 }, { "epoch": 1.66, "learning_rate": 6.651455733808676e-06, "loss": 0.8699, "step": 14000 }, { "epoch": 1.72, "learning_rate": 6.889126559714796e-06, "loss": 0.8597, "step": 14500 }, { "epoch": 1.78, "learning_rate": 7.126797385620916e-06, "loss": 0.8384, "step": 15000 }, { "epoch": 1.84, "learning_rate": 7.364468211527036e-06, "loss": 0.8305, "step": 15500 }, { "epoch": 1.9, "learning_rate": 7.602139037433156e-06, "loss": 0.8346, "step": 16000 }, { "epoch": 1.96, "learning_rate": 7.839809863339275e-06, "loss": 0.8277, "step": 16500 }, { "epoch": 2.02, "learning_rate": 8.077005347593585e-06, "loss": 0.8125, "step": 17000 }, { "epoch": 2.08, "learning_rate": 8.314676173499705e-06, "loss": 0.8124, "step": 17500 }, { "epoch": 2.14, "learning_rate": 8.552346999405825e-06, "loss": 0.806, "step": 18000 }, { "epoch": 2.2, "learning_rate": 8.790017825311945e-06, "loss": 0.8022, "step": 18500 }, { "epoch": 2.26, "learning_rate": 9.027688651218063e-06, "loss": 0.7958, "step": 19000 }, { "epoch": 2.32, "learning_rate": 9.265359477124183e-06, "loss": 0.7901, "step": 19500 }, { "epoch": 2.38, "learning_rate": 9.503030303030303e-06, "loss": 0.7868, "step": 20000 }, { "epoch": 2.44, "learning_rate": 9.740701128936423e-06, "loss": 0.7788, "step": 20500 }, { "epoch": 2.5, "learning_rate": 9.978371954842543e-06, "loss": 0.7823, "step": 21000 }, { "epoch": 2.55, "learning_rate": 1.0216042780748663e-05, "loss": 0.7767, "step": 21500 }, { "epoch": 2.61, "learning_rate": 1.0453238265002973e-05, "loss": 0.7645, "step": 22000 }, { "epoch": 2.67, "learning_rate": 1.0690909090909091e-05, "loss": 0.7687, "step": 22500 }, { "epoch": 2.73, "learning_rate": 1.0928579916815211e-05, "loss": 0.7664, "step": 23000 }, { "epoch": 2.79, "learning_rate": 1.1166250742721331e-05, "loss": 0.7569, "step": 23500 }, { "epoch": 2.85, "learning_rate": 1.1403921568627451e-05, "loss": 0.772, "step": 24000 }, { "epoch": 2.91, "learning_rate": 1.1641592394533571e-05, "loss": 0.7562, "step": 24500 }, { "epoch": 2.97, "learning_rate": 1.1879263220439691e-05, "loss": 0.7599, "step": 25000 }, { "epoch": 3.03, "learning_rate": 1.2116934046345811e-05, "loss": 0.7524, "step": 25500 }, { "epoch": 3.09, "learning_rate": 1.2354604872251931e-05, "loss": 0.7476, "step": 26000 }, { "epoch": 3.15, "learning_rate": 1.2592275698158052e-05, "loss": 0.7385, "step": 26500 }, { "epoch": 3.21, "learning_rate": 1.282947118241236e-05, "loss": 0.7428, "step": 27000 }, { "epoch": 3.27, "learning_rate": 1.306714200831848e-05, "loss": 0.746, "step": 27500 }, { "epoch": 3.33, "learning_rate": 1.3304337492572788e-05, "loss": 0.7406, "step": 28000 }, { "epoch": 3.39, "learning_rate": 1.3542008318478908e-05, "loss": 0.7264, "step": 28500 }, { "epoch": 3.45, "learning_rate": 1.3779679144385028e-05, "loss": 0.7346, "step": 29000 }, { "epoch": 3.51, "learning_rate": 1.4017349970291148e-05, "loss": 0.7266, "step": 29500 }, { "epoch": 3.57, "learning_rate": 1.4255020796197267e-05, "loss": 0.7355, "step": 30000 }, { "epoch": 3.62, "learning_rate": 1.4492691622103387e-05, "loss": 0.7319, "step": 30500 }, { "epoch": 3.68, "learning_rate": 1.4730362448009507e-05, "loss": 0.7304, "step": 31000 }, { "epoch": 3.74, "learning_rate": 1.4968033273915627e-05, "loss": 0.7226, "step": 31500 }, { "epoch": 3.8, "learning_rate": 1.5205228758169936e-05, "loss": 0.7288, "step": 32000 }, { "epoch": 3.86, "learning_rate": 1.5442899584076056e-05, "loss": 0.7214, "step": 32500 }, { "epoch": 3.92, "learning_rate": 1.5680095068330364e-05, "loss": 0.7219, "step": 33000 }, { "epoch": 3.98, "learning_rate": 1.5917765894236484e-05, "loss": 0.718, "step": 33500 }, { "epoch": 4.04, "learning_rate": 1.615496137849079e-05, "loss": 0.7132, "step": 34000 }, { "epoch": 4.1, "learning_rate": 1.639263220439691e-05, "loss": 0.7183, "step": 34500 }, { "epoch": 4.16, "learning_rate": 1.663030303030303e-05, "loss": 0.7123, "step": 35000 }, { "epoch": 4.22, "learning_rate": 1.686797385620915e-05, "loss": 0.7013, "step": 35500 }, { "epoch": 4.28, "learning_rate": 1.710564468211527e-05, "loss": 0.7093, "step": 36000 }, { "epoch": 4.34, "learning_rate": 1.734284016636958e-05, "loss": 0.704, "step": 36500 }, { "epoch": 4.4, "learning_rate": 1.75805109922757e-05, "loss": 0.7047, "step": 37000 }, { "epoch": 4.46, "learning_rate": 1.781818181818182e-05, "loss": 0.7041, "step": 37500 }, { "epoch": 4.52, "learning_rate": 1.805585264408794e-05, "loss": 0.7005, "step": 38000 }, { "epoch": 4.58, "learning_rate": 1.829352346999406e-05, "loss": 0.7098, "step": 38500 }, { "epoch": 4.63, "learning_rate": 1.853119429590018e-05, "loss": 0.7019, "step": 39000 }, { "epoch": 4.69, "learning_rate": 1.87688651218063e-05, "loss": 0.7024, "step": 39500 }, { "epoch": 4.75, "learning_rate": 1.900653594771242e-05, "loss": 0.7015, "step": 40000 }, { "epoch": 4.81, "learning_rate": 1.924420677361854e-05, "loss": 0.7032, "step": 40500 }, { "epoch": 4.87, "learning_rate": 1.948187759952466e-05, "loss": 0.6989, "step": 41000 }, { "epoch": 4.93, "learning_rate": 1.971954842543078e-05, "loss": 0.7023, "step": 41500 }, { "epoch": 4.99, "learning_rate": 1.99572192513369e-05, "loss": 0.7068, "step": 42000 }, { "epoch": 5.05, "learning_rate": 1.9978345546973e-05, "loss": 0.6804, "step": 42500 }, { "epoch": 5.11, "learning_rate": 1.9951937677427877e-05, "loss": 0.6888, "step": 43000 }, { "epoch": 5.17, "learning_rate": 1.992552980788275e-05, "loss": 0.6932, "step": 43500 }, { "epoch": 5.23, "learning_rate": 1.9899121938337625e-05, "loss": 0.6849, "step": 44000 }, { "epoch": 5.29, "learning_rate": 1.987276688453159e-05, "loss": 0.6776, "step": 44500 }, { "epoch": 5.35, "learning_rate": 1.9846411830725558e-05, "loss": 0.6897, "step": 45000 }, { "epoch": 5.41, "learning_rate": 1.9820003961180434e-05, "loss": 0.683, "step": 45500 }, { "epoch": 5.47, "learning_rate": 1.979359609163531e-05, "loss": 0.6834, "step": 46000 }, { "epoch": 5.53, "learning_rate": 1.9767188222090182e-05, "loss": 0.6811, "step": 46500 }, { "epoch": 5.59, "learning_rate": 1.9740780352545058e-05, "loss": 0.6775, "step": 47000 }, { "epoch": 5.64, "learning_rate": 1.9714372482999934e-05, "loss": 0.683, "step": 47500 }, { "epoch": 5.7, "learning_rate": 1.968796461345481e-05, "loss": 0.6799, "step": 48000 }, { "epoch": 5.76, "learning_rate": 1.9661556743909686e-05, "loss": 0.6754, "step": 48500 }, { "epoch": 5.82, "learning_rate": 1.9635201690103652e-05, "loss": 0.684, "step": 49000 }, { "epoch": 5.88, "learning_rate": 1.9608793820558528e-05, "loss": 0.6753, "step": 49500 }, { "epoch": 5.94, "learning_rate": 1.9582385951013404e-05, "loss": 0.6843, "step": 50000 }, { "epoch": 6.0, "learning_rate": 1.955597808146828e-05, "loss": 0.684, "step": 50500 }, { "epoch": 6.06, "learning_rate": 1.9529623027662246e-05, "loss": 0.6578, "step": 51000 }, { "epoch": 6.12, "learning_rate": 1.950326797385621e-05, "loss": 0.6686, "step": 51500 }, { "epoch": 6.18, "learning_rate": 1.9476860104311085e-05, "loss": 0.6696, "step": 52000 }, { "epoch": 6.24, "learning_rate": 1.945045223476596e-05, "loss": 0.6721, "step": 52500 }, { "epoch": 6.3, "learning_rate": 1.9424044365220836e-05, "loss": 0.6645, "step": 53000 }, { "epoch": 6.36, "learning_rate": 1.9397689311414803e-05, "loss": 0.6636, "step": 53500 }, { "epoch": 6.42, "learning_rate": 1.937128144186968e-05, "loss": 0.6644, "step": 54000 }, { "epoch": 6.48, "learning_rate": 1.9344873572324555e-05, "loss": 0.6697, "step": 54500 }, { "epoch": 6.54, "learning_rate": 1.931846570277943e-05, "loss": 0.6738, "step": 55000 }, { "epoch": 6.6, "learning_rate": 1.9292057833234306e-05, "loss": 0.6645, "step": 55500 }, { "epoch": 6.65, "learning_rate": 1.9265649963689182e-05, "loss": 0.6703, "step": 56000 }, { "epoch": 6.71, "learning_rate": 1.9239242094144058e-05, "loss": 0.6667, "step": 56500 }, { "epoch": 6.77, "learning_rate": 1.9212834224598934e-05, "loss": 0.6677, "step": 57000 }, { "epoch": 6.83, "learning_rate": 1.9186479170792897e-05, "loss": 0.6693, "step": 57500 }, { "epoch": 6.89, "learning_rate": 1.9160071301247773e-05, "loss": 0.6614, "step": 58000 }, { "epoch": 6.95, "learning_rate": 1.913366343170265e-05, "loss": 0.6604, "step": 58500 }, { "epoch": 7.01, "learning_rate": 1.9107255562157525e-05, "loss": 0.6591, "step": 59000 }, { "epoch": 7.07, "learning_rate": 1.908090050835149e-05, "loss": 0.6471, "step": 59500 }, { "epoch": 7.13, "learning_rate": 1.9054492638806367e-05, "loss": 0.6539, "step": 60000 }, { "epoch": 7.19, "learning_rate": 1.9028084769261243e-05, "loss": 0.6461, "step": 60500 }, { "epoch": 7.25, "learning_rate": 1.900167689971612e-05, "loss": 0.6514, "step": 61000 }, { "epoch": 7.31, "learning_rate": 1.897526903017099e-05, "loss": 0.6452, "step": 61500 }, { "epoch": 7.37, "learning_rate": 1.8948861160625867e-05, "loss": 0.6513, "step": 62000 }, { "epoch": 7.43, "learning_rate": 1.8922453291080743e-05, "loss": 0.6534, "step": 62500 }, { "epoch": 7.49, "learning_rate": 1.889604542153562e-05, "loss": 0.65, "step": 63000 }, { "epoch": 7.55, "learning_rate": 1.8869637551990495e-05, "loss": 0.6486, "step": 63500 }, { "epoch": 7.61, "learning_rate": 1.884328249818446e-05, "loss": 0.6498, "step": 64000 }, { "epoch": 7.66, "learning_rate": 1.8816874628639337e-05, "loss": 0.6424, "step": 64500 }, { "epoch": 7.72, "learning_rate": 1.8790466759094213e-05, "loss": 0.6492, "step": 65000 }, { "epoch": 7.78, "learning_rate": 1.8764058889549086e-05, "loss": 0.6564, "step": 65500 }, { "epoch": 7.84, "learning_rate": 1.8737703835743052e-05, "loss": 0.6467, "step": 66000 }, { "epoch": 7.9, "learning_rate": 1.8711295966197928e-05, "loss": 0.6461, "step": 66500 }, { "epoch": 7.96, "learning_rate": 1.8684888096652804e-05, "loss": 0.6424, "step": 67000 }, { "epoch": 8.02, "learning_rate": 1.865848022710768e-05, "loss": 0.6487, "step": 67500 }, { "epoch": 8.08, "learning_rate": 1.8632072357562556e-05, "loss": 0.6331, "step": 68000 }, { "epoch": 8.14, "learning_rate": 1.8605664488017428e-05, "loss": 0.6351, "step": 68500 }, { "epoch": 8.2, "learning_rate": 1.8579256618472304e-05, "loss": 0.6298, "step": 69000 }, { "epoch": 8.26, "learning_rate": 1.855284874892718e-05, "loss": 0.6365, "step": 69500 }, { "epoch": 8.32, "learning_rate": 1.8526440879382056e-05, "loss": 0.6371, "step": 70000 }, { "epoch": 8.38, "learning_rate": 1.8500085825576025e-05, "loss": 0.6312, "step": 70500 }, { "epoch": 8.44, "learning_rate": 1.847378358750908e-05, "loss": 0.6438, "step": 71000 }, { "epoch": 8.5, "learning_rate": 1.8447375717963954e-05, "loss": 0.6307, "step": 71500 }, { "epoch": 8.56, "learning_rate": 1.842096784841883e-05, "loss": 0.6377, "step": 72000 }, { "epoch": 8.62, "learning_rate": 1.8394559978873706e-05, "loss": 0.6457, "step": 72500 }, { "epoch": 8.67, "learning_rate": 1.8368152109328582e-05, "loss": 0.6377, "step": 73000 }, { "epoch": 8.73, "learning_rate": 1.8341744239783458e-05, "loss": 0.6394, "step": 73500 }, { "epoch": 8.79, "learning_rate": 1.8315389185977424e-05, "loss": 0.6392, "step": 74000 }, { "epoch": 8.85, "learning_rate": 1.82889813164323e-05, "loss": 0.6424, "step": 74500 }, { "epoch": 8.91, "learning_rate": 1.8262573446887173e-05, "loss": 0.6333, "step": 75000 }, { "epoch": 8.97, "learning_rate": 1.823621839308114e-05, "loss": 0.6422, "step": 75500 }, { "epoch": 9.03, "learning_rate": 1.8209810523536015e-05, "loss": 0.6311, "step": 76000 }, { "epoch": 9.09, "learning_rate": 1.818340265399089e-05, "loss": 0.6238, "step": 76500 }, { "epoch": 9.15, "learning_rate": 1.8156994784445767e-05, "loss": 0.6179, "step": 77000 }, { "epoch": 9.21, "learning_rate": 1.813058691490064e-05, "loss": 0.6344, "step": 77500 }, { "epoch": 9.27, "learning_rate": 1.8104179045355515e-05, "loss": 0.6303, "step": 78000 }, { "epoch": 9.33, "learning_rate": 1.807777117581039e-05, "loss": 0.625, "step": 78500 }, { "epoch": 9.39, "learning_rate": 1.8051363306265267e-05, "loss": 0.6237, "step": 79000 }, { "epoch": 9.45, "learning_rate": 1.8024955436720143e-05, "loss": 0.6332, "step": 79500 }, { "epoch": 9.51, "learning_rate": 1.799854756717502e-05, "loss": 0.6239, "step": 80000 }, { "epoch": 9.57, "learning_rate": 1.7972139697629895e-05, "loss": 0.627, "step": 80500 }, { "epoch": 9.63, "learning_rate": 1.794573182808477e-05, "loss": 0.6202, "step": 81000 }, { "epoch": 9.69, "learning_rate": 1.7919376774278737e-05, "loss": 0.63, "step": 81500 }, { "epoch": 9.74, "learning_rate": 1.7892968904733613e-05, "loss": 0.6205, "step": 82000 }, { "epoch": 9.8, "learning_rate": 1.786656103518849e-05, "loss": 0.6337, "step": 82500 }, { "epoch": 9.86, "learning_rate": 1.7840153165643365e-05, "loss": 0.6234, "step": 83000 }, { "epoch": 9.92, "learning_rate": 1.781374529609824e-05, "loss": 0.6349, "step": 83500 }, { "epoch": 9.98, "learning_rate": 1.7787337426553117e-05, "loss": 0.6336, "step": 84000 }, { "epoch": 10.04, "learning_rate": 1.776092955700799e-05, "loss": 0.6131, "step": 84500 }, { "epoch": 10.1, "learning_rate": 1.7734521687462865e-05, "loss": 0.6177, "step": 85000 }, { "epoch": 10.16, "learning_rate": 1.770816663365683e-05, "loss": 0.6209, "step": 85500 }, { "epoch": 10.22, "learning_rate": 1.7681758764111707e-05, "loss": 0.619, "step": 86000 }, { "epoch": 10.28, "learning_rate": 1.7655350894566583e-05, "loss": 0.6142, "step": 86500 }, { "epoch": 10.34, "learning_rate": 1.762894302502146e-05, "loss": 0.6134, "step": 87000 }, { "epoch": 10.4, "learning_rate": 1.7602535155476332e-05, "loss": 0.6174, "step": 87500 }, { "epoch": 10.46, "learning_rate": 1.7576180101670298e-05, "loss": 0.6203, "step": 88000 }, { "epoch": 10.52, "learning_rate": 1.7549772232125174e-05, "loss": 0.6171, "step": 88500 }, { "epoch": 10.58, "learning_rate": 1.752336436258005e-05, "loss": 0.6186, "step": 89000 }, { "epoch": 10.64, "learning_rate": 1.7496956493034926e-05, "loss": 0.617, "step": 89500 }, { "epoch": 10.7, "learning_rate": 1.74705486234898e-05, "loss": 0.6117, "step": 90000 }, { "epoch": 10.75, "learning_rate": 1.7444140753944678e-05, "loss": 0.6131, "step": 90500 }, { "epoch": 10.81, "learning_rate": 1.7417732884399553e-05, "loss": 0.6187, "step": 91000 }, { "epoch": 10.87, "learning_rate": 1.739132501485443e-05, "loss": 0.6215, "step": 91500 }, { "epoch": 10.93, "learning_rate": 1.7364969961048396e-05, "loss": 0.6237, "step": 92000 }, { "epoch": 10.99, "learning_rate": 1.733856209150327e-05, "loss": 0.6269, "step": 92500 }, { "epoch": 11.05, "learning_rate": 1.7312207037697234e-05, "loss": 0.6065, "step": 93000 }, { "epoch": 11.11, "learning_rate": 1.728579916815211e-05, "loss": 0.603, "step": 93500 }, { "epoch": 11.17, "learning_rate": 1.7259391298606986e-05, "loss": 0.6037, "step": 94000 }, { "epoch": 11.23, "learning_rate": 1.7232983429061862e-05, "loss": 0.6081, "step": 94500 }, { "epoch": 11.29, "learning_rate": 1.7206575559516738e-05, "loss": 0.602, "step": 95000 }, { "epoch": 11.35, "learning_rate": 1.718016768997161e-05, "loss": 0.6132, "step": 95500 }, { "epoch": 11.41, "learning_rate": 1.7153759820426487e-05, "loss": 0.6049, "step": 96000 }, { "epoch": 11.47, "learning_rate": 1.7127351950881362e-05, "loss": 0.6088, "step": 96500 }, { "epoch": 11.53, "learning_rate": 1.710094408133624e-05, "loss": 0.6063, "step": 97000 }, { "epoch": 11.59, "learning_rate": 1.7074589027530205e-05, "loss": 0.6086, "step": 97500 }, { "epoch": 11.65, "learning_rate": 1.704818115798508e-05, "loss": 0.6102, "step": 98000 }, { "epoch": 11.71, "learning_rate": 1.7021826104179047e-05, "loss": 0.6068, "step": 98500 }, { "epoch": 11.76, "learning_rate": 1.6995418234633923e-05, "loss": 0.6122, "step": 99000 }, { "epoch": 11.82, "learning_rate": 1.69690103650888e-05, "loss": 0.6119, "step": 99500 }, { "epoch": 11.88, "learning_rate": 1.6942602495543675e-05, "loss": 0.6058, "step": 100000 }, { "epoch": 11.94, "learning_rate": 1.691619462599855e-05, "loss": 0.6158, "step": 100500 }, { "epoch": 12.0, "learning_rate": 1.6889786756453423e-05, "loss": 0.6219, "step": 101000 }, { "epoch": 12.06, "learning_rate": 1.68633788869083e-05, "loss": 0.5941, "step": 101500 }, { "epoch": 12.12, "learning_rate": 1.6836971017363175e-05, "loss": 0.5933, "step": 102000 }, { "epoch": 12.18, "learning_rate": 1.681056314781805e-05, "loss": 0.6045, "step": 102500 }, { "epoch": 12.24, "learning_rate": 1.6784155278272927e-05, "loss": 0.5995, "step": 103000 }, { "epoch": 12.3, "learning_rate": 1.6757747408727803e-05, "loss": 0.5976, "step": 103500 }, { "epoch": 12.36, "learning_rate": 1.673139235492177e-05, "loss": 0.6016, "step": 104000 }, { "epoch": 12.42, "learning_rate": 1.670498448537664e-05, "loss": 0.6029, "step": 104500 }, { "epoch": 12.48, "learning_rate": 1.6678576615831517e-05, "loss": 0.6008, "step": 105000 }, { "epoch": 12.54, "learning_rate": 1.6652221562025487e-05, "loss": 0.5978, "step": 105500 }, { "epoch": 12.6, "learning_rate": 1.6625813692480363e-05, "loss": 0.6029, "step": 106000 }, { "epoch": 12.66, "learning_rate": 1.6599405822935235e-05, "loss": 0.6088, "step": 106500 }, { "epoch": 12.72, "learning_rate": 1.657299795339011e-05, "loss": 0.6071, "step": 107000 }, { "epoch": 12.77, "learning_rate": 1.6546590083844987e-05, "loss": 0.6025, "step": 107500 }, { "epoch": 12.83, "learning_rate": 1.6520182214299863e-05, "loss": 0.5966, "step": 108000 }, { "epoch": 12.89, "learning_rate": 1.649377434475474e-05, "loss": 0.6058, "step": 108500 }, { "epoch": 12.95, "learning_rate": 1.6467419290948702e-05, "loss": 0.6109, "step": 109000 }, { "epoch": 13.01, "learning_rate": 1.6441011421403578e-05, "loss": 0.6024, "step": 109500 }, { "epoch": 13.07, "learning_rate": 1.6414603551858454e-05, "loss": 0.5963, "step": 110000 }, { "epoch": 13.13, "learning_rate": 1.638819568231333e-05, "loss": 0.5861, "step": 110500 }, { "epoch": 13.19, "learning_rate": 1.6361787812768206e-05, "loss": 0.5939, "step": 111000 }, { "epoch": 13.25, "learning_rate": 1.633537994322308e-05, "loss": 0.5859, "step": 111500 }, { "epoch": 13.31, "learning_rate": 1.6308972073677957e-05, "loss": 0.5916, "step": 112000 }, { "epoch": 13.37, "learning_rate": 1.6282564204132833e-05, "loss": 0.5938, "step": 112500 }, { "epoch": 13.43, "learning_rate": 1.62562091503268e-05, "loss": 0.5925, "step": 113000 }, { "epoch": 13.49, "learning_rate": 1.6229801280781675e-05, "loss": 0.5945, "step": 113500 }, { "epoch": 13.55, "learning_rate": 1.620339341123655e-05, "loss": 0.5999, "step": 114000 }, { "epoch": 13.61, "learning_rate": 1.6176985541691427e-05, "loss": 0.5904, "step": 114500 }, { "epoch": 13.67, "learning_rate": 1.615063048788539e-05, "loss": 0.5988, "step": 115000 }, { "epoch": 13.73, "learning_rate": 1.6124222618340266e-05, "loss": 0.5963, "step": 115500 }, { "epoch": 13.78, "learning_rate": 1.6097814748795142e-05, "loss": 0.592, "step": 116000 }, { "epoch": 13.84, "learning_rate": 1.6071406879250018e-05, "loss": 0.5954, "step": 116500 }, { "epoch": 13.9, "learning_rate": 1.6044999009704894e-05, "loss": 0.5966, "step": 117000 }, { "epoch": 13.96, "learning_rate": 1.601859114015977e-05, "loss": 0.6015, "step": 117500 }, { "epoch": 14.02, "learning_rate": 1.5992183270614646e-05, "loss": 0.5891, "step": 118000 }, { "epoch": 14.08, "learning_rate": 1.596577540106952e-05, "loss": 0.5819, "step": 118500 }, { "epoch": 14.14, "learning_rate": 1.5939367531524394e-05, "loss": 0.5883, "step": 119000 }, { "epoch": 14.2, "learning_rate": 1.591301247771836e-05, "loss": 0.5881, "step": 119500 }, { "epoch": 14.26, "learning_rate": 1.5886604608173236e-05, "loss": 0.584, "step": 120000 }, { "epoch": 14.32, "learning_rate": 1.5860196738628112e-05, "loss": 0.5888, "step": 120500 }, { "epoch": 14.38, "learning_rate": 1.5833788869082988e-05, "loss": 0.5911, "step": 121000 }, { "epoch": 14.44, "learning_rate": 1.5807380999537864e-05, "loss": 0.5846, "step": 121500 }, { "epoch": 14.5, "learning_rate": 1.5780973129992737e-05, "loss": 0.5836, "step": 122000 }, { "epoch": 14.56, "learning_rate": 1.5754565260447613e-05, "loss": 0.5838, "step": 122500 }, { "epoch": 14.62, "learning_rate": 1.5728210206641582e-05, "loss": 0.5883, "step": 123000 }, { "epoch": 14.68, "learning_rate": 1.5701802337096458e-05, "loss": 0.5875, "step": 123500 }, { "epoch": 14.74, "learning_rate": 1.5675394467551334e-05, "loss": 0.5901, "step": 124000 }, { "epoch": 14.79, "learning_rate": 1.5648986598006207e-05, "loss": 0.5881, "step": 124500 }, { "epoch": 14.85, "learning_rate": 1.5622631544200173e-05, "loss": 0.5951, "step": 125000 }, { "epoch": 14.91, "learning_rate": 1.559622367465505e-05, "loss": 0.5898, "step": 125500 }, { "epoch": 14.97, "learning_rate": 1.5569815805109925e-05, "loss": 0.5861, "step": 126000 }, { "epoch": 15.03, "learning_rate": 1.55434079355648e-05, "loss": 0.5754, "step": 126500 }, { "epoch": 15.09, "learning_rate": 1.5517000066019676e-05, "loss": 0.5751, "step": 127000 }, { "epoch": 15.15, "learning_rate": 1.549064501221364e-05, "loss": 0.5814, "step": 127500 }, { "epoch": 15.21, "learning_rate": 1.5464237142668515e-05, "loss": 0.5853, "step": 128000 }, { "epoch": 15.27, "learning_rate": 1.543782927312339e-05, "loss": 0.5832, "step": 128500 }, { "epoch": 15.33, "learning_rate": 1.5411421403578267e-05, "loss": 0.5831, "step": 129000 }, { "epoch": 15.39, "learning_rate": 1.5385013534033143e-05, "loss": 0.583, "step": 129500 }, { "epoch": 15.45, "learning_rate": 1.535860566448802e-05, "loss": 0.5708, "step": 130000 }, { "epoch": 15.51, "learning_rate": 1.5332250610681985e-05, "loss": 0.5816, "step": 130500 }, { "epoch": 15.57, "learning_rate": 1.530584274113686e-05, "loss": 0.5845, "step": 131000 }, { "epoch": 15.63, "learning_rate": 1.5279434871591737e-05, "loss": 0.5821, "step": 131500 }, { "epoch": 15.69, "learning_rate": 1.5253027002046611e-05, "loss": 0.5835, "step": 132000 }, { "epoch": 15.75, "learning_rate": 1.5226619132501485e-05, "loss": 0.5796, "step": 132500 }, { "epoch": 15.81, "learning_rate": 1.5200211262956361e-05, "loss": 0.5825, "step": 133000 }, { "epoch": 15.86, "learning_rate": 1.5173803393411237e-05, "loss": 0.5854, "step": 133500 }, { "epoch": 15.92, "learning_rate": 1.5147395523866113e-05, "loss": 0.5813, "step": 134000 }, { "epoch": 15.98, "learning_rate": 1.5121093285799168e-05, "loss": 0.584, "step": 134500 }, { "epoch": 16.04, "learning_rate": 1.5094685416254044e-05, "loss": 0.5734, "step": 135000 }, { "epoch": 16.1, "learning_rate": 1.506827754670892e-05, "loss": 0.5647, "step": 135500 }, { "epoch": 16.16, "learning_rate": 1.5041869677163796e-05, "loss": 0.5729, "step": 136000 }, { "epoch": 16.22, "learning_rate": 1.501551462335776e-05, "loss": 0.5692, "step": 136500 }, { "epoch": 16.28, "learning_rate": 1.4989159569551728e-05, "loss": 0.5703, "step": 137000 }, { "epoch": 16.34, "learning_rate": 1.4962751700006604e-05, "loss": 0.5798, "step": 137500 }, { "epoch": 16.4, "learning_rate": 1.493634383046148e-05, "loss": 0.5785, "step": 138000 }, { "epoch": 16.46, "learning_rate": 1.4909935960916356e-05, "loss": 0.57, "step": 138500 }, { "epoch": 16.52, "learning_rate": 1.488358090711032e-05, "loss": 0.5747, "step": 139000 }, { "epoch": 16.58, "learning_rate": 1.4857173037565196e-05, "loss": 0.5761, "step": 139500 }, { "epoch": 16.64, "learning_rate": 1.483076516802007e-05, "loss": 0.5756, "step": 140000 }, { "epoch": 16.7, "learning_rate": 1.4804357298474947e-05, "loss": 0.5727, "step": 140500 }, { "epoch": 16.76, "learning_rate": 1.4777949428929823e-05, "loss": 0.5826, "step": 141000 }, { "epoch": 16.82, "learning_rate": 1.4751541559384698e-05, "loss": 0.5862, "step": 141500 }, { "epoch": 16.87, "learning_rate": 1.4725133689839573e-05, "loss": 0.5752, "step": 142000 }, { "epoch": 16.93, "learning_rate": 1.4698725820294449e-05, "loss": 0.583, "step": 142500 }, { "epoch": 16.99, "learning_rate": 1.4672317950749325e-05, "loss": 0.5805, "step": 143000 }, { "epoch": 17.05, "learning_rate": 1.464596289694329e-05, "loss": 0.566, "step": 143500 }, { "epoch": 17.11, "learning_rate": 1.4619555027398167e-05, "loss": 0.5645, "step": 144000 }, { "epoch": 17.17, "learning_rate": 1.4593199973592131e-05, "loss": 0.5645, "step": 144500 }, { "epoch": 17.23, "learning_rate": 1.4566792104047007e-05, "loss": 0.5691, "step": 145000 }, { "epoch": 17.29, "learning_rate": 1.4540384234501883e-05, "loss": 0.5693, "step": 145500 }, { "epoch": 17.35, "learning_rate": 1.4513976364956759e-05, "loss": 0.5731, "step": 146000 }, { "epoch": 17.41, "learning_rate": 1.4487568495411635e-05, "loss": 0.5658, "step": 146500 }, { "epoch": 17.47, "learning_rate": 1.446116062586651e-05, "loss": 0.5761, "step": 147000 }, { "epoch": 17.53, "learning_rate": 1.4434752756321383e-05, "loss": 0.5687, "step": 147500 }, { "epoch": 17.59, "learning_rate": 1.440834488677626e-05, "loss": 0.5724, "step": 148000 }, { "epoch": 17.65, "learning_rate": 1.4381937017231135e-05, "loss": 0.5683, "step": 148500 }, { "epoch": 17.71, "learning_rate": 1.4355581963425103e-05, "loss": 0.5772, "step": 149000 }, { "epoch": 17.77, "learning_rate": 1.4329174093879979e-05, "loss": 0.5669, "step": 149500 }, { "epoch": 17.83, "learning_rate": 1.4302819040073944e-05, "loss": 0.5678, "step": 150000 }, { "epoch": 17.88, "learning_rate": 1.427646398626791e-05, "loss": 0.5703, "step": 150500 }, { "epoch": 17.94, "learning_rate": 1.4250056116722784e-05, "loss": 0.5687, "step": 151000 }, { "epoch": 18.0, "learning_rate": 1.422364824717766e-05, "loss": 0.5656, "step": 151500 }, { "epoch": 18.06, "learning_rate": 1.4197240377632536e-05, "loss": 0.5563, "step": 152000 }, { "epoch": 18.12, "learning_rate": 1.4170885323826502e-05, "loss": 0.5584, "step": 152500 }, { "epoch": 18.18, "learning_rate": 1.4144477454281378e-05, "loss": 0.5699, "step": 153000 }, { "epoch": 18.24, "learning_rate": 1.4118069584736254e-05, "loss": 0.5573, "step": 153500 }, { "epoch": 18.3, "learning_rate": 1.4091661715191126e-05, "loss": 0.5646, "step": 154000 }, { "epoch": 18.36, "learning_rate": 1.4065306661385094e-05, "loss": 0.5608, "step": 154500 }, { "epoch": 18.42, "learning_rate": 1.4038951607579059e-05, "loss": 0.566, "step": 155000 }, { "epoch": 18.48, "learning_rate": 1.4012543738033935e-05, "loss": 0.5709, "step": 155500 }, { "epoch": 18.54, "learning_rate": 1.398613586848881e-05, "loss": 0.5665, "step": 156000 }, { "epoch": 18.6, "learning_rate": 1.3959727998943687e-05, "loss": 0.5642, "step": 156500 }, { "epoch": 18.66, "learning_rate": 1.3933320129398563e-05, "loss": 0.5647, "step": 157000 }, { "epoch": 18.72, "learning_rate": 1.3906912259853439e-05, "loss": 0.5716, "step": 157500 }, { "epoch": 18.78, "learning_rate": 1.3880504390308314e-05, "loss": 0.5673, "step": 158000 }, { "epoch": 18.84, "learning_rate": 1.3854096520763189e-05, "loss": 0.5671, "step": 158500 }, { "epoch": 18.89, "learning_rate": 1.3827688651218063e-05, "loss": 0.5657, "step": 159000 }, { "epoch": 18.95, "learning_rate": 1.3801280781672939e-05, "loss": 0.5629, "step": 159500 }, { "epoch": 19.01, "learning_rate": 1.3774872912127815e-05, "loss": 0.5674, "step": 160000 }, { "epoch": 19.07, "learning_rate": 1.374846504258269e-05, "loss": 0.5609, "step": 160500 }, { "epoch": 19.13, "learning_rate": 1.3722057173037567e-05, "loss": 0.5642, "step": 161000 }, { "epoch": 19.19, "learning_rate": 1.3695649303492443e-05, "loss": 0.5545, "step": 161500 }, { "epoch": 19.25, "learning_rate": 1.3669241433947318e-05, "loss": 0.5548, "step": 162000 }, { "epoch": 19.31, "learning_rate": 1.3642833564402194e-05, "loss": 0.5523, "step": 162500 }, { "epoch": 19.37, "learning_rate": 1.3616425694857069e-05, "loss": 0.5602, "step": 163000 }, { "epoch": 19.43, "learning_rate": 1.3590017825311943e-05, "loss": 0.5627, "step": 163500 }, { "epoch": 19.49, "learning_rate": 1.3563609955766819e-05, "loss": 0.5549, "step": 164000 }, { "epoch": 19.55, "learning_rate": 1.3537202086221695e-05, "loss": 0.5606, "step": 164500 }, { "epoch": 19.61, "learning_rate": 1.351079421667657e-05, "loss": 0.5672, "step": 165000 }, { "epoch": 19.67, "learning_rate": 1.3484386347131447e-05, "loss": 0.5658, "step": 165500 }, { "epoch": 19.73, "learning_rate": 1.3457978477586322e-05, "loss": 0.5628, "step": 166000 }, { "epoch": 19.79, "learning_rate": 1.3431570608041198e-05, "loss": 0.5612, "step": 166500 }, { "epoch": 19.85, "learning_rate": 1.3405162738496074e-05, "loss": 0.5643, "step": 167000 }, { "epoch": 19.9, "learning_rate": 1.337875486895095e-05, "loss": 0.568, "step": 167500 }, { "epoch": 19.96, "learning_rate": 1.3352346999405823e-05, "loss": 0.563, "step": 168000 }, { "epoch": 20.02, "learning_rate": 1.332599194559979e-05, "loss": 0.5639, "step": 168500 }, { "epoch": 20.08, "learning_rate": 1.3299584076054665e-05, "loss": 0.5526, "step": 169000 }, { "epoch": 20.14, "learning_rate": 1.327317620650954e-05, "loss": 0.5549, "step": 169500 }, { "epoch": 20.2, "learning_rate": 1.3246768336964417e-05, "loss": 0.5477, "step": 170000 }, { "epoch": 20.26, "learning_rate": 1.3220360467419293e-05, "loss": 0.5534, "step": 170500 }, { "epoch": 20.32, "learning_rate": 1.3194005413613257e-05, "loss": 0.5555, "step": 171000 }, { "epoch": 20.38, "learning_rate": 1.3167597544068133e-05, "loss": 0.5646, "step": 171500 }, { "epoch": 20.44, "learning_rate": 1.3141189674523009e-05, "loss": 0.5538, "step": 172000 }, { "epoch": 20.5, "learning_rate": 1.3114781804977885e-05, "loss": 0.5567, "step": 172500 }, { "epoch": 20.56, "learning_rate": 1.3088373935432761e-05, "loss": 0.5553, "step": 173000 }, { "epoch": 20.62, "learning_rate": 1.3061966065887635e-05, "loss": 0.5573, "step": 173500 }, { "epoch": 20.68, "learning_rate": 1.3035611012081601e-05, "loss": 0.5624, "step": 174000 }, { "epoch": 20.74, "learning_rate": 1.3009255958275566e-05, "loss": 0.5553, "step": 174500 }, { "epoch": 20.8, "learning_rate": 1.2982848088730442e-05, "loss": 0.57, "step": 175000 }, { "epoch": 20.86, "learning_rate": 1.2956440219185318e-05, "loss": 0.5502, "step": 175500 }, { "epoch": 20.91, "learning_rate": 1.2930032349640194e-05, "loss": 0.5544, "step": 176000 }, { "epoch": 20.97, "learning_rate": 1.290367729583416e-05, "loss": 0.5627, "step": 176500 }, { "epoch": 21.03, "learning_rate": 1.2877269426289034e-05, "loss": 0.5574, "step": 177000 }, { "epoch": 21.09, "learning_rate": 1.285086155674391e-05, "loss": 0.5469, "step": 177500 }, { "epoch": 21.15, "learning_rate": 1.2824453687198786e-05, "loss": 0.5495, "step": 178000 }, { "epoch": 21.21, "learning_rate": 1.2798045817653662e-05, "loss": 0.5523, "step": 178500 }, { "epoch": 21.27, "learning_rate": 1.2771690763847628e-05, "loss": 0.5467, "step": 179000 }, { "epoch": 21.33, "learning_rate": 1.2745282894302504e-05, "loss": 0.5487, "step": 179500 }, { "epoch": 21.39, "learning_rate": 1.2718875024757378e-05, "loss": 0.546, "step": 180000 }, { "epoch": 21.45, "learning_rate": 1.2692467155212254e-05, "loss": 0.5461, "step": 180500 }, { "epoch": 21.51, "learning_rate": 1.266605928566713e-05, "loss": 0.554, "step": 181000 }, { "epoch": 21.57, "learning_rate": 1.2639704231861096e-05, "loss": 0.5493, "step": 181500 }, { "epoch": 21.63, "learning_rate": 1.2613296362315972e-05, "loss": 0.5548, "step": 182000 }, { "epoch": 21.69, "learning_rate": 1.2586888492770846e-05, "loss": 0.5503, "step": 182500 }, { "epoch": 21.75, "learning_rate": 1.256048062322572e-05, "loss": 0.5604, "step": 183000 }, { "epoch": 21.81, "learning_rate": 1.2534072753680597e-05, "loss": 0.5622, "step": 183500 }, { "epoch": 21.87, "learning_rate": 1.2507664884135473e-05, "loss": 0.549, "step": 184000 }, { "epoch": 21.93, "learning_rate": 1.2481257014590348e-05, "loss": 0.5535, "step": 184500 }, { "epoch": 21.98, "learning_rate": 1.2454849145045224e-05, "loss": 0.5572, "step": 185000 }, { "epoch": 22.04, "learning_rate": 1.24284412755001e-05, "loss": 0.5508, "step": 185500 }, { "epoch": 22.1, "learning_rate": 1.2402033405954976e-05, "loss": 0.5454, "step": 186000 }, { "epoch": 22.16, "learning_rate": 1.237567835214894e-05, "loss": 0.551, "step": 186500 }, { "epoch": 22.22, "learning_rate": 1.2349270482603817e-05, "loss": 0.5454, "step": 187000 }, { "epoch": 22.28, "learning_rate": 1.2322862613058693e-05, "loss": 0.5445, "step": 187500 }, { "epoch": 22.34, "learning_rate": 1.2296454743513569e-05, "loss": 0.5493, "step": 188000 }, { "epoch": 22.4, "learning_rate": 1.2270046873968444e-05, "loss": 0.5429, "step": 188500 }, { "epoch": 22.46, "learning_rate": 1.224363900442332e-05, "loss": 0.5507, "step": 189000 }, { "epoch": 22.52, "learning_rate": 1.2217231134878196e-05, "loss": 0.5439, "step": 189500 }, { "epoch": 22.58, "learning_rate": 1.2190823265333069e-05, "loss": 0.5484, "step": 190000 }, { "epoch": 22.64, "learning_rate": 1.2164468211527037e-05, "loss": 0.5467, "step": 190500 }, { "epoch": 22.7, "learning_rate": 1.2138113157721001e-05, "loss": 0.5501, "step": 191000 }, { "epoch": 22.76, "learning_rate": 1.2111705288175877e-05, "loss": 0.5495, "step": 191500 }, { "epoch": 22.82, "learning_rate": 1.2085297418630753e-05, "loss": 0.552, "step": 192000 }, { "epoch": 22.88, "learning_rate": 1.205894236482472e-05, "loss": 0.548, "step": 192500 }, { "epoch": 22.94, "learning_rate": 1.2032534495279595e-05, "loss": 0.5501, "step": 193000 }, { "epoch": 22.99, "learning_rate": 1.200617944147356e-05, "loss": 0.5499, "step": 193500 }, { "epoch": 23.05, "learning_rate": 1.1979771571928436e-05, "loss": 0.5376, "step": 194000 }, { "epoch": 23.11, "learning_rate": 1.1953363702383312e-05, "loss": 0.5413, "step": 194500 }, { "epoch": 23.17, "learning_rate": 1.1926955832838188e-05, "loss": 0.5504, "step": 195000 }, { "epoch": 23.23, "learning_rate": 1.1900547963293064e-05, "loss": 0.5448, "step": 195500 }, { "epoch": 23.29, "learning_rate": 1.1874140093747936e-05, "loss": 0.553, "step": 196000 }, { "epoch": 23.35, "learning_rate": 1.1847732224202812e-05, "loss": 0.5535, "step": 196500 }, { "epoch": 23.41, "learning_rate": 1.1821324354657688e-05, "loss": 0.5449, "step": 197000 }, { "epoch": 23.47, "learning_rate": 1.1794916485112564e-05, "loss": 0.5452, "step": 197500 }, { "epoch": 23.53, "learning_rate": 1.176850861556744e-05, "loss": 0.5496, "step": 198000 }, { "epoch": 23.59, "learning_rate": 1.1742100746022316e-05, "loss": 0.5528, "step": 198500 }, { "epoch": 23.65, "learning_rate": 1.1715692876477192e-05, "loss": 0.5388, "step": 199000 }, { "epoch": 23.71, "learning_rate": 1.1689337822671156e-05, "loss": 0.5412, "step": 199500 }, { "epoch": 23.77, "learning_rate": 1.1662929953126032e-05, "loss": 0.5494, "step": 200000 }, { "epoch": 23.83, "learning_rate": 1.1636522083580908e-05, "loss": 0.5403, "step": 200500 }, { "epoch": 23.89, "learning_rate": 1.1610114214035784e-05, "loss": 0.5522, "step": 201000 }, { "epoch": 23.95, "learning_rate": 1.1583759160229748e-05, "loss": 0.5522, "step": 201500 }, { "epoch": 24.0, "learning_rate": 1.1557351290684624e-05, "loss": 0.5405, "step": 202000 }, { "epoch": 24.06, "learning_rate": 1.15309434211395e-05, "loss": 0.5371, "step": 202500 }, { "epoch": 24.12, "learning_rate": 1.1504588367333466e-05, "loss": 0.5378, "step": 203000 }, { "epoch": 24.18, "learning_rate": 1.1478180497788342e-05, "loss": 0.543, "step": 203500 }, { "epoch": 24.24, "learning_rate": 1.1451772628243218e-05, "loss": 0.5359, "step": 204000 }, { "epoch": 24.3, "learning_rate": 1.1425364758698093e-05, "loss": 0.5344, "step": 204500 }, { "epoch": 24.36, "learning_rate": 1.1398956889152968e-05, "loss": 0.548, "step": 205000 }, { "epoch": 24.42, "learning_rate": 1.1372549019607844e-05, "loss": 0.5406, "step": 205500 }, { "epoch": 24.48, "learning_rate": 1.134619396580181e-05, "loss": 0.5398, "step": 206000 }, { "epoch": 24.54, "learning_rate": 1.1319786096256687e-05, "loss": 0.5322, "step": 206500 }, { "epoch": 24.6, "learning_rate": 1.1293378226711559e-05, "loss": 0.5327, "step": 207000 }, { "epoch": 24.66, "learning_rate": 1.1266970357166435e-05, "loss": 0.5395, "step": 207500 }, { "epoch": 24.72, "learning_rate": 1.1240615303360403e-05, "loss": 0.5401, "step": 208000 }, { "epoch": 24.78, "learning_rate": 1.1214207433815279e-05, "loss": 0.5446, "step": 208500 }, { "epoch": 24.84, "learning_rate": 1.1187852380009243e-05, "loss": 0.5431, "step": 209000 }, { "epoch": 24.9, "learning_rate": 1.116144451046412e-05, "loss": 0.543, "step": 209500 }, { "epoch": 24.96, "learning_rate": 1.1135036640918995e-05, "loss": 0.541, "step": 210000 }, { "epoch": 25.01, "learning_rate": 1.1108628771373871e-05, "loss": 0.5417, "step": 210500 }, { "epoch": 25.07, "learning_rate": 1.1082273717567836e-05, "loss": 0.5306, "step": 211000 }, { "epoch": 25.13, "learning_rate": 1.1055865848022712e-05, "loss": 0.5355, "step": 211500 }, { "epoch": 25.19, "learning_rate": 1.1029457978477588e-05, "loss": 0.5358, "step": 212000 }, { "epoch": 25.25, "learning_rate": 1.1003050108932463e-05, "loss": 0.5386, "step": 212500 }, { "epoch": 25.31, "learning_rate": 1.0976642239387338e-05, "loss": 0.54, "step": 213000 }, { "epoch": 25.37, "learning_rate": 1.0950287185581304e-05, "loss": 0.5376, "step": 213500 }, { "epoch": 25.43, "learning_rate": 1.0923879316036178e-05, "loss": 0.5369, "step": 214000 }, { "epoch": 25.49, "learning_rate": 1.0897471446491054e-05, "loss": 0.5352, "step": 214500 }, { "epoch": 25.55, "learning_rate": 1.087106357694593e-05, "loss": 0.5446, "step": 215000 }, { "epoch": 25.61, "learning_rate": 1.0844655707400806e-05, "loss": 0.5399, "step": 215500 }, { "epoch": 25.67, "learning_rate": 1.0818247837855682e-05, "loss": 0.5382, "step": 216000 }, { "epoch": 25.73, "learning_rate": 1.0791839968310558e-05, "loss": 0.5416, "step": 216500 }, { "epoch": 25.79, "learning_rate": 1.0765484914504522e-05, "loss": 0.5414, "step": 217000 }, { "epoch": 25.85, "learning_rate": 1.0739077044959398e-05, "loss": 0.5394, "step": 217500 }, { "epoch": 25.91, "learning_rate": 1.0712669175414274e-05, "loss": 0.5451, "step": 218000 }, { "epoch": 25.97, "learning_rate": 1.068626130586915e-05, "loss": 0.5402, "step": 218500 }, { "epoch": 26.02, "learning_rate": 1.0659853436324026e-05, "loss": 0.5345, "step": 219000 }, { "epoch": 26.08, "learning_rate": 1.063349838251799e-05, "loss": 0.5235, "step": 219500 }, { "epoch": 26.14, "learning_rate": 1.0607090512972866e-05, "loss": 0.5328, "step": 220000 }, { "epoch": 26.2, "learning_rate": 1.0580682643427742e-05, "loss": 0.5376, "step": 220500 }, { "epoch": 26.26, "learning_rate": 1.0554274773882618e-05, "loss": 0.5319, "step": 221000 }, { "epoch": 26.32, "learning_rate": 1.0527866904337494e-05, "loss": 0.5324, "step": 221500 }, { "epoch": 26.38, "learning_rate": 1.050145903479237e-05, "loss": 0.5334, "step": 222000 }, { "epoch": 26.44, "learning_rate": 1.0475051165247246e-05, "loss": 0.5338, "step": 222500 }, { "epoch": 26.5, "learning_rate": 1.044864329570212e-05, "loss": 0.5367, "step": 223000 }, { "epoch": 26.56, "learning_rate": 1.0422235426156994e-05, "loss": 0.5337, "step": 223500 }, { "epoch": 26.62, "learning_rate": 1.039582755661187e-05, "loss": 0.5306, "step": 224000 }, { "epoch": 26.68, "learning_rate": 1.0369419687066746e-05, "loss": 0.5417, "step": 224500 }, { "epoch": 26.74, "learning_rate": 1.0343011817521622e-05, "loss": 0.534, "step": 225000 }, { "epoch": 26.8, "learning_rate": 1.0316603947976498e-05, "loss": 0.5341, "step": 225500 }, { "epoch": 26.86, "learning_rate": 1.0290196078431374e-05, "loss": 0.535, "step": 226000 }, { "epoch": 26.92, "learning_rate": 1.0263841024625339e-05, "loss": 0.5414, "step": 226500 }, { "epoch": 26.98, "learning_rate": 1.0237433155080215e-05, "loss": 0.5375, "step": 227000 }, { "epoch": 27.04, "learning_rate": 1.021102528553509e-05, "loss": 0.5268, "step": 227500 }, { "epoch": 27.09, "learning_rate": 1.0184617415989966e-05, "loss": 0.5277, "step": 228000 }, { "epoch": 27.15, "learning_rate": 1.0158262362183933e-05, "loss": 0.5304, "step": 228500 }, { "epoch": 27.21, "learning_rate": 1.0131854492638807e-05, "loss": 0.5291, "step": 229000 }, { "epoch": 27.27, "learning_rate": 1.0105446623093683e-05, "loss": 0.5287, "step": 229500 }, { "epoch": 27.33, "learning_rate": 1.0079091569287649e-05, "loss": 0.5346, "step": 230000 }, { "epoch": 27.39, "learning_rate": 1.0052683699742525e-05, "loss": 0.5357, "step": 230500 }, { "epoch": 27.45, "learning_rate": 1.0026275830197401e-05, "loss": 0.5317, "step": 231000 }, { "epoch": 27.51, "learning_rate": 9.999920776391365e-06, "loss": 0.5187, "step": 231500 }, { "epoch": 27.57, "learning_rate": 9.973512906846241e-06, "loss": 0.5361, "step": 232000 }, { "epoch": 27.63, "learning_rate": 9.947105037301117e-06, "loss": 0.5361, "step": 232500 }, { "epoch": 27.69, "learning_rate": 9.920697167755991e-06, "loss": 0.5292, "step": 233000 }, { "epoch": 27.75, "learning_rate": 9.894289298210867e-06, "loss": 0.5317, "step": 233500 }, { "epoch": 27.81, "learning_rate": 9.867881428665743e-06, "loss": 0.5294, "step": 234000 }, { "epoch": 27.87, "learning_rate": 9.84147355912062e-06, "loss": 0.5348, "step": 234500 }, { "epoch": 27.93, "learning_rate": 9.815065689575495e-06, "loss": 0.5443, "step": 235000 }, { "epoch": 27.99, "learning_rate": 9.788710635769461e-06, "loss": 0.5355, "step": 235500 }, { "epoch": 28.05, "learning_rate": 9.762302766224336e-06, "loss": 0.5297, "step": 236000 }, { "epoch": 28.1, "learning_rate": 9.735894896679212e-06, "loss": 0.5306, "step": 236500 }, { "epoch": 28.16, "learning_rate": 9.709487027134087e-06, "loss": 0.5251, "step": 237000 }, { "epoch": 28.22, "learning_rate": 9.683079157588963e-06, "loss": 0.5255, "step": 237500 }, { "epoch": 28.28, "learning_rate": 9.656671288043838e-06, "loss": 0.5222, "step": 238000 }, { "epoch": 28.34, "learning_rate": 9.630263418498714e-06, "loss": 0.5281, "step": 238500 }, { "epoch": 28.4, "learning_rate": 9.60385554895359e-06, "loss": 0.5262, "step": 239000 }, { "epoch": 28.46, "learning_rate": 9.577447679408465e-06, "loss": 0.5245, "step": 239500 }, { "epoch": 28.52, "learning_rate": 9.551039809863341e-06, "loss": 0.5321, "step": 240000 }, { "epoch": 28.58, "learning_rate": 9.524684756057306e-06, "loss": 0.5354, "step": 240500 }, { "epoch": 28.64, "learning_rate": 9.498276886512182e-06, "loss": 0.5264, "step": 241000 }, { "epoch": 28.7, "learning_rate": 9.471869016967058e-06, "loss": 0.5244, "step": 241500 }, { "epoch": 28.76, "learning_rate": 9.445461147421932e-06, "loss": 0.5337, "step": 242000 }, { "epoch": 28.82, "learning_rate": 9.419106093615898e-06, "loss": 0.529, "step": 242500 }, { "epoch": 28.88, "learning_rate": 9.392698224070774e-06, "loss": 0.5317, "step": 243000 }, { "epoch": 28.94, "learning_rate": 9.366290354525648e-06, "loss": 0.5285, "step": 243500 }, { "epoch": 29.0, "learning_rate": 9.339882484980524e-06, "loss": 0.5338, "step": 244000 }, { "epoch": 29.06, "learning_rate": 9.3134746154354e-06, "loss": 0.5181, "step": 244500 }, { "epoch": 29.11, "learning_rate": 9.287066745890276e-06, "loss": 0.528, "step": 245000 }, { "epoch": 29.17, "learning_rate": 9.260658876345152e-06, "loss": 0.5173, "step": 245500 }, { "epoch": 29.23, "learning_rate": 9.234251006800026e-06, "loss": 0.5261, "step": 246000 }, { "epoch": 29.29, "learning_rate": 9.207895952993992e-06, "loss": 0.5258, "step": 246500 }, { "epoch": 29.35, "learning_rate": 9.181488083448868e-06, "loss": 0.5252, "step": 247000 }, { "epoch": 29.41, "learning_rate": 9.155080213903744e-06, "loss": 0.5298, "step": 247500 }, { "epoch": 29.47, "learning_rate": 9.12867234435862e-06, "loss": 0.519, "step": 248000 }, { "epoch": 29.53, "learning_rate": 9.102317290552586e-06, "loss": 0.5197, "step": 248500 }, { "epoch": 29.59, "learning_rate": 9.07590942100746e-06, "loss": 0.5199, "step": 249000 }, { "epoch": 29.65, "learning_rate": 9.049501551462337e-06, "loss": 0.5279, "step": 249500 }, { "epoch": 29.71, "learning_rate": 9.023093681917212e-06, "loss": 0.5245, "step": 250000 }, { "epoch": 29.77, "learning_rate": 8.996738628111179e-06, "loss": 0.5243, "step": 250500 }, { "epoch": 29.83, "learning_rate": 8.970330758566053e-06, "loss": 0.5296, "step": 251000 }, { "epoch": 29.89, "learning_rate": 8.94397570476002e-06, "loss": 0.5266, "step": 251500 }, { "epoch": 29.95, "learning_rate": 8.917567835214893e-06, "loss": 0.5282, "step": 252000 }, { "epoch": 30.01, "learning_rate": 8.89115996566977e-06, "loss": 0.5299, "step": 252500 }, { "epoch": 30.07, "learning_rate": 8.864752096124645e-06, "loss": 0.5238, "step": 253000 }, { "epoch": 30.12, "learning_rate": 8.838344226579521e-06, "loss": 0.5256, "step": 253500 }, { "epoch": 30.18, "learning_rate": 8.811936357034397e-06, "loss": 0.522, "step": 254000 }, { "epoch": 30.24, "learning_rate": 8.785528487489271e-06, "loss": 0.5249, "step": 254500 }, { "epoch": 30.3, "learning_rate": 8.759120617944147e-06, "loss": 0.5206, "step": 255000 }, { "epoch": 30.36, "learning_rate": 8.732712748399023e-06, "loss": 0.5193, "step": 255500 }, { "epoch": 30.42, "learning_rate": 8.70635769459299e-06, "loss": 0.529, "step": 256000 }, { "epoch": 30.48, "learning_rate": 8.680002640786956e-06, "loss": 0.5159, "step": 256500 }, { "epoch": 30.54, "learning_rate": 8.653594771241832e-06, "loss": 0.5337, "step": 257000 }, { "epoch": 30.6, "learning_rate": 8.627186901696706e-06, "loss": 0.5298, "step": 257500 }, { "epoch": 30.66, "learning_rate": 8.600779032151582e-06, "loss": 0.5296, "step": 258000 }, { "epoch": 30.72, "learning_rate": 8.574371162606458e-06, "loss": 0.5227, "step": 258500 }, { "epoch": 30.78, "learning_rate": 8.547963293061334e-06, "loss": 0.5296, "step": 259000 }, { "epoch": 30.84, "learning_rate": 8.52155542351621e-06, "loss": 0.53, "step": 259500 }, { "epoch": 30.9, "learning_rate": 8.495147553971084e-06, "loss": 0.5256, "step": 260000 }, { "epoch": 30.96, "learning_rate": 8.46873968442596e-06, "loss": 0.5272, "step": 260500 }, { "epoch": 31.02, "learning_rate": 8.442384630619926e-06, "loss": 0.5294, "step": 261000 }, { "epoch": 31.08, "learning_rate": 8.415976761074802e-06, "loss": 0.511, "step": 261500 }, { "epoch": 31.13, "learning_rate": 8.389568891529678e-06, "loss": 0.5233, "step": 262000 }, { "epoch": 31.19, "learning_rate": 8.363161021984552e-06, "loss": 0.5217, "step": 262500 }, { "epoch": 31.25, "learning_rate": 8.336753152439428e-06, "loss": 0.5162, "step": 263000 }, { "epoch": 31.31, "learning_rate": 8.310345282894304e-06, "loss": 0.5129, "step": 263500 }, { "epoch": 31.37, "learning_rate": 8.28393741334918e-06, "loss": 0.5178, "step": 264000 }, { "epoch": 31.43, "learning_rate": 8.257529543804056e-06, "loss": 0.5155, "step": 264500 }, { "epoch": 31.49, "learning_rate": 8.23112167425893e-06, "loss": 0.5211, "step": 265000 }, { "epoch": 31.55, "learning_rate": 8.204819436191986e-06, "loss": 0.5248, "step": 265500 }, { "epoch": 31.61, "learning_rate": 8.17841156664686e-06, "loss": 0.5344, "step": 266000 }, { "epoch": 31.67, "learning_rate": 8.152003697101737e-06, "loss": 0.5237, "step": 266500 }, { "epoch": 31.73, "learning_rate": 8.125595827556612e-06, "loss": 0.5172, "step": 267000 }, { "epoch": 31.79, "learning_rate": 8.099240773750579e-06, "loss": 0.5237, "step": 267500 }, { "epoch": 31.85, "learning_rate": 8.072832904205455e-06, "loss": 0.5229, "step": 268000 }, { "epoch": 31.91, "learning_rate": 8.046425034660329e-06, "loss": 0.5186, "step": 268500 }, { "epoch": 31.97, "learning_rate": 8.020069980854295e-06, "loss": 0.5287, "step": 269000 }, { "epoch": 32.03, "learning_rate": 7.993662111309171e-06, "loss": 0.527, "step": 269500 }, { "epoch": 32.09, "learning_rate": 7.967254241764047e-06, "loss": 0.5139, "step": 270000 }, { "epoch": 32.14, "learning_rate": 7.940846372218923e-06, "loss": 0.5184, "step": 270500 }, { "epoch": 32.2, "learning_rate": 7.914438502673799e-06, "loss": 0.5189, "step": 271000 }, { "epoch": 32.26, "learning_rate": 7.888030633128673e-06, "loss": 0.5221, "step": 271500 }, { "epoch": 32.32, "learning_rate": 7.861622763583549e-06, "loss": 0.5176, "step": 272000 }, { "epoch": 32.38, "learning_rate": 7.835267709777515e-06, "loss": 0.5134, "step": 272500 }, { "epoch": 32.44, "learning_rate": 7.80885984023239e-06, "loss": 0.5187, "step": 273000 }, { "epoch": 32.5, "learning_rate": 7.782451970687265e-06, "loss": 0.522, "step": 273500 }, { "epoch": 32.56, "learning_rate": 7.756044101142141e-06, "loss": 0.5151, "step": 274000 }, { "epoch": 32.62, "learning_rate": 7.729636231597017e-06, "loss": 0.5237, "step": 274500 }, { "epoch": 32.68, "learning_rate": 7.703228362051893e-06, "loss": 0.5176, "step": 275000 }, { "epoch": 32.74, "learning_rate": 7.676820492506767e-06, "loss": 0.5227, "step": 275500 }, { "epoch": 32.8, "learning_rate": 7.650412622961643e-06, "loss": 0.5162, "step": 276000 }, { "epoch": 32.86, "learning_rate": 7.624004753416518e-06, "loss": 0.521, "step": 276500 }, { "epoch": 32.92, "learning_rate": 7.597596883871394e-06, "loss": 0.5231, "step": 277000 }, { "epoch": 32.98, "learning_rate": 7.57124183006536e-06, "loss": 0.5198, "step": 277500 }, { "epoch": 33.04, "learning_rate": 7.5448339605202355e-06, "loss": 0.5086, "step": 278000 }, { "epoch": 33.1, "learning_rate": 7.518426090975111e-06, "loss": 0.5124, "step": 278500 }, { "epoch": 33.16, "learning_rate": 7.4920182214299865e-06, "loss": 0.5175, "step": 279000 }, { "epoch": 33.21, "learning_rate": 7.465663167623952e-06, "loss": 0.5131, "step": 279500 }, { "epoch": 33.27, "learning_rate": 7.439255298078828e-06, "loss": 0.5114, "step": 280000 }, { "epoch": 33.33, "learning_rate": 7.412847428533704e-06, "loss": 0.5118, "step": 280500 }, { "epoch": 33.39, "learning_rate": 7.38643955898858e-06, "loss": 0.5116, "step": 281000 }, { "epoch": 33.45, "learning_rate": 7.360084505182545e-06, "loss": 0.5197, "step": 281500 }, { "epoch": 33.51, "learning_rate": 7.33367663563742e-06, "loss": 0.5161, "step": 282000 }, { "epoch": 33.57, "learning_rate": 7.307268766092296e-06, "loss": 0.5166, "step": 282500 }, { "epoch": 33.63, "learning_rate": 7.280913712286261e-06, "loss": 0.5203, "step": 283000 }, { "epoch": 33.69, "learning_rate": 7.254505842741137e-06, "loss": 0.513, "step": 283500 }, { "epoch": 33.75, "learning_rate": 7.228097973196013e-06, "loss": 0.5139, "step": 284000 }, { "epoch": 33.81, "learning_rate": 7.201690103650889e-06, "loss": 0.5123, "step": 284500 }, { "epoch": 33.87, "learning_rate": 7.175282234105763e-06, "loss": 0.5191, "step": 285000 }, { "epoch": 33.93, "learning_rate": 7.148874364560639e-06, "loss": 0.5228, "step": 285500 }, { "epoch": 33.99, "learning_rate": 7.122466495015515e-06, "loss": 0.5165, "step": 286000 }, { "epoch": 34.05, "learning_rate": 7.096058625470391e-06, "loss": 0.5089, "step": 286500 }, { "epoch": 34.11, "learning_rate": 7.069650755925266e-06, "loss": 0.5066, "step": 287000 }, { "epoch": 34.17, "learning_rate": 7.043242886380141e-06, "loss": 0.514, "step": 287500 }, { "epoch": 34.22, "learning_rate": 7.0168878325741075e-06, "loss": 0.5099, "step": 288000 }, { "epoch": 34.28, "learning_rate": 6.9904799630289834e-06, "loss": 0.5124, "step": 288500 }, { "epoch": 34.34, "learning_rate": 6.964072093483859e-06, "loss": 0.5097, "step": 289000 }, { "epoch": 34.4, "learning_rate": 6.9376642239387344e-06, "loss": 0.5155, "step": 289500 }, { "epoch": 34.46, "learning_rate": 6.9112563543936095e-06, "loss": 0.5146, "step": 290000 }, { "epoch": 34.52, "learning_rate": 6.8848484848484854e-06, "loss": 0.5178, "step": 290500 }, { "epoch": 34.58, "learning_rate": 6.858493431042451e-06, "loss": 0.5158, "step": 291000 }, { "epoch": 34.64, "learning_rate": 6.832085561497327e-06, "loss": 0.5156, "step": 291500 }, { "epoch": 34.7, "learning_rate": 6.805677691952203e-06, "loss": 0.5132, "step": 292000 }, { "epoch": 34.76, "learning_rate": 6.779322638146169e-06, "loss": 0.5171, "step": 292500 }, { "epoch": 34.82, "learning_rate": 6.752914768601043e-06, "loss": 0.5186, "step": 293000 }, { "epoch": 34.88, "learning_rate": 6.726506899055919e-06, "loss": 0.5098, "step": 293500 }, { "epoch": 34.94, "learning_rate": 6.700099029510795e-06, "loss": 0.5044, "step": 294000 }, { "epoch": 35.0, "learning_rate": 6.673691159965671e-06, "loss": 0.5106, "step": 294500 }, { "epoch": 35.06, "learning_rate": 6.647336106159636e-06, "loss": 0.5083, "step": 295000 }, { "epoch": 35.12, "learning_rate": 6.620928236614511e-06, "loss": 0.5165, "step": 295500 }, { "epoch": 35.18, "learning_rate": 6.594520367069387e-06, "loss": 0.5091, "step": 296000 }, { "epoch": 35.23, "learning_rate": 6.568112497524262e-06, "loss": 0.506, "step": 296500 }, { "epoch": 35.29, "learning_rate": 6.541704627979138e-06, "loss": 0.5113, "step": 297000 }, { "epoch": 35.35, "learning_rate": 6.515296758434014e-06, "loss": 0.5108, "step": 297500 }, { "epoch": 35.41, "learning_rate": 6.488888888888889e-06, "loss": 0.5115, "step": 298000 }, { "epoch": 35.47, "learning_rate": 6.462533835082855e-06, "loss": 0.512, "step": 298500 }, { "epoch": 35.53, "learning_rate": 6.4361259655377306e-06, "loss": 0.5078, "step": 299000 }, { "epoch": 35.59, "learning_rate": 6.4097180959926065e-06, "loss": 0.5146, "step": 299500 }, { "epoch": 35.65, "learning_rate": 6.383310226447482e-06, "loss": 0.5133, "step": 300000 }, { "epoch": 35.71, "learning_rate": 6.356902356902357e-06, "loss": 0.5201, "step": 300500 }, { "epoch": 35.77, "learning_rate": 6.3304944873572326e-06, "loss": 0.5132, "step": 301000 }, { "epoch": 35.83, "learning_rate": 6.304139433551199e-06, "loss": 0.5139, "step": 301500 }, { "epoch": 35.89, "learning_rate": 6.277731564006075e-06, "loss": 0.5084, "step": 302000 }, { "epoch": 35.95, "learning_rate": 6.25132369446095e-06, "loss": 0.5155, "step": 302500 }, { "epoch": 36.01, "learning_rate": 6.224915824915826e-06, "loss": 0.5168, "step": 303000 }, { "epoch": 36.07, "learning_rate": 6.198507955370701e-06, "loss": 0.5004, "step": 303500 }, { "epoch": 36.13, "learning_rate": 6.172100085825577e-06, "loss": 0.5142, "step": 304000 }, { "epoch": 36.19, "learning_rate": 6.1458506634977235e-06, "loss": 0.5038, "step": 304500 }, { "epoch": 36.24, "learning_rate": 6.119442793952598e-06, "loss": 0.5157, "step": 305000 }, { "epoch": 36.3, "learning_rate": 6.093034924407474e-06, "loss": 0.5114, "step": 305500 }, { "epoch": 36.36, "learning_rate": 6.06662705486235e-06, "loss": 0.5136, "step": 306000 }, { "epoch": 36.42, "learning_rate": 6.0402191853172255e-06, "loss": 0.5034, "step": 306500 }, { "epoch": 36.48, "learning_rate": 6.0138113157721e-06, "loss": 0.517, "step": 307000 }, { "epoch": 36.54, "learning_rate": 5.987456261966066e-06, "loss": 0.516, "step": 307500 }, { "epoch": 36.6, "learning_rate": 5.961048392420942e-06, "loss": 0.5104, "step": 308000 }, { "epoch": 36.66, "learning_rate": 5.934640522875818e-06, "loss": 0.5108, "step": 308500 }, { "epoch": 36.72, "learning_rate": 5.908232653330693e-06, "loss": 0.5094, "step": 309000 }, { "epoch": 36.78, "learning_rate": 5.881824783785569e-06, "loss": 0.5145, "step": 309500 }, { "epoch": 36.84, "learning_rate": 5.855416914240444e-06, "loss": 0.5176, "step": 310000 }, { "epoch": 36.9, "learning_rate": 5.82900904469532e-06, "loss": 0.5082, "step": 310500 }, { "epoch": 36.96, "learning_rate": 5.802601175150196e-06, "loss": 0.5141, "step": 311000 }, { "epoch": 37.02, "learning_rate": 5.776193305605071e-06, "loss": 0.5116, "step": 311500 }, { "epoch": 37.08, "learning_rate": 5.749785436059946e-06, "loss": 0.5058, "step": 312000 }, { "epoch": 37.14, "learning_rate": 5.723377566514822e-06, "loss": 0.5042, "step": 312500 }, { "epoch": 37.2, "learning_rate": 5.696969696969698e-06, "loss": 0.5097, "step": 313000 }, { "epoch": 37.25, "learning_rate": 5.670614643163663e-06, "loss": 0.5118, "step": 313500 }, { "epoch": 37.31, "learning_rate": 5.644206773618539e-06, "loss": 0.5056, "step": 314000 }, { "epoch": 37.37, "learning_rate": 5.617798904073415e-06, "loss": 0.5143, "step": 314500 }, { "epoch": 37.43, "learning_rate": 5.59144385026738e-06, "loss": 0.516, "step": 315000 }, { "epoch": 37.49, "learning_rate": 5.565035980722255e-06, "loss": 0.5028, "step": 315500 }, { "epoch": 37.55, "learning_rate": 5.538628111177131e-06, "loss": 0.5079, "step": 316000 }, { "epoch": 37.61, "learning_rate": 5.512220241632007e-06, "loss": 0.5101, "step": 316500 }, { "epoch": 37.67, "learning_rate": 5.485812372086883e-06, "loss": 0.5101, "step": 317000 }, { "epoch": 37.73, "learning_rate": 5.459404502541757e-06, "loss": 0.5077, "step": 317500 }, { "epoch": 37.79, "learning_rate": 5.432996632996633e-06, "loss": 0.5094, "step": 318000 }, { "epoch": 37.85, "learning_rate": 5.406588763451509e-06, "loss": 0.5128, "step": 318500 }, { "epoch": 37.91, "learning_rate": 5.380180893906385e-06, "loss": 0.5121, "step": 319000 }, { "epoch": 37.97, "learning_rate": 5.353773024361259e-06, "loss": 0.5071, "step": 319500 }, { "epoch": 38.03, "learning_rate": 5.327365154816135e-06, "loss": 0.5088, "step": 320000 }, { "epoch": 38.09, "learning_rate": 5.300957285271011e-06, "loss": 0.5037, "step": 320500 }, { "epoch": 38.15, "learning_rate": 5.274549415725887e-06, "loss": 0.5083, "step": 321000 }, { "epoch": 38.21, "learning_rate": 5.248141546180763e-06, "loss": 0.5101, "step": 321500 }, { "epoch": 38.26, "learning_rate": 5.221733676635637e-06, "loss": 0.5046, "step": 322000 }, { "epoch": 38.32, "learning_rate": 5.195325807090513e-06, "loss": 0.5021, "step": 322500 }, { "epoch": 38.38, "learning_rate": 5.169023569023569e-06, "loss": 0.5116, "step": 323000 }, { "epoch": 38.44, "learning_rate": 5.142615699478445e-06, "loss": 0.5018, "step": 323500 }, { "epoch": 38.5, "learning_rate": 5.116207829933321e-06, "loss": 0.5087, "step": 324000 }, { "epoch": 38.56, "learning_rate": 5.089799960388197e-06, "loss": 0.5033, "step": 324500 }, { "epoch": 38.62, "learning_rate": 5.063392090843071e-06, "loss": 0.515, "step": 325000 }, { "epoch": 38.68, "learning_rate": 5.036984221297947e-06, "loss": 0.5031, "step": 325500 }, { "epoch": 38.74, "learning_rate": 5.010576351752823e-06, "loss": 0.5077, "step": 326000 }, { "epoch": 38.8, "learning_rate": 4.984168482207699e-06, "loss": 0.5142, "step": 326500 }, { "epoch": 38.86, "learning_rate": 4.957760612662574e-06, "loss": 0.5064, "step": 327000 }, { "epoch": 38.92, "learning_rate": 4.93135274311745e-06, "loss": 0.5103, "step": 327500 }, { "epoch": 38.98, "learning_rate": 4.904944873572325e-06, "loss": 0.5062, "step": 328000 }, { "epoch": 39.04, "learning_rate": 4.878537004027201e-06, "loss": 0.5077, "step": 328500 }, { "epoch": 39.1, "learning_rate": 4.852181950221166e-06, "loss": 0.4985, "step": 329000 }, { "epoch": 39.16, "learning_rate": 4.825774080676042e-06, "loss": 0.4965, "step": 329500 }, { "epoch": 39.22, "learning_rate": 4.799419026870007e-06, "loss": 0.5059, "step": 330000 }, { "epoch": 39.28, "learning_rate": 4.773011157324883e-06, "loss": 0.5083, "step": 330500 }, { "epoch": 39.33, "learning_rate": 4.746603287779758e-06, "loss": 0.4982, "step": 331000 }, { "epoch": 39.39, "learning_rate": 4.720195418234634e-06, "loss": 0.5043, "step": 331500 }, { "epoch": 39.45, "learning_rate": 4.6938403644286005e-06, "loss": 0.5067, "step": 332000 }, { "epoch": 39.51, "learning_rate": 4.667432494883476e-06, "loss": 0.5068, "step": 332500 }, { "epoch": 39.57, "learning_rate": 4.6410246253383515e-06, "loss": 0.5096, "step": 333000 }, { "epoch": 39.63, "learning_rate": 4.614616755793227e-06, "loss": 0.5103, "step": 333500 }, { "epoch": 39.69, "learning_rate": 4.5882088862481025e-06, "loss": 0.5069, "step": 334000 }, { "epoch": 39.75, "learning_rate": 4.561853832442068e-06, "loss": 0.5107, "step": 334500 }, { "epoch": 39.81, "learning_rate": 4.535445962896944e-06, "loss": 0.5059, "step": 335000 }, { "epoch": 39.87, "learning_rate": 4.509038093351819e-06, "loss": 0.51, "step": 335500 }, { "epoch": 39.93, "learning_rate": 4.482630223806695e-06, "loss": 0.5019, "step": 336000 }, { "epoch": 39.99, "learning_rate": 4.45622235426157e-06, "loss": 0.505, "step": 336500 }, { "epoch": 40.05, "learning_rate": 4.429867300455536e-06, "loss": 0.4982, "step": 337000 }, { "epoch": 40.11, "learning_rate": 4.403459430910412e-06, "loss": 0.5016, "step": 337500 }, { "epoch": 40.17, "learning_rate": 4.377051561365287e-06, "loss": 0.5085, "step": 338000 }, { "epoch": 40.23, "learning_rate": 4.350643691820163e-06, "loss": 0.5048, "step": 338500 }, { "epoch": 40.29, "learning_rate": 4.324288638014128e-06, "loss": 0.4955, "step": 339000 }, { "epoch": 40.34, "learning_rate": 4.297880768469004e-06, "loss": 0.4997, "step": 339500 }, { "epoch": 40.4, "learning_rate": 4.271472898923879e-06, "loss": 0.5124, "step": 340000 }, { "epoch": 40.46, "learning_rate": 4.245065029378755e-06, "loss": 0.511, "step": 340500 }, { "epoch": 40.52, "learning_rate": 4.2187099755727216e-06, "loss": 0.5061, "step": 341000 }, { "epoch": 40.58, "learning_rate": 4.192302106027597e-06, "loss": 0.5064, "step": 341500 }, { "epoch": 40.64, "learning_rate": 4.165947052221562e-06, "loss": 0.5041, "step": 342000 }, { "epoch": 40.7, "learning_rate": 4.139539182676438e-06, "loss": 0.5062, "step": 342500 }, { "epoch": 40.76, "learning_rate": 4.113131313131313e-06, "loss": 0.5009, "step": 343000 }, { "epoch": 40.82, "learning_rate": 4.086723443586189e-06, "loss": 0.511, "step": 343500 }, { "epoch": 40.88, "learning_rate": 4.060315574041064e-06, "loss": 0.5067, "step": 344000 }, { "epoch": 40.94, "learning_rate": 4.03396052023503e-06, "loss": 0.5067, "step": 344500 }, { "epoch": 41.0, "learning_rate": 4.007552650689906e-06, "loss": 0.5056, "step": 345000 }, { "epoch": 41.06, "learning_rate": 3.981144781144781e-06, "loss": 0.4987, "step": 345500 }, { "epoch": 41.12, "learning_rate": 3.954736911599657e-06, "loss": 0.499, "step": 346000 }, { "epoch": 41.18, "learning_rate": 3.928329042054533e-06, "loss": 0.5054, "step": 346500 }, { "epoch": 41.24, "learning_rate": 3.901921172509408e-06, "loss": 0.501, "step": 347000 }, { "epoch": 41.3, "learning_rate": 3.875513302964284e-06, "loss": 0.5058, "step": 347500 }, { "epoch": 41.35, "learning_rate": 3.849105433419159e-06, "loss": 0.4985, "step": 348000 }, { "epoch": 41.41, "learning_rate": 3.822697563874035e-06, "loss": 0.5019, "step": 348500 }, { "epoch": 41.47, "learning_rate": 3.7963953258070908e-06, "loss": 0.508, "step": 349000 }, { "epoch": 41.53, "learning_rate": 3.7699874562619667e-06, "loss": 0.5063, "step": 349500 }, { "epoch": 41.59, "learning_rate": 3.7435795867168418e-06, "loss": 0.5048, "step": 350000 }, { "epoch": 41.65, "learning_rate": 3.7171717171717177e-06, "loss": 0.5026, "step": 350500 }, { "epoch": 41.71, "learning_rate": 3.6907638476265928e-06, "loss": 0.504, "step": 351000 }, { "epoch": 41.77, "learning_rate": 3.6643559780814687e-06, "loss": 0.5011, "step": 351500 }, { "epoch": 41.83, "learning_rate": 3.637948108536344e-06, "loss": 0.503, "step": 352000 }, { "epoch": 41.89, "learning_rate": 3.6115402389912197e-06, "loss": 0.5153, "step": 352500 }, { "epoch": 41.95, "learning_rate": 3.5851323694460956e-06, "loss": 0.5018, "step": 353000 }, { "epoch": 42.01, "learning_rate": 3.5587244999009707e-06, "loss": 0.5066, "step": 353500 }, { "epoch": 42.07, "learning_rate": 3.5323694460949365e-06, "loss": 0.5071, "step": 354000 }, { "epoch": 42.13, "learning_rate": 3.5059615765498124e-06, "loss": 0.5045, "step": 354500 }, { "epoch": 42.19, "learning_rate": 3.4795537070046875e-06, "loss": 0.4959, "step": 355000 }, { "epoch": 42.25, "learning_rate": 3.4531458374595634e-06, "loss": 0.5034, "step": 355500 }, { "epoch": 42.31, "learning_rate": 3.426790783653529e-06, "loss": 0.5075, "step": 356000 }, { "epoch": 42.36, "learning_rate": 3.400435729847495e-06, "loss": 0.5076, "step": 356500 }, { "epoch": 42.42, "learning_rate": 3.37402786030237e-06, "loss": 0.4914, "step": 357000 }, { "epoch": 42.48, "learning_rate": 3.347619990757246e-06, "loss": 0.4985, "step": 357500 }, { "epoch": 42.54, "learning_rate": 3.321212121212121e-06, "loss": 0.5032, "step": 358000 }, { "epoch": 42.6, "learning_rate": 3.294804251666997e-06, "loss": 0.5022, "step": 358500 }, { "epoch": 42.66, "learning_rate": 3.268396382121873e-06, "loss": 0.5061, "step": 359000 }, { "epoch": 42.72, "learning_rate": 3.241988512576748e-06, "loss": 0.5, "step": 359500 }, { "epoch": 42.78, "learning_rate": 3.215580643031624e-06, "loss": 0.5041, "step": 360000 }, { "epoch": 42.84, "learning_rate": 3.189172773486499e-06, "loss": 0.5044, "step": 360500 }, { "epoch": 42.9, "learning_rate": 3.162817719680465e-06, "loss": 0.502, "step": 361000 }, { "epoch": 42.96, "learning_rate": 3.1364098501353407e-06, "loss": 0.5066, "step": 361500 }, { "epoch": 43.02, "learning_rate": 3.110001980590216e-06, "loss": 0.508, "step": 362000 }, { "epoch": 43.08, "learning_rate": 3.0835941110450917e-06, "loss": 0.4986, "step": 362500 }, { "epoch": 43.14, "learning_rate": 3.0571862414999672e-06, "loss": 0.5001, "step": 363000 }, { "epoch": 43.2, "learning_rate": 3.0308840034330233e-06, "loss": 0.4982, "step": 363500 }, { "epoch": 43.26, "learning_rate": 3.004476133887899e-06, "loss": 0.5028, "step": 364000 }, { "epoch": 43.32, "learning_rate": 2.9780682643427743e-06, "loss": 0.5069, "step": 364500 }, { "epoch": 43.37, "learning_rate": 2.9516603947976502e-06, "loss": 0.5049, "step": 365000 }, { "epoch": 43.43, "learning_rate": 2.9252525252525253e-06, "loss": 0.499, "step": 365500 }, { "epoch": 43.49, "learning_rate": 2.8988446557074012e-06, "loss": 0.5101, "step": 366000 }, { "epoch": 43.55, "learning_rate": 2.8724367861622763e-06, "loss": 0.4984, "step": 366500 }, { "epoch": 43.61, "learning_rate": 2.8460289166171522e-06, "loss": 0.5001, "step": 367000 }, { "epoch": 43.67, "learning_rate": 2.8196210470720277e-06, "loss": 0.504, "step": 367500 }, { "epoch": 43.73, "learning_rate": 2.7932659932659935e-06, "loss": 0.502, "step": 368000 }, { "epoch": 43.79, "learning_rate": 2.766858123720869e-06, "loss": 0.4984, "step": 368500 }, { "epoch": 43.85, "learning_rate": 2.7404502541757445e-06, "loss": 0.5024, "step": 369000 }, { "epoch": 43.91, "learning_rate": 2.71404238463062e-06, "loss": 0.5015, "step": 369500 }, { "epoch": 43.97, "learning_rate": 2.687687330824586e-06, "loss": 0.4996, "step": 370000 }, { "epoch": 44.03, "learning_rate": 2.6612794612794613e-06, "loss": 0.4953, "step": 370500 }, { "epoch": 44.09, "learning_rate": 2.634871591734337e-06, "loss": 0.4966, "step": 371000 }, { "epoch": 44.15, "learning_rate": 2.6084637221892128e-06, "loss": 0.4956, "step": 371500 }, { "epoch": 44.21, "learning_rate": 2.5820558526440883e-06, "loss": 0.4997, "step": 372000 }, { "epoch": 44.27, "learning_rate": 2.555700798838054e-06, "loss": 0.4986, "step": 372500 }, { "epoch": 44.33, "learning_rate": 2.5292929292929296e-06, "loss": 0.502, "step": 373000 }, { "epoch": 44.39, "learning_rate": 2.502885059747805e-06, "loss": 0.4984, "step": 373500 }, { "epoch": 44.44, "learning_rate": 2.4764771902026806e-06, "loss": 0.4984, "step": 374000 }, { "epoch": 44.5, "learning_rate": 2.4501221363966464e-06, "loss": 0.502, "step": 374500 }, { "epoch": 44.56, "learning_rate": 2.423714266851522e-06, "loss": 0.4959, "step": 375000 }, { "epoch": 44.62, "learning_rate": 2.3973063973063978e-06, "loss": 0.4984, "step": 375500 }, { "epoch": 44.68, "learning_rate": 2.3708985277612733e-06, "loss": 0.5018, "step": 376000 }, { "epoch": 44.74, "learning_rate": 2.3444906582161488e-06, "loss": 0.5011, "step": 376500 }, { "epoch": 44.8, "learning_rate": 2.3180827886710243e-06, "loss": 0.503, "step": 377000 }, { "epoch": 44.86, "learning_rate": 2.2916749191258998e-06, "loss": 0.4926, "step": 377500 }, { "epoch": 44.92, "learning_rate": 2.2652670495807753e-06, "loss": 0.4976, "step": 378000 }, { "epoch": 44.98, "learning_rate": 2.238911995774741e-06, "loss": 0.5007, "step": 378500 }, { "epoch": 45.04, "learning_rate": 2.2125041262296166e-06, "loss": 0.5084, "step": 379000 }, { "epoch": 45.1, "learning_rate": 2.186096256684492e-06, "loss": 0.5022, "step": 379500 }, { "epoch": 45.16, "learning_rate": 2.1596883871393676e-06, "loss": 0.4922, "step": 380000 }, { "epoch": 45.22, "learning_rate": 2.1332805175942435e-06, "loss": 0.4957, "step": 380500 }, { "epoch": 45.28, "learning_rate": 2.1069254637882093e-06, "loss": 0.5032, "step": 381000 }, { "epoch": 45.34, "learning_rate": 2.080570409982175e-06, "loss": 0.504, "step": 381500 }, { "epoch": 45.4, "learning_rate": 2.0541625404370506e-06, "loss": 0.5108, "step": 382000 }, { "epoch": 45.45, "learning_rate": 2.027754670891926e-06, "loss": 0.4992, "step": 382500 }, { "epoch": 45.51, "learning_rate": 2.0013468013468016e-06, "loss": 0.4986, "step": 383000 }, { "epoch": 45.57, "learning_rate": 1.974938931801677e-06, "loss": 0.4964, "step": 383500 }, { "epoch": 45.63, "learning_rate": 1.948583877995643e-06, "loss": 0.4974, "step": 384000 }, { "epoch": 45.69, "learning_rate": 1.9221760084505184e-06, "loss": 0.4955, "step": 384500 }, { "epoch": 45.75, "learning_rate": 1.895768138905394e-06, "loss": 0.5006, "step": 385000 }, { "epoch": 45.81, "learning_rate": 1.8693602693602694e-06, "loss": 0.5056, "step": 385500 }, { "epoch": 45.87, "learning_rate": 1.842952399815145e-06, "loss": 0.5018, "step": 386000 }, { "epoch": 45.93, "learning_rate": 1.8165445302700204e-06, "loss": 0.4974, "step": 386500 }, { "epoch": 45.99, "learning_rate": 1.7901366607248963e-06, "loss": 0.5057, "step": 387000 }, { "epoch": 46.05, "learning_rate": 1.7637816069188621e-06, "loss": 0.4892, "step": 387500 }, { "epoch": 46.11, "learning_rate": 1.7373737373737376e-06, "loss": 0.4942, "step": 388000 }, { "epoch": 46.17, "learning_rate": 1.7109658678286131e-06, "loss": 0.5011, "step": 388500 }, { "epoch": 46.23, "learning_rate": 1.6845579982834886e-06, "loss": 0.4945, "step": 389000 }, { "epoch": 46.29, "learning_rate": 1.6581501287383641e-06, "loss": 0.4942, "step": 389500 }, { "epoch": 46.35, "learning_rate": 1.6317422591932396e-06, "loss": 0.4968, "step": 390000 }, { "epoch": 46.41, "learning_rate": 1.6053343896481151e-06, "loss": 0.4979, "step": 390500 }, { "epoch": 46.46, "learning_rate": 1.5789265201029908e-06, "loss": 0.5005, "step": 391000 }, { "epoch": 46.52, "learning_rate": 1.5525186505578663e-06, "loss": 0.4946, "step": 391500 }, { "epoch": 46.58, "learning_rate": 1.5261107810127418e-06, "loss": 0.4923, "step": 392000 }, { "epoch": 46.64, "learning_rate": 1.4997557272067076e-06, "loss": 0.5, "step": 392500 }, { "epoch": 46.7, "learning_rate": 1.4733478576615833e-06, "loss": 0.4971, "step": 393000 }, { "epoch": 46.76, "learning_rate": 1.4469399881164588e-06, "loss": 0.4977, "step": 393500 }, { "epoch": 46.82, "learning_rate": 1.4205321185713343e-06, "loss": 0.4925, "step": 394000 }, { "epoch": 46.88, "learning_rate": 1.3941770647653001e-06, "loss": 0.4987, "step": 394500 }, { "epoch": 46.94, "learning_rate": 1.3677691952201759e-06, "loss": 0.5004, "step": 395000 }, { "epoch": 47.0, "learning_rate": 1.3413613256750514e-06, "loss": 0.4961, "step": 395500 }, { "epoch": 47.06, "learning_rate": 1.3149534561299269e-06, "loss": 0.494, "step": 396000 }, { "epoch": 47.12, "learning_rate": 1.2885455865848024e-06, "loss": 0.4941, "step": 396500 }, { "epoch": 47.18, "learning_rate": 1.2621377170396779e-06, "loss": 0.5015, "step": 397000 }, { "epoch": 47.24, "learning_rate": 1.2357298474945536e-06, "loss": 0.4976, "step": 397500 }, { "epoch": 47.3, "learning_rate": 1.209321977949429e-06, "loss": 0.4887, "step": 398000 }, { "epoch": 47.36, "learning_rate": 1.1829141084043046e-06, "loss": 0.5032, "step": 398500 }, { "epoch": 47.42, "learning_rate": 1.15650623885918e-06, "loss": 0.4959, "step": 399000 }, { "epoch": 47.47, "learning_rate": 1.1302568165313265e-06, "loss": 0.4965, "step": 399500 }, { "epoch": 47.53, "learning_rate": 1.103848946986202e-06, "loss": 0.4949, "step": 400000 }, { "epoch": 47.59, "learning_rate": 1.0774938931801678e-06, "loss": 0.4879, "step": 400500 }, { "epoch": 47.65, "learning_rate": 1.0510860236350433e-06, "loss": 0.4935, "step": 401000 }, { "epoch": 47.71, "learning_rate": 1.024678154089919e-06, "loss": 0.4954, "step": 401500 }, { "epoch": 47.77, "learning_rate": 9.982702845447945e-07, "loss": 0.4919, "step": 402000 }, { "epoch": 47.83, "learning_rate": 9.7186241499967e-07, "loss": 0.4974, "step": 402500 }, { "epoch": 47.89, "learning_rate": 9.454545454545455e-07, "loss": 0.5004, "step": 403000 }, { "epoch": 47.95, "learning_rate": 9.190466759094211e-07, "loss": 0.5003, "step": 403500 }, { "epoch": 48.01, "learning_rate": 8.926388063642967e-07, "loss": 0.5044, "step": 404000 }, { "epoch": 48.07, "learning_rate": 8.662309368191722e-07, "loss": 0.489, "step": 404500 }, { "epoch": 48.13, "learning_rate": 8.398230672740478e-07, "loss": 0.4942, "step": 405000 }, { "epoch": 48.19, "learning_rate": 8.134151977289233e-07, "loss": 0.4996, "step": 405500 }, { "epoch": 48.25, "learning_rate": 7.870073281837988e-07, "loss": 0.4896, "step": 406000 }, { "epoch": 48.31, "learning_rate": 7.605994586386743e-07, "loss": 0.494, "step": 406500 }, { "epoch": 48.37, "learning_rate": 7.3419158909355e-07, "loss": 0.4911, "step": 407000 }, { "epoch": 48.43, "learning_rate": 7.077837195484255e-07, "loss": 0.4961, "step": 407500 }, { "epoch": 48.48, "learning_rate": 6.81375850003301e-07, "loss": 0.4894, "step": 408000 }, { "epoch": 48.54, "learning_rate": 6.549679804581765e-07, "loss": 0.4952, "step": 408500 }, { "epoch": 48.6, "learning_rate": 6.285601109130521e-07, "loss": 0.5021, "step": 409000 }, { "epoch": 48.66, "learning_rate": 6.021522413679277e-07, "loss": 0.5009, "step": 409500 }, { "epoch": 48.72, "learning_rate": 5.757971875618935e-07, "loss": 0.4962, "step": 410000 }, { "epoch": 48.78, "learning_rate": 5.49389318016769e-07, "loss": 0.4939, "step": 410500 }, { "epoch": 48.84, "learning_rate": 5.229814484716446e-07, "loss": 0.497, "step": 411000 }, { "epoch": 48.9, "learning_rate": 4.965735789265201e-07, "loss": 0.4995, "step": 411500 }, { "epoch": 48.96, "learning_rate": 4.7016570938139573e-07, "loss": 0.5008, "step": 412000 }, { "epoch": 49.02, "learning_rate": 4.4386347131445177e-07, "loss": 0.5001, "step": 412500 }, { "epoch": 49.08, "learning_rate": 4.174556017693273e-07, "loss": 0.4934, "step": 413000 }, { "epoch": 49.14, "learning_rate": 3.910477322242028e-07, "loss": 0.4949, "step": 413500 }, { "epoch": 49.2, "learning_rate": 3.6463986267907843e-07, "loss": 0.5009, "step": 414000 }, { "epoch": 49.26, "learning_rate": 3.3823199313395393e-07, "loss": 0.5013, "step": 414500 }, { "epoch": 49.32, "learning_rate": 3.118241235888295e-07, "loss": 0.5009, "step": 415000 }, { "epoch": 49.38, "learning_rate": 2.8541625404370504e-07, "loss": 0.4927, "step": 415500 }, { "epoch": 49.44, "learning_rate": 2.590083844985806e-07, "loss": 0.5005, "step": 416000 }, { "epoch": 49.49, "learning_rate": 2.326533306925464e-07, "loss": 0.4908, "step": 416500 }, { "epoch": 49.55, "learning_rate": 2.0624546114742194e-07, "loss": 0.4904, "step": 417000 }, { "epoch": 49.61, "learning_rate": 1.798375916022975e-07, "loss": 0.5013, "step": 417500 }, { "epoch": 49.67, "learning_rate": 1.5342972205717305e-07, "loss": 0.4991, "step": 418000 }, { "epoch": 49.73, "learning_rate": 1.270218525120486e-07, "loss": 0.4921, "step": 418500 }, { "epoch": 49.79, "learning_rate": 1.006667987060144e-07, "loss": 0.4987, "step": 419000 }, { "epoch": 49.85, "learning_rate": 7.425892916088995e-08, "loss": 0.4946, "step": 419500 }, { "epoch": 49.91, "learning_rate": 4.790387535485575e-08, "loss": 0.492, "step": 420000 }, { "epoch": 49.97, "learning_rate": 2.14960058097313e-08, "loss": 0.4974, "step": 420500 } ], "max_steps": 420750, "num_train_epochs": 50, "total_flos": 7.065593235141296e+19, "trial_name": null, "trial_params": null }