DialogLED-base-16384 / trainer_state.json
Ming Zhong
First model version
a7eb229
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.99994058582378,
"global_step": 420750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 2.3624480095068333e-07,
"loss": 2.5928,
"step": 500
},
{
"epoch": 0.12,
"learning_rate": 4.7391562685680333e-07,
"loss": 1.753,
"step": 1000
},
{
"epoch": 0.18,
"learning_rate": 7.115864527629234e-07,
"loss": 1.5245,
"step": 1500
},
{
"epoch": 0.24,
"learning_rate": 9.492572786690435e-07,
"loss": 1.428,
"step": 2000
},
{
"epoch": 0.3,
"learning_rate": 1.1869281045751634e-06,
"loss": 1.3632,
"step": 2500
},
{
"epoch": 0.36,
"learning_rate": 1.4245989304812835e-06,
"loss": 1.3351,
"step": 3000
},
{
"epoch": 0.42,
"learning_rate": 1.6622697563874036e-06,
"loss": 1.3066,
"step": 3500
},
{
"epoch": 0.48,
"learning_rate": 1.8999405822935234e-06,
"loss": 1.266,
"step": 4000
},
{
"epoch": 0.53,
"learning_rate": 2.137611408199644e-06,
"loss": 1.2228,
"step": 4500
},
{
"epoch": 0.59,
"learning_rate": 2.375282234105764e-06,
"loss": 1.2047,
"step": 5000
},
{
"epoch": 0.65,
"learning_rate": 2.6129530600118837e-06,
"loss": 1.193,
"step": 5500
},
{
"epoch": 0.71,
"learning_rate": 2.8501485442661915e-06,
"loss": 1.169,
"step": 6000
},
{
"epoch": 0.77,
"learning_rate": 3.0878193701723116e-06,
"loss": 1.1512,
"step": 6500
},
{
"epoch": 0.83,
"learning_rate": 3.325490196078431e-06,
"loss": 1.1434,
"step": 7000
},
{
"epoch": 0.89,
"learning_rate": 3.563161021984552e-06,
"loss": 1.1305,
"step": 7500
},
{
"epoch": 0.95,
"learning_rate": 3.8008318478906718e-06,
"loss": 1.1059,
"step": 8000
},
{
"epoch": 1.01,
"learning_rate": 4.038502673796792e-06,
"loss": 1.1072,
"step": 8500
},
{
"epoch": 1.07,
"learning_rate": 4.276173499702912e-06,
"loss": 1.0856,
"step": 9000
},
{
"epoch": 1.13,
"learning_rate": 4.5138443256090316e-06,
"loss": 1.0605,
"step": 9500
},
{
"epoch": 1.19,
"learning_rate": 4.751515151515152e-06,
"loss": 1.0481,
"step": 10000
},
{
"epoch": 1.25,
"learning_rate": 4.989185977421272e-06,
"loss": 1.0203,
"step": 10500
},
{
"epoch": 1.31,
"learning_rate": 5.226381461675579e-06,
"loss": 0.9944,
"step": 11000
},
{
"epoch": 1.37,
"learning_rate": 5.464052287581699e-06,
"loss": 0.9667,
"step": 11500
},
{
"epoch": 1.43,
"learning_rate": 5.7012477718360074e-06,
"loss": 0.9407,
"step": 12000
},
{
"epoch": 1.49,
"learning_rate": 5.938443256090315e-06,
"loss": 0.9194,
"step": 12500
},
{
"epoch": 1.54,
"learning_rate": 6.176114081996435e-06,
"loss": 0.891,
"step": 13000
},
{
"epoch": 1.6,
"learning_rate": 6.413784907902556e-06,
"loss": 0.8853,
"step": 13500
},
{
"epoch": 1.66,
"learning_rate": 6.651455733808676e-06,
"loss": 0.8699,
"step": 14000
},
{
"epoch": 1.72,
"learning_rate": 6.889126559714796e-06,
"loss": 0.8597,
"step": 14500
},
{
"epoch": 1.78,
"learning_rate": 7.126797385620916e-06,
"loss": 0.8384,
"step": 15000
},
{
"epoch": 1.84,
"learning_rate": 7.364468211527036e-06,
"loss": 0.8305,
"step": 15500
},
{
"epoch": 1.9,
"learning_rate": 7.602139037433156e-06,
"loss": 0.8346,
"step": 16000
},
{
"epoch": 1.96,
"learning_rate": 7.839809863339275e-06,
"loss": 0.8277,
"step": 16500
},
{
"epoch": 2.02,
"learning_rate": 8.077005347593585e-06,
"loss": 0.8125,
"step": 17000
},
{
"epoch": 2.08,
"learning_rate": 8.314676173499705e-06,
"loss": 0.8124,
"step": 17500
},
{
"epoch": 2.14,
"learning_rate": 8.552346999405825e-06,
"loss": 0.806,
"step": 18000
},
{
"epoch": 2.2,
"learning_rate": 8.790017825311945e-06,
"loss": 0.8022,
"step": 18500
},
{
"epoch": 2.26,
"learning_rate": 9.027688651218063e-06,
"loss": 0.7958,
"step": 19000
},
{
"epoch": 2.32,
"learning_rate": 9.265359477124183e-06,
"loss": 0.7901,
"step": 19500
},
{
"epoch": 2.38,
"learning_rate": 9.503030303030303e-06,
"loss": 0.7868,
"step": 20000
},
{
"epoch": 2.44,
"learning_rate": 9.740701128936423e-06,
"loss": 0.7788,
"step": 20500
},
{
"epoch": 2.5,
"learning_rate": 9.978371954842543e-06,
"loss": 0.7823,
"step": 21000
},
{
"epoch": 2.55,
"learning_rate": 1.0216042780748663e-05,
"loss": 0.7767,
"step": 21500
},
{
"epoch": 2.61,
"learning_rate": 1.0453238265002973e-05,
"loss": 0.7645,
"step": 22000
},
{
"epoch": 2.67,
"learning_rate": 1.0690909090909091e-05,
"loss": 0.7687,
"step": 22500
},
{
"epoch": 2.73,
"learning_rate": 1.0928579916815211e-05,
"loss": 0.7664,
"step": 23000
},
{
"epoch": 2.79,
"learning_rate": 1.1166250742721331e-05,
"loss": 0.7569,
"step": 23500
},
{
"epoch": 2.85,
"learning_rate": 1.1403921568627451e-05,
"loss": 0.772,
"step": 24000
},
{
"epoch": 2.91,
"learning_rate": 1.1641592394533571e-05,
"loss": 0.7562,
"step": 24500
},
{
"epoch": 2.97,
"learning_rate": 1.1879263220439691e-05,
"loss": 0.7599,
"step": 25000
},
{
"epoch": 3.03,
"learning_rate": 1.2116934046345811e-05,
"loss": 0.7524,
"step": 25500
},
{
"epoch": 3.09,
"learning_rate": 1.2354604872251931e-05,
"loss": 0.7476,
"step": 26000
},
{
"epoch": 3.15,
"learning_rate": 1.2592275698158052e-05,
"loss": 0.7385,
"step": 26500
},
{
"epoch": 3.21,
"learning_rate": 1.282947118241236e-05,
"loss": 0.7428,
"step": 27000
},
{
"epoch": 3.27,
"learning_rate": 1.306714200831848e-05,
"loss": 0.746,
"step": 27500
},
{
"epoch": 3.33,
"learning_rate": 1.3304337492572788e-05,
"loss": 0.7406,
"step": 28000
},
{
"epoch": 3.39,
"learning_rate": 1.3542008318478908e-05,
"loss": 0.7264,
"step": 28500
},
{
"epoch": 3.45,
"learning_rate": 1.3779679144385028e-05,
"loss": 0.7346,
"step": 29000
},
{
"epoch": 3.51,
"learning_rate": 1.4017349970291148e-05,
"loss": 0.7266,
"step": 29500
},
{
"epoch": 3.57,
"learning_rate": 1.4255020796197267e-05,
"loss": 0.7355,
"step": 30000
},
{
"epoch": 3.62,
"learning_rate": 1.4492691622103387e-05,
"loss": 0.7319,
"step": 30500
},
{
"epoch": 3.68,
"learning_rate": 1.4730362448009507e-05,
"loss": 0.7304,
"step": 31000
},
{
"epoch": 3.74,
"learning_rate": 1.4968033273915627e-05,
"loss": 0.7226,
"step": 31500
},
{
"epoch": 3.8,
"learning_rate": 1.5205228758169936e-05,
"loss": 0.7288,
"step": 32000
},
{
"epoch": 3.86,
"learning_rate": 1.5442899584076056e-05,
"loss": 0.7214,
"step": 32500
},
{
"epoch": 3.92,
"learning_rate": 1.5680095068330364e-05,
"loss": 0.7219,
"step": 33000
},
{
"epoch": 3.98,
"learning_rate": 1.5917765894236484e-05,
"loss": 0.718,
"step": 33500
},
{
"epoch": 4.04,
"learning_rate": 1.615496137849079e-05,
"loss": 0.7132,
"step": 34000
},
{
"epoch": 4.1,
"learning_rate": 1.639263220439691e-05,
"loss": 0.7183,
"step": 34500
},
{
"epoch": 4.16,
"learning_rate": 1.663030303030303e-05,
"loss": 0.7123,
"step": 35000
},
{
"epoch": 4.22,
"learning_rate": 1.686797385620915e-05,
"loss": 0.7013,
"step": 35500
},
{
"epoch": 4.28,
"learning_rate": 1.710564468211527e-05,
"loss": 0.7093,
"step": 36000
},
{
"epoch": 4.34,
"learning_rate": 1.734284016636958e-05,
"loss": 0.704,
"step": 36500
},
{
"epoch": 4.4,
"learning_rate": 1.75805109922757e-05,
"loss": 0.7047,
"step": 37000
},
{
"epoch": 4.46,
"learning_rate": 1.781818181818182e-05,
"loss": 0.7041,
"step": 37500
},
{
"epoch": 4.52,
"learning_rate": 1.805585264408794e-05,
"loss": 0.7005,
"step": 38000
},
{
"epoch": 4.58,
"learning_rate": 1.829352346999406e-05,
"loss": 0.7098,
"step": 38500
},
{
"epoch": 4.63,
"learning_rate": 1.853119429590018e-05,
"loss": 0.7019,
"step": 39000
},
{
"epoch": 4.69,
"learning_rate": 1.87688651218063e-05,
"loss": 0.7024,
"step": 39500
},
{
"epoch": 4.75,
"learning_rate": 1.900653594771242e-05,
"loss": 0.7015,
"step": 40000
},
{
"epoch": 4.81,
"learning_rate": 1.924420677361854e-05,
"loss": 0.7032,
"step": 40500
},
{
"epoch": 4.87,
"learning_rate": 1.948187759952466e-05,
"loss": 0.6989,
"step": 41000
},
{
"epoch": 4.93,
"learning_rate": 1.971954842543078e-05,
"loss": 0.7023,
"step": 41500
},
{
"epoch": 4.99,
"learning_rate": 1.99572192513369e-05,
"loss": 0.7068,
"step": 42000
},
{
"epoch": 5.05,
"learning_rate": 1.9978345546973e-05,
"loss": 0.6804,
"step": 42500
},
{
"epoch": 5.11,
"learning_rate": 1.9951937677427877e-05,
"loss": 0.6888,
"step": 43000
},
{
"epoch": 5.17,
"learning_rate": 1.992552980788275e-05,
"loss": 0.6932,
"step": 43500
},
{
"epoch": 5.23,
"learning_rate": 1.9899121938337625e-05,
"loss": 0.6849,
"step": 44000
},
{
"epoch": 5.29,
"learning_rate": 1.987276688453159e-05,
"loss": 0.6776,
"step": 44500
},
{
"epoch": 5.35,
"learning_rate": 1.9846411830725558e-05,
"loss": 0.6897,
"step": 45000
},
{
"epoch": 5.41,
"learning_rate": 1.9820003961180434e-05,
"loss": 0.683,
"step": 45500
},
{
"epoch": 5.47,
"learning_rate": 1.979359609163531e-05,
"loss": 0.6834,
"step": 46000
},
{
"epoch": 5.53,
"learning_rate": 1.9767188222090182e-05,
"loss": 0.6811,
"step": 46500
},
{
"epoch": 5.59,
"learning_rate": 1.9740780352545058e-05,
"loss": 0.6775,
"step": 47000
},
{
"epoch": 5.64,
"learning_rate": 1.9714372482999934e-05,
"loss": 0.683,
"step": 47500
},
{
"epoch": 5.7,
"learning_rate": 1.968796461345481e-05,
"loss": 0.6799,
"step": 48000
},
{
"epoch": 5.76,
"learning_rate": 1.9661556743909686e-05,
"loss": 0.6754,
"step": 48500
},
{
"epoch": 5.82,
"learning_rate": 1.9635201690103652e-05,
"loss": 0.684,
"step": 49000
},
{
"epoch": 5.88,
"learning_rate": 1.9608793820558528e-05,
"loss": 0.6753,
"step": 49500
},
{
"epoch": 5.94,
"learning_rate": 1.9582385951013404e-05,
"loss": 0.6843,
"step": 50000
},
{
"epoch": 6.0,
"learning_rate": 1.955597808146828e-05,
"loss": 0.684,
"step": 50500
},
{
"epoch": 6.06,
"learning_rate": 1.9529623027662246e-05,
"loss": 0.6578,
"step": 51000
},
{
"epoch": 6.12,
"learning_rate": 1.950326797385621e-05,
"loss": 0.6686,
"step": 51500
},
{
"epoch": 6.18,
"learning_rate": 1.9476860104311085e-05,
"loss": 0.6696,
"step": 52000
},
{
"epoch": 6.24,
"learning_rate": 1.945045223476596e-05,
"loss": 0.6721,
"step": 52500
},
{
"epoch": 6.3,
"learning_rate": 1.9424044365220836e-05,
"loss": 0.6645,
"step": 53000
},
{
"epoch": 6.36,
"learning_rate": 1.9397689311414803e-05,
"loss": 0.6636,
"step": 53500
},
{
"epoch": 6.42,
"learning_rate": 1.937128144186968e-05,
"loss": 0.6644,
"step": 54000
},
{
"epoch": 6.48,
"learning_rate": 1.9344873572324555e-05,
"loss": 0.6697,
"step": 54500
},
{
"epoch": 6.54,
"learning_rate": 1.931846570277943e-05,
"loss": 0.6738,
"step": 55000
},
{
"epoch": 6.6,
"learning_rate": 1.9292057833234306e-05,
"loss": 0.6645,
"step": 55500
},
{
"epoch": 6.65,
"learning_rate": 1.9265649963689182e-05,
"loss": 0.6703,
"step": 56000
},
{
"epoch": 6.71,
"learning_rate": 1.9239242094144058e-05,
"loss": 0.6667,
"step": 56500
},
{
"epoch": 6.77,
"learning_rate": 1.9212834224598934e-05,
"loss": 0.6677,
"step": 57000
},
{
"epoch": 6.83,
"learning_rate": 1.9186479170792897e-05,
"loss": 0.6693,
"step": 57500
},
{
"epoch": 6.89,
"learning_rate": 1.9160071301247773e-05,
"loss": 0.6614,
"step": 58000
},
{
"epoch": 6.95,
"learning_rate": 1.913366343170265e-05,
"loss": 0.6604,
"step": 58500
},
{
"epoch": 7.01,
"learning_rate": 1.9107255562157525e-05,
"loss": 0.6591,
"step": 59000
},
{
"epoch": 7.07,
"learning_rate": 1.908090050835149e-05,
"loss": 0.6471,
"step": 59500
},
{
"epoch": 7.13,
"learning_rate": 1.9054492638806367e-05,
"loss": 0.6539,
"step": 60000
},
{
"epoch": 7.19,
"learning_rate": 1.9028084769261243e-05,
"loss": 0.6461,
"step": 60500
},
{
"epoch": 7.25,
"learning_rate": 1.900167689971612e-05,
"loss": 0.6514,
"step": 61000
},
{
"epoch": 7.31,
"learning_rate": 1.897526903017099e-05,
"loss": 0.6452,
"step": 61500
},
{
"epoch": 7.37,
"learning_rate": 1.8948861160625867e-05,
"loss": 0.6513,
"step": 62000
},
{
"epoch": 7.43,
"learning_rate": 1.8922453291080743e-05,
"loss": 0.6534,
"step": 62500
},
{
"epoch": 7.49,
"learning_rate": 1.889604542153562e-05,
"loss": 0.65,
"step": 63000
},
{
"epoch": 7.55,
"learning_rate": 1.8869637551990495e-05,
"loss": 0.6486,
"step": 63500
},
{
"epoch": 7.61,
"learning_rate": 1.884328249818446e-05,
"loss": 0.6498,
"step": 64000
},
{
"epoch": 7.66,
"learning_rate": 1.8816874628639337e-05,
"loss": 0.6424,
"step": 64500
},
{
"epoch": 7.72,
"learning_rate": 1.8790466759094213e-05,
"loss": 0.6492,
"step": 65000
},
{
"epoch": 7.78,
"learning_rate": 1.8764058889549086e-05,
"loss": 0.6564,
"step": 65500
},
{
"epoch": 7.84,
"learning_rate": 1.8737703835743052e-05,
"loss": 0.6467,
"step": 66000
},
{
"epoch": 7.9,
"learning_rate": 1.8711295966197928e-05,
"loss": 0.6461,
"step": 66500
},
{
"epoch": 7.96,
"learning_rate": 1.8684888096652804e-05,
"loss": 0.6424,
"step": 67000
},
{
"epoch": 8.02,
"learning_rate": 1.865848022710768e-05,
"loss": 0.6487,
"step": 67500
},
{
"epoch": 8.08,
"learning_rate": 1.8632072357562556e-05,
"loss": 0.6331,
"step": 68000
},
{
"epoch": 8.14,
"learning_rate": 1.8605664488017428e-05,
"loss": 0.6351,
"step": 68500
},
{
"epoch": 8.2,
"learning_rate": 1.8579256618472304e-05,
"loss": 0.6298,
"step": 69000
},
{
"epoch": 8.26,
"learning_rate": 1.855284874892718e-05,
"loss": 0.6365,
"step": 69500
},
{
"epoch": 8.32,
"learning_rate": 1.8526440879382056e-05,
"loss": 0.6371,
"step": 70000
},
{
"epoch": 8.38,
"learning_rate": 1.8500085825576025e-05,
"loss": 0.6312,
"step": 70500
},
{
"epoch": 8.44,
"learning_rate": 1.847378358750908e-05,
"loss": 0.6438,
"step": 71000
},
{
"epoch": 8.5,
"learning_rate": 1.8447375717963954e-05,
"loss": 0.6307,
"step": 71500
},
{
"epoch": 8.56,
"learning_rate": 1.842096784841883e-05,
"loss": 0.6377,
"step": 72000
},
{
"epoch": 8.62,
"learning_rate": 1.8394559978873706e-05,
"loss": 0.6457,
"step": 72500
},
{
"epoch": 8.67,
"learning_rate": 1.8368152109328582e-05,
"loss": 0.6377,
"step": 73000
},
{
"epoch": 8.73,
"learning_rate": 1.8341744239783458e-05,
"loss": 0.6394,
"step": 73500
},
{
"epoch": 8.79,
"learning_rate": 1.8315389185977424e-05,
"loss": 0.6392,
"step": 74000
},
{
"epoch": 8.85,
"learning_rate": 1.82889813164323e-05,
"loss": 0.6424,
"step": 74500
},
{
"epoch": 8.91,
"learning_rate": 1.8262573446887173e-05,
"loss": 0.6333,
"step": 75000
},
{
"epoch": 8.97,
"learning_rate": 1.823621839308114e-05,
"loss": 0.6422,
"step": 75500
},
{
"epoch": 9.03,
"learning_rate": 1.8209810523536015e-05,
"loss": 0.6311,
"step": 76000
},
{
"epoch": 9.09,
"learning_rate": 1.818340265399089e-05,
"loss": 0.6238,
"step": 76500
},
{
"epoch": 9.15,
"learning_rate": 1.8156994784445767e-05,
"loss": 0.6179,
"step": 77000
},
{
"epoch": 9.21,
"learning_rate": 1.813058691490064e-05,
"loss": 0.6344,
"step": 77500
},
{
"epoch": 9.27,
"learning_rate": 1.8104179045355515e-05,
"loss": 0.6303,
"step": 78000
},
{
"epoch": 9.33,
"learning_rate": 1.807777117581039e-05,
"loss": 0.625,
"step": 78500
},
{
"epoch": 9.39,
"learning_rate": 1.8051363306265267e-05,
"loss": 0.6237,
"step": 79000
},
{
"epoch": 9.45,
"learning_rate": 1.8024955436720143e-05,
"loss": 0.6332,
"step": 79500
},
{
"epoch": 9.51,
"learning_rate": 1.799854756717502e-05,
"loss": 0.6239,
"step": 80000
},
{
"epoch": 9.57,
"learning_rate": 1.7972139697629895e-05,
"loss": 0.627,
"step": 80500
},
{
"epoch": 9.63,
"learning_rate": 1.794573182808477e-05,
"loss": 0.6202,
"step": 81000
},
{
"epoch": 9.69,
"learning_rate": 1.7919376774278737e-05,
"loss": 0.63,
"step": 81500
},
{
"epoch": 9.74,
"learning_rate": 1.7892968904733613e-05,
"loss": 0.6205,
"step": 82000
},
{
"epoch": 9.8,
"learning_rate": 1.786656103518849e-05,
"loss": 0.6337,
"step": 82500
},
{
"epoch": 9.86,
"learning_rate": 1.7840153165643365e-05,
"loss": 0.6234,
"step": 83000
},
{
"epoch": 9.92,
"learning_rate": 1.781374529609824e-05,
"loss": 0.6349,
"step": 83500
},
{
"epoch": 9.98,
"learning_rate": 1.7787337426553117e-05,
"loss": 0.6336,
"step": 84000
},
{
"epoch": 10.04,
"learning_rate": 1.776092955700799e-05,
"loss": 0.6131,
"step": 84500
},
{
"epoch": 10.1,
"learning_rate": 1.7734521687462865e-05,
"loss": 0.6177,
"step": 85000
},
{
"epoch": 10.16,
"learning_rate": 1.770816663365683e-05,
"loss": 0.6209,
"step": 85500
},
{
"epoch": 10.22,
"learning_rate": 1.7681758764111707e-05,
"loss": 0.619,
"step": 86000
},
{
"epoch": 10.28,
"learning_rate": 1.7655350894566583e-05,
"loss": 0.6142,
"step": 86500
},
{
"epoch": 10.34,
"learning_rate": 1.762894302502146e-05,
"loss": 0.6134,
"step": 87000
},
{
"epoch": 10.4,
"learning_rate": 1.7602535155476332e-05,
"loss": 0.6174,
"step": 87500
},
{
"epoch": 10.46,
"learning_rate": 1.7576180101670298e-05,
"loss": 0.6203,
"step": 88000
},
{
"epoch": 10.52,
"learning_rate": 1.7549772232125174e-05,
"loss": 0.6171,
"step": 88500
},
{
"epoch": 10.58,
"learning_rate": 1.752336436258005e-05,
"loss": 0.6186,
"step": 89000
},
{
"epoch": 10.64,
"learning_rate": 1.7496956493034926e-05,
"loss": 0.617,
"step": 89500
},
{
"epoch": 10.7,
"learning_rate": 1.74705486234898e-05,
"loss": 0.6117,
"step": 90000
},
{
"epoch": 10.75,
"learning_rate": 1.7444140753944678e-05,
"loss": 0.6131,
"step": 90500
},
{
"epoch": 10.81,
"learning_rate": 1.7417732884399553e-05,
"loss": 0.6187,
"step": 91000
},
{
"epoch": 10.87,
"learning_rate": 1.739132501485443e-05,
"loss": 0.6215,
"step": 91500
},
{
"epoch": 10.93,
"learning_rate": 1.7364969961048396e-05,
"loss": 0.6237,
"step": 92000
},
{
"epoch": 10.99,
"learning_rate": 1.733856209150327e-05,
"loss": 0.6269,
"step": 92500
},
{
"epoch": 11.05,
"learning_rate": 1.7312207037697234e-05,
"loss": 0.6065,
"step": 93000
},
{
"epoch": 11.11,
"learning_rate": 1.728579916815211e-05,
"loss": 0.603,
"step": 93500
},
{
"epoch": 11.17,
"learning_rate": 1.7259391298606986e-05,
"loss": 0.6037,
"step": 94000
},
{
"epoch": 11.23,
"learning_rate": 1.7232983429061862e-05,
"loss": 0.6081,
"step": 94500
},
{
"epoch": 11.29,
"learning_rate": 1.7206575559516738e-05,
"loss": 0.602,
"step": 95000
},
{
"epoch": 11.35,
"learning_rate": 1.718016768997161e-05,
"loss": 0.6132,
"step": 95500
},
{
"epoch": 11.41,
"learning_rate": 1.7153759820426487e-05,
"loss": 0.6049,
"step": 96000
},
{
"epoch": 11.47,
"learning_rate": 1.7127351950881362e-05,
"loss": 0.6088,
"step": 96500
},
{
"epoch": 11.53,
"learning_rate": 1.710094408133624e-05,
"loss": 0.6063,
"step": 97000
},
{
"epoch": 11.59,
"learning_rate": 1.7074589027530205e-05,
"loss": 0.6086,
"step": 97500
},
{
"epoch": 11.65,
"learning_rate": 1.704818115798508e-05,
"loss": 0.6102,
"step": 98000
},
{
"epoch": 11.71,
"learning_rate": 1.7021826104179047e-05,
"loss": 0.6068,
"step": 98500
},
{
"epoch": 11.76,
"learning_rate": 1.6995418234633923e-05,
"loss": 0.6122,
"step": 99000
},
{
"epoch": 11.82,
"learning_rate": 1.69690103650888e-05,
"loss": 0.6119,
"step": 99500
},
{
"epoch": 11.88,
"learning_rate": 1.6942602495543675e-05,
"loss": 0.6058,
"step": 100000
},
{
"epoch": 11.94,
"learning_rate": 1.691619462599855e-05,
"loss": 0.6158,
"step": 100500
},
{
"epoch": 12.0,
"learning_rate": 1.6889786756453423e-05,
"loss": 0.6219,
"step": 101000
},
{
"epoch": 12.06,
"learning_rate": 1.68633788869083e-05,
"loss": 0.5941,
"step": 101500
},
{
"epoch": 12.12,
"learning_rate": 1.6836971017363175e-05,
"loss": 0.5933,
"step": 102000
},
{
"epoch": 12.18,
"learning_rate": 1.681056314781805e-05,
"loss": 0.6045,
"step": 102500
},
{
"epoch": 12.24,
"learning_rate": 1.6784155278272927e-05,
"loss": 0.5995,
"step": 103000
},
{
"epoch": 12.3,
"learning_rate": 1.6757747408727803e-05,
"loss": 0.5976,
"step": 103500
},
{
"epoch": 12.36,
"learning_rate": 1.673139235492177e-05,
"loss": 0.6016,
"step": 104000
},
{
"epoch": 12.42,
"learning_rate": 1.670498448537664e-05,
"loss": 0.6029,
"step": 104500
},
{
"epoch": 12.48,
"learning_rate": 1.6678576615831517e-05,
"loss": 0.6008,
"step": 105000
},
{
"epoch": 12.54,
"learning_rate": 1.6652221562025487e-05,
"loss": 0.5978,
"step": 105500
},
{
"epoch": 12.6,
"learning_rate": 1.6625813692480363e-05,
"loss": 0.6029,
"step": 106000
},
{
"epoch": 12.66,
"learning_rate": 1.6599405822935235e-05,
"loss": 0.6088,
"step": 106500
},
{
"epoch": 12.72,
"learning_rate": 1.657299795339011e-05,
"loss": 0.6071,
"step": 107000
},
{
"epoch": 12.77,
"learning_rate": 1.6546590083844987e-05,
"loss": 0.6025,
"step": 107500
},
{
"epoch": 12.83,
"learning_rate": 1.6520182214299863e-05,
"loss": 0.5966,
"step": 108000
},
{
"epoch": 12.89,
"learning_rate": 1.649377434475474e-05,
"loss": 0.6058,
"step": 108500
},
{
"epoch": 12.95,
"learning_rate": 1.6467419290948702e-05,
"loss": 0.6109,
"step": 109000
},
{
"epoch": 13.01,
"learning_rate": 1.6441011421403578e-05,
"loss": 0.6024,
"step": 109500
},
{
"epoch": 13.07,
"learning_rate": 1.6414603551858454e-05,
"loss": 0.5963,
"step": 110000
},
{
"epoch": 13.13,
"learning_rate": 1.638819568231333e-05,
"loss": 0.5861,
"step": 110500
},
{
"epoch": 13.19,
"learning_rate": 1.6361787812768206e-05,
"loss": 0.5939,
"step": 111000
},
{
"epoch": 13.25,
"learning_rate": 1.633537994322308e-05,
"loss": 0.5859,
"step": 111500
},
{
"epoch": 13.31,
"learning_rate": 1.6308972073677957e-05,
"loss": 0.5916,
"step": 112000
},
{
"epoch": 13.37,
"learning_rate": 1.6282564204132833e-05,
"loss": 0.5938,
"step": 112500
},
{
"epoch": 13.43,
"learning_rate": 1.62562091503268e-05,
"loss": 0.5925,
"step": 113000
},
{
"epoch": 13.49,
"learning_rate": 1.6229801280781675e-05,
"loss": 0.5945,
"step": 113500
},
{
"epoch": 13.55,
"learning_rate": 1.620339341123655e-05,
"loss": 0.5999,
"step": 114000
},
{
"epoch": 13.61,
"learning_rate": 1.6176985541691427e-05,
"loss": 0.5904,
"step": 114500
},
{
"epoch": 13.67,
"learning_rate": 1.615063048788539e-05,
"loss": 0.5988,
"step": 115000
},
{
"epoch": 13.73,
"learning_rate": 1.6124222618340266e-05,
"loss": 0.5963,
"step": 115500
},
{
"epoch": 13.78,
"learning_rate": 1.6097814748795142e-05,
"loss": 0.592,
"step": 116000
},
{
"epoch": 13.84,
"learning_rate": 1.6071406879250018e-05,
"loss": 0.5954,
"step": 116500
},
{
"epoch": 13.9,
"learning_rate": 1.6044999009704894e-05,
"loss": 0.5966,
"step": 117000
},
{
"epoch": 13.96,
"learning_rate": 1.601859114015977e-05,
"loss": 0.6015,
"step": 117500
},
{
"epoch": 14.02,
"learning_rate": 1.5992183270614646e-05,
"loss": 0.5891,
"step": 118000
},
{
"epoch": 14.08,
"learning_rate": 1.596577540106952e-05,
"loss": 0.5819,
"step": 118500
},
{
"epoch": 14.14,
"learning_rate": 1.5939367531524394e-05,
"loss": 0.5883,
"step": 119000
},
{
"epoch": 14.2,
"learning_rate": 1.591301247771836e-05,
"loss": 0.5881,
"step": 119500
},
{
"epoch": 14.26,
"learning_rate": 1.5886604608173236e-05,
"loss": 0.584,
"step": 120000
},
{
"epoch": 14.32,
"learning_rate": 1.5860196738628112e-05,
"loss": 0.5888,
"step": 120500
},
{
"epoch": 14.38,
"learning_rate": 1.5833788869082988e-05,
"loss": 0.5911,
"step": 121000
},
{
"epoch": 14.44,
"learning_rate": 1.5807380999537864e-05,
"loss": 0.5846,
"step": 121500
},
{
"epoch": 14.5,
"learning_rate": 1.5780973129992737e-05,
"loss": 0.5836,
"step": 122000
},
{
"epoch": 14.56,
"learning_rate": 1.5754565260447613e-05,
"loss": 0.5838,
"step": 122500
},
{
"epoch": 14.62,
"learning_rate": 1.5728210206641582e-05,
"loss": 0.5883,
"step": 123000
},
{
"epoch": 14.68,
"learning_rate": 1.5701802337096458e-05,
"loss": 0.5875,
"step": 123500
},
{
"epoch": 14.74,
"learning_rate": 1.5675394467551334e-05,
"loss": 0.5901,
"step": 124000
},
{
"epoch": 14.79,
"learning_rate": 1.5648986598006207e-05,
"loss": 0.5881,
"step": 124500
},
{
"epoch": 14.85,
"learning_rate": 1.5622631544200173e-05,
"loss": 0.5951,
"step": 125000
},
{
"epoch": 14.91,
"learning_rate": 1.559622367465505e-05,
"loss": 0.5898,
"step": 125500
},
{
"epoch": 14.97,
"learning_rate": 1.5569815805109925e-05,
"loss": 0.5861,
"step": 126000
},
{
"epoch": 15.03,
"learning_rate": 1.55434079355648e-05,
"loss": 0.5754,
"step": 126500
},
{
"epoch": 15.09,
"learning_rate": 1.5517000066019676e-05,
"loss": 0.5751,
"step": 127000
},
{
"epoch": 15.15,
"learning_rate": 1.549064501221364e-05,
"loss": 0.5814,
"step": 127500
},
{
"epoch": 15.21,
"learning_rate": 1.5464237142668515e-05,
"loss": 0.5853,
"step": 128000
},
{
"epoch": 15.27,
"learning_rate": 1.543782927312339e-05,
"loss": 0.5832,
"step": 128500
},
{
"epoch": 15.33,
"learning_rate": 1.5411421403578267e-05,
"loss": 0.5831,
"step": 129000
},
{
"epoch": 15.39,
"learning_rate": 1.5385013534033143e-05,
"loss": 0.583,
"step": 129500
},
{
"epoch": 15.45,
"learning_rate": 1.535860566448802e-05,
"loss": 0.5708,
"step": 130000
},
{
"epoch": 15.51,
"learning_rate": 1.5332250610681985e-05,
"loss": 0.5816,
"step": 130500
},
{
"epoch": 15.57,
"learning_rate": 1.530584274113686e-05,
"loss": 0.5845,
"step": 131000
},
{
"epoch": 15.63,
"learning_rate": 1.5279434871591737e-05,
"loss": 0.5821,
"step": 131500
},
{
"epoch": 15.69,
"learning_rate": 1.5253027002046611e-05,
"loss": 0.5835,
"step": 132000
},
{
"epoch": 15.75,
"learning_rate": 1.5226619132501485e-05,
"loss": 0.5796,
"step": 132500
},
{
"epoch": 15.81,
"learning_rate": 1.5200211262956361e-05,
"loss": 0.5825,
"step": 133000
},
{
"epoch": 15.86,
"learning_rate": 1.5173803393411237e-05,
"loss": 0.5854,
"step": 133500
},
{
"epoch": 15.92,
"learning_rate": 1.5147395523866113e-05,
"loss": 0.5813,
"step": 134000
},
{
"epoch": 15.98,
"learning_rate": 1.5121093285799168e-05,
"loss": 0.584,
"step": 134500
},
{
"epoch": 16.04,
"learning_rate": 1.5094685416254044e-05,
"loss": 0.5734,
"step": 135000
},
{
"epoch": 16.1,
"learning_rate": 1.506827754670892e-05,
"loss": 0.5647,
"step": 135500
},
{
"epoch": 16.16,
"learning_rate": 1.5041869677163796e-05,
"loss": 0.5729,
"step": 136000
},
{
"epoch": 16.22,
"learning_rate": 1.501551462335776e-05,
"loss": 0.5692,
"step": 136500
},
{
"epoch": 16.28,
"learning_rate": 1.4989159569551728e-05,
"loss": 0.5703,
"step": 137000
},
{
"epoch": 16.34,
"learning_rate": 1.4962751700006604e-05,
"loss": 0.5798,
"step": 137500
},
{
"epoch": 16.4,
"learning_rate": 1.493634383046148e-05,
"loss": 0.5785,
"step": 138000
},
{
"epoch": 16.46,
"learning_rate": 1.4909935960916356e-05,
"loss": 0.57,
"step": 138500
},
{
"epoch": 16.52,
"learning_rate": 1.488358090711032e-05,
"loss": 0.5747,
"step": 139000
},
{
"epoch": 16.58,
"learning_rate": 1.4857173037565196e-05,
"loss": 0.5761,
"step": 139500
},
{
"epoch": 16.64,
"learning_rate": 1.483076516802007e-05,
"loss": 0.5756,
"step": 140000
},
{
"epoch": 16.7,
"learning_rate": 1.4804357298474947e-05,
"loss": 0.5727,
"step": 140500
},
{
"epoch": 16.76,
"learning_rate": 1.4777949428929823e-05,
"loss": 0.5826,
"step": 141000
},
{
"epoch": 16.82,
"learning_rate": 1.4751541559384698e-05,
"loss": 0.5862,
"step": 141500
},
{
"epoch": 16.87,
"learning_rate": 1.4725133689839573e-05,
"loss": 0.5752,
"step": 142000
},
{
"epoch": 16.93,
"learning_rate": 1.4698725820294449e-05,
"loss": 0.583,
"step": 142500
},
{
"epoch": 16.99,
"learning_rate": 1.4672317950749325e-05,
"loss": 0.5805,
"step": 143000
},
{
"epoch": 17.05,
"learning_rate": 1.464596289694329e-05,
"loss": 0.566,
"step": 143500
},
{
"epoch": 17.11,
"learning_rate": 1.4619555027398167e-05,
"loss": 0.5645,
"step": 144000
},
{
"epoch": 17.17,
"learning_rate": 1.4593199973592131e-05,
"loss": 0.5645,
"step": 144500
},
{
"epoch": 17.23,
"learning_rate": 1.4566792104047007e-05,
"loss": 0.5691,
"step": 145000
},
{
"epoch": 17.29,
"learning_rate": 1.4540384234501883e-05,
"loss": 0.5693,
"step": 145500
},
{
"epoch": 17.35,
"learning_rate": 1.4513976364956759e-05,
"loss": 0.5731,
"step": 146000
},
{
"epoch": 17.41,
"learning_rate": 1.4487568495411635e-05,
"loss": 0.5658,
"step": 146500
},
{
"epoch": 17.47,
"learning_rate": 1.446116062586651e-05,
"loss": 0.5761,
"step": 147000
},
{
"epoch": 17.53,
"learning_rate": 1.4434752756321383e-05,
"loss": 0.5687,
"step": 147500
},
{
"epoch": 17.59,
"learning_rate": 1.440834488677626e-05,
"loss": 0.5724,
"step": 148000
},
{
"epoch": 17.65,
"learning_rate": 1.4381937017231135e-05,
"loss": 0.5683,
"step": 148500
},
{
"epoch": 17.71,
"learning_rate": 1.4355581963425103e-05,
"loss": 0.5772,
"step": 149000
},
{
"epoch": 17.77,
"learning_rate": 1.4329174093879979e-05,
"loss": 0.5669,
"step": 149500
},
{
"epoch": 17.83,
"learning_rate": 1.4302819040073944e-05,
"loss": 0.5678,
"step": 150000
},
{
"epoch": 17.88,
"learning_rate": 1.427646398626791e-05,
"loss": 0.5703,
"step": 150500
},
{
"epoch": 17.94,
"learning_rate": 1.4250056116722784e-05,
"loss": 0.5687,
"step": 151000
},
{
"epoch": 18.0,
"learning_rate": 1.422364824717766e-05,
"loss": 0.5656,
"step": 151500
},
{
"epoch": 18.06,
"learning_rate": 1.4197240377632536e-05,
"loss": 0.5563,
"step": 152000
},
{
"epoch": 18.12,
"learning_rate": 1.4170885323826502e-05,
"loss": 0.5584,
"step": 152500
},
{
"epoch": 18.18,
"learning_rate": 1.4144477454281378e-05,
"loss": 0.5699,
"step": 153000
},
{
"epoch": 18.24,
"learning_rate": 1.4118069584736254e-05,
"loss": 0.5573,
"step": 153500
},
{
"epoch": 18.3,
"learning_rate": 1.4091661715191126e-05,
"loss": 0.5646,
"step": 154000
},
{
"epoch": 18.36,
"learning_rate": 1.4065306661385094e-05,
"loss": 0.5608,
"step": 154500
},
{
"epoch": 18.42,
"learning_rate": 1.4038951607579059e-05,
"loss": 0.566,
"step": 155000
},
{
"epoch": 18.48,
"learning_rate": 1.4012543738033935e-05,
"loss": 0.5709,
"step": 155500
},
{
"epoch": 18.54,
"learning_rate": 1.398613586848881e-05,
"loss": 0.5665,
"step": 156000
},
{
"epoch": 18.6,
"learning_rate": 1.3959727998943687e-05,
"loss": 0.5642,
"step": 156500
},
{
"epoch": 18.66,
"learning_rate": 1.3933320129398563e-05,
"loss": 0.5647,
"step": 157000
},
{
"epoch": 18.72,
"learning_rate": 1.3906912259853439e-05,
"loss": 0.5716,
"step": 157500
},
{
"epoch": 18.78,
"learning_rate": 1.3880504390308314e-05,
"loss": 0.5673,
"step": 158000
},
{
"epoch": 18.84,
"learning_rate": 1.3854096520763189e-05,
"loss": 0.5671,
"step": 158500
},
{
"epoch": 18.89,
"learning_rate": 1.3827688651218063e-05,
"loss": 0.5657,
"step": 159000
},
{
"epoch": 18.95,
"learning_rate": 1.3801280781672939e-05,
"loss": 0.5629,
"step": 159500
},
{
"epoch": 19.01,
"learning_rate": 1.3774872912127815e-05,
"loss": 0.5674,
"step": 160000
},
{
"epoch": 19.07,
"learning_rate": 1.374846504258269e-05,
"loss": 0.5609,
"step": 160500
},
{
"epoch": 19.13,
"learning_rate": 1.3722057173037567e-05,
"loss": 0.5642,
"step": 161000
},
{
"epoch": 19.19,
"learning_rate": 1.3695649303492443e-05,
"loss": 0.5545,
"step": 161500
},
{
"epoch": 19.25,
"learning_rate": 1.3669241433947318e-05,
"loss": 0.5548,
"step": 162000
},
{
"epoch": 19.31,
"learning_rate": 1.3642833564402194e-05,
"loss": 0.5523,
"step": 162500
},
{
"epoch": 19.37,
"learning_rate": 1.3616425694857069e-05,
"loss": 0.5602,
"step": 163000
},
{
"epoch": 19.43,
"learning_rate": 1.3590017825311943e-05,
"loss": 0.5627,
"step": 163500
},
{
"epoch": 19.49,
"learning_rate": 1.3563609955766819e-05,
"loss": 0.5549,
"step": 164000
},
{
"epoch": 19.55,
"learning_rate": 1.3537202086221695e-05,
"loss": 0.5606,
"step": 164500
},
{
"epoch": 19.61,
"learning_rate": 1.351079421667657e-05,
"loss": 0.5672,
"step": 165000
},
{
"epoch": 19.67,
"learning_rate": 1.3484386347131447e-05,
"loss": 0.5658,
"step": 165500
},
{
"epoch": 19.73,
"learning_rate": 1.3457978477586322e-05,
"loss": 0.5628,
"step": 166000
},
{
"epoch": 19.79,
"learning_rate": 1.3431570608041198e-05,
"loss": 0.5612,
"step": 166500
},
{
"epoch": 19.85,
"learning_rate": 1.3405162738496074e-05,
"loss": 0.5643,
"step": 167000
},
{
"epoch": 19.9,
"learning_rate": 1.337875486895095e-05,
"loss": 0.568,
"step": 167500
},
{
"epoch": 19.96,
"learning_rate": 1.3352346999405823e-05,
"loss": 0.563,
"step": 168000
},
{
"epoch": 20.02,
"learning_rate": 1.332599194559979e-05,
"loss": 0.5639,
"step": 168500
},
{
"epoch": 20.08,
"learning_rate": 1.3299584076054665e-05,
"loss": 0.5526,
"step": 169000
},
{
"epoch": 20.14,
"learning_rate": 1.327317620650954e-05,
"loss": 0.5549,
"step": 169500
},
{
"epoch": 20.2,
"learning_rate": 1.3246768336964417e-05,
"loss": 0.5477,
"step": 170000
},
{
"epoch": 20.26,
"learning_rate": 1.3220360467419293e-05,
"loss": 0.5534,
"step": 170500
},
{
"epoch": 20.32,
"learning_rate": 1.3194005413613257e-05,
"loss": 0.5555,
"step": 171000
},
{
"epoch": 20.38,
"learning_rate": 1.3167597544068133e-05,
"loss": 0.5646,
"step": 171500
},
{
"epoch": 20.44,
"learning_rate": 1.3141189674523009e-05,
"loss": 0.5538,
"step": 172000
},
{
"epoch": 20.5,
"learning_rate": 1.3114781804977885e-05,
"loss": 0.5567,
"step": 172500
},
{
"epoch": 20.56,
"learning_rate": 1.3088373935432761e-05,
"loss": 0.5553,
"step": 173000
},
{
"epoch": 20.62,
"learning_rate": 1.3061966065887635e-05,
"loss": 0.5573,
"step": 173500
},
{
"epoch": 20.68,
"learning_rate": 1.3035611012081601e-05,
"loss": 0.5624,
"step": 174000
},
{
"epoch": 20.74,
"learning_rate": 1.3009255958275566e-05,
"loss": 0.5553,
"step": 174500
},
{
"epoch": 20.8,
"learning_rate": 1.2982848088730442e-05,
"loss": 0.57,
"step": 175000
},
{
"epoch": 20.86,
"learning_rate": 1.2956440219185318e-05,
"loss": 0.5502,
"step": 175500
},
{
"epoch": 20.91,
"learning_rate": 1.2930032349640194e-05,
"loss": 0.5544,
"step": 176000
},
{
"epoch": 20.97,
"learning_rate": 1.290367729583416e-05,
"loss": 0.5627,
"step": 176500
},
{
"epoch": 21.03,
"learning_rate": 1.2877269426289034e-05,
"loss": 0.5574,
"step": 177000
},
{
"epoch": 21.09,
"learning_rate": 1.285086155674391e-05,
"loss": 0.5469,
"step": 177500
},
{
"epoch": 21.15,
"learning_rate": 1.2824453687198786e-05,
"loss": 0.5495,
"step": 178000
},
{
"epoch": 21.21,
"learning_rate": 1.2798045817653662e-05,
"loss": 0.5523,
"step": 178500
},
{
"epoch": 21.27,
"learning_rate": 1.2771690763847628e-05,
"loss": 0.5467,
"step": 179000
},
{
"epoch": 21.33,
"learning_rate": 1.2745282894302504e-05,
"loss": 0.5487,
"step": 179500
},
{
"epoch": 21.39,
"learning_rate": 1.2718875024757378e-05,
"loss": 0.546,
"step": 180000
},
{
"epoch": 21.45,
"learning_rate": 1.2692467155212254e-05,
"loss": 0.5461,
"step": 180500
},
{
"epoch": 21.51,
"learning_rate": 1.266605928566713e-05,
"loss": 0.554,
"step": 181000
},
{
"epoch": 21.57,
"learning_rate": 1.2639704231861096e-05,
"loss": 0.5493,
"step": 181500
},
{
"epoch": 21.63,
"learning_rate": 1.2613296362315972e-05,
"loss": 0.5548,
"step": 182000
},
{
"epoch": 21.69,
"learning_rate": 1.2586888492770846e-05,
"loss": 0.5503,
"step": 182500
},
{
"epoch": 21.75,
"learning_rate": 1.256048062322572e-05,
"loss": 0.5604,
"step": 183000
},
{
"epoch": 21.81,
"learning_rate": 1.2534072753680597e-05,
"loss": 0.5622,
"step": 183500
},
{
"epoch": 21.87,
"learning_rate": 1.2507664884135473e-05,
"loss": 0.549,
"step": 184000
},
{
"epoch": 21.93,
"learning_rate": 1.2481257014590348e-05,
"loss": 0.5535,
"step": 184500
},
{
"epoch": 21.98,
"learning_rate": 1.2454849145045224e-05,
"loss": 0.5572,
"step": 185000
},
{
"epoch": 22.04,
"learning_rate": 1.24284412755001e-05,
"loss": 0.5508,
"step": 185500
},
{
"epoch": 22.1,
"learning_rate": 1.2402033405954976e-05,
"loss": 0.5454,
"step": 186000
},
{
"epoch": 22.16,
"learning_rate": 1.237567835214894e-05,
"loss": 0.551,
"step": 186500
},
{
"epoch": 22.22,
"learning_rate": 1.2349270482603817e-05,
"loss": 0.5454,
"step": 187000
},
{
"epoch": 22.28,
"learning_rate": 1.2322862613058693e-05,
"loss": 0.5445,
"step": 187500
},
{
"epoch": 22.34,
"learning_rate": 1.2296454743513569e-05,
"loss": 0.5493,
"step": 188000
},
{
"epoch": 22.4,
"learning_rate": 1.2270046873968444e-05,
"loss": 0.5429,
"step": 188500
},
{
"epoch": 22.46,
"learning_rate": 1.224363900442332e-05,
"loss": 0.5507,
"step": 189000
},
{
"epoch": 22.52,
"learning_rate": 1.2217231134878196e-05,
"loss": 0.5439,
"step": 189500
},
{
"epoch": 22.58,
"learning_rate": 1.2190823265333069e-05,
"loss": 0.5484,
"step": 190000
},
{
"epoch": 22.64,
"learning_rate": 1.2164468211527037e-05,
"loss": 0.5467,
"step": 190500
},
{
"epoch": 22.7,
"learning_rate": 1.2138113157721001e-05,
"loss": 0.5501,
"step": 191000
},
{
"epoch": 22.76,
"learning_rate": 1.2111705288175877e-05,
"loss": 0.5495,
"step": 191500
},
{
"epoch": 22.82,
"learning_rate": 1.2085297418630753e-05,
"loss": 0.552,
"step": 192000
},
{
"epoch": 22.88,
"learning_rate": 1.205894236482472e-05,
"loss": 0.548,
"step": 192500
},
{
"epoch": 22.94,
"learning_rate": 1.2032534495279595e-05,
"loss": 0.5501,
"step": 193000
},
{
"epoch": 22.99,
"learning_rate": 1.200617944147356e-05,
"loss": 0.5499,
"step": 193500
},
{
"epoch": 23.05,
"learning_rate": 1.1979771571928436e-05,
"loss": 0.5376,
"step": 194000
},
{
"epoch": 23.11,
"learning_rate": 1.1953363702383312e-05,
"loss": 0.5413,
"step": 194500
},
{
"epoch": 23.17,
"learning_rate": 1.1926955832838188e-05,
"loss": 0.5504,
"step": 195000
},
{
"epoch": 23.23,
"learning_rate": 1.1900547963293064e-05,
"loss": 0.5448,
"step": 195500
},
{
"epoch": 23.29,
"learning_rate": 1.1874140093747936e-05,
"loss": 0.553,
"step": 196000
},
{
"epoch": 23.35,
"learning_rate": 1.1847732224202812e-05,
"loss": 0.5535,
"step": 196500
},
{
"epoch": 23.41,
"learning_rate": 1.1821324354657688e-05,
"loss": 0.5449,
"step": 197000
},
{
"epoch": 23.47,
"learning_rate": 1.1794916485112564e-05,
"loss": 0.5452,
"step": 197500
},
{
"epoch": 23.53,
"learning_rate": 1.176850861556744e-05,
"loss": 0.5496,
"step": 198000
},
{
"epoch": 23.59,
"learning_rate": 1.1742100746022316e-05,
"loss": 0.5528,
"step": 198500
},
{
"epoch": 23.65,
"learning_rate": 1.1715692876477192e-05,
"loss": 0.5388,
"step": 199000
},
{
"epoch": 23.71,
"learning_rate": 1.1689337822671156e-05,
"loss": 0.5412,
"step": 199500
},
{
"epoch": 23.77,
"learning_rate": 1.1662929953126032e-05,
"loss": 0.5494,
"step": 200000
},
{
"epoch": 23.83,
"learning_rate": 1.1636522083580908e-05,
"loss": 0.5403,
"step": 200500
},
{
"epoch": 23.89,
"learning_rate": 1.1610114214035784e-05,
"loss": 0.5522,
"step": 201000
},
{
"epoch": 23.95,
"learning_rate": 1.1583759160229748e-05,
"loss": 0.5522,
"step": 201500
},
{
"epoch": 24.0,
"learning_rate": 1.1557351290684624e-05,
"loss": 0.5405,
"step": 202000
},
{
"epoch": 24.06,
"learning_rate": 1.15309434211395e-05,
"loss": 0.5371,
"step": 202500
},
{
"epoch": 24.12,
"learning_rate": 1.1504588367333466e-05,
"loss": 0.5378,
"step": 203000
},
{
"epoch": 24.18,
"learning_rate": 1.1478180497788342e-05,
"loss": 0.543,
"step": 203500
},
{
"epoch": 24.24,
"learning_rate": 1.1451772628243218e-05,
"loss": 0.5359,
"step": 204000
},
{
"epoch": 24.3,
"learning_rate": 1.1425364758698093e-05,
"loss": 0.5344,
"step": 204500
},
{
"epoch": 24.36,
"learning_rate": 1.1398956889152968e-05,
"loss": 0.548,
"step": 205000
},
{
"epoch": 24.42,
"learning_rate": 1.1372549019607844e-05,
"loss": 0.5406,
"step": 205500
},
{
"epoch": 24.48,
"learning_rate": 1.134619396580181e-05,
"loss": 0.5398,
"step": 206000
},
{
"epoch": 24.54,
"learning_rate": 1.1319786096256687e-05,
"loss": 0.5322,
"step": 206500
},
{
"epoch": 24.6,
"learning_rate": 1.1293378226711559e-05,
"loss": 0.5327,
"step": 207000
},
{
"epoch": 24.66,
"learning_rate": 1.1266970357166435e-05,
"loss": 0.5395,
"step": 207500
},
{
"epoch": 24.72,
"learning_rate": 1.1240615303360403e-05,
"loss": 0.5401,
"step": 208000
},
{
"epoch": 24.78,
"learning_rate": 1.1214207433815279e-05,
"loss": 0.5446,
"step": 208500
},
{
"epoch": 24.84,
"learning_rate": 1.1187852380009243e-05,
"loss": 0.5431,
"step": 209000
},
{
"epoch": 24.9,
"learning_rate": 1.116144451046412e-05,
"loss": 0.543,
"step": 209500
},
{
"epoch": 24.96,
"learning_rate": 1.1135036640918995e-05,
"loss": 0.541,
"step": 210000
},
{
"epoch": 25.01,
"learning_rate": 1.1108628771373871e-05,
"loss": 0.5417,
"step": 210500
},
{
"epoch": 25.07,
"learning_rate": 1.1082273717567836e-05,
"loss": 0.5306,
"step": 211000
},
{
"epoch": 25.13,
"learning_rate": 1.1055865848022712e-05,
"loss": 0.5355,
"step": 211500
},
{
"epoch": 25.19,
"learning_rate": 1.1029457978477588e-05,
"loss": 0.5358,
"step": 212000
},
{
"epoch": 25.25,
"learning_rate": 1.1003050108932463e-05,
"loss": 0.5386,
"step": 212500
},
{
"epoch": 25.31,
"learning_rate": 1.0976642239387338e-05,
"loss": 0.54,
"step": 213000
},
{
"epoch": 25.37,
"learning_rate": 1.0950287185581304e-05,
"loss": 0.5376,
"step": 213500
},
{
"epoch": 25.43,
"learning_rate": 1.0923879316036178e-05,
"loss": 0.5369,
"step": 214000
},
{
"epoch": 25.49,
"learning_rate": 1.0897471446491054e-05,
"loss": 0.5352,
"step": 214500
},
{
"epoch": 25.55,
"learning_rate": 1.087106357694593e-05,
"loss": 0.5446,
"step": 215000
},
{
"epoch": 25.61,
"learning_rate": 1.0844655707400806e-05,
"loss": 0.5399,
"step": 215500
},
{
"epoch": 25.67,
"learning_rate": 1.0818247837855682e-05,
"loss": 0.5382,
"step": 216000
},
{
"epoch": 25.73,
"learning_rate": 1.0791839968310558e-05,
"loss": 0.5416,
"step": 216500
},
{
"epoch": 25.79,
"learning_rate": 1.0765484914504522e-05,
"loss": 0.5414,
"step": 217000
},
{
"epoch": 25.85,
"learning_rate": 1.0739077044959398e-05,
"loss": 0.5394,
"step": 217500
},
{
"epoch": 25.91,
"learning_rate": 1.0712669175414274e-05,
"loss": 0.5451,
"step": 218000
},
{
"epoch": 25.97,
"learning_rate": 1.068626130586915e-05,
"loss": 0.5402,
"step": 218500
},
{
"epoch": 26.02,
"learning_rate": 1.0659853436324026e-05,
"loss": 0.5345,
"step": 219000
},
{
"epoch": 26.08,
"learning_rate": 1.063349838251799e-05,
"loss": 0.5235,
"step": 219500
},
{
"epoch": 26.14,
"learning_rate": 1.0607090512972866e-05,
"loss": 0.5328,
"step": 220000
},
{
"epoch": 26.2,
"learning_rate": 1.0580682643427742e-05,
"loss": 0.5376,
"step": 220500
},
{
"epoch": 26.26,
"learning_rate": 1.0554274773882618e-05,
"loss": 0.5319,
"step": 221000
},
{
"epoch": 26.32,
"learning_rate": 1.0527866904337494e-05,
"loss": 0.5324,
"step": 221500
},
{
"epoch": 26.38,
"learning_rate": 1.050145903479237e-05,
"loss": 0.5334,
"step": 222000
},
{
"epoch": 26.44,
"learning_rate": 1.0475051165247246e-05,
"loss": 0.5338,
"step": 222500
},
{
"epoch": 26.5,
"learning_rate": 1.044864329570212e-05,
"loss": 0.5367,
"step": 223000
},
{
"epoch": 26.56,
"learning_rate": 1.0422235426156994e-05,
"loss": 0.5337,
"step": 223500
},
{
"epoch": 26.62,
"learning_rate": 1.039582755661187e-05,
"loss": 0.5306,
"step": 224000
},
{
"epoch": 26.68,
"learning_rate": 1.0369419687066746e-05,
"loss": 0.5417,
"step": 224500
},
{
"epoch": 26.74,
"learning_rate": 1.0343011817521622e-05,
"loss": 0.534,
"step": 225000
},
{
"epoch": 26.8,
"learning_rate": 1.0316603947976498e-05,
"loss": 0.5341,
"step": 225500
},
{
"epoch": 26.86,
"learning_rate": 1.0290196078431374e-05,
"loss": 0.535,
"step": 226000
},
{
"epoch": 26.92,
"learning_rate": 1.0263841024625339e-05,
"loss": 0.5414,
"step": 226500
},
{
"epoch": 26.98,
"learning_rate": 1.0237433155080215e-05,
"loss": 0.5375,
"step": 227000
},
{
"epoch": 27.04,
"learning_rate": 1.021102528553509e-05,
"loss": 0.5268,
"step": 227500
},
{
"epoch": 27.09,
"learning_rate": 1.0184617415989966e-05,
"loss": 0.5277,
"step": 228000
},
{
"epoch": 27.15,
"learning_rate": 1.0158262362183933e-05,
"loss": 0.5304,
"step": 228500
},
{
"epoch": 27.21,
"learning_rate": 1.0131854492638807e-05,
"loss": 0.5291,
"step": 229000
},
{
"epoch": 27.27,
"learning_rate": 1.0105446623093683e-05,
"loss": 0.5287,
"step": 229500
},
{
"epoch": 27.33,
"learning_rate": 1.0079091569287649e-05,
"loss": 0.5346,
"step": 230000
},
{
"epoch": 27.39,
"learning_rate": 1.0052683699742525e-05,
"loss": 0.5357,
"step": 230500
},
{
"epoch": 27.45,
"learning_rate": 1.0026275830197401e-05,
"loss": 0.5317,
"step": 231000
},
{
"epoch": 27.51,
"learning_rate": 9.999920776391365e-06,
"loss": 0.5187,
"step": 231500
},
{
"epoch": 27.57,
"learning_rate": 9.973512906846241e-06,
"loss": 0.5361,
"step": 232000
},
{
"epoch": 27.63,
"learning_rate": 9.947105037301117e-06,
"loss": 0.5361,
"step": 232500
},
{
"epoch": 27.69,
"learning_rate": 9.920697167755991e-06,
"loss": 0.5292,
"step": 233000
},
{
"epoch": 27.75,
"learning_rate": 9.894289298210867e-06,
"loss": 0.5317,
"step": 233500
},
{
"epoch": 27.81,
"learning_rate": 9.867881428665743e-06,
"loss": 0.5294,
"step": 234000
},
{
"epoch": 27.87,
"learning_rate": 9.84147355912062e-06,
"loss": 0.5348,
"step": 234500
},
{
"epoch": 27.93,
"learning_rate": 9.815065689575495e-06,
"loss": 0.5443,
"step": 235000
},
{
"epoch": 27.99,
"learning_rate": 9.788710635769461e-06,
"loss": 0.5355,
"step": 235500
},
{
"epoch": 28.05,
"learning_rate": 9.762302766224336e-06,
"loss": 0.5297,
"step": 236000
},
{
"epoch": 28.1,
"learning_rate": 9.735894896679212e-06,
"loss": 0.5306,
"step": 236500
},
{
"epoch": 28.16,
"learning_rate": 9.709487027134087e-06,
"loss": 0.5251,
"step": 237000
},
{
"epoch": 28.22,
"learning_rate": 9.683079157588963e-06,
"loss": 0.5255,
"step": 237500
},
{
"epoch": 28.28,
"learning_rate": 9.656671288043838e-06,
"loss": 0.5222,
"step": 238000
},
{
"epoch": 28.34,
"learning_rate": 9.630263418498714e-06,
"loss": 0.5281,
"step": 238500
},
{
"epoch": 28.4,
"learning_rate": 9.60385554895359e-06,
"loss": 0.5262,
"step": 239000
},
{
"epoch": 28.46,
"learning_rate": 9.577447679408465e-06,
"loss": 0.5245,
"step": 239500
},
{
"epoch": 28.52,
"learning_rate": 9.551039809863341e-06,
"loss": 0.5321,
"step": 240000
},
{
"epoch": 28.58,
"learning_rate": 9.524684756057306e-06,
"loss": 0.5354,
"step": 240500
},
{
"epoch": 28.64,
"learning_rate": 9.498276886512182e-06,
"loss": 0.5264,
"step": 241000
},
{
"epoch": 28.7,
"learning_rate": 9.471869016967058e-06,
"loss": 0.5244,
"step": 241500
},
{
"epoch": 28.76,
"learning_rate": 9.445461147421932e-06,
"loss": 0.5337,
"step": 242000
},
{
"epoch": 28.82,
"learning_rate": 9.419106093615898e-06,
"loss": 0.529,
"step": 242500
},
{
"epoch": 28.88,
"learning_rate": 9.392698224070774e-06,
"loss": 0.5317,
"step": 243000
},
{
"epoch": 28.94,
"learning_rate": 9.366290354525648e-06,
"loss": 0.5285,
"step": 243500
},
{
"epoch": 29.0,
"learning_rate": 9.339882484980524e-06,
"loss": 0.5338,
"step": 244000
},
{
"epoch": 29.06,
"learning_rate": 9.3134746154354e-06,
"loss": 0.5181,
"step": 244500
},
{
"epoch": 29.11,
"learning_rate": 9.287066745890276e-06,
"loss": 0.528,
"step": 245000
},
{
"epoch": 29.17,
"learning_rate": 9.260658876345152e-06,
"loss": 0.5173,
"step": 245500
},
{
"epoch": 29.23,
"learning_rate": 9.234251006800026e-06,
"loss": 0.5261,
"step": 246000
},
{
"epoch": 29.29,
"learning_rate": 9.207895952993992e-06,
"loss": 0.5258,
"step": 246500
},
{
"epoch": 29.35,
"learning_rate": 9.181488083448868e-06,
"loss": 0.5252,
"step": 247000
},
{
"epoch": 29.41,
"learning_rate": 9.155080213903744e-06,
"loss": 0.5298,
"step": 247500
},
{
"epoch": 29.47,
"learning_rate": 9.12867234435862e-06,
"loss": 0.519,
"step": 248000
},
{
"epoch": 29.53,
"learning_rate": 9.102317290552586e-06,
"loss": 0.5197,
"step": 248500
},
{
"epoch": 29.59,
"learning_rate": 9.07590942100746e-06,
"loss": 0.5199,
"step": 249000
},
{
"epoch": 29.65,
"learning_rate": 9.049501551462337e-06,
"loss": 0.5279,
"step": 249500
},
{
"epoch": 29.71,
"learning_rate": 9.023093681917212e-06,
"loss": 0.5245,
"step": 250000
},
{
"epoch": 29.77,
"learning_rate": 8.996738628111179e-06,
"loss": 0.5243,
"step": 250500
},
{
"epoch": 29.83,
"learning_rate": 8.970330758566053e-06,
"loss": 0.5296,
"step": 251000
},
{
"epoch": 29.89,
"learning_rate": 8.94397570476002e-06,
"loss": 0.5266,
"step": 251500
},
{
"epoch": 29.95,
"learning_rate": 8.917567835214893e-06,
"loss": 0.5282,
"step": 252000
},
{
"epoch": 30.01,
"learning_rate": 8.89115996566977e-06,
"loss": 0.5299,
"step": 252500
},
{
"epoch": 30.07,
"learning_rate": 8.864752096124645e-06,
"loss": 0.5238,
"step": 253000
},
{
"epoch": 30.12,
"learning_rate": 8.838344226579521e-06,
"loss": 0.5256,
"step": 253500
},
{
"epoch": 30.18,
"learning_rate": 8.811936357034397e-06,
"loss": 0.522,
"step": 254000
},
{
"epoch": 30.24,
"learning_rate": 8.785528487489271e-06,
"loss": 0.5249,
"step": 254500
},
{
"epoch": 30.3,
"learning_rate": 8.759120617944147e-06,
"loss": 0.5206,
"step": 255000
},
{
"epoch": 30.36,
"learning_rate": 8.732712748399023e-06,
"loss": 0.5193,
"step": 255500
},
{
"epoch": 30.42,
"learning_rate": 8.70635769459299e-06,
"loss": 0.529,
"step": 256000
},
{
"epoch": 30.48,
"learning_rate": 8.680002640786956e-06,
"loss": 0.5159,
"step": 256500
},
{
"epoch": 30.54,
"learning_rate": 8.653594771241832e-06,
"loss": 0.5337,
"step": 257000
},
{
"epoch": 30.6,
"learning_rate": 8.627186901696706e-06,
"loss": 0.5298,
"step": 257500
},
{
"epoch": 30.66,
"learning_rate": 8.600779032151582e-06,
"loss": 0.5296,
"step": 258000
},
{
"epoch": 30.72,
"learning_rate": 8.574371162606458e-06,
"loss": 0.5227,
"step": 258500
},
{
"epoch": 30.78,
"learning_rate": 8.547963293061334e-06,
"loss": 0.5296,
"step": 259000
},
{
"epoch": 30.84,
"learning_rate": 8.52155542351621e-06,
"loss": 0.53,
"step": 259500
},
{
"epoch": 30.9,
"learning_rate": 8.495147553971084e-06,
"loss": 0.5256,
"step": 260000
},
{
"epoch": 30.96,
"learning_rate": 8.46873968442596e-06,
"loss": 0.5272,
"step": 260500
},
{
"epoch": 31.02,
"learning_rate": 8.442384630619926e-06,
"loss": 0.5294,
"step": 261000
},
{
"epoch": 31.08,
"learning_rate": 8.415976761074802e-06,
"loss": 0.511,
"step": 261500
},
{
"epoch": 31.13,
"learning_rate": 8.389568891529678e-06,
"loss": 0.5233,
"step": 262000
},
{
"epoch": 31.19,
"learning_rate": 8.363161021984552e-06,
"loss": 0.5217,
"step": 262500
},
{
"epoch": 31.25,
"learning_rate": 8.336753152439428e-06,
"loss": 0.5162,
"step": 263000
},
{
"epoch": 31.31,
"learning_rate": 8.310345282894304e-06,
"loss": 0.5129,
"step": 263500
},
{
"epoch": 31.37,
"learning_rate": 8.28393741334918e-06,
"loss": 0.5178,
"step": 264000
},
{
"epoch": 31.43,
"learning_rate": 8.257529543804056e-06,
"loss": 0.5155,
"step": 264500
},
{
"epoch": 31.49,
"learning_rate": 8.23112167425893e-06,
"loss": 0.5211,
"step": 265000
},
{
"epoch": 31.55,
"learning_rate": 8.204819436191986e-06,
"loss": 0.5248,
"step": 265500
},
{
"epoch": 31.61,
"learning_rate": 8.17841156664686e-06,
"loss": 0.5344,
"step": 266000
},
{
"epoch": 31.67,
"learning_rate": 8.152003697101737e-06,
"loss": 0.5237,
"step": 266500
},
{
"epoch": 31.73,
"learning_rate": 8.125595827556612e-06,
"loss": 0.5172,
"step": 267000
},
{
"epoch": 31.79,
"learning_rate": 8.099240773750579e-06,
"loss": 0.5237,
"step": 267500
},
{
"epoch": 31.85,
"learning_rate": 8.072832904205455e-06,
"loss": 0.5229,
"step": 268000
},
{
"epoch": 31.91,
"learning_rate": 8.046425034660329e-06,
"loss": 0.5186,
"step": 268500
},
{
"epoch": 31.97,
"learning_rate": 8.020069980854295e-06,
"loss": 0.5287,
"step": 269000
},
{
"epoch": 32.03,
"learning_rate": 7.993662111309171e-06,
"loss": 0.527,
"step": 269500
},
{
"epoch": 32.09,
"learning_rate": 7.967254241764047e-06,
"loss": 0.5139,
"step": 270000
},
{
"epoch": 32.14,
"learning_rate": 7.940846372218923e-06,
"loss": 0.5184,
"step": 270500
},
{
"epoch": 32.2,
"learning_rate": 7.914438502673799e-06,
"loss": 0.5189,
"step": 271000
},
{
"epoch": 32.26,
"learning_rate": 7.888030633128673e-06,
"loss": 0.5221,
"step": 271500
},
{
"epoch": 32.32,
"learning_rate": 7.861622763583549e-06,
"loss": 0.5176,
"step": 272000
},
{
"epoch": 32.38,
"learning_rate": 7.835267709777515e-06,
"loss": 0.5134,
"step": 272500
},
{
"epoch": 32.44,
"learning_rate": 7.80885984023239e-06,
"loss": 0.5187,
"step": 273000
},
{
"epoch": 32.5,
"learning_rate": 7.782451970687265e-06,
"loss": 0.522,
"step": 273500
},
{
"epoch": 32.56,
"learning_rate": 7.756044101142141e-06,
"loss": 0.5151,
"step": 274000
},
{
"epoch": 32.62,
"learning_rate": 7.729636231597017e-06,
"loss": 0.5237,
"step": 274500
},
{
"epoch": 32.68,
"learning_rate": 7.703228362051893e-06,
"loss": 0.5176,
"step": 275000
},
{
"epoch": 32.74,
"learning_rate": 7.676820492506767e-06,
"loss": 0.5227,
"step": 275500
},
{
"epoch": 32.8,
"learning_rate": 7.650412622961643e-06,
"loss": 0.5162,
"step": 276000
},
{
"epoch": 32.86,
"learning_rate": 7.624004753416518e-06,
"loss": 0.521,
"step": 276500
},
{
"epoch": 32.92,
"learning_rate": 7.597596883871394e-06,
"loss": 0.5231,
"step": 277000
},
{
"epoch": 32.98,
"learning_rate": 7.57124183006536e-06,
"loss": 0.5198,
"step": 277500
},
{
"epoch": 33.04,
"learning_rate": 7.5448339605202355e-06,
"loss": 0.5086,
"step": 278000
},
{
"epoch": 33.1,
"learning_rate": 7.518426090975111e-06,
"loss": 0.5124,
"step": 278500
},
{
"epoch": 33.16,
"learning_rate": 7.4920182214299865e-06,
"loss": 0.5175,
"step": 279000
},
{
"epoch": 33.21,
"learning_rate": 7.465663167623952e-06,
"loss": 0.5131,
"step": 279500
},
{
"epoch": 33.27,
"learning_rate": 7.439255298078828e-06,
"loss": 0.5114,
"step": 280000
},
{
"epoch": 33.33,
"learning_rate": 7.412847428533704e-06,
"loss": 0.5118,
"step": 280500
},
{
"epoch": 33.39,
"learning_rate": 7.38643955898858e-06,
"loss": 0.5116,
"step": 281000
},
{
"epoch": 33.45,
"learning_rate": 7.360084505182545e-06,
"loss": 0.5197,
"step": 281500
},
{
"epoch": 33.51,
"learning_rate": 7.33367663563742e-06,
"loss": 0.5161,
"step": 282000
},
{
"epoch": 33.57,
"learning_rate": 7.307268766092296e-06,
"loss": 0.5166,
"step": 282500
},
{
"epoch": 33.63,
"learning_rate": 7.280913712286261e-06,
"loss": 0.5203,
"step": 283000
},
{
"epoch": 33.69,
"learning_rate": 7.254505842741137e-06,
"loss": 0.513,
"step": 283500
},
{
"epoch": 33.75,
"learning_rate": 7.228097973196013e-06,
"loss": 0.5139,
"step": 284000
},
{
"epoch": 33.81,
"learning_rate": 7.201690103650889e-06,
"loss": 0.5123,
"step": 284500
},
{
"epoch": 33.87,
"learning_rate": 7.175282234105763e-06,
"loss": 0.5191,
"step": 285000
},
{
"epoch": 33.93,
"learning_rate": 7.148874364560639e-06,
"loss": 0.5228,
"step": 285500
},
{
"epoch": 33.99,
"learning_rate": 7.122466495015515e-06,
"loss": 0.5165,
"step": 286000
},
{
"epoch": 34.05,
"learning_rate": 7.096058625470391e-06,
"loss": 0.5089,
"step": 286500
},
{
"epoch": 34.11,
"learning_rate": 7.069650755925266e-06,
"loss": 0.5066,
"step": 287000
},
{
"epoch": 34.17,
"learning_rate": 7.043242886380141e-06,
"loss": 0.514,
"step": 287500
},
{
"epoch": 34.22,
"learning_rate": 7.0168878325741075e-06,
"loss": 0.5099,
"step": 288000
},
{
"epoch": 34.28,
"learning_rate": 6.9904799630289834e-06,
"loss": 0.5124,
"step": 288500
},
{
"epoch": 34.34,
"learning_rate": 6.964072093483859e-06,
"loss": 0.5097,
"step": 289000
},
{
"epoch": 34.4,
"learning_rate": 6.9376642239387344e-06,
"loss": 0.5155,
"step": 289500
},
{
"epoch": 34.46,
"learning_rate": 6.9112563543936095e-06,
"loss": 0.5146,
"step": 290000
},
{
"epoch": 34.52,
"learning_rate": 6.8848484848484854e-06,
"loss": 0.5178,
"step": 290500
},
{
"epoch": 34.58,
"learning_rate": 6.858493431042451e-06,
"loss": 0.5158,
"step": 291000
},
{
"epoch": 34.64,
"learning_rate": 6.832085561497327e-06,
"loss": 0.5156,
"step": 291500
},
{
"epoch": 34.7,
"learning_rate": 6.805677691952203e-06,
"loss": 0.5132,
"step": 292000
},
{
"epoch": 34.76,
"learning_rate": 6.779322638146169e-06,
"loss": 0.5171,
"step": 292500
},
{
"epoch": 34.82,
"learning_rate": 6.752914768601043e-06,
"loss": 0.5186,
"step": 293000
},
{
"epoch": 34.88,
"learning_rate": 6.726506899055919e-06,
"loss": 0.5098,
"step": 293500
},
{
"epoch": 34.94,
"learning_rate": 6.700099029510795e-06,
"loss": 0.5044,
"step": 294000
},
{
"epoch": 35.0,
"learning_rate": 6.673691159965671e-06,
"loss": 0.5106,
"step": 294500
},
{
"epoch": 35.06,
"learning_rate": 6.647336106159636e-06,
"loss": 0.5083,
"step": 295000
},
{
"epoch": 35.12,
"learning_rate": 6.620928236614511e-06,
"loss": 0.5165,
"step": 295500
},
{
"epoch": 35.18,
"learning_rate": 6.594520367069387e-06,
"loss": 0.5091,
"step": 296000
},
{
"epoch": 35.23,
"learning_rate": 6.568112497524262e-06,
"loss": 0.506,
"step": 296500
},
{
"epoch": 35.29,
"learning_rate": 6.541704627979138e-06,
"loss": 0.5113,
"step": 297000
},
{
"epoch": 35.35,
"learning_rate": 6.515296758434014e-06,
"loss": 0.5108,
"step": 297500
},
{
"epoch": 35.41,
"learning_rate": 6.488888888888889e-06,
"loss": 0.5115,
"step": 298000
},
{
"epoch": 35.47,
"learning_rate": 6.462533835082855e-06,
"loss": 0.512,
"step": 298500
},
{
"epoch": 35.53,
"learning_rate": 6.4361259655377306e-06,
"loss": 0.5078,
"step": 299000
},
{
"epoch": 35.59,
"learning_rate": 6.4097180959926065e-06,
"loss": 0.5146,
"step": 299500
},
{
"epoch": 35.65,
"learning_rate": 6.383310226447482e-06,
"loss": 0.5133,
"step": 300000
},
{
"epoch": 35.71,
"learning_rate": 6.356902356902357e-06,
"loss": 0.5201,
"step": 300500
},
{
"epoch": 35.77,
"learning_rate": 6.3304944873572326e-06,
"loss": 0.5132,
"step": 301000
},
{
"epoch": 35.83,
"learning_rate": 6.304139433551199e-06,
"loss": 0.5139,
"step": 301500
},
{
"epoch": 35.89,
"learning_rate": 6.277731564006075e-06,
"loss": 0.5084,
"step": 302000
},
{
"epoch": 35.95,
"learning_rate": 6.25132369446095e-06,
"loss": 0.5155,
"step": 302500
},
{
"epoch": 36.01,
"learning_rate": 6.224915824915826e-06,
"loss": 0.5168,
"step": 303000
},
{
"epoch": 36.07,
"learning_rate": 6.198507955370701e-06,
"loss": 0.5004,
"step": 303500
},
{
"epoch": 36.13,
"learning_rate": 6.172100085825577e-06,
"loss": 0.5142,
"step": 304000
},
{
"epoch": 36.19,
"learning_rate": 6.1458506634977235e-06,
"loss": 0.5038,
"step": 304500
},
{
"epoch": 36.24,
"learning_rate": 6.119442793952598e-06,
"loss": 0.5157,
"step": 305000
},
{
"epoch": 36.3,
"learning_rate": 6.093034924407474e-06,
"loss": 0.5114,
"step": 305500
},
{
"epoch": 36.36,
"learning_rate": 6.06662705486235e-06,
"loss": 0.5136,
"step": 306000
},
{
"epoch": 36.42,
"learning_rate": 6.0402191853172255e-06,
"loss": 0.5034,
"step": 306500
},
{
"epoch": 36.48,
"learning_rate": 6.0138113157721e-06,
"loss": 0.517,
"step": 307000
},
{
"epoch": 36.54,
"learning_rate": 5.987456261966066e-06,
"loss": 0.516,
"step": 307500
},
{
"epoch": 36.6,
"learning_rate": 5.961048392420942e-06,
"loss": 0.5104,
"step": 308000
},
{
"epoch": 36.66,
"learning_rate": 5.934640522875818e-06,
"loss": 0.5108,
"step": 308500
},
{
"epoch": 36.72,
"learning_rate": 5.908232653330693e-06,
"loss": 0.5094,
"step": 309000
},
{
"epoch": 36.78,
"learning_rate": 5.881824783785569e-06,
"loss": 0.5145,
"step": 309500
},
{
"epoch": 36.84,
"learning_rate": 5.855416914240444e-06,
"loss": 0.5176,
"step": 310000
},
{
"epoch": 36.9,
"learning_rate": 5.82900904469532e-06,
"loss": 0.5082,
"step": 310500
},
{
"epoch": 36.96,
"learning_rate": 5.802601175150196e-06,
"loss": 0.5141,
"step": 311000
},
{
"epoch": 37.02,
"learning_rate": 5.776193305605071e-06,
"loss": 0.5116,
"step": 311500
},
{
"epoch": 37.08,
"learning_rate": 5.749785436059946e-06,
"loss": 0.5058,
"step": 312000
},
{
"epoch": 37.14,
"learning_rate": 5.723377566514822e-06,
"loss": 0.5042,
"step": 312500
},
{
"epoch": 37.2,
"learning_rate": 5.696969696969698e-06,
"loss": 0.5097,
"step": 313000
},
{
"epoch": 37.25,
"learning_rate": 5.670614643163663e-06,
"loss": 0.5118,
"step": 313500
},
{
"epoch": 37.31,
"learning_rate": 5.644206773618539e-06,
"loss": 0.5056,
"step": 314000
},
{
"epoch": 37.37,
"learning_rate": 5.617798904073415e-06,
"loss": 0.5143,
"step": 314500
},
{
"epoch": 37.43,
"learning_rate": 5.59144385026738e-06,
"loss": 0.516,
"step": 315000
},
{
"epoch": 37.49,
"learning_rate": 5.565035980722255e-06,
"loss": 0.5028,
"step": 315500
},
{
"epoch": 37.55,
"learning_rate": 5.538628111177131e-06,
"loss": 0.5079,
"step": 316000
},
{
"epoch": 37.61,
"learning_rate": 5.512220241632007e-06,
"loss": 0.5101,
"step": 316500
},
{
"epoch": 37.67,
"learning_rate": 5.485812372086883e-06,
"loss": 0.5101,
"step": 317000
},
{
"epoch": 37.73,
"learning_rate": 5.459404502541757e-06,
"loss": 0.5077,
"step": 317500
},
{
"epoch": 37.79,
"learning_rate": 5.432996632996633e-06,
"loss": 0.5094,
"step": 318000
},
{
"epoch": 37.85,
"learning_rate": 5.406588763451509e-06,
"loss": 0.5128,
"step": 318500
},
{
"epoch": 37.91,
"learning_rate": 5.380180893906385e-06,
"loss": 0.5121,
"step": 319000
},
{
"epoch": 37.97,
"learning_rate": 5.353773024361259e-06,
"loss": 0.5071,
"step": 319500
},
{
"epoch": 38.03,
"learning_rate": 5.327365154816135e-06,
"loss": 0.5088,
"step": 320000
},
{
"epoch": 38.09,
"learning_rate": 5.300957285271011e-06,
"loss": 0.5037,
"step": 320500
},
{
"epoch": 38.15,
"learning_rate": 5.274549415725887e-06,
"loss": 0.5083,
"step": 321000
},
{
"epoch": 38.21,
"learning_rate": 5.248141546180763e-06,
"loss": 0.5101,
"step": 321500
},
{
"epoch": 38.26,
"learning_rate": 5.221733676635637e-06,
"loss": 0.5046,
"step": 322000
},
{
"epoch": 38.32,
"learning_rate": 5.195325807090513e-06,
"loss": 0.5021,
"step": 322500
},
{
"epoch": 38.38,
"learning_rate": 5.169023569023569e-06,
"loss": 0.5116,
"step": 323000
},
{
"epoch": 38.44,
"learning_rate": 5.142615699478445e-06,
"loss": 0.5018,
"step": 323500
},
{
"epoch": 38.5,
"learning_rate": 5.116207829933321e-06,
"loss": 0.5087,
"step": 324000
},
{
"epoch": 38.56,
"learning_rate": 5.089799960388197e-06,
"loss": 0.5033,
"step": 324500
},
{
"epoch": 38.62,
"learning_rate": 5.063392090843071e-06,
"loss": 0.515,
"step": 325000
},
{
"epoch": 38.68,
"learning_rate": 5.036984221297947e-06,
"loss": 0.5031,
"step": 325500
},
{
"epoch": 38.74,
"learning_rate": 5.010576351752823e-06,
"loss": 0.5077,
"step": 326000
},
{
"epoch": 38.8,
"learning_rate": 4.984168482207699e-06,
"loss": 0.5142,
"step": 326500
},
{
"epoch": 38.86,
"learning_rate": 4.957760612662574e-06,
"loss": 0.5064,
"step": 327000
},
{
"epoch": 38.92,
"learning_rate": 4.93135274311745e-06,
"loss": 0.5103,
"step": 327500
},
{
"epoch": 38.98,
"learning_rate": 4.904944873572325e-06,
"loss": 0.5062,
"step": 328000
},
{
"epoch": 39.04,
"learning_rate": 4.878537004027201e-06,
"loss": 0.5077,
"step": 328500
},
{
"epoch": 39.1,
"learning_rate": 4.852181950221166e-06,
"loss": 0.4985,
"step": 329000
},
{
"epoch": 39.16,
"learning_rate": 4.825774080676042e-06,
"loss": 0.4965,
"step": 329500
},
{
"epoch": 39.22,
"learning_rate": 4.799419026870007e-06,
"loss": 0.5059,
"step": 330000
},
{
"epoch": 39.28,
"learning_rate": 4.773011157324883e-06,
"loss": 0.5083,
"step": 330500
},
{
"epoch": 39.33,
"learning_rate": 4.746603287779758e-06,
"loss": 0.4982,
"step": 331000
},
{
"epoch": 39.39,
"learning_rate": 4.720195418234634e-06,
"loss": 0.5043,
"step": 331500
},
{
"epoch": 39.45,
"learning_rate": 4.6938403644286005e-06,
"loss": 0.5067,
"step": 332000
},
{
"epoch": 39.51,
"learning_rate": 4.667432494883476e-06,
"loss": 0.5068,
"step": 332500
},
{
"epoch": 39.57,
"learning_rate": 4.6410246253383515e-06,
"loss": 0.5096,
"step": 333000
},
{
"epoch": 39.63,
"learning_rate": 4.614616755793227e-06,
"loss": 0.5103,
"step": 333500
},
{
"epoch": 39.69,
"learning_rate": 4.5882088862481025e-06,
"loss": 0.5069,
"step": 334000
},
{
"epoch": 39.75,
"learning_rate": 4.561853832442068e-06,
"loss": 0.5107,
"step": 334500
},
{
"epoch": 39.81,
"learning_rate": 4.535445962896944e-06,
"loss": 0.5059,
"step": 335000
},
{
"epoch": 39.87,
"learning_rate": 4.509038093351819e-06,
"loss": 0.51,
"step": 335500
},
{
"epoch": 39.93,
"learning_rate": 4.482630223806695e-06,
"loss": 0.5019,
"step": 336000
},
{
"epoch": 39.99,
"learning_rate": 4.45622235426157e-06,
"loss": 0.505,
"step": 336500
},
{
"epoch": 40.05,
"learning_rate": 4.429867300455536e-06,
"loss": 0.4982,
"step": 337000
},
{
"epoch": 40.11,
"learning_rate": 4.403459430910412e-06,
"loss": 0.5016,
"step": 337500
},
{
"epoch": 40.17,
"learning_rate": 4.377051561365287e-06,
"loss": 0.5085,
"step": 338000
},
{
"epoch": 40.23,
"learning_rate": 4.350643691820163e-06,
"loss": 0.5048,
"step": 338500
},
{
"epoch": 40.29,
"learning_rate": 4.324288638014128e-06,
"loss": 0.4955,
"step": 339000
},
{
"epoch": 40.34,
"learning_rate": 4.297880768469004e-06,
"loss": 0.4997,
"step": 339500
},
{
"epoch": 40.4,
"learning_rate": 4.271472898923879e-06,
"loss": 0.5124,
"step": 340000
},
{
"epoch": 40.46,
"learning_rate": 4.245065029378755e-06,
"loss": 0.511,
"step": 340500
},
{
"epoch": 40.52,
"learning_rate": 4.2187099755727216e-06,
"loss": 0.5061,
"step": 341000
},
{
"epoch": 40.58,
"learning_rate": 4.192302106027597e-06,
"loss": 0.5064,
"step": 341500
},
{
"epoch": 40.64,
"learning_rate": 4.165947052221562e-06,
"loss": 0.5041,
"step": 342000
},
{
"epoch": 40.7,
"learning_rate": 4.139539182676438e-06,
"loss": 0.5062,
"step": 342500
},
{
"epoch": 40.76,
"learning_rate": 4.113131313131313e-06,
"loss": 0.5009,
"step": 343000
},
{
"epoch": 40.82,
"learning_rate": 4.086723443586189e-06,
"loss": 0.511,
"step": 343500
},
{
"epoch": 40.88,
"learning_rate": 4.060315574041064e-06,
"loss": 0.5067,
"step": 344000
},
{
"epoch": 40.94,
"learning_rate": 4.03396052023503e-06,
"loss": 0.5067,
"step": 344500
},
{
"epoch": 41.0,
"learning_rate": 4.007552650689906e-06,
"loss": 0.5056,
"step": 345000
},
{
"epoch": 41.06,
"learning_rate": 3.981144781144781e-06,
"loss": 0.4987,
"step": 345500
},
{
"epoch": 41.12,
"learning_rate": 3.954736911599657e-06,
"loss": 0.499,
"step": 346000
},
{
"epoch": 41.18,
"learning_rate": 3.928329042054533e-06,
"loss": 0.5054,
"step": 346500
},
{
"epoch": 41.24,
"learning_rate": 3.901921172509408e-06,
"loss": 0.501,
"step": 347000
},
{
"epoch": 41.3,
"learning_rate": 3.875513302964284e-06,
"loss": 0.5058,
"step": 347500
},
{
"epoch": 41.35,
"learning_rate": 3.849105433419159e-06,
"loss": 0.4985,
"step": 348000
},
{
"epoch": 41.41,
"learning_rate": 3.822697563874035e-06,
"loss": 0.5019,
"step": 348500
},
{
"epoch": 41.47,
"learning_rate": 3.7963953258070908e-06,
"loss": 0.508,
"step": 349000
},
{
"epoch": 41.53,
"learning_rate": 3.7699874562619667e-06,
"loss": 0.5063,
"step": 349500
},
{
"epoch": 41.59,
"learning_rate": 3.7435795867168418e-06,
"loss": 0.5048,
"step": 350000
},
{
"epoch": 41.65,
"learning_rate": 3.7171717171717177e-06,
"loss": 0.5026,
"step": 350500
},
{
"epoch": 41.71,
"learning_rate": 3.6907638476265928e-06,
"loss": 0.504,
"step": 351000
},
{
"epoch": 41.77,
"learning_rate": 3.6643559780814687e-06,
"loss": 0.5011,
"step": 351500
},
{
"epoch": 41.83,
"learning_rate": 3.637948108536344e-06,
"loss": 0.503,
"step": 352000
},
{
"epoch": 41.89,
"learning_rate": 3.6115402389912197e-06,
"loss": 0.5153,
"step": 352500
},
{
"epoch": 41.95,
"learning_rate": 3.5851323694460956e-06,
"loss": 0.5018,
"step": 353000
},
{
"epoch": 42.01,
"learning_rate": 3.5587244999009707e-06,
"loss": 0.5066,
"step": 353500
},
{
"epoch": 42.07,
"learning_rate": 3.5323694460949365e-06,
"loss": 0.5071,
"step": 354000
},
{
"epoch": 42.13,
"learning_rate": 3.5059615765498124e-06,
"loss": 0.5045,
"step": 354500
},
{
"epoch": 42.19,
"learning_rate": 3.4795537070046875e-06,
"loss": 0.4959,
"step": 355000
},
{
"epoch": 42.25,
"learning_rate": 3.4531458374595634e-06,
"loss": 0.5034,
"step": 355500
},
{
"epoch": 42.31,
"learning_rate": 3.426790783653529e-06,
"loss": 0.5075,
"step": 356000
},
{
"epoch": 42.36,
"learning_rate": 3.400435729847495e-06,
"loss": 0.5076,
"step": 356500
},
{
"epoch": 42.42,
"learning_rate": 3.37402786030237e-06,
"loss": 0.4914,
"step": 357000
},
{
"epoch": 42.48,
"learning_rate": 3.347619990757246e-06,
"loss": 0.4985,
"step": 357500
},
{
"epoch": 42.54,
"learning_rate": 3.321212121212121e-06,
"loss": 0.5032,
"step": 358000
},
{
"epoch": 42.6,
"learning_rate": 3.294804251666997e-06,
"loss": 0.5022,
"step": 358500
},
{
"epoch": 42.66,
"learning_rate": 3.268396382121873e-06,
"loss": 0.5061,
"step": 359000
},
{
"epoch": 42.72,
"learning_rate": 3.241988512576748e-06,
"loss": 0.5,
"step": 359500
},
{
"epoch": 42.78,
"learning_rate": 3.215580643031624e-06,
"loss": 0.5041,
"step": 360000
},
{
"epoch": 42.84,
"learning_rate": 3.189172773486499e-06,
"loss": 0.5044,
"step": 360500
},
{
"epoch": 42.9,
"learning_rate": 3.162817719680465e-06,
"loss": 0.502,
"step": 361000
},
{
"epoch": 42.96,
"learning_rate": 3.1364098501353407e-06,
"loss": 0.5066,
"step": 361500
},
{
"epoch": 43.02,
"learning_rate": 3.110001980590216e-06,
"loss": 0.508,
"step": 362000
},
{
"epoch": 43.08,
"learning_rate": 3.0835941110450917e-06,
"loss": 0.4986,
"step": 362500
},
{
"epoch": 43.14,
"learning_rate": 3.0571862414999672e-06,
"loss": 0.5001,
"step": 363000
},
{
"epoch": 43.2,
"learning_rate": 3.0308840034330233e-06,
"loss": 0.4982,
"step": 363500
},
{
"epoch": 43.26,
"learning_rate": 3.004476133887899e-06,
"loss": 0.5028,
"step": 364000
},
{
"epoch": 43.32,
"learning_rate": 2.9780682643427743e-06,
"loss": 0.5069,
"step": 364500
},
{
"epoch": 43.37,
"learning_rate": 2.9516603947976502e-06,
"loss": 0.5049,
"step": 365000
},
{
"epoch": 43.43,
"learning_rate": 2.9252525252525253e-06,
"loss": 0.499,
"step": 365500
},
{
"epoch": 43.49,
"learning_rate": 2.8988446557074012e-06,
"loss": 0.5101,
"step": 366000
},
{
"epoch": 43.55,
"learning_rate": 2.8724367861622763e-06,
"loss": 0.4984,
"step": 366500
},
{
"epoch": 43.61,
"learning_rate": 2.8460289166171522e-06,
"loss": 0.5001,
"step": 367000
},
{
"epoch": 43.67,
"learning_rate": 2.8196210470720277e-06,
"loss": 0.504,
"step": 367500
},
{
"epoch": 43.73,
"learning_rate": 2.7932659932659935e-06,
"loss": 0.502,
"step": 368000
},
{
"epoch": 43.79,
"learning_rate": 2.766858123720869e-06,
"loss": 0.4984,
"step": 368500
},
{
"epoch": 43.85,
"learning_rate": 2.7404502541757445e-06,
"loss": 0.5024,
"step": 369000
},
{
"epoch": 43.91,
"learning_rate": 2.71404238463062e-06,
"loss": 0.5015,
"step": 369500
},
{
"epoch": 43.97,
"learning_rate": 2.687687330824586e-06,
"loss": 0.4996,
"step": 370000
},
{
"epoch": 44.03,
"learning_rate": 2.6612794612794613e-06,
"loss": 0.4953,
"step": 370500
},
{
"epoch": 44.09,
"learning_rate": 2.634871591734337e-06,
"loss": 0.4966,
"step": 371000
},
{
"epoch": 44.15,
"learning_rate": 2.6084637221892128e-06,
"loss": 0.4956,
"step": 371500
},
{
"epoch": 44.21,
"learning_rate": 2.5820558526440883e-06,
"loss": 0.4997,
"step": 372000
},
{
"epoch": 44.27,
"learning_rate": 2.555700798838054e-06,
"loss": 0.4986,
"step": 372500
},
{
"epoch": 44.33,
"learning_rate": 2.5292929292929296e-06,
"loss": 0.502,
"step": 373000
},
{
"epoch": 44.39,
"learning_rate": 2.502885059747805e-06,
"loss": 0.4984,
"step": 373500
},
{
"epoch": 44.44,
"learning_rate": 2.4764771902026806e-06,
"loss": 0.4984,
"step": 374000
},
{
"epoch": 44.5,
"learning_rate": 2.4501221363966464e-06,
"loss": 0.502,
"step": 374500
},
{
"epoch": 44.56,
"learning_rate": 2.423714266851522e-06,
"loss": 0.4959,
"step": 375000
},
{
"epoch": 44.62,
"learning_rate": 2.3973063973063978e-06,
"loss": 0.4984,
"step": 375500
},
{
"epoch": 44.68,
"learning_rate": 2.3708985277612733e-06,
"loss": 0.5018,
"step": 376000
},
{
"epoch": 44.74,
"learning_rate": 2.3444906582161488e-06,
"loss": 0.5011,
"step": 376500
},
{
"epoch": 44.8,
"learning_rate": 2.3180827886710243e-06,
"loss": 0.503,
"step": 377000
},
{
"epoch": 44.86,
"learning_rate": 2.2916749191258998e-06,
"loss": 0.4926,
"step": 377500
},
{
"epoch": 44.92,
"learning_rate": 2.2652670495807753e-06,
"loss": 0.4976,
"step": 378000
},
{
"epoch": 44.98,
"learning_rate": 2.238911995774741e-06,
"loss": 0.5007,
"step": 378500
},
{
"epoch": 45.04,
"learning_rate": 2.2125041262296166e-06,
"loss": 0.5084,
"step": 379000
},
{
"epoch": 45.1,
"learning_rate": 2.186096256684492e-06,
"loss": 0.5022,
"step": 379500
},
{
"epoch": 45.16,
"learning_rate": 2.1596883871393676e-06,
"loss": 0.4922,
"step": 380000
},
{
"epoch": 45.22,
"learning_rate": 2.1332805175942435e-06,
"loss": 0.4957,
"step": 380500
},
{
"epoch": 45.28,
"learning_rate": 2.1069254637882093e-06,
"loss": 0.5032,
"step": 381000
},
{
"epoch": 45.34,
"learning_rate": 2.080570409982175e-06,
"loss": 0.504,
"step": 381500
},
{
"epoch": 45.4,
"learning_rate": 2.0541625404370506e-06,
"loss": 0.5108,
"step": 382000
},
{
"epoch": 45.45,
"learning_rate": 2.027754670891926e-06,
"loss": 0.4992,
"step": 382500
},
{
"epoch": 45.51,
"learning_rate": 2.0013468013468016e-06,
"loss": 0.4986,
"step": 383000
},
{
"epoch": 45.57,
"learning_rate": 1.974938931801677e-06,
"loss": 0.4964,
"step": 383500
},
{
"epoch": 45.63,
"learning_rate": 1.948583877995643e-06,
"loss": 0.4974,
"step": 384000
},
{
"epoch": 45.69,
"learning_rate": 1.9221760084505184e-06,
"loss": 0.4955,
"step": 384500
},
{
"epoch": 45.75,
"learning_rate": 1.895768138905394e-06,
"loss": 0.5006,
"step": 385000
},
{
"epoch": 45.81,
"learning_rate": 1.8693602693602694e-06,
"loss": 0.5056,
"step": 385500
},
{
"epoch": 45.87,
"learning_rate": 1.842952399815145e-06,
"loss": 0.5018,
"step": 386000
},
{
"epoch": 45.93,
"learning_rate": 1.8165445302700204e-06,
"loss": 0.4974,
"step": 386500
},
{
"epoch": 45.99,
"learning_rate": 1.7901366607248963e-06,
"loss": 0.5057,
"step": 387000
},
{
"epoch": 46.05,
"learning_rate": 1.7637816069188621e-06,
"loss": 0.4892,
"step": 387500
},
{
"epoch": 46.11,
"learning_rate": 1.7373737373737376e-06,
"loss": 0.4942,
"step": 388000
},
{
"epoch": 46.17,
"learning_rate": 1.7109658678286131e-06,
"loss": 0.5011,
"step": 388500
},
{
"epoch": 46.23,
"learning_rate": 1.6845579982834886e-06,
"loss": 0.4945,
"step": 389000
},
{
"epoch": 46.29,
"learning_rate": 1.6581501287383641e-06,
"loss": 0.4942,
"step": 389500
},
{
"epoch": 46.35,
"learning_rate": 1.6317422591932396e-06,
"loss": 0.4968,
"step": 390000
},
{
"epoch": 46.41,
"learning_rate": 1.6053343896481151e-06,
"loss": 0.4979,
"step": 390500
},
{
"epoch": 46.46,
"learning_rate": 1.5789265201029908e-06,
"loss": 0.5005,
"step": 391000
},
{
"epoch": 46.52,
"learning_rate": 1.5525186505578663e-06,
"loss": 0.4946,
"step": 391500
},
{
"epoch": 46.58,
"learning_rate": 1.5261107810127418e-06,
"loss": 0.4923,
"step": 392000
},
{
"epoch": 46.64,
"learning_rate": 1.4997557272067076e-06,
"loss": 0.5,
"step": 392500
},
{
"epoch": 46.7,
"learning_rate": 1.4733478576615833e-06,
"loss": 0.4971,
"step": 393000
},
{
"epoch": 46.76,
"learning_rate": 1.4469399881164588e-06,
"loss": 0.4977,
"step": 393500
},
{
"epoch": 46.82,
"learning_rate": 1.4205321185713343e-06,
"loss": 0.4925,
"step": 394000
},
{
"epoch": 46.88,
"learning_rate": 1.3941770647653001e-06,
"loss": 0.4987,
"step": 394500
},
{
"epoch": 46.94,
"learning_rate": 1.3677691952201759e-06,
"loss": 0.5004,
"step": 395000
},
{
"epoch": 47.0,
"learning_rate": 1.3413613256750514e-06,
"loss": 0.4961,
"step": 395500
},
{
"epoch": 47.06,
"learning_rate": 1.3149534561299269e-06,
"loss": 0.494,
"step": 396000
},
{
"epoch": 47.12,
"learning_rate": 1.2885455865848024e-06,
"loss": 0.4941,
"step": 396500
},
{
"epoch": 47.18,
"learning_rate": 1.2621377170396779e-06,
"loss": 0.5015,
"step": 397000
},
{
"epoch": 47.24,
"learning_rate": 1.2357298474945536e-06,
"loss": 0.4976,
"step": 397500
},
{
"epoch": 47.3,
"learning_rate": 1.209321977949429e-06,
"loss": 0.4887,
"step": 398000
},
{
"epoch": 47.36,
"learning_rate": 1.1829141084043046e-06,
"loss": 0.5032,
"step": 398500
},
{
"epoch": 47.42,
"learning_rate": 1.15650623885918e-06,
"loss": 0.4959,
"step": 399000
},
{
"epoch": 47.47,
"learning_rate": 1.1302568165313265e-06,
"loss": 0.4965,
"step": 399500
},
{
"epoch": 47.53,
"learning_rate": 1.103848946986202e-06,
"loss": 0.4949,
"step": 400000
},
{
"epoch": 47.59,
"learning_rate": 1.0774938931801678e-06,
"loss": 0.4879,
"step": 400500
},
{
"epoch": 47.65,
"learning_rate": 1.0510860236350433e-06,
"loss": 0.4935,
"step": 401000
},
{
"epoch": 47.71,
"learning_rate": 1.024678154089919e-06,
"loss": 0.4954,
"step": 401500
},
{
"epoch": 47.77,
"learning_rate": 9.982702845447945e-07,
"loss": 0.4919,
"step": 402000
},
{
"epoch": 47.83,
"learning_rate": 9.7186241499967e-07,
"loss": 0.4974,
"step": 402500
},
{
"epoch": 47.89,
"learning_rate": 9.454545454545455e-07,
"loss": 0.5004,
"step": 403000
},
{
"epoch": 47.95,
"learning_rate": 9.190466759094211e-07,
"loss": 0.5003,
"step": 403500
},
{
"epoch": 48.01,
"learning_rate": 8.926388063642967e-07,
"loss": 0.5044,
"step": 404000
},
{
"epoch": 48.07,
"learning_rate": 8.662309368191722e-07,
"loss": 0.489,
"step": 404500
},
{
"epoch": 48.13,
"learning_rate": 8.398230672740478e-07,
"loss": 0.4942,
"step": 405000
},
{
"epoch": 48.19,
"learning_rate": 8.134151977289233e-07,
"loss": 0.4996,
"step": 405500
},
{
"epoch": 48.25,
"learning_rate": 7.870073281837988e-07,
"loss": 0.4896,
"step": 406000
},
{
"epoch": 48.31,
"learning_rate": 7.605994586386743e-07,
"loss": 0.494,
"step": 406500
},
{
"epoch": 48.37,
"learning_rate": 7.3419158909355e-07,
"loss": 0.4911,
"step": 407000
},
{
"epoch": 48.43,
"learning_rate": 7.077837195484255e-07,
"loss": 0.4961,
"step": 407500
},
{
"epoch": 48.48,
"learning_rate": 6.81375850003301e-07,
"loss": 0.4894,
"step": 408000
},
{
"epoch": 48.54,
"learning_rate": 6.549679804581765e-07,
"loss": 0.4952,
"step": 408500
},
{
"epoch": 48.6,
"learning_rate": 6.285601109130521e-07,
"loss": 0.5021,
"step": 409000
},
{
"epoch": 48.66,
"learning_rate": 6.021522413679277e-07,
"loss": 0.5009,
"step": 409500
},
{
"epoch": 48.72,
"learning_rate": 5.757971875618935e-07,
"loss": 0.4962,
"step": 410000
},
{
"epoch": 48.78,
"learning_rate": 5.49389318016769e-07,
"loss": 0.4939,
"step": 410500
},
{
"epoch": 48.84,
"learning_rate": 5.229814484716446e-07,
"loss": 0.497,
"step": 411000
},
{
"epoch": 48.9,
"learning_rate": 4.965735789265201e-07,
"loss": 0.4995,
"step": 411500
},
{
"epoch": 48.96,
"learning_rate": 4.7016570938139573e-07,
"loss": 0.5008,
"step": 412000
},
{
"epoch": 49.02,
"learning_rate": 4.4386347131445177e-07,
"loss": 0.5001,
"step": 412500
},
{
"epoch": 49.08,
"learning_rate": 4.174556017693273e-07,
"loss": 0.4934,
"step": 413000
},
{
"epoch": 49.14,
"learning_rate": 3.910477322242028e-07,
"loss": 0.4949,
"step": 413500
},
{
"epoch": 49.2,
"learning_rate": 3.6463986267907843e-07,
"loss": 0.5009,
"step": 414000
},
{
"epoch": 49.26,
"learning_rate": 3.3823199313395393e-07,
"loss": 0.5013,
"step": 414500
},
{
"epoch": 49.32,
"learning_rate": 3.118241235888295e-07,
"loss": 0.5009,
"step": 415000
},
{
"epoch": 49.38,
"learning_rate": 2.8541625404370504e-07,
"loss": 0.4927,
"step": 415500
},
{
"epoch": 49.44,
"learning_rate": 2.590083844985806e-07,
"loss": 0.5005,
"step": 416000
},
{
"epoch": 49.49,
"learning_rate": 2.326533306925464e-07,
"loss": 0.4908,
"step": 416500
},
{
"epoch": 49.55,
"learning_rate": 2.0624546114742194e-07,
"loss": 0.4904,
"step": 417000
},
{
"epoch": 49.61,
"learning_rate": 1.798375916022975e-07,
"loss": 0.5013,
"step": 417500
},
{
"epoch": 49.67,
"learning_rate": 1.5342972205717305e-07,
"loss": 0.4991,
"step": 418000
},
{
"epoch": 49.73,
"learning_rate": 1.270218525120486e-07,
"loss": 0.4921,
"step": 418500
},
{
"epoch": 49.79,
"learning_rate": 1.006667987060144e-07,
"loss": 0.4987,
"step": 419000
},
{
"epoch": 49.85,
"learning_rate": 7.425892916088995e-08,
"loss": 0.4946,
"step": 419500
},
{
"epoch": 49.91,
"learning_rate": 4.790387535485575e-08,
"loss": 0.492,
"step": 420000
},
{
"epoch": 49.97,
"learning_rate": 2.14960058097313e-08,
"loss": 0.4974,
"step": 420500
}
],
"max_steps": 420750,
"num_train_epochs": 50,
"total_flos": 7.065593235141296e+19,
"trial_name": null,
"trial_params": null
}