diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18970 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "global_step": 315900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9984172206394435e-06, + "loss": 2.7157, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.996834441278886e-06, + "loss": 2.0977, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.995251661918329e-06, + "loss": 1.9549, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 4.993668882557772e-06, + "loss": 1.9347, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 4.992086103197215e-06, + "loss": 1.8296, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990503323836657e-06, + "loss": 1.8133, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9889205444761e-06, + "loss": 1.7539, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 4.987337765115544e-06, + "loss": 1.7711, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 4.985754985754986e-06, + "loss": 1.7314, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 4.984172206394429e-06, + "loss": 1.7503, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 4.982589427033872e-06, + "loss": 1.722, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 4.981006647673315e-06, + "loss": 1.719, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9794238683127575e-06, + "loss": 1.7309, + "step": 1300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9778410889522005e-06, + "loss": 1.7112, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 4.976258309591644e-06, + "loss": 1.6686, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 4.974675530231086e-06, + "loss": 1.6855, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 4.973092750870529e-06, + "loss": 1.6698, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 4.971509971509972e-06, + "loss": 1.6515, + "step": 1800 + }, + { + "epoch": 0.03, + "learning_rate": 4.969927192149415e-06, + "loss": 1.6495, + "step": 1900 + }, + { + "epoch": 0.03, + "learning_rate": 4.968344412788857e-06, + "loss": 1.6818, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9667616334283e-06, + "loss": 1.6612, + "step": 2100 + }, + { + "epoch": 0.03, + "learning_rate": 4.965178854067743e-06, + "loss": 1.6644, + "step": 2200 + }, + { + "epoch": 0.04, + "learning_rate": 4.963596074707186e-06, + "loss": 1.6534, + "step": 2300 + }, + { + "epoch": 0.04, + "learning_rate": 4.962013295346629e-06, + "loss": 1.6291, + "step": 2400 + }, + { + "epoch": 0.04, + "learning_rate": 4.960430515986072e-06, + "loss": 1.6356, + "step": 2500 + }, + { + "epoch": 0.04, + "learning_rate": 4.958847736625515e-06, + "loss": 1.6253, + "step": 2600 + }, + { + "epoch": 0.04, + "learning_rate": 4.957264957264958e-06, + "loss": 1.6522, + "step": 2700 + }, + { + "epoch": 0.04, + "learning_rate": 4.955682177904401e-06, + "loss": 1.6269, + "step": 2800 + }, + { + "epoch": 0.05, + "learning_rate": 4.954099398543844e-06, + "loss": 1.5901, + "step": 2900 + }, + { + "epoch": 0.05, + "learning_rate": 4.952516619183286e-06, + "loss": 1.6162, + "step": 3000 + }, + { + "epoch": 0.05, + "learning_rate": 4.950933839822729e-06, + "loss": 1.6118, + "step": 3100 + }, + { + "epoch": 0.05, + "learning_rate": 4.949351060462172e-06, + "loss": 1.6182, + "step": 3200 + }, + { + "epoch": 0.05, + "learning_rate": 4.947768281101615e-06, + "loss": 1.6465, + "step": 3300 + }, + { + "epoch": 0.05, + "learning_rate": 4.946185501741057e-06, + "loss": 1.5952, + "step": 3400 + }, + { + "epoch": 0.06, + "learning_rate": 4.9446027223805e-06, + "loss": 1.6144, + "step": 3500 + }, + { + "epoch": 0.06, + "learning_rate": 4.943019943019943e-06, + "loss": 1.599, + "step": 3600 + }, + { + "epoch": 0.06, + "learning_rate": 4.941437163659386e-06, + "loss": 1.5459, + "step": 3700 + }, + { + "epoch": 0.06, + "learning_rate": 4.939854384298829e-06, + "loss": 1.6284, + "step": 3800 + }, + { + "epoch": 0.06, + "learning_rate": 4.938271604938272e-06, + "loss": 1.6161, + "step": 3900 + }, + { + "epoch": 0.06, + "learning_rate": 4.936688825577715e-06, + "loss": 1.5846, + "step": 4000 + }, + { + "epoch": 0.06, + "learning_rate": 4.935106046217158e-06, + "loss": 1.6138, + "step": 4100 + }, + { + "epoch": 0.07, + "learning_rate": 4.933523266856601e-06, + "loss": 1.5781, + "step": 4200 + }, + { + "epoch": 0.07, + "learning_rate": 4.931940487496044e-06, + "loss": 1.6329, + "step": 4300 + }, + { + "epoch": 0.07, + "learning_rate": 4.930357708135486e-06, + "loss": 1.6064, + "step": 4400 + }, + { + "epoch": 0.07, + "learning_rate": 4.928774928774929e-06, + "loss": 1.5841, + "step": 4500 + }, + { + "epoch": 0.07, + "learning_rate": 4.927192149414372e-06, + "loss": 1.5837, + "step": 4600 + }, + { + "epoch": 0.07, + "learning_rate": 4.925609370053815e-06, + "loss": 1.5881, + "step": 4700 + }, + { + "epoch": 0.08, + "learning_rate": 4.924026590693257e-06, + "loss": 1.5882, + "step": 4800 + }, + { + "epoch": 0.08, + "learning_rate": 4.9224438113327004e-06, + "loss": 1.5677, + "step": 4900 + }, + { + "epoch": 0.08, + "learning_rate": 4.9208610319721435e-06, + "loss": 1.5944, + "step": 5000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9192782526115865e-06, + "loss": 1.5942, + "step": 5100 + }, + { + "epoch": 0.08, + "learning_rate": 4.917695473251029e-06, + "loss": 1.5794, + "step": 5200 + }, + { + "epoch": 0.08, + "learning_rate": 4.916112693890472e-06, + "loss": 1.5736, + "step": 5300 + }, + { + "epoch": 0.09, + "learning_rate": 4.914529914529915e-06, + "loss": 1.5591, + "step": 5400 + }, + { + "epoch": 0.09, + "learning_rate": 4.912947135169358e-06, + "loss": 1.5977, + "step": 5500 + }, + { + "epoch": 0.09, + "learning_rate": 4.911364355808801e-06, + "loss": 1.5528, + "step": 5600 + }, + { + "epoch": 0.09, + "learning_rate": 4.909781576448244e-06, + "loss": 1.5853, + "step": 5700 + }, + { + "epoch": 0.09, + "learning_rate": 4.908198797087686e-06, + "loss": 1.5599, + "step": 5800 + }, + { + "epoch": 0.09, + "learning_rate": 4.906616017727129e-06, + "loss": 1.5539, + "step": 5900 + }, + { + "epoch": 0.09, + "learning_rate": 4.905033238366572e-06, + "loss": 1.5947, + "step": 6000 + }, + { + "epoch": 0.1, + "learning_rate": 4.903450459006015e-06, + "loss": 1.5608, + "step": 6100 + }, + { + "epoch": 0.1, + "learning_rate": 4.9018676796454575e-06, + "loss": 1.5595, + "step": 6200 + }, + { + "epoch": 0.1, + "learning_rate": 4.9002849002849006e-06, + "loss": 1.5548, + "step": 6300 + }, + { + "epoch": 0.1, + "learning_rate": 4.898702120924344e-06, + "loss": 1.538, + "step": 6400 + }, + { + "epoch": 0.1, + "learning_rate": 4.897119341563787e-06, + "loss": 1.5576, + "step": 6500 + }, + { + "epoch": 0.1, + "learning_rate": 4.895536562203229e-06, + "loss": 1.5606, + "step": 6600 + }, + { + "epoch": 0.11, + "learning_rate": 4.893953782842672e-06, + "loss": 1.5404, + "step": 6700 + }, + { + "epoch": 0.11, + "learning_rate": 4.892371003482115e-06, + "loss": 1.5638, + "step": 6800 + }, + { + "epoch": 0.11, + "learning_rate": 4.890788224121558e-06, + "loss": 1.543, + "step": 6900 + }, + { + "epoch": 0.11, + "learning_rate": 4.889205444761e-06, + "loss": 1.5597, + "step": 7000 + }, + { + "epoch": 0.11, + "learning_rate": 4.887622665400444e-06, + "loss": 1.5368, + "step": 7100 + }, + { + "epoch": 0.11, + "learning_rate": 4.886039886039886e-06, + "loss": 1.5206, + "step": 7200 + }, + { + "epoch": 0.12, + "learning_rate": 4.884457106679329e-06, + "loss": 1.5332, + "step": 7300 + }, + { + "epoch": 0.12, + "learning_rate": 4.882874327318772e-06, + "loss": 1.5274, + "step": 7400 + }, + { + "epoch": 0.12, + "learning_rate": 4.8812915479582154e-06, + "loss": 1.5499, + "step": 7500 + }, + { + "epoch": 0.12, + "learning_rate": 4.879708768597658e-06, + "loss": 1.5177, + "step": 7600 + }, + { + "epoch": 0.12, + "learning_rate": 4.878125989237101e-06, + "loss": 1.535, + "step": 7700 + }, + { + "epoch": 0.12, + "learning_rate": 4.876543209876544e-06, + "loss": 1.5256, + "step": 7800 + }, + { + "epoch": 0.13, + "learning_rate": 4.874960430515987e-06, + "loss": 1.5351, + "step": 7900 + }, + { + "epoch": 0.13, + "learning_rate": 4.873377651155429e-06, + "loss": 1.5399, + "step": 8000 + }, + { + "epoch": 0.13, + "learning_rate": 4.871794871794872e-06, + "loss": 1.5561, + "step": 8100 + }, + { + "epoch": 0.13, + "learning_rate": 4.870212092434315e-06, + "loss": 1.5573, + "step": 8200 + }, + { + "epoch": 0.13, + "learning_rate": 4.868629313073758e-06, + "loss": 1.5409, + "step": 8300 + }, + { + "epoch": 0.13, + "learning_rate": 4.8670465337132e-06, + "loss": 1.5533, + "step": 8400 + }, + { + "epoch": 0.13, + "learning_rate": 4.865463754352643e-06, + "loss": 1.5604, + "step": 8500 + }, + { + "epoch": 0.14, + "learning_rate": 4.8638809749920864e-06, + "loss": 1.5658, + "step": 8600 + }, + { + "epoch": 0.14, + "learning_rate": 4.8622981956315295e-06, + "loss": 1.5564, + "step": 8700 + }, + { + "epoch": 0.14, + "learning_rate": 4.8607154162709725e-06, + "loss": 1.5404, + "step": 8800 + }, + { + "epoch": 0.14, + "learning_rate": 4.859132636910416e-06, + "loss": 1.5486, + "step": 8900 + }, + { + "epoch": 0.14, + "learning_rate": 4.857549857549858e-06, + "loss": 1.5597, + "step": 9000 + }, + { + "epoch": 0.14, + "learning_rate": 4.855967078189301e-06, + "loss": 1.5589, + "step": 9100 + }, + { + "epoch": 0.15, + "learning_rate": 4.854384298828744e-06, + "loss": 1.5272, + "step": 9200 + }, + { + "epoch": 0.15, + "learning_rate": 4.852801519468187e-06, + "loss": 1.5532, + "step": 9300 + }, + { + "epoch": 0.15, + "learning_rate": 4.851218740107629e-06, + "loss": 1.5287, + "step": 9400 + }, + { + "epoch": 0.15, + "learning_rate": 4.849635960747072e-06, + "loss": 1.5108, + "step": 9500 + }, + { + "epoch": 0.15, + "learning_rate": 4.848053181386515e-06, + "loss": 1.528, + "step": 9600 + }, + { + "epoch": 0.15, + "learning_rate": 4.846470402025958e-06, + "loss": 1.5438, + "step": 9700 + }, + { + "epoch": 0.16, + "learning_rate": 4.8448876226654005e-06, + "loss": 1.5515, + "step": 9800 + }, + { + "epoch": 0.16, + "learning_rate": 4.8433048433048435e-06, + "loss": 1.5165, + "step": 9900 + }, + { + "epoch": 0.16, + "learning_rate": 4.8417220639442866e-06, + "loss": 1.5248, + "step": 10000 + }, + { + "epoch": 0.16, + "learning_rate": 4.840139284583729e-06, + "loss": 1.5513, + "step": 10100 + }, + { + "epoch": 0.16, + "learning_rate": 4.838556505223173e-06, + "loss": 1.5307, + "step": 10200 + }, + { + "epoch": 0.16, + "learning_rate": 4.836973725862616e-06, + "loss": 1.5391, + "step": 10300 + }, + { + "epoch": 0.16, + "learning_rate": 4.835390946502058e-06, + "loss": 1.5345, + "step": 10400 + }, + { + "epoch": 0.17, + "learning_rate": 4.833808167141501e-06, + "loss": 1.5431, + "step": 10500 + }, + { + "epoch": 0.17, + "learning_rate": 4.832225387780944e-06, + "loss": 1.5306, + "step": 10600 + }, + { + "epoch": 0.17, + "learning_rate": 4.830642608420387e-06, + "loss": 1.5027, + "step": 10700 + }, + { + "epoch": 0.17, + "learning_rate": 4.829059829059829e-06, + "loss": 1.5342, + "step": 10800 + }, + { + "epoch": 0.17, + "learning_rate": 4.827477049699272e-06, + "loss": 1.5243, + "step": 10900 + }, + { + "epoch": 0.17, + "learning_rate": 4.825894270338715e-06, + "loss": 1.5012, + "step": 11000 + }, + { + "epoch": 0.18, + "learning_rate": 4.824311490978158e-06, + "loss": 1.5478, + "step": 11100 + }, + { + "epoch": 0.18, + "learning_rate": 4.822728711617601e-06, + "loss": 1.5242, + "step": 11200 + }, + { + "epoch": 0.18, + "learning_rate": 4.821145932257044e-06, + "loss": 1.5134, + "step": 11300 + }, + { + "epoch": 0.18, + "learning_rate": 4.819563152896487e-06, + "loss": 1.4938, + "step": 11400 + }, + { + "epoch": 0.18, + "learning_rate": 4.817980373535929e-06, + "loss": 1.5079, + "step": 11500 + }, + { + "epoch": 0.18, + "learning_rate": 4.816397594175372e-06, + "loss": 1.5136, + "step": 11600 + }, + { + "epoch": 0.19, + "learning_rate": 4.814814814814815e-06, + "loss": 1.5143, + "step": 11700 + }, + { + "epoch": 0.19, + "learning_rate": 4.813232035454258e-06, + "loss": 1.5187, + "step": 11800 + }, + { + "epoch": 0.19, + "learning_rate": 4.811649256093701e-06, + "loss": 1.5127, + "step": 11900 + }, + { + "epoch": 0.19, + "learning_rate": 4.810066476733144e-06, + "loss": 1.5269, + "step": 12000 + }, + { + "epoch": 0.19, + "learning_rate": 4.808483697372587e-06, + "loss": 1.4965, + "step": 12100 + }, + { + "epoch": 0.19, + "learning_rate": 4.806900918012029e-06, + "loss": 1.531, + "step": 12200 + }, + { + "epoch": 0.19, + "learning_rate": 4.8053181386514724e-06, + "loss": 1.526, + "step": 12300 + }, + { + "epoch": 0.2, + "learning_rate": 4.8037353592909155e-06, + "loss": 1.5332, + "step": 12400 + }, + { + "epoch": 0.2, + "learning_rate": 4.8021525799303585e-06, + "loss": 1.5157, + "step": 12500 + }, + { + "epoch": 0.2, + "learning_rate": 4.800569800569801e-06, + "loss": 1.4796, + "step": 12600 + }, + { + "epoch": 0.2, + "learning_rate": 4.798987021209244e-06, + "loss": 1.52, + "step": 12700 + }, + { + "epoch": 0.2, + "learning_rate": 4.797404241848687e-06, + "loss": 1.5035, + "step": 12800 + }, + { + "epoch": 0.2, + "learning_rate": 4.795821462488129e-06, + "loss": 1.5278, + "step": 12900 + }, + { + "epoch": 0.21, + "learning_rate": 4.794238683127572e-06, + "loss": 1.4903, + "step": 13000 + }, + { + "epoch": 0.21, + "learning_rate": 4.792655903767015e-06, + "loss": 1.5122, + "step": 13100 + }, + { + "epoch": 0.21, + "learning_rate": 4.791073124406458e-06, + "loss": 1.537, + "step": 13200 + }, + { + "epoch": 0.21, + "learning_rate": 4.789490345045901e-06, + "loss": 1.5129, + "step": 13300 + }, + { + "epoch": 0.21, + "learning_rate": 4.787907565685344e-06, + "loss": 1.5047, + "step": 13400 + }, + { + "epoch": 0.21, + "learning_rate": 4.786324786324787e-06, + "loss": 1.495, + "step": 13500 + }, + { + "epoch": 0.22, + "learning_rate": 4.7847420069642295e-06, + "loss": 1.5288, + "step": 13600 + }, + { + "epoch": 0.22, + "learning_rate": 4.7831592276036726e-06, + "loss": 1.494, + "step": 13700 + }, + { + "epoch": 0.22, + "learning_rate": 4.781576448243116e-06, + "loss": 1.4887, + "step": 13800 + }, + { + "epoch": 0.22, + "learning_rate": 4.779993668882558e-06, + "loss": 1.4916, + "step": 13900 + }, + { + "epoch": 0.22, + "learning_rate": 4.778410889522001e-06, + "loss": 1.4996, + "step": 14000 + }, + { + "epoch": 0.22, + "learning_rate": 4.776828110161444e-06, + "loss": 1.484, + "step": 14100 + }, + { + "epoch": 0.22, + "learning_rate": 4.775245330800887e-06, + "loss": 1.4899, + "step": 14200 + }, + { + "epoch": 0.23, + "learning_rate": 4.773662551440329e-06, + "loss": 1.5139, + "step": 14300 + }, + { + "epoch": 0.23, + "learning_rate": 4.772079772079772e-06, + "loss": 1.4931, + "step": 14400 + }, + { + "epoch": 0.23, + "learning_rate": 4.770496992719215e-06, + "loss": 1.4975, + "step": 14500 + }, + { + "epoch": 0.23, + "learning_rate": 4.768914213358658e-06, + "loss": 1.5215, + "step": 14600 + }, + { + "epoch": 0.23, + "learning_rate": 4.7673314339981005e-06, + "loss": 1.5098, + "step": 14700 + }, + { + "epoch": 0.23, + "learning_rate": 4.7657486546375435e-06, + "loss": 1.4932, + "step": 14800 + }, + { + "epoch": 0.24, + "learning_rate": 4.7641658752769874e-06, + "loss": 1.4865, + "step": 14900 + }, + { + "epoch": 0.24, + "learning_rate": 4.76258309591643e-06, + "loss": 1.5267, + "step": 15000 + }, + { + "epoch": 0.24, + "learning_rate": 4.761000316555873e-06, + "loss": 1.5013, + "step": 15100 + }, + { + "epoch": 0.24, + "learning_rate": 4.759417537195316e-06, + "loss": 1.5071, + "step": 15200 + }, + { + "epoch": 0.24, + "learning_rate": 4.757834757834758e-06, + "loss": 1.4658, + "step": 15300 + }, + { + "epoch": 0.24, + "learning_rate": 4.756251978474201e-06, + "loss": 1.4965, + "step": 15400 + }, + { + "epoch": 0.25, + "learning_rate": 4.754669199113644e-06, + "loss": 1.4838, + "step": 15500 + }, + { + "epoch": 0.25, + "learning_rate": 4.753086419753087e-06, + "loss": 1.5135, + "step": 15600 + }, + { + "epoch": 0.25, + "learning_rate": 4.751503640392529e-06, + "loss": 1.4951, + "step": 15700 + }, + { + "epoch": 0.25, + "learning_rate": 4.749920861031972e-06, + "loss": 1.4844, + "step": 15800 + }, + { + "epoch": 0.25, + "learning_rate": 4.748338081671415e-06, + "loss": 1.5125, + "step": 15900 + }, + { + "epoch": 0.25, + "learning_rate": 4.7467553023108584e-06, + "loss": 1.4884, + "step": 16000 + }, + { + "epoch": 0.25, + "learning_rate": 4.745172522950301e-06, + "loss": 1.4913, + "step": 16100 + }, + { + "epoch": 0.26, + "learning_rate": 4.743589743589744e-06, + "loss": 1.5169, + "step": 16200 + }, + { + "epoch": 0.26, + "learning_rate": 4.742006964229187e-06, + "loss": 1.4794, + "step": 16300 + }, + { + "epoch": 0.26, + "learning_rate": 4.74042418486863e-06, + "loss": 1.4748, + "step": 16400 + }, + { + "epoch": 0.26, + "learning_rate": 4.738841405508073e-06, + "loss": 1.4652, + "step": 16500 + }, + { + "epoch": 0.26, + "learning_rate": 4.737258626147516e-06, + "loss": 1.4735, + "step": 16600 + }, + { + "epoch": 0.26, + "learning_rate": 4.735675846786958e-06, + "loss": 1.5015, + "step": 16700 + }, + { + "epoch": 0.27, + "learning_rate": 4.734093067426401e-06, + "loss": 1.4786, + "step": 16800 + }, + { + "epoch": 0.27, + "learning_rate": 4.732510288065844e-06, + "loss": 1.5143, + "step": 16900 + }, + { + "epoch": 0.27, + "learning_rate": 4.730927508705287e-06, + "loss": 1.4592, + "step": 17000 + }, + { + "epoch": 0.27, + "learning_rate": 4.729344729344729e-06, + "loss": 1.5153, + "step": 17100 + }, + { + "epoch": 0.27, + "learning_rate": 4.7277619499841725e-06, + "loss": 1.4857, + "step": 17200 + }, + { + "epoch": 0.27, + "learning_rate": 4.7261791706236155e-06, + "loss": 1.4842, + "step": 17300 + }, + { + "epoch": 0.28, + "learning_rate": 4.7245963912630586e-06, + "loss": 1.4627, + "step": 17400 + }, + { + "epoch": 0.28, + "learning_rate": 4.723013611902501e-06, + "loss": 1.504, + "step": 17500 + }, + { + "epoch": 0.28, + "learning_rate": 4.721430832541944e-06, + "loss": 1.4901, + "step": 17600 + }, + { + "epoch": 0.28, + "learning_rate": 4.719848053181387e-06, + "loss": 1.4816, + "step": 17700 + }, + { + "epoch": 0.28, + "learning_rate": 4.71826527382083e-06, + "loss": 1.5071, + "step": 17800 + }, + { + "epoch": 0.28, + "learning_rate": 4.716682494460272e-06, + "loss": 1.4601, + "step": 17900 + }, + { + "epoch": 0.28, + "learning_rate": 4.715099715099716e-06, + "loss": 1.5007, + "step": 18000 + }, + { + "epoch": 0.29, + "learning_rate": 4.713516935739158e-06, + "loss": 1.4707, + "step": 18100 + }, + { + "epoch": 0.29, + "learning_rate": 4.711934156378601e-06, + "loss": 1.4962, + "step": 18200 + }, + { + "epoch": 0.29, + "learning_rate": 4.710351377018044e-06, + "loss": 1.4876, + "step": 18300 + }, + { + "epoch": 0.29, + "learning_rate": 4.708768597657487e-06, + "loss": 1.4863, + "step": 18400 + }, + { + "epoch": 0.29, + "learning_rate": 4.7071858182969295e-06, + "loss": 1.4886, + "step": 18500 + }, + { + "epoch": 0.29, + "learning_rate": 4.705603038936373e-06, + "loss": 1.4909, + "step": 18600 + }, + { + "epoch": 0.3, + "learning_rate": 4.704020259575816e-06, + "loss": 1.4637, + "step": 18700 + }, + { + "epoch": 0.3, + "learning_rate": 4.702437480215259e-06, + "loss": 1.4712, + "step": 18800 + }, + { + "epoch": 0.3, + "learning_rate": 4.700854700854701e-06, + "loss": 1.5079, + "step": 18900 + }, + { + "epoch": 0.3, + "learning_rate": 4.699271921494144e-06, + "loss": 1.4609, + "step": 19000 + }, + { + "epoch": 0.3, + "learning_rate": 4.697689142133587e-06, + "loss": 1.4822, + "step": 19100 + }, + { + "epoch": 0.3, + "learning_rate": 4.69610636277303e-06, + "loss": 1.4737, + "step": 19200 + }, + { + "epoch": 0.31, + "learning_rate": 4.694523583412472e-06, + "loss": 1.4596, + "step": 19300 + }, + { + "epoch": 0.31, + "learning_rate": 4.692940804051915e-06, + "loss": 1.4944, + "step": 19400 + }, + { + "epoch": 0.31, + "learning_rate": 4.691358024691358e-06, + "loss": 1.4751, + "step": 19500 + }, + { + "epoch": 0.31, + "learning_rate": 4.689775245330801e-06, + "loss": 1.4876, + "step": 19600 + }, + { + "epoch": 0.31, + "learning_rate": 4.688192465970244e-06, + "loss": 1.4693, + "step": 19700 + }, + { + "epoch": 0.31, + "learning_rate": 4.6866096866096875e-06, + "loss": 1.4817, + "step": 19800 + }, + { + "epoch": 0.31, + "learning_rate": 4.68502690724913e-06, + "loss": 1.4886, + "step": 19900 + }, + { + "epoch": 0.32, + "learning_rate": 4.683444127888573e-06, + "loss": 1.4817, + "step": 20000 + }, + { + "epoch": 0.32, + "learning_rate": 4.681861348528016e-06, + "loss": 1.5018, + "step": 20100 + }, + { + "epoch": 0.32, + "learning_rate": 4.680278569167459e-06, + "loss": 1.4991, + "step": 20200 + }, + { + "epoch": 0.32, + "learning_rate": 4.678695789806901e-06, + "loss": 1.4847, + "step": 20300 + }, + { + "epoch": 0.32, + "learning_rate": 4.677113010446344e-06, + "loss": 1.4705, + "step": 20400 + }, + { + "epoch": 0.32, + "learning_rate": 4.675530231085787e-06, + "loss": 1.4771, + "step": 20500 + }, + { + "epoch": 0.33, + "learning_rate": 4.67394745172523e-06, + "loss": 1.4849, + "step": 20600 + }, + { + "epoch": 0.33, + "learning_rate": 4.672364672364672e-06, + "loss": 1.4823, + "step": 20700 + }, + { + "epoch": 0.33, + "learning_rate": 4.670781893004115e-06, + "loss": 1.4846, + "step": 20800 + }, + { + "epoch": 0.33, + "learning_rate": 4.6691991136435585e-06, + "loss": 1.4653, + "step": 20900 + }, + { + "epoch": 0.33, + "learning_rate": 4.6676163342830015e-06, + "loss": 1.4846, + "step": 21000 + }, + { + "epoch": 0.33, + "learning_rate": 4.666033554922444e-06, + "loss": 1.4902, + "step": 21100 + }, + { + "epoch": 0.34, + "learning_rate": 4.664450775561888e-06, + "loss": 1.4528, + "step": 21200 + }, + { + "epoch": 0.34, + "learning_rate": 4.66286799620133e-06, + "loss": 1.481, + "step": 21300 + }, + { + "epoch": 0.34, + "learning_rate": 4.661285216840773e-06, + "loss": 1.4948, + "step": 21400 + }, + { + "epoch": 0.34, + "learning_rate": 4.659702437480216e-06, + "loss": 1.4664, + "step": 21500 + }, + { + "epoch": 0.34, + "learning_rate": 4.658119658119659e-06, + "loss": 1.4726, + "step": 21600 + }, + { + "epoch": 0.34, + "learning_rate": 4.656536878759101e-06, + "loss": 1.4724, + "step": 21700 + }, + { + "epoch": 0.35, + "learning_rate": 4.654954099398544e-06, + "loss": 1.4566, + "step": 21800 + }, + { + "epoch": 0.35, + "learning_rate": 4.653371320037987e-06, + "loss": 1.479, + "step": 21900 + }, + { + "epoch": 0.35, + "learning_rate": 4.65178854067743e-06, + "loss": 1.4506, + "step": 22000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6502057613168725e-06, + "loss": 1.4463, + "step": 22100 + }, + { + "epoch": 0.35, + "learning_rate": 4.6486229819563155e-06, + "loss": 1.465, + "step": 22200 + }, + { + "epoch": 0.35, + "learning_rate": 4.647040202595759e-06, + "loss": 1.4401, + "step": 22300 + }, + { + "epoch": 0.35, + "learning_rate": 4.645457423235202e-06, + "loss": 1.4706, + "step": 22400 + }, + { + "epoch": 0.36, + "learning_rate": 4.643874643874644e-06, + "loss": 1.4524, + "step": 22500 + }, + { + "epoch": 0.36, + "learning_rate": 4.642291864514087e-06, + "loss": 1.5014, + "step": 22600 + }, + { + "epoch": 0.36, + "learning_rate": 4.64070908515353e-06, + "loss": 1.494, + "step": 22700 + }, + { + "epoch": 0.36, + "learning_rate": 4.639126305792973e-06, + "loss": 1.4935, + "step": 22800 + }, + { + "epoch": 0.36, + "learning_rate": 4.637543526432416e-06, + "loss": 1.4658, + "step": 22900 + }, + { + "epoch": 0.36, + "learning_rate": 4.635960747071859e-06, + "loss": 1.4634, + "step": 23000 + }, + { + "epoch": 0.37, + "learning_rate": 4.634377967711301e-06, + "loss": 1.485, + "step": 23100 + }, + { + "epoch": 0.37, + "learning_rate": 4.632795188350744e-06, + "loss": 1.4486, + "step": 23200 + }, + { + "epoch": 0.37, + "learning_rate": 4.631212408990187e-06, + "loss": 1.4421, + "step": 23300 + }, + { + "epoch": 0.37, + "learning_rate": 4.62962962962963e-06, + "loss": 1.445, + "step": 23400 + }, + { + "epoch": 0.37, + "learning_rate": 4.628046850269073e-06, + "loss": 1.4629, + "step": 23500 + }, + { + "epoch": 0.37, + "learning_rate": 4.626464070908516e-06, + "loss": 1.4684, + "step": 23600 + }, + { + "epoch": 0.38, + "learning_rate": 4.624881291547959e-06, + "loss": 1.4675, + "step": 23700 + }, + { + "epoch": 0.38, + "learning_rate": 4.623298512187401e-06, + "loss": 1.4695, + "step": 23800 + }, + { + "epoch": 0.38, + "learning_rate": 4.621715732826844e-06, + "loss": 1.4649, + "step": 23900 + }, + { + "epoch": 0.38, + "learning_rate": 4.620132953466287e-06, + "loss": 1.465, + "step": 24000 + }, + { + "epoch": 0.38, + "learning_rate": 4.61855017410573e-06, + "loss": 1.4431, + "step": 24100 + }, + { + "epoch": 0.38, + "learning_rate": 4.616967394745172e-06, + "loss": 1.4568, + "step": 24200 + }, + { + "epoch": 0.38, + "learning_rate": 4.615384615384616e-06, + "loss": 1.4792, + "step": 24300 + }, + { + "epoch": 0.39, + "learning_rate": 4.613801836024059e-06, + "loss": 1.4436, + "step": 24400 + }, + { + "epoch": 0.39, + "learning_rate": 4.612219056663501e-06, + "loss": 1.484, + "step": 24500 + }, + { + "epoch": 0.39, + "learning_rate": 4.6106362773029444e-06, + "loss": 1.4689, + "step": 24600 + }, + { + "epoch": 0.39, + "learning_rate": 4.6090534979423875e-06, + "loss": 1.4532, + "step": 24700 + }, + { + "epoch": 0.39, + "learning_rate": 4.6074707185818305e-06, + "loss": 1.4641, + "step": 24800 + }, + { + "epoch": 0.39, + "learning_rate": 4.605887939221273e-06, + "loss": 1.48, + "step": 24900 + }, + { + "epoch": 0.4, + "learning_rate": 4.604305159860716e-06, + "loss": 1.4509, + "step": 25000 + }, + { + "epoch": 0.4, + "learning_rate": 4.602722380500159e-06, + "loss": 1.4455, + "step": 25100 + }, + { + "epoch": 0.4, + "learning_rate": 4.601139601139601e-06, + "loss": 1.483, + "step": 25200 + }, + { + "epoch": 0.4, + "learning_rate": 4.599556821779044e-06, + "loss": 1.4654, + "step": 25300 + }, + { + "epoch": 0.4, + "learning_rate": 4.597974042418487e-06, + "loss": 1.4881, + "step": 25400 + }, + { + "epoch": 0.4, + "learning_rate": 4.59639126305793e-06, + "loss": 1.4636, + "step": 25500 + }, + { + "epoch": 0.41, + "learning_rate": 4.594808483697372e-06, + "loss": 1.4807, + "step": 25600 + }, + { + "epoch": 0.41, + "learning_rate": 4.5932257043368154e-06, + "loss": 1.4491, + "step": 25700 + }, + { + "epoch": 0.41, + "learning_rate": 4.5916429249762585e-06, + "loss": 1.4742, + "step": 25800 + }, + { + "epoch": 0.41, + "learning_rate": 4.5900601456157015e-06, + "loss": 1.4801, + "step": 25900 + }, + { + "epoch": 0.41, + "learning_rate": 4.588477366255145e-06, + "loss": 1.4539, + "step": 26000 + }, + { + "epoch": 0.41, + "learning_rate": 4.586894586894588e-06, + "loss": 1.463, + "step": 26100 + }, + { + "epoch": 0.41, + "learning_rate": 4.585311807534031e-06, + "loss": 1.4843, + "step": 26200 + }, + { + "epoch": 0.42, + "learning_rate": 4.583729028173473e-06, + "loss": 1.4538, + "step": 26300 + }, + { + "epoch": 0.42, + "learning_rate": 4.582146248812916e-06, + "loss": 1.4753, + "step": 26400 + }, + { + "epoch": 0.42, + "learning_rate": 4.580563469452359e-06, + "loss": 1.437, + "step": 26500 + }, + { + "epoch": 0.42, + "learning_rate": 4.578980690091801e-06, + "loss": 1.4746, + "step": 26600 + }, + { + "epoch": 0.42, + "learning_rate": 4.577397910731244e-06, + "loss": 1.4728, + "step": 26700 + }, + { + "epoch": 0.42, + "learning_rate": 4.575815131370687e-06, + "loss": 1.4861, + "step": 26800 + }, + { + "epoch": 0.43, + "learning_rate": 4.57423235201013e-06, + "loss": 1.477, + "step": 26900 + }, + { + "epoch": 0.43, + "learning_rate": 4.5726495726495725e-06, + "loss": 1.4606, + "step": 27000 + }, + { + "epoch": 0.43, + "learning_rate": 4.5710667932890156e-06, + "loss": 1.4259, + "step": 27100 + }, + { + "epoch": 0.43, + "learning_rate": 4.569484013928459e-06, + "loss": 1.4523, + "step": 27200 + }, + { + "epoch": 0.43, + "learning_rate": 4.567901234567902e-06, + "loss": 1.473, + "step": 27300 + }, + { + "epoch": 0.43, + "learning_rate": 4.566318455207345e-06, + "loss": 1.4553, + "step": 27400 + }, + { + "epoch": 0.44, + "learning_rate": 4.564735675846788e-06, + "loss": 1.4734, + "step": 27500 + }, + { + "epoch": 0.44, + "learning_rate": 4.563152896486231e-06, + "loss": 1.477, + "step": 27600 + }, + { + "epoch": 0.44, + "learning_rate": 4.561570117125673e-06, + "loss": 1.4569, + "step": 27700 + }, + { + "epoch": 0.44, + "learning_rate": 4.559987337765116e-06, + "loss": 1.4534, + "step": 27800 + }, + { + "epoch": 0.44, + "learning_rate": 4.558404558404559e-06, + "loss": 1.4799, + "step": 27900 + }, + { + "epoch": 0.44, + "learning_rate": 4.556821779044001e-06, + "loss": 1.4623, + "step": 28000 + }, + { + "epoch": 0.44, + "learning_rate": 4.555238999683444e-06, + "loss": 1.4833, + "step": 28100 + }, + { + "epoch": 0.45, + "learning_rate": 4.553656220322887e-06, + "loss": 1.4746, + "step": 28200 + }, + { + "epoch": 0.45, + "learning_rate": 4.5520734409623304e-06, + "loss": 1.4497, + "step": 28300 + }, + { + "epoch": 0.45, + "learning_rate": 4.550490661601773e-06, + "loss": 1.4657, + "step": 28400 + }, + { + "epoch": 0.45, + "learning_rate": 4.548907882241216e-06, + "loss": 1.4704, + "step": 28500 + }, + { + "epoch": 0.45, + "learning_rate": 4.547325102880659e-06, + "loss": 1.4502, + "step": 28600 + }, + { + "epoch": 0.45, + "learning_rate": 4.545742323520102e-06, + "loss": 1.4662, + "step": 28700 + }, + { + "epoch": 0.46, + "learning_rate": 4.544159544159544e-06, + "loss": 1.4481, + "step": 28800 + }, + { + "epoch": 0.46, + "learning_rate": 4.542576764798987e-06, + "loss": 1.4462, + "step": 28900 + }, + { + "epoch": 0.46, + "learning_rate": 4.54099398543843e-06, + "loss": 1.4492, + "step": 29000 + }, + { + "epoch": 0.46, + "learning_rate": 4.539411206077873e-06, + "loss": 1.471, + "step": 29100 + }, + { + "epoch": 0.46, + "learning_rate": 4.537828426717316e-06, + "loss": 1.4365, + "step": 29200 + }, + { + "epoch": 0.46, + "learning_rate": 4.536245647356759e-06, + "loss": 1.4449, + "step": 29300 + }, + { + "epoch": 0.47, + "learning_rate": 4.5346628679962014e-06, + "loss": 1.4707, + "step": 29400 + }, + { + "epoch": 0.47, + "learning_rate": 4.5330800886356445e-06, + "loss": 1.4279, + "step": 29500 + }, + { + "epoch": 0.47, + "learning_rate": 4.5314973092750875e-06, + "loss": 1.4622, + "step": 29600 + }, + { + "epoch": 0.47, + "learning_rate": 4.5299145299145306e-06, + "loss": 1.4691, + "step": 29700 + }, + { + "epoch": 0.47, + "learning_rate": 4.528331750553973e-06, + "loss": 1.436, + "step": 29800 + }, + { + "epoch": 0.47, + "learning_rate": 4.526748971193416e-06, + "loss": 1.4551, + "step": 29900 + }, + { + "epoch": 0.47, + "learning_rate": 4.525166191832859e-06, + "loss": 1.4604, + "step": 30000 + }, + { + "epoch": 0.48, + "learning_rate": 4.523583412472302e-06, + "loss": 1.4489, + "step": 30100 + }, + { + "epoch": 0.48, + "learning_rate": 4.522000633111744e-06, + "loss": 1.4366, + "step": 30200 + }, + { + "epoch": 0.48, + "learning_rate": 4.520417853751187e-06, + "loss": 1.4495, + "step": 30300 + }, + { + "epoch": 0.48, + "learning_rate": 4.51883507439063e-06, + "loss": 1.4462, + "step": 30400 + }, + { + "epoch": 0.48, + "learning_rate": 4.517252295030073e-06, + "loss": 1.4486, + "step": 30500 + }, + { + "epoch": 0.48, + "learning_rate": 4.515669515669516e-06, + "loss": 1.4479, + "step": 30600 + }, + { + "epoch": 0.49, + "learning_rate": 4.514086736308959e-06, + "loss": 1.4437, + "step": 30700 + }, + { + "epoch": 0.49, + "learning_rate": 4.5125039569484016e-06, + "loss": 1.4438, + "step": 30800 + }, + { + "epoch": 0.49, + "learning_rate": 4.510921177587845e-06, + "loss": 1.4517, + "step": 30900 + }, + { + "epoch": 0.49, + "learning_rate": 4.509338398227288e-06, + "loss": 1.4333, + "step": 31000 + }, + { + "epoch": 0.49, + "learning_rate": 4.507755618866731e-06, + "loss": 1.4399, + "step": 31100 + }, + { + "epoch": 0.49, + "learning_rate": 4.506172839506173e-06, + "loss": 1.4419, + "step": 31200 + }, + { + "epoch": 0.5, + "learning_rate": 4.504590060145616e-06, + "loss": 1.4447, + "step": 31300 + }, + { + "epoch": 0.5, + "learning_rate": 4.503007280785059e-06, + "loss": 1.4499, + "step": 31400 + }, + { + "epoch": 0.5, + "learning_rate": 4.501424501424502e-06, + "loss": 1.4598, + "step": 31500 + }, + { + "epoch": 0.5, + "learning_rate": 4.499841722063944e-06, + "loss": 1.4347, + "step": 31600 + }, + { + "epoch": 0.5, + "learning_rate": 4.498258942703387e-06, + "loss": 1.452, + "step": 31700 + }, + { + "epoch": 0.5, + "learning_rate": 4.49667616334283e-06, + "loss": 1.424, + "step": 31800 + }, + { + "epoch": 0.5, + "learning_rate": 4.495093383982273e-06, + "loss": 1.4336, + "step": 31900 + }, + { + "epoch": 0.51, + "learning_rate": 4.493510604621716e-06, + "loss": 1.4374, + "step": 32000 + }, + { + "epoch": 0.51, + "learning_rate": 4.4919278252611595e-06, + "loss": 1.4201, + "step": 32100 + }, + { + "epoch": 0.51, + "learning_rate": 4.490345045900602e-06, + "loss": 1.438, + "step": 32200 + }, + { + "epoch": 0.51, + "learning_rate": 4.488762266540045e-06, + "loss": 1.4631, + "step": 32300 + }, + { + "epoch": 0.51, + "learning_rate": 4.487179487179488e-06, + "loss": 1.4354, + "step": 32400 + }, + { + "epoch": 0.51, + "learning_rate": 4.485596707818931e-06, + "loss": 1.4279, + "step": 32500 + }, + { + "epoch": 0.52, + "learning_rate": 4.484013928458373e-06, + "loss": 1.4515, + "step": 32600 + }, + { + "epoch": 0.52, + "learning_rate": 4.482431149097816e-06, + "loss": 1.4536, + "step": 32700 + }, + { + "epoch": 0.52, + "learning_rate": 4.480848369737259e-06, + "loss": 1.4647, + "step": 32800 + }, + { + "epoch": 0.52, + "learning_rate": 4.479265590376702e-06, + "loss": 1.4503, + "step": 32900 + }, + { + "epoch": 0.52, + "learning_rate": 4.477682811016144e-06, + "loss": 1.459, + "step": 33000 + }, + { + "epoch": 0.52, + "learning_rate": 4.4761000316555874e-06, + "loss": 1.4207, + "step": 33100 + }, + { + "epoch": 0.53, + "learning_rate": 4.4745172522950305e-06, + "loss": 1.4527, + "step": 33200 + }, + { + "epoch": 0.53, + "learning_rate": 4.4729344729344735e-06, + "loss": 1.4496, + "step": 33300 + }, + { + "epoch": 0.53, + "learning_rate": 4.471351693573916e-06, + "loss": 1.4407, + "step": 33400 + }, + { + "epoch": 0.53, + "learning_rate": 4.469768914213359e-06, + "loss": 1.4301, + "step": 33500 + }, + { + "epoch": 0.53, + "learning_rate": 4.468186134852802e-06, + "loss": 1.4445, + "step": 33600 + }, + { + "epoch": 0.53, + "learning_rate": 4.466603355492245e-06, + "loss": 1.4255, + "step": 33700 + }, + { + "epoch": 0.53, + "learning_rate": 4.465020576131688e-06, + "loss": 1.4477, + "step": 33800 + }, + { + "epoch": 0.54, + "learning_rate": 4.463437796771131e-06, + "loss": 1.4566, + "step": 33900 + }, + { + "epoch": 0.54, + "learning_rate": 4.461855017410573e-06, + "loss": 1.4372, + "step": 34000 + }, + { + "epoch": 0.54, + "learning_rate": 4.460272238050016e-06, + "loss": 1.4341, + "step": 34100 + }, + { + "epoch": 0.54, + "learning_rate": 4.458689458689459e-06, + "loss": 1.4123, + "step": 34200 + }, + { + "epoch": 0.54, + "learning_rate": 4.457106679328902e-06, + "loss": 1.4265, + "step": 34300 + }, + { + "epoch": 0.54, + "learning_rate": 4.4555238999683445e-06, + "loss": 1.4165, + "step": 34400 + }, + { + "epoch": 0.55, + "learning_rate": 4.4539411206077876e-06, + "loss": 1.4402, + "step": 34500 + }, + { + "epoch": 0.55, + "learning_rate": 4.452358341247231e-06, + "loss": 1.4502, + "step": 34600 + }, + { + "epoch": 0.55, + "learning_rate": 4.450775561886674e-06, + "loss": 1.4312, + "step": 34700 + }, + { + "epoch": 0.55, + "learning_rate": 4.449192782526116e-06, + "loss": 1.434, + "step": 34800 + }, + { + "epoch": 0.55, + "learning_rate": 4.447610003165559e-06, + "loss": 1.4188, + "step": 34900 + }, + { + "epoch": 0.55, + "learning_rate": 4.446027223805002e-06, + "loss": 1.4335, + "step": 35000 + }, + { + "epoch": 0.56, + "learning_rate": 4.444444444444444e-06, + "loss": 1.4238, + "step": 35100 + }, + { + "epoch": 0.56, + "learning_rate": 4.442861665083888e-06, + "loss": 1.453, + "step": 35200 + }, + { + "epoch": 0.56, + "learning_rate": 4.441278885723331e-06, + "loss": 1.432, + "step": 35300 + }, + { + "epoch": 0.56, + "learning_rate": 4.439696106362773e-06, + "loss": 1.4386, + "step": 35400 + }, + { + "epoch": 0.56, + "learning_rate": 4.438113327002216e-06, + "loss": 1.4323, + "step": 35500 + }, + { + "epoch": 0.56, + "learning_rate": 4.436530547641659e-06, + "loss": 1.4428, + "step": 35600 + }, + { + "epoch": 0.57, + "learning_rate": 4.4349477682811024e-06, + "loss": 1.4313, + "step": 35700 + }, + { + "epoch": 0.57, + "learning_rate": 4.433364988920545e-06, + "loss": 1.4494, + "step": 35800 + }, + { + "epoch": 0.57, + "learning_rate": 4.431782209559988e-06, + "loss": 1.4385, + "step": 35900 + }, + { + "epoch": 0.57, + "learning_rate": 4.430199430199431e-06, + "loss": 1.4008, + "step": 36000 + }, + { + "epoch": 0.57, + "learning_rate": 4.428616650838874e-06, + "loss": 1.4263, + "step": 36100 + }, + { + "epoch": 0.57, + "learning_rate": 4.427033871478316e-06, + "loss": 1.4529, + "step": 36200 + }, + { + "epoch": 0.57, + "learning_rate": 4.425451092117759e-06, + "loss": 1.4358, + "step": 36300 + }, + { + "epoch": 0.58, + "learning_rate": 4.423868312757202e-06, + "loss": 1.4295, + "step": 36400 + }, + { + "epoch": 0.58, + "learning_rate": 4.422285533396644e-06, + "loss": 1.427, + "step": 36500 + }, + { + "epoch": 0.58, + "learning_rate": 4.420702754036087e-06, + "loss": 1.4718, + "step": 36600 + }, + { + "epoch": 0.58, + "learning_rate": 4.41911997467553e-06, + "loss": 1.4339, + "step": 36700 + }, + { + "epoch": 0.58, + "learning_rate": 4.417537195314973e-06, + "loss": 1.4423, + "step": 36800 + }, + { + "epoch": 0.58, + "learning_rate": 4.4159544159544165e-06, + "loss": 1.432, + "step": 36900 + }, + { + "epoch": 0.59, + "learning_rate": 4.4143716365938595e-06, + "loss": 1.4387, + "step": 37000 + }, + { + "epoch": 0.59, + "learning_rate": 4.4127888572333026e-06, + "loss": 1.4456, + "step": 37100 + }, + { + "epoch": 0.59, + "learning_rate": 4.411206077872745e-06, + "loss": 1.426, + "step": 37200 + }, + { + "epoch": 0.59, + "learning_rate": 4.409623298512188e-06, + "loss": 1.4078, + "step": 37300 + }, + { + "epoch": 0.59, + "learning_rate": 4.408040519151631e-06, + "loss": 1.4237, + "step": 37400 + }, + { + "epoch": 0.59, + "learning_rate": 4.406457739791074e-06, + "loss": 1.4276, + "step": 37500 + }, + { + "epoch": 0.6, + "learning_rate": 4.404874960430516e-06, + "loss": 1.4364, + "step": 37600 + }, + { + "epoch": 0.6, + "learning_rate": 4.403292181069959e-06, + "loss": 1.4426, + "step": 37700 + }, + { + "epoch": 0.6, + "learning_rate": 4.401709401709402e-06, + "loss": 1.4288, + "step": 37800 + }, + { + "epoch": 0.6, + "learning_rate": 4.400126622348844e-06, + "loss": 1.4281, + "step": 37900 + }, + { + "epoch": 0.6, + "learning_rate": 4.3985438429882875e-06, + "loss": 1.4293, + "step": 38000 + }, + { + "epoch": 0.6, + "learning_rate": 4.3969610636277305e-06, + "loss": 1.4186, + "step": 38100 + }, + { + "epoch": 0.6, + "learning_rate": 4.3953782842671736e-06, + "loss": 1.4555, + "step": 38200 + }, + { + "epoch": 0.61, + "learning_rate": 4.393795504906616e-06, + "loss": 1.4183, + "step": 38300 + }, + { + "epoch": 0.61, + "learning_rate": 4.39221272554606e-06, + "loss": 1.4672, + "step": 38400 + }, + { + "epoch": 0.61, + "learning_rate": 4.390629946185503e-06, + "loss": 1.448, + "step": 38500 + }, + { + "epoch": 0.61, + "learning_rate": 4.389047166824945e-06, + "loss": 1.4271, + "step": 38600 + }, + { + "epoch": 0.61, + "learning_rate": 4.387464387464388e-06, + "loss": 1.4183, + "step": 38700 + }, + { + "epoch": 0.61, + "learning_rate": 4.385881608103831e-06, + "loss": 1.4348, + "step": 38800 + }, + { + "epoch": 0.62, + "learning_rate": 4.384298828743273e-06, + "loss": 1.432, + "step": 38900 + }, + { + "epoch": 0.62, + "learning_rate": 4.382716049382716e-06, + "loss": 1.4337, + "step": 39000 + }, + { + "epoch": 0.62, + "learning_rate": 4.381133270022159e-06, + "loss": 1.4462, + "step": 39100 + }, + { + "epoch": 0.62, + "learning_rate": 4.379550490661602e-06, + "loss": 1.4178, + "step": 39200 + }, + { + "epoch": 0.62, + "learning_rate": 4.3779677113010445e-06, + "loss": 1.4556, + "step": 39300 + }, + { + "epoch": 0.62, + "learning_rate": 4.376384931940488e-06, + "loss": 1.4308, + "step": 39400 + }, + { + "epoch": 0.63, + "learning_rate": 4.374802152579931e-06, + "loss": 1.4346, + "step": 39500 + }, + { + "epoch": 0.63, + "learning_rate": 4.373219373219374e-06, + "loss": 1.4331, + "step": 39600 + }, + { + "epoch": 0.63, + "learning_rate": 4.371636593858816e-06, + "loss": 1.4558, + "step": 39700 + }, + { + "epoch": 0.63, + "learning_rate": 4.370053814498259e-06, + "loss": 1.4381, + "step": 39800 + }, + { + "epoch": 0.63, + "learning_rate": 4.368471035137703e-06, + "loss": 1.3987, + "step": 39900 + }, + { + "epoch": 0.63, + "learning_rate": 4.366888255777145e-06, + "loss": 1.4353, + "step": 40000 + }, + { + "epoch": 0.63, + "learning_rate": 4.365305476416588e-06, + "loss": 1.4133, + "step": 40100 + }, + { + "epoch": 0.64, + "learning_rate": 4.363722697056031e-06, + "loss": 1.4334, + "step": 40200 + }, + { + "epoch": 0.64, + "learning_rate": 4.362139917695473e-06, + "loss": 1.4134, + "step": 40300 + }, + { + "epoch": 0.64, + "learning_rate": 4.360557138334916e-06, + "loss": 1.4354, + "step": 40400 + }, + { + "epoch": 0.64, + "learning_rate": 4.358974358974359e-06, + "loss": 1.4146, + "step": 40500 + }, + { + "epoch": 0.64, + "learning_rate": 4.3573915796138025e-06, + "loss": 1.4488, + "step": 40600 + }, + { + "epoch": 0.64, + "learning_rate": 4.355808800253245e-06, + "loss": 1.4184, + "step": 40700 + }, + { + "epoch": 0.65, + "learning_rate": 4.354226020892688e-06, + "loss": 1.4268, + "step": 40800 + }, + { + "epoch": 0.65, + "learning_rate": 4.352643241532131e-06, + "loss": 1.4266, + "step": 40900 + }, + { + "epoch": 0.65, + "learning_rate": 4.351060462171574e-06, + "loss": 1.4584, + "step": 41000 + }, + { + "epoch": 0.65, + "learning_rate": 4.349477682811016e-06, + "loss": 1.4223, + "step": 41100 + }, + { + "epoch": 0.65, + "learning_rate": 4.347894903450459e-06, + "loss": 1.4329, + "step": 41200 + }, + { + "epoch": 0.65, + "learning_rate": 4.346312124089902e-06, + "loss": 1.4228, + "step": 41300 + }, + { + "epoch": 0.66, + "learning_rate": 4.344729344729345e-06, + "loss": 1.422, + "step": 41400 + }, + { + "epoch": 0.66, + "learning_rate": 4.343146565368788e-06, + "loss": 1.3957, + "step": 41500 + }, + { + "epoch": 0.66, + "learning_rate": 4.341563786008231e-06, + "loss": 1.4411, + "step": 41600 + }, + { + "epoch": 0.66, + "learning_rate": 4.3399810066476735e-06, + "loss": 1.4368, + "step": 41700 + }, + { + "epoch": 0.66, + "learning_rate": 4.3383982272871165e-06, + "loss": 1.417, + "step": 41800 + }, + { + "epoch": 0.66, + "learning_rate": 4.3368154479265595e-06, + "loss": 1.4198, + "step": 41900 + }, + { + "epoch": 0.66, + "learning_rate": 4.335232668566003e-06, + "loss": 1.4302, + "step": 42000 + }, + { + "epoch": 0.67, + "learning_rate": 4.333649889205445e-06, + "loss": 1.422, + "step": 42100 + }, + { + "epoch": 0.67, + "learning_rate": 4.332067109844888e-06, + "loss": 1.4339, + "step": 42200 + }, + { + "epoch": 0.67, + "learning_rate": 4.330484330484331e-06, + "loss": 1.4336, + "step": 42300 + }, + { + "epoch": 0.67, + "learning_rate": 4.328901551123774e-06, + "loss": 1.4441, + "step": 42400 + }, + { + "epoch": 0.67, + "learning_rate": 4.327318771763216e-06, + "loss": 1.429, + "step": 42500 + }, + { + "epoch": 0.67, + "learning_rate": 4.325735992402659e-06, + "loss": 1.4372, + "step": 42600 + }, + { + "epoch": 0.68, + "learning_rate": 4.324153213042102e-06, + "loss": 1.4223, + "step": 42700 + }, + { + "epoch": 0.68, + "learning_rate": 4.322570433681545e-06, + "loss": 1.4173, + "step": 42800 + }, + { + "epoch": 0.68, + "learning_rate": 4.3209876543209875e-06, + "loss": 1.3985, + "step": 42900 + }, + { + "epoch": 0.68, + "learning_rate": 4.3194048749604305e-06, + "loss": 1.4475, + "step": 43000 + }, + { + "epoch": 0.68, + "learning_rate": 4.317822095599874e-06, + "loss": 1.4295, + "step": 43100 + }, + { + "epoch": 0.68, + "learning_rate": 4.316239316239317e-06, + "loss": 1.4288, + "step": 43200 + }, + { + "epoch": 0.69, + "learning_rate": 4.31465653687876e-06, + "loss": 1.429, + "step": 43300 + }, + { + "epoch": 0.69, + "learning_rate": 4.313073757518203e-06, + "loss": 1.4169, + "step": 43400 + }, + { + "epoch": 0.69, + "learning_rate": 4.311490978157645e-06, + "loss": 1.4496, + "step": 43500 + }, + { + "epoch": 0.69, + "learning_rate": 4.309908198797088e-06, + "loss": 1.4237, + "step": 43600 + }, + { + "epoch": 0.69, + "learning_rate": 4.308325419436531e-06, + "loss": 1.4393, + "step": 43700 + }, + { + "epoch": 0.69, + "learning_rate": 4.306742640075974e-06, + "loss": 1.4146, + "step": 43800 + }, + { + "epoch": 0.69, + "learning_rate": 4.305159860715416e-06, + "loss": 1.3996, + "step": 43900 + }, + { + "epoch": 0.7, + "learning_rate": 4.303577081354859e-06, + "loss": 1.4271, + "step": 44000 + }, + { + "epoch": 0.7, + "learning_rate": 4.301994301994302e-06, + "loss": 1.4479, + "step": 44100 + }, + { + "epoch": 0.7, + "learning_rate": 4.300411522633745e-06, + "loss": 1.4312, + "step": 44200 + }, + { + "epoch": 0.7, + "learning_rate": 4.298828743273188e-06, + "loss": 1.4182, + "step": 44300 + }, + { + "epoch": 0.7, + "learning_rate": 4.297245963912631e-06, + "loss": 1.4275, + "step": 44400 + }, + { + "epoch": 0.7, + "learning_rate": 4.295663184552074e-06, + "loss": 1.3982, + "step": 44500 + }, + { + "epoch": 0.71, + "learning_rate": 4.294080405191517e-06, + "loss": 1.4644, + "step": 44600 + }, + { + "epoch": 0.71, + "learning_rate": 4.29249762583096e-06, + "loss": 1.4463, + "step": 44700 + }, + { + "epoch": 0.71, + "learning_rate": 4.290914846470403e-06, + "loss": 1.3969, + "step": 44800 + }, + { + "epoch": 0.71, + "learning_rate": 4.289332067109845e-06, + "loss": 1.3956, + "step": 44900 + }, + { + "epoch": 0.71, + "learning_rate": 4.287749287749288e-06, + "loss": 1.4116, + "step": 45000 + }, + { + "epoch": 0.71, + "learning_rate": 4.286166508388731e-06, + "loss": 1.4401, + "step": 45100 + }, + { + "epoch": 0.72, + "learning_rate": 4.284583729028174e-06, + "loss": 1.4236, + "step": 45200 + }, + { + "epoch": 0.72, + "learning_rate": 4.283000949667616e-06, + "loss": 1.4446, + "step": 45300 + }, + { + "epoch": 0.72, + "learning_rate": 4.2814181703070594e-06, + "loss": 1.4417, + "step": 45400 + }, + { + "epoch": 0.72, + "learning_rate": 4.2798353909465025e-06, + "loss": 1.4123, + "step": 45500 + }, + { + "epoch": 0.72, + "learning_rate": 4.2782526115859455e-06, + "loss": 1.3983, + "step": 45600 + }, + { + "epoch": 0.72, + "learning_rate": 4.276669832225388e-06, + "loss": 1.4268, + "step": 45700 + }, + { + "epoch": 0.72, + "learning_rate": 4.275087052864831e-06, + "loss": 1.4113, + "step": 45800 + }, + { + "epoch": 0.73, + "learning_rate": 4.273504273504274e-06, + "loss": 1.4372, + "step": 45900 + }, + { + "epoch": 0.73, + "learning_rate": 4.271921494143717e-06, + "loss": 1.4374, + "step": 46000 + }, + { + "epoch": 0.73, + "learning_rate": 4.270338714783159e-06, + "loss": 1.421, + "step": 46100 + }, + { + "epoch": 0.73, + "learning_rate": 4.268755935422603e-06, + "loss": 1.4311, + "step": 46200 + }, + { + "epoch": 0.73, + "learning_rate": 4.267173156062045e-06, + "loss": 1.4422, + "step": 46300 + }, + { + "epoch": 0.73, + "learning_rate": 4.265590376701488e-06, + "loss": 1.4392, + "step": 46400 + }, + { + "epoch": 0.74, + "learning_rate": 4.264007597340931e-06, + "loss": 1.4286, + "step": 46500 + }, + { + "epoch": 0.74, + "learning_rate": 4.262424817980374e-06, + "loss": 1.4098, + "step": 46600 + }, + { + "epoch": 0.74, + "learning_rate": 4.2608420386198165e-06, + "loss": 1.439, + "step": 46700 + }, + { + "epoch": 0.74, + "learning_rate": 4.2592592592592596e-06, + "loss": 1.4023, + "step": 46800 + }, + { + "epoch": 0.74, + "learning_rate": 4.257676479898703e-06, + "loss": 1.4369, + "step": 46900 + }, + { + "epoch": 0.74, + "learning_rate": 4.256093700538146e-06, + "loss": 1.4135, + "step": 47000 + }, + { + "epoch": 0.75, + "learning_rate": 4.254510921177588e-06, + "loss": 1.4164, + "step": 47100 + }, + { + "epoch": 0.75, + "learning_rate": 4.252928141817031e-06, + "loss": 1.4133, + "step": 47200 + }, + { + "epoch": 0.75, + "learning_rate": 4.251345362456474e-06, + "loss": 1.4166, + "step": 47300 + }, + { + "epoch": 0.75, + "learning_rate": 4.249762583095917e-06, + "loss": 1.4116, + "step": 47400 + }, + { + "epoch": 0.75, + "learning_rate": 4.248179803735359e-06, + "loss": 1.4534, + "step": 47500 + }, + { + "epoch": 0.75, + "learning_rate": 4.246597024374802e-06, + "loss": 1.4292, + "step": 47600 + }, + { + "epoch": 0.75, + "learning_rate": 4.245014245014245e-06, + "loss": 1.4159, + "step": 47700 + }, + { + "epoch": 0.76, + "learning_rate": 4.243431465653688e-06, + "loss": 1.416, + "step": 47800 + }, + { + "epoch": 0.76, + "learning_rate": 4.241848686293131e-06, + "loss": 1.4072, + "step": 47900 + }, + { + "epoch": 0.76, + "learning_rate": 4.2402659069325745e-06, + "loss": 1.3975, + "step": 48000 + }, + { + "epoch": 0.76, + "learning_rate": 4.238683127572017e-06, + "loss": 1.4115, + "step": 48100 + }, + { + "epoch": 0.76, + "learning_rate": 4.23710034821146e-06, + "loss": 1.4114, + "step": 48200 + }, + { + "epoch": 0.76, + "learning_rate": 4.235517568850903e-06, + "loss": 1.4278, + "step": 48300 + }, + { + "epoch": 0.77, + "learning_rate": 4.233934789490346e-06, + "loss": 1.4234, + "step": 48400 + }, + { + "epoch": 0.77, + "learning_rate": 4.232352010129788e-06, + "loss": 1.4219, + "step": 48500 + }, + { + "epoch": 0.77, + "learning_rate": 4.230769230769231e-06, + "loss": 1.4103, + "step": 48600 + }, + { + "epoch": 0.77, + "learning_rate": 4.229186451408674e-06, + "loss": 1.4068, + "step": 48700 + }, + { + "epoch": 0.77, + "learning_rate": 4.227603672048117e-06, + "loss": 1.4026, + "step": 48800 + }, + { + "epoch": 0.77, + "learning_rate": 4.226020892687559e-06, + "loss": 1.4273, + "step": 48900 + }, + { + "epoch": 0.78, + "learning_rate": 4.224438113327002e-06, + "loss": 1.4096, + "step": 49000 + }, + { + "epoch": 0.78, + "learning_rate": 4.2228553339664454e-06, + "loss": 1.4006, + "step": 49100 + }, + { + "epoch": 0.78, + "learning_rate": 4.221272554605888e-06, + "loss": 1.4202, + "step": 49200 + }, + { + "epoch": 0.78, + "learning_rate": 4.2196897752453315e-06, + "loss": 1.4173, + "step": 49300 + }, + { + "epoch": 0.78, + "learning_rate": 4.218106995884775e-06, + "loss": 1.4324, + "step": 49400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216524216524217e-06, + "loss": 1.4135, + "step": 49500 + }, + { + "epoch": 0.79, + "learning_rate": 4.21494143716366e-06, + "loss": 1.4551, + "step": 49600 + }, + { + "epoch": 0.79, + "learning_rate": 4.213358657803103e-06, + "loss": 1.4121, + "step": 49700 + }, + { + "epoch": 0.79, + "learning_rate": 4.211775878442546e-06, + "loss": 1.4196, + "step": 49800 + }, + { + "epoch": 0.79, + "learning_rate": 4.210193099081988e-06, + "loss": 1.4014, + "step": 49900 + }, + { + "epoch": 0.79, + "learning_rate": 4.208610319721431e-06, + "loss": 1.4063, + "step": 50000 + }, + { + "epoch": 0.79, + "learning_rate": 4.207027540360874e-06, + "loss": 1.438, + "step": 50100 + }, + { + "epoch": 0.79, + "learning_rate": 4.2054447610003164e-06, + "loss": 1.4008, + "step": 50200 + }, + { + "epoch": 0.8, + "learning_rate": 4.2038619816397595e-06, + "loss": 1.4143, + "step": 50300 + }, + { + "epoch": 0.8, + "learning_rate": 4.2022792022792025e-06, + "loss": 1.4159, + "step": 50400 + }, + { + "epoch": 0.8, + "learning_rate": 4.2006964229186456e-06, + "loss": 1.4183, + "step": 50500 + }, + { + "epoch": 0.8, + "learning_rate": 4.199113643558088e-06, + "loss": 1.4222, + "step": 50600 + }, + { + "epoch": 0.8, + "learning_rate": 4.197530864197531e-06, + "loss": 1.4191, + "step": 50700 + }, + { + "epoch": 0.8, + "learning_rate": 4.195948084836974e-06, + "loss": 1.4222, + "step": 50800 + }, + { + "epoch": 0.81, + "learning_rate": 4.194365305476417e-06, + "loss": 1.4086, + "step": 50900 + }, + { + "epoch": 0.81, + "learning_rate": 4.19278252611586e-06, + "loss": 1.406, + "step": 51000 + }, + { + "epoch": 0.81, + "learning_rate": 4.191199746755303e-06, + "loss": 1.4127, + "step": 51100 + }, + { + "epoch": 0.81, + "learning_rate": 4.189616967394746e-06, + "loss": 1.4103, + "step": 51200 + }, + { + "epoch": 0.81, + "learning_rate": 4.188034188034188e-06, + "loss": 1.4243, + "step": 51300 + }, + { + "epoch": 0.81, + "learning_rate": 4.186451408673631e-06, + "loss": 1.3865, + "step": 51400 + }, + { + "epoch": 0.82, + "learning_rate": 4.184868629313074e-06, + "loss": 1.4028, + "step": 51500 + }, + { + "epoch": 0.82, + "learning_rate": 4.1832858499525166e-06, + "loss": 1.4309, + "step": 51600 + }, + { + "epoch": 0.82, + "learning_rate": 4.18170307059196e-06, + "loss": 1.4066, + "step": 51700 + }, + { + "epoch": 0.82, + "learning_rate": 4.180120291231403e-06, + "loss": 1.3914, + "step": 51800 + }, + { + "epoch": 0.82, + "learning_rate": 4.178537511870846e-06, + "loss": 1.4097, + "step": 51900 + }, + { + "epoch": 0.82, + "learning_rate": 4.176954732510288e-06, + "loss": 1.4072, + "step": 52000 + }, + { + "epoch": 0.82, + "learning_rate": 4.175371953149731e-06, + "loss": 1.4314, + "step": 52100 + }, + { + "epoch": 0.83, + "learning_rate": 4.173789173789174e-06, + "loss": 1.4212, + "step": 52200 + }, + { + "epoch": 0.83, + "learning_rate": 4.172206394428617e-06, + "loss": 1.4086, + "step": 52300 + }, + { + "epoch": 0.83, + "learning_rate": 4.17062361506806e-06, + "loss": 1.3854, + "step": 52400 + }, + { + "epoch": 0.83, + "learning_rate": 4.169040835707503e-06, + "loss": 1.3973, + "step": 52500 + }, + { + "epoch": 0.83, + "learning_rate": 4.167458056346946e-06, + "loss": 1.448, + "step": 52600 + }, + { + "epoch": 0.83, + "learning_rate": 4.165875276986388e-06, + "loss": 1.4231, + "step": 52700 + }, + { + "epoch": 0.84, + "learning_rate": 4.1642924976258314e-06, + "loss": 1.4071, + "step": 52800 + }, + { + "epoch": 0.84, + "learning_rate": 4.1627097182652745e-06, + "loss": 1.4263, + "step": 52900 + }, + { + "epoch": 0.84, + "learning_rate": 4.161126938904717e-06, + "loss": 1.4128, + "step": 53000 + }, + { + "epoch": 0.84, + "learning_rate": 4.15954415954416e-06, + "loss": 1.424, + "step": 53100 + }, + { + "epoch": 0.84, + "learning_rate": 4.157961380183603e-06, + "loss": 1.3951, + "step": 53200 + }, + { + "epoch": 0.84, + "learning_rate": 4.156378600823046e-06, + "loss": 1.4439, + "step": 53300 + }, + { + "epoch": 0.85, + "learning_rate": 4.154795821462488e-06, + "loss": 1.3815, + "step": 53400 + }, + { + "epoch": 0.85, + "learning_rate": 4.153213042101931e-06, + "loss": 1.4348, + "step": 53500 + }, + { + "epoch": 0.85, + "learning_rate": 4.151630262741374e-06, + "loss": 1.4249, + "step": 53600 + }, + { + "epoch": 0.85, + "learning_rate": 4.150047483380817e-06, + "loss": 1.4264, + "step": 53700 + }, + { + "epoch": 0.85, + "learning_rate": 4.148464704020259e-06, + "loss": 1.416, + "step": 53800 + }, + { + "epoch": 0.85, + "learning_rate": 4.1468819246597024e-06, + "loss": 1.4141, + "step": 53900 + }, + { + "epoch": 0.85, + "learning_rate": 4.145299145299146e-06, + "loss": 1.4463, + "step": 54000 + }, + { + "epoch": 0.86, + "learning_rate": 4.1437163659385885e-06, + "loss": 1.4166, + "step": 54100 + }, + { + "epoch": 0.86, + "learning_rate": 4.1421335865780316e-06, + "loss": 1.4101, + "step": 54200 + }, + { + "epoch": 0.86, + "learning_rate": 4.140550807217475e-06, + "loss": 1.4364, + "step": 54300 + }, + { + "epoch": 0.86, + "learning_rate": 4.138968027856917e-06, + "loss": 1.4172, + "step": 54400 + }, + { + "epoch": 0.86, + "learning_rate": 4.13738524849636e-06, + "loss": 1.416, + "step": 54500 + }, + { + "epoch": 0.86, + "learning_rate": 4.135802469135803e-06, + "loss": 1.4179, + "step": 54600 + }, + { + "epoch": 0.87, + "learning_rate": 4.134219689775246e-06, + "loss": 1.3969, + "step": 54700 + }, + { + "epoch": 0.87, + "learning_rate": 4.132636910414688e-06, + "loss": 1.4152, + "step": 54800 + }, + { + "epoch": 0.87, + "learning_rate": 4.131054131054131e-06, + "loss": 1.4426, + "step": 54900 + }, + { + "epoch": 0.87, + "learning_rate": 4.129471351693574e-06, + "loss": 1.4159, + "step": 55000 + }, + { + "epoch": 0.87, + "learning_rate": 4.127888572333017e-06, + "loss": 1.3969, + "step": 55100 + }, + { + "epoch": 0.87, + "learning_rate": 4.1263057929724595e-06, + "loss": 1.4, + "step": 55200 + }, + { + "epoch": 0.88, + "learning_rate": 4.1247230136119026e-06, + "loss": 1.4173, + "step": 55300 + }, + { + "epoch": 0.88, + "learning_rate": 4.123140234251346e-06, + "loss": 1.4031, + "step": 55400 + }, + { + "epoch": 0.88, + "learning_rate": 4.121557454890789e-06, + "loss": 1.417, + "step": 55500 + }, + { + "epoch": 0.88, + "learning_rate": 4.119974675530232e-06, + "loss": 1.4255, + "step": 55600 + }, + { + "epoch": 0.88, + "learning_rate": 4.118391896169675e-06, + "loss": 1.4058, + "step": 55700 + }, + { + "epoch": 0.88, + "learning_rate": 4.116809116809117e-06, + "loss": 1.4117, + "step": 55800 + }, + { + "epoch": 0.88, + "learning_rate": 4.11522633744856e-06, + "loss": 1.4221, + "step": 55900 + }, + { + "epoch": 0.89, + "learning_rate": 4.113643558088003e-06, + "loss": 1.4102, + "step": 56000 + }, + { + "epoch": 0.89, + "learning_rate": 4.112060778727446e-06, + "loss": 1.4293, + "step": 56100 + }, + { + "epoch": 0.89, + "learning_rate": 4.110477999366888e-06, + "loss": 1.4133, + "step": 56200 + }, + { + "epoch": 0.89, + "learning_rate": 4.108895220006331e-06, + "loss": 1.4377, + "step": 56300 + }, + { + "epoch": 0.89, + "learning_rate": 4.107312440645774e-06, + "loss": 1.4066, + "step": 56400 + }, + { + "epoch": 0.89, + "learning_rate": 4.1057296612852174e-06, + "loss": 1.439, + "step": 56500 + }, + { + "epoch": 0.9, + "learning_rate": 4.10414688192466e-06, + "loss": 1.4073, + "step": 56600 + }, + { + "epoch": 0.9, + "learning_rate": 4.102564102564103e-06, + "loss": 1.4286, + "step": 56700 + }, + { + "epoch": 0.9, + "learning_rate": 4.100981323203546e-06, + "loss": 1.4036, + "step": 56800 + }, + { + "epoch": 0.9, + "learning_rate": 4.099398543842989e-06, + "loss": 1.4184, + "step": 56900 + }, + { + "epoch": 0.9, + "learning_rate": 4.097815764482431e-06, + "loss": 1.3984, + "step": 57000 + }, + { + "epoch": 0.9, + "learning_rate": 4.096232985121875e-06, + "loss": 1.404, + "step": 57100 + }, + { + "epoch": 0.91, + "learning_rate": 4.094650205761317e-06, + "loss": 1.4018, + "step": 57200 + }, + { + "epoch": 0.91, + "learning_rate": 4.09306742640076e-06, + "loss": 1.3751, + "step": 57300 + }, + { + "epoch": 0.91, + "learning_rate": 4.091484647040203e-06, + "loss": 1.3923, + "step": 57400 + }, + { + "epoch": 0.91, + "learning_rate": 4.089901867679646e-06, + "loss": 1.4188, + "step": 57500 + }, + { + "epoch": 0.91, + "learning_rate": 4.088319088319088e-06, + "loss": 1.4196, + "step": 57600 + }, + { + "epoch": 0.91, + "learning_rate": 4.0867363089585315e-06, + "loss": 1.4308, + "step": 57700 + }, + { + "epoch": 0.91, + "learning_rate": 4.0851535295979745e-06, + "loss": 1.399, + "step": 57800 + }, + { + "epoch": 0.92, + "learning_rate": 4.0835707502374176e-06, + "loss": 1.3927, + "step": 57900 + }, + { + "epoch": 0.92, + "learning_rate": 4.08198797087686e-06, + "loss": 1.4348, + "step": 58000 + }, + { + "epoch": 0.92, + "learning_rate": 4.080405191516303e-06, + "loss": 1.4094, + "step": 58100 + }, + { + "epoch": 0.92, + "learning_rate": 4.078822412155746e-06, + "loss": 1.3906, + "step": 58200 + }, + { + "epoch": 0.92, + "learning_rate": 4.077239632795189e-06, + "loss": 1.4106, + "step": 58300 + }, + { + "epoch": 0.92, + "learning_rate": 4.075656853434631e-06, + "loss": 1.4167, + "step": 58400 + }, + { + "epoch": 0.93, + "learning_rate": 4.074074074074074e-06, + "loss": 1.4388, + "step": 58500 + }, + { + "epoch": 0.93, + "learning_rate": 4.072491294713517e-06, + "loss": 1.4128, + "step": 58600 + }, + { + "epoch": 0.93, + "learning_rate": 4.07090851535296e-06, + "loss": 1.393, + "step": 58700 + }, + { + "epoch": 0.93, + "learning_rate": 4.069325735992403e-06, + "loss": 1.397, + "step": 58800 + }, + { + "epoch": 0.93, + "learning_rate": 4.067742956631846e-06, + "loss": 1.4215, + "step": 58900 + }, + { + "epoch": 0.93, + "learning_rate": 4.0661601772712885e-06, + "loss": 1.4096, + "step": 59000 + }, + { + "epoch": 0.94, + "learning_rate": 4.064577397910732e-06, + "loss": 1.3946, + "step": 59100 + }, + { + "epoch": 0.94, + "learning_rate": 4.062994618550175e-06, + "loss": 1.4143, + "step": 59200 + }, + { + "epoch": 0.94, + "learning_rate": 4.061411839189618e-06, + "loss": 1.4104, + "step": 59300 + }, + { + "epoch": 0.94, + "learning_rate": 4.05982905982906e-06, + "loss": 1.3939, + "step": 59400 + }, + { + "epoch": 0.94, + "learning_rate": 4.058246280468503e-06, + "loss": 1.4336, + "step": 59500 + }, + { + "epoch": 0.94, + "learning_rate": 4.056663501107946e-06, + "loss": 1.3844, + "step": 59600 + }, + { + "epoch": 0.94, + "learning_rate": 4.055080721747389e-06, + "loss": 1.4193, + "step": 59700 + }, + { + "epoch": 0.95, + "learning_rate": 4.053497942386831e-06, + "loss": 1.356, + "step": 59800 + }, + { + "epoch": 0.95, + "learning_rate": 4.051915163026274e-06, + "loss": 1.405, + "step": 59900 + }, + { + "epoch": 0.95, + "learning_rate": 4.050332383665717e-06, + "loss": 1.3894, + "step": 60000 + }, + { + "epoch": 0.95, + "learning_rate": 4.0487496043051595e-06, + "loss": 1.421, + "step": 60100 + }, + { + "epoch": 0.95, + "learning_rate": 4.047166824944603e-06, + "loss": 1.4048, + "step": 60200 + }, + { + "epoch": 0.95, + "learning_rate": 4.0455840455840465e-06, + "loss": 1.4176, + "step": 60300 + }, + { + "epoch": 0.96, + "learning_rate": 4.044001266223489e-06, + "loss": 1.4025, + "step": 60400 + }, + { + "epoch": 0.96, + "learning_rate": 4.042418486862932e-06, + "loss": 1.412, + "step": 60500 + }, + { + "epoch": 0.96, + "learning_rate": 4.040835707502375e-06, + "loss": 1.4001, + "step": 60600 + }, + { + "epoch": 0.96, + "learning_rate": 4.039252928141818e-06, + "loss": 1.4107, + "step": 60700 + }, + { + "epoch": 0.96, + "learning_rate": 4.03767014878126e-06, + "loss": 1.4193, + "step": 60800 + }, + { + "epoch": 0.96, + "learning_rate": 4.036087369420703e-06, + "loss": 1.4219, + "step": 60900 + }, + { + "epoch": 0.97, + "learning_rate": 4.034504590060146e-06, + "loss": 1.4235, + "step": 61000 + }, + { + "epoch": 0.97, + "learning_rate": 4.032921810699589e-06, + "loss": 1.4157, + "step": 61100 + }, + { + "epoch": 0.97, + "learning_rate": 4.031339031339031e-06, + "loss": 1.3992, + "step": 61200 + }, + { + "epoch": 0.97, + "learning_rate": 4.029756251978474e-06, + "loss": 1.4021, + "step": 61300 + }, + { + "epoch": 0.97, + "learning_rate": 4.0281734726179175e-06, + "loss": 1.4146, + "step": 61400 + }, + { + "epoch": 0.97, + "learning_rate": 4.02659069325736e-06, + "loss": 1.4072, + "step": 61500 + }, + { + "epoch": 0.97, + "learning_rate": 4.025007913896803e-06, + "loss": 1.4073, + "step": 61600 + }, + { + "epoch": 0.98, + "learning_rate": 4.023425134536246e-06, + "loss": 1.3853, + "step": 61700 + }, + { + "epoch": 0.98, + "learning_rate": 4.021842355175689e-06, + "loss": 1.379, + "step": 61800 + }, + { + "epoch": 0.98, + "learning_rate": 4.020259575815132e-06, + "loss": 1.3842, + "step": 61900 + }, + { + "epoch": 0.98, + "learning_rate": 4.018676796454575e-06, + "loss": 1.4068, + "step": 62000 + }, + { + "epoch": 0.98, + "learning_rate": 4.017094017094018e-06, + "loss": 1.4011, + "step": 62100 + }, + { + "epoch": 0.98, + "learning_rate": 4.01551123773346e-06, + "loss": 1.3949, + "step": 62200 + }, + { + "epoch": 0.99, + "learning_rate": 4.013928458372903e-06, + "loss": 1.4121, + "step": 62300 + }, + { + "epoch": 0.99, + "learning_rate": 4.012345679012346e-06, + "loss": 1.4173, + "step": 62400 + }, + { + "epoch": 0.99, + "learning_rate": 4.010762899651789e-06, + "loss": 1.4121, + "step": 62500 + }, + { + "epoch": 0.99, + "learning_rate": 4.0091801202912315e-06, + "loss": 1.4009, + "step": 62600 + }, + { + "epoch": 0.99, + "learning_rate": 4.0075973409306745e-06, + "loss": 1.4124, + "step": 62700 + }, + { + "epoch": 0.99, + "learning_rate": 4.006014561570118e-06, + "loss": 1.4065, + "step": 62800 + }, + { + "epoch": 1.0, + "learning_rate": 4.00443178220956e-06, + "loss": 1.3818, + "step": 62900 + }, + { + "epoch": 1.0, + "learning_rate": 4.002849002849003e-06, + "loss": 1.3962, + "step": 63000 + }, + { + "epoch": 1.0, + "learning_rate": 4.001266223488446e-06, + "loss": 1.3835, + "step": 63100 + }, + { + "epoch": 1.0, + "learning_rate": 3.999683444127889e-06, + "loss": 1.3742, + "step": 63200 + }, + { + "epoch": 1.0, + "learning_rate": 3.998100664767331e-06, + "loss": 1.3604, + "step": 63300 + }, + { + "epoch": 1.0, + "learning_rate": 3.996517885406775e-06, + "loss": 1.3773, + "step": 63400 + }, + { + "epoch": 1.01, + "learning_rate": 3.994935106046218e-06, + "loss": 1.3636, + "step": 63500 + }, + { + "epoch": 1.01, + "learning_rate": 3.99335232668566e-06, + "loss": 1.3522, + "step": 63600 + }, + { + "epoch": 1.01, + "learning_rate": 3.991769547325103e-06, + "loss": 1.3363, + "step": 63700 + }, + { + "epoch": 1.01, + "learning_rate": 3.990186767964546e-06, + "loss": 1.361, + "step": 63800 + }, + { + "epoch": 1.01, + "learning_rate": 3.9886039886039894e-06, + "loss": 1.3685, + "step": 63900 + }, + { + "epoch": 1.01, + "learning_rate": 3.987021209243432e-06, + "loss": 1.3688, + "step": 64000 + }, + { + "epoch": 1.01, + "learning_rate": 3.985438429882875e-06, + "loss": 1.384, + "step": 64100 + }, + { + "epoch": 1.02, + "learning_rate": 3.983855650522318e-06, + "loss": 1.3353, + "step": 64200 + }, + { + "epoch": 1.02, + "learning_rate": 3.98227287116176e-06, + "loss": 1.3898, + "step": 64300 + }, + { + "epoch": 1.02, + "learning_rate": 3.980690091801203e-06, + "loss": 1.3438, + "step": 64400 + }, + { + "epoch": 1.02, + "learning_rate": 3.979107312440646e-06, + "loss": 1.3367, + "step": 64500 + }, + { + "epoch": 1.02, + "learning_rate": 3.977524533080089e-06, + "loss": 1.3665, + "step": 64600 + }, + { + "epoch": 1.02, + "learning_rate": 3.975941753719531e-06, + "loss": 1.353, + "step": 64700 + }, + { + "epoch": 1.03, + "learning_rate": 3.974358974358974e-06, + "loss": 1.3453, + "step": 64800 + }, + { + "epoch": 1.03, + "learning_rate": 3.972776194998417e-06, + "loss": 1.3369, + "step": 64900 + }, + { + "epoch": 1.03, + "learning_rate": 3.97119341563786e-06, + "loss": 1.348, + "step": 65000 + }, + { + "epoch": 1.03, + "learning_rate": 3.9696106362773035e-06, + "loss": 1.3639, + "step": 65100 + }, + { + "epoch": 1.03, + "learning_rate": 3.9680278569167465e-06, + "loss": 1.3676, + "step": 65200 + }, + { + "epoch": 1.03, + "learning_rate": 3.966445077556189e-06, + "loss": 1.3446, + "step": 65300 + }, + { + "epoch": 1.04, + "learning_rate": 3.964862298195632e-06, + "loss": 1.3595, + "step": 65400 + }, + { + "epoch": 1.04, + "learning_rate": 3.963279518835075e-06, + "loss": 1.3416, + "step": 65500 + }, + { + "epoch": 1.04, + "learning_rate": 3.961696739474518e-06, + "loss": 1.352, + "step": 65600 + }, + { + "epoch": 1.04, + "learning_rate": 3.96011396011396e-06, + "loss": 1.3585, + "step": 65700 + }, + { + "epoch": 1.04, + "learning_rate": 3.958531180753403e-06, + "loss": 1.3617, + "step": 65800 + }, + { + "epoch": 1.04, + "learning_rate": 3.956948401392846e-06, + "loss": 1.3404, + "step": 65900 + }, + { + "epoch": 1.04, + "learning_rate": 3.955365622032289e-06, + "loss": 1.3627, + "step": 66000 + }, + { + "epoch": 1.05, + "learning_rate": 3.953782842671731e-06, + "loss": 1.354, + "step": 66100 + }, + { + "epoch": 1.05, + "learning_rate": 3.9522000633111744e-06, + "loss": 1.3701, + "step": 66200 + }, + { + "epoch": 1.05, + "learning_rate": 3.9506172839506175e-06, + "loss": 1.3561, + "step": 66300 + }, + { + "epoch": 1.05, + "learning_rate": 3.9490345045900605e-06, + "loss": 1.3754, + "step": 66400 + }, + { + "epoch": 1.05, + "learning_rate": 3.947451725229504e-06, + "loss": 1.3656, + "step": 66500 + }, + { + "epoch": 1.05, + "learning_rate": 3.945868945868947e-06, + "loss": 1.3586, + "step": 66600 + }, + { + "epoch": 1.06, + "learning_rate": 3.944286166508389e-06, + "loss": 1.3348, + "step": 66700 + }, + { + "epoch": 1.06, + "learning_rate": 3.942703387147832e-06, + "loss": 1.3549, + "step": 66800 + }, + { + "epoch": 1.06, + "learning_rate": 3.941120607787275e-06, + "loss": 1.3488, + "step": 66900 + }, + { + "epoch": 1.06, + "learning_rate": 3.939537828426718e-06, + "loss": 1.3692, + "step": 67000 + }, + { + "epoch": 1.06, + "learning_rate": 3.93795504906616e-06, + "loss": 1.3401, + "step": 67100 + }, + { + "epoch": 1.06, + "learning_rate": 3.936372269705603e-06, + "loss": 1.3545, + "step": 67200 + }, + { + "epoch": 1.07, + "learning_rate": 3.934789490345046e-06, + "loss": 1.347, + "step": 67300 + }, + { + "epoch": 1.07, + "learning_rate": 3.933206710984489e-06, + "loss": 1.3796, + "step": 67400 + }, + { + "epoch": 1.07, + "learning_rate": 3.9316239316239315e-06, + "loss": 1.3656, + "step": 67500 + }, + { + "epoch": 1.07, + "learning_rate": 3.9300411522633746e-06, + "loss": 1.3286, + "step": 67600 + }, + { + "epoch": 1.07, + "learning_rate": 3.928458372902818e-06, + "loss": 1.3433, + "step": 67700 + }, + { + "epoch": 1.07, + "learning_rate": 3.926875593542261e-06, + "loss": 1.3645, + "step": 67800 + }, + { + "epoch": 1.07, + "learning_rate": 3.925292814181703e-06, + "loss": 1.3678, + "step": 67900 + }, + { + "epoch": 1.08, + "learning_rate": 3.923710034821146e-06, + "loss": 1.3597, + "step": 68000 + }, + { + "epoch": 1.08, + "learning_rate": 3.922127255460589e-06, + "loss": 1.3543, + "step": 68100 + }, + { + "epoch": 1.08, + "learning_rate": 3.920544476100032e-06, + "loss": 1.3634, + "step": 68200 + }, + { + "epoch": 1.08, + "learning_rate": 3.918961696739475e-06, + "loss": 1.343, + "step": 68300 + }, + { + "epoch": 1.08, + "learning_rate": 3.917378917378918e-06, + "loss": 1.3481, + "step": 68400 + }, + { + "epoch": 1.08, + "learning_rate": 3.91579613801836e-06, + "loss": 1.348, + "step": 68500 + }, + { + "epoch": 1.09, + "learning_rate": 3.914213358657803e-06, + "loss": 1.356, + "step": 68600 + }, + { + "epoch": 1.09, + "learning_rate": 3.912630579297246e-06, + "loss": 1.3819, + "step": 68700 + }, + { + "epoch": 1.09, + "learning_rate": 3.9110477999366895e-06, + "loss": 1.3476, + "step": 68800 + }, + { + "epoch": 1.09, + "learning_rate": 3.909465020576132e-06, + "loss": 1.3359, + "step": 68900 + }, + { + "epoch": 1.09, + "learning_rate": 3.907882241215575e-06, + "loss": 1.3606, + "step": 69000 + }, + { + "epoch": 1.09, + "learning_rate": 3.906299461855018e-06, + "loss": 1.3676, + "step": 69100 + }, + { + "epoch": 1.1, + "learning_rate": 3.904716682494461e-06, + "loss": 1.3724, + "step": 69200 + }, + { + "epoch": 1.1, + "learning_rate": 3.903133903133903e-06, + "loss": 1.3654, + "step": 69300 + }, + { + "epoch": 1.1, + "learning_rate": 3.901551123773346e-06, + "loss": 1.3772, + "step": 69400 + }, + { + "epoch": 1.1, + "learning_rate": 3.899968344412789e-06, + "loss": 1.3806, + "step": 69500 + }, + { + "epoch": 1.1, + "learning_rate": 3.898385565052232e-06, + "loss": 1.3633, + "step": 69600 + }, + { + "epoch": 1.1, + "learning_rate": 3.896802785691675e-06, + "loss": 1.3594, + "step": 69700 + }, + { + "epoch": 1.1, + "learning_rate": 3.895220006331118e-06, + "loss": 1.3504, + "step": 69800 + }, + { + "epoch": 1.11, + "learning_rate": 3.8936372269705604e-06, + "loss": 1.3525, + "step": 69900 + }, + { + "epoch": 1.11, + "learning_rate": 3.8920544476100035e-06, + "loss": 1.3608, + "step": 70000 + }, + { + "epoch": 1.11, + "learning_rate": 3.8904716682494465e-06, + "loss": 1.348, + "step": 70100 + }, + { + "epoch": 1.11, + "learning_rate": 3.88888888888889e-06, + "loss": 1.3252, + "step": 70200 + }, + { + "epoch": 1.11, + "learning_rate": 3.887306109528332e-06, + "loss": 1.3624, + "step": 70300 + }, + { + "epoch": 1.11, + "learning_rate": 3.885723330167775e-06, + "loss": 1.348, + "step": 70400 + }, + { + "epoch": 1.12, + "learning_rate": 3.884140550807218e-06, + "loss": 1.3765, + "step": 70500 + }, + { + "epoch": 1.12, + "learning_rate": 3.882557771446661e-06, + "loss": 1.3879, + "step": 70600 + }, + { + "epoch": 1.12, + "learning_rate": 3.880974992086103e-06, + "loss": 1.3765, + "step": 70700 + }, + { + "epoch": 1.12, + "learning_rate": 3.879392212725546e-06, + "loss": 1.3685, + "step": 70800 + }, + { + "epoch": 1.12, + "learning_rate": 3.877809433364989e-06, + "loss": 1.37, + "step": 70900 + }, + { + "epoch": 1.12, + "learning_rate": 3.876226654004432e-06, + "loss": 1.3613, + "step": 71000 + }, + { + "epoch": 1.13, + "learning_rate": 3.8746438746438745e-06, + "loss": 1.3731, + "step": 71100 + }, + { + "epoch": 1.13, + "learning_rate": 3.873061095283318e-06, + "loss": 1.3673, + "step": 71200 + }, + { + "epoch": 1.13, + "learning_rate": 3.8714783159227606e-06, + "loss": 1.3548, + "step": 71300 + }, + { + "epoch": 1.13, + "learning_rate": 3.869895536562204e-06, + "loss": 1.344, + "step": 71400 + }, + { + "epoch": 1.13, + "learning_rate": 3.868312757201647e-06, + "loss": 1.3536, + "step": 71500 + }, + { + "epoch": 1.13, + "learning_rate": 3.86672997784109e-06, + "loss": 1.3772, + "step": 71600 + }, + { + "epoch": 1.13, + "learning_rate": 3.865147198480532e-06, + "loss": 1.3332, + "step": 71700 + }, + { + "epoch": 1.14, + "learning_rate": 3.863564419119975e-06, + "loss": 1.3838, + "step": 71800 + }, + { + "epoch": 1.14, + "learning_rate": 3.861981639759418e-06, + "loss": 1.3746, + "step": 71900 + }, + { + "epoch": 1.14, + "learning_rate": 3.860398860398861e-06, + "loss": 1.3498, + "step": 72000 + }, + { + "epoch": 1.14, + "learning_rate": 3.858816081038303e-06, + "loss": 1.3559, + "step": 72100 + }, + { + "epoch": 1.14, + "learning_rate": 3.857233301677746e-06, + "loss": 1.3602, + "step": 72200 + }, + { + "epoch": 1.14, + "learning_rate": 3.855650522317189e-06, + "loss": 1.3448, + "step": 72300 + }, + { + "epoch": 1.15, + "learning_rate": 3.854067742956632e-06, + "loss": 1.3547, + "step": 72400 + }, + { + "epoch": 1.15, + "learning_rate": 3.852484963596075e-06, + "loss": 1.3709, + "step": 72500 + }, + { + "epoch": 1.15, + "learning_rate": 3.850902184235518e-06, + "loss": 1.364, + "step": 72600 + }, + { + "epoch": 1.15, + "learning_rate": 3.849319404874961e-06, + "loss": 1.3403, + "step": 72700 + }, + { + "epoch": 1.15, + "learning_rate": 3.847736625514404e-06, + "loss": 1.3509, + "step": 72800 + }, + { + "epoch": 1.15, + "learning_rate": 3.846153846153847e-06, + "loss": 1.3617, + "step": 72900 + }, + { + "epoch": 1.16, + "learning_rate": 3.84457106679329e-06, + "loss": 1.3364, + "step": 73000 + }, + { + "epoch": 1.16, + "learning_rate": 3.842988287432732e-06, + "loss": 1.3216, + "step": 73100 + }, + { + "epoch": 1.16, + "learning_rate": 3.841405508072175e-06, + "loss": 1.3694, + "step": 73200 + }, + { + "epoch": 1.16, + "learning_rate": 3.839822728711618e-06, + "loss": 1.3433, + "step": 73300 + }, + { + "epoch": 1.16, + "learning_rate": 3.838239949351061e-06, + "loss": 1.3569, + "step": 73400 + }, + { + "epoch": 1.16, + "learning_rate": 3.836657169990503e-06, + "loss": 1.3632, + "step": 73500 + }, + { + "epoch": 1.16, + "learning_rate": 3.8350743906299464e-06, + "loss": 1.3487, + "step": 73600 + }, + { + "epoch": 1.17, + "learning_rate": 3.8334916112693895e-06, + "loss": 1.3595, + "step": 73700 + }, + { + "epoch": 1.17, + "learning_rate": 3.8319088319088325e-06, + "loss": 1.3269, + "step": 73800 + }, + { + "epoch": 1.17, + "learning_rate": 3.830326052548275e-06, + "loss": 1.3452, + "step": 73900 + }, + { + "epoch": 1.17, + "learning_rate": 3.828743273187718e-06, + "loss": 1.3676, + "step": 74000 + }, + { + "epoch": 1.17, + "learning_rate": 3.827160493827161e-06, + "loss": 1.3345, + "step": 74100 + }, + { + "epoch": 1.17, + "learning_rate": 3.825577714466603e-06, + "loss": 1.3674, + "step": 74200 + }, + { + "epoch": 1.18, + "learning_rate": 3.823994935106047e-06, + "loss": 1.3462, + "step": 74300 + }, + { + "epoch": 1.18, + "learning_rate": 3.82241215574549e-06, + "loss": 1.3423, + "step": 74400 + }, + { + "epoch": 1.18, + "learning_rate": 3.820829376384932e-06, + "loss": 1.3688, + "step": 74500 + }, + { + "epoch": 1.18, + "learning_rate": 3.819246597024375e-06, + "loss": 1.3577, + "step": 74600 + }, + { + "epoch": 1.18, + "learning_rate": 3.817663817663818e-06, + "loss": 1.3475, + "step": 74700 + }, + { + "epoch": 1.18, + "learning_rate": 3.816081038303261e-06, + "loss": 1.3605, + "step": 74800 + }, + { + "epoch": 1.19, + "learning_rate": 3.814498258942704e-06, + "loss": 1.3702, + "step": 74900 + }, + { + "epoch": 1.19, + "learning_rate": 3.8129154795821466e-06, + "loss": 1.3731, + "step": 75000 + }, + { + "epoch": 1.19, + "learning_rate": 3.8113327002215896e-06, + "loss": 1.364, + "step": 75100 + }, + { + "epoch": 1.19, + "learning_rate": 3.8097499208610322e-06, + "loss": 1.3373, + "step": 75200 + }, + { + "epoch": 1.19, + "learning_rate": 3.808167141500475e-06, + "loss": 1.356, + "step": 75300 + }, + { + "epoch": 1.19, + "learning_rate": 3.806584362139918e-06, + "loss": 1.3346, + "step": 75400 + }, + { + "epoch": 1.19, + "learning_rate": 3.8050015827793605e-06, + "loss": 1.3646, + "step": 75500 + }, + { + "epoch": 1.2, + "learning_rate": 3.8034188034188036e-06, + "loss": 1.353, + "step": 75600 + }, + { + "epoch": 1.2, + "learning_rate": 3.801836024058246e-06, + "loss": 1.3246, + "step": 75700 + }, + { + "epoch": 1.2, + "learning_rate": 3.8002532446976893e-06, + "loss": 1.3659, + "step": 75800 + }, + { + "epoch": 1.2, + "learning_rate": 3.7986704653371327e-06, + "loss": 1.3665, + "step": 75900 + }, + { + "epoch": 1.2, + "learning_rate": 3.7970876859765753e-06, + "loss": 1.3685, + "step": 76000 + }, + { + "epoch": 1.2, + "learning_rate": 3.7955049066160184e-06, + "loss": 1.344, + "step": 76100 + }, + { + "epoch": 1.21, + "learning_rate": 3.793922127255461e-06, + "loss": 1.3183, + "step": 76200 + }, + { + "epoch": 1.21, + "learning_rate": 3.792339347894904e-06, + "loss": 1.3421, + "step": 76300 + }, + { + "epoch": 1.21, + "learning_rate": 3.7907565685343467e-06, + "loss": 1.3504, + "step": 76400 + }, + { + "epoch": 1.21, + "learning_rate": 3.7891737891737893e-06, + "loss": 1.3559, + "step": 76500 + }, + { + "epoch": 1.21, + "learning_rate": 3.7875910098132324e-06, + "loss": 1.3221, + "step": 76600 + }, + { + "epoch": 1.21, + "learning_rate": 3.786008230452675e-06, + "loss": 1.3545, + "step": 76700 + }, + { + "epoch": 1.22, + "learning_rate": 3.784425451092118e-06, + "loss": 1.3611, + "step": 76800 + }, + { + "epoch": 1.22, + "learning_rate": 3.7828426717315607e-06, + "loss": 1.3621, + "step": 76900 + }, + { + "epoch": 1.22, + "learning_rate": 3.7812598923710037e-06, + "loss": 1.354, + "step": 77000 + }, + { + "epoch": 1.22, + "learning_rate": 3.7796771130104463e-06, + "loss": 1.3293, + "step": 77100 + }, + { + "epoch": 1.22, + "learning_rate": 3.7780943336498894e-06, + "loss": 1.3496, + "step": 77200 + }, + { + "epoch": 1.22, + "learning_rate": 3.776511554289332e-06, + "loss": 1.3431, + "step": 77300 + }, + { + "epoch": 1.23, + "learning_rate": 3.774928774928775e-06, + "loss": 1.3338, + "step": 77400 + }, + { + "epoch": 1.23, + "learning_rate": 3.7733459955682185e-06, + "loss": 1.3329, + "step": 77500 + }, + { + "epoch": 1.23, + "learning_rate": 3.771763216207661e-06, + "loss": 1.3387, + "step": 77600 + }, + { + "epoch": 1.23, + "learning_rate": 3.770180436847104e-06, + "loss": 1.3766, + "step": 77700 + }, + { + "epoch": 1.23, + "learning_rate": 3.768597657486547e-06, + "loss": 1.3163, + "step": 77800 + }, + { + "epoch": 1.23, + "learning_rate": 3.7670148781259894e-06, + "loss": 1.3449, + "step": 77900 + }, + { + "epoch": 1.23, + "learning_rate": 3.7654320987654325e-06, + "loss": 1.3351, + "step": 78000 + }, + { + "epoch": 1.24, + "learning_rate": 3.763849319404875e-06, + "loss": 1.3551, + "step": 78100 + }, + { + "epoch": 1.24, + "learning_rate": 3.762266540044318e-06, + "loss": 1.3391, + "step": 78200 + }, + { + "epoch": 1.24, + "learning_rate": 3.760683760683761e-06, + "loss": 1.3542, + "step": 78300 + }, + { + "epoch": 1.24, + "learning_rate": 3.759100981323204e-06, + "loss": 1.332, + "step": 78400 + }, + { + "epoch": 1.24, + "learning_rate": 3.7575182019626465e-06, + "loss": 1.3421, + "step": 78500 + }, + { + "epoch": 1.24, + "learning_rate": 3.7559354226020895e-06, + "loss": 1.3687, + "step": 78600 + }, + { + "epoch": 1.25, + "learning_rate": 3.754352643241532e-06, + "loss": 1.3348, + "step": 78700 + }, + { + "epoch": 1.25, + "learning_rate": 3.752769863880975e-06, + "loss": 1.3163, + "step": 78800 + }, + { + "epoch": 1.25, + "learning_rate": 3.751187084520418e-06, + "loss": 1.3787, + "step": 78900 + }, + { + "epoch": 1.25, + "learning_rate": 3.7496043051598613e-06, + "loss": 1.3313, + "step": 79000 + }, + { + "epoch": 1.25, + "learning_rate": 3.7480215257993043e-06, + "loss": 1.3514, + "step": 79100 + }, + { + "epoch": 1.25, + "learning_rate": 3.746438746438747e-06, + "loss": 1.3543, + "step": 79200 + }, + { + "epoch": 1.26, + "learning_rate": 3.7448559670781896e-06, + "loss": 1.3407, + "step": 79300 + }, + { + "epoch": 1.26, + "learning_rate": 3.7432731877176326e-06, + "loss": 1.3521, + "step": 79400 + }, + { + "epoch": 1.26, + "learning_rate": 3.7416904083570752e-06, + "loss": 1.3531, + "step": 79500 + }, + { + "epoch": 1.26, + "learning_rate": 3.7401076289965183e-06, + "loss": 1.3366, + "step": 79600 + }, + { + "epoch": 1.26, + "learning_rate": 3.738524849635961e-06, + "loss": 1.3766, + "step": 79700 + }, + { + "epoch": 1.26, + "learning_rate": 3.736942070275404e-06, + "loss": 1.3361, + "step": 79800 + }, + { + "epoch": 1.26, + "learning_rate": 3.7353592909148466e-06, + "loss": 1.353, + "step": 79900 + }, + { + "epoch": 1.27, + "learning_rate": 3.7337765115542896e-06, + "loss": 1.3788, + "step": 80000 + }, + { + "epoch": 1.27, + "learning_rate": 3.7321937321937323e-06, + "loss": 1.3579, + "step": 80100 + }, + { + "epoch": 1.27, + "learning_rate": 3.7306109528331753e-06, + "loss": 1.3516, + "step": 80200 + }, + { + "epoch": 1.27, + "learning_rate": 3.729028173472618e-06, + "loss": 1.3363, + "step": 80300 + }, + { + "epoch": 1.27, + "learning_rate": 3.727445394112061e-06, + "loss": 1.3517, + "step": 80400 + }, + { + "epoch": 1.27, + "learning_rate": 3.7258626147515036e-06, + "loss": 1.3608, + "step": 80500 + }, + { + "epoch": 1.28, + "learning_rate": 3.724279835390947e-06, + "loss": 1.3653, + "step": 80600 + }, + { + "epoch": 1.28, + "learning_rate": 3.7226970560303897e-06, + "loss": 1.3336, + "step": 80700 + }, + { + "epoch": 1.28, + "learning_rate": 3.7211142766698328e-06, + "loss": 1.3465, + "step": 80800 + }, + { + "epoch": 1.28, + "learning_rate": 3.7195314973092754e-06, + "loss": 1.3553, + "step": 80900 + }, + { + "epoch": 1.28, + "learning_rate": 3.7179487179487184e-06, + "loss": 1.369, + "step": 81000 + }, + { + "epoch": 1.28, + "learning_rate": 3.716365938588161e-06, + "loss": 1.3499, + "step": 81100 + }, + { + "epoch": 1.29, + "learning_rate": 3.714783159227604e-06, + "loss": 1.3304, + "step": 81200 + }, + { + "epoch": 1.29, + "learning_rate": 3.7132003798670467e-06, + "loss": 1.3423, + "step": 81300 + }, + { + "epoch": 1.29, + "learning_rate": 3.7116176005064898e-06, + "loss": 1.3477, + "step": 81400 + }, + { + "epoch": 1.29, + "learning_rate": 3.7100348211459324e-06, + "loss": 1.3555, + "step": 81500 + }, + { + "epoch": 1.29, + "learning_rate": 3.7084520417853754e-06, + "loss": 1.3383, + "step": 81600 + }, + { + "epoch": 1.29, + "learning_rate": 3.706869262424818e-06, + "loss": 1.337, + "step": 81700 + }, + { + "epoch": 1.29, + "learning_rate": 3.705286483064261e-06, + "loss": 1.3622, + "step": 81800 + }, + { + "epoch": 1.3, + "learning_rate": 3.7037037037037037e-06, + "loss": 1.3406, + "step": 81900 + }, + { + "epoch": 1.3, + "learning_rate": 3.7021209243431468e-06, + "loss": 1.3649, + "step": 82000 + }, + { + "epoch": 1.3, + "learning_rate": 3.7005381449825894e-06, + "loss": 1.3329, + "step": 82100 + }, + { + "epoch": 1.3, + "learning_rate": 3.698955365622033e-06, + "loss": 1.3278, + "step": 82200 + }, + { + "epoch": 1.3, + "learning_rate": 3.6973725862614755e-06, + "loss": 1.3509, + "step": 82300 + }, + { + "epoch": 1.3, + "learning_rate": 3.6957898069009186e-06, + "loss": 1.3532, + "step": 82400 + }, + { + "epoch": 1.31, + "learning_rate": 3.694207027540361e-06, + "loss": 1.3628, + "step": 82500 + }, + { + "epoch": 1.31, + "learning_rate": 3.6926242481798042e-06, + "loss": 1.3405, + "step": 82600 + }, + { + "epoch": 1.31, + "learning_rate": 3.691041468819247e-06, + "loss": 1.3567, + "step": 82700 + }, + { + "epoch": 1.31, + "learning_rate": 3.68945868945869e-06, + "loss": 1.3371, + "step": 82800 + }, + { + "epoch": 1.31, + "learning_rate": 3.6878759100981325e-06, + "loss": 1.3617, + "step": 82900 + }, + { + "epoch": 1.31, + "learning_rate": 3.6862931307375756e-06, + "loss": 1.3455, + "step": 83000 + }, + { + "epoch": 1.32, + "learning_rate": 3.684710351377018e-06, + "loss": 1.3153, + "step": 83100 + }, + { + "epoch": 1.32, + "learning_rate": 3.6831275720164612e-06, + "loss": 1.3322, + "step": 83200 + }, + { + "epoch": 1.32, + "learning_rate": 3.681544792655904e-06, + "loss": 1.3395, + "step": 83300 + }, + { + "epoch": 1.32, + "learning_rate": 3.679962013295347e-06, + "loss": 1.3499, + "step": 83400 + }, + { + "epoch": 1.32, + "learning_rate": 3.6783792339347895e-06, + "loss": 1.3425, + "step": 83500 + }, + { + "epoch": 1.32, + "learning_rate": 3.6767964545742326e-06, + "loss": 1.3486, + "step": 83600 + }, + { + "epoch": 1.32, + "learning_rate": 3.6752136752136756e-06, + "loss": 1.3436, + "step": 83700 + }, + { + "epoch": 1.33, + "learning_rate": 3.6736308958531187e-06, + "loss": 1.3287, + "step": 83800 + }, + { + "epoch": 1.33, + "learning_rate": 3.6720481164925613e-06, + "loss": 1.3592, + "step": 83900 + }, + { + "epoch": 1.33, + "learning_rate": 3.6704653371320044e-06, + "loss": 1.384, + "step": 84000 + }, + { + "epoch": 1.33, + "learning_rate": 3.668882557771447e-06, + "loss": 1.3429, + "step": 84100 + }, + { + "epoch": 1.33, + "learning_rate": 3.66729977841089e-06, + "loss": 1.3577, + "step": 84200 + }, + { + "epoch": 1.33, + "learning_rate": 3.6657169990503327e-06, + "loss": 1.335, + "step": 84300 + }, + { + "epoch": 1.34, + "learning_rate": 3.6641342196897757e-06, + "loss": 1.351, + "step": 84400 + }, + { + "epoch": 1.34, + "learning_rate": 3.6625514403292183e-06, + "loss": 1.3479, + "step": 84500 + }, + { + "epoch": 1.34, + "learning_rate": 3.6609686609686614e-06, + "loss": 1.3309, + "step": 84600 + }, + { + "epoch": 1.34, + "learning_rate": 3.659385881608104e-06, + "loss": 1.3299, + "step": 84700 + }, + { + "epoch": 1.34, + "learning_rate": 3.657803102247547e-06, + "loss": 1.341, + "step": 84800 + }, + { + "epoch": 1.34, + "learning_rate": 3.6562203228869897e-06, + "loss": 1.3577, + "step": 84900 + }, + { + "epoch": 1.35, + "learning_rate": 3.6546375435264327e-06, + "loss": 1.3114, + "step": 85000 + }, + { + "epoch": 1.35, + "learning_rate": 3.6530547641658753e-06, + "loss": 1.3533, + "step": 85100 + }, + { + "epoch": 1.35, + "learning_rate": 3.651471984805318e-06, + "loss": 1.3538, + "step": 85200 + }, + { + "epoch": 1.35, + "learning_rate": 3.6498892054447614e-06, + "loss": 1.3231, + "step": 85300 + }, + { + "epoch": 1.35, + "learning_rate": 3.6483064260842045e-06, + "loss": 1.3331, + "step": 85400 + }, + { + "epoch": 1.35, + "learning_rate": 3.646723646723647e-06, + "loss": 1.3476, + "step": 85500 + }, + { + "epoch": 1.35, + "learning_rate": 3.64514086736309e-06, + "loss": 1.3395, + "step": 85600 + }, + { + "epoch": 1.36, + "learning_rate": 3.6435580880025328e-06, + "loss": 1.3632, + "step": 85700 + }, + { + "epoch": 1.36, + "learning_rate": 3.641975308641976e-06, + "loss": 1.3583, + "step": 85800 + }, + { + "epoch": 1.36, + "learning_rate": 3.6403925292814185e-06, + "loss": 1.3493, + "step": 85900 + }, + { + "epoch": 1.36, + "learning_rate": 3.6388097499208615e-06, + "loss": 1.3395, + "step": 86000 + }, + { + "epoch": 1.36, + "learning_rate": 3.637226970560304e-06, + "loss": 1.3199, + "step": 86100 + }, + { + "epoch": 1.36, + "learning_rate": 3.635644191199747e-06, + "loss": 1.3621, + "step": 86200 + }, + { + "epoch": 1.37, + "learning_rate": 3.63406141183919e-06, + "loss": 1.3265, + "step": 86300 + }, + { + "epoch": 1.37, + "learning_rate": 3.632478632478633e-06, + "loss": 1.3263, + "step": 86400 + }, + { + "epoch": 1.37, + "learning_rate": 3.6308958531180755e-06, + "loss": 1.3395, + "step": 86500 + }, + { + "epoch": 1.37, + "learning_rate": 3.629313073757518e-06, + "loss": 1.3416, + "step": 86600 + }, + { + "epoch": 1.37, + "learning_rate": 3.627730294396961e-06, + "loss": 1.3286, + "step": 86700 + }, + { + "epoch": 1.37, + "learning_rate": 3.6261475150364046e-06, + "loss": 1.3738, + "step": 86800 + }, + { + "epoch": 1.38, + "learning_rate": 3.6245647356758472e-06, + "loss": 1.3188, + "step": 86900 + }, + { + "epoch": 1.38, + "learning_rate": 3.6229819563152903e-06, + "loss": 1.352, + "step": 87000 + }, + { + "epoch": 1.38, + "learning_rate": 3.621399176954733e-06, + "loss": 1.3397, + "step": 87100 + }, + { + "epoch": 1.38, + "learning_rate": 3.619816397594176e-06, + "loss": 1.3484, + "step": 87200 + }, + { + "epoch": 1.38, + "learning_rate": 3.6182336182336186e-06, + "loss": 1.3384, + "step": 87300 + }, + { + "epoch": 1.38, + "learning_rate": 3.6166508388730616e-06, + "loss": 1.372, + "step": 87400 + }, + { + "epoch": 1.38, + "learning_rate": 3.6150680595125043e-06, + "loss": 1.3385, + "step": 87500 + }, + { + "epoch": 1.39, + "learning_rate": 3.6134852801519473e-06, + "loss": 1.3598, + "step": 87600 + }, + { + "epoch": 1.39, + "learning_rate": 3.61190250079139e-06, + "loss": 1.326, + "step": 87700 + }, + { + "epoch": 1.39, + "learning_rate": 3.6103197214308326e-06, + "loss": 1.368, + "step": 87800 + }, + { + "epoch": 1.39, + "learning_rate": 3.6087369420702756e-06, + "loss": 1.3561, + "step": 87900 + }, + { + "epoch": 1.39, + "learning_rate": 3.6071541627097182e-06, + "loss": 1.3387, + "step": 88000 + }, + { + "epoch": 1.39, + "learning_rate": 3.6055713833491613e-06, + "loss": 1.3297, + "step": 88100 + }, + { + "epoch": 1.4, + "learning_rate": 3.603988603988604e-06, + "loss": 1.364, + "step": 88200 + }, + { + "epoch": 1.4, + "learning_rate": 3.602405824628047e-06, + "loss": 1.3651, + "step": 88300 + }, + { + "epoch": 1.4, + "learning_rate": 3.6008230452674904e-06, + "loss": 1.3374, + "step": 88400 + }, + { + "epoch": 1.4, + "learning_rate": 3.599240265906933e-06, + "loss": 1.3361, + "step": 88500 + }, + { + "epoch": 1.4, + "learning_rate": 3.597657486546376e-06, + "loss": 1.3155, + "step": 88600 + }, + { + "epoch": 1.4, + "learning_rate": 3.5960747071858187e-06, + "loss": 1.3294, + "step": 88700 + }, + { + "epoch": 1.41, + "learning_rate": 3.5944919278252618e-06, + "loss": 1.3177, + "step": 88800 + }, + { + "epoch": 1.41, + "learning_rate": 3.5929091484647044e-06, + "loss": 1.3472, + "step": 88900 + }, + { + "epoch": 1.41, + "learning_rate": 3.5913263691041474e-06, + "loss": 1.3551, + "step": 89000 + }, + { + "epoch": 1.41, + "learning_rate": 3.58974358974359e-06, + "loss": 1.3543, + "step": 89100 + }, + { + "epoch": 1.41, + "learning_rate": 3.5881608103830327e-06, + "loss": 1.338, + "step": 89200 + }, + { + "epoch": 1.41, + "learning_rate": 3.5865780310224757e-06, + "loss": 1.3542, + "step": 89300 + }, + { + "epoch": 1.42, + "learning_rate": 3.5849952516619184e-06, + "loss": 1.3447, + "step": 89400 + }, + { + "epoch": 1.42, + "learning_rate": 3.5834124723013614e-06, + "loss": 1.3206, + "step": 89500 + }, + { + "epoch": 1.42, + "learning_rate": 3.581829692940804e-06, + "loss": 1.3711, + "step": 89600 + }, + { + "epoch": 1.42, + "learning_rate": 3.580246913580247e-06, + "loss": 1.3349, + "step": 89700 + }, + { + "epoch": 1.42, + "learning_rate": 3.5786641342196897e-06, + "loss": 1.3522, + "step": 89800 + }, + { + "epoch": 1.42, + "learning_rate": 3.5770813548591327e-06, + "loss": 1.3342, + "step": 89900 + }, + { + "epoch": 1.42, + "learning_rate": 3.5754985754985762e-06, + "loss": 1.3598, + "step": 90000 + }, + { + "epoch": 1.43, + "learning_rate": 3.573915796138019e-06, + "loss": 1.3272, + "step": 90100 + }, + { + "epoch": 1.43, + "learning_rate": 3.572333016777462e-06, + "loss": 1.3515, + "step": 90200 + }, + { + "epoch": 1.43, + "learning_rate": 3.5707502374169045e-06, + "loss": 1.3289, + "step": 90300 + }, + { + "epoch": 1.43, + "learning_rate": 3.569167458056347e-06, + "loss": 1.3632, + "step": 90400 + }, + { + "epoch": 1.43, + "learning_rate": 3.56758467869579e-06, + "loss": 1.3324, + "step": 90500 + }, + { + "epoch": 1.43, + "learning_rate": 3.566001899335233e-06, + "loss": 1.3339, + "step": 90600 + }, + { + "epoch": 1.44, + "learning_rate": 3.564419119974676e-06, + "loss": 1.3432, + "step": 90700 + }, + { + "epoch": 1.44, + "learning_rate": 3.5628363406141185e-06, + "loss": 1.3628, + "step": 90800 + }, + { + "epoch": 1.44, + "learning_rate": 3.5612535612535615e-06, + "loss": 1.3518, + "step": 90900 + }, + { + "epoch": 1.44, + "learning_rate": 3.559670781893004e-06, + "loss": 1.3588, + "step": 91000 + }, + { + "epoch": 1.44, + "learning_rate": 3.558088002532447e-06, + "loss": 1.336, + "step": 91100 + }, + { + "epoch": 1.44, + "learning_rate": 3.55650522317189e-06, + "loss": 1.3541, + "step": 91200 + }, + { + "epoch": 1.45, + "learning_rate": 3.554922443811333e-06, + "loss": 1.3326, + "step": 91300 + }, + { + "epoch": 1.45, + "learning_rate": 3.5533396644507755e-06, + "loss": 1.3279, + "step": 91400 + }, + { + "epoch": 1.45, + "learning_rate": 3.551756885090219e-06, + "loss": 1.3506, + "step": 91500 + }, + { + "epoch": 1.45, + "learning_rate": 3.550174105729662e-06, + "loss": 1.3268, + "step": 91600 + }, + { + "epoch": 1.45, + "learning_rate": 3.5485913263691046e-06, + "loss": 1.3444, + "step": 91700 + }, + { + "epoch": 1.45, + "learning_rate": 3.5470085470085473e-06, + "loss": 1.3283, + "step": 91800 + }, + { + "epoch": 1.45, + "learning_rate": 3.5454257676479903e-06, + "loss": 1.3466, + "step": 91900 + }, + { + "epoch": 1.46, + "learning_rate": 3.543842988287433e-06, + "loss": 1.3298, + "step": 92000 + }, + { + "epoch": 1.46, + "learning_rate": 3.542260208926876e-06, + "loss": 1.334, + "step": 92100 + }, + { + "epoch": 1.46, + "learning_rate": 3.5406774295663186e-06, + "loss": 1.3271, + "step": 92200 + }, + { + "epoch": 1.46, + "learning_rate": 3.5390946502057617e-06, + "loss": 1.3278, + "step": 92300 + }, + { + "epoch": 1.46, + "learning_rate": 3.5375118708452043e-06, + "loss": 1.3547, + "step": 92400 + }, + { + "epoch": 1.46, + "learning_rate": 3.5359290914846473e-06, + "loss": 1.3342, + "step": 92500 + }, + { + "epoch": 1.47, + "learning_rate": 3.53434631212409e-06, + "loss": 1.3258, + "step": 92600 + }, + { + "epoch": 1.47, + "learning_rate": 3.532763532763533e-06, + "loss": 1.3216, + "step": 92700 + }, + { + "epoch": 1.47, + "learning_rate": 3.5311807534029756e-06, + "loss": 1.327, + "step": 92800 + }, + { + "epoch": 1.47, + "learning_rate": 3.5295979740424187e-06, + "loss": 1.3605, + "step": 92900 + }, + { + "epoch": 1.47, + "learning_rate": 3.5280151946818613e-06, + "loss": 1.3544, + "step": 93000 + }, + { + "epoch": 1.47, + "learning_rate": 3.5264324153213048e-06, + "loss": 1.3404, + "step": 93100 + }, + { + "epoch": 1.48, + "learning_rate": 3.5248496359607474e-06, + "loss": 1.3455, + "step": 93200 + }, + { + "epoch": 1.48, + "learning_rate": 3.5232668566001904e-06, + "loss": 1.3543, + "step": 93300 + }, + { + "epoch": 1.48, + "learning_rate": 3.521684077239633e-06, + "loss": 1.3596, + "step": 93400 + }, + { + "epoch": 1.48, + "learning_rate": 3.520101297879076e-06, + "loss": 1.331, + "step": 93500 + }, + { + "epoch": 1.48, + "learning_rate": 3.5185185185185187e-06, + "loss": 1.3409, + "step": 93600 + }, + { + "epoch": 1.48, + "learning_rate": 3.516935739157962e-06, + "loss": 1.3539, + "step": 93700 + }, + { + "epoch": 1.48, + "learning_rate": 3.5153529597974044e-06, + "loss": 1.338, + "step": 93800 + }, + { + "epoch": 1.49, + "learning_rate": 3.5137701804368475e-06, + "loss": 1.3295, + "step": 93900 + }, + { + "epoch": 1.49, + "learning_rate": 3.51218740107629e-06, + "loss": 1.3394, + "step": 94000 + }, + { + "epoch": 1.49, + "learning_rate": 3.510604621715733e-06, + "loss": 1.3233, + "step": 94100 + }, + { + "epoch": 1.49, + "learning_rate": 3.5090218423551758e-06, + "loss": 1.3655, + "step": 94200 + }, + { + "epoch": 1.49, + "learning_rate": 3.507439062994619e-06, + "loss": 1.3424, + "step": 94300 + }, + { + "epoch": 1.49, + "learning_rate": 3.5058562836340614e-06, + "loss": 1.3325, + "step": 94400 + }, + { + "epoch": 1.5, + "learning_rate": 3.5042735042735045e-06, + "loss": 1.3422, + "step": 94500 + }, + { + "epoch": 1.5, + "learning_rate": 3.502690724912947e-06, + "loss": 1.3492, + "step": 94600 + }, + { + "epoch": 1.5, + "learning_rate": 3.5011079455523906e-06, + "loss": 1.3447, + "step": 94700 + }, + { + "epoch": 1.5, + "learning_rate": 3.499525166191833e-06, + "loss": 1.3561, + "step": 94800 + }, + { + "epoch": 1.5, + "learning_rate": 3.4979423868312762e-06, + "loss": 1.2977, + "step": 94900 + }, + { + "epoch": 1.5, + "learning_rate": 3.496359607470719e-06, + "loss": 1.3085, + "step": 95000 + }, + { + "epoch": 1.51, + "learning_rate": 3.494776828110162e-06, + "loss": 1.3448, + "step": 95100 + }, + { + "epoch": 1.51, + "learning_rate": 3.4931940487496045e-06, + "loss": 1.328, + "step": 95200 + }, + { + "epoch": 1.51, + "learning_rate": 3.4916112693890476e-06, + "loss": 1.3417, + "step": 95300 + }, + { + "epoch": 1.51, + "learning_rate": 3.4900284900284902e-06, + "loss": 1.3226, + "step": 95400 + }, + { + "epoch": 1.51, + "learning_rate": 3.4884457106679333e-06, + "loss": 1.3326, + "step": 95500 + }, + { + "epoch": 1.51, + "learning_rate": 3.486862931307376e-06, + "loss": 1.3389, + "step": 95600 + }, + { + "epoch": 1.51, + "learning_rate": 3.485280151946819e-06, + "loss": 1.342, + "step": 95700 + }, + { + "epoch": 1.52, + "learning_rate": 3.4836973725862616e-06, + "loss": 1.3514, + "step": 95800 + }, + { + "epoch": 1.52, + "learning_rate": 3.4821145932257046e-06, + "loss": 1.3468, + "step": 95900 + }, + { + "epoch": 1.52, + "learning_rate": 3.4805318138651472e-06, + "loss": 1.3344, + "step": 96000 + }, + { + "epoch": 1.52, + "learning_rate": 3.4789490345045903e-06, + "loss": 1.3234, + "step": 96100 + }, + { + "epoch": 1.52, + "learning_rate": 3.4773662551440333e-06, + "loss": 1.3577, + "step": 96200 + }, + { + "epoch": 1.52, + "learning_rate": 3.4757834757834764e-06, + "loss": 1.324, + "step": 96300 + }, + { + "epoch": 1.53, + "learning_rate": 3.474200696422919e-06, + "loss": 1.347, + "step": 96400 + }, + { + "epoch": 1.53, + "learning_rate": 3.472617917062362e-06, + "loss": 1.3533, + "step": 96500 + }, + { + "epoch": 1.53, + "learning_rate": 3.4710351377018047e-06, + "loss": 1.3493, + "step": 96600 + }, + { + "epoch": 1.53, + "learning_rate": 3.4694523583412477e-06, + "loss": 1.3317, + "step": 96700 + }, + { + "epoch": 1.53, + "learning_rate": 3.4678695789806903e-06, + "loss": 1.3557, + "step": 96800 + }, + { + "epoch": 1.53, + "learning_rate": 3.4662867996201334e-06, + "loss": 1.3582, + "step": 96900 + }, + { + "epoch": 1.54, + "learning_rate": 3.464704020259576e-06, + "loss": 1.3431, + "step": 97000 + }, + { + "epoch": 1.54, + "learning_rate": 3.463121240899019e-06, + "loss": 1.372, + "step": 97100 + }, + { + "epoch": 1.54, + "learning_rate": 3.4615384615384617e-06, + "loss": 1.3122, + "step": 97200 + }, + { + "epoch": 1.54, + "learning_rate": 3.4599556821779047e-06, + "loss": 1.356, + "step": 97300 + }, + { + "epoch": 1.54, + "learning_rate": 3.4583729028173474e-06, + "loss": 1.3469, + "step": 97400 + }, + { + "epoch": 1.54, + "learning_rate": 3.4567901234567904e-06, + "loss": 1.3448, + "step": 97500 + }, + { + "epoch": 1.54, + "learning_rate": 3.455207344096233e-06, + "loss": 1.352, + "step": 97600 + }, + { + "epoch": 1.55, + "learning_rate": 3.4536245647356757e-06, + "loss": 1.3369, + "step": 97700 + }, + { + "epoch": 1.55, + "learning_rate": 3.452041785375119e-06, + "loss": 1.3427, + "step": 97800 + }, + { + "epoch": 1.55, + "learning_rate": 3.450459006014562e-06, + "loss": 1.3204, + "step": 97900 + }, + { + "epoch": 1.55, + "learning_rate": 3.448876226654005e-06, + "loss": 1.3367, + "step": 98000 + }, + { + "epoch": 1.55, + "learning_rate": 3.447293447293448e-06, + "loss": 1.3189, + "step": 98100 + }, + { + "epoch": 1.55, + "learning_rate": 3.4457106679328905e-06, + "loss": 1.3274, + "step": 98200 + }, + { + "epoch": 1.56, + "learning_rate": 3.4441278885723335e-06, + "loss": 1.3206, + "step": 98300 + }, + { + "epoch": 1.56, + "learning_rate": 3.442545109211776e-06, + "loss": 1.3636, + "step": 98400 + }, + { + "epoch": 1.56, + "learning_rate": 3.440962329851219e-06, + "loss": 1.3236, + "step": 98500 + }, + { + "epoch": 1.56, + "learning_rate": 3.439379550490662e-06, + "loss": 1.3303, + "step": 98600 + }, + { + "epoch": 1.56, + "learning_rate": 3.437796771130105e-06, + "loss": 1.346, + "step": 98700 + }, + { + "epoch": 1.56, + "learning_rate": 3.4362139917695475e-06, + "loss": 1.3576, + "step": 98800 + }, + { + "epoch": 1.57, + "learning_rate": 3.4346312124089905e-06, + "loss": 1.3425, + "step": 98900 + }, + { + "epoch": 1.57, + "learning_rate": 3.433048433048433e-06, + "loss": 1.3458, + "step": 99000 + }, + { + "epoch": 1.57, + "learning_rate": 3.431465653687876e-06, + "loss": 1.319, + "step": 99100 + }, + { + "epoch": 1.57, + "learning_rate": 3.429882874327319e-06, + "loss": 1.3236, + "step": 99200 + }, + { + "epoch": 1.57, + "learning_rate": 3.4283000949667615e-06, + "loss": 1.3306, + "step": 99300 + }, + { + "epoch": 1.57, + "learning_rate": 3.426717315606205e-06, + "loss": 1.3295, + "step": 99400 + }, + { + "epoch": 1.57, + "learning_rate": 3.425134536245648e-06, + "loss": 1.3378, + "step": 99500 + }, + { + "epoch": 1.58, + "learning_rate": 3.4235517568850906e-06, + "loss": 1.3242, + "step": 99600 + }, + { + "epoch": 1.58, + "learning_rate": 3.4219689775245337e-06, + "loss": 1.3366, + "step": 99700 + }, + { + "epoch": 1.58, + "learning_rate": 3.4203861981639763e-06, + "loss": 1.3188, + "step": 99800 + }, + { + "epoch": 1.58, + "learning_rate": 3.4188034188034193e-06, + "loss": 1.3388, + "step": 99900 + }, + { + "epoch": 1.58, + "learning_rate": 3.417220639442862e-06, + "loss": 1.3445, + "step": 100000 + }, + { + "epoch": 1.58, + "learning_rate": 3.415637860082305e-06, + "loss": 1.3382, + "step": 100100 + }, + { + "epoch": 1.59, + "learning_rate": 3.4140550807217476e-06, + "loss": 1.3336, + "step": 100200 + }, + { + "epoch": 1.59, + "learning_rate": 3.4124723013611902e-06, + "loss": 1.3389, + "step": 100300 + }, + { + "epoch": 1.59, + "learning_rate": 3.4108895220006333e-06, + "loss": 1.3076, + "step": 100400 + }, + { + "epoch": 1.59, + "learning_rate": 3.409306742640076e-06, + "loss": 1.3149, + "step": 100500 + }, + { + "epoch": 1.59, + "learning_rate": 3.407723963279519e-06, + "loss": 1.3258, + "step": 100600 + }, + { + "epoch": 1.59, + "learning_rate": 3.4061411839189616e-06, + "loss": 1.3366, + "step": 100700 + }, + { + "epoch": 1.6, + "learning_rate": 3.4045584045584046e-06, + "loss": 1.3558, + "step": 100800 + }, + { + "epoch": 1.6, + "learning_rate": 3.402975625197848e-06, + "loss": 1.3258, + "step": 100900 + }, + { + "epoch": 1.6, + "learning_rate": 3.4013928458372907e-06, + "loss": 1.3214, + "step": 101000 + }, + { + "epoch": 1.6, + "learning_rate": 3.3998100664767338e-06, + "loss": 1.3206, + "step": 101100 + }, + { + "epoch": 1.6, + "learning_rate": 3.3982272871161764e-06, + "loss": 1.344, + "step": 101200 + }, + { + "epoch": 1.6, + "learning_rate": 3.3966445077556195e-06, + "loss": 1.3272, + "step": 101300 + }, + { + "epoch": 1.6, + "learning_rate": 3.395061728395062e-06, + "loss": 1.345, + "step": 101400 + }, + { + "epoch": 1.61, + "learning_rate": 3.393478949034505e-06, + "loss": 1.3512, + "step": 101500 + }, + { + "epoch": 1.61, + "learning_rate": 3.3918961696739478e-06, + "loss": 1.3251, + "step": 101600 + }, + { + "epoch": 1.61, + "learning_rate": 3.3903133903133904e-06, + "loss": 1.3308, + "step": 101700 + }, + { + "epoch": 1.61, + "learning_rate": 3.3887306109528334e-06, + "loss": 1.3093, + "step": 101800 + }, + { + "epoch": 1.61, + "learning_rate": 3.387147831592276e-06, + "loss": 1.3604, + "step": 101900 + }, + { + "epoch": 1.61, + "learning_rate": 3.385565052231719e-06, + "loss": 1.333, + "step": 102000 + }, + { + "epoch": 1.62, + "learning_rate": 3.3839822728711617e-06, + "loss": 1.323, + "step": 102100 + }, + { + "epoch": 1.62, + "learning_rate": 3.3823994935106048e-06, + "loss": 1.3351, + "step": 102200 + }, + { + "epoch": 1.62, + "learning_rate": 3.3808167141500474e-06, + "loss": 1.3332, + "step": 102300 + }, + { + "epoch": 1.62, + "learning_rate": 3.3792339347894904e-06, + "loss": 1.3592, + "step": 102400 + }, + { + "epoch": 1.62, + "learning_rate": 3.377651155428934e-06, + "loss": 1.3237, + "step": 102500 + }, + { + "epoch": 1.62, + "learning_rate": 3.3760683760683765e-06, + "loss": 1.3622, + "step": 102600 + }, + { + "epoch": 1.63, + "learning_rate": 3.3744855967078196e-06, + "loss": 1.3377, + "step": 102700 + }, + { + "epoch": 1.63, + "learning_rate": 3.372902817347262e-06, + "loss": 1.3373, + "step": 102800 + }, + { + "epoch": 1.63, + "learning_rate": 3.371320037986705e-06, + "loss": 1.3233, + "step": 102900 + }, + { + "epoch": 1.63, + "learning_rate": 3.369737258626148e-06, + "loss": 1.3689, + "step": 103000 + }, + { + "epoch": 1.63, + "learning_rate": 3.3681544792655905e-06, + "loss": 1.3075, + "step": 103100 + }, + { + "epoch": 1.63, + "learning_rate": 3.3665716999050336e-06, + "loss": 1.3618, + "step": 103200 + }, + { + "epoch": 1.64, + "learning_rate": 3.364988920544476e-06, + "loss": 1.3438, + "step": 103300 + }, + { + "epoch": 1.64, + "learning_rate": 3.3634061411839192e-06, + "loss": 1.3298, + "step": 103400 + }, + { + "epoch": 1.64, + "learning_rate": 3.361823361823362e-06, + "loss": 1.3454, + "step": 103500 + }, + { + "epoch": 1.64, + "learning_rate": 3.360240582462805e-06, + "loss": 1.3319, + "step": 103600 + }, + { + "epoch": 1.64, + "learning_rate": 3.3586578031022475e-06, + "loss": 1.3207, + "step": 103700 + }, + { + "epoch": 1.64, + "learning_rate": 3.3570750237416906e-06, + "loss": 1.3493, + "step": 103800 + }, + { + "epoch": 1.64, + "learning_rate": 3.355492244381133e-06, + "loss": 1.3293, + "step": 103900 + }, + { + "epoch": 1.65, + "learning_rate": 3.3539094650205767e-06, + "loss": 1.32, + "step": 104000 + }, + { + "epoch": 1.65, + "learning_rate": 3.3523266856600197e-06, + "loss": 1.3179, + "step": 104100 + }, + { + "epoch": 1.65, + "learning_rate": 3.3507439062994623e-06, + "loss": 1.3528, + "step": 104200 + }, + { + "epoch": 1.65, + "learning_rate": 3.349161126938905e-06, + "loss": 1.3387, + "step": 104300 + }, + { + "epoch": 1.65, + "learning_rate": 3.347578347578348e-06, + "loss": 1.3413, + "step": 104400 + }, + { + "epoch": 1.65, + "learning_rate": 3.3459955682177906e-06, + "loss": 1.3168, + "step": 104500 + }, + { + "epoch": 1.66, + "learning_rate": 3.3444127888572337e-06, + "loss": 1.3271, + "step": 104600 + }, + { + "epoch": 1.66, + "learning_rate": 3.3428300094966763e-06, + "loss": 1.3384, + "step": 104700 + }, + { + "epoch": 1.66, + "learning_rate": 3.3412472301361194e-06, + "loss": 1.3482, + "step": 104800 + }, + { + "epoch": 1.66, + "learning_rate": 3.339664450775562e-06, + "loss": 1.3239, + "step": 104900 + }, + { + "epoch": 1.66, + "learning_rate": 3.338081671415005e-06, + "loss": 1.3381, + "step": 105000 + }, + { + "epoch": 1.66, + "learning_rate": 3.3364988920544477e-06, + "loss": 1.3379, + "step": 105100 + }, + { + "epoch": 1.67, + "learning_rate": 3.3349161126938907e-06, + "loss": 1.3056, + "step": 105200 + }, + { + "epoch": 1.67, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.3402, + "step": 105300 + }, + { + "epoch": 1.67, + "learning_rate": 3.3317505539727764e-06, + "loss": 1.3289, + "step": 105400 + }, + { + "epoch": 1.67, + "learning_rate": 3.330167774612219e-06, + "loss": 1.3045, + "step": 105500 + }, + { + "epoch": 1.67, + "learning_rate": 3.3285849952516625e-06, + "loss": 1.3117, + "step": 105600 + }, + { + "epoch": 1.67, + "learning_rate": 3.327002215891105e-06, + "loss": 1.314, + "step": 105700 + }, + { + "epoch": 1.67, + "learning_rate": 3.325419436530548e-06, + "loss": 1.355, + "step": 105800 + }, + { + "epoch": 1.68, + "learning_rate": 3.3238366571699908e-06, + "loss": 1.3459, + "step": 105900 + }, + { + "epoch": 1.68, + "learning_rate": 3.322253877809434e-06, + "loss": 1.3268, + "step": 106000 + }, + { + "epoch": 1.68, + "learning_rate": 3.3206710984488764e-06, + "loss": 1.3285, + "step": 106100 + }, + { + "epoch": 1.68, + "learning_rate": 3.3190883190883195e-06, + "loss": 1.3304, + "step": 106200 + }, + { + "epoch": 1.68, + "learning_rate": 3.317505539727762e-06, + "loss": 1.3438, + "step": 106300 + }, + { + "epoch": 1.68, + "learning_rate": 3.315922760367205e-06, + "loss": 1.3257, + "step": 106400 + }, + { + "epoch": 1.69, + "learning_rate": 3.3143399810066478e-06, + "loss": 1.3216, + "step": 106500 + }, + { + "epoch": 1.69, + "learning_rate": 3.312757201646091e-06, + "loss": 1.3075, + "step": 106600 + }, + { + "epoch": 1.69, + "learning_rate": 3.3111744222855335e-06, + "loss": 1.3277, + "step": 106700 + }, + { + "epoch": 1.69, + "learning_rate": 3.3095916429249765e-06, + "loss": 1.3688, + "step": 106800 + }, + { + "epoch": 1.69, + "learning_rate": 3.308008863564419e-06, + "loss": 1.3486, + "step": 106900 + }, + { + "epoch": 1.69, + "learning_rate": 3.306426084203862e-06, + "loss": 1.3362, + "step": 107000 + }, + { + "epoch": 1.7, + "learning_rate": 3.304843304843305e-06, + "loss": 1.3165, + "step": 107100 + }, + { + "epoch": 1.7, + "learning_rate": 3.3032605254827483e-06, + "loss": 1.324, + "step": 107200 + }, + { + "epoch": 1.7, + "learning_rate": 3.301677746122191e-06, + "loss": 1.3148, + "step": 107300 + }, + { + "epoch": 1.7, + "learning_rate": 3.300094966761634e-06, + "loss": 1.3415, + "step": 107400 + }, + { + "epoch": 1.7, + "learning_rate": 3.2985121874010766e-06, + "loss": 1.326, + "step": 107500 + }, + { + "epoch": 1.7, + "learning_rate": 3.2969294080405196e-06, + "loss": 1.3257, + "step": 107600 + }, + { + "epoch": 1.7, + "learning_rate": 3.2953466286799622e-06, + "loss": 1.3203, + "step": 107700 + }, + { + "epoch": 1.71, + "learning_rate": 3.2937638493194053e-06, + "loss": 1.312, + "step": 107800 + }, + { + "epoch": 1.71, + "learning_rate": 3.292181069958848e-06, + "loss": 1.3526, + "step": 107900 + }, + { + "epoch": 1.71, + "learning_rate": 3.290598290598291e-06, + "loss": 1.3468, + "step": 108000 + }, + { + "epoch": 1.71, + "learning_rate": 3.2890155112377336e-06, + "loss": 1.3067, + "step": 108100 + }, + { + "epoch": 1.71, + "learning_rate": 3.2874327318771766e-06, + "loss": 1.3456, + "step": 108200 + }, + { + "epoch": 1.71, + "learning_rate": 3.2858499525166193e-06, + "loss": 1.3207, + "step": 108300 + }, + { + "epoch": 1.72, + "learning_rate": 3.2842671731560623e-06, + "loss": 1.3252, + "step": 108400 + }, + { + "epoch": 1.72, + "learning_rate": 3.282684393795505e-06, + "loss": 1.3169, + "step": 108500 + }, + { + "epoch": 1.72, + "learning_rate": 3.281101614434948e-06, + "loss": 1.3476, + "step": 108600 + }, + { + "epoch": 1.72, + "learning_rate": 3.279518835074391e-06, + "loss": 1.3189, + "step": 108700 + }, + { + "epoch": 1.72, + "learning_rate": 3.277936055713834e-06, + "loss": 1.3409, + "step": 108800 + }, + { + "epoch": 1.72, + "learning_rate": 3.2763532763532767e-06, + "loss": 1.3318, + "step": 108900 + }, + { + "epoch": 1.73, + "learning_rate": 3.2747704969927197e-06, + "loss": 1.3365, + "step": 109000 + }, + { + "epoch": 1.73, + "learning_rate": 3.2731877176321624e-06, + "loss": 1.3341, + "step": 109100 + }, + { + "epoch": 1.73, + "learning_rate": 3.2716049382716054e-06, + "loss": 1.324, + "step": 109200 + }, + { + "epoch": 1.73, + "learning_rate": 3.270022158911048e-06, + "loss": 1.326, + "step": 109300 + }, + { + "epoch": 1.73, + "learning_rate": 3.268439379550491e-06, + "loss": 1.3212, + "step": 109400 + }, + { + "epoch": 1.73, + "learning_rate": 3.2668566001899337e-06, + "loss": 1.3492, + "step": 109500 + }, + { + "epoch": 1.73, + "learning_rate": 3.2652738208293768e-06, + "loss": 1.3452, + "step": 109600 + }, + { + "epoch": 1.74, + "learning_rate": 3.2636910414688194e-06, + "loss": 1.3287, + "step": 109700 + }, + { + "epoch": 1.74, + "learning_rate": 3.2621082621082624e-06, + "loss": 1.3424, + "step": 109800 + }, + { + "epoch": 1.74, + "learning_rate": 3.260525482747705e-06, + "loss": 1.3312, + "step": 109900 + }, + { + "epoch": 1.74, + "learning_rate": 3.258942703387148e-06, + "loss": 1.3166, + "step": 110000 + }, + { + "epoch": 1.74, + "learning_rate": 3.2573599240265907e-06, + "loss": 1.309, + "step": 110100 + }, + { + "epoch": 1.74, + "learning_rate": 3.2557771446660334e-06, + "loss": 1.3397, + "step": 110200 + }, + { + "epoch": 1.75, + "learning_rate": 3.254194365305477e-06, + "loss": 1.3423, + "step": 110300 + }, + { + "epoch": 1.75, + "learning_rate": 3.25261158594492e-06, + "loss": 1.3385, + "step": 110400 + }, + { + "epoch": 1.75, + "learning_rate": 3.2510288065843625e-06, + "loss": 1.3396, + "step": 110500 + }, + { + "epoch": 1.75, + "learning_rate": 3.2494460272238055e-06, + "loss": 1.3332, + "step": 110600 + }, + { + "epoch": 1.75, + "learning_rate": 3.247863247863248e-06, + "loss": 1.3321, + "step": 110700 + }, + { + "epoch": 1.75, + "learning_rate": 3.2462804685026912e-06, + "loss": 1.3402, + "step": 110800 + }, + { + "epoch": 1.76, + "learning_rate": 3.244697689142134e-06, + "loss": 1.3369, + "step": 110900 + }, + { + "epoch": 1.76, + "learning_rate": 3.243114909781577e-06, + "loss": 1.3383, + "step": 111000 + }, + { + "epoch": 1.76, + "learning_rate": 3.2415321304210195e-06, + "loss": 1.3591, + "step": 111100 + }, + { + "epoch": 1.76, + "learning_rate": 3.2399493510604626e-06, + "loss": 1.3379, + "step": 111200 + }, + { + "epoch": 1.76, + "learning_rate": 3.238366571699905e-06, + "loss": 1.3493, + "step": 111300 + }, + { + "epoch": 1.76, + "learning_rate": 3.2367837923393482e-06, + "loss": 1.3709, + "step": 111400 + }, + { + "epoch": 1.76, + "learning_rate": 3.235201012978791e-06, + "loss": 1.3163, + "step": 111500 + }, + { + "epoch": 1.77, + "learning_rate": 3.2336182336182335e-06, + "loss": 1.34, + "step": 111600 + }, + { + "epoch": 1.77, + "learning_rate": 3.2320354542576765e-06, + "loss": 1.3182, + "step": 111700 + }, + { + "epoch": 1.77, + "learning_rate": 3.230452674897119e-06, + "loss": 1.3316, + "step": 111800 + }, + { + "epoch": 1.77, + "learning_rate": 3.2288698955365626e-06, + "loss": 1.3312, + "step": 111900 + }, + { + "epoch": 1.77, + "learning_rate": 3.2272871161760057e-06, + "loss": 1.3393, + "step": 112000 + }, + { + "epoch": 1.77, + "learning_rate": 3.2257043368154483e-06, + "loss": 1.3649, + "step": 112100 + }, + { + "epoch": 1.78, + "learning_rate": 3.2241215574548913e-06, + "loss": 1.3315, + "step": 112200 + }, + { + "epoch": 1.78, + "learning_rate": 3.222538778094334e-06, + "loss": 1.3465, + "step": 112300 + }, + { + "epoch": 1.78, + "learning_rate": 3.220955998733777e-06, + "loss": 1.3265, + "step": 112400 + }, + { + "epoch": 1.78, + "learning_rate": 3.2193732193732196e-06, + "loss": 1.3435, + "step": 112500 + }, + { + "epoch": 1.78, + "learning_rate": 3.2177904400126627e-06, + "loss": 1.3268, + "step": 112600 + }, + { + "epoch": 1.78, + "learning_rate": 3.2162076606521053e-06, + "loss": 1.345, + "step": 112700 + }, + { + "epoch": 1.79, + "learning_rate": 3.214624881291548e-06, + "loss": 1.3492, + "step": 112800 + }, + { + "epoch": 1.79, + "learning_rate": 3.213042101930991e-06, + "loss": 1.3159, + "step": 112900 + }, + { + "epoch": 1.79, + "learning_rate": 3.2114593225704336e-06, + "loss": 1.3553, + "step": 113000 + }, + { + "epoch": 1.79, + "learning_rate": 3.2098765432098767e-06, + "loss": 1.3485, + "step": 113100 + }, + { + "epoch": 1.79, + "learning_rate": 3.2082937638493193e-06, + "loss": 1.3291, + "step": 113200 + }, + { + "epoch": 1.79, + "learning_rate": 3.2067109844887623e-06, + "loss": 1.3574, + "step": 113300 + }, + { + "epoch": 1.79, + "learning_rate": 3.205128205128206e-06, + "loss": 1.3107, + "step": 113400 + }, + { + "epoch": 1.8, + "learning_rate": 3.2035454257676484e-06, + "loss": 1.339, + "step": 113500 + }, + { + "epoch": 1.8, + "learning_rate": 3.2019626464070915e-06, + "loss": 1.3372, + "step": 113600 + }, + { + "epoch": 1.8, + "learning_rate": 3.200379867046534e-06, + "loss": 1.3562, + "step": 113700 + }, + { + "epoch": 1.8, + "learning_rate": 3.198797087685977e-06, + "loss": 1.3162, + "step": 113800 + }, + { + "epoch": 1.8, + "learning_rate": 3.1972143083254198e-06, + "loss": 1.3386, + "step": 113900 + }, + { + "epoch": 1.8, + "learning_rate": 3.195631528964863e-06, + "loss": 1.3453, + "step": 114000 + }, + { + "epoch": 1.81, + "learning_rate": 3.1940487496043054e-06, + "loss": 1.3481, + "step": 114100 + }, + { + "epoch": 1.81, + "learning_rate": 3.192465970243748e-06, + "loss": 1.3414, + "step": 114200 + }, + { + "epoch": 1.81, + "learning_rate": 3.190883190883191e-06, + "loss": 1.3339, + "step": 114300 + }, + { + "epoch": 1.81, + "learning_rate": 3.1893004115226337e-06, + "loss": 1.3333, + "step": 114400 + }, + { + "epoch": 1.81, + "learning_rate": 3.187717632162077e-06, + "loss": 1.3391, + "step": 114500 + }, + { + "epoch": 1.81, + "learning_rate": 3.1861348528015194e-06, + "loss": 1.3127, + "step": 114600 + }, + { + "epoch": 1.82, + "learning_rate": 3.1845520734409625e-06, + "loss": 1.3352, + "step": 114700 + }, + { + "epoch": 1.82, + "learning_rate": 3.182969294080405e-06, + "loss": 1.3104, + "step": 114800 + }, + { + "epoch": 1.82, + "learning_rate": 3.181386514719848e-06, + "loss": 1.3151, + "step": 114900 + }, + { + "epoch": 1.82, + "learning_rate": 3.1798037353592916e-06, + "loss": 1.3351, + "step": 115000 + }, + { + "epoch": 1.82, + "learning_rate": 3.1782209559987342e-06, + "loss": 1.3245, + "step": 115100 + }, + { + "epoch": 1.82, + "learning_rate": 3.1766381766381773e-06, + "loss": 1.3054, + "step": 115200 + }, + { + "epoch": 1.82, + "learning_rate": 3.17505539727762e-06, + "loss": 1.326, + "step": 115300 + }, + { + "epoch": 1.83, + "learning_rate": 3.1734726179170625e-06, + "loss": 1.3504, + "step": 115400 + }, + { + "epoch": 1.83, + "learning_rate": 3.1718898385565056e-06, + "loss": 1.3345, + "step": 115500 + }, + { + "epoch": 1.83, + "learning_rate": 3.170307059195948e-06, + "loss": 1.3097, + "step": 115600 + }, + { + "epoch": 1.83, + "learning_rate": 3.1687242798353912e-06, + "loss": 1.3416, + "step": 115700 + }, + { + "epoch": 1.83, + "learning_rate": 3.167141500474834e-06, + "loss": 1.3275, + "step": 115800 + }, + { + "epoch": 1.83, + "learning_rate": 3.165558721114277e-06, + "loss": 1.318, + "step": 115900 + }, + { + "epoch": 1.84, + "learning_rate": 3.1639759417537195e-06, + "loss": 1.3182, + "step": 116000 + }, + { + "epoch": 1.84, + "learning_rate": 3.1623931623931626e-06, + "loss": 1.3638, + "step": 116100 + }, + { + "epoch": 1.84, + "learning_rate": 3.1608103830326052e-06, + "loss": 1.3402, + "step": 116200 + }, + { + "epoch": 1.84, + "learning_rate": 3.1592276036720483e-06, + "loss": 1.3261, + "step": 116300 + }, + { + "epoch": 1.84, + "learning_rate": 3.157644824311491e-06, + "loss": 1.3304, + "step": 116400 + }, + { + "epoch": 1.84, + "learning_rate": 3.156062044950934e-06, + "loss": 1.3446, + "step": 116500 + }, + { + "epoch": 1.85, + "learning_rate": 3.1544792655903774e-06, + "loss": 1.3422, + "step": 116600 + }, + { + "epoch": 1.85, + "learning_rate": 3.15289648622982e-06, + "loss": 1.332, + "step": 116700 + }, + { + "epoch": 1.85, + "learning_rate": 3.1513137068692627e-06, + "loss": 1.3041, + "step": 116800 + }, + { + "epoch": 1.85, + "learning_rate": 3.1497309275087057e-06, + "loss": 1.3017, + "step": 116900 + }, + { + "epoch": 1.85, + "learning_rate": 3.1481481481481483e-06, + "loss": 1.3405, + "step": 117000 + }, + { + "epoch": 1.85, + "learning_rate": 3.1465653687875914e-06, + "loss": 1.3474, + "step": 117100 + }, + { + "epoch": 1.86, + "learning_rate": 3.144982589427034e-06, + "loss": 1.333, + "step": 117200 + }, + { + "epoch": 1.86, + "learning_rate": 3.143399810066477e-06, + "loss": 1.325, + "step": 117300 + }, + { + "epoch": 1.86, + "learning_rate": 3.1418170307059197e-06, + "loss": 1.3349, + "step": 117400 + }, + { + "epoch": 1.86, + "learning_rate": 3.1402342513453627e-06, + "loss": 1.3099, + "step": 117500 + }, + { + "epoch": 1.86, + "learning_rate": 3.1386514719848053e-06, + "loss": 1.3403, + "step": 117600 + }, + { + "epoch": 1.86, + "learning_rate": 3.1370686926242484e-06, + "loss": 1.3311, + "step": 117700 + }, + { + "epoch": 1.86, + "learning_rate": 3.135485913263691e-06, + "loss": 1.3707, + "step": 117800 + }, + { + "epoch": 1.87, + "learning_rate": 3.133903133903134e-06, + "loss": 1.3376, + "step": 117900 + }, + { + "epoch": 1.87, + "learning_rate": 3.1323203545425767e-06, + "loss": 1.3179, + "step": 118000 + }, + { + "epoch": 1.87, + "learning_rate": 3.13073757518202e-06, + "loss": 1.3404, + "step": 118100 + }, + { + "epoch": 1.87, + "learning_rate": 3.1291547958214628e-06, + "loss": 1.3039, + "step": 118200 + }, + { + "epoch": 1.87, + "learning_rate": 3.127572016460906e-06, + "loss": 1.3341, + "step": 118300 + }, + { + "epoch": 1.87, + "learning_rate": 3.1259892371003485e-06, + "loss": 1.3285, + "step": 118400 + }, + { + "epoch": 1.88, + "learning_rate": 3.1244064577397915e-06, + "loss": 1.3153, + "step": 118500 + }, + { + "epoch": 1.88, + "learning_rate": 3.122823678379234e-06, + "loss": 1.3395, + "step": 118600 + }, + { + "epoch": 1.88, + "learning_rate": 3.121240899018677e-06, + "loss": 1.326, + "step": 118700 + }, + { + "epoch": 1.88, + "learning_rate": 3.11965811965812e-06, + "loss": 1.3294, + "step": 118800 + }, + { + "epoch": 1.88, + "learning_rate": 3.118075340297563e-06, + "loss": 1.3103, + "step": 118900 + }, + { + "epoch": 1.88, + "learning_rate": 3.1164925609370055e-06, + "loss": 1.3332, + "step": 119000 + }, + { + "epoch": 1.89, + "learning_rate": 3.1149097815764485e-06, + "loss": 1.3203, + "step": 119100 + }, + { + "epoch": 1.89, + "learning_rate": 3.113327002215891e-06, + "loss": 1.3298, + "step": 119200 + }, + { + "epoch": 1.89, + "learning_rate": 3.111744222855334e-06, + "loss": 1.328, + "step": 119300 + }, + { + "epoch": 1.89, + "learning_rate": 3.110161443494777e-06, + "loss": 1.332, + "step": 119400 + }, + { + "epoch": 1.89, + "learning_rate": 3.10857866413422e-06, + "loss": 1.3562, + "step": 119500 + }, + { + "epoch": 1.89, + "learning_rate": 3.1069958847736625e-06, + "loss": 1.3013, + "step": 119600 + }, + { + "epoch": 1.89, + "learning_rate": 3.105413105413106e-06, + "loss": 1.3436, + "step": 119700 + }, + { + "epoch": 1.9, + "learning_rate": 3.1038303260525486e-06, + "loss": 1.3275, + "step": 119800 + }, + { + "epoch": 1.9, + "learning_rate": 3.1022475466919916e-06, + "loss": 1.3135, + "step": 119900 + }, + { + "epoch": 1.9, + "learning_rate": 3.1006647673314343e-06, + "loss": 1.3158, + "step": 120000 + }, + { + "epoch": 1.9, + "learning_rate": 3.0990819879708773e-06, + "loss": 1.3128, + "step": 120100 + }, + { + "epoch": 1.9, + "learning_rate": 3.09749920861032e-06, + "loss": 1.3119, + "step": 120200 + }, + { + "epoch": 1.9, + "learning_rate": 3.095916429249763e-06, + "loss": 1.3106, + "step": 120300 + }, + { + "epoch": 1.91, + "learning_rate": 3.0943336498892056e-06, + "loss": 1.3477, + "step": 120400 + }, + { + "epoch": 1.91, + "learning_rate": 3.0927508705286487e-06, + "loss": 1.3346, + "step": 120500 + }, + { + "epoch": 1.91, + "learning_rate": 3.0911680911680913e-06, + "loss": 1.3455, + "step": 120600 + }, + { + "epoch": 1.91, + "learning_rate": 3.0895853118075343e-06, + "loss": 1.3575, + "step": 120700 + }, + { + "epoch": 1.91, + "learning_rate": 3.088002532446977e-06, + "loss": 1.3338, + "step": 120800 + }, + { + "epoch": 1.91, + "learning_rate": 3.08641975308642e-06, + "loss": 1.331, + "step": 120900 + }, + { + "epoch": 1.92, + "learning_rate": 3.0848369737258626e-06, + "loss": 1.3082, + "step": 121000 + }, + { + "epoch": 1.92, + "learning_rate": 3.0832541943653057e-06, + "loss": 1.3121, + "step": 121100 + }, + { + "epoch": 1.92, + "learning_rate": 3.0816714150047487e-06, + "loss": 1.324, + "step": 121200 + }, + { + "epoch": 1.92, + "learning_rate": 3.0800886356441918e-06, + "loss": 1.327, + "step": 121300 + }, + { + "epoch": 1.92, + "learning_rate": 3.0785058562836344e-06, + "loss": 1.3438, + "step": 121400 + }, + { + "epoch": 1.92, + "learning_rate": 3.0769230769230774e-06, + "loss": 1.3315, + "step": 121500 + }, + { + "epoch": 1.92, + "learning_rate": 3.07534029756252e-06, + "loss": 1.3018, + "step": 121600 + }, + { + "epoch": 1.93, + "learning_rate": 3.073757518201963e-06, + "loss": 1.3402, + "step": 121700 + }, + { + "epoch": 1.93, + "learning_rate": 3.0721747388414057e-06, + "loss": 1.3302, + "step": 121800 + }, + { + "epoch": 1.93, + "learning_rate": 3.0705919594808488e-06, + "loss": 1.3526, + "step": 121900 + }, + { + "epoch": 1.93, + "learning_rate": 3.0690091801202914e-06, + "loss": 1.3117, + "step": 122000 + }, + { + "epoch": 1.93, + "learning_rate": 3.0674264007597345e-06, + "loss": 1.3181, + "step": 122100 + }, + { + "epoch": 1.93, + "learning_rate": 3.065843621399177e-06, + "loss": 1.317, + "step": 122200 + }, + { + "epoch": 1.94, + "learning_rate": 3.06426084203862e-06, + "loss": 1.3328, + "step": 122300 + }, + { + "epoch": 1.94, + "learning_rate": 3.0626780626780627e-06, + "loss": 1.3485, + "step": 122400 + }, + { + "epoch": 1.94, + "learning_rate": 3.061095283317506e-06, + "loss": 1.3157, + "step": 122500 + }, + { + "epoch": 1.94, + "learning_rate": 3.0595125039569484e-06, + "loss": 1.3191, + "step": 122600 + }, + { + "epoch": 1.94, + "learning_rate": 3.0579297245963915e-06, + "loss": 1.3431, + "step": 122700 + }, + { + "epoch": 1.94, + "learning_rate": 3.0563469452358345e-06, + "loss": 1.3507, + "step": 122800 + }, + { + "epoch": 1.95, + "learning_rate": 3.0547641658752776e-06, + "loss": 1.329, + "step": 122900 + }, + { + "epoch": 1.95, + "learning_rate": 3.05318138651472e-06, + "loss": 1.3124, + "step": 123000 + }, + { + "epoch": 1.95, + "learning_rate": 3.0515986071541632e-06, + "loss": 1.3227, + "step": 123100 + }, + { + "epoch": 1.95, + "learning_rate": 3.050015827793606e-06, + "loss": 1.3298, + "step": 123200 + }, + { + "epoch": 1.95, + "learning_rate": 3.048433048433049e-06, + "loss": 1.3061, + "step": 123300 + }, + { + "epoch": 1.95, + "learning_rate": 3.0468502690724915e-06, + "loss": 1.3505, + "step": 123400 + }, + { + "epoch": 1.95, + "learning_rate": 3.0452674897119346e-06, + "loss": 1.3581, + "step": 123500 + }, + { + "epoch": 1.96, + "learning_rate": 3.043684710351377e-06, + "loss": 1.3231, + "step": 123600 + }, + { + "epoch": 1.96, + "learning_rate": 3.0421019309908203e-06, + "loss": 1.3397, + "step": 123700 + }, + { + "epoch": 1.96, + "learning_rate": 3.040519151630263e-06, + "loss": 1.3816, + "step": 123800 + }, + { + "epoch": 1.96, + "learning_rate": 3.038936372269706e-06, + "loss": 1.346, + "step": 123900 + }, + { + "epoch": 1.96, + "learning_rate": 3.0373535929091486e-06, + "loss": 1.3171, + "step": 124000 + }, + { + "epoch": 1.96, + "learning_rate": 3.035770813548591e-06, + "loss": 1.3305, + "step": 124100 + }, + { + "epoch": 1.97, + "learning_rate": 3.0341880341880342e-06, + "loss": 1.3195, + "step": 124200 + }, + { + "epoch": 1.97, + "learning_rate": 3.032605254827477e-06, + "loss": 1.3489, + "step": 124300 + }, + { + "epoch": 1.97, + "learning_rate": 3.0310224754669203e-06, + "loss": 1.3424, + "step": 124400 + }, + { + "epoch": 1.97, + "learning_rate": 3.0294396961063634e-06, + "loss": 1.3456, + "step": 124500 + }, + { + "epoch": 1.97, + "learning_rate": 3.027856916745806e-06, + "loss": 1.3357, + "step": 124600 + }, + { + "epoch": 1.97, + "learning_rate": 3.026274137385249e-06, + "loss": 1.3321, + "step": 124700 + }, + { + "epoch": 1.98, + "learning_rate": 3.0246913580246917e-06, + "loss": 1.3198, + "step": 124800 + }, + { + "epoch": 1.98, + "learning_rate": 3.0231085786641347e-06, + "loss": 1.3266, + "step": 124900 + }, + { + "epoch": 1.98, + "learning_rate": 3.0215257993035773e-06, + "loss": 1.3294, + "step": 125000 + }, + { + "epoch": 1.98, + "learning_rate": 3.0199430199430204e-06, + "loss": 1.3104, + "step": 125100 + }, + { + "epoch": 1.98, + "learning_rate": 3.018360240582463e-06, + "loss": 1.352, + "step": 125200 + }, + { + "epoch": 1.98, + "learning_rate": 3.016777461221906e-06, + "loss": 1.3145, + "step": 125300 + }, + { + "epoch": 1.98, + "learning_rate": 3.0151946818613487e-06, + "loss": 1.3587, + "step": 125400 + }, + { + "epoch": 1.99, + "learning_rate": 3.0136119025007913e-06, + "loss": 1.3504, + "step": 125500 + }, + { + "epoch": 1.99, + "learning_rate": 3.0120291231402344e-06, + "loss": 1.3412, + "step": 125600 + }, + { + "epoch": 1.99, + "learning_rate": 3.010446343779677e-06, + "loss": 1.3132, + "step": 125700 + }, + { + "epoch": 1.99, + "learning_rate": 3.00886356441912e-06, + "loss": 1.3386, + "step": 125800 + }, + { + "epoch": 1.99, + "learning_rate": 3.0072807850585635e-06, + "loss": 1.3341, + "step": 125900 + }, + { + "epoch": 1.99, + "learning_rate": 3.005698005698006e-06, + "loss": 1.3187, + "step": 126000 + }, + { + "epoch": 2.0, + "learning_rate": 3.004115226337449e-06, + "loss": 1.326, + "step": 126100 + }, + { + "epoch": 2.0, + "learning_rate": 3.002532446976892e-06, + "loss": 1.3339, + "step": 126200 + }, + { + "epoch": 2.0, + "learning_rate": 3.000949667616335e-06, + "loss": 1.3504, + "step": 126300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9993668882557775e-06, + "loss": 1.3061, + "step": 126400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977841088952205e-06, + "loss": 1.2796, + "step": 126500 + }, + { + "epoch": 2.0, + "learning_rate": 2.996201329534663e-06, + "loss": 1.313, + "step": 126600 + }, + { + "epoch": 2.01, + "learning_rate": 2.9946185501741058e-06, + "loss": 1.2852, + "step": 126700 + }, + { + "epoch": 2.01, + "learning_rate": 2.993035770813549e-06, + "loss": 1.2886, + "step": 126800 + }, + { + "epoch": 2.01, + "learning_rate": 2.9914529914529914e-06, + "loss": 1.2743, + "step": 126900 + }, + { + "epoch": 2.01, + "learning_rate": 2.9898702120924345e-06, + "loss": 1.2901, + "step": 127000 + }, + { + "epoch": 2.01, + "learning_rate": 2.988287432731877e-06, + "loss": 1.281, + "step": 127100 + }, + { + "epoch": 2.01, + "learning_rate": 2.98670465337132e-06, + "loss": 1.2892, + "step": 127200 + }, + { + "epoch": 2.01, + "learning_rate": 2.9851218740107628e-06, + "loss": 1.2896, + "step": 127300 + }, + { + "epoch": 2.02, + "learning_rate": 2.983539094650206e-06, + "loss": 1.2983, + "step": 127400 + }, + { + "epoch": 2.02, + "learning_rate": 2.9819563152896493e-06, + "loss": 1.2872, + "step": 127500 + }, + { + "epoch": 2.02, + "learning_rate": 2.980373535929092e-06, + "loss": 1.2886, + "step": 127600 + }, + { + "epoch": 2.02, + "learning_rate": 2.978790756568535e-06, + "loss": 1.282, + "step": 127700 + }, + { + "epoch": 2.02, + "learning_rate": 2.9772079772079776e-06, + "loss": 1.2946, + "step": 127800 + }, + { + "epoch": 2.02, + "learning_rate": 2.9756251978474206e-06, + "loss": 1.2729, + "step": 127900 + }, + { + "epoch": 2.03, + "learning_rate": 2.9740424184868633e-06, + "loss": 1.2968, + "step": 128000 + }, + { + "epoch": 2.03, + "learning_rate": 2.972459639126306e-06, + "loss": 1.2882, + "step": 128100 + }, + { + "epoch": 2.03, + "learning_rate": 2.970876859765749e-06, + "loss": 1.2777, + "step": 128200 + }, + { + "epoch": 2.03, + "learning_rate": 2.9692940804051916e-06, + "loss": 1.3108, + "step": 128300 + }, + { + "epoch": 2.03, + "learning_rate": 2.9677113010446346e-06, + "loss": 1.3069, + "step": 128400 + }, + { + "epoch": 2.03, + "learning_rate": 2.9661285216840772e-06, + "loss": 1.2808, + "step": 128500 + }, + { + "epoch": 2.04, + "learning_rate": 2.9645457423235203e-06, + "loss": 1.2905, + "step": 128600 + }, + { + "epoch": 2.04, + "learning_rate": 2.962962962962963e-06, + "loss": 1.3046, + "step": 128700 + }, + { + "epoch": 2.04, + "learning_rate": 2.961380183602406e-06, + "loss": 1.2843, + "step": 128800 + }, + { + "epoch": 2.04, + "learning_rate": 2.9597974042418486e-06, + "loss": 1.2923, + "step": 128900 + }, + { + "epoch": 2.04, + "learning_rate": 2.9582146248812916e-06, + "loss": 1.2785, + "step": 129000 + }, + { + "epoch": 2.04, + "learning_rate": 2.956631845520735e-06, + "loss": 1.293, + "step": 129100 + }, + { + "epoch": 2.04, + "learning_rate": 2.9550490661601777e-06, + "loss": 1.3079, + "step": 129200 + }, + { + "epoch": 2.05, + "learning_rate": 2.9534662867996203e-06, + "loss": 1.2952, + "step": 129300 + }, + { + "epoch": 2.05, + "learning_rate": 2.9518835074390634e-06, + "loss": 1.3053, + "step": 129400 + }, + { + "epoch": 2.05, + "learning_rate": 2.950300728078506e-06, + "loss": 1.3084, + "step": 129500 + }, + { + "epoch": 2.05, + "learning_rate": 2.948717948717949e-06, + "loss": 1.3001, + "step": 129600 + }, + { + "epoch": 2.05, + "learning_rate": 2.9471351693573917e-06, + "loss": 1.2768, + "step": 129700 + }, + { + "epoch": 2.05, + "learning_rate": 2.9455523899968347e-06, + "loss": 1.2784, + "step": 129800 + }, + { + "epoch": 2.06, + "learning_rate": 2.9439696106362774e-06, + "loss": 1.2952, + "step": 129900 + }, + { + "epoch": 2.06, + "learning_rate": 2.9423868312757204e-06, + "loss": 1.2591, + "step": 130000 + }, + { + "epoch": 2.06, + "learning_rate": 2.940804051915163e-06, + "loss": 1.2895, + "step": 130100 + }, + { + "epoch": 2.06, + "learning_rate": 2.939221272554606e-06, + "loss": 1.2566, + "step": 130200 + }, + { + "epoch": 2.06, + "learning_rate": 2.9376384931940487e-06, + "loss": 1.2555, + "step": 130300 + }, + { + "epoch": 2.06, + "learning_rate": 2.9360557138334918e-06, + "loss": 1.2803, + "step": 130400 + }, + { + "epoch": 2.07, + "learning_rate": 2.9344729344729344e-06, + "loss": 1.3069, + "step": 130500 + }, + { + "epoch": 2.07, + "learning_rate": 2.932890155112378e-06, + "loss": 1.3106, + "step": 130600 + }, + { + "epoch": 2.07, + "learning_rate": 2.9313073757518205e-06, + "loss": 1.2818, + "step": 130700 + }, + { + "epoch": 2.07, + "learning_rate": 2.9297245963912635e-06, + "loss": 1.2884, + "step": 130800 + }, + { + "epoch": 2.07, + "learning_rate": 2.928141817030706e-06, + "loss": 1.2982, + "step": 130900 + }, + { + "epoch": 2.07, + "learning_rate": 2.926559037670149e-06, + "loss": 1.3007, + "step": 131000 + }, + { + "epoch": 2.08, + "learning_rate": 2.924976258309592e-06, + "loss": 1.2833, + "step": 131100 + }, + { + "epoch": 2.08, + "learning_rate": 2.923393478949035e-06, + "loss": 1.3145, + "step": 131200 + }, + { + "epoch": 2.08, + "learning_rate": 2.9218106995884775e-06, + "loss": 1.2746, + "step": 131300 + }, + { + "epoch": 2.08, + "learning_rate": 2.9202279202279205e-06, + "loss": 1.2941, + "step": 131400 + }, + { + "epoch": 2.08, + "learning_rate": 2.918645140867363e-06, + "loss": 1.2643, + "step": 131500 + }, + { + "epoch": 2.08, + "learning_rate": 2.9170623615068062e-06, + "loss": 1.2868, + "step": 131600 + }, + { + "epoch": 2.08, + "learning_rate": 2.915479582146249e-06, + "loss": 1.2955, + "step": 131700 + }, + { + "epoch": 2.09, + "learning_rate": 2.913896802785692e-06, + "loss": 1.2999, + "step": 131800 + }, + { + "epoch": 2.09, + "learning_rate": 2.9123140234251345e-06, + "loss": 1.2886, + "step": 131900 + }, + { + "epoch": 2.09, + "learning_rate": 2.9107312440645776e-06, + "loss": 1.2749, + "step": 132000 + }, + { + "epoch": 2.09, + "learning_rate": 2.90914846470402e-06, + "loss": 1.2916, + "step": 132100 + }, + { + "epoch": 2.09, + "learning_rate": 2.9075656853434637e-06, + "loss": 1.2621, + "step": 132200 + }, + { + "epoch": 2.09, + "learning_rate": 2.9059829059829063e-06, + "loss": 1.2701, + "step": 132300 + }, + { + "epoch": 2.1, + "learning_rate": 2.9044001266223493e-06, + "loss": 1.3084, + "step": 132400 + }, + { + "epoch": 2.1, + "learning_rate": 2.902817347261792e-06, + "loss": 1.3103, + "step": 132500 + }, + { + "epoch": 2.1, + "learning_rate": 2.901234567901235e-06, + "loss": 1.2679, + "step": 132600 + }, + { + "epoch": 2.1, + "learning_rate": 2.8996517885406776e-06, + "loss": 1.2784, + "step": 132700 + }, + { + "epoch": 2.1, + "learning_rate": 2.8980690091801207e-06, + "loss": 1.286, + "step": 132800 + }, + { + "epoch": 2.1, + "learning_rate": 2.8964862298195633e-06, + "loss": 1.3071, + "step": 132900 + }, + { + "epoch": 2.11, + "learning_rate": 2.8949034504590063e-06, + "loss": 1.281, + "step": 133000 + }, + { + "epoch": 2.11, + "learning_rate": 2.893320671098449e-06, + "loss": 1.2711, + "step": 133100 + }, + { + "epoch": 2.11, + "learning_rate": 2.891737891737892e-06, + "loss": 1.3096, + "step": 133200 + }, + { + "epoch": 2.11, + "learning_rate": 2.8901551123773346e-06, + "loss": 1.2531, + "step": 133300 + }, + { + "epoch": 2.11, + "learning_rate": 2.8885723330167777e-06, + "loss": 1.2893, + "step": 133400 + }, + { + "epoch": 2.11, + "learning_rate": 2.8869895536562203e-06, + "loss": 1.3043, + "step": 133500 + }, + { + "epoch": 2.11, + "learning_rate": 2.8854067742956634e-06, + "loss": 1.2917, + "step": 133600 + }, + { + "epoch": 2.12, + "learning_rate": 2.883823994935106e-06, + "loss": 1.2718, + "step": 133700 + }, + { + "epoch": 2.12, + "learning_rate": 2.8822412155745495e-06, + "loss": 1.2837, + "step": 133800 + }, + { + "epoch": 2.12, + "learning_rate": 2.880658436213992e-06, + "loss": 1.2853, + "step": 133900 + }, + { + "epoch": 2.12, + "learning_rate": 2.879075656853435e-06, + "loss": 1.2695, + "step": 134000 + }, + { + "epoch": 2.12, + "learning_rate": 2.8774928774928778e-06, + "loss": 1.2764, + "step": 134100 + }, + { + "epoch": 2.12, + "learning_rate": 2.875910098132321e-06, + "loss": 1.2888, + "step": 134200 + }, + { + "epoch": 2.13, + "learning_rate": 2.8743273187717634e-06, + "loss": 1.2961, + "step": 134300 + }, + { + "epoch": 2.13, + "learning_rate": 2.8727445394112065e-06, + "loss": 1.2867, + "step": 134400 + }, + { + "epoch": 2.13, + "learning_rate": 2.871161760050649e-06, + "loss": 1.2587, + "step": 134500 + }, + { + "epoch": 2.13, + "learning_rate": 2.869578980690092e-06, + "loss": 1.2934, + "step": 134600 + }, + { + "epoch": 2.13, + "learning_rate": 2.8679962013295348e-06, + "loss": 1.2906, + "step": 134700 + }, + { + "epoch": 2.13, + "learning_rate": 2.866413421968978e-06, + "loss": 1.3073, + "step": 134800 + }, + { + "epoch": 2.14, + "learning_rate": 2.8648306426084204e-06, + "loss": 1.2593, + "step": 134900 + }, + { + "epoch": 2.14, + "learning_rate": 2.8632478632478635e-06, + "loss": 1.2991, + "step": 135000 + }, + { + "epoch": 2.14, + "learning_rate": 2.861665083887306e-06, + "loss": 1.3139, + "step": 135100 + }, + { + "epoch": 2.14, + "learning_rate": 2.860082304526749e-06, + "loss": 1.2677, + "step": 135200 + }, + { + "epoch": 2.14, + "learning_rate": 2.858499525166192e-06, + "loss": 1.2836, + "step": 135300 + }, + { + "epoch": 2.14, + "learning_rate": 2.8569167458056353e-06, + "loss": 1.299, + "step": 135400 + }, + { + "epoch": 2.14, + "learning_rate": 2.855333966445078e-06, + "loss": 1.2876, + "step": 135500 + }, + { + "epoch": 2.15, + "learning_rate": 2.853751187084521e-06, + "loss": 1.2551, + "step": 135600 + }, + { + "epoch": 2.15, + "learning_rate": 2.8521684077239636e-06, + "loss": 1.285, + "step": 135700 + }, + { + "epoch": 2.15, + "learning_rate": 2.8505856283634066e-06, + "loss": 1.2765, + "step": 135800 + }, + { + "epoch": 2.15, + "learning_rate": 2.8490028490028492e-06, + "loss": 1.2646, + "step": 135900 + }, + { + "epoch": 2.15, + "learning_rate": 2.8474200696422923e-06, + "loss": 1.2749, + "step": 136000 + }, + { + "epoch": 2.15, + "learning_rate": 2.845837290281735e-06, + "loss": 1.3016, + "step": 136100 + }, + { + "epoch": 2.16, + "learning_rate": 2.844254510921178e-06, + "loss": 1.2995, + "step": 136200 + }, + { + "epoch": 2.16, + "learning_rate": 2.8426717315606206e-06, + "loss": 1.3023, + "step": 136300 + }, + { + "epoch": 2.16, + "learning_rate": 2.8410889522000636e-06, + "loss": 1.2772, + "step": 136400 + }, + { + "epoch": 2.16, + "learning_rate": 2.8395061728395062e-06, + "loss": 1.2784, + "step": 136500 + }, + { + "epoch": 2.16, + "learning_rate": 2.837923393478949e-06, + "loss": 1.3212, + "step": 136600 + }, + { + "epoch": 2.16, + "learning_rate": 2.836340614118392e-06, + "loss": 1.2975, + "step": 136700 + }, + { + "epoch": 2.17, + "learning_rate": 2.8347578347578345e-06, + "loss": 1.2954, + "step": 136800 + }, + { + "epoch": 2.17, + "learning_rate": 2.833175055397278e-06, + "loss": 1.2626, + "step": 136900 + }, + { + "epoch": 2.17, + "learning_rate": 2.831592276036721e-06, + "loss": 1.2924, + "step": 137000 + }, + { + "epoch": 2.17, + "learning_rate": 2.8300094966761637e-06, + "loss": 1.3133, + "step": 137100 + }, + { + "epoch": 2.17, + "learning_rate": 2.8284267173156067e-06, + "loss": 1.2915, + "step": 137200 + }, + { + "epoch": 2.17, + "learning_rate": 2.8268439379550494e-06, + "loss": 1.2693, + "step": 137300 + }, + { + "epoch": 2.17, + "learning_rate": 2.8252611585944924e-06, + "loss": 1.2822, + "step": 137400 + }, + { + "epoch": 2.18, + "learning_rate": 2.823678379233935e-06, + "loss": 1.2854, + "step": 137500 + }, + { + "epoch": 2.18, + "learning_rate": 2.822095599873378e-06, + "loss": 1.2886, + "step": 137600 + }, + { + "epoch": 2.18, + "learning_rate": 2.8205128205128207e-06, + "loss": 1.2906, + "step": 137700 + }, + { + "epoch": 2.18, + "learning_rate": 2.8189300411522637e-06, + "loss": 1.2743, + "step": 137800 + }, + { + "epoch": 2.18, + "learning_rate": 2.8173472617917064e-06, + "loss": 1.2775, + "step": 137900 + }, + { + "epoch": 2.18, + "learning_rate": 2.815764482431149e-06, + "loss": 1.283, + "step": 138000 + }, + { + "epoch": 2.19, + "learning_rate": 2.814181703070592e-06, + "loss": 1.2714, + "step": 138100 + }, + { + "epoch": 2.19, + "learning_rate": 2.8125989237100347e-06, + "loss": 1.2827, + "step": 138200 + }, + { + "epoch": 2.19, + "learning_rate": 2.8110161443494777e-06, + "loss": 1.2831, + "step": 138300 + }, + { + "epoch": 2.19, + "learning_rate": 2.8094333649889203e-06, + "loss": 1.2901, + "step": 138400 + }, + { + "epoch": 2.19, + "learning_rate": 2.807850585628364e-06, + "loss": 1.2779, + "step": 138500 + }, + { + "epoch": 2.19, + "learning_rate": 2.806267806267807e-06, + "loss": 1.3033, + "step": 138600 + }, + { + "epoch": 2.2, + "learning_rate": 2.8046850269072495e-06, + "loss": 1.2836, + "step": 138700 + }, + { + "epoch": 2.2, + "learning_rate": 2.8031022475466925e-06, + "loss": 1.2855, + "step": 138800 + }, + { + "epoch": 2.2, + "learning_rate": 2.801519468186135e-06, + "loss": 1.2746, + "step": 138900 + }, + { + "epoch": 2.2, + "learning_rate": 2.799936688825578e-06, + "loss": 1.2776, + "step": 139000 + }, + { + "epoch": 2.2, + "learning_rate": 2.798353909465021e-06, + "loss": 1.304, + "step": 139100 + }, + { + "epoch": 2.2, + "learning_rate": 2.7967711301044635e-06, + "loss": 1.2999, + "step": 139200 + }, + { + "epoch": 2.2, + "learning_rate": 2.7951883507439065e-06, + "loss": 1.2552, + "step": 139300 + }, + { + "epoch": 2.21, + "learning_rate": 2.793605571383349e-06, + "loss": 1.2923, + "step": 139400 + }, + { + "epoch": 2.21, + "learning_rate": 2.792022792022792e-06, + "loss": 1.2958, + "step": 139500 + }, + { + "epoch": 2.21, + "learning_rate": 2.790440012662235e-06, + "loss": 1.2625, + "step": 139600 + }, + { + "epoch": 2.21, + "learning_rate": 2.788857233301678e-06, + "loss": 1.2961, + "step": 139700 + }, + { + "epoch": 2.21, + "learning_rate": 2.7872744539411205e-06, + "loss": 1.2867, + "step": 139800 + }, + { + "epoch": 2.21, + "learning_rate": 2.7856916745805635e-06, + "loss": 1.2944, + "step": 139900 + }, + { + "epoch": 2.22, + "learning_rate": 2.784108895220007e-06, + "loss": 1.2867, + "step": 140000 + }, + { + "epoch": 2.22, + "learning_rate": 2.7825261158594496e-06, + "loss": 1.2793, + "step": 140100 + }, + { + "epoch": 2.22, + "learning_rate": 2.7809433364988927e-06, + "loss": 1.2909, + "step": 140200 + }, + { + "epoch": 2.22, + "learning_rate": 2.7793605571383353e-06, + "loss": 1.2997, + "step": 140300 + }, + { + "epoch": 2.22, + "learning_rate": 2.7777777777777783e-06, + "loss": 1.2504, + "step": 140400 + }, + { + "epoch": 2.22, + "learning_rate": 2.776194998417221e-06, + "loss": 1.2861, + "step": 140500 + }, + { + "epoch": 2.23, + "learning_rate": 2.7746122190566636e-06, + "loss": 1.2919, + "step": 140600 + }, + { + "epoch": 2.23, + "learning_rate": 2.7730294396961066e-06, + "loss": 1.286, + "step": 140700 + }, + { + "epoch": 2.23, + "learning_rate": 2.7714466603355493e-06, + "loss": 1.2712, + "step": 140800 + }, + { + "epoch": 2.23, + "learning_rate": 2.7698638809749923e-06, + "loss": 1.2846, + "step": 140900 + }, + { + "epoch": 2.23, + "learning_rate": 2.768281101614435e-06, + "loss": 1.2571, + "step": 141000 + }, + { + "epoch": 2.23, + "learning_rate": 2.766698322253878e-06, + "loss": 1.2722, + "step": 141100 + }, + { + "epoch": 2.23, + "learning_rate": 2.7651155428933206e-06, + "loss": 1.2952, + "step": 141200 + }, + { + "epoch": 2.24, + "learning_rate": 2.7635327635327636e-06, + "loss": 1.2922, + "step": 141300 + }, + { + "epoch": 2.24, + "learning_rate": 2.7619499841722063e-06, + "loss": 1.3144, + "step": 141400 + }, + { + "epoch": 2.24, + "learning_rate": 2.7603672048116493e-06, + "loss": 1.2814, + "step": 141500 + }, + { + "epoch": 2.24, + "learning_rate": 2.758784425451093e-06, + "loss": 1.3146, + "step": 141600 + }, + { + "epoch": 2.24, + "learning_rate": 2.7572016460905354e-06, + "loss": 1.2577, + "step": 141700 + }, + { + "epoch": 2.24, + "learning_rate": 2.755618866729978e-06, + "loss": 1.2683, + "step": 141800 + }, + { + "epoch": 2.25, + "learning_rate": 2.754036087369421e-06, + "loss": 1.291, + "step": 141900 + }, + { + "epoch": 2.25, + "learning_rate": 2.7524533080088637e-06, + "loss": 1.2828, + "step": 142000 + }, + { + "epoch": 2.25, + "learning_rate": 2.7508705286483068e-06, + "loss": 1.2706, + "step": 142100 + }, + { + "epoch": 2.25, + "learning_rate": 2.7492877492877494e-06, + "loss": 1.2598, + "step": 142200 + }, + { + "epoch": 2.25, + "learning_rate": 2.7477049699271924e-06, + "loss": 1.3022, + "step": 142300 + }, + { + "epoch": 2.25, + "learning_rate": 2.746122190566635e-06, + "loss": 1.291, + "step": 142400 + }, + { + "epoch": 2.26, + "learning_rate": 2.744539411206078e-06, + "loss": 1.2797, + "step": 142500 + }, + { + "epoch": 2.26, + "learning_rate": 2.7429566318455207e-06, + "loss": 1.2605, + "step": 142600 + }, + { + "epoch": 2.26, + "learning_rate": 2.7413738524849638e-06, + "loss": 1.3126, + "step": 142700 + }, + { + "epoch": 2.26, + "learning_rate": 2.7397910731244064e-06, + "loss": 1.3085, + "step": 142800 + }, + { + "epoch": 2.26, + "learning_rate": 2.7382082937638494e-06, + "loss": 1.2736, + "step": 142900 + }, + { + "epoch": 2.26, + "learning_rate": 2.736625514403292e-06, + "loss": 1.2723, + "step": 143000 + }, + { + "epoch": 2.26, + "learning_rate": 2.7350427350427355e-06, + "loss": 1.284, + "step": 143100 + }, + { + "epoch": 2.27, + "learning_rate": 2.733459955682178e-06, + "loss": 1.301, + "step": 143200 + }, + { + "epoch": 2.27, + "learning_rate": 2.7318771763216212e-06, + "loss": 1.2705, + "step": 143300 + }, + { + "epoch": 2.27, + "learning_rate": 2.730294396961064e-06, + "loss": 1.2826, + "step": 143400 + }, + { + "epoch": 2.27, + "learning_rate": 2.728711617600507e-06, + "loss": 1.2732, + "step": 143500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7271288382399495e-06, + "loss": 1.298, + "step": 143600 + }, + { + "epoch": 2.27, + "learning_rate": 2.7255460588793926e-06, + "loss": 1.285, + "step": 143700 + }, + { + "epoch": 2.28, + "learning_rate": 2.723963279518835e-06, + "loss": 1.2824, + "step": 143800 + }, + { + "epoch": 2.28, + "learning_rate": 2.7223805001582782e-06, + "loss": 1.2705, + "step": 143900 + }, + { + "epoch": 2.28, + "learning_rate": 2.720797720797721e-06, + "loss": 1.2845, + "step": 144000 + }, + { + "epoch": 2.28, + "learning_rate": 2.719214941437164e-06, + "loss": 1.2869, + "step": 144100 + }, + { + "epoch": 2.28, + "learning_rate": 2.7176321620766065e-06, + "loss": 1.2995, + "step": 144200 + }, + { + "epoch": 2.28, + "learning_rate": 2.7160493827160496e-06, + "loss": 1.3236, + "step": 144300 + }, + { + "epoch": 2.29, + "learning_rate": 2.714466603355492e-06, + "loss": 1.2612, + "step": 144400 + }, + { + "epoch": 2.29, + "learning_rate": 2.7128838239949353e-06, + "loss": 1.3095, + "step": 144500 + }, + { + "epoch": 2.29, + "learning_rate": 2.711301044634378e-06, + "loss": 1.2815, + "step": 144600 + }, + { + "epoch": 2.29, + "learning_rate": 2.7097182652738213e-06, + "loss": 1.2872, + "step": 144700 + }, + { + "epoch": 2.29, + "learning_rate": 2.708135485913264e-06, + "loss": 1.2812, + "step": 144800 + }, + { + "epoch": 2.29, + "learning_rate": 2.706552706552707e-06, + "loss": 1.3119, + "step": 144900 + }, + { + "epoch": 2.3, + "learning_rate": 2.7049699271921496e-06, + "loss": 1.2929, + "step": 145000 + }, + { + "epoch": 2.3, + "learning_rate": 2.7033871478315927e-06, + "loss": 1.284, + "step": 145100 + }, + { + "epoch": 2.3, + "learning_rate": 2.7018043684710353e-06, + "loss": 1.2944, + "step": 145200 + }, + { + "epoch": 2.3, + "learning_rate": 2.7002215891104784e-06, + "loss": 1.2904, + "step": 145300 + }, + { + "epoch": 2.3, + "learning_rate": 2.698638809749921e-06, + "loss": 1.2871, + "step": 145400 + }, + { + "epoch": 2.3, + "learning_rate": 2.697056030389364e-06, + "loss": 1.2858, + "step": 145500 + }, + { + "epoch": 2.3, + "learning_rate": 2.6954732510288067e-06, + "loss": 1.2898, + "step": 145600 + }, + { + "epoch": 2.31, + "learning_rate": 2.6938904716682497e-06, + "loss": 1.2727, + "step": 145700 + }, + { + "epoch": 2.31, + "learning_rate": 2.6923076923076923e-06, + "loss": 1.2853, + "step": 145800 + }, + { + "epoch": 2.31, + "learning_rate": 2.6907249129471354e-06, + "loss": 1.2831, + "step": 145900 + }, + { + "epoch": 2.31, + "learning_rate": 2.689142133586578e-06, + "loss": 1.2912, + "step": 146000 + }, + { + "epoch": 2.31, + "learning_rate": 2.687559354226021e-06, + "loss": 1.2902, + "step": 146100 + }, + { + "epoch": 2.31, + "learning_rate": 2.6859765748654637e-06, + "loss": 1.2999, + "step": 146200 + }, + { + "epoch": 2.32, + "learning_rate": 2.684393795504907e-06, + "loss": 1.2977, + "step": 146300 + }, + { + "epoch": 2.32, + "learning_rate": 2.6828110161443498e-06, + "loss": 1.2701, + "step": 146400 + }, + { + "epoch": 2.32, + "learning_rate": 2.681228236783793e-06, + "loss": 1.2808, + "step": 146500 + }, + { + "epoch": 2.32, + "learning_rate": 2.6796454574232354e-06, + "loss": 1.291, + "step": 146600 + }, + { + "epoch": 2.32, + "learning_rate": 2.6780626780626785e-06, + "loss": 1.3003, + "step": 146700 + }, + { + "epoch": 2.32, + "learning_rate": 2.676479898702121e-06, + "loss": 1.2885, + "step": 146800 + }, + { + "epoch": 2.33, + "learning_rate": 2.674897119341564e-06, + "loss": 1.2894, + "step": 146900 + }, + { + "epoch": 2.33, + "learning_rate": 2.673314339981007e-06, + "loss": 1.2799, + "step": 147000 + }, + { + "epoch": 2.33, + "learning_rate": 2.67173156062045e-06, + "loss": 1.2916, + "step": 147100 + }, + { + "epoch": 2.33, + "learning_rate": 2.6701487812598925e-06, + "loss": 1.2769, + "step": 147200 + }, + { + "epoch": 2.33, + "learning_rate": 2.6685660018993355e-06, + "loss": 1.259, + "step": 147300 + }, + { + "epoch": 2.33, + "learning_rate": 2.666983222538778e-06, + "loss": 1.2908, + "step": 147400 + }, + { + "epoch": 2.33, + "learning_rate": 2.665400443178221e-06, + "loss": 1.3013, + "step": 147500 + }, + { + "epoch": 2.34, + "learning_rate": 2.663817663817664e-06, + "loss": 1.2974, + "step": 147600 + }, + { + "epoch": 2.34, + "learning_rate": 2.662234884457107e-06, + "loss": 1.281, + "step": 147700 + }, + { + "epoch": 2.34, + "learning_rate": 2.66065210509655e-06, + "loss": 1.2776, + "step": 147800 + }, + { + "epoch": 2.34, + "learning_rate": 2.659069325735993e-06, + "loss": 1.3079, + "step": 147900 + }, + { + "epoch": 2.34, + "learning_rate": 2.6574865463754356e-06, + "loss": 1.2748, + "step": 148000 + }, + { + "epoch": 2.34, + "learning_rate": 2.6559037670148786e-06, + "loss": 1.2847, + "step": 148100 + }, + { + "epoch": 2.35, + "learning_rate": 2.6543209876543212e-06, + "loss": 1.2881, + "step": 148200 + }, + { + "epoch": 2.35, + "learning_rate": 2.6527382082937643e-06, + "loss": 1.2602, + "step": 148300 + }, + { + "epoch": 2.35, + "learning_rate": 2.651155428933207e-06, + "loss": 1.2774, + "step": 148400 + }, + { + "epoch": 2.35, + "learning_rate": 2.64957264957265e-06, + "loss": 1.2866, + "step": 148500 + }, + { + "epoch": 2.35, + "learning_rate": 2.6479898702120926e-06, + "loss": 1.2806, + "step": 148600 + }, + { + "epoch": 2.35, + "learning_rate": 2.6464070908515356e-06, + "loss": 1.3143, + "step": 148700 + }, + { + "epoch": 2.36, + "learning_rate": 2.6448243114909783e-06, + "loss": 1.2822, + "step": 148800 + }, + { + "epoch": 2.36, + "learning_rate": 2.6432415321304213e-06, + "loss": 1.2634, + "step": 148900 + }, + { + "epoch": 2.36, + "learning_rate": 2.641658752769864e-06, + "loss": 1.2712, + "step": 149000 + }, + { + "epoch": 2.36, + "learning_rate": 2.6400759734093066e-06, + "loss": 1.2949, + "step": 149100 + }, + { + "epoch": 2.36, + "learning_rate": 2.6384931940487496e-06, + "loss": 1.2724, + "step": 149200 + }, + { + "epoch": 2.36, + "learning_rate": 2.6369104146881922e-06, + "loss": 1.2675, + "step": 149300 + }, + { + "epoch": 2.36, + "learning_rate": 2.6353276353276357e-06, + "loss": 1.2976, + "step": 149400 + }, + { + "epoch": 2.37, + "learning_rate": 2.6337448559670788e-06, + "loss": 1.2862, + "step": 149500 + }, + { + "epoch": 2.37, + "learning_rate": 2.6321620766065214e-06, + "loss": 1.2641, + "step": 149600 + }, + { + "epoch": 2.37, + "learning_rate": 2.6305792972459644e-06, + "loss": 1.2874, + "step": 149700 + }, + { + "epoch": 2.37, + "learning_rate": 2.628996517885407e-06, + "loss": 1.2915, + "step": 149800 + }, + { + "epoch": 2.37, + "learning_rate": 2.62741373852485e-06, + "loss": 1.2946, + "step": 149900 + }, + { + "epoch": 2.37, + "learning_rate": 2.6258309591642927e-06, + "loss": 1.2815, + "step": 150000 + }, + { + "epoch": 2.38, + "learning_rate": 2.6242481798037358e-06, + "loss": 1.2842, + "step": 150100 + }, + { + "epoch": 2.38, + "learning_rate": 2.6226654004431784e-06, + "loss": 1.2673, + "step": 150200 + }, + { + "epoch": 2.38, + "learning_rate": 2.6210826210826214e-06, + "loss": 1.2714, + "step": 150300 + }, + { + "epoch": 2.38, + "learning_rate": 2.619499841722064e-06, + "loss": 1.2804, + "step": 150400 + }, + { + "epoch": 2.38, + "learning_rate": 2.6179170623615067e-06, + "loss": 1.2692, + "step": 150500 + }, + { + "epoch": 2.38, + "learning_rate": 2.6163342830009497e-06, + "loss": 1.255, + "step": 150600 + }, + { + "epoch": 2.39, + "learning_rate": 2.6147515036403924e-06, + "loss": 1.2839, + "step": 150700 + }, + { + "epoch": 2.39, + "learning_rate": 2.6131687242798354e-06, + "loss": 1.282, + "step": 150800 + }, + { + "epoch": 2.39, + "learning_rate": 2.611585944919278e-06, + "loss": 1.2551, + "step": 150900 + }, + { + "epoch": 2.39, + "learning_rate": 2.6100031655587215e-06, + "loss": 1.279, + "step": 151000 + }, + { + "epoch": 2.39, + "learning_rate": 2.6084203861981646e-06, + "loss": 1.2863, + "step": 151100 + }, + { + "epoch": 2.39, + "learning_rate": 2.606837606837607e-06, + "loss": 1.2644, + "step": 151200 + }, + { + "epoch": 2.39, + "learning_rate": 2.6052548274770502e-06, + "loss": 1.2841, + "step": 151300 + }, + { + "epoch": 2.4, + "learning_rate": 2.603672048116493e-06, + "loss": 1.2954, + "step": 151400 + }, + { + "epoch": 2.4, + "learning_rate": 2.602089268755936e-06, + "loss": 1.2965, + "step": 151500 + }, + { + "epoch": 2.4, + "learning_rate": 2.6005064893953785e-06, + "loss": 1.2769, + "step": 151600 + }, + { + "epoch": 2.4, + "learning_rate": 2.598923710034821e-06, + "loss": 1.3023, + "step": 151700 + }, + { + "epoch": 2.4, + "learning_rate": 2.597340930674264e-06, + "loss": 1.3077, + "step": 151800 + }, + { + "epoch": 2.4, + "learning_rate": 2.595758151313707e-06, + "loss": 1.2736, + "step": 151900 + }, + { + "epoch": 2.41, + "learning_rate": 2.59417537195315e-06, + "loss": 1.3131, + "step": 152000 + }, + { + "epoch": 2.41, + "learning_rate": 2.5925925925925925e-06, + "loss": 1.3021, + "step": 152100 + }, + { + "epoch": 2.41, + "learning_rate": 2.5910098132320355e-06, + "loss": 1.2775, + "step": 152200 + }, + { + "epoch": 2.41, + "learning_rate": 2.589427033871478e-06, + "loss": 1.2894, + "step": 152300 + }, + { + "epoch": 2.41, + "learning_rate": 2.587844254510921e-06, + "loss": 1.3002, + "step": 152400 + }, + { + "epoch": 2.41, + "learning_rate": 2.5862614751503647e-06, + "loss": 1.2762, + "step": 152500 + }, + { + "epoch": 2.42, + "learning_rate": 2.5846786957898073e-06, + "loss": 1.283, + "step": 152600 + }, + { + "epoch": 2.42, + "learning_rate": 2.5830959164292504e-06, + "loss": 1.2851, + "step": 152700 + }, + { + "epoch": 2.42, + "learning_rate": 2.581513137068693e-06, + "loss": 1.289, + "step": 152800 + }, + { + "epoch": 2.42, + "learning_rate": 2.579930357708136e-06, + "loss": 1.2804, + "step": 152900 + }, + { + "epoch": 2.42, + "learning_rate": 2.5783475783475787e-06, + "loss": 1.2968, + "step": 153000 + }, + { + "epoch": 2.42, + "learning_rate": 2.5767647989870213e-06, + "loss": 1.2773, + "step": 153100 + }, + { + "epoch": 2.42, + "learning_rate": 2.5751820196264643e-06, + "loss": 1.2509, + "step": 153200 + }, + { + "epoch": 2.43, + "learning_rate": 2.573599240265907e-06, + "loss": 1.2857, + "step": 153300 + }, + { + "epoch": 2.43, + "learning_rate": 2.57201646090535e-06, + "loss": 1.2871, + "step": 153400 + }, + { + "epoch": 2.43, + "learning_rate": 2.5704336815447926e-06, + "loss": 1.2932, + "step": 153500 + }, + { + "epoch": 2.43, + "learning_rate": 2.5688509021842357e-06, + "loss": 1.2669, + "step": 153600 + }, + { + "epoch": 2.43, + "learning_rate": 2.5672681228236783e-06, + "loss": 1.2839, + "step": 153700 + }, + { + "epoch": 2.43, + "learning_rate": 2.5656853434631213e-06, + "loss": 1.2592, + "step": 153800 + }, + { + "epoch": 2.44, + "learning_rate": 2.564102564102564e-06, + "loss": 1.268, + "step": 153900 + }, + { + "epoch": 2.44, + "learning_rate": 2.562519784742007e-06, + "loss": 1.2906, + "step": 154000 + }, + { + "epoch": 2.44, + "learning_rate": 2.5609370053814505e-06, + "loss": 1.2903, + "step": 154100 + }, + { + "epoch": 2.44, + "learning_rate": 2.559354226020893e-06, + "loss": 1.2691, + "step": 154200 + }, + { + "epoch": 2.44, + "learning_rate": 2.5577714466603357e-06, + "loss": 1.3002, + "step": 154300 + }, + { + "epoch": 2.44, + "learning_rate": 2.5561886672997788e-06, + "loss": 1.281, + "step": 154400 + }, + { + "epoch": 2.45, + "learning_rate": 2.5546058879392214e-06, + "loss": 1.2702, + "step": 154500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5530231085786645e-06, + "loss": 1.2892, + "step": 154600 + }, + { + "epoch": 2.45, + "learning_rate": 2.551440329218107e-06, + "loss": 1.2741, + "step": 154700 + }, + { + "epoch": 2.45, + "learning_rate": 2.54985754985755e-06, + "loss": 1.2741, + "step": 154800 + }, + { + "epoch": 2.45, + "learning_rate": 2.5482747704969927e-06, + "loss": 1.3219, + "step": 154900 + }, + { + "epoch": 2.45, + "learning_rate": 2.546691991136436e-06, + "loss": 1.2679, + "step": 155000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5451092117758784e-06, + "loss": 1.2688, + "step": 155100 + }, + { + "epoch": 2.46, + "learning_rate": 2.5435264324153215e-06, + "loss": 1.2968, + "step": 155200 + }, + { + "epoch": 2.46, + "learning_rate": 2.541943653054764e-06, + "loss": 1.3159, + "step": 155300 + }, + { + "epoch": 2.46, + "learning_rate": 2.540360873694207e-06, + "loss": 1.2848, + "step": 155400 + }, + { + "epoch": 2.46, + "learning_rate": 2.5387780943336498e-06, + "loss": 1.2605, + "step": 155500 + }, + { + "epoch": 2.46, + "learning_rate": 2.537195314973093e-06, + "loss": 1.276, + "step": 155600 + }, + { + "epoch": 2.46, + "learning_rate": 2.535612535612536e-06, + "loss": 1.2905, + "step": 155700 + }, + { + "epoch": 2.47, + "learning_rate": 2.534029756251979e-06, + "loss": 1.2514, + "step": 155800 + }, + { + "epoch": 2.47, + "learning_rate": 2.5324469768914215e-06, + "loss": 1.2799, + "step": 155900 + }, + { + "epoch": 2.47, + "learning_rate": 2.5308641975308646e-06, + "loss": 1.2998, + "step": 156000 + }, + { + "epoch": 2.47, + "learning_rate": 2.529281418170307e-06, + "loss": 1.291, + "step": 156100 + }, + { + "epoch": 2.47, + "learning_rate": 2.5276986388097503e-06, + "loss": 1.2712, + "step": 156200 + }, + { + "epoch": 2.47, + "learning_rate": 2.526115859449193e-06, + "loss": 1.2876, + "step": 156300 + }, + { + "epoch": 2.48, + "learning_rate": 2.524533080088636e-06, + "loss": 1.2815, + "step": 156400 + }, + { + "epoch": 2.48, + "learning_rate": 2.5229503007280786e-06, + "loss": 1.2951, + "step": 156500 + }, + { + "epoch": 2.48, + "learning_rate": 2.5213675213675216e-06, + "loss": 1.2857, + "step": 156600 + }, + { + "epoch": 2.48, + "learning_rate": 2.5197847420069642e-06, + "loss": 1.2703, + "step": 156700 + }, + { + "epoch": 2.48, + "learning_rate": 2.5182019626464073e-06, + "loss": 1.2849, + "step": 156800 + }, + { + "epoch": 2.48, + "learning_rate": 2.51661918328585e-06, + "loss": 1.3097, + "step": 156900 + }, + { + "epoch": 2.48, + "learning_rate": 2.515036403925293e-06, + "loss": 1.2862, + "step": 157000 + }, + { + "epoch": 2.49, + "learning_rate": 2.5134536245647356e-06, + "loss": 1.2717, + "step": 157100 + }, + { + "epoch": 2.49, + "learning_rate": 2.511870845204179e-06, + "loss": 1.2889, + "step": 157200 + }, + { + "epoch": 2.49, + "learning_rate": 2.5102880658436217e-06, + "loss": 1.3, + "step": 157300 + }, + { + "epoch": 2.49, + "learning_rate": 2.5087052864830647e-06, + "loss": 1.2936, + "step": 157400 + }, + { + "epoch": 2.49, + "learning_rate": 2.5071225071225073e-06, + "loss": 1.2755, + "step": 157500 + }, + { + "epoch": 2.49, + "learning_rate": 2.5055397277619504e-06, + "loss": 1.2885, + "step": 157600 + }, + { + "epoch": 2.5, + "learning_rate": 2.503956948401393e-06, + "loss": 1.2881, + "step": 157700 + }, + { + "epoch": 2.5, + "learning_rate": 2.502374169040836e-06, + "loss": 1.3128, + "step": 157800 + }, + { + "epoch": 2.5, + "learning_rate": 2.5007913896802787e-06, + "loss": 1.2987, + "step": 157900 + }, + { + "epoch": 2.5, + "learning_rate": 2.4992086103197217e-06, + "loss": 1.2818, + "step": 158000 + }, + { + "epoch": 2.5, + "learning_rate": 2.4976258309591644e-06, + "loss": 1.2771, + "step": 158100 + }, + { + "epoch": 2.5, + "learning_rate": 2.4960430515986074e-06, + "loss": 1.2731, + "step": 158200 + }, + { + "epoch": 2.51, + "learning_rate": 2.49446027223805e-06, + "loss": 1.2701, + "step": 158300 + }, + { + "epoch": 2.51, + "learning_rate": 2.492877492877493e-06, + "loss": 1.2838, + "step": 158400 + }, + { + "epoch": 2.51, + "learning_rate": 2.491294713516936e-06, + "loss": 1.2786, + "step": 158500 + }, + { + "epoch": 2.51, + "learning_rate": 2.4897119341563787e-06, + "loss": 1.2603, + "step": 158600 + }, + { + "epoch": 2.51, + "learning_rate": 2.488129154795822e-06, + "loss": 1.292, + "step": 158700 + }, + { + "epoch": 2.51, + "learning_rate": 2.4865463754352644e-06, + "loss": 1.2703, + "step": 158800 + }, + { + "epoch": 2.52, + "learning_rate": 2.4849635960747075e-06, + "loss": 1.2658, + "step": 158900 + }, + { + "epoch": 2.52, + "learning_rate": 2.48338081671415e-06, + "loss": 1.2922, + "step": 159000 + }, + { + "epoch": 2.52, + "learning_rate": 2.481798037353593e-06, + "loss": 1.2781, + "step": 159100 + }, + { + "epoch": 2.52, + "learning_rate": 2.480215257993036e-06, + "loss": 1.2777, + "step": 159200 + }, + { + "epoch": 2.52, + "learning_rate": 2.478632478632479e-06, + "loss": 1.2759, + "step": 159300 + }, + { + "epoch": 2.52, + "learning_rate": 2.477049699271922e-06, + "loss": 1.287, + "step": 159400 + }, + { + "epoch": 2.52, + "learning_rate": 2.4754669199113645e-06, + "loss": 1.2505, + "step": 159500 + }, + { + "epoch": 2.53, + "learning_rate": 2.4738841405508075e-06, + "loss": 1.2884, + "step": 159600 + }, + { + "epoch": 2.53, + "learning_rate": 2.47230136119025e-06, + "loss": 1.2841, + "step": 159700 + }, + { + "epoch": 2.53, + "learning_rate": 2.470718581829693e-06, + "loss": 1.2788, + "step": 159800 + }, + { + "epoch": 2.53, + "learning_rate": 2.469135802469136e-06, + "loss": 1.2529, + "step": 159900 + }, + { + "epoch": 2.53, + "learning_rate": 2.467553023108579e-06, + "loss": 1.2602, + "step": 160000 + }, + { + "epoch": 2.53, + "learning_rate": 2.465970243748022e-06, + "loss": 1.2755, + "step": 160100 + }, + { + "epoch": 2.54, + "learning_rate": 2.4643874643874645e-06, + "loss": 1.2771, + "step": 160200 + }, + { + "epoch": 2.54, + "learning_rate": 2.4628046850269076e-06, + "loss": 1.2608, + "step": 160300 + }, + { + "epoch": 2.54, + "learning_rate": 2.4612219056663502e-06, + "loss": 1.2642, + "step": 160400 + }, + { + "epoch": 2.54, + "learning_rate": 2.4596391263057933e-06, + "loss": 1.2574, + "step": 160500 + }, + { + "epoch": 2.54, + "learning_rate": 2.458056346945236e-06, + "loss": 1.2792, + "step": 160600 + }, + { + "epoch": 2.54, + "learning_rate": 2.456473567584679e-06, + "loss": 1.2582, + "step": 160700 + }, + { + "epoch": 2.55, + "learning_rate": 2.454890788224122e-06, + "loss": 1.2884, + "step": 160800 + }, + { + "epoch": 2.55, + "learning_rate": 2.4533080088635646e-06, + "loss": 1.2669, + "step": 160900 + }, + { + "epoch": 2.55, + "learning_rate": 2.4517252295030077e-06, + "loss": 1.2882, + "step": 161000 + }, + { + "epoch": 2.55, + "learning_rate": 2.4501424501424503e-06, + "loss": 1.2643, + "step": 161100 + }, + { + "epoch": 2.55, + "learning_rate": 2.4485596707818933e-06, + "loss": 1.2855, + "step": 161200 + }, + { + "epoch": 2.55, + "learning_rate": 2.446976891421336e-06, + "loss": 1.2679, + "step": 161300 + }, + { + "epoch": 2.55, + "learning_rate": 2.445394112060779e-06, + "loss": 1.2799, + "step": 161400 + }, + { + "epoch": 2.56, + "learning_rate": 2.443811332700222e-06, + "loss": 1.2646, + "step": 161500 + }, + { + "epoch": 2.56, + "learning_rate": 2.4422285533396647e-06, + "loss": 1.2751, + "step": 161600 + }, + { + "epoch": 2.56, + "learning_rate": 2.4406457739791077e-06, + "loss": 1.2822, + "step": 161700 + }, + { + "epoch": 2.56, + "learning_rate": 2.4390629946185503e-06, + "loss": 1.2619, + "step": 161800 + }, + { + "epoch": 2.56, + "learning_rate": 2.4374802152579934e-06, + "loss": 1.2626, + "step": 161900 + }, + { + "epoch": 2.56, + "learning_rate": 2.435897435897436e-06, + "loss": 1.2936, + "step": 162000 + }, + { + "epoch": 2.57, + "learning_rate": 2.434314656536879e-06, + "loss": 1.248, + "step": 162100 + }, + { + "epoch": 2.57, + "learning_rate": 2.4327318771763217e-06, + "loss": 1.2828, + "step": 162200 + }, + { + "epoch": 2.57, + "learning_rate": 2.4311490978157647e-06, + "loss": 1.2596, + "step": 162300 + }, + { + "epoch": 2.57, + "learning_rate": 2.429566318455208e-06, + "loss": 1.2892, + "step": 162400 + }, + { + "epoch": 2.57, + "learning_rate": 2.4279835390946504e-06, + "loss": 1.2711, + "step": 162500 + }, + { + "epoch": 2.57, + "learning_rate": 2.4264007597340935e-06, + "loss": 1.2868, + "step": 162600 + }, + { + "epoch": 2.58, + "learning_rate": 2.424817980373536e-06, + "loss": 1.2772, + "step": 162700 + }, + { + "epoch": 2.58, + "learning_rate": 2.423235201012979e-06, + "loss": 1.2728, + "step": 162800 + }, + { + "epoch": 2.58, + "learning_rate": 2.4216524216524218e-06, + "loss": 1.2838, + "step": 162900 + }, + { + "epoch": 2.58, + "learning_rate": 2.4200696422918644e-06, + "loss": 1.2813, + "step": 163000 + }, + { + "epoch": 2.58, + "learning_rate": 2.418486862931308e-06, + "loss": 1.2664, + "step": 163100 + }, + { + "epoch": 2.58, + "learning_rate": 2.4169040835707505e-06, + "loss": 1.2921, + "step": 163200 + }, + { + "epoch": 2.58, + "learning_rate": 2.4153213042101935e-06, + "loss": 1.3039, + "step": 163300 + }, + { + "epoch": 2.59, + "learning_rate": 2.413738524849636e-06, + "loss": 1.2669, + "step": 163400 + }, + { + "epoch": 2.59, + "learning_rate": 2.412155745489079e-06, + "loss": 1.2897, + "step": 163500 + }, + { + "epoch": 2.59, + "learning_rate": 2.410572966128522e-06, + "loss": 1.2796, + "step": 163600 + }, + { + "epoch": 2.59, + "learning_rate": 2.4089901867679644e-06, + "loss": 1.2657, + "step": 163700 + }, + { + "epoch": 2.59, + "learning_rate": 2.4074074074074075e-06, + "loss": 1.2913, + "step": 163800 + }, + { + "epoch": 2.59, + "learning_rate": 2.4058246280468505e-06, + "loss": 1.2962, + "step": 163900 + }, + { + "epoch": 2.6, + "learning_rate": 2.4042418486862936e-06, + "loss": 1.2736, + "step": 164000 + }, + { + "epoch": 2.6, + "learning_rate": 2.4026590693257362e-06, + "loss": 1.2807, + "step": 164100 + }, + { + "epoch": 2.6, + "learning_rate": 2.4010762899651793e-06, + "loss": 1.2704, + "step": 164200 + }, + { + "epoch": 2.6, + "learning_rate": 2.399493510604622e-06, + "loss": 1.2899, + "step": 164300 + }, + { + "epoch": 2.6, + "learning_rate": 2.3979107312440645e-06, + "loss": 1.247, + "step": 164400 + }, + { + "epoch": 2.6, + "learning_rate": 2.3963279518835076e-06, + "loss": 1.2702, + "step": 164500 + }, + { + "epoch": 2.61, + "learning_rate": 2.3947451725229506e-06, + "loss": 1.3203, + "step": 164600 + }, + { + "epoch": 2.61, + "learning_rate": 2.3931623931623937e-06, + "loss": 1.3029, + "step": 164700 + }, + { + "epoch": 2.61, + "learning_rate": 2.3915796138018363e-06, + "loss": 1.3014, + "step": 164800 + }, + { + "epoch": 2.61, + "learning_rate": 2.389996834441279e-06, + "loss": 1.2693, + "step": 164900 + }, + { + "epoch": 2.61, + "learning_rate": 2.388414055080722e-06, + "loss": 1.2998, + "step": 165000 + }, + { + "epoch": 2.61, + "learning_rate": 2.3868312757201646e-06, + "loss": 1.2903, + "step": 165100 + }, + { + "epoch": 2.61, + "learning_rate": 2.3852484963596076e-06, + "loss": 1.2914, + "step": 165200 + }, + { + "epoch": 2.62, + "learning_rate": 2.3836657169990502e-06, + "loss": 1.2729, + "step": 165300 + }, + { + "epoch": 2.62, + "learning_rate": 2.3820829376384937e-06, + "loss": 1.2835, + "step": 165400 + }, + { + "epoch": 2.62, + "learning_rate": 2.3805001582779363e-06, + "loss": 1.2828, + "step": 165500 + }, + { + "epoch": 2.62, + "learning_rate": 2.378917378917379e-06, + "loss": 1.2891, + "step": 165600 + }, + { + "epoch": 2.62, + "learning_rate": 2.377334599556822e-06, + "loss": 1.2837, + "step": 165700 + }, + { + "epoch": 2.62, + "learning_rate": 2.3757518201962646e-06, + "loss": 1.3014, + "step": 165800 + }, + { + "epoch": 2.63, + "learning_rate": 2.3741690408357077e-06, + "loss": 1.271, + "step": 165900 + }, + { + "epoch": 2.63, + "learning_rate": 2.3725862614751503e-06, + "loss": 1.275, + "step": 166000 + }, + { + "epoch": 2.63, + "learning_rate": 2.3710034821145934e-06, + "loss": 1.281, + "step": 166100 + }, + { + "epoch": 2.63, + "learning_rate": 2.3694207027540364e-06, + "loss": 1.2778, + "step": 166200 + }, + { + "epoch": 2.63, + "learning_rate": 2.367837923393479e-06, + "loss": 1.2923, + "step": 166300 + }, + { + "epoch": 2.63, + "learning_rate": 2.366255144032922e-06, + "loss": 1.2918, + "step": 166400 + }, + { + "epoch": 2.64, + "learning_rate": 2.3646723646723647e-06, + "loss": 1.2531, + "step": 166500 + }, + { + "epoch": 2.64, + "learning_rate": 2.3630895853118078e-06, + "loss": 1.3017, + "step": 166600 + }, + { + "epoch": 2.64, + "learning_rate": 2.3615068059512504e-06, + "loss": 1.3031, + "step": 166700 + }, + { + "epoch": 2.64, + "learning_rate": 2.3599240265906934e-06, + "loss": 1.2741, + "step": 166800 + }, + { + "epoch": 2.64, + "learning_rate": 2.358341247230136e-06, + "loss": 1.2919, + "step": 166900 + }, + { + "epoch": 2.64, + "learning_rate": 2.356758467869579e-06, + "loss": 1.2754, + "step": 167000 + }, + { + "epoch": 2.64, + "learning_rate": 2.355175688509022e-06, + "loss": 1.3103, + "step": 167100 + }, + { + "epoch": 2.65, + "learning_rate": 2.3535929091484648e-06, + "loss": 1.3057, + "step": 167200 + }, + { + "epoch": 2.65, + "learning_rate": 2.352010129787908e-06, + "loss": 1.3086, + "step": 167300 + }, + { + "epoch": 2.65, + "learning_rate": 2.3504273504273504e-06, + "loss": 1.2568, + "step": 167400 + }, + { + "epoch": 2.65, + "learning_rate": 2.3488445710667935e-06, + "loss": 1.267, + "step": 167500 + }, + { + "epoch": 2.65, + "learning_rate": 2.347261791706236e-06, + "loss": 1.2607, + "step": 167600 + }, + { + "epoch": 2.65, + "learning_rate": 2.345679012345679e-06, + "loss": 1.2655, + "step": 167700 + }, + { + "epoch": 2.66, + "learning_rate": 2.344096232985122e-06, + "loss": 1.2726, + "step": 167800 + }, + { + "epoch": 2.66, + "learning_rate": 2.342513453624565e-06, + "loss": 1.2957, + "step": 167900 + }, + { + "epoch": 2.66, + "learning_rate": 2.340930674264008e-06, + "loss": 1.303, + "step": 168000 + }, + { + "epoch": 2.66, + "learning_rate": 2.3393478949034505e-06, + "loss": 1.2565, + "step": 168100 + }, + { + "epoch": 2.66, + "learning_rate": 2.3377651155428936e-06, + "loss": 1.2627, + "step": 168200 + }, + { + "epoch": 2.66, + "learning_rate": 2.336182336182336e-06, + "loss": 1.2835, + "step": 168300 + }, + { + "epoch": 2.67, + "learning_rate": 2.3345995568217792e-06, + "loss": 1.2903, + "step": 168400 + }, + { + "epoch": 2.67, + "learning_rate": 2.333016777461222e-06, + "loss": 1.2856, + "step": 168500 + }, + { + "epoch": 2.67, + "learning_rate": 2.331433998100665e-06, + "loss": 1.2892, + "step": 168600 + }, + { + "epoch": 2.67, + "learning_rate": 2.329851218740108e-06, + "loss": 1.2951, + "step": 168700 + }, + { + "epoch": 2.67, + "learning_rate": 2.3282684393795506e-06, + "loss": 1.2871, + "step": 168800 + }, + { + "epoch": 2.67, + "learning_rate": 2.3266856600189936e-06, + "loss": 1.2634, + "step": 168900 + }, + { + "epoch": 2.67, + "learning_rate": 2.3251028806584362e-06, + "loss": 1.2854, + "step": 169000 + }, + { + "epoch": 2.68, + "learning_rate": 2.3235201012978793e-06, + "loss": 1.2907, + "step": 169100 + }, + { + "epoch": 2.68, + "learning_rate": 2.321937321937322e-06, + "loss": 1.2936, + "step": 169200 + }, + { + "epoch": 2.68, + "learning_rate": 2.320354542576765e-06, + "loss": 1.286, + "step": 169300 + }, + { + "epoch": 2.68, + "learning_rate": 2.318771763216208e-06, + "loss": 1.2911, + "step": 169400 + }, + { + "epoch": 2.68, + "learning_rate": 2.3171889838556506e-06, + "loss": 1.3014, + "step": 169500 + }, + { + "epoch": 2.68, + "learning_rate": 2.3156062044950937e-06, + "loss": 1.294, + "step": 169600 + }, + { + "epoch": 2.69, + "learning_rate": 2.3140234251345363e-06, + "loss": 1.281, + "step": 169700 + }, + { + "epoch": 2.69, + "learning_rate": 2.3124406457739794e-06, + "loss": 1.3081, + "step": 169800 + }, + { + "epoch": 2.69, + "learning_rate": 2.310857866413422e-06, + "loss": 1.2751, + "step": 169900 + }, + { + "epoch": 2.69, + "learning_rate": 2.309275087052865e-06, + "loss": 1.2948, + "step": 170000 + }, + { + "epoch": 2.69, + "learning_rate": 2.307692307692308e-06, + "loss": 1.2792, + "step": 170100 + }, + { + "epoch": 2.69, + "learning_rate": 2.3061095283317507e-06, + "loss": 1.2642, + "step": 170200 + }, + { + "epoch": 2.7, + "learning_rate": 2.3045267489711937e-06, + "loss": 1.2786, + "step": 170300 + }, + { + "epoch": 2.7, + "learning_rate": 2.3029439696106364e-06, + "loss": 1.3118, + "step": 170400 + }, + { + "epoch": 2.7, + "learning_rate": 2.3013611902500794e-06, + "loss": 1.2878, + "step": 170500 + }, + { + "epoch": 2.7, + "learning_rate": 2.299778410889522e-06, + "loss": 1.3062, + "step": 170600 + }, + { + "epoch": 2.7, + "learning_rate": 2.298195631528965e-06, + "loss": 1.2905, + "step": 170700 + }, + { + "epoch": 2.7, + "learning_rate": 2.2966128521684077e-06, + "loss": 1.2815, + "step": 170800 + }, + { + "epoch": 2.7, + "learning_rate": 2.2950300728078508e-06, + "loss": 1.2741, + "step": 170900 + }, + { + "epoch": 2.71, + "learning_rate": 2.293447293447294e-06, + "loss": 1.2942, + "step": 171000 + }, + { + "epoch": 2.71, + "learning_rate": 2.2918645140867364e-06, + "loss": 1.2476, + "step": 171100 + }, + { + "epoch": 2.71, + "learning_rate": 2.2902817347261795e-06, + "loss": 1.2634, + "step": 171200 + }, + { + "epoch": 2.71, + "learning_rate": 2.288698955365622e-06, + "loss": 1.2898, + "step": 171300 + }, + { + "epoch": 2.71, + "learning_rate": 2.287116176005065e-06, + "loss": 1.2751, + "step": 171400 + }, + { + "epoch": 2.71, + "learning_rate": 2.2855333966445078e-06, + "loss": 1.3013, + "step": 171500 + }, + { + "epoch": 2.72, + "learning_rate": 2.283950617283951e-06, + "loss": 1.2886, + "step": 171600 + }, + { + "epoch": 2.72, + "learning_rate": 2.282367837923394e-06, + "loss": 1.2535, + "step": 171700 + }, + { + "epoch": 2.72, + "learning_rate": 2.2807850585628365e-06, + "loss": 1.2751, + "step": 171800 + }, + { + "epoch": 2.72, + "learning_rate": 2.2792022792022796e-06, + "loss": 1.2938, + "step": 171900 + }, + { + "epoch": 2.72, + "learning_rate": 2.277619499841722e-06, + "loss": 1.2777, + "step": 172000 + }, + { + "epoch": 2.72, + "learning_rate": 2.2760367204811652e-06, + "loss": 1.2935, + "step": 172100 + }, + { + "epoch": 2.73, + "learning_rate": 2.274453941120608e-06, + "loss": 1.2723, + "step": 172200 + }, + { + "epoch": 2.73, + "learning_rate": 2.272871161760051e-06, + "loss": 1.2664, + "step": 172300 + }, + { + "epoch": 2.73, + "learning_rate": 2.2712883823994935e-06, + "loss": 1.2758, + "step": 172400 + }, + { + "epoch": 2.73, + "learning_rate": 2.2697056030389366e-06, + "loss": 1.2746, + "step": 172500 + }, + { + "epoch": 2.73, + "learning_rate": 2.2681228236783796e-06, + "loss": 1.2814, + "step": 172600 + }, + { + "epoch": 2.73, + "learning_rate": 2.2665400443178222e-06, + "loss": 1.3054, + "step": 172700 + }, + { + "epoch": 2.74, + "learning_rate": 2.2649572649572653e-06, + "loss": 1.249, + "step": 172800 + }, + { + "epoch": 2.74, + "learning_rate": 2.263374485596708e-06, + "loss": 1.2662, + "step": 172900 + }, + { + "epoch": 2.74, + "learning_rate": 2.261791706236151e-06, + "loss": 1.2934, + "step": 173000 + }, + { + "epoch": 2.74, + "learning_rate": 2.2602089268755936e-06, + "loss": 1.2933, + "step": 173100 + }, + { + "epoch": 2.74, + "learning_rate": 2.2586261475150366e-06, + "loss": 1.2583, + "step": 173200 + }, + { + "epoch": 2.74, + "learning_rate": 2.2570433681544797e-06, + "loss": 1.2778, + "step": 173300 + }, + { + "epoch": 2.74, + "learning_rate": 2.2554605887939223e-06, + "loss": 1.2888, + "step": 173400 + }, + { + "epoch": 2.75, + "learning_rate": 2.2538778094333654e-06, + "loss": 1.2716, + "step": 173500 + }, + { + "epoch": 2.75, + "learning_rate": 2.252295030072808e-06, + "loss": 1.2647, + "step": 173600 + }, + { + "epoch": 2.75, + "learning_rate": 2.250712250712251e-06, + "loss": 1.2459, + "step": 173700 + }, + { + "epoch": 2.75, + "learning_rate": 2.2491294713516936e-06, + "loss": 1.259, + "step": 173800 + }, + { + "epoch": 2.75, + "learning_rate": 2.2475466919911367e-06, + "loss": 1.2816, + "step": 173900 + }, + { + "epoch": 2.75, + "learning_rate": 2.2459639126305797e-06, + "loss": 1.2973, + "step": 174000 + }, + { + "epoch": 2.76, + "learning_rate": 2.2443811332700224e-06, + "loss": 1.263, + "step": 174100 + }, + { + "epoch": 2.76, + "learning_rate": 2.2427983539094654e-06, + "loss": 1.2899, + "step": 174200 + }, + { + "epoch": 2.76, + "learning_rate": 2.241215574548908e-06, + "loss": 1.2728, + "step": 174300 + }, + { + "epoch": 2.76, + "learning_rate": 2.239632795188351e-06, + "loss": 1.2855, + "step": 174400 + }, + { + "epoch": 2.76, + "learning_rate": 2.2380500158277937e-06, + "loss": 1.2836, + "step": 174500 + }, + { + "epoch": 2.76, + "learning_rate": 2.2364672364672368e-06, + "loss": 1.2571, + "step": 174600 + }, + { + "epoch": 2.77, + "learning_rate": 2.2348844571066794e-06, + "loss": 1.2874, + "step": 174700 + }, + { + "epoch": 2.77, + "learning_rate": 2.2333016777461224e-06, + "loss": 1.2911, + "step": 174800 + }, + { + "epoch": 2.77, + "learning_rate": 2.2317188983855655e-06, + "loss": 1.2679, + "step": 174900 + }, + { + "epoch": 2.77, + "learning_rate": 2.230136119025008e-06, + "loss": 1.2628, + "step": 175000 + }, + { + "epoch": 2.77, + "learning_rate": 2.228553339664451e-06, + "loss": 1.2869, + "step": 175100 + }, + { + "epoch": 2.77, + "learning_rate": 2.2269705603038938e-06, + "loss": 1.2891, + "step": 175200 + }, + { + "epoch": 2.77, + "learning_rate": 2.225387780943337e-06, + "loss": 1.2748, + "step": 175300 + }, + { + "epoch": 2.78, + "learning_rate": 2.2238050015827795e-06, + "loss": 1.2615, + "step": 175400 + }, + { + "epoch": 2.78, + "learning_rate": 2.222222222222222e-06, + "loss": 1.2676, + "step": 175500 + }, + { + "epoch": 2.78, + "learning_rate": 2.2206394428616655e-06, + "loss": 1.2829, + "step": 175600 + }, + { + "epoch": 2.78, + "learning_rate": 2.219056663501108e-06, + "loss": 1.2769, + "step": 175700 + }, + { + "epoch": 2.78, + "learning_rate": 2.2174738841405512e-06, + "loss": 1.2651, + "step": 175800 + }, + { + "epoch": 2.78, + "learning_rate": 2.215891104779994e-06, + "loss": 1.288, + "step": 175900 + }, + { + "epoch": 2.79, + "learning_rate": 2.214308325419437e-06, + "loss": 1.2966, + "step": 176000 + }, + { + "epoch": 2.79, + "learning_rate": 2.2127255460588795e-06, + "loss": 1.2975, + "step": 176100 + }, + { + "epoch": 2.79, + "learning_rate": 2.211142766698322e-06, + "loss": 1.2709, + "step": 176200 + }, + { + "epoch": 2.79, + "learning_rate": 2.209559987337765e-06, + "loss": 1.2717, + "step": 176300 + }, + { + "epoch": 2.79, + "learning_rate": 2.2079772079772082e-06, + "loss": 1.2737, + "step": 176400 + }, + { + "epoch": 2.79, + "learning_rate": 2.2063944286166513e-06, + "loss": 1.2705, + "step": 176500 + }, + { + "epoch": 2.8, + "learning_rate": 2.204811649256094e-06, + "loss": 1.2532, + "step": 176600 + }, + { + "epoch": 2.8, + "learning_rate": 2.203228869895537e-06, + "loss": 1.2836, + "step": 176700 + }, + { + "epoch": 2.8, + "learning_rate": 2.2016460905349796e-06, + "loss": 1.2802, + "step": 176800 + }, + { + "epoch": 2.8, + "learning_rate": 2.200063311174422e-06, + "loss": 1.2907, + "step": 176900 + }, + { + "epoch": 2.8, + "learning_rate": 2.1984805318138653e-06, + "loss": 1.2926, + "step": 177000 + }, + { + "epoch": 2.8, + "learning_rate": 2.196897752453308e-06, + "loss": 1.2737, + "step": 177100 + }, + { + "epoch": 2.8, + "learning_rate": 2.1953149730927513e-06, + "loss": 1.3116, + "step": 177200 + }, + { + "epoch": 2.81, + "learning_rate": 2.193732193732194e-06, + "loss": 1.283, + "step": 177300 + }, + { + "epoch": 2.81, + "learning_rate": 2.1921494143716366e-06, + "loss": 1.2873, + "step": 177400 + }, + { + "epoch": 2.81, + "learning_rate": 2.1905666350110796e-06, + "loss": 1.271, + "step": 177500 + }, + { + "epoch": 2.81, + "learning_rate": 2.1889838556505223e-06, + "loss": 1.293, + "step": 177600 + }, + { + "epoch": 2.81, + "learning_rate": 2.1874010762899653e-06, + "loss": 1.2981, + "step": 177700 + }, + { + "epoch": 2.81, + "learning_rate": 2.185818296929408e-06, + "loss": 1.3009, + "step": 177800 + }, + { + "epoch": 2.82, + "learning_rate": 2.1842355175688514e-06, + "loss": 1.2716, + "step": 177900 + }, + { + "epoch": 2.82, + "learning_rate": 2.182652738208294e-06, + "loss": 1.2789, + "step": 178000 + }, + { + "epoch": 2.82, + "learning_rate": 2.1810699588477367e-06, + "loss": 1.2717, + "step": 178100 + }, + { + "epoch": 2.82, + "learning_rate": 2.1794871794871797e-06, + "loss": 1.2844, + "step": 178200 + }, + { + "epoch": 2.82, + "learning_rate": 2.1779044001266223e-06, + "loss": 1.3079, + "step": 178300 + }, + { + "epoch": 2.82, + "learning_rate": 2.1763216207660654e-06, + "loss": 1.2719, + "step": 178400 + }, + { + "epoch": 2.83, + "learning_rate": 2.174738841405508e-06, + "loss": 1.3024, + "step": 178500 + }, + { + "epoch": 2.83, + "learning_rate": 2.173156062044951e-06, + "loss": 1.2851, + "step": 178600 + }, + { + "epoch": 2.83, + "learning_rate": 2.171573282684394e-06, + "loss": 1.2829, + "step": 178700 + }, + { + "epoch": 2.83, + "learning_rate": 2.1699905033238367e-06, + "loss": 1.2971, + "step": 178800 + }, + { + "epoch": 2.83, + "learning_rate": 2.1684077239632798e-06, + "loss": 1.2774, + "step": 178900 + }, + { + "epoch": 2.83, + "learning_rate": 2.1668249446027224e-06, + "loss": 1.2784, + "step": 179000 + }, + { + "epoch": 2.83, + "learning_rate": 2.1652421652421654e-06, + "loss": 1.2699, + "step": 179100 + }, + { + "epoch": 2.84, + "learning_rate": 2.163659385881608e-06, + "loss": 1.2804, + "step": 179200 + }, + { + "epoch": 2.84, + "learning_rate": 2.162076606521051e-06, + "loss": 1.2879, + "step": 179300 + }, + { + "epoch": 2.84, + "learning_rate": 2.1604938271604937e-06, + "loss": 1.2742, + "step": 179400 + }, + { + "epoch": 2.84, + "learning_rate": 2.158911047799937e-06, + "loss": 1.2722, + "step": 179500 + }, + { + "epoch": 2.84, + "learning_rate": 2.15732826843938e-06, + "loss": 1.2702, + "step": 179600 + }, + { + "epoch": 2.84, + "learning_rate": 2.1557454890788225e-06, + "loss": 1.2969, + "step": 179700 + }, + { + "epoch": 2.85, + "learning_rate": 2.1541627097182655e-06, + "loss": 1.2828, + "step": 179800 + }, + { + "epoch": 2.85, + "learning_rate": 2.152579930357708e-06, + "loss": 1.2896, + "step": 179900 + }, + { + "epoch": 2.85, + "learning_rate": 2.150997150997151e-06, + "loss": 1.2637, + "step": 180000 + }, + { + "epoch": 2.85, + "learning_rate": 2.149414371636594e-06, + "loss": 1.2725, + "step": 180100 + }, + { + "epoch": 2.85, + "learning_rate": 2.147831592276037e-06, + "loss": 1.2688, + "step": 180200 + }, + { + "epoch": 2.85, + "learning_rate": 2.14624881291548e-06, + "loss": 1.3062, + "step": 180300 + }, + { + "epoch": 2.86, + "learning_rate": 2.1446660335549225e-06, + "loss": 1.2713, + "step": 180400 + }, + { + "epoch": 2.86, + "learning_rate": 2.1430832541943656e-06, + "loss": 1.3065, + "step": 180500 + }, + { + "epoch": 2.86, + "learning_rate": 2.141500474833808e-06, + "loss": 1.2709, + "step": 180600 + }, + { + "epoch": 2.86, + "learning_rate": 2.1399176954732512e-06, + "loss": 1.2857, + "step": 180700 + }, + { + "epoch": 2.86, + "learning_rate": 2.138334916112694e-06, + "loss": 1.2632, + "step": 180800 + }, + { + "epoch": 2.86, + "learning_rate": 2.136752136752137e-06, + "loss": 1.2468, + "step": 180900 + }, + { + "epoch": 2.86, + "learning_rate": 2.1351693573915795e-06, + "loss": 1.2784, + "step": 181000 + }, + { + "epoch": 2.87, + "learning_rate": 2.1335865780310226e-06, + "loss": 1.2511, + "step": 181100 + }, + { + "epoch": 2.87, + "learning_rate": 2.1320037986704656e-06, + "loss": 1.2788, + "step": 181200 + }, + { + "epoch": 2.87, + "learning_rate": 2.1304210193099083e-06, + "loss": 1.2766, + "step": 181300 + }, + { + "epoch": 2.87, + "learning_rate": 2.1288382399493513e-06, + "loss": 1.2914, + "step": 181400 + }, + { + "epoch": 2.87, + "learning_rate": 2.127255460588794e-06, + "loss": 1.2572, + "step": 181500 + }, + { + "epoch": 2.87, + "learning_rate": 2.125672681228237e-06, + "loss": 1.2663, + "step": 181600 + }, + { + "epoch": 2.88, + "learning_rate": 2.1240899018676796e-06, + "loss": 1.273, + "step": 181700 + }, + { + "epoch": 2.88, + "learning_rate": 2.1225071225071227e-06, + "loss": 1.3021, + "step": 181800 + }, + { + "epoch": 2.88, + "learning_rate": 2.1209243431465657e-06, + "loss": 1.2695, + "step": 181900 + }, + { + "epoch": 2.88, + "learning_rate": 2.1193415637860083e-06, + "loss": 1.2785, + "step": 182000 + }, + { + "epoch": 2.88, + "learning_rate": 2.1177587844254514e-06, + "loss": 1.308, + "step": 182100 + }, + { + "epoch": 2.88, + "learning_rate": 2.116176005064894e-06, + "loss": 1.275, + "step": 182200 + }, + { + "epoch": 2.89, + "learning_rate": 2.114593225704337e-06, + "loss": 1.2769, + "step": 182300 + }, + { + "epoch": 2.89, + "learning_rate": 2.1130104463437797e-06, + "loss": 1.2948, + "step": 182400 + }, + { + "epoch": 2.89, + "learning_rate": 2.1114276669832227e-06, + "loss": 1.2922, + "step": 182500 + }, + { + "epoch": 2.89, + "learning_rate": 2.1098448876226658e-06, + "loss": 1.2751, + "step": 182600 + }, + { + "epoch": 2.89, + "learning_rate": 2.1082621082621084e-06, + "loss": 1.2982, + "step": 182700 + }, + { + "epoch": 2.89, + "learning_rate": 2.1066793289015514e-06, + "loss": 1.2891, + "step": 182800 + }, + { + "epoch": 2.89, + "learning_rate": 2.105096549540994e-06, + "loss": 1.2642, + "step": 182900 + }, + { + "epoch": 2.9, + "learning_rate": 2.103513770180437e-06, + "loss": 1.282, + "step": 183000 + }, + { + "epoch": 2.9, + "learning_rate": 2.1019309908198797e-06, + "loss": 1.2484, + "step": 183100 + }, + { + "epoch": 2.9, + "learning_rate": 2.1003482114593228e-06, + "loss": 1.2937, + "step": 183200 + }, + { + "epoch": 2.9, + "learning_rate": 2.0987654320987654e-06, + "loss": 1.2722, + "step": 183300 + }, + { + "epoch": 2.9, + "learning_rate": 2.0971826527382085e-06, + "loss": 1.2755, + "step": 183400 + }, + { + "epoch": 2.9, + "learning_rate": 2.0955998733776515e-06, + "loss": 1.2612, + "step": 183500 + }, + { + "epoch": 2.91, + "learning_rate": 2.094017094017094e-06, + "loss": 1.2788, + "step": 183600 + }, + { + "epoch": 2.91, + "learning_rate": 2.092434314656537e-06, + "loss": 1.302, + "step": 183700 + }, + { + "epoch": 2.91, + "learning_rate": 2.09085153529598e-06, + "loss": 1.2644, + "step": 183800 + }, + { + "epoch": 2.91, + "learning_rate": 2.089268755935423e-06, + "loss": 1.3188, + "step": 183900 + }, + { + "epoch": 2.91, + "learning_rate": 2.0876859765748655e-06, + "loss": 1.2803, + "step": 184000 + }, + { + "epoch": 2.91, + "learning_rate": 2.0861031972143085e-06, + "loss": 1.2829, + "step": 184100 + }, + { + "epoch": 2.92, + "learning_rate": 2.0845204178537516e-06, + "loss": 1.2981, + "step": 184200 + }, + { + "epoch": 2.92, + "learning_rate": 2.082937638493194e-06, + "loss": 1.264, + "step": 184300 + }, + { + "epoch": 2.92, + "learning_rate": 2.0813548591326372e-06, + "loss": 1.2735, + "step": 184400 + }, + { + "epoch": 2.92, + "learning_rate": 2.07977207977208e-06, + "loss": 1.2693, + "step": 184500 + }, + { + "epoch": 2.92, + "learning_rate": 2.078189300411523e-06, + "loss": 1.2836, + "step": 184600 + }, + { + "epoch": 2.92, + "learning_rate": 2.0766065210509655e-06, + "loss": 1.2621, + "step": 184700 + }, + { + "epoch": 2.92, + "learning_rate": 2.0750237416904086e-06, + "loss": 1.2567, + "step": 184800 + }, + { + "epoch": 2.93, + "learning_rate": 2.0734409623298512e-06, + "loss": 1.269, + "step": 184900 + }, + { + "epoch": 2.93, + "learning_rate": 2.0718581829692943e-06, + "loss": 1.2755, + "step": 185000 + }, + { + "epoch": 2.93, + "learning_rate": 2.0702754036087373e-06, + "loss": 1.2778, + "step": 185100 + }, + { + "epoch": 2.93, + "learning_rate": 2.06869262424818e-06, + "loss": 1.2769, + "step": 185200 + }, + { + "epoch": 2.93, + "learning_rate": 2.067109844887623e-06, + "loss": 1.2784, + "step": 185300 + }, + { + "epoch": 2.93, + "learning_rate": 2.0655270655270656e-06, + "loss": 1.2756, + "step": 185400 + }, + { + "epoch": 2.94, + "learning_rate": 2.0639442861665087e-06, + "loss": 1.2588, + "step": 185500 + }, + { + "epoch": 2.94, + "learning_rate": 2.0623615068059513e-06, + "loss": 1.2923, + "step": 185600 + }, + { + "epoch": 2.94, + "learning_rate": 2.0607787274453943e-06, + "loss": 1.2884, + "step": 185700 + }, + { + "epoch": 2.94, + "learning_rate": 2.0591959480848374e-06, + "loss": 1.2911, + "step": 185800 + }, + { + "epoch": 2.94, + "learning_rate": 2.05761316872428e-06, + "loss": 1.2752, + "step": 185900 + }, + { + "epoch": 2.94, + "learning_rate": 2.056030389363723e-06, + "loss": 1.2973, + "step": 186000 + }, + { + "epoch": 2.95, + "learning_rate": 2.0544476100031657e-06, + "loss": 1.2618, + "step": 186100 + }, + { + "epoch": 2.95, + "learning_rate": 2.0528648306426087e-06, + "loss": 1.2849, + "step": 186200 + }, + { + "epoch": 2.95, + "learning_rate": 2.0512820512820513e-06, + "loss": 1.2698, + "step": 186300 + }, + { + "epoch": 2.95, + "learning_rate": 2.0496992719214944e-06, + "loss": 1.2903, + "step": 186400 + }, + { + "epoch": 2.95, + "learning_rate": 2.0481164925609374e-06, + "loss": 1.2819, + "step": 186500 + }, + { + "epoch": 2.95, + "learning_rate": 2.04653371320038e-06, + "loss": 1.2618, + "step": 186600 + }, + { + "epoch": 2.96, + "learning_rate": 2.044950933839823e-06, + "loss": 1.2659, + "step": 186700 + }, + { + "epoch": 2.96, + "learning_rate": 2.0433681544792657e-06, + "loss": 1.2797, + "step": 186800 + }, + { + "epoch": 2.96, + "learning_rate": 2.0417853751187088e-06, + "loss": 1.2685, + "step": 186900 + }, + { + "epoch": 2.96, + "learning_rate": 2.0402025957581514e-06, + "loss": 1.2847, + "step": 187000 + }, + { + "epoch": 2.96, + "learning_rate": 2.0386198163975945e-06, + "loss": 1.2835, + "step": 187100 + }, + { + "epoch": 2.96, + "learning_rate": 2.037037037037037e-06, + "loss": 1.2828, + "step": 187200 + }, + { + "epoch": 2.96, + "learning_rate": 2.03545425767648e-06, + "loss": 1.2635, + "step": 187300 + }, + { + "epoch": 2.97, + "learning_rate": 2.033871478315923e-06, + "loss": 1.2737, + "step": 187400 + }, + { + "epoch": 2.97, + "learning_rate": 2.032288698955366e-06, + "loss": 1.2667, + "step": 187500 + }, + { + "epoch": 2.97, + "learning_rate": 2.030705919594809e-06, + "loss": 1.2842, + "step": 187600 + }, + { + "epoch": 2.97, + "learning_rate": 2.0291231402342515e-06, + "loss": 1.2708, + "step": 187700 + }, + { + "epoch": 2.97, + "learning_rate": 2.0275403608736945e-06, + "loss": 1.2875, + "step": 187800 + }, + { + "epoch": 2.97, + "learning_rate": 2.025957581513137e-06, + "loss": 1.2784, + "step": 187900 + }, + { + "epoch": 2.98, + "learning_rate": 2.0243748021525798e-06, + "loss": 1.2915, + "step": 188000 + }, + { + "epoch": 2.98, + "learning_rate": 2.0227920227920232e-06, + "loss": 1.2535, + "step": 188100 + }, + { + "epoch": 2.98, + "learning_rate": 2.021209243431466e-06, + "loss": 1.2692, + "step": 188200 + }, + { + "epoch": 2.98, + "learning_rate": 2.019626464070909e-06, + "loss": 1.2783, + "step": 188300 + }, + { + "epoch": 2.98, + "learning_rate": 2.0180436847103515e-06, + "loss": 1.254, + "step": 188400 + }, + { + "epoch": 2.98, + "learning_rate": 2.0164609053497946e-06, + "loss": 1.2628, + "step": 188500 + }, + { + "epoch": 2.99, + "learning_rate": 2.014878125989237e-06, + "loss": 1.2939, + "step": 188600 + }, + { + "epoch": 2.99, + "learning_rate": 2.01329534662868e-06, + "loss": 1.2817, + "step": 188700 + }, + { + "epoch": 2.99, + "learning_rate": 2.011712567268123e-06, + "loss": 1.276, + "step": 188800 + }, + { + "epoch": 2.99, + "learning_rate": 2.010129787907566e-06, + "loss": 1.2744, + "step": 188900 + }, + { + "epoch": 2.99, + "learning_rate": 2.008547008547009e-06, + "loss": 1.2634, + "step": 189000 + }, + { + "epoch": 2.99, + "learning_rate": 2.0069642291864516e-06, + "loss": 1.2708, + "step": 189100 + }, + { + "epoch": 2.99, + "learning_rate": 2.0053814498258946e-06, + "loss": 1.2825, + "step": 189200 + }, + { + "epoch": 3.0, + "learning_rate": 2.0037986704653373e-06, + "loss": 1.3043, + "step": 189300 + }, + { + "epoch": 3.0, + "learning_rate": 2.00221589110478e-06, + "loss": 1.2951, + "step": 189400 + }, + { + "epoch": 3.0, + "learning_rate": 2.000633111744223e-06, + "loss": 1.2802, + "step": 189500 + }, + { + "epoch": 3.0, + "learning_rate": 1.9990503323836656e-06, + "loss": 1.2612, + "step": 189600 + }, + { + "epoch": 3.0, + "learning_rate": 1.997467553023109e-06, + "loss": 1.2405, + "step": 189700 + }, + { + "epoch": 3.0, + "learning_rate": 1.9958847736625517e-06, + "loss": 1.2401, + "step": 189800 + }, + { + "epoch": 3.01, + "learning_rate": 1.9943019943019947e-06, + "loss": 1.2274, + "step": 189900 + }, + { + "epoch": 3.01, + "learning_rate": 1.9927192149414373e-06, + "loss": 1.2452, + "step": 190000 + }, + { + "epoch": 3.01, + "learning_rate": 1.99113643558088e-06, + "loss": 1.2378, + "step": 190100 + }, + { + "epoch": 3.01, + "learning_rate": 1.989553656220323e-06, + "loss": 1.2384, + "step": 190200 + }, + { + "epoch": 3.01, + "learning_rate": 1.9879708768597656e-06, + "loss": 1.2451, + "step": 190300 + }, + { + "epoch": 3.01, + "learning_rate": 1.9863880974992087e-06, + "loss": 1.2583, + "step": 190400 + }, + { + "epoch": 3.02, + "learning_rate": 1.9848053181386517e-06, + "loss": 1.2231, + "step": 190500 + }, + { + "epoch": 3.02, + "learning_rate": 1.9832225387780944e-06, + "loss": 1.2353, + "step": 190600 + }, + { + "epoch": 3.02, + "learning_rate": 1.9816397594175374e-06, + "loss": 1.2643, + "step": 190700 + }, + { + "epoch": 3.02, + "learning_rate": 1.98005698005698e-06, + "loss": 1.2729, + "step": 190800 + }, + { + "epoch": 3.02, + "learning_rate": 1.978474200696423e-06, + "loss": 1.2783, + "step": 190900 + }, + { + "epoch": 3.02, + "learning_rate": 1.9768914213358657e-06, + "loss": 1.2253, + "step": 191000 + }, + { + "epoch": 3.02, + "learning_rate": 1.9753086419753087e-06, + "loss": 1.2331, + "step": 191100 + }, + { + "epoch": 3.03, + "learning_rate": 1.973725862614752e-06, + "loss": 1.2509, + "step": 191200 + }, + { + "epoch": 3.03, + "learning_rate": 1.9721430832541944e-06, + "loss": 1.2361, + "step": 191300 + }, + { + "epoch": 3.03, + "learning_rate": 1.9705603038936375e-06, + "loss": 1.253, + "step": 191400 + }, + { + "epoch": 3.03, + "learning_rate": 1.96897752453308e-06, + "loss": 1.2475, + "step": 191500 + }, + { + "epoch": 3.03, + "learning_rate": 1.967394745172523e-06, + "loss": 1.2385, + "step": 191600 + }, + { + "epoch": 3.03, + "learning_rate": 1.9658119658119658e-06, + "loss": 1.2559, + "step": 191700 + }, + { + "epoch": 3.04, + "learning_rate": 1.964229186451409e-06, + "loss": 1.2609, + "step": 191800 + }, + { + "epoch": 3.04, + "learning_rate": 1.9626464070908514e-06, + "loss": 1.2372, + "step": 191900 + }, + { + "epoch": 3.04, + "learning_rate": 1.9610636277302945e-06, + "loss": 1.2321, + "step": 192000 + }, + { + "epoch": 3.04, + "learning_rate": 1.9594808483697375e-06, + "loss": 1.2499, + "step": 192100 + }, + { + "epoch": 3.04, + "learning_rate": 1.95789806900918e-06, + "loss": 1.2412, + "step": 192200 + }, + { + "epoch": 3.04, + "learning_rate": 1.956315289648623e-06, + "loss": 1.2322, + "step": 192300 + }, + { + "epoch": 3.05, + "learning_rate": 1.954732510288066e-06, + "loss": 1.2493, + "step": 192400 + }, + { + "epoch": 3.05, + "learning_rate": 1.953149730927509e-06, + "loss": 1.2598, + "step": 192500 + }, + { + "epoch": 3.05, + "learning_rate": 1.9515669515669515e-06, + "loss": 1.2199, + "step": 192600 + }, + { + "epoch": 3.05, + "learning_rate": 1.9499841722063945e-06, + "loss": 1.2525, + "step": 192700 + }, + { + "epoch": 3.05, + "learning_rate": 1.9484013928458376e-06, + "loss": 1.2735, + "step": 192800 + }, + { + "epoch": 3.05, + "learning_rate": 1.9468186134852802e-06, + "loss": 1.2406, + "step": 192900 + }, + { + "epoch": 3.05, + "learning_rate": 1.9452358341247233e-06, + "loss": 1.236, + "step": 193000 + }, + { + "epoch": 3.06, + "learning_rate": 1.943653054764166e-06, + "loss": 1.2599, + "step": 193100 + }, + { + "epoch": 3.06, + "learning_rate": 1.942070275403609e-06, + "loss": 1.2612, + "step": 193200 + }, + { + "epoch": 3.06, + "learning_rate": 1.9404874960430516e-06, + "loss": 1.2271, + "step": 193300 + }, + { + "epoch": 3.06, + "learning_rate": 1.9389047166824946e-06, + "loss": 1.2178, + "step": 193400 + }, + { + "epoch": 3.06, + "learning_rate": 1.9373219373219372e-06, + "loss": 1.2459, + "step": 193500 + }, + { + "epoch": 3.06, + "learning_rate": 1.9357391579613803e-06, + "loss": 1.2591, + "step": 193600 + }, + { + "epoch": 3.07, + "learning_rate": 1.9341563786008233e-06, + "loss": 1.2471, + "step": 193700 + }, + { + "epoch": 3.07, + "learning_rate": 1.932573599240266e-06, + "loss": 1.2676, + "step": 193800 + }, + { + "epoch": 3.07, + "learning_rate": 1.930990819879709e-06, + "loss": 1.249, + "step": 193900 + }, + { + "epoch": 3.07, + "learning_rate": 1.9294080405191516e-06, + "loss": 1.2489, + "step": 194000 + }, + { + "epoch": 3.07, + "learning_rate": 1.9278252611585947e-06, + "loss": 1.2576, + "step": 194100 + }, + { + "epoch": 3.07, + "learning_rate": 1.9262424817980373e-06, + "loss": 1.2459, + "step": 194200 + }, + { + "epoch": 3.08, + "learning_rate": 1.9246597024374803e-06, + "loss": 1.229, + "step": 194300 + }, + { + "epoch": 3.08, + "learning_rate": 1.9230769230769234e-06, + "loss": 1.2454, + "step": 194400 + }, + { + "epoch": 3.08, + "learning_rate": 1.921494143716366e-06, + "loss": 1.2555, + "step": 194500 + }, + { + "epoch": 3.08, + "learning_rate": 1.919911364355809e-06, + "loss": 1.24, + "step": 194600 + }, + { + "epoch": 3.08, + "learning_rate": 1.9183285849952517e-06, + "loss": 1.2455, + "step": 194700 + }, + { + "epoch": 3.08, + "learning_rate": 1.9167458056346947e-06, + "loss": 1.2728, + "step": 194800 + }, + { + "epoch": 3.08, + "learning_rate": 1.9151630262741374e-06, + "loss": 1.2666, + "step": 194900 + }, + { + "epoch": 3.09, + "learning_rate": 1.9135802469135804e-06, + "loss": 1.238, + "step": 195000 + }, + { + "epoch": 3.09, + "learning_rate": 1.9119974675530235e-06, + "loss": 1.2514, + "step": 195100 + }, + { + "epoch": 3.09, + "learning_rate": 1.910414688192466e-06, + "loss": 1.2493, + "step": 195200 + }, + { + "epoch": 3.09, + "learning_rate": 1.908831908831909e-06, + "loss": 1.2498, + "step": 195300 + }, + { + "epoch": 3.09, + "learning_rate": 1.907249129471352e-06, + "loss": 1.2379, + "step": 195400 + }, + { + "epoch": 3.09, + "learning_rate": 1.9056663501107948e-06, + "loss": 1.2071, + "step": 195500 + }, + { + "epoch": 3.1, + "learning_rate": 1.9040835707502374e-06, + "loss": 1.2708, + "step": 195600 + }, + { + "epoch": 3.1, + "learning_rate": 1.9025007913896803e-06, + "loss": 1.2384, + "step": 195700 + }, + { + "epoch": 3.1, + "learning_rate": 1.900918012029123e-06, + "loss": 1.2763, + "step": 195800 + }, + { + "epoch": 3.1, + "learning_rate": 1.8993352326685664e-06, + "loss": 1.2591, + "step": 195900 + }, + { + "epoch": 3.1, + "learning_rate": 1.8977524533080092e-06, + "loss": 1.2338, + "step": 196000 + }, + { + "epoch": 3.1, + "learning_rate": 1.896169673947452e-06, + "loss": 1.255, + "step": 196100 + }, + { + "epoch": 3.11, + "learning_rate": 1.8945868945868947e-06, + "loss": 1.2154, + "step": 196200 + }, + { + "epoch": 3.11, + "learning_rate": 1.8930041152263375e-06, + "loss": 1.2323, + "step": 196300 + }, + { + "epoch": 3.11, + "learning_rate": 1.8914213358657803e-06, + "loss": 1.2435, + "step": 196400 + }, + { + "epoch": 3.11, + "learning_rate": 1.8898385565052232e-06, + "loss": 1.2459, + "step": 196500 + }, + { + "epoch": 3.11, + "learning_rate": 1.888255777144666e-06, + "loss": 1.2382, + "step": 196600 + }, + { + "epoch": 3.11, + "learning_rate": 1.8866729977841093e-06, + "loss": 1.2192, + "step": 196700 + }, + { + "epoch": 3.11, + "learning_rate": 1.885090218423552e-06, + "loss": 1.232, + "step": 196800 + }, + { + "epoch": 3.12, + "learning_rate": 1.8835074390629947e-06, + "loss": 1.2427, + "step": 196900 + }, + { + "epoch": 3.12, + "learning_rate": 1.8819246597024376e-06, + "loss": 1.2299, + "step": 197000 + }, + { + "epoch": 3.12, + "learning_rate": 1.8803418803418804e-06, + "loss": 1.2627, + "step": 197100 + }, + { + "epoch": 3.12, + "learning_rate": 1.8787591009813232e-06, + "loss": 1.2491, + "step": 197200 + }, + { + "epoch": 3.12, + "learning_rate": 1.877176321620766e-06, + "loss": 1.2408, + "step": 197300 + }, + { + "epoch": 3.12, + "learning_rate": 1.875593542260209e-06, + "loss": 1.2532, + "step": 197400 + }, + { + "epoch": 3.13, + "learning_rate": 1.8740107628996522e-06, + "loss": 1.2322, + "step": 197500 + }, + { + "epoch": 3.13, + "learning_rate": 1.8724279835390948e-06, + "loss": 1.2301, + "step": 197600 + }, + { + "epoch": 3.13, + "learning_rate": 1.8708452041785376e-06, + "loss": 1.2351, + "step": 197700 + }, + { + "epoch": 3.13, + "learning_rate": 1.8692624248179805e-06, + "loss": 1.2381, + "step": 197800 + }, + { + "epoch": 3.13, + "learning_rate": 1.8676796454574233e-06, + "loss": 1.2292, + "step": 197900 + }, + { + "epoch": 3.13, + "learning_rate": 1.8660968660968661e-06, + "loss": 1.2353, + "step": 198000 + }, + { + "epoch": 3.14, + "learning_rate": 1.864514086736309e-06, + "loss": 1.2478, + "step": 198100 + }, + { + "epoch": 3.14, + "learning_rate": 1.8629313073757518e-06, + "loss": 1.2674, + "step": 198200 + }, + { + "epoch": 3.14, + "learning_rate": 1.8613485280151949e-06, + "loss": 1.2407, + "step": 198300 + }, + { + "epoch": 3.14, + "learning_rate": 1.8597657486546377e-06, + "loss": 1.252, + "step": 198400 + }, + { + "epoch": 3.14, + "learning_rate": 1.8581829692940805e-06, + "loss": 1.2296, + "step": 198500 + }, + { + "epoch": 3.14, + "learning_rate": 1.8566001899335234e-06, + "loss": 1.2212, + "step": 198600 + }, + { + "epoch": 3.14, + "learning_rate": 1.8550174105729662e-06, + "loss": 1.2593, + "step": 198700 + }, + { + "epoch": 3.15, + "learning_rate": 1.853434631212409e-06, + "loss": 1.2589, + "step": 198800 + }, + { + "epoch": 3.15, + "learning_rate": 1.8518518518518519e-06, + "loss": 1.2658, + "step": 198900 + }, + { + "epoch": 3.15, + "learning_rate": 1.8502690724912947e-06, + "loss": 1.2562, + "step": 199000 + }, + { + "epoch": 3.15, + "learning_rate": 1.8486862931307378e-06, + "loss": 1.2194, + "step": 199100 + }, + { + "epoch": 3.15, + "learning_rate": 1.8471035137701806e-06, + "loss": 1.2305, + "step": 199200 + }, + { + "epoch": 3.15, + "learning_rate": 1.8455207344096234e-06, + "loss": 1.2331, + "step": 199300 + }, + { + "epoch": 3.16, + "learning_rate": 1.8439379550490663e-06, + "loss": 1.2488, + "step": 199400 + }, + { + "epoch": 3.16, + "learning_rate": 1.842355175688509e-06, + "loss": 1.2838, + "step": 199500 + }, + { + "epoch": 3.16, + "learning_rate": 1.840772396327952e-06, + "loss": 1.2588, + "step": 199600 + }, + { + "epoch": 3.16, + "learning_rate": 1.8391896169673948e-06, + "loss": 1.2524, + "step": 199700 + }, + { + "epoch": 3.16, + "learning_rate": 1.8376068376068378e-06, + "loss": 1.2301, + "step": 199800 + }, + { + "epoch": 3.16, + "learning_rate": 1.8360240582462807e-06, + "loss": 1.2341, + "step": 199900 + }, + { + "epoch": 3.17, + "learning_rate": 1.8344412788857235e-06, + "loss": 1.2495, + "step": 200000 + }, + { + "epoch": 3.17, + "learning_rate": 1.8328584995251663e-06, + "loss": 1.249, + "step": 200100 + }, + { + "epoch": 3.17, + "learning_rate": 1.8312757201646092e-06, + "loss": 1.2413, + "step": 200200 + }, + { + "epoch": 3.17, + "learning_rate": 1.829692940804052e-06, + "loss": 1.251, + "step": 200300 + }, + { + "epoch": 3.17, + "learning_rate": 1.8281101614434948e-06, + "loss": 1.2444, + "step": 200400 + }, + { + "epoch": 3.17, + "learning_rate": 1.8265273820829377e-06, + "loss": 1.2483, + "step": 200500 + }, + { + "epoch": 3.18, + "learning_rate": 1.8249446027223807e-06, + "loss": 1.2452, + "step": 200600 + }, + { + "epoch": 3.18, + "learning_rate": 1.8233618233618236e-06, + "loss": 1.2351, + "step": 200700 + }, + { + "epoch": 3.18, + "learning_rate": 1.8217790440012664e-06, + "loss": 1.2388, + "step": 200800 + }, + { + "epoch": 3.18, + "learning_rate": 1.8201962646407092e-06, + "loss": 1.2543, + "step": 200900 + }, + { + "epoch": 3.18, + "learning_rate": 1.818613485280152e-06, + "loss": 1.2439, + "step": 201000 + }, + { + "epoch": 3.18, + "learning_rate": 1.817030705919595e-06, + "loss": 1.2244, + "step": 201100 + }, + { + "epoch": 3.18, + "learning_rate": 1.8154479265590377e-06, + "loss": 1.2469, + "step": 201200 + }, + { + "epoch": 3.19, + "learning_rate": 1.8138651471984806e-06, + "loss": 1.2573, + "step": 201300 + }, + { + "epoch": 3.19, + "learning_rate": 1.8122823678379236e-06, + "loss": 1.2158, + "step": 201400 + }, + { + "epoch": 3.19, + "learning_rate": 1.8106995884773665e-06, + "loss": 1.2588, + "step": 201500 + }, + { + "epoch": 3.19, + "learning_rate": 1.8091168091168093e-06, + "loss": 1.2726, + "step": 201600 + }, + { + "epoch": 3.19, + "learning_rate": 1.8075340297562521e-06, + "loss": 1.2704, + "step": 201700 + }, + { + "epoch": 3.19, + "learning_rate": 1.805951250395695e-06, + "loss": 1.2376, + "step": 201800 + }, + { + "epoch": 3.2, + "learning_rate": 1.8043684710351378e-06, + "loss": 1.2476, + "step": 201900 + }, + { + "epoch": 3.2, + "learning_rate": 1.8027856916745806e-06, + "loss": 1.25, + "step": 202000 + }, + { + "epoch": 3.2, + "learning_rate": 1.8012029123140235e-06, + "loss": 1.2597, + "step": 202100 + }, + { + "epoch": 3.2, + "learning_rate": 1.7996201329534665e-06, + "loss": 1.2478, + "step": 202200 + }, + { + "epoch": 3.2, + "learning_rate": 1.7980373535929094e-06, + "loss": 1.2715, + "step": 202300 + }, + { + "epoch": 3.2, + "learning_rate": 1.7964545742323522e-06, + "loss": 1.2387, + "step": 202400 + }, + { + "epoch": 3.21, + "learning_rate": 1.794871794871795e-06, + "loss": 1.2514, + "step": 202500 + }, + { + "epoch": 3.21, + "learning_rate": 1.7932890155112379e-06, + "loss": 1.2611, + "step": 202600 + }, + { + "epoch": 3.21, + "learning_rate": 1.7917062361506807e-06, + "loss": 1.2358, + "step": 202700 + }, + { + "epoch": 3.21, + "learning_rate": 1.7901234567901235e-06, + "loss": 1.2601, + "step": 202800 + }, + { + "epoch": 3.21, + "learning_rate": 1.7885406774295664e-06, + "loss": 1.2259, + "step": 202900 + }, + { + "epoch": 3.21, + "learning_rate": 1.7869578980690094e-06, + "loss": 1.2487, + "step": 203000 + }, + { + "epoch": 3.21, + "learning_rate": 1.7853751187084523e-06, + "loss": 1.2563, + "step": 203100 + }, + { + "epoch": 3.22, + "learning_rate": 1.783792339347895e-06, + "loss": 1.267, + "step": 203200 + }, + { + "epoch": 3.22, + "learning_rate": 1.782209559987338e-06, + "loss": 1.2506, + "step": 203300 + }, + { + "epoch": 3.22, + "learning_rate": 1.7806267806267808e-06, + "loss": 1.2634, + "step": 203400 + }, + { + "epoch": 3.22, + "learning_rate": 1.7790440012662236e-06, + "loss": 1.2611, + "step": 203500 + }, + { + "epoch": 3.22, + "learning_rate": 1.7774612219056664e-06, + "loss": 1.2441, + "step": 203600 + }, + { + "epoch": 3.22, + "learning_rate": 1.7758784425451095e-06, + "loss": 1.2578, + "step": 203700 + }, + { + "epoch": 3.23, + "learning_rate": 1.7742956631845523e-06, + "loss": 1.2381, + "step": 203800 + }, + { + "epoch": 3.23, + "learning_rate": 1.7727128838239952e-06, + "loss": 1.2298, + "step": 203900 + }, + { + "epoch": 3.23, + "learning_rate": 1.771130104463438e-06, + "loss": 1.2178, + "step": 204000 + }, + { + "epoch": 3.23, + "learning_rate": 1.7695473251028808e-06, + "loss": 1.2671, + "step": 204100 + }, + { + "epoch": 3.23, + "learning_rate": 1.7679645457423237e-06, + "loss": 1.2419, + "step": 204200 + }, + { + "epoch": 3.23, + "learning_rate": 1.7663817663817665e-06, + "loss": 1.2411, + "step": 204300 + }, + { + "epoch": 3.24, + "learning_rate": 1.7647989870212093e-06, + "loss": 1.2656, + "step": 204400 + }, + { + "epoch": 3.24, + "learning_rate": 1.7632162076606524e-06, + "loss": 1.256, + "step": 204500 + }, + { + "epoch": 3.24, + "learning_rate": 1.7616334283000952e-06, + "loss": 1.249, + "step": 204600 + }, + { + "epoch": 3.24, + "learning_rate": 1.760050648939538e-06, + "loss": 1.2494, + "step": 204700 + }, + { + "epoch": 3.24, + "learning_rate": 1.758467869578981e-06, + "loss": 1.2468, + "step": 204800 + }, + { + "epoch": 3.24, + "learning_rate": 1.7568850902184237e-06, + "loss": 1.2431, + "step": 204900 + }, + { + "epoch": 3.24, + "learning_rate": 1.7553023108578666e-06, + "loss": 1.2088, + "step": 205000 + }, + { + "epoch": 3.25, + "learning_rate": 1.7537195314973094e-06, + "loss": 1.2292, + "step": 205100 + }, + { + "epoch": 3.25, + "learning_rate": 1.7521367521367522e-06, + "loss": 1.2328, + "step": 205200 + }, + { + "epoch": 3.25, + "learning_rate": 1.7505539727761953e-06, + "loss": 1.273, + "step": 205300 + }, + { + "epoch": 3.25, + "learning_rate": 1.7489711934156381e-06, + "loss": 1.2562, + "step": 205400 + }, + { + "epoch": 3.25, + "learning_rate": 1.747388414055081e-06, + "loss": 1.2129, + "step": 205500 + }, + { + "epoch": 3.25, + "learning_rate": 1.7458056346945238e-06, + "loss": 1.241, + "step": 205600 + }, + { + "epoch": 3.26, + "learning_rate": 1.7442228553339666e-06, + "loss": 1.2482, + "step": 205700 + }, + { + "epoch": 3.26, + "learning_rate": 1.7426400759734095e-06, + "loss": 1.2848, + "step": 205800 + }, + { + "epoch": 3.26, + "learning_rate": 1.7410572966128523e-06, + "loss": 1.2477, + "step": 205900 + }, + { + "epoch": 3.26, + "learning_rate": 1.7394745172522951e-06, + "loss": 1.2461, + "step": 206000 + }, + { + "epoch": 3.26, + "learning_rate": 1.7378917378917382e-06, + "loss": 1.2796, + "step": 206100 + }, + { + "epoch": 3.26, + "learning_rate": 1.736308958531181e-06, + "loss": 1.2306, + "step": 206200 + }, + { + "epoch": 3.27, + "learning_rate": 1.7347261791706239e-06, + "loss": 1.2568, + "step": 206300 + }, + { + "epoch": 3.27, + "learning_rate": 1.7331433998100667e-06, + "loss": 1.227, + "step": 206400 + }, + { + "epoch": 3.27, + "learning_rate": 1.7315606204495095e-06, + "loss": 1.2486, + "step": 206500 + }, + { + "epoch": 3.27, + "learning_rate": 1.7299778410889524e-06, + "loss": 1.2436, + "step": 206600 + }, + { + "epoch": 3.27, + "learning_rate": 1.7283950617283952e-06, + "loss": 1.2449, + "step": 206700 + }, + { + "epoch": 3.27, + "learning_rate": 1.7268122823678378e-06, + "loss": 1.2456, + "step": 206800 + }, + { + "epoch": 3.27, + "learning_rate": 1.725229503007281e-06, + "loss": 1.2389, + "step": 206900 + }, + { + "epoch": 3.28, + "learning_rate": 1.723646723646724e-06, + "loss": 1.2394, + "step": 207000 + }, + { + "epoch": 3.28, + "learning_rate": 1.7220639442861668e-06, + "loss": 1.2534, + "step": 207100 + }, + { + "epoch": 3.28, + "learning_rate": 1.7204811649256096e-06, + "loss": 1.2481, + "step": 207200 + }, + { + "epoch": 3.28, + "learning_rate": 1.7188983855650524e-06, + "loss": 1.2398, + "step": 207300 + }, + { + "epoch": 3.28, + "learning_rate": 1.7173156062044953e-06, + "loss": 1.2393, + "step": 207400 + }, + { + "epoch": 3.28, + "learning_rate": 1.715732826843938e-06, + "loss": 1.2248, + "step": 207500 + }, + { + "epoch": 3.29, + "learning_rate": 1.7141500474833807e-06, + "loss": 1.2493, + "step": 207600 + }, + { + "epoch": 3.29, + "learning_rate": 1.712567268122824e-06, + "loss": 1.2608, + "step": 207700 + }, + { + "epoch": 3.29, + "learning_rate": 1.7109844887622668e-06, + "loss": 1.2488, + "step": 207800 + }, + { + "epoch": 3.29, + "learning_rate": 1.7094017094017097e-06, + "loss": 1.252, + "step": 207900 + }, + { + "epoch": 3.29, + "learning_rate": 1.7078189300411525e-06, + "loss": 1.247, + "step": 208000 + }, + { + "epoch": 3.29, + "learning_rate": 1.7062361506805951e-06, + "loss": 1.2525, + "step": 208100 + }, + { + "epoch": 3.3, + "learning_rate": 1.704653371320038e-06, + "loss": 1.2471, + "step": 208200 + }, + { + "epoch": 3.3, + "learning_rate": 1.7030705919594808e-06, + "loss": 1.2298, + "step": 208300 + }, + { + "epoch": 3.3, + "learning_rate": 1.701487812598924e-06, + "loss": 1.2743, + "step": 208400 + }, + { + "epoch": 3.3, + "learning_rate": 1.6999050332383669e-06, + "loss": 1.2406, + "step": 208500 + }, + { + "epoch": 3.3, + "learning_rate": 1.6983222538778097e-06, + "loss": 1.2562, + "step": 208600 + }, + { + "epoch": 3.3, + "learning_rate": 1.6967394745172526e-06, + "loss": 1.2634, + "step": 208700 + }, + { + "epoch": 3.3, + "learning_rate": 1.6951566951566952e-06, + "loss": 1.2591, + "step": 208800 + }, + { + "epoch": 3.31, + "learning_rate": 1.693573915796138e-06, + "loss": 1.252, + "step": 208900 + }, + { + "epoch": 3.31, + "learning_rate": 1.6919911364355809e-06, + "loss": 1.2353, + "step": 209000 + }, + { + "epoch": 3.31, + "learning_rate": 1.6904083570750237e-06, + "loss": 1.2397, + "step": 209100 + }, + { + "epoch": 3.31, + "learning_rate": 1.688825577714467e-06, + "loss": 1.2503, + "step": 209200 + }, + { + "epoch": 3.31, + "learning_rate": 1.6872427983539098e-06, + "loss": 1.2477, + "step": 209300 + }, + { + "epoch": 3.31, + "learning_rate": 1.6856600189933524e-06, + "loss": 1.2638, + "step": 209400 + }, + { + "epoch": 3.32, + "learning_rate": 1.6840772396327953e-06, + "loss": 1.2328, + "step": 209500 + }, + { + "epoch": 3.32, + "learning_rate": 1.682494460272238e-06, + "loss": 1.2335, + "step": 209600 + }, + { + "epoch": 3.32, + "learning_rate": 1.680911680911681e-06, + "loss": 1.2445, + "step": 209700 + }, + { + "epoch": 3.32, + "learning_rate": 1.6793289015511238e-06, + "loss": 1.2558, + "step": 209800 + }, + { + "epoch": 3.32, + "learning_rate": 1.6777461221905666e-06, + "loss": 1.2411, + "step": 209900 + }, + { + "epoch": 3.32, + "learning_rate": 1.6761633428300099e-06, + "loss": 1.2376, + "step": 210000 + }, + { + "epoch": 3.33, + "learning_rate": 1.6745805634694525e-06, + "loss": 1.2362, + "step": 210100 + }, + { + "epoch": 3.33, + "learning_rate": 1.6729977841088953e-06, + "loss": 1.2222, + "step": 210200 + }, + { + "epoch": 3.33, + "learning_rate": 1.6714150047483382e-06, + "loss": 1.2419, + "step": 210300 + }, + { + "epoch": 3.33, + "learning_rate": 1.669832225387781e-06, + "loss": 1.2606, + "step": 210400 + }, + { + "epoch": 3.33, + "learning_rate": 1.6682494460272238e-06, + "loss": 1.26, + "step": 210500 + }, + { + "epoch": 3.33, + "learning_rate": 1.6666666666666667e-06, + "loss": 1.249, + "step": 210600 + }, + { + "epoch": 3.33, + "learning_rate": 1.6650838873061095e-06, + "loss": 1.2807, + "step": 210700 + }, + { + "epoch": 3.34, + "learning_rate": 1.6635011079455525e-06, + "loss": 1.2249, + "step": 210800 + }, + { + "epoch": 3.34, + "learning_rate": 1.6619183285849954e-06, + "loss": 1.2672, + "step": 210900 + }, + { + "epoch": 3.34, + "learning_rate": 1.6603355492244382e-06, + "loss": 1.2518, + "step": 211000 + }, + { + "epoch": 3.34, + "learning_rate": 1.658752769863881e-06, + "loss": 1.2495, + "step": 211100 + }, + { + "epoch": 3.34, + "learning_rate": 1.6571699905033239e-06, + "loss": 1.2569, + "step": 211200 + }, + { + "epoch": 3.34, + "learning_rate": 1.6555872111427667e-06, + "loss": 1.2431, + "step": 211300 + }, + { + "epoch": 3.35, + "learning_rate": 1.6540044317822096e-06, + "loss": 1.2373, + "step": 211400 + }, + { + "epoch": 3.35, + "learning_rate": 1.6524216524216524e-06, + "loss": 1.2348, + "step": 211500 + }, + { + "epoch": 3.35, + "learning_rate": 1.6508388730610954e-06, + "loss": 1.2432, + "step": 211600 + }, + { + "epoch": 3.35, + "learning_rate": 1.6492560937005383e-06, + "loss": 1.2426, + "step": 211700 + }, + { + "epoch": 3.35, + "learning_rate": 1.6476733143399811e-06, + "loss": 1.2697, + "step": 211800 + }, + { + "epoch": 3.35, + "learning_rate": 1.646090534979424e-06, + "loss": 1.2502, + "step": 211900 + }, + { + "epoch": 3.36, + "learning_rate": 1.6445077556188668e-06, + "loss": 1.253, + "step": 212000 + }, + { + "epoch": 3.36, + "learning_rate": 1.6429249762583096e-06, + "loss": 1.2616, + "step": 212100 + }, + { + "epoch": 3.36, + "learning_rate": 1.6413421968977525e-06, + "loss": 1.2869, + "step": 212200 + }, + { + "epoch": 3.36, + "learning_rate": 1.6397594175371955e-06, + "loss": 1.2456, + "step": 212300 + }, + { + "epoch": 3.36, + "learning_rate": 1.6381766381766383e-06, + "loss": 1.2771, + "step": 212400 + }, + { + "epoch": 3.36, + "learning_rate": 1.6365938588160812e-06, + "loss": 1.2429, + "step": 212500 + }, + { + "epoch": 3.36, + "learning_rate": 1.635011079455524e-06, + "loss": 1.2574, + "step": 212600 + }, + { + "epoch": 3.37, + "learning_rate": 1.6334283000949669e-06, + "loss": 1.2408, + "step": 212700 + }, + { + "epoch": 3.37, + "learning_rate": 1.6318455207344097e-06, + "loss": 1.2654, + "step": 212800 + }, + { + "epoch": 3.37, + "learning_rate": 1.6302627413738525e-06, + "loss": 1.2428, + "step": 212900 + }, + { + "epoch": 3.37, + "learning_rate": 1.6286799620132954e-06, + "loss": 1.2224, + "step": 213000 + }, + { + "epoch": 3.37, + "learning_rate": 1.6270971826527384e-06, + "loss": 1.2795, + "step": 213100 + }, + { + "epoch": 3.37, + "learning_rate": 1.6255144032921812e-06, + "loss": 1.2448, + "step": 213200 + }, + { + "epoch": 3.38, + "learning_rate": 1.623931623931624e-06, + "loss": 1.2453, + "step": 213300 + }, + { + "epoch": 3.38, + "learning_rate": 1.622348844571067e-06, + "loss": 1.2457, + "step": 213400 + }, + { + "epoch": 3.38, + "learning_rate": 1.6207660652105098e-06, + "loss": 1.2379, + "step": 213500 + }, + { + "epoch": 3.38, + "learning_rate": 1.6191832858499526e-06, + "loss": 1.2555, + "step": 213600 + }, + { + "epoch": 3.38, + "learning_rate": 1.6176005064893954e-06, + "loss": 1.2506, + "step": 213700 + }, + { + "epoch": 3.38, + "learning_rate": 1.6160177271288383e-06, + "loss": 1.2602, + "step": 213800 + }, + { + "epoch": 3.39, + "learning_rate": 1.6144349477682813e-06, + "loss": 1.2719, + "step": 213900 + }, + { + "epoch": 3.39, + "learning_rate": 1.6128521684077241e-06, + "loss": 1.245, + "step": 214000 + }, + { + "epoch": 3.39, + "learning_rate": 1.611269389047167e-06, + "loss": 1.2508, + "step": 214100 + }, + { + "epoch": 3.39, + "learning_rate": 1.6096866096866098e-06, + "loss": 1.2412, + "step": 214200 + }, + { + "epoch": 3.39, + "learning_rate": 1.6081038303260527e-06, + "loss": 1.2291, + "step": 214300 + }, + { + "epoch": 3.39, + "learning_rate": 1.6065210509654955e-06, + "loss": 1.2507, + "step": 214400 + }, + { + "epoch": 3.4, + "learning_rate": 1.6049382716049383e-06, + "loss": 1.2273, + "step": 214500 + }, + { + "epoch": 3.4, + "learning_rate": 1.6033554922443812e-06, + "loss": 1.2531, + "step": 214600 + }, + { + "epoch": 3.4, + "learning_rate": 1.6017727128838242e-06, + "loss": 1.2221, + "step": 214700 + }, + { + "epoch": 3.4, + "learning_rate": 1.600189933523267e-06, + "loss": 1.2632, + "step": 214800 + }, + { + "epoch": 3.4, + "learning_rate": 1.5986071541627099e-06, + "loss": 1.2397, + "step": 214900 + }, + { + "epoch": 3.4, + "learning_rate": 1.5970243748021527e-06, + "loss": 1.236, + "step": 215000 + }, + { + "epoch": 3.4, + "learning_rate": 1.5954415954415956e-06, + "loss": 1.2437, + "step": 215100 + }, + { + "epoch": 3.41, + "learning_rate": 1.5938588160810384e-06, + "loss": 1.2488, + "step": 215200 + }, + { + "epoch": 3.41, + "learning_rate": 1.5922760367204812e-06, + "loss": 1.229, + "step": 215300 + }, + { + "epoch": 3.41, + "learning_rate": 1.590693257359924e-06, + "loss": 1.2075, + "step": 215400 + }, + { + "epoch": 3.41, + "learning_rate": 1.5891104779993671e-06, + "loss": 1.2419, + "step": 215500 + }, + { + "epoch": 3.41, + "learning_rate": 1.58752769863881e-06, + "loss": 1.2519, + "step": 215600 + }, + { + "epoch": 3.41, + "learning_rate": 1.5859449192782528e-06, + "loss": 1.2538, + "step": 215700 + }, + { + "epoch": 3.42, + "learning_rate": 1.5843621399176956e-06, + "loss": 1.242, + "step": 215800 + }, + { + "epoch": 3.42, + "learning_rate": 1.5827793605571385e-06, + "loss": 1.2382, + "step": 215900 + }, + { + "epoch": 3.42, + "learning_rate": 1.5811965811965813e-06, + "loss": 1.2555, + "step": 216000 + }, + { + "epoch": 3.42, + "learning_rate": 1.5796138018360241e-06, + "loss": 1.2095, + "step": 216100 + }, + { + "epoch": 3.42, + "learning_rate": 1.578031022475467e-06, + "loss": 1.2067, + "step": 216200 + }, + { + "epoch": 3.42, + "learning_rate": 1.57644824311491e-06, + "loss": 1.2299, + "step": 216300 + }, + { + "epoch": 3.43, + "learning_rate": 1.5748654637543529e-06, + "loss": 1.2532, + "step": 216400 + }, + { + "epoch": 3.43, + "learning_rate": 1.5732826843937957e-06, + "loss": 1.2626, + "step": 216500 + }, + { + "epoch": 3.43, + "learning_rate": 1.5716999050332385e-06, + "loss": 1.2278, + "step": 216600 + }, + { + "epoch": 3.43, + "learning_rate": 1.5701171256726814e-06, + "loss": 1.2322, + "step": 216700 + }, + { + "epoch": 3.43, + "learning_rate": 1.5685343463121242e-06, + "loss": 1.2374, + "step": 216800 + }, + { + "epoch": 3.43, + "learning_rate": 1.566951566951567e-06, + "loss": 1.2623, + "step": 216900 + }, + { + "epoch": 3.43, + "learning_rate": 1.56536878759101e-06, + "loss": 1.2337, + "step": 217000 + }, + { + "epoch": 3.44, + "learning_rate": 1.563786008230453e-06, + "loss": 1.2624, + "step": 217100 + }, + { + "epoch": 3.44, + "learning_rate": 1.5622032288698958e-06, + "loss": 1.2655, + "step": 217200 + }, + { + "epoch": 3.44, + "learning_rate": 1.5606204495093386e-06, + "loss": 1.2449, + "step": 217300 + }, + { + "epoch": 3.44, + "learning_rate": 1.5590376701487814e-06, + "loss": 1.249, + "step": 217400 + }, + { + "epoch": 3.44, + "learning_rate": 1.5574548907882243e-06, + "loss": 1.2301, + "step": 217500 + }, + { + "epoch": 3.44, + "learning_rate": 1.555872111427667e-06, + "loss": 1.2445, + "step": 217600 + }, + { + "epoch": 3.45, + "learning_rate": 1.55428933206711e-06, + "loss": 1.2463, + "step": 217700 + }, + { + "epoch": 3.45, + "learning_rate": 1.552706552706553e-06, + "loss": 1.2428, + "step": 217800 + }, + { + "epoch": 3.45, + "learning_rate": 1.5511237733459958e-06, + "loss": 1.2259, + "step": 217900 + }, + { + "epoch": 3.45, + "learning_rate": 1.5495409939854387e-06, + "loss": 1.2691, + "step": 218000 + }, + { + "epoch": 3.45, + "learning_rate": 1.5479582146248815e-06, + "loss": 1.2534, + "step": 218100 + }, + { + "epoch": 3.45, + "learning_rate": 1.5463754352643243e-06, + "loss": 1.2381, + "step": 218200 + }, + { + "epoch": 3.46, + "learning_rate": 1.5447926559037672e-06, + "loss": 1.2581, + "step": 218300 + }, + { + "epoch": 3.46, + "learning_rate": 1.54320987654321e-06, + "loss": 1.2314, + "step": 218400 + }, + { + "epoch": 3.46, + "learning_rate": 1.5416270971826528e-06, + "loss": 1.2477, + "step": 218500 + }, + { + "epoch": 3.46, + "learning_rate": 1.5400443178220959e-06, + "loss": 1.2518, + "step": 218600 + }, + { + "epoch": 3.46, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.2412, + "step": 218700 + }, + { + "epoch": 3.46, + "learning_rate": 1.5368787591009816e-06, + "loss": 1.2748, + "step": 218800 + }, + { + "epoch": 3.46, + "learning_rate": 1.5352959797404244e-06, + "loss": 1.2315, + "step": 218900 + }, + { + "epoch": 3.47, + "learning_rate": 1.5337132003798672e-06, + "loss": 1.2391, + "step": 219000 + }, + { + "epoch": 3.47, + "learning_rate": 1.53213042101931e-06, + "loss": 1.2561, + "step": 219100 + }, + { + "epoch": 3.47, + "learning_rate": 1.530547641658753e-06, + "loss": 1.2338, + "step": 219200 + }, + { + "epoch": 3.47, + "learning_rate": 1.5289648622981957e-06, + "loss": 1.2484, + "step": 219300 + }, + { + "epoch": 3.47, + "learning_rate": 1.5273820829376388e-06, + "loss": 1.242, + "step": 219400 + }, + { + "epoch": 3.47, + "learning_rate": 1.5257993035770816e-06, + "loss": 1.2733, + "step": 219500 + }, + { + "epoch": 3.48, + "learning_rate": 1.5242165242165245e-06, + "loss": 1.2338, + "step": 219600 + }, + { + "epoch": 3.48, + "learning_rate": 1.5226337448559673e-06, + "loss": 1.2568, + "step": 219700 + }, + { + "epoch": 3.48, + "learning_rate": 1.5210509654954101e-06, + "loss": 1.2168, + "step": 219800 + }, + { + "epoch": 3.48, + "learning_rate": 1.519468186134853e-06, + "loss": 1.2227, + "step": 219900 + }, + { + "epoch": 3.48, + "learning_rate": 1.5178854067742956e-06, + "loss": 1.2381, + "step": 220000 + }, + { + "epoch": 3.48, + "learning_rate": 1.5163026274137384e-06, + "loss": 1.236, + "step": 220100 + }, + { + "epoch": 3.49, + "learning_rate": 1.5147198480531817e-06, + "loss": 1.2648, + "step": 220200 + }, + { + "epoch": 3.49, + "learning_rate": 1.5131370686926245e-06, + "loss": 1.2629, + "step": 220300 + }, + { + "epoch": 3.49, + "learning_rate": 1.5115542893320674e-06, + "loss": 1.2506, + "step": 220400 + }, + { + "epoch": 3.49, + "learning_rate": 1.5099715099715102e-06, + "loss": 1.2663, + "step": 220500 + }, + { + "epoch": 3.49, + "learning_rate": 1.508388730610953e-06, + "loss": 1.2484, + "step": 220600 + }, + { + "epoch": 3.49, + "learning_rate": 1.5068059512503957e-06, + "loss": 1.2824, + "step": 220700 + }, + { + "epoch": 3.49, + "learning_rate": 1.5052231718898385e-06, + "loss": 1.2388, + "step": 220800 + }, + { + "epoch": 3.5, + "learning_rate": 1.5036403925292817e-06, + "loss": 1.2586, + "step": 220900 + }, + { + "epoch": 3.5, + "learning_rate": 1.5020576131687246e-06, + "loss": 1.2517, + "step": 221000 + }, + { + "epoch": 3.5, + "learning_rate": 1.5004748338081674e-06, + "loss": 1.2667, + "step": 221100 + }, + { + "epoch": 3.5, + "learning_rate": 1.4988920544476103e-06, + "loss": 1.2656, + "step": 221200 + }, + { + "epoch": 3.5, + "learning_rate": 1.4973092750870529e-06, + "loss": 1.2394, + "step": 221300 + }, + { + "epoch": 3.5, + "learning_rate": 1.4957264957264957e-06, + "loss": 1.215, + "step": 221400 + }, + { + "epoch": 3.51, + "learning_rate": 1.4941437163659386e-06, + "loss": 1.2729, + "step": 221500 + }, + { + "epoch": 3.51, + "learning_rate": 1.4925609370053814e-06, + "loss": 1.2605, + "step": 221600 + }, + { + "epoch": 3.51, + "learning_rate": 1.4909781576448246e-06, + "loss": 1.214, + "step": 221700 + }, + { + "epoch": 3.51, + "learning_rate": 1.4893953782842675e-06, + "loss": 1.2753, + "step": 221800 + }, + { + "epoch": 3.51, + "learning_rate": 1.4878125989237103e-06, + "loss": 1.2588, + "step": 221900 + }, + { + "epoch": 3.51, + "learning_rate": 1.486229819563153e-06, + "loss": 1.2522, + "step": 222000 + }, + { + "epoch": 3.52, + "learning_rate": 1.4846470402025958e-06, + "loss": 1.2276, + "step": 222100 + }, + { + "epoch": 3.52, + "learning_rate": 1.4830642608420386e-06, + "loss": 1.2501, + "step": 222200 + }, + { + "epoch": 3.52, + "learning_rate": 1.4814814814814815e-06, + "loss": 1.2317, + "step": 222300 + }, + { + "epoch": 3.52, + "learning_rate": 1.4798987021209243e-06, + "loss": 1.2656, + "step": 222400 + }, + { + "epoch": 3.52, + "learning_rate": 1.4783159227603675e-06, + "loss": 1.2541, + "step": 222500 + }, + { + "epoch": 3.52, + "learning_rate": 1.4767331433998102e-06, + "loss": 1.259, + "step": 222600 + }, + { + "epoch": 3.52, + "learning_rate": 1.475150364039253e-06, + "loss": 1.2336, + "step": 222700 + }, + { + "epoch": 3.53, + "learning_rate": 1.4735675846786958e-06, + "loss": 1.2568, + "step": 222800 + }, + { + "epoch": 3.53, + "learning_rate": 1.4719848053181387e-06, + "loss": 1.237, + "step": 222900 + }, + { + "epoch": 3.53, + "learning_rate": 1.4704020259575815e-06, + "loss": 1.2408, + "step": 223000 + }, + { + "epoch": 3.53, + "learning_rate": 1.4688192465970244e-06, + "loss": 1.2355, + "step": 223100 + }, + { + "epoch": 3.53, + "learning_rate": 1.4672364672364672e-06, + "loss": 1.2569, + "step": 223200 + }, + { + "epoch": 3.53, + "learning_rate": 1.4656536878759102e-06, + "loss": 1.2255, + "step": 223300 + }, + { + "epoch": 3.54, + "learning_rate": 1.464070908515353e-06, + "loss": 1.2289, + "step": 223400 + }, + { + "epoch": 3.54, + "learning_rate": 1.462488129154796e-06, + "loss": 1.2258, + "step": 223500 + }, + { + "epoch": 3.54, + "learning_rate": 1.4609053497942387e-06, + "loss": 1.2568, + "step": 223600 + }, + { + "epoch": 3.54, + "learning_rate": 1.4593225704336816e-06, + "loss": 1.2682, + "step": 223700 + }, + { + "epoch": 3.54, + "learning_rate": 1.4577397910731244e-06, + "loss": 1.222, + "step": 223800 + }, + { + "epoch": 3.54, + "learning_rate": 1.4561570117125673e-06, + "loss": 1.2279, + "step": 223900 + }, + { + "epoch": 3.55, + "learning_rate": 1.45457423235201e-06, + "loss": 1.2364, + "step": 224000 + }, + { + "epoch": 3.55, + "learning_rate": 1.4529914529914531e-06, + "loss": 1.236, + "step": 224100 + }, + { + "epoch": 3.55, + "learning_rate": 1.451408673630896e-06, + "loss": 1.2355, + "step": 224200 + }, + { + "epoch": 3.55, + "learning_rate": 1.4498258942703388e-06, + "loss": 1.2598, + "step": 224300 + }, + { + "epoch": 3.55, + "learning_rate": 1.4482431149097816e-06, + "loss": 1.2585, + "step": 224400 + }, + { + "epoch": 3.55, + "learning_rate": 1.4466603355492245e-06, + "loss": 1.2652, + "step": 224500 + }, + { + "epoch": 3.55, + "learning_rate": 1.4450775561886673e-06, + "loss": 1.2346, + "step": 224600 + }, + { + "epoch": 3.56, + "learning_rate": 1.4434947768281102e-06, + "loss": 1.2436, + "step": 224700 + }, + { + "epoch": 3.56, + "learning_rate": 1.441911997467553e-06, + "loss": 1.2388, + "step": 224800 + }, + { + "epoch": 3.56, + "learning_rate": 1.440329218106996e-06, + "loss": 1.2255, + "step": 224900 + }, + { + "epoch": 3.56, + "learning_rate": 1.4387464387464389e-06, + "loss": 1.2149, + "step": 225000 + }, + { + "epoch": 3.56, + "learning_rate": 1.4371636593858817e-06, + "loss": 1.2252, + "step": 225100 + }, + { + "epoch": 3.56, + "learning_rate": 1.4355808800253245e-06, + "loss": 1.2472, + "step": 225200 + }, + { + "epoch": 3.57, + "learning_rate": 1.4339981006647674e-06, + "loss": 1.2453, + "step": 225300 + }, + { + "epoch": 3.57, + "learning_rate": 1.4324153213042102e-06, + "loss": 1.2376, + "step": 225400 + }, + { + "epoch": 3.57, + "learning_rate": 1.430832541943653e-06, + "loss": 1.2351, + "step": 225500 + }, + { + "epoch": 3.57, + "learning_rate": 1.429249762583096e-06, + "loss": 1.2365, + "step": 225600 + }, + { + "epoch": 3.57, + "learning_rate": 1.427666983222539e-06, + "loss": 1.2242, + "step": 225700 + }, + { + "epoch": 3.57, + "learning_rate": 1.4260842038619818e-06, + "loss": 1.2405, + "step": 225800 + }, + { + "epoch": 3.58, + "learning_rate": 1.4245014245014246e-06, + "loss": 1.238, + "step": 225900 + }, + { + "epoch": 3.58, + "learning_rate": 1.4229186451408674e-06, + "loss": 1.2643, + "step": 226000 + }, + { + "epoch": 3.58, + "learning_rate": 1.4213358657803103e-06, + "loss": 1.2401, + "step": 226100 + }, + { + "epoch": 3.58, + "learning_rate": 1.4197530864197531e-06, + "loss": 1.2699, + "step": 226200 + }, + { + "epoch": 3.58, + "learning_rate": 1.418170307059196e-06, + "loss": 1.2191, + "step": 226300 + }, + { + "epoch": 3.58, + "learning_rate": 1.416587527698639e-06, + "loss": 1.2451, + "step": 226400 + }, + { + "epoch": 3.58, + "learning_rate": 1.4150047483380818e-06, + "loss": 1.244, + "step": 226500 + }, + { + "epoch": 3.59, + "learning_rate": 1.4134219689775247e-06, + "loss": 1.2351, + "step": 226600 + }, + { + "epoch": 3.59, + "learning_rate": 1.4118391896169675e-06, + "loss": 1.2529, + "step": 226700 + }, + { + "epoch": 3.59, + "learning_rate": 1.4102564102564104e-06, + "loss": 1.2416, + "step": 226800 + }, + { + "epoch": 3.59, + "learning_rate": 1.4086736308958532e-06, + "loss": 1.2517, + "step": 226900 + }, + { + "epoch": 3.59, + "learning_rate": 1.407090851535296e-06, + "loss": 1.2529, + "step": 227000 + }, + { + "epoch": 3.59, + "learning_rate": 1.4055080721747389e-06, + "loss": 1.2695, + "step": 227100 + }, + { + "epoch": 3.6, + "learning_rate": 1.403925292814182e-06, + "loss": 1.2373, + "step": 227200 + }, + { + "epoch": 3.6, + "learning_rate": 1.4023425134536247e-06, + "loss": 1.2243, + "step": 227300 + }, + { + "epoch": 3.6, + "learning_rate": 1.4007597340930676e-06, + "loss": 1.2498, + "step": 227400 + }, + { + "epoch": 3.6, + "learning_rate": 1.3991769547325104e-06, + "loss": 1.2622, + "step": 227500 + }, + { + "epoch": 3.6, + "learning_rate": 1.3975941753719533e-06, + "loss": 1.2382, + "step": 227600 + }, + { + "epoch": 3.6, + "learning_rate": 1.396011396011396e-06, + "loss": 1.2502, + "step": 227700 + }, + { + "epoch": 3.61, + "learning_rate": 1.394428616650839e-06, + "loss": 1.2512, + "step": 227800 + }, + { + "epoch": 3.61, + "learning_rate": 1.3928458372902818e-06, + "loss": 1.2618, + "step": 227900 + }, + { + "epoch": 3.61, + "learning_rate": 1.3912630579297248e-06, + "loss": 1.2564, + "step": 228000 + }, + { + "epoch": 3.61, + "learning_rate": 1.3896802785691676e-06, + "loss": 1.2455, + "step": 228100 + }, + { + "epoch": 3.61, + "learning_rate": 1.3880974992086105e-06, + "loss": 1.2435, + "step": 228200 + }, + { + "epoch": 3.61, + "learning_rate": 1.3865147198480533e-06, + "loss": 1.246, + "step": 228300 + }, + { + "epoch": 3.62, + "learning_rate": 1.3849319404874962e-06, + "loss": 1.2369, + "step": 228400 + }, + { + "epoch": 3.62, + "learning_rate": 1.383349161126939e-06, + "loss": 1.2676, + "step": 228500 + }, + { + "epoch": 3.62, + "learning_rate": 1.3817663817663818e-06, + "loss": 1.2402, + "step": 228600 + }, + { + "epoch": 3.62, + "learning_rate": 1.3801836024058247e-06, + "loss": 1.2247, + "step": 228700 + }, + { + "epoch": 3.62, + "learning_rate": 1.3786008230452677e-06, + "loss": 1.2288, + "step": 228800 + }, + { + "epoch": 3.62, + "learning_rate": 1.3770180436847105e-06, + "loss": 1.2513, + "step": 228900 + }, + { + "epoch": 3.62, + "learning_rate": 1.3754352643241534e-06, + "loss": 1.2511, + "step": 229000 + }, + { + "epoch": 3.63, + "learning_rate": 1.3738524849635962e-06, + "loss": 1.2334, + "step": 229100 + }, + { + "epoch": 3.63, + "learning_rate": 1.372269705603039e-06, + "loss": 1.2434, + "step": 229200 + }, + { + "epoch": 3.63, + "learning_rate": 1.3706869262424819e-06, + "loss": 1.2542, + "step": 229300 + }, + { + "epoch": 3.63, + "learning_rate": 1.3691041468819247e-06, + "loss": 1.2356, + "step": 229400 + }, + { + "epoch": 3.63, + "learning_rate": 1.3675213675213678e-06, + "loss": 1.2485, + "step": 229500 + }, + { + "epoch": 3.63, + "learning_rate": 1.3659385881608106e-06, + "loss": 1.2517, + "step": 229600 + }, + { + "epoch": 3.64, + "learning_rate": 1.3643558088002534e-06, + "loss": 1.2377, + "step": 229700 + }, + { + "epoch": 3.64, + "learning_rate": 1.3627730294396963e-06, + "loss": 1.2531, + "step": 229800 + }, + { + "epoch": 3.64, + "learning_rate": 1.3611902500791391e-06, + "loss": 1.2534, + "step": 229900 + }, + { + "epoch": 3.64, + "learning_rate": 1.359607470718582e-06, + "loss": 1.2547, + "step": 230000 + }, + { + "epoch": 3.64, + "learning_rate": 1.3580246913580248e-06, + "loss": 1.2592, + "step": 230100 + }, + { + "epoch": 3.64, + "learning_rate": 1.3564419119974676e-06, + "loss": 1.2643, + "step": 230200 + }, + { + "epoch": 3.65, + "learning_rate": 1.3548591326369107e-06, + "loss": 1.2549, + "step": 230300 + }, + { + "epoch": 3.65, + "learning_rate": 1.3532763532763535e-06, + "loss": 1.2592, + "step": 230400 + }, + { + "epoch": 3.65, + "learning_rate": 1.3516935739157963e-06, + "loss": 1.2509, + "step": 230500 + }, + { + "epoch": 3.65, + "learning_rate": 1.3501107945552392e-06, + "loss": 1.2405, + "step": 230600 + }, + { + "epoch": 3.65, + "learning_rate": 1.348528015194682e-06, + "loss": 1.25, + "step": 230700 + }, + { + "epoch": 3.65, + "learning_rate": 1.3469452358341249e-06, + "loss": 1.2289, + "step": 230800 + }, + { + "epoch": 3.65, + "learning_rate": 1.3453624564735677e-06, + "loss": 1.2611, + "step": 230900 + }, + { + "epoch": 3.66, + "learning_rate": 1.3437796771130105e-06, + "loss": 1.2404, + "step": 231000 + }, + { + "epoch": 3.66, + "learning_rate": 1.3421968977524536e-06, + "loss": 1.23, + "step": 231100 + }, + { + "epoch": 3.66, + "learning_rate": 1.3406141183918964e-06, + "loss": 1.2433, + "step": 231200 + }, + { + "epoch": 3.66, + "learning_rate": 1.3390313390313392e-06, + "loss": 1.2484, + "step": 231300 + }, + { + "epoch": 3.66, + "learning_rate": 1.337448559670782e-06, + "loss": 1.2499, + "step": 231400 + }, + { + "epoch": 3.66, + "learning_rate": 1.335865780310225e-06, + "loss": 1.218, + "step": 231500 + }, + { + "epoch": 3.67, + "learning_rate": 1.3342830009496678e-06, + "loss": 1.2401, + "step": 231600 + }, + { + "epoch": 3.67, + "learning_rate": 1.3327002215891106e-06, + "loss": 1.2423, + "step": 231700 + }, + { + "epoch": 3.67, + "learning_rate": 1.3311174422285534e-06, + "loss": 1.2505, + "step": 231800 + }, + { + "epoch": 3.67, + "learning_rate": 1.3295346628679965e-06, + "loss": 1.2788, + "step": 231900 + }, + { + "epoch": 3.67, + "learning_rate": 1.3279518835074393e-06, + "loss": 1.2591, + "step": 232000 + }, + { + "epoch": 3.67, + "learning_rate": 1.3263691041468821e-06, + "loss": 1.2424, + "step": 232100 + }, + { + "epoch": 3.68, + "learning_rate": 1.324786324786325e-06, + "loss": 1.2317, + "step": 232200 + }, + { + "epoch": 3.68, + "learning_rate": 1.3232035454257678e-06, + "loss": 1.2419, + "step": 232300 + }, + { + "epoch": 3.68, + "learning_rate": 1.3216207660652107e-06, + "loss": 1.2291, + "step": 232400 + }, + { + "epoch": 3.68, + "learning_rate": 1.3200379867046533e-06, + "loss": 1.2314, + "step": 232500 + }, + { + "epoch": 3.68, + "learning_rate": 1.3184552073440961e-06, + "loss": 1.2604, + "step": 232600 + }, + { + "epoch": 3.68, + "learning_rate": 1.3168724279835394e-06, + "loss": 1.2483, + "step": 232700 + }, + { + "epoch": 3.68, + "learning_rate": 1.3152896486229822e-06, + "loss": 1.2415, + "step": 232800 + }, + { + "epoch": 3.69, + "learning_rate": 1.313706869262425e-06, + "loss": 1.2391, + "step": 232900 + }, + { + "epoch": 3.69, + "learning_rate": 1.3121240899018679e-06, + "loss": 1.2217, + "step": 233000 + }, + { + "epoch": 3.69, + "learning_rate": 1.3105413105413107e-06, + "loss": 1.2702, + "step": 233100 + }, + { + "epoch": 3.69, + "learning_rate": 1.3089585311807533e-06, + "loss": 1.2514, + "step": 233200 + }, + { + "epoch": 3.69, + "learning_rate": 1.3073757518201962e-06, + "loss": 1.27, + "step": 233300 + }, + { + "epoch": 3.69, + "learning_rate": 1.305792972459639e-06, + "loss": 1.2208, + "step": 233400 + }, + { + "epoch": 3.7, + "learning_rate": 1.3042101930990823e-06, + "loss": 1.2622, + "step": 233500 + }, + { + "epoch": 3.7, + "learning_rate": 1.3026274137385251e-06, + "loss": 1.2381, + "step": 233600 + }, + { + "epoch": 3.7, + "learning_rate": 1.301044634377968e-06, + "loss": 1.2485, + "step": 233700 + }, + { + "epoch": 3.7, + "learning_rate": 1.2994618550174106e-06, + "loss": 1.2427, + "step": 233800 + }, + { + "epoch": 3.7, + "learning_rate": 1.2978790756568534e-06, + "loss": 1.2365, + "step": 233900 + }, + { + "epoch": 3.7, + "learning_rate": 1.2962962962962962e-06, + "loss": 1.2406, + "step": 234000 + }, + { + "epoch": 3.71, + "learning_rate": 1.294713516935739e-06, + "loss": 1.2346, + "step": 234100 + }, + { + "epoch": 3.71, + "learning_rate": 1.2931307375751823e-06, + "loss": 1.236, + "step": 234200 + }, + { + "epoch": 3.71, + "learning_rate": 1.2915479582146252e-06, + "loss": 1.2465, + "step": 234300 + }, + { + "epoch": 3.71, + "learning_rate": 1.289965178854068e-06, + "loss": 1.2357, + "step": 234400 + }, + { + "epoch": 3.71, + "learning_rate": 1.2883823994935106e-06, + "loss": 1.2427, + "step": 234500 + }, + { + "epoch": 3.71, + "learning_rate": 1.2867996201329535e-06, + "loss": 1.2411, + "step": 234600 + }, + { + "epoch": 3.71, + "learning_rate": 1.2852168407723963e-06, + "loss": 1.2346, + "step": 234700 + }, + { + "epoch": 3.72, + "learning_rate": 1.2836340614118391e-06, + "loss": 1.2327, + "step": 234800 + }, + { + "epoch": 3.72, + "learning_rate": 1.282051282051282e-06, + "loss": 1.2583, + "step": 234900 + }, + { + "epoch": 3.72, + "learning_rate": 1.2804685026907252e-06, + "loss": 1.2683, + "step": 235000 + }, + { + "epoch": 3.72, + "learning_rate": 1.2788857233301679e-06, + "loss": 1.2552, + "step": 235100 + }, + { + "epoch": 3.72, + "learning_rate": 1.2773029439696107e-06, + "loss": 1.2634, + "step": 235200 + }, + { + "epoch": 3.72, + "learning_rate": 1.2757201646090535e-06, + "loss": 1.2703, + "step": 235300 + }, + { + "epoch": 3.73, + "learning_rate": 1.2741373852484964e-06, + "loss": 1.2357, + "step": 235400 + }, + { + "epoch": 3.73, + "learning_rate": 1.2725546058879392e-06, + "loss": 1.2311, + "step": 235500 + }, + { + "epoch": 3.73, + "learning_rate": 1.270971826527382e-06, + "loss": 1.2546, + "step": 235600 + }, + { + "epoch": 3.73, + "learning_rate": 1.2693890471668249e-06, + "loss": 1.2428, + "step": 235700 + }, + { + "epoch": 3.73, + "learning_rate": 1.267806267806268e-06, + "loss": 1.2363, + "step": 235800 + }, + { + "epoch": 3.73, + "learning_rate": 1.2662234884457108e-06, + "loss": 1.2555, + "step": 235900 + }, + { + "epoch": 3.74, + "learning_rate": 1.2646407090851536e-06, + "loss": 1.2612, + "step": 236000 + }, + { + "epoch": 3.74, + "learning_rate": 1.2630579297245964e-06, + "loss": 1.2647, + "step": 236100 + }, + { + "epoch": 3.74, + "learning_rate": 1.2614751503640393e-06, + "loss": 1.244, + "step": 236200 + }, + { + "epoch": 3.74, + "learning_rate": 1.2598923710034821e-06, + "loss": 1.247, + "step": 236300 + }, + { + "epoch": 3.74, + "learning_rate": 1.258309591642925e-06, + "loss": 1.2188, + "step": 236400 + }, + { + "epoch": 3.74, + "learning_rate": 1.2567268122823678e-06, + "loss": 1.2389, + "step": 236500 + }, + { + "epoch": 3.74, + "learning_rate": 1.2551440329218108e-06, + "loss": 1.2465, + "step": 236600 + }, + { + "epoch": 3.75, + "learning_rate": 1.2535612535612537e-06, + "loss": 1.2485, + "step": 236700 + }, + { + "epoch": 3.75, + "learning_rate": 1.2519784742006965e-06, + "loss": 1.2558, + "step": 236800 + }, + { + "epoch": 3.75, + "learning_rate": 1.2503956948401393e-06, + "loss": 1.2336, + "step": 236900 + }, + { + "epoch": 3.75, + "learning_rate": 1.2488129154795822e-06, + "loss": 1.2336, + "step": 237000 + }, + { + "epoch": 3.75, + "learning_rate": 1.247230136119025e-06, + "loss": 1.2232, + "step": 237100 + }, + { + "epoch": 3.75, + "learning_rate": 1.245647356758468e-06, + "loss": 1.2447, + "step": 237200 + }, + { + "epoch": 3.76, + "learning_rate": 1.244064577397911e-06, + "loss": 1.2451, + "step": 237300 + }, + { + "epoch": 3.76, + "learning_rate": 1.2424817980373537e-06, + "loss": 1.2262, + "step": 237400 + }, + { + "epoch": 3.76, + "learning_rate": 1.2408990186767966e-06, + "loss": 1.2374, + "step": 237500 + }, + { + "epoch": 3.76, + "learning_rate": 1.2393162393162394e-06, + "loss": 1.257, + "step": 237600 + }, + { + "epoch": 3.76, + "learning_rate": 1.2377334599556822e-06, + "loss": 1.2302, + "step": 237700 + }, + { + "epoch": 3.76, + "learning_rate": 1.236150680595125e-06, + "loss": 1.2363, + "step": 237800 + }, + { + "epoch": 3.77, + "learning_rate": 1.234567901234568e-06, + "loss": 1.2497, + "step": 237900 + }, + { + "epoch": 3.77, + "learning_rate": 1.232985121874011e-06, + "loss": 1.2276, + "step": 238000 + }, + { + "epoch": 3.77, + "learning_rate": 1.2314023425134538e-06, + "loss": 1.2419, + "step": 238100 + }, + { + "epoch": 3.77, + "learning_rate": 1.2298195631528966e-06, + "loss": 1.2191, + "step": 238200 + }, + { + "epoch": 3.77, + "learning_rate": 1.2282367837923395e-06, + "loss": 1.2259, + "step": 238300 + }, + { + "epoch": 3.77, + "learning_rate": 1.2266540044317823e-06, + "loss": 1.2626, + "step": 238400 + }, + { + "epoch": 3.77, + "learning_rate": 1.2250712250712251e-06, + "loss": 1.2465, + "step": 238500 + }, + { + "epoch": 3.78, + "learning_rate": 1.223488445710668e-06, + "loss": 1.233, + "step": 238600 + }, + { + "epoch": 3.78, + "learning_rate": 1.221905666350111e-06, + "loss": 1.2341, + "step": 238700 + }, + { + "epoch": 3.78, + "learning_rate": 1.2203228869895539e-06, + "loss": 1.2505, + "step": 238800 + }, + { + "epoch": 3.78, + "learning_rate": 1.2187401076289967e-06, + "loss": 1.2452, + "step": 238900 + }, + { + "epoch": 3.78, + "learning_rate": 1.2171573282684395e-06, + "loss": 1.2341, + "step": 239000 + }, + { + "epoch": 3.78, + "learning_rate": 1.2155745489078824e-06, + "loss": 1.2519, + "step": 239100 + }, + { + "epoch": 3.79, + "learning_rate": 1.2139917695473252e-06, + "loss": 1.2568, + "step": 239200 + }, + { + "epoch": 3.79, + "learning_rate": 1.212408990186768e-06, + "loss": 1.2417, + "step": 239300 + }, + { + "epoch": 3.79, + "learning_rate": 1.2108262108262109e-06, + "loss": 1.2238, + "step": 239400 + }, + { + "epoch": 3.79, + "learning_rate": 1.209243431465654e-06, + "loss": 1.2603, + "step": 239500 + }, + { + "epoch": 3.79, + "learning_rate": 1.2076606521050968e-06, + "loss": 1.2635, + "step": 239600 + }, + { + "epoch": 3.79, + "learning_rate": 1.2060778727445396e-06, + "loss": 1.2414, + "step": 239700 + }, + { + "epoch": 3.8, + "learning_rate": 1.2044950933839822e-06, + "loss": 1.2324, + "step": 239800 + }, + { + "epoch": 3.8, + "learning_rate": 1.2029123140234253e-06, + "loss": 1.2282, + "step": 239900 + }, + { + "epoch": 3.8, + "learning_rate": 1.2013295346628681e-06, + "loss": 1.2431, + "step": 240000 + }, + { + "epoch": 3.8, + "learning_rate": 1.199746755302311e-06, + "loss": 1.2492, + "step": 240100 + }, + { + "epoch": 3.8, + "learning_rate": 1.1981639759417538e-06, + "loss": 1.2618, + "step": 240200 + }, + { + "epoch": 3.8, + "learning_rate": 1.1965811965811968e-06, + "loss": 1.2537, + "step": 240300 + }, + { + "epoch": 3.81, + "learning_rate": 1.1949984172206395e-06, + "loss": 1.2318, + "step": 240400 + }, + { + "epoch": 3.81, + "learning_rate": 1.1934156378600823e-06, + "loss": 1.2362, + "step": 240500 + }, + { + "epoch": 3.81, + "learning_rate": 1.1918328584995251e-06, + "loss": 1.2557, + "step": 240600 + }, + { + "epoch": 3.81, + "learning_rate": 1.1902500791389682e-06, + "loss": 1.2382, + "step": 240700 + }, + { + "epoch": 3.81, + "learning_rate": 1.188667299778411e-06, + "loss": 1.2282, + "step": 240800 + }, + { + "epoch": 3.81, + "learning_rate": 1.1870845204178538e-06, + "loss": 1.2551, + "step": 240900 + }, + { + "epoch": 3.81, + "learning_rate": 1.1855017410572967e-06, + "loss": 1.2666, + "step": 241000 + }, + { + "epoch": 3.82, + "learning_rate": 1.1839189616967395e-06, + "loss": 1.2554, + "step": 241100 + }, + { + "epoch": 3.82, + "learning_rate": 1.1823361823361824e-06, + "loss": 1.249, + "step": 241200 + }, + { + "epoch": 3.82, + "learning_rate": 1.1807534029756252e-06, + "loss": 1.2368, + "step": 241300 + }, + { + "epoch": 3.82, + "learning_rate": 1.179170623615068e-06, + "loss": 1.2152, + "step": 241400 + }, + { + "epoch": 3.82, + "learning_rate": 1.177587844254511e-06, + "loss": 1.2264, + "step": 241500 + }, + { + "epoch": 3.82, + "learning_rate": 1.176005064893954e-06, + "loss": 1.2387, + "step": 241600 + }, + { + "epoch": 3.83, + "learning_rate": 1.1744222855333967e-06, + "loss": 1.2525, + "step": 241700 + }, + { + "epoch": 3.83, + "learning_rate": 1.1728395061728396e-06, + "loss": 1.2551, + "step": 241800 + }, + { + "epoch": 3.83, + "learning_rate": 1.1712567268122824e-06, + "loss": 1.2517, + "step": 241900 + }, + { + "epoch": 3.83, + "learning_rate": 1.1696739474517253e-06, + "loss": 1.2395, + "step": 242000 + }, + { + "epoch": 3.83, + "learning_rate": 1.168091168091168e-06, + "loss": 1.2725, + "step": 242100 + }, + { + "epoch": 3.83, + "learning_rate": 1.166508388730611e-06, + "loss": 1.2384, + "step": 242200 + }, + { + "epoch": 3.84, + "learning_rate": 1.164925609370054e-06, + "loss": 1.2415, + "step": 242300 + }, + { + "epoch": 3.84, + "learning_rate": 1.1633428300094968e-06, + "loss": 1.2425, + "step": 242400 + }, + { + "epoch": 3.84, + "learning_rate": 1.1617600506489396e-06, + "loss": 1.2423, + "step": 242500 + }, + { + "epoch": 3.84, + "learning_rate": 1.1601772712883825e-06, + "loss": 1.2445, + "step": 242600 + }, + { + "epoch": 3.84, + "learning_rate": 1.1585944919278253e-06, + "loss": 1.2468, + "step": 242700 + }, + { + "epoch": 3.84, + "learning_rate": 1.1570117125672682e-06, + "loss": 1.2683, + "step": 242800 + }, + { + "epoch": 3.84, + "learning_rate": 1.155428933206711e-06, + "loss": 1.2497, + "step": 242900 + }, + { + "epoch": 3.85, + "learning_rate": 1.153846153846154e-06, + "loss": 1.2405, + "step": 243000 + }, + { + "epoch": 3.85, + "learning_rate": 1.1522633744855969e-06, + "loss": 1.2389, + "step": 243100 + }, + { + "epoch": 3.85, + "learning_rate": 1.1506805951250397e-06, + "loss": 1.2181, + "step": 243200 + }, + { + "epoch": 3.85, + "learning_rate": 1.1490978157644825e-06, + "loss": 1.244, + "step": 243300 + }, + { + "epoch": 3.85, + "learning_rate": 1.1475150364039254e-06, + "loss": 1.2576, + "step": 243400 + }, + { + "epoch": 3.85, + "learning_rate": 1.1459322570433682e-06, + "loss": 1.2137, + "step": 243500 + }, + { + "epoch": 3.86, + "learning_rate": 1.144349477682811e-06, + "loss": 1.2546, + "step": 243600 + }, + { + "epoch": 3.86, + "learning_rate": 1.1427666983222539e-06, + "loss": 1.2682, + "step": 243700 + }, + { + "epoch": 3.86, + "learning_rate": 1.141183918961697e-06, + "loss": 1.2553, + "step": 243800 + }, + { + "epoch": 3.86, + "learning_rate": 1.1396011396011398e-06, + "loss": 1.2623, + "step": 243900 + }, + { + "epoch": 3.86, + "learning_rate": 1.1380183602405826e-06, + "loss": 1.2418, + "step": 244000 + }, + { + "epoch": 3.86, + "learning_rate": 1.1364355808800254e-06, + "loss": 1.2509, + "step": 244100 + }, + { + "epoch": 3.87, + "learning_rate": 1.1348528015194683e-06, + "loss": 1.2589, + "step": 244200 + }, + { + "epoch": 3.87, + "learning_rate": 1.1332700221589111e-06, + "loss": 1.2667, + "step": 244300 + }, + { + "epoch": 3.87, + "learning_rate": 1.131687242798354e-06, + "loss": 1.2304, + "step": 244400 + }, + { + "epoch": 3.87, + "learning_rate": 1.1301044634377968e-06, + "loss": 1.2306, + "step": 244500 + }, + { + "epoch": 3.87, + "learning_rate": 1.1285216840772398e-06, + "loss": 1.2414, + "step": 244600 + }, + { + "epoch": 3.87, + "learning_rate": 1.1269389047166827e-06, + "loss": 1.2494, + "step": 244700 + }, + { + "epoch": 3.87, + "learning_rate": 1.1253561253561255e-06, + "loss": 1.233, + "step": 244800 + }, + { + "epoch": 3.88, + "learning_rate": 1.1237733459955683e-06, + "loss": 1.2552, + "step": 244900 + }, + { + "epoch": 3.88, + "learning_rate": 1.1221905666350112e-06, + "loss": 1.237, + "step": 245000 + }, + { + "epoch": 3.88, + "learning_rate": 1.120607787274454e-06, + "loss": 1.2529, + "step": 245100 + }, + { + "epoch": 3.88, + "learning_rate": 1.1190250079138969e-06, + "loss": 1.236, + "step": 245200 + }, + { + "epoch": 3.88, + "learning_rate": 1.1174422285533397e-06, + "loss": 1.2658, + "step": 245300 + }, + { + "epoch": 3.88, + "learning_rate": 1.1158594491927827e-06, + "loss": 1.2609, + "step": 245400 + }, + { + "epoch": 3.89, + "learning_rate": 1.1142766698322256e-06, + "loss": 1.2423, + "step": 245500 + }, + { + "epoch": 3.89, + "learning_rate": 1.1126938904716684e-06, + "loss": 1.2388, + "step": 245600 + }, + { + "epoch": 3.89, + "learning_rate": 1.111111111111111e-06, + "loss": 1.2253, + "step": 245700 + }, + { + "epoch": 3.89, + "learning_rate": 1.109528331750554e-06, + "loss": 1.2525, + "step": 245800 + }, + { + "epoch": 3.89, + "learning_rate": 1.107945552389997e-06, + "loss": 1.2705, + "step": 245900 + }, + { + "epoch": 3.89, + "learning_rate": 1.1063627730294398e-06, + "loss": 1.2519, + "step": 246000 + }, + { + "epoch": 3.9, + "learning_rate": 1.1047799936688826e-06, + "loss": 1.2501, + "step": 246100 + }, + { + "epoch": 3.9, + "learning_rate": 1.1031972143083256e-06, + "loss": 1.2612, + "step": 246200 + }, + { + "epoch": 3.9, + "learning_rate": 1.1016144349477685e-06, + "loss": 1.237, + "step": 246300 + }, + { + "epoch": 3.9, + "learning_rate": 1.100031655587211e-06, + "loss": 1.2562, + "step": 246400 + }, + { + "epoch": 3.9, + "learning_rate": 1.098448876226654e-06, + "loss": 1.2435, + "step": 246500 + }, + { + "epoch": 3.9, + "learning_rate": 1.096866096866097e-06, + "loss": 1.2513, + "step": 246600 + }, + { + "epoch": 3.9, + "learning_rate": 1.0952833175055398e-06, + "loss": 1.2429, + "step": 246700 + }, + { + "epoch": 3.91, + "learning_rate": 1.0937005381449827e-06, + "loss": 1.2486, + "step": 246800 + }, + { + "epoch": 3.91, + "learning_rate": 1.0921177587844257e-06, + "loss": 1.2547, + "step": 246900 + }, + { + "epoch": 3.91, + "learning_rate": 1.0905349794238683e-06, + "loss": 1.2369, + "step": 247000 + }, + { + "epoch": 3.91, + "learning_rate": 1.0889522000633112e-06, + "loss": 1.242, + "step": 247100 + }, + { + "epoch": 3.91, + "learning_rate": 1.087369420702754e-06, + "loss": 1.2345, + "step": 247200 + }, + { + "epoch": 3.91, + "learning_rate": 1.085786641342197e-06, + "loss": 1.2694, + "step": 247300 + }, + { + "epoch": 3.92, + "learning_rate": 1.0842038619816399e-06, + "loss": 1.2281, + "step": 247400 + }, + { + "epoch": 3.92, + "learning_rate": 1.0826210826210827e-06, + "loss": 1.225, + "step": 247500 + }, + { + "epoch": 3.92, + "learning_rate": 1.0810383032605256e-06, + "loss": 1.2525, + "step": 247600 + }, + { + "epoch": 3.92, + "learning_rate": 1.0794555238999684e-06, + "loss": 1.2331, + "step": 247700 + }, + { + "epoch": 3.92, + "learning_rate": 1.0778727445394112e-06, + "loss": 1.2281, + "step": 247800 + }, + { + "epoch": 3.92, + "learning_rate": 1.076289965178854e-06, + "loss": 1.2252, + "step": 247900 + }, + { + "epoch": 3.93, + "learning_rate": 1.074707185818297e-06, + "loss": 1.254, + "step": 248000 + }, + { + "epoch": 3.93, + "learning_rate": 1.07312440645774e-06, + "loss": 1.2318, + "step": 248100 + }, + { + "epoch": 3.93, + "learning_rate": 1.0715416270971828e-06, + "loss": 1.2201, + "step": 248200 + }, + { + "epoch": 3.93, + "learning_rate": 1.0699588477366256e-06, + "loss": 1.2346, + "step": 248300 + }, + { + "epoch": 3.93, + "learning_rate": 1.0683760683760685e-06, + "loss": 1.2332, + "step": 248400 + }, + { + "epoch": 3.93, + "learning_rate": 1.0667932890155113e-06, + "loss": 1.2522, + "step": 248500 + }, + { + "epoch": 3.93, + "learning_rate": 1.0652105096549541e-06, + "loss": 1.2432, + "step": 248600 + }, + { + "epoch": 3.94, + "learning_rate": 1.063627730294397e-06, + "loss": 1.243, + "step": 248700 + }, + { + "epoch": 3.94, + "learning_rate": 1.0620449509338398e-06, + "loss": 1.2388, + "step": 248800 + }, + { + "epoch": 3.94, + "learning_rate": 1.0604621715732829e-06, + "loss": 1.2575, + "step": 248900 + }, + { + "epoch": 3.94, + "learning_rate": 1.0588793922127257e-06, + "loss": 1.2382, + "step": 249000 + }, + { + "epoch": 3.94, + "learning_rate": 1.0572966128521685e-06, + "loss": 1.2576, + "step": 249100 + }, + { + "epoch": 3.94, + "learning_rate": 1.0557138334916114e-06, + "loss": 1.2447, + "step": 249200 + }, + { + "epoch": 3.95, + "learning_rate": 1.0541310541310542e-06, + "loss": 1.2306, + "step": 249300 + }, + { + "epoch": 3.95, + "learning_rate": 1.052548274770497e-06, + "loss": 1.2472, + "step": 249400 + }, + { + "epoch": 3.95, + "learning_rate": 1.0509654954099399e-06, + "loss": 1.264, + "step": 249500 + }, + { + "epoch": 3.95, + "learning_rate": 1.0493827160493827e-06, + "loss": 1.2432, + "step": 249600 + }, + { + "epoch": 3.95, + "learning_rate": 1.0477999366888258e-06, + "loss": 1.2438, + "step": 249700 + }, + { + "epoch": 3.95, + "learning_rate": 1.0462171573282686e-06, + "loss": 1.2565, + "step": 249800 + }, + { + "epoch": 3.96, + "learning_rate": 1.0446343779677114e-06, + "loss": 1.2437, + "step": 249900 + }, + { + "epoch": 3.96, + "learning_rate": 1.0430515986071543e-06, + "loss": 1.2349, + "step": 250000 + }, + { + "epoch": 3.96, + "learning_rate": 1.041468819246597e-06, + "loss": 1.2452, + "step": 250100 + }, + { + "epoch": 3.96, + "learning_rate": 1.03988603988604e-06, + "loss": 1.2405, + "step": 250200 + }, + { + "epoch": 3.96, + "learning_rate": 1.0383032605254828e-06, + "loss": 1.2347, + "step": 250300 + }, + { + "epoch": 3.96, + "learning_rate": 1.0367204811649256e-06, + "loss": 1.2407, + "step": 250400 + }, + { + "epoch": 3.96, + "learning_rate": 1.0351377018043687e-06, + "loss": 1.2514, + "step": 250500 + }, + { + "epoch": 3.97, + "learning_rate": 1.0335549224438115e-06, + "loss": 1.2345, + "step": 250600 + }, + { + "epoch": 3.97, + "learning_rate": 1.0319721430832543e-06, + "loss": 1.2235, + "step": 250700 + }, + { + "epoch": 3.97, + "learning_rate": 1.0303893637226972e-06, + "loss": 1.211, + "step": 250800 + }, + { + "epoch": 3.97, + "learning_rate": 1.02880658436214e-06, + "loss": 1.271, + "step": 250900 + }, + { + "epoch": 3.97, + "learning_rate": 1.0272238050015828e-06, + "loss": 1.2656, + "step": 251000 + }, + { + "epoch": 3.97, + "learning_rate": 1.0256410256410257e-06, + "loss": 1.2491, + "step": 251100 + }, + { + "epoch": 3.98, + "learning_rate": 1.0240582462804687e-06, + "loss": 1.2323, + "step": 251200 + }, + { + "epoch": 3.98, + "learning_rate": 1.0224754669199116e-06, + "loss": 1.2322, + "step": 251300 + }, + { + "epoch": 3.98, + "learning_rate": 1.0208926875593544e-06, + "loss": 1.2369, + "step": 251400 + }, + { + "epoch": 3.98, + "learning_rate": 1.0193099081987972e-06, + "loss": 1.2164, + "step": 251500 + }, + { + "epoch": 3.98, + "learning_rate": 1.01772712883824e-06, + "loss": 1.2385, + "step": 251600 + }, + { + "epoch": 3.98, + "learning_rate": 1.016144349477683e-06, + "loss": 1.2391, + "step": 251700 + }, + { + "epoch": 3.99, + "learning_rate": 1.0145615701171257e-06, + "loss": 1.2339, + "step": 251800 + }, + { + "epoch": 3.99, + "learning_rate": 1.0129787907565686e-06, + "loss": 1.2648, + "step": 251900 + }, + { + "epoch": 3.99, + "learning_rate": 1.0113960113960116e-06, + "loss": 1.2392, + "step": 252000 + }, + { + "epoch": 3.99, + "learning_rate": 1.0098132320354545e-06, + "loss": 1.2161, + "step": 252100 + }, + { + "epoch": 3.99, + "learning_rate": 1.0082304526748973e-06, + "loss": 1.2221, + "step": 252200 + }, + { + "epoch": 3.99, + "learning_rate": 1.00664767331434e-06, + "loss": 1.2626, + "step": 252300 + }, + { + "epoch": 3.99, + "learning_rate": 1.005064893953783e-06, + "loss": 1.2323, + "step": 252400 + }, + { + "epoch": 4.0, + "learning_rate": 1.0034821145932258e-06, + "loss": 1.2225, + "step": 252500 + }, + { + "epoch": 4.0, + "learning_rate": 1.0018993352326686e-06, + "loss": 1.2237, + "step": 252600 + }, + { + "epoch": 4.0, + "learning_rate": 1.0003165558721115e-06, + "loss": 1.2393, + "step": 252700 + }, + { + "epoch": 4.0, + "learning_rate": 9.987337765115545e-07, + "loss": 1.2331, + "step": 252800 + }, + { + "epoch": 4.0, + "learning_rate": 9.971509971509974e-07, + "loss": 1.2363, + "step": 252900 + }, + { + "epoch": 4.0, + "learning_rate": 9.9556821779044e-07, + "loss": 1.2385, + "step": 253000 + }, + { + "epoch": 4.01, + "learning_rate": 9.939854384298828e-07, + "loss": 1.2217, + "step": 253100 + }, + { + "epoch": 4.01, + "learning_rate": 9.924026590693259e-07, + "loss": 1.2262, + "step": 253200 + }, + { + "epoch": 4.01, + "learning_rate": 9.908198797087687e-07, + "loss": 1.2354, + "step": 253300 + }, + { + "epoch": 4.01, + "learning_rate": 9.892371003482115e-07, + "loss": 1.2145, + "step": 253400 + }, + { + "epoch": 4.01, + "learning_rate": 9.876543209876544e-07, + "loss": 1.2181, + "step": 253500 + }, + { + "epoch": 4.01, + "learning_rate": 9.860715416270972e-07, + "loss": 1.226, + "step": 253600 + }, + { + "epoch": 4.02, + "learning_rate": 9.8448876226654e-07, + "loss": 1.2222, + "step": 253700 + }, + { + "epoch": 4.02, + "learning_rate": 9.829059829059829e-07, + "loss": 1.2253, + "step": 253800 + }, + { + "epoch": 4.02, + "learning_rate": 9.813232035454257e-07, + "loss": 1.2306, + "step": 253900 + }, + { + "epoch": 4.02, + "learning_rate": 9.797404241848688e-07, + "loss": 1.2386, + "step": 254000 + }, + { + "epoch": 4.02, + "learning_rate": 9.781576448243116e-07, + "loss": 1.2346, + "step": 254100 + }, + { + "epoch": 4.02, + "learning_rate": 9.765748654637544e-07, + "loss": 1.2149, + "step": 254200 + }, + { + "epoch": 4.03, + "learning_rate": 9.749920861031973e-07, + "loss": 1.2564, + "step": 254300 + }, + { + "epoch": 4.03, + "learning_rate": 9.734093067426401e-07, + "loss": 1.2195, + "step": 254400 + }, + { + "epoch": 4.03, + "learning_rate": 9.71826527382083e-07, + "loss": 1.2174, + "step": 254500 + }, + { + "epoch": 4.03, + "learning_rate": 9.702437480215258e-07, + "loss": 1.2197, + "step": 254600 + }, + { + "epoch": 4.03, + "learning_rate": 9.686609686609686e-07, + "loss": 1.2259, + "step": 254700 + }, + { + "epoch": 4.03, + "learning_rate": 9.670781893004117e-07, + "loss": 1.2089, + "step": 254800 + }, + { + "epoch": 4.03, + "learning_rate": 9.654954099398545e-07, + "loss": 1.2201, + "step": 254900 + }, + { + "epoch": 4.04, + "learning_rate": 9.639126305792973e-07, + "loss": 1.2364, + "step": 255000 + }, + { + "epoch": 4.04, + "learning_rate": 9.623298512187402e-07, + "loss": 1.2486, + "step": 255100 + }, + { + "epoch": 4.04, + "learning_rate": 9.60747071858183e-07, + "loss": 1.2484, + "step": 255200 + }, + { + "epoch": 4.04, + "learning_rate": 9.591642924976258e-07, + "loss": 1.2237, + "step": 255300 + }, + { + "epoch": 4.04, + "learning_rate": 9.575815131370687e-07, + "loss": 1.197, + "step": 255400 + }, + { + "epoch": 4.04, + "learning_rate": 9.559987337765117e-07, + "loss": 1.2274, + "step": 255500 + }, + { + "epoch": 4.05, + "learning_rate": 9.544159544159546e-07, + "loss": 1.2307, + "step": 255600 + }, + { + "epoch": 4.05, + "learning_rate": 9.528331750553974e-07, + "loss": 1.2244, + "step": 255700 + }, + { + "epoch": 4.05, + "learning_rate": 9.512503956948401e-07, + "loss": 1.2181, + "step": 255800 + }, + { + "epoch": 4.05, + "learning_rate": 9.496676163342832e-07, + "loss": 1.206, + "step": 255900 + }, + { + "epoch": 4.05, + "learning_rate": 9.48084836973726e-07, + "loss": 1.2437, + "step": 256000 + }, + { + "epoch": 4.05, + "learning_rate": 9.465020576131687e-07, + "loss": 1.2216, + "step": 256100 + }, + { + "epoch": 4.06, + "learning_rate": 9.449192782526116e-07, + "loss": 1.2245, + "step": 256200 + }, + { + "epoch": 4.06, + "learning_rate": 9.433364988920546e-07, + "loss": 1.223, + "step": 256300 + }, + { + "epoch": 4.06, + "learning_rate": 9.417537195314974e-07, + "loss": 1.2319, + "step": 256400 + }, + { + "epoch": 4.06, + "learning_rate": 9.401709401709402e-07, + "loss": 1.2453, + "step": 256500 + }, + { + "epoch": 4.06, + "learning_rate": 9.38588160810383e-07, + "loss": 1.234, + "step": 256600 + }, + { + "epoch": 4.06, + "learning_rate": 9.370053814498261e-07, + "loss": 1.2314, + "step": 256700 + }, + { + "epoch": 4.06, + "learning_rate": 9.354226020892688e-07, + "loss": 1.2013, + "step": 256800 + }, + { + "epoch": 4.07, + "learning_rate": 9.338398227287116e-07, + "loss": 1.2229, + "step": 256900 + }, + { + "epoch": 4.07, + "learning_rate": 9.322570433681545e-07, + "loss": 1.2013, + "step": 257000 + }, + { + "epoch": 4.07, + "learning_rate": 9.306742640075974e-07, + "loss": 1.2169, + "step": 257100 + }, + { + "epoch": 4.07, + "learning_rate": 9.290914846470403e-07, + "loss": 1.2046, + "step": 257200 + }, + { + "epoch": 4.07, + "learning_rate": 9.275087052864831e-07, + "loss": 1.2601, + "step": 257300 + }, + { + "epoch": 4.07, + "learning_rate": 9.259259259259259e-07, + "loss": 1.2527, + "step": 257400 + }, + { + "epoch": 4.08, + "learning_rate": 9.243431465653689e-07, + "loss": 1.2336, + "step": 257500 + }, + { + "epoch": 4.08, + "learning_rate": 9.227603672048117e-07, + "loss": 1.2232, + "step": 257600 + }, + { + "epoch": 4.08, + "learning_rate": 9.211775878442545e-07, + "loss": 1.2148, + "step": 257700 + }, + { + "epoch": 4.08, + "learning_rate": 9.195948084836974e-07, + "loss": 1.217, + "step": 257800 + }, + { + "epoch": 4.08, + "learning_rate": 9.180120291231403e-07, + "loss": 1.2315, + "step": 257900 + }, + { + "epoch": 4.08, + "learning_rate": 9.164292497625832e-07, + "loss": 1.2368, + "step": 258000 + }, + { + "epoch": 4.09, + "learning_rate": 9.14846470402026e-07, + "loss": 1.2126, + "step": 258100 + }, + { + "epoch": 4.09, + "learning_rate": 9.132636910414688e-07, + "loss": 1.2545, + "step": 258200 + }, + { + "epoch": 4.09, + "learning_rate": 9.116809116809118e-07, + "loss": 1.2237, + "step": 258300 + }, + { + "epoch": 4.09, + "learning_rate": 9.100981323203546e-07, + "loss": 1.2131, + "step": 258400 + }, + { + "epoch": 4.09, + "learning_rate": 9.085153529597975e-07, + "loss": 1.2287, + "step": 258500 + }, + { + "epoch": 4.09, + "learning_rate": 9.069325735992403e-07, + "loss": 1.2276, + "step": 258600 + }, + { + "epoch": 4.09, + "learning_rate": 9.053497942386832e-07, + "loss": 1.1935, + "step": 258700 + }, + { + "epoch": 4.1, + "learning_rate": 9.037670148781261e-07, + "loss": 1.205, + "step": 258800 + }, + { + "epoch": 4.1, + "learning_rate": 9.021842355175689e-07, + "loss": 1.2195, + "step": 258900 + }, + { + "epoch": 4.1, + "learning_rate": 9.006014561570117e-07, + "loss": 1.2067, + "step": 259000 + }, + { + "epoch": 4.1, + "learning_rate": 8.990186767964547e-07, + "loss": 1.2016, + "step": 259100 + }, + { + "epoch": 4.1, + "learning_rate": 8.974358974358975e-07, + "loss": 1.2289, + "step": 259200 + }, + { + "epoch": 4.1, + "learning_rate": 8.958531180753404e-07, + "loss": 1.2059, + "step": 259300 + }, + { + "epoch": 4.11, + "learning_rate": 8.942703387147832e-07, + "loss": 1.2441, + "step": 259400 + }, + { + "epoch": 4.11, + "learning_rate": 8.926875593542261e-07, + "loss": 1.2385, + "step": 259500 + }, + { + "epoch": 4.11, + "learning_rate": 8.91104779993669e-07, + "loss": 1.2383, + "step": 259600 + }, + { + "epoch": 4.11, + "learning_rate": 8.895220006331118e-07, + "loss": 1.2277, + "step": 259700 + }, + { + "epoch": 4.11, + "learning_rate": 8.879392212725547e-07, + "loss": 1.2149, + "step": 259800 + }, + { + "epoch": 4.11, + "learning_rate": 8.863564419119976e-07, + "loss": 1.2047, + "step": 259900 + }, + { + "epoch": 4.12, + "learning_rate": 8.847736625514404e-07, + "loss": 1.2147, + "step": 260000 + }, + { + "epoch": 4.12, + "learning_rate": 8.831908831908833e-07, + "loss": 1.2101, + "step": 260100 + }, + { + "epoch": 4.12, + "learning_rate": 8.816081038303262e-07, + "loss": 1.2134, + "step": 260200 + }, + { + "epoch": 4.12, + "learning_rate": 8.80025324469769e-07, + "loss": 1.2361, + "step": 260300 + }, + { + "epoch": 4.12, + "learning_rate": 8.784425451092119e-07, + "loss": 1.2083, + "step": 260400 + }, + { + "epoch": 4.12, + "learning_rate": 8.768597657486547e-07, + "loss": 1.2284, + "step": 260500 + }, + { + "epoch": 4.12, + "learning_rate": 8.752769863880976e-07, + "loss": 1.2245, + "step": 260600 + }, + { + "epoch": 4.13, + "learning_rate": 8.736942070275405e-07, + "loss": 1.2216, + "step": 260700 + }, + { + "epoch": 4.13, + "learning_rate": 8.721114276669833e-07, + "loss": 1.2279, + "step": 260800 + }, + { + "epoch": 4.13, + "learning_rate": 8.705286483064262e-07, + "loss": 1.2534, + "step": 260900 + }, + { + "epoch": 4.13, + "learning_rate": 8.689458689458691e-07, + "loss": 1.2281, + "step": 261000 + }, + { + "epoch": 4.13, + "learning_rate": 8.673630895853119e-07, + "loss": 1.2376, + "step": 261100 + }, + { + "epoch": 4.13, + "learning_rate": 8.657803102247548e-07, + "loss": 1.2057, + "step": 261200 + }, + { + "epoch": 4.14, + "learning_rate": 8.641975308641976e-07, + "loss": 1.2238, + "step": 261300 + }, + { + "epoch": 4.14, + "learning_rate": 8.626147515036405e-07, + "loss": 1.2365, + "step": 261400 + }, + { + "epoch": 4.14, + "learning_rate": 8.610319721430834e-07, + "loss": 1.2265, + "step": 261500 + }, + { + "epoch": 4.14, + "learning_rate": 8.594491927825262e-07, + "loss": 1.2328, + "step": 261600 + }, + { + "epoch": 4.14, + "learning_rate": 8.57866413421969e-07, + "loss": 1.2375, + "step": 261700 + }, + { + "epoch": 4.14, + "learning_rate": 8.56283634061412e-07, + "loss": 1.2156, + "step": 261800 + }, + { + "epoch": 4.15, + "learning_rate": 8.547008547008548e-07, + "loss": 1.2192, + "step": 261900 + }, + { + "epoch": 4.15, + "learning_rate": 8.531180753402976e-07, + "loss": 1.2027, + "step": 262000 + }, + { + "epoch": 4.15, + "learning_rate": 8.515352959797404e-07, + "loss": 1.21, + "step": 262100 + }, + { + "epoch": 4.15, + "learning_rate": 8.499525166191834e-07, + "loss": 1.2121, + "step": 262200 + }, + { + "epoch": 4.15, + "learning_rate": 8.483697372586263e-07, + "loss": 1.2086, + "step": 262300 + }, + { + "epoch": 4.15, + "learning_rate": 8.46786957898069e-07, + "loss": 1.2432, + "step": 262400 + }, + { + "epoch": 4.15, + "learning_rate": 8.452041785375118e-07, + "loss": 1.2317, + "step": 262500 + }, + { + "epoch": 4.16, + "learning_rate": 8.436213991769549e-07, + "loss": 1.2257, + "step": 262600 + }, + { + "epoch": 4.16, + "learning_rate": 8.420386198163976e-07, + "loss": 1.2124, + "step": 262700 + }, + { + "epoch": 4.16, + "learning_rate": 8.404558404558405e-07, + "loss": 1.2134, + "step": 262800 + }, + { + "epoch": 4.16, + "learning_rate": 8.388730610952833e-07, + "loss": 1.1924, + "step": 262900 + }, + { + "epoch": 4.16, + "learning_rate": 8.372902817347262e-07, + "loss": 1.2177, + "step": 263000 + }, + { + "epoch": 4.16, + "learning_rate": 8.357075023741691e-07, + "loss": 1.2286, + "step": 263100 + }, + { + "epoch": 4.17, + "learning_rate": 8.341247230136119e-07, + "loss": 1.2167, + "step": 263200 + }, + { + "epoch": 4.17, + "learning_rate": 8.325419436530547e-07, + "loss": 1.2271, + "step": 263300 + }, + { + "epoch": 4.17, + "learning_rate": 8.309591642924977e-07, + "loss": 1.2244, + "step": 263400 + }, + { + "epoch": 4.17, + "learning_rate": 8.293763849319405e-07, + "loss": 1.2319, + "step": 263500 + }, + { + "epoch": 4.17, + "learning_rate": 8.277936055713834e-07, + "loss": 1.2276, + "step": 263600 + }, + { + "epoch": 4.17, + "learning_rate": 8.262108262108262e-07, + "loss": 1.2367, + "step": 263700 + }, + { + "epoch": 4.18, + "learning_rate": 8.246280468502691e-07, + "loss": 1.2384, + "step": 263800 + }, + { + "epoch": 4.18, + "learning_rate": 8.23045267489712e-07, + "loss": 1.2504, + "step": 263900 + }, + { + "epoch": 4.18, + "learning_rate": 8.214624881291548e-07, + "loss": 1.2502, + "step": 264000 + }, + { + "epoch": 4.18, + "learning_rate": 8.198797087685978e-07, + "loss": 1.24, + "step": 264100 + }, + { + "epoch": 4.18, + "learning_rate": 8.182969294080406e-07, + "loss": 1.2296, + "step": 264200 + }, + { + "epoch": 4.18, + "learning_rate": 8.167141500474834e-07, + "loss": 1.2239, + "step": 264300 + }, + { + "epoch": 4.18, + "learning_rate": 8.151313706869263e-07, + "loss": 1.2178, + "step": 264400 + }, + { + "epoch": 4.19, + "learning_rate": 8.135485913263692e-07, + "loss": 1.2132, + "step": 264500 + }, + { + "epoch": 4.19, + "learning_rate": 8.11965811965812e-07, + "loss": 1.2107, + "step": 264600 + }, + { + "epoch": 4.19, + "learning_rate": 8.103830326052549e-07, + "loss": 1.214, + "step": 264700 + }, + { + "epoch": 4.19, + "learning_rate": 8.088002532446977e-07, + "loss": 1.2367, + "step": 264800 + }, + { + "epoch": 4.19, + "learning_rate": 8.072174738841407e-07, + "loss": 1.2442, + "step": 264900 + }, + { + "epoch": 4.19, + "learning_rate": 8.056346945235835e-07, + "loss": 1.2177, + "step": 265000 + }, + { + "epoch": 4.2, + "learning_rate": 8.040519151630263e-07, + "loss": 1.1992, + "step": 265100 + }, + { + "epoch": 4.2, + "learning_rate": 8.024691358024692e-07, + "loss": 1.2128, + "step": 265200 + }, + { + "epoch": 4.2, + "learning_rate": 8.008863564419121e-07, + "loss": 1.2227, + "step": 265300 + }, + { + "epoch": 4.2, + "learning_rate": 7.993035770813549e-07, + "loss": 1.2192, + "step": 265400 + }, + { + "epoch": 4.2, + "learning_rate": 7.977207977207978e-07, + "loss": 1.2104, + "step": 265500 + }, + { + "epoch": 4.2, + "learning_rate": 7.961380183602406e-07, + "loss": 1.2301, + "step": 265600 + }, + { + "epoch": 4.21, + "learning_rate": 7.945552389996836e-07, + "loss": 1.2364, + "step": 265700 + }, + { + "epoch": 4.21, + "learning_rate": 7.929724596391264e-07, + "loss": 1.2211, + "step": 265800 + }, + { + "epoch": 4.21, + "learning_rate": 7.913896802785692e-07, + "loss": 1.2089, + "step": 265900 + }, + { + "epoch": 4.21, + "learning_rate": 7.898069009180121e-07, + "loss": 1.217, + "step": 266000 + }, + { + "epoch": 4.21, + "learning_rate": 7.88224121557455e-07, + "loss": 1.2247, + "step": 266100 + }, + { + "epoch": 4.21, + "learning_rate": 7.866413421968978e-07, + "loss": 1.209, + "step": 266200 + }, + { + "epoch": 4.21, + "learning_rate": 7.850585628363407e-07, + "loss": 1.2159, + "step": 266300 + }, + { + "epoch": 4.22, + "learning_rate": 7.834757834757835e-07, + "loss": 1.2051, + "step": 266400 + }, + { + "epoch": 4.22, + "learning_rate": 7.818930041152265e-07, + "loss": 1.2303, + "step": 266500 + }, + { + "epoch": 4.22, + "learning_rate": 7.803102247546693e-07, + "loss": 1.2138, + "step": 266600 + }, + { + "epoch": 4.22, + "learning_rate": 7.787274453941121e-07, + "loss": 1.2188, + "step": 266700 + }, + { + "epoch": 4.22, + "learning_rate": 7.77144666033555e-07, + "loss": 1.2283, + "step": 266800 + }, + { + "epoch": 4.22, + "learning_rate": 7.755618866729979e-07, + "loss": 1.1906, + "step": 266900 + }, + { + "epoch": 4.23, + "learning_rate": 7.739791073124407e-07, + "loss": 1.2488, + "step": 267000 + }, + { + "epoch": 4.23, + "learning_rate": 7.723963279518836e-07, + "loss": 1.2176, + "step": 267100 + }, + { + "epoch": 4.23, + "learning_rate": 7.708135485913264e-07, + "loss": 1.215, + "step": 267200 + }, + { + "epoch": 4.23, + "learning_rate": 7.692307692307694e-07, + "loss": 1.2364, + "step": 267300 + }, + { + "epoch": 4.23, + "learning_rate": 7.676479898702122e-07, + "loss": 1.2603, + "step": 267400 + }, + { + "epoch": 4.23, + "learning_rate": 7.66065210509655e-07, + "loss": 1.2257, + "step": 267500 + }, + { + "epoch": 4.24, + "learning_rate": 7.644824311490979e-07, + "loss": 1.2262, + "step": 267600 + }, + { + "epoch": 4.24, + "learning_rate": 7.628996517885408e-07, + "loss": 1.2265, + "step": 267700 + }, + { + "epoch": 4.24, + "learning_rate": 7.613168724279836e-07, + "loss": 1.2112, + "step": 267800 + }, + { + "epoch": 4.24, + "learning_rate": 7.597340930674265e-07, + "loss": 1.2195, + "step": 267900 + }, + { + "epoch": 4.24, + "learning_rate": 7.581513137068692e-07, + "loss": 1.1993, + "step": 268000 + }, + { + "epoch": 4.24, + "learning_rate": 7.565685343463123e-07, + "loss": 1.2303, + "step": 268100 + }, + { + "epoch": 4.25, + "learning_rate": 7.549857549857551e-07, + "loss": 1.2501, + "step": 268200 + }, + { + "epoch": 4.25, + "learning_rate": 7.534029756251978e-07, + "loss": 1.2242, + "step": 268300 + }, + { + "epoch": 4.25, + "learning_rate": 7.518201962646409e-07, + "loss": 1.2056, + "step": 268400 + }, + { + "epoch": 4.25, + "learning_rate": 7.502374169040837e-07, + "loss": 1.2103, + "step": 268500 + }, + { + "epoch": 4.25, + "learning_rate": 7.486546375435264e-07, + "loss": 1.2102, + "step": 268600 + }, + { + "epoch": 4.25, + "learning_rate": 7.470718581829693e-07, + "loss": 1.2354, + "step": 268700 + }, + { + "epoch": 4.25, + "learning_rate": 7.454890788224123e-07, + "loss": 1.2311, + "step": 268800 + }, + { + "epoch": 4.26, + "learning_rate": 7.439062994618552e-07, + "loss": 1.2315, + "step": 268900 + }, + { + "epoch": 4.26, + "learning_rate": 7.423235201012979e-07, + "loss": 1.2165, + "step": 269000 + }, + { + "epoch": 4.26, + "learning_rate": 7.407407407407407e-07, + "loss": 1.23, + "step": 269100 + }, + { + "epoch": 4.26, + "learning_rate": 7.391579613801838e-07, + "loss": 1.2151, + "step": 269200 + }, + { + "epoch": 4.26, + "learning_rate": 7.375751820196265e-07, + "loss": 1.2227, + "step": 269300 + }, + { + "epoch": 4.26, + "learning_rate": 7.359924026590693e-07, + "loss": 1.1945, + "step": 269400 + }, + { + "epoch": 4.27, + "learning_rate": 7.344096232985122e-07, + "loss": 1.2185, + "step": 269500 + }, + { + "epoch": 4.27, + "learning_rate": 7.328268439379551e-07, + "loss": 1.2332, + "step": 269600 + }, + { + "epoch": 4.27, + "learning_rate": 7.31244064577398e-07, + "loss": 1.2355, + "step": 269700 + }, + { + "epoch": 4.27, + "learning_rate": 7.296612852168408e-07, + "loss": 1.222, + "step": 269800 + }, + { + "epoch": 4.27, + "learning_rate": 7.280785058562836e-07, + "loss": 1.2238, + "step": 269900 + }, + { + "epoch": 4.27, + "learning_rate": 7.264957264957266e-07, + "loss": 1.2278, + "step": 270000 + }, + { + "epoch": 4.28, + "learning_rate": 7.249129471351694e-07, + "loss": 1.223, + "step": 270100 + }, + { + "epoch": 4.28, + "learning_rate": 7.233301677746122e-07, + "loss": 1.236, + "step": 270200 + }, + { + "epoch": 4.28, + "learning_rate": 7.217473884140551e-07, + "loss": 1.237, + "step": 270300 + }, + { + "epoch": 4.28, + "learning_rate": 7.20164609053498e-07, + "loss": 1.21, + "step": 270400 + }, + { + "epoch": 4.28, + "learning_rate": 7.185818296929409e-07, + "loss": 1.2176, + "step": 270500 + }, + { + "epoch": 4.28, + "learning_rate": 7.169990503323837e-07, + "loss": 1.2053, + "step": 270600 + }, + { + "epoch": 4.28, + "learning_rate": 7.154162709718265e-07, + "loss": 1.2249, + "step": 270700 + }, + { + "epoch": 4.29, + "learning_rate": 7.138334916112695e-07, + "loss": 1.2079, + "step": 270800 + }, + { + "epoch": 4.29, + "learning_rate": 7.122507122507123e-07, + "loss": 1.2091, + "step": 270900 + }, + { + "epoch": 4.29, + "learning_rate": 7.106679328901551e-07, + "loss": 1.2176, + "step": 271000 + }, + { + "epoch": 4.29, + "learning_rate": 7.09085153529598e-07, + "loss": 1.2224, + "step": 271100 + }, + { + "epoch": 4.29, + "learning_rate": 7.075023741690409e-07, + "loss": 1.1919, + "step": 271200 + }, + { + "epoch": 4.29, + "learning_rate": 7.059195948084838e-07, + "loss": 1.2166, + "step": 271300 + }, + { + "epoch": 4.3, + "learning_rate": 7.043368154479266e-07, + "loss": 1.2271, + "step": 271400 + }, + { + "epoch": 4.3, + "learning_rate": 7.027540360873694e-07, + "loss": 1.2394, + "step": 271500 + }, + { + "epoch": 4.3, + "learning_rate": 7.011712567268124e-07, + "loss": 1.2107, + "step": 271600 + }, + { + "epoch": 4.3, + "learning_rate": 6.995884773662552e-07, + "loss": 1.2444, + "step": 271700 + }, + { + "epoch": 4.3, + "learning_rate": 6.98005698005698e-07, + "loss": 1.2411, + "step": 271800 + }, + { + "epoch": 4.3, + "learning_rate": 6.964229186451409e-07, + "loss": 1.2274, + "step": 271900 + }, + { + "epoch": 4.31, + "learning_rate": 6.948401392845838e-07, + "loss": 1.199, + "step": 272000 + }, + { + "epoch": 4.31, + "learning_rate": 6.932573599240267e-07, + "loss": 1.231, + "step": 272100 + }, + { + "epoch": 4.31, + "learning_rate": 6.916745805634695e-07, + "loss": 1.2462, + "step": 272200 + }, + { + "epoch": 4.31, + "learning_rate": 6.900918012029123e-07, + "loss": 1.235, + "step": 272300 + }, + { + "epoch": 4.31, + "learning_rate": 6.885090218423553e-07, + "loss": 1.2136, + "step": 272400 + }, + { + "epoch": 4.31, + "learning_rate": 6.869262424817981e-07, + "loss": 1.2406, + "step": 272500 + }, + { + "epoch": 4.31, + "learning_rate": 6.853434631212409e-07, + "loss": 1.2045, + "step": 272600 + }, + { + "epoch": 4.32, + "learning_rate": 6.837606837606839e-07, + "loss": 1.2363, + "step": 272700 + }, + { + "epoch": 4.32, + "learning_rate": 6.821779044001267e-07, + "loss": 1.2156, + "step": 272800 + }, + { + "epoch": 4.32, + "learning_rate": 6.805951250395696e-07, + "loss": 1.2183, + "step": 272900 + }, + { + "epoch": 4.32, + "learning_rate": 6.790123456790124e-07, + "loss": 1.2254, + "step": 273000 + }, + { + "epoch": 4.32, + "learning_rate": 6.774295663184553e-07, + "loss": 1.2367, + "step": 273100 + }, + { + "epoch": 4.32, + "learning_rate": 6.758467869578982e-07, + "loss": 1.2146, + "step": 273200 + }, + { + "epoch": 4.33, + "learning_rate": 6.74264007597341e-07, + "loss": 1.2576, + "step": 273300 + }, + { + "epoch": 4.33, + "learning_rate": 6.726812282367838e-07, + "loss": 1.2112, + "step": 273400 + }, + { + "epoch": 4.33, + "learning_rate": 6.710984488762268e-07, + "loss": 1.2366, + "step": 273500 + }, + { + "epoch": 4.33, + "learning_rate": 6.695156695156696e-07, + "loss": 1.2143, + "step": 273600 + }, + { + "epoch": 4.33, + "learning_rate": 6.679328901551125e-07, + "loss": 1.2253, + "step": 273700 + }, + { + "epoch": 4.33, + "learning_rate": 6.663501107945553e-07, + "loss": 1.2102, + "step": 273800 + }, + { + "epoch": 4.34, + "learning_rate": 6.647673314339982e-07, + "loss": 1.2178, + "step": 273900 + }, + { + "epoch": 4.34, + "learning_rate": 6.631845520734411e-07, + "loss": 1.2304, + "step": 274000 + }, + { + "epoch": 4.34, + "learning_rate": 6.616017727128839e-07, + "loss": 1.2131, + "step": 274100 + }, + { + "epoch": 4.34, + "learning_rate": 6.600189933523266e-07, + "loss": 1.2112, + "step": 274200 + }, + { + "epoch": 4.34, + "learning_rate": 6.584362139917697e-07, + "loss": 1.234, + "step": 274300 + }, + { + "epoch": 4.34, + "learning_rate": 6.568534346312125e-07, + "loss": 1.2111, + "step": 274400 + }, + { + "epoch": 4.34, + "learning_rate": 6.552706552706554e-07, + "loss": 1.2304, + "step": 274500 + }, + { + "epoch": 4.35, + "learning_rate": 6.536878759100981e-07, + "loss": 1.2073, + "step": 274600 + }, + { + "epoch": 4.35, + "learning_rate": 6.521050965495411e-07, + "loss": 1.2147, + "step": 274700 + }, + { + "epoch": 4.35, + "learning_rate": 6.50522317188984e-07, + "loss": 1.2278, + "step": 274800 + }, + { + "epoch": 4.35, + "learning_rate": 6.489395378284267e-07, + "loss": 1.255, + "step": 274900 + }, + { + "epoch": 4.35, + "learning_rate": 6.473567584678695e-07, + "loss": 1.2166, + "step": 275000 + }, + { + "epoch": 4.35, + "learning_rate": 6.457739791073126e-07, + "loss": 1.2096, + "step": 275100 + }, + { + "epoch": 4.36, + "learning_rate": 6.441911997467553e-07, + "loss": 1.2371, + "step": 275200 + }, + { + "epoch": 4.36, + "learning_rate": 6.426084203861982e-07, + "loss": 1.2566, + "step": 275300 + }, + { + "epoch": 4.36, + "learning_rate": 6.41025641025641e-07, + "loss": 1.2389, + "step": 275400 + }, + { + "epoch": 4.36, + "learning_rate": 6.394428616650839e-07, + "loss": 1.2247, + "step": 275500 + }, + { + "epoch": 4.36, + "learning_rate": 6.378600823045268e-07, + "loss": 1.2499, + "step": 275600 + }, + { + "epoch": 4.36, + "learning_rate": 6.362773029439696e-07, + "loss": 1.2511, + "step": 275700 + }, + { + "epoch": 4.37, + "learning_rate": 6.346945235834124e-07, + "loss": 1.2391, + "step": 275800 + }, + { + "epoch": 4.37, + "learning_rate": 6.331117442228554e-07, + "loss": 1.203, + "step": 275900 + }, + { + "epoch": 4.37, + "learning_rate": 6.315289648622982e-07, + "loss": 1.2202, + "step": 276000 + }, + { + "epoch": 4.37, + "learning_rate": 6.299461855017411e-07, + "loss": 1.2067, + "step": 276100 + }, + { + "epoch": 4.37, + "learning_rate": 6.283634061411839e-07, + "loss": 1.2445, + "step": 276200 + }, + { + "epoch": 4.37, + "learning_rate": 6.267806267806268e-07, + "loss": 1.2124, + "step": 276300 + }, + { + "epoch": 4.37, + "learning_rate": 6.251978474200697e-07, + "loss": 1.2279, + "step": 276400 + }, + { + "epoch": 4.38, + "learning_rate": 6.236150680595125e-07, + "loss": 1.2147, + "step": 276500 + }, + { + "epoch": 4.38, + "learning_rate": 6.220322886989554e-07, + "loss": 1.2426, + "step": 276600 + }, + { + "epoch": 4.38, + "learning_rate": 6.204495093383983e-07, + "loss": 1.2176, + "step": 276700 + }, + { + "epoch": 4.38, + "learning_rate": 6.188667299778411e-07, + "loss": 1.2209, + "step": 276800 + }, + { + "epoch": 4.38, + "learning_rate": 6.17283950617284e-07, + "loss": 1.2265, + "step": 276900 + }, + { + "epoch": 4.38, + "learning_rate": 6.157011712567269e-07, + "loss": 1.2294, + "step": 277000 + }, + { + "epoch": 4.39, + "learning_rate": 6.141183918961697e-07, + "loss": 1.2359, + "step": 277100 + }, + { + "epoch": 4.39, + "learning_rate": 6.125356125356126e-07, + "loss": 1.2245, + "step": 277200 + }, + { + "epoch": 4.39, + "learning_rate": 6.109528331750555e-07, + "loss": 1.2216, + "step": 277300 + }, + { + "epoch": 4.39, + "learning_rate": 6.093700538144983e-07, + "loss": 1.2325, + "step": 277400 + }, + { + "epoch": 4.39, + "learning_rate": 6.077872744539412e-07, + "loss": 1.2228, + "step": 277500 + }, + { + "epoch": 4.39, + "learning_rate": 6.06204495093384e-07, + "loss": 1.2395, + "step": 277600 + }, + { + "epoch": 4.4, + "learning_rate": 6.04621715732827e-07, + "loss": 1.2272, + "step": 277700 + }, + { + "epoch": 4.4, + "learning_rate": 6.030389363722698e-07, + "loss": 1.2402, + "step": 277800 + }, + { + "epoch": 4.4, + "learning_rate": 6.014561570117126e-07, + "loss": 1.2283, + "step": 277900 + }, + { + "epoch": 4.4, + "learning_rate": 5.998733776511555e-07, + "loss": 1.2129, + "step": 278000 + }, + { + "epoch": 4.4, + "learning_rate": 5.982905982905984e-07, + "loss": 1.2297, + "step": 278100 + }, + { + "epoch": 4.4, + "learning_rate": 5.967078189300411e-07, + "loss": 1.2234, + "step": 278200 + }, + { + "epoch": 4.4, + "learning_rate": 5.951250395694841e-07, + "loss": 1.2344, + "step": 278300 + }, + { + "epoch": 4.41, + "learning_rate": 5.935422602089269e-07, + "loss": 1.2161, + "step": 278400 + }, + { + "epoch": 4.41, + "learning_rate": 5.919594808483698e-07, + "loss": 1.2153, + "step": 278500 + }, + { + "epoch": 4.41, + "learning_rate": 5.903767014878126e-07, + "loss": 1.2233, + "step": 278600 + }, + { + "epoch": 4.41, + "learning_rate": 5.887939221272555e-07, + "loss": 1.2114, + "step": 278700 + }, + { + "epoch": 4.41, + "learning_rate": 5.872111427666984e-07, + "loss": 1.2281, + "step": 278800 + }, + { + "epoch": 4.41, + "learning_rate": 5.856283634061412e-07, + "loss": 1.241, + "step": 278900 + }, + { + "epoch": 4.42, + "learning_rate": 5.84045584045584e-07, + "loss": 1.2111, + "step": 279000 + }, + { + "epoch": 4.42, + "learning_rate": 5.82462804685027e-07, + "loss": 1.211, + "step": 279100 + }, + { + "epoch": 4.42, + "learning_rate": 5.808800253244698e-07, + "loss": 1.2186, + "step": 279200 + }, + { + "epoch": 4.42, + "learning_rate": 5.792972459639127e-07, + "loss": 1.2508, + "step": 279300 + }, + { + "epoch": 4.42, + "learning_rate": 5.777144666033555e-07, + "loss": 1.198, + "step": 279400 + }, + { + "epoch": 4.42, + "learning_rate": 5.761316872427984e-07, + "loss": 1.2177, + "step": 279500 + }, + { + "epoch": 4.43, + "learning_rate": 5.745489078822413e-07, + "loss": 1.2377, + "step": 279600 + }, + { + "epoch": 4.43, + "learning_rate": 5.729661285216841e-07, + "loss": 1.2285, + "step": 279700 + }, + { + "epoch": 4.43, + "learning_rate": 5.713833491611269e-07, + "loss": 1.2058, + "step": 279800 + }, + { + "epoch": 4.43, + "learning_rate": 5.698005698005699e-07, + "loss": 1.199, + "step": 279900 + }, + { + "epoch": 4.43, + "learning_rate": 5.682177904400127e-07, + "loss": 1.2427, + "step": 280000 + }, + { + "epoch": 4.43, + "learning_rate": 5.666350110794556e-07, + "loss": 1.2292, + "step": 280100 + }, + { + "epoch": 4.43, + "learning_rate": 5.650522317188984e-07, + "loss": 1.2054, + "step": 280200 + }, + { + "epoch": 4.44, + "learning_rate": 5.634694523583413e-07, + "loss": 1.24, + "step": 280300 + }, + { + "epoch": 4.44, + "learning_rate": 5.618866729977842e-07, + "loss": 1.2224, + "step": 280400 + }, + { + "epoch": 4.44, + "learning_rate": 5.60303893637227e-07, + "loss": 1.2031, + "step": 280500 + }, + { + "epoch": 4.44, + "learning_rate": 5.587211142766698e-07, + "loss": 1.2405, + "step": 280600 + }, + { + "epoch": 4.44, + "learning_rate": 5.571383349161128e-07, + "loss": 1.2526, + "step": 280700 + }, + { + "epoch": 4.44, + "learning_rate": 5.555555555555555e-07, + "loss": 1.2041, + "step": 280800 + }, + { + "epoch": 4.45, + "learning_rate": 5.539727761949985e-07, + "loss": 1.2327, + "step": 280900 + }, + { + "epoch": 4.45, + "learning_rate": 5.523899968344413e-07, + "loss": 1.2322, + "step": 281000 + }, + { + "epoch": 4.45, + "learning_rate": 5.508072174738842e-07, + "loss": 1.2342, + "step": 281100 + }, + { + "epoch": 4.45, + "learning_rate": 5.49224438113327e-07, + "loss": 1.2051, + "step": 281200 + }, + { + "epoch": 4.45, + "learning_rate": 5.476416587527699e-07, + "loss": 1.242, + "step": 281300 + }, + { + "epoch": 4.45, + "learning_rate": 5.460588793922129e-07, + "loss": 1.1998, + "step": 281400 + }, + { + "epoch": 4.46, + "learning_rate": 5.444761000316556e-07, + "loss": 1.2141, + "step": 281500 + }, + { + "epoch": 4.46, + "learning_rate": 5.428933206710985e-07, + "loss": 1.2291, + "step": 281600 + }, + { + "epoch": 4.46, + "learning_rate": 5.413105413105414e-07, + "loss": 1.2211, + "step": 281700 + }, + { + "epoch": 4.46, + "learning_rate": 5.397277619499842e-07, + "loss": 1.2303, + "step": 281800 + }, + { + "epoch": 4.46, + "learning_rate": 5.38144982589427e-07, + "loss": 1.2191, + "step": 281900 + }, + { + "epoch": 4.46, + "learning_rate": 5.3656220322887e-07, + "loss": 1.2361, + "step": 282000 + }, + { + "epoch": 4.47, + "learning_rate": 5.349794238683128e-07, + "loss": 1.2239, + "step": 282100 + }, + { + "epoch": 4.47, + "learning_rate": 5.333966445077556e-07, + "loss": 1.2313, + "step": 282200 + }, + { + "epoch": 4.47, + "learning_rate": 5.318138651471985e-07, + "loss": 1.1873, + "step": 282300 + }, + { + "epoch": 4.47, + "learning_rate": 5.302310857866414e-07, + "loss": 1.2486, + "step": 282400 + }, + { + "epoch": 4.47, + "learning_rate": 5.286483064260843e-07, + "loss": 1.1866, + "step": 282500 + }, + { + "epoch": 4.47, + "learning_rate": 5.270655270655271e-07, + "loss": 1.2402, + "step": 282600 + }, + { + "epoch": 4.47, + "learning_rate": 5.254827477049699e-07, + "loss": 1.2061, + "step": 282700 + }, + { + "epoch": 4.48, + "learning_rate": 5.238999683444129e-07, + "loss": 1.2431, + "step": 282800 + }, + { + "epoch": 4.48, + "learning_rate": 5.223171889838557e-07, + "loss": 1.2132, + "step": 282900 + }, + { + "epoch": 4.48, + "learning_rate": 5.207344096232985e-07, + "loss": 1.205, + "step": 283000 + }, + { + "epoch": 4.48, + "learning_rate": 5.191516302627414e-07, + "loss": 1.2221, + "step": 283100 + }, + { + "epoch": 4.48, + "learning_rate": 5.175688509021843e-07, + "loss": 1.2291, + "step": 283200 + }, + { + "epoch": 4.48, + "learning_rate": 5.159860715416272e-07, + "loss": 1.2316, + "step": 283300 + }, + { + "epoch": 4.49, + "learning_rate": 5.1440329218107e-07, + "loss": 1.2116, + "step": 283400 + }, + { + "epoch": 4.49, + "learning_rate": 5.128205128205128e-07, + "loss": 1.2375, + "step": 283500 + }, + { + "epoch": 4.49, + "learning_rate": 5.112377334599558e-07, + "loss": 1.2154, + "step": 283600 + }, + { + "epoch": 4.49, + "learning_rate": 5.096549540993986e-07, + "loss": 1.2367, + "step": 283700 + }, + { + "epoch": 4.49, + "learning_rate": 5.080721747388414e-07, + "loss": 1.2299, + "step": 283800 + }, + { + "epoch": 4.49, + "learning_rate": 5.064893953782843e-07, + "loss": 1.2425, + "step": 283900 + }, + { + "epoch": 4.5, + "learning_rate": 5.049066160177272e-07, + "loss": 1.2154, + "step": 284000 + }, + { + "epoch": 4.5, + "learning_rate": 5.0332383665717e-07, + "loss": 1.2427, + "step": 284100 + }, + { + "epoch": 4.5, + "learning_rate": 5.017410572966129e-07, + "loss": 1.2093, + "step": 284200 + }, + { + "epoch": 4.5, + "learning_rate": 5.001582779360557e-07, + "loss": 1.2424, + "step": 284300 + }, + { + "epoch": 4.5, + "learning_rate": 4.985754985754987e-07, + "loss": 1.2285, + "step": 284400 + }, + { + "epoch": 4.5, + "learning_rate": 4.969927192149414e-07, + "loss": 1.2309, + "step": 284500 + }, + { + "epoch": 4.5, + "learning_rate": 4.954099398543844e-07, + "loss": 1.2446, + "step": 284600 + }, + { + "epoch": 4.51, + "learning_rate": 4.938271604938272e-07, + "loss": 1.2018, + "step": 284700 + }, + { + "epoch": 4.51, + "learning_rate": 4.9224438113327e-07, + "loss": 1.2095, + "step": 284800 + }, + { + "epoch": 4.51, + "learning_rate": 4.906616017727129e-07, + "loss": 1.2228, + "step": 284900 + }, + { + "epoch": 4.51, + "learning_rate": 4.890788224121558e-07, + "loss": 1.2014, + "step": 285000 + }, + { + "epoch": 4.51, + "learning_rate": 4.874960430515986e-07, + "loss": 1.2248, + "step": 285100 + }, + { + "epoch": 4.51, + "learning_rate": 4.859132636910415e-07, + "loss": 1.2065, + "step": 285200 + }, + { + "epoch": 4.52, + "learning_rate": 4.843304843304843e-07, + "loss": 1.21, + "step": 285300 + }, + { + "epoch": 4.52, + "learning_rate": 4.827477049699273e-07, + "loss": 1.2023, + "step": 285400 + }, + { + "epoch": 4.52, + "learning_rate": 4.811649256093701e-07, + "loss": 1.2253, + "step": 285500 + }, + { + "epoch": 4.52, + "learning_rate": 4.795821462488129e-07, + "loss": 1.2357, + "step": 285600 + }, + { + "epoch": 4.52, + "learning_rate": 4.779993668882559e-07, + "loss": 1.1998, + "step": 285700 + }, + { + "epoch": 4.52, + "learning_rate": 4.764165875276987e-07, + "loss": 1.2349, + "step": 285800 + }, + { + "epoch": 4.53, + "learning_rate": 4.748338081671416e-07, + "loss": 1.2145, + "step": 285900 + }, + { + "epoch": 4.53, + "learning_rate": 4.732510288065844e-07, + "loss": 1.2483, + "step": 286000 + }, + { + "epoch": 4.53, + "learning_rate": 4.716682494460273e-07, + "loss": 1.2169, + "step": 286100 + }, + { + "epoch": 4.53, + "learning_rate": 4.700854700854701e-07, + "loss": 1.218, + "step": 286200 + }, + { + "epoch": 4.53, + "learning_rate": 4.6850269072491304e-07, + "loss": 1.2338, + "step": 286300 + }, + { + "epoch": 4.53, + "learning_rate": 4.669199113643558e-07, + "loss": 1.2377, + "step": 286400 + }, + { + "epoch": 4.53, + "learning_rate": 4.653371320037987e-07, + "loss": 1.1896, + "step": 286500 + }, + { + "epoch": 4.54, + "learning_rate": 4.6375435264324155e-07, + "loss": 1.2143, + "step": 286600 + }, + { + "epoch": 4.54, + "learning_rate": 4.6217157328268444e-07, + "loss": 1.2243, + "step": 286700 + }, + { + "epoch": 4.54, + "learning_rate": 4.605887939221273e-07, + "loss": 1.214, + "step": 286800 + }, + { + "epoch": 4.54, + "learning_rate": 4.5900601456157016e-07, + "loss": 1.2217, + "step": 286900 + }, + { + "epoch": 4.54, + "learning_rate": 4.57423235201013e-07, + "loss": 1.2288, + "step": 287000 + }, + { + "epoch": 4.54, + "learning_rate": 4.558404558404559e-07, + "loss": 1.2313, + "step": 287100 + }, + { + "epoch": 4.55, + "learning_rate": 4.542576764798987e-07, + "loss": 1.2183, + "step": 287200 + }, + { + "epoch": 4.55, + "learning_rate": 4.526748971193416e-07, + "loss": 1.2458, + "step": 287300 + }, + { + "epoch": 4.55, + "learning_rate": 4.5109211775878445e-07, + "loss": 1.2088, + "step": 287400 + }, + { + "epoch": 4.55, + "learning_rate": 4.4950933839822734e-07, + "loss": 1.2247, + "step": 287500 + }, + { + "epoch": 4.55, + "learning_rate": 4.479265590376702e-07, + "loss": 1.232, + "step": 287600 + }, + { + "epoch": 4.55, + "learning_rate": 4.4634377967711306e-07, + "loss": 1.2408, + "step": 287700 + }, + { + "epoch": 4.56, + "learning_rate": 4.447610003165559e-07, + "loss": 1.2542, + "step": 287800 + }, + { + "epoch": 4.56, + "learning_rate": 4.431782209559988e-07, + "loss": 1.2232, + "step": 287900 + }, + { + "epoch": 4.56, + "learning_rate": 4.415954415954416e-07, + "loss": 1.2252, + "step": 288000 + }, + { + "epoch": 4.56, + "learning_rate": 4.400126622348845e-07, + "loss": 1.2167, + "step": 288100 + }, + { + "epoch": 4.56, + "learning_rate": 4.3842988287432735e-07, + "loss": 1.2194, + "step": 288200 + }, + { + "epoch": 4.56, + "learning_rate": 4.3684710351377024e-07, + "loss": 1.2245, + "step": 288300 + }, + { + "epoch": 4.56, + "learning_rate": 4.352643241532131e-07, + "loss": 1.2244, + "step": 288400 + }, + { + "epoch": 4.57, + "learning_rate": 4.3368154479265597e-07, + "loss": 1.2042, + "step": 288500 + }, + { + "epoch": 4.57, + "learning_rate": 4.320987654320988e-07, + "loss": 1.2115, + "step": 288600 + }, + { + "epoch": 4.57, + "learning_rate": 4.305159860715417e-07, + "loss": 1.2074, + "step": 288700 + }, + { + "epoch": 4.57, + "learning_rate": 4.289332067109845e-07, + "loss": 1.233, + "step": 288800 + }, + { + "epoch": 4.57, + "learning_rate": 4.273504273504274e-07, + "loss": 1.2184, + "step": 288900 + }, + { + "epoch": 4.57, + "learning_rate": 4.257676479898702e-07, + "loss": 1.2244, + "step": 289000 + }, + { + "epoch": 4.58, + "learning_rate": 4.2418486862931314e-07, + "loss": 1.23, + "step": 289100 + }, + { + "epoch": 4.58, + "learning_rate": 4.226020892687559e-07, + "loss": 1.24, + "step": 289200 + }, + { + "epoch": 4.58, + "learning_rate": 4.210193099081988e-07, + "loss": 1.2348, + "step": 289300 + }, + { + "epoch": 4.58, + "learning_rate": 4.1943653054764165e-07, + "loss": 1.2298, + "step": 289400 + }, + { + "epoch": 4.58, + "learning_rate": 4.1785375118708454e-07, + "loss": 1.2101, + "step": 289500 + }, + { + "epoch": 4.58, + "learning_rate": 4.162709718265274e-07, + "loss": 1.205, + "step": 289600 + }, + { + "epoch": 4.59, + "learning_rate": 4.1468819246597026e-07, + "loss": 1.2398, + "step": 289700 + }, + { + "epoch": 4.59, + "learning_rate": 4.131054131054131e-07, + "loss": 1.2069, + "step": 289800 + }, + { + "epoch": 4.59, + "learning_rate": 4.11522633744856e-07, + "loss": 1.2107, + "step": 289900 + }, + { + "epoch": 4.59, + "learning_rate": 4.099398543842989e-07, + "loss": 1.2189, + "step": 290000 + }, + { + "epoch": 4.59, + "learning_rate": 4.083570750237417e-07, + "loss": 1.2256, + "step": 290100 + }, + { + "epoch": 4.59, + "learning_rate": 4.067742956631846e-07, + "loss": 1.2452, + "step": 290200 + }, + { + "epoch": 4.59, + "learning_rate": 4.0519151630262744e-07, + "loss": 1.2137, + "step": 290300 + }, + { + "epoch": 4.6, + "learning_rate": 4.0360873694207033e-07, + "loss": 1.2218, + "step": 290400 + }, + { + "epoch": 4.6, + "learning_rate": 4.0202595758151316e-07, + "loss": 1.2185, + "step": 290500 + }, + { + "epoch": 4.6, + "learning_rate": 4.0044317822095605e-07, + "loss": 1.2252, + "step": 290600 + }, + { + "epoch": 4.6, + "learning_rate": 3.988603988603989e-07, + "loss": 1.2214, + "step": 290700 + }, + { + "epoch": 4.6, + "learning_rate": 3.972776194998418e-07, + "loss": 1.2231, + "step": 290800 + }, + { + "epoch": 4.6, + "learning_rate": 3.956948401392846e-07, + "loss": 1.2375, + "step": 290900 + }, + { + "epoch": 4.61, + "learning_rate": 3.941120607787275e-07, + "loss": 1.1954, + "step": 291000 + }, + { + "epoch": 4.61, + "learning_rate": 3.9252928141817034e-07, + "loss": 1.2018, + "step": 291100 + }, + { + "epoch": 4.61, + "learning_rate": 3.9094650205761323e-07, + "loss": 1.2031, + "step": 291200 + }, + { + "epoch": 4.61, + "learning_rate": 3.8936372269705607e-07, + "loss": 1.2245, + "step": 291300 + }, + { + "epoch": 4.61, + "learning_rate": 3.8778094333649895e-07, + "loss": 1.2096, + "step": 291400 + }, + { + "epoch": 4.61, + "learning_rate": 3.861981639759418e-07, + "loss": 1.2666, + "step": 291500 + }, + { + "epoch": 4.62, + "learning_rate": 3.846153846153847e-07, + "loss": 1.2485, + "step": 291600 + }, + { + "epoch": 4.62, + "learning_rate": 3.830326052548275e-07, + "loss": 1.2282, + "step": 291700 + }, + { + "epoch": 4.62, + "learning_rate": 3.814498258942704e-07, + "loss": 1.2175, + "step": 291800 + }, + { + "epoch": 4.62, + "learning_rate": 3.7986704653371324e-07, + "loss": 1.2144, + "step": 291900 + }, + { + "epoch": 4.62, + "learning_rate": 3.7828426717315613e-07, + "loss": 1.2098, + "step": 292000 + }, + { + "epoch": 4.62, + "learning_rate": 3.767014878125989e-07, + "loss": 1.2274, + "step": 292100 + }, + { + "epoch": 4.62, + "learning_rate": 3.7511870845204186e-07, + "loss": 1.2184, + "step": 292200 + }, + { + "epoch": 4.63, + "learning_rate": 3.7353592909148464e-07, + "loss": 1.2081, + "step": 292300 + }, + { + "epoch": 4.63, + "learning_rate": 3.719531497309276e-07, + "loss": 1.2077, + "step": 292400 + }, + { + "epoch": 4.63, + "learning_rate": 3.7037037037037036e-07, + "loss": 1.2075, + "step": 292500 + }, + { + "epoch": 4.63, + "learning_rate": 3.6878759100981325e-07, + "loss": 1.1956, + "step": 292600 + }, + { + "epoch": 4.63, + "learning_rate": 3.672048116492561e-07, + "loss": 1.2263, + "step": 292700 + }, + { + "epoch": 4.63, + "learning_rate": 3.65622032288699e-07, + "loss": 1.2194, + "step": 292800 + }, + { + "epoch": 4.64, + "learning_rate": 3.640392529281418e-07, + "loss": 1.2004, + "step": 292900 + }, + { + "epoch": 4.64, + "learning_rate": 3.624564735675847e-07, + "loss": 1.2458, + "step": 293000 + }, + { + "epoch": 4.64, + "learning_rate": 3.6087369420702754e-07, + "loss": 1.2324, + "step": 293100 + }, + { + "epoch": 4.64, + "learning_rate": 3.5929091484647043e-07, + "loss": 1.2312, + "step": 293200 + }, + { + "epoch": 4.64, + "learning_rate": 3.5770813548591326e-07, + "loss": 1.212, + "step": 293300 + }, + { + "epoch": 4.64, + "learning_rate": 3.5612535612535615e-07, + "loss": 1.1959, + "step": 293400 + }, + { + "epoch": 4.65, + "learning_rate": 3.54542576764799e-07, + "loss": 1.1879, + "step": 293500 + }, + { + "epoch": 4.65, + "learning_rate": 3.529597974042419e-07, + "loss": 1.2029, + "step": 293600 + }, + { + "epoch": 4.65, + "learning_rate": 3.513770180436847e-07, + "loss": 1.223, + "step": 293700 + }, + { + "epoch": 4.65, + "learning_rate": 3.497942386831276e-07, + "loss": 1.2122, + "step": 293800 + }, + { + "epoch": 4.65, + "learning_rate": 3.4821145932257044e-07, + "loss": 1.2194, + "step": 293900 + }, + { + "epoch": 4.65, + "learning_rate": 3.4662867996201333e-07, + "loss": 1.2188, + "step": 294000 + }, + { + "epoch": 4.65, + "learning_rate": 3.4504590060145617e-07, + "loss": 1.233, + "step": 294100 + }, + { + "epoch": 4.66, + "learning_rate": 3.4346312124089905e-07, + "loss": 1.2418, + "step": 294200 + }, + { + "epoch": 4.66, + "learning_rate": 3.4188034188034194e-07, + "loss": 1.222, + "step": 294300 + }, + { + "epoch": 4.66, + "learning_rate": 3.402975625197848e-07, + "loss": 1.2436, + "step": 294400 + }, + { + "epoch": 4.66, + "learning_rate": 3.3871478315922767e-07, + "loss": 1.2186, + "step": 294500 + }, + { + "epoch": 4.66, + "learning_rate": 3.371320037986705e-07, + "loss": 1.2061, + "step": 294600 + }, + { + "epoch": 4.66, + "learning_rate": 3.355492244381134e-07, + "loss": 1.2245, + "step": 294700 + }, + { + "epoch": 4.67, + "learning_rate": 3.3396644507755623e-07, + "loss": 1.2239, + "step": 294800 + }, + { + "epoch": 4.67, + "learning_rate": 3.323836657169991e-07, + "loss": 1.2345, + "step": 294900 + }, + { + "epoch": 4.67, + "learning_rate": 3.3080088635644195e-07, + "loss": 1.2085, + "step": 295000 + }, + { + "epoch": 4.67, + "learning_rate": 3.2921810699588484e-07, + "loss": 1.1785, + "step": 295100 + }, + { + "epoch": 4.67, + "learning_rate": 3.276353276353277e-07, + "loss": 1.2198, + "step": 295200 + }, + { + "epoch": 4.67, + "learning_rate": 3.2605254827477057e-07, + "loss": 1.2277, + "step": 295300 + }, + { + "epoch": 4.68, + "learning_rate": 3.2446976891421335e-07, + "loss": 1.1924, + "step": 295400 + }, + { + "epoch": 4.68, + "learning_rate": 3.228869895536563e-07, + "loss": 1.1952, + "step": 295500 + }, + { + "epoch": 4.68, + "learning_rate": 3.213042101930991e-07, + "loss": 1.2185, + "step": 295600 + }, + { + "epoch": 4.68, + "learning_rate": 3.1972143083254197e-07, + "loss": 1.242, + "step": 295700 + }, + { + "epoch": 4.68, + "learning_rate": 3.181386514719848e-07, + "loss": 1.2069, + "step": 295800 + }, + { + "epoch": 4.68, + "learning_rate": 3.165558721114277e-07, + "loss": 1.2212, + "step": 295900 + }, + { + "epoch": 4.69, + "learning_rate": 3.1497309275087053e-07, + "loss": 1.2264, + "step": 296000 + }, + { + "epoch": 4.69, + "learning_rate": 3.133903133903134e-07, + "loss": 1.1997, + "step": 296100 + }, + { + "epoch": 4.69, + "learning_rate": 3.1180753402975625e-07, + "loss": 1.2288, + "step": 296200 + }, + { + "epoch": 4.69, + "learning_rate": 3.1022475466919914e-07, + "loss": 1.2201, + "step": 296300 + }, + { + "epoch": 4.69, + "learning_rate": 3.08641975308642e-07, + "loss": 1.2367, + "step": 296400 + }, + { + "epoch": 4.69, + "learning_rate": 3.0705919594808487e-07, + "loss": 1.238, + "step": 296500 + }, + { + "epoch": 4.69, + "learning_rate": 3.0547641658752776e-07, + "loss": 1.2364, + "step": 296600 + }, + { + "epoch": 4.7, + "learning_rate": 3.038936372269706e-07, + "loss": 1.2199, + "step": 296700 + }, + { + "epoch": 4.7, + "learning_rate": 3.023108578664135e-07, + "loss": 1.202, + "step": 296800 + }, + { + "epoch": 4.7, + "learning_rate": 3.007280785058563e-07, + "loss": 1.2126, + "step": 296900 + }, + { + "epoch": 4.7, + "learning_rate": 2.991452991452992e-07, + "loss": 1.2282, + "step": 297000 + }, + { + "epoch": 4.7, + "learning_rate": 2.9756251978474204e-07, + "loss": 1.2345, + "step": 297100 + }, + { + "epoch": 4.7, + "learning_rate": 2.959797404241849e-07, + "loss": 1.2016, + "step": 297200 + }, + { + "epoch": 4.71, + "learning_rate": 2.9439696106362777e-07, + "loss": 1.229, + "step": 297300 + }, + { + "epoch": 4.71, + "learning_rate": 2.928141817030706e-07, + "loss": 1.2291, + "step": 297400 + }, + { + "epoch": 4.71, + "learning_rate": 2.912314023425135e-07, + "loss": 1.2324, + "step": 297500 + }, + { + "epoch": 4.71, + "learning_rate": 2.8964862298195633e-07, + "loss": 1.2067, + "step": 297600 + }, + { + "epoch": 4.71, + "learning_rate": 2.880658436213992e-07, + "loss": 1.1926, + "step": 297700 + }, + { + "epoch": 4.71, + "learning_rate": 2.8648306426084205e-07, + "loss": 1.2281, + "step": 297800 + }, + { + "epoch": 4.72, + "learning_rate": 2.8490028490028494e-07, + "loss": 1.234, + "step": 297900 + }, + { + "epoch": 4.72, + "learning_rate": 2.833175055397278e-07, + "loss": 1.2233, + "step": 298000 + }, + { + "epoch": 4.72, + "learning_rate": 2.8173472617917067e-07, + "loss": 1.2169, + "step": 298100 + }, + { + "epoch": 4.72, + "learning_rate": 2.801519468186135e-07, + "loss": 1.2032, + "step": 298200 + }, + { + "epoch": 4.72, + "learning_rate": 2.785691674580564e-07, + "loss": 1.2224, + "step": 298300 + }, + { + "epoch": 4.72, + "learning_rate": 2.7698638809749923e-07, + "loss": 1.2104, + "step": 298400 + }, + { + "epoch": 4.72, + "learning_rate": 2.754036087369421e-07, + "loss": 1.2418, + "step": 298500 + }, + { + "epoch": 4.73, + "learning_rate": 2.7382082937638496e-07, + "loss": 1.221, + "step": 298600 + }, + { + "epoch": 4.73, + "learning_rate": 2.722380500158278e-07, + "loss": 1.2161, + "step": 298700 + }, + { + "epoch": 4.73, + "learning_rate": 2.706552706552707e-07, + "loss": 1.2257, + "step": 298800 + }, + { + "epoch": 4.73, + "learning_rate": 2.690724912947135e-07, + "loss": 1.2346, + "step": 298900 + }, + { + "epoch": 4.73, + "learning_rate": 2.674897119341564e-07, + "loss": 1.2346, + "step": 299000 + }, + { + "epoch": 4.73, + "learning_rate": 2.6590693257359924e-07, + "loss": 1.2251, + "step": 299100 + }, + { + "epoch": 4.74, + "learning_rate": 2.6432415321304213e-07, + "loss": 1.231, + "step": 299200 + }, + { + "epoch": 4.74, + "learning_rate": 2.6274137385248497e-07, + "loss": 1.2039, + "step": 299300 + }, + { + "epoch": 4.74, + "learning_rate": 2.6115859449192786e-07, + "loss": 1.2161, + "step": 299400 + }, + { + "epoch": 4.74, + "learning_rate": 2.595758151313707e-07, + "loss": 1.2304, + "step": 299500 + }, + { + "epoch": 4.74, + "learning_rate": 2.579930357708136e-07, + "loss": 1.2324, + "step": 299600 + }, + { + "epoch": 4.74, + "learning_rate": 2.564102564102564e-07, + "loss": 1.2109, + "step": 299700 + }, + { + "epoch": 4.75, + "learning_rate": 2.548274770496993e-07, + "loss": 1.2228, + "step": 299800 + }, + { + "epoch": 4.75, + "learning_rate": 2.5324469768914214e-07, + "loss": 1.1968, + "step": 299900 + }, + { + "epoch": 4.75, + "learning_rate": 2.51661918328585e-07, + "loss": 1.2128, + "step": 300000 + }, + { + "epoch": 4.75, + "learning_rate": 2.5007913896802787e-07, + "loss": 1.212, + "step": 300100 + }, + { + "epoch": 4.75, + "learning_rate": 2.484963596074707e-07, + "loss": 1.2323, + "step": 300200 + }, + { + "epoch": 4.75, + "learning_rate": 2.469135802469136e-07, + "loss": 1.2144, + "step": 300300 + }, + { + "epoch": 4.75, + "learning_rate": 2.4533080088635643e-07, + "loss": 1.2202, + "step": 300400 + }, + { + "epoch": 4.76, + "learning_rate": 2.437480215257993e-07, + "loss": 1.2058, + "step": 300500 + }, + { + "epoch": 4.76, + "learning_rate": 2.4216524216524215e-07, + "loss": 1.2323, + "step": 300600 + }, + { + "epoch": 4.76, + "learning_rate": 2.4058246280468504e-07, + "loss": 1.2371, + "step": 300700 + }, + { + "epoch": 4.76, + "learning_rate": 2.3899968344412793e-07, + "loss": 1.243, + "step": 300800 + }, + { + "epoch": 4.76, + "learning_rate": 2.374169040835708e-07, + "loss": 1.2271, + "step": 300900 + }, + { + "epoch": 4.76, + "learning_rate": 2.3583412472301366e-07, + "loss": 1.2325, + "step": 301000 + }, + { + "epoch": 4.77, + "learning_rate": 2.3425134536245652e-07, + "loss": 1.2121, + "step": 301100 + }, + { + "epoch": 4.77, + "learning_rate": 2.3266856600189936e-07, + "loss": 1.2104, + "step": 301200 + }, + { + "epoch": 4.77, + "learning_rate": 2.3108578664134222e-07, + "loss": 1.2204, + "step": 301300 + }, + { + "epoch": 4.77, + "learning_rate": 2.2950300728078508e-07, + "loss": 1.2291, + "step": 301400 + }, + { + "epoch": 4.77, + "learning_rate": 2.2792022792022794e-07, + "loss": 1.2444, + "step": 301500 + }, + { + "epoch": 4.77, + "learning_rate": 2.263374485596708e-07, + "loss": 1.2285, + "step": 301600 + }, + { + "epoch": 4.78, + "learning_rate": 2.2475466919911367e-07, + "loss": 1.2038, + "step": 301700 + }, + { + "epoch": 4.78, + "learning_rate": 2.2317188983855653e-07, + "loss": 1.217, + "step": 301800 + }, + { + "epoch": 4.78, + "learning_rate": 2.215891104779994e-07, + "loss": 1.215, + "step": 301900 + }, + { + "epoch": 4.78, + "learning_rate": 2.2000633111744226e-07, + "loss": 1.2345, + "step": 302000 + }, + { + "epoch": 4.78, + "learning_rate": 2.1842355175688512e-07, + "loss": 1.2156, + "step": 302100 + }, + { + "epoch": 4.78, + "learning_rate": 2.1684077239632798e-07, + "loss": 1.2218, + "step": 302200 + }, + { + "epoch": 4.78, + "learning_rate": 2.1525799303577085e-07, + "loss": 1.2102, + "step": 302300 + }, + { + "epoch": 4.79, + "learning_rate": 2.136752136752137e-07, + "loss": 1.2177, + "step": 302400 + }, + { + "epoch": 4.79, + "learning_rate": 2.1209243431465657e-07, + "loss": 1.2422, + "step": 302500 + }, + { + "epoch": 4.79, + "learning_rate": 2.105096549540994e-07, + "loss": 1.224, + "step": 302600 + }, + { + "epoch": 4.79, + "learning_rate": 2.0892687559354227e-07, + "loss": 1.2141, + "step": 302700 + }, + { + "epoch": 4.79, + "learning_rate": 2.0734409623298513e-07, + "loss": 1.2344, + "step": 302800 + }, + { + "epoch": 4.79, + "learning_rate": 2.05761316872428e-07, + "loss": 1.2134, + "step": 302900 + }, + { + "epoch": 4.8, + "learning_rate": 2.0417853751187086e-07, + "loss": 1.2184, + "step": 303000 + }, + { + "epoch": 4.8, + "learning_rate": 2.0259575815131372e-07, + "loss": 1.2245, + "step": 303100 + }, + { + "epoch": 4.8, + "learning_rate": 2.0101297879075658e-07, + "loss": 1.2339, + "step": 303200 + }, + { + "epoch": 4.8, + "learning_rate": 1.9943019943019944e-07, + "loss": 1.2247, + "step": 303300 + }, + { + "epoch": 4.8, + "learning_rate": 1.978474200696423e-07, + "loss": 1.2184, + "step": 303400 + }, + { + "epoch": 4.8, + "learning_rate": 1.9626464070908517e-07, + "loss": 1.2284, + "step": 303500 + }, + { + "epoch": 4.81, + "learning_rate": 1.9468186134852803e-07, + "loss": 1.2347, + "step": 303600 + }, + { + "epoch": 4.81, + "learning_rate": 1.930990819879709e-07, + "loss": 1.2117, + "step": 303700 + }, + { + "epoch": 4.81, + "learning_rate": 1.9151630262741376e-07, + "loss": 1.2254, + "step": 303800 + }, + { + "epoch": 4.81, + "learning_rate": 1.8993352326685662e-07, + "loss": 1.2108, + "step": 303900 + }, + { + "epoch": 4.81, + "learning_rate": 1.8835074390629946e-07, + "loss": 1.2197, + "step": 304000 + }, + { + "epoch": 4.81, + "learning_rate": 1.8676796454574232e-07, + "loss": 1.2218, + "step": 304100 + }, + { + "epoch": 4.81, + "learning_rate": 1.8518518518518518e-07, + "loss": 1.2229, + "step": 304200 + }, + { + "epoch": 4.82, + "learning_rate": 1.8360240582462804e-07, + "loss": 1.2485, + "step": 304300 + }, + { + "epoch": 4.82, + "learning_rate": 1.820196264640709e-07, + "loss": 1.2303, + "step": 304400 + }, + { + "epoch": 4.82, + "learning_rate": 1.8043684710351377e-07, + "loss": 1.2372, + "step": 304500 + }, + { + "epoch": 4.82, + "learning_rate": 1.7885406774295663e-07, + "loss": 1.2463, + "step": 304600 + }, + { + "epoch": 4.82, + "learning_rate": 1.772712883823995e-07, + "loss": 1.2267, + "step": 304700 + }, + { + "epoch": 4.82, + "learning_rate": 1.7568850902184236e-07, + "loss": 1.2206, + "step": 304800 + }, + { + "epoch": 4.83, + "learning_rate": 1.7410572966128522e-07, + "loss": 1.212, + "step": 304900 + }, + { + "epoch": 4.83, + "learning_rate": 1.7252295030072808e-07, + "loss": 1.2316, + "step": 305000 + }, + { + "epoch": 4.83, + "learning_rate": 1.7094017094017097e-07, + "loss": 1.2193, + "step": 305100 + }, + { + "epoch": 4.83, + "learning_rate": 1.6935739157961383e-07, + "loss": 1.2505, + "step": 305200 + }, + { + "epoch": 4.83, + "learning_rate": 1.677746122190567e-07, + "loss": 1.2173, + "step": 305300 + }, + { + "epoch": 4.83, + "learning_rate": 1.6619183285849956e-07, + "loss": 1.2255, + "step": 305400 + }, + { + "epoch": 4.84, + "learning_rate": 1.6460905349794242e-07, + "loss": 1.1953, + "step": 305500 + }, + { + "epoch": 4.84, + "learning_rate": 1.6302627413738528e-07, + "loss": 1.2153, + "step": 305600 + }, + { + "epoch": 4.84, + "learning_rate": 1.6144349477682815e-07, + "loss": 1.2301, + "step": 305700 + }, + { + "epoch": 4.84, + "learning_rate": 1.5986071541627098e-07, + "loss": 1.2233, + "step": 305800 + }, + { + "epoch": 4.84, + "learning_rate": 1.5827793605571385e-07, + "loss": 1.2252, + "step": 305900 + }, + { + "epoch": 4.84, + "learning_rate": 1.566951566951567e-07, + "loss": 1.2092, + "step": 306000 + }, + { + "epoch": 4.84, + "learning_rate": 1.5511237733459957e-07, + "loss": 1.2321, + "step": 306100 + }, + { + "epoch": 4.85, + "learning_rate": 1.5352959797404243e-07, + "loss": 1.2337, + "step": 306200 + }, + { + "epoch": 4.85, + "learning_rate": 1.519468186134853e-07, + "loss": 1.2172, + "step": 306300 + }, + { + "epoch": 4.85, + "learning_rate": 1.5036403925292816e-07, + "loss": 1.2362, + "step": 306400 + }, + { + "epoch": 4.85, + "learning_rate": 1.4878125989237102e-07, + "loss": 1.2429, + "step": 306500 + }, + { + "epoch": 4.85, + "learning_rate": 1.4719848053181388e-07, + "loss": 1.2289, + "step": 306600 + }, + { + "epoch": 4.85, + "learning_rate": 1.4561570117125675e-07, + "loss": 1.2219, + "step": 306700 + }, + { + "epoch": 4.86, + "learning_rate": 1.440329218106996e-07, + "loss": 1.2242, + "step": 306800 + }, + { + "epoch": 4.86, + "learning_rate": 1.4245014245014247e-07, + "loss": 1.1894, + "step": 306900 + }, + { + "epoch": 4.86, + "learning_rate": 1.4086736308958533e-07, + "loss": 1.2284, + "step": 307000 + }, + { + "epoch": 4.86, + "learning_rate": 1.392845837290282e-07, + "loss": 1.2378, + "step": 307100 + }, + { + "epoch": 4.86, + "learning_rate": 1.3770180436847106e-07, + "loss": 1.2331, + "step": 307200 + }, + { + "epoch": 4.86, + "learning_rate": 1.361190250079139e-07, + "loss": 1.2469, + "step": 307300 + }, + { + "epoch": 4.87, + "learning_rate": 1.3453624564735676e-07, + "loss": 1.2334, + "step": 307400 + }, + { + "epoch": 4.87, + "learning_rate": 1.3295346628679962e-07, + "loss": 1.2104, + "step": 307500 + }, + { + "epoch": 4.87, + "learning_rate": 1.3137068692624248e-07, + "loss": 1.2208, + "step": 307600 + }, + { + "epoch": 4.87, + "learning_rate": 1.2978790756568535e-07, + "loss": 1.2203, + "step": 307700 + }, + { + "epoch": 4.87, + "learning_rate": 1.282051282051282e-07, + "loss": 1.219, + "step": 307800 + }, + { + "epoch": 4.87, + "learning_rate": 1.2662234884457107e-07, + "loss": 1.2067, + "step": 307900 + }, + { + "epoch": 4.87, + "learning_rate": 1.2503956948401393e-07, + "loss": 1.2382, + "step": 308000 + }, + { + "epoch": 4.88, + "learning_rate": 1.234567901234568e-07, + "loss": 1.2283, + "step": 308100 + }, + { + "epoch": 4.88, + "learning_rate": 1.2187401076289966e-07, + "loss": 1.2223, + "step": 308200 + }, + { + "epoch": 4.88, + "learning_rate": 1.2029123140234252e-07, + "loss": 1.2019, + "step": 308300 + }, + { + "epoch": 4.88, + "learning_rate": 1.187084520417854e-07, + "loss": 1.2453, + "step": 308400 + }, + { + "epoch": 4.88, + "learning_rate": 1.1712567268122826e-07, + "loss": 1.2536, + "step": 308500 + }, + { + "epoch": 4.88, + "learning_rate": 1.1554289332067111e-07, + "loss": 1.2293, + "step": 308600 + }, + { + "epoch": 4.89, + "learning_rate": 1.1396011396011397e-07, + "loss": 1.2118, + "step": 308700 + }, + { + "epoch": 4.89, + "learning_rate": 1.1237733459955683e-07, + "loss": 1.2164, + "step": 308800 + }, + { + "epoch": 4.89, + "learning_rate": 1.107945552389997e-07, + "loss": 1.234, + "step": 308900 + }, + { + "epoch": 4.89, + "learning_rate": 1.0921177587844256e-07, + "loss": 1.2312, + "step": 309000 + }, + { + "epoch": 4.89, + "learning_rate": 1.0762899651788542e-07, + "loss": 1.2062, + "step": 309100 + }, + { + "epoch": 4.89, + "learning_rate": 1.0604621715732829e-07, + "loss": 1.2238, + "step": 309200 + }, + { + "epoch": 4.9, + "learning_rate": 1.0446343779677113e-07, + "loss": 1.222, + "step": 309300 + }, + { + "epoch": 4.9, + "learning_rate": 1.02880658436214e-07, + "loss": 1.2275, + "step": 309400 + }, + { + "epoch": 4.9, + "learning_rate": 1.0129787907565686e-07, + "loss": 1.2366, + "step": 309500 + }, + { + "epoch": 4.9, + "learning_rate": 9.971509971509972e-08, + "loss": 1.2173, + "step": 309600 + }, + { + "epoch": 4.9, + "learning_rate": 9.813232035454258e-08, + "loss": 1.2097, + "step": 309700 + }, + { + "epoch": 4.9, + "learning_rate": 9.654954099398545e-08, + "loss": 1.2409, + "step": 309800 + }, + { + "epoch": 4.91, + "learning_rate": 9.496676163342831e-08, + "loss": 1.2271, + "step": 309900 + }, + { + "epoch": 4.91, + "learning_rate": 9.338398227287116e-08, + "loss": 1.2154, + "step": 310000 + }, + { + "epoch": 4.91, + "learning_rate": 9.180120291231402e-08, + "loss": 1.2535, + "step": 310100 + }, + { + "epoch": 4.91, + "learning_rate": 9.021842355175688e-08, + "loss": 1.2182, + "step": 310200 + }, + { + "epoch": 4.91, + "learning_rate": 8.863564419119975e-08, + "loss": 1.2073, + "step": 310300 + }, + { + "epoch": 4.91, + "learning_rate": 8.705286483064261e-08, + "loss": 1.2331, + "step": 310400 + }, + { + "epoch": 4.91, + "learning_rate": 8.547008547008549e-08, + "loss": 1.2228, + "step": 310500 + }, + { + "epoch": 4.92, + "learning_rate": 8.388730610952835e-08, + "loss": 1.2424, + "step": 310600 + }, + { + "epoch": 4.92, + "learning_rate": 8.230452674897121e-08, + "loss": 1.2394, + "step": 310700 + }, + { + "epoch": 4.92, + "learning_rate": 8.072174738841407e-08, + "loss": 1.2078, + "step": 310800 + }, + { + "epoch": 4.92, + "learning_rate": 7.913896802785692e-08, + "loss": 1.2206, + "step": 310900 + }, + { + "epoch": 4.92, + "learning_rate": 7.755618866729979e-08, + "loss": 1.2444, + "step": 311000 + }, + { + "epoch": 4.92, + "learning_rate": 7.597340930674265e-08, + "loss": 1.2195, + "step": 311100 + }, + { + "epoch": 4.93, + "learning_rate": 7.439062994618551e-08, + "loss": 1.2308, + "step": 311200 + }, + { + "epoch": 4.93, + "learning_rate": 7.280785058562837e-08, + "loss": 1.2342, + "step": 311300 + }, + { + "epoch": 4.93, + "learning_rate": 7.122507122507124e-08, + "loss": 1.2097, + "step": 311400 + }, + { + "epoch": 4.93, + "learning_rate": 6.96422918645141e-08, + "loss": 1.2102, + "step": 311500 + }, + { + "epoch": 4.93, + "learning_rate": 6.805951250395695e-08, + "loss": 1.2217, + "step": 311600 + }, + { + "epoch": 4.93, + "learning_rate": 6.647673314339981e-08, + "loss": 1.1993, + "step": 311700 + }, + { + "epoch": 4.94, + "learning_rate": 6.489395378284267e-08, + "loss": 1.2376, + "step": 311800 + }, + { + "epoch": 4.94, + "learning_rate": 6.331117442228554e-08, + "loss": 1.2212, + "step": 311900 + }, + { + "epoch": 4.94, + "learning_rate": 6.17283950617284e-08, + "loss": 1.1984, + "step": 312000 + }, + { + "epoch": 4.94, + "learning_rate": 6.014561570117126e-08, + "loss": 1.2196, + "step": 312100 + }, + { + "epoch": 4.94, + "learning_rate": 5.856283634061413e-08, + "loss": 1.2022, + "step": 312200 + }, + { + "epoch": 4.94, + "learning_rate": 5.6980056980056986e-08, + "loss": 1.2208, + "step": 312300 + }, + { + "epoch": 4.94, + "learning_rate": 5.539727761949985e-08, + "loss": 1.2318, + "step": 312400 + }, + { + "epoch": 4.95, + "learning_rate": 5.381449825894271e-08, + "loss": 1.2267, + "step": 312500 + }, + { + "epoch": 4.95, + "learning_rate": 5.223171889838557e-08, + "loss": 1.1789, + "step": 312600 + }, + { + "epoch": 4.95, + "learning_rate": 5.064893953782843e-08, + "loss": 1.2309, + "step": 312700 + }, + { + "epoch": 4.95, + "learning_rate": 4.906616017727129e-08, + "loss": 1.2244, + "step": 312800 + }, + { + "epoch": 4.95, + "learning_rate": 4.7483380816714155e-08, + "loss": 1.2148, + "step": 312900 + }, + { + "epoch": 4.95, + "learning_rate": 4.590060145615701e-08, + "loss": 1.2476, + "step": 313000 + }, + { + "epoch": 4.96, + "learning_rate": 4.4317822095599874e-08, + "loss": 1.2182, + "step": 313100 + }, + { + "epoch": 4.96, + "learning_rate": 4.273504273504274e-08, + "loss": 1.2218, + "step": 313200 + }, + { + "epoch": 4.96, + "learning_rate": 4.1152263374485605e-08, + "loss": 1.2346, + "step": 313300 + }, + { + "epoch": 4.96, + "learning_rate": 3.956948401392846e-08, + "loss": 1.2107, + "step": 313400 + }, + { + "epoch": 4.96, + "learning_rate": 3.7986704653371324e-08, + "loss": 1.2277, + "step": 313500 + }, + { + "epoch": 4.96, + "learning_rate": 3.6403925292814187e-08, + "loss": 1.2254, + "step": 313600 + }, + { + "epoch": 4.97, + "learning_rate": 3.482114593225705e-08, + "loss": 1.2188, + "step": 313700 + }, + { + "epoch": 4.97, + "learning_rate": 3.3238366571699905e-08, + "loss": 1.2252, + "step": 313800 + }, + { + "epoch": 4.97, + "learning_rate": 3.165558721114277e-08, + "loss": 1.2335, + "step": 313900 + }, + { + "epoch": 4.97, + "learning_rate": 3.007280785058563e-08, + "loss": 1.2149, + "step": 314000 + }, + { + "epoch": 4.97, + "learning_rate": 2.8490028490028493e-08, + "loss": 1.2068, + "step": 314100 + }, + { + "epoch": 4.97, + "learning_rate": 2.6907249129471356e-08, + "loss": 1.2225, + "step": 314200 + }, + { + "epoch": 4.97, + "learning_rate": 2.5324469768914215e-08, + "loss": 1.2119, + "step": 314300 + }, + { + "epoch": 4.98, + "learning_rate": 2.3741690408357078e-08, + "loss": 1.2168, + "step": 314400 + }, + { + "epoch": 4.98, + "learning_rate": 2.2158911047799937e-08, + "loss": 1.2255, + "step": 314500 + }, + { + "epoch": 4.98, + "learning_rate": 2.0576131687242803e-08, + "loss": 1.2219, + "step": 314600 + }, + { + "epoch": 4.98, + "learning_rate": 1.8993352326685662e-08, + "loss": 1.2177, + "step": 314700 + }, + { + "epoch": 4.98, + "learning_rate": 1.7410572966128525e-08, + "loss": 1.2205, + "step": 314800 + }, + { + "epoch": 4.98, + "learning_rate": 1.5827793605571384e-08, + "loss": 1.2058, + "step": 314900 + }, + { + "epoch": 4.99, + "learning_rate": 1.4245014245014247e-08, + "loss": 1.2225, + "step": 315000 + }, + { + "epoch": 4.99, + "learning_rate": 1.2662234884457107e-08, + "loss": 1.2069, + "step": 315100 + }, + { + "epoch": 4.99, + "learning_rate": 1.1079455523899968e-08, + "loss": 1.2139, + "step": 315200 + }, + { + "epoch": 4.99, + "learning_rate": 9.496676163342831e-09, + "loss": 1.2175, + "step": 315300 + }, + { + "epoch": 4.99, + "learning_rate": 7.913896802785692e-09, + "loss": 1.2145, + "step": 315400 + }, + { + "epoch": 4.99, + "learning_rate": 6.331117442228554e-09, + "loss": 1.1867, + "step": 315500 + }, + { + "epoch": 5.0, + "learning_rate": 4.7483380816714155e-09, + "loss": 1.2111, + "step": 315600 + }, + { + "epoch": 5.0, + "learning_rate": 3.165558721114277e-09, + "loss": 1.2163, + "step": 315700 + }, + { + "epoch": 5.0, + "learning_rate": 1.5827793605571384e-09, + "loss": 1.2097, + "step": 315800 + }, + { + "epoch": 5.0, + "learning_rate": 0.0, + "loss": 1.2281, + "step": 315900 + } + ], + "max_steps": 315900, + "num_train_epochs": 5, + "total_flos": 9.28756872184293e+18, + "trial_name": null, + "trial_params": null +}