{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08765198306037775, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999583652897995e-05, "loss": 1.5746, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.999145392790621e-05, "loss": 1.4499, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.998707132683248e-05, "loss": 1.3728, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.998268872575874e-05, "loss": 1.3729, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.9978393776706475e-05, "loss": 1.23, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.997401117563274e-05, "loss": 1.1736, "step": 600 }, { "epoch": 0.0, "learning_rate": 4.9969628574559e-05, "loss": 1.1768, "step": 700 }, { "epoch": 0.0, "learning_rate": 4.9965245973485266e-05, "loss": 1.2188, "step": 800 }, { "epoch": 0.0, "learning_rate": 4.9960863372411524e-05, "loss": 1.1797, "step": 900 }, { "epoch": 0.0, "learning_rate": 4.9956480771337794e-05, "loss": 1.1905, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.995209817026406e-05, "loss": 1.2828, "step": 1100 }, { "epoch": 0.0, "learning_rate": 4.9947715569190315e-05, "loss": 1.1102, "step": 1200 }, { "epoch": 0.0, "learning_rate": 4.994333296811658e-05, "loss": 1.1689, "step": 1300 }, { "epoch": 0.0, "learning_rate": 4.993895036704284e-05, "loss": 1.1574, "step": 1400 }, { "epoch": 0.0, "learning_rate": 4.993456776596911e-05, "loss": 1.2361, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.9930185164895364e-05, "loss": 1.1391, "step": 1600 }, { "epoch": 0.0, "learning_rate": 4.9925802563821635e-05, "loss": 1.1652, "step": 1700 }, { "epoch": 0.0, "learning_rate": 4.99214199627479e-05, "loss": 1.162, "step": 1800 }, { "epoch": 0.0, "learning_rate": 4.9917037361674156e-05, "loss": 1.1477, "step": 1900 }, { "epoch": 0.0, "learning_rate": 4.991265476060042e-05, "loss": 1.171, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.990827215952668e-05, "loss": 1.1214, "step": 2100 }, { "epoch": 0.0, "learning_rate": 4.990388955845294e-05, "loss": 1.0911, "step": 2200 }, { "epoch": 0.0, "learning_rate": 4.9899506957379204e-05, "loss": 1.1262, "step": 2300 }, { "epoch": 0.0, "learning_rate": 4.989512435630547e-05, "loss": 1.0649, "step": 2400 }, { "epoch": 0.0, "learning_rate": 4.989074175523173e-05, "loss": 1.13, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.9886359154157996e-05, "loss": 1.1008, "step": 2600 }, { "epoch": 0.0, "learning_rate": 4.988197655308426e-05, "loss": 1.0733, "step": 2700 }, { "epoch": 0.0, "learning_rate": 4.9877593952010524e-05, "loss": 1.0636, "step": 2800 }, { "epoch": 0.0, "learning_rate": 4.987321135093678e-05, "loss": 1.1448, "step": 2900 }, { "epoch": 0.0, "learning_rate": 4.9868828749863045e-05, "loss": 1.1237, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.986444614878931e-05, "loss": 1.1278, "step": 3100 }, { "epoch": 0.0, "learning_rate": 4.986006354771557e-05, "loss": 1.0606, "step": 3200 }, { "epoch": 0.0, "learning_rate": 4.9855680946641836e-05, "loss": 1.1164, "step": 3300 }, { "epoch": 0.0, "learning_rate": 4.98512983455681e-05, "loss": 1.0511, "step": 3400 }, { "epoch": 0.0, "learning_rate": 4.984691574449436e-05, "loss": 1.0267, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.984253314342062e-05, "loss": 1.0842, "step": 3600 }, { "epoch": 0.0, "learning_rate": 4.9838150542346885e-05, "loss": 1.1009, "step": 3700 }, { "epoch": 0.0, "learning_rate": 4.983376794127315e-05, "loss": 1.0525, "step": 3800 }, { "epoch": 0.0, "learning_rate": 4.9829385340199406e-05, "loss": 1.0772, "step": 3900 }, { "epoch": 0.0, "learning_rate": 4.9825002739125677e-05, "loss": 1.0564, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.982062013805194e-05, "loss": 1.0593, "step": 4100 }, { "epoch": 0.0, "learning_rate": 4.98162375369782e-05, "loss": 1.0939, "step": 4200 }, { "epoch": 0.0, "learning_rate": 4.981185493590446e-05, "loss": 1.0162, "step": 4300 }, { "epoch": 0.0, "learning_rate": 4.9807472334830725e-05, "loss": 1.0549, "step": 4400 }, { "epoch": 0.0, "learning_rate": 4.980308973375698e-05, "loss": 1.0467, "step": 4500 }, { "epoch": 0.0, "learning_rate": 4.9798707132683246e-05, "loss": 1.0434, "step": 4600 }, { "epoch": 0.0, "learning_rate": 4.979432453160952e-05, "loss": 1.0023, "step": 4700 }, { "epoch": 0.0, "learning_rate": 4.9789941930535774e-05, "loss": 1.1335, "step": 4800 }, { "epoch": 0.0, "learning_rate": 4.978555932946204e-05, "loss": 1.0314, "step": 4900 }, { "epoch": 0.0, "learning_rate": 4.97811767283883e-05, "loss": 1.0841, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.9776794127314566e-05, "loss": 1.02, "step": 5100 }, { "epoch": 0.0, "learning_rate": 4.977241152624082e-05, "loss": 0.999, "step": 5200 }, { "epoch": 0.0, "learning_rate": 4.9768028925167087e-05, "loss": 1.0875, "step": 5300 }, { "epoch": 0.0, "learning_rate": 4.976364632409335e-05, "loss": 1.0271, "step": 5400 }, { "epoch": 0.0, "learning_rate": 4.9759263723019614e-05, "loss": 1.0556, "step": 5500 }, { "epoch": 0.0, "learning_rate": 4.975488112194588e-05, "loss": 1.0384, "step": 5600 }, { "epoch": 0.0, "learning_rate": 4.975049852087214e-05, "loss": 1.07, "step": 5700 }, { "epoch": 0.01, "learning_rate": 4.9746115919798406e-05, "loss": 1.0087, "step": 5800 }, { "epoch": 0.01, "learning_rate": 4.974173331872466e-05, "loss": 1.0818, "step": 5900 }, { "epoch": 0.01, "learning_rate": 4.973735071765093e-05, "loss": 1.0697, "step": 6000 }, { "epoch": 0.01, "learning_rate": 4.973296811657719e-05, "loss": 1.0354, "step": 6100 }, { "epoch": 0.01, "learning_rate": 4.9728585515503455e-05, "loss": 0.9858, "step": 6200 }, { "epoch": 0.01, "learning_rate": 4.972420291442972e-05, "loss": 1.0291, "step": 6300 }, { "epoch": 0.01, "learning_rate": 4.971982031335598e-05, "loss": 1.0206, "step": 6400 }, { "epoch": 0.01, "learning_rate": 4.971543771228224e-05, "loss": 1.0487, "step": 6500 }, { "epoch": 0.01, "learning_rate": 4.97110551112085e-05, "loss": 1.0847, "step": 6600 }, { "epoch": 0.01, "learning_rate": 4.970667251013477e-05, "loss": 1.0272, "step": 6700 }, { "epoch": 0.01, "learning_rate": 4.970228990906103e-05, "loss": 1.0268, "step": 6800 }, { "epoch": 0.01, "learning_rate": 4.969790730798729e-05, "loss": 1.0012, "step": 6900 }, { "epoch": 0.01, "learning_rate": 4.969352470691356e-05, "loss": 0.96, "step": 7000 }, { "epoch": 0.01, "learning_rate": 4.968914210583982e-05, "loss": 1.0239, "step": 7100 }, { "epoch": 0.01, "learning_rate": 4.968475950476608e-05, "loss": 1.0212, "step": 7200 }, { "epoch": 0.01, "learning_rate": 4.9680376903692344e-05, "loss": 1.018, "step": 7300 }, { "epoch": 0.01, "learning_rate": 4.967599430261861e-05, "loss": 0.9627, "step": 7400 }, { "epoch": 0.01, "learning_rate": 4.9671611701544865e-05, "loss": 1.0514, "step": 7500 }, { "epoch": 0.01, "learning_rate": 4.966722910047113e-05, "loss": 1.0319, "step": 7600 }, { "epoch": 0.01, "learning_rate": 4.96628464993974e-05, "loss": 0.9913, "step": 7700 }, { "epoch": 0.01, "learning_rate": 4.9658463898323656e-05, "loss": 1.0072, "step": 7800 }, { "epoch": 0.01, "learning_rate": 4.965408129724992e-05, "loss": 0.9955, "step": 7900 }, { "epoch": 0.01, "learning_rate": 4.9649698696176184e-05, "loss": 1.0485, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.964531609510245e-05, "loss": 0.9666, "step": 8100 }, { "epoch": 0.01, "learning_rate": 4.9640933494028705e-05, "loss": 0.9327, "step": 8200 }, { "epoch": 0.01, "learning_rate": 4.963655089295497e-05, "loss": 1.0228, "step": 8300 }, { "epoch": 0.01, "learning_rate": 4.963216829188123e-05, "loss": 1.0134, "step": 8400 }, { "epoch": 0.01, "learning_rate": 4.96277856908075e-05, "loss": 1.0276, "step": 8500 }, { "epoch": 0.01, "learning_rate": 4.962340308973376e-05, "loss": 0.9459, "step": 8600 }, { "epoch": 0.01, "learning_rate": 4.9619020488660024e-05, "loss": 0.9719, "step": 8700 }, { "epoch": 0.01, "learning_rate": 4.961463788758629e-05, "loss": 0.9243, "step": 8800 }, { "epoch": 0.01, "learning_rate": 4.9610255286512545e-05, "loss": 0.9991, "step": 8900 }, { "epoch": 0.01, "learning_rate": 4.960587268543881e-05, "loss": 0.9566, "step": 9000 }, { "epoch": 0.01, "learning_rate": 4.960149008436507e-05, "loss": 0.9744, "step": 9100 }, { "epoch": 0.01, "learning_rate": 4.959710748329134e-05, "loss": 0.9836, "step": 9200 }, { "epoch": 0.01, "learning_rate": 4.95927248822176e-05, "loss": 0.8892, "step": 9300 }, { "epoch": 0.01, "learning_rate": 4.9588342281143865e-05, "loss": 0.9609, "step": 9400 }, { "epoch": 0.01, "learning_rate": 4.958395968007012e-05, "loss": 0.9613, "step": 9500 }, { "epoch": 0.01, "learning_rate": 4.9579577078996386e-05, "loss": 1.0236, "step": 9600 }, { "epoch": 0.01, "learning_rate": 4.957519447792265e-05, "loss": 0.9404, "step": 9700 }, { "epoch": 0.01, "learning_rate": 4.9570811876848913e-05, "loss": 0.91, "step": 9800 }, { "epoch": 0.01, "learning_rate": 4.956642927577517e-05, "loss": 0.9923, "step": 9900 }, { "epoch": 0.01, "learning_rate": 4.956204667470144e-05, "loss": 0.9815, "step": 10000 }, { "epoch": 0.01, "learning_rate": 4.9557664073627705e-05, "loss": 0.9069, "step": 10100 }, { "epoch": 0.01, "learning_rate": 4.955328147255396e-05, "loss": 0.9944, "step": 10200 }, { "epoch": 0.01, "learning_rate": 4.9548898871480226e-05, "loss": 0.9553, "step": 10300 }, { "epoch": 0.01, "learning_rate": 4.954451627040649e-05, "loss": 0.9449, "step": 10400 }, { "epoch": 0.01, "learning_rate": 4.954013366933275e-05, "loss": 0.9538, "step": 10500 }, { "epoch": 0.01, "learning_rate": 4.953575106825901e-05, "loss": 0.9735, "step": 10600 }, { "epoch": 0.01, "learning_rate": 4.953136846718528e-05, "loss": 0.9722, "step": 10700 }, { "epoch": 0.01, "learning_rate": 4.952698586611154e-05, "loss": 0.9786, "step": 10800 }, { "epoch": 0.01, "learning_rate": 4.95226032650378e-05, "loss": 0.9305, "step": 10900 }, { "epoch": 0.01, "learning_rate": 4.9518220663964066e-05, "loss": 0.9171, "step": 11000 }, { "epoch": 0.01, "learning_rate": 4.951383806289033e-05, "loss": 0.9424, "step": 11100 }, { "epoch": 0.01, "learning_rate": 4.950945546181659e-05, "loss": 0.9316, "step": 11200 }, { "epoch": 0.01, "learning_rate": 4.950507286074285e-05, "loss": 1.0145, "step": 11300 }, { "epoch": 0.01, "learning_rate": 4.9500690259669115e-05, "loss": 0.9797, "step": 11400 }, { "epoch": 0.01, "learning_rate": 4.949630765859538e-05, "loss": 0.9035, "step": 11500 }, { "epoch": 0.01, "learning_rate": 4.949192505752164e-05, "loss": 0.9201, "step": 11600 }, { "epoch": 0.01, "learning_rate": 4.948754245644791e-05, "loss": 0.9216, "step": 11700 }, { "epoch": 0.01, "learning_rate": 4.9483159855374164e-05, "loss": 0.945, "step": 11800 }, { "epoch": 0.01, "learning_rate": 4.947877725430043e-05, "loss": 0.9036, "step": 11900 }, { "epoch": 0.01, "learning_rate": 4.947439465322669e-05, "loss": 0.9586, "step": 12000 }, { "epoch": 0.01, "learning_rate": 4.9470012052152955e-05, "loss": 0.93, "step": 12100 }, { "epoch": 0.01, "learning_rate": 4.946562945107922e-05, "loss": 0.9111, "step": 12200 }, { "epoch": 0.01, "learning_rate": 4.946124685000548e-05, "loss": 0.9481, "step": 12300 }, { "epoch": 0.01, "learning_rate": 4.945686424893175e-05, "loss": 0.9746, "step": 12400 }, { "epoch": 0.01, "learning_rate": 4.945252547386874e-05, "loss": 0.8887, "step": 12500 }, { "epoch": 0.01, "learning_rate": 4.944814287279501e-05, "loss": 0.8922, "step": 12600 }, { "epoch": 0.01, "learning_rate": 4.944376027172127e-05, "loss": 0.9359, "step": 12700 }, { "epoch": 0.01, "learning_rate": 4.943937767064753e-05, "loss": 0.9479, "step": 12800 }, { "epoch": 0.01, "learning_rate": 4.943499506957379e-05, "loss": 0.8957, "step": 12900 }, { "epoch": 0.01, "learning_rate": 4.943061246850006e-05, "loss": 0.9511, "step": 13000 }, { "epoch": 0.01, "learning_rate": 4.942622986742632e-05, "loss": 0.8991, "step": 13100 }, { "epoch": 0.01, "learning_rate": 4.9421847266352584e-05, "loss": 0.9246, "step": 13200 }, { "epoch": 0.01, "learning_rate": 4.941746466527885e-05, "loss": 0.8767, "step": 13300 }, { "epoch": 0.01, "learning_rate": 4.9413082064205105e-05, "loss": 0.9596, "step": 13400 }, { "epoch": 0.01, "learning_rate": 4.940869946313137e-05, "loss": 0.8822, "step": 13500 }, { "epoch": 0.01, "learning_rate": 4.940431686205763e-05, "loss": 0.9094, "step": 13600 }, { "epoch": 0.01, "learning_rate": 4.9399934260983896e-05, "loss": 0.9085, "step": 13700 }, { "epoch": 0.01, "learning_rate": 4.939555165991016e-05, "loss": 0.891, "step": 13800 }, { "epoch": 0.01, "learning_rate": 4.9391169058836424e-05, "loss": 0.8784, "step": 13900 }, { "epoch": 0.01, "learning_rate": 4.938678645776269e-05, "loss": 0.9138, "step": 14000 }, { "epoch": 0.01, "learning_rate": 4.9382403856688945e-05, "loss": 0.8773, "step": 14100 }, { "epoch": 0.01, "learning_rate": 4.937802125561521e-05, "loss": 0.8547, "step": 14200 }, { "epoch": 0.01, "learning_rate": 4.937363865454147e-05, "loss": 0.9284, "step": 14300 }, { "epoch": 0.01, "learning_rate": 4.936925605346773e-05, "loss": 0.9839, "step": 14400 }, { "epoch": 0.01, "learning_rate": 4.9364873452394e-05, "loss": 0.887, "step": 14500 }, { "epoch": 0.01, "learning_rate": 4.9360490851320264e-05, "loss": 0.9668, "step": 14600 }, { "epoch": 0.01, "learning_rate": 4.935610825024652e-05, "loss": 0.9022, "step": 14700 }, { "epoch": 0.01, "learning_rate": 4.9351725649172785e-05, "loss": 0.8769, "step": 14800 }, { "epoch": 0.01, "learning_rate": 4.934734304809905e-05, "loss": 0.9015, "step": 14900 }, { "epoch": 0.01, "learning_rate": 4.934296044702531e-05, "loss": 0.9319, "step": 15000 }, { "epoch": 0.01, "learning_rate": 4.933857784595157e-05, "loss": 0.8847, "step": 15100 }, { "epoch": 0.01, "learning_rate": 4.933419524487784e-05, "loss": 0.911, "step": 15200 }, { "epoch": 0.01, "learning_rate": 4.9329812643804105e-05, "loss": 0.9238, "step": 15300 }, { "epoch": 0.01, "learning_rate": 4.932543004273036e-05, "loss": 0.8539, "step": 15400 }, { "epoch": 0.01, "learning_rate": 4.9321047441656626e-05, "loss": 0.8901, "step": 15500 }, { "epoch": 0.01, "learning_rate": 4.931666484058289e-05, "loss": 0.8523, "step": 15600 }, { "epoch": 0.01, "learning_rate": 4.931228223950915e-05, "loss": 0.8737, "step": 15700 }, { "epoch": 0.01, "learning_rate": 4.930789963843541e-05, "loss": 0.8563, "step": 15800 }, { "epoch": 0.01, "learning_rate": 4.9303517037361674e-05, "loss": 0.9147, "step": 15900 }, { "epoch": 0.01, "learning_rate": 4.929917826229868e-05, "loss": 0.91, "step": 16000 }, { "epoch": 0.01, "learning_rate": 4.929479566122494e-05, "loss": 0.8723, "step": 16100 }, { "epoch": 0.01, "learning_rate": 4.9290413060151205e-05, "loss": 0.9087, "step": 16200 }, { "epoch": 0.01, "learning_rate": 4.928603045907746e-05, "loss": 0.911, "step": 16300 }, { "epoch": 0.01, "learning_rate": 4.9281647858003726e-05, "loss": 0.87, "step": 16400 }, { "epoch": 0.01, "learning_rate": 4.927726525692999e-05, "loss": 0.878, "step": 16500 }, { "epoch": 0.01, "learning_rate": 4.9272882655856254e-05, "loss": 0.8579, "step": 16600 }, { "epoch": 0.01, "learning_rate": 4.926850005478252e-05, "loss": 0.9406, "step": 16700 }, { "epoch": 0.01, "learning_rate": 4.926411745370878e-05, "loss": 0.9255, "step": 16800 }, { "epoch": 0.01, "learning_rate": 4.9259734852635045e-05, "loss": 0.8492, "step": 16900 }, { "epoch": 0.01, "learning_rate": 4.92553522515613e-05, "loss": 0.8543, "step": 17000 }, { "epoch": 0.01, "learning_rate": 4.9250969650487566e-05, "loss": 0.8901, "step": 17100 }, { "epoch": 0.02, "learning_rate": 4.924658704941383e-05, "loss": 0.8168, "step": 17200 }, { "epoch": 0.02, "learning_rate": 4.924220444834009e-05, "loss": 0.9083, "step": 17300 }, { "epoch": 0.02, "learning_rate": 4.923782184726635e-05, "loss": 0.9301, "step": 17400 }, { "epoch": 0.02, "learning_rate": 4.923343924619262e-05, "loss": 0.8885, "step": 17500 }, { "epoch": 0.02, "learning_rate": 4.922905664511888e-05, "loss": 0.9209, "step": 17600 }, { "epoch": 0.02, "learning_rate": 4.922467404404514e-05, "loss": 0.8404, "step": 17700 }, { "epoch": 0.02, "learning_rate": 4.922029144297141e-05, "loss": 0.9039, "step": 17800 }, { "epoch": 0.02, "learning_rate": 4.921590884189767e-05, "loss": 0.9151, "step": 17900 }, { "epoch": 0.02, "learning_rate": 4.921152624082393e-05, "loss": 0.9139, "step": 18000 }, { "epoch": 0.02, "learning_rate": 4.920714363975019e-05, "loss": 0.9031, "step": 18100 }, { "epoch": 0.02, "learning_rate": 4.920276103867646e-05, "loss": 0.9497, "step": 18200 }, { "epoch": 0.02, "learning_rate": 4.919837843760272e-05, "loss": 0.9277, "step": 18300 }, { "epoch": 0.02, "learning_rate": 4.919399583652898e-05, "loss": 0.8629, "step": 18400 }, { "epoch": 0.02, "learning_rate": 4.918961323545525e-05, "loss": 0.8593, "step": 18500 }, { "epoch": 0.02, "learning_rate": 4.9185230634381504e-05, "loss": 0.9478, "step": 18600 }, { "epoch": 0.02, "learning_rate": 4.918084803330777e-05, "loss": 0.8978, "step": 18700 }, { "epoch": 0.02, "learning_rate": 4.917646543223403e-05, "loss": 0.8154, "step": 18800 }, { "epoch": 0.02, "learning_rate": 4.9172082831160296e-05, "loss": 0.8861, "step": 18900 }, { "epoch": 0.02, "learning_rate": 4.916770023008656e-05, "loss": 0.9065, "step": 19000 }, { "epoch": 0.02, "learning_rate": 4.9163317629012823e-05, "loss": 0.9063, "step": 19100 }, { "epoch": 0.02, "learning_rate": 4.915893502793909e-05, "loss": 0.8666, "step": 19200 }, { "epoch": 0.02, "learning_rate": 4.9154596252876083e-05, "loss": 0.8928, "step": 19300 }, { "epoch": 0.02, "learning_rate": 4.915021365180235e-05, "loss": 0.8607, "step": 19400 }, { "epoch": 0.02, "learning_rate": 4.914583105072861e-05, "loss": 0.8939, "step": 19500 }, { "epoch": 0.02, "learning_rate": 4.914144844965487e-05, "loss": 0.8878, "step": 19600 }, { "epoch": 0.02, "learning_rate": 4.913706584858114e-05, "loss": 0.8956, "step": 19700 }, { "epoch": 0.02, "learning_rate": 4.91326832475074e-05, "loss": 0.886, "step": 19800 }, { "epoch": 0.02, "learning_rate": 4.912830064643366e-05, "loss": 0.8777, "step": 19900 }, { "epoch": 0.02, "learning_rate": 4.9123918045359924e-05, "loss": 0.8847, "step": 20000 }, { "epoch": 0.02, "learning_rate": 4.911953544428619e-05, "loss": 0.8396, "step": 20100 }, { "epoch": 0.02, "learning_rate": 4.9115152843212445e-05, "loss": 0.8237, "step": 20200 }, { "epoch": 0.02, "learning_rate": 4.911077024213871e-05, "loss": 0.8754, "step": 20300 }, { "epoch": 0.02, "learning_rate": 4.910638764106498e-05, "loss": 0.8846, "step": 20400 }, { "epoch": 0.02, "learning_rate": 4.9102005039991236e-05, "loss": 0.8414, "step": 20500 }, { "epoch": 0.02, "learning_rate": 4.90976224389175e-05, "loss": 0.8441, "step": 20600 }, { "epoch": 0.02, "learning_rate": 4.9093239837843764e-05, "loss": 0.8856, "step": 20700 }, { "epoch": 0.02, "learning_rate": 4.908885723677003e-05, "loss": 0.9202, "step": 20800 }, { "epoch": 0.02, "learning_rate": 4.9084474635696285e-05, "loss": 0.8612, "step": 20900 }, { "epoch": 0.02, "learning_rate": 4.908009203462255e-05, "loss": 0.852, "step": 21000 }, { "epoch": 0.02, "learning_rate": 4.907570943354881e-05, "loss": 0.8106, "step": 21100 }, { "epoch": 0.02, "learning_rate": 4.907132683247508e-05, "loss": 0.8366, "step": 21200 }, { "epoch": 0.02, "learning_rate": 4.906694423140134e-05, "loss": 0.9048, "step": 21300 }, { "epoch": 0.02, "learning_rate": 4.9062561630327604e-05, "loss": 0.9017, "step": 21400 }, { "epoch": 0.02, "learning_rate": 4.905817902925386e-05, "loss": 0.8672, "step": 21500 }, { "epoch": 0.02, "learning_rate": 4.9053796428180125e-05, "loss": 0.8904, "step": 21600 }, { "epoch": 0.02, "learning_rate": 4.904941382710639e-05, "loss": 0.8486, "step": 21700 }, { "epoch": 0.02, "learning_rate": 4.904503122603265e-05, "loss": 0.9009, "step": 21800 }, { "epoch": 0.02, "learning_rate": 4.904064862495892e-05, "loss": 0.8664, "step": 21900 }, { "epoch": 0.02, "learning_rate": 4.903626602388518e-05, "loss": 0.9054, "step": 22000 }, { "epoch": 0.02, "learning_rate": 4.9031883422811445e-05, "loss": 0.8422, "step": 22100 }, { "epoch": 0.02, "learning_rate": 4.90275008217377e-05, "loss": 0.8579, "step": 22200 }, { "epoch": 0.02, "learning_rate": 4.9023118220663966e-05, "loss": 0.9171, "step": 22300 }, { "epoch": 0.02, "learning_rate": 4.901873561959023e-05, "loss": 0.863, "step": 22400 }, { "epoch": 0.02, "learning_rate": 4.901435301851649e-05, "loss": 0.876, "step": 22500 }, { "epoch": 0.02, "learning_rate": 4.900997041744275e-05, "loss": 0.8192, "step": 22600 }, { "epoch": 0.02, "learning_rate": 4.900558781636902e-05, "loss": 0.8993, "step": 22700 }, { "epoch": 0.02, "learning_rate": 4.900120521529528e-05, "loss": 0.8771, "step": 22800 }, { "epoch": 0.02, "learning_rate": 4.899682261422154e-05, "loss": 0.8902, "step": 22900 }, { "epoch": 0.02, "learning_rate": 4.8992440013147806e-05, "loss": 0.9054, "step": 23000 }, { "epoch": 0.02, "learning_rate": 4.898805741207407e-05, "loss": 0.8684, "step": 23100 }, { "epoch": 0.02, "learning_rate": 4.898367481100033e-05, "loss": 0.8823, "step": 23200 }, { "epoch": 0.02, "learning_rate": 4.897929220992659e-05, "loss": 0.8566, "step": 23300 }, { "epoch": 0.02, "learning_rate": 4.897490960885286e-05, "loss": 0.856, "step": 23400 }, { "epoch": 0.02, "learning_rate": 4.897052700777912e-05, "loss": 0.9151, "step": 23500 }, { "epoch": 0.02, "learning_rate": 4.896614440670538e-05, "loss": 0.8057, "step": 23600 }, { "epoch": 0.02, "learning_rate": 4.8961805631642386e-05, "loss": 0.8463, "step": 23700 }, { "epoch": 0.02, "learning_rate": 4.895742303056864e-05, "loss": 0.8553, "step": 23800 }, { "epoch": 0.02, "learning_rate": 4.8953040429494907e-05, "loss": 0.8477, "step": 23900 }, { "epoch": 0.02, "learning_rate": 4.894865782842117e-05, "loss": 0.7987, "step": 24000 }, { "epoch": 0.02, "learning_rate": 4.894427522734743e-05, "loss": 0.874, "step": 24100 }, { "epoch": 0.02, "learning_rate": 4.89398926262737e-05, "loss": 0.9251, "step": 24200 }, { "epoch": 0.02, "learning_rate": 4.893551002519996e-05, "loss": 0.8381, "step": 24300 }, { "epoch": 0.02, "learning_rate": 4.893112742412622e-05, "loss": 0.8819, "step": 24400 }, { "epoch": 0.02, "learning_rate": 4.892674482305248e-05, "loss": 0.8195, "step": 24500 }, { "epoch": 0.02, "learning_rate": 4.892236222197875e-05, "loss": 0.8431, "step": 24600 }, { "epoch": 0.02, "learning_rate": 4.891797962090501e-05, "loss": 0.8708, "step": 24700 }, { "epoch": 0.02, "learning_rate": 4.891359701983127e-05, "loss": 0.8316, "step": 24800 }, { "epoch": 0.02, "learning_rate": 4.890921441875754e-05, "loss": 0.8241, "step": 24900 }, { "epoch": 0.02, "learning_rate": 4.89048318176838e-05, "loss": 0.8462, "step": 25000 }, { "epoch": 0.02, "learning_rate": 4.890044921661006e-05, "loss": 0.8586, "step": 25100 }, { "epoch": 0.02, "learning_rate": 4.889606661553632e-05, "loss": 0.8579, "step": 25200 }, { "epoch": 0.02, "learning_rate": 4.889168401446259e-05, "loss": 0.8235, "step": 25300 }, { "epoch": 0.02, "learning_rate": 4.8887301413388844e-05, "loss": 0.8788, "step": 25400 }, { "epoch": 0.02, "learning_rate": 4.888291881231511e-05, "loss": 0.831, "step": 25500 }, { "epoch": 0.02, "learning_rate": 4.887853621124137e-05, "loss": 0.8978, "step": 25600 }, { "epoch": 0.02, "learning_rate": 4.8874153610167636e-05, "loss": 0.8545, "step": 25700 }, { "epoch": 0.02, "learning_rate": 4.88697710090939e-05, "loss": 0.8262, "step": 25800 }, { "epoch": 0.02, "learning_rate": 4.8865388408020164e-05, "loss": 0.826, "step": 25900 }, { "epoch": 0.02, "learning_rate": 4.886100580694643e-05, "loss": 0.8311, "step": 26000 }, { "epoch": 0.02, "learning_rate": 4.8856623205872685e-05, "loss": 0.8451, "step": 26100 }, { "epoch": 0.02, "learning_rate": 4.885224060479895e-05, "loss": 0.8688, "step": 26200 }, { "epoch": 0.02, "learning_rate": 4.884785800372521e-05, "loss": 0.8388, "step": 26300 }, { "epoch": 0.02, "learning_rate": 4.8843475402651476e-05, "loss": 0.8552, "step": 26400 }, { "epoch": 0.02, "learning_rate": 4.883909280157774e-05, "loss": 0.8321, "step": 26500 }, { "epoch": 0.02, "learning_rate": 4.8834710200504004e-05, "loss": 0.8686, "step": 26600 }, { "epoch": 0.02, "learning_rate": 4.883032759943026e-05, "loss": 0.8, "step": 26700 }, { "epoch": 0.02, "learning_rate": 4.8825944998356525e-05, "loss": 0.8083, "step": 26800 }, { "epoch": 0.02, "learning_rate": 4.882156239728279e-05, "loss": 0.8455, "step": 26900 }, { "epoch": 0.02, "learning_rate": 4.881717979620905e-05, "loss": 0.8525, "step": 27000 }, { "epoch": 0.02, "learning_rate": 4.881279719513531e-05, "loss": 0.8326, "step": 27100 }, { "epoch": 0.02, "learning_rate": 4.880841459406158e-05, "loss": 0.8332, "step": 27200 }, { "epoch": 0.02, "learning_rate": 4.8804031992987844e-05, "loss": 0.8877, "step": 27300 }, { "epoch": 0.02, "learning_rate": 4.87996493919141e-05, "loss": 0.807, "step": 27400 }, { "epoch": 0.02, "learning_rate": 4.8795266790840365e-05, "loss": 0.8194, "step": 27500 }, { "epoch": 0.02, "learning_rate": 4.879092801577737e-05, "loss": 0.8663, "step": 27600 }, { "epoch": 0.02, "learning_rate": 4.8786545414703625e-05, "loss": 0.8823, "step": 27700 }, { "epoch": 0.02, "learning_rate": 4.878216281362989e-05, "loss": 0.8301, "step": 27800 }, { "epoch": 0.02, "learning_rate": 4.877778021255616e-05, "loss": 0.9158, "step": 27900 }, { "epoch": 0.02, "learning_rate": 4.877339761148242e-05, "loss": 0.8453, "step": 28000 }, { "epoch": 0.02, "learning_rate": 4.876901501040868e-05, "loss": 0.8206, "step": 28100 }, { "epoch": 0.02, "learning_rate": 4.876467623534568e-05, "loss": 0.8457, "step": 28200 }, { "epoch": 0.02, "learning_rate": 4.876029363427194e-05, "loss": 0.8287, "step": 28300 }, { "epoch": 0.02, "learning_rate": 4.8755911033198205e-05, "loss": 0.8129, "step": 28400 }, { "epoch": 0.02, "learning_rate": 4.875152843212447e-05, "loss": 0.8421, "step": 28500 }, { "epoch": 0.03, "learning_rate": 4.874714583105073e-05, "loss": 0.8434, "step": 28600 }, { "epoch": 0.03, "learning_rate": 4.8742763229976996e-05, "loss": 0.8504, "step": 28700 }, { "epoch": 0.03, "learning_rate": 4.873838062890326e-05, "loss": 0.8047, "step": 28800 }, { "epoch": 0.03, "learning_rate": 4.873399802782952e-05, "loss": 0.8734, "step": 28900 }, { "epoch": 0.03, "learning_rate": 4.872961542675578e-05, "loss": 0.8675, "step": 29000 }, { "epoch": 0.03, "learning_rate": 4.8725232825682045e-05, "loss": 0.8998, "step": 29100 }, { "epoch": 0.03, "learning_rate": 4.87208502246083e-05, "loss": 0.8296, "step": 29200 }, { "epoch": 0.03, "learning_rate": 4.8716467623534566e-05, "loss": 0.8139, "step": 29300 }, { "epoch": 0.03, "learning_rate": 4.871208502246084e-05, "loss": 0.83, "step": 29400 }, { "epoch": 0.03, "learning_rate": 4.8707702421387094e-05, "loss": 0.8226, "step": 29500 }, { "epoch": 0.03, "learning_rate": 4.870331982031336e-05, "loss": 0.7953, "step": 29600 }, { "epoch": 0.03, "learning_rate": 4.869893721923962e-05, "loss": 0.8594, "step": 29700 }, { "epoch": 0.03, "learning_rate": 4.8694554618165885e-05, "loss": 0.8271, "step": 29800 }, { "epoch": 0.03, "learning_rate": 4.869017201709214e-05, "loss": 0.8439, "step": 29900 }, { "epoch": 0.03, "learning_rate": 4.8685789416018406e-05, "loss": 0.8464, "step": 30000 }, { "epoch": 0.03, "learning_rate": 4.868140681494468e-05, "loss": 0.8639, "step": 30100 }, { "epoch": 0.03, "learning_rate": 4.8677024213870934e-05, "loss": 0.7947, "step": 30200 }, { "epoch": 0.03, "learning_rate": 4.86726416127972e-05, "loss": 0.9023, "step": 30300 }, { "epoch": 0.03, "learning_rate": 4.866825901172346e-05, "loss": 0.8229, "step": 30400 }, { "epoch": 0.03, "learning_rate": 4.8663876410649726e-05, "loss": 0.8011, "step": 30500 }, { "epoch": 0.03, "learning_rate": 4.865949380957598e-05, "loss": 0.8158, "step": 30600 }, { "epoch": 0.03, "learning_rate": 4.865511120850225e-05, "loss": 0.8365, "step": 30700 }, { "epoch": 0.03, "learning_rate": 4.865072860742851e-05, "loss": 0.8189, "step": 30800 }, { "epoch": 0.03, "learning_rate": 4.8646346006354774e-05, "loss": 0.8553, "step": 30900 }, { "epoch": 0.03, "learning_rate": 4.864196340528104e-05, "loss": 0.8438, "step": 31000 }, { "epoch": 0.03, "learning_rate": 4.86375808042073e-05, "loss": 0.7937, "step": 31100 }, { "epoch": 0.03, "learning_rate": 4.863319820313356e-05, "loss": 0.852, "step": 31200 }, { "epoch": 0.03, "learning_rate": 4.862881560205982e-05, "loss": 0.8729, "step": 31300 }, { "epoch": 0.03, "learning_rate": 4.862443300098609e-05, "loss": 0.8307, "step": 31400 }, { "epoch": 0.03, "learning_rate": 4.862005039991235e-05, "loss": 0.8071, "step": 31500 }, { "epoch": 0.03, "learning_rate": 4.8615667798838615e-05, "loss": 0.7917, "step": 31600 }, { "epoch": 0.03, "learning_rate": 4.861128519776488e-05, "loss": 0.8596, "step": 31700 }, { "epoch": 0.03, "learning_rate": 4.860690259669114e-05, "loss": 0.7898, "step": 31800 }, { "epoch": 0.03, "learning_rate": 4.86025199956174e-05, "loss": 0.7932, "step": 31900 }, { "epoch": 0.03, "learning_rate": 4.8598137394543664e-05, "loss": 0.8602, "step": 32000 }, { "epoch": 0.03, "learning_rate": 4.859375479346993e-05, "loss": 0.8325, "step": 32100 }, { "epoch": 0.03, "learning_rate": 4.8589372192396185e-05, "loss": 0.8609, "step": 32200 }, { "epoch": 0.03, "learning_rate": 4.8585033417333194e-05, "loss": 0.7698, "step": 32300 }, { "epoch": 0.03, "learning_rate": 4.858065081625945e-05, "loss": 0.8472, "step": 32400 }, { "epoch": 0.03, "learning_rate": 4.8576268215185715e-05, "loss": 0.8362, "step": 32500 }, { "epoch": 0.03, "learning_rate": 4.857188561411198e-05, "loss": 0.8125, "step": 32600 }, { "epoch": 0.03, "learning_rate": 4.856750301303824e-05, "loss": 0.8368, "step": 32700 }, { "epoch": 0.03, "learning_rate": 4.85631204119645e-05, "loss": 0.7988, "step": 32800 }, { "epoch": 0.03, "learning_rate": 4.8558737810890764e-05, "loss": 0.8356, "step": 32900 }, { "epoch": 0.03, "learning_rate": 4.855435520981703e-05, "loss": 0.8606, "step": 33000 }, { "epoch": 0.03, "learning_rate": 4.854997260874329e-05, "loss": 0.7586, "step": 33100 }, { "epoch": 0.03, "learning_rate": 4.8545590007669556e-05, "loss": 0.8198, "step": 33200 }, { "epoch": 0.03, "learning_rate": 4.854120740659582e-05, "loss": 0.8062, "step": 33300 }, { "epoch": 0.03, "learning_rate": 4.8536824805522077e-05, "loss": 0.7724, "step": 33400 }, { "epoch": 0.03, "learning_rate": 4.853244220444834e-05, "loss": 0.833, "step": 33500 }, { "epoch": 0.03, "learning_rate": 4.8528059603374604e-05, "loss": 0.7965, "step": 33600 }, { "epoch": 0.03, "learning_rate": 4.852367700230087e-05, "loss": 0.7809, "step": 33700 }, { "epoch": 0.03, "learning_rate": 4.851929440122713e-05, "loss": 0.7981, "step": 33800 }, { "epoch": 0.03, "learning_rate": 4.8514911800153396e-05, "loss": 0.7943, "step": 33900 }, { "epoch": 0.03, "learning_rate": 4.851052919907966e-05, "loss": 0.7953, "step": 34000 }, { "epoch": 0.03, "learning_rate": 4.850614659800592e-05, "loss": 0.7816, "step": 34100 }, { "epoch": 0.03, "learning_rate": 4.850176399693218e-05, "loss": 0.8317, "step": 34200 }, { "epoch": 0.03, "learning_rate": 4.8497381395858445e-05, "loss": 0.8589, "step": 34300 }, { "epoch": 0.03, "learning_rate": 4.849304262079544e-05, "loss": 0.8085, "step": 34400 }, { "epoch": 0.03, "learning_rate": 4.8488660019721705e-05, "loss": 0.8092, "step": 34500 }, { "epoch": 0.03, "learning_rate": 4.8484277418647975e-05, "loss": 0.8121, "step": 34600 }, { "epoch": 0.03, "learning_rate": 4.847989481757423e-05, "loss": 0.7966, "step": 34700 }, { "epoch": 0.03, "learning_rate": 4.8475512216500496e-05, "loss": 0.8091, "step": 34800 }, { "epoch": 0.03, "learning_rate": 4.847112961542676e-05, "loss": 0.7875, "step": 34900 }, { "epoch": 0.03, "learning_rate": 4.846674701435302e-05, "loss": 0.7973, "step": 35000 }, { "epoch": 0.03, "learning_rate": 4.846236441327928e-05, "loss": 0.8513, "step": 35100 }, { "epoch": 0.03, "learning_rate": 4.8457981812205545e-05, "loss": 0.826, "step": 35200 }, { "epoch": 0.03, "learning_rate": 4.845359921113181e-05, "loss": 0.7278, "step": 35300 }, { "epoch": 0.03, "learning_rate": 4.844921661005807e-05, "loss": 0.7903, "step": 35400 }, { "epoch": 0.03, "learning_rate": 4.8444834008984337e-05, "loss": 0.8301, "step": 35500 }, { "epoch": 0.03, "learning_rate": 4.84404514079106e-05, "loss": 0.7878, "step": 35600 }, { "epoch": 0.03, "learning_rate": 4.843606880683686e-05, "loss": 0.8124, "step": 35700 }, { "epoch": 0.03, "learning_rate": 4.843168620576312e-05, "loss": 0.8015, "step": 35800 }, { "epoch": 0.03, "learning_rate": 4.8427303604689385e-05, "loss": 0.7861, "step": 35900 }, { "epoch": 0.03, "learning_rate": 4.842292100361564e-05, "loss": 0.7964, "step": 36000 }, { "epoch": 0.03, "learning_rate": 4.841853840254191e-05, "loss": 0.8175, "step": 36100 }, { "epoch": 0.03, "learning_rate": 4.841415580146818e-05, "loss": 0.8299, "step": 36200 }, { "epoch": 0.03, "learning_rate": 4.8409773200394434e-05, "loss": 0.8111, "step": 36300 }, { "epoch": 0.03, "learning_rate": 4.84053905993207e-05, "loss": 0.8189, "step": 36400 }, { "epoch": 0.03, "learning_rate": 4.840100799824696e-05, "loss": 0.8433, "step": 36500 }, { "epoch": 0.03, "learning_rate": 4.8396625397173226e-05, "loss": 0.8442, "step": 36600 }, { "epoch": 0.03, "learning_rate": 4.839224279609948e-05, "loss": 0.8015, "step": 36700 }, { "epoch": 0.03, "learning_rate": 4.8387860195025753e-05, "loss": 0.8183, "step": 36800 }, { "epoch": 0.03, "learning_rate": 4.838347759395202e-05, "loss": 0.8216, "step": 36900 }, { "epoch": 0.03, "learning_rate": 4.8379094992878274e-05, "loss": 0.8307, "step": 37000 }, { "epoch": 0.03, "learning_rate": 4.837471239180454e-05, "loss": 0.8289, "step": 37100 }, { "epoch": 0.03, "learning_rate": 4.83703297907308e-05, "loss": 0.7687, "step": 37200 }, { "epoch": 0.03, "learning_rate": 4.8365947189657066e-05, "loss": 0.8037, "step": 37300 }, { "epoch": 0.03, "learning_rate": 4.836156458858332e-05, "loss": 0.8104, "step": 37400 }, { "epoch": 0.03, "learning_rate": 4.835718198750959e-05, "loss": 0.8128, "step": 37500 }, { "epoch": 0.03, "learning_rate": 4.835279938643585e-05, "loss": 0.8136, "step": 37600 }, { "epoch": 0.03, "learning_rate": 4.8348416785362115e-05, "loss": 0.7878, "step": 37700 }, { "epoch": 0.03, "learning_rate": 4.834403418428838e-05, "loss": 0.8161, "step": 37800 }, { "epoch": 0.03, "learning_rate": 4.833965158321464e-05, "loss": 0.7801, "step": 37900 }, { "epoch": 0.03, "learning_rate": 4.83352689821409e-05, "loss": 0.7813, "step": 38000 }, { "epoch": 0.03, "learning_rate": 4.8330886381067163e-05, "loss": 0.7949, "step": 38100 }, { "epoch": 0.03, "learning_rate": 4.8326547606004166e-05, "loss": 0.8184, "step": 38200 }, { "epoch": 0.03, "learning_rate": 4.832216500493043e-05, "loss": 0.7786, "step": 38300 }, { "epoch": 0.03, "learning_rate": 4.8317782403856694e-05, "loss": 0.7931, "step": 38400 }, { "epoch": 0.03, "learning_rate": 4.831339980278296e-05, "loss": 0.8074, "step": 38500 }, { "epoch": 0.03, "learning_rate": 4.8309017201709215e-05, "loss": 0.8203, "step": 38600 }, { "epoch": 0.03, "learning_rate": 4.830463460063548e-05, "loss": 0.8012, "step": 38700 }, { "epoch": 0.03, "learning_rate": 4.830025199956174e-05, "loss": 0.7963, "step": 38800 }, { "epoch": 0.03, "learning_rate": 4.8295869398488e-05, "loss": 0.7476, "step": 38900 }, { "epoch": 0.03, "learning_rate": 4.829148679741427e-05, "loss": 0.7956, "step": 39000 }, { "epoch": 0.03, "learning_rate": 4.8287104196340534e-05, "loss": 0.7766, "step": 39100 }, { "epoch": 0.03, "learning_rate": 4.828272159526679e-05, "loss": 0.7682, "step": 39200 }, { "epoch": 0.03, "learning_rate": 4.8278338994193055e-05, "loss": 0.8261, "step": 39300 }, { "epoch": 0.03, "learning_rate": 4.827395639311932e-05, "loss": 0.8424, "step": 39400 }, { "epoch": 0.03, "learning_rate": 4.826957379204558e-05, "loss": 0.8633, "step": 39500 }, { "epoch": 0.03, "learning_rate": 4.826519119097184e-05, "loss": 0.7965, "step": 39600 }, { "epoch": 0.03, "learning_rate": 4.8260808589898104e-05, "loss": 0.8124, "step": 39700 }, { "epoch": 0.03, "learning_rate": 4.8256425988824375e-05, "loss": 0.8227, "step": 39800 }, { "epoch": 0.03, "learning_rate": 4.825204338775063e-05, "loss": 0.7894, "step": 39900 }, { "epoch": 0.04, "learning_rate": 4.8247660786676896e-05, "loss": 0.7997, "step": 40000 }, { "epoch": 0.04, "learning_rate": 4.824327818560316e-05, "loss": 0.7992, "step": 40100 }, { "epoch": 0.04, "learning_rate": 4.823889558452942e-05, "loss": 0.7851, "step": 40200 }, { "epoch": 0.04, "learning_rate": 4.823451298345568e-05, "loss": 0.7955, "step": 40300 }, { "epoch": 0.04, "learning_rate": 4.8230174208392684e-05, "loss": 0.8048, "step": 40400 }, { "epoch": 0.04, "learning_rate": 4.822579160731895e-05, "loss": 0.7969, "step": 40500 }, { "epoch": 0.04, "learning_rate": 4.822140900624521e-05, "loss": 0.7691, "step": 40600 }, { "epoch": 0.04, "learning_rate": 4.8217026405171475e-05, "loss": 0.7473, "step": 40700 }, { "epoch": 0.04, "learning_rate": 4.821264380409773e-05, "loss": 0.8393, "step": 40800 }, { "epoch": 0.04, "learning_rate": 4.8208261203023996e-05, "loss": 0.8068, "step": 40900 }, { "epoch": 0.04, "learning_rate": 4.820387860195026e-05, "loss": 0.7939, "step": 41000 }, { "epoch": 0.04, "learning_rate": 4.8199496000876524e-05, "loss": 0.7244, "step": 41100 }, { "epoch": 0.04, "learning_rate": 4.819511339980278e-05, "loss": 0.7812, "step": 41200 }, { "epoch": 0.04, "learning_rate": 4.819073079872905e-05, "loss": 0.8116, "step": 41300 }, { "epoch": 0.04, "learning_rate": 4.8186348197655316e-05, "loss": 0.8082, "step": 41400 }, { "epoch": 0.04, "learning_rate": 4.818196559658157e-05, "loss": 0.7542, "step": 41500 }, { "epoch": 0.04, "learning_rate": 4.8177582995507836e-05, "loss": 0.8032, "step": 41600 }, { "epoch": 0.04, "learning_rate": 4.81732003944341e-05, "loss": 0.7832, "step": 41700 }, { "epoch": 0.04, "learning_rate": 4.816881779336036e-05, "loss": 0.8094, "step": 41800 }, { "epoch": 0.04, "learning_rate": 4.816443519228662e-05, "loss": 0.7754, "step": 41900 }, { "epoch": 0.04, "learning_rate": 4.816005259121289e-05, "loss": 0.7744, "step": 42000 }, { "epoch": 0.04, "learning_rate": 4.815566999013915e-05, "loss": 0.7838, "step": 42100 }, { "epoch": 0.04, "learning_rate": 4.815128738906541e-05, "loss": 0.7864, "step": 42200 }, { "epoch": 0.04, "learning_rate": 4.814690478799168e-05, "loss": 0.7357, "step": 42300 }, { "epoch": 0.04, "learning_rate": 4.814252218691794e-05, "loss": 0.8131, "step": 42400 }, { "epoch": 0.04, "learning_rate": 4.81381395858442e-05, "loss": 0.7851, "step": 42500 }, { "epoch": 0.04, "learning_rate": 4.813375698477046e-05, "loss": 0.7482, "step": 42600 }, { "epoch": 0.04, "learning_rate": 4.8129374383696726e-05, "loss": 0.7435, "step": 42700 }, { "epoch": 0.04, "learning_rate": 4.812499178262299e-05, "loss": 0.7796, "step": 42800 }, { "epoch": 0.04, "learning_rate": 4.812060918154925e-05, "loss": 0.7952, "step": 42900 }, { "epoch": 0.04, "learning_rate": 4.811622658047552e-05, "loss": 0.7713, "step": 43000 }, { "epoch": 0.04, "learning_rate": 4.8111843979401774e-05, "loss": 0.7929, "step": 43100 }, { "epoch": 0.04, "learning_rate": 4.810746137832804e-05, "loss": 0.8022, "step": 43200 }, { "epoch": 0.04, "learning_rate": 4.81030787772543e-05, "loss": 0.786, "step": 43300 }, { "epoch": 0.04, "learning_rate": 4.8098696176180566e-05, "loss": 0.8356, "step": 43400 }, { "epoch": 0.04, "learning_rate": 4.809431357510683e-05, "loss": 0.8147, "step": 43500 }, { "epoch": 0.04, "learning_rate": 4.8089930974033094e-05, "loss": 0.7672, "step": 43600 }, { "epoch": 0.04, "learning_rate": 4.808554837295936e-05, "loss": 0.7966, "step": 43700 }, { "epoch": 0.04, "learning_rate": 4.8081165771885615e-05, "loss": 0.7498, "step": 43800 }, { "epoch": 0.04, "learning_rate": 4.807678317081188e-05, "loss": 0.8215, "step": 43900 }, { "epoch": 0.04, "learning_rate": 4.807240056973814e-05, "loss": 0.7742, "step": 44000 }, { "epoch": 0.04, "learning_rate": 4.80680179686644e-05, "loss": 0.7749, "step": 44100 }, { "epoch": 0.04, "learning_rate": 4.806363536759066e-05, "loss": 0.8101, "step": 44200 }, { "epoch": 0.04, "learning_rate": 4.8059252766516934e-05, "loss": 0.781, "step": 44300 }, { "epoch": 0.04, "learning_rate": 4.805487016544319e-05, "loss": 0.7469, "step": 44400 }, { "epoch": 0.04, "learning_rate": 4.8050531390380194e-05, "loss": 0.8202, "step": 44500 }, { "epoch": 0.04, "learning_rate": 4.804614878930646e-05, "loss": 0.7887, "step": 44600 }, { "epoch": 0.04, "learning_rate": 4.8041766188232715e-05, "loss": 0.794, "step": 44700 }, { "epoch": 0.04, "learning_rate": 4.803738358715898e-05, "loss": 0.8125, "step": 44800 }, { "epoch": 0.04, "learning_rate": 4.803300098608524e-05, "loss": 0.7705, "step": 44900 }, { "epoch": 0.04, "learning_rate": 4.8028618385011507e-05, "loss": 0.8103, "step": 45000 }, { "epoch": 0.04, "learning_rate": 4.802423578393777e-05, "loss": 0.7986, "step": 45100 }, { "epoch": 0.04, "learning_rate": 4.8019853182864034e-05, "loss": 0.7652, "step": 45200 }, { "epoch": 0.04, "learning_rate": 4.80154705817903e-05, "loss": 0.7579, "step": 45300 }, { "epoch": 0.04, "learning_rate": 4.8011087980716555e-05, "loss": 0.8024, "step": 45400 }, { "epoch": 0.04, "learning_rate": 4.800670537964282e-05, "loss": 0.7476, "step": 45500 }, { "epoch": 0.04, "learning_rate": 4.800232277856908e-05, "loss": 0.733, "step": 45600 }, { "epoch": 0.04, "learning_rate": 4.799794017749535e-05, "loss": 0.7761, "step": 45700 }, { "epoch": 0.04, "learning_rate": 4.799355757642161e-05, "loss": 0.7532, "step": 45800 }, { "epoch": 0.04, "learning_rate": 4.7989174975347875e-05, "loss": 0.8413, "step": 45900 }, { "epoch": 0.04, "learning_rate": 4.798479237427413e-05, "loss": 0.8088, "step": 46000 }, { "epoch": 0.04, "learning_rate": 4.7980453599211135e-05, "loss": 0.7517, "step": 46100 }, { "epoch": 0.04, "learning_rate": 4.79760709981374e-05, "loss": 0.7889, "step": 46200 }, { "epoch": 0.04, "learning_rate": 4.7971688397063656e-05, "loss": 0.793, "step": 46300 }, { "epoch": 0.04, "learning_rate": 4.796730579598992e-05, "loss": 0.8077, "step": 46400 }, { "epoch": 0.04, "learning_rate": 4.796292319491619e-05, "loss": 0.7563, "step": 46500 }, { "epoch": 0.04, "learning_rate": 4.795854059384245e-05, "loss": 0.7346, "step": 46600 }, { "epoch": 0.04, "learning_rate": 4.795415799276871e-05, "loss": 0.7577, "step": 46700 }, { "epoch": 0.04, "learning_rate": 4.7949775391694975e-05, "loss": 0.7868, "step": 46800 }, { "epoch": 0.04, "learning_rate": 4.794539279062123e-05, "loss": 0.7709, "step": 46900 }, { "epoch": 0.04, "learning_rate": 4.7941010189547496e-05, "loss": 0.7339, "step": 47000 }, { "epoch": 0.04, "learning_rate": 4.793662758847376e-05, "loss": 0.7477, "step": 47100 }, { "epoch": 0.04, "learning_rate": 4.7932244987400024e-05, "loss": 0.7336, "step": 47200 }, { "epoch": 0.04, "learning_rate": 4.792786238632629e-05, "loss": 0.7679, "step": 47300 }, { "epoch": 0.04, "learning_rate": 4.792347978525255e-05, "loss": 0.7354, "step": 47400 }, { "epoch": 0.04, "learning_rate": 4.7919097184178815e-05, "loss": 0.8235, "step": 47500 }, { "epoch": 0.04, "learning_rate": 4.791471458310507e-05, "loss": 0.7937, "step": 47600 }, { "epoch": 0.04, "learning_rate": 4.7910331982031336e-05, "loss": 0.7564, "step": 47700 }, { "epoch": 0.04, "learning_rate": 4.79059493809576e-05, "loss": 0.7613, "step": 47800 }, { "epoch": 0.04, "learning_rate": 4.7901566779883864e-05, "loss": 0.8323, "step": 47900 }, { "epoch": 0.04, "learning_rate": 4.789718417881013e-05, "loss": 0.7595, "step": 48000 }, { "epoch": 0.04, "learning_rate": 4.789280157773639e-05, "loss": 0.7905, "step": 48100 }, { "epoch": 0.04, "learning_rate": 4.788841897666265e-05, "loss": 0.7947, "step": 48200 }, { "epoch": 0.04, "learning_rate": 4.788403637558891e-05, "loss": 0.7528, "step": 48300 }, { "epoch": 0.04, "learning_rate": 4.787965377451518e-05, "loss": 0.7599, "step": 48400 }, { "epoch": 0.04, "learning_rate": 4.787527117344144e-05, "loss": 0.8073, "step": 48500 }, { "epoch": 0.04, "learning_rate": 4.78708885723677e-05, "loss": 0.763, "step": 48600 }, { "epoch": 0.04, "learning_rate": 4.786650597129397e-05, "loss": 0.7329, "step": 48700 }, { "epoch": 0.04, "learning_rate": 4.786212337022023e-05, "loss": 0.7333, "step": 48800 }, { "epoch": 0.04, "learning_rate": 4.785774076914649e-05, "loss": 0.8079, "step": 48900 }, { "epoch": 0.04, "learning_rate": 4.785335816807275e-05, "loss": 0.7613, "step": 49000 }, { "epoch": 0.04, "learning_rate": 4.784897556699902e-05, "loss": 0.7352, "step": 49100 }, { "epoch": 0.04, "learning_rate": 4.784459296592528e-05, "loss": 0.7474, "step": 49200 }, { "epoch": 0.04, "learning_rate": 4.784021036485154e-05, "loss": 0.7597, "step": 49300 }, { "epoch": 0.04, "learning_rate": 4.78358277637778e-05, "loss": 0.751, "step": 49400 }, { "epoch": 0.04, "learning_rate": 4.783144516270407e-05, "loss": 0.7342, "step": 49500 }, { "epoch": 0.04, "learning_rate": 4.782706256163033e-05, "loss": 0.7375, "step": 49600 }, { "epoch": 0.04, "learning_rate": 4.7822679960556594e-05, "loss": 0.7683, "step": 49700 }, { "epoch": 0.04, "learning_rate": 4.781829735948286e-05, "loss": 0.7944, "step": 49800 }, { "epoch": 0.04, "learning_rate": 4.7813914758409114e-05, "loss": 0.7641, "step": 49900 }, { "epoch": 0.04, "learning_rate": 4.780953215733538e-05, "loss": 0.7704, "step": 50000 }, { "epoch": 0.04, "eval_loss": 0.7738358378410339, "eval_runtime": 79510.7981, "eval_samples_per_second": 6.398, "eval_steps_per_second": 3.199, "step": 50000 }, { "epoch": 0.04, "learning_rate": 4.780514955626164e-05, "loss": 0.7621, "step": 50100 }, { "epoch": 0.04, "learning_rate": 4.7800766955187906e-05, "loss": 0.7515, "step": 50200 }, { "epoch": 0.04, "learning_rate": 4.779638435411417e-05, "loss": 0.7785, "step": 50300 }, { "epoch": 0.04, "learning_rate": 4.7792001753040434e-05, "loss": 0.7162, "step": 50400 }, { "epoch": 0.04, "learning_rate": 4.77876191519667e-05, "loss": 0.8159, "step": 50500 }, { "epoch": 0.04, "learning_rate": 4.7783236550892955e-05, "loss": 0.7779, "step": 50600 }, { "epoch": 0.04, "learning_rate": 4.777885394981922e-05, "loss": 0.7708, "step": 50700 }, { "epoch": 0.04, "learning_rate": 4.777447134874548e-05, "loss": 0.7731, "step": 50800 }, { "epoch": 0.04, "learning_rate": 4.777008874767174e-05, "loss": 0.7875, "step": 50900 }, { "epoch": 0.04, "learning_rate": 4.776570614659801e-05, "loss": 0.7395, "step": 51000 }, { "epoch": 0.04, "learning_rate": 4.7761323545524274e-05, "loss": 0.7735, "step": 51100 }, { "epoch": 0.04, "learning_rate": 4.775694094445053e-05, "loss": 0.7586, "step": 51200 }, { "epoch": 0.04, "learning_rate": 4.7752558343376795e-05, "loss": 0.7871, "step": 51300 }, { "epoch": 0.05, "learning_rate": 4.774817574230306e-05, "loss": 0.7513, "step": 51400 }, { "epoch": 0.05, "learning_rate": 4.774379314122932e-05, "loss": 0.7893, "step": 51500 }, { "epoch": 0.05, "learning_rate": 4.773941054015558e-05, "loss": 0.7829, "step": 51600 }, { "epoch": 0.05, "learning_rate": 4.773502793908185e-05, "loss": 0.7594, "step": 51700 }, { "epoch": 0.05, "learning_rate": 4.7730645338008115e-05, "loss": 0.7645, "step": 51800 }, { "epoch": 0.05, "learning_rate": 4.772626273693437e-05, "loss": 0.7781, "step": 51900 }, { "epoch": 0.05, "learning_rate": 4.7721880135860636e-05, "loss": 0.7786, "step": 52000 }, { "epoch": 0.05, "learning_rate": 4.771754136079764e-05, "loss": 0.7585, "step": 52100 }, { "epoch": 0.05, "learning_rate": 4.7713158759723896e-05, "loss": 0.7763, "step": 52200 }, { "epoch": 0.05, "learning_rate": 4.770877615865016e-05, "loss": 0.7818, "step": 52300 }, { "epoch": 0.05, "learning_rate": 4.770443738358716e-05, "loss": 0.7509, "step": 52400 }, { "epoch": 0.05, "learning_rate": 4.7700054782513426e-05, "loss": 0.7659, "step": 52500 }, { "epoch": 0.05, "learning_rate": 4.769567218143969e-05, "loss": 0.7889, "step": 52600 }, { "epoch": 0.05, "learning_rate": 4.769128958036595e-05, "loss": 0.7128, "step": 52700 }, { "epoch": 0.05, "learning_rate": 4.768690697929221e-05, "loss": 0.815, "step": 52800 }, { "epoch": 0.05, "learning_rate": 4.7682524378218475e-05, "loss": 0.7634, "step": 52900 }, { "epoch": 0.05, "learning_rate": 4.767814177714474e-05, "loss": 0.7788, "step": 53000 }, { "epoch": 0.05, "learning_rate": 4.7673759176070996e-05, "loss": 0.7781, "step": 53100 }, { "epoch": 0.05, "learning_rate": 4.7669376574997267e-05, "loss": 0.7423, "step": 53200 }, { "epoch": 0.05, "learning_rate": 4.766499397392353e-05, "loss": 0.733, "step": 53300 }, { "epoch": 0.05, "learning_rate": 4.766061137284979e-05, "loss": 0.7859, "step": 53400 }, { "epoch": 0.05, "learning_rate": 4.765622877177605e-05, "loss": 0.7551, "step": 53500 }, { "epoch": 0.05, "learning_rate": 4.7651846170702315e-05, "loss": 0.8219, "step": 53600 }, { "epoch": 0.05, "learning_rate": 4.764746356962857e-05, "loss": 0.7225, "step": 53700 }, { "epoch": 0.05, "learning_rate": 4.7643080968554836e-05, "loss": 0.7389, "step": 53800 }, { "epoch": 0.05, "learning_rate": 4.763869836748111e-05, "loss": 0.7525, "step": 53900 }, { "epoch": 0.05, "learning_rate": 4.7634315766407364e-05, "loss": 0.8057, "step": 54000 }, { "epoch": 0.05, "learning_rate": 4.762993316533363e-05, "loss": 0.7388, "step": 54100 }, { "epoch": 0.05, "learning_rate": 4.762555056425989e-05, "loss": 0.7811, "step": 54200 }, { "epoch": 0.05, "learning_rate": 4.7621167963186156e-05, "loss": 0.7686, "step": 54300 }, { "epoch": 0.05, "learning_rate": 4.761678536211241e-05, "loss": 0.7874, "step": 54400 }, { "epoch": 0.05, "learning_rate": 4.7612402761038677e-05, "loss": 0.7815, "step": 54500 }, { "epoch": 0.05, "learning_rate": 4.760802015996494e-05, "loss": 0.742, "step": 54600 }, { "epoch": 0.05, "learning_rate": 4.7603637558891204e-05, "loss": 0.7245, "step": 54700 }, { "epoch": 0.05, "learning_rate": 4.759925495781747e-05, "loss": 0.7406, "step": 54800 }, { "epoch": 0.05, "learning_rate": 4.759487235674373e-05, "loss": 0.7995, "step": 54900 }, { "epoch": 0.05, "learning_rate": 4.759048975566999e-05, "loss": 0.7812, "step": 55000 }, { "epoch": 0.05, "learning_rate": 4.758610715459625e-05, "loss": 0.7596, "step": 55100 }, { "epoch": 0.05, "learning_rate": 4.758172455352252e-05, "loss": 0.8012, "step": 55200 }, { "epoch": 0.05, "learning_rate": 4.757734195244878e-05, "loss": 0.7518, "step": 55300 }, { "epoch": 0.05, "learning_rate": 4.7572959351375045e-05, "loss": 0.766, "step": 55400 }, { "epoch": 0.05, "learning_rate": 4.756857675030131e-05, "loss": 0.7832, "step": 55500 }, { "epoch": 0.05, "learning_rate": 4.756419414922757e-05, "loss": 0.8165, "step": 55600 }, { "epoch": 0.05, "learning_rate": 4.755981154815383e-05, "loss": 0.7623, "step": 55700 }, { "epoch": 0.05, "learning_rate": 4.7555428947080093e-05, "loss": 0.766, "step": 55800 }, { "epoch": 0.05, "learning_rate": 4.755104634600636e-05, "loss": 0.7707, "step": 55900 }, { "epoch": 0.05, "learning_rate": 4.754666374493262e-05, "loss": 0.7161, "step": 56000 }, { "epoch": 0.05, "learning_rate": 4.754228114385888e-05, "loss": 0.7701, "step": 56100 }, { "epoch": 0.05, "learning_rate": 4.753789854278515e-05, "loss": 0.8108, "step": 56200 }, { "epoch": 0.05, "learning_rate": 4.753351594171141e-05, "loss": 0.7544, "step": 56300 }, { "epoch": 0.05, "learning_rate": 4.752913334063767e-05, "loss": 0.7288, "step": 56400 }, { "epoch": 0.05, "learning_rate": 4.752479456557467e-05, "loss": 0.7159, "step": 56500 }, { "epoch": 0.05, "learning_rate": 4.752041196450093e-05, "loss": 0.7498, "step": 56600 }, { "epoch": 0.05, "learning_rate": 4.7516029363427194e-05, "loss": 0.7357, "step": 56700 }, { "epoch": 0.05, "learning_rate": 4.751164676235346e-05, "loss": 0.7562, "step": 56800 }, { "epoch": 0.05, "learning_rate": 4.750726416127972e-05, "loss": 0.8008, "step": 56900 }, { "epoch": 0.05, "learning_rate": 4.7502881560205985e-05, "loss": 0.7223, "step": 57000 }, { "epoch": 0.05, "learning_rate": 4.749849895913225e-05, "loss": 0.7899, "step": 57100 }, { "epoch": 0.05, "learning_rate": 4.749411635805851e-05, "loss": 0.758, "step": 57200 }, { "epoch": 0.05, "learning_rate": 4.748973375698477e-05, "loss": 0.7737, "step": 57300 }, { "epoch": 0.05, "learning_rate": 4.7485351155911034e-05, "loss": 0.7913, "step": 57400 }, { "epoch": 0.05, "learning_rate": 4.74809685548373e-05, "loss": 0.7597, "step": 57500 }, { "epoch": 0.05, "learning_rate": 4.7476585953763555e-05, "loss": 0.7742, "step": 57600 }, { "epoch": 0.05, "learning_rate": 4.7472247178700565e-05, "loss": 0.7813, "step": 57700 }, { "epoch": 0.05, "learning_rate": 4.746786457762682e-05, "loss": 0.742, "step": 57800 }, { "epoch": 0.05, "learning_rate": 4.7463481976553086e-05, "loss": 0.7631, "step": 57900 }, { "epoch": 0.05, "learning_rate": 4.745909937547935e-05, "loss": 0.7224, "step": 58000 }, { "epoch": 0.05, "learning_rate": 4.7454716774405614e-05, "loss": 0.7356, "step": 58100 }, { "epoch": 0.05, "learning_rate": 4.745033417333187e-05, "loss": 0.7337, "step": 58200 }, { "epoch": 0.05, "learning_rate": 4.7445951572258134e-05, "loss": 0.7632, "step": 58300 }, { "epoch": 0.05, "learning_rate": 4.7441568971184405e-05, "loss": 0.7116, "step": 58400 }, { "epoch": 0.05, "learning_rate": 4.743718637011066e-05, "loss": 0.8256, "step": 58500 }, { "epoch": 0.05, "learning_rate": 4.7432803769036926e-05, "loss": 0.74, "step": 58600 }, { "epoch": 0.05, "learning_rate": 4.742842116796319e-05, "loss": 0.7193, "step": 58700 }, { "epoch": 0.05, "learning_rate": 4.7424038566889454e-05, "loss": 0.6876, "step": 58800 }, { "epoch": 0.05, "learning_rate": 4.741965596581571e-05, "loss": 0.752, "step": 58900 }, { "epoch": 0.05, "learning_rate": 4.7415273364741975e-05, "loss": 0.7214, "step": 59000 }, { "epoch": 0.05, "learning_rate": 4.741089076366824e-05, "loss": 0.7663, "step": 59100 }, { "epoch": 0.05, "learning_rate": 4.74065081625945e-05, "loss": 0.6977, "step": 59200 }, { "epoch": 0.05, "learning_rate": 4.7402125561520766e-05, "loss": 0.7689, "step": 59300 }, { "epoch": 0.05, "learning_rate": 4.739774296044703e-05, "loss": 0.7537, "step": 59400 }, { "epoch": 0.05, "learning_rate": 4.739336035937329e-05, "loss": 0.7406, "step": 59500 }, { "epoch": 0.05, "learning_rate": 4.738897775829955e-05, "loss": 0.7167, "step": 59600 }, { "epoch": 0.05, "learning_rate": 4.7384595157225815e-05, "loss": 0.7397, "step": 59700 }, { "epoch": 0.05, "learning_rate": 4.738021255615208e-05, "loss": 0.7276, "step": 59800 }, { "epoch": 0.05, "learning_rate": 4.737582995507834e-05, "loss": 0.7912, "step": 59900 }, { "epoch": 0.05, "learning_rate": 4.737144735400461e-05, "loss": 0.7575, "step": 60000 }, { "epoch": 0.05, "learning_rate": 4.736706475293087e-05, "loss": 0.8229, "step": 60100 }, { "epoch": 0.05, "learning_rate": 4.736268215185713e-05, "loss": 0.789, "step": 60200 }, { "epoch": 0.05, "learning_rate": 4.735834337679413e-05, "loss": 0.7348, "step": 60300 }, { "epoch": 0.05, "learning_rate": 4.735396077572039e-05, "loss": 0.7947, "step": 60400 }, { "epoch": 0.05, "learning_rate": 4.734957817464665e-05, "loss": 0.7363, "step": 60500 }, { "epoch": 0.05, "learning_rate": 4.734519557357292e-05, "loss": 0.7129, "step": 60600 }, { "epoch": 0.05, "learning_rate": 4.734081297249918e-05, "loss": 0.7337, "step": 60700 }, { "epoch": 0.05, "learning_rate": 4.733643037142544e-05, "loss": 0.7345, "step": 60800 }, { "epoch": 0.05, "learning_rate": 4.733204777035171e-05, "loss": 0.683, "step": 60900 }, { "epoch": 0.05, "learning_rate": 4.732766516927797e-05, "loss": 0.7007, "step": 61000 }, { "epoch": 0.05, "learning_rate": 4.732328256820423e-05, "loss": 0.7743, "step": 61100 }, { "epoch": 0.05, "learning_rate": 4.731889996713049e-05, "loss": 0.7295, "step": 61200 }, { "epoch": 0.05, "learning_rate": 4.7314517366056756e-05, "loss": 0.745, "step": 61300 }, { "epoch": 0.05, "learning_rate": 4.731013476498302e-05, "loss": 0.7281, "step": 61400 }, { "epoch": 0.05, "learning_rate": 4.7305752163909284e-05, "loss": 0.7476, "step": 61500 }, { "epoch": 0.05, "learning_rate": 4.730136956283555e-05, "loss": 0.7481, "step": 61600 }, { "epoch": 0.05, "learning_rate": 4.7296986961761805e-05, "loss": 0.757, "step": 61700 }, { "epoch": 0.05, "learning_rate": 4.729260436068807e-05, "loss": 0.6987, "step": 61800 }, { "epoch": 0.05, "learning_rate": 4.728822175961433e-05, "loss": 0.7239, "step": 61900 }, { "epoch": 0.05, "learning_rate": 4.7283839158540596e-05, "loss": 0.7217, "step": 62000 }, { "epoch": 0.05, "learning_rate": 4.727945655746686e-05, "loss": 0.7273, "step": 62100 }, { "epoch": 0.05, "learning_rate": 4.7275073956393124e-05, "loss": 0.7117, "step": 62200 }, { "epoch": 0.05, "learning_rate": 4.727069135531939e-05, "loss": 0.755, "step": 62300 }, { "epoch": 0.05, "learning_rate": 4.7266308754245645e-05, "loss": 0.7831, "step": 62400 }, { "epoch": 0.05, "learning_rate": 4.726192615317191e-05, "loss": 0.7663, "step": 62500 }, { "epoch": 0.05, "learning_rate": 4.725758737810891e-05, "loss": 0.7515, "step": 62600 }, { "epoch": 0.05, "learning_rate": 4.725320477703517e-05, "loss": 0.7329, "step": 62700 }, { "epoch": 0.06, "learning_rate": 4.724882217596144e-05, "loss": 0.7341, "step": 62800 }, { "epoch": 0.06, "learning_rate": 4.72444395748877e-05, "loss": 0.7197, "step": 62900 }, { "epoch": 0.06, "learning_rate": 4.724005697381396e-05, "loss": 0.7218, "step": 63000 }, { "epoch": 0.06, "learning_rate": 4.7235674372740224e-05, "loss": 0.7251, "step": 63100 }, { "epoch": 0.06, "learning_rate": 4.723129177166649e-05, "loss": 0.7748, "step": 63200 }, { "epoch": 0.06, "learning_rate": 4.7226909170592745e-05, "loss": 0.7033, "step": 63300 }, { "epoch": 0.06, "learning_rate": 4.722252656951901e-05, "loss": 0.7744, "step": 63400 }, { "epoch": 0.06, "learning_rate": 4.721814396844527e-05, "loss": 0.7845, "step": 63500 }, { "epoch": 0.06, "learning_rate": 4.721376136737154e-05, "loss": 0.7196, "step": 63600 }, { "epoch": 0.06, "learning_rate": 4.72093787662978e-05, "loss": 0.7127, "step": 63700 }, { "epoch": 0.06, "learning_rate": 4.7204996165224065e-05, "loss": 0.7302, "step": 63800 }, { "epoch": 0.06, "learning_rate": 4.720065739016106e-05, "loss": 0.7455, "step": 63900 }, { "epoch": 0.06, "learning_rate": 4.7196274789087325e-05, "loss": 0.7602, "step": 64000 }, { "epoch": 0.06, "learning_rate": 4.719189218801359e-05, "loss": 0.7566, "step": 64100 }, { "epoch": 0.06, "learning_rate": 4.7187509586939846e-05, "loss": 0.7339, "step": 64200 }, { "epoch": 0.06, "learning_rate": 4.7183126985866116e-05, "loss": 0.8176, "step": 64300 }, { "epoch": 0.06, "learning_rate": 4.717874438479238e-05, "loss": 0.7827, "step": 64400 }, { "epoch": 0.06, "learning_rate": 4.717436178371864e-05, "loss": 0.7373, "step": 64500 }, { "epoch": 0.06, "learning_rate": 4.71699791826449e-05, "loss": 0.7771, "step": 64600 }, { "epoch": 0.06, "learning_rate": 4.7165596581571165e-05, "loss": 0.7641, "step": 64700 }, { "epoch": 0.06, "learning_rate": 4.716121398049743e-05, "loss": 0.7342, "step": 64800 }, { "epoch": 0.06, "learning_rate": 4.7156831379423686e-05, "loss": 0.6829, "step": 64900 }, { "epoch": 0.06, "learning_rate": 4.715244877834995e-05, "loss": 0.6875, "step": 65000 }, { "epoch": 0.06, "learning_rate": 4.714806617727622e-05, "loss": 0.721, "step": 65100 }, { "epoch": 0.06, "learning_rate": 4.714368357620248e-05, "loss": 0.7646, "step": 65200 }, { "epoch": 0.06, "learning_rate": 4.713930097512874e-05, "loss": 0.7395, "step": 65300 }, { "epoch": 0.06, "learning_rate": 4.7134918374055005e-05, "loss": 0.6838, "step": 65400 }, { "epoch": 0.06, "learning_rate": 4.713053577298126e-05, "loss": 0.696, "step": 65500 }, { "epoch": 0.06, "learning_rate": 4.7126153171907526e-05, "loss": 0.7509, "step": 65600 }, { "epoch": 0.06, "learning_rate": 4.712177057083379e-05, "loss": 0.7076, "step": 65700 }, { "epoch": 0.06, "learning_rate": 4.7117387969760054e-05, "loss": 0.721, "step": 65800 }, { "epoch": 0.06, "learning_rate": 4.711300536868632e-05, "loss": 0.7331, "step": 65900 }, { "epoch": 0.06, "learning_rate": 4.710862276761258e-05, "loss": 0.7376, "step": 66000 }, { "epoch": 0.06, "learning_rate": 4.7104240166538846e-05, "loss": 0.7188, "step": 66100 }, { "epoch": 0.06, "learning_rate": 4.70998575654651e-05, "loss": 0.7657, "step": 66200 }, { "epoch": 0.06, "learning_rate": 4.709547496439137e-05, "loss": 0.6871, "step": 66300 }, { "epoch": 0.06, "learning_rate": 4.709109236331763e-05, "loss": 0.7189, "step": 66400 }, { "epoch": 0.06, "learning_rate": 4.7086709762243894e-05, "loss": 0.7227, "step": 66500 }, { "epoch": 0.06, "learning_rate": 4.708232716117016e-05, "loss": 0.7571, "step": 66600 }, { "epoch": 0.06, "learning_rate": 4.707794456009642e-05, "loss": 0.7342, "step": 66700 }, { "epoch": 0.06, "learning_rate": 4.7073561959022686e-05, "loss": 0.7506, "step": 66800 }, { "epoch": 0.06, "learning_rate": 4.706917935794894e-05, "loss": 0.8274, "step": 66900 }, { "epoch": 0.06, "learning_rate": 4.706479675687521e-05, "loss": 0.7701, "step": 67000 }, { "epoch": 0.06, "learning_rate": 4.706041415580147e-05, "loss": 0.7761, "step": 67100 }, { "epoch": 0.06, "learning_rate": 4.705603155472773e-05, "loss": 0.8041, "step": 67200 }, { "epoch": 0.06, "learning_rate": 4.7051648953654e-05, "loss": 0.7401, "step": 67300 }, { "epoch": 0.06, "learning_rate": 4.704726635258026e-05, "loss": 0.7756, "step": 67400 }, { "epoch": 0.06, "learning_rate": 4.704288375150652e-05, "loss": 0.7213, "step": 67500 }, { "epoch": 0.06, "learning_rate": 4.7038501150432784e-05, "loss": 0.7411, "step": 67600 }, { "epoch": 0.06, "learning_rate": 4.703411854935905e-05, "loss": 0.8019, "step": 67700 }, { "epoch": 0.06, "learning_rate": 4.702973594828531e-05, "loss": 0.7075, "step": 67800 }, { "epoch": 0.06, "learning_rate": 4.702535334721157e-05, "loss": 0.7681, "step": 67900 }, { "epoch": 0.06, "learning_rate": 4.702097074613783e-05, "loss": 0.7269, "step": 68000 }, { "epoch": 0.06, "learning_rate": 4.70165881450641e-05, "loss": 0.7258, "step": 68100 }, { "epoch": 0.06, "learning_rate": 4.701220554399036e-05, "loss": 0.7877, "step": 68200 }, { "epoch": 0.06, "learning_rate": 4.7007822942916624e-05, "loss": 0.7218, "step": 68300 }, { "epoch": 0.06, "learning_rate": 4.700344034184289e-05, "loss": 0.7208, "step": 68400 }, { "epoch": 0.06, "learning_rate": 4.6999057740769145e-05, "loss": 0.7071, "step": 68500 }, { "epoch": 0.06, "learning_rate": 4.699467513969541e-05, "loss": 0.7576, "step": 68600 }, { "epoch": 0.06, "learning_rate": 4.699029253862167e-05, "loss": 0.7345, "step": 68700 }, { "epoch": 0.06, "learning_rate": 4.6985909937547936e-05, "loss": 0.7571, "step": 68800 }, { "epoch": 0.06, "learning_rate": 4.69815273364742e-05, "loss": 0.7324, "step": 68900 }, { "epoch": 0.06, "learning_rate": 4.6977144735400464e-05, "loss": 0.6766, "step": 69000 }, { "epoch": 0.06, "learning_rate": 4.697276213432673e-05, "loss": 0.7446, "step": 69100 }, { "epoch": 0.06, "learning_rate": 4.6968379533252985e-05, "loss": 0.7312, "step": 69200 }, { "epoch": 0.06, "learning_rate": 4.696399693217925e-05, "loss": 0.772, "step": 69300 }, { "epoch": 0.06, "learning_rate": 4.695961433110551e-05, "loss": 0.72, "step": 69400 }, { "epoch": 0.06, "learning_rate": 4.695523173003178e-05, "loss": 0.7472, "step": 69500 }, { "epoch": 0.06, "learning_rate": 4.695084912895804e-05, "loss": 0.7464, "step": 69600 }, { "epoch": 0.06, "learning_rate": 4.6946466527884305e-05, "loss": 0.7257, "step": 69700 }, { "epoch": 0.06, "learning_rate": 4.694208392681056e-05, "loss": 0.7017, "step": 69800 }, { "epoch": 0.06, "learning_rate": 4.6937701325736826e-05, "loss": 0.6693, "step": 69900 }, { "epoch": 0.06, "learning_rate": 4.693331872466309e-05, "loss": 0.7577, "step": 70000 }, { "epoch": 0.06, "learning_rate": 4.692893612358935e-05, "loss": 0.808, "step": 70100 }, { "epoch": 0.06, "learning_rate": 4.692459734852635e-05, "loss": 0.7028, "step": 70200 }, { "epoch": 0.06, "learning_rate": 4.692021474745262e-05, "loss": 0.7183, "step": 70300 }, { "epoch": 0.06, "learning_rate": 4.691583214637888e-05, "loss": 0.721, "step": 70400 }, { "epoch": 0.06, "learning_rate": 4.691144954530514e-05, "loss": 0.7186, "step": 70500 }, { "epoch": 0.06, "learning_rate": 4.6907066944231405e-05, "loss": 0.7153, "step": 70600 }, { "epoch": 0.06, "learning_rate": 4.690268434315767e-05, "loss": 0.754, "step": 70700 }, { "epoch": 0.06, "learning_rate": 4.6898301742083926e-05, "loss": 0.7444, "step": 70800 }, { "epoch": 0.06, "learning_rate": 4.689391914101019e-05, "loss": 0.7054, "step": 70900 }, { "epoch": 0.06, "learning_rate": 4.688953653993646e-05, "loss": 0.7525, "step": 71000 }, { "epoch": 0.06, "learning_rate": 4.688515393886272e-05, "loss": 0.7603, "step": 71100 }, { "epoch": 0.06, "learning_rate": 4.688077133778898e-05, "loss": 0.7066, "step": 71200 }, { "epoch": 0.06, "learning_rate": 4.6876388736715245e-05, "loss": 0.7383, "step": 71300 }, { "epoch": 0.06, "learning_rate": 4.68720061356415e-05, "loss": 0.7191, "step": 71400 }, { "epoch": 0.06, "learning_rate": 4.6867623534567766e-05, "loss": 0.7293, "step": 71500 }, { "epoch": 0.06, "learning_rate": 4.686324093349403e-05, "loss": 0.7277, "step": 71600 }, { "epoch": 0.06, "learning_rate": 4.6858858332420294e-05, "loss": 0.7192, "step": 71700 }, { "epoch": 0.06, "learning_rate": 4.685447573134656e-05, "loss": 0.6965, "step": 71800 }, { "epoch": 0.06, "learning_rate": 4.685009313027282e-05, "loss": 0.7534, "step": 71900 }, { "epoch": 0.06, "learning_rate": 4.6845710529199086e-05, "loss": 0.7258, "step": 72000 }, { "epoch": 0.06, "learning_rate": 4.684132792812534e-05, "loss": 0.6781, "step": 72100 }, { "epoch": 0.06, "learning_rate": 4.6836945327051607e-05, "loss": 0.7204, "step": 72200 }, { "epoch": 0.06, "learning_rate": 4.68326065519886e-05, "loss": 0.7178, "step": 72300 }, { "epoch": 0.06, "learning_rate": 4.6828223950914867e-05, "loss": 0.6867, "step": 72400 }, { "epoch": 0.06, "learning_rate": 4.682384134984114e-05, "loss": 0.7365, "step": 72500 }, { "epoch": 0.06, "learning_rate": 4.6819458748767394e-05, "loss": 0.7376, "step": 72600 }, { "epoch": 0.06, "learning_rate": 4.681507614769366e-05, "loss": 0.7513, "step": 72700 }, { "epoch": 0.06, "learning_rate": 4.681069354661992e-05, "loss": 0.6767, "step": 72800 }, { "epoch": 0.06, "learning_rate": 4.680635477155692e-05, "loss": 0.7045, "step": 72900 }, { "epoch": 0.06, "learning_rate": 4.680197217048318e-05, "loss": 0.7005, "step": 73000 }, { "epoch": 0.06, "learning_rate": 4.6797589569409446e-05, "loss": 0.6519, "step": 73100 }, { "epoch": 0.06, "learning_rate": 4.679320696833571e-05, "loss": 0.7094, "step": 73200 }, { "epoch": 0.06, "learning_rate": 4.6788824367261974e-05, "loss": 0.7142, "step": 73300 }, { "epoch": 0.06, "learning_rate": 4.678444176618824e-05, "loss": 0.6942, "step": 73400 }, { "epoch": 0.06, "learning_rate": 4.67800591651145e-05, "loss": 0.6909, "step": 73500 }, { "epoch": 0.06, "learning_rate": 4.677567656404076e-05, "loss": 0.7346, "step": 73600 }, { "epoch": 0.06, "learning_rate": 4.677129396296702e-05, "loss": 0.7068, "step": 73700 }, { "epoch": 0.06, "learning_rate": 4.6766911361893286e-05, "loss": 0.7227, "step": 73800 }, { "epoch": 0.06, "learning_rate": 4.6762528760819543e-05, "loss": 0.7196, "step": 73900 }, { "epoch": 0.06, "learning_rate": 4.6758146159745814e-05, "loss": 0.7207, "step": 74000 }, { "epoch": 0.06, "learning_rate": 4.675376355867208e-05, "loss": 0.7757, "step": 74100 }, { "epoch": 0.07, "learning_rate": 4.6749380957598335e-05, "loss": 0.7319, "step": 74200 }, { "epoch": 0.07, "learning_rate": 4.67449983565246e-05, "loss": 0.7092, "step": 74300 }, { "epoch": 0.07, "learning_rate": 4.674061575545086e-05, "loss": 0.7526, "step": 74400 }, { "epoch": 0.07, "learning_rate": 4.673623315437713e-05, "loss": 0.7176, "step": 74500 }, { "epoch": 0.07, "learning_rate": 4.6731850553303384e-05, "loss": 0.726, "step": 74600 }, { "epoch": 0.07, "learning_rate": 4.6727467952229654e-05, "loss": 0.68, "step": 74700 }, { "epoch": 0.07, "learning_rate": 4.672308535115592e-05, "loss": 0.7784, "step": 74800 }, { "epoch": 0.07, "learning_rate": 4.6718746576092914e-05, "loss": 0.7321, "step": 74900 }, { "epoch": 0.07, "learning_rate": 4.671436397501918e-05, "loss": 0.7704, "step": 75000 }, { "epoch": 0.07, "learning_rate": 4.6709981373945435e-05, "loss": 0.7464, "step": 75100 }, { "epoch": 0.07, "learning_rate": 4.67055987728717e-05, "loss": 0.7443, "step": 75200 }, { "epoch": 0.07, "learning_rate": 4.670121617179796e-05, "loss": 0.7081, "step": 75300 }, { "epoch": 0.07, "learning_rate": 4.669683357072423e-05, "loss": 0.7639, "step": 75400 }, { "epoch": 0.07, "learning_rate": 4.669245096965049e-05, "loss": 0.7396, "step": 75500 }, { "epoch": 0.07, "learning_rate": 4.6688068368576755e-05, "loss": 0.6984, "step": 75600 }, { "epoch": 0.07, "learning_rate": 4.668368576750302e-05, "loss": 0.6922, "step": 75700 }, { "epoch": 0.07, "learning_rate": 4.6679303166429276e-05, "loss": 0.6644, "step": 75800 }, { "epoch": 0.07, "learning_rate": 4.667492056535554e-05, "loss": 0.7566, "step": 75900 }, { "epoch": 0.07, "learning_rate": 4.6670537964281803e-05, "loss": 0.6941, "step": 76000 }, { "epoch": 0.07, "learning_rate": 4.666615536320806e-05, "loss": 0.7252, "step": 76100 }, { "epoch": 0.07, "learning_rate": 4.666177276213433e-05, "loss": 0.7189, "step": 76200 }, { "epoch": 0.07, "learning_rate": 4.6657390161060595e-05, "loss": 0.7131, "step": 76300 }, { "epoch": 0.07, "learning_rate": 4.665300755998685e-05, "loss": 0.6844, "step": 76400 }, { "epoch": 0.07, "learning_rate": 4.6648624958913116e-05, "loss": 0.7517, "step": 76500 }, { "epoch": 0.07, "learning_rate": 4.664424235783938e-05, "loss": 0.7278, "step": 76600 }, { "epoch": 0.07, "learning_rate": 4.6639859756765644e-05, "loss": 0.7431, "step": 76700 }, { "epoch": 0.07, "learning_rate": 4.66354771556919e-05, "loss": 0.7321, "step": 76800 }, { "epoch": 0.07, "learning_rate": 4.6631094554618165e-05, "loss": 0.6624, "step": 76900 }, { "epoch": 0.07, "learning_rate": 4.6626711953544435e-05, "loss": 0.7143, "step": 77000 }, { "epoch": 0.07, "learning_rate": 4.662232935247069e-05, "loss": 0.7634, "step": 77100 }, { "epoch": 0.07, "learning_rate": 4.6617946751396956e-05, "loss": 0.6772, "step": 77200 }, { "epoch": 0.07, "learning_rate": 4.661356415032322e-05, "loss": 0.7267, "step": 77300 }, { "epoch": 0.07, "learning_rate": 4.6609181549249484e-05, "loss": 0.7375, "step": 77400 }, { "epoch": 0.07, "learning_rate": 4.660479894817574e-05, "loss": 0.711, "step": 77500 }, { "epoch": 0.07, "learning_rate": 4.6600416347102005e-05, "loss": 0.7006, "step": 77600 }, { "epoch": 0.07, "learning_rate": 4.6596033746028276e-05, "loss": 0.7981, "step": 77700 }, { "epoch": 0.07, "learning_rate": 4.659165114495453e-05, "loss": 0.7234, "step": 77800 }, { "epoch": 0.07, "learning_rate": 4.65872685438808e-05, "loss": 0.7595, "step": 77900 }, { "epoch": 0.07, "learning_rate": 4.658288594280706e-05, "loss": 0.7176, "step": 78000 }, { "epoch": 0.07, "learning_rate": 4.657850334173332e-05, "loss": 0.7093, "step": 78100 }, { "epoch": 0.07, "learning_rate": 4.657412074065958e-05, "loss": 0.7116, "step": 78200 }, { "epoch": 0.07, "learning_rate": 4.6569738139585845e-05, "loss": 0.7516, "step": 78300 }, { "epoch": 0.07, "learning_rate": 4.656535553851211e-05, "loss": 0.7237, "step": 78400 }, { "epoch": 0.07, "learning_rate": 4.656097293743837e-05, "loss": 0.7441, "step": 78500 }, { "epoch": 0.07, "learning_rate": 4.655659033636464e-05, "loss": 0.7624, "step": 78600 }, { "epoch": 0.07, "learning_rate": 4.65522077352909e-05, "loss": 0.7424, "step": 78700 }, { "epoch": 0.07, "learning_rate": 4.654782513421716e-05, "loss": 0.7186, "step": 78800 }, { "epoch": 0.07, "learning_rate": 4.654344253314342e-05, "loss": 0.7447, "step": 78900 }, { "epoch": 0.07, "learning_rate": 4.6539059932069686e-05, "loss": 0.7018, "step": 79000 }, { "epoch": 0.07, "learning_rate": 4.653472115700668e-05, "loss": 0.7174, "step": 79100 }, { "epoch": 0.07, "learning_rate": 4.653033855593295e-05, "loss": 0.7036, "step": 79200 }, { "epoch": 0.07, "learning_rate": 4.652595595485921e-05, "loss": 0.7419, "step": 79300 }, { "epoch": 0.07, "learning_rate": 4.6521573353785474e-05, "loss": 0.6937, "step": 79400 }, { "epoch": 0.07, "learning_rate": 4.651719075271174e-05, "loss": 0.7182, "step": 79500 }, { "epoch": 0.07, "learning_rate": 4.6512808151638e-05, "loss": 0.7337, "step": 79600 }, { "epoch": 0.07, "learning_rate": 4.650842555056426e-05, "loss": 0.7442, "step": 79700 }, { "epoch": 0.07, "learning_rate": 4.650404294949052e-05, "loss": 0.6751, "step": 79800 }, { "epoch": 0.07, "learning_rate": 4.649966034841679e-05, "loss": 0.7122, "step": 79900 }, { "epoch": 0.07, "learning_rate": 4.649527774734305e-05, "loss": 0.7274, "step": 80000 }, { "epoch": 0.07, "learning_rate": 4.6490895146269314e-05, "loss": 0.7567, "step": 80100 }, { "epoch": 0.07, "learning_rate": 4.648651254519558e-05, "loss": 0.722, "step": 80200 }, { "epoch": 0.07, "learning_rate": 4.6482129944121835e-05, "loss": 0.7685, "step": 80300 }, { "epoch": 0.07, "learning_rate": 4.64777473430481e-05, "loss": 0.6959, "step": 80400 }, { "epoch": 0.07, "learning_rate": 4.647336474197436e-05, "loss": 0.7012, "step": 80500 }, { "epoch": 0.07, "learning_rate": 4.6468982140900627e-05, "loss": 0.6581, "step": 80600 }, { "epoch": 0.07, "learning_rate": 4.646459953982689e-05, "loss": 0.7807, "step": 80700 }, { "epoch": 0.07, "learning_rate": 4.6460216938753154e-05, "loss": 0.6778, "step": 80800 }, { "epoch": 0.07, "learning_rate": 4.645583433767942e-05, "loss": 0.6772, "step": 80900 }, { "epoch": 0.07, "learning_rate": 4.6451451736605675e-05, "loss": 0.7037, "step": 81000 }, { "epoch": 0.07, "learning_rate": 4.644706913553194e-05, "loss": 0.7486, "step": 81100 }, { "epoch": 0.07, "learning_rate": 4.64426865344582e-05, "loss": 0.7091, "step": 81200 }, { "epoch": 0.07, "learning_rate": 4.643830393338447e-05, "loss": 0.7258, "step": 81300 }, { "epoch": 0.07, "learning_rate": 4.643392133231073e-05, "loss": 0.714, "step": 81400 }, { "epoch": 0.07, "learning_rate": 4.6429538731236995e-05, "loss": 0.7957, "step": 81500 }, { "epoch": 0.07, "learning_rate": 4.642515613016326e-05, "loss": 0.6991, "step": 81600 }, { "epoch": 0.07, "learning_rate": 4.6420773529089516e-05, "loss": 0.7377, "step": 81700 }, { "epoch": 0.07, "learning_rate": 4.641639092801578e-05, "loss": 0.7104, "step": 81800 }, { "epoch": 0.07, "learning_rate": 4.641200832694204e-05, "loss": 0.6876, "step": 81900 }, { "epoch": 0.07, "learning_rate": 4.64076257258683e-05, "loss": 0.6625, "step": 82000 }, { "epoch": 0.07, "learning_rate": 4.64032869508053e-05, "loss": 0.6925, "step": 82100 }, { "epoch": 0.07, "learning_rate": 4.639890434973157e-05, "loss": 0.7097, "step": 82200 }, { "epoch": 0.07, "learning_rate": 4.639452174865783e-05, "loss": 0.7141, "step": 82300 }, { "epoch": 0.07, "learning_rate": 4.6390139147584095e-05, "loss": 0.7273, "step": 82400 }, { "epoch": 0.07, "learning_rate": 4.638575654651036e-05, "loss": 0.7188, "step": 82500 }, { "epoch": 0.07, "learning_rate": 4.6381373945436616e-05, "loss": 0.7167, "step": 82600 }, { "epoch": 0.07, "learning_rate": 4.637699134436288e-05, "loss": 0.6817, "step": 82700 }, { "epoch": 0.07, "learning_rate": 4.6372608743289144e-05, "loss": 0.6985, "step": 82800 }, { "epoch": 0.07, "learning_rate": 4.636822614221541e-05, "loss": 0.7166, "step": 82900 }, { "epoch": 0.07, "learning_rate": 4.636384354114167e-05, "loss": 0.7242, "step": 83000 }, { "epoch": 0.07, "learning_rate": 4.6359460940067935e-05, "loss": 0.6676, "step": 83100 }, { "epoch": 0.07, "learning_rate": 4.635507833899419e-05, "loss": 0.7121, "step": 83200 }, { "epoch": 0.07, "learning_rate": 4.6350695737920456e-05, "loss": 0.7046, "step": 83300 }, { "epoch": 0.07, "learning_rate": 4.634631313684672e-05, "loss": 0.7065, "step": 83400 }, { "epoch": 0.07, "learning_rate": 4.6341930535772984e-05, "loss": 0.7239, "step": 83500 }, { "epoch": 0.07, "learning_rate": 4.633754793469924e-05, "loss": 0.6839, "step": 83600 }, { "epoch": 0.07, "learning_rate": 4.633316533362551e-05, "loss": 0.7234, "step": 83700 }, { "epoch": 0.07, "learning_rate": 4.6328782732551776e-05, "loss": 0.7392, "step": 83800 }, { "epoch": 0.07, "learning_rate": 4.632440013147803e-05, "loss": 0.7325, "step": 83900 }, { "epoch": 0.07, "learning_rate": 4.63200175304043e-05, "loss": 0.6921, "step": 84000 }, { "epoch": 0.07, "learning_rate": 4.631563492933056e-05, "loss": 0.7407, "step": 84100 }, { "epoch": 0.07, "learning_rate": 4.6311252328256824e-05, "loss": 0.6938, "step": 84200 }, { "epoch": 0.07, "learning_rate": 4.630686972718308e-05, "loss": 0.7158, "step": 84300 }, { "epoch": 0.07, "learning_rate": 4.630248712610935e-05, "loss": 0.6607, "step": 84400 }, { "epoch": 0.07, "learning_rate": 4.6298104525035616e-05, "loss": 0.7155, "step": 84500 }, { "epoch": 0.07, "learning_rate": 4.629372192396187e-05, "loss": 0.7071, "step": 84600 }, { "epoch": 0.07, "learning_rate": 4.628933932288814e-05, "loss": 0.7122, "step": 84700 }, { "epoch": 0.07, "learning_rate": 4.62849567218144e-05, "loss": 0.6807, "step": 84800 }, { "epoch": 0.07, "learning_rate": 4.628057412074066e-05, "loss": 0.6818, "step": 84900 }, { "epoch": 0.07, "learning_rate": 4.627619151966692e-05, "loss": 0.7619, "step": 85000 }, { "epoch": 0.07, "learning_rate": 4.6271808918593186e-05, "loss": 0.728, "step": 85100 }, { "epoch": 0.07, "learning_rate": 4.626742631751945e-05, "loss": 0.6841, "step": 85200 }, { "epoch": 0.07, "learning_rate": 4.6263043716445713e-05, "loss": 0.6992, "step": 85300 }, { "epoch": 0.07, "learning_rate": 4.625866111537198e-05, "loss": 0.6894, "step": 85400 }, { "epoch": 0.07, "learning_rate": 4.625427851429824e-05, "loss": 0.7057, "step": 85500 }, { "epoch": 0.08, "learning_rate": 4.62498959132245e-05, "loss": 0.7071, "step": 85600 }, { "epoch": 0.08, "learning_rate": 4.624551331215076e-05, "loss": 0.6997, "step": 85700 }, { "epoch": 0.08, "learning_rate": 4.6241130711077026e-05, "loss": 0.6991, "step": 85800 }, { "epoch": 0.08, "learning_rate": 4.623674811000329e-05, "loss": 0.6834, "step": 85900 }, { "epoch": 0.08, "learning_rate": 4.6232365508929554e-05, "loss": 0.7059, "step": 86000 }, { "epoch": 0.08, "learning_rate": 4.622798290785582e-05, "loss": 0.7146, "step": 86100 }, { "epoch": 0.08, "learning_rate": 4.6223600306782075e-05, "loss": 0.6711, "step": 86200 }, { "epoch": 0.08, "learning_rate": 4.621926153171908e-05, "loss": 0.7628, "step": 86300 }, { "epoch": 0.08, "learning_rate": 4.621487893064534e-05, "loss": 0.7287, "step": 86400 }, { "epoch": 0.08, "learning_rate": 4.62104963295716e-05, "loss": 0.7211, "step": 86500 }, { "epoch": 0.08, "learning_rate": 4.620611372849787e-05, "loss": 0.7384, "step": 86600 }, { "epoch": 0.08, "learning_rate": 4.620173112742413e-05, "loss": 0.6255, "step": 86700 }, { "epoch": 0.08, "learning_rate": 4.619734852635039e-05, "loss": 0.7012, "step": 86800 }, { "epoch": 0.08, "learning_rate": 4.6192965925276654e-05, "loss": 0.7017, "step": 86900 }, { "epoch": 0.08, "learning_rate": 4.618858332420292e-05, "loss": 0.7227, "step": 87000 }, { "epoch": 0.08, "learning_rate": 4.6184200723129175e-05, "loss": 0.7206, "step": 87100 }, { "epoch": 0.08, "learning_rate": 4.617981812205544e-05, "loss": 0.7285, "step": 87200 }, { "epoch": 0.08, "learning_rate": 4.61754355209817e-05, "loss": 0.7348, "step": 87300 }, { "epoch": 0.08, "learning_rate": 4.617105291990797e-05, "loss": 0.6571, "step": 87400 }, { "epoch": 0.08, "learning_rate": 4.616667031883423e-05, "loss": 0.6924, "step": 87500 }, { "epoch": 0.08, "learning_rate": 4.6162287717760495e-05, "loss": 0.6576, "step": 87600 }, { "epoch": 0.08, "learning_rate": 4.615790511668676e-05, "loss": 0.7257, "step": 87700 }, { "epoch": 0.08, "learning_rate": 4.6153522515613015e-05, "loss": 0.671, "step": 87800 }, { "epoch": 0.08, "learning_rate": 4.614913991453928e-05, "loss": 0.6536, "step": 87900 }, { "epoch": 0.08, "learning_rate": 4.614475731346554e-05, "loss": 0.7071, "step": 88000 }, { "epoch": 0.08, "learning_rate": 4.614037471239181e-05, "loss": 0.7386, "step": 88100 }, { "epoch": 0.08, "learning_rate": 4.613599211131807e-05, "loss": 0.728, "step": 88200 }, { "epoch": 0.08, "learning_rate": 4.6131609510244335e-05, "loss": 0.6844, "step": 88300 }, { "epoch": 0.08, "learning_rate": 4.612727073518133e-05, "loss": 0.7101, "step": 88400 }, { "epoch": 0.08, "learning_rate": 4.6122888134107595e-05, "loss": 0.722, "step": 88500 }, { "epoch": 0.08, "learning_rate": 4.611850553303386e-05, "loss": 0.7147, "step": 88600 }, { "epoch": 0.08, "learning_rate": 4.6114122931960116e-05, "loss": 0.7095, "step": 88700 }, { "epoch": 0.08, "learning_rate": 4.610974033088638e-05, "loss": 0.6681, "step": 88800 }, { "epoch": 0.08, "learning_rate": 4.610535772981265e-05, "loss": 0.6952, "step": 88900 }, { "epoch": 0.08, "learning_rate": 4.610097512873891e-05, "loss": 0.703, "step": 89000 }, { "epoch": 0.08, "learning_rate": 4.609659252766517e-05, "loss": 0.6901, "step": 89100 }, { "epoch": 0.08, "learning_rate": 4.6092209926591435e-05, "loss": 0.7554, "step": 89200 }, { "epoch": 0.08, "learning_rate": 4.60878273255177e-05, "loss": 0.6939, "step": 89300 }, { "epoch": 0.08, "learning_rate": 4.6083444724443956e-05, "loss": 0.6715, "step": 89400 }, { "epoch": 0.08, "learning_rate": 4.607906212337022e-05, "loss": 0.749, "step": 89500 }, { "epoch": 0.08, "learning_rate": 4.607467952229649e-05, "loss": 0.6863, "step": 89600 }, { "epoch": 0.08, "learning_rate": 4.607029692122275e-05, "loss": 0.6819, "step": 89700 }, { "epoch": 0.08, "learning_rate": 4.606591432014901e-05, "loss": 0.7021, "step": 89800 }, { "epoch": 0.08, "learning_rate": 4.6061531719075276e-05, "loss": 0.7092, "step": 89900 }, { "epoch": 0.08, "learning_rate": 4.605714911800153e-05, "loss": 0.6554, "step": 90000 }, { "epoch": 0.08, "learning_rate": 4.6052766516927797e-05, "loss": 0.6724, "step": 90100 }, { "epoch": 0.08, "learning_rate": 4.604838391585406e-05, "loss": 0.7186, "step": 90200 }, { "epoch": 0.08, "learning_rate": 4.6044001314780324e-05, "loss": 0.7339, "step": 90300 }, { "epoch": 0.08, "learning_rate": 4.603961871370659e-05, "loss": 0.6829, "step": 90400 }, { "epoch": 0.08, "learning_rate": 4.603527993864359e-05, "loss": 0.7774, "step": 90500 }, { "epoch": 0.08, "learning_rate": 4.603089733756985e-05, "loss": 0.7495, "step": 90600 }, { "epoch": 0.08, "learning_rate": 4.602651473649611e-05, "loss": 0.7107, "step": 90700 }, { "epoch": 0.08, "learning_rate": 4.6022132135422376e-05, "loss": 0.6963, "step": 90800 }, { "epoch": 0.08, "learning_rate": 4.601774953434864e-05, "loss": 0.6871, "step": 90900 }, { "epoch": 0.08, "learning_rate": 4.60133669332749e-05, "loss": 0.6969, "step": 91000 }, { "epoch": 0.08, "learning_rate": 4.600898433220117e-05, "loss": 0.6446, "step": 91100 }, { "epoch": 0.08, "learning_rate": 4.6004601731127425e-05, "loss": 0.7064, "step": 91200 }, { "epoch": 0.08, "learning_rate": 4.600021913005369e-05, "loss": 0.678, "step": 91300 }, { "epoch": 0.08, "learning_rate": 4.599583652897995e-05, "loss": 0.683, "step": 91400 }, { "epoch": 0.08, "learning_rate": 4.5991453927906216e-05, "loss": 0.7163, "step": 91500 }, { "epoch": 0.08, "learning_rate": 4.598707132683247e-05, "loss": 0.672, "step": 91600 }, { "epoch": 0.08, "learning_rate": 4.598268872575874e-05, "loss": 0.7174, "step": 91700 }, { "epoch": 0.08, "learning_rate": 4.5978306124685e-05, "loss": 0.6895, "step": 91800 }, { "epoch": 0.08, "learning_rate": 4.5973923523611265e-05, "loss": 0.6954, "step": 91900 }, { "epoch": 0.08, "learning_rate": 4.596958474854827e-05, "loss": 0.6717, "step": 92000 }, { "epoch": 0.08, "learning_rate": 4.596520214747453e-05, "loss": 0.6878, "step": 92100 }, { "epoch": 0.08, "learning_rate": 4.596081954640079e-05, "loss": 0.6949, "step": 92200 }, { "epoch": 0.08, "learning_rate": 4.595643694532705e-05, "loss": 0.7045, "step": 92300 }, { "epoch": 0.08, "learning_rate": 4.595205434425332e-05, "loss": 0.6891, "step": 92400 }, { "epoch": 0.08, "learning_rate": 4.5947671743179574e-05, "loss": 0.6674, "step": 92500 }, { "epoch": 0.08, "learning_rate": 4.5943289142105844e-05, "loss": 0.7046, "step": 92600 }, { "epoch": 0.08, "learning_rate": 4.593890654103211e-05, "loss": 0.7174, "step": 92700 }, { "epoch": 0.08, "learning_rate": 4.5934523939958365e-05, "loss": 0.7262, "step": 92800 }, { "epoch": 0.08, "learning_rate": 4.593014133888463e-05, "loss": 0.7004, "step": 92900 }, { "epoch": 0.08, "learning_rate": 4.592575873781089e-05, "loss": 0.7554, "step": 93000 }, { "epoch": 0.08, "learning_rate": 4.592137613673716e-05, "loss": 0.6858, "step": 93100 }, { "epoch": 0.08, "learning_rate": 4.5916993535663414e-05, "loss": 0.7065, "step": 93200 }, { "epoch": 0.08, "learning_rate": 4.5912610934589685e-05, "loss": 0.7125, "step": 93300 }, { "epoch": 0.08, "learning_rate": 4.590822833351595e-05, "loss": 0.6974, "step": 93400 }, { "epoch": 0.08, "learning_rate": 4.5903845732442206e-05, "loss": 0.6802, "step": 93500 }, { "epoch": 0.08, "learning_rate": 4.589946313136847e-05, "loss": 0.7025, "step": 93600 }, { "epoch": 0.08, "learning_rate": 4.5895080530294733e-05, "loss": 0.7076, "step": 93700 }, { "epoch": 0.08, "learning_rate": 4.589069792922099e-05, "loss": 0.7197, "step": 93800 }, { "epoch": 0.08, "learning_rate": 4.5886315328147254e-05, "loss": 0.6886, "step": 93900 }, { "epoch": 0.08, "learning_rate": 4.588193272707352e-05, "loss": 0.6892, "step": 94000 }, { "epoch": 0.08, "learning_rate": 4.587755012599978e-05, "loss": 0.6741, "step": 94100 }, { "epoch": 0.08, "learning_rate": 4.5873167524926046e-05, "loss": 0.6638, "step": 94200 }, { "epoch": 0.08, "learning_rate": 4.586878492385231e-05, "loss": 0.6688, "step": 94300 }, { "epoch": 0.08, "learning_rate": 4.5864402322778574e-05, "loss": 0.6862, "step": 94400 }, { "epoch": 0.08, "learning_rate": 4.586001972170483e-05, "loss": 0.6893, "step": 94500 }, { "epoch": 0.08, "learning_rate": 4.5855637120631095e-05, "loss": 0.6769, "step": 94600 }, { "epoch": 0.08, "learning_rate": 4.585125451955736e-05, "loss": 0.7071, "step": 94700 }, { "epoch": 0.08, "learning_rate": 4.584687191848362e-05, "loss": 0.6765, "step": 94800 }, { "epoch": 0.08, "learning_rate": 4.5842489317409886e-05, "loss": 0.6897, "step": 94900 }, { "epoch": 0.08, "learning_rate": 4.583810671633615e-05, "loss": 0.7437, "step": 95000 }, { "epoch": 0.08, "learning_rate": 4.5833724115262414e-05, "loss": 0.6911, "step": 95100 }, { "epoch": 0.08, "learning_rate": 4.582934151418867e-05, "loss": 0.6477, "step": 95200 }, { "epoch": 0.08, "learning_rate": 4.5824958913114935e-05, "loss": 0.6628, "step": 95300 }, { "epoch": 0.08, "learning_rate": 4.58205763120412e-05, "loss": 0.6515, "step": 95400 }, { "epoch": 0.08, "learning_rate": 4.5816193710967456e-05, "loss": 0.7047, "step": 95500 }, { "epoch": 0.08, "learning_rate": 4.581181110989373e-05, "loss": 0.6956, "step": 95600 }, { "epoch": 0.08, "learning_rate": 4.580742850881999e-05, "loss": 0.7025, "step": 95700 }, { "epoch": 0.08, "learning_rate": 4.580304590774625e-05, "loss": 0.6809, "step": 95800 }, { "epoch": 0.08, "learning_rate": 4.579866330667251e-05, "loss": 0.6886, "step": 95900 }, { "epoch": 0.08, "learning_rate": 4.5794280705598775e-05, "loss": 0.6719, "step": 96000 }, { "epoch": 0.08, "learning_rate": 4.578989810452504e-05, "loss": 0.6753, "step": 96100 }, { "epoch": 0.08, "learning_rate": 4.5785559329462035e-05, "loss": 0.705, "step": 96200 }, { "epoch": 0.08, "learning_rate": 4.5781176728388306e-05, "loss": 0.6916, "step": 96300 }, { "epoch": 0.08, "learning_rate": 4.577679412731456e-05, "loss": 0.6835, "step": 96400 }, { "epoch": 0.08, "learning_rate": 4.577241152624083e-05, "loss": 0.6574, "step": 96500 }, { "epoch": 0.08, "learning_rate": 4.576802892516709e-05, "loss": 0.668, "step": 96600 }, { "epoch": 0.08, "learning_rate": 4.576364632409335e-05, "loss": 0.6833, "step": 96700 }, { "epoch": 0.08, "learning_rate": 4.575926372301961e-05, "loss": 0.6991, "step": 96800 }, { "epoch": 0.08, "learning_rate": 4.5754881121945876e-05, "loss": 0.6763, "step": 96900 }, { "epoch": 0.09, "learning_rate": 4.575049852087214e-05, "loss": 0.6778, "step": 97000 }, { "epoch": 0.09, "learning_rate": 4.5746115919798404e-05, "loss": 0.7062, "step": 97100 }, { "epoch": 0.09, "learning_rate": 4.574173331872467e-05, "loss": 0.6658, "step": 97200 }, { "epoch": 0.09, "learning_rate": 4.573735071765093e-05, "loss": 0.7119, "step": 97300 }, { "epoch": 0.09, "learning_rate": 4.573296811657719e-05, "loss": 0.6761, "step": 97400 }, { "epoch": 0.09, "learning_rate": 4.572858551550345e-05, "loss": 0.707, "step": 97500 }, { "epoch": 0.09, "learning_rate": 4.5724202914429716e-05, "loss": 0.7145, "step": 97600 }, { "epoch": 0.09, "learning_rate": 4.571982031335597e-05, "loss": 0.6571, "step": 97700 }, { "epoch": 0.09, "learning_rate": 4.5715437712282244e-05, "loss": 0.6944, "step": 97800 }, { "epoch": 0.09, "learning_rate": 4.571105511120851e-05, "loss": 0.6707, "step": 97900 }, { "epoch": 0.09, "learning_rate": 4.5706672510134765e-05, "loss": 0.6993, "step": 98000 }, { "epoch": 0.09, "learning_rate": 4.570228990906103e-05, "loss": 0.681, "step": 98100 }, { "epoch": 0.09, "learning_rate": 4.569790730798729e-05, "loss": 0.689, "step": 98200 }, { "epoch": 0.09, "learning_rate": 4.5693524706913557e-05, "loss": 0.6907, "step": 98300 }, { "epoch": 0.09, "learning_rate": 4.5689142105839814e-05, "loss": 0.6748, "step": 98400 }, { "epoch": 0.09, "learning_rate": 4.568475950476608e-05, "loss": 0.697, "step": 98500 }, { "epoch": 0.09, "learning_rate": 4.568037690369235e-05, "loss": 0.6928, "step": 98600 }, { "epoch": 0.09, "learning_rate": 4.5675994302618605e-05, "loss": 0.6745, "step": 98700 }, { "epoch": 0.09, "learning_rate": 4.567161170154487e-05, "loss": 0.6472, "step": 98800 }, { "epoch": 0.09, "learning_rate": 4.566722910047113e-05, "loss": 0.6838, "step": 98900 }, { "epoch": 0.09, "learning_rate": 4.56628464993974e-05, "loss": 0.7179, "step": 99000 }, { "epoch": 0.09, "learning_rate": 4.5658463898323654e-05, "loss": 0.6876, "step": 99100 }, { "epoch": 0.09, "learning_rate": 4.565408129724992e-05, "loss": 0.7313, "step": 99200 }, { "epoch": 0.09, "learning_rate": 4.564969869617619e-05, "loss": 0.6861, "step": 99300 }, { "epoch": 0.09, "learning_rate": 4.5645359921113185e-05, "loss": 0.6832, "step": 99400 }, { "epoch": 0.09, "learning_rate": 4.564097732003945e-05, "loss": 0.6628, "step": 99500 }, { "epoch": 0.09, "learning_rate": 4.5636594718965706e-05, "loss": 0.7351, "step": 99600 }, { "epoch": 0.09, "learning_rate": 4.563221211789197e-05, "loss": 0.6581, "step": 99700 }, { "epoch": 0.09, "learning_rate": 4.562782951681823e-05, "loss": 0.7143, "step": 99800 }, { "epoch": 0.09, "learning_rate": 4.56234469157445e-05, "loss": 0.6572, "step": 99900 }, { "epoch": 0.09, "learning_rate": 4.561906431467076e-05, "loss": 0.697, "step": 100000 }, { "epoch": 0.09, "eval_loss": 0.6870002746582031, "eval_runtime": 79438.6345, "eval_samples_per_second": 6.404, "eval_steps_per_second": 3.202, "step": 100000 } ], "max_steps": 1140875, "num_train_epochs": 1, "total_flos": 1.4859311775744e+18, "trial_name": null, "trial_params": null }