mrm8488's picture
Add 100k steps ckpt
37513d8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.08765198306037775,
"global_step": 100000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.999583652897995e-05,
"loss": 1.5746,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 4.999145392790621e-05,
"loss": 1.4499,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 4.998707132683248e-05,
"loss": 1.3728,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 4.998268872575874e-05,
"loss": 1.3729,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 4.9978393776706475e-05,
"loss": 1.23,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.997401117563274e-05,
"loss": 1.1736,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 4.9969628574559e-05,
"loss": 1.1768,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 4.9965245973485266e-05,
"loss": 1.2188,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 4.9960863372411524e-05,
"loss": 1.1797,
"step": 900
},
{
"epoch": 0.0,
"learning_rate": 4.9956480771337794e-05,
"loss": 1.1905,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.995209817026406e-05,
"loss": 1.2828,
"step": 1100
},
{
"epoch": 0.0,
"learning_rate": 4.9947715569190315e-05,
"loss": 1.1102,
"step": 1200
},
{
"epoch": 0.0,
"learning_rate": 4.994333296811658e-05,
"loss": 1.1689,
"step": 1300
},
{
"epoch": 0.0,
"learning_rate": 4.993895036704284e-05,
"loss": 1.1574,
"step": 1400
},
{
"epoch": 0.0,
"learning_rate": 4.993456776596911e-05,
"loss": 1.2361,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 4.9930185164895364e-05,
"loss": 1.1391,
"step": 1600
},
{
"epoch": 0.0,
"learning_rate": 4.9925802563821635e-05,
"loss": 1.1652,
"step": 1700
},
{
"epoch": 0.0,
"learning_rate": 4.99214199627479e-05,
"loss": 1.162,
"step": 1800
},
{
"epoch": 0.0,
"learning_rate": 4.9917037361674156e-05,
"loss": 1.1477,
"step": 1900
},
{
"epoch": 0.0,
"learning_rate": 4.991265476060042e-05,
"loss": 1.171,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 4.990827215952668e-05,
"loss": 1.1214,
"step": 2100
},
{
"epoch": 0.0,
"learning_rate": 4.990388955845294e-05,
"loss": 1.0911,
"step": 2200
},
{
"epoch": 0.0,
"learning_rate": 4.9899506957379204e-05,
"loss": 1.1262,
"step": 2300
},
{
"epoch": 0.0,
"learning_rate": 4.989512435630547e-05,
"loss": 1.0649,
"step": 2400
},
{
"epoch": 0.0,
"learning_rate": 4.989074175523173e-05,
"loss": 1.13,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 4.9886359154157996e-05,
"loss": 1.1008,
"step": 2600
},
{
"epoch": 0.0,
"learning_rate": 4.988197655308426e-05,
"loss": 1.0733,
"step": 2700
},
{
"epoch": 0.0,
"learning_rate": 4.9877593952010524e-05,
"loss": 1.0636,
"step": 2800
},
{
"epoch": 0.0,
"learning_rate": 4.987321135093678e-05,
"loss": 1.1448,
"step": 2900
},
{
"epoch": 0.0,
"learning_rate": 4.9868828749863045e-05,
"loss": 1.1237,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 4.986444614878931e-05,
"loss": 1.1278,
"step": 3100
},
{
"epoch": 0.0,
"learning_rate": 4.986006354771557e-05,
"loss": 1.0606,
"step": 3200
},
{
"epoch": 0.0,
"learning_rate": 4.9855680946641836e-05,
"loss": 1.1164,
"step": 3300
},
{
"epoch": 0.0,
"learning_rate": 4.98512983455681e-05,
"loss": 1.0511,
"step": 3400
},
{
"epoch": 0.0,
"learning_rate": 4.984691574449436e-05,
"loss": 1.0267,
"step": 3500
},
{
"epoch": 0.0,
"learning_rate": 4.984253314342062e-05,
"loss": 1.0842,
"step": 3600
},
{
"epoch": 0.0,
"learning_rate": 4.9838150542346885e-05,
"loss": 1.1009,
"step": 3700
},
{
"epoch": 0.0,
"learning_rate": 4.983376794127315e-05,
"loss": 1.0525,
"step": 3800
},
{
"epoch": 0.0,
"learning_rate": 4.9829385340199406e-05,
"loss": 1.0772,
"step": 3900
},
{
"epoch": 0.0,
"learning_rate": 4.9825002739125677e-05,
"loss": 1.0564,
"step": 4000
},
{
"epoch": 0.0,
"learning_rate": 4.982062013805194e-05,
"loss": 1.0593,
"step": 4100
},
{
"epoch": 0.0,
"learning_rate": 4.98162375369782e-05,
"loss": 1.0939,
"step": 4200
},
{
"epoch": 0.0,
"learning_rate": 4.981185493590446e-05,
"loss": 1.0162,
"step": 4300
},
{
"epoch": 0.0,
"learning_rate": 4.9807472334830725e-05,
"loss": 1.0549,
"step": 4400
},
{
"epoch": 0.0,
"learning_rate": 4.980308973375698e-05,
"loss": 1.0467,
"step": 4500
},
{
"epoch": 0.0,
"learning_rate": 4.9798707132683246e-05,
"loss": 1.0434,
"step": 4600
},
{
"epoch": 0.0,
"learning_rate": 4.979432453160952e-05,
"loss": 1.0023,
"step": 4700
},
{
"epoch": 0.0,
"learning_rate": 4.9789941930535774e-05,
"loss": 1.1335,
"step": 4800
},
{
"epoch": 0.0,
"learning_rate": 4.978555932946204e-05,
"loss": 1.0314,
"step": 4900
},
{
"epoch": 0.0,
"learning_rate": 4.97811767283883e-05,
"loss": 1.0841,
"step": 5000
},
{
"epoch": 0.0,
"learning_rate": 4.9776794127314566e-05,
"loss": 1.02,
"step": 5100
},
{
"epoch": 0.0,
"learning_rate": 4.977241152624082e-05,
"loss": 0.999,
"step": 5200
},
{
"epoch": 0.0,
"learning_rate": 4.9768028925167087e-05,
"loss": 1.0875,
"step": 5300
},
{
"epoch": 0.0,
"learning_rate": 4.976364632409335e-05,
"loss": 1.0271,
"step": 5400
},
{
"epoch": 0.0,
"learning_rate": 4.9759263723019614e-05,
"loss": 1.0556,
"step": 5500
},
{
"epoch": 0.0,
"learning_rate": 4.975488112194588e-05,
"loss": 1.0384,
"step": 5600
},
{
"epoch": 0.0,
"learning_rate": 4.975049852087214e-05,
"loss": 1.07,
"step": 5700
},
{
"epoch": 0.01,
"learning_rate": 4.9746115919798406e-05,
"loss": 1.0087,
"step": 5800
},
{
"epoch": 0.01,
"learning_rate": 4.974173331872466e-05,
"loss": 1.0818,
"step": 5900
},
{
"epoch": 0.01,
"learning_rate": 4.973735071765093e-05,
"loss": 1.0697,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 4.973296811657719e-05,
"loss": 1.0354,
"step": 6100
},
{
"epoch": 0.01,
"learning_rate": 4.9728585515503455e-05,
"loss": 0.9858,
"step": 6200
},
{
"epoch": 0.01,
"learning_rate": 4.972420291442972e-05,
"loss": 1.0291,
"step": 6300
},
{
"epoch": 0.01,
"learning_rate": 4.971982031335598e-05,
"loss": 1.0206,
"step": 6400
},
{
"epoch": 0.01,
"learning_rate": 4.971543771228224e-05,
"loss": 1.0487,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 4.97110551112085e-05,
"loss": 1.0847,
"step": 6600
},
{
"epoch": 0.01,
"learning_rate": 4.970667251013477e-05,
"loss": 1.0272,
"step": 6700
},
{
"epoch": 0.01,
"learning_rate": 4.970228990906103e-05,
"loss": 1.0268,
"step": 6800
},
{
"epoch": 0.01,
"learning_rate": 4.969790730798729e-05,
"loss": 1.0012,
"step": 6900
},
{
"epoch": 0.01,
"learning_rate": 4.969352470691356e-05,
"loss": 0.96,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 4.968914210583982e-05,
"loss": 1.0239,
"step": 7100
},
{
"epoch": 0.01,
"learning_rate": 4.968475950476608e-05,
"loss": 1.0212,
"step": 7200
},
{
"epoch": 0.01,
"learning_rate": 4.9680376903692344e-05,
"loss": 1.018,
"step": 7300
},
{
"epoch": 0.01,
"learning_rate": 4.967599430261861e-05,
"loss": 0.9627,
"step": 7400
},
{
"epoch": 0.01,
"learning_rate": 4.9671611701544865e-05,
"loss": 1.0514,
"step": 7500
},
{
"epoch": 0.01,
"learning_rate": 4.966722910047113e-05,
"loss": 1.0319,
"step": 7600
},
{
"epoch": 0.01,
"learning_rate": 4.96628464993974e-05,
"loss": 0.9913,
"step": 7700
},
{
"epoch": 0.01,
"learning_rate": 4.9658463898323656e-05,
"loss": 1.0072,
"step": 7800
},
{
"epoch": 0.01,
"learning_rate": 4.965408129724992e-05,
"loss": 0.9955,
"step": 7900
},
{
"epoch": 0.01,
"learning_rate": 4.9649698696176184e-05,
"loss": 1.0485,
"step": 8000
},
{
"epoch": 0.01,
"learning_rate": 4.964531609510245e-05,
"loss": 0.9666,
"step": 8100
},
{
"epoch": 0.01,
"learning_rate": 4.9640933494028705e-05,
"loss": 0.9327,
"step": 8200
},
{
"epoch": 0.01,
"learning_rate": 4.963655089295497e-05,
"loss": 1.0228,
"step": 8300
},
{
"epoch": 0.01,
"learning_rate": 4.963216829188123e-05,
"loss": 1.0134,
"step": 8400
},
{
"epoch": 0.01,
"learning_rate": 4.96277856908075e-05,
"loss": 1.0276,
"step": 8500
},
{
"epoch": 0.01,
"learning_rate": 4.962340308973376e-05,
"loss": 0.9459,
"step": 8600
},
{
"epoch": 0.01,
"learning_rate": 4.9619020488660024e-05,
"loss": 0.9719,
"step": 8700
},
{
"epoch": 0.01,
"learning_rate": 4.961463788758629e-05,
"loss": 0.9243,
"step": 8800
},
{
"epoch": 0.01,
"learning_rate": 4.9610255286512545e-05,
"loss": 0.9991,
"step": 8900
},
{
"epoch": 0.01,
"learning_rate": 4.960587268543881e-05,
"loss": 0.9566,
"step": 9000
},
{
"epoch": 0.01,
"learning_rate": 4.960149008436507e-05,
"loss": 0.9744,
"step": 9100
},
{
"epoch": 0.01,
"learning_rate": 4.959710748329134e-05,
"loss": 0.9836,
"step": 9200
},
{
"epoch": 0.01,
"learning_rate": 4.95927248822176e-05,
"loss": 0.8892,
"step": 9300
},
{
"epoch": 0.01,
"learning_rate": 4.9588342281143865e-05,
"loss": 0.9609,
"step": 9400
},
{
"epoch": 0.01,
"learning_rate": 4.958395968007012e-05,
"loss": 0.9613,
"step": 9500
},
{
"epoch": 0.01,
"learning_rate": 4.9579577078996386e-05,
"loss": 1.0236,
"step": 9600
},
{
"epoch": 0.01,
"learning_rate": 4.957519447792265e-05,
"loss": 0.9404,
"step": 9700
},
{
"epoch": 0.01,
"learning_rate": 4.9570811876848913e-05,
"loss": 0.91,
"step": 9800
},
{
"epoch": 0.01,
"learning_rate": 4.956642927577517e-05,
"loss": 0.9923,
"step": 9900
},
{
"epoch": 0.01,
"learning_rate": 4.956204667470144e-05,
"loss": 0.9815,
"step": 10000
},
{
"epoch": 0.01,
"learning_rate": 4.9557664073627705e-05,
"loss": 0.9069,
"step": 10100
},
{
"epoch": 0.01,
"learning_rate": 4.955328147255396e-05,
"loss": 0.9944,
"step": 10200
},
{
"epoch": 0.01,
"learning_rate": 4.9548898871480226e-05,
"loss": 0.9553,
"step": 10300
},
{
"epoch": 0.01,
"learning_rate": 4.954451627040649e-05,
"loss": 0.9449,
"step": 10400
},
{
"epoch": 0.01,
"learning_rate": 4.954013366933275e-05,
"loss": 0.9538,
"step": 10500
},
{
"epoch": 0.01,
"learning_rate": 4.953575106825901e-05,
"loss": 0.9735,
"step": 10600
},
{
"epoch": 0.01,
"learning_rate": 4.953136846718528e-05,
"loss": 0.9722,
"step": 10700
},
{
"epoch": 0.01,
"learning_rate": 4.952698586611154e-05,
"loss": 0.9786,
"step": 10800
},
{
"epoch": 0.01,
"learning_rate": 4.95226032650378e-05,
"loss": 0.9305,
"step": 10900
},
{
"epoch": 0.01,
"learning_rate": 4.9518220663964066e-05,
"loss": 0.9171,
"step": 11000
},
{
"epoch": 0.01,
"learning_rate": 4.951383806289033e-05,
"loss": 0.9424,
"step": 11100
},
{
"epoch": 0.01,
"learning_rate": 4.950945546181659e-05,
"loss": 0.9316,
"step": 11200
},
{
"epoch": 0.01,
"learning_rate": 4.950507286074285e-05,
"loss": 1.0145,
"step": 11300
},
{
"epoch": 0.01,
"learning_rate": 4.9500690259669115e-05,
"loss": 0.9797,
"step": 11400
},
{
"epoch": 0.01,
"learning_rate": 4.949630765859538e-05,
"loss": 0.9035,
"step": 11500
},
{
"epoch": 0.01,
"learning_rate": 4.949192505752164e-05,
"loss": 0.9201,
"step": 11600
},
{
"epoch": 0.01,
"learning_rate": 4.948754245644791e-05,
"loss": 0.9216,
"step": 11700
},
{
"epoch": 0.01,
"learning_rate": 4.9483159855374164e-05,
"loss": 0.945,
"step": 11800
},
{
"epoch": 0.01,
"learning_rate": 4.947877725430043e-05,
"loss": 0.9036,
"step": 11900
},
{
"epoch": 0.01,
"learning_rate": 4.947439465322669e-05,
"loss": 0.9586,
"step": 12000
},
{
"epoch": 0.01,
"learning_rate": 4.9470012052152955e-05,
"loss": 0.93,
"step": 12100
},
{
"epoch": 0.01,
"learning_rate": 4.946562945107922e-05,
"loss": 0.9111,
"step": 12200
},
{
"epoch": 0.01,
"learning_rate": 4.946124685000548e-05,
"loss": 0.9481,
"step": 12300
},
{
"epoch": 0.01,
"learning_rate": 4.945686424893175e-05,
"loss": 0.9746,
"step": 12400
},
{
"epoch": 0.01,
"learning_rate": 4.945252547386874e-05,
"loss": 0.8887,
"step": 12500
},
{
"epoch": 0.01,
"learning_rate": 4.944814287279501e-05,
"loss": 0.8922,
"step": 12600
},
{
"epoch": 0.01,
"learning_rate": 4.944376027172127e-05,
"loss": 0.9359,
"step": 12700
},
{
"epoch": 0.01,
"learning_rate": 4.943937767064753e-05,
"loss": 0.9479,
"step": 12800
},
{
"epoch": 0.01,
"learning_rate": 4.943499506957379e-05,
"loss": 0.8957,
"step": 12900
},
{
"epoch": 0.01,
"learning_rate": 4.943061246850006e-05,
"loss": 0.9511,
"step": 13000
},
{
"epoch": 0.01,
"learning_rate": 4.942622986742632e-05,
"loss": 0.8991,
"step": 13100
},
{
"epoch": 0.01,
"learning_rate": 4.9421847266352584e-05,
"loss": 0.9246,
"step": 13200
},
{
"epoch": 0.01,
"learning_rate": 4.941746466527885e-05,
"loss": 0.8767,
"step": 13300
},
{
"epoch": 0.01,
"learning_rate": 4.9413082064205105e-05,
"loss": 0.9596,
"step": 13400
},
{
"epoch": 0.01,
"learning_rate": 4.940869946313137e-05,
"loss": 0.8822,
"step": 13500
},
{
"epoch": 0.01,
"learning_rate": 4.940431686205763e-05,
"loss": 0.9094,
"step": 13600
},
{
"epoch": 0.01,
"learning_rate": 4.9399934260983896e-05,
"loss": 0.9085,
"step": 13700
},
{
"epoch": 0.01,
"learning_rate": 4.939555165991016e-05,
"loss": 0.891,
"step": 13800
},
{
"epoch": 0.01,
"learning_rate": 4.9391169058836424e-05,
"loss": 0.8784,
"step": 13900
},
{
"epoch": 0.01,
"learning_rate": 4.938678645776269e-05,
"loss": 0.9138,
"step": 14000
},
{
"epoch": 0.01,
"learning_rate": 4.9382403856688945e-05,
"loss": 0.8773,
"step": 14100
},
{
"epoch": 0.01,
"learning_rate": 4.937802125561521e-05,
"loss": 0.8547,
"step": 14200
},
{
"epoch": 0.01,
"learning_rate": 4.937363865454147e-05,
"loss": 0.9284,
"step": 14300
},
{
"epoch": 0.01,
"learning_rate": 4.936925605346773e-05,
"loss": 0.9839,
"step": 14400
},
{
"epoch": 0.01,
"learning_rate": 4.9364873452394e-05,
"loss": 0.887,
"step": 14500
},
{
"epoch": 0.01,
"learning_rate": 4.9360490851320264e-05,
"loss": 0.9668,
"step": 14600
},
{
"epoch": 0.01,
"learning_rate": 4.935610825024652e-05,
"loss": 0.9022,
"step": 14700
},
{
"epoch": 0.01,
"learning_rate": 4.9351725649172785e-05,
"loss": 0.8769,
"step": 14800
},
{
"epoch": 0.01,
"learning_rate": 4.934734304809905e-05,
"loss": 0.9015,
"step": 14900
},
{
"epoch": 0.01,
"learning_rate": 4.934296044702531e-05,
"loss": 0.9319,
"step": 15000
},
{
"epoch": 0.01,
"learning_rate": 4.933857784595157e-05,
"loss": 0.8847,
"step": 15100
},
{
"epoch": 0.01,
"learning_rate": 4.933419524487784e-05,
"loss": 0.911,
"step": 15200
},
{
"epoch": 0.01,
"learning_rate": 4.9329812643804105e-05,
"loss": 0.9238,
"step": 15300
},
{
"epoch": 0.01,
"learning_rate": 4.932543004273036e-05,
"loss": 0.8539,
"step": 15400
},
{
"epoch": 0.01,
"learning_rate": 4.9321047441656626e-05,
"loss": 0.8901,
"step": 15500
},
{
"epoch": 0.01,
"learning_rate": 4.931666484058289e-05,
"loss": 0.8523,
"step": 15600
},
{
"epoch": 0.01,
"learning_rate": 4.931228223950915e-05,
"loss": 0.8737,
"step": 15700
},
{
"epoch": 0.01,
"learning_rate": 4.930789963843541e-05,
"loss": 0.8563,
"step": 15800
},
{
"epoch": 0.01,
"learning_rate": 4.9303517037361674e-05,
"loss": 0.9147,
"step": 15900
},
{
"epoch": 0.01,
"learning_rate": 4.929917826229868e-05,
"loss": 0.91,
"step": 16000
},
{
"epoch": 0.01,
"learning_rate": 4.929479566122494e-05,
"loss": 0.8723,
"step": 16100
},
{
"epoch": 0.01,
"learning_rate": 4.9290413060151205e-05,
"loss": 0.9087,
"step": 16200
},
{
"epoch": 0.01,
"learning_rate": 4.928603045907746e-05,
"loss": 0.911,
"step": 16300
},
{
"epoch": 0.01,
"learning_rate": 4.9281647858003726e-05,
"loss": 0.87,
"step": 16400
},
{
"epoch": 0.01,
"learning_rate": 4.927726525692999e-05,
"loss": 0.878,
"step": 16500
},
{
"epoch": 0.01,
"learning_rate": 4.9272882655856254e-05,
"loss": 0.8579,
"step": 16600
},
{
"epoch": 0.01,
"learning_rate": 4.926850005478252e-05,
"loss": 0.9406,
"step": 16700
},
{
"epoch": 0.01,
"learning_rate": 4.926411745370878e-05,
"loss": 0.9255,
"step": 16800
},
{
"epoch": 0.01,
"learning_rate": 4.9259734852635045e-05,
"loss": 0.8492,
"step": 16900
},
{
"epoch": 0.01,
"learning_rate": 4.92553522515613e-05,
"loss": 0.8543,
"step": 17000
},
{
"epoch": 0.01,
"learning_rate": 4.9250969650487566e-05,
"loss": 0.8901,
"step": 17100
},
{
"epoch": 0.02,
"learning_rate": 4.924658704941383e-05,
"loss": 0.8168,
"step": 17200
},
{
"epoch": 0.02,
"learning_rate": 4.924220444834009e-05,
"loss": 0.9083,
"step": 17300
},
{
"epoch": 0.02,
"learning_rate": 4.923782184726635e-05,
"loss": 0.9301,
"step": 17400
},
{
"epoch": 0.02,
"learning_rate": 4.923343924619262e-05,
"loss": 0.8885,
"step": 17500
},
{
"epoch": 0.02,
"learning_rate": 4.922905664511888e-05,
"loss": 0.9209,
"step": 17600
},
{
"epoch": 0.02,
"learning_rate": 4.922467404404514e-05,
"loss": 0.8404,
"step": 17700
},
{
"epoch": 0.02,
"learning_rate": 4.922029144297141e-05,
"loss": 0.9039,
"step": 17800
},
{
"epoch": 0.02,
"learning_rate": 4.921590884189767e-05,
"loss": 0.9151,
"step": 17900
},
{
"epoch": 0.02,
"learning_rate": 4.921152624082393e-05,
"loss": 0.9139,
"step": 18000
},
{
"epoch": 0.02,
"learning_rate": 4.920714363975019e-05,
"loss": 0.9031,
"step": 18100
},
{
"epoch": 0.02,
"learning_rate": 4.920276103867646e-05,
"loss": 0.9497,
"step": 18200
},
{
"epoch": 0.02,
"learning_rate": 4.919837843760272e-05,
"loss": 0.9277,
"step": 18300
},
{
"epoch": 0.02,
"learning_rate": 4.919399583652898e-05,
"loss": 0.8629,
"step": 18400
},
{
"epoch": 0.02,
"learning_rate": 4.918961323545525e-05,
"loss": 0.8593,
"step": 18500
},
{
"epoch": 0.02,
"learning_rate": 4.9185230634381504e-05,
"loss": 0.9478,
"step": 18600
},
{
"epoch": 0.02,
"learning_rate": 4.918084803330777e-05,
"loss": 0.8978,
"step": 18700
},
{
"epoch": 0.02,
"learning_rate": 4.917646543223403e-05,
"loss": 0.8154,
"step": 18800
},
{
"epoch": 0.02,
"learning_rate": 4.9172082831160296e-05,
"loss": 0.8861,
"step": 18900
},
{
"epoch": 0.02,
"learning_rate": 4.916770023008656e-05,
"loss": 0.9065,
"step": 19000
},
{
"epoch": 0.02,
"learning_rate": 4.9163317629012823e-05,
"loss": 0.9063,
"step": 19100
},
{
"epoch": 0.02,
"learning_rate": 4.915893502793909e-05,
"loss": 0.8666,
"step": 19200
},
{
"epoch": 0.02,
"learning_rate": 4.9154596252876083e-05,
"loss": 0.8928,
"step": 19300
},
{
"epoch": 0.02,
"learning_rate": 4.915021365180235e-05,
"loss": 0.8607,
"step": 19400
},
{
"epoch": 0.02,
"learning_rate": 4.914583105072861e-05,
"loss": 0.8939,
"step": 19500
},
{
"epoch": 0.02,
"learning_rate": 4.914144844965487e-05,
"loss": 0.8878,
"step": 19600
},
{
"epoch": 0.02,
"learning_rate": 4.913706584858114e-05,
"loss": 0.8956,
"step": 19700
},
{
"epoch": 0.02,
"learning_rate": 4.91326832475074e-05,
"loss": 0.886,
"step": 19800
},
{
"epoch": 0.02,
"learning_rate": 4.912830064643366e-05,
"loss": 0.8777,
"step": 19900
},
{
"epoch": 0.02,
"learning_rate": 4.9123918045359924e-05,
"loss": 0.8847,
"step": 20000
},
{
"epoch": 0.02,
"learning_rate": 4.911953544428619e-05,
"loss": 0.8396,
"step": 20100
},
{
"epoch": 0.02,
"learning_rate": 4.9115152843212445e-05,
"loss": 0.8237,
"step": 20200
},
{
"epoch": 0.02,
"learning_rate": 4.911077024213871e-05,
"loss": 0.8754,
"step": 20300
},
{
"epoch": 0.02,
"learning_rate": 4.910638764106498e-05,
"loss": 0.8846,
"step": 20400
},
{
"epoch": 0.02,
"learning_rate": 4.9102005039991236e-05,
"loss": 0.8414,
"step": 20500
},
{
"epoch": 0.02,
"learning_rate": 4.90976224389175e-05,
"loss": 0.8441,
"step": 20600
},
{
"epoch": 0.02,
"learning_rate": 4.9093239837843764e-05,
"loss": 0.8856,
"step": 20700
},
{
"epoch": 0.02,
"learning_rate": 4.908885723677003e-05,
"loss": 0.9202,
"step": 20800
},
{
"epoch": 0.02,
"learning_rate": 4.9084474635696285e-05,
"loss": 0.8612,
"step": 20900
},
{
"epoch": 0.02,
"learning_rate": 4.908009203462255e-05,
"loss": 0.852,
"step": 21000
},
{
"epoch": 0.02,
"learning_rate": 4.907570943354881e-05,
"loss": 0.8106,
"step": 21100
},
{
"epoch": 0.02,
"learning_rate": 4.907132683247508e-05,
"loss": 0.8366,
"step": 21200
},
{
"epoch": 0.02,
"learning_rate": 4.906694423140134e-05,
"loss": 0.9048,
"step": 21300
},
{
"epoch": 0.02,
"learning_rate": 4.9062561630327604e-05,
"loss": 0.9017,
"step": 21400
},
{
"epoch": 0.02,
"learning_rate": 4.905817902925386e-05,
"loss": 0.8672,
"step": 21500
},
{
"epoch": 0.02,
"learning_rate": 4.9053796428180125e-05,
"loss": 0.8904,
"step": 21600
},
{
"epoch": 0.02,
"learning_rate": 4.904941382710639e-05,
"loss": 0.8486,
"step": 21700
},
{
"epoch": 0.02,
"learning_rate": 4.904503122603265e-05,
"loss": 0.9009,
"step": 21800
},
{
"epoch": 0.02,
"learning_rate": 4.904064862495892e-05,
"loss": 0.8664,
"step": 21900
},
{
"epoch": 0.02,
"learning_rate": 4.903626602388518e-05,
"loss": 0.9054,
"step": 22000
},
{
"epoch": 0.02,
"learning_rate": 4.9031883422811445e-05,
"loss": 0.8422,
"step": 22100
},
{
"epoch": 0.02,
"learning_rate": 4.90275008217377e-05,
"loss": 0.8579,
"step": 22200
},
{
"epoch": 0.02,
"learning_rate": 4.9023118220663966e-05,
"loss": 0.9171,
"step": 22300
},
{
"epoch": 0.02,
"learning_rate": 4.901873561959023e-05,
"loss": 0.863,
"step": 22400
},
{
"epoch": 0.02,
"learning_rate": 4.901435301851649e-05,
"loss": 0.876,
"step": 22500
},
{
"epoch": 0.02,
"learning_rate": 4.900997041744275e-05,
"loss": 0.8192,
"step": 22600
},
{
"epoch": 0.02,
"learning_rate": 4.900558781636902e-05,
"loss": 0.8993,
"step": 22700
},
{
"epoch": 0.02,
"learning_rate": 4.900120521529528e-05,
"loss": 0.8771,
"step": 22800
},
{
"epoch": 0.02,
"learning_rate": 4.899682261422154e-05,
"loss": 0.8902,
"step": 22900
},
{
"epoch": 0.02,
"learning_rate": 4.8992440013147806e-05,
"loss": 0.9054,
"step": 23000
},
{
"epoch": 0.02,
"learning_rate": 4.898805741207407e-05,
"loss": 0.8684,
"step": 23100
},
{
"epoch": 0.02,
"learning_rate": 4.898367481100033e-05,
"loss": 0.8823,
"step": 23200
},
{
"epoch": 0.02,
"learning_rate": 4.897929220992659e-05,
"loss": 0.8566,
"step": 23300
},
{
"epoch": 0.02,
"learning_rate": 4.897490960885286e-05,
"loss": 0.856,
"step": 23400
},
{
"epoch": 0.02,
"learning_rate": 4.897052700777912e-05,
"loss": 0.9151,
"step": 23500
},
{
"epoch": 0.02,
"learning_rate": 4.896614440670538e-05,
"loss": 0.8057,
"step": 23600
},
{
"epoch": 0.02,
"learning_rate": 4.8961805631642386e-05,
"loss": 0.8463,
"step": 23700
},
{
"epoch": 0.02,
"learning_rate": 4.895742303056864e-05,
"loss": 0.8553,
"step": 23800
},
{
"epoch": 0.02,
"learning_rate": 4.8953040429494907e-05,
"loss": 0.8477,
"step": 23900
},
{
"epoch": 0.02,
"learning_rate": 4.894865782842117e-05,
"loss": 0.7987,
"step": 24000
},
{
"epoch": 0.02,
"learning_rate": 4.894427522734743e-05,
"loss": 0.874,
"step": 24100
},
{
"epoch": 0.02,
"learning_rate": 4.89398926262737e-05,
"loss": 0.9251,
"step": 24200
},
{
"epoch": 0.02,
"learning_rate": 4.893551002519996e-05,
"loss": 0.8381,
"step": 24300
},
{
"epoch": 0.02,
"learning_rate": 4.893112742412622e-05,
"loss": 0.8819,
"step": 24400
},
{
"epoch": 0.02,
"learning_rate": 4.892674482305248e-05,
"loss": 0.8195,
"step": 24500
},
{
"epoch": 0.02,
"learning_rate": 4.892236222197875e-05,
"loss": 0.8431,
"step": 24600
},
{
"epoch": 0.02,
"learning_rate": 4.891797962090501e-05,
"loss": 0.8708,
"step": 24700
},
{
"epoch": 0.02,
"learning_rate": 4.891359701983127e-05,
"loss": 0.8316,
"step": 24800
},
{
"epoch": 0.02,
"learning_rate": 4.890921441875754e-05,
"loss": 0.8241,
"step": 24900
},
{
"epoch": 0.02,
"learning_rate": 4.89048318176838e-05,
"loss": 0.8462,
"step": 25000
},
{
"epoch": 0.02,
"learning_rate": 4.890044921661006e-05,
"loss": 0.8586,
"step": 25100
},
{
"epoch": 0.02,
"learning_rate": 4.889606661553632e-05,
"loss": 0.8579,
"step": 25200
},
{
"epoch": 0.02,
"learning_rate": 4.889168401446259e-05,
"loss": 0.8235,
"step": 25300
},
{
"epoch": 0.02,
"learning_rate": 4.8887301413388844e-05,
"loss": 0.8788,
"step": 25400
},
{
"epoch": 0.02,
"learning_rate": 4.888291881231511e-05,
"loss": 0.831,
"step": 25500
},
{
"epoch": 0.02,
"learning_rate": 4.887853621124137e-05,
"loss": 0.8978,
"step": 25600
},
{
"epoch": 0.02,
"learning_rate": 4.8874153610167636e-05,
"loss": 0.8545,
"step": 25700
},
{
"epoch": 0.02,
"learning_rate": 4.88697710090939e-05,
"loss": 0.8262,
"step": 25800
},
{
"epoch": 0.02,
"learning_rate": 4.8865388408020164e-05,
"loss": 0.826,
"step": 25900
},
{
"epoch": 0.02,
"learning_rate": 4.886100580694643e-05,
"loss": 0.8311,
"step": 26000
},
{
"epoch": 0.02,
"learning_rate": 4.8856623205872685e-05,
"loss": 0.8451,
"step": 26100
},
{
"epoch": 0.02,
"learning_rate": 4.885224060479895e-05,
"loss": 0.8688,
"step": 26200
},
{
"epoch": 0.02,
"learning_rate": 4.884785800372521e-05,
"loss": 0.8388,
"step": 26300
},
{
"epoch": 0.02,
"learning_rate": 4.8843475402651476e-05,
"loss": 0.8552,
"step": 26400
},
{
"epoch": 0.02,
"learning_rate": 4.883909280157774e-05,
"loss": 0.8321,
"step": 26500
},
{
"epoch": 0.02,
"learning_rate": 4.8834710200504004e-05,
"loss": 0.8686,
"step": 26600
},
{
"epoch": 0.02,
"learning_rate": 4.883032759943026e-05,
"loss": 0.8,
"step": 26700
},
{
"epoch": 0.02,
"learning_rate": 4.8825944998356525e-05,
"loss": 0.8083,
"step": 26800
},
{
"epoch": 0.02,
"learning_rate": 4.882156239728279e-05,
"loss": 0.8455,
"step": 26900
},
{
"epoch": 0.02,
"learning_rate": 4.881717979620905e-05,
"loss": 0.8525,
"step": 27000
},
{
"epoch": 0.02,
"learning_rate": 4.881279719513531e-05,
"loss": 0.8326,
"step": 27100
},
{
"epoch": 0.02,
"learning_rate": 4.880841459406158e-05,
"loss": 0.8332,
"step": 27200
},
{
"epoch": 0.02,
"learning_rate": 4.8804031992987844e-05,
"loss": 0.8877,
"step": 27300
},
{
"epoch": 0.02,
"learning_rate": 4.87996493919141e-05,
"loss": 0.807,
"step": 27400
},
{
"epoch": 0.02,
"learning_rate": 4.8795266790840365e-05,
"loss": 0.8194,
"step": 27500
},
{
"epoch": 0.02,
"learning_rate": 4.879092801577737e-05,
"loss": 0.8663,
"step": 27600
},
{
"epoch": 0.02,
"learning_rate": 4.8786545414703625e-05,
"loss": 0.8823,
"step": 27700
},
{
"epoch": 0.02,
"learning_rate": 4.878216281362989e-05,
"loss": 0.8301,
"step": 27800
},
{
"epoch": 0.02,
"learning_rate": 4.877778021255616e-05,
"loss": 0.9158,
"step": 27900
},
{
"epoch": 0.02,
"learning_rate": 4.877339761148242e-05,
"loss": 0.8453,
"step": 28000
},
{
"epoch": 0.02,
"learning_rate": 4.876901501040868e-05,
"loss": 0.8206,
"step": 28100
},
{
"epoch": 0.02,
"learning_rate": 4.876467623534568e-05,
"loss": 0.8457,
"step": 28200
},
{
"epoch": 0.02,
"learning_rate": 4.876029363427194e-05,
"loss": 0.8287,
"step": 28300
},
{
"epoch": 0.02,
"learning_rate": 4.8755911033198205e-05,
"loss": 0.8129,
"step": 28400
},
{
"epoch": 0.02,
"learning_rate": 4.875152843212447e-05,
"loss": 0.8421,
"step": 28500
},
{
"epoch": 0.03,
"learning_rate": 4.874714583105073e-05,
"loss": 0.8434,
"step": 28600
},
{
"epoch": 0.03,
"learning_rate": 4.8742763229976996e-05,
"loss": 0.8504,
"step": 28700
},
{
"epoch": 0.03,
"learning_rate": 4.873838062890326e-05,
"loss": 0.8047,
"step": 28800
},
{
"epoch": 0.03,
"learning_rate": 4.873399802782952e-05,
"loss": 0.8734,
"step": 28900
},
{
"epoch": 0.03,
"learning_rate": 4.872961542675578e-05,
"loss": 0.8675,
"step": 29000
},
{
"epoch": 0.03,
"learning_rate": 4.8725232825682045e-05,
"loss": 0.8998,
"step": 29100
},
{
"epoch": 0.03,
"learning_rate": 4.87208502246083e-05,
"loss": 0.8296,
"step": 29200
},
{
"epoch": 0.03,
"learning_rate": 4.8716467623534566e-05,
"loss": 0.8139,
"step": 29300
},
{
"epoch": 0.03,
"learning_rate": 4.871208502246084e-05,
"loss": 0.83,
"step": 29400
},
{
"epoch": 0.03,
"learning_rate": 4.8707702421387094e-05,
"loss": 0.8226,
"step": 29500
},
{
"epoch": 0.03,
"learning_rate": 4.870331982031336e-05,
"loss": 0.7953,
"step": 29600
},
{
"epoch": 0.03,
"learning_rate": 4.869893721923962e-05,
"loss": 0.8594,
"step": 29700
},
{
"epoch": 0.03,
"learning_rate": 4.8694554618165885e-05,
"loss": 0.8271,
"step": 29800
},
{
"epoch": 0.03,
"learning_rate": 4.869017201709214e-05,
"loss": 0.8439,
"step": 29900
},
{
"epoch": 0.03,
"learning_rate": 4.8685789416018406e-05,
"loss": 0.8464,
"step": 30000
},
{
"epoch": 0.03,
"learning_rate": 4.868140681494468e-05,
"loss": 0.8639,
"step": 30100
},
{
"epoch": 0.03,
"learning_rate": 4.8677024213870934e-05,
"loss": 0.7947,
"step": 30200
},
{
"epoch": 0.03,
"learning_rate": 4.86726416127972e-05,
"loss": 0.9023,
"step": 30300
},
{
"epoch": 0.03,
"learning_rate": 4.866825901172346e-05,
"loss": 0.8229,
"step": 30400
},
{
"epoch": 0.03,
"learning_rate": 4.8663876410649726e-05,
"loss": 0.8011,
"step": 30500
},
{
"epoch": 0.03,
"learning_rate": 4.865949380957598e-05,
"loss": 0.8158,
"step": 30600
},
{
"epoch": 0.03,
"learning_rate": 4.865511120850225e-05,
"loss": 0.8365,
"step": 30700
},
{
"epoch": 0.03,
"learning_rate": 4.865072860742851e-05,
"loss": 0.8189,
"step": 30800
},
{
"epoch": 0.03,
"learning_rate": 4.8646346006354774e-05,
"loss": 0.8553,
"step": 30900
},
{
"epoch": 0.03,
"learning_rate": 4.864196340528104e-05,
"loss": 0.8438,
"step": 31000
},
{
"epoch": 0.03,
"learning_rate": 4.86375808042073e-05,
"loss": 0.7937,
"step": 31100
},
{
"epoch": 0.03,
"learning_rate": 4.863319820313356e-05,
"loss": 0.852,
"step": 31200
},
{
"epoch": 0.03,
"learning_rate": 4.862881560205982e-05,
"loss": 0.8729,
"step": 31300
},
{
"epoch": 0.03,
"learning_rate": 4.862443300098609e-05,
"loss": 0.8307,
"step": 31400
},
{
"epoch": 0.03,
"learning_rate": 4.862005039991235e-05,
"loss": 0.8071,
"step": 31500
},
{
"epoch": 0.03,
"learning_rate": 4.8615667798838615e-05,
"loss": 0.7917,
"step": 31600
},
{
"epoch": 0.03,
"learning_rate": 4.861128519776488e-05,
"loss": 0.8596,
"step": 31700
},
{
"epoch": 0.03,
"learning_rate": 4.860690259669114e-05,
"loss": 0.7898,
"step": 31800
},
{
"epoch": 0.03,
"learning_rate": 4.86025199956174e-05,
"loss": 0.7932,
"step": 31900
},
{
"epoch": 0.03,
"learning_rate": 4.8598137394543664e-05,
"loss": 0.8602,
"step": 32000
},
{
"epoch": 0.03,
"learning_rate": 4.859375479346993e-05,
"loss": 0.8325,
"step": 32100
},
{
"epoch": 0.03,
"learning_rate": 4.8589372192396185e-05,
"loss": 0.8609,
"step": 32200
},
{
"epoch": 0.03,
"learning_rate": 4.8585033417333194e-05,
"loss": 0.7698,
"step": 32300
},
{
"epoch": 0.03,
"learning_rate": 4.858065081625945e-05,
"loss": 0.8472,
"step": 32400
},
{
"epoch": 0.03,
"learning_rate": 4.8576268215185715e-05,
"loss": 0.8362,
"step": 32500
},
{
"epoch": 0.03,
"learning_rate": 4.857188561411198e-05,
"loss": 0.8125,
"step": 32600
},
{
"epoch": 0.03,
"learning_rate": 4.856750301303824e-05,
"loss": 0.8368,
"step": 32700
},
{
"epoch": 0.03,
"learning_rate": 4.85631204119645e-05,
"loss": 0.7988,
"step": 32800
},
{
"epoch": 0.03,
"learning_rate": 4.8558737810890764e-05,
"loss": 0.8356,
"step": 32900
},
{
"epoch": 0.03,
"learning_rate": 4.855435520981703e-05,
"loss": 0.8606,
"step": 33000
},
{
"epoch": 0.03,
"learning_rate": 4.854997260874329e-05,
"loss": 0.7586,
"step": 33100
},
{
"epoch": 0.03,
"learning_rate": 4.8545590007669556e-05,
"loss": 0.8198,
"step": 33200
},
{
"epoch": 0.03,
"learning_rate": 4.854120740659582e-05,
"loss": 0.8062,
"step": 33300
},
{
"epoch": 0.03,
"learning_rate": 4.8536824805522077e-05,
"loss": 0.7724,
"step": 33400
},
{
"epoch": 0.03,
"learning_rate": 4.853244220444834e-05,
"loss": 0.833,
"step": 33500
},
{
"epoch": 0.03,
"learning_rate": 4.8528059603374604e-05,
"loss": 0.7965,
"step": 33600
},
{
"epoch": 0.03,
"learning_rate": 4.852367700230087e-05,
"loss": 0.7809,
"step": 33700
},
{
"epoch": 0.03,
"learning_rate": 4.851929440122713e-05,
"loss": 0.7981,
"step": 33800
},
{
"epoch": 0.03,
"learning_rate": 4.8514911800153396e-05,
"loss": 0.7943,
"step": 33900
},
{
"epoch": 0.03,
"learning_rate": 4.851052919907966e-05,
"loss": 0.7953,
"step": 34000
},
{
"epoch": 0.03,
"learning_rate": 4.850614659800592e-05,
"loss": 0.7816,
"step": 34100
},
{
"epoch": 0.03,
"learning_rate": 4.850176399693218e-05,
"loss": 0.8317,
"step": 34200
},
{
"epoch": 0.03,
"learning_rate": 4.8497381395858445e-05,
"loss": 0.8589,
"step": 34300
},
{
"epoch": 0.03,
"learning_rate": 4.849304262079544e-05,
"loss": 0.8085,
"step": 34400
},
{
"epoch": 0.03,
"learning_rate": 4.8488660019721705e-05,
"loss": 0.8092,
"step": 34500
},
{
"epoch": 0.03,
"learning_rate": 4.8484277418647975e-05,
"loss": 0.8121,
"step": 34600
},
{
"epoch": 0.03,
"learning_rate": 4.847989481757423e-05,
"loss": 0.7966,
"step": 34700
},
{
"epoch": 0.03,
"learning_rate": 4.8475512216500496e-05,
"loss": 0.8091,
"step": 34800
},
{
"epoch": 0.03,
"learning_rate": 4.847112961542676e-05,
"loss": 0.7875,
"step": 34900
},
{
"epoch": 0.03,
"learning_rate": 4.846674701435302e-05,
"loss": 0.7973,
"step": 35000
},
{
"epoch": 0.03,
"learning_rate": 4.846236441327928e-05,
"loss": 0.8513,
"step": 35100
},
{
"epoch": 0.03,
"learning_rate": 4.8457981812205545e-05,
"loss": 0.826,
"step": 35200
},
{
"epoch": 0.03,
"learning_rate": 4.845359921113181e-05,
"loss": 0.7278,
"step": 35300
},
{
"epoch": 0.03,
"learning_rate": 4.844921661005807e-05,
"loss": 0.7903,
"step": 35400
},
{
"epoch": 0.03,
"learning_rate": 4.8444834008984337e-05,
"loss": 0.8301,
"step": 35500
},
{
"epoch": 0.03,
"learning_rate": 4.84404514079106e-05,
"loss": 0.7878,
"step": 35600
},
{
"epoch": 0.03,
"learning_rate": 4.843606880683686e-05,
"loss": 0.8124,
"step": 35700
},
{
"epoch": 0.03,
"learning_rate": 4.843168620576312e-05,
"loss": 0.8015,
"step": 35800
},
{
"epoch": 0.03,
"learning_rate": 4.8427303604689385e-05,
"loss": 0.7861,
"step": 35900
},
{
"epoch": 0.03,
"learning_rate": 4.842292100361564e-05,
"loss": 0.7964,
"step": 36000
},
{
"epoch": 0.03,
"learning_rate": 4.841853840254191e-05,
"loss": 0.8175,
"step": 36100
},
{
"epoch": 0.03,
"learning_rate": 4.841415580146818e-05,
"loss": 0.8299,
"step": 36200
},
{
"epoch": 0.03,
"learning_rate": 4.8409773200394434e-05,
"loss": 0.8111,
"step": 36300
},
{
"epoch": 0.03,
"learning_rate": 4.84053905993207e-05,
"loss": 0.8189,
"step": 36400
},
{
"epoch": 0.03,
"learning_rate": 4.840100799824696e-05,
"loss": 0.8433,
"step": 36500
},
{
"epoch": 0.03,
"learning_rate": 4.8396625397173226e-05,
"loss": 0.8442,
"step": 36600
},
{
"epoch": 0.03,
"learning_rate": 4.839224279609948e-05,
"loss": 0.8015,
"step": 36700
},
{
"epoch": 0.03,
"learning_rate": 4.8387860195025753e-05,
"loss": 0.8183,
"step": 36800
},
{
"epoch": 0.03,
"learning_rate": 4.838347759395202e-05,
"loss": 0.8216,
"step": 36900
},
{
"epoch": 0.03,
"learning_rate": 4.8379094992878274e-05,
"loss": 0.8307,
"step": 37000
},
{
"epoch": 0.03,
"learning_rate": 4.837471239180454e-05,
"loss": 0.8289,
"step": 37100
},
{
"epoch": 0.03,
"learning_rate": 4.83703297907308e-05,
"loss": 0.7687,
"step": 37200
},
{
"epoch": 0.03,
"learning_rate": 4.8365947189657066e-05,
"loss": 0.8037,
"step": 37300
},
{
"epoch": 0.03,
"learning_rate": 4.836156458858332e-05,
"loss": 0.8104,
"step": 37400
},
{
"epoch": 0.03,
"learning_rate": 4.835718198750959e-05,
"loss": 0.8128,
"step": 37500
},
{
"epoch": 0.03,
"learning_rate": 4.835279938643585e-05,
"loss": 0.8136,
"step": 37600
},
{
"epoch": 0.03,
"learning_rate": 4.8348416785362115e-05,
"loss": 0.7878,
"step": 37700
},
{
"epoch": 0.03,
"learning_rate": 4.834403418428838e-05,
"loss": 0.8161,
"step": 37800
},
{
"epoch": 0.03,
"learning_rate": 4.833965158321464e-05,
"loss": 0.7801,
"step": 37900
},
{
"epoch": 0.03,
"learning_rate": 4.83352689821409e-05,
"loss": 0.7813,
"step": 38000
},
{
"epoch": 0.03,
"learning_rate": 4.8330886381067163e-05,
"loss": 0.7949,
"step": 38100
},
{
"epoch": 0.03,
"learning_rate": 4.8326547606004166e-05,
"loss": 0.8184,
"step": 38200
},
{
"epoch": 0.03,
"learning_rate": 4.832216500493043e-05,
"loss": 0.7786,
"step": 38300
},
{
"epoch": 0.03,
"learning_rate": 4.8317782403856694e-05,
"loss": 0.7931,
"step": 38400
},
{
"epoch": 0.03,
"learning_rate": 4.831339980278296e-05,
"loss": 0.8074,
"step": 38500
},
{
"epoch": 0.03,
"learning_rate": 4.8309017201709215e-05,
"loss": 0.8203,
"step": 38600
},
{
"epoch": 0.03,
"learning_rate": 4.830463460063548e-05,
"loss": 0.8012,
"step": 38700
},
{
"epoch": 0.03,
"learning_rate": 4.830025199956174e-05,
"loss": 0.7963,
"step": 38800
},
{
"epoch": 0.03,
"learning_rate": 4.8295869398488e-05,
"loss": 0.7476,
"step": 38900
},
{
"epoch": 0.03,
"learning_rate": 4.829148679741427e-05,
"loss": 0.7956,
"step": 39000
},
{
"epoch": 0.03,
"learning_rate": 4.8287104196340534e-05,
"loss": 0.7766,
"step": 39100
},
{
"epoch": 0.03,
"learning_rate": 4.828272159526679e-05,
"loss": 0.7682,
"step": 39200
},
{
"epoch": 0.03,
"learning_rate": 4.8278338994193055e-05,
"loss": 0.8261,
"step": 39300
},
{
"epoch": 0.03,
"learning_rate": 4.827395639311932e-05,
"loss": 0.8424,
"step": 39400
},
{
"epoch": 0.03,
"learning_rate": 4.826957379204558e-05,
"loss": 0.8633,
"step": 39500
},
{
"epoch": 0.03,
"learning_rate": 4.826519119097184e-05,
"loss": 0.7965,
"step": 39600
},
{
"epoch": 0.03,
"learning_rate": 4.8260808589898104e-05,
"loss": 0.8124,
"step": 39700
},
{
"epoch": 0.03,
"learning_rate": 4.8256425988824375e-05,
"loss": 0.8227,
"step": 39800
},
{
"epoch": 0.03,
"learning_rate": 4.825204338775063e-05,
"loss": 0.7894,
"step": 39900
},
{
"epoch": 0.04,
"learning_rate": 4.8247660786676896e-05,
"loss": 0.7997,
"step": 40000
},
{
"epoch": 0.04,
"learning_rate": 4.824327818560316e-05,
"loss": 0.7992,
"step": 40100
},
{
"epoch": 0.04,
"learning_rate": 4.823889558452942e-05,
"loss": 0.7851,
"step": 40200
},
{
"epoch": 0.04,
"learning_rate": 4.823451298345568e-05,
"loss": 0.7955,
"step": 40300
},
{
"epoch": 0.04,
"learning_rate": 4.8230174208392684e-05,
"loss": 0.8048,
"step": 40400
},
{
"epoch": 0.04,
"learning_rate": 4.822579160731895e-05,
"loss": 0.7969,
"step": 40500
},
{
"epoch": 0.04,
"learning_rate": 4.822140900624521e-05,
"loss": 0.7691,
"step": 40600
},
{
"epoch": 0.04,
"learning_rate": 4.8217026405171475e-05,
"loss": 0.7473,
"step": 40700
},
{
"epoch": 0.04,
"learning_rate": 4.821264380409773e-05,
"loss": 0.8393,
"step": 40800
},
{
"epoch": 0.04,
"learning_rate": 4.8208261203023996e-05,
"loss": 0.8068,
"step": 40900
},
{
"epoch": 0.04,
"learning_rate": 4.820387860195026e-05,
"loss": 0.7939,
"step": 41000
},
{
"epoch": 0.04,
"learning_rate": 4.8199496000876524e-05,
"loss": 0.7244,
"step": 41100
},
{
"epoch": 0.04,
"learning_rate": 4.819511339980278e-05,
"loss": 0.7812,
"step": 41200
},
{
"epoch": 0.04,
"learning_rate": 4.819073079872905e-05,
"loss": 0.8116,
"step": 41300
},
{
"epoch": 0.04,
"learning_rate": 4.8186348197655316e-05,
"loss": 0.8082,
"step": 41400
},
{
"epoch": 0.04,
"learning_rate": 4.818196559658157e-05,
"loss": 0.7542,
"step": 41500
},
{
"epoch": 0.04,
"learning_rate": 4.8177582995507836e-05,
"loss": 0.8032,
"step": 41600
},
{
"epoch": 0.04,
"learning_rate": 4.81732003944341e-05,
"loss": 0.7832,
"step": 41700
},
{
"epoch": 0.04,
"learning_rate": 4.816881779336036e-05,
"loss": 0.8094,
"step": 41800
},
{
"epoch": 0.04,
"learning_rate": 4.816443519228662e-05,
"loss": 0.7754,
"step": 41900
},
{
"epoch": 0.04,
"learning_rate": 4.816005259121289e-05,
"loss": 0.7744,
"step": 42000
},
{
"epoch": 0.04,
"learning_rate": 4.815566999013915e-05,
"loss": 0.7838,
"step": 42100
},
{
"epoch": 0.04,
"learning_rate": 4.815128738906541e-05,
"loss": 0.7864,
"step": 42200
},
{
"epoch": 0.04,
"learning_rate": 4.814690478799168e-05,
"loss": 0.7357,
"step": 42300
},
{
"epoch": 0.04,
"learning_rate": 4.814252218691794e-05,
"loss": 0.8131,
"step": 42400
},
{
"epoch": 0.04,
"learning_rate": 4.81381395858442e-05,
"loss": 0.7851,
"step": 42500
},
{
"epoch": 0.04,
"learning_rate": 4.813375698477046e-05,
"loss": 0.7482,
"step": 42600
},
{
"epoch": 0.04,
"learning_rate": 4.8129374383696726e-05,
"loss": 0.7435,
"step": 42700
},
{
"epoch": 0.04,
"learning_rate": 4.812499178262299e-05,
"loss": 0.7796,
"step": 42800
},
{
"epoch": 0.04,
"learning_rate": 4.812060918154925e-05,
"loss": 0.7952,
"step": 42900
},
{
"epoch": 0.04,
"learning_rate": 4.811622658047552e-05,
"loss": 0.7713,
"step": 43000
},
{
"epoch": 0.04,
"learning_rate": 4.8111843979401774e-05,
"loss": 0.7929,
"step": 43100
},
{
"epoch": 0.04,
"learning_rate": 4.810746137832804e-05,
"loss": 0.8022,
"step": 43200
},
{
"epoch": 0.04,
"learning_rate": 4.81030787772543e-05,
"loss": 0.786,
"step": 43300
},
{
"epoch": 0.04,
"learning_rate": 4.8098696176180566e-05,
"loss": 0.8356,
"step": 43400
},
{
"epoch": 0.04,
"learning_rate": 4.809431357510683e-05,
"loss": 0.8147,
"step": 43500
},
{
"epoch": 0.04,
"learning_rate": 4.8089930974033094e-05,
"loss": 0.7672,
"step": 43600
},
{
"epoch": 0.04,
"learning_rate": 4.808554837295936e-05,
"loss": 0.7966,
"step": 43700
},
{
"epoch": 0.04,
"learning_rate": 4.8081165771885615e-05,
"loss": 0.7498,
"step": 43800
},
{
"epoch": 0.04,
"learning_rate": 4.807678317081188e-05,
"loss": 0.8215,
"step": 43900
},
{
"epoch": 0.04,
"learning_rate": 4.807240056973814e-05,
"loss": 0.7742,
"step": 44000
},
{
"epoch": 0.04,
"learning_rate": 4.80680179686644e-05,
"loss": 0.7749,
"step": 44100
},
{
"epoch": 0.04,
"learning_rate": 4.806363536759066e-05,
"loss": 0.8101,
"step": 44200
},
{
"epoch": 0.04,
"learning_rate": 4.8059252766516934e-05,
"loss": 0.781,
"step": 44300
},
{
"epoch": 0.04,
"learning_rate": 4.805487016544319e-05,
"loss": 0.7469,
"step": 44400
},
{
"epoch": 0.04,
"learning_rate": 4.8050531390380194e-05,
"loss": 0.8202,
"step": 44500
},
{
"epoch": 0.04,
"learning_rate": 4.804614878930646e-05,
"loss": 0.7887,
"step": 44600
},
{
"epoch": 0.04,
"learning_rate": 4.8041766188232715e-05,
"loss": 0.794,
"step": 44700
},
{
"epoch": 0.04,
"learning_rate": 4.803738358715898e-05,
"loss": 0.8125,
"step": 44800
},
{
"epoch": 0.04,
"learning_rate": 4.803300098608524e-05,
"loss": 0.7705,
"step": 44900
},
{
"epoch": 0.04,
"learning_rate": 4.8028618385011507e-05,
"loss": 0.8103,
"step": 45000
},
{
"epoch": 0.04,
"learning_rate": 4.802423578393777e-05,
"loss": 0.7986,
"step": 45100
},
{
"epoch": 0.04,
"learning_rate": 4.8019853182864034e-05,
"loss": 0.7652,
"step": 45200
},
{
"epoch": 0.04,
"learning_rate": 4.80154705817903e-05,
"loss": 0.7579,
"step": 45300
},
{
"epoch": 0.04,
"learning_rate": 4.8011087980716555e-05,
"loss": 0.8024,
"step": 45400
},
{
"epoch": 0.04,
"learning_rate": 4.800670537964282e-05,
"loss": 0.7476,
"step": 45500
},
{
"epoch": 0.04,
"learning_rate": 4.800232277856908e-05,
"loss": 0.733,
"step": 45600
},
{
"epoch": 0.04,
"learning_rate": 4.799794017749535e-05,
"loss": 0.7761,
"step": 45700
},
{
"epoch": 0.04,
"learning_rate": 4.799355757642161e-05,
"loss": 0.7532,
"step": 45800
},
{
"epoch": 0.04,
"learning_rate": 4.7989174975347875e-05,
"loss": 0.8413,
"step": 45900
},
{
"epoch": 0.04,
"learning_rate": 4.798479237427413e-05,
"loss": 0.8088,
"step": 46000
},
{
"epoch": 0.04,
"learning_rate": 4.7980453599211135e-05,
"loss": 0.7517,
"step": 46100
},
{
"epoch": 0.04,
"learning_rate": 4.79760709981374e-05,
"loss": 0.7889,
"step": 46200
},
{
"epoch": 0.04,
"learning_rate": 4.7971688397063656e-05,
"loss": 0.793,
"step": 46300
},
{
"epoch": 0.04,
"learning_rate": 4.796730579598992e-05,
"loss": 0.8077,
"step": 46400
},
{
"epoch": 0.04,
"learning_rate": 4.796292319491619e-05,
"loss": 0.7563,
"step": 46500
},
{
"epoch": 0.04,
"learning_rate": 4.795854059384245e-05,
"loss": 0.7346,
"step": 46600
},
{
"epoch": 0.04,
"learning_rate": 4.795415799276871e-05,
"loss": 0.7577,
"step": 46700
},
{
"epoch": 0.04,
"learning_rate": 4.7949775391694975e-05,
"loss": 0.7868,
"step": 46800
},
{
"epoch": 0.04,
"learning_rate": 4.794539279062123e-05,
"loss": 0.7709,
"step": 46900
},
{
"epoch": 0.04,
"learning_rate": 4.7941010189547496e-05,
"loss": 0.7339,
"step": 47000
},
{
"epoch": 0.04,
"learning_rate": 4.793662758847376e-05,
"loss": 0.7477,
"step": 47100
},
{
"epoch": 0.04,
"learning_rate": 4.7932244987400024e-05,
"loss": 0.7336,
"step": 47200
},
{
"epoch": 0.04,
"learning_rate": 4.792786238632629e-05,
"loss": 0.7679,
"step": 47300
},
{
"epoch": 0.04,
"learning_rate": 4.792347978525255e-05,
"loss": 0.7354,
"step": 47400
},
{
"epoch": 0.04,
"learning_rate": 4.7919097184178815e-05,
"loss": 0.8235,
"step": 47500
},
{
"epoch": 0.04,
"learning_rate": 4.791471458310507e-05,
"loss": 0.7937,
"step": 47600
},
{
"epoch": 0.04,
"learning_rate": 4.7910331982031336e-05,
"loss": 0.7564,
"step": 47700
},
{
"epoch": 0.04,
"learning_rate": 4.79059493809576e-05,
"loss": 0.7613,
"step": 47800
},
{
"epoch": 0.04,
"learning_rate": 4.7901566779883864e-05,
"loss": 0.8323,
"step": 47900
},
{
"epoch": 0.04,
"learning_rate": 4.789718417881013e-05,
"loss": 0.7595,
"step": 48000
},
{
"epoch": 0.04,
"learning_rate": 4.789280157773639e-05,
"loss": 0.7905,
"step": 48100
},
{
"epoch": 0.04,
"learning_rate": 4.788841897666265e-05,
"loss": 0.7947,
"step": 48200
},
{
"epoch": 0.04,
"learning_rate": 4.788403637558891e-05,
"loss": 0.7528,
"step": 48300
},
{
"epoch": 0.04,
"learning_rate": 4.787965377451518e-05,
"loss": 0.7599,
"step": 48400
},
{
"epoch": 0.04,
"learning_rate": 4.787527117344144e-05,
"loss": 0.8073,
"step": 48500
},
{
"epoch": 0.04,
"learning_rate": 4.78708885723677e-05,
"loss": 0.763,
"step": 48600
},
{
"epoch": 0.04,
"learning_rate": 4.786650597129397e-05,
"loss": 0.7329,
"step": 48700
},
{
"epoch": 0.04,
"learning_rate": 4.786212337022023e-05,
"loss": 0.7333,
"step": 48800
},
{
"epoch": 0.04,
"learning_rate": 4.785774076914649e-05,
"loss": 0.8079,
"step": 48900
},
{
"epoch": 0.04,
"learning_rate": 4.785335816807275e-05,
"loss": 0.7613,
"step": 49000
},
{
"epoch": 0.04,
"learning_rate": 4.784897556699902e-05,
"loss": 0.7352,
"step": 49100
},
{
"epoch": 0.04,
"learning_rate": 4.784459296592528e-05,
"loss": 0.7474,
"step": 49200
},
{
"epoch": 0.04,
"learning_rate": 4.784021036485154e-05,
"loss": 0.7597,
"step": 49300
},
{
"epoch": 0.04,
"learning_rate": 4.78358277637778e-05,
"loss": 0.751,
"step": 49400
},
{
"epoch": 0.04,
"learning_rate": 4.783144516270407e-05,
"loss": 0.7342,
"step": 49500
},
{
"epoch": 0.04,
"learning_rate": 4.782706256163033e-05,
"loss": 0.7375,
"step": 49600
},
{
"epoch": 0.04,
"learning_rate": 4.7822679960556594e-05,
"loss": 0.7683,
"step": 49700
},
{
"epoch": 0.04,
"learning_rate": 4.781829735948286e-05,
"loss": 0.7944,
"step": 49800
},
{
"epoch": 0.04,
"learning_rate": 4.7813914758409114e-05,
"loss": 0.7641,
"step": 49900
},
{
"epoch": 0.04,
"learning_rate": 4.780953215733538e-05,
"loss": 0.7704,
"step": 50000
},
{
"epoch": 0.04,
"eval_loss": 0.7738358378410339,
"eval_runtime": 79510.7981,
"eval_samples_per_second": 6.398,
"eval_steps_per_second": 3.199,
"step": 50000
},
{
"epoch": 0.04,
"learning_rate": 4.780514955626164e-05,
"loss": 0.7621,
"step": 50100
},
{
"epoch": 0.04,
"learning_rate": 4.7800766955187906e-05,
"loss": 0.7515,
"step": 50200
},
{
"epoch": 0.04,
"learning_rate": 4.779638435411417e-05,
"loss": 0.7785,
"step": 50300
},
{
"epoch": 0.04,
"learning_rate": 4.7792001753040434e-05,
"loss": 0.7162,
"step": 50400
},
{
"epoch": 0.04,
"learning_rate": 4.77876191519667e-05,
"loss": 0.8159,
"step": 50500
},
{
"epoch": 0.04,
"learning_rate": 4.7783236550892955e-05,
"loss": 0.7779,
"step": 50600
},
{
"epoch": 0.04,
"learning_rate": 4.777885394981922e-05,
"loss": 0.7708,
"step": 50700
},
{
"epoch": 0.04,
"learning_rate": 4.777447134874548e-05,
"loss": 0.7731,
"step": 50800
},
{
"epoch": 0.04,
"learning_rate": 4.777008874767174e-05,
"loss": 0.7875,
"step": 50900
},
{
"epoch": 0.04,
"learning_rate": 4.776570614659801e-05,
"loss": 0.7395,
"step": 51000
},
{
"epoch": 0.04,
"learning_rate": 4.7761323545524274e-05,
"loss": 0.7735,
"step": 51100
},
{
"epoch": 0.04,
"learning_rate": 4.775694094445053e-05,
"loss": 0.7586,
"step": 51200
},
{
"epoch": 0.04,
"learning_rate": 4.7752558343376795e-05,
"loss": 0.7871,
"step": 51300
},
{
"epoch": 0.05,
"learning_rate": 4.774817574230306e-05,
"loss": 0.7513,
"step": 51400
},
{
"epoch": 0.05,
"learning_rate": 4.774379314122932e-05,
"loss": 0.7893,
"step": 51500
},
{
"epoch": 0.05,
"learning_rate": 4.773941054015558e-05,
"loss": 0.7829,
"step": 51600
},
{
"epoch": 0.05,
"learning_rate": 4.773502793908185e-05,
"loss": 0.7594,
"step": 51700
},
{
"epoch": 0.05,
"learning_rate": 4.7730645338008115e-05,
"loss": 0.7645,
"step": 51800
},
{
"epoch": 0.05,
"learning_rate": 4.772626273693437e-05,
"loss": 0.7781,
"step": 51900
},
{
"epoch": 0.05,
"learning_rate": 4.7721880135860636e-05,
"loss": 0.7786,
"step": 52000
},
{
"epoch": 0.05,
"learning_rate": 4.771754136079764e-05,
"loss": 0.7585,
"step": 52100
},
{
"epoch": 0.05,
"learning_rate": 4.7713158759723896e-05,
"loss": 0.7763,
"step": 52200
},
{
"epoch": 0.05,
"learning_rate": 4.770877615865016e-05,
"loss": 0.7818,
"step": 52300
},
{
"epoch": 0.05,
"learning_rate": 4.770443738358716e-05,
"loss": 0.7509,
"step": 52400
},
{
"epoch": 0.05,
"learning_rate": 4.7700054782513426e-05,
"loss": 0.7659,
"step": 52500
},
{
"epoch": 0.05,
"learning_rate": 4.769567218143969e-05,
"loss": 0.7889,
"step": 52600
},
{
"epoch": 0.05,
"learning_rate": 4.769128958036595e-05,
"loss": 0.7128,
"step": 52700
},
{
"epoch": 0.05,
"learning_rate": 4.768690697929221e-05,
"loss": 0.815,
"step": 52800
},
{
"epoch": 0.05,
"learning_rate": 4.7682524378218475e-05,
"loss": 0.7634,
"step": 52900
},
{
"epoch": 0.05,
"learning_rate": 4.767814177714474e-05,
"loss": 0.7788,
"step": 53000
},
{
"epoch": 0.05,
"learning_rate": 4.7673759176070996e-05,
"loss": 0.7781,
"step": 53100
},
{
"epoch": 0.05,
"learning_rate": 4.7669376574997267e-05,
"loss": 0.7423,
"step": 53200
},
{
"epoch": 0.05,
"learning_rate": 4.766499397392353e-05,
"loss": 0.733,
"step": 53300
},
{
"epoch": 0.05,
"learning_rate": 4.766061137284979e-05,
"loss": 0.7859,
"step": 53400
},
{
"epoch": 0.05,
"learning_rate": 4.765622877177605e-05,
"loss": 0.7551,
"step": 53500
},
{
"epoch": 0.05,
"learning_rate": 4.7651846170702315e-05,
"loss": 0.8219,
"step": 53600
},
{
"epoch": 0.05,
"learning_rate": 4.764746356962857e-05,
"loss": 0.7225,
"step": 53700
},
{
"epoch": 0.05,
"learning_rate": 4.7643080968554836e-05,
"loss": 0.7389,
"step": 53800
},
{
"epoch": 0.05,
"learning_rate": 4.763869836748111e-05,
"loss": 0.7525,
"step": 53900
},
{
"epoch": 0.05,
"learning_rate": 4.7634315766407364e-05,
"loss": 0.8057,
"step": 54000
},
{
"epoch": 0.05,
"learning_rate": 4.762993316533363e-05,
"loss": 0.7388,
"step": 54100
},
{
"epoch": 0.05,
"learning_rate": 4.762555056425989e-05,
"loss": 0.7811,
"step": 54200
},
{
"epoch": 0.05,
"learning_rate": 4.7621167963186156e-05,
"loss": 0.7686,
"step": 54300
},
{
"epoch": 0.05,
"learning_rate": 4.761678536211241e-05,
"loss": 0.7874,
"step": 54400
},
{
"epoch": 0.05,
"learning_rate": 4.7612402761038677e-05,
"loss": 0.7815,
"step": 54500
},
{
"epoch": 0.05,
"learning_rate": 4.760802015996494e-05,
"loss": 0.742,
"step": 54600
},
{
"epoch": 0.05,
"learning_rate": 4.7603637558891204e-05,
"loss": 0.7245,
"step": 54700
},
{
"epoch": 0.05,
"learning_rate": 4.759925495781747e-05,
"loss": 0.7406,
"step": 54800
},
{
"epoch": 0.05,
"learning_rate": 4.759487235674373e-05,
"loss": 0.7995,
"step": 54900
},
{
"epoch": 0.05,
"learning_rate": 4.759048975566999e-05,
"loss": 0.7812,
"step": 55000
},
{
"epoch": 0.05,
"learning_rate": 4.758610715459625e-05,
"loss": 0.7596,
"step": 55100
},
{
"epoch": 0.05,
"learning_rate": 4.758172455352252e-05,
"loss": 0.8012,
"step": 55200
},
{
"epoch": 0.05,
"learning_rate": 4.757734195244878e-05,
"loss": 0.7518,
"step": 55300
},
{
"epoch": 0.05,
"learning_rate": 4.7572959351375045e-05,
"loss": 0.766,
"step": 55400
},
{
"epoch": 0.05,
"learning_rate": 4.756857675030131e-05,
"loss": 0.7832,
"step": 55500
},
{
"epoch": 0.05,
"learning_rate": 4.756419414922757e-05,
"loss": 0.8165,
"step": 55600
},
{
"epoch": 0.05,
"learning_rate": 4.755981154815383e-05,
"loss": 0.7623,
"step": 55700
},
{
"epoch": 0.05,
"learning_rate": 4.7555428947080093e-05,
"loss": 0.766,
"step": 55800
},
{
"epoch": 0.05,
"learning_rate": 4.755104634600636e-05,
"loss": 0.7707,
"step": 55900
},
{
"epoch": 0.05,
"learning_rate": 4.754666374493262e-05,
"loss": 0.7161,
"step": 56000
},
{
"epoch": 0.05,
"learning_rate": 4.754228114385888e-05,
"loss": 0.7701,
"step": 56100
},
{
"epoch": 0.05,
"learning_rate": 4.753789854278515e-05,
"loss": 0.8108,
"step": 56200
},
{
"epoch": 0.05,
"learning_rate": 4.753351594171141e-05,
"loss": 0.7544,
"step": 56300
},
{
"epoch": 0.05,
"learning_rate": 4.752913334063767e-05,
"loss": 0.7288,
"step": 56400
},
{
"epoch": 0.05,
"learning_rate": 4.752479456557467e-05,
"loss": 0.7159,
"step": 56500
},
{
"epoch": 0.05,
"learning_rate": 4.752041196450093e-05,
"loss": 0.7498,
"step": 56600
},
{
"epoch": 0.05,
"learning_rate": 4.7516029363427194e-05,
"loss": 0.7357,
"step": 56700
},
{
"epoch": 0.05,
"learning_rate": 4.751164676235346e-05,
"loss": 0.7562,
"step": 56800
},
{
"epoch": 0.05,
"learning_rate": 4.750726416127972e-05,
"loss": 0.8008,
"step": 56900
},
{
"epoch": 0.05,
"learning_rate": 4.7502881560205985e-05,
"loss": 0.7223,
"step": 57000
},
{
"epoch": 0.05,
"learning_rate": 4.749849895913225e-05,
"loss": 0.7899,
"step": 57100
},
{
"epoch": 0.05,
"learning_rate": 4.749411635805851e-05,
"loss": 0.758,
"step": 57200
},
{
"epoch": 0.05,
"learning_rate": 4.748973375698477e-05,
"loss": 0.7737,
"step": 57300
},
{
"epoch": 0.05,
"learning_rate": 4.7485351155911034e-05,
"loss": 0.7913,
"step": 57400
},
{
"epoch": 0.05,
"learning_rate": 4.74809685548373e-05,
"loss": 0.7597,
"step": 57500
},
{
"epoch": 0.05,
"learning_rate": 4.7476585953763555e-05,
"loss": 0.7742,
"step": 57600
},
{
"epoch": 0.05,
"learning_rate": 4.7472247178700565e-05,
"loss": 0.7813,
"step": 57700
},
{
"epoch": 0.05,
"learning_rate": 4.746786457762682e-05,
"loss": 0.742,
"step": 57800
},
{
"epoch": 0.05,
"learning_rate": 4.7463481976553086e-05,
"loss": 0.7631,
"step": 57900
},
{
"epoch": 0.05,
"learning_rate": 4.745909937547935e-05,
"loss": 0.7224,
"step": 58000
},
{
"epoch": 0.05,
"learning_rate": 4.7454716774405614e-05,
"loss": 0.7356,
"step": 58100
},
{
"epoch": 0.05,
"learning_rate": 4.745033417333187e-05,
"loss": 0.7337,
"step": 58200
},
{
"epoch": 0.05,
"learning_rate": 4.7445951572258134e-05,
"loss": 0.7632,
"step": 58300
},
{
"epoch": 0.05,
"learning_rate": 4.7441568971184405e-05,
"loss": 0.7116,
"step": 58400
},
{
"epoch": 0.05,
"learning_rate": 4.743718637011066e-05,
"loss": 0.8256,
"step": 58500
},
{
"epoch": 0.05,
"learning_rate": 4.7432803769036926e-05,
"loss": 0.74,
"step": 58600
},
{
"epoch": 0.05,
"learning_rate": 4.742842116796319e-05,
"loss": 0.7193,
"step": 58700
},
{
"epoch": 0.05,
"learning_rate": 4.7424038566889454e-05,
"loss": 0.6876,
"step": 58800
},
{
"epoch": 0.05,
"learning_rate": 4.741965596581571e-05,
"loss": 0.752,
"step": 58900
},
{
"epoch": 0.05,
"learning_rate": 4.7415273364741975e-05,
"loss": 0.7214,
"step": 59000
},
{
"epoch": 0.05,
"learning_rate": 4.741089076366824e-05,
"loss": 0.7663,
"step": 59100
},
{
"epoch": 0.05,
"learning_rate": 4.74065081625945e-05,
"loss": 0.6977,
"step": 59200
},
{
"epoch": 0.05,
"learning_rate": 4.7402125561520766e-05,
"loss": 0.7689,
"step": 59300
},
{
"epoch": 0.05,
"learning_rate": 4.739774296044703e-05,
"loss": 0.7537,
"step": 59400
},
{
"epoch": 0.05,
"learning_rate": 4.739336035937329e-05,
"loss": 0.7406,
"step": 59500
},
{
"epoch": 0.05,
"learning_rate": 4.738897775829955e-05,
"loss": 0.7167,
"step": 59600
},
{
"epoch": 0.05,
"learning_rate": 4.7384595157225815e-05,
"loss": 0.7397,
"step": 59700
},
{
"epoch": 0.05,
"learning_rate": 4.738021255615208e-05,
"loss": 0.7276,
"step": 59800
},
{
"epoch": 0.05,
"learning_rate": 4.737582995507834e-05,
"loss": 0.7912,
"step": 59900
},
{
"epoch": 0.05,
"learning_rate": 4.737144735400461e-05,
"loss": 0.7575,
"step": 60000
},
{
"epoch": 0.05,
"learning_rate": 4.736706475293087e-05,
"loss": 0.8229,
"step": 60100
},
{
"epoch": 0.05,
"learning_rate": 4.736268215185713e-05,
"loss": 0.789,
"step": 60200
},
{
"epoch": 0.05,
"learning_rate": 4.735834337679413e-05,
"loss": 0.7348,
"step": 60300
},
{
"epoch": 0.05,
"learning_rate": 4.735396077572039e-05,
"loss": 0.7947,
"step": 60400
},
{
"epoch": 0.05,
"learning_rate": 4.734957817464665e-05,
"loss": 0.7363,
"step": 60500
},
{
"epoch": 0.05,
"learning_rate": 4.734519557357292e-05,
"loss": 0.7129,
"step": 60600
},
{
"epoch": 0.05,
"learning_rate": 4.734081297249918e-05,
"loss": 0.7337,
"step": 60700
},
{
"epoch": 0.05,
"learning_rate": 4.733643037142544e-05,
"loss": 0.7345,
"step": 60800
},
{
"epoch": 0.05,
"learning_rate": 4.733204777035171e-05,
"loss": 0.683,
"step": 60900
},
{
"epoch": 0.05,
"learning_rate": 4.732766516927797e-05,
"loss": 0.7007,
"step": 61000
},
{
"epoch": 0.05,
"learning_rate": 4.732328256820423e-05,
"loss": 0.7743,
"step": 61100
},
{
"epoch": 0.05,
"learning_rate": 4.731889996713049e-05,
"loss": 0.7295,
"step": 61200
},
{
"epoch": 0.05,
"learning_rate": 4.7314517366056756e-05,
"loss": 0.745,
"step": 61300
},
{
"epoch": 0.05,
"learning_rate": 4.731013476498302e-05,
"loss": 0.7281,
"step": 61400
},
{
"epoch": 0.05,
"learning_rate": 4.7305752163909284e-05,
"loss": 0.7476,
"step": 61500
},
{
"epoch": 0.05,
"learning_rate": 4.730136956283555e-05,
"loss": 0.7481,
"step": 61600
},
{
"epoch": 0.05,
"learning_rate": 4.7296986961761805e-05,
"loss": 0.757,
"step": 61700
},
{
"epoch": 0.05,
"learning_rate": 4.729260436068807e-05,
"loss": 0.6987,
"step": 61800
},
{
"epoch": 0.05,
"learning_rate": 4.728822175961433e-05,
"loss": 0.7239,
"step": 61900
},
{
"epoch": 0.05,
"learning_rate": 4.7283839158540596e-05,
"loss": 0.7217,
"step": 62000
},
{
"epoch": 0.05,
"learning_rate": 4.727945655746686e-05,
"loss": 0.7273,
"step": 62100
},
{
"epoch": 0.05,
"learning_rate": 4.7275073956393124e-05,
"loss": 0.7117,
"step": 62200
},
{
"epoch": 0.05,
"learning_rate": 4.727069135531939e-05,
"loss": 0.755,
"step": 62300
},
{
"epoch": 0.05,
"learning_rate": 4.7266308754245645e-05,
"loss": 0.7831,
"step": 62400
},
{
"epoch": 0.05,
"learning_rate": 4.726192615317191e-05,
"loss": 0.7663,
"step": 62500
},
{
"epoch": 0.05,
"learning_rate": 4.725758737810891e-05,
"loss": 0.7515,
"step": 62600
},
{
"epoch": 0.05,
"learning_rate": 4.725320477703517e-05,
"loss": 0.7329,
"step": 62700
},
{
"epoch": 0.06,
"learning_rate": 4.724882217596144e-05,
"loss": 0.7341,
"step": 62800
},
{
"epoch": 0.06,
"learning_rate": 4.72444395748877e-05,
"loss": 0.7197,
"step": 62900
},
{
"epoch": 0.06,
"learning_rate": 4.724005697381396e-05,
"loss": 0.7218,
"step": 63000
},
{
"epoch": 0.06,
"learning_rate": 4.7235674372740224e-05,
"loss": 0.7251,
"step": 63100
},
{
"epoch": 0.06,
"learning_rate": 4.723129177166649e-05,
"loss": 0.7748,
"step": 63200
},
{
"epoch": 0.06,
"learning_rate": 4.7226909170592745e-05,
"loss": 0.7033,
"step": 63300
},
{
"epoch": 0.06,
"learning_rate": 4.722252656951901e-05,
"loss": 0.7744,
"step": 63400
},
{
"epoch": 0.06,
"learning_rate": 4.721814396844527e-05,
"loss": 0.7845,
"step": 63500
},
{
"epoch": 0.06,
"learning_rate": 4.721376136737154e-05,
"loss": 0.7196,
"step": 63600
},
{
"epoch": 0.06,
"learning_rate": 4.72093787662978e-05,
"loss": 0.7127,
"step": 63700
},
{
"epoch": 0.06,
"learning_rate": 4.7204996165224065e-05,
"loss": 0.7302,
"step": 63800
},
{
"epoch": 0.06,
"learning_rate": 4.720065739016106e-05,
"loss": 0.7455,
"step": 63900
},
{
"epoch": 0.06,
"learning_rate": 4.7196274789087325e-05,
"loss": 0.7602,
"step": 64000
},
{
"epoch": 0.06,
"learning_rate": 4.719189218801359e-05,
"loss": 0.7566,
"step": 64100
},
{
"epoch": 0.06,
"learning_rate": 4.7187509586939846e-05,
"loss": 0.7339,
"step": 64200
},
{
"epoch": 0.06,
"learning_rate": 4.7183126985866116e-05,
"loss": 0.8176,
"step": 64300
},
{
"epoch": 0.06,
"learning_rate": 4.717874438479238e-05,
"loss": 0.7827,
"step": 64400
},
{
"epoch": 0.06,
"learning_rate": 4.717436178371864e-05,
"loss": 0.7373,
"step": 64500
},
{
"epoch": 0.06,
"learning_rate": 4.71699791826449e-05,
"loss": 0.7771,
"step": 64600
},
{
"epoch": 0.06,
"learning_rate": 4.7165596581571165e-05,
"loss": 0.7641,
"step": 64700
},
{
"epoch": 0.06,
"learning_rate": 4.716121398049743e-05,
"loss": 0.7342,
"step": 64800
},
{
"epoch": 0.06,
"learning_rate": 4.7156831379423686e-05,
"loss": 0.6829,
"step": 64900
},
{
"epoch": 0.06,
"learning_rate": 4.715244877834995e-05,
"loss": 0.6875,
"step": 65000
},
{
"epoch": 0.06,
"learning_rate": 4.714806617727622e-05,
"loss": 0.721,
"step": 65100
},
{
"epoch": 0.06,
"learning_rate": 4.714368357620248e-05,
"loss": 0.7646,
"step": 65200
},
{
"epoch": 0.06,
"learning_rate": 4.713930097512874e-05,
"loss": 0.7395,
"step": 65300
},
{
"epoch": 0.06,
"learning_rate": 4.7134918374055005e-05,
"loss": 0.6838,
"step": 65400
},
{
"epoch": 0.06,
"learning_rate": 4.713053577298126e-05,
"loss": 0.696,
"step": 65500
},
{
"epoch": 0.06,
"learning_rate": 4.7126153171907526e-05,
"loss": 0.7509,
"step": 65600
},
{
"epoch": 0.06,
"learning_rate": 4.712177057083379e-05,
"loss": 0.7076,
"step": 65700
},
{
"epoch": 0.06,
"learning_rate": 4.7117387969760054e-05,
"loss": 0.721,
"step": 65800
},
{
"epoch": 0.06,
"learning_rate": 4.711300536868632e-05,
"loss": 0.7331,
"step": 65900
},
{
"epoch": 0.06,
"learning_rate": 4.710862276761258e-05,
"loss": 0.7376,
"step": 66000
},
{
"epoch": 0.06,
"learning_rate": 4.7104240166538846e-05,
"loss": 0.7188,
"step": 66100
},
{
"epoch": 0.06,
"learning_rate": 4.70998575654651e-05,
"loss": 0.7657,
"step": 66200
},
{
"epoch": 0.06,
"learning_rate": 4.709547496439137e-05,
"loss": 0.6871,
"step": 66300
},
{
"epoch": 0.06,
"learning_rate": 4.709109236331763e-05,
"loss": 0.7189,
"step": 66400
},
{
"epoch": 0.06,
"learning_rate": 4.7086709762243894e-05,
"loss": 0.7227,
"step": 66500
},
{
"epoch": 0.06,
"learning_rate": 4.708232716117016e-05,
"loss": 0.7571,
"step": 66600
},
{
"epoch": 0.06,
"learning_rate": 4.707794456009642e-05,
"loss": 0.7342,
"step": 66700
},
{
"epoch": 0.06,
"learning_rate": 4.7073561959022686e-05,
"loss": 0.7506,
"step": 66800
},
{
"epoch": 0.06,
"learning_rate": 4.706917935794894e-05,
"loss": 0.8274,
"step": 66900
},
{
"epoch": 0.06,
"learning_rate": 4.706479675687521e-05,
"loss": 0.7701,
"step": 67000
},
{
"epoch": 0.06,
"learning_rate": 4.706041415580147e-05,
"loss": 0.7761,
"step": 67100
},
{
"epoch": 0.06,
"learning_rate": 4.705603155472773e-05,
"loss": 0.8041,
"step": 67200
},
{
"epoch": 0.06,
"learning_rate": 4.7051648953654e-05,
"loss": 0.7401,
"step": 67300
},
{
"epoch": 0.06,
"learning_rate": 4.704726635258026e-05,
"loss": 0.7756,
"step": 67400
},
{
"epoch": 0.06,
"learning_rate": 4.704288375150652e-05,
"loss": 0.7213,
"step": 67500
},
{
"epoch": 0.06,
"learning_rate": 4.7038501150432784e-05,
"loss": 0.7411,
"step": 67600
},
{
"epoch": 0.06,
"learning_rate": 4.703411854935905e-05,
"loss": 0.8019,
"step": 67700
},
{
"epoch": 0.06,
"learning_rate": 4.702973594828531e-05,
"loss": 0.7075,
"step": 67800
},
{
"epoch": 0.06,
"learning_rate": 4.702535334721157e-05,
"loss": 0.7681,
"step": 67900
},
{
"epoch": 0.06,
"learning_rate": 4.702097074613783e-05,
"loss": 0.7269,
"step": 68000
},
{
"epoch": 0.06,
"learning_rate": 4.70165881450641e-05,
"loss": 0.7258,
"step": 68100
},
{
"epoch": 0.06,
"learning_rate": 4.701220554399036e-05,
"loss": 0.7877,
"step": 68200
},
{
"epoch": 0.06,
"learning_rate": 4.7007822942916624e-05,
"loss": 0.7218,
"step": 68300
},
{
"epoch": 0.06,
"learning_rate": 4.700344034184289e-05,
"loss": 0.7208,
"step": 68400
},
{
"epoch": 0.06,
"learning_rate": 4.6999057740769145e-05,
"loss": 0.7071,
"step": 68500
},
{
"epoch": 0.06,
"learning_rate": 4.699467513969541e-05,
"loss": 0.7576,
"step": 68600
},
{
"epoch": 0.06,
"learning_rate": 4.699029253862167e-05,
"loss": 0.7345,
"step": 68700
},
{
"epoch": 0.06,
"learning_rate": 4.6985909937547936e-05,
"loss": 0.7571,
"step": 68800
},
{
"epoch": 0.06,
"learning_rate": 4.69815273364742e-05,
"loss": 0.7324,
"step": 68900
},
{
"epoch": 0.06,
"learning_rate": 4.6977144735400464e-05,
"loss": 0.6766,
"step": 69000
},
{
"epoch": 0.06,
"learning_rate": 4.697276213432673e-05,
"loss": 0.7446,
"step": 69100
},
{
"epoch": 0.06,
"learning_rate": 4.6968379533252985e-05,
"loss": 0.7312,
"step": 69200
},
{
"epoch": 0.06,
"learning_rate": 4.696399693217925e-05,
"loss": 0.772,
"step": 69300
},
{
"epoch": 0.06,
"learning_rate": 4.695961433110551e-05,
"loss": 0.72,
"step": 69400
},
{
"epoch": 0.06,
"learning_rate": 4.695523173003178e-05,
"loss": 0.7472,
"step": 69500
},
{
"epoch": 0.06,
"learning_rate": 4.695084912895804e-05,
"loss": 0.7464,
"step": 69600
},
{
"epoch": 0.06,
"learning_rate": 4.6946466527884305e-05,
"loss": 0.7257,
"step": 69700
},
{
"epoch": 0.06,
"learning_rate": 4.694208392681056e-05,
"loss": 0.7017,
"step": 69800
},
{
"epoch": 0.06,
"learning_rate": 4.6937701325736826e-05,
"loss": 0.6693,
"step": 69900
},
{
"epoch": 0.06,
"learning_rate": 4.693331872466309e-05,
"loss": 0.7577,
"step": 70000
},
{
"epoch": 0.06,
"learning_rate": 4.692893612358935e-05,
"loss": 0.808,
"step": 70100
},
{
"epoch": 0.06,
"learning_rate": 4.692459734852635e-05,
"loss": 0.7028,
"step": 70200
},
{
"epoch": 0.06,
"learning_rate": 4.692021474745262e-05,
"loss": 0.7183,
"step": 70300
},
{
"epoch": 0.06,
"learning_rate": 4.691583214637888e-05,
"loss": 0.721,
"step": 70400
},
{
"epoch": 0.06,
"learning_rate": 4.691144954530514e-05,
"loss": 0.7186,
"step": 70500
},
{
"epoch": 0.06,
"learning_rate": 4.6907066944231405e-05,
"loss": 0.7153,
"step": 70600
},
{
"epoch": 0.06,
"learning_rate": 4.690268434315767e-05,
"loss": 0.754,
"step": 70700
},
{
"epoch": 0.06,
"learning_rate": 4.6898301742083926e-05,
"loss": 0.7444,
"step": 70800
},
{
"epoch": 0.06,
"learning_rate": 4.689391914101019e-05,
"loss": 0.7054,
"step": 70900
},
{
"epoch": 0.06,
"learning_rate": 4.688953653993646e-05,
"loss": 0.7525,
"step": 71000
},
{
"epoch": 0.06,
"learning_rate": 4.688515393886272e-05,
"loss": 0.7603,
"step": 71100
},
{
"epoch": 0.06,
"learning_rate": 4.688077133778898e-05,
"loss": 0.7066,
"step": 71200
},
{
"epoch": 0.06,
"learning_rate": 4.6876388736715245e-05,
"loss": 0.7383,
"step": 71300
},
{
"epoch": 0.06,
"learning_rate": 4.68720061356415e-05,
"loss": 0.7191,
"step": 71400
},
{
"epoch": 0.06,
"learning_rate": 4.6867623534567766e-05,
"loss": 0.7293,
"step": 71500
},
{
"epoch": 0.06,
"learning_rate": 4.686324093349403e-05,
"loss": 0.7277,
"step": 71600
},
{
"epoch": 0.06,
"learning_rate": 4.6858858332420294e-05,
"loss": 0.7192,
"step": 71700
},
{
"epoch": 0.06,
"learning_rate": 4.685447573134656e-05,
"loss": 0.6965,
"step": 71800
},
{
"epoch": 0.06,
"learning_rate": 4.685009313027282e-05,
"loss": 0.7534,
"step": 71900
},
{
"epoch": 0.06,
"learning_rate": 4.6845710529199086e-05,
"loss": 0.7258,
"step": 72000
},
{
"epoch": 0.06,
"learning_rate": 4.684132792812534e-05,
"loss": 0.6781,
"step": 72100
},
{
"epoch": 0.06,
"learning_rate": 4.6836945327051607e-05,
"loss": 0.7204,
"step": 72200
},
{
"epoch": 0.06,
"learning_rate": 4.68326065519886e-05,
"loss": 0.7178,
"step": 72300
},
{
"epoch": 0.06,
"learning_rate": 4.6828223950914867e-05,
"loss": 0.6867,
"step": 72400
},
{
"epoch": 0.06,
"learning_rate": 4.682384134984114e-05,
"loss": 0.7365,
"step": 72500
},
{
"epoch": 0.06,
"learning_rate": 4.6819458748767394e-05,
"loss": 0.7376,
"step": 72600
},
{
"epoch": 0.06,
"learning_rate": 4.681507614769366e-05,
"loss": 0.7513,
"step": 72700
},
{
"epoch": 0.06,
"learning_rate": 4.681069354661992e-05,
"loss": 0.6767,
"step": 72800
},
{
"epoch": 0.06,
"learning_rate": 4.680635477155692e-05,
"loss": 0.7045,
"step": 72900
},
{
"epoch": 0.06,
"learning_rate": 4.680197217048318e-05,
"loss": 0.7005,
"step": 73000
},
{
"epoch": 0.06,
"learning_rate": 4.6797589569409446e-05,
"loss": 0.6519,
"step": 73100
},
{
"epoch": 0.06,
"learning_rate": 4.679320696833571e-05,
"loss": 0.7094,
"step": 73200
},
{
"epoch": 0.06,
"learning_rate": 4.6788824367261974e-05,
"loss": 0.7142,
"step": 73300
},
{
"epoch": 0.06,
"learning_rate": 4.678444176618824e-05,
"loss": 0.6942,
"step": 73400
},
{
"epoch": 0.06,
"learning_rate": 4.67800591651145e-05,
"loss": 0.6909,
"step": 73500
},
{
"epoch": 0.06,
"learning_rate": 4.677567656404076e-05,
"loss": 0.7346,
"step": 73600
},
{
"epoch": 0.06,
"learning_rate": 4.677129396296702e-05,
"loss": 0.7068,
"step": 73700
},
{
"epoch": 0.06,
"learning_rate": 4.6766911361893286e-05,
"loss": 0.7227,
"step": 73800
},
{
"epoch": 0.06,
"learning_rate": 4.6762528760819543e-05,
"loss": 0.7196,
"step": 73900
},
{
"epoch": 0.06,
"learning_rate": 4.6758146159745814e-05,
"loss": 0.7207,
"step": 74000
},
{
"epoch": 0.06,
"learning_rate": 4.675376355867208e-05,
"loss": 0.7757,
"step": 74100
},
{
"epoch": 0.07,
"learning_rate": 4.6749380957598335e-05,
"loss": 0.7319,
"step": 74200
},
{
"epoch": 0.07,
"learning_rate": 4.67449983565246e-05,
"loss": 0.7092,
"step": 74300
},
{
"epoch": 0.07,
"learning_rate": 4.674061575545086e-05,
"loss": 0.7526,
"step": 74400
},
{
"epoch": 0.07,
"learning_rate": 4.673623315437713e-05,
"loss": 0.7176,
"step": 74500
},
{
"epoch": 0.07,
"learning_rate": 4.6731850553303384e-05,
"loss": 0.726,
"step": 74600
},
{
"epoch": 0.07,
"learning_rate": 4.6727467952229654e-05,
"loss": 0.68,
"step": 74700
},
{
"epoch": 0.07,
"learning_rate": 4.672308535115592e-05,
"loss": 0.7784,
"step": 74800
},
{
"epoch": 0.07,
"learning_rate": 4.6718746576092914e-05,
"loss": 0.7321,
"step": 74900
},
{
"epoch": 0.07,
"learning_rate": 4.671436397501918e-05,
"loss": 0.7704,
"step": 75000
},
{
"epoch": 0.07,
"learning_rate": 4.6709981373945435e-05,
"loss": 0.7464,
"step": 75100
},
{
"epoch": 0.07,
"learning_rate": 4.67055987728717e-05,
"loss": 0.7443,
"step": 75200
},
{
"epoch": 0.07,
"learning_rate": 4.670121617179796e-05,
"loss": 0.7081,
"step": 75300
},
{
"epoch": 0.07,
"learning_rate": 4.669683357072423e-05,
"loss": 0.7639,
"step": 75400
},
{
"epoch": 0.07,
"learning_rate": 4.669245096965049e-05,
"loss": 0.7396,
"step": 75500
},
{
"epoch": 0.07,
"learning_rate": 4.6688068368576755e-05,
"loss": 0.6984,
"step": 75600
},
{
"epoch": 0.07,
"learning_rate": 4.668368576750302e-05,
"loss": 0.6922,
"step": 75700
},
{
"epoch": 0.07,
"learning_rate": 4.6679303166429276e-05,
"loss": 0.6644,
"step": 75800
},
{
"epoch": 0.07,
"learning_rate": 4.667492056535554e-05,
"loss": 0.7566,
"step": 75900
},
{
"epoch": 0.07,
"learning_rate": 4.6670537964281803e-05,
"loss": 0.6941,
"step": 76000
},
{
"epoch": 0.07,
"learning_rate": 4.666615536320806e-05,
"loss": 0.7252,
"step": 76100
},
{
"epoch": 0.07,
"learning_rate": 4.666177276213433e-05,
"loss": 0.7189,
"step": 76200
},
{
"epoch": 0.07,
"learning_rate": 4.6657390161060595e-05,
"loss": 0.7131,
"step": 76300
},
{
"epoch": 0.07,
"learning_rate": 4.665300755998685e-05,
"loss": 0.6844,
"step": 76400
},
{
"epoch": 0.07,
"learning_rate": 4.6648624958913116e-05,
"loss": 0.7517,
"step": 76500
},
{
"epoch": 0.07,
"learning_rate": 4.664424235783938e-05,
"loss": 0.7278,
"step": 76600
},
{
"epoch": 0.07,
"learning_rate": 4.6639859756765644e-05,
"loss": 0.7431,
"step": 76700
},
{
"epoch": 0.07,
"learning_rate": 4.66354771556919e-05,
"loss": 0.7321,
"step": 76800
},
{
"epoch": 0.07,
"learning_rate": 4.6631094554618165e-05,
"loss": 0.6624,
"step": 76900
},
{
"epoch": 0.07,
"learning_rate": 4.6626711953544435e-05,
"loss": 0.7143,
"step": 77000
},
{
"epoch": 0.07,
"learning_rate": 4.662232935247069e-05,
"loss": 0.7634,
"step": 77100
},
{
"epoch": 0.07,
"learning_rate": 4.6617946751396956e-05,
"loss": 0.6772,
"step": 77200
},
{
"epoch": 0.07,
"learning_rate": 4.661356415032322e-05,
"loss": 0.7267,
"step": 77300
},
{
"epoch": 0.07,
"learning_rate": 4.6609181549249484e-05,
"loss": 0.7375,
"step": 77400
},
{
"epoch": 0.07,
"learning_rate": 4.660479894817574e-05,
"loss": 0.711,
"step": 77500
},
{
"epoch": 0.07,
"learning_rate": 4.6600416347102005e-05,
"loss": 0.7006,
"step": 77600
},
{
"epoch": 0.07,
"learning_rate": 4.6596033746028276e-05,
"loss": 0.7981,
"step": 77700
},
{
"epoch": 0.07,
"learning_rate": 4.659165114495453e-05,
"loss": 0.7234,
"step": 77800
},
{
"epoch": 0.07,
"learning_rate": 4.65872685438808e-05,
"loss": 0.7595,
"step": 77900
},
{
"epoch": 0.07,
"learning_rate": 4.658288594280706e-05,
"loss": 0.7176,
"step": 78000
},
{
"epoch": 0.07,
"learning_rate": 4.657850334173332e-05,
"loss": 0.7093,
"step": 78100
},
{
"epoch": 0.07,
"learning_rate": 4.657412074065958e-05,
"loss": 0.7116,
"step": 78200
},
{
"epoch": 0.07,
"learning_rate": 4.6569738139585845e-05,
"loss": 0.7516,
"step": 78300
},
{
"epoch": 0.07,
"learning_rate": 4.656535553851211e-05,
"loss": 0.7237,
"step": 78400
},
{
"epoch": 0.07,
"learning_rate": 4.656097293743837e-05,
"loss": 0.7441,
"step": 78500
},
{
"epoch": 0.07,
"learning_rate": 4.655659033636464e-05,
"loss": 0.7624,
"step": 78600
},
{
"epoch": 0.07,
"learning_rate": 4.65522077352909e-05,
"loss": 0.7424,
"step": 78700
},
{
"epoch": 0.07,
"learning_rate": 4.654782513421716e-05,
"loss": 0.7186,
"step": 78800
},
{
"epoch": 0.07,
"learning_rate": 4.654344253314342e-05,
"loss": 0.7447,
"step": 78900
},
{
"epoch": 0.07,
"learning_rate": 4.6539059932069686e-05,
"loss": 0.7018,
"step": 79000
},
{
"epoch": 0.07,
"learning_rate": 4.653472115700668e-05,
"loss": 0.7174,
"step": 79100
},
{
"epoch": 0.07,
"learning_rate": 4.653033855593295e-05,
"loss": 0.7036,
"step": 79200
},
{
"epoch": 0.07,
"learning_rate": 4.652595595485921e-05,
"loss": 0.7419,
"step": 79300
},
{
"epoch": 0.07,
"learning_rate": 4.6521573353785474e-05,
"loss": 0.6937,
"step": 79400
},
{
"epoch": 0.07,
"learning_rate": 4.651719075271174e-05,
"loss": 0.7182,
"step": 79500
},
{
"epoch": 0.07,
"learning_rate": 4.6512808151638e-05,
"loss": 0.7337,
"step": 79600
},
{
"epoch": 0.07,
"learning_rate": 4.650842555056426e-05,
"loss": 0.7442,
"step": 79700
},
{
"epoch": 0.07,
"learning_rate": 4.650404294949052e-05,
"loss": 0.6751,
"step": 79800
},
{
"epoch": 0.07,
"learning_rate": 4.649966034841679e-05,
"loss": 0.7122,
"step": 79900
},
{
"epoch": 0.07,
"learning_rate": 4.649527774734305e-05,
"loss": 0.7274,
"step": 80000
},
{
"epoch": 0.07,
"learning_rate": 4.6490895146269314e-05,
"loss": 0.7567,
"step": 80100
},
{
"epoch": 0.07,
"learning_rate": 4.648651254519558e-05,
"loss": 0.722,
"step": 80200
},
{
"epoch": 0.07,
"learning_rate": 4.6482129944121835e-05,
"loss": 0.7685,
"step": 80300
},
{
"epoch": 0.07,
"learning_rate": 4.64777473430481e-05,
"loss": 0.6959,
"step": 80400
},
{
"epoch": 0.07,
"learning_rate": 4.647336474197436e-05,
"loss": 0.7012,
"step": 80500
},
{
"epoch": 0.07,
"learning_rate": 4.6468982140900627e-05,
"loss": 0.6581,
"step": 80600
},
{
"epoch": 0.07,
"learning_rate": 4.646459953982689e-05,
"loss": 0.7807,
"step": 80700
},
{
"epoch": 0.07,
"learning_rate": 4.6460216938753154e-05,
"loss": 0.6778,
"step": 80800
},
{
"epoch": 0.07,
"learning_rate": 4.645583433767942e-05,
"loss": 0.6772,
"step": 80900
},
{
"epoch": 0.07,
"learning_rate": 4.6451451736605675e-05,
"loss": 0.7037,
"step": 81000
},
{
"epoch": 0.07,
"learning_rate": 4.644706913553194e-05,
"loss": 0.7486,
"step": 81100
},
{
"epoch": 0.07,
"learning_rate": 4.64426865344582e-05,
"loss": 0.7091,
"step": 81200
},
{
"epoch": 0.07,
"learning_rate": 4.643830393338447e-05,
"loss": 0.7258,
"step": 81300
},
{
"epoch": 0.07,
"learning_rate": 4.643392133231073e-05,
"loss": 0.714,
"step": 81400
},
{
"epoch": 0.07,
"learning_rate": 4.6429538731236995e-05,
"loss": 0.7957,
"step": 81500
},
{
"epoch": 0.07,
"learning_rate": 4.642515613016326e-05,
"loss": 0.6991,
"step": 81600
},
{
"epoch": 0.07,
"learning_rate": 4.6420773529089516e-05,
"loss": 0.7377,
"step": 81700
},
{
"epoch": 0.07,
"learning_rate": 4.641639092801578e-05,
"loss": 0.7104,
"step": 81800
},
{
"epoch": 0.07,
"learning_rate": 4.641200832694204e-05,
"loss": 0.6876,
"step": 81900
},
{
"epoch": 0.07,
"learning_rate": 4.64076257258683e-05,
"loss": 0.6625,
"step": 82000
},
{
"epoch": 0.07,
"learning_rate": 4.64032869508053e-05,
"loss": 0.6925,
"step": 82100
},
{
"epoch": 0.07,
"learning_rate": 4.639890434973157e-05,
"loss": 0.7097,
"step": 82200
},
{
"epoch": 0.07,
"learning_rate": 4.639452174865783e-05,
"loss": 0.7141,
"step": 82300
},
{
"epoch": 0.07,
"learning_rate": 4.6390139147584095e-05,
"loss": 0.7273,
"step": 82400
},
{
"epoch": 0.07,
"learning_rate": 4.638575654651036e-05,
"loss": 0.7188,
"step": 82500
},
{
"epoch": 0.07,
"learning_rate": 4.6381373945436616e-05,
"loss": 0.7167,
"step": 82600
},
{
"epoch": 0.07,
"learning_rate": 4.637699134436288e-05,
"loss": 0.6817,
"step": 82700
},
{
"epoch": 0.07,
"learning_rate": 4.6372608743289144e-05,
"loss": 0.6985,
"step": 82800
},
{
"epoch": 0.07,
"learning_rate": 4.636822614221541e-05,
"loss": 0.7166,
"step": 82900
},
{
"epoch": 0.07,
"learning_rate": 4.636384354114167e-05,
"loss": 0.7242,
"step": 83000
},
{
"epoch": 0.07,
"learning_rate": 4.6359460940067935e-05,
"loss": 0.6676,
"step": 83100
},
{
"epoch": 0.07,
"learning_rate": 4.635507833899419e-05,
"loss": 0.7121,
"step": 83200
},
{
"epoch": 0.07,
"learning_rate": 4.6350695737920456e-05,
"loss": 0.7046,
"step": 83300
},
{
"epoch": 0.07,
"learning_rate": 4.634631313684672e-05,
"loss": 0.7065,
"step": 83400
},
{
"epoch": 0.07,
"learning_rate": 4.6341930535772984e-05,
"loss": 0.7239,
"step": 83500
},
{
"epoch": 0.07,
"learning_rate": 4.633754793469924e-05,
"loss": 0.6839,
"step": 83600
},
{
"epoch": 0.07,
"learning_rate": 4.633316533362551e-05,
"loss": 0.7234,
"step": 83700
},
{
"epoch": 0.07,
"learning_rate": 4.6328782732551776e-05,
"loss": 0.7392,
"step": 83800
},
{
"epoch": 0.07,
"learning_rate": 4.632440013147803e-05,
"loss": 0.7325,
"step": 83900
},
{
"epoch": 0.07,
"learning_rate": 4.63200175304043e-05,
"loss": 0.6921,
"step": 84000
},
{
"epoch": 0.07,
"learning_rate": 4.631563492933056e-05,
"loss": 0.7407,
"step": 84100
},
{
"epoch": 0.07,
"learning_rate": 4.6311252328256824e-05,
"loss": 0.6938,
"step": 84200
},
{
"epoch": 0.07,
"learning_rate": 4.630686972718308e-05,
"loss": 0.7158,
"step": 84300
},
{
"epoch": 0.07,
"learning_rate": 4.630248712610935e-05,
"loss": 0.6607,
"step": 84400
},
{
"epoch": 0.07,
"learning_rate": 4.6298104525035616e-05,
"loss": 0.7155,
"step": 84500
},
{
"epoch": 0.07,
"learning_rate": 4.629372192396187e-05,
"loss": 0.7071,
"step": 84600
},
{
"epoch": 0.07,
"learning_rate": 4.628933932288814e-05,
"loss": 0.7122,
"step": 84700
},
{
"epoch": 0.07,
"learning_rate": 4.62849567218144e-05,
"loss": 0.6807,
"step": 84800
},
{
"epoch": 0.07,
"learning_rate": 4.628057412074066e-05,
"loss": 0.6818,
"step": 84900
},
{
"epoch": 0.07,
"learning_rate": 4.627619151966692e-05,
"loss": 0.7619,
"step": 85000
},
{
"epoch": 0.07,
"learning_rate": 4.6271808918593186e-05,
"loss": 0.728,
"step": 85100
},
{
"epoch": 0.07,
"learning_rate": 4.626742631751945e-05,
"loss": 0.6841,
"step": 85200
},
{
"epoch": 0.07,
"learning_rate": 4.6263043716445713e-05,
"loss": 0.6992,
"step": 85300
},
{
"epoch": 0.07,
"learning_rate": 4.625866111537198e-05,
"loss": 0.6894,
"step": 85400
},
{
"epoch": 0.07,
"learning_rate": 4.625427851429824e-05,
"loss": 0.7057,
"step": 85500
},
{
"epoch": 0.08,
"learning_rate": 4.62498959132245e-05,
"loss": 0.7071,
"step": 85600
},
{
"epoch": 0.08,
"learning_rate": 4.624551331215076e-05,
"loss": 0.6997,
"step": 85700
},
{
"epoch": 0.08,
"learning_rate": 4.6241130711077026e-05,
"loss": 0.6991,
"step": 85800
},
{
"epoch": 0.08,
"learning_rate": 4.623674811000329e-05,
"loss": 0.6834,
"step": 85900
},
{
"epoch": 0.08,
"learning_rate": 4.6232365508929554e-05,
"loss": 0.7059,
"step": 86000
},
{
"epoch": 0.08,
"learning_rate": 4.622798290785582e-05,
"loss": 0.7146,
"step": 86100
},
{
"epoch": 0.08,
"learning_rate": 4.6223600306782075e-05,
"loss": 0.6711,
"step": 86200
},
{
"epoch": 0.08,
"learning_rate": 4.621926153171908e-05,
"loss": 0.7628,
"step": 86300
},
{
"epoch": 0.08,
"learning_rate": 4.621487893064534e-05,
"loss": 0.7287,
"step": 86400
},
{
"epoch": 0.08,
"learning_rate": 4.62104963295716e-05,
"loss": 0.7211,
"step": 86500
},
{
"epoch": 0.08,
"learning_rate": 4.620611372849787e-05,
"loss": 0.7384,
"step": 86600
},
{
"epoch": 0.08,
"learning_rate": 4.620173112742413e-05,
"loss": 0.6255,
"step": 86700
},
{
"epoch": 0.08,
"learning_rate": 4.619734852635039e-05,
"loss": 0.7012,
"step": 86800
},
{
"epoch": 0.08,
"learning_rate": 4.6192965925276654e-05,
"loss": 0.7017,
"step": 86900
},
{
"epoch": 0.08,
"learning_rate": 4.618858332420292e-05,
"loss": 0.7227,
"step": 87000
},
{
"epoch": 0.08,
"learning_rate": 4.6184200723129175e-05,
"loss": 0.7206,
"step": 87100
},
{
"epoch": 0.08,
"learning_rate": 4.617981812205544e-05,
"loss": 0.7285,
"step": 87200
},
{
"epoch": 0.08,
"learning_rate": 4.61754355209817e-05,
"loss": 0.7348,
"step": 87300
},
{
"epoch": 0.08,
"learning_rate": 4.617105291990797e-05,
"loss": 0.6571,
"step": 87400
},
{
"epoch": 0.08,
"learning_rate": 4.616667031883423e-05,
"loss": 0.6924,
"step": 87500
},
{
"epoch": 0.08,
"learning_rate": 4.6162287717760495e-05,
"loss": 0.6576,
"step": 87600
},
{
"epoch": 0.08,
"learning_rate": 4.615790511668676e-05,
"loss": 0.7257,
"step": 87700
},
{
"epoch": 0.08,
"learning_rate": 4.6153522515613015e-05,
"loss": 0.671,
"step": 87800
},
{
"epoch": 0.08,
"learning_rate": 4.614913991453928e-05,
"loss": 0.6536,
"step": 87900
},
{
"epoch": 0.08,
"learning_rate": 4.614475731346554e-05,
"loss": 0.7071,
"step": 88000
},
{
"epoch": 0.08,
"learning_rate": 4.614037471239181e-05,
"loss": 0.7386,
"step": 88100
},
{
"epoch": 0.08,
"learning_rate": 4.613599211131807e-05,
"loss": 0.728,
"step": 88200
},
{
"epoch": 0.08,
"learning_rate": 4.6131609510244335e-05,
"loss": 0.6844,
"step": 88300
},
{
"epoch": 0.08,
"learning_rate": 4.612727073518133e-05,
"loss": 0.7101,
"step": 88400
},
{
"epoch": 0.08,
"learning_rate": 4.6122888134107595e-05,
"loss": 0.722,
"step": 88500
},
{
"epoch": 0.08,
"learning_rate": 4.611850553303386e-05,
"loss": 0.7147,
"step": 88600
},
{
"epoch": 0.08,
"learning_rate": 4.6114122931960116e-05,
"loss": 0.7095,
"step": 88700
},
{
"epoch": 0.08,
"learning_rate": 4.610974033088638e-05,
"loss": 0.6681,
"step": 88800
},
{
"epoch": 0.08,
"learning_rate": 4.610535772981265e-05,
"loss": 0.6952,
"step": 88900
},
{
"epoch": 0.08,
"learning_rate": 4.610097512873891e-05,
"loss": 0.703,
"step": 89000
},
{
"epoch": 0.08,
"learning_rate": 4.609659252766517e-05,
"loss": 0.6901,
"step": 89100
},
{
"epoch": 0.08,
"learning_rate": 4.6092209926591435e-05,
"loss": 0.7554,
"step": 89200
},
{
"epoch": 0.08,
"learning_rate": 4.60878273255177e-05,
"loss": 0.6939,
"step": 89300
},
{
"epoch": 0.08,
"learning_rate": 4.6083444724443956e-05,
"loss": 0.6715,
"step": 89400
},
{
"epoch": 0.08,
"learning_rate": 4.607906212337022e-05,
"loss": 0.749,
"step": 89500
},
{
"epoch": 0.08,
"learning_rate": 4.607467952229649e-05,
"loss": 0.6863,
"step": 89600
},
{
"epoch": 0.08,
"learning_rate": 4.607029692122275e-05,
"loss": 0.6819,
"step": 89700
},
{
"epoch": 0.08,
"learning_rate": 4.606591432014901e-05,
"loss": 0.7021,
"step": 89800
},
{
"epoch": 0.08,
"learning_rate": 4.6061531719075276e-05,
"loss": 0.7092,
"step": 89900
},
{
"epoch": 0.08,
"learning_rate": 4.605714911800153e-05,
"loss": 0.6554,
"step": 90000
},
{
"epoch": 0.08,
"learning_rate": 4.6052766516927797e-05,
"loss": 0.6724,
"step": 90100
},
{
"epoch": 0.08,
"learning_rate": 4.604838391585406e-05,
"loss": 0.7186,
"step": 90200
},
{
"epoch": 0.08,
"learning_rate": 4.6044001314780324e-05,
"loss": 0.7339,
"step": 90300
},
{
"epoch": 0.08,
"learning_rate": 4.603961871370659e-05,
"loss": 0.6829,
"step": 90400
},
{
"epoch": 0.08,
"learning_rate": 4.603527993864359e-05,
"loss": 0.7774,
"step": 90500
},
{
"epoch": 0.08,
"learning_rate": 4.603089733756985e-05,
"loss": 0.7495,
"step": 90600
},
{
"epoch": 0.08,
"learning_rate": 4.602651473649611e-05,
"loss": 0.7107,
"step": 90700
},
{
"epoch": 0.08,
"learning_rate": 4.6022132135422376e-05,
"loss": 0.6963,
"step": 90800
},
{
"epoch": 0.08,
"learning_rate": 4.601774953434864e-05,
"loss": 0.6871,
"step": 90900
},
{
"epoch": 0.08,
"learning_rate": 4.60133669332749e-05,
"loss": 0.6969,
"step": 91000
},
{
"epoch": 0.08,
"learning_rate": 4.600898433220117e-05,
"loss": 0.6446,
"step": 91100
},
{
"epoch": 0.08,
"learning_rate": 4.6004601731127425e-05,
"loss": 0.7064,
"step": 91200
},
{
"epoch": 0.08,
"learning_rate": 4.600021913005369e-05,
"loss": 0.678,
"step": 91300
},
{
"epoch": 0.08,
"learning_rate": 4.599583652897995e-05,
"loss": 0.683,
"step": 91400
},
{
"epoch": 0.08,
"learning_rate": 4.5991453927906216e-05,
"loss": 0.7163,
"step": 91500
},
{
"epoch": 0.08,
"learning_rate": 4.598707132683247e-05,
"loss": 0.672,
"step": 91600
},
{
"epoch": 0.08,
"learning_rate": 4.598268872575874e-05,
"loss": 0.7174,
"step": 91700
},
{
"epoch": 0.08,
"learning_rate": 4.5978306124685e-05,
"loss": 0.6895,
"step": 91800
},
{
"epoch": 0.08,
"learning_rate": 4.5973923523611265e-05,
"loss": 0.6954,
"step": 91900
},
{
"epoch": 0.08,
"learning_rate": 4.596958474854827e-05,
"loss": 0.6717,
"step": 92000
},
{
"epoch": 0.08,
"learning_rate": 4.596520214747453e-05,
"loss": 0.6878,
"step": 92100
},
{
"epoch": 0.08,
"learning_rate": 4.596081954640079e-05,
"loss": 0.6949,
"step": 92200
},
{
"epoch": 0.08,
"learning_rate": 4.595643694532705e-05,
"loss": 0.7045,
"step": 92300
},
{
"epoch": 0.08,
"learning_rate": 4.595205434425332e-05,
"loss": 0.6891,
"step": 92400
},
{
"epoch": 0.08,
"learning_rate": 4.5947671743179574e-05,
"loss": 0.6674,
"step": 92500
},
{
"epoch": 0.08,
"learning_rate": 4.5943289142105844e-05,
"loss": 0.7046,
"step": 92600
},
{
"epoch": 0.08,
"learning_rate": 4.593890654103211e-05,
"loss": 0.7174,
"step": 92700
},
{
"epoch": 0.08,
"learning_rate": 4.5934523939958365e-05,
"loss": 0.7262,
"step": 92800
},
{
"epoch": 0.08,
"learning_rate": 4.593014133888463e-05,
"loss": 0.7004,
"step": 92900
},
{
"epoch": 0.08,
"learning_rate": 4.592575873781089e-05,
"loss": 0.7554,
"step": 93000
},
{
"epoch": 0.08,
"learning_rate": 4.592137613673716e-05,
"loss": 0.6858,
"step": 93100
},
{
"epoch": 0.08,
"learning_rate": 4.5916993535663414e-05,
"loss": 0.7065,
"step": 93200
},
{
"epoch": 0.08,
"learning_rate": 4.5912610934589685e-05,
"loss": 0.7125,
"step": 93300
},
{
"epoch": 0.08,
"learning_rate": 4.590822833351595e-05,
"loss": 0.6974,
"step": 93400
},
{
"epoch": 0.08,
"learning_rate": 4.5903845732442206e-05,
"loss": 0.6802,
"step": 93500
},
{
"epoch": 0.08,
"learning_rate": 4.589946313136847e-05,
"loss": 0.7025,
"step": 93600
},
{
"epoch": 0.08,
"learning_rate": 4.5895080530294733e-05,
"loss": 0.7076,
"step": 93700
},
{
"epoch": 0.08,
"learning_rate": 4.589069792922099e-05,
"loss": 0.7197,
"step": 93800
},
{
"epoch": 0.08,
"learning_rate": 4.5886315328147254e-05,
"loss": 0.6886,
"step": 93900
},
{
"epoch": 0.08,
"learning_rate": 4.588193272707352e-05,
"loss": 0.6892,
"step": 94000
},
{
"epoch": 0.08,
"learning_rate": 4.587755012599978e-05,
"loss": 0.6741,
"step": 94100
},
{
"epoch": 0.08,
"learning_rate": 4.5873167524926046e-05,
"loss": 0.6638,
"step": 94200
},
{
"epoch": 0.08,
"learning_rate": 4.586878492385231e-05,
"loss": 0.6688,
"step": 94300
},
{
"epoch": 0.08,
"learning_rate": 4.5864402322778574e-05,
"loss": 0.6862,
"step": 94400
},
{
"epoch": 0.08,
"learning_rate": 4.586001972170483e-05,
"loss": 0.6893,
"step": 94500
},
{
"epoch": 0.08,
"learning_rate": 4.5855637120631095e-05,
"loss": 0.6769,
"step": 94600
},
{
"epoch": 0.08,
"learning_rate": 4.585125451955736e-05,
"loss": 0.7071,
"step": 94700
},
{
"epoch": 0.08,
"learning_rate": 4.584687191848362e-05,
"loss": 0.6765,
"step": 94800
},
{
"epoch": 0.08,
"learning_rate": 4.5842489317409886e-05,
"loss": 0.6897,
"step": 94900
},
{
"epoch": 0.08,
"learning_rate": 4.583810671633615e-05,
"loss": 0.7437,
"step": 95000
},
{
"epoch": 0.08,
"learning_rate": 4.5833724115262414e-05,
"loss": 0.6911,
"step": 95100
},
{
"epoch": 0.08,
"learning_rate": 4.582934151418867e-05,
"loss": 0.6477,
"step": 95200
},
{
"epoch": 0.08,
"learning_rate": 4.5824958913114935e-05,
"loss": 0.6628,
"step": 95300
},
{
"epoch": 0.08,
"learning_rate": 4.58205763120412e-05,
"loss": 0.6515,
"step": 95400
},
{
"epoch": 0.08,
"learning_rate": 4.5816193710967456e-05,
"loss": 0.7047,
"step": 95500
},
{
"epoch": 0.08,
"learning_rate": 4.581181110989373e-05,
"loss": 0.6956,
"step": 95600
},
{
"epoch": 0.08,
"learning_rate": 4.580742850881999e-05,
"loss": 0.7025,
"step": 95700
},
{
"epoch": 0.08,
"learning_rate": 4.580304590774625e-05,
"loss": 0.6809,
"step": 95800
},
{
"epoch": 0.08,
"learning_rate": 4.579866330667251e-05,
"loss": 0.6886,
"step": 95900
},
{
"epoch": 0.08,
"learning_rate": 4.5794280705598775e-05,
"loss": 0.6719,
"step": 96000
},
{
"epoch": 0.08,
"learning_rate": 4.578989810452504e-05,
"loss": 0.6753,
"step": 96100
},
{
"epoch": 0.08,
"learning_rate": 4.5785559329462035e-05,
"loss": 0.705,
"step": 96200
},
{
"epoch": 0.08,
"learning_rate": 4.5781176728388306e-05,
"loss": 0.6916,
"step": 96300
},
{
"epoch": 0.08,
"learning_rate": 4.577679412731456e-05,
"loss": 0.6835,
"step": 96400
},
{
"epoch": 0.08,
"learning_rate": 4.577241152624083e-05,
"loss": 0.6574,
"step": 96500
},
{
"epoch": 0.08,
"learning_rate": 4.576802892516709e-05,
"loss": 0.668,
"step": 96600
},
{
"epoch": 0.08,
"learning_rate": 4.576364632409335e-05,
"loss": 0.6833,
"step": 96700
},
{
"epoch": 0.08,
"learning_rate": 4.575926372301961e-05,
"loss": 0.6991,
"step": 96800
},
{
"epoch": 0.08,
"learning_rate": 4.5754881121945876e-05,
"loss": 0.6763,
"step": 96900
},
{
"epoch": 0.09,
"learning_rate": 4.575049852087214e-05,
"loss": 0.6778,
"step": 97000
},
{
"epoch": 0.09,
"learning_rate": 4.5746115919798404e-05,
"loss": 0.7062,
"step": 97100
},
{
"epoch": 0.09,
"learning_rate": 4.574173331872467e-05,
"loss": 0.6658,
"step": 97200
},
{
"epoch": 0.09,
"learning_rate": 4.573735071765093e-05,
"loss": 0.7119,
"step": 97300
},
{
"epoch": 0.09,
"learning_rate": 4.573296811657719e-05,
"loss": 0.6761,
"step": 97400
},
{
"epoch": 0.09,
"learning_rate": 4.572858551550345e-05,
"loss": 0.707,
"step": 97500
},
{
"epoch": 0.09,
"learning_rate": 4.5724202914429716e-05,
"loss": 0.7145,
"step": 97600
},
{
"epoch": 0.09,
"learning_rate": 4.571982031335597e-05,
"loss": 0.6571,
"step": 97700
},
{
"epoch": 0.09,
"learning_rate": 4.5715437712282244e-05,
"loss": 0.6944,
"step": 97800
},
{
"epoch": 0.09,
"learning_rate": 4.571105511120851e-05,
"loss": 0.6707,
"step": 97900
},
{
"epoch": 0.09,
"learning_rate": 4.5706672510134765e-05,
"loss": 0.6993,
"step": 98000
},
{
"epoch": 0.09,
"learning_rate": 4.570228990906103e-05,
"loss": 0.681,
"step": 98100
},
{
"epoch": 0.09,
"learning_rate": 4.569790730798729e-05,
"loss": 0.689,
"step": 98200
},
{
"epoch": 0.09,
"learning_rate": 4.5693524706913557e-05,
"loss": 0.6907,
"step": 98300
},
{
"epoch": 0.09,
"learning_rate": 4.5689142105839814e-05,
"loss": 0.6748,
"step": 98400
},
{
"epoch": 0.09,
"learning_rate": 4.568475950476608e-05,
"loss": 0.697,
"step": 98500
},
{
"epoch": 0.09,
"learning_rate": 4.568037690369235e-05,
"loss": 0.6928,
"step": 98600
},
{
"epoch": 0.09,
"learning_rate": 4.5675994302618605e-05,
"loss": 0.6745,
"step": 98700
},
{
"epoch": 0.09,
"learning_rate": 4.567161170154487e-05,
"loss": 0.6472,
"step": 98800
},
{
"epoch": 0.09,
"learning_rate": 4.566722910047113e-05,
"loss": 0.6838,
"step": 98900
},
{
"epoch": 0.09,
"learning_rate": 4.56628464993974e-05,
"loss": 0.7179,
"step": 99000
},
{
"epoch": 0.09,
"learning_rate": 4.5658463898323654e-05,
"loss": 0.6876,
"step": 99100
},
{
"epoch": 0.09,
"learning_rate": 4.565408129724992e-05,
"loss": 0.7313,
"step": 99200
},
{
"epoch": 0.09,
"learning_rate": 4.564969869617619e-05,
"loss": 0.6861,
"step": 99300
},
{
"epoch": 0.09,
"learning_rate": 4.5645359921113185e-05,
"loss": 0.6832,
"step": 99400
},
{
"epoch": 0.09,
"learning_rate": 4.564097732003945e-05,
"loss": 0.6628,
"step": 99500
},
{
"epoch": 0.09,
"learning_rate": 4.5636594718965706e-05,
"loss": 0.7351,
"step": 99600
},
{
"epoch": 0.09,
"learning_rate": 4.563221211789197e-05,
"loss": 0.6581,
"step": 99700
},
{
"epoch": 0.09,
"learning_rate": 4.562782951681823e-05,
"loss": 0.7143,
"step": 99800
},
{
"epoch": 0.09,
"learning_rate": 4.56234469157445e-05,
"loss": 0.6572,
"step": 99900
},
{
"epoch": 0.09,
"learning_rate": 4.561906431467076e-05,
"loss": 0.697,
"step": 100000
},
{
"epoch": 0.09,
"eval_loss": 0.6870002746582031,
"eval_runtime": 79438.6345,
"eval_samples_per_second": 6.404,
"eval_steps_per_second": 3.202,
"step": 100000
}
],
"max_steps": 1140875,
"num_train_epochs": 1,
"total_flos": 1.4859311775744e+18,
"trial_name": null,
"trial_params": null
}