{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9985129939161912, "global_step": 301500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.998344091220703e-05, "loss": 2.749, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.9966881824414056e-05, "loss": 2.7594, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.9950322736621087e-05, "loss": 2.7598, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.993376364882812e-05, "loss": 2.7508, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.991720456103514e-05, "loss": 2.7558, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.990064547324218e-05, "loss": 2.7641, "step": 600 }, { "epoch": 0.0, "learning_rate": 4.98840863854492e-05, "loss": 2.7775, "step": 700 }, { "epoch": 0.0, "learning_rate": 4.986752729765623e-05, "loss": 2.7908, "step": 800 }, { "epoch": 0.0, "learning_rate": 4.9850968209863255e-05, "loss": 2.767, "step": 900 }, { "epoch": 0.0, "learning_rate": 4.9834409122070285e-05, "loss": 2.7804, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9817850034277315e-05, "loss": 2.7667, "step": 1100 }, { "epoch": 0.0, "learning_rate": 4.980129094648434e-05, "loss": 2.7679, "step": 1200 }, { "epoch": 0.0, "learning_rate": 4.978473185869137e-05, "loss": 2.792, "step": 1300 }, { "epoch": 0.0, "learning_rate": 4.97681727708984e-05, "loss": 2.7904, "step": 1400 }, { "epoch": 0.0, "learning_rate": 4.975161368310542e-05, "loss": 2.7738, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.973505459531246e-05, "loss": 2.7821, "step": 1600 }, { "epoch": 0.01, "learning_rate": 4.9718495507519484e-05, "loss": 2.7821, "step": 1700 }, { "epoch": 0.01, "learning_rate": 4.9701936419726514e-05, "loss": 2.7774, "step": 1800 }, { "epoch": 0.01, "learning_rate": 4.9685377331933544e-05, "loss": 2.7651, "step": 1900 }, { "epoch": 0.01, "learning_rate": 4.966881824414057e-05, "loss": 2.7752, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.96522591563476e-05, "loss": 2.7897, "step": 2100 }, { "epoch": 0.01, "learning_rate": 4.963570006855462e-05, "loss": 2.7725, "step": 2200 }, { "epoch": 0.01, "learning_rate": 4.961914098076165e-05, "loss": 2.7753, "step": 2300 }, { "epoch": 0.01, "learning_rate": 4.960258189296868e-05, "loss": 2.7824, "step": 2400 }, { "epoch": 0.01, "learning_rate": 4.9586022805175706e-05, "loss": 2.7757, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.956946371738274e-05, "loss": 2.7658, "step": 2600 }, { "epoch": 0.01, "learning_rate": 4.9552904629589766e-05, "loss": 2.7781, "step": 2700 }, { "epoch": 0.01, "learning_rate": 4.9536345541796797e-05, "loss": 2.7819, "step": 2800 }, { "epoch": 0.01, "learning_rate": 4.951978645400383e-05, "loss": 2.7963, "step": 2900 }, { "epoch": 0.01, "learning_rate": 4.950322736621085e-05, "loss": 2.7773, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.948666827841788e-05, "loss": 2.7823, "step": 3100 }, { "epoch": 0.01, "learning_rate": 4.947010919062491e-05, "loss": 2.7753, "step": 3200 }, { "epoch": 0.01, "learning_rate": 4.9453550102831935e-05, "loss": 2.769, "step": 3300 }, { "epoch": 0.01, "learning_rate": 4.9436991015038965e-05, "loss": 2.7742, "step": 3400 }, { "epoch": 0.01, "learning_rate": 4.942043192724599e-05, "loss": 2.7804, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9403872839453025e-05, "loss": 2.7811, "step": 3600 }, { "epoch": 0.01, "learning_rate": 4.9387313751660056e-05, "loss": 2.786, "step": 3700 }, { "epoch": 0.01, "learning_rate": 4.937075466386708e-05, "loss": 2.7747, "step": 3800 }, { "epoch": 0.01, "learning_rate": 4.935419557607411e-05, "loss": 2.7802, "step": 3900 }, { "epoch": 0.01, "learning_rate": 4.933763648828113e-05, "loss": 2.7754, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.932107740048816e-05, "loss": 2.7792, "step": 4100 }, { "epoch": 0.01, "learning_rate": 4.9304518312695194e-05, "loss": 2.784, "step": 4200 }, { "epoch": 0.01, "learning_rate": 4.928795922490222e-05, "loss": 2.7872, "step": 4300 }, { "epoch": 0.01, "learning_rate": 4.927140013710925e-05, "loss": 2.7802, "step": 4400 }, { "epoch": 0.01, "learning_rate": 4.925484104931628e-05, "loss": 2.7902, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.923828196152331e-05, "loss": 2.7898, "step": 4600 }, { "epoch": 0.02, "learning_rate": 4.922172287373034e-05, "loss": 2.7698, "step": 4700 }, { "epoch": 0.02, "learning_rate": 4.920516378593736e-05, "loss": 2.7779, "step": 4800 }, { "epoch": 0.02, "learning_rate": 4.918860469814439e-05, "loss": 2.7792, "step": 4900 }, { "epoch": 0.02, "learning_rate": 4.917204561035142e-05, "loss": 2.7807, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.9155486522558446e-05, "loss": 2.7927, "step": 5100 }, { "epoch": 0.02, "learning_rate": 4.9138927434765476e-05, "loss": 2.773, "step": 5200 }, { "epoch": 0.02, "learning_rate": 4.91223683469725e-05, "loss": 2.7723, "step": 5300 }, { "epoch": 0.02, "learning_rate": 4.910580925917953e-05, "loss": 2.7633, "step": 5400 }, { "epoch": 0.02, "learning_rate": 4.908925017138656e-05, "loss": 2.7821, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.907269108359359e-05, "loss": 2.7641, "step": 5600 }, { "epoch": 0.02, "learning_rate": 4.905613199580062e-05, "loss": 2.7798, "step": 5700 }, { "epoch": 0.02, "learning_rate": 4.9039572908007645e-05, "loss": 2.7849, "step": 5800 }, { "epoch": 0.02, "learning_rate": 4.9023013820214675e-05, "loss": 2.781, "step": 5900 }, { "epoch": 0.02, "learning_rate": 4.9006454732421705e-05, "loss": 2.7916, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.898989564462873e-05, "loss": 2.7924, "step": 6100 }, { "epoch": 0.02, "learning_rate": 4.897333655683576e-05, "loss": 2.7795, "step": 6200 }, { "epoch": 0.02, "learning_rate": 4.895677746904279e-05, "loss": 2.789, "step": 6300 }, { "epoch": 0.02, "learning_rate": 4.894021838124981e-05, "loss": 2.779, "step": 6400 }, { "epoch": 0.02, "learning_rate": 4.892365929345684e-05, "loss": 2.7844, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.8907100205663873e-05, "loss": 2.7797, "step": 6600 }, { "epoch": 0.02, "learning_rate": 4.8890541117870904e-05, "loss": 2.7895, "step": 6700 }, { "epoch": 0.02, "learning_rate": 4.887398203007793e-05, "loss": 2.7759, "step": 6800 }, { "epoch": 0.02, "learning_rate": 4.885742294228496e-05, "loss": 2.7844, "step": 6900 }, { "epoch": 0.02, "learning_rate": 4.884086385449199e-05, "loss": 2.7638, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.882430476669901e-05, "loss": 2.7765, "step": 7100 }, { "epoch": 0.02, "learning_rate": 4.880774567890604e-05, "loss": 2.7778, "step": 7200 }, { "epoch": 0.02, "learning_rate": 4.879118659111307e-05, "loss": 2.7846, "step": 7300 }, { "epoch": 0.02, "learning_rate": 4.8774627503320095e-05, "loss": 2.7719, "step": 7400 }, { "epoch": 0.02, "learning_rate": 4.8758068415527126e-05, "loss": 2.7748, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.8741509327734156e-05, "loss": 2.7751, "step": 7600 }, { "epoch": 0.03, "learning_rate": 4.8724950239941186e-05, "loss": 2.781, "step": 7700 }, { "epoch": 0.03, "learning_rate": 4.870839115214822e-05, "loss": 2.7754, "step": 7800 }, { "epoch": 0.03, "learning_rate": 4.869183206435524e-05, "loss": 2.7904, "step": 7900 }, { "epoch": 0.03, "learning_rate": 4.867527297656227e-05, "loss": 2.777, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.8658713888769294e-05, "loss": 2.7694, "step": 8100 }, { "epoch": 0.03, "learning_rate": 4.8642154800976324e-05, "loss": 2.7766, "step": 8200 }, { "epoch": 0.03, "learning_rate": 4.8625595713183355e-05, "loss": 2.7873, "step": 8300 }, { "epoch": 0.03, "learning_rate": 4.860903662539038e-05, "loss": 2.7604, "step": 8400 }, { "epoch": 0.03, "learning_rate": 4.859247753759741e-05, "loss": 2.78, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.857591844980444e-05, "loss": 2.7831, "step": 8600 }, { "epoch": 0.03, "learning_rate": 4.855935936201147e-05, "loss": 2.7907, "step": 8700 }, { "epoch": 0.03, "learning_rate": 4.85428002742185e-05, "loss": 2.7846, "step": 8800 }, { "epoch": 0.03, "learning_rate": 4.852624118642552e-05, "loss": 2.7813, "step": 8900 }, { "epoch": 0.03, "learning_rate": 4.850968209863255e-05, "loss": 2.7797, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.8493123010839583e-05, "loss": 2.7758, "step": 9100 }, { "epoch": 0.03, "learning_rate": 4.847656392304661e-05, "loss": 2.78, "step": 9200 }, { "epoch": 0.03, "learning_rate": 4.846000483525364e-05, "loss": 2.7885, "step": 9300 }, { "epoch": 0.03, "learning_rate": 4.844344574746066e-05, "loss": 2.7855, "step": 9400 }, { "epoch": 0.03, "learning_rate": 4.842688665966769e-05, "loss": 2.7835, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.841032757187473e-05, "loss": 2.7868, "step": 9600 }, { "epoch": 0.03, "learning_rate": 4.839376848408175e-05, "loss": 2.7991, "step": 9700 }, { "epoch": 0.03, "learning_rate": 4.837720939628878e-05, "loss": 2.7867, "step": 9800 }, { "epoch": 0.03, "learning_rate": 4.8360650308495806e-05, "loss": 2.7845, "step": 9900 }, { "epoch": 0.03, "learning_rate": 4.8344091220702836e-05, "loss": 2.7796, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.8327532132909866e-05, "loss": 2.7918, "step": 10100 }, { "epoch": 0.03, "learning_rate": 4.831097304511689e-05, "loss": 2.806, "step": 10200 }, { "epoch": 0.03, "learning_rate": 4.829441395732392e-05, "loss": 2.7917, "step": 10300 }, { "epoch": 0.03, "learning_rate": 4.827785486953095e-05, "loss": 2.7812, "step": 10400 }, { "epoch": 0.03, "learning_rate": 4.8261295781737974e-05, "loss": 2.7863, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.824473669394501e-05, "loss": 2.7812, "step": 10600 }, { "epoch": 0.04, "learning_rate": 4.8228177606152034e-05, "loss": 2.7725, "step": 10700 }, { "epoch": 0.04, "learning_rate": 4.8211618518359065e-05, "loss": 2.7792, "step": 10800 }, { "epoch": 0.04, "learning_rate": 4.8195059430566095e-05, "loss": 2.7858, "step": 10900 }, { "epoch": 0.04, "learning_rate": 4.817850034277312e-05, "loss": 2.7859, "step": 11000 }, { "epoch": 0.04, "learning_rate": 4.816194125498015e-05, "loss": 2.7818, "step": 11100 }, { "epoch": 0.04, "learning_rate": 4.814538216718717e-05, "loss": 2.7695, "step": 11200 }, { "epoch": 0.04, "learning_rate": 4.81288230793942e-05, "loss": 2.7782, "step": 11300 }, { "epoch": 0.04, "learning_rate": 4.811226399160123e-05, "loss": 2.7696, "step": 11400 }, { "epoch": 0.04, "learning_rate": 4.8095704903808256e-05, "loss": 2.775, "step": 11500 }, { "epoch": 0.04, "learning_rate": 4.8079145816015294e-05, "loss": 2.7835, "step": 11600 }, { "epoch": 0.04, "learning_rate": 4.806258672822232e-05, "loss": 2.8033, "step": 11700 }, { "epoch": 0.04, "learning_rate": 4.804602764042935e-05, "loss": 2.7839, "step": 11800 }, { "epoch": 0.04, "learning_rate": 4.802946855263638e-05, "loss": 2.7871, "step": 11900 }, { "epoch": 0.04, "learning_rate": 4.80129094648434e-05, "loss": 2.7912, "step": 12000 }, { "epoch": 0.04, "learning_rate": 4.799635037705043e-05, "loss": 2.7906, "step": 12100 }, { "epoch": 0.04, "learning_rate": 4.797979128925746e-05, "loss": 2.7903, "step": 12200 }, { "epoch": 0.04, "learning_rate": 4.7963232201464485e-05, "loss": 2.7771, "step": 12300 }, { "epoch": 0.04, "learning_rate": 4.7946673113671516e-05, "loss": 2.7825, "step": 12400 }, { "epoch": 0.04, "learning_rate": 4.793011402587854e-05, "loss": 2.7946, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.7913554938085576e-05, "loss": 2.7858, "step": 12600 }, { "epoch": 0.04, "learning_rate": 4.78969958502926e-05, "loss": 2.7768, "step": 12700 }, { "epoch": 0.04, "learning_rate": 4.788043676249963e-05, "loss": 2.7875, "step": 12800 }, { "epoch": 0.04, "learning_rate": 4.786387767470666e-05, "loss": 2.7762, "step": 12900 }, { "epoch": 0.04, "learning_rate": 4.7847318586913684e-05, "loss": 2.7937, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.7830759499120714e-05, "loss": 2.7903, "step": 13100 }, { "epoch": 0.04, "learning_rate": 4.7814200411327744e-05, "loss": 2.7923, "step": 13200 }, { "epoch": 0.04, "learning_rate": 4.779764132353477e-05, "loss": 2.7874, "step": 13300 }, { "epoch": 0.04, "learning_rate": 4.77810822357418e-05, "loss": 2.7847, "step": 13400 }, { "epoch": 0.04, "learning_rate": 4.776452314794883e-05, "loss": 2.772, "step": 13500 }, { "epoch": 0.05, "learning_rate": 4.774796406015586e-05, "loss": 2.7846, "step": 13600 }, { "epoch": 0.05, "learning_rate": 4.773140497236289e-05, "loss": 2.7824, "step": 13700 }, { "epoch": 0.05, "learning_rate": 4.771484588456991e-05, "loss": 2.7781, "step": 13800 }, { "epoch": 0.05, "learning_rate": 4.769828679677694e-05, "loss": 2.7761, "step": 13900 }, { "epoch": 0.05, "learning_rate": 4.7681727708983967e-05, "loss": 2.7677, "step": 14000 }, { "epoch": 0.05, "learning_rate": 4.7665168621191e-05, "loss": 2.7686, "step": 14100 }, { "epoch": 0.05, "learning_rate": 4.764860953339803e-05, "loss": 2.7688, "step": 14200 }, { "epoch": 0.05, "learning_rate": 4.763205044560505e-05, "loss": 2.7817, "step": 14300 }, { "epoch": 0.05, "learning_rate": 4.761549135781208e-05, "loss": 2.7674, "step": 14400 }, { "epoch": 0.05, "learning_rate": 4.759893227001911e-05, "loss": 2.7738, "step": 14500 }, { "epoch": 0.05, "learning_rate": 4.758237318222614e-05, "loss": 2.7799, "step": 14600 }, { "epoch": 0.05, "learning_rate": 4.756581409443317e-05, "loss": 2.7818, "step": 14700 }, { "epoch": 0.05, "learning_rate": 4.7549255006640195e-05, "loss": 2.7859, "step": 14800 }, { "epoch": 0.05, "learning_rate": 4.7532695918847226e-05, "loss": 2.7762, "step": 14900 }, { "epoch": 0.05, "learning_rate": 4.7516136831054256e-05, "loss": 2.7851, "step": 15000 }, { "epoch": 0.05, "learning_rate": 4.749957774326128e-05, "loss": 2.7814, "step": 15100 }, { "epoch": 0.05, "learning_rate": 4.748301865546831e-05, "loss": 2.7973, "step": 15200 }, { "epoch": 0.05, "learning_rate": 4.746645956767533e-05, "loss": 2.7885, "step": 15300 }, { "epoch": 0.05, "learning_rate": 4.7449900479882364e-05, "loss": 2.776, "step": 15400 }, { "epoch": 0.05, "learning_rate": 4.7433341392089394e-05, "loss": 2.7677, "step": 15500 }, { "epoch": 0.05, "learning_rate": 4.7416782304296424e-05, "loss": 2.7916, "step": 15600 }, { "epoch": 0.05, "learning_rate": 4.7400223216503454e-05, "loss": 2.7928, "step": 15700 }, { "epoch": 0.05, "learning_rate": 4.738366412871048e-05, "loss": 2.7846, "step": 15800 }, { "epoch": 0.05, "learning_rate": 4.736710504091751e-05, "loss": 2.7773, "step": 15900 }, { "epoch": 0.05, "learning_rate": 4.735054595312454e-05, "loss": 2.783, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.733398686533156e-05, "loss": 2.7862, "step": 16100 }, { "epoch": 0.05, "learning_rate": 4.731742777753859e-05, "loss": 2.7786, "step": 16200 }, { "epoch": 0.05, "learning_rate": 4.730086868974562e-05, "loss": 2.7859, "step": 16300 }, { "epoch": 0.05, "learning_rate": 4.7284309601952646e-05, "loss": 2.7774, "step": 16400 }, { "epoch": 0.05, "learning_rate": 4.7267750514159677e-05, "loss": 2.7832, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.725119142636671e-05, "loss": 2.7747, "step": 16600 }, { "epoch": 0.06, "learning_rate": 4.723463233857374e-05, "loss": 2.7643, "step": 16700 }, { "epoch": 0.06, "learning_rate": 4.721807325078077e-05, "loss": 2.7757, "step": 16800 }, { "epoch": 0.06, "learning_rate": 4.720151416298779e-05, "loss": 2.7791, "step": 16900 }, { "epoch": 0.06, "learning_rate": 4.718495507519482e-05, "loss": 2.7813, "step": 17000 }, { "epoch": 0.06, "learning_rate": 4.7168395987401845e-05, "loss": 2.7935, "step": 17100 }, { "epoch": 0.06, "learning_rate": 4.7151836899608875e-05, "loss": 2.7802, "step": 17200 }, { "epoch": 0.06, "learning_rate": 4.7135277811815905e-05, "loss": 2.7884, "step": 17300 }, { "epoch": 0.06, "learning_rate": 4.711871872402293e-05, "loss": 2.7835, "step": 17400 }, { "epoch": 0.06, "learning_rate": 4.710215963622996e-05, "loss": 2.7925, "step": 17500 }, { "epoch": 0.06, "learning_rate": 4.708560054843699e-05, "loss": 2.7974, "step": 17600 }, { "epoch": 0.06, "learning_rate": 4.706904146064402e-05, "loss": 2.7875, "step": 17700 }, { "epoch": 0.06, "learning_rate": 4.705248237285105e-05, "loss": 2.7878, "step": 17800 }, { "epoch": 0.06, "learning_rate": 4.7035923285058074e-05, "loss": 2.7869, "step": 17900 }, { "epoch": 0.06, "learning_rate": 4.7019364197265104e-05, "loss": 2.7716, "step": 18000 }, { "epoch": 0.06, "learning_rate": 4.7002805109472134e-05, "loss": 2.7774, "step": 18100 }, { "epoch": 0.06, "learning_rate": 4.698624602167916e-05, "loss": 2.7824, "step": 18200 }, { "epoch": 0.06, "learning_rate": 4.696968693388619e-05, "loss": 2.7863, "step": 18300 }, { "epoch": 0.06, "learning_rate": 4.695312784609321e-05, "loss": 2.7737, "step": 18400 }, { "epoch": 0.06, "learning_rate": 4.693656875830024e-05, "loss": 2.7721, "step": 18500 }, { "epoch": 0.06, "learning_rate": 4.692000967050727e-05, "loss": 2.7834, "step": 18600 }, { "epoch": 0.06, "learning_rate": 4.69034505827143e-05, "loss": 2.7756, "step": 18700 }, { "epoch": 0.06, "learning_rate": 4.688689149492133e-05, "loss": 2.7757, "step": 18800 }, { "epoch": 0.06, "learning_rate": 4.6870332407128356e-05, "loss": 2.7818, "step": 18900 }, { "epoch": 0.06, "learning_rate": 4.685377331933539e-05, "loss": 2.774, "step": 19000 }, { "epoch": 0.06, "learning_rate": 4.683721423154242e-05, "loss": 2.7885, "step": 19100 }, { "epoch": 0.06, "learning_rate": 4.682065514374944e-05, "loss": 2.7653, "step": 19200 }, { "epoch": 0.06, "learning_rate": 4.680409605595647e-05, "loss": 2.7829, "step": 19300 }, { "epoch": 0.06, "learning_rate": 4.67875369681635e-05, "loss": 2.7784, "step": 19400 }, { "epoch": 0.06, "learning_rate": 4.6770977880370525e-05, "loss": 2.7826, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.675441879257756e-05, "loss": 2.7876, "step": 19600 }, { "epoch": 0.07, "learning_rate": 4.6737859704784585e-05, "loss": 2.7801, "step": 19700 }, { "epoch": 0.07, "learning_rate": 4.6721300616991615e-05, "loss": 2.7858, "step": 19800 }, { "epoch": 0.07, "learning_rate": 4.670474152919864e-05, "loss": 2.7851, "step": 19900 }, { "epoch": 0.07, "learning_rate": 4.668818244140567e-05, "loss": 2.7871, "step": 20000 }, { "epoch": 0.07, "learning_rate": 4.66716233536127e-05, "loss": 2.7874, "step": 20100 }, { "epoch": 0.07, "learning_rate": 4.665506426581972e-05, "loss": 2.7855, "step": 20200 }, { "epoch": 0.07, "learning_rate": 4.6638505178026753e-05, "loss": 2.7686, "step": 20300 }, { "epoch": 0.07, "learning_rate": 4.6621946090233784e-05, "loss": 2.7826, "step": 20400 }, { "epoch": 0.07, "learning_rate": 4.660538700244081e-05, "loss": 2.7919, "step": 20500 }, { "epoch": 0.07, "learning_rate": 4.6588827914647844e-05, "loss": 2.7845, "step": 20600 }, { "epoch": 0.07, "learning_rate": 4.657226882685487e-05, "loss": 2.8025, "step": 20700 }, { "epoch": 0.07, "learning_rate": 4.65557097390619e-05, "loss": 2.7732, "step": 20800 }, { "epoch": 0.07, "learning_rate": 4.653915065126893e-05, "loss": 2.7927, "step": 20900 }, { "epoch": 0.07, "learning_rate": 4.652259156347595e-05, "loss": 2.7786, "step": 21000 }, { "epoch": 0.07, "learning_rate": 4.650603247568298e-05, "loss": 2.7611, "step": 21100 }, { "epoch": 0.07, "learning_rate": 4.6489473387890006e-05, "loss": 2.7858, "step": 21200 }, { "epoch": 0.07, "learning_rate": 4.6472914300097036e-05, "loss": 2.7905, "step": 21300 }, { "epoch": 0.07, "learning_rate": 4.6456355212304066e-05, "loss": 2.7664, "step": 21400 }, { "epoch": 0.07, "learning_rate": 4.643979612451109e-05, "loss": 2.7882, "step": 21500 }, { "epoch": 0.07, "learning_rate": 4.642323703671813e-05, "loss": 2.7849, "step": 21600 }, { "epoch": 0.07, "learning_rate": 4.640667794892515e-05, "loss": 2.7855, "step": 21700 }, { "epoch": 0.07, "learning_rate": 4.639011886113218e-05, "loss": 2.7765, "step": 21800 }, { "epoch": 0.07, "learning_rate": 4.637355977333921e-05, "loss": 2.7879, "step": 21900 }, { "epoch": 0.07, "learning_rate": 4.6357000685546235e-05, "loss": 2.779, "step": 22000 }, { "epoch": 0.07, "learning_rate": 4.6340441597753265e-05, "loss": 2.7673, "step": 22100 }, { "epoch": 0.07, "learning_rate": 4.6323882509960295e-05, "loss": 2.7804, "step": 22200 }, { "epoch": 0.07, "learning_rate": 4.630732342216732e-05, "loss": 2.7752, "step": 22300 }, { "epoch": 0.07, "learning_rate": 4.629076433437435e-05, "loss": 2.776, "step": 22400 }, { "epoch": 0.07, "learning_rate": 4.627420524658137e-05, "loss": 2.769, "step": 22500 }, { "epoch": 0.07, "learning_rate": 4.625764615878841e-05, "loss": 2.7803, "step": 22600 }, { "epoch": 0.08, "learning_rate": 4.624108707099544e-05, "loss": 2.7783, "step": 22700 }, { "epoch": 0.08, "learning_rate": 4.6224527983202463e-05, "loss": 2.7791, "step": 22800 }, { "epoch": 0.08, "learning_rate": 4.6207968895409494e-05, "loss": 2.7947, "step": 22900 }, { "epoch": 0.08, "learning_rate": 4.619140980761652e-05, "loss": 2.7764, "step": 23000 }, { "epoch": 0.08, "learning_rate": 4.617485071982355e-05, "loss": 2.7893, "step": 23100 }, { "epoch": 0.08, "learning_rate": 4.615829163203058e-05, "loss": 2.7636, "step": 23200 }, { "epoch": 0.08, "learning_rate": 4.61417325442376e-05, "loss": 2.7794, "step": 23300 }, { "epoch": 0.08, "learning_rate": 4.612517345644463e-05, "loss": 2.7965, "step": 23400 }, { "epoch": 0.08, "learning_rate": 4.610861436865166e-05, "loss": 2.7818, "step": 23500 }, { "epoch": 0.08, "learning_rate": 4.609205528085869e-05, "loss": 2.7773, "step": 23600 }, { "epoch": 0.08, "learning_rate": 4.607549619306572e-05, "loss": 2.7809, "step": 23700 }, { "epoch": 0.08, "learning_rate": 4.6058937105272746e-05, "loss": 2.7609, "step": 23800 }, { "epoch": 0.08, "learning_rate": 4.6042378017479776e-05, "loss": 2.7931, "step": 23900 }, { "epoch": 0.08, "learning_rate": 4.602581892968681e-05, "loss": 2.7823, "step": 24000 }, { "epoch": 0.08, "learning_rate": 4.600925984189383e-05, "loss": 2.8011, "step": 24100 }, { "epoch": 0.08, "learning_rate": 4.599270075410086e-05, "loss": 2.7806, "step": 24200 }, { "epoch": 0.08, "learning_rate": 4.5976141666307884e-05, "loss": 2.7745, "step": 24300 }, { "epoch": 0.08, "learning_rate": 4.5959582578514914e-05, "loss": 2.7718, "step": 24400 }, { "epoch": 0.08, "learning_rate": 4.5943023490721945e-05, "loss": 2.7815, "step": 24500 }, { "epoch": 0.08, "learning_rate": 4.5926464402928975e-05, "loss": 2.7826, "step": 24600 }, { "epoch": 0.08, "learning_rate": 4.5909905315136005e-05, "loss": 2.7894, "step": 24700 }, { "epoch": 0.08, "learning_rate": 4.589334622734303e-05, "loss": 2.7665, "step": 24800 }, { "epoch": 0.08, "learning_rate": 4.587678713955006e-05, "loss": 2.7787, "step": 24900 }, { "epoch": 0.08, "learning_rate": 4.586022805175709e-05, "loss": 2.7896, "step": 25000 }, { "epoch": 0.08, "learning_rate": 4.584366896396411e-05, "loss": 2.78, "step": 25100 }, { "epoch": 0.08, "learning_rate": 4.582710987617114e-05, "loss": 2.7894, "step": 25200 }, { "epoch": 0.08, "learning_rate": 4.5810550788378174e-05, "loss": 2.7943, "step": 25300 }, { "epoch": 0.08, "learning_rate": 4.57939917005852e-05, "loss": 2.7871, "step": 25400 }, { "epoch": 0.08, "learning_rate": 4.577743261279223e-05, "loss": 2.7814, "step": 25500 }, { "epoch": 0.08, "learning_rate": 4.576087352499926e-05, "loss": 2.7911, "step": 25600 }, { "epoch": 0.09, "learning_rate": 4.574431443720629e-05, "loss": 2.7669, "step": 25700 }, { "epoch": 0.09, "learning_rate": 4.572775534941331e-05, "loss": 2.7794, "step": 25800 }, { "epoch": 0.09, "learning_rate": 4.571119626162034e-05, "loss": 2.803, "step": 25900 }, { "epoch": 0.09, "learning_rate": 4.569463717382737e-05, "loss": 2.7836, "step": 26000 }, { "epoch": 0.09, "learning_rate": 4.5678078086034396e-05, "loss": 2.781, "step": 26100 }, { "epoch": 0.09, "learning_rate": 4.5661518998241426e-05, "loss": 2.7913, "step": 26200 }, { "epoch": 0.09, "learning_rate": 4.5644959910448456e-05, "loss": 2.7712, "step": 26300 }, { "epoch": 0.09, "learning_rate": 4.562840082265548e-05, "loss": 2.7788, "step": 26400 }, { "epoch": 0.09, "learning_rate": 4.561184173486251e-05, "loss": 2.7796, "step": 26500 }, { "epoch": 0.09, "learning_rate": 4.559528264706954e-05, "loss": 2.7754, "step": 26600 }, { "epoch": 0.09, "learning_rate": 4.557872355927657e-05, "loss": 2.7809, "step": 26700 }, { "epoch": 0.09, "learning_rate": 4.55621644714836e-05, "loss": 2.793, "step": 26800 }, { "epoch": 0.09, "learning_rate": 4.5545605383690624e-05, "loss": 2.7834, "step": 26900 }, { "epoch": 0.09, "learning_rate": 4.5529046295897655e-05, "loss": 2.798, "step": 27000 }, { "epoch": 0.09, "learning_rate": 4.551248720810468e-05, "loss": 2.7781, "step": 27100 }, { "epoch": 0.09, "learning_rate": 4.549592812031171e-05, "loss": 2.7852, "step": 27200 }, { "epoch": 0.09, "learning_rate": 4.547936903251874e-05, "loss": 2.782, "step": 27300 }, { "epoch": 0.09, "learning_rate": 4.546280994472576e-05, "loss": 2.7856, "step": 27400 }, { "epoch": 0.09, "learning_rate": 4.544625085693279e-05, "loss": 2.7721, "step": 27500 }, { "epoch": 0.09, "learning_rate": 4.542969176913982e-05, "loss": 2.7839, "step": 27600 }, { "epoch": 0.09, "learning_rate": 4.541313268134685e-05, "loss": 2.7848, "step": 27700 }, { "epoch": 0.09, "learning_rate": 4.5396573593553884e-05, "loss": 2.7943, "step": 27800 }, { "epoch": 0.09, "learning_rate": 4.538001450576091e-05, "loss": 2.7878, "step": 27900 }, { "epoch": 0.09, "learning_rate": 4.536345541796794e-05, "loss": 2.7936, "step": 28000 }, { "epoch": 0.09, "learning_rate": 4.534689633017497e-05, "loss": 2.7804, "step": 28100 }, { "epoch": 0.09, "learning_rate": 4.533033724238199e-05, "loss": 2.7908, "step": 28200 }, { "epoch": 0.09, "learning_rate": 4.531377815458902e-05, "loss": 2.7857, "step": 28300 }, { "epoch": 0.09, "learning_rate": 4.5297219066796045e-05, "loss": 2.7779, "step": 28400 }, { "epoch": 0.09, "learning_rate": 4.5280659979003075e-05, "loss": 2.7891, "step": 28500 }, { "epoch": 0.09, "learning_rate": 4.526410089121011e-05, "loss": 2.7715, "step": 28600 }, { "epoch": 0.1, "learning_rate": 4.5247541803417136e-05, "loss": 2.7961, "step": 28700 }, { "epoch": 0.1, "learning_rate": 4.5230982715624166e-05, "loss": 2.781, "step": 28800 }, { "epoch": 0.1, "learning_rate": 4.521442362783119e-05, "loss": 2.7705, "step": 28900 }, { "epoch": 0.1, "learning_rate": 4.519786454003822e-05, "loss": 2.7888, "step": 29000 }, { "epoch": 0.1, "learning_rate": 4.518130545224525e-05, "loss": 2.7971, "step": 29100 }, { "epoch": 0.1, "learning_rate": 4.5164746364452274e-05, "loss": 2.7816, "step": 29200 }, { "epoch": 0.1, "learning_rate": 4.5148187276659304e-05, "loss": 2.7808, "step": 29300 }, { "epoch": 0.1, "learning_rate": 4.5131628188866334e-05, "loss": 2.7827, "step": 29400 }, { "epoch": 0.1, "learning_rate": 4.511506910107336e-05, "loss": 2.7661, "step": 29500 }, { "epoch": 0.1, "learning_rate": 4.5098510013280395e-05, "loss": 2.7774, "step": 29600 }, { "epoch": 0.1, "learning_rate": 4.508195092548742e-05, "loss": 2.7698, "step": 29700 }, { "epoch": 0.1, "learning_rate": 4.506539183769445e-05, "loss": 2.7824, "step": 29800 }, { "epoch": 0.1, "learning_rate": 4.504883274990148e-05, "loss": 2.7745, "step": 29900 }, { "epoch": 0.1, "learning_rate": 4.50322736621085e-05, "loss": 2.7787, "step": 30000 }, { "epoch": 0.1, "learning_rate": 4.501571457431553e-05, "loss": 2.7825, "step": 30100 }, { "epoch": 0.1, "learning_rate": 4.4999155486522557e-05, "loss": 2.7763, "step": 30200 }, { "epoch": 0.1, "learning_rate": 4.498259639872959e-05, "loss": 2.7907, "step": 30300 }, { "epoch": 0.1, "learning_rate": 4.496603731093662e-05, "loss": 2.7634, "step": 30400 }, { "epoch": 0.1, "learning_rate": 4.494947822314364e-05, "loss": 2.7782, "step": 30500 }, { "epoch": 0.1, "learning_rate": 4.493291913535068e-05, "loss": 2.7861, "step": 30600 }, { "epoch": 0.1, "learning_rate": 4.49163600475577e-05, "loss": 2.7676, "step": 30700 }, { "epoch": 0.1, "learning_rate": 4.489980095976473e-05, "loss": 2.7768, "step": 30800 }, { "epoch": 0.1, "learning_rate": 4.488324187197176e-05, "loss": 2.7853, "step": 30900 }, { "epoch": 0.1, "learning_rate": 4.4866682784178785e-05, "loss": 2.7915, "step": 31000 }, { "epoch": 0.1, "learning_rate": 4.4850123696385816e-05, "loss": 2.7747, "step": 31100 }, { "epoch": 0.1, "learning_rate": 4.4833564608592846e-05, "loss": 2.7749, "step": 31200 }, { "epoch": 0.1, "learning_rate": 4.481700552079987e-05, "loss": 2.7841, "step": 31300 }, { "epoch": 0.1, "learning_rate": 4.48004464330069e-05, "loss": 2.7805, "step": 31400 }, { "epoch": 0.1, "learning_rate": 4.478388734521392e-05, "loss": 2.7825, "step": 31500 }, { "epoch": 0.1, "learning_rate": 4.476732825742096e-05, "loss": 2.7719, "step": 31600 }, { "epoch": 0.1, "learning_rate": 4.475076916962799e-05, "loss": 2.7913, "step": 31700 }, { "epoch": 0.11, "learning_rate": 4.4734210081835014e-05, "loss": 2.7799, "step": 31800 }, { "epoch": 0.11, "learning_rate": 4.4717650994042045e-05, "loss": 2.7791, "step": 31900 }, { "epoch": 0.11, "learning_rate": 4.470109190624907e-05, "loss": 2.8031, "step": 32000 }, { "epoch": 0.11, "learning_rate": 4.46845328184561e-05, "loss": 2.7789, "step": 32100 }, { "epoch": 0.11, "learning_rate": 4.466797373066313e-05, "loss": 2.7778, "step": 32200 }, { "epoch": 0.11, "learning_rate": 4.465141464287015e-05, "loss": 2.7862, "step": 32300 }, { "epoch": 0.11, "learning_rate": 4.463485555507718e-05, "loss": 2.7664, "step": 32400 }, { "epoch": 0.11, "learning_rate": 4.461829646728421e-05, "loss": 2.7767, "step": 32500 }, { "epoch": 0.11, "learning_rate": 4.460173737949124e-05, "loss": 2.7814, "step": 32600 }, { "epoch": 0.11, "learning_rate": 4.458517829169827e-05, "loss": 2.7786, "step": 32700 }, { "epoch": 0.11, "learning_rate": 4.45686192039053e-05, "loss": 2.765, "step": 32800 }, { "epoch": 0.11, "learning_rate": 4.455206011611233e-05, "loss": 2.783, "step": 32900 }, { "epoch": 0.11, "learning_rate": 4.453550102831935e-05, "loss": 2.7747, "step": 33000 }, { "epoch": 0.11, "learning_rate": 4.451894194052638e-05, "loss": 2.7765, "step": 33100 }, { "epoch": 0.11, "learning_rate": 4.450238285273341e-05, "loss": 2.7934, "step": 33200 }, { "epoch": 0.11, "learning_rate": 4.4485823764940435e-05, "loss": 2.7947, "step": 33300 }, { "epoch": 0.11, "learning_rate": 4.4469264677147465e-05, "loss": 2.7732, "step": 33400 }, { "epoch": 0.11, "learning_rate": 4.4452705589354495e-05, "loss": 2.7886, "step": 33500 }, { "epoch": 0.11, "learning_rate": 4.4436146501561526e-05, "loss": 2.7673, "step": 33600 }, { "epoch": 0.11, "learning_rate": 4.4419587413768556e-05, "loss": 2.7789, "step": 33700 }, { "epoch": 0.11, "learning_rate": 4.440302832597558e-05, "loss": 2.7755, "step": 33800 }, { "epoch": 0.11, "learning_rate": 4.438646923818261e-05, "loss": 2.7756, "step": 33900 }, { "epoch": 0.11, "learning_rate": 4.436991015038964e-05, "loss": 2.7785, "step": 34000 }, { "epoch": 0.11, "learning_rate": 4.4353351062596664e-05, "loss": 2.7778, "step": 34100 }, { "epoch": 0.11, "learning_rate": 4.4336791974803694e-05, "loss": 2.7646, "step": 34200 }, { "epoch": 0.11, "learning_rate": 4.432023288701072e-05, "loss": 2.7771, "step": 34300 }, { "epoch": 0.11, "learning_rate": 4.430367379921775e-05, "loss": 2.7974, "step": 34400 }, { "epoch": 0.11, "learning_rate": 4.428711471142478e-05, "loss": 2.7814, "step": 34500 }, { "epoch": 0.11, "learning_rate": 4.427055562363181e-05, "loss": 2.7861, "step": 34600 }, { "epoch": 0.11, "learning_rate": 4.425399653583884e-05, "loss": 2.7909, "step": 34700 }, { "epoch": 0.12, "learning_rate": 4.423743744804586e-05, "loss": 2.7781, "step": 34800 }, { "epoch": 0.12, "learning_rate": 4.422087836025289e-05, "loss": 2.7919, "step": 34900 }, { "epoch": 0.12, "learning_rate": 4.420431927245992e-05, "loss": 2.7792, "step": 35000 }, { "epoch": 0.12, "learning_rate": 4.4187760184666946e-05, "loss": 2.7902, "step": 35100 }, { "epoch": 0.12, "learning_rate": 4.417120109687398e-05, "loss": 2.7809, "step": 35200 }, { "epoch": 0.12, "learning_rate": 4.415464200908101e-05, "loss": 2.7827, "step": 35300 }, { "epoch": 0.12, "learning_rate": 4.413808292128803e-05, "loss": 2.7782, "step": 35400 }, { "epoch": 0.12, "learning_rate": 4.412152383349506e-05, "loss": 2.7922, "step": 35500 }, { "epoch": 0.12, "learning_rate": 4.410496474570209e-05, "loss": 2.788, "step": 35600 }, { "epoch": 0.12, "learning_rate": 4.408840565790912e-05, "loss": 2.7745, "step": 35700 }, { "epoch": 0.12, "learning_rate": 4.407184657011615e-05, "loss": 2.7619, "step": 35800 }, { "epoch": 0.12, "learning_rate": 4.4055287482323175e-05, "loss": 2.7751, "step": 35900 }, { "epoch": 0.12, "learning_rate": 4.4038728394530206e-05, "loss": 2.781, "step": 36000 }, { "epoch": 0.12, "learning_rate": 4.402216930673723e-05, "loss": 2.7855, "step": 36100 }, { "epoch": 0.12, "learning_rate": 4.400561021894426e-05, "loss": 2.7878, "step": 36200 }, { "epoch": 0.12, "learning_rate": 4.398905113115129e-05, "loss": 2.7891, "step": 36300 }, { "epoch": 0.12, "learning_rate": 4.397249204335831e-05, "loss": 2.7766, "step": 36400 }, { "epoch": 0.12, "learning_rate": 4.3955932955565343e-05, "loss": 2.7772, "step": 36500 }, { "epoch": 0.12, "learning_rate": 4.3939373867772374e-05, "loss": 2.7807, "step": 36600 }, { "epoch": 0.12, "learning_rate": 4.3922814779979404e-05, "loss": 2.7763, "step": 36700 }, { "epoch": 0.12, "learning_rate": 4.3906255692186434e-05, "loss": 2.7632, "step": 36800 }, { "epoch": 0.12, "learning_rate": 4.388969660439346e-05, "loss": 2.7803, "step": 36900 }, { "epoch": 0.12, "learning_rate": 4.387313751660049e-05, "loss": 2.7679, "step": 37000 }, { "epoch": 0.12, "learning_rate": 4.385657842880752e-05, "loss": 2.7711, "step": 37100 }, { "epoch": 0.12, "learning_rate": 4.384001934101454e-05, "loss": 2.7788, "step": 37200 }, { "epoch": 0.12, "learning_rate": 4.382346025322157e-05, "loss": 2.7683, "step": 37300 }, { "epoch": 0.12, "learning_rate": 4.3806901165428596e-05, "loss": 2.7817, "step": 37400 }, { "epoch": 0.12, "learning_rate": 4.3790342077635626e-05, "loss": 2.7799, "step": 37500 }, { "epoch": 0.12, "learning_rate": 4.377378298984266e-05, "loss": 2.7864, "step": 37600 }, { "epoch": 0.12, "learning_rate": 4.375722390204969e-05, "loss": 2.7847, "step": 37700 }, { "epoch": 0.13, "learning_rate": 4.374066481425672e-05, "loss": 2.7812, "step": 37800 }, { "epoch": 0.13, "learning_rate": 4.372410572646374e-05, "loss": 2.7792, "step": 37900 }, { "epoch": 0.13, "learning_rate": 4.370754663867077e-05, "loss": 2.7858, "step": 38000 }, { "epoch": 0.13, "learning_rate": 4.36909875508778e-05, "loss": 2.7867, "step": 38100 }, { "epoch": 0.13, "learning_rate": 4.3674428463084825e-05, "loss": 2.7868, "step": 38200 }, { "epoch": 0.13, "learning_rate": 4.3657869375291855e-05, "loss": 2.7763, "step": 38300 }, { "epoch": 0.13, "learning_rate": 4.3641310287498885e-05, "loss": 2.7739, "step": 38400 }, { "epoch": 0.13, "learning_rate": 4.362475119970591e-05, "loss": 2.7827, "step": 38500 }, { "epoch": 0.13, "learning_rate": 4.3608192111912946e-05, "loss": 2.7661, "step": 38600 }, { "epoch": 0.13, "learning_rate": 4.359163302411997e-05, "loss": 2.7839, "step": 38700 }, { "epoch": 0.13, "learning_rate": 4.3575073936327e-05, "loss": 2.7827, "step": 38800 }, { "epoch": 0.13, "learning_rate": 4.355851484853403e-05, "loss": 2.7967, "step": 38900 }, { "epoch": 0.13, "learning_rate": 4.3541955760741054e-05, "loss": 2.78, "step": 39000 }, { "epoch": 0.13, "learning_rate": 4.3525396672948084e-05, "loss": 2.7883, "step": 39100 }, { "epoch": 0.13, "learning_rate": 4.350883758515511e-05, "loss": 2.7717, "step": 39200 }, { "epoch": 0.13, "learning_rate": 4.349227849736214e-05, "loss": 2.7651, "step": 39300 }, { "epoch": 0.13, "learning_rate": 4.347571940956917e-05, "loss": 2.7869, "step": 39400 }, { "epoch": 0.13, "learning_rate": 4.345916032177619e-05, "loss": 2.7739, "step": 39500 }, { "epoch": 0.13, "learning_rate": 4.344260123398323e-05, "loss": 2.7743, "step": 39600 }, { "epoch": 0.13, "learning_rate": 4.342604214619025e-05, "loss": 2.7709, "step": 39700 }, { "epoch": 0.13, "learning_rate": 4.340948305839728e-05, "loss": 2.7829, "step": 39800 }, { "epoch": 0.13, "learning_rate": 4.339292397060431e-05, "loss": 2.783, "step": 39900 }, { "epoch": 0.13, "learning_rate": 4.3376364882811336e-05, "loss": 2.7823, "step": 40000 }, { "epoch": 0.13, "learning_rate": 4.3359805795018366e-05, "loss": 2.7846, "step": 40100 }, { "epoch": 0.13, "learning_rate": 4.334324670722539e-05, "loss": 2.7777, "step": 40200 }, { "epoch": 0.13, "learning_rate": 4.332668761943242e-05, "loss": 2.7726, "step": 40300 }, { "epoch": 0.13, "learning_rate": 4.331012853163945e-05, "loss": 2.7785, "step": 40400 }, { "epoch": 0.13, "learning_rate": 4.3293569443846474e-05, "loss": 2.776, "step": 40500 }, { "epoch": 0.13, "learning_rate": 4.327701035605351e-05, "loss": 2.7743, "step": 40600 }, { "epoch": 0.13, "learning_rate": 4.3260451268260535e-05, "loss": 2.7747, "step": 40700 }, { "epoch": 0.14, "learning_rate": 4.3243892180467565e-05, "loss": 2.7821, "step": 40800 }, { "epoch": 0.14, "learning_rate": 4.3227333092674595e-05, "loss": 2.7702, "step": 40900 }, { "epoch": 0.14, "learning_rate": 4.321077400488162e-05, "loss": 2.7712, "step": 41000 }, { "epoch": 0.14, "learning_rate": 4.319421491708865e-05, "loss": 2.7613, "step": 41100 }, { "epoch": 0.14, "learning_rate": 4.317765582929568e-05, "loss": 2.769, "step": 41200 }, { "epoch": 0.14, "learning_rate": 4.31610967415027e-05, "loss": 2.7878, "step": 41300 }, { "epoch": 0.14, "learning_rate": 4.314453765370973e-05, "loss": 2.7684, "step": 41400 }, { "epoch": 0.14, "learning_rate": 4.312797856591676e-05, "loss": 2.7804, "step": 41500 }, { "epoch": 0.14, "learning_rate": 4.3111419478123794e-05, "loss": 2.7601, "step": 41600 }, { "epoch": 0.14, "learning_rate": 4.3094860390330824e-05, "loss": 2.7866, "step": 41700 }, { "epoch": 0.14, "learning_rate": 4.307830130253785e-05, "loss": 2.7717, "step": 41800 }, { "epoch": 0.14, "learning_rate": 4.306174221474488e-05, "loss": 2.7905, "step": 41900 }, { "epoch": 0.14, "learning_rate": 4.30451831269519e-05, "loss": 2.7613, "step": 42000 }, { "epoch": 0.14, "learning_rate": 4.302862403915893e-05, "loss": 2.7759, "step": 42100 }, { "epoch": 0.14, "learning_rate": 4.301206495136596e-05, "loss": 2.7852, "step": 42200 }, { "epoch": 0.14, "learning_rate": 4.2995505863572986e-05, "loss": 2.7731, "step": 42300 }, { "epoch": 0.14, "learning_rate": 4.2978946775780016e-05, "loss": 2.7889, "step": 42400 }, { "epoch": 0.14, "learning_rate": 4.2962387687987046e-05, "loss": 2.7963, "step": 42500 }, { "epoch": 0.14, "learning_rate": 4.2945828600194077e-05, "loss": 2.7962, "step": 42600 }, { "epoch": 0.14, "learning_rate": 4.292926951240111e-05, "loss": 2.7735, "step": 42700 }, { "epoch": 0.14, "learning_rate": 4.291271042460813e-05, "loss": 2.7668, "step": 42800 }, { "epoch": 0.14, "learning_rate": 4.289615133681516e-05, "loss": 2.7756, "step": 42900 }, { "epoch": 0.14, "learning_rate": 4.287959224902219e-05, "loss": 2.7854, "step": 43000 }, { "epoch": 0.14, "learning_rate": 4.2863033161229214e-05, "loss": 2.786, "step": 43100 }, { "epoch": 0.14, "learning_rate": 4.2846474073436245e-05, "loss": 2.7745, "step": 43200 }, { "epoch": 0.14, "learning_rate": 4.282991498564327e-05, "loss": 2.7814, "step": 43300 }, { "epoch": 0.14, "learning_rate": 4.28133558978503e-05, "loss": 2.7922, "step": 43400 }, { "epoch": 0.14, "learning_rate": 4.279679681005733e-05, "loss": 2.7818, "step": 43500 }, { "epoch": 0.14, "learning_rate": 4.278023772226436e-05, "loss": 2.7714, "step": 43600 }, { "epoch": 0.14, "learning_rate": 4.276367863447139e-05, "loss": 2.7854, "step": 43700 }, { "epoch": 0.15, "learning_rate": 4.274711954667841e-05, "loss": 2.7787, "step": 43800 }, { "epoch": 0.15, "learning_rate": 4.273056045888544e-05, "loss": 2.788, "step": 43900 }, { "epoch": 0.15, "learning_rate": 4.2714001371092474e-05, "loss": 2.7661, "step": 44000 }, { "epoch": 0.15, "learning_rate": 4.26974422832995e-05, "loss": 2.7924, "step": 44100 }, { "epoch": 0.15, "learning_rate": 4.268088319550653e-05, "loss": 2.788, "step": 44200 }, { "epoch": 0.15, "learning_rate": 4.266432410771356e-05, "loss": 2.7842, "step": 44300 }, { "epoch": 0.15, "learning_rate": 4.264776501992058e-05, "loss": 2.7806, "step": 44400 }, { "epoch": 0.15, "learning_rate": 4.263120593212761e-05, "loss": 2.7746, "step": 44500 }, { "epoch": 0.15, "learning_rate": 4.261464684433464e-05, "loss": 2.788, "step": 44600 }, { "epoch": 0.15, "learning_rate": 4.259808775654167e-05, "loss": 2.7874, "step": 44700 }, { "epoch": 0.15, "learning_rate": 4.25815286687487e-05, "loss": 2.7711, "step": 44800 }, { "epoch": 0.15, "learning_rate": 4.2564969580955726e-05, "loss": 2.7841, "step": 44900 }, { "epoch": 0.15, "learning_rate": 4.2548410493162756e-05, "loss": 2.7632, "step": 45000 }, { "epoch": 0.15, "learning_rate": 4.253185140536978e-05, "loss": 2.7884, "step": 45100 }, { "epoch": 0.15, "learning_rate": 4.251529231757681e-05, "loss": 2.7688, "step": 45200 }, { "epoch": 0.15, "learning_rate": 4.249873322978384e-05, "loss": 2.7905, "step": 45300 }, { "epoch": 0.15, "learning_rate": 4.2482174141990864e-05, "loss": 2.7803, "step": 45400 }, { "epoch": 0.15, "learning_rate": 4.2465615054197894e-05, "loss": 2.7682, "step": 45500 }, { "epoch": 0.15, "learning_rate": 4.2449055966404925e-05, "loss": 2.7746, "step": 45600 }, { "epoch": 0.15, "learning_rate": 4.2432496878611955e-05, "loss": 2.7714, "step": 45700 }, { "epoch": 0.15, "learning_rate": 4.2415937790818985e-05, "loss": 2.7784, "step": 45800 }, { "epoch": 0.15, "learning_rate": 4.239937870302601e-05, "loss": 2.7796, "step": 45900 }, { "epoch": 0.15, "learning_rate": 4.238281961523304e-05, "loss": 2.768, "step": 46000 }, { "epoch": 0.15, "learning_rate": 4.236626052744007e-05, "loss": 2.787, "step": 46100 }, { "epoch": 0.15, "learning_rate": 4.234970143964709e-05, "loss": 2.7978, "step": 46200 }, { "epoch": 0.15, "learning_rate": 4.233314235185412e-05, "loss": 2.7752, "step": 46300 }, { "epoch": 0.15, "learning_rate": 4.2316583264061147e-05, "loss": 2.766, "step": 46400 }, { "epoch": 0.15, "learning_rate": 4.230002417626818e-05, "loss": 2.7893, "step": 46500 }, { "epoch": 0.15, "learning_rate": 4.228346508847521e-05, "loss": 2.778, "step": 46600 }, { "epoch": 0.15, "learning_rate": 4.226690600068224e-05, "loss": 2.7681, "step": 46700 }, { "epoch": 0.15, "learning_rate": 4.225034691288927e-05, "loss": 2.7622, "step": 46800 }, { "epoch": 0.16, "learning_rate": 4.223378782509629e-05, "loss": 2.7893, "step": 46900 }, { "epoch": 0.16, "learning_rate": 4.221722873730332e-05, "loss": 2.8002, "step": 47000 }, { "epoch": 0.16, "learning_rate": 4.220066964951035e-05, "loss": 2.7741, "step": 47100 }, { "epoch": 0.16, "learning_rate": 4.2184110561717375e-05, "loss": 2.7814, "step": 47200 }, { "epoch": 0.16, "learning_rate": 4.2167551473924406e-05, "loss": 2.7686, "step": 47300 }, { "epoch": 0.16, "learning_rate": 4.215099238613143e-05, "loss": 2.7809, "step": 47400 }, { "epoch": 0.16, "learning_rate": 4.213443329833846e-05, "loss": 2.7718, "step": 47500 }, { "epoch": 0.16, "learning_rate": 4.21178742105455e-05, "loss": 2.764, "step": 47600 }, { "epoch": 0.16, "learning_rate": 4.210131512275252e-05, "loss": 2.7808, "step": 47700 }, { "epoch": 0.16, "learning_rate": 4.208475603495955e-05, "loss": 2.7696, "step": 47800 }, { "epoch": 0.16, "learning_rate": 4.2068196947166574e-05, "loss": 2.7685, "step": 47900 }, { "epoch": 0.16, "learning_rate": 4.2051637859373604e-05, "loss": 2.773, "step": 48000 }, { "epoch": 0.16, "learning_rate": 4.2035078771580635e-05, "loss": 2.7705, "step": 48100 }, { "epoch": 0.16, "learning_rate": 4.201851968378766e-05, "loss": 2.7686, "step": 48200 }, { "epoch": 0.16, "learning_rate": 4.200196059599469e-05, "loss": 2.7807, "step": 48300 }, { "epoch": 0.16, "learning_rate": 4.198540150820172e-05, "loss": 2.7745, "step": 48400 }, { "epoch": 0.16, "learning_rate": 4.196884242040874e-05, "loss": 2.7956, "step": 48500 }, { "epoch": 0.16, "learning_rate": 4.195228333261578e-05, "loss": 2.7742, "step": 48600 }, { "epoch": 0.16, "learning_rate": 4.19357242448228e-05, "loss": 2.7747, "step": 48700 }, { "epoch": 0.16, "learning_rate": 4.191916515702983e-05, "loss": 2.7816, "step": 48800 }, { "epoch": 0.16, "learning_rate": 4.1902606069236863e-05, "loss": 2.7863, "step": 48900 }, { "epoch": 0.16, "learning_rate": 4.188604698144389e-05, "loss": 2.7786, "step": 49000 }, { "epoch": 0.16, "learning_rate": 4.186948789365092e-05, "loss": 2.775, "step": 49100 }, { "epoch": 0.16, "learning_rate": 4.185292880585794e-05, "loss": 2.7834, "step": 49200 }, { "epoch": 0.16, "learning_rate": 4.183636971806497e-05, "loss": 2.7661, "step": 49300 }, { "epoch": 0.16, "learning_rate": 4.1819810630272e-05, "loss": 2.7726, "step": 49400 }, { "epoch": 0.16, "learning_rate": 4.1803251542479025e-05, "loss": 2.7777, "step": 49500 }, { "epoch": 0.16, "learning_rate": 4.178669245468606e-05, "loss": 2.7784, "step": 49600 }, { "epoch": 0.16, "learning_rate": 4.1770133366893085e-05, "loss": 2.7934, "step": 49700 }, { "epoch": 0.16, "learning_rate": 4.1753574279100116e-05, "loss": 2.7814, "step": 49800 }, { "epoch": 0.17, "learning_rate": 4.1737015191307146e-05, "loss": 2.7681, "step": 49900 }, { "epoch": 0.17, "learning_rate": 4.172045610351417e-05, "loss": 2.7667, "step": 50000 }, { "epoch": 0.17, "learning_rate": 4.17038970157212e-05, "loss": 2.783, "step": 50100 }, { "epoch": 0.17, "learning_rate": 4.168733792792823e-05, "loss": 2.767, "step": 50200 }, { "epoch": 0.17, "learning_rate": 4.1670778840135254e-05, "loss": 2.7718, "step": 50300 }, { "epoch": 0.17, "learning_rate": 4.1654219752342284e-05, "loss": 2.7742, "step": 50400 }, { "epoch": 0.17, "learning_rate": 4.163766066454931e-05, "loss": 2.7618, "step": 50500 }, { "epoch": 0.17, "learning_rate": 4.1621101576756345e-05, "loss": 2.7596, "step": 50600 }, { "epoch": 0.17, "learning_rate": 4.1604542488963375e-05, "loss": 2.7777, "step": 50700 }, { "epoch": 0.17, "learning_rate": 4.15879834011704e-05, "loss": 2.7843, "step": 50800 }, { "epoch": 0.17, "learning_rate": 4.157142431337743e-05, "loss": 2.7709, "step": 50900 }, { "epoch": 0.17, "learning_rate": 4.155486522558445e-05, "loss": 2.7638, "step": 51000 }, { "epoch": 0.17, "learning_rate": 4.153830613779148e-05, "loss": 2.7725, "step": 51100 }, { "epoch": 0.17, "learning_rate": 4.152174704999851e-05, "loss": 2.7636, "step": 51200 }, { "epoch": 0.17, "learning_rate": 4.1505187962205536e-05, "loss": 2.7787, "step": 51300 }, { "epoch": 0.17, "learning_rate": 4.148862887441257e-05, "loss": 2.7779, "step": 51400 }, { "epoch": 0.17, "learning_rate": 4.14720697866196e-05, "loss": 2.7758, "step": 51500 }, { "epoch": 0.17, "learning_rate": 4.145551069882663e-05, "loss": 2.7652, "step": 51600 }, { "epoch": 0.17, "learning_rate": 4.143895161103366e-05, "loss": 2.7841, "step": 51700 }, { "epoch": 0.17, "learning_rate": 4.142239252324068e-05, "loss": 2.7681, "step": 51800 }, { "epoch": 0.17, "learning_rate": 4.140583343544771e-05, "loss": 2.7802, "step": 51900 }, { "epoch": 0.17, "learning_rate": 4.138927434765474e-05, "loss": 2.7882, "step": 52000 }, { "epoch": 0.17, "learning_rate": 4.1372715259861765e-05, "loss": 2.781, "step": 52100 }, { "epoch": 0.17, "learning_rate": 4.1356156172068796e-05, "loss": 2.7906, "step": 52200 }, { "epoch": 0.17, "learning_rate": 4.133959708427582e-05, "loss": 2.7847, "step": 52300 }, { "epoch": 0.17, "learning_rate": 4.132303799648285e-05, "loss": 2.7832, "step": 52400 }, { "epoch": 0.17, "learning_rate": 4.130647890868988e-05, "loss": 2.7637, "step": 52500 }, { "epoch": 0.17, "learning_rate": 4.128991982089691e-05, "loss": 2.7748, "step": 52600 }, { "epoch": 0.17, "learning_rate": 4.127336073310394e-05, "loss": 2.7669, "step": 52700 }, { "epoch": 0.17, "learning_rate": 4.1256801645310964e-05, "loss": 2.7726, "step": 52800 }, { "epoch": 0.18, "learning_rate": 4.1240242557517994e-05, "loss": 2.7811, "step": 52900 }, { "epoch": 0.18, "learning_rate": 4.1223683469725024e-05, "loss": 2.7769, "step": 53000 }, { "epoch": 0.18, "learning_rate": 4.120712438193205e-05, "loss": 2.7668, "step": 53100 }, { "epoch": 0.18, "learning_rate": 4.119056529413908e-05, "loss": 2.786, "step": 53200 }, { "epoch": 0.18, "learning_rate": 4.117400620634611e-05, "loss": 2.7671, "step": 53300 }, { "epoch": 0.18, "learning_rate": 4.115744711855313e-05, "loss": 2.7594, "step": 53400 }, { "epoch": 0.18, "learning_rate": 4.114088803076016e-05, "loss": 2.7735, "step": 53500 }, { "epoch": 0.18, "learning_rate": 4.112432894296719e-05, "loss": 2.7822, "step": 53600 }, { "epoch": 0.18, "learning_rate": 4.110776985517422e-05, "loss": 2.7798, "step": 53700 }, { "epoch": 0.18, "learning_rate": 4.1091210767381246e-05, "loss": 2.7783, "step": 53800 }, { "epoch": 0.18, "learning_rate": 4.107465167958828e-05, "loss": 2.7799, "step": 53900 }, { "epoch": 0.18, "learning_rate": 4.105809259179531e-05, "loss": 2.7772, "step": 54000 }, { "epoch": 0.18, "learning_rate": 4.104153350400233e-05, "loss": 2.7765, "step": 54100 }, { "epoch": 0.18, "learning_rate": 4.102497441620936e-05, "loss": 2.7779, "step": 54200 }, { "epoch": 0.18, "learning_rate": 4.100841532841639e-05, "loss": 2.7596, "step": 54300 }, { "epoch": 0.18, "learning_rate": 4.0991856240623415e-05, "loss": 2.7687, "step": 54400 }, { "epoch": 0.18, "learning_rate": 4.0975297152830445e-05, "loss": 2.7869, "step": 54500 }, { "epoch": 0.18, "learning_rate": 4.0958738065037475e-05, "loss": 2.7692, "step": 54600 }, { "epoch": 0.18, "learning_rate": 4.0942178977244506e-05, "loss": 2.7662, "step": 54700 }, { "epoch": 0.18, "learning_rate": 4.0925619889451536e-05, "loss": 2.7748, "step": 54800 }, { "epoch": 0.18, "learning_rate": 4.090906080165856e-05, "loss": 2.7703, "step": 54900 }, { "epoch": 0.18, "learning_rate": 4.089250171386559e-05, "loss": 2.7659, "step": 55000 }, { "epoch": 0.18, "learning_rate": 4.087594262607261e-05, "loss": 2.7831, "step": 55100 }, { "epoch": 0.18, "learning_rate": 4.0859383538279644e-05, "loss": 2.7628, "step": 55200 }, { "epoch": 0.18, "learning_rate": 4.0842824450486674e-05, "loss": 2.7835, "step": 55300 }, { "epoch": 0.18, "learning_rate": 4.08262653626937e-05, "loss": 2.7772, "step": 55400 }, { "epoch": 0.18, "learning_rate": 4.080970627490073e-05, "loss": 2.7799, "step": 55500 }, { "epoch": 0.18, "learning_rate": 4.079314718710776e-05, "loss": 2.8003, "step": 55600 }, { "epoch": 0.18, "learning_rate": 4.077658809931479e-05, "loss": 2.7896, "step": 55700 }, { "epoch": 0.18, "learning_rate": 4.076002901152182e-05, "loss": 2.7685, "step": 55800 }, { "epoch": 0.19, "learning_rate": 4.074346992372884e-05, "loss": 2.7851, "step": 55900 }, { "epoch": 0.19, "learning_rate": 4.072691083593587e-05, "loss": 2.78, "step": 56000 }, { "epoch": 0.19, "learning_rate": 4.07103517481429e-05, "loss": 2.7783, "step": 56100 }, { "epoch": 0.19, "learning_rate": 4.0693792660349926e-05, "loss": 2.7738, "step": 56200 }, { "epoch": 0.19, "learning_rate": 4.0677233572556957e-05, "loss": 2.7777, "step": 56300 }, { "epoch": 0.19, "learning_rate": 4.066067448476398e-05, "loss": 2.7716, "step": 56400 }, { "epoch": 0.19, "learning_rate": 4.064411539697101e-05, "loss": 2.7767, "step": 56500 }, { "epoch": 0.19, "learning_rate": 4.062755630917805e-05, "loss": 2.7845, "step": 56600 }, { "epoch": 0.19, "learning_rate": 4.061099722138507e-05, "loss": 2.7696, "step": 56700 }, { "epoch": 0.19, "learning_rate": 4.05944381335921e-05, "loss": 2.7712, "step": 56800 }, { "epoch": 0.19, "learning_rate": 4.0577879045799125e-05, "loss": 2.7781, "step": 56900 }, { "epoch": 0.19, "learning_rate": 4.0561319958006155e-05, "loss": 2.7841, "step": 57000 }, { "epoch": 0.19, "learning_rate": 4.0544760870213185e-05, "loss": 2.7655, "step": 57100 }, { "epoch": 0.19, "learning_rate": 4.052820178242021e-05, "loss": 2.7694, "step": 57200 }, { "epoch": 0.19, "learning_rate": 4.051164269462724e-05, "loss": 2.7764, "step": 57300 }, { "epoch": 0.19, "learning_rate": 4.049508360683427e-05, "loss": 2.7616, "step": 57400 }, { "epoch": 0.19, "learning_rate": 4.047852451904129e-05, "loss": 2.7696, "step": 57500 }, { "epoch": 0.19, "learning_rate": 4.046196543124833e-05, "loss": 2.7623, "step": 57600 }, { "epoch": 0.19, "learning_rate": 4.0445406343455354e-05, "loss": 2.7686, "step": 57700 }, { "epoch": 0.19, "learning_rate": 4.0428847255662384e-05, "loss": 2.7781, "step": 57800 }, { "epoch": 0.19, "learning_rate": 4.0412288167869414e-05, "loss": 2.7686, "step": 57900 }, { "epoch": 0.19, "learning_rate": 4.039572908007644e-05, "loss": 2.7831, "step": 58000 }, { "epoch": 0.19, "learning_rate": 4.037916999228347e-05, "loss": 2.7731, "step": 58100 }, { "epoch": 0.19, "learning_rate": 4.036261090449049e-05, "loss": 2.7776, "step": 58200 }, { "epoch": 0.19, "learning_rate": 4.034605181669752e-05, "loss": 2.7802, "step": 58300 }, { "epoch": 0.19, "learning_rate": 4.032949272890455e-05, "loss": 2.7728, "step": 58400 }, { "epoch": 0.19, "learning_rate": 4.0312933641111576e-05, "loss": 2.761, "step": 58500 }, { "epoch": 0.19, "learning_rate": 4.029637455331861e-05, "loss": 2.7785, "step": 58600 }, { "epoch": 0.19, "learning_rate": 4.0279815465525636e-05, "loss": 2.7761, "step": 58700 }, { "epoch": 0.19, "learning_rate": 4.0263256377732667e-05, "loss": 2.76, "step": 58800 }, { "epoch": 0.2, "learning_rate": 4.02466972899397e-05, "loss": 2.7783, "step": 58900 }, { "epoch": 0.2, "learning_rate": 4.023013820214672e-05, "loss": 2.7653, "step": 59000 }, { "epoch": 0.2, "learning_rate": 4.021357911435375e-05, "loss": 2.7826, "step": 59100 }, { "epoch": 0.2, "learning_rate": 4.019702002656078e-05, "loss": 2.7748, "step": 59200 }, { "epoch": 0.2, "learning_rate": 4.0180460938767805e-05, "loss": 2.7869, "step": 59300 }, { "epoch": 0.2, "learning_rate": 4.0163901850974835e-05, "loss": 2.7672, "step": 59400 }, { "epoch": 0.2, "learning_rate": 4.014734276318186e-05, "loss": 2.7845, "step": 59500 }, { "epoch": 0.2, "learning_rate": 4.0130783675388895e-05, "loss": 2.7661, "step": 59600 }, { "epoch": 0.2, "learning_rate": 4.011422458759592e-05, "loss": 2.7639, "step": 59700 }, { "epoch": 0.2, "learning_rate": 4.009766549980295e-05, "loss": 2.7762, "step": 59800 }, { "epoch": 0.2, "learning_rate": 4.008110641200998e-05, "loss": 2.7801, "step": 59900 }, { "epoch": 0.2, "learning_rate": 4.0064547324217e-05, "loss": 2.7627, "step": 60000 }, { "epoch": 0.2, "learning_rate": 4.004798823642403e-05, "loss": 2.7793, "step": 60100 }, { "epoch": 0.2, "learning_rate": 4.0031429148631064e-05, "loss": 2.7856, "step": 60200 }, { "epoch": 0.2, "learning_rate": 4.001487006083809e-05, "loss": 2.784, "step": 60300 }, { "epoch": 0.2, "learning_rate": 3.999831097304512e-05, "loss": 2.7667, "step": 60400 }, { "epoch": 0.2, "learning_rate": 3.998175188525215e-05, "loss": 2.777, "step": 60500 }, { "epoch": 0.2, "learning_rate": 3.996519279745918e-05, "loss": 2.7757, "step": 60600 }, { "epoch": 0.2, "learning_rate": 3.994863370966621e-05, "loss": 2.7821, "step": 60700 }, { "epoch": 0.2, "learning_rate": 3.993207462187323e-05, "loss": 2.7787, "step": 60800 }, { "epoch": 0.2, "learning_rate": 3.991551553408026e-05, "loss": 2.7794, "step": 60900 }, { "epoch": 0.2, "learning_rate": 3.9898956446287286e-05, "loss": 2.7627, "step": 61000 }, { "epoch": 0.2, "learning_rate": 3.9882397358494316e-05, "loss": 2.7562, "step": 61100 }, { "epoch": 0.2, "learning_rate": 3.9865838270701346e-05, "loss": 2.7877, "step": 61200 }, { "epoch": 0.2, "learning_rate": 3.984927918290837e-05, "loss": 2.7775, "step": 61300 }, { "epoch": 0.2, "learning_rate": 3.98327200951154e-05, "loss": 2.7629, "step": 61400 }, { "epoch": 0.2, "learning_rate": 3.981616100732243e-05, "loss": 2.7684, "step": 61500 }, { "epoch": 0.2, "learning_rate": 3.979960191952946e-05, "loss": 2.7833, "step": 61600 }, { "epoch": 0.2, "learning_rate": 3.978304283173649e-05, "loss": 2.7686, "step": 61700 }, { "epoch": 0.2, "learning_rate": 3.9766483743943515e-05, "loss": 2.7748, "step": 61800 }, { "epoch": 0.21, "learning_rate": 3.9749924656150545e-05, "loss": 2.7687, "step": 61900 }, { "epoch": 0.21, "learning_rate": 3.9733365568357575e-05, "loss": 2.7628, "step": 62000 }, { "epoch": 0.21, "learning_rate": 3.97168064805646e-05, "loss": 2.7854, "step": 62100 }, { "epoch": 0.21, "learning_rate": 3.970024739277163e-05, "loss": 2.7701, "step": 62200 }, { "epoch": 0.21, "learning_rate": 3.968368830497865e-05, "loss": 2.7823, "step": 62300 }, { "epoch": 0.21, "learning_rate": 3.966712921718568e-05, "loss": 2.7639, "step": 62400 }, { "epoch": 0.21, "learning_rate": 3.965057012939271e-05, "loss": 2.7754, "step": 62500 }, { "epoch": 0.21, "learning_rate": 3.9634011041599743e-05, "loss": 2.7737, "step": 62600 }, { "epoch": 0.21, "learning_rate": 3.9617451953806774e-05, "loss": 2.7724, "step": 62700 }, { "epoch": 0.21, "learning_rate": 3.96008928660138e-05, "loss": 2.7747, "step": 62800 }, { "epoch": 0.21, "learning_rate": 3.958433377822083e-05, "loss": 2.7731, "step": 62900 }, { "epoch": 0.21, "learning_rate": 3.956777469042786e-05, "loss": 2.7666, "step": 63000 }, { "epoch": 0.21, "learning_rate": 3.955121560263488e-05, "loss": 2.7722, "step": 63100 }, { "epoch": 0.21, "learning_rate": 3.953465651484191e-05, "loss": 2.7725, "step": 63200 }, { "epoch": 0.21, "learning_rate": 3.951809742704894e-05, "loss": 2.7758, "step": 63300 }, { "epoch": 0.21, "learning_rate": 3.9501538339255965e-05, "loss": 2.7809, "step": 63400 }, { "epoch": 0.21, "learning_rate": 3.9484979251462996e-05, "loss": 2.785, "step": 63500 }, { "epoch": 0.21, "learning_rate": 3.9468420163670026e-05, "loss": 2.7749, "step": 63600 }, { "epoch": 0.21, "learning_rate": 3.9451861075877056e-05, "loss": 2.7759, "step": 63700 }, { "epoch": 0.21, "learning_rate": 3.943530198808409e-05, "loss": 2.7773, "step": 63800 }, { "epoch": 0.21, "learning_rate": 3.941874290029111e-05, "loss": 2.7561, "step": 63900 }, { "epoch": 0.21, "learning_rate": 3.940218381249814e-05, "loss": 2.7739, "step": 64000 }, { "epoch": 0.21, "learning_rate": 3.9385624724705164e-05, "loss": 2.7602, "step": 64100 }, { "epoch": 0.21, "learning_rate": 3.9369065636912194e-05, "loss": 2.7807, "step": 64200 }, { "epoch": 0.21, "learning_rate": 3.9352506549119225e-05, "loss": 2.7672, "step": 64300 }, { "epoch": 0.21, "learning_rate": 3.933594746132625e-05, "loss": 2.7706, "step": 64400 }, { "epoch": 0.21, "learning_rate": 3.931938837353328e-05, "loss": 2.7772, "step": 64500 }, { "epoch": 0.21, "learning_rate": 3.930282928574031e-05, "loss": 2.7707, "step": 64600 }, { "epoch": 0.21, "learning_rate": 3.928627019794734e-05, "loss": 2.7742, "step": 64700 }, { "epoch": 0.21, "learning_rate": 3.926971111015437e-05, "loss": 2.7632, "step": 64800 }, { "epoch": 0.21, "learning_rate": 3.925315202236139e-05, "loss": 2.7765, "step": 64900 }, { "epoch": 0.22, "learning_rate": 3.923659293456842e-05, "loss": 2.7612, "step": 65000 }, { "epoch": 0.22, "learning_rate": 3.9220033846775453e-05, "loss": 2.7697, "step": 65100 }, { "epoch": 0.22, "learning_rate": 3.920347475898248e-05, "loss": 2.7762, "step": 65200 }, { "epoch": 0.22, "learning_rate": 3.918691567118951e-05, "loss": 2.7635, "step": 65300 }, { "epoch": 0.22, "learning_rate": 3.917035658339653e-05, "loss": 2.7821, "step": 65400 }, { "epoch": 0.22, "learning_rate": 3.915379749560356e-05, "loss": 2.7695, "step": 65500 }, { "epoch": 0.22, "learning_rate": 3.913723840781059e-05, "loss": 2.7734, "step": 65600 }, { "epoch": 0.22, "learning_rate": 3.912067932001762e-05, "loss": 2.7684, "step": 65700 }, { "epoch": 0.22, "learning_rate": 3.910412023222465e-05, "loss": 2.7958, "step": 65800 }, { "epoch": 0.22, "learning_rate": 3.9087561144431676e-05, "loss": 2.7738, "step": 65900 }, { "epoch": 0.22, "learning_rate": 3.9071002056638706e-05, "loss": 2.7866, "step": 66000 }, { "epoch": 0.22, "learning_rate": 3.9054442968845736e-05, "loss": 2.7766, "step": 66100 }, { "epoch": 0.22, "learning_rate": 3.903788388105276e-05, "loss": 2.7637, "step": 66200 }, { "epoch": 0.22, "learning_rate": 3.902132479325979e-05, "loss": 2.7692, "step": 66300 }, { "epoch": 0.22, "learning_rate": 3.900476570546682e-05, "loss": 2.7877, "step": 66400 }, { "epoch": 0.22, "learning_rate": 3.8988206617673844e-05, "loss": 2.778, "step": 66500 }, { "epoch": 0.22, "learning_rate": 3.897164752988088e-05, "loss": 2.767, "step": 66600 }, { "epoch": 0.22, "learning_rate": 3.8955088442087904e-05, "loss": 2.7792, "step": 66700 }, { "epoch": 0.22, "learning_rate": 3.8938529354294935e-05, "loss": 2.7619, "step": 66800 }, { "epoch": 0.22, "learning_rate": 3.892197026650196e-05, "loss": 2.7858, "step": 66900 }, { "epoch": 0.22, "learning_rate": 3.890541117870899e-05, "loss": 2.7774, "step": 67000 }, { "epoch": 0.22, "learning_rate": 3.888885209091602e-05, "loss": 2.78, "step": 67100 }, { "epoch": 0.22, "learning_rate": 3.887229300312304e-05, "loss": 2.77, "step": 67200 }, { "epoch": 0.22, "learning_rate": 3.885573391533007e-05, "loss": 2.7834, "step": 67300 }, { "epoch": 0.22, "learning_rate": 3.88391748275371e-05, "loss": 2.7817, "step": 67400 }, { "epoch": 0.22, "learning_rate": 3.8822615739744126e-05, "loss": 2.7656, "step": 67500 }, { "epoch": 0.22, "learning_rate": 3.8806056651951164e-05, "loss": 2.7873, "step": 67600 }, { "epoch": 0.22, "learning_rate": 3.878949756415819e-05, "loss": 2.7568, "step": 67700 }, { "epoch": 0.22, "learning_rate": 3.877293847636522e-05, "loss": 2.7832, "step": 67800 }, { "epoch": 0.22, "learning_rate": 3.875637938857225e-05, "loss": 2.7802, "step": 67900 }, { "epoch": 0.23, "learning_rate": 3.873982030077927e-05, "loss": 2.7669, "step": 68000 }, { "epoch": 0.23, "learning_rate": 3.87232612129863e-05, "loss": 2.7748, "step": 68100 }, { "epoch": 0.23, "learning_rate": 3.8706702125193325e-05, "loss": 2.767, "step": 68200 }, { "epoch": 0.23, "learning_rate": 3.8690143037400355e-05, "loss": 2.7727, "step": 68300 }, { "epoch": 0.23, "learning_rate": 3.8673583949607386e-05, "loss": 2.7725, "step": 68400 }, { "epoch": 0.23, "learning_rate": 3.865702486181441e-05, "loss": 2.7773, "step": 68500 }, { "epoch": 0.23, "learning_rate": 3.8640465774021446e-05, "loss": 2.7834, "step": 68600 }, { "epoch": 0.23, "learning_rate": 3.862390668622847e-05, "loss": 2.7777, "step": 68700 }, { "epoch": 0.23, "learning_rate": 3.86073475984355e-05, "loss": 2.7757, "step": 68800 }, { "epoch": 0.23, "learning_rate": 3.859078851064253e-05, "loss": 2.7624, "step": 68900 }, { "epoch": 0.23, "learning_rate": 3.8574229422849554e-05, "loss": 2.7765, "step": 69000 }, { "epoch": 0.23, "learning_rate": 3.8557670335056584e-05, "loss": 2.7782, "step": 69100 }, { "epoch": 0.23, "learning_rate": 3.8541111247263614e-05, "loss": 2.7757, "step": 69200 }, { "epoch": 0.23, "learning_rate": 3.852455215947064e-05, "loss": 2.775, "step": 69300 }, { "epoch": 0.23, "learning_rate": 3.850799307167767e-05, "loss": 2.7646, "step": 69400 }, { "epoch": 0.23, "learning_rate": 3.849143398388469e-05, "loss": 2.7669, "step": 69500 }, { "epoch": 0.23, "learning_rate": 3.847487489609173e-05, "loss": 2.7643, "step": 69600 }, { "epoch": 0.23, "learning_rate": 3.845831580829876e-05, "loss": 2.7815, "step": 69700 }, { "epoch": 0.23, "learning_rate": 3.844175672050578e-05, "loss": 2.7665, "step": 69800 }, { "epoch": 0.23, "learning_rate": 3.842519763271281e-05, "loss": 2.7642, "step": 69900 }, { "epoch": 0.23, "learning_rate": 3.8408638544919837e-05, "loss": 2.7747, "step": 70000 }, { "epoch": 0.23, "learning_rate": 3.839207945712687e-05, "loss": 2.7862, "step": 70100 }, { "epoch": 0.23, "learning_rate": 3.83755203693339e-05, "loss": 2.755, "step": 70200 }, { "epoch": 0.23, "learning_rate": 3.835896128154092e-05, "loss": 2.7713, "step": 70300 }, { "epoch": 0.23, "learning_rate": 3.834240219374795e-05, "loss": 2.7665, "step": 70400 }, { "epoch": 0.23, "learning_rate": 3.832584310595498e-05, "loss": 2.7831, "step": 70500 }, { "epoch": 0.23, "learning_rate": 3.830928401816201e-05, "loss": 2.774, "step": 70600 }, { "epoch": 0.23, "learning_rate": 3.829272493036904e-05, "loss": 2.7802, "step": 70700 }, { "epoch": 0.23, "learning_rate": 3.8276165842576065e-05, "loss": 2.7653, "step": 70800 }, { "epoch": 0.23, "learning_rate": 3.8259606754783096e-05, "loss": 2.7576, "step": 70900 }, { "epoch": 0.24, "learning_rate": 3.8243047666990126e-05, "loss": 2.7891, "step": 71000 }, { "epoch": 0.24, "learning_rate": 3.822648857919715e-05, "loss": 2.7715, "step": 71100 }, { "epoch": 0.24, "learning_rate": 3.820992949140418e-05, "loss": 2.764, "step": 71200 }, { "epoch": 0.24, "learning_rate": 3.81933704036112e-05, "loss": 2.7699, "step": 71300 }, { "epoch": 0.24, "learning_rate": 3.8176811315818234e-05, "loss": 2.7682, "step": 71400 }, { "epoch": 0.24, "learning_rate": 3.8160252228025264e-05, "loss": 2.7829, "step": 71500 }, { "epoch": 0.24, "learning_rate": 3.8143693140232294e-05, "loss": 2.7736, "step": 71600 }, { "epoch": 0.24, "learning_rate": 3.8127134052439325e-05, "loss": 2.789, "step": 71700 }, { "epoch": 0.24, "learning_rate": 3.811057496464635e-05, "loss": 2.7784, "step": 71800 }, { "epoch": 0.24, "learning_rate": 3.809401587685338e-05, "loss": 2.7782, "step": 71900 }, { "epoch": 0.24, "learning_rate": 3.807745678906041e-05, "loss": 2.776, "step": 72000 }, { "epoch": 0.24, "learning_rate": 3.806089770126743e-05, "loss": 2.7795, "step": 72100 }, { "epoch": 0.24, "learning_rate": 3.804433861347446e-05, "loss": 2.7474, "step": 72200 }, { "epoch": 0.24, "learning_rate": 3.802777952568149e-05, "loss": 2.7784, "step": 72300 }, { "epoch": 0.24, "learning_rate": 3.8011220437888516e-05, "loss": 2.7837, "step": 72400 }, { "epoch": 0.24, "learning_rate": 3.7994661350095547e-05, "loss": 2.7615, "step": 72500 }, { "epoch": 0.24, "learning_rate": 3.797810226230258e-05, "loss": 2.7742, "step": 72600 }, { "epoch": 0.24, "learning_rate": 3.796154317450961e-05, "loss": 2.7735, "step": 72700 }, { "epoch": 0.24, "learning_rate": 3.794498408671663e-05, "loss": 2.7739, "step": 72800 }, { "epoch": 0.24, "learning_rate": 3.792842499892366e-05, "loss": 2.7716, "step": 72900 }, { "epoch": 0.24, "learning_rate": 3.791186591113069e-05, "loss": 2.7806, "step": 73000 }, { "epoch": 0.24, "learning_rate": 3.7895306823337715e-05, "loss": 2.7703, "step": 73100 }, { "epoch": 0.24, "learning_rate": 3.7878747735544745e-05, "loss": 2.7753, "step": 73200 }, { "epoch": 0.24, "learning_rate": 3.7862188647751775e-05, "loss": 2.7746, "step": 73300 }, { "epoch": 0.24, "learning_rate": 3.78456295599588e-05, "loss": 2.7836, "step": 73400 }, { "epoch": 0.24, "learning_rate": 3.782907047216583e-05, "loss": 2.7821, "step": 73500 }, { "epoch": 0.24, "learning_rate": 3.781251138437286e-05, "loss": 2.7711, "step": 73600 }, { "epoch": 0.24, "learning_rate": 3.779595229657989e-05, "loss": 2.762, "step": 73700 }, { "epoch": 0.24, "learning_rate": 3.777939320878692e-05, "loss": 2.7759, "step": 73800 }, { "epoch": 0.24, "learning_rate": 3.7762834120993944e-05, "loss": 2.7617, "step": 73900 }, { "epoch": 0.25, "learning_rate": 3.7746275033200974e-05, "loss": 2.7818, "step": 74000 }, { "epoch": 0.25, "learning_rate": 3.7729715945408e-05, "loss": 2.7662, "step": 74100 }, { "epoch": 0.25, "learning_rate": 3.771315685761503e-05, "loss": 2.783, "step": 74200 }, { "epoch": 0.25, "learning_rate": 3.769659776982206e-05, "loss": 2.773, "step": 74300 }, { "epoch": 0.25, "learning_rate": 3.768003868202908e-05, "loss": 2.7712, "step": 74400 }, { "epoch": 0.25, "learning_rate": 3.766347959423611e-05, "loss": 2.7709, "step": 74500 }, { "epoch": 0.25, "learning_rate": 3.764692050644314e-05, "loss": 2.7641, "step": 74600 }, { "epoch": 0.25, "learning_rate": 3.763036141865017e-05, "loss": 2.7659, "step": 74700 }, { "epoch": 0.25, "learning_rate": 3.76138023308572e-05, "loss": 2.7594, "step": 74800 }, { "epoch": 0.25, "learning_rate": 3.7597243243064226e-05, "loss": 2.7561, "step": 74900 }, { "epoch": 0.25, "learning_rate": 3.758068415527126e-05, "loss": 2.7747, "step": 75000 }, { "epoch": 0.25, "learning_rate": 3.756412506747829e-05, "loss": 2.7664, "step": 75100 }, { "epoch": 0.25, "learning_rate": 3.754756597968531e-05, "loss": 2.7907, "step": 75200 }, { "epoch": 0.25, "learning_rate": 3.753100689189234e-05, "loss": 2.7689, "step": 75300 }, { "epoch": 0.25, "learning_rate": 3.7514447804099364e-05, "loss": 2.7684, "step": 75400 }, { "epoch": 0.25, "learning_rate": 3.7497888716306395e-05, "loss": 2.7677, "step": 75500 }, { "epoch": 0.25, "learning_rate": 3.748132962851343e-05, "loss": 2.7735, "step": 75600 }, { "epoch": 0.25, "learning_rate": 3.7464770540720455e-05, "loss": 2.757, "step": 75700 }, { "epoch": 0.25, "learning_rate": 3.7448211452927485e-05, "loss": 2.7717, "step": 75800 }, { "epoch": 0.25, "learning_rate": 3.743165236513451e-05, "loss": 2.7637, "step": 75900 }, { "epoch": 0.25, "learning_rate": 3.741509327734154e-05, "loss": 2.7833, "step": 76000 }, { "epoch": 0.25, "learning_rate": 3.739853418954857e-05, "loss": 2.7627, "step": 76100 }, { "epoch": 0.25, "learning_rate": 3.738197510175559e-05, "loss": 2.7698, "step": 76200 }, { "epoch": 0.25, "learning_rate": 3.7365416013962623e-05, "loss": 2.7685, "step": 76300 }, { "epoch": 0.25, "learning_rate": 3.7348856926169654e-05, "loss": 2.7677, "step": 76400 }, { "epoch": 0.25, "learning_rate": 3.733229783837668e-05, "loss": 2.7688, "step": 76500 }, { "epoch": 0.25, "learning_rate": 3.7315738750583714e-05, "loss": 2.7634, "step": 76600 }, { "epoch": 0.25, "learning_rate": 3.729917966279074e-05, "loss": 2.7738, "step": 76700 }, { "epoch": 0.25, "learning_rate": 3.728262057499777e-05, "loss": 2.7739, "step": 76800 }, { "epoch": 0.25, "learning_rate": 3.72660614872048e-05, "loss": 2.7631, "step": 76900 }, { "epoch": 0.26, "learning_rate": 3.724950239941182e-05, "loss": 2.7487, "step": 77000 }, { "epoch": 0.26, "learning_rate": 3.723294331161885e-05, "loss": 2.7657, "step": 77100 }, { "epoch": 0.26, "learning_rate": 3.7216384223825876e-05, "loss": 2.7645, "step": 77200 }, { "epoch": 0.26, "learning_rate": 3.7199825136032906e-05, "loss": 2.7838, "step": 77300 }, { "epoch": 0.26, "learning_rate": 3.7183266048239936e-05, "loss": 2.7717, "step": 77400 }, { "epoch": 0.26, "learning_rate": 3.716670696044696e-05, "loss": 2.7816, "step": 77500 }, { "epoch": 0.26, "learning_rate": 3.7150147872654e-05, "loss": 2.7741, "step": 77600 }, { "epoch": 0.26, "learning_rate": 3.713358878486102e-05, "loss": 2.7727, "step": 77700 }, { "epoch": 0.26, "learning_rate": 3.711702969706805e-05, "loss": 2.7721, "step": 77800 }, { "epoch": 0.26, "learning_rate": 3.710047060927508e-05, "loss": 2.7768, "step": 77900 }, { "epoch": 0.26, "learning_rate": 3.7083911521482105e-05, "loss": 2.7654, "step": 78000 }, { "epoch": 0.26, "learning_rate": 3.7067352433689135e-05, "loss": 2.7549, "step": 78100 }, { "epoch": 0.26, "learning_rate": 3.7050793345896165e-05, "loss": 2.7627, "step": 78200 }, { "epoch": 0.26, "learning_rate": 3.703423425810319e-05, "loss": 2.7784, "step": 78300 }, { "epoch": 0.26, "learning_rate": 3.701767517031022e-05, "loss": 2.7635, "step": 78400 }, { "epoch": 0.26, "learning_rate": 3.700111608251724e-05, "loss": 2.7707, "step": 78500 }, { "epoch": 0.26, "learning_rate": 3.698455699472428e-05, "loss": 2.7545, "step": 78600 }, { "epoch": 0.26, "learning_rate": 3.69679979069313e-05, "loss": 2.7658, "step": 78700 }, { "epoch": 0.26, "learning_rate": 3.6951438819138333e-05, "loss": 2.7701, "step": 78800 }, { "epoch": 0.26, "learning_rate": 3.6934879731345364e-05, "loss": 2.7816, "step": 78900 }, { "epoch": 0.26, "learning_rate": 3.691832064355239e-05, "loss": 2.771, "step": 79000 }, { "epoch": 0.26, "learning_rate": 3.690176155575942e-05, "loss": 2.7745, "step": 79100 }, { "epoch": 0.26, "learning_rate": 3.688520246796645e-05, "loss": 2.7756, "step": 79200 }, { "epoch": 0.26, "learning_rate": 3.686864338017347e-05, "loss": 2.7655, "step": 79300 }, { "epoch": 0.26, "learning_rate": 3.68520842923805e-05, "loss": 2.7838, "step": 79400 }, { "epoch": 0.26, "learning_rate": 3.683552520458753e-05, "loss": 2.7654, "step": 79500 }, { "epoch": 0.26, "learning_rate": 3.681896611679456e-05, "loss": 2.7728, "step": 79600 }, { "epoch": 0.26, "learning_rate": 3.680240702900159e-05, "loss": 2.7612, "step": 79700 }, { "epoch": 0.26, "learning_rate": 3.6785847941208616e-05, "loss": 2.7618, "step": 79800 }, { "epoch": 0.26, "learning_rate": 3.6769288853415646e-05, "loss": 2.7593, "step": 79900 }, { "epoch": 0.26, "learning_rate": 3.675272976562267e-05, "loss": 2.7689, "step": 80000 }, { "epoch": 0.27, "learning_rate": 3.67361706778297e-05, "loss": 2.7535, "step": 80100 }, { "epoch": 0.27, "learning_rate": 3.671961159003673e-05, "loss": 2.7723, "step": 80200 }, { "epoch": 0.27, "learning_rate": 3.6703052502243754e-05, "loss": 2.759, "step": 80300 }, { "epoch": 0.27, "learning_rate": 3.6686493414450784e-05, "loss": 2.7656, "step": 80400 }, { "epoch": 0.27, "learning_rate": 3.6669934326657815e-05, "loss": 2.7597, "step": 80500 }, { "epoch": 0.27, "learning_rate": 3.6653375238864845e-05, "loss": 2.7767, "step": 80600 }, { "epoch": 0.27, "learning_rate": 3.6636816151071875e-05, "loss": 2.7654, "step": 80700 }, { "epoch": 0.27, "learning_rate": 3.66202570632789e-05, "loss": 2.7803, "step": 80800 }, { "epoch": 0.27, "learning_rate": 3.660369797548593e-05, "loss": 2.7708, "step": 80900 }, { "epoch": 0.27, "learning_rate": 3.658713888769296e-05, "loss": 2.7643, "step": 81000 }, { "epoch": 0.27, "learning_rate": 3.657057979989998e-05, "loss": 2.7694, "step": 81100 }, { "epoch": 0.27, "learning_rate": 3.655402071210701e-05, "loss": 2.7566, "step": 81200 }, { "epoch": 0.27, "learning_rate": 3.653746162431404e-05, "loss": 2.7798, "step": 81300 }, { "epoch": 0.27, "learning_rate": 3.652090253652107e-05, "loss": 2.7593, "step": 81400 }, { "epoch": 0.27, "learning_rate": 3.65043434487281e-05, "loss": 2.7631, "step": 81500 }, { "epoch": 0.27, "learning_rate": 3.648778436093513e-05, "loss": 2.7619, "step": 81600 }, { "epoch": 0.27, "learning_rate": 3.647122527314216e-05, "loss": 2.7562, "step": 81700 }, { "epoch": 0.27, "learning_rate": 3.645466618534918e-05, "loss": 2.7616, "step": 81800 }, { "epoch": 0.27, "learning_rate": 3.643810709755621e-05, "loss": 2.7752, "step": 81900 }, { "epoch": 0.27, "learning_rate": 3.642154800976324e-05, "loss": 2.7622, "step": 82000 }, { "epoch": 0.27, "learning_rate": 3.6404988921970266e-05, "loss": 2.7738, "step": 82100 }, { "epoch": 0.27, "learning_rate": 3.6388429834177296e-05, "loss": 2.7691, "step": 82200 }, { "epoch": 0.27, "learning_rate": 3.6371870746384326e-05, "loss": 2.7709, "step": 82300 }, { "epoch": 0.27, "learning_rate": 3.635531165859135e-05, "loss": 2.789, "step": 82400 }, { "epoch": 0.27, "learning_rate": 3.633875257079838e-05, "loss": 2.7553, "step": 82500 }, { "epoch": 0.27, "learning_rate": 3.632219348300541e-05, "loss": 2.7778, "step": 82600 }, { "epoch": 0.27, "learning_rate": 3.630563439521244e-05, "loss": 2.7791, "step": 82700 }, { "epoch": 0.27, "learning_rate": 3.628907530741947e-05, "loss": 2.777, "step": 82800 }, { "epoch": 0.27, "learning_rate": 3.6272516219626494e-05, "loss": 2.7583, "step": 82900 }, { "epoch": 0.27, "learning_rate": 3.6255957131833525e-05, "loss": 2.7702, "step": 83000 }, { "epoch": 0.28, "learning_rate": 3.623939804404055e-05, "loss": 2.778, "step": 83100 }, { "epoch": 0.28, "learning_rate": 3.622283895624758e-05, "loss": 2.7604, "step": 83200 }, { "epoch": 0.28, "learning_rate": 3.620627986845461e-05, "loss": 2.7674, "step": 83300 }, { "epoch": 0.28, "learning_rate": 3.618972078066163e-05, "loss": 2.781, "step": 83400 }, { "epoch": 0.28, "learning_rate": 3.617316169286866e-05, "loss": 2.7616, "step": 83500 }, { "epoch": 0.28, "learning_rate": 3.615660260507569e-05, "loss": 2.7769, "step": 83600 }, { "epoch": 0.28, "learning_rate": 3.614004351728272e-05, "loss": 2.7688, "step": 83700 }, { "epoch": 0.28, "learning_rate": 3.6123484429489754e-05, "loss": 2.7652, "step": 83800 }, { "epoch": 0.28, "learning_rate": 3.610692534169678e-05, "loss": 2.7685, "step": 83900 }, { "epoch": 0.28, "learning_rate": 3.609036625390381e-05, "loss": 2.7694, "step": 84000 }, { "epoch": 0.28, "learning_rate": 3.607380716611084e-05, "loss": 2.7786, "step": 84100 }, { "epoch": 0.28, "learning_rate": 3.605724807831786e-05, "loss": 2.7719, "step": 84200 }, { "epoch": 0.28, "learning_rate": 3.604068899052489e-05, "loss": 2.7769, "step": 84300 }, { "epoch": 0.28, "learning_rate": 3.6024129902731915e-05, "loss": 2.7651, "step": 84400 }, { "epoch": 0.28, "learning_rate": 3.6007570814938945e-05, "loss": 2.7535, "step": 84500 }, { "epoch": 0.28, "learning_rate": 3.5991011727145976e-05, "loss": 2.757, "step": 84600 }, { "epoch": 0.28, "learning_rate": 3.5974452639353006e-05, "loss": 2.7571, "step": 84700 }, { "epoch": 0.28, "learning_rate": 3.5957893551560036e-05, "loss": 2.7545, "step": 84800 }, { "epoch": 0.28, "learning_rate": 3.594133446376706e-05, "loss": 2.7571, "step": 84900 }, { "epoch": 0.28, "learning_rate": 3.592477537597409e-05, "loss": 2.7558, "step": 85000 }, { "epoch": 0.28, "learning_rate": 3.590821628818112e-05, "loss": 2.7639, "step": 85100 }, { "epoch": 0.28, "learning_rate": 3.5891657200388144e-05, "loss": 2.7656, "step": 85200 }, { "epoch": 0.28, "learning_rate": 3.5875098112595174e-05, "loss": 2.7654, "step": 85300 }, { "epoch": 0.28, "learning_rate": 3.5858539024802204e-05, "loss": 2.7549, "step": 85400 }, { "epoch": 0.28, "learning_rate": 3.584197993700923e-05, "loss": 2.762, "step": 85500 }, { "epoch": 0.28, "learning_rate": 3.5825420849216265e-05, "loss": 2.7716, "step": 85600 }, { "epoch": 0.28, "learning_rate": 3.580886176142329e-05, "loss": 2.7848, "step": 85700 }, { "epoch": 0.28, "learning_rate": 3.579230267363032e-05, "loss": 2.7675, "step": 85800 }, { "epoch": 0.28, "learning_rate": 3.577574358583734e-05, "loss": 2.7798, "step": 85900 }, { "epoch": 0.28, "learning_rate": 3.575918449804437e-05, "loss": 2.7544, "step": 86000 }, { "epoch": 0.29, "learning_rate": 3.57426254102514e-05, "loss": 2.7634, "step": 86100 }, { "epoch": 0.29, "learning_rate": 3.5726066322458427e-05, "loss": 2.7796, "step": 86200 }, { "epoch": 0.29, "learning_rate": 3.570950723466546e-05, "loss": 2.7618, "step": 86300 }, { "epoch": 0.29, "learning_rate": 3.569294814687249e-05, "loss": 2.7575, "step": 86400 }, { "epoch": 0.29, "learning_rate": 3.567638905907951e-05, "loss": 2.7789, "step": 86500 }, { "epoch": 0.29, "learning_rate": 3.565982997128655e-05, "loss": 2.7719, "step": 86600 }, { "epoch": 0.29, "learning_rate": 3.564327088349357e-05, "loss": 2.7506, "step": 86700 }, { "epoch": 0.29, "learning_rate": 3.56267117957006e-05, "loss": 2.757, "step": 86800 }, { "epoch": 0.29, "learning_rate": 3.561015270790763e-05, "loss": 2.7716, "step": 86900 }, { "epoch": 0.29, "learning_rate": 3.5593593620114655e-05, "loss": 2.7676, "step": 87000 }, { "epoch": 0.29, "learning_rate": 3.5577034532321686e-05, "loss": 2.7605, "step": 87100 }, { "epoch": 0.29, "learning_rate": 3.556047544452871e-05, "loss": 2.7766, "step": 87200 }, { "epoch": 0.29, "learning_rate": 3.554391635673574e-05, "loss": 2.7602, "step": 87300 }, { "epoch": 0.29, "learning_rate": 3.552735726894277e-05, "loss": 2.7676, "step": 87400 }, { "epoch": 0.29, "learning_rate": 3.551079818114979e-05, "loss": 2.7678, "step": 87500 }, { "epoch": 0.29, "learning_rate": 3.549423909335683e-05, "loss": 2.7571, "step": 87600 }, { "epoch": 0.29, "learning_rate": 3.5477680005563854e-05, "loss": 2.7761, "step": 87700 }, { "epoch": 0.29, "learning_rate": 3.5461120917770884e-05, "loss": 2.7722, "step": 87800 }, { "epoch": 0.29, "learning_rate": 3.5444561829977915e-05, "loss": 2.7728, "step": 87900 }, { "epoch": 0.29, "learning_rate": 3.542800274218494e-05, "loss": 2.7666, "step": 88000 }, { "epoch": 0.29, "learning_rate": 3.541144365439197e-05, "loss": 2.7674, "step": 88100 }, { "epoch": 0.29, "learning_rate": 3.5394884566599e-05, "loss": 2.7665, "step": 88200 }, { "epoch": 0.29, "learning_rate": 3.537832547880602e-05, "loss": 2.7498, "step": 88300 }, { "epoch": 0.29, "learning_rate": 3.536176639101305e-05, "loss": 2.7735, "step": 88400 }, { "epoch": 0.29, "learning_rate": 3.5345207303220076e-05, "loss": 2.7572, "step": 88500 }, { "epoch": 0.29, "learning_rate": 3.532864821542711e-05, "loss": 2.7603, "step": 88600 }, { "epoch": 0.29, "learning_rate": 3.5312089127634143e-05, "loss": 2.7502, "step": 88700 }, { "epoch": 0.29, "learning_rate": 3.529553003984117e-05, "loss": 2.774, "step": 88800 }, { "epoch": 0.29, "learning_rate": 3.52789709520482e-05, "loss": 2.786, "step": 88900 }, { "epoch": 0.29, "learning_rate": 3.526241186425522e-05, "loss": 2.7827, "step": 89000 }, { "epoch": 0.3, "learning_rate": 3.524585277646225e-05, "loss": 2.7652, "step": 89100 }, { "epoch": 0.3, "learning_rate": 3.522929368866928e-05, "loss": 2.7652, "step": 89200 }, { "epoch": 0.3, "learning_rate": 3.5212734600876305e-05, "loss": 2.7619, "step": 89300 }, { "epoch": 0.3, "learning_rate": 3.5196175513083335e-05, "loss": 2.7673, "step": 89400 }, { "epoch": 0.3, "learning_rate": 3.5179616425290365e-05, "loss": 2.7625, "step": 89500 }, { "epoch": 0.3, "learning_rate": 3.5163057337497396e-05, "loss": 2.7567, "step": 89600 }, { "epoch": 0.3, "learning_rate": 3.5146498249704426e-05, "loss": 2.7665, "step": 89700 }, { "epoch": 0.3, "learning_rate": 3.512993916191145e-05, "loss": 2.7669, "step": 89800 }, { "epoch": 0.3, "learning_rate": 3.511338007411848e-05, "loss": 2.7634, "step": 89900 }, { "epoch": 0.3, "learning_rate": 3.509682098632551e-05, "loss": 2.7788, "step": 90000 }, { "epoch": 0.3, "learning_rate": 3.5080261898532534e-05, "loss": 2.7633, "step": 90100 }, { "epoch": 0.3, "learning_rate": 3.5063702810739564e-05, "loss": 2.7672, "step": 90200 }, { "epoch": 0.3, "learning_rate": 3.504714372294659e-05, "loss": 2.7642, "step": 90300 }, { "epoch": 0.3, "learning_rate": 3.503058463515362e-05, "loss": 2.7647, "step": 90400 }, { "epoch": 0.3, "learning_rate": 3.501402554736065e-05, "loss": 2.7723, "step": 90500 }, { "epoch": 0.3, "learning_rate": 3.499746645956768e-05, "loss": 2.7747, "step": 90600 }, { "epoch": 0.3, "learning_rate": 3.498090737177471e-05, "loss": 2.7588, "step": 90700 }, { "epoch": 0.3, "learning_rate": 3.496434828398173e-05, "loss": 2.7673, "step": 90800 }, { "epoch": 0.3, "learning_rate": 3.494778919618876e-05, "loss": 2.7654, "step": 90900 }, { "epoch": 0.3, "learning_rate": 3.493123010839579e-05, "loss": 2.7616, "step": 91000 }, { "epoch": 0.3, "learning_rate": 3.4914671020602816e-05, "loss": 2.7796, "step": 91100 }, { "epoch": 0.3, "learning_rate": 3.489811193280985e-05, "loss": 2.7652, "step": 91200 }, { "epoch": 0.3, "learning_rate": 3.488155284501688e-05, "loss": 2.7671, "step": 91300 }, { "epoch": 0.3, "learning_rate": 3.48649937572239e-05, "loss": 2.7556, "step": 91400 }, { "epoch": 0.3, "learning_rate": 3.484843466943093e-05, "loss": 2.7542, "step": 91500 }, { "epoch": 0.3, "learning_rate": 3.483187558163796e-05, "loss": 2.7582, "step": 91600 }, { "epoch": 0.3, "learning_rate": 3.481531649384499e-05, "loss": 2.7596, "step": 91700 }, { "epoch": 0.3, "learning_rate": 3.4798757406052015e-05, "loss": 2.7774, "step": 91800 }, { "epoch": 0.3, "learning_rate": 3.4782198318259045e-05, "loss": 2.7661, "step": 91900 }, { "epoch": 0.3, "learning_rate": 3.4765639230466076e-05, "loss": 2.7551, "step": 92000 }, { "epoch": 0.31, "learning_rate": 3.47490801426731e-05, "loss": 2.7545, "step": 92100 }, { "epoch": 0.31, "learning_rate": 3.473252105488013e-05, "loss": 2.7801, "step": 92200 }, { "epoch": 0.31, "learning_rate": 3.471596196708716e-05, "loss": 2.782, "step": 92300 }, { "epoch": 0.31, "learning_rate": 3.469940287929418e-05, "loss": 2.763, "step": 92400 }, { "epoch": 0.31, "learning_rate": 3.4682843791501213e-05, "loss": 2.7406, "step": 92500 }, { "epoch": 0.31, "learning_rate": 3.4666284703708244e-05, "loss": 2.765, "step": 92600 }, { "epoch": 0.31, "learning_rate": 3.4649725615915274e-05, "loss": 2.7653, "step": 92700 }, { "epoch": 0.31, "learning_rate": 3.4633166528122304e-05, "loss": 2.7632, "step": 92800 }, { "epoch": 0.31, "learning_rate": 3.461660744032933e-05, "loss": 2.761, "step": 92900 }, { "epoch": 0.31, "learning_rate": 3.460004835253636e-05, "loss": 2.7688, "step": 93000 }, { "epoch": 0.31, "learning_rate": 3.458348926474338e-05, "loss": 2.7714, "step": 93100 }, { "epoch": 0.31, "learning_rate": 3.456693017695041e-05, "loss": 2.7672, "step": 93200 }, { "epoch": 0.31, "learning_rate": 3.455037108915744e-05, "loss": 2.7682, "step": 93300 }, { "epoch": 0.31, "learning_rate": 3.4533812001364466e-05, "loss": 2.7727, "step": 93400 }, { "epoch": 0.31, "learning_rate": 3.4517252913571496e-05, "loss": 2.7762, "step": 93500 }, { "epoch": 0.31, "learning_rate": 3.4500693825778526e-05, "loss": 2.7726, "step": 93600 }, { "epoch": 0.31, "learning_rate": 3.448413473798556e-05, "loss": 2.7657, "step": 93700 }, { "epoch": 0.31, "learning_rate": 3.446757565019259e-05, "loss": 2.775, "step": 93800 }, { "epoch": 0.31, "learning_rate": 3.445101656239961e-05, "loss": 2.775, "step": 93900 }, { "epoch": 0.31, "learning_rate": 3.443445747460664e-05, "loss": 2.7544, "step": 94000 }, { "epoch": 0.31, "learning_rate": 3.441789838681367e-05, "loss": 2.7638, "step": 94100 }, { "epoch": 0.31, "learning_rate": 3.4401339299020695e-05, "loss": 2.7636, "step": 94200 }, { "epoch": 0.31, "learning_rate": 3.4384780211227725e-05, "loss": 2.7631, "step": 94300 }, { "epoch": 0.31, "learning_rate": 3.436822112343475e-05, "loss": 2.7649, "step": 94400 }, { "epoch": 0.31, "learning_rate": 3.435166203564178e-05, "loss": 2.7604, "step": 94500 }, { "epoch": 0.31, "learning_rate": 3.4335102947848816e-05, "loss": 2.7816, "step": 94600 }, { "epoch": 0.31, "learning_rate": 3.431854386005584e-05, "loss": 2.7671, "step": 94700 }, { "epoch": 0.31, "learning_rate": 3.430198477226287e-05, "loss": 2.7798, "step": 94800 }, { "epoch": 0.31, "learning_rate": 3.428542568446989e-05, "loss": 2.769, "step": 94900 }, { "epoch": 0.31, "learning_rate": 3.4268866596676924e-05, "loss": 2.7652, "step": 95000 }, { "epoch": 0.31, "learning_rate": 3.4252307508883954e-05, "loss": 2.7537, "step": 95100 }, { "epoch": 0.32, "learning_rate": 3.423574842109098e-05, "loss": 2.767, "step": 95200 }, { "epoch": 0.32, "learning_rate": 3.421918933329801e-05, "loss": 2.7716, "step": 95300 }, { "epoch": 0.32, "learning_rate": 3.420263024550504e-05, "loss": 2.7678, "step": 95400 }, { "epoch": 0.32, "learning_rate": 3.418607115771206e-05, "loss": 2.7497, "step": 95500 }, { "epoch": 0.32, "learning_rate": 3.41695120699191e-05, "loss": 2.7556, "step": 95600 }, { "epoch": 0.32, "learning_rate": 3.415295298212612e-05, "loss": 2.751, "step": 95700 }, { "epoch": 0.32, "learning_rate": 3.413639389433315e-05, "loss": 2.7716, "step": 95800 }, { "epoch": 0.32, "learning_rate": 3.411983480654018e-05, "loss": 2.7641, "step": 95900 }, { "epoch": 0.32, "learning_rate": 3.4103275718747206e-05, "loss": 2.7445, "step": 96000 }, { "epoch": 0.32, "learning_rate": 3.4086716630954236e-05, "loss": 2.7678, "step": 96100 }, { "epoch": 0.32, "learning_rate": 3.407015754316126e-05, "loss": 2.7545, "step": 96200 }, { "epoch": 0.32, "learning_rate": 3.405359845536829e-05, "loss": 2.7554, "step": 96300 }, { "epoch": 0.32, "learning_rate": 3.403703936757532e-05, "loss": 2.755, "step": 96400 }, { "epoch": 0.32, "learning_rate": 3.4020480279782344e-05, "loss": 2.7867, "step": 96500 }, { "epoch": 0.32, "learning_rate": 3.400392119198938e-05, "loss": 2.7597, "step": 96600 }, { "epoch": 0.32, "learning_rate": 3.3987362104196405e-05, "loss": 2.7579, "step": 96700 }, { "epoch": 0.32, "learning_rate": 3.3970803016403435e-05, "loss": 2.7721, "step": 96800 }, { "epoch": 0.32, "learning_rate": 3.3954243928610465e-05, "loss": 2.7588, "step": 96900 }, { "epoch": 0.32, "learning_rate": 3.393768484081749e-05, "loss": 2.7565, "step": 97000 }, { "epoch": 0.32, "learning_rate": 3.392112575302452e-05, "loss": 2.756, "step": 97100 }, { "epoch": 0.32, "learning_rate": 3.390456666523155e-05, "loss": 2.767, "step": 97200 }, { "epoch": 0.32, "learning_rate": 3.388800757743857e-05, "loss": 2.7759, "step": 97300 }, { "epoch": 0.32, "learning_rate": 3.38714484896456e-05, "loss": 2.7578, "step": 97400 }, { "epoch": 0.32, "learning_rate": 3.385488940185263e-05, "loss": 2.7654, "step": 97500 }, { "epoch": 0.32, "learning_rate": 3.3838330314059664e-05, "loss": 2.757, "step": 97600 }, { "epoch": 0.32, "learning_rate": 3.3821771226266694e-05, "loss": 2.7645, "step": 97700 }, { "epoch": 0.32, "learning_rate": 3.380521213847372e-05, "loss": 2.7632, "step": 97800 }, { "epoch": 0.32, "learning_rate": 3.378865305068075e-05, "loss": 2.7691, "step": 97900 }, { "epoch": 0.32, "learning_rate": 3.377209396288777e-05, "loss": 2.7674, "step": 98000 }, { "epoch": 0.32, "learning_rate": 3.37555348750948e-05, "loss": 2.7589, "step": 98100 }, { "epoch": 0.33, "learning_rate": 3.373897578730183e-05, "loss": 2.7618, "step": 98200 }, { "epoch": 0.33, "learning_rate": 3.3722416699508856e-05, "loss": 2.7834, "step": 98300 }, { "epoch": 0.33, "learning_rate": 3.3705857611715886e-05, "loss": 2.7622, "step": 98400 }, { "epoch": 0.33, "learning_rate": 3.3689298523922916e-05, "loss": 2.779, "step": 98500 }, { "epoch": 0.33, "learning_rate": 3.3672739436129947e-05, "loss": 2.7589, "step": 98600 }, { "epoch": 0.33, "learning_rate": 3.365618034833698e-05, "loss": 2.7683, "step": 98700 }, { "epoch": 0.33, "learning_rate": 3.3639621260544e-05, "loss": 2.7735, "step": 98800 }, { "epoch": 0.33, "learning_rate": 3.362306217275103e-05, "loss": 2.7465, "step": 98900 }, { "epoch": 0.33, "learning_rate": 3.3606503084958054e-05, "loss": 2.7598, "step": 99000 }, { "epoch": 0.33, "learning_rate": 3.3589943997165084e-05, "loss": 2.7521, "step": 99100 }, { "epoch": 0.33, "learning_rate": 3.3573384909372115e-05, "loss": 2.7639, "step": 99200 }, { "epoch": 0.33, "learning_rate": 3.355682582157914e-05, "loss": 2.7595, "step": 99300 }, { "epoch": 0.33, "learning_rate": 3.354026673378617e-05, "loss": 2.7575, "step": 99400 }, { "epoch": 0.33, "learning_rate": 3.35237076459932e-05, "loss": 2.7489, "step": 99500 }, { "epoch": 0.33, "learning_rate": 3.350714855820023e-05, "loss": 2.7656, "step": 99600 }, { "epoch": 0.33, "learning_rate": 3.349058947040726e-05, "loss": 2.7598, "step": 99700 }, { "epoch": 0.33, "learning_rate": 3.347403038261428e-05, "loss": 2.7486, "step": 99800 }, { "epoch": 0.33, "learning_rate": 3.345747129482131e-05, "loss": 2.7553, "step": 99900 }, { "epoch": 0.33, "learning_rate": 3.3440912207028344e-05, "loss": 2.7728, "step": 100000 }, { "epoch": 0.33, "learning_rate": 3.342435311923537e-05, "loss": 2.7468, "step": 100100 }, { "epoch": 0.33, "learning_rate": 3.34077940314424e-05, "loss": 2.7582, "step": 100200 }, { "epoch": 0.33, "learning_rate": 3.339123494364942e-05, "loss": 2.7531, "step": 100300 }, { "epoch": 0.33, "learning_rate": 3.337467585585645e-05, "loss": 2.7754, "step": 100400 }, { "epoch": 0.33, "learning_rate": 3.335811676806348e-05, "loss": 2.7671, "step": 100500 }, { "epoch": 0.33, "learning_rate": 3.334155768027051e-05, "loss": 2.7632, "step": 100600 }, { "epoch": 0.33, "learning_rate": 3.332499859247754e-05, "loss": 2.7483, "step": 100700 }, { "epoch": 0.33, "learning_rate": 3.3308439504684566e-05, "loss": 2.7764, "step": 100800 }, { "epoch": 0.33, "learning_rate": 3.3291880416891596e-05, "loss": 2.7616, "step": 100900 }, { "epoch": 0.33, "learning_rate": 3.3275321329098626e-05, "loss": 2.7647, "step": 101000 }, { "epoch": 0.33, "learning_rate": 3.325876224130565e-05, "loss": 2.7571, "step": 101100 }, { "epoch": 0.34, "learning_rate": 3.324220315351268e-05, "loss": 2.751, "step": 101200 }, { "epoch": 0.34, "learning_rate": 3.322564406571971e-05, "loss": 2.7625, "step": 101300 }, { "epoch": 0.34, "learning_rate": 3.3209084977926734e-05, "loss": 2.7686, "step": 101400 }, { "epoch": 0.34, "learning_rate": 3.3192525890133764e-05, "loss": 2.7722, "step": 101500 }, { "epoch": 0.34, "learning_rate": 3.3175966802340795e-05, "loss": 2.7697, "step": 101600 }, { "epoch": 0.34, "learning_rate": 3.3159407714547825e-05, "loss": 2.7732, "step": 101700 }, { "epoch": 0.34, "learning_rate": 3.3142848626754855e-05, "loss": 2.7656, "step": 101800 }, { "epoch": 0.34, "learning_rate": 3.312628953896188e-05, "loss": 2.7592, "step": 101900 }, { "epoch": 0.34, "learning_rate": 3.310973045116891e-05, "loss": 2.7466, "step": 102000 }, { "epoch": 0.34, "learning_rate": 3.309317136337593e-05, "loss": 2.763, "step": 102100 }, { "epoch": 0.34, "learning_rate": 3.307661227558296e-05, "loss": 2.773, "step": 102200 }, { "epoch": 0.34, "learning_rate": 3.306005318778999e-05, "loss": 2.7662, "step": 102300 }, { "epoch": 0.34, "learning_rate": 3.3043494099997017e-05, "loss": 2.7679, "step": 102400 }, { "epoch": 0.34, "learning_rate": 3.302693501220405e-05, "loss": 2.7694, "step": 102500 }, { "epoch": 0.34, "learning_rate": 3.301037592441108e-05, "loss": 2.7698, "step": 102600 }, { "epoch": 0.34, "learning_rate": 3.299381683661811e-05, "loss": 2.7527, "step": 102700 }, { "epoch": 0.34, "learning_rate": 3.297725774882514e-05, "loss": 2.7665, "step": 102800 }, { "epoch": 0.34, "learning_rate": 3.296069866103216e-05, "loss": 2.7618, "step": 102900 }, { "epoch": 0.34, "learning_rate": 3.294413957323919e-05, "loss": 2.7532, "step": 103000 }, { "epoch": 0.34, "learning_rate": 3.292758048544622e-05, "loss": 2.7778, "step": 103100 }, { "epoch": 0.34, "learning_rate": 3.2911021397653245e-05, "loss": 2.7687, "step": 103200 }, { "epoch": 0.34, "learning_rate": 3.2894462309860276e-05, "loss": 2.7574, "step": 103300 }, { "epoch": 0.34, "learning_rate": 3.28779032220673e-05, "loss": 2.7666, "step": 103400 }, { "epoch": 0.34, "learning_rate": 3.286134413427433e-05, "loss": 2.7559, "step": 103500 }, { "epoch": 0.34, "learning_rate": 3.284478504648137e-05, "loss": 2.7526, "step": 103600 }, { "epoch": 0.34, "learning_rate": 3.282822595868839e-05, "loss": 2.758, "step": 103700 }, { "epoch": 0.34, "learning_rate": 3.281166687089542e-05, "loss": 2.7578, "step": 103800 }, { "epoch": 0.34, "learning_rate": 3.2795107783102444e-05, "loss": 2.7533, "step": 103900 }, { "epoch": 0.34, "learning_rate": 3.2778548695309474e-05, "loss": 2.7938, "step": 104000 }, { "epoch": 0.34, "learning_rate": 3.2761989607516505e-05, "loss": 2.7731, "step": 104100 }, { "epoch": 0.35, "learning_rate": 3.274543051972353e-05, "loss": 2.7624, "step": 104200 }, { "epoch": 0.35, "learning_rate": 3.272887143193056e-05, "loss": 2.7623, "step": 104300 }, { "epoch": 0.35, "learning_rate": 3.271231234413759e-05, "loss": 2.7708, "step": 104400 }, { "epoch": 0.35, "learning_rate": 3.269575325634461e-05, "loss": 2.7552, "step": 104500 }, { "epoch": 0.35, "learning_rate": 3.267919416855165e-05, "loss": 2.7578, "step": 104600 }, { "epoch": 0.35, "learning_rate": 3.266263508075867e-05, "loss": 2.7699, "step": 104700 }, { "epoch": 0.35, "learning_rate": 3.26460759929657e-05, "loss": 2.7627, "step": 104800 }, { "epoch": 0.35, "learning_rate": 3.2629516905172733e-05, "loss": 2.7735, "step": 104900 }, { "epoch": 0.35, "learning_rate": 3.261295781737976e-05, "loss": 2.7692, "step": 105000 }, { "epoch": 0.35, "learning_rate": 3.259639872958679e-05, "loss": 2.7679, "step": 105100 }, { "epoch": 0.35, "learning_rate": 3.257983964179381e-05, "loss": 2.7618, "step": 105200 }, { "epoch": 0.35, "learning_rate": 3.256328055400084e-05, "loss": 2.7637, "step": 105300 }, { "epoch": 0.35, "learning_rate": 3.254672146620787e-05, "loss": 2.7641, "step": 105400 }, { "epoch": 0.35, "learning_rate": 3.2530162378414895e-05, "loss": 2.7697, "step": 105500 }, { "epoch": 0.35, "learning_rate": 3.251360329062193e-05, "loss": 2.7751, "step": 105600 }, { "epoch": 0.35, "learning_rate": 3.2497044202828956e-05, "loss": 2.7631, "step": 105700 }, { "epoch": 0.35, "learning_rate": 3.2480485115035986e-05, "loss": 2.7671, "step": 105800 }, { "epoch": 0.35, "learning_rate": 3.2463926027243016e-05, "loss": 2.7605, "step": 105900 }, { "epoch": 0.35, "learning_rate": 3.244736693945004e-05, "loss": 2.7699, "step": 106000 }, { "epoch": 0.35, "learning_rate": 3.243080785165707e-05, "loss": 2.7588, "step": 106100 }, { "epoch": 0.35, "learning_rate": 3.2414248763864093e-05, "loss": 2.7724, "step": 106200 }, { "epoch": 0.35, "learning_rate": 3.2397689676071124e-05, "loss": 2.7479, "step": 106300 }, { "epoch": 0.35, "learning_rate": 3.2381130588278154e-05, "loss": 2.762, "step": 106400 }, { "epoch": 0.35, "learning_rate": 3.236457150048518e-05, "loss": 2.7441, "step": 106500 }, { "epoch": 0.35, "learning_rate": 3.2348012412692215e-05, "loss": 2.7582, "step": 106600 }, { "epoch": 0.35, "learning_rate": 3.233145332489924e-05, "loss": 2.7559, "step": 106700 }, { "epoch": 0.35, "learning_rate": 3.231489423710627e-05, "loss": 2.7591, "step": 106800 }, { "epoch": 0.35, "learning_rate": 3.22983351493133e-05, "loss": 2.7725, "step": 106900 }, { "epoch": 0.35, "learning_rate": 3.228177606152032e-05, "loss": 2.7694, "step": 107000 }, { "epoch": 0.35, "learning_rate": 3.226521697372735e-05, "loss": 2.7617, "step": 107100 }, { "epoch": 0.36, "learning_rate": 3.224865788593438e-05, "loss": 2.7571, "step": 107200 }, { "epoch": 0.36, "learning_rate": 3.2232098798141406e-05, "loss": 2.7704, "step": 107300 }, { "epoch": 0.36, "learning_rate": 3.221553971034844e-05, "loss": 2.7718, "step": 107400 }, { "epoch": 0.36, "learning_rate": 3.219898062255546e-05, "loss": 2.7644, "step": 107500 }, { "epoch": 0.36, "learning_rate": 3.21824215347625e-05, "loss": 2.7794, "step": 107600 }, { "epoch": 0.36, "learning_rate": 3.216586244696953e-05, "loss": 2.7497, "step": 107700 }, { "epoch": 0.36, "learning_rate": 3.214930335917655e-05, "loss": 2.7569, "step": 107800 }, { "epoch": 0.36, "learning_rate": 3.213274427138358e-05, "loss": 2.7458, "step": 107900 }, { "epoch": 0.36, "learning_rate": 3.2116185183590605e-05, "loss": 2.7596, "step": 108000 }, { "epoch": 0.36, "learning_rate": 3.2099626095797635e-05, "loss": 2.7562, "step": 108100 }, { "epoch": 0.36, "learning_rate": 3.2083067008004666e-05, "loss": 2.76, "step": 108200 }, { "epoch": 0.36, "learning_rate": 3.206650792021169e-05, "loss": 2.7697, "step": 108300 }, { "epoch": 0.36, "learning_rate": 3.204994883241872e-05, "loss": 2.7668, "step": 108400 }, { "epoch": 0.36, "learning_rate": 3.203338974462575e-05, "loss": 2.7502, "step": 108500 }, { "epoch": 0.36, "learning_rate": 3.201683065683278e-05, "loss": 2.7758, "step": 108600 }, { "epoch": 0.36, "learning_rate": 3.200027156903981e-05, "loss": 2.7576, "step": 108700 }, { "epoch": 0.36, "learning_rate": 3.1983712481246834e-05, "loss": 2.7622, "step": 108800 }, { "epoch": 0.36, "learning_rate": 3.1967153393453864e-05, "loss": 2.7674, "step": 108900 }, { "epoch": 0.36, "learning_rate": 3.1950594305660894e-05, "loss": 2.775, "step": 109000 }, { "epoch": 0.36, "learning_rate": 3.193403521786792e-05, "loss": 2.7526, "step": 109100 }, { "epoch": 0.36, "learning_rate": 3.191747613007495e-05, "loss": 2.7595, "step": 109200 }, { "epoch": 0.36, "learning_rate": 3.190091704228197e-05, "loss": 2.7526, "step": 109300 }, { "epoch": 0.36, "learning_rate": 3.1884357954489e-05, "loss": 2.758, "step": 109400 }, { "epoch": 0.36, "learning_rate": 3.186779886669603e-05, "loss": 2.7494, "step": 109500 }, { "epoch": 0.36, "learning_rate": 3.185123977890306e-05, "loss": 2.7511, "step": 109600 }, { "epoch": 0.36, "learning_rate": 3.183468069111009e-05, "loss": 2.7453, "step": 109700 }, { "epoch": 0.36, "learning_rate": 3.1818121603317116e-05, "loss": 2.7605, "step": 109800 }, { "epoch": 0.36, "learning_rate": 3.180156251552415e-05, "loss": 2.7455, "step": 109900 }, { "epoch": 0.36, "learning_rate": 3.178500342773118e-05, "loss": 2.7523, "step": 110000 }, { "epoch": 0.36, "learning_rate": 3.17684443399382e-05, "loss": 2.7579, "step": 110100 }, { "epoch": 0.36, "learning_rate": 3.175188525214523e-05, "loss": 2.767, "step": 110200 }, { "epoch": 0.37, "learning_rate": 3.173532616435226e-05, "loss": 2.7535, "step": 110300 }, { "epoch": 0.37, "learning_rate": 3.1718767076559285e-05, "loss": 2.7637, "step": 110400 }, { "epoch": 0.37, "learning_rate": 3.1702207988766315e-05, "loss": 2.7577, "step": 110500 }, { "epoch": 0.37, "learning_rate": 3.1685648900973345e-05, "loss": 2.7741, "step": 110600 }, { "epoch": 0.37, "learning_rate": 3.1669089813180376e-05, "loss": 2.7747, "step": 110700 }, { "epoch": 0.37, "learning_rate": 3.1652530725387406e-05, "loss": 2.7647, "step": 110800 }, { "epoch": 0.37, "learning_rate": 3.163597163759443e-05, "loss": 2.7625, "step": 110900 }, { "epoch": 0.37, "learning_rate": 3.161941254980146e-05, "loss": 2.7657, "step": 111000 }, { "epoch": 0.37, "learning_rate": 3.160285346200848e-05, "loss": 2.7449, "step": 111100 }, { "epoch": 0.37, "learning_rate": 3.1586294374215514e-05, "loss": 2.7622, "step": 111200 }, { "epoch": 0.37, "learning_rate": 3.1569735286422544e-05, "loss": 2.7657, "step": 111300 }, { "epoch": 0.37, "learning_rate": 3.155317619862957e-05, "loss": 2.759, "step": 111400 }, { "epoch": 0.37, "learning_rate": 3.15366171108366e-05, "loss": 2.7602, "step": 111500 }, { "epoch": 0.37, "learning_rate": 3.152005802304363e-05, "loss": 2.7635, "step": 111600 }, { "epoch": 0.37, "learning_rate": 3.150349893525066e-05, "loss": 2.7549, "step": 111700 }, { "epoch": 0.37, "learning_rate": 3.148693984745769e-05, "loss": 2.7618, "step": 111800 }, { "epoch": 0.37, "learning_rate": 3.147038075966471e-05, "loss": 2.7703, "step": 111900 }, { "epoch": 0.37, "learning_rate": 3.145382167187174e-05, "loss": 2.7709, "step": 112000 }, { "epoch": 0.37, "learning_rate": 3.143726258407877e-05, "loss": 2.7669, "step": 112100 }, { "epoch": 0.37, "learning_rate": 3.1420703496285796e-05, "loss": 2.7517, "step": 112200 }, { "epoch": 0.37, "learning_rate": 3.1404144408492827e-05, "loss": 2.7328, "step": 112300 }, { "epoch": 0.37, "learning_rate": 3.138758532069985e-05, "loss": 2.7516, "step": 112400 }, { "epoch": 0.37, "learning_rate": 3.137102623290688e-05, "loss": 2.759, "step": 112500 }, { "epoch": 0.37, "learning_rate": 3.135446714511391e-05, "loss": 2.7515, "step": 112600 }, { "epoch": 0.37, "learning_rate": 3.133790805732094e-05, "loss": 2.7585, "step": 112700 }, { "epoch": 0.37, "learning_rate": 3.132134896952797e-05, "loss": 2.7767, "step": 112800 }, { "epoch": 0.37, "learning_rate": 3.1304789881734995e-05, "loss": 2.7467, "step": 112900 }, { "epoch": 0.37, "learning_rate": 3.1288230793942025e-05, "loss": 2.7571, "step": 113000 }, { "epoch": 0.37, "learning_rate": 3.1271671706149055e-05, "loss": 2.7672, "step": 113100 }, { "epoch": 0.37, "learning_rate": 3.125511261835608e-05, "loss": 2.7683, "step": 113200 }, { "epoch": 0.38, "learning_rate": 3.123855353056311e-05, "loss": 2.7675, "step": 113300 }, { "epoch": 0.38, "learning_rate": 3.122199444277013e-05, "loss": 2.7559, "step": 113400 }, { "epoch": 0.38, "learning_rate": 3.120543535497716e-05, "loss": 2.7681, "step": 113500 }, { "epoch": 0.38, "learning_rate": 3.11888762671842e-05, "loss": 2.7606, "step": 113600 }, { "epoch": 0.38, "learning_rate": 3.1172317179391224e-05, "loss": 2.7505, "step": 113700 }, { "epoch": 0.38, "learning_rate": 3.1155758091598254e-05, "loss": 2.7445, "step": 113800 }, { "epoch": 0.38, "learning_rate": 3.113919900380528e-05, "loss": 2.755, "step": 113900 }, { "epoch": 0.38, "learning_rate": 3.112263991601231e-05, "loss": 2.7527, "step": 114000 }, { "epoch": 0.38, "learning_rate": 3.110608082821934e-05, "loss": 2.7576, "step": 114100 }, { "epoch": 0.38, "learning_rate": 3.108952174042636e-05, "loss": 2.7761, "step": 114200 }, { "epoch": 0.38, "learning_rate": 3.107296265263339e-05, "loss": 2.7507, "step": 114300 }, { "epoch": 0.38, "learning_rate": 3.105640356484042e-05, "loss": 2.7524, "step": 114400 }, { "epoch": 0.38, "learning_rate": 3.1039844477047446e-05, "loss": 2.7524, "step": 114500 }, { "epoch": 0.38, "learning_rate": 3.102328538925448e-05, "loss": 2.7679, "step": 114600 }, { "epoch": 0.38, "learning_rate": 3.1006726301461506e-05, "loss": 2.7491, "step": 114700 }, { "epoch": 0.38, "learning_rate": 3.0990167213668537e-05, "loss": 2.766, "step": 114800 }, { "epoch": 0.38, "learning_rate": 3.097360812587557e-05, "loss": 2.7598, "step": 114900 }, { "epoch": 0.38, "learning_rate": 3.095704903808259e-05, "loss": 2.7573, "step": 115000 }, { "epoch": 0.38, "learning_rate": 3.094048995028962e-05, "loss": 2.772, "step": 115100 }, { "epoch": 0.38, "learning_rate": 3.0923930862496644e-05, "loss": 2.7522, "step": 115200 }, { "epoch": 0.38, "learning_rate": 3.0907371774703675e-05, "loss": 2.7459, "step": 115300 }, { "epoch": 0.38, "learning_rate": 3.0890812686910705e-05, "loss": 2.7562, "step": 115400 }, { "epoch": 0.38, "learning_rate": 3.087425359911773e-05, "loss": 2.7545, "step": 115500 }, { "epoch": 0.38, "learning_rate": 3.0857694511324765e-05, "loss": 2.7687, "step": 115600 }, { "epoch": 0.38, "learning_rate": 3.084113542353179e-05, "loss": 2.7686, "step": 115700 }, { "epoch": 0.38, "learning_rate": 3.082457633573882e-05, "loss": 2.7615, "step": 115800 }, { "epoch": 0.38, "learning_rate": 3.080801724794585e-05, "loss": 2.7742, "step": 115900 }, { "epoch": 0.38, "learning_rate": 3.079145816015287e-05, "loss": 2.7593, "step": 116000 }, { "epoch": 0.38, "learning_rate": 3.07748990723599e-05, "loss": 2.7572, "step": 116100 }, { "epoch": 0.38, "learning_rate": 3.0758339984566934e-05, "loss": 2.7566, "step": 116200 }, { "epoch": 0.39, "learning_rate": 3.074178089677396e-05, "loss": 2.7496, "step": 116300 }, { "epoch": 0.39, "learning_rate": 3.072522180898099e-05, "loss": 2.767, "step": 116400 }, { "epoch": 0.39, "learning_rate": 3.070866272118801e-05, "loss": 2.747, "step": 116500 }, { "epoch": 0.39, "learning_rate": 3.069210363339505e-05, "loss": 2.7728, "step": 116600 }, { "epoch": 0.39, "learning_rate": 3.067554454560208e-05, "loss": 2.7727, "step": 116700 }, { "epoch": 0.39, "learning_rate": 3.06589854578091e-05, "loss": 2.749, "step": 116800 }, { "epoch": 0.39, "learning_rate": 3.064242637001613e-05, "loss": 2.7447, "step": 116900 }, { "epoch": 0.39, "learning_rate": 3.0625867282223156e-05, "loss": 2.7553, "step": 117000 }, { "epoch": 0.39, "learning_rate": 3.0609308194430186e-05, "loss": 2.7591, "step": 117100 }, { "epoch": 0.39, "learning_rate": 3.0592749106637216e-05, "loss": 2.7478, "step": 117200 }, { "epoch": 0.39, "learning_rate": 3.057619001884424e-05, "loss": 2.7632, "step": 117300 }, { "epoch": 0.39, "learning_rate": 3.055963093105127e-05, "loss": 2.754, "step": 117400 }, { "epoch": 0.39, "learning_rate": 3.05430718432583e-05, "loss": 2.7479, "step": 117500 }, { "epoch": 0.39, "learning_rate": 3.052651275546533e-05, "loss": 2.7646, "step": 117600 }, { "epoch": 0.39, "learning_rate": 3.0509953667672358e-05, "loss": 2.7489, "step": 117700 }, { "epoch": 0.39, "learning_rate": 3.0493394579879388e-05, "loss": 2.7734, "step": 117800 }, { "epoch": 0.39, "learning_rate": 3.0476835492086415e-05, "loss": 2.7647, "step": 117900 }, { "epoch": 0.39, "learning_rate": 3.0460276404293442e-05, "loss": 2.7644, "step": 118000 }, { "epoch": 0.39, "learning_rate": 3.044371731650047e-05, "loss": 2.7662, "step": 118100 }, { "epoch": 0.39, "learning_rate": 3.04271582287075e-05, "loss": 2.7603, "step": 118200 }, { "epoch": 0.39, "learning_rate": 3.0410599140914526e-05, "loss": 2.7531, "step": 118300 }, { "epoch": 0.39, "learning_rate": 3.0394040053121553e-05, "loss": 2.7671, "step": 118400 }, { "epoch": 0.39, "learning_rate": 3.037748096532858e-05, "loss": 2.7601, "step": 118500 }, { "epoch": 0.39, "learning_rate": 3.0360921877535613e-05, "loss": 2.758, "step": 118600 }, { "epoch": 0.39, "learning_rate": 3.034436278974264e-05, "loss": 2.7721, "step": 118700 }, { "epoch": 0.39, "learning_rate": 3.032780370194967e-05, "loss": 2.7658, "step": 118800 }, { "epoch": 0.39, "learning_rate": 3.0311244614156698e-05, "loss": 2.7738, "step": 118900 }, { "epoch": 0.39, "learning_rate": 3.0294685526363724e-05, "loss": 2.7705, "step": 119000 }, { "epoch": 0.39, "learning_rate": 3.027812643857075e-05, "loss": 2.755, "step": 119100 }, { "epoch": 0.39, "learning_rate": 3.026156735077778e-05, "loss": 2.7609, "step": 119200 }, { "epoch": 0.4, "learning_rate": 3.024500826298481e-05, "loss": 2.7629, "step": 119300 }, { "epoch": 0.4, "learning_rate": 3.0228449175191835e-05, "loss": 2.7596, "step": 119400 }, { "epoch": 0.4, "learning_rate": 3.0211890087398866e-05, "loss": 2.7434, "step": 119500 }, { "epoch": 0.4, "learning_rate": 3.0195330999605896e-05, "loss": 2.7797, "step": 119600 }, { "epoch": 0.4, "learning_rate": 3.0178771911812926e-05, "loss": 2.7545, "step": 119700 }, { "epoch": 0.4, "learning_rate": 3.0162212824019953e-05, "loss": 2.7634, "step": 119800 }, { "epoch": 0.4, "learning_rate": 3.014565373622698e-05, "loss": 2.7587, "step": 119900 }, { "epoch": 0.4, "learning_rate": 3.0129094648434007e-05, "loss": 2.7685, "step": 120000 }, { "epoch": 0.4, "learning_rate": 3.0112535560641037e-05, "loss": 2.7517, "step": 120100 }, { "epoch": 0.4, "learning_rate": 3.0095976472848064e-05, "loss": 2.7565, "step": 120200 }, { "epoch": 0.4, "learning_rate": 3.007941738505509e-05, "loss": 2.7566, "step": 120300 }, { "epoch": 0.4, "learning_rate": 3.0062858297262118e-05, "loss": 2.7513, "step": 120400 }, { "epoch": 0.4, "learning_rate": 3.004629920946915e-05, "loss": 2.7686, "step": 120500 }, { "epoch": 0.4, "learning_rate": 3.0029740121676182e-05, "loss": 2.7608, "step": 120600 }, { "epoch": 0.4, "learning_rate": 3.001318103388321e-05, "loss": 2.7566, "step": 120700 }, { "epoch": 0.4, "learning_rate": 2.9996621946090236e-05, "loss": 2.7598, "step": 120800 }, { "epoch": 0.4, "learning_rate": 2.9980062858297263e-05, "loss": 2.7567, "step": 120900 }, { "epoch": 0.4, "learning_rate": 2.9963503770504293e-05, "loss": 2.7671, "step": 121000 }, { "epoch": 0.4, "learning_rate": 2.994694468271132e-05, "loss": 2.7633, "step": 121100 }, { "epoch": 0.4, "learning_rate": 2.9930385594918347e-05, "loss": 2.7682, "step": 121200 }, { "epoch": 0.4, "learning_rate": 2.9913826507125374e-05, "loss": 2.7629, "step": 121300 }, { "epoch": 0.4, "learning_rate": 2.9897267419332404e-05, "loss": 2.7464, "step": 121400 }, { "epoch": 0.4, "learning_rate": 2.988070833153943e-05, "loss": 2.7524, "step": 121500 }, { "epoch": 0.4, "learning_rate": 2.9864149243746465e-05, "loss": 2.7651, "step": 121600 }, { "epoch": 0.4, "learning_rate": 2.9847590155953492e-05, "loss": 2.7569, "step": 121700 }, { "epoch": 0.4, "learning_rate": 2.983103106816052e-05, "loss": 2.7521, "step": 121800 }, { "epoch": 0.4, "learning_rate": 2.981447198036755e-05, "loss": 2.7674, "step": 121900 }, { "epoch": 0.4, "learning_rate": 2.9797912892574576e-05, "loss": 2.7561, "step": 122000 }, { "epoch": 0.4, "learning_rate": 2.9781353804781603e-05, "loss": 2.7656, "step": 122100 }, { "epoch": 0.4, "learning_rate": 2.976479471698863e-05, "loss": 2.7588, "step": 122200 }, { "epoch": 0.41, "learning_rate": 2.974823562919566e-05, "loss": 2.7578, "step": 122300 }, { "epoch": 0.41, "learning_rate": 2.9731676541402687e-05, "loss": 2.7505, "step": 122400 }, { "epoch": 0.41, "learning_rate": 2.9715117453609714e-05, "loss": 2.769, "step": 122500 }, { "epoch": 0.41, "learning_rate": 2.9698558365816747e-05, "loss": 2.7546, "step": 122600 }, { "epoch": 0.41, "learning_rate": 2.9681999278023774e-05, "loss": 2.7474, "step": 122700 }, { "epoch": 0.41, "learning_rate": 2.9665440190230805e-05, "loss": 2.7522, "step": 122800 }, { "epoch": 0.41, "learning_rate": 2.964888110243783e-05, "loss": 2.7692, "step": 122900 }, { "epoch": 0.41, "learning_rate": 2.963232201464486e-05, "loss": 2.7625, "step": 123000 }, { "epoch": 0.41, "learning_rate": 2.9615762926851885e-05, "loss": 2.7502, "step": 123100 }, { "epoch": 0.41, "learning_rate": 2.9599203839058916e-05, "loss": 2.7606, "step": 123200 }, { "epoch": 0.41, "learning_rate": 2.9582644751265943e-05, "loss": 2.7565, "step": 123300 }, { "epoch": 0.41, "learning_rate": 2.956608566347297e-05, "loss": 2.7703, "step": 123400 }, { "epoch": 0.41, "learning_rate": 2.9549526575679996e-05, "loss": 2.7646, "step": 123500 }, { "epoch": 0.41, "learning_rate": 2.953296748788703e-05, "loss": 2.7451, "step": 123600 }, { "epoch": 0.41, "learning_rate": 2.951640840009406e-05, "loss": 2.7623, "step": 123700 }, { "epoch": 0.41, "learning_rate": 2.9499849312301087e-05, "loss": 2.7494, "step": 123800 }, { "epoch": 0.41, "learning_rate": 2.9483290224508114e-05, "loss": 2.7483, "step": 123900 }, { "epoch": 0.41, "learning_rate": 2.946673113671514e-05, "loss": 2.7581, "step": 124000 }, { "epoch": 0.41, "learning_rate": 2.945017204892217e-05, "loss": 2.7801, "step": 124100 }, { "epoch": 0.41, "learning_rate": 2.94336129611292e-05, "loss": 2.76, "step": 124200 }, { "epoch": 0.41, "learning_rate": 2.9417053873336225e-05, "loss": 2.7638, "step": 124300 }, { "epoch": 0.41, "learning_rate": 2.9400494785543252e-05, "loss": 2.7367, "step": 124400 }, { "epoch": 0.41, "learning_rate": 2.9383935697750283e-05, "loss": 2.7502, "step": 124500 }, { "epoch": 0.41, "learning_rate": 2.9367376609957313e-05, "loss": 2.7542, "step": 124600 }, { "epoch": 0.41, "learning_rate": 2.9350817522164343e-05, "loss": 2.7528, "step": 124700 }, { "epoch": 0.41, "learning_rate": 2.933425843437137e-05, "loss": 2.7583, "step": 124800 }, { "epoch": 0.41, "learning_rate": 2.9317699346578397e-05, "loss": 2.7601, "step": 124900 }, { "epoch": 0.41, "learning_rate": 2.9301140258785427e-05, "loss": 2.7696, "step": 125000 }, { "epoch": 0.41, "learning_rate": 2.9284581170992454e-05, "loss": 2.7524, "step": 125100 }, { "epoch": 0.41, "learning_rate": 2.926802208319948e-05, "loss": 2.7628, "step": 125200 }, { "epoch": 0.41, "learning_rate": 2.9251462995406508e-05, "loss": 2.7501, "step": 125300 }, { "epoch": 0.42, "learning_rate": 2.9234903907613538e-05, "loss": 2.7626, "step": 125400 }, { "epoch": 0.42, "learning_rate": 2.9218344819820565e-05, "loss": 2.7555, "step": 125500 }, { "epoch": 0.42, "learning_rate": 2.92017857320276e-05, "loss": 2.7382, "step": 125600 }, { "epoch": 0.42, "learning_rate": 2.9185226644234626e-05, "loss": 2.7408, "step": 125700 }, { "epoch": 0.42, "learning_rate": 2.9168667556441653e-05, "loss": 2.7702, "step": 125800 }, { "epoch": 0.42, "learning_rate": 2.915210846864868e-05, "loss": 2.744, "step": 125900 }, { "epoch": 0.42, "learning_rate": 2.913554938085571e-05, "loss": 2.7721, "step": 126000 }, { "epoch": 0.42, "learning_rate": 2.9118990293062737e-05, "loss": 2.758, "step": 126100 }, { "epoch": 0.42, "learning_rate": 2.9102431205269764e-05, "loss": 2.758, "step": 126200 }, { "epoch": 0.42, "learning_rate": 2.908587211747679e-05, "loss": 2.7522, "step": 126300 }, { "epoch": 0.42, "learning_rate": 2.906931302968382e-05, "loss": 2.7548, "step": 126400 }, { "epoch": 0.42, "learning_rate": 2.9052753941890848e-05, "loss": 2.7558, "step": 126500 }, { "epoch": 0.42, "learning_rate": 2.903619485409788e-05, "loss": 2.7564, "step": 126600 }, { "epoch": 0.42, "learning_rate": 2.901963576630491e-05, "loss": 2.756, "step": 126700 }, { "epoch": 0.42, "learning_rate": 2.9003076678511935e-05, "loss": 2.7608, "step": 126800 }, { "epoch": 0.42, "learning_rate": 2.8986517590718966e-05, "loss": 2.7604, "step": 126900 }, { "epoch": 0.42, "learning_rate": 2.8969958502925993e-05, "loss": 2.7504, "step": 127000 }, { "epoch": 0.42, "learning_rate": 2.895339941513302e-05, "loss": 2.7661, "step": 127100 }, { "epoch": 0.42, "learning_rate": 2.8936840327340046e-05, "loss": 2.747, "step": 127200 }, { "epoch": 0.42, "learning_rate": 2.8920281239547077e-05, "loss": 2.7703, "step": 127300 }, { "epoch": 0.42, "learning_rate": 2.8903722151754104e-05, "loss": 2.7673, "step": 127400 }, { "epoch": 0.42, "learning_rate": 2.888716306396113e-05, "loss": 2.7584, "step": 127500 }, { "epoch": 0.42, "learning_rate": 2.8870603976168164e-05, "loss": 2.7669, "step": 127600 }, { "epoch": 0.42, "learning_rate": 2.885404488837519e-05, "loss": 2.7593, "step": 127700 }, { "epoch": 0.42, "learning_rate": 2.883748580058222e-05, "loss": 2.7525, "step": 127800 }, { "epoch": 0.42, "learning_rate": 2.882092671278925e-05, "loss": 2.7673, "step": 127900 }, { "epoch": 0.42, "learning_rate": 2.8804367624996275e-05, "loss": 2.7647, "step": 128000 }, { "epoch": 0.42, "learning_rate": 2.8787808537203302e-05, "loss": 2.7812, "step": 128100 }, { "epoch": 0.42, "learning_rate": 2.8771249449410332e-05, "loss": 2.7534, "step": 128200 }, { "epoch": 0.42, "learning_rate": 2.875469036161736e-05, "loss": 2.7551, "step": 128300 }, { "epoch": 0.43, "learning_rate": 2.8738131273824386e-05, "loss": 2.7509, "step": 128400 }, { "epoch": 0.43, "learning_rate": 2.8721572186031413e-05, "loss": 2.7596, "step": 128500 }, { "epoch": 0.43, "learning_rate": 2.8705013098238447e-05, "loss": 2.7566, "step": 128600 }, { "epoch": 0.43, "learning_rate": 2.8688454010445477e-05, "loss": 2.7621, "step": 128700 }, { "epoch": 0.43, "learning_rate": 2.8671894922652504e-05, "loss": 2.7649, "step": 128800 }, { "epoch": 0.43, "learning_rate": 2.865533583485953e-05, "loss": 2.754, "step": 128900 }, { "epoch": 0.43, "learning_rate": 2.8638776747066558e-05, "loss": 2.7649, "step": 129000 }, { "epoch": 0.43, "learning_rate": 2.8622217659273588e-05, "loss": 2.7483, "step": 129100 }, { "epoch": 0.43, "learning_rate": 2.8605658571480615e-05, "loss": 2.7457, "step": 129200 }, { "epoch": 0.43, "learning_rate": 2.8589099483687642e-05, "loss": 2.7721, "step": 129300 }, { "epoch": 0.43, "learning_rate": 2.857254039589467e-05, "loss": 2.7535, "step": 129400 }, { "epoch": 0.43, "learning_rate": 2.85559813081017e-05, "loss": 2.7428, "step": 129500 }, { "epoch": 0.43, "learning_rate": 2.8539422220308733e-05, "loss": 2.7509, "step": 129600 }, { "epoch": 0.43, "learning_rate": 2.852286313251576e-05, "loss": 2.7598, "step": 129700 }, { "epoch": 0.43, "learning_rate": 2.8506304044722787e-05, "loss": 2.76, "step": 129800 }, { "epoch": 0.43, "learning_rate": 2.8489744956929814e-05, "loss": 2.7483, "step": 129900 }, { "epoch": 0.43, "learning_rate": 2.8473185869136844e-05, "loss": 2.7416, "step": 130000 }, { "epoch": 0.43, "learning_rate": 2.845662678134387e-05, "loss": 2.7611, "step": 130100 }, { "epoch": 0.43, "learning_rate": 2.8440067693550898e-05, "loss": 2.7499, "step": 130200 }, { "epoch": 0.43, "learning_rate": 2.8423508605757925e-05, "loss": 2.7544, "step": 130300 }, { "epoch": 0.43, "learning_rate": 2.8406949517964955e-05, "loss": 2.751, "step": 130400 }, { "epoch": 0.43, "learning_rate": 2.8390390430171982e-05, "loss": 2.7685, "step": 130500 }, { "epoch": 0.43, "learning_rate": 2.8373831342379016e-05, "loss": 2.7527, "step": 130600 }, { "epoch": 0.43, "learning_rate": 2.8357272254586043e-05, "loss": 2.738, "step": 130700 }, { "epoch": 0.43, "learning_rate": 2.834071316679307e-05, "loss": 2.7563, "step": 130800 }, { "epoch": 0.43, "learning_rate": 2.83241540790001e-05, "loss": 2.7542, "step": 130900 }, { "epoch": 0.43, "learning_rate": 2.8307594991207127e-05, "loss": 2.7592, "step": 131000 }, { "epoch": 0.43, "learning_rate": 2.8291035903414154e-05, "loss": 2.7531, "step": 131100 }, { "epoch": 0.43, "learning_rate": 2.827447681562118e-05, "loss": 2.7603, "step": 131200 }, { "epoch": 0.43, "learning_rate": 2.825791772782821e-05, "loss": 2.758, "step": 131300 }, { "epoch": 0.44, "learning_rate": 2.8241358640035238e-05, "loss": 2.7593, "step": 131400 }, { "epoch": 0.44, "learning_rate": 2.8224799552242265e-05, "loss": 2.7529, "step": 131500 }, { "epoch": 0.44, "learning_rate": 2.8208240464449298e-05, "loss": 2.7574, "step": 131600 }, { "epoch": 0.44, "learning_rate": 2.8191681376656325e-05, "loss": 2.764, "step": 131700 }, { "epoch": 0.44, "learning_rate": 2.8175122288863352e-05, "loss": 2.7549, "step": 131800 }, { "epoch": 0.44, "learning_rate": 2.8158563201070382e-05, "loss": 2.7575, "step": 131900 }, { "epoch": 0.44, "learning_rate": 2.814200411327741e-05, "loss": 2.7618, "step": 132000 }, { "epoch": 0.44, "learning_rate": 2.8125445025484436e-05, "loss": 2.7562, "step": 132100 }, { "epoch": 0.44, "learning_rate": 2.8108885937691467e-05, "loss": 2.7533, "step": 132200 }, { "epoch": 0.44, "learning_rate": 2.8092326849898493e-05, "loss": 2.7619, "step": 132300 }, { "epoch": 0.44, "learning_rate": 2.807576776210552e-05, "loss": 2.7684, "step": 132400 }, { "epoch": 0.44, "learning_rate": 2.8059208674312547e-05, "loss": 2.7523, "step": 132500 }, { "epoch": 0.44, "learning_rate": 2.804264958651958e-05, "loss": 2.7546, "step": 132600 }, { "epoch": 0.44, "learning_rate": 2.8026090498726608e-05, "loss": 2.7422, "step": 132700 }, { "epoch": 0.44, "learning_rate": 2.8009531410933638e-05, "loss": 2.739, "step": 132800 }, { "epoch": 0.44, "learning_rate": 2.7992972323140665e-05, "loss": 2.7486, "step": 132900 }, { "epoch": 0.44, "learning_rate": 2.7976413235347692e-05, "loss": 2.7702, "step": 133000 }, { "epoch": 0.44, "learning_rate": 2.795985414755472e-05, "loss": 2.757, "step": 133100 }, { "epoch": 0.44, "learning_rate": 2.794329505976175e-05, "loss": 2.753, "step": 133200 }, { "epoch": 0.44, "learning_rate": 2.7926735971968776e-05, "loss": 2.7609, "step": 133300 }, { "epoch": 0.44, "learning_rate": 2.7910176884175803e-05, "loss": 2.7548, "step": 133400 }, { "epoch": 0.44, "learning_rate": 2.789361779638283e-05, "loss": 2.7507, "step": 133500 }, { "epoch": 0.44, "learning_rate": 2.7877058708589864e-05, "loss": 2.7667, "step": 133600 }, { "epoch": 0.44, "learning_rate": 2.7860499620796894e-05, "loss": 2.7481, "step": 133700 }, { "epoch": 0.44, "learning_rate": 2.784394053300392e-05, "loss": 2.7568, "step": 133800 }, { "epoch": 0.44, "learning_rate": 2.7827381445210948e-05, "loss": 2.7523, "step": 133900 }, { "epoch": 0.44, "learning_rate": 2.7810822357417975e-05, "loss": 2.7501, "step": 134000 }, { "epoch": 0.44, "learning_rate": 2.7794263269625005e-05, "loss": 2.7641, "step": 134100 }, { "epoch": 0.44, "learning_rate": 2.7777704181832032e-05, "loss": 2.7577, "step": 134200 }, { "epoch": 0.44, "learning_rate": 2.776114509403906e-05, "loss": 2.7708, "step": 134300 }, { "epoch": 0.45, "learning_rate": 2.7744586006246086e-05, "loss": 2.7554, "step": 134400 }, { "epoch": 0.45, "learning_rate": 2.7728026918453116e-05, "loss": 2.7637, "step": 134500 }, { "epoch": 0.45, "learning_rate": 2.771146783066015e-05, "loss": 2.7588, "step": 134600 }, { "epoch": 0.45, "learning_rate": 2.7694908742867177e-05, "loss": 2.7354, "step": 134700 }, { "epoch": 0.45, "learning_rate": 2.7678349655074203e-05, "loss": 2.7412, "step": 134800 }, { "epoch": 0.45, "learning_rate": 2.766179056728123e-05, "loss": 2.7553, "step": 134900 }, { "epoch": 0.45, "learning_rate": 2.764523147948826e-05, "loss": 2.7737, "step": 135000 }, { "epoch": 0.45, "learning_rate": 2.7628672391695288e-05, "loss": 2.7591, "step": 135100 }, { "epoch": 0.45, "learning_rate": 2.7612113303902315e-05, "loss": 2.7673, "step": 135200 }, { "epoch": 0.45, "learning_rate": 2.759555421610934e-05, "loss": 2.7629, "step": 135300 }, { "epoch": 0.45, "learning_rate": 2.7578995128316372e-05, "loss": 2.7573, "step": 135400 }, { "epoch": 0.45, "learning_rate": 2.75624360405234e-05, "loss": 2.7672, "step": 135500 }, { "epoch": 0.45, "learning_rate": 2.7545876952730432e-05, "loss": 2.7616, "step": 135600 }, { "epoch": 0.45, "learning_rate": 2.752931786493746e-05, "loss": 2.7643, "step": 135700 }, { "epoch": 0.45, "learning_rate": 2.7512758777144486e-05, "loss": 2.7573, "step": 135800 }, { "epoch": 0.45, "learning_rate": 2.7496199689351516e-05, "loss": 2.7671, "step": 135900 }, { "epoch": 0.45, "learning_rate": 2.7479640601558543e-05, "loss": 2.7442, "step": 136000 }, { "epoch": 0.45, "learning_rate": 2.746308151376557e-05, "loss": 2.7525, "step": 136100 }, { "epoch": 0.45, "learning_rate": 2.7446522425972597e-05, "loss": 2.7564, "step": 136200 }, { "epoch": 0.45, "learning_rate": 2.7429963338179627e-05, "loss": 2.7669, "step": 136300 }, { "epoch": 0.45, "learning_rate": 2.7413404250386654e-05, "loss": 2.7549, "step": 136400 }, { "epoch": 0.45, "learning_rate": 2.739684516259368e-05, "loss": 2.7513, "step": 136500 }, { "epoch": 0.45, "learning_rate": 2.7380286074800715e-05, "loss": 2.7709, "step": 136600 }, { "epoch": 0.45, "learning_rate": 2.7363726987007742e-05, "loss": 2.76, "step": 136700 }, { "epoch": 0.45, "learning_rate": 2.7347167899214772e-05, "loss": 2.7617, "step": 136800 }, { "epoch": 0.45, "learning_rate": 2.73306088114218e-05, "loss": 2.7655, "step": 136900 }, { "epoch": 0.45, "learning_rate": 2.7314049723628826e-05, "loss": 2.758, "step": 137000 }, { "epoch": 0.45, "learning_rate": 2.7297490635835853e-05, "loss": 2.7473, "step": 137100 }, { "epoch": 0.45, "learning_rate": 2.7280931548042883e-05, "loss": 2.7639, "step": 137200 }, { "epoch": 0.45, "learning_rate": 2.726437246024991e-05, "loss": 2.7329, "step": 137300 }, { "epoch": 0.46, "learning_rate": 2.7247813372456937e-05, "loss": 2.738, "step": 137400 }, { "epoch": 0.46, "learning_rate": 2.7231254284663964e-05, "loss": 2.7539, "step": 137500 }, { "epoch": 0.46, "learning_rate": 2.7214695196870998e-05, "loss": 2.7639, "step": 137600 }, { "epoch": 0.46, "learning_rate": 2.7198136109078025e-05, "loss": 2.7594, "step": 137700 }, { "epoch": 0.46, "learning_rate": 2.7181577021285055e-05, "loss": 2.7552, "step": 137800 }, { "epoch": 0.46, "learning_rate": 2.7165017933492082e-05, "loss": 2.7571, "step": 137900 }, { "epoch": 0.46, "learning_rate": 2.714845884569911e-05, "loss": 2.7518, "step": 138000 }, { "epoch": 0.46, "learning_rate": 2.713189975790614e-05, "loss": 2.7523, "step": 138100 }, { "epoch": 0.46, "learning_rate": 2.7115340670113166e-05, "loss": 2.7529, "step": 138200 }, { "epoch": 0.46, "learning_rate": 2.7098781582320193e-05, "loss": 2.7553, "step": 138300 }, { "epoch": 0.46, "learning_rate": 2.708222249452722e-05, "loss": 2.7652, "step": 138400 }, { "epoch": 0.46, "learning_rate": 2.706566340673425e-05, "loss": 2.7662, "step": 138500 }, { "epoch": 0.46, "learning_rate": 2.704910431894128e-05, "loss": 2.7495, "step": 138600 }, { "epoch": 0.46, "learning_rate": 2.703254523114831e-05, "loss": 2.7556, "step": 138700 }, { "epoch": 0.46, "learning_rate": 2.7015986143355338e-05, "loss": 2.7361, "step": 138800 }, { "epoch": 0.46, "learning_rate": 2.6999427055562364e-05, "loss": 2.7461, "step": 138900 }, { "epoch": 0.46, "learning_rate": 2.698286796776939e-05, "loss": 2.7653, "step": 139000 }, { "epoch": 0.46, "learning_rate": 2.696630887997642e-05, "loss": 2.7568, "step": 139100 }, { "epoch": 0.46, "learning_rate": 2.694974979218345e-05, "loss": 2.7588, "step": 139200 }, { "epoch": 0.46, "learning_rate": 2.6933190704390475e-05, "loss": 2.758, "step": 139300 }, { "epoch": 0.46, "learning_rate": 2.6916631616597506e-05, "loss": 2.7678, "step": 139400 }, { "epoch": 0.46, "learning_rate": 2.6900072528804533e-05, "loss": 2.7525, "step": 139500 }, { "epoch": 0.46, "learning_rate": 2.6883513441011566e-05, "loss": 2.7535, "step": 139600 }, { "epoch": 0.46, "learning_rate": 2.6866954353218593e-05, "loss": 2.7437, "step": 139700 }, { "epoch": 0.46, "learning_rate": 2.685039526542562e-05, "loss": 2.7405, "step": 139800 }, { "epoch": 0.46, "learning_rate": 2.6833836177632647e-05, "loss": 2.7649, "step": 139900 }, { "epoch": 0.46, "learning_rate": 2.6817277089839677e-05, "loss": 2.7471, "step": 140000 }, { "epoch": 0.46, "learning_rate": 2.6800718002046704e-05, "loss": 2.7658, "step": 140100 }, { "epoch": 0.46, "learning_rate": 2.678415891425373e-05, "loss": 2.7554, "step": 140200 }, { "epoch": 0.46, "learning_rate": 2.6767599826460758e-05, "loss": 2.7472, "step": 140300 }, { "epoch": 0.46, "learning_rate": 2.675104073866779e-05, "loss": 2.7463, "step": 140400 }, { "epoch": 0.47, "learning_rate": 2.6734481650874815e-05, "loss": 2.7655, "step": 140500 }, { "epoch": 0.47, "learning_rate": 2.671792256308185e-05, "loss": 2.754, "step": 140600 }, { "epoch": 0.47, "learning_rate": 2.6701363475288876e-05, "loss": 2.7632, "step": 140700 }, { "epoch": 0.47, "learning_rate": 2.6684804387495903e-05, "loss": 2.7481, "step": 140800 }, { "epoch": 0.47, "learning_rate": 2.6668245299702933e-05, "loss": 2.7548, "step": 140900 }, { "epoch": 0.47, "learning_rate": 2.665168621190996e-05, "loss": 2.7708, "step": 141000 }, { "epoch": 0.47, "learning_rate": 2.6635127124116987e-05, "loss": 2.7772, "step": 141100 }, { "epoch": 0.47, "learning_rate": 2.6618568036324014e-05, "loss": 2.7578, "step": 141200 }, { "epoch": 0.47, "learning_rate": 2.6602008948531044e-05, "loss": 2.7649, "step": 141300 }, { "epoch": 0.47, "learning_rate": 2.658544986073807e-05, "loss": 2.768, "step": 141400 }, { "epoch": 0.47, "learning_rate": 2.6568890772945098e-05, "loss": 2.7488, "step": 141500 }, { "epoch": 0.47, "learning_rate": 2.6552331685152132e-05, "loss": 2.7581, "step": 141600 }, { "epoch": 0.47, "learning_rate": 2.653577259735916e-05, "loss": 2.7477, "step": 141700 }, { "epoch": 0.47, "learning_rate": 2.651921350956619e-05, "loss": 2.7524, "step": 141800 }, { "epoch": 0.47, "learning_rate": 2.6502654421773216e-05, "loss": 2.7611, "step": 141900 }, { "epoch": 0.47, "learning_rate": 2.6486095333980243e-05, "loss": 2.7412, "step": 142000 }, { "epoch": 0.47, "learning_rate": 2.646953624618727e-05, "loss": 2.7678, "step": 142100 }, { "epoch": 0.47, "learning_rate": 2.64529771583943e-05, "loss": 2.775, "step": 142200 }, { "epoch": 0.47, "learning_rate": 2.6436418070601327e-05, "loss": 2.7517, "step": 142300 }, { "epoch": 0.47, "learning_rate": 2.6419858982808354e-05, "loss": 2.7551, "step": 142400 }, { "epoch": 0.47, "learning_rate": 2.640329989501538e-05, "loss": 2.7571, "step": 142500 }, { "epoch": 0.47, "learning_rate": 2.6386740807222414e-05, "loss": 2.7454, "step": 142600 }, { "epoch": 0.47, "learning_rate": 2.6370181719429445e-05, "loss": 2.7501, "step": 142700 }, { "epoch": 0.47, "learning_rate": 2.635362263163647e-05, "loss": 2.7465, "step": 142800 }, { "epoch": 0.47, "learning_rate": 2.63370635438435e-05, "loss": 2.7412, "step": 142900 }, { "epoch": 0.47, "learning_rate": 2.6320504456050525e-05, "loss": 2.745, "step": 143000 }, { "epoch": 0.47, "learning_rate": 2.6303945368257556e-05, "loss": 2.7597, "step": 143100 }, { "epoch": 0.47, "learning_rate": 2.6287386280464583e-05, "loss": 2.746, "step": 143200 }, { "epoch": 0.47, "learning_rate": 2.627082719267161e-05, "loss": 2.7431, "step": 143300 }, { "epoch": 0.47, "learning_rate": 2.6254268104878636e-05, "loss": 2.7406, "step": 143400 }, { "epoch": 0.48, "learning_rate": 2.6237709017085667e-05, "loss": 2.7644, "step": 143500 }, { "epoch": 0.48, "learning_rate": 2.62211499292927e-05, "loss": 2.7502, "step": 143600 }, { "epoch": 0.48, "learning_rate": 2.6204590841499727e-05, "loss": 2.7523, "step": 143700 }, { "epoch": 0.48, "learning_rate": 2.6188031753706754e-05, "loss": 2.7476, "step": 143800 }, { "epoch": 0.48, "learning_rate": 2.617147266591378e-05, "loss": 2.7528, "step": 143900 }, { "epoch": 0.48, "learning_rate": 2.615491357812081e-05, "loss": 2.7566, "step": 144000 }, { "epoch": 0.48, "learning_rate": 2.613835449032784e-05, "loss": 2.756, "step": 144100 }, { "epoch": 0.48, "learning_rate": 2.6121795402534865e-05, "loss": 2.7496, "step": 144200 }, { "epoch": 0.48, "learning_rate": 2.6105236314741892e-05, "loss": 2.7507, "step": 144300 }, { "epoch": 0.48, "learning_rate": 2.6088677226948922e-05, "loss": 2.7564, "step": 144400 }, { "epoch": 0.48, "learning_rate": 2.607211813915595e-05, "loss": 2.76, "step": 144500 }, { "epoch": 0.48, "learning_rate": 2.6055559051362983e-05, "loss": 2.7514, "step": 144600 }, { "epoch": 0.48, "learning_rate": 2.603899996357001e-05, "loss": 2.7562, "step": 144700 }, { "epoch": 0.48, "learning_rate": 2.6022440875777037e-05, "loss": 2.7583, "step": 144800 }, { "epoch": 0.48, "learning_rate": 2.6005881787984064e-05, "loss": 2.7492, "step": 144900 }, { "epoch": 0.48, "learning_rate": 2.5989322700191094e-05, "loss": 2.7662, "step": 145000 }, { "epoch": 0.48, "learning_rate": 2.597276361239812e-05, "loss": 2.7653, "step": 145100 }, { "epoch": 0.48, "learning_rate": 2.5956204524605148e-05, "loss": 2.7465, "step": 145200 }, { "epoch": 0.48, "learning_rate": 2.5939645436812178e-05, "loss": 2.7628, "step": 145300 }, { "epoch": 0.48, "learning_rate": 2.5923086349019205e-05, "loss": 2.7586, "step": 145400 }, { "epoch": 0.48, "learning_rate": 2.5906527261226232e-05, "loss": 2.7376, "step": 145500 }, { "epoch": 0.48, "learning_rate": 2.5889968173433266e-05, "loss": 2.7586, "step": 145600 }, { "epoch": 0.48, "learning_rate": 2.5873409085640293e-05, "loss": 2.7498, "step": 145700 }, { "epoch": 0.48, "learning_rate": 2.585684999784732e-05, "loss": 2.7767, "step": 145800 }, { "epoch": 0.48, "learning_rate": 2.584029091005435e-05, "loss": 2.7533, "step": 145900 }, { "epoch": 0.48, "learning_rate": 2.5823731822261377e-05, "loss": 2.7485, "step": 146000 }, { "epoch": 0.48, "learning_rate": 2.5807172734468404e-05, "loss": 2.769, "step": 146100 }, { "epoch": 0.48, "learning_rate": 2.579061364667543e-05, "loss": 2.7463, "step": 146200 }, { "epoch": 0.48, "learning_rate": 2.577405455888246e-05, "loss": 2.7542, "step": 146300 }, { "epoch": 0.48, "learning_rate": 2.5757495471089488e-05, "loss": 2.7532, "step": 146400 }, { "epoch": 0.49, "learning_rate": 2.5740936383296515e-05, "loss": 2.7563, "step": 146500 }, { "epoch": 0.49, "learning_rate": 2.572437729550355e-05, "loss": 2.7542, "step": 146600 }, { "epoch": 0.49, "learning_rate": 2.5707818207710575e-05, "loss": 2.7628, "step": 146700 }, { "epoch": 0.49, "learning_rate": 2.5691259119917606e-05, "loss": 2.7446, "step": 146800 }, { "epoch": 0.49, "learning_rate": 2.5674700032124633e-05, "loss": 2.749, "step": 146900 }, { "epoch": 0.49, "learning_rate": 2.565814094433166e-05, "loss": 2.7578, "step": 147000 }, { "epoch": 0.49, "learning_rate": 2.5641581856538686e-05, "loss": 2.7656, "step": 147100 }, { "epoch": 0.49, "learning_rate": 2.5625022768745717e-05, "loss": 2.7646, "step": 147200 }, { "epoch": 0.49, "learning_rate": 2.5608463680952744e-05, "loss": 2.731, "step": 147300 }, { "epoch": 0.49, "learning_rate": 2.559190459315977e-05, "loss": 2.7534, "step": 147400 }, { "epoch": 0.49, "learning_rate": 2.5575345505366797e-05, "loss": 2.7461, "step": 147500 }, { "epoch": 0.49, "learning_rate": 2.555878641757383e-05, "loss": 2.7473, "step": 147600 }, { "epoch": 0.49, "learning_rate": 2.554222732978086e-05, "loss": 2.7605, "step": 147700 }, { "epoch": 0.49, "learning_rate": 2.552566824198789e-05, "loss": 2.7654, "step": 147800 }, { "epoch": 0.49, "learning_rate": 2.5509109154194915e-05, "loss": 2.7463, "step": 147900 }, { "epoch": 0.49, "learning_rate": 2.5492550066401942e-05, "loss": 2.7581, "step": 148000 }, { "epoch": 0.49, "learning_rate": 2.5475990978608972e-05, "loss": 2.7486, "step": 148100 }, { "epoch": 0.49, "learning_rate": 2.5459431890816e-05, "loss": 2.7464, "step": 148200 }, { "epoch": 0.49, "learning_rate": 2.5442872803023026e-05, "loss": 2.7435, "step": 148300 }, { "epoch": 0.49, "learning_rate": 2.5426313715230053e-05, "loss": 2.7494, "step": 148400 }, { "epoch": 0.49, "learning_rate": 2.5409754627437083e-05, "loss": 2.7672, "step": 148500 }, { "epoch": 0.49, "learning_rate": 2.5393195539644117e-05, "loss": 2.7532, "step": 148600 }, { "epoch": 0.49, "learning_rate": 2.5376636451851144e-05, "loss": 2.7632, "step": 148700 }, { "epoch": 0.49, "learning_rate": 2.536007736405817e-05, "loss": 2.7478, "step": 148800 }, { "epoch": 0.49, "learning_rate": 2.5343518276265198e-05, "loss": 2.732, "step": 148900 }, { "epoch": 0.49, "learning_rate": 2.5326959188472228e-05, "loss": 2.7473, "step": 149000 }, { "epoch": 0.49, "learning_rate": 2.5310400100679255e-05, "loss": 2.7461, "step": 149100 }, { "epoch": 0.49, "learning_rate": 2.5293841012886282e-05, "loss": 2.7607, "step": 149200 }, { "epoch": 0.49, "learning_rate": 2.527728192509331e-05, "loss": 2.7626, "step": 149300 }, { "epoch": 0.49, "learning_rate": 2.526072283730034e-05, "loss": 2.7515, "step": 149400 }, { "epoch": 0.5, "learning_rate": 2.5244163749507366e-05, "loss": 2.7182, "step": 149500 }, { "epoch": 0.5, "learning_rate": 2.52276046617144e-05, "loss": 2.7523, "step": 149600 }, { "epoch": 0.5, "learning_rate": 2.5211045573921427e-05, "loss": 2.7541, "step": 149700 }, { "epoch": 0.5, "learning_rate": 2.5194486486128454e-05, "loss": 2.7619, "step": 149800 }, { "epoch": 0.5, "learning_rate": 2.5177927398335484e-05, "loss": 2.7523, "step": 149900 }, { "epoch": 0.5, "learning_rate": 2.516136831054251e-05, "loss": 2.7475, "step": 150000 }, { "epoch": 0.5, "learning_rate": 2.5144809222749538e-05, "loss": 2.7466, "step": 150100 }, { "epoch": 0.5, "learning_rate": 2.5128250134956565e-05, "loss": 2.7518, "step": 150200 }, { "epoch": 0.5, "learning_rate": 2.5111691047163595e-05, "loss": 2.7484, "step": 150300 }, { "epoch": 0.5, "learning_rate": 2.5095131959370622e-05, "loss": 2.745, "step": 150400 }, { "epoch": 0.5, "learning_rate": 2.507857287157765e-05, "loss": 2.7493, "step": 150500 }, { "epoch": 0.5, "learning_rate": 2.5062013783784682e-05, "loss": 2.75, "step": 150600 }, { "epoch": 0.5, "learning_rate": 2.504545469599171e-05, "loss": 2.7496, "step": 150700 }, { "epoch": 0.5, "learning_rate": 2.502889560819874e-05, "loss": 2.7602, "step": 150800 }, { "epoch": 0.5, "learning_rate": 2.5012336520405767e-05, "loss": 2.755, "step": 150900 }, { "epoch": 0.5, "learning_rate": 2.4995777432612794e-05, "loss": 2.7668, "step": 151000 }, { "epoch": 0.5, "learning_rate": 2.497921834481982e-05, "loss": 2.7389, "step": 151100 }, { "epoch": 0.5, "learning_rate": 2.496265925702685e-05, "loss": 2.739, "step": 151200 }, { "epoch": 0.5, "learning_rate": 2.494610016923388e-05, "loss": 2.7475, "step": 151300 }, { "epoch": 0.5, "learning_rate": 2.4929541081440908e-05, "loss": 2.7705, "step": 151400 }, { "epoch": 0.5, "learning_rate": 2.4912981993647935e-05, "loss": 2.7449, "step": 151500 }, { "epoch": 0.5, "learning_rate": 2.4896422905854962e-05, "loss": 2.7536, "step": 151600 }, { "epoch": 0.5, "learning_rate": 2.4879863818061992e-05, "loss": 2.7573, "step": 151700 }, { "epoch": 0.5, "learning_rate": 2.4863304730269022e-05, "loss": 2.7434, "step": 151800 }, { "epoch": 0.5, "learning_rate": 2.484674564247605e-05, "loss": 2.7633, "step": 151900 }, { "epoch": 0.5, "learning_rate": 2.4830186554683076e-05, "loss": 2.7484, "step": 152000 }, { "epoch": 0.5, "learning_rate": 2.4813627466890103e-05, "loss": 2.7682, "step": 152100 }, { "epoch": 0.5, "learning_rate": 2.4797068379097133e-05, "loss": 2.7473, "step": 152200 }, { "epoch": 0.5, "learning_rate": 2.4780509291304164e-05, "loss": 2.7521, "step": 152300 }, { "epoch": 0.5, "learning_rate": 2.476395020351119e-05, "loss": 2.756, "step": 152400 }, { "epoch": 0.51, "learning_rate": 2.4747391115718218e-05, "loss": 2.7471, "step": 152500 }, { "epoch": 0.51, "learning_rate": 2.4730832027925248e-05, "loss": 2.7623, "step": 152600 }, { "epoch": 0.51, "learning_rate": 2.4714272940132275e-05, "loss": 2.752, "step": 152700 }, { "epoch": 0.51, "learning_rate": 2.4697713852339305e-05, "loss": 2.7477, "step": 152800 }, { "epoch": 0.51, "learning_rate": 2.4681154764546332e-05, "loss": 2.7503, "step": 152900 }, { "epoch": 0.51, "learning_rate": 2.466459567675336e-05, "loss": 2.7422, "step": 153000 }, { "epoch": 0.51, "learning_rate": 2.464803658896039e-05, "loss": 2.7497, "step": 153100 }, { "epoch": 0.51, "learning_rate": 2.4631477501167416e-05, "loss": 2.7603, "step": 153200 }, { "epoch": 0.51, "learning_rate": 2.4614918413374446e-05, "loss": 2.7386, "step": 153300 }, { "epoch": 0.51, "learning_rate": 2.4598359325581473e-05, "loss": 2.7537, "step": 153400 }, { "epoch": 0.51, "learning_rate": 2.4581800237788504e-05, "loss": 2.7566, "step": 153500 }, { "epoch": 0.51, "learning_rate": 2.456524114999553e-05, "loss": 2.7501, "step": 153600 }, { "epoch": 0.51, "learning_rate": 2.4548682062202557e-05, "loss": 2.7485, "step": 153700 }, { "epoch": 0.51, "learning_rate": 2.4532122974409588e-05, "loss": 2.7623, "step": 153800 }, { "epoch": 0.51, "learning_rate": 2.4515563886616615e-05, "loss": 2.7502, "step": 153900 }, { "epoch": 0.51, "learning_rate": 2.4499004798823645e-05, "loss": 2.7534, "step": 154000 }, { "epoch": 0.51, "learning_rate": 2.4482445711030672e-05, "loss": 2.7535, "step": 154100 }, { "epoch": 0.51, "learning_rate": 2.44658866232377e-05, "loss": 2.7581, "step": 154200 }, { "epoch": 0.51, "learning_rate": 2.444932753544473e-05, "loss": 2.7491, "step": 154300 }, { "epoch": 0.51, "learning_rate": 2.443276844765176e-05, "loss": 2.759, "step": 154400 }, { "epoch": 0.51, "learning_rate": 2.4416209359858786e-05, "loss": 2.7406, "step": 154500 }, { "epoch": 0.51, "learning_rate": 2.4399650272065813e-05, "loss": 2.7256, "step": 154600 }, { "epoch": 0.51, "learning_rate": 2.438309118427284e-05, "loss": 2.7426, "step": 154700 }, { "epoch": 0.51, "learning_rate": 2.436653209647987e-05, "loss": 2.7443, "step": 154800 }, { "epoch": 0.51, "learning_rate": 2.43499730086869e-05, "loss": 2.7427, "step": 154900 }, { "epoch": 0.51, "learning_rate": 2.4333413920893928e-05, "loss": 2.7406, "step": 155000 }, { "epoch": 0.51, "learning_rate": 2.4316854833100954e-05, "loss": 2.7413, "step": 155100 }, { "epoch": 0.51, "learning_rate": 2.430029574530798e-05, "loss": 2.7546, "step": 155200 }, { "epoch": 0.51, "learning_rate": 2.4283736657515012e-05, "loss": 2.7549, "step": 155300 }, { "epoch": 0.51, "learning_rate": 2.4267177569722042e-05, "loss": 2.7449, "step": 155400 }, { "epoch": 0.51, "learning_rate": 2.425061848192907e-05, "loss": 2.7493, "step": 155500 }, { "epoch": 0.52, "learning_rate": 2.4234059394136096e-05, "loss": 2.7444, "step": 155600 }, { "epoch": 0.52, "learning_rate": 2.4217500306343123e-05, "loss": 2.7492, "step": 155700 }, { "epoch": 0.52, "learning_rate": 2.4200941218550156e-05, "loss": 2.7444, "step": 155800 }, { "epoch": 0.52, "learning_rate": 2.4184382130757183e-05, "loss": 2.7415, "step": 155900 }, { "epoch": 0.52, "learning_rate": 2.416782304296421e-05, "loss": 2.7471, "step": 156000 }, { "epoch": 0.52, "learning_rate": 2.4151263955171237e-05, "loss": 2.7594, "step": 156100 }, { "epoch": 0.52, "learning_rate": 2.4134704867378267e-05, "loss": 2.7685, "step": 156200 }, { "epoch": 0.52, "learning_rate": 2.4118145779585298e-05, "loss": 2.7384, "step": 156300 }, { "epoch": 0.52, "learning_rate": 2.4101586691792325e-05, "loss": 2.7496, "step": 156400 }, { "epoch": 0.52, "learning_rate": 2.408502760399935e-05, "loss": 2.7602, "step": 156500 }, { "epoch": 0.52, "learning_rate": 2.406846851620638e-05, "loss": 2.7505, "step": 156600 }, { "epoch": 0.52, "learning_rate": 2.405190942841341e-05, "loss": 2.7545, "step": 156700 }, { "epoch": 0.52, "learning_rate": 2.403535034062044e-05, "loss": 2.732, "step": 156800 }, { "epoch": 0.52, "learning_rate": 2.4018791252827466e-05, "loss": 2.7503, "step": 156900 }, { "epoch": 0.52, "learning_rate": 2.4002232165034493e-05, "loss": 2.7363, "step": 157000 }, { "epoch": 0.52, "learning_rate": 2.3985673077241523e-05, "loss": 2.75, "step": 157100 }, { "epoch": 0.52, "learning_rate": 2.396911398944855e-05, "loss": 2.738, "step": 157200 }, { "epoch": 0.52, "learning_rate": 2.395255490165558e-05, "loss": 2.7571, "step": 157300 }, { "epoch": 0.52, "learning_rate": 2.3935995813862607e-05, "loss": 2.7663, "step": 157400 }, { "epoch": 0.52, "learning_rate": 2.3919436726069634e-05, "loss": 2.7488, "step": 157500 }, { "epoch": 0.52, "learning_rate": 2.3902877638276665e-05, "loss": 2.7504, "step": 157600 }, { "epoch": 0.52, "learning_rate": 2.388631855048369e-05, "loss": 2.7441, "step": 157700 }, { "epoch": 0.52, "learning_rate": 2.3869759462690722e-05, "loss": 2.7415, "step": 157800 }, { "epoch": 0.52, "learning_rate": 2.385320037489775e-05, "loss": 2.7663, "step": 157900 }, { "epoch": 0.52, "learning_rate": 2.383664128710478e-05, "loss": 2.7357, "step": 158000 }, { "epoch": 0.52, "learning_rate": 2.3820082199311806e-05, "loss": 2.736, "step": 158100 }, { "epoch": 0.52, "learning_rate": 2.3803523111518833e-05, "loss": 2.7418, "step": 158200 }, { "epoch": 0.52, "learning_rate": 2.3786964023725863e-05, "loss": 2.7541, "step": 158300 }, { "epoch": 0.52, "learning_rate": 2.377040493593289e-05, "loss": 2.7487, "step": 158400 }, { "epoch": 0.52, "learning_rate": 2.375384584813992e-05, "loss": 2.7572, "step": 158500 }, { "epoch": 0.53, "learning_rate": 2.3737286760346947e-05, "loss": 2.7491, "step": 158600 }, { "epoch": 0.53, "learning_rate": 2.3720727672553974e-05, "loss": 2.7457, "step": 158700 }, { "epoch": 0.53, "learning_rate": 2.3704168584761004e-05, "loss": 2.7538, "step": 158800 }, { "epoch": 0.53, "learning_rate": 2.368760949696803e-05, "loss": 2.7595, "step": 158900 }, { "epoch": 0.53, "learning_rate": 2.367105040917506e-05, "loss": 2.7432, "step": 159000 }, { "epoch": 0.53, "learning_rate": 2.365449132138209e-05, "loss": 2.7392, "step": 159100 }, { "epoch": 0.53, "learning_rate": 2.3637932233589115e-05, "loss": 2.7461, "step": 159200 }, { "epoch": 0.53, "learning_rate": 2.3621373145796146e-05, "loss": 2.759, "step": 159300 }, { "epoch": 0.53, "learning_rate": 2.3604814058003176e-05, "loss": 2.7552, "step": 159400 }, { "epoch": 0.53, "learning_rate": 2.3588254970210203e-05, "loss": 2.756, "step": 159500 }, { "epoch": 0.53, "learning_rate": 2.357169588241723e-05, "loss": 2.7384, "step": 159600 }, { "epoch": 0.53, "learning_rate": 2.3555136794624257e-05, "loss": 2.7421, "step": 159700 }, { "epoch": 0.53, "learning_rate": 2.3538577706831287e-05, "loss": 2.7392, "step": 159800 }, { "epoch": 0.53, "learning_rate": 2.3522018619038317e-05, "loss": 2.7529, "step": 159900 }, { "epoch": 0.53, "learning_rate": 2.3505459531245344e-05, "loss": 2.7453, "step": 160000 }, { "epoch": 0.53, "learning_rate": 2.348890044345237e-05, "loss": 2.7512, "step": 160100 }, { "epoch": 0.53, "learning_rate": 2.3472341355659398e-05, "loss": 2.7456, "step": 160200 }, { "epoch": 0.53, "learning_rate": 2.3455782267866432e-05, "loss": 2.7468, "step": 160300 }, { "epoch": 0.53, "learning_rate": 2.343922318007346e-05, "loss": 2.7498, "step": 160400 }, { "epoch": 0.53, "learning_rate": 2.3422664092280486e-05, "loss": 2.766, "step": 160500 }, { "epoch": 0.53, "learning_rate": 2.3406105004487513e-05, "loss": 2.741, "step": 160600 }, { "epoch": 0.53, "learning_rate": 2.3389545916694543e-05, "loss": 2.7401, "step": 160700 }, { "epoch": 0.53, "learning_rate": 2.3372986828901573e-05, "loss": 2.7291, "step": 160800 }, { "epoch": 0.53, "learning_rate": 2.33564277411086e-05, "loss": 2.7573, "step": 160900 }, { "epoch": 0.53, "learning_rate": 2.3339868653315627e-05, "loss": 2.7449, "step": 161000 }, { "epoch": 0.53, "learning_rate": 2.3323309565522654e-05, "loss": 2.7527, "step": 161100 }, { "epoch": 0.53, "learning_rate": 2.3306750477729684e-05, "loss": 2.7485, "step": 161200 }, { "epoch": 0.53, "learning_rate": 2.3290191389936714e-05, "loss": 2.7476, "step": 161300 }, { "epoch": 0.53, "learning_rate": 2.327363230214374e-05, "loss": 2.7679, "step": 161400 }, { "epoch": 0.53, "learning_rate": 2.3257073214350768e-05, "loss": 2.7428, "step": 161500 }, { "epoch": 0.54, "learning_rate": 2.32405141265578e-05, "loss": 2.7318, "step": 161600 }, { "epoch": 0.54, "learning_rate": 2.3223955038764826e-05, "loss": 2.7465, "step": 161700 }, { "epoch": 0.54, "learning_rate": 2.3207395950971856e-05, "loss": 2.74, "step": 161800 }, { "epoch": 0.54, "learning_rate": 2.3190836863178883e-05, "loss": 2.7525, "step": 161900 }, { "epoch": 0.54, "learning_rate": 2.317427777538591e-05, "loss": 2.7559, "step": 162000 }, { "epoch": 0.54, "learning_rate": 2.315771868759294e-05, "loss": 2.7388, "step": 162100 }, { "epoch": 0.54, "learning_rate": 2.3141159599799967e-05, "loss": 2.752, "step": 162200 }, { "epoch": 0.54, "learning_rate": 2.3124600512006997e-05, "loss": 2.7547, "step": 162300 }, { "epoch": 0.54, "learning_rate": 2.3108041424214024e-05, "loss": 2.7303, "step": 162400 }, { "epoch": 0.54, "learning_rate": 2.309148233642105e-05, "loss": 2.7317, "step": 162500 }, { "epoch": 0.54, "learning_rate": 2.307492324862808e-05, "loss": 2.7413, "step": 162600 }, { "epoch": 0.54, "learning_rate": 2.3058364160835108e-05, "loss": 2.7639, "step": 162700 }, { "epoch": 0.54, "learning_rate": 2.304180507304214e-05, "loss": 2.7692, "step": 162800 }, { "epoch": 0.54, "learning_rate": 2.3025245985249165e-05, "loss": 2.7522, "step": 162900 }, { "epoch": 0.54, "learning_rate": 2.3008686897456196e-05, "loss": 2.7499, "step": 163000 }, { "epoch": 0.54, "learning_rate": 2.2992127809663223e-05, "loss": 2.7452, "step": 163100 }, { "epoch": 0.54, "learning_rate": 2.297556872187025e-05, "loss": 2.7692, "step": 163200 }, { "epoch": 0.54, "learning_rate": 2.295900963407728e-05, "loss": 2.7514, "step": 163300 }, { "epoch": 0.54, "learning_rate": 2.2942450546284307e-05, "loss": 2.7399, "step": 163400 }, { "epoch": 0.54, "learning_rate": 2.2925891458491337e-05, "loss": 2.751, "step": 163500 }, { "epoch": 0.54, "learning_rate": 2.2909332370698364e-05, "loss": 2.7342, "step": 163600 }, { "epoch": 0.54, "learning_rate": 2.289277328290539e-05, "loss": 2.7472, "step": 163700 }, { "epoch": 0.54, "learning_rate": 2.287621419511242e-05, "loss": 2.7626, "step": 163800 }, { "epoch": 0.54, "learning_rate": 2.285965510731945e-05, "loss": 2.7511, "step": 163900 }, { "epoch": 0.54, "learning_rate": 2.284309601952648e-05, "loss": 2.7535, "step": 164000 }, { "epoch": 0.54, "learning_rate": 2.2826536931733505e-05, "loss": 2.7506, "step": 164100 }, { "epoch": 0.54, "learning_rate": 2.2809977843940532e-05, "loss": 2.7679, "step": 164200 }, { "epoch": 0.54, "learning_rate": 2.2793418756147562e-05, "loss": 2.7446, "step": 164300 }, { "epoch": 0.54, "learning_rate": 2.2776859668354593e-05, "loss": 2.7439, "step": 164400 }, { "epoch": 0.54, "learning_rate": 2.276030058056162e-05, "loss": 2.7631, "step": 164500 }, { "epoch": 0.55, "learning_rate": 2.2743741492768647e-05, "loss": 2.7448, "step": 164600 }, { "epoch": 0.55, "learning_rate": 2.2727182404975674e-05, "loss": 2.7571, "step": 164700 }, { "epoch": 0.55, "learning_rate": 2.2710623317182704e-05, "loss": 2.7517, "step": 164800 }, { "epoch": 0.55, "learning_rate": 2.2694064229389734e-05, "loss": 2.7324, "step": 164900 }, { "epoch": 0.55, "learning_rate": 2.267750514159676e-05, "loss": 2.7428, "step": 165000 }, { "epoch": 0.55, "learning_rate": 2.2660946053803788e-05, "loss": 2.7627, "step": 165100 }, { "epoch": 0.55, "learning_rate": 2.2644386966010818e-05, "loss": 2.7404, "step": 165200 }, { "epoch": 0.55, "learning_rate": 2.262782787821785e-05, "loss": 2.7404, "step": 165300 }, { "epoch": 0.55, "learning_rate": 2.2611268790424875e-05, "loss": 2.7552, "step": 165400 }, { "epoch": 0.55, "learning_rate": 2.2594709702631902e-05, "loss": 2.7461, "step": 165500 }, { "epoch": 0.55, "learning_rate": 2.257815061483893e-05, "loss": 2.7497, "step": 165600 }, { "epoch": 0.55, "learning_rate": 2.256159152704596e-05, "loss": 2.75, "step": 165700 }, { "epoch": 0.55, "learning_rate": 2.254503243925299e-05, "loss": 2.7509, "step": 165800 }, { "epoch": 0.55, "learning_rate": 2.2528473351460017e-05, "loss": 2.7442, "step": 165900 }, { "epoch": 0.55, "learning_rate": 2.2511914263667044e-05, "loss": 2.7411, "step": 166000 }, { "epoch": 0.55, "learning_rate": 2.249535517587407e-05, "loss": 2.7603, "step": 166100 }, { "epoch": 0.55, "learning_rate": 2.24787960880811e-05, "loss": 2.7477, "step": 166200 }, { "epoch": 0.55, "learning_rate": 2.246223700028813e-05, "loss": 2.742, "step": 166300 }, { "epoch": 0.55, "learning_rate": 2.2445677912495158e-05, "loss": 2.7428, "step": 166400 }, { "epoch": 0.55, "learning_rate": 2.2429118824702185e-05, "loss": 2.7425, "step": 166500 }, { "epoch": 0.55, "learning_rate": 2.2412559736909215e-05, "loss": 2.7574, "step": 166600 }, { "epoch": 0.55, "learning_rate": 2.2396000649116242e-05, "loss": 2.7359, "step": 166700 }, { "epoch": 0.55, "learning_rate": 2.2379441561323273e-05, "loss": 2.7462, "step": 166800 }, { "epoch": 0.55, "learning_rate": 2.23628824735303e-05, "loss": 2.7471, "step": 166900 }, { "epoch": 0.55, "learning_rate": 2.2346323385737326e-05, "loss": 2.7516, "step": 167000 }, { "epoch": 0.55, "learning_rate": 2.2329764297944357e-05, "loss": 2.7484, "step": 167100 }, { "epoch": 0.55, "learning_rate": 2.2313205210151384e-05, "loss": 2.7419, "step": 167200 }, { "epoch": 0.55, "learning_rate": 2.2296646122358414e-05, "loss": 2.7483, "step": 167300 }, { "epoch": 0.55, "learning_rate": 2.228008703456544e-05, "loss": 2.7379, "step": 167400 }, { "epoch": 0.55, "learning_rate": 2.226352794677247e-05, "loss": 2.7434, "step": 167500 }, { "epoch": 0.56, "learning_rate": 2.2246968858979498e-05, "loss": 2.7443, "step": 167600 }, { "epoch": 0.56, "learning_rate": 2.2230409771186525e-05, "loss": 2.7423, "step": 167700 }, { "epoch": 0.56, "learning_rate": 2.2213850683393555e-05, "loss": 2.7478, "step": 167800 }, { "epoch": 0.56, "learning_rate": 2.2197291595600582e-05, "loss": 2.7371, "step": 167900 }, { "epoch": 0.56, "learning_rate": 2.2180732507807612e-05, "loss": 2.7521, "step": 168000 }, { "epoch": 0.56, "learning_rate": 2.216417342001464e-05, "loss": 2.7481, "step": 168100 }, { "epoch": 0.56, "learning_rate": 2.2147614332221666e-05, "loss": 2.7661, "step": 168200 }, { "epoch": 0.56, "learning_rate": 2.2131055244428697e-05, "loss": 2.7528, "step": 168300 }, { "epoch": 0.56, "learning_rate": 2.2114496156635723e-05, "loss": 2.7454, "step": 168400 }, { "epoch": 0.56, "learning_rate": 2.2097937068842754e-05, "loss": 2.7572, "step": 168500 }, { "epoch": 0.56, "learning_rate": 2.208137798104978e-05, "loss": 2.7555, "step": 168600 }, { "epoch": 0.56, "learning_rate": 2.2064818893256808e-05, "loss": 2.7397, "step": 168700 }, { "epoch": 0.56, "learning_rate": 2.2048259805463838e-05, "loss": 2.7472, "step": 168800 }, { "epoch": 0.56, "learning_rate": 2.2031700717670868e-05, "loss": 2.7525, "step": 168900 }, { "epoch": 0.56, "learning_rate": 2.2015141629877895e-05, "loss": 2.7479, "step": 169000 }, { "epoch": 0.56, "learning_rate": 2.1998582542084922e-05, "loss": 2.7548, "step": 169100 }, { "epoch": 0.56, "learning_rate": 2.198202345429195e-05, "loss": 2.7515, "step": 169200 }, { "epoch": 0.56, "learning_rate": 2.196546436649898e-05, "loss": 2.7598, "step": 169300 }, { "epoch": 0.56, "learning_rate": 2.194890527870601e-05, "loss": 2.7638, "step": 169400 }, { "epoch": 0.56, "learning_rate": 2.1932346190913036e-05, "loss": 2.7554, "step": 169500 }, { "epoch": 0.56, "learning_rate": 2.1915787103120063e-05, "loss": 2.7554, "step": 169600 }, { "epoch": 0.56, "learning_rate": 2.189922801532709e-05, "loss": 2.7186, "step": 169700 }, { "epoch": 0.56, "learning_rate": 2.1882668927534124e-05, "loss": 2.7371, "step": 169800 }, { "epoch": 0.56, "learning_rate": 2.186610983974115e-05, "loss": 2.7606, "step": 169900 }, { "epoch": 0.56, "learning_rate": 2.1849550751948178e-05, "loss": 2.7465, "step": 170000 }, { "epoch": 0.56, "learning_rate": 2.1832991664155205e-05, "loss": 2.7502, "step": 170100 }, { "epoch": 0.56, "learning_rate": 2.1816432576362235e-05, "loss": 2.7428, "step": 170200 }, { "epoch": 0.56, "learning_rate": 2.1799873488569265e-05, "loss": 2.7499, "step": 170300 }, { "epoch": 0.56, "learning_rate": 2.1783314400776292e-05, "loss": 2.7497, "step": 170400 }, { "epoch": 0.56, "learning_rate": 2.176675531298332e-05, "loss": 2.7497, "step": 170500 }, { "epoch": 0.56, "learning_rate": 2.1750196225190346e-05, "loss": 2.7504, "step": 170600 }, { "epoch": 0.57, "learning_rate": 2.1733637137397376e-05, "loss": 2.7567, "step": 170700 }, { "epoch": 0.57, "learning_rate": 2.1717078049604407e-05, "loss": 2.7605, "step": 170800 }, { "epoch": 0.57, "learning_rate": 2.1700518961811433e-05, "loss": 2.7443, "step": 170900 }, { "epoch": 0.57, "learning_rate": 2.168395987401846e-05, "loss": 2.7547, "step": 171000 }, { "epoch": 0.57, "learning_rate": 2.166740078622549e-05, "loss": 2.7432, "step": 171100 }, { "epoch": 0.57, "learning_rate": 2.1650841698432518e-05, "loss": 2.7484, "step": 171200 }, { "epoch": 0.57, "learning_rate": 2.1634282610639548e-05, "loss": 2.7579, "step": 171300 }, { "epoch": 0.57, "learning_rate": 2.1617723522846575e-05, "loss": 2.7375, "step": 171400 }, { "epoch": 0.57, "learning_rate": 2.1601164435053602e-05, "loss": 2.7608, "step": 171500 }, { "epoch": 0.57, "learning_rate": 2.1584605347260632e-05, "loss": 2.7453, "step": 171600 }, { "epoch": 0.57, "learning_rate": 2.156804625946766e-05, "loss": 2.747, "step": 171700 }, { "epoch": 0.57, "learning_rate": 2.155148717167469e-05, "loss": 2.7596, "step": 171800 }, { "epoch": 0.57, "learning_rate": 2.1534928083881716e-05, "loss": 2.7526, "step": 171900 }, { "epoch": 0.57, "learning_rate": 2.1518368996088743e-05, "loss": 2.7525, "step": 172000 }, { "epoch": 0.57, "learning_rate": 2.1501809908295773e-05, "loss": 2.7486, "step": 172100 }, { "epoch": 0.57, "learning_rate": 2.14852508205028e-05, "loss": 2.7473, "step": 172200 }, { "epoch": 0.57, "learning_rate": 2.146869173270983e-05, "loss": 2.7536, "step": 172300 }, { "epoch": 0.57, "learning_rate": 2.1452132644916857e-05, "loss": 2.7546, "step": 172400 }, { "epoch": 0.57, "learning_rate": 2.1435573557123888e-05, "loss": 2.734, "step": 172500 }, { "epoch": 0.57, "learning_rate": 2.1419014469330915e-05, "loss": 2.7437, "step": 172600 }, { "epoch": 0.57, "learning_rate": 2.140245538153794e-05, "loss": 2.7471, "step": 172700 }, { "epoch": 0.57, "learning_rate": 2.1385896293744972e-05, "loss": 2.7469, "step": 172800 }, { "epoch": 0.57, "learning_rate": 2.1369337205952e-05, "loss": 2.759, "step": 172900 }, { "epoch": 0.57, "learning_rate": 2.135277811815903e-05, "loss": 2.756, "step": 173000 }, { "epoch": 0.57, "learning_rate": 2.1336219030366056e-05, "loss": 2.745, "step": 173100 }, { "epoch": 0.57, "learning_rate": 2.1319659942573083e-05, "loss": 2.7487, "step": 173200 }, { "epoch": 0.57, "learning_rate": 2.1303100854780113e-05, "loss": 2.7487, "step": 173300 }, { "epoch": 0.57, "learning_rate": 2.1286541766987144e-05, "loss": 2.7374, "step": 173400 }, { "epoch": 0.57, "learning_rate": 2.126998267919417e-05, "loss": 2.7561, "step": 173500 }, { "epoch": 0.57, "learning_rate": 2.1253423591401197e-05, "loss": 2.752, "step": 173600 }, { "epoch": 0.58, "learning_rate": 2.1236864503608224e-05, "loss": 2.7373, "step": 173700 }, { "epoch": 0.58, "learning_rate": 2.1220305415815255e-05, "loss": 2.7426, "step": 173800 }, { "epoch": 0.58, "learning_rate": 2.1203746328022285e-05, "loss": 2.7453, "step": 173900 }, { "epoch": 0.58, "learning_rate": 2.1187187240229312e-05, "loss": 2.7396, "step": 174000 }, { "epoch": 0.58, "learning_rate": 2.117062815243634e-05, "loss": 2.7657, "step": 174100 }, { "epoch": 0.58, "learning_rate": 2.1154069064643366e-05, "loss": 2.7561, "step": 174200 }, { "epoch": 0.58, "learning_rate": 2.1137509976850396e-05, "loss": 2.7527, "step": 174300 }, { "epoch": 0.58, "learning_rate": 2.1120950889057426e-05, "loss": 2.7513, "step": 174400 }, { "epoch": 0.58, "learning_rate": 2.1104391801264453e-05, "loss": 2.7367, "step": 174500 }, { "epoch": 0.58, "learning_rate": 2.108783271347148e-05, "loss": 2.7505, "step": 174600 }, { "epoch": 0.58, "learning_rate": 2.107127362567851e-05, "loss": 2.7495, "step": 174700 }, { "epoch": 0.58, "learning_rate": 2.105471453788554e-05, "loss": 2.7543, "step": 174800 }, { "epoch": 0.58, "learning_rate": 2.1038155450092568e-05, "loss": 2.7465, "step": 174900 }, { "epoch": 0.58, "learning_rate": 2.1021596362299594e-05, "loss": 2.73, "step": 175000 }, { "epoch": 0.58, "learning_rate": 2.100503727450662e-05, "loss": 2.7408, "step": 175100 }, { "epoch": 0.58, "learning_rate": 2.098847818671365e-05, "loss": 2.7525, "step": 175200 }, { "epoch": 0.58, "learning_rate": 2.0971919098920682e-05, "loss": 2.7428, "step": 175300 }, { "epoch": 0.58, "learning_rate": 2.095536001112771e-05, "loss": 2.7372, "step": 175400 }, { "epoch": 0.58, "learning_rate": 2.0938800923334736e-05, "loss": 2.7413, "step": 175500 }, { "epoch": 0.58, "learning_rate": 2.0922241835541763e-05, "loss": 2.7394, "step": 175600 }, { "epoch": 0.58, "learning_rate": 2.0905682747748793e-05, "loss": 2.7547, "step": 175700 }, { "epoch": 0.58, "learning_rate": 2.0889123659955823e-05, "loss": 2.7519, "step": 175800 }, { "epoch": 0.58, "learning_rate": 2.087256457216285e-05, "loss": 2.7428, "step": 175900 }, { "epoch": 0.58, "learning_rate": 2.0856005484369877e-05, "loss": 2.7628, "step": 176000 }, { "epoch": 0.58, "learning_rate": 2.0839446396576907e-05, "loss": 2.7362, "step": 176100 }, { "epoch": 0.58, "learning_rate": 2.0822887308783934e-05, "loss": 2.742, "step": 176200 }, { "epoch": 0.58, "learning_rate": 2.0806328220990965e-05, "loss": 2.7439, "step": 176300 }, { "epoch": 0.58, "learning_rate": 2.078976913319799e-05, "loss": 2.7554, "step": 176400 }, { "epoch": 0.58, "learning_rate": 2.077321004540502e-05, "loss": 2.7583, "step": 176500 }, { "epoch": 0.58, "learning_rate": 2.075665095761205e-05, "loss": 2.7484, "step": 176600 }, { "epoch": 0.59, "learning_rate": 2.0740091869819076e-05, "loss": 2.7455, "step": 176700 }, { "epoch": 0.59, "learning_rate": 2.0723532782026106e-05, "loss": 2.7394, "step": 176800 }, { "epoch": 0.59, "learning_rate": 2.0706973694233133e-05, "loss": 2.7497, "step": 176900 }, { "epoch": 0.59, "learning_rate": 2.0690414606440163e-05, "loss": 2.7391, "step": 177000 }, { "epoch": 0.59, "learning_rate": 2.067385551864719e-05, "loss": 2.7412, "step": 177100 }, { "epoch": 0.59, "learning_rate": 2.0657296430854217e-05, "loss": 2.7654, "step": 177200 }, { "epoch": 0.59, "learning_rate": 2.0640737343061247e-05, "loss": 2.7475, "step": 177300 }, { "epoch": 0.59, "learning_rate": 2.0624178255268274e-05, "loss": 2.7368, "step": 177400 }, { "epoch": 0.59, "learning_rate": 2.0607619167475305e-05, "loss": 2.7449, "step": 177500 }, { "epoch": 0.59, "learning_rate": 2.059106007968233e-05, "loss": 2.7554, "step": 177600 }, { "epoch": 0.59, "learning_rate": 2.057450099188936e-05, "loss": 2.7463, "step": 177700 }, { "epoch": 0.59, "learning_rate": 2.055794190409639e-05, "loss": 2.7435, "step": 177800 }, { "epoch": 0.59, "learning_rate": 2.0541382816303416e-05, "loss": 2.749, "step": 177900 }, { "epoch": 0.59, "learning_rate": 2.0524823728510446e-05, "loss": 2.7578, "step": 178000 }, { "epoch": 0.59, "learning_rate": 2.0508264640717473e-05, "loss": 2.7403, "step": 178100 }, { "epoch": 0.59, "learning_rate": 2.04917055529245e-05, "loss": 2.7413, "step": 178200 }, { "epoch": 0.59, "learning_rate": 2.047514646513153e-05, "loss": 2.7531, "step": 178300 }, { "epoch": 0.59, "learning_rate": 2.045858737733856e-05, "loss": 2.7382, "step": 178400 }, { "epoch": 0.59, "learning_rate": 2.0442028289545587e-05, "loss": 2.7328, "step": 178500 }, { "epoch": 0.59, "learning_rate": 2.0425469201752614e-05, "loss": 2.7523, "step": 178600 }, { "epoch": 0.59, "learning_rate": 2.040891011395964e-05, "loss": 2.751, "step": 178700 }, { "epoch": 0.59, "learning_rate": 2.039235102616667e-05, "loss": 2.7621, "step": 178800 }, { "epoch": 0.59, "learning_rate": 2.03757919383737e-05, "loss": 2.7409, "step": 178900 }, { "epoch": 0.59, "learning_rate": 2.035923285058073e-05, "loss": 2.7606, "step": 179000 }, { "epoch": 0.59, "learning_rate": 2.0342673762787755e-05, "loss": 2.7597, "step": 179100 }, { "epoch": 0.59, "learning_rate": 2.0326114674994782e-05, "loss": 2.734, "step": 179200 }, { "epoch": 0.59, "learning_rate": 2.0309555587201816e-05, "loss": 2.7502, "step": 179300 }, { "epoch": 0.59, "learning_rate": 2.0292996499408843e-05, "loss": 2.7438, "step": 179400 }, { "epoch": 0.59, "learning_rate": 2.027643741161587e-05, "loss": 2.737, "step": 179500 }, { "epoch": 0.59, "learning_rate": 2.0259878323822897e-05, "loss": 2.7505, "step": 179600 }, { "epoch": 0.6, "learning_rate": 2.0243319236029927e-05, "loss": 2.7519, "step": 179700 }, { "epoch": 0.6, "learning_rate": 2.0226760148236957e-05, "loss": 2.7409, "step": 179800 }, { "epoch": 0.6, "learning_rate": 2.0210201060443984e-05, "loss": 2.7325, "step": 179900 }, { "epoch": 0.6, "learning_rate": 2.019364197265101e-05, "loss": 2.7537, "step": 180000 }, { "epoch": 0.6, "learning_rate": 2.0177082884858038e-05, "loss": 2.7362, "step": 180100 }, { "epoch": 0.6, "learning_rate": 2.016052379706507e-05, "loss": 2.7377, "step": 180200 }, { "epoch": 0.6, "learning_rate": 2.01439647092721e-05, "loss": 2.7443, "step": 180300 }, { "epoch": 0.6, "learning_rate": 2.0127405621479126e-05, "loss": 2.7377, "step": 180400 }, { "epoch": 0.6, "learning_rate": 2.0110846533686153e-05, "loss": 2.7517, "step": 180500 }, { "epoch": 0.6, "learning_rate": 2.0094287445893183e-05, "loss": 2.7443, "step": 180600 }, { "epoch": 0.6, "learning_rate": 2.007772835810021e-05, "loss": 2.7444, "step": 180700 }, { "epoch": 0.6, "learning_rate": 2.006116927030724e-05, "loss": 2.7496, "step": 180800 }, { "epoch": 0.6, "learning_rate": 2.0044610182514267e-05, "loss": 2.7418, "step": 180900 }, { "epoch": 0.6, "learning_rate": 2.0028051094721294e-05, "loss": 2.7402, "step": 181000 }, { "epoch": 0.6, "learning_rate": 2.0011492006928324e-05, "loss": 2.7368, "step": 181100 }, { "epoch": 0.6, "learning_rate": 1.999493291913535e-05, "loss": 2.7474, "step": 181200 }, { "epoch": 0.6, "learning_rate": 1.997837383134238e-05, "loss": 2.7471, "step": 181300 }, { "epoch": 0.6, "learning_rate": 1.9961814743549408e-05, "loss": 2.7449, "step": 181400 }, { "epoch": 0.6, "learning_rate": 1.9945255655756435e-05, "loss": 2.7289, "step": 181500 }, { "epoch": 0.6, "learning_rate": 1.9928696567963465e-05, "loss": 2.7406, "step": 181600 }, { "epoch": 0.6, "learning_rate": 1.9912137480170492e-05, "loss": 2.7669, "step": 181700 }, { "epoch": 0.6, "learning_rate": 1.9895578392377523e-05, "loss": 2.7532, "step": 181800 }, { "epoch": 0.6, "learning_rate": 1.987901930458455e-05, "loss": 2.7445, "step": 181900 }, { "epoch": 0.6, "learning_rate": 1.986246021679158e-05, "loss": 2.7673, "step": 182000 }, { "epoch": 0.6, "learning_rate": 1.9845901128998607e-05, "loss": 2.7517, "step": 182100 }, { "epoch": 0.6, "learning_rate": 1.9829342041205634e-05, "loss": 2.747, "step": 182200 }, { "epoch": 0.6, "learning_rate": 1.9812782953412664e-05, "loss": 2.746, "step": 182300 }, { "epoch": 0.6, "learning_rate": 1.979622386561969e-05, "loss": 2.7475, "step": 182400 }, { "epoch": 0.6, "learning_rate": 1.977966477782672e-05, "loss": 2.7567, "step": 182500 }, { "epoch": 0.6, "learning_rate": 1.9763105690033748e-05, "loss": 2.7468, "step": 182600 }, { "epoch": 0.61, "learning_rate": 1.9746546602240775e-05, "loss": 2.7424, "step": 182700 }, { "epoch": 0.61, "learning_rate": 1.9729987514447805e-05, "loss": 2.7375, "step": 182800 }, { "epoch": 0.61, "learning_rate": 1.9713428426654836e-05, "loss": 2.74, "step": 182900 }, { "epoch": 0.61, "learning_rate": 1.9696869338861863e-05, "loss": 2.7503, "step": 183000 }, { "epoch": 0.61, "learning_rate": 1.968031025106889e-05, "loss": 2.7485, "step": 183100 }, { "epoch": 0.61, "learning_rate": 1.9663751163275916e-05, "loss": 2.7386, "step": 183200 }, { "epoch": 0.61, "learning_rate": 1.9647192075482947e-05, "loss": 2.73, "step": 183300 }, { "epoch": 0.61, "learning_rate": 1.9630632987689977e-05, "loss": 2.7399, "step": 183400 }, { "epoch": 0.61, "learning_rate": 1.9614073899897004e-05, "loss": 2.7399, "step": 183500 }, { "epoch": 0.61, "learning_rate": 1.959751481210403e-05, "loss": 2.7438, "step": 183600 }, { "epoch": 0.61, "learning_rate": 1.9580955724311058e-05, "loss": 2.7527, "step": 183700 }, { "epoch": 0.61, "learning_rate": 1.956439663651809e-05, "loss": 2.7412, "step": 183800 }, { "epoch": 0.61, "learning_rate": 1.954783754872512e-05, "loss": 2.7294, "step": 183900 }, { "epoch": 0.61, "learning_rate": 1.9531278460932145e-05, "loss": 2.7384, "step": 184000 }, { "epoch": 0.61, "learning_rate": 1.9514719373139172e-05, "loss": 2.7353, "step": 184100 }, { "epoch": 0.61, "learning_rate": 1.9498160285346202e-05, "loss": 2.7428, "step": 184200 }, { "epoch": 0.61, "learning_rate": 1.9481601197553233e-05, "loss": 2.7493, "step": 184300 }, { "epoch": 0.61, "learning_rate": 1.946504210976026e-05, "loss": 2.728, "step": 184400 }, { "epoch": 0.61, "learning_rate": 1.9448483021967287e-05, "loss": 2.7371, "step": 184500 }, { "epoch": 0.61, "learning_rate": 1.9431923934174313e-05, "loss": 2.7402, "step": 184600 }, { "epoch": 0.61, "learning_rate": 1.9415364846381344e-05, "loss": 2.7451, "step": 184700 }, { "epoch": 0.61, "learning_rate": 1.9398805758588374e-05, "loss": 2.7383, "step": 184800 }, { "epoch": 0.61, "learning_rate": 1.93822466707954e-05, "loss": 2.7366, "step": 184900 }, { "epoch": 0.61, "learning_rate": 1.9365687583002428e-05, "loss": 2.7473, "step": 185000 }, { "epoch": 0.61, "learning_rate": 1.9349128495209455e-05, "loss": 2.7424, "step": 185100 }, { "epoch": 0.61, "learning_rate": 1.9332569407416485e-05, "loss": 2.7531, "step": 185200 }, { "epoch": 0.61, "learning_rate": 1.9316010319623515e-05, "loss": 2.7323, "step": 185300 }, { "epoch": 0.61, "learning_rate": 1.9299451231830542e-05, "loss": 2.7391, "step": 185400 }, { "epoch": 0.61, "learning_rate": 1.928289214403757e-05, "loss": 2.7523, "step": 185500 }, { "epoch": 0.61, "learning_rate": 1.92663330562446e-05, "loss": 2.7478, "step": 185600 }, { "epoch": 0.62, "learning_rate": 1.9249773968451626e-05, "loss": 2.7492, "step": 185700 }, { "epoch": 0.62, "learning_rate": 1.9233214880658657e-05, "loss": 2.7306, "step": 185800 }, { "epoch": 0.62, "learning_rate": 1.9216655792865684e-05, "loss": 2.7402, "step": 185900 }, { "epoch": 0.62, "learning_rate": 1.920009670507271e-05, "loss": 2.7429, "step": 186000 }, { "epoch": 0.62, "learning_rate": 1.918353761727974e-05, "loss": 2.7388, "step": 186100 }, { "epoch": 0.62, "learning_rate": 1.9166978529486768e-05, "loss": 2.7384, "step": 186200 }, { "epoch": 0.62, "learning_rate": 1.9150419441693798e-05, "loss": 2.7398, "step": 186300 }, { "epoch": 0.62, "learning_rate": 1.9133860353900825e-05, "loss": 2.7571, "step": 186400 }, { "epoch": 0.62, "learning_rate": 1.9117301266107855e-05, "loss": 2.7548, "step": 186500 }, { "epoch": 0.62, "learning_rate": 1.9100742178314882e-05, "loss": 2.7339, "step": 186600 }, { "epoch": 0.62, "learning_rate": 1.908418309052191e-05, "loss": 2.7359, "step": 186700 }, { "epoch": 0.62, "learning_rate": 1.906762400272894e-05, "loss": 2.7395, "step": 186800 }, { "epoch": 0.62, "learning_rate": 1.9051064914935966e-05, "loss": 2.7336, "step": 186900 }, { "epoch": 0.62, "learning_rate": 1.9034505827142997e-05, "loss": 2.7336, "step": 187000 }, { "epoch": 0.62, "learning_rate": 1.9017946739350024e-05, "loss": 2.7447, "step": 187100 }, { "epoch": 0.62, "learning_rate": 1.900138765155705e-05, "loss": 2.7439, "step": 187200 }, { "epoch": 0.62, "learning_rate": 1.898482856376408e-05, "loss": 2.743, "step": 187300 }, { "epoch": 0.62, "learning_rate": 1.896826947597111e-05, "loss": 2.7444, "step": 187400 }, { "epoch": 0.62, "learning_rate": 1.8951710388178138e-05, "loss": 2.7332, "step": 187500 }, { "epoch": 0.62, "learning_rate": 1.8935151300385165e-05, "loss": 2.7369, "step": 187600 }, { "epoch": 0.62, "learning_rate": 1.8918592212592192e-05, "loss": 2.7413, "step": 187700 }, { "epoch": 0.62, "learning_rate": 1.8902033124799222e-05, "loss": 2.7716, "step": 187800 }, { "epoch": 0.62, "learning_rate": 1.8885474037006252e-05, "loss": 2.7431, "step": 187900 }, { "epoch": 0.62, "learning_rate": 1.886891494921328e-05, "loss": 2.7496, "step": 188000 }, { "epoch": 0.62, "learning_rate": 1.8852355861420306e-05, "loss": 2.7485, "step": 188100 }, { "epoch": 0.62, "learning_rate": 1.8835796773627333e-05, "loss": 2.7476, "step": 188200 }, { "epoch": 0.62, "learning_rate": 1.8819237685834363e-05, "loss": 2.7482, "step": 188300 }, { "epoch": 0.62, "learning_rate": 1.8802678598041394e-05, "loss": 2.7405, "step": 188400 }, { "epoch": 0.62, "learning_rate": 1.878611951024842e-05, "loss": 2.7468, "step": 188500 }, { "epoch": 0.62, "learning_rate": 1.8769560422455448e-05, "loss": 2.7531, "step": 188600 }, { "epoch": 0.62, "learning_rate": 1.8753001334662474e-05, "loss": 2.7557, "step": 188700 }, { "epoch": 0.63, "learning_rate": 1.8736442246869508e-05, "loss": 2.7441, "step": 188800 }, { "epoch": 0.63, "learning_rate": 1.8719883159076535e-05, "loss": 2.7522, "step": 188900 }, { "epoch": 0.63, "learning_rate": 1.8703324071283562e-05, "loss": 2.744, "step": 189000 }, { "epoch": 0.63, "learning_rate": 1.868676498349059e-05, "loss": 2.7527, "step": 189100 }, { "epoch": 0.63, "learning_rate": 1.867020589569762e-05, "loss": 2.7664, "step": 189200 }, { "epoch": 0.63, "learning_rate": 1.865364680790465e-05, "loss": 2.7386, "step": 189300 }, { "epoch": 0.63, "learning_rate": 1.8637087720111676e-05, "loss": 2.7297, "step": 189400 }, { "epoch": 0.63, "learning_rate": 1.8620528632318703e-05, "loss": 2.7398, "step": 189500 }, { "epoch": 0.63, "learning_rate": 1.860396954452573e-05, "loss": 2.7605, "step": 189600 }, { "epoch": 0.63, "learning_rate": 1.858741045673276e-05, "loss": 2.749, "step": 189700 }, { "epoch": 0.63, "learning_rate": 1.857085136893979e-05, "loss": 2.7359, "step": 189800 }, { "epoch": 0.63, "learning_rate": 1.8554292281146818e-05, "loss": 2.737, "step": 189900 }, { "epoch": 0.63, "learning_rate": 1.8537733193353845e-05, "loss": 2.7476, "step": 190000 }, { "epoch": 0.63, "learning_rate": 1.8521174105560875e-05, "loss": 2.7243, "step": 190100 }, { "epoch": 0.63, "learning_rate": 1.8504615017767902e-05, "loss": 2.74, "step": 190200 }, { "epoch": 0.63, "learning_rate": 1.8488055929974932e-05, "loss": 2.7522, "step": 190300 }, { "epoch": 0.63, "learning_rate": 1.847149684218196e-05, "loss": 2.7543, "step": 190400 }, { "epoch": 0.63, "learning_rate": 1.8454937754388986e-05, "loss": 2.7374, "step": 190500 }, { "epoch": 0.63, "learning_rate": 1.8438378666596016e-05, "loss": 2.7397, "step": 190600 }, { "epoch": 0.63, "learning_rate": 1.8421819578803043e-05, "loss": 2.7264, "step": 190700 }, { "epoch": 0.63, "learning_rate": 1.8405260491010073e-05, "loss": 2.7341, "step": 190800 }, { "epoch": 0.63, "learning_rate": 1.83887014032171e-05, "loss": 2.7382, "step": 190900 }, { "epoch": 0.63, "learning_rate": 1.837214231542413e-05, "loss": 2.7346, "step": 191000 }, { "epoch": 0.63, "learning_rate": 1.8355583227631158e-05, "loss": 2.7357, "step": 191100 }, { "epoch": 0.63, "learning_rate": 1.8339024139838185e-05, "loss": 2.7492, "step": 191200 }, { "epoch": 0.63, "learning_rate": 1.8322465052045215e-05, "loss": 2.7492, "step": 191300 }, { "epoch": 0.63, "learning_rate": 1.8305905964252242e-05, "loss": 2.7432, "step": 191400 }, { "epoch": 0.63, "learning_rate": 1.8289346876459272e-05, "loss": 2.7422, "step": 191500 }, { "epoch": 0.63, "learning_rate": 1.82727877886663e-05, "loss": 2.7427, "step": 191600 }, { "epoch": 0.63, "learning_rate": 1.8256228700873326e-05, "loss": 2.7504, "step": 191700 }, { "epoch": 0.64, "learning_rate": 1.8239669613080356e-05, "loss": 2.7348, "step": 191800 }, { "epoch": 0.64, "learning_rate": 1.8223110525287383e-05, "loss": 2.7355, "step": 191900 }, { "epoch": 0.64, "learning_rate": 1.8206551437494413e-05, "loss": 2.7447, "step": 192000 }, { "epoch": 0.64, "learning_rate": 1.818999234970144e-05, "loss": 2.7517, "step": 192100 }, { "epoch": 0.64, "learning_rate": 1.8173433261908467e-05, "loss": 2.7496, "step": 192200 }, { "epoch": 0.64, "learning_rate": 1.8156874174115497e-05, "loss": 2.7394, "step": 192300 }, { "epoch": 0.64, "learning_rate": 1.8140315086322528e-05, "loss": 2.7283, "step": 192400 }, { "epoch": 0.64, "learning_rate": 1.8123755998529555e-05, "loss": 2.7389, "step": 192500 }, { "epoch": 0.64, "learning_rate": 1.810719691073658e-05, "loss": 2.7368, "step": 192600 }, { "epoch": 0.64, "learning_rate": 1.809063782294361e-05, "loss": 2.7407, "step": 192700 }, { "epoch": 0.64, "learning_rate": 1.807407873515064e-05, "loss": 2.7288, "step": 192800 }, { "epoch": 0.64, "learning_rate": 1.805751964735767e-05, "loss": 2.7248, "step": 192900 }, { "epoch": 0.64, "learning_rate": 1.8040960559564696e-05, "loss": 2.7259, "step": 193000 }, { "epoch": 0.64, "learning_rate": 1.8024401471771723e-05, "loss": 2.7475, "step": 193100 }, { "epoch": 0.64, "learning_rate": 1.800784238397875e-05, "loss": 2.7297, "step": 193200 }, { "epoch": 0.64, "learning_rate": 1.7991283296185784e-05, "loss": 2.7498, "step": 193300 }, { "epoch": 0.64, "learning_rate": 1.797472420839281e-05, "loss": 2.7517, "step": 193400 }, { "epoch": 0.64, "learning_rate": 1.7958165120599837e-05, "loss": 2.7365, "step": 193500 }, { "epoch": 0.64, "learning_rate": 1.7941606032806864e-05, "loss": 2.7297, "step": 193600 }, { "epoch": 0.64, "learning_rate": 1.7925046945013895e-05, "loss": 2.7415, "step": 193700 }, { "epoch": 0.64, "learning_rate": 1.7908487857220925e-05, "loss": 2.7242, "step": 193800 }, { "epoch": 0.64, "learning_rate": 1.7891928769427952e-05, "loss": 2.7425, "step": 193900 }, { "epoch": 0.64, "learning_rate": 1.787536968163498e-05, "loss": 2.7562, "step": 194000 }, { "epoch": 0.64, "learning_rate": 1.7858810593842006e-05, "loss": 2.727, "step": 194100 }, { "epoch": 0.64, "learning_rate": 1.7842251506049036e-05, "loss": 2.7497, "step": 194200 }, { "epoch": 0.64, "learning_rate": 1.7825692418256066e-05, "loss": 2.7484, "step": 194300 }, { "epoch": 0.64, "learning_rate": 1.7809133330463093e-05, "loss": 2.7343, "step": 194400 }, { "epoch": 0.64, "learning_rate": 1.779257424267012e-05, "loss": 2.7723, "step": 194500 }, { "epoch": 0.64, "learning_rate": 1.777601515487715e-05, "loss": 2.7399, "step": 194600 }, { "epoch": 0.64, "learning_rate": 1.7759456067084177e-05, "loss": 2.7376, "step": 194700 }, { "epoch": 0.65, "learning_rate": 1.7742896979291208e-05, "loss": 2.7485, "step": 194800 }, { "epoch": 0.65, "learning_rate": 1.7726337891498234e-05, "loss": 2.7326, "step": 194900 }, { "epoch": 0.65, "learning_rate": 1.770977880370526e-05, "loss": 2.7403, "step": 195000 }, { "epoch": 0.65, "learning_rate": 1.769321971591229e-05, "loss": 2.7306, "step": 195100 }, { "epoch": 0.65, "learning_rate": 1.767666062811932e-05, "loss": 2.7347, "step": 195200 }, { "epoch": 0.65, "learning_rate": 1.766010154032635e-05, "loss": 2.7404, "step": 195300 }, { "epoch": 0.65, "learning_rate": 1.7643542452533376e-05, "loss": 2.7213, "step": 195400 }, { "epoch": 0.65, "learning_rate": 1.7626983364740403e-05, "loss": 2.7374, "step": 195500 }, { "epoch": 0.65, "learning_rate": 1.7610424276947433e-05, "loss": 2.7227, "step": 195600 }, { "epoch": 0.65, "learning_rate": 1.759386518915446e-05, "loss": 2.7226, "step": 195700 }, { "epoch": 0.65, "learning_rate": 1.757730610136149e-05, "loss": 2.7351, "step": 195800 }, { "epoch": 0.65, "learning_rate": 1.7560747013568517e-05, "loss": 2.7491, "step": 195900 }, { "epoch": 0.65, "learning_rate": 1.7544187925775547e-05, "loss": 2.7374, "step": 196000 }, { "epoch": 0.65, "learning_rate": 1.7527628837982574e-05, "loss": 2.7398, "step": 196100 }, { "epoch": 0.65, "learning_rate": 1.75110697501896e-05, "loss": 2.7369, "step": 196200 }, { "epoch": 0.65, "learning_rate": 1.749451066239663e-05, "loss": 2.7385, "step": 196300 }, { "epoch": 0.65, "learning_rate": 1.747795157460366e-05, "loss": 2.7371, "step": 196400 }, { "epoch": 0.65, "learning_rate": 1.746139248681069e-05, "loss": 2.7375, "step": 196500 }, { "epoch": 0.65, "learning_rate": 1.7444833399017716e-05, "loss": 2.7333, "step": 196600 }, { "epoch": 0.65, "learning_rate": 1.7428274311224743e-05, "loss": 2.7297, "step": 196700 }, { "epoch": 0.65, "learning_rate": 1.7411715223431773e-05, "loss": 2.7652, "step": 196800 }, { "epoch": 0.65, "learning_rate": 1.7395156135638803e-05, "loss": 2.7275, "step": 196900 }, { "epoch": 0.65, "learning_rate": 1.737859704784583e-05, "loss": 2.7314, "step": 197000 }, { "epoch": 0.65, "learning_rate": 1.7362037960052857e-05, "loss": 2.7405, "step": 197100 }, { "epoch": 0.65, "learning_rate": 1.7345478872259884e-05, "loss": 2.7291, "step": 197200 }, { "epoch": 0.65, "learning_rate": 1.7328919784466914e-05, "loss": 2.7445, "step": 197300 }, { "epoch": 0.65, "learning_rate": 1.7312360696673944e-05, "loss": 2.7509, "step": 197400 }, { "epoch": 0.65, "learning_rate": 1.729580160888097e-05, "loss": 2.7429, "step": 197500 }, { "epoch": 0.65, "learning_rate": 1.7279242521088e-05, "loss": 2.7431, "step": 197600 }, { "epoch": 0.65, "learning_rate": 1.7262683433295025e-05, "loss": 2.7473, "step": 197700 }, { "epoch": 0.66, "learning_rate": 1.7246124345502056e-05, "loss": 2.7478, "step": 197800 }, { "epoch": 0.66, "learning_rate": 1.7229565257709086e-05, "loss": 2.7408, "step": 197900 }, { "epoch": 0.66, "learning_rate": 1.7213006169916113e-05, "loss": 2.7434, "step": 198000 }, { "epoch": 0.66, "learning_rate": 1.719644708212314e-05, "loss": 2.7364, "step": 198100 }, { "epoch": 0.66, "learning_rate": 1.717988799433017e-05, "loss": 2.7433, "step": 198200 }, { "epoch": 0.66, "learning_rate": 1.71633289065372e-05, "loss": 2.7405, "step": 198300 }, { "epoch": 0.66, "learning_rate": 1.7146769818744227e-05, "loss": 2.7361, "step": 198400 }, { "epoch": 0.66, "learning_rate": 1.7130210730951254e-05, "loss": 2.7447, "step": 198500 }, { "epoch": 0.66, "learning_rate": 1.711365164315828e-05, "loss": 2.7294, "step": 198600 }, { "epoch": 0.66, "learning_rate": 1.709709255536531e-05, "loss": 2.731, "step": 198700 }, { "epoch": 0.66, "learning_rate": 1.708053346757234e-05, "loss": 2.7371, "step": 198800 }, { "epoch": 0.66, "learning_rate": 1.706397437977937e-05, "loss": 2.7419, "step": 198900 }, { "epoch": 0.66, "learning_rate": 1.7047415291986395e-05, "loss": 2.749, "step": 199000 }, { "epoch": 0.66, "learning_rate": 1.7030856204193422e-05, "loss": 2.7477, "step": 199100 }, { "epoch": 0.66, "learning_rate": 1.7014297116400453e-05, "loss": 2.7424, "step": 199200 }, { "epoch": 0.66, "learning_rate": 1.6997738028607483e-05, "loss": 2.7354, "step": 199300 }, { "epoch": 0.66, "learning_rate": 1.698117894081451e-05, "loss": 2.7491, "step": 199400 }, { "epoch": 0.66, "learning_rate": 1.6964619853021537e-05, "loss": 2.751, "step": 199500 }, { "epoch": 0.66, "learning_rate": 1.6948060765228567e-05, "loss": 2.7344, "step": 199600 }, { "epoch": 0.66, "learning_rate": 1.6931501677435594e-05, "loss": 2.7399, "step": 199700 }, { "epoch": 0.66, "learning_rate": 1.6914942589642624e-05, "loss": 2.7261, "step": 199800 }, { "epoch": 0.66, "learning_rate": 1.689838350184965e-05, "loss": 2.7282, "step": 199900 }, { "epoch": 0.66, "learning_rate": 1.6881824414056678e-05, "loss": 2.7567, "step": 200000 }, { "epoch": 0.66, "learning_rate": 1.686526532626371e-05, "loss": 2.7304, "step": 200100 }, { "epoch": 0.66, "learning_rate": 1.6848706238470735e-05, "loss": 2.7329, "step": 200200 }, { "epoch": 0.66, "learning_rate": 1.6832147150677766e-05, "loss": 2.7499, "step": 200300 }, { "epoch": 0.66, "learning_rate": 1.6815588062884792e-05, "loss": 2.7355, "step": 200400 }, { "epoch": 0.66, "learning_rate": 1.6799028975091823e-05, "loss": 2.7495, "step": 200500 }, { "epoch": 0.66, "learning_rate": 1.678246988729885e-05, "loss": 2.7438, "step": 200600 }, { "epoch": 0.66, "learning_rate": 1.6765910799505877e-05, "loss": 2.7387, "step": 200700 }, { "epoch": 0.67, "learning_rate": 1.6749351711712907e-05, "loss": 2.7485, "step": 200800 }, { "epoch": 0.67, "learning_rate": 1.6732792623919934e-05, "loss": 2.7428, "step": 200900 }, { "epoch": 0.67, "learning_rate": 1.6716233536126964e-05, "loss": 2.7364, "step": 201000 }, { "epoch": 0.67, "learning_rate": 1.669967444833399e-05, "loss": 2.7413, "step": 201100 }, { "epoch": 0.67, "learning_rate": 1.6683115360541018e-05, "loss": 2.7507, "step": 201200 }, { "epoch": 0.67, "learning_rate": 1.6666556272748048e-05, "loss": 2.713, "step": 201300 }, { "epoch": 0.67, "learning_rate": 1.6649997184955075e-05, "loss": 2.7451, "step": 201400 }, { "epoch": 0.67, "learning_rate": 1.6633438097162105e-05, "loss": 2.7487, "step": 201500 }, { "epoch": 0.67, "learning_rate": 1.6616879009369132e-05, "loss": 2.7377, "step": 201600 }, { "epoch": 0.67, "learning_rate": 1.660031992157616e-05, "loss": 2.7419, "step": 201700 }, { "epoch": 0.67, "learning_rate": 1.658376083378319e-05, "loss": 2.7301, "step": 201800 }, { "epoch": 0.67, "learning_rate": 1.656720174599022e-05, "loss": 2.7458, "step": 201900 }, { "epoch": 0.67, "learning_rate": 1.6550642658197247e-05, "loss": 2.7329, "step": 202000 }, { "epoch": 0.67, "learning_rate": 1.6534083570404274e-05, "loss": 2.7547, "step": 202100 }, { "epoch": 0.67, "learning_rate": 1.65175244826113e-05, "loss": 2.7139, "step": 202200 }, { "epoch": 0.67, "learning_rate": 1.650096539481833e-05, "loss": 2.7398, "step": 202300 }, { "epoch": 0.67, "learning_rate": 1.648440630702536e-05, "loss": 2.7443, "step": 202400 }, { "epoch": 0.67, "learning_rate": 1.6467847219232388e-05, "loss": 2.7437, "step": 202500 }, { "epoch": 0.67, "learning_rate": 1.6451288131439415e-05, "loss": 2.7371, "step": 202600 }, { "epoch": 0.67, "learning_rate": 1.6434729043646442e-05, "loss": 2.7587, "step": 202700 }, { "epoch": 0.67, "learning_rate": 1.6418169955853476e-05, "loss": 2.7479, "step": 202800 }, { "epoch": 0.67, "learning_rate": 1.6401610868060503e-05, "loss": 2.738, "step": 202900 }, { "epoch": 0.67, "learning_rate": 1.638505178026753e-05, "loss": 2.7384, "step": 203000 }, { "epoch": 0.67, "learning_rate": 1.6368492692474556e-05, "loss": 2.7542, "step": 203100 }, { "epoch": 0.67, "learning_rate": 1.6351933604681587e-05, "loss": 2.7438, "step": 203200 }, { "epoch": 0.67, "learning_rate": 1.6335374516888617e-05, "loss": 2.74, "step": 203300 }, { "epoch": 0.67, "learning_rate": 1.6318815429095644e-05, "loss": 2.7262, "step": 203400 }, { "epoch": 0.67, "learning_rate": 1.630225634130267e-05, "loss": 2.7397, "step": 203500 }, { "epoch": 0.67, "learning_rate": 1.6285697253509698e-05, "loss": 2.7411, "step": 203600 }, { "epoch": 0.67, "learning_rate": 1.6269138165716728e-05, "loss": 2.7475, "step": 203700 }, { "epoch": 0.67, "learning_rate": 1.625257907792376e-05, "loss": 2.7394, "step": 203800 }, { "epoch": 0.68, "learning_rate": 1.6236019990130785e-05, "loss": 2.727, "step": 203900 }, { "epoch": 0.68, "learning_rate": 1.6219460902337812e-05, "loss": 2.7518, "step": 204000 }, { "epoch": 0.68, "learning_rate": 1.6202901814544842e-05, "loss": 2.7461, "step": 204100 }, { "epoch": 0.68, "learning_rate": 1.618634272675187e-05, "loss": 2.734, "step": 204200 }, { "epoch": 0.68, "learning_rate": 1.61697836389589e-05, "loss": 2.7206, "step": 204300 }, { "epoch": 0.68, "learning_rate": 1.6153224551165927e-05, "loss": 2.762, "step": 204400 }, { "epoch": 0.68, "learning_rate": 1.6136665463372953e-05, "loss": 2.7351, "step": 204500 }, { "epoch": 0.68, "learning_rate": 1.6120106375579984e-05, "loss": 2.7437, "step": 204600 }, { "epoch": 0.68, "learning_rate": 1.610354728778701e-05, "loss": 2.7519, "step": 204700 }, { "epoch": 0.68, "learning_rate": 1.608698819999404e-05, "loss": 2.7358, "step": 204800 }, { "epoch": 0.68, "learning_rate": 1.6070429112201068e-05, "loss": 2.7435, "step": 204900 }, { "epoch": 0.68, "learning_rate": 1.6053870024408095e-05, "loss": 2.7644, "step": 205000 }, { "epoch": 0.68, "learning_rate": 1.6037310936615125e-05, "loss": 2.7336, "step": 205100 }, { "epoch": 0.68, "learning_rate": 1.6020751848822152e-05, "loss": 2.7355, "step": 205200 }, { "epoch": 0.68, "learning_rate": 1.6004192761029182e-05, "loss": 2.7354, "step": 205300 }, { "epoch": 0.68, "learning_rate": 1.598763367323621e-05, "loss": 2.7431, "step": 205400 }, { "epoch": 0.68, "learning_rate": 1.597107458544324e-05, "loss": 2.7423, "step": 205500 }, { "epoch": 0.68, "learning_rate": 1.5954515497650266e-05, "loss": 2.7402, "step": 205600 }, { "epoch": 0.68, "learning_rate": 1.5937956409857293e-05, "loss": 2.7426, "step": 205700 }, { "epoch": 0.68, "learning_rate": 1.5921397322064324e-05, "loss": 2.7303, "step": 205800 }, { "epoch": 0.68, "learning_rate": 1.590483823427135e-05, "loss": 2.7449, "step": 205900 }, { "epoch": 0.68, "learning_rate": 1.588827914647838e-05, "loss": 2.7423, "step": 206000 }, { "epoch": 0.68, "learning_rate": 1.5871720058685408e-05, "loss": 2.7294, "step": 206100 }, { "epoch": 0.68, "learning_rate": 1.5855160970892435e-05, "loss": 2.7421, "step": 206200 }, { "epoch": 0.68, "learning_rate": 1.5838601883099465e-05, "loss": 2.7409, "step": 206300 }, { "epoch": 0.68, "learning_rate": 1.5822042795306495e-05, "loss": 2.7481, "step": 206400 }, { "epoch": 0.68, "learning_rate": 1.5805483707513522e-05, "loss": 2.7521, "step": 206500 }, { "epoch": 0.68, "learning_rate": 1.578892461972055e-05, "loss": 2.7391, "step": 206600 }, { "epoch": 0.68, "learning_rate": 1.5772365531927576e-05, "loss": 2.7332, "step": 206700 }, { "epoch": 0.68, "learning_rate": 1.5755806444134606e-05, "loss": 2.7301, "step": 206800 }, { "epoch": 0.69, "learning_rate": 1.5739247356341637e-05, "loss": 2.7335, "step": 206900 }, { "epoch": 0.69, "learning_rate": 1.5722688268548664e-05, "loss": 2.7397, "step": 207000 }, { "epoch": 0.69, "learning_rate": 1.570612918075569e-05, "loss": 2.7328, "step": 207100 }, { "epoch": 0.69, "learning_rate": 1.5689570092962717e-05, "loss": 2.745, "step": 207200 }, { "epoch": 0.69, "learning_rate": 1.5673011005169748e-05, "loss": 2.7366, "step": 207300 }, { "epoch": 0.69, "learning_rate": 1.5656451917376778e-05, "loss": 2.7424, "step": 207400 }, { "epoch": 0.69, "learning_rate": 1.5639892829583805e-05, "loss": 2.7485, "step": 207500 }, { "epoch": 0.69, "learning_rate": 1.5623333741790832e-05, "loss": 2.7466, "step": 207600 }, { "epoch": 0.69, "learning_rate": 1.5606774653997862e-05, "loss": 2.7467, "step": 207700 }, { "epoch": 0.69, "learning_rate": 1.5590215566204892e-05, "loss": 2.7453, "step": 207800 }, { "epoch": 0.69, "learning_rate": 1.557365647841192e-05, "loss": 2.7395, "step": 207900 }, { "epoch": 0.69, "learning_rate": 1.5557097390618946e-05, "loss": 2.748, "step": 208000 }, { "epoch": 0.69, "learning_rate": 1.5540538302825973e-05, "loss": 2.7507, "step": 208100 }, { "epoch": 0.69, "learning_rate": 1.5523979215033003e-05, "loss": 2.7468, "step": 208200 }, { "epoch": 0.69, "learning_rate": 1.5507420127240034e-05, "loss": 2.7397, "step": 208300 }, { "epoch": 0.69, "learning_rate": 1.549086103944706e-05, "loss": 2.7297, "step": 208400 }, { "epoch": 0.69, "learning_rate": 1.5474301951654088e-05, "loss": 2.7447, "step": 208500 }, { "epoch": 0.69, "learning_rate": 1.5457742863861114e-05, "loss": 2.7362, "step": 208600 }, { "epoch": 0.69, "learning_rate": 1.5441183776068145e-05, "loss": 2.7343, "step": 208700 }, { "epoch": 0.69, "learning_rate": 1.5424624688275175e-05, "loss": 2.7268, "step": 208800 }, { "epoch": 0.69, "learning_rate": 1.5408065600482202e-05, "loss": 2.7216, "step": 208900 }, { "epoch": 0.69, "learning_rate": 1.539150651268923e-05, "loss": 2.7378, "step": 209000 }, { "epoch": 0.69, "learning_rate": 1.537494742489626e-05, "loss": 2.7276, "step": 209100 }, { "epoch": 0.69, "learning_rate": 1.5358388337103286e-05, "loss": 2.7316, "step": 209200 }, { "epoch": 0.69, "learning_rate": 1.5341829249310316e-05, "loss": 2.7451, "step": 209300 }, { "epoch": 0.69, "learning_rate": 1.5325270161517343e-05, "loss": 2.7364, "step": 209400 }, { "epoch": 0.69, "learning_rate": 1.530871107372437e-05, "loss": 2.7355, "step": 209500 }, { "epoch": 0.69, "learning_rate": 1.52921519859314e-05, "loss": 2.7339, "step": 209600 }, { "epoch": 0.69, "learning_rate": 1.5275592898138427e-05, "loss": 2.7487, "step": 209700 }, { "epoch": 0.69, "learning_rate": 1.5259033810345458e-05, "loss": 2.7462, "step": 209800 }, { "epoch": 0.7, "learning_rate": 1.5242474722552486e-05, "loss": 2.7466, "step": 209900 }, { "epoch": 0.7, "learning_rate": 1.5225915634759513e-05, "loss": 2.7356, "step": 210000 }, { "epoch": 0.7, "learning_rate": 1.5209356546966542e-05, "loss": 2.7226, "step": 210100 }, { "epoch": 0.7, "learning_rate": 1.5192797459173569e-05, "loss": 2.7362, "step": 210200 }, { "epoch": 0.7, "learning_rate": 1.5176238371380599e-05, "loss": 2.7423, "step": 210300 }, { "epoch": 0.7, "learning_rate": 1.5159679283587628e-05, "loss": 2.7491, "step": 210400 }, { "epoch": 0.7, "learning_rate": 1.5143120195794655e-05, "loss": 2.7306, "step": 210500 }, { "epoch": 0.7, "learning_rate": 1.5126561108001683e-05, "loss": 2.7319, "step": 210600 }, { "epoch": 0.7, "learning_rate": 1.511000202020871e-05, "loss": 2.7384, "step": 210700 }, { "epoch": 0.7, "learning_rate": 1.509344293241574e-05, "loss": 2.7373, "step": 210800 }, { "epoch": 0.7, "learning_rate": 1.5076883844622769e-05, "loss": 2.7249, "step": 210900 }, { "epoch": 0.7, "learning_rate": 1.5060324756829796e-05, "loss": 2.743, "step": 211000 }, { "epoch": 0.7, "learning_rate": 1.5043765669036824e-05, "loss": 2.7335, "step": 211100 }, { "epoch": 0.7, "learning_rate": 1.5027206581243853e-05, "loss": 2.7283, "step": 211200 }, { "epoch": 0.7, "learning_rate": 1.5010647493450883e-05, "loss": 2.734, "step": 211300 }, { "epoch": 0.7, "learning_rate": 1.499408840565791e-05, "loss": 2.731, "step": 211400 }, { "epoch": 0.7, "learning_rate": 1.4977529317864939e-05, "loss": 2.7261, "step": 211500 }, { "epoch": 0.7, "learning_rate": 1.4960970230071966e-05, "loss": 2.733, "step": 211600 }, { "epoch": 0.7, "learning_rate": 1.4944411142278994e-05, "loss": 2.7482, "step": 211700 }, { "epoch": 0.7, "learning_rate": 1.4927852054486025e-05, "loss": 2.7321, "step": 211800 }, { "epoch": 0.7, "learning_rate": 1.4911292966693052e-05, "loss": 2.7307, "step": 211900 }, { "epoch": 0.7, "learning_rate": 1.489473387890008e-05, "loss": 2.7265, "step": 212000 }, { "epoch": 0.7, "learning_rate": 1.4878174791107107e-05, "loss": 2.7469, "step": 212100 }, { "epoch": 0.7, "learning_rate": 1.4861615703314136e-05, "loss": 2.7325, "step": 212200 }, { "epoch": 0.7, "learning_rate": 1.4845056615521166e-05, "loss": 2.7321, "step": 212300 }, { "epoch": 0.7, "learning_rate": 1.4828497527728195e-05, "loss": 2.7393, "step": 212400 }, { "epoch": 0.7, "learning_rate": 1.4811938439935222e-05, "loss": 2.7369, "step": 212500 }, { "epoch": 0.7, "learning_rate": 1.479537935214225e-05, "loss": 2.7519, "step": 212600 }, { "epoch": 0.7, "learning_rate": 1.4778820264349277e-05, "loss": 2.7335, "step": 212700 }, { "epoch": 0.7, "learning_rate": 1.4762261176556307e-05, "loss": 2.7435, "step": 212800 }, { "epoch": 0.71, "learning_rate": 1.4745702088763336e-05, "loss": 2.7406, "step": 212900 }, { "epoch": 0.71, "learning_rate": 1.4729143000970363e-05, "loss": 2.7373, "step": 213000 }, { "epoch": 0.71, "learning_rate": 1.4712583913177392e-05, "loss": 2.7298, "step": 213100 }, { "epoch": 0.71, "learning_rate": 1.4696024825384418e-05, "loss": 2.7432, "step": 213200 }, { "epoch": 0.71, "learning_rate": 1.467946573759145e-05, "loss": 2.7399, "step": 213300 }, { "epoch": 0.71, "learning_rate": 1.4662906649798477e-05, "loss": 2.7334, "step": 213400 }, { "epoch": 0.71, "learning_rate": 1.4646347562005506e-05, "loss": 2.721, "step": 213500 }, { "epoch": 0.71, "learning_rate": 1.4629788474212533e-05, "loss": 2.7371, "step": 213600 }, { "epoch": 0.71, "learning_rate": 1.4613229386419561e-05, "loss": 2.7389, "step": 213700 }, { "epoch": 0.71, "learning_rate": 1.4596670298626592e-05, "loss": 2.7398, "step": 213800 }, { "epoch": 0.71, "learning_rate": 1.4580111210833619e-05, "loss": 2.7177, "step": 213900 }, { "epoch": 0.71, "learning_rate": 1.4563552123040647e-05, "loss": 2.7336, "step": 214000 }, { "epoch": 0.71, "learning_rate": 1.4546993035247674e-05, "loss": 2.7318, "step": 214100 }, { "epoch": 0.71, "learning_rate": 1.4530433947454703e-05, "loss": 2.7353, "step": 214200 }, { "epoch": 0.71, "learning_rate": 1.4513874859661733e-05, "loss": 2.736, "step": 214300 }, { "epoch": 0.71, "learning_rate": 1.449731577186876e-05, "loss": 2.7365, "step": 214400 }, { "epoch": 0.71, "learning_rate": 1.4480756684075789e-05, "loss": 2.7302, "step": 214500 }, { "epoch": 0.71, "learning_rate": 1.4464197596282816e-05, "loss": 2.7378, "step": 214600 }, { "epoch": 0.71, "learning_rate": 1.4447638508489844e-05, "loss": 2.7422, "step": 214700 }, { "epoch": 0.71, "learning_rate": 1.4431079420696874e-05, "loss": 2.7343, "step": 214800 }, { "epoch": 0.71, "learning_rate": 1.4414520332903903e-05, "loss": 2.743, "step": 214900 }, { "epoch": 0.71, "learning_rate": 1.439796124511093e-05, "loss": 2.7572, "step": 215000 }, { "epoch": 0.71, "learning_rate": 1.4381402157317959e-05, "loss": 2.7367, "step": 215100 }, { "epoch": 0.71, "learning_rate": 1.4364843069524985e-05, "loss": 2.7367, "step": 215200 }, { "epoch": 0.71, "learning_rate": 1.4348283981732016e-05, "loss": 2.7333, "step": 215300 }, { "epoch": 0.71, "learning_rate": 1.4331724893939044e-05, "loss": 2.7396, "step": 215400 }, { "epoch": 0.71, "learning_rate": 1.4315165806146071e-05, "loss": 2.737, "step": 215500 }, { "epoch": 0.71, "learning_rate": 1.42986067183531e-05, "loss": 2.7206, "step": 215600 }, { "epoch": 0.71, "learning_rate": 1.4282047630560127e-05, "loss": 2.7255, "step": 215700 }, { "epoch": 0.71, "learning_rate": 1.4265488542767159e-05, "loss": 2.7462, "step": 215800 }, { "epoch": 0.72, "learning_rate": 1.4248929454974186e-05, "loss": 2.7506, "step": 215900 }, { "epoch": 0.72, "learning_rate": 1.4232370367181214e-05, "loss": 2.7401, "step": 216000 }, { "epoch": 0.72, "learning_rate": 1.4215811279388241e-05, "loss": 2.7385, "step": 216100 }, { "epoch": 0.72, "learning_rate": 1.419925219159527e-05, "loss": 2.7381, "step": 216200 }, { "epoch": 0.72, "learning_rate": 1.41826931038023e-05, "loss": 2.7312, "step": 216300 }, { "epoch": 0.72, "learning_rate": 1.4166134016009327e-05, "loss": 2.7415, "step": 216400 }, { "epoch": 0.72, "learning_rate": 1.4149574928216356e-05, "loss": 2.7345, "step": 216500 }, { "epoch": 0.72, "learning_rate": 1.4133015840423383e-05, "loss": 2.748, "step": 216600 }, { "epoch": 0.72, "learning_rate": 1.4116456752630411e-05, "loss": 2.748, "step": 216700 }, { "epoch": 0.72, "learning_rate": 1.4099897664837441e-05, "loss": 2.7453, "step": 216800 }, { "epoch": 0.72, "learning_rate": 1.408333857704447e-05, "loss": 2.7378, "step": 216900 }, { "epoch": 0.72, "learning_rate": 1.4066779489251497e-05, "loss": 2.7328, "step": 217000 }, { "epoch": 0.72, "learning_rate": 1.4050220401458526e-05, "loss": 2.7334, "step": 217100 }, { "epoch": 0.72, "learning_rate": 1.4033661313665552e-05, "loss": 2.7476, "step": 217200 }, { "epoch": 0.72, "learning_rate": 1.4017102225872583e-05, "loss": 2.7333, "step": 217300 }, { "epoch": 0.72, "learning_rate": 1.4000543138079611e-05, "loss": 2.7417, "step": 217400 }, { "epoch": 0.72, "learning_rate": 1.3983984050286638e-05, "loss": 2.741, "step": 217500 }, { "epoch": 0.72, "learning_rate": 1.3967424962493667e-05, "loss": 2.7236, "step": 217600 }, { "epoch": 0.72, "learning_rate": 1.3950865874700694e-05, "loss": 2.7387, "step": 217700 }, { "epoch": 0.72, "learning_rate": 1.3934306786907724e-05, "loss": 2.7393, "step": 217800 }, { "epoch": 0.72, "learning_rate": 1.3917747699114753e-05, "loss": 2.7202, "step": 217900 }, { "epoch": 0.72, "learning_rate": 1.390118861132178e-05, "loss": 2.744, "step": 218000 }, { "epoch": 0.72, "learning_rate": 1.3884629523528808e-05, "loss": 2.7358, "step": 218100 }, { "epoch": 0.72, "learning_rate": 1.3868070435735835e-05, "loss": 2.7504, "step": 218200 }, { "epoch": 0.72, "learning_rate": 1.3851511347942867e-05, "loss": 2.7312, "step": 218300 }, { "epoch": 0.72, "learning_rate": 1.3834952260149894e-05, "loss": 2.7199, "step": 218400 }, { "epoch": 0.72, "learning_rate": 1.3818393172356923e-05, "loss": 2.7406, "step": 218500 }, { "epoch": 0.72, "learning_rate": 1.380183408456395e-05, "loss": 2.7419, "step": 218600 }, { "epoch": 0.72, "learning_rate": 1.3785274996770978e-05, "loss": 2.742, "step": 218700 }, { "epoch": 0.72, "learning_rate": 1.3768715908978008e-05, "loss": 2.7436, "step": 218800 }, { "epoch": 0.72, "learning_rate": 1.3752156821185035e-05, "loss": 2.7303, "step": 218900 }, { "epoch": 0.73, "learning_rate": 1.3735597733392064e-05, "loss": 2.7314, "step": 219000 }, { "epoch": 0.73, "learning_rate": 1.3719038645599091e-05, "loss": 2.7448, "step": 219100 }, { "epoch": 0.73, "learning_rate": 1.370247955780612e-05, "loss": 2.7358, "step": 219200 }, { "epoch": 0.73, "learning_rate": 1.368592047001315e-05, "loss": 2.7307, "step": 219300 }, { "epoch": 0.73, "learning_rate": 1.3669361382220178e-05, "loss": 2.7211, "step": 219400 }, { "epoch": 0.73, "learning_rate": 1.3652802294427205e-05, "loss": 2.7314, "step": 219500 }, { "epoch": 0.73, "learning_rate": 1.3636243206634234e-05, "loss": 2.7512, "step": 219600 }, { "epoch": 0.73, "learning_rate": 1.361968411884126e-05, "loss": 2.7242, "step": 219700 }, { "epoch": 0.73, "learning_rate": 1.3603125031048291e-05, "loss": 2.7493, "step": 219800 }, { "epoch": 0.73, "learning_rate": 1.358656594325532e-05, "loss": 2.7336, "step": 219900 }, { "epoch": 0.73, "learning_rate": 1.3570006855462347e-05, "loss": 2.7397, "step": 220000 }, { "epoch": 0.73, "learning_rate": 1.3553447767669375e-05, "loss": 2.7303, "step": 220100 }, { "epoch": 0.73, "learning_rate": 1.3536888679876402e-05, "loss": 2.7419, "step": 220200 }, { "epoch": 0.73, "learning_rate": 1.3520329592083432e-05, "loss": 2.7435, "step": 220300 }, { "epoch": 0.73, "learning_rate": 1.3503770504290461e-05, "loss": 2.7441, "step": 220400 }, { "epoch": 0.73, "learning_rate": 1.348721141649749e-05, "loss": 2.7431, "step": 220500 }, { "epoch": 0.73, "learning_rate": 1.3470652328704517e-05, "loss": 2.7252, "step": 220600 }, { "epoch": 0.73, "learning_rate": 1.3454093240911545e-05, "loss": 2.7465, "step": 220700 }, { "epoch": 0.73, "learning_rate": 1.3437534153118576e-05, "loss": 2.747, "step": 220800 }, { "epoch": 0.73, "learning_rate": 1.3420975065325602e-05, "loss": 2.7359, "step": 220900 }, { "epoch": 0.73, "learning_rate": 1.3404415977532631e-05, "loss": 2.7248, "step": 221000 }, { "epoch": 0.73, "learning_rate": 1.3387856889739658e-05, "loss": 2.7419, "step": 221100 }, { "epoch": 0.73, "learning_rate": 1.3371297801946687e-05, "loss": 2.7386, "step": 221200 }, { "epoch": 0.73, "learning_rate": 1.3354738714153717e-05, "loss": 2.7346, "step": 221300 }, { "epoch": 0.73, "learning_rate": 1.3338179626360744e-05, "loss": 2.7413, "step": 221400 }, { "epoch": 0.73, "learning_rate": 1.3321620538567772e-05, "loss": 2.7421, "step": 221500 }, { "epoch": 0.73, "learning_rate": 1.33050614507748e-05, "loss": 2.7441, "step": 221600 }, { "epoch": 0.73, "learning_rate": 1.3288502362981828e-05, "loss": 2.7452, "step": 221700 }, { "epoch": 0.73, "learning_rate": 1.3271943275188858e-05, "loss": 2.7443, "step": 221800 }, { "epoch": 0.73, "learning_rate": 1.3255384187395887e-05, "loss": 2.7384, "step": 221900 }, { "epoch": 0.74, "learning_rate": 1.3238825099602914e-05, "loss": 2.741, "step": 222000 }, { "epoch": 0.74, "learning_rate": 1.3222266011809942e-05, "loss": 2.7404, "step": 222100 }, { "epoch": 0.74, "learning_rate": 1.320570692401697e-05, "loss": 2.7281, "step": 222200 }, { "epoch": 0.74, "learning_rate": 1.3189147836224e-05, "loss": 2.7304, "step": 222300 }, { "epoch": 0.74, "learning_rate": 1.3172588748431028e-05, "loss": 2.7464, "step": 222400 }, { "epoch": 0.74, "learning_rate": 1.3156029660638055e-05, "loss": 2.7167, "step": 222500 }, { "epoch": 0.74, "learning_rate": 1.3139470572845084e-05, "loss": 2.7363, "step": 222600 }, { "epoch": 0.74, "learning_rate": 1.312291148505211e-05, "loss": 2.7286, "step": 222700 }, { "epoch": 0.74, "learning_rate": 1.3106352397259143e-05, "loss": 2.7519, "step": 222800 }, { "epoch": 0.74, "learning_rate": 1.308979330946617e-05, "loss": 2.741, "step": 222900 }, { "epoch": 0.74, "learning_rate": 1.3073234221673198e-05, "loss": 2.7186, "step": 223000 }, { "epoch": 0.74, "learning_rate": 1.3056675133880225e-05, "loss": 2.7306, "step": 223100 }, { "epoch": 0.74, "learning_rate": 1.3040116046087254e-05, "loss": 2.732, "step": 223200 }, { "epoch": 0.74, "learning_rate": 1.3023556958294284e-05, "loss": 2.7292, "step": 223300 }, { "epoch": 0.74, "learning_rate": 1.300699787050131e-05, "loss": 2.7223, "step": 223400 }, { "epoch": 0.74, "learning_rate": 1.299043878270834e-05, "loss": 2.7343, "step": 223500 }, { "epoch": 0.74, "learning_rate": 1.2973879694915366e-05, "loss": 2.7359, "step": 223600 }, { "epoch": 0.74, "learning_rate": 1.2957320607122395e-05, "loss": 2.7215, "step": 223700 }, { "epoch": 0.74, "learning_rate": 1.2940761519329425e-05, "loss": 2.7199, "step": 223800 }, { "epoch": 0.74, "learning_rate": 1.2924202431536452e-05, "loss": 2.7357, "step": 223900 }, { "epoch": 0.74, "learning_rate": 1.290764334374348e-05, "loss": 2.7388, "step": 224000 }, { "epoch": 0.74, "learning_rate": 1.289108425595051e-05, "loss": 2.7297, "step": 224100 }, { "epoch": 0.74, "learning_rate": 1.2874525168157536e-05, "loss": 2.7261, "step": 224200 }, { "epoch": 0.74, "learning_rate": 1.2857966080364567e-05, "loss": 2.7593, "step": 224300 }, { "epoch": 0.74, "learning_rate": 1.2841406992571595e-05, "loss": 2.7388, "step": 224400 }, { "epoch": 0.74, "learning_rate": 1.2824847904778622e-05, "loss": 2.7441, "step": 224500 }, { "epoch": 0.74, "learning_rate": 1.280828881698565e-05, "loss": 2.7375, "step": 224600 }, { "epoch": 0.74, "learning_rate": 1.2791729729192678e-05, "loss": 2.7274, "step": 224700 }, { "epoch": 0.74, "learning_rate": 1.2775170641399708e-05, "loss": 2.7279, "step": 224800 }, { "epoch": 0.74, "learning_rate": 1.2758611553606736e-05, "loss": 2.7245, "step": 224900 }, { "epoch": 0.75, "learning_rate": 1.2742052465813763e-05, "loss": 2.7332, "step": 225000 }, { "epoch": 0.75, "learning_rate": 1.2725493378020792e-05, "loss": 2.7349, "step": 225100 }, { "epoch": 0.75, "learning_rate": 1.2708934290227819e-05, "loss": 2.7183, "step": 225200 }, { "epoch": 0.75, "learning_rate": 1.2692375202434851e-05, "loss": 2.744, "step": 225300 }, { "epoch": 0.75, "learning_rate": 1.2675816114641878e-05, "loss": 2.755, "step": 225400 }, { "epoch": 0.75, "learning_rate": 1.2659257026848906e-05, "loss": 2.7396, "step": 225500 }, { "epoch": 0.75, "learning_rate": 1.2642697939055933e-05, "loss": 2.7201, "step": 225600 }, { "epoch": 0.75, "learning_rate": 1.2626138851262962e-05, "loss": 2.7371, "step": 225700 }, { "epoch": 0.75, "learning_rate": 1.2609579763469992e-05, "loss": 2.7388, "step": 225800 }, { "epoch": 0.75, "learning_rate": 1.2593020675677019e-05, "loss": 2.7232, "step": 225900 }, { "epoch": 0.75, "learning_rate": 1.2576461587884048e-05, "loss": 2.7321, "step": 226000 }, { "epoch": 0.75, "learning_rate": 1.2559902500091075e-05, "loss": 2.7349, "step": 226100 }, { "epoch": 0.75, "learning_rate": 1.2543343412298103e-05, "loss": 2.7497, "step": 226200 }, { "epoch": 0.75, "learning_rate": 1.2526784324505134e-05, "loss": 2.7385, "step": 226300 }, { "epoch": 0.75, "learning_rate": 1.2510225236712162e-05, "loss": 2.7397, "step": 226400 }, { "epoch": 0.75, "learning_rate": 1.2493666148919189e-05, "loss": 2.7345, "step": 226500 }, { "epoch": 0.75, "learning_rate": 1.2477107061126218e-05, "loss": 2.734, "step": 226600 }, { "epoch": 0.75, "learning_rate": 1.2460547973333246e-05, "loss": 2.731, "step": 226700 }, { "epoch": 0.75, "learning_rate": 1.2443988885540273e-05, "loss": 2.7298, "step": 226800 }, { "epoch": 0.75, "learning_rate": 1.2427429797747302e-05, "loss": 2.7303, "step": 226900 }, { "epoch": 0.75, "learning_rate": 1.241087070995433e-05, "loss": 2.7343, "step": 227000 }, { "epoch": 0.75, "learning_rate": 1.2394311622161359e-05, "loss": 2.7504, "step": 227100 }, { "epoch": 0.75, "learning_rate": 1.2377752534368388e-05, "loss": 2.7192, "step": 227200 }, { "epoch": 0.75, "learning_rate": 1.2361193446575416e-05, "loss": 2.7333, "step": 227300 }, { "epoch": 0.75, "learning_rate": 1.2344634358782443e-05, "loss": 2.7464, "step": 227400 }, { "epoch": 0.75, "learning_rate": 1.2328075270989472e-05, "loss": 2.7402, "step": 227500 }, { "epoch": 0.75, "learning_rate": 1.23115161831965e-05, "loss": 2.7246, "step": 227600 }, { "epoch": 0.75, "learning_rate": 1.2294957095403529e-05, "loss": 2.7297, "step": 227700 }, { "epoch": 0.75, "learning_rate": 1.2278398007610558e-05, "loss": 2.738, "step": 227800 }, { "epoch": 0.75, "learning_rate": 1.2261838919817584e-05, "loss": 2.7303, "step": 227900 }, { "epoch": 0.76, "learning_rate": 1.2245279832024615e-05, "loss": 2.7445, "step": 228000 }, { "epoch": 0.76, "learning_rate": 1.2228720744231642e-05, "loss": 2.7405, "step": 228100 }, { "epoch": 0.76, "learning_rate": 1.2212161656438672e-05, "loss": 2.7304, "step": 228200 }, { "epoch": 0.76, "learning_rate": 1.2195602568645699e-05, "loss": 2.723, "step": 228300 }, { "epoch": 0.76, "learning_rate": 1.2179043480852728e-05, "loss": 2.7344, "step": 228400 }, { "epoch": 0.76, "learning_rate": 1.2162484393059756e-05, "loss": 2.7267, "step": 228500 }, { "epoch": 0.76, "learning_rate": 1.2145925305266783e-05, "loss": 2.7427, "step": 228600 }, { "epoch": 0.76, "learning_rate": 1.2129366217473813e-05, "loss": 2.7328, "step": 228700 }, { "epoch": 0.76, "learning_rate": 1.211280712968084e-05, "loss": 2.7359, "step": 228800 }, { "epoch": 0.76, "learning_rate": 1.2096248041887869e-05, "loss": 2.7302, "step": 228900 }, { "epoch": 0.76, "learning_rate": 1.2079688954094897e-05, "loss": 2.742, "step": 229000 }, { "epoch": 0.76, "learning_rate": 1.2063129866301926e-05, "loss": 2.7364, "step": 229100 }, { "epoch": 0.76, "learning_rate": 1.2046570778508955e-05, "loss": 2.732, "step": 229200 }, { "epoch": 0.76, "learning_rate": 1.2030011690715982e-05, "loss": 2.7296, "step": 229300 }, { "epoch": 0.76, "learning_rate": 1.201345260292301e-05, "loss": 2.728, "step": 229400 }, { "epoch": 0.76, "learning_rate": 1.1996893515130039e-05, "loss": 2.7314, "step": 229500 }, { "epoch": 0.76, "learning_rate": 1.1980334427337067e-05, "loss": 2.7189, "step": 229600 }, { "epoch": 0.76, "learning_rate": 1.1963775339544096e-05, "loss": 2.7374, "step": 229700 }, { "epoch": 0.76, "learning_rate": 1.1947216251751125e-05, "loss": 2.7278, "step": 229800 }, { "epoch": 0.76, "learning_rate": 1.1930657163958152e-05, "loss": 2.7257, "step": 229900 }, { "epoch": 0.76, "learning_rate": 1.1914098076165182e-05, "loss": 2.731, "step": 230000 }, { "epoch": 0.76, "learning_rate": 1.1897538988372209e-05, "loss": 2.7389, "step": 230100 }, { "epoch": 0.76, "learning_rate": 1.1880979900579237e-05, "loss": 2.7379, "step": 230200 }, { "epoch": 0.76, "learning_rate": 1.1864420812786266e-05, "loss": 2.7315, "step": 230300 }, { "epoch": 0.76, "learning_rate": 1.1847861724993293e-05, "loss": 2.7324, "step": 230400 }, { "epoch": 0.76, "learning_rate": 1.1831302637200323e-05, "loss": 2.7426, "step": 230500 }, { "epoch": 0.76, "learning_rate": 1.181474354940735e-05, "loss": 2.7392, "step": 230600 }, { "epoch": 0.76, "learning_rate": 1.179818446161438e-05, "loss": 2.7276, "step": 230700 }, { "epoch": 0.76, "learning_rate": 1.1781625373821407e-05, "loss": 2.7275, "step": 230800 }, { "epoch": 0.76, "learning_rate": 1.1765066286028436e-05, "loss": 2.7456, "step": 230900 }, { "epoch": 0.77, "learning_rate": 1.1748507198235464e-05, "loss": 2.7287, "step": 231000 }, { "epoch": 0.77, "learning_rate": 1.1731948110442491e-05, "loss": 2.7375, "step": 231100 }, { "epoch": 0.77, "learning_rate": 1.1715389022649522e-05, "loss": 2.7393, "step": 231200 }, { "epoch": 0.77, "learning_rate": 1.1698829934856549e-05, "loss": 2.7384, "step": 231300 }, { "epoch": 0.77, "learning_rate": 1.1682270847063577e-05, "loss": 2.7277, "step": 231400 }, { "epoch": 0.77, "learning_rate": 1.1665711759270606e-05, "loss": 2.7267, "step": 231500 }, { "epoch": 0.77, "learning_rate": 1.1649152671477634e-05, "loss": 2.7496, "step": 231600 }, { "epoch": 0.77, "learning_rate": 1.1632593583684663e-05, "loss": 2.7337, "step": 231700 }, { "epoch": 0.77, "learning_rate": 1.1616034495891692e-05, "loss": 2.7402, "step": 231800 }, { "epoch": 0.77, "learning_rate": 1.1599475408098719e-05, "loss": 2.7322, "step": 231900 }, { "epoch": 0.77, "learning_rate": 1.1582916320305747e-05, "loss": 2.7449, "step": 232000 }, { "epoch": 0.77, "learning_rate": 1.1566357232512776e-05, "loss": 2.717, "step": 232100 }, { "epoch": 0.77, "learning_rate": 1.1549798144719804e-05, "loss": 2.721, "step": 232200 }, { "epoch": 0.77, "learning_rate": 1.1533239056926833e-05, "loss": 2.7512, "step": 232300 }, { "epoch": 0.77, "learning_rate": 1.151667996913386e-05, "loss": 2.7197, "step": 232400 }, { "epoch": 0.77, "learning_rate": 1.150012088134089e-05, "loss": 2.7312, "step": 232500 }, { "epoch": 0.77, "learning_rate": 1.1483561793547917e-05, "loss": 2.7292, "step": 232600 }, { "epoch": 0.77, "learning_rate": 1.1467002705754946e-05, "loss": 2.7392, "step": 232700 }, { "epoch": 0.77, "learning_rate": 1.1450443617961974e-05, "loss": 2.7247, "step": 232800 }, { "epoch": 0.77, "learning_rate": 1.1433884530169001e-05, "loss": 2.7287, "step": 232900 }, { "epoch": 0.77, "learning_rate": 1.1417325442376031e-05, "loss": 2.7429, "step": 233000 }, { "epoch": 0.77, "learning_rate": 1.1400766354583058e-05, "loss": 2.7295, "step": 233100 }, { "epoch": 0.77, "learning_rate": 1.1384207266790089e-05, "loss": 2.7364, "step": 233200 }, { "epoch": 0.77, "learning_rate": 1.1367648178997116e-05, "loss": 2.7233, "step": 233300 }, { "epoch": 0.77, "learning_rate": 1.1351089091204144e-05, "loss": 2.7265, "step": 233400 }, { "epoch": 0.77, "learning_rate": 1.1334530003411173e-05, "loss": 2.7441, "step": 233500 }, { "epoch": 0.77, "learning_rate": 1.1317970915618201e-05, "loss": 2.7349, "step": 233600 }, { "epoch": 0.77, "learning_rate": 1.130141182782523e-05, "loss": 2.7382, "step": 233700 }, { "epoch": 0.77, "learning_rate": 1.1284852740032257e-05, "loss": 2.7291, "step": 233800 }, { "epoch": 0.77, "learning_rate": 1.1268293652239286e-05, "loss": 2.7563, "step": 233900 }, { "epoch": 0.77, "learning_rate": 1.1251734564446314e-05, "loss": 2.7223, "step": 234000 }, { "epoch": 0.78, "learning_rate": 1.1235175476653343e-05, "loss": 2.7279, "step": 234100 }, { "epoch": 0.78, "learning_rate": 1.1218616388860371e-05, "loss": 2.744, "step": 234200 }, { "epoch": 0.78, "learning_rate": 1.12020573010674e-05, "loss": 2.727, "step": 234300 }, { "epoch": 0.78, "learning_rate": 1.1185498213274427e-05, "loss": 2.7376, "step": 234400 }, { "epoch": 0.78, "learning_rate": 1.1168939125481455e-05, "loss": 2.7504, "step": 234500 }, { "epoch": 0.78, "learning_rate": 1.1152380037688484e-05, "loss": 2.7306, "step": 234600 }, { "epoch": 0.78, "learning_rate": 1.1135820949895513e-05, "loss": 2.7442, "step": 234700 }, { "epoch": 0.78, "learning_rate": 1.1119261862102541e-05, "loss": 2.746, "step": 234800 }, { "epoch": 0.78, "learning_rate": 1.1102702774309568e-05, "loss": 2.7402, "step": 234900 }, { "epoch": 0.78, "learning_rate": 1.1086143686516599e-05, "loss": 2.7389, "step": 235000 }, { "epoch": 0.78, "learning_rate": 1.1069584598723625e-05, "loss": 2.7335, "step": 235100 }, { "epoch": 0.78, "learning_rate": 1.1053025510930656e-05, "loss": 2.7338, "step": 235200 }, { "epoch": 0.78, "learning_rate": 1.1036466423137683e-05, "loss": 2.7424, "step": 235300 }, { "epoch": 0.78, "learning_rate": 1.1019907335344711e-05, "loss": 2.74, "step": 235400 }, { "epoch": 0.78, "learning_rate": 1.100334824755174e-05, "loss": 2.7222, "step": 235500 }, { "epoch": 0.78, "learning_rate": 1.0986789159758767e-05, "loss": 2.741, "step": 235600 }, { "epoch": 0.78, "learning_rate": 1.0970230071965797e-05, "loss": 2.7255, "step": 235700 }, { "epoch": 0.78, "learning_rate": 1.0953670984172824e-05, "loss": 2.7184, "step": 235800 }, { "epoch": 0.78, "learning_rate": 1.0937111896379853e-05, "loss": 2.728, "step": 235900 }, { "epoch": 0.78, "learning_rate": 1.0920552808586881e-05, "loss": 2.7346, "step": 236000 }, { "epoch": 0.78, "learning_rate": 1.090399372079391e-05, "loss": 2.7221, "step": 236100 }, { "epoch": 0.78, "learning_rate": 1.0887434633000938e-05, "loss": 2.7363, "step": 236200 }, { "epoch": 0.78, "learning_rate": 1.0870875545207965e-05, "loss": 2.7295, "step": 236300 }, { "epoch": 0.78, "learning_rate": 1.0854316457414994e-05, "loss": 2.7392, "step": 236400 }, { "epoch": 0.78, "learning_rate": 1.0837757369622023e-05, "loss": 2.7505, "step": 236500 }, { "epoch": 0.78, "learning_rate": 1.0821198281829051e-05, "loss": 2.7302, "step": 236600 }, { "epoch": 0.78, "learning_rate": 1.080463919403608e-05, "loss": 2.7467, "step": 236700 }, { "epoch": 0.78, "learning_rate": 1.0788080106243108e-05, "loss": 2.749, "step": 236800 }, { "epoch": 0.78, "learning_rate": 1.0771521018450135e-05, "loss": 2.7397, "step": 236900 }, { "epoch": 0.78, "learning_rate": 1.0754961930657166e-05, "loss": 2.7338, "step": 237000 }, { "epoch": 0.79, "learning_rate": 1.0738402842864192e-05, "loss": 2.7321, "step": 237100 }, { "epoch": 0.79, "learning_rate": 1.0721843755071221e-05, "loss": 2.7236, "step": 237200 }, { "epoch": 0.79, "learning_rate": 1.070528466727825e-05, "loss": 2.7381, "step": 237300 }, { "epoch": 0.79, "learning_rate": 1.0688725579485277e-05, "loss": 2.7401, "step": 237400 }, { "epoch": 0.79, "learning_rate": 1.0672166491692307e-05, "loss": 2.7288, "step": 237500 }, { "epoch": 0.79, "learning_rate": 1.0655607403899334e-05, "loss": 2.7309, "step": 237600 }, { "epoch": 0.79, "learning_rate": 1.0639048316106364e-05, "loss": 2.7342, "step": 237700 }, { "epoch": 0.79, "learning_rate": 1.0622489228313391e-05, "loss": 2.7188, "step": 237800 }, { "epoch": 0.79, "learning_rate": 1.060593014052042e-05, "loss": 2.733, "step": 237900 }, { "epoch": 0.79, "learning_rate": 1.0589371052727448e-05, "loss": 2.7289, "step": 238000 }, { "epoch": 0.79, "learning_rate": 1.0572811964934475e-05, "loss": 2.7486, "step": 238100 }, { "epoch": 0.79, "learning_rate": 1.0556252877141505e-05, "loss": 2.7332, "step": 238200 }, { "epoch": 0.79, "learning_rate": 1.0539693789348532e-05, "loss": 2.7219, "step": 238300 }, { "epoch": 0.79, "learning_rate": 1.0523134701555561e-05, "loss": 2.7237, "step": 238400 }, { "epoch": 0.79, "learning_rate": 1.050657561376259e-05, "loss": 2.7207, "step": 238500 }, { "epoch": 0.79, "learning_rate": 1.0490016525969618e-05, "loss": 2.7311, "step": 238600 }, { "epoch": 0.79, "learning_rate": 1.0473457438176647e-05, "loss": 2.72, "step": 238700 }, { "epoch": 0.79, "learning_rate": 1.0456898350383675e-05, "loss": 2.7227, "step": 238800 }, { "epoch": 0.79, "learning_rate": 1.0440339262590702e-05, "loss": 2.7341, "step": 238900 }, { "epoch": 0.79, "learning_rate": 1.0423780174797731e-05, "loss": 2.7221, "step": 239000 }, { "epoch": 0.79, "learning_rate": 1.040722108700476e-05, "loss": 2.7184, "step": 239100 }, { "epoch": 0.79, "learning_rate": 1.0390661999211788e-05, "loss": 2.7403, "step": 239200 }, { "epoch": 0.79, "learning_rate": 1.0374102911418817e-05, "loss": 2.7283, "step": 239300 }, { "epoch": 0.79, "learning_rate": 1.0357543823625844e-05, "loss": 2.7418, "step": 239400 }, { "epoch": 0.79, "learning_rate": 1.0340984735832874e-05, "loss": 2.7326, "step": 239500 }, { "epoch": 0.79, "learning_rate": 1.03244256480399e-05, "loss": 2.724, "step": 239600 }, { "epoch": 0.79, "learning_rate": 1.030786656024693e-05, "loss": 2.7366, "step": 239700 }, { "epoch": 0.79, "learning_rate": 1.0291307472453958e-05, "loss": 2.7332, "step": 239800 }, { "epoch": 0.79, "learning_rate": 1.0274748384660985e-05, "loss": 2.7303, "step": 239900 }, { "epoch": 0.79, "learning_rate": 1.0258189296868015e-05, "loss": 2.7411, "step": 240000 }, { "epoch": 0.8, "learning_rate": 1.0241630209075042e-05, "loss": 2.7556, "step": 240100 }, { "epoch": 0.8, "learning_rate": 1.0225071121282072e-05, "loss": 2.7342, "step": 240200 }, { "epoch": 0.8, "learning_rate": 1.02085120334891e-05, "loss": 2.7435, "step": 240300 }, { "epoch": 0.8, "learning_rate": 1.0191952945696128e-05, "loss": 2.7391, "step": 240400 }, { "epoch": 0.8, "learning_rate": 1.0175393857903157e-05, "loss": 2.747, "step": 240500 }, { "epoch": 0.8, "learning_rate": 1.0158834770110185e-05, "loss": 2.7265, "step": 240600 }, { "epoch": 0.8, "learning_rate": 1.0142275682317214e-05, "loss": 2.7361, "step": 240700 }, { "epoch": 0.8, "learning_rate": 1.012571659452424e-05, "loss": 2.7358, "step": 240800 }, { "epoch": 0.8, "learning_rate": 1.010915750673127e-05, "loss": 2.7215, "step": 240900 }, { "epoch": 0.8, "learning_rate": 1.0092598418938298e-05, "loss": 2.7271, "step": 241000 }, { "epoch": 0.8, "learning_rate": 1.0076039331145327e-05, "loss": 2.7506, "step": 241100 }, { "epoch": 0.8, "learning_rate": 1.0059480243352355e-05, "loss": 2.7365, "step": 241200 }, { "epoch": 0.8, "learning_rate": 1.0042921155559384e-05, "loss": 2.7371, "step": 241300 }, { "epoch": 0.8, "learning_rate": 1.002636206776641e-05, "loss": 2.7289, "step": 241400 }, { "epoch": 0.8, "learning_rate": 1.000980297997344e-05, "loss": 2.7282, "step": 241500 }, { "epoch": 0.8, "learning_rate": 9.993243892180468e-06, "loss": 2.7335, "step": 241600 }, { "epoch": 0.8, "learning_rate": 9.976684804387496e-06, "loss": 2.7417, "step": 241700 }, { "epoch": 0.8, "learning_rate": 9.960125716594525e-06, "loss": 2.7407, "step": 241800 }, { "epoch": 0.8, "learning_rate": 9.943566628801552e-06, "loss": 2.7359, "step": 241900 }, { "epoch": 0.8, "learning_rate": 9.927007541008582e-06, "loss": 2.7315, "step": 242000 }, { "epoch": 0.8, "learning_rate": 9.91044845321561e-06, "loss": 2.7384, "step": 242100 }, { "epoch": 0.8, "learning_rate": 9.893889365422638e-06, "loss": 2.7202, "step": 242200 }, { "epoch": 0.8, "learning_rate": 9.877330277629666e-06, "loss": 2.7463, "step": 242300 }, { "epoch": 0.8, "learning_rate": 9.860771189836695e-06, "loss": 2.7396, "step": 242400 }, { "epoch": 0.8, "learning_rate": 9.844212102043724e-06, "loss": 2.7273, "step": 242500 }, { "epoch": 0.8, "learning_rate": 9.82765301425075e-06, "loss": 2.7276, "step": 242600 }, { "epoch": 0.8, "learning_rate": 9.81109392645778e-06, "loss": 2.7437, "step": 242700 }, { "epoch": 0.8, "learning_rate": 9.794534838664808e-06, "loss": 2.7189, "step": 242800 }, { "epoch": 0.8, "learning_rate": 9.777975750871836e-06, "loss": 2.7196, "step": 242900 }, { "epoch": 0.8, "learning_rate": 9.761416663078865e-06, "loss": 2.7228, "step": 243000 }, { "epoch": 0.81, "learning_rate": 9.744857575285894e-06, "loss": 2.7269, "step": 243100 }, { "epoch": 0.81, "learning_rate": 9.728298487492922e-06, "loss": 2.7313, "step": 243200 }, { "epoch": 0.81, "learning_rate": 9.711739399699949e-06, "loss": 2.7326, "step": 243300 }, { "epoch": 0.81, "learning_rate": 9.695180311906978e-06, "loss": 2.7212, "step": 243400 }, { "epoch": 0.81, "learning_rate": 9.678621224114006e-06, "loss": 2.7277, "step": 243500 }, { "epoch": 0.81, "learning_rate": 9.662062136321035e-06, "loss": 2.7364, "step": 243600 }, { "epoch": 0.81, "learning_rate": 9.645503048528063e-06, "loss": 2.7339, "step": 243700 }, { "epoch": 0.81, "learning_rate": 9.628943960735092e-06, "loss": 2.7141, "step": 243800 }, { "epoch": 0.81, "learning_rate": 9.612384872942119e-06, "loss": 2.7451, "step": 243900 }, { "epoch": 0.81, "learning_rate": 9.595825785149148e-06, "loss": 2.7415, "step": 244000 }, { "epoch": 0.81, "learning_rate": 9.579266697356176e-06, "loss": 2.7323, "step": 244100 }, { "epoch": 0.81, "learning_rate": 9.562707609563205e-06, "loss": 2.7279, "step": 244200 }, { "epoch": 0.81, "learning_rate": 9.546148521770233e-06, "loss": 2.7405, "step": 244300 }, { "epoch": 0.81, "learning_rate": 9.52958943397726e-06, "loss": 2.7326, "step": 244400 }, { "epoch": 0.81, "learning_rate": 9.51303034618429e-06, "loss": 2.7187, "step": 244500 }, { "epoch": 0.81, "learning_rate": 9.496471258391318e-06, "loss": 2.7227, "step": 244600 }, { "epoch": 0.81, "learning_rate": 9.479912170598348e-06, "loss": 2.715, "step": 244700 }, { "epoch": 0.81, "learning_rate": 9.463353082805375e-06, "loss": 2.7438, "step": 244800 }, { "epoch": 0.81, "learning_rate": 9.446793995012403e-06, "loss": 2.737, "step": 244900 }, { "epoch": 0.81, "learning_rate": 9.430234907219432e-06, "loss": 2.7309, "step": 245000 }, { "epoch": 0.81, "learning_rate": 9.413675819426459e-06, "loss": 2.7345, "step": 245100 }, { "epoch": 0.81, "learning_rate": 9.39711673163349e-06, "loss": 2.732, "step": 245200 }, { "epoch": 0.81, "learning_rate": 9.380557643840516e-06, "loss": 2.7364, "step": 245300 }, { "epoch": 0.81, "learning_rate": 9.363998556047545e-06, "loss": 2.7486, "step": 245400 }, { "epoch": 0.81, "learning_rate": 9.347439468254573e-06, "loss": 2.715, "step": 245500 }, { "epoch": 0.81, "learning_rate": 9.330880380461602e-06, "loss": 2.7204, "step": 245600 }, { "epoch": 0.81, "learning_rate": 9.31432129266863e-06, "loss": 2.7261, "step": 245700 }, { "epoch": 0.81, "learning_rate": 9.297762204875657e-06, "loss": 2.7294, "step": 245800 }, { "epoch": 0.81, "learning_rate": 9.281203117082686e-06, "loss": 2.7232, "step": 245900 }, { "epoch": 0.81, "learning_rate": 9.264644029289715e-06, "loss": 2.7211, "step": 246000 }, { "epoch": 0.82, "learning_rate": 9.248084941496743e-06, "loss": 2.7481, "step": 246100 }, { "epoch": 0.82, "learning_rate": 9.231525853703772e-06, "loss": 2.7316, "step": 246200 }, { "epoch": 0.82, "learning_rate": 9.2149667659108e-06, "loss": 2.7335, "step": 246300 }, { "epoch": 0.82, "learning_rate": 9.198407678117827e-06, "loss": 2.7222, "step": 246400 }, { "epoch": 0.82, "learning_rate": 9.181848590324858e-06, "loss": 2.7335, "step": 246500 }, { "epoch": 0.82, "learning_rate": 9.165289502531885e-06, "loss": 2.7255, "step": 246600 }, { "epoch": 0.82, "learning_rate": 9.148730414738913e-06, "loss": 2.7286, "step": 246700 }, { "epoch": 0.82, "learning_rate": 9.132171326945942e-06, "loss": 2.724, "step": 246800 }, { "epoch": 0.82, "learning_rate": 9.115612239152969e-06, "loss": 2.7224, "step": 246900 }, { "epoch": 0.82, "learning_rate": 9.099053151359999e-06, "loss": 2.7295, "step": 247000 }, { "epoch": 0.82, "learning_rate": 9.082494063567026e-06, "loss": 2.7223, "step": 247100 }, { "epoch": 0.82, "learning_rate": 9.065934975774056e-06, "loss": 2.7345, "step": 247200 }, { "epoch": 0.82, "learning_rate": 9.049375887981083e-06, "loss": 2.7282, "step": 247300 }, { "epoch": 0.82, "learning_rate": 9.032816800188112e-06, "loss": 2.7213, "step": 247400 }, { "epoch": 0.82, "learning_rate": 9.01625771239514e-06, "loss": 2.7421, "step": 247500 }, { "epoch": 0.82, "learning_rate": 8.999698624602167e-06, "loss": 2.7305, "step": 247600 }, { "epoch": 0.82, "learning_rate": 8.983139536809198e-06, "loss": 2.7322, "step": 247700 }, { "epoch": 0.82, "learning_rate": 8.966580449016224e-06, "loss": 2.7298, "step": 247800 }, { "epoch": 0.82, "learning_rate": 8.950021361223253e-06, "loss": 2.7287, "step": 247900 }, { "epoch": 0.82, "learning_rate": 8.933462273430282e-06, "loss": 2.7343, "step": 248000 }, { "epoch": 0.82, "learning_rate": 8.91690318563731e-06, "loss": 2.7329, "step": 248100 }, { "epoch": 0.82, "learning_rate": 8.900344097844339e-06, "loss": 2.7402, "step": 248200 }, { "epoch": 0.82, "learning_rate": 8.883785010051367e-06, "loss": 2.7289, "step": 248300 }, { "epoch": 0.82, "learning_rate": 8.867225922258394e-06, "loss": 2.7376, "step": 248400 }, { "epoch": 0.82, "learning_rate": 8.850666834465423e-06, "loss": 2.7236, "step": 248500 }, { "epoch": 0.82, "learning_rate": 8.834107746672452e-06, "loss": 2.728, "step": 248600 }, { "epoch": 0.82, "learning_rate": 8.81754865887948e-06, "loss": 2.7391, "step": 248700 }, { "epoch": 0.82, "learning_rate": 8.800989571086509e-06, "loss": 2.7361, "step": 248800 }, { "epoch": 0.82, "learning_rate": 8.784430483293536e-06, "loss": 2.75, "step": 248900 }, { "epoch": 0.82, "learning_rate": 8.767871395500566e-06, "loss": 2.7213, "step": 249000 }, { "epoch": 0.82, "learning_rate": 8.751312307707593e-06, "loss": 2.7327, "step": 249100 }, { "epoch": 0.83, "learning_rate": 8.734753219914622e-06, "loss": 2.7299, "step": 249200 }, { "epoch": 0.83, "learning_rate": 8.71819413212165e-06, "loss": 2.722, "step": 249300 }, { "epoch": 0.83, "learning_rate": 8.701635044328677e-06, "loss": 2.7191, "step": 249400 }, { "epoch": 0.83, "learning_rate": 8.685075956535707e-06, "loss": 2.7378, "step": 249500 }, { "epoch": 0.83, "learning_rate": 8.668516868742734e-06, "loss": 2.7283, "step": 249600 }, { "epoch": 0.83, "learning_rate": 8.651957780949765e-06, "loss": 2.7348, "step": 249700 }, { "epoch": 0.83, "learning_rate": 8.635398693156791e-06, "loss": 2.7295, "step": 249800 }, { "epoch": 0.83, "learning_rate": 8.61883960536382e-06, "loss": 2.7287, "step": 249900 }, { "epoch": 0.83, "learning_rate": 8.602280517570849e-06, "loss": 2.7304, "step": 250000 }, { "epoch": 0.83, "learning_rate": 8.585721429777877e-06, "loss": 2.7209, "step": 250100 }, { "epoch": 0.83, "learning_rate": 8.569162341984906e-06, "loss": 2.7467, "step": 250200 }, { "epoch": 0.83, "learning_rate": 8.552603254191933e-06, "loss": 2.7398, "step": 250300 }, { "epoch": 0.83, "learning_rate": 8.536044166398961e-06, "loss": 2.7311, "step": 250400 }, { "epoch": 0.83, "learning_rate": 8.51948507860599e-06, "loss": 2.7132, "step": 250500 }, { "epoch": 0.83, "learning_rate": 8.502925990813019e-06, "loss": 2.727, "step": 250600 }, { "epoch": 0.83, "learning_rate": 8.486366903020047e-06, "loss": 2.7394, "step": 250700 }, { "epoch": 0.83, "learning_rate": 8.469807815227076e-06, "loss": 2.7202, "step": 250800 }, { "epoch": 0.83, "learning_rate": 8.453248727434103e-06, "loss": 2.7196, "step": 250900 }, { "epoch": 0.83, "learning_rate": 8.436689639641131e-06, "loss": 2.7282, "step": 251000 }, { "epoch": 0.83, "learning_rate": 8.42013055184816e-06, "loss": 2.725, "step": 251100 }, { "epoch": 0.83, "learning_rate": 8.403571464055189e-06, "loss": 2.7321, "step": 251200 }, { "epoch": 0.83, "learning_rate": 8.387012376262217e-06, "loss": 2.7328, "step": 251300 }, { "epoch": 0.83, "learning_rate": 8.370453288469244e-06, "loss": 2.7245, "step": 251400 }, { "epoch": 0.83, "learning_rate": 8.353894200676274e-06, "loss": 2.7153, "step": 251500 }, { "epoch": 0.83, "learning_rate": 8.337335112883301e-06, "loss": 2.7212, "step": 251600 }, { "epoch": 0.83, "learning_rate": 8.320776025090332e-06, "loss": 2.7287, "step": 251700 }, { "epoch": 0.83, "learning_rate": 8.304216937297359e-06, "loss": 2.7498, "step": 251800 }, { "epoch": 0.83, "learning_rate": 8.287657849504387e-06, "loss": 2.7277, "step": 251900 }, { "epoch": 0.83, "learning_rate": 8.271098761711416e-06, "loss": 2.7236, "step": 252000 }, { "epoch": 0.83, "learning_rate": 8.254539673918443e-06, "loss": 2.747, "step": 252100 }, { "epoch": 0.84, "learning_rate": 8.237980586125473e-06, "loss": 2.7302, "step": 252200 }, { "epoch": 0.84, "learning_rate": 8.2214214983325e-06, "loss": 2.7151, "step": 252300 }, { "epoch": 0.84, "learning_rate": 8.204862410539528e-06, "loss": 2.7216, "step": 252400 }, { "epoch": 0.84, "learning_rate": 8.188303322746557e-06, "loss": 2.7344, "step": 252500 }, { "epoch": 0.84, "learning_rate": 8.171744234953586e-06, "loss": 2.7268, "step": 252600 }, { "epoch": 0.84, "learning_rate": 8.155185147160614e-06, "loss": 2.7449, "step": 252700 }, { "epoch": 0.84, "learning_rate": 8.138626059367641e-06, "loss": 2.7301, "step": 252800 }, { "epoch": 0.84, "learning_rate": 8.12206697157467e-06, "loss": 2.7315, "step": 252900 }, { "epoch": 0.84, "learning_rate": 8.105507883781698e-06, "loss": 2.7146, "step": 253000 }, { "epoch": 0.84, "learning_rate": 8.088948795988727e-06, "loss": 2.7296, "step": 253100 }, { "epoch": 0.84, "learning_rate": 8.072389708195756e-06, "loss": 2.7314, "step": 253200 }, { "epoch": 0.84, "learning_rate": 8.055830620402784e-06, "loss": 2.7356, "step": 253300 }, { "epoch": 0.84, "learning_rate": 8.039271532609811e-06, "loss": 2.7349, "step": 253400 }, { "epoch": 0.84, "learning_rate": 8.022712444816841e-06, "loss": 2.7229, "step": 253500 }, { "epoch": 0.84, "learning_rate": 8.006153357023868e-06, "loss": 2.7295, "step": 253600 }, { "epoch": 0.84, "learning_rate": 7.989594269230897e-06, "loss": 2.7299, "step": 253700 }, { "epoch": 0.84, "learning_rate": 7.973035181437926e-06, "loss": 2.7343, "step": 253800 }, { "epoch": 0.84, "learning_rate": 7.956476093644952e-06, "loss": 2.72, "step": 253900 }, { "epoch": 0.84, "learning_rate": 7.939917005851983e-06, "loss": 2.7371, "step": 254000 }, { "epoch": 0.84, "learning_rate": 7.92335791805901e-06, "loss": 2.7391, "step": 254100 }, { "epoch": 0.84, "learning_rate": 7.90679883026604e-06, "loss": 2.7278, "step": 254200 }, { "epoch": 0.84, "learning_rate": 7.890239742473067e-06, "loss": 2.7396, "step": 254300 }, { "epoch": 0.84, "learning_rate": 7.873680654680095e-06, "loss": 2.7233, "step": 254400 }, { "epoch": 0.84, "learning_rate": 7.857121566887124e-06, "loss": 2.7259, "step": 254500 }, { "epoch": 0.84, "learning_rate": 7.840562479094151e-06, "loss": 2.7401, "step": 254600 }, { "epoch": 0.84, "learning_rate": 7.824003391301181e-06, "loss": 2.7365, "step": 254700 }, { "epoch": 0.84, "learning_rate": 7.807444303508208e-06, "loss": 2.7104, "step": 254800 }, { "epoch": 0.84, "learning_rate": 7.790885215715237e-06, "loss": 2.7307, "step": 254900 }, { "epoch": 0.84, "learning_rate": 7.774326127922265e-06, "loss": 2.7335, "step": 255000 }, { "epoch": 0.84, "learning_rate": 7.757767040129294e-06, "loss": 2.7234, "step": 255100 }, { "epoch": 0.85, "learning_rate": 7.741207952336323e-06, "loss": 2.7422, "step": 255200 }, { "epoch": 0.85, "learning_rate": 7.724648864543351e-06, "loss": 2.7418, "step": 255300 }, { "epoch": 0.85, "learning_rate": 7.708089776750378e-06, "loss": 2.7111, "step": 255400 }, { "epoch": 0.85, "learning_rate": 7.691530688957407e-06, "loss": 2.7152, "step": 255500 }, { "epoch": 0.85, "learning_rate": 7.674971601164435e-06, "loss": 2.7339, "step": 255600 }, { "epoch": 0.85, "learning_rate": 7.658412513371464e-06, "loss": 2.7347, "step": 255700 }, { "epoch": 0.85, "learning_rate": 7.641853425578493e-06, "loss": 2.7201, "step": 255800 }, { "epoch": 0.85, "learning_rate": 7.6252943377855195e-06, "loss": 2.7186, "step": 255900 }, { "epoch": 0.85, "learning_rate": 7.608735249992549e-06, "loss": 2.7346, "step": 256000 }, { "epoch": 0.85, "learning_rate": 7.592176162199577e-06, "loss": 2.7375, "step": 256100 }, { "epoch": 0.85, "learning_rate": 7.575617074406606e-06, "loss": 2.716, "step": 256200 }, { "epoch": 0.85, "learning_rate": 7.559057986613634e-06, "loss": 2.7346, "step": 256300 }, { "epoch": 0.85, "learning_rate": 7.542498898820662e-06, "loss": 2.7283, "step": 256400 }, { "epoch": 0.85, "learning_rate": 7.525939811027691e-06, "loss": 2.7511, "step": 256500 }, { "epoch": 0.85, "learning_rate": 7.509380723234719e-06, "loss": 2.7392, "step": 256600 }, { "epoch": 0.85, "learning_rate": 7.4928216354417475e-06, "loss": 2.7376, "step": 256700 }, { "epoch": 0.85, "learning_rate": 7.476262547648775e-06, "loss": 2.7439, "step": 256800 }, { "epoch": 0.85, "learning_rate": 7.459703459855803e-06, "loss": 2.728, "step": 256900 }, { "epoch": 0.85, "learning_rate": 7.4431443720628324e-06, "loss": 2.7225, "step": 257000 }, { "epoch": 0.85, "learning_rate": 7.42658528426986e-06, "loss": 2.7322, "step": 257100 }, { "epoch": 0.85, "learning_rate": 7.41002619647689e-06, "loss": 2.7183, "step": 257200 }, { "epoch": 0.85, "learning_rate": 7.393467108683917e-06, "loss": 2.7408, "step": 257300 }, { "epoch": 0.85, "learning_rate": 7.376908020890945e-06, "loss": 2.7228, "step": 257400 }, { "epoch": 0.85, "learning_rate": 7.360348933097974e-06, "loss": 2.7164, "step": 257500 }, { "epoch": 0.85, "learning_rate": 7.3437898453050015e-06, "loss": 2.7168, "step": 257600 }, { "epoch": 0.85, "learning_rate": 7.327230757512031e-06, "loss": 2.7279, "step": 257700 }, { "epoch": 0.85, "learning_rate": 7.310671669719059e-06, "loss": 2.7405, "step": 257800 }, { "epoch": 0.85, "learning_rate": 7.2941125819260865e-06, "loss": 2.7341, "step": 257900 }, { "epoch": 0.85, "learning_rate": 7.277553494133116e-06, "loss": 2.7095, "step": 258000 }, { "epoch": 0.85, "learning_rate": 7.260994406340144e-06, "loss": 2.7196, "step": 258100 }, { "epoch": 0.86, "learning_rate": 7.244435318547173e-06, "loss": 2.7109, "step": 258200 }, { "epoch": 0.86, "learning_rate": 7.227876230754201e-06, "loss": 2.7292, "step": 258300 }, { "epoch": 0.86, "learning_rate": 7.211317142961229e-06, "loss": 2.7126, "step": 258400 }, { "epoch": 0.86, "learning_rate": 7.194758055168257e-06, "loss": 2.7343, "step": 258500 }, { "epoch": 0.86, "learning_rate": 7.178198967375285e-06, "loss": 2.7322, "step": 258600 }, { "epoch": 0.86, "learning_rate": 7.1616398795823145e-06, "loss": 2.732, "step": 258700 }, { "epoch": 0.86, "learning_rate": 7.145080791789342e-06, "loss": 2.7219, "step": 258800 }, { "epoch": 0.86, "learning_rate": 7.12852170399637e-06, "loss": 2.7284, "step": 258900 }, { "epoch": 0.86, "learning_rate": 7.1119626162033995e-06, "loss": 2.7177, "step": 259000 }, { "epoch": 0.86, "learning_rate": 7.095403528410427e-06, "loss": 2.7243, "step": 259100 }, { "epoch": 0.86, "learning_rate": 7.078844440617456e-06, "loss": 2.7318, "step": 259200 }, { "epoch": 0.86, "learning_rate": 7.062285352824484e-06, "loss": 2.7144, "step": 259300 }, { "epoch": 0.86, "learning_rate": 7.045726265031511e-06, "loss": 2.7396, "step": 259400 }, { "epoch": 0.86, "learning_rate": 7.029167177238541e-06, "loss": 2.7345, "step": 259500 }, { "epoch": 0.86, "learning_rate": 7.0126080894455686e-06, "loss": 2.7433, "step": 259600 }, { "epoch": 0.86, "learning_rate": 6.996049001652598e-06, "loss": 2.7284, "step": 259700 }, { "epoch": 0.86, "learning_rate": 6.979489913859626e-06, "loss": 2.7347, "step": 259800 }, { "epoch": 0.86, "learning_rate": 6.9629308260666535e-06, "loss": 2.7212, "step": 259900 }, { "epoch": 0.86, "learning_rate": 6.946371738273683e-06, "loss": 2.7116, "step": 260000 }, { "epoch": 0.86, "learning_rate": 6.929812650480711e-06, "loss": 2.7319, "step": 260100 }, { "epoch": 0.86, "learning_rate": 6.913253562687739e-06, "loss": 2.7408, "step": 260200 }, { "epoch": 0.86, "learning_rate": 6.896694474894767e-06, "loss": 2.7192, "step": 260300 }, { "epoch": 0.86, "learning_rate": 6.880135387101795e-06, "loss": 2.7284, "step": 260400 }, { "epoch": 0.86, "learning_rate": 6.863576299308824e-06, "loss": 2.7441, "step": 260500 }, { "epoch": 0.86, "learning_rate": 6.847017211515852e-06, "loss": 2.7253, "step": 260600 }, { "epoch": 0.86, "learning_rate": 6.8304581237228815e-06, "loss": 2.7369, "step": 260700 }, { "epoch": 0.86, "learning_rate": 6.813899035929909e-06, "loss": 2.7321, "step": 260800 }, { "epoch": 0.86, "learning_rate": 6.797339948136937e-06, "loss": 2.728, "step": 260900 }, { "epoch": 0.86, "learning_rate": 6.780780860343966e-06, "loss": 2.7196, "step": 261000 }, { "epoch": 0.86, "learning_rate": 6.764221772550993e-06, "loss": 2.7382, "step": 261100 }, { "epoch": 0.87, "learning_rate": 6.747662684758023e-06, "loss": 2.7289, "step": 261200 }, { "epoch": 0.87, "learning_rate": 6.731103596965051e-06, "loss": 2.7446, "step": 261300 }, { "epoch": 0.87, "learning_rate": 6.714544509172078e-06, "loss": 2.7189, "step": 261400 }, { "epoch": 0.87, "learning_rate": 6.697985421379108e-06, "loss": 2.7191, "step": 261500 }, { "epoch": 0.87, "learning_rate": 6.681426333586136e-06, "loss": 2.7291, "step": 261600 }, { "epoch": 0.87, "learning_rate": 6.664867245793165e-06, "loss": 2.7239, "step": 261700 }, { "epoch": 0.87, "learning_rate": 6.648308158000193e-06, "loss": 2.7359, "step": 261800 }, { "epoch": 0.87, "learning_rate": 6.6317490702072206e-06, "loss": 2.7416, "step": 261900 }, { "epoch": 0.87, "learning_rate": 6.615189982414249e-06, "loss": 2.7367, "step": 262000 }, { "epoch": 0.87, "learning_rate": 6.598630894621277e-06, "loss": 2.7358, "step": 262100 }, { "epoch": 0.87, "learning_rate": 6.582071806828306e-06, "loss": 2.7254, "step": 262200 }, { "epoch": 0.87, "learning_rate": 6.565512719035334e-06, "loss": 2.7489, "step": 262300 }, { "epoch": 0.87, "learning_rate": 6.548953631242362e-06, "loss": 2.7352, "step": 262400 }, { "epoch": 0.87, "learning_rate": 6.532394543449391e-06, "loss": 2.7318, "step": 262500 }, { "epoch": 0.87, "learning_rate": 6.515835455656419e-06, "loss": 2.7296, "step": 262600 }, { "epoch": 0.87, "learning_rate": 6.499276367863448e-06, "loss": 2.737, "step": 262700 }, { "epoch": 0.87, "learning_rate": 6.4827172800704755e-06, "loss": 2.732, "step": 262800 }, { "epoch": 0.87, "learning_rate": 6.466158192277503e-06, "loss": 2.7358, "step": 262900 }, { "epoch": 0.87, "learning_rate": 6.449599104484533e-06, "loss": 2.7234, "step": 263000 }, { "epoch": 0.87, "learning_rate": 6.4330400166915604e-06, "loss": 2.7303, "step": 263100 }, { "epoch": 0.87, "learning_rate": 6.41648092889859e-06, "loss": 2.7186, "step": 263200 }, { "epoch": 0.87, "learning_rate": 6.399921841105618e-06, "loss": 2.7135, "step": 263300 }, { "epoch": 0.87, "learning_rate": 6.383362753312645e-06, "loss": 2.7306, "step": 263400 }, { "epoch": 0.87, "learning_rate": 6.366803665519675e-06, "loss": 2.7262, "step": 263500 }, { "epoch": 0.87, "learning_rate": 6.350244577726703e-06, "loss": 2.7382, "step": 263600 }, { "epoch": 0.87, "learning_rate": 6.333685489933731e-06, "loss": 2.7274, "step": 263700 }, { "epoch": 0.87, "learning_rate": 6.317126402140759e-06, "loss": 2.7158, "step": 263800 }, { "epoch": 0.87, "learning_rate": 6.300567314347787e-06, "loss": 2.7248, "step": 263900 }, { "epoch": 0.87, "learning_rate": 6.284008226554816e-06, "loss": 2.7407, "step": 264000 }, { "epoch": 0.87, "learning_rate": 6.267449138761844e-06, "loss": 2.7307, "step": 264100 }, { "epoch": 0.87, "learning_rate": 6.250890050968873e-06, "loss": 2.729, "step": 264200 }, { "epoch": 0.88, "learning_rate": 6.234330963175901e-06, "loss": 2.7309, "step": 264300 }, { "epoch": 0.88, "learning_rate": 6.21777187538293e-06, "loss": 2.73, "step": 264400 }, { "epoch": 0.88, "learning_rate": 6.2012127875899575e-06, "loss": 2.7356, "step": 264500 }, { "epoch": 0.88, "learning_rate": 6.184653699796985e-06, "loss": 2.7466, "step": 264600 }, { "epoch": 0.88, "learning_rate": 6.168094612004014e-06, "loss": 2.7231, "step": 264700 }, { "epoch": 0.88, "learning_rate": 6.1515355242110425e-06, "loss": 2.7323, "step": 264800 }, { "epoch": 0.88, "learning_rate": 6.134976436418071e-06, "loss": 2.7242, "step": 264900 }, { "epoch": 0.88, "learning_rate": 6.1184173486251e-06, "loss": 2.6975, "step": 265000 }, { "epoch": 0.88, "learning_rate": 6.1018582608321275e-06, "loss": 2.7308, "step": 265100 }, { "epoch": 0.88, "learning_rate": 6.085299173039156e-06, "loss": 2.7255, "step": 265200 }, { "epoch": 0.88, "learning_rate": 6.068740085246185e-06, "loss": 2.739, "step": 265300 }, { "epoch": 0.88, "learning_rate": 6.0521809974532124e-06, "loss": 2.7254, "step": 265400 }, { "epoch": 0.88, "learning_rate": 6.035621909660241e-06, "loss": 2.7237, "step": 265500 }, { "epoch": 0.88, "learning_rate": 6.019062821867269e-06, "loss": 2.735, "step": 265600 }, { "epoch": 0.88, "learning_rate": 6.002503734074297e-06, "loss": 2.727, "step": 265700 }, { "epoch": 0.88, "learning_rate": 5.985944646281326e-06, "loss": 2.7353, "step": 265800 }, { "epoch": 0.88, "learning_rate": 5.969385558488355e-06, "loss": 2.711, "step": 265900 }, { "epoch": 0.88, "learning_rate": 5.952826470695383e-06, "loss": 2.7353, "step": 266000 }, { "epoch": 0.88, "learning_rate": 5.936267382902411e-06, "loss": 2.7081, "step": 266100 }, { "epoch": 0.88, "learning_rate": 5.91970829510944e-06, "loss": 2.7261, "step": 266200 }, { "epoch": 0.88, "learning_rate": 5.903149207316467e-06, "loss": 2.7307, "step": 266300 }, { "epoch": 0.88, "learning_rate": 5.886590119523496e-06, "loss": 2.7225, "step": 266400 }, { "epoch": 0.88, "learning_rate": 5.8700310317305246e-06, "loss": 2.7391, "step": 266500 }, { "epoch": 0.88, "learning_rate": 5.853471943937552e-06, "loss": 2.7216, "step": 266600 }, { "epoch": 0.88, "learning_rate": 5.836912856144581e-06, "loss": 2.7253, "step": 266700 }, { "epoch": 0.88, "learning_rate": 5.8203537683516095e-06, "loss": 2.7175, "step": 266800 }, { "epoch": 0.88, "learning_rate": 5.803794680558638e-06, "loss": 2.7278, "step": 266900 }, { "epoch": 0.88, "learning_rate": 5.787235592765666e-06, "loss": 2.7356, "step": 267000 }, { "epoch": 0.88, "learning_rate": 5.7706765049726945e-06, "loss": 2.7228, "step": 267100 }, { "epoch": 0.88, "learning_rate": 5.754117417179722e-06, "loss": 2.7192, "step": 267200 }, { "epoch": 0.89, "learning_rate": 5.737558329386751e-06, "loss": 2.7422, "step": 267300 }, { "epoch": 0.89, "learning_rate": 5.7209992415937795e-06, "loss": 2.7233, "step": 267400 }, { "epoch": 0.89, "learning_rate": 5.704440153800808e-06, "loss": 2.7247, "step": 267500 }, { "epoch": 0.89, "learning_rate": 5.687881066007836e-06, "loss": 2.7277, "step": 267600 }, { "epoch": 0.89, "learning_rate": 5.6713219782148644e-06, "loss": 2.7189, "step": 267700 }, { "epoch": 0.89, "learning_rate": 5.654762890421893e-06, "loss": 2.7251, "step": 267800 }, { "epoch": 0.89, "learning_rate": 5.638203802628921e-06, "loss": 2.718, "step": 267900 }, { "epoch": 0.89, "learning_rate": 5.621644714835949e-06, "loss": 2.7373, "step": 268000 }, { "epoch": 0.89, "learning_rate": 5.605085627042977e-06, "loss": 2.7404, "step": 268100 }, { "epoch": 0.89, "learning_rate": 5.588526539250006e-06, "loss": 2.7182, "step": 268200 }, { "epoch": 0.89, "learning_rate": 5.571967451457034e-06, "loss": 2.7222, "step": 268300 }, { "epoch": 0.89, "learning_rate": 5.555408363664063e-06, "loss": 2.7348, "step": 268400 }, { "epoch": 0.89, "learning_rate": 5.538849275871092e-06, "loss": 2.7245, "step": 268500 }, { "epoch": 0.89, "learning_rate": 5.522290188078119e-06, "loss": 2.7349, "step": 268600 }, { "epoch": 0.89, "learning_rate": 5.505731100285148e-06, "loss": 2.7358, "step": 268700 }, { "epoch": 0.89, "learning_rate": 5.489172012492176e-06, "loss": 2.7235, "step": 268800 }, { "epoch": 0.89, "learning_rate": 5.472612924699204e-06, "loss": 2.7184, "step": 268900 }, { "epoch": 0.89, "learning_rate": 5.456053836906233e-06, "loss": 2.7291, "step": 269000 }, { "epoch": 0.89, "learning_rate": 5.439494749113261e-06, "loss": 2.7339, "step": 269100 }, { "epoch": 0.89, "learning_rate": 5.422935661320289e-06, "loss": 2.7224, "step": 269200 }, { "epoch": 0.89, "learning_rate": 5.406376573527318e-06, "loss": 2.7226, "step": 269300 }, { "epoch": 0.89, "learning_rate": 5.3898174857343465e-06, "loss": 2.728, "step": 269400 }, { "epoch": 0.89, "learning_rate": 5.373258397941375e-06, "loss": 2.7079, "step": 269500 }, { "epoch": 0.89, "learning_rate": 5.356699310148403e-06, "loss": 2.7412, "step": 269600 }, { "epoch": 0.89, "learning_rate": 5.340140222355431e-06, "loss": 2.7284, "step": 269700 }, { "epoch": 0.89, "learning_rate": 5.323581134562459e-06, "loss": 2.7307, "step": 269800 }, { "epoch": 0.89, "learning_rate": 5.307022046769488e-06, "loss": 2.7229, "step": 269900 }, { "epoch": 0.89, "learning_rate": 5.2904629589765164e-06, "loss": 2.7277, "step": 270000 }, { "epoch": 0.89, "learning_rate": 5.273903871183544e-06, "loss": 2.7349, "step": 270100 }, { "epoch": 0.89, "learning_rate": 5.257344783390573e-06, "loss": 2.7279, "step": 270200 }, { "epoch": 0.9, "learning_rate": 5.240785695597601e-06, "loss": 2.7321, "step": 270300 }, { "epoch": 0.9, "learning_rate": 5.22422660780463e-06, "loss": 2.7224, "step": 270400 }, { "epoch": 0.9, "learning_rate": 5.207667520011658e-06, "loss": 2.7141, "step": 270500 }, { "epoch": 0.9, "learning_rate": 5.1911084322186855e-06, "loss": 2.732, "step": 270600 }, { "epoch": 0.9, "learning_rate": 5.174549344425714e-06, "loss": 2.7396, "step": 270700 }, { "epoch": 0.9, "learning_rate": 5.157990256632743e-06, "loss": 2.731, "step": 270800 }, { "epoch": 0.9, "learning_rate": 5.141431168839771e-06, "loss": 2.7335, "step": 270900 }, { "epoch": 0.9, "learning_rate": 5.1248720810468e-06, "loss": 2.7279, "step": 271000 }, { "epoch": 0.9, "learning_rate": 5.108312993253828e-06, "loss": 2.7192, "step": 271100 }, { "epoch": 0.9, "learning_rate": 5.091753905460856e-06, "loss": 2.7251, "step": 271200 }, { "epoch": 0.9, "learning_rate": 5.075194817667885e-06, "loss": 2.7374, "step": 271300 }, { "epoch": 0.9, "learning_rate": 5.058635729874913e-06, "loss": 2.7258, "step": 271400 }, { "epoch": 0.9, "learning_rate": 5.042076642081941e-06, "loss": 2.7273, "step": 271500 }, { "epoch": 0.9, "learning_rate": 5.025517554288969e-06, "loss": 2.7369, "step": 271600 }, { "epoch": 0.9, "learning_rate": 5.008958466495998e-06, "loss": 2.7309, "step": 271700 }, { "epoch": 0.9, "learning_rate": 4.992399378703026e-06, "loss": 2.7458, "step": 271800 }, { "epoch": 0.9, "learning_rate": 4.975840290910055e-06, "loss": 2.735, "step": 271900 }, { "epoch": 0.9, "learning_rate": 4.9592812031170835e-06, "loss": 2.7119, "step": 272000 }, { "epoch": 0.9, "learning_rate": 4.942722115324111e-06, "loss": 2.7245, "step": 272100 }, { "epoch": 0.9, "learning_rate": 4.92616302753114e-06, "loss": 2.7267, "step": 272200 }, { "epoch": 0.9, "learning_rate": 4.909603939738168e-06, "loss": 2.7273, "step": 272300 }, { "epoch": 0.9, "learning_rate": 4.893044851945196e-06, "loss": 2.73, "step": 272400 }, { "epoch": 0.9, "learning_rate": 4.876485764152225e-06, "loss": 2.7165, "step": 272500 }, { "epoch": 0.9, "learning_rate": 4.8599266763592525e-06, "loss": 2.7229, "step": 272600 }, { "epoch": 0.9, "learning_rate": 4.843367588566281e-06, "loss": 2.7099, "step": 272700 }, { "epoch": 0.9, "learning_rate": 4.82680850077331e-06, "loss": 2.7166, "step": 272800 }, { "epoch": 0.9, "learning_rate": 4.810249412980338e-06, "loss": 2.7251, "step": 272900 }, { "epoch": 0.9, "learning_rate": 4.793690325187367e-06, "loss": 2.7302, "step": 273000 }, { "epoch": 0.9, "learning_rate": 4.777131237394395e-06, "loss": 2.7249, "step": 273100 }, { "epoch": 0.9, "learning_rate": 4.7605721496014225e-06, "loss": 2.7428, "step": 273200 }, { "epoch": 0.91, "learning_rate": 4.744013061808451e-06, "loss": 2.7258, "step": 273300 }, { "epoch": 0.91, "learning_rate": 4.72745397401548e-06, "loss": 2.7171, "step": 273400 }, { "epoch": 0.91, "learning_rate": 4.710894886222508e-06, "loss": 2.7237, "step": 273500 }, { "epoch": 0.91, "learning_rate": 4.694335798429536e-06, "loss": 2.7271, "step": 273600 }, { "epoch": 0.91, "learning_rate": 4.677776710636565e-06, "loss": 2.7188, "step": 273700 }, { "epoch": 0.91, "learning_rate": 4.661217622843593e-06, "loss": 2.7193, "step": 273800 }, { "epoch": 0.91, "learning_rate": 4.644658535050622e-06, "loss": 2.7333, "step": 273900 }, { "epoch": 0.91, "learning_rate": 4.62809944725765e-06, "loss": 2.7178, "step": 274000 }, { "epoch": 0.91, "learning_rate": 4.611540359464677e-06, "loss": 2.7317, "step": 274100 }, { "epoch": 0.91, "learning_rate": 4.594981271671706e-06, "loss": 2.713, "step": 274200 }, { "epoch": 0.91, "learning_rate": 4.578422183878735e-06, "loss": 2.7263, "step": 274300 }, { "epoch": 0.91, "learning_rate": 4.561863096085763e-06, "loss": 2.7143, "step": 274400 }, { "epoch": 0.91, "learning_rate": 4.545304008292792e-06, "loss": 2.7164, "step": 274500 }, { "epoch": 0.91, "learning_rate": 4.52874492049982e-06, "loss": 2.7314, "step": 274600 }, { "epoch": 0.91, "learning_rate": 4.512185832706848e-06, "loss": 2.7256, "step": 274700 }, { "epoch": 0.91, "learning_rate": 4.495626744913877e-06, "loss": 2.7266, "step": 274800 }, { "epoch": 0.91, "learning_rate": 4.4790676571209045e-06, "loss": 2.7285, "step": 274900 }, { "epoch": 0.91, "learning_rate": 4.462508569327933e-06, "loss": 2.7193, "step": 275000 }, { "epoch": 0.91, "learning_rate": 4.445949481534961e-06, "loss": 2.7338, "step": 275100 }, { "epoch": 0.91, "learning_rate": 4.4293903937419895e-06, "loss": 2.7124, "step": 275200 }, { "epoch": 0.91, "learning_rate": 4.412831305949018e-06, "loss": 2.7237, "step": 275300 }, { "epoch": 0.91, "learning_rate": 4.396272218156047e-06, "loss": 2.7338, "step": 275400 }, { "epoch": 0.91, "learning_rate": 4.379713130363075e-06, "loss": 2.7274, "step": 275500 }, { "epoch": 0.91, "learning_rate": 4.363154042570103e-06, "loss": 2.7306, "step": 275600 }, { "epoch": 0.91, "learning_rate": 4.346594954777132e-06, "loss": 2.7527, "step": 275700 }, { "epoch": 0.91, "learning_rate": 4.3300358669841595e-06, "loss": 2.7208, "step": 275800 }, { "epoch": 0.91, "learning_rate": 4.313476779191188e-06, "loss": 2.7245, "step": 275900 }, { "epoch": 0.91, "learning_rate": 4.296917691398217e-06, "loss": 2.7296, "step": 276000 }, { "epoch": 0.91, "learning_rate": 4.2803586036052444e-06, "loss": 2.7225, "step": 276100 }, { "epoch": 0.91, "learning_rate": 4.263799515812273e-06, "loss": 2.735, "step": 276200 }, { "epoch": 0.92, "learning_rate": 4.247240428019302e-06, "loss": 2.7253, "step": 276300 }, { "epoch": 0.92, "learning_rate": 4.23068134022633e-06, "loss": 2.7186, "step": 276400 }, { "epoch": 0.92, "learning_rate": 4.214122252433359e-06, "loss": 2.719, "step": 276500 }, { "epoch": 0.92, "learning_rate": 4.197563164640387e-06, "loss": 2.7098, "step": 276600 }, { "epoch": 0.92, "learning_rate": 4.181004076847414e-06, "loss": 2.7251, "step": 276700 }, { "epoch": 0.92, "learning_rate": 4.164444989054443e-06, "loss": 2.7436, "step": 276800 }, { "epoch": 0.92, "learning_rate": 4.1478859012614716e-06, "loss": 2.7442, "step": 276900 }, { "epoch": 0.92, "learning_rate": 4.1313268134685e-06, "loss": 2.7355, "step": 277000 }, { "epoch": 0.92, "learning_rate": 4.114767725675528e-06, "loss": 2.728, "step": 277100 }, { "epoch": 0.92, "learning_rate": 4.0982086378825565e-06, "loss": 2.7312, "step": 277200 }, { "epoch": 0.92, "learning_rate": 4.081649550089585e-06, "loss": 2.7312, "step": 277300 }, { "epoch": 0.92, "learning_rate": 4.065090462296614e-06, "loss": 2.7253, "step": 277400 }, { "epoch": 0.92, "learning_rate": 4.0485313745036415e-06, "loss": 2.7228, "step": 277500 }, { "epoch": 0.92, "learning_rate": 4.031972286710669e-06, "loss": 2.7449, "step": 277600 }, { "epoch": 0.92, "learning_rate": 4.015413198917698e-06, "loss": 2.7197, "step": 277700 }, { "epoch": 0.92, "learning_rate": 3.9988541111247265e-06, "loss": 2.7268, "step": 277800 }, { "epoch": 0.92, "learning_rate": 3.982295023331755e-06, "loss": 2.7166, "step": 277900 }, { "epoch": 0.92, "learning_rate": 3.965735935538784e-06, "loss": 2.7307, "step": 278000 }, { "epoch": 0.92, "learning_rate": 3.9491768477458115e-06, "loss": 2.7178, "step": 278100 }, { "epoch": 0.92, "learning_rate": 3.93261775995284e-06, "loss": 2.7128, "step": 278200 }, { "epoch": 0.92, "learning_rate": 3.916058672159869e-06, "loss": 2.7134, "step": 278300 }, { "epoch": 0.92, "learning_rate": 3.899499584366896e-06, "loss": 2.7318, "step": 278400 }, { "epoch": 0.92, "learning_rate": 3.882940496573925e-06, "loss": 2.7211, "step": 278500 }, { "epoch": 0.92, "learning_rate": 3.866381408780953e-06, "loss": 2.7183, "step": 278600 }, { "epoch": 0.92, "learning_rate": 3.849822320987981e-06, "loss": 2.7227, "step": 278700 }, { "epoch": 0.92, "learning_rate": 3.83326323319501e-06, "loss": 2.7284, "step": 278800 }, { "epoch": 0.92, "learning_rate": 3.816704145402039e-06, "loss": 2.7242, "step": 278900 }, { "epoch": 0.92, "learning_rate": 3.8001450576090668e-06, "loss": 2.7107, "step": 279000 }, { "epoch": 0.92, "learning_rate": 3.7835859698160945e-06, "loss": 2.7204, "step": 279100 }, { "epoch": 0.92, "learning_rate": 3.767026882023123e-06, "loss": 2.7288, "step": 279200 }, { "epoch": 0.92, "learning_rate": 3.7504677942301518e-06, "loss": 2.7214, "step": 279300 }, { "epoch": 0.93, "learning_rate": 3.73390870643718e-06, "loss": 2.7142, "step": 279400 }, { "epoch": 0.93, "learning_rate": 3.7173496186442085e-06, "loss": 2.7304, "step": 279500 }, { "epoch": 0.93, "learning_rate": 3.7007905308512363e-06, "loss": 2.7156, "step": 279600 }, { "epoch": 0.93, "learning_rate": 3.684231443058265e-06, "loss": 2.7212, "step": 279700 }, { "epoch": 0.93, "learning_rate": 3.6676723552652935e-06, "loss": 2.7351, "step": 279800 }, { "epoch": 0.93, "learning_rate": 3.6511132674723217e-06, "loss": 2.7355, "step": 279900 }, { "epoch": 0.93, "learning_rate": 3.6345541796793503e-06, "loss": 2.7245, "step": 280000 }, { "epoch": 0.93, "learning_rate": 3.617995091886378e-06, "loss": 2.7301, "step": 280100 }, { "epoch": 0.93, "learning_rate": 3.6014360040934067e-06, "loss": 2.7221, "step": 280200 }, { "epoch": 0.93, "learning_rate": 3.584876916300435e-06, "loss": 2.7412, "step": 280300 }, { "epoch": 0.93, "learning_rate": 3.5683178285074634e-06, "loss": 2.7349, "step": 280400 }, { "epoch": 0.93, "learning_rate": 3.551758740714492e-06, "loss": 2.7274, "step": 280500 }, { "epoch": 0.93, "learning_rate": 3.53519965292152e-06, "loss": 2.7227, "step": 280600 }, { "epoch": 0.93, "learning_rate": 3.5186405651285484e-06, "loss": 2.7299, "step": 280700 }, { "epoch": 0.93, "learning_rate": 3.5020814773355766e-06, "loss": 2.7289, "step": 280800 }, { "epoch": 0.93, "learning_rate": 3.485522389542605e-06, "loss": 2.7145, "step": 280900 }, { "epoch": 0.93, "learning_rate": 3.468963301749634e-06, "loss": 2.7389, "step": 281000 }, { "epoch": 0.93, "learning_rate": 3.4524042139566616e-06, "loss": 2.7196, "step": 281100 }, { "epoch": 0.93, "learning_rate": 3.4358451261636898e-06, "loss": 2.7207, "step": 281200 }, { "epoch": 0.93, "learning_rate": 3.4192860383707184e-06, "loss": 2.7045, "step": 281300 }, { "epoch": 0.93, "learning_rate": 3.402726950577747e-06, "loss": 2.7072, "step": 281400 }, { "epoch": 0.93, "learning_rate": 3.3861678627847756e-06, "loss": 2.7164, "step": 281500 }, { "epoch": 0.93, "learning_rate": 3.3696087749918033e-06, "loss": 2.7163, "step": 281600 }, { "epoch": 0.93, "learning_rate": 3.3530496871988315e-06, "loss": 2.7205, "step": 281700 }, { "epoch": 0.93, "learning_rate": 3.33649059940586e-06, "loss": 2.7341, "step": 281800 }, { "epoch": 0.93, "learning_rate": 3.3199315116128887e-06, "loss": 2.714, "step": 281900 }, { "epoch": 0.93, "learning_rate": 3.303372423819917e-06, "loss": 2.7277, "step": 282000 }, { "epoch": 0.93, "learning_rate": 3.2868133360269447e-06, "loss": 2.7128, "step": 282100 }, { "epoch": 0.93, "learning_rate": 3.2702542482339733e-06, "loss": 2.7217, "step": 282200 }, { "epoch": 0.93, "learning_rate": 3.253695160441002e-06, "loss": 2.7212, "step": 282300 }, { "epoch": 0.94, "learning_rate": 3.2371360726480305e-06, "loss": 2.7114, "step": 282400 }, { "epoch": 0.94, "learning_rate": 3.2205769848550587e-06, "loss": 2.7221, "step": 282500 }, { "epoch": 0.94, "learning_rate": 3.2040178970620864e-06, "loss": 2.7163, "step": 282600 }, { "epoch": 0.94, "learning_rate": 3.187458809269115e-06, "loss": 2.731, "step": 282700 }, { "epoch": 0.94, "learning_rate": 3.1708997214761436e-06, "loss": 2.7097, "step": 282800 }, { "epoch": 0.94, "learning_rate": 3.154340633683172e-06, "loss": 2.7065, "step": 282900 }, { "epoch": 0.94, "learning_rate": 3.1377815458902004e-06, "loss": 2.7314, "step": 283000 }, { "epoch": 0.94, "learning_rate": 3.1212224580972286e-06, "loss": 2.718, "step": 283100 }, { "epoch": 0.94, "learning_rate": 3.1046633703042568e-06, "loss": 2.716, "step": 283200 }, { "epoch": 0.94, "learning_rate": 3.0881042825112854e-06, "loss": 2.7145, "step": 283300 }, { "epoch": 0.94, "learning_rate": 3.0715451947183136e-06, "loss": 2.7253, "step": 283400 }, { "epoch": 0.94, "learning_rate": 3.0549861069253417e-06, "loss": 2.7139, "step": 283500 }, { "epoch": 0.94, "learning_rate": 3.0384270191323704e-06, "loss": 2.7446, "step": 283600 }, { "epoch": 0.94, "learning_rate": 3.0218679313393985e-06, "loss": 2.7274, "step": 283700 }, { "epoch": 0.94, "learning_rate": 3.0053088435464267e-06, "loss": 2.7205, "step": 283800 }, { "epoch": 0.94, "learning_rate": 2.9887497557534553e-06, "loss": 2.7416, "step": 283900 }, { "epoch": 0.94, "learning_rate": 2.9721906679604835e-06, "loss": 2.7285, "step": 284000 }, { "epoch": 0.94, "learning_rate": 2.955631580167512e-06, "loss": 2.7306, "step": 284100 }, { "epoch": 0.94, "learning_rate": 2.9390724923745403e-06, "loss": 2.7156, "step": 284200 }, { "epoch": 0.94, "learning_rate": 2.9225134045815685e-06, "loss": 2.7181, "step": 284300 }, { "epoch": 0.94, "learning_rate": 2.905954316788597e-06, "loss": 2.7271, "step": 284400 }, { "epoch": 0.94, "learning_rate": 2.8893952289956253e-06, "loss": 2.7315, "step": 284500 }, { "epoch": 0.94, "learning_rate": 2.872836141202654e-06, "loss": 2.7206, "step": 284600 }, { "epoch": 0.94, "learning_rate": 2.8562770534096816e-06, "loss": 2.7288, "step": 284700 }, { "epoch": 0.94, "learning_rate": 2.8397179656167102e-06, "loss": 2.7197, "step": 284800 }, { "epoch": 0.94, "learning_rate": 2.823158877823739e-06, "loss": 2.7203, "step": 284900 }, { "epoch": 0.94, "learning_rate": 2.806599790030767e-06, "loss": 2.7156, "step": 285000 }, { "epoch": 0.94, "learning_rate": 2.790040702237795e-06, "loss": 2.7297, "step": 285100 }, { "epoch": 0.94, "learning_rate": 2.7734816144448234e-06, "loss": 2.7292, "step": 285200 }, { "epoch": 0.94, "learning_rate": 2.756922526651852e-06, "loss": 2.7221, "step": 285300 }, { "epoch": 0.95, "learning_rate": 2.7403634388588806e-06, "loss": 2.7198, "step": 285400 }, { "epoch": 0.95, "learning_rate": 2.7238043510659088e-06, "loss": 2.7139, "step": 285500 }, { "epoch": 0.95, "learning_rate": 2.707245263272937e-06, "loss": 2.7316, "step": 285600 }, { "epoch": 0.95, "learning_rate": 2.690686175479965e-06, "loss": 2.7253, "step": 285700 }, { "epoch": 0.95, "learning_rate": 2.6741270876869937e-06, "loss": 2.7337, "step": 285800 }, { "epoch": 0.95, "learning_rate": 2.657567999894022e-06, "loss": 2.728, "step": 285900 }, { "epoch": 0.95, "learning_rate": 2.64100891210105e-06, "loss": 2.7149, "step": 286000 }, { "epoch": 0.95, "learning_rate": 2.6244498243080787e-06, "loss": 2.73, "step": 286100 }, { "epoch": 0.95, "learning_rate": 2.607890736515107e-06, "loss": 2.7245, "step": 286200 }, { "epoch": 0.95, "learning_rate": 2.5913316487221355e-06, "loss": 2.7081, "step": 286300 }, { "epoch": 0.95, "learning_rate": 2.5747725609291637e-06, "loss": 2.7265, "step": 286400 }, { "epoch": 0.95, "learning_rate": 2.558213473136192e-06, "loss": 2.7257, "step": 286500 }, { "epoch": 0.95, "learning_rate": 2.5416543853432205e-06, "loss": 2.7335, "step": 286600 }, { "epoch": 0.95, "learning_rate": 2.5250952975502487e-06, "loss": 2.7297, "step": 286700 }, { "epoch": 0.95, "learning_rate": 2.508536209757277e-06, "loss": 2.719, "step": 286800 }, { "epoch": 0.95, "learning_rate": 2.4919771219643054e-06, "loss": 2.7298, "step": 286900 }, { "epoch": 0.95, "learning_rate": 2.4754180341713336e-06, "loss": 2.7118, "step": 287000 }, { "epoch": 0.95, "learning_rate": 2.4588589463783622e-06, "loss": 2.7186, "step": 287100 }, { "epoch": 0.95, "learning_rate": 2.4422998585853904e-06, "loss": 2.7432, "step": 287200 }, { "epoch": 0.95, "learning_rate": 2.4257407707924186e-06, "loss": 2.7148, "step": 287300 }, { "epoch": 0.95, "learning_rate": 2.409181682999447e-06, "loss": 2.7257, "step": 287400 }, { "epoch": 0.95, "learning_rate": 2.3926225952064754e-06, "loss": 2.7234, "step": 287500 }, { "epoch": 0.95, "learning_rate": 2.376063507413504e-06, "loss": 2.7229, "step": 287600 }, { "epoch": 0.95, "learning_rate": 2.3595044196205317e-06, "loss": 2.7151, "step": 287700 }, { "epoch": 0.95, "learning_rate": 2.3429453318275604e-06, "loss": 2.7244, "step": 287800 }, { "epoch": 0.95, "learning_rate": 2.3263862440345885e-06, "loss": 2.7171, "step": 287900 }, { "epoch": 0.95, "learning_rate": 2.309827156241617e-06, "loss": 2.7241, "step": 288000 }, { "epoch": 0.95, "learning_rate": 2.2932680684486453e-06, "loss": 2.7232, "step": 288100 }, { "epoch": 0.95, "learning_rate": 2.2767089806556735e-06, "loss": 2.7207, "step": 288200 }, { "epoch": 0.95, "learning_rate": 2.260149892862702e-06, "loss": 2.732, "step": 288300 }, { "epoch": 0.96, "learning_rate": 2.2435908050697303e-06, "loss": 2.712, "step": 288400 }, { "epoch": 0.96, "learning_rate": 2.227031717276759e-06, "loss": 2.7331, "step": 288500 }, { "epoch": 0.96, "learning_rate": 2.210472629483787e-06, "loss": 2.7276, "step": 288600 }, { "epoch": 0.96, "learning_rate": 2.1939135416908153e-06, "loss": 2.7387, "step": 288700 }, { "epoch": 0.96, "learning_rate": 2.177354453897844e-06, "loss": 2.7252, "step": 288800 }, { "epoch": 0.96, "learning_rate": 2.160795366104872e-06, "loss": 2.7242, "step": 288900 }, { "epoch": 0.96, "learning_rate": 2.1442362783119002e-06, "loss": 2.7224, "step": 289000 }, { "epoch": 0.96, "learning_rate": 2.127677190518929e-06, "loss": 2.7175, "step": 289100 }, { "epoch": 0.96, "learning_rate": 2.111118102725957e-06, "loss": 2.72, "step": 289200 }, { "epoch": 0.96, "learning_rate": 2.0945590149329856e-06, "loss": 2.7025, "step": 289300 }, { "epoch": 0.96, "learning_rate": 2.077999927140014e-06, "loss": 2.713, "step": 289400 }, { "epoch": 0.96, "learning_rate": 2.061440839347042e-06, "loss": 2.7141, "step": 289500 }, { "epoch": 0.96, "learning_rate": 2.0448817515540706e-06, "loss": 2.7224, "step": 289600 }, { "epoch": 0.96, "learning_rate": 2.0283226637610988e-06, "loss": 2.7317, "step": 289700 }, { "epoch": 0.96, "learning_rate": 2.0117635759681274e-06, "loss": 2.7207, "step": 289800 }, { "epoch": 0.96, "learning_rate": 1.995204488175155e-06, "loss": 2.7055, "step": 289900 }, { "epoch": 0.96, "learning_rate": 1.9786454003821837e-06, "loss": 2.7205, "step": 290000 }, { "epoch": 0.96, "learning_rate": 1.9620863125892123e-06, "loss": 2.7238, "step": 290100 }, { "epoch": 0.96, "learning_rate": 1.9455272247962405e-06, "loss": 2.7276, "step": 290200 }, { "epoch": 0.96, "learning_rate": 1.9289681370032687e-06, "loss": 2.7268, "step": 290300 }, { "epoch": 0.96, "learning_rate": 1.912409049210297e-06, "loss": 2.7161, "step": 290400 }, { "epoch": 0.96, "learning_rate": 1.8958499614173255e-06, "loss": 2.7319, "step": 290500 }, { "epoch": 0.96, "learning_rate": 1.8792908736243539e-06, "loss": 2.7139, "step": 290600 }, { "epoch": 0.96, "learning_rate": 1.862731785831382e-06, "loss": 2.7192, "step": 290700 }, { "epoch": 0.96, "learning_rate": 1.8461726980384107e-06, "loss": 2.7209, "step": 290800 }, { "epoch": 0.96, "learning_rate": 1.8296136102454387e-06, "loss": 2.7222, "step": 290900 }, { "epoch": 0.96, "learning_rate": 1.8130545224524673e-06, "loss": 2.7217, "step": 291000 }, { "epoch": 0.96, "learning_rate": 1.7964954346594957e-06, "loss": 2.7228, "step": 291100 }, { "epoch": 0.96, "learning_rate": 1.7799363468665238e-06, "loss": 2.7226, "step": 291200 }, { "epoch": 0.96, "learning_rate": 1.7633772590735522e-06, "loss": 2.7128, "step": 291300 }, { "epoch": 0.97, "learning_rate": 1.7468181712805804e-06, "loss": 2.7225, "step": 291400 }, { "epoch": 0.97, "learning_rate": 1.7302590834876088e-06, "loss": 2.7256, "step": 291500 }, { "epoch": 0.97, "learning_rate": 1.7136999956946374e-06, "loss": 2.7139, "step": 291600 }, { "epoch": 0.97, "learning_rate": 1.6971409079016656e-06, "loss": 2.7198, "step": 291700 }, { "epoch": 0.97, "learning_rate": 1.680581820108694e-06, "loss": 2.7132, "step": 291800 }, { "epoch": 0.97, "learning_rate": 1.6640227323157222e-06, "loss": 2.7164, "step": 291900 }, { "epoch": 0.97, "learning_rate": 1.6474636445227506e-06, "loss": 2.7285, "step": 292000 }, { "epoch": 0.97, "learning_rate": 1.630904556729779e-06, "loss": 2.7306, "step": 292100 }, { "epoch": 0.97, "learning_rate": 1.6143454689368071e-06, "loss": 2.7126, "step": 292200 }, { "epoch": 0.97, "learning_rate": 1.5977863811438357e-06, "loss": 2.7196, "step": 292300 }, { "epoch": 0.97, "learning_rate": 1.5812272933508637e-06, "loss": 2.7182, "step": 292400 }, { "epoch": 0.97, "learning_rate": 1.5646682055578923e-06, "loss": 2.7335, "step": 292500 }, { "epoch": 0.97, "learning_rate": 1.5481091177649205e-06, "loss": 2.7233, "step": 292600 }, { "epoch": 0.97, "learning_rate": 1.531550029971949e-06, "loss": 2.7149, "step": 292700 }, { "epoch": 0.97, "learning_rate": 1.5149909421789773e-06, "loss": 2.7299, "step": 292800 }, { "epoch": 0.97, "learning_rate": 1.4984318543860057e-06, "loss": 2.7152, "step": 292900 }, { "epoch": 0.97, "learning_rate": 1.4818727665930339e-06, "loss": 2.7089, "step": 293000 }, { "epoch": 0.97, "learning_rate": 1.4653136788000623e-06, "loss": 2.7286, "step": 293100 }, { "epoch": 0.97, "learning_rate": 1.4487545910070906e-06, "loss": 2.7195, "step": 293200 }, { "epoch": 0.97, "learning_rate": 1.432195503214119e-06, "loss": 2.7164, "step": 293300 }, { "epoch": 0.97, "learning_rate": 1.4156364154211474e-06, "loss": 2.7264, "step": 293400 }, { "epoch": 0.97, "learning_rate": 1.3990773276281756e-06, "loss": 2.7372, "step": 293500 }, { "epoch": 0.97, "learning_rate": 1.382518239835204e-06, "loss": 2.731, "step": 293600 }, { "epoch": 0.97, "learning_rate": 1.3659591520422324e-06, "loss": 2.7163, "step": 293700 }, { "epoch": 0.97, "learning_rate": 1.3494000642492608e-06, "loss": 2.7156, "step": 293800 }, { "epoch": 0.97, "learning_rate": 1.332840976456289e-06, "loss": 2.726, "step": 293900 }, { "epoch": 0.97, "learning_rate": 1.3162818886633174e-06, "loss": 2.7266, "step": 294000 }, { "epoch": 0.97, "learning_rate": 1.2997228008703456e-06, "loss": 2.6988, "step": 294100 }, { "epoch": 0.97, "learning_rate": 1.2831637130773742e-06, "loss": 2.7276, "step": 294200 }, { "epoch": 0.97, "learning_rate": 1.2666046252844023e-06, "loss": 2.7213, "step": 294300 }, { "epoch": 0.97, "learning_rate": 1.2500455374914307e-06, "loss": 2.7351, "step": 294400 }, { "epoch": 0.98, "learning_rate": 1.2334864496984591e-06, "loss": 2.7231, "step": 294500 }, { "epoch": 0.98, "learning_rate": 1.2169273619054873e-06, "loss": 2.7209, "step": 294600 }, { "epoch": 0.98, "learning_rate": 1.200368274112516e-06, "loss": 2.7231, "step": 294700 }, { "epoch": 0.98, "learning_rate": 1.183809186319544e-06, "loss": 2.7204, "step": 294800 }, { "epoch": 0.98, "learning_rate": 1.1672500985265725e-06, "loss": 2.7318, "step": 294900 }, { "epoch": 0.98, "learning_rate": 1.1506910107336007e-06, "loss": 2.7227, "step": 295000 }, { "epoch": 0.98, "learning_rate": 1.134131922940629e-06, "loss": 2.7249, "step": 295100 }, { "epoch": 0.98, "learning_rate": 1.1175728351476575e-06, "loss": 2.7216, "step": 295200 }, { "epoch": 0.98, "learning_rate": 1.1010137473546859e-06, "loss": 2.7317, "step": 295300 }, { "epoch": 0.98, "learning_rate": 1.084454659561714e-06, "loss": 2.7187, "step": 295400 }, { "epoch": 0.98, "learning_rate": 1.0678955717687424e-06, "loss": 2.7258, "step": 295500 }, { "epoch": 0.98, "learning_rate": 1.0513364839757708e-06, "loss": 2.7267, "step": 295600 }, { "epoch": 0.98, "learning_rate": 1.0347773961827992e-06, "loss": 2.7247, "step": 295700 }, { "epoch": 0.98, "learning_rate": 1.0182183083898276e-06, "loss": 2.7303, "step": 295800 }, { "epoch": 0.98, "learning_rate": 1.0016592205968558e-06, "loss": 2.7117, "step": 295900 }, { "epoch": 0.98, "learning_rate": 9.851001328038842e-07, "loss": 2.7089, "step": 296000 }, { "epoch": 0.98, "learning_rate": 9.685410450109124e-07, "loss": 2.7345, "step": 296100 }, { "epoch": 0.98, "learning_rate": 9.519819572179409e-07, "loss": 2.7273, "step": 296200 }, { "epoch": 0.98, "learning_rate": 9.354228694249692e-07, "loss": 2.7351, "step": 296300 }, { "epoch": 0.98, "learning_rate": 9.188637816319976e-07, "loss": 2.7199, "step": 296400 }, { "epoch": 0.98, "learning_rate": 9.023046938390258e-07, "loss": 2.7407, "step": 296500 }, { "epoch": 0.98, "learning_rate": 8.857456060460541e-07, "loss": 2.7201, "step": 296600 }, { "epoch": 0.98, "learning_rate": 8.691865182530826e-07, "loss": 2.7112, "step": 296700 }, { "epoch": 0.98, "learning_rate": 8.526274304601109e-07, "loss": 2.7131, "step": 296800 }, { "epoch": 0.98, "learning_rate": 8.360683426671392e-07, "loss": 2.7137, "step": 296900 }, { "epoch": 0.98, "learning_rate": 8.195092548741675e-07, "loss": 2.719, "step": 297000 }, { "epoch": 0.98, "learning_rate": 8.029501670811958e-07, "loss": 2.7234, "step": 297100 }, { "epoch": 0.98, "learning_rate": 7.863910792882243e-07, "loss": 2.7248, "step": 297200 }, { "epoch": 0.98, "learning_rate": 7.698319914952525e-07, "loss": 2.7146, "step": 297300 }, { "epoch": 0.98, "learning_rate": 7.532729037022809e-07, "loss": 2.7145, "step": 297400 }, { "epoch": 0.99, "learning_rate": 7.367138159093093e-07, "loss": 2.7288, "step": 297500 }, { "epoch": 0.99, "learning_rate": 7.201547281163375e-07, "loss": 2.714, "step": 297600 }, { "epoch": 0.99, "learning_rate": 7.035956403233659e-07, "loss": 2.7175, "step": 297700 }, { "epoch": 0.99, "learning_rate": 6.870365525303942e-07, "loss": 2.7141, "step": 297800 }, { "epoch": 0.99, "learning_rate": 6.704774647374226e-07, "loss": 2.7221, "step": 297900 }, { "epoch": 0.99, "learning_rate": 6.539183769444509e-07, "loss": 2.7387, "step": 298000 }, { "epoch": 0.99, "learning_rate": 6.373592891514793e-07, "loss": 2.7228, "step": 298100 }, { "epoch": 0.99, "learning_rate": 6.208002013585076e-07, "loss": 2.7076, "step": 298200 }, { "epoch": 0.99, "learning_rate": 6.042411135655359e-07, "loss": 2.7287, "step": 298300 }, { "epoch": 0.99, "learning_rate": 5.876820257725643e-07, "loss": 2.7311, "step": 298400 }, { "epoch": 0.99, "learning_rate": 5.711229379795926e-07, "loss": 2.7195, "step": 298500 }, { "epoch": 0.99, "learning_rate": 5.545638501866209e-07, "loss": 2.7107, "step": 298600 }, { "epoch": 0.99, "learning_rate": 5.380047623936492e-07, "loss": 2.7212, "step": 298700 }, { "epoch": 0.99, "learning_rate": 5.214456746006776e-07, "loss": 2.7202, "step": 298800 }, { "epoch": 0.99, "learning_rate": 5.04886586807706e-07, "loss": 2.6954, "step": 298900 }, { "epoch": 0.99, "learning_rate": 4.883274990147343e-07, "loss": 2.7267, "step": 299000 }, { "epoch": 0.99, "learning_rate": 4.7176841122176265e-07, "loss": 2.723, "step": 299100 }, { "epoch": 0.99, "learning_rate": 4.55209323428791e-07, "loss": 2.7153, "step": 299200 }, { "epoch": 0.99, "learning_rate": 4.386502356358193e-07, "loss": 2.711, "step": 299300 }, { "epoch": 0.99, "learning_rate": 4.2209114784284767e-07, "loss": 2.7385, "step": 299400 }, { "epoch": 0.99, "learning_rate": 4.0553206004987596e-07, "loss": 2.7214, "step": 299500 }, { "epoch": 0.99, "learning_rate": 3.889729722569043e-07, "loss": 2.7111, "step": 299600 }, { "epoch": 0.99, "learning_rate": 3.7241388446393264e-07, "loss": 2.7106, "step": 299700 }, { "epoch": 0.99, "learning_rate": 3.5585479667096104e-07, "loss": 2.7095, "step": 299800 }, { "epoch": 0.99, "learning_rate": 3.392957088779894e-07, "loss": 2.7217, "step": 299900 }, { "epoch": 0.99, "learning_rate": 3.2273662108501766e-07, "loss": 2.7331, "step": 300000 }, { "epoch": 0.99, "learning_rate": 3.06177533292046e-07, "loss": 2.7229, "step": 300100 }, { "epoch": 0.99, "learning_rate": 2.8961844549907435e-07, "loss": 2.7105, "step": 300200 }, { "epoch": 0.99, "learning_rate": 2.730593577061027e-07, "loss": 2.7208, "step": 300300 }, { "epoch": 0.99, "learning_rate": 2.5650026991313103e-07, "loss": 2.7261, "step": 300400 }, { "epoch": 1.0, "learning_rate": 2.3994118212015937e-07, "loss": 2.7352, "step": 300500 }, { "epoch": 1.0, "learning_rate": 2.233820943271877e-07, "loss": 2.7271, "step": 300600 }, { "epoch": 1.0, "learning_rate": 2.0682300653421605e-07, "loss": 2.727, "step": 300700 }, { "epoch": 1.0, "learning_rate": 1.902639187412444e-07, "loss": 2.7298, "step": 300800 }, { "epoch": 1.0, "learning_rate": 1.7370483094827273e-07, "loss": 2.7344, "step": 300900 }, { "epoch": 1.0, "learning_rate": 1.5714574315530107e-07, "loss": 2.7185, "step": 301000 }, { "epoch": 1.0, "learning_rate": 1.4058665536232941e-07, "loss": 2.7195, "step": 301100 }, { "epoch": 1.0, "learning_rate": 1.2402756756935775e-07, "loss": 2.7201, "step": 301200 }, { "epoch": 1.0, "learning_rate": 1.074684797763861e-07, "loss": 2.7188, "step": 301300 }, { "epoch": 1.0, "learning_rate": 9.090939198341442e-08, "loss": 2.7252, "step": 301400 }, { "epoch": 1.0, "learning_rate": 7.435030419044276e-08, "loss": 2.7194, "step": 301500 } ], "max_steps": 301949, "num_train_epochs": 1, "total_flos": 8.170868148535296e+18, "trial_name": null, "trial_params": null }