diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9985129939161912, + "global_step": 301500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.998344091220703e-05, + "loss": 2.749, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.9966881824414056e-05, + "loss": 2.7594, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9950322736621087e-05, + "loss": 2.7598, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 4.993376364882812e-05, + "loss": 2.7508, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 4.991720456103514e-05, + "loss": 2.7558, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.990064547324218e-05, + "loss": 2.7641, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 4.98840863854492e-05, + "loss": 2.7775, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 4.986752729765623e-05, + "loss": 2.7908, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 4.9850968209863255e-05, + "loss": 2.767, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 4.9834409122070285e-05, + "loss": 2.7804, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9817850034277315e-05, + "loss": 2.7667, + "step": 1100 + }, + { + "epoch": 0.0, + "learning_rate": 4.980129094648434e-05, + "loss": 2.7679, + "step": 1200 + }, + { + "epoch": 0.0, + "learning_rate": 4.978473185869137e-05, + "loss": 2.792, + "step": 1300 + }, + { + "epoch": 0.0, + "learning_rate": 4.97681727708984e-05, + "loss": 2.7904, + "step": 1400 + }, + { + "epoch": 0.0, + "learning_rate": 4.975161368310542e-05, + "loss": 2.7738, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.973505459531246e-05, + "loss": 2.7821, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9718495507519484e-05, + "loss": 2.7821, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9701936419726514e-05, + "loss": 2.7774, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9685377331933544e-05, + "loss": 2.7651, + "step": 1900 + }, + { + "epoch": 0.01, + "learning_rate": 4.966881824414057e-05, + "loss": 2.7752, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.96522591563476e-05, + "loss": 2.7897, + "step": 2100 + }, + { + "epoch": 0.01, + "learning_rate": 4.963570006855462e-05, + "loss": 2.7725, + "step": 2200 + }, + { + "epoch": 0.01, + "learning_rate": 4.961914098076165e-05, + "loss": 2.7753, + "step": 2300 + }, + { + "epoch": 0.01, + "learning_rate": 4.960258189296868e-05, + "loss": 2.7824, + "step": 2400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9586022805175706e-05, + "loss": 2.7757, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.956946371738274e-05, + "loss": 2.7658, + "step": 2600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9552904629589766e-05, + "loss": 2.7781, + "step": 2700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9536345541796797e-05, + "loss": 2.7819, + "step": 2800 + }, + { + "epoch": 0.01, + "learning_rate": 4.951978645400383e-05, + "loss": 2.7963, + "step": 2900 + }, + { + "epoch": 0.01, + "learning_rate": 4.950322736621085e-05, + "loss": 2.7773, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 4.948666827841788e-05, + "loss": 2.7823, + "step": 3100 + }, + { + "epoch": 0.01, + "learning_rate": 4.947010919062491e-05, + "loss": 2.7753, + "step": 3200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9453550102831935e-05, + "loss": 2.769, + "step": 3300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9436991015038965e-05, + "loss": 2.7742, + "step": 3400 + }, + { + "epoch": 0.01, + "learning_rate": 4.942043192724599e-05, + "loss": 2.7804, + "step": 3500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9403872839453025e-05, + "loss": 2.7811, + "step": 3600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9387313751660056e-05, + "loss": 2.786, + "step": 3700 + }, + { + "epoch": 0.01, + "learning_rate": 4.937075466386708e-05, + "loss": 2.7747, + "step": 3800 + }, + { + "epoch": 0.01, + "learning_rate": 4.935419557607411e-05, + "loss": 2.7802, + "step": 3900 + }, + { + "epoch": 0.01, + "learning_rate": 4.933763648828113e-05, + "loss": 2.7754, + "step": 4000 + }, + { + "epoch": 0.01, + "learning_rate": 4.932107740048816e-05, + "loss": 2.7792, + "step": 4100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9304518312695194e-05, + "loss": 2.784, + "step": 4200 + }, + { + "epoch": 0.01, + "learning_rate": 4.928795922490222e-05, + "loss": 2.7872, + "step": 4300 + }, + { + "epoch": 0.01, + "learning_rate": 4.927140013710925e-05, + "loss": 2.7802, + "step": 4400 + }, + { + "epoch": 0.01, + "learning_rate": 4.925484104931628e-05, + "loss": 2.7902, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.923828196152331e-05, + "loss": 2.7898, + "step": 4600 + }, + { + "epoch": 0.02, + "learning_rate": 4.922172287373034e-05, + "loss": 2.7698, + "step": 4700 + }, + { + "epoch": 0.02, + "learning_rate": 4.920516378593736e-05, + "loss": 2.7779, + "step": 4800 + }, + { + "epoch": 0.02, + "learning_rate": 4.918860469814439e-05, + "loss": 2.7792, + "step": 4900 + }, + { + "epoch": 0.02, + "learning_rate": 4.917204561035142e-05, + "loss": 2.7807, + "step": 5000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9155486522558446e-05, + "loss": 2.7927, + "step": 5100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9138927434765476e-05, + "loss": 2.773, + "step": 5200 + }, + { + "epoch": 0.02, + "learning_rate": 4.91223683469725e-05, + "loss": 2.7723, + "step": 5300 + }, + { + "epoch": 0.02, + "learning_rate": 4.910580925917953e-05, + "loss": 2.7633, + "step": 5400 + }, + { + "epoch": 0.02, + "learning_rate": 4.908925017138656e-05, + "loss": 2.7821, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 4.907269108359359e-05, + "loss": 2.7641, + "step": 5600 + }, + { + "epoch": 0.02, + "learning_rate": 4.905613199580062e-05, + "loss": 2.7798, + "step": 5700 + }, + { + "epoch": 0.02, + "learning_rate": 4.9039572908007645e-05, + "loss": 2.7849, + "step": 5800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9023013820214675e-05, + "loss": 2.781, + "step": 5900 + }, + { + "epoch": 0.02, + "learning_rate": 4.9006454732421705e-05, + "loss": 2.7916, + "step": 6000 + }, + { + "epoch": 0.02, + "learning_rate": 4.898989564462873e-05, + "loss": 2.7924, + "step": 6100 + }, + { + "epoch": 0.02, + "learning_rate": 4.897333655683576e-05, + "loss": 2.7795, + "step": 6200 + }, + { + "epoch": 0.02, + "learning_rate": 4.895677746904279e-05, + "loss": 2.789, + "step": 6300 + }, + { + "epoch": 0.02, + "learning_rate": 4.894021838124981e-05, + "loss": 2.779, + "step": 6400 + }, + { + "epoch": 0.02, + "learning_rate": 4.892365929345684e-05, + "loss": 2.7844, + "step": 6500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8907100205663873e-05, + "loss": 2.7797, + "step": 6600 + }, + { + "epoch": 0.02, + "learning_rate": 4.8890541117870904e-05, + "loss": 2.7895, + "step": 6700 + }, + { + "epoch": 0.02, + "learning_rate": 4.887398203007793e-05, + "loss": 2.7759, + "step": 6800 + }, + { + "epoch": 0.02, + "learning_rate": 4.885742294228496e-05, + "loss": 2.7844, + "step": 6900 + }, + { + "epoch": 0.02, + "learning_rate": 4.884086385449199e-05, + "loss": 2.7638, + "step": 7000 + }, + { + "epoch": 0.02, + "learning_rate": 4.882430476669901e-05, + "loss": 2.7765, + "step": 7100 + }, + { + "epoch": 0.02, + "learning_rate": 4.880774567890604e-05, + "loss": 2.7778, + "step": 7200 + }, + { + "epoch": 0.02, + "learning_rate": 4.879118659111307e-05, + "loss": 2.7846, + "step": 7300 + }, + { + "epoch": 0.02, + "learning_rate": 4.8774627503320095e-05, + "loss": 2.7719, + "step": 7400 + }, + { + "epoch": 0.02, + "learning_rate": 4.8758068415527126e-05, + "loss": 2.7748, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8741509327734156e-05, + "loss": 2.7751, + "step": 7600 + }, + { + "epoch": 0.03, + "learning_rate": 4.8724950239941186e-05, + "loss": 2.781, + "step": 7700 + }, + { + "epoch": 0.03, + "learning_rate": 4.870839115214822e-05, + "loss": 2.7754, + "step": 7800 + }, + { + "epoch": 0.03, + "learning_rate": 4.869183206435524e-05, + "loss": 2.7904, + "step": 7900 + }, + { + "epoch": 0.03, + "learning_rate": 4.867527297656227e-05, + "loss": 2.777, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8658713888769294e-05, + "loss": 2.7694, + "step": 8100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8642154800976324e-05, + "loss": 2.7766, + "step": 8200 + }, + { + "epoch": 0.03, + "learning_rate": 4.8625595713183355e-05, + "loss": 2.7873, + "step": 8300 + }, + { + "epoch": 0.03, + "learning_rate": 4.860903662539038e-05, + "loss": 2.7604, + "step": 8400 + }, + { + "epoch": 0.03, + "learning_rate": 4.859247753759741e-05, + "loss": 2.78, + "step": 8500 + }, + { + "epoch": 0.03, + "learning_rate": 4.857591844980444e-05, + "loss": 2.7831, + "step": 8600 + }, + { + "epoch": 0.03, + "learning_rate": 4.855935936201147e-05, + "loss": 2.7907, + "step": 8700 + }, + { + "epoch": 0.03, + "learning_rate": 4.85428002742185e-05, + "loss": 2.7846, + "step": 8800 + }, + { + "epoch": 0.03, + "learning_rate": 4.852624118642552e-05, + "loss": 2.7813, + "step": 8900 + }, + { + "epoch": 0.03, + "learning_rate": 4.850968209863255e-05, + "loss": 2.7797, + "step": 9000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8493123010839583e-05, + "loss": 2.7758, + "step": 9100 + }, + { + "epoch": 0.03, + "learning_rate": 4.847656392304661e-05, + "loss": 2.78, + "step": 9200 + }, + { + "epoch": 0.03, + "learning_rate": 4.846000483525364e-05, + "loss": 2.7885, + "step": 9300 + }, + { + "epoch": 0.03, + "learning_rate": 4.844344574746066e-05, + "loss": 2.7855, + "step": 9400 + }, + { + "epoch": 0.03, + "learning_rate": 4.842688665966769e-05, + "loss": 2.7835, + "step": 9500 + }, + { + "epoch": 0.03, + "learning_rate": 4.841032757187473e-05, + "loss": 2.7868, + "step": 9600 + }, + { + "epoch": 0.03, + "learning_rate": 4.839376848408175e-05, + "loss": 2.7991, + "step": 9700 + }, + { + "epoch": 0.03, + "learning_rate": 4.837720939628878e-05, + "loss": 2.7867, + "step": 9800 + }, + { + "epoch": 0.03, + "learning_rate": 4.8360650308495806e-05, + "loss": 2.7845, + "step": 9900 + }, + { + "epoch": 0.03, + "learning_rate": 4.8344091220702836e-05, + "loss": 2.7796, + "step": 10000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8327532132909866e-05, + "loss": 2.7918, + "step": 10100 + }, + { + "epoch": 0.03, + "learning_rate": 4.831097304511689e-05, + "loss": 2.806, + "step": 10200 + }, + { + "epoch": 0.03, + "learning_rate": 4.829441395732392e-05, + "loss": 2.7917, + "step": 10300 + }, + { + "epoch": 0.03, + "learning_rate": 4.827785486953095e-05, + "loss": 2.7812, + "step": 10400 + }, + { + "epoch": 0.03, + "learning_rate": 4.8261295781737974e-05, + "loss": 2.7863, + "step": 10500 + }, + { + "epoch": 0.04, + "learning_rate": 4.824473669394501e-05, + "loss": 2.7812, + "step": 10600 + }, + { + "epoch": 0.04, + "learning_rate": 4.8228177606152034e-05, + "loss": 2.7725, + "step": 10700 + }, + { + "epoch": 0.04, + "learning_rate": 4.8211618518359065e-05, + "loss": 2.7792, + "step": 10800 + }, + { + "epoch": 0.04, + "learning_rate": 4.8195059430566095e-05, + "loss": 2.7858, + "step": 10900 + }, + { + "epoch": 0.04, + "learning_rate": 4.817850034277312e-05, + "loss": 2.7859, + "step": 11000 + }, + { + "epoch": 0.04, + "learning_rate": 4.816194125498015e-05, + "loss": 2.7818, + "step": 11100 + }, + { + "epoch": 0.04, + "learning_rate": 4.814538216718717e-05, + "loss": 2.7695, + "step": 11200 + }, + { + "epoch": 0.04, + "learning_rate": 4.81288230793942e-05, + "loss": 2.7782, + "step": 11300 + }, + { + "epoch": 0.04, + "learning_rate": 4.811226399160123e-05, + "loss": 2.7696, + "step": 11400 + }, + { + "epoch": 0.04, + "learning_rate": 4.8095704903808256e-05, + "loss": 2.775, + "step": 11500 + }, + { + "epoch": 0.04, + "learning_rate": 4.8079145816015294e-05, + "loss": 2.7835, + "step": 11600 + }, + { + "epoch": 0.04, + "learning_rate": 4.806258672822232e-05, + "loss": 2.8033, + "step": 11700 + }, + { + "epoch": 0.04, + "learning_rate": 4.804602764042935e-05, + "loss": 2.7839, + "step": 11800 + }, + { + "epoch": 0.04, + "learning_rate": 4.802946855263638e-05, + "loss": 2.7871, + "step": 11900 + }, + { + "epoch": 0.04, + "learning_rate": 4.80129094648434e-05, + "loss": 2.7912, + "step": 12000 + }, + { + "epoch": 0.04, + "learning_rate": 4.799635037705043e-05, + "loss": 2.7906, + "step": 12100 + }, + { + "epoch": 0.04, + "learning_rate": 4.797979128925746e-05, + "loss": 2.7903, + "step": 12200 + }, + { + "epoch": 0.04, + "learning_rate": 4.7963232201464485e-05, + "loss": 2.7771, + "step": 12300 + }, + { + "epoch": 0.04, + "learning_rate": 4.7946673113671516e-05, + "loss": 2.7825, + "step": 12400 + }, + { + "epoch": 0.04, + "learning_rate": 4.793011402587854e-05, + "loss": 2.7946, + "step": 12500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7913554938085576e-05, + "loss": 2.7858, + "step": 12600 + }, + { + "epoch": 0.04, + "learning_rate": 4.78969958502926e-05, + "loss": 2.7768, + "step": 12700 + }, + { + "epoch": 0.04, + "learning_rate": 4.788043676249963e-05, + "loss": 2.7875, + "step": 12800 + }, + { + "epoch": 0.04, + "learning_rate": 4.786387767470666e-05, + "loss": 2.7762, + "step": 12900 + }, + { + "epoch": 0.04, + "learning_rate": 4.7847318586913684e-05, + "loss": 2.7937, + "step": 13000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7830759499120714e-05, + "loss": 2.7903, + "step": 13100 + }, + { + "epoch": 0.04, + "learning_rate": 4.7814200411327744e-05, + "loss": 2.7923, + "step": 13200 + }, + { + "epoch": 0.04, + "learning_rate": 4.779764132353477e-05, + "loss": 2.7874, + "step": 13300 + }, + { + "epoch": 0.04, + "learning_rate": 4.77810822357418e-05, + "loss": 2.7847, + "step": 13400 + }, + { + "epoch": 0.04, + "learning_rate": 4.776452314794883e-05, + "loss": 2.772, + "step": 13500 + }, + { + "epoch": 0.05, + "learning_rate": 4.774796406015586e-05, + "loss": 2.7846, + "step": 13600 + }, + { + "epoch": 0.05, + "learning_rate": 4.773140497236289e-05, + "loss": 2.7824, + "step": 13700 + }, + { + "epoch": 0.05, + "learning_rate": 4.771484588456991e-05, + "loss": 2.7781, + "step": 13800 + }, + { + "epoch": 0.05, + "learning_rate": 4.769828679677694e-05, + "loss": 2.7761, + "step": 13900 + }, + { + "epoch": 0.05, + "learning_rate": 4.7681727708983967e-05, + "loss": 2.7677, + "step": 14000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7665168621191e-05, + "loss": 2.7686, + "step": 14100 + }, + { + "epoch": 0.05, + "learning_rate": 4.764860953339803e-05, + "loss": 2.7688, + "step": 14200 + }, + { + "epoch": 0.05, + "learning_rate": 4.763205044560505e-05, + "loss": 2.7817, + "step": 14300 + }, + { + "epoch": 0.05, + "learning_rate": 4.761549135781208e-05, + "loss": 2.7674, + "step": 14400 + }, + { + "epoch": 0.05, + "learning_rate": 4.759893227001911e-05, + "loss": 2.7738, + "step": 14500 + }, + { + "epoch": 0.05, + "learning_rate": 4.758237318222614e-05, + "loss": 2.7799, + "step": 14600 + }, + { + "epoch": 0.05, + "learning_rate": 4.756581409443317e-05, + "loss": 2.7818, + "step": 14700 + }, + { + "epoch": 0.05, + "learning_rate": 4.7549255006640195e-05, + "loss": 2.7859, + "step": 14800 + }, + { + "epoch": 0.05, + "learning_rate": 4.7532695918847226e-05, + "loss": 2.7762, + "step": 14900 + }, + { + "epoch": 0.05, + "learning_rate": 4.7516136831054256e-05, + "loss": 2.7851, + "step": 15000 + }, + { + "epoch": 0.05, + "learning_rate": 4.749957774326128e-05, + "loss": 2.7814, + "step": 15100 + }, + { + "epoch": 0.05, + "learning_rate": 4.748301865546831e-05, + "loss": 2.7973, + "step": 15200 + }, + { + "epoch": 0.05, + "learning_rate": 4.746645956767533e-05, + "loss": 2.7885, + "step": 15300 + }, + { + "epoch": 0.05, + "learning_rate": 4.7449900479882364e-05, + "loss": 2.776, + "step": 15400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7433341392089394e-05, + "loss": 2.7677, + "step": 15500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7416782304296424e-05, + "loss": 2.7916, + "step": 15600 + }, + { + "epoch": 0.05, + "learning_rate": 4.7400223216503454e-05, + "loss": 2.7928, + "step": 15700 + }, + { + "epoch": 0.05, + "learning_rate": 4.738366412871048e-05, + "loss": 2.7846, + "step": 15800 + }, + { + "epoch": 0.05, + "learning_rate": 4.736710504091751e-05, + "loss": 2.7773, + "step": 15900 + }, + { + "epoch": 0.05, + "learning_rate": 4.735054595312454e-05, + "loss": 2.783, + "step": 16000 + }, + { + "epoch": 0.05, + "learning_rate": 4.733398686533156e-05, + "loss": 2.7862, + "step": 16100 + }, + { + "epoch": 0.05, + "learning_rate": 4.731742777753859e-05, + "loss": 2.7786, + "step": 16200 + }, + { + "epoch": 0.05, + "learning_rate": 4.730086868974562e-05, + "loss": 2.7859, + "step": 16300 + }, + { + "epoch": 0.05, + "learning_rate": 4.7284309601952646e-05, + "loss": 2.7774, + "step": 16400 + }, + { + "epoch": 0.05, + "learning_rate": 4.7267750514159677e-05, + "loss": 2.7832, + "step": 16500 + }, + { + "epoch": 0.05, + "learning_rate": 4.725119142636671e-05, + "loss": 2.7747, + "step": 16600 + }, + { + "epoch": 0.06, + "learning_rate": 4.723463233857374e-05, + "loss": 2.7643, + "step": 16700 + }, + { + "epoch": 0.06, + "learning_rate": 4.721807325078077e-05, + "loss": 2.7757, + "step": 16800 + }, + { + "epoch": 0.06, + "learning_rate": 4.720151416298779e-05, + "loss": 2.7791, + "step": 16900 + }, + { + "epoch": 0.06, + "learning_rate": 4.718495507519482e-05, + "loss": 2.7813, + "step": 17000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7168395987401845e-05, + "loss": 2.7935, + "step": 17100 + }, + { + "epoch": 0.06, + "learning_rate": 4.7151836899608875e-05, + "loss": 2.7802, + "step": 17200 + }, + { + "epoch": 0.06, + "learning_rate": 4.7135277811815905e-05, + "loss": 2.7884, + "step": 17300 + }, + { + "epoch": 0.06, + "learning_rate": 4.711871872402293e-05, + "loss": 2.7835, + "step": 17400 + }, + { + "epoch": 0.06, + "learning_rate": 4.710215963622996e-05, + "loss": 2.7925, + "step": 17500 + }, + { + "epoch": 0.06, + "learning_rate": 4.708560054843699e-05, + "loss": 2.7974, + "step": 17600 + }, + { + "epoch": 0.06, + "learning_rate": 4.706904146064402e-05, + "loss": 2.7875, + "step": 17700 + }, + { + "epoch": 0.06, + "learning_rate": 4.705248237285105e-05, + "loss": 2.7878, + "step": 17800 + }, + { + "epoch": 0.06, + "learning_rate": 4.7035923285058074e-05, + "loss": 2.7869, + "step": 17900 + }, + { + "epoch": 0.06, + "learning_rate": 4.7019364197265104e-05, + "loss": 2.7716, + "step": 18000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7002805109472134e-05, + "loss": 2.7774, + "step": 18100 + }, + { + "epoch": 0.06, + "learning_rate": 4.698624602167916e-05, + "loss": 2.7824, + "step": 18200 + }, + { + "epoch": 0.06, + "learning_rate": 4.696968693388619e-05, + "loss": 2.7863, + "step": 18300 + }, + { + "epoch": 0.06, + "learning_rate": 4.695312784609321e-05, + "loss": 2.7737, + "step": 18400 + }, + { + "epoch": 0.06, + "learning_rate": 4.693656875830024e-05, + "loss": 2.7721, + "step": 18500 + }, + { + "epoch": 0.06, + "learning_rate": 4.692000967050727e-05, + "loss": 2.7834, + "step": 18600 + }, + { + "epoch": 0.06, + "learning_rate": 4.69034505827143e-05, + "loss": 2.7756, + "step": 18700 + }, + { + "epoch": 0.06, + "learning_rate": 4.688689149492133e-05, + "loss": 2.7757, + "step": 18800 + }, + { + "epoch": 0.06, + "learning_rate": 4.6870332407128356e-05, + "loss": 2.7818, + "step": 18900 + }, + { + "epoch": 0.06, + "learning_rate": 4.685377331933539e-05, + "loss": 2.774, + "step": 19000 + }, + { + "epoch": 0.06, + "learning_rate": 4.683721423154242e-05, + "loss": 2.7885, + "step": 19100 + }, + { + "epoch": 0.06, + "learning_rate": 4.682065514374944e-05, + "loss": 2.7653, + "step": 19200 + }, + { + "epoch": 0.06, + "learning_rate": 4.680409605595647e-05, + "loss": 2.7829, + "step": 19300 + }, + { + "epoch": 0.06, + "learning_rate": 4.67875369681635e-05, + "loss": 2.7784, + "step": 19400 + }, + { + "epoch": 0.06, + "learning_rate": 4.6770977880370525e-05, + "loss": 2.7826, + "step": 19500 + }, + { + "epoch": 0.06, + "learning_rate": 4.675441879257756e-05, + "loss": 2.7876, + "step": 19600 + }, + { + "epoch": 0.07, + "learning_rate": 4.6737859704784585e-05, + "loss": 2.7801, + "step": 19700 + }, + { + "epoch": 0.07, + "learning_rate": 4.6721300616991615e-05, + "loss": 2.7858, + "step": 19800 + }, + { + "epoch": 0.07, + "learning_rate": 4.670474152919864e-05, + "loss": 2.7851, + "step": 19900 + }, + { + "epoch": 0.07, + "learning_rate": 4.668818244140567e-05, + "loss": 2.7871, + "step": 20000 + }, + { + "epoch": 0.07, + "learning_rate": 4.66716233536127e-05, + "loss": 2.7874, + "step": 20100 + }, + { + "epoch": 0.07, + "learning_rate": 4.665506426581972e-05, + "loss": 2.7855, + "step": 20200 + }, + { + "epoch": 0.07, + "learning_rate": 4.6638505178026753e-05, + "loss": 2.7686, + "step": 20300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6621946090233784e-05, + "loss": 2.7826, + "step": 20400 + }, + { + "epoch": 0.07, + "learning_rate": 4.660538700244081e-05, + "loss": 2.7919, + "step": 20500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6588827914647844e-05, + "loss": 2.7845, + "step": 20600 + }, + { + "epoch": 0.07, + "learning_rate": 4.657226882685487e-05, + "loss": 2.8025, + "step": 20700 + }, + { + "epoch": 0.07, + "learning_rate": 4.65557097390619e-05, + "loss": 2.7732, + "step": 20800 + }, + { + "epoch": 0.07, + "learning_rate": 4.653915065126893e-05, + "loss": 2.7927, + "step": 20900 + }, + { + "epoch": 0.07, + "learning_rate": 4.652259156347595e-05, + "loss": 2.7786, + "step": 21000 + }, + { + "epoch": 0.07, + "learning_rate": 4.650603247568298e-05, + "loss": 2.7611, + "step": 21100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6489473387890006e-05, + "loss": 2.7858, + "step": 21200 + }, + { + "epoch": 0.07, + "learning_rate": 4.6472914300097036e-05, + "loss": 2.7905, + "step": 21300 + }, + { + "epoch": 0.07, + "learning_rate": 4.6456355212304066e-05, + "loss": 2.7664, + "step": 21400 + }, + { + "epoch": 0.07, + "learning_rate": 4.643979612451109e-05, + "loss": 2.7882, + "step": 21500 + }, + { + "epoch": 0.07, + "learning_rate": 4.642323703671813e-05, + "loss": 2.7849, + "step": 21600 + }, + { + "epoch": 0.07, + "learning_rate": 4.640667794892515e-05, + "loss": 2.7855, + "step": 21700 + }, + { + "epoch": 0.07, + "learning_rate": 4.639011886113218e-05, + "loss": 2.7765, + "step": 21800 + }, + { + "epoch": 0.07, + "learning_rate": 4.637355977333921e-05, + "loss": 2.7879, + "step": 21900 + }, + { + "epoch": 0.07, + "learning_rate": 4.6357000685546235e-05, + "loss": 2.779, + "step": 22000 + }, + { + "epoch": 0.07, + "learning_rate": 4.6340441597753265e-05, + "loss": 2.7673, + "step": 22100 + }, + { + "epoch": 0.07, + "learning_rate": 4.6323882509960295e-05, + "loss": 2.7804, + "step": 22200 + }, + { + "epoch": 0.07, + "learning_rate": 4.630732342216732e-05, + "loss": 2.7752, + "step": 22300 + }, + { + "epoch": 0.07, + "learning_rate": 4.629076433437435e-05, + "loss": 2.776, + "step": 22400 + }, + { + "epoch": 0.07, + "learning_rate": 4.627420524658137e-05, + "loss": 2.769, + "step": 22500 + }, + { + "epoch": 0.07, + "learning_rate": 4.625764615878841e-05, + "loss": 2.7803, + "step": 22600 + }, + { + "epoch": 0.08, + "learning_rate": 4.624108707099544e-05, + "loss": 2.7783, + "step": 22700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6224527983202463e-05, + "loss": 2.7791, + "step": 22800 + }, + { + "epoch": 0.08, + "learning_rate": 4.6207968895409494e-05, + "loss": 2.7947, + "step": 22900 + }, + { + "epoch": 0.08, + "learning_rate": 4.619140980761652e-05, + "loss": 2.7764, + "step": 23000 + }, + { + "epoch": 0.08, + "learning_rate": 4.617485071982355e-05, + "loss": 2.7893, + "step": 23100 + }, + { + "epoch": 0.08, + "learning_rate": 4.615829163203058e-05, + "loss": 2.7636, + "step": 23200 + }, + { + "epoch": 0.08, + "learning_rate": 4.61417325442376e-05, + "loss": 2.7794, + "step": 23300 + }, + { + "epoch": 0.08, + "learning_rate": 4.612517345644463e-05, + "loss": 2.7965, + "step": 23400 + }, + { + "epoch": 0.08, + "learning_rate": 4.610861436865166e-05, + "loss": 2.7818, + "step": 23500 + }, + { + "epoch": 0.08, + "learning_rate": 4.609205528085869e-05, + "loss": 2.7773, + "step": 23600 + }, + { + "epoch": 0.08, + "learning_rate": 4.607549619306572e-05, + "loss": 2.7809, + "step": 23700 + }, + { + "epoch": 0.08, + "learning_rate": 4.6058937105272746e-05, + "loss": 2.7609, + "step": 23800 + }, + { + "epoch": 0.08, + "learning_rate": 4.6042378017479776e-05, + "loss": 2.7931, + "step": 23900 + }, + { + "epoch": 0.08, + "learning_rate": 4.602581892968681e-05, + "loss": 2.7823, + "step": 24000 + }, + { + "epoch": 0.08, + "learning_rate": 4.600925984189383e-05, + "loss": 2.8011, + "step": 24100 + }, + { + "epoch": 0.08, + "learning_rate": 4.599270075410086e-05, + "loss": 2.7806, + "step": 24200 + }, + { + "epoch": 0.08, + "learning_rate": 4.5976141666307884e-05, + "loss": 2.7745, + "step": 24300 + }, + { + "epoch": 0.08, + "learning_rate": 4.5959582578514914e-05, + "loss": 2.7718, + "step": 24400 + }, + { + "epoch": 0.08, + "learning_rate": 4.5943023490721945e-05, + "loss": 2.7815, + "step": 24500 + }, + { + "epoch": 0.08, + "learning_rate": 4.5926464402928975e-05, + "loss": 2.7826, + "step": 24600 + }, + { + "epoch": 0.08, + "learning_rate": 4.5909905315136005e-05, + "loss": 2.7894, + "step": 24700 + }, + { + "epoch": 0.08, + "learning_rate": 4.589334622734303e-05, + "loss": 2.7665, + "step": 24800 + }, + { + "epoch": 0.08, + "learning_rate": 4.587678713955006e-05, + "loss": 2.7787, + "step": 24900 + }, + { + "epoch": 0.08, + "learning_rate": 4.586022805175709e-05, + "loss": 2.7896, + "step": 25000 + }, + { + "epoch": 0.08, + "learning_rate": 4.584366896396411e-05, + "loss": 2.78, + "step": 25100 + }, + { + "epoch": 0.08, + "learning_rate": 4.582710987617114e-05, + "loss": 2.7894, + "step": 25200 + }, + { + "epoch": 0.08, + "learning_rate": 4.5810550788378174e-05, + "loss": 2.7943, + "step": 25300 + }, + { + "epoch": 0.08, + "learning_rate": 4.57939917005852e-05, + "loss": 2.7871, + "step": 25400 + }, + { + "epoch": 0.08, + "learning_rate": 4.577743261279223e-05, + "loss": 2.7814, + "step": 25500 + }, + { + "epoch": 0.08, + "learning_rate": 4.576087352499926e-05, + "loss": 2.7911, + "step": 25600 + }, + { + "epoch": 0.09, + "learning_rate": 4.574431443720629e-05, + "loss": 2.7669, + "step": 25700 + }, + { + "epoch": 0.09, + "learning_rate": 4.572775534941331e-05, + "loss": 2.7794, + "step": 25800 + }, + { + "epoch": 0.09, + "learning_rate": 4.571119626162034e-05, + "loss": 2.803, + "step": 25900 + }, + { + "epoch": 0.09, + "learning_rate": 4.569463717382737e-05, + "loss": 2.7836, + "step": 26000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5678078086034396e-05, + "loss": 2.781, + "step": 26100 + }, + { + "epoch": 0.09, + "learning_rate": 4.5661518998241426e-05, + "loss": 2.7913, + "step": 26200 + }, + { + "epoch": 0.09, + "learning_rate": 4.5644959910448456e-05, + "loss": 2.7712, + "step": 26300 + }, + { + "epoch": 0.09, + "learning_rate": 4.562840082265548e-05, + "loss": 2.7788, + "step": 26400 + }, + { + "epoch": 0.09, + "learning_rate": 4.561184173486251e-05, + "loss": 2.7796, + "step": 26500 + }, + { + "epoch": 0.09, + "learning_rate": 4.559528264706954e-05, + "loss": 2.7754, + "step": 26600 + }, + { + "epoch": 0.09, + "learning_rate": 4.557872355927657e-05, + "loss": 2.7809, + "step": 26700 + }, + { + "epoch": 0.09, + "learning_rate": 4.55621644714836e-05, + "loss": 2.793, + "step": 26800 + }, + { + "epoch": 0.09, + "learning_rate": 4.5545605383690624e-05, + "loss": 2.7834, + "step": 26900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5529046295897655e-05, + "loss": 2.798, + "step": 27000 + }, + { + "epoch": 0.09, + "learning_rate": 4.551248720810468e-05, + "loss": 2.7781, + "step": 27100 + }, + { + "epoch": 0.09, + "learning_rate": 4.549592812031171e-05, + "loss": 2.7852, + "step": 27200 + }, + { + "epoch": 0.09, + "learning_rate": 4.547936903251874e-05, + "loss": 2.782, + "step": 27300 + }, + { + "epoch": 0.09, + "learning_rate": 4.546280994472576e-05, + "loss": 2.7856, + "step": 27400 + }, + { + "epoch": 0.09, + "learning_rate": 4.544625085693279e-05, + "loss": 2.7721, + "step": 27500 + }, + { + "epoch": 0.09, + "learning_rate": 4.542969176913982e-05, + "loss": 2.7839, + "step": 27600 + }, + { + "epoch": 0.09, + "learning_rate": 4.541313268134685e-05, + "loss": 2.7848, + "step": 27700 + }, + { + "epoch": 0.09, + "learning_rate": 4.5396573593553884e-05, + "loss": 2.7943, + "step": 27800 + }, + { + "epoch": 0.09, + "learning_rate": 4.538001450576091e-05, + "loss": 2.7878, + "step": 27900 + }, + { + "epoch": 0.09, + "learning_rate": 4.536345541796794e-05, + "loss": 2.7936, + "step": 28000 + }, + { + "epoch": 0.09, + "learning_rate": 4.534689633017497e-05, + "loss": 2.7804, + "step": 28100 + }, + { + "epoch": 0.09, + "learning_rate": 4.533033724238199e-05, + "loss": 2.7908, + "step": 28200 + }, + { + "epoch": 0.09, + "learning_rate": 4.531377815458902e-05, + "loss": 2.7857, + "step": 28300 + }, + { + "epoch": 0.09, + "learning_rate": 4.5297219066796045e-05, + "loss": 2.7779, + "step": 28400 + }, + { + "epoch": 0.09, + "learning_rate": 4.5280659979003075e-05, + "loss": 2.7891, + "step": 28500 + }, + { + "epoch": 0.09, + "learning_rate": 4.526410089121011e-05, + "loss": 2.7715, + "step": 28600 + }, + { + "epoch": 0.1, + "learning_rate": 4.5247541803417136e-05, + "loss": 2.7961, + "step": 28700 + }, + { + "epoch": 0.1, + "learning_rate": 4.5230982715624166e-05, + "loss": 2.781, + "step": 28800 + }, + { + "epoch": 0.1, + "learning_rate": 4.521442362783119e-05, + "loss": 2.7705, + "step": 28900 + }, + { + "epoch": 0.1, + "learning_rate": 4.519786454003822e-05, + "loss": 2.7888, + "step": 29000 + }, + { + "epoch": 0.1, + "learning_rate": 4.518130545224525e-05, + "loss": 2.7971, + "step": 29100 + }, + { + "epoch": 0.1, + "learning_rate": 4.5164746364452274e-05, + "loss": 2.7816, + "step": 29200 + }, + { + "epoch": 0.1, + "learning_rate": 4.5148187276659304e-05, + "loss": 2.7808, + "step": 29300 + }, + { + "epoch": 0.1, + "learning_rate": 4.5131628188866334e-05, + "loss": 2.7827, + "step": 29400 + }, + { + "epoch": 0.1, + "learning_rate": 4.511506910107336e-05, + "loss": 2.7661, + "step": 29500 + }, + { + "epoch": 0.1, + "learning_rate": 4.5098510013280395e-05, + "loss": 2.7774, + "step": 29600 + }, + { + "epoch": 0.1, + "learning_rate": 4.508195092548742e-05, + "loss": 2.7698, + "step": 29700 + }, + { + "epoch": 0.1, + "learning_rate": 4.506539183769445e-05, + "loss": 2.7824, + "step": 29800 + }, + { + "epoch": 0.1, + "learning_rate": 4.504883274990148e-05, + "loss": 2.7745, + "step": 29900 + }, + { + "epoch": 0.1, + "learning_rate": 4.50322736621085e-05, + "loss": 2.7787, + "step": 30000 + }, + { + "epoch": 0.1, + "learning_rate": 4.501571457431553e-05, + "loss": 2.7825, + "step": 30100 + }, + { + "epoch": 0.1, + "learning_rate": 4.4999155486522557e-05, + "loss": 2.7763, + "step": 30200 + }, + { + "epoch": 0.1, + "learning_rate": 4.498259639872959e-05, + "loss": 2.7907, + "step": 30300 + }, + { + "epoch": 0.1, + "learning_rate": 4.496603731093662e-05, + "loss": 2.7634, + "step": 30400 + }, + { + "epoch": 0.1, + "learning_rate": 4.494947822314364e-05, + "loss": 2.7782, + "step": 30500 + }, + { + "epoch": 0.1, + "learning_rate": 4.493291913535068e-05, + "loss": 2.7861, + "step": 30600 + }, + { + "epoch": 0.1, + "learning_rate": 4.49163600475577e-05, + "loss": 2.7676, + "step": 30700 + }, + { + "epoch": 0.1, + "learning_rate": 4.489980095976473e-05, + "loss": 2.7768, + "step": 30800 + }, + { + "epoch": 0.1, + "learning_rate": 4.488324187197176e-05, + "loss": 2.7853, + "step": 30900 + }, + { + "epoch": 0.1, + "learning_rate": 4.4866682784178785e-05, + "loss": 2.7915, + "step": 31000 + }, + { + "epoch": 0.1, + "learning_rate": 4.4850123696385816e-05, + "loss": 2.7747, + "step": 31100 + }, + { + "epoch": 0.1, + "learning_rate": 4.4833564608592846e-05, + "loss": 2.7749, + "step": 31200 + }, + { + "epoch": 0.1, + "learning_rate": 4.481700552079987e-05, + "loss": 2.7841, + "step": 31300 + }, + { + "epoch": 0.1, + "learning_rate": 4.48004464330069e-05, + "loss": 2.7805, + "step": 31400 + }, + { + "epoch": 0.1, + "learning_rate": 4.478388734521392e-05, + "loss": 2.7825, + "step": 31500 + }, + { + "epoch": 0.1, + "learning_rate": 4.476732825742096e-05, + "loss": 2.7719, + "step": 31600 + }, + { + "epoch": 0.1, + "learning_rate": 4.475076916962799e-05, + "loss": 2.7913, + "step": 31700 + }, + { + "epoch": 0.11, + "learning_rate": 4.4734210081835014e-05, + "loss": 2.7799, + "step": 31800 + }, + { + "epoch": 0.11, + "learning_rate": 4.4717650994042045e-05, + "loss": 2.7791, + "step": 31900 + }, + { + "epoch": 0.11, + "learning_rate": 4.470109190624907e-05, + "loss": 2.8031, + "step": 32000 + }, + { + "epoch": 0.11, + "learning_rate": 4.46845328184561e-05, + "loss": 2.7789, + "step": 32100 + }, + { + "epoch": 0.11, + "learning_rate": 4.466797373066313e-05, + "loss": 2.7778, + "step": 32200 + }, + { + "epoch": 0.11, + "learning_rate": 4.465141464287015e-05, + "loss": 2.7862, + "step": 32300 + }, + { + "epoch": 0.11, + "learning_rate": 4.463485555507718e-05, + "loss": 2.7664, + "step": 32400 + }, + { + "epoch": 0.11, + "learning_rate": 4.461829646728421e-05, + "loss": 2.7767, + "step": 32500 + }, + { + "epoch": 0.11, + "learning_rate": 4.460173737949124e-05, + "loss": 2.7814, + "step": 32600 + }, + { + "epoch": 0.11, + "learning_rate": 4.458517829169827e-05, + "loss": 2.7786, + "step": 32700 + }, + { + "epoch": 0.11, + "learning_rate": 4.45686192039053e-05, + "loss": 2.765, + "step": 32800 + }, + { + "epoch": 0.11, + "learning_rate": 4.455206011611233e-05, + "loss": 2.783, + "step": 32900 + }, + { + "epoch": 0.11, + "learning_rate": 4.453550102831935e-05, + "loss": 2.7747, + "step": 33000 + }, + { + "epoch": 0.11, + "learning_rate": 4.451894194052638e-05, + "loss": 2.7765, + "step": 33100 + }, + { + "epoch": 0.11, + "learning_rate": 4.450238285273341e-05, + "loss": 2.7934, + "step": 33200 + }, + { + "epoch": 0.11, + "learning_rate": 4.4485823764940435e-05, + "loss": 2.7947, + "step": 33300 + }, + { + "epoch": 0.11, + "learning_rate": 4.4469264677147465e-05, + "loss": 2.7732, + "step": 33400 + }, + { + "epoch": 0.11, + "learning_rate": 4.4452705589354495e-05, + "loss": 2.7886, + "step": 33500 + }, + { + "epoch": 0.11, + "learning_rate": 4.4436146501561526e-05, + "loss": 2.7673, + "step": 33600 + }, + { + "epoch": 0.11, + "learning_rate": 4.4419587413768556e-05, + "loss": 2.7789, + "step": 33700 + }, + { + "epoch": 0.11, + "learning_rate": 4.440302832597558e-05, + "loss": 2.7755, + "step": 33800 + }, + { + "epoch": 0.11, + "learning_rate": 4.438646923818261e-05, + "loss": 2.7756, + "step": 33900 + }, + { + "epoch": 0.11, + "learning_rate": 4.436991015038964e-05, + "loss": 2.7785, + "step": 34000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4353351062596664e-05, + "loss": 2.7778, + "step": 34100 + }, + { + "epoch": 0.11, + "learning_rate": 4.4336791974803694e-05, + "loss": 2.7646, + "step": 34200 + }, + { + "epoch": 0.11, + "learning_rate": 4.432023288701072e-05, + "loss": 2.7771, + "step": 34300 + }, + { + "epoch": 0.11, + "learning_rate": 4.430367379921775e-05, + "loss": 2.7974, + "step": 34400 + }, + { + "epoch": 0.11, + "learning_rate": 4.428711471142478e-05, + "loss": 2.7814, + "step": 34500 + }, + { + "epoch": 0.11, + "learning_rate": 4.427055562363181e-05, + "loss": 2.7861, + "step": 34600 + }, + { + "epoch": 0.11, + "learning_rate": 4.425399653583884e-05, + "loss": 2.7909, + "step": 34700 + }, + { + "epoch": 0.12, + "learning_rate": 4.423743744804586e-05, + "loss": 2.7781, + "step": 34800 + }, + { + "epoch": 0.12, + "learning_rate": 4.422087836025289e-05, + "loss": 2.7919, + "step": 34900 + }, + { + "epoch": 0.12, + "learning_rate": 4.420431927245992e-05, + "loss": 2.7792, + "step": 35000 + }, + { + "epoch": 0.12, + "learning_rate": 4.4187760184666946e-05, + "loss": 2.7902, + "step": 35100 + }, + { + "epoch": 0.12, + "learning_rate": 4.417120109687398e-05, + "loss": 2.7809, + "step": 35200 + }, + { + "epoch": 0.12, + "learning_rate": 4.415464200908101e-05, + "loss": 2.7827, + "step": 35300 + }, + { + "epoch": 0.12, + "learning_rate": 4.413808292128803e-05, + "loss": 2.7782, + "step": 35400 + }, + { + "epoch": 0.12, + "learning_rate": 4.412152383349506e-05, + "loss": 2.7922, + "step": 35500 + }, + { + "epoch": 0.12, + "learning_rate": 4.410496474570209e-05, + "loss": 2.788, + "step": 35600 + }, + { + "epoch": 0.12, + "learning_rate": 4.408840565790912e-05, + "loss": 2.7745, + "step": 35700 + }, + { + "epoch": 0.12, + "learning_rate": 4.407184657011615e-05, + "loss": 2.7619, + "step": 35800 + }, + { + "epoch": 0.12, + "learning_rate": 4.4055287482323175e-05, + "loss": 2.7751, + "step": 35900 + }, + { + "epoch": 0.12, + "learning_rate": 4.4038728394530206e-05, + "loss": 2.781, + "step": 36000 + }, + { + "epoch": 0.12, + "learning_rate": 4.402216930673723e-05, + "loss": 2.7855, + "step": 36100 + }, + { + "epoch": 0.12, + "learning_rate": 4.400561021894426e-05, + "loss": 2.7878, + "step": 36200 + }, + { + "epoch": 0.12, + "learning_rate": 4.398905113115129e-05, + "loss": 2.7891, + "step": 36300 + }, + { + "epoch": 0.12, + "learning_rate": 4.397249204335831e-05, + "loss": 2.7766, + "step": 36400 + }, + { + "epoch": 0.12, + "learning_rate": 4.3955932955565343e-05, + "loss": 2.7772, + "step": 36500 + }, + { + "epoch": 0.12, + "learning_rate": 4.3939373867772374e-05, + "loss": 2.7807, + "step": 36600 + }, + { + "epoch": 0.12, + "learning_rate": 4.3922814779979404e-05, + "loss": 2.7763, + "step": 36700 + }, + { + "epoch": 0.12, + "learning_rate": 4.3906255692186434e-05, + "loss": 2.7632, + "step": 36800 + }, + { + "epoch": 0.12, + "learning_rate": 4.388969660439346e-05, + "loss": 2.7803, + "step": 36900 + }, + { + "epoch": 0.12, + "learning_rate": 4.387313751660049e-05, + "loss": 2.7679, + "step": 37000 + }, + { + "epoch": 0.12, + "learning_rate": 4.385657842880752e-05, + "loss": 2.7711, + "step": 37100 + }, + { + "epoch": 0.12, + "learning_rate": 4.384001934101454e-05, + "loss": 2.7788, + "step": 37200 + }, + { + "epoch": 0.12, + "learning_rate": 4.382346025322157e-05, + "loss": 2.7683, + "step": 37300 + }, + { + "epoch": 0.12, + "learning_rate": 4.3806901165428596e-05, + "loss": 2.7817, + "step": 37400 + }, + { + "epoch": 0.12, + "learning_rate": 4.3790342077635626e-05, + "loss": 2.7799, + "step": 37500 + }, + { + "epoch": 0.12, + "learning_rate": 4.377378298984266e-05, + "loss": 2.7864, + "step": 37600 + }, + { + "epoch": 0.12, + "learning_rate": 4.375722390204969e-05, + "loss": 2.7847, + "step": 37700 + }, + { + "epoch": 0.13, + "learning_rate": 4.374066481425672e-05, + "loss": 2.7812, + "step": 37800 + }, + { + "epoch": 0.13, + "learning_rate": 4.372410572646374e-05, + "loss": 2.7792, + "step": 37900 + }, + { + "epoch": 0.13, + "learning_rate": 4.370754663867077e-05, + "loss": 2.7858, + "step": 38000 + }, + { + "epoch": 0.13, + "learning_rate": 4.36909875508778e-05, + "loss": 2.7867, + "step": 38100 + }, + { + "epoch": 0.13, + "learning_rate": 4.3674428463084825e-05, + "loss": 2.7868, + "step": 38200 + }, + { + "epoch": 0.13, + "learning_rate": 4.3657869375291855e-05, + "loss": 2.7763, + "step": 38300 + }, + { + "epoch": 0.13, + "learning_rate": 4.3641310287498885e-05, + "loss": 2.7739, + "step": 38400 + }, + { + "epoch": 0.13, + "learning_rate": 4.362475119970591e-05, + "loss": 2.7827, + "step": 38500 + }, + { + "epoch": 0.13, + "learning_rate": 4.3608192111912946e-05, + "loss": 2.7661, + "step": 38600 + }, + { + "epoch": 0.13, + "learning_rate": 4.359163302411997e-05, + "loss": 2.7839, + "step": 38700 + }, + { + "epoch": 0.13, + "learning_rate": 4.3575073936327e-05, + "loss": 2.7827, + "step": 38800 + }, + { + "epoch": 0.13, + "learning_rate": 4.355851484853403e-05, + "loss": 2.7967, + "step": 38900 + }, + { + "epoch": 0.13, + "learning_rate": 4.3541955760741054e-05, + "loss": 2.78, + "step": 39000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3525396672948084e-05, + "loss": 2.7883, + "step": 39100 + }, + { + "epoch": 0.13, + "learning_rate": 4.350883758515511e-05, + "loss": 2.7717, + "step": 39200 + }, + { + "epoch": 0.13, + "learning_rate": 4.349227849736214e-05, + "loss": 2.7651, + "step": 39300 + }, + { + "epoch": 0.13, + "learning_rate": 4.347571940956917e-05, + "loss": 2.7869, + "step": 39400 + }, + { + "epoch": 0.13, + "learning_rate": 4.345916032177619e-05, + "loss": 2.7739, + "step": 39500 + }, + { + "epoch": 0.13, + "learning_rate": 4.344260123398323e-05, + "loss": 2.7743, + "step": 39600 + }, + { + "epoch": 0.13, + "learning_rate": 4.342604214619025e-05, + "loss": 2.7709, + "step": 39700 + }, + { + "epoch": 0.13, + "learning_rate": 4.340948305839728e-05, + "loss": 2.7829, + "step": 39800 + }, + { + "epoch": 0.13, + "learning_rate": 4.339292397060431e-05, + "loss": 2.783, + "step": 39900 + }, + { + "epoch": 0.13, + "learning_rate": 4.3376364882811336e-05, + "loss": 2.7823, + "step": 40000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3359805795018366e-05, + "loss": 2.7846, + "step": 40100 + }, + { + "epoch": 0.13, + "learning_rate": 4.334324670722539e-05, + "loss": 2.7777, + "step": 40200 + }, + { + "epoch": 0.13, + "learning_rate": 4.332668761943242e-05, + "loss": 2.7726, + "step": 40300 + }, + { + "epoch": 0.13, + "learning_rate": 4.331012853163945e-05, + "loss": 2.7785, + "step": 40400 + }, + { + "epoch": 0.13, + "learning_rate": 4.3293569443846474e-05, + "loss": 2.776, + "step": 40500 + }, + { + "epoch": 0.13, + "learning_rate": 4.327701035605351e-05, + "loss": 2.7743, + "step": 40600 + }, + { + "epoch": 0.13, + "learning_rate": 4.3260451268260535e-05, + "loss": 2.7747, + "step": 40700 + }, + { + "epoch": 0.14, + "learning_rate": 4.3243892180467565e-05, + "loss": 2.7821, + "step": 40800 + }, + { + "epoch": 0.14, + "learning_rate": 4.3227333092674595e-05, + "loss": 2.7702, + "step": 40900 + }, + { + "epoch": 0.14, + "learning_rate": 4.321077400488162e-05, + "loss": 2.7712, + "step": 41000 + }, + { + "epoch": 0.14, + "learning_rate": 4.319421491708865e-05, + "loss": 2.7613, + "step": 41100 + }, + { + "epoch": 0.14, + "learning_rate": 4.317765582929568e-05, + "loss": 2.769, + "step": 41200 + }, + { + "epoch": 0.14, + "learning_rate": 4.31610967415027e-05, + "loss": 2.7878, + "step": 41300 + }, + { + "epoch": 0.14, + "learning_rate": 4.314453765370973e-05, + "loss": 2.7684, + "step": 41400 + }, + { + "epoch": 0.14, + "learning_rate": 4.312797856591676e-05, + "loss": 2.7804, + "step": 41500 + }, + { + "epoch": 0.14, + "learning_rate": 4.3111419478123794e-05, + "loss": 2.7601, + "step": 41600 + }, + { + "epoch": 0.14, + "learning_rate": 4.3094860390330824e-05, + "loss": 2.7866, + "step": 41700 + }, + { + "epoch": 0.14, + "learning_rate": 4.307830130253785e-05, + "loss": 2.7717, + "step": 41800 + }, + { + "epoch": 0.14, + "learning_rate": 4.306174221474488e-05, + "loss": 2.7905, + "step": 41900 + }, + { + "epoch": 0.14, + "learning_rate": 4.30451831269519e-05, + "loss": 2.7613, + "step": 42000 + }, + { + "epoch": 0.14, + "learning_rate": 4.302862403915893e-05, + "loss": 2.7759, + "step": 42100 + }, + { + "epoch": 0.14, + "learning_rate": 4.301206495136596e-05, + "loss": 2.7852, + "step": 42200 + }, + { + "epoch": 0.14, + "learning_rate": 4.2995505863572986e-05, + "loss": 2.7731, + "step": 42300 + }, + { + "epoch": 0.14, + "learning_rate": 4.2978946775780016e-05, + "loss": 2.7889, + "step": 42400 + }, + { + "epoch": 0.14, + "learning_rate": 4.2962387687987046e-05, + "loss": 2.7963, + "step": 42500 + }, + { + "epoch": 0.14, + "learning_rate": 4.2945828600194077e-05, + "loss": 2.7962, + "step": 42600 + }, + { + "epoch": 0.14, + "learning_rate": 4.292926951240111e-05, + "loss": 2.7735, + "step": 42700 + }, + { + "epoch": 0.14, + "learning_rate": 4.291271042460813e-05, + "loss": 2.7668, + "step": 42800 + }, + { + "epoch": 0.14, + "learning_rate": 4.289615133681516e-05, + "loss": 2.7756, + "step": 42900 + }, + { + "epoch": 0.14, + "learning_rate": 4.287959224902219e-05, + "loss": 2.7854, + "step": 43000 + }, + { + "epoch": 0.14, + "learning_rate": 4.2863033161229214e-05, + "loss": 2.786, + "step": 43100 + }, + { + "epoch": 0.14, + "learning_rate": 4.2846474073436245e-05, + "loss": 2.7745, + "step": 43200 + }, + { + "epoch": 0.14, + "learning_rate": 4.282991498564327e-05, + "loss": 2.7814, + "step": 43300 + }, + { + "epoch": 0.14, + "learning_rate": 4.28133558978503e-05, + "loss": 2.7922, + "step": 43400 + }, + { + "epoch": 0.14, + "learning_rate": 4.279679681005733e-05, + "loss": 2.7818, + "step": 43500 + }, + { + "epoch": 0.14, + "learning_rate": 4.278023772226436e-05, + "loss": 2.7714, + "step": 43600 + }, + { + "epoch": 0.14, + "learning_rate": 4.276367863447139e-05, + "loss": 2.7854, + "step": 43700 + }, + { + "epoch": 0.15, + "learning_rate": 4.274711954667841e-05, + "loss": 2.7787, + "step": 43800 + }, + { + "epoch": 0.15, + "learning_rate": 4.273056045888544e-05, + "loss": 2.788, + "step": 43900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2714001371092474e-05, + "loss": 2.7661, + "step": 44000 + }, + { + "epoch": 0.15, + "learning_rate": 4.26974422832995e-05, + "loss": 2.7924, + "step": 44100 + }, + { + "epoch": 0.15, + "learning_rate": 4.268088319550653e-05, + "loss": 2.788, + "step": 44200 + }, + { + "epoch": 0.15, + "learning_rate": 4.266432410771356e-05, + "loss": 2.7842, + "step": 44300 + }, + { + "epoch": 0.15, + "learning_rate": 4.264776501992058e-05, + "loss": 2.7806, + "step": 44400 + }, + { + "epoch": 0.15, + "learning_rate": 4.263120593212761e-05, + "loss": 2.7746, + "step": 44500 + }, + { + "epoch": 0.15, + "learning_rate": 4.261464684433464e-05, + "loss": 2.788, + "step": 44600 + }, + { + "epoch": 0.15, + "learning_rate": 4.259808775654167e-05, + "loss": 2.7874, + "step": 44700 + }, + { + "epoch": 0.15, + "learning_rate": 4.25815286687487e-05, + "loss": 2.7711, + "step": 44800 + }, + { + "epoch": 0.15, + "learning_rate": 4.2564969580955726e-05, + "loss": 2.7841, + "step": 44900 + }, + { + "epoch": 0.15, + "learning_rate": 4.2548410493162756e-05, + "loss": 2.7632, + "step": 45000 + }, + { + "epoch": 0.15, + "learning_rate": 4.253185140536978e-05, + "loss": 2.7884, + "step": 45100 + }, + { + "epoch": 0.15, + "learning_rate": 4.251529231757681e-05, + "loss": 2.7688, + "step": 45200 + }, + { + "epoch": 0.15, + "learning_rate": 4.249873322978384e-05, + "loss": 2.7905, + "step": 45300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2482174141990864e-05, + "loss": 2.7803, + "step": 45400 + }, + { + "epoch": 0.15, + "learning_rate": 4.2465615054197894e-05, + "loss": 2.7682, + "step": 45500 + }, + { + "epoch": 0.15, + "learning_rate": 4.2449055966404925e-05, + "loss": 2.7746, + "step": 45600 + }, + { + "epoch": 0.15, + "learning_rate": 4.2432496878611955e-05, + "loss": 2.7714, + "step": 45700 + }, + { + "epoch": 0.15, + "learning_rate": 4.2415937790818985e-05, + "loss": 2.7784, + "step": 45800 + }, + { + "epoch": 0.15, + "learning_rate": 4.239937870302601e-05, + "loss": 2.7796, + "step": 45900 + }, + { + "epoch": 0.15, + "learning_rate": 4.238281961523304e-05, + "loss": 2.768, + "step": 46000 + }, + { + "epoch": 0.15, + "learning_rate": 4.236626052744007e-05, + "loss": 2.787, + "step": 46100 + }, + { + "epoch": 0.15, + "learning_rate": 4.234970143964709e-05, + "loss": 2.7978, + "step": 46200 + }, + { + "epoch": 0.15, + "learning_rate": 4.233314235185412e-05, + "loss": 2.7752, + "step": 46300 + }, + { + "epoch": 0.15, + "learning_rate": 4.2316583264061147e-05, + "loss": 2.766, + "step": 46400 + }, + { + "epoch": 0.15, + "learning_rate": 4.230002417626818e-05, + "loss": 2.7893, + "step": 46500 + }, + { + "epoch": 0.15, + "learning_rate": 4.228346508847521e-05, + "loss": 2.778, + "step": 46600 + }, + { + "epoch": 0.15, + "learning_rate": 4.226690600068224e-05, + "loss": 2.7681, + "step": 46700 + }, + { + "epoch": 0.15, + "learning_rate": 4.225034691288927e-05, + "loss": 2.7622, + "step": 46800 + }, + { + "epoch": 0.16, + "learning_rate": 4.223378782509629e-05, + "loss": 2.7893, + "step": 46900 + }, + { + "epoch": 0.16, + "learning_rate": 4.221722873730332e-05, + "loss": 2.8002, + "step": 47000 + }, + { + "epoch": 0.16, + "learning_rate": 4.220066964951035e-05, + "loss": 2.7741, + "step": 47100 + }, + { + "epoch": 0.16, + "learning_rate": 4.2184110561717375e-05, + "loss": 2.7814, + "step": 47200 + }, + { + "epoch": 0.16, + "learning_rate": 4.2167551473924406e-05, + "loss": 2.7686, + "step": 47300 + }, + { + "epoch": 0.16, + "learning_rate": 4.215099238613143e-05, + "loss": 2.7809, + "step": 47400 + }, + { + "epoch": 0.16, + "learning_rate": 4.213443329833846e-05, + "loss": 2.7718, + "step": 47500 + }, + { + "epoch": 0.16, + "learning_rate": 4.21178742105455e-05, + "loss": 2.764, + "step": 47600 + }, + { + "epoch": 0.16, + "learning_rate": 4.210131512275252e-05, + "loss": 2.7808, + "step": 47700 + }, + { + "epoch": 0.16, + "learning_rate": 4.208475603495955e-05, + "loss": 2.7696, + "step": 47800 + }, + { + "epoch": 0.16, + "learning_rate": 4.2068196947166574e-05, + "loss": 2.7685, + "step": 47900 + }, + { + "epoch": 0.16, + "learning_rate": 4.2051637859373604e-05, + "loss": 2.773, + "step": 48000 + }, + { + "epoch": 0.16, + "learning_rate": 4.2035078771580635e-05, + "loss": 2.7705, + "step": 48100 + }, + { + "epoch": 0.16, + "learning_rate": 4.201851968378766e-05, + "loss": 2.7686, + "step": 48200 + }, + { + "epoch": 0.16, + "learning_rate": 4.200196059599469e-05, + "loss": 2.7807, + "step": 48300 + }, + { + "epoch": 0.16, + "learning_rate": 4.198540150820172e-05, + "loss": 2.7745, + "step": 48400 + }, + { + "epoch": 0.16, + "learning_rate": 4.196884242040874e-05, + "loss": 2.7956, + "step": 48500 + }, + { + "epoch": 0.16, + "learning_rate": 4.195228333261578e-05, + "loss": 2.7742, + "step": 48600 + }, + { + "epoch": 0.16, + "learning_rate": 4.19357242448228e-05, + "loss": 2.7747, + "step": 48700 + }, + { + "epoch": 0.16, + "learning_rate": 4.191916515702983e-05, + "loss": 2.7816, + "step": 48800 + }, + { + "epoch": 0.16, + "learning_rate": 4.1902606069236863e-05, + "loss": 2.7863, + "step": 48900 + }, + { + "epoch": 0.16, + "learning_rate": 4.188604698144389e-05, + "loss": 2.7786, + "step": 49000 + }, + { + "epoch": 0.16, + "learning_rate": 4.186948789365092e-05, + "loss": 2.775, + "step": 49100 + }, + { + "epoch": 0.16, + "learning_rate": 4.185292880585794e-05, + "loss": 2.7834, + "step": 49200 + }, + { + "epoch": 0.16, + "learning_rate": 4.183636971806497e-05, + "loss": 2.7661, + "step": 49300 + }, + { + "epoch": 0.16, + "learning_rate": 4.1819810630272e-05, + "loss": 2.7726, + "step": 49400 + }, + { + "epoch": 0.16, + "learning_rate": 4.1803251542479025e-05, + "loss": 2.7777, + "step": 49500 + }, + { + "epoch": 0.16, + "learning_rate": 4.178669245468606e-05, + "loss": 2.7784, + "step": 49600 + }, + { + "epoch": 0.16, + "learning_rate": 4.1770133366893085e-05, + "loss": 2.7934, + "step": 49700 + }, + { + "epoch": 0.16, + "learning_rate": 4.1753574279100116e-05, + "loss": 2.7814, + "step": 49800 + }, + { + "epoch": 0.17, + "learning_rate": 4.1737015191307146e-05, + "loss": 2.7681, + "step": 49900 + }, + { + "epoch": 0.17, + "learning_rate": 4.172045610351417e-05, + "loss": 2.7667, + "step": 50000 + }, + { + "epoch": 0.17, + "learning_rate": 4.17038970157212e-05, + "loss": 2.783, + "step": 50100 + }, + { + "epoch": 0.17, + "learning_rate": 4.168733792792823e-05, + "loss": 2.767, + "step": 50200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1670778840135254e-05, + "loss": 2.7718, + "step": 50300 + }, + { + "epoch": 0.17, + "learning_rate": 4.1654219752342284e-05, + "loss": 2.7742, + "step": 50400 + }, + { + "epoch": 0.17, + "learning_rate": 4.163766066454931e-05, + "loss": 2.7618, + "step": 50500 + }, + { + "epoch": 0.17, + "learning_rate": 4.1621101576756345e-05, + "loss": 2.7596, + "step": 50600 + }, + { + "epoch": 0.17, + "learning_rate": 4.1604542488963375e-05, + "loss": 2.7777, + "step": 50700 + }, + { + "epoch": 0.17, + "learning_rate": 4.15879834011704e-05, + "loss": 2.7843, + "step": 50800 + }, + { + "epoch": 0.17, + "learning_rate": 4.157142431337743e-05, + "loss": 2.7709, + "step": 50900 + }, + { + "epoch": 0.17, + "learning_rate": 4.155486522558445e-05, + "loss": 2.7638, + "step": 51000 + }, + { + "epoch": 0.17, + "learning_rate": 4.153830613779148e-05, + "loss": 2.7725, + "step": 51100 + }, + { + "epoch": 0.17, + "learning_rate": 4.152174704999851e-05, + "loss": 2.7636, + "step": 51200 + }, + { + "epoch": 0.17, + "learning_rate": 4.1505187962205536e-05, + "loss": 2.7787, + "step": 51300 + }, + { + "epoch": 0.17, + "learning_rate": 4.148862887441257e-05, + "loss": 2.7779, + "step": 51400 + }, + { + "epoch": 0.17, + "learning_rate": 4.14720697866196e-05, + "loss": 2.7758, + "step": 51500 + }, + { + "epoch": 0.17, + "learning_rate": 4.145551069882663e-05, + "loss": 2.7652, + "step": 51600 + }, + { + "epoch": 0.17, + "learning_rate": 4.143895161103366e-05, + "loss": 2.7841, + "step": 51700 + }, + { + "epoch": 0.17, + "learning_rate": 4.142239252324068e-05, + "loss": 2.7681, + "step": 51800 + }, + { + "epoch": 0.17, + "learning_rate": 4.140583343544771e-05, + "loss": 2.7802, + "step": 51900 + }, + { + "epoch": 0.17, + "learning_rate": 4.138927434765474e-05, + "loss": 2.7882, + "step": 52000 + }, + { + "epoch": 0.17, + "learning_rate": 4.1372715259861765e-05, + "loss": 2.781, + "step": 52100 + }, + { + "epoch": 0.17, + "learning_rate": 4.1356156172068796e-05, + "loss": 2.7906, + "step": 52200 + }, + { + "epoch": 0.17, + "learning_rate": 4.133959708427582e-05, + "loss": 2.7847, + "step": 52300 + }, + { + "epoch": 0.17, + "learning_rate": 4.132303799648285e-05, + "loss": 2.7832, + "step": 52400 + }, + { + "epoch": 0.17, + "learning_rate": 4.130647890868988e-05, + "loss": 2.7637, + "step": 52500 + }, + { + "epoch": 0.17, + "learning_rate": 4.128991982089691e-05, + "loss": 2.7748, + "step": 52600 + }, + { + "epoch": 0.17, + "learning_rate": 4.127336073310394e-05, + "loss": 2.7669, + "step": 52700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1256801645310964e-05, + "loss": 2.7726, + "step": 52800 + }, + { + "epoch": 0.18, + "learning_rate": 4.1240242557517994e-05, + "loss": 2.7811, + "step": 52900 + }, + { + "epoch": 0.18, + "learning_rate": 4.1223683469725024e-05, + "loss": 2.7769, + "step": 53000 + }, + { + "epoch": 0.18, + "learning_rate": 4.120712438193205e-05, + "loss": 2.7668, + "step": 53100 + }, + { + "epoch": 0.18, + "learning_rate": 4.119056529413908e-05, + "loss": 2.786, + "step": 53200 + }, + { + "epoch": 0.18, + "learning_rate": 4.117400620634611e-05, + "loss": 2.7671, + "step": 53300 + }, + { + "epoch": 0.18, + "learning_rate": 4.115744711855313e-05, + "loss": 2.7594, + "step": 53400 + }, + { + "epoch": 0.18, + "learning_rate": 4.114088803076016e-05, + "loss": 2.7735, + "step": 53500 + }, + { + "epoch": 0.18, + "learning_rate": 4.112432894296719e-05, + "loss": 2.7822, + "step": 53600 + }, + { + "epoch": 0.18, + "learning_rate": 4.110776985517422e-05, + "loss": 2.7798, + "step": 53700 + }, + { + "epoch": 0.18, + "learning_rate": 4.1091210767381246e-05, + "loss": 2.7783, + "step": 53800 + }, + { + "epoch": 0.18, + "learning_rate": 4.107465167958828e-05, + "loss": 2.7799, + "step": 53900 + }, + { + "epoch": 0.18, + "learning_rate": 4.105809259179531e-05, + "loss": 2.7772, + "step": 54000 + }, + { + "epoch": 0.18, + "learning_rate": 4.104153350400233e-05, + "loss": 2.7765, + "step": 54100 + }, + { + "epoch": 0.18, + "learning_rate": 4.102497441620936e-05, + "loss": 2.7779, + "step": 54200 + }, + { + "epoch": 0.18, + "learning_rate": 4.100841532841639e-05, + "loss": 2.7596, + "step": 54300 + }, + { + "epoch": 0.18, + "learning_rate": 4.0991856240623415e-05, + "loss": 2.7687, + "step": 54400 + }, + { + "epoch": 0.18, + "learning_rate": 4.0975297152830445e-05, + "loss": 2.7869, + "step": 54500 + }, + { + "epoch": 0.18, + "learning_rate": 4.0958738065037475e-05, + "loss": 2.7692, + "step": 54600 + }, + { + "epoch": 0.18, + "learning_rate": 4.0942178977244506e-05, + "loss": 2.7662, + "step": 54700 + }, + { + "epoch": 0.18, + "learning_rate": 4.0925619889451536e-05, + "loss": 2.7748, + "step": 54800 + }, + { + "epoch": 0.18, + "learning_rate": 4.090906080165856e-05, + "loss": 2.7703, + "step": 54900 + }, + { + "epoch": 0.18, + "learning_rate": 4.089250171386559e-05, + "loss": 2.7659, + "step": 55000 + }, + { + "epoch": 0.18, + "learning_rate": 4.087594262607261e-05, + "loss": 2.7831, + "step": 55100 + }, + { + "epoch": 0.18, + "learning_rate": 4.0859383538279644e-05, + "loss": 2.7628, + "step": 55200 + }, + { + "epoch": 0.18, + "learning_rate": 4.0842824450486674e-05, + "loss": 2.7835, + "step": 55300 + }, + { + "epoch": 0.18, + "learning_rate": 4.08262653626937e-05, + "loss": 2.7772, + "step": 55400 + }, + { + "epoch": 0.18, + "learning_rate": 4.080970627490073e-05, + "loss": 2.7799, + "step": 55500 + }, + { + "epoch": 0.18, + "learning_rate": 4.079314718710776e-05, + "loss": 2.8003, + "step": 55600 + }, + { + "epoch": 0.18, + "learning_rate": 4.077658809931479e-05, + "loss": 2.7896, + "step": 55700 + }, + { + "epoch": 0.18, + "learning_rate": 4.076002901152182e-05, + "loss": 2.7685, + "step": 55800 + }, + { + "epoch": 0.19, + "learning_rate": 4.074346992372884e-05, + "loss": 2.7851, + "step": 55900 + }, + { + "epoch": 0.19, + "learning_rate": 4.072691083593587e-05, + "loss": 2.78, + "step": 56000 + }, + { + "epoch": 0.19, + "learning_rate": 4.07103517481429e-05, + "loss": 2.7783, + "step": 56100 + }, + { + "epoch": 0.19, + "learning_rate": 4.0693792660349926e-05, + "loss": 2.7738, + "step": 56200 + }, + { + "epoch": 0.19, + "learning_rate": 4.0677233572556957e-05, + "loss": 2.7777, + "step": 56300 + }, + { + "epoch": 0.19, + "learning_rate": 4.066067448476398e-05, + "loss": 2.7716, + "step": 56400 + }, + { + "epoch": 0.19, + "learning_rate": 4.064411539697101e-05, + "loss": 2.7767, + "step": 56500 + }, + { + "epoch": 0.19, + "learning_rate": 4.062755630917805e-05, + "loss": 2.7845, + "step": 56600 + }, + { + "epoch": 0.19, + "learning_rate": 4.061099722138507e-05, + "loss": 2.7696, + "step": 56700 + }, + { + "epoch": 0.19, + "learning_rate": 4.05944381335921e-05, + "loss": 2.7712, + "step": 56800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0577879045799125e-05, + "loss": 2.7781, + "step": 56900 + }, + { + "epoch": 0.19, + "learning_rate": 4.0561319958006155e-05, + "loss": 2.7841, + "step": 57000 + }, + { + "epoch": 0.19, + "learning_rate": 4.0544760870213185e-05, + "loss": 2.7655, + "step": 57100 + }, + { + "epoch": 0.19, + "learning_rate": 4.052820178242021e-05, + "loss": 2.7694, + "step": 57200 + }, + { + "epoch": 0.19, + "learning_rate": 4.051164269462724e-05, + "loss": 2.7764, + "step": 57300 + }, + { + "epoch": 0.19, + "learning_rate": 4.049508360683427e-05, + "loss": 2.7616, + "step": 57400 + }, + { + "epoch": 0.19, + "learning_rate": 4.047852451904129e-05, + "loss": 2.7696, + "step": 57500 + }, + { + "epoch": 0.19, + "learning_rate": 4.046196543124833e-05, + "loss": 2.7623, + "step": 57600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0445406343455354e-05, + "loss": 2.7686, + "step": 57700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0428847255662384e-05, + "loss": 2.7781, + "step": 57800 + }, + { + "epoch": 0.19, + "learning_rate": 4.0412288167869414e-05, + "loss": 2.7686, + "step": 57900 + }, + { + "epoch": 0.19, + "learning_rate": 4.039572908007644e-05, + "loss": 2.7831, + "step": 58000 + }, + { + "epoch": 0.19, + "learning_rate": 4.037916999228347e-05, + "loss": 2.7731, + "step": 58100 + }, + { + "epoch": 0.19, + "learning_rate": 4.036261090449049e-05, + "loss": 2.7776, + "step": 58200 + }, + { + "epoch": 0.19, + "learning_rate": 4.034605181669752e-05, + "loss": 2.7802, + "step": 58300 + }, + { + "epoch": 0.19, + "learning_rate": 4.032949272890455e-05, + "loss": 2.7728, + "step": 58400 + }, + { + "epoch": 0.19, + "learning_rate": 4.0312933641111576e-05, + "loss": 2.761, + "step": 58500 + }, + { + "epoch": 0.19, + "learning_rate": 4.029637455331861e-05, + "loss": 2.7785, + "step": 58600 + }, + { + "epoch": 0.19, + "learning_rate": 4.0279815465525636e-05, + "loss": 2.7761, + "step": 58700 + }, + { + "epoch": 0.19, + "learning_rate": 4.0263256377732667e-05, + "loss": 2.76, + "step": 58800 + }, + { + "epoch": 0.2, + "learning_rate": 4.02466972899397e-05, + "loss": 2.7783, + "step": 58900 + }, + { + "epoch": 0.2, + "learning_rate": 4.023013820214672e-05, + "loss": 2.7653, + "step": 59000 + }, + { + "epoch": 0.2, + "learning_rate": 4.021357911435375e-05, + "loss": 2.7826, + "step": 59100 + }, + { + "epoch": 0.2, + "learning_rate": 4.019702002656078e-05, + "loss": 2.7748, + "step": 59200 + }, + { + "epoch": 0.2, + "learning_rate": 4.0180460938767805e-05, + "loss": 2.7869, + "step": 59300 + }, + { + "epoch": 0.2, + "learning_rate": 4.0163901850974835e-05, + "loss": 2.7672, + "step": 59400 + }, + { + "epoch": 0.2, + "learning_rate": 4.014734276318186e-05, + "loss": 2.7845, + "step": 59500 + }, + { + "epoch": 0.2, + "learning_rate": 4.0130783675388895e-05, + "loss": 2.7661, + "step": 59600 + }, + { + "epoch": 0.2, + "learning_rate": 4.011422458759592e-05, + "loss": 2.7639, + "step": 59700 + }, + { + "epoch": 0.2, + "learning_rate": 4.009766549980295e-05, + "loss": 2.7762, + "step": 59800 + }, + { + "epoch": 0.2, + "learning_rate": 4.008110641200998e-05, + "loss": 2.7801, + "step": 59900 + }, + { + "epoch": 0.2, + "learning_rate": 4.0064547324217e-05, + "loss": 2.7627, + "step": 60000 + }, + { + "epoch": 0.2, + "learning_rate": 4.004798823642403e-05, + "loss": 2.7793, + "step": 60100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0031429148631064e-05, + "loss": 2.7856, + "step": 60200 + }, + { + "epoch": 0.2, + "learning_rate": 4.001487006083809e-05, + "loss": 2.784, + "step": 60300 + }, + { + "epoch": 0.2, + "learning_rate": 3.999831097304512e-05, + "loss": 2.7667, + "step": 60400 + }, + { + "epoch": 0.2, + "learning_rate": 3.998175188525215e-05, + "loss": 2.777, + "step": 60500 + }, + { + "epoch": 0.2, + "learning_rate": 3.996519279745918e-05, + "loss": 2.7757, + "step": 60600 + }, + { + "epoch": 0.2, + "learning_rate": 3.994863370966621e-05, + "loss": 2.7821, + "step": 60700 + }, + { + "epoch": 0.2, + "learning_rate": 3.993207462187323e-05, + "loss": 2.7787, + "step": 60800 + }, + { + "epoch": 0.2, + "learning_rate": 3.991551553408026e-05, + "loss": 2.7794, + "step": 60900 + }, + { + "epoch": 0.2, + "learning_rate": 3.9898956446287286e-05, + "loss": 2.7627, + "step": 61000 + }, + { + "epoch": 0.2, + "learning_rate": 3.9882397358494316e-05, + "loss": 2.7562, + "step": 61100 + }, + { + "epoch": 0.2, + "learning_rate": 3.9865838270701346e-05, + "loss": 2.7877, + "step": 61200 + }, + { + "epoch": 0.2, + "learning_rate": 3.984927918290837e-05, + "loss": 2.7775, + "step": 61300 + }, + { + "epoch": 0.2, + "learning_rate": 3.98327200951154e-05, + "loss": 2.7629, + "step": 61400 + }, + { + "epoch": 0.2, + "learning_rate": 3.981616100732243e-05, + "loss": 2.7684, + "step": 61500 + }, + { + "epoch": 0.2, + "learning_rate": 3.979960191952946e-05, + "loss": 2.7833, + "step": 61600 + }, + { + "epoch": 0.2, + "learning_rate": 3.978304283173649e-05, + "loss": 2.7686, + "step": 61700 + }, + { + "epoch": 0.2, + "learning_rate": 3.9766483743943515e-05, + "loss": 2.7748, + "step": 61800 + }, + { + "epoch": 0.21, + "learning_rate": 3.9749924656150545e-05, + "loss": 2.7687, + "step": 61900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9733365568357575e-05, + "loss": 2.7628, + "step": 62000 + }, + { + "epoch": 0.21, + "learning_rate": 3.97168064805646e-05, + "loss": 2.7854, + "step": 62100 + }, + { + "epoch": 0.21, + "learning_rate": 3.970024739277163e-05, + "loss": 2.7701, + "step": 62200 + }, + { + "epoch": 0.21, + "learning_rate": 3.968368830497865e-05, + "loss": 2.7823, + "step": 62300 + }, + { + "epoch": 0.21, + "learning_rate": 3.966712921718568e-05, + "loss": 2.7639, + "step": 62400 + }, + { + "epoch": 0.21, + "learning_rate": 3.965057012939271e-05, + "loss": 2.7754, + "step": 62500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9634011041599743e-05, + "loss": 2.7737, + "step": 62600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9617451953806774e-05, + "loss": 2.7724, + "step": 62700 + }, + { + "epoch": 0.21, + "learning_rate": 3.96008928660138e-05, + "loss": 2.7747, + "step": 62800 + }, + { + "epoch": 0.21, + "learning_rate": 3.958433377822083e-05, + "loss": 2.7731, + "step": 62900 + }, + { + "epoch": 0.21, + "learning_rate": 3.956777469042786e-05, + "loss": 2.7666, + "step": 63000 + }, + { + "epoch": 0.21, + "learning_rate": 3.955121560263488e-05, + "loss": 2.7722, + "step": 63100 + }, + { + "epoch": 0.21, + "learning_rate": 3.953465651484191e-05, + "loss": 2.7725, + "step": 63200 + }, + { + "epoch": 0.21, + "learning_rate": 3.951809742704894e-05, + "loss": 2.7758, + "step": 63300 + }, + { + "epoch": 0.21, + "learning_rate": 3.9501538339255965e-05, + "loss": 2.7809, + "step": 63400 + }, + { + "epoch": 0.21, + "learning_rate": 3.9484979251462996e-05, + "loss": 2.785, + "step": 63500 + }, + { + "epoch": 0.21, + "learning_rate": 3.9468420163670026e-05, + "loss": 2.7749, + "step": 63600 + }, + { + "epoch": 0.21, + "learning_rate": 3.9451861075877056e-05, + "loss": 2.7759, + "step": 63700 + }, + { + "epoch": 0.21, + "learning_rate": 3.943530198808409e-05, + "loss": 2.7773, + "step": 63800 + }, + { + "epoch": 0.21, + "learning_rate": 3.941874290029111e-05, + "loss": 2.7561, + "step": 63900 + }, + { + "epoch": 0.21, + "learning_rate": 3.940218381249814e-05, + "loss": 2.7739, + "step": 64000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9385624724705164e-05, + "loss": 2.7602, + "step": 64100 + }, + { + "epoch": 0.21, + "learning_rate": 3.9369065636912194e-05, + "loss": 2.7807, + "step": 64200 + }, + { + "epoch": 0.21, + "learning_rate": 3.9352506549119225e-05, + "loss": 2.7672, + "step": 64300 + }, + { + "epoch": 0.21, + "learning_rate": 3.933594746132625e-05, + "loss": 2.7706, + "step": 64400 + }, + { + "epoch": 0.21, + "learning_rate": 3.931938837353328e-05, + "loss": 2.7772, + "step": 64500 + }, + { + "epoch": 0.21, + "learning_rate": 3.930282928574031e-05, + "loss": 2.7707, + "step": 64600 + }, + { + "epoch": 0.21, + "learning_rate": 3.928627019794734e-05, + "loss": 2.7742, + "step": 64700 + }, + { + "epoch": 0.21, + "learning_rate": 3.926971111015437e-05, + "loss": 2.7632, + "step": 64800 + }, + { + "epoch": 0.21, + "learning_rate": 3.925315202236139e-05, + "loss": 2.7765, + "step": 64900 + }, + { + "epoch": 0.22, + "learning_rate": 3.923659293456842e-05, + "loss": 2.7612, + "step": 65000 + }, + { + "epoch": 0.22, + "learning_rate": 3.9220033846775453e-05, + "loss": 2.7697, + "step": 65100 + }, + { + "epoch": 0.22, + "learning_rate": 3.920347475898248e-05, + "loss": 2.7762, + "step": 65200 + }, + { + "epoch": 0.22, + "learning_rate": 3.918691567118951e-05, + "loss": 2.7635, + "step": 65300 + }, + { + "epoch": 0.22, + "learning_rate": 3.917035658339653e-05, + "loss": 2.7821, + "step": 65400 + }, + { + "epoch": 0.22, + "learning_rate": 3.915379749560356e-05, + "loss": 2.7695, + "step": 65500 + }, + { + "epoch": 0.22, + "learning_rate": 3.913723840781059e-05, + "loss": 2.7734, + "step": 65600 + }, + { + "epoch": 0.22, + "learning_rate": 3.912067932001762e-05, + "loss": 2.7684, + "step": 65700 + }, + { + "epoch": 0.22, + "learning_rate": 3.910412023222465e-05, + "loss": 2.7958, + "step": 65800 + }, + { + "epoch": 0.22, + "learning_rate": 3.9087561144431676e-05, + "loss": 2.7738, + "step": 65900 + }, + { + "epoch": 0.22, + "learning_rate": 3.9071002056638706e-05, + "loss": 2.7866, + "step": 66000 + }, + { + "epoch": 0.22, + "learning_rate": 3.9054442968845736e-05, + "loss": 2.7766, + "step": 66100 + }, + { + "epoch": 0.22, + "learning_rate": 3.903788388105276e-05, + "loss": 2.7637, + "step": 66200 + }, + { + "epoch": 0.22, + "learning_rate": 3.902132479325979e-05, + "loss": 2.7692, + "step": 66300 + }, + { + "epoch": 0.22, + "learning_rate": 3.900476570546682e-05, + "loss": 2.7877, + "step": 66400 + }, + { + "epoch": 0.22, + "learning_rate": 3.8988206617673844e-05, + "loss": 2.778, + "step": 66500 + }, + { + "epoch": 0.22, + "learning_rate": 3.897164752988088e-05, + "loss": 2.767, + "step": 66600 + }, + { + "epoch": 0.22, + "learning_rate": 3.8955088442087904e-05, + "loss": 2.7792, + "step": 66700 + }, + { + "epoch": 0.22, + "learning_rate": 3.8938529354294935e-05, + "loss": 2.7619, + "step": 66800 + }, + { + "epoch": 0.22, + "learning_rate": 3.892197026650196e-05, + "loss": 2.7858, + "step": 66900 + }, + { + "epoch": 0.22, + "learning_rate": 3.890541117870899e-05, + "loss": 2.7774, + "step": 67000 + }, + { + "epoch": 0.22, + "learning_rate": 3.888885209091602e-05, + "loss": 2.78, + "step": 67100 + }, + { + "epoch": 0.22, + "learning_rate": 3.887229300312304e-05, + "loss": 2.77, + "step": 67200 + }, + { + "epoch": 0.22, + "learning_rate": 3.885573391533007e-05, + "loss": 2.7834, + "step": 67300 + }, + { + "epoch": 0.22, + "learning_rate": 3.88391748275371e-05, + "loss": 2.7817, + "step": 67400 + }, + { + "epoch": 0.22, + "learning_rate": 3.8822615739744126e-05, + "loss": 2.7656, + "step": 67500 + }, + { + "epoch": 0.22, + "learning_rate": 3.8806056651951164e-05, + "loss": 2.7873, + "step": 67600 + }, + { + "epoch": 0.22, + "learning_rate": 3.878949756415819e-05, + "loss": 2.7568, + "step": 67700 + }, + { + "epoch": 0.22, + "learning_rate": 3.877293847636522e-05, + "loss": 2.7832, + "step": 67800 + }, + { + "epoch": 0.22, + "learning_rate": 3.875637938857225e-05, + "loss": 2.7802, + "step": 67900 + }, + { + "epoch": 0.23, + "learning_rate": 3.873982030077927e-05, + "loss": 2.7669, + "step": 68000 + }, + { + "epoch": 0.23, + "learning_rate": 3.87232612129863e-05, + "loss": 2.7748, + "step": 68100 + }, + { + "epoch": 0.23, + "learning_rate": 3.8706702125193325e-05, + "loss": 2.767, + "step": 68200 + }, + { + "epoch": 0.23, + "learning_rate": 3.8690143037400355e-05, + "loss": 2.7727, + "step": 68300 + }, + { + "epoch": 0.23, + "learning_rate": 3.8673583949607386e-05, + "loss": 2.7725, + "step": 68400 + }, + { + "epoch": 0.23, + "learning_rate": 3.865702486181441e-05, + "loss": 2.7773, + "step": 68500 + }, + { + "epoch": 0.23, + "learning_rate": 3.8640465774021446e-05, + "loss": 2.7834, + "step": 68600 + }, + { + "epoch": 0.23, + "learning_rate": 3.862390668622847e-05, + "loss": 2.7777, + "step": 68700 + }, + { + "epoch": 0.23, + "learning_rate": 3.86073475984355e-05, + "loss": 2.7757, + "step": 68800 + }, + { + "epoch": 0.23, + "learning_rate": 3.859078851064253e-05, + "loss": 2.7624, + "step": 68900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8574229422849554e-05, + "loss": 2.7765, + "step": 69000 + }, + { + "epoch": 0.23, + "learning_rate": 3.8557670335056584e-05, + "loss": 2.7782, + "step": 69100 + }, + { + "epoch": 0.23, + "learning_rate": 3.8541111247263614e-05, + "loss": 2.7757, + "step": 69200 + }, + { + "epoch": 0.23, + "learning_rate": 3.852455215947064e-05, + "loss": 2.775, + "step": 69300 + }, + { + "epoch": 0.23, + "learning_rate": 3.850799307167767e-05, + "loss": 2.7646, + "step": 69400 + }, + { + "epoch": 0.23, + "learning_rate": 3.849143398388469e-05, + "loss": 2.7669, + "step": 69500 + }, + { + "epoch": 0.23, + "learning_rate": 3.847487489609173e-05, + "loss": 2.7643, + "step": 69600 + }, + { + "epoch": 0.23, + "learning_rate": 3.845831580829876e-05, + "loss": 2.7815, + "step": 69700 + }, + { + "epoch": 0.23, + "learning_rate": 3.844175672050578e-05, + "loss": 2.7665, + "step": 69800 + }, + { + "epoch": 0.23, + "learning_rate": 3.842519763271281e-05, + "loss": 2.7642, + "step": 69900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8408638544919837e-05, + "loss": 2.7747, + "step": 70000 + }, + { + "epoch": 0.23, + "learning_rate": 3.839207945712687e-05, + "loss": 2.7862, + "step": 70100 + }, + { + "epoch": 0.23, + "learning_rate": 3.83755203693339e-05, + "loss": 2.755, + "step": 70200 + }, + { + "epoch": 0.23, + "learning_rate": 3.835896128154092e-05, + "loss": 2.7713, + "step": 70300 + }, + { + "epoch": 0.23, + "learning_rate": 3.834240219374795e-05, + "loss": 2.7665, + "step": 70400 + }, + { + "epoch": 0.23, + "learning_rate": 3.832584310595498e-05, + "loss": 2.7831, + "step": 70500 + }, + { + "epoch": 0.23, + "learning_rate": 3.830928401816201e-05, + "loss": 2.774, + "step": 70600 + }, + { + "epoch": 0.23, + "learning_rate": 3.829272493036904e-05, + "loss": 2.7802, + "step": 70700 + }, + { + "epoch": 0.23, + "learning_rate": 3.8276165842576065e-05, + "loss": 2.7653, + "step": 70800 + }, + { + "epoch": 0.23, + "learning_rate": 3.8259606754783096e-05, + "loss": 2.7576, + "step": 70900 + }, + { + "epoch": 0.24, + "learning_rate": 3.8243047666990126e-05, + "loss": 2.7891, + "step": 71000 + }, + { + "epoch": 0.24, + "learning_rate": 3.822648857919715e-05, + "loss": 2.7715, + "step": 71100 + }, + { + "epoch": 0.24, + "learning_rate": 3.820992949140418e-05, + "loss": 2.764, + "step": 71200 + }, + { + "epoch": 0.24, + "learning_rate": 3.81933704036112e-05, + "loss": 2.7699, + "step": 71300 + }, + { + "epoch": 0.24, + "learning_rate": 3.8176811315818234e-05, + "loss": 2.7682, + "step": 71400 + }, + { + "epoch": 0.24, + "learning_rate": 3.8160252228025264e-05, + "loss": 2.7829, + "step": 71500 + }, + { + "epoch": 0.24, + "learning_rate": 3.8143693140232294e-05, + "loss": 2.7736, + "step": 71600 + }, + { + "epoch": 0.24, + "learning_rate": 3.8127134052439325e-05, + "loss": 2.789, + "step": 71700 + }, + { + "epoch": 0.24, + "learning_rate": 3.811057496464635e-05, + "loss": 2.7784, + "step": 71800 + }, + { + "epoch": 0.24, + "learning_rate": 3.809401587685338e-05, + "loss": 2.7782, + "step": 71900 + }, + { + "epoch": 0.24, + "learning_rate": 3.807745678906041e-05, + "loss": 2.776, + "step": 72000 + }, + { + "epoch": 0.24, + "learning_rate": 3.806089770126743e-05, + "loss": 2.7795, + "step": 72100 + }, + { + "epoch": 0.24, + "learning_rate": 3.804433861347446e-05, + "loss": 2.7474, + "step": 72200 + }, + { + "epoch": 0.24, + "learning_rate": 3.802777952568149e-05, + "loss": 2.7784, + "step": 72300 + }, + { + "epoch": 0.24, + "learning_rate": 3.8011220437888516e-05, + "loss": 2.7837, + "step": 72400 + }, + { + "epoch": 0.24, + "learning_rate": 3.7994661350095547e-05, + "loss": 2.7615, + "step": 72500 + }, + { + "epoch": 0.24, + "learning_rate": 3.797810226230258e-05, + "loss": 2.7742, + "step": 72600 + }, + { + "epoch": 0.24, + "learning_rate": 3.796154317450961e-05, + "loss": 2.7735, + "step": 72700 + }, + { + "epoch": 0.24, + "learning_rate": 3.794498408671663e-05, + "loss": 2.7739, + "step": 72800 + }, + { + "epoch": 0.24, + "learning_rate": 3.792842499892366e-05, + "loss": 2.7716, + "step": 72900 + }, + { + "epoch": 0.24, + "learning_rate": 3.791186591113069e-05, + "loss": 2.7806, + "step": 73000 + }, + { + "epoch": 0.24, + "learning_rate": 3.7895306823337715e-05, + "loss": 2.7703, + "step": 73100 + }, + { + "epoch": 0.24, + "learning_rate": 3.7878747735544745e-05, + "loss": 2.7753, + "step": 73200 + }, + { + "epoch": 0.24, + "learning_rate": 3.7862188647751775e-05, + "loss": 2.7746, + "step": 73300 + }, + { + "epoch": 0.24, + "learning_rate": 3.78456295599588e-05, + "loss": 2.7836, + "step": 73400 + }, + { + "epoch": 0.24, + "learning_rate": 3.782907047216583e-05, + "loss": 2.7821, + "step": 73500 + }, + { + "epoch": 0.24, + "learning_rate": 3.781251138437286e-05, + "loss": 2.7711, + "step": 73600 + }, + { + "epoch": 0.24, + "learning_rate": 3.779595229657989e-05, + "loss": 2.762, + "step": 73700 + }, + { + "epoch": 0.24, + "learning_rate": 3.777939320878692e-05, + "loss": 2.7759, + "step": 73800 + }, + { + "epoch": 0.24, + "learning_rate": 3.7762834120993944e-05, + "loss": 2.7617, + "step": 73900 + }, + { + "epoch": 0.25, + "learning_rate": 3.7746275033200974e-05, + "loss": 2.7818, + "step": 74000 + }, + { + "epoch": 0.25, + "learning_rate": 3.7729715945408e-05, + "loss": 2.7662, + "step": 74100 + }, + { + "epoch": 0.25, + "learning_rate": 3.771315685761503e-05, + "loss": 2.783, + "step": 74200 + }, + { + "epoch": 0.25, + "learning_rate": 3.769659776982206e-05, + "loss": 2.773, + "step": 74300 + }, + { + "epoch": 0.25, + "learning_rate": 3.768003868202908e-05, + "loss": 2.7712, + "step": 74400 + }, + { + "epoch": 0.25, + "learning_rate": 3.766347959423611e-05, + "loss": 2.7709, + "step": 74500 + }, + { + "epoch": 0.25, + "learning_rate": 3.764692050644314e-05, + "loss": 2.7641, + "step": 74600 + }, + { + "epoch": 0.25, + "learning_rate": 3.763036141865017e-05, + "loss": 2.7659, + "step": 74700 + }, + { + "epoch": 0.25, + "learning_rate": 3.76138023308572e-05, + "loss": 2.7594, + "step": 74800 + }, + { + "epoch": 0.25, + "learning_rate": 3.7597243243064226e-05, + "loss": 2.7561, + "step": 74900 + }, + { + "epoch": 0.25, + "learning_rate": 3.758068415527126e-05, + "loss": 2.7747, + "step": 75000 + }, + { + "epoch": 0.25, + "learning_rate": 3.756412506747829e-05, + "loss": 2.7664, + "step": 75100 + }, + { + "epoch": 0.25, + "learning_rate": 3.754756597968531e-05, + "loss": 2.7907, + "step": 75200 + }, + { + "epoch": 0.25, + "learning_rate": 3.753100689189234e-05, + "loss": 2.7689, + "step": 75300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7514447804099364e-05, + "loss": 2.7684, + "step": 75400 + }, + { + "epoch": 0.25, + "learning_rate": 3.7497888716306395e-05, + "loss": 2.7677, + "step": 75500 + }, + { + "epoch": 0.25, + "learning_rate": 3.748132962851343e-05, + "loss": 2.7735, + "step": 75600 + }, + { + "epoch": 0.25, + "learning_rate": 3.7464770540720455e-05, + "loss": 2.757, + "step": 75700 + }, + { + "epoch": 0.25, + "learning_rate": 3.7448211452927485e-05, + "loss": 2.7717, + "step": 75800 + }, + { + "epoch": 0.25, + "learning_rate": 3.743165236513451e-05, + "loss": 2.7637, + "step": 75900 + }, + { + "epoch": 0.25, + "learning_rate": 3.741509327734154e-05, + "loss": 2.7833, + "step": 76000 + }, + { + "epoch": 0.25, + "learning_rate": 3.739853418954857e-05, + "loss": 2.7627, + "step": 76100 + }, + { + "epoch": 0.25, + "learning_rate": 3.738197510175559e-05, + "loss": 2.7698, + "step": 76200 + }, + { + "epoch": 0.25, + "learning_rate": 3.7365416013962623e-05, + "loss": 2.7685, + "step": 76300 + }, + { + "epoch": 0.25, + "learning_rate": 3.7348856926169654e-05, + "loss": 2.7677, + "step": 76400 + }, + { + "epoch": 0.25, + "learning_rate": 3.733229783837668e-05, + "loss": 2.7688, + "step": 76500 + }, + { + "epoch": 0.25, + "learning_rate": 3.7315738750583714e-05, + "loss": 2.7634, + "step": 76600 + }, + { + "epoch": 0.25, + "learning_rate": 3.729917966279074e-05, + "loss": 2.7738, + "step": 76700 + }, + { + "epoch": 0.25, + "learning_rate": 3.728262057499777e-05, + "loss": 2.7739, + "step": 76800 + }, + { + "epoch": 0.25, + "learning_rate": 3.72660614872048e-05, + "loss": 2.7631, + "step": 76900 + }, + { + "epoch": 0.26, + "learning_rate": 3.724950239941182e-05, + "loss": 2.7487, + "step": 77000 + }, + { + "epoch": 0.26, + "learning_rate": 3.723294331161885e-05, + "loss": 2.7657, + "step": 77100 + }, + { + "epoch": 0.26, + "learning_rate": 3.7216384223825876e-05, + "loss": 2.7645, + "step": 77200 + }, + { + "epoch": 0.26, + "learning_rate": 3.7199825136032906e-05, + "loss": 2.7838, + "step": 77300 + }, + { + "epoch": 0.26, + "learning_rate": 3.7183266048239936e-05, + "loss": 2.7717, + "step": 77400 + }, + { + "epoch": 0.26, + "learning_rate": 3.716670696044696e-05, + "loss": 2.7816, + "step": 77500 + }, + { + "epoch": 0.26, + "learning_rate": 3.7150147872654e-05, + "loss": 2.7741, + "step": 77600 + }, + { + "epoch": 0.26, + "learning_rate": 3.713358878486102e-05, + "loss": 2.7727, + "step": 77700 + }, + { + "epoch": 0.26, + "learning_rate": 3.711702969706805e-05, + "loss": 2.7721, + "step": 77800 + }, + { + "epoch": 0.26, + "learning_rate": 3.710047060927508e-05, + "loss": 2.7768, + "step": 77900 + }, + { + "epoch": 0.26, + "learning_rate": 3.7083911521482105e-05, + "loss": 2.7654, + "step": 78000 + }, + { + "epoch": 0.26, + "learning_rate": 3.7067352433689135e-05, + "loss": 2.7549, + "step": 78100 + }, + { + "epoch": 0.26, + "learning_rate": 3.7050793345896165e-05, + "loss": 2.7627, + "step": 78200 + }, + { + "epoch": 0.26, + "learning_rate": 3.703423425810319e-05, + "loss": 2.7784, + "step": 78300 + }, + { + "epoch": 0.26, + "learning_rate": 3.701767517031022e-05, + "loss": 2.7635, + "step": 78400 + }, + { + "epoch": 0.26, + "learning_rate": 3.700111608251724e-05, + "loss": 2.7707, + "step": 78500 + }, + { + "epoch": 0.26, + "learning_rate": 3.698455699472428e-05, + "loss": 2.7545, + "step": 78600 + }, + { + "epoch": 0.26, + "learning_rate": 3.69679979069313e-05, + "loss": 2.7658, + "step": 78700 + }, + { + "epoch": 0.26, + "learning_rate": 3.6951438819138333e-05, + "loss": 2.7701, + "step": 78800 + }, + { + "epoch": 0.26, + "learning_rate": 3.6934879731345364e-05, + "loss": 2.7816, + "step": 78900 + }, + { + "epoch": 0.26, + "learning_rate": 3.691832064355239e-05, + "loss": 2.771, + "step": 79000 + }, + { + "epoch": 0.26, + "learning_rate": 3.690176155575942e-05, + "loss": 2.7745, + "step": 79100 + }, + { + "epoch": 0.26, + "learning_rate": 3.688520246796645e-05, + "loss": 2.7756, + "step": 79200 + }, + { + "epoch": 0.26, + "learning_rate": 3.686864338017347e-05, + "loss": 2.7655, + "step": 79300 + }, + { + "epoch": 0.26, + "learning_rate": 3.68520842923805e-05, + "loss": 2.7838, + "step": 79400 + }, + { + "epoch": 0.26, + "learning_rate": 3.683552520458753e-05, + "loss": 2.7654, + "step": 79500 + }, + { + "epoch": 0.26, + "learning_rate": 3.681896611679456e-05, + "loss": 2.7728, + "step": 79600 + }, + { + "epoch": 0.26, + "learning_rate": 3.680240702900159e-05, + "loss": 2.7612, + "step": 79700 + }, + { + "epoch": 0.26, + "learning_rate": 3.6785847941208616e-05, + "loss": 2.7618, + "step": 79800 + }, + { + "epoch": 0.26, + "learning_rate": 3.6769288853415646e-05, + "loss": 2.7593, + "step": 79900 + }, + { + "epoch": 0.26, + "learning_rate": 3.675272976562267e-05, + "loss": 2.7689, + "step": 80000 + }, + { + "epoch": 0.27, + "learning_rate": 3.67361706778297e-05, + "loss": 2.7535, + "step": 80100 + }, + { + "epoch": 0.27, + "learning_rate": 3.671961159003673e-05, + "loss": 2.7723, + "step": 80200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6703052502243754e-05, + "loss": 2.759, + "step": 80300 + }, + { + "epoch": 0.27, + "learning_rate": 3.6686493414450784e-05, + "loss": 2.7656, + "step": 80400 + }, + { + "epoch": 0.27, + "learning_rate": 3.6669934326657815e-05, + "loss": 2.7597, + "step": 80500 + }, + { + "epoch": 0.27, + "learning_rate": 3.6653375238864845e-05, + "loss": 2.7767, + "step": 80600 + }, + { + "epoch": 0.27, + "learning_rate": 3.6636816151071875e-05, + "loss": 2.7654, + "step": 80700 + }, + { + "epoch": 0.27, + "learning_rate": 3.66202570632789e-05, + "loss": 2.7803, + "step": 80800 + }, + { + "epoch": 0.27, + "learning_rate": 3.660369797548593e-05, + "loss": 2.7708, + "step": 80900 + }, + { + "epoch": 0.27, + "learning_rate": 3.658713888769296e-05, + "loss": 2.7643, + "step": 81000 + }, + { + "epoch": 0.27, + "learning_rate": 3.657057979989998e-05, + "loss": 2.7694, + "step": 81100 + }, + { + "epoch": 0.27, + "learning_rate": 3.655402071210701e-05, + "loss": 2.7566, + "step": 81200 + }, + { + "epoch": 0.27, + "learning_rate": 3.653746162431404e-05, + "loss": 2.7798, + "step": 81300 + }, + { + "epoch": 0.27, + "learning_rate": 3.652090253652107e-05, + "loss": 2.7593, + "step": 81400 + }, + { + "epoch": 0.27, + "learning_rate": 3.65043434487281e-05, + "loss": 2.7631, + "step": 81500 + }, + { + "epoch": 0.27, + "learning_rate": 3.648778436093513e-05, + "loss": 2.7619, + "step": 81600 + }, + { + "epoch": 0.27, + "learning_rate": 3.647122527314216e-05, + "loss": 2.7562, + "step": 81700 + }, + { + "epoch": 0.27, + "learning_rate": 3.645466618534918e-05, + "loss": 2.7616, + "step": 81800 + }, + { + "epoch": 0.27, + "learning_rate": 3.643810709755621e-05, + "loss": 2.7752, + "step": 81900 + }, + { + "epoch": 0.27, + "learning_rate": 3.642154800976324e-05, + "loss": 2.7622, + "step": 82000 + }, + { + "epoch": 0.27, + "learning_rate": 3.6404988921970266e-05, + "loss": 2.7738, + "step": 82100 + }, + { + "epoch": 0.27, + "learning_rate": 3.6388429834177296e-05, + "loss": 2.7691, + "step": 82200 + }, + { + "epoch": 0.27, + "learning_rate": 3.6371870746384326e-05, + "loss": 2.7709, + "step": 82300 + }, + { + "epoch": 0.27, + "learning_rate": 3.635531165859135e-05, + "loss": 2.789, + "step": 82400 + }, + { + "epoch": 0.27, + "learning_rate": 3.633875257079838e-05, + "loss": 2.7553, + "step": 82500 + }, + { + "epoch": 0.27, + "learning_rate": 3.632219348300541e-05, + "loss": 2.7778, + "step": 82600 + }, + { + "epoch": 0.27, + "learning_rate": 3.630563439521244e-05, + "loss": 2.7791, + "step": 82700 + }, + { + "epoch": 0.27, + "learning_rate": 3.628907530741947e-05, + "loss": 2.777, + "step": 82800 + }, + { + "epoch": 0.27, + "learning_rate": 3.6272516219626494e-05, + "loss": 2.7583, + "step": 82900 + }, + { + "epoch": 0.27, + "learning_rate": 3.6255957131833525e-05, + "loss": 2.7702, + "step": 83000 + }, + { + "epoch": 0.28, + "learning_rate": 3.623939804404055e-05, + "loss": 2.778, + "step": 83100 + }, + { + "epoch": 0.28, + "learning_rate": 3.622283895624758e-05, + "loss": 2.7604, + "step": 83200 + }, + { + "epoch": 0.28, + "learning_rate": 3.620627986845461e-05, + "loss": 2.7674, + "step": 83300 + }, + { + "epoch": 0.28, + "learning_rate": 3.618972078066163e-05, + "loss": 2.781, + "step": 83400 + }, + { + "epoch": 0.28, + "learning_rate": 3.617316169286866e-05, + "loss": 2.7616, + "step": 83500 + }, + { + "epoch": 0.28, + "learning_rate": 3.615660260507569e-05, + "loss": 2.7769, + "step": 83600 + }, + { + "epoch": 0.28, + "learning_rate": 3.614004351728272e-05, + "loss": 2.7688, + "step": 83700 + }, + { + "epoch": 0.28, + "learning_rate": 3.6123484429489754e-05, + "loss": 2.7652, + "step": 83800 + }, + { + "epoch": 0.28, + "learning_rate": 3.610692534169678e-05, + "loss": 2.7685, + "step": 83900 + }, + { + "epoch": 0.28, + "learning_rate": 3.609036625390381e-05, + "loss": 2.7694, + "step": 84000 + }, + { + "epoch": 0.28, + "learning_rate": 3.607380716611084e-05, + "loss": 2.7786, + "step": 84100 + }, + { + "epoch": 0.28, + "learning_rate": 3.605724807831786e-05, + "loss": 2.7719, + "step": 84200 + }, + { + "epoch": 0.28, + "learning_rate": 3.604068899052489e-05, + "loss": 2.7769, + "step": 84300 + }, + { + "epoch": 0.28, + "learning_rate": 3.6024129902731915e-05, + "loss": 2.7651, + "step": 84400 + }, + { + "epoch": 0.28, + "learning_rate": 3.6007570814938945e-05, + "loss": 2.7535, + "step": 84500 + }, + { + "epoch": 0.28, + "learning_rate": 3.5991011727145976e-05, + "loss": 2.757, + "step": 84600 + }, + { + "epoch": 0.28, + "learning_rate": 3.5974452639353006e-05, + "loss": 2.7571, + "step": 84700 + }, + { + "epoch": 0.28, + "learning_rate": 3.5957893551560036e-05, + "loss": 2.7545, + "step": 84800 + }, + { + "epoch": 0.28, + "learning_rate": 3.594133446376706e-05, + "loss": 2.7571, + "step": 84900 + }, + { + "epoch": 0.28, + "learning_rate": 3.592477537597409e-05, + "loss": 2.7558, + "step": 85000 + }, + { + "epoch": 0.28, + "learning_rate": 3.590821628818112e-05, + "loss": 2.7639, + "step": 85100 + }, + { + "epoch": 0.28, + "learning_rate": 3.5891657200388144e-05, + "loss": 2.7656, + "step": 85200 + }, + { + "epoch": 0.28, + "learning_rate": 3.5875098112595174e-05, + "loss": 2.7654, + "step": 85300 + }, + { + "epoch": 0.28, + "learning_rate": 3.5858539024802204e-05, + "loss": 2.7549, + "step": 85400 + }, + { + "epoch": 0.28, + "learning_rate": 3.584197993700923e-05, + "loss": 2.762, + "step": 85500 + }, + { + "epoch": 0.28, + "learning_rate": 3.5825420849216265e-05, + "loss": 2.7716, + "step": 85600 + }, + { + "epoch": 0.28, + "learning_rate": 3.580886176142329e-05, + "loss": 2.7848, + "step": 85700 + }, + { + "epoch": 0.28, + "learning_rate": 3.579230267363032e-05, + "loss": 2.7675, + "step": 85800 + }, + { + "epoch": 0.28, + "learning_rate": 3.577574358583734e-05, + "loss": 2.7798, + "step": 85900 + }, + { + "epoch": 0.28, + "learning_rate": 3.575918449804437e-05, + "loss": 2.7544, + "step": 86000 + }, + { + "epoch": 0.29, + "learning_rate": 3.57426254102514e-05, + "loss": 2.7634, + "step": 86100 + }, + { + "epoch": 0.29, + "learning_rate": 3.5726066322458427e-05, + "loss": 2.7796, + "step": 86200 + }, + { + "epoch": 0.29, + "learning_rate": 3.570950723466546e-05, + "loss": 2.7618, + "step": 86300 + }, + { + "epoch": 0.29, + "learning_rate": 3.569294814687249e-05, + "loss": 2.7575, + "step": 86400 + }, + { + "epoch": 0.29, + "learning_rate": 3.567638905907951e-05, + "loss": 2.7789, + "step": 86500 + }, + { + "epoch": 0.29, + "learning_rate": 3.565982997128655e-05, + "loss": 2.7719, + "step": 86600 + }, + { + "epoch": 0.29, + "learning_rate": 3.564327088349357e-05, + "loss": 2.7506, + "step": 86700 + }, + { + "epoch": 0.29, + "learning_rate": 3.56267117957006e-05, + "loss": 2.757, + "step": 86800 + }, + { + "epoch": 0.29, + "learning_rate": 3.561015270790763e-05, + "loss": 2.7716, + "step": 86900 + }, + { + "epoch": 0.29, + "learning_rate": 3.5593593620114655e-05, + "loss": 2.7676, + "step": 87000 + }, + { + "epoch": 0.29, + "learning_rate": 3.5577034532321686e-05, + "loss": 2.7605, + "step": 87100 + }, + { + "epoch": 0.29, + "learning_rate": 3.556047544452871e-05, + "loss": 2.7766, + "step": 87200 + }, + { + "epoch": 0.29, + "learning_rate": 3.554391635673574e-05, + "loss": 2.7602, + "step": 87300 + }, + { + "epoch": 0.29, + "learning_rate": 3.552735726894277e-05, + "loss": 2.7676, + "step": 87400 + }, + { + "epoch": 0.29, + "learning_rate": 3.551079818114979e-05, + "loss": 2.7678, + "step": 87500 + }, + { + "epoch": 0.29, + "learning_rate": 3.549423909335683e-05, + "loss": 2.7571, + "step": 87600 + }, + { + "epoch": 0.29, + "learning_rate": 3.5477680005563854e-05, + "loss": 2.7761, + "step": 87700 + }, + { + "epoch": 0.29, + "learning_rate": 3.5461120917770884e-05, + "loss": 2.7722, + "step": 87800 + }, + { + "epoch": 0.29, + "learning_rate": 3.5444561829977915e-05, + "loss": 2.7728, + "step": 87900 + }, + { + "epoch": 0.29, + "learning_rate": 3.542800274218494e-05, + "loss": 2.7666, + "step": 88000 + }, + { + "epoch": 0.29, + "learning_rate": 3.541144365439197e-05, + "loss": 2.7674, + "step": 88100 + }, + { + "epoch": 0.29, + "learning_rate": 3.5394884566599e-05, + "loss": 2.7665, + "step": 88200 + }, + { + "epoch": 0.29, + "learning_rate": 3.537832547880602e-05, + "loss": 2.7498, + "step": 88300 + }, + { + "epoch": 0.29, + "learning_rate": 3.536176639101305e-05, + "loss": 2.7735, + "step": 88400 + }, + { + "epoch": 0.29, + "learning_rate": 3.5345207303220076e-05, + "loss": 2.7572, + "step": 88500 + }, + { + "epoch": 0.29, + "learning_rate": 3.532864821542711e-05, + "loss": 2.7603, + "step": 88600 + }, + { + "epoch": 0.29, + "learning_rate": 3.5312089127634143e-05, + "loss": 2.7502, + "step": 88700 + }, + { + "epoch": 0.29, + "learning_rate": 3.529553003984117e-05, + "loss": 2.774, + "step": 88800 + }, + { + "epoch": 0.29, + "learning_rate": 3.52789709520482e-05, + "loss": 2.786, + "step": 88900 + }, + { + "epoch": 0.29, + "learning_rate": 3.526241186425522e-05, + "loss": 2.7827, + "step": 89000 + }, + { + "epoch": 0.3, + "learning_rate": 3.524585277646225e-05, + "loss": 2.7652, + "step": 89100 + }, + { + "epoch": 0.3, + "learning_rate": 3.522929368866928e-05, + "loss": 2.7652, + "step": 89200 + }, + { + "epoch": 0.3, + "learning_rate": 3.5212734600876305e-05, + "loss": 2.7619, + "step": 89300 + }, + { + "epoch": 0.3, + "learning_rate": 3.5196175513083335e-05, + "loss": 2.7673, + "step": 89400 + }, + { + "epoch": 0.3, + "learning_rate": 3.5179616425290365e-05, + "loss": 2.7625, + "step": 89500 + }, + { + "epoch": 0.3, + "learning_rate": 3.5163057337497396e-05, + "loss": 2.7567, + "step": 89600 + }, + { + "epoch": 0.3, + "learning_rate": 3.5146498249704426e-05, + "loss": 2.7665, + "step": 89700 + }, + { + "epoch": 0.3, + "learning_rate": 3.512993916191145e-05, + "loss": 2.7669, + "step": 89800 + }, + { + "epoch": 0.3, + "learning_rate": 3.511338007411848e-05, + "loss": 2.7634, + "step": 89900 + }, + { + "epoch": 0.3, + "learning_rate": 3.509682098632551e-05, + "loss": 2.7788, + "step": 90000 + }, + { + "epoch": 0.3, + "learning_rate": 3.5080261898532534e-05, + "loss": 2.7633, + "step": 90100 + }, + { + "epoch": 0.3, + "learning_rate": 3.5063702810739564e-05, + "loss": 2.7672, + "step": 90200 + }, + { + "epoch": 0.3, + "learning_rate": 3.504714372294659e-05, + "loss": 2.7642, + "step": 90300 + }, + { + "epoch": 0.3, + "learning_rate": 3.503058463515362e-05, + "loss": 2.7647, + "step": 90400 + }, + { + "epoch": 0.3, + "learning_rate": 3.501402554736065e-05, + "loss": 2.7723, + "step": 90500 + }, + { + "epoch": 0.3, + "learning_rate": 3.499746645956768e-05, + "loss": 2.7747, + "step": 90600 + }, + { + "epoch": 0.3, + "learning_rate": 3.498090737177471e-05, + "loss": 2.7588, + "step": 90700 + }, + { + "epoch": 0.3, + "learning_rate": 3.496434828398173e-05, + "loss": 2.7673, + "step": 90800 + }, + { + "epoch": 0.3, + "learning_rate": 3.494778919618876e-05, + "loss": 2.7654, + "step": 90900 + }, + { + "epoch": 0.3, + "learning_rate": 3.493123010839579e-05, + "loss": 2.7616, + "step": 91000 + }, + { + "epoch": 0.3, + "learning_rate": 3.4914671020602816e-05, + "loss": 2.7796, + "step": 91100 + }, + { + "epoch": 0.3, + "learning_rate": 3.489811193280985e-05, + "loss": 2.7652, + "step": 91200 + }, + { + "epoch": 0.3, + "learning_rate": 3.488155284501688e-05, + "loss": 2.7671, + "step": 91300 + }, + { + "epoch": 0.3, + "learning_rate": 3.48649937572239e-05, + "loss": 2.7556, + "step": 91400 + }, + { + "epoch": 0.3, + "learning_rate": 3.484843466943093e-05, + "loss": 2.7542, + "step": 91500 + }, + { + "epoch": 0.3, + "learning_rate": 3.483187558163796e-05, + "loss": 2.7582, + "step": 91600 + }, + { + "epoch": 0.3, + "learning_rate": 3.481531649384499e-05, + "loss": 2.7596, + "step": 91700 + }, + { + "epoch": 0.3, + "learning_rate": 3.4798757406052015e-05, + "loss": 2.7774, + "step": 91800 + }, + { + "epoch": 0.3, + "learning_rate": 3.4782198318259045e-05, + "loss": 2.7661, + "step": 91900 + }, + { + "epoch": 0.3, + "learning_rate": 3.4765639230466076e-05, + "loss": 2.7551, + "step": 92000 + }, + { + "epoch": 0.31, + "learning_rate": 3.47490801426731e-05, + "loss": 2.7545, + "step": 92100 + }, + { + "epoch": 0.31, + "learning_rate": 3.473252105488013e-05, + "loss": 2.7801, + "step": 92200 + }, + { + "epoch": 0.31, + "learning_rate": 3.471596196708716e-05, + "loss": 2.782, + "step": 92300 + }, + { + "epoch": 0.31, + "learning_rate": 3.469940287929418e-05, + "loss": 2.763, + "step": 92400 + }, + { + "epoch": 0.31, + "learning_rate": 3.4682843791501213e-05, + "loss": 2.7406, + "step": 92500 + }, + { + "epoch": 0.31, + "learning_rate": 3.4666284703708244e-05, + "loss": 2.765, + "step": 92600 + }, + { + "epoch": 0.31, + "learning_rate": 3.4649725615915274e-05, + "loss": 2.7653, + "step": 92700 + }, + { + "epoch": 0.31, + "learning_rate": 3.4633166528122304e-05, + "loss": 2.7632, + "step": 92800 + }, + { + "epoch": 0.31, + "learning_rate": 3.461660744032933e-05, + "loss": 2.761, + "step": 92900 + }, + { + "epoch": 0.31, + "learning_rate": 3.460004835253636e-05, + "loss": 2.7688, + "step": 93000 + }, + { + "epoch": 0.31, + "learning_rate": 3.458348926474338e-05, + "loss": 2.7714, + "step": 93100 + }, + { + "epoch": 0.31, + "learning_rate": 3.456693017695041e-05, + "loss": 2.7672, + "step": 93200 + }, + { + "epoch": 0.31, + "learning_rate": 3.455037108915744e-05, + "loss": 2.7682, + "step": 93300 + }, + { + "epoch": 0.31, + "learning_rate": 3.4533812001364466e-05, + "loss": 2.7727, + "step": 93400 + }, + { + "epoch": 0.31, + "learning_rate": 3.4517252913571496e-05, + "loss": 2.7762, + "step": 93500 + }, + { + "epoch": 0.31, + "learning_rate": 3.4500693825778526e-05, + "loss": 2.7726, + "step": 93600 + }, + { + "epoch": 0.31, + "learning_rate": 3.448413473798556e-05, + "loss": 2.7657, + "step": 93700 + }, + { + "epoch": 0.31, + "learning_rate": 3.446757565019259e-05, + "loss": 2.775, + "step": 93800 + }, + { + "epoch": 0.31, + "learning_rate": 3.445101656239961e-05, + "loss": 2.775, + "step": 93900 + }, + { + "epoch": 0.31, + "learning_rate": 3.443445747460664e-05, + "loss": 2.7544, + "step": 94000 + }, + { + "epoch": 0.31, + "learning_rate": 3.441789838681367e-05, + "loss": 2.7638, + "step": 94100 + }, + { + "epoch": 0.31, + "learning_rate": 3.4401339299020695e-05, + "loss": 2.7636, + "step": 94200 + }, + { + "epoch": 0.31, + "learning_rate": 3.4384780211227725e-05, + "loss": 2.7631, + "step": 94300 + }, + { + "epoch": 0.31, + "learning_rate": 3.436822112343475e-05, + "loss": 2.7649, + "step": 94400 + }, + { + "epoch": 0.31, + "learning_rate": 3.435166203564178e-05, + "loss": 2.7604, + "step": 94500 + }, + { + "epoch": 0.31, + "learning_rate": 3.4335102947848816e-05, + "loss": 2.7816, + "step": 94600 + }, + { + "epoch": 0.31, + "learning_rate": 3.431854386005584e-05, + "loss": 2.7671, + "step": 94700 + }, + { + "epoch": 0.31, + "learning_rate": 3.430198477226287e-05, + "loss": 2.7798, + "step": 94800 + }, + { + "epoch": 0.31, + "learning_rate": 3.428542568446989e-05, + "loss": 2.769, + "step": 94900 + }, + { + "epoch": 0.31, + "learning_rate": 3.4268866596676924e-05, + "loss": 2.7652, + "step": 95000 + }, + { + "epoch": 0.31, + "learning_rate": 3.4252307508883954e-05, + "loss": 2.7537, + "step": 95100 + }, + { + "epoch": 0.32, + "learning_rate": 3.423574842109098e-05, + "loss": 2.767, + "step": 95200 + }, + { + "epoch": 0.32, + "learning_rate": 3.421918933329801e-05, + "loss": 2.7716, + "step": 95300 + }, + { + "epoch": 0.32, + "learning_rate": 3.420263024550504e-05, + "loss": 2.7678, + "step": 95400 + }, + { + "epoch": 0.32, + "learning_rate": 3.418607115771206e-05, + "loss": 2.7497, + "step": 95500 + }, + { + "epoch": 0.32, + "learning_rate": 3.41695120699191e-05, + "loss": 2.7556, + "step": 95600 + }, + { + "epoch": 0.32, + "learning_rate": 3.415295298212612e-05, + "loss": 2.751, + "step": 95700 + }, + { + "epoch": 0.32, + "learning_rate": 3.413639389433315e-05, + "loss": 2.7716, + "step": 95800 + }, + { + "epoch": 0.32, + "learning_rate": 3.411983480654018e-05, + "loss": 2.7641, + "step": 95900 + }, + { + "epoch": 0.32, + "learning_rate": 3.4103275718747206e-05, + "loss": 2.7445, + "step": 96000 + }, + { + "epoch": 0.32, + "learning_rate": 3.4086716630954236e-05, + "loss": 2.7678, + "step": 96100 + }, + { + "epoch": 0.32, + "learning_rate": 3.407015754316126e-05, + "loss": 2.7545, + "step": 96200 + }, + { + "epoch": 0.32, + "learning_rate": 3.405359845536829e-05, + "loss": 2.7554, + "step": 96300 + }, + { + "epoch": 0.32, + "learning_rate": 3.403703936757532e-05, + "loss": 2.755, + "step": 96400 + }, + { + "epoch": 0.32, + "learning_rate": 3.4020480279782344e-05, + "loss": 2.7867, + "step": 96500 + }, + { + "epoch": 0.32, + "learning_rate": 3.400392119198938e-05, + "loss": 2.7597, + "step": 96600 + }, + { + "epoch": 0.32, + "learning_rate": 3.3987362104196405e-05, + "loss": 2.7579, + "step": 96700 + }, + { + "epoch": 0.32, + "learning_rate": 3.3970803016403435e-05, + "loss": 2.7721, + "step": 96800 + }, + { + "epoch": 0.32, + "learning_rate": 3.3954243928610465e-05, + "loss": 2.7588, + "step": 96900 + }, + { + "epoch": 0.32, + "learning_rate": 3.393768484081749e-05, + "loss": 2.7565, + "step": 97000 + }, + { + "epoch": 0.32, + "learning_rate": 3.392112575302452e-05, + "loss": 2.756, + "step": 97100 + }, + { + "epoch": 0.32, + "learning_rate": 3.390456666523155e-05, + "loss": 2.767, + "step": 97200 + }, + { + "epoch": 0.32, + "learning_rate": 3.388800757743857e-05, + "loss": 2.7759, + "step": 97300 + }, + { + "epoch": 0.32, + "learning_rate": 3.38714484896456e-05, + "loss": 2.7578, + "step": 97400 + }, + { + "epoch": 0.32, + "learning_rate": 3.385488940185263e-05, + "loss": 2.7654, + "step": 97500 + }, + { + "epoch": 0.32, + "learning_rate": 3.3838330314059664e-05, + "loss": 2.757, + "step": 97600 + }, + { + "epoch": 0.32, + "learning_rate": 3.3821771226266694e-05, + "loss": 2.7645, + "step": 97700 + }, + { + "epoch": 0.32, + "learning_rate": 3.380521213847372e-05, + "loss": 2.7632, + "step": 97800 + }, + { + "epoch": 0.32, + "learning_rate": 3.378865305068075e-05, + "loss": 2.7691, + "step": 97900 + }, + { + "epoch": 0.32, + "learning_rate": 3.377209396288777e-05, + "loss": 2.7674, + "step": 98000 + }, + { + "epoch": 0.32, + "learning_rate": 3.37555348750948e-05, + "loss": 2.7589, + "step": 98100 + }, + { + "epoch": 0.33, + "learning_rate": 3.373897578730183e-05, + "loss": 2.7618, + "step": 98200 + }, + { + "epoch": 0.33, + "learning_rate": 3.3722416699508856e-05, + "loss": 2.7834, + "step": 98300 + }, + { + "epoch": 0.33, + "learning_rate": 3.3705857611715886e-05, + "loss": 2.7622, + "step": 98400 + }, + { + "epoch": 0.33, + "learning_rate": 3.3689298523922916e-05, + "loss": 2.779, + "step": 98500 + }, + { + "epoch": 0.33, + "learning_rate": 3.3672739436129947e-05, + "loss": 2.7589, + "step": 98600 + }, + { + "epoch": 0.33, + "learning_rate": 3.365618034833698e-05, + "loss": 2.7683, + "step": 98700 + }, + { + "epoch": 0.33, + "learning_rate": 3.3639621260544e-05, + "loss": 2.7735, + "step": 98800 + }, + { + "epoch": 0.33, + "learning_rate": 3.362306217275103e-05, + "loss": 2.7465, + "step": 98900 + }, + { + "epoch": 0.33, + "learning_rate": 3.3606503084958054e-05, + "loss": 2.7598, + "step": 99000 + }, + { + "epoch": 0.33, + "learning_rate": 3.3589943997165084e-05, + "loss": 2.7521, + "step": 99100 + }, + { + "epoch": 0.33, + "learning_rate": 3.3573384909372115e-05, + "loss": 2.7639, + "step": 99200 + }, + { + "epoch": 0.33, + "learning_rate": 3.355682582157914e-05, + "loss": 2.7595, + "step": 99300 + }, + { + "epoch": 0.33, + "learning_rate": 3.354026673378617e-05, + "loss": 2.7575, + "step": 99400 + }, + { + "epoch": 0.33, + "learning_rate": 3.35237076459932e-05, + "loss": 2.7489, + "step": 99500 + }, + { + "epoch": 0.33, + "learning_rate": 3.350714855820023e-05, + "loss": 2.7656, + "step": 99600 + }, + { + "epoch": 0.33, + "learning_rate": 3.349058947040726e-05, + "loss": 2.7598, + "step": 99700 + }, + { + "epoch": 0.33, + "learning_rate": 3.347403038261428e-05, + "loss": 2.7486, + "step": 99800 + }, + { + "epoch": 0.33, + "learning_rate": 3.345747129482131e-05, + "loss": 2.7553, + "step": 99900 + }, + { + "epoch": 0.33, + "learning_rate": 3.3440912207028344e-05, + "loss": 2.7728, + "step": 100000 + }, + { + "epoch": 0.33, + "learning_rate": 3.342435311923537e-05, + "loss": 2.7468, + "step": 100100 + }, + { + "epoch": 0.33, + "learning_rate": 3.34077940314424e-05, + "loss": 2.7582, + "step": 100200 + }, + { + "epoch": 0.33, + "learning_rate": 3.339123494364942e-05, + "loss": 2.7531, + "step": 100300 + }, + { + "epoch": 0.33, + "learning_rate": 3.337467585585645e-05, + "loss": 2.7754, + "step": 100400 + }, + { + "epoch": 0.33, + "learning_rate": 3.335811676806348e-05, + "loss": 2.7671, + "step": 100500 + }, + { + "epoch": 0.33, + "learning_rate": 3.334155768027051e-05, + "loss": 2.7632, + "step": 100600 + }, + { + "epoch": 0.33, + "learning_rate": 3.332499859247754e-05, + "loss": 2.7483, + "step": 100700 + }, + { + "epoch": 0.33, + "learning_rate": 3.3308439504684566e-05, + "loss": 2.7764, + "step": 100800 + }, + { + "epoch": 0.33, + "learning_rate": 3.3291880416891596e-05, + "loss": 2.7616, + "step": 100900 + }, + { + "epoch": 0.33, + "learning_rate": 3.3275321329098626e-05, + "loss": 2.7647, + "step": 101000 + }, + { + "epoch": 0.33, + "learning_rate": 3.325876224130565e-05, + "loss": 2.7571, + "step": 101100 + }, + { + "epoch": 0.34, + "learning_rate": 3.324220315351268e-05, + "loss": 2.751, + "step": 101200 + }, + { + "epoch": 0.34, + "learning_rate": 3.322564406571971e-05, + "loss": 2.7625, + "step": 101300 + }, + { + "epoch": 0.34, + "learning_rate": 3.3209084977926734e-05, + "loss": 2.7686, + "step": 101400 + }, + { + "epoch": 0.34, + "learning_rate": 3.3192525890133764e-05, + "loss": 2.7722, + "step": 101500 + }, + { + "epoch": 0.34, + "learning_rate": 3.3175966802340795e-05, + "loss": 2.7697, + "step": 101600 + }, + { + "epoch": 0.34, + "learning_rate": 3.3159407714547825e-05, + "loss": 2.7732, + "step": 101700 + }, + { + "epoch": 0.34, + "learning_rate": 3.3142848626754855e-05, + "loss": 2.7656, + "step": 101800 + }, + { + "epoch": 0.34, + "learning_rate": 3.312628953896188e-05, + "loss": 2.7592, + "step": 101900 + }, + { + "epoch": 0.34, + "learning_rate": 3.310973045116891e-05, + "loss": 2.7466, + "step": 102000 + }, + { + "epoch": 0.34, + "learning_rate": 3.309317136337593e-05, + "loss": 2.763, + "step": 102100 + }, + { + "epoch": 0.34, + "learning_rate": 3.307661227558296e-05, + "loss": 2.773, + "step": 102200 + }, + { + "epoch": 0.34, + "learning_rate": 3.306005318778999e-05, + "loss": 2.7662, + "step": 102300 + }, + { + "epoch": 0.34, + "learning_rate": 3.3043494099997017e-05, + "loss": 2.7679, + "step": 102400 + }, + { + "epoch": 0.34, + "learning_rate": 3.302693501220405e-05, + "loss": 2.7694, + "step": 102500 + }, + { + "epoch": 0.34, + "learning_rate": 3.301037592441108e-05, + "loss": 2.7698, + "step": 102600 + }, + { + "epoch": 0.34, + "learning_rate": 3.299381683661811e-05, + "loss": 2.7527, + "step": 102700 + }, + { + "epoch": 0.34, + "learning_rate": 3.297725774882514e-05, + "loss": 2.7665, + "step": 102800 + }, + { + "epoch": 0.34, + "learning_rate": 3.296069866103216e-05, + "loss": 2.7618, + "step": 102900 + }, + { + "epoch": 0.34, + "learning_rate": 3.294413957323919e-05, + "loss": 2.7532, + "step": 103000 + }, + { + "epoch": 0.34, + "learning_rate": 3.292758048544622e-05, + "loss": 2.7778, + "step": 103100 + }, + { + "epoch": 0.34, + "learning_rate": 3.2911021397653245e-05, + "loss": 2.7687, + "step": 103200 + }, + { + "epoch": 0.34, + "learning_rate": 3.2894462309860276e-05, + "loss": 2.7574, + "step": 103300 + }, + { + "epoch": 0.34, + "learning_rate": 3.28779032220673e-05, + "loss": 2.7666, + "step": 103400 + }, + { + "epoch": 0.34, + "learning_rate": 3.286134413427433e-05, + "loss": 2.7559, + "step": 103500 + }, + { + "epoch": 0.34, + "learning_rate": 3.284478504648137e-05, + "loss": 2.7526, + "step": 103600 + }, + { + "epoch": 0.34, + "learning_rate": 3.282822595868839e-05, + "loss": 2.758, + "step": 103700 + }, + { + "epoch": 0.34, + "learning_rate": 3.281166687089542e-05, + "loss": 2.7578, + "step": 103800 + }, + { + "epoch": 0.34, + "learning_rate": 3.2795107783102444e-05, + "loss": 2.7533, + "step": 103900 + }, + { + "epoch": 0.34, + "learning_rate": 3.2778548695309474e-05, + "loss": 2.7938, + "step": 104000 + }, + { + "epoch": 0.34, + "learning_rate": 3.2761989607516505e-05, + "loss": 2.7731, + "step": 104100 + }, + { + "epoch": 0.35, + "learning_rate": 3.274543051972353e-05, + "loss": 2.7624, + "step": 104200 + }, + { + "epoch": 0.35, + "learning_rate": 3.272887143193056e-05, + "loss": 2.7623, + "step": 104300 + }, + { + "epoch": 0.35, + "learning_rate": 3.271231234413759e-05, + "loss": 2.7708, + "step": 104400 + }, + { + "epoch": 0.35, + "learning_rate": 3.269575325634461e-05, + "loss": 2.7552, + "step": 104500 + }, + { + "epoch": 0.35, + "learning_rate": 3.267919416855165e-05, + "loss": 2.7578, + "step": 104600 + }, + { + "epoch": 0.35, + "learning_rate": 3.266263508075867e-05, + "loss": 2.7699, + "step": 104700 + }, + { + "epoch": 0.35, + "learning_rate": 3.26460759929657e-05, + "loss": 2.7627, + "step": 104800 + }, + { + "epoch": 0.35, + "learning_rate": 3.2629516905172733e-05, + "loss": 2.7735, + "step": 104900 + }, + { + "epoch": 0.35, + "learning_rate": 3.261295781737976e-05, + "loss": 2.7692, + "step": 105000 + }, + { + "epoch": 0.35, + "learning_rate": 3.259639872958679e-05, + "loss": 2.7679, + "step": 105100 + }, + { + "epoch": 0.35, + "learning_rate": 3.257983964179381e-05, + "loss": 2.7618, + "step": 105200 + }, + { + "epoch": 0.35, + "learning_rate": 3.256328055400084e-05, + "loss": 2.7637, + "step": 105300 + }, + { + "epoch": 0.35, + "learning_rate": 3.254672146620787e-05, + "loss": 2.7641, + "step": 105400 + }, + { + "epoch": 0.35, + "learning_rate": 3.2530162378414895e-05, + "loss": 2.7697, + "step": 105500 + }, + { + "epoch": 0.35, + "learning_rate": 3.251360329062193e-05, + "loss": 2.7751, + "step": 105600 + }, + { + "epoch": 0.35, + "learning_rate": 3.2497044202828956e-05, + "loss": 2.7631, + "step": 105700 + }, + { + "epoch": 0.35, + "learning_rate": 3.2480485115035986e-05, + "loss": 2.7671, + "step": 105800 + }, + { + "epoch": 0.35, + "learning_rate": 3.2463926027243016e-05, + "loss": 2.7605, + "step": 105900 + }, + { + "epoch": 0.35, + "learning_rate": 3.244736693945004e-05, + "loss": 2.7699, + "step": 106000 + }, + { + "epoch": 0.35, + "learning_rate": 3.243080785165707e-05, + "loss": 2.7588, + "step": 106100 + }, + { + "epoch": 0.35, + "learning_rate": 3.2414248763864093e-05, + "loss": 2.7724, + "step": 106200 + }, + { + "epoch": 0.35, + "learning_rate": 3.2397689676071124e-05, + "loss": 2.7479, + "step": 106300 + }, + { + "epoch": 0.35, + "learning_rate": 3.2381130588278154e-05, + "loss": 2.762, + "step": 106400 + }, + { + "epoch": 0.35, + "learning_rate": 3.236457150048518e-05, + "loss": 2.7441, + "step": 106500 + }, + { + "epoch": 0.35, + "learning_rate": 3.2348012412692215e-05, + "loss": 2.7582, + "step": 106600 + }, + { + "epoch": 0.35, + "learning_rate": 3.233145332489924e-05, + "loss": 2.7559, + "step": 106700 + }, + { + "epoch": 0.35, + "learning_rate": 3.231489423710627e-05, + "loss": 2.7591, + "step": 106800 + }, + { + "epoch": 0.35, + "learning_rate": 3.22983351493133e-05, + "loss": 2.7725, + "step": 106900 + }, + { + "epoch": 0.35, + "learning_rate": 3.228177606152032e-05, + "loss": 2.7694, + "step": 107000 + }, + { + "epoch": 0.35, + "learning_rate": 3.226521697372735e-05, + "loss": 2.7617, + "step": 107100 + }, + { + "epoch": 0.36, + "learning_rate": 3.224865788593438e-05, + "loss": 2.7571, + "step": 107200 + }, + { + "epoch": 0.36, + "learning_rate": 3.2232098798141406e-05, + "loss": 2.7704, + "step": 107300 + }, + { + "epoch": 0.36, + "learning_rate": 3.221553971034844e-05, + "loss": 2.7718, + "step": 107400 + }, + { + "epoch": 0.36, + "learning_rate": 3.219898062255546e-05, + "loss": 2.7644, + "step": 107500 + }, + { + "epoch": 0.36, + "learning_rate": 3.21824215347625e-05, + "loss": 2.7794, + "step": 107600 + }, + { + "epoch": 0.36, + "learning_rate": 3.216586244696953e-05, + "loss": 2.7497, + "step": 107700 + }, + { + "epoch": 0.36, + "learning_rate": 3.214930335917655e-05, + "loss": 2.7569, + "step": 107800 + }, + { + "epoch": 0.36, + "learning_rate": 3.213274427138358e-05, + "loss": 2.7458, + "step": 107900 + }, + { + "epoch": 0.36, + "learning_rate": 3.2116185183590605e-05, + "loss": 2.7596, + "step": 108000 + }, + { + "epoch": 0.36, + "learning_rate": 3.2099626095797635e-05, + "loss": 2.7562, + "step": 108100 + }, + { + "epoch": 0.36, + "learning_rate": 3.2083067008004666e-05, + "loss": 2.76, + "step": 108200 + }, + { + "epoch": 0.36, + "learning_rate": 3.206650792021169e-05, + "loss": 2.7697, + "step": 108300 + }, + { + "epoch": 0.36, + "learning_rate": 3.204994883241872e-05, + "loss": 2.7668, + "step": 108400 + }, + { + "epoch": 0.36, + "learning_rate": 3.203338974462575e-05, + "loss": 2.7502, + "step": 108500 + }, + { + "epoch": 0.36, + "learning_rate": 3.201683065683278e-05, + "loss": 2.7758, + "step": 108600 + }, + { + "epoch": 0.36, + "learning_rate": 3.200027156903981e-05, + "loss": 2.7576, + "step": 108700 + }, + { + "epoch": 0.36, + "learning_rate": 3.1983712481246834e-05, + "loss": 2.7622, + "step": 108800 + }, + { + "epoch": 0.36, + "learning_rate": 3.1967153393453864e-05, + "loss": 2.7674, + "step": 108900 + }, + { + "epoch": 0.36, + "learning_rate": 3.1950594305660894e-05, + "loss": 2.775, + "step": 109000 + }, + { + "epoch": 0.36, + "learning_rate": 3.193403521786792e-05, + "loss": 2.7526, + "step": 109100 + }, + { + "epoch": 0.36, + "learning_rate": 3.191747613007495e-05, + "loss": 2.7595, + "step": 109200 + }, + { + "epoch": 0.36, + "learning_rate": 3.190091704228197e-05, + "loss": 2.7526, + "step": 109300 + }, + { + "epoch": 0.36, + "learning_rate": 3.1884357954489e-05, + "loss": 2.758, + "step": 109400 + }, + { + "epoch": 0.36, + "learning_rate": 3.186779886669603e-05, + "loss": 2.7494, + "step": 109500 + }, + { + "epoch": 0.36, + "learning_rate": 3.185123977890306e-05, + "loss": 2.7511, + "step": 109600 + }, + { + "epoch": 0.36, + "learning_rate": 3.183468069111009e-05, + "loss": 2.7453, + "step": 109700 + }, + { + "epoch": 0.36, + "learning_rate": 3.1818121603317116e-05, + "loss": 2.7605, + "step": 109800 + }, + { + "epoch": 0.36, + "learning_rate": 3.180156251552415e-05, + "loss": 2.7455, + "step": 109900 + }, + { + "epoch": 0.36, + "learning_rate": 3.178500342773118e-05, + "loss": 2.7523, + "step": 110000 + }, + { + "epoch": 0.36, + "learning_rate": 3.17684443399382e-05, + "loss": 2.7579, + "step": 110100 + }, + { + "epoch": 0.36, + "learning_rate": 3.175188525214523e-05, + "loss": 2.767, + "step": 110200 + }, + { + "epoch": 0.37, + "learning_rate": 3.173532616435226e-05, + "loss": 2.7535, + "step": 110300 + }, + { + "epoch": 0.37, + "learning_rate": 3.1718767076559285e-05, + "loss": 2.7637, + "step": 110400 + }, + { + "epoch": 0.37, + "learning_rate": 3.1702207988766315e-05, + "loss": 2.7577, + "step": 110500 + }, + { + "epoch": 0.37, + "learning_rate": 3.1685648900973345e-05, + "loss": 2.7741, + "step": 110600 + }, + { + "epoch": 0.37, + "learning_rate": 3.1669089813180376e-05, + "loss": 2.7747, + "step": 110700 + }, + { + "epoch": 0.37, + "learning_rate": 3.1652530725387406e-05, + "loss": 2.7647, + "step": 110800 + }, + { + "epoch": 0.37, + "learning_rate": 3.163597163759443e-05, + "loss": 2.7625, + "step": 110900 + }, + { + "epoch": 0.37, + "learning_rate": 3.161941254980146e-05, + "loss": 2.7657, + "step": 111000 + }, + { + "epoch": 0.37, + "learning_rate": 3.160285346200848e-05, + "loss": 2.7449, + "step": 111100 + }, + { + "epoch": 0.37, + "learning_rate": 3.1586294374215514e-05, + "loss": 2.7622, + "step": 111200 + }, + { + "epoch": 0.37, + "learning_rate": 3.1569735286422544e-05, + "loss": 2.7657, + "step": 111300 + }, + { + "epoch": 0.37, + "learning_rate": 3.155317619862957e-05, + "loss": 2.759, + "step": 111400 + }, + { + "epoch": 0.37, + "learning_rate": 3.15366171108366e-05, + "loss": 2.7602, + "step": 111500 + }, + { + "epoch": 0.37, + "learning_rate": 3.152005802304363e-05, + "loss": 2.7635, + "step": 111600 + }, + { + "epoch": 0.37, + "learning_rate": 3.150349893525066e-05, + "loss": 2.7549, + "step": 111700 + }, + { + "epoch": 0.37, + "learning_rate": 3.148693984745769e-05, + "loss": 2.7618, + "step": 111800 + }, + { + "epoch": 0.37, + "learning_rate": 3.147038075966471e-05, + "loss": 2.7703, + "step": 111900 + }, + { + "epoch": 0.37, + "learning_rate": 3.145382167187174e-05, + "loss": 2.7709, + "step": 112000 + }, + { + "epoch": 0.37, + "learning_rate": 3.143726258407877e-05, + "loss": 2.7669, + "step": 112100 + }, + { + "epoch": 0.37, + "learning_rate": 3.1420703496285796e-05, + "loss": 2.7517, + "step": 112200 + }, + { + "epoch": 0.37, + "learning_rate": 3.1404144408492827e-05, + "loss": 2.7328, + "step": 112300 + }, + { + "epoch": 0.37, + "learning_rate": 3.138758532069985e-05, + "loss": 2.7516, + "step": 112400 + }, + { + "epoch": 0.37, + "learning_rate": 3.137102623290688e-05, + "loss": 2.759, + "step": 112500 + }, + { + "epoch": 0.37, + "learning_rate": 3.135446714511391e-05, + "loss": 2.7515, + "step": 112600 + }, + { + "epoch": 0.37, + "learning_rate": 3.133790805732094e-05, + "loss": 2.7585, + "step": 112700 + }, + { + "epoch": 0.37, + "learning_rate": 3.132134896952797e-05, + "loss": 2.7767, + "step": 112800 + }, + { + "epoch": 0.37, + "learning_rate": 3.1304789881734995e-05, + "loss": 2.7467, + "step": 112900 + }, + { + "epoch": 0.37, + "learning_rate": 3.1288230793942025e-05, + "loss": 2.7571, + "step": 113000 + }, + { + "epoch": 0.37, + "learning_rate": 3.1271671706149055e-05, + "loss": 2.7672, + "step": 113100 + }, + { + "epoch": 0.37, + "learning_rate": 3.125511261835608e-05, + "loss": 2.7683, + "step": 113200 + }, + { + "epoch": 0.38, + "learning_rate": 3.123855353056311e-05, + "loss": 2.7675, + "step": 113300 + }, + { + "epoch": 0.38, + "learning_rate": 3.122199444277013e-05, + "loss": 2.7559, + "step": 113400 + }, + { + "epoch": 0.38, + "learning_rate": 3.120543535497716e-05, + "loss": 2.7681, + "step": 113500 + }, + { + "epoch": 0.38, + "learning_rate": 3.11888762671842e-05, + "loss": 2.7606, + "step": 113600 + }, + { + "epoch": 0.38, + "learning_rate": 3.1172317179391224e-05, + "loss": 2.7505, + "step": 113700 + }, + { + "epoch": 0.38, + "learning_rate": 3.1155758091598254e-05, + "loss": 2.7445, + "step": 113800 + }, + { + "epoch": 0.38, + "learning_rate": 3.113919900380528e-05, + "loss": 2.755, + "step": 113900 + }, + { + "epoch": 0.38, + "learning_rate": 3.112263991601231e-05, + "loss": 2.7527, + "step": 114000 + }, + { + "epoch": 0.38, + "learning_rate": 3.110608082821934e-05, + "loss": 2.7576, + "step": 114100 + }, + { + "epoch": 0.38, + "learning_rate": 3.108952174042636e-05, + "loss": 2.7761, + "step": 114200 + }, + { + "epoch": 0.38, + "learning_rate": 3.107296265263339e-05, + "loss": 2.7507, + "step": 114300 + }, + { + "epoch": 0.38, + "learning_rate": 3.105640356484042e-05, + "loss": 2.7524, + "step": 114400 + }, + { + "epoch": 0.38, + "learning_rate": 3.1039844477047446e-05, + "loss": 2.7524, + "step": 114500 + }, + { + "epoch": 0.38, + "learning_rate": 3.102328538925448e-05, + "loss": 2.7679, + "step": 114600 + }, + { + "epoch": 0.38, + "learning_rate": 3.1006726301461506e-05, + "loss": 2.7491, + "step": 114700 + }, + { + "epoch": 0.38, + "learning_rate": 3.0990167213668537e-05, + "loss": 2.766, + "step": 114800 + }, + { + "epoch": 0.38, + "learning_rate": 3.097360812587557e-05, + "loss": 2.7598, + "step": 114900 + }, + { + "epoch": 0.38, + "learning_rate": 3.095704903808259e-05, + "loss": 2.7573, + "step": 115000 + }, + { + "epoch": 0.38, + "learning_rate": 3.094048995028962e-05, + "loss": 2.772, + "step": 115100 + }, + { + "epoch": 0.38, + "learning_rate": 3.0923930862496644e-05, + "loss": 2.7522, + "step": 115200 + }, + { + "epoch": 0.38, + "learning_rate": 3.0907371774703675e-05, + "loss": 2.7459, + "step": 115300 + }, + { + "epoch": 0.38, + "learning_rate": 3.0890812686910705e-05, + "loss": 2.7562, + "step": 115400 + }, + { + "epoch": 0.38, + "learning_rate": 3.087425359911773e-05, + "loss": 2.7545, + "step": 115500 + }, + { + "epoch": 0.38, + "learning_rate": 3.0857694511324765e-05, + "loss": 2.7687, + "step": 115600 + }, + { + "epoch": 0.38, + "learning_rate": 3.084113542353179e-05, + "loss": 2.7686, + "step": 115700 + }, + { + "epoch": 0.38, + "learning_rate": 3.082457633573882e-05, + "loss": 2.7615, + "step": 115800 + }, + { + "epoch": 0.38, + "learning_rate": 3.080801724794585e-05, + "loss": 2.7742, + "step": 115900 + }, + { + "epoch": 0.38, + "learning_rate": 3.079145816015287e-05, + "loss": 2.7593, + "step": 116000 + }, + { + "epoch": 0.38, + "learning_rate": 3.07748990723599e-05, + "loss": 2.7572, + "step": 116100 + }, + { + "epoch": 0.38, + "learning_rate": 3.0758339984566934e-05, + "loss": 2.7566, + "step": 116200 + }, + { + "epoch": 0.39, + "learning_rate": 3.074178089677396e-05, + "loss": 2.7496, + "step": 116300 + }, + { + "epoch": 0.39, + "learning_rate": 3.072522180898099e-05, + "loss": 2.767, + "step": 116400 + }, + { + "epoch": 0.39, + "learning_rate": 3.070866272118801e-05, + "loss": 2.747, + "step": 116500 + }, + { + "epoch": 0.39, + "learning_rate": 3.069210363339505e-05, + "loss": 2.7728, + "step": 116600 + }, + { + "epoch": 0.39, + "learning_rate": 3.067554454560208e-05, + "loss": 2.7727, + "step": 116700 + }, + { + "epoch": 0.39, + "learning_rate": 3.06589854578091e-05, + "loss": 2.749, + "step": 116800 + }, + { + "epoch": 0.39, + "learning_rate": 3.064242637001613e-05, + "loss": 2.7447, + "step": 116900 + }, + { + "epoch": 0.39, + "learning_rate": 3.0625867282223156e-05, + "loss": 2.7553, + "step": 117000 + }, + { + "epoch": 0.39, + "learning_rate": 3.0609308194430186e-05, + "loss": 2.7591, + "step": 117100 + }, + { + "epoch": 0.39, + "learning_rate": 3.0592749106637216e-05, + "loss": 2.7478, + "step": 117200 + }, + { + "epoch": 0.39, + "learning_rate": 3.057619001884424e-05, + "loss": 2.7632, + "step": 117300 + }, + { + "epoch": 0.39, + "learning_rate": 3.055963093105127e-05, + "loss": 2.754, + "step": 117400 + }, + { + "epoch": 0.39, + "learning_rate": 3.05430718432583e-05, + "loss": 2.7479, + "step": 117500 + }, + { + "epoch": 0.39, + "learning_rate": 3.052651275546533e-05, + "loss": 2.7646, + "step": 117600 + }, + { + "epoch": 0.39, + "learning_rate": 3.0509953667672358e-05, + "loss": 2.7489, + "step": 117700 + }, + { + "epoch": 0.39, + "learning_rate": 3.0493394579879388e-05, + "loss": 2.7734, + "step": 117800 + }, + { + "epoch": 0.39, + "learning_rate": 3.0476835492086415e-05, + "loss": 2.7647, + "step": 117900 + }, + { + "epoch": 0.39, + "learning_rate": 3.0460276404293442e-05, + "loss": 2.7644, + "step": 118000 + }, + { + "epoch": 0.39, + "learning_rate": 3.044371731650047e-05, + "loss": 2.7662, + "step": 118100 + }, + { + "epoch": 0.39, + "learning_rate": 3.04271582287075e-05, + "loss": 2.7603, + "step": 118200 + }, + { + "epoch": 0.39, + "learning_rate": 3.0410599140914526e-05, + "loss": 2.7531, + "step": 118300 + }, + { + "epoch": 0.39, + "learning_rate": 3.0394040053121553e-05, + "loss": 2.7671, + "step": 118400 + }, + { + "epoch": 0.39, + "learning_rate": 3.037748096532858e-05, + "loss": 2.7601, + "step": 118500 + }, + { + "epoch": 0.39, + "learning_rate": 3.0360921877535613e-05, + "loss": 2.758, + "step": 118600 + }, + { + "epoch": 0.39, + "learning_rate": 3.034436278974264e-05, + "loss": 2.7721, + "step": 118700 + }, + { + "epoch": 0.39, + "learning_rate": 3.032780370194967e-05, + "loss": 2.7658, + "step": 118800 + }, + { + "epoch": 0.39, + "learning_rate": 3.0311244614156698e-05, + "loss": 2.7738, + "step": 118900 + }, + { + "epoch": 0.39, + "learning_rate": 3.0294685526363724e-05, + "loss": 2.7705, + "step": 119000 + }, + { + "epoch": 0.39, + "learning_rate": 3.027812643857075e-05, + "loss": 2.755, + "step": 119100 + }, + { + "epoch": 0.39, + "learning_rate": 3.026156735077778e-05, + "loss": 2.7609, + "step": 119200 + }, + { + "epoch": 0.4, + "learning_rate": 3.024500826298481e-05, + "loss": 2.7629, + "step": 119300 + }, + { + "epoch": 0.4, + "learning_rate": 3.0228449175191835e-05, + "loss": 2.7596, + "step": 119400 + }, + { + "epoch": 0.4, + "learning_rate": 3.0211890087398866e-05, + "loss": 2.7434, + "step": 119500 + }, + { + "epoch": 0.4, + "learning_rate": 3.0195330999605896e-05, + "loss": 2.7797, + "step": 119600 + }, + { + "epoch": 0.4, + "learning_rate": 3.0178771911812926e-05, + "loss": 2.7545, + "step": 119700 + }, + { + "epoch": 0.4, + "learning_rate": 3.0162212824019953e-05, + "loss": 2.7634, + "step": 119800 + }, + { + "epoch": 0.4, + "learning_rate": 3.014565373622698e-05, + "loss": 2.7587, + "step": 119900 + }, + { + "epoch": 0.4, + "learning_rate": 3.0129094648434007e-05, + "loss": 2.7685, + "step": 120000 + }, + { + "epoch": 0.4, + "learning_rate": 3.0112535560641037e-05, + "loss": 2.7517, + "step": 120100 + }, + { + "epoch": 0.4, + "learning_rate": 3.0095976472848064e-05, + "loss": 2.7565, + "step": 120200 + }, + { + "epoch": 0.4, + "learning_rate": 3.007941738505509e-05, + "loss": 2.7566, + "step": 120300 + }, + { + "epoch": 0.4, + "learning_rate": 3.0062858297262118e-05, + "loss": 2.7513, + "step": 120400 + }, + { + "epoch": 0.4, + "learning_rate": 3.004629920946915e-05, + "loss": 2.7686, + "step": 120500 + }, + { + "epoch": 0.4, + "learning_rate": 3.0029740121676182e-05, + "loss": 2.7608, + "step": 120600 + }, + { + "epoch": 0.4, + "learning_rate": 3.001318103388321e-05, + "loss": 2.7566, + "step": 120700 + }, + { + "epoch": 0.4, + "learning_rate": 2.9996621946090236e-05, + "loss": 2.7598, + "step": 120800 + }, + { + "epoch": 0.4, + "learning_rate": 2.9980062858297263e-05, + "loss": 2.7567, + "step": 120900 + }, + { + "epoch": 0.4, + "learning_rate": 2.9963503770504293e-05, + "loss": 2.7671, + "step": 121000 + }, + { + "epoch": 0.4, + "learning_rate": 2.994694468271132e-05, + "loss": 2.7633, + "step": 121100 + }, + { + "epoch": 0.4, + "learning_rate": 2.9930385594918347e-05, + "loss": 2.7682, + "step": 121200 + }, + { + "epoch": 0.4, + "learning_rate": 2.9913826507125374e-05, + "loss": 2.7629, + "step": 121300 + }, + { + "epoch": 0.4, + "learning_rate": 2.9897267419332404e-05, + "loss": 2.7464, + "step": 121400 + }, + { + "epoch": 0.4, + "learning_rate": 2.988070833153943e-05, + "loss": 2.7524, + "step": 121500 + }, + { + "epoch": 0.4, + "learning_rate": 2.9864149243746465e-05, + "loss": 2.7651, + "step": 121600 + }, + { + "epoch": 0.4, + "learning_rate": 2.9847590155953492e-05, + "loss": 2.7569, + "step": 121700 + }, + { + "epoch": 0.4, + "learning_rate": 2.983103106816052e-05, + "loss": 2.7521, + "step": 121800 + }, + { + "epoch": 0.4, + "learning_rate": 2.981447198036755e-05, + "loss": 2.7674, + "step": 121900 + }, + { + "epoch": 0.4, + "learning_rate": 2.9797912892574576e-05, + "loss": 2.7561, + "step": 122000 + }, + { + "epoch": 0.4, + "learning_rate": 2.9781353804781603e-05, + "loss": 2.7656, + "step": 122100 + }, + { + "epoch": 0.4, + "learning_rate": 2.976479471698863e-05, + "loss": 2.7588, + "step": 122200 + }, + { + "epoch": 0.41, + "learning_rate": 2.974823562919566e-05, + "loss": 2.7578, + "step": 122300 + }, + { + "epoch": 0.41, + "learning_rate": 2.9731676541402687e-05, + "loss": 2.7505, + "step": 122400 + }, + { + "epoch": 0.41, + "learning_rate": 2.9715117453609714e-05, + "loss": 2.769, + "step": 122500 + }, + { + "epoch": 0.41, + "learning_rate": 2.9698558365816747e-05, + "loss": 2.7546, + "step": 122600 + }, + { + "epoch": 0.41, + "learning_rate": 2.9681999278023774e-05, + "loss": 2.7474, + "step": 122700 + }, + { + "epoch": 0.41, + "learning_rate": 2.9665440190230805e-05, + "loss": 2.7522, + "step": 122800 + }, + { + "epoch": 0.41, + "learning_rate": 2.964888110243783e-05, + "loss": 2.7692, + "step": 122900 + }, + { + "epoch": 0.41, + "learning_rate": 2.963232201464486e-05, + "loss": 2.7625, + "step": 123000 + }, + { + "epoch": 0.41, + "learning_rate": 2.9615762926851885e-05, + "loss": 2.7502, + "step": 123100 + }, + { + "epoch": 0.41, + "learning_rate": 2.9599203839058916e-05, + "loss": 2.7606, + "step": 123200 + }, + { + "epoch": 0.41, + "learning_rate": 2.9582644751265943e-05, + "loss": 2.7565, + "step": 123300 + }, + { + "epoch": 0.41, + "learning_rate": 2.956608566347297e-05, + "loss": 2.7703, + "step": 123400 + }, + { + "epoch": 0.41, + "learning_rate": 2.9549526575679996e-05, + "loss": 2.7646, + "step": 123500 + }, + { + "epoch": 0.41, + "learning_rate": 2.953296748788703e-05, + "loss": 2.7451, + "step": 123600 + }, + { + "epoch": 0.41, + "learning_rate": 2.951640840009406e-05, + "loss": 2.7623, + "step": 123700 + }, + { + "epoch": 0.41, + "learning_rate": 2.9499849312301087e-05, + "loss": 2.7494, + "step": 123800 + }, + { + "epoch": 0.41, + "learning_rate": 2.9483290224508114e-05, + "loss": 2.7483, + "step": 123900 + }, + { + "epoch": 0.41, + "learning_rate": 2.946673113671514e-05, + "loss": 2.7581, + "step": 124000 + }, + { + "epoch": 0.41, + "learning_rate": 2.945017204892217e-05, + "loss": 2.7801, + "step": 124100 + }, + { + "epoch": 0.41, + "learning_rate": 2.94336129611292e-05, + "loss": 2.76, + "step": 124200 + }, + { + "epoch": 0.41, + "learning_rate": 2.9417053873336225e-05, + "loss": 2.7638, + "step": 124300 + }, + { + "epoch": 0.41, + "learning_rate": 2.9400494785543252e-05, + "loss": 2.7367, + "step": 124400 + }, + { + "epoch": 0.41, + "learning_rate": 2.9383935697750283e-05, + "loss": 2.7502, + "step": 124500 + }, + { + "epoch": 0.41, + "learning_rate": 2.9367376609957313e-05, + "loss": 2.7542, + "step": 124600 + }, + { + "epoch": 0.41, + "learning_rate": 2.9350817522164343e-05, + "loss": 2.7528, + "step": 124700 + }, + { + "epoch": 0.41, + "learning_rate": 2.933425843437137e-05, + "loss": 2.7583, + "step": 124800 + }, + { + "epoch": 0.41, + "learning_rate": 2.9317699346578397e-05, + "loss": 2.7601, + "step": 124900 + }, + { + "epoch": 0.41, + "learning_rate": 2.9301140258785427e-05, + "loss": 2.7696, + "step": 125000 + }, + { + "epoch": 0.41, + "learning_rate": 2.9284581170992454e-05, + "loss": 2.7524, + "step": 125100 + }, + { + "epoch": 0.41, + "learning_rate": 2.926802208319948e-05, + "loss": 2.7628, + "step": 125200 + }, + { + "epoch": 0.41, + "learning_rate": 2.9251462995406508e-05, + "loss": 2.7501, + "step": 125300 + }, + { + "epoch": 0.42, + "learning_rate": 2.9234903907613538e-05, + "loss": 2.7626, + "step": 125400 + }, + { + "epoch": 0.42, + "learning_rate": 2.9218344819820565e-05, + "loss": 2.7555, + "step": 125500 + }, + { + "epoch": 0.42, + "learning_rate": 2.92017857320276e-05, + "loss": 2.7382, + "step": 125600 + }, + { + "epoch": 0.42, + "learning_rate": 2.9185226644234626e-05, + "loss": 2.7408, + "step": 125700 + }, + { + "epoch": 0.42, + "learning_rate": 2.9168667556441653e-05, + "loss": 2.7702, + "step": 125800 + }, + { + "epoch": 0.42, + "learning_rate": 2.915210846864868e-05, + "loss": 2.744, + "step": 125900 + }, + { + "epoch": 0.42, + "learning_rate": 2.913554938085571e-05, + "loss": 2.7721, + "step": 126000 + }, + { + "epoch": 0.42, + "learning_rate": 2.9118990293062737e-05, + "loss": 2.758, + "step": 126100 + }, + { + "epoch": 0.42, + "learning_rate": 2.9102431205269764e-05, + "loss": 2.758, + "step": 126200 + }, + { + "epoch": 0.42, + "learning_rate": 2.908587211747679e-05, + "loss": 2.7522, + "step": 126300 + }, + { + "epoch": 0.42, + "learning_rate": 2.906931302968382e-05, + "loss": 2.7548, + "step": 126400 + }, + { + "epoch": 0.42, + "learning_rate": 2.9052753941890848e-05, + "loss": 2.7558, + "step": 126500 + }, + { + "epoch": 0.42, + "learning_rate": 2.903619485409788e-05, + "loss": 2.7564, + "step": 126600 + }, + { + "epoch": 0.42, + "learning_rate": 2.901963576630491e-05, + "loss": 2.756, + "step": 126700 + }, + { + "epoch": 0.42, + "learning_rate": 2.9003076678511935e-05, + "loss": 2.7608, + "step": 126800 + }, + { + "epoch": 0.42, + "learning_rate": 2.8986517590718966e-05, + "loss": 2.7604, + "step": 126900 + }, + { + "epoch": 0.42, + "learning_rate": 2.8969958502925993e-05, + "loss": 2.7504, + "step": 127000 + }, + { + "epoch": 0.42, + "learning_rate": 2.895339941513302e-05, + "loss": 2.7661, + "step": 127100 + }, + { + "epoch": 0.42, + "learning_rate": 2.8936840327340046e-05, + "loss": 2.747, + "step": 127200 + }, + { + "epoch": 0.42, + "learning_rate": 2.8920281239547077e-05, + "loss": 2.7703, + "step": 127300 + }, + { + "epoch": 0.42, + "learning_rate": 2.8903722151754104e-05, + "loss": 2.7673, + "step": 127400 + }, + { + "epoch": 0.42, + "learning_rate": 2.888716306396113e-05, + "loss": 2.7584, + "step": 127500 + }, + { + "epoch": 0.42, + "learning_rate": 2.8870603976168164e-05, + "loss": 2.7669, + "step": 127600 + }, + { + "epoch": 0.42, + "learning_rate": 2.885404488837519e-05, + "loss": 2.7593, + "step": 127700 + }, + { + "epoch": 0.42, + "learning_rate": 2.883748580058222e-05, + "loss": 2.7525, + "step": 127800 + }, + { + "epoch": 0.42, + "learning_rate": 2.882092671278925e-05, + "loss": 2.7673, + "step": 127900 + }, + { + "epoch": 0.42, + "learning_rate": 2.8804367624996275e-05, + "loss": 2.7647, + "step": 128000 + }, + { + "epoch": 0.42, + "learning_rate": 2.8787808537203302e-05, + "loss": 2.7812, + "step": 128100 + }, + { + "epoch": 0.42, + "learning_rate": 2.8771249449410332e-05, + "loss": 2.7534, + "step": 128200 + }, + { + "epoch": 0.42, + "learning_rate": 2.875469036161736e-05, + "loss": 2.7551, + "step": 128300 + }, + { + "epoch": 0.43, + "learning_rate": 2.8738131273824386e-05, + "loss": 2.7509, + "step": 128400 + }, + { + "epoch": 0.43, + "learning_rate": 2.8721572186031413e-05, + "loss": 2.7596, + "step": 128500 + }, + { + "epoch": 0.43, + "learning_rate": 2.8705013098238447e-05, + "loss": 2.7566, + "step": 128600 + }, + { + "epoch": 0.43, + "learning_rate": 2.8688454010445477e-05, + "loss": 2.7621, + "step": 128700 + }, + { + "epoch": 0.43, + "learning_rate": 2.8671894922652504e-05, + "loss": 2.7649, + "step": 128800 + }, + { + "epoch": 0.43, + "learning_rate": 2.865533583485953e-05, + "loss": 2.754, + "step": 128900 + }, + { + "epoch": 0.43, + "learning_rate": 2.8638776747066558e-05, + "loss": 2.7649, + "step": 129000 + }, + { + "epoch": 0.43, + "learning_rate": 2.8622217659273588e-05, + "loss": 2.7483, + "step": 129100 + }, + { + "epoch": 0.43, + "learning_rate": 2.8605658571480615e-05, + "loss": 2.7457, + "step": 129200 + }, + { + "epoch": 0.43, + "learning_rate": 2.8589099483687642e-05, + "loss": 2.7721, + "step": 129300 + }, + { + "epoch": 0.43, + "learning_rate": 2.857254039589467e-05, + "loss": 2.7535, + "step": 129400 + }, + { + "epoch": 0.43, + "learning_rate": 2.85559813081017e-05, + "loss": 2.7428, + "step": 129500 + }, + { + "epoch": 0.43, + "learning_rate": 2.8539422220308733e-05, + "loss": 2.7509, + "step": 129600 + }, + { + "epoch": 0.43, + "learning_rate": 2.852286313251576e-05, + "loss": 2.7598, + "step": 129700 + }, + { + "epoch": 0.43, + "learning_rate": 2.8506304044722787e-05, + "loss": 2.76, + "step": 129800 + }, + { + "epoch": 0.43, + "learning_rate": 2.8489744956929814e-05, + "loss": 2.7483, + "step": 129900 + }, + { + "epoch": 0.43, + "learning_rate": 2.8473185869136844e-05, + "loss": 2.7416, + "step": 130000 + }, + { + "epoch": 0.43, + "learning_rate": 2.845662678134387e-05, + "loss": 2.7611, + "step": 130100 + }, + { + "epoch": 0.43, + "learning_rate": 2.8440067693550898e-05, + "loss": 2.7499, + "step": 130200 + }, + { + "epoch": 0.43, + "learning_rate": 2.8423508605757925e-05, + "loss": 2.7544, + "step": 130300 + }, + { + "epoch": 0.43, + "learning_rate": 2.8406949517964955e-05, + "loss": 2.751, + "step": 130400 + }, + { + "epoch": 0.43, + "learning_rate": 2.8390390430171982e-05, + "loss": 2.7685, + "step": 130500 + }, + { + "epoch": 0.43, + "learning_rate": 2.8373831342379016e-05, + "loss": 2.7527, + "step": 130600 + }, + { + "epoch": 0.43, + "learning_rate": 2.8357272254586043e-05, + "loss": 2.738, + "step": 130700 + }, + { + "epoch": 0.43, + "learning_rate": 2.834071316679307e-05, + "loss": 2.7563, + "step": 130800 + }, + { + "epoch": 0.43, + "learning_rate": 2.83241540790001e-05, + "loss": 2.7542, + "step": 130900 + }, + { + "epoch": 0.43, + "learning_rate": 2.8307594991207127e-05, + "loss": 2.7592, + "step": 131000 + }, + { + "epoch": 0.43, + "learning_rate": 2.8291035903414154e-05, + "loss": 2.7531, + "step": 131100 + }, + { + "epoch": 0.43, + "learning_rate": 2.827447681562118e-05, + "loss": 2.7603, + "step": 131200 + }, + { + "epoch": 0.43, + "learning_rate": 2.825791772782821e-05, + "loss": 2.758, + "step": 131300 + }, + { + "epoch": 0.44, + "learning_rate": 2.8241358640035238e-05, + "loss": 2.7593, + "step": 131400 + }, + { + "epoch": 0.44, + "learning_rate": 2.8224799552242265e-05, + "loss": 2.7529, + "step": 131500 + }, + { + "epoch": 0.44, + "learning_rate": 2.8208240464449298e-05, + "loss": 2.7574, + "step": 131600 + }, + { + "epoch": 0.44, + "learning_rate": 2.8191681376656325e-05, + "loss": 2.764, + "step": 131700 + }, + { + "epoch": 0.44, + "learning_rate": 2.8175122288863352e-05, + "loss": 2.7549, + "step": 131800 + }, + { + "epoch": 0.44, + "learning_rate": 2.8158563201070382e-05, + "loss": 2.7575, + "step": 131900 + }, + { + "epoch": 0.44, + "learning_rate": 2.814200411327741e-05, + "loss": 2.7618, + "step": 132000 + }, + { + "epoch": 0.44, + "learning_rate": 2.8125445025484436e-05, + "loss": 2.7562, + "step": 132100 + }, + { + "epoch": 0.44, + "learning_rate": 2.8108885937691467e-05, + "loss": 2.7533, + "step": 132200 + }, + { + "epoch": 0.44, + "learning_rate": 2.8092326849898493e-05, + "loss": 2.7619, + "step": 132300 + }, + { + "epoch": 0.44, + "learning_rate": 2.807576776210552e-05, + "loss": 2.7684, + "step": 132400 + }, + { + "epoch": 0.44, + "learning_rate": 2.8059208674312547e-05, + "loss": 2.7523, + "step": 132500 + }, + { + "epoch": 0.44, + "learning_rate": 2.804264958651958e-05, + "loss": 2.7546, + "step": 132600 + }, + { + "epoch": 0.44, + "learning_rate": 2.8026090498726608e-05, + "loss": 2.7422, + "step": 132700 + }, + { + "epoch": 0.44, + "learning_rate": 2.8009531410933638e-05, + "loss": 2.739, + "step": 132800 + }, + { + "epoch": 0.44, + "learning_rate": 2.7992972323140665e-05, + "loss": 2.7486, + "step": 132900 + }, + { + "epoch": 0.44, + "learning_rate": 2.7976413235347692e-05, + "loss": 2.7702, + "step": 133000 + }, + { + "epoch": 0.44, + "learning_rate": 2.795985414755472e-05, + "loss": 2.757, + "step": 133100 + }, + { + "epoch": 0.44, + "learning_rate": 2.794329505976175e-05, + "loss": 2.753, + "step": 133200 + }, + { + "epoch": 0.44, + "learning_rate": 2.7926735971968776e-05, + "loss": 2.7609, + "step": 133300 + }, + { + "epoch": 0.44, + "learning_rate": 2.7910176884175803e-05, + "loss": 2.7548, + "step": 133400 + }, + { + "epoch": 0.44, + "learning_rate": 2.789361779638283e-05, + "loss": 2.7507, + "step": 133500 + }, + { + "epoch": 0.44, + "learning_rate": 2.7877058708589864e-05, + "loss": 2.7667, + "step": 133600 + }, + { + "epoch": 0.44, + "learning_rate": 2.7860499620796894e-05, + "loss": 2.7481, + "step": 133700 + }, + { + "epoch": 0.44, + "learning_rate": 2.784394053300392e-05, + "loss": 2.7568, + "step": 133800 + }, + { + "epoch": 0.44, + "learning_rate": 2.7827381445210948e-05, + "loss": 2.7523, + "step": 133900 + }, + { + "epoch": 0.44, + "learning_rate": 2.7810822357417975e-05, + "loss": 2.7501, + "step": 134000 + }, + { + "epoch": 0.44, + "learning_rate": 2.7794263269625005e-05, + "loss": 2.7641, + "step": 134100 + }, + { + "epoch": 0.44, + "learning_rate": 2.7777704181832032e-05, + "loss": 2.7577, + "step": 134200 + }, + { + "epoch": 0.44, + "learning_rate": 2.776114509403906e-05, + "loss": 2.7708, + "step": 134300 + }, + { + "epoch": 0.45, + "learning_rate": 2.7744586006246086e-05, + "loss": 2.7554, + "step": 134400 + }, + { + "epoch": 0.45, + "learning_rate": 2.7728026918453116e-05, + "loss": 2.7637, + "step": 134500 + }, + { + "epoch": 0.45, + "learning_rate": 2.771146783066015e-05, + "loss": 2.7588, + "step": 134600 + }, + { + "epoch": 0.45, + "learning_rate": 2.7694908742867177e-05, + "loss": 2.7354, + "step": 134700 + }, + { + "epoch": 0.45, + "learning_rate": 2.7678349655074203e-05, + "loss": 2.7412, + "step": 134800 + }, + { + "epoch": 0.45, + "learning_rate": 2.766179056728123e-05, + "loss": 2.7553, + "step": 134900 + }, + { + "epoch": 0.45, + "learning_rate": 2.764523147948826e-05, + "loss": 2.7737, + "step": 135000 + }, + { + "epoch": 0.45, + "learning_rate": 2.7628672391695288e-05, + "loss": 2.7591, + "step": 135100 + }, + { + "epoch": 0.45, + "learning_rate": 2.7612113303902315e-05, + "loss": 2.7673, + "step": 135200 + }, + { + "epoch": 0.45, + "learning_rate": 2.759555421610934e-05, + "loss": 2.7629, + "step": 135300 + }, + { + "epoch": 0.45, + "learning_rate": 2.7578995128316372e-05, + "loss": 2.7573, + "step": 135400 + }, + { + "epoch": 0.45, + "learning_rate": 2.75624360405234e-05, + "loss": 2.7672, + "step": 135500 + }, + { + "epoch": 0.45, + "learning_rate": 2.7545876952730432e-05, + "loss": 2.7616, + "step": 135600 + }, + { + "epoch": 0.45, + "learning_rate": 2.752931786493746e-05, + "loss": 2.7643, + "step": 135700 + }, + { + "epoch": 0.45, + "learning_rate": 2.7512758777144486e-05, + "loss": 2.7573, + "step": 135800 + }, + { + "epoch": 0.45, + "learning_rate": 2.7496199689351516e-05, + "loss": 2.7671, + "step": 135900 + }, + { + "epoch": 0.45, + "learning_rate": 2.7479640601558543e-05, + "loss": 2.7442, + "step": 136000 + }, + { + "epoch": 0.45, + "learning_rate": 2.746308151376557e-05, + "loss": 2.7525, + "step": 136100 + }, + { + "epoch": 0.45, + "learning_rate": 2.7446522425972597e-05, + "loss": 2.7564, + "step": 136200 + }, + { + "epoch": 0.45, + "learning_rate": 2.7429963338179627e-05, + "loss": 2.7669, + "step": 136300 + }, + { + "epoch": 0.45, + "learning_rate": 2.7413404250386654e-05, + "loss": 2.7549, + "step": 136400 + }, + { + "epoch": 0.45, + "learning_rate": 2.739684516259368e-05, + "loss": 2.7513, + "step": 136500 + }, + { + "epoch": 0.45, + "learning_rate": 2.7380286074800715e-05, + "loss": 2.7709, + "step": 136600 + }, + { + "epoch": 0.45, + "learning_rate": 2.7363726987007742e-05, + "loss": 2.76, + "step": 136700 + }, + { + "epoch": 0.45, + "learning_rate": 2.7347167899214772e-05, + "loss": 2.7617, + "step": 136800 + }, + { + "epoch": 0.45, + "learning_rate": 2.73306088114218e-05, + "loss": 2.7655, + "step": 136900 + }, + { + "epoch": 0.45, + "learning_rate": 2.7314049723628826e-05, + "loss": 2.758, + "step": 137000 + }, + { + "epoch": 0.45, + "learning_rate": 2.7297490635835853e-05, + "loss": 2.7473, + "step": 137100 + }, + { + "epoch": 0.45, + "learning_rate": 2.7280931548042883e-05, + "loss": 2.7639, + "step": 137200 + }, + { + "epoch": 0.45, + "learning_rate": 2.726437246024991e-05, + "loss": 2.7329, + "step": 137300 + }, + { + "epoch": 0.46, + "learning_rate": 2.7247813372456937e-05, + "loss": 2.738, + "step": 137400 + }, + { + "epoch": 0.46, + "learning_rate": 2.7231254284663964e-05, + "loss": 2.7539, + "step": 137500 + }, + { + "epoch": 0.46, + "learning_rate": 2.7214695196870998e-05, + "loss": 2.7639, + "step": 137600 + }, + { + "epoch": 0.46, + "learning_rate": 2.7198136109078025e-05, + "loss": 2.7594, + "step": 137700 + }, + { + "epoch": 0.46, + "learning_rate": 2.7181577021285055e-05, + "loss": 2.7552, + "step": 137800 + }, + { + "epoch": 0.46, + "learning_rate": 2.7165017933492082e-05, + "loss": 2.7571, + "step": 137900 + }, + { + "epoch": 0.46, + "learning_rate": 2.714845884569911e-05, + "loss": 2.7518, + "step": 138000 + }, + { + "epoch": 0.46, + "learning_rate": 2.713189975790614e-05, + "loss": 2.7523, + "step": 138100 + }, + { + "epoch": 0.46, + "learning_rate": 2.7115340670113166e-05, + "loss": 2.7529, + "step": 138200 + }, + { + "epoch": 0.46, + "learning_rate": 2.7098781582320193e-05, + "loss": 2.7553, + "step": 138300 + }, + { + "epoch": 0.46, + "learning_rate": 2.708222249452722e-05, + "loss": 2.7652, + "step": 138400 + }, + { + "epoch": 0.46, + "learning_rate": 2.706566340673425e-05, + "loss": 2.7662, + "step": 138500 + }, + { + "epoch": 0.46, + "learning_rate": 2.704910431894128e-05, + "loss": 2.7495, + "step": 138600 + }, + { + "epoch": 0.46, + "learning_rate": 2.703254523114831e-05, + "loss": 2.7556, + "step": 138700 + }, + { + "epoch": 0.46, + "learning_rate": 2.7015986143355338e-05, + "loss": 2.7361, + "step": 138800 + }, + { + "epoch": 0.46, + "learning_rate": 2.6999427055562364e-05, + "loss": 2.7461, + "step": 138900 + }, + { + "epoch": 0.46, + "learning_rate": 2.698286796776939e-05, + "loss": 2.7653, + "step": 139000 + }, + { + "epoch": 0.46, + "learning_rate": 2.696630887997642e-05, + "loss": 2.7568, + "step": 139100 + }, + { + "epoch": 0.46, + "learning_rate": 2.694974979218345e-05, + "loss": 2.7588, + "step": 139200 + }, + { + "epoch": 0.46, + "learning_rate": 2.6933190704390475e-05, + "loss": 2.758, + "step": 139300 + }, + { + "epoch": 0.46, + "learning_rate": 2.6916631616597506e-05, + "loss": 2.7678, + "step": 139400 + }, + { + "epoch": 0.46, + "learning_rate": 2.6900072528804533e-05, + "loss": 2.7525, + "step": 139500 + }, + { + "epoch": 0.46, + "learning_rate": 2.6883513441011566e-05, + "loss": 2.7535, + "step": 139600 + }, + { + "epoch": 0.46, + "learning_rate": 2.6866954353218593e-05, + "loss": 2.7437, + "step": 139700 + }, + { + "epoch": 0.46, + "learning_rate": 2.685039526542562e-05, + "loss": 2.7405, + "step": 139800 + }, + { + "epoch": 0.46, + "learning_rate": 2.6833836177632647e-05, + "loss": 2.7649, + "step": 139900 + }, + { + "epoch": 0.46, + "learning_rate": 2.6817277089839677e-05, + "loss": 2.7471, + "step": 140000 + }, + { + "epoch": 0.46, + "learning_rate": 2.6800718002046704e-05, + "loss": 2.7658, + "step": 140100 + }, + { + "epoch": 0.46, + "learning_rate": 2.678415891425373e-05, + "loss": 2.7554, + "step": 140200 + }, + { + "epoch": 0.46, + "learning_rate": 2.6767599826460758e-05, + "loss": 2.7472, + "step": 140300 + }, + { + "epoch": 0.46, + "learning_rate": 2.675104073866779e-05, + "loss": 2.7463, + "step": 140400 + }, + { + "epoch": 0.47, + "learning_rate": 2.6734481650874815e-05, + "loss": 2.7655, + "step": 140500 + }, + { + "epoch": 0.47, + "learning_rate": 2.671792256308185e-05, + "loss": 2.754, + "step": 140600 + }, + { + "epoch": 0.47, + "learning_rate": 2.6701363475288876e-05, + "loss": 2.7632, + "step": 140700 + }, + { + "epoch": 0.47, + "learning_rate": 2.6684804387495903e-05, + "loss": 2.7481, + "step": 140800 + }, + { + "epoch": 0.47, + "learning_rate": 2.6668245299702933e-05, + "loss": 2.7548, + "step": 140900 + }, + { + "epoch": 0.47, + "learning_rate": 2.665168621190996e-05, + "loss": 2.7708, + "step": 141000 + }, + { + "epoch": 0.47, + "learning_rate": 2.6635127124116987e-05, + "loss": 2.7772, + "step": 141100 + }, + { + "epoch": 0.47, + "learning_rate": 2.6618568036324014e-05, + "loss": 2.7578, + "step": 141200 + }, + { + "epoch": 0.47, + "learning_rate": 2.6602008948531044e-05, + "loss": 2.7649, + "step": 141300 + }, + { + "epoch": 0.47, + "learning_rate": 2.658544986073807e-05, + "loss": 2.768, + "step": 141400 + }, + { + "epoch": 0.47, + "learning_rate": 2.6568890772945098e-05, + "loss": 2.7488, + "step": 141500 + }, + { + "epoch": 0.47, + "learning_rate": 2.6552331685152132e-05, + "loss": 2.7581, + "step": 141600 + }, + { + "epoch": 0.47, + "learning_rate": 2.653577259735916e-05, + "loss": 2.7477, + "step": 141700 + }, + { + "epoch": 0.47, + "learning_rate": 2.651921350956619e-05, + "loss": 2.7524, + "step": 141800 + }, + { + "epoch": 0.47, + "learning_rate": 2.6502654421773216e-05, + "loss": 2.7611, + "step": 141900 + }, + { + "epoch": 0.47, + "learning_rate": 2.6486095333980243e-05, + "loss": 2.7412, + "step": 142000 + }, + { + "epoch": 0.47, + "learning_rate": 2.646953624618727e-05, + "loss": 2.7678, + "step": 142100 + }, + { + "epoch": 0.47, + "learning_rate": 2.64529771583943e-05, + "loss": 2.775, + "step": 142200 + }, + { + "epoch": 0.47, + "learning_rate": 2.6436418070601327e-05, + "loss": 2.7517, + "step": 142300 + }, + { + "epoch": 0.47, + "learning_rate": 2.6419858982808354e-05, + "loss": 2.7551, + "step": 142400 + }, + { + "epoch": 0.47, + "learning_rate": 2.640329989501538e-05, + "loss": 2.7571, + "step": 142500 + }, + { + "epoch": 0.47, + "learning_rate": 2.6386740807222414e-05, + "loss": 2.7454, + "step": 142600 + }, + { + "epoch": 0.47, + "learning_rate": 2.6370181719429445e-05, + "loss": 2.7501, + "step": 142700 + }, + { + "epoch": 0.47, + "learning_rate": 2.635362263163647e-05, + "loss": 2.7465, + "step": 142800 + }, + { + "epoch": 0.47, + "learning_rate": 2.63370635438435e-05, + "loss": 2.7412, + "step": 142900 + }, + { + "epoch": 0.47, + "learning_rate": 2.6320504456050525e-05, + "loss": 2.745, + "step": 143000 + }, + { + "epoch": 0.47, + "learning_rate": 2.6303945368257556e-05, + "loss": 2.7597, + "step": 143100 + }, + { + "epoch": 0.47, + "learning_rate": 2.6287386280464583e-05, + "loss": 2.746, + "step": 143200 + }, + { + "epoch": 0.47, + "learning_rate": 2.627082719267161e-05, + "loss": 2.7431, + "step": 143300 + }, + { + "epoch": 0.47, + "learning_rate": 2.6254268104878636e-05, + "loss": 2.7406, + "step": 143400 + }, + { + "epoch": 0.48, + "learning_rate": 2.6237709017085667e-05, + "loss": 2.7644, + "step": 143500 + }, + { + "epoch": 0.48, + "learning_rate": 2.62211499292927e-05, + "loss": 2.7502, + "step": 143600 + }, + { + "epoch": 0.48, + "learning_rate": 2.6204590841499727e-05, + "loss": 2.7523, + "step": 143700 + }, + { + "epoch": 0.48, + "learning_rate": 2.6188031753706754e-05, + "loss": 2.7476, + "step": 143800 + }, + { + "epoch": 0.48, + "learning_rate": 2.617147266591378e-05, + "loss": 2.7528, + "step": 143900 + }, + { + "epoch": 0.48, + "learning_rate": 2.615491357812081e-05, + "loss": 2.7566, + "step": 144000 + }, + { + "epoch": 0.48, + "learning_rate": 2.613835449032784e-05, + "loss": 2.756, + "step": 144100 + }, + { + "epoch": 0.48, + "learning_rate": 2.6121795402534865e-05, + "loss": 2.7496, + "step": 144200 + }, + { + "epoch": 0.48, + "learning_rate": 2.6105236314741892e-05, + "loss": 2.7507, + "step": 144300 + }, + { + "epoch": 0.48, + "learning_rate": 2.6088677226948922e-05, + "loss": 2.7564, + "step": 144400 + }, + { + "epoch": 0.48, + "learning_rate": 2.607211813915595e-05, + "loss": 2.76, + "step": 144500 + }, + { + "epoch": 0.48, + "learning_rate": 2.6055559051362983e-05, + "loss": 2.7514, + "step": 144600 + }, + { + "epoch": 0.48, + "learning_rate": 2.603899996357001e-05, + "loss": 2.7562, + "step": 144700 + }, + { + "epoch": 0.48, + "learning_rate": 2.6022440875777037e-05, + "loss": 2.7583, + "step": 144800 + }, + { + "epoch": 0.48, + "learning_rate": 2.6005881787984064e-05, + "loss": 2.7492, + "step": 144900 + }, + { + "epoch": 0.48, + "learning_rate": 2.5989322700191094e-05, + "loss": 2.7662, + "step": 145000 + }, + { + "epoch": 0.48, + "learning_rate": 2.597276361239812e-05, + "loss": 2.7653, + "step": 145100 + }, + { + "epoch": 0.48, + "learning_rate": 2.5956204524605148e-05, + "loss": 2.7465, + "step": 145200 + }, + { + "epoch": 0.48, + "learning_rate": 2.5939645436812178e-05, + "loss": 2.7628, + "step": 145300 + }, + { + "epoch": 0.48, + "learning_rate": 2.5923086349019205e-05, + "loss": 2.7586, + "step": 145400 + }, + { + "epoch": 0.48, + "learning_rate": 2.5906527261226232e-05, + "loss": 2.7376, + "step": 145500 + }, + { + "epoch": 0.48, + "learning_rate": 2.5889968173433266e-05, + "loss": 2.7586, + "step": 145600 + }, + { + "epoch": 0.48, + "learning_rate": 2.5873409085640293e-05, + "loss": 2.7498, + "step": 145700 + }, + { + "epoch": 0.48, + "learning_rate": 2.585684999784732e-05, + "loss": 2.7767, + "step": 145800 + }, + { + "epoch": 0.48, + "learning_rate": 2.584029091005435e-05, + "loss": 2.7533, + "step": 145900 + }, + { + "epoch": 0.48, + "learning_rate": 2.5823731822261377e-05, + "loss": 2.7485, + "step": 146000 + }, + { + "epoch": 0.48, + "learning_rate": 2.5807172734468404e-05, + "loss": 2.769, + "step": 146100 + }, + { + "epoch": 0.48, + "learning_rate": 2.579061364667543e-05, + "loss": 2.7463, + "step": 146200 + }, + { + "epoch": 0.48, + "learning_rate": 2.577405455888246e-05, + "loss": 2.7542, + "step": 146300 + }, + { + "epoch": 0.48, + "learning_rate": 2.5757495471089488e-05, + "loss": 2.7532, + "step": 146400 + }, + { + "epoch": 0.49, + "learning_rate": 2.5740936383296515e-05, + "loss": 2.7563, + "step": 146500 + }, + { + "epoch": 0.49, + "learning_rate": 2.572437729550355e-05, + "loss": 2.7542, + "step": 146600 + }, + { + "epoch": 0.49, + "learning_rate": 2.5707818207710575e-05, + "loss": 2.7628, + "step": 146700 + }, + { + "epoch": 0.49, + "learning_rate": 2.5691259119917606e-05, + "loss": 2.7446, + "step": 146800 + }, + { + "epoch": 0.49, + "learning_rate": 2.5674700032124633e-05, + "loss": 2.749, + "step": 146900 + }, + { + "epoch": 0.49, + "learning_rate": 2.565814094433166e-05, + "loss": 2.7578, + "step": 147000 + }, + { + "epoch": 0.49, + "learning_rate": 2.5641581856538686e-05, + "loss": 2.7656, + "step": 147100 + }, + { + "epoch": 0.49, + "learning_rate": 2.5625022768745717e-05, + "loss": 2.7646, + "step": 147200 + }, + { + "epoch": 0.49, + "learning_rate": 2.5608463680952744e-05, + "loss": 2.731, + "step": 147300 + }, + { + "epoch": 0.49, + "learning_rate": 2.559190459315977e-05, + "loss": 2.7534, + "step": 147400 + }, + { + "epoch": 0.49, + "learning_rate": 2.5575345505366797e-05, + "loss": 2.7461, + "step": 147500 + }, + { + "epoch": 0.49, + "learning_rate": 2.555878641757383e-05, + "loss": 2.7473, + "step": 147600 + }, + { + "epoch": 0.49, + "learning_rate": 2.554222732978086e-05, + "loss": 2.7605, + "step": 147700 + }, + { + "epoch": 0.49, + "learning_rate": 2.552566824198789e-05, + "loss": 2.7654, + "step": 147800 + }, + { + "epoch": 0.49, + "learning_rate": 2.5509109154194915e-05, + "loss": 2.7463, + "step": 147900 + }, + { + "epoch": 0.49, + "learning_rate": 2.5492550066401942e-05, + "loss": 2.7581, + "step": 148000 + }, + { + "epoch": 0.49, + "learning_rate": 2.5475990978608972e-05, + "loss": 2.7486, + "step": 148100 + }, + { + "epoch": 0.49, + "learning_rate": 2.5459431890816e-05, + "loss": 2.7464, + "step": 148200 + }, + { + "epoch": 0.49, + "learning_rate": 2.5442872803023026e-05, + "loss": 2.7435, + "step": 148300 + }, + { + "epoch": 0.49, + "learning_rate": 2.5426313715230053e-05, + "loss": 2.7494, + "step": 148400 + }, + { + "epoch": 0.49, + "learning_rate": 2.5409754627437083e-05, + "loss": 2.7672, + "step": 148500 + }, + { + "epoch": 0.49, + "learning_rate": 2.5393195539644117e-05, + "loss": 2.7532, + "step": 148600 + }, + { + "epoch": 0.49, + "learning_rate": 2.5376636451851144e-05, + "loss": 2.7632, + "step": 148700 + }, + { + "epoch": 0.49, + "learning_rate": 2.536007736405817e-05, + "loss": 2.7478, + "step": 148800 + }, + { + "epoch": 0.49, + "learning_rate": 2.5343518276265198e-05, + "loss": 2.732, + "step": 148900 + }, + { + "epoch": 0.49, + "learning_rate": 2.5326959188472228e-05, + "loss": 2.7473, + "step": 149000 + }, + { + "epoch": 0.49, + "learning_rate": 2.5310400100679255e-05, + "loss": 2.7461, + "step": 149100 + }, + { + "epoch": 0.49, + "learning_rate": 2.5293841012886282e-05, + "loss": 2.7607, + "step": 149200 + }, + { + "epoch": 0.49, + "learning_rate": 2.527728192509331e-05, + "loss": 2.7626, + "step": 149300 + }, + { + "epoch": 0.49, + "learning_rate": 2.526072283730034e-05, + "loss": 2.7515, + "step": 149400 + }, + { + "epoch": 0.5, + "learning_rate": 2.5244163749507366e-05, + "loss": 2.7182, + "step": 149500 + }, + { + "epoch": 0.5, + "learning_rate": 2.52276046617144e-05, + "loss": 2.7523, + "step": 149600 + }, + { + "epoch": 0.5, + "learning_rate": 2.5211045573921427e-05, + "loss": 2.7541, + "step": 149700 + }, + { + "epoch": 0.5, + "learning_rate": 2.5194486486128454e-05, + "loss": 2.7619, + "step": 149800 + }, + { + "epoch": 0.5, + "learning_rate": 2.5177927398335484e-05, + "loss": 2.7523, + "step": 149900 + }, + { + "epoch": 0.5, + "learning_rate": 2.516136831054251e-05, + "loss": 2.7475, + "step": 150000 + }, + { + "epoch": 0.5, + "learning_rate": 2.5144809222749538e-05, + "loss": 2.7466, + "step": 150100 + }, + { + "epoch": 0.5, + "learning_rate": 2.5128250134956565e-05, + "loss": 2.7518, + "step": 150200 + }, + { + "epoch": 0.5, + "learning_rate": 2.5111691047163595e-05, + "loss": 2.7484, + "step": 150300 + }, + { + "epoch": 0.5, + "learning_rate": 2.5095131959370622e-05, + "loss": 2.745, + "step": 150400 + }, + { + "epoch": 0.5, + "learning_rate": 2.507857287157765e-05, + "loss": 2.7493, + "step": 150500 + }, + { + "epoch": 0.5, + "learning_rate": 2.5062013783784682e-05, + "loss": 2.75, + "step": 150600 + }, + { + "epoch": 0.5, + "learning_rate": 2.504545469599171e-05, + "loss": 2.7496, + "step": 150700 + }, + { + "epoch": 0.5, + "learning_rate": 2.502889560819874e-05, + "loss": 2.7602, + "step": 150800 + }, + { + "epoch": 0.5, + "learning_rate": 2.5012336520405767e-05, + "loss": 2.755, + "step": 150900 + }, + { + "epoch": 0.5, + "learning_rate": 2.4995777432612794e-05, + "loss": 2.7668, + "step": 151000 + }, + { + "epoch": 0.5, + "learning_rate": 2.497921834481982e-05, + "loss": 2.7389, + "step": 151100 + }, + { + "epoch": 0.5, + "learning_rate": 2.496265925702685e-05, + "loss": 2.739, + "step": 151200 + }, + { + "epoch": 0.5, + "learning_rate": 2.494610016923388e-05, + "loss": 2.7475, + "step": 151300 + }, + { + "epoch": 0.5, + "learning_rate": 2.4929541081440908e-05, + "loss": 2.7705, + "step": 151400 + }, + { + "epoch": 0.5, + "learning_rate": 2.4912981993647935e-05, + "loss": 2.7449, + "step": 151500 + }, + { + "epoch": 0.5, + "learning_rate": 2.4896422905854962e-05, + "loss": 2.7536, + "step": 151600 + }, + { + "epoch": 0.5, + "learning_rate": 2.4879863818061992e-05, + "loss": 2.7573, + "step": 151700 + }, + { + "epoch": 0.5, + "learning_rate": 2.4863304730269022e-05, + "loss": 2.7434, + "step": 151800 + }, + { + "epoch": 0.5, + "learning_rate": 2.484674564247605e-05, + "loss": 2.7633, + "step": 151900 + }, + { + "epoch": 0.5, + "learning_rate": 2.4830186554683076e-05, + "loss": 2.7484, + "step": 152000 + }, + { + "epoch": 0.5, + "learning_rate": 2.4813627466890103e-05, + "loss": 2.7682, + "step": 152100 + }, + { + "epoch": 0.5, + "learning_rate": 2.4797068379097133e-05, + "loss": 2.7473, + "step": 152200 + }, + { + "epoch": 0.5, + "learning_rate": 2.4780509291304164e-05, + "loss": 2.7521, + "step": 152300 + }, + { + "epoch": 0.5, + "learning_rate": 2.476395020351119e-05, + "loss": 2.756, + "step": 152400 + }, + { + "epoch": 0.51, + "learning_rate": 2.4747391115718218e-05, + "loss": 2.7471, + "step": 152500 + }, + { + "epoch": 0.51, + "learning_rate": 2.4730832027925248e-05, + "loss": 2.7623, + "step": 152600 + }, + { + "epoch": 0.51, + "learning_rate": 2.4714272940132275e-05, + "loss": 2.752, + "step": 152700 + }, + { + "epoch": 0.51, + "learning_rate": 2.4697713852339305e-05, + "loss": 2.7477, + "step": 152800 + }, + { + "epoch": 0.51, + "learning_rate": 2.4681154764546332e-05, + "loss": 2.7503, + "step": 152900 + }, + { + "epoch": 0.51, + "learning_rate": 2.466459567675336e-05, + "loss": 2.7422, + "step": 153000 + }, + { + "epoch": 0.51, + "learning_rate": 2.464803658896039e-05, + "loss": 2.7497, + "step": 153100 + }, + { + "epoch": 0.51, + "learning_rate": 2.4631477501167416e-05, + "loss": 2.7603, + "step": 153200 + }, + { + "epoch": 0.51, + "learning_rate": 2.4614918413374446e-05, + "loss": 2.7386, + "step": 153300 + }, + { + "epoch": 0.51, + "learning_rate": 2.4598359325581473e-05, + "loss": 2.7537, + "step": 153400 + }, + { + "epoch": 0.51, + "learning_rate": 2.4581800237788504e-05, + "loss": 2.7566, + "step": 153500 + }, + { + "epoch": 0.51, + "learning_rate": 2.456524114999553e-05, + "loss": 2.7501, + "step": 153600 + }, + { + "epoch": 0.51, + "learning_rate": 2.4548682062202557e-05, + "loss": 2.7485, + "step": 153700 + }, + { + "epoch": 0.51, + "learning_rate": 2.4532122974409588e-05, + "loss": 2.7623, + "step": 153800 + }, + { + "epoch": 0.51, + "learning_rate": 2.4515563886616615e-05, + "loss": 2.7502, + "step": 153900 + }, + { + "epoch": 0.51, + "learning_rate": 2.4499004798823645e-05, + "loss": 2.7534, + "step": 154000 + }, + { + "epoch": 0.51, + "learning_rate": 2.4482445711030672e-05, + "loss": 2.7535, + "step": 154100 + }, + { + "epoch": 0.51, + "learning_rate": 2.44658866232377e-05, + "loss": 2.7581, + "step": 154200 + }, + { + "epoch": 0.51, + "learning_rate": 2.444932753544473e-05, + "loss": 2.7491, + "step": 154300 + }, + { + "epoch": 0.51, + "learning_rate": 2.443276844765176e-05, + "loss": 2.759, + "step": 154400 + }, + { + "epoch": 0.51, + "learning_rate": 2.4416209359858786e-05, + "loss": 2.7406, + "step": 154500 + }, + { + "epoch": 0.51, + "learning_rate": 2.4399650272065813e-05, + "loss": 2.7256, + "step": 154600 + }, + { + "epoch": 0.51, + "learning_rate": 2.438309118427284e-05, + "loss": 2.7426, + "step": 154700 + }, + { + "epoch": 0.51, + "learning_rate": 2.436653209647987e-05, + "loss": 2.7443, + "step": 154800 + }, + { + "epoch": 0.51, + "learning_rate": 2.43499730086869e-05, + "loss": 2.7427, + "step": 154900 + }, + { + "epoch": 0.51, + "learning_rate": 2.4333413920893928e-05, + "loss": 2.7406, + "step": 155000 + }, + { + "epoch": 0.51, + "learning_rate": 2.4316854833100954e-05, + "loss": 2.7413, + "step": 155100 + }, + { + "epoch": 0.51, + "learning_rate": 2.430029574530798e-05, + "loss": 2.7546, + "step": 155200 + }, + { + "epoch": 0.51, + "learning_rate": 2.4283736657515012e-05, + "loss": 2.7549, + "step": 155300 + }, + { + "epoch": 0.51, + "learning_rate": 2.4267177569722042e-05, + "loss": 2.7449, + "step": 155400 + }, + { + "epoch": 0.51, + "learning_rate": 2.425061848192907e-05, + "loss": 2.7493, + "step": 155500 + }, + { + "epoch": 0.52, + "learning_rate": 2.4234059394136096e-05, + "loss": 2.7444, + "step": 155600 + }, + { + "epoch": 0.52, + "learning_rate": 2.4217500306343123e-05, + "loss": 2.7492, + "step": 155700 + }, + { + "epoch": 0.52, + "learning_rate": 2.4200941218550156e-05, + "loss": 2.7444, + "step": 155800 + }, + { + "epoch": 0.52, + "learning_rate": 2.4184382130757183e-05, + "loss": 2.7415, + "step": 155900 + }, + { + "epoch": 0.52, + "learning_rate": 2.416782304296421e-05, + "loss": 2.7471, + "step": 156000 + }, + { + "epoch": 0.52, + "learning_rate": 2.4151263955171237e-05, + "loss": 2.7594, + "step": 156100 + }, + { + "epoch": 0.52, + "learning_rate": 2.4134704867378267e-05, + "loss": 2.7685, + "step": 156200 + }, + { + "epoch": 0.52, + "learning_rate": 2.4118145779585298e-05, + "loss": 2.7384, + "step": 156300 + }, + { + "epoch": 0.52, + "learning_rate": 2.4101586691792325e-05, + "loss": 2.7496, + "step": 156400 + }, + { + "epoch": 0.52, + "learning_rate": 2.408502760399935e-05, + "loss": 2.7602, + "step": 156500 + }, + { + "epoch": 0.52, + "learning_rate": 2.406846851620638e-05, + "loss": 2.7505, + "step": 156600 + }, + { + "epoch": 0.52, + "learning_rate": 2.405190942841341e-05, + "loss": 2.7545, + "step": 156700 + }, + { + "epoch": 0.52, + "learning_rate": 2.403535034062044e-05, + "loss": 2.732, + "step": 156800 + }, + { + "epoch": 0.52, + "learning_rate": 2.4018791252827466e-05, + "loss": 2.7503, + "step": 156900 + }, + { + "epoch": 0.52, + "learning_rate": 2.4002232165034493e-05, + "loss": 2.7363, + "step": 157000 + }, + { + "epoch": 0.52, + "learning_rate": 2.3985673077241523e-05, + "loss": 2.75, + "step": 157100 + }, + { + "epoch": 0.52, + "learning_rate": 2.396911398944855e-05, + "loss": 2.738, + "step": 157200 + }, + { + "epoch": 0.52, + "learning_rate": 2.395255490165558e-05, + "loss": 2.7571, + "step": 157300 + }, + { + "epoch": 0.52, + "learning_rate": 2.3935995813862607e-05, + "loss": 2.7663, + "step": 157400 + }, + { + "epoch": 0.52, + "learning_rate": 2.3919436726069634e-05, + "loss": 2.7488, + "step": 157500 + }, + { + "epoch": 0.52, + "learning_rate": 2.3902877638276665e-05, + "loss": 2.7504, + "step": 157600 + }, + { + "epoch": 0.52, + "learning_rate": 2.388631855048369e-05, + "loss": 2.7441, + "step": 157700 + }, + { + "epoch": 0.52, + "learning_rate": 2.3869759462690722e-05, + "loss": 2.7415, + "step": 157800 + }, + { + "epoch": 0.52, + "learning_rate": 2.385320037489775e-05, + "loss": 2.7663, + "step": 157900 + }, + { + "epoch": 0.52, + "learning_rate": 2.383664128710478e-05, + "loss": 2.7357, + "step": 158000 + }, + { + "epoch": 0.52, + "learning_rate": 2.3820082199311806e-05, + "loss": 2.736, + "step": 158100 + }, + { + "epoch": 0.52, + "learning_rate": 2.3803523111518833e-05, + "loss": 2.7418, + "step": 158200 + }, + { + "epoch": 0.52, + "learning_rate": 2.3786964023725863e-05, + "loss": 2.7541, + "step": 158300 + }, + { + "epoch": 0.52, + "learning_rate": 2.377040493593289e-05, + "loss": 2.7487, + "step": 158400 + }, + { + "epoch": 0.52, + "learning_rate": 2.375384584813992e-05, + "loss": 2.7572, + "step": 158500 + }, + { + "epoch": 0.53, + "learning_rate": 2.3737286760346947e-05, + "loss": 2.7491, + "step": 158600 + }, + { + "epoch": 0.53, + "learning_rate": 2.3720727672553974e-05, + "loss": 2.7457, + "step": 158700 + }, + { + "epoch": 0.53, + "learning_rate": 2.3704168584761004e-05, + "loss": 2.7538, + "step": 158800 + }, + { + "epoch": 0.53, + "learning_rate": 2.368760949696803e-05, + "loss": 2.7595, + "step": 158900 + }, + { + "epoch": 0.53, + "learning_rate": 2.367105040917506e-05, + "loss": 2.7432, + "step": 159000 + }, + { + "epoch": 0.53, + "learning_rate": 2.365449132138209e-05, + "loss": 2.7392, + "step": 159100 + }, + { + "epoch": 0.53, + "learning_rate": 2.3637932233589115e-05, + "loss": 2.7461, + "step": 159200 + }, + { + "epoch": 0.53, + "learning_rate": 2.3621373145796146e-05, + "loss": 2.759, + "step": 159300 + }, + { + "epoch": 0.53, + "learning_rate": 2.3604814058003176e-05, + "loss": 2.7552, + "step": 159400 + }, + { + "epoch": 0.53, + "learning_rate": 2.3588254970210203e-05, + "loss": 2.756, + "step": 159500 + }, + { + "epoch": 0.53, + "learning_rate": 2.357169588241723e-05, + "loss": 2.7384, + "step": 159600 + }, + { + "epoch": 0.53, + "learning_rate": 2.3555136794624257e-05, + "loss": 2.7421, + "step": 159700 + }, + { + "epoch": 0.53, + "learning_rate": 2.3538577706831287e-05, + "loss": 2.7392, + "step": 159800 + }, + { + "epoch": 0.53, + "learning_rate": 2.3522018619038317e-05, + "loss": 2.7529, + "step": 159900 + }, + { + "epoch": 0.53, + "learning_rate": 2.3505459531245344e-05, + "loss": 2.7453, + "step": 160000 + }, + { + "epoch": 0.53, + "learning_rate": 2.348890044345237e-05, + "loss": 2.7512, + "step": 160100 + }, + { + "epoch": 0.53, + "learning_rate": 2.3472341355659398e-05, + "loss": 2.7456, + "step": 160200 + }, + { + "epoch": 0.53, + "learning_rate": 2.3455782267866432e-05, + "loss": 2.7468, + "step": 160300 + }, + { + "epoch": 0.53, + "learning_rate": 2.343922318007346e-05, + "loss": 2.7498, + "step": 160400 + }, + { + "epoch": 0.53, + "learning_rate": 2.3422664092280486e-05, + "loss": 2.766, + "step": 160500 + }, + { + "epoch": 0.53, + "learning_rate": 2.3406105004487513e-05, + "loss": 2.741, + "step": 160600 + }, + { + "epoch": 0.53, + "learning_rate": 2.3389545916694543e-05, + "loss": 2.7401, + "step": 160700 + }, + { + "epoch": 0.53, + "learning_rate": 2.3372986828901573e-05, + "loss": 2.7291, + "step": 160800 + }, + { + "epoch": 0.53, + "learning_rate": 2.33564277411086e-05, + "loss": 2.7573, + "step": 160900 + }, + { + "epoch": 0.53, + "learning_rate": 2.3339868653315627e-05, + "loss": 2.7449, + "step": 161000 + }, + { + "epoch": 0.53, + "learning_rate": 2.3323309565522654e-05, + "loss": 2.7527, + "step": 161100 + }, + { + "epoch": 0.53, + "learning_rate": 2.3306750477729684e-05, + "loss": 2.7485, + "step": 161200 + }, + { + "epoch": 0.53, + "learning_rate": 2.3290191389936714e-05, + "loss": 2.7476, + "step": 161300 + }, + { + "epoch": 0.53, + "learning_rate": 2.327363230214374e-05, + "loss": 2.7679, + "step": 161400 + }, + { + "epoch": 0.53, + "learning_rate": 2.3257073214350768e-05, + "loss": 2.7428, + "step": 161500 + }, + { + "epoch": 0.54, + "learning_rate": 2.32405141265578e-05, + "loss": 2.7318, + "step": 161600 + }, + { + "epoch": 0.54, + "learning_rate": 2.3223955038764826e-05, + "loss": 2.7465, + "step": 161700 + }, + { + "epoch": 0.54, + "learning_rate": 2.3207395950971856e-05, + "loss": 2.74, + "step": 161800 + }, + { + "epoch": 0.54, + "learning_rate": 2.3190836863178883e-05, + "loss": 2.7525, + "step": 161900 + }, + { + "epoch": 0.54, + "learning_rate": 2.317427777538591e-05, + "loss": 2.7559, + "step": 162000 + }, + { + "epoch": 0.54, + "learning_rate": 2.315771868759294e-05, + "loss": 2.7388, + "step": 162100 + }, + { + "epoch": 0.54, + "learning_rate": 2.3141159599799967e-05, + "loss": 2.752, + "step": 162200 + }, + { + "epoch": 0.54, + "learning_rate": 2.3124600512006997e-05, + "loss": 2.7547, + "step": 162300 + }, + { + "epoch": 0.54, + "learning_rate": 2.3108041424214024e-05, + "loss": 2.7303, + "step": 162400 + }, + { + "epoch": 0.54, + "learning_rate": 2.309148233642105e-05, + "loss": 2.7317, + "step": 162500 + }, + { + "epoch": 0.54, + "learning_rate": 2.307492324862808e-05, + "loss": 2.7413, + "step": 162600 + }, + { + "epoch": 0.54, + "learning_rate": 2.3058364160835108e-05, + "loss": 2.7639, + "step": 162700 + }, + { + "epoch": 0.54, + "learning_rate": 2.304180507304214e-05, + "loss": 2.7692, + "step": 162800 + }, + { + "epoch": 0.54, + "learning_rate": 2.3025245985249165e-05, + "loss": 2.7522, + "step": 162900 + }, + { + "epoch": 0.54, + "learning_rate": 2.3008686897456196e-05, + "loss": 2.7499, + "step": 163000 + }, + { + "epoch": 0.54, + "learning_rate": 2.2992127809663223e-05, + "loss": 2.7452, + "step": 163100 + }, + { + "epoch": 0.54, + "learning_rate": 2.297556872187025e-05, + "loss": 2.7692, + "step": 163200 + }, + { + "epoch": 0.54, + "learning_rate": 2.295900963407728e-05, + "loss": 2.7514, + "step": 163300 + }, + { + "epoch": 0.54, + "learning_rate": 2.2942450546284307e-05, + "loss": 2.7399, + "step": 163400 + }, + { + "epoch": 0.54, + "learning_rate": 2.2925891458491337e-05, + "loss": 2.751, + "step": 163500 + }, + { + "epoch": 0.54, + "learning_rate": 2.2909332370698364e-05, + "loss": 2.7342, + "step": 163600 + }, + { + "epoch": 0.54, + "learning_rate": 2.289277328290539e-05, + "loss": 2.7472, + "step": 163700 + }, + { + "epoch": 0.54, + "learning_rate": 2.287621419511242e-05, + "loss": 2.7626, + "step": 163800 + }, + { + "epoch": 0.54, + "learning_rate": 2.285965510731945e-05, + "loss": 2.7511, + "step": 163900 + }, + { + "epoch": 0.54, + "learning_rate": 2.284309601952648e-05, + "loss": 2.7535, + "step": 164000 + }, + { + "epoch": 0.54, + "learning_rate": 2.2826536931733505e-05, + "loss": 2.7506, + "step": 164100 + }, + { + "epoch": 0.54, + "learning_rate": 2.2809977843940532e-05, + "loss": 2.7679, + "step": 164200 + }, + { + "epoch": 0.54, + "learning_rate": 2.2793418756147562e-05, + "loss": 2.7446, + "step": 164300 + }, + { + "epoch": 0.54, + "learning_rate": 2.2776859668354593e-05, + "loss": 2.7439, + "step": 164400 + }, + { + "epoch": 0.54, + "learning_rate": 2.276030058056162e-05, + "loss": 2.7631, + "step": 164500 + }, + { + "epoch": 0.55, + "learning_rate": 2.2743741492768647e-05, + "loss": 2.7448, + "step": 164600 + }, + { + "epoch": 0.55, + "learning_rate": 2.2727182404975674e-05, + "loss": 2.7571, + "step": 164700 + }, + { + "epoch": 0.55, + "learning_rate": 2.2710623317182704e-05, + "loss": 2.7517, + "step": 164800 + }, + { + "epoch": 0.55, + "learning_rate": 2.2694064229389734e-05, + "loss": 2.7324, + "step": 164900 + }, + { + "epoch": 0.55, + "learning_rate": 2.267750514159676e-05, + "loss": 2.7428, + "step": 165000 + }, + { + "epoch": 0.55, + "learning_rate": 2.2660946053803788e-05, + "loss": 2.7627, + "step": 165100 + }, + { + "epoch": 0.55, + "learning_rate": 2.2644386966010818e-05, + "loss": 2.7404, + "step": 165200 + }, + { + "epoch": 0.55, + "learning_rate": 2.262782787821785e-05, + "loss": 2.7404, + "step": 165300 + }, + { + "epoch": 0.55, + "learning_rate": 2.2611268790424875e-05, + "loss": 2.7552, + "step": 165400 + }, + { + "epoch": 0.55, + "learning_rate": 2.2594709702631902e-05, + "loss": 2.7461, + "step": 165500 + }, + { + "epoch": 0.55, + "learning_rate": 2.257815061483893e-05, + "loss": 2.7497, + "step": 165600 + }, + { + "epoch": 0.55, + "learning_rate": 2.256159152704596e-05, + "loss": 2.75, + "step": 165700 + }, + { + "epoch": 0.55, + "learning_rate": 2.254503243925299e-05, + "loss": 2.7509, + "step": 165800 + }, + { + "epoch": 0.55, + "learning_rate": 2.2528473351460017e-05, + "loss": 2.7442, + "step": 165900 + }, + { + "epoch": 0.55, + "learning_rate": 2.2511914263667044e-05, + "loss": 2.7411, + "step": 166000 + }, + { + "epoch": 0.55, + "learning_rate": 2.249535517587407e-05, + "loss": 2.7603, + "step": 166100 + }, + { + "epoch": 0.55, + "learning_rate": 2.24787960880811e-05, + "loss": 2.7477, + "step": 166200 + }, + { + "epoch": 0.55, + "learning_rate": 2.246223700028813e-05, + "loss": 2.742, + "step": 166300 + }, + { + "epoch": 0.55, + "learning_rate": 2.2445677912495158e-05, + "loss": 2.7428, + "step": 166400 + }, + { + "epoch": 0.55, + "learning_rate": 2.2429118824702185e-05, + "loss": 2.7425, + "step": 166500 + }, + { + "epoch": 0.55, + "learning_rate": 2.2412559736909215e-05, + "loss": 2.7574, + "step": 166600 + }, + { + "epoch": 0.55, + "learning_rate": 2.2396000649116242e-05, + "loss": 2.7359, + "step": 166700 + }, + { + "epoch": 0.55, + "learning_rate": 2.2379441561323273e-05, + "loss": 2.7462, + "step": 166800 + }, + { + "epoch": 0.55, + "learning_rate": 2.23628824735303e-05, + "loss": 2.7471, + "step": 166900 + }, + { + "epoch": 0.55, + "learning_rate": 2.2346323385737326e-05, + "loss": 2.7516, + "step": 167000 + }, + { + "epoch": 0.55, + "learning_rate": 2.2329764297944357e-05, + "loss": 2.7484, + "step": 167100 + }, + { + "epoch": 0.55, + "learning_rate": 2.2313205210151384e-05, + "loss": 2.7419, + "step": 167200 + }, + { + "epoch": 0.55, + "learning_rate": 2.2296646122358414e-05, + "loss": 2.7483, + "step": 167300 + }, + { + "epoch": 0.55, + "learning_rate": 2.228008703456544e-05, + "loss": 2.7379, + "step": 167400 + }, + { + "epoch": 0.55, + "learning_rate": 2.226352794677247e-05, + "loss": 2.7434, + "step": 167500 + }, + { + "epoch": 0.56, + "learning_rate": 2.2246968858979498e-05, + "loss": 2.7443, + "step": 167600 + }, + { + "epoch": 0.56, + "learning_rate": 2.2230409771186525e-05, + "loss": 2.7423, + "step": 167700 + }, + { + "epoch": 0.56, + "learning_rate": 2.2213850683393555e-05, + "loss": 2.7478, + "step": 167800 + }, + { + "epoch": 0.56, + "learning_rate": 2.2197291595600582e-05, + "loss": 2.7371, + "step": 167900 + }, + { + "epoch": 0.56, + "learning_rate": 2.2180732507807612e-05, + "loss": 2.7521, + "step": 168000 + }, + { + "epoch": 0.56, + "learning_rate": 2.216417342001464e-05, + "loss": 2.7481, + "step": 168100 + }, + { + "epoch": 0.56, + "learning_rate": 2.2147614332221666e-05, + "loss": 2.7661, + "step": 168200 + }, + { + "epoch": 0.56, + "learning_rate": 2.2131055244428697e-05, + "loss": 2.7528, + "step": 168300 + }, + { + "epoch": 0.56, + "learning_rate": 2.2114496156635723e-05, + "loss": 2.7454, + "step": 168400 + }, + { + "epoch": 0.56, + "learning_rate": 2.2097937068842754e-05, + "loss": 2.7572, + "step": 168500 + }, + { + "epoch": 0.56, + "learning_rate": 2.208137798104978e-05, + "loss": 2.7555, + "step": 168600 + }, + { + "epoch": 0.56, + "learning_rate": 2.2064818893256808e-05, + "loss": 2.7397, + "step": 168700 + }, + { + "epoch": 0.56, + "learning_rate": 2.2048259805463838e-05, + "loss": 2.7472, + "step": 168800 + }, + { + "epoch": 0.56, + "learning_rate": 2.2031700717670868e-05, + "loss": 2.7525, + "step": 168900 + }, + { + "epoch": 0.56, + "learning_rate": 2.2015141629877895e-05, + "loss": 2.7479, + "step": 169000 + }, + { + "epoch": 0.56, + "learning_rate": 2.1998582542084922e-05, + "loss": 2.7548, + "step": 169100 + }, + { + "epoch": 0.56, + "learning_rate": 2.198202345429195e-05, + "loss": 2.7515, + "step": 169200 + }, + { + "epoch": 0.56, + "learning_rate": 2.196546436649898e-05, + "loss": 2.7598, + "step": 169300 + }, + { + "epoch": 0.56, + "learning_rate": 2.194890527870601e-05, + "loss": 2.7638, + "step": 169400 + }, + { + "epoch": 0.56, + "learning_rate": 2.1932346190913036e-05, + "loss": 2.7554, + "step": 169500 + }, + { + "epoch": 0.56, + "learning_rate": 2.1915787103120063e-05, + "loss": 2.7554, + "step": 169600 + }, + { + "epoch": 0.56, + "learning_rate": 2.189922801532709e-05, + "loss": 2.7186, + "step": 169700 + }, + { + "epoch": 0.56, + "learning_rate": 2.1882668927534124e-05, + "loss": 2.7371, + "step": 169800 + }, + { + "epoch": 0.56, + "learning_rate": 2.186610983974115e-05, + "loss": 2.7606, + "step": 169900 + }, + { + "epoch": 0.56, + "learning_rate": 2.1849550751948178e-05, + "loss": 2.7465, + "step": 170000 + }, + { + "epoch": 0.56, + "learning_rate": 2.1832991664155205e-05, + "loss": 2.7502, + "step": 170100 + }, + { + "epoch": 0.56, + "learning_rate": 2.1816432576362235e-05, + "loss": 2.7428, + "step": 170200 + }, + { + "epoch": 0.56, + "learning_rate": 2.1799873488569265e-05, + "loss": 2.7499, + "step": 170300 + }, + { + "epoch": 0.56, + "learning_rate": 2.1783314400776292e-05, + "loss": 2.7497, + "step": 170400 + }, + { + "epoch": 0.56, + "learning_rate": 2.176675531298332e-05, + "loss": 2.7497, + "step": 170500 + }, + { + "epoch": 0.56, + "learning_rate": 2.1750196225190346e-05, + "loss": 2.7504, + "step": 170600 + }, + { + "epoch": 0.57, + "learning_rate": 2.1733637137397376e-05, + "loss": 2.7567, + "step": 170700 + }, + { + "epoch": 0.57, + "learning_rate": 2.1717078049604407e-05, + "loss": 2.7605, + "step": 170800 + }, + { + "epoch": 0.57, + "learning_rate": 2.1700518961811433e-05, + "loss": 2.7443, + "step": 170900 + }, + { + "epoch": 0.57, + "learning_rate": 2.168395987401846e-05, + "loss": 2.7547, + "step": 171000 + }, + { + "epoch": 0.57, + "learning_rate": 2.166740078622549e-05, + "loss": 2.7432, + "step": 171100 + }, + { + "epoch": 0.57, + "learning_rate": 2.1650841698432518e-05, + "loss": 2.7484, + "step": 171200 + }, + { + "epoch": 0.57, + "learning_rate": 2.1634282610639548e-05, + "loss": 2.7579, + "step": 171300 + }, + { + "epoch": 0.57, + "learning_rate": 2.1617723522846575e-05, + "loss": 2.7375, + "step": 171400 + }, + { + "epoch": 0.57, + "learning_rate": 2.1601164435053602e-05, + "loss": 2.7608, + "step": 171500 + }, + { + "epoch": 0.57, + "learning_rate": 2.1584605347260632e-05, + "loss": 2.7453, + "step": 171600 + }, + { + "epoch": 0.57, + "learning_rate": 2.156804625946766e-05, + "loss": 2.747, + "step": 171700 + }, + { + "epoch": 0.57, + "learning_rate": 2.155148717167469e-05, + "loss": 2.7596, + "step": 171800 + }, + { + "epoch": 0.57, + "learning_rate": 2.1534928083881716e-05, + "loss": 2.7526, + "step": 171900 + }, + { + "epoch": 0.57, + "learning_rate": 2.1518368996088743e-05, + "loss": 2.7525, + "step": 172000 + }, + { + "epoch": 0.57, + "learning_rate": 2.1501809908295773e-05, + "loss": 2.7486, + "step": 172100 + }, + { + "epoch": 0.57, + "learning_rate": 2.14852508205028e-05, + "loss": 2.7473, + "step": 172200 + }, + { + "epoch": 0.57, + "learning_rate": 2.146869173270983e-05, + "loss": 2.7536, + "step": 172300 + }, + { + "epoch": 0.57, + "learning_rate": 2.1452132644916857e-05, + "loss": 2.7546, + "step": 172400 + }, + { + "epoch": 0.57, + "learning_rate": 2.1435573557123888e-05, + "loss": 2.734, + "step": 172500 + }, + { + "epoch": 0.57, + "learning_rate": 2.1419014469330915e-05, + "loss": 2.7437, + "step": 172600 + }, + { + "epoch": 0.57, + "learning_rate": 2.140245538153794e-05, + "loss": 2.7471, + "step": 172700 + }, + { + "epoch": 0.57, + "learning_rate": 2.1385896293744972e-05, + "loss": 2.7469, + "step": 172800 + }, + { + "epoch": 0.57, + "learning_rate": 2.1369337205952e-05, + "loss": 2.759, + "step": 172900 + }, + { + "epoch": 0.57, + "learning_rate": 2.135277811815903e-05, + "loss": 2.756, + "step": 173000 + }, + { + "epoch": 0.57, + "learning_rate": 2.1336219030366056e-05, + "loss": 2.745, + "step": 173100 + }, + { + "epoch": 0.57, + "learning_rate": 2.1319659942573083e-05, + "loss": 2.7487, + "step": 173200 + }, + { + "epoch": 0.57, + "learning_rate": 2.1303100854780113e-05, + "loss": 2.7487, + "step": 173300 + }, + { + "epoch": 0.57, + "learning_rate": 2.1286541766987144e-05, + "loss": 2.7374, + "step": 173400 + }, + { + "epoch": 0.57, + "learning_rate": 2.126998267919417e-05, + "loss": 2.7561, + "step": 173500 + }, + { + "epoch": 0.57, + "learning_rate": 2.1253423591401197e-05, + "loss": 2.752, + "step": 173600 + }, + { + "epoch": 0.58, + "learning_rate": 2.1236864503608224e-05, + "loss": 2.7373, + "step": 173700 + }, + { + "epoch": 0.58, + "learning_rate": 2.1220305415815255e-05, + "loss": 2.7426, + "step": 173800 + }, + { + "epoch": 0.58, + "learning_rate": 2.1203746328022285e-05, + "loss": 2.7453, + "step": 173900 + }, + { + "epoch": 0.58, + "learning_rate": 2.1187187240229312e-05, + "loss": 2.7396, + "step": 174000 + }, + { + "epoch": 0.58, + "learning_rate": 2.117062815243634e-05, + "loss": 2.7657, + "step": 174100 + }, + { + "epoch": 0.58, + "learning_rate": 2.1154069064643366e-05, + "loss": 2.7561, + "step": 174200 + }, + { + "epoch": 0.58, + "learning_rate": 2.1137509976850396e-05, + "loss": 2.7527, + "step": 174300 + }, + { + "epoch": 0.58, + "learning_rate": 2.1120950889057426e-05, + "loss": 2.7513, + "step": 174400 + }, + { + "epoch": 0.58, + "learning_rate": 2.1104391801264453e-05, + "loss": 2.7367, + "step": 174500 + }, + { + "epoch": 0.58, + "learning_rate": 2.108783271347148e-05, + "loss": 2.7505, + "step": 174600 + }, + { + "epoch": 0.58, + "learning_rate": 2.107127362567851e-05, + "loss": 2.7495, + "step": 174700 + }, + { + "epoch": 0.58, + "learning_rate": 2.105471453788554e-05, + "loss": 2.7543, + "step": 174800 + }, + { + "epoch": 0.58, + "learning_rate": 2.1038155450092568e-05, + "loss": 2.7465, + "step": 174900 + }, + { + "epoch": 0.58, + "learning_rate": 2.1021596362299594e-05, + "loss": 2.73, + "step": 175000 + }, + { + "epoch": 0.58, + "learning_rate": 2.100503727450662e-05, + "loss": 2.7408, + "step": 175100 + }, + { + "epoch": 0.58, + "learning_rate": 2.098847818671365e-05, + "loss": 2.7525, + "step": 175200 + }, + { + "epoch": 0.58, + "learning_rate": 2.0971919098920682e-05, + "loss": 2.7428, + "step": 175300 + }, + { + "epoch": 0.58, + "learning_rate": 2.095536001112771e-05, + "loss": 2.7372, + "step": 175400 + }, + { + "epoch": 0.58, + "learning_rate": 2.0938800923334736e-05, + "loss": 2.7413, + "step": 175500 + }, + { + "epoch": 0.58, + "learning_rate": 2.0922241835541763e-05, + "loss": 2.7394, + "step": 175600 + }, + { + "epoch": 0.58, + "learning_rate": 2.0905682747748793e-05, + "loss": 2.7547, + "step": 175700 + }, + { + "epoch": 0.58, + "learning_rate": 2.0889123659955823e-05, + "loss": 2.7519, + "step": 175800 + }, + { + "epoch": 0.58, + "learning_rate": 2.087256457216285e-05, + "loss": 2.7428, + "step": 175900 + }, + { + "epoch": 0.58, + "learning_rate": 2.0856005484369877e-05, + "loss": 2.7628, + "step": 176000 + }, + { + "epoch": 0.58, + "learning_rate": 2.0839446396576907e-05, + "loss": 2.7362, + "step": 176100 + }, + { + "epoch": 0.58, + "learning_rate": 2.0822887308783934e-05, + "loss": 2.742, + "step": 176200 + }, + { + "epoch": 0.58, + "learning_rate": 2.0806328220990965e-05, + "loss": 2.7439, + "step": 176300 + }, + { + "epoch": 0.58, + "learning_rate": 2.078976913319799e-05, + "loss": 2.7554, + "step": 176400 + }, + { + "epoch": 0.58, + "learning_rate": 2.077321004540502e-05, + "loss": 2.7583, + "step": 176500 + }, + { + "epoch": 0.58, + "learning_rate": 2.075665095761205e-05, + "loss": 2.7484, + "step": 176600 + }, + { + "epoch": 0.59, + "learning_rate": 2.0740091869819076e-05, + "loss": 2.7455, + "step": 176700 + }, + { + "epoch": 0.59, + "learning_rate": 2.0723532782026106e-05, + "loss": 2.7394, + "step": 176800 + }, + { + "epoch": 0.59, + "learning_rate": 2.0706973694233133e-05, + "loss": 2.7497, + "step": 176900 + }, + { + "epoch": 0.59, + "learning_rate": 2.0690414606440163e-05, + "loss": 2.7391, + "step": 177000 + }, + { + "epoch": 0.59, + "learning_rate": 2.067385551864719e-05, + "loss": 2.7412, + "step": 177100 + }, + { + "epoch": 0.59, + "learning_rate": 2.0657296430854217e-05, + "loss": 2.7654, + "step": 177200 + }, + { + "epoch": 0.59, + "learning_rate": 2.0640737343061247e-05, + "loss": 2.7475, + "step": 177300 + }, + { + "epoch": 0.59, + "learning_rate": 2.0624178255268274e-05, + "loss": 2.7368, + "step": 177400 + }, + { + "epoch": 0.59, + "learning_rate": 2.0607619167475305e-05, + "loss": 2.7449, + "step": 177500 + }, + { + "epoch": 0.59, + "learning_rate": 2.059106007968233e-05, + "loss": 2.7554, + "step": 177600 + }, + { + "epoch": 0.59, + "learning_rate": 2.057450099188936e-05, + "loss": 2.7463, + "step": 177700 + }, + { + "epoch": 0.59, + "learning_rate": 2.055794190409639e-05, + "loss": 2.7435, + "step": 177800 + }, + { + "epoch": 0.59, + "learning_rate": 2.0541382816303416e-05, + "loss": 2.749, + "step": 177900 + }, + { + "epoch": 0.59, + "learning_rate": 2.0524823728510446e-05, + "loss": 2.7578, + "step": 178000 + }, + { + "epoch": 0.59, + "learning_rate": 2.0508264640717473e-05, + "loss": 2.7403, + "step": 178100 + }, + { + "epoch": 0.59, + "learning_rate": 2.04917055529245e-05, + "loss": 2.7413, + "step": 178200 + }, + { + "epoch": 0.59, + "learning_rate": 2.047514646513153e-05, + "loss": 2.7531, + "step": 178300 + }, + { + "epoch": 0.59, + "learning_rate": 2.045858737733856e-05, + "loss": 2.7382, + "step": 178400 + }, + { + "epoch": 0.59, + "learning_rate": 2.0442028289545587e-05, + "loss": 2.7328, + "step": 178500 + }, + { + "epoch": 0.59, + "learning_rate": 2.0425469201752614e-05, + "loss": 2.7523, + "step": 178600 + }, + { + "epoch": 0.59, + "learning_rate": 2.040891011395964e-05, + "loss": 2.751, + "step": 178700 + }, + { + "epoch": 0.59, + "learning_rate": 2.039235102616667e-05, + "loss": 2.7621, + "step": 178800 + }, + { + "epoch": 0.59, + "learning_rate": 2.03757919383737e-05, + "loss": 2.7409, + "step": 178900 + }, + { + "epoch": 0.59, + "learning_rate": 2.035923285058073e-05, + "loss": 2.7606, + "step": 179000 + }, + { + "epoch": 0.59, + "learning_rate": 2.0342673762787755e-05, + "loss": 2.7597, + "step": 179100 + }, + { + "epoch": 0.59, + "learning_rate": 2.0326114674994782e-05, + "loss": 2.734, + "step": 179200 + }, + { + "epoch": 0.59, + "learning_rate": 2.0309555587201816e-05, + "loss": 2.7502, + "step": 179300 + }, + { + "epoch": 0.59, + "learning_rate": 2.0292996499408843e-05, + "loss": 2.7438, + "step": 179400 + }, + { + "epoch": 0.59, + "learning_rate": 2.027643741161587e-05, + "loss": 2.737, + "step": 179500 + }, + { + "epoch": 0.59, + "learning_rate": 2.0259878323822897e-05, + "loss": 2.7505, + "step": 179600 + }, + { + "epoch": 0.6, + "learning_rate": 2.0243319236029927e-05, + "loss": 2.7519, + "step": 179700 + }, + { + "epoch": 0.6, + "learning_rate": 2.0226760148236957e-05, + "loss": 2.7409, + "step": 179800 + }, + { + "epoch": 0.6, + "learning_rate": 2.0210201060443984e-05, + "loss": 2.7325, + "step": 179900 + }, + { + "epoch": 0.6, + "learning_rate": 2.019364197265101e-05, + "loss": 2.7537, + "step": 180000 + }, + { + "epoch": 0.6, + "learning_rate": 2.0177082884858038e-05, + "loss": 2.7362, + "step": 180100 + }, + { + "epoch": 0.6, + "learning_rate": 2.016052379706507e-05, + "loss": 2.7377, + "step": 180200 + }, + { + "epoch": 0.6, + "learning_rate": 2.01439647092721e-05, + "loss": 2.7443, + "step": 180300 + }, + { + "epoch": 0.6, + "learning_rate": 2.0127405621479126e-05, + "loss": 2.7377, + "step": 180400 + }, + { + "epoch": 0.6, + "learning_rate": 2.0110846533686153e-05, + "loss": 2.7517, + "step": 180500 + }, + { + "epoch": 0.6, + "learning_rate": 2.0094287445893183e-05, + "loss": 2.7443, + "step": 180600 + }, + { + "epoch": 0.6, + "learning_rate": 2.007772835810021e-05, + "loss": 2.7444, + "step": 180700 + }, + { + "epoch": 0.6, + "learning_rate": 2.006116927030724e-05, + "loss": 2.7496, + "step": 180800 + }, + { + "epoch": 0.6, + "learning_rate": 2.0044610182514267e-05, + "loss": 2.7418, + "step": 180900 + }, + { + "epoch": 0.6, + "learning_rate": 2.0028051094721294e-05, + "loss": 2.7402, + "step": 181000 + }, + { + "epoch": 0.6, + "learning_rate": 2.0011492006928324e-05, + "loss": 2.7368, + "step": 181100 + }, + { + "epoch": 0.6, + "learning_rate": 1.999493291913535e-05, + "loss": 2.7474, + "step": 181200 + }, + { + "epoch": 0.6, + "learning_rate": 1.997837383134238e-05, + "loss": 2.7471, + "step": 181300 + }, + { + "epoch": 0.6, + "learning_rate": 1.9961814743549408e-05, + "loss": 2.7449, + "step": 181400 + }, + { + "epoch": 0.6, + "learning_rate": 1.9945255655756435e-05, + "loss": 2.7289, + "step": 181500 + }, + { + "epoch": 0.6, + "learning_rate": 1.9928696567963465e-05, + "loss": 2.7406, + "step": 181600 + }, + { + "epoch": 0.6, + "learning_rate": 1.9912137480170492e-05, + "loss": 2.7669, + "step": 181700 + }, + { + "epoch": 0.6, + "learning_rate": 1.9895578392377523e-05, + "loss": 2.7532, + "step": 181800 + }, + { + "epoch": 0.6, + "learning_rate": 1.987901930458455e-05, + "loss": 2.7445, + "step": 181900 + }, + { + "epoch": 0.6, + "learning_rate": 1.986246021679158e-05, + "loss": 2.7673, + "step": 182000 + }, + { + "epoch": 0.6, + "learning_rate": 1.9845901128998607e-05, + "loss": 2.7517, + "step": 182100 + }, + { + "epoch": 0.6, + "learning_rate": 1.9829342041205634e-05, + "loss": 2.747, + "step": 182200 + }, + { + "epoch": 0.6, + "learning_rate": 1.9812782953412664e-05, + "loss": 2.746, + "step": 182300 + }, + { + "epoch": 0.6, + "learning_rate": 1.979622386561969e-05, + "loss": 2.7475, + "step": 182400 + }, + { + "epoch": 0.6, + "learning_rate": 1.977966477782672e-05, + "loss": 2.7567, + "step": 182500 + }, + { + "epoch": 0.6, + "learning_rate": 1.9763105690033748e-05, + "loss": 2.7468, + "step": 182600 + }, + { + "epoch": 0.61, + "learning_rate": 1.9746546602240775e-05, + "loss": 2.7424, + "step": 182700 + }, + { + "epoch": 0.61, + "learning_rate": 1.9729987514447805e-05, + "loss": 2.7375, + "step": 182800 + }, + { + "epoch": 0.61, + "learning_rate": 1.9713428426654836e-05, + "loss": 2.74, + "step": 182900 + }, + { + "epoch": 0.61, + "learning_rate": 1.9696869338861863e-05, + "loss": 2.7503, + "step": 183000 + }, + { + "epoch": 0.61, + "learning_rate": 1.968031025106889e-05, + "loss": 2.7485, + "step": 183100 + }, + { + "epoch": 0.61, + "learning_rate": 1.9663751163275916e-05, + "loss": 2.7386, + "step": 183200 + }, + { + "epoch": 0.61, + "learning_rate": 1.9647192075482947e-05, + "loss": 2.73, + "step": 183300 + }, + { + "epoch": 0.61, + "learning_rate": 1.9630632987689977e-05, + "loss": 2.7399, + "step": 183400 + }, + { + "epoch": 0.61, + "learning_rate": 1.9614073899897004e-05, + "loss": 2.7399, + "step": 183500 + }, + { + "epoch": 0.61, + "learning_rate": 1.959751481210403e-05, + "loss": 2.7438, + "step": 183600 + }, + { + "epoch": 0.61, + "learning_rate": 1.9580955724311058e-05, + "loss": 2.7527, + "step": 183700 + }, + { + "epoch": 0.61, + "learning_rate": 1.956439663651809e-05, + "loss": 2.7412, + "step": 183800 + }, + { + "epoch": 0.61, + "learning_rate": 1.954783754872512e-05, + "loss": 2.7294, + "step": 183900 + }, + { + "epoch": 0.61, + "learning_rate": 1.9531278460932145e-05, + "loss": 2.7384, + "step": 184000 + }, + { + "epoch": 0.61, + "learning_rate": 1.9514719373139172e-05, + "loss": 2.7353, + "step": 184100 + }, + { + "epoch": 0.61, + "learning_rate": 1.9498160285346202e-05, + "loss": 2.7428, + "step": 184200 + }, + { + "epoch": 0.61, + "learning_rate": 1.9481601197553233e-05, + "loss": 2.7493, + "step": 184300 + }, + { + "epoch": 0.61, + "learning_rate": 1.946504210976026e-05, + "loss": 2.728, + "step": 184400 + }, + { + "epoch": 0.61, + "learning_rate": 1.9448483021967287e-05, + "loss": 2.7371, + "step": 184500 + }, + { + "epoch": 0.61, + "learning_rate": 1.9431923934174313e-05, + "loss": 2.7402, + "step": 184600 + }, + { + "epoch": 0.61, + "learning_rate": 1.9415364846381344e-05, + "loss": 2.7451, + "step": 184700 + }, + { + "epoch": 0.61, + "learning_rate": 1.9398805758588374e-05, + "loss": 2.7383, + "step": 184800 + }, + { + "epoch": 0.61, + "learning_rate": 1.93822466707954e-05, + "loss": 2.7366, + "step": 184900 + }, + { + "epoch": 0.61, + "learning_rate": 1.9365687583002428e-05, + "loss": 2.7473, + "step": 185000 + }, + { + "epoch": 0.61, + "learning_rate": 1.9349128495209455e-05, + "loss": 2.7424, + "step": 185100 + }, + { + "epoch": 0.61, + "learning_rate": 1.9332569407416485e-05, + "loss": 2.7531, + "step": 185200 + }, + { + "epoch": 0.61, + "learning_rate": 1.9316010319623515e-05, + "loss": 2.7323, + "step": 185300 + }, + { + "epoch": 0.61, + "learning_rate": 1.9299451231830542e-05, + "loss": 2.7391, + "step": 185400 + }, + { + "epoch": 0.61, + "learning_rate": 1.928289214403757e-05, + "loss": 2.7523, + "step": 185500 + }, + { + "epoch": 0.61, + "learning_rate": 1.92663330562446e-05, + "loss": 2.7478, + "step": 185600 + }, + { + "epoch": 0.62, + "learning_rate": 1.9249773968451626e-05, + "loss": 2.7492, + "step": 185700 + }, + { + "epoch": 0.62, + "learning_rate": 1.9233214880658657e-05, + "loss": 2.7306, + "step": 185800 + }, + { + "epoch": 0.62, + "learning_rate": 1.9216655792865684e-05, + "loss": 2.7402, + "step": 185900 + }, + { + "epoch": 0.62, + "learning_rate": 1.920009670507271e-05, + "loss": 2.7429, + "step": 186000 + }, + { + "epoch": 0.62, + "learning_rate": 1.918353761727974e-05, + "loss": 2.7388, + "step": 186100 + }, + { + "epoch": 0.62, + "learning_rate": 1.9166978529486768e-05, + "loss": 2.7384, + "step": 186200 + }, + { + "epoch": 0.62, + "learning_rate": 1.9150419441693798e-05, + "loss": 2.7398, + "step": 186300 + }, + { + "epoch": 0.62, + "learning_rate": 1.9133860353900825e-05, + "loss": 2.7571, + "step": 186400 + }, + { + "epoch": 0.62, + "learning_rate": 1.9117301266107855e-05, + "loss": 2.7548, + "step": 186500 + }, + { + "epoch": 0.62, + "learning_rate": 1.9100742178314882e-05, + "loss": 2.7339, + "step": 186600 + }, + { + "epoch": 0.62, + "learning_rate": 1.908418309052191e-05, + "loss": 2.7359, + "step": 186700 + }, + { + "epoch": 0.62, + "learning_rate": 1.906762400272894e-05, + "loss": 2.7395, + "step": 186800 + }, + { + "epoch": 0.62, + "learning_rate": 1.9051064914935966e-05, + "loss": 2.7336, + "step": 186900 + }, + { + "epoch": 0.62, + "learning_rate": 1.9034505827142997e-05, + "loss": 2.7336, + "step": 187000 + }, + { + "epoch": 0.62, + "learning_rate": 1.9017946739350024e-05, + "loss": 2.7447, + "step": 187100 + }, + { + "epoch": 0.62, + "learning_rate": 1.900138765155705e-05, + "loss": 2.7439, + "step": 187200 + }, + { + "epoch": 0.62, + "learning_rate": 1.898482856376408e-05, + "loss": 2.743, + "step": 187300 + }, + { + "epoch": 0.62, + "learning_rate": 1.896826947597111e-05, + "loss": 2.7444, + "step": 187400 + }, + { + "epoch": 0.62, + "learning_rate": 1.8951710388178138e-05, + "loss": 2.7332, + "step": 187500 + }, + { + "epoch": 0.62, + "learning_rate": 1.8935151300385165e-05, + "loss": 2.7369, + "step": 187600 + }, + { + "epoch": 0.62, + "learning_rate": 1.8918592212592192e-05, + "loss": 2.7413, + "step": 187700 + }, + { + "epoch": 0.62, + "learning_rate": 1.8902033124799222e-05, + "loss": 2.7716, + "step": 187800 + }, + { + "epoch": 0.62, + "learning_rate": 1.8885474037006252e-05, + "loss": 2.7431, + "step": 187900 + }, + { + "epoch": 0.62, + "learning_rate": 1.886891494921328e-05, + "loss": 2.7496, + "step": 188000 + }, + { + "epoch": 0.62, + "learning_rate": 1.8852355861420306e-05, + "loss": 2.7485, + "step": 188100 + }, + { + "epoch": 0.62, + "learning_rate": 1.8835796773627333e-05, + "loss": 2.7476, + "step": 188200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8819237685834363e-05, + "loss": 2.7482, + "step": 188300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8802678598041394e-05, + "loss": 2.7405, + "step": 188400 + }, + { + "epoch": 0.62, + "learning_rate": 1.878611951024842e-05, + "loss": 2.7468, + "step": 188500 + }, + { + "epoch": 0.62, + "learning_rate": 1.8769560422455448e-05, + "loss": 2.7531, + "step": 188600 + }, + { + "epoch": 0.62, + "learning_rate": 1.8753001334662474e-05, + "loss": 2.7557, + "step": 188700 + }, + { + "epoch": 0.63, + "learning_rate": 1.8736442246869508e-05, + "loss": 2.7441, + "step": 188800 + }, + { + "epoch": 0.63, + "learning_rate": 1.8719883159076535e-05, + "loss": 2.7522, + "step": 188900 + }, + { + "epoch": 0.63, + "learning_rate": 1.8703324071283562e-05, + "loss": 2.744, + "step": 189000 + }, + { + "epoch": 0.63, + "learning_rate": 1.868676498349059e-05, + "loss": 2.7527, + "step": 189100 + }, + { + "epoch": 0.63, + "learning_rate": 1.867020589569762e-05, + "loss": 2.7664, + "step": 189200 + }, + { + "epoch": 0.63, + "learning_rate": 1.865364680790465e-05, + "loss": 2.7386, + "step": 189300 + }, + { + "epoch": 0.63, + "learning_rate": 1.8637087720111676e-05, + "loss": 2.7297, + "step": 189400 + }, + { + "epoch": 0.63, + "learning_rate": 1.8620528632318703e-05, + "loss": 2.7398, + "step": 189500 + }, + { + "epoch": 0.63, + "learning_rate": 1.860396954452573e-05, + "loss": 2.7605, + "step": 189600 + }, + { + "epoch": 0.63, + "learning_rate": 1.858741045673276e-05, + "loss": 2.749, + "step": 189700 + }, + { + "epoch": 0.63, + "learning_rate": 1.857085136893979e-05, + "loss": 2.7359, + "step": 189800 + }, + { + "epoch": 0.63, + "learning_rate": 1.8554292281146818e-05, + "loss": 2.737, + "step": 189900 + }, + { + "epoch": 0.63, + "learning_rate": 1.8537733193353845e-05, + "loss": 2.7476, + "step": 190000 + }, + { + "epoch": 0.63, + "learning_rate": 1.8521174105560875e-05, + "loss": 2.7243, + "step": 190100 + }, + { + "epoch": 0.63, + "learning_rate": 1.8504615017767902e-05, + "loss": 2.74, + "step": 190200 + }, + { + "epoch": 0.63, + "learning_rate": 1.8488055929974932e-05, + "loss": 2.7522, + "step": 190300 + }, + { + "epoch": 0.63, + "learning_rate": 1.847149684218196e-05, + "loss": 2.7543, + "step": 190400 + }, + { + "epoch": 0.63, + "learning_rate": 1.8454937754388986e-05, + "loss": 2.7374, + "step": 190500 + }, + { + "epoch": 0.63, + "learning_rate": 1.8438378666596016e-05, + "loss": 2.7397, + "step": 190600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8421819578803043e-05, + "loss": 2.7264, + "step": 190700 + }, + { + "epoch": 0.63, + "learning_rate": 1.8405260491010073e-05, + "loss": 2.7341, + "step": 190800 + }, + { + "epoch": 0.63, + "learning_rate": 1.83887014032171e-05, + "loss": 2.7382, + "step": 190900 + }, + { + "epoch": 0.63, + "learning_rate": 1.837214231542413e-05, + "loss": 2.7346, + "step": 191000 + }, + { + "epoch": 0.63, + "learning_rate": 1.8355583227631158e-05, + "loss": 2.7357, + "step": 191100 + }, + { + "epoch": 0.63, + "learning_rate": 1.8339024139838185e-05, + "loss": 2.7492, + "step": 191200 + }, + { + "epoch": 0.63, + "learning_rate": 1.8322465052045215e-05, + "loss": 2.7492, + "step": 191300 + }, + { + "epoch": 0.63, + "learning_rate": 1.8305905964252242e-05, + "loss": 2.7432, + "step": 191400 + }, + { + "epoch": 0.63, + "learning_rate": 1.8289346876459272e-05, + "loss": 2.7422, + "step": 191500 + }, + { + "epoch": 0.63, + "learning_rate": 1.82727877886663e-05, + "loss": 2.7427, + "step": 191600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8256228700873326e-05, + "loss": 2.7504, + "step": 191700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8239669613080356e-05, + "loss": 2.7348, + "step": 191800 + }, + { + "epoch": 0.64, + "learning_rate": 1.8223110525287383e-05, + "loss": 2.7355, + "step": 191900 + }, + { + "epoch": 0.64, + "learning_rate": 1.8206551437494413e-05, + "loss": 2.7447, + "step": 192000 + }, + { + "epoch": 0.64, + "learning_rate": 1.818999234970144e-05, + "loss": 2.7517, + "step": 192100 + }, + { + "epoch": 0.64, + "learning_rate": 1.8173433261908467e-05, + "loss": 2.7496, + "step": 192200 + }, + { + "epoch": 0.64, + "learning_rate": 1.8156874174115497e-05, + "loss": 2.7394, + "step": 192300 + }, + { + "epoch": 0.64, + "learning_rate": 1.8140315086322528e-05, + "loss": 2.7283, + "step": 192400 + }, + { + "epoch": 0.64, + "learning_rate": 1.8123755998529555e-05, + "loss": 2.7389, + "step": 192500 + }, + { + "epoch": 0.64, + "learning_rate": 1.810719691073658e-05, + "loss": 2.7368, + "step": 192600 + }, + { + "epoch": 0.64, + "learning_rate": 1.809063782294361e-05, + "loss": 2.7407, + "step": 192700 + }, + { + "epoch": 0.64, + "learning_rate": 1.807407873515064e-05, + "loss": 2.7288, + "step": 192800 + }, + { + "epoch": 0.64, + "learning_rate": 1.805751964735767e-05, + "loss": 2.7248, + "step": 192900 + }, + { + "epoch": 0.64, + "learning_rate": 1.8040960559564696e-05, + "loss": 2.7259, + "step": 193000 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024401471771723e-05, + "loss": 2.7475, + "step": 193100 + }, + { + "epoch": 0.64, + "learning_rate": 1.800784238397875e-05, + "loss": 2.7297, + "step": 193200 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991283296185784e-05, + "loss": 2.7498, + "step": 193300 + }, + { + "epoch": 0.64, + "learning_rate": 1.797472420839281e-05, + "loss": 2.7517, + "step": 193400 + }, + { + "epoch": 0.64, + "learning_rate": 1.7958165120599837e-05, + "loss": 2.7365, + "step": 193500 + }, + { + "epoch": 0.64, + "learning_rate": 1.7941606032806864e-05, + "loss": 2.7297, + "step": 193600 + }, + { + "epoch": 0.64, + "learning_rate": 1.7925046945013895e-05, + "loss": 2.7415, + "step": 193700 + }, + { + "epoch": 0.64, + "learning_rate": 1.7908487857220925e-05, + "loss": 2.7242, + "step": 193800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7891928769427952e-05, + "loss": 2.7425, + "step": 193900 + }, + { + "epoch": 0.64, + "learning_rate": 1.787536968163498e-05, + "loss": 2.7562, + "step": 194000 + }, + { + "epoch": 0.64, + "learning_rate": 1.7858810593842006e-05, + "loss": 2.727, + "step": 194100 + }, + { + "epoch": 0.64, + "learning_rate": 1.7842251506049036e-05, + "loss": 2.7497, + "step": 194200 + }, + { + "epoch": 0.64, + "learning_rate": 1.7825692418256066e-05, + "loss": 2.7484, + "step": 194300 + }, + { + "epoch": 0.64, + "learning_rate": 1.7809133330463093e-05, + "loss": 2.7343, + "step": 194400 + }, + { + "epoch": 0.64, + "learning_rate": 1.779257424267012e-05, + "loss": 2.7723, + "step": 194500 + }, + { + "epoch": 0.64, + "learning_rate": 1.777601515487715e-05, + "loss": 2.7399, + "step": 194600 + }, + { + "epoch": 0.64, + "learning_rate": 1.7759456067084177e-05, + "loss": 2.7376, + "step": 194700 + }, + { + "epoch": 0.65, + "learning_rate": 1.7742896979291208e-05, + "loss": 2.7485, + "step": 194800 + }, + { + "epoch": 0.65, + "learning_rate": 1.7726337891498234e-05, + "loss": 2.7326, + "step": 194900 + }, + { + "epoch": 0.65, + "learning_rate": 1.770977880370526e-05, + "loss": 2.7403, + "step": 195000 + }, + { + "epoch": 0.65, + "learning_rate": 1.769321971591229e-05, + "loss": 2.7306, + "step": 195100 + }, + { + "epoch": 0.65, + "learning_rate": 1.767666062811932e-05, + "loss": 2.7347, + "step": 195200 + }, + { + "epoch": 0.65, + "learning_rate": 1.766010154032635e-05, + "loss": 2.7404, + "step": 195300 + }, + { + "epoch": 0.65, + "learning_rate": 1.7643542452533376e-05, + "loss": 2.7213, + "step": 195400 + }, + { + "epoch": 0.65, + "learning_rate": 1.7626983364740403e-05, + "loss": 2.7374, + "step": 195500 + }, + { + "epoch": 0.65, + "learning_rate": 1.7610424276947433e-05, + "loss": 2.7227, + "step": 195600 + }, + { + "epoch": 0.65, + "learning_rate": 1.759386518915446e-05, + "loss": 2.7226, + "step": 195700 + }, + { + "epoch": 0.65, + "learning_rate": 1.757730610136149e-05, + "loss": 2.7351, + "step": 195800 + }, + { + "epoch": 0.65, + "learning_rate": 1.7560747013568517e-05, + "loss": 2.7491, + "step": 195900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7544187925775547e-05, + "loss": 2.7374, + "step": 196000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7527628837982574e-05, + "loss": 2.7398, + "step": 196100 + }, + { + "epoch": 0.65, + "learning_rate": 1.75110697501896e-05, + "loss": 2.7369, + "step": 196200 + }, + { + "epoch": 0.65, + "learning_rate": 1.749451066239663e-05, + "loss": 2.7385, + "step": 196300 + }, + { + "epoch": 0.65, + "learning_rate": 1.747795157460366e-05, + "loss": 2.7371, + "step": 196400 + }, + { + "epoch": 0.65, + "learning_rate": 1.746139248681069e-05, + "loss": 2.7375, + "step": 196500 + }, + { + "epoch": 0.65, + "learning_rate": 1.7444833399017716e-05, + "loss": 2.7333, + "step": 196600 + }, + { + "epoch": 0.65, + "learning_rate": 1.7428274311224743e-05, + "loss": 2.7297, + "step": 196700 + }, + { + "epoch": 0.65, + "learning_rate": 1.7411715223431773e-05, + "loss": 2.7652, + "step": 196800 + }, + { + "epoch": 0.65, + "learning_rate": 1.7395156135638803e-05, + "loss": 2.7275, + "step": 196900 + }, + { + "epoch": 0.65, + "learning_rate": 1.737859704784583e-05, + "loss": 2.7314, + "step": 197000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7362037960052857e-05, + "loss": 2.7405, + "step": 197100 + }, + { + "epoch": 0.65, + "learning_rate": 1.7345478872259884e-05, + "loss": 2.7291, + "step": 197200 + }, + { + "epoch": 0.65, + "learning_rate": 1.7328919784466914e-05, + "loss": 2.7445, + "step": 197300 + }, + { + "epoch": 0.65, + "learning_rate": 1.7312360696673944e-05, + "loss": 2.7509, + "step": 197400 + }, + { + "epoch": 0.65, + "learning_rate": 1.729580160888097e-05, + "loss": 2.7429, + "step": 197500 + }, + { + "epoch": 0.65, + "learning_rate": 1.7279242521088e-05, + "loss": 2.7431, + "step": 197600 + }, + { + "epoch": 0.65, + "learning_rate": 1.7262683433295025e-05, + "loss": 2.7473, + "step": 197700 + }, + { + "epoch": 0.66, + "learning_rate": 1.7246124345502056e-05, + "loss": 2.7478, + "step": 197800 + }, + { + "epoch": 0.66, + "learning_rate": 1.7229565257709086e-05, + "loss": 2.7408, + "step": 197900 + }, + { + "epoch": 0.66, + "learning_rate": 1.7213006169916113e-05, + "loss": 2.7434, + "step": 198000 + }, + { + "epoch": 0.66, + "learning_rate": 1.719644708212314e-05, + "loss": 2.7364, + "step": 198100 + }, + { + "epoch": 0.66, + "learning_rate": 1.717988799433017e-05, + "loss": 2.7433, + "step": 198200 + }, + { + "epoch": 0.66, + "learning_rate": 1.71633289065372e-05, + "loss": 2.7405, + "step": 198300 + }, + { + "epoch": 0.66, + "learning_rate": 1.7146769818744227e-05, + "loss": 2.7361, + "step": 198400 + }, + { + "epoch": 0.66, + "learning_rate": 1.7130210730951254e-05, + "loss": 2.7447, + "step": 198500 + }, + { + "epoch": 0.66, + "learning_rate": 1.711365164315828e-05, + "loss": 2.7294, + "step": 198600 + }, + { + "epoch": 0.66, + "learning_rate": 1.709709255536531e-05, + "loss": 2.731, + "step": 198700 + }, + { + "epoch": 0.66, + "learning_rate": 1.708053346757234e-05, + "loss": 2.7371, + "step": 198800 + }, + { + "epoch": 0.66, + "learning_rate": 1.706397437977937e-05, + "loss": 2.7419, + "step": 198900 + }, + { + "epoch": 0.66, + "learning_rate": 1.7047415291986395e-05, + "loss": 2.749, + "step": 199000 + }, + { + "epoch": 0.66, + "learning_rate": 1.7030856204193422e-05, + "loss": 2.7477, + "step": 199100 + }, + { + "epoch": 0.66, + "learning_rate": 1.7014297116400453e-05, + "loss": 2.7424, + "step": 199200 + }, + { + "epoch": 0.66, + "learning_rate": 1.6997738028607483e-05, + "loss": 2.7354, + "step": 199300 + }, + { + "epoch": 0.66, + "learning_rate": 1.698117894081451e-05, + "loss": 2.7491, + "step": 199400 + }, + { + "epoch": 0.66, + "learning_rate": 1.6964619853021537e-05, + "loss": 2.751, + "step": 199500 + }, + { + "epoch": 0.66, + "learning_rate": 1.6948060765228567e-05, + "loss": 2.7344, + "step": 199600 + }, + { + "epoch": 0.66, + "learning_rate": 1.6931501677435594e-05, + "loss": 2.7399, + "step": 199700 + }, + { + "epoch": 0.66, + "learning_rate": 1.6914942589642624e-05, + "loss": 2.7261, + "step": 199800 + }, + { + "epoch": 0.66, + "learning_rate": 1.689838350184965e-05, + "loss": 2.7282, + "step": 199900 + }, + { + "epoch": 0.66, + "learning_rate": 1.6881824414056678e-05, + "loss": 2.7567, + "step": 200000 + }, + { + "epoch": 0.66, + "learning_rate": 1.686526532626371e-05, + "loss": 2.7304, + "step": 200100 + }, + { + "epoch": 0.66, + "learning_rate": 1.6848706238470735e-05, + "loss": 2.7329, + "step": 200200 + }, + { + "epoch": 0.66, + "learning_rate": 1.6832147150677766e-05, + "loss": 2.7499, + "step": 200300 + }, + { + "epoch": 0.66, + "learning_rate": 1.6815588062884792e-05, + "loss": 2.7355, + "step": 200400 + }, + { + "epoch": 0.66, + "learning_rate": 1.6799028975091823e-05, + "loss": 2.7495, + "step": 200500 + }, + { + "epoch": 0.66, + "learning_rate": 1.678246988729885e-05, + "loss": 2.7438, + "step": 200600 + }, + { + "epoch": 0.66, + "learning_rate": 1.6765910799505877e-05, + "loss": 2.7387, + "step": 200700 + }, + { + "epoch": 0.67, + "learning_rate": 1.6749351711712907e-05, + "loss": 2.7485, + "step": 200800 + }, + { + "epoch": 0.67, + "learning_rate": 1.6732792623919934e-05, + "loss": 2.7428, + "step": 200900 + }, + { + "epoch": 0.67, + "learning_rate": 1.6716233536126964e-05, + "loss": 2.7364, + "step": 201000 + }, + { + "epoch": 0.67, + "learning_rate": 1.669967444833399e-05, + "loss": 2.7413, + "step": 201100 + }, + { + "epoch": 0.67, + "learning_rate": 1.6683115360541018e-05, + "loss": 2.7507, + "step": 201200 + }, + { + "epoch": 0.67, + "learning_rate": 1.6666556272748048e-05, + "loss": 2.713, + "step": 201300 + }, + { + "epoch": 0.67, + "learning_rate": 1.6649997184955075e-05, + "loss": 2.7451, + "step": 201400 + }, + { + "epoch": 0.67, + "learning_rate": 1.6633438097162105e-05, + "loss": 2.7487, + "step": 201500 + }, + { + "epoch": 0.67, + "learning_rate": 1.6616879009369132e-05, + "loss": 2.7377, + "step": 201600 + }, + { + "epoch": 0.67, + "learning_rate": 1.660031992157616e-05, + "loss": 2.7419, + "step": 201700 + }, + { + "epoch": 0.67, + "learning_rate": 1.658376083378319e-05, + "loss": 2.7301, + "step": 201800 + }, + { + "epoch": 0.67, + "learning_rate": 1.656720174599022e-05, + "loss": 2.7458, + "step": 201900 + }, + { + "epoch": 0.67, + "learning_rate": 1.6550642658197247e-05, + "loss": 2.7329, + "step": 202000 + }, + { + "epoch": 0.67, + "learning_rate": 1.6534083570404274e-05, + "loss": 2.7547, + "step": 202100 + }, + { + "epoch": 0.67, + "learning_rate": 1.65175244826113e-05, + "loss": 2.7139, + "step": 202200 + }, + { + "epoch": 0.67, + "learning_rate": 1.650096539481833e-05, + "loss": 2.7398, + "step": 202300 + }, + { + "epoch": 0.67, + "learning_rate": 1.648440630702536e-05, + "loss": 2.7443, + "step": 202400 + }, + { + "epoch": 0.67, + "learning_rate": 1.6467847219232388e-05, + "loss": 2.7437, + "step": 202500 + }, + { + "epoch": 0.67, + "learning_rate": 1.6451288131439415e-05, + "loss": 2.7371, + "step": 202600 + }, + { + "epoch": 0.67, + "learning_rate": 1.6434729043646442e-05, + "loss": 2.7587, + "step": 202700 + }, + { + "epoch": 0.67, + "learning_rate": 1.6418169955853476e-05, + "loss": 2.7479, + "step": 202800 + }, + { + "epoch": 0.67, + "learning_rate": 1.6401610868060503e-05, + "loss": 2.738, + "step": 202900 + }, + { + "epoch": 0.67, + "learning_rate": 1.638505178026753e-05, + "loss": 2.7384, + "step": 203000 + }, + { + "epoch": 0.67, + "learning_rate": 1.6368492692474556e-05, + "loss": 2.7542, + "step": 203100 + }, + { + "epoch": 0.67, + "learning_rate": 1.6351933604681587e-05, + "loss": 2.7438, + "step": 203200 + }, + { + "epoch": 0.67, + "learning_rate": 1.6335374516888617e-05, + "loss": 2.74, + "step": 203300 + }, + { + "epoch": 0.67, + "learning_rate": 1.6318815429095644e-05, + "loss": 2.7262, + "step": 203400 + }, + { + "epoch": 0.67, + "learning_rate": 1.630225634130267e-05, + "loss": 2.7397, + "step": 203500 + }, + { + "epoch": 0.67, + "learning_rate": 1.6285697253509698e-05, + "loss": 2.7411, + "step": 203600 + }, + { + "epoch": 0.67, + "learning_rate": 1.6269138165716728e-05, + "loss": 2.7475, + "step": 203700 + }, + { + "epoch": 0.67, + "learning_rate": 1.625257907792376e-05, + "loss": 2.7394, + "step": 203800 + }, + { + "epoch": 0.68, + "learning_rate": 1.6236019990130785e-05, + "loss": 2.727, + "step": 203900 + }, + { + "epoch": 0.68, + "learning_rate": 1.6219460902337812e-05, + "loss": 2.7518, + "step": 204000 + }, + { + "epoch": 0.68, + "learning_rate": 1.6202901814544842e-05, + "loss": 2.7461, + "step": 204100 + }, + { + "epoch": 0.68, + "learning_rate": 1.618634272675187e-05, + "loss": 2.734, + "step": 204200 + }, + { + "epoch": 0.68, + "learning_rate": 1.61697836389589e-05, + "loss": 2.7206, + "step": 204300 + }, + { + "epoch": 0.68, + "learning_rate": 1.6153224551165927e-05, + "loss": 2.762, + "step": 204400 + }, + { + "epoch": 0.68, + "learning_rate": 1.6136665463372953e-05, + "loss": 2.7351, + "step": 204500 + }, + { + "epoch": 0.68, + "learning_rate": 1.6120106375579984e-05, + "loss": 2.7437, + "step": 204600 + }, + { + "epoch": 0.68, + "learning_rate": 1.610354728778701e-05, + "loss": 2.7519, + "step": 204700 + }, + { + "epoch": 0.68, + "learning_rate": 1.608698819999404e-05, + "loss": 2.7358, + "step": 204800 + }, + { + "epoch": 0.68, + "learning_rate": 1.6070429112201068e-05, + "loss": 2.7435, + "step": 204900 + }, + { + "epoch": 0.68, + "learning_rate": 1.6053870024408095e-05, + "loss": 2.7644, + "step": 205000 + }, + { + "epoch": 0.68, + "learning_rate": 1.6037310936615125e-05, + "loss": 2.7336, + "step": 205100 + }, + { + "epoch": 0.68, + "learning_rate": 1.6020751848822152e-05, + "loss": 2.7355, + "step": 205200 + }, + { + "epoch": 0.68, + "learning_rate": 1.6004192761029182e-05, + "loss": 2.7354, + "step": 205300 + }, + { + "epoch": 0.68, + "learning_rate": 1.598763367323621e-05, + "loss": 2.7431, + "step": 205400 + }, + { + "epoch": 0.68, + "learning_rate": 1.597107458544324e-05, + "loss": 2.7423, + "step": 205500 + }, + { + "epoch": 0.68, + "learning_rate": 1.5954515497650266e-05, + "loss": 2.7402, + "step": 205600 + }, + { + "epoch": 0.68, + "learning_rate": 1.5937956409857293e-05, + "loss": 2.7426, + "step": 205700 + }, + { + "epoch": 0.68, + "learning_rate": 1.5921397322064324e-05, + "loss": 2.7303, + "step": 205800 + }, + { + "epoch": 0.68, + "learning_rate": 1.590483823427135e-05, + "loss": 2.7449, + "step": 205900 + }, + { + "epoch": 0.68, + "learning_rate": 1.588827914647838e-05, + "loss": 2.7423, + "step": 206000 + }, + { + "epoch": 0.68, + "learning_rate": 1.5871720058685408e-05, + "loss": 2.7294, + "step": 206100 + }, + { + "epoch": 0.68, + "learning_rate": 1.5855160970892435e-05, + "loss": 2.7421, + "step": 206200 + }, + { + "epoch": 0.68, + "learning_rate": 1.5838601883099465e-05, + "loss": 2.7409, + "step": 206300 + }, + { + "epoch": 0.68, + "learning_rate": 1.5822042795306495e-05, + "loss": 2.7481, + "step": 206400 + }, + { + "epoch": 0.68, + "learning_rate": 1.5805483707513522e-05, + "loss": 2.7521, + "step": 206500 + }, + { + "epoch": 0.68, + "learning_rate": 1.578892461972055e-05, + "loss": 2.7391, + "step": 206600 + }, + { + "epoch": 0.68, + "learning_rate": 1.5772365531927576e-05, + "loss": 2.7332, + "step": 206700 + }, + { + "epoch": 0.68, + "learning_rate": 1.5755806444134606e-05, + "loss": 2.7301, + "step": 206800 + }, + { + "epoch": 0.69, + "learning_rate": 1.5739247356341637e-05, + "loss": 2.7335, + "step": 206900 + }, + { + "epoch": 0.69, + "learning_rate": 1.5722688268548664e-05, + "loss": 2.7397, + "step": 207000 + }, + { + "epoch": 0.69, + "learning_rate": 1.570612918075569e-05, + "loss": 2.7328, + "step": 207100 + }, + { + "epoch": 0.69, + "learning_rate": 1.5689570092962717e-05, + "loss": 2.745, + "step": 207200 + }, + { + "epoch": 0.69, + "learning_rate": 1.5673011005169748e-05, + "loss": 2.7366, + "step": 207300 + }, + { + "epoch": 0.69, + "learning_rate": 1.5656451917376778e-05, + "loss": 2.7424, + "step": 207400 + }, + { + "epoch": 0.69, + "learning_rate": 1.5639892829583805e-05, + "loss": 2.7485, + "step": 207500 + }, + { + "epoch": 0.69, + "learning_rate": 1.5623333741790832e-05, + "loss": 2.7466, + "step": 207600 + }, + { + "epoch": 0.69, + "learning_rate": 1.5606774653997862e-05, + "loss": 2.7467, + "step": 207700 + }, + { + "epoch": 0.69, + "learning_rate": 1.5590215566204892e-05, + "loss": 2.7453, + "step": 207800 + }, + { + "epoch": 0.69, + "learning_rate": 1.557365647841192e-05, + "loss": 2.7395, + "step": 207900 + }, + { + "epoch": 0.69, + "learning_rate": 1.5557097390618946e-05, + "loss": 2.748, + "step": 208000 + }, + { + "epoch": 0.69, + "learning_rate": 1.5540538302825973e-05, + "loss": 2.7507, + "step": 208100 + }, + { + "epoch": 0.69, + "learning_rate": 1.5523979215033003e-05, + "loss": 2.7468, + "step": 208200 + }, + { + "epoch": 0.69, + "learning_rate": 1.5507420127240034e-05, + "loss": 2.7397, + "step": 208300 + }, + { + "epoch": 0.69, + "learning_rate": 1.549086103944706e-05, + "loss": 2.7297, + "step": 208400 + }, + { + "epoch": 0.69, + "learning_rate": 1.5474301951654088e-05, + "loss": 2.7447, + "step": 208500 + }, + { + "epoch": 0.69, + "learning_rate": 1.5457742863861114e-05, + "loss": 2.7362, + "step": 208600 + }, + { + "epoch": 0.69, + "learning_rate": 1.5441183776068145e-05, + "loss": 2.7343, + "step": 208700 + }, + { + "epoch": 0.69, + "learning_rate": 1.5424624688275175e-05, + "loss": 2.7268, + "step": 208800 + }, + { + "epoch": 0.69, + "learning_rate": 1.5408065600482202e-05, + "loss": 2.7216, + "step": 208900 + }, + { + "epoch": 0.69, + "learning_rate": 1.539150651268923e-05, + "loss": 2.7378, + "step": 209000 + }, + { + "epoch": 0.69, + "learning_rate": 1.537494742489626e-05, + "loss": 2.7276, + "step": 209100 + }, + { + "epoch": 0.69, + "learning_rate": 1.5358388337103286e-05, + "loss": 2.7316, + "step": 209200 + }, + { + "epoch": 0.69, + "learning_rate": 1.5341829249310316e-05, + "loss": 2.7451, + "step": 209300 + }, + { + "epoch": 0.69, + "learning_rate": 1.5325270161517343e-05, + "loss": 2.7364, + "step": 209400 + }, + { + "epoch": 0.69, + "learning_rate": 1.530871107372437e-05, + "loss": 2.7355, + "step": 209500 + }, + { + "epoch": 0.69, + "learning_rate": 1.52921519859314e-05, + "loss": 2.7339, + "step": 209600 + }, + { + "epoch": 0.69, + "learning_rate": 1.5275592898138427e-05, + "loss": 2.7487, + "step": 209700 + }, + { + "epoch": 0.69, + "learning_rate": 1.5259033810345458e-05, + "loss": 2.7462, + "step": 209800 + }, + { + "epoch": 0.7, + "learning_rate": 1.5242474722552486e-05, + "loss": 2.7466, + "step": 209900 + }, + { + "epoch": 0.7, + "learning_rate": 1.5225915634759513e-05, + "loss": 2.7356, + "step": 210000 + }, + { + "epoch": 0.7, + "learning_rate": 1.5209356546966542e-05, + "loss": 2.7226, + "step": 210100 + }, + { + "epoch": 0.7, + "learning_rate": 1.5192797459173569e-05, + "loss": 2.7362, + "step": 210200 + }, + { + "epoch": 0.7, + "learning_rate": 1.5176238371380599e-05, + "loss": 2.7423, + "step": 210300 + }, + { + "epoch": 0.7, + "learning_rate": 1.5159679283587628e-05, + "loss": 2.7491, + "step": 210400 + }, + { + "epoch": 0.7, + "learning_rate": 1.5143120195794655e-05, + "loss": 2.7306, + "step": 210500 + }, + { + "epoch": 0.7, + "learning_rate": 1.5126561108001683e-05, + "loss": 2.7319, + "step": 210600 + }, + { + "epoch": 0.7, + "learning_rate": 1.511000202020871e-05, + "loss": 2.7384, + "step": 210700 + }, + { + "epoch": 0.7, + "learning_rate": 1.509344293241574e-05, + "loss": 2.7373, + "step": 210800 + }, + { + "epoch": 0.7, + "learning_rate": 1.5076883844622769e-05, + "loss": 2.7249, + "step": 210900 + }, + { + "epoch": 0.7, + "learning_rate": 1.5060324756829796e-05, + "loss": 2.743, + "step": 211000 + }, + { + "epoch": 0.7, + "learning_rate": 1.5043765669036824e-05, + "loss": 2.7335, + "step": 211100 + }, + { + "epoch": 0.7, + "learning_rate": 1.5027206581243853e-05, + "loss": 2.7283, + "step": 211200 + }, + { + "epoch": 0.7, + "learning_rate": 1.5010647493450883e-05, + "loss": 2.734, + "step": 211300 + }, + { + "epoch": 0.7, + "learning_rate": 1.499408840565791e-05, + "loss": 2.731, + "step": 211400 + }, + { + "epoch": 0.7, + "learning_rate": 1.4977529317864939e-05, + "loss": 2.7261, + "step": 211500 + }, + { + "epoch": 0.7, + "learning_rate": 1.4960970230071966e-05, + "loss": 2.733, + "step": 211600 + }, + { + "epoch": 0.7, + "learning_rate": 1.4944411142278994e-05, + "loss": 2.7482, + "step": 211700 + }, + { + "epoch": 0.7, + "learning_rate": 1.4927852054486025e-05, + "loss": 2.7321, + "step": 211800 + }, + { + "epoch": 0.7, + "learning_rate": 1.4911292966693052e-05, + "loss": 2.7307, + "step": 211900 + }, + { + "epoch": 0.7, + "learning_rate": 1.489473387890008e-05, + "loss": 2.7265, + "step": 212000 + }, + { + "epoch": 0.7, + "learning_rate": 1.4878174791107107e-05, + "loss": 2.7469, + "step": 212100 + }, + { + "epoch": 0.7, + "learning_rate": 1.4861615703314136e-05, + "loss": 2.7325, + "step": 212200 + }, + { + "epoch": 0.7, + "learning_rate": 1.4845056615521166e-05, + "loss": 2.7321, + "step": 212300 + }, + { + "epoch": 0.7, + "learning_rate": 1.4828497527728195e-05, + "loss": 2.7393, + "step": 212400 + }, + { + "epoch": 0.7, + "learning_rate": 1.4811938439935222e-05, + "loss": 2.7369, + "step": 212500 + }, + { + "epoch": 0.7, + "learning_rate": 1.479537935214225e-05, + "loss": 2.7519, + "step": 212600 + }, + { + "epoch": 0.7, + "learning_rate": 1.4778820264349277e-05, + "loss": 2.7335, + "step": 212700 + }, + { + "epoch": 0.7, + "learning_rate": 1.4762261176556307e-05, + "loss": 2.7435, + "step": 212800 + }, + { + "epoch": 0.71, + "learning_rate": 1.4745702088763336e-05, + "loss": 2.7406, + "step": 212900 + }, + { + "epoch": 0.71, + "learning_rate": 1.4729143000970363e-05, + "loss": 2.7373, + "step": 213000 + }, + { + "epoch": 0.71, + "learning_rate": 1.4712583913177392e-05, + "loss": 2.7298, + "step": 213100 + }, + { + "epoch": 0.71, + "learning_rate": 1.4696024825384418e-05, + "loss": 2.7432, + "step": 213200 + }, + { + "epoch": 0.71, + "learning_rate": 1.467946573759145e-05, + "loss": 2.7399, + "step": 213300 + }, + { + "epoch": 0.71, + "learning_rate": 1.4662906649798477e-05, + "loss": 2.7334, + "step": 213400 + }, + { + "epoch": 0.71, + "learning_rate": 1.4646347562005506e-05, + "loss": 2.721, + "step": 213500 + }, + { + "epoch": 0.71, + "learning_rate": 1.4629788474212533e-05, + "loss": 2.7371, + "step": 213600 + }, + { + "epoch": 0.71, + "learning_rate": 1.4613229386419561e-05, + "loss": 2.7389, + "step": 213700 + }, + { + "epoch": 0.71, + "learning_rate": 1.4596670298626592e-05, + "loss": 2.7398, + "step": 213800 + }, + { + "epoch": 0.71, + "learning_rate": 1.4580111210833619e-05, + "loss": 2.7177, + "step": 213900 + }, + { + "epoch": 0.71, + "learning_rate": 1.4563552123040647e-05, + "loss": 2.7336, + "step": 214000 + }, + { + "epoch": 0.71, + "learning_rate": 1.4546993035247674e-05, + "loss": 2.7318, + "step": 214100 + }, + { + "epoch": 0.71, + "learning_rate": 1.4530433947454703e-05, + "loss": 2.7353, + "step": 214200 + }, + { + "epoch": 0.71, + "learning_rate": 1.4513874859661733e-05, + "loss": 2.736, + "step": 214300 + }, + { + "epoch": 0.71, + "learning_rate": 1.449731577186876e-05, + "loss": 2.7365, + "step": 214400 + }, + { + "epoch": 0.71, + "learning_rate": 1.4480756684075789e-05, + "loss": 2.7302, + "step": 214500 + }, + { + "epoch": 0.71, + "learning_rate": 1.4464197596282816e-05, + "loss": 2.7378, + "step": 214600 + }, + { + "epoch": 0.71, + "learning_rate": 1.4447638508489844e-05, + "loss": 2.7422, + "step": 214700 + }, + { + "epoch": 0.71, + "learning_rate": 1.4431079420696874e-05, + "loss": 2.7343, + "step": 214800 + }, + { + "epoch": 0.71, + "learning_rate": 1.4414520332903903e-05, + "loss": 2.743, + "step": 214900 + }, + { + "epoch": 0.71, + "learning_rate": 1.439796124511093e-05, + "loss": 2.7572, + "step": 215000 + }, + { + "epoch": 0.71, + "learning_rate": 1.4381402157317959e-05, + "loss": 2.7367, + "step": 215100 + }, + { + "epoch": 0.71, + "learning_rate": 1.4364843069524985e-05, + "loss": 2.7367, + "step": 215200 + }, + { + "epoch": 0.71, + "learning_rate": 1.4348283981732016e-05, + "loss": 2.7333, + "step": 215300 + }, + { + "epoch": 0.71, + "learning_rate": 1.4331724893939044e-05, + "loss": 2.7396, + "step": 215400 + }, + { + "epoch": 0.71, + "learning_rate": 1.4315165806146071e-05, + "loss": 2.737, + "step": 215500 + }, + { + "epoch": 0.71, + "learning_rate": 1.42986067183531e-05, + "loss": 2.7206, + "step": 215600 + }, + { + "epoch": 0.71, + "learning_rate": 1.4282047630560127e-05, + "loss": 2.7255, + "step": 215700 + }, + { + "epoch": 0.71, + "learning_rate": 1.4265488542767159e-05, + "loss": 2.7462, + "step": 215800 + }, + { + "epoch": 0.72, + "learning_rate": 1.4248929454974186e-05, + "loss": 2.7506, + "step": 215900 + }, + { + "epoch": 0.72, + "learning_rate": 1.4232370367181214e-05, + "loss": 2.7401, + "step": 216000 + }, + { + "epoch": 0.72, + "learning_rate": 1.4215811279388241e-05, + "loss": 2.7385, + "step": 216100 + }, + { + "epoch": 0.72, + "learning_rate": 1.419925219159527e-05, + "loss": 2.7381, + "step": 216200 + }, + { + "epoch": 0.72, + "learning_rate": 1.41826931038023e-05, + "loss": 2.7312, + "step": 216300 + }, + { + "epoch": 0.72, + "learning_rate": 1.4166134016009327e-05, + "loss": 2.7415, + "step": 216400 + }, + { + "epoch": 0.72, + "learning_rate": 1.4149574928216356e-05, + "loss": 2.7345, + "step": 216500 + }, + { + "epoch": 0.72, + "learning_rate": 1.4133015840423383e-05, + "loss": 2.748, + "step": 216600 + }, + { + "epoch": 0.72, + "learning_rate": 1.4116456752630411e-05, + "loss": 2.748, + "step": 216700 + }, + { + "epoch": 0.72, + "learning_rate": 1.4099897664837441e-05, + "loss": 2.7453, + "step": 216800 + }, + { + "epoch": 0.72, + "learning_rate": 1.408333857704447e-05, + "loss": 2.7378, + "step": 216900 + }, + { + "epoch": 0.72, + "learning_rate": 1.4066779489251497e-05, + "loss": 2.7328, + "step": 217000 + }, + { + "epoch": 0.72, + "learning_rate": 1.4050220401458526e-05, + "loss": 2.7334, + "step": 217100 + }, + { + "epoch": 0.72, + "learning_rate": 1.4033661313665552e-05, + "loss": 2.7476, + "step": 217200 + }, + { + "epoch": 0.72, + "learning_rate": 1.4017102225872583e-05, + "loss": 2.7333, + "step": 217300 + }, + { + "epoch": 0.72, + "learning_rate": 1.4000543138079611e-05, + "loss": 2.7417, + "step": 217400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3983984050286638e-05, + "loss": 2.741, + "step": 217500 + }, + { + "epoch": 0.72, + "learning_rate": 1.3967424962493667e-05, + "loss": 2.7236, + "step": 217600 + }, + { + "epoch": 0.72, + "learning_rate": 1.3950865874700694e-05, + "loss": 2.7387, + "step": 217700 + }, + { + "epoch": 0.72, + "learning_rate": 1.3934306786907724e-05, + "loss": 2.7393, + "step": 217800 + }, + { + "epoch": 0.72, + "learning_rate": 1.3917747699114753e-05, + "loss": 2.7202, + "step": 217900 + }, + { + "epoch": 0.72, + "learning_rate": 1.390118861132178e-05, + "loss": 2.744, + "step": 218000 + }, + { + "epoch": 0.72, + "learning_rate": 1.3884629523528808e-05, + "loss": 2.7358, + "step": 218100 + }, + { + "epoch": 0.72, + "learning_rate": 1.3868070435735835e-05, + "loss": 2.7504, + "step": 218200 + }, + { + "epoch": 0.72, + "learning_rate": 1.3851511347942867e-05, + "loss": 2.7312, + "step": 218300 + }, + { + "epoch": 0.72, + "learning_rate": 1.3834952260149894e-05, + "loss": 2.7199, + "step": 218400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3818393172356923e-05, + "loss": 2.7406, + "step": 218500 + }, + { + "epoch": 0.72, + "learning_rate": 1.380183408456395e-05, + "loss": 2.7419, + "step": 218600 + }, + { + "epoch": 0.72, + "learning_rate": 1.3785274996770978e-05, + "loss": 2.742, + "step": 218700 + }, + { + "epoch": 0.72, + "learning_rate": 1.3768715908978008e-05, + "loss": 2.7436, + "step": 218800 + }, + { + "epoch": 0.72, + "learning_rate": 1.3752156821185035e-05, + "loss": 2.7303, + "step": 218900 + }, + { + "epoch": 0.73, + "learning_rate": 1.3735597733392064e-05, + "loss": 2.7314, + "step": 219000 + }, + { + "epoch": 0.73, + "learning_rate": 1.3719038645599091e-05, + "loss": 2.7448, + "step": 219100 + }, + { + "epoch": 0.73, + "learning_rate": 1.370247955780612e-05, + "loss": 2.7358, + "step": 219200 + }, + { + "epoch": 0.73, + "learning_rate": 1.368592047001315e-05, + "loss": 2.7307, + "step": 219300 + }, + { + "epoch": 0.73, + "learning_rate": 1.3669361382220178e-05, + "loss": 2.7211, + "step": 219400 + }, + { + "epoch": 0.73, + "learning_rate": 1.3652802294427205e-05, + "loss": 2.7314, + "step": 219500 + }, + { + "epoch": 0.73, + "learning_rate": 1.3636243206634234e-05, + "loss": 2.7512, + "step": 219600 + }, + { + "epoch": 0.73, + "learning_rate": 1.361968411884126e-05, + "loss": 2.7242, + "step": 219700 + }, + { + "epoch": 0.73, + "learning_rate": 1.3603125031048291e-05, + "loss": 2.7493, + "step": 219800 + }, + { + "epoch": 0.73, + "learning_rate": 1.358656594325532e-05, + "loss": 2.7336, + "step": 219900 + }, + { + "epoch": 0.73, + "learning_rate": 1.3570006855462347e-05, + "loss": 2.7397, + "step": 220000 + }, + { + "epoch": 0.73, + "learning_rate": 1.3553447767669375e-05, + "loss": 2.7303, + "step": 220100 + }, + { + "epoch": 0.73, + "learning_rate": 1.3536888679876402e-05, + "loss": 2.7419, + "step": 220200 + }, + { + "epoch": 0.73, + "learning_rate": 1.3520329592083432e-05, + "loss": 2.7435, + "step": 220300 + }, + { + "epoch": 0.73, + "learning_rate": 1.3503770504290461e-05, + "loss": 2.7441, + "step": 220400 + }, + { + "epoch": 0.73, + "learning_rate": 1.348721141649749e-05, + "loss": 2.7431, + "step": 220500 + }, + { + "epoch": 0.73, + "learning_rate": 1.3470652328704517e-05, + "loss": 2.7252, + "step": 220600 + }, + { + "epoch": 0.73, + "learning_rate": 1.3454093240911545e-05, + "loss": 2.7465, + "step": 220700 + }, + { + "epoch": 0.73, + "learning_rate": 1.3437534153118576e-05, + "loss": 2.747, + "step": 220800 + }, + { + "epoch": 0.73, + "learning_rate": 1.3420975065325602e-05, + "loss": 2.7359, + "step": 220900 + }, + { + "epoch": 0.73, + "learning_rate": 1.3404415977532631e-05, + "loss": 2.7248, + "step": 221000 + }, + { + "epoch": 0.73, + "learning_rate": 1.3387856889739658e-05, + "loss": 2.7419, + "step": 221100 + }, + { + "epoch": 0.73, + "learning_rate": 1.3371297801946687e-05, + "loss": 2.7386, + "step": 221200 + }, + { + "epoch": 0.73, + "learning_rate": 1.3354738714153717e-05, + "loss": 2.7346, + "step": 221300 + }, + { + "epoch": 0.73, + "learning_rate": 1.3338179626360744e-05, + "loss": 2.7413, + "step": 221400 + }, + { + "epoch": 0.73, + "learning_rate": 1.3321620538567772e-05, + "loss": 2.7421, + "step": 221500 + }, + { + "epoch": 0.73, + "learning_rate": 1.33050614507748e-05, + "loss": 2.7441, + "step": 221600 + }, + { + "epoch": 0.73, + "learning_rate": 1.3288502362981828e-05, + "loss": 2.7452, + "step": 221700 + }, + { + "epoch": 0.73, + "learning_rate": 1.3271943275188858e-05, + "loss": 2.7443, + "step": 221800 + }, + { + "epoch": 0.73, + "learning_rate": 1.3255384187395887e-05, + "loss": 2.7384, + "step": 221900 + }, + { + "epoch": 0.74, + "learning_rate": 1.3238825099602914e-05, + "loss": 2.741, + "step": 222000 + }, + { + "epoch": 0.74, + "learning_rate": 1.3222266011809942e-05, + "loss": 2.7404, + "step": 222100 + }, + { + "epoch": 0.74, + "learning_rate": 1.320570692401697e-05, + "loss": 2.7281, + "step": 222200 + }, + { + "epoch": 0.74, + "learning_rate": 1.3189147836224e-05, + "loss": 2.7304, + "step": 222300 + }, + { + "epoch": 0.74, + "learning_rate": 1.3172588748431028e-05, + "loss": 2.7464, + "step": 222400 + }, + { + "epoch": 0.74, + "learning_rate": 1.3156029660638055e-05, + "loss": 2.7167, + "step": 222500 + }, + { + "epoch": 0.74, + "learning_rate": 1.3139470572845084e-05, + "loss": 2.7363, + "step": 222600 + }, + { + "epoch": 0.74, + "learning_rate": 1.312291148505211e-05, + "loss": 2.7286, + "step": 222700 + }, + { + "epoch": 0.74, + "learning_rate": 1.3106352397259143e-05, + "loss": 2.7519, + "step": 222800 + }, + { + "epoch": 0.74, + "learning_rate": 1.308979330946617e-05, + "loss": 2.741, + "step": 222900 + }, + { + "epoch": 0.74, + "learning_rate": 1.3073234221673198e-05, + "loss": 2.7186, + "step": 223000 + }, + { + "epoch": 0.74, + "learning_rate": 1.3056675133880225e-05, + "loss": 2.7306, + "step": 223100 + }, + { + "epoch": 0.74, + "learning_rate": 1.3040116046087254e-05, + "loss": 2.732, + "step": 223200 + }, + { + "epoch": 0.74, + "learning_rate": 1.3023556958294284e-05, + "loss": 2.7292, + "step": 223300 + }, + { + "epoch": 0.74, + "learning_rate": 1.300699787050131e-05, + "loss": 2.7223, + "step": 223400 + }, + { + "epoch": 0.74, + "learning_rate": 1.299043878270834e-05, + "loss": 2.7343, + "step": 223500 + }, + { + "epoch": 0.74, + "learning_rate": 1.2973879694915366e-05, + "loss": 2.7359, + "step": 223600 + }, + { + "epoch": 0.74, + "learning_rate": 1.2957320607122395e-05, + "loss": 2.7215, + "step": 223700 + }, + { + "epoch": 0.74, + "learning_rate": 1.2940761519329425e-05, + "loss": 2.7199, + "step": 223800 + }, + { + "epoch": 0.74, + "learning_rate": 1.2924202431536452e-05, + "loss": 2.7357, + "step": 223900 + }, + { + "epoch": 0.74, + "learning_rate": 1.290764334374348e-05, + "loss": 2.7388, + "step": 224000 + }, + { + "epoch": 0.74, + "learning_rate": 1.289108425595051e-05, + "loss": 2.7297, + "step": 224100 + }, + { + "epoch": 0.74, + "learning_rate": 1.2874525168157536e-05, + "loss": 2.7261, + "step": 224200 + }, + { + "epoch": 0.74, + "learning_rate": 1.2857966080364567e-05, + "loss": 2.7593, + "step": 224300 + }, + { + "epoch": 0.74, + "learning_rate": 1.2841406992571595e-05, + "loss": 2.7388, + "step": 224400 + }, + { + "epoch": 0.74, + "learning_rate": 1.2824847904778622e-05, + "loss": 2.7441, + "step": 224500 + }, + { + "epoch": 0.74, + "learning_rate": 1.280828881698565e-05, + "loss": 2.7375, + "step": 224600 + }, + { + "epoch": 0.74, + "learning_rate": 1.2791729729192678e-05, + "loss": 2.7274, + "step": 224700 + }, + { + "epoch": 0.74, + "learning_rate": 1.2775170641399708e-05, + "loss": 2.7279, + "step": 224800 + }, + { + "epoch": 0.74, + "learning_rate": 1.2758611553606736e-05, + "loss": 2.7245, + "step": 224900 + }, + { + "epoch": 0.75, + "learning_rate": 1.2742052465813763e-05, + "loss": 2.7332, + "step": 225000 + }, + { + "epoch": 0.75, + "learning_rate": 1.2725493378020792e-05, + "loss": 2.7349, + "step": 225100 + }, + { + "epoch": 0.75, + "learning_rate": 1.2708934290227819e-05, + "loss": 2.7183, + "step": 225200 + }, + { + "epoch": 0.75, + "learning_rate": 1.2692375202434851e-05, + "loss": 2.744, + "step": 225300 + }, + { + "epoch": 0.75, + "learning_rate": 1.2675816114641878e-05, + "loss": 2.755, + "step": 225400 + }, + { + "epoch": 0.75, + "learning_rate": 1.2659257026848906e-05, + "loss": 2.7396, + "step": 225500 + }, + { + "epoch": 0.75, + "learning_rate": 1.2642697939055933e-05, + "loss": 2.7201, + "step": 225600 + }, + { + "epoch": 0.75, + "learning_rate": 1.2626138851262962e-05, + "loss": 2.7371, + "step": 225700 + }, + { + "epoch": 0.75, + "learning_rate": 1.2609579763469992e-05, + "loss": 2.7388, + "step": 225800 + }, + { + "epoch": 0.75, + "learning_rate": 1.2593020675677019e-05, + "loss": 2.7232, + "step": 225900 + }, + { + "epoch": 0.75, + "learning_rate": 1.2576461587884048e-05, + "loss": 2.7321, + "step": 226000 + }, + { + "epoch": 0.75, + "learning_rate": 1.2559902500091075e-05, + "loss": 2.7349, + "step": 226100 + }, + { + "epoch": 0.75, + "learning_rate": 1.2543343412298103e-05, + "loss": 2.7497, + "step": 226200 + }, + { + "epoch": 0.75, + "learning_rate": 1.2526784324505134e-05, + "loss": 2.7385, + "step": 226300 + }, + { + "epoch": 0.75, + "learning_rate": 1.2510225236712162e-05, + "loss": 2.7397, + "step": 226400 + }, + { + "epoch": 0.75, + "learning_rate": 1.2493666148919189e-05, + "loss": 2.7345, + "step": 226500 + }, + { + "epoch": 0.75, + "learning_rate": 1.2477107061126218e-05, + "loss": 2.734, + "step": 226600 + }, + { + "epoch": 0.75, + "learning_rate": 1.2460547973333246e-05, + "loss": 2.731, + "step": 226700 + }, + { + "epoch": 0.75, + "learning_rate": 1.2443988885540273e-05, + "loss": 2.7298, + "step": 226800 + }, + { + "epoch": 0.75, + "learning_rate": 1.2427429797747302e-05, + "loss": 2.7303, + "step": 226900 + }, + { + "epoch": 0.75, + "learning_rate": 1.241087070995433e-05, + "loss": 2.7343, + "step": 227000 + }, + { + "epoch": 0.75, + "learning_rate": 1.2394311622161359e-05, + "loss": 2.7504, + "step": 227100 + }, + { + "epoch": 0.75, + "learning_rate": 1.2377752534368388e-05, + "loss": 2.7192, + "step": 227200 + }, + { + "epoch": 0.75, + "learning_rate": 1.2361193446575416e-05, + "loss": 2.7333, + "step": 227300 + }, + { + "epoch": 0.75, + "learning_rate": 1.2344634358782443e-05, + "loss": 2.7464, + "step": 227400 + }, + { + "epoch": 0.75, + "learning_rate": 1.2328075270989472e-05, + "loss": 2.7402, + "step": 227500 + }, + { + "epoch": 0.75, + "learning_rate": 1.23115161831965e-05, + "loss": 2.7246, + "step": 227600 + }, + { + "epoch": 0.75, + "learning_rate": 1.2294957095403529e-05, + "loss": 2.7297, + "step": 227700 + }, + { + "epoch": 0.75, + "learning_rate": 1.2278398007610558e-05, + "loss": 2.738, + "step": 227800 + }, + { + "epoch": 0.75, + "learning_rate": 1.2261838919817584e-05, + "loss": 2.7303, + "step": 227900 + }, + { + "epoch": 0.76, + "learning_rate": 1.2245279832024615e-05, + "loss": 2.7445, + "step": 228000 + }, + { + "epoch": 0.76, + "learning_rate": 1.2228720744231642e-05, + "loss": 2.7405, + "step": 228100 + }, + { + "epoch": 0.76, + "learning_rate": 1.2212161656438672e-05, + "loss": 2.7304, + "step": 228200 + }, + { + "epoch": 0.76, + "learning_rate": 1.2195602568645699e-05, + "loss": 2.723, + "step": 228300 + }, + { + "epoch": 0.76, + "learning_rate": 1.2179043480852728e-05, + "loss": 2.7344, + "step": 228400 + }, + { + "epoch": 0.76, + "learning_rate": 1.2162484393059756e-05, + "loss": 2.7267, + "step": 228500 + }, + { + "epoch": 0.76, + "learning_rate": 1.2145925305266783e-05, + "loss": 2.7427, + "step": 228600 + }, + { + "epoch": 0.76, + "learning_rate": 1.2129366217473813e-05, + "loss": 2.7328, + "step": 228700 + }, + { + "epoch": 0.76, + "learning_rate": 1.211280712968084e-05, + "loss": 2.7359, + "step": 228800 + }, + { + "epoch": 0.76, + "learning_rate": 1.2096248041887869e-05, + "loss": 2.7302, + "step": 228900 + }, + { + "epoch": 0.76, + "learning_rate": 1.2079688954094897e-05, + "loss": 2.742, + "step": 229000 + }, + { + "epoch": 0.76, + "learning_rate": 1.2063129866301926e-05, + "loss": 2.7364, + "step": 229100 + }, + { + "epoch": 0.76, + "learning_rate": 1.2046570778508955e-05, + "loss": 2.732, + "step": 229200 + }, + { + "epoch": 0.76, + "learning_rate": 1.2030011690715982e-05, + "loss": 2.7296, + "step": 229300 + }, + { + "epoch": 0.76, + "learning_rate": 1.201345260292301e-05, + "loss": 2.728, + "step": 229400 + }, + { + "epoch": 0.76, + "learning_rate": 1.1996893515130039e-05, + "loss": 2.7314, + "step": 229500 + }, + { + "epoch": 0.76, + "learning_rate": 1.1980334427337067e-05, + "loss": 2.7189, + "step": 229600 + }, + { + "epoch": 0.76, + "learning_rate": 1.1963775339544096e-05, + "loss": 2.7374, + "step": 229700 + }, + { + "epoch": 0.76, + "learning_rate": 1.1947216251751125e-05, + "loss": 2.7278, + "step": 229800 + }, + { + "epoch": 0.76, + "learning_rate": 1.1930657163958152e-05, + "loss": 2.7257, + "step": 229900 + }, + { + "epoch": 0.76, + "learning_rate": 1.1914098076165182e-05, + "loss": 2.731, + "step": 230000 + }, + { + "epoch": 0.76, + "learning_rate": 1.1897538988372209e-05, + "loss": 2.7389, + "step": 230100 + }, + { + "epoch": 0.76, + "learning_rate": 1.1880979900579237e-05, + "loss": 2.7379, + "step": 230200 + }, + { + "epoch": 0.76, + "learning_rate": 1.1864420812786266e-05, + "loss": 2.7315, + "step": 230300 + }, + { + "epoch": 0.76, + "learning_rate": 1.1847861724993293e-05, + "loss": 2.7324, + "step": 230400 + }, + { + "epoch": 0.76, + "learning_rate": 1.1831302637200323e-05, + "loss": 2.7426, + "step": 230500 + }, + { + "epoch": 0.76, + "learning_rate": 1.181474354940735e-05, + "loss": 2.7392, + "step": 230600 + }, + { + "epoch": 0.76, + "learning_rate": 1.179818446161438e-05, + "loss": 2.7276, + "step": 230700 + }, + { + "epoch": 0.76, + "learning_rate": 1.1781625373821407e-05, + "loss": 2.7275, + "step": 230800 + }, + { + "epoch": 0.76, + "learning_rate": 1.1765066286028436e-05, + "loss": 2.7456, + "step": 230900 + }, + { + "epoch": 0.77, + "learning_rate": 1.1748507198235464e-05, + "loss": 2.7287, + "step": 231000 + }, + { + "epoch": 0.77, + "learning_rate": 1.1731948110442491e-05, + "loss": 2.7375, + "step": 231100 + }, + { + "epoch": 0.77, + "learning_rate": 1.1715389022649522e-05, + "loss": 2.7393, + "step": 231200 + }, + { + "epoch": 0.77, + "learning_rate": 1.1698829934856549e-05, + "loss": 2.7384, + "step": 231300 + }, + { + "epoch": 0.77, + "learning_rate": 1.1682270847063577e-05, + "loss": 2.7277, + "step": 231400 + }, + { + "epoch": 0.77, + "learning_rate": 1.1665711759270606e-05, + "loss": 2.7267, + "step": 231500 + }, + { + "epoch": 0.77, + "learning_rate": 1.1649152671477634e-05, + "loss": 2.7496, + "step": 231600 + }, + { + "epoch": 0.77, + "learning_rate": 1.1632593583684663e-05, + "loss": 2.7337, + "step": 231700 + }, + { + "epoch": 0.77, + "learning_rate": 1.1616034495891692e-05, + "loss": 2.7402, + "step": 231800 + }, + { + "epoch": 0.77, + "learning_rate": 1.1599475408098719e-05, + "loss": 2.7322, + "step": 231900 + }, + { + "epoch": 0.77, + "learning_rate": 1.1582916320305747e-05, + "loss": 2.7449, + "step": 232000 + }, + { + "epoch": 0.77, + "learning_rate": 1.1566357232512776e-05, + "loss": 2.717, + "step": 232100 + }, + { + "epoch": 0.77, + "learning_rate": 1.1549798144719804e-05, + "loss": 2.721, + "step": 232200 + }, + { + "epoch": 0.77, + "learning_rate": 1.1533239056926833e-05, + "loss": 2.7512, + "step": 232300 + }, + { + "epoch": 0.77, + "learning_rate": 1.151667996913386e-05, + "loss": 2.7197, + "step": 232400 + }, + { + "epoch": 0.77, + "learning_rate": 1.150012088134089e-05, + "loss": 2.7312, + "step": 232500 + }, + { + "epoch": 0.77, + "learning_rate": 1.1483561793547917e-05, + "loss": 2.7292, + "step": 232600 + }, + { + "epoch": 0.77, + "learning_rate": 1.1467002705754946e-05, + "loss": 2.7392, + "step": 232700 + }, + { + "epoch": 0.77, + "learning_rate": 1.1450443617961974e-05, + "loss": 2.7247, + "step": 232800 + }, + { + "epoch": 0.77, + "learning_rate": 1.1433884530169001e-05, + "loss": 2.7287, + "step": 232900 + }, + { + "epoch": 0.77, + "learning_rate": 1.1417325442376031e-05, + "loss": 2.7429, + "step": 233000 + }, + { + "epoch": 0.77, + "learning_rate": 1.1400766354583058e-05, + "loss": 2.7295, + "step": 233100 + }, + { + "epoch": 0.77, + "learning_rate": 1.1384207266790089e-05, + "loss": 2.7364, + "step": 233200 + }, + { + "epoch": 0.77, + "learning_rate": 1.1367648178997116e-05, + "loss": 2.7233, + "step": 233300 + }, + { + "epoch": 0.77, + "learning_rate": 1.1351089091204144e-05, + "loss": 2.7265, + "step": 233400 + }, + { + "epoch": 0.77, + "learning_rate": 1.1334530003411173e-05, + "loss": 2.7441, + "step": 233500 + }, + { + "epoch": 0.77, + "learning_rate": 1.1317970915618201e-05, + "loss": 2.7349, + "step": 233600 + }, + { + "epoch": 0.77, + "learning_rate": 1.130141182782523e-05, + "loss": 2.7382, + "step": 233700 + }, + { + "epoch": 0.77, + "learning_rate": 1.1284852740032257e-05, + "loss": 2.7291, + "step": 233800 + }, + { + "epoch": 0.77, + "learning_rate": 1.1268293652239286e-05, + "loss": 2.7563, + "step": 233900 + }, + { + "epoch": 0.77, + "learning_rate": 1.1251734564446314e-05, + "loss": 2.7223, + "step": 234000 + }, + { + "epoch": 0.78, + "learning_rate": 1.1235175476653343e-05, + "loss": 2.7279, + "step": 234100 + }, + { + "epoch": 0.78, + "learning_rate": 1.1218616388860371e-05, + "loss": 2.744, + "step": 234200 + }, + { + "epoch": 0.78, + "learning_rate": 1.12020573010674e-05, + "loss": 2.727, + "step": 234300 + }, + { + "epoch": 0.78, + "learning_rate": 1.1185498213274427e-05, + "loss": 2.7376, + "step": 234400 + }, + { + "epoch": 0.78, + "learning_rate": 1.1168939125481455e-05, + "loss": 2.7504, + "step": 234500 + }, + { + "epoch": 0.78, + "learning_rate": 1.1152380037688484e-05, + "loss": 2.7306, + "step": 234600 + }, + { + "epoch": 0.78, + "learning_rate": 1.1135820949895513e-05, + "loss": 2.7442, + "step": 234700 + }, + { + "epoch": 0.78, + "learning_rate": 1.1119261862102541e-05, + "loss": 2.746, + "step": 234800 + }, + { + "epoch": 0.78, + "learning_rate": 1.1102702774309568e-05, + "loss": 2.7402, + "step": 234900 + }, + { + "epoch": 0.78, + "learning_rate": 1.1086143686516599e-05, + "loss": 2.7389, + "step": 235000 + }, + { + "epoch": 0.78, + "learning_rate": 1.1069584598723625e-05, + "loss": 2.7335, + "step": 235100 + }, + { + "epoch": 0.78, + "learning_rate": 1.1053025510930656e-05, + "loss": 2.7338, + "step": 235200 + }, + { + "epoch": 0.78, + "learning_rate": 1.1036466423137683e-05, + "loss": 2.7424, + "step": 235300 + }, + { + "epoch": 0.78, + "learning_rate": 1.1019907335344711e-05, + "loss": 2.74, + "step": 235400 + }, + { + "epoch": 0.78, + "learning_rate": 1.100334824755174e-05, + "loss": 2.7222, + "step": 235500 + }, + { + "epoch": 0.78, + "learning_rate": 1.0986789159758767e-05, + "loss": 2.741, + "step": 235600 + }, + { + "epoch": 0.78, + "learning_rate": 1.0970230071965797e-05, + "loss": 2.7255, + "step": 235700 + }, + { + "epoch": 0.78, + "learning_rate": 1.0953670984172824e-05, + "loss": 2.7184, + "step": 235800 + }, + { + "epoch": 0.78, + "learning_rate": 1.0937111896379853e-05, + "loss": 2.728, + "step": 235900 + }, + { + "epoch": 0.78, + "learning_rate": 1.0920552808586881e-05, + "loss": 2.7346, + "step": 236000 + }, + { + "epoch": 0.78, + "learning_rate": 1.090399372079391e-05, + "loss": 2.7221, + "step": 236100 + }, + { + "epoch": 0.78, + "learning_rate": 1.0887434633000938e-05, + "loss": 2.7363, + "step": 236200 + }, + { + "epoch": 0.78, + "learning_rate": 1.0870875545207965e-05, + "loss": 2.7295, + "step": 236300 + }, + { + "epoch": 0.78, + "learning_rate": 1.0854316457414994e-05, + "loss": 2.7392, + "step": 236400 + }, + { + "epoch": 0.78, + "learning_rate": 1.0837757369622023e-05, + "loss": 2.7505, + "step": 236500 + }, + { + "epoch": 0.78, + "learning_rate": 1.0821198281829051e-05, + "loss": 2.7302, + "step": 236600 + }, + { + "epoch": 0.78, + "learning_rate": 1.080463919403608e-05, + "loss": 2.7467, + "step": 236700 + }, + { + "epoch": 0.78, + "learning_rate": 1.0788080106243108e-05, + "loss": 2.749, + "step": 236800 + }, + { + "epoch": 0.78, + "learning_rate": 1.0771521018450135e-05, + "loss": 2.7397, + "step": 236900 + }, + { + "epoch": 0.78, + "learning_rate": 1.0754961930657166e-05, + "loss": 2.7338, + "step": 237000 + }, + { + "epoch": 0.79, + "learning_rate": 1.0738402842864192e-05, + "loss": 2.7321, + "step": 237100 + }, + { + "epoch": 0.79, + "learning_rate": 1.0721843755071221e-05, + "loss": 2.7236, + "step": 237200 + }, + { + "epoch": 0.79, + "learning_rate": 1.070528466727825e-05, + "loss": 2.7381, + "step": 237300 + }, + { + "epoch": 0.79, + "learning_rate": 1.0688725579485277e-05, + "loss": 2.7401, + "step": 237400 + }, + { + "epoch": 0.79, + "learning_rate": 1.0672166491692307e-05, + "loss": 2.7288, + "step": 237500 + }, + { + "epoch": 0.79, + "learning_rate": 1.0655607403899334e-05, + "loss": 2.7309, + "step": 237600 + }, + { + "epoch": 0.79, + "learning_rate": 1.0639048316106364e-05, + "loss": 2.7342, + "step": 237700 + }, + { + "epoch": 0.79, + "learning_rate": 1.0622489228313391e-05, + "loss": 2.7188, + "step": 237800 + }, + { + "epoch": 0.79, + "learning_rate": 1.060593014052042e-05, + "loss": 2.733, + "step": 237900 + }, + { + "epoch": 0.79, + "learning_rate": 1.0589371052727448e-05, + "loss": 2.7289, + "step": 238000 + }, + { + "epoch": 0.79, + "learning_rate": 1.0572811964934475e-05, + "loss": 2.7486, + "step": 238100 + }, + { + "epoch": 0.79, + "learning_rate": 1.0556252877141505e-05, + "loss": 2.7332, + "step": 238200 + }, + { + "epoch": 0.79, + "learning_rate": 1.0539693789348532e-05, + "loss": 2.7219, + "step": 238300 + }, + { + "epoch": 0.79, + "learning_rate": 1.0523134701555561e-05, + "loss": 2.7237, + "step": 238400 + }, + { + "epoch": 0.79, + "learning_rate": 1.050657561376259e-05, + "loss": 2.7207, + "step": 238500 + }, + { + "epoch": 0.79, + "learning_rate": 1.0490016525969618e-05, + "loss": 2.7311, + "step": 238600 + }, + { + "epoch": 0.79, + "learning_rate": 1.0473457438176647e-05, + "loss": 2.72, + "step": 238700 + }, + { + "epoch": 0.79, + "learning_rate": 1.0456898350383675e-05, + "loss": 2.7227, + "step": 238800 + }, + { + "epoch": 0.79, + "learning_rate": 1.0440339262590702e-05, + "loss": 2.7341, + "step": 238900 + }, + { + "epoch": 0.79, + "learning_rate": 1.0423780174797731e-05, + "loss": 2.7221, + "step": 239000 + }, + { + "epoch": 0.79, + "learning_rate": 1.040722108700476e-05, + "loss": 2.7184, + "step": 239100 + }, + { + "epoch": 0.79, + "learning_rate": 1.0390661999211788e-05, + "loss": 2.7403, + "step": 239200 + }, + { + "epoch": 0.79, + "learning_rate": 1.0374102911418817e-05, + "loss": 2.7283, + "step": 239300 + }, + { + "epoch": 0.79, + "learning_rate": 1.0357543823625844e-05, + "loss": 2.7418, + "step": 239400 + }, + { + "epoch": 0.79, + "learning_rate": 1.0340984735832874e-05, + "loss": 2.7326, + "step": 239500 + }, + { + "epoch": 0.79, + "learning_rate": 1.03244256480399e-05, + "loss": 2.724, + "step": 239600 + }, + { + "epoch": 0.79, + "learning_rate": 1.030786656024693e-05, + "loss": 2.7366, + "step": 239700 + }, + { + "epoch": 0.79, + "learning_rate": 1.0291307472453958e-05, + "loss": 2.7332, + "step": 239800 + }, + { + "epoch": 0.79, + "learning_rate": 1.0274748384660985e-05, + "loss": 2.7303, + "step": 239900 + }, + { + "epoch": 0.79, + "learning_rate": 1.0258189296868015e-05, + "loss": 2.7411, + "step": 240000 + }, + { + "epoch": 0.8, + "learning_rate": 1.0241630209075042e-05, + "loss": 2.7556, + "step": 240100 + }, + { + "epoch": 0.8, + "learning_rate": 1.0225071121282072e-05, + "loss": 2.7342, + "step": 240200 + }, + { + "epoch": 0.8, + "learning_rate": 1.02085120334891e-05, + "loss": 2.7435, + "step": 240300 + }, + { + "epoch": 0.8, + "learning_rate": 1.0191952945696128e-05, + "loss": 2.7391, + "step": 240400 + }, + { + "epoch": 0.8, + "learning_rate": 1.0175393857903157e-05, + "loss": 2.747, + "step": 240500 + }, + { + "epoch": 0.8, + "learning_rate": 1.0158834770110185e-05, + "loss": 2.7265, + "step": 240600 + }, + { + "epoch": 0.8, + "learning_rate": 1.0142275682317214e-05, + "loss": 2.7361, + "step": 240700 + }, + { + "epoch": 0.8, + "learning_rate": 1.012571659452424e-05, + "loss": 2.7358, + "step": 240800 + }, + { + "epoch": 0.8, + "learning_rate": 1.010915750673127e-05, + "loss": 2.7215, + "step": 240900 + }, + { + "epoch": 0.8, + "learning_rate": 1.0092598418938298e-05, + "loss": 2.7271, + "step": 241000 + }, + { + "epoch": 0.8, + "learning_rate": 1.0076039331145327e-05, + "loss": 2.7506, + "step": 241100 + }, + { + "epoch": 0.8, + "learning_rate": 1.0059480243352355e-05, + "loss": 2.7365, + "step": 241200 + }, + { + "epoch": 0.8, + "learning_rate": 1.0042921155559384e-05, + "loss": 2.7371, + "step": 241300 + }, + { + "epoch": 0.8, + "learning_rate": 1.002636206776641e-05, + "loss": 2.7289, + "step": 241400 + }, + { + "epoch": 0.8, + "learning_rate": 1.000980297997344e-05, + "loss": 2.7282, + "step": 241500 + }, + { + "epoch": 0.8, + "learning_rate": 9.993243892180468e-06, + "loss": 2.7335, + "step": 241600 + }, + { + "epoch": 0.8, + "learning_rate": 9.976684804387496e-06, + "loss": 2.7417, + "step": 241700 + }, + { + "epoch": 0.8, + "learning_rate": 9.960125716594525e-06, + "loss": 2.7407, + "step": 241800 + }, + { + "epoch": 0.8, + "learning_rate": 9.943566628801552e-06, + "loss": 2.7359, + "step": 241900 + }, + { + "epoch": 0.8, + "learning_rate": 9.927007541008582e-06, + "loss": 2.7315, + "step": 242000 + }, + { + "epoch": 0.8, + "learning_rate": 9.91044845321561e-06, + "loss": 2.7384, + "step": 242100 + }, + { + "epoch": 0.8, + "learning_rate": 9.893889365422638e-06, + "loss": 2.7202, + "step": 242200 + }, + { + "epoch": 0.8, + "learning_rate": 9.877330277629666e-06, + "loss": 2.7463, + "step": 242300 + }, + { + "epoch": 0.8, + "learning_rate": 9.860771189836695e-06, + "loss": 2.7396, + "step": 242400 + }, + { + "epoch": 0.8, + "learning_rate": 9.844212102043724e-06, + "loss": 2.7273, + "step": 242500 + }, + { + "epoch": 0.8, + "learning_rate": 9.82765301425075e-06, + "loss": 2.7276, + "step": 242600 + }, + { + "epoch": 0.8, + "learning_rate": 9.81109392645778e-06, + "loss": 2.7437, + "step": 242700 + }, + { + "epoch": 0.8, + "learning_rate": 9.794534838664808e-06, + "loss": 2.7189, + "step": 242800 + }, + { + "epoch": 0.8, + "learning_rate": 9.777975750871836e-06, + "loss": 2.7196, + "step": 242900 + }, + { + "epoch": 0.8, + "learning_rate": 9.761416663078865e-06, + "loss": 2.7228, + "step": 243000 + }, + { + "epoch": 0.81, + "learning_rate": 9.744857575285894e-06, + "loss": 2.7269, + "step": 243100 + }, + { + "epoch": 0.81, + "learning_rate": 9.728298487492922e-06, + "loss": 2.7313, + "step": 243200 + }, + { + "epoch": 0.81, + "learning_rate": 9.711739399699949e-06, + "loss": 2.7326, + "step": 243300 + }, + { + "epoch": 0.81, + "learning_rate": 9.695180311906978e-06, + "loss": 2.7212, + "step": 243400 + }, + { + "epoch": 0.81, + "learning_rate": 9.678621224114006e-06, + "loss": 2.7277, + "step": 243500 + }, + { + "epoch": 0.81, + "learning_rate": 9.662062136321035e-06, + "loss": 2.7364, + "step": 243600 + }, + { + "epoch": 0.81, + "learning_rate": 9.645503048528063e-06, + "loss": 2.7339, + "step": 243700 + }, + { + "epoch": 0.81, + "learning_rate": 9.628943960735092e-06, + "loss": 2.7141, + "step": 243800 + }, + { + "epoch": 0.81, + "learning_rate": 9.612384872942119e-06, + "loss": 2.7451, + "step": 243900 + }, + { + "epoch": 0.81, + "learning_rate": 9.595825785149148e-06, + "loss": 2.7415, + "step": 244000 + }, + { + "epoch": 0.81, + "learning_rate": 9.579266697356176e-06, + "loss": 2.7323, + "step": 244100 + }, + { + "epoch": 0.81, + "learning_rate": 9.562707609563205e-06, + "loss": 2.7279, + "step": 244200 + }, + { + "epoch": 0.81, + "learning_rate": 9.546148521770233e-06, + "loss": 2.7405, + "step": 244300 + }, + { + "epoch": 0.81, + "learning_rate": 9.52958943397726e-06, + "loss": 2.7326, + "step": 244400 + }, + { + "epoch": 0.81, + "learning_rate": 9.51303034618429e-06, + "loss": 2.7187, + "step": 244500 + }, + { + "epoch": 0.81, + "learning_rate": 9.496471258391318e-06, + "loss": 2.7227, + "step": 244600 + }, + { + "epoch": 0.81, + "learning_rate": 9.479912170598348e-06, + "loss": 2.715, + "step": 244700 + }, + { + "epoch": 0.81, + "learning_rate": 9.463353082805375e-06, + "loss": 2.7438, + "step": 244800 + }, + { + "epoch": 0.81, + "learning_rate": 9.446793995012403e-06, + "loss": 2.737, + "step": 244900 + }, + { + "epoch": 0.81, + "learning_rate": 9.430234907219432e-06, + "loss": 2.7309, + "step": 245000 + }, + { + "epoch": 0.81, + "learning_rate": 9.413675819426459e-06, + "loss": 2.7345, + "step": 245100 + }, + { + "epoch": 0.81, + "learning_rate": 9.39711673163349e-06, + "loss": 2.732, + "step": 245200 + }, + { + "epoch": 0.81, + "learning_rate": 9.380557643840516e-06, + "loss": 2.7364, + "step": 245300 + }, + { + "epoch": 0.81, + "learning_rate": 9.363998556047545e-06, + "loss": 2.7486, + "step": 245400 + }, + { + "epoch": 0.81, + "learning_rate": 9.347439468254573e-06, + "loss": 2.715, + "step": 245500 + }, + { + "epoch": 0.81, + "learning_rate": 9.330880380461602e-06, + "loss": 2.7204, + "step": 245600 + }, + { + "epoch": 0.81, + "learning_rate": 9.31432129266863e-06, + "loss": 2.7261, + "step": 245700 + }, + { + "epoch": 0.81, + "learning_rate": 9.297762204875657e-06, + "loss": 2.7294, + "step": 245800 + }, + { + "epoch": 0.81, + "learning_rate": 9.281203117082686e-06, + "loss": 2.7232, + "step": 245900 + }, + { + "epoch": 0.81, + "learning_rate": 9.264644029289715e-06, + "loss": 2.7211, + "step": 246000 + }, + { + "epoch": 0.82, + "learning_rate": 9.248084941496743e-06, + "loss": 2.7481, + "step": 246100 + }, + { + "epoch": 0.82, + "learning_rate": 9.231525853703772e-06, + "loss": 2.7316, + "step": 246200 + }, + { + "epoch": 0.82, + "learning_rate": 9.2149667659108e-06, + "loss": 2.7335, + "step": 246300 + }, + { + "epoch": 0.82, + "learning_rate": 9.198407678117827e-06, + "loss": 2.7222, + "step": 246400 + }, + { + "epoch": 0.82, + "learning_rate": 9.181848590324858e-06, + "loss": 2.7335, + "step": 246500 + }, + { + "epoch": 0.82, + "learning_rate": 9.165289502531885e-06, + "loss": 2.7255, + "step": 246600 + }, + { + "epoch": 0.82, + "learning_rate": 9.148730414738913e-06, + "loss": 2.7286, + "step": 246700 + }, + { + "epoch": 0.82, + "learning_rate": 9.132171326945942e-06, + "loss": 2.724, + "step": 246800 + }, + { + "epoch": 0.82, + "learning_rate": 9.115612239152969e-06, + "loss": 2.7224, + "step": 246900 + }, + { + "epoch": 0.82, + "learning_rate": 9.099053151359999e-06, + "loss": 2.7295, + "step": 247000 + }, + { + "epoch": 0.82, + "learning_rate": 9.082494063567026e-06, + "loss": 2.7223, + "step": 247100 + }, + { + "epoch": 0.82, + "learning_rate": 9.065934975774056e-06, + "loss": 2.7345, + "step": 247200 + }, + { + "epoch": 0.82, + "learning_rate": 9.049375887981083e-06, + "loss": 2.7282, + "step": 247300 + }, + { + "epoch": 0.82, + "learning_rate": 9.032816800188112e-06, + "loss": 2.7213, + "step": 247400 + }, + { + "epoch": 0.82, + "learning_rate": 9.01625771239514e-06, + "loss": 2.7421, + "step": 247500 + }, + { + "epoch": 0.82, + "learning_rate": 8.999698624602167e-06, + "loss": 2.7305, + "step": 247600 + }, + { + "epoch": 0.82, + "learning_rate": 8.983139536809198e-06, + "loss": 2.7322, + "step": 247700 + }, + { + "epoch": 0.82, + "learning_rate": 8.966580449016224e-06, + "loss": 2.7298, + "step": 247800 + }, + { + "epoch": 0.82, + "learning_rate": 8.950021361223253e-06, + "loss": 2.7287, + "step": 247900 + }, + { + "epoch": 0.82, + "learning_rate": 8.933462273430282e-06, + "loss": 2.7343, + "step": 248000 + }, + { + "epoch": 0.82, + "learning_rate": 8.91690318563731e-06, + "loss": 2.7329, + "step": 248100 + }, + { + "epoch": 0.82, + "learning_rate": 8.900344097844339e-06, + "loss": 2.7402, + "step": 248200 + }, + { + "epoch": 0.82, + "learning_rate": 8.883785010051367e-06, + "loss": 2.7289, + "step": 248300 + }, + { + "epoch": 0.82, + "learning_rate": 8.867225922258394e-06, + "loss": 2.7376, + "step": 248400 + }, + { + "epoch": 0.82, + "learning_rate": 8.850666834465423e-06, + "loss": 2.7236, + "step": 248500 + }, + { + "epoch": 0.82, + "learning_rate": 8.834107746672452e-06, + "loss": 2.728, + "step": 248600 + }, + { + "epoch": 0.82, + "learning_rate": 8.81754865887948e-06, + "loss": 2.7391, + "step": 248700 + }, + { + "epoch": 0.82, + "learning_rate": 8.800989571086509e-06, + "loss": 2.7361, + "step": 248800 + }, + { + "epoch": 0.82, + "learning_rate": 8.784430483293536e-06, + "loss": 2.75, + "step": 248900 + }, + { + "epoch": 0.82, + "learning_rate": 8.767871395500566e-06, + "loss": 2.7213, + "step": 249000 + }, + { + "epoch": 0.82, + "learning_rate": 8.751312307707593e-06, + "loss": 2.7327, + "step": 249100 + }, + { + "epoch": 0.83, + "learning_rate": 8.734753219914622e-06, + "loss": 2.7299, + "step": 249200 + }, + { + "epoch": 0.83, + "learning_rate": 8.71819413212165e-06, + "loss": 2.722, + "step": 249300 + }, + { + "epoch": 0.83, + "learning_rate": 8.701635044328677e-06, + "loss": 2.7191, + "step": 249400 + }, + { + "epoch": 0.83, + "learning_rate": 8.685075956535707e-06, + "loss": 2.7378, + "step": 249500 + }, + { + "epoch": 0.83, + "learning_rate": 8.668516868742734e-06, + "loss": 2.7283, + "step": 249600 + }, + { + "epoch": 0.83, + "learning_rate": 8.651957780949765e-06, + "loss": 2.7348, + "step": 249700 + }, + { + "epoch": 0.83, + "learning_rate": 8.635398693156791e-06, + "loss": 2.7295, + "step": 249800 + }, + { + "epoch": 0.83, + "learning_rate": 8.61883960536382e-06, + "loss": 2.7287, + "step": 249900 + }, + { + "epoch": 0.83, + "learning_rate": 8.602280517570849e-06, + "loss": 2.7304, + "step": 250000 + }, + { + "epoch": 0.83, + "learning_rate": 8.585721429777877e-06, + "loss": 2.7209, + "step": 250100 + }, + { + "epoch": 0.83, + "learning_rate": 8.569162341984906e-06, + "loss": 2.7467, + "step": 250200 + }, + { + "epoch": 0.83, + "learning_rate": 8.552603254191933e-06, + "loss": 2.7398, + "step": 250300 + }, + { + "epoch": 0.83, + "learning_rate": 8.536044166398961e-06, + "loss": 2.7311, + "step": 250400 + }, + { + "epoch": 0.83, + "learning_rate": 8.51948507860599e-06, + "loss": 2.7132, + "step": 250500 + }, + { + "epoch": 0.83, + "learning_rate": 8.502925990813019e-06, + "loss": 2.727, + "step": 250600 + }, + { + "epoch": 0.83, + "learning_rate": 8.486366903020047e-06, + "loss": 2.7394, + "step": 250700 + }, + { + "epoch": 0.83, + "learning_rate": 8.469807815227076e-06, + "loss": 2.7202, + "step": 250800 + }, + { + "epoch": 0.83, + "learning_rate": 8.453248727434103e-06, + "loss": 2.7196, + "step": 250900 + }, + { + "epoch": 0.83, + "learning_rate": 8.436689639641131e-06, + "loss": 2.7282, + "step": 251000 + }, + { + "epoch": 0.83, + "learning_rate": 8.42013055184816e-06, + "loss": 2.725, + "step": 251100 + }, + { + "epoch": 0.83, + "learning_rate": 8.403571464055189e-06, + "loss": 2.7321, + "step": 251200 + }, + { + "epoch": 0.83, + "learning_rate": 8.387012376262217e-06, + "loss": 2.7328, + "step": 251300 + }, + { + "epoch": 0.83, + "learning_rate": 8.370453288469244e-06, + "loss": 2.7245, + "step": 251400 + }, + { + "epoch": 0.83, + "learning_rate": 8.353894200676274e-06, + "loss": 2.7153, + "step": 251500 + }, + { + "epoch": 0.83, + "learning_rate": 8.337335112883301e-06, + "loss": 2.7212, + "step": 251600 + }, + { + "epoch": 0.83, + "learning_rate": 8.320776025090332e-06, + "loss": 2.7287, + "step": 251700 + }, + { + "epoch": 0.83, + "learning_rate": 8.304216937297359e-06, + "loss": 2.7498, + "step": 251800 + }, + { + "epoch": 0.83, + "learning_rate": 8.287657849504387e-06, + "loss": 2.7277, + "step": 251900 + }, + { + "epoch": 0.83, + "learning_rate": 8.271098761711416e-06, + "loss": 2.7236, + "step": 252000 + }, + { + "epoch": 0.83, + "learning_rate": 8.254539673918443e-06, + "loss": 2.747, + "step": 252100 + }, + { + "epoch": 0.84, + "learning_rate": 8.237980586125473e-06, + "loss": 2.7302, + "step": 252200 + }, + { + "epoch": 0.84, + "learning_rate": 8.2214214983325e-06, + "loss": 2.7151, + "step": 252300 + }, + { + "epoch": 0.84, + "learning_rate": 8.204862410539528e-06, + "loss": 2.7216, + "step": 252400 + }, + { + "epoch": 0.84, + "learning_rate": 8.188303322746557e-06, + "loss": 2.7344, + "step": 252500 + }, + { + "epoch": 0.84, + "learning_rate": 8.171744234953586e-06, + "loss": 2.7268, + "step": 252600 + }, + { + "epoch": 0.84, + "learning_rate": 8.155185147160614e-06, + "loss": 2.7449, + "step": 252700 + }, + { + "epoch": 0.84, + "learning_rate": 8.138626059367641e-06, + "loss": 2.7301, + "step": 252800 + }, + { + "epoch": 0.84, + "learning_rate": 8.12206697157467e-06, + "loss": 2.7315, + "step": 252900 + }, + { + "epoch": 0.84, + "learning_rate": 8.105507883781698e-06, + "loss": 2.7146, + "step": 253000 + }, + { + "epoch": 0.84, + "learning_rate": 8.088948795988727e-06, + "loss": 2.7296, + "step": 253100 + }, + { + "epoch": 0.84, + "learning_rate": 8.072389708195756e-06, + "loss": 2.7314, + "step": 253200 + }, + { + "epoch": 0.84, + "learning_rate": 8.055830620402784e-06, + "loss": 2.7356, + "step": 253300 + }, + { + "epoch": 0.84, + "learning_rate": 8.039271532609811e-06, + "loss": 2.7349, + "step": 253400 + }, + { + "epoch": 0.84, + "learning_rate": 8.022712444816841e-06, + "loss": 2.7229, + "step": 253500 + }, + { + "epoch": 0.84, + "learning_rate": 8.006153357023868e-06, + "loss": 2.7295, + "step": 253600 + }, + { + "epoch": 0.84, + "learning_rate": 7.989594269230897e-06, + "loss": 2.7299, + "step": 253700 + }, + { + "epoch": 0.84, + "learning_rate": 7.973035181437926e-06, + "loss": 2.7343, + "step": 253800 + }, + { + "epoch": 0.84, + "learning_rate": 7.956476093644952e-06, + "loss": 2.72, + "step": 253900 + }, + { + "epoch": 0.84, + "learning_rate": 7.939917005851983e-06, + "loss": 2.7371, + "step": 254000 + }, + { + "epoch": 0.84, + "learning_rate": 7.92335791805901e-06, + "loss": 2.7391, + "step": 254100 + }, + { + "epoch": 0.84, + "learning_rate": 7.90679883026604e-06, + "loss": 2.7278, + "step": 254200 + }, + { + "epoch": 0.84, + "learning_rate": 7.890239742473067e-06, + "loss": 2.7396, + "step": 254300 + }, + { + "epoch": 0.84, + "learning_rate": 7.873680654680095e-06, + "loss": 2.7233, + "step": 254400 + }, + { + "epoch": 0.84, + "learning_rate": 7.857121566887124e-06, + "loss": 2.7259, + "step": 254500 + }, + { + "epoch": 0.84, + "learning_rate": 7.840562479094151e-06, + "loss": 2.7401, + "step": 254600 + }, + { + "epoch": 0.84, + "learning_rate": 7.824003391301181e-06, + "loss": 2.7365, + "step": 254700 + }, + { + "epoch": 0.84, + "learning_rate": 7.807444303508208e-06, + "loss": 2.7104, + "step": 254800 + }, + { + "epoch": 0.84, + "learning_rate": 7.790885215715237e-06, + "loss": 2.7307, + "step": 254900 + }, + { + "epoch": 0.84, + "learning_rate": 7.774326127922265e-06, + "loss": 2.7335, + "step": 255000 + }, + { + "epoch": 0.84, + "learning_rate": 7.757767040129294e-06, + "loss": 2.7234, + "step": 255100 + }, + { + "epoch": 0.85, + "learning_rate": 7.741207952336323e-06, + "loss": 2.7422, + "step": 255200 + }, + { + "epoch": 0.85, + "learning_rate": 7.724648864543351e-06, + "loss": 2.7418, + "step": 255300 + }, + { + "epoch": 0.85, + "learning_rate": 7.708089776750378e-06, + "loss": 2.7111, + "step": 255400 + }, + { + "epoch": 0.85, + "learning_rate": 7.691530688957407e-06, + "loss": 2.7152, + "step": 255500 + }, + { + "epoch": 0.85, + "learning_rate": 7.674971601164435e-06, + "loss": 2.7339, + "step": 255600 + }, + { + "epoch": 0.85, + "learning_rate": 7.658412513371464e-06, + "loss": 2.7347, + "step": 255700 + }, + { + "epoch": 0.85, + "learning_rate": 7.641853425578493e-06, + "loss": 2.7201, + "step": 255800 + }, + { + "epoch": 0.85, + "learning_rate": 7.6252943377855195e-06, + "loss": 2.7186, + "step": 255900 + }, + { + "epoch": 0.85, + "learning_rate": 7.608735249992549e-06, + "loss": 2.7346, + "step": 256000 + }, + { + "epoch": 0.85, + "learning_rate": 7.592176162199577e-06, + "loss": 2.7375, + "step": 256100 + }, + { + "epoch": 0.85, + "learning_rate": 7.575617074406606e-06, + "loss": 2.716, + "step": 256200 + }, + { + "epoch": 0.85, + "learning_rate": 7.559057986613634e-06, + "loss": 2.7346, + "step": 256300 + }, + { + "epoch": 0.85, + "learning_rate": 7.542498898820662e-06, + "loss": 2.7283, + "step": 256400 + }, + { + "epoch": 0.85, + "learning_rate": 7.525939811027691e-06, + "loss": 2.7511, + "step": 256500 + }, + { + "epoch": 0.85, + "learning_rate": 7.509380723234719e-06, + "loss": 2.7392, + "step": 256600 + }, + { + "epoch": 0.85, + "learning_rate": 7.4928216354417475e-06, + "loss": 2.7376, + "step": 256700 + }, + { + "epoch": 0.85, + "learning_rate": 7.476262547648775e-06, + "loss": 2.7439, + "step": 256800 + }, + { + "epoch": 0.85, + "learning_rate": 7.459703459855803e-06, + "loss": 2.728, + "step": 256900 + }, + { + "epoch": 0.85, + "learning_rate": 7.4431443720628324e-06, + "loss": 2.7225, + "step": 257000 + }, + { + "epoch": 0.85, + "learning_rate": 7.42658528426986e-06, + "loss": 2.7322, + "step": 257100 + }, + { + "epoch": 0.85, + "learning_rate": 7.41002619647689e-06, + "loss": 2.7183, + "step": 257200 + }, + { + "epoch": 0.85, + "learning_rate": 7.393467108683917e-06, + "loss": 2.7408, + "step": 257300 + }, + { + "epoch": 0.85, + "learning_rate": 7.376908020890945e-06, + "loss": 2.7228, + "step": 257400 + }, + { + "epoch": 0.85, + "learning_rate": 7.360348933097974e-06, + "loss": 2.7164, + "step": 257500 + }, + { + "epoch": 0.85, + "learning_rate": 7.3437898453050015e-06, + "loss": 2.7168, + "step": 257600 + }, + { + "epoch": 0.85, + "learning_rate": 7.327230757512031e-06, + "loss": 2.7279, + "step": 257700 + }, + { + "epoch": 0.85, + "learning_rate": 7.310671669719059e-06, + "loss": 2.7405, + "step": 257800 + }, + { + "epoch": 0.85, + "learning_rate": 7.2941125819260865e-06, + "loss": 2.7341, + "step": 257900 + }, + { + "epoch": 0.85, + "learning_rate": 7.277553494133116e-06, + "loss": 2.7095, + "step": 258000 + }, + { + "epoch": 0.85, + "learning_rate": 7.260994406340144e-06, + "loss": 2.7196, + "step": 258100 + }, + { + "epoch": 0.86, + "learning_rate": 7.244435318547173e-06, + "loss": 2.7109, + "step": 258200 + }, + { + "epoch": 0.86, + "learning_rate": 7.227876230754201e-06, + "loss": 2.7292, + "step": 258300 + }, + { + "epoch": 0.86, + "learning_rate": 7.211317142961229e-06, + "loss": 2.7126, + "step": 258400 + }, + { + "epoch": 0.86, + "learning_rate": 7.194758055168257e-06, + "loss": 2.7343, + "step": 258500 + }, + { + "epoch": 0.86, + "learning_rate": 7.178198967375285e-06, + "loss": 2.7322, + "step": 258600 + }, + { + "epoch": 0.86, + "learning_rate": 7.1616398795823145e-06, + "loss": 2.732, + "step": 258700 + }, + { + "epoch": 0.86, + "learning_rate": 7.145080791789342e-06, + "loss": 2.7219, + "step": 258800 + }, + { + "epoch": 0.86, + "learning_rate": 7.12852170399637e-06, + "loss": 2.7284, + "step": 258900 + }, + { + "epoch": 0.86, + "learning_rate": 7.1119626162033995e-06, + "loss": 2.7177, + "step": 259000 + }, + { + "epoch": 0.86, + "learning_rate": 7.095403528410427e-06, + "loss": 2.7243, + "step": 259100 + }, + { + "epoch": 0.86, + "learning_rate": 7.078844440617456e-06, + "loss": 2.7318, + "step": 259200 + }, + { + "epoch": 0.86, + "learning_rate": 7.062285352824484e-06, + "loss": 2.7144, + "step": 259300 + }, + { + "epoch": 0.86, + "learning_rate": 7.045726265031511e-06, + "loss": 2.7396, + "step": 259400 + }, + { + "epoch": 0.86, + "learning_rate": 7.029167177238541e-06, + "loss": 2.7345, + "step": 259500 + }, + { + "epoch": 0.86, + "learning_rate": 7.0126080894455686e-06, + "loss": 2.7433, + "step": 259600 + }, + { + "epoch": 0.86, + "learning_rate": 6.996049001652598e-06, + "loss": 2.7284, + "step": 259700 + }, + { + "epoch": 0.86, + "learning_rate": 6.979489913859626e-06, + "loss": 2.7347, + "step": 259800 + }, + { + "epoch": 0.86, + "learning_rate": 6.9629308260666535e-06, + "loss": 2.7212, + "step": 259900 + }, + { + "epoch": 0.86, + "learning_rate": 6.946371738273683e-06, + "loss": 2.7116, + "step": 260000 + }, + { + "epoch": 0.86, + "learning_rate": 6.929812650480711e-06, + "loss": 2.7319, + "step": 260100 + }, + { + "epoch": 0.86, + "learning_rate": 6.913253562687739e-06, + "loss": 2.7408, + "step": 260200 + }, + { + "epoch": 0.86, + "learning_rate": 6.896694474894767e-06, + "loss": 2.7192, + "step": 260300 + }, + { + "epoch": 0.86, + "learning_rate": 6.880135387101795e-06, + "loss": 2.7284, + "step": 260400 + }, + { + "epoch": 0.86, + "learning_rate": 6.863576299308824e-06, + "loss": 2.7441, + "step": 260500 + }, + { + "epoch": 0.86, + "learning_rate": 6.847017211515852e-06, + "loss": 2.7253, + "step": 260600 + }, + { + "epoch": 0.86, + "learning_rate": 6.8304581237228815e-06, + "loss": 2.7369, + "step": 260700 + }, + { + "epoch": 0.86, + "learning_rate": 6.813899035929909e-06, + "loss": 2.7321, + "step": 260800 + }, + { + "epoch": 0.86, + "learning_rate": 6.797339948136937e-06, + "loss": 2.728, + "step": 260900 + }, + { + "epoch": 0.86, + "learning_rate": 6.780780860343966e-06, + "loss": 2.7196, + "step": 261000 + }, + { + "epoch": 0.86, + "learning_rate": 6.764221772550993e-06, + "loss": 2.7382, + "step": 261100 + }, + { + "epoch": 0.87, + "learning_rate": 6.747662684758023e-06, + "loss": 2.7289, + "step": 261200 + }, + { + "epoch": 0.87, + "learning_rate": 6.731103596965051e-06, + "loss": 2.7446, + "step": 261300 + }, + { + "epoch": 0.87, + "learning_rate": 6.714544509172078e-06, + "loss": 2.7189, + "step": 261400 + }, + { + "epoch": 0.87, + "learning_rate": 6.697985421379108e-06, + "loss": 2.7191, + "step": 261500 + }, + { + "epoch": 0.87, + "learning_rate": 6.681426333586136e-06, + "loss": 2.7291, + "step": 261600 + }, + { + "epoch": 0.87, + "learning_rate": 6.664867245793165e-06, + "loss": 2.7239, + "step": 261700 + }, + { + "epoch": 0.87, + "learning_rate": 6.648308158000193e-06, + "loss": 2.7359, + "step": 261800 + }, + { + "epoch": 0.87, + "learning_rate": 6.6317490702072206e-06, + "loss": 2.7416, + "step": 261900 + }, + { + "epoch": 0.87, + "learning_rate": 6.615189982414249e-06, + "loss": 2.7367, + "step": 262000 + }, + { + "epoch": 0.87, + "learning_rate": 6.598630894621277e-06, + "loss": 2.7358, + "step": 262100 + }, + { + "epoch": 0.87, + "learning_rate": 6.582071806828306e-06, + "loss": 2.7254, + "step": 262200 + }, + { + "epoch": 0.87, + "learning_rate": 6.565512719035334e-06, + "loss": 2.7489, + "step": 262300 + }, + { + "epoch": 0.87, + "learning_rate": 6.548953631242362e-06, + "loss": 2.7352, + "step": 262400 + }, + { + "epoch": 0.87, + "learning_rate": 6.532394543449391e-06, + "loss": 2.7318, + "step": 262500 + }, + { + "epoch": 0.87, + "learning_rate": 6.515835455656419e-06, + "loss": 2.7296, + "step": 262600 + }, + { + "epoch": 0.87, + "learning_rate": 6.499276367863448e-06, + "loss": 2.737, + "step": 262700 + }, + { + "epoch": 0.87, + "learning_rate": 6.4827172800704755e-06, + "loss": 2.732, + "step": 262800 + }, + { + "epoch": 0.87, + "learning_rate": 6.466158192277503e-06, + "loss": 2.7358, + "step": 262900 + }, + { + "epoch": 0.87, + "learning_rate": 6.449599104484533e-06, + "loss": 2.7234, + "step": 263000 + }, + { + "epoch": 0.87, + "learning_rate": 6.4330400166915604e-06, + "loss": 2.7303, + "step": 263100 + }, + { + "epoch": 0.87, + "learning_rate": 6.41648092889859e-06, + "loss": 2.7186, + "step": 263200 + }, + { + "epoch": 0.87, + "learning_rate": 6.399921841105618e-06, + "loss": 2.7135, + "step": 263300 + }, + { + "epoch": 0.87, + "learning_rate": 6.383362753312645e-06, + "loss": 2.7306, + "step": 263400 + }, + { + "epoch": 0.87, + "learning_rate": 6.366803665519675e-06, + "loss": 2.7262, + "step": 263500 + }, + { + "epoch": 0.87, + "learning_rate": 6.350244577726703e-06, + "loss": 2.7382, + "step": 263600 + }, + { + "epoch": 0.87, + "learning_rate": 6.333685489933731e-06, + "loss": 2.7274, + "step": 263700 + }, + { + "epoch": 0.87, + "learning_rate": 6.317126402140759e-06, + "loss": 2.7158, + "step": 263800 + }, + { + "epoch": 0.87, + "learning_rate": 6.300567314347787e-06, + "loss": 2.7248, + "step": 263900 + }, + { + "epoch": 0.87, + "learning_rate": 6.284008226554816e-06, + "loss": 2.7407, + "step": 264000 + }, + { + "epoch": 0.87, + "learning_rate": 6.267449138761844e-06, + "loss": 2.7307, + "step": 264100 + }, + { + "epoch": 0.87, + "learning_rate": 6.250890050968873e-06, + "loss": 2.729, + "step": 264200 + }, + { + "epoch": 0.88, + "learning_rate": 6.234330963175901e-06, + "loss": 2.7309, + "step": 264300 + }, + { + "epoch": 0.88, + "learning_rate": 6.21777187538293e-06, + "loss": 2.73, + "step": 264400 + }, + { + "epoch": 0.88, + "learning_rate": 6.2012127875899575e-06, + "loss": 2.7356, + "step": 264500 + }, + { + "epoch": 0.88, + "learning_rate": 6.184653699796985e-06, + "loss": 2.7466, + "step": 264600 + }, + { + "epoch": 0.88, + "learning_rate": 6.168094612004014e-06, + "loss": 2.7231, + "step": 264700 + }, + { + "epoch": 0.88, + "learning_rate": 6.1515355242110425e-06, + "loss": 2.7323, + "step": 264800 + }, + { + "epoch": 0.88, + "learning_rate": 6.134976436418071e-06, + "loss": 2.7242, + "step": 264900 + }, + { + "epoch": 0.88, + "learning_rate": 6.1184173486251e-06, + "loss": 2.6975, + "step": 265000 + }, + { + "epoch": 0.88, + "learning_rate": 6.1018582608321275e-06, + "loss": 2.7308, + "step": 265100 + }, + { + "epoch": 0.88, + "learning_rate": 6.085299173039156e-06, + "loss": 2.7255, + "step": 265200 + }, + { + "epoch": 0.88, + "learning_rate": 6.068740085246185e-06, + "loss": 2.739, + "step": 265300 + }, + { + "epoch": 0.88, + "learning_rate": 6.0521809974532124e-06, + "loss": 2.7254, + "step": 265400 + }, + { + "epoch": 0.88, + "learning_rate": 6.035621909660241e-06, + "loss": 2.7237, + "step": 265500 + }, + { + "epoch": 0.88, + "learning_rate": 6.019062821867269e-06, + "loss": 2.735, + "step": 265600 + }, + { + "epoch": 0.88, + "learning_rate": 6.002503734074297e-06, + "loss": 2.727, + "step": 265700 + }, + { + "epoch": 0.88, + "learning_rate": 5.985944646281326e-06, + "loss": 2.7353, + "step": 265800 + }, + { + "epoch": 0.88, + "learning_rate": 5.969385558488355e-06, + "loss": 2.711, + "step": 265900 + }, + { + "epoch": 0.88, + "learning_rate": 5.952826470695383e-06, + "loss": 2.7353, + "step": 266000 + }, + { + "epoch": 0.88, + "learning_rate": 5.936267382902411e-06, + "loss": 2.7081, + "step": 266100 + }, + { + "epoch": 0.88, + "learning_rate": 5.91970829510944e-06, + "loss": 2.7261, + "step": 266200 + }, + { + "epoch": 0.88, + "learning_rate": 5.903149207316467e-06, + "loss": 2.7307, + "step": 266300 + }, + { + "epoch": 0.88, + "learning_rate": 5.886590119523496e-06, + "loss": 2.7225, + "step": 266400 + }, + { + "epoch": 0.88, + "learning_rate": 5.8700310317305246e-06, + "loss": 2.7391, + "step": 266500 + }, + { + "epoch": 0.88, + "learning_rate": 5.853471943937552e-06, + "loss": 2.7216, + "step": 266600 + }, + { + "epoch": 0.88, + "learning_rate": 5.836912856144581e-06, + "loss": 2.7253, + "step": 266700 + }, + { + "epoch": 0.88, + "learning_rate": 5.8203537683516095e-06, + "loss": 2.7175, + "step": 266800 + }, + { + "epoch": 0.88, + "learning_rate": 5.803794680558638e-06, + "loss": 2.7278, + "step": 266900 + }, + { + "epoch": 0.88, + "learning_rate": 5.787235592765666e-06, + "loss": 2.7356, + "step": 267000 + }, + { + "epoch": 0.88, + "learning_rate": 5.7706765049726945e-06, + "loss": 2.7228, + "step": 267100 + }, + { + "epoch": 0.88, + "learning_rate": 5.754117417179722e-06, + "loss": 2.7192, + "step": 267200 + }, + { + "epoch": 0.89, + "learning_rate": 5.737558329386751e-06, + "loss": 2.7422, + "step": 267300 + }, + { + "epoch": 0.89, + "learning_rate": 5.7209992415937795e-06, + "loss": 2.7233, + "step": 267400 + }, + { + "epoch": 0.89, + "learning_rate": 5.704440153800808e-06, + "loss": 2.7247, + "step": 267500 + }, + { + "epoch": 0.89, + "learning_rate": 5.687881066007836e-06, + "loss": 2.7277, + "step": 267600 + }, + { + "epoch": 0.89, + "learning_rate": 5.6713219782148644e-06, + "loss": 2.7189, + "step": 267700 + }, + { + "epoch": 0.89, + "learning_rate": 5.654762890421893e-06, + "loss": 2.7251, + "step": 267800 + }, + { + "epoch": 0.89, + "learning_rate": 5.638203802628921e-06, + "loss": 2.718, + "step": 267900 + }, + { + "epoch": 0.89, + "learning_rate": 5.621644714835949e-06, + "loss": 2.7373, + "step": 268000 + }, + { + "epoch": 0.89, + "learning_rate": 5.605085627042977e-06, + "loss": 2.7404, + "step": 268100 + }, + { + "epoch": 0.89, + "learning_rate": 5.588526539250006e-06, + "loss": 2.7182, + "step": 268200 + }, + { + "epoch": 0.89, + "learning_rate": 5.571967451457034e-06, + "loss": 2.7222, + "step": 268300 + }, + { + "epoch": 0.89, + "learning_rate": 5.555408363664063e-06, + "loss": 2.7348, + "step": 268400 + }, + { + "epoch": 0.89, + "learning_rate": 5.538849275871092e-06, + "loss": 2.7245, + "step": 268500 + }, + { + "epoch": 0.89, + "learning_rate": 5.522290188078119e-06, + "loss": 2.7349, + "step": 268600 + }, + { + "epoch": 0.89, + "learning_rate": 5.505731100285148e-06, + "loss": 2.7358, + "step": 268700 + }, + { + "epoch": 0.89, + "learning_rate": 5.489172012492176e-06, + "loss": 2.7235, + "step": 268800 + }, + { + "epoch": 0.89, + "learning_rate": 5.472612924699204e-06, + "loss": 2.7184, + "step": 268900 + }, + { + "epoch": 0.89, + "learning_rate": 5.456053836906233e-06, + "loss": 2.7291, + "step": 269000 + }, + { + "epoch": 0.89, + "learning_rate": 5.439494749113261e-06, + "loss": 2.7339, + "step": 269100 + }, + { + "epoch": 0.89, + "learning_rate": 5.422935661320289e-06, + "loss": 2.7224, + "step": 269200 + }, + { + "epoch": 0.89, + "learning_rate": 5.406376573527318e-06, + "loss": 2.7226, + "step": 269300 + }, + { + "epoch": 0.89, + "learning_rate": 5.3898174857343465e-06, + "loss": 2.728, + "step": 269400 + }, + { + "epoch": 0.89, + "learning_rate": 5.373258397941375e-06, + "loss": 2.7079, + "step": 269500 + }, + { + "epoch": 0.89, + "learning_rate": 5.356699310148403e-06, + "loss": 2.7412, + "step": 269600 + }, + { + "epoch": 0.89, + "learning_rate": 5.340140222355431e-06, + "loss": 2.7284, + "step": 269700 + }, + { + "epoch": 0.89, + "learning_rate": 5.323581134562459e-06, + "loss": 2.7307, + "step": 269800 + }, + { + "epoch": 0.89, + "learning_rate": 5.307022046769488e-06, + "loss": 2.7229, + "step": 269900 + }, + { + "epoch": 0.89, + "learning_rate": 5.2904629589765164e-06, + "loss": 2.7277, + "step": 270000 + }, + { + "epoch": 0.89, + "learning_rate": 5.273903871183544e-06, + "loss": 2.7349, + "step": 270100 + }, + { + "epoch": 0.89, + "learning_rate": 5.257344783390573e-06, + "loss": 2.7279, + "step": 270200 + }, + { + "epoch": 0.9, + "learning_rate": 5.240785695597601e-06, + "loss": 2.7321, + "step": 270300 + }, + { + "epoch": 0.9, + "learning_rate": 5.22422660780463e-06, + "loss": 2.7224, + "step": 270400 + }, + { + "epoch": 0.9, + "learning_rate": 5.207667520011658e-06, + "loss": 2.7141, + "step": 270500 + }, + { + "epoch": 0.9, + "learning_rate": 5.1911084322186855e-06, + "loss": 2.732, + "step": 270600 + }, + { + "epoch": 0.9, + "learning_rate": 5.174549344425714e-06, + "loss": 2.7396, + "step": 270700 + }, + { + "epoch": 0.9, + "learning_rate": 5.157990256632743e-06, + "loss": 2.731, + "step": 270800 + }, + { + "epoch": 0.9, + "learning_rate": 5.141431168839771e-06, + "loss": 2.7335, + "step": 270900 + }, + { + "epoch": 0.9, + "learning_rate": 5.1248720810468e-06, + "loss": 2.7279, + "step": 271000 + }, + { + "epoch": 0.9, + "learning_rate": 5.108312993253828e-06, + "loss": 2.7192, + "step": 271100 + }, + { + "epoch": 0.9, + "learning_rate": 5.091753905460856e-06, + "loss": 2.7251, + "step": 271200 + }, + { + "epoch": 0.9, + "learning_rate": 5.075194817667885e-06, + "loss": 2.7374, + "step": 271300 + }, + { + "epoch": 0.9, + "learning_rate": 5.058635729874913e-06, + "loss": 2.7258, + "step": 271400 + }, + { + "epoch": 0.9, + "learning_rate": 5.042076642081941e-06, + "loss": 2.7273, + "step": 271500 + }, + { + "epoch": 0.9, + "learning_rate": 5.025517554288969e-06, + "loss": 2.7369, + "step": 271600 + }, + { + "epoch": 0.9, + "learning_rate": 5.008958466495998e-06, + "loss": 2.7309, + "step": 271700 + }, + { + "epoch": 0.9, + "learning_rate": 4.992399378703026e-06, + "loss": 2.7458, + "step": 271800 + }, + { + "epoch": 0.9, + "learning_rate": 4.975840290910055e-06, + "loss": 2.735, + "step": 271900 + }, + { + "epoch": 0.9, + "learning_rate": 4.9592812031170835e-06, + "loss": 2.7119, + "step": 272000 + }, + { + "epoch": 0.9, + "learning_rate": 4.942722115324111e-06, + "loss": 2.7245, + "step": 272100 + }, + { + "epoch": 0.9, + "learning_rate": 4.92616302753114e-06, + "loss": 2.7267, + "step": 272200 + }, + { + "epoch": 0.9, + "learning_rate": 4.909603939738168e-06, + "loss": 2.7273, + "step": 272300 + }, + { + "epoch": 0.9, + "learning_rate": 4.893044851945196e-06, + "loss": 2.73, + "step": 272400 + }, + { + "epoch": 0.9, + "learning_rate": 4.876485764152225e-06, + "loss": 2.7165, + "step": 272500 + }, + { + "epoch": 0.9, + "learning_rate": 4.8599266763592525e-06, + "loss": 2.7229, + "step": 272600 + }, + { + "epoch": 0.9, + "learning_rate": 4.843367588566281e-06, + "loss": 2.7099, + "step": 272700 + }, + { + "epoch": 0.9, + "learning_rate": 4.82680850077331e-06, + "loss": 2.7166, + "step": 272800 + }, + { + "epoch": 0.9, + "learning_rate": 4.810249412980338e-06, + "loss": 2.7251, + "step": 272900 + }, + { + "epoch": 0.9, + "learning_rate": 4.793690325187367e-06, + "loss": 2.7302, + "step": 273000 + }, + { + "epoch": 0.9, + "learning_rate": 4.777131237394395e-06, + "loss": 2.7249, + "step": 273100 + }, + { + "epoch": 0.9, + "learning_rate": 4.7605721496014225e-06, + "loss": 2.7428, + "step": 273200 + }, + { + "epoch": 0.91, + "learning_rate": 4.744013061808451e-06, + "loss": 2.7258, + "step": 273300 + }, + { + "epoch": 0.91, + "learning_rate": 4.72745397401548e-06, + "loss": 2.7171, + "step": 273400 + }, + { + "epoch": 0.91, + "learning_rate": 4.710894886222508e-06, + "loss": 2.7237, + "step": 273500 + }, + { + "epoch": 0.91, + "learning_rate": 4.694335798429536e-06, + "loss": 2.7271, + "step": 273600 + }, + { + "epoch": 0.91, + "learning_rate": 4.677776710636565e-06, + "loss": 2.7188, + "step": 273700 + }, + { + "epoch": 0.91, + "learning_rate": 4.661217622843593e-06, + "loss": 2.7193, + "step": 273800 + }, + { + "epoch": 0.91, + "learning_rate": 4.644658535050622e-06, + "loss": 2.7333, + "step": 273900 + }, + { + "epoch": 0.91, + "learning_rate": 4.62809944725765e-06, + "loss": 2.7178, + "step": 274000 + }, + { + "epoch": 0.91, + "learning_rate": 4.611540359464677e-06, + "loss": 2.7317, + "step": 274100 + }, + { + "epoch": 0.91, + "learning_rate": 4.594981271671706e-06, + "loss": 2.713, + "step": 274200 + }, + { + "epoch": 0.91, + "learning_rate": 4.578422183878735e-06, + "loss": 2.7263, + "step": 274300 + }, + { + "epoch": 0.91, + "learning_rate": 4.561863096085763e-06, + "loss": 2.7143, + "step": 274400 + }, + { + "epoch": 0.91, + "learning_rate": 4.545304008292792e-06, + "loss": 2.7164, + "step": 274500 + }, + { + "epoch": 0.91, + "learning_rate": 4.52874492049982e-06, + "loss": 2.7314, + "step": 274600 + }, + { + "epoch": 0.91, + "learning_rate": 4.512185832706848e-06, + "loss": 2.7256, + "step": 274700 + }, + { + "epoch": 0.91, + "learning_rate": 4.495626744913877e-06, + "loss": 2.7266, + "step": 274800 + }, + { + "epoch": 0.91, + "learning_rate": 4.4790676571209045e-06, + "loss": 2.7285, + "step": 274900 + }, + { + "epoch": 0.91, + "learning_rate": 4.462508569327933e-06, + "loss": 2.7193, + "step": 275000 + }, + { + "epoch": 0.91, + "learning_rate": 4.445949481534961e-06, + "loss": 2.7338, + "step": 275100 + }, + { + "epoch": 0.91, + "learning_rate": 4.4293903937419895e-06, + "loss": 2.7124, + "step": 275200 + }, + { + "epoch": 0.91, + "learning_rate": 4.412831305949018e-06, + "loss": 2.7237, + "step": 275300 + }, + { + "epoch": 0.91, + "learning_rate": 4.396272218156047e-06, + "loss": 2.7338, + "step": 275400 + }, + { + "epoch": 0.91, + "learning_rate": 4.379713130363075e-06, + "loss": 2.7274, + "step": 275500 + }, + { + "epoch": 0.91, + "learning_rate": 4.363154042570103e-06, + "loss": 2.7306, + "step": 275600 + }, + { + "epoch": 0.91, + "learning_rate": 4.346594954777132e-06, + "loss": 2.7527, + "step": 275700 + }, + { + "epoch": 0.91, + "learning_rate": 4.3300358669841595e-06, + "loss": 2.7208, + "step": 275800 + }, + { + "epoch": 0.91, + "learning_rate": 4.313476779191188e-06, + "loss": 2.7245, + "step": 275900 + }, + { + "epoch": 0.91, + "learning_rate": 4.296917691398217e-06, + "loss": 2.7296, + "step": 276000 + }, + { + "epoch": 0.91, + "learning_rate": 4.2803586036052444e-06, + "loss": 2.7225, + "step": 276100 + }, + { + "epoch": 0.91, + "learning_rate": 4.263799515812273e-06, + "loss": 2.735, + "step": 276200 + }, + { + "epoch": 0.92, + "learning_rate": 4.247240428019302e-06, + "loss": 2.7253, + "step": 276300 + }, + { + "epoch": 0.92, + "learning_rate": 4.23068134022633e-06, + "loss": 2.7186, + "step": 276400 + }, + { + "epoch": 0.92, + "learning_rate": 4.214122252433359e-06, + "loss": 2.719, + "step": 276500 + }, + { + "epoch": 0.92, + "learning_rate": 4.197563164640387e-06, + "loss": 2.7098, + "step": 276600 + }, + { + "epoch": 0.92, + "learning_rate": 4.181004076847414e-06, + "loss": 2.7251, + "step": 276700 + }, + { + "epoch": 0.92, + "learning_rate": 4.164444989054443e-06, + "loss": 2.7436, + "step": 276800 + }, + { + "epoch": 0.92, + "learning_rate": 4.1478859012614716e-06, + "loss": 2.7442, + "step": 276900 + }, + { + "epoch": 0.92, + "learning_rate": 4.1313268134685e-06, + "loss": 2.7355, + "step": 277000 + }, + { + "epoch": 0.92, + "learning_rate": 4.114767725675528e-06, + "loss": 2.728, + "step": 277100 + }, + { + "epoch": 0.92, + "learning_rate": 4.0982086378825565e-06, + "loss": 2.7312, + "step": 277200 + }, + { + "epoch": 0.92, + "learning_rate": 4.081649550089585e-06, + "loss": 2.7312, + "step": 277300 + }, + { + "epoch": 0.92, + "learning_rate": 4.065090462296614e-06, + "loss": 2.7253, + "step": 277400 + }, + { + "epoch": 0.92, + "learning_rate": 4.0485313745036415e-06, + "loss": 2.7228, + "step": 277500 + }, + { + "epoch": 0.92, + "learning_rate": 4.031972286710669e-06, + "loss": 2.7449, + "step": 277600 + }, + { + "epoch": 0.92, + "learning_rate": 4.015413198917698e-06, + "loss": 2.7197, + "step": 277700 + }, + { + "epoch": 0.92, + "learning_rate": 3.9988541111247265e-06, + "loss": 2.7268, + "step": 277800 + }, + { + "epoch": 0.92, + "learning_rate": 3.982295023331755e-06, + "loss": 2.7166, + "step": 277900 + }, + { + "epoch": 0.92, + "learning_rate": 3.965735935538784e-06, + "loss": 2.7307, + "step": 278000 + }, + { + "epoch": 0.92, + "learning_rate": 3.9491768477458115e-06, + "loss": 2.7178, + "step": 278100 + }, + { + "epoch": 0.92, + "learning_rate": 3.93261775995284e-06, + "loss": 2.7128, + "step": 278200 + }, + { + "epoch": 0.92, + "learning_rate": 3.916058672159869e-06, + "loss": 2.7134, + "step": 278300 + }, + { + "epoch": 0.92, + "learning_rate": 3.899499584366896e-06, + "loss": 2.7318, + "step": 278400 + }, + { + "epoch": 0.92, + "learning_rate": 3.882940496573925e-06, + "loss": 2.7211, + "step": 278500 + }, + { + "epoch": 0.92, + "learning_rate": 3.866381408780953e-06, + "loss": 2.7183, + "step": 278600 + }, + { + "epoch": 0.92, + "learning_rate": 3.849822320987981e-06, + "loss": 2.7227, + "step": 278700 + }, + { + "epoch": 0.92, + "learning_rate": 3.83326323319501e-06, + "loss": 2.7284, + "step": 278800 + }, + { + "epoch": 0.92, + "learning_rate": 3.816704145402039e-06, + "loss": 2.7242, + "step": 278900 + }, + { + "epoch": 0.92, + "learning_rate": 3.8001450576090668e-06, + "loss": 2.7107, + "step": 279000 + }, + { + "epoch": 0.92, + "learning_rate": 3.7835859698160945e-06, + "loss": 2.7204, + "step": 279100 + }, + { + "epoch": 0.92, + "learning_rate": 3.767026882023123e-06, + "loss": 2.7288, + "step": 279200 + }, + { + "epoch": 0.92, + "learning_rate": 3.7504677942301518e-06, + "loss": 2.7214, + "step": 279300 + }, + { + "epoch": 0.93, + "learning_rate": 3.73390870643718e-06, + "loss": 2.7142, + "step": 279400 + }, + { + "epoch": 0.93, + "learning_rate": 3.7173496186442085e-06, + "loss": 2.7304, + "step": 279500 + }, + { + "epoch": 0.93, + "learning_rate": 3.7007905308512363e-06, + "loss": 2.7156, + "step": 279600 + }, + { + "epoch": 0.93, + "learning_rate": 3.684231443058265e-06, + "loss": 2.7212, + "step": 279700 + }, + { + "epoch": 0.93, + "learning_rate": 3.6676723552652935e-06, + "loss": 2.7351, + "step": 279800 + }, + { + "epoch": 0.93, + "learning_rate": 3.6511132674723217e-06, + "loss": 2.7355, + "step": 279900 + }, + { + "epoch": 0.93, + "learning_rate": 3.6345541796793503e-06, + "loss": 2.7245, + "step": 280000 + }, + { + "epoch": 0.93, + "learning_rate": 3.617995091886378e-06, + "loss": 2.7301, + "step": 280100 + }, + { + "epoch": 0.93, + "learning_rate": 3.6014360040934067e-06, + "loss": 2.7221, + "step": 280200 + }, + { + "epoch": 0.93, + "learning_rate": 3.584876916300435e-06, + "loss": 2.7412, + "step": 280300 + }, + { + "epoch": 0.93, + "learning_rate": 3.5683178285074634e-06, + "loss": 2.7349, + "step": 280400 + }, + { + "epoch": 0.93, + "learning_rate": 3.551758740714492e-06, + "loss": 2.7274, + "step": 280500 + }, + { + "epoch": 0.93, + "learning_rate": 3.53519965292152e-06, + "loss": 2.7227, + "step": 280600 + }, + { + "epoch": 0.93, + "learning_rate": 3.5186405651285484e-06, + "loss": 2.7299, + "step": 280700 + }, + { + "epoch": 0.93, + "learning_rate": 3.5020814773355766e-06, + "loss": 2.7289, + "step": 280800 + }, + { + "epoch": 0.93, + "learning_rate": 3.485522389542605e-06, + "loss": 2.7145, + "step": 280900 + }, + { + "epoch": 0.93, + "learning_rate": 3.468963301749634e-06, + "loss": 2.7389, + "step": 281000 + }, + { + "epoch": 0.93, + "learning_rate": 3.4524042139566616e-06, + "loss": 2.7196, + "step": 281100 + }, + { + "epoch": 0.93, + "learning_rate": 3.4358451261636898e-06, + "loss": 2.7207, + "step": 281200 + }, + { + "epoch": 0.93, + "learning_rate": 3.4192860383707184e-06, + "loss": 2.7045, + "step": 281300 + }, + { + "epoch": 0.93, + "learning_rate": 3.402726950577747e-06, + "loss": 2.7072, + "step": 281400 + }, + { + "epoch": 0.93, + "learning_rate": 3.3861678627847756e-06, + "loss": 2.7164, + "step": 281500 + }, + { + "epoch": 0.93, + "learning_rate": 3.3696087749918033e-06, + "loss": 2.7163, + "step": 281600 + }, + { + "epoch": 0.93, + "learning_rate": 3.3530496871988315e-06, + "loss": 2.7205, + "step": 281700 + }, + { + "epoch": 0.93, + "learning_rate": 3.33649059940586e-06, + "loss": 2.7341, + "step": 281800 + }, + { + "epoch": 0.93, + "learning_rate": 3.3199315116128887e-06, + "loss": 2.714, + "step": 281900 + }, + { + "epoch": 0.93, + "learning_rate": 3.303372423819917e-06, + "loss": 2.7277, + "step": 282000 + }, + { + "epoch": 0.93, + "learning_rate": 3.2868133360269447e-06, + "loss": 2.7128, + "step": 282100 + }, + { + "epoch": 0.93, + "learning_rate": 3.2702542482339733e-06, + "loss": 2.7217, + "step": 282200 + }, + { + "epoch": 0.93, + "learning_rate": 3.253695160441002e-06, + "loss": 2.7212, + "step": 282300 + }, + { + "epoch": 0.94, + "learning_rate": 3.2371360726480305e-06, + "loss": 2.7114, + "step": 282400 + }, + { + "epoch": 0.94, + "learning_rate": 3.2205769848550587e-06, + "loss": 2.7221, + "step": 282500 + }, + { + "epoch": 0.94, + "learning_rate": 3.2040178970620864e-06, + "loss": 2.7163, + "step": 282600 + }, + { + "epoch": 0.94, + "learning_rate": 3.187458809269115e-06, + "loss": 2.731, + "step": 282700 + }, + { + "epoch": 0.94, + "learning_rate": 3.1708997214761436e-06, + "loss": 2.7097, + "step": 282800 + }, + { + "epoch": 0.94, + "learning_rate": 3.154340633683172e-06, + "loss": 2.7065, + "step": 282900 + }, + { + "epoch": 0.94, + "learning_rate": 3.1377815458902004e-06, + "loss": 2.7314, + "step": 283000 + }, + { + "epoch": 0.94, + "learning_rate": 3.1212224580972286e-06, + "loss": 2.718, + "step": 283100 + }, + { + "epoch": 0.94, + "learning_rate": 3.1046633703042568e-06, + "loss": 2.716, + "step": 283200 + }, + { + "epoch": 0.94, + "learning_rate": 3.0881042825112854e-06, + "loss": 2.7145, + "step": 283300 + }, + { + "epoch": 0.94, + "learning_rate": 3.0715451947183136e-06, + "loss": 2.7253, + "step": 283400 + }, + { + "epoch": 0.94, + "learning_rate": 3.0549861069253417e-06, + "loss": 2.7139, + "step": 283500 + }, + { + "epoch": 0.94, + "learning_rate": 3.0384270191323704e-06, + "loss": 2.7446, + "step": 283600 + }, + { + "epoch": 0.94, + "learning_rate": 3.0218679313393985e-06, + "loss": 2.7274, + "step": 283700 + }, + { + "epoch": 0.94, + "learning_rate": 3.0053088435464267e-06, + "loss": 2.7205, + "step": 283800 + }, + { + "epoch": 0.94, + "learning_rate": 2.9887497557534553e-06, + "loss": 2.7416, + "step": 283900 + }, + { + "epoch": 0.94, + "learning_rate": 2.9721906679604835e-06, + "loss": 2.7285, + "step": 284000 + }, + { + "epoch": 0.94, + "learning_rate": 2.955631580167512e-06, + "loss": 2.7306, + "step": 284100 + }, + { + "epoch": 0.94, + "learning_rate": 2.9390724923745403e-06, + "loss": 2.7156, + "step": 284200 + }, + { + "epoch": 0.94, + "learning_rate": 2.9225134045815685e-06, + "loss": 2.7181, + "step": 284300 + }, + { + "epoch": 0.94, + "learning_rate": 2.905954316788597e-06, + "loss": 2.7271, + "step": 284400 + }, + { + "epoch": 0.94, + "learning_rate": 2.8893952289956253e-06, + "loss": 2.7315, + "step": 284500 + }, + { + "epoch": 0.94, + "learning_rate": 2.872836141202654e-06, + "loss": 2.7206, + "step": 284600 + }, + { + "epoch": 0.94, + "learning_rate": 2.8562770534096816e-06, + "loss": 2.7288, + "step": 284700 + }, + { + "epoch": 0.94, + "learning_rate": 2.8397179656167102e-06, + "loss": 2.7197, + "step": 284800 + }, + { + "epoch": 0.94, + "learning_rate": 2.823158877823739e-06, + "loss": 2.7203, + "step": 284900 + }, + { + "epoch": 0.94, + "learning_rate": 2.806599790030767e-06, + "loss": 2.7156, + "step": 285000 + }, + { + "epoch": 0.94, + "learning_rate": 2.790040702237795e-06, + "loss": 2.7297, + "step": 285100 + }, + { + "epoch": 0.94, + "learning_rate": 2.7734816144448234e-06, + "loss": 2.7292, + "step": 285200 + }, + { + "epoch": 0.94, + "learning_rate": 2.756922526651852e-06, + "loss": 2.7221, + "step": 285300 + }, + { + "epoch": 0.95, + "learning_rate": 2.7403634388588806e-06, + "loss": 2.7198, + "step": 285400 + }, + { + "epoch": 0.95, + "learning_rate": 2.7238043510659088e-06, + "loss": 2.7139, + "step": 285500 + }, + { + "epoch": 0.95, + "learning_rate": 2.707245263272937e-06, + "loss": 2.7316, + "step": 285600 + }, + { + "epoch": 0.95, + "learning_rate": 2.690686175479965e-06, + "loss": 2.7253, + "step": 285700 + }, + { + "epoch": 0.95, + "learning_rate": 2.6741270876869937e-06, + "loss": 2.7337, + "step": 285800 + }, + { + "epoch": 0.95, + "learning_rate": 2.657567999894022e-06, + "loss": 2.728, + "step": 285900 + }, + { + "epoch": 0.95, + "learning_rate": 2.64100891210105e-06, + "loss": 2.7149, + "step": 286000 + }, + { + "epoch": 0.95, + "learning_rate": 2.6244498243080787e-06, + "loss": 2.73, + "step": 286100 + }, + { + "epoch": 0.95, + "learning_rate": 2.607890736515107e-06, + "loss": 2.7245, + "step": 286200 + }, + { + "epoch": 0.95, + "learning_rate": 2.5913316487221355e-06, + "loss": 2.7081, + "step": 286300 + }, + { + "epoch": 0.95, + "learning_rate": 2.5747725609291637e-06, + "loss": 2.7265, + "step": 286400 + }, + { + "epoch": 0.95, + "learning_rate": 2.558213473136192e-06, + "loss": 2.7257, + "step": 286500 + }, + { + "epoch": 0.95, + "learning_rate": 2.5416543853432205e-06, + "loss": 2.7335, + "step": 286600 + }, + { + "epoch": 0.95, + "learning_rate": 2.5250952975502487e-06, + "loss": 2.7297, + "step": 286700 + }, + { + "epoch": 0.95, + "learning_rate": 2.508536209757277e-06, + "loss": 2.719, + "step": 286800 + }, + { + "epoch": 0.95, + "learning_rate": 2.4919771219643054e-06, + "loss": 2.7298, + "step": 286900 + }, + { + "epoch": 0.95, + "learning_rate": 2.4754180341713336e-06, + "loss": 2.7118, + "step": 287000 + }, + { + "epoch": 0.95, + "learning_rate": 2.4588589463783622e-06, + "loss": 2.7186, + "step": 287100 + }, + { + "epoch": 0.95, + "learning_rate": 2.4422998585853904e-06, + "loss": 2.7432, + "step": 287200 + }, + { + "epoch": 0.95, + "learning_rate": 2.4257407707924186e-06, + "loss": 2.7148, + "step": 287300 + }, + { + "epoch": 0.95, + "learning_rate": 2.409181682999447e-06, + "loss": 2.7257, + "step": 287400 + }, + { + "epoch": 0.95, + "learning_rate": 2.3926225952064754e-06, + "loss": 2.7234, + "step": 287500 + }, + { + "epoch": 0.95, + "learning_rate": 2.376063507413504e-06, + "loss": 2.7229, + "step": 287600 + }, + { + "epoch": 0.95, + "learning_rate": 2.3595044196205317e-06, + "loss": 2.7151, + "step": 287700 + }, + { + "epoch": 0.95, + "learning_rate": 2.3429453318275604e-06, + "loss": 2.7244, + "step": 287800 + }, + { + "epoch": 0.95, + "learning_rate": 2.3263862440345885e-06, + "loss": 2.7171, + "step": 287900 + }, + { + "epoch": 0.95, + "learning_rate": 2.309827156241617e-06, + "loss": 2.7241, + "step": 288000 + }, + { + "epoch": 0.95, + "learning_rate": 2.2932680684486453e-06, + "loss": 2.7232, + "step": 288100 + }, + { + "epoch": 0.95, + "learning_rate": 2.2767089806556735e-06, + "loss": 2.7207, + "step": 288200 + }, + { + "epoch": 0.95, + "learning_rate": 2.260149892862702e-06, + "loss": 2.732, + "step": 288300 + }, + { + "epoch": 0.96, + "learning_rate": 2.2435908050697303e-06, + "loss": 2.712, + "step": 288400 + }, + { + "epoch": 0.96, + "learning_rate": 2.227031717276759e-06, + "loss": 2.7331, + "step": 288500 + }, + { + "epoch": 0.96, + "learning_rate": 2.210472629483787e-06, + "loss": 2.7276, + "step": 288600 + }, + { + "epoch": 0.96, + "learning_rate": 2.1939135416908153e-06, + "loss": 2.7387, + "step": 288700 + }, + { + "epoch": 0.96, + "learning_rate": 2.177354453897844e-06, + "loss": 2.7252, + "step": 288800 + }, + { + "epoch": 0.96, + "learning_rate": 2.160795366104872e-06, + "loss": 2.7242, + "step": 288900 + }, + { + "epoch": 0.96, + "learning_rate": 2.1442362783119002e-06, + "loss": 2.7224, + "step": 289000 + }, + { + "epoch": 0.96, + "learning_rate": 2.127677190518929e-06, + "loss": 2.7175, + "step": 289100 + }, + { + "epoch": 0.96, + "learning_rate": 2.111118102725957e-06, + "loss": 2.72, + "step": 289200 + }, + { + "epoch": 0.96, + "learning_rate": 2.0945590149329856e-06, + "loss": 2.7025, + "step": 289300 + }, + { + "epoch": 0.96, + "learning_rate": 2.077999927140014e-06, + "loss": 2.713, + "step": 289400 + }, + { + "epoch": 0.96, + "learning_rate": 2.061440839347042e-06, + "loss": 2.7141, + "step": 289500 + }, + { + "epoch": 0.96, + "learning_rate": 2.0448817515540706e-06, + "loss": 2.7224, + "step": 289600 + }, + { + "epoch": 0.96, + "learning_rate": 2.0283226637610988e-06, + "loss": 2.7317, + "step": 289700 + }, + { + "epoch": 0.96, + "learning_rate": 2.0117635759681274e-06, + "loss": 2.7207, + "step": 289800 + }, + { + "epoch": 0.96, + "learning_rate": 1.995204488175155e-06, + "loss": 2.7055, + "step": 289900 + }, + { + "epoch": 0.96, + "learning_rate": 1.9786454003821837e-06, + "loss": 2.7205, + "step": 290000 + }, + { + "epoch": 0.96, + "learning_rate": 1.9620863125892123e-06, + "loss": 2.7238, + "step": 290100 + }, + { + "epoch": 0.96, + "learning_rate": 1.9455272247962405e-06, + "loss": 2.7276, + "step": 290200 + }, + { + "epoch": 0.96, + "learning_rate": 1.9289681370032687e-06, + "loss": 2.7268, + "step": 290300 + }, + { + "epoch": 0.96, + "learning_rate": 1.912409049210297e-06, + "loss": 2.7161, + "step": 290400 + }, + { + "epoch": 0.96, + "learning_rate": 1.8958499614173255e-06, + "loss": 2.7319, + "step": 290500 + }, + { + "epoch": 0.96, + "learning_rate": 1.8792908736243539e-06, + "loss": 2.7139, + "step": 290600 + }, + { + "epoch": 0.96, + "learning_rate": 1.862731785831382e-06, + "loss": 2.7192, + "step": 290700 + }, + { + "epoch": 0.96, + "learning_rate": 1.8461726980384107e-06, + "loss": 2.7209, + "step": 290800 + }, + { + "epoch": 0.96, + "learning_rate": 1.8296136102454387e-06, + "loss": 2.7222, + "step": 290900 + }, + { + "epoch": 0.96, + "learning_rate": 1.8130545224524673e-06, + "loss": 2.7217, + "step": 291000 + }, + { + "epoch": 0.96, + "learning_rate": 1.7964954346594957e-06, + "loss": 2.7228, + "step": 291100 + }, + { + "epoch": 0.96, + "learning_rate": 1.7799363468665238e-06, + "loss": 2.7226, + "step": 291200 + }, + { + "epoch": 0.96, + "learning_rate": 1.7633772590735522e-06, + "loss": 2.7128, + "step": 291300 + }, + { + "epoch": 0.97, + "learning_rate": 1.7468181712805804e-06, + "loss": 2.7225, + "step": 291400 + }, + { + "epoch": 0.97, + "learning_rate": 1.7302590834876088e-06, + "loss": 2.7256, + "step": 291500 + }, + { + "epoch": 0.97, + "learning_rate": 1.7136999956946374e-06, + "loss": 2.7139, + "step": 291600 + }, + { + "epoch": 0.97, + "learning_rate": 1.6971409079016656e-06, + "loss": 2.7198, + "step": 291700 + }, + { + "epoch": 0.97, + "learning_rate": 1.680581820108694e-06, + "loss": 2.7132, + "step": 291800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6640227323157222e-06, + "loss": 2.7164, + "step": 291900 + }, + { + "epoch": 0.97, + "learning_rate": 1.6474636445227506e-06, + "loss": 2.7285, + "step": 292000 + }, + { + "epoch": 0.97, + "learning_rate": 1.630904556729779e-06, + "loss": 2.7306, + "step": 292100 + }, + { + "epoch": 0.97, + "learning_rate": 1.6143454689368071e-06, + "loss": 2.7126, + "step": 292200 + }, + { + "epoch": 0.97, + "learning_rate": 1.5977863811438357e-06, + "loss": 2.7196, + "step": 292300 + }, + { + "epoch": 0.97, + "learning_rate": 1.5812272933508637e-06, + "loss": 2.7182, + "step": 292400 + }, + { + "epoch": 0.97, + "learning_rate": 1.5646682055578923e-06, + "loss": 2.7335, + "step": 292500 + }, + { + "epoch": 0.97, + "learning_rate": 1.5481091177649205e-06, + "loss": 2.7233, + "step": 292600 + }, + { + "epoch": 0.97, + "learning_rate": 1.531550029971949e-06, + "loss": 2.7149, + "step": 292700 + }, + { + "epoch": 0.97, + "learning_rate": 1.5149909421789773e-06, + "loss": 2.7299, + "step": 292800 + }, + { + "epoch": 0.97, + "learning_rate": 1.4984318543860057e-06, + "loss": 2.7152, + "step": 292900 + }, + { + "epoch": 0.97, + "learning_rate": 1.4818727665930339e-06, + "loss": 2.7089, + "step": 293000 + }, + { + "epoch": 0.97, + "learning_rate": 1.4653136788000623e-06, + "loss": 2.7286, + "step": 293100 + }, + { + "epoch": 0.97, + "learning_rate": 1.4487545910070906e-06, + "loss": 2.7195, + "step": 293200 + }, + { + "epoch": 0.97, + "learning_rate": 1.432195503214119e-06, + "loss": 2.7164, + "step": 293300 + }, + { + "epoch": 0.97, + "learning_rate": 1.4156364154211474e-06, + "loss": 2.7264, + "step": 293400 + }, + { + "epoch": 0.97, + "learning_rate": 1.3990773276281756e-06, + "loss": 2.7372, + "step": 293500 + }, + { + "epoch": 0.97, + "learning_rate": 1.382518239835204e-06, + "loss": 2.731, + "step": 293600 + }, + { + "epoch": 0.97, + "learning_rate": 1.3659591520422324e-06, + "loss": 2.7163, + "step": 293700 + }, + { + "epoch": 0.97, + "learning_rate": 1.3494000642492608e-06, + "loss": 2.7156, + "step": 293800 + }, + { + "epoch": 0.97, + "learning_rate": 1.332840976456289e-06, + "loss": 2.726, + "step": 293900 + }, + { + "epoch": 0.97, + "learning_rate": 1.3162818886633174e-06, + "loss": 2.7266, + "step": 294000 + }, + { + "epoch": 0.97, + "learning_rate": 1.2997228008703456e-06, + "loss": 2.6988, + "step": 294100 + }, + { + "epoch": 0.97, + "learning_rate": 1.2831637130773742e-06, + "loss": 2.7276, + "step": 294200 + }, + { + "epoch": 0.97, + "learning_rate": 1.2666046252844023e-06, + "loss": 2.7213, + "step": 294300 + }, + { + "epoch": 0.97, + "learning_rate": 1.2500455374914307e-06, + "loss": 2.7351, + "step": 294400 + }, + { + "epoch": 0.98, + "learning_rate": 1.2334864496984591e-06, + "loss": 2.7231, + "step": 294500 + }, + { + "epoch": 0.98, + "learning_rate": 1.2169273619054873e-06, + "loss": 2.7209, + "step": 294600 + }, + { + "epoch": 0.98, + "learning_rate": 1.200368274112516e-06, + "loss": 2.7231, + "step": 294700 + }, + { + "epoch": 0.98, + "learning_rate": 1.183809186319544e-06, + "loss": 2.7204, + "step": 294800 + }, + { + "epoch": 0.98, + "learning_rate": 1.1672500985265725e-06, + "loss": 2.7318, + "step": 294900 + }, + { + "epoch": 0.98, + "learning_rate": 1.1506910107336007e-06, + "loss": 2.7227, + "step": 295000 + }, + { + "epoch": 0.98, + "learning_rate": 1.134131922940629e-06, + "loss": 2.7249, + "step": 295100 + }, + { + "epoch": 0.98, + "learning_rate": 1.1175728351476575e-06, + "loss": 2.7216, + "step": 295200 + }, + { + "epoch": 0.98, + "learning_rate": 1.1010137473546859e-06, + "loss": 2.7317, + "step": 295300 + }, + { + "epoch": 0.98, + "learning_rate": 1.084454659561714e-06, + "loss": 2.7187, + "step": 295400 + }, + { + "epoch": 0.98, + "learning_rate": 1.0678955717687424e-06, + "loss": 2.7258, + "step": 295500 + }, + { + "epoch": 0.98, + "learning_rate": 1.0513364839757708e-06, + "loss": 2.7267, + "step": 295600 + }, + { + "epoch": 0.98, + "learning_rate": 1.0347773961827992e-06, + "loss": 2.7247, + "step": 295700 + }, + { + "epoch": 0.98, + "learning_rate": 1.0182183083898276e-06, + "loss": 2.7303, + "step": 295800 + }, + { + "epoch": 0.98, + "learning_rate": 1.0016592205968558e-06, + "loss": 2.7117, + "step": 295900 + }, + { + "epoch": 0.98, + "learning_rate": 9.851001328038842e-07, + "loss": 2.7089, + "step": 296000 + }, + { + "epoch": 0.98, + "learning_rate": 9.685410450109124e-07, + "loss": 2.7345, + "step": 296100 + }, + { + "epoch": 0.98, + "learning_rate": 9.519819572179409e-07, + "loss": 2.7273, + "step": 296200 + }, + { + "epoch": 0.98, + "learning_rate": 9.354228694249692e-07, + "loss": 2.7351, + "step": 296300 + }, + { + "epoch": 0.98, + "learning_rate": 9.188637816319976e-07, + "loss": 2.7199, + "step": 296400 + }, + { + "epoch": 0.98, + "learning_rate": 9.023046938390258e-07, + "loss": 2.7407, + "step": 296500 + }, + { + "epoch": 0.98, + "learning_rate": 8.857456060460541e-07, + "loss": 2.7201, + "step": 296600 + }, + { + "epoch": 0.98, + "learning_rate": 8.691865182530826e-07, + "loss": 2.7112, + "step": 296700 + }, + { + "epoch": 0.98, + "learning_rate": 8.526274304601109e-07, + "loss": 2.7131, + "step": 296800 + }, + { + "epoch": 0.98, + "learning_rate": 8.360683426671392e-07, + "loss": 2.7137, + "step": 296900 + }, + { + "epoch": 0.98, + "learning_rate": 8.195092548741675e-07, + "loss": 2.719, + "step": 297000 + }, + { + "epoch": 0.98, + "learning_rate": 8.029501670811958e-07, + "loss": 2.7234, + "step": 297100 + }, + { + "epoch": 0.98, + "learning_rate": 7.863910792882243e-07, + "loss": 2.7248, + "step": 297200 + }, + { + "epoch": 0.98, + "learning_rate": 7.698319914952525e-07, + "loss": 2.7146, + "step": 297300 + }, + { + "epoch": 0.98, + "learning_rate": 7.532729037022809e-07, + "loss": 2.7145, + "step": 297400 + }, + { + "epoch": 0.99, + "learning_rate": 7.367138159093093e-07, + "loss": 2.7288, + "step": 297500 + }, + { + "epoch": 0.99, + "learning_rate": 7.201547281163375e-07, + "loss": 2.714, + "step": 297600 + }, + { + "epoch": 0.99, + "learning_rate": 7.035956403233659e-07, + "loss": 2.7175, + "step": 297700 + }, + { + "epoch": 0.99, + "learning_rate": 6.870365525303942e-07, + "loss": 2.7141, + "step": 297800 + }, + { + "epoch": 0.99, + "learning_rate": 6.704774647374226e-07, + "loss": 2.7221, + "step": 297900 + }, + { + "epoch": 0.99, + "learning_rate": 6.539183769444509e-07, + "loss": 2.7387, + "step": 298000 + }, + { + "epoch": 0.99, + "learning_rate": 6.373592891514793e-07, + "loss": 2.7228, + "step": 298100 + }, + { + "epoch": 0.99, + "learning_rate": 6.208002013585076e-07, + "loss": 2.7076, + "step": 298200 + }, + { + "epoch": 0.99, + "learning_rate": 6.042411135655359e-07, + "loss": 2.7287, + "step": 298300 + }, + { + "epoch": 0.99, + "learning_rate": 5.876820257725643e-07, + "loss": 2.7311, + "step": 298400 + }, + { + "epoch": 0.99, + "learning_rate": 5.711229379795926e-07, + "loss": 2.7195, + "step": 298500 + }, + { + "epoch": 0.99, + "learning_rate": 5.545638501866209e-07, + "loss": 2.7107, + "step": 298600 + }, + { + "epoch": 0.99, + "learning_rate": 5.380047623936492e-07, + "loss": 2.7212, + "step": 298700 + }, + { + "epoch": 0.99, + "learning_rate": 5.214456746006776e-07, + "loss": 2.7202, + "step": 298800 + }, + { + "epoch": 0.99, + "learning_rate": 5.04886586807706e-07, + "loss": 2.6954, + "step": 298900 + }, + { + "epoch": 0.99, + "learning_rate": 4.883274990147343e-07, + "loss": 2.7267, + "step": 299000 + }, + { + "epoch": 0.99, + "learning_rate": 4.7176841122176265e-07, + "loss": 2.723, + "step": 299100 + }, + { + "epoch": 0.99, + "learning_rate": 4.55209323428791e-07, + "loss": 2.7153, + "step": 299200 + }, + { + "epoch": 0.99, + "learning_rate": 4.386502356358193e-07, + "loss": 2.711, + "step": 299300 + }, + { + "epoch": 0.99, + "learning_rate": 4.2209114784284767e-07, + "loss": 2.7385, + "step": 299400 + }, + { + "epoch": 0.99, + "learning_rate": 4.0553206004987596e-07, + "loss": 2.7214, + "step": 299500 + }, + { + "epoch": 0.99, + "learning_rate": 3.889729722569043e-07, + "loss": 2.7111, + "step": 299600 + }, + { + "epoch": 0.99, + "learning_rate": 3.7241388446393264e-07, + "loss": 2.7106, + "step": 299700 + }, + { + "epoch": 0.99, + "learning_rate": 3.5585479667096104e-07, + "loss": 2.7095, + "step": 299800 + }, + { + "epoch": 0.99, + "learning_rate": 3.392957088779894e-07, + "loss": 2.7217, + "step": 299900 + }, + { + "epoch": 0.99, + "learning_rate": 3.2273662108501766e-07, + "loss": 2.7331, + "step": 300000 + }, + { + "epoch": 0.99, + "learning_rate": 3.06177533292046e-07, + "loss": 2.7229, + "step": 300100 + }, + { + "epoch": 0.99, + "learning_rate": 2.8961844549907435e-07, + "loss": 2.7105, + "step": 300200 + }, + { + "epoch": 0.99, + "learning_rate": 2.730593577061027e-07, + "loss": 2.7208, + "step": 300300 + }, + { + "epoch": 0.99, + "learning_rate": 2.5650026991313103e-07, + "loss": 2.7261, + "step": 300400 + }, + { + "epoch": 1.0, + "learning_rate": 2.3994118212015937e-07, + "loss": 2.7352, + "step": 300500 + }, + { + "epoch": 1.0, + "learning_rate": 2.233820943271877e-07, + "loss": 2.7271, + "step": 300600 + }, + { + "epoch": 1.0, + "learning_rate": 2.0682300653421605e-07, + "loss": 2.727, + "step": 300700 + }, + { + "epoch": 1.0, + "learning_rate": 1.902639187412444e-07, + "loss": 2.7298, + "step": 300800 + }, + { + "epoch": 1.0, + "learning_rate": 1.7370483094827273e-07, + "loss": 2.7344, + "step": 300900 + }, + { + "epoch": 1.0, + "learning_rate": 1.5714574315530107e-07, + "loss": 2.7185, + "step": 301000 + }, + { + "epoch": 1.0, + "learning_rate": 1.4058665536232941e-07, + "loss": 2.7195, + "step": 301100 + }, + { + "epoch": 1.0, + "learning_rate": 1.2402756756935775e-07, + "loss": 2.7201, + "step": 301200 + }, + { + "epoch": 1.0, + "learning_rate": 1.074684797763861e-07, + "loss": 2.7188, + "step": 301300 + }, + { + "epoch": 1.0, + "learning_rate": 9.090939198341442e-08, + "loss": 2.7252, + "step": 301400 + }, + { + "epoch": 1.0, + "learning_rate": 7.435030419044276e-08, + "loss": 2.7194, + "step": 301500 + } + ], + "max_steps": 301949, + "num_train_epochs": 1, + "total_flos": 8.170868148535296e+18, + "trial_name": null, + "trial_params": null +}