diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,18136 +1,18106 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9998841193901369, - "global_step": 302000, + "epoch": 0.9985129939161912, + "global_step": 301500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 4.9983445627162415e-05, - "loss": 2.7723, + "learning_rate": 4.998344091220703e-05, + "loss": 2.749, "step": 100 }, { "epoch": 0.0, - "learning_rate": 4.9966891254324834e-05, - "loss": 2.7712, + "learning_rate": 4.9966881824414056e-05, + "loss": 2.7594, "step": 200 }, { "epoch": 0.0, - "learning_rate": 4.9950336881487246e-05, - "loss": 2.7831, + "learning_rate": 4.9950322736621087e-05, + "loss": 2.7598, "step": 300 }, { "epoch": 0.0, - "learning_rate": 4.9933782508649665e-05, - "loss": 2.7613, + "learning_rate": 4.993376364882812e-05, + "loss": 2.7508, "step": 400 }, { "epoch": 0.0, - "learning_rate": 4.991722813581208e-05, - "loss": 2.7686, + "learning_rate": 4.991720456103514e-05, + "loss": 2.7558, "step": 500 }, { "epoch": 0.0, - "learning_rate": 4.990067376297449e-05, - "loss": 2.7701, + "learning_rate": 4.990064547324218e-05, + "loss": 2.7641, "step": 600 }, { "epoch": 0.0, - "learning_rate": 4.988411939013691e-05, - "loss": 2.7748, + "learning_rate": 4.98840863854492e-05, + "loss": 2.7775, "step": 700 }, { "epoch": 0.0, - "learning_rate": 4.986756501729932e-05, - "loss": 2.7806, + "learning_rate": 4.986752729765623e-05, + "loss": 2.7908, "step": 800 }, { "epoch": 0.0, - "learning_rate": 4.985101064446174e-05, - "loss": 2.7774, + "learning_rate": 4.9850968209863255e-05, + "loss": 2.767, "step": 900 }, { "epoch": 0.0, - "learning_rate": 4.983445627162415e-05, - "loss": 2.7915, + "learning_rate": 4.9834409122070285e-05, + "loss": 2.7804, "step": 1000 }, { "epoch": 0.0, - "learning_rate": 4.9817901898786564e-05, - "loss": 2.7917, + "learning_rate": 4.9817850034277315e-05, + "loss": 2.7667, "step": 1100 }, { "epoch": 0.0, - "learning_rate": 4.9801347525948983e-05, - "loss": 2.7785, + "learning_rate": 4.980129094648434e-05, + "loss": 2.7679, "step": 1200 }, { "epoch": 0.0, - "learning_rate": 4.9784793153111396e-05, - "loss": 2.8012, + "learning_rate": 4.978473185869137e-05, + "loss": 2.792, "step": 1300 }, { "epoch": 0.0, - "learning_rate": 4.9768238780273815e-05, - "loss": 2.7804, + "learning_rate": 4.97681727708984e-05, + "loss": 2.7904, "step": 1400 }, { "epoch": 0.0, - "learning_rate": 4.975168440743623e-05, - "loss": 2.7963, + "learning_rate": 4.975161368310542e-05, + "loss": 2.7738, "step": 1500 }, { "epoch": 0.01, - "learning_rate": 4.9735130034598646e-05, - "loss": 2.7811, + "learning_rate": 4.973505459531246e-05, + "loss": 2.7821, "step": 1600 }, { "epoch": 0.01, - "learning_rate": 4.971857566176106e-05, - "loss": 2.7846, + "learning_rate": 4.9718495507519484e-05, + "loss": 2.7821, "step": 1700 }, { "epoch": 0.01, - "learning_rate": 4.970202128892347e-05, - "loss": 2.7903, + "learning_rate": 4.9701936419726514e-05, + "loss": 2.7774, "step": 1800 }, { "epoch": 0.01, - "learning_rate": 4.968546691608589e-05, - "loss": 2.7826, + "learning_rate": 4.9685377331933544e-05, + "loss": 2.7651, "step": 1900 }, { "epoch": 0.01, - "learning_rate": 4.96689125432483e-05, - "loss": 2.7807, + "learning_rate": 4.966881824414057e-05, + "loss": 2.7752, "step": 2000 }, { "epoch": 0.01, - "learning_rate": 4.965235817041072e-05, - "loss": 2.7934, + "learning_rate": 4.96522591563476e-05, + "loss": 2.7897, "step": 2100 }, { "epoch": 0.01, - "learning_rate": 4.963580379757313e-05, - "loss": 2.7766, + "learning_rate": 4.963570006855462e-05, + "loss": 2.7725, "step": 2200 }, { "epoch": 0.01, - "learning_rate": 4.9619249424735545e-05, - "loss": 2.7896, + "learning_rate": 4.961914098076165e-05, + "loss": 2.7753, "step": 2300 }, { "epoch": 0.01, - "learning_rate": 4.9602695051897964e-05, - "loss": 2.7883, + "learning_rate": 4.960258189296868e-05, + "loss": 2.7824, "step": 2400 }, { "epoch": 0.01, - "learning_rate": 4.958614067906038e-05, - "loss": 2.8038, + "learning_rate": 4.9586022805175706e-05, + "loss": 2.7757, "step": 2500 }, { "epoch": 0.01, - "learning_rate": 4.9569586306222796e-05, - "loss": 2.7987, + "learning_rate": 4.956946371738274e-05, + "loss": 2.7658, "step": 2600 }, { "epoch": 0.01, - "learning_rate": 4.955303193338521e-05, - "loss": 2.7908, + "learning_rate": 4.9552904629589766e-05, + "loss": 2.7781, "step": 2700 }, { "epoch": 0.01, - "learning_rate": 4.953647756054762e-05, - "loss": 2.7882, + "learning_rate": 4.9536345541796797e-05, + "loss": 2.7819, "step": 2800 }, { "epoch": 0.01, - "learning_rate": 4.951992318771004e-05, - "loss": 2.7946, + "learning_rate": 4.951978645400383e-05, + "loss": 2.7963, "step": 2900 }, { "epoch": 0.01, - "learning_rate": 4.950336881487245e-05, - "loss": 2.8016, + "learning_rate": 4.950322736621085e-05, + "loss": 2.7773, "step": 3000 }, { "epoch": 0.01, - "learning_rate": 4.948681444203487e-05, - "loss": 2.8027, + "learning_rate": 4.948666827841788e-05, + "loss": 2.7823, "step": 3100 }, { "epoch": 0.01, - "learning_rate": 4.947026006919728e-05, - "loss": 2.7752, + "learning_rate": 4.947010919062491e-05, + "loss": 2.7753, "step": 3200 }, { "epoch": 0.01, - "learning_rate": 4.9453705696359695e-05, - "loss": 2.7759, + "learning_rate": 4.9453550102831935e-05, + "loss": 2.769, "step": 3300 }, { "epoch": 0.01, - "learning_rate": 4.9437151323522114e-05, - "loss": 2.7988, + "learning_rate": 4.9436991015038965e-05, + "loss": 2.7742, "step": 3400 }, { "epoch": 0.01, - "learning_rate": 4.9420596950684526e-05, - "loss": 2.791, + "learning_rate": 4.942043192724599e-05, + "loss": 2.7804, "step": 3500 }, { "epoch": 0.01, - "learning_rate": 4.9404042577846945e-05, - "loss": 2.8068, + "learning_rate": 4.9403872839453025e-05, + "loss": 2.7811, "step": 3600 }, { "epoch": 0.01, - "learning_rate": 4.938748820500936e-05, - "loss": 2.8004, + "learning_rate": 4.9387313751660056e-05, + "loss": 2.786, "step": 3700 }, { "epoch": 0.01, - "learning_rate": 4.937093383217177e-05, - "loss": 2.7909, + "learning_rate": 4.937075466386708e-05, + "loss": 2.7747, "step": 3800 }, { "epoch": 0.01, - "learning_rate": 4.935437945933419e-05, - "loss": 2.7892, + "learning_rate": 4.935419557607411e-05, + "loss": 2.7802, "step": 3900 }, { "epoch": 0.01, - "learning_rate": 4.93378250864966e-05, - "loss": 2.789, + "learning_rate": 4.933763648828113e-05, + "loss": 2.7754, "step": 4000 }, { "epoch": 0.01, - "learning_rate": 4.932127071365902e-05, - "loss": 2.7978, + "learning_rate": 4.932107740048816e-05, + "loss": 2.7792, "step": 4100 }, { "epoch": 0.01, - "learning_rate": 4.930471634082143e-05, - "loss": 2.8031, + "learning_rate": 4.9304518312695194e-05, + "loss": 2.784, "step": 4200 }, { "epoch": 0.01, - "learning_rate": 4.9288161967983845e-05, - "loss": 2.8041, + "learning_rate": 4.928795922490222e-05, + "loss": 2.7872, "step": 4300 }, { "epoch": 0.01, - "learning_rate": 4.9271607595146264e-05, - "loss": 2.8039, + "learning_rate": 4.927140013710925e-05, + "loss": 2.7802, "step": 4400 }, { "epoch": 0.01, - "learning_rate": 4.925505322230867e-05, - "loss": 2.7947, + "learning_rate": 4.925484104931628e-05, + "loss": 2.7902, "step": 4500 }, { "epoch": 0.02, - "learning_rate": 4.923849884947109e-05, - "loss": 2.7976, + "learning_rate": 4.923828196152331e-05, + "loss": 2.7898, "step": 4600 }, { "epoch": 0.02, - "learning_rate": 4.92219444766335e-05, - "loss": 2.7924, + "learning_rate": 4.922172287373034e-05, + "loss": 2.7698, "step": 4700 }, { "epoch": 0.02, - "learning_rate": 4.920539010379592e-05, - "loss": 2.7834, + "learning_rate": 4.920516378593736e-05, + "loss": 2.7779, "step": 4800 }, { "epoch": 0.02, - "learning_rate": 4.918883573095833e-05, - "loss": 2.7912, + "learning_rate": 4.918860469814439e-05, + "loss": 2.7792, "step": 4900 }, { "epoch": 0.02, - "learning_rate": 4.9172281358120744e-05, - "loss": 2.8047, + "learning_rate": 4.917204561035142e-05, + "loss": 2.7807, "step": 5000 }, { "epoch": 0.02, - "learning_rate": 4.915572698528316e-05, - "loss": 2.7892, + "learning_rate": 4.9155486522558446e-05, + "loss": 2.7927, "step": 5100 }, { "epoch": 0.02, - "learning_rate": 4.9139172612445575e-05, - "loss": 2.7825, + "learning_rate": 4.9138927434765476e-05, + "loss": 2.773, "step": 5200 }, { "epoch": 0.02, - "learning_rate": 4.9122618239607995e-05, - "loss": 2.7918, + "learning_rate": 4.91223683469725e-05, + "loss": 2.7723, "step": 5300 }, { "epoch": 0.02, - "learning_rate": 4.910606386677041e-05, - "loss": 2.7886, + "learning_rate": 4.910580925917953e-05, + "loss": 2.7633, "step": 5400 }, { "epoch": 0.02, - "learning_rate": 4.9089509493932826e-05, - "loss": 2.79, + "learning_rate": 4.908925017138656e-05, + "loss": 2.7821, "step": 5500 }, { "epoch": 0.02, - "learning_rate": 4.907295512109524e-05, - "loss": 2.7986, + "learning_rate": 4.907269108359359e-05, + "loss": 2.7641, "step": 5600 }, { "epoch": 0.02, - "learning_rate": 4.905640074825765e-05, - "loss": 2.8048, + "learning_rate": 4.905613199580062e-05, + "loss": 2.7798, "step": 5700 }, { "epoch": 0.02, - "learning_rate": 4.903984637542007e-05, - "loss": 2.7946, + "learning_rate": 4.9039572908007645e-05, + "loss": 2.7849, "step": 5800 }, { "epoch": 0.02, - "learning_rate": 4.902329200258248e-05, - "loss": 2.8066, + "learning_rate": 4.9023013820214675e-05, + "loss": 2.781, "step": 5900 }, { "epoch": 0.02, - "learning_rate": 4.90067376297449e-05, - "loss": 2.7966, + "learning_rate": 4.9006454732421705e-05, + "loss": 2.7916, "step": 6000 }, { "epoch": 0.02, - "learning_rate": 4.899018325690731e-05, - "loss": 2.7911, + "learning_rate": 4.898989564462873e-05, + "loss": 2.7924, "step": 6100 }, { "epoch": 0.02, - "learning_rate": 4.8973628884069725e-05, - "loss": 2.793, + "learning_rate": 4.897333655683576e-05, + "loss": 2.7795, "step": 6200 }, { "epoch": 0.02, - "learning_rate": 4.8957074511232144e-05, - "loss": 2.7782, + "learning_rate": 4.895677746904279e-05, + "loss": 2.789, "step": 6300 }, { "epoch": 0.02, - "learning_rate": 4.8940520138394556e-05, - "loss": 2.8033, + "learning_rate": 4.894021838124981e-05, + "loss": 2.779, "step": 6400 }, { "epoch": 0.02, - "learning_rate": 4.8923965765556976e-05, - "loss": 2.8095, + "learning_rate": 4.892365929345684e-05, + "loss": 2.7844, "step": 6500 }, { "epoch": 0.02, - "learning_rate": 4.890741139271939e-05, - "loss": 2.7841, + "learning_rate": 4.8907100205663873e-05, + "loss": 2.7797, "step": 6600 }, { "epoch": 0.02, - "learning_rate": 4.88908570198818e-05, - "loss": 2.8143, + "learning_rate": 4.8890541117870904e-05, + "loss": 2.7895, "step": 6700 }, { "epoch": 0.02, - "learning_rate": 4.887430264704422e-05, - "loss": 2.7956, + "learning_rate": 4.887398203007793e-05, + "loss": 2.7759, "step": 6800 }, { "epoch": 0.02, - "learning_rate": 4.885774827420663e-05, - "loss": 2.8094, + "learning_rate": 4.885742294228496e-05, + "loss": 2.7844, "step": 6900 }, { "epoch": 0.02, - "learning_rate": 4.884119390136905e-05, - "loss": 2.7828, + "learning_rate": 4.884086385449199e-05, + "loss": 2.7638, "step": 7000 }, { "epoch": 0.02, - "learning_rate": 4.882463952853146e-05, - "loss": 2.8003, + "learning_rate": 4.882430476669901e-05, + "loss": 2.7765, "step": 7100 }, { "epoch": 0.02, - "learning_rate": 4.8808085155693875e-05, - "loss": 2.7788, + "learning_rate": 4.880774567890604e-05, + "loss": 2.7778, "step": 7200 }, { "epoch": 0.02, - "learning_rate": 4.8791530782856294e-05, - "loss": 2.812, + "learning_rate": 4.879118659111307e-05, + "loss": 2.7846, "step": 7300 }, { "epoch": 0.02, - "learning_rate": 4.8774976410018706e-05, - "loss": 2.7785, + "learning_rate": 4.8774627503320095e-05, + "loss": 2.7719, "step": 7400 }, { "epoch": 0.02, - "learning_rate": 4.8758422037181125e-05, - "loss": 2.7923, + "learning_rate": 4.8758068415527126e-05, + "loss": 2.7748, "step": 7500 }, { "epoch": 0.03, - "learning_rate": 4.874186766434354e-05, - "loss": 2.8001, + "learning_rate": 4.8741509327734156e-05, + "loss": 2.7751, "step": 7600 }, { "epoch": 0.03, - "learning_rate": 4.872531329150595e-05, - "loss": 2.7898, + "learning_rate": 4.8724950239941186e-05, + "loss": 2.781, "step": 7700 }, { "epoch": 0.03, - "learning_rate": 4.870875891866837e-05, - "loss": 2.8008, + "learning_rate": 4.870839115214822e-05, + "loss": 2.7754, "step": 7800 }, { "epoch": 0.03, - "learning_rate": 4.869220454583078e-05, - "loss": 2.7907, + "learning_rate": 4.869183206435524e-05, + "loss": 2.7904, "step": 7900 }, { "epoch": 0.03, - "learning_rate": 4.86756501729932e-05, - "loss": 2.791, + "learning_rate": 4.867527297656227e-05, + "loss": 2.777, "step": 8000 }, { "epoch": 0.03, - "learning_rate": 4.865909580015561e-05, - "loss": 2.7839, + "learning_rate": 4.8658713888769294e-05, + "loss": 2.7694, "step": 8100 }, { "epoch": 0.03, - "learning_rate": 4.8642541427318025e-05, - "loss": 2.7939, + "learning_rate": 4.8642154800976324e-05, + "loss": 2.7766, "step": 8200 }, { "epoch": 0.03, - "learning_rate": 4.8625987054480444e-05, - "loss": 2.7828, + "learning_rate": 4.8625595713183355e-05, + "loss": 2.7873, "step": 8300 }, { "epoch": 0.03, - "learning_rate": 4.8609432681642856e-05, - "loss": 2.7884, + "learning_rate": 4.860903662539038e-05, + "loss": 2.7604, "step": 8400 }, { "epoch": 0.03, - "learning_rate": 4.8592878308805275e-05, - "loss": 2.798, + "learning_rate": 4.859247753759741e-05, + "loss": 2.78, "step": 8500 }, { "epoch": 0.03, - "learning_rate": 4.857632393596769e-05, - "loss": 2.791, + "learning_rate": 4.857591844980444e-05, + "loss": 2.7831, "step": 8600 }, { "epoch": 0.03, - "learning_rate": 4.8559769563130106e-05, - "loss": 2.8015, + "learning_rate": 4.855935936201147e-05, + "loss": 2.7907, "step": 8700 }, { "epoch": 0.03, - "learning_rate": 4.854321519029252e-05, - "loss": 2.7992, + "learning_rate": 4.85428002742185e-05, + "loss": 2.7846, "step": 8800 }, { "epoch": 0.03, - "learning_rate": 4.852666081745493e-05, - "loss": 2.794, + "learning_rate": 4.852624118642552e-05, + "loss": 2.7813, "step": 8900 }, { "epoch": 0.03, - "learning_rate": 4.851010644461735e-05, - "loss": 2.7896, + "learning_rate": 4.850968209863255e-05, + "loss": 2.7797, "step": 9000 }, { "epoch": 0.03, - "learning_rate": 4.849355207177976e-05, - "loss": 2.792, + "learning_rate": 4.8493123010839583e-05, + "loss": 2.7758, "step": 9100 }, { "epoch": 0.03, - "learning_rate": 4.847699769894218e-05, - "loss": 2.7891, + "learning_rate": 4.847656392304661e-05, + "loss": 2.78, "step": 9200 }, { "epoch": 0.03, - "learning_rate": 4.846044332610459e-05, - "loss": 2.7891, + "learning_rate": 4.846000483525364e-05, + "loss": 2.7885, "step": 9300 }, { "epoch": 0.03, - "learning_rate": 4.8443888953267006e-05, - "loss": 2.8021, + "learning_rate": 4.844344574746066e-05, + "loss": 2.7855, "step": 9400 }, { "epoch": 0.03, - "learning_rate": 4.8427334580429425e-05, - "loss": 2.8079, + "learning_rate": 4.842688665966769e-05, + "loss": 2.7835, "step": 9500 }, { "epoch": 0.03, - "learning_rate": 4.841078020759184e-05, - "loss": 2.8038, + "learning_rate": 4.841032757187473e-05, + "loss": 2.7868, "step": 9600 }, { "epoch": 0.03, - "learning_rate": 4.8394225834754256e-05, - "loss": 2.7942, + "learning_rate": 4.839376848408175e-05, + "loss": 2.7991, "step": 9700 }, { "epoch": 0.03, - "learning_rate": 4.837767146191667e-05, - "loss": 2.805, + "learning_rate": 4.837720939628878e-05, + "loss": 2.7867, "step": 9800 }, { "epoch": 0.03, - "learning_rate": 4.836111708907908e-05, - "loss": 2.8073, + "learning_rate": 4.8360650308495806e-05, + "loss": 2.7845, "step": 9900 }, { "epoch": 0.03, - "learning_rate": 4.83445627162415e-05, - "loss": 2.8066, + "learning_rate": 4.8344091220702836e-05, + "loss": 2.7796, "step": 10000 }, { "epoch": 0.03, - "learning_rate": 4.832800834340391e-05, - "loss": 2.7873, + "learning_rate": 4.8327532132909866e-05, + "loss": 2.7918, "step": 10100 }, { "epoch": 0.03, - "learning_rate": 4.831145397056633e-05, - "loss": 2.7863, + "learning_rate": 4.831097304511689e-05, + "loss": 2.806, "step": 10200 }, { "epoch": 0.03, - "learning_rate": 4.829489959772874e-05, - "loss": 2.8084, + "learning_rate": 4.829441395732392e-05, + "loss": 2.7917, "step": 10300 }, { "epoch": 0.03, - "learning_rate": 4.8278345224891155e-05, - "loss": 2.7936, + "learning_rate": 4.827785486953095e-05, + "loss": 2.7812, "step": 10400 }, { "epoch": 0.03, - "learning_rate": 4.8261790852053574e-05, - "loss": 2.8019, + "learning_rate": 4.8261295781737974e-05, + "loss": 2.7863, "step": 10500 }, { "epoch": 0.04, - "learning_rate": 4.824523647921599e-05, - "loss": 2.7848, + "learning_rate": 4.824473669394501e-05, + "loss": 2.7812, "step": 10600 }, { "epoch": 0.04, - "learning_rate": 4.8228682106378406e-05, - "loss": 2.8021, + "learning_rate": 4.8228177606152034e-05, + "loss": 2.7725, "step": 10700 }, { "epoch": 0.04, - "learning_rate": 4.821212773354082e-05, - "loss": 2.8008, + "learning_rate": 4.8211618518359065e-05, + "loss": 2.7792, "step": 10800 }, { "epoch": 0.04, - "learning_rate": 4.819557336070323e-05, - "loss": 2.7934, + "learning_rate": 4.8195059430566095e-05, + "loss": 2.7858, "step": 10900 }, { "epoch": 0.04, - "learning_rate": 4.817901898786565e-05, - "loss": 2.7936, + "learning_rate": 4.817850034277312e-05, + "loss": 2.7859, "step": 11000 }, { "epoch": 0.04, - "learning_rate": 4.816246461502806e-05, - "loss": 2.8028, + "learning_rate": 4.816194125498015e-05, + "loss": 2.7818, "step": 11100 }, { "epoch": 0.04, - "learning_rate": 4.814591024219048e-05, - "loss": 2.7983, + "learning_rate": 4.814538216718717e-05, + "loss": 2.7695, "step": 11200 }, { "epoch": 0.04, - "learning_rate": 4.812935586935289e-05, - "loss": 2.7971, + "learning_rate": 4.81288230793942e-05, + "loss": 2.7782, "step": 11300 }, { "epoch": 0.04, - "learning_rate": 4.8112801496515305e-05, - "loss": 2.7895, + "learning_rate": 4.811226399160123e-05, + "loss": 2.7696, "step": 11400 }, { "epoch": 0.04, - "learning_rate": 4.8096247123677724e-05, - "loss": 2.8001, + "learning_rate": 4.8095704903808256e-05, + "loss": 2.775, "step": 11500 }, { "epoch": 0.04, - "learning_rate": 4.8079692750840136e-05, - "loss": 2.8016, + "learning_rate": 4.8079145816015294e-05, + "loss": 2.7835, "step": 11600 }, { "epoch": 0.04, - "learning_rate": 4.8063138378002555e-05, - "loss": 2.8056, + "learning_rate": 4.806258672822232e-05, + "loss": 2.8033, "step": 11700 }, { "epoch": 0.04, - "learning_rate": 4.804658400516497e-05, - "loss": 2.792, + "learning_rate": 4.804602764042935e-05, + "loss": 2.7839, "step": 11800 }, { "epoch": 0.04, - "learning_rate": 4.803002963232739e-05, - "loss": 2.7795, + "learning_rate": 4.802946855263638e-05, + "loss": 2.7871, "step": 11900 }, { "epoch": 0.04, - "learning_rate": 4.80134752594898e-05, - "loss": 2.7853, + "learning_rate": 4.80129094648434e-05, + "loss": 2.7912, "step": 12000 }, { "epoch": 0.04, - "learning_rate": 4.799692088665221e-05, - "loss": 2.7869, + "learning_rate": 4.799635037705043e-05, + "loss": 2.7906, "step": 12100 }, { "epoch": 0.04, - "learning_rate": 4.798036651381463e-05, - "loss": 2.796, + "learning_rate": 4.797979128925746e-05, + "loss": 2.7903, "step": 12200 }, { "epoch": 0.04, - "learning_rate": 4.796381214097704e-05, - "loss": 2.7972, + "learning_rate": 4.7963232201464485e-05, + "loss": 2.7771, "step": 12300 }, { "epoch": 0.04, - "learning_rate": 4.794725776813946e-05, - "loss": 2.8135, + "learning_rate": 4.7946673113671516e-05, + "loss": 2.7825, "step": 12400 }, { "epoch": 0.04, - "learning_rate": 4.7930703395301874e-05, - "loss": 2.7929, + "learning_rate": 4.793011402587854e-05, + "loss": 2.7946, "step": 12500 }, { "epoch": 0.04, - "learning_rate": 4.7914149022464286e-05, - "loss": 2.7916, + "learning_rate": 4.7913554938085576e-05, + "loss": 2.7858, "step": 12600 }, { "epoch": 0.04, - "learning_rate": 4.7897594649626705e-05, - "loss": 2.7987, + "learning_rate": 4.78969958502926e-05, + "loss": 2.7768, "step": 12700 }, { "epoch": 0.04, - "learning_rate": 4.788104027678912e-05, - "loss": 2.7871, + "learning_rate": 4.788043676249963e-05, + "loss": 2.7875, "step": 12800 }, { "epoch": 0.04, - "learning_rate": 4.7864485903951536e-05, - "loss": 2.799, + "learning_rate": 4.786387767470666e-05, + "loss": 2.7762, "step": 12900 }, { "epoch": 0.04, - "learning_rate": 4.784793153111395e-05, - "loss": 2.7973, + "learning_rate": 4.7847318586913684e-05, + "loss": 2.7937, "step": 13000 }, { "epoch": 0.04, - "learning_rate": 4.783137715827636e-05, - "loss": 2.7883, + "learning_rate": 4.7830759499120714e-05, + "loss": 2.7903, "step": 13100 }, { "epoch": 0.04, - "learning_rate": 4.781482278543878e-05, - "loss": 2.7874, + "learning_rate": 4.7814200411327744e-05, + "loss": 2.7923, "step": 13200 }, { "epoch": 0.04, - "learning_rate": 4.7798268412601185e-05, - "loss": 2.7897, + "learning_rate": 4.779764132353477e-05, + "loss": 2.7874, "step": 13300 }, { "epoch": 0.04, - "learning_rate": 4.7781714039763604e-05, - "loss": 2.7865, + "learning_rate": 4.77810822357418e-05, + "loss": 2.7847, "step": 13400 }, { "epoch": 0.04, - "learning_rate": 4.776515966692602e-05, - "loss": 2.8049, + "learning_rate": 4.776452314794883e-05, + "loss": 2.772, "step": 13500 }, { "epoch": 0.05, - "learning_rate": 4.7748605294088436e-05, - "loss": 2.8022, + "learning_rate": 4.774796406015586e-05, + "loss": 2.7846, "step": 13600 }, { "epoch": 0.05, - "learning_rate": 4.773205092125085e-05, - "loss": 2.7926, + "learning_rate": 4.773140497236289e-05, + "loss": 2.7824, "step": 13700 }, { "epoch": 0.05, - "learning_rate": 4.771549654841326e-05, - "loss": 2.8083, + "learning_rate": 4.771484588456991e-05, + "loss": 2.7781, "step": 13800 }, { "epoch": 0.05, - "learning_rate": 4.769894217557568e-05, - "loss": 2.7993, + "learning_rate": 4.769828679677694e-05, + "loss": 2.7761, "step": 13900 }, { "epoch": 0.05, - "learning_rate": 4.768238780273809e-05, - "loss": 2.7907, + "learning_rate": 4.7681727708983967e-05, + "loss": 2.7677, "step": 14000 }, { "epoch": 0.05, - "learning_rate": 4.766583342990051e-05, - "loss": 2.786, + "learning_rate": 4.7665168621191e-05, + "loss": 2.7686, "step": 14100 }, { "epoch": 0.05, - "learning_rate": 4.764927905706292e-05, - "loss": 2.7756, + "learning_rate": 4.764860953339803e-05, + "loss": 2.7688, "step": 14200 }, { "epoch": 0.05, - "learning_rate": 4.7632724684225335e-05, - "loss": 2.8032, + "learning_rate": 4.763205044560505e-05, + "loss": 2.7817, "step": 14300 }, { "epoch": 0.05, - "learning_rate": 4.7616170311387754e-05, - "loss": 2.807, + "learning_rate": 4.761549135781208e-05, + "loss": 2.7674, "step": 14400 }, { "epoch": 0.05, - "learning_rate": 4.7599615938550166e-05, - "loss": 2.8051, + "learning_rate": 4.759893227001911e-05, + "loss": 2.7738, "step": 14500 }, { "epoch": 0.05, - "learning_rate": 4.7583061565712586e-05, - "loss": 2.7948, + "learning_rate": 4.758237318222614e-05, + "loss": 2.7799, "step": 14600 }, { "epoch": 0.05, - "learning_rate": 4.7566507192875e-05, - "loss": 2.7929, + "learning_rate": 4.756581409443317e-05, + "loss": 2.7818, "step": 14700 }, { "epoch": 0.05, - "learning_rate": 4.754995282003741e-05, - "loss": 2.8025, + "learning_rate": 4.7549255006640195e-05, + "loss": 2.7859, "step": 14800 }, { "epoch": 0.05, - "learning_rate": 4.753339844719983e-05, - "loss": 2.7916, + "learning_rate": 4.7532695918847226e-05, + "loss": 2.7762, "step": 14900 }, { "epoch": 0.05, - "learning_rate": 4.751684407436224e-05, - "loss": 2.8024, + "learning_rate": 4.7516136831054256e-05, + "loss": 2.7851, "step": 15000 }, { "epoch": 0.05, - "learning_rate": 4.750028970152466e-05, - "loss": 2.8043, + "learning_rate": 4.749957774326128e-05, + "loss": 2.7814, "step": 15100 }, { "epoch": 0.05, - "learning_rate": 4.748373532868707e-05, - "loss": 2.8092, + "learning_rate": 4.748301865546831e-05, + "loss": 2.7973, "step": 15200 }, { "epoch": 0.05, - "learning_rate": 4.746718095584949e-05, - "loss": 2.7981, + "learning_rate": 4.746645956767533e-05, + "loss": 2.7885, "step": 15300 }, { "epoch": 0.05, - "learning_rate": 4.7450626583011904e-05, - "loss": 2.7903, + "learning_rate": 4.7449900479882364e-05, + "loss": 2.776, "step": 15400 }, { "epoch": 0.05, - "learning_rate": 4.7434072210174316e-05, - "loss": 2.7969, + "learning_rate": 4.7433341392089394e-05, + "loss": 2.7677, "step": 15500 }, { "epoch": 0.05, - "learning_rate": 4.7417517837336735e-05, - "loss": 2.8021, + "learning_rate": 4.7416782304296424e-05, + "loss": 2.7916, "step": 15600 }, { "epoch": 0.05, - "learning_rate": 4.740096346449915e-05, - "loss": 2.7899, + "learning_rate": 4.7400223216503454e-05, + "loss": 2.7928, "step": 15700 }, { "epoch": 0.05, - "learning_rate": 4.7384409091661567e-05, - "loss": 2.7982, + "learning_rate": 4.738366412871048e-05, + "loss": 2.7846, "step": 15800 }, { "epoch": 0.05, - "learning_rate": 4.736785471882398e-05, - "loss": 2.789, + "learning_rate": 4.736710504091751e-05, + "loss": 2.7773, "step": 15900 }, { "epoch": 0.05, - "learning_rate": 4.735130034598639e-05, - "loss": 2.7935, + "learning_rate": 4.735054595312454e-05, + "loss": 2.783, "step": 16000 }, { "epoch": 0.05, - "learning_rate": 4.733474597314881e-05, - "loss": 2.8002, + "learning_rate": 4.733398686533156e-05, + "loss": 2.7862, "step": 16100 }, { "epoch": 0.05, - "learning_rate": 4.731819160031122e-05, - "loss": 2.8021, + "learning_rate": 4.731742777753859e-05, + "loss": 2.7786, "step": 16200 }, { "epoch": 0.05, - "learning_rate": 4.730163722747364e-05, - "loss": 2.7968, + "learning_rate": 4.730086868974562e-05, + "loss": 2.7859, "step": 16300 }, { "epoch": 0.05, - "learning_rate": 4.7285082854636054e-05, - "loss": 2.7927, + "learning_rate": 4.7284309601952646e-05, + "loss": 2.7774, "step": 16400 }, { "epoch": 0.05, - "learning_rate": 4.7268528481798466e-05, - "loss": 2.7902, + "learning_rate": 4.7267750514159677e-05, + "loss": 2.7832, "step": 16500 }, { "epoch": 0.05, - "learning_rate": 4.7251974108960885e-05, - "loss": 2.7975, + "learning_rate": 4.725119142636671e-05, + "loss": 2.7747, "step": 16600 }, { "epoch": 0.06, - "learning_rate": 4.72354197361233e-05, - "loss": 2.8031, + "learning_rate": 4.723463233857374e-05, + "loss": 2.7643, "step": 16700 }, { "epoch": 0.06, - "learning_rate": 4.7218865363285716e-05, - "loss": 2.7943, + "learning_rate": 4.721807325078077e-05, + "loss": 2.7757, "step": 16800 }, { "epoch": 0.06, - "learning_rate": 4.720231099044813e-05, - "loss": 2.7947, + "learning_rate": 4.720151416298779e-05, + "loss": 2.7791, "step": 16900 }, { "epoch": 0.06, - "learning_rate": 4.718575661761054e-05, - "loss": 2.8, + "learning_rate": 4.718495507519482e-05, + "loss": 2.7813, "step": 17000 }, { "epoch": 0.06, - "learning_rate": 4.716920224477296e-05, - "loss": 2.8024, + "learning_rate": 4.7168395987401845e-05, + "loss": 2.7935, "step": 17100 }, { "epoch": 0.06, - "learning_rate": 4.715264787193537e-05, - "loss": 2.804, + "learning_rate": 4.7151836899608875e-05, + "loss": 2.7802, "step": 17200 }, { "epoch": 0.06, - "learning_rate": 4.713609349909779e-05, - "loss": 2.7841, + "learning_rate": 4.7135277811815905e-05, + "loss": 2.7884, "step": 17300 }, { "epoch": 0.06, - "learning_rate": 4.71195391262602e-05, - "loss": 2.792, + "learning_rate": 4.711871872402293e-05, + "loss": 2.7835, "step": 17400 }, { "epoch": 0.06, - "learning_rate": 4.7102984753422616e-05, - "loss": 2.793, + "learning_rate": 4.710215963622996e-05, + "loss": 2.7925, "step": 17500 }, { "epoch": 0.06, - "learning_rate": 4.7086430380585035e-05, - "loss": 2.7858, + "learning_rate": 4.708560054843699e-05, + "loss": 2.7974, "step": 17600 }, { "epoch": 0.06, - "learning_rate": 4.706987600774745e-05, - "loss": 2.8024, + "learning_rate": 4.706904146064402e-05, + "loss": 2.7875, "step": 17700 }, { "epoch": 0.06, - "learning_rate": 4.7053321634909866e-05, - "loss": 2.8041, + "learning_rate": 4.705248237285105e-05, + "loss": 2.7878, "step": 17800 }, { "epoch": 0.06, - "learning_rate": 4.703676726207228e-05, - "loss": 2.7926, + "learning_rate": 4.7035923285058074e-05, + "loss": 2.7869, "step": 17900 }, { "epoch": 0.06, - "learning_rate": 4.702021288923469e-05, - "loss": 2.7944, + "learning_rate": 4.7019364197265104e-05, + "loss": 2.7716, "step": 18000 }, { "epoch": 0.06, - "learning_rate": 4.700365851639711e-05, - "loss": 2.7989, + "learning_rate": 4.7002805109472134e-05, + "loss": 2.7774, "step": 18100 }, { "epoch": 0.06, - "learning_rate": 4.698710414355952e-05, - "loss": 2.7909, + "learning_rate": 4.698624602167916e-05, + "loss": 2.7824, "step": 18200 }, { "epoch": 0.06, - "learning_rate": 4.697054977072194e-05, - "loss": 2.7963, + "learning_rate": 4.696968693388619e-05, + "loss": 2.7863, "step": 18300 }, { "epoch": 0.06, - "learning_rate": 4.695399539788435e-05, - "loss": 2.7873, + "learning_rate": 4.695312784609321e-05, + "loss": 2.7737, "step": 18400 }, { "epoch": 0.06, - "learning_rate": 4.693744102504677e-05, - "loss": 2.7833, + "learning_rate": 4.693656875830024e-05, + "loss": 2.7721, "step": 18500 }, { "epoch": 0.06, - "learning_rate": 4.6920886652209184e-05, - "loss": 2.7892, + "learning_rate": 4.692000967050727e-05, + "loss": 2.7834, "step": 18600 }, { "epoch": 0.06, - "learning_rate": 4.69043322793716e-05, - "loss": 2.7939, + "learning_rate": 4.69034505827143e-05, + "loss": 2.7756, "step": 18700 }, { "epoch": 0.06, - "learning_rate": 4.6887777906534016e-05, - "loss": 2.7966, + "learning_rate": 4.688689149492133e-05, + "loss": 2.7757, "step": 18800 }, { "epoch": 0.06, - "learning_rate": 4.687122353369643e-05, - "loss": 2.7968, + "learning_rate": 4.6870332407128356e-05, + "loss": 2.7818, "step": 18900 }, { "epoch": 0.06, - "learning_rate": 4.685466916085885e-05, - "loss": 2.786, + "learning_rate": 4.685377331933539e-05, + "loss": 2.774, "step": 19000 }, { "epoch": 0.06, - "learning_rate": 4.683811478802126e-05, - "loss": 2.8032, + "learning_rate": 4.683721423154242e-05, + "loss": 2.7885, "step": 19100 }, { "epoch": 0.06, - "learning_rate": 4.682156041518367e-05, - "loss": 2.8071, + "learning_rate": 4.682065514374944e-05, + "loss": 2.7653, "step": 19200 }, { "epoch": 0.06, - "learning_rate": 4.680500604234609e-05, - "loss": 2.8007, + "learning_rate": 4.680409605595647e-05, + "loss": 2.7829, "step": 19300 }, { "epoch": 0.06, - "learning_rate": 4.67884516695085e-05, - "loss": 2.7902, + "learning_rate": 4.67875369681635e-05, + "loss": 2.7784, "step": 19400 }, { "epoch": 0.06, - "learning_rate": 4.677189729667092e-05, - "loss": 2.7954, + "learning_rate": 4.6770977880370525e-05, + "loss": 2.7826, "step": 19500 }, { "epoch": 0.06, - "learning_rate": 4.6755342923833334e-05, - "loss": 2.7869, + "learning_rate": 4.675441879257756e-05, + "loss": 2.7876, "step": 19600 }, { "epoch": 0.07, - "learning_rate": 4.6738788550995746e-05, - "loss": 2.7838, + "learning_rate": 4.6737859704784585e-05, + "loss": 2.7801, "step": 19700 }, { "epoch": 0.07, - "learning_rate": 4.6722234178158165e-05, - "loss": 2.7894, + "learning_rate": 4.6721300616991615e-05, + "loss": 2.7858, "step": 19800 }, { "epoch": 0.07, - "learning_rate": 4.670567980532058e-05, - "loss": 2.7915, + "learning_rate": 4.670474152919864e-05, + "loss": 2.7851, "step": 19900 }, { "epoch": 0.07, - "learning_rate": 4.6689125432483e-05, - "loss": 2.7971, + "learning_rate": 4.668818244140567e-05, + "loss": 2.7871, "step": 20000 }, { "epoch": 0.07, - "learning_rate": 4.667257105964541e-05, - "loss": 2.7916, + "learning_rate": 4.66716233536127e-05, + "loss": 2.7874, "step": 20100 }, { "epoch": 0.07, - "learning_rate": 4.665601668680782e-05, - "loss": 2.7978, + "learning_rate": 4.665506426581972e-05, + "loss": 2.7855, "step": 20200 }, { "epoch": 0.07, - "learning_rate": 4.663946231397024e-05, - "loss": 2.7959, + "learning_rate": 4.6638505178026753e-05, + "loss": 2.7686, "step": 20300 }, { "epoch": 0.07, - "learning_rate": 4.662290794113265e-05, - "loss": 2.7885, + "learning_rate": 4.6621946090233784e-05, + "loss": 2.7826, "step": 20400 }, { "epoch": 0.07, - "learning_rate": 4.660635356829507e-05, - "loss": 2.7849, + "learning_rate": 4.660538700244081e-05, + "loss": 2.7919, "step": 20500 }, { "epoch": 0.07, - "learning_rate": 4.6589799195457484e-05, - "loss": 2.8034, + "learning_rate": 4.6588827914647844e-05, + "loss": 2.7845, "step": 20600 }, { "epoch": 0.07, - "learning_rate": 4.6573244822619896e-05, - "loss": 2.7967, + "learning_rate": 4.657226882685487e-05, + "loss": 2.8025, "step": 20700 }, { "epoch": 0.07, - "learning_rate": 4.6556690449782315e-05, - "loss": 2.7981, + "learning_rate": 4.65557097390619e-05, + "loss": 2.7732, "step": 20800 }, { "epoch": 0.07, - "learning_rate": 4.654013607694473e-05, - "loss": 2.8194, + "learning_rate": 4.653915065126893e-05, + "loss": 2.7927, "step": 20900 }, { "epoch": 0.07, - "learning_rate": 4.6523581704107146e-05, - "loss": 2.798, + "learning_rate": 4.652259156347595e-05, + "loss": 2.7786, "step": 21000 }, { "epoch": 0.07, - "learning_rate": 4.650702733126956e-05, - "loss": 2.7977, + "learning_rate": 4.650603247568298e-05, + "loss": 2.7611, "step": 21100 }, { "epoch": 0.07, - "learning_rate": 4.649047295843197e-05, - "loss": 2.8072, + "learning_rate": 4.6489473387890006e-05, + "loss": 2.7858, "step": 21200 }, { "epoch": 0.07, - "learning_rate": 4.647391858559439e-05, - "loss": 2.8019, + "learning_rate": 4.6472914300097036e-05, + "loss": 2.7905, "step": 21300 }, { "epoch": 0.07, - "learning_rate": 4.64573642127568e-05, - "loss": 2.7966, + "learning_rate": 4.6456355212304066e-05, + "loss": 2.7664, "step": 21400 }, { "epoch": 0.07, - "learning_rate": 4.644080983991922e-05, - "loss": 2.7915, + "learning_rate": 4.643979612451109e-05, + "loss": 2.7882, "step": 21500 }, { "epoch": 0.07, - "learning_rate": 4.6424255467081634e-05, - "loss": 2.7733, + "learning_rate": 4.642323703671813e-05, + "loss": 2.7849, "step": 21600 }, { "epoch": 0.07, - "learning_rate": 4.640770109424405e-05, - "loss": 2.7924, + "learning_rate": 4.640667794892515e-05, + "loss": 2.7855, "step": 21700 }, { "epoch": 0.07, - "learning_rate": 4.6391146721406465e-05, - "loss": 2.7882, + "learning_rate": 4.639011886113218e-05, + "loss": 2.7765, "step": 21800 }, { "epoch": 0.07, - "learning_rate": 4.637459234856888e-05, - "loss": 2.7788, + "learning_rate": 4.637355977333921e-05, + "loss": 2.7879, "step": 21900 }, { "epoch": 0.07, - "learning_rate": 4.6358037975731296e-05, - "loss": 2.782, + "learning_rate": 4.6357000685546235e-05, + "loss": 2.779, "step": 22000 }, { "epoch": 0.07, - "learning_rate": 4.63414836028937e-05, - "loss": 2.7981, + "learning_rate": 4.6340441597753265e-05, + "loss": 2.7673, "step": 22100 }, { "epoch": 0.07, - "learning_rate": 4.632492923005612e-05, - "loss": 2.7761, + "learning_rate": 4.6323882509960295e-05, + "loss": 2.7804, "step": 22200 }, { "epoch": 0.07, - "learning_rate": 4.630837485721853e-05, - "loss": 2.8116, + "learning_rate": 4.630732342216732e-05, + "loss": 2.7752, "step": 22300 }, { "epoch": 0.07, - "learning_rate": 4.629182048438095e-05, - "loss": 2.792, + "learning_rate": 4.629076433437435e-05, + "loss": 2.776, "step": 22400 }, { "epoch": 0.07, - "learning_rate": 4.6275266111543364e-05, - "loss": 2.7842, + "learning_rate": 4.627420524658137e-05, + "loss": 2.769, "step": 22500 }, { "epoch": 0.07, - "learning_rate": 4.6258711738705776e-05, - "loss": 2.789, + "learning_rate": 4.625764615878841e-05, + "loss": 2.7803, "step": 22600 }, { "epoch": 0.08, - "learning_rate": 4.6242157365868195e-05, - "loss": 2.8023, + "learning_rate": 4.624108707099544e-05, + "loss": 2.7783, "step": 22700 }, { "epoch": 0.08, - "learning_rate": 4.622560299303061e-05, - "loss": 2.805, + "learning_rate": 4.6224527983202463e-05, + "loss": 2.7791, "step": 22800 }, { "epoch": 0.08, - "learning_rate": 4.620904862019303e-05, - "loss": 2.8074, + "learning_rate": 4.6207968895409494e-05, + "loss": 2.7947, "step": 22900 }, { "epoch": 0.08, - "learning_rate": 4.619249424735544e-05, - "loss": 2.8029, + "learning_rate": 4.619140980761652e-05, + "loss": 2.7764, "step": 23000 }, { "epoch": 0.08, - "learning_rate": 4.617593987451785e-05, - "loss": 2.8212, + "learning_rate": 4.617485071982355e-05, + "loss": 2.7893, "step": 23100 }, { "epoch": 0.08, - "learning_rate": 4.615938550168027e-05, - "loss": 2.8042, + "learning_rate": 4.615829163203058e-05, + "loss": 2.7636, "step": 23200 }, { "epoch": 0.08, - "learning_rate": 4.614283112884268e-05, - "loss": 2.7906, + "learning_rate": 4.61417325442376e-05, + "loss": 2.7794, "step": 23300 }, { "epoch": 0.08, - "learning_rate": 4.61262767560051e-05, - "loss": 2.7914, + "learning_rate": 4.612517345644463e-05, + "loss": 2.7965, "step": 23400 }, { "epoch": 0.08, - "learning_rate": 4.6109722383167514e-05, - "loss": 2.802, + "learning_rate": 4.610861436865166e-05, + "loss": 2.7818, "step": 23500 }, { "epoch": 0.08, - "learning_rate": 4.6093168010329926e-05, - "loss": 2.7898, + "learning_rate": 4.609205528085869e-05, + "loss": 2.7773, "step": 23600 }, { "epoch": 0.08, - "learning_rate": 4.6076613637492345e-05, - "loss": 2.7944, + "learning_rate": 4.607549619306572e-05, + "loss": 2.7809, "step": 23700 }, { "epoch": 0.08, - "learning_rate": 4.606005926465476e-05, - "loss": 2.7828, + "learning_rate": 4.6058937105272746e-05, + "loss": 2.7609, "step": 23800 }, { "epoch": 0.08, - "learning_rate": 4.6043504891817176e-05, - "loss": 2.7855, + "learning_rate": 4.6042378017479776e-05, + "loss": 2.7931, "step": 23900 }, { "epoch": 0.08, - "learning_rate": 4.602695051897959e-05, - "loss": 2.798, + "learning_rate": 4.602581892968681e-05, + "loss": 2.7823, "step": 24000 }, { "epoch": 0.08, - "learning_rate": 4.6010396146142e-05, - "loss": 2.8217, + "learning_rate": 4.600925984189383e-05, + "loss": 2.8011, "step": 24100 }, { "epoch": 0.08, - "learning_rate": 4.599384177330442e-05, - "loss": 2.8046, + "learning_rate": 4.599270075410086e-05, + "loss": 2.7806, "step": 24200 }, { "epoch": 0.08, - "learning_rate": 4.597728740046683e-05, - "loss": 2.7894, + "learning_rate": 4.5976141666307884e-05, + "loss": 2.7745, "step": 24300 }, { "epoch": 0.08, - "learning_rate": 4.596073302762925e-05, - "loss": 2.7988, + "learning_rate": 4.5959582578514914e-05, + "loss": 2.7718, "step": 24400 }, { "epoch": 0.08, - "learning_rate": 4.5944178654791664e-05, - "loss": 2.8025, + "learning_rate": 4.5943023490721945e-05, + "loss": 2.7815, "step": 24500 }, { "epoch": 0.08, - "learning_rate": 4.5927624281954076e-05, - "loss": 2.8177, + "learning_rate": 4.5926464402928975e-05, + "loss": 2.7826, "step": 24600 }, { "epoch": 0.08, - "learning_rate": 4.5911069909116495e-05, - "loss": 2.7866, + "learning_rate": 4.5909905315136005e-05, + "loss": 2.7894, "step": 24700 }, { "epoch": 0.08, - "learning_rate": 4.589451553627891e-05, - "loss": 2.7887, + "learning_rate": 4.589334622734303e-05, + "loss": 2.7665, "step": 24800 }, { "epoch": 0.08, - "learning_rate": 4.5877961163441326e-05, - "loss": 2.7962, + "learning_rate": 4.587678713955006e-05, + "loss": 2.7787, "step": 24900 }, { "epoch": 0.08, - "learning_rate": 4.586140679060374e-05, - "loss": 2.803, + "learning_rate": 4.586022805175709e-05, + "loss": 2.7896, "step": 25000 }, { "epoch": 0.08, - "learning_rate": 4.584485241776616e-05, - "loss": 2.7793, + "learning_rate": 4.584366896396411e-05, + "loss": 2.78, "step": 25100 }, { "epoch": 0.08, - "learning_rate": 4.582829804492857e-05, - "loss": 2.797, + "learning_rate": 4.582710987617114e-05, + "loss": 2.7894, "step": 25200 }, { "epoch": 0.08, - "learning_rate": 4.581174367209098e-05, - "loss": 2.7889, + "learning_rate": 4.5810550788378174e-05, + "loss": 2.7943, "step": 25300 }, { "epoch": 0.08, - "learning_rate": 4.57951892992534e-05, - "loss": 2.7864, + "learning_rate": 4.57939917005852e-05, + "loss": 2.7871, "step": 25400 }, { "epoch": 0.08, - "learning_rate": 4.577863492641581e-05, - "loss": 2.7977, + "learning_rate": 4.577743261279223e-05, + "loss": 2.7814, "step": 25500 }, { "epoch": 0.08, - "learning_rate": 4.576208055357823e-05, - "loss": 2.7741, + "learning_rate": 4.576087352499926e-05, + "loss": 2.7911, "step": 25600 }, { "epoch": 0.09, - "learning_rate": 4.5745526180740645e-05, - "loss": 2.792, + "learning_rate": 4.574431443720629e-05, + "loss": 2.7669, "step": 25700 }, { "epoch": 0.09, - "learning_rate": 4.572897180790306e-05, - "loss": 2.7956, + "learning_rate": 4.572775534941331e-05, + "loss": 2.7794, "step": 25800 }, { "epoch": 0.09, - "learning_rate": 4.5712417435065476e-05, - "loss": 2.7899, + "learning_rate": 4.571119626162034e-05, + "loss": 2.803, "step": 25900 }, { "epoch": 0.09, - "learning_rate": 4.569586306222789e-05, - "loss": 2.8051, + "learning_rate": 4.569463717382737e-05, + "loss": 2.7836, "step": 26000 }, { "epoch": 0.09, - "learning_rate": 4.567930868939031e-05, - "loss": 2.7869, + "learning_rate": 4.5678078086034396e-05, + "loss": 2.781, "step": 26100 }, { "epoch": 0.09, - "learning_rate": 4.566275431655272e-05, - "loss": 2.7706, + "learning_rate": 4.5661518998241426e-05, + "loss": 2.7913, "step": 26200 }, { "epoch": 0.09, - "learning_rate": 4.564619994371513e-05, - "loss": 2.7979, + "learning_rate": 4.5644959910448456e-05, + "loss": 2.7712, "step": 26300 }, { "epoch": 0.09, - "learning_rate": 4.562964557087755e-05, - "loss": 2.7957, + "learning_rate": 4.562840082265548e-05, + "loss": 2.7788, "step": 26400 }, { "epoch": 0.09, - "learning_rate": 4.561309119803996e-05, - "loss": 2.8008, + "learning_rate": 4.561184173486251e-05, + "loss": 2.7796, "step": 26500 }, { "epoch": 0.09, - "learning_rate": 4.559653682520238e-05, - "loss": 2.7969, + "learning_rate": 4.559528264706954e-05, + "loss": 2.7754, "step": 26600 }, { "epoch": 0.09, - "learning_rate": 4.5579982452364794e-05, - "loss": 2.7943, + "learning_rate": 4.557872355927657e-05, + "loss": 2.7809, "step": 26700 }, { "epoch": 0.09, - "learning_rate": 4.5563428079527207e-05, - "loss": 2.8, + "learning_rate": 4.55621644714836e-05, + "loss": 2.793, "step": 26800 }, { "epoch": 0.09, - "learning_rate": 4.5546873706689626e-05, - "loss": 2.8088, + "learning_rate": 4.5545605383690624e-05, + "loss": 2.7834, "step": 26900 }, { "epoch": 0.09, - "learning_rate": 4.553031933385204e-05, - "loss": 2.8057, + "learning_rate": 4.5529046295897655e-05, + "loss": 2.798, "step": 27000 }, { "epoch": 0.09, - "learning_rate": 4.551376496101446e-05, - "loss": 2.7967, + "learning_rate": 4.551248720810468e-05, + "loss": 2.7781, "step": 27100 }, { "epoch": 0.09, - "learning_rate": 4.549721058817687e-05, - "loss": 2.7814, + "learning_rate": 4.549592812031171e-05, + "loss": 2.7852, "step": 27200 }, { "epoch": 0.09, - "learning_rate": 4.548065621533928e-05, - "loss": 2.7779, + "learning_rate": 4.547936903251874e-05, + "loss": 2.782, "step": 27300 }, { "epoch": 0.09, - "learning_rate": 4.54641018425017e-05, - "loss": 2.7828, + "learning_rate": 4.546280994472576e-05, + "loss": 2.7856, "step": 27400 }, { "epoch": 0.09, - "learning_rate": 4.544754746966411e-05, - "loss": 2.7916, + "learning_rate": 4.544625085693279e-05, + "loss": 2.7721, "step": 27500 }, { "epoch": 0.09, - "learning_rate": 4.543099309682653e-05, - "loss": 2.7926, + "learning_rate": 4.542969176913982e-05, + "loss": 2.7839, "step": 27600 }, { "epoch": 0.09, - "learning_rate": 4.5414438723988944e-05, - "loss": 2.8087, + "learning_rate": 4.541313268134685e-05, + "loss": 2.7848, "step": 27700 }, { "epoch": 0.09, - "learning_rate": 4.5397884351151356e-05, - "loss": 2.7749, + "learning_rate": 4.5396573593553884e-05, + "loss": 2.7943, "step": 27800 }, { "epoch": 0.09, - "learning_rate": 4.5381329978313775e-05, - "loss": 2.8012, + "learning_rate": 4.538001450576091e-05, + "loss": 2.7878, "step": 27900 }, { "epoch": 0.09, - "learning_rate": 4.536477560547619e-05, - "loss": 2.8014, + "learning_rate": 4.536345541796794e-05, + "loss": 2.7936, "step": 28000 }, { "epoch": 0.09, - "learning_rate": 4.534822123263861e-05, - "loss": 2.7832, + "learning_rate": 4.534689633017497e-05, + "loss": 2.7804, "step": 28100 }, { "epoch": 0.09, - "learning_rate": 4.533166685980102e-05, - "loss": 2.7901, + "learning_rate": 4.533033724238199e-05, + "loss": 2.7908, "step": 28200 }, { "epoch": 0.09, - "learning_rate": 4.531511248696344e-05, - "loss": 2.785, + "learning_rate": 4.531377815458902e-05, + "loss": 2.7857, "step": 28300 }, { "epoch": 0.09, - "learning_rate": 4.529855811412585e-05, - "loss": 2.8014, + "learning_rate": 4.5297219066796045e-05, + "loss": 2.7779, "step": 28400 }, { "epoch": 0.09, - "learning_rate": 4.528200374128826e-05, - "loss": 2.7929, + "learning_rate": 4.5280659979003075e-05, + "loss": 2.7891, "step": 28500 }, { "epoch": 0.09, - "learning_rate": 4.526544936845068e-05, - "loss": 2.79, + "learning_rate": 4.526410089121011e-05, + "loss": 2.7715, "step": 28600 }, { "epoch": 0.1, - "learning_rate": 4.5248894995613094e-05, - "loss": 2.7956, + "learning_rate": 4.5247541803417136e-05, + "loss": 2.7961, "step": 28700 }, { "epoch": 0.1, - "learning_rate": 4.523234062277551e-05, - "loss": 2.7963, + "learning_rate": 4.5230982715624166e-05, + "loss": 2.781, "step": 28800 }, { "epoch": 0.1, - "learning_rate": 4.5215786249937925e-05, - "loss": 2.7736, + "learning_rate": 4.521442362783119e-05, + "loss": 2.7705, "step": 28900 }, { "epoch": 0.1, - "learning_rate": 4.519923187710034e-05, - "loss": 2.789, + "learning_rate": 4.519786454003822e-05, + "loss": 2.7888, "step": 29000 }, { "epoch": 0.1, - "learning_rate": 4.5182677504262756e-05, - "loss": 2.7964, + "learning_rate": 4.518130545224525e-05, + "loss": 2.7971, "step": 29100 }, { "epoch": 0.1, - "learning_rate": 4.516612313142517e-05, - "loss": 2.7872, + "learning_rate": 4.5164746364452274e-05, + "loss": 2.7816, "step": 29200 }, { "epoch": 0.1, - "learning_rate": 4.514956875858759e-05, - "loss": 2.7959, + "learning_rate": 4.5148187276659304e-05, + "loss": 2.7808, "step": 29300 }, { "epoch": 0.1, - "learning_rate": 4.513301438575e-05, - "loss": 2.787, + "learning_rate": 4.5131628188866334e-05, + "loss": 2.7827, "step": 29400 }, { "epoch": 0.1, - "learning_rate": 4.511646001291241e-05, - "loss": 2.7883, + "learning_rate": 4.511506910107336e-05, + "loss": 2.7661, "step": 29500 }, { "epoch": 0.1, - "learning_rate": 4.509990564007483e-05, - "loss": 2.8037, + "learning_rate": 4.5098510013280395e-05, + "loss": 2.7774, "step": 29600 }, { "epoch": 0.1, - "learning_rate": 4.5083351267237243e-05, - "loss": 2.7884, + "learning_rate": 4.508195092548742e-05, + "loss": 2.7698, "step": 29700 }, { "epoch": 0.1, - "learning_rate": 4.506679689439966e-05, - "loss": 2.778, + "learning_rate": 4.506539183769445e-05, + "loss": 2.7824, "step": 29800 }, { "epoch": 0.1, - "learning_rate": 4.5050242521562075e-05, - "loss": 2.7886, + "learning_rate": 4.504883274990148e-05, + "loss": 2.7745, "step": 29900 }, { "epoch": 0.1, - "learning_rate": 4.503368814872449e-05, - "loss": 2.7929, + "learning_rate": 4.50322736621085e-05, + "loss": 2.7787, "step": 30000 }, { "epoch": 0.1, - "learning_rate": 4.5017133775886906e-05, - "loss": 2.782, + "learning_rate": 4.501571457431553e-05, + "loss": 2.7825, "step": 30100 }, { "epoch": 0.1, - "learning_rate": 4.500057940304932e-05, - "loss": 2.8149, + "learning_rate": 4.4999155486522557e-05, + "loss": 2.7763, "step": 30200 }, { "epoch": 0.1, - "learning_rate": 4.498402503021174e-05, - "loss": 2.7956, + "learning_rate": 4.498259639872959e-05, + "loss": 2.7907, "step": 30300 }, { "epoch": 0.1, - "learning_rate": 4.496747065737415e-05, - "loss": 2.7935, + "learning_rate": 4.496603731093662e-05, + "loss": 2.7634, "step": 30400 }, { "epoch": 0.1, - "learning_rate": 4.495091628453656e-05, - "loss": 2.798, + "learning_rate": 4.494947822314364e-05, + "loss": 2.7782, "step": 30500 }, { "epoch": 0.1, - "learning_rate": 4.493436191169898e-05, - "loss": 2.8016, + "learning_rate": 4.493291913535068e-05, + "loss": 2.7861, "step": 30600 }, { "epoch": 0.1, - "learning_rate": 4.491780753886139e-05, - "loss": 2.792, + "learning_rate": 4.49163600475577e-05, + "loss": 2.7676, "step": 30700 }, { "epoch": 0.1, - "learning_rate": 4.490125316602381e-05, - "loss": 2.804, + "learning_rate": 4.489980095976473e-05, + "loss": 2.7768, "step": 30800 }, { "epoch": 0.1, - "learning_rate": 4.4884698793186224e-05, - "loss": 2.7958, + "learning_rate": 4.488324187197176e-05, + "loss": 2.7853, "step": 30900 }, { "epoch": 0.1, - "learning_rate": 4.486814442034864e-05, - "loss": 2.7974, + "learning_rate": 4.4866682784178785e-05, + "loss": 2.7915, "step": 31000 }, { "epoch": 0.1, - "learning_rate": 4.485159004751105e-05, - "loss": 2.8045, + "learning_rate": 4.4850123696385816e-05, + "loss": 2.7747, "step": 31100 }, { "epoch": 0.1, - "learning_rate": 4.483503567467346e-05, - "loss": 2.7946, + "learning_rate": 4.4833564608592846e-05, + "loss": 2.7749, "step": 31200 }, { "epoch": 0.1, - "learning_rate": 4.481848130183588e-05, - "loss": 2.8163, + "learning_rate": 4.481700552079987e-05, + "loss": 2.7841, "step": 31300 }, { "epoch": 0.1, - "learning_rate": 4.480192692899829e-05, - "loss": 2.7927, + "learning_rate": 4.48004464330069e-05, + "loss": 2.7805, "step": 31400 }, { "epoch": 0.1, - "learning_rate": 4.478537255616071e-05, - "loss": 2.7713, + "learning_rate": 4.478388734521392e-05, + "loss": 2.7825, "step": 31500 }, { "epoch": 0.1, - "learning_rate": 4.4768818183323124e-05, - "loss": 2.7841, + "learning_rate": 4.476732825742096e-05, + "loss": 2.7719, "step": 31600 }, { "epoch": 0.1, - "learning_rate": 4.475226381048554e-05, - "loss": 2.7755, + "learning_rate": 4.475076916962799e-05, + "loss": 2.7913, "step": 31700 }, { "epoch": 0.11, - "learning_rate": 4.4735709437647955e-05, - "loss": 2.806, + "learning_rate": 4.4734210081835014e-05, + "loss": 2.7799, "step": 31800 }, { "epoch": 0.11, - "learning_rate": 4.471915506481037e-05, - "loss": 2.7856, + "learning_rate": 4.4717650994042045e-05, + "loss": 2.7791, "step": 31900 }, { "epoch": 0.11, - "learning_rate": 4.4702600691972786e-05, - "loss": 2.787, + "learning_rate": 4.470109190624907e-05, + "loss": 2.8031, "step": 32000 }, { "epoch": 0.11, - "learning_rate": 4.46860463191352e-05, - "loss": 2.8002, + "learning_rate": 4.46845328184561e-05, + "loss": 2.7789, "step": 32100 }, { "epoch": 0.11, - "learning_rate": 4.466949194629762e-05, - "loss": 2.808, + "learning_rate": 4.466797373066313e-05, + "loss": 2.7778, "step": 32200 }, { "epoch": 0.11, - "learning_rate": 4.465293757346003e-05, - "loss": 2.792, + "learning_rate": 4.465141464287015e-05, + "loss": 2.7862, "step": 32300 }, { "epoch": 0.11, - "learning_rate": 4.463638320062244e-05, - "loss": 2.7954, + "learning_rate": 4.463485555507718e-05, + "loss": 2.7664, "step": 32400 }, { "epoch": 0.11, - "learning_rate": 4.461982882778486e-05, - "loss": 2.7955, + "learning_rate": 4.461829646728421e-05, + "loss": 2.7767, "step": 32500 }, { "epoch": 0.11, - "learning_rate": 4.4603274454947274e-05, - "loss": 2.7894, + "learning_rate": 4.460173737949124e-05, + "loss": 2.7814, "step": 32600 }, { "epoch": 0.11, - "learning_rate": 4.458672008210969e-05, - "loss": 2.7965, + "learning_rate": 4.458517829169827e-05, + "loss": 2.7786, "step": 32700 }, { "epoch": 0.11, - "learning_rate": 4.4570165709272105e-05, - "loss": 2.7941, + "learning_rate": 4.45686192039053e-05, + "loss": 2.765, "step": 32800 }, { "epoch": 0.11, - "learning_rate": 4.455361133643452e-05, - "loss": 2.7885, + "learning_rate": 4.455206011611233e-05, + "loss": 2.783, "step": 32900 }, { "epoch": 0.11, - "learning_rate": 4.4537056963596936e-05, - "loss": 2.8039, + "learning_rate": 4.453550102831935e-05, + "loss": 2.7747, "step": 33000 }, { "epoch": 0.11, - "learning_rate": 4.452050259075935e-05, - "loss": 2.7836, + "learning_rate": 4.451894194052638e-05, + "loss": 2.7765, "step": 33100 }, { "epoch": 0.11, - "learning_rate": 4.450394821792177e-05, - "loss": 2.7978, + "learning_rate": 4.450238285273341e-05, + "loss": 2.7934, "step": 33200 }, { "epoch": 0.11, - "learning_rate": 4.448739384508418e-05, - "loss": 2.7908, + "learning_rate": 4.4485823764940435e-05, + "loss": 2.7947, "step": 33300 }, { "epoch": 0.11, - "learning_rate": 4.447083947224659e-05, - "loss": 2.7943, + "learning_rate": 4.4469264677147465e-05, + "loss": 2.7732, "step": 33400 }, { "epoch": 0.11, - "learning_rate": 4.445428509940901e-05, - "loss": 2.791, + "learning_rate": 4.4452705589354495e-05, + "loss": 2.7886, "step": 33500 }, { "epoch": 0.11, - "learning_rate": 4.443773072657142e-05, - "loss": 2.7931, + "learning_rate": 4.4436146501561526e-05, + "loss": 2.7673, "step": 33600 }, { "epoch": 0.11, - "learning_rate": 4.442117635373384e-05, - "loss": 2.786, + "learning_rate": 4.4419587413768556e-05, + "loss": 2.7789, "step": 33700 }, { "epoch": 0.11, - "learning_rate": 4.4404621980896255e-05, - "loss": 2.7929, + "learning_rate": 4.440302832597558e-05, + "loss": 2.7755, "step": 33800 }, { "epoch": 0.11, - "learning_rate": 4.438806760805867e-05, - "loss": 2.7952, + "learning_rate": 4.438646923818261e-05, + "loss": 2.7756, "step": 33900 }, { "epoch": 0.11, - "learning_rate": 4.4371513235221086e-05, - "loss": 2.7895, + "learning_rate": 4.436991015038964e-05, + "loss": 2.7785, "step": 34000 }, { "epoch": 0.11, - "learning_rate": 4.43549588623835e-05, - "loss": 2.8006, + "learning_rate": 4.4353351062596664e-05, + "loss": 2.7778, "step": 34100 }, { "epoch": 0.11, - "learning_rate": 4.433840448954592e-05, - "loss": 2.7977, + "learning_rate": 4.4336791974803694e-05, + "loss": 2.7646, "step": 34200 }, { "epoch": 0.11, - "learning_rate": 4.432185011670833e-05, - "loss": 2.7985, + "learning_rate": 4.432023288701072e-05, + "loss": 2.7771, "step": 34300 }, { "epoch": 0.11, - "learning_rate": 4.430529574387074e-05, - "loss": 2.79, + "learning_rate": 4.430367379921775e-05, + "loss": 2.7974, "step": 34400 }, { "epoch": 0.11, - "learning_rate": 4.428874137103316e-05, - "loss": 2.8015, + "learning_rate": 4.428711471142478e-05, + "loss": 2.7814, "step": 34500 }, { "epoch": 0.11, - "learning_rate": 4.427218699819557e-05, - "loss": 2.8001, + "learning_rate": 4.427055562363181e-05, + "loss": 2.7861, "step": 34600 }, { "epoch": 0.11, - "learning_rate": 4.425563262535799e-05, - "loss": 2.7898, + "learning_rate": 4.425399653583884e-05, + "loss": 2.7909, "step": 34700 }, { "epoch": 0.12, - "learning_rate": 4.4239078252520404e-05, - "loss": 2.7813, + "learning_rate": 4.423743744804586e-05, + "loss": 2.7781, "step": 34800 }, { "epoch": 0.12, - "learning_rate": 4.422252387968282e-05, - "loss": 2.7819, + "learning_rate": 4.422087836025289e-05, + "loss": 2.7919, "step": 34900 }, { "epoch": 0.12, - "learning_rate": 4.4205969506845236e-05, - "loss": 2.8012, + "learning_rate": 4.420431927245992e-05, + "loss": 2.7792, "step": 35000 }, { "epoch": 0.12, - "learning_rate": 4.418941513400765e-05, - "loss": 2.7991, + "learning_rate": 4.4187760184666946e-05, + "loss": 2.7902, "step": 35100 }, { "epoch": 0.12, - "learning_rate": 4.417286076117007e-05, - "loss": 2.7944, + "learning_rate": 4.417120109687398e-05, + "loss": 2.7809, "step": 35200 }, { "epoch": 0.12, - "learning_rate": 4.415630638833248e-05, - "loss": 2.7826, + "learning_rate": 4.415464200908101e-05, + "loss": 2.7827, "step": 35300 }, { "epoch": 0.12, - "learning_rate": 4.41397520154949e-05, - "loss": 2.7942, + "learning_rate": 4.413808292128803e-05, + "loss": 2.7782, "step": 35400 }, { "epoch": 0.12, - "learning_rate": 4.412319764265731e-05, - "loss": 2.802, + "learning_rate": 4.412152383349506e-05, + "loss": 2.7922, "step": 35500 }, { "epoch": 0.12, - "learning_rate": 4.410664326981972e-05, - "loss": 2.8046, + "learning_rate": 4.410496474570209e-05, + "loss": 2.788, "step": 35600 }, { "epoch": 0.12, - "learning_rate": 4.409008889698214e-05, - "loss": 2.797, + "learning_rate": 4.408840565790912e-05, + "loss": 2.7745, "step": 35700 }, { "epoch": 0.12, - "learning_rate": 4.4073534524144554e-05, - "loss": 2.7863, + "learning_rate": 4.407184657011615e-05, + "loss": 2.7619, "step": 35800 }, { "epoch": 0.12, - "learning_rate": 4.405698015130697e-05, - "loss": 2.7878, + "learning_rate": 4.4055287482323175e-05, + "loss": 2.7751, "step": 35900 }, { "epoch": 0.12, - "learning_rate": 4.4040425778469385e-05, - "loss": 2.7917, + "learning_rate": 4.4038728394530206e-05, + "loss": 2.781, "step": 36000 }, { "epoch": 0.12, - "learning_rate": 4.40238714056318e-05, - "loss": 2.8027, + "learning_rate": 4.402216930673723e-05, + "loss": 2.7855, "step": 36100 }, { "epoch": 0.12, - "learning_rate": 4.400731703279422e-05, - "loss": 2.781, + "learning_rate": 4.400561021894426e-05, + "loss": 2.7878, "step": 36200 }, { "epoch": 0.12, - "learning_rate": 4.399076265995663e-05, - "loss": 2.7845, + "learning_rate": 4.398905113115129e-05, + "loss": 2.7891, "step": 36300 }, { "epoch": 0.12, - "learning_rate": 4.397420828711905e-05, - "loss": 2.794, + "learning_rate": 4.397249204335831e-05, + "loss": 2.7766, "step": 36400 }, { "epoch": 0.12, - "learning_rate": 4.395765391428146e-05, - "loss": 2.8011, + "learning_rate": 4.3955932955565343e-05, + "loss": 2.7772, "step": 36500 }, { "epoch": 0.12, - "learning_rate": 4.394109954144387e-05, - "loss": 2.7792, + "learning_rate": 4.3939373867772374e-05, + "loss": 2.7807, "step": 36600 }, { "epoch": 0.12, - "learning_rate": 4.392454516860629e-05, - "loss": 2.796, + "learning_rate": 4.3922814779979404e-05, + "loss": 2.7763, "step": 36700 }, { "epoch": 0.12, - "learning_rate": 4.3907990795768704e-05, - "loss": 2.7843, + "learning_rate": 4.3906255692186434e-05, + "loss": 2.7632, "step": 36800 }, { "epoch": 0.12, - "learning_rate": 4.389143642293112e-05, - "loss": 2.8001, + "learning_rate": 4.388969660439346e-05, + "loss": 2.7803, "step": 36900 }, { "epoch": 0.12, - "learning_rate": 4.3874882050093535e-05, - "loss": 2.7942, + "learning_rate": 4.387313751660049e-05, + "loss": 2.7679, "step": 37000 }, { "epoch": 0.12, - "learning_rate": 4.385832767725595e-05, - "loss": 2.8005, + "learning_rate": 4.385657842880752e-05, + "loss": 2.7711, "step": 37100 }, { "epoch": 0.12, - "learning_rate": 4.3841773304418366e-05, - "loss": 2.8104, + "learning_rate": 4.384001934101454e-05, + "loss": 2.7788, "step": 37200 }, { "epoch": 0.12, - "learning_rate": 4.382521893158078e-05, - "loss": 2.8071, + "learning_rate": 4.382346025322157e-05, + "loss": 2.7683, "step": 37300 }, { "epoch": 0.12, - "learning_rate": 4.38086645587432e-05, - "loss": 2.7935, + "learning_rate": 4.3806901165428596e-05, + "loss": 2.7817, "step": 37400 }, { "epoch": 0.12, - "learning_rate": 4.379211018590561e-05, - "loss": 2.7958, + "learning_rate": 4.3790342077635626e-05, + "loss": 2.7799, "step": 37500 }, { "epoch": 0.12, - "learning_rate": 4.377555581306802e-05, - "loss": 2.7935, + "learning_rate": 4.377378298984266e-05, + "loss": 2.7864, "step": 37600 }, { "epoch": 0.12, - "learning_rate": 4.375900144023044e-05, - "loss": 2.7938, + "learning_rate": 4.375722390204969e-05, + "loss": 2.7847, "step": 37700 }, { "epoch": 0.13, - "learning_rate": 4.3742447067392853e-05, - "loss": 2.7942, + "learning_rate": 4.374066481425672e-05, + "loss": 2.7812, "step": 37800 }, { "epoch": 0.13, - "learning_rate": 4.372589269455527e-05, - "loss": 2.789, + "learning_rate": 4.372410572646374e-05, + "loss": 2.7792, "step": 37900 }, { "epoch": 0.13, - "learning_rate": 4.3709338321717685e-05, - "loss": 2.801, + "learning_rate": 4.370754663867077e-05, + "loss": 2.7858, "step": 38000 }, { "epoch": 0.13, - "learning_rate": 4.3692783948880104e-05, - "loss": 2.7911, + "learning_rate": 4.36909875508778e-05, + "loss": 2.7867, "step": 38100 }, { "epoch": 0.13, - "learning_rate": 4.3676229576042516e-05, - "loss": 2.799, + "learning_rate": 4.3674428463084825e-05, + "loss": 2.7868, "step": 38200 }, { "epoch": 0.13, - "learning_rate": 4.365967520320493e-05, - "loss": 2.7882, + "learning_rate": 4.3657869375291855e-05, + "loss": 2.7763, "step": 38300 }, { "epoch": 0.13, - "learning_rate": 4.364312083036735e-05, - "loss": 2.8009, + "learning_rate": 4.3641310287498885e-05, + "loss": 2.7739, "step": 38400 }, { "epoch": 0.13, - "learning_rate": 4.362656645752976e-05, - "loss": 2.8013, + "learning_rate": 4.362475119970591e-05, + "loss": 2.7827, "step": 38500 }, { "epoch": 0.13, - "learning_rate": 4.361001208469218e-05, - "loss": 2.8038, + "learning_rate": 4.3608192111912946e-05, + "loss": 2.7661, "step": 38600 }, { "epoch": 0.13, - "learning_rate": 4.359345771185459e-05, - "loss": 2.8046, + "learning_rate": 4.359163302411997e-05, + "loss": 2.7839, "step": 38700 }, { "epoch": 0.13, - "learning_rate": 4.3576903339017e-05, - "loss": 2.7774, + "learning_rate": 4.3575073936327e-05, + "loss": 2.7827, "step": 38800 }, { "epoch": 0.13, - "learning_rate": 4.356034896617942e-05, - "loss": 2.7825, + "learning_rate": 4.355851484853403e-05, + "loss": 2.7967, "step": 38900 }, { "epoch": 0.13, - "learning_rate": 4.3543794593341834e-05, - "loss": 2.7915, + "learning_rate": 4.3541955760741054e-05, + "loss": 2.78, "step": 39000 }, { "epoch": 0.13, - "learning_rate": 4.3527240220504254e-05, - "loss": 2.7815, + "learning_rate": 4.3525396672948084e-05, + "loss": 2.7883, "step": 39100 }, { "epoch": 0.13, - "learning_rate": 4.3510685847666666e-05, - "loss": 2.7868, + "learning_rate": 4.350883758515511e-05, + "loss": 2.7717, "step": 39200 }, { "epoch": 0.13, - "learning_rate": 4.349413147482908e-05, - "loss": 2.7939, + "learning_rate": 4.349227849736214e-05, + "loss": 2.7651, "step": 39300 }, { "epoch": 0.13, - "learning_rate": 4.34775771019915e-05, - "loss": 2.792, + "learning_rate": 4.347571940956917e-05, + "loss": 2.7869, "step": 39400 }, { "epoch": 0.13, - "learning_rate": 4.346102272915391e-05, - "loss": 2.796, + "learning_rate": 4.345916032177619e-05, + "loss": 2.7739, "step": 39500 }, { "epoch": 0.13, - "learning_rate": 4.344446835631633e-05, - "loss": 2.8059, + "learning_rate": 4.344260123398323e-05, + "loss": 2.7743, "step": 39600 }, { "epoch": 0.13, - "learning_rate": 4.342791398347874e-05, - "loss": 2.7839, + "learning_rate": 4.342604214619025e-05, + "loss": 2.7709, "step": 39700 }, { "epoch": 0.13, - "learning_rate": 4.341135961064115e-05, - "loss": 2.7855, + "learning_rate": 4.340948305839728e-05, + "loss": 2.7829, "step": 39800 }, { "epoch": 0.13, - "learning_rate": 4.3394805237803565e-05, - "loss": 2.7924, + "learning_rate": 4.339292397060431e-05, + "loss": 2.783, "step": 39900 }, { "epoch": 0.13, - "learning_rate": 4.337825086496598e-05, - "loss": 2.7899, + "learning_rate": 4.3376364882811336e-05, + "loss": 2.7823, "step": 40000 }, { "epoch": 0.13, - "learning_rate": 4.3361696492128396e-05, - "loss": 2.7974, + "learning_rate": 4.3359805795018366e-05, + "loss": 2.7846, "step": 40100 }, { "epoch": 0.13, - "learning_rate": 4.334514211929081e-05, - "loss": 2.8016, + "learning_rate": 4.334324670722539e-05, + "loss": 2.7777, "step": 40200 }, { "epoch": 0.13, - "learning_rate": 4.332858774645323e-05, - "loss": 2.7829, + "learning_rate": 4.332668761943242e-05, + "loss": 2.7726, "step": 40300 }, { "epoch": 0.13, - "learning_rate": 4.331203337361564e-05, - "loss": 2.7865, + "learning_rate": 4.331012853163945e-05, + "loss": 2.7785, "step": 40400 }, { "epoch": 0.13, - "learning_rate": 4.329547900077805e-05, - "loss": 2.8061, + "learning_rate": 4.3293569443846474e-05, + "loss": 2.776, "step": 40500 }, { "epoch": 0.13, - "learning_rate": 4.327892462794047e-05, - "loss": 2.7854, + "learning_rate": 4.327701035605351e-05, + "loss": 2.7743, "step": 40600 }, { "epoch": 0.13, - "learning_rate": 4.3262370255102884e-05, - "loss": 2.784, + "learning_rate": 4.3260451268260535e-05, + "loss": 2.7747, "step": 40700 }, { "epoch": 0.14, - "learning_rate": 4.32458158822653e-05, - "loss": 2.78, + "learning_rate": 4.3243892180467565e-05, + "loss": 2.7821, "step": 40800 }, { "epoch": 0.14, - "learning_rate": 4.3229261509427715e-05, - "loss": 2.8, + "learning_rate": 4.3227333092674595e-05, + "loss": 2.7702, "step": 40900 }, { "epoch": 0.14, - "learning_rate": 4.321270713659013e-05, - "loss": 2.7893, + "learning_rate": 4.321077400488162e-05, + "loss": 2.7712, "step": 41000 }, { "epoch": 0.14, - "learning_rate": 4.3196152763752546e-05, - "loss": 2.7955, + "learning_rate": 4.319421491708865e-05, + "loss": 2.7613, "step": 41100 }, { "epoch": 0.14, - "learning_rate": 4.317959839091496e-05, - "loss": 2.7765, + "learning_rate": 4.317765582929568e-05, + "loss": 2.769, "step": 41200 }, { "epoch": 0.14, - "learning_rate": 4.316304401807738e-05, - "loss": 2.7981, + "learning_rate": 4.31610967415027e-05, + "loss": 2.7878, "step": 41300 }, { "epoch": 0.14, - "learning_rate": 4.314648964523979e-05, - "loss": 2.7777, + "learning_rate": 4.314453765370973e-05, + "loss": 2.7684, "step": 41400 }, { "epoch": 0.14, - "learning_rate": 4.312993527240221e-05, - "loss": 2.7891, + "learning_rate": 4.312797856591676e-05, + "loss": 2.7804, "step": 41500 }, { "epoch": 0.14, - "learning_rate": 4.311338089956462e-05, - "loss": 2.7943, + "learning_rate": 4.3111419478123794e-05, + "loss": 2.7601, "step": 41600 }, { "epoch": 0.14, - "learning_rate": 4.309682652672703e-05, - "loss": 2.7886, + "learning_rate": 4.3094860390330824e-05, + "loss": 2.7866, "step": 41700 }, { "epoch": 0.14, - "learning_rate": 4.308027215388945e-05, - "loss": 2.7857, + "learning_rate": 4.307830130253785e-05, + "loss": 2.7717, "step": 41800 }, { "epoch": 0.14, - "learning_rate": 4.3063717781051865e-05, - "loss": 2.7895, + "learning_rate": 4.306174221474488e-05, + "loss": 2.7905, "step": 41900 }, { "epoch": 0.14, - "learning_rate": 4.3047163408214284e-05, - "loss": 2.7876, + "learning_rate": 4.30451831269519e-05, + "loss": 2.7613, "step": 42000 }, { "epoch": 0.14, - "learning_rate": 4.3030609035376696e-05, - "loss": 2.777, + "learning_rate": 4.302862403915893e-05, + "loss": 2.7759, "step": 42100 }, { "epoch": 0.14, - "learning_rate": 4.301405466253911e-05, - "loss": 2.7937, + "learning_rate": 4.301206495136596e-05, + "loss": 2.7852, "step": 42200 }, { "epoch": 0.14, - "learning_rate": 4.299750028970153e-05, - "loss": 2.8083, + "learning_rate": 4.2995505863572986e-05, + "loss": 2.7731, "step": 42300 }, { "epoch": 0.14, - "learning_rate": 4.298094591686394e-05, - "loss": 2.791, + "learning_rate": 4.2978946775780016e-05, + "loss": 2.7889, "step": 42400 }, { "epoch": 0.14, - "learning_rate": 4.296439154402636e-05, - "loss": 2.7965, + "learning_rate": 4.2962387687987046e-05, + "loss": 2.7963, "step": 42500 }, { "epoch": 0.14, - "learning_rate": 4.294783717118877e-05, - "loss": 2.7972, + "learning_rate": 4.2945828600194077e-05, + "loss": 2.7962, "step": 42600 }, { "epoch": 0.14, - "learning_rate": 4.293128279835118e-05, - "loss": 2.778, + "learning_rate": 4.292926951240111e-05, + "loss": 2.7735, "step": 42700 }, { "epoch": 0.14, - "learning_rate": 4.29147284255136e-05, - "loss": 2.7922, + "learning_rate": 4.291271042460813e-05, + "loss": 2.7668, "step": 42800 }, { "epoch": 0.14, - "learning_rate": 4.2898174052676014e-05, - "loss": 2.7884, + "learning_rate": 4.289615133681516e-05, + "loss": 2.7756, "step": 42900 }, { "epoch": 0.14, - "learning_rate": 4.288161967983843e-05, - "loss": 2.7881, + "learning_rate": 4.287959224902219e-05, + "loss": 2.7854, "step": 43000 }, { "epoch": 0.14, - "learning_rate": 4.2865065307000846e-05, - "loss": 2.7983, + "learning_rate": 4.2863033161229214e-05, + "loss": 2.786, "step": 43100 }, { "epoch": 0.14, - "learning_rate": 4.284851093416326e-05, - "loss": 2.802, + "learning_rate": 4.2846474073436245e-05, + "loss": 2.7745, "step": 43200 }, { "epoch": 0.14, - "learning_rate": 4.283195656132568e-05, - "loss": 2.7969, + "learning_rate": 4.282991498564327e-05, + "loss": 2.7814, "step": 43300 }, { "epoch": 0.14, - "learning_rate": 4.281540218848809e-05, - "loss": 2.7853, + "learning_rate": 4.28133558978503e-05, + "loss": 2.7922, "step": 43400 }, { "epoch": 0.14, - "learning_rate": 4.279884781565051e-05, - "loss": 2.7943, + "learning_rate": 4.279679681005733e-05, + "loss": 2.7818, "step": 43500 }, { "epoch": 0.14, - "learning_rate": 4.278229344281292e-05, - "loss": 2.7931, + "learning_rate": 4.278023772226436e-05, + "loss": 2.7714, "step": 43600 }, { "epoch": 0.14, - "learning_rate": 4.276573906997533e-05, - "loss": 2.7914, + "learning_rate": 4.276367863447139e-05, + "loss": 2.7854, "step": 43700 }, { "epoch": 0.15, - "learning_rate": 4.274918469713775e-05, - "loss": 2.7944, + "learning_rate": 4.274711954667841e-05, + "loss": 2.7787, "step": 43800 }, { "epoch": 0.15, - "learning_rate": 4.2732630324300164e-05, - "loss": 2.7865, + "learning_rate": 4.273056045888544e-05, + "loss": 2.788, "step": 43900 }, { "epoch": 0.15, - "learning_rate": 4.271607595146258e-05, - "loss": 2.7895, + "learning_rate": 4.2714001371092474e-05, + "loss": 2.7661, "step": 44000 }, { "epoch": 0.15, - "learning_rate": 4.2699521578624995e-05, - "loss": 2.7958, + "learning_rate": 4.26974422832995e-05, + "loss": 2.7924, "step": 44100 }, { "epoch": 0.15, - "learning_rate": 4.268296720578741e-05, - "loss": 2.7943, + "learning_rate": 4.268088319550653e-05, + "loss": 2.788, "step": 44200 }, { "epoch": 0.15, - "learning_rate": 4.2666412832949827e-05, - "loss": 2.8, + "learning_rate": 4.266432410771356e-05, + "loss": 2.7842, "step": 44300 }, { "epoch": 0.15, - "learning_rate": 4.264985846011224e-05, - "loss": 2.7964, + "learning_rate": 4.264776501992058e-05, + "loss": 2.7806, "step": 44400 }, { "epoch": 0.15, - "learning_rate": 4.263330408727466e-05, - "loss": 2.7872, + "learning_rate": 4.263120593212761e-05, + "loss": 2.7746, "step": 44500 }, { "epoch": 0.15, - "learning_rate": 4.261674971443707e-05, - "loss": 2.811, + "learning_rate": 4.261464684433464e-05, + "loss": 2.788, "step": 44600 }, { "epoch": 0.15, - "learning_rate": 4.260019534159949e-05, - "loss": 2.7929, + "learning_rate": 4.259808775654167e-05, + "loss": 2.7874, "step": 44700 }, { "epoch": 0.15, - "learning_rate": 4.25836409687619e-05, - "loss": 2.7927, + "learning_rate": 4.25815286687487e-05, + "loss": 2.7711, "step": 44800 }, { "epoch": 0.15, - "learning_rate": 4.2567086595924314e-05, - "loss": 2.7872, + "learning_rate": 4.2564969580955726e-05, + "loss": 2.7841, "step": 44900 }, { "epoch": 0.15, - "learning_rate": 4.255053222308673e-05, - "loss": 2.801, + "learning_rate": 4.2548410493162756e-05, + "loss": 2.7632, "step": 45000 }, { "epoch": 0.15, - "learning_rate": 4.2533977850249145e-05, - "loss": 2.8082, + "learning_rate": 4.253185140536978e-05, + "loss": 2.7884, "step": 45100 }, { "epoch": 0.15, - "learning_rate": 4.2517423477411564e-05, - "loss": 2.7926, + "learning_rate": 4.251529231757681e-05, + "loss": 2.7688, "step": 45200 }, { "epoch": 0.15, - "learning_rate": 4.2500869104573976e-05, - "loss": 2.7832, + "learning_rate": 4.249873322978384e-05, + "loss": 2.7905, "step": 45300 }, { "epoch": 0.15, - "learning_rate": 4.248431473173639e-05, - "loss": 2.7927, + "learning_rate": 4.2482174141990864e-05, + "loss": 2.7803, "step": 45400 }, { "epoch": 0.15, - "learning_rate": 4.246776035889881e-05, - "loss": 2.8016, + "learning_rate": 4.2465615054197894e-05, + "loss": 2.7682, "step": 45500 }, { "epoch": 0.15, - "learning_rate": 4.245120598606122e-05, - "loss": 2.8048, + "learning_rate": 4.2449055966404925e-05, + "loss": 2.7746, "step": 45600 }, { "epoch": 0.15, - "learning_rate": 4.243465161322364e-05, - "loss": 2.7821, + "learning_rate": 4.2432496878611955e-05, + "loss": 2.7714, "step": 45700 }, { "epoch": 0.15, - "learning_rate": 4.241809724038605e-05, - "loss": 2.7953, + "learning_rate": 4.2415937790818985e-05, + "loss": 2.7784, "step": 45800 }, { "epoch": 0.15, - "learning_rate": 4.2401542867548463e-05, - "loss": 2.8229, + "learning_rate": 4.239937870302601e-05, + "loss": 2.7796, "step": 45900 }, { "epoch": 0.15, - "learning_rate": 4.238498849471088e-05, - "loss": 2.7984, + "learning_rate": 4.238281961523304e-05, + "loss": 2.768, "step": 46000 }, { "epoch": 0.15, - "learning_rate": 4.2368434121873295e-05, - "loss": 2.79, + "learning_rate": 4.236626052744007e-05, + "loss": 2.787, "step": 46100 }, { "epoch": 0.15, - "learning_rate": 4.2351879749035714e-05, - "loss": 2.7909, + "learning_rate": 4.234970143964709e-05, + "loss": 2.7978, "step": 46200 }, { "epoch": 0.15, - "learning_rate": 4.2335325376198126e-05, - "loss": 2.7921, + "learning_rate": 4.233314235185412e-05, + "loss": 2.7752, "step": 46300 }, { "epoch": 0.15, - "learning_rate": 4.231877100336054e-05, - "loss": 2.7781, + "learning_rate": 4.2316583264061147e-05, + "loss": 2.766, "step": 46400 }, { "epoch": 0.15, - "learning_rate": 4.230221663052296e-05, - "loss": 2.7956, + "learning_rate": 4.230002417626818e-05, + "loss": 2.7893, "step": 46500 }, { "epoch": 0.15, - "learning_rate": 4.228566225768537e-05, - "loss": 2.7797, + "learning_rate": 4.228346508847521e-05, + "loss": 2.778, "step": 46600 }, { "epoch": 0.15, - "learning_rate": 4.226910788484779e-05, - "loss": 2.7934, + "learning_rate": 4.226690600068224e-05, + "loss": 2.7681, "step": 46700 }, { "epoch": 0.15, - "learning_rate": 4.22525535120102e-05, - "loss": 2.7917, + "learning_rate": 4.225034691288927e-05, + "loss": 2.7622, "step": 46800 }, { "epoch": 0.16, - "learning_rate": 4.223599913917261e-05, - "loss": 2.7984, + "learning_rate": 4.223378782509629e-05, + "loss": 2.7893, "step": 46900 }, { "epoch": 0.16, - "learning_rate": 4.221944476633503e-05, - "loss": 2.7823, + "learning_rate": 4.221722873730332e-05, + "loss": 2.8002, "step": 47000 }, { "epoch": 0.16, - "learning_rate": 4.2202890393497444e-05, - "loss": 2.807, + "learning_rate": 4.220066964951035e-05, + "loss": 2.7741, "step": 47100 }, { "epoch": 0.16, - "learning_rate": 4.2186336020659863e-05, - "loss": 2.7884, + "learning_rate": 4.2184110561717375e-05, + "loss": 2.7814, "step": 47200 }, { "epoch": 0.16, - "learning_rate": 4.2169781647822276e-05, - "loss": 2.7894, + "learning_rate": 4.2167551473924406e-05, + "loss": 2.7686, "step": 47300 }, { "epoch": 0.16, - "learning_rate": 4.215322727498469e-05, - "loss": 2.7958, + "learning_rate": 4.215099238613143e-05, + "loss": 2.7809, "step": 47400 }, { "epoch": 0.16, - "learning_rate": 4.213667290214711e-05, - "loss": 2.7967, + "learning_rate": 4.213443329833846e-05, + "loss": 2.7718, "step": 47500 }, { "epoch": 0.16, - "learning_rate": 4.212011852930952e-05, - "loss": 2.7945, + "learning_rate": 4.21178742105455e-05, + "loss": 2.764, "step": 47600 }, { "epoch": 0.16, - "learning_rate": 4.210356415647194e-05, - "loss": 2.7861, + "learning_rate": 4.210131512275252e-05, + "loss": 2.7808, "step": 47700 }, { "epoch": 0.16, - "learning_rate": 4.208700978363435e-05, - "loss": 2.7843, + "learning_rate": 4.208475603495955e-05, + "loss": 2.7696, "step": 47800 }, { "epoch": 0.16, - "learning_rate": 4.207045541079677e-05, - "loss": 2.7805, + "learning_rate": 4.2068196947166574e-05, + "loss": 2.7685, "step": 47900 }, { "epoch": 0.16, - "learning_rate": 4.205390103795918e-05, - "loss": 2.7883, + "learning_rate": 4.2051637859373604e-05, + "loss": 2.773, "step": 48000 }, { "epoch": 0.16, - "learning_rate": 4.2037346665121594e-05, - "loss": 2.7909, + "learning_rate": 4.2035078771580635e-05, + "loss": 2.7705, "step": 48100 }, { "epoch": 0.16, - "learning_rate": 4.202079229228401e-05, - "loss": 2.7922, + "learning_rate": 4.201851968378766e-05, + "loss": 2.7686, "step": 48200 }, { "epoch": 0.16, - "learning_rate": 4.2004237919446425e-05, - "loss": 2.7953, + "learning_rate": 4.200196059599469e-05, + "loss": 2.7807, "step": 48300 }, { "epoch": 0.16, - "learning_rate": 4.1987683546608844e-05, - "loss": 2.7883, + "learning_rate": 4.198540150820172e-05, + "loss": 2.7745, "step": 48400 }, { "epoch": 0.16, - "learning_rate": 4.197112917377126e-05, - "loss": 2.7902, + "learning_rate": 4.196884242040874e-05, + "loss": 2.7956, "step": 48500 }, { "epoch": 0.16, - "learning_rate": 4.195457480093367e-05, - "loss": 2.786, + "learning_rate": 4.195228333261578e-05, + "loss": 2.7742, "step": 48600 }, { "epoch": 0.16, - "learning_rate": 4.193802042809608e-05, - "loss": 2.787, + "learning_rate": 4.19357242448228e-05, + "loss": 2.7747, "step": 48700 }, { "epoch": 0.16, - "learning_rate": 4.1921466055258494e-05, - "loss": 2.7839, + "learning_rate": 4.191916515702983e-05, + "loss": 2.7816, "step": 48800 }, { "epoch": 0.16, - "learning_rate": 4.190491168242091e-05, - "loss": 2.7884, + "learning_rate": 4.1902606069236863e-05, + "loss": 2.7863, "step": 48900 }, { "epoch": 0.16, - "learning_rate": 4.1888357309583325e-05, - "loss": 2.7879, + "learning_rate": 4.188604698144389e-05, + "loss": 2.7786, "step": 49000 }, { "epoch": 0.16, - "learning_rate": 4.1871802936745744e-05, - "loss": 2.786, + "learning_rate": 4.186948789365092e-05, + "loss": 2.775, "step": 49100 }, { "epoch": 0.16, - "learning_rate": 4.1855248563908156e-05, - "loss": 2.7887, + "learning_rate": 4.185292880585794e-05, + "loss": 2.7834, "step": 49200 }, { "epoch": 0.16, - "learning_rate": 4.183869419107057e-05, - "loss": 2.7802, + "learning_rate": 4.183636971806497e-05, + "loss": 2.7661, "step": 49300 }, { "epoch": 0.16, - "learning_rate": 4.182213981823299e-05, - "loss": 2.7641, + "learning_rate": 4.1819810630272e-05, + "loss": 2.7726, "step": 49400 }, { "epoch": 0.16, - "learning_rate": 4.18055854453954e-05, - "loss": 2.7996, + "learning_rate": 4.1803251542479025e-05, + "loss": 2.7777, "step": 49500 }, { "epoch": 0.16, - "learning_rate": 4.178903107255782e-05, - "loss": 2.7944, + "learning_rate": 4.178669245468606e-05, + "loss": 2.7784, "step": 49600 }, { "epoch": 0.16, - "learning_rate": 4.177247669972023e-05, - "loss": 2.7959, + "learning_rate": 4.1770133366893085e-05, + "loss": 2.7934, "step": 49700 }, { "epoch": 0.16, - "learning_rate": 4.175592232688264e-05, - "loss": 2.7961, + "learning_rate": 4.1753574279100116e-05, + "loss": 2.7814, "step": 49800 }, { "epoch": 0.17, - "learning_rate": 4.173936795404506e-05, - "loss": 2.793, + "learning_rate": 4.1737015191307146e-05, + "loss": 2.7681, "step": 49900 }, { "epoch": 0.17, - "learning_rate": 4.1722813581207475e-05, - "loss": 2.8039, + "learning_rate": 4.172045610351417e-05, + "loss": 2.7667, "step": 50000 }, { "epoch": 0.17, - "learning_rate": 4.1706259208369894e-05, - "loss": 2.7899, + "learning_rate": 4.17038970157212e-05, + "loss": 2.783, "step": 50100 }, { "epoch": 0.17, - "learning_rate": 4.1689704835532306e-05, - "loss": 2.7861, + "learning_rate": 4.168733792792823e-05, + "loss": 2.767, "step": 50200 }, { "epoch": 0.17, - "learning_rate": 4.167315046269472e-05, - "loss": 2.7801, + "learning_rate": 4.1670778840135254e-05, + "loss": 2.7718, "step": 50300 }, { "epoch": 0.17, - "learning_rate": 4.165659608985714e-05, - "loss": 2.784, + "learning_rate": 4.1654219752342284e-05, + "loss": 2.7742, "step": 50400 }, { "epoch": 0.17, - "learning_rate": 4.164004171701955e-05, - "loss": 2.792, + "learning_rate": 4.163766066454931e-05, + "loss": 2.7618, "step": 50500 }, { "epoch": 0.17, - "learning_rate": 4.162348734418197e-05, - "loss": 2.796, + "learning_rate": 4.1621101576756345e-05, + "loss": 2.7596, "step": 50600 }, { "epoch": 0.17, - "learning_rate": 4.160693297134438e-05, - "loss": 2.8011, + "learning_rate": 4.1604542488963375e-05, + "loss": 2.7777, "step": 50700 }, { "epoch": 0.17, - "learning_rate": 4.159037859850679e-05, - "loss": 2.7767, + "learning_rate": 4.15879834011704e-05, + "loss": 2.7843, "step": 50800 }, { "epoch": 0.17, - "learning_rate": 4.157382422566921e-05, - "loss": 2.7933, + "learning_rate": 4.157142431337743e-05, + "loss": 2.7709, "step": 50900 }, { "epoch": 0.17, - "learning_rate": 4.1557269852831624e-05, - "loss": 2.7884, + "learning_rate": 4.155486522558445e-05, + "loss": 2.7638, "step": 51000 }, { "epoch": 0.17, - "learning_rate": 4.154071547999404e-05, - "loss": 2.7817, + "learning_rate": 4.153830613779148e-05, + "loss": 2.7725, "step": 51100 }, { "epoch": 0.17, - "learning_rate": 4.1524161107156456e-05, - "loss": 2.7961, + "learning_rate": 4.152174704999851e-05, + "loss": 2.7636, "step": 51200 }, { "epoch": 0.17, - "learning_rate": 4.1507606734318875e-05, - "loss": 2.7861, + "learning_rate": 4.1505187962205536e-05, + "loss": 2.7787, "step": 51300 }, { "epoch": 0.17, - "learning_rate": 4.149105236148129e-05, - "loss": 2.7916, + "learning_rate": 4.148862887441257e-05, + "loss": 2.7779, "step": 51400 }, { "epoch": 0.17, - "learning_rate": 4.14744979886437e-05, - "loss": 2.7818, + "learning_rate": 4.14720697866196e-05, + "loss": 2.7758, "step": 51500 }, { "epoch": 0.17, - "learning_rate": 4.145794361580612e-05, - "loss": 2.7878, + "learning_rate": 4.145551069882663e-05, + "loss": 2.7652, "step": 51600 }, { "epoch": 0.17, - "learning_rate": 4.144138924296853e-05, - "loss": 2.776, + "learning_rate": 4.143895161103366e-05, + "loss": 2.7841, "step": 51700 }, { "epoch": 0.17, - "learning_rate": 4.142483487013095e-05, - "loss": 2.7734, + "learning_rate": 4.142239252324068e-05, + "loss": 2.7681, "step": 51800 }, { "epoch": 0.17, - "learning_rate": 4.140828049729336e-05, - "loss": 2.7842, + "learning_rate": 4.140583343544771e-05, + "loss": 2.7802, "step": 51900 }, { "epoch": 0.17, - "learning_rate": 4.1391726124455774e-05, - "loss": 2.7845, + "learning_rate": 4.138927434765474e-05, + "loss": 2.7882, "step": 52000 }, { "epoch": 0.17, - "learning_rate": 4.137517175161819e-05, - "loss": 2.7828, + "learning_rate": 4.1372715259861765e-05, + "loss": 2.781, "step": 52100 }, { "epoch": 0.17, - "learning_rate": 4.1358617378780605e-05, - "loss": 2.7961, + "learning_rate": 4.1356156172068796e-05, + "loss": 2.7906, "step": 52200 }, { "epoch": 0.17, - "learning_rate": 4.1342063005943024e-05, - "loss": 2.7807, + "learning_rate": 4.133959708427582e-05, + "loss": 2.7847, "step": 52300 }, { "epoch": 0.17, - "learning_rate": 4.1325508633105437e-05, - "loss": 2.7985, + "learning_rate": 4.132303799648285e-05, + "loss": 2.7832, "step": 52400 }, { "epoch": 0.17, - "learning_rate": 4.130895426026785e-05, - "loss": 2.7775, + "learning_rate": 4.130647890868988e-05, + "loss": 2.7637, "step": 52500 }, { "epoch": 0.17, - "learning_rate": 4.129239988743027e-05, - "loss": 2.7883, + "learning_rate": 4.128991982089691e-05, + "loss": 2.7748, "step": 52600 }, { "epoch": 0.17, - "learning_rate": 4.127584551459268e-05, - "loss": 2.784, + "learning_rate": 4.127336073310394e-05, + "loss": 2.7669, "step": 52700 }, { "epoch": 0.17, - "learning_rate": 4.12592911417551e-05, - "loss": 2.7895, + "learning_rate": 4.1256801645310964e-05, + "loss": 2.7726, "step": 52800 }, { "epoch": 0.18, - "learning_rate": 4.124273676891751e-05, - "loss": 2.7774, + "learning_rate": 4.1240242557517994e-05, + "loss": 2.7811, "step": 52900 }, { "epoch": 0.18, - "learning_rate": 4.1226182396079924e-05, - "loss": 2.8022, + "learning_rate": 4.1223683469725024e-05, + "loss": 2.7769, "step": 53000 }, { "epoch": 0.18, - "learning_rate": 4.120962802324234e-05, - "loss": 2.7842, + "learning_rate": 4.120712438193205e-05, + "loss": 2.7668, "step": 53100 }, { "epoch": 0.18, - "learning_rate": 4.1193073650404755e-05, - "loss": 2.7796, + "learning_rate": 4.119056529413908e-05, + "loss": 2.786, "step": 53200 }, { "epoch": 0.18, - "learning_rate": 4.1176519277567174e-05, - "loss": 2.7882, + "learning_rate": 4.117400620634611e-05, + "loss": 2.7671, "step": 53300 }, { "epoch": 0.18, - "learning_rate": 4.1159964904729586e-05, - "loss": 2.7793, + "learning_rate": 4.115744711855313e-05, + "loss": 2.7594, "step": 53400 }, { "epoch": 0.18, - "learning_rate": 4.1143410531892e-05, - "loss": 2.7917, + "learning_rate": 4.114088803076016e-05, + "loss": 2.7735, "step": 53500 }, { "epoch": 0.18, - "learning_rate": 4.112685615905442e-05, - "loss": 2.7956, + "learning_rate": 4.112432894296719e-05, + "loss": 2.7822, "step": 53600 }, { "epoch": 0.18, - "learning_rate": 4.111030178621683e-05, - "loss": 2.7784, + "learning_rate": 4.110776985517422e-05, + "loss": 2.7798, "step": 53700 }, { "epoch": 0.18, - "learning_rate": 4.109374741337925e-05, - "loss": 2.7952, + "learning_rate": 4.1091210767381246e-05, + "loss": 2.7783, "step": 53800 }, { "epoch": 0.18, - "learning_rate": 4.107719304054166e-05, - "loss": 2.776, + "learning_rate": 4.107465167958828e-05, + "loss": 2.7799, "step": 53900 }, { "epoch": 0.18, - "learning_rate": 4.1060638667704073e-05, - "loss": 2.7925, + "learning_rate": 4.105809259179531e-05, + "loss": 2.7772, "step": 54000 }, { "epoch": 0.18, - "learning_rate": 4.104408429486649e-05, - "loss": 2.7847, + "learning_rate": 4.104153350400233e-05, + "loss": 2.7765, "step": 54100 }, { "epoch": 0.18, - "learning_rate": 4.1027529922028905e-05, - "loss": 2.7895, + "learning_rate": 4.102497441620936e-05, + "loss": 2.7779, "step": 54200 }, { "epoch": 0.18, - "learning_rate": 4.1010975549191324e-05, - "loss": 2.7828, + "learning_rate": 4.100841532841639e-05, + "loss": 2.7596, "step": 54300 }, { "epoch": 0.18, - "learning_rate": 4.0994421176353736e-05, - "loss": 2.7868, + "learning_rate": 4.0991856240623415e-05, + "loss": 2.7687, "step": 54400 }, { "epoch": 0.18, - "learning_rate": 4.0977866803516155e-05, - "loss": 2.7786, + "learning_rate": 4.0975297152830445e-05, + "loss": 2.7869, "step": 54500 }, { "epoch": 0.18, - "learning_rate": 4.096131243067857e-05, - "loss": 2.7958, + "learning_rate": 4.0958738065037475e-05, + "loss": 2.7692, "step": 54600 }, { "epoch": 0.18, - "learning_rate": 4.094475805784098e-05, - "loss": 2.7852, + "learning_rate": 4.0942178977244506e-05, + "loss": 2.7662, "step": 54700 }, { "epoch": 0.18, - "learning_rate": 4.09282036850034e-05, - "loss": 2.7888, + "learning_rate": 4.0925619889451536e-05, + "loss": 2.7748, "step": 54800 }, { "epoch": 0.18, - "learning_rate": 4.091164931216581e-05, - "loss": 2.7915, + "learning_rate": 4.090906080165856e-05, + "loss": 2.7703, "step": 54900 }, { "epoch": 0.18, - "learning_rate": 4.089509493932823e-05, - "loss": 2.7806, + "learning_rate": 4.089250171386559e-05, + "loss": 2.7659, "step": 55000 }, { "epoch": 0.18, - "learning_rate": 4.087854056649064e-05, - "loss": 2.7953, + "learning_rate": 4.087594262607261e-05, + "loss": 2.7831, "step": 55100 }, { "epoch": 0.18, - "learning_rate": 4.0861986193653054e-05, - "loss": 2.7831, + "learning_rate": 4.0859383538279644e-05, + "loss": 2.7628, "step": 55200 }, { "epoch": 0.18, - "learning_rate": 4.0845431820815473e-05, - "loss": 2.7888, + "learning_rate": 4.0842824450486674e-05, + "loss": 2.7835, "step": 55300 }, { "epoch": 0.18, - "learning_rate": 4.0828877447977886e-05, - "loss": 2.789, + "learning_rate": 4.08262653626937e-05, + "loss": 2.7772, "step": 55400 }, { "epoch": 0.18, - "learning_rate": 4.0812323075140305e-05, - "loss": 2.7955, + "learning_rate": 4.080970627490073e-05, + "loss": 2.7799, "step": 55500 }, { "epoch": 0.18, - "learning_rate": 4.079576870230272e-05, - "loss": 2.7809, + "learning_rate": 4.079314718710776e-05, + "loss": 2.8003, "step": 55600 }, { "epoch": 0.18, - "learning_rate": 4.077921432946513e-05, - "loss": 2.7905, + "learning_rate": 4.077658809931479e-05, + "loss": 2.7896, "step": 55700 }, { "epoch": 0.18, - "learning_rate": 4.076265995662755e-05, - "loss": 2.7944, + "learning_rate": 4.076002901152182e-05, + "loss": 2.7685, "step": 55800 }, { "epoch": 0.19, - "learning_rate": 4.074610558378996e-05, - "loss": 2.7847, + "learning_rate": 4.074346992372884e-05, + "loss": 2.7851, "step": 55900 }, { "epoch": 0.19, - "learning_rate": 4.072955121095238e-05, - "loss": 2.786, + "learning_rate": 4.072691083593587e-05, + "loss": 2.78, "step": 56000 }, { "epoch": 0.19, - "learning_rate": 4.071299683811479e-05, - "loss": 2.7725, + "learning_rate": 4.07103517481429e-05, + "loss": 2.7783, "step": 56100 }, { "epoch": 0.19, - "learning_rate": 4.0696442465277204e-05, - "loss": 2.7878, + "learning_rate": 4.0693792660349926e-05, + "loss": 2.7738, "step": 56200 }, { "epoch": 0.19, - "learning_rate": 4.067988809243962e-05, - "loss": 2.7796, + "learning_rate": 4.0677233572556957e-05, + "loss": 2.7777, "step": 56300 }, { "epoch": 0.19, - "learning_rate": 4.0663333719602035e-05, - "loss": 2.7756, + "learning_rate": 4.066067448476398e-05, + "loss": 2.7716, "step": 56400 }, { "epoch": 0.19, - "learning_rate": 4.0646779346764454e-05, - "loss": 2.7907, + "learning_rate": 4.064411539697101e-05, + "loss": 2.7767, "step": 56500 }, { "epoch": 0.19, - "learning_rate": 4.063022497392687e-05, - "loss": 2.7865, + "learning_rate": 4.062755630917805e-05, + "loss": 2.7845, "step": 56600 }, { "epoch": 0.19, - "learning_rate": 4.061367060108928e-05, - "loss": 2.7901, + "learning_rate": 4.061099722138507e-05, + "loss": 2.7696, "step": 56700 }, { "epoch": 0.19, - "learning_rate": 4.05971162282517e-05, - "loss": 2.8029, + "learning_rate": 4.05944381335921e-05, + "loss": 2.7712, "step": 56800 }, { "epoch": 0.19, - "learning_rate": 4.058056185541411e-05, - "loss": 2.7949, + "learning_rate": 4.0577879045799125e-05, + "loss": 2.7781, "step": 56900 }, { "epoch": 0.19, - "learning_rate": 4.056400748257653e-05, - "loss": 2.7748, + "learning_rate": 4.0561319958006155e-05, + "loss": 2.7841, "step": 57000 }, { "epoch": 0.19, - "learning_rate": 4.054745310973894e-05, - "loss": 2.8028, + "learning_rate": 4.0544760870213185e-05, + "loss": 2.7655, "step": 57100 }, { "epoch": 0.19, - "learning_rate": 4.0530898736901354e-05, - "loss": 2.7769, + "learning_rate": 4.052820178242021e-05, + "loss": 2.7694, "step": 57200 }, { "epoch": 0.19, - "learning_rate": 4.051434436406377e-05, - "loss": 2.7719, + "learning_rate": 4.051164269462724e-05, + "loss": 2.7764, "step": 57300 }, { "epoch": 0.19, - "learning_rate": 4.0497789991226185e-05, - "loss": 2.7808, + "learning_rate": 4.049508360683427e-05, + "loss": 2.7616, "step": 57400 }, { "epoch": 0.19, - "learning_rate": 4.04812356183886e-05, - "loss": 2.7854, + "learning_rate": 4.047852451904129e-05, + "loss": 2.7696, "step": 57500 }, { "epoch": 0.19, - "learning_rate": 4.046468124555101e-05, - "loss": 2.7802, + "learning_rate": 4.046196543124833e-05, + "loss": 2.7623, "step": 57600 }, { "epoch": 0.19, - "learning_rate": 4.044812687271343e-05, - "loss": 2.7759, + "learning_rate": 4.0445406343455354e-05, + "loss": 2.7686, "step": 57700 }, { "epoch": 0.19, - "learning_rate": 4.043157249987584e-05, - "loss": 2.7911, + "learning_rate": 4.0428847255662384e-05, + "loss": 2.7781, "step": 57800 }, { "epoch": 0.19, - "learning_rate": 4.041501812703826e-05, - "loss": 2.7907, + "learning_rate": 4.0412288167869414e-05, + "loss": 2.7686, "step": 57900 }, { "epoch": 0.19, - "learning_rate": 4.039846375420067e-05, - "loss": 2.8072, + "learning_rate": 4.039572908007644e-05, + "loss": 2.7831, "step": 58000 }, { "epoch": 0.19, - "learning_rate": 4.0381909381363085e-05, - "loss": 2.7954, + "learning_rate": 4.037916999228347e-05, + "loss": 2.7731, "step": 58100 }, { "epoch": 0.19, - "learning_rate": 4.0365355008525504e-05, - "loss": 2.7866, + "learning_rate": 4.036261090449049e-05, + "loss": 2.7776, "step": 58200 }, { "epoch": 0.19, - "learning_rate": 4.0348800635687916e-05, - "loss": 2.7845, + "learning_rate": 4.034605181669752e-05, + "loss": 2.7802, "step": 58300 }, { "epoch": 0.19, - "learning_rate": 4.0332246262850335e-05, - "loss": 2.8085, + "learning_rate": 4.032949272890455e-05, + "loss": 2.7728, "step": 58400 }, { "epoch": 0.19, - "learning_rate": 4.031569189001275e-05, - "loss": 2.7801, + "learning_rate": 4.0312933641111576e-05, + "loss": 2.761, "step": 58500 }, { "epoch": 0.19, - "learning_rate": 4.029913751717516e-05, - "loss": 2.7795, + "learning_rate": 4.029637455331861e-05, + "loss": 2.7785, "step": 58600 }, { "epoch": 0.19, - "learning_rate": 4.028258314433758e-05, - "loss": 2.7817, + "learning_rate": 4.0279815465525636e-05, + "loss": 2.7761, "step": 58700 }, { "epoch": 0.19, - "learning_rate": 4.026602877149999e-05, - "loss": 2.7872, + "learning_rate": 4.0263256377732667e-05, + "loss": 2.76, "step": 58800 }, { "epoch": 0.2, - "learning_rate": 4.024947439866241e-05, - "loss": 2.7774, + "learning_rate": 4.02466972899397e-05, + "loss": 2.7783, "step": 58900 }, { "epoch": 0.2, - "learning_rate": 4.023292002582482e-05, - "loss": 2.7946, + "learning_rate": 4.023013820214672e-05, + "loss": 2.7653, "step": 59000 }, { "epoch": 0.2, - "learning_rate": 4.0216365652987234e-05, - "loss": 2.7873, + "learning_rate": 4.021357911435375e-05, + "loss": 2.7826, "step": 59100 }, { "epoch": 0.2, - "learning_rate": 4.019981128014965e-05, - "loss": 2.7757, + "learning_rate": 4.019702002656078e-05, + "loss": 2.7748, "step": 59200 }, { "epoch": 0.2, - "learning_rate": 4.0183256907312066e-05, - "loss": 2.7628, + "learning_rate": 4.0180460938767805e-05, + "loss": 2.7869, "step": 59300 }, { "epoch": 0.2, - "learning_rate": 4.0166702534474485e-05, - "loss": 2.7944, + "learning_rate": 4.0163901850974835e-05, + "loss": 2.7672, "step": 59400 }, { "epoch": 0.2, - "learning_rate": 4.01501481616369e-05, - "loss": 2.7818, + "learning_rate": 4.014734276318186e-05, + "loss": 2.7845, "step": 59500 }, { "epoch": 0.2, - "learning_rate": 4.013359378879931e-05, - "loss": 2.7796, + "learning_rate": 4.0130783675388895e-05, + "loss": 2.7661, "step": 59600 }, { "epoch": 0.2, - "learning_rate": 4.011703941596173e-05, - "loss": 2.7803, + "learning_rate": 4.011422458759592e-05, + "loss": 2.7639, "step": 59700 }, { "epoch": 0.2, - "learning_rate": 4.010048504312414e-05, - "loss": 2.779, + "learning_rate": 4.009766549980295e-05, + "loss": 2.7762, "step": 59800 }, { "epoch": 0.2, - "learning_rate": 4.008393067028656e-05, - "loss": 2.7898, + "learning_rate": 4.008110641200998e-05, + "loss": 2.7801, "step": 59900 }, { "epoch": 0.2, - "learning_rate": 4.006737629744897e-05, - "loss": 2.7854, + "learning_rate": 4.0064547324217e-05, + "loss": 2.7627, "step": 60000 }, { "epoch": 0.2, - "learning_rate": 4.0050821924611384e-05, - "loss": 2.7845, + "learning_rate": 4.004798823642403e-05, + "loss": 2.7793, "step": 60100 }, { "epoch": 0.2, - "learning_rate": 4.00342675517738e-05, - "loss": 2.7952, + "learning_rate": 4.0031429148631064e-05, + "loss": 2.7856, "step": 60200 }, { "epoch": 0.2, - "learning_rate": 4.0017713178936215e-05, - "loss": 2.7945, + "learning_rate": 4.001487006083809e-05, + "loss": 2.784, "step": 60300 }, { "epoch": 0.2, - "learning_rate": 4.0001158806098634e-05, - "loss": 2.7772, + "learning_rate": 3.999831097304512e-05, + "loss": 2.7667, "step": 60400 }, { "epoch": 0.2, - "learning_rate": 3.9984604433261047e-05, - "loss": 2.7836, + "learning_rate": 3.998175188525215e-05, + "loss": 2.777, "step": 60500 }, { "epoch": 0.2, - "learning_rate": 3.996805006042346e-05, - "loss": 2.7879, + "learning_rate": 3.996519279745918e-05, + "loss": 2.7757, "step": 60600 }, { "epoch": 0.2, - "learning_rate": 3.995149568758588e-05, - "loss": 2.78, + "learning_rate": 3.994863370966621e-05, + "loss": 2.7821, "step": 60700 }, { "epoch": 0.2, - "learning_rate": 3.993494131474829e-05, - "loss": 2.7872, + "learning_rate": 3.993207462187323e-05, + "loss": 2.7787, "step": 60800 }, { "epoch": 0.2, - "learning_rate": 3.991838694191071e-05, - "loss": 2.7965, + "learning_rate": 3.991551553408026e-05, + "loss": 2.7794, "step": 60900 }, { "epoch": 0.2, - "learning_rate": 3.990183256907312e-05, - "loss": 2.785, + "learning_rate": 3.9898956446287286e-05, + "loss": 2.7627, "step": 61000 }, { "epoch": 0.2, - "learning_rate": 3.988527819623554e-05, - "loss": 2.7935, + "learning_rate": 3.9882397358494316e-05, + "loss": 2.7562, "step": 61100 }, { "epoch": 0.2, - "learning_rate": 3.986872382339795e-05, - "loss": 2.7897, + "learning_rate": 3.9865838270701346e-05, + "loss": 2.7877, "step": 61200 }, { "epoch": 0.2, - "learning_rate": 3.9852169450560365e-05, - "loss": 2.7843, + "learning_rate": 3.984927918290837e-05, + "loss": 2.7775, "step": 61300 }, { "epoch": 0.2, - "learning_rate": 3.9835615077722784e-05, - "loss": 2.7941, + "learning_rate": 3.98327200951154e-05, + "loss": 2.7629, "step": 61400 }, { "epoch": 0.2, - "learning_rate": 3.9819060704885196e-05, - "loss": 2.7945, + "learning_rate": 3.981616100732243e-05, + "loss": 2.7684, "step": 61500 }, { "epoch": 0.2, - "learning_rate": 3.9802506332047615e-05, - "loss": 2.7803, + "learning_rate": 3.979960191952946e-05, + "loss": 2.7833, "step": 61600 }, { "epoch": 0.2, - "learning_rate": 3.978595195921003e-05, - "loss": 2.7973, + "learning_rate": 3.978304283173649e-05, + "loss": 2.7686, "step": 61700 }, { "epoch": 0.2, - "learning_rate": 3.976939758637244e-05, - "loss": 2.7816, + "learning_rate": 3.9766483743943515e-05, + "loss": 2.7748, "step": 61800 }, { - "epoch": 0.2, - "learning_rate": 3.975284321353486e-05, - "loss": 2.7564, + "epoch": 0.21, + "learning_rate": 3.9749924656150545e-05, + "loss": 2.7687, "step": 61900 }, { "epoch": 0.21, - "learning_rate": 3.973628884069727e-05, - "loss": 2.7831, + "learning_rate": 3.9733365568357575e-05, + "loss": 2.7628, "step": 62000 }, { "epoch": 0.21, - "learning_rate": 3.971973446785969e-05, - "loss": 2.7849, + "learning_rate": 3.97168064805646e-05, + "loss": 2.7854, "step": 62100 }, { "epoch": 0.21, - "learning_rate": 3.97031800950221e-05, - "loss": 2.7755, + "learning_rate": 3.970024739277163e-05, + "loss": 2.7701, "step": 62200 }, { "epoch": 0.21, - "learning_rate": 3.9686625722184515e-05, - "loss": 2.7843, + "learning_rate": 3.968368830497865e-05, + "loss": 2.7823, "step": 62300 }, { "epoch": 0.21, - "learning_rate": 3.9670071349346934e-05, - "loss": 2.7807, + "learning_rate": 3.966712921718568e-05, + "loss": 2.7639, "step": 62400 }, { "epoch": 0.21, - "learning_rate": 3.9653516976509346e-05, - "loss": 2.7897, + "learning_rate": 3.965057012939271e-05, + "loss": 2.7754, "step": 62500 }, { "epoch": 0.21, - "learning_rate": 3.9636962603671765e-05, - "loss": 2.7871, + "learning_rate": 3.9634011041599743e-05, + "loss": 2.7737, "step": 62600 }, { "epoch": 0.21, - "learning_rate": 3.962040823083418e-05, - "loss": 2.7933, + "learning_rate": 3.9617451953806774e-05, + "loss": 2.7724, "step": 62700 }, { "epoch": 0.21, - "learning_rate": 3.960385385799659e-05, - "loss": 2.794, + "learning_rate": 3.96008928660138e-05, + "loss": 2.7747, "step": 62800 }, { "epoch": 0.21, - "learning_rate": 3.958729948515901e-05, - "loss": 2.7691, + "learning_rate": 3.958433377822083e-05, + "loss": 2.7731, "step": 62900 }, { "epoch": 0.21, - "learning_rate": 3.957074511232142e-05, - "loss": 2.7842, + "learning_rate": 3.956777469042786e-05, + "loss": 2.7666, "step": 63000 }, { "epoch": 0.21, - "learning_rate": 3.955419073948384e-05, - "loss": 2.792, + "learning_rate": 3.955121560263488e-05, + "loss": 2.7722, "step": 63100 }, { "epoch": 0.21, - "learning_rate": 3.953763636664625e-05, - "loss": 2.7882, + "learning_rate": 3.953465651484191e-05, + "loss": 2.7725, "step": 63200 }, { "epoch": 0.21, - "learning_rate": 3.9521081993808664e-05, - "loss": 2.7942, + "learning_rate": 3.951809742704894e-05, + "loss": 2.7758, "step": 63300 }, { "epoch": 0.21, - "learning_rate": 3.9504527620971083e-05, - "loss": 2.7781, + "learning_rate": 3.9501538339255965e-05, + "loss": 2.7809, "step": 63400 }, { "epoch": 0.21, - "learning_rate": 3.9487973248133496e-05, - "loss": 2.7821, + "learning_rate": 3.9484979251462996e-05, + "loss": 2.785, "step": 63500 }, { "epoch": 0.21, - "learning_rate": 3.9471418875295915e-05, - "loss": 2.7771, + "learning_rate": 3.9468420163670026e-05, + "loss": 2.7749, "step": 63600 }, { "epoch": 0.21, - "learning_rate": 3.945486450245833e-05, - "loss": 2.7866, + "learning_rate": 3.9451861075877056e-05, + "loss": 2.7759, "step": 63700 }, { "epoch": 0.21, - "learning_rate": 3.943831012962074e-05, - "loss": 2.769, + "learning_rate": 3.943530198808409e-05, + "loss": 2.7773, "step": 63800 }, { "epoch": 0.21, - "learning_rate": 3.942175575678316e-05, - "loss": 2.7881, + "learning_rate": 3.941874290029111e-05, + "loss": 2.7561, "step": 63900 }, { "epoch": 0.21, - "learning_rate": 3.940520138394557e-05, - "loss": 2.8001, + "learning_rate": 3.940218381249814e-05, + "loss": 2.7739, "step": 64000 }, { "epoch": 0.21, - "learning_rate": 3.938864701110799e-05, - "loss": 2.8044, + "learning_rate": 3.9385624724705164e-05, + "loss": 2.7602, "step": 64100 }, { "epoch": 0.21, - "learning_rate": 3.93720926382704e-05, - "loss": 2.7891, + "learning_rate": 3.9369065636912194e-05, + "loss": 2.7807, "step": 64200 }, { "epoch": 0.21, - "learning_rate": 3.935553826543282e-05, - "loss": 2.7721, + "learning_rate": 3.9352506549119225e-05, + "loss": 2.7672, "step": 64300 }, { "epoch": 0.21, - "learning_rate": 3.933898389259523e-05, - "loss": 2.7929, + "learning_rate": 3.933594746132625e-05, + "loss": 2.7706, "step": 64400 }, { "epoch": 0.21, - "learning_rate": 3.9322429519757645e-05, - "loss": 2.7804, + "learning_rate": 3.931938837353328e-05, + "loss": 2.7772, "step": 64500 }, { "epoch": 0.21, - "learning_rate": 3.9305875146920064e-05, - "loss": 2.7829, + "learning_rate": 3.930282928574031e-05, + "loss": 2.7707, "step": 64600 }, { "epoch": 0.21, - "learning_rate": 3.928932077408248e-05, - "loss": 2.7849, + "learning_rate": 3.928627019794734e-05, + "loss": 2.7742, "step": 64700 }, { "epoch": 0.21, - "learning_rate": 3.9272766401244896e-05, - "loss": 2.777, + "learning_rate": 3.926971111015437e-05, + "loss": 2.7632, "step": 64800 }, { "epoch": 0.21, - "learning_rate": 3.925621202840731e-05, - "loss": 2.7913, + "learning_rate": 3.925315202236139e-05, + "loss": 2.7765, "step": 64900 }, { "epoch": 0.22, - "learning_rate": 3.923965765556972e-05, - "loss": 2.7782, + "learning_rate": 3.923659293456842e-05, + "loss": 2.7612, "step": 65000 }, { "epoch": 0.22, - "learning_rate": 3.922310328273214e-05, - "loss": 2.7918, + "learning_rate": 3.9220033846775453e-05, + "loss": 2.7697, "step": 65100 }, { "epoch": 0.22, - "learning_rate": 3.920654890989455e-05, - "loss": 2.8006, + "learning_rate": 3.920347475898248e-05, + "loss": 2.7762, "step": 65200 }, { "epoch": 0.22, - "learning_rate": 3.918999453705697e-05, - "loss": 2.7949, + "learning_rate": 3.918691567118951e-05, + "loss": 2.7635, "step": 65300 }, { "epoch": 0.22, - "learning_rate": 3.917344016421938e-05, - "loss": 2.7864, + "learning_rate": 3.917035658339653e-05, + "loss": 2.7821, "step": 65400 }, { "epoch": 0.22, - "learning_rate": 3.9156885791381795e-05, - "loss": 2.788, + "learning_rate": 3.915379749560356e-05, + "loss": 2.7695, "step": 65500 }, { "epoch": 0.22, - "learning_rate": 3.9140331418544214e-05, - "loss": 2.7924, + "learning_rate": 3.913723840781059e-05, + "loss": 2.7734, "step": 65600 }, { "epoch": 0.22, - "learning_rate": 3.9123777045706626e-05, - "loss": 2.788, + "learning_rate": 3.912067932001762e-05, + "loss": 2.7684, "step": 65700 }, { "epoch": 0.22, - "learning_rate": 3.9107222672869045e-05, - "loss": 2.7858, + "learning_rate": 3.910412023222465e-05, + "loss": 2.7958, "step": 65800 }, { "epoch": 0.22, - "learning_rate": 3.909066830003146e-05, - "loss": 2.7777, + "learning_rate": 3.9087561144431676e-05, + "loss": 2.7738, "step": 65900 }, { "epoch": 0.22, - "learning_rate": 3.907411392719387e-05, - "loss": 2.7732, + "learning_rate": 3.9071002056638706e-05, + "loss": 2.7866, "step": 66000 }, { "epoch": 0.22, - "learning_rate": 3.905755955435629e-05, - "loss": 2.7837, + "learning_rate": 3.9054442968845736e-05, + "loss": 2.7766, "step": 66100 }, { "epoch": 0.22, - "learning_rate": 3.90410051815187e-05, - "loss": 2.7643, + "learning_rate": 3.903788388105276e-05, + "loss": 2.7637, "step": 66200 }, { "epoch": 0.22, - "learning_rate": 3.9024450808681114e-05, - "loss": 2.7805, + "learning_rate": 3.902132479325979e-05, + "loss": 2.7692, "step": 66300 }, { "epoch": 0.22, - "learning_rate": 3.9007896435843526e-05, - "loss": 2.7761, + "learning_rate": 3.900476570546682e-05, + "loss": 2.7877, "step": 66400 }, { "epoch": 0.22, - "learning_rate": 3.8991342063005945e-05, - "loss": 2.7846, + "learning_rate": 3.8988206617673844e-05, + "loss": 2.778, "step": 66500 }, { "epoch": 0.22, - "learning_rate": 3.897478769016836e-05, - "loss": 2.785, + "learning_rate": 3.897164752988088e-05, + "loss": 2.767, "step": 66600 }, { "epoch": 0.22, - "learning_rate": 3.895823331733077e-05, - "loss": 2.7691, + "learning_rate": 3.8955088442087904e-05, + "loss": 2.7792, "step": 66700 }, { "epoch": 0.22, - "learning_rate": 3.894167894449319e-05, - "loss": 2.7877, + "learning_rate": 3.8938529354294935e-05, + "loss": 2.7619, "step": 66800 }, { "epoch": 0.22, - "learning_rate": 3.89251245716556e-05, - "loss": 2.7766, + "learning_rate": 3.892197026650196e-05, + "loss": 2.7858, "step": 66900 }, { "epoch": 0.22, - "learning_rate": 3.890857019881802e-05, - "loss": 2.788, + "learning_rate": 3.890541117870899e-05, + "loss": 2.7774, "step": 67000 }, { "epoch": 0.22, - "learning_rate": 3.889201582598043e-05, - "loss": 2.7786, + "learning_rate": 3.888885209091602e-05, + "loss": 2.78, "step": 67100 }, { "epoch": 0.22, - "learning_rate": 3.8875461453142844e-05, - "loss": 2.7715, + "learning_rate": 3.887229300312304e-05, + "loss": 2.77, "step": 67200 }, { "epoch": 0.22, - "learning_rate": 3.885890708030526e-05, - "loss": 2.7873, + "learning_rate": 3.885573391533007e-05, + "loss": 2.7834, "step": 67300 }, { "epoch": 0.22, - "learning_rate": 3.8842352707467675e-05, - "loss": 2.7801, + "learning_rate": 3.88391748275371e-05, + "loss": 2.7817, "step": 67400 }, { "epoch": 0.22, - "learning_rate": 3.8825798334630095e-05, - "loss": 2.7794, + "learning_rate": 3.8822615739744126e-05, + "loss": 2.7656, "step": 67500 }, { "epoch": 0.22, - "learning_rate": 3.880924396179251e-05, - "loss": 2.7857, + "learning_rate": 3.8806056651951164e-05, + "loss": 2.7873, "step": 67600 }, { "epoch": 0.22, - "learning_rate": 3.879268958895492e-05, - "loss": 2.8005, + "learning_rate": 3.878949756415819e-05, + "loss": 2.7568, "step": 67700 }, { "epoch": 0.22, - "learning_rate": 3.877613521611734e-05, - "loss": 2.7799, + "learning_rate": 3.877293847636522e-05, + "loss": 2.7832, "step": 67800 }, { "epoch": 0.22, - "learning_rate": 3.875958084327975e-05, - "loss": 2.7819, + "learning_rate": 3.875637938857225e-05, + "loss": 2.7802, "step": 67900 }, { "epoch": 0.23, - "learning_rate": 3.874302647044217e-05, - "loss": 2.7755, + "learning_rate": 3.873982030077927e-05, + "loss": 2.7669, "step": 68000 }, { "epoch": 0.23, - "learning_rate": 3.872647209760458e-05, - "loss": 2.7761, + "learning_rate": 3.87232612129863e-05, + "loss": 2.7748, "step": 68100 }, { "epoch": 0.23, - "learning_rate": 3.8709917724767e-05, - "loss": 2.7881, + "learning_rate": 3.8706702125193325e-05, + "loss": 2.767, "step": 68200 }, { "epoch": 0.23, - "learning_rate": 3.869336335192941e-05, - "loss": 2.7799, + "learning_rate": 3.8690143037400355e-05, + "loss": 2.7727, "step": 68300 }, { "epoch": 0.23, - "learning_rate": 3.8676808979091825e-05, - "loss": 2.7933, + "learning_rate": 3.8673583949607386e-05, + "loss": 2.7725, "step": 68400 }, { "epoch": 0.23, - "learning_rate": 3.8660254606254244e-05, - "loss": 2.7883, + "learning_rate": 3.865702486181441e-05, + "loss": 2.7773, "step": 68500 }, { "epoch": 0.23, - "learning_rate": 3.8643700233416657e-05, - "loss": 2.7815, + "learning_rate": 3.8640465774021446e-05, + "loss": 2.7834, "step": 68600 }, { "epoch": 0.23, - "learning_rate": 3.8627145860579076e-05, - "loss": 2.7869, + "learning_rate": 3.862390668622847e-05, + "loss": 2.7777, "step": 68700 }, { "epoch": 0.23, - "learning_rate": 3.861059148774149e-05, - "loss": 2.7932, + "learning_rate": 3.86073475984355e-05, + "loss": 2.7757, "step": 68800 }, { "epoch": 0.23, - "learning_rate": 3.85940371149039e-05, - "loss": 2.7952, + "learning_rate": 3.859078851064253e-05, + "loss": 2.7624, "step": 68900 }, { "epoch": 0.23, - "learning_rate": 3.857748274206632e-05, - "loss": 2.7999, + "learning_rate": 3.8574229422849554e-05, + "loss": 2.7765, "step": 69000 }, { "epoch": 0.23, - "learning_rate": 3.856092836922873e-05, - "loss": 2.7995, + "learning_rate": 3.8557670335056584e-05, + "loss": 2.7782, "step": 69100 }, { "epoch": 0.23, - "learning_rate": 3.854437399639115e-05, - "loss": 2.7774, + "learning_rate": 3.8541111247263614e-05, + "loss": 2.7757, "step": 69200 }, { "epoch": 0.23, - "learning_rate": 3.852781962355356e-05, - "loss": 2.7667, + "learning_rate": 3.852455215947064e-05, + "loss": 2.775, "step": 69300 }, { "epoch": 0.23, - "learning_rate": 3.8511265250715975e-05, - "loss": 2.7726, + "learning_rate": 3.850799307167767e-05, + "loss": 2.7646, "step": 69400 }, { "epoch": 0.23, - "learning_rate": 3.8494710877878394e-05, - "loss": 2.7894, + "learning_rate": 3.849143398388469e-05, + "loss": 2.7669, "step": 69500 }, { "epoch": 0.23, - "learning_rate": 3.8478156505040806e-05, - "loss": 2.7695, + "learning_rate": 3.847487489609173e-05, + "loss": 2.7643, "step": 69600 }, { "epoch": 0.23, - "learning_rate": 3.8461602132203225e-05, - "loss": 2.7866, + "learning_rate": 3.845831580829876e-05, + "loss": 2.7815, "step": 69700 }, { "epoch": 0.23, - "learning_rate": 3.844504775936564e-05, - "loss": 2.7916, + "learning_rate": 3.844175672050578e-05, + "loss": 2.7665, "step": 69800 }, { "epoch": 0.23, - "learning_rate": 3.842849338652805e-05, - "loss": 2.7906, + "learning_rate": 3.842519763271281e-05, + "loss": 2.7642, "step": 69900 }, { "epoch": 0.23, - "learning_rate": 3.841193901369047e-05, - "loss": 2.7799, + "learning_rate": 3.8408638544919837e-05, + "loss": 2.7747, "step": 70000 }, { "epoch": 0.23, - "learning_rate": 3.839538464085288e-05, - "loss": 2.7896, + "learning_rate": 3.839207945712687e-05, + "loss": 2.7862, "step": 70100 }, { "epoch": 0.23, - "learning_rate": 3.83788302680153e-05, - "loss": 2.7952, + "learning_rate": 3.83755203693339e-05, + "loss": 2.755, "step": 70200 }, { "epoch": 0.23, - "learning_rate": 3.836227589517771e-05, - "loss": 2.7928, + "learning_rate": 3.835896128154092e-05, + "loss": 2.7713, "step": 70300 }, { "epoch": 0.23, - "learning_rate": 3.8345721522340125e-05, - "loss": 2.7826, + "learning_rate": 3.834240219374795e-05, + "loss": 2.7665, "step": 70400 }, { "epoch": 0.23, - "learning_rate": 3.8329167149502544e-05, - "loss": 2.7933, + "learning_rate": 3.832584310595498e-05, + "loss": 2.7831, "step": 70500 }, { "epoch": 0.23, - "learning_rate": 3.8312612776664956e-05, - "loss": 2.786, + "learning_rate": 3.830928401816201e-05, + "loss": 2.774, "step": 70600 }, { "epoch": 0.23, - "learning_rate": 3.8296058403827375e-05, - "loss": 2.7757, + "learning_rate": 3.829272493036904e-05, + "loss": 2.7802, "step": 70700 }, { "epoch": 0.23, - "learning_rate": 3.827950403098979e-05, - "loss": 2.769, + "learning_rate": 3.8276165842576065e-05, + "loss": 2.7653, "step": 70800 }, { "epoch": 0.23, - "learning_rate": 3.8262949658152206e-05, - "loss": 2.7921, + "learning_rate": 3.8259606754783096e-05, + "loss": 2.7576, "step": 70900 }, { "epoch": 0.24, - "learning_rate": 3.824639528531462e-05, - "loss": 2.7699, + "learning_rate": 3.8243047666990126e-05, + "loss": 2.7891, "step": 71000 }, { "epoch": 0.24, - "learning_rate": 3.822984091247703e-05, - "loss": 2.7813, + "learning_rate": 3.822648857919715e-05, + "loss": 2.7715, "step": 71100 }, { "epoch": 0.24, - "learning_rate": 3.821328653963945e-05, - "loss": 2.7869, + "learning_rate": 3.820992949140418e-05, + "loss": 2.764, "step": 71200 }, { "epoch": 0.24, - "learning_rate": 3.819673216680186e-05, - "loss": 2.7738, + "learning_rate": 3.81933704036112e-05, + "loss": 2.7699, "step": 71300 }, { "epoch": 0.24, - "learning_rate": 3.818017779396428e-05, - "loss": 2.7932, + "learning_rate": 3.8176811315818234e-05, + "loss": 2.7682, "step": 71400 }, { "epoch": 0.24, - "learning_rate": 3.8163623421126693e-05, - "loss": 2.7797, + "learning_rate": 3.8160252228025264e-05, + "loss": 2.7829, "step": 71500 }, { "epoch": 0.24, - "learning_rate": 3.8147069048289106e-05, - "loss": 2.8008, + "learning_rate": 3.8143693140232294e-05, + "loss": 2.7736, "step": 71600 }, { "epoch": 0.24, - "learning_rate": 3.8130514675451525e-05, - "loss": 2.7844, + "learning_rate": 3.8127134052439325e-05, + "loss": 2.789, "step": 71700 }, { "epoch": 0.24, - "learning_rate": 3.811396030261394e-05, - "loss": 2.7728, + "learning_rate": 3.811057496464635e-05, + "loss": 2.7784, "step": 71800 }, { "epoch": 0.24, - "learning_rate": 3.8097405929776356e-05, - "loss": 2.7752, + "learning_rate": 3.809401587685338e-05, + "loss": 2.7782, "step": 71900 }, { "epoch": 0.24, - "learning_rate": 3.808085155693877e-05, - "loss": 2.7797, + "learning_rate": 3.807745678906041e-05, + "loss": 2.776, "step": 72000 }, { "epoch": 0.24, - "learning_rate": 3.806429718410118e-05, - "loss": 2.7863, + "learning_rate": 3.806089770126743e-05, + "loss": 2.7795, "step": 72100 }, { "epoch": 0.24, - "learning_rate": 3.80477428112636e-05, - "loss": 2.7871, + "learning_rate": 3.804433861347446e-05, + "loss": 2.7474, "step": 72200 }, { "epoch": 0.24, - "learning_rate": 3.803118843842601e-05, - "loss": 2.7935, + "learning_rate": 3.802777952568149e-05, + "loss": 2.7784, "step": 72300 }, { "epoch": 0.24, - "learning_rate": 3.801463406558843e-05, - "loss": 2.7985, + "learning_rate": 3.8011220437888516e-05, + "loss": 2.7837, "step": 72400 }, { "epoch": 0.24, - "learning_rate": 3.799807969275084e-05, - "loss": 2.7752, + "learning_rate": 3.7994661350095547e-05, + "loss": 2.7615, "step": 72500 }, { "epoch": 0.24, - "learning_rate": 3.7981525319913255e-05, - "loss": 2.7957, + "learning_rate": 3.797810226230258e-05, + "loss": 2.7742, "step": 72600 }, { "epoch": 0.24, - "learning_rate": 3.7964970947075674e-05, - "loss": 2.7873, + "learning_rate": 3.796154317450961e-05, + "loss": 2.7735, "step": 72700 }, { "epoch": 0.24, - "learning_rate": 3.794841657423809e-05, - "loss": 2.7805, + "learning_rate": 3.794498408671663e-05, + "loss": 2.7739, "step": 72800 }, { "epoch": 0.24, - "learning_rate": 3.7931862201400506e-05, - "loss": 2.785, + "learning_rate": 3.792842499892366e-05, + "loss": 2.7716, "step": 72900 }, { "epoch": 0.24, - "learning_rate": 3.791530782856292e-05, - "loss": 2.7834, + "learning_rate": 3.791186591113069e-05, + "loss": 2.7806, "step": 73000 }, { "epoch": 0.24, - "learning_rate": 3.789875345572533e-05, - "loss": 2.7753, + "learning_rate": 3.7895306823337715e-05, + "loss": 2.7703, "step": 73100 }, { "epoch": 0.24, - "learning_rate": 3.788219908288775e-05, - "loss": 2.7878, + "learning_rate": 3.7878747735544745e-05, + "loss": 2.7753, "step": 73200 }, { "epoch": 0.24, - "learning_rate": 3.786564471005016e-05, - "loss": 2.779, + "learning_rate": 3.7862188647751775e-05, + "loss": 2.7746, "step": 73300 }, { "epoch": 0.24, - "learning_rate": 3.784909033721258e-05, - "loss": 2.7965, + "learning_rate": 3.78456295599588e-05, + "loss": 2.7836, "step": 73400 }, { "epoch": 0.24, - "learning_rate": 3.783253596437499e-05, - "loss": 2.778, + "learning_rate": 3.782907047216583e-05, + "loss": 2.7821, "step": 73500 }, { "epoch": 0.24, - "learning_rate": 3.7815981591537405e-05, - "loss": 2.7577, + "learning_rate": 3.781251138437286e-05, + "loss": 2.7711, "step": 73600 }, { "epoch": 0.24, - "learning_rate": 3.7799427218699824e-05, - "loss": 2.785, + "learning_rate": 3.779595229657989e-05, + "loss": 2.762, "step": 73700 }, { "epoch": 0.24, - "learning_rate": 3.7782872845862236e-05, - "loss": 2.7708, + "learning_rate": 3.777939320878692e-05, + "loss": 2.7759, "step": 73800 }, { "epoch": 0.24, - "learning_rate": 3.7766318473024655e-05, - "loss": 2.7798, + "learning_rate": 3.7762834120993944e-05, + "loss": 2.7617, "step": 73900 }, { "epoch": 0.25, - "learning_rate": 3.774976410018707e-05, - "loss": 2.7852, + "learning_rate": 3.7746275033200974e-05, + "loss": 2.7818, "step": 74000 }, { "epoch": 0.25, - "learning_rate": 3.773320972734949e-05, - "loss": 2.7719, + "learning_rate": 3.7729715945408e-05, + "loss": 2.7662, "step": 74100 }, { "epoch": 0.25, - "learning_rate": 3.77166553545119e-05, - "loss": 2.7878, + "learning_rate": 3.771315685761503e-05, + "loss": 2.783, "step": 74200 }, { "epoch": 0.25, - "learning_rate": 3.770010098167431e-05, - "loss": 2.7726, + "learning_rate": 3.769659776982206e-05, + "loss": 2.773, "step": 74300 }, { "epoch": 0.25, - "learning_rate": 3.768354660883673e-05, - "loss": 2.7892, + "learning_rate": 3.768003868202908e-05, + "loss": 2.7712, "step": 74400 }, { "epoch": 0.25, - "learning_rate": 3.766699223599914e-05, - "loss": 2.7746, + "learning_rate": 3.766347959423611e-05, + "loss": 2.7709, "step": 74500 }, { "epoch": 0.25, - "learning_rate": 3.765043786316156e-05, - "loss": 2.7842, + "learning_rate": 3.764692050644314e-05, + "loss": 2.7641, "step": 74600 }, { "epoch": 0.25, - "learning_rate": 3.7633883490323974e-05, - "loss": 2.7798, + "learning_rate": 3.763036141865017e-05, + "loss": 2.7659, "step": 74700 }, { "epoch": 0.25, - "learning_rate": 3.7617329117486386e-05, - "loss": 2.7729, + "learning_rate": 3.76138023308572e-05, + "loss": 2.7594, "step": 74800 }, { "epoch": 0.25, - "learning_rate": 3.7600774744648805e-05, - "loss": 2.7837, + "learning_rate": 3.7597243243064226e-05, + "loss": 2.7561, "step": 74900 }, { "epoch": 0.25, - "learning_rate": 3.758422037181122e-05, - "loss": 2.7797, + "learning_rate": 3.758068415527126e-05, + "loss": 2.7747, "step": 75000 }, { "epoch": 0.25, - "learning_rate": 3.756766599897363e-05, - "loss": 2.7864, + "learning_rate": 3.756412506747829e-05, + "loss": 2.7664, "step": 75100 }, { "epoch": 0.25, - "learning_rate": 3.755111162613604e-05, - "loss": 2.7711, + "learning_rate": 3.754756597968531e-05, + "loss": 2.7907, "step": 75200 }, { "epoch": 0.25, - "learning_rate": 3.753455725329846e-05, - "loss": 2.7977, + "learning_rate": 3.753100689189234e-05, + "loss": 2.7689, "step": 75300 }, { "epoch": 0.25, - "learning_rate": 3.751800288046087e-05, - "loss": 2.7568, + "learning_rate": 3.7514447804099364e-05, + "loss": 2.7684, "step": 75400 }, { "epoch": 0.25, - "learning_rate": 3.7501448507623285e-05, - "loss": 2.7896, + "learning_rate": 3.7497888716306395e-05, + "loss": 2.7677, "step": 75500 }, { "epoch": 0.25, - "learning_rate": 3.7484894134785705e-05, - "loss": 2.7903, + "learning_rate": 3.748132962851343e-05, + "loss": 2.7735, "step": 75600 }, { "epoch": 0.25, - "learning_rate": 3.746833976194812e-05, - "loss": 2.7901, + "learning_rate": 3.7464770540720455e-05, + "loss": 2.757, "step": 75700 }, { "epoch": 0.25, - "learning_rate": 3.7451785389110536e-05, - "loss": 2.8011, + "learning_rate": 3.7448211452927485e-05, + "loss": 2.7717, "step": 75800 }, { "epoch": 0.25, - "learning_rate": 3.743523101627295e-05, - "loss": 2.7866, + "learning_rate": 3.743165236513451e-05, + "loss": 2.7637, "step": 75900 }, { "epoch": 0.25, - "learning_rate": 3.741867664343536e-05, - "loss": 2.7612, + "learning_rate": 3.741509327734154e-05, + "loss": 2.7833, "step": 76000 }, { "epoch": 0.25, - "learning_rate": 3.740212227059778e-05, - "loss": 2.7826, + "learning_rate": 3.739853418954857e-05, + "loss": 2.7627, "step": 76100 }, { "epoch": 0.25, - "learning_rate": 3.738556789776019e-05, - "loss": 2.7816, + "learning_rate": 3.738197510175559e-05, + "loss": 2.7698, "step": 76200 }, { "epoch": 0.25, - "learning_rate": 3.736901352492261e-05, - "loss": 2.7721, + "learning_rate": 3.7365416013962623e-05, + "loss": 2.7685, "step": 76300 }, { "epoch": 0.25, - "learning_rate": 3.735245915208502e-05, - "loss": 2.7768, + "learning_rate": 3.7348856926169654e-05, + "loss": 2.7677, "step": 76400 }, { "epoch": 0.25, - "learning_rate": 3.7335904779247435e-05, - "loss": 2.7814, + "learning_rate": 3.733229783837668e-05, + "loss": 2.7688, "step": 76500 }, { "epoch": 0.25, - "learning_rate": 3.7319350406409854e-05, - "loss": 2.8019, + "learning_rate": 3.7315738750583714e-05, + "loss": 2.7634, "step": 76600 }, { "epoch": 0.25, - "learning_rate": 3.7302796033572266e-05, - "loss": 2.7722, + "learning_rate": 3.729917966279074e-05, + "loss": 2.7738, "step": 76700 }, { "epoch": 0.25, - "learning_rate": 3.7286241660734686e-05, - "loss": 2.774, + "learning_rate": 3.728262057499777e-05, + "loss": 2.7739, "step": 76800 }, { "epoch": 0.25, - "learning_rate": 3.72696872878971e-05, - "loss": 2.7802, + "learning_rate": 3.72660614872048e-05, + "loss": 2.7631, "step": 76900 }, { - "epoch": 0.25, - "learning_rate": 3.725313291505951e-05, - "loss": 2.7649, + "epoch": 0.26, + "learning_rate": 3.724950239941182e-05, + "loss": 2.7487, "step": 77000 }, { "epoch": 0.26, - "learning_rate": 3.723657854222193e-05, - "loss": 2.7955, + "learning_rate": 3.723294331161885e-05, + "loss": 2.7657, "step": 77100 }, { "epoch": 0.26, - "learning_rate": 3.722002416938434e-05, - "loss": 2.7826, + "learning_rate": 3.7216384223825876e-05, + "loss": 2.7645, "step": 77200 }, { "epoch": 0.26, - "learning_rate": 3.720346979654676e-05, - "loss": 2.7824, + "learning_rate": 3.7199825136032906e-05, + "loss": 2.7838, "step": 77300 }, { "epoch": 0.26, - "learning_rate": 3.718691542370917e-05, - "loss": 2.786, + "learning_rate": 3.7183266048239936e-05, + "loss": 2.7717, "step": 77400 }, { "epoch": 0.26, - "learning_rate": 3.7170361050871585e-05, - "loss": 2.7742, + "learning_rate": 3.716670696044696e-05, + "loss": 2.7816, "step": 77500 }, { "epoch": 0.26, - "learning_rate": 3.7153806678034004e-05, - "loss": 2.7978, + "learning_rate": 3.7150147872654e-05, + "loss": 2.7741, "step": 77600 }, { "epoch": 0.26, - "learning_rate": 3.7137252305196416e-05, - "loss": 2.7786, + "learning_rate": 3.713358878486102e-05, + "loss": 2.7727, "step": 77700 }, { "epoch": 0.26, - "learning_rate": 3.7120697932358835e-05, - "loss": 2.7851, + "learning_rate": 3.711702969706805e-05, + "loss": 2.7721, "step": 77800 }, { "epoch": 0.26, - "learning_rate": 3.710414355952125e-05, - "loss": 2.7843, + "learning_rate": 3.710047060927508e-05, + "loss": 2.7768, "step": 77900 }, { "epoch": 0.26, - "learning_rate": 3.7087589186683667e-05, - "loss": 2.7825, + "learning_rate": 3.7083911521482105e-05, + "loss": 2.7654, "step": 78000 }, { "epoch": 0.26, - "learning_rate": 3.707103481384608e-05, - "loss": 2.7867, + "learning_rate": 3.7067352433689135e-05, + "loss": 2.7549, "step": 78100 }, { "epoch": 0.26, - "learning_rate": 3.705448044100849e-05, - "loss": 2.7857, + "learning_rate": 3.7050793345896165e-05, + "loss": 2.7627, "step": 78200 }, { "epoch": 0.26, - "learning_rate": 3.703792606817091e-05, - "loss": 2.7991, + "learning_rate": 3.703423425810319e-05, + "loss": 2.7784, "step": 78300 }, { "epoch": 0.26, - "learning_rate": 3.702137169533332e-05, - "loss": 2.7756, + "learning_rate": 3.701767517031022e-05, + "loss": 2.7635, "step": 78400 }, { "epoch": 0.26, - "learning_rate": 3.700481732249574e-05, - "loss": 2.7812, + "learning_rate": 3.700111608251724e-05, + "loss": 2.7707, "step": 78500 }, { "epoch": 0.26, - "learning_rate": 3.6988262949658154e-05, - "loss": 2.7876, + "learning_rate": 3.698455699472428e-05, + "loss": 2.7545, "step": 78600 }, { "epoch": 0.26, - "learning_rate": 3.6971708576820566e-05, - "loss": 2.7841, + "learning_rate": 3.69679979069313e-05, + "loss": 2.7658, "step": 78700 }, { "epoch": 0.26, - "learning_rate": 3.6955154203982985e-05, - "loss": 2.784, + "learning_rate": 3.6951438819138333e-05, + "loss": 2.7701, "step": 78800 }, { "epoch": 0.26, - "learning_rate": 3.69385998311454e-05, - "loss": 2.7897, + "learning_rate": 3.6934879731345364e-05, + "loss": 2.7816, "step": 78900 }, { "epoch": 0.26, - "learning_rate": 3.6922045458307816e-05, - "loss": 2.7655, + "learning_rate": 3.691832064355239e-05, + "loss": 2.771, "step": 79000 }, { "epoch": 0.26, - "learning_rate": 3.690549108547023e-05, - "loss": 2.78, + "learning_rate": 3.690176155575942e-05, + "loss": 2.7745, "step": 79100 }, { "epoch": 0.26, - "learning_rate": 3.688893671263264e-05, - "loss": 2.7796, + "learning_rate": 3.688520246796645e-05, + "loss": 2.7756, "step": 79200 }, { "epoch": 0.26, - "learning_rate": 3.687238233979506e-05, - "loss": 2.7784, + "learning_rate": 3.686864338017347e-05, + "loss": 2.7655, "step": 79300 }, { "epoch": 0.26, - "learning_rate": 3.685582796695747e-05, - "loss": 2.7825, + "learning_rate": 3.68520842923805e-05, + "loss": 2.7838, "step": 79400 }, { "epoch": 0.26, - "learning_rate": 3.683927359411989e-05, - "loss": 2.7776, + "learning_rate": 3.683552520458753e-05, + "loss": 2.7654, "step": 79500 }, { "epoch": 0.26, - "learning_rate": 3.68227192212823e-05, - "loss": 2.7916, + "learning_rate": 3.681896611679456e-05, + "loss": 2.7728, "step": 79600 }, { "epoch": 0.26, - "learning_rate": 3.6806164848444716e-05, - "loss": 2.7847, + "learning_rate": 3.680240702900159e-05, + "loss": 2.7612, "step": 79700 }, { "epoch": 0.26, - "learning_rate": 3.6789610475607135e-05, - "loss": 2.7812, + "learning_rate": 3.6785847941208616e-05, + "loss": 2.7618, "step": 79800 }, { "epoch": 0.26, - "learning_rate": 3.677305610276955e-05, - "loss": 2.7757, + "learning_rate": 3.6769288853415646e-05, + "loss": 2.7593, "step": 79900 }, { "epoch": 0.26, - "learning_rate": 3.6756501729931966e-05, - "loss": 2.7842, + "learning_rate": 3.675272976562267e-05, + "loss": 2.7689, "step": 80000 }, { "epoch": 0.27, - "learning_rate": 3.673994735709438e-05, - "loss": 2.7696, + "learning_rate": 3.67361706778297e-05, + "loss": 2.7535, "step": 80100 }, { "epoch": 0.27, - "learning_rate": 3.672339298425679e-05, - "loss": 2.788, + "learning_rate": 3.671961159003673e-05, + "loss": 2.7723, "step": 80200 }, { "epoch": 0.27, - "learning_rate": 3.670683861141921e-05, - "loss": 2.7721, + "learning_rate": 3.6703052502243754e-05, + "loss": 2.759, "step": 80300 }, { "epoch": 0.27, - "learning_rate": 3.669028423858162e-05, - "loss": 2.7725, + "learning_rate": 3.6686493414450784e-05, + "loss": 2.7656, "step": 80400 }, { "epoch": 0.27, - "learning_rate": 3.667372986574404e-05, - "loss": 2.7692, + "learning_rate": 3.6669934326657815e-05, + "loss": 2.7597, "step": 80500 }, { "epoch": 0.27, - "learning_rate": 3.665717549290645e-05, - "loss": 2.7789, + "learning_rate": 3.6653375238864845e-05, + "loss": 2.7767, "step": 80600 }, { "epoch": 0.27, - "learning_rate": 3.6640621120068865e-05, - "loss": 2.7867, + "learning_rate": 3.6636816151071875e-05, + "loss": 2.7654, "step": 80700 }, { "epoch": 0.27, - "learning_rate": 3.6624066747231284e-05, - "loss": 2.7753, + "learning_rate": 3.66202570632789e-05, + "loss": 2.7803, "step": 80800 }, { "epoch": 0.27, - "learning_rate": 3.66075123743937e-05, - "loss": 2.7854, + "learning_rate": 3.660369797548593e-05, + "loss": 2.7708, "step": 80900 }, { "epoch": 0.27, - "learning_rate": 3.6590958001556116e-05, - "loss": 2.786, + "learning_rate": 3.658713888769296e-05, + "loss": 2.7643, "step": 81000 }, { "epoch": 0.27, - "learning_rate": 3.657440362871853e-05, - "loss": 2.7859, + "learning_rate": 3.657057979989998e-05, + "loss": 2.7694, "step": 81100 }, { "epoch": 0.27, - "learning_rate": 3.655784925588095e-05, - "loss": 2.7829, + "learning_rate": 3.655402071210701e-05, + "loss": 2.7566, "step": 81200 }, { "epoch": 0.27, - "learning_rate": 3.654129488304336e-05, - "loss": 2.7808, + "learning_rate": 3.653746162431404e-05, + "loss": 2.7798, "step": 81300 }, { "epoch": 0.27, - "learning_rate": 3.652474051020577e-05, - "loss": 2.7707, + "learning_rate": 3.652090253652107e-05, + "loss": 2.7593, "step": 81400 }, { "epoch": 0.27, - "learning_rate": 3.650818613736819e-05, - "loss": 2.7821, + "learning_rate": 3.65043434487281e-05, + "loss": 2.7631, "step": 81500 }, { "epoch": 0.27, - "learning_rate": 3.64916317645306e-05, - "loss": 2.7832, + "learning_rate": 3.648778436093513e-05, + "loss": 2.7619, "step": 81600 }, { "epoch": 0.27, - "learning_rate": 3.647507739169302e-05, - "loss": 2.7862, + "learning_rate": 3.647122527314216e-05, + "loss": 2.7562, "step": 81700 }, { "epoch": 0.27, - "learning_rate": 3.6458523018855434e-05, - "loss": 2.7863, + "learning_rate": 3.645466618534918e-05, + "loss": 2.7616, "step": 81800 }, { "epoch": 0.27, - "learning_rate": 3.6441968646017846e-05, - "loss": 2.7911, + "learning_rate": 3.643810709755621e-05, + "loss": 2.7752, "step": 81900 }, { "epoch": 0.27, - "learning_rate": 3.6425414273180265e-05, - "loss": 2.7749, + "learning_rate": 3.642154800976324e-05, + "loss": 2.7622, "step": 82000 }, { "epoch": 0.27, - "learning_rate": 3.640885990034268e-05, - "loss": 2.7949, + "learning_rate": 3.6404988921970266e-05, + "loss": 2.7738, "step": 82100 }, { "epoch": 0.27, - "learning_rate": 3.63923055275051e-05, - "loss": 2.7813, + "learning_rate": 3.6388429834177296e-05, + "loss": 2.7691, "step": 82200 }, { "epoch": 0.27, - "learning_rate": 3.637575115466751e-05, - "loss": 2.7744, + "learning_rate": 3.6371870746384326e-05, + "loss": 2.7709, "step": 82300 }, { "epoch": 0.27, - "learning_rate": 3.635919678182992e-05, - "loss": 2.7751, + "learning_rate": 3.635531165859135e-05, + "loss": 2.789, "step": 82400 }, { "epoch": 0.27, - "learning_rate": 3.634264240899234e-05, - "loss": 2.7608, + "learning_rate": 3.633875257079838e-05, + "loss": 2.7553, "step": 82500 }, { "epoch": 0.27, - "learning_rate": 3.632608803615475e-05, - "loss": 2.7661, + "learning_rate": 3.632219348300541e-05, + "loss": 2.7778, "step": 82600 }, { "epoch": 0.27, - "learning_rate": 3.630953366331717e-05, - "loss": 2.7813, + "learning_rate": 3.630563439521244e-05, + "loss": 2.7791, "step": 82700 }, { "epoch": 0.27, - "learning_rate": 3.6292979290479584e-05, - "loss": 2.7774, + "learning_rate": 3.628907530741947e-05, + "loss": 2.777, "step": 82800 }, { "epoch": 0.27, - "learning_rate": 3.6276424917641996e-05, - "loss": 2.7659, + "learning_rate": 3.6272516219626494e-05, + "loss": 2.7583, "step": 82900 }, { "epoch": 0.27, - "learning_rate": 3.6259870544804415e-05, - "loss": 2.7797, + "learning_rate": 3.6255957131833525e-05, + "loss": 2.7702, "step": 83000 }, { "epoch": 0.28, - "learning_rate": 3.624331617196683e-05, - "loss": 2.7668, + "learning_rate": 3.623939804404055e-05, + "loss": 2.778, "step": 83100 }, { "epoch": 0.28, - "learning_rate": 3.6226761799129246e-05, - "loss": 2.7911, + "learning_rate": 3.622283895624758e-05, + "loss": 2.7604, "step": 83200 }, { "epoch": 0.28, - "learning_rate": 3.621020742629166e-05, - "loss": 2.7762, + "learning_rate": 3.620627986845461e-05, + "loss": 2.7674, "step": 83300 }, { "epoch": 0.28, - "learning_rate": 3.619365305345407e-05, - "loss": 2.7801, + "learning_rate": 3.618972078066163e-05, + "loss": 2.781, "step": 83400 }, { "epoch": 0.28, - "learning_rate": 3.617709868061649e-05, - "loss": 2.77, + "learning_rate": 3.617316169286866e-05, + "loss": 2.7616, "step": 83500 }, { "epoch": 0.28, - "learning_rate": 3.61605443077789e-05, - "loss": 2.7831, + "learning_rate": 3.615660260507569e-05, + "loss": 2.7769, "step": 83600 }, { "epoch": 0.28, - "learning_rate": 3.614398993494132e-05, - "loss": 2.7713, + "learning_rate": 3.614004351728272e-05, + "loss": 2.7688, "step": 83700 }, { "epoch": 0.28, - "learning_rate": 3.6127435562103734e-05, - "loss": 2.7808, + "learning_rate": 3.6123484429489754e-05, + "loss": 2.7652, "step": 83800 }, { "epoch": 0.28, - "learning_rate": 3.6110881189266146e-05, - "loss": 2.7715, + "learning_rate": 3.610692534169678e-05, + "loss": 2.7685, "step": 83900 }, { "epoch": 0.28, - "learning_rate": 3.609432681642856e-05, - "loss": 2.7768, + "learning_rate": 3.609036625390381e-05, + "loss": 2.7694, "step": 84000 }, { "epoch": 0.28, - "learning_rate": 3.607777244359097e-05, - "loss": 2.7809, + "learning_rate": 3.607380716611084e-05, + "loss": 2.7786, "step": 84100 }, { "epoch": 0.28, - "learning_rate": 3.606121807075339e-05, - "loss": 2.7715, + "learning_rate": 3.605724807831786e-05, + "loss": 2.7719, "step": 84200 }, { "epoch": 0.28, - "learning_rate": 3.60446636979158e-05, - "loss": 2.7804, + "learning_rate": 3.604068899052489e-05, + "loss": 2.7769, "step": 84300 }, { "epoch": 0.28, - "learning_rate": 3.602810932507822e-05, - "loss": 2.7654, + "learning_rate": 3.6024129902731915e-05, + "loss": 2.7651, "step": 84400 }, { "epoch": 0.28, - "learning_rate": 3.601155495224063e-05, - "loss": 2.7762, + "learning_rate": 3.6007570814938945e-05, + "loss": 2.7535, "step": 84500 }, { "epoch": 0.28, - "learning_rate": 3.599500057940305e-05, - "loss": 2.7809, + "learning_rate": 3.5991011727145976e-05, + "loss": 2.757, "step": 84600 }, { "epoch": 0.28, - "learning_rate": 3.5978446206565464e-05, - "loss": 2.793, + "learning_rate": 3.5974452639353006e-05, + "loss": 2.7571, "step": 84700 }, { "epoch": 0.28, - "learning_rate": 3.5961891833727876e-05, - "loss": 2.7787, + "learning_rate": 3.5957893551560036e-05, + "loss": 2.7545, "step": 84800 }, { "epoch": 0.28, - "learning_rate": 3.5945337460890295e-05, - "loss": 2.7671, + "learning_rate": 3.594133446376706e-05, + "loss": 2.7571, "step": 84900 }, { "epoch": 0.28, - "learning_rate": 3.592878308805271e-05, - "loss": 2.7884, + "learning_rate": 3.592477537597409e-05, + "loss": 2.7558, "step": 85000 }, { "epoch": 0.28, - "learning_rate": 3.591222871521513e-05, - "loss": 2.7922, + "learning_rate": 3.590821628818112e-05, + "loss": 2.7639, "step": 85100 }, { "epoch": 0.28, - "learning_rate": 3.589567434237754e-05, - "loss": 2.7726, + "learning_rate": 3.5891657200388144e-05, + "loss": 2.7656, "step": 85200 }, { "epoch": 0.28, - "learning_rate": 3.587911996953995e-05, - "loss": 2.7708, + "learning_rate": 3.5875098112595174e-05, + "loss": 2.7654, "step": 85300 }, { "epoch": 0.28, - "learning_rate": 3.586256559670237e-05, - "loss": 2.7743, + "learning_rate": 3.5858539024802204e-05, + "loss": 2.7549, "step": 85400 }, { "epoch": 0.28, - "learning_rate": 3.584601122386478e-05, - "loss": 2.758, + "learning_rate": 3.584197993700923e-05, + "loss": 2.762, "step": 85500 }, { "epoch": 0.28, - "learning_rate": 3.58294568510272e-05, - "loss": 2.7936, + "learning_rate": 3.5825420849216265e-05, + "loss": 2.7716, "step": 85600 }, { "epoch": 0.28, - "learning_rate": 3.5812902478189614e-05, - "loss": 2.7975, + "learning_rate": 3.580886176142329e-05, + "loss": 2.7848, "step": 85700 }, { "epoch": 0.28, - "learning_rate": 3.5796348105352026e-05, - "loss": 2.789, + "learning_rate": 3.579230267363032e-05, + "loss": 2.7675, "step": 85800 }, { "epoch": 0.28, - "learning_rate": 3.5779793732514445e-05, - "loss": 2.7746, + "learning_rate": 3.577574358583734e-05, + "loss": 2.7798, "step": 85900 }, { "epoch": 0.28, - "learning_rate": 3.576323935967686e-05, - "loss": 2.7709, + "learning_rate": 3.575918449804437e-05, + "loss": 2.7544, "step": 86000 }, { "epoch": 0.29, - "learning_rate": 3.5746684986839277e-05, - "loss": 2.7723, + "learning_rate": 3.57426254102514e-05, + "loss": 2.7634, "step": 86100 }, { "epoch": 0.29, - "learning_rate": 3.573013061400169e-05, - "loss": 2.7792, + "learning_rate": 3.5726066322458427e-05, + "loss": 2.7796, "step": 86200 }, { "epoch": 0.29, - "learning_rate": 3.57135762411641e-05, - "loss": 2.786, + "learning_rate": 3.570950723466546e-05, + "loss": 2.7618, "step": 86300 }, { "epoch": 0.29, - "learning_rate": 3.569702186832652e-05, - "loss": 2.7819, + "learning_rate": 3.569294814687249e-05, + "loss": 2.7575, "step": 86400 }, { "epoch": 0.29, - "learning_rate": 3.568046749548893e-05, - "loss": 2.7743, + "learning_rate": 3.567638905907951e-05, + "loss": 2.7789, "step": 86500 }, { "epoch": 0.29, - "learning_rate": 3.566391312265135e-05, - "loss": 2.7723, + "learning_rate": 3.565982997128655e-05, + "loss": 2.7719, "step": 86600 }, { "epoch": 0.29, - "learning_rate": 3.5647358749813764e-05, - "loss": 2.7687, + "learning_rate": 3.564327088349357e-05, + "loss": 2.7506, "step": 86700 }, { "epoch": 0.29, - "learning_rate": 3.5630804376976176e-05, - "loss": 2.7795, + "learning_rate": 3.56267117957006e-05, + "loss": 2.757, "step": 86800 }, { "epoch": 0.29, - "learning_rate": 3.5614250004138595e-05, - "loss": 2.7762, + "learning_rate": 3.561015270790763e-05, + "loss": 2.7716, "step": 86900 }, { "epoch": 0.29, - "learning_rate": 3.559769563130101e-05, - "loss": 2.7657, + "learning_rate": 3.5593593620114655e-05, + "loss": 2.7676, "step": 87000 }, { "epoch": 0.29, - "learning_rate": 3.5581141258463426e-05, - "loss": 2.7618, + "learning_rate": 3.5577034532321686e-05, + "loss": 2.7605, "step": 87100 }, { "epoch": 0.29, - "learning_rate": 3.556458688562584e-05, - "loss": 2.7804, + "learning_rate": 3.556047544452871e-05, + "loss": 2.7766, "step": 87200 }, { "epoch": 0.29, - "learning_rate": 3.554803251278825e-05, - "loss": 2.7627, + "learning_rate": 3.554391635673574e-05, + "loss": 2.7602, "step": 87300 }, { "epoch": 0.29, - "learning_rate": 3.553147813995067e-05, - "loss": 2.7771, + "learning_rate": 3.552735726894277e-05, + "loss": 2.7676, "step": 87400 }, { "epoch": 0.29, - "learning_rate": 3.551492376711308e-05, - "loss": 2.7767, + "learning_rate": 3.551079818114979e-05, + "loss": 2.7678, "step": 87500 }, { "epoch": 0.29, - "learning_rate": 3.54983693942755e-05, - "loss": 2.7898, + "learning_rate": 3.549423909335683e-05, + "loss": 2.7571, "step": 87600 }, { "epoch": 0.29, - "learning_rate": 3.548181502143791e-05, - "loss": 2.7887, + "learning_rate": 3.5477680005563854e-05, + "loss": 2.7761, "step": 87700 }, { "epoch": 0.29, - "learning_rate": 3.546526064860033e-05, - "loss": 2.7804, + "learning_rate": 3.5461120917770884e-05, + "loss": 2.7722, "step": 87800 }, { "epoch": 0.29, - "learning_rate": 3.5448706275762745e-05, - "loss": 2.7786, + "learning_rate": 3.5444561829977915e-05, + "loss": 2.7728, "step": 87900 }, { "epoch": 0.29, - "learning_rate": 3.543215190292516e-05, - "loss": 2.7599, + "learning_rate": 3.542800274218494e-05, + "loss": 2.7666, "step": 88000 }, { "epoch": 0.29, - "learning_rate": 3.5415597530087576e-05, - "loss": 2.7816, + "learning_rate": 3.541144365439197e-05, + "loss": 2.7674, "step": 88100 }, { "epoch": 0.29, - "learning_rate": 3.539904315724999e-05, - "loss": 2.7937, + "learning_rate": 3.5394884566599e-05, + "loss": 2.7665, "step": 88200 }, { "epoch": 0.29, - "learning_rate": 3.538248878441241e-05, - "loss": 2.7737, + "learning_rate": 3.537832547880602e-05, + "loss": 2.7498, "step": 88300 }, { "epoch": 0.29, - "learning_rate": 3.536593441157482e-05, - "loss": 2.7775, + "learning_rate": 3.536176639101305e-05, + "loss": 2.7735, "step": 88400 }, { "epoch": 0.29, - "learning_rate": 3.534938003873723e-05, - "loss": 2.7798, + "learning_rate": 3.5345207303220076e-05, + "loss": 2.7572, "step": 88500 }, { "epoch": 0.29, - "learning_rate": 3.533282566589965e-05, - "loss": 2.7769, + "learning_rate": 3.532864821542711e-05, + "loss": 2.7603, "step": 88600 }, { "epoch": 0.29, - "learning_rate": 3.531627129306206e-05, - "loss": 2.7833, + "learning_rate": 3.5312089127634143e-05, + "loss": 2.7502, "step": 88700 }, { "epoch": 0.29, - "learning_rate": 3.529971692022448e-05, - "loss": 2.7742, + "learning_rate": 3.529553003984117e-05, + "loss": 2.774, "step": 88800 }, { "epoch": 0.29, - "learning_rate": 3.5283162547386894e-05, - "loss": 2.7809, + "learning_rate": 3.52789709520482e-05, + "loss": 2.786, "step": 88900 }, { "epoch": 0.29, - "learning_rate": 3.5266608174549307e-05, - "loss": 2.7868, + "learning_rate": 3.526241186425522e-05, + "loss": 2.7827, "step": 89000 }, { - "epoch": 0.29, - "learning_rate": 3.5250053801711726e-05, - "loss": 2.7771, + "epoch": 0.3, + "learning_rate": 3.524585277646225e-05, + "loss": 2.7652, "step": 89100 }, { "epoch": 0.3, - "learning_rate": 3.523349942887414e-05, - "loss": 2.772, + "learning_rate": 3.522929368866928e-05, + "loss": 2.7652, "step": 89200 }, { "epoch": 0.3, - "learning_rate": 3.521694505603656e-05, - "loss": 2.7718, + "learning_rate": 3.5212734600876305e-05, + "loss": 2.7619, "step": 89300 }, { "epoch": 0.3, - "learning_rate": 3.520039068319897e-05, - "loss": 2.7779, + "learning_rate": 3.5196175513083335e-05, + "loss": 2.7673, "step": 89400 }, { "epoch": 0.3, - "learning_rate": 3.518383631036138e-05, - "loss": 2.7683, + "learning_rate": 3.5179616425290365e-05, + "loss": 2.7625, "step": 89500 }, { "epoch": 0.3, - "learning_rate": 3.51672819375238e-05, - "loss": 2.7909, + "learning_rate": 3.5163057337497396e-05, + "loss": 2.7567, "step": 89600 }, { "epoch": 0.3, - "learning_rate": 3.515072756468621e-05, - "loss": 2.7714, + "learning_rate": 3.5146498249704426e-05, + "loss": 2.7665, "step": 89700 }, { "epoch": 0.3, - "learning_rate": 3.513417319184863e-05, - "loss": 2.7661, + "learning_rate": 3.512993916191145e-05, + "loss": 2.7669, "step": 89800 }, { "epoch": 0.3, - "learning_rate": 3.5117618819011044e-05, - "loss": 2.7815, + "learning_rate": 3.511338007411848e-05, + "loss": 2.7634, "step": 89900 }, { "epoch": 0.3, - "learning_rate": 3.5101064446173456e-05, - "loss": 2.7682, + "learning_rate": 3.509682098632551e-05, + "loss": 2.7788, "step": 90000 }, { "epoch": 0.3, - "learning_rate": 3.5084510073335875e-05, - "loss": 2.7697, + "learning_rate": 3.5080261898532534e-05, + "loss": 2.7633, "step": 90100 }, { "epoch": 0.3, - "learning_rate": 3.506795570049829e-05, - "loss": 2.7856, + "learning_rate": 3.5063702810739564e-05, + "loss": 2.7672, "step": 90200 }, { "epoch": 0.3, - "learning_rate": 3.505140132766071e-05, - "loss": 2.7757, + "learning_rate": 3.504714372294659e-05, + "loss": 2.7642, "step": 90300 }, { "epoch": 0.3, - "learning_rate": 3.503484695482312e-05, - "loss": 2.7874, + "learning_rate": 3.503058463515362e-05, + "loss": 2.7647, "step": 90400 }, { "epoch": 0.3, - "learning_rate": 3.501829258198553e-05, - "loss": 2.7706, + "learning_rate": 3.501402554736065e-05, + "loss": 2.7723, "step": 90500 }, { "epoch": 0.3, - "learning_rate": 3.500173820914795e-05, - "loss": 2.7729, + "learning_rate": 3.499746645956768e-05, + "loss": 2.7747, "step": 90600 }, { "epoch": 0.3, - "learning_rate": 3.498518383631036e-05, - "loss": 2.7839, + "learning_rate": 3.498090737177471e-05, + "loss": 2.7588, "step": 90700 }, { "epoch": 0.3, - "learning_rate": 3.496862946347278e-05, - "loss": 2.7744, + "learning_rate": 3.496434828398173e-05, + "loss": 2.7673, "step": 90800 }, { "epoch": 0.3, - "learning_rate": 3.4952075090635194e-05, - "loss": 2.7835, + "learning_rate": 3.494778919618876e-05, + "loss": 2.7654, "step": 90900 }, { "epoch": 0.3, - "learning_rate": 3.493552071779761e-05, - "loss": 2.7915, + "learning_rate": 3.493123010839579e-05, + "loss": 2.7616, "step": 91000 }, { "epoch": 0.3, - "learning_rate": 3.4918966344960025e-05, - "loss": 2.7731, + "learning_rate": 3.4914671020602816e-05, + "loss": 2.7796, "step": 91100 }, { "epoch": 0.3, - "learning_rate": 3.490241197212244e-05, - "loss": 2.7648, + "learning_rate": 3.489811193280985e-05, + "loss": 2.7652, "step": 91200 }, { "epoch": 0.3, - "learning_rate": 3.4885857599284856e-05, - "loss": 2.7816, + "learning_rate": 3.488155284501688e-05, + "loss": 2.7671, "step": 91300 }, { "epoch": 0.3, - "learning_rate": 3.486930322644727e-05, - "loss": 2.7831, + "learning_rate": 3.48649937572239e-05, + "loss": 2.7556, "step": 91400 }, { "epoch": 0.3, - "learning_rate": 3.485274885360969e-05, - "loss": 2.7733, + "learning_rate": 3.484843466943093e-05, + "loss": 2.7542, "step": 91500 }, { "epoch": 0.3, - "learning_rate": 3.48361944807721e-05, - "loss": 2.7652, + "learning_rate": 3.483187558163796e-05, + "loss": 2.7582, "step": 91600 }, { "epoch": 0.3, - "learning_rate": 3.481964010793451e-05, - "loss": 2.7655, + "learning_rate": 3.481531649384499e-05, + "loss": 2.7596, "step": 91700 }, { "epoch": 0.3, - "learning_rate": 3.480308573509693e-05, - "loss": 2.7875, + "learning_rate": 3.4798757406052015e-05, + "loss": 2.7774, "step": 91800 }, { "epoch": 0.3, - "learning_rate": 3.4786531362259343e-05, - "loss": 2.7852, + "learning_rate": 3.4782198318259045e-05, + "loss": 2.7661, "step": 91900 }, { "epoch": 0.3, - "learning_rate": 3.476997698942176e-05, - "loss": 2.7794, + "learning_rate": 3.4765639230466076e-05, + "loss": 2.7551, "step": 92000 }, { - "epoch": 0.3, - "learning_rate": 3.4753422616584175e-05, - "loss": 2.7684, + "epoch": 0.31, + "learning_rate": 3.47490801426731e-05, + "loss": 2.7545, "step": 92100 }, { "epoch": 0.31, - "learning_rate": 3.473686824374659e-05, - "loss": 2.7846, + "learning_rate": 3.473252105488013e-05, + "loss": 2.7801, "step": 92200 }, { "epoch": 0.31, - "learning_rate": 3.4720313870909006e-05, - "loss": 2.7718, + "learning_rate": 3.471596196708716e-05, + "loss": 2.782, "step": 92300 }, { "epoch": 0.31, - "learning_rate": 3.470375949807142e-05, - "loss": 2.7705, + "learning_rate": 3.469940287929418e-05, + "loss": 2.763, "step": 92400 }, { "epoch": 0.31, - "learning_rate": 3.468720512523384e-05, - "loss": 2.7682, + "learning_rate": 3.4682843791501213e-05, + "loss": 2.7406, "step": 92500 }, { "epoch": 0.31, - "learning_rate": 3.467065075239625e-05, - "loss": 2.7749, + "learning_rate": 3.4666284703708244e-05, + "loss": 2.765, "step": 92600 }, { "epoch": 0.31, - "learning_rate": 3.465409637955866e-05, - "loss": 2.7903, + "learning_rate": 3.4649725615915274e-05, + "loss": 2.7653, "step": 92700 }, { "epoch": 0.31, - "learning_rate": 3.4637542006721074e-05, - "loss": 2.7925, + "learning_rate": 3.4633166528122304e-05, + "loss": 2.7632, "step": 92800 }, { "epoch": 0.31, - "learning_rate": 3.4620987633883486e-05, - "loss": 2.7762, + "learning_rate": 3.461660744032933e-05, + "loss": 2.761, "step": 92900 }, { "epoch": 0.31, - "learning_rate": 3.4604433261045905e-05, - "loss": 2.7717, + "learning_rate": 3.460004835253636e-05, + "loss": 2.7688, "step": 93000 }, { "epoch": 0.31, - "learning_rate": 3.458787888820832e-05, - "loss": 2.7867, + "learning_rate": 3.458348926474338e-05, + "loss": 2.7714, "step": 93100 }, { "epoch": 0.31, - "learning_rate": 3.457132451537074e-05, - "loss": 2.7791, + "learning_rate": 3.456693017695041e-05, + "loss": 2.7672, "step": 93200 }, { "epoch": 0.31, - "learning_rate": 3.455477014253315e-05, - "loss": 2.7835, + "learning_rate": 3.455037108915744e-05, + "loss": 2.7682, "step": 93300 }, { "epoch": 0.31, - "learning_rate": 3.453821576969556e-05, - "loss": 2.7655, + "learning_rate": 3.4533812001364466e-05, + "loss": 2.7727, "step": 93400 }, { "epoch": 0.31, - "learning_rate": 3.452166139685798e-05, - "loss": 2.7796, + "learning_rate": 3.4517252913571496e-05, + "loss": 2.7762, "step": 93500 }, { "epoch": 0.31, - "learning_rate": 3.450510702402039e-05, - "loss": 2.7844, + "learning_rate": 3.4500693825778526e-05, + "loss": 2.7726, "step": 93600 }, { "epoch": 0.31, - "learning_rate": 3.448855265118281e-05, - "loss": 2.7739, + "learning_rate": 3.448413473798556e-05, + "loss": 2.7657, "step": 93700 }, { "epoch": 0.31, - "learning_rate": 3.4471998278345224e-05, - "loss": 2.7785, + "learning_rate": 3.446757565019259e-05, + "loss": 2.775, "step": 93800 }, { "epoch": 0.31, - "learning_rate": 3.4455443905507636e-05, - "loss": 2.7706, + "learning_rate": 3.445101656239961e-05, + "loss": 2.775, "step": 93900 }, { "epoch": 0.31, - "learning_rate": 3.4438889532670055e-05, - "loss": 2.7815, + "learning_rate": 3.443445747460664e-05, + "loss": 2.7544, "step": 94000 }, { "epoch": 0.31, - "learning_rate": 3.442233515983247e-05, - "loss": 2.7691, + "learning_rate": 3.441789838681367e-05, + "loss": 2.7638, "step": 94100 }, { "epoch": 0.31, - "learning_rate": 3.4405780786994886e-05, - "loss": 2.7764, + "learning_rate": 3.4401339299020695e-05, + "loss": 2.7636, "step": 94200 }, { "epoch": 0.31, - "learning_rate": 3.43892264141573e-05, - "loss": 2.7791, + "learning_rate": 3.4384780211227725e-05, + "loss": 2.7631, "step": 94300 }, { "epoch": 0.31, - "learning_rate": 3.437267204131972e-05, - "loss": 2.775, + "learning_rate": 3.436822112343475e-05, + "loss": 2.7649, "step": 94400 }, { "epoch": 0.31, - "learning_rate": 3.435611766848213e-05, - "loss": 2.7883, + "learning_rate": 3.435166203564178e-05, + "loss": 2.7604, "step": 94500 }, { "epoch": 0.31, - "learning_rate": 3.433956329564454e-05, - "loss": 2.7737, + "learning_rate": 3.4335102947848816e-05, + "loss": 2.7816, "step": 94600 }, { "epoch": 0.31, - "learning_rate": 3.432300892280696e-05, - "loss": 2.7846, + "learning_rate": 3.431854386005584e-05, + "loss": 2.7671, "step": 94700 }, { "epoch": 0.31, - "learning_rate": 3.4306454549969374e-05, - "loss": 2.7905, + "learning_rate": 3.430198477226287e-05, + "loss": 2.7798, "step": 94800 }, { "epoch": 0.31, - "learning_rate": 3.428990017713179e-05, - "loss": 2.768, + "learning_rate": 3.428542568446989e-05, + "loss": 2.769, "step": 94900 }, { "epoch": 0.31, - "learning_rate": 3.4273345804294205e-05, - "loss": 2.7639, + "learning_rate": 3.4268866596676924e-05, + "loss": 2.7652, "step": 95000 }, { "epoch": 0.31, - "learning_rate": 3.425679143145662e-05, - "loss": 2.7801, + "learning_rate": 3.4252307508883954e-05, + "loss": 2.7537, "step": 95100 }, { "epoch": 0.32, - "learning_rate": 3.4240237058619036e-05, - "loss": 2.7805, + "learning_rate": 3.423574842109098e-05, + "loss": 2.767, "step": 95200 }, { "epoch": 0.32, - "learning_rate": 3.422368268578145e-05, - "loss": 2.7744, + "learning_rate": 3.421918933329801e-05, + "loss": 2.7716, "step": 95300 }, { "epoch": 0.32, - "learning_rate": 3.420712831294387e-05, - "loss": 2.7739, + "learning_rate": 3.420263024550504e-05, + "loss": 2.7678, "step": 95400 }, { "epoch": 0.32, - "learning_rate": 3.419057394010628e-05, - "loss": 2.7767, + "learning_rate": 3.418607115771206e-05, + "loss": 2.7497, "step": 95500 }, { "epoch": 0.32, - "learning_rate": 3.417401956726869e-05, - "loss": 2.7806, + "learning_rate": 3.41695120699191e-05, + "loss": 2.7556, "step": 95600 }, { "epoch": 0.32, - "learning_rate": 3.415746519443111e-05, - "loss": 2.7736, + "learning_rate": 3.415295298212612e-05, + "loss": 2.751, "step": 95700 }, { "epoch": 0.32, - "learning_rate": 3.414091082159352e-05, - "loss": 2.7877, + "learning_rate": 3.413639389433315e-05, + "loss": 2.7716, "step": 95800 }, { "epoch": 0.32, - "learning_rate": 3.412435644875594e-05, - "loss": 2.7752, + "learning_rate": 3.411983480654018e-05, + "loss": 2.7641, "step": 95900 }, { "epoch": 0.32, - "learning_rate": 3.4107802075918355e-05, - "loss": 2.7709, + "learning_rate": 3.4103275718747206e-05, + "loss": 2.7445, "step": 96000 }, { "epoch": 0.32, - "learning_rate": 3.409124770308077e-05, - "loss": 2.7841, + "learning_rate": 3.4086716630954236e-05, + "loss": 2.7678, "step": 96100 }, { "epoch": 0.32, - "learning_rate": 3.4074693330243186e-05, - "loss": 2.7853, + "learning_rate": 3.407015754316126e-05, + "loss": 2.7545, "step": 96200 }, { "epoch": 0.32, - "learning_rate": 3.40581389574056e-05, - "loss": 2.7739, + "learning_rate": 3.405359845536829e-05, + "loss": 2.7554, "step": 96300 }, { "epoch": 0.32, - "learning_rate": 3.404158458456802e-05, - "loss": 2.7713, + "learning_rate": 3.403703936757532e-05, + "loss": 2.755, "step": 96400 }, { "epoch": 0.32, - "learning_rate": 3.402503021173043e-05, - "loss": 2.784, + "learning_rate": 3.4020480279782344e-05, + "loss": 2.7867, "step": 96500 }, { "epoch": 0.32, - "learning_rate": 3.400847583889284e-05, - "loss": 2.7741, + "learning_rate": 3.400392119198938e-05, + "loss": 2.7597, "step": 96600 }, { "epoch": 0.32, - "learning_rate": 3.399192146605526e-05, - "loss": 2.7843, + "learning_rate": 3.3987362104196405e-05, + "loss": 2.7579, "step": 96700 }, { "epoch": 0.32, - "learning_rate": 3.397536709321767e-05, - "loss": 2.7742, + "learning_rate": 3.3970803016403435e-05, + "loss": 2.7721, "step": 96800 }, { "epoch": 0.32, - "learning_rate": 3.395881272038009e-05, - "loss": 2.7829, + "learning_rate": 3.3954243928610465e-05, + "loss": 2.7588, "step": 96900 }, { "epoch": 0.32, - "learning_rate": 3.3942258347542504e-05, - "loss": 2.7706, + "learning_rate": 3.393768484081749e-05, + "loss": 2.7565, "step": 97000 }, { "epoch": 0.32, - "learning_rate": 3.3925703974704917e-05, - "loss": 2.7828, + "learning_rate": 3.392112575302452e-05, + "loss": 2.756, "step": 97100 }, { "epoch": 0.32, - "learning_rate": 3.3909149601867336e-05, - "loss": 2.7705, + "learning_rate": 3.390456666523155e-05, + "loss": 2.767, "step": 97200 }, { "epoch": 0.32, - "learning_rate": 3.389259522902975e-05, - "loss": 2.767, + "learning_rate": 3.388800757743857e-05, + "loss": 2.7759, "step": 97300 }, { "epoch": 0.32, - "learning_rate": 3.387604085619217e-05, - "loss": 2.7753, + "learning_rate": 3.38714484896456e-05, + "loss": 2.7578, "step": 97400 }, { "epoch": 0.32, - "learning_rate": 3.385948648335458e-05, - "loss": 2.7843, + "learning_rate": 3.385488940185263e-05, + "loss": 2.7654, "step": 97500 }, { "epoch": 0.32, - "learning_rate": 3.3842932110517e-05, - "loss": 2.7808, + "learning_rate": 3.3838330314059664e-05, + "loss": 2.757, "step": 97600 }, { "epoch": 0.32, - "learning_rate": 3.382637773767941e-05, - "loss": 2.771, + "learning_rate": 3.3821771226266694e-05, + "loss": 2.7645, "step": 97700 }, { "epoch": 0.32, - "learning_rate": 3.380982336484182e-05, - "loss": 2.7802, + "learning_rate": 3.380521213847372e-05, + "loss": 2.7632, "step": 97800 }, { "epoch": 0.32, - "learning_rate": 3.379326899200424e-05, - "loss": 2.7805, + "learning_rate": 3.378865305068075e-05, + "loss": 2.7691, "step": 97900 }, { "epoch": 0.32, - "learning_rate": 3.3776714619166654e-05, - "loss": 2.7804, + "learning_rate": 3.377209396288777e-05, + "loss": 2.7674, "step": 98000 }, { "epoch": 0.32, - "learning_rate": 3.376016024632907e-05, - "loss": 2.771, + "learning_rate": 3.37555348750948e-05, + "loss": 2.7589, "step": 98100 }, { "epoch": 0.33, - "learning_rate": 3.3743605873491485e-05, - "loss": 2.7936, + "learning_rate": 3.373897578730183e-05, + "loss": 2.7618, "step": 98200 }, { "epoch": 0.33, - "learning_rate": 3.37270515006539e-05, - "loss": 2.7788, + "learning_rate": 3.3722416699508856e-05, + "loss": 2.7834, "step": 98300 }, { "epoch": 0.33, - "learning_rate": 3.371049712781632e-05, - "loss": 2.7818, + "learning_rate": 3.3705857611715886e-05, + "loss": 2.7622, "step": 98400 }, { "epoch": 0.33, - "learning_rate": 3.369394275497873e-05, - "loss": 2.7888, + "learning_rate": 3.3689298523922916e-05, + "loss": 2.779, "step": 98500 }, { "epoch": 0.33, - "learning_rate": 3.367738838214115e-05, - "loss": 2.7812, + "learning_rate": 3.3672739436129947e-05, + "loss": 2.7589, "step": 98600 }, { "epoch": 0.33, - "learning_rate": 3.366083400930356e-05, - "loss": 2.7802, + "learning_rate": 3.365618034833698e-05, + "loss": 2.7683, "step": 98700 }, { "epoch": 0.33, - "learning_rate": 3.364427963646597e-05, - "loss": 2.7701, + "learning_rate": 3.3639621260544e-05, + "loss": 2.7735, "step": 98800 }, { "epoch": 0.33, - "learning_rate": 3.362772526362839e-05, - "loss": 2.7758, + "learning_rate": 3.362306217275103e-05, + "loss": 2.7465, "step": 98900 }, { "epoch": 0.33, - "learning_rate": 3.3611170890790804e-05, - "loss": 2.7818, + "learning_rate": 3.3606503084958054e-05, + "loss": 2.7598, "step": 99000 }, { "epoch": 0.33, - "learning_rate": 3.359461651795322e-05, - "loss": 2.7653, + "learning_rate": 3.3589943997165084e-05, + "loss": 2.7521, "step": 99100 }, { "epoch": 0.33, - "learning_rate": 3.3578062145115635e-05, - "loss": 2.7715, + "learning_rate": 3.3573384909372115e-05, + "loss": 2.7639, "step": 99200 }, { "epoch": 0.33, - "learning_rate": 3.356150777227805e-05, - "loss": 2.7815, + "learning_rate": 3.355682582157914e-05, + "loss": 2.7595, "step": 99300 }, { "epoch": 0.33, - "learning_rate": 3.3544953399440466e-05, - "loss": 2.7825, + "learning_rate": 3.354026673378617e-05, + "loss": 2.7575, "step": 99400 }, { "epoch": 0.33, - "learning_rate": 3.352839902660288e-05, - "loss": 2.774, + "learning_rate": 3.35237076459932e-05, + "loss": 2.7489, "step": 99500 }, { "epoch": 0.33, - "learning_rate": 3.35118446537653e-05, - "loss": 2.7639, + "learning_rate": 3.350714855820023e-05, + "loss": 2.7656, "step": 99600 }, { "epoch": 0.33, - "learning_rate": 3.349529028092771e-05, - "loss": 2.7649, + "learning_rate": 3.349058947040726e-05, + "loss": 2.7598, "step": 99700 }, { "epoch": 0.33, - "learning_rate": 3.347873590809012e-05, - "loss": 2.7716, + "learning_rate": 3.347403038261428e-05, + "loss": 2.7486, "step": 99800 }, { "epoch": 0.33, - "learning_rate": 3.346218153525254e-05, - "loss": 2.7764, + "learning_rate": 3.345747129482131e-05, + "loss": 2.7553, "step": 99900 }, { "epoch": 0.33, - "learning_rate": 3.3445627162414953e-05, - "loss": 2.7633, + "learning_rate": 3.3440912207028344e-05, + "loss": 2.7728, "step": 100000 }, { "epoch": 0.33, - "learning_rate": 3.342907278957737e-05, - "loss": 2.7705, + "learning_rate": 3.342435311923537e-05, + "loss": 2.7468, "step": 100100 }, { "epoch": 0.33, - "learning_rate": 3.3412518416739785e-05, - "loss": 2.7713, + "learning_rate": 3.34077940314424e-05, + "loss": 2.7582, "step": 100200 }, { "epoch": 0.33, - "learning_rate": 3.33959640439022e-05, - "loss": 2.784, + "learning_rate": 3.339123494364942e-05, + "loss": 2.7531, "step": 100300 }, { "epoch": 0.33, - "learning_rate": 3.3379409671064616e-05, - "loss": 2.7755, + "learning_rate": 3.337467585585645e-05, + "loss": 2.7754, "step": 100400 }, { "epoch": 0.33, - "learning_rate": 3.336285529822703e-05, - "loss": 2.7765, + "learning_rate": 3.335811676806348e-05, + "loss": 2.7671, "step": 100500 }, { "epoch": 0.33, - "learning_rate": 3.334630092538945e-05, - "loss": 2.7594, + "learning_rate": 3.334155768027051e-05, + "loss": 2.7632, "step": 100600 }, { "epoch": 0.33, - "learning_rate": 3.332974655255186e-05, - "loss": 2.7813, + "learning_rate": 3.332499859247754e-05, + "loss": 2.7483, "step": 100700 }, { "epoch": 0.33, - "learning_rate": 3.331319217971428e-05, - "loss": 2.7752, + "learning_rate": 3.3308439504684566e-05, + "loss": 2.7764, "step": 100800 }, { "epoch": 0.33, - "learning_rate": 3.329663780687669e-05, - "loss": 2.787, + "learning_rate": 3.3291880416891596e-05, + "loss": 2.7616, "step": 100900 }, { "epoch": 0.33, - "learning_rate": 3.32800834340391e-05, - "loss": 2.7651, + "learning_rate": 3.3275321329098626e-05, + "loss": 2.7647, "step": 101000 }, { "epoch": 0.33, - "learning_rate": 3.326352906120152e-05, - "loss": 2.7576, + "learning_rate": 3.325876224130565e-05, + "loss": 2.7571, "step": 101100 }, { "epoch": 0.34, - "learning_rate": 3.3246974688363934e-05, - "loss": 2.7713, + "learning_rate": 3.324220315351268e-05, + "loss": 2.751, "step": 101200 }, { "epoch": 0.34, - "learning_rate": 3.3230420315526354e-05, - "loss": 2.7807, + "learning_rate": 3.322564406571971e-05, + "loss": 2.7625, "step": 101300 }, { "epoch": 0.34, - "learning_rate": 3.3213865942688766e-05, - "loss": 2.7825, + "learning_rate": 3.3209084977926734e-05, + "loss": 2.7686, "step": 101400 }, { "epoch": 0.34, - "learning_rate": 3.319731156985118e-05, - "loss": 2.7823, + "learning_rate": 3.3192525890133764e-05, + "loss": 2.7722, "step": 101500 }, { "epoch": 0.34, - "learning_rate": 3.318075719701359e-05, - "loss": 2.7741, + "learning_rate": 3.3175966802340795e-05, + "loss": 2.7697, "step": 101600 }, { "epoch": 0.34, - "learning_rate": 3.3164202824176e-05, - "loss": 2.789, + "learning_rate": 3.3159407714547825e-05, + "loss": 2.7732, "step": 101700 }, { "epoch": 0.34, - "learning_rate": 3.314764845133842e-05, - "loss": 2.7983, + "learning_rate": 3.3142848626754855e-05, + "loss": 2.7656, "step": 101800 }, { "epoch": 0.34, - "learning_rate": 3.3131094078500834e-05, - "loss": 2.7856, + "learning_rate": 3.312628953896188e-05, + "loss": 2.7592, "step": 101900 }, { "epoch": 0.34, - "learning_rate": 3.311453970566325e-05, - "loss": 2.7914, + "learning_rate": 3.310973045116891e-05, + "loss": 2.7466, "step": 102000 }, { "epoch": 0.34, - "learning_rate": 3.3097985332825665e-05, - "loss": 2.7627, + "learning_rate": 3.309317136337593e-05, + "loss": 2.763, "step": 102100 }, { "epoch": 0.34, - "learning_rate": 3.308143095998808e-05, - "loss": 2.7756, + "learning_rate": 3.307661227558296e-05, + "loss": 2.773, "step": 102200 }, { "epoch": 0.34, - "learning_rate": 3.3064876587150496e-05, - "loss": 2.7775, + "learning_rate": 3.306005318778999e-05, + "loss": 2.7662, "step": 102300 }, { "epoch": 0.34, - "learning_rate": 3.304832221431291e-05, - "loss": 2.7794, + "learning_rate": 3.3043494099997017e-05, + "loss": 2.7679, "step": 102400 }, { "epoch": 0.34, - "learning_rate": 3.303176784147533e-05, - "loss": 2.7749, + "learning_rate": 3.302693501220405e-05, + "loss": 2.7694, "step": 102500 }, { "epoch": 0.34, - "learning_rate": 3.301521346863774e-05, - "loss": 2.7621, + "learning_rate": 3.301037592441108e-05, + "loss": 2.7698, "step": 102600 }, { "epoch": 0.34, - "learning_rate": 3.299865909580015e-05, - "loss": 2.7732, + "learning_rate": 3.299381683661811e-05, + "loss": 2.7527, "step": 102700 }, { "epoch": 0.34, - "learning_rate": 3.298210472296257e-05, - "loss": 2.7737, + "learning_rate": 3.297725774882514e-05, + "loss": 2.7665, "step": 102800 }, { "epoch": 0.34, - "learning_rate": 3.2965550350124984e-05, - "loss": 2.7783, + "learning_rate": 3.296069866103216e-05, + "loss": 2.7618, "step": 102900 }, { "epoch": 0.34, - "learning_rate": 3.29489959772874e-05, - "loss": 2.7734, + "learning_rate": 3.294413957323919e-05, + "loss": 2.7532, "step": 103000 }, { "epoch": 0.34, - "learning_rate": 3.2932441604449815e-05, - "loss": 2.7766, + "learning_rate": 3.292758048544622e-05, + "loss": 2.7778, "step": 103100 }, { "epoch": 0.34, - "learning_rate": 3.291588723161223e-05, - "loss": 2.7706, + "learning_rate": 3.2911021397653245e-05, + "loss": 2.7687, "step": 103200 }, { "epoch": 0.34, - "learning_rate": 3.2899332858774646e-05, - "loss": 2.7759, + "learning_rate": 3.2894462309860276e-05, + "loss": 2.7574, "step": 103300 }, { "epoch": 0.34, - "learning_rate": 3.288277848593706e-05, - "loss": 2.7846, + "learning_rate": 3.28779032220673e-05, + "loss": 2.7666, "step": 103400 }, { "epoch": 0.34, - "learning_rate": 3.286622411309948e-05, - "loss": 2.7774, + "learning_rate": 3.286134413427433e-05, + "loss": 2.7559, "step": 103500 }, { "epoch": 0.34, - "learning_rate": 3.284966974026189e-05, - "loss": 2.7682, + "learning_rate": 3.284478504648137e-05, + "loss": 2.7526, "step": 103600 }, { "epoch": 0.34, - "learning_rate": 3.28331153674243e-05, - "loss": 2.79, + "learning_rate": 3.282822595868839e-05, + "loss": 2.758, "step": 103700 }, { "epoch": 0.34, - "learning_rate": 3.281656099458672e-05, - "loss": 2.7739, + "learning_rate": 3.281166687089542e-05, + "loss": 2.7578, "step": 103800 }, { "epoch": 0.34, - "learning_rate": 3.280000662174913e-05, - "loss": 2.7842, + "learning_rate": 3.2795107783102444e-05, + "loss": 2.7533, "step": 103900 }, { "epoch": 0.34, - "learning_rate": 3.278345224891155e-05, - "loss": 2.784, + "learning_rate": 3.2778548695309474e-05, + "loss": 2.7938, "step": 104000 }, { "epoch": 0.34, - "learning_rate": 3.2766897876073965e-05, - "loss": 2.768, + "learning_rate": 3.2761989607516505e-05, + "loss": 2.7731, "step": 104100 }, { - "epoch": 0.34, - "learning_rate": 3.2750343503236384e-05, - "loss": 2.7755, + "epoch": 0.35, + "learning_rate": 3.274543051972353e-05, + "loss": 2.7624, "step": 104200 }, { "epoch": 0.35, - "learning_rate": 3.2733789130398796e-05, - "loss": 2.7662, + "learning_rate": 3.272887143193056e-05, + "loss": 2.7623, "step": 104300 }, { "epoch": 0.35, - "learning_rate": 3.271723475756121e-05, - "loss": 2.7739, + "learning_rate": 3.271231234413759e-05, + "loss": 2.7708, "step": 104400 }, { "epoch": 0.35, - "learning_rate": 3.270068038472363e-05, - "loss": 2.7882, + "learning_rate": 3.269575325634461e-05, + "loss": 2.7552, "step": 104500 }, { "epoch": 0.35, - "learning_rate": 3.268412601188604e-05, - "loss": 2.7644, + "learning_rate": 3.267919416855165e-05, + "loss": 2.7578, "step": 104600 }, { "epoch": 0.35, - "learning_rate": 3.266757163904846e-05, - "loss": 2.7605, + "learning_rate": 3.266263508075867e-05, + "loss": 2.7699, "step": 104700 }, { "epoch": 0.35, - "learning_rate": 3.265101726621087e-05, - "loss": 2.7728, + "learning_rate": 3.26460759929657e-05, + "loss": 2.7627, "step": 104800 }, { "epoch": 0.35, - "learning_rate": 3.263446289337328e-05, - "loss": 2.7701, + "learning_rate": 3.2629516905172733e-05, + "loss": 2.7735, "step": 104900 }, { "epoch": 0.35, - "learning_rate": 3.26179085205357e-05, - "loss": 2.776, + "learning_rate": 3.261295781737976e-05, + "loss": 2.7692, "step": 105000 }, { "epoch": 0.35, - "learning_rate": 3.2601354147698114e-05, - "loss": 2.7712, + "learning_rate": 3.259639872958679e-05, + "loss": 2.7679, "step": 105100 }, { "epoch": 0.35, - "learning_rate": 3.258479977486053e-05, - "loss": 2.7716, + "learning_rate": 3.257983964179381e-05, + "loss": 2.7618, "step": 105200 }, { "epoch": 0.35, - "learning_rate": 3.2568245402022946e-05, - "loss": 2.7774, + "learning_rate": 3.256328055400084e-05, + "loss": 2.7637, "step": 105300 }, { "epoch": 0.35, - "learning_rate": 3.255169102918536e-05, - "loss": 2.785, + "learning_rate": 3.254672146620787e-05, + "loss": 2.7641, "step": 105400 }, { "epoch": 0.35, - "learning_rate": 3.253513665634778e-05, - "loss": 2.7735, + "learning_rate": 3.2530162378414895e-05, + "loss": 2.7697, "step": 105500 }, { "epoch": 0.35, - "learning_rate": 3.251858228351019e-05, - "loss": 2.7628, + "learning_rate": 3.251360329062193e-05, + "loss": 2.7751, "step": 105600 }, { "epoch": 0.35, - "learning_rate": 3.250202791067261e-05, - "loss": 2.7881, + "learning_rate": 3.2497044202828956e-05, + "loss": 2.7631, "step": 105700 }, { "epoch": 0.35, - "learning_rate": 3.248547353783502e-05, - "loss": 2.7641, + "learning_rate": 3.2480485115035986e-05, + "loss": 2.7671, "step": 105800 }, { "epoch": 0.35, - "learning_rate": 3.246891916499743e-05, - "loss": 2.7703, + "learning_rate": 3.2463926027243016e-05, + "loss": 2.7605, "step": 105900 }, { "epoch": 0.35, - "learning_rate": 3.245236479215985e-05, - "loss": 2.7634, + "learning_rate": 3.244736693945004e-05, + "loss": 2.7699, "step": 106000 }, { "epoch": 0.35, - "learning_rate": 3.2435810419322264e-05, - "loss": 2.7717, + "learning_rate": 3.243080785165707e-05, + "loss": 2.7588, "step": 106100 }, { "epoch": 0.35, - "learning_rate": 3.241925604648468e-05, - "loss": 2.7641, + "learning_rate": 3.2414248763864093e-05, + "loss": 2.7724, "step": 106200 }, { "epoch": 0.35, - "learning_rate": 3.2402701673647095e-05, - "loss": 2.7868, + "learning_rate": 3.2397689676071124e-05, + "loss": 2.7479, "step": 106300 }, { "epoch": 0.35, - "learning_rate": 3.238614730080951e-05, - "loss": 2.7773, + "learning_rate": 3.2381130588278154e-05, + "loss": 2.762, "step": 106400 }, { "epoch": 0.35, - "learning_rate": 3.2369592927971927e-05, - "loss": 2.7773, + "learning_rate": 3.236457150048518e-05, + "loss": 2.7441, "step": 106500 }, { "epoch": 0.35, - "learning_rate": 3.235303855513434e-05, - "loss": 2.7799, + "learning_rate": 3.2348012412692215e-05, + "loss": 2.7582, "step": 106600 }, { "epoch": 0.35, - "learning_rate": 3.233648418229676e-05, - "loss": 2.784, + "learning_rate": 3.233145332489924e-05, + "loss": 2.7559, "step": 106700 }, { "epoch": 0.35, - "learning_rate": 3.231992980945917e-05, - "loss": 2.7685, + "learning_rate": 3.231489423710627e-05, + "loss": 2.7591, "step": 106800 }, { "epoch": 0.35, - "learning_rate": 3.230337543662158e-05, - "loss": 2.7823, + "learning_rate": 3.22983351493133e-05, + "loss": 2.7725, "step": 106900 }, { "epoch": 0.35, - "learning_rate": 3.2286821063784e-05, - "loss": 2.7562, + "learning_rate": 3.228177606152032e-05, + "loss": 2.7694, "step": 107000 }, { "epoch": 0.35, - "learning_rate": 3.2270266690946414e-05, - "loss": 2.7664, + "learning_rate": 3.226521697372735e-05, + "loss": 2.7617, "step": 107100 }, { - "epoch": 0.35, - "learning_rate": 3.225371231810883e-05, - "loss": 2.7667, + "epoch": 0.36, + "learning_rate": 3.224865788593438e-05, + "loss": 2.7571, "step": 107200 }, { "epoch": 0.36, - "learning_rate": 3.2237157945271245e-05, - "loss": 2.7852, + "learning_rate": 3.2232098798141406e-05, + "loss": 2.7704, "step": 107300 }, { "epoch": 0.36, - "learning_rate": 3.2220603572433664e-05, - "loss": 2.7783, + "learning_rate": 3.221553971034844e-05, + "loss": 2.7718, "step": 107400 }, { "epoch": 0.36, - "learning_rate": 3.2204049199596076e-05, - "loss": 2.7797, + "learning_rate": 3.219898062255546e-05, + "loss": 2.7644, "step": 107500 }, { "epoch": 0.36, - "learning_rate": 3.218749482675849e-05, - "loss": 2.7615, + "learning_rate": 3.21824215347625e-05, + "loss": 2.7794, "step": 107600 }, { "epoch": 0.36, - "learning_rate": 3.217094045392091e-05, - "loss": 2.7653, + "learning_rate": 3.216586244696953e-05, + "loss": 2.7497, "step": 107700 }, { "epoch": 0.36, - "learning_rate": 3.215438608108332e-05, - "loss": 2.7752, + "learning_rate": 3.214930335917655e-05, + "loss": 2.7569, "step": 107800 }, { "epoch": 0.36, - "learning_rate": 3.213783170824574e-05, - "loss": 2.7706, + "learning_rate": 3.213274427138358e-05, + "loss": 2.7458, "step": 107900 }, { "epoch": 0.36, - "learning_rate": 3.212127733540815e-05, - "loss": 2.7725, + "learning_rate": 3.2116185183590605e-05, + "loss": 2.7596, "step": 108000 }, { "epoch": 0.36, - "learning_rate": 3.2104722962570563e-05, - "loss": 2.7577, + "learning_rate": 3.2099626095797635e-05, + "loss": 2.7562, "step": 108100 }, { "epoch": 0.36, - "learning_rate": 3.208816858973298e-05, - "loss": 2.7666, + "learning_rate": 3.2083067008004666e-05, + "loss": 2.76, "step": 108200 }, { "epoch": 0.36, - "learning_rate": 3.2071614216895395e-05, - "loss": 2.7644, + "learning_rate": 3.206650792021169e-05, + "loss": 2.7697, "step": 108300 }, { "epoch": 0.36, - "learning_rate": 3.2055059844057814e-05, - "loss": 2.7897, + "learning_rate": 3.204994883241872e-05, + "loss": 2.7668, "step": 108400 }, { "epoch": 0.36, - "learning_rate": 3.2038505471220226e-05, - "loss": 2.7704, + "learning_rate": 3.203338974462575e-05, + "loss": 2.7502, "step": 108500 }, { "epoch": 0.36, - "learning_rate": 3.202195109838264e-05, - "loss": 2.7812, + "learning_rate": 3.201683065683278e-05, + "loss": 2.7758, "step": 108600 }, { "epoch": 0.36, - "learning_rate": 3.200539672554506e-05, - "loss": 2.7741, + "learning_rate": 3.200027156903981e-05, + "loss": 2.7576, "step": 108700 }, { "epoch": 0.36, - "learning_rate": 3.198884235270747e-05, - "loss": 2.7787, + "learning_rate": 3.1983712481246834e-05, + "loss": 2.7622, "step": 108800 }, { "epoch": 0.36, - "learning_rate": 3.197228797986989e-05, - "loss": 2.7758, + "learning_rate": 3.1967153393453864e-05, + "loss": 2.7674, "step": 108900 }, { "epoch": 0.36, - "learning_rate": 3.19557336070323e-05, - "loss": 2.773, + "learning_rate": 3.1950594305660894e-05, + "loss": 2.775, "step": 109000 }, { "epoch": 0.36, - "learning_rate": 3.193917923419471e-05, - "loss": 2.7714, + "learning_rate": 3.193403521786792e-05, + "loss": 2.7526, "step": 109100 }, { "epoch": 0.36, - "learning_rate": 3.192262486135713e-05, - "loss": 2.7783, + "learning_rate": 3.191747613007495e-05, + "loss": 2.7595, "step": 109200 }, { "epoch": 0.36, - "learning_rate": 3.1906070488519544e-05, - "loss": 2.7797, + "learning_rate": 3.190091704228197e-05, + "loss": 2.7526, "step": 109300 }, { "epoch": 0.36, - "learning_rate": 3.1889516115681963e-05, - "loss": 2.775, + "learning_rate": 3.1884357954489e-05, + "loss": 2.758, "step": 109400 }, { "epoch": 0.36, - "learning_rate": 3.1872961742844376e-05, - "loss": 2.7765, + "learning_rate": 3.186779886669603e-05, + "loss": 2.7494, "step": 109500 }, { "epoch": 0.36, - "learning_rate": 3.185640737000679e-05, - "loss": 2.7783, + "learning_rate": 3.185123977890306e-05, + "loss": 2.7511, "step": 109600 }, { "epoch": 0.36, - "learning_rate": 3.183985299716921e-05, - "loss": 2.7854, + "learning_rate": 3.183468069111009e-05, + "loss": 2.7453, "step": 109700 }, { "epoch": 0.36, - "learning_rate": 3.182329862433162e-05, - "loss": 2.7791, + "learning_rate": 3.1818121603317116e-05, + "loss": 2.7605, "step": 109800 }, { "epoch": 0.36, - "learning_rate": 3.180674425149404e-05, - "loss": 2.7679, + "learning_rate": 3.180156251552415e-05, + "loss": 2.7455, "step": 109900 }, { "epoch": 0.36, - "learning_rate": 3.179018987865645e-05, - "loss": 2.7684, + "learning_rate": 3.178500342773118e-05, + "loss": 2.7523, "step": 110000 }, { "epoch": 0.36, - "learning_rate": 3.177363550581886e-05, - "loss": 2.7811, + "learning_rate": 3.17684443399382e-05, + "loss": 2.7579, "step": 110100 }, { "epoch": 0.36, - "learning_rate": 3.175708113298128e-05, - "loss": 2.7736, + "learning_rate": 3.175188525214523e-05, + "loss": 2.767, "step": 110200 }, { "epoch": 0.37, - "learning_rate": 3.1740526760143694e-05, - "loss": 2.7635, + "learning_rate": 3.173532616435226e-05, + "loss": 2.7535, "step": 110300 }, { "epoch": 0.37, - "learning_rate": 3.1723972387306106e-05, - "loss": 2.7828, + "learning_rate": 3.1718767076559285e-05, + "loss": 2.7637, "step": 110400 }, { "epoch": 0.37, - "learning_rate": 3.170741801446852e-05, - "loss": 2.7673, + "learning_rate": 3.1702207988766315e-05, + "loss": 2.7577, "step": 110500 }, { "epoch": 0.37, - "learning_rate": 3.169086364163094e-05, - "loss": 2.7755, + "learning_rate": 3.1685648900973345e-05, + "loss": 2.7741, "step": 110600 }, { "epoch": 0.37, - "learning_rate": 3.167430926879335e-05, - "loss": 2.7807, + "learning_rate": 3.1669089813180376e-05, + "loss": 2.7747, "step": 110700 }, { "epoch": 0.37, - "learning_rate": 3.165775489595577e-05, - "loss": 2.7714, + "learning_rate": 3.1652530725387406e-05, + "loss": 2.7647, "step": 110800 }, { "epoch": 0.37, - "learning_rate": 3.164120052311818e-05, - "loss": 2.778, + "learning_rate": 3.163597163759443e-05, + "loss": 2.7625, "step": 110900 }, { "epoch": 0.37, - "learning_rate": 3.1624646150280594e-05, - "loss": 2.7727, + "learning_rate": 3.161941254980146e-05, + "loss": 2.7657, "step": 111000 }, { "epoch": 0.37, - "learning_rate": 3.160809177744301e-05, - "loss": 2.7663, + "learning_rate": 3.160285346200848e-05, + "loss": 2.7449, "step": 111100 }, { "epoch": 0.37, - "learning_rate": 3.1591537404605425e-05, - "loss": 2.7816, + "learning_rate": 3.1586294374215514e-05, + "loss": 2.7622, "step": 111200 }, { "epoch": 0.37, - "learning_rate": 3.1574983031767844e-05, - "loss": 2.7668, + "learning_rate": 3.1569735286422544e-05, + "loss": 2.7657, "step": 111300 }, { "epoch": 0.37, - "learning_rate": 3.1558428658930256e-05, - "loss": 2.776, + "learning_rate": 3.155317619862957e-05, + "loss": 2.759, "step": 111400 }, { "epoch": 0.37, - "learning_rate": 3.154187428609267e-05, - "loss": 2.7728, + "learning_rate": 3.15366171108366e-05, + "loss": 2.7602, "step": 111500 }, { "epoch": 0.37, - "learning_rate": 3.152531991325509e-05, - "loss": 2.7592, + "learning_rate": 3.152005802304363e-05, + "loss": 2.7635, "step": 111600 }, { "epoch": 0.37, - "learning_rate": 3.15087655404175e-05, - "loss": 2.7637, + "learning_rate": 3.150349893525066e-05, + "loss": 2.7549, "step": 111700 }, { "epoch": 0.37, - "learning_rate": 3.149221116757992e-05, - "loss": 2.7674, + "learning_rate": 3.148693984745769e-05, + "loss": 2.7618, "step": 111800 }, { "epoch": 0.37, - "learning_rate": 3.147565679474233e-05, - "loss": 2.7669, + "learning_rate": 3.147038075966471e-05, + "loss": 2.7703, "step": 111900 }, { "epoch": 0.37, - "learning_rate": 3.145910242190474e-05, - "loss": 2.7729, + "learning_rate": 3.145382167187174e-05, + "loss": 2.7709, "step": 112000 }, { "epoch": 0.37, - "learning_rate": 3.144254804906716e-05, - "loss": 2.7795, + "learning_rate": 3.143726258407877e-05, + "loss": 2.7669, "step": 112100 }, { "epoch": 0.37, - "learning_rate": 3.1425993676229575e-05, - "loss": 2.7743, + "learning_rate": 3.1420703496285796e-05, + "loss": 2.7517, "step": 112200 }, { "epoch": 0.37, - "learning_rate": 3.1409439303391994e-05, - "loss": 2.7723, + "learning_rate": 3.1404144408492827e-05, + "loss": 2.7328, "step": 112300 }, { "epoch": 0.37, - "learning_rate": 3.1392884930554406e-05, - "loss": 2.7614, + "learning_rate": 3.138758532069985e-05, + "loss": 2.7516, "step": 112400 }, { "epoch": 0.37, - "learning_rate": 3.137633055771682e-05, - "loss": 2.77, + "learning_rate": 3.137102623290688e-05, + "loss": 2.759, "step": 112500 }, { "epoch": 0.37, - "learning_rate": 3.135977618487924e-05, - "loss": 2.7759, + "learning_rate": 3.135446714511391e-05, + "loss": 2.7515, "step": 112600 }, { "epoch": 0.37, - "learning_rate": 3.134322181204165e-05, - "loss": 2.7656, + "learning_rate": 3.133790805732094e-05, + "loss": 2.7585, "step": 112700 }, { "epoch": 0.37, - "learning_rate": 3.132666743920407e-05, - "loss": 2.7758, + "learning_rate": 3.132134896952797e-05, + "loss": 2.7767, "step": 112800 }, { "epoch": 0.37, - "learning_rate": 3.131011306636648e-05, - "loss": 2.7651, + "learning_rate": 3.1304789881734995e-05, + "loss": 2.7467, "step": 112900 }, { "epoch": 0.37, - "learning_rate": 3.129355869352889e-05, - "loss": 2.7618, + "learning_rate": 3.1288230793942025e-05, + "loss": 2.7571, "step": 113000 }, { "epoch": 0.37, - "learning_rate": 3.127700432069131e-05, - "loss": 2.7754, + "learning_rate": 3.1271671706149055e-05, + "loss": 2.7672, "step": 113100 }, { "epoch": 0.37, - "learning_rate": 3.1260449947853724e-05, - "loss": 2.7729, + "learning_rate": 3.125511261835608e-05, + "loss": 2.7683, "step": 113200 }, { "epoch": 0.38, - "learning_rate": 3.124389557501614e-05, - "loss": 2.773, + "learning_rate": 3.123855353056311e-05, + "loss": 2.7675, "step": 113300 }, { "epoch": 0.38, - "learning_rate": 3.1227341202178556e-05, - "loss": 2.7728, + "learning_rate": 3.122199444277013e-05, + "loss": 2.7559, "step": 113400 }, { "epoch": 0.38, - "learning_rate": 3.121078682934097e-05, - "loss": 2.7802, + "learning_rate": 3.120543535497716e-05, + "loss": 2.7681, "step": 113500 }, { "epoch": 0.38, - "learning_rate": 3.119423245650339e-05, - "loss": 2.7692, + "learning_rate": 3.11888762671842e-05, + "loss": 2.7606, "step": 113600 }, { "epoch": 0.38, - "learning_rate": 3.11776780836658e-05, - "loss": 2.7749, + "learning_rate": 3.1172317179391224e-05, + "loss": 2.7505, "step": 113700 }, { "epoch": 0.38, - "learning_rate": 3.116112371082822e-05, - "loss": 2.7893, + "learning_rate": 3.1155758091598254e-05, + "loss": 2.7445, "step": 113800 }, { "epoch": 0.38, - "learning_rate": 3.114456933799063e-05, - "loss": 2.7856, + "learning_rate": 3.113919900380528e-05, + "loss": 2.755, "step": 113900 }, { "epoch": 0.38, - "learning_rate": 3.112801496515305e-05, - "loss": 2.7725, + "learning_rate": 3.112263991601231e-05, + "loss": 2.7527, "step": 114000 }, { "epoch": 0.38, - "learning_rate": 3.111146059231546e-05, - "loss": 2.7749, + "learning_rate": 3.110608082821934e-05, + "loss": 2.7576, "step": 114100 }, { "epoch": 0.38, - "learning_rate": 3.1094906219477874e-05, - "loss": 2.7739, + "learning_rate": 3.108952174042636e-05, + "loss": 2.7761, "step": 114200 }, { "epoch": 0.38, - "learning_rate": 3.107835184664029e-05, - "loss": 2.7755, + "learning_rate": 3.107296265263339e-05, + "loss": 2.7507, "step": 114300 }, { "epoch": 0.38, - "learning_rate": 3.1061797473802705e-05, - "loss": 2.7738, + "learning_rate": 3.105640356484042e-05, + "loss": 2.7524, "step": 114400 }, { "epoch": 0.38, - "learning_rate": 3.1045243100965124e-05, - "loss": 2.7726, + "learning_rate": 3.1039844477047446e-05, + "loss": 2.7524, "step": 114500 }, { "epoch": 0.38, - "learning_rate": 3.1028688728127537e-05, - "loss": 2.7723, + "learning_rate": 3.102328538925448e-05, + "loss": 2.7679, "step": 114600 }, { "epoch": 0.38, - "learning_rate": 3.101213435528995e-05, - "loss": 2.7785, + "learning_rate": 3.1006726301461506e-05, + "loss": 2.7491, "step": 114700 }, { "epoch": 0.38, - "learning_rate": 3.099557998245237e-05, - "loss": 2.7531, + "learning_rate": 3.0990167213668537e-05, + "loss": 2.766, "step": 114800 }, { "epoch": 0.38, - "learning_rate": 3.097902560961478e-05, - "loss": 2.7686, + "learning_rate": 3.097360812587557e-05, + "loss": 2.7598, "step": 114900 }, { "epoch": 0.38, - "learning_rate": 3.09624712367772e-05, - "loss": 2.7673, + "learning_rate": 3.095704903808259e-05, + "loss": 2.7573, "step": 115000 }, { "epoch": 0.38, - "learning_rate": 3.094591686393961e-05, - "loss": 2.7584, + "learning_rate": 3.094048995028962e-05, + "loss": 2.772, "step": 115100 }, { "epoch": 0.38, - "learning_rate": 3.0929362491102024e-05, - "loss": 2.763, + "learning_rate": 3.0923930862496644e-05, + "loss": 2.7522, "step": 115200 }, { "epoch": 0.38, - "learning_rate": 3.091280811826444e-05, - "loss": 2.7553, + "learning_rate": 3.0907371774703675e-05, + "loss": 2.7459, "step": 115300 }, { "epoch": 0.38, - "learning_rate": 3.0896253745426855e-05, - "loss": 2.7735, + "learning_rate": 3.0890812686910705e-05, + "loss": 2.7562, "step": 115400 }, { "epoch": 0.38, - "learning_rate": 3.0879699372589274e-05, - "loss": 2.7773, + "learning_rate": 3.087425359911773e-05, + "loss": 2.7545, "step": 115500 }, { "epoch": 0.38, - "learning_rate": 3.0863144999751686e-05, - "loss": 2.7626, + "learning_rate": 3.0857694511324765e-05, + "loss": 2.7687, "step": 115600 }, { "epoch": 0.38, - "learning_rate": 3.08465906269141e-05, - "loss": 2.7775, + "learning_rate": 3.084113542353179e-05, + "loss": 2.7686, "step": 115700 }, { "epoch": 0.38, - "learning_rate": 3.083003625407652e-05, - "loss": 2.7956, + "learning_rate": 3.082457633573882e-05, + "loss": 2.7615, "step": 115800 }, { "epoch": 0.38, - "learning_rate": 3.081348188123893e-05, - "loss": 2.781, + "learning_rate": 3.080801724794585e-05, + "loss": 2.7742, "step": 115900 }, { "epoch": 0.38, - "learning_rate": 3.079692750840135e-05, - "loss": 2.768, + "learning_rate": 3.079145816015287e-05, + "loss": 2.7593, "step": 116000 }, { "epoch": 0.38, - "learning_rate": 3.078037313556376e-05, - "loss": 2.7684, + "learning_rate": 3.07748990723599e-05, + "loss": 2.7572, "step": 116100 }, { "epoch": 0.38, - "learning_rate": 3.0763818762726173e-05, - "loss": 2.7666, + "learning_rate": 3.0758339984566934e-05, + "loss": 2.7566, "step": 116200 }, { "epoch": 0.39, - "learning_rate": 3.074726438988859e-05, - "loss": 2.7847, + "learning_rate": 3.074178089677396e-05, + "loss": 2.7496, "step": 116300 }, { "epoch": 0.39, - "learning_rate": 3.0730710017051005e-05, - "loss": 2.7653, + "learning_rate": 3.072522180898099e-05, + "loss": 2.767, "step": 116400 }, { "epoch": 0.39, - "learning_rate": 3.0714155644213424e-05, - "loss": 2.7693, + "learning_rate": 3.070866272118801e-05, + "loss": 2.747, "step": 116500 }, { "epoch": 0.39, - "learning_rate": 3.0697601271375836e-05, - "loss": 2.7702, + "learning_rate": 3.069210363339505e-05, + "loss": 2.7728, "step": 116600 }, { "epoch": 0.39, - "learning_rate": 3.068104689853825e-05, - "loss": 2.7828, + "learning_rate": 3.067554454560208e-05, + "loss": 2.7727, "step": 116700 }, { "epoch": 0.39, - "learning_rate": 3.066449252570067e-05, - "loss": 2.7628, + "learning_rate": 3.06589854578091e-05, + "loss": 2.749, "step": 116800 }, { "epoch": 0.39, - "learning_rate": 3.064793815286308e-05, - "loss": 2.7722, + "learning_rate": 3.064242637001613e-05, + "loss": 2.7447, "step": 116900 }, { "epoch": 0.39, - "learning_rate": 3.06313837800255e-05, - "loss": 2.7868, + "learning_rate": 3.0625867282223156e-05, + "loss": 2.7553, "step": 117000 }, { "epoch": 0.39, - "learning_rate": 3.061482940718791e-05, - "loss": 2.7793, + "learning_rate": 3.0609308194430186e-05, + "loss": 2.7591, "step": 117100 }, { "epoch": 0.39, - "learning_rate": 3.059827503435033e-05, - "loss": 2.7742, + "learning_rate": 3.0592749106637216e-05, + "loss": 2.7478, "step": 117200 }, { "epoch": 0.39, - "learning_rate": 3.058172066151274e-05, - "loss": 2.7836, + "learning_rate": 3.057619001884424e-05, + "loss": 2.7632, "step": 117300 }, { "epoch": 0.39, - "learning_rate": 3.0565166288675154e-05, - "loss": 2.7655, + "learning_rate": 3.055963093105127e-05, + "loss": 2.754, "step": 117400 }, { "epoch": 0.39, - "learning_rate": 3.0548611915837573e-05, - "loss": 2.7816, + "learning_rate": 3.05430718432583e-05, + "loss": 2.7479, "step": 117500 }, { "epoch": 0.39, - "learning_rate": 3.0532057542999986e-05, - "loss": 2.7872, + "learning_rate": 3.052651275546533e-05, + "loss": 2.7646, "step": 117600 }, { "epoch": 0.39, - "learning_rate": 3.05155031701624e-05, - "loss": 2.7787, + "learning_rate": 3.0509953667672358e-05, + "loss": 2.7489, "step": 117700 }, { "epoch": 0.39, - "learning_rate": 3.0498948797324817e-05, - "loss": 2.7825, + "learning_rate": 3.0493394579879388e-05, + "loss": 2.7734, "step": 117800 }, { "epoch": 0.39, - "learning_rate": 3.0482394424487233e-05, - "loss": 2.7755, + "learning_rate": 3.0476835492086415e-05, + "loss": 2.7647, "step": 117900 }, { "epoch": 0.39, - "learning_rate": 3.046584005164965e-05, - "loss": 2.7712, + "learning_rate": 3.0460276404293442e-05, + "loss": 2.7644, "step": 118000 }, { "epoch": 0.39, - "learning_rate": 3.044928567881206e-05, - "loss": 2.7638, + "learning_rate": 3.044371731650047e-05, + "loss": 2.7662, "step": 118100 }, { "epoch": 0.39, - "learning_rate": 3.0432731305974476e-05, - "loss": 2.757, + "learning_rate": 3.04271582287075e-05, + "loss": 2.7603, "step": 118200 }, { "epoch": 0.39, - "learning_rate": 3.0416176933136892e-05, - "loss": 2.7497, + "learning_rate": 3.0410599140914526e-05, + "loss": 2.7531, "step": 118300 }, { "epoch": 0.39, - "learning_rate": 3.0399622560299308e-05, - "loss": 2.7538, + "learning_rate": 3.0394040053121553e-05, + "loss": 2.7671, "step": 118400 }, { "epoch": 0.39, - "learning_rate": 3.0383068187461723e-05, - "loss": 2.7773, + "learning_rate": 3.037748096532858e-05, + "loss": 2.7601, "step": 118500 }, { "epoch": 0.39, - "learning_rate": 3.0366513814624135e-05, - "loss": 2.7749, + "learning_rate": 3.0360921877535613e-05, + "loss": 2.758, "step": 118600 }, { "epoch": 0.39, - "learning_rate": 3.034995944178655e-05, - "loss": 2.7831, + "learning_rate": 3.034436278974264e-05, + "loss": 2.7721, "step": 118700 }, { "epoch": 0.39, - "learning_rate": 3.0333405068948967e-05, - "loss": 2.7634, + "learning_rate": 3.032780370194967e-05, + "loss": 2.7658, "step": 118800 }, { "epoch": 0.39, - "learning_rate": 3.0316850696111382e-05, - "loss": 2.7701, + "learning_rate": 3.0311244614156698e-05, + "loss": 2.7738, "step": 118900 }, { "epoch": 0.39, - "learning_rate": 3.0300296323273798e-05, - "loss": 2.772, + "learning_rate": 3.0294685526363724e-05, + "loss": 2.7705, "step": 119000 }, { "epoch": 0.39, - "learning_rate": 3.028374195043621e-05, - "loss": 2.7691, + "learning_rate": 3.027812643857075e-05, + "loss": 2.755, "step": 119100 }, { "epoch": 0.39, - "learning_rate": 3.0267187577598626e-05, - "loss": 2.7587, + "learning_rate": 3.026156735077778e-05, + "loss": 2.7609, "step": 119200 }, { - "epoch": 0.39, - "learning_rate": 3.0250633204761035e-05, - "loss": 2.7718, + "epoch": 0.4, + "learning_rate": 3.024500826298481e-05, + "loss": 2.7629, "step": 119300 }, { "epoch": 0.4, - "learning_rate": 3.023407883192345e-05, - "loss": 2.777, + "learning_rate": 3.0228449175191835e-05, + "loss": 2.7596, "step": 119400 }, { "epoch": 0.4, - "learning_rate": 3.0217524459085866e-05, - "loss": 2.773, + "learning_rate": 3.0211890087398866e-05, + "loss": 2.7434, "step": 119500 }, { "epoch": 0.4, - "learning_rate": 3.0200970086248282e-05, - "loss": 2.7589, + "learning_rate": 3.0195330999605896e-05, + "loss": 2.7797, "step": 119600 }, { "epoch": 0.4, - "learning_rate": 3.0184415713410697e-05, - "loss": 2.7534, + "learning_rate": 3.0178771911812926e-05, + "loss": 2.7545, "step": 119700 }, { "epoch": 0.4, - "learning_rate": 3.0167861340573113e-05, - "loss": 2.7665, + "learning_rate": 3.0162212824019953e-05, + "loss": 2.7634, "step": 119800 }, { "epoch": 0.4, - "learning_rate": 3.0151306967735525e-05, - "loss": 2.782, + "learning_rate": 3.014565373622698e-05, + "loss": 2.7587, "step": 119900 }, { "epoch": 0.4, - "learning_rate": 3.013475259489794e-05, - "loss": 2.774, + "learning_rate": 3.0129094648434007e-05, + "loss": 2.7685, "step": 120000 }, { "epoch": 0.4, - "learning_rate": 3.0118198222060357e-05, - "loss": 2.7819, + "learning_rate": 3.0112535560641037e-05, + "loss": 2.7517, "step": 120100 }, { "epoch": 0.4, - "learning_rate": 3.0101643849222772e-05, - "loss": 2.7786, + "learning_rate": 3.0095976472848064e-05, + "loss": 2.7565, "step": 120200 }, { "epoch": 0.4, - "learning_rate": 3.0085089476385188e-05, - "loss": 2.7795, + "learning_rate": 3.007941738505509e-05, + "loss": 2.7566, "step": 120300 }, { "epoch": 0.4, - "learning_rate": 3.00685351035476e-05, - "loss": 2.7588, + "learning_rate": 3.0062858297262118e-05, + "loss": 2.7513, "step": 120400 }, { "epoch": 0.4, - "learning_rate": 3.0051980730710016e-05, - "loss": 2.7794, + "learning_rate": 3.004629920946915e-05, + "loss": 2.7686, "step": 120500 }, { "epoch": 0.4, - "learning_rate": 3.003542635787243e-05, - "loss": 2.7647, + "learning_rate": 3.0029740121676182e-05, + "loss": 2.7608, "step": 120600 }, { "epoch": 0.4, - "learning_rate": 3.0018871985034847e-05, - "loss": 2.7707, + "learning_rate": 3.001318103388321e-05, + "loss": 2.7566, "step": 120700 }, { "epoch": 0.4, - "learning_rate": 3.0002317612197263e-05, - "loss": 2.7733, + "learning_rate": 2.9996621946090236e-05, + "loss": 2.7598, "step": 120800 }, { "epoch": 0.4, - "learning_rate": 2.9985763239359675e-05, - "loss": 2.7605, + "learning_rate": 2.9980062858297263e-05, + "loss": 2.7567, "step": 120900 }, { "epoch": 0.4, - "learning_rate": 2.996920886652209e-05, - "loss": 2.7599, + "learning_rate": 2.9963503770504293e-05, + "loss": 2.7671, "step": 121000 }, { "epoch": 0.4, - "learning_rate": 2.9952654493684506e-05, - "loss": 2.7687, + "learning_rate": 2.994694468271132e-05, + "loss": 2.7633, "step": 121100 }, { "epoch": 0.4, - "learning_rate": 2.9936100120846922e-05, - "loss": 2.7717, + "learning_rate": 2.9930385594918347e-05, + "loss": 2.7682, "step": 121200 }, { "epoch": 0.4, - "learning_rate": 2.9919545748009338e-05, - "loss": 2.7611, + "learning_rate": 2.9913826507125374e-05, + "loss": 2.7629, "step": 121300 }, { "epoch": 0.4, - "learning_rate": 2.9902991375171753e-05, - "loss": 2.7734, + "learning_rate": 2.9897267419332404e-05, + "loss": 2.7464, "step": 121400 }, { "epoch": 0.4, - "learning_rate": 2.9886437002334166e-05, - "loss": 2.7806, + "learning_rate": 2.988070833153943e-05, + "loss": 2.7524, "step": 121500 }, { "epoch": 0.4, - "learning_rate": 2.986988262949658e-05, - "loss": 2.7623, + "learning_rate": 2.9864149243746465e-05, + "loss": 2.7651, "step": 121600 }, { "epoch": 0.4, - "learning_rate": 2.9853328256658997e-05, - "loss": 2.7714, + "learning_rate": 2.9847590155953492e-05, + "loss": 2.7569, "step": 121700 }, { "epoch": 0.4, - "learning_rate": 2.9836773883821412e-05, - "loss": 2.7525, + "learning_rate": 2.983103106816052e-05, + "loss": 2.7521, "step": 121800 }, { "epoch": 0.4, - "learning_rate": 2.9820219510983828e-05, - "loss": 2.7666, + "learning_rate": 2.981447198036755e-05, + "loss": 2.7674, "step": 121900 }, { "epoch": 0.4, - "learning_rate": 2.980366513814624e-05, - "loss": 2.7632, + "learning_rate": 2.9797912892574576e-05, + "loss": 2.7561, "step": 122000 }, { "epoch": 0.4, - "learning_rate": 2.9787110765308656e-05, - "loss": 2.7677, + "learning_rate": 2.9781353804781603e-05, + "loss": 2.7656, "step": 122100 }, { "epoch": 0.4, - "learning_rate": 2.9770556392471072e-05, - "loss": 2.7676, + "learning_rate": 2.976479471698863e-05, + "loss": 2.7588, "step": 122200 }, { - "epoch": 0.4, - "learning_rate": 2.9754002019633487e-05, - "loss": 2.7662, + "epoch": 0.41, + "learning_rate": 2.974823562919566e-05, + "loss": 2.7578, "step": 122300 }, { "epoch": 0.41, - "learning_rate": 2.9737447646795903e-05, - "loss": 2.7562, + "learning_rate": 2.9731676541402687e-05, + "loss": 2.7505, "step": 122400 }, { "epoch": 0.41, - "learning_rate": 2.9720893273958315e-05, - "loss": 2.7635, + "learning_rate": 2.9715117453609714e-05, + "loss": 2.769, "step": 122500 }, { "epoch": 0.41, - "learning_rate": 2.970433890112073e-05, - "loss": 2.7863, + "learning_rate": 2.9698558365816747e-05, + "loss": 2.7546, "step": 122600 }, { "epoch": 0.41, - "learning_rate": 2.9687784528283147e-05, - "loss": 2.77, + "learning_rate": 2.9681999278023774e-05, + "loss": 2.7474, "step": 122700 }, { "epoch": 0.41, - "learning_rate": 2.9671230155445562e-05, - "loss": 2.7743, + "learning_rate": 2.9665440190230805e-05, + "loss": 2.7522, "step": 122800 }, { "epoch": 0.41, - "learning_rate": 2.9654675782607978e-05, - "loss": 2.7752, + "learning_rate": 2.964888110243783e-05, + "loss": 2.7692, "step": 122900 }, { "epoch": 0.41, - "learning_rate": 2.9638121409770393e-05, - "loss": 2.7648, + "learning_rate": 2.963232201464486e-05, + "loss": 2.7625, "step": 123000 }, { "epoch": 0.41, - "learning_rate": 2.9621567036932806e-05, - "loss": 2.7812, + "learning_rate": 2.9615762926851885e-05, + "loss": 2.7502, "step": 123100 }, { "epoch": 0.41, - "learning_rate": 2.960501266409522e-05, - "loss": 2.778, + "learning_rate": 2.9599203839058916e-05, + "loss": 2.7606, "step": 123200 }, { "epoch": 0.41, - "learning_rate": 2.9588458291257637e-05, - "loss": 2.7529, + "learning_rate": 2.9582644751265943e-05, + "loss": 2.7565, "step": 123300 }, { "epoch": 0.41, - "learning_rate": 2.9571903918420053e-05, - "loss": 2.765, + "learning_rate": 2.956608566347297e-05, + "loss": 2.7703, "step": 123400 }, { "epoch": 0.41, - "learning_rate": 2.955534954558247e-05, - "loss": 2.7577, + "learning_rate": 2.9549526575679996e-05, + "loss": 2.7646, "step": 123500 }, { "epoch": 0.41, - "learning_rate": 2.953879517274488e-05, - "loss": 2.7818, + "learning_rate": 2.953296748788703e-05, + "loss": 2.7451, "step": 123600 }, { "epoch": 0.41, - "learning_rate": 2.9522240799907296e-05, - "loss": 2.7638, + "learning_rate": 2.951640840009406e-05, + "loss": 2.7623, "step": 123700 }, { "epoch": 0.41, - "learning_rate": 2.9505686427069712e-05, - "loss": 2.7702, + "learning_rate": 2.9499849312301087e-05, + "loss": 2.7494, "step": 123800 }, { "epoch": 0.41, - "learning_rate": 2.9489132054232128e-05, - "loss": 2.7717, + "learning_rate": 2.9483290224508114e-05, + "loss": 2.7483, "step": 123900 }, { "epoch": 0.41, - "learning_rate": 2.9472577681394543e-05, - "loss": 2.7722, + "learning_rate": 2.946673113671514e-05, + "loss": 2.7581, "step": 124000 }, { "epoch": 0.41, - "learning_rate": 2.9456023308556955e-05, - "loss": 2.7712, + "learning_rate": 2.945017204892217e-05, + "loss": 2.7801, "step": 124100 }, { "epoch": 0.41, - "learning_rate": 2.943946893571937e-05, - "loss": 2.7712, + "learning_rate": 2.94336129611292e-05, + "loss": 2.76, "step": 124200 }, { "epoch": 0.41, - "learning_rate": 2.9422914562881787e-05, - "loss": 2.7813, + "learning_rate": 2.9417053873336225e-05, + "loss": 2.7638, "step": 124300 }, { "epoch": 0.41, - "learning_rate": 2.9406360190044202e-05, - "loss": 2.7765, + "learning_rate": 2.9400494785543252e-05, + "loss": 2.7367, "step": 124400 }, { "epoch": 0.41, - "learning_rate": 2.9389805817206618e-05, - "loss": 2.7585, + "learning_rate": 2.9383935697750283e-05, + "loss": 2.7502, "step": 124500 }, { "epoch": 0.41, - "learning_rate": 2.9373251444369034e-05, - "loss": 2.7706, + "learning_rate": 2.9367376609957313e-05, + "loss": 2.7542, "step": 124600 }, { "epoch": 0.41, - "learning_rate": 2.9356697071531446e-05, - "loss": 2.761, + "learning_rate": 2.9350817522164343e-05, + "loss": 2.7528, "step": 124700 }, { "epoch": 0.41, - "learning_rate": 2.934014269869386e-05, - "loss": 2.7687, + "learning_rate": 2.933425843437137e-05, + "loss": 2.7583, "step": 124800 }, { "epoch": 0.41, - "learning_rate": 2.9323588325856277e-05, - "loss": 2.7836, + "learning_rate": 2.9317699346578397e-05, + "loss": 2.7601, "step": 124900 }, { "epoch": 0.41, - "learning_rate": 2.9307033953018693e-05, - "loss": 2.7614, + "learning_rate": 2.9301140258785427e-05, + "loss": 2.7696, "step": 125000 }, { "epoch": 0.41, - "learning_rate": 2.929047958018111e-05, - "loss": 2.7665, + "learning_rate": 2.9284581170992454e-05, + "loss": 2.7524, "step": 125100 }, { "epoch": 0.41, - "learning_rate": 2.927392520734352e-05, - "loss": 2.7617, + "learning_rate": 2.926802208319948e-05, + "loss": 2.7628, "step": 125200 }, { "epoch": 0.41, - "learning_rate": 2.9257370834505936e-05, - "loss": 2.7705, + "learning_rate": 2.9251462995406508e-05, + "loss": 2.7501, "step": 125300 }, { "epoch": 0.42, - "learning_rate": 2.9240816461668352e-05, - "loss": 2.774, + "learning_rate": 2.9234903907613538e-05, + "loss": 2.7626, "step": 125400 }, { "epoch": 0.42, - "learning_rate": 2.9224262088830768e-05, - "loss": 2.7703, + "learning_rate": 2.9218344819820565e-05, + "loss": 2.7555, "step": 125500 }, { "epoch": 0.42, - "learning_rate": 2.9207707715993183e-05, - "loss": 2.7724, + "learning_rate": 2.92017857320276e-05, + "loss": 2.7382, "step": 125600 }, { "epoch": 0.42, - "learning_rate": 2.9191153343155596e-05, - "loss": 2.7609, + "learning_rate": 2.9185226644234626e-05, + "loss": 2.7408, "step": 125700 }, { "epoch": 0.42, - "learning_rate": 2.917459897031801e-05, - "loss": 2.7766, + "learning_rate": 2.9168667556441653e-05, + "loss": 2.7702, "step": 125800 }, { "epoch": 0.42, - "learning_rate": 2.9158044597480427e-05, - "loss": 2.7641, + "learning_rate": 2.915210846864868e-05, + "loss": 2.744, "step": 125900 }, { "epoch": 0.42, - "learning_rate": 2.9141490224642843e-05, - "loss": 2.7824, + "learning_rate": 2.913554938085571e-05, + "loss": 2.7721, "step": 126000 }, { "epoch": 0.42, - "learning_rate": 2.9124935851805258e-05, - "loss": 2.7699, + "learning_rate": 2.9118990293062737e-05, + "loss": 2.758, "step": 126100 }, { "epoch": 0.42, - "learning_rate": 2.9108381478967674e-05, - "loss": 2.7609, + "learning_rate": 2.9102431205269764e-05, + "loss": 2.758, "step": 126200 }, { "epoch": 0.42, - "learning_rate": 2.9091827106130086e-05, - "loss": 2.7829, + "learning_rate": 2.908587211747679e-05, + "loss": 2.7522, "step": 126300 }, { "epoch": 0.42, - "learning_rate": 2.9075272733292502e-05, - "loss": 2.7655, + "learning_rate": 2.906931302968382e-05, + "loss": 2.7548, "step": 126400 }, { "epoch": 0.42, - "learning_rate": 2.9058718360454917e-05, - "loss": 2.7803, + "learning_rate": 2.9052753941890848e-05, + "loss": 2.7558, "step": 126500 }, { "epoch": 0.42, - "learning_rate": 2.9042163987617333e-05, - "loss": 2.7583, + "learning_rate": 2.903619485409788e-05, + "loss": 2.7564, "step": 126600 }, { "epoch": 0.42, - "learning_rate": 2.902560961477975e-05, - "loss": 2.7724, + "learning_rate": 2.901963576630491e-05, + "loss": 2.756, "step": 126700 }, { "epoch": 0.42, - "learning_rate": 2.900905524194216e-05, - "loss": 2.768, + "learning_rate": 2.9003076678511935e-05, + "loss": 2.7608, "step": 126800 }, { "epoch": 0.42, - "learning_rate": 2.8992500869104577e-05, - "loss": 2.7735, + "learning_rate": 2.8986517590718966e-05, + "loss": 2.7604, "step": 126900 }, { "epoch": 0.42, - "learning_rate": 2.8975946496266992e-05, - "loss": 2.7553, + "learning_rate": 2.8969958502925993e-05, + "loss": 2.7504, "step": 127000 }, { "epoch": 0.42, - "learning_rate": 2.8959392123429408e-05, - "loss": 2.7715, + "learning_rate": 2.895339941513302e-05, + "loss": 2.7661, "step": 127100 }, { "epoch": 0.42, - "learning_rate": 2.8942837750591824e-05, - "loss": 2.7681, + "learning_rate": 2.8936840327340046e-05, + "loss": 2.747, "step": 127200 }, { "epoch": 0.42, - "learning_rate": 2.8926283377754236e-05, - "loss": 2.7614, + "learning_rate": 2.8920281239547077e-05, + "loss": 2.7703, "step": 127300 }, { "epoch": 0.42, - "learning_rate": 2.890972900491665e-05, - "loss": 2.7601, + "learning_rate": 2.8903722151754104e-05, + "loss": 2.7673, "step": 127400 }, { "epoch": 0.42, - "learning_rate": 2.8893174632079067e-05, - "loss": 2.7688, + "learning_rate": 2.888716306396113e-05, + "loss": 2.7584, "step": 127500 }, { "epoch": 0.42, - "learning_rate": 2.8876620259241483e-05, - "loss": 2.761, + "learning_rate": 2.8870603976168164e-05, + "loss": 2.7669, "step": 127600 }, { "epoch": 0.42, - "learning_rate": 2.88600658864039e-05, - "loss": 2.7751, + "learning_rate": 2.885404488837519e-05, + "loss": 2.7593, "step": 127700 }, { "epoch": 0.42, - "learning_rate": 2.8843511513566314e-05, - "loss": 2.7939, + "learning_rate": 2.883748580058222e-05, + "loss": 2.7525, "step": 127800 }, { "epoch": 0.42, - "learning_rate": 2.8826957140728726e-05, - "loss": 2.7713, + "learning_rate": 2.882092671278925e-05, + "loss": 2.7673, "step": 127900 }, { "epoch": 0.42, - "learning_rate": 2.8810402767891142e-05, - "loss": 2.7717, + "learning_rate": 2.8804367624996275e-05, + "loss": 2.7647, "step": 128000 }, { "epoch": 0.42, - "learning_rate": 2.879384839505355e-05, - "loss": 2.7638, + "learning_rate": 2.8787808537203302e-05, + "loss": 2.7812, "step": 128100 }, { "epoch": 0.42, - "learning_rate": 2.8777294022215967e-05, - "loss": 2.7689, + "learning_rate": 2.8771249449410332e-05, + "loss": 2.7534, "step": 128200 }, { "epoch": 0.42, - "learning_rate": 2.8760739649378382e-05, - "loss": 2.7674, + "learning_rate": 2.875469036161736e-05, + "loss": 2.7551, "step": 128300 }, { "epoch": 0.43, - "learning_rate": 2.8744185276540798e-05, - "loss": 2.7731, + "learning_rate": 2.8738131273824386e-05, + "loss": 2.7509, "step": 128400 }, { "epoch": 0.43, - "learning_rate": 2.8727630903703214e-05, - "loss": 2.7584, + "learning_rate": 2.8721572186031413e-05, + "loss": 2.7596, "step": 128500 }, { "epoch": 0.43, - "learning_rate": 2.8711076530865626e-05, - "loss": 2.7634, + "learning_rate": 2.8705013098238447e-05, + "loss": 2.7566, "step": 128600 }, { "epoch": 0.43, - "learning_rate": 2.869452215802804e-05, - "loss": 2.7676, + "learning_rate": 2.8688454010445477e-05, + "loss": 2.7621, "step": 128700 }, { "epoch": 0.43, - "learning_rate": 2.8677967785190457e-05, - "loss": 2.7672, + "learning_rate": 2.8671894922652504e-05, + "loss": 2.7649, "step": 128800 }, { "epoch": 0.43, - "learning_rate": 2.8661413412352873e-05, - "loss": 2.7591, + "learning_rate": 2.865533583485953e-05, + "loss": 2.754, "step": 128900 }, { "epoch": 0.43, - "learning_rate": 2.864485903951529e-05, - "loss": 2.7667, + "learning_rate": 2.8638776747066558e-05, + "loss": 2.7649, "step": 129000 }, { "epoch": 0.43, - "learning_rate": 2.86283046666777e-05, - "loss": 2.7586, + "learning_rate": 2.8622217659273588e-05, + "loss": 2.7483, "step": 129100 }, { "epoch": 0.43, - "learning_rate": 2.8611750293840116e-05, - "loss": 2.7567, + "learning_rate": 2.8605658571480615e-05, + "loss": 2.7457, "step": 129200 }, { "epoch": 0.43, - "learning_rate": 2.8595195921002532e-05, - "loss": 2.7603, + "learning_rate": 2.8589099483687642e-05, + "loss": 2.7721, "step": 129300 }, { "epoch": 0.43, - "learning_rate": 2.8578641548164948e-05, - "loss": 2.7638, + "learning_rate": 2.857254039589467e-05, + "loss": 2.7535, "step": 129400 }, { "epoch": 0.43, - "learning_rate": 2.8562087175327363e-05, - "loss": 2.7663, + "learning_rate": 2.85559813081017e-05, + "loss": 2.7428, "step": 129500 }, { "epoch": 0.43, - "learning_rate": 2.854553280248978e-05, - "loss": 2.7764, + "learning_rate": 2.8539422220308733e-05, + "loss": 2.7509, "step": 129600 }, { "epoch": 0.43, - "learning_rate": 2.852897842965219e-05, - "loss": 2.7759, + "learning_rate": 2.852286313251576e-05, + "loss": 2.7598, "step": 129700 }, { "epoch": 0.43, - "learning_rate": 2.8512424056814607e-05, - "loss": 2.7692, + "learning_rate": 2.8506304044722787e-05, + "loss": 2.76, "step": 129800 }, { "epoch": 0.43, - "learning_rate": 2.8495869683977022e-05, - "loss": 2.7648, + "learning_rate": 2.8489744956929814e-05, + "loss": 2.7483, "step": 129900 }, { "epoch": 0.43, - "learning_rate": 2.8479315311139438e-05, - "loss": 2.7646, + "learning_rate": 2.8473185869136844e-05, + "loss": 2.7416, "step": 130000 }, { "epoch": 0.43, - "learning_rate": 2.8462760938301854e-05, - "loss": 2.769, + "learning_rate": 2.845662678134387e-05, + "loss": 2.7611, "step": 130100 }, { "epoch": 0.43, - "learning_rate": 2.8446206565464266e-05, - "loss": 2.7663, + "learning_rate": 2.8440067693550898e-05, + "loss": 2.7499, "step": 130200 }, { "epoch": 0.43, - "learning_rate": 2.842965219262668e-05, - "loss": 2.7672, + "learning_rate": 2.8423508605757925e-05, + "loss": 2.7544, "step": 130300 }, { "epoch": 0.43, - "learning_rate": 2.8413097819789097e-05, - "loss": 2.7752, + "learning_rate": 2.8406949517964955e-05, + "loss": 2.751, "step": 130400 }, { "epoch": 0.43, - "learning_rate": 2.8396543446951513e-05, - "loss": 2.7577, + "learning_rate": 2.8390390430171982e-05, + "loss": 2.7685, "step": 130500 }, { "epoch": 0.43, - "learning_rate": 2.837998907411393e-05, - "loss": 2.7692, + "learning_rate": 2.8373831342379016e-05, + "loss": 2.7527, "step": 130600 }, { "epoch": 0.43, - "learning_rate": 2.836343470127634e-05, - "loss": 2.7913, + "learning_rate": 2.8357272254586043e-05, + "loss": 2.738, "step": 130700 }, { "epoch": 0.43, - "learning_rate": 2.8346880328438757e-05, - "loss": 2.7795, + "learning_rate": 2.834071316679307e-05, + "loss": 2.7563, "step": 130800 }, { "epoch": 0.43, - "learning_rate": 2.8330325955601172e-05, - "loss": 2.7759, + "learning_rate": 2.83241540790001e-05, + "loss": 2.7542, "step": 130900 }, { "epoch": 0.43, - "learning_rate": 2.8313771582763588e-05, - "loss": 2.7664, + "learning_rate": 2.8307594991207127e-05, + "loss": 2.7592, "step": 131000 }, { "epoch": 0.43, - "learning_rate": 2.8297217209926003e-05, - "loss": 2.7526, + "learning_rate": 2.8291035903414154e-05, + "loss": 2.7531, "step": 131100 }, { "epoch": 0.43, - "learning_rate": 2.828066283708842e-05, - "loss": 2.7729, + "learning_rate": 2.827447681562118e-05, + "loss": 2.7603, "step": 131200 }, { "epoch": 0.43, - "learning_rate": 2.826410846425083e-05, - "loss": 2.7746, + "learning_rate": 2.825791772782821e-05, + "loss": 2.758, "step": 131300 }, { "epoch": 0.44, - "learning_rate": 2.8247554091413247e-05, - "loss": 2.7763, + "learning_rate": 2.8241358640035238e-05, + "loss": 2.7593, "step": 131400 }, { "epoch": 0.44, - "learning_rate": 2.8230999718575663e-05, - "loss": 2.7723, + "learning_rate": 2.8224799552242265e-05, + "loss": 2.7529, "step": 131500 }, { "epoch": 0.44, - "learning_rate": 2.821444534573808e-05, - "loss": 2.7716, + "learning_rate": 2.8208240464449298e-05, + "loss": 2.7574, "step": 131600 }, { "epoch": 0.44, - "learning_rate": 2.8197890972900494e-05, - "loss": 2.7718, + "learning_rate": 2.8191681376656325e-05, + "loss": 2.764, "step": 131700 }, { "epoch": 0.44, - "learning_rate": 2.8181336600062906e-05, - "loss": 2.7559, + "learning_rate": 2.8175122288863352e-05, + "loss": 2.7549, "step": 131800 }, { "epoch": 0.44, - "learning_rate": 2.8164782227225322e-05, - "loss": 2.7677, + "learning_rate": 2.8158563201070382e-05, + "loss": 2.7575, "step": 131900 }, { "epoch": 0.44, - "learning_rate": 2.8148227854387738e-05, - "loss": 2.7637, + "learning_rate": 2.814200411327741e-05, + "loss": 2.7618, "step": 132000 }, { "epoch": 0.44, - "learning_rate": 2.8131673481550153e-05, - "loss": 2.7822, + "learning_rate": 2.8125445025484436e-05, + "loss": 2.7562, "step": 132100 }, { "epoch": 0.44, - "learning_rate": 2.811511910871257e-05, - "loss": 2.7664, + "learning_rate": 2.8108885937691467e-05, + "loss": 2.7533, "step": 132200 }, { "epoch": 0.44, - "learning_rate": 2.809856473587498e-05, - "loss": 2.7551, + "learning_rate": 2.8092326849898493e-05, + "loss": 2.7619, "step": 132300 }, { "epoch": 0.44, - "learning_rate": 2.8082010363037397e-05, - "loss": 2.7682, + "learning_rate": 2.807576776210552e-05, + "loss": 2.7684, "step": 132400 }, { "epoch": 0.44, - "learning_rate": 2.8065455990199812e-05, - "loss": 2.7673, + "learning_rate": 2.8059208674312547e-05, + "loss": 2.7523, "step": 132500 }, { "epoch": 0.44, - "learning_rate": 2.8048901617362228e-05, - "loss": 2.7651, + "learning_rate": 2.804264958651958e-05, + "loss": 2.7546, "step": 132600 }, { "epoch": 0.44, - "learning_rate": 2.8032347244524644e-05, - "loss": 2.7609, + "learning_rate": 2.8026090498726608e-05, + "loss": 2.7422, "step": 132700 }, { "epoch": 0.44, - "learning_rate": 2.801579287168706e-05, - "loss": 2.7732, + "learning_rate": 2.8009531410933638e-05, + "loss": 2.739, "step": 132800 }, { "epoch": 0.44, - "learning_rate": 2.799923849884947e-05, - "loss": 2.7616, + "learning_rate": 2.7992972323140665e-05, + "loss": 2.7486, "step": 132900 }, { "epoch": 0.44, - "learning_rate": 2.7982684126011887e-05, - "loss": 2.7718, + "learning_rate": 2.7976413235347692e-05, + "loss": 2.7702, "step": 133000 }, { "epoch": 0.44, - "learning_rate": 2.7966129753174303e-05, - "loss": 2.773, + "learning_rate": 2.795985414755472e-05, + "loss": 2.757, "step": 133100 }, { "epoch": 0.44, - "learning_rate": 2.794957538033672e-05, - "loss": 2.7462, + "learning_rate": 2.794329505976175e-05, + "loss": 2.753, "step": 133200 }, { "epoch": 0.44, - "learning_rate": 2.7933021007499134e-05, - "loss": 2.7611, + "learning_rate": 2.7926735971968776e-05, + "loss": 2.7609, "step": 133300 }, { "epoch": 0.44, - "learning_rate": 2.7916466634661546e-05, - "loss": 2.7546, + "learning_rate": 2.7910176884175803e-05, + "loss": 2.7548, "step": 133400 }, { "epoch": 0.44, - "learning_rate": 2.7899912261823962e-05, - "loss": 2.7648, + "learning_rate": 2.789361779638283e-05, + "loss": 2.7507, "step": 133500 }, { "epoch": 0.44, - "learning_rate": 2.7883357888986378e-05, - "loss": 2.7588, + "learning_rate": 2.7877058708589864e-05, + "loss": 2.7667, "step": 133600 }, { "epoch": 0.44, - "learning_rate": 2.7866803516148793e-05, - "loss": 2.7626, + "learning_rate": 2.7860499620796894e-05, + "loss": 2.7481, "step": 133700 }, { "epoch": 0.44, - "learning_rate": 2.785024914331121e-05, - "loss": 2.7613, + "learning_rate": 2.784394053300392e-05, + "loss": 2.7568, "step": 133800 }, { "epoch": 0.44, - "learning_rate": 2.783369477047362e-05, - "loss": 2.7572, + "learning_rate": 2.7827381445210948e-05, + "loss": 2.7523, "step": 133900 }, { "epoch": 0.44, - "learning_rate": 2.7817140397636037e-05, - "loss": 2.7587, + "learning_rate": 2.7810822357417975e-05, + "loss": 2.7501, "step": 134000 }, { "epoch": 0.44, - "learning_rate": 2.7800586024798453e-05, - "loss": 2.7708, + "learning_rate": 2.7794263269625005e-05, + "loss": 2.7641, "step": 134100 }, { "epoch": 0.44, - "learning_rate": 2.7784031651960868e-05, - "loss": 2.7658, + "learning_rate": 2.7777704181832032e-05, + "loss": 2.7577, "step": 134200 }, { "epoch": 0.44, - "learning_rate": 2.7767477279123284e-05, - "loss": 2.771, + "learning_rate": 2.776114509403906e-05, + "loss": 2.7708, "step": 134300 }, { - "epoch": 0.44, - "learning_rate": 2.77509229062857e-05, - "loss": 2.7721, + "epoch": 0.45, + "learning_rate": 2.7744586006246086e-05, + "loss": 2.7554, "step": 134400 }, { "epoch": 0.45, - "learning_rate": 2.7734368533448112e-05, - "loss": 2.7608, + "learning_rate": 2.7728026918453116e-05, + "loss": 2.7637, "step": 134500 }, { "epoch": 0.45, - "learning_rate": 2.7717814160610527e-05, - "loss": 2.7673, + "learning_rate": 2.771146783066015e-05, + "loss": 2.7588, "step": 134600 }, { "epoch": 0.45, - "learning_rate": 2.7701259787772943e-05, - "loss": 2.7665, + "learning_rate": 2.7694908742867177e-05, + "loss": 2.7354, "step": 134700 }, { "epoch": 0.45, - "learning_rate": 2.768470541493536e-05, - "loss": 2.7544, + "learning_rate": 2.7678349655074203e-05, + "loss": 2.7412, "step": 134800 }, { "epoch": 0.45, - "learning_rate": 2.7668151042097774e-05, - "loss": 2.7729, + "learning_rate": 2.766179056728123e-05, + "loss": 2.7553, "step": 134900 }, { "epoch": 0.45, - "learning_rate": 2.7651596669260187e-05, - "loss": 2.7816, + "learning_rate": 2.764523147948826e-05, + "loss": 2.7737, "step": 135000 }, { "epoch": 0.45, - "learning_rate": 2.7635042296422602e-05, - "loss": 2.7751, + "learning_rate": 2.7628672391695288e-05, + "loss": 2.7591, "step": 135100 }, { "epoch": 0.45, - "learning_rate": 2.7618487923585018e-05, - "loss": 2.7694, + "learning_rate": 2.7612113303902315e-05, + "loss": 2.7673, "step": 135200 }, { "epoch": 0.45, - "learning_rate": 2.7601933550747434e-05, - "loss": 2.7621, + "learning_rate": 2.759555421610934e-05, + "loss": 2.7629, "step": 135300 }, { "epoch": 0.45, - "learning_rate": 2.758537917790985e-05, - "loss": 2.7717, + "learning_rate": 2.7578995128316372e-05, + "loss": 2.7573, "step": 135400 }, { "epoch": 0.45, - "learning_rate": 2.756882480507226e-05, - "loss": 2.7587, + "learning_rate": 2.75624360405234e-05, + "loss": 2.7672, "step": 135500 }, { "epoch": 0.45, - "learning_rate": 2.7552270432234677e-05, - "loss": 2.7672, + "learning_rate": 2.7545876952730432e-05, + "loss": 2.7616, "step": 135600 }, { "epoch": 0.45, - "learning_rate": 2.7535716059397093e-05, - "loss": 2.7724, + "learning_rate": 2.752931786493746e-05, + "loss": 2.7643, "step": 135700 }, { "epoch": 0.45, - "learning_rate": 2.751916168655951e-05, - "loss": 2.7722, + "learning_rate": 2.7512758777144486e-05, + "loss": 2.7573, "step": 135800 }, { "epoch": 0.45, - "learning_rate": 2.7502607313721924e-05, - "loss": 2.7614, + "learning_rate": 2.7496199689351516e-05, + "loss": 2.7671, "step": 135900 }, { "epoch": 0.45, - "learning_rate": 2.748605294088434e-05, - "loss": 2.7678, + "learning_rate": 2.7479640601558543e-05, + "loss": 2.7442, "step": 136000 }, { "epoch": 0.45, - "learning_rate": 2.7469498568046752e-05, - "loss": 2.7522, + "learning_rate": 2.746308151376557e-05, + "loss": 2.7525, "step": 136100 }, { "epoch": 0.45, - "learning_rate": 2.7452944195209168e-05, - "loss": 2.761, + "learning_rate": 2.7446522425972597e-05, + "loss": 2.7564, "step": 136200 }, { "epoch": 0.45, - "learning_rate": 2.7436389822371583e-05, - "loss": 2.7681, + "learning_rate": 2.7429963338179627e-05, + "loss": 2.7669, "step": 136300 }, { "epoch": 0.45, - "learning_rate": 2.7419835449534e-05, - "loss": 2.7675, + "learning_rate": 2.7413404250386654e-05, + "loss": 2.7549, "step": 136400 }, { "epoch": 0.45, - "learning_rate": 2.7403281076696415e-05, - "loss": 2.7725, + "learning_rate": 2.739684516259368e-05, + "loss": 2.7513, "step": 136500 }, { "epoch": 0.45, - "learning_rate": 2.7386726703858827e-05, - "loss": 2.7524, + "learning_rate": 2.7380286074800715e-05, + "loss": 2.7709, "step": 136600 }, { "epoch": 0.45, - "learning_rate": 2.7370172331021243e-05, - "loss": 2.7623, + "learning_rate": 2.7363726987007742e-05, + "loss": 2.76, "step": 136700 }, { "epoch": 0.45, - "learning_rate": 2.7353617958183658e-05, - "loss": 2.7596, + "learning_rate": 2.7347167899214772e-05, + "loss": 2.7617, "step": 136800 }, { "epoch": 0.45, - "learning_rate": 2.7337063585346067e-05, - "loss": 2.7668, + "learning_rate": 2.73306088114218e-05, + "loss": 2.7655, "step": 136900 }, { "epoch": 0.45, - "learning_rate": 2.7320509212508483e-05, - "loss": 2.7637, + "learning_rate": 2.7314049723628826e-05, + "loss": 2.758, "step": 137000 }, { "epoch": 0.45, - "learning_rate": 2.73039548396709e-05, - "loss": 2.7606, + "learning_rate": 2.7297490635835853e-05, + "loss": 2.7473, "step": 137100 }, { "epoch": 0.45, - "learning_rate": 2.7287400466833314e-05, - "loss": 2.7677, + "learning_rate": 2.7280931548042883e-05, + "loss": 2.7639, "step": 137200 }, { "epoch": 0.45, - "learning_rate": 2.7270846093995726e-05, - "loss": 2.7726, + "learning_rate": 2.726437246024991e-05, + "loss": 2.7329, "step": 137300 }, { - "epoch": 0.45, - "learning_rate": 2.7254291721158142e-05, - "loss": 2.7585, + "epoch": 0.46, + "learning_rate": 2.7247813372456937e-05, + "loss": 2.738, "step": 137400 }, { "epoch": 0.46, - "learning_rate": 2.7237737348320558e-05, - "loss": 2.7671, + "learning_rate": 2.7231254284663964e-05, + "loss": 2.7539, "step": 137500 }, { "epoch": 0.46, - "learning_rate": 2.7221182975482973e-05, - "loss": 2.7632, + "learning_rate": 2.7214695196870998e-05, + "loss": 2.7639, "step": 137600 }, { "epoch": 0.46, - "learning_rate": 2.720462860264539e-05, - "loss": 2.7683, + "learning_rate": 2.7198136109078025e-05, + "loss": 2.7594, "step": 137700 }, { "epoch": 0.46, - "learning_rate": 2.7188074229807805e-05, - "loss": 2.7597, + "learning_rate": 2.7181577021285055e-05, + "loss": 2.7552, "step": 137800 }, { "epoch": 0.46, - "learning_rate": 2.7171519856970217e-05, - "loss": 2.7624, + "learning_rate": 2.7165017933492082e-05, + "loss": 2.7571, "step": 137900 }, { "epoch": 0.46, - "learning_rate": 2.7154965484132632e-05, - "loss": 2.7487, + "learning_rate": 2.714845884569911e-05, + "loss": 2.7518, "step": 138000 }, { "epoch": 0.46, - "learning_rate": 2.7138411111295048e-05, - "loss": 2.7608, + "learning_rate": 2.713189975790614e-05, + "loss": 2.7523, "step": 138100 }, { "epoch": 0.46, - "learning_rate": 2.7121856738457464e-05, - "loss": 2.7635, + "learning_rate": 2.7115340670113166e-05, + "loss": 2.7529, "step": 138200 }, { "epoch": 0.46, - "learning_rate": 2.710530236561988e-05, - "loss": 2.7572, + "learning_rate": 2.7098781582320193e-05, + "loss": 2.7553, "step": 138300 }, { "epoch": 0.46, - "learning_rate": 2.708874799278229e-05, - "loss": 2.7688, + "learning_rate": 2.708222249452722e-05, + "loss": 2.7652, "step": 138400 }, { "epoch": 0.46, - "learning_rate": 2.7072193619944707e-05, - "loss": 2.7577, + "learning_rate": 2.706566340673425e-05, + "loss": 2.7662, "step": 138500 }, { "epoch": 0.46, - "learning_rate": 2.7055639247107123e-05, - "loss": 2.7655, + "learning_rate": 2.704910431894128e-05, + "loss": 2.7495, "step": 138600 }, { "epoch": 0.46, - "learning_rate": 2.703908487426954e-05, - "loss": 2.765, + "learning_rate": 2.703254523114831e-05, + "loss": 2.7556, "step": 138700 }, { "epoch": 0.46, - "learning_rate": 2.7022530501431954e-05, - "loss": 2.7587, + "learning_rate": 2.7015986143355338e-05, + "loss": 2.7361, "step": 138800 }, { "epoch": 0.46, - "learning_rate": 2.7005976128594366e-05, - "loss": 2.753, + "learning_rate": 2.6999427055562364e-05, + "loss": 2.7461, "step": 138900 }, { "epoch": 0.46, - "learning_rate": 2.6989421755756782e-05, - "loss": 2.7649, + "learning_rate": 2.698286796776939e-05, + "loss": 2.7653, "step": 139000 }, { "epoch": 0.46, - "learning_rate": 2.6972867382919198e-05, - "loss": 2.7736, + "learning_rate": 2.696630887997642e-05, + "loss": 2.7568, "step": 139100 }, { "epoch": 0.46, - "learning_rate": 2.6956313010081613e-05, - "loss": 2.7573, + "learning_rate": 2.694974979218345e-05, + "loss": 2.7588, "step": 139200 }, { "epoch": 0.46, - "learning_rate": 2.693975863724403e-05, - "loss": 2.7588, + "learning_rate": 2.6933190704390475e-05, + "loss": 2.758, "step": 139300 }, { "epoch": 0.46, - "learning_rate": 2.6923204264406445e-05, - "loss": 2.765, + "learning_rate": 2.6916631616597506e-05, + "loss": 2.7678, "step": 139400 }, { "epoch": 0.46, - "learning_rate": 2.6906649891568857e-05, - "loss": 2.7553, + "learning_rate": 2.6900072528804533e-05, + "loss": 2.7525, "step": 139500 }, { "epoch": 0.46, - "learning_rate": 2.6890095518731273e-05, - "loss": 2.7661, + "learning_rate": 2.6883513441011566e-05, + "loss": 2.7535, "step": 139600 }, { "epoch": 0.46, - "learning_rate": 2.6873541145893688e-05, - "loss": 2.7562, + "learning_rate": 2.6866954353218593e-05, + "loss": 2.7437, "step": 139700 }, { "epoch": 0.46, - "learning_rate": 2.6856986773056104e-05, - "loss": 2.7764, + "learning_rate": 2.685039526542562e-05, + "loss": 2.7405, "step": 139800 }, { "epoch": 0.46, - "learning_rate": 2.684043240021852e-05, - "loss": 2.7704, + "learning_rate": 2.6833836177632647e-05, + "loss": 2.7649, "step": 139900 }, { "epoch": 0.46, - "learning_rate": 2.6823878027380932e-05, - "loss": 2.7584, + "learning_rate": 2.6817277089839677e-05, + "loss": 2.7471, "step": 140000 }, { "epoch": 0.46, - "learning_rate": 2.6807323654543348e-05, - "loss": 2.7533, + "learning_rate": 2.6800718002046704e-05, + "loss": 2.7658, "step": 140100 }, { "epoch": 0.46, - "learning_rate": 2.6790769281705763e-05, - "loss": 2.7695, + "learning_rate": 2.678415891425373e-05, + "loss": 2.7554, "step": 140200 }, { "epoch": 0.46, - "learning_rate": 2.677421490886818e-05, - "loss": 2.772, + "learning_rate": 2.6767599826460758e-05, + "loss": 2.7472, "step": 140300 }, { "epoch": 0.46, - "learning_rate": 2.6757660536030594e-05, - "loss": 2.7739, + "learning_rate": 2.675104073866779e-05, + "loss": 2.7463, "step": 140400 }, { "epoch": 0.47, - "learning_rate": 2.6741106163193007e-05, - "loss": 2.769, + "learning_rate": 2.6734481650874815e-05, + "loss": 2.7655, "step": 140500 }, { "epoch": 0.47, - "learning_rate": 2.6724551790355422e-05, - "loss": 2.7648, + "learning_rate": 2.671792256308185e-05, + "loss": 2.754, "step": 140600 }, { "epoch": 0.47, - "learning_rate": 2.6707997417517838e-05, - "loss": 2.7579, + "learning_rate": 2.6701363475288876e-05, + "loss": 2.7632, "step": 140700 }, { "epoch": 0.47, - "learning_rate": 2.6691443044680254e-05, - "loss": 2.76, + "learning_rate": 2.6684804387495903e-05, + "loss": 2.7481, "step": 140800 }, { "epoch": 0.47, - "learning_rate": 2.667488867184267e-05, - "loss": 2.7589, + "learning_rate": 2.6668245299702933e-05, + "loss": 2.7548, "step": 140900 }, { "epoch": 0.47, - "learning_rate": 2.6658334299005085e-05, - "loss": 2.759, + "learning_rate": 2.665168621190996e-05, + "loss": 2.7708, "step": 141000 }, { "epoch": 0.47, - "learning_rate": 2.6641779926167497e-05, - "loss": 2.7644, + "learning_rate": 2.6635127124116987e-05, + "loss": 2.7772, "step": 141100 }, { "epoch": 0.47, - "learning_rate": 2.6625225553329913e-05, - "loss": 2.7615, + "learning_rate": 2.6618568036324014e-05, + "loss": 2.7578, "step": 141200 }, { "epoch": 0.47, - "learning_rate": 2.660867118049233e-05, - "loss": 2.7783, + "learning_rate": 2.6602008948531044e-05, + "loss": 2.7649, "step": 141300 }, { "epoch": 0.47, - "learning_rate": 2.6592116807654744e-05, - "loss": 2.7634, + "learning_rate": 2.658544986073807e-05, + "loss": 2.768, "step": 141400 }, { "epoch": 0.47, - "learning_rate": 2.657556243481716e-05, - "loss": 2.7676, + "learning_rate": 2.6568890772945098e-05, + "loss": 2.7488, "step": 141500 }, { "epoch": 0.47, - "learning_rate": 2.6559008061979572e-05, - "loss": 2.7678, + "learning_rate": 2.6552331685152132e-05, + "loss": 2.7581, "step": 141600 }, { "epoch": 0.47, - "learning_rate": 2.6542453689141988e-05, - "loss": 2.7621, + "learning_rate": 2.653577259735916e-05, + "loss": 2.7477, "step": 141700 }, { "epoch": 0.47, - "learning_rate": 2.6525899316304403e-05, - "loss": 2.7751, + "learning_rate": 2.651921350956619e-05, + "loss": 2.7524, "step": 141800 }, { "epoch": 0.47, - "learning_rate": 2.650934494346682e-05, - "loss": 2.7547, + "learning_rate": 2.6502654421773216e-05, + "loss": 2.7611, "step": 141900 }, { "epoch": 0.47, - "learning_rate": 2.6492790570629235e-05, - "loss": 2.7659, + "learning_rate": 2.6486095333980243e-05, + "loss": 2.7412, "step": 142000 }, { "epoch": 0.47, - "learning_rate": 2.6476236197791647e-05, - "loss": 2.7744, + "learning_rate": 2.646953624618727e-05, + "loss": 2.7678, "step": 142100 }, { "epoch": 0.47, - "learning_rate": 2.6459681824954063e-05, - "loss": 2.7691, + "learning_rate": 2.64529771583943e-05, + "loss": 2.775, "step": 142200 }, { "epoch": 0.47, - "learning_rate": 2.6443127452116478e-05, - "loss": 2.751, + "learning_rate": 2.6436418070601327e-05, + "loss": 2.7517, "step": 142300 }, { "epoch": 0.47, - "learning_rate": 2.6426573079278894e-05, - "loss": 2.7613, + "learning_rate": 2.6419858982808354e-05, + "loss": 2.7551, "step": 142400 }, { "epoch": 0.47, - "learning_rate": 2.641001870644131e-05, - "loss": 2.761, + "learning_rate": 2.640329989501538e-05, + "loss": 2.7571, "step": 142500 }, { "epoch": 0.47, - "learning_rate": 2.6393464333603725e-05, - "loss": 2.7635, + "learning_rate": 2.6386740807222414e-05, + "loss": 2.7454, "step": 142600 }, { "epoch": 0.47, - "learning_rate": 2.6376909960766137e-05, - "loss": 2.7661, + "learning_rate": 2.6370181719429445e-05, + "loss": 2.7501, "step": 142700 }, { "epoch": 0.47, - "learning_rate": 2.6360355587928553e-05, - "loss": 2.7689, + "learning_rate": 2.635362263163647e-05, + "loss": 2.7465, "step": 142800 }, { "epoch": 0.47, - "learning_rate": 2.634380121509097e-05, - "loss": 2.7646, + "learning_rate": 2.63370635438435e-05, + "loss": 2.7412, "step": 142900 }, { "epoch": 0.47, - "learning_rate": 2.6327246842253384e-05, - "loss": 2.7581, + "learning_rate": 2.6320504456050525e-05, + "loss": 2.745, "step": 143000 }, { "epoch": 0.47, - "learning_rate": 2.63106924694158e-05, - "loss": 2.7611, + "learning_rate": 2.6303945368257556e-05, + "loss": 2.7597, "step": 143100 }, { "epoch": 0.47, - "learning_rate": 2.6294138096578212e-05, - "loss": 2.7563, + "learning_rate": 2.6287386280464583e-05, + "loss": 2.746, "step": 143200 }, { "epoch": 0.47, - "learning_rate": 2.6277583723740628e-05, - "loss": 2.7549, + "learning_rate": 2.627082719267161e-05, + "loss": 2.7431, "step": 143300 }, { "epoch": 0.47, - "learning_rate": 2.6261029350903044e-05, - "loss": 2.7579, + "learning_rate": 2.6254268104878636e-05, + "loss": 2.7406, "step": 143400 }, { "epoch": 0.48, - "learning_rate": 2.624447497806546e-05, - "loss": 2.7595, + "learning_rate": 2.6237709017085667e-05, + "loss": 2.7644, "step": 143500 }, { "epoch": 0.48, - "learning_rate": 2.6227920605227875e-05, - "loss": 2.7621, + "learning_rate": 2.62211499292927e-05, + "loss": 2.7502, "step": 143600 }, { "epoch": 0.48, - "learning_rate": 2.6211366232390287e-05, - "loss": 2.7746, + "learning_rate": 2.6204590841499727e-05, + "loss": 2.7523, "step": 143700 }, { "epoch": 0.48, - "learning_rate": 2.6194811859552703e-05, - "loss": 2.7604, + "learning_rate": 2.6188031753706754e-05, + "loss": 2.7476, "step": 143800 }, { "epoch": 0.48, - "learning_rate": 2.617825748671512e-05, - "loss": 2.7711, + "learning_rate": 2.617147266591378e-05, + "loss": 2.7528, "step": 143900 }, { "epoch": 0.48, - "learning_rate": 2.6161703113877534e-05, - "loss": 2.7651, + "learning_rate": 2.615491357812081e-05, + "loss": 2.7566, "step": 144000 }, { "epoch": 0.48, - "learning_rate": 2.614514874103995e-05, - "loss": 2.7601, + "learning_rate": 2.613835449032784e-05, + "loss": 2.756, "step": 144100 }, { "epoch": 0.48, - "learning_rate": 2.6128594368202365e-05, - "loss": 2.7602, + "learning_rate": 2.6121795402534865e-05, + "loss": 2.7496, "step": 144200 }, { "epoch": 0.48, - "learning_rate": 2.6112039995364778e-05, - "loss": 2.7756, + "learning_rate": 2.6105236314741892e-05, + "loss": 2.7507, "step": 144300 }, { "epoch": 0.48, - "learning_rate": 2.6095485622527193e-05, - "loss": 2.7764, + "learning_rate": 2.6088677226948922e-05, + "loss": 2.7564, "step": 144400 }, { "epoch": 0.48, - "learning_rate": 2.607893124968961e-05, - "loss": 2.7707, + "learning_rate": 2.607211813915595e-05, + "loss": 2.76, "step": 144500 }, { "epoch": 0.48, - "learning_rate": 2.6062376876852025e-05, - "loss": 2.761, + "learning_rate": 2.6055559051362983e-05, + "loss": 2.7514, "step": 144600 }, { "epoch": 0.48, - "learning_rate": 2.604582250401444e-05, - "loss": 2.775, + "learning_rate": 2.603899996357001e-05, + "loss": 2.7562, "step": 144700 }, { "epoch": 0.48, - "learning_rate": 2.6029268131176853e-05, - "loss": 2.7612, + "learning_rate": 2.6022440875777037e-05, + "loss": 2.7583, "step": 144800 }, { "epoch": 0.48, - "learning_rate": 2.6012713758339268e-05, - "loss": 2.7627, + "learning_rate": 2.6005881787984064e-05, + "loss": 2.7492, "step": 144900 }, { "epoch": 0.48, - "learning_rate": 2.5996159385501684e-05, - "loss": 2.7715, + "learning_rate": 2.5989322700191094e-05, + "loss": 2.7662, "step": 145000 }, { "epoch": 0.48, - "learning_rate": 2.59796050126641e-05, - "loss": 2.7681, + "learning_rate": 2.597276361239812e-05, + "loss": 2.7653, "step": 145100 }, { "epoch": 0.48, - "learning_rate": 2.5963050639826515e-05, - "loss": 2.7625, + "learning_rate": 2.5956204524605148e-05, + "loss": 2.7465, "step": 145200 }, { "epoch": 0.48, - "learning_rate": 2.5946496266988927e-05, - "loss": 2.765, + "learning_rate": 2.5939645436812178e-05, + "loss": 2.7628, "step": 145300 }, { "epoch": 0.48, - "learning_rate": 2.5929941894151343e-05, - "loss": 2.7741, + "learning_rate": 2.5923086349019205e-05, + "loss": 2.7586, "step": 145400 }, { "epoch": 0.48, - "learning_rate": 2.591338752131376e-05, - "loss": 2.7688, + "learning_rate": 2.5906527261226232e-05, + "loss": 2.7376, "step": 145500 }, { "epoch": 0.48, - "learning_rate": 2.5896833148476174e-05, - "loss": 2.7621, + "learning_rate": 2.5889968173433266e-05, + "loss": 2.7586, "step": 145600 }, { "epoch": 0.48, - "learning_rate": 2.588027877563859e-05, - "loss": 2.7558, + "learning_rate": 2.5873409085640293e-05, + "loss": 2.7498, "step": 145700 }, { "epoch": 0.48, - "learning_rate": 2.5863724402801e-05, - "loss": 2.7556, + "learning_rate": 2.585684999784732e-05, + "loss": 2.7767, "step": 145800 }, { "epoch": 0.48, - "learning_rate": 2.5847170029963414e-05, - "loss": 2.749, + "learning_rate": 2.584029091005435e-05, + "loss": 2.7533, "step": 145900 }, { "epoch": 0.48, - "learning_rate": 2.5830615657125827e-05, - "loss": 2.7642, + "learning_rate": 2.5823731822261377e-05, + "loss": 2.7485, "step": 146000 }, { "epoch": 0.48, - "learning_rate": 2.5814061284288242e-05, - "loss": 2.7533, + "learning_rate": 2.5807172734468404e-05, + "loss": 2.769, "step": 146100 }, { "epoch": 0.48, - "learning_rate": 2.5797506911450658e-05, - "loss": 2.7431, + "learning_rate": 2.579061364667543e-05, + "loss": 2.7463, "step": 146200 }, { "epoch": 0.48, - "learning_rate": 2.5780952538613074e-05, - "loss": 2.7578, + "learning_rate": 2.577405455888246e-05, + "loss": 2.7542, "step": 146300 }, { "epoch": 0.48, - "learning_rate": 2.576439816577549e-05, - "loss": 2.767, + "learning_rate": 2.5757495471089488e-05, + "loss": 2.7532, "step": 146400 }, { "epoch": 0.49, - "learning_rate": 2.5747843792937905e-05, - "loss": 2.7686, + "learning_rate": 2.5740936383296515e-05, + "loss": 2.7563, "step": 146500 }, { "epoch": 0.49, - "learning_rate": 2.5731289420100317e-05, - "loss": 2.7527, + "learning_rate": 2.572437729550355e-05, + "loss": 2.7542, "step": 146600 }, { "epoch": 0.49, - "learning_rate": 2.5714735047262733e-05, - "loss": 2.758, + "learning_rate": 2.5707818207710575e-05, + "loss": 2.7628, "step": 146700 }, { "epoch": 0.49, - "learning_rate": 2.569818067442515e-05, - "loss": 2.779, + "learning_rate": 2.5691259119917606e-05, + "loss": 2.7446, "step": 146800 }, { "epoch": 0.49, - "learning_rate": 2.5681626301587564e-05, - "loss": 2.7566, + "learning_rate": 2.5674700032124633e-05, + "loss": 2.749, "step": 146900 }, { "epoch": 0.49, - "learning_rate": 2.566507192874998e-05, - "loss": 2.7569, + "learning_rate": 2.565814094433166e-05, + "loss": 2.7578, "step": 147000 }, { "epoch": 0.49, - "learning_rate": 2.5648517555912392e-05, - "loss": 2.7633, + "learning_rate": 2.5641581856538686e-05, + "loss": 2.7656, "step": 147100 }, { "epoch": 0.49, - "learning_rate": 2.5631963183074808e-05, - "loss": 2.7659, + "learning_rate": 2.5625022768745717e-05, + "loss": 2.7646, "step": 147200 }, { "epoch": 0.49, - "learning_rate": 2.5615408810237223e-05, - "loss": 2.7575, + "learning_rate": 2.5608463680952744e-05, + "loss": 2.731, "step": 147300 }, { "epoch": 0.49, - "learning_rate": 2.559885443739964e-05, - "loss": 2.7632, + "learning_rate": 2.559190459315977e-05, + "loss": 2.7534, "step": 147400 }, { "epoch": 0.49, - "learning_rate": 2.5582300064562055e-05, - "loss": 2.7531, + "learning_rate": 2.5575345505366797e-05, + "loss": 2.7461, "step": 147500 }, { "epoch": 0.49, - "learning_rate": 2.5565745691724467e-05, - "loss": 2.7492, + "learning_rate": 2.555878641757383e-05, + "loss": 2.7473, "step": 147600 }, { "epoch": 0.49, - "learning_rate": 2.5549191318886883e-05, - "loss": 2.7779, + "learning_rate": 2.554222732978086e-05, + "loss": 2.7605, "step": 147700 }, { "epoch": 0.49, - "learning_rate": 2.5532636946049298e-05, - "loss": 2.7601, + "learning_rate": 2.552566824198789e-05, + "loss": 2.7654, "step": 147800 }, { "epoch": 0.49, - "learning_rate": 2.5516082573211714e-05, - "loss": 2.7685, + "learning_rate": 2.5509109154194915e-05, + "loss": 2.7463, "step": 147900 }, { "epoch": 0.49, - "learning_rate": 2.549952820037413e-05, - "loss": 2.7631, + "learning_rate": 2.5492550066401942e-05, + "loss": 2.7581, "step": 148000 }, { "epoch": 0.49, - "learning_rate": 2.5482973827536545e-05, - "loss": 2.7714, + "learning_rate": 2.5475990978608972e-05, + "loss": 2.7486, "step": 148100 }, { "epoch": 0.49, - "learning_rate": 2.5466419454698957e-05, - "loss": 2.7667, + "learning_rate": 2.5459431890816e-05, + "loss": 2.7464, "step": 148200 }, { "epoch": 0.49, - "learning_rate": 2.5449865081861373e-05, - "loss": 2.7601, + "learning_rate": 2.5442872803023026e-05, + "loss": 2.7435, "step": 148300 }, { "epoch": 0.49, - "learning_rate": 2.543331070902379e-05, - "loss": 2.7657, + "learning_rate": 2.5426313715230053e-05, + "loss": 2.7494, "step": 148400 }, { "epoch": 0.49, - "learning_rate": 2.5416756336186204e-05, - "loss": 2.7568, + "learning_rate": 2.5409754627437083e-05, + "loss": 2.7672, "step": 148500 }, { "epoch": 0.49, - "learning_rate": 2.540020196334862e-05, - "loss": 2.7585, + "learning_rate": 2.5393195539644117e-05, + "loss": 2.7532, "step": 148600 }, { "epoch": 0.49, - "learning_rate": 2.5383647590511032e-05, - "loss": 2.7607, + "learning_rate": 2.5376636451851144e-05, + "loss": 2.7632, "step": 148700 }, { "epoch": 0.49, - "learning_rate": 2.5367093217673448e-05, - "loss": 2.7547, + "learning_rate": 2.536007736405817e-05, + "loss": 2.7478, "step": 148800 }, { "epoch": 0.49, - "learning_rate": 2.5350538844835864e-05, - "loss": 2.7499, + "learning_rate": 2.5343518276265198e-05, + "loss": 2.732, "step": 148900 }, { "epoch": 0.49, - "learning_rate": 2.533398447199828e-05, - "loss": 2.7555, + "learning_rate": 2.5326959188472228e-05, + "loss": 2.7473, "step": 149000 }, { "epoch": 0.49, - "learning_rate": 2.5317430099160695e-05, - "loss": 2.7587, + "learning_rate": 2.5310400100679255e-05, + "loss": 2.7461, "step": 149100 }, { "epoch": 0.49, - "learning_rate": 2.5300875726323107e-05, - "loss": 2.7685, + "learning_rate": 2.5293841012886282e-05, + "loss": 2.7607, "step": 149200 }, { "epoch": 0.49, - "learning_rate": 2.5284321353485523e-05, - "loss": 2.7481, + "learning_rate": 2.527728192509331e-05, + "loss": 2.7626, "step": 149300 }, { "epoch": 0.49, - "learning_rate": 2.526776698064794e-05, - "loss": 2.767, + "learning_rate": 2.526072283730034e-05, + "loss": 2.7515, "step": 149400 }, { - "epoch": 0.49, - "learning_rate": 2.5251212607810354e-05, - "loss": 2.771, + "epoch": 0.5, + "learning_rate": 2.5244163749507366e-05, + "loss": 2.7182, "step": 149500 }, { "epoch": 0.5, - "learning_rate": 2.523465823497277e-05, - "loss": 2.7641, + "learning_rate": 2.52276046617144e-05, + "loss": 2.7523, "step": 149600 }, { "epoch": 0.5, - "learning_rate": 2.5218103862135185e-05, - "loss": 2.7564, + "learning_rate": 2.5211045573921427e-05, + "loss": 2.7541, "step": 149700 }, { "epoch": 0.5, - "learning_rate": 2.5201549489297598e-05, - "loss": 2.7497, + "learning_rate": 2.5194486486128454e-05, + "loss": 2.7619, "step": 149800 }, { "epoch": 0.5, - "learning_rate": 2.5184995116460013e-05, - "loss": 2.7568, + "learning_rate": 2.5177927398335484e-05, + "loss": 2.7523, "step": 149900 }, { "epoch": 0.5, - "learning_rate": 2.516844074362243e-05, - "loss": 2.769, + "learning_rate": 2.516136831054251e-05, + "loss": 2.7475, "step": 150000 }, { "epoch": 0.5, - "learning_rate": 2.5151886370784845e-05, - "loss": 2.7619, + "learning_rate": 2.5144809222749538e-05, + "loss": 2.7466, "step": 150100 }, { "epoch": 0.5, - "learning_rate": 2.513533199794726e-05, - "loss": 2.7685, + "learning_rate": 2.5128250134956565e-05, + "loss": 2.7518, "step": 150200 }, { "epoch": 0.5, - "learning_rate": 2.5118777625109673e-05, - "loss": 2.7621, + "learning_rate": 2.5111691047163595e-05, + "loss": 2.7484, "step": 150300 }, { "epoch": 0.5, - "learning_rate": 2.5102223252272088e-05, - "loss": 2.7759, + "learning_rate": 2.5095131959370622e-05, + "loss": 2.745, "step": 150400 }, { "epoch": 0.5, - "learning_rate": 2.5085668879434504e-05, - "loss": 2.7617, + "learning_rate": 2.507857287157765e-05, + "loss": 2.7493, "step": 150500 }, { "epoch": 0.5, - "learning_rate": 2.506911450659692e-05, - "loss": 2.774, + "learning_rate": 2.5062013783784682e-05, + "loss": 2.75, "step": 150600 }, { "epoch": 0.5, - "learning_rate": 2.5052560133759335e-05, - "loss": 2.7675, + "learning_rate": 2.504545469599171e-05, + "loss": 2.7496, "step": 150700 }, { "epoch": 0.5, - "learning_rate": 2.5036005760921747e-05, - "loss": 2.7586, + "learning_rate": 2.502889560819874e-05, + "loss": 2.7602, "step": 150800 }, { "epoch": 0.5, - "learning_rate": 2.5019451388084163e-05, - "loss": 2.7527, + "learning_rate": 2.5012336520405767e-05, + "loss": 2.755, "step": 150900 }, { "epoch": 0.5, - "learning_rate": 2.500289701524658e-05, - "loss": 2.7774, + "learning_rate": 2.4995777432612794e-05, + "loss": 2.7668, "step": 151000 }, { "epoch": 0.5, - "learning_rate": 2.4986342642408994e-05, - "loss": 2.7564, + "learning_rate": 2.497921834481982e-05, + "loss": 2.7389, "step": 151100 }, { "epoch": 0.5, - "learning_rate": 2.496978826957141e-05, - "loss": 2.7618, + "learning_rate": 2.496265925702685e-05, + "loss": 2.739, "step": 151200 }, { "epoch": 0.5, - "learning_rate": 2.4953233896733826e-05, - "loss": 2.7643, + "learning_rate": 2.494610016923388e-05, + "loss": 2.7475, "step": 151300 }, { "epoch": 0.5, - "learning_rate": 2.4936679523896238e-05, - "loss": 2.7466, + "learning_rate": 2.4929541081440908e-05, + "loss": 2.7705, "step": 151400 }, { "epoch": 0.5, - "learning_rate": 2.4920125151058654e-05, - "loss": 2.7499, + "learning_rate": 2.4912981993647935e-05, + "loss": 2.7449, "step": 151500 }, { "epoch": 0.5, - "learning_rate": 2.490357077822107e-05, - "loss": 2.7693, + "learning_rate": 2.4896422905854962e-05, + "loss": 2.7536, "step": 151600 }, { "epoch": 0.5, - "learning_rate": 2.4887016405383485e-05, - "loss": 2.7698, + "learning_rate": 2.4879863818061992e-05, + "loss": 2.7573, "step": 151700 }, { "epoch": 0.5, - "learning_rate": 2.48704620325459e-05, - "loss": 2.7672, + "learning_rate": 2.4863304730269022e-05, + "loss": 2.7434, "step": 151800 }, { "epoch": 0.5, - "learning_rate": 2.4853907659708313e-05, - "loss": 2.7604, + "learning_rate": 2.484674564247605e-05, + "loss": 2.7633, "step": 151900 }, { "epoch": 0.5, - "learning_rate": 2.483735328687073e-05, - "loss": 2.764, + "learning_rate": 2.4830186554683076e-05, + "loss": 2.7484, "step": 152000 }, { "epoch": 0.5, - "learning_rate": 2.4820798914033144e-05, - "loss": 2.7617, + "learning_rate": 2.4813627466890103e-05, + "loss": 2.7682, "step": 152100 }, { "epoch": 0.5, - "learning_rate": 2.480424454119556e-05, - "loss": 2.7731, + "learning_rate": 2.4797068379097133e-05, + "loss": 2.7473, "step": 152200 }, { "epoch": 0.5, - "learning_rate": 2.4787690168357975e-05, - "loss": 2.7697, + "learning_rate": 2.4780509291304164e-05, + "loss": 2.7521, "step": 152300 }, { "epoch": 0.5, - "learning_rate": 2.4771135795520388e-05, - "loss": 2.7486, + "learning_rate": 2.476395020351119e-05, + "loss": 2.756, "step": 152400 }, { - "epoch": 0.5, - "learning_rate": 2.47545814226828e-05, - "loss": 2.7679, + "epoch": 0.51, + "learning_rate": 2.4747391115718218e-05, + "loss": 2.7471, "step": 152500 }, { "epoch": 0.51, - "learning_rate": 2.4738027049845216e-05, - "loss": 2.7592, + "learning_rate": 2.4730832027925248e-05, + "loss": 2.7623, "step": 152600 }, { "epoch": 0.51, - "learning_rate": 2.472147267700763e-05, - "loss": 2.7666, + "learning_rate": 2.4714272940132275e-05, + "loss": 2.752, "step": 152700 }, { "epoch": 0.51, - "learning_rate": 2.4704918304170047e-05, - "loss": 2.7522, + "learning_rate": 2.4697713852339305e-05, + "loss": 2.7477, "step": 152800 }, { "epoch": 0.51, - "learning_rate": 2.4688363931332462e-05, - "loss": 2.7522, + "learning_rate": 2.4681154764546332e-05, + "loss": 2.7503, "step": 152900 }, { "epoch": 0.51, - "learning_rate": 2.4671809558494878e-05, - "loss": 2.7573, + "learning_rate": 2.466459567675336e-05, + "loss": 2.7422, "step": 153000 }, { "epoch": 0.51, - "learning_rate": 2.465525518565729e-05, - "loss": 2.7598, + "learning_rate": 2.464803658896039e-05, + "loss": 2.7497, "step": 153100 }, { "epoch": 0.51, - "learning_rate": 2.4638700812819706e-05, - "loss": 2.77, + "learning_rate": 2.4631477501167416e-05, + "loss": 2.7603, "step": 153200 }, { "epoch": 0.51, - "learning_rate": 2.4622146439982122e-05, - "loss": 2.7641, + "learning_rate": 2.4614918413374446e-05, + "loss": 2.7386, "step": 153300 }, { "epoch": 0.51, - "learning_rate": 2.4605592067144537e-05, - "loss": 2.7722, + "learning_rate": 2.4598359325581473e-05, + "loss": 2.7537, "step": 153400 }, { "epoch": 0.51, - "learning_rate": 2.4589037694306953e-05, - "loss": 2.7577, + "learning_rate": 2.4581800237788504e-05, + "loss": 2.7566, "step": 153500 }, { "epoch": 0.51, - "learning_rate": 2.4572483321469365e-05, - "loss": 2.7658, + "learning_rate": 2.456524114999553e-05, + "loss": 2.7501, "step": 153600 }, { "epoch": 0.51, - "learning_rate": 2.455592894863178e-05, - "loss": 2.7664, + "learning_rate": 2.4548682062202557e-05, + "loss": 2.7485, "step": 153700 }, { "epoch": 0.51, - "learning_rate": 2.4539374575794197e-05, - "loss": 2.7616, + "learning_rate": 2.4532122974409588e-05, + "loss": 2.7623, "step": 153800 }, { "epoch": 0.51, - "learning_rate": 2.4522820202956612e-05, - "loss": 2.7588, + "learning_rate": 2.4515563886616615e-05, + "loss": 2.7502, "step": 153900 }, { "epoch": 0.51, - "learning_rate": 2.4506265830119028e-05, - "loss": 2.766, + "learning_rate": 2.4499004798823645e-05, + "loss": 2.7534, "step": 154000 }, { "epoch": 0.51, - "learning_rate": 2.448971145728144e-05, - "loss": 2.7566, + "learning_rate": 2.4482445711030672e-05, + "loss": 2.7535, "step": 154100 }, { "epoch": 0.51, - "learning_rate": 2.4473157084443856e-05, - "loss": 2.759, + "learning_rate": 2.44658866232377e-05, + "loss": 2.7581, "step": 154200 }, { "epoch": 0.51, - "learning_rate": 2.445660271160627e-05, - "loss": 2.7569, + "learning_rate": 2.444932753544473e-05, + "loss": 2.7491, "step": 154300 }, { "epoch": 0.51, - "learning_rate": 2.4440048338768687e-05, - "loss": 2.7595, + "learning_rate": 2.443276844765176e-05, + "loss": 2.759, "step": 154400 }, { "epoch": 0.51, - "learning_rate": 2.4423493965931103e-05, - "loss": 2.7679, + "learning_rate": 2.4416209359858786e-05, + "loss": 2.7406, "step": 154500 }, { "epoch": 0.51, - "learning_rate": 2.440693959309352e-05, - "loss": 2.7456, + "learning_rate": 2.4399650272065813e-05, + "loss": 2.7256, "step": 154600 }, { "epoch": 0.51, - "learning_rate": 2.439038522025593e-05, - "loss": 2.7602, + "learning_rate": 2.438309118427284e-05, + "loss": 2.7426, "step": 154700 }, { "epoch": 0.51, - "learning_rate": 2.4373830847418346e-05, - "loss": 2.7597, + "learning_rate": 2.436653209647987e-05, + "loss": 2.7443, "step": 154800 }, { "epoch": 0.51, - "learning_rate": 2.4357276474580762e-05, - "loss": 2.7646, + "learning_rate": 2.43499730086869e-05, + "loss": 2.7427, "step": 154900 }, { "epoch": 0.51, - "learning_rate": 2.4340722101743178e-05, - "loss": 2.7501, + "learning_rate": 2.4333413920893928e-05, + "loss": 2.7406, "step": 155000 }, { "epoch": 0.51, - "learning_rate": 2.4324167728905593e-05, - "loss": 2.7667, + "learning_rate": 2.4316854833100954e-05, + "loss": 2.7413, "step": 155100 }, { "epoch": 0.51, - "learning_rate": 2.4307613356068005e-05, - "loss": 2.7502, + "learning_rate": 2.430029574530798e-05, + "loss": 2.7546, "step": 155200 }, { "epoch": 0.51, - "learning_rate": 2.429105898323042e-05, - "loss": 2.7727, + "learning_rate": 2.4283736657515012e-05, + "loss": 2.7549, "step": 155300 }, { "epoch": 0.51, - "learning_rate": 2.4274504610392837e-05, - "loss": 2.7515, + "learning_rate": 2.4267177569722042e-05, + "loss": 2.7449, "step": 155400 }, { "epoch": 0.51, - "learning_rate": 2.4257950237555252e-05, - "loss": 2.7618, + "learning_rate": 2.425061848192907e-05, + "loss": 2.7493, "step": 155500 }, { "epoch": 0.52, - "learning_rate": 2.4241395864717668e-05, - "loss": 2.7689, + "learning_rate": 2.4234059394136096e-05, + "loss": 2.7444, "step": 155600 }, { "epoch": 0.52, - "learning_rate": 2.422484149188008e-05, - "loss": 2.762, + "learning_rate": 2.4217500306343123e-05, + "loss": 2.7492, "step": 155700 }, { "epoch": 0.52, - "learning_rate": 2.4208287119042496e-05, - "loss": 2.7618, + "learning_rate": 2.4200941218550156e-05, + "loss": 2.7444, "step": 155800 }, { "epoch": 0.52, - "learning_rate": 2.419173274620491e-05, - "loss": 2.7516, + "learning_rate": 2.4184382130757183e-05, + "loss": 2.7415, "step": 155900 }, { "epoch": 0.52, - "learning_rate": 2.4175178373367327e-05, - "loss": 2.7509, + "learning_rate": 2.416782304296421e-05, + "loss": 2.7471, "step": 156000 }, { "epoch": 0.52, - "learning_rate": 2.4158624000529743e-05, - "loss": 2.7711, + "learning_rate": 2.4151263955171237e-05, + "loss": 2.7594, "step": 156100 }, { "epoch": 0.52, - "learning_rate": 2.414206962769216e-05, - "loss": 2.743, + "learning_rate": 2.4134704867378267e-05, + "loss": 2.7685, "step": 156200 }, { "epoch": 0.52, - "learning_rate": 2.412551525485457e-05, - "loss": 2.7686, + "learning_rate": 2.4118145779585298e-05, + "loss": 2.7384, "step": 156300 }, { "epoch": 0.52, - "learning_rate": 2.4108960882016986e-05, - "loss": 2.7728, + "learning_rate": 2.4101586691792325e-05, + "loss": 2.7496, "step": 156400 }, { "epoch": 0.52, - "learning_rate": 2.4092406509179402e-05, - "loss": 2.7622, + "learning_rate": 2.408502760399935e-05, + "loss": 2.7602, "step": 156500 }, { "epoch": 0.52, - "learning_rate": 2.4075852136341818e-05, - "loss": 2.7661, + "learning_rate": 2.406846851620638e-05, + "loss": 2.7505, "step": 156600 }, { "epoch": 0.52, - "learning_rate": 2.4059297763504233e-05, - "loss": 2.753, + "learning_rate": 2.405190942841341e-05, + "loss": 2.7545, "step": 156700 }, { "epoch": 0.52, - "learning_rate": 2.4042743390666646e-05, - "loss": 2.7587, + "learning_rate": 2.403535034062044e-05, + "loss": 2.732, "step": 156800 }, { "epoch": 0.52, - "learning_rate": 2.4026189017829058e-05, - "loss": 2.7358, + "learning_rate": 2.4018791252827466e-05, + "loss": 2.7503, "step": 156900 }, { "epoch": 0.52, - "learning_rate": 2.4009634644991474e-05, - "loss": 2.7457, + "learning_rate": 2.4002232165034493e-05, + "loss": 2.7363, "step": 157000 }, { "epoch": 0.52, - "learning_rate": 2.399308027215389e-05, - "loss": 2.7649, + "learning_rate": 2.3985673077241523e-05, + "loss": 2.75, "step": 157100 }, { "epoch": 0.52, - "learning_rate": 2.3976525899316305e-05, - "loss": 2.7546, + "learning_rate": 2.396911398944855e-05, + "loss": 2.738, "step": 157200 }, { "epoch": 0.52, - "learning_rate": 2.395997152647872e-05, - "loss": 2.7689, + "learning_rate": 2.395255490165558e-05, + "loss": 2.7571, "step": 157300 }, { "epoch": 0.52, - "learning_rate": 2.3943417153641133e-05, - "loss": 2.762, + "learning_rate": 2.3935995813862607e-05, + "loss": 2.7663, "step": 157400 }, { "epoch": 0.52, - "learning_rate": 2.392686278080355e-05, - "loss": 2.7613, + "learning_rate": 2.3919436726069634e-05, + "loss": 2.7488, "step": 157500 }, { "epoch": 0.52, - "learning_rate": 2.3910308407965964e-05, - "loss": 2.7695, + "learning_rate": 2.3902877638276665e-05, + "loss": 2.7504, "step": 157600 }, { "epoch": 0.52, - "learning_rate": 2.389375403512838e-05, - "loss": 2.7827, + "learning_rate": 2.388631855048369e-05, + "loss": 2.7441, "step": 157700 }, { "epoch": 0.52, - "learning_rate": 2.3877199662290795e-05, - "loss": 2.7763, + "learning_rate": 2.3869759462690722e-05, + "loss": 2.7415, "step": 157800 }, { "epoch": 0.52, - "learning_rate": 2.386064528945321e-05, - "loss": 2.7638, + "learning_rate": 2.385320037489775e-05, + "loss": 2.7663, "step": 157900 }, { "epoch": 0.52, - "learning_rate": 2.3844090916615623e-05, - "loss": 2.7696, + "learning_rate": 2.383664128710478e-05, + "loss": 2.7357, "step": 158000 }, { "epoch": 0.52, - "learning_rate": 2.382753654377804e-05, - "loss": 2.7541, + "learning_rate": 2.3820082199311806e-05, + "loss": 2.736, "step": 158100 }, { "epoch": 0.52, - "learning_rate": 2.3810982170940455e-05, - "loss": 2.7653, + "learning_rate": 2.3803523111518833e-05, + "loss": 2.7418, "step": 158200 }, { "epoch": 0.52, - "learning_rate": 2.379442779810287e-05, - "loss": 2.7581, + "learning_rate": 2.3786964023725863e-05, + "loss": 2.7541, "step": 158300 }, { "epoch": 0.52, - "learning_rate": 2.3777873425265286e-05, - "loss": 2.7607, + "learning_rate": 2.377040493593289e-05, + "loss": 2.7487, "step": 158400 }, { "epoch": 0.52, - "learning_rate": 2.3761319052427698e-05, - "loss": 2.7608, + "learning_rate": 2.375384584813992e-05, + "loss": 2.7572, "step": 158500 }, { "epoch": 0.53, - "learning_rate": 2.3744764679590114e-05, - "loss": 2.7555, + "learning_rate": 2.3737286760346947e-05, + "loss": 2.7491, "step": 158600 }, { "epoch": 0.53, - "learning_rate": 2.372821030675253e-05, - "loss": 2.7535, + "learning_rate": 2.3720727672553974e-05, + "loss": 2.7457, "step": 158700 }, { "epoch": 0.53, - "learning_rate": 2.3711655933914945e-05, - "loss": 2.758, + "learning_rate": 2.3704168584761004e-05, + "loss": 2.7538, "step": 158800 }, { "epoch": 0.53, - "learning_rate": 2.369510156107736e-05, - "loss": 2.7536, + "learning_rate": 2.368760949696803e-05, + "loss": 2.7595, "step": 158900 }, { "epoch": 0.53, - "learning_rate": 2.3678547188239773e-05, - "loss": 2.7574, + "learning_rate": 2.367105040917506e-05, + "loss": 2.7432, "step": 159000 }, { "epoch": 0.53, - "learning_rate": 2.366199281540219e-05, - "loss": 2.7538, + "learning_rate": 2.365449132138209e-05, + "loss": 2.7392, "step": 159100 }, { "epoch": 0.53, - "learning_rate": 2.3645438442564604e-05, - "loss": 2.7642, + "learning_rate": 2.3637932233589115e-05, + "loss": 2.7461, "step": 159200 }, { "epoch": 0.53, - "learning_rate": 2.362888406972702e-05, - "loss": 2.7506, + "learning_rate": 2.3621373145796146e-05, + "loss": 2.759, "step": 159300 }, { "epoch": 0.53, - "learning_rate": 2.3612329696889436e-05, - "loss": 2.7694, + "learning_rate": 2.3604814058003176e-05, + "loss": 2.7552, "step": 159400 }, { "epoch": 0.53, - "learning_rate": 2.359577532405185e-05, - "loss": 2.7547, + "learning_rate": 2.3588254970210203e-05, + "loss": 2.756, "step": 159500 }, { "epoch": 0.53, - "learning_rate": 2.3579220951214264e-05, - "loss": 2.7565, + "learning_rate": 2.357169588241723e-05, + "loss": 2.7384, "step": 159600 }, { "epoch": 0.53, - "learning_rate": 2.356266657837668e-05, - "loss": 2.7783, + "learning_rate": 2.3555136794624257e-05, + "loss": 2.7421, "step": 159700 }, { "epoch": 0.53, - "learning_rate": 2.3546112205539095e-05, - "loss": 2.7623, + "learning_rate": 2.3538577706831287e-05, + "loss": 2.7392, "step": 159800 }, { "epoch": 0.53, - "learning_rate": 2.352955783270151e-05, - "loss": 2.7555, + "learning_rate": 2.3522018619038317e-05, + "loss": 2.7529, "step": 159900 }, { "epoch": 0.53, - "learning_rate": 2.3513003459863926e-05, - "loss": 2.7763, + "learning_rate": 2.3505459531245344e-05, + "loss": 2.7453, "step": 160000 }, { "epoch": 0.53, - "learning_rate": 2.349644908702634e-05, - "loss": 2.7544, + "learning_rate": 2.348890044345237e-05, + "loss": 2.7512, "step": 160100 }, { "epoch": 0.53, - "learning_rate": 2.3479894714188754e-05, - "loss": 2.7673, + "learning_rate": 2.3472341355659398e-05, + "loss": 2.7456, "step": 160200 }, { "epoch": 0.53, - "learning_rate": 2.346334034135117e-05, - "loss": 2.7655, + "learning_rate": 2.3455782267866432e-05, + "loss": 2.7468, "step": 160300 }, { "epoch": 0.53, - "learning_rate": 2.3446785968513585e-05, - "loss": 2.7559, + "learning_rate": 2.343922318007346e-05, + "loss": 2.7498, "step": 160400 }, { "epoch": 0.53, - "learning_rate": 2.3430231595676e-05, - "loss": 2.7551, + "learning_rate": 2.3422664092280486e-05, + "loss": 2.766, "step": 160500 }, { "epoch": 0.53, - "learning_rate": 2.3413677222838413e-05, - "loss": 2.7534, + "learning_rate": 2.3406105004487513e-05, + "loss": 2.741, "step": 160600 }, { "epoch": 0.53, - "learning_rate": 2.339712285000083e-05, - "loss": 2.7566, + "learning_rate": 2.3389545916694543e-05, + "loss": 2.7401, "step": 160700 }, { "epoch": 0.53, - "learning_rate": 2.3380568477163245e-05, - "loss": 2.766, + "learning_rate": 2.3372986828901573e-05, + "loss": 2.7291, "step": 160800 }, { "epoch": 0.53, - "learning_rate": 2.336401410432566e-05, - "loss": 2.7654, + "learning_rate": 2.33564277411086e-05, + "loss": 2.7573, "step": 160900 }, { "epoch": 0.53, - "learning_rate": 2.3347459731488076e-05, - "loss": 2.7703, + "learning_rate": 2.3339868653315627e-05, + "loss": 2.7449, "step": 161000 }, { "epoch": 0.53, - "learning_rate": 2.333090535865049e-05, - "loss": 2.7566, + "learning_rate": 2.3323309565522654e-05, + "loss": 2.7527, "step": 161100 }, { "epoch": 0.53, - "learning_rate": 2.3314350985812904e-05, - "loss": 2.7669, + "learning_rate": 2.3306750477729684e-05, + "loss": 2.7485, "step": 161200 }, { "epoch": 0.53, - "learning_rate": 2.3297796612975316e-05, - "loss": 2.7644, + "learning_rate": 2.3290191389936714e-05, + "loss": 2.7476, "step": 161300 }, { "epoch": 0.53, - "learning_rate": 2.328124224013773e-05, - "loss": 2.7468, + "learning_rate": 2.327363230214374e-05, + "loss": 2.7679, "step": 161400 }, { "epoch": 0.53, - "learning_rate": 2.3264687867300147e-05, - "loss": 2.7487, + "learning_rate": 2.3257073214350768e-05, + "loss": 2.7428, "step": 161500 }, { "epoch": 0.54, - "learning_rate": 2.3248133494462563e-05, - "loss": 2.7594, + "learning_rate": 2.32405141265578e-05, + "loss": 2.7318, "step": 161600 }, { "epoch": 0.54, - "learning_rate": 2.323157912162498e-05, - "loss": 2.7513, + "learning_rate": 2.3223955038764826e-05, + "loss": 2.7465, "step": 161700 }, { "epoch": 0.54, - "learning_rate": 2.321502474878739e-05, - "loss": 2.7479, + "learning_rate": 2.3207395950971856e-05, + "loss": 2.74, "step": 161800 }, { "epoch": 0.54, - "learning_rate": 2.3198470375949807e-05, - "loss": 2.7674, + "learning_rate": 2.3190836863178883e-05, + "loss": 2.7525, "step": 161900 }, { "epoch": 0.54, - "learning_rate": 2.3181916003112222e-05, - "loss": 2.7543, + "learning_rate": 2.317427777538591e-05, + "loss": 2.7559, "step": 162000 }, { "epoch": 0.54, - "learning_rate": 2.3165361630274638e-05, - "loss": 2.7558, + "learning_rate": 2.315771868759294e-05, + "loss": 2.7388, "step": 162100 }, { "epoch": 0.54, - "learning_rate": 2.3148807257437053e-05, - "loss": 2.7706, + "learning_rate": 2.3141159599799967e-05, + "loss": 2.752, "step": 162200 }, { "epoch": 0.54, - "learning_rate": 2.3132252884599466e-05, - "loss": 2.7762, + "learning_rate": 2.3124600512006997e-05, + "loss": 2.7547, "step": 162300 }, { "epoch": 0.54, - "learning_rate": 2.311569851176188e-05, - "loss": 2.7605, + "learning_rate": 2.3108041424214024e-05, + "loss": 2.7303, "step": 162400 }, { "epoch": 0.54, - "learning_rate": 2.3099144138924297e-05, - "loss": 2.7657, + "learning_rate": 2.309148233642105e-05, + "loss": 2.7317, "step": 162500 }, { "epoch": 0.54, - "learning_rate": 2.3082589766086713e-05, - "loss": 2.774, + "learning_rate": 2.307492324862808e-05, + "loss": 2.7413, "step": 162600 }, { "epoch": 0.54, - "learning_rate": 2.306603539324913e-05, - "loss": 2.7623, + "learning_rate": 2.3058364160835108e-05, + "loss": 2.7639, "step": 162700 }, { "epoch": 0.54, - "learning_rate": 2.3049481020411544e-05, - "loss": 2.7465, + "learning_rate": 2.304180507304214e-05, + "loss": 2.7692, "step": 162800 }, { "epoch": 0.54, - "learning_rate": 2.3032926647573956e-05, - "loss": 2.7664, + "learning_rate": 2.3025245985249165e-05, + "loss": 2.7522, "step": 162900 }, { "epoch": 0.54, - "learning_rate": 2.3016372274736372e-05, - "loss": 2.7569, + "learning_rate": 2.3008686897456196e-05, + "loss": 2.7499, "step": 163000 }, { "epoch": 0.54, - "learning_rate": 2.2999817901898788e-05, - "loss": 2.7459, + "learning_rate": 2.2992127809663223e-05, + "loss": 2.7452, "step": 163100 }, { "epoch": 0.54, - "learning_rate": 2.2983263529061203e-05, - "loss": 2.7612, + "learning_rate": 2.297556872187025e-05, + "loss": 2.7692, "step": 163200 }, { "epoch": 0.54, - "learning_rate": 2.296670915622362e-05, - "loss": 2.7619, + "learning_rate": 2.295900963407728e-05, + "loss": 2.7514, "step": 163300 }, { "epoch": 0.54, - "learning_rate": 2.295015478338603e-05, - "loss": 2.7569, + "learning_rate": 2.2942450546284307e-05, + "loss": 2.7399, "step": 163400 }, { "epoch": 0.54, - "learning_rate": 2.2933600410548447e-05, - "loss": 2.7652, + "learning_rate": 2.2925891458491337e-05, + "loss": 2.751, "step": 163500 }, { "epoch": 0.54, - "learning_rate": 2.2917046037710862e-05, - "loss": 2.7587, + "learning_rate": 2.2909332370698364e-05, + "loss": 2.7342, "step": 163600 }, { "epoch": 0.54, - "learning_rate": 2.2900491664873278e-05, - "loss": 2.7538, + "learning_rate": 2.289277328290539e-05, + "loss": 2.7472, "step": 163700 }, { "epoch": 0.54, - "learning_rate": 2.2883937292035694e-05, - "loss": 2.7635, + "learning_rate": 2.287621419511242e-05, + "loss": 2.7626, "step": 163800 }, { "epoch": 0.54, - "learning_rate": 2.2867382919198106e-05, - "loss": 2.7569, + "learning_rate": 2.285965510731945e-05, + "loss": 2.7511, "step": 163900 }, { "epoch": 0.54, - "learning_rate": 2.285082854636052e-05, - "loss": 2.7699, + "learning_rate": 2.284309601952648e-05, + "loss": 2.7535, "step": 164000 }, { "epoch": 0.54, - "learning_rate": 2.2834274173522937e-05, - "loss": 2.747, + "learning_rate": 2.2826536931733505e-05, + "loss": 2.7506, "step": 164100 }, { "epoch": 0.54, - "learning_rate": 2.2817719800685353e-05, - "loss": 2.7667, + "learning_rate": 2.2809977843940532e-05, + "loss": 2.7679, "step": 164200 }, { "epoch": 0.54, - "learning_rate": 2.280116542784777e-05, - "loss": 2.7533, + "learning_rate": 2.2793418756147562e-05, + "loss": 2.7446, "step": 164300 }, { "epoch": 0.54, - "learning_rate": 2.2784611055010184e-05, - "loss": 2.7504, + "learning_rate": 2.2776859668354593e-05, + "loss": 2.7439, "step": 164400 }, { "epoch": 0.54, - "learning_rate": 2.2768056682172596e-05, - "loss": 2.7549, + "learning_rate": 2.276030058056162e-05, + "loss": 2.7631, "step": 164500 }, { - "epoch": 0.54, - "learning_rate": 2.2751502309335012e-05, - "loss": 2.7496, + "epoch": 0.55, + "learning_rate": 2.2743741492768647e-05, + "loss": 2.7448, "step": 164600 }, { "epoch": 0.55, - "learning_rate": 2.2734947936497428e-05, - "loss": 2.7592, + "learning_rate": 2.2727182404975674e-05, + "loss": 2.7571, "step": 164700 }, { "epoch": 0.55, - "learning_rate": 2.2718393563659843e-05, - "loss": 2.7628, + "learning_rate": 2.2710623317182704e-05, + "loss": 2.7517, "step": 164800 }, { "epoch": 0.55, - "learning_rate": 2.270183919082226e-05, - "loss": 2.757, + "learning_rate": 2.2694064229389734e-05, + "loss": 2.7324, "step": 164900 }, { "epoch": 0.55, - "learning_rate": 2.268528481798467e-05, - "loss": 2.7624, + "learning_rate": 2.267750514159676e-05, + "loss": 2.7428, "step": 165000 }, { "epoch": 0.55, - "learning_rate": 2.2668730445147087e-05, - "loss": 2.7472, + "learning_rate": 2.2660946053803788e-05, + "loss": 2.7627, "step": 165100 }, { "epoch": 0.55, - "learning_rate": 2.2652176072309503e-05, - "loss": 2.7468, + "learning_rate": 2.2644386966010818e-05, + "loss": 2.7404, "step": 165200 }, { "epoch": 0.55, - "learning_rate": 2.2635621699471918e-05, - "loss": 2.7613, + "learning_rate": 2.262782787821785e-05, + "loss": 2.7404, "step": 165300 }, { "epoch": 0.55, - "learning_rate": 2.2619067326634334e-05, - "loss": 2.7578, + "learning_rate": 2.2611268790424875e-05, + "loss": 2.7552, "step": 165400 }, { "epoch": 0.55, - "learning_rate": 2.2602512953796746e-05, - "loss": 2.7635, + "learning_rate": 2.2594709702631902e-05, + "loss": 2.7461, "step": 165500 }, { "epoch": 0.55, - "learning_rate": 2.258595858095916e-05, - "loss": 2.7544, + "learning_rate": 2.257815061483893e-05, + "loss": 2.7497, "step": 165600 }, { "epoch": 0.55, - "learning_rate": 2.2569404208121574e-05, - "loss": 2.7684, + "learning_rate": 2.256159152704596e-05, + "loss": 2.75, "step": 165700 }, { "epoch": 0.55, - "learning_rate": 2.255284983528399e-05, + "learning_rate": 2.254503243925299e-05, "loss": 2.7509, "step": 165800 }, { "epoch": 0.55, - "learning_rate": 2.2536295462446405e-05, - "loss": 2.7467, + "learning_rate": 2.2528473351460017e-05, + "loss": 2.7442, "step": 165900 }, { "epoch": 0.55, - "learning_rate": 2.251974108960882e-05, - "loss": 2.7633, + "learning_rate": 2.2511914263667044e-05, + "loss": 2.7411, "step": 166000 }, { "epoch": 0.55, - "learning_rate": 2.2503186716771237e-05, - "loss": 2.752, + "learning_rate": 2.249535517587407e-05, + "loss": 2.7603, "step": 166100 }, { "epoch": 0.55, - "learning_rate": 2.248663234393365e-05, - "loss": 2.776, + "learning_rate": 2.24787960880811e-05, + "loss": 2.7477, "step": 166200 }, { "epoch": 0.55, - "learning_rate": 2.2470077971096065e-05, - "loss": 2.758, + "learning_rate": 2.246223700028813e-05, + "loss": 2.742, "step": 166300 }, { "epoch": 0.55, - "learning_rate": 2.245352359825848e-05, - "loss": 2.7593, + "learning_rate": 2.2445677912495158e-05, + "loss": 2.7428, "step": 166400 }, { "epoch": 0.55, - "learning_rate": 2.2436969225420896e-05, - "loss": 2.7446, + "learning_rate": 2.2429118824702185e-05, + "loss": 2.7425, "step": 166500 }, { "epoch": 0.55, - "learning_rate": 2.242041485258331e-05, - "loss": 2.7566, + "learning_rate": 2.2412559736909215e-05, + "loss": 2.7574, "step": 166600 }, { "epoch": 0.55, - "learning_rate": 2.2403860479745724e-05, - "loss": 2.7541, + "learning_rate": 2.2396000649116242e-05, + "loss": 2.7359, "step": 166700 }, { "epoch": 0.55, - "learning_rate": 2.238730610690814e-05, - "loss": 2.7715, + "learning_rate": 2.2379441561323273e-05, + "loss": 2.7462, "step": 166800 }, { "epoch": 0.55, - "learning_rate": 2.2370751734070555e-05, - "loss": 2.7541, + "learning_rate": 2.23628824735303e-05, + "loss": 2.7471, "step": 166900 }, { "epoch": 0.55, - "learning_rate": 2.235419736123297e-05, - "loss": 2.7589, + "learning_rate": 2.2346323385737326e-05, + "loss": 2.7516, "step": 167000 }, { "epoch": 0.55, - "learning_rate": 2.2337642988395386e-05, - "loss": 2.7481, + "learning_rate": 2.2329764297944357e-05, + "loss": 2.7484, "step": 167100 }, { "epoch": 0.55, - "learning_rate": 2.23210886155578e-05, - "loss": 2.7516, + "learning_rate": 2.2313205210151384e-05, + "loss": 2.7419, "step": 167200 }, { "epoch": 0.55, - "learning_rate": 2.2304534242720214e-05, - "loss": 2.7497, + "learning_rate": 2.2296646122358414e-05, + "loss": 2.7483, "step": 167300 }, { "epoch": 0.55, - "learning_rate": 2.228797986988263e-05, - "loss": 2.7477, + "learning_rate": 2.228008703456544e-05, + "loss": 2.7379, "step": 167400 }, { "epoch": 0.55, - "learning_rate": 2.2271425497045046e-05, - "loss": 2.7661, + "learning_rate": 2.226352794677247e-05, + "loss": 2.7434, "step": 167500 }, { - "epoch": 0.55, - "learning_rate": 2.225487112420746e-05, - "loss": 2.758, + "epoch": 0.56, + "learning_rate": 2.2246968858979498e-05, + "loss": 2.7443, "step": 167600 }, { "epoch": 0.56, - "learning_rate": 2.2238316751369877e-05, - "loss": 2.7561, + "learning_rate": 2.2230409771186525e-05, + "loss": 2.7423, "step": 167700 }, { "epoch": 0.56, - "learning_rate": 2.222176237853229e-05, - "loss": 2.7541, + "learning_rate": 2.2213850683393555e-05, + "loss": 2.7478, "step": 167800 }, { "epoch": 0.56, - "learning_rate": 2.2205208005694705e-05, - "loss": 2.7516, + "learning_rate": 2.2197291595600582e-05, + "loss": 2.7371, "step": 167900 }, { "epoch": 0.56, - "learning_rate": 2.218865363285712e-05, - "loss": 2.761, + "learning_rate": 2.2180732507807612e-05, + "loss": 2.7521, "step": 168000 }, { "epoch": 0.56, - "learning_rate": 2.2172099260019536e-05, - "loss": 2.7456, + "learning_rate": 2.216417342001464e-05, + "loss": 2.7481, "step": 168100 }, { "epoch": 0.56, - "learning_rate": 2.2155544887181952e-05, - "loss": 2.7571, + "learning_rate": 2.2147614332221666e-05, + "loss": 2.7661, "step": 168200 }, { "epoch": 0.56, - "learning_rate": 2.2138990514344364e-05, - "loss": 2.76, + "learning_rate": 2.2131055244428697e-05, + "loss": 2.7528, "step": 168300 }, { "epoch": 0.56, - "learning_rate": 2.212243614150678e-05, - "loss": 2.7626, + "learning_rate": 2.2114496156635723e-05, + "loss": 2.7454, "step": 168400 }, { "epoch": 0.56, - "learning_rate": 2.2105881768669195e-05, - "loss": 2.7529, + "learning_rate": 2.2097937068842754e-05, + "loss": 2.7572, "step": 168500 }, { "epoch": 0.56, - "learning_rate": 2.208932739583161e-05, - "loss": 2.772, + "learning_rate": 2.208137798104978e-05, + "loss": 2.7555, "step": 168600 }, { "epoch": 0.56, - "learning_rate": 2.2072773022994027e-05, - "loss": 2.7667, + "learning_rate": 2.2064818893256808e-05, + "loss": 2.7397, "step": 168700 }, { "epoch": 0.56, - "learning_rate": 2.205621865015644e-05, - "loss": 2.7534, + "learning_rate": 2.2048259805463838e-05, + "loss": 2.7472, "step": 168800 }, { "epoch": 0.56, - "learning_rate": 2.2039664277318855e-05, - "loss": 2.7559, + "learning_rate": 2.2031700717670868e-05, + "loss": 2.7525, "step": 168900 }, { "epoch": 0.56, - "learning_rate": 2.202310990448127e-05, - "loss": 2.743, + "learning_rate": 2.2015141629877895e-05, + "loss": 2.7479, "step": 169000 }, { "epoch": 0.56, - "learning_rate": 2.2006555531643686e-05, - "loss": 2.7539, + "learning_rate": 2.1998582542084922e-05, + "loss": 2.7548, "step": 169100 }, { "epoch": 0.56, - "learning_rate": 2.19900011588061e-05, - "loss": 2.7676, + "learning_rate": 2.198202345429195e-05, + "loss": 2.7515, "step": 169200 }, { "epoch": 0.56, - "learning_rate": 2.1973446785968517e-05, - "loss": 2.763, + "learning_rate": 2.196546436649898e-05, + "loss": 2.7598, "step": 169300 }, { "epoch": 0.56, - "learning_rate": 2.195689241313093e-05, - "loss": 2.7507, + "learning_rate": 2.194890527870601e-05, + "loss": 2.7638, "step": 169400 }, { "epoch": 0.56, - "learning_rate": 2.1940338040293345e-05, - "loss": 2.7607, + "learning_rate": 2.1932346190913036e-05, + "loss": 2.7554, "step": 169500 }, { "epoch": 0.56, - "learning_rate": 2.192378366745576e-05, - "loss": 2.7614, + "learning_rate": 2.1915787103120063e-05, + "loss": 2.7554, "step": 169600 }, { "epoch": 0.56, - "learning_rate": 2.1907229294618176e-05, - "loss": 2.7604, + "learning_rate": 2.189922801532709e-05, + "loss": 2.7186, "step": 169700 }, { "epoch": 0.56, - "learning_rate": 2.1890674921780592e-05, - "loss": 2.7641, + "learning_rate": 2.1882668927534124e-05, + "loss": 2.7371, "step": 169800 }, { "epoch": 0.56, - "learning_rate": 2.1874120548943004e-05, - "loss": 2.7523, + "learning_rate": 2.186610983974115e-05, + "loss": 2.7606, "step": 169900 }, { "epoch": 0.56, - "learning_rate": 2.185756617610542e-05, - "loss": 2.751, + "learning_rate": 2.1849550751948178e-05, + "loss": 2.7465, "step": 170000 }, { "epoch": 0.56, - "learning_rate": 2.1841011803267832e-05, - "loss": 2.7579, + "learning_rate": 2.1832991664155205e-05, + "loss": 2.7502, "step": 170100 }, { "epoch": 0.56, - "learning_rate": 2.1824457430430248e-05, - "loss": 2.7561, + "learning_rate": 2.1816432576362235e-05, + "loss": 2.7428, "step": 170200 }, { "epoch": 0.56, - "learning_rate": 2.1807903057592663e-05, - "loss": 2.7606, + "learning_rate": 2.1799873488569265e-05, + "loss": 2.7499, "step": 170300 }, { "epoch": 0.56, - "learning_rate": 2.179134868475508e-05, - "loss": 2.7531, + "learning_rate": 2.1783314400776292e-05, + "loss": 2.7497, "step": 170400 }, { "epoch": 0.56, - "learning_rate": 2.177479431191749e-05, - "loss": 2.752, + "learning_rate": 2.176675531298332e-05, + "loss": 2.7497, "step": 170500 }, { "epoch": 0.56, - "learning_rate": 2.1758239939079907e-05, - "loss": 2.7467, + "learning_rate": 2.1750196225190346e-05, + "loss": 2.7504, "step": 170600 }, { "epoch": 0.57, - "learning_rate": 2.1741685566242323e-05, - "loss": 2.7636, + "learning_rate": 2.1733637137397376e-05, + "loss": 2.7567, "step": 170700 }, { "epoch": 0.57, - "learning_rate": 2.172513119340474e-05, - "loss": 2.7591, + "learning_rate": 2.1717078049604407e-05, + "loss": 2.7605, "step": 170800 }, { "epoch": 0.57, - "learning_rate": 2.1708576820567154e-05, - "loss": 2.7634, + "learning_rate": 2.1700518961811433e-05, + "loss": 2.7443, "step": 170900 }, { "epoch": 0.57, - "learning_rate": 2.169202244772957e-05, - "loss": 2.7558, + "learning_rate": 2.168395987401846e-05, + "loss": 2.7547, "step": 171000 }, { "epoch": 0.57, - "learning_rate": 2.1675468074891982e-05, - "loss": 2.757, + "learning_rate": 2.166740078622549e-05, + "loss": 2.7432, "step": 171100 }, { "epoch": 0.57, - "learning_rate": 2.1658913702054398e-05, - "loss": 2.757, + "learning_rate": 2.1650841698432518e-05, + "loss": 2.7484, "step": 171200 }, { "epoch": 0.57, - "learning_rate": 2.1642359329216813e-05, - "loss": 2.7554, + "learning_rate": 2.1634282610639548e-05, + "loss": 2.7579, "step": 171300 }, { "epoch": 0.57, - "learning_rate": 2.162580495637923e-05, - "loss": 2.7541, + "learning_rate": 2.1617723522846575e-05, + "loss": 2.7375, "step": 171400 }, { "epoch": 0.57, - "learning_rate": 2.1609250583541644e-05, - "loss": 2.7573, + "learning_rate": 2.1601164435053602e-05, + "loss": 2.7608, "step": 171500 }, { "epoch": 0.57, - "learning_rate": 2.1592696210704057e-05, - "loss": 2.7545, + "learning_rate": 2.1584605347260632e-05, + "loss": 2.7453, "step": 171600 }, { "epoch": 0.57, - "learning_rate": 2.1576141837866472e-05, - "loss": 2.757, + "learning_rate": 2.156804625946766e-05, + "loss": 2.747, "step": 171700 }, { "epoch": 0.57, - "learning_rate": 2.1559587465028888e-05, - "loss": 2.7497, + "learning_rate": 2.155148717167469e-05, + "loss": 2.7596, "step": 171800 }, { "epoch": 0.57, - "learning_rate": 2.1543033092191304e-05, - "loss": 2.7546, + "learning_rate": 2.1534928083881716e-05, + "loss": 2.7526, "step": 171900 }, { "epoch": 0.57, - "learning_rate": 2.152647871935372e-05, - "loss": 2.7519, + "learning_rate": 2.1518368996088743e-05, + "loss": 2.7525, "step": 172000 }, { "epoch": 0.57, - "learning_rate": 2.150992434651613e-05, - "loss": 2.7586, + "learning_rate": 2.1501809908295773e-05, + "loss": 2.7486, "step": 172100 }, { "epoch": 0.57, - "learning_rate": 2.1493369973678547e-05, - "loss": 2.7488, + "learning_rate": 2.14852508205028e-05, + "loss": 2.7473, "step": 172200 }, { "epoch": 0.57, - "learning_rate": 2.1476815600840963e-05, - "loss": 2.7607, + "learning_rate": 2.146869173270983e-05, + "loss": 2.7536, "step": 172300 }, { "epoch": 0.57, - "learning_rate": 2.146026122800338e-05, - "loss": 2.7493, + "learning_rate": 2.1452132644916857e-05, + "loss": 2.7546, "step": 172400 }, { "epoch": 0.57, - "learning_rate": 2.1443706855165794e-05, - "loss": 2.7703, + "learning_rate": 2.1435573557123888e-05, + "loss": 2.734, "step": 172500 }, { "epoch": 0.57, - "learning_rate": 2.142715248232821e-05, - "loss": 2.7505, + "learning_rate": 2.1419014469330915e-05, + "loss": 2.7437, "step": 172600 }, { "epoch": 0.57, - "learning_rate": 2.1410598109490622e-05, - "loss": 2.7592, + "learning_rate": 2.140245538153794e-05, + "loss": 2.7471, "step": 172700 }, { "epoch": 0.57, - "learning_rate": 2.1394043736653038e-05, - "loss": 2.7536, + "learning_rate": 2.1385896293744972e-05, + "loss": 2.7469, "step": 172800 }, { "epoch": 0.57, - "learning_rate": 2.1377489363815453e-05, - "loss": 2.7547, + "learning_rate": 2.1369337205952e-05, + "loss": 2.759, "step": 172900 }, { "epoch": 0.57, - "learning_rate": 2.136093499097787e-05, - "loss": 2.7695, + "learning_rate": 2.135277811815903e-05, + "loss": 2.756, "step": 173000 }, { "epoch": 0.57, - "learning_rate": 2.1344380618140285e-05, - "loss": 2.7408, + "learning_rate": 2.1336219030366056e-05, + "loss": 2.745, "step": 173100 }, { "epoch": 0.57, - "learning_rate": 2.1327826245302697e-05, - "loss": 2.7605, + "learning_rate": 2.1319659942573083e-05, + "loss": 2.7487, "step": 173200 }, { "epoch": 0.57, - "learning_rate": 2.1311271872465113e-05, - "loss": 2.7397, + "learning_rate": 2.1303100854780113e-05, + "loss": 2.7487, "step": 173300 }, { "epoch": 0.57, - "learning_rate": 2.1294717499627528e-05, - "loss": 2.7586, + "learning_rate": 2.1286541766987144e-05, + "loss": 2.7374, "step": 173400 }, { "epoch": 0.57, - "learning_rate": 2.1278163126789944e-05, - "loss": 2.7434, + "learning_rate": 2.126998267919417e-05, + "loss": 2.7561, "step": 173500 }, { "epoch": 0.57, - "learning_rate": 2.126160875395236e-05, - "loss": 2.7673, + "learning_rate": 2.1253423591401197e-05, + "loss": 2.752, "step": 173600 }, { "epoch": 0.58, - "learning_rate": 2.1245054381114772e-05, - "loss": 2.7664, + "learning_rate": 2.1236864503608224e-05, + "loss": 2.7373, "step": 173700 }, { "epoch": 0.58, - "learning_rate": 2.1228500008277187e-05, - "loss": 2.7581, + "learning_rate": 2.1220305415815255e-05, + "loss": 2.7426, "step": 173800 }, { "epoch": 0.58, - "learning_rate": 2.1211945635439603e-05, - "loss": 2.752, + "learning_rate": 2.1203746328022285e-05, + "loss": 2.7453, "step": 173900 }, { "epoch": 0.58, - "learning_rate": 2.119539126260202e-05, - "loss": 2.7673, + "learning_rate": 2.1187187240229312e-05, + "loss": 2.7396, "step": 174000 }, { "epoch": 0.58, - "learning_rate": 2.1178836889764434e-05, - "loss": 2.7517, + "learning_rate": 2.117062815243634e-05, + "loss": 2.7657, "step": 174100 }, { "epoch": 0.58, - "learning_rate": 2.116228251692685e-05, - "loss": 2.7555, + "learning_rate": 2.1154069064643366e-05, + "loss": 2.7561, "step": 174200 }, { "epoch": 0.58, - "learning_rate": 2.1145728144089262e-05, - "loss": 2.752, + "learning_rate": 2.1137509976850396e-05, + "loss": 2.7527, "step": 174300 }, { "epoch": 0.58, - "learning_rate": 2.1129173771251678e-05, - "loss": 2.7606, + "learning_rate": 2.1120950889057426e-05, + "loss": 2.7513, "step": 174400 }, { "epoch": 0.58, - "learning_rate": 2.111261939841409e-05, - "loss": 2.7613, + "learning_rate": 2.1104391801264453e-05, + "loss": 2.7367, "step": 174500 }, { "epoch": 0.58, - "learning_rate": 2.1096065025576506e-05, - "loss": 2.7651, + "learning_rate": 2.108783271347148e-05, + "loss": 2.7505, "step": 174600 }, { "epoch": 0.58, - "learning_rate": 2.107951065273892e-05, - "loss": 2.7278, + "learning_rate": 2.107127362567851e-05, + "loss": 2.7495, "step": 174700 }, { "epoch": 0.58, - "learning_rate": 2.1062956279901337e-05, - "loss": 2.76, + "learning_rate": 2.105471453788554e-05, + "loss": 2.7543, "step": 174800 }, { "epoch": 0.58, - "learning_rate": 2.104640190706375e-05, - "loss": 2.7475, + "learning_rate": 2.1038155450092568e-05, + "loss": 2.7465, "step": 174900 }, { "epoch": 0.58, - "learning_rate": 2.1029847534226165e-05, - "loss": 2.7649, + "learning_rate": 2.1021596362299594e-05, + "loss": 2.73, "step": 175000 }, { "epoch": 0.58, - "learning_rate": 2.101329316138858e-05, - "loss": 2.7541, + "learning_rate": 2.100503727450662e-05, + "loss": 2.7408, "step": 175100 }, { "epoch": 0.58, - "learning_rate": 2.0996738788550996e-05, - "loss": 2.7557, + "learning_rate": 2.098847818671365e-05, + "loss": 2.7525, "step": 175200 }, { "epoch": 0.58, - "learning_rate": 2.0980184415713412e-05, - "loss": 2.7733, + "learning_rate": 2.0971919098920682e-05, + "loss": 2.7428, "step": 175300 }, { "epoch": 0.58, - "learning_rate": 2.0963630042875824e-05, - "loss": 2.762, + "learning_rate": 2.095536001112771e-05, + "loss": 2.7372, "step": 175400 }, { "epoch": 0.58, - "learning_rate": 2.094707567003824e-05, - "loss": 2.7556, + "learning_rate": 2.0938800923334736e-05, + "loss": 2.7413, "step": 175500 }, { "epoch": 0.58, - "learning_rate": 2.0930521297200656e-05, - "loss": 2.7524, + "learning_rate": 2.0922241835541763e-05, + "loss": 2.7394, "step": 175600 }, { "epoch": 0.58, - "learning_rate": 2.091396692436307e-05, - "loss": 2.7534, + "learning_rate": 2.0905682747748793e-05, + "loss": 2.7547, "step": 175700 }, { "epoch": 0.58, - "learning_rate": 2.0897412551525487e-05, - "loss": 2.7685, + "learning_rate": 2.0889123659955823e-05, + "loss": 2.7519, "step": 175800 }, { "epoch": 0.58, - "learning_rate": 2.0880858178687903e-05, - "loss": 2.7488, + "learning_rate": 2.087256457216285e-05, + "loss": 2.7428, "step": 175900 }, { "epoch": 0.58, - "learning_rate": 2.0864303805850315e-05, - "loss": 2.7595, + "learning_rate": 2.0856005484369877e-05, + "loss": 2.7628, "step": 176000 }, { "epoch": 0.58, - "learning_rate": 2.084774943301273e-05, - "loss": 2.7443, + "learning_rate": 2.0839446396576907e-05, + "loss": 2.7362, "step": 176100 }, { "epoch": 0.58, - "learning_rate": 2.0831195060175146e-05, - "loss": 2.7476, + "learning_rate": 2.0822887308783934e-05, + "loss": 2.742, "step": 176200 }, { "epoch": 0.58, - "learning_rate": 2.0814640687337562e-05, - "loss": 2.7457, + "learning_rate": 2.0806328220990965e-05, + "loss": 2.7439, "step": 176300 }, { "epoch": 0.58, - "learning_rate": 2.0798086314499977e-05, - "loss": 2.7609, + "learning_rate": 2.078976913319799e-05, + "loss": 2.7554, "step": 176400 }, { "epoch": 0.58, - "learning_rate": 2.078153194166239e-05, - "loss": 2.7496, + "learning_rate": 2.077321004540502e-05, + "loss": 2.7583, "step": 176500 }, { "epoch": 0.58, - "learning_rate": 2.0764977568824805e-05, - "loss": 2.765, + "learning_rate": 2.075665095761205e-05, + "loss": 2.7484, "step": 176600 }, { "epoch": 0.59, - "learning_rate": 2.074842319598722e-05, - "loss": 2.7555, + "learning_rate": 2.0740091869819076e-05, + "loss": 2.7455, "step": 176700 }, { "epoch": 0.59, - "learning_rate": 2.0731868823149637e-05, - "loss": 2.7544, + "learning_rate": 2.0723532782026106e-05, + "loss": 2.7394, "step": 176800 }, { "epoch": 0.59, - "learning_rate": 2.0715314450312052e-05, - "loss": 2.7543, + "learning_rate": 2.0706973694233133e-05, + "loss": 2.7497, "step": 176900 }, { "epoch": 0.59, - "learning_rate": 2.0698760077474465e-05, - "loss": 2.7476, + "learning_rate": 2.0690414606440163e-05, + "loss": 2.7391, "step": 177000 }, { "epoch": 0.59, - "learning_rate": 2.068220570463688e-05, - "loss": 2.7468, + "learning_rate": 2.067385551864719e-05, + "loss": 2.7412, "step": 177100 }, { "epoch": 0.59, - "learning_rate": 2.0665651331799296e-05, - "loss": 2.7524, + "learning_rate": 2.0657296430854217e-05, + "loss": 2.7654, "step": 177200 }, { "epoch": 0.59, - "learning_rate": 2.064909695896171e-05, - "loss": 2.7619, + "learning_rate": 2.0640737343061247e-05, + "loss": 2.7475, "step": 177300 }, { "epoch": 0.59, - "learning_rate": 2.0632542586124127e-05, - "loss": 2.7501, + "learning_rate": 2.0624178255268274e-05, + "loss": 2.7368, "step": 177400 }, { "epoch": 0.59, - "learning_rate": 2.0615988213286543e-05, - "loss": 2.756, + "learning_rate": 2.0607619167475305e-05, + "loss": 2.7449, "step": 177500 }, { "epoch": 0.59, - "learning_rate": 2.0599433840448955e-05, - "loss": 2.7525, + "learning_rate": 2.059106007968233e-05, + "loss": 2.7554, "step": 177600 }, { "epoch": 0.59, - "learning_rate": 2.058287946761137e-05, - "loss": 2.762, + "learning_rate": 2.057450099188936e-05, + "loss": 2.7463, "step": 177700 }, { "epoch": 0.59, - "learning_rate": 2.0566325094773786e-05, - "loss": 2.7364, + "learning_rate": 2.055794190409639e-05, + "loss": 2.7435, "step": 177800 }, { "epoch": 0.59, - "learning_rate": 2.0549770721936202e-05, - "loss": 2.7625, + "learning_rate": 2.0541382816303416e-05, + "loss": 2.749, "step": 177900 }, { "epoch": 0.59, - "learning_rate": 2.0533216349098618e-05, - "loss": 2.7474, + "learning_rate": 2.0524823728510446e-05, + "loss": 2.7578, "step": 178000 }, { "epoch": 0.59, - "learning_rate": 2.051666197626103e-05, - "loss": 2.7602, + "learning_rate": 2.0508264640717473e-05, + "loss": 2.7403, "step": 178100 }, { "epoch": 0.59, - "learning_rate": 2.0500107603423446e-05, - "loss": 2.7514, + "learning_rate": 2.04917055529245e-05, + "loss": 2.7413, "step": 178200 }, { "epoch": 0.59, - "learning_rate": 2.048355323058586e-05, - "loss": 2.7635, + "learning_rate": 2.047514646513153e-05, + "loss": 2.7531, "step": 178300 }, { "epoch": 0.59, - "learning_rate": 2.0466998857748277e-05, - "loss": 2.7356, + "learning_rate": 2.045858737733856e-05, + "loss": 2.7382, "step": 178400 }, { "epoch": 0.59, - "learning_rate": 2.0450444484910692e-05, - "loss": 2.7484, + "learning_rate": 2.0442028289545587e-05, + "loss": 2.7328, "step": 178500 }, { "epoch": 0.59, - "learning_rate": 2.0433890112073105e-05, - "loss": 2.7683, + "learning_rate": 2.0425469201752614e-05, + "loss": 2.7523, "step": 178600 }, { "epoch": 0.59, - "learning_rate": 2.041733573923552e-05, - "loss": 2.7649, + "learning_rate": 2.040891011395964e-05, + "loss": 2.751, "step": 178700 }, { "epoch": 0.59, - "learning_rate": 2.0400781366397936e-05, - "loss": 2.7526, + "learning_rate": 2.039235102616667e-05, + "loss": 2.7621, "step": 178800 }, { "epoch": 0.59, - "learning_rate": 2.0384226993560348e-05, - "loss": 2.7415, + "learning_rate": 2.03757919383737e-05, + "loss": 2.7409, "step": 178900 }, { "epoch": 0.59, - "learning_rate": 2.0367672620722764e-05, - "loss": 2.7505, + "learning_rate": 2.035923285058073e-05, + "loss": 2.7606, "step": 179000 }, { "epoch": 0.59, - "learning_rate": 2.035111824788518e-05, - "loss": 2.7513, + "learning_rate": 2.0342673762787755e-05, + "loss": 2.7597, "step": 179100 }, { "epoch": 0.59, - "learning_rate": 2.0334563875047595e-05, - "loss": 2.7511, + "learning_rate": 2.0326114674994782e-05, + "loss": 2.734, "step": 179200 }, { "epoch": 0.59, - "learning_rate": 2.0318009502210007e-05, - "loss": 2.7472, + "learning_rate": 2.0309555587201816e-05, + "loss": 2.7502, "step": 179300 }, { "epoch": 0.59, - "learning_rate": 2.0301455129372423e-05, - "loss": 2.7635, + "learning_rate": 2.0292996499408843e-05, + "loss": 2.7438, "step": 179400 }, { "epoch": 0.59, - "learning_rate": 2.028490075653484e-05, - "loss": 2.7618, + "learning_rate": 2.027643741161587e-05, + "loss": 2.737, "step": 179500 }, { "epoch": 0.59, - "learning_rate": 2.0268346383697254e-05, - "loss": 2.7403, + "learning_rate": 2.0259878323822897e-05, + "loss": 2.7505, "step": 179600 }, { - "epoch": 0.59, - "learning_rate": 2.025179201085967e-05, - "loss": 2.7572, + "epoch": 0.6, + "learning_rate": 2.0243319236029927e-05, + "loss": 2.7519, "step": 179700 }, { "epoch": 0.6, - "learning_rate": 2.0235237638022082e-05, - "loss": 2.764, + "learning_rate": 2.0226760148236957e-05, + "loss": 2.7409, "step": 179800 }, { "epoch": 0.6, - "learning_rate": 2.0218683265184498e-05, - "loss": 2.7535, + "learning_rate": 2.0210201060443984e-05, + "loss": 2.7325, "step": 179900 }, { "epoch": 0.6, - "learning_rate": 2.0202128892346914e-05, - "loss": 2.7471, + "learning_rate": 2.019364197265101e-05, + "loss": 2.7537, "step": 180000 }, { "epoch": 0.6, - "learning_rate": 2.018557451950933e-05, - "loss": 2.7688, + "learning_rate": 2.0177082884858038e-05, + "loss": 2.7362, "step": 180100 }, { "epoch": 0.6, - "learning_rate": 2.0169020146671745e-05, - "loss": 2.758, + "learning_rate": 2.016052379706507e-05, + "loss": 2.7377, "step": 180200 }, { "epoch": 0.6, - "learning_rate": 2.0152465773834157e-05, - "loss": 2.7657, + "learning_rate": 2.01439647092721e-05, + "loss": 2.7443, "step": 180300 }, { "epoch": 0.6, - "learning_rate": 2.0135911400996573e-05, - "loss": 2.7615, + "learning_rate": 2.0127405621479126e-05, + "loss": 2.7377, "step": 180400 }, { "epoch": 0.6, - "learning_rate": 2.011935702815899e-05, - "loss": 2.7438, + "learning_rate": 2.0110846533686153e-05, + "loss": 2.7517, "step": 180500 }, { "epoch": 0.6, - "learning_rate": 2.0102802655321404e-05, - "loss": 2.7518, + "learning_rate": 2.0094287445893183e-05, + "loss": 2.7443, "step": 180600 }, { "epoch": 0.6, - "learning_rate": 2.008624828248382e-05, - "loss": 2.7412, + "learning_rate": 2.007772835810021e-05, + "loss": 2.7444, "step": 180700 }, { "epoch": 0.6, - "learning_rate": 2.0069693909646235e-05, - "loss": 2.7506, + "learning_rate": 2.006116927030724e-05, + "loss": 2.7496, "step": 180800 }, { "epoch": 0.6, - "learning_rate": 2.0053139536808648e-05, - "loss": 2.7598, + "learning_rate": 2.0044610182514267e-05, + "loss": 2.7418, "step": 180900 }, { "epoch": 0.6, - "learning_rate": 2.0036585163971063e-05, - "loss": 2.7554, + "learning_rate": 2.0028051094721294e-05, + "loss": 2.7402, "step": 181000 }, { "epoch": 0.6, - "learning_rate": 2.002003079113348e-05, - "loss": 2.7589, + "learning_rate": 2.0011492006928324e-05, + "loss": 2.7368, "step": 181100 }, { "epoch": 0.6, - "learning_rate": 2.0003476418295895e-05, - "loss": 2.7644, + "learning_rate": 1.999493291913535e-05, + "loss": 2.7474, "step": 181200 }, { "epoch": 0.6, - "learning_rate": 1.998692204545831e-05, - "loss": 2.7621, + "learning_rate": 1.997837383134238e-05, + "loss": 2.7471, "step": 181300 }, { "epoch": 0.6, - "learning_rate": 1.9970367672620723e-05, - "loss": 2.7566, + "learning_rate": 1.9961814743549408e-05, + "loss": 2.7449, "step": 181400 }, { "epoch": 0.6, - "learning_rate": 1.9953813299783138e-05, - "loss": 2.7602, + "learning_rate": 1.9945255655756435e-05, + "loss": 2.7289, "step": 181500 }, { "epoch": 0.6, - "learning_rate": 1.9937258926945554e-05, - "loss": 2.7508, + "learning_rate": 1.9928696567963465e-05, + "loss": 2.7406, "step": 181600 }, { "epoch": 0.6, - "learning_rate": 1.992070455410797e-05, - "loss": 2.761, + "learning_rate": 1.9912137480170492e-05, + "loss": 2.7669, "step": 181700 }, { "epoch": 0.6, - "learning_rate": 1.9904150181270385e-05, - "loss": 2.7567, + "learning_rate": 1.9895578392377523e-05, + "loss": 2.7532, "step": 181800 }, { "epoch": 0.6, - "learning_rate": 1.9887595808432797e-05, - "loss": 2.7436, + "learning_rate": 1.987901930458455e-05, + "loss": 2.7445, "step": 181900 }, { "epoch": 0.6, - "learning_rate": 1.9871041435595213e-05, - "loss": 2.749, + "learning_rate": 1.986246021679158e-05, + "loss": 2.7673, "step": 182000 }, { "epoch": 0.6, - "learning_rate": 1.985448706275763e-05, - "loss": 2.7616, + "learning_rate": 1.9845901128998607e-05, + "loss": 2.7517, "step": 182100 }, { "epoch": 0.6, - "learning_rate": 1.9837932689920044e-05, - "loss": 2.7596, + "learning_rate": 1.9829342041205634e-05, + "loss": 2.747, "step": 182200 }, { "epoch": 0.6, - "learning_rate": 1.982137831708246e-05, - "loss": 2.7442, + "learning_rate": 1.9812782953412664e-05, + "loss": 2.746, "step": 182300 }, { "epoch": 0.6, - "learning_rate": 1.9804823944244876e-05, - "loss": 2.7532, + "learning_rate": 1.979622386561969e-05, + "loss": 2.7475, "step": 182400 }, { "epoch": 0.6, - "learning_rate": 1.9788269571407288e-05, - "loss": 2.7475, + "learning_rate": 1.977966477782672e-05, + "loss": 2.7567, "step": 182500 }, { "epoch": 0.6, - "learning_rate": 1.9771715198569704e-05, - "loss": 2.7617, + "learning_rate": 1.9763105690033748e-05, + "loss": 2.7468, "step": 182600 }, { - "epoch": 0.6, - "learning_rate": 1.975516082573212e-05, - "loss": 2.7583, + "epoch": 0.61, + "learning_rate": 1.9746546602240775e-05, + "loss": 2.7424, "step": 182700 }, { "epoch": 0.61, - "learning_rate": 1.9738606452894535e-05, - "loss": 2.7614, + "learning_rate": 1.9729987514447805e-05, + "loss": 2.7375, "step": 182800 }, { "epoch": 0.61, - "learning_rate": 1.972205208005695e-05, - "loss": 2.7578, + "learning_rate": 1.9713428426654836e-05, + "loss": 2.74, "step": 182900 }, { "epoch": 0.61, - "learning_rate": 1.9705497707219363e-05, - "loss": 2.7673, + "learning_rate": 1.9696869338861863e-05, + "loss": 2.7503, "step": 183000 }, { "epoch": 0.61, - "learning_rate": 1.968894333438178e-05, - "loss": 2.7617, + "learning_rate": 1.968031025106889e-05, + "loss": 2.7485, "step": 183100 }, { "epoch": 0.61, - "learning_rate": 1.9672388961544194e-05, - "loss": 2.7737, + "learning_rate": 1.9663751163275916e-05, + "loss": 2.7386, "step": 183200 }, { "epoch": 0.61, - "learning_rate": 1.9655834588706606e-05, - "loss": 2.7456, + "learning_rate": 1.9647192075482947e-05, + "loss": 2.73, "step": 183300 }, { "epoch": 0.61, - "learning_rate": 1.9639280215869022e-05, - "loss": 2.7428, + "learning_rate": 1.9630632987689977e-05, + "loss": 2.7399, "step": 183400 }, { "epoch": 0.61, - "learning_rate": 1.9622725843031438e-05, - "loss": 2.7546, + "learning_rate": 1.9614073899897004e-05, + "loss": 2.7399, "step": 183500 }, { "epoch": 0.61, - "learning_rate": 1.960617147019385e-05, - "loss": 2.7498, + "learning_rate": 1.959751481210403e-05, + "loss": 2.7438, "step": 183600 }, { "epoch": 0.61, - "learning_rate": 1.9589617097356266e-05, - "loss": 2.7522, + "learning_rate": 1.9580955724311058e-05, + "loss": 2.7527, "step": 183700 }, { "epoch": 0.61, - "learning_rate": 1.957306272451868e-05, - "loss": 2.7429, + "learning_rate": 1.956439663651809e-05, + "loss": 2.7412, "step": 183800 }, { "epoch": 0.61, - "learning_rate": 1.9556508351681097e-05, - "loss": 2.7488, + "learning_rate": 1.954783754872512e-05, + "loss": 2.7294, "step": 183900 }, { "epoch": 0.61, - "learning_rate": 1.9539953978843512e-05, - "loss": 2.7547, + "learning_rate": 1.9531278460932145e-05, + "loss": 2.7384, "step": 184000 }, { "epoch": 0.61, - "learning_rate": 1.9523399606005928e-05, - "loss": 2.7622, + "learning_rate": 1.9514719373139172e-05, + "loss": 2.7353, "step": 184100 }, { "epoch": 0.61, - "learning_rate": 1.950684523316834e-05, - "loss": 2.7536, + "learning_rate": 1.9498160285346202e-05, + "loss": 2.7428, "step": 184200 }, { "epoch": 0.61, - "learning_rate": 1.9490290860330756e-05, - "loss": 2.7404, + "learning_rate": 1.9481601197553233e-05, + "loss": 2.7493, "step": 184300 }, { "epoch": 0.61, - "learning_rate": 1.9473736487493172e-05, - "loss": 2.7471, + "learning_rate": 1.946504210976026e-05, + "loss": 2.728, "step": 184400 }, { "epoch": 0.61, - "learning_rate": 1.9457182114655587e-05, - "loss": 2.7567, + "learning_rate": 1.9448483021967287e-05, + "loss": 2.7371, "step": 184500 }, { "epoch": 0.61, - "learning_rate": 1.9440627741818003e-05, - "loss": 2.7693, + "learning_rate": 1.9431923934174313e-05, + "loss": 2.7402, "step": 184600 }, { "epoch": 0.61, - "learning_rate": 1.9424073368980415e-05, - "loss": 2.749, + "learning_rate": 1.9415364846381344e-05, + "loss": 2.7451, "step": 184700 }, { "epoch": 0.61, - "learning_rate": 1.940751899614283e-05, - "loss": 2.7446, + "learning_rate": 1.9398805758588374e-05, + "loss": 2.7383, "step": 184800 }, { "epoch": 0.61, - "learning_rate": 1.9390964623305247e-05, - "loss": 2.7683, + "learning_rate": 1.93822466707954e-05, + "loss": 2.7366, "step": 184900 }, { "epoch": 0.61, - "learning_rate": 1.9374410250467662e-05, - "loss": 2.7564, + "learning_rate": 1.9365687583002428e-05, + "loss": 2.7473, "step": 185000 }, { "epoch": 0.61, - "learning_rate": 1.9357855877630078e-05, - "loss": 2.749, + "learning_rate": 1.9349128495209455e-05, + "loss": 2.7424, "step": 185100 }, { "epoch": 0.61, - "learning_rate": 1.934130150479249e-05, - "loss": 2.7415, + "learning_rate": 1.9332569407416485e-05, + "loss": 2.7531, "step": 185200 }, { "epoch": 0.61, - "learning_rate": 1.9324747131954906e-05, - "loss": 2.7559, + "learning_rate": 1.9316010319623515e-05, + "loss": 2.7323, "step": 185300 }, { "epoch": 0.61, - "learning_rate": 1.930819275911732e-05, - "loss": 2.7544, + "learning_rate": 1.9299451231830542e-05, + "loss": 2.7391, "step": 185400 }, { "epoch": 0.61, - "learning_rate": 1.9291638386279737e-05, - "loss": 2.7569, + "learning_rate": 1.928289214403757e-05, + "loss": 2.7523, "step": 185500 }, { "epoch": 0.61, - "learning_rate": 1.9275084013442153e-05, - "loss": 2.758, + "learning_rate": 1.92663330562446e-05, + "loss": 2.7478, "step": 185600 }, { - "epoch": 0.61, - "learning_rate": 1.925852964060457e-05, - "loss": 2.7706, + "epoch": 0.62, + "learning_rate": 1.9249773968451626e-05, + "loss": 2.7492, "step": 185700 }, { "epoch": 0.62, - "learning_rate": 1.924197526776698e-05, - "loss": 2.7515, + "learning_rate": 1.9233214880658657e-05, + "loss": 2.7306, "step": 185800 }, { "epoch": 0.62, - "learning_rate": 1.9225420894929396e-05, - "loss": 2.7634, + "learning_rate": 1.9216655792865684e-05, + "loss": 2.7402, "step": 185900 }, { "epoch": 0.62, - "learning_rate": 1.9208866522091812e-05, - "loss": 2.7438, + "learning_rate": 1.920009670507271e-05, + "loss": 2.7429, "step": 186000 }, { "epoch": 0.62, - "learning_rate": 1.9192312149254228e-05, - "loss": 2.7624, + "learning_rate": 1.918353761727974e-05, + "loss": 2.7388, "step": 186100 }, { "epoch": 0.62, - "learning_rate": 1.9175757776416643e-05, - "loss": 2.751, + "learning_rate": 1.9166978529486768e-05, + "loss": 2.7384, "step": 186200 }, { "epoch": 0.62, - "learning_rate": 1.9159203403579055e-05, - "loss": 2.7779, + "learning_rate": 1.9150419441693798e-05, + "loss": 2.7398, "step": 186300 }, { "epoch": 0.62, - "learning_rate": 1.914264903074147e-05, - "loss": 2.7514, + "learning_rate": 1.9133860353900825e-05, + "loss": 2.7571, "step": 186400 }, { "epoch": 0.62, - "learning_rate": 1.9126094657903887e-05, - "loss": 2.7503, + "learning_rate": 1.9117301266107855e-05, + "loss": 2.7548, "step": 186500 }, { "epoch": 0.62, - "learning_rate": 1.9109540285066302e-05, - "loss": 2.7481, + "learning_rate": 1.9100742178314882e-05, + "loss": 2.7339, "step": 186600 }, { "epoch": 0.62, - "learning_rate": 1.9092985912228718e-05, - "loss": 2.7373, + "learning_rate": 1.908418309052191e-05, + "loss": 2.7359, "step": 186700 }, { "epoch": 0.62, - "learning_rate": 1.907643153939113e-05, - "loss": 2.7518, + "learning_rate": 1.906762400272894e-05, + "loss": 2.7395, "step": 186800 }, { "epoch": 0.62, - "learning_rate": 1.9059877166553546e-05, - "loss": 2.7531, + "learning_rate": 1.9051064914935966e-05, + "loss": 2.7336, "step": 186900 }, { "epoch": 0.62, - "learning_rate": 1.904332279371596e-05, - "loss": 2.7682, + "learning_rate": 1.9034505827142997e-05, + "loss": 2.7336, "step": 187000 }, { "epoch": 0.62, - "learning_rate": 1.9026768420878377e-05, - "loss": 2.7557, + "learning_rate": 1.9017946739350024e-05, + "loss": 2.7447, "step": 187100 }, { "epoch": 0.62, - "learning_rate": 1.9010214048040793e-05, - "loss": 2.7541, + "learning_rate": 1.900138765155705e-05, + "loss": 2.7439, "step": 187200 }, { "epoch": 0.62, - "learning_rate": 1.899365967520321e-05, - "loss": 2.7567, + "learning_rate": 1.898482856376408e-05, + "loss": 2.743, "step": 187300 }, { "epoch": 0.62, - "learning_rate": 1.897710530236562e-05, - "loss": 2.7499, + "learning_rate": 1.896826947597111e-05, + "loss": 2.7444, "step": 187400 }, { "epoch": 0.62, - "learning_rate": 1.8960550929528036e-05, - "loss": 2.7595, + "learning_rate": 1.8951710388178138e-05, + "loss": 2.7332, "step": 187500 }, { "epoch": 0.62, - "learning_rate": 1.8943996556690452e-05, - "loss": 2.7531, + "learning_rate": 1.8935151300385165e-05, + "loss": 2.7369, "step": 187600 }, { "epoch": 0.62, - "learning_rate": 1.8927442183852864e-05, - "loss": 2.7507, + "learning_rate": 1.8918592212592192e-05, + "loss": 2.7413, "step": 187700 }, { "epoch": 0.62, - "learning_rate": 1.891088781101528e-05, - "loss": 2.7307, + "learning_rate": 1.8902033124799222e-05, + "loss": 2.7716, "step": 187800 }, { "epoch": 0.62, - "learning_rate": 1.8894333438177696e-05, - "loss": 2.7465, + "learning_rate": 1.8885474037006252e-05, + "loss": 2.7431, "step": 187900 }, { "epoch": 0.62, - "learning_rate": 1.8877779065340108e-05, - "loss": 2.7383, + "learning_rate": 1.886891494921328e-05, + "loss": 2.7496, "step": 188000 }, { "epoch": 0.62, - "learning_rate": 1.8861224692502524e-05, - "loss": 2.7451, + "learning_rate": 1.8852355861420306e-05, + "loss": 2.7485, "step": 188100 }, { "epoch": 0.62, - "learning_rate": 1.884467031966494e-05, - "loss": 2.7572, + "learning_rate": 1.8835796773627333e-05, + "loss": 2.7476, "step": 188200 }, { "epoch": 0.62, - "learning_rate": 1.8828115946827355e-05, - "loss": 2.7511, + "learning_rate": 1.8819237685834363e-05, + "loss": 2.7482, "step": 188300 }, { "epoch": 0.62, - "learning_rate": 1.881156157398977e-05, - "loss": 2.7476, + "learning_rate": 1.8802678598041394e-05, + "loss": 2.7405, "step": 188400 }, { "epoch": 0.62, - "learning_rate": 1.8795007201152183e-05, - "loss": 2.7497, + "learning_rate": 1.878611951024842e-05, + "loss": 2.7468, "step": 188500 }, { "epoch": 0.62, - "learning_rate": 1.87784528283146e-05, - "loss": 2.7419, + "learning_rate": 1.8769560422455448e-05, + "loss": 2.7531, "step": 188600 }, { "epoch": 0.62, - "learning_rate": 1.8761898455477014e-05, - "loss": 2.7534, + "learning_rate": 1.8753001334662474e-05, + "loss": 2.7557, "step": 188700 }, { "epoch": 0.63, - "learning_rate": 1.874534408263943e-05, - "loss": 2.7565, + "learning_rate": 1.8736442246869508e-05, + "loss": 2.7441, "step": 188800 }, { "epoch": 0.63, - "learning_rate": 1.8728789709801845e-05, - "loss": 2.7543, + "learning_rate": 1.8719883159076535e-05, + "loss": 2.7522, "step": 188900 }, { "epoch": 0.63, - "learning_rate": 1.871223533696426e-05, - "loss": 2.7633, + "learning_rate": 1.8703324071283562e-05, + "loss": 2.744, "step": 189000 }, { "epoch": 0.63, - "learning_rate": 1.8695680964126673e-05, - "loss": 2.7352, + "learning_rate": 1.868676498349059e-05, + "loss": 2.7527, "step": 189100 }, { "epoch": 0.63, - "learning_rate": 1.867912659128909e-05, - "loss": 2.758, + "learning_rate": 1.867020589569762e-05, + "loss": 2.7664, "step": 189200 }, { "epoch": 0.63, - "learning_rate": 1.8662572218451505e-05, - "loss": 2.7523, + "learning_rate": 1.865364680790465e-05, + "loss": 2.7386, "step": 189300 }, { "epoch": 0.63, - "learning_rate": 1.864601784561392e-05, - "loss": 2.7567, + "learning_rate": 1.8637087720111676e-05, + "loss": 2.7297, "step": 189400 }, { "epoch": 0.63, - "learning_rate": 1.8629463472776336e-05, - "loss": 2.7603, + "learning_rate": 1.8620528632318703e-05, + "loss": 2.7398, "step": 189500 }, { "epoch": 0.63, - "learning_rate": 1.8612909099938748e-05, - "loss": 2.7538, + "learning_rate": 1.860396954452573e-05, + "loss": 2.7605, "step": 189600 }, { "epoch": 0.63, - "learning_rate": 1.8596354727101164e-05, - "loss": 2.7485, + "learning_rate": 1.858741045673276e-05, + "loss": 2.749, "step": 189700 }, { "epoch": 0.63, - "learning_rate": 1.857980035426358e-05, - "loss": 2.7578, + "learning_rate": 1.857085136893979e-05, + "loss": 2.7359, "step": 189800 }, { "epoch": 0.63, - "learning_rate": 1.8563245981425995e-05, - "loss": 2.7597, + "learning_rate": 1.8554292281146818e-05, + "loss": 2.737, "step": 189900 }, { "epoch": 0.63, - "learning_rate": 1.854669160858841e-05, - "loss": 2.741, + "learning_rate": 1.8537733193353845e-05, + "loss": 2.7476, "step": 190000 }, { "epoch": 0.63, - "learning_rate": 1.8530137235750823e-05, - "loss": 2.7628, + "learning_rate": 1.8521174105560875e-05, + "loss": 2.7243, "step": 190100 }, { "epoch": 0.63, - "learning_rate": 1.851358286291324e-05, - "loss": 2.7549, + "learning_rate": 1.8504615017767902e-05, + "loss": 2.74, "step": 190200 }, { "epoch": 0.63, - "learning_rate": 1.8497028490075654e-05, - "loss": 2.7525, + "learning_rate": 1.8488055929974932e-05, + "loss": 2.7522, "step": 190300 }, { "epoch": 0.63, - "learning_rate": 1.848047411723807e-05, - "loss": 2.7658, + "learning_rate": 1.847149684218196e-05, + "loss": 2.7543, "step": 190400 }, { "epoch": 0.63, - "learning_rate": 1.8463919744400486e-05, - "loss": 2.7457, + "learning_rate": 1.8454937754388986e-05, + "loss": 2.7374, "step": 190500 }, { "epoch": 0.63, - "learning_rate": 1.84473653715629e-05, - "loss": 2.7454, + "learning_rate": 1.8438378666596016e-05, + "loss": 2.7397, "step": 190600 }, { "epoch": 0.63, - "learning_rate": 1.8430810998725314e-05, - "loss": 2.7433, + "learning_rate": 1.8421819578803043e-05, + "loss": 2.7264, "step": 190700 }, { "epoch": 0.63, - "learning_rate": 1.841425662588773e-05, - "loss": 2.7555, + "learning_rate": 1.8405260491010073e-05, + "loss": 2.7341, "step": 190800 }, { "epoch": 0.63, - "learning_rate": 1.8397702253050145e-05, - "loss": 2.7609, + "learning_rate": 1.83887014032171e-05, + "loss": 2.7382, "step": 190900 }, { "epoch": 0.63, - "learning_rate": 1.838114788021256e-05, - "loss": 2.7525, + "learning_rate": 1.837214231542413e-05, + "loss": 2.7346, "step": 191000 }, { "epoch": 0.63, - "learning_rate": 1.8364593507374976e-05, - "loss": 2.7539, + "learning_rate": 1.8355583227631158e-05, + "loss": 2.7357, "step": 191100 }, { "epoch": 0.63, - "learning_rate": 1.834803913453739e-05, - "loss": 2.7526, + "learning_rate": 1.8339024139838185e-05, + "loss": 2.7492, "step": 191200 }, { "epoch": 0.63, - "learning_rate": 1.8331484761699804e-05, - "loss": 2.7467, + "learning_rate": 1.8322465052045215e-05, + "loss": 2.7492, "step": 191300 }, { "epoch": 0.63, - "learning_rate": 1.831493038886222e-05, - "loss": 2.759, + "learning_rate": 1.8305905964252242e-05, + "loss": 2.7432, "step": 191400 }, { "epoch": 0.63, - "learning_rate": 1.8298376016024635e-05, - "loss": 2.7352, + "learning_rate": 1.8289346876459272e-05, + "loss": 2.7422, "step": 191500 }, { "epoch": 0.63, - "learning_rate": 1.828182164318705e-05, - "loss": 2.7548, + "learning_rate": 1.82727877886663e-05, + "loss": 2.7427, "step": 191600 }, { "epoch": 0.63, - "learning_rate": 1.8265267270349463e-05, - "loss": 2.7471, + "learning_rate": 1.8256228700873326e-05, + "loss": 2.7504, "step": 191700 }, { "epoch": 0.64, - "learning_rate": 1.824871289751188e-05, - "loss": 2.7459, + "learning_rate": 1.8239669613080356e-05, + "loss": 2.7348, "step": 191800 }, { "epoch": 0.64, - "learning_rate": 1.8232158524674295e-05, - "loss": 2.7631, + "learning_rate": 1.8223110525287383e-05, + "loss": 2.7355, "step": 191900 }, { "epoch": 0.64, - "learning_rate": 1.821560415183671e-05, - "loss": 2.7637, + "learning_rate": 1.8206551437494413e-05, + "loss": 2.7447, "step": 192000 }, { "epoch": 0.64, - "learning_rate": 1.8199049778999122e-05, - "loss": 2.7491, + "learning_rate": 1.818999234970144e-05, + "loss": 2.7517, "step": 192100 }, { "epoch": 0.64, - "learning_rate": 1.8182495406161538e-05, - "loss": 2.7466, + "learning_rate": 1.8173433261908467e-05, + "loss": 2.7496, "step": 192200 }, { "epoch": 0.64, - "learning_rate": 1.8165941033323954e-05, - "loss": 2.7564, + "learning_rate": 1.8156874174115497e-05, + "loss": 2.7394, "step": 192300 }, { "epoch": 0.64, - "learning_rate": 1.8149386660486366e-05, - "loss": 2.7306, + "learning_rate": 1.8140315086322528e-05, + "loss": 2.7283, "step": 192400 }, { "epoch": 0.64, - "learning_rate": 1.813283228764878e-05, - "loss": 2.7337, + "learning_rate": 1.8123755998529555e-05, + "loss": 2.7389, "step": 192500 }, { "epoch": 0.64, - "learning_rate": 1.8116277914811197e-05, - "loss": 2.7581, + "learning_rate": 1.810719691073658e-05, + "loss": 2.7368, "step": 192600 }, { "epoch": 0.64, - "learning_rate": 1.8099723541973613e-05, - "loss": 2.7527, + "learning_rate": 1.809063782294361e-05, + "loss": 2.7407, "step": 192700 }, { "epoch": 0.64, - "learning_rate": 1.808316916913603e-05, - "loss": 2.7398, + "learning_rate": 1.807407873515064e-05, + "loss": 2.7288, "step": 192800 }, { "epoch": 0.64, - "learning_rate": 1.806661479629844e-05, - "loss": 2.7567, + "learning_rate": 1.805751964735767e-05, + "loss": 2.7248, "step": 192900 }, { "epoch": 0.64, - "learning_rate": 1.8050060423460857e-05, - "loss": 2.7429, + "learning_rate": 1.8040960559564696e-05, + "loss": 2.7259, "step": 193000 }, { "epoch": 0.64, - "learning_rate": 1.8033506050623272e-05, - "loss": 2.7555, + "learning_rate": 1.8024401471771723e-05, + "loss": 2.7475, "step": 193100 }, { "epoch": 0.64, - "learning_rate": 1.8016951677785688e-05, - "loss": 2.7505, + "learning_rate": 1.800784238397875e-05, + "loss": 2.7297, "step": 193200 }, { "epoch": 0.64, - "learning_rate": 1.8000397304948103e-05, - "loss": 2.7464, + "learning_rate": 1.7991283296185784e-05, + "loss": 2.7498, "step": 193300 }, { "epoch": 0.64, - "learning_rate": 1.7983842932110516e-05, - "loss": 2.7487, + "learning_rate": 1.797472420839281e-05, + "loss": 2.7517, "step": 193400 }, { "epoch": 0.64, - "learning_rate": 1.796728855927293e-05, - "loss": 2.7459, + "learning_rate": 1.7958165120599837e-05, + "loss": 2.7365, "step": 193500 }, { "epoch": 0.64, - "learning_rate": 1.7950734186435347e-05, - "loss": 2.7236, + "learning_rate": 1.7941606032806864e-05, + "loss": 2.7297, "step": 193600 }, { "epoch": 0.64, - "learning_rate": 1.7934179813597763e-05, - "loss": 2.748, + "learning_rate": 1.7925046945013895e-05, + "loss": 2.7415, "step": 193700 }, { "epoch": 0.64, - "learning_rate": 1.791762544076018e-05, - "loss": 2.765, + "learning_rate": 1.7908487857220925e-05, + "loss": 2.7242, "step": 193800 }, { "epoch": 0.64, - "learning_rate": 1.7901071067922594e-05, - "loss": 2.7372, + "learning_rate": 1.7891928769427952e-05, + "loss": 2.7425, "step": 193900 }, { "epoch": 0.64, - "learning_rate": 1.7884516695085006e-05, - "loss": 2.7575, + "learning_rate": 1.787536968163498e-05, + "loss": 2.7562, "step": 194000 }, { "epoch": 0.64, - "learning_rate": 1.7867962322247422e-05, - "loss": 2.7495, + "learning_rate": 1.7858810593842006e-05, + "loss": 2.727, "step": 194100 }, { "epoch": 0.64, - "learning_rate": 1.7851407949409838e-05, - "loss": 2.7625, + "learning_rate": 1.7842251506049036e-05, + "loss": 2.7497, "step": 194200 }, { "epoch": 0.64, - "learning_rate": 1.7834853576572253e-05, - "loss": 2.7678, + "learning_rate": 1.7825692418256066e-05, + "loss": 2.7484, "step": 194300 }, { "epoch": 0.64, - "learning_rate": 1.781829920373467e-05, - "loss": 2.7367, + "learning_rate": 1.7809133330463093e-05, + "loss": 2.7343, "step": 194400 }, { "epoch": 0.64, - "learning_rate": 1.780174483089708e-05, - "loss": 2.7481, + "learning_rate": 1.779257424267012e-05, + "loss": 2.7723, "step": 194500 }, { "epoch": 0.64, - "learning_rate": 1.7785190458059497e-05, - "loss": 2.7463, + "learning_rate": 1.777601515487715e-05, + "loss": 2.7399, "step": 194600 }, { "epoch": 0.64, - "learning_rate": 1.7768636085221912e-05, - "loss": 2.7601, + "learning_rate": 1.7759456067084177e-05, + "loss": 2.7376, "step": 194700 }, { - "epoch": 0.64, - "learning_rate": 1.7752081712384328e-05, - "loss": 2.7463, + "epoch": 0.65, + "learning_rate": 1.7742896979291208e-05, + "loss": 2.7485, "step": 194800 }, { "epoch": 0.65, - "learning_rate": 1.7735527339546744e-05, - "loss": 2.7541, + "learning_rate": 1.7726337891498234e-05, + "loss": 2.7326, "step": 194900 }, { "epoch": 0.65, - "learning_rate": 1.7718972966709156e-05, - "loss": 2.7587, + "learning_rate": 1.770977880370526e-05, + "loss": 2.7403, "step": 195000 }, { "epoch": 0.65, - "learning_rate": 1.770241859387157e-05, - "loss": 2.7448, + "learning_rate": 1.769321971591229e-05, + "loss": 2.7306, "step": 195100 }, { "epoch": 0.65, - "learning_rate": 1.7685864221033987e-05, - "loss": 2.742, + "learning_rate": 1.767666062811932e-05, + "loss": 2.7347, "step": 195200 }, { "epoch": 0.65, - "learning_rate": 1.7669309848196403e-05, - "loss": 2.7432, + "learning_rate": 1.766010154032635e-05, + "loss": 2.7404, "step": 195300 }, { "epoch": 0.65, - "learning_rate": 1.765275547535882e-05, - "loss": 2.7405, + "learning_rate": 1.7643542452533376e-05, + "loss": 2.7213, "step": 195400 }, { "epoch": 0.65, - "learning_rate": 1.7636201102521234e-05, - "loss": 2.7456, + "learning_rate": 1.7626983364740403e-05, + "loss": 2.7374, "step": 195500 }, { "epoch": 0.65, - "learning_rate": 1.7619646729683646e-05, - "loss": 2.7538, + "learning_rate": 1.7610424276947433e-05, + "loss": 2.7227, "step": 195600 }, { "epoch": 0.65, - "learning_rate": 1.7603092356846062e-05, - "loss": 2.7466, + "learning_rate": 1.759386518915446e-05, + "loss": 2.7226, "step": 195700 }, { "epoch": 0.65, - "learning_rate": 1.7586537984008478e-05, - "loss": 2.7524, + "learning_rate": 1.757730610136149e-05, + "loss": 2.7351, "step": 195800 }, { "epoch": 0.65, - "learning_rate": 1.7569983611170893e-05, - "loss": 2.754, + "learning_rate": 1.7560747013568517e-05, + "loss": 2.7491, "step": 195900 }, { "epoch": 0.65, - "learning_rate": 1.755342923833331e-05, - "loss": 2.7473, + "learning_rate": 1.7544187925775547e-05, + "loss": 2.7374, "step": 196000 }, { "epoch": 0.65, - "learning_rate": 1.753687486549572e-05, - "loss": 2.7467, + "learning_rate": 1.7527628837982574e-05, + "loss": 2.7398, "step": 196100 }, { "epoch": 0.65, - "learning_rate": 1.7520320492658137e-05, - "loss": 2.747, + "learning_rate": 1.75110697501896e-05, + "loss": 2.7369, "step": 196200 }, { "epoch": 0.65, - "learning_rate": 1.7503766119820553e-05, - "loss": 2.7498, + "learning_rate": 1.749451066239663e-05, + "loss": 2.7385, "step": 196300 }, { "epoch": 0.65, - "learning_rate": 1.7487211746982968e-05, - "loss": 2.7498, + "learning_rate": 1.747795157460366e-05, + "loss": 2.7371, "step": 196400 }, { "epoch": 0.65, - "learning_rate": 1.747065737414538e-05, - "loss": 2.7677, + "learning_rate": 1.746139248681069e-05, + "loss": 2.7375, "step": 196500 }, { "epoch": 0.65, - "learning_rate": 1.7454103001307796e-05, - "loss": 2.7308, + "learning_rate": 1.7444833399017716e-05, + "loss": 2.7333, "step": 196600 }, { "epoch": 0.65, - "learning_rate": 1.743754862847021e-05, - "loss": 2.7521, + "learning_rate": 1.7428274311224743e-05, + "loss": 2.7297, "step": 196700 }, { "epoch": 0.65, - "learning_rate": 1.7420994255632624e-05, - "loss": 2.7413, + "learning_rate": 1.7411715223431773e-05, + "loss": 2.7652, "step": 196800 }, { "epoch": 0.65, - "learning_rate": 1.740443988279504e-05, - "loss": 2.7473, + "learning_rate": 1.7395156135638803e-05, + "loss": 2.7275, "step": 196900 }, { "epoch": 0.65, - "learning_rate": 1.7387885509957455e-05, - "loss": 2.7511, + "learning_rate": 1.737859704784583e-05, + "loss": 2.7314, "step": 197000 }, { "epoch": 0.65, - "learning_rate": 1.737133113711987e-05, - "loss": 2.7548, + "learning_rate": 1.7362037960052857e-05, + "loss": 2.7405, "step": 197100 }, { "epoch": 0.65, - "learning_rate": 1.7354776764282287e-05, - "loss": 2.7597, + "learning_rate": 1.7345478872259884e-05, + "loss": 2.7291, "step": 197200 }, { "epoch": 0.65, - "learning_rate": 1.73382223914447e-05, - "loss": 2.7376, + "learning_rate": 1.7328919784466914e-05, + "loss": 2.7445, "step": 197300 }, { "epoch": 0.65, - "learning_rate": 1.7321668018607115e-05, - "loss": 2.7557, + "learning_rate": 1.7312360696673944e-05, + "loss": 2.7509, "step": 197400 }, { "epoch": 0.65, - "learning_rate": 1.730511364576953e-05, - "loss": 2.7479, + "learning_rate": 1.729580160888097e-05, + "loss": 2.7429, "step": 197500 }, { "epoch": 0.65, - "learning_rate": 1.7288559272931946e-05, - "loss": 2.7596, + "learning_rate": 1.7279242521088e-05, + "loss": 2.7431, "step": 197600 }, { "epoch": 0.65, - "learning_rate": 1.727200490009436e-05, - "loss": 2.7483, + "learning_rate": 1.7262683433295025e-05, + "loss": 2.7473, "step": 197700 }, { - "epoch": 0.65, - "learning_rate": 1.7255450527256774e-05, - "loss": 2.7583, + "epoch": 0.66, + "learning_rate": 1.7246124345502056e-05, + "loss": 2.7478, "step": 197800 }, { "epoch": 0.66, - "learning_rate": 1.723889615441919e-05, - "loss": 2.7377, + "learning_rate": 1.7229565257709086e-05, + "loss": 2.7408, "step": 197900 }, { "epoch": 0.66, - "learning_rate": 1.7222341781581605e-05, - "loss": 2.7514, + "learning_rate": 1.7213006169916113e-05, + "loss": 2.7434, "step": 198000 }, { "epoch": 0.66, - "learning_rate": 1.720578740874402e-05, - "loss": 2.7455, + "learning_rate": 1.719644708212314e-05, + "loss": 2.7364, "step": 198100 }, { "epoch": 0.66, - "learning_rate": 1.7189233035906436e-05, - "loss": 2.7546, + "learning_rate": 1.717988799433017e-05, + "loss": 2.7433, "step": 198200 }, { "epoch": 0.66, - "learning_rate": 1.717267866306885e-05, - "loss": 2.7573, + "learning_rate": 1.71633289065372e-05, + "loss": 2.7405, "step": 198300 }, { "epoch": 0.66, - "learning_rate": 1.7156124290231264e-05, - "loss": 2.7514, + "learning_rate": 1.7146769818744227e-05, + "loss": 2.7361, "step": 198400 }, { "epoch": 0.66, - "learning_rate": 1.713956991739368e-05, - "loss": 2.7394, + "learning_rate": 1.7130210730951254e-05, + "loss": 2.7447, "step": 198500 }, { "epoch": 0.66, - "learning_rate": 1.7123015544556096e-05, - "loss": 2.7632, + "learning_rate": 1.711365164315828e-05, + "loss": 2.7294, "step": 198600 }, { "epoch": 0.66, - "learning_rate": 1.710646117171851e-05, - "loss": 2.7399, + "learning_rate": 1.709709255536531e-05, + "loss": 2.731, "step": 198700 }, { "epoch": 0.66, - "learning_rate": 1.7089906798880927e-05, - "loss": 2.7694, + "learning_rate": 1.708053346757234e-05, + "loss": 2.7371, "step": 198800 }, { "epoch": 0.66, - "learning_rate": 1.707335242604334e-05, - "loss": 2.7415, + "learning_rate": 1.706397437977937e-05, + "loss": 2.7419, "step": 198900 }, { "epoch": 0.66, - "learning_rate": 1.7056798053205755e-05, - "loss": 2.7398, + "learning_rate": 1.7047415291986395e-05, + "loss": 2.749, "step": 199000 }, { "epoch": 0.66, - "learning_rate": 1.704024368036817e-05, - "loss": 2.7443, + "learning_rate": 1.7030856204193422e-05, + "loss": 2.7477, "step": 199100 }, { "epoch": 0.66, - "learning_rate": 1.7023689307530586e-05, - "loss": 2.7468, + "learning_rate": 1.7014297116400453e-05, + "loss": 2.7424, "step": 199200 }, { "epoch": 0.66, - "learning_rate": 1.7007134934693002e-05, - "loss": 2.7511, + "learning_rate": 1.6997738028607483e-05, + "loss": 2.7354, "step": 199300 }, { "epoch": 0.66, - "learning_rate": 1.6990580561855414e-05, - "loss": 2.7378, + "learning_rate": 1.698117894081451e-05, + "loss": 2.7491, "step": 199400 }, { "epoch": 0.66, - "learning_rate": 1.697402618901783e-05, - "loss": 2.749, + "learning_rate": 1.6964619853021537e-05, + "loss": 2.751, "step": 199500 }, { "epoch": 0.66, - "learning_rate": 1.6957471816180245e-05, - "loss": 2.7451, + "learning_rate": 1.6948060765228567e-05, + "loss": 2.7344, "step": 199600 }, { "epoch": 0.66, - "learning_rate": 1.694091744334266e-05, - "loss": 2.7444, + "learning_rate": 1.6931501677435594e-05, + "loss": 2.7399, "step": 199700 }, { "epoch": 0.66, - "learning_rate": 1.6924363070505077e-05, - "loss": 2.7568, + "learning_rate": 1.6914942589642624e-05, + "loss": 2.7261, "step": 199800 }, { "epoch": 0.66, - "learning_rate": 1.690780869766749e-05, - "loss": 2.7527, + "learning_rate": 1.689838350184965e-05, + "loss": 2.7282, "step": 199900 }, { "epoch": 0.66, - "learning_rate": 1.6891254324829905e-05, - "loss": 2.759, + "learning_rate": 1.6881824414056678e-05, + "loss": 2.7567, "step": 200000 }, { "epoch": 0.66, - "learning_rate": 1.687469995199232e-05, - "loss": 2.7563, + "learning_rate": 1.686526532626371e-05, + "loss": 2.7304, "step": 200100 }, { "epoch": 0.66, - "learning_rate": 1.6858145579154736e-05, - "loss": 2.7435, + "learning_rate": 1.6848706238470735e-05, + "loss": 2.7329, "step": 200200 }, { "epoch": 0.66, - "learning_rate": 1.684159120631715e-05, - "loss": 2.7459, + "learning_rate": 1.6832147150677766e-05, + "loss": 2.7499, "step": 200300 }, { "epoch": 0.66, - "learning_rate": 1.6825036833479567e-05, - "loss": 2.7453, + "learning_rate": 1.6815588062884792e-05, + "loss": 2.7355, "step": 200400 }, { "epoch": 0.66, - "learning_rate": 1.680848246064198e-05, - "loss": 2.7501, + "learning_rate": 1.6799028975091823e-05, + "loss": 2.7495, "step": 200500 }, { "epoch": 0.66, - "learning_rate": 1.6791928087804395e-05, - "loss": 2.7485, + "learning_rate": 1.678246988729885e-05, + "loss": 2.7438, "step": 200600 }, { "epoch": 0.66, - "learning_rate": 1.677537371496681e-05, - "loss": 2.7446, + "learning_rate": 1.6765910799505877e-05, + "loss": 2.7387, "step": 200700 }, { - "epoch": 0.66, - "learning_rate": 1.6758819342129226e-05, - "loss": 2.7659, + "epoch": 0.67, + "learning_rate": 1.6749351711712907e-05, + "loss": 2.7485, "step": 200800 }, { "epoch": 0.67, - "learning_rate": 1.6742264969291642e-05, - "loss": 2.7559, + "learning_rate": 1.6732792623919934e-05, + "loss": 2.7428, "step": 200900 }, { "epoch": 0.67, - "learning_rate": 1.6725710596454054e-05, - "loss": 2.7515, + "learning_rate": 1.6716233536126964e-05, + "loss": 2.7364, "step": 201000 }, { "epoch": 0.67, - "learning_rate": 1.6709156223616467e-05, - "loss": 2.7527, + "learning_rate": 1.669967444833399e-05, + "loss": 2.7413, "step": 201100 }, { "epoch": 0.67, - "learning_rate": 1.6692601850778882e-05, - "loss": 2.7458, + "learning_rate": 1.6683115360541018e-05, + "loss": 2.7507, "step": 201200 }, { "epoch": 0.67, - "learning_rate": 1.6676047477941298e-05, - "loss": 2.7527, + "learning_rate": 1.6666556272748048e-05, + "loss": 2.713, "step": 201300 }, { "epoch": 0.67, - "learning_rate": 1.6659493105103713e-05, - "loss": 2.7563, + "learning_rate": 1.6649997184955075e-05, + "loss": 2.7451, "step": 201400 }, { "epoch": 0.67, - "learning_rate": 1.664293873226613e-05, - "loss": 2.7574, + "learning_rate": 1.6633438097162105e-05, + "loss": 2.7487, "step": 201500 }, { "epoch": 0.67, - "learning_rate": 1.662638435942854e-05, - "loss": 2.7438, + "learning_rate": 1.6616879009369132e-05, + "loss": 2.7377, "step": 201600 }, { "epoch": 0.67, - "learning_rate": 1.6609829986590957e-05, - "loss": 2.7436, + "learning_rate": 1.660031992157616e-05, + "loss": 2.7419, "step": 201700 }, { "epoch": 0.67, - "learning_rate": 1.6593275613753373e-05, - "loss": 2.7473, + "learning_rate": 1.658376083378319e-05, + "loss": 2.7301, "step": 201800 }, { "epoch": 0.67, - "learning_rate": 1.657672124091579e-05, - "loss": 2.7459, + "learning_rate": 1.656720174599022e-05, + "loss": 2.7458, "step": 201900 }, { "epoch": 0.67, - "learning_rate": 1.6560166868078204e-05, - "loss": 2.7338, + "learning_rate": 1.6550642658197247e-05, + "loss": 2.7329, "step": 202000 }, { "epoch": 0.67, - "learning_rate": 1.654361249524062e-05, - "loss": 2.761, + "learning_rate": 1.6534083570404274e-05, + "loss": 2.7547, "step": 202100 }, { "epoch": 0.67, - "learning_rate": 1.6527058122403032e-05, - "loss": 2.7412, + "learning_rate": 1.65175244826113e-05, + "loss": 2.7139, "step": 202200 }, { "epoch": 0.67, - "learning_rate": 1.6510503749565448e-05, - "loss": 2.7504, + "learning_rate": 1.650096539481833e-05, + "loss": 2.7398, "step": 202300 }, { "epoch": 0.67, - "learning_rate": 1.6493949376727863e-05, - "loss": 2.759, + "learning_rate": 1.648440630702536e-05, + "loss": 2.7443, "step": 202400 }, { "epoch": 0.67, - "learning_rate": 1.647739500389028e-05, - "loss": 2.7497, + "learning_rate": 1.6467847219232388e-05, + "loss": 2.7437, "step": 202500 }, { "epoch": 0.67, - "learning_rate": 1.6460840631052694e-05, - "loss": 2.7438, + "learning_rate": 1.6451288131439415e-05, + "loss": 2.7371, "step": 202600 }, { "epoch": 0.67, - "learning_rate": 1.6444286258215107e-05, - "loss": 2.7664, + "learning_rate": 1.6434729043646442e-05, + "loss": 2.7587, "step": 202700 }, { "epoch": 0.67, - "learning_rate": 1.6427731885377522e-05, - "loss": 2.7446, + "learning_rate": 1.6418169955853476e-05, + "loss": 2.7479, "step": 202800 }, { "epoch": 0.67, - "learning_rate": 1.6411177512539938e-05, - "loss": 2.7489, + "learning_rate": 1.6401610868060503e-05, + "loss": 2.738, "step": 202900 }, { "epoch": 0.67, - "learning_rate": 1.6394623139702354e-05, - "loss": 2.7471, + "learning_rate": 1.638505178026753e-05, + "loss": 2.7384, "step": 203000 }, { "epoch": 0.67, - "learning_rate": 1.637806876686477e-05, - "loss": 2.7554, + "learning_rate": 1.6368492692474556e-05, + "loss": 2.7542, "step": 203100 }, { "epoch": 0.67, - "learning_rate": 1.636151439402718e-05, - "loss": 2.752, + "learning_rate": 1.6351933604681587e-05, + "loss": 2.7438, "step": 203200 }, { "epoch": 0.67, - "learning_rate": 1.6344960021189597e-05, - "loss": 2.747, + "learning_rate": 1.6335374516888617e-05, + "loss": 2.74, "step": 203300 }, { "epoch": 0.67, - "learning_rate": 1.6328405648352013e-05, - "loss": 2.7559, + "learning_rate": 1.6318815429095644e-05, + "loss": 2.7262, "step": 203400 }, { "epoch": 0.67, - "learning_rate": 1.631185127551443e-05, - "loss": 2.7415, + "learning_rate": 1.630225634130267e-05, + "loss": 2.7397, "step": 203500 }, { "epoch": 0.67, - "learning_rate": 1.6295296902676844e-05, - "loss": 2.7489, + "learning_rate": 1.6285697253509698e-05, + "loss": 2.7411, "step": 203600 }, { "epoch": 0.67, - "learning_rate": 1.627874252983926e-05, + "learning_rate": 1.6269138165716728e-05, "loss": 2.7475, "step": 203700 }, { "epoch": 0.67, - "learning_rate": 1.6262188157001672e-05, - "loss": 2.7533, + "learning_rate": 1.625257907792376e-05, + "loss": 2.7394, "step": 203800 }, { "epoch": 0.68, - "learning_rate": 1.6245633784164088e-05, - "loss": 2.7485, + "learning_rate": 1.6236019990130785e-05, + "loss": 2.727, "step": 203900 }, { "epoch": 0.68, - "learning_rate": 1.6229079411326503e-05, - "loss": 2.7485, + "learning_rate": 1.6219460902337812e-05, + "loss": 2.7518, "step": 204000 }, { "epoch": 0.68, - "learning_rate": 1.621252503848892e-05, - "loss": 2.7488, + "learning_rate": 1.6202901814544842e-05, + "loss": 2.7461, "step": 204100 }, { "epoch": 0.68, - "learning_rate": 1.6195970665651335e-05, - "loss": 2.7612, + "learning_rate": 1.618634272675187e-05, + "loss": 2.734, "step": 204200 }, { "epoch": 0.68, - "learning_rate": 1.6179416292813747e-05, - "loss": 2.7536, + "learning_rate": 1.61697836389589e-05, + "loss": 2.7206, "step": 204300 }, { "epoch": 0.68, - "learning_rate": 1.6162861919976163e-05, - "loss": 2.7531, + "learning_rate": 1.6153224551165927e-05, + "loss": 2.762, "step": 204400 }, { "epoch": 0.68, - "learning_rate": 1.6146307547138578e-05, - "loss": 2.7504, + "learning_rate": 1.6136665463372953e-05, + "loss": 2.7351, "step": 204500 }, { "epoch": 0.68, - "learning_rate": 1.6129753174300994e-05, - "loss": 2.7473, + "learning_rate": 1.6120106375579984e-05, + "loss": 2.7437, "step": 204600 }, { "epoch": 0.68, - "learning_rate": 1.611319880146341e-05, - "loss": 2.7492, + "learning_rate": 1.610354728778701e-05, + "loss": 2.7519, "step": 204700 }, { "epoch": 0.68, - "learning_rate": 1.6096644428625822e-05, + "learning_rate": 1.608698819999404e-05, "loss": 2.7358, "step": 204800 }, { "epoch": 0.68, - "learning_rate": 1.6080090055788237e-05, - "loss": 2.7365, + "learning_rate": 1.6070429112201068e-05, + "loss": 2.7435, "step": 204900 }, { "epoch": 0.68, - "learning_rate": 1.6063535682950653e-05, - "loss": 2.7672, + "learning_rate": 1.6053870024408095e-05, + "loss": 2.7644, "step": 205000 }, { "epoch": 0.68, - "learning_rate": 1.604698131011307e-05, - "loss": 2.7342, + "learning_rate": 1.6037310936615125e-05, + "loss": 2.7336, "step": 205100 }, { "epoch": 0.68, - "learning_rate": 1.6030426937275484e-05, - "loss": 2.742, + "learning_rate": 1.6020751848822152e-05, + "loss": 2.7355, "step": 205200 }, { "epoch": 0.68, - "learning_rate": 1.60138725644379e-05, - "loss": 2.7537, + "learning_rate": 1.6004192761029182e-05, + "loss": 2.7354, "step": 205300 }, { "epoch": 0.68, - "learning_rate": 1.5997318191600312e-05, - "loss": 2.7555, + "learning_rate": 1.598763367323621e-05, + "loss": 2.7431, "step": 205400 }, { "epoch": 0.68, - "learning_rate": 1.5980763818762725e-05, - "loss": 2.7458, + "learning_rate": 1.597107458544324e-05, + "loss": 2.7423, "step": 205500 }, { "epoch": 0.68, - "learning_rate": 1.596420944592514e-05, - "loss": 2.7538, + "learning_rate": 1.5954515497650266e-05, + "loss": 2.7402, "step": 205600 }, { "epoch": 0.68, - "learning_rate": 1.5947655073087556e-05, - "loss": 2.7593, + "learning_rate": 1.5937956409857293e-05, + "loss": 2.7426, "step": 205700 }, { "epoch": 0.68, - "learning_rate": 1.593110070024997e-05, - "loss": 2.7588, + "learning_rate": 1.5921397322064324e-05, + "loss": 2.7303, "step": 205800 }, { "epoch": 0.68, - "learning_rate": 1.5914546327412387e-05, - "loss": 2.7378, + "learning_rate": 1.590483823427135e-05, + "loss": 2.7449, "step": 205900 }, { "epoch": 0.68, - "learning_rate": 1.58979919545748e-05, - "loss": 2.7615, + "learning_rate": 1.588827914647838e-05, + "loss": 2.7423, "step": 206000 }, { "epoch": 0.68, - "learning_rate": 1.5881437581737215e-05, - "loss": 2.7508, + "learning_rate": 1.5871720058685408e-05, + "loss": 2.7294, "step": 206100 }, { "epoch": 0.68, - "learning_rate": 1.586488320889963e-05, - "loss": 2.7431, + "learning_rate": 1.5855160970892435e-05, + "loss": 2.7421, "step": 206200 }, { "epoch": 0.68, - "learning_rate": 1.5848328836062046e-05, - "loss": 2.7467, + "learning_rate": 1.5838601883099465e-05, + "loss": 2.7409, "step": 206300 }, { "epoch": 0.68, - "learning_rate": 1.5831774463224462e-05, - "loss": 2.7561, + "learning_rate": 1.5822042795306495e-05, + "loss": 2.7481, "step": 206400 }, { "epoch": 0.68, - "learning_rate": 1.5815220090386874e-05, - "loss": 2.7546, + "learning_rate": 1.5805483707513522e-05, + "loss": 2.7521, "step": 206500 }, { "epoch": 0.68, - "learning_rate": 1.579866571754929e-05, - "loss": 2.7325, + "learning_rate": 1.578892461972055e-05, + "loss": 2.7391, "step": 206600 }, { "epoch": 0.68, - "learning_rate": 1.5782111344711706e-05, - "loss": 2.7487, + "learning_rate": 1.5772365531927576e-05, + "loss": 2.7332, "step": 206700 }, { "epoch": 0.68, - "learning_rate": 1.576555697187412e-05, - "loss": 2.747, + "learning_rate": 1.5755806444134606e-05, + "loss": 2.7301, "step": 206800 }, { "epoch": 0.69, - "learning_rate": 1.5749002599036537e-05, - "loss": 2.7418, + "learning_rate": 1.5739247356341637e-05, + "loss": 2.7335, "step": 206900 }, { "epoch": 0.69, - "learning_rate": 1.5732448226198953e-05, - "loss": 2.7484, + "learning_rate": 1.5722688268548664e-05, + "loss": 2.7397, "step": 207000 }, { "epoch": 0.69, - "learning_rate": 1.5715893853361365e-05, - "loss": 2.7646, + "learning_rate": 1.570612918075569e-05, + "loss": 2.7328, "step": 207100 }, { "epoch": 0.69, - "learning_rate": 1.569933948052378e-05, - "loss": 2.7525, + "learning_rate": 1.5689570092962717e-05, + "loss": 2.745, "step": 207200 }, { "epoch": 0.69, - "learning_rate": 1.5682785107686196e-05, - "loss": 2.7397, + "learning_rate": 1.5673011005169748e-05, + "loss": 2.7366, "step": 207300 }, { "epoch": 0.69, - "learning_rate": 1.5666230734848612e-05, - "loss": 2.753, + "learning_rate": 1.5656451917376778e-05, + "loss": 2.7424, "step": 207400 }, { "epoch": 0.69, - "learning_rate": 1.5649676362011027e-05, - "loss": 2.7396, + "learning_rate": 1.5639892829583805e-05, + "loss": 2.7485, "step": 207500 }, { "epoch": 0.69, - "learning_rate": 1.563312198917344e-05, - "loss": 2.7388, + "learning_rate": 1.5623333741790832e-05, + "loss": 2.7466, "step": 207600 }, { "epoch": 0.69, - "learning_rate": 1.5616567616335855e-05, - "loss": 2.7366, + "learning_rate": 1.5606774653997862e-05, + "loss": 2.7467, "step": 207700 }, { "epoch": 0.69, - "learning_rate": 1.560001324349827e-05, - "loss": 2.7487, + "learning_rate": 1.5590215566204892e-05, + "loss": 2.7453, "step": 207800 }, { "epoch": 0.69, - "learning_rate": 1.5583458870660687e-05, - "loss": 2.7411, + "learning_rate": 1.557365647841192e-05, + "loss": 2.7395, "step": 207900 }, { "epoch": 0.69, - "learning_rate": 1.5566904497823102e-05, - "loss": 2.7539, + "learning_rate": 1.5557097390618946e-05, + "loss": 2.748, "step": 208000 }, { "epoch": 0.69, - "learning_rate": 1.5550350124985515e-05, - "loss": 2.7649, + "learning_rate": 1.5540538302825973e-05, + "loss": 2.7507, "step": 208100 }, { "epoch": 0.69, - "learning_rate": 1.553379575214793e-05, - "loss": 2.7524, + "learning_rate": 1.5523979215033003e-05, + "loss": 2.7468, "step": 208200 }, { "epoch": 0.69, - "learning_rate": 1.5517241379310346e-05, - "loss": 2.7527, + "learning_rate": 1.5507420127240034e-05, + "loss": 2.7397, "step": 208300 }, { "epoch": 0.69, - "learning_rate": 1.550068700647276e-05, - "loss": 2.7479, + "learning_rate": 1.549086103944706e-05, + "loss": 2.7297, "step": 208400 }, { "epoch": 0.69, - "learning_rate": 1.5484132633635177e-05, - "loss": 2.7526, + "learning_rate": 1.5474301951654088e-05, + "loss": 2.7447, "step": 208500 }, { "epoch": 0.69, - "learning_rate": 1.5467578260797593e-05, - "loss": 2.7502, + "learning_rate": 1.5457742863861114e-05, + "loss": 2.7362, "step": 208600 }, { "epoch": 0.69, - "learning_rate": 1.5451023887960005e-05, - "loss": 2.7517, + "learning_rate": 1.5441183776068145e-05, + "loss": 2.7343, "step": 208700 }, { "epoch": 0.69, - "learning_rate": 1.543446951512242e-05, - "loss": 2.7367, + "learning_rate": 1.5424624688275175e-05, + "loss": 2.7268, "step": 208800 }, { "epoch": 0.69, - "learning_rate": 1.5417915142284836e-05, - "loss": 2.7555, + "learning_rate": 1.5408065600482202e-05, + "loss": 2.7216, "step": 208900 }, { "epoch": 0.69, - "learning_rate": 1.5401360769447252e-05, - "loss": 2.7452, + "learning_rate": 1.539150651268923e-05, + "loss": 2.7378, "step": 209000 }, { "epoch": 0.69, - "learning_rate": 1.5384806396609668e-05, - "loss": 2.7528, + "learning_rate": 1.537494742489626e-05, + "loss": 2.7276, "step": 209100 }, { "epoch": 0.69, - "learning_rate": 1.536825202377208e-05, - "loss": 2.7645, + "learning_rate": 1.5358388337103286e-05, + "loss": 2.7316, "step": 209200 }, { "epoch": 0.69, - "learning_rate": 1.5351697650934496e-05, - "loss": 2.7402, + "learning_rate": 1.5341829249310316e-05, + "loss": 2.7451, "step": 209300 }, { "epoch": 0.69, - "learning_rate": 1.533514327809691e-05, - "loss": 2.7583, + "learning_rate": 1.5325270161517343e-05, + "loss": 2.7364, "step": 209400 }, { "epoch": 0.69, - "learning_rate": 1.5318588905259327e-05, - "loss": 2.7396, + "learning_rate": 1.530871107372437e-05, + "loss": 2.7355, "step": 209500 }, { "epoch": 0.69, - "learning_rate": 1.5302034532421742e-05, - "loss": 2.7366, + "learning_rate": 1.52921519859314e-05, + "loss": 2.7339, "step": 209600 }, { "epoch": 0.69, - "learning_rate": 1.5285480159584155e-05, - "loss": 2.7318, + "learning_rate": 1.5275592898138427e-05, + "loss": 2.7487, "step": 209700 }, { "epoch": 0.69, - "learning_rate": 1.5268925786746567e-05, - "loss": 2.7577, + "learning_rate": 1.5259033810345458e-05, + "loss": 2.7462, "step": 209800 }, { - "epoch": 0.69, - "learning_rate": 1.5252371413908984e-05, - "loss": 2.756, + "epoch": 0.7, + "learning_rate": 1.5242474722552486e-05, + "loss": 2.7466, "step": 209900 }, { "epoch": 0.7, - "learning_rate": 1.5235817041071398e-05, - "loss": 2.7433, + "learning_rate": 1.5225915634759513e-05, + "loss": 2.7356, "step": 210000 }, { "epoch": 0.7, - "learning_rate": 1.5219262668233814e-05, - "loss": 2.7425, + "learning_rate": 1.5209356546966542e-05, + "loss": 2.7226, "step": 210100 }, { "epoch": 0.7, - "learning_rate": 1.5202708295396228e-05, - "loss": 2.7447, + "learning_rate": 1.5192797459173569e-05, + "loss": 2.7362, "step": 210200 }, { "epoch": 0.7, - "learning_rate": 1.5186153922558644e-05, - "loss": 2.7478, + "learning_rate": 1.5176238371380599e-05, + "loss": 2.7423, "step": 210300 }, { "epoch": 0.7, - "learning_rate": 1.516959954972106e-05, - "loss": 2.7383, + "learning_rate": 1.5159679283587628e-05, + "loss": 2.7491, "step": 210400 }, { "epoch": 0.7, - "learning_rate": 1.5153045176883473e-05, - "loss": 2.7537, + "learning_rate": 1.5143120195794655e-05, + "loss": 2.7306, "step": 210500 }, { "epoch": 0.7, - "learning_rate": 1.5136490804045889e-05, - "loss": 2.7604, + "learning_rate": 1.5126561108001683e-05, + "loss": 2.7319, "step": 210600 }, { "epoch": 0.7, - "learning_rate": 1.5119936431208304e-05, - "loss": 2.7393, + "learning_rate": 1.511000202020871e-05, + "loss": 2.7384, "step": 210700 }, { "epoch": 0.7, - "learning_rate": 1.5103382058370718e-05, - "loss": 2.7381, + "learning_rate": 1.509344293241574e-05, + "loss": 2.7373, "step": 210800 }, { "epoch": 0.7, - "learning_rate": 1.5086827685533134e-05, - "loss": 2.7523, + "learning_rate": 1.5076883844622769e-05, + "loss": 2.7249, "step": 210900 }, { "epoch": 0.7, - "learning_rate": 1.5070273312695548e-05, - "loss": 2.7367, + "learning_rate": 1.5060324756829796e-05, + "loss": 2.743, "step": 211000 }, { "epoch": 0.7, - "learning_rate": 1.5053718939857964e-05, - "loss": 2.7387, + "learning_rate": 1.5043765669036824e-05, + "loss": 2.7335, "step": 211100 }, { "epoch": 0.7, - "learning_rate": 1.503716456702038e-05, - "loss": 2.7369, + "learning_rate": 1.5027206581243853e-05, + "loss": 2.7283, "step": 211200 }, { "epoch": 0.7, - "learning_rate": 1.5020610194182793e-05, - "loss": 2.7399, + "learning_rate": 1.5010647493450883e-05, + "loss": 2.734, "step": 211300 }, { "epoch": 0.7, - "learning_rate": 1.5004055821345209e-05, - "loss": 2.7459, + "learning_rate": 1.499408840565791e-05, + "loss": 2.731, "step": 211400 }, { "epoch": 0.7, - "learning_rate": 1.4987501448507625e-05, - "loss": 2.7443, + "learning_rate": 1.4977529317864939e-05, + "loss": 2.7261, "step": 211500 }, { "epoch": 0.7, - "learning_rate": 1.4970947075670039e-05, - "loss": 2.7498, + "learning_rate": 1.4960970230071966e-05, + "loss": 2.733, "step": 211600 }, { "epoch": 0.7, - "learning_rate": 1.4954392702832454e-05, - "loss": 2.7362, + "learning_rate": 1.4944411142278994e-05, + "loss": 2.7482, "step": 211700 }, { "epoch": 0.7, - "learning_rate": 1.4937838329994868e-05, - "loss": 2.7286, + "learning_rate": 1.4927852054486025e-05, + "loss": 2.7321, "step": 211800 }, { "epoch": 0.7, - "learning_rate": 1.4921283957157284e-05, - "loss": 2.7462, + "learning_rate": 1.4911292966693052e-05, + "loss": 2.7307, "step": 211900 }, { "epoch": 0.7, - "learning_rate": 1.49047295843197e-05, - "loss": 2.7569, + "learning_rate": 1.489473387890008e-05, + "loss": 2.7265, "step": 212000 }, { "epoch": 0.7, - "learning_rate": 1.4888175211482113e-05, - "loss": 2.7475, + "learning_rate": 1.4878174791107107e-05, + "loss": 2.7469, "step": 212100 }, { "epoch": 0.7, - "learning_rate": 1.4871620838644529e-05, - "loss": 2.7441, + "learning_rate": 1.4861615703314136e-05, + "loss": 2.7325, "step": 212200 }, { "epoch": 0.7, - "learning_rate": 1.4855066465806945e-05, - "loss": 2.7449, + "learning_rate": 1.4845056615521166e-05, + "loss": 2.7321, "step": 212300 }, { "epoch": 0.7, - "learning_rate": 1.4838512092969359e-05, - "loss": 2.7508, + "learning_rate": 1.4828497527728195e-05, + "loss": 2.7393, "step": 212400 }, { "epoch": 0.7, - "learning_rate": 1.4821957720131774e-05, - "loss": 2.7462, + "learning_rate": 1.4811938439935222e-05, + "loss": 2.7369, "step": 212500 }, { "epoch": 0.7, - "learning_rate": 1.4805403347294188e-05, - "loss": 2.7522, + "learning_rate": 1.479537935214225e-05, + "loss": 2.7519, "step": 212600 }, { "epoch": 0.7, - "learning_rate": 1.4788848974456604e-05, - "loss": 2.7611, + "learning_rate": 1.4778820264349277e-05, + "loss": 2.7335, "step": 212700 }, { "epoch": 0.7, - "learning_rate": 1.477229460161902e-05, - "loss": 2.7445, + "learning_rate": 1.4762261176556307e-05, + "loss": 2.7435, "step": 212800 }, { - "epoch": 0.7, - "learning_rate": 1.4755740228781433e-05, - "loss": 2.7321, + "epoch": 0.71, + "learning_rate": 1.4745702088763336e-05, + "loss": 2.7406, "step": 212900 }, { "epoch": 0.71, - "learning_rate": 1.4739185855943849e-05, - "loss": 2.7474, + "learning_rate": 1.4729143000970363e-05, + "loss": 2.7373, "step": 213000 }, { "epoch": 0.71, - "learning_rate": 1.4722631483106265e-05, - "loss": 2.7665, + "learning_rate": 1.4712583913177392e-05, + "loss": 2.7298, "step": 213100 }, { "epoch": 0.71, - "learning_rate": 1.4706077110268679e-05, - "loss": 2.7478, + "learning_rate": 1.4696024825384418e-05, + "loss": 2.7432, "step": 213200 }, { "epoch": 0.71, - "learning_rate": 1.4689522737431094e-05, - "loss": 2.7396, + "learning_rate": 1.467946573759145e-05, + "loss": 2.7399, "step": 213300 }, { "epoch": 0.71, - "learning_rate": 1.4672968364593508e-05, - "loss": 2.7526, + "learning_rate": 1.4662906649798477e-05, + "loss": 2.7334, "step": 213400 }, { "epoch": 0.71, - "learning_rate": 1.4656413991755924e-05, - "loss": 2.7684, + "learning_rate": 1.4646347562005506e-05, + "loss": 2.721, "step": 213500 }, { "epoch": 0.71, - "learning_rate": 1.463985961891834e-05, - "loss": 2.7255, + "learning_rate": 1.4629788474212533e-05, + "loss": 2.7371, "step": 213600 }, { "epoch": 0.71, - "learning_rate": 1.4623305246080754e-05, - "loss": 2.7546, + "learning_rate": 1.4613229386419561e-05, + "loss": 2.7389, "step": 213700 }, { "epoch": 0.71, - "learning_rate": 1.460675087324317e-05, - "loss": 2.7313, + "learning_rate": 1.4596670298626592e-05, + "loss": 2.7398, "step": 213800 }, { "epoch": 0.71, - "learning_rate": 1.4590196500405585e-05, - "loss": 2.7372, + "learning_rate": 1.4580111210833619e-05, + "loss": 2.7177, "step": 213900 }, { "epoch": 0.71, - "learning_rate": 1.4573642127567999e-05, - "loss": 2.7446, + "learning_rate": 1.4563552123040647e-05, + "loss": 2.7336, "step": 214000 }, { "epoch": 0.71, - "learning_rate": 1.4557087754730414e-05, - "loss": 2.7521, + "learning_rate": 1.4546993035247674e-05, + "loss": 2.7318, "step": 214100 }, { "epoch": 0.71, - "learning_rate": 1.4540533381892827e-05, - "loss": 2.7374, + "learning_rate": 1.4530433947454703e-05, + "loss": 2.7353, "step": 214200 }, { "epoch": 0.71, - "learning_rate": 1.452397900905524e-05, - "loss": 2.7505, + "learning_rate": 1.4513874859661733e-05, + "loss": 2.736, "step": 214300 }, { "epoch": 0.71, - "learning_rate": 1.4507424636217656e-05, - "loss": 2.7417, + "learning_rate": 1.449731577186876e-05, + "loss": 2.7365, "step": 214400 }, { "epoch": 0.71, - "learning_rate": 1.4490870263380072e-05, - "loss": 2.7523, + "learning_rate": 1.4480756684075789e-05, + "loss": 2.7302, "step": 214500 }, { "epoch": 0.71, - "learning_rate": 1.4474315890542486e-05, - "loss": 2.7588, + "learning_rate": 1.4464197596282816e-05, + "loss": 2.7378, "step": 214600 }, { "epoch": 0.71, - "learning_rate": 1.4457761517704902e-05, - "loss": 2.7521, + "learning_rate": 1.4447638508489844e-05, + "loss": 2.7422, "step": 214700 }, { "epoch": 0.71, - "learning_rate": 1.4441207144867317e-05, - "loss": 2.7417, + "learning_rate": 1.4431079420696874e-05, + "loss": 2.7343, "step": 214800 }, { "epoch": 0.71, - "learning_rate": 1.4424652772029731e-05, - "loss": 2.7422, + "learning_rate": 1.4414520332903903e-05, + "loss": 2.743, "step": 214900 }, { "epoch": 0.71, - "learning_rate": 1.4408098399192147e-05, - "loss": 2.7457, + "learning_rate": 1.439796124511093e-05, + "loss": 2.7572, "step": 215000 }, { "epoch": 0.71, - "learning_rate": 1.439154402635456e-05, - "loss": 2.7333, + "learning_rate": 1.4381402157317959e-05, + "loss": 2.7367, "step": 215100 }, { "epoch": 0.71, - "learning_rate": 1.4374989653516976e-05, - "loss": 2.7256, + "learning_rate": 1.4364843069524985e-05, + "loss": 2.7367, "step": 215200 }, { "epoch": 0.71, - "learning_rate": 1.4358435280679392e-05, - "loss": 2.74, + "learning_rate": 1.4348283981732016e-05, + "loss": 2.7333, "step": 215300 }, { "epoch": 0.71, - "learning_rate": 1.4341880907841806e-05, - "loss": 2.7339, + "learning_rate": 1.4331724893939044e-05, + "loss": 2.7396, "step": 215400 }, { "epoch": 0.71, - "learning_rate": 1.4325326535004222e-05, - "loss": 2.7476, + "learning_rate": 1.4315165806146071e-05, + "loss": 2.737, "step": 215500 }, { "epoch": 0.71, - "learning_rate": 1.4308772162166637e-05, - "loss": 2.7306, + "learning_rate": 1.42986067183531e-05, + "loss": 2.7206, "step": 215600 }, { "epoch": 0.71, - "learning_rate": 1.4292217789329051e-05, - "loss": 2.7513, + "learning_rate": 1.4282047630560127e-05, + "loss": 2.7255, "step": 215700 }, { "epoch": 0.71, - "learning_rate": 1.4275663416491467e-05, - "loss": 2.7415, + "learning_rate": 1.4265488542767159e-05, + "loss": 2.7462, "step": 215800 }, { - "epoch": 0.71, - "learning_rate": 1.4259109043653881e-05, - "loss": 2.7337, + "epoch": 0.72, + "learning_rate": 1.4248929454974186e-05, + "loss": 2.7506, "step": 215900 }, { "epoch": 0.72, - "learning_rate": 1.4242554670816297e-05, - "loss": 2.7585, + "learning_rate": 1.4232370367181214e-05, + "loss": 2.7401, "step": 216000 }, { "epoch": 0.72, - "learning_rate": 1.4226000297978712e-05, - "loss": 2.7559, + "learning_rate": 1.4215811279388241e-05, + "loss": 2.7385, "step": 216100 }, { "epoch": 0.72, - "learning_rate": 1.4209445925141126e-05, - "loss": 2.7436, + "learning_rate": 1.419925219159527e-05, + "loss": 2.7381, "step": 216200 }, { "epoch": 0.72, - "learning_rate": 1.4192891552303542e-05, - "loss": 2.752, + "learning_rate": 1.41826931038023e-05, + "loss": 2.7312, "step": 216300 }, { "epoch": 0.72, - "learning_rate": 1.4176337179465957e-05, - "loss": 2.7367, + "learning_rate": 1.4166134016009327e-05, + "loss": 2.7415, "step": 216400 }, { "epoch": 0.72, - "learning_rate": 1.4159782806628371e-05, - "loss": 2.7435, + "learning_rate": 1.4149574928216356e-05, + "loss": 2.7345, "step": 216500 }, { "epoch": 0.72, - "learning_rate": 1.4143228433790787e-05, - "loss": 2.7464, + "learning_rate": 1.4133015840423383e-05, + "loss": 2.748, "step": 216600 }, { "epoch": 0.72, - "learning_rate": 1.4126674060953201e-05, - "loss": 2.7301, + "learning_rate": 1.4116456752630411e-05, + "loss": 2.748, "step": 216700 }, { "epoch": 0.72, - "learning_rate": 1.4110119688115617e-05, - "loss": 2.7346, + "learning_rate": 1.4099897664837441e-05, + "loss": 2.7453, "step": 216800 }, { "epoch": 0.72, - "learning_rate": 1.4093565315278032e-05, - "loss": 2.7455, + "learning_rate": 1.408333857704447e-05, + "loss": 2.7378, "step": 216900 }, { "epoch": 0.72, - "learning_rate": 1.4077010942440446e-05, - "loss": 2.7432, + "learning_rate": 1.4066779489251497e-05, + "loss": 2.7328, "step": 217000 }, { "epoch": 0.72, - "learning_rate": 1.4060456569602862e-05, - "loss": 2.7568, + "learning_rate": 1.4050220401458526e-05, + "loss": 2.7334, "step": 217100 }, { "epoch": 0.72, - "learning_rate": 1.4043902196765278e-05, - "loss": 2.7589, + "learning_rate": 1.4033661313665552e-05, + "loss": 2.7476, "step": 217200 }, { "epoch": 0.72, - "learning_rate": 1.4027347823927692e-05, - "loss": 2.7595, + "learning_rate": 1.4017102225872583e-05, + "loss": 2.7333, "step": 217300 }, { "epoch": 0.72, - "learning_rate": 1.4010793451090107e-05, - "loss": 2.7482, + "learning_rate": 1.4000543138079611e-05, + "loss": 2.7417, "step": 217400 }, { "epoch": 0.72, - "learning_rate": 1.3994239078252521e-05, - "loss": 2.7512, + "learning_rate": 1.3983984050286638e-05, + "loss": 2.741, "step": 217500 }, { "epoch": 0.72, - "learning_rate": 1.3977684705414937e-05, - "loss": 2.7434, + "learning_rate": 1.3967424962493667e-05, + "loss": 2.7236, "step": 217600 }, { "epoch": 0.72, - "learning_rate": 1.3961130332577352e-05, - "loss": 2.7435, + "learning_rate": 1.3950865874700694e-05, + "loss": 2.7387, "step": 217700 }, { "epoch": 0.72, - "learning_rate": 1.3944575959739766e-05, - "loss": 2.7442, + "learning_rate": 1.3934306786907724e-05, + "loss": 2.7393, "step": 217800 }, { "epoch": 0.72, - "learning_rate": 1.3928021586902182e-05, - "loss": 2.7519, + "learning_rate": 1.3917747699114753e-05, + "loss": 2.7202, "step": 217900 }, { "epoch": 0.72, - "learning_rate": 1.3911467214064598e-05, - "loss": 2.7426, + "learning_rate": 1.390118861132178e-05, + "loss": 2.744, "step": 218000 }, { "epoch": 0.72, - "learning_rate": 1.3894912841227012e-05, - "loss": 2.7457, + "learning_rate": 1.3884629523528808e-05, + "loss": 2.7358, "step": 218100 }, { "epoch": 0.72, - "learning_rate": 1.3878358468389427e-05, - "loss": 2.7438, + "learning_rate": 1.3868070435735835e-05, + "loss": 2.7504, "step": 218200 }, { "epoch": 0.72, - "learning_rate": 1.3861804095551841e-05, - "loss": 2.746, + "learning_rate": 1.3851511347942867e-05, + "loss": 2.7312, "step": 218300 }, { "epoch": 0.72, - "learning_rate": 1.3845249722714257e-05, - "loss": 2.728, + "learning_rate": 1.3834952260149894e-05, + "loss": 2.7199, "step": 218400 }, { "epoch": 0.72, - "learning_rate": 1.3828695349876673e-05, - "loss": 2.7295, + "learning_rate": 1.3818393172356923e-05, + "loss": 2.7406, "step": 218500 }, { "epoch": 0.72, - "learning_rate": 1.3812140977039085e-05, - "loss": 2.7324, + "learning_rate": 1.380183408456395e-05, + "loss": 2.7419, "step": 218600 }, { "epoch": 0.72, - "learning_rate": 1.3795586604201499e-05, - "loss": 2.7502, + "learning_rate": 1.3785274996770978e-05, + "loss": 2.742, "step": 218700 }, { "epoch": 0.72, - "learning_rate": 1.3779032231363914e-05, - "loss": 2.7518, + "learning_rate": 1.3768715908978008e-05, + "loss": 2.7436, "step": 218800 }, { "epoch": 0.72, - "learning_rate": 1.3762477858526328e-05, - "loss": 2.7535, + "learning_rate": 1.3752156821185035e-05, + "loss": 2.7303, "step": 218900 }, { "epoch": 0.73, - "learning_rate": 1.3745923485688744e-05, - "loss": 2.7561, + "learning_rate": 1.3735597733392064e-05, + "loss": 2.7314, "step": 219000 }, { "epoch": 0.73, - "learning_rate": 1.372936911285116e-05, - "loss": 2.7428, + "learning_rate": 1.3719038645599091e-05, + "loss": 2.7448, "step": 219100 }, { "epoch": 0.73, - "learning_rate": 1.3712814740013574e-05, - "loss": 2.7386, + "learning_rate": 1.370247955780612e-05, + "loss": 2.7358, "step": 219200 }, { "epoch": 0.73, - "learning_rate": 1.369626036717599e-05, - "loss": 2.7328, + "learning_rate": 1.368592047001315e-05, + "loss": 2.7307, "step": 219300 }, { "epoch": 0.73, - "learning_rate": 1.3679705994338405e-05, - "loss": 2.7359, + "learning_rate": 1.3669361382220178e-05, + "loss": 2.7211, "step": 219400 }, { "epoch": 0.73, - "learning_rate": 1.3663151621500819e-05, - "loss": 2.7402, + "learning_rate": 1.3652802294427205e-05, + "loss": 2.7314, "step": 219500 }, { "epoch": 0.73, - "learning_rate": 1.3646597248663235e-05, - "loss": 2.7577, + "learning_rate": 1.3636243206634234e-05, + "loss": 2.7512, "step": 219600 }, { "epoch": 0.73, - "learning_rate": 1.3630042875825648e-05, - "loss": 2.7495, + "learning_rate": 1.361968411884126e-05, + "loss": 2.7242, "step": 219700 }, { "epoch": 0.73, - "learning_rate": 1.3613488502988064e-05, - "loss": 2.7579, + "learning_rate": 1.3603125031048291e-05, + "loss": 2.7493, "step": 219800 }, { "epoch": 0.73, - "learning_rate": 1.359693413015048e-05, - "loss": 2.7465, + "learning_rate": 1.358656594325532e-05, + "loss": 2.7336, "step": 219900 }, { "epoch": 0.73, - "learning_rate": 1.3580379757312894e-05, - "loss": 2.7407, + "learning_rate": 1.3570006855462347e-05, + "loss": 2.7397, "step": 220000 }, { "epoch": 0.73, - "learning_rate": 1.356382538447531e-05, - "loss": 2.7404, + "learning_rate": 1.3553447767669375e-05, + "loss": 2.7303, "step": 220100 }, { "epoch": 0.73, - "learning_rate": 1.3547271011637725e-05, - "loss": 2.7524, + "learning_rate": 1.3536888679876402e-05, + "loss": 2.7419, "step": 220200 }, { "epoch": 0.73, - "learning_rate": 1.3530716638800139e-05, - "loss": 2.7446, + "learning_rate": 1.3520329592083432e-05, + "loss": 2.7435, "step": 220300 }, { "epoch": 0.73, - "learning_rate": 1.3514162265962555e-05, - "loss": 2.7589, + "learning_rate": 1.3503770504290461e-05, + "loss": 2.7441, "step": 220400 }, { "epoch": 0.73, - "learning_rate": 1.3497607893124969e-05, - "loss": 2.7576, + "learning_rate": 1.348721141649749e-05, + "loss": 2.7431, "step": 220500 }, { "epoch": 0.73, - "learning_rate": 1.3481053520287384e-05, - "loss": 2.7626, + "learning_rate": 1.3470652328704517e-05, + "loss": 2.7252, "step": 220600 }, { "epoch": 0.73, - "learning_rate": 1.34644991474498e-05, - "loss": 2.7431, + "learning_rate": 1.3454093240911545e-05, + "loss": 2.7465, "step": 220700 }, { "epoch": 0.73, - "learning_rate": 1.3447944774612214e-05, - "loss": 2.7534, + "learning_rate": 1.3437534153118576e-05, + "loss": 2.747, "step": 220800 }, { "epoch": 0.73, - "learning_rate": 1.343139040177463e-05, - "loss": 2.7467, + "learning_rate": 1.3420975065325602e-05, + "loss": 2.7359, "step": 220900 }, { "epoch": 0.73, - "learning_rate": 1.3414836028937045e-05, - "loss": 2.7364, + "learning_rate": 1.3404415977532631e-05, + "loss": 2.7248, "step": 221000 }, { "epoch": 0.73, - "learning_rate": 1.3398281656099459e-05, - "loss": 2.7504, + "learning_rate": 1.3387856889739658e-05, + "loss": 2.7419, "step": 221100 }, { "epoch": 0.73, - "learning_rate": 1.3381727283261875e-05, - "loss": 2.7532, + "learning_rate": 1.3371297801946687e-05, + "loss": 2.7386, "step": 221200 }, { "epoch": 0.73, - "learning_rate": 1.3365172910424289e-05, - "loss": 2.7411, + "learning_rate": 1.3354738714153717e-05, + "loss": 2.7346, "step": 221300 }, { "epoch": 0.73, - "learning_rate": 1.3348618537586704e-05, - "loss": 2.7387, + "learning_rate": 1.3338179626360744e-05, + "loss": 2.7413, "step": 221400 }, { "epoch": 0.73, - "learning_rate": 1.333206416474912e-05, - "loss": 2.7385, + "learning_rate": 1.3321620538567772e-05, + "loss": 2.7421, "step": 221500 }, { "epoch": 0.73, - "learning_rate": 1.3315509791911534e-05, - "loss": 2.7548, + "learning_rate": 1.33050614507748e-05, + "loss": 2.7441, "step": 221600 }, { "epoch": 0.73, - "learning_rate": 1.329895541907395e-05, - "loss": 2.7512, + "learning_rate": 1.3288502362981828e-05, + "loss": 2.7452, "step": 221700 }, { "epoch": 0.73, - "learning_rate": 1.3282401046236365e-05, - "loss": 2.7448, + "learning_rate": 1.3271943275188858e-05, + "loss": 2.7443, "step": 221800 }, { "epoch": 0.73, - "learning_rate": 1.326584667339878e-05, - "loss": 2.738, + "learning_rate": 1.3255384187395887e-05, + "loss": 2.7384, "step": 221900 }, { "epoch": 0.74, - "learning_rate": 1.3249292300561195e-05, - "loss": 2.7564, + "learning_rate": 1.3238825099602914e-05, + "loss": 2.741, "step": 222000 }, { "epoch": 0.74, - "learning_rate": 1.3232737927723609e-05, - "loss": 2.7513, + "learning_rate": 1.3222266011809942e-05, + "loss": 2.7404, "step": 222100 }, { "epoch": 0.74, - "learning_rate": 1.3216183554886024e-05, - "loss": 2.7383, + "learning_rate": 1.320570692401697e-05, + "loss": 2.7281, "step": 222200 }, { "epoch": 0.74, - "learning_rate": 1.319962918204844e-05, - "loss": 2.745, + "learning_rate": 1.3189147836224e-05, + "loss": 2.7304, "step": 222300 }, { "epoch": 0.74, - "learning_rate": 1.3183074809210854e-05, - "loss": 2.7495, + "learning_rate": 1.3172588748431028e-05, + "loss": 2.7464, "step": 222400 }, { "epoch": 0.74, - "learning_rate": 1.316652043637327e-05, - "loss": 2.7463, + "learning_rate": 1.3156029660638055e-05, + "loss": 2.7167, "step": 222500 }, { "epoch": 0.74, - "learning_rate": 1.3149966063535685e-05, - "loss": 2.742, + "learning_rate": 1.3139470572845084e-05, + "loss": 2.7363, "step": 222600 }, { "epoch": 0.74, - "learning_rate": 1.31334116906981e-05, - "loss": 2.7392, + "learning_rate": 1.312291148505211e-05, + "loss": 2.7286, "step": 222700 }, { "epoch": 0.74, - "learning_rate": 1.3116857317860515e-05, - "loss": 2.7534, + "learning_rate": 1.3106352397259143e-05, + "loss": 2.7519, "step": 222800 }, { "epoch": 0.74, - "learning_rate": 1.3100302945022929e-05, - "loss": 2.7375, + "learning_rate": 1.308979330946617e-05, + "loss": 2.741, "step": 222900 }, { "epoch": 0.74, - "learning_rate": 1.3083748572185341e-05, - "loss": 2.7512, + "learning_rate": 1.3073234221673198e-05, + "loss": 2.7186, "step": 223000 }, { "epoch": 0.74, - "learning_rate": 1.3067194199347757e-05, - "loss": 2.7532, + "learning_rate": 1.3056675133880225e-05, + "loss": 2.7306, "step": 223100 }, { "epoch": 0.74, - "learning_rate": 1.3050639826510172e-05, - "loss": 2.7496, + "learning_rate": 1.3040116046087254e-05, + "loss": 2.732, "step": 223200 }, { "epoch": 0.74, - "learning_rate": 1.3034085453672586e-05, - "loss": 2.7531, + "learning_rate": 1.3023556958294284e-05, + "loss": 2.7292, "step": 223300 }, { "epoch": 0.74, - "learning_rate": 1.3017531080835002e-05, - "loss": 2.7457, + "learning_rate": 1.300699787050131e-05, + "loss": 2.7223, "step": 223400 }, { "epoch": 0.74, - "learning_rate": 1.3000976707997418e-05, - "loss": 2.7515, + "learning_rate": 1.299043878270834e-05, + "loss": 2.7343, "step": 223500 }, { "epoch": 0.74, - "learning_rate": 1.2984422335159832e-05, - "loss": 2.7406, + "learning_rate": 1.2973879694915366e-05, + "loss": 2.7359, "step": 223600 }, { "epoch": 0.74, - "learning_rate": 1.2967867962322247e-05, - "loss": 2.7557, + "learning_rate": 1.2957320607122395e-05, + "loss": 2.7215, "step": 223700 }, { "epoch": 0.74, - "learning_rate": 1.2951313589484661e-05, - "loss": 2.7608, + "learning_rate": 1.2940761519329425e-05, + "loss": 2.7199, "step": 223800 }, { "epoch": 0.74, - "learning_rate": 1.2934759216647077e-05, - "loss": 2.747, + "learning_rate": 1.2924202431536452e-05, + "loss": 2.7357, "step": 223900 }, { "epoch": 0.74, - "learning_rate": 1.2918204843809493e-05, - "loss": 2.7493, + "learning_rate": 1.290764334374348e-05, + "loss": 2.7388, "step": 224000 }, { "epoch": 0.74, - "learning_rate": 1.2901650470971907e-05, - "loss": 2.7576, + "learning_rate": 1.289108425595051e-05, + "loss": 2.7297, "step": 224100 }, { "epoch": 0.74, - "learning_rate": 1.2885096098134322e-05, - "loss": 2.7331, + "learning_rate": 1.2874525168157536e-05, + "loss": 2.7261, "step": 224200 }, { "epoch": 0.74, - "learning_rate": 1.2868541725296738e-05, - "loss": 2.7374, + "learning_rate": 1.2857966080364567e-05, + "loss": 2.7593, "step": 224300 }, { "epoch": 0.74, - "learning_rate": 1.2851987352459152e-05, - "loss": 2.7432, + "learning_rate": 1.2841406992571595e-05, + "loss": 2.7388, "step": 224400 }, { "epoch": 0.74, - "learning_rate": 1.2835432979621567e-05, - "loss": 2.7381, + "learning_rate": 1.2824847904778622e-05, + "loss": 2.7441, "step": 224500 }, { "epoch": 0.74, - "learning_rate": 1.2818878606783981e-05, - "loss": 2.7451, + "learning_rate": 1.280828881698565e-05, + "loss": 2.7375, "step": 224600 }, { "epoch": 0.74, - "learning_rate": 1.2802324233946397e-05, - "loss": 2.7353, + "learning_rate": 1.2791729729192678e-05, + "loss": 2.7274, "step": 224700 }, { "epoch": 0.74, - "learning_rate": 1.2785769861108813e-05, - "loss": 2.7483, + "learning_rate": 1.2775170641399708e-05, + "loss": 2.7279, "step": 224800 }, { "epoch": 0.74, - "learning_rate": 1.2769215488271227e-05, - "loss": 2.7332, + "learning_rate": 1.2758611553606736e-05, + "loss": 2.7245, "step": 224900 }, { - "epoch": 0.74, - "learning_rate": 1.2752661115433642e-05, - "loss": 2.7394, + "epoch": 0.75, + "learning_rate": 1.2742052465813763e-05, + "loss": 2.7332, "step": 225000 }, { "epoch": 0.75, - "learning_rate": 1.2736106742596058e-05, - "loss": 2.7438, + "learning_rate": 1.2725493378020792e-05, + "loss": 2.7349, "step": 225100 }, { "epoch": 0.75, - "learning_rate": 1.2719552369758472e-05, - "loss": 2.7473, + "learning_rate": 1.2708934290227819e-05, + "loss": 2.7183, "step": 225200 }, { "epoch": 0.75, - "learning_rate": 1.2702997996920888e-05, - "loss": 2.7387, + "learning_rate": 1.2692375202434851e-05, + "loss": 2.744, "step": 225300 }, { "epoch": 0.75, - "learning_rate": 1.2686443624083302e-05, - "loss": 2.7363, + "learning_rate": 1.2675816114641878e-05, + "loss": 2.755, "step": 225400 }, { "epoch": 0.75, - "learning_rate": 1.2669889251245717e-05, - "loss": 2.7335, + "learning_rate": 1.2659257026848906e-05, + "loss": 2.7396, "step": 225500 }, { "epoch": 0.75, - "learning_rate": 1.2653334878408133e-05, - "loss": 2.733, + "learning_rate": 1.2642697939055933e-05, + "loss": 2.7201, "step": 225600 }, { "epoch": 0.75, - "learning_rate": 1.2636780505570547e-05, - "loss": 2.7488, + "learning_rate": 1.2626138851262962e-05, + "loss": 2.7371, "step": 225700 }, { "epoch": 0.75, - "learning_rate": 1.2620226132732962e-05, - "loss": 2.7237, + "learning_rate": 1.2609579763469992e-05, + "loss": 2.7388, "step": 225800 }, { "epoch": 0.75, - "learning_rate": 1.2603671759895378e-05, - "loss": 2.7405, + "learning_rate": 1.2593020675677019e-05, + "loss": 2.7232, "step": 225900 }, { "epoch": 0.75, - "learning_rate": 1.2587117387057792e-05, - "loss": 2.7553, + "learning_rate": 1.2576461587884048e-05, + "loss": 2.7321, "step": 226000 }, { "epoch": 0.75, - "learning_rate": 1.2570563014220208e-05, - "loss": 2.7565, + "learning_rate": 1.2559902500091075e-05, + "loss": 2.7349, "step": 226100 }, { "epoch": 0.75, - "learning_rate": 1.2554008641382622e-05, - "loss": 2.7361, + "learning_rate": 1.2543343412298103e-05, + "loss": 2.7497, "step": 226200 }, { "epoch": 0.75, - "learning_rate": 1.2537454268545037e-05, - "loss": 2.7482, + "learning_rate": 1.2526784324505134e-05, + "loss": 2.7385, "step": 226300 }, { "epoch": 0.75, - "learning_rate": 1.2520899895707453e-05, - "loss": 2.7389, + "learning_rate": 1.2510225236712162e-05, + "loss": 2.7397, "step": 226400 }, { "epoch": 0.75, - "learning_rate": 1.2504345522869867e-05, - "loss": 2.7519, + "learning_rate": 1.2493666148919189e-05, + "loss": 2.7345, "step": 226500 }, { "epoch": 0.75, - "learning_rate": 1.248779115003228e-05, - "loss": 2.7584, + "learning_rate": 1.2477107061126218e-05, + "loss": 2.734, "step": 226600 }, { "epoch": 0.75, - "learning_rate": 1.2471236777194696e-05, - "loss": 2.7492, + "learning_rate": 1.2460547973333246e-05, + "loss": 2.731, "step": 226700 }, { "epoch": 0.75, - "learning_rate": 1.245468240435711e-05, - "loss": 2.7427, + "learning_rate": 1.2443988885540273e-05, + "loss": 2.7298, "step": 226800 }, { "epoch": 0.75, - "learning_rate": 1.2438128031519526e-05, - "loss": 2.7452, + "learning_rate": 1.2427429797747302e-05, + "loss": 2.7303, "step": 226900 }, { "epoch": 0.75, - "learning_rate": 1.2421573658681942e-05, - "loss": 2.7479, + "learning_rate": 1.241087070995433e-05, + "loss": 2.7343, "step": 227000 }, { "epoch": 0.75, - "learning_rate": 1.2405019285844356e-05, - "loss": 2.7481, + "learning_rate": 1.2394311622161359e-05, + "loss": 2.7504, "step": 227100 }, { "epoch": 0.75, - "learning_rate": 1.2388464913006771e-05, - "loss": 2.7316, + "learning_rate": 1.2377752534368388e-05, + "loss": 2.7192, "step": 227200 }, { "epoch": 0.75, - "learning_rate": 1.2371910540169187e-05, - "loss": 2.7451, + "learning_rate": 1.2361193446575416e-05, + "loss": 2.7333, "step": 227300 }, { "epoch": 0.75, - "learning_rate": 1.2355356167331601e-05, - "loss": 2.742, + "learning_rate": 1.2344634358782443e-05, + "loss": 2.7464, "step": 227400 }, { "epoch": 0.75, - "learning_rate": 1.2338801794494017e-05, - "loss": 2.749, + "learning_rate": 1.2328075270989472e-05, + "loss": 2.7402, "step": 227500 }, { "epoch": 0.75, - "learning_rate": 1.232224742165643e-05, - "loss": 2.7425, + "learning_rate": 1.23115161831965e-05, + "loss": 2.7246, "step": 227600 }, { "epoch": 0.75, - "learning_rate": 1.2305693048818846e-05, - "loss": 2.7363, + "learning_rate": 1.2294957095403529e-05, + "loss": 2.7297, "step": 227700 }, { "epoch": 0.75, - "learning_rate": 1.2289138675981262e-05, - "loss": 2.7392, + "learning_rate": 1.2278398007610558e-05, + "loss": 2.738, "step": 227800 }, { "epoch": 0.75, - "learning_rate": 1.2272584303143676e-05, - "loss": 2.7551, + "learning_rate": 1.2261838919817584e-05, + "loss": 2.7303, "step": 227900 }, { - "epoch": 0.75, - "learning_rate": 1.2256029930306091e-05, - "loss": 2.7496, + "epoch": 0.76, + "learning_rate": 1.2245279832024615e-05, + "loss": 2.7445, "step": 228000 }, { "epoch": 0.76, - "learning_rate": 1.2239475557468507e-05, - "loss": 2.7465, + "learning_rate": 1.2228720744231642e-05, + "loss": 2.7405, "step": 228100 }, { "epoch": 0.76, - "learning_rate": 1.2222921184630921e-05, - "loss": 2.7464, + "learning_rate": 1.2212161656438672e-05, + "loss": 2.7304, "step": 228200 }, { "epoch": 0.76, - "learning_rate": 1.2206366811793337e-05, - "loss": 2.7352, + "learning_rate": 1.2195602568645699e-05, + "loss": 2.723, "step": 228300 }, { "epoch": 0.76, - "learning_rate": 1.218981243895575e-05, - "loss": 2.7362, + "learning_rate": 1.2179043480852728e-05, + "loss": 2.7344, "step": 228400 }, { "epoch": 0.76, - "learning_rate": 1.2173258066118166e-05, - "loss": 2.7531, + "learning_rate": 1.2162484393059756e-05, + "loss": 2.7267, "step": 228500 }, { "epoch": 0.76, - "learning_rate": 1.215670369328058e-05, - "loss": 2.7409, + "learning_rate": 1.2145925305266783e-05, + "loss": 2.7427, "step": 228600 }, { "epoch": 0.76, - "learning_rate": 1.2140149320442994e-05, - "loss": 2.746, + "learning_rate": 1.2129366217473813e-05, + "loss": 2.7328, "step": 228700 }, { "epoch": 0.76, - "learning_rate": 1.212359494760541e-05, - "loss": 2.7396, + "learning_rate": 1.211280712968084e-05, + "loss": 2.7359, "step": 228800 }, { "epoch": 0.76, - "learning_rate": 1.2107040574767826e-05, - "loss": 2.7444, + "learning_rate": 1.2096248041887869e-05, + "loss": 2.7302, "step": 228900 }, { "epoch": 0.76, - "learning_rate": 1.209048620193024e-05, - "loss": 2.7319, + "learning_rate": 1.2079688954094897e-05, + "loss": 2.742, "step": 229000 }, { "epoch": 0.76, - "learning_rate": 1.2073931829092655e-05, - "loss": 2.7357, + "learning_rate": 1.2063129866301926e-05, + "loss": 2.7364, "step": 229100 }, { "epoch": 0.76, - "learning_rate": 1.205737745625507e-05, - "loss": 2.7458, + "learning_rate": 1.2046570778508955e-05, + "loss": 2.732, "step": 229200 }, { "epoch": 0.76, - "learning_rate": 1.2040823083417485e-05, - "loss": 2.7553, + "learning_rate": 1.2030011690715982e-05, + "loss": 2.7296, "step": 229300 }, { "epoch": 0.76, - "learning_rate": 1.20242687105799e-05, - "loss": 2.7516, + "learning_rate": 1.201345260292301e-05, + "loss": 2.728, "step": 229400 }, { "epoch": 0.76, - "learning_rate": 1.2007714337742314e-05, - "loss": 2.7464, + "learning_rate": 1.1996893515130039e-05, + "loss": 2.7314, "step": 229500 }, { "epoch": 0.76, - "learning_rate": 1.199115996490473e-05, - "loss": 2.7407, + "learning_rate": 1.1980334427337067e-05, + "loss": 2.7189, "step": 229600 }, { "epoch": 0.76, - "learning_rate": 1.1974605592067146e-05, - "loss": 2.7443, + "learning_rate": 1.1963775339544096e-05, + "loss": 2.7374, "step": 229700 }, { "epoch": 0.76, - "learning_rate": 1.195805121922956e-05, - "loss": 2.7384, + "learning_rate": 1.1947216251751125e-05, + "loss": 2.7278, "step": 229800 }, { "epoch": 0.76, - "learning_rate": 1.1941496846391975e-05, - "loss": 2.7536, + "learning_rate": 1.1930657163958152e-05, + "loss": 2.7257, "step": 229900 }, { "epoch": 0.76, - "learning_rate": 1.1924942473554391e-05, - "loss": 2.7451, + "learning_rate": 1.1914098076165182e-05, + "loss": 2.731, "step": 230000 }, { "epoch": 0.76, - "learning_rate": 1.1908388100716805e-05, - "loss": 2.7501, + "learning_rate": 1.1897538988372209e-05, + "loss": 2.7389, "step": 230100 }, { "epoch": 0.76, - "learning_rate": 1.189183372787922e-05, - "loss": 2.7447, + "learning_rate": 1.1880979900579237e-05, + "loss": 2.7379, "step": 230200 }, { "epoch": 0.76, - "learning_rate": 1.1875279355041634e-05, - "loss": 2.7596, + "learning_rate": 1.1864420812786266e-05, + "loss": 2.7315, "step": 230300 }, { "epoch": 0.76, - "learning_rate": 1.185872498220405e-05, - "loss": 2.7497, + "learning_rate": 1.1847861724993293e-05, + "loss": 2.7324, "step": 230400 }, { "epoch": 0.76, - "learning_rate": 1.1842170609366466e-05, - "loss": 2.7317, + "learning_rate": 1.1831302637200323e-05, + "loss": 2.7426, "step": 230500 }, { "epoch": 0.76, - "learning_rate": 1.182561623652888e-05, - "loss": 2.7373, + "learning_rate": 1.181474354940735e-05, + "loss": 2.7392, "step": 230600 }, { "epoch": 0.76, - "learning_rate": 1.1809061863691295e-05, - "loss": 2.7329, + "learning_rate": 1.179818446161438e-05, + "loss": 2.7276, "step": 230700 }, { "epoch": 0.76, - "learning_rate": 1.179250749085371e-05, - "loss": 2.7355, + "learning_rate": 1.1781625373821407e-05, + "loss": 2.7275, "step": 230800 }, { "epoch": 0.76, - "learning_rate": 1.1775953118016123e-05, - "loss": 2.7476, + "learning_rate": 1.1765066286028436e-05, + "loss": 2.7456, "step": 230900 }, { - "epoch": 0.76, - "learning_rate": 1.1759398745178539e-05, - "loss": 2.757, + "epoch": 0.77, + "learning_rate": 1.1748507198235464e-05, + "loss": 2.7287, "step": 231000 }, { "epoch": 0.77, - "learning_rate": 1.1742844372340955e-05, - "loss": 2.7496, + "learning_rate": 1.1731948110442491e-05, + "loss": 2.7375, "step": 231100 }, { "epoch": 0.77, - "learning_rate": 1.1726289999503368e-05, - "loss": 2.7524, + "learning_rate": 1.1715389022649522e-05, + "loss": 2.7393, "step": 231200 }, { "epoch": 0.77, - "learning_rate": 1.1709735626665784e-05, + "learning_rate": 1.1698829934856549e-05, "loss": 2.7384, "step": 231300 }, { "epoch": 0.77, - "learning_rate": 1.16931812538282e-05, - "loss": 2.7517, + "learning_rate": 1.1682270847063577e-05, + "loss": 2.7277, "step": 231400 }, { "epoch": 0.77, - "learning_rate": 1.1676626880990614e-05, - "loss": 2.7336, + "learning_rate": 1.1665711759270606e-05, + "loss": 2.7267, "step": 231500 }, { "epoch": 0.77, - "learning_rate": 1.166007250815303e-05, - "loss": 2.7514, + "learning_rate": 1.1649152671477634e-05, + "loss": 2.7496, "step": 231600 }, { "epoch": 0.77, - "learning_rate": 1.1643518135315443e-05, - "loss": 2.7475, + "learning_rate": 1.1632593583684663e-05, + "loss": 2.7337, "step": 231700 }, { "epoch": 0.77, - "learning_rate": 1.1626963762477859e-05, - "loss": 2.7613, + "learning_rate": 1.1616034495891692e-05, + "loss": 2.7402, "step": 231800 }, { "epoch": 0.77, - "learning_rate": 1.1610409389640275e-05, - "loss": 2.7348, + "learning_rate": 1.1599475408098719e-05, + "loss": 2.7322, "step": 231900 }, { "epoch": 0.77, - "learning_rate": 1.1593855016802689e-05, - "loss": 2.7397, + "learning_rate": 1.1582916320305747e-05, + "loss": 2.7449, "step": 232000 }, { "epoch": 0.77, - "learning_rate": 1.1577300643965104e-05, - "loss": 2.7425, + "learning_rate": 1.1566357232512776e-05, + "loss": 2.717, "step": 232100 }, { "epoch": 0.77, - "learning_rate": 1.156074627112752e-05, - "loss": 2.7319, + "learning_rate": 1.1549798144719804e-05, + "loss": 2.721, "step": 232200 }, { "epoch": 0.77, - "learning_rate": 1.1544191898289934e-05, - "loss": 2.7528, + "learning_rate": 1.1533239056926833e-05, + "loss": 2.7512, "step": 232300 }, { "epoch": 0.77, - "learning_rate": 1.152763752545235e-05, - "loss": 2.7383, + "learning_rate": 1.151667996913386e-05, + "loss": 2.7197, "step": 232400 }, { "epoch": 0.77, - "learning_rate": 1.1511083152614763e-05, - "loss": 2.7334, + "learning_rate": 1.150012088134089e-05, + "loss": 2.7312, "step": 232500 }, { "epoch": 0.77, - "learning_rate": 1.1494528779777179e-05, - "loss": 2.7459, + "learning_rate": 1.1483561793547917e-05, + "loss": 2.7292, "step": 232600 }, { "epoch": 0.77, - "learning_rate": 1.1477974406939595e-05, - "loss": 2.7478, + "learning_rate": 1.1467002705754946e-05, + "loss": 2.7392, "step": 232700 }, { "epoch": 0.77, - "learning_rate": 1.1461420034102009e-05, - "loss": 2.7426, + "learning_rate": 1.1450443617961974e-05, + "loss": 2.7247, "step": 232800 }, { "epoch": 0.77, - "learning_rate": 1.1444865661264424e-05, - "loss": 2.7348, + "learning_rate": 1.1433884530169001e-05, + "loss": 2.7287, "step": 232900 }, { "epoch": 0.77, - "learning_rate": 1.1428311288426838e-05, - "loss": 2.7426, + "learning_rate": 1.1417325442376031e-05, + "loss": 2.7429, "step": 233000 }, { "epoch": 0.77, - "learning_rate": 1.1411756915589252e-05, - "loss": 2.7587, + "learning_rate": 1.1400766354583058e-05, + "loss": 2.7295, "step": 233100 }, { "epoch": 0.77, - "learning_rate": 1.1395202542751668e-05, - "loss": 2.7479, + "learning_rate": 1.1384207266790089e-05, + "loss": 2.7364, "step": 233200 }, { "epoch": 0.77, - "learning_rate": 1.1378648169914084e-05, - "loss": 2.7518, + "learning_rate": 1.1367648178997116e-05, + "loss": 2.7233, "step": 233300 }, { "epoch": 0.77, - "learning_rate": 1.1362093797076498e-05, - "loss": 2.7426, + "learning_rate": 1.1351089091204144e-05, + "loss": 2.7265, "step": 233400 }, { "epoch": 0.77, - "learning_rate": 1.1345539424238913e-05, - "loss": 2.7376, + "learning_rate": 1.1334530003411173e-05, + "loss": 2.7441, "step": 233500 }, { "epoch": 0.77, - "learning_rate": 1.1328985051401327e-05, - "loss": 2.728, + "learning_rate": 1.1317970915618201e-05, + "loss": 2.7349, "step": 233600 }, { "epoch": 0.77, - "learning_rate": 1.1312430678563743e-05, - "loss": 2.7333, + "learning_rate": 1.130141182782523e-05, + "loss": 2.7382, "step": 233700 }, { "epoch": 0.77, - "learning_rate": 1.1295876305726158e-05, - "loss": 2.7324, + "learning_rate": 1.1284852740032257e-05, + "loss": 2.7291, "step": 233800 }, { "epoch": 0.77, - "learning_rate": 1.1279321932888572e-05, - "loss": 2.7316, + "learning_rate": 1.1268293652239286e-05, + "loss": 2.7563, "step": 233900 }, { "epoch": 0.77, - "learning_rate": 1.1262767560050988e-05, - "loss": 2.7416, + "learning_rate": 1.1251734564446314e-05, + "loss": 2.7223, "step": 234000 }, { "epoch": 0.78, - "learning_rate": 1.1246213187213404e-05, - "loss": 2.729, + "learning_rate": 1.1235175476653343e-05, + "loss": 2.7279, "step": 234100 }, { "epoch": 0.78, - "learning_rate": 1.1229658814375818e-05, - "loss": 2.7398, + "learning_rate": 1.1218616388860371e-05, + "loss": 2.744, "step": 234200 }, { "epoch": 0.78, - "learning_rate": 1.1213104441538233e-05, - "loss": 2.7352, + "learning_rate": 1.12020573010674e-05, + "loss": 2.727, "step": 234300 }, { "epoch": 0.78, - "learning_rate": 1.1196550068700647e-05, - "loss": 2.7359, + "learning_rate": 1.1185498213274427e-05, + "loss": 2.7376, "step": 234400 }, { "epoch": 0.78, - "learning_rate": 1.1179995695863063e-05, - "loss": 2.7495, + "learning_rate": 1.1168939125481455e-05, + "loss": 2.7504, "step": 234500 }, { "epoch": 0.78, - "learning_rate": 1.1163441323025479e-05, - "loss": 2.7248, + "learning_rate": 1.1152380037688484e-05, + "loss": 2.7306, "step": 234600 }, { "epoch": 0.78, - "learning_rate": 1.1146886950187892e-05, - "loss": 2.7605, + "learning_rate": 1.1135820949895513e-05, + "loss": 2.7442, "step": 234700 }, { "epoch": 0.78, - "learning_rate": 1.1130332577350308e-05, - "loss": 2.744, + "learning_rate": 1.1119261862102541e-05, + "loss": 2.746, "step": 234800 }, { "epoch": 0.78, - "learning_rate": 1.1113778204512724e-05, - "loss": 2.7448, + "learning_rate": 1.1102702774309568e-05, + "loss": 2.7402, "step": 234900 }, { "epoch": 0.78, - "learning_rate": 1.1097223831675138e-05, - "loss": 2.7273, + "learning_rate": 1.1086143686516599e-05, + "loss": 2.7389, "step": 235000 }, { "epoch": 0.78, - "learning_rate": 1.1080669458837553e-05, - "loss": 2.7384, + "learning_rate": 1.1069584598723625e-05, + "loss": 2.7335, "step": 235100 }, { "epoch": 0.78, - "learning_rate": 1.1064115085999967e-05, - "loss": 2.7269, + "learning_rate": 1.1053025510930656e-05, + "loss": 2.7338, "step": 235200 }, { "epoch": 0.78, - "learning_rate": 1.1047560713162381e-05, - "loss": 2.7432, + "learning_rate": 1.1036466423137683e-05, + "loss": 2.7424, "step": 235300 }, { "epoch": 0.78, - "learning_rate": 1.1031006340324797e-05, - "loss": 2.7434, + "learning_rate": 1.1019907335344711e-05, + "loss": 2.74, "step": 235400 }, { "epoch": 0.78, - "learning_rate": 1.1014451967487213e-05, - "loss": 2.7228, + "learning_rate": 1.100334824755174e-05, + "loss": 2.7222, "step": 235500 }, { "epoch": 0.78, - "learning_rate": 1.0997897594649627e-05, - "loss": 2.7324, + "learning_rate": 1.0986789159758767e-05, + "loss": 2.741, "step": 235600 }, { "epoch": 0.78, - "learning_rate": 1.0981343221812042e-05, - "loss": 2.7438, + "learning_rate": 1.0970230071965797e-05, + "loss": 2.7255, "step": 235700 }, { "epoch": 0.78, - "learning_rate": 1.0964788848974456e-05, - "loss": 2.7417, + "learning_rate": 1.0953670984172824e-05, + "loss": 2.7184, "step": 235800 }, { "epoch": 0.78, - "learning_rate": 1.0948234476136872e-05, - "loss": 2.7496, + "learning_rate": 1.0937111896379853e-05, + "loss": 2.728, "step": 235900 }, { "epoch": 0.78, - "learning_rate": 1.0931680103299287e-05, - "loss": 2.7379, + "learning_rate": 1.0920552808586881e-05, + "loss": 2.7346, "step": 236000 }, { "epoch": 0.78, - "learning_rate": 1.0915125730461701e-05, - "loss": 2.7366, + "learning_rate": 1.090399372079391e-05, + "loss": 2.7221, "step": 236100 }, { "epoch": 0.78, - "learning_rate": 1.0898571357624117e-05, - "loss": 2.7548, + "learning_rate": 1.0887434633000938e-05, + "loss": 2.7363, "step": 236200 }, { "epoch": 0.78, - "learning_rate": 1.0882016984786533e-05, - "loss": 2.7474, + "learning_rate": 1.0870875545207965e-05, + "loss": 2.7295, "step": 236300 }, { "epoch": 0.78, - "learning_rate": 1.0865462611948947e-05, - "loss": 2.745, + "learning_rate": 1.0854316457414994e-05, + "loss": 2.7392, "step": 236400 }, { "epoch": 0.78, - "learning_rate": 1.0848908239111362e-05, - "loss": 2.7446, + "learning_rate": 1.0837757369622023e-05, + "loss": 2.7505, "step": 236500 }, { "epoch": 0.78, - "learning_rate": 1.0832353866273776e-05, - "loss": 2.721, + "learning_rate": 1.0821198281829051e-05, + "loss": 2.7302, "step": 236600 }, { "epoch": 0.78, - "learning_rate": 1.0815799493436192e-05, - "loss": 2.7305, + "learning_rate": 1.080463919403608e-05, + "loss": 2.7467, "step": 236700 }, { "epoch": 0.78, - "learning_rate": 1.0799245120598608e-05, - "loss": 2.7319, + "learning_rate": 1.0788080106243108e-05, + "loss": 2.749, "step": 236800 }, { "epoch": 0.78, - "learning_rate": 1.0782690747761022e-05, - "loss": 2.7489, + "learning_rate": 1.0771521018450135e-05, + "loss": 2.7397, "step": 236900 }, { "epoch": 0.78, - "learning_rate": 1.0766136374923437e-05, - "loss": 2.741, + "learning_rate": 1.0754961930657166e-05, + "loss": 2.7338, "step": 237000 }, { "epoch": 0.79, - "learning_rate": 1.0749582002085853e-05, - "loss": 2.7347, + "learning_rate": 1.0738402842864192e-05, + "loss": 2.7321, "step": 237100 }, { "epoch": 0.79, - "learning_rate": 1.0733027629248267e-05, - "loss": 2.7414, + "learning_rate": 1.0721843755071221e-05, + "loss": 2.7236, "step": 237200 }, { "epoch": 0.79, - "learning_rate": 1.0716473256410682e-05, - "loss": 2.7237, + "learning_rate": 1.070528466727825e-05, + "loss": 2.7381, "step": 237300 }, { "epoch": 0.79, - "learning_rate": 1.0699918883573096e-05, - "loss": 2.7413, + "learning_rate": 1.0688725579485277e-05, + "loss": 2.7401, "step": 237400 }, { "epoch": 0.79, - "learning_rate": 1.068336451073551e-05, - "loss": 2.7393, + "learning_rate": 1.0672166491692307e-05, + "loss": 2.7288, "step": 237500 }, { "epoch": 0.79, - "learning_rate": 1.0666810137897926e-05, - "loss": 2.7405, + "learning_rate": 1.0655607403899334e-05, + "loss": 2.7309, "step": 237600 }, { "epoch": 0.79, - "learning_rate": 1.065025576506034e-05, - "loss": 2.7351, + "learning_rate": 1.0639048316106364e-05, + "loss": 2.7342, "step": 237700 }, { "epoch": 0.79, - "learning_rate": 1.0633701392222756e-05, - "loss": 2.7418, + "learning_rate": 1.0622489228313391e-05, + "loss": 2.7188, "step": 237800 }, { "epoch": 0.79, - "learning_rate": 1.0617147019385171e-05, - "loss": 2.7374, + "learning_rate": 1.060593014052042e-05, + "loss": 2.733, "step": 237900 }, { "epoch": 0.79, - "learning_rate": 1.0600592646547585e-05, - "loss": 2.7515, + "learning_rate": 1.0589371052727448e-05, + "loss": 2.7289, "step": 238000 }, { "epoch": 0.79, - "learning_rate": 1.058403827371e-05, - "loss": 2.7371, + "learning_rate": 1.0572811964934475e-05, + "loss": 2.7486, "step": 238100 }, { "epoch": 0.79, - "learning_rate": 1.0567483900872416e-05, - "loss": 2.7455, + "learning_rate": 1.0556252877141505e-05, + "loss": 2.7332, "step": 238200 }, { "epoch": 0.79, - "learning_rate": 1.055092952803483e-05, - "loss": 2.7351, + "learning_rate": 1.0539693789348532e-05, + "loss": 2.7219, "step": 238300 }, { "epoch": 0.79, - "learning_rate": 1.0534375155197246e-05, - "loss": 2.7367, + "learning_rate": 1.0523134701555561e-05, + "loss": 2.7237, "step": 238400 }, { "epoch": 0.79, - "learning_rate": 1.051782078235966e-05, - "loss": 2.7361, + "learning_rate": 1.050657561376259e-05, + "loss": 2.7207, "step": 238500 }, { "epoch": 0.79, - "learning_rate": 1.0501266409522076e-05, - "loss": 2.74, + "learning_rate": 1.0490016525969618e-05, + "loss": 2.7311, "step": 238600 }, { "epoch": 0.79, - "learning_rate": 1.0484712036684491e-05, - "loss": 2.7639, + "learning_rate": 1.0473457438176647e-05, + "loss": 2.72, "step": 238700 }, { "epoch": 0.79, - "learning_rate": 1.0468157663846905e-05, - "loss": 2.7485, + "learning_rate": 1.0456898350383675e-05, + "loss": 2.7227, "step": 238800 }, { "epoch": 0.79, - "learning_rate": 1.0451603291009321e-05, - "loss": 2.7205, + "learning_rate": 1.0440339262590702e-05, + "loss": 2.7341, "step": 238900 }, { "epoch": 0.79, - "learning_rate": 1.0435048918171737e-05, - "loss": 2.76, + "learning_rate": 1.0423780174797731e-05, + "loss": 2.7221, "step": 239000 }, { "epoch": 0.79, - "learning_rate": 1.041849454533415e-05, - "loss": 2.7435, + "learning_rate": 1.040722108700476e-05, + "loss": 2.7184, "step": 239100 }, { "epoch": 0.79, - "learning_rate": 1.0401940172496566e-05, + "learning_rate": 1.0390661999211788e-05, "loss": 2.7403, "step": 239200 }, { "epoch": 0.79, - "learning_rate": 1.038538579965898e-05, - "loss": 2.7429, + "learning_rate": 1.0374102911418817e-05, + "loss": 2.7283, "step": 239300 }, { "epoch": 0.79, - "learning_rate": 1.0368831426821396e-05, - "loss": 2.7554, + "learning_rate": 1.0357543823625844e-05, + "loss": 2.7418, "step": 239400 }, { "epoch": 0.79, - "learning_rate": 1.0352277053983811e-05, - "loss": 2.7406, + "learning_rate": 1.0340984735832874e-05, + "loss": 2.7326, "step": 239500 }, { "epoch": 0.79, - "learning_rate": 1.0335722681146225e-05, - "loss": 2.7466, + "learning_rate": 1.03244256480399e-05, + "loss": 2.724, "step": 239600 }, { "epoch": 0.79, - "learning_rate": 1.031916830830864e-05, - "loss": 2.7489, + "learning_rate": 1.030786656024693e-05, + "loss": 2.7366, "step": 239700 }, { "epoch": 0.79, - "learning_rate": 1.0302613935471055e-05, - "loss": 2.727, + "learning_rate": 1.0291307472453958e-05, + "loss": 2.7332, "step": 239800 }, { "epoch": 0.79, - "learning_rate": 1.0286059562633469e-05, - "loss": 2.7357, + "learning_rate": 1.0274748384660985e-05, + "loss": 2.7303, "step": 239900 }, { "epoch": 0.79, - "learning_rate": 1.0269505189795885e-05, - "loss": 2.7406, + "learning_rate": 1.0258189296868015e-05, + "loss": 2.7411, "step": 240000 }, { - "epoch": 0.79, - "learning_rate": 1.02529508169583e-05, - "loss": 2.7517, + "epoch": 0.8, + "learning_rate": 1.0241630209075042e-05, + "loss": 2.7556, "step": 240100 }, { "epoch": 0.8, - "learning_rate": 1.0236396444120714e-05, - "loss": 2.7306, + "learning_rate": 1.0225071121282072e-05, + "loss": 2.7342, "step": 240200 }, { "epoch": 0.8, - "learning_rate": 1.021984207128313e-05, - "loss": 2.7336, + "learning_rate": 1.02085120334891e-05, + "loss": 2.7435, "step": 240300 }, { "epoch": 0.8, - "learning_rate": 1.0203287698445546e-05, - "loss": 2.7443, + "learning_rate": 1.0191952945696128e-05, + "loss": 2.7391, "step": 240400 }, { "epoch": 0.8, - "learning_rate": 1.018673332560796e-05, - "loss": 2.7485, + "learning_rate": 1.0175393857903157e-05, + "loss": 2.747, "step": 240500 }, { "epoch": 0.8, - "learning_rate": 1.0170178952770375e-05, - "loss": 2.7336, + "learning_rate": 1.0158834770110185e-05, + "loss": 2.7265, "step": 240600 }, { "epoch": 0.8, - "learning_rate": 1.0153624579932789e-05, - "loss": 2.7348, + "learning_rate": 1.0142275682317214e-05, + "loss": 2.7361, "step": 240700 }, { "epoch": 0.8, - "learning_rate": 1.0137070207095205e-05, - "loss": 2.7212, + "learning_rate": 1.012571659452424e-05, + "loss": 2.7358, "step": 240800 }, { "epoch": 0.8, - "learning_rate": 1.012051583425762e-05, - "loss": 2.7343, + "learning_rate": 1.010915750673127e-05, + "loss": 2.7215, "step": 240900 }, { "epoch": 0.8, - "learning_rate": 1.0103961461420034e-05, - "loss": 2.7293, + "learning_rate": 1.0092598418938298e-05, + "loss": 2.7271, "step": 241000 }, { "epoch": 0.8, - "learning_rate": 1.008740708858245e-05, - "loss": 2.7273, + "learning_rate": 1.0076039331145327e-05, + "loss": 2.7506, "step": 241100 }, { "epoch": 0.8, - "learning_rate": 1.0070852715744866e-05, - "loss": 2.7453, + "learning_rate": 1.0059480243352355e-05, + "loss": 2.7365, "step": 241200 }, { "epoch": 0.8, - "learning_rate": 1.005429834290728e-05, - "loss": 2.7538, + "learning_rate": 1.0042921155559384e-05, + "loss": 2.7371, "step": 241300 }, { "epoch": 0.8, - "learning_rate": 1.0037743970069695e-05, - "loss": 2.7399, + "learning_rate": 1.002636206776641e-05, + "loss": 2.7289, "step": 241400 }, { "epoch": 0.8, - "learning_rate": 1.002118959723211e-05, - "loss": 2.7486, + "learning_rate": 1.000980297997344e-05, + "loss": 2.7282, "step": 241500 }, { "epoch": 0.8, - "learning_rate": 1.0004635224394525e-05, - "loss": 2.7318, + "learning_rate": 9.993243892180468e-06, + "loss": 2.7335, "step": 241600 }, { "epoch": 0.8, - "learning_rate": 9.98808085155694e-06, - "loss": 2.7319, + "learning_rate": 9.976684804387496e-06, + "loss": 2.7417, "step": 241700 }, { "epoch": 0.8, - "learning_rate": 9.971526478719353e-06, - "loss": 2.7505, + "learning_rate": 9.960125716594525e-06, + "loss": 2.7407, "step": 241800 }, { "epoch": 0.8, - "learning_rate": 9.954972105881768e-06, - "loss": 2.7475, + "learning_rate": 9.943566628801552e-06, + "loss": 2.7359, "step": 241900 }, { "epoch": 0.8, - "learning_rate": 9.938417733044184e-06, - "loss": 2.7573, + "learning_rate": 9.927007541008582e-06, + "loss": 2.7315, "step": 242000 }, { "epoch": 0.8, - "learning_rate": 9.921863360206598e-06, - "loss": 2.7482, + "learning_rate": 9.91044845321561e-06, + "loss": 2.7384, "step": 242100 }, { "epoch": 0.8, - "learning_rate": 9.905308987369014e-06, - "loss": 2.7415, + "learning_rate": 9.893889365422638e-06, + "loss": 2.7202, "step": 242200 }, { "epoch": 0.8, - "learning_rate": 9.88875461453143e-06, - "loss": 2.7434, + "learning_rate": 9.877330277629666e-06, + "loss": 2.7463, "step": 242300 }, { "epoch": 0.8, - "learning_rate": 9.872200241693843e-06, - "loss": 2.7401, + "learning_rate": 9.860771189836695e-06, + "loss": 2.7396, "step": 242400 }, { "epoch": 0.8, - "learning_rate": 9.855645868856259e-06, - "loss": 2.7367, + "learning_rate": 9.844212102043724e-06, + "loss": 2.7273, "step": 242500 }, { "epoch": 0.8, - "learning_rate": 9.839091496018673e-06, - "loss": 2.7548, + "learning_rate": 9.82765301425075e-06, + "loss": 2.7276, "step": 242600 }, { "epoch": 0.8, - "learning_rate": 9.822537123181089e-06, - "loss": 2.7341, + "learning_rate": 9.81109392645778e-06, + "loss": 2.7437, "step": 242700 }, { "epoch": 0.8, - "learning_rate": 9.805982750343504e-06, - "loss": 2.738, + "learning_rate": 9.794534838664808e-06, + "loss": 2.7189, "step": 242800 }, { "epoch": 0.8, - "learning_rate": 9.789428377505918e-06, - "loss": 2.7412, + "learning_rate": 9.777975750871836e-06, + "loss": 2.7196, "step": 242900 }, { "epoch": 0.8, - "learning_rate": 9.772874004668334e-06, - "loss": 2.7319, + "learning_rate": 9.761416663078865e-06, + "loss": 2.7228, "step": 243000 }, { - "epoch": 0.8, - "learning_rate": 9.75631963183075e-06, - "loss": 2.7267, + "epoch": 0.81, + "learning_rate": 9.744857575285894e-06, + "loss": 2.7269, "step": 243100 }, { "epoch": 0.81, - "learning_rate": 9.739765258993163e-06, - "loss": 2.7339, + "learning_rate": 9.728298487492922e-06, + "loss": 2.7313, "step": 243200 }, { "epoch": 0.81, - "learning_rate": 9.723210886155579e-06, - "loss": 2.7246, + "learning_rate": 9.711739399699949e-06, + "loss": 2.7326, "step": 243300 }, { "epoch": 0.81, - "learning_rate": 9.706656513317993e-06, - "loss": 2.7443, + "learning_rate": 9.695180311906978e-06, + "loss": 2.7212, "step": 243400 }, { "epoch": 0.81, - "learning_rate": 9.690102140480409e-06, - "loss": 2.7346, + "learning_rate": 9.678621224114006e-06, + "loss": 2.7277, "step": 243500 }, { "epoch": 0.81, - "learning_rate": 9.673547767642824e-06, - "loss": 2.7435, + "learning_rate": 9.662062136321035e-06, + "loss": 2.7364, "step": 243600 }, { "epoch": 0.81, - "learning_rate": 9.656993394805238e-06, - "loss": 2.7533, + "learning_rate": 9.645503048528063e-06, + "loss": 2.7339, "step": 243700 }, { "epoch": 0.81, - "learning_rate": 9.640439021967654e-06, - "loss": 2.7593, + "learning_rate": 9.628943960735092e-06, + "loss": 2.7141, "step": 243800 }, { "epoch": 0.81, - "learning_rate": 9.62388464913007e-06, - "loss": 2.7394, + "learning_rate": 9.612384872942119e-06, + "loss": 2.7451, "step": 243900 }, { "epoch": 0.81, - "learning_rate": 9.607330276292482e-06, - "loss": 2.7142, + "learning_rate": 9.595825785149148e-06, + "loss": 2.7415, "step": 244000 }, { "epoch": 0.81, - "learning_rate": 9.590775903454897e-06, - "loss": 2.7469, + "learning_rate": 9.579266697356176e-06, + "loss": 2.7323, "step": 244100 }, { "epoch": 0.81, - "learning_rate": 9.574221530617313e-06, - "loss": 2.7424, + "learning_rate": 9.562707609563205e-06, + "loss": 2.7279, "step": 244200 }, { "epoch": 0.81, - "learning_rate": 9.557667157779727e-06, - "loss": 2.7337, + "learning_rate": 9.546148521770233e-06, + "loss": 2.7405, "step": 244300 }, { "epoch": 0.81, - "learning_rate": 9.541112784942143e-06, - "loss": 2.7325, + "learning_rate": 9.52958943397726e-06, + "loss": 2.7326, "step": 244400 }, { "epoch": 0.81, - "learning_rate": 9.524558412104558e-06, - "loss": 2.7257, + "learning_rate": 9.51303034618429e-06, + "loss": 2.7187, "step": 244500 }, { "epoch": 0.81, - "learning_rate": 9.508004039266972e-06, - "loss": 2.7487, + "learning_rate": 9.496471258391318e-06, + "loss": 2.7227, "step": 244600 }, { "epoch": 0.81, - "learning_rate": 9.491449666429388e-06, - "loss": 2.7275, + "learning_rate": 9.479912170598348e-06, + "loss": 2.715, "step": 244700 }, { "epoch": 0.81, - "learning_rate": 9.474895293591802e-06, - "loss": 2.7328, + "learning_rate": 9.463353082805375e-06, + "loss": 2.7438, "step": 244800 }, { "epoch": 0.81, - "learning_rate": 9.458340920754218e-06, - "loss": 2.7504, + "learning_rate": 9.446793995012403e-06, + "loss": 2.737, "step": 244900 }, { "epoch": 0.81, - "learning_rate": 9.441786547916633e-06, - "loss": 2.7517, + "learning_rate": 9.430234907219432e-06, + "loss": 2.7309, "step": 245000 }, { "epoch": 0.81, - "learning_rate": 9.425232175079047e-06, - "loss": 2.7347, + "learning_rate": 9.413675819426459e-06, + "loss": 2.7345, "step": 245100 }, { "epoch": 0.81, - "learning_rate": 9.408677802241463e-06, - "loss": 2.7313, + "learning_rate": 9.39711673163349e-06, + "loss": 2.732, "step": 245200 }, { "epoch": 0.81, - "learning_rate": 9.392123429403878e-06, - "loss": 2.7466, + "learning_rate": 9.380557643840516e-06, + "loss": 2.7364, "step": 245300 }, { "epoch": 0.81, - "learning_rate": 9.375569056566292e-06, - "loss": 2.7434, + "learning_rate": 9.363998556047545e-06, + "loss": 2.7486, "step": 245400 }, { "epoch": 0.81, - "learning_rate": 9.359014683728708e-06, - "loss": 2.7474, + "learning_rate": 9.347439468254573e-06, + "loss": 2.715, "step": 245500 }, { "epoch": 0.81, - "learning_rate": 9.342460310891122e-06, - "loss": 2.7445, + "learning_rate": 9.330880380461602e-06, + "loss": 2.7204, "step": 245600 }, { "epoch": 0.81, - "learning_rate": 9.325905938053538e-06, - "loss": 2.7539, + "learning_rate": 9.31432129266863e-06, + "loss": 2.7261, "step": 245700 }, { "epoch": 0.81, - "learning_rate": 9.309351565215953e-06, - "loss": 2.7381, + "learning_rate": 9.297762204875657e-06, + "loss": 2.7294, "step": 245800 }, { "epoch": 0.81, - "learning_rate": 9.292797192378367e-06, - "loss": 2.7332, + "learning_rate": 9.281203117082686e-06, + "loss": 2.7232, "step": 245900 }, { "epoch": 0.81, - "learning_rate": 9.276242819540783e-06, - "loss": 2.7354, + "learning_rate": 9.264644029289715e-06, + "loss": 2.7211, "step": 246000 }, { - "epoch": 0.81, - "learning_rate": 9.259688446703199e-06, - "loss": 2.7312, + "epoch": 0.82, + "learning_rate": 9.248084941496743e-06, + "loss": 2.7481, "step": 246100 }, { "epoch": 0.82, - "learning_rate": 9.24313407386561e-06, - "loss": 2.7438, + "learning_rate": 9.231525853703772e-06, + "loss": 2.7316, "step": 246200 }, { "epoch": 0.82, - "learning_rate": 9.226579701028026e-06, - "loss": 2.7505, + "learning_rate": 9.2149667659108e-06, + "loss": 2.7335, "step": 246300 }, { "epoch": 0.82, - "learning_rate": 9.210025328190442e-06, - "loss": 2.7491, + "learning_rate": 9.198407678117827e-06, + "loss": 2.7222, "step": 246400 }, { "epoch": 0.82, - "learning_rate": 9.193470955352856e-06, - "loss": 2.7434, + "learning_rate": 9.181848590324858e-06, + "loss": 2.7335, "step": 246500 }, { "epoch": 0.82, - "learning_rate": 9.176916582515272e-06, - "loss": 2.7435, + "learning_rate": 9.165289502531885e-06, + "loss": 2.7255, "step": 246600 }, { "epoch": 0.82, - "learning_rate": 9.160362209677686e-06, - "loss": 2.7374, + "learning_rate": 9.148730414738913e-06, + "loss": 2.7286, "step": 246700 }, { "epoch": 0.82, - "learning_rate": 9.143807836840101e-06, - "loss": 2.725, + "learning_rate": 9.132171326945942e-06, + "loss": 2.724, "step": 246800 }, { "epoch": 0.82, - "learning_rate": 9.127253464002517e-06, - "loss": 2.756, + "learning_rate": 9.115612239152969e-06, + "loss": 2.7224, "step": 246900 }, { "epoch": 0.82, - "learning_rate": 9.110699091164931e-06, - "loss": 2.7279, + "learning_rate": 9.099053151359999e-06, + "loss": 2.7295, "step": 247000 }, { "epoch": 0.82, - "learning_rate": 9.094144718327347e-06, - "loss": 2.7432, + "learning_rate": 9.082494063567026e-06, + "loss": 2.7223, "step": 247100 }, { "epoch": 0.82, - "learning_rate": 9.077590345489762e-06, - "loss": 2.7376, + "learning_rate": 9.065934975774056e-06, + "loss": 2.7345, "step": 247200 }, { "epoch": 0.82, - "learning_rate": 9.061035972652176e-06, - "loss": 2.7393, + "learning_rate": 9.049375887981083e-06, + "loss": 2.7282, "step": 247300 }, { "epoch": 0.82, - "learning_rate": 9.044481599814592e-06, - "loss": 2.7387, + "learning_rate": 9.032816800188112e-06, + "loss": 2.7213, "step": 247400 }, { "epoch": 0.82, - "learning_rate": 9.027927226977006e-06, - "loss": 2.743, + "learning_rate": 9.01625771239514e-06, + "loss": 2.7421, "step": 247500 }, { "epoch": 0.82, - "learning_rate": 9.011372854139421e-06, - "loss": 2.7325, + "learning_rate": 8.999698624602167e-06, + "loss": 2.7305, "step": 247600 }, { "epoch": 0.82, - "learning_rate": 8.994818481301837e-06, - "loss": 2.7272, + "learning_rate": 8.983139536809198e-06, + "loss": 2.7322, "step": 247700 }, { "epoch": 0.82, - "learning_rate": 8.978264108464251e-06, - "loss": 2.7413, + "learning_rate": 8.966580449016224e-06, + "loss": 2.7298, "step": 247800 }, { "epoch": 0.82, - "learning_rate": 8.961709735626667e-06, - "loss": 2.7346, + "learning_rate": 8.950021361223253e-06, + "loss": 2.7287, "step": 247900 }, { "epoch": 0.82, - "learning_rate": 8.945155362789082e-06, - "loss": 2.7456, + "learning_rate": 8.933462273430282e-06, + "loss": 2.7343, "step": 248000 }, { "epoch": 0.82, - "learning_rate": 8.928600989951496e-06, - "loss": 2.7535, + "learning_rate": 8.91690318563731e-06, + "loss": 2.7329, "step": 248100 }, { "epoch": 0.82, - "learning_rate": 8.912046617113912e-06, - "loss": 2.754, + "learning_rate": 8.900344097844339e-06, + "loss": 2.7402, "step": 248200 }, { "epoch": 0.82, - "learning_rate": 8.895492244276326e-06, - "loss": 2.7347, + "learning_rate": 8.883785010051367e-06, + "loss": 2.7289, "step": 248300 }, { "epoch": 0.82, - "learning_rate": 8.87893787143874e-06, - "loss": 2.7304, + "learning_rate": 8.867225922258394e-06, + "loss": 2.7376, "step": 248400 }, { "epoch": 0.82, - "learning_rate": 8.862383498601155e-06, - "loss": 2.7264, + "learning_rate": 8.850666834465423e-06, + "loss": 2.7236, "step": 248500 }, { "epoch": 0.82, - "learning_rate": 8.845829125763571e-06, - "loss": 2.744, + "learning_rate": 8.834107746672452e-06, + "loss": 2.728, "step": 248600 }, { "epoch": 0.82, - "learning_rate": 8.829274752925985e-06, - "loss": 2.729, + "learning_rate": 8.81754865887948e-06, + "loss": 2.7391, "step": 248700 }, { "epoch": 0.82, - "learning_rate": 8.8127203800884e-06, - "loss": 2.7393, + "learning_rate": 8.800989571086509e-06, + "loss": 2.7361, "step": 248800 }, { "epoch": 0.82, - "learning_rate": 8.796166007250815e-06, - "loss": 2.7461, + "learning_rate": 8.784430483293536e-06, + "loss": 2.75, "step": 248900 }, { "epoch": 0.82, - "learning_rate": 8.77961163441323e-06, - "loss": 2.733, + "learning_rate": 8.767871395500566e-06, + "loss": 2.7213, "step": 249000 }, { "epoch": 0.82, - "learning_rate": 8.763057261575646e-06, - "loss": 2.7302, + "learning_rate": 8.751312307707593e-06, + "loss": 2.7327, "step": 249100 }, { "epoch": 0.83, - "learning_rate": 8.74650288873806e-06, - "loss": 2.7356, + "learning_rate": 8.734753219914622e-06, + "loss": 2.7299, "step": 249200 }, { "epoch": 0.83, - "learning_rate": 8.729948515900476e-06, - "loss": 2.7453, + "learning_rate": 8.71819413212165e-06, + "loss": 2.722, "step": 249300 }, { "epoch": 0.83, - "learning_rate": 8.713394143062891e-06, - "loss": 2.7319, + "learning_rate": 8.701635044328677e-06, + "loss": 2.7191, "step": 249400 }, { "epoch": 0.83, - "learning_rate": 8.696839770225305e-06, - "loss": 2.7295, + "learning_rate": 8.685075956535707e-06, + "loss": 2.7378, "step": 249500 }, { "epoch": 0.83, - "learning_rate": 8.680285397387721e-06, - "loss": 2.7342, + "learning_rate": 8.668516868742734e-06, + "loss": 2.7283, "step": 249600 }, { "epoch": 0.83, - "learning_rate": 8.663731024550135e-06, - "loss": 2.748, + "learning_rate": 8.651957780949765e-06, + "loss": 2.7348, "step": 249700 }, { "epoch": 0.83, - "learning_rate": 8.64717665171255e-06, - "loss": 2.7412, + "learning_rate": 8.635398693156791e-06, + "loss": 2.7295, "step": 249800 }, { "epoch": 0.83, - "learning_rate": 8.630622278874966e-06, - "loss": 2.7463, + "learning_rate": 8.61883960536382e-06, + "loss": 2.7287, "step": 249900 }, { "epoch": 0.83, - "learning_rate": 8.61406790603738e-06, - "loss": 2.733, + "learning_rate": 8.602280517570849e-06, + "loss": 2.7304, "step": 250000 }, { "epoch": 0.83, - "learning_rate": 8.597513533199796e-06, - "loss": 2.7451, + "learning_rate": 8.585721429777877e-06, + "loss": 2.7209, "step": 250100 }, { "epoch": 0.83, - "learning_rate": 8.580959160362211e-06, - "loss": 2.7157, + "learning_rate": 8.569162341984906e-06, + "loss": 2.7467, "step": 250200 }, { "epoch": 0.83, - "learning_rate": 8.564404787524625e-06, - "loss": 2.7447, + "learning_rate": 8.552603254191933e-06, + "loss": 2.7398, "step": 250300 }, { "epoch": 0.83, - "learning_rate": 8.547850414687041e-06, - "loss": 2.7285, + "learning_rate": 8.536044166398961e-06, + "loss": 2.7311, "step": 250400 }, { "epoch": 0.83, - "learning_rate": 8.531296041849455e-06, - "loss": 2.7462, + "learning_rate": 8.51948507860599e-06, + "loss": 2.7132, "step": 250500 }, { "epoch": 0.83, - "learning_rate": 8.514741669011869e-06, - "loss": 2.7406, + "learning_rate": 8.502925990813019e-06, + "loss": 2.727, "step": 250600 }, { "epoch": 0.83, - "learning_rate": 8.498187296174285e-06, - "loss": 2.7325, + "learning_rate": 8.486366903020047e-06, + "loss": 2.7394, "step": 250700 }, { "epoch": 0.83, - "learning_rate": 8.481632923336698e-06, - "loss": 2.7378, + "learning_rate": 8.469807815227076e-06, + "loss": 2.7202, "step": 250800 }, { "epoch": 0.83, - "learning_rate": 8.465078550499114e-06, - "loss": 2.7419, + "learning_rate": 8.453248727434103e-06, + "loss": 2.7196, "step": 250900 }, { "epoch": 0.83, - "learning_rate": 8.44852417766153e-06, - "loss": 2.7302, + "learning_rate": 8.436689639641131e-06, + "loss": 2.7282, "step": 251000 }, { "epoch": 0.83, - "learning_rate": 8.431969804823944e-06, - "loss": 2.7363, + "learning_rate": 8.42013055184816e-06, + "loss": 2.725, "step": 251100 }, { "epoch": 0.83, - "learning_rate": 8.41541543198636e-06, - "loss": 2.7451, + "learning_rate": 8.403571464055189e-06, + "loss": 2.7321, "step": 251200 }, { "epoch": 0.83, - "learning_rate": 8.398861059148775e-06, - "loss": 2.728, + "learning_rate": 8.387012376262217e-06, + "loss": 2.7328, "step": 251300 }, { "epoch": 0.83, - "learning_rate": 8.382306686311189e-06, - "loss": 2.7401, + "learning_rate": 8.370453288469244e-06, + "loss": 2.7245, "step": 251400 }, { "epoch": 0.83, - "learning_rate": 8.365752313473605e-06, - "loss": 2.7444, + "learning_rate": 8.353894200676274e-06, + "loss": 2.7153, "step": 251500 }, { "epoch": 0.83, - "learning_rate": 8.349197940636019e-06, - "loss": 2.723, + "learning_rate": 8.337335112883301e-06, + "loss": 2.7212, "step": 251600 }, { "epoch": 0.83, - "learning_rate": 8.332643567798434e-06, - "loss": 2.7401, + "learning_rate": 8.320776025090332e-06, + "loss": 2.7287, "step": 251700 }, { "epoch": 0.83, - "learning_rate": 8.31608919496085e-06, - "loss": 2.7524, + "learning_rate": 8.304216937297359e-06, + "loss": 2.7498, "step": 251800 }, { "epoch": 0.83, - "learning_rate": 8.299534822123264e-06, - "loss": 2.7397, + "learning_rate": 8.287657849504387e-06, + "loss": 2.7277, "step": 251900 }, { "epoch": 0.83, - "learning_rate": 8.28298044928568e-06, - "loss": 2.7336, + "learning_rate": 8.271098761711416e-06, + "loss": 2.7236, "step": 252000 }, { "epoch": 0.83, - "learning_rate": 8.266426076448095e-06, - "loss": 2.7403, + "learning_rate": 8.254539673918443e-06, + "loss": 2.747, "step": 252100 }, { "epoch": 0.84, - "learning_rate": 8.249871703610509e-06, - "loss": 2.7486, + "learning_rate": 8.237980586125473e-06, + "loss": 2.7302, "step": 252200 }, { "epoch": 0.84, - "learning_rate": 8.233317330772925e-06, - "loss": 2.7298, + "learning_rate": 8.2214214983325e-06, + "loss": 2.7151, "step": 252300 }, { "epoch": 0.84, - "learning_rate": 8.216762957935339e-06, - "loss": 2.7428, + "learning_rate": 8.204862410539528e-06, + "loss": 2.7216, "step": 252400 }, { "epoch": 0.84, - "learning_rate": 8.200208585097754e-06, - "loss": 2.7441, + "learning_rate": 8.188303322746557e-06, + "loss": 2.7344, "step": 252500 }, { "epoch": 0.84, - "learning_rate": 8.18365421226017e-06, - "loss": 2.7311, + "learning_rate": 8.171744234953586e-06, + "loss": 2.7268, "step": 252600 }, { "epoch": 0.84, - "learning_rate": 8.167099839422584e-06, - "loss": 2.7507, + "learning_rate": 8.155185147160614e-06, + "loss": 2.7449, "step": 252700 }, { "epoch": 0.84, - "learning_rate": 8.150545466584998e-06, - "loss": 2.7363, + "learning_rate": 8.138626059367641e-06, + "loss": 2.7301, "step": 252800 }, { "epoch": 0.84, - "learning_rate": 8.133991093747414e-06, - "loss": 2.746, + "learning_rate": 8.12206697157467e-06, + "loss": 2.7315, "step": 252900 }, { "epoch": 0.84, - "learning_rate": 8.117436720909828e-06, - "loss": 2.728, + "learning_rate": 8.105507883781698e-06, + "loss": 2.7146, "step": 253000 }, { "epoch": 0.84, - "learning_rate": 8.100882348072243e-06, - "loss": 2.7463, + "learning_rate": 8.088948795988727e-06, + "loss": 2.7296, "step": 253100 }, { "epoch": 0.84, - "learning_rate": 8.084327975234659e-06, - "loss": 2.7568, + "learning_rate": 8.072389708195756e-06, + "loss": 2.7314, "step": 253200 }, { "epoch": 0.84, - "learning_rate": 8.067773602397073e-06, - "loss": 2.7404, + "learning_rate": 8.055830620402784e-06, + "loss": 2.7356, "step": 253300 }, { "epoch": 0.84, - "learning_rate": 8.051219229559488e-06, - "loss": 2.74, + "learning_rate": 8.039271532609811e-06, + "loss": 2.7349, "step": 253400 }, { "epoch": 0.84, - "learning_rate": 8.034664856721904e-06, - "loss": 2.7384, + "learning_rate": 8.022712444816841e-06, + "loss": 2.7229, "step": 253500 }, { "epoch": 0.84, - "learning_rate": 8.018110483884318e-06, - "loss": 2.7477, + "learning_rate": 8.006153357023868e-06, + "loss": 2.7295, "step": 253600 }, { "epoch": 0.84, - "learning_rate": 8.001556111046734e-06, - "loss": 2.7455, + "learning_rate": 7.989594269230897e-06, + "loss": 2.7299, "step": 253700 }, { "epoch": 0.84, - "learning_rate": 7.985001738209148e-06, - "loss": 2.725, + "learning_rate": 7.973035181437926e-06, + "loss": 2.7343, "step": 253800 }, { "epoch": 0.84, - "learning_rate": 7.968447365371563e-06, - "loss": 2.7562, + "learning_rate": 7.956476093644952e-06, + "loss": 2.72, "step": 253900 }, { "epoch": 0.84, - "learning_rate": 7.951892992533979e-06, + "learning_rate": 7.939917005851983e-06, "loss": 2.7371, "step": 254000 }, { "epoch": 0.84, - "learning_rate": 7.935338619696393e-06, - "loss": 2.7335, + "learning_rate": 7.92335791805901e-06, + "loss": 2.7391, "step": 254100 }, { "epoch": 0.84, - "learning_rate": 7.918784246858809e-06, - "loss": 2.7311, + "learning_rate": 7.90679883026604e-06, + "loss": 2.7278, "step": 254200 }, { "epoch": 0.84, - "learning_rate": 7.902229874021224e-06, - "loss": 2.741, + "learning_rate": 7.890239742473067e-06, + "loss": 2.7396, "step": 254300 }, { "epoch": 0.84, - "learning_rate": 7.885675501183638e-06, - "loss": 2.7472, + "learning_rate": 7.873680654680095e-06, + "loss": 2.7233, "step": 254400 }, { "epoch": 0.84, - "learning_rate": 7.869121128346054e-06, - "loss": 2.719, + "learning_rate": 7.857121566887124e-06, + "loss": 2.7259, "step": 254500 }, { "epoch": 0.84, - "learning_rate": 7.852566755508468e-06, - "loss": 2.7385, + "learning_rate": 7.840562479094151e-06, + "loss": 2.7401, "step": 254600 }, { "epoch": 0.84, - "learning_rate": 7.836012382670883e-06, - "loss": 2.7426, + "learning_rate": 7.824003391301181e-06, + "loss": 2.7365, "step": 254700 }, { "epoch": 0.84, - "learning_rate": 7.819458009833299e-06, - "loss": 2.7336, + "learning_rate": 7.807444303508208e-06, + "loss": 2.7104, "step": 254800 }, { "epoch": 0.84, - "learning_rate": 7.802903636995713e-06, - "loss": 2.7452, + "learning_rate": 7.790885215715237e-06, + "loss": 2.7307, "step": 254900 }, { "epoch": 0.84, - "learning_rate": 7.786349264158129e-06, - "loss": 2.7408, + "learning_rate": 7.774326127922265e-06, + "loss": 2.7335, "step": 255000 }, { "epoch": 0.84, - "learning_rate": 7.769794891320543e-06, - "loss": 2.7486, + "learning_rate": 7.757767040129294e-06, + "loss": 2.7234, "step": 255100 }, { - "epoch": 0.84, - "learning_rate": 7.753240518482957e-06, - "loss": 2.7353, + "epoch": 0.85, + "learning_rate": 7.741207952336323e-06, + "loss": 2.7422, "step": 255200 }, { "epoch": 0.85, - "learning_rate": 7.736686145645372e-06, - "loss": 2.7298, + "learning_rate": 7.724648864543351e-06, + "loss": 2.7418, "step": 255300 }, { "epoch": 0.85, - "learning_rate": 7.720131772807788e-06, - "loss": 2.7394, + "learning_rate": 7.708089776750378e-06, + "loss": 2.7111, "step": 255400 }, { "epoch": 0.85, - "learning_rate": 7.703577399970202e-06, - "loss": 2.7406, + "learning_rate": 7.691530688957407e-06, + "loss": 2.7152, "step": 255500 }, { "epoch": 0.85, - "learning_rate": 7.687023027132617e-06, - "loss": 2.7442, + "learning_rate": 7.674971601164435e-06, + "loss": 2.7339, "step": 255600 }, { "epoch": 0.85, - "learning_rate": 7.670468654295031e-06, - "loss": 2.7437, + "learning_rate": 7.658412513371464e-06, + "loss": 2.7347, "step": 255700 }, { "epoch": 0.85, - "learning_rate": 7.653914281457447e-06, - "loss": 2.7406, + "learning_rate": 7.641853425578493e-06, + "loss": 2.7201, "step": 255800 }, { "epoch": 0.85, - "learning_rate": 7.637359908619863e-06, - "loss": 2.7434, + "learning_rate": 7.6252943377855195e-06, + "loss": 2.7186, "step": 255900 }, { "epoch": 0.85, - "learning_rate": 7.6208055357822775e-06, - "loss": 2.7418, + "learning_rate": 7.608735249992549e-06, + "loss": 2.7346, "step": 256000 }, { "epoch": 0.85, - "learning_rate": 7.604251162944692e-06, - "loss": 2.7385, + "learning_rate": 7.592176162199577e-06, + "loss": 2.7375, "step": 256100 }, { "epoch": 0.85, - "learning_rate": 7.587696790107107e-06, - "loss": 2.7366, + "learning_rate": 7.575617074406606e-06, + "loss": 2.716, "step": 256200 }, { "epoch": 0.85, - "learning_rate": 7.571142417269522e-06, - "loss": 2.7187, + "learning_rate": 7.559057986613634e-06, + "loss": 2.7346, "step": 256300 }, { "epoch": 0.85, - "learning_rate": 7.5545880444319376e-06, - "loss": 2.7416, + "learning_rate": 7.542498898820662e-06, + "loss": 2.7283, "step": 256400 }, { "epoch": 0.85, - "learning_rate": 7.538033671594352e-06, - "loss": 2.7359, + "learning_rate": 7.525939811027691e-06, + "loss": 2.7511, "step": 256500 }, { "epoch": 0.85, - "learning_rate": 7.521479298756767e-06, - "loss": 2.7326, + "learning_rate": 7.509380723234719e-06, + "loss": 2.7392, "step": 256600 }, { "epoch": 0.85, - "learning_rate": 7.504924925919182e-06, - "loss": 2.7365, + "learning_rate": 7.4928216354417475e-06, + "loss": 2.7376, "step": 256700 }, { "epoch": 0.85, - "learning_rate": 7.488370553081598e-06, - "loss": 2.7412, + "learning_rate": 7.476262547648775e-06, + "loss": 2.7439, "step": 256800 }, { "epoch": 0.85, - "learning_rate": 7.471816180244012e-06, - "loss": 2.7424, + "learning_rate": 7.459703459855803e-06, + "loss": 2.728, "step": 256900 }, { "epoch": 0.85, - "learning_rate": 7.455261807406427e-06, - "loss": 2.7537, + "learning_rate": 7.4431443720628324e-06, + "loss": 2.7225, "step": 257000 }, { "epoch": 0.85, - "learning_rate": 7.438707434568842e-06, - "loss": 2.7385, + "learning_rate": 7.42658528426986e-06, + "loss": 2.7322, "step": 257100 }, { "epoch": 0.85, - "learning_rate": 7.422153061731258e-06, - "loss": 2.7418, + "learning_rate": 7.41002619647689e-06, + "loss": 2.7183, "step": 257200 }, { "epoch": 0.85, - "learning_rate": 7.405598688893671e-06, - "loss": 2.7269, + "learning_rate": 7.393467108683917e-06, + "loss": 2.7408, "step": 257300 }, { "epoch": 0.85, - "learning_rate": 7.389044316056086e-06, - "loss": 2.7453, + "learning_rate": 7.376908020890945e-06, + "loss": 2.7228, "step": 257400 }, { "epoch": 0.85, - "learning_rate": 7.372489943218501e-06, - "loss": 2.7346, + "learning_rate": 7.360348933097974e-06, + "loss": 2.7164, "step": 257500 }, { "epoch": 0.85, - "learning_rate": 7.355935570380916e-06, - "loss": 2.7354, + "learning_rate": 7.3437898453050015e-06, + "loss": 2.7168, "step": 257600 }, { "epoch": 0.85, - "learning_rate": 7.339381197543331e-06, - "loss": 2.7384, + "learning_rate": 7.327230757512031e-06, + "loss": 2.7279, "step": 257700 }, { "epoch": 0.85, - "learning_rate": 7.322826824705746e-06, - "loss": 2.7382, + "learning_rate": 7.310671669719059e-06, + "loss": 2.7405, "step": 257800 }, { "epoch": 0.85, - "learning_rate": 7.306272451868161e-06, - "loss": 2.7356, + "learning_rate": 7.2941125819260865e-06, + "loss": 2.7341, "step": 257900 }, { "epoch": 0.85, - "learning_rate": 7.289718079030576e-06, - "loss": 2.7409, + "learning_rate": 7.277553494133116e-06, + "loss": 2.7095, "step": 258000 }, { "epoch": 0.85, - "learning_rate": 7.273163706192991e-06, - "loss": 2.7443, + "learning_rate": 7.260994406340144e-06, + "loss": 2.7196, "step": 258100 }, { - "epoch": 0.85, - "learning_rate": 7.256609333355406e-06, - "loss": 2.7394, + "epoch": 0.86, + "learning_rate": 7.244435318547173e-06, + "loss": 2.7109, "step": 258200 }, { "epoch": 0.86, - "learning_rate": 7.240054960517821e-06, - "loss": 2.7375, + "learning_rate": 7.227876230754201e-06, + "loss": 2.7292, "step": 258300 }, { "epoch": 0.86, - "learning_rate": 7.223500587680236e-06, - "loss": 2.7428, + "learning_rate": 7.211317142961229e-06, + "loss": 2.7126, "step": 258400 }, { "epoch": 0.86, - "learning_rate": 7.206946214842651e-06, - "loss": 2.7418, + "learning_rate": 7.194758055168257e-06, + "loss": 2.7343, "step": 258500 }, { "epoch": 0.86, - "learning_rate": 7.190391842005066e-06, - "loss": 2.7342, + "learning_rate": 7.178198967375285e-06, + "loss": 2.7322, "step": 258600 }, { "epoch": 0.86, - "learning_rate": 7.173837469167481e-06, - "loss": 2.7337, + "learning_rate": 7.1616398795823145e-06, + "loss": 2.732, "step": 258700 }, { "epoch": 0.86, - "learning_rate": 7.157283096329896e-06, - "loss": 2.7301, + "learning_rate": 7.145080791789342e-06, + "loss": 2.7219, "step": 258800 }, { "epoch": 0.86, - "learning_rate": 7.140728723492311e-06, - "loss": 2.7385, + "learning_rate": 7.12852170399637e-06, + "loss": 2.7284, "step": 258900 }, { "epoch": 0.86, - "learning_rate": 7.124174350654726e-06, - "loss": 2.7435, + "learning_rate": 7.1119626162033995e-06, + "loss": 2.7177, "step": 259000 }, { "epoch": 0.86, - "learning_rate": 7.1076199778171414e-06, - "loss": 2.7416, + "learning_rate": 7.095403528410427e-06, + "loss": 2.7243, "step": 259100 }, { "epoch": 0.86, - "learning_rate": 7.091065604979556e-06, - "loss": 2.7284, + "learning_rate": 7.078844440617456e-06, + "loss": 2.7318, "step": 259200 }, { "epoch": 0.86, - "learning_rate": 7.074511232141971e-06, - "loss": 2.7247, + "learning_rate": 7.062285352824484e-06, + "loss": 2.7144, "step": 259300 }, { "epoch": 0.86, - "learning_rate": 7.057956859304386e-06, - "loss": 2.7632, + "learning_rate": 7.045726265031511e-06, + "loss": 2.7396, "step": 259400 }, { "epoch": 0.86, - "learning_rate": 7.0414024864668e-06, - "loss": 2.7417, + "learning_rate": 7.029167177238541e-06, + "loss": 2.7345, "step": 259500 }, { "epoch": 0.86, - "learning_rate": 7.024848113629215e-06, - "loss": 2.7283, + "learning_rate": 7.0126080894455686e-06, + "loss": 2.7433, "step": 259600 }, { "epoch": 0.86, - "learning_rate": 7.00829374079163e-06, - "loss": 2.7276, + "learning_rate": 6.996049001652598e-06, + "loss": 2.7284, "step": 259700 }, { "epoch": 0.86, - "learning_rate": 6.991739367954045e-06, - "loss": 2.7358, + "learning_rate": 6.979489913859626e-06, + "loss": 2.7347, "step": 259800 }, { "epoch": 0.86, - "learning_rate": 6.97518499511646e-06, - "loss": 2.7347, + "learning_rate": 6.9629308260666535e-06, + "loss": 2.7212, "step": 259900 }, { "epoch": 0.86, - "learning_rate": 6.958630622278875e-06, - "loss": 2.7338, + "learning_rate": 6.946371738273683e-06, + "loss": 2.7116, "step": 260000 }, { "epoch": 0.86, - "learning_rate": 6.94207624944129e-06, - "loss": 2.7307, + "learning_rate": 6.929812650480711e-06, + "loss": 2.7319, "step": 260100 }, { "epoch": 0.86, - "learning_rate": 6.925521876603705e-06, - "loss": 2.7398, + "learning_rate": 6.913253562687739e-06, + "loss": 2.7408, "step": 260200 }, { "epoch": 0.86, - "learning_rate": 6.90896750376612e-06, - "loss": 2.7306, + "learning_rate": 6.896694474894767e-06, + "loss": 2.7192, "step": 260300 }, { "epoch": 0.86, - "learning_rate": 6.892413130928535e-06, - "loss": 2.7517, + "learning_rate": 6.880135387101795e-06, + "loss": 2.7284, "step": 260400 }, { "epoch": 0.86, - "learning_rate": 6.87585875809095e-06, - "loss": 2.7369, + "learning_rate": 6.863576299308824e-06, + "loss": 2.7441, "step": 260500 }, { "epoch": 0.86, - "learning_rate": 6.859304385253365e-06, - "loss": 2.7477, + "learning_rate": 6.847017211515852e-06, + "loss": 2.7253, "step": 260600 }, { "epoch": 0.86, - "learning_rate": 6.84275001241578e-06, - "loss": 2.7353, + "learning_rate": 6.8304581237228815e-06, + "loss": 2.7369, "step": 260700 }, { "epoch": 0.86, - "learning_rate": 6.826195639578195e-06, - "loss": 2.7442, + "learning_rate": 6.813899035929909e-06, + "loss": 2.7321, "step": 260800 }, { "epoch": 0.86, - "learning_rate": 6.8096412667406104e-06, - "loss": 2.7413, + "learning_rate": 6.797339948136937e-06, + "loss": 2.728, "step": 260900 }, { "epoch": 0.86, - "learning_rate": 6.793086893903025e-06, - "loss": 2.7339, + "learning_rate": 6.780780860343966e-06, + "loss": 2.7196, "step": 261000 }, { "epoch": 0.86, - "learning_rate": 6.77653252106544e-06, - "loss": 2.7319, + "learning_rate": 6.764221772550993e-06, + "loss": 2.7382, "step": 261100 }, { - "epoch": 0.86, - "learning_rate": 6.759978148227855e-06, - "loss": 2.7341, + "epoch": 0.87, + "learning_rate": 6.747662684758023e-06, + "loss": 2.7289, "step": 261200 }, { "epoch": 0.87, - "learning_rate": 6.7434237753902705e-06, - "loss": 2.7282, + "learning_rate": 6.731103596965051e-06, + "loss": 2.7446, "step": 261300 }, { "epoch": 0.87, - "learning_rate": 6.726869402552685e-06, - "loss": 2.7514, + "learning_rate": 6.714544509172078e-06, + "loss": 2.7189, "step": 261400 }, { "epoch": 0.87, - "learning_rate": 6.7103150297151e-06, - "loss": 2.7406, + "learning_rate": 6.697985421379108e-06, + "loss": 2.7191, "step": 261500 }, { "epoch": 0.87, - "learning_rate": 6.693760656877515e-06, - "loss": 2.7351, + "learning_rate": 6.681426333586136e-06, + "loss": 2.7291, "step": 261600 }, { "epoch": 0.87, - "learning_rate": 6.677206284039929e-06, - "loss": 2.7404, + "learning_rate": 6.664867245793165e-06, + "loss": 2.7239, "step": 261700 }, { "epoch": 0.87, - "learning_rate": 6.660651911202344e-06, - "loss": 2.7455, + "learning_rate": 6.648308158000193e-06, + "loss": 2.7359, "step": 261800 }, { "epoch": 0.87, - "learning_rate": 6.6440975383647584e-06, - "loss": 2.7393, + "learning_rate": 6.6317490702072206e-06, + "loss": 2.7416, "step": 261900 }, { "epoch": 0.87, - "learning_rate": 6.627543165527174e-06, - "loss": 2.7226, + "learning_rate": 6.615189982414249e-06, + "loss": 2.7367, "step": 262000 }, { "epoch": 0.87, - "learning_rate": 6.610988792689589e-06, - "loss": 2.7289, + "learning_rate": 6.598630894621277e-06, + "loss": 2.7358, "step": 262100 }, { "epoch": 0.87, - "learning_rate": 6.594434419852004e-06, + "learning_rate": 6.582071806828306e-06, "loss": 2.7254, "step": 262200 }, { "epoch": 0.87, - "learning_rate": 6.5778800470144185e-06, - "loss": 2.7397, + "learning_rate": 6.565512719035334e-06, + "loss": 2.7489, "step": 262300 }, { "epoch": 0.87, - "learning_rate": 6.561325674176834e-06, - "loss": 2.7507, + "learning_rate": 6.548953631242362e-06, + "loss": 2.7352, "step": 262400 }, { "epoch": 0.87, - "learning_rate": 6.544771301339249e-06, - "loss": 2.7465, + "learning_rate": 6.532394543449391e-06, + "loss": 2.7318, "step": 262500 }, { "epoch": 0.87, - "learning_rate": 6.528216928501664e-06, - "loss": 2.7299, + "learning_rate": 6.515835455656419e-06, + "loss": 2.7296, "step": 262600 }, { "epoch": 0.87, - "learning_rate": 6.5116625556640786e-06, - "loss": 2.7408, + "learning_rate": 6.499276367863448e-06, + "loss": 2.737, "step": 262700 }, { "epoch": 0.87, - "learning_rate": 6.495108182826494e-06, - "loss": 2.728, + "learning_rate": 6.4827172800704755e-06, + "loss": 2.732, "step": 262800 }, { "epoch": 0.87, - "learning_rate": 6.478553809988909e-06, - "loss": 2.7467, + "learning_rate": 6.466158192277503e-06, + "loss": 2.7358, "step": 262900 }, { "epoch": 0.87, - "learning_rate": 6.461999437151324e-06, - "loss": 2.7354, + "learning_rate": 6.449599104484533e-06, + "loss": 2.7234, "step": 263000 }, { "epoch": 0.87, - "learning_rate": 6.445445064313739e-06, - "loss": 2.7388, + "learning_rate": 6.4330400166915604e-06, + "loss": 2.7303, "step": 263100 }, { "epoch": 0.87, - "learning_rate": 6.428890691476154e-06, - "loss": 2.7328, + "learning_rate": 6.41648092889859e-06, + "loss": 2.7186, "step": 263200 }, { "epoch": 0.87, - "learning_rate": 6.412336318638569e-06, - "loss": 2.7449, + "learning_rate": 6.399921841105618e-06, + "loss": 2.7135, "step": 263300 }, { "epoch": 0.87, - "learning_rate": 6.395781945800984e-06, - "loss": 2.7309, + "learning_rate": 6.383362753312645e-06, + "loss": 2.7306, "step": 263400 }, { "epoch": 0.87, - "learning_rate": 6.379227572963399e-06, - "loss": 2.7438, + "learning_rate": 6.366803665519675e-06, + "loss": 2.7262, "step": 263500 }, { "epoch": 0.87, - "learning_rate": 6.362673200125814e-06, - "loss": 2.7372, + "learning_rate": 6.350244577726703e-06, + "loss": 2.7382, "step": 263600 }, { "epoch": 0.87, - "learning_rate": 6.346118827288229e-06, - "loss": 2.7253, + "learning_rate": 6.333685489933731e-06, + "loss": 2.7274, "step": 263700 }, { "epoch": 0.87, - "learning_rate": 6.329564454450644e-06, - "loss": 2.735, + "learning_rate": 6.317126402140759e-06, + "loss": 2.7158, "step": 263800 }, { "epoch": 0.87, - "learning_rate": 6.313010081613058e-06, - "loss": 2.7235, + "learning_rate": 6.300567314347787e-06, + "loss": 2.7248, "step": 263900 }, { "epoch": 0.87, - "learning_rate": 6.296455708775473e-06, - "loss": 2.7292, + "learning_rate": 6.284008226554816e-06, + "loss": 2.7407, "step": 264000 }, { "epoch": 0.87, - "learning_rate": 6.2799013359378875e-06, - "loss": 2.7258, + "learning_rate": 6.267449138761844e-06, + "loss": 2.7307, "step": 264100 }, { "epoch": 0.87, - "learning_rate": 6.263346963100303e-06, - "loss": 2.7417, + "learning_rate": 6.250890050968873e-06, + "loss": 2.729, "step": 264200 }, { "epoch": 0.88, - "learning_rate": 6.246792590262719e-06, - "loss": 2.7576, + "learning_rate": 6.234330963175901e-06, + "loss": 2.7309, "step": 264300 }, { "epoch": 0.88, - "learning_rate": 6.230238217425133e-06, - "loss": 2.7562, + "learning_rate": 6.21777187538293e-06, + "loss": 2.73, "step": 264400 }, { "epoch": 0.88, - "learning_rate": 6.2136838445875475e-06, - "loss": 2.7444, + "learning_rate": 6.2012127875899575e-06, + "loss": 2.7356, "step": 264500 }, { "epoch": 0.88, - "learning_rate": 6.197129471749963e-06, - "loss": 2.7318, + "learning_rate": 6.184653699796985e-06, + "loss": 2.7466, "step": 264600 }, { "epoch": 0.88, - "learning_rate": 6.180575098912378e-06, - "loss": 2.7317, + "learning_rate": 6.168094612004014e-06, + "loss": 2.7231, "step": 264700 }, { "epoch": 0.88, - "learning_rate": 6.164020726074793e-06, - "loss": 2.7188, + "learning_rate": 6.1515355242110425e-06, + "loss": 2.7323, "step": 264800 }, { "epoch": 0.88, - "learning_rate": 6.147466353237208e-06, - "loss": 2.738, + "learning_rate": 6.134976436418071e-06, + "loss": 2.7242, "step": 264900 }, { "epoch": 0.88, - "learning_rate": 6.130911980399623e-06, - "loss": 2.7404, + "learning_rate": 6.1184173486251e-06, + "loss": 2.6975, "step": 265000 }, { "epoch": 0.88, - "learning_rate": 6.114357607562038e-06, - "loss": 2.7448, + "learning_rate": 6.1018582608321275e-06, + "loss": 2.7308, "step": 265100 }, { "epoch": 0.88, - "learning_rate": 6.097803234724453e-06, - "loss": 2.7223, + "learning_rate": 6.085299173039156e-06, + "loss": 2.7255, "step": 265200 }, { "epoch": 0.88, - "learning_rate": 6.081248861886868e-06, - "loss": 2.7233, + "learning_rate": 6.068740085246185e-06, + "loss": 2.739, "step": 265300 }, { "epoch": 0.88, - "learning_rate": 6.064694489049283e-06, - "loss": 2.7413, + "learning_rate": 6.0521809974532124e-06, + "loss": 2.7254, "step": 265400 }, { "epoch": 0.88, - "learning_rate": 6.048140116211697e-06, - "loss": 2.7228, + "learning_rate": 6.035621909660241e-06, + "loss": 2.7237, "step": 265500 }, { "epoch": 0.88, - "learning_rate": 6.031585743374112e-06, - "loss": 2.7293, + "learning_rate": 6.019062821867269e-06, + "loss": 2.735, "step": 265600 }, { "epoch": 0.88, - "learning_rate": 6.015031370536528e-06, - "loss": 2.7478, + "learning_rate": 6.002503734074297e-06, + "loss": 2.727, "step": 265700 }, { "epoch": 0.88, - "learning_rate": 5.9984769976989425e-06, - "loss": 2.7355, + "learning_rate": 5.985944646281326e-06, + "loss": 2.7353, "step": 265800 }, { "epoch": 0.88, - "learning_rate": 5.981922624861357e-06, - "loss": 2.7382, + "learning_rate": 5.969385558488355e-06, + "loss": 2.711, "step": 265900 }, { "epoch": 0.88, - "learning_rate": 5.965368252023772e-06, - "loss": 2.7416, + "learning_rate": 5.952826470695383e-06, + "loss": 2.7353, "step": 266000 }, { "epoch": 0.88, - "learning_rate": 5.948813879186188e-06, - "loss": 2.7356, + "learning_rate": 5.936267382902411e-06, + "loss": 2.7081, "step": 266100 }, { "epoch": 0.88, - "learning_rate": 5.9322595063486026e-06, - "loss": 2.7427, + "learning_rate": 5.91970829510944e-06, + "loss": 2.7261, "step": 266200 }, { "epoch": 0.88, - "learning_rate": 5.915705133511017e-06, - "loss": 2.7164, + "learning_rate": 5.903149207316467e-06, + "loss": 2.7307, "step": 266300 }, { "epoch": 0.88, - "learning_rate": 5.899150760673432e-06, - "loss": 2.7473, + "learning_rate": 5.886590119523496e-06, + "loss": 2.7225, "step": 266400 }, { "epoch": 0.88, - "learning_rate": 5.882596387835848e-06, - "loss": 2.7295, + "learning_rate": 5.8700310317305246e-06, + "loss": 2.7391, "step": 266500 }, { "epoch": 0.88, - "learning_rate": 5.866042014998262e-06, - "loss": 2.7366, + "learning_rate": 5.853471943937552e-06, + "loss": 2.7216, "step": 266600 }, { "epoch": 0.88, - "learning_rate": 5.8494876421606766e-06, - "loss": 2.7421, + "learning_rate": 5.836912856144581e-06, + "loss": 2.7253, "step": 266700 }, { "epoch": 0.88, - "learning_rate": 5.832933269323091e-06, - "loss": 2.7299, + "learning_rate": 5.8203537683516095e-06, + "loss": 2.7175, "step": 266800 }, { "epoch": 0.88, - "learning_rate": 5.816378896485507e-06, - "loss": 2.7329, + "learning_rate": 5.803794680558638e-06, + "loss": 2.7278, "step": 266900 }, { "epoch": 0.88, - "learning_rate": 5.799824523647922e-06, - "loss": 2.7357, + "learning_rate": 5.787235592765666e-06, + "loss": 2.7356, "step": 267000 }, { "epoch": 0.88, - "learning_rate": 5.783270150810337e-06, - "loss": 2.7302, + "learning_rate": 5.7706765049726945e-06, + "loss": 2.7228, "step": 267100 }, { "epoch": 0.88, - "learning_rate": 5.766715777972751e-06, - "loss": 2.7354, + "learning_rate": 5.754117417179722e-06, + "loss": 2.7192, "step": 267200 }, { - "epoch": 0.88, - "learning_rate": 5.750161405135167e-06, - "loss": 2.7416, + "epoch": 0.89, + "learning_rate": 5.737558329386751e-06, + "loss": 2.7422, "step": 267300 }, { "epoch": 0.89, - "learning_rate": 5.733607032297582e-06, - "loss": 2.7393, + "learning_rate": 5.7209992415937795e-06, + "loss": 2.7233, "step": 267400 }, { "epoch": 0.89, - "learning_rate": 5.717052659459997e-06, - "loss": 2.7399, + "learning_rate": 5.704440153800808e-06, + "loss": 2.7247, "step": 267500 }, { "epoch": 0.89, - "learning_rate": 5.7004982866224115e-06, - "loss": 2.738, + "learning_rate": 5.687881066007836e-06, + "loss": 2.7277, "step": 267600 }, { "epoch": 0.89, - "learning_rate": 5.683943913784827e-06, - "loss": 2.7331, + "learning_rate": 5.6713219782148644e-06, + "loss": 2.7189, "step": 267700 }, { "epoch": 0.89, - "learning_rate": 5.667389540947241e-06, - "loss": 2.7401, + "learning_rate": 5.654762890421893e-06, + "loss": 2.7251, "step": 267800 }, { "epoch": 0.89, - "learning_rate": 5.650835168109656e-06, - "loss": 2.7278, + "learning_rate": 5.638203802628921e-06, + "loss": 2.718, "step": 267900 }, { "epoch": 0.89, - "learning_rate": 5.6342807952720715e-06, - "loss": 2.7334, + "learning_rate": 5.621644714835949e-06, + "loss": 2.7373, "step": 268000 }, { "epoch": 0.89, - "learning_rate": 5.617726422434486e-06, - "loss": 2.7406, + "learning_rate": 5.605085627042977e-06, + "loss": 2.7404, "step": 268100 }, { "epoch": 0.89, - "learning_rate": 5.601172049596901e-06, - "loss": 2.7436, + "learning_rate": 5.588526539250006e-06, + "loss": 2.7182, "step": 268200 }, { "epoch": 0.89, - "learning_rate": 5.584617676759316e-06, - "loss": 2.7337, + "learning_rate": 5.571967451457034e-06, + "loss": 2.7222, "step": 268300 }, { "epoch": 0.89, - "learning_rate": 5.568063303921732e-06, - "loss": 2.7283, + "learning_rate": 5.555408363664063e-06, + "loss": 2.7348, "step": 268400 }, { "epoch": 0.89, - "learning_rate": 5.551508931084146e-06, - "loss": 2.7433, + "learning_rate": 5.538849275871092e-06, + "loss": 2.7245, "step": 268500 }, { "epoch": 0.89, - "learning_rate": 5.534954558246561e-06, - "loss": 2.7405, + "learning_rate": 5.522290188078119e-06, + "loss": 2.7349, "step": 268600 }, { "epoch": 0.89, - "learning_rate": 5.518400185408976e-06, - "loss": 2.7498, + "learning_rate": 5.505731100285148e-06, + "loss": 2.7358, "step": 268700 }, { "epoch": 0.89, - "learning_rate": 5.501845812571392e-06, - "loss": 2.7533, + "learning_rate": 5.489172012492176e-06, + "loss": 2.7235, "step": 268800 }, { "epoch": 0.89, - "learning_rate": 5.485291439733806e-06, - "loss": 2.7439, + "learning_rate": 5.472612924699204e-06, + "loss": 2.7184, "step": 268900 }, { "epoch": 0.89, - "learning_rate": 5.46873706689622e-06, - "loss": 2.7233, + "learning_rate": 5.456053836906233e-06, + "loss": 2.7291, "step": 269000 }, { "epoch": 0.89, - "learning_rate": 5.452182694058636e-06, - "loss": 2.7346, + "learning_rate": 5.439494749113261e-06, + "loss": 2.7339, "step": 269100 }, { "epoch": 0.89, - "learning_rate": 5.435628321221051e-06, - "loss": 2.7388, + "learning_rate": 5.422935661320289e-06, + "loss": 2.7224, "step": 269200 }, { "epoch": 0.89, - "learning_rate": 5.419073948383466e-06, - "loss": 2.7291, + "learning_rate": 5.406376573527318e-06, + "loss": 2.7226, "step": 269300 }, { "epoch": 0.89, - "learning_rate": 5.4025195755458805e-06, - "loss": 2.7388, + "learning_rate": 5.3898174857343465e-06, + "loss": 2.728, "step": 269400 }, { "epoch": 0.89, - "learning_rate": 5.385965202708296e-06, - "loss": 2.7378, + "learning_rate": 5.373258397941375e-06, + "loss": 2.7079, "step": 269500 }, { "epoch": 0.89, - "learning_rate": 5.369410829870711e-06, - "loss": 2.7322, + "learning_rate": 5.356699310148403e-06, + "loss": 2.7412, "step": 269600 }, { "epoch": 0.89, - "learning_rate": 5.352856457033126e-06, - "loss": 2.7328, + "learning_rate": 5.340140222355431e-06, + "loss": 2.7284, "step": 269700 }, { "epoch": 0.89, - "learning_rate": 5.3363020841955405e-06, - "loss": 2.7428, + "learning_rate": 5.323581134562459e-06, + "loss": 2.7307, "step": 269800 }, { "epoch": 0.89, - "learning_rate": 5.319747711357956e-06, - "loss": 2.739, + "learning_rate": 5.307022046769488e-06, + "loss": 2.7229, "step": 269900 }, { "epoch": 0.89, - "learning_rate": 5.30319333852037e-06, - "loss": 2.7348, + "learning_rate": 5.2904629589765164e-06, + "loss": 2.7277, "step": 270000 }, { "epoch": 0.89, - "learning_rate": 5.286638965682785e-06, - "loss": 2.7324, + "learning_rate": 5.273903871183544e-06, + "loss": 2.7349, "step": 270100 }, { "epoch": 0.89, - "learning_rate": 5.2700845928452006e-06, - "loss": 2.7443, + "learning_rate": 5.257344783390573e-06, + "loss": 2.7279, "step": 270200 }, { - "epoch": 0.89, - "learning_rate": 5.253530220007615e-06, - "loss": 2.7262, + "epoch": 0.9, + "learning_rate": 5.240785695597601e-06, + "loss": 2.7321, "step": 270300 }, { "epoch": 0.9, - "learning_rate": 5.23697584717003e-06, - "loss": 2.7411, + "learning_rate": 5.22422660780463e-06, + "loss": 2.7224, "step": 270400 }, { "epoch": 0.9, - "learning_rate": 5.220421474332445e-06, - "loss": 2.7476, + "learning_rate": 5.207667520011658e-06, + "loss": 2.7141, "step": 270500 }, { "epoch": 0.9, - "learning_rate": 5.203867101494861e-06, - "loss": 2.7523, + "learning_rate": 5.1911084322186855e-06, + "loss": 2.732, "step": 270600 }, { "epoch": 0.9, - "learning_rate": 5.187312728657275e-06, - "loss": 2.7257, + "learning_rate": 5.174549344425714e-06, + "loss": 2.7396, "step": 270700 }, { "epoch": 0.9, - "learning_rate": 5.17075835581969e-06, - "loss": 2.7373, + "learning_rate": 5.157990256632743e-06, + "loss": 2.731, "step": 270800 }, { "epoch": 0.9, - "learning_rate": 5.154203982982105e-06, - "loss": 2.7168, + "learning_rate": 5.141431168839771e-06, + "loss": 2.7335, "step": 270900 }, { "epoch": 0.9, - "learning_rate": 5.137649610144521e-06, - "loss": 2.7335, + "learning_rate": 5.1248720810468e-06, + "loss": 2.7279, "step": 271000 }, { "epoch": 0.9, - "learning_rate": 5.121095237306935e-06, - "loss": 2.7365, + "learning_rate": 5.108312993253828e-06, + "loss": 2.7192, "step": 271100 }, { "epoch": 0.9, - "learning_rate": 5.1045408644693494e-06, - "loss": 2.747, + "learning_rate": 5.091753905460856e-06, + "loss": 2.7251, "step": 271200 }, { "epoch": 0.9, - "learning_rate": 5.087986491631764e-06, - "loss": 2.7509, + "learning_rate": 5.075194817667885e-06, + "loss": 2.7374, "step": 271300 }, { "epoch": 0.9, - "learning_rate": 5.07143211879418e-06, - "loss": 2.7305, + "learning_rate": 5.058635729874913e-06, + "loss": 2.7258, "step": 271400 }, { "epoch": 0.9, - "learning_rate": 5.054877745956595e-06, - "loss": 2.7419, + "learning_rate": 5.042076642081941e-06, + "loss": 2.7273, "step": 271500 }, { "epoch": 0.9, - "learning_rate": 5.0383233731190095e-06, - "loss": 2.7257, + "learning_rate": 5.025517554288969e-06, + "loss": 2.7369, "step": 271600 }, { "epoch": 0.9, - "learning_rate": 5.021769000281424e-06, - "loss": 2.7411, + "learning_rate": 5.008958466495998e-06, + "loss": 2.7309, "step": 271700 }, { "epoch": 0.9, - "learning_rate": 5.00521462744384e-06, - "loss": 2.7345, + "learning_rate": 4.992399378703026e-06, + "loss": 2.7458, "step": 271800 }, { "epoch": 0.9, - "learning_rate": 4.988660254606255e-06, - "loss": 2.7409, + "learning_rate": 4.975840290910055e-06, + "loss": 2.735, "step": 271900 }, { "epoch": 0.9, - "learning_rate": 4.9721058817686695e-06, - "loss": 2.7297, + "learning_rate": 4.9592812031170835e-06, + "loss": 2.7119, "step": 272000 }, { "epoch": 0.9, - "learning_rate": 4.955551508931084e-06, - "loss": 2.7472, + "learning_rate": 4.942722115324111e-06, + "loss": 2.7245, "step": 272100 }, { "epoch": 0.9, - "learning_rate": 4.938997136093499e-06, - "loss": 2.7425, + "learning_rate": 4.92616302753114e-06, + "loss": 2.7267, "step": 272200 }, { "epoch": 0.9, - "learning_rate": 4.922442763255914e-06, - "loss": 2.736, + "learning_rate": 4.909603939738168e-06, + "loss": 2.7273, "step": 272300 }, { "epoch": 0.9, - "learning_rate": 4.905888390418329e-06, - "loss": 2.7397, + "learning_rate": 4.893044851945196e-06, + "loss": 2.73, "step": 272400 }, { "epoch": 0.9, - "learning_rate": 4.889334017580744e-06, - "loss": 2.7353, + "learning_rate": 4.876485764152225e-06, + "loss": 2.7165, "step": 272500 }, { "epoch": 0.9, - "learning_rate": 4.872779644743159e-06, - "loss": 2.7474, + "learning_rate": 4.8599266763592525e-06, + "loss": 2.7229, "step": 272600 }, { "epoch": 0.9, - "learning_rate": 4.856225271905574e-06, - "loss": 2.7411, + "learning_rate": 4.843367588566281e-06, + "loss": 2.7099, "step": 272700 }, { "epoch": 0.9, - "learning_rate": 4.839670899067989e-06, - "loss": 2.7318, + "learning_rate": 4.82680850077331e-06, + "loss": 2.7166, "step": 272800 }, { "epoch": 0.9, - "learning_rate": 4.8231165262304045e-06, - "loss": 2.7461, + "learning_rate": 4.810249412980338e-06, + "loss": 2.7251, "step": 272900 }, { "epoch": 0.9, - "learning_rate": 4.806562153392819e-06, - "loss": 2.7363, + "learning_rate": 4.793690325187367e-06, + "loss": 2.7302, "step": 273000 }, { "epoch": 0.9, - "learning_rate": 4.790007780555234e-06, - "loss": 2.7459, + "learning_rate": 4.777131237394395e-06, + "loss": 2.7249, "step": 273100 }, { "epoch": 0.9, - "learning_rate": 4.773453407717649e-06, - "loss": 2.7409, + "learning_rate": 4.7605721496014225e-06, + "loss": 2.7428, "step": 273200 }, { - "epoch": 0.9, - "learning_rate": 4.756899034880064e-06, - "loss": 2.7221, + "epoch": 0.91, + "learning_rate": 4.744013061808451e-06, + "loss": 2.7258, "step": 273300 }, { "epoch": 0.91, - "learning_rate": 4.7403446620424785e-06, - "loss": 2.7421, + "learning_rate": 4.72745397401548e-06, + "loss": 2.7171, "step": 273400 }, { "epoch": 0.91, - "learning_rate": 4.723790289204893e-06, - "loss": 2.7324, + "learning_rate": 4.710894886222508e-06, + "loss": 2.7237, "step": 273500 }, { "epoch": 0.91, - "learning_rate": 4.707235916367309e-06, - "loss": 2.7382, + "learning_rate": 4.694335798429536e-06, + "loss": 2.7271, "step": 273600 }, { "epoch": 0.91, - "learning_rate": 4.690681543529724e-06, - "loss": 2.7275, + "learning_rate": 4.677776710636565e-06, + "loss": 2.7188, "step": 273700 }, { "epoch": 0.91, - "learning_rate": 4.6741271706921385e-06, - "loss": 2.7431, + "learning_rate": 4.661217622843593e-06, + "loss": 2.7193, "step": 273800 }, { "epoch": 0.91, - "learning_rate": 4.657572797854553e-06, - "loss": 2.7302, + "learning_rate": 4.644658535050622e-06, + "loss": 2.7333, "step": 273900 }, { "epoch": 0.91, - "learning_rate": 4.641018425016969e-06, - "loss": 2.7286, + "learning_rate": 4.62809944725765e-06, + "loss": 2.7178, "step": 274000 }, { "epoch": 0.91, - "learning_rate": 4.624464052179384e-06, - "loss": 2.7352, + "learning_rate": 4.611540359464677e-06, + "loss": 2.7317, "step": 274100 }, { "epoch": 0.91, - "learning_rate": 4.6079096793417986e-06, - "loss": 2.7317, + "learning_rate": 4.594981271671706e-06, + "loss": 2.713, "step": 274200 }, { "epoch": 0.91, - "learning_rate": 4.591355306504213e-06, - "loss": 2.7463, + "learning_rate": 4.578422183878735e-06, + "loss": 2.7263, "step": 274300 }, { "epoch": 0.91, - "learning_rate": 4.574800933666628e-06, - "loss": 2.7348, + "learning_rate": 4.561863096085763e-06, + "loss": 2.7143, "step": 274400 }, { "epoch": 0.91, - "learning_rate": 4.558246560829043e-06, - "loss": 2.735, + "learning_rate": 4.545304008292792e-06, + "loss": 2.7164, "step": 274500 }, { "epoch": 0.91, - "learning_rate": 4.541692187991458e-06, - "loss": 2.7315, + "learning_rate": 4.52874492049982e-06, + "loss": 2.7314, "step": 274600 }, { "epoch": 0.91, - "learning_rate": 4.525137815153873e-06, - "loss": 2.7368, + "learning_rate": 4.512185832706848e-06, + "loss": 2.7256, "step": 274700 }, { "epoch": 0.91, - "learning_rate": 4.508583442316288e-06, - "loss": 2.7232, + "learning_rate": 4.495626744913877e-06, + "loss": 2.7266, "step": 274800 }, { "epoch": 0.91, - "learning_rate": 4.492029069478703e-06, - "loss": 2.7443, + "learning_rate": 4.4790676571209045e-06, + "loss": 2.7285, "step": 274900 }, { "epoch": 0.91, - "learning_rate": 4.475474696641118e-06, - "loss": 2.7352, + "learning_rate": 4.462508569327933e-06, + "loss": 2.7193, "step": 275000 }, { "epoch": 0.91, - "learning_rate": 4.458920323803533e-06, - "loss": 2.723, + "learning_rate": 4.445949481534961e-06, + "loss": 2.7338, "step": 275100 }, { "epoch": 0.91, - "learning_rate": 4.442365950965948e-06, - "loss": 2.7207, + "learning_rate": 4.4293903937419895e-06, + "loss": 2.7124, "step": 275200 }, { "epoch": 0.91, - "learning_rate": 4.425811578128363e-06, - "loss": 2.7361, + "learning_rate": 4.412831305949018e-06, + "loss": 2.7237, "step": 275300 }, { "epoch": 0.91, - "learning_rate": 4.409257205290778e-06, - "loss": 2.7261, + "learning_rate": 4.396272218156047e-06, + "loss": 2.7338, "step": 275400 }, { "epoch": 0.91, - "learning_rate": 4.392702832453193e-06, - "loss": 2.7444, + "learning_rate": 4.379713130363075e-06, + "loss": 2.7274, "step": 275500 }, { "epoch": 0.91, - "learning_rate": 4.3761484596156075e-06, - "loss": 2.7391, + "learning_rate": 4.363154042570103e-06, + "loss": 2.7306, "step": 275600 }, { "epoch": 0.91, - "learning_rate": 4.359594086778022e-06, - "loss": 2.7396, + "learning_rate": 4.346594954777132e-06, + "loss": 2.7527, "step": 275700 }, { "epoch": 0.91, - "learning_rate": 4.343039713940437e-06, - "loss": 2.7344, + "learning_rate": 4.3300358669841595e-06, + "loss": 2.7208, "step": 275800 }, { "epoch": 0.91, - "learning_rate": 4.326485341102853e-06, - "loss": 2.728, + "learning_rate": 4.313476779191188e-06, + "loss": 2.7245, "step": 275900 }, { "epoch": 0.91, - "learning_rate": 4.3099309682652675e-06, - "loss": 2.7429, + "learning_rate": 4.296917691398217e-06, + "loss": 2.7296, "step": 276000 }, { "epoch": 0.91, - "learning_rate": 4.293376595427682e-06, - "loss": 2.742, + "learning_rate": 4.2803586036052444e-06, + "loss": 2.7225, "step": 276100 }, { "epoch": 0.91, - "learning_rate": 4.276822222590097e-06, - "loss": 2.7351, + "learning_rate": 4.263799515812273e-06, + "loss": 2.735, "step": 276200 }, { - "epoch": 0.91, - "learning_rate": 4.260267849752513e-06, - "loss": 2.7393, + "epoch": 0.92, + "learning_rate": 4.247240428019302e-06, + "loss": 2.7253, "step": 276300 }, { "epoch": 0.92, - "learning_rate": 4.243713476914928e-06, - "loss": 2.7563, + "learning_rate": 4.23068134022633e-06, + "loss": 2.7186, "step": 276400 }, { "epoch": 0.92, - "learning_rate": 4.227159104077342e-06, - "loss": 2.7337, + "learning_rate": 4.214122252433359e-06, + "loss": 2.719, "step": 276500 }, { "epoch": 0.92, - "learning_rate": 4.210604731239757e-06, - "loss": 2.726, + "learning_rate": 4.197563164640387e-06, + "loss": 2.7098, "step": 276600 }, { "epoch": 0.92, - "learning_rate": 4.194050358402172e-06, - "loss": 2.7363, + "learning_rate": 4.181004076847414e-06, + "loss": 2.7251, "step": 276700 }, { "epoch": 0.92, - "learning_rate": 4.177495985564587e-06, - "loss": 2.7425, + "learning_rate": 4.164444989054443e-06, + "loss": 2.7436, "step": 276800 }, { "epoch": 0.92, - "learning_rate": 4.160941612727002e-06, - "loss": 2.7301, + "learning_rate": 4.1478859012614716e-06, + "loss": 2.7442, "step": 276900 }, { "epoch": 0.92, - "learning_rate": 4.144387239889417e-06, - "loss": 2.7295, + "learning_rate": 4.1313268134685e-06, + "loss": 2.7355, "step": 277000 }, { "epoch": 0.92, - "learning_rate": 4.127832867051832e-06, - "loss": 2.7122, + "learning_rate": 4.114767725675528e-06, + "loss": 2.728, "step": 277100 }, { "epoch": 0.92, - "learning_rate": 4.111278494214247e-06, - "loss": 2.7384, + "learning_rate": 4.0982086378825565e-06, + "loss": 2.7312, "step": 277200 }, { "epoch": 0.92, - "learning_rate": 4.094724121376662e-06, - "loss": 2.7365, + "learning_rate": 4.081649550089585e-06, + "loss": 2.7312, "step": 277300 }, { "epoch": 0.92, - "learning_rate": 4.078169748539077e-06, - "loss": 2.7418, + "learning_rate": 4.065090462296614e-06, + "loss": 2.7253, "step": 277400 }, { "epoch": 0.92, - "learning_rate": 4.061615375701492e-06, - "loss": 2.7417, + "learning_rate": 4.0485313745036415e-06, + "loss": 2.7228, "step": 277500 }, { "epoch": 0.92, - "learning_rate": 4.045061002863907e-06, - "loss": 2.7324, + "learning_rate": 4.031972286710669e-06, + "loss": 2.7449, "step": 277600 }, { "epoch": 0.92, - "learning_rate": 4.028506630026322e-06, - "loss": 2.7328, + "learning_rate": 4.015413198917698e-06, + "loss": 2.7197, "step": 277700 }, { "epoch": 0.92, - "learning_rate": 4.0119522571887365e-06, - "loss": 2.7419, + "learning_rate": 3.9988541111247265e-06, + "loss": 2.7268, "step": 277800 }, { "epoch": 0.92, - "learning_rate": 3.995397884351151e-06, - "loss": 2.7172, + "learning_rate": 3.982295023331755e-06, + "loss": 2.7166, "step": 277900 }, { "epoch": 0.92, - "learning_rate": 3.978843511513566e-06, - "loss": 2.7298, + "learning_rate": 3.965735935538784e-06, + "loss": 2.7307, "step": 278000 }, { "epoch": 0.92, - "learning_rate": 3.962289138675982e-06, - "loss": 2.7424, + "learning_rate": 3.9491768477458115e-06, + "loss": 2.7178, "step": 278100 }, { "epoch": 0.92, - "learning_rate": 3.945734765838397e-06, - "loss": 2.7266, + "learning_rate": 3.93261775995284e-06, + "loss": 2.7128, "step": 278200 }, { "epoch": 0.92, - "learning_rate": 3.929180393000811e-06, - "loss": 2.7334, + "learning_rate": 3.916058672159869e-06, + "loss": 2.7134, "step": 278300 }, { "epoch": 0.92, - "learning_rate": 3.912626020163226e-06, - "loss": 2.7369, + "learning_rate": 3.899499584366896e-06, + "loss": 2.7318, "step": 278400 }, { "epoch": 0.92, - "learning_rate": 3.896071647325642e-06, - "loss": 2.7563, + "learning_rate": 3.882940496573925e-06, + "loss": 2.7211, "step": 278500 }, { "epoch": 0.92, - "learning_rate": 3.879517274488057e-06, - "loss": 2.745, + "learning_rate": 3.866381408780953e-06, + "loss": 2.7183, "step": 278600 }, { "epoch": 0.92, - "learning_rate": 3.8629629016504714e-06, - "loss": 2.7261, + "learning_rate": 3.849822320987981e-06, + "loss": 2.7227, "step": 278700 }, { "epoch": 0.92, - "learning_rate": 3.846408528812885e-06, - "loss": 2.7249, + "learning_rate": 3.83326323319501e-06, + "loss": 2.7284, "step": 278800 }, { "epoch": 0.92, - "learning_rate": 3.829854155975301e-06, - "loss": 2.7397, + "learning_rate": 3.816704145402039e-06, + "loss": 2.7242, "step": 278900 }, { "epoch": 0.92, - "learning_rate": 3.813299783137716e-06, - "loss": 2.7258, + "learning_rate": 3.8001450576090668e-06, + "loss": 2.7107, "step": 279000 }, { "epoch": 0.92, - "learning_rate": 3.7967454103001306e-06, - "loss": 2.7307, + "learning_rate": 3.7835859698160945e-06, + "loss": 2.7204, "step": 279100 }, { "epoch": 0.92, - "learning_rate": 3.780191037462546e-06, - "loss": 2.7378, + "learning_rate": 3.767026882023123e-06, + "loss": 2.7288, "step": 279200 }, { "epoch": 0.92, - "learning_rate": 3.7636366646249607e-06, - "loss": 2.7424, + "learning_rate": 3.7504677942301518e-06, + "loss": 2.7214, "step": 279300 }, { "epoch": 0.93, - "learning_rate": 3.747082291787376e-06, - "loss": 2.7412, + "learning_rate": 3.73390870643718e-06, + "loss": 2.7142, "step": 279400 }, { "epoch": 0.93, - "learning_rate": 3.7305279189497907e-06, - "loss": 2.7267, + "learning_rate": 3.7173496186442085e-06, + "loss": 2.7304, "step": 279500 }, { "epoch": 0.93, - "learning_rate": 3.713973546112206e-06, - "loss": 2.7356, + "learning_rate": 3.7007905308512363e-06, + "loss": 2.7156, "step": 279600 }, { "epoch": 0.93, - "learning_rate": 3.6974191732746207e-06, - "loss": 2.7257, + "learning_rate": 3.684231443058265e-06, + "loss": 2.7212, "step": 279700 }, { "epoch": 0.93, - "learning_rate": 3.680864800437036e-06, - "loss": 2.737, + "learning_rate": 3.6676723552652935e-06, + "loss": 2.7351, "step": 279800 }, { "epoch": 0.93, - "learning_rate": 3.6643104275994503e-06, - "loss": 2.7216, + "learning_rate": 3.6511132674723217e-06, + "loss": 2.7355, "step": 279900 }, { "epoch": 0.93, - "learning_rate": 3.647756054761865e-06, - "loss": 2.7354, + "learning_rate": 3.6345541796793503e-06, + "loss": 2.7245, "step": 280000 }, { "epoch": 0.93, - "learning_rate": 3.6312016819242804e-06, - "loss": 2.729, + "learning_rate": 3.617995091886378e-06, + "loss": 2.7301, "step": 280100 }, { "epoch": 0.93, - "learning_rate": 3.614647309086695e-06, - "loss": 2.7448, + "learning_rate": 3.6014360040934067e-06, + "loss": 2.7221, "step": 280200 }, { "epoch": 0.93, - "learning_rate": 3.5980929362491104e-06, - "loss": 2.7415, + "learning_rate": 3.584876916300435e-06, + "loss": 2.7412, "step": 280300 }, { "epoch": 0.93, - "learning_rate": 3.581538563411525e-06, - "loss": 2.7357, + "learning_rate": 3.5683178285074634e-06, + "loss": 2.7349, "step": 280400 }, { "epoch": 0.93, - "learning_rate": 3.5649841905739404e-06, - "loss": 2.7372, + "learning_rate": 3.551758740714492e-06, + "loss": 2.7274, "step": 280500 }, { "epoch": 0.93, - "learning_rate": 3.5484298177363552e-06, - "loss": 2.7216, + "learning_rate": 3.53519965292152e-06, + "loss": 2.7227, "step": 280600 }, { "epoch": 0.93, - "learning_rate": 3.5318754448987704e-06, - "loss": 2.7203, + "learning_rate": 3.5186405651285484e-06, + "loss": 2.7299, "step": 280700 }, { "epoch": 0.93, - "learning_rate": 3.5153210720611852e-06, - "loss": 2.7401, + "learning_rate": 3.5020814773355766e-06, + "loss": 2.7289, "step": 280800 }, { "epoch": 0.93, - "learning_rate": 3.4987666992236005e-06, - "loss": 2.7221, + "learning_rate": 3.485522389542605e-06, + "loss": 2.7145, "step": 280900 }, { "epoch": 0.93, - "learning_rate": 3.482212326386015e-06, - "loss": 2.736, + "learning_rate": 3.468963301749634e-06, + "loss": 2.7389, "step": 281000 }, { "epoch": 0.93, - "learning_rate": 3.4656579535484297e-06, - "loss": 2.7281, + "learning_rate": 3.4524042139566616e-06, + "loss": 2.7196, "step": 281100 }, { "epoch": 0.93, - "learning_rate": 3.449103580710845e-06, - "loss": 2.7203, + "learning_rate": 3.4358451261636898e-06, + "loss": 2.7207, "step": 281200 }, { "epoch": 0.93, - "learning_rate": 3.4325492078732597e-06, - "loss": 2.7379, + "learning_rate": 3.4192860383707184e-06, + "loss": 2.7045, "step": 281300 }, { "epoch": 0.93, - "learning_rate": 3.415994835035675e-06, - "loss": 2.7164, + "learning_rate": 3.402726950577747e-06, + "loss": 2.7072, "step": 281400 }, { "epoch": 0.93, - "learning_rate": 3.3994404621980897e-06, - "loss": 2.7324, + "learning_rate": 3.3861678627847756e-06, + "loss": 2.7164, "step": 281500 }, { "epoch": 0.93, - "learning_rate": 3.382886089360505e-06, - "loss": 2.7403, + "learning_rate": 3.3696087749918033e-06, + "loss": 2.7163, "step": 281600 }, { "epoch": 0.93, - "learning_rate": 3.3663317165229197e-06, - "loss": 2.7298, + "learning_rate": 3.3530496871988315e-06, + "loss": 2.7205, "step": 281700 }, { "epoch": 0.93, - "learning_rate": 3.349777343685335e-06, - "loss": 2.7217, + "learning_rate": 3.33649059940586e-06, + "loss": 2.7341, "step": 281800 }, { "epoch": 0.93, - "learning_rate": 3.3332229708477498e-06, - "loss": 2.7271, + "learning_rate": 3.3199315116128887e-06, + "loss": 2.714, "step": 281900 }, { "epoch": 0.93, - "learning_rate": 3.316668598010165e-06, - "loss": 2.731, + "learning_rate": 3.303372423819917e-06, + "loss": 2.7277, "step": 282000 }, { "epoch": 0.93, - "learning_rate": 3.3001142251725794e-06, - "loss": 2.7471, + "learning_rate": 3.2868133360269447e-06, + "loss": 2.7128, "step": 282100 }, { "epoch": 0.93, - "learning_rate": 3.283559852334994e-06, - "loss": 2.742, + "learning_rate": 3.2702542482339733e-06, + "loss": 2.7217, "step": 282200 }, { "epoch": 0.93, - "learning_rate": 3.2670054794974094e-06, - "loss": 2.7289, + "learning_rate": 3.253695160441002e-06, + "loss": 2.7212, "step": 282300 }, { - "epoch": 0.93, - "learning_rate": 3.250451106659824e-06, - "loss": 2.7371, + "epoch": 0.94, + "learning_rate": 3.2371360726480305e-06, + "loss": 2.7114, "step": 282400 }, { "epoch": 0.94, - "learning_rate": 3.2338967338222394e-06, - "loss": 2.7206, + "learning_rate": 3.2205769848550587e-06, + "loss": 2.7221, "step": 282500 }, { "epoch": 0.94, - "learning_rate": 3.2173423609846542e-06, - "loss": 2.7245, + "learning_rate": 3.2040178970620864e-06, + "loss": 2.7163, "step": 282600 }, { "epoch": 0.94, - "learning_rate": 3.2007879881470694e-06, - "loss": 2.7303, + "learning_rate": 3.187458809269115e-06, + "loss": 2.731, "step": 282700 }, { "epoch": 0.94, - "learning_rate": 3.1842336153094842e-06, - "loss": 2.7485, + "learning_rate": 3.1708997214761436e-06, + "loss": 2.7097, "step": 282800 }, { "epoch": 0.94, - "learning_rate": 3.1676792424718995e-06, - "loss": 2.7394, + "learning_rate": 3.154340633683172e-06, + "loss": 2.7065, "step": 282900 }, { "epoch": 0.94, - "learning_rate": 3.1511248696343143e-06, - "loss": 2.7414, + "learning_rate": 3.1377815458902004e-06, + "loss": 2.7314, "step": 283000 }, { "epoch": 0.94, - "learning_rate": 3.1345704967967295e-06, - "loss": 2.7328, + "learning_rate": 3.1212224580972286e-06, + "loss": 2.718, "step": 283100 }, { "epoch": 0.94, - "learning_rate": 3.118016123959144e-06, - "loss": 2.7281, + "learning_rate": 3.1046633703042568e-06, + "loss": 2.716, "step": 283200 }, { "epoch": 0.94, - "learning_rate": 3.101461751121559e-06, - "loss": 2.7444, + "learning_rate": 3.0881042825112854e-06, + "loss": 2.7145, "step": 283300 }, { "epoch": 0.94, - "learning_rate": 3.084907378283974e-06, - "loss": 2.7386, + "learning_rate": 3.0715451947183136e-06, + "loss": 2.7253, "step": 283400 }, { "epoch": 0.94, - "learning_rate": 3.0683530054463887e-06, - "loss": 2.743, + "learning_rate": 3.0549861069253417e-06, + "loss": 2.7139, "step": 283500 }, { "epoch": 0.94, - "learning_rate": 3.0517986326088035e-06, - "loss": 2.7329, + "learning_rate": 3.0384270191323704e-06, + "loss": 2.7446, "step": 283600 }, { "epoch": 0.94, - "learning_rate": 3.0352442597712187e-06, - "loss": 2.7217, + "learning_rate": 3.0218679313393985e-06, + "loss": 2.7274, "step": 283700 }, { "epoch": 0.94, - "learning_rate": 3.0186898869336335e-06, - "loss": 2.7233, + "learning_rate": 3.0053088435464267e-06, + "loss": 2.7205, "step": 283800 }, { "epoch": 0.94, - "learning_rate": 3.0021355140960488e-06, - "loss": 2.7362, + "learning_rate": 2.9887497557534553e-06, + "loss": 2.7416, "step": 283900 }, { "epoch": 0.94, - "learning_rate": 2.9855811412584636e-06, - "loss": 2.7451, + "learning_rate": 2.9721906679604835e-06, + "loss": 2.7285, "step": 284000 }, { "epoch": 0.94, - "learning_rate": 2.9690267684208784e-06, - "loss": 2.7458, + "learning_rate": 2.955631580167512e-06, + "loss": 2.7306, "step": 284100 }, { "epoch": 0.94, - "learning_rate": 2.9524723955832936e-06, - "loss": 2.727, + "learning_rate": 2.9390724923745403e-06, + "loss": 2.7156, "step": 284200 }, { "epoch": 0.94, - "learning_rate": 2.9359180227457084e-06, - "loss": 2.7372, + "learning_rate": 2.9225134045815685e-06, + "loss": 2.7181, "step": 284300 }, { "epoch": 0.94, - "learning_rate": 2.9193636499081236e-06, - "loss": 2.7383, + "learning_rate": 2.905954316788597e-06, + "loss": 2.7271, "step": 284400 }, { "epoch": 0.94, - "learning_rate": 2.9028092770705384e-06, - "loss": 2.7412, + "learning_rate": 2.8893952289956253e-06, + "loss": 2.7315, "step": 284500 }, { "epoch": 0.94, - "learning_rate": 2.8862549042329532e-06, - "loss": 2.7317, + "learning_rate": 2.872836141202654e-06, + "loss": 2.7206, "step": 284600 }, { "epoch": 0.94, - "learning_rate": 2.869700531395368e-06, - "loss": 2.7281, + "learning_rate": 2.8562770534096816e-06, + "loss": 2.7288, "step": 284700 }, { "epoch": 0.94, - "learning_rate": 2.8531461585577833e-06, - "loss": 2.7397, + "learning_rate": 2.8397179656167102e-06, + "loss": 2.7197, "step": 284800 }, { "epoch": 0.94, - "learning_rate": 2.836591785720198e-06, - "loss": 2.7326, + "learning_rate": 2.823158877823739e-06, + "loss": 2.7203, "step": 284900 }, { "epoch": 0.94, - "learning_rate": 2.8200374128826133e-06, - "loss": 2.7373, + "learning_rate": 2.806599790030767e-06, + "loss": 2.7156, "step": 285000 }, { "epoch": 0.94, - "learning_rate": 2.8034830400450277e-06, - "loss": 2.7374, + "learning_rate": 2.790040702237795e-06, + "loss": 2.7297, "step": 285100 }, { "epoch": 0.94, - "learning_rate": 2.786928667207443e-06, - "loss": 2.7386, + "learning_rate": 2.7734816144448234e-06, + "loss": 2.7292, "step": 285200 }, { "epoch": 0.94, - "learning_rate": 2.7703742943698577e-06, - "loss": 2.7408, + "learning_rate": 2.756922526651852e-06, + "loss": 2.7221, "step": 285300 }, { - "epoch": 0.94, - "learning_rate": 2.753819921532273e-06, - "loss": 2.7174, + "epoch": 0.95, + "learning_rate": 2.7403634388588806e-06, + "loss": 2.7198, "step": 285400 }, { "epoch": 0.95, - "learning_rate": 2.7372655486946877e-06, - "loss": 2.7216, + "learning_rate": 2.7238043510659088e-06, + "loss": 2.7139, "step": 285500 }, { "epoch": 0.95, - "learning_rate": 2.720711175857103e-06, - "loss": 2.7304, + "learning_rate": 2.707245263272937e-06, + "loss": 2.7316, "step": 285600 }, { "epoch": 0.95, - "learning_rate": 2.7041568030195177e-06, - "loss": 2.7313, + "learning_rate": 2.690686175479965e-06, + "loss": 2.7253, "step": 285700 }, { "epoch": 0.95, - "learning_rate": 2.6876024301819325e-06, - "loss": 2.7299, + "learning_rate": 2.6741270876869937e-06, + "loss": 2.7337, "step": 285800 }, { "epoch": 0.95, - "learning_rate": 2.6710480573443478e-06, - "loss": 2.739, + "learning_rate": 2.657567999894022e-06, + "loss": 2.728, "step": 285900 }, { "epoch": 0.95, - "learning_rate": 2.6544936845067626e-06, - "loss": 2.7279, + "learning_rate": 2.64100891210105e-06, + "loss": 2.7149, "step": 286000 }, { "epoch": 0.95, - "learning_rate": 2.637939311669178e-06, - "loss": 2.7419, + "learning_rate": 2.6244498243080787e-06, + "loss": 2.73, "step": 286100 }, { "epoch": 0.95, - "learning_rate": 2.621384938831592e-06, - "loss": 2.7352, + "learning_rate": 2.607890736515107e-06, + "loss": 2.7245, "step": 286200 }, { "epoch": 0.95, - "learning_rate": 2.6048305659940074e-06, - "loss": 2.7313, + "learning_rate": 2.5913316487221355e-06, + "loss": 2.7081, "step": 286300 }, { "epoch": 0.95, - "learning_rate": 2.588276193156422e-06, - "loss": 2.7266, + "learning_rate": 2.5747725609291637e-06, + "loss": 2.7265, "step": 286400 }, { "epoch": 0.95, - "learning_rate": 2.5717218203188374e-06, - "loss": 2.7352, + "learning_rate": 2.558213473136192e-06, + "loss": 2.7257, "step": 286500 }, { "epoch": 0.95, - "learning_rate": 2.5551674474812522e-06, - "loss": 2.7312, + "learning_rate": 2.5416543853432205e-06, + "loss": 2.7335, "step": 286600 }, { "epoch": 0.95, - "learning_rate": 2.5386130746436675e-06, - "loss": 2.7432, + "learning_rate": 2.5250952975502487e-06, + "loss": 2.7297, "step": 286700 }, { "epoch": 0.95, - "learning_rate": 2.5220587018060823e-06, - "loss": 2.7407, + "learning_rate": 2.508536209757277e-06, + "loss": 2.719, "step": 286800 }, { "epoch": 0.95, - "learning_rate": 2.505504328968497e-06, - "loss": 2.7404, + "learning_rate": 2.4919771219643054e-06, + "loss": 2.7298, "step": 286900 }, { "epoch": 0.95, - "learning_rate": 2.4889499561309123e-06, - "loss": 2.7352, + "learning_rate": 2.4754180341713336e-06, + "loss": 2.7118, "step": 287000 }, { "epoch": 0.95, - "learning_rate": 2.472395583293327e-06, - "loss": 2.7441, + "learning_rate": 2.4588589463783622e-06, + "loss": 2.7186, "step": 287100 }, { "epoch": 0.95, - "learning_rate": 2.4558412104557423e-06, - "loss": 2.7291, + "learning_rate": 2.4422998585853904e-06, + "loss": 2.7432, "step": 287200 }, { "epoch": 0.95, - "learning_rate": 2.4392868376181567e-06, - "loss": 2.7308, + "learning_rate": 2.4257407707924186e-06, + "loss": 2.7148, "step": 287300 }, { "epoch": 0.95, - "learning_rate": 2.422732464780572e-06, - "loss": 2.7368, + "learning_rate": 2.409181682999447e-06, + "loss": 2.7257, "step": 287400 }, { "epoch": 0.95, - "learning_rate": 2.4061780919429867e-06, - "loss": 2.7406, + "learning_rate": 2.3926225952064754e-06, + "loss": 2.7234, "step": 287500 }, { "epoch": 0.95, - "learning_rate": 2.389623719105402e-06, - "loss": 2.7285, + "learning_rate": 2.376063507413504e-06, + "loss": 2.7229, "step": 287600 }, { "epoch": 0.95, - "learning_rate": 2.3730693462678167e-06, - "loss": 2.7284, + "learning_rate": 2.3595044196205317e-06, + "loss": 2.7151, "step": 287700 }, { "epoch": 0.95, - "learning_rate": 2.356514973430232e-06, - "loss": 2.7394, + "learning_rate": 2.3429453318275604e-06, + "loss": 2.7244, "step": 287800 }, { "epoch": 0.95, - "learning_rate": 2.3399606005926464e-06, - "loss": 2.731, + "learning_rate": 2.3263862440345885e-06, + "loss": 2.7171, "step": 287900 }, { "epoch": 0.95, - "learning_rate": 2.3234062277550616e-06, - "loss": 2.7311, + "learning_rate": 2.309827156241617e-06, + "loss": 2.7241, "step": 288000 }, { "epoch": 0.95, - "learning_rate": 2.3068518549174764e-06, - "loss": 2.7339, + "learning_rate": 2.2932680684486453e-06, + "loss": 2.7232, "step": 288100 }, { "epoch": 0.95, - "learning_rate": 2.2902974820798916e-06, - "loss": 2.7303, + "learning_rate": 2.2767089806556735e-06, + "loss": 2.7207, "step": 288200 }, { "epoch": 0.95, - "learning_rate": 2.2737431092423064e-06, - "loss": 2.7417, + "learning_rate": 2.260149892862702e-06, + "loss": 2.732, "step": 288300 }, { - "epoch": 0.95, - "learning_rate": 2.2571887364047216e-06, - "loss": 2.7297, + "epoch": 0.96, + "learning_rate": 2.2435908050697303e-06, + "loss": 2.712, "step": 288400 }, { "epoch": 0.96, - "learning_rate": 2.2406343635671364e-06, - "loss": 2.7391, + "learning_rate": 2.227031717276759e-06, + "loss": 2.7331, "step": 288500 }, { "epoch": 0.96, - "learning_rate": 2.2240799907295512e-06, - "loss": 2.7378, + "learning_rate": 2.210472629483787e-06, + "loss": 2.7276, "step": 288600 }, { "epoch": 0.96, - "learning_rate": 2.2075256178919665e-06, - "loss": 2.7321, + "learning_rate": 2.1939135416908153e-06, + "loss": 2.7387, "step": 288700 }, { "epoch": 0.96, - "learning_rate": 2.1909712450543813e-06, - "loss": 2.7319, + "learning_rate": 2.177354453897844e-06, + "loss": 2.7252, "step": 288800 }, { "epoch": 0.96, - "learning_rate": 2.1744168722167965e-06, - "loss": 2.7372, + "learning_rate": 2.160795366104872e-06, + "loss": 2.7242, "step": 288900 }, { "epoch": 0.96, - "learning_rate": 2.157862499379211e-06, - "loss": 2.7209, + "learning_rate": 2.1442362783119002e-06, + "loss": 2.7224, "step": 289000 }, { "epoch": 0.96, - "learning_rate": 2.141308126541626e-06, - "loss": 2.7321, + "learning_rate": 2.127677190518929e-06, + "loss": 2.7175, "step": 289100 }, { "epoch": 0.96, - "learning_rate": 2.124753753704041e-06, - "loss": 2.7389, + "learning_rate": 2.111118102725957e-06, + "loss": 2.72, "step": 289200 }, { "epoch": 0.96, - "learning_rate": 2.108199380866456e-06, - "loss": 2.7366, + "learning_rate": 2.0945590149329856e-06, + "loss": 2.7025, "step": 289300 }, { "epoch": 0.96, - "learning_rate": 2.091645008028871e-06, - "loss": 2.7324, + "learning_rate": 2.077999927140014e-06, + "loss": 2.713, "step": 289400 }, { "epoch": 0.96, - "learning_rate": 2.075090635191286e-06, - "loss": 2.7404, + "learning_rate": 2.061440839347042e-06, + "loss": 2.7141, "step": 289500 }, { "epoch": 0.96, - "learning_rate": 2.0585362623537005e-06, - "loss": 2.7326, + "learning_rate": 2.0448817515540706e-06, + "loss": 2.7224, "step": 289600 }, { "epoch": 0.96, - "learning_rate": 2.0419818895161158e-06, - "loss": 2.7395, + "learning_rate": 2.0283226637610988e-06, + "loss": 2.7317, "step": 289700 }, { "epoch": 0.96, - "learning_rate": 2.0254275166785306e-06, - "loss": 2.7369, + "learning_rate": 2.0117635759681274e-06, + "loss": 2.7207, "step": 289800 }, { "epoch": 0.96, - "learning_rate": 2.0088731438409458e-06, - "loss": 2.7126, + "learning_rate": 1.995204488175155e-06, + "loss": 2.7055, "step": 289900 }, { "epoch": 0.96, - "learning_rate": 1.9923187710033606e-06, - "loss": 2.7309, + "learning_rate": 1.9786454003821837e-06, + "loss": 2.7205, "step": 290000 }, { "epoch": 0.96, - "learning_rate": 1.9757643981657754e-06, - "loss": 2.7405, + "learning_rate": 1.9620863125892123e-06, + "loss": 2.7238, "step": 290100 }, { "epoch": 0.96, - "learning_rate": 1.9592100253281906e-06, - "loss": 2.7304, + "learning_rate": 1.9455272247962405e-06, + "loss": 2.7276, "step": 290200 }, { "epoch": 0.96, - "learning_rate": 1.9426556524906054e-06, - "loss": 2.7162, + "learning_rate": 1.9289681370032687e-06, + "loss": 2.7268, "step": 290300 }, { "epoch": 0.96, - "learning_rate": 1.9261012796530206e-06, - "loss": 2.7207, + "learning_rate": 1.912409049210297e-06, + "loss": 2.7161, "step": 290400 }, { "epoch": 0.96, - "learning_rate": 1.9095469068154354e-06, - "loss": 2.7285, + "learning_rate": 1.8958499614173255e-06, + "loss": 2.7319, "step": 290500 }, { "epoch": 0.96, - "learning_rate": 1.8929925339778505e-06, - "loss": 2.7339, + "learning_rate": 1.8792908736243539e-06, + "loss": 2.7139, "step": 290600 }, { "epoch": 0.96, - "learning_rate": 1.8764381611402653e-06, - "loss": 2.7157, + "learning_rate": 1.862731785831382e-06, + "loss": 2.7192, "step": 290700 }, { "epoch": 0.96, - "learning_rate": 1.8598837883026803e-06, - "loss": 2.7484, + "learning_rate": 1.8461726980384107e-06, + "loss": 2.7209, "step": 290800 }, { "epoch": 0.96, - "learning_rate": 1.8433294154650953e-06, - "loss": 2.7351, + "learning_rate": 1.8296136102454387e-06, + "loss": 2.7222, "step": 290900 }, { "epoch": 0.96, - "learning_rate": 1.8267750426275103e-06, - "loss": 2.7233, + "learning_rate": 1.8130545224524673e-06, + "loss": 2.7217, "step": 291000 }, { "epoch": 0.96, - "learning_rate": 1.8102206697899253e-06, - "loss": 2.7171, + "learning_rate": 1.7964954346594957e-06, + "loss": 2.7228, "step": 291100 }, { "epoch": 0.96, - "learning_rate": 1.79366629695234e-06, - "loss": 2.7234, + "learning_rate": 1.7799363468665238e-06, + "loss": 2.7226, "step": 291200 }, { "epoch": 0.96, - "learning_rate": 1.777111924114755e-06, - "loss": 2.7388, + "learning_rate": 1.7633772590735522e-06, + "loss": 2.7128, "step": 291300 }, { - "epoch": 0.96, - "learning_rate": 1.76055755127717e-06, - "loss": 2.7344, + "epoch": 0.97, + "learning_rate": 1.7468181712805804e-06, + "loss": 2.7225, "step": 291400 }, { "epoch": 0.97, - "learning_rate": 1.744003178439585e-06, - "loss": 2.7277, + "learning_rate": 1.7302590834876088e-06, + "loss": 2.7256, "step": 291500 }, { "epoch": 0.97, - "learning_rate": 1.727448805602e-06, - "loss": 2.7194, + "learning_rate": 1.7136999956946374e-06, + "loss": 2.7139, "step": 291600 }, { "epoch": 0.97, - "learning_rate": 1.710894432764415e-06, - "loss": 2.7352, + "learning_rate": 1.6971409079016656e-06, + "loss": 2.7198, "step": 291700 }, { "epoch": 0.97, - "learning_rate": 1.6943400599268296e-06, - "loss": 2.7301, + "learning_rate": 1.680581820108694e-06, + "loss": 2.7132, "step": 291800 }, { "epoch": 0.97, - "learning_rate": 1.6777856870892446e-06, - "loss": 2.7346, + "learning_rate": 1.6640227323157222e-06, + "loss": 2.7164, "step": 291900 }, { "epoch": 0.97, - "learning_rate": 1.6612313142516596e-06, - "loss": 2.7297, + "learning_rate": 1.6474636445227506e-06, + "loss": 2.7285, "step": 292000 }, { "epoch": 0.97, - "learning_rate": 1.6446769414140746e-06, - "loss": 2.7534, + "learning_rate": 1.630904556729779e-06, + "loss": 2.7306, "step": 292100 }, { "epoch": 0.97, - "learning_rate": 1.6281225685764896e-06, - "loss": 2.7288, + "learning_rate": 1.6143454689368071e-06, + "loss": 2.7126, "step": 292200 }, { "epoch": 0.97, - "learning_rate": 1.6115681957389044e-06, - "loss": 2.73, + "learning_rate": 1.5977863811438357e-06, + "loss": 2.7196, "step": 292300 }, { "epoch": 0.97, - "learning_rate": 1.5950138229013194e-06, - "loss": 2.7348, + "learning_rate": 1.5812272933508637e-06, + "loss": 2.7182, "step": 292400 }, { "epoch": 0.97, - "learning_rate": 1.5784594500637344e-06, - "loss": 2.7241, + "learning_rate": 1.5646682055578923e-06, + "loss": 2.7335, "step": 292500 }, { "epoch": 0.97, - "learning_rate": 1.5619050772261495e-06, - "loss": 2.7265, + "learning_rate": 1.5481091177649205e-06, + "loss": 2.7233, "step": 292600 }, { "epoch": 0.97, - "learning_rate": 1.5453507043885643e-06, - "loss": 2.7312, + "learning_rate": 1.531550029971949e-06, + "loss": 2.7149, "step": 292700 }, { "epoch": 0.97, - "learning_rate": 1.5287963315509793e-06, - "loss": 2.7316, + "learning_rate": 1.5149909421789773e-06, + "loss": 2.7299, "step": 292800 }, { "epoch": 0.97, - "learning_rate": 1.5122419587133943e-06, - "loss": 2.7209, + "learning_rate": 1.4984318543860057e-06, + "loss": 2.7152, "step": 292900 }, { "epoch": 0.97, - "learning_rate": 1.495687585875809e-06, - "loss": 2.7243, + "learning_rate": 1.4818727665930339e-06, + "loss": 2.7089, "step": 293000 }, { "epoch": 0.97, - "learning_rate": 1.479133213038224e-06, - "loss": 2.7239, + "learning_rate": 1.4653136788000623e-06, + "loss": 2.7286, "step": 293100 }, { "epoch": 0.97, - "learning_rate": 1.4625788402006391e-06, - "loss": 2.7234, + "learning_rate": 1.4487545910070906e-06, + "loss": 2.7195, "step": 293200 }, { "epoch": 0.97, - "learning_rate": 1.446024467363054e-06, - "loss": 2.7389, + "learning_rate": 1.432195503214119e-06, + "loss": 2.7164, "step": 293300 }, { "epoch": 0.97, - "learning_rate": 1.429470094525469e-06, - "loss": 2.7244, + "learning_rate": 1.4156364154211474e-06, + "loss": 2.7264, "step": 293400 }, { "epoch": 0.97, - "learning_rate": 1.412915721687884e-06, - "loss": 2.734, + "learning_rate": 1.3990773276281756e-06, + "loss": 2.7372, "step": 293500 }, { "epoch": 0.97, - "learning_rate": 1.3963613488502987e-06, - "loss": 2.715, + "learning_rate": 1.382518239835204e-06, + "loss": 2.731, "step": 293600 }, { "epoch": 0.97, - "learning_rate": 1.3798069760127138e-06, - "loss": 2.7331, + "learning_rate": 1.3659591520422324e-06, + "loss": 2.7163, "step": 293700 }, { "epoch": 0.97, - "learning_rate": 1.3632526031751288e-06, - "loss": 2.7285, + "learning_rate": 1.3494000642492608e-06, + "loss": 2.7156, "step": 293800 }, { "epoch": 0.97, - "learning_rate": 1.3466982303375438e-06, - "loss": 2.7248, + "learning_rate": 1.332840976456289e-06, + "loss": 2.726, "step": 293900 }, { "epoch": 0.97, - "learning_rate": 1.3301438574999588e-06, - "loss": 2.7133, + "learning_rate": 1.3162818886633174e-06, + "loss": 2.7266, "step": 294000 }, { "epoch": 0.97, - "learning_rate": 1.3135894846623736e-06, - "loss": 2.7509, + "learning_rate": 1.2997228008703456e-06, + "loss": 2.6988, "step": 294100 }, { "epoch": 0.97, - "learning_rate": 1.2970351118247886e-06, - "loss": 2.7434, + "learning_rate": 1.2831637130773742e-06, + "loss": 2.7276, "step": 294200 }, { "epoch": 0.97, - "learning_rate": 1.2804807389872036e-06, - "loss": 2.7196, + "learning_rate": 1.2666046252844023e-06, + "loss": 2.7213, "step": 294300 }, { "epoch": 0.97, - "learning_rate": 1.2639263661496184e-06, - "loss": 2.7294, + "learning_rate": 1.2500455374914307e-06, + "loss": 2.7351, "step": 294400 }, { "epoch": 0.98, - "learning_rate": 1.2473719933120334e-06, - "loss": 2.73, + "learning_rate": 1.2334864496984591e-06, + "loss": 2.7231, "step": 294500 }, { "epoch": 0.98, - "learning_rate": 1.2308176204744485e-06, - "loss": 2.7257, + "learning_rate": 1.2169273619054873e-06, + "loss": 2.7209, "step": 294600 }, { "epoch": 0.98, - "learning_rate": 1.2142632476368633e-06, - "loss": 2.7491, + "learning_rate": 1.200368274112516e-06, + "loss": 2.7231, "step": 294700 }, { "epoch": 0.98, - "learning_rate": 1.1977088747992783e-06, - "loss": 2.7451, + "learning_rate": 1.183809186319544e-06, + "loss": 2.7204, "step": 294800 }, { "epoch": 0.98, - "learning_rate": 1.181154501961693e-06, - "loss": 2.743, + "learning_rate": 1.1672500985265725e-06, + "loss": 2.7318, "step": 294900 }, { "epoch": 0.98, - "learning_rate": 1.164600129124108e-06, - "loss": 2.736, + "learning_rate": 1.1506910107336007e-06, + "loss": 2.7227, "step": 295000 }, { "epoch": 0.98, - "learning_rate": 1.148045756286523e-06, - "loss": 2.7331, + "learning_rate": 1.134131922940629e-06, + "loss": 2.7249, "step": 295100 }, { "epoch": 0.98, - "learning_rate": 1.1314913834489381e-06, - "loss": 2.7366, + "learning_rate": 1.1175728351476575e-06, + "loss": 2.7216, "step": 295200 }, { "epoch": 0.98, - "learning_rate": 1.1149370106113531e-06, - "loss": 2.7489, + "learning_rate": 1.1010137473546859e-06, + "loss": 2.7317, "step": 295300 }, { "epoch": 0.98, - "learning_rate": 1.0983826377737681e-06, - "loss": 2.7302, + "learning_rate": 1.084454659561714e-06, + "loss": 2.7187, "step": 295400 }, { "epoch": 0.98, - "learning_rate": 1.081828264936183e-06, - "loss": 2.7292, + "learning_rate": 1.0678955717687424e-06, + "loss": 2.7258, "step": 295500 }, { "epoch": 0.98, - "learning_rate": 1.065273892098598e-06, - "loss": 2.7351, + "learning_rate": 1.0513364839757708e-06, + "loss": 2.7267, "step": 295600 }, { "epoch": 0.98, - "learning_rate": 1.048719519261013e-06, - "loss": 2.7286, + "learning_rate": 1.0347773961827992e-06, + "loss": 2.7247, "step": 295700 }, { "epoch": 0.98, - "learning_rate": 1.0321651464234278e-06, - "loss": 2.7297, + "learning_rate": 1.0182183083898276e-06, + "loss": 2.7303, "step": 295800 }, { "epoch": 0.98, - "learning_rate": 1.0156107735858428e-06, - "loss": 2.7197, + "learning_rate": 1.0016592205968558e-06, + "loss": 2.7117, "step": 295900 }, { "epoch": 0.98, - "learning_rate": 9.990564007482576e-07, - "loss": 2.7266, + "learning_rate": 9.851001328038842e-07, + "loss": 2.7089, "step": 296000 }, { "epoch": 0.98, - "learning_rate": 9.825020279106726e-07, - "loss": 2.7281, + "learning_rate": 9.685410450109124e-07, + "loss": 2.7345, "step": 296100 }, { "epoch": 0.98, - "learning_rate": 9.659476550730876e-07, - "loss": 2.7275, + "learning_rate": 9.519819572179409e-07, + "loss": 2.7273, "step": 296200 }, { "epoch": 0.98, - "learning_rate": 9.493932822355025e-07, - "loss": 2.7329, + "learning_rate": 9.354228694249692e-07, + "loss": 2.7351, "step": 296300 }, { "epoch": 0.98, - "learning_rate": 9.328389093979175e-07, - "loss": 2.7183, + "learning_rate": 9.188637816319976e-07, + "loss": 2.7199, "step": 296400 }, { "epoch": 0.98, - "learning_rate": 9.162845365603326e-07, - "loss": 2.7387, + "learning_rate": 9.023046938390258e-07, + "loss": 2.7407, "step": 296500 }, { "epoch": 0.98, - "learning_rate": 8.997301637227474e-07, - "loss": 2.7331, + "learning_rate": 8.857456060460541e-07, + "loss": 2.7201, "step": 296600 }, { "epoch": 0.98, - "learning_rate": 8.831757908851624e-07, - "loss": 2.7203, + "learning_rate": 8.691865182530826e-07, + "loss": 2.7112, "step": 296700 }, { "epoch": 0.98, - "learning_rate": 8.666214180475774e-07, - "loss": 2.7411, + "learning_rate": 8.526274304601109e-07, + "loss": 2.7131, "step": 296800 }, { "epoch": 0.98, - "learning_rate": 8.500670452099922e-07, - "loss": 2.7353, + "learning_rate": 8.360683426671392e-07, + "loss": 2.7137, "step": 296900 }, { "epoch": 0.98, - "learning_rate": 8.335126723724072e-07, - "loss": 2.7369, + "learning_rate": 8.195092548741675e-07, + "loss": 2.719, "step": 297000 }, { "epoch": 0.98, - "learning_rate": 8.169582995348221e-07, - "loss": 2.7273, + "learning_rate": 8.029501670811958e-07, + "loss": 2.7234, "step": 297100 }, { "epoch": 0.98, - "learning_rate": 8.004039266972371e-07, - "loss": 2.7433, + "learning_rate": 7.863910792882243e-07, + "loss": 2.7248, "step": 297200 }, { "epoch": 0.98, - "learning_rate": 7.838495538596521e-07, - "loss": 2.7402, + "learning_rate": 7.698319914952525e-07, + "loss": 2.7146, "step": 297300 }, { "epoch": 0.98, - "learning_rate": 7.67295181022067e-07, - "loss": 2.719, + "learning_rate": 7.532729037022809e-07, + "loss": 2.7145, "step": 297400 }, { - "epoch": 0.98, - "learning_rate": 7.50740808184482e-07, - "loss": 2.7309, + "epoch": 0.99, + "learning_rate": 7.367138159093093e-07, + "loss": 2.7288, "step": 297500 }, { "epoch": 0.99, - "learning_rate": 7.341864353468969e-07, - "loss": 2.7343, + "learning_rate": 7.201547281163375e-07, + "loss": 2.714, "step": 297600 }, { "epoch": 0.99, - "learning_rate": 7.176320625093119e-07, - "loss": 2.7083, + "learning_rate": 7.035956403233659e-07, + "loss": 2.7175, "step": 297700 }, { "epoch": 0.99, - "learning_rate": 7.010776896717268e-07, - "loss": 2.7453, + "learning_rate": 6.870365525303942e-07, + "loss": 2.7141, "step": 297800 }, { "epoch": 0.99, - "learning_rate": 6.845233168341418e-07, - "loss": 2.7358, + "learning_rate": 6.704774647374226e-07, + "loss": 2.7221, "step": 297900 }, { "epoch": 0.99, - "learning_rate": 6.679689439965567e-07, - "loss": 2.7356, + "learning_rate": 6.539183769444509e-07, + "loss": 2.7387, "step": 298000 }, { "epoch": 0.99, - "learning_rate": 6.514145711589717e-07, - "loss": 2.7361, + "learning_rate": 6.373592891514793e-07, + "loss": 2.7228, "step": 298100 }, { "epoch": 0.99, - "learning_rate": 6.348601983213866e-07, - "loss": 2.7162, + "learning_rate": 6.208002013585076e-07, + "loss": 2.7076, "step": 298200 }, { "epoch": 0.99, - "learning_rate": 6.183058254838015e-07, - "loss": 2.7418, + "learning_rate": 6.042411135655359e-07, + "loss": 2.7287, "step": 298300 }, { "epoch": 0.99, - "learning_rate": 6.017514526462165e-07, - "loss": 2.7424, + "learning_rate": 5.876820257725643e-07, + "loss": 2.7311, "step": 298400 }, { "epoch": 0.99, - "learning_rate": 5.851970798086315e-07, - "loss": 2.732, + "learning_rate": 5.711229379795926e-07, + "loss": 2.7195, "step": 298500 }, { "epoch": 0.99, - "learning_rate": 5.686427069710465e-07, - "loss": 2.7347, + "learning_rate": 5.545638501866209e-07, + "loss": 2.7107, "step": 298600 }, { "epoch": 0.99, - "learning_rate": 5.520883341334614e-07, - "loss": 2.7288, + "learning_rate": 5.380047623936492e-07, + "loss": 2.7212, "step": 298700 }, { "epoch": 0.99, - "learning_rate": 5.355339612958764e-07, - "loss": 2.7386, + "learning_rate": 5.214456746006776e-07, + "loss": 2.7202, "step": 298800 }, { "epoch": 0.99, - "learning_rate": 5.189795884582913e-07, - "loss": 2.7244, + "learning_rate": 5.04886586807706e-07, + "loss": 2.6954, "step": 298900 }, { "epoch": 0.99, - "learning_rate": 5.024252156207062e-07, - "loss": 2.7334, + "learning_rate": 4.883274990147343e-07, + "loss": 2.7267, "step": 299000 }, { "epoch": 0.99, - "learning_rate": 4.858708427831211e-07, - "loss": 2.7422, + "learning_rate": 4.7176841122176265e-07, + "loss": 2.723, "step": 299100 }, { "epoch": 0.99, - "learning_rate": 4.693164699455362e-07, - "loss": 2.7343, + "learning_rate": 4.55209323428791e-07, + "loss": 2.7153, "step": 299200 }, { "epoch": 0.99, - "learning_rate": 4.527620971079511e-07, - "loss": 2.7413, + "learning_rate": 4.386502356358193e-07, + "loss": 2.711, "step": 299300 }, { "epoch": 0.99, - "learning_rate": 4.36207724270366e-07, - "loss": 2.7256, + "learning_rate": 4.2209114784284767e-07, + "loss": 2.7385, "step": 299400 }, { "epoch": 0.99, - "learning_rate": 4.19653351432781e-07, - "loss": 2.721, + "learning_rate": 4.0553206004987596e-07, + "loss": 2.7214, "step": 299500 }, { "epoch": 0.99, - "learning_rate": 4.0309897859519597e-07, - "loss": 2.7338, + "learning_rate": 3.889729722569043e-07, + "loss": 2.7111, "step": 299600 }, { "epoch": 0.99, - "learning_rate": 3.865446057576109e-07, - "loss": 2.7303, + "learning_rate": 3.7241388446393264e-07, + "loss": 2.7106, "step": 299700 }, { "epoch": 0.99, - "learning_rate": 3.6999023292002584e-07, - "loss": 2.7354, + "learning_rate": 3.5585479667096104e-07, + "loss": 2.7095, "step": 299800 }, { "epoch": 0.99, - "learning_rate": 3.534358600824408e-07, - "loss": 2.7318, + "learning_rate": 3.392957088779894e-07, + "loss": 2.7217, "step": 299900 }, { "epoch": 0.99, - "learning_rate": 3.3688148724485576e-07, - "loss": 2.7319, + "learning_rate": 3.2273662108501766e-07, + "loss": 2.7331, "step": 300000 }, { "epoch": 0.99, - "learning_rate": 3.2032711440727067e-07, - "loss": 2.7354, + "learning_rate": 3.06177533292046e-07, + "loss": 2.7229, "step": 300100 }, { "epoch": 0.99, - "learning_rate": 3.0377274156968563e-07, - "loss": 2.7311, + "learning_rate": 2.8961844549907435e-07, + "loss": 2.7105, "step": 300200 }, { "epoch": 0.99, - "learning_rate": 2.872183687321006e-07, - "loss": 2.7428, + "learning_rate": 2.730593577061027e-07, + "loss": 2.7208, "step": 300300 }, { "epoch": 0.99, - "learning_rate": 2.7066399589451555e-07, - "loss": 2.7232, + "learning_rate": 2.5650026991313103e-07, + "loss": 2.7261, "step": 300400 }, { - "epoch": 0.99, - "learning_rate": 2.541096230569305e-07, - "loss": 2.7255, + "epoch": 1.0, + "learning_rate": 2.3994118212015937e-07, + "loss": 2.7352, "step": 300500 }, { "epoch": 1.0, - "learning_rate": 2.3755525021934545e-07, - "loss": 2.7256, + "learning_rate": 2.233820943271877e-07, + "loss": 2.7271, "step": 300600 }, { "epoch": 1.0, - "learning_rate": 2.210008773817604e-07, - "loss": 2.7225, + "learning_rate": 2.0682300653421605e-07, + "loss": 2.727, "step": 300700 }, { "epoch": 1.0, - "learning_rate": 2.0444650454417534e-07, - "loss": 2.7233, + "learning_rate": 1.902639187412444e-07, + "loss": 2.7298, "step": 300800 }, { "epoch": 1.0, - "learning_rate": 1.878921317065903e-07, - "loss": 2.735, + "learning_rate": 1.7370483094827273e-07, + "loss": 2.7344, "step": 300900 }, { "epoch": 1.0, - "learning_rate": 1.7133775886900526e-07, - "loss": 2.7371, + "learning_rate": 1.5714574315530107e-07, + "loss": 2.7185, "step": 301000 }, { "epoch": 1.0, - "learning_rate": 1.547833860314202e-07, - "loss": 2.7366, + "learning_rate": 1.4058665536232941e-07, + "loss": 2.7195, "step": 301100 }, { "epoch": 1.0, - "learning_rate": 1.3822901319383516e-07, - "loss": 2.7291, + "learning_rate": 1.2402756756935775e-07, + "loss": 2.7201, "step": 301200 }, { "epoch": 1.0, - "learning_rate": 1.2167464035625012e-07, - "loss": 2.7364, + "learning_rate": 1.074684797763861e-07, + "loss": 2.7188, "step": 301300 }, { "epoch": 1.0, - "learning_rate": 1.0512026751866505e-07, - "loss": 2.7318, + "learning_rate": 9.090939198341442e-08, + "loss": 2.7252, "step": 301400 }, { "epoch": 1.0, - "learning_rate": 8.856589468108002e-08, - "loss": 2.7394, + "learning_rate": 7.435030419044276e-08, + "loss": 2.7194, "step": 301500 - }, - { - "epoch": 1.0, - "learning_rate": 7.201152184349496e-08, - "loss": 2.7233, - "step": 301600 - }, - { - "epoch": 1.0, - "learning_rate": 5.545714900590991e-08, - "loss": 2.7397, - "step": 301700 - }, - { - "epoch": 1.0, - "learning_rate": 3.8902776168324865e-08, - "loss": 2.7302, - "step": 301800 - }, - { - "epoch": 1.0, - "learning_rate": 2.2348403330739815e-08, - "loss": 2.7429, - "step": 301900 - }, - { - "epoch": 1.0, - "learning_rate": 5.794030493154767e-09, - "loss": 2.7452, - "step": 302000 } ], - "max_steps": 302035, + "max_steps": 301949, "num_train_epochs": 1, - "total_flos": 8.184418510307328e+18, + "total_flos": 8.170868148535296e+18, "trial_name": null, "trial_params": null }