{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 889485, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999460361894804e-05, "loss": 5.56, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.998898238868559e-05, "loss": 4.9623, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.9983361158423133e-05, "loss": 4.7587, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.997773992816068e-05, "loss": 4.6227, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.9972118697898226e-05, "loss": 4.4943, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.996649746763577e-05, "loss": 4.4251, "step": 600 }, { "epoch": 0.0, "learning_rate": 4.996087623737332e-05, "loss": 4.4039, "step": 700 }, { "epoch": 0.0, "learning_rate": 4.995525500711086e-05, "loss": 4.3345, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.9949633776848404e-05, "loss": 4.2333, "step": 900 }, { "epoch": 0.01, "learning_rate": 4.994401254658595e-05, "loss": 4.1646, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.993839131632349e-05, "loss": 4.0956, "step": 1100 }, { "epoch": 0.01, "learning_rate": 4.9932770086061036e-05, "loss": 4.0783, "step": 1200 }, { "epoch": 0.01, "learning_rate": 4.992714885579858e-05, "loss": 4.0371, "step": 1300 }, { "epoch": 0.01, "learning_rate": 4.992152762553613e-05, "loss": 3.9386, "step": 1400 }, { "epoch": 0.01, "learning_rate": 4.9915962607576297e-05, "loss": 3.8893, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.991034137731384e-05, "loss": 3.8706, "step": 1600 }, { "epoch": 0.01, "learning_rate": 4.990472014705139e-05, "loss": 3.8605, "step": 1700 }, { "epoch": 0.01, "learning_rate": 4.9899098916788935e-05, "loss": 3.8019, "step": 1800 }, { "epoch": 0.01, "learning_rate": 4.9893477686526475e-05, "loss": 3.7531, "step": 1900 }, { "epoch": 0.01, "learning_rate": 4.988785645626402e-05, "loss": 3.7156, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.988223522600157e-05, "loss": 3.7135, "step": 2100 }, { "epoch": 0.01, "learning_rate": 4.987661399573911e-05, "loss": 3.6643, "step": 2200 }, { "epoch": 0.01, "learning_rate": 4.987099276547665e-05, "loss": 3.5797, "step": 2300 }, { "epoch": 0.01, "learning_rate": 4.98653715352142e-05, "loss": 3.5802, "step": 2400 }, { "epoch": 0.01, "learning_rate": 4.9859750304951746e-05, "loss": 3.5381, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.9854129074689285e-05, "loss": 3.5357, "step": 2600 }, { "epoch": 0.02, "learning_rate": 4.984850784442683e-05, "loss": 3.4414, "step": 2700 }, { "epoch": 0.02, "learning_rate": 4.984288661416438e-05, "loss": 3.4507, "step": 2800 }, { "epoch": 0.02, "learning_rate": 4.9837265383901924e-05, "loss": 3.3674, "step": 2900 }, { "epoch": 0.02, "learning_rate": 4.983164415363947e-05, "loss": 3.3073, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.982602292337702e-05, "loss": 3.3416, "step": 3100 }, { "epoch": 0.02, "learning_rate": 4.982040169311456e-05, "loss": 3.267, "step": 3200 }, { "epoch": 0.02, "learning_rate": 4.98147804628521e-05, "loss": 3.2396, "step": 3300 }, { "epoch": 0.02, "learning_rate": 4.980915923258965e-05, "loss": 3.2016, "step": 3400 }, { "epoch": 0.02, "learning_rate": 4.9803538002327195e-05, "loss": 3.1567, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.9797916772064734e-05, "loss": 3.1444, "step": 3600 }, { "epoch": 0.02, "learning_rate": 4.979229554180228e-05, "loss": 3.097, "step": 3700 }, { "epoch": 0.02, "learning_rate": 4.978667431153983e-05, "loss": 3.0428, "step": 3800 }, { "epoch": 0.02, "learning_rate": 4.978105308127737e-05, "loss": 3.0852, "step": 3900 }, { "epoch": 0.02, "learning_rate": 4.977543185101491e-05, "loss": 2.9581, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.976981062075246e-05, "loss": 2.9466, "step": 4100 }, { "epoch": 0.02, "learning_rate": 4.9764189390490005e-05, "loss": 2.958, "step": 4200 }, { "epoch": 0.02, "learning_rate": 4.9758568160227545e-05, "loss": 2.9198, "step": 4300 }, { "epoch": 0.02, "learning_rate": 4.975294692996509e-05, "loss": 2.9059, "step": 4400 }, { "epoch": 0.03, "learning_rate": 4.974732569970264e-05, "loss": 2.8533, "step": 4500 }, { "epoch": 0.03, "learning_rate": 4.9741704469440184e-05, "loss": 2.8261, "step": 4600 }, { "epoch": 0.03, "learning_rate": 4.973608323917773e-05, "loss": 2.7826, "step": 4700 }, { "epoch": 0.03, "learning_rate": 4.9730462008915276e-05, "loss": 2.7884, "step": 4800 }, { "epoch": 0.03, "learning_rate": 4.972484077865282e-05, "loss": 2.7279, "step": 4900 }, { "epoch": 0.03, "learning_rate": 4.971921954839036e-05, "loss": 2.6892, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.971359831812791e-05, "loss": 2.7282, "step": 5100 }, { "epoch": 0.03, "learning_rate": 4.9707977087865455e-05, "loss": 2.7126, "step": 5200 }, { "epoch": 0.03, "learning_rate": 4.9702355857602994e-05, "loss": 2.6314, "step": 5300 }, { "epoch": 0.03, "learning_rate": 4.969673462734054e-05, "loss": 2.6234, "step": 5400 }, { "epoch": 0.03, "learning_rate": 4.969111339707809e-05, "loss": 2.6051, "step": 5500 }, { "epoch": 0.03, "learning_rate": 4.968549216681563e-05, "loss": 2.6092, "step": 5600 }, { "epoch": 0.03, "learning_rate": 4.967987093655317e-05, "loss": 2.5187, "step": 5700 }, { "epoch": 0.03, "learning_rate": 4.967424970629072e-05, "loss": 2.5295, "step": 5800 }, { "epoch": 0.03, "learning_rate": 4.9668628476028265e-05, "loss": 2.4636, "step": 5900 }, { "epoch": 0.03, "learning_rate": 4.966300724576581e-05, "loss": 2.4679, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.965738601550336e-05, "loss": 2.4281, "step": 6100 }, { "epoch": 0.03, "learning_rate": 4.9651764785240904e-05, "loss": 2.4284, "step": 6200 }, { "epoch": 0.04, "learning_rate": 4.964614355497845e-05, "loss": 2.3777, "step": 6300 }, { "epoch": 0.04, "learning_rate": 4.964052232471599e-05, "loss": 2.4207, "step": 6400 }, { "epoch": 0.04, "learning_rate": 4.9634901094453536e-05, "loss": 2.3493, "step": 6500 }, { "epoch": 0.04, "learning_rate": 4.962927986419108e-05, "loss": 2.3188, "step": 6600 }, { "epoch": 0.04, "learning_rate": 4.962365863392862e-05, "loss": 2.3228, "step": 6700 }, { "epoch": 0.04, "learning_rate": 4.961803740366617e-05, "loss": 2.2783, "step": 6800 }, { "epoch": 0.04, "learning_rate": 4.9612416173403714e-05, "loss": 2.2501, "step": 6900 }, { "epoch": 0.04, "learning_rate": 4.960679494314126e-05, "loss": 2.2354, "step": 7000 }, { "epoch": 0.04, "learning_rate": 4.96011737128788e-05, "loss": 2.228, "step": 7100 }, { "epoch": 0.04, "learning_rate": 4.9595552482616346e-05, "loss": 2.1995, "step": 7200 }, { "epoch": 0.04, "learning_rate": 4.958993125235389e-05, "loss": 2.1941, "step": 7300 }, { "epoch": 0.04, "learning_rate": 4.958431002209143e-05, "loss": 2.1656, "step": 7400 }, { "epoch": 0.04, "learning_rate": 4.957868879182898e-05, "loss": 2.1499, "step": 7500 }, { "epoch": 0.04, "learning_rate": 4.9573067561566525e-05, "loss": 2.1266, "step": 7600 }, { "epoch": 0.04, "learning_rate": 4.956744633130407e-05, "loss": 2.1084, "step": 7700 }, { "epoch": 0.04, "learning_rate": 4.956182510104162e-05, "loss": 2.09, "step": 7800 }, { "epoch": 0.04, "learning_rate": 4.9556203870779163e-05, "loss": 2.0773, "step": 7900 }, { "epoch": 0.04, "learning_rate": 4.955058264051671e-05, "loss": 2.0747, "step": 8000 }, { "epoch": 0.05, "learning_rate": 4.954496141025425e-05, "loss": 2.0388, "step": 8100 }, { "epoch": 0.05, "learning_rate": 4.9539340179991795e-05, "loss": 2.0333, "step": 8200 }, { "epoch": 0.05, "learning_rate": 4.953371894972934e-05, "loss": 2.0312, "step": 8300 }, { "epoch": 0.05, "learning_rate": 4.952809771946689e-05, "loss": 1.9971, "step": 8400 }, { "epoch": 0.05, "learning_rate": 4.952247648920443e-05, "loss": 2.0001, "step": 8500 }, { "epoch": 0.05, "learning_rate": 4.9516855258941974e-05, "loss": 1.9648, "step": 8600 }, { "epoch": 0.05, "learning_rate": 4.951123402867952e-05, "loss": 1.951, "step": 8700 }, { "epoch": 0.05, "learning_rate": 4.950561279841706e-05, "loss": 1.923, "step": 8800 }, { "epoch": 0.05, "learning_rate": 4.9499991568154606e-05, "loss": 1.9315, "step": 8900 }, { "epoch": 0.05, "learning_rate": 4.949437033789215e-05, "loss": 1.8978, "step": 9000 }, { "epoch": 0.05, "learning_rate": 4.94887491076297e-05, "loss": 1.8886, "step": 9100 }, { "epoch": 0.05, "learning_rate": 4.9483127877367245e-05, "loss": 1.8667, "step": 9200 }, { "epoch": 0.05, "learning_rate": 4.947750664710479e-05, "loss": 1.8254, "step": 9300 }, { "epoch": 0.05, "learning_rate": 4.947188541684234e-05, "loss": 1.8372, "step": 9400 }, { "epoch": 0.05, "learning_rate": 4.946626418657988e-05, "loss": 1.8009, "step": 9500 }, { "epoch": 0.05, "learning_rate": 4.946064295631742e-05, "loss": 1.8007, "step": 9600 }, { "epoch": 0.05, "learning_rate": 4.945502172605497e-05, "loss": 1.7914, "step": 9700 }, { "epoch": 0.06, "learning_rate": 4.944945670809514e-05, "loss": 1.7598, "step": 9800 }, { "epoch": 0.06, "learning_rate": 4.9443835477832676e-05, "loss": 1.7665, "step": 9900 }, { "epoch": 0.06, "learning_rate": 4.943821424757022e-05, "loss": 1.7547, "step": 10000 }, { "epoch": 0.06, "learning_rate": 4.943259301730777e-05, "loss": 1.7238, "step": 10100 }, { "epoch": 0.06, "learning_rate": 4.9426971787045315e-05, "loss": 1.7135, "step": 10200 }, { "epoch": 0.06, "learning_rate": 4.942135055678286e-05, "loss": 1.7042, "step": 10300 }, { "epoch": 0.06, "learning_rate": 4.941572932652041e-05, "loss": 1.694, "step": 10400 }, { "epoch": 0.06, "learning_rate": 4.9410108096257954e-05, "loss": 1.6789, "step": 10500 }, { "epoch": 0.06, "learning_rate": 4.9404486865995494e-05, "loss": 1.6663, "step": 10600 }, { "epoch": 0.06, "learning_rate": 4.939886563573304e-05, "loss": 1.6806, "step": 10700 }, { "epoch": 0.06, "learning_rate": 4.9393244405470586e-05, "loss": 1.662, "step": 10800 }, { "epoch": 0.06, "learning_rate": 4.938762317520813e-05, "loss": 1.6356, "step": 10900 }, { "epoch": 0.06, "learning_rate": 4.938200194494567e-05, "loss": 1.6223, "step": 11000 }, { "epoch": 0.06, "learning_rate": 4.937638071468322e-05, "loss": 1.625, "step": 11100 }, { "epoch": 0.06, "learning_rate": 4.9370759484420764e-05, "loss": 1.6213, "step": 11200 }, { "epoch": 0.06, "learning_rate": 4.9365138254158304e-05, "loss": 1.6011, "step": 11300 }, { "epoch": 0.06, "learning_rate": 4.935951702389585e-05, "loss": 1.584, "step": 11400 }, { "epoch": 0.06, "learning_rate": 4.9353895793633397e-05, "loss": 1.5671, "step": 11500 }, { "epoch": 0.07, "learning_rate": 4.934827456337094e-05, "loss": 1.5545, "step": 11600 }, { "epoch": 0.07, "learning_rate": 4.934265333310848e-05, "loss": 1.5518, "step": 11700 }, { "epoch": 0.07, "learning_rate": 4.933703210284603e-05, "loss": 1.5334, "step": 11800 }, { "epoch": 0.07, "learning_rate": 4.9331410872583575e-05, "loss": 1.5476, "step": 11900 }, { "epoch": 0.07, "learning_rate": 4.932578964232112e-05, "loss": 1.5173, "step": 12000 }, { "epoch": 0.07, "learning_rate": 4.932016841205867e-05, "loss": 1.4909, "step": 12100 }, { "epoch": 0.07, "learning_rate": 4.9314547181796214e-05, "loss": 1.4898, "step": 12200 }, { "epoch": 0.07, "learning_rate": 4.930892595153376e-05, "loss": 1.4866, "step": 12300 }, { "epoch": 0.07, "learning_rate": 4.93033047212713e-05, "loss": 1.4586, "step": 12400 }, { "epoch": 0.07, "learning_rate": 4.9297683491008846e-05, "loss": 1.4521, "step": 12500 }, { "epoch": 0.07, "learning_rate": 4.929206226074639e-05, "loss": 1.444, "step": 12600 }, { "epoch": 0.07, "learning_rate": 4.928649724278656e-05, "loss": 1.4528, "step": 12700 }, { "epoch": 0.07, "learning_rate": 4.92808760125241e-05, "loss": 1.4409, "step": 12800 }, { "epoch": 0.07, "learning_rate": 4.9275254782261645e-05, "loss": 1.4285, "step": 12900 }, { "epoch": 0.07, "learning_rate": 4.926963355199919e-05, "loss": 1.4242, "step": 13000 }, { "epoch": 0.07, "learning_rate": 4.926401232173674e-05, "loss": 1.3913, "step": 13100 }, { "epoch": 0.07, "learning_rate": 4.9258391091474284e-05, "loss": 1.3986, "step": 13200 }, { "epoch": 0.07, "learning_rate": 4.925276986121183e-05, "loss": 1.3921, "step": 13300 }, { "epoch": 0.08, "learning_rate": 4.924714863094938e-05, "loss": 1.3849, "step": 13400 }, { "epoch": 0.08, "learning_rate": 4.9241527400686916e-05, "loss": 1.3758, "step": 13500 }, { "epoch": 0.08, "learning_rate": 4.923590617042446e-05, "loss": 1.3645, "step": 13600 }, { "epoch": 0.08, "learning_rate": 4.923028494016201e-05, "loss": 1.3521, "step": 13700 }, { "epoch": 0.08, "learning_rate": 4.922466370989955e-05, "loss": 1.3568, "step": 13800 }, { "epoch": 0.08, "learning_rate": 4.9219042479637095e-05, "loss": 1.3397, "step": 13900 }, { "epoch": 0.08, "learning_rate": 4.921342124937464e-05, "loss": 1.3331, "step": 14000 }, { "epoch": 0.08, "learning_rate": 4.920780001911219e-05, "loss": 1.314, "step": 14100 }, { "epoch": 0.08, "learning_rate": 4.920217878884973e-05, "loss": 1.2971, "step": 14200 }, { "epoch": 0.08, "learning_rate": 4.919655755858727e-05, "loss": 1.3133, "step": 14300 }, { "epoch": 0.08, "learning_rate": 4.919093632832482e-05, "loss": 1.284, "step": 14400 }, { "epoch": 0.08, "learning_rate": 4.9185315098062366e-05, "loss": 1.2809, "step": 14500 }, { "epoch": 0.08, "learning_rate": 4.917969386779991e-05, "loss": 1.2683, "step": 14600 }, { "epoch": 0.08, "learning_rate": 4.917407263753746e-05, "loss": 1.2821, "step": 14700 }, { "epoch": 0.08, "learning_rate": 4.9168451407275004e-05, "loss": 1.2753, "step": 14800 }, { "epoch": 0.08, "learning_rate": 4.9162830177012544e-05, "loss": 1.275, "step": 14900 }, { "epoch": 0.08, "learning_rate": 4.915720894675009e-05, "loss": 1.2367, "step": 15000 }, { "epoch": 0.08, "learning_rate": 4.9151587716487636e-05, "loss": 1.2333, "step": 15100 }, { "epoch": 0.09, "learning_rate": 4.9145966486225176e-05, "loss": 1.2463, "step": 15200 }, { "epoch": 0.09, "learning_rate": 4.914034525596272e-05, "loss": 1.2498, "step": 15300 }, { "epoch": 0.09, "learning_rate": 4.913472402570027e-05, "loss": 1.2156, "step": 15400 }, { "epoch": 0.09, "learning_rate": 4.912910279543781e-05, "loss": 1.2191, "step": 15500 }, { "epoch": 0.09, "learning_rate": 4.9123481565175354e-05, "loss": 1.2084, "step": 15600 }, { "epoch": 0.09, "learning_rate": 4.91178603349129e-05, "loss": 1.2092, "step": 15700 }, { "epoch": 0.09, "learning_rate": 4.911223910465045e-05, "loss": 1.2076, "step": 15800 }, { "epoch": 0.09, "learning_rate": 4.9106617874387986e-05, "loss": 1.2094, "step": 15900 }, { "epoch": 0.09, "learning_rate": 4.910099664412553e-05, "loss": 1.1816, "step": 16000 }, { "epoch": 0.09, "learning_rate": 4.909537541386308e-05, "loss": 1.1782, "step": 16100 }, { "epoch": 0.09, "learning_rate": 4.908981039590325e-05, "loss": 1.1858, "step": 16200 }, { "epoch": 0.09, "learning_rate": 4.908418916564079e-05, "loss": 1.1676, "step": 16300 }, { "epoch": 0.09, "learning_rate": 4.907856793537834e-05, "loss": 1.1634, "step": 16400 }, { "epoch": 0.09, "learning_rate": 4.9072946705115885e-05, "loss": 1.1367, "step": 16500 }, { "epoch": 0.09, "learning_rate": 4.9067325474853425e-05, "loss": 1.1347, "step": 16600 }, { "epoch": 0.09, "learning_rate": 4.906170424459097e-05, "loss": 1.1344, "step": 16700 }, { "epoch": 0.09, "learning_rate": 4.905608301432852e-05, "loss": 1.1463, "step": 16800 }, { "epoch": 0.09, "learning_rate": 4.9050461784066064e-05, "loss": 1.1195, "step": 16900 }, { "epoch": 0.1, "learning_rate": 4.90448405538036e-05, "loss": 1.1326, "step": 17000 }, { "epoch": 0.1, "learning_rate": 4.903921932354115e-05, "loss": 1.1155, "step": 17100 }, { "epoch": 0.1, "learning_rate": 4.9033598093278696e-05, "loss": 1.1083, "step": 17200 }, { "epoch": 0.1, "learning_rate": 4.902797686301624e-05, "loss": 1.0998, "step": 17300 }, { "epoch": 0.1, "learning_rate": 4.902235563275379e-05, "loss": 1.0986, "step": 17400 }, { "epoch": 0.1, "learning_rate": 4.9016734402491335e-05, "loss": 1.0992, "step": 17500 }, { "epoch": 0.1, "learning_rate": 4.901111317222888e-05, "loss": 1.0889, "step": 17600 }, { "epoch": 0.1, "learning_rate": 4.900549194196642e-05, "loss": 1.0869, "step": 17700 }, { "epoch": 0.1, "learning_rate": 4.8999870711703967e-05, "loss": 1.0851, "step": 17800 }, { "epoch": 0.1, "learning_rate": 4.899424948144151e-05, "loss": 1.0744, "step": 17900 }, { "epoch": 0.1, "learning_rate": 4.898862825117905e-05, "loss": 1.0647, "step": 18000 }, { "epoch": 0.1, "learning_rate": 4.89830070209166e-05, "loss": 1.0615, "step": 18100 }, { "epoch": 0.1, "learning_rate": 4.8977385790654145e-05, "loss": 1.0458, "step": 18200 }, { "epoch": 0.1, "learning_rate": 4.897176456039169e-05, "loss": 1.0597, "step": 18300 }, { "epoch": 0.1, "learning_rate": 4.896614333012923e-05, "loss": 1.0508, "step": 18400 }, { "epoch": 0.1, "learning_rate": 4.896052209986678e-05, "loss": 1.0611, "step": 18500 }, { "epoch": 0.1, "learning_rate": 4.895490086960432e-05, "loss": 1.0327, "step": 18600 }, { "epoch": 0.11, "learning_rate": 4.894927963934187e-05, "loss": 1.048, "step": 18700 }, { "epoch": 0.11, "learning_rate": 4.8943658409079416e-05, "loss": 1.0348, "step": 18800 }, { "epoch": 0.11, "learning_rate": 4.893803717881696e-05, "loss": 1.0399, "step": 18900 }, { "epoch": 0.11, "learning_rate": 4.893241594855451e-05, "loss": 1.0029, "step": 19000 }, { "epoch": 0.11, "learning_rate": 4.892679471829205e-05, "loss": 1.0157, "step": 19100 }, { "epoch": 0.11, "learning_rate": 4.8921173488029594e-05, "loss": 1.0069, "step": 19200 }, { "epoch": 0.11, "learning_rate": 4.891555225776714e-05, "loss": 1.0066, "step": 19300 }, { "epoch": 0.11, "learning_rate": 4.890993102750468e-05, "loss": 1.0058, "step": 19400 }, { "epoch": 0.11, "learning_rate": 4.8904309797242226e-05, "loss": 0.9922, "step": 19500 }, { "epoch": 0.11, "learning_rate": 4.889868856697977e-05, "loss": 0.9727, "step": 19600 }, { "epoch": 0.11, "learning_rate": 4.889306733671732e-05, "loss": 0.9964, "step": 19700 }, { "epoch": 0.11, "learning_rate": 4.888744610645486e-05, "loss": 0.9842, "step": 19800 }, { "epoch": 0.11, "learning_rate": 4.8881824876192405e-05, "loss": 0.9823, "step": 19900 }, { "epoch": 0.11, "learning_rate": 4.887620364592995e-05, "loss": 0.9619, "step": 20000 }, { "epoch": 0.11, "learning_rate": 4.887058241566749e-05, "loss": 0.9757, "step": 20100 }, { "epoch": 0.11, "learning_rate": 4.8864961185405037e-05, "loss": 0.9651, "step": 20200 }, { "epoch": 0.11, "learning_rate": 4.885933995514258e-05, "loss": 0.9723, "step": 20300 }, { "epoch": 0.11, "learning_rate": 4.885371872488013e-05, "loss": 0.9634, "step": 20400 }, { "epoch": 0.12, "learning_rate": 4.8848097494617675e-05, "loss": 0.9711, "step": 20500 }, { "epoch": 0.12, "learning_rate": 4.884247626435522e-05, "loss": 0.9605, "step": 20600 }, { "epoch": 0.12, "learning_rate": 4.883691124639539e-05, "loss": 0.9569, "step": 20700 }, { "epoch": 0.12, "learning_rate": 4.8831290016132936e-05, "loss": 0.9378, "step": 20800 }, { "epoch": 0.12, "learning_rate": 4.8825668785870475e-05, "loss": 0.9427, "step": 20900 }, { "epoch": 0.12, "learning_rate": 4.882004755560802e-05, "loss": 0.947, "step": 21000 }, { "epoch": 0.12, "learning_rate": 4.881442632534557e-05, "loss": 0.9455, "step": 21100 }, { "epoch": 0.12, "learning_rate": 4.880880509508311e-05, "loss": 0.9236, "step": 21200 }, { "epoch": 0.12, "learning_rate": 4.8803183864820653e-05, "loss": 0.9209, "step": 21300 }, { "epoch": 0.12, "learning_rate": 4.87975626345582e-05, "loss": 0.9187, "step": 21400 }, { "epoch": 0.12, "learning_rate": 4.8791941404295746e-05, "loss": 0.9161, "step": 21500 }, { "epoch": 0.12, "learning_rate": 4.878632017403329e-05, "loss": 0.9342, "step": 21600 }, { "epoch": 0.12, "learning_rate": 4.878069894377084e-05, "loss": 0.9072, "step": 21700 }, { "epoch": 0.12, "learning_rate": 4.8775077713508385e-05, "loss": 0.9067, "step": 21800 }, { "epoch": 0.12, "learning_rate": 4.8769456483245924e-05, "loss": 0.9052, "step": 21900 }, { "epoch": 0.12, "learning_rate": 4.876383525298347e-05, "loss": 0.8974, "step": 22000 }, { "epoch": 0.12, "learning_rate": 4.875821402272102e-05, "loss": 0.9138, "step": 22100 }, { "epoch": 0.12, "learning_rate": 4.875259279245856e-05, "loss": 0.8994, "step": 22200 }, { "epoch": 0.13, "learning_rate": 4.87469715621961e-05, "loss": 0.8976, "step": 22300 }, { "epoch": 0.13, "learning_rate": 4.874135033193365e-05, "loss": 0.8928, "step": 22400 }, { "epoch": 0.13, "learning_rate": 4.8735729101671195e-05, "loss": 0.8826, "step": 22500 }, { "epoch": 0.13, "learning_rate": 4.8730107871408735e-05, "loss": 0.8848, "step": 22600 }, { "epoch": 0.13, "learning_rate": 4.872448664114628e-05, "loss": 0.8941, "step": 22700 }, { "epoch": 0.13, "learning_rate": 4.871886541088383e-05, "loss": 0.8812, "step": 22800 }, { "epoch": 0.13, "learning_rate": 4.8713244180621374e-05, "loss": 0.879, "step": 22900 }, { "epoch": 0.13, "learning_rate": 4.870762295035892e-05, "loss": 0.8627, "step": 23000 }, { "epoch": 0.13, "learning_rate": 4.8702001720096466e-05, "loss": 0.8801, "step": 23100 }, { "epoch": 0.13, "learning_rate": 4.869638048983401e-05, "loss": 0.8576, "step": 23200 }, { "epoch": 0.13, "learning_rate": 4.869075925957155e-05, "loss": 0.8827, "step": 23300 }, { "epoch": 0.13, "learning_rate": 4.86851380293091e-05, "loss": 0.8744, "step": 23400 }, { "epoch": 0.13, "learning_rate": 4.8679516799046644e-05, "loss": 0.8578, "step": 23500 }, { "epoch": 0.13, "learning_rate": 4.867389556878419e-05, "loss": 0.8575, "step": 23600 }, { "epoch": 0.13, "learning_rate": 4.866827433852173e-05, "loss": 0.8612, "step": 23700 }, { "epoch": 0.13, "learning_rate": 4.8662653108259276e-05, "loss": 0.8579, "step": 23800 }, { "epoch": 0.13, "learning_rate": 4.865703187799682e-05, "loss": 0.8384, "step": 23900 }, { "epoch": 0.13, "learning_rate": 4.865141064773436e-05, "loss": 0.8358, "step": 24000 }, { "epoch": 0.14, "learning_rate": 4.864578941747191e-05, "loss": 0.8398, "step": 24100 }, { "epoch": 0.14, "learning_rate": 4.8640168187209455e-05, "loss": 0.8357, "step": 24200 }, { "epoch": 0.14, "learning_rate": 4.8634546956947e-05, "loss": 0.83, "step": 24300 }, { "epoch": 0.14, "learning_rate": 4.862892572668454e-05, "loss": 0.8276, "step": 24400 }, { "epoch": 0.14, "learning_rate": 4.862330449642209e-05, "loss": 0.8418, "step": 24500 }, { "epoch": 0.14, "learning_rate": 4.861768326615963e-05, "loss": 0.8304, "step": 24600 }, { "epoch": 0.14, "learning_rate": 4.861206203589718e-05, "loss": 0.8269, "step": 24700 }, { "epoch": 0.14, "learning_rate": 4.8606440805634726e-05, "loss": 0.8117, "step": 24800 }, { "epoch": 0.14, "learning_rate": 4.860081957537227e-05, "loss": 0.8232, "step": 24900 }, { "epoch": 0.14, "learning_rate": 4.859525455741244e-05, "loss": 0.8096, "step": 25000 }, { "epoch": 0.14, "learning_rate": 4.858963332714998e-05, "loss": 0.8123, "step": 25100 }, { "epoch": 0.14, "learning_rate": 4.8584012096887525e-05, "loss": 0.8072, "step": 25200 }, { "epoch": 0.14, "learning_rate": 4.857839086662507e-05, "loss": 0.8109, "step": 25300 }, { "epoch": 0.14, "learning_rate": 4.857276963636261e-05, "loss": 0.8017, "step": 25400 }, { "epoch": 0.14, "learning_rate": 4.856714840610016e-05, "loss": 0.7964, "step": 25500 }, { "epoch": 0.14, "learning_rate": 4.8561527175837704e-05, "loss": 0.8116, "step": 25600 }, { "epoch": 0.14, "learning_rate": 4.855590594557525e-05, "loss": 0.8038, "step": 25700 }, { "epoch": 0.15, "learning_rate": 4.8550284715312796e-05, "loss": 0.7844, "step": 25800 }, { "epoch": 0.15, "learning_rate": 4.854466348505034e-05, "loss": 0.8023, "step": 25900 }, { "epoch": 0.15, "learning_rate": 4.853904225478789e-05, "loss": 0.7929, "step": 26000 }, { "epoch": 0.15, "learning_rate": 4.853342102452543e-05, "loss": 0.7934, "step": 26100 }, { "epoch": 0.15, "learning_rate": 4.8527799794262975e-05, "loss": 0.7867, "step": 26200 }, { "epoch": 0.15, "learning_rate": 4.852217856400052e-05, "loss": 0.774, "step": 26300 }, { "epoch": 0.15, "learning_rate": 4.851655733373807e-05, "loss": 0.7856, "step": 26400 }, { "epoch": 0.15, "learning_rate": 4.851093610347561e-05, "loss": 0.7868, "step": 26500 }, { "epoch": 0.15, "learning_rate": 4.850531487321315e-05, "loss": 0.7751, "step": 26600 }, { "epoch": 0.15, "learning_rate": 4.84996936429507e-05, "loss": 0.7983, "step": 26700 }, { "epoch": 0.15, "learning_rate": 4.849407241268824e-05, "loss": 0.773, "step": 26800 }, { "epoch": 0.15, "learning_rate": 4.8488451182425785e-05, "loss": 0.7706, "step": 26900 }, { "epoch": 0.15, "learning_rate": 4.848288616446596e-05, "loss": 0.765, "step": 27000 }, { "epoch": 0.15, "learning_rate": 4.8477264934203506e-05, "loss": 0.7641, "step": 27100 }, { "epoch": 0.15, "learning_rate": 4.8471643703941045e-05, "loss": 0.7598, "step": 27200 }, { "epoch": 0.15, "learning_rate": 4.846602247367859e-05, "loss": 0.7558, "step": 27300 }, { "epoch": 0.15, "learning_rate": 4.846040124341614e-05, "loss": 0.7799, "step": 27400 }, { "epoch": 0.15, "learning_rate": 4.8454780013153684e-05, "loss": 0.7501, "step": 27500 }, { "epoch": 0.16, "learning_rate": 4.8449158782891224e-05, "loss": 0.7588, "step": 27600 }, { "epoch": 0.16, "learning_rate": 4.844359376493139e-05, "loss": 0.75, "step": 27700 }, { "epoch": 0.16, "learning_rate": 4.843797253466894e-05, "loss": 0.7576, "step": 27800 }, { "epoch": 0.16, "learning_rate": 4.8432351304406484e-05, "loss": 0.7513, "step": 27900 }, { "epoch": 0.16, "learning_rate": 4.842678628644665e-05, "loss": 0.7486, "step": 28000 }, { "epoch": 0.16, "learning_rate": 4.84211650561842e-05, "loss": 0.7464, "step": 28100 }, { "epoch": 0.16, "learning_rate": 4.8415543825921744e-05, "loss": 0.7427, "step": 28200 }, { "epoch": 0.16, "learning_rate": 4.840992259565929e-05, "loss": 0.7552, "step": 28300 }, { "epoch": 0.16, "learning_rate": 4.840430136539683e-05, "loss": 0.7422, "step": 28400 }, { "epoch": 0.16, "learning_rate": 4.8398680135134376e-05, "loss": 0.7318, "step": 28500 }, { "epoch": 0.16, "learning_rate": 4.839305890487192e-05, "loss": 0.7442, "step": 28600 }, { "epoch": 0.16, "learning_rate": 4.838743767460946e-05, "loss": 0.7439, "step": 28700 }, { "epoch": 0.16, "learning_rate": 4.838181644434701e-05, "loss": 0.7482, "step": 28800 }, { "epoch": 0.16, "learning_rate": 4.8376195214084554e-05, "loss": 0.7368, "step": 28900 }, { "epoch": 0.16, "learning_rate": 4.83705739838221e-05, "loss": 0.7318, "step": 29000 }, { "epoch": 0.16, "learning_rate": 4.836495275355965e-05, "loss": 0.7371, "step": 29100 }, { "epoch": 0.16, "learning_rate": 4.835933152329719e-05, "loss": 0.7123, "step": 29200 }, { "epoch": 0.16, "learning_rate": 4.835371029303474e-05, "loss": 0.7162, "step": 29300 }, { "epoch": 0.17, "learning_rate": 4.834808906277228e-05, "loss": 0.7219, "step": 29400 }, { "epoch": 0.17, "learning_rate": 4.8342467832509825e-05, "loss": 0.7159, "step": 29500 }, { "epoch": 0.17, "learning_rate": 4.833684660224737e-05, "loss": 0.7184, "step": 29600 }, { "epoch": 0.17, "learning_rate": 4.833122537198492e-05, "loss": 0.7137, "step": 29700 }, { "epoch": 0.17, "learning_rate": 4.832560414172246e-05, "loss": 0.7229, "step": 29800 }, { "epoch": 0.17, "learning_rate": 4.8319982911460003e-05, "loss": 0.7195, "step": 29900 }, { "epoch": 0.17, "learning_rate": 4.831436168119755e-05, "loss": 0.6938, "step": 30000 }, { "epoch": 0.17, "learning_rate": 4.830874045093509e-05, "loss": 0.7061, "step": 30100 }, { "epoch": 0.17, "learning_rate": 4.8303119220672636e-05, "loss": 0.7044, "step": 30200 }, { "epoch": 0.17, "learning_rate": 4.829749799041018e-05, "loss": 0.7225, "step": 30300 }, { "epoch": 0.17, "learning_rate": 4.829187676014773e-05, "loss": 0.7052, "step": 30400 }, { "epoch": 0.17, "learning_rate": 4.8286311742187896e-05, "loss": 0.7071, "step": 30500 }, { "epoch": 0.17, "learning_rate": 4.828069051192544e-05, "loss": 0.6939, "step": 30600 }, { "epoch": 0.17, "learning_rate": 4.827506928166299e-05, "loss": 0.7071, "step": 30700 }, { "epoch": 0.17, "learning_rate": 4.8269448051400535e-05, "loss": 0.6979, "step": 30800 }, { "epoch": 0.17, "learning_rate": 4.8263826821138074e-05, "loss": 0.6989, "step": 30900 }, { "epoch": 0.17, "learning_rate": 4.825820559087562e-05, "loss": 0.7005, "step": 31000 }, { "epoch": 0.17, "learning_rate": 4.8252584360613167e-05, "loss": 0.7155, "step": 31100 }, { "epoch": 0.18, "learning_rate": 4.8246963130350706e-05, "loss": 0.6853, "step": 31200 }, { "epoch": 0.18, "learning_rate": 4.824134190008825e-05, "loss": 0.6985, "step": 31300 }, { "epoch": 0.18, "learning_rate": 4.82357206698258e-05, "loss": 0.6883, "step": 31400 }, { "epoch": 0.18, "learning_rate": 4.8230099439563345e-05, "loss": 0.6888, "step": 31500 }, { "epoch": 0.18, "learning_rate": 4.822447820930089e-05, "loss": 0.6849, "step": 31600 }, { "epoch": 0.18, "learning_rate": 4.821885697903844e-05, "loss": 0.6787, "step": 31700 }, { "epoch": 0.18, "learning_rate": 4.8213235748775984e-05, "loss": 0.6801, "step": 31800 }, { "epoch": 0.18, "learning_rate": 4.820761451851352e-05, "loss": 0.6794, "step": 31900 }, { "epoch": 0.18, "learning_rate": 4.820199328825107e-05, "loss": 0.6825, "step": 32000 }, { "epoch": 0.18, "learning_rate": 4.8196372057988616e-05, "loss": 0.6663, "step": 32100 }, { "epoch": 0.18, "learning_rate": 4.8190807040028783e-05, "loss": 0.6741, "step": 32200 }, { "epoch": 0.18, "learning_rate": 4.818518580976632e-05, "loss": 0.6689, "step": 32300 }, { "epoch": 0.18, "learning_rate": 4.817956457950387e-05, "loss": 0.6635, "step": 32400 }, { "epoch": 0.18, "learning_rate": 4.8173943349241416e-05, "loss": 0.6652, "step": 32500 }, { "epoch": 0.18, "learning_rate": 4.816832211897896e-05, "loss": 0.6667, "step": 32600 }, { "epoch": 0.18, "learning_rate": 4.816270088871651e-05, "loss": 0.6679, "step": 32700 }, { "epoch": 0.18, "learning_rate": 4.8157079658454054e-05, "loss": 0.6754, "step": 32800 }, { "epoch": 0.18, "learning_rate": 4.81514584281916e-05, "loss": 0.6697, "step": 32900 }, { "epoch": 0.19, "learning_rate": 4.814583719792914e-05, "loss": 0.6604, "step": 33000 }, { "epoch": 0.19, "learning_rate": 4.8140215967666686e-05, "loss": 0.6722, "step": 33100 }, { "epoch": 0.19, "learning_rate": 4.813459473740423e-05, "loss": 0.6607, "step": 33200 }, { "epoch": 0.19, "learning_rate": 4.812897350714178e-05, "loss": 0.6645, "step": 33300 }, { "epoch": 0.19, "learning_rate": 4.812335227687932e-05, "loss": 0.6651, "step": 33400 }, { "epoch": 0.19, "learning_rate": 4.8117731046616865e-05, "loss": 0.6565, "step": 33500 }, { "epoch": 0.19, "learning_rate": 4.811210981635441e-05, "loss": 0.6602, "step": 33600 }, { "epoch": 0.19, "learning_rate": 4.810648858609195e-05, "loss": 0.6573, "step": 33700 }, { "epoch": 0.19, "learning_rate": 4.81008673558295e-05, "loss": 0.646, "step": 33800 }, { "epoch": 0.19, "learning_rate": 4.809524612556704e-05, "loss": 0.657, "step": 33900 }, { "epoch": 0.19, "learning_rate": 4.808962489530459e-05, "loss": 0.65, "step": 34000 }, { "epoch": 0.19, "learning_rate": 4.808400366504213e-05, "loss": 0.643, "step": 34100 }, { "epoch": 0.19, "learning_rate": 4.807838243477968e-05, "loss": 0.6582, "step": 34200 }, { "epoch": 0.19, "learning_rate": 4.807276120451723e-05, "loss": 0.6506, "step": 34300 }, { "epoch": 0.19, "learning_rate": 4.806713997425477e-05, "loss": 0.6557, "step": 34400 }, { "epoch": 0.19, "learning_rate": 4.8061518743992314e-05, "loss": 0.6447, "step": 34500 }, { "epoch": 0.19, "learning_rate": 4.805589751372986e-05, "loss": 0.6404, "step": 34600 }, { "epoch": 0.2, "learning_rate": 4.8050276283467407e-05, "loss": 0.6528, "step": 34700 }, { "epoch": 0.2, "learning_rate": 4.8044655053204946e-05, "loss": 0.6402, "step": 34800 }, { "epoch": 0.2, "learning_rate": 4.803903382294249e-05, "loss": 0.6516, "step": 34900 }, { "epoch": 0.2, "learning_rate": 4.803341259268004e-05, "loss": 0.6389, "step": 35000 }, { "epoch": 0.2, "learning_rate": 4.802779136241758e-05, "loss": 0.6196, "step": 35100 }, { "epoch": 0.2, "learning_rate": 4.8022170132155124e-05, "loss": 0.645, "step": 35200 }, { "epoch": 0.2, "learning_rate": 4.801654890189267e-05, "loss": 0.6388, "step": 35300 }, { "epoch": 0.2, "learning_rate": 4.801092767163021e-05, "loss": 0.6307, "step": 35400 }, { "epoch": 0.2, "learning_rate": 4.8005306441367756e-05, "loss": 0.6394, "step": 35500 }, { "epoch": 0.2, "learning_rate": 4.79996852111053e-05, "loss": 0.6264, "step": 35600 }, { "epoch": 0.2, "learning_rate": 4.799406398084285e-05, "loss": 0.638, "step": 35700 }, { "epoch": 0.2, "learning_rate": 4.7988442750580395e-05, "loss": 0.6363, "step": 35800 }, { "epoch": 0.2, "learning_rate": 4.798282152031794e-05, "loss": 0.6343, "step": 35900 }, { "epoch": 0.2, "learning_rate": 4.797720029005549e-05, "loss": 0.6424, "step": 36000 }, { "epoch": 0.2, "learning_rate": 4.797157905979303e-05, "loss": 0.6313, "step": 36100 }, { "epoch": 0.2, "learning_rate": 4.7965957829530574e-05, "loss": 0.6223, "step": 36200 }, { "epoch": 0.2, "learning_rate": 4.796033659926812e-05, "loss": 0.6152, "step": 36300 }, { "epoch": 0.2, "learning_rate": 4.7954715369005666e-05, "loss": 0.6199, "step": 36400 }, { "epoch": 0.21, "learning_rate": 4.7949094138743206e-05, "loss": 0.618, "step": 36500 }, { "epoch": 0.21, "learning_rate": 4.794347290848075e-05, "loss": 0.6154, "step": 36600 }, { "epoch": 0.21, "learning_rate": 4.79378516782183e-05, "loss": 0.624, "step": 36700 }, { "epoch": 0.21, "learning_rate": 4.793223044795584e-05, "loss": 0.6217, "step": 36800 }, { "epoch": 0.21, "learning_rate": 4.7926609217693384e-05, "loss": 0.6219, "step": 36900 }, { "epoch": 0.21, "learning_rate": 4.792098798743093e-05, "loss": 0.6203, "step": 37000 }, { "epoch": 0.21, "learning_rate": 4.7915366757168476e-05, "loss": 0.6171, "step": 37100 }, { "epoch": 0.21, "learning_rate": 4.7909745526906016e-05, "loss": 0.6213, "step": 37200 }, { "epoch": 0.21, "learning_rate": 4.790412429664356e-05, "loss": 0.606, "step": 37300 }, { "epoch": 0.21, "learning_rate": 4.789850306638111e-05, "loss": 0.6017, "step": 37400 }, { "epoch": 0.21, "learning_rate": 4.7892881836118655e-05, "loss": 0.6235, "step": 37500 }, { "epoch": 0.21, "learning_rate": 4.78872606058562e-05, "loss": 0.6087, "step": 37600 }, { "epoch": 0.21, "learning_rate": 4.788163937559375e-05, "loss": 0.6125, "step": 37700 }, { "epoch": 0.21, "learning_rate": 4.7876018145331294e-05, "loss": 0.6126, "step": 37800 }, { "epoch": 0.21, "learning_rate": 4.787039691506883e-05, "loss": 0.5987, "step": 37900 }, { "epoch": 0.21, "learning_rate": 4.786477568480638e-05, "loss": 0.6213, "step": 38000 }, { "epoch": 0.21, "learning_rate": 4.7859154454543926e-05, "loss": 0.614, "step": 38100 }, { "epoch": 0.21, "learning_rate": 4.7853533224281465e-05, "loss": 0.5892, "step": 38200 }, { "epoch": 0.22, "learning_rate": 4.784791199401901e-05, "loss": 0.6065, "step": 38300 }, { "epoch": 0.22, "learning_rate": 4.784229076375656e-05, "loss": 0.5908, "step": 38400 }, { "epoch": 0.22, "learning_rate": 4.7836669533494104e-05, "loss": 0.5908, "step": 38500 }, { "epoch": 0.22, "learning_rate": 4.7831048303231644e-05, "loss": 0.5955, "step": 38600 }, { "epoch": 0.22, "learning_rate": 4.782542707296919e-05, "loss": 0.5827, "step": 38700 }, { "epoch": 0.22, "learning_rate": 4.7819805842706736e-05, "loss": 0.5941, "step": 38800 }, { "epoch": 0.22, "learning_rate": 4.781418461244428e-05, "loss": 0.6, "step": 38900 }, { "epoch": 0.22, "learning_rate": 4.780856338218183e-05, "loss": 0.6139, "step": 39000 }, { "epoch": 0.22, "learning_rate": 4.7802942151919375e-05, "loss": 0.5912, "step": 39100 }, { "epoch": 0.22, "learning_rate": 4.779732092165692e-05, "loss": 0.5866, "step": 39200 }, { "epoch": 0.22, "learning_rate": 4.779169969139446e-05, "loss": 0.5968, "step": 39300 }, { "epoch": 0.22, "learning_rate": 4.778607846113201e-05, "loss": 0.5908, "step": 39400 }, { "epoch": 0.22, "learning_rate": 4.778045723086955e-05, "loss": 0.5863, "step": 39500 }, { "epoch": 0.22, "learning_rate": 4.777483600060709e-05, "loss": 0.5903, "step": 39600 }, { "epoch": 0.22, "learning_rate": 4.776921477034464e-05, "loss": 0.5915, "step": 39700 }, { "epoch": 0.22, "learning_rate": 4.776364975238481e-05, "loss": 0.5993, "step": 39800 }, { "epoch": 0.22, "learning_rate": 4.775802852212235e-05, "loss": 0.5862, "step": 39900 }, { "epoch": 0.22, "learning_rate": 4.77524072918599e-05, "loss": 0.5937, "step": 40000 }, { "epoch": 0.23, "learning_rate": 4.7746786061597446e-05, "loss": 0.5916, "step": 40100 }, { "epoch": 0.23, "learning_rate": 4.774116483133499e-05, "loss": 0.588, "step": 40200 }, { "epoch": 0.23, "learning_rate": 4.773554360107254e-05, "loss": 0.5916, "step": 40300 }, { "epoch": 0.23, "learning_rate": 4.77299785831127e-05, "loss": 0.5878, "step": 40400 }, { "epoch": 0.23, "learning_rate": 4.7724357352850245e-05, "loss": 0.5806, "step": 40500 }, { "epoch": 0.23, "learning_rate": 4.771873612258779e-05, "loss": 0.5764, "step": 40600 }, { "epoch": 0.23, "learning_rate": 4.771311489232534e-05, "loss": 0.5806, "step": 40700 }, { "epoch": 0.23, "learning_rate": 4.770749366206288e-05, "loss": 0.5707, "step": 40800 }, { "epoch": 0.23, "learning_rate": 4.7701872431800424e-05, "loss": 0.5792, "step": 40900 }, { "epoch": 0.23, "learning_rate": 4.769625120153797e-05, "loss": 0.5717, "step": 41000 }, { "epoch": 0.23, "learning_rate": 4.7690629971275516e-05, "loss": 0.5671, "step": 41100 }, { "epoch": 0.23, "learning_rate": 4.768500874101306e-05, "loss": 0.5795, "step": 41200 }, { "epoch": 0.23, "learning_rate": 4.767938751075061e-05, "loss": 0.5838, "step": 41300 }, { "epoch": 0.23, "learning_rate": 4.7673766280488155e-05, "loss": 0.5716, "step": 41400 }, { "epoch": 0.23, "learning_rate": 4.7668145050225694e-05, "loss": 0.5705, "step": 41500 }, { "epoch": 0.23, "learning_rate": 4.766252381996324e-05, "loss": 0.5736, "step": 41600 }, { "epoch": 0.23, "learning_rate": 4.765690258970079e-05, "loss": 0.5746, "step": 41700 }, { "epoch": 0.23, "learning_rate": 4.7651281359438326e-05, "loss": 0.5777, "step": 41800 }, { "epoch": 0.24, "learning_rate": 4.764566012917587e-05, "loss": 0.5647, "step": 41900 }, { "epoch": 0.24, "learning_rate": 4.764003889891342e-05, "loss": 0.5699, "step": 42000 }, { "epoch": 0.24, "learning_rate": 4.7634417668650965e-05, "loss": 0.5739, "step": 42100 }, { "epoch": 0.24, "learning_rate": 4.7628796438388505e-05, "loss": 0.5734, "step": 42200 }, { "epoch": 0.24, "learning_rate": 4.762317520812605e-05, "loss": 0.5632, "step": 42300 }, { "epoch": 0.24, "learning_rate": 4.76175539778636e-05, "loss": 0.5627, "step": 42400 }, { "epoch": 0.24, "learning_rate": 4.761193274760114e-05, "loss": 0.5612, "step": 42500 }, { "epoch": 0.24, "learning_rate": 4.760631151733868e-05, "loss": 0.5699, "step": 42600 }, { "epoch": 0.24, "learning_rate": 4.760069028707623e-05, "loss": 0.5696, "step": 42700 }, { "epoch": 0.24, "learning_rate": 4.759506905681378e-05, "loss": 0.5643, "step": 42800 }, { "epoch": 0.24, "learning_rate": 4.758944782655132e-05, "loss": 0.5644, "step": 42900 }, { "epoch": 0.24, "learning_rate": 4.758382659628887e-05, "loss": 0.5591, "step": 43000 }, { "epoch": 0.24, "learning_rate": 4.7578205366026415e-05, "loss": 0.5659, "step": 43100 }, { "epoch": 0.24, "learning_rate": 4.7572584135763954e-05, "loss": 0.5691, "step": 43200 }, { "epoch": 0.24, "learning_rate": 4.75669629055015e-05, "loss": 0.5496, "step": 43300 }, { "epoch": 0.24, "learning_rate": 4.7561341675239047e-05, "loss": 0.5467, "step": 43400 }, { "epoch": 0.24, "learning_rate": 4.755572044497659e-05, "loss": 0.5464, "step": 43500 }, { "epoch": 0.25, "learning_rate": 4.755009921471413e-05, "loss": 0.5653, "step": 43600 }, { "epoch": 0.25, "learning_rate": 4.754447798445168e-05, "loss": 0.5556, "step": 43700 }, { "epoch": 0.25, "learning_rate": 4.7538856754189225e-05, "loss": 0.5611, "step": 43800 }, { "epoch": 0.25, "learning_rate": 4.7533235523926764e-05, "loss": 0.5461, "step": 43900 }, { "epoch": 0.25, "learning_rate": 4.752761429366431e-05, "loss": 0.5605, "step": 44000 }, { "epoch": 0.25, "learning_rate": 4.752199306340186e-05, "loss": 0.5608, "step": 44100 }, { "epoch": 0.25, "learning_rate": 4.75163718331394e-05, "loss": 0.5509, "step": 44200 }, { "epoch": 0.25, "learning_rate": 4.751075060287695e-05, "loss": 0.5537, "step": 44300 }, { "epoch": 0.25, "learning_rate": 4.7505129372614496e-05, "loss": 0.5621, "step": 44400 }, { "epoch": 0.25, "learning_rate": 4.749950814235204e-05, "loss": 0.5529, "step": 44500 }, { "epoch": 0.25, "learning_rate": 4.749388691208958e-05, "loss": 0.5398, "step": 44600 }, { "epoch": 0.25, "learning_rate": 4.748826568182713e-05, "loss": 0.5467, "step": 44700 }, { "epoch": 0.25, "learning_rate": 4.7482644451564674e-05, "loss": 0.5558, "step": 44800 }, { "epoch": 0.25, "learning_rate": 4.747702322130222e-05, "loss": 0.5417, "step": 44900 }, { "epoch": 0.25, "learning_rate": 4.747140199103976e-05, "loss": 0.5424, "step": 45000 }, { "epoch": 0.25, "learning_rate": 4.7465780760777306e-05, "loss": 0.5427, "step": 45100 }, { "epoch": 0.25, "learning_rate": 4.7460215742817474e-05, "loss": 0.5467, "step": 45200 }, { "epoch": 0.25, "learning_rate": 4.745459451255502e-05, "loss": 0.5575, "step": 45300 }, { "epoch": 0.26, "learning_rate": 4.7448973282292566e-05, "loss": 0.5457, "step": 45400 }, { "epoch": 0.26, "learning_rate": 4.744335205203011e-05, "loss": 0.547, "step": 45500 }, { "epoch": 0.26, "learning_rate": 4.743773082176766e-05, "loss": 0.5439, "step": 45600 }, { "epoch": 0.26, "learning_rate": 4.743216580380782e-05, "loss": 0.5453, "step": 45700 }, { "epoch": 0.26, "learning_rate": 4.7426544573545366e-05, "loss": 0.5481, "step": 45800 }, { "epoch": 0.26, "learning_rate": 4.742092334328291e-05, "loss": 0.5355, "step": 45900 }, { "epoch": 0.26, "learning_rate": 4.741530211302046e-05, "loss": 0.5395, "step": 46000 }, { "epoch": 0.26, "learning_rate": 4.7409680882758e-05, "loss": 0.5314, "step": 46100 }, { "epoch": 0.26, "learning_rate": 4.7404059652495544e-05, "loss": 0.5525, "step": 46200 }, { "epoch": 0.26, "learning_rate": 4.739843842223309e-05, "loss": 0.5336, "step": 46300 }, { "epoch": 0.26, "learning_rate": 4.7392873404273265e-05, "loss": 0.5406, "step": 46400 }, { "epoch": 0.26, "learning_rate": 4.7387252174010805e-05, "loss": 0.5479, "step": 46500 }, { "epoch": 0.26, "learning_rate": 4.738163094374835e-05, "loss": 0.5409, "step": 46600 }, { "epoch": 0.26, "learning_rate": 4.73760097134859e-05, "loss": 0.5355, "step": 46700 }, { "epoch": 0.26, "learning_rate": 4.737038848322344e-05, "loss": 0.5427, "step": 46800 }, { "epoch": 0.26, "learning_rate": 4.736476725296098e-05, "loss": 0.5488, "step": 46900 }, { "epoch": 0.26, "learning_rate": 4.735914602269853e-05, "loss": 0.5335, "step": 47000 }, { "epoch": 0.26, "learning_rate": 4.7353524792436075e-05, "loss": 0.5394, "step": 47100 }, { "epoch": 0.27, "learning_rate": 4.7347903562173615e-05, "loss": 0.5269, "step": 47200 }, { "epoch": 0.27, "learning_rate": 4.734228233191116e-05, "loss": 0.5407, "step": 47300 }, { "epoch": 0.27, "learning_rate": 4.733666110164871e-05, "loss": 0.5285, "step": 47400 }, { "epoch": 0.27, "learning_rate": 4.7331039871386254e-05, "loss": 0.5359, "step": 47500 }, { "epoch": 0.27, "learning_rate": 4.73254186411238e-05, "loss": 0.5205, "step": 47600 }, { "epoch": 0.27, "learning_rate": 4.7319797410861346e-05, "loss": 0.5269, "step": 47700 }, { "epoch": 0.27, "learning_rate": 4.731417618059889e-05, "loss": 0.5353, "step": 47800 }, { "epoch": 0.27, "learning_rate": 4.730855495033643e-05, "loss": 0.5351, "step": 47900 }, { "epoch": 0.27, "learning_rate": 4.730293372007398e-05, "loss": 0.5202, "step": 48000 }, { "epoch": 0.27, "learning_rate": 4.7297312489811525e-05, "loss": 0.5287, "step": 48100 }, { "epoch": 0.27, "learning_rate": 4.7291691259549064e-05, "loss": 0.5241, "step": 48200 }, { "epoch": 0.27, "learning_rate": 4.728607002928661e-05, "loss": 0.5315, "step": 48300 }, { "epoch": 0.27, "learning_rate": 4.728044879902416e-05, "loss": 0.5169, "step": 48400 }, { "epoch": 0.27, "learning_rate": 4.72748275687617e-05, "loss": 0.5283, "step": 48500 }, { "epoch": 0.27, "learning_rate": 4.726920633849924e-05, "loss": 0.5113, "step": 48600 }, { "epoch": 0.27, "learning_rate": 4.726358510823679e-05, "loss": 0.531, "step": 48700 }, { "epoch": 0.27, "learning_rate": 4.7257963877974335e-05, "loss": 0.5341, "step": 48800 }, { "epoch": 0.27, "learning_rate": 4.725234264771188e-05, "loss": 0.5159, "step": 48900 }, { "epoch": 0.28, "learning_rate": 4.724672141744943e-05, "loss": 0.5132, "step": 49000 }, { "epoch": 0.28, "learning_rate": 4.7241100187186974e-05, "loss": 0.5142, "step": 49100 }, { "epoch": 0.28, "learning_rate": 4.723547895692452e-05, "loss": 0.5249, "step": 49200 }, { "epoch": 0.28, "learning_rate": 4.722985772666206e-05, "loss": 0.5125, "step": 49300 }, { "epoch": 0.28, "learning_rate": 4.7224236496399606e-05, "loss": 0.5165, "step": 49400 }, { "epoch": 0.28, "learning_rate": 4.721861526613715e-05, "loss": 0.5082, "step": 49500 }, { "epoch": 0.28, "learning_rate": 4.721299403587469e-05, "loss": 0.5204, "step": 49600 }, { "epoch": 0.28, "learning_rate": 4.720737280561224e-05, "loss": 0.5191, "step": 49700 }, { "epoch": 0.28, "learning_rate": 4.7201751575349784e-05, "loss": 0.516, "step": 49800 }, { "epoch": 0.28, "learning_rate": 4.719613034508733e-05, "loss": 0.5196, "step": 49900 }, { "epoch": 0.28, "learning_rate": 4.719050911482487e-05, "loss": 0.5218, "step": 50000 }, { "epoch": 0.28, "learning_rate": 4.7184887884562416e-05, "loss": 0.5144, "step": 50100 }, { "epoch": 0.28, "learning_rate": 4.717926665429996e-05, "loss": 0.5042, "step": 50200 }, { "epoch": 0.28, "learning_rate": 4.71736454240375e-05, "loss": 0.4984, "step": 50300 }, { "epoch": 0.28, "learning_rate": 4.716802419377505e-05, "loss": 0.523, "step": 50400 }, { "epoch": 0.28, "learning_rate": 4.7162402963512595e-05, "loss": 0.5104, "step": 50500 }, { "epoch": 0.28, "learning_rate": 4.715683794555277e-05, "loss": 0.5157, "step": 50600 }, { "epoch": 0.28, "learning_rate": 4.715121671529031e-05, "loss": 0.5152, "step": 50700 }, { "epoch": 0.29, "learning_rate": 4.7145595485027855e-05, "loss": 0.5171, "step": 50800 }, { "epoch": 0.29, "learning_rate": 4.71399742547654e-05, "loss": 0.5099, "step": 50900 }, { "epoch": 0.29, "learning_rate": 4.713435302450295e-05, "loss": 0.5175, "step": 51000 }, { "epoch": 0.29, "learning_rate": 4.712873179424049e-05, "loss": 0.5084, "step": 51100 }, { "epoch": 0.29, "learning_rate": 4.712311056397803e-05, "loss": 0.5163, "step": 51200 }, { "epoch": 0.29, "learning_rate": 4.711748933371558e-05, "loss": 0.5148, "step": 51300 }, { "epoch": 0.29, "learning_rate": 4.711186810345312e-05, "loss": 0.5055, "step": 51400 }, { "epoch": 0.29, "learning_rate": 4.710630308549329e-05, "loss": 0.5077, "step": 51500 }, { "epoch": 0.29, "learning_rate": 4.710068185523084e-05, "loss": 0.5028, "step": 51600 }, { "epoch": 0.29, "learning_rate": 4.7095060624968386e-05, "loss": 0.4996, "step": 51700 }, { "epoch": 0.29, "learning_rate": 4.7089439394705925e-05, "loss": 0.4939, "step": 51800 }, { "epoch": 0.29, "learning_rate": 4.708381816444347e-05, "loss": 0.4991, "step": 51900 }, { "epoch": 0.29, "learning_rate": 4.707819693418102e-05, "loss": 0.4987, "step": 52000 }, { "epoch": 0.29, "learning_rate": 4.7072575703918564e-05, "loss": 0.5071, "step": 52100 }, { "epoch": 0.29, "learning_rate": 4.7066954473656104e-05, "loss": 0.5012, "step": 52200 }, { "epoch": 0.29, "learning_rate": 4.706133324339365e-05, "loss": 0.5066, "step": 52300 }, { "epoch": 0.29, "learning_rate": 4.7055712013131196e-05, "loss": 0.51, "step": 52400 }, { "epoch": 0.3, "learning_rate": 4.7050090782868736e-05, "loss": 0.5056, "step": 52500 }, { "epoch": 0.3, "learning_rate": 4.704446955260628e-05, "loss": 0.5013, "step": 52600 }, { "epoch": 0.3, "learning_rate": 4.703884832234383e-05, "loss": 0.4858, "step": 52700 }, { "epoch": 0.3, "learning_rate": 4.7033227092081375e-05, "loss": 0.4944, "step": 52800 }, { "epoch": 0.3, "learning_rate": 4.702760586181892e-05, "loss": 0.4991, "step": 52900 }, { "epoch": 0.3, "learning_rate": 4.702198463155647e-05, "loss": 0.4931, "step": 53000 }, { "epoch": 0.3, "learning_rate": 4.7016363401294013e-05, "loss": 0.4889, "step": 53100 }, { "epoch": 0.3, "learning_rate": 4.701074217103155e-05, "loss": 0.491, "step": 53200 }, { "epoch": 0.3, "learning_rate": 4.70051209407691e-05, "loss": 0.4891, "step": 53300 }, { "epoch": 0.3, "learning_rate": 4.6999499710506646e-05, "loss": 0.4994, "step": 53400 }, { "epoch": 0.3, "learning_rate": 4.699387848024419e-05, "loss": 0.4999, "step": 53500 }, { "epoch": 0.3, "learning_rate": 4.698825724998173e-05, "loss": 0.495, "step": 53600 }, { "epoch": 0.3, "learning_rate": 4.698263601971928e-05, "loss": 0.4947, "step": 53700 }, { "epoch": 0.3, "learning_rate": 4.6977014789456824e-05, "loss": 0.4921, "step": 53800 }, { "epoch": 0.3, "learning_rate": 4.697139355919436e-05, "loss": 0.492, "step": 53900 }, { "epoch": 0.3, "learning_rate": 4.696577232893191e-05, "loss": 0.4883, "step": 54000 }, { "epoch": 0.3, "learning_rate": 4.6960151098669456e-05, "loss": 0.4949, "step": 54100 }, { "epoch": 0.3, "learning_rate": 4.6954529868407e-05, "loss": 0.4899, "step": 54200 }, { "epoch": 0.31, "learning_rate": 4.694890863814455e-05, "loss": 0.4936, "step": 54300 }, { "epoch": 0.31, "learning_rate": 4.6943287407882095e-05, "loss": 0.4996, "step": 54400 }, { "epoch": 0.31, "learning_rate": 4.693766617761964e-05, "loss": 0.4875, "step": 54500 }, { "epoch": 0.31, "learning_rate": 4.693204494735718e-05, "loss": 0.488, "step": 54600 }, { "epoch": 0.31, "learning_rate": 4.692642371709473e-05, "loss": 0.4932, "step": 54700 }, { "epoch": 0.31, "learning_rate": 4.6920858699134894e-05, "loss": 0.4859, "step": 54800 }, { "epoch": 0.31, "learning_rate": 4.691523746887244e-05, "loss": 0.4888, "step": 54900 }, { "epoch": 0.31, "learning_rate": 4.690961623860998e-05, "loss": 0.5022, "step": 55000 }, { "epoch": 0.31, "learning_rate": 4.6903995008347526e-05, "loss": 0.4857, "step": 55100 }, { "epoch": 0.31, "learning_rate": 4.689837377808507e-05, "loss": 0.4905, "step": 55200 }, { "epoch": 0.31, "learning_rate": 4.689275254782262e-05, "loss": 0.4887, "step": 55300 }, { "epoch": 0.31, "learning_rate": 4.6887131317560165e-05, "loss": 0.4915, "step": 55400 }, { "epoch": 0.31, "learning_rate": 4.688151008729771e-05, "loss": 0.4805, "step": 55500 }, { "epoch": 0.31, "learning_rate": 4.687588885703526e-05, "loss": 0.4791, "step": 55600 }, { "epoch": 0.31, "learning_rate": 4.68702676267728e-05, "loss": 0.4871, "step": 55700 }, { "epoch": 0.31, "learning_rate": 4.6864646396510344e-05, "loss": 0.4924, "step": 55800 }, { "epoch": 0.31, "learning_rate": 4.685902516624789e-05, "loss": 0.493, "step": 55900 }, { "epoch": 0.31, "learning_rate": 4.685340393598543e-05, "loss": 0.4799, "step": 56000 }, { "epoch": 0.32, "learning_rate": 4.6847782705722976e-05, "loss": 0.4861, "step": 56100 }, { "epoch": 0.32, "learning_rate": 4.684216147546052e-05, "loss": 0.483, "step": 56200 }, { "epoch": 0.32, "learning_rate": 4.683654024519807e-05, "loss": 0.4819, "step": 56300 }, { "epoch": 0.32, "learning_rate": 4.683091901493561e-05, "loss": 0.4866, "step": 56400 }, { "epoch": 0.32, "learning_rate": 4.6825297784673154e-05, "loss": 0.4866, "step": 56500 }, { "epoch": 0.32, "learning_rate": 4.68196765544107e-05, "loss": 0.4952, "step": 56600 }, { "epoch": 0.32, "learning_rate": 4.681405532414824e-05, "loss": 0.4756, "step": 56700 }, { "epoch": 0.32, "learning_rate": 4.6808434093885786e-05, "loss": 0.4787, "step": 56800 }, { "epoch": 0.32, "learning_rate": 4.680281286362333e-05, "loss": 0.475, "step": 56900 }, { "epoch": 0.32, "learning_rate": 4.679719163336088e-05, "loss": 0.4722, "step": 57000 }, { "epoch": 0.32, "learning_rate": 4.6791570403098425e-05, "loss": 0.4806, "step": 57100 }, { "epoch": 0.32, "learning_rate": 4.678594917283597e-05, "loss": 0.4799, "step": 57200 }, { "epoch": 0.32, "learning_rate": 4.678032794257352e-05, "loss": 0.4839, "step": 57300 }, { "epoch": 0.32, "learning_rate": 4.677470671231106e-05, "loss": 0.477, "step": 57400 }, { "epoch": 0.32, "learning_rate": 4.67690854820486e-05, "loss": 0.4852, "step": 57500 }, { "epoch": 0.32, "learning_rate": 4.676346425178615e-05, "loss": 0.4754, "step": 57600 }, { "epoch": 0.32, "learning_rate": 4.6757843021523696e-05, "loss": 0.4777, "step": 57700 }, { "epoch": 0.32, "learning_rate": 4.6752221791261235e-05, "loss": 0.4838, "step": 57800 }, { "epoch": 0.33, "learning_rate": 4.674660056099878e-05, "loss": 0.4802, "step": 57900 }, { "epoch": 0.33, "learning_rate": 4.674097933073633e-05, "loss": 0.4778, "step": 58000 }, { "epoch": 0.33, "learning_rate": 4.673535810047387e-05, "loss": 0.4722, "step": 58100 }, { "epoch": 0.33, "learning_rate": 4.6729736870211414e-05, "loss": 0.467, "step": 58200 }, { "epoch": 0.33, "learning_rate": 4.672411563994896e-05, "loss": 0.4714, "step": 58300 }, { "epoch": 0.33, "learning_rate": 4.6718494409686506e-05, "loss": 0.4773, "step": 58400 }, { "epoch": 0.33, "learning_rate": 4.671287317942405e-05, "loss": 0.4726, "step": 58500 }, { "epoch": 0.33, "learning_rate": 4.67072519491616e-05, "loss": 0.4723, "step": 58600 }, { "epoch": 0.33, "learning_rate": 4.6701630718899145e-05, "loss": 0.4675, "step": 58700 }, { "epoch": 0.33, "learning_rate": 4.6696009488636685e-05, "loss": 0.463, "step": 58800 }, { "epoch": 0.33, "learning_rate": 4.669038825837423e-05, "loss": 0.4659, "step": 58900 }, { "epoch": 0.33, "learning_rate": 4.668476702811178e-05, "loss": 0.4716, "step": 59000 }, { "epoch": 0.33, "learning_rate": 4.667914579784932e-05, "loss": 0.478, "step": 59100 }, { "epoch": 0.33, "learning_rate": 4.667352456758686e-05, "loss": 0.4893, "step": 59200 }, { "epoch": 0.33, "learning_rate": 4.666790333732441e-05, "loss": 0.4669, "step": 59300 }, { "epoch": 0.33, "learning_rate": 4.6662282107061955e-05, "loss": 0.4741, "step": 59400 }, { "epoch": 0.33, "learning_rate": 4.6656660876799495e-05, "loss": 0.4697, "step": 59500 }, { "epoch": 0.34, "learning_rate": 4.665103964653704e-05, "loss": 0.4627, "step": 59600 }, { "epoch": 0.34, "learning_rate": 4.664541841627459e-05, "loss": 0.4728, "step": 59700 }, { "epoch": 0.34, "learning_rate": 4.6639797186012134e-05, "loss": 0.462, "step": 59800 }, { "epoch": 0.34, "learning_rate": 4.663417595574967e-05, "loss": 0.4705, "step": 59900 }, { "epoch": 0.34, "learning_rate": 4.662855472548722e-05, "loss": 0.4764, "step": 60000 }, { "epoch": 0.34, "learning_rate": 4.6622933495224766e-05, "loss": 0.4716, "step": 60100 }, { "epoch": 0.34, "learning_rate": 4.661731226496231e-05, "loss": 0.4594, "step": 60200 }, { "epoch": 0.34, "learning_rate": 4.661169103469986e-05, "loss": 0.4697, "step": 60300 }, { "epoch": 0.34, "learning_rate": 4.6606069804437405e-05, "loss": 0.4805, "step": 60400 }, { "epoch": 0.34, "learning_rate": 4.660044857417495e-05, "loss": 0.4526, "step": 60500 }, { "epoch": 0.34, "learning_rate": 4.659482734391249e-05, "loss": 0.4647, "step": 60600 }, { "epoch": 0.34, "learning_rate": 4.658920611365004e-05, "loss": 0.4632, "step": 60700 }, { "epoch": 0.34, "learning_rate": 4.658358488338758e-05, "loss": 0.4716, "step": 60800 }, { "epoch": 0.34, "learning_rate": 4.657796365312512e-05, "loss": 0.4562, "step": 60900 }, { "epoch": 0.34, "learning_rate": 4.657234242286267e-05, "loss": 0.4647, "step": 61000 }, { "epoch": 0.34, "learning_rate": 4.6566721192600215e-05, "loss": 0.458, "step": 61100 }, { "epoch": 0.34, "learning_rate": 4.656109996233776e-05, "loss": 0.4512, "step": 61200 }, { "epoch": 0.34, "learning_rate": 4.65554787320753e-05, "loss": 0.4568, "step": 61300 }, { "epoch": 0.35, "learning_rate": 4.654985750181285e-05, "loss": 0.473, "step": 61400 }, { "epoch": 0.35, "learning_rate": 4.654423627155039e-05, "loss": 0.4656, "step": 61500 }, { "epoch": 0.35, "learning_rate": 4.653861504128794e-05, "loss": 0.4524, "step": 61600 }, { "epoch": 0.35, "learning_rate": 4.6532993811025486e-05, "loss": 0.4547, "step": 61700 }, { "epoch": 0.35, "learning_rate": 4.652737258076303e-05, "loss": 0.4603, "step": 61800 }, { "epoch": 0.35, "learning_rate": 4.652175135050058e-05, "loss": 0.4546, "step": 61900 }, { "epoch": 0.35, "learning_rate": 4.651613012023812e-05, "loss": 0.4588, "step": 62000 }, { "epoch": 0.35, "learning_rate": 4.6510508889975664e-05, "loss": 0.4516, "step": 62100 }, { "epoch": 0.35, "learning_rate": 4.650488765971321e-05, "loss": 0.4684, "step": 62200 }, { "epoch": 0.35, "learning_rate": 4.649926642945075e-05, "loss": 0.4686, "step": 62300 }, { "epoch": 0.35, "learning_rate": 4.649370141149092e-05, "loss": 0.4605, "step": 62400 }, { "epoch": 0.35, "learning_rate": 4.6488080181228464e-05, "loss": 0.4632, "step": 62500 }, { "epoch": 0.35, "learning_rate": 4.648251516326864e-05, "loss": 0.4625, "step": 62600 }, { "epoch": 0.35, "learning_rate": 4.6476893933006185e-05, "loss": 0.4693, "step": 62700 }, { "epoch": 0.35, "learning_rate": 4.6471272702743724e-05, "loss": 0.466, "step": 62800 }, { "epoch": 0.35, "learning_rate": 4.646565147248127e-05, "loss": 0.4563, "step": 62900 }, { "epoch": 0.35, "learning_rate": 4.646003024221882e-05, "loss": 0.4606, "step": 63000 }, { "epoch": 0.35, "learning_rate": 4.6454409011956356e-05, "loss": 0.4536, "step": 63100 }, { "epoch": 0.36, "learning_rate": 4.64487877816939e-05, "loss": 0.4657, "step": 63200 }, { "epoch": 0.36, "learning_rate": 4.644316655143145e-05, "loss": 0.4547, "step": 63300 }, { "epoch": 0.36, "learning_rate": 4.6437545321168995e-05, "loss": 0.4559, "step": 63400 }, { "epoch": 0.36, "learning_rate": 4.6431924090906534e-05, "loss": 0.4501, "step": 63500 }, { "epoch": 0.36, "learning_rate": 4.642630286064408e-05, "loss": 0.4473, "step": 63600 }, { "epoch": 0.36, "learning_rate": 4.642068163038163e-05, "loss": 0.4531, "step": 63700 }, { "epoch": 0.36, "learning_rate": 4.641506040011917e-05, "loss": 0.4516, "step": 63800 }, { "epoch": 0.36, "learning_rate": 4.640943916985672e-05, "loss": 0.4521, "step": 63900 }, { "epoch": 0.36, "learning_rate": 4.6403817939594266e-05, "loss": 0.4437, "step": 64000 }, { "epoch": 0.36, "learning_rate": 4.639819670933181e-05, "loss": 0.4515, "step": 64100 }, { "epoch": 0.36, "learning_rate": 4.639257547906935e-05, "loss": 0.4556, "step": 64200 }, { "epoch": 0.36, "learning_rate": 4.63869542488069e-05, "loss": 0.4506, "step": 64300 }, { "epoch": 0.36, "learning_rate": 4.6381333018544444e-05, "loss": 0.4499, "step": 64400 }, { "epoch": 0.36, "learning_rate": 4.6375711788281984e-05, "loss": 0.4517, "step": 64500 }, { "epoch": 0.36, "learning_rate": 4.637009055801953e-05, "loss": 0.4442, "step": 64600 }, { "epoch": 0.36, "learning_rate": 4.6364469327757076e-05, "loss": 0.4413, "step": 64700 }, { "epoch": 0.36, "learning_rate": 4.635884809749462e-05, "loss": 0.4487, "step": 64800 }, { "epoch": 0.36, "learning_rate": 4.635322686723216e-05, "loss": 0.4499, "step": 64900 }, { "epoch": 0.37, "learning_rate": 4.634760563696971e-05, "loss": 0.451, "step": 65000 }, { "epoch": 0.37, "learning_rate": 4.6341984406707255e-05, "loss": 0.4427, "step": 65100 }, { "epoch": 0.37, "learning_rate": 4.6336363176444794e-05, "loss": 0.4438, "step": 65200 }, { "epoch": 0.37, "learning_rate": 4.633074194618234e-05, "loss": 0.445, "step": 65300 }, { "epoch": 0.37, "learning_rate": 4.632512071591989e-05, "loss": 0.4445, "step": 65400 }, { "epoch": 0.37, "learning_rate": 4.631949948565743e-05, "loss": 0.4469, "step": 65500 }, { "epoch": 0.37, "learning_rate": 4.631387825539498e-05, "loss": 0.4627, "step": 65600 }, { "epoch": 0.37, "learning_rate": 4.6308257025132525e-05, "loss": 0.4422, "step": 65700 }, { "epoch": 0.37, "learning_rate": 4.630263579487007e-05, "loss": 0.4461, "step": 65800 }, { "epoch": 0.37, "learning_rate": 4.629701456460761e-05, "loss": 0.4455, "step": 65900 }, { "epoch": 0.37, "learning_rate": 4.629144954664778e-05, "loss": 0.4481, "step": 66000 }, { "epoch": 0.37, "learning_rate": 4.6285828316385325e-05, "loss": 0.4432, "step": 66100 }, { "epoch": 0.37, "learning_rate": 4.62802632984255e-05, "loss": 0.455, "step": 66200 }, { "epoch": 0.37, "learning_rate": 4.627464206816304e-05, "loss": 0.4409, "step": 66300 }, { "epoch": 0.37, "learning_rate": 4.6269020837900585e-05, "loss": 0.4439, "step": 66400 }, { "epoch": 0.37, "learning_rate": 4.626339960763813e-05, "loss": 0.4386, "step": 66500 }, { "epoch": 0.37, "learning_rate": 4.625777837737568e-05, "loss": 0.4552, "step": 66600 }, { "epoch": 0.37, "learning_rate": 4.625215714711322e-05, "loss": 0.4465, "step": 66700 }, { "epoch": 0.38, "learning_rate": 4.6246535916850764e-05, "loss": 0.4331, "step": 66800 }, { "epoch": 0.38, "learning_rate": 4.624091468658831e-05, "loss": 0.4369, "step": 66900 }, { "epoch": 0.38, "learning_rate": 4.623529345632585e-05, "loss": 0.4469, "step": 67000 }, { "epoch": 0.38, "learning_rate": 4.6229672226063396e-05, "loss": 0.4464, "step": 67100 }, { "epoch": 0.38, "learning_rate": 4.622405099580094e-05, "loss": 0.4334, "step": 67200 }, { "epoch": 0.38, "learning_rate": 4.621842976553849e-05, "loss": 0.4467, "step": 67300 }, { "epoch": 0.38, "learning_rate": 4.621280853527603e-05, "loss": 0.4427, "step": 67400 }, { "epoch": 0.38, "learning_rate": 4.6207187305013574e-05, "loss": 0.4348, "step": 67500 }, { "epoch": 0.38, "learning_rate": 4.620156607475112e-05, "loss": 0.4385, "step": 67600 }, { "epoch": 0.38, "learning_rate": 4.619594484448867e-05, "loss": 0.4439, "step": 67700 }, { "epoch": 0.38, "learning_rate": 4.619032361422621e-05, "loss": 0.4281, "step": 67800 }, { "epoch": 0.38, "learning_rate": 4.618470238396376e-05, "loss": 0.445, "step": 67900 }, { "epoch": 0.38, "learning_rate": 4.6179081153701305e-05, "loss": 0.4379, "step": 68000 }, { "epoch": 0.38, "learning_rate": 4.6173459923438845e-05, "loss": 0.4353, "step": 68100 }, { "epoch": 0.38, "learning_rate": 4.616783869317639e-05, "loss": 0.434, "step": 68200 }, { "epoch": 0.38, "learning_rate": 4.616221746291394e-05, "loss": 0.4357, "step": 68300 }, { "epoch": 0.38, "learning_rate": 4.615659623265148e-05, "loss": 0.438, "step": 68400 }, { "epoch": 0.39, "learning_rate": 4.615097500238902e-05, "loss": 0.434, "step": 68500 }, { "epoch": 0.39, "learning_rate": 4.614535377212657e-05, "loss": 0.4348, "step": 68600 }, { "epoch": 0.39, "learning_rate": 4.6139732541864116e-05, "loss": 0.4327, "step": 68700 }, { "epoch": 0.39, "learning_rate": 4.6134111311601655e-05, "loss": 0.4405, "step": 68800 }, { "epoch": 0.39, "learning_rate": 4.61284900813392e-05, "loss": 0.433, "step": 68900 }, { "epoch": 0.39, "learning_rate": 4.612286885107675e-05, "loss": 0.4385, "step": 69000 }, { "epoch": 0.39, "learning_rate": 4.6117247620814294e-05, "loss": 0.4342, "step": 69100 }, { "epoch": 0.39, "learning_rate": 4.611162639055184e-05, "loss": 0.4351, "step": 69200 }, { "epoch": 0.39, "learning_rate": 4.610600516028939e-05, "loss": 0.4251, "step": 69300 }, { "epoch": 0.39, "learning_rate": 4.610038393002693e-05, "loss": 0.4417, "step": 69400 }, { "epoch": 0.39, "learning_rate": 4.609476269976447e-05, "loss": 0.4244, "step": 69500 }, { "epoch": 0.39, "learning_rate": 4.608914146950202e-05, "loss": 0.4421, "step": 69600 }, { "epoch": 0.39, "learning_rate": 4.6083520239239565e-05, "loss": 0.4315, "step": 69700 }, { "epoch": 0.39, "learning_rate": 4.6077899008977105e-05, "loss": 0.4381, "step": 69800 }, { "epoch": 0.39, "learning_rate": 4.607227777871465e-05, "loss": 0.4286, "step": 69900 }, { "epoch": 0.39, "learning_rate": 4.60666565484522e-05, "loss": 0.4386, "step": 70000 }, { "epoch": 0.39, "learning_rate": 4.6061035318189743e-05, "loss": 0.4371, "step": 70100 }, { "epoch": 0.39, "learning_rate": 4.605541408792728e-05, "loss": 0.4308, "step": 70200 }, { "epoch": 0.4, "learning_rate": 4.604979285766483e-05, "loss": 0.4405, "step": 70300 }, { "epoch": 0.4, "learning_rate": 4.6044171627402375e-05, "loss": 0.4317, "step": 70400 }, { "epoch": 0.4, "learning_rate": 4.6038550397139915e-05, "loss": 0.4277, "step": 70500 }, { "epoch": 0.4, "learning_rate": 4.603292916687746e-05, "loss": 0.4246, "step": 70600 }, { "epoch": 0.4, "learning_rate": 4.602730793661501e-05, "loss": 0.4378, "step": 70700 }, { "epoch": 0.4, "learning_rate": 4.6021686706352554e-05, "loss": 0.4236, "step": 70800 }, { "epoch": 0.4, "learning_rate": 4.60160654760901e-05, "loss": 0.4345, "step": 70900 }, { "epoch": 0.4, "learning_rate": 4.6010444245827646e-05, "loss": 0.4259, "step": 71000 }, { "epoch": 0.4, "learning_rate": 4.600482301556519e-05, "loss": 0.4212, "step": 71100 }, { "epoch": 0.4, "learning_rate": 4.599920178530273e-05, "loss": 0.4217, "step": 71200 }, { "epoch": 0.4, "learning_rate": 4.59936367673429e-05, "loss": 0.4275, "step": 71300 }, { "epoch": 0.4, "learning_rate": 4.5988015537080446e-05, "loss": 0.4295, "step": 71400 }, { "epoch": 0.4, "learning_rate": 4.598239430681799e-05, "loss": 0.438, "step": 71500 }, { "epoch": 0.4, "learning_rate": 4.597677307655553e-05, "loss": 0.4344, "step": 71600 }, { "epoch": 0.4, "learning_rate": 4.597115184629308e-05, "loss": 0.4427, "step": 71700 }, { "epoch": 0.4, "learning_rate": 4.5965530616030624e-05, "loss": 0.4231, "step": 71800 }, { "epoch": 0.4, "learning_rate": 4.595990938576817e-05, "loss": 0.4262, "step": 71900 }, { "epoch": 0.4, "learning_rate": 4.595428815550572e-05, "loss": 0.4209, "step": 72000 }, { "epoch": 0.41, "learning_rate": 4.594866692524326e-05, "loss": 0.418, "step": 72100 }, { "epoch": 0.41, "learning_rate": 4.594304569498081e-05, "loss": 0.4331, "step": 72200 }, { "epoch": 0.41, "learning_rate": 4.593742446471835e-05, "loss": 0.4315, "step": 72300 }, { "epoch": 0.41, "learning_rate": 4.5931803234455895e-05, "loss": 0.4144, "step": 72400 }, { "epoch": 0.41, "learning_rate": 4.592618200419344e-05, "loss": 0.4295, "step": 72500 }, { "epoch": 0.41, "learning_rate": 4.592056077393099e-05, "loss": 0.4257, "step": 72600 }, { "epoch": 0.41, "learning_rate": 4.591493954366853e-05, "loss": 0.4214, "step": 72700 }, { "epoch": 0.41, "learning_rate": 4.5909318313406074e-05, "loss": 0.4137, "step": 72800 }, { "epoch": 0.41, "learning_rate": 4.590369708314362e-05, "loss": 0.4205, "step": 72900 }, { "epoch": 0.41, "learning_rate": 4.589807585288116e-05, "loss": 0.4298, "step": 73000 }, { "epoch": 0.41, "learning_rate": 4.5892454622618706e-05, "loss": 0.4227, "step": 73100 }, { "epoch": 0.41, "learning_rate": 4.588683339235625e-05, "loss": 0.4216, "step": 73200 }, { "epoch": 0.41, "learning_rate": 4.58812121620938e-05, "loss": 0.4173, "step": 73300 }, { "epoch": 0.41, "learning_rate": 4.5875590931831344e-05, "loss": 0.4108, "step": 73400 }, { "epoch": 0.41, "learning_rate": 4.586996970156889e-05, "loss": 0.4276, "step": 73500 }, { "epoch": 0.41, "learning_rate": 4.586434847130644e-05, "loss": 0.4279, "step": 73600 }, { "epoch": 0.41, "learning_rate": 4.5858727241043977e-05, "loss": 0.4337, "step": 73700 }, { "epoch": 0.41, "learning_rate": 4.585310601078152e-05, "loss": 0.4205, "step": 73800 }, { "epoch": 0.42, "learning_rate": 4.584748478051907e-05, "loss": 0.4231, "step": 73900 }, { "epoch": 0.42, "learning_rate": 4.5841863550256615e-05, "loss": 0.4204, "step": 74000 }, { "epoch": 0.42, "learning_rate": 4.5836242319994155e-05, "loss": 0.4261, "step": 74100 }, { "epoch": 0.42, "learning_rate": 4.58306210897317e-05, "loss": 0.4137, "step": 74200 }, { "epoch": 0.42, "learning_rate": 4.582499985946925e-05, "loss": 0.4252, "step": 74300 }, { "epoch": 0.42, "learning_rate": 4.581937862920679e-05, "loss": 0.4241, "step": 74400 }, { "epoch": 0.42, "learning_rate": 4.581375739894433e-05, "loss": 0.4216, "step": 74500 }, { "epoch": 0.42, "learning_rate": 4.580813616868188e-05, "loss": 0.4132, "step": 74600 }, { "epoch": 0.42, "learning_rate": 4.5802514938419426e-05, "loss": 0.4148, "step": 74700 }, { "epoch": 0.42, "learning_rate": 4.5796893708156965e-05, "loss": 0.4217, "step": 74800 }, { "epoch": 0.42, "learning_rate": 4.579127247789451e-05, "loss": 0.4199, "step": 74900 }, { "epoch": 0.42, "learning_rate": 4.578565124763206e-05, "loss": 0.4204, "step": 75000 }, { "epoch": 0.42, "learning_rate": 4.5780030017369604e-05, "loss": 0.424, "step": 75100 }, { "epoch": 0.42, "learning_rate": 4.577440878710715e-05, "loss": 0.4222, "step": 75200 }, { "epoch": 0.42, "learning_rate": 4.57687875568447e-05, "loss": 0.4139, "step": 75300 }, { "epoch": 0.42, "learning_rate": 4.5763166326582236e-05, "loss": 0.4214, "step": 75400 }, { "epoch": 0.42, "learning_rate": 4.575754509631978e-05, "loss": 0.418, "step": 75500 }, { "epoch": 0.42, "learning_rate": 4.575192386605733e-05, "loss": 0.4099, "step": 75600 }, { "epoch": 0.43, "learning_rate": 4.5746302635794875e-05, "loss": 0.4275, "step": 75700 }, { "epoch": 0.43, "learning_rate": 4.5740681405532414e-05, "loss": 0.4121, "step": 75800 }, { "epoch": 0.43, "learning_rate": 4.573506017526996e-05, "loss": 0.4234, "step": 75900 }, { "epoch": 0.43, "learning_rate": 4.572943894500751e-05, "loss": 0.4195, "step": 76000 }, { "epoch": 0.43, "learning_rate": 4.5723817714745047e-05, "loss": 0.4145, "step": 76100 }, { "epoch": 0.43, "learning_rate": 4.571819648448259e-05, "loss": 0.4125, "step": 76200 }, { "epoch": 0.43, "learning_rate": 4.571257525422014e-05, "loss": 0.4148, "step": 76300 }, { "epoch": 0.43, "learning_rate": 4.5706954023957685e-05, "loss": 0.408, "step": 76400 }, { "epoch": 0.43, "learning_rate": 4.570138900599785e-05, "loss": 0.412, "step": 76500 }, { "epoch": 0.43, "learning_rate": 4.56957677757354e-05, "loss": 0.4158, "step": 76600 }, { "epoch": 0.43, "learning_rate": 4.5690146545472946e-05, "loss": 0.4132, "step": 76700 }, { "epoch": 0.43, "learning_rate": 4.568452531521049e-05, "loss": 0.4073, "step": 76800 }, { "epoch": 0.43, "learning_rate": 4.567890408494803e-05, "loss": 0.4129, "step": 76900 }, { "epoch": 0.43, "learning_rate": 4.567328285468558e-05, "loss": 0.4103, "step": 77000 }, { "epoch": 0.43, "learning_rate": 4.5667661624423124e-05, "loss": 0.4244, "step": 77100 }, { "epoch": 0.43, "learning_rate": 4.566204039416066e-05, "loss": 0.4111, "step": 77200 }, { "epoch": 0.43, "learning_rate": 4.565641916389821e-05, "loss": 0.4124, "step": 77300 }, { "epoch": 0.44, "learning_rate": 4.5650797933635756e-05, "loss": 0.4209, "step": 77400 }, { "epoch": 0.44, "learning_rate": 4.56451767033733e-05, "loss": 0.4105, "step": 77500 }, { "epoch": 0.44, "learning_rate": 4.563955547311085e-05, "loss": 0.4103, "step": 77600 }, { "epoch": 0.44, "learning_rate": 4.5633934242848395e-05, "loss": 0.4164, "step": 77700 }, { "epoch": 0.44, "learning_rate": 4.562831301258594e-05, "loss": 0.4123, "step": 77800 }, { "epoch": 0.44, "learning_rate": 4.562269178232348e-05, "loss": 0.411, "step": 77900 }, { "epoch": 0.44, "learning_rate": 4.561707055206103e-05, "loss": 0.4174, "step": 78000 }, { "epoch": 0.44, "learning_rate": 4.561144932179857e-05, "loss": 0.4092, "step": 78100 }, { "epoch": 0.44, "learning_rate": 4.560582809153612e-05, "loss": 0.4112, "step": 78200 }, { "epoch": 0.44, "learning_rate": 4.560020686127366e-05, "loss": 0.4089, "step": 78300 }, { "epoch": 0.44, "learning_rate": 4.5594585631011205e-05, "loss": 0.4066, "step": 78400 }, { "epoch": 0.44, "learning_rate": 4.558896440074875e-05, "loss": 0.414, "step": 78500 }, { "epoch": 0.44, "learning_rate": 4.558334317048629e-05, "loss": 0.4095, "step": 78600 }, { "epoch": 0.44, "learning_rate": 4.557772194022384e-05, "loss": 0.4193, "step": 78700 }, { "epoch": 0.44, "learning_rate": 4.5572100709961383e-05, "loss": 0.4097, "step": 78800 }, { "epoch": 0.44, "learning_rate": 4.556647947969893e-05, "loss": 0.4193, "step": 78900 }, { "epoch": 0.44, "learning_rate": 4.556085824943647e-05, "loss": 0.4059, "step": 79000 }, { "epoch": 0.44, "learning_rate": 4.5555237019174016e-05, "loss": 0.4143, "step": 79100 }, { "epoch": 0.45, "learning_rate": 4.554961578891156e-05, "loss": 0.4096, "step": 79200 }, { "epoch": 0.45, "learning_rate": 4.554399455864911e-05, "loss": 0.4046, "step": 79300 }, { "epoch": 0.45, "learning_rate": 4.5538373328386654e-05, "loss": 0.3962, "step": 79400 }, { "epoch": 0.45, "learning_rate": 4.55327520981242e-05, "loss": 0.4028, "step": 79500 }, { "epoch": 0.45, "learning_rate": 4.552713086786175e-05, "loss": 0.4048, "step": 79600 }, { "epoch": 0.45, "learning_rate": 4.5521509637599286e-05, "loss": 0.4197, "step": 79700 }, { "epoch": 0.45, "learning_rate": 4.551588840733683e-05, "loss": 0.3987, "step": 79800 }, { "epoch": 0.45, "learning_rate": 4.551026717707438e-05, "loss": 0.4029, "step": 79900 }, { "epoch": 0.45, "learning_rate": 4.550464594681192e-05, "loss": 0.4082, "step": 80000 }, { "epoch": 0.45, "learning_rate": 4.5499024716549465e-05, "loss": 0.4073, "step": 80100 }, { "epoch": 0.45, "learning_rate": 4.549340348628701e-05, "loss": 0.4171, "step": 80200 }, { "epoch": 0.45, "learning_rate": 4.548778225602456e-05, "loss": 0.4008, "step": 80300 }, { "epoch": 0.45, "learning_rate": 4.54821610257621e-05, "loss": 0.4024, "step": 80400 }, { "epoch": 0.45, "learning_rate": 4.547653979549964e-05, "loss": 0.4013, "step": 80500 }, { "epoch": 0.45, "learning_rate": 4.547091856523719e-05, "loss": 0.4018, "step": 80600 }, { "epoch": 0.45, "learning_rate": 4.5465297334974736e-05, "loss": 0.4012, "step": 80700 }, { "epoch": 0.45, "learning_rate": 4.545967610471228e-05, "loss": 0.404, "step": 80800 }, { "epoch": 0.45, "learning_rate": 4.545405487444983e-05, "loss": 0.4079, "step": 80900 }, { "epoch": 0.46, "learning_rate": 4.5448433644187374e-05, "loss": 0.4086, "step": 81000 }, { "epoch": 0.46, "learning_rate": 4.5442812413924914e-05, "loss": 0.4128, "step": 81100 }, { "epoch": 0.46, "learning_rate": 4.543719118366246e-05, "loss": 0.4105, "step": 81200 }, { "epoch": 0.46, "learning_rate": 4.5431569953400007e-05, "loss": 0.4054, "step": 81300 }, { "epoch": 0.46, "learning_rate": 4.5425948723137546e-05, "loss": 0.4084, "step": 81400 }, { "epoch": 0.46, "learning_rate": 4.542032749287509e-05, "loss": 0.402, "step": 81500 }, { "epoch": 0.46, "learning_rate": 4.541470626261264e-05, "loss": 0.398, "step": 81600 }, { "epoch": 0.46, "learning_rate": 4.5409085032350185e-05, "loss": 0.3975, "step": 81700 }, { "epoch": 0.46, "learning_rate": 4.5403463802087724e-05, "loss": 0.4029, "step": 81800 }, { "epoch": 0.46, "learning_rate": 4.539784257182527e-05, "loss": 0.3996, "step": 81900 }, { "epoch": 0.46, "learning_rate": 4.539222134156282e-05, "loss": 0.398, "step": 82000 }, { "epoch": 0.46, "learning_rate": 4.5386600111300356e-05, "loss": 0.4076, "step": 82100 }, { "epoch": 0.46, "learning_rate": 4.53809788810379e-05, "loss": 0.3962, "step": 82200 }, { "epoch": 0.46, "learning_rate": 4.537535765077545e-05, "loss": 0.4029, "step": 82300 }, { "epoch": 0.46, "learning_rate": 4.5369736420512995e-05, "loss": 0.409, "step": 82400 }, { "epoch": 0.46, "learning_rate": 4.536417140255316e-05, "loss": 0.3991, "step": 82500 }, { "epoch": 0.46, "learning_rate": 4.535860638459333e-05, "loss": 0.4067, "step": 82600 }, { "epoch": 0.46, "learning_rate": 4.535298515433088e-05, "loss": 0.3977, "step": 82700 }, { "epoch": 0.47, "learning_rate": 4.534736392406842e-05, "loss": 0.4063, "step": 82800 }, { "epoch": 0.47, "learning_rate": 4.534174269380597e-05, "loss": 0.4032, "step": 82900 }, { "epoch": 0.47, "learning_rate": 4.5336121463543516e-05, "loss": 0.3986, "step": 83000 }, { "epoch": 0.47, "learning_rate": 4.533050023328106e-05, "loss": 0.4043, "step": 83100 }, { "epoch": 0.47, "learning_rate": 4.532487900301861e-05, "loss": 0.4041, "step": 83200 }, { "epoch": 0.47, "learning_rate": 4.531925777275615e-05, "loss": 0.3911, "step": 83300 }, { "epoch": 0.47, "learning_rate": 4.5313692754796315e-05, "loss": 0.4016, "step": 83400 }, { "epoch": 0.47, "learning_rate": 4.530807152453386e-05, "loss": 0.3922, "step": 83500 }, { "epoch": 0.47, "learning_rate": 4.530245029427141e-05, "loss": 0.3918, "step": 83600 }, { "epoch": 0.47, "learning_rate": 4.529682906400895e-05, "loss": 0.4082, "step": 83700 }, { "epoch": 0.47, "learning_rate": 4.5291207833746494e-05, "loss": 0.3864, "step": 83800 }, { "epoch": 0.47, "learning_rate": 4.528558660348404e-05, "loss": 0.3952, "step": 83900 }, { "epoch": 0.47, "learning_rate": 4.5279965373221586e-05, "loss": 0.4032, "step": 84000 }, { "epoch": 0.47, "learning_rate": 4.527434414295913e-05, "loss": 0.4039, "step": 84100 }, { "epoch": 0.47, "learning_rate": 4.526872291269668e-05, "loss": 0.397, "step": 84200 }, { "epoch": 0.47, "learning_rate": 4.5263101682434225e-05, "loss": 0.3981, "step": 84300 }, { "epoch": 0.47, "learning_rate": 4.5257480452171764e-05, "loss": 0.3955, "step": 84400 }, { "epoch": 0.47, "learning_rate": 4.525185922190931e-05, "loss": 0.3864, "step": 84500 }, { "epoch": 0.48, "learning_rate": 4.524623799164686e-05, "loss": 0.4063, "step": 84600 }, { "epoch": 0.48, "learning_rate": 4.5240616761384397e-05, "loss": 0.4034, "step": 84700 }, { "epoch": 0.48, "learning_rate": 4.523499553112194e-05, "loss": 0.3911, "step": 84800 }, { "epoch": 0.48, "learning_rate": 4.522937430085949e-05, "loss": 0.396, "step": 84900 }, { "epoch": 0.48, "learning_rate": 4.522375307059703e-05, "loss": 0.4057, "step": 85000 }, { "epoch": 0.48, "learning_rate": 4.5218131840334575e-05, "loss": 0.4053, "step": 85100 }, { "epoch": 0.48, "learning_rate": 4.521251061007212e-05, "loss": 0.396, "step": 85200 }, { "epoch": 0.48, "learning_rate": 4.520688937980967e-05, "loss": 0.394, "step": 85300 }, { "epoch": 0.48, "learning_rate": 4.520126814954721e-05, "loss": 0.4032, "step": 85400 }, { "epoch": 0.48, "learning_rate": 4.519564691928475e-05, "loss": 0.3956, "step": 85500 }, { "epoch": 0.48, "learning_rate": 4.519008190132493e-05, "loss": 0.3831, "step": 85600 }, { "epoch": 0.48, "learning_rate": 4.5184460671062474e-05, "loss": 0.3957, "step": 85700 }, { "epoch": 0.48, "learning_rate": 4.5178839440800013e-05, "loss": 0.3915, "step": 85800 }, { "epoch": 0.48, "learning_rate": 4.517321821053756e-05, "loss": 0.3864, "step": 85900 }, { "epoch": 0.48, "learning_rate": 4.5167596980275106e-05, "loss": 0.4007, "step": 86000 }, { "epoch": 0.48, "learning_rate": 4.5161975750012645e-05, "loss": 0.4016, "step": 86100 }, { "epoch": 0.48, "learning_rate": 4.515635451975019e-05, "loss": 0.3911, "step": 86200 }, { "epoch": 0.49, "learning_rate": 4.515073328948774e-05, "loss": 0.4034, "step": 86300 }, { "epoch": 0.49, "learning_rate": 4.5145112059225284e-05, "loss": 0.3978, "step": 86400 }, { "epoch": 0.49, "learning_rate": 4.5139490828962824e-05, "loss": 0.3952, "step": 86500 }, { "epoch": 0.49, "learning_rate": 4.513386959870038e-05, "loss": 0.3855, "step": 86600 }, { "epoch": 0.49, "learning_rate": 4.512824836843792e-05, "loss": 0.3987, "step": 86700 }, { "epoch": 0.49, "learning_rate": 4.512262713817546e-05, "loss": 0.3935, "step": 86800 }, { "epoch": 0.49, "learning_rate": 4.511700590791301e-05, "loss": 0.3926, "step": 86900 }, { "epoch": 0.49, "learning_rate": 4.5111384677650555e-05, "loss": 0.3953, "step": 87000 }, { "epoch": 0.49, "learning_rate": 4.51057634473881e-05, "loss": 0.4058, "step": 87100 }, { "epoch": 0.49, "learning_rate": 4.510014221712564e-05, "loss": 0.3933, "step": 87200 }, { "epoch": 0.49, "learning_rate": 4.509452098686319e-05, "loss": 0.3961, "step": 87300 }, { "epoch": 0.49, "learning_rate": 4.5088899756600733e-05, "loss": 0.3976, "step": 87400 }, { "epoch": 0.49, "learning_rate": 4.508327852633827e-05, "loss": 0.3842, "step": 87500 }, { "epoch": 0.49, "learning_rate": 4.507765729607582e-05, "loss": 0.3939, "step": 87600 }, { "epoch": 0.49, "learning_rate": 4.5072036065813366e-05, "loss": 0.4024, "step": 87700 }, { "epoch": 0.49, "learning_rate": 4.506641483555091e-05, "loss": 0.3787, "step": 87800 }, { "epoch": 0.49, "learning_rate": 4.506084981759108e-05, "loss": 0.3899, "step": 87900 }, { "epoch": 0.49, "learning_rate": 4.5055228587328626e-05, "loss": 0.3893, "step": 88000 }, { "epoch": 0.5, "learning_rate": 4.504960735706617e-05, "loss": 0.3995, "step": 88100 }, { "epoch": 0.5, "learning_rate": 4.504398612680372e-05, "loss": 0.3848, "step": 88200 }, { "epoch": 0.5, "learning_rate": 4.503836489654126e-05, "loss": 0.3919, "step": 88300 }, { "epoch": 0.5, "learning_rate": 4.5032743666278804e-05, "loss": 0.3883, "step": 88400 }, { "epoch": 0.5, "learning_rate": 4.502712243601635e-05, "loss": 0.3879, "step": 88500 }, { "epoch": 0.5, "learning_rate": 4.502150120575389e-05, "loss": 0.4023, "step": 88600 }, { "epoch": 0.5, "learning_rate": 4.5015879975491436e-05, "loss": 0.3925, "step": 88700 }, { "epoch": 0.5, "learning_rate": 4.501025874522898e-05, "loss": 0.3939, "step": 88800 }, { "epoch": 0.5, "learning_rate": 4.500463751496653e-05, "loss": 0.3865, "step": 88900 }, { "epoch": 0.5, "learning_rate": 4.499901628470407e-05, "loss": 0.3903, "step": 89000 }, { "epoch": 0.5, "learning_rate": 4.4993395054441614e-05, "loss": 0.3945, "step": 89100 }, { "epoch": 0.5, "learning_rate": 4.498783003648179e-05, "loss": 0.3892, "step": 89200 }, { "epoch": 0.5, "learning_rate": 4.4982208806219335e-05, "loss": 0.3886, "step": 89300 }, { "epoch": 0.5, "learning_rate": 4.4976587575956875e-05, "loss": 0.3819, "step": 89400 }, { "epoch": 0.5, "learning_rate": 4.497096634569442e-05, "loss": 0.3897, "step": 89500 }, { "epoch": 0.5, "learning_rate": 4.496534511543197e-05, "loss": 0.3738, "step": 89600 }, { "epoch": 0.5, "learning_rate": 4.495972388516951e-05, "loss": 0.3918, "step": 89700 }, { "epoch": 0.5, "learning_rate": 4.495410265490705e-05, "loss": 0.3992, "step": 89800 }, { "epoch": 0.51, "learning_rate": 4.49484814246446e-05, "loss": 0.3863, "step": 89900 }, { "epoch": 0.51, "learning_rate": 4.4942860194382146e-05, "loss": 0.3942, "step": 90000 }, { "epoch": 0.51, "learning_rate": 4.493729517642231e-05, "loss": 0.3784, "step": 90100 }, { "epoch": 0.51, "learning_rate": 4.493167394615986e-05, "loss": 0.3935, "step": 90200 }, { "epoch": 0.51, "learning_rate": 4.4926052715897406e-05, "loss": 0.3869, "step": 90300 }, { "epoch": 0.51, "learning_rate": 4.492043148563495e-05, "loss": 0.3901, "step": 90400 }, { "epoch": 0.51, "learning_rate": 4.491481025537249e-05, "loss": 0.3868, "step": 90500 }, { "epoch": 0.51, "learning_rate": 4.490918902511004e-05, "loss": 0.3753, "step": 90600 }, { "epoch": 0.51, "learning_rate": 4.4903567794847584e-05, "loss": 0.3794, "step": 90700 }, { "epoch": 0.51, "learning_rate": 4.4897946564585124e-05, "loss": 0.393, "step": 90800 }, { "epoch": 0.51, "learning_rate": 4.489232533432267e-05, "loss": 0.3867, "step": 90900 }, { "epoch": 0.51, "learning_rate": 4.4886704104060216e-05, "loss": 0.3908, "step": 91000 }, { "epoch": 0.51, "learning_rate": 4.488108287379776e-05, "loss": 0.3859, "step": 91100 }, { "epoch": 0.51, "learning_rate": 4.48754616435353e-05, "loss": 0.3838, "step": 91200 }, { "epoch": 0.51, "learning_rate": 4.486984041327285e-05, "loss": 0.3861, "step": 91300 }, { "epoch": 0.51, "learning_rate": 4.4864219183010394e-05, "loss": 0.4011, "step": 91400 }, { "epoch": 0.51, "learning_rate": 4.485859795274794e-05, "loss": 0.3825, "step": 91500 }, { "epoch": 0.51, "learning_rate": 4.485297672248549e-05, "loss": 0.3919, "step": 91600 }, { "epoch": 0.52, "learning_rate": 4.484735549222303e-05, "loss": 0.3752, "step": 91700 }, { "epoch": 0.52, "learning_rate": 4.484173426196058e-05, "loss": 0.3754, "step": 91800 }, { "epoch": 0.52, "learning_rate": 4.483611303169812e-05, "loss": 0.3903, "step": 91900 }, { "epoch": 0.52, "learning_rate": 4.4830491801435665e-05, "loss": 0.3906, "step": 92000 }, { "epoch": 0.52, "learning_rate": 4.482487057117321e-05, "loss": 0.3713, "step": 92100 }, { "epoch": 0.52, "learning_rate": 4.481924934091075e-05, "loss": 0.3835, "step": 92200 }, { "epoch": 0.52, "learning_rate": 4.48136281106483e-05, "loss": 0.3925, "step": 92300 }, { "epoch": 0.52, "learning_rate": 4.4808006880385844e-05, "loss": 0.3793, "step": 92400 }, { "epoch": 0.52, "learning_rate": 4.480238565012339e-05, "loss": 0.3809, "step": 92500 }, { "epoch": 0.52, "learning_rate": 4.479676441986093e-05, "loss": 0.3763, "step": 92600 }, { "epoch": 0.52, "learning_rate": 4.4791143189598476e-05, "loss": 0.3846, "step": 92700 }, { "epoch": 0.52, "learning_rate": 4.478552195933602e-05, "loss": 0.3815, "step": 92800 }, { "epoch": 0.52, "learning_rate": 4.477990072907357e-05, "loss": 0.3811, "step": 92900 }, { "epoch": 0.52, "learning_rate": 4.4774279498811115e-05, "loss": 0.3862, "step": 93000 }, { "epoch": 0.52, "learning_rate": 4.476865826854866e-05, "loss": 0.3752, "step": 93100 }, { "epoch": 0.52, "learning_rate": 4.476303703828621e-05, "loss": 0.376, "step": 93200 }, { "epoch": 0.52, "learning_rate": 4.4757415808023747e-05, "loss": 0.38, "step": 93300 }, { "epoch": 0.53, "learning_rate": 4.475179457776129e-05, "loss": 0.3714, "step": 93400 }, { "epoch": 0.53, "learning_rate": 4.474617334749884e-05, "loss": 0.374, "step": 93500 }, { "epoch": 0.53, "learning_rate": 4.474055211723638e-05, "loss": 0.3832, "step": 93600 }, { "epoch": 0.53, "learning_rate": 4.4734930886973925e-05, "loss": 0.373, "step": 93700 }, { "epoch": 0.53, "learning_rate": 4.472930965671147e-05, "loss": 0.3801, "step": 93800 }, { "epoch": 0.53, "learning_rate": 4.472368842644902e-05, "loss": 0.383, "step": 93900 }, { "epoch": 0.53, "learning_rate": 4.471806719618656e-05, "loss": 0.3805, "step": 94000 }, { "epoch": 0.53, "learning_rate": 4.47124459659241e-05, "loss": 0.3795, "step": 94100 }, { "epoch": 0.53, "learning_rate": 4.470682473566165e-05, "loss": 0.3773, "step": 94200 }, { "epoch": 0.53, "learning_rate": 4.470120350539919e-05, "loss": 0.3767, "step": 94300 }, { "epoch": 0.53, "learning_rate": 4.4695582275136735e-05, "loss": 0.3762, "step": 94400 }, { "epoch": 0.53, "learning_rate": 4.468996104487428e-05, "loss": 0.3764, "step": 94500 }, { "epoch": 0.53, "learning_rate": 4.468433981461183e-05, "loss": 0.3771, "step": 94600 }, { "epoch": 0.53, "learning_rate": 4.4678718584349374e-05, "loss": 0.3781, "step": 94700 }, { "epoch": 0.53, "learning_rate": 4.467309735408692e-05, "loss": 0.3699, "step": 94800 }, { "epoch": 0.53, "learning_rate": 4.466747612382447e-05, "loss": 0.3763, "step": 94900 }, { "epoch": 0.53, "learning_rate": 4.466191110586463e-05, "loss": 0.3853, "step": 95000 }, { "epoch": 0.53, "learning_rate": 4.4656289875602174e-05, "loss": 0.3851, "step": 95100 }, { "epoch": 0.54, "learning_rate": 4.465066864533972e-05, "loss": 0.3761, "step": 95200 }, { "epoch": 0.54, "learning_rate": 4.4645047415077266e-05, "loss": 0.375, "step": 95300 }, { "epoch": 0.54, "learning_rate": 4.4639426184814806e-05, "loss": 0.3679, "step": 95400 }, { "epoch": 0.54, "learning_rate": 4.463380495455235e-05, "loss": 0.3827, "step": 95500 }, { "epoch": 0.54, "learning_rate": 4.46281837242899e-05, "loss": 0.3676, "step": 95600 }, { "epoch": 0.54, "learning_rate": 4.4622562494027445e-05, "loss": 0.3824, "step": 95700 }, { "epoch": 0.54, "learning_rate": 4.461694126376499e-05, "loss": 0.3812, "step": 95800 }, { "epoch": 0.54, "learning_rate": 4.461132003350254e-05, "loss": 0.376, "step": 95900 }, { "epoch": 0.54, "learning_rate": 4.4605698803240084e-05, "loss": 0.3842, "step": 96000 }, { "epoch": 0.54, "learning_rate": 4.460007757297762e-05, "loss": 0.3765, "step": 96100 }, { "epoch": 0.54, "learning_rate": 4.459445634271517e-05, "loss": 0.381, "step": 96200 }, { "epoch": 0.54, "learning_rate": 4.4588835112452716e-05, "loss": 0.377, "step": 96300 }, { "epoch": 0.54, "learning_rate": 4.4583213882190255e-05, "loss": 0.3812, "step": 96400 }, { "epoch": 0.54, "learning_rate": 4.45775926519278e-05, "loss": 0.3758, "step": 96500 }, { "epoch": 0.54, "learning_rate": 4.457197142166535e-05, "loss": 0.3731, "step": 96600 }, { "epoch": 0.54, "learning_rate": 4.4566350191402894e-05, "loss": 0.378, "step": 96700 }, { "epoch": 0.54, "learning_rate": 4.4560728961140433e-05, "loss": 0.3776, "step": 96800 }, { "epoch": 0.54, "learning_rate": 4.455510773087798e-05, "loss": 0.3781, "step": 96900 }, { "epoch": 0.55, "learning_rate": 4.4549486500615526e-05, "loss": 0.379, "step": 97000 }, { "epoch": 0.55, "learning_rate": 4.454386527035307e-05, "loss": 0.3747, "step": 97100 }, { "epoch": 0.55, "learning_rate": 4.453824404009062e-05, "loss": 0.3598, "step": 97200 }, { "epoch": 0.55, "learning_rate": 4.4532622809828165e-05, "loss": 0.3821, "step": 97300 }, { "epoch": 0.55, "learning_rate": 4.452705779186833e-05, "loss": 0.3837, "step": 97400 }, { "epoch": 0.55, "learning_rate": 4.452143656160587e-05, "loss": 0.3714, "step": 97500 }, { "epoch": 0.55, "learning_rate": 4.451581533134342e-05, "loss": 0.3694, "step": 97600 }, { "epoch": 0.55, "learning_rate": 4.4510194101080965e-05, "loss": 0.3728, "step": 97700 }, { "epoch": 0.55, "learning_rate": 4.450457287081851e-05, "loss": 0.3767, "step": 97800 }, { "epoch": 0.55, "learning_rate": 4.449895164055605e-05, "loss": 0.3666, "step": 97900 }, { "epoch": 0.55, "learning_rate": 4.4493330410293597e-05, "loss": 0.3803, "step": 98000 }, { "epoch": 0.55, "learning_rate": 4.448770918003114e-05, "loss": 0.3693, "step": 98100 }, { "epoch": 0.55, "learning_rate": 4.448208794976869e-05, "loss": 0.3818, "step": 98200 }, { "epoch": 0.55, "learning_rate": 4.4476466719506235e-05, "loss": 0.3752, "step": 98300 }, { "epoch": 0.55, "learning_rate": 4.447084548924378e-05, "loss": 0.3716, "step": 98400 }, { "epoch": 0.55, "learning_rate": 4.446522425898133e-05, "loss": 0.3697, "step": 98500 }, { "epoch": 0.55, "learning_rate": 4.445960302871887e-05, "loss": 0.3741, "step": 98600 }, { "epoch": 0.55, "learning_rate": 4.4453981798456414e-05, "loss": 0.3774, "step": 98700 }, { "epoch": 0.56, "learning_rate": 4.444836056819396e-05, "loss": 0.3637, "step": 98800 }, { "epoch": 0.56, "learning_rate": 4.44427393379315e-05, "loss": 0.3748, "step": 98900 }, { "epoch": 0.56, "learning_rate": 4.4437118107669046e-05, "loss": 0.3756, "step": 99000 }, { "epoch": 0.56, "learning_rate": 4.443149687740659e-05, "loss": 0.3713, "step": 99100 }, { "epoch": 0.56, "learning_rate": 4.442587564714414e-05, "loss": 0.3794, "step": 99200 }, { "epoch": 0.56, "learning_rate": 4.442025441688168e-05, "loss": 0.3647, "step": 99300 }, { "epoch": 0.56, "learning_rate": 4.4414633186619224e-05, "loss": 0.3762, "step": 99400 }, { "epoch": 0.56, "learning_rate": 4.440901195635677e-05, "loss": 0.3655, "step": 99500 }, { "epoch": 0.56, "learning_rate": 4.440339072609431e-05, "loss": 0.3762, "step": 99600 }, { "epoch": 0.56, "learning_rate": 4.4397769495831856e-05, "loss": 0.3643, "step": 99700 }, { "epoch": 0.56, "learning_rate": 4.43921482655694e-05, "loss": 0.3744, "step": 99800 }, { "epoch": 0.56, "learning_rate": 4.438652703530695e-05, "loss": 0.3697, "step": 99900 }, { "epoch": 0.56, "learning_rate": 4.4380905805044495e-05, "loss": 0.375, "step": 100000 }, { "epoch": 0.56, "learning_rate": 4.437528457478204e-05, "loss": 0.3759, "step": 100100 }, { "epoch": 0.56, "learning_rate": 4.436966334451959e-05, "loss": 0.3705, "step": 100200 }, { "epoch": 0.56, "learning_rate": 4.436404211425713e-05, "loss": 0.3684, "step": 100300 }, { "epoch": 0.56, "learning_rate": 4.435842088399467e-05, "loss": 0.3733, "step": 100400 }, { "epoch": 0.56, "learning_rate": 4.435279965373222e-05, "loss": 0.3791, "step": 100500 }, { "epoch": 0.57, "learning_rate": 4.4347178423469766e-05, "loss": 0.3708, "step": 100600 }, { "epoch": 0.57, "learning_rate": 4.4341557193207305e-05, "loss": 0.3655, "step": 100700 }, { "epoch": 0.57, "learning_rate": 4.433593596294485e-05, "loss": 0.3764, "step": 100800 }, { "epoch": 0.57, "learning_rate": 4.43303147326824e-05, "loss": 0.3794, "step": 100900 }, { "epoch": 0.57, "learning_rate": 4.432469350241994e-05, "loss": 0.3646, "step": 101000 }, { "epoch": 0.57, "learning_rate": 4.4319072272157484e-05, "loss": 0.3682, "step": 101100 }, { "epoch": 0.57, "learning_rate": 4.431345104189503e-05, "loss": 0.3701, "step": 101200 }, { "epoch": 0.57, "learning_rate": 4.4307829811632576e-05, "loss": 0.3687, "step": 101300 }, { "epoch": 0.57, "learning_rate": 4.430220858137012e-05, "loss": 0.3653, "step": 101400 }, { "epoch": 0.57, "learning_rate": 4.429658735110767e-05, "loss": 0.3593, "step": 101500 }, { "epoch": 0.57, "learning_rate": 4.4290966120845215e-05, "loss": 0.3754, "step": 101600 }, { "epoch": 0.57, "learning_rate": 4.4285344890582755e-05, "loss": 0.3658, "step": 101700 }, { "epoch": 0.57, "learning_rate": 4.42797236603203e-05, "loss": 0.3705, "step": 101800 }, { "epoch": 0.57, "learning_rate": 4.427410243005785e-05, "loss": 0.3659, "step": 101900 }, { "epoch": 0.57, "learning_rate": 4.4268481199795393e-05, "loss": 0.358, "step": 102000 }, { "epoch": 0.57, "learning_rate": 4.426285996953293e-05, "loss": 0.3584, "step": 102100 }, { "epoch": 0.57, "learning_rate": 4.425723873927048e-05, "loss": 0.3656, "step": 102200 }, { "epoch": 0.58, "learning_rate": 4.4251617509008025e-05, "loss": 0.3595, "step": 102300 }, { "epoch": 0.58, "learning_rate": 4.4245996278745565e-05, "loss": 0.3651, "step": 102400 }, { "epoch": 0.58, "learning_rate": 4.424037504848311e-05, "loss": 0.3589, "step": 102500 }, { "epoch": 0.58, "learning_rate": 4.423475381822066e-05, "loss": 0.3677, "step": 102600 }, { "epoch": 0.58, "learning_rate": 4.4229132587958204e-05, "loss": 0.3654, "step": 102700 }, { "epoch": 0.58, "learning_rate": 4.422351135769574e-05, "loss": 0.367, "step": 102800 }, { "epoch": 0.58, "learning_rate": 4.421789012743329e-05, "loss": 0.3565, "step": 102900 }, { "epoch": 0.58, "learning_rate": 4.4212268897170836e-05, "loss": 0.3613, "step": 103000 }, { "epoch": 0.58, "learning_rate": 4.420664766690838e-05, "loss": 0.3681, "step": 103100 }, { "epoch": 0.58, "learning_rate": 4.420102643664593e-05, "loss": 0.3757, "step": 103200 }, { "epoch": 0.58, "learning_rate": 4.4195405206383475e-05, "loss": 0.3704, "step": 103300 }, { "epoch": 0.58, "learning_rate": 4.418978397612102e-05, "loss": 0.3582, "step": 103400 }, { "epoch": 0.58, "learning_rate": 4.418416274585856e-05, "loss": 0.3719, "step": 103500 }, { "epoch": 0.58, "learning_rate": 4.417854151559611e-05, "loss": 0.3696, "step": 103600 }, { "epoch": 0.58, "learning_rate": 4.417292028533365e-05, "loss": 0.3537, "step": 103700 }, { "epoch": 0.58, "learning_rate": 4.416729905507119e-05, "loss": 0.364, "step": 103800 }, { "epoch": 0.58, "learning_rate": 4.416167782480874e-05, "loss": 0.3716, "step": 103900 }, { "epoch": 0.58, "learning_rate": 4.4156056594546285e-05, "loss": 0.3623, "step": 104000 }, { "epoch": 0.59, "learning_rate": 4.415043536428383e-05, "loss": 0.358, "step": 104100 }, { "epoch": 0.59, "learning_rate": 4.414481413402137e-05, "loss": 0.3691, "step": 104200 }, { "epoch": 0.59, "learning_rate": 4.413919290375892e-05, "loss": 0.3629, "step": 104300 }, { "epoch": 0.59, "learning_rate": 4.4133571673496463e-05, "loss": 0.372, "step": 104400 }, { "epoch": 0.59, "learning_rate": 4.4127950443234e-05, "loss": 0.3583, "step": 104500 }, { "epoch": 0.59, "learning_rate": 4.4122329212971556e-05, "loss": 0.3695, "step": 104600 }, { "epoch": 0.59, "learning_rate": 4.41167079827091e-05, "loss": 0.3648, "step": 104700 }, { "epoch": 0.59, "learning_rate": 4.411108675244665e-05, "loss": 0.3688, "step": 104800 }, { "epoch": 0.59, "learning_rate": 4.410546552218419e-05, "loss": 0.3637, "step": 104900 }, { "epoch": 0.59, "learning_rate": 4.4099844291921734e-05, "loss": 0.3569, "step": 105000 }, { "epoch": 0.59, "learning_rate": 4.409422306165928e-05, "loss": 0.3601, "step": 105100 }, { "epoch": 0.59, "learning_rate": 4.408860183139682e-05, "loss": 0.3607, "step": 105200 }, { "epoch": 0.59, "learning_rate": 4.4082980601134366e-05, "loss": 0.3626, "step": 105300 }, { "epoch": 0.59, "learning_rate": 4.407735937087191e-05, "loss": 0.3644, "step": 105400 }, { "epoch": 0.59, "learning_rate": 4.407173814060945e-05, "loss": 0.3607, "step": 105500 }, { "epoch": 0.59, "learning_rate": 4.4066173122649627e-05, "loss": 0.3715, "step": 105600 }, { "epoch": 0.59, "learning_rate": 4.406055189238717e-05, "loss": 0.3562, "step": 105700 }, { "epoch": 0.59, "learning_rate": 4.405493066212472e-05, "loss": 0.3494, "step": 105800 }, { "epoch": 0.6, "learning_rate": 4.404930943186226e-05, "loss": 0.3573, "step": 105900 }, { "epoch": 0.6, "learning_rate": 4.4043688201599805e-05, "loss": 0.3628, "step": 106000 }, { "epoch": 0.6, "learning_rate": 4.403806697133735e-05, "loss": 0.3572, "step": 106100 }, { "epoch": 0.6, "learning_rate": 4.40324457410749e-05, "loss": 0.3681, "step": 106200 }, { "epoch": 0.6, "learning_rate": 4.402688072311506e-05, "loss": 0.3614, "step": 106300 }, { "epoch": 0.6, "learning_rate": 4.4021259492852605e-05, "loss": 0.3619, "step": 106400 }, { "epoch": 0.6, "learning_rate": 4.401563826259015e-05, "loss": 0.3644, "step": 106500 }, { "epoch": 0.6, "learning_rate": 4.40100170323277e-05, "loss": 0.363, "step": 106600 }, { "epoch": 0.6, "learning_rate": 4.4004395802065243e-05, "loss": 0.3593, "step": 106700 }, { "epoch": 0.6, "learning_rate": 4.399877457180279e-05, "loss": 0.3642, "step": 106800 }, { "epoch": 0.6, "learning_rate": 4.3993153341540336e-05, "loss": 0.362, "step": 106900 }, { "epoch": 0.6, "learning_rate": 4.3987532111277875e-05, "loss": 0.3646, "step": 107000 }, { "epoch": 0.6, "learning_rate": 4.398191088101542e-05, "loss": 0.3577, "step": 107100 }, { "epoch": 0.6, "learning_rate": 4.397628965075297e-05, "loss": 0.3555, "step": 107200 }, { "epoch": 0.6, "learning_rate": 4.3970668420490514e-05, "loss": 0.3641, "step": 107300 }, { "epoch": 0.6, "learning_rate": 4.3965047190228054e-05, "loss": 0.3537, "step": 107400 }, { "epoch": 0.6, "learning_rate": 4.39594259599656e-05, "loss": 0.3582, "step": 107500 }, { "epoch": 0.6, "learning_rate": 4.3953804729703146e-05, "loss": 0.359, "step": 107600 }, { "epoch": 0.61, "learning_rate": 4.3948183499440686e-05, "loss": 0.3665, "step": 107700 }, { "epoch": 0.61, "learning_rate": 4.394256226917823e-05, "loss": 0.3615, "step": 107800 }, { "epoch": 0.61, "learning_rate": 4.393694103891578e-05, "loss": 0.3653, "step": 107900 }, { "epoch": 0.61, "learning_rate": 4.3931319808653325e-05, "loss": 0.3588, "step": 108000 }, { "epoch": 0.61, "learning_rate": 4.3925698578390864e-05, "loss": 0.3648, "step": 108100 }, { "epoch": 0.61, "learning_rate": 4.392007734812841e-05, "loss": 0.3551, "step": 108200 }, { "epoch": 0.61, "learning_rate": 4.391445611786596e-05, "loss": 0.3581, "step": 108300 }, { "epoch": 0.61, "learning_rate": 4.39088348876035e-05, "loss": 0.3519, "step": 108400 }, { "epoch": 0.61, "learning_rate": 4.390326986964367e-05, "loss": 0.3532, "step": 108500 }, { "epoch": 0.61, "learning_rate": 4.389764863938122e-05, "loss": 0.3555, "step": 108600 }, { "epoch": 0.61, "learning_rate": 4.389202740911876e-05, "loss": 0.3519, "step": 108700 }, { "epoch": 0.61, "learning_rate": 4.38864061788563e-05, "loss": 0.3562, "step": 108800 }, { "epoch": 0.61, "learning_rate": 4.388078494859385e-05, "loss": 0.3502, "step": 108900 }, { "epoch": 0.61, "learning_rate": 4.3875163718331395e-05, "loss": 0.3544, "step": 109000 }, { "epoch": 0.61, "learning_rate": 4.386954248806894e-05, "loss": 0.3599, "step": 109100 }, { "epoch": 0.61, "learning_rate": 4.386392125780648e-05, "loss": 0.3595, "step": 109200 }, { "epoch": 0.61, "learning_rate": 4.385830002754403e-05, "loss": 0.3606, "step": 109300 }, { "epoch": 0.61, "learning_rate": 4.3852678797281574e-05, "loss": 0.3586, "step": 109400 }, { "epoch": 0.62, "learning_rate": 4.384711377932175e-05, "loss": 0.3639, "step": 109500 }, { "epoch": 0.62, "learning_rate": 4.384149254905929e-05, "loss": 0.3576, "step": 109600 }, { "epoch": 0.62, "learning_rate": 4.3835871318796834e-05, "loss": 0.3542, "step": 109700 }, { "epoch": 0.62, "learning_rate": 4.383025008853438e-05, "loss": 0.3622, "step": 109800 }, { "epoch": 0.62, "learning_rate": 4.382462885827192e-05, "loss": 0.3677, "step": 109900 }, { "epoch": 0.62, "learning_rate": 4.3819007628009466e-05, "loss": 0.3556, "step": 110000 }, { "epoch": 0.62, "learning_rate": 4.381338639774701e-05, "loss": 0.3512, "step": 110100 }, { "epoch": 0.62, "learning_rate": 4.3807821379787186e-05, "loss": 0.3553, "step": 110200 }, { "epoch": 0.62, "learning_rate": 4.3802200149524726e-05, "loss": 0.355, "step": 110300 }, { "epoch": 0.62, "learning_rate": 4.379657891926227e-05, "loss": 0.3479, "step": 110400 }, { "epoch": 0.62, "learning_rate": 4.379101390130244e-05, "loss": 0.3513, "step": 110500 }, { "epoch": 0.62, "learning_rate": 4.3785392671039986e-05, "loss": 0.3509, "step": 110600 }, { "epoch": 0.62, "learning_rate": 4.3779771440777526e-05, "loss": 0.3499, "step": 110700 }, { "epoch": 0.62, "learning_rate": 4.377415021051507e-05, "loss": 0.3526, "step": 110800 }, { "epoch": 0.62, "learning_rate": 4.376852898025262e-05, "loss": 0.3636, "step": 110900 }, { "epoch": 0.62, "learning_rate": 4.3762907749990165e-05, "loss": 0.3453, "step": 111000 }, { "epoch": 0.62, "learning_rate": 4.375728651972771e-05, "loss": 0.3463, "step": 111100 }, { "epoch": 0.63, "learning_rate": 4.375166528946526e-05, "loss": 0.3647, "step": 111200 }, { "epoch": 0.63, "learning_rate": 4.37460440592028e-05, "loss": 0.3533, "step": 111300 }, { "epoch": 0.63, "learning_rate": 4.374042282894034e-05, "loss": 0.3477, "step": 111400 }, { "epoch": 0.63, "learning_rate": 4.373480159867789e-05, "loss": 0.351, "step": 111500 }, { "epoch": 0.63, "learning_rate": 4.3729180368415435e-05, "loss": 0.3571, "step": 111600 }, { "epoch": 0.63, "learning_rate": 4.372355913815298e-05, "loss": 0.3505, "step": 111700 }, { "epoch": 0.63, "learning_rate": 4.371793790789052e-05, "loss": 0.3574, "step": 111800 }, { "epoch": 0.63, "learning_rate": 4.371231667762807e-05, "loss": 0.341, "step": 111900 }, { "epoch": 0.63, "learning_rate": 4.3706695447365614e-05, "loss": 0.3523, "step": 112000 }, { "epoch": 0.63, "learning_rate": 4.370107421710315e-05, "loss": 0.347, "step": 112100 }, { "epoch": 0.63, "learning_rate": 4.36954529868407e-05, "loss": 0.3608, "step": 112200 }, { "epoch": 0.63, "learning_rate": 4.3689831756578246e-05, "loss": 0.36, "step": 112300 }, { "epoch": 0.63, "learning_rate": 4.368421052631579e-05, "loss": 0.3526, "step": 112400 }, { "epoch": 0.63, "learning_rate": 4.367858929605333e-05, "loss": 0.3534, "step": 112500 }, { "epoch": 0.63, "learning_rate": 4.3672968065790885e-05, "loss": 0.3602, "step": 112600 }, { "epoch": 0.63, "learning_rate": 4.366734683552843e-05, "loss": 0.3528, "step": 112700 }, { "epoch": 0.63, "learning_rate": 4.366172560526597e-05, "loss": 0.3475, "step": 112800 }, { "epoch": 0.63, "learning_rate": 4.365610437500352e-05, "loss": 0.3558, "step": 112900 }, { "epoch": 0.64, "learning_rate": 4.365048314474106e-05, "loss": 0.3528, "step": 113000 }, { "epoch": 0.64, "learning_rate": 4.364486191447861e-05, "loss": 0.3506, "step": 113100 }, { "epoch": 0.64, "learning_rate": 4.363924068421615e-05, "loss": 0.3514, "step": 113200 }, { "epoch": 0.64, "learning_rate": 4.3633619453953695e-05, "loss": 0.3579, "step": 113300 }, { "epoch": 0.64, "learning_rate": 4.362799822369124e-05, "loss": 0.352, "step": 113400 }, { "epoch": 0.64, "learning_rate": 4.362237699342878e-05, "loss": 0.3467, "step": 113500 }, { "epoch": 0.64, "learning_rate": 4.361675576316633e-05, "loss": 0.3551, "step": 113600 }, { "epoch": 0.64, "learning_rate": 4.361113453290387e-05, "loss": 0.3525, "step": 113700 }, { "epoch": 0.64, "learning_rate": 4.360551330264142e-05, "loss": 0.3523, "step": 113800 }, { "epoch": 0.64, "learning_rate": 4.359989207237896e-05, "loss": 0.3458, "step": 113900 }, { "epoch": 0.64, "learning_rate": 4.3594270842116505e-05, "loss": 0.3494, "step": 114000 }, { "epoch": 0.64, "learning_rate": 4.358864961185405e-05, "loss": 0.3562, "step": 114100 }, { "epoch": 0.64, "learning_rate": 4.35830283815916e-05, "loss": 0.3537, "step": 114200 }, { "epoch": 0.64, "learning_rate": 4.3577407151329144e-05, "loss": 0.346, "step": 114300 }, { "epoch": 0.64, "learning_rate": 4.357178592106669e-05, "loss": 0.3409, "step": 114400 }, { "epoch": 0.64, "learning_rate": 4.356616469080424e-05, "loss": 0.3479, "step": 114500 }, { "epoch": 0.64, "learning_rate": 4.3560543460541776e-05, "loss": 0.3468, "step": 114600 }, { "epoch": 0.64, "learning_rate": 4.355492223027932e-05, "loss": 0.3493, "step": 114700 }, { "epoch": 0.65, "learning_rate": 4.354930100001687e-05, "loss": 0.3539, "step": 114800 }, { "epoch": 0.65, "learning_rate": 4.354367976975441e-05, "loss": 0.3429, "step": 114900 }, { "epoch": 0.65, "learning_rate": 4.3538058539491955e-05, "loss": 0.351, "step": 115000 }, { "epoch": 0.65, "learning_rate": 4.35324373092295e-05, "loss": 0.3555, "step": 115100 }, { "epoch": 0.65, "learning_rate": 4.352681607896704e-05, "loss": 0.3376, "step": 115200 }, { "epoch": 0.65, "learning_rate": 4.352119484870459e-05, "loss": 0.3434, "step": 115300 }, { "epoch": 0.65, "learning_rate": 4.351557361844213e-05, "loss": 0.3473, "step": 115400 }, { "epoch": 0.65, "learning_rate": 4.350995238817968e-05, "loss": 0.3501, "step": 115500 }, { "epoch": 0.65, "learning_rate": 4.350433115791722e-05, "loss": 0.3468, "step": 115600 }, { "epoch": 0.65, "learning_rate": 4.3498709927654765e-05, "loss": 0.3444, "step": 115700 }, { "epoch": 0.65, "learning_rate": 4.349308869739231e-05, "loss": 0.3502, "step": 115800 }, { "epoch": 0.65, "learning_rate": 4.348746746712986e-05, "loss": 0.3477, "step": 115900 }, { "epoch": 0.65, "learning_rate": 4.3481846236867404e-05, "loss": 0.3475, "step": 116000 }, { "epoch": 0.65, "learning_rate": 4.347622500660495e-05, "loss": 0.3537, "step": 116100 }, { "epoch": 0.65, "learning_rate": 4.3470603776342496e-05, "loss": 0.3423, "step": 116200 }, { "epoch": 0.65, "learning_rate": 4.3464982546080036e-05, "loss": 0.3438, "step": 116300 }, { "epoch": 0.65, "learning_rate": 4.345936131581758e-05, "loss": 0.3458, "step": 116400 }, { "epoch": 0.65, "learning_rate": 4.345374008555513e-05, "loss": 0.3398, "step": 116500 }, { "epoch": 0.66, "learning_rate": 4.344811885529267e-05, "loss": 0.3426, "step": 116600 }, { "epoch": 0.66, "learning_rate": 4.3442497625030214e-05, "loss": 0.3487, "step": 116700 }, { "epoch": 0.66, "learning_rate": 4.343687639476776e-05, "loss": 0.3398, "step": 116800 }, { "epoch": 0.66, "learning_rate": 4.343125516450531e-05, "loss": 0.3475, "step": 116900 }, { "epoch": 0.66, "learning_rate": 4.3425633934242846e-05, "loss": 0.3444, "step": 117000 }, { "epoch": 0.66, "learning_rate": 4.342001270398039e-05, "loss": 0.3456, "step": 117100 }, { "epoch": 0.66, "learning_rate": 4.341439147371794e-05, "loss": 0.3578, "step": 117200 }, { "epoch": 0.66, "learning_rate": 4.3408770243455485e-05, "loss": 0.3421, "step": 117300 }, { "epoch": 0.66, "learning_rate": 4.340314901319303e-05, "loss": 0.3474, "step": 117400 }, { "epoch": 0.66, "learning_rate": 4.339752778293058e-05, "loss": 0.3477, "step": 117500 }, { "epoch": 0.66, "learning_rate": 4.3391906552668124e-05, "loss": 0.3502, "step": 117600 }, { "epoch": 0.66, "learning_rate": 4.3386285322405663e-05, "loss": 0.3441, "step": 117700 }, { "epoch": 0.66, "learning_rate": 4.338066409214321e-05, "loss": 0.3409, "step": 117800 }, { "epoch": 0.66, "learning_rate": 4.3375042861880756e-05, "loss": 0.339, "step": 117900 }, { "epoch": 0.66, "learning_rate": 4.3369421631618295e-05, "loss": 0.3435, "step": 118000 }, { "epoch": 0.66, "learning_rate": 4.336380040135584e-05, "loss": 0.3441, "step": 118100 }, { "epoch": 0.66, "learning_rate": 4.335817917109339e-05, "loss": 0.3443, "step": 118200 }, { "epoch": 0.66, "learning_rate": 4.3352557940830934e-05, "loss": 0.345, "step": 118300 }, { "epoch": 0.67, "learning_rate": 4.3346936710568474e-05, "loss": 0.3517, "step": 118400 }, { "epoch": 0.67, "learning_rate": 4.334131548030602e-05, "loss": 0.3485, "step": 118500 }, { "epoch": 0.67, "learning_rate": 4.3335694250043566e-05, "loss": 0.3437, "step": 118600 }, { "epoch": 0.67, "learning_rate": 4.3330073019781106e-05, "loss": 0.3456, "step": 118700 }, { "epoch": 0.67, "learning_rate": 4.332445178951865e-05, "loss": 0.3412, "step": 118800 }, { "epoch": 0.67, "learning_rate": 4.33188305592562e-05, "loss": 0.3447, "step": 118900 }, { "epoch": 0.67, "learning_rate": 4.3313209328993745e-05, "loss": 0.3557, "step": 119000 }, { "epoch": 0.67, "learning_rate": 4.330758809873129e-05, "loss": 0.3389, "step": 119100 }, { "epoch": 0.67, "learning_rate": 4.330196686846884e-05, "loss": 0.3366, "step": 119200 }, { "epoch": 0.67, "learning_rate": 4.3296345638206384e-05, "loss": 0.3379, "step": 119300 }, { "epoch": 0.67, "learning_rate": 4.329072440794392e-05, "loss": 0.34, "step": 119400 }, { "epoch": 0.67, "learning_rate": 4.328510317768147e-05, "loss": 0.3384, "step": 119500 }, { "epoch": 0.67, "learning_rate": 4.3279481947419016e-05, "loss": 0.3498, "step": 119600 }, { "epoch": 0.67, "learning_rate": 4.327386071715656e-05, "loss": 0.3439, "step": 119700 }, { "epoch": 0.67, "learning_rate": 4.32682394868941e-05, "loss": 0.3456, "step": 119800 }, { "epoch": 0.67, "learning_rate": 4.326261825663165e-05, "loss": 0.3453, "step": 119900 }, { "epoch": 0.67, "learning_rate": 4.3256997026369194e-05, "loss": 0.3428, "step": 120000 }, { "epoch": 0.68, "learning_rate": 4.3251375796106733e-05, "loss": 0.342, "step": 120100 }, { "epoch": 0.68, "learning_rate": 4.324575456584428e-05, "loss": 0.3443, "step": 120200 }, { "epoch": 0.68, "learning_rate": 4.3240133335581826e-05, "loss": 0.3374, "step": 120300 }, { "epoch": 0.68, "learning_rate": 4.323451210531937e-05, "loss": 0.34, "step": 120400 }, { "epoch": 0.68, "learning_rate": 4.322889087505692e-05, "loss": 0.3435, "step": 120500 }, { "epoch": 0.68, "learning_rate": 4.3223325857097086e-05, "loss": 0.3439, "step": 120600 }, { "epoch": 0.68, "learning_rate": 4.3217760839137254e-05, "loss": 0.3405, "step": 120700 }, { "epoch": 0.68, "learning_rate": 4.32121396088748e-05, "loss": 0.3414, "step": 120800 }, { "epoch": 0.68, "learning_rate": 4.320651837861234e-05, "loss": 0.3361, "step": 120900 }, { "epoch": 0.68, "learning_rate": 4.3200897148349886e-05, "loss": 0.343, "step": 121000 }, { "epoch": 0.68, "learning_rate": 4.319527591808744e-05, "loss": 0.3488, "step": 121100 }, { "epoch": 0.68, "learning_rate": 4.3189654687824985e-05, "loss": 0.3365, "step": 121200 }, { "epoch": 0.68, "learning_rate": 4.3184033457562525e-05, "loss": 0.3493, "step": 121300 }, { "epoch": 0.68, "learning_rate": 4.317841222730007e-05, "loss": 0.3376, "step": 121400 }, { "epoch": 0.68, "learning_rate": 4.317279099703762e-05, "loss": 0.3513, "step": 121500 }, { "epoch": 0.68, "learning_rate": 4.316716976677516e-05, "loss": 0.3471, "step": 121600 }, { "epoch": 0.68, "learning_rate": 4.31615485365127e-05, "loss": 0.3441, "step": 121700 }, { "epoch": 0.68, "learning_rate": 4.315592730625025e-05, "loss": 0.3504, "step": 121800 }, { "epoch": 0.69, "learning_rate": 4.3150306075987796e-05, "loss": 0.3491, "step": 121900 }, { "epoch": 0.69, "learning_rate": 4.3144684845725335e-05, "loss": 0.3413, "step": 122000 }, { "epoch": 0.69, "learning_rate": 4.313906361546288e-05, "loss": 0.3367, "step": 122100 }, { "epoch": 0.69, "learning_rate": 4.313344238520043e-05, "loss": 0.3429, "step": 122200 }, { "epoch": 0.69, "learning_rate": 4.312782115493797e-05, "loss": 0.3401, "step": 122300 }, { "epoch": 0.69, "learning_rate": 4.3122199924675513e-05, "loss": 0.3377, "step": 122400 }, { "epoch": 0.69, "learning_rate": 4.311657869441306e-05, "loss": 0.3321, "step": 122500 }, { "epoch": 0.69, "learning_rate": 4.3110957464150606e-05, "loss": 0.3469, "step": 122600 }, { "epoch": 0.69, "learning_rate": 4.310533623388815e-05, "loss": 0.348, "step": 122700 }, { "epoch": 0.69, "learning_rate": 4.30997150036257e-05, "loss": 0.3361, "step": 122800 }, { "epoch": 0.69, "learning_rate": 4.3094093773363245e-05, "loss": 0.342, "step": 122900 }, { "epoch": 0.69, "learning_rate": 4.3088472543100784e-05, "loss": 0.3407, "step": 123000 }, { "epoch": 0.69, "learning_rate": 4.308285131283833e-05, "loss": 0.3421, "step": 123100 }, { "epoch": 0.69, "learning_rate": 4.307723008257588e-05, "loss": 0.3407, "step": 123200 }, { "epoch": 0.69, "learning_rate": 4.307160885231342e-05, "loss": 0.3419, "step": 123300 }, { "epoch": 0.69, "learning_rate": 4.306598762205096e-05, "loss": 0.3477, "step": 123400 }, { "epoch": 0.69, "learning_rate": 4.306042260409113e-05, "loss": 0.3413, "step": 123500 }, { "epoch": 0.69, "learning_rate": 4.3054801373828677e-05, "loss": 0.3489, "step": 123600 }, { "epoch": 0.7, "learning_rate": 4.304918014356622e-05, "loss": 0.3377, "step": 123700 }, { "epoch": 0.7, "learning_rate": 4.304355891330377e-05, "loss": 0.3399, "step": 123800 }, { "epoch": 0.7, "learning_rate": 4.3037937683041315e-05, "loss": 0.3462, "step": 123900 }, { "epoch": 0.7, "learning_rate": 4.303231645277886e-05, "loss": 0.3372, "step": 124000 }, { "epoch": 0.7, "learning_rate": 4.30266952225164e-05, "loss": 0.3437, "step": 124100 }, { "epoch": 0.7, "learning_rate": 4.302107399225395e-05, "loss": 0.3354, "step": 124200 }, { "epoch": 0.7, "learning_rate": 4.3015452761991494e-05, "loss": 0.3396, "step": 124300 }, { "epoch": 0.7, "learning_rate": 4.300983153172904e-05, "loss": 0.3395, "step": 124400 }, { "epoch": 0.7, "learning_rate": 4.30042665137692e-05, "loss": 0.3368, "step": 124500 }, { "epoch": 0.7, "learning_rate": 4.299864528350675e-05, "loss": 0.3395, "step": 124600 }, { "epoch": 0.7, "learning_rate": 4.299302405324429e-05, "loss": 0.3379, "step": 124700 }, { "epoch": 0.7, "learning_rate": 4.298740282298184e-05, "loss": 0.3436, "step": 124800 }, { "epoch": 0.7, "learning_rate": 4.2981781592719386e-05, "loss": 0.3424, "step": 124900 }, { "epoch": 0.7, "learning_rate": 4.297616036245693e-05, "loss": 0.3456, "step": 125000 }, { "epoch": 0.7, "learning_rate": 4.297053913219448e-05, "loss": 0.3356, "step": 125100 }, { "epoch": 0.7, "learning_rate": 4.296491790193202e-05, "loss": 0.3398, "step": 125200 }, { "epoch": 0.7, "learning_rate": 4.2959296671669564e-05, "loss": 0.3482, "step": 125300 }, { "epoch": 0.7, "learning_rate": 4.295367544140711e-05, "loss": 0.3391, "step": 125400 }, { "epoch": 0.71, "learning_rate": 4.294805421114465e-05, "loss": 0.3461, "step": 125500 }, { "epoch": 0.71, "learning_rate": 4.2942432980882196e-05, "loss": 0.3406, "step": 125600 }, { "epoch": 0.71, "learning_rate": 4.293681175061974e-05, "loss": 0.34, "step": 125700 }, { "epoch": 0.71, "learning_rate": 4.293119052035729e-05, "loss": 0.3382, "step": 125800 }, { "epoch": 0.71, "learning_rate": 4.292556929009483e-05, "loss": 0.3326, "step": 125900 }, { "epoch": 0.71, "learning_rate": 4.2919948059832375e-05, "loss": 0.3215, "step": 126000 }, { "epoch": 0.71, "learning_rate": 4.291432682956992e-05, "loss": 0.3347, "step": 126100 }, { "epoch": 0.71, "learning_rate": 4.290870559930746e-05, "loss": 0.3429, "step": 126200 }, { "epoch": 0.71, "learning_rate": 4.2903084369045013e-05, "loss": 0.3298, "step": 126300 }, { "epoch": 0.71, "learning_rate": 4.289746313878256e-05, "loss": 0.3425, "step": 126400 }, { "epoch": 0.71, "learning_rate": 4.2891841908520106e-05, "loss": 0.3315, "step": 126500 }, { "epoch": 0.71, "learning_rate": 4.2886220678257646e-05, "loss": 0.3422, "step": 126600 }, { "epoch": 0.71, "learning_rate": 4.288059944799519e-05, "loss": 0.3332, "step": 126700 }, { "epoch": 0.71, "learning_rate": 4.287497821773274e-05, "loss": 0.3401, "step": 126800 }, { "epoch": 0.71, "learning_rate": 4.286935698747028e-05, "loss": 0.331, "step": 126900 }, { "epoch": 0.71, "learning_rate": 4.2863735757207824e-05, "loss": 0.34, "step": 127000 }, { "epoch": 0.71, "learning_rate": 4.285811452694537e-05, "loss": 0.3364, "step": 127100 }, { "epoch": 0.72, "learning_rate": 4.2852493296682916e-05, "loss": 0.3343, "step": 127200 }, { "epoch": 0.72, "learning_rate": 4.2846872066420456e-05, "loss": 0.3312, "step": 127300 }, { "epoch": 0.72, "learning_rate": 4.2841250836158e-05, "loss": 0.3395, "step": 127400 }, { "epoch": 0.72, "learning_rate": 4.283562960589555e-05, "loss": 0.3333, "step": 127500 }, { "epoch": 0.72, "learning_rate": 4.283000837563309e-05, "loss": 0.3443, "step": 127600 }, { "epoch": 0.72, "learning_rate": 4.2824387145370634e-05, "loss": 0.3233, "step": 127700 }, { "epoch": 0.72, "learning_rate": 4.281876591510818e-05, "loss": 0.3436, "step": 127800 }, { "epoch": 0.72, "learning_rate": 4.281314468484573e-05, "loss": 0.3394, "step": 127900 }, { "epoch": 0.72, "learning_rate": 4.2807579666885894e-05, "loss": 0.3298, "step": 128000 }, { "epoch": 0.72, "learning_rate": 4.280195843662344e-05, "loss": 0.3325, "step": 128100 }, { "epoch": 0.72, "learning_rate": 4.279633720636099e-05, "loss": 0.3477, "step": 128200 }, { "epoch": 0.72, "learning_rate": 4.279071597609853e-05, "loss": 0.3452, "step": 128300 }, { "epoch": 0.72, "learning_rate": 4.278509474583607e-05, "loss": 0.3423, "step": 128400 }, { "epoch": 0.72, "learning_rate": 4.277947351557362e-05, "loss": 0.3298, "step": 128500 }, { "epoch": 0.72, "learning_rate": 4.2773852285311165e-05, "loss": 0.3338, "step": 128600 }, { "epoch": 0.72, "learning_rate": 4.2768231055048705e-05, "loss": 0.3289, "step": 128700 }, { "epoch": 0.72, "learning_rate": 4.276260982478625e-05, "loss": 0.3326, "step": 128800 }, { "epoch": 0.72, "learning_rate": 4.27569885945238e-05, "loss": 0.3273, "step": 128900 }, { "epoch": 0.73, "learning_rate": 4.2751367364261344e-05, "loss": 0.3452, "step": 129000 }, { "epoch": 0.73, "learning_rate": 4.274574613399889e-05, "loss": 0.331, "step": 129100 }, { "epoch": 0.73, "learning_rate": 4.2740124903736436e-05, "loss": 0.3276, "step": 129200 }, { "epoch": 0.73, "learning_rate": 4.273450367347398e-05, "loss": 0.3299, "step": 129300 }, { "epoch": 0.73, "learning_rate": 4.272888244321152e-05, "loss": 0.3374, "step": 129400 }, { "epoch": 0.73, "learning_rate": 4.272326121294907e-05, "loss": 0.3364, "step": 129500 }, { "epoch": 0.73, "learning_rate": 4.2717639982686615e-05, "loss": 0.3362, "step": 129600 }, { "epoch": 0.73, "learning_rate": 4.271201875242416e-05, "loss": 0.3289, "step": 129700 }, { "epoch": 0.73, "learning_rate": 4.27063975221617e-05, "loss": 0.338, "step": 129800 }, { "epoch": 0.73, "learning_rate": 4.2700776291899247e-05, "loss": 0.3362, "step": 129900 }, { "epoch": 0.73, "learning_rate": 4.269515506163679e-05, "loss": 0.3308, "step": 130000 }, { "epoch": 0.73, "learning_rate": 4.268953383137433e-05, "loss": 0.3345, "step": 130100 }, { "epoch": 0.73, "learning_rate": 4.268391260111188e-05, "loss": 0.3348, "step": 130200 }, { "epoch": 0.73, "learning_rate": 4.2678291370849425e-05, "loss": 0.3312, "step": 130300 }, { "epoch": 0.73, "learning_rate": 4.267267014058697e-05, "loss": 0.3332, "step": 130400 }, { "epoch": 0.73, "learning_rate": 4.266704891032452e-05, "loss": 0.3343, "step": 130500 }, { "epoch": 0.73, "learning_rate": 4.2661427680062064e-05, "loss": 0.3245, "step": 130600 }, { "epoch": 0.73, "learning_rate": 4.265580644979961e-05, "loss": 0.3295, "step": 130700 }, { "epoch": 0.74, "learning_rate": 4.265018521953715e-05, "loss": 0.3364, "step": 130800 }, { "epoch": 0.74, "learning_rate": 4.2644563989274696e-05, "loss": 0.3302, "step": 130900 }, { "epoch": 0.74, "learning_rate": 4.263894275901224e-05, "loss": 0.3276, "step": 131000 }, { "epoch": 0.74, "learning_rate": 4.263332152874979e-05, "loss": 0.3304, "step": 131100 }, { "epoch": 0.74, "learning_rate": 4.262781272309258e-05, "loss": 0.3227, "step": 131200 }, { "epoch": 0.74, "learning_rate": 4.2622247705132745e-05, "loss": 0.3405, "step": 131300 }, { "epoch": 0.74, "learning_rate": 4.261662647487029e-05, "loss": 0.3311, "step": 131400 }, { "epoch": 0.74, "learning_rate": 4.261100524460784e-05, "loss": 0.3346, "step": 131500 }, { "epoch": 0.74, "learning_rate": 4.2605384014345384e-05, "loss": 0.3286, "step": 131600 }, { "epoch": 0.74, "learning_rate": 4.259976278408292e-05, "loss": 0.3327, "step": 131700 }, { "epoch": 0.74, "learning_rate": 4.259414155382047e-05, "loss": 0.3367, "step": 131800 }, { "epoch": 0.74, "learning_rate": 4.2588520323558016e-05, "loss": 0.3284, "step": 131900 }, { "epoch": 0.74, "learning_rate": 4.2582899093295555e-05, "loss": 0.3297, "step": 132000 }, { "epoch": 0.74, "learning_rate": 4.25772778630331e-05, "loss": 0.331, "step": 132100 }, { "epoch": 0.74, "learning_rate": 4.257165663277065e-05, "loss": 0.3278, "step": 132200 }, { "epoch": 0.74, "learning_rate": 4.2566035402508194e-05, "loss": 0.3317, "step": 132300 }, { "epoch": 0.74, "learning_rate": 4.256041417224574e-05, "loss": 0.3338, "step": 132400 }, { "epoch": 0.74, "learning_rate": 4.255479294198329e-05, "loss": 0.3362, "step": 132500 }, { "epoch": 0.75, "learning_rate": 4.254917171172083e-05, "loss": 0.33, "step": 132600 }, { "epoch": 0.75, "learning_rate": 4.254355048145837e-05, "loss": 0.3237, "step": 132700 }, { "epoch": 0.75, "learning_rate": 4.253792925119592e-05, "loss": 0.3394, "step": 132800 }, { "epoch": 0.75, "learning_rate": 4.2532308020933465e-05, "loss": 0.3332, "step": 132900 }, { "epoch": 0.75, "learning_rate": 4.252668679067101e-05, "loss": 0.3209, "step": 133000 }, { "epoch": 0.75, "learning_rate": 4.252106556040855e-05, "loss": 0.3358, "step": 133100 }, { "epoch": 0.75, "learning_rate": 4.25154443301461e-05, "loss": 0.3326, "step": 133200 }, { "epoch": 0.75, "learning_rate": 4.2509823099883643e-05, "loss": 0.3217, "step": 133300 }, { "epoch": 0.75, "learning_rate": 4.250420186962118e-05, "loss": 0.3323, "step": 133400 }, { "epoch": 0.75, "learning_rate": 4.249858063935873e-05, "loss": 0.3241, "step": 133500 }, { "epoch": 0.75, "learning_rate": 4.2492959409096275e-05, "loss": 0.3352, "step": 133600 }, { "epoch": 0.75, "learning_rate": 4.248733817883382e-05, "loss": 0.3366, "step": 133700 }, { "epoch": 0.75, "learning_rate": 4.248171694857137e-05, "loss": 0.3395, "step": 133800 }, { "epoch": 0.75, "learning_rate": 4.2476151930611536e-05, "loss": 0.3299, "step": 133900 }, { "epoch": 0.75, "learning_rate": 4.247053070034908e-05, "loss": 0.3262, "step": 134000 }, { "epoch": 0.75, "learning_rate": 4.246490947008663e-05, "loss": 0.335, "step": 134100 }, { "epoch": 0.75, "learning_rate": 4.245928823982417e-05, "loss": 0.3356, "step": 134200 }, { "epoch": 0.75, "learning_rate": 4.2453667009561714e-05, "loss": 0.3264, "step": 134300 }, { "epoch": 0.76, "learning_rate": 4.244804577929926e-05, "loss": 0.3208, "step": 134400 }, { "epoch": 0.76, "learning_rate": 4.24424245490368e-05, "loss": 0.324, "step": 134500 }, { "epoch": 0.76, "learning_rate": 4.2436803318774346e-05, "loss": 0.3382, "step": 134600 }, { "epoch": 0.76, "learning_rate": 4.243118208851189e-05, "loss": 0.3339, "step": 134700 }, { "epoch": 0.76, "learning_rate": 4.242556085824944e-05, "loss": 0.3336, "step": 134800 }, { "epoch": 0.76, "learning_rate": 4.2419939627986985e-05, "loss": 0.3287, "step": 134900 }, { "epoch": 0.76, "learning_rate": 4.241431839772453e-05, "loss": 0.333, "step": 135000 }, { "epoch": 0.76, "learning_rate": 4.240869716746208e-05, "loss": 0.324, "step": 135100 }, { "epoch": 0.76, "learning_rate": 4.240307593719962e-05, "loss": 0.3289, "step": 135200 }, { "epoch": 0.76, "learning_rate": 4.239745470693716e-05, "loss": 0.327, "step": 135300 }, { "epoch": 0.76, "learning_rate": 4.239183347667471e-05, "loss": 0.3261, "step": 135400 }, { "epoch": 0.76, "learning_rate": 4.238621224641225e-05, "loss": 0.3213, "step": 135500 }, { "epoch": 0.76, "learning_rate": 4.2380591016149795e-05, "loss": 0.3367, "step": 135600 }, { "epoch": 0.76, "learning_rate": 4.237496978588734e-05, "loss": 0.3272, "step": 135700 }, { "epoch": 0.76, "learning_rate": 4.236934855562489e-05, "loss": 0.331, "step": 135800 }, { "epoch": 0.76, "learning_rate": 4.236372732536243e-05, "loss": 0.3278, "step": 135900 }, { "epoch": 0.76, "learning_rate": 4.2358106095099974e-05, "loss": 0.3227, "step": 136000 }, { "epoch": 0.77, "learning_rate": 4.235248486483752e-05, "loss": 0.3291, "step": 136100 }, { "epoch": 0.77, "learning_rate": 4.234686363457506e-05, "loss": 0.3321, "step": 136200 }, { "epoch": 0.77, "learning_rate": 4.2341242404312606e-05, "loss": 0.3211, "step": 136300 }, { "epoch": 0.77, "learning_rate": 4.233562117405015e-05, "loss": 0.3348, "step": 136400 }, { "epoch": 0.77, "learning_rate": 4.23299999437877e-05, "loss": 0.328, "step": 136500 }, { "epoch": 0.77, "learning_rate": 4.2324378713525244e-05, "loss": 0.3288, "step": 136600 }, { "epoch": 0.77, "learning_rate": 4.231875748326279e-05, "loss": 0.3227, "step": 136700 }, { "epoch": 0.77, "learning_rate": 4.231313625300034e-05, "loss": 0.3297, "step": 136800 }, { "epoch": 0.77, "learning_rate": 4.2307515022737877e-05, "loss": 0.3348, "step": 136900 }, { "epoch": 0.77, "learning_rate": 4.230189379247542e-05, "loss": 0.3298, "step": 137000 }, { "epoch": 0.77, "learning_rate": 4.229627256221297e-05, "loss": 0.331, "step": 137100 }, { "epoch": 0.77, "learning_rate": 4.2290651331950515e-05, "loss": 0.3297, "step": 137200 }, { "epoch": 0.77, "learning_rate": 4.2285030101688055e-05, "loss": 0.3338, "step": 137300 }, { "epoch": 0.77, "learning_rate": 4.22794088714256e-05, "loss": 0.322, "step": 137400 }, { "epoch": 0.77, "learning_rate": 4.227378764116315e-05, "loss": 0.319, "step": 137500 }, { "epoch": 0.77, "learning_rate": 4.226816641090069e-05, "loss": 0.3226, "step": 137600 }, { "epoch": 0.77, "learning_rate": 4.226254518063823e-05, "loss": 0.3256, "step": 137700 }, { "epoch": 0.77, "learning_rate": 4.225692395037578e-05, "loss": 0.3251, "step": 137800 }, { "epoch": 0.78, "learning_rate": 4.2251302720113326e-05, "loss": 0.3249, "step": 137900 }, { "epoch": 0.78, "learning_rate": 4.224568148985087e-05, "loss": 0.327, "step": 138000 }, { "epoch": 0.78, "learning_rate": 4.224006025958842e-05, "loss": 0.3255, "step": 138100 }, { "epoch": 0.78, "learning_rate": 4.2234495241628586e-05, "loss": 0.3156, "step": 138200 }, { "epoch": 0.78, "learning_rate": 4.222887401136613e-05, "loss": 0.3351, "step": 138300 }, { "epoch": 0.78, "learning_rate": 4.222325278110367e-05, "loss": 0.3214, "step": 138400 }, { "epoch": 0.78, "learning_rate": 4.221763155084122e-05, "loss": 0.3266, "step": 138500 }, { "epoch": 0.78, "learning_rate": 4.2212010320578764e-05, "loss": 0.3271, "step": 138600 }, { "epoch": 0.78, "learning_rate": 4.2206389090316304e-05, "loss": 0.323, "step": 138700 }, { "epoch": 0.78, "learning_rate": 4.220076786005385e-05, "loss": 0.3318, "step": 138800 }, { "epoch": 0.78, "learning_rate": 4.2195146629791396e-05, "loss": 0.3335, "step": 138900 }, { "epoch": 0.78, "learning_rate": 4.218952539952894e-05, "loss": 0.3266, "step": 139000 }, { "epoch": 0.78, "learning_rate": 4.218390416926649e-05, "loss": 0.3273, "step": 139100 }, { "epoch": 0.78, "learning_rate": 4.2178282939004035e-05, "loss": 0.3272, "step": 139200 }, { "epoch": 0.78, "learning_rate": 4.217266170874158e-05, "loss": 0.3309, "step": 139300 }, { "epoch": 0.78, "learning_rate": 4.216704047847912e-05, "loss": 0.3185, "step": 139400 }, { "epoch": 0.78, "learning_rate": 4.216141924821667e-05, "loss": 0.325, "step": 139500 }, { "epoch": 0.78, "learning_rate": 4.2155798017954213e-05, "loss": 0.3273, "step": 139600 }, { "epoch": 0.79, "learning_rate": 4.215017678769176e-05, "loss": 0.3236, "step": 139700 }, { "epoch": 0.79, "learning_rate": 4.21445555574293e-05, "loss": 0.3197, "step": 139800 }, { "epoch": 0.79, "learning_rate": 4.2138934327166846e-05, "loss": 0.3213, "step": 139900 }, { "epoch": 0.79, "learning_rate": 4.213331309690439e-05, "loss": 0.3273, "step": 140000 }, { "epoch": 0.79, "learning_rate": 4.212769186664193e-05, "loss": 0.326, "step": 140100 }, { "epoch": 0.79, "learning_rate": 4.212207063637948e-05, "loss": 0.3236, "step": 140200 }, { "epoch": 0.79, "learning_rate": 4.2116449406117024e-05, "loss": 0.3254, "step": 140300 }, { "epoch": 0.79, "learning_rate": 4.211082817585457e-05, "loss": 0.3272, "step": 140400 }, { "epoch": 0.79, "learning_rate": 4.210520694559211e-05, "loss": 0.3215, "step": 140500 }, { "epoch": 0.79, "learning_rate": 4.2099585715329656e-05, "loss": 0.3243, "step": 140600 }, { "epoch": 0.79, "learning_rate": 4.20939644850672e-05, "loss": 0.3252, "step": 140700 }, { "epoch": 0.79, "learning_rate": 4.208834325480475e-05, "loss": 0.3256, "step": 140800 }, { "epoch": 0.79, "learning_rate": 4.2082722024542295e-05, "loss": 0.3226, "step": 140900 }, { "epoch": 0.79, "learning_rate": 4.207710079427984e-05, "loss": 0.3273, "step": 141000 }, { "epoch": 0.79, "learning_rate": 4.207147956401739e-05, "loss": 0.3333, "step": 141100 }, { "epoch": 0.79, "learning_rate": 4.206585833375493e-05, "loss": 0.3321, "step": 141200 }, { "epoch": 0.79, "learning_rate": 4.206023710349247e-05, "loss": 0.3284, "step": 141300 }, { "epoch": 0.79, "learning_rate": 4.205461587323002e-05, "loss": 0.3278, "step": 141400 }, { "epoch": 0.8, "learning_rate": 4.204899464296756e-05, "loss": 0.3261, "step": 141500 }, { "epoch": 0.8, "learning_rate": 4.2043373412705105e-05, "loss": 0.3258, "step": 141600 }, { "epoch": 0.8, "learning_rate": 4.203775218244265e-05, "loss": 0.3258, "step": 141700 }, { "epoch": 0.8, "learning_rate": 4.20321309521802e-05, "loss": 0.3271, "step": 141800 }, { "epoch": 0.8, "learning_rate": 4.202650972191774e-05, "loss": 0.3261, "step": 141900 }, { "epoch": 0.8, "learning_rate": 4.2020888491655283e-05, "loss": 0.323, "step": 142000 }, { "epoch": 0.8, "learning_rate": 4.201526726139283e-05, "loss": 0.3207, "step": 142100 }, { "epoch": 0.8, "learning_rate": 4.2009646031130376e-05, "loss": 0.3253, "step": 142200 }, { "epoch": 0.8, "learning_rate": 4.200402480086792e-05, "loss": 0.3261, "step": 142300 }, { "epoch": 0.8, "learning_rate": 4.199840357060547e-05, "loss": 0.3185, "step": 142400 }, { "epoch": 0.8, "learning_rate": 4.1992782340343015e-05, "loss": 0.3198, "step": 142500 }, { "epoch": 0.8, "learning_rate": 4.1987161110080554e-05, "loss": 0.319, "step": 142600 }, { "epoch": 0.8, "learning_rate": 4.19815398798181e-05, "loss": 0.3204, "step": 142700 }, { "epoch": 0.8, "learning_rate": 4.197591864955565e-05, "loss": 0.3199, "step": 142800 }, { "epoch": 0.8, "learning_rate": 4.1970297419293186e-05, "loss": 0.3203, "step": 142900 }, { "epoch": 0.8, "learning_rate": 4.196467618903073e-05, "loss": 0.3241, "step": 143000 }, { "epoch": 0.8, "learning_rate": 4.195905495876828e-05, "loss": 0.3238, "step": 143100 }, { "epoch": 0.8, "learning_rate": 4.1953433728505825e-05, "loss": 0.3321, "step": 143200 }, { "epoch": 0.81, "learning_rate": 4.1947812498243365e-05, "loss": 0.3206, "step": 143300 }, { "epoch": 0.81, "learning_rate": 4.194219126798091e-05, "loss": 0.3262, "step": 143400 }, { "epoch": 0.81, "learning_rate": 4.193657003771846e-05, "loss": 0.3166, "step": 143500 }, { "epoch": 0.81, "learning_rate": 4.1930948807456e-05, "loss": 0.3251, "step": 143600 }, { "epoch": 0.81, "learning_rate": 4.192532757719354e-05, "loss": 0.3242, "step": 143700 }, { "epoch": 0.81, "learning_rate": 4.191970634693109e-05, "loss": 0.3181, "step": 143800 }, { "epoch": 0.81, "learning_rate": 4.1914085116668636e-05, "loss": 0.333, "step": 143900 }, { "epoch": 0.81, "learning_rate": 4.190846388640618e-05, "loss": 0.3202, "step": 144000 }, { "epoch": 0.81, "learning_rate": 4.190284265614373e-05, "loss": 0.3258, "step": 144100 }, { "epoch": 0.81, "learning_rate": 4.1897221425881274e-05, "loss": 0.3206, "step": 144200 }, { "epoch": 0.81, "learning_rate": 4.1891600195618814e-05, "loss": 0.3171, "step": 144300 }, { "epoch": 0.81, "learning_rate": 4.188597896535636e-05, "loss": 0.3269, "step": 144400 }, { "epoch": 0.81, "learning_rate": 4.1880357735093907e-05, "loss": 0.3154, "step": 144500 }, { "epoch": 0.81, "learning_rate": 4.187473650483145e-05, "loss": 0.3145, "step": 144600 }, { "epoch": 0.81, "learning_rate": 4.1869171486871614e-05, "loss": 0.324, "step": 144700 }, { "epoch": 0.81, "learning_rate": 4.186355025660916e-05, "loss": 0.3212, "step": 144800 }, { "epoch": 0.81, "learning_rate": 4.1857929026346706e-05, "loss": 0.317, "step": 144900 }, { "epoch": 0.82, "learning_rate": 4.185230779608425e-05, "loss": 0.3184, "step": 145000 }, { "epoch": 0.82, "learning_rate": 4.18466865658218e-05, "loss": 0.3268, "step": 145100 }, { "epoch": 0.82, "learning_rate": 4.1841065335559345e-05, "loss": 0.3273, "step": 145200 }, { "epoch": 0.82, "learning_rate": 4.183544410529689e-05, "loss": 0.3234, "step": 145300 }, { "epoch": 0.82, "learning_rate": 4.182987908733705e-05, "loss": 0.3202, "step": 145400 }, { "epoch": 0.82, "learning_rate": 4.18242578570746e-05, "loss": 0.3237, "step": 145500 }, { "epoch": 0.82, "learning_rate": 4.1818636626812145e-05, "loss": 0.3203, "step": 145600 }, { "epoch": 0.82, "learning_rate": 4.181301539654969e-05, "loss": 0.3145, "step": 145700 }, { "epoch": 0.82, "learning_rate": 4.180739416628723e-05, "loss": 0.3236, "step": 145800 }, { "epoch": 0.82, "learning_rate": 4.180177293602478e-05, "loss": 0.3183, "step": 145900 }, { "epoch": 0.82, "learning_rate": 4.179615170576232e-05, "loss": 0.3176, "step": 146000 }, { "epoch": 0.82, "learning_rate": 4.179053047549987e-05, "loss": 0.3248, "step": 146100 }, { "epoch": 0.82, "learning_rate": 4.1784909245237416e-05, "loss": 0.3201, "step": 146200 }, { "epoch": 0.82, "learning_rate": 4.177928801497496e-05, "loss": 0.3197, "step": 146300 }, { "epoch": 0.82, "learning_rate": 4.177366678471251e-05, "loss": 0.3284, "step": 146400 }, { "epoch": 0.82, "learning_rate": 4.176804555445005e-05, "loss": 0.3115, "step": 146500 }, { "epoch": 0.82, "learning_rate": 4.1762424324187594e-05, "loss": 0.3218, "step": 146600 }, { "epoch": 0.82, "learning_rate": 4.175680309392514e-05, "loss": 0.3137, "step": 146700 }, { "epoch": 0.83, "learning_rate": 4.175118186366268e-05, "loss": 0.3255, "step": 146800 }, { "epoch": 0.83, "learning_rate": 4.1745560633400226e-05, "loss": 0.3236, "step": 146900 }, { "epoch": 0.83, "learning_rate": 4.173993940313777e-05, "loss": 0.3162, "step": 147000 }, { "epoch": 0.83, "learning_rate": 4.173431817287532e-05, "loss": 0.3225, "step": 147100 }, { "epoch": 0.83, "learning_rate": 4.172869694261286e-05, "loss": 0.3126, "step": 147200 }, { "epoch": 0.83, "learning_rate": 4.1723075712350404e-05, "loss": 0.3202, "step": 147300 }, { "epoch": 0.83, "learning_rate": 4.171745448208795e-05, "loss": 0.3198, "step": 147400 }, { "epoch": 0.83, "learning_rate": 4.17118332518255e-05, "loss": 0.3205, "step": 147500 }, { "epoch": 0.83, "learning_rate": 4.170621202156304e-05, "loss": 0.3131, "step": 147600 }, { "epoch": 0.83, "learning_rate": 4.170059079130059e-05, "loss": 0.3103, "step": 147700 }, { "epoch": 0.83, "learning_rate": 4.1694969561038136e-05, "loss": 0.3165, "step": 147800 }, { "epoch": 0.83, "learning_rate": 4.1689348330775675e-05, "loss": 0.3117, "step": 147900 }, { "epoch": 0.83, "learning_rate": 4.168372710051322e-05, "loss": 0.3179, "step": 148000 }, { "epoch": 0.83, "learning_rate": 4.167810587025077e-05, "loss": 0.3159, "step": 148100 }, { "epoch": 0.83, "learning_rate": 4.167248463998831e-05, "loss": 0.3249, "step": 148200 }, { "epoch": 0.83, "learning_rate": 4.1666863409725854e-05, "loss": 0.309, "step": 148300 }, { "epoch": 0.83, "learning_rate": 4.16612421794634e-05, "loss": 0.3195, "step": 148400 }, { "epoch": 0.83, "learning_rate": 4.1655620949200946e-05, "loss": 0.3216, "step": 148500 }, { "epoch": 0.84, "learning_rate": 4.1649999718938486e-05, "loss": 0.3126, "step": 148600 }, { "epoch": 0.84, "learning_rate": 4.164437848867603e-05, "loss": 0.3118, "step": 148700 }, { "epoch": 0.84, "learning_rate": 4.1638813470716206e-05, "loss": 0.32, "step": 148800 }, { "epoch": 0.84, "learning_rate": 4.163319224045375e-05, "loss": 0.3136, "step": 148900 }, { "epoch": 0.84, "learning_rate": 4.162757101019129e-05, "loss": 0.3166, "step": 149000 }, { "epoch": 0.84, "learning_rate": 4.162194977992884e-05, "loss": 0.3172, "step": 149100 }, { "epoch": 0.84, "learning_rate": 4.1616328549666385e-05, "loss": 0.3067, "step": 149200 }, { "epoch": 0.84, "learning_rate": 4.1610707319403924e-05, "loss": 0.3125, "step": 149300 }, { "epoch": 0.84, "learning_rate": 4.160508608914147e-05, "loss": 0.3172, "step": 149400 }, { "epoch": 0.84, "learning_rate": 4.159946485887902e-05, "loss": 0.3221, "step": 149500 }, { "epoch": 0.84, "learning_rate": 4.159384362861656e-05, "loss": 0.3164, "step": 149600 }, { "epoch": 0.84, "learning_rate": 4.15882223983541e-05, "loss": 0.3138, "step": 149700 }, { "epoch": 0.84, "learning_rate": 4.158260116809165e-05, "loss": 0.3164, "step": 149800 }, { "epoch": 0.84, "learning_rate": 4.1576979937829195e-05, "loss": 0.3113, "step": 149900 }, { "epoch": 0.84, "learning_rate": 4.1571358707566735e-05, "loss": 0.3116, "step": 150000 }, { "epoch": 0.84, "learning_rate": 4.156573747730428e-05, "loss": 0.3182, "step": 150100 }, { "epoch": 0.84, "learning_rate": 4.156011624704183e-05, "loss": 0.3136, "step": 150200 }, { "epoch": 0.84, "learning_rate": 4.155449501677937e-05, "loss": 0.3072, "step": 150300 }, { "epoch": 0.85, "learning_rate": 4.154887378651692e-05, "loss": 0.311, "step": 150400 }, { "epoch": 0.85, "learning_rate": 4.1543252556254466e-05, "loss": 0.3104, "step": 150500 }, { "epoch": 0.85, "learning_rate": 4.153763132599201e-05, "loss": 0.3065, "step": 150600 }, { "epoch": 0.85, "learning_rate": 4.153201009572955e-05, "loss": 0.3189, "step": 150700 }, { "epoch": 0.85, "learning_rate": 4.15263888654671e-05, "loss": 0.3161, "step": 150800 }, { "epoch": 0.85, "learning_rate": 4.1520767635204644e-05, "loss": 0.3194, "step": 150900 }, { "epoch": 0.85, "learning_rate": 4.151514640494219e-05, "loss": 0.3179, "step": 151000 }, { "epoch": 0.85, "learning_rate": 4.150952517467973e-05, "loss": 0.3203, "step": 151100 }, { "epoch": 0.85, "learning_rate": 4.1503903944417276e-05, "loss": 0.3189, "step": 151200 }, { "epoch": 0.85, "learning_rate": 4.149828271415482e-05, "loss": 0.3156, "step": 151300 }, { "epoch": 0.85, "learning_rate": 4.149266148389236e-05, "loss": 0.3114, "step": 151400 }, { "epoch": 0.85, "learning_rate": 4.148704025362991e-05, "loss": 0.3108, "step": 151500 }, { "epoch": 0.85, "learning_rate": 4.1481419023367455e-05, "loss": 0.3097, "step": 151600 }, { "epoch": 0.85, "learning_rate": 4.1475797793105e-05, "loss": 0.324, "step": 151700 }, { "epoch": 0.85, "learning_rate": 4.147017656284255e-05, "loss": 0.3139, "step": 151800 }, { "epoch": 0.85, "learning_rate": 4.1464555332580093e-05, "loss": 0.3124, "step": 151900 }, { "epoch": 0.85, "learning_rate": 4.145893410231764e-05, "loss": 0.318, "step": 152000 }, { "epoch": 0.85, "learning_rate": 4.145331287205518e-05, "loss": 0.3108, "step": 152100 }, { "epoch": 0.86, "learning_rate": 4.1447691641792726e-05, "loss": 0.319, "step": 152200 }, { "epoch": 0.86, "learning_rate": 4.144207041153027e-05, "loss": 0.3191, "step": 152300 }, { "epoch": 0.86, "learning_rate": 4.143644918126782e-05, "loss": 0.3115, "step": 152400 }, { "epoch": 0.86, "learning_rate": 4.143082795100536e-05, "loss": 0.3116, "step": 152500 }, { "epoch": 0.86, "learning_rate": 4.1425206720742904e-05, "loss": 0.3197, "step": 152600 }, { "epoch": 0.86, "learning_rate": 4.141958549048045e-05, "loss": 0.3138, "step": 152700 }, { "epoch": 0.86, "learning_rate": 4.141396426021799e-05, "loss": 0.3171, "step": 152800 }, { "epoch": 0.86, "learning_rate": 4.1408343029955536e-05, "loss": 0.3194, "step": 152900 }, { "epoch": 0.86, "learning_rate": 4.140272179969308e-05, "loss": 0.3109, "step": 153000 }, { "epoch": 0.86, "learning_rate": 4.139710056943063e-05, "loss": 0.3133, "step": 153100 }, { "epoch": 0.86, "learning_rate": 4.139147933916817e-05, "loss": 0.3119, "step": 153200 }, { "epoch": 0.86, "learning_rate": 4.1385858108905714e-05, "loss": 0.3038, "step": 153300 }, { "epoch": 0.86, "learning_rate": 4.138023687864326e-05, "loss": 0.3085, "step": 153400 }, { "epoch": 0.86, "learning_rate": 4.137461564838081e-05, "loss": 0.3147, "step": 153500 }, { "epoch": 0.86, "learning_rate": 4.136899441811835e-05, "loss": 0.3172, "step": 153600 }, { "epoch": 0.86, "learning_rate": 4.13633731878559e-05, "loss": 0.3064, "step": 153700 }, { "epoch": 0.86, "learning_rate": 4.1357751957593446e-05, "loss": 0.3089, "step": 153800 }, { "epoch": 0.87, "learning_rate": 4.1352186939633606e-05, "loss": 0.3108, "step": 153900 }, { "epoch": 0.87, "learning_rate": 4.134656570937115e-05, "loss": 0.3099, "step": 154000 }, { "epoch": 0.87, "learning_rate": 4.13409444791087e-05, "loss": 0.3197, "step": 154100 }, { "epoch": 0.87, "learning_rate": 4.1335323248846245e-05, "loss": 0.3101, "step": 154200 }, { "epoch": 0.87, "learning_rate": 4.1329702018583785e-05, "loss": 0.316, "step": 154300 }, { "epoch": 0.87, "learning_rate": 4.132408078832133e-05, "loss": 0.3225, "step": 154400 }, { "epoch": 0.87, "learning_rate": 4.131845955805888e-05, "loss": 0.3074, "step": 154500 }, { "epoch": 0.87, "learning_rate": 4.1312838327796424e-05, "loss": 0.3193, "step": 154600 }, { "epoch": 0.87, "learning_rate": 4.130721709753397e-05, "loss": 0.3122, "step": 154700 }, { "epoch": 0.87, "learning_rate": 4.1301595867271516e-05, "loss": 0.3126, "step": 154800 }, { "epoch": 0.87, "learning_rate": 4.129597463700906e-05, "loss": 0.3118, "step": 154900 }, { "epoch": 0.87, "learning_rate": 4.12903534067466e-05, "loss": 0.3097, "step": 155000 }, { "epoch": 0.87, "learning_rate": 4.128473217648415e-05, "loss": 0.3091, "step": 155100 }, { "epoch": 0.87, "learning_rate": 4.1279110946221695e-05, "loss": 0.3091, "step": 155200 }, { "epoch": 0.87, "learning_rate": 4.1273489715959234e-05, "loss": 0.3102, "step": 155300 }, { "epoch": 0.87, "learning_rate": 4.126786848569678e-05, "loss": 0.3094, "step": 155400 }, { "epoch": 0.87, "learning_rate": 4.1262247255434327e-05, "loss": 0.3121, "step": 155500 }, { "epoch": 0.87, "learning_rate": 4.1256626025171866e-05, "loss": 0.3135, "step": 155600 }, { "epoch": 0.88, "learning_rate": 4.125100479490941e-05, "loss": 0.3175, "step": 155700 }, { "epoch": 0.88, "learning_rate": 4.124538356464696e-05, "loss": 0.3198, "step": 155800 }, { "epoch": 0.88, "learning_rate": 4.1239762334384505e-05, "loss": 0.3069, "step": 155900 }, { "epoch": 0.88, "learning_rate": 4.123414110412205e-05, "loss": 0.3121, "step": 156000 }, { "epoch": 0.88, "learning_rate": 4.12285198738596e-05, "loss": 0.3082, "step": 156100 }, { "epoch": 0.88, "learning_rate": 4.1222954855899765e-05, "loss": 0.3108, "step": 156200 }, { "epoch": 0.88, "learning_rate": 4.121733362563731e-05, "loss": 0.3136, "step": 156300 }, { "epoch": 0.88, "learning_rate": 4.121171239537485e-05, "loss": 0.3043, "step": 156400 }, { "epoch": 0.88, "learning_rate": 4.12060911651124e-05, "loss": 0.3053, "step": 156500 }, { "epoch": 0.88, "learning_rate": 4.1200469934849943e-05, "loss": 0.3173, "step": 156600 }, { "epoch": 0.88, "learning_rate": 4.119484870458748e-05, "loss": 0.3136, "step": 156700 }, { "epoch": 0.88, "learning_rate": 4.118922747432503e-05, "loss": 0.3135, "step": 156800 }, { "epoch": 0.88, "learning_rate": 4.1183606244062575e-05, "loss": 0.3194, "step": 156900 }, { "epoch": 0.88, "learning_rate": 4.117798501380012e-05, "loss": 0.3088, "step": 157000 }, { "epoch": 0.88, "learning_rate": 4.117236378353767e-05, "loss": 0.312, "step": 157100 }, { "epoch": 0.88, "learning_rate": 4.1166742553275214e-05, "loss": 0.3082, "step": 157200 }, { "epoch": 0.88, "learning_rate": 4.116112132301276e-05, "loss": 0.3051, "step": 157300 }, { "epoch": 0.88, "learning_rate": 4.11555000927503e-05, "loss": 0.3092, "step": 157400 }, { "epoch": 0.89, "learning_rate": 4.1149878862487846e-05, "loss": 0.3192, "step": 157500 }, { "epoch": 0.89, "learning_rate": 4.114425763222539e-05, "loss": 0.3201, "step": 157600 }, { "epoch": 0.89, "learning_rate": 4.113863640196294e-05, "loss": 0.3141, "step": 157700 }, { "epoch": 0.89, "learning_rate": 4.113301517170048e-05, "loss": 0.3059, "step": 157800 }, { "epoch": 0.89, "learning_rate": 4.1127393941438025e-05, "loss": 0.3091, "step": 157900 }, { "epoch": 0.89, "learning_rate": 4.112177271117557e-05, "loss": 0.313, "step": 158000 }, { "epoch": 0.89, "learning_rate": 4.111615148091311e-05, "loss": 0.3086, "step": 158100 }, { "epoch": 0.89, "learning_rate": 4.111053025065066e-05, "loss": 0.313, "step": 158200 }, { "epoch": 0.89, "learning_rate": 4.11049090203882e-05, "loss": 0.3101, "step": 158300 }, { "epoch": 0.89, "learning_rate": 4.109928779012575e-05, "loss": 0.313, "step": 158400 }, { "epoch": 0.89, "learning_rate": 4.109372277216592e-05, "loss": 0.3153, "step": 158500 }, { "epoch": 0.89, "learning_rate": 4.108810154190346e-05, "loss": 0.3104, "step": 158600 }, { "epoch": 0.89, "learning_rate": 4.108248031164101e-05, "loss": 0.3086, "step": 158700 }, { "epoch": 0.89, "learning_rate": 4.1076859081378556e-05, "loss": 0.319, "step": 158800 }, { "epoch": 0.89, "learning_rate": 4.1071237851116095e-05, "loss": 0.3078, "step": 158900 }, { "epoch": 0.89, "learning_rate": 4.106561662085364e-05, "loss": 0.3147, "step": 159000 }, { "epoch": 0.89, "learning_rate": 4.105999539059119e-05, "loss": 0.3103, "step": 159100 }, { "epoch": 0.89, "learning_rate": 4.105437416032873e-05, "loss": 0.31, "step": 159200 }, { "epoch": 0.9, "learning_rate": 4.1048752930066274e-05, "loss": 0.3041, "step": 159300 }, { "epoch": 0.9, "learning_rate": 4.104313169980382e-05, "loss": 0.3064, "step": 159400 }, { "epoch": 0.9, "learning_rate": 4.1037510469541366e-05, "loss": 0.3106, "step": 159500 }, { "epoch": 0.9, "learning_rate": 4.1031889239278906e-05, "loss": 0.3125, "step": 159600 }, { "epoch": 0.9, "learning_rate": 4.102626800901645e-05, "loss": 0.3072, "step": 159700 }, { "epoch": 0.9, "learning_rate": 4.1020646778754005e-05, "loss": 0.312, "step": 159800 }, { "epoch": 0.9, "learning_rate": 4.1015025548491544e-05, "loss": 0.3226, "step": 159900 }, { "epoch": 0.9, "learning_rate": 4.100940431822909e-05, "loss": 0.311, "step": 160000 }, { "epoch": 0.9, "learning_rate": 4.100378308796664e-05, "loss": 0.3071, "step": 160100 }, { "epoch": 0.9, "learning_rate": 4.099816185770418e-05, "loss": 0.3092, "step": 160200 }, { "epoch": 0.9, "learning_rate": 4.099254062744172e-05, "loss": 0.3109, "step": 160300 }, { "epoch": 0.9, "learning_rate": 4.098691939717927e-05, "loss": 0.3072, "step": 160400 }, { "epoch": 0.9, "learning_rate": 4.098135437921944e-05, "loss": 0.2962, "step": 160500 }, { "epoch": 0.9, "learning_rate": 4.097573314895698e-05, "loss": 0.2932, "step": 160600 }, { "epoch": 0.9, "learning_rate": 4.097011191869453e-05, "loss": 0.3108, "step": 160700 }, { "epoch": 0.9, "learning_rate": 4.0964490688432076e-05, "loss": 0.297, "step": 160800 }, { "epoch": 0.9, "learning_rate": 4.095886945816962e-05, "loss": 0.3126, "step": 160900 }, { "epoch": 0.91, "learning_rate": 4.095324822790716e-05, "loss": 0.3036, "step": 161000 }, { "epoch": 0.91, "learning_rate": 4.094762699764471e-05, "loss": 0.3062, "step": 161100 }, { "epoch": 0.91, "learning_rate": 4.0942005767382254e-05, "loss": 0.3134, "step": 161200 }, { "epoch": 0.91, "learning_rate": 4.09363845371198e-05, "loss": 0.3054, "step": 161300 }, { "epoch": 0.91, "learning_rate": 4.093076330685734e-05, "loss": 0.303, "step": 161400 }, { "epoch": 0.91, "learning_rate": 4.0925142076594886e-05, "loss": 0.3109, "step": 161500 }, { "epoch": 0.91, "learning_rate": 4.091952084633243e-05, "loss": 0.3111, "step": 161600 }, { "epoch": 0.91, "learning_rate": 4.091389961606997e-05, "loss": 0.3098, "step": 161700 }, { "epoch": 0.91, "learning_rate": 4.090827838580752e-05, "loss": 0.3082, "step": 161800 }, { "epoch": 0.91, "learning_rate": 4.0902657155545064e-05, "loss": 0.3076, "step": 161900 }, { "epoch": 0.91, "learning_rate": 4.089703592528261e-05, "loss": 0.309, "step": 162000 }, { "epoch": 0.91, "learning_rate": 4.089141469502015e-05, "loss": 0.3028, "step": 162100 }, { "epoch": 0.91, "learning_rate": 4.0885793464757696e-05, "loss": 0.3135, "step": 162200 }, { "epoch": 0.91, "learning_rate": 4.088022844679787e-05, "loss": 0.3023, "step": 162300 }, { "epoch": 0.91, "learning_rate": 4.087460721653542e-05, "loss": 0.3119, "step": 162400 }, { "epoch": 0.91, "learning_rate": 4.0868985986272957e-05, "loss": 0.3161, "step": 162500 }, { "epoch": 0.91, "learning_rate": 4.08633647560105e-05, "loss": 0.3108, "step": 162600 }, { "epoch": 0.91, "learning_rate": 4.085774352574805e-05, "loss": 0.3113, "step": 162700 }, { "epoch": 0.92, "learning_rate": 4.085212229548559e-05, "loss": 0.309, "step": 162800 }, { "epoch": 0.92, "learning_rate": 4.0846501065223135e-05, "loss": 0.307, "step": 162900 }, { "epoch": 0.92, "learning_rate": 4.084087983496068e-05, "loss": 0.3095, "step": 163000 }, { "epoch": 0.92, "learning_rate": 4.083525860469823e-05, "loss": 0.3005, "step": 163100 }, { "epoch": 0.92, "learning_rate": 4.082963737443577e-05, "loss": 0.3105, "step": 163200 }, { "epoch": 0.92, "learning_rate": 4.082401614417331e-05, "loss": 0.3021, "step": 163300 }, { "epoch": 0.92, "learning_rate": 4.081839491391086e-05, "loss": 0.2954, "step": 163400 }, { "epoch": 0.92, "learning_rate": 4.0812773683648406e-05, "loss": 0.301, "step": 163500 }, { "epoch": 0.92, "learning_rate": 4.080715245338595e-05, "loss": 0.3, "step": 163600 }, { "epoch": 0.92, "learning_rate": 4.08015312231235e-05, "loss": 0.3089, "step": 163700 }, { "epoch": 0.92, "learning_rate": 4.0795909992861045e-05, "loss": 0.3157, "step": 163800 }, { "epoch": 0.92, "learning_rate": 4.0790288762598584e-05, "loss": 0.3038, "step": 163900 }, { "epoch": 0.92, "learning_rate": 4.078466753233613e-05, "loss": 0.3129, "step": 164000 }, { "epoch": 0.92, "learning_rate": 4.077904630207368e-05, "loss": 0.3095, "step": 164100 }, { "epoch": 0.92, "learning_rate": 4.0773425071811216e-05, "loss": 0.3007, "step": 164200 }, { "epoch": 0.92, "learning_rate": 4.076780384154876e-05, "loss": 0.2909, "step": 164300 }, { "epoch": 0.92, "learning_rate": 4.076218261128631e-05, "loss": 0.3108, "step": 164400 }, { "epoch": 0.92, "learning_rate": 4.0756561381023855e-05, "loss": 0.3043, "step": 164500 }, { "epoch": 0.93, "learning_rate": 4.0750940150761394e-05, "loss": 0.3085, "step": 164600 }, { "epoch": 0.93, "learning_rate": 4.074531892049894e-05, "loss": 0.3072, "step": 164700 }, { "epoch": 0.93, "learning_rate": 4.073969769023649e-05, "loss": 0.3033, "step": 164800 }, { "epoch": 0.93, "learning_rate": 4.0734076459974027e-05, "loss": 0.3106, "step": 164900 }, { "epoch": 0.93, "learning_rate": 4.072845522971158e-05, "loss": 0.3089, "step": 165000 }, { "epoch": 0.93, "learning_rate": 4.0722833999449126e-05, "loss": 0.3056, "step": 165100 }, { "epoch": 0.93, "learning_rate": 4.0717212769186665e-05, "loss": 0.3034, "step": 165200 }, { "epoch": 0.93, "learning_rate": 4.071159153892421e-05, "loss": 0.2933, "step": 165300 }, { "epoch": 0.93, "learning_rate": 4.070597030866176e-05, "loss": 0.302, "step": 165400 }, { "epoch": 0.93, "learning_rate": 4.0700349078399304e-05, "loss": 0.3056, "step": 165500 }, { "epoch": 0.93, "learning_rate": 4.0694727848136844e-05, "loss": 0.307, "step": 165600 }, { "epoch": 0.93, "learning_rate": 4.068910661787439e-05, "loss": 0.3023, "step": 165700 }, { "epoch": 0.93, "learning_rate": 4.0683485387611936e-05, "loss": 0.3057, "step": 165800 }, { "epoch": 0.93, "learning_rate": 4.0677864157349476e-05, "loss": 0.2957, "step": 165900 }, { "epoch": 0.93, "learning_rate": 4.067229913938965e-05, "loss": 0.303, "step": 166000 }, { "epoch": 0.93, "learning_rate": 4.0666677909127196e-05, "loss": 0.3088, "step": 166100 }, { "epoch": 0.93, "learning_rate": 4.066105667886474e-05, "loss": 0.3007, "step": 166200 }, { "epoch": 0.93, "learning_rate": 4.065543544860228e-05, "loss": 0.3011, "step": 166300 }, { "epoch": 0.94, "learning_rate": 4.064981421833983e-05, "loss": 0.3102, "step": 166400 }, { "epoch": 0.94, "learning_rate": 4.0644192988077375e-05, "loss": 0.302, "step": 166500 }, { "epoch": 0.94, "learning_rate": 4.063857175781492e-05, "loss": 0.2988, "step": 166600 }, { "epoch": 0.94, "learning_rate": 4.063295052755246e-05, "loss": 0.3044, "step": 166700 }, { "epoch": 0.94, "learning_rate": 4.062732929729001e-05, "loss": 0.3064, "step": 166800 }, { "epoch": 0.94, "learning_rate": 4.062170806702755e-05, "loss": 0.3075, "step": 166900 }, { "epoch": 0.94, "learning_rate": 4.061608683676509e-05, "loss": 0.3013, "step": 167000 }, { "epoch": 0.94, "learning_rate": 4.061046560650264e-05, "loss": 0.3094, "step": 167100 }, { "epoch": 0.94, "learning_rate": 4.0604844376240185e-05, "loss": 0.3011, "step": 167200 }, { "epoch": 0.94, "learning_rate": 4.059922314597773e-05, "loss": 0.3003, "step": 167300 }, { "epoch": 0.94, "learning_rate": 4.059360191571527e-05, "loss": 0.3008, "step": 167400 }, { "epoch": 0.94, "learning_rate": 4.058798068545282e-05, "loss": 0.3021, "step": 167500 }, { "epoch": 0.94, "learning_rate": 4.0582359455190363e-05, "loss": 0.306, "step": 167600 }, { "epoch": 0.94, "learning_rate": 4.057673822492791e-05, "loss": 0.3079, "step": 167700 }, { "epoch": 0.94, "learning_rate": 4.0571116994665456e-05, "loss": 0.3028, "step": 167800 }, { "epoch": 0.94, "learning_rate": 4.0565495764403e-05, "loss": 0.302, "step": 167900 }, { "epoch": 0.94, "learning_rate": 4.055987453414055e-05, "loss": 0.3083, "step": 168000 }, { "epoch": 0.94, "learning_rate": 4.055425330387809e-05, "loss": 0.3092, "step": 168100 }, { "epoch": 0.95, "learning_rate": 4.0548632073615634e-05, "loss": 0.3064, "step": 168200 }, { "epoch": 0.95, "learning_rate": 4.054301084335318e-05, "loss": 0.2982, "step": 168300 }, { "epoch": 0.95, "learning_rate": 4.053738961309072e-05, "loss": 0.3057, "step": 168400 }, { "epoch": 0.95, "learning_rate": 4.0531768382828266e-05, "loss": 0.3014, "step": 168500 }, { "epoch": 0.95, "learning_rate": 4.052614715256581e-05, "loss": 0.2918, "step": 168600 }, { "epoch": 0.95, "learning_rate": 4.052052592230336e-05, "loss": 0.2942, "step": 168700 }, { "epoch": 0.95, "learning_rate": 4.05149046920409e-05, "loss": 0.3079, "step": 168800 }, { "epoch": 0.95, "learning_rate": 4.0509283461778445e-05, "loss": 0.2968, "step": 168900 }, { "epoch": 0.95, "learning_rate": 4.050366223151599e-05, "loss": 0.3043, "step": 169000 }, { "epoch": 0.95, "learning_rate": 4.049804100125353e-05, "loss": 0.3036, "step": 169100 }, { "epoch": 0.95, "learning_rate": 4.0492419770991084e-05, "loss": 0.2986, "step": 169200 }, { "epoch": 0.95, "learning_rate": 4.048679854072863e-05, "loss": 0.2994, "step": 169300 }, { "epoch": 0.95, "learning_rate": 4.0481177310466176e-05, "loss": 0.3039, "step": 169400 }, { "epoch": 0.95, "learning_rate": 4.0475556080203716e-05, "loss": 0.3091, "step": 169500 }, { "epoch": 0.95, "learning_rate": 4.046993484994126e-05, "loss": 0.3051, "step": 169600 }, { "epoch": 0.95, "learning_rate": 4.046431361967881e-05, "loss": 0.3023, "step": 169700 }, { "epoch": 0.95, "learning_rate": 4.045869238941635e-05, "loss": 0.2971, "step": 169800 }, { "epoch": 0.96, "learning_rate": 4.0453071159153894e-05, "loss": 0.3035, "step": 169900 }, { "epoch": 0.96, "learning_rate": 4.044744992889144e-05, "loss": 0.3042, "step": 170000 }, { "epoch": 0.96, "learning_rate": 4.0441828698628987e-05, "loss": 0.304, "step": 170100 }, { "epoch": 0.96, "learning_rate": 4.0436207468366526e-05, "loss": 0.3095, "step": 170200 }, { "epoch": 0.96, "learning_rate": 4.04306424504067e-05, "loss": 0.3093, "step": 170300 }, { "epoch": 0.96, "learning_rate": 4.042502122014425e-05, "loss": 0.3008, "step": 170400 }, { "epoch": 0.96, "learning_rate": 4.041939998988179e-05, "loss": 0.2969, "step": 170500 }, { "epoch": 0.96, "learning_rate": 4.041377875961933e-05, "loss": 0.2978, "step": 170600 }, { "epoch": 0.96, "learning_rate": 4.040815752935688e-05, "loss": 0.3068, "step": 170700 }, { "epoch": 0.96, "learning_rate": 4.0402536299094425e-05, "loss": 0.3022, "step": 170800 }, { "epoch": 0.96, "learning_rate": 4.0396915068831965e-05, "loss": 0.3008, "step": 170900 }, { "epoch": 0.96, "learning_rate": 4.039129383856951e-05, "loss": 0.3055, "step": 171000 }, { "epoch": 0.96, "learning_rate": 4.038567260830706e-05, "loss": 0.3112, "step": 171100 }, { "epoch": 0.96, "learning_rate": 4.03800513780446e-05, "loss": 0.2976, "step": 171200 }, { "epoch": 0.96, "learning_rate": 4.037443014778214e-05, "loss": 0.2935, "step": 171300 }, { "epoch": 0.96, "learning_rate": 4.036880891751969e-05, "loss": 0.2915, "step": 171400 }, { "epoch": 0.96, "learning_rate": 4.0363187687257235e-05, "loss": 0.2984, "step": 171500 }, { "epoch": 0.96, "learning_rate": 4.0357566456994775e-05, "loss": 0.305, "step": 171600 }, { "epoch": 0.97, "learning_rate": 4.035194522673232e-05, "loss": 0.3036, "step": 171700 }, { "epoch": 0.97, "learning_rate": 4.034632399646987e-05, "loss": 0.3045, "step": 171800 }, { "epoch": 0.97, "learning_rate": 4.0340702766207414e-05, "loss": 0.3038, "step": 171900 }, { "epoch": 0.97, "learning_rate": 4.033508153594496e-05, "loss": 0.2917, "step": 172000 }, { "epoch": 0.97, "learning_rate": 4.0329460305682506e-05, "loss": 0.2975, "step": 172100 }, { "epoch": 0.97, "learning_rate": 4.032383907542005e-05, "loss": 0.3013, "step": 172200 }, { "epoch": 0.97, "learning_rate": 4.031821784515759e-05, "loss": 0.2962, "step": 172300 }, { "epoch": 0.97, "learning_rate": 4.031265282719776e-05, "loss": 0.2981, "step": 172400 }, { "epoch": 0.97, "learning_rate": 4.0307031596935306e-05, "loss": 0.3077, "step": 172500 }, { "epoch": 0.97, "learning_rate": 4.030146657897548e-05, "loss": 0.3012, "step": 172600 }, { "epoch": 0.97, "learning_rate": 4.029590156101565e-05, "loss": 0.3029, "step": 172700 }, { "epoch": 0.97, "learning_rate": 4.029028033075319e-05, "loss": 0.2985, "step": 172800 }, { "epoch": 0.97, "learning_rate": 4.0284659100490734e-05, "loss": 0.3039, "step": 172900 }, { "epoch": 0.97, "learning_rate": 4.027903787022828e-05, "loss": 0.3025, "step": 173000 }, { "epoch": 0.97, "learning_rate": 4.0273416639965826e-05, "loss": 0.3046, "step": 173100 }, { "epoch": 0.97, "learning_rate": 4.0267795409703366e-05, "loss": 0.2997, "step": 173200 }, { "epoch": 0.97, "learning_rate": 4.026217417944091e-05, "loss": 0.3003, "step": 173300 }, { "epoch": 0.97, "learning_rate": 4.025655294917846e-05, "loss": 0.2917, "step": 173400 }, { "epoch": 0.98, "learning_rate": 4.0250931718916005e-05, "loss": 0.3015, "step": 173500 }, { "epoch": 0.98, "learning_rate": 4.024531048865355e-05, "loss": 0.2909, "step": 173600 }, { "epoch": 0.98, "learning_rate": 4.02396892583911e-05, "loss": 0.2998, "step": 173700 }, { "epoch": 0.98, "learning_rate": 4.0234068028128644e-05, "loss": 0.307, "step": 173800 }, { "epoch": 0.98, "learning_rate": 4.022844679786618e-05, "loss": 0.3, "step": 173900 }, { "epoch": 0.98, "learning_rate": 4.022282556760373e-05, "loss": 0.3041, "step": 174000 }, { "epoch": 0.98, "learning_rate": 4.0217204337341276e-05, "loss": 0.3053, "step": 174100 }, { "epoch": 0.98, "learning_rate": 4.0211583107078815e-05, "loss": 0.2979, "step": 174200 }, { "epoch": 0.98, "learning_rate": 4.020596187681636e-05, "loss": 0.2983, "step": 174300 }, { "epoch": 0.98, "learning_rate": 4.020034064655391e-05, "loss": 0.2972, "step": 174400 }, { "epoch": 0.98, "learning_rate": 4.0194719416291454e-05, "loss": 0.3068, "step": 174500 }, { "epoch": 0.98, "learning_rate": 4.0189098186028993e-05, "loss": 0.3, "step": 174600 }, { "epoch": 0.98, "learning_rate": 4.018347695576654e-05, "loss": 0.3026, "step": 174700 }, { "epoch": 0.98, "learning_rate": 4.0177855725504086e-05, "loss": 0.3062, "step": 174800 }, { "epoch": 0.98, "learning_rate": 4.0172234495241625e-05, "loss": 0.2984, "step": 174900 }, { "epoch": 0.98, "learning_rate": 4.016661326497917e-05, "loss": 0.2965, "step": 175000 }, { "epoch": 0.98, "learning_rate": 4.016099203471672e-05, "loss": 0.305, "step": 175100 }, { "epoch": 0.98, "learning_rate": 4.0155370804454264e-05, "loss": 0.3041, "step": 175200 }, { "epoch": 0.99, "learning_rate": 4.014974957419181e-05, "loss": 0.2992, "step": 175300 }, { "epoch": 0.99, "learning_rate": 4.014412834392936e-05, "loss": 0.2906, "step": 175400 }, { "epoch": 0.99, "learning_rate": 4.01385071136669e-05, "loss": 0.2925, "step": 175500 }, { "epoch": 0.99, "learning_rate": 4.013288588340444e-05, "loss": 0.2885, "step": 175600 }, { "epoch": 0.99, "learning_rate": 4.012726465314199e-05, "loss": 0.2931, "step": 175700 }, { "epoch": 0.99, "learning_rate": 4.0121643422879535e-05, "loss": 0.3027, "step": 175800 }, { "epoch": 0.99, "learning_rate": 4.0116022192617075e-05, "loss": 0.2992, "step": 175900 }, { "epoch": 0.99, "learning_rate": 4.011040096235462e-05, "loss": 0.3015, "step": 176000 }, { "epoch": 0.99, "learning_rate": 4.010477973209217e-05, "loss": 0.2976, "step": 176100 }, { "epoch": 0.99, "learning_rate": 4.0099158501829714e-05, "loss": 0.3109, "step": 176200 }, { "epoch": 0.99, "learning_rate": 4.009353727156725e-05, "loss": 0.3013, "step": 176300 }, { "epoch": 0.99, "learning_rate": 4.00879160413048e-05, "loss": 0.2935, "step": 176400 }, { "epoch": 0.99, "learning_rate": 4.0082294811042346e-05, "loss": 0.2984, "step": 176500 }, { "epoch": 0.99, "learning_rate": 4.007672979308252e-05, "loss": 0.3015, "step": 176600 }, { "epoch": 0.99, "learning_rate": 4.007110856282006e-05, "loss": 0.3105, "step": 176700 }, { "epoch": 0.99, "learning_rate": 4.0065487332557606e-05, "loss": 0.3098, "step": 176800 }, { "epoch": 0.99, "learning_rate": 4.005986610229515e-05, "loss": 0.2978, "step": 176900 }, { "epoch": 0.99, "learning_rate": 4.005424487203269e-05, "loss": 0.2867, "step": 177000 }, { "epoch": 1.0, "learning_rate": 4.004862364177024e-05, "loss": 0.2984, "step": 177100 }, { "epoch": 1.0, "learning_rate": 4.0043002411507784e-05, "loss": 0.2934, "step": 177200 }, { "epoch": 1.0, "learning_rate": 4.003738118124533e-05, "loss": 0.2944, "step": 177300 }, { "epoch": 1.0, "learning_rate": 4.003175995098287e-05, "loss": 0.2971, "step": 177400 }, { "epoch": 1.0, "learning_rate": 4.0026138720720416e-05, "loss": 0.2925, "step": 177500 }, { "epoch": 1.0, "learning_rate": 4.002051749045796e-05, "loss": 0.299, "step": 177600 }, { "epoch": 1.0, "learning_rate": 4.001489626019551e-05, "loss": 0.302, "step": 177700 }, { "epoch": 1.0, "learning_rate": 4.0009275029933055e-05, "loss": 0.2972, "step": 177800 }, { "epoch": 1.0, "eval_bleu": 73.3024, "eval_cer": 2.836, "eval_chrF": 94.14025518241344, "eval_gen_len": 16.705368, "eval_loss": 0.5591091513633728, "eval_runtime": 7040.9934, "eval_samples_per_second": 35.506, "eval_steps_per_second": 0.555, "eval_wer": 14.9407, "step": 177897 }, { "epoch": 1.0, "learning_rate": 4.00036537996706e-05, "loss": 0.3027, "step": 177900 }, { "epoch": 1.0, "learning_rate": 3.999803256940815e-05, "loss": 0.2924, "step": 178000 }, { "epoch": 1.0, "learning_rate": 3.999241133914569e-05, "loss": 0.2863, "step": 178100 }, { "epoch": 1.0, "learning_rate": 3.998679010888323e-05, "loss": 0.2799, "step": 178200 }, { "epoch": 1.0, "learning_rate": 3.998116887862078e-05, "loss": 0.2742, "step": 178300 }, { "epoch": 1.0, "learning_rate": 3.997554764835832e-05, "loss": 0.2857, "step": 178400 }, { "epoch": 1.0, "learning_rate": 3.9969926418095865e-05, "loss": 0.2903, "step": 178500 }, { "epoch": 1.0, "learning_rate": 3.996430518783341e-05, "loss": 0.2842, "step": 178600 }, { "epoch": 1.0, "learning_rate": 3.995868395757096e-05, "loss": 0.2902, "step": 178700 }, { "epoch": 1.01, "learning_rate": 3.99530627273085e-05, "loss": 0.284, "step": 178800 }, { "epoch": 1.01, "learning_rate": 3.9947441497046044e-05, "loss": 0.2848, "step": 178900 }, { "epoch": 1.01, "learning_rate": 3.994182026678359e-05, "loss": 0.2899, "step": 179000 }, { "epoch": 1.01, "learning_rate": 3.993619903652113e-05, "loss": 0.285, "step": 179100 }, { "epoch": 1.01, "learning_rate": 3.9930577806258676e-05, "loss": 0.2824, "step": 179200 }, { "epoch": 1.01, "learning_rate": 3.992495657599622e-05, "loss": 0.2885, "step": 179300 }, { "epoch": 1.01, "learning_rate": 3.991933534573377e-05, "loss": 0.2879, "step": 179400 }, { "epoch": 1.01, "learning_rate": 3.9913714115471315e-05, "loss": 0.29, "step": 179500 }, { "epoch": 1.01, "learning_rate": 3.990809288520886e-05, "loss": 0.2791, "step": 179600 }, { "epoch": 1.01, "learning_rate": 3.990247165494641e-05, "loss": 0.2746, "step": 179700 }, { "epoch": 1.01, "learning_rate": 3.989685042468395e-05, "loss": 0.2774, "step": 179800 }, { "epoch": 1.01, "learning_rate": 3.989122919442149e-05, "loss": 0.2824, "step": 179900 }, { "epoch": 1.01, "learning_rate": 3.988560796415904e-05, "loss": 0.2811, "step": 180000 }, { "epoch": 1.01, "learning_rate": 3.9879986733896585e-05, "loss": 0.2759, "step": 180100 }, { "epoch": 1.01, "learning_rate": 3.9874365503634125e-05, "loss": 0.2748, "step": 180200 }, { "epoch": 1.01, "learning_rate": 3.986874427337167e-05, "loss": 0.2849, "step": 180300 }, { "epoch": 1.01, "learning_rate": 3.986312304310922e-05, "loss": 0.2896, "step": 180400 }, { "epoch": 1.01, "learning_rate": 3.985750181284676e-05, "loss": 0.282, "step": 180500 }, { "epoch": 1.02, "learning_rate": 3.98518805825843e-05, "loss": 0.2777, "step": 180600 }, { "epoch": 1.02, "learning_rate": 3.984625935232185e-05, "loss": 0.2771, "step": 180700 }, { "epoch": 1.02, "learning_rate": 3.9840638122059396e-05, "loss": 0.2809, "step": 180800 }, { "epoch": 1.02, "learning_rate": 3.9835073104099563e-05, "loss": 0.2821, "step": 180900 }, { "epoch": 1.02, "learning_rate": 3.982945187383711e-05, "loss": 0.2863, "step": 181000 }, { "epoch": 1.02, "learning_rate": 3.9823830643574656e-05, "loss": 0.2818, "step": 181100 }, { "epoch": 1.02, "learning_rate": 3.98182094133122e-05, "loss": 0.2838, "step": 181200 }, { "epoch": 1.02, "learning_rate": 3.981258818304974e-05, "loss": 0.285, "step": 181300 }, { "epoch": 1.02, "learning_rate": 3.980696695278729e-05, "loss": 0.2736, "step": 181400 }, { "epoch": 1.02, "learning_rate": 3.9801345722524834e-05, "loss": 0.2821, "step": 181500 }, { "epoch": 1.02, "learning_rate": 3.9795724492262374e-05, "loss": 0.2817, "step": 181600 }, { "epoch": 1.02, "learning_rate": 3.979010326199992e-05, "loss": 0.2905, "step": 181700 }, { "epoch": 1.02, "learning_rate": 3.9784482031737466e-05, "loss": 0.2865, "step": 181800 }, { "epoch": 1.02, "learning_rate": 3.977886080147501e-05, "loss": 0.2769, "step": 181900 }, { "epoch": 1.02, "learning_rate": 3.977323957121256e-05, "loss": 0.2862, "step": 182000 }, { "epoch": 1.02, "learning_rate": 3.9767618340950105e-05, "loss": 0.2845, "step": 182100 }, { "epoch": 1.02, "learning_rate": 3.976199711068765e-05, "loss": 0.2818, "step": 182200 }, { "epoch": 1.02, "learning_rate": 3.975637588042519e-05, "loss": 0.2781, "step": 182300 }, { "epoch": 1.03, "learning_rate": 3.975075465016274e-05, "loss": 0.2883, "step": 182400 }, { "epoch": 1.03, "learning_rate": 3.9745189632202905e-05, "loss": 0.2735, "step": 182500 }, { "epoch": 1.03, "learning_rate": 3.973956840194045e-05, "loss": 0.2864, "step": 182600 }, { "epoch": 1.03, "learning_rate": 3.973394717167799e-05, "loss": 0.2758, "step": 182700 }, { "epoch": 1.03, "learning_rate": 3.972832594141554e-05, "loss": 0.2924, "step": 182800 }, { "epoch": 1.03, "learning_rate": 3.972270471115308e-05, "loss": 0.2872, "step": 182900 }, { "epoch": 1.03, "learning_rate": 3.971708348089063e-05, "loss": 0.2796, "step": 183000 }, { "epoch": 1.03, "learning_rate": 3.9711462250628176e-05, "loss": 0.2797, "step": 183100 }, { "epoch": 1.03, "learning_rate": 3.9705897232668343e-05, "loss": 0.2843, "step": 183200 }, { "epoch": 1.03, "learning_rate": 3.970027600240589e-05, "loss": 0.2785, "step": 183300 }, { "epoch": 1.03, "learning_rate": 3.9694654772143436e-05, "loss": 0.2867, "step": 183400 }, { "epoch": 1.03, "learning_rate": 3.9689033541880976e-05, "loss": 0.2883, "step": 183500 }, { "epoch": 1.03, "learning_rate": 3.968341231161852e-05, "loss": 0.2757, "step": 183600 }, { "epoch": 1.03, "learning_rate": 3.967779108135607e-05, "loss": 0.2897, "step": 183700 }, { "epoch": 1.03, "learning_rate": 3.967216985109361e-05, "loss": 0.2756, "step": 183800 }, { "epoch": 1.03, "learning_rate": 3.9666548620831154e-05, "loss": 0.2814, "step": 183900 }, { "epoch": 1.03, "learning_rate": 3.96609273905687e-05, "loss": 0.2811, "step": 184000 }, { "epoch": 1.03, "learning_rate": 3.9655306160306246e-05, "loss": 0.2859, "step": 184100 }, { "epoch": 1.04, "learning_rate": 3.964968493004379e-05, "loss": 0.2804, "step": 184200 }, { "epoch": 1.04, "learning_rate": 3.964406369978134e-05, "loss": 0.285, "step": 184300 }, { "epoch": 1.04, "learning_rate": 3.9638442469518885e-05, "loss": 0.2808, "step": 184400 }, { "epoch": 1.04, "learning_rate": 3.9632821239256425e-05, "loss": 0.2852, "step": 184500 }, { "epoch": 1.04, "learning_rate": 3.962720000899397e-05, "loss": 0.2849, "step": 184600 }, { "epoch": 1.04, "learning_rate": 3.962157877873152e-05, "loss": 0.2803, "step": 184700 }, { "epoch": 1.04, "learning_rate": 3.9615957548469064e-05, "loss": 0.2839, "step": 184800 }, { "epoch": 1.04, "learning_rate": 3.96103363182066e-05, "loss": 0.2885, "step": 184900 }, { "epoch": 1.04, "learning_rate": 3.960471508794415e-05, "loss": 0.2711, "step": 185000 }, { "epoch": 1.04, "learning_rate": 3.9599093857681696e-05, "loss": 0.2886, "step": 185100 }, { "epoch": 1.04, "learning_rate": 3.9593472627419235e-05, "loss": 0.2865, "step": 185200 }, { "epoch": 1.04, "learning_rate": 3.958790760945941e-05, "loss": 0.2875, "step": 185300 }, { "epoch": 1.04, "learning_rate": 3.9582286379196956e-05, "loss": 0.2854, "step": 185400 }, { "epoch": 1.04, "learning_rate": 3.95766651489345e-05, "loss": 0.2813, "step": 185500 }, { "epoch": 1.04, "learning_rate": 3.957104391867204e-05, "loss": 0.2836, "step": 185600 }, { "epoch": 1.04, "learning_rate": 3.956542268840959e-05, "loss": 0.2871, "step": 185700 }, { "epoch": 1.04, "learning_rate": 3.9559801458147134e-05, "loss": 0.2872, "step": 185800 }, { "epoch": 1.04, "learning_rate": 3.9554180227884674e-05, "loss": 0.2797, "step": 185900 }, { "epoch": 1.05, "learning_rate": 3.954855899762222e-05, "loss": 0.2819, "step": 186000 }, { "epoch": 1.05, "learning_rate": 3.9542937767359766e-05, "loss": 0.2818, "step": 186100 }, { "epoch": 1.05, "learning_rate": 3.953731653709731e-05, "loss": 0.2821, "step": 186200 }, { "epoch": 1.05, "learning_rate": 3.953169530683485e-05, "loss": 0.2777, "step": 186300 }, { "epoch": 1.05, "learning_rate": 3.95260740765724e-05, "loss": 0.2802, "step": 186400 }, { "epoch": 1.05, "learning_rate": 3.9520452846309945e-05, "loss": 0.282, "step": 186500 }, { "epoch": 1.05, "learning_rate": 3.951483161604749e-05, "loss": 0.2844, "step": 186600 }, { "epoch": 1.05, "learning_rate": 3.950921038578504e-05, "loss": 0.2829, "step": 186700 }, { "epoch": 1.05, "learning_rate": 3.950358915552258e-05, "loss": 0.2778, "step": 186800 }, { "epoch": 1.05, "learning_rate": 3.949796792526013e-05, "loss": 0.2861, "step": 186900 }, { "epoch": 1.05, "learning_rate": 3.949234669499767e-05, "loss": 0.2835, "step": 187000 }, { "epoch": 1.05, "learning_rate": 3.9486725464735215e-05, "loss": 0.2827, "step": 187100 }, { "epoch": 1.05, "learning_rate": 3.948110423447276e-05, "loss": 0.2886, "step": 187200 }, { "epoch": 1.05, "learning_rate": 3.94754830042103e-05, "loss": 0.2764, "step": 187300 }, { "epoch": 1.05, "learning_rate": 3.946986177394785e-05, "loss": 0.2807, "step": 187400 }, { "epoch": 1.05, "learning_rate": 3.9464240543685394e-05, "loss": 0.2849, "step": 187500 }, { "epoch": 1.05, "learning_rate": 3.945861931342294e-05, "loss": 0.2846, "step": 187600 }, { "epoch": 1.06, "learning_rate": 3.945299808316048e-05, "loss": 0.2836, "step": 187700 }, { "epoch": 1.06, "learning_rate": 3.9447376852898026e-05, "loss": 0.2829, "step": 187800 }, { "epoch": 1.06, "learning_rate": 3.944175562263557e-05, "loss": 0.2847, "step": 187900 }, { "epoch": 1.06, "learning_rate": 3.943613439237311e-05, "loss": 0.2825, "step": 188000 }, { "epoch": 1.06, "learning_rate": 3.943051316211066e-05, "loss": 0.2894, "step": 188100 }, { "epoch": 1.06, "learning_rate": 3.9424891931848204e-05, "loss": 0.2843, "step": 188200 }, { "epoch": 1.06, "learning_rate": 3.941927070158575e-05, "loss": 0.2798, "step": 188300 }, { "epoch": 1.06, "learning_rate": 3.94136494713233e-05, "loss": 0.2863, "step": 188400 }, { "epoch": 1.06, "learning_rate": 3.940802824106084e-05, "loss": 0.2782, "step": 188500 }, { "epoch": 1.06, "learning_rate": 3.940240701079839e-05, "loss": 0.2852, "step": 188600 }, { "epoch": 1.06, "learning_rate": 3.939678578053593e-05, "loss": 0.2784, "step": 188700 }, { "epoch": 1.06, "learning_rate": 3.9391164550273475e-05, "loss": 0.2853, "step": 188800 }, { "epoch": 1.06, "learning_rate": 3.938554332001102e-05, "loss": 0.278, "step": 188900 }, { "epoch": 1.06, "learning_rate": 3.937992208974857e-05, "loss": 0.2874, "step": 189000 }, { "epoch": 1.06, "learning_rate": 3.937430085948611e-05, "loss": 0.2824, "step": 189100 }, { "epoch": 1.06, "learning_rate": 3.936867962922365e-05, "loss": 0.2816, "step": 189200 }, { "epoch": 1.06, "learning_rate": 3.93630583989612e-05, "loss": 0.2794, "step": 189300 }, { "epoch": 1.06, "learning_rate": 3.935743716869874e-05, "loss": 0.2841, "step": 189400 }, { "epoch": 1.07, "learning_rate": 3.9351815938436285e-05, "loss": 0.279, "step": 189500 }, { "epoch": 1.07, "learning_rate": 3.934619470817383e-05, "loss": 0.2812, "step": 189600 }, { "epoch": 1.07, "learning_rate": 3.934057347791138e-05, "loss": 0.273, "step": 189700 }, { "epoch": 1.07, "learning_rate": 3.933495224764892e-05, "loss": 0.2898, "step": 189800 }, { "epoch": 1.07, "learning_rate": 3.9329331017386464e-05, "loss": 0.2819, "step": 189900 }, { "epoch": 1.07, "learning_rate": 3.932370978712402e-05, "loss": 0.2775, "step": 190000 }, { "epoch": 1.07, "learning_rate": 3.9318088556861556e-05, "loss": 0.2866, "step": 190100 }, { "epoch": 1.07, "learning_rate": 3.93124673265991e-05, "loss": 0.2874, "step": 190200 }, { "epoch": 1.07, "learning_rate": 3.930684609633665e-05, "loss": 0.2833, "step": 190300 }, { "epoch": 1.07, "learning_rate": 3.9301224866074195e-05, "loss": 0.2823, "step": 190400 }, { "epoch": 1.07, "learning_rate": 3.9295603635811735e-05, "loss": 0.2851, "step": 190500 }, { "epoch": 1.07, "learning_rate": 3.928998240554928e-05, "loss": 0.2856, "step": 190600 }, { "epoch": 1.07, "learning_rate": 3.928436117528683e-05, "loss": 0.2784, "step": 190700 }, { "epoch": 1.07, "learning_rate": 3.927873994502437e-05, "loss": 0.2831, "step": 190800 }, { "epoch": 1.07, "learning_rate": 3.927311871476191e-05, "loss": 0.2805, "step": 190900 }, { "epoch": 1.07, "learning_rate": 3.926749748449946e-05, "loss": 0.2806, "step": 191000 }, { "epoch": 1.07, "learning_rate": 3.9261876254237005e-05, "loss": 0.2799, "step": 191100 }, { "epoch": 1.07, "learning_rate": 3.9256255023974545e-05, "loss": 0.282, "step": 191200 }, { "epoch": 1.08, "learning_rate": 3.925063379371209e-05, "loss": 0.2842, "step": 191300 }, { "epoch": 1.08, "learning_rate": 3.924501256344964e-05, "loss": 0.2793, "step": 191400 }, { "epoch": 1.08, "learning_rate": 3.9239391333187184e-05, "loss": 0.2864, "step": 191500 }, { "epoch": 1.08, "learning_rate": 3.923377010292473e-05, "loss": 0.2811, "step": 191600 }, { "epoch": 1.08, "learning_rate": 3.9228148872662276e-05, "loss": 0.2871, "step": 191700 }, { "epoch": 1.08, "learning_rate": 3.922252764239982e-05, "loss": 0.2762, "step": 191800 }, { "epoch": 1.08, "learning_rate": 3.921690641213736e-05, "loss": 0.2763, "step": 191900 }, { "epoch": 1.08, "learning_rate": 3.921128518187491e-05, "loss": 0.2758, "step": 192000 }, { "epoch": 1.08, "learning_rate": 3.9205663951612455e-05, "loss": 0.2815, "step": 192100 }, { "epoch": 1.08, "learning_rate": 3.9200042721349994e-05, "loss": 0.2806, "step": 192200 }, { "epoch": 1.08, "learning_rate": 3.919442149108754e-05, "loss": 0.2847, "step": 192300 }, { "epoch": 1.08, "learning_rate": 3.918880026082509e-05, "loss": 0.2811, "step": 192400 }, { "epoch": 1.08, "learning_rate": 3.918317903056263e-05, "loss": 0.2897, "step": 192500 }, { "epoch": 1.08, "learning_rate": 3.917755780030017e-05, "loss": 0.2782, "step": 192600 }, { "epoch": 1.08, "learning_rate": 3.917193657003772e-05, "loss": 0.2783, "step": 192700 }, { "epoch": 1.08, "learning_rate": 3.9166315339775265e-05, "loss": 0.2786, "step": 192800 }, { "epoch": 1.08, "learning_rate": 3.9160694109512805e-05, "loss": 0.2782, "step": 192900 }, { "epoch": 1.08, "learning_rate": 3.915507287925035e-05, "loss": 0.2796, "step": 193000 }, { "epoch": 1.09, "learning_rate": 3.91494516489879e-05, "loss": 0.2732, "step": 193100 }, { "epoch": 1.09, "learning_rate": 3.9143830418725443e-05, "loss": 0.2854, "step": 193200 }, { "epoch": 1.09, "learning_rate": 3.913820918846299e-05, "loss": 0.2881, "step": 193300 }, { "epoch": 1.09, "learning_rate": 3.9132587958200536e-05, "loss": 0.276, "step": 193400 }, { "epoch": 1.09, "learning_rate": 3.912696672793808e-05, "loss": 0.282, "step": 193500 }, { "epoch": 1.09, "learning_rate": 3.912140170997825e-05, "loss": 0.2811, "step": 193600 }, { "epoch": 1.09, "learning_rate": 3.911578047971579e-05, "loss": 0.2894, "step": 193700 }, { "epoch": 1.09, "learning_rate": 3.9110159249453336e-05, "loss": 0.2815, "step": 193800 }, { "epoch": 1.09, "learning_rate": 3.910453801919088e-05, "loss": 0.2757, "step": 193900 }, { "epoch": 1.09, "learning_rate": 3.909891678892842e-05, "loss": 0.2751, "step": 194000 }, { "epoch": 1.09, "learning_rate": 3.909329555866597e-05, "loss": 0.2815, "step": 194100 }, { "epoch": 1.09, "learning_rate": 3.908767432840352e-05, "loss": 0.2799, "step": 194200 }, { "epoch": 1.09, "learning_rate": 3.908205309814107e-05, "loss": 0.2748, "step": 194300 }, { "epoch": 1.09, "learning_rate": 3.9076431867878607e-05, "loss": 0.2768, "step": 194400 }, { "epoch": 1.09, "learning_rate": 3.907081063761615e-05, "loss": 0.2816, "step": 194500 }, { "epoch": 1.09, "learning_rate": 3.90651894073537e-05, "loss": 0.2813, "step": 194600 }, { "epoch": 1.09, "learning_rate": 3.905962438939387e-05, "loss": 0.2795, "step": 194700 }, { "epoch": 1.1, "learning_rate": 3.9054003159131406e-05, "loss": 0.2765, "step": 194800 }, { "epoch": 1.1, "learning_rate": 3.904838192886895e-05, "loss": 0.2779, "step": 194900 }, { "epoch": 1.1, "learning_rate": 3.90427606986065e-05, "loss": 0.2779, "step": 195000 }, { "epoch": 1.1, "learning_rate": 3.903713946834404e-05, "loss": 0.2783, "step": 195100 }, { "epoch": 1.1, "learning_rate": 3.903151823808159e-05, "loss": 0.2718, "step": 195200 }, { "epoch": 1.1, "learning_rate": 3.902589700781914e-05, "loss": 0.2881, "step": 195300 }, { "epoch": 1.1, "learning_rate": 3.902027577755668e-05, "loss": 0.2825, "step": 195400 }, { "epoch": 1.1, "learning_rate": 3.9014654547294223e-05, "loss": 0.278, "step": 195500 }, { "epoch": 1.1, "learning_rate": 3.900903331703177e-05, "loss": 0.2716, "step": 195600 }, { "epoch": 1.1, "learning_rate": 3.9003412086769316e-05, "loss": 0.276, "step": 195700 }, { "epoch": 1.1, "learning_rate": 3.8997790856506855e-05, "loss": 0.2759, "step": 195800 }, { "epoch": 1.1, "learning_rate": 3.89921696262444e-05, "loss": 0.2912, "step": 195900 }, { "epoch": 1.1, "learning_rate": 3.898654839598195e-05, "loss": 0.2797, "step": 196000 }, { "epoch": 1.1, "learning_rate": 3.898092716571949e-05, "loss": 0.2723, "step": 196100 }, { "epoch": 1.1, "learning_rate": 3.8975305935457034e-05, "loss": 0.2759, "step": 196200 }, { "epoch": 1.1, "learning_rate": 3.896968470519458e-05, "loss": 0.2746, "step": 196300 }, { "epoch": 1.1, "learning_rate": 3.8964063474932126e-05, "loss": 0.2796, "step": 196400 }, { "epoch": 1.1, "learning_rate": 3.8958442244669666e-05, "loss": 0.2752, "step": 196500 }, { "epoch": 1.11, "learning_rate": 3.895282101440721e-05, "loss": 0.278, "step": 196600 }, { "epoch": 1.11, "learning_rate": 3.894719978414476e-05, "loss": 0.274, "step": 196700 }, { "epoch": 1.11, "learning_rate": 3.8941578553882305e-05, "loss": 0.2767, "step": 196800 }, { "epoch": 1.11, "learning_rate": 3.893595732361985e-05, "loss": 0.2791, "step": 196900 }, { "epoch": 1.11, "learning_rate": 3.89303360933574e-05, "loss": 0.2781, "step": 197000 }, { "epoch": 1.11, "learning_rate": 3.8924714863094944e-05, "loss": 0.2882, "step": 197100 }, { "epoch": 1.11, "learning_rate": 3.891909363283248e-05, "loss": 0.2808, "step": 197200 }, { "epoch": 1.11, "learning_rate": 3.891347240257003e-05, "loss": 0.2745, "step": 197300 }, { "epoch": 1.11, "learning_rate": 3.8907851172307576e-05, "loss": 0.2792, "step": 197400 }, { "epoch": 1.11, "learning_rate": 3.8902229942045115e-05, "loss": 0.2796, "step": 197500 }, { "epoch": 1.11, "learning_rate": 3.889660871178266e-05, "loss": 0.2686, "step": 197600 }, { "epoch": 1.11, "learning_rate": 3.889098748152021e-05, "loss": 0.2774, "step": 197700 }, { "epoch": 1.11, "learning_rate": 3.8885366251257754e-05, "loss": 0.2822, "step": 197800 }, { "epoch": 1.11, "learning_rate": 3.8879745020995293e-05, "loss": 0.2792, "step": 197900 }, { "epoch": 1.11, "learning_rate": 3.887412379073284e-05, "loss": 0.2729, "step": 198000 }, { "epoch": 1.11, "learning_rate": 3.8868502560470386e-05, "loss": 0.2798, "step": 198100 }, { "epoch": 1.11, "learning_rate": 3.8862881330207925e-05, "loss": 0.2804, "step": 198200 }, { "epoch": 1.11, "learning_rate": 3.885726009994547e-05, "loss": 0.2771, "step": 198300 }, { "epoch": 1.12, "learning_rate": 3.885163886968302e-05, "loss": 0.2778, "step": 198400 }, { "epoch": 1.12, "learning_rate": 3.884601763942057e-05, "loss": 0.282, "step": 198500 }, { "epoch": 1.12, "learning_rate": 3.884039640915811e-05, "loss": 0.2775, "step": 198600 }, { "epoch": 1.12, "learning_rate": 3.883477517889566e-05, "loss": 0.2767, "step": 198700 }, { "epoch": 1.12, "learning_rate": 3.88291539486332e-05, "loss": 0.2742, "step": 198800 }, { "epoch": 1.12, "learning_rate": 3.882353271837074e-05, "loss": 0.2802, "step": 198900 }, { "epoch": 1.12, "learning_rate": 3.881791148810829e-05, "loss": 0.2818, "step": 199000 }, { "epoch": 1.12, "learning_rate": 3.8812290257845835e-05, "loss": 0.2772, "step": 199100 }, { "epoch": 1.12, "learning_rate": 3.8806725239886e-05, "loss": 0.2739, "step": 199200 }, { "epoch": 1.12, "learning_rate": 3.880110400962354e-05, "loss": 0.2774, "step": 199300 }, { "epoch": 1.12, "learning_rate": 3.8795482779361095e-05, "loss": 0.2772, "step": 199400 }, { "epoch": 1.12, "learning_rate": 3.878986154909864e-05, "loss": 0.2802, "step": 199500 }, { "epoch": 1.12, "learning_rate": 3.878424031883619e-05, "loss": 0.2746, "step": 199600 }, { "epoch": 1.12, "learning_rate": 3.877861908857373e-05, "loss": 0.2773, "step": 199700 }, { "epoch": 1.12, "learning_rate": 3.8772997858311274e-05, "loss": 0.2817, "step": 199800 }, { "epoch": 1.12, "learning_rate": 3.876737662804882e-05, "loss": 0.2814, "step": 199900 }, { "epoch": 1.12, "learning_rate": 3.876175539778636e-05, "loss": 0.2741, "step": 200000 }, { "epoch": 1.12, "learning_rate": 3.8756134167523906e-05, "loss": 0.2765, "step": 200100 }, { "epoch": 1.13, "learning_rate": 3.875051293726145e-05, "loss": 0.2738, "step": 200200 }, { "epoch": 1.13, "learning_rate": 3.8744891706999e-05, "loss": 0.2794, "step": 200300 }, { "epoch": 1.13, "learning_rate": 3.873927047673654e-05, "loss": 0.275, "step": 200400 }, { "epoch": 1.13, "learning_rate": 3.8733649246474084e-05, "loss": 0.2679, "step": 200500 }, { "epoch": 1.13, "learning_rate": 3.872802801621163e-05, "loss": 0.2781, "step": 200600 }, { "epoch": 1.13, "learning_rate": 3.872240678594917e-05, "loss": 0.2747, "step": 200700 }, { "epoch": 1.13, "learning_rate": 3.8716785555686716e-05, "loss": 0.2794, "step": 200800 }, { "epoch": 1.13, "learning_rate": 3.871122053772689e-05, "loss": 0.2838, "step": 200900 }, { "epoch": 1.13, "learning_rate": 3.870559930746444e-05, "loss": 0.2697, "step": 201000 }, { "epoch": 1.13, "learning_rate": 3.8699978077201976e-05, "loss": 0.2834, "step": 201100 }, { "epoch": 1.13, "learning_rate": 3.869435684693952e-05, "loss": 0.268, "step": 201200 }, { "epoch": 1.13, "learning_rate": 3.868873561667707e-05, "loss": 0.2834, "step": 201300 }, { "epoch": 1.13, "learning_rate": 3.8683114386414615e-05, "loss": 0.2749, "step": 201400 }, { "epoch": 1.13, "learning_rate": 3.8677493156152155e-05, "loss": 0.2797, "step": 201500 }, { "epoch": 1.13, "learning_rate": 3.86718719258897e-05, "loss": 0.2799, "step": 201600 }, { "epoch": 1.13, "learning_rate": 3.866625069562725e-05, "loss": 0.274, "step": 201700 }, { "epoch": 1.13, "learning_rate": 3.866062946536479e-05, "loss": 0.279, "step": 201800 }, { "epoch": 1.13, "learning_rate": 3.865500823510233e-05, "loss": 0.2747, "step": 201900 }, { "epoch": 1.14, "learning_rate": 3.864938700483988e-05, "loss": 0.2799, "step": 202000 }, { "epoch": 1.14, "learning_rate": 3.8643765774577426e-05, "loss": 0.2792, "step": 202100 }, { "epoch": 1.14, "learning_rate": 3.863814454431497e-05, "loss": 0.2707, "step": 202200 }, { "epoch": 1.14, "learning_rate": 3.863252331405252e-05, "loss": 0.2757, "step": 202300 }, { "epoch": 1.14, "learning_rate": 3.8626902083790064e-05, "loss": 0.2736, "step": 202400 }, { "epoch": 1.14, "learning_rate": 3.8621280853527604e-05, "loss": 0.2831, "step": 202500 }, { "epoch": 1.14, "learning_rate": 3.861565962326515e-05, "loss": 0.2812, "step": 202600 }, { "epoch": 1.14, "learning_rate": 3.8610038393002696e-05, "loss": 0.2722, "step": 202700 }, { "epoch": 1.14, "learning_rate": 3.860441716274024e-05, "loss": 0.2679, "step": 202800 }, { "epoch": 1.14, "learning_rate": 3.859879593247778e-05, "loss": 0.2818, "step": 202900 }, { "epoch": 1.14, "learning_rate": 3.859317470221533e-05, "loss": 0.2746, "step": 203000 }, { "epoch": 1.14, "learning_rate": 3.8587553471952875e-05, "loss": 0.2683, "step": 203100 }, { "epoch": 1.14, "learning_rate": 3.8581932241690414e-05, "loss": 0.2777, "step": 203200 }, { "epoch": 1.14, "learning_rate": 3.857631101142796e-05, "loss": 0.2751, "step": 203300 }, { "epoch": 1.14, "learning_rate": 3.857068978116551e-05, "loss": 0.2683, "step": 203400 }, { "epoch": 1.14, "learning_rate": 3.856506855090305e-05, "loss": 0.2828, "step": 203500 }, { "epoch": 1.14, "learning_rate": 3.855944732064059e-05, "loss": 0.2756, "step": 203600 }, { "epoch": 1.15, "learning_rate": 3.8553826090378146e-05, "loss": 0.2755, "step": 203700 }, { "epoch": 1.15, "learning_rate": 3.854820486011569e-05, "loss": 0.2796, "step": 203800 }, { "epoch": 1.15, "learning_rate": 3.854258362985323e-05, "loss": 0.269, "step": 203900 }, { "epoch": 1.15, "learning_rate": 3.853696239959078e-05, "loss": 0.2729, "step": 204000 }, { "epoch": 1.15, "learning_rate": 3.8531341169328324e-05, "loss": 0.2754, "step": 204100 }, { "epoch": 1.15, "learning_rate": 3.852571993906587e-05, "loss": 0.2814, "step": 204200 }, { "epoch": 1.15, "learning_rate": 3.852009870880341e-05, "loss": 0.2739, "step": 204300 }, { "epoch": 1.15, "learning_rate": 3.8514477478540956e-05, "loss": 0.28, "step": 204400 }, { "epoch": 1.15, "learning_rate": 3.85088562482785e-05, "loss": 0.276, "step": 204500 }, { "epoch": 1.15, "learning_rate": 3.850323501801604e-05, "loss": 0.2823, "step": 204600 }, { "epoch": 1.15, "learning_rate": 3.849761378775359e-05, "loss": 0.2703, "step": 204700 }, { "epoch": 1.15, "learning_rate": 3.8491992557491134e-05, "loss": 0.2764, "step": 204800 }, { "epoch": 1.15, "learning_rate": 3.848637132722868e-05, "loss": 0.2731, "step": 204900 }, { "epoch": 1.15, "learning_rate": 3.848075009696622e-05, "loss": 0.2729, "step": 205000 }, { "epoch": 1.15, "learning_rate": 3.8475128866703766e-05, "loss": 0.2767, "step": 205100 }, { "epoch": 1.15, "learning_rate": 3.846956384874394e-05, "loss": 0.279, "step": 205200 }, { "epoch": 1.15, "learning_rate": 3.846394261848148e-05, "loss": 0.2738, "step": 205300 }, { "epoch": 1.15, "learning_rate": 3.8458321388219027e-05, "loss": 0.2719, "step": 205400 }, { "epoch": 1.16, "learning_rate": 3.845270015795657e-05, "loss": 0.2738, "step": 205500 }, { "epoch": 1.16, "learning_rate": 3.844707892769412e-05, "loss": 0.2721, "step": 205600 }, { "epoch": 1.16, "learning_rate": 3.844145769743166e-05, "loss": 0.2765, "step": 205700 }, { "epoch": 1.16, "learning_rate": 3.8435836467169205e-05, "loss": 0.2705, "step": 205800 }, { "epoch": 1.16, "learning_rate": 3.843021523690675e-05, "loss": 0.2824, "step": 205900 }, { "epoch": 1.16, "learning_rate": 3.842459400664429e-05, "loss": 0.2779, "step": 206000 }, { "epoch": 1.16, "learning_rate": 3.841897277638184e-05, "loss": 0.2676, "step": 206100 }, { "epoch": 1.16, "learning_rate": 3.841335154611938e-05, "loss": 0.2688, "step": 206200 }, { "epoch": 1.16, "learning_rate": 3.840773031585693e-05, "loss": 0.271, "step": 206300 }, { "epoch": 1.16, "learning_rate": 3.8402109085594476e-05, "loss": 0.2851, "step": 206400 }, { "epoch": 1.16, "learning_rate": 3.839648785533202e-05, "loss": 0.2722, "step": 206500 }, { "epoch": 1.16, "learning_rate": 3.839086662506957e-05, "loss": 0.2709, "step": 206600 }, { "epoch": 1.16, "learning_rate": 3.838524539480711e-05, "loss": 0.2705, "step": 206700 }, { "epoch": 1.16, "learning_rate": 3.8379624164544654e-05, "loss": 0.2728, "step": 206800 }, { "epoch": 1.16, "learning_rate": 3.83740029342822e-05, "loss": 0.2732, "step": 206900 }, { "epoch": 1.16, "learning_rate": 3.836838170401975e-05, "loss": 0.28, "step": 207000 }, { "epoch": 1.16, "learning_rate": 3.8362760473757286e-05, "loss": 0.275, "step": 207100 }, { "epoch": 1.16, "learning_rate": 3.835713924349483e-05, "loss": 0.2771, "step": 207200 }, { "epoch": 1.17, "learning_rate": 3.835151801323238e-05, "loss": 0.2692, "step": 207300 }, { "epoch": 1.17, "learning_rate": 3.834589678296992e-05, "loss": 0.2747, "step": 207400 }, { "epoch": 1.17, "learning_rate": 3.8340275552707465e-05, "loss": 0.2742, "step": 207500 }, { "epoch": 1.17, "learning_rate": 3.833465432244501e-05, "loss": 0.2779, "step": 207600 }, { "epoch": 1.17, "learning_rate": 3.832903309218256e-05, "loss": 0.2772, "step": 207700 }, { "epoch": 1.17, "learning_rate": 3.8323411861920097e-05, "loss": 0.2741, "step": 207800 }, { "epoch": 1.17, "learning_rate": 3.831779063165765e-05, "loss": 0.2755, "step": 207900 }, { "epoch": 1.17, "learning_rate": 3.8312169401395196e-05, "loss": 0.2723, "step": 208000 }, { "epoch": 1.17, "learning_rate": 3.8306548171132735e-05, "loss": 0.2778, "step": 208100 }, { "epoch": 1.17, "learning_rate": 3.83009831531729e-05, "loss": 0.2775, "step": 208200 }, { "epoch": 1.17, "learning_rate": 3.829536192291045e-05, "loss": 0.2672, "step": 208300 }, { "epoch": 1.17, "learning_rate": 3.8289740692647996e-05, "loss": 0.2726, "step": 208400 }, { "epoch": 1.17, "learning_rate": 3.8284119462385535e-05, "loss": 0.2756, "step": 208500 }, { "epoch": 1.17, "learning_rate": 3.827849823212308e-05, "loss": 0.2726, "step": 208600 }, { "epoch": 1.17, "learning_rate": 3.827287700186063e-05, "loss": 0.2709, "step": 208700 }, { "epoch": 1.17, "learning_rate": 3.8267255771598174e-05, "loss": 0.2737, "step": 208800 }, { "epoch": 1.17, "learning_rate": 3.826163454133572e-05, "loss": 0.267, "step": 208900 }, { "epoch": 1.17, "learning_rate": 3.8256013311073266e-05, "loss": 0.2748, "step": 209000 }, { "epoch": 1.18, "learning_rate": 3.825039208081081e-05, "loss": 0.2797, "step": 209100 }, { "epoch": 1.18, "learning_rate": 3.824477085054835e-05, "loss": 0.2726, "step": 209200 }, { "epoch": 1.18, "learning_rate": 3.82391496202859e-05, "loss": 0.2735, "step": 209300 }, { "epoch": 1.18, "learning_rate": 3.8233528390023445e-05, "loss": 0.2807, "step": 209400 }, { "epoch": 1.18, "learning_rate": 3.822790715976099e-05, "loss": 0.2674, "step": 209500 }, { "epoch": 1.18, "learning_rate": 3.822228592949853e-05, "loss": 0.2769, "step": 209600 }, { "epoch": 1.18, "learning_rate": 3.821666469923608e-05, "loss": 0.2698, "step": 209700 }, { "epoch": 1.18, "learning_rate": 3.821104346897362e-05, "loss": 0.2708, "step": 209800 }, { "epoch": 1.18, "learning_rate": 3.820542223871116e-05, "loss": 0.2769, "step": 209900 }, { "epoch": 1.18, "learning_rate": 3.819980100844871e-05, "loss": 0.2785, "step": 210000 }, { "epoch": 1.18, "learning_rate": 3.819423599048888e-05, "loss": 0.2744, "step": 210100 }, { "epoch": 1.18, "learning_rate": 3.818861476022643e-05, "loss": 0.2811, "step": 210200 }, { "epoch": 1.18, "learning_rate": 3.818299352996397e-05, "loss": 0.274, "step": 210300 }, { "epoch": 1.18, "learning_rate": 3.8177372299701515e-05, "loss": 0.2709, "step": 210400 }, { "epoch": 1.18, "learning_rate": 3.817175106943906e-05, "loss": 0.2778, "step": 210500 }, { "epoch": 1.18, "learning_rate": 3.816612983917661e-05, "loss": 0.2695, "step": 210600 }, { "epoch": 1.18, "learning_rate": 3.816050860891415e-05, "loss": 0.2703, "step": 210700 }, { "epoch": 1.18, "learning_rate": 3.8154887378651694e-05, "loss": 0.2696, "step": 210800 }, { "epoch": 1.19, "learning_rate": 3.814926614838924e-05, "loss": 0.2696, "step": 210900 }, { "epoch": 1.19, "learning_rate": 3.814364491812678e-05, "loss": 0.2717, "step": 211000 }, { "epoch": 1.19, "learning_rate": 3.8138023687864326e-05, "loss": 0.2759, "step": 211100 }, { "epoch": 1.19, "learning_rate": 3.813240245760187e-05, "loss": 0.2667, "step": 211200 }, { "epoch": 1.19, "learning_rate": 3.812678122733942e-05, "loss": 0.2674, "step": 211300 }, { "epoch": 1.19, "learning_rate": 3.812115999707696e-05, "loss": 0.2709, "step": 211400 }, { "epoch": 1.19, "learning_rate": 3.8115538766814504e-05, "loss": 0.2684, "step": 211500 }, { "epoch": 1.19, "learning_rate": 3.810991753655205e-05, "loss": 0.2744, "step": 211600 }, { "epoch": 1.19, "learning_rate": 3.81042963062896e-05, "loss": 0.2756, "step": 211700 }, { "epoch": 1.19, "learning_rate": 3.809867507602714e-05, "loss": 0.2722, "step": 211800 }, { "epoch": 1.19, "learning_rate": 3.809305384576469e-05, "loss": 0.2725, "step": 211900 }, { "epoch": 1.19, "learning_rate": 3.8087432615502236e-05, "loss": 0.2755, "step": 212000 }, { "epoch": 1.19, "learning_rate": 3.8081811385239775e-05, "loss": 0.2751, "step": 212100 }, { "epoch": 1.19, "learning_rate": 3.807619015497732e-05, "loss": 0.2702, "step": 212200 }, { "epoch": 1.19, "learning_rate": 3.807056892471487e-05, "loss": 0.2659, "step": 212300 }, { "epoch": 1.19, "learning_rate": 3.806494769445241e-05, "loss": 0.2724, "step": 212400 }, { "epoch": 1.19, "learning_rate": 3.805932646418995e-05, "loss": 0.2653, "step": 212500 }, { "epoch": 1.2, "learning_rate": 3.80537052339275e-05, "loss": 0.2696, "step": 212600 }, { "epoch": 1.2, "learning_rate": 3.8048084003665046e-05, "loss": 0.2748, "step": 212700 }, { "epoch": 1.2, "learning_rate": 3.8042462773402585e-05, "loss": 0.2761, "step": 212800 }, { "epoch": 1.2, "learning_rate": 3.803684154314013e-05, "loss": 0.2695, "step": 212900 }, { "epoch": 1.2, "learning_rate": 3.803122031287768e-05, "loss": 0.2775, "step": 213000 }, { "epoch": 1.2, "learning_rate": 3.8025599082615224e-05, "loss": 0.2709, "step": 213100 }, { "epoch": 1.2, "learning_rate": 3.801997785235277e-05, "loss": 0.2661, "step": 213200 }, { "epoch": 1.2, "learning_rate": 3.801435662209032e-05, "loss": 0.2677, "step": 213300 }, { "epoch": 1.2, "learning_rate": 3.800873539182786e-05, "loss": 0.2683, "step": 213400 }, { "epoch": 1.2, "learning_rate": 3.80031141615654e-05, "loss": 0.2767, "step": 213500 }, { "epoch": 1.2, "learning_rate": 3.799749293130295e-05, "loss": 0.2723, "step": 213600 }, { "epoch": 1.2, "learning_rate": 3.7991871701040495e-05, "loss": 0.28, "step": 213700 }, { "epoch": 1.2, "learning_rate": 3.7986250470778035e-05, "loss": 0.2732, "step": 213800 }, { "epoch": 1.2, "learning_rate": 3.798062924051558e-05, "loss": 0.2735, "step": 213900 }, { "epoch": 1.2, "learning_rate": 3.797500801025313e-05, "loss": 0.2653, "step": 214000 }, { "epoch": 1.2, "learning_rate": 3.7969386779990673e-05, "loss": 0.2734, "step": 214100 }, { "epoch": 1.2, "learning_rate": 3.796376554972821e-05, "loss": 0.2723, "step": 214200 }, { "epoch": 1.2, "learning_rate": 3.795814431946576e-05, "loss": 0.28, "step": 214300 }, { "epoch": 1.21, "learning_rate": 3.7952523089203305e-05, "loss": 0.2746, "step": 214400 }, { "epoch": 1.21, "learning_rate": 3.7946901858940845e-05, "loss": 0.2669, "step": 214500 }, { "epoch": 1.21, "learning_rate": 3.794128062867839e-05, "loss": 0.2753, "step": 214600 }, { "epoch": 1.21, "learning_rate": 3.793565939841594e-05, "loss": 0.2681, "step": 214700 }, { "epoch": 1.21, "learning_rate": 3.7930038168153484e-05, "loss": 0.2693, "step": 214800 }, { "epoch": 1.21, "learning_rate": 3.792441693789103e-05, "loss": 0.2739, "step": 214900 }, { "epoch": 1.21, "learning_rate": 3.7918795707628576e-05, "loss": 0.2698, "step": 215000 }, { "epoch": 1.21, "learning_rate": 3.791317447736612e-05, "loss": 0.2755, "step": 215100 }, { "epoch": 1.21, "learning_rate": 3.790755324710366e-05, "loss": 0.2746, "step": 215200 }, { "epoch": 1.21, "learning_rate": 3.790193201684121e-05, "loss": 0.2719, "step": 215300 }, { "epoch": 1.21, "learning_rate": 3.7896310786578755e-05, "loss": 0.2796, "step": 215400 }, { "epoch": 1.21, "learning_rate": 3.7890689556316294e-05, "loss": 0.2741, "step": 215500 }, { "epoch": 1.21, "learning_rate": 3.788506832605384e-05, "loss": 0.2684, "step": 215600 }, { "epoch": 1.21, "learning_rate": 3.787944709579139e-05, "loss": 0.2702, "step": 215700 }, { "epoch": 1.21, "learning_rate": 3.787382586552893e-05, "loss": 0.2658, "step": 215800 }, { "epoch": 1.21, "learning_rate": 3.786820463526647e-05, "loss": 0.2768, "step": 215900 }, { "epoch": 1.21, "learning_rate": 3.786258340500402e-05, "loss": 0.2756, "step": 216000 }, { "epoch": 1.21, "learning_rate": 3.7856962174741565e-05, "loss": 0.276, "step": 216100 }, { "epoch": 1.22, "learning_rate": 3.7851340944479105e-05, "loss": 0.2694, "step": 216200 }, { "epoch": 1.22, "learning_rate": 3.784571971421665e-05, "loss": 0.2781, "step": 216300 }, { "epoch": 1.22, "learning_rate": 3.78400984839542e-05, "loss": 0.2668, "step": 216400 }, { "epoch": 1.22, "learning_rate": 3.783453346599437e-05, "loss": 0.2741, "step": 216500 }, { "epoch": 1.22, "learning_rate": 3.782891223573191e-05, "loss": 0.2682, "step": 216600 }, { "epoch": 1.22, "learning_rate": 3.782329100546946e-05, "loss": 0.273, "step": 216700 }, { "epoch": 1.22, "learning_rate": 3.7817669775207004e-05, "loss": 0.2694, "step": 216800 }, { "epoch": 1.22, "learning_rate": 3.781204854494455e-05, "loss": 0.2721, "step": 216900 }, { "epoch": 1.22, "learning_rate": 3.780642731468209e-05, "loss": 0.2658, "step": 217000 }, { "epoch": 1.22, "learning_rate": 3.7800862296722264e-05, "loss": 0.2699, "step": 217100 }, { "epoch": 1.22, "learning_rate": 3.779524106645981e-05, "loss": 0.266, "step": 217200 }, { "epoch": 1.22, "learning_rate": 3.7789619836197356e-05, "loss": 0.2701, "step": 217300 }, { "epoch": 1.22, "learning_rate": 3.7783998605934896e-05, "loss": 0.2733, "step": 217400 }, { "epoch": 1.22, "learning_rate": 3.777837737567244e-05, "loss": 0.2717, "step": 217500 }, { "epoch": 1.22, "learning_rate": 3.777275614540999e-05, "loss": 0.2775, "step": 217600 }, { "epoch": 1.22, "learning_rate": 3.776713491514753e-05, "loss": 0.2774, "step": 217700 }, { "epoch": 1.22, "learning_rate": 3.7761513684885074e-05, "loss": 0.271, "step": 217800 }, { "epoch": 1.22, "learning_rate": 3.775589245462262e-05, "loss": 0.2728, "step": 217900 }, { "epoch": 1.23, "learning_rate": 3.775027122436017e-05, "loss": 0.2701, "step": 218000 }, { "epoch": 1.23, "learning_rate": 3.7744649994097706e-05, "loss": 0.2731, "step": 218100 }, { "epoch": 1.23, "learning_rate": 3.773902876383525e-05, "loss": 0.2674, "step": 218200 }, { "epoch": 1.23, "learning_rate": 3.77334075335728e-05, "loss": 0.2681, "step": 218300 }, { "epoch": 1.23, "learning_rate": 3.7727786303310345e-05, "loss": 0.2729, "step": 218400 }, { "epoch": 1.23, "learning_rate": 3.772216507304789e-05, "loss": 0.2779, "step": 218500 }, { "epoch": 1.23, "learning_rate": 3.771654384278544e-05, "loss": 0.268, "step": 218600 }, { "epoch": 1.23, "learning_rate": 3.7710922612522984e-05, "loss": 0.2738, "step": 218700 }, { "epoch": 1.23, "learning_rate": 3.7705301382260523e-05, "loss": 0.2645, "step": 218800 }, { "epoch": 1.23, "learning_rate": 3.769968015199807e-05, "loss": 0.2741, "step": 218900 }, { "epoch": 1.23, "learning_rate": 3.7694058921735616e-05, "loss": 0.2652, "step": 219000 }, { "epoch": 1.23, "learning_rate": 3.7688493903775784e-05, "loss": 0.2736, "step": 219100 }, { "epoch": 1.23, "learning_rate": 3.768287267351332e-05, "loss": 0.2659, "step": 219200 }, { "epoch": 1.23, "learning_rate": 3.767725144325087e-05, "loss": 0.2686, "step": 219300 }, { "epoch": 1.23, "learning_rate": 3.7671630212988416e-05, "loss": 0.2737, "step": 219400 }, { "epoch": 1.23, "learning_rate": 3.766600898272596e-05, "loss": 0.262, "step": 219500 }, { "epoch": 1.23, "learning_rate": 3.766038775246351e-05, "loss": 0.2702, "step": 219600 }, { "epoch": 1.23, "learning_rate": 3.7654766522201054e-05, "loss": 0.2677, "step": 219700 }, { "epoch": 1.24, "learning_rate": 3.76491452919386e-05, "loss": 0.2695, "step": 219800 }, { "epoch": 1.24, "learning_rate": 3.764352406167614e-05, "loss": 0.2674, "step": 219900 }, { "epoch": 1.24, "learning_rate": 3.7637902831413687e-05, "loss": 0.2704, "step": 220000 }, { "epoch": 1.24, "learning_rate": 3.763228160115123e-05, "loss": 0.2725, "step": 220100 }, { "epoch": 1.24, "learning_rate": 3.762666037088877e-05, "loss": 0.2683, "step": 220200 }, { "epoch": 1.24, "learning_rate": 3.762103914062632e-05, "loss": 0.2773, "step": 220300 }, { "epoch": 1.24, "learning_rate": 3.7615417910363865e-05, "loss": 0.268, "step": 220400 }, { "epoch": 1.24, "learning_rate": 3.760979668010141e-05, "loss": 0.27, "step": 220500 }, { "epoch": 1.24, "learning_rate": 3.760417544983895e-05, "loss": 0.28, "step": 220600 }, { "epoch": 1.24, "learning_rate": 3.75985542195765e-05, "loss": 0.262, "step": 220700 }, { "epoch": 1.24, "learning_rate": 3.759298920161667e-05, "loss": 0.2692, "step": 220800 }, { "epoch": 1.24, "learning_rate": 3.758736797135422e-05, "loss": 0.2713, "step": 220900 }, { "epoch": 1.24, "learning_rate": 3.758174674109176e-05, "loss": 0.2637, "step": 221000 }, { "epoch": 1.24, "learning_rate": 3.75761255108293e-05, "loss": 0.273, "step": 221100 }, { "epoch": 1.24, "learning_rate": 3.757050428056685e-05, "loss": 0.2671, "step": 221200 }, { "epoch": 1.24, "learning_rate": 3.756488305030439e-05, "loss": 0.2687, "step": 221300 }, { "epoch": 1.24, "learning_rate": 3.7559261820041935e-05, "loss": 0.2684, "step": 221400 }, { "epoch": 1.25, "learning_rate": 3.755364058977948e-05, "loss": 0.2631, "step": 221500 }, { "epoch": 1.25, "learning_rate": 3.754801935951703e-05, "loss": 0.2704, "step": 221600 }, { "epoch": 1.25, "learning_rate": 3.754239812925457e-05, "loss": 0.2691, "step": 221700 }, { "epoch": 1.25, "learning_rate": 3.7536776898992114e-05, "loss": 0.2632, "step": 221800 }, { "epoch": 1.25, "learning_rate": 3.753115566872966e-05, "loss": 0.2665, "step": 221900 }, { "epoch": 1.25, "learning_rate": 3.75255344384672e-05, "loss": 0.2711, "step": 222000 }, { "epoch": 1.25, "learning_rate": 3.7519913208204746e-05, "loss": 0.2665, "step": 222100 }, { "epoch": 1.25, "learning_rate": 3.751429197794229e-05, "loss": 0.2712, "step": 222200 }, { "epoch": 1.25, "learning_rate": 3.750867074767984e-05, "loss": 0.2683, "step": 222300 }, { "epoch": 1.25, "learning_rate": 3.7503049517417385e-05, "loss": 0.2734, "step": 222400 }, { "epoch": 1.25, "learning_rate": 3.749742828715493e-05, "loss": 0.2728, "step": 222500 }, { "epoch": 1.25, "learning_rate": 3.749180705689248e-05, "loss": 0.2735, "step": 222600 }, { "epoch": 1.25, "learning_rate": 3.748618582663002e-05, "loss": 0.2708, "step": 222700 }, { "epoch": 1.25, "learning_rate": 3.748056459636756e-05, "loss": 0.2682, "step": 222800 }, { "epoch": 1.25, "learning_rate": 3.747494336610511e-05, "loss": 0.2717, "step": 222900 }, { "epoch": 1.25, "learning_rate": 3.7469322135842656e-05, "loss": 0.2626, "step": 223000 }, { "epoch": 1.25, "learning_rate": 3.7463700905580195e-05, "loss": 0.2743, "step": 223100 }, { "epoch": 1.25, "learning_rate": 3.745807967531774e-05, "loss": 0.2618, "step": 223200 }, { "epoch": 1.26, "learning_rate": 3.745245844505529e-05, "loss": 0.2702, "step": 223300 }, { "epoch": 1.26, "learning_rate": 3.744683721479283e-05, "loss": 0.2709, "step": 223400 }, { "epoch": 1.26, "learning_rate": 3.744121598453037e-05, "loss": 0.2699, "step": 223500 }, { "epoch": 1.26, "learning_rate": 3.743565096657055e-05, "loss": 0.2732, "step": 223600 }, { "epoch": 1.26, "learning_rate": 3.7430029736308094e-05, "loss": 0.2628, "step": 223700 }, { "epoch": 1.26, "learning_rate": 3.7424408506045634e-05, "loss": 0.2651, "step": 223800 }, { "epoch": 1.26, "learning_rate": 3.741878727578318e-05, "loss": 0.2756, "step": 223900 }, { "epoch": 1.26, "learning_rate": 3.7413166045520726e-05, "loss": 0.264, "step": 224000 }, { "epoch": 1.26, "learning_rate": 3.740754481525827e-05, "loss": 0.2691, "step": 224100 }, { "epoch": 1.26, "learning_rate": 3.740192358499581e-05, "loss": 0.2731, "step": 224200 }, { "epoch": 1.26, "learning_rate": 3.739630235473336e-05, "loss": 0.261, "step": 224300 }, { "epoch": 1.26, "learning_rate": 3.7390681124470904e-05, "loss": 0.2745, "step": 224400 }, { "epoch": 1.26, "learning_rate": 3.7385059894208444e-05, "loss": 0.2701, "step": 224500 }, { "epoch": 1.26, "learning_rate": 3.737943866394599e-05, "loss": 0.2732, "step": 224600 }, { "epoch": 1.26, "learning_rate": 3.7373817433683536e-05, "loss": 0.2617, "step": 224700 }, { "epoch": 1.26, "learning_rate": 3.736819620342108e-05, "loss": 0.273, "step": 224800 }, { "epoch": 1.26, "learning_rate": 3.736257497315863e-05, "loss": 0.2622, "step": 224900 }, { "epoch": 1.26, "learning_rate": 3.7356953742896175e-05, "loss": 0.2659, "step": 225000 }, { "epoch": 1.27, "learning_rate": 3.735133251263372e-05, "loss": 0.2657, "step": 225100 }, { "epoch": 1.27, "learning_rate": 3.734571128237126e-05, "loss": 0.2657, "step": 225200 }, { "epoch": 1.27, "learning_rate": 3.734009005210881e-05, "loss": 0.2721, "step": 225300 }, { "epoch": 1.27, "learning_rate": 3.7334468821846354e-05, "loss": 0.2735, "step": 225400 }, { "epoch": 1.27, "learning_rate": 3.732890380388652e-05, "loss": 0.2671, "step": 225500 }, { "epoch": 1.27, "learning_rate": 3.732328257362406e-05, "loss": 0.2641, "step": 225600 }, { "epoch": 1.27, "learning_rate": 3.731766134336161e-05, "loss": 0.2643, "step": 225700 }, { "epoch": 1.27, "learning_rate": 3.731204011309915e-05, "loss": 0.2714, "step": 225800 }, { "epoch": 1.27, "learning_rate": 3.73064188828367e-05, "loss": 0.2706, "step": 225900 }, { "epoch": 1.27, "learning_rate": 3.7300797652574246e-05, "loss": 0.2683, "step": 226000 }, { "epoch": 1.27, "learning_rate": 3.729517642231179e-05, "loss": 0.2758, "step": 226100 }, { "epoch": 1.27, "learning_rate": 3.728955519204934e-05, "loss": 0.272, "step": 226200 }, { "epoch": 1.27, "learning_rate": 3.728393396178688e-05, "loss": 0.2739, "step": 226300 }, { "epoch": 1.27, "learning_rate": 3.7278312731524424e-05, "loss": 0.2648, "step": 226400 }, { "epoch": 1.27, "learning_rate": 3.727269150126197e-05, "loss": 0.269, "step": 226500 }, { "epoch": 1.27, "learning_rate": 3.726707027099951e-05, "loss": 0.2596, "step": 226600 }, { "epoch": 1.27, "learning_rate": 3.7261449040737056e-05, "loss": 0.2685, "step": 226700 }, { "epoch": 1.27, "learning_rate": 3.72558278104746e-05, "loss": 0.2625, "step": 226800 }, { "epoch": 1.28, "learning_rate": 3.725020658021215e-05, "loss": 0.2683, "step": 226900 }, { "epoch": 1.28, "learning_rate": 3.724458534994969e-05, "loss": 0.2687, "step": 227000 }, { "epoch": 1.28, "learning_rate": 3.7238964119687235e-05, "loss": 0.2635, "step": 227100 }, { "epoch": 1.28, "learning_rate": 3.723334288942478e-05, "loss": 0.2708, "step": 227200 }, { "epoch": 1.28, "learning_rate": 3.722772165916232e-05, "loss": 0.2696, "step": 227300 }, { "epoch": 1.28, "learning_rate": 3.722210042889987e-05, "loss": 0.2631, "step": 227400 }, { "epoch": 1.28, "learning_rate": 3.721647919863741e-05, "loss": 0.2661, "step": 227500 }, { "epoch": 1.28, "learning_rate": 3.721085796837496e-05, "loss": 0.2724, "step": 227600 }, { "epoch": 1.28, "learning_rate": 3.7205236738112506e-05, "loss": 0.2718, "step": 227700 }, { "epoch": 1.28, "learning_rate": 3.719961550785005e-05, "loss": 0.2676, "step": 227800 }, { "epoch": 1.28, "learning_rate": 3.71939942775876e-05, "loss": 0.2719, "step": 227900 }, { "epoch": 1.28, "learning_rate": 3.718837304732514e-05, "loss": 0.2727, "step": 228000 }, { "epoch": 1.28, "learning_rate": 3.7182751817062684e-05, "loss": 0.2648, "step": 228100 }, { "epoch": 1.28, "learning_rate": 3.717713058680023e-05, "loss": 0.2683, "step": 228200 }, { "epoch": 1.28, "learning_rate": 3.7171509356537776e-05, "loss": 0.2627, "step": 228300 }, { "epoch": 1.28, "learning_rate": 3.7165888126275316e-05, "loss": 0.2649, "step": 228400 }, { "epoch": 1.28, "learning_rate": 3.716026689601286e-05, "loss": 0.2669, "step": 228500 }, { "epoch": 1.29, "learning_rate": 3.715464566575041e-05, "loss": 0.2648, "step": 228600 }, { "epoch": 1.29, "learning_rate": 3.714902443548795e-05, "loss": 0.2687, "step": 228700 }, { "epoch": 1.29, "learning_rate": 3.7143403205225494e-05, "loss": 0.2653, "step": 228800 }, { "epoch": 1.29, "learning_rate": 3.713778197496304e-05, "loss": 0.2689, "step": 228900 }, { "epoch": 1.29, "learning_rate": 3.713216074470059e-05, "loss": 0.2677, "step": 229000 }, { "epoch": 1.29, "learning_rate": 3.712653951443813e-05, "loss": 0.2703, "step": 229100 }, { "epoch": 1.29, "learning_rate": 3.712091828417568e-05, "loss": 0.2671, "step": 229200 }, { "epoch": 1.29, "learning_rate": 3.7115297053913226e-05, "loss": 0.2701, "step": 229300 }, { "epoch": 1.29, "learning_rate": 3.7109675823650765e-05, "loss": 0.2696, "step": 229400 }, { "epoch": 1.29, "learning_rate": 3.710405459338831e-05, "loss": 0.2633, "step": 229500 }, { "epoch": 1.29, "learning_rate": 3.709843336312586e-05, "loss": 0.2724, "step": 229600 }, { "epoch": 1.29, "learning_rate": 3.7092812132863404e-05, "loss": 0.2603, "step": 229700 }, { "epoch": 1.29, "learning_rate": 3.7087190902600943e-05, "loss": 0.2637, "step": 229800 }, { "epoch": 1.29, "learning_rate": 3.708156967233849e-05, "loss": 0.2704, "step": 229900 }, { "epoch": 1.29, "learning_rate": 3.7075948442076036e-05, "loss": 0.259, "step": 230000 }, { "epoch": 1.29, "learning_rate": 3.7070327211813575e-05, "loss": 0.2741, "step": 230100 }, { "epoch": 1.29, "learning_rate": 3.706470598155112e-05, "loss": 0.2771, "step": 230200 }, { "epoch": 1.29, "learning_rate": 3.705908475128867e-05, "loss": 0.2673, "step": 230300 }, { "epoch": 1.3, "learning_rate": 3.7053463521026214e-05, "loss": 0.2692, "step": 230400 }, { "epoch": 1.3, "learning_rate": 3.7047842290763754e-05, "loss": 0.2708, "step": 230500 }, { "epoch": 1.3, "learning_rate": 3.70422210605013e-05, "loss": 0.2718, "step": 230600 }, { "epoch": 1.3, "learning_rate": 3.7036599830238846e-05, "loss": 0.2695, "step": 230700 }, { "epoch": 1.3, "learning_rate": 3.703097859997639e-05, "loss": 0.2619, "step": 230800 }, { "epoch": 1.3, "learning_rate": 3.702535736971394e-05, "loss": 0.2672, "step": 230900 }, { "epoch": 1.3, "learning_rate": 3.7019736139451485e-05, "loss": 0.2721, "step": 231000 }, { "epoch": 1.3, "learning_rate": 3.701411490918903e-05, "loss": 0.2647, "step": 231100 }, { "epoch": 1.3, "learning_rate": 3.700849367892657e-05, "loss": 0.2674, "step": 231200 }, { "epoch": 1.3, "learning_rate": 3.700287244866412e-05, "loss": 0.2685, "step": 231300 }, { "epoch": 1.3, "learning_rate": 3.6997251218401664e-05, "loss": 0.2663, "step": 231400 }, { "epoch": 1.3, "learning_rate": 3.69916299881392e-05, "loss": 0.2666, "step": 231500 }, { "epoch": 1.3, "learning_rate": 3.698600875787675e-05, "loss": 0.2675, "step": 231600 }, { "epoch": 1.3, "learning_rate": 3.6980387527614296e-05, "loss": 0.2645, "step": 231700 }, { "epoch": 1.3, "learning_rate": 3.697476629735184e-05, "loss": 0.263, "step": 231800 }, { "epoch": 1.3, "learning_rate": 3.696914506708938e-05, "loss": 0.2688, "step": 231900 }, { "epoch": 1.3, "learning_rate": 3.696352383682693e-05, "loss": 0.2603, "step": 232000 }, { "epoch": 1.3, "learning_rate": 3.6957902606564474e-05, "loss": 0.2594, "step": 232100 }, { "epoch": 1.31, "learning_rate": 3.695228137630202e-05, "loss": 0.2651, "step": 232200 }, { "epoch": 1.31, "learning_rate": 3.6946660146039566e-05, "loss": 0.2709, "step": 232300 }, { "epoch": 1.31, "learning_rate": 3.694103891577711e-05, "loss": 0.2628, "step": 232400 }, { "epoch": 1.31, "learning_rate": 3.693541768551466e-05, "loss": 0.2658, "step": 232500 }, { "epoch": 1.31, "learning_rate": 3.69297964552522e-05, "loss": 0.2659, "step": 232600 }, { "epoch": 1.31, "learning_rate": 3.6924175224989745e-05, "loss": 0.2706, "step": 232700 }, { "epoch": 1.31, "learning_rate": 3.691855399472729e-05, "loss": 0.2716, "step": 232800 }, { "epoch": 1.31, "learning_rate": 3.691293276446483e-05, "loss": 0.2725, "step": 232900 }, { "epoch": 1.31, "learning_rate": 3.690731153420238e-05, "loss": 0.2668, "step": 233000 }, { "epoch": 1.31, "learning_rate": 3.690169030393992e-05, "loss": 0.2674, "step": 233100 }, { "epoch": 1.31, "learning_rate": 3.689606907367747e-05, "loss": 0.2656, "step": 233200 }, { "epoch": 1.31, "learning_rate": 3.689050405571764e-05, "loss": 0.2592, "step": 233300 }, { "epoch": 1.31, "learning_rate": 3.688488282545518e-05, "loss": 0.2704, "step": 233400 }, { "epoch": 1.31, "learning_rate": 3.687926159519273e-05, "loss": 0.2711, "step": 233500 }, { "epoch": 1.31, "learning_rate": 3.6873640364930276e-05, "loss": 0.2644, "step": 233600 }, { "epoch": 1.31, "learning_rate": 3.6868019134667815e-05, "loss": 0.2566, "step": 233700 }, { "epoch": 1.31, "learning_rate": 3.686239790440536e-05, "loss": 0.2709, "step": 233800 }, { "epoch": 1.31, "learning_rate": 3.685677667414291e-05, "loss": 0.2674, "step": 233900 }, { "epoch": 1.32, "learning_rate": 3.685115544388045e-05, "loss": 0.2683, "step": 234000 }, { "epoch": 1.32, "learning_rate": 3.6845534213617994e-05, "loss": 0.2678, "step": 234100 }, { "epoch": 1.32, "learning_rate": 3.683991298335554e-05, "loss": 0.2633, "step": 234200 }, { "epoch": 1.32, "learning_rate": 3.6834291753093086e-05, "loss": 0.2572, "step": 234300 }, { "epoch": 1.32, "learning_rate": 3.6828670522830626e-05, "loss": 0.2596, "step": 234400 }, { "epoch": 1.32, "learning_rate": 3.682304929256817e-05, "loss": 0.2708, "step": 234500 }, { "epoch": 1.32, "learning_rate": 3.681742806230572e-05, "loss": 0.2639, "step": 234600 }, { "epoch": 1.32, "learning_rate": 3.681180683204326e-05, "loss": 0.2723, "step": 234700 }, { "epoch": 1.32, "learning_rate": 3.680624181408343e-05, "loss": 0.2648, "step": 234800 }, { "epoch": 1.32, "learning_rate": 3.680062058382098e-05, "loss": 0.2674, "step": 234900 }, { "epoch": 1.32, "learning_rate": 3.6794999353558525e-05, "loss": 0.2689, "step": 235000 }, { "epoch": 1.32, "learning_rate": 3.6789378123296064e-05, "loss": 0.2667, "step": 235100 }, { "epoch": 1.32, "learning_rate": 3.678375689303361e-05, "loss": 0.2716, "step": 235200 }, { "epoch": 1.32, "learning_rate": 3.677813566277116e-05, "loss": 0.261, "step": 235300 }, { "epoch": 1.32, "learning_rate": 3.6772514432508696e-05, "loss": 0.2671, "step": 235400 }, { "epoch": 1.32, "learning_rate": 3.676689320224624e-05, "loss": 0.264, "step": 235500 }, { "epoch": 1.32, "learning_rate": 3.676127197198379e-05, "loss": 0.2676, "step": 235600 }, { "epoch": 1.32, "learning_rate": 3.6755650741721335e-05, "loss": 0.2671, "step": 235700 }, { "epoch": 1.33, "learning_rate": 3.6750029511458875e-05, "loss": 0.2653, "step": 235800 }, { "epoch": 1.33, "learning_rate": 3.674440828119642e-05, "loss": 0.2624, "step": 235900 }, { "epoch": 1.33, "learning_rate": 3.673878705093397e-05, "loss": 0.2727, "step": 236000 }, { "epoch": 1.33, "learning_rate": 3.6733165820671514e-05, "loss": 0.2706, "step": 236100 }, { "epoch": 1.33, "learning_rate": 3.672754459040906e-05, "loss": 0.2691, "step": 236200 }, { "epoch": 1.33, "learning_rate": 3.6721923360146606e-05, "loss": 0.2686, "step": 236300 }, { "epoch": 1.33, "learning_rate": 3.671630212988415e-05, "loss": 0.2714, "step": 236400 }, { "epoch": 1.33, "learning_rate": 3.671068089962169e-05, "loss": 0.2612, "step": 236500 }, { "epoch": 1.33, "learning_rate": 3.670505966935924e-05, "loss": 0.2621, "step": 236600 }, { "epoch": 1.33, "learning_rate": 3.6699438439096784e-05, "loss": 0.2714, "step": 236700 }, { "epoch": 1.33, "learning_rate": 3.6693817208834324e-05, "loss": 0.259, "step": 236800 }, { "epoch": 1.33, "learning_rate": 3.668819597857187e-05, "loss": 0.2591, "step": 236900 }, { "epoch": 1.33, "learning_rate": 3.6682574748309416e-05, "loss": 0.2652, "step": 237000 }, { "epoch": 1.33, "learning_rate": 3.667695351804696e-05, "loss": 0.2612, "step": 237100 }, { "epoch": 1.33, "learning_rate": 3.66713322877845e-05, "loss": 0.2757, "step": 237200 }, { "epoch": 1.33, "learning_rate": 3.666571105752205e-05, "loss": 0.2678, "step": 237300 }, { "epoch": 1.33, "learning_rate": 3.6660089827259595e-05, "loss": 0.2704, "step": 237400 }, { "epoch": 1.34, "learning_rate": 3.665446859699714e-05, "loss": 0.2663, "step": 237500 }, { "epoch": 1.34, "learning_rate": 3.664884736673469e-05, "loss": 0.2609, "step": 237600 }, { "epoch": 1.34, "learning_rate": 3.6643226136472234e-05, "loss": 0.2643, "step": 237700 }, { "epoch": 1.34, "learning_rate": 3.663760490620978e-05, "loss": 0.2748, "step": 237800 }, { "epoch": 1.34, "learning_rate": 3.663198367594732e-05, "loss": 0.2551, "step": 237900 }, { "epoch": 1.34, "learning_rate": 3.6626362445684866e-05, "loss": 0.2671, "step": 238000 }, { "epoch": 1.34, "learning_rate": 3.662074121542241e-05, "loss": 0.2663, "step": 238100 }, { "epoch": 1.34, "learning_rate": 3.661511998515995e-05, "loss": 0.2624, "step": 238200 }, { "epoch": 1.34, "learning_rate": 3.66094987548975e-05, "loss": 0.2627, "step": 238300 }, { "epoch": 1.34, "learning_rate": 3.6603877524635044e-05, "loss": 0.2679, "step": 238400 }, { "epoch": 1.34, "learning_rate": 3.659825629437259e-05, "loss": 0.267, "step": 238500 }, { "epoch": 1.34, "learning_rate": 3.659263506411013e-05, "loss": 0.2641, "step": 238600 }, { "epoch": 1.34, "learning_rate": 3.6587013833847676e-05, "loss": 0.2673, "step": 238700 }, { "epoch": 1.34, "learning_rate": 3.658139260358522e-05, "loss": 0.267, "step": 238800 }, { "epoch": 1.34, "learning_rate": 3.657577137332276e-05, "loss": 0.2596, "step": 238900 }, { "epoch": 1.34, "learning_rate": 3.657015014306031e-05, "loss": 0.2659, "step": 239000 }, { "epoch": 1.34, "learning_rate": 3.6564528912797854e-05, "loss": 0.2647, "step": 239100 }, { "epoch": 1.34, "learning_rate": 3.65589076825354e-05, "loss": 0.2636, "step": 239200 }, { "epoch": 1.35, "learning_rate": 3.655328645227295e-05, "loss": 0.2604, "step": 239300 }, { "epoch": 1.35, "learning_rate": 3.654766522201049e-05, "loss": 0.2616, "step": 239400 }, { "epoch": 1.35, "learning_rate": 3.654204399174804e-05, "loss": 0.2639, "step": 239500 }, { "epoch": 1.35, "learning_rate": 3.653642276148558e-05, "loss": 0.2656, "step": 239600 }, { "epoch": 1.35, "learning_rate": 3.6530801531223125e-05, "loss": 0.265, "step": 239700 }, { "epoch": 1.35, "learning_rate": 3.652518030096067e-05, "loss": 0.2671, "step": 239800 }, { "epoch": 1.35, "learning_rate": 3.651955907069822e-05, "loss": 0.2573, "step": 239900 }, { "epoch": 1.35, "learning_rate": 3.651393784043576e-05, "loss": 0.2601, "step": 240000 }, { "epoch": 1.35, "learning_rate": 3.6508316610173304e-05, "loss": 0.255, "step": 240100 }, { "epoch": 1.35, "learning_rate": 3.650269537991085e-05, "loss": 0.2675, "step": 240200 }, { "epoch": 1.35, "learning_rate": 3.649707414964839e-05, "loss": 0.2566, "step": 240300 }, { "epoch": 1.35, "learning_rate": 3.6491452919385936e-05, "loss": 0.2633, "step": 240400 }, { "epoch": 1.35, "learning_rate": 3.648583168912348e-05, "loss": 0.2582, "step": 240500 }, { "epoch": 1.35, "learning_rate": 3.648021045886103e-05, "loss": 0.258, "step": 240600 }, { "epoch": 1.35, "learning_rate": 3.6474589228598575e-05, "loss": 0.2637, "step": 240700 }, { "epoch": 1.35, "learning_rate": 3.646896799833612e-05, "loss": 0.264, "step": 240800 }, { "epoch": 1.35, "learning_rate": 3.646340298037629e-05, "loss": 0.2593, "step": 240900 }, { "epoch": 1.35, "learning_rate": 3.6457781750113835e-05, "loss": 0.2617, "step": 241000 }, { "epoch": 1.36, "learning_rate": 3.6452160519851374e-05, "loss": 0.263, "step": 241100 }, { "epoch": 1.36, "learning_rate": 3.644653928958892e-05, "loss": 0.2656, "step": 241200 }, { "epoch": 1.36, "learning_rate": 3.644091805932647e-05, "loss": 0.2614, "step": 241300 }, { "epoch": 1.36, "learning_rate": 3.6435296829064006e-05, "loss": 0.2576, "step": 241400 }, { "epoch": 1.36, "learning_rate": 3.642967559880155e-05, "loss": 0.2641, "step": 241500 }, { "epoch": 1.36, "learning_rate": 3.64240543685391e-05, "loss": 0.2615, "step": 241600 }, { "epoch": 1.36, "learning_rate": 3.6418433138276645e-05, "loss": 0.263, "step": 241700 }, { "epoch": 1.36, "learning_rate": 3.641281190801419e-05, "loss": 0.2599, "step": 241800 }, { "epoch": 1.36, "learning_rate": 3.640719067775174e-05, "loss": 0.265, "step": 241900 }, { "epoch": 1.36, "learning_rate": 3.6401569447489284e-05, "loss": 0.2592, "step": 242000 }, { "epoch": 1.36, "learning_rate": 3.6395948217226823e-05, "loss": 0.2578, "step": 242100 }, { "epoch": 1.36, "learning_rate": 3.639032698696437e-05, "loss": 0.2645, "step": 242200 }, { "epoch": 1.36, "learning_rate": 3.6384705756701916e-05, "loss": 0.2566, "step": 242300 }, { "epoch": 1.36, "learning_rate": 3.6379140738742084e-05, "loss": 0.2678, "step": 242400 }, { "epoch": 1.36, "learning_rate": 3.637351950847962e-05, "loss": 0.2609, "step": 242500 }, { "epoch": 1.36, "learning_rate": 3.636789827821717e-05, "loss": 0.257, "step": 242600 }, { "epoch": 1.36, "learning_rate": 3.6362277047954716e-05, "loss": 0.2578, "step": 242700 }, { "epoch": 1.36, "learning_rate": 3.635665581769226e-05, "loss": 0.2596, "step": 242800 }, { "epoch": 1.37, "learning_rate": 3.635103458742981e-05, "loss": 0.266, "step": 242900 }, { "epoch": 1.37, "learning_rate": 3.6345413357167354e-05, "loss": 0.2628, "step": 243000 }, { "epoch": 1.37, "learning_rate": 3.63397921269049e-05, "loss": 0.2582, "step": 243100 }, { "epoch": 1.37, "learning_rate": 3.633417089664244e-05, "loss": 0.259, "step": 243200 }, { "epoch": 1.37, "learning_rate": 3.6328549666379987e-05, "loss": 0.2602, "step": 243300 }, { "epoch": 1.37, "learning_rate": 3.632292843611753e-05, "loss": 0.2667, "step": 243400 }, { "epoch": 1.37, "learning_rate": 3.631730720585508e-05, "loss": 0.2608, "step": 243500 }, { "epoch": 1.37, "learning_rate": 3.631168597559262e-05, "loss": 0.2622, "step": 243600 }, { "epoch": 1.37, "learning_rate": 3.6306064745330165e-05, "loss": 0.2571, "step": 243700 }, { "epoch": 1.37, "learning_rate": 3.630044351506771e-05, "loss": 0.2564, "step": 243800 }, { "epoch": 1.37, "learning_rate": 3.629482228480525e-05, "loss": 0.2531, "step": 243900 }, { "epoch": 1.37, "learning_rate": 3.62892010545428e-05, "loss": 0.2694, "step": 244000 }, { "epoch": 1.37, "learning_rate": 3.628357982428034e-05, "loss": 0.2712, "step": 244100 }, { "epoch": 1.37, "learning_rate": 3.627795859401789e-05, "loss": 0.2606, "step": 244200 }, { "epoch": 1.37, "learning_rate": 3.627233736375543e-05, "loss": 0.2639, "step": 244300 }, { "epoch": 1.37, "learning_rate": 3.6266716133492975e-05, "loss": 0.257, "step": 244400 }, { "epoch": 1.37, "learning_rate": 3.626109490323052e-05, "loss": 0.2628, "step": 244500 }, { "epoch": 1.37, "learning_rate": 3.625547367296807e-05, "loss": 0.2585, "step": 244600 }, { "epoch": 1.38, "learning_rate": 3.6249852442705614e-05, "loss": 0.2585, "step": 244700 }, { "epoch": 1.38, "learning_rate": 3.624423121244316e-05, "loss": 0.2577, "step": 244800 }, { "epoch": 1.38, "learning_rate": 3.623860998218071e-05, "loss": 0.2573, "step": 244900 }, { "epoch": 1.38, "learning_rate": 3.6232988751918246e-05, "loss": 0.2592, "step": 245000 }, { "epoch": 1.38, "learning_rate": 3.622736752165579e-05, "loss": 0.2628, "step": 245100 }, { "epoch": 1.38, "learning_rate": 3.622174629139334e-05, "loss": 0.2663, "step": 245200 }, { "epoch": 1.38, "learning_rate": 3.621612506113088e-05, "loss": 0.2728, "step": 245300 }, { "epoch": 1.38, "learning_rate": 3.6210503830868424e-05, "loss": 0.2714, "step": 245400 }, { "epoch": 1.38, "learning_rate": 3.620488260060597e-05, "loss": 0.2633, "step": 245500 }, { "epoch": 1.38, "learning_rate": 3.619926137034351e-05, "loss": 0.2677, "step": 245600 }, { "epoch": 1.38, "learning_rate": 3.6193640140081057e-05, "loss": 0.2615, "step": 245700 }, { "epoch": 1.38, "learning_rate": 3.61880189098186e-05, "loss": 0.2643, "step": 245800 }, { "epoch": 1.38, "learning_rate": 3.618239767955615e-05, "loss": 0.267, "step": 245900 }, { "epoch": 1.38, "learning_rate": 3.6176776449293695e-05, "loss": 0.2601, "step": 246000 }, { "epoch": 1.38, "learning_rate": 3.617115521903124e-05, "loss": 0.2664, "step": 246100 }, { "epoch": 1.38, "learning_rate": 3.616553398876879e-05, "loss": 0.265, "step": 246200 }, { "epoch": 1.38, "learning_rate": 3.615991275850633e-05, "loss": 0.2555, "step": 246300 }, { "epoch": 1.39, "learning_rate": 3.6154347740546495e-05, "loss": 0.262, "step": 246400 }, { "epoch": 1.39, "learning_rate": 3.614872651028404e-05, "loss": 0.2652, "step": 246500 }, { "epoch": 1.39, "learning_rate": 3.614310528002159e-05, "loss": 0.2585, "step": 246600 }, { "epoch": 1.39, "learning_rate": 3.613748404975913e-05, "loss": 0.262, "step": 246700 }, { "epoch": 1.39, "learning_rate": 3.613186281949667e-05, "loss": 0.2604, "step": 246800 }, { "epoch": 1.39, "learning_rate": 3.612624158923422e-05, "loss": 0.2559, "step": 246900 }, { "epoch": 1.39, "learning_rate": 3.6120620358971766e-05, "loss": 0.2584, "step": 247000 }, { "epoch": 1.39, "learning_rate": 3.611499912870931e-05, "loss": 0.2578, "step": 247100 }, { "epoch": 1.39, "learning_rate": 3.610937789844686e-05, "loss": 0.2568, "step": 247200 }, { "epoch": 1.39, "learning_rate": 3.6103756668184405e-05, "loss": 0.2638, "step": 247300 }, { "epoch": 1.39, "learning_rate": 3.6098135437921944e-05, "loss": 0.2587, "step": 247400 }, { "epoch": 1.39, "learning_rate": 3.609251420765949e-05, "loss": 0.258, "step": 247500 }, { "epoch": 1.39, "learning_rate": 3.608689297739704e-05, "loss": 0.2598, "step": 247600 }, { "epoch": 1.39, "learning_rate": 3.608127174713458e-05, "loss": 0.2621, "step": 247700 }, { "epoch": 1.39, "learning_rate": 3.607565051687212e-05, "loss": 0.2592, "step": 247800 }, { "epoch": 1.39, "learning_rate": 3.607002928660967e-05, "loss": 0.2585, "step": 247900 }, { "epoch": 1.39, "learning_rate": 3.6064408056347215e-05, "loss": 0.2647, "step": 248000 }, { "epoch": 1.39, "learning_rate": 3.6058786826084755e-05, "loss": 0.2593, "step": 248100 }, { "epoch": 1.4, "learning_rate": 3.60531655958223e-05, "loss": 0.2599, "step": 248200 }, { "epoch": 1.4, "learning_rate": 3.604754436555985e-05, "loss": 0.269, "step": 248300 }, { "epoch": 1.4, "learning_rate": 3.6041923135297393e-05, "loss": 0.259, "step": 248400 }, { "epoch": 1.4, "learning_rate": 3.603630190503493e-05, "loss": 0.2596, "step": 248500 }, { "epoch": 1.4, "learning_rate": 3.603068067477248e-05, "loss": 0.2567, "step": 248600 }, { "epoch": 1.4, "learning_rate": 3.6025059444510026e-05, "loss": 0.2594, "step": 248700 }, { "epoch": 1.4, "learning_rate": 3.601943821424757e-05, "loss": 0.2669, "step": 248800 }, { "epoch": 1.4, "learning_rate": 3.601381698398512e-05, "loss": 0.2732, "step": 248900 }, { "epoch": 1.4, "learning_rate": 3.6008195753722664e-05, "loss": 0.2666, "step": 249000 }, { "epoch": 1.4, "learning_rate": 3.600257452346021e-05, "loss": 0.2607, "step": 249100 }, { "epoch": 1.4, "learning_rate": 3.599695329319775e-05, "loss": 0.2636, "step": 249200 }, { "epoch": 1.4, "learning_rate": 3.5991332062935296e-05, "loss": 0.2628, "step": 249300 }, { "epoch": 1.4, "learning_rate": 3.598571083267284e-05, "loss": 0.2589, "step": 249400 }, { "epoch": 1.4, "learning_rate": 3.598008960241038e-05, "loss": 0.2633, "step": 249500 }, { "epoch": 1.4, "learning_rate": 3.597446837214793e-05, "loss": 0.2577, "step": 249600 }, { "epoch": 1.4, "learning_rate": 3.5968847141885475e-05, "loss": 0.2595, "step": 249700 }, { "epoch": 1.4, "learning_rate": 3.596322591162302e-05, "loss": 0.2609, "step": 249800 }, { "epoch": 1.4, "learning_rate": 3.595760468136056e-05, "loss": 0.2592, "step": 249900 }, { "epoch": 1.41, "learning_rate": 3.595198345109811e-05, "loss": 0.2585, "step": 250000 }, { "epoch": 1.41, "learning_rate": 3.594636222083565e-05, "loss": 0.2531, "step": 250100 }, { "epoch": 1.41, "learning_rate": 3.59407409905732e-05, "loss": 0.2561, "step": 250200 }, { "epoch": 1.41, "learning_rate": 3.5935119760310746e-05, "loss": 0.2609, "step": 250300 }, { "epoch": 1.41, "learning_rate": 3.592949853004829e-05, "loss": 0.2557, "step": 250400 }, { "epoch": 1.41, "learning_rate": 3.592387729978584e-05, "loss": 0.2606, "step": 250500 }, { "epoch": 1.41, "learning_rate": 3.591825606952338e-05, "loss": 0.2686, "step": 250600 }, { "epoch": 1.41, "learning_rate": 3.5912634839260924e-05, "loss": 0.2629, "step": 250700 }, { "epoch": 1.41, "learning_rate": 3.590701360899847e-05, "loss": 0.2605, "step": 250800 }, { "epoch": 1.41, "learning_rate": 3.590139237873601e-05, "loss": 0.2556, "step": 250900 }, { "epoch": 1.41, "learning_rate": 3.5895771148473556e-05, "loss": 0.2656, "step": 251000 }, { "epoch": 1.41, "learning_rate": 3.58901499182111e-05, "loss": 0.2661, "step": 251100 }, { "epoch": 1.41, "learning_rate": 3.588452868794865e-05, "loss": 0.2577, "step": 251200 }, { "epoch": 1.41, "learning_rate": 3.587890745768619e-05, "loss": 0.2598, "step": 251300 }, { "epoch": 1.41, "learning_rate": 3.5873286227423734e-05, "loss": 0.2575, "step": 251400 }, { "epoch": 1.41, "learning_rate": 3.586766499716128e-05, "loss": 0.2641, "step": 251500 }, { "epoch": 1.41, "learning_rate": 3.586204376689882e-05, "loss": 0.2632, "step": 251600 }, { "epoch": 1.41, "learning_rate": 3.5856422536636366e-05, "loss": 0.258, "step": 251700 }, { "epoch": 1.42, "learning_rate": 3.585080130637391e-05, "loss": 0.2643, "step": 251800 }, { "epoch": 1.42, "learning_rate": 3.584518007611146e-05, "loss": 0.2595, "step": 251900 }, { "epoch": 1.42, "learning_rate": 3.5839558845849005e-05, "loss": 0.259, "step": 252000 }, { "epoch": 1.42, "learning_rate": 3.583393761558655e-05, "loss": 0.2546, "step": 252100 }, { "epoch": 1.42, "learning_rate": 3.58283163853241e-05, "loss": 0.2658, "step": 252200 }, { "epoch": 1.42, "learning_rate": 3.582269515506164e-05, "loss": 0.2659, "step": 252300 }, { "epoch": 1.42, "learning_rate": 3.5817130137101805e-05, "loss": 0.2593, "step": 252400 }, { "epoch": 1.42, "learning_rate": 3.581150890683935e-05, "loss": 0.254, "step": 252500 }, { "epoch": 1.42, "learning_rate": 3.58058876765769e-05, "loss": 0.2608, "step": 252600 }, { "epoch": 1.42, "learning_rate": 3.580026644631444e-05, "loss": 0.263, "step": 252700 }, { "epoch": 1.42, "learning_rate": 3.579464521605198e-05, "loss": 0.2632, "step": 252800 }, { "epoch": 1.42, "learning_rate": 3.578902398578953e-05, "loss": 0.2697, "step": 252900 }, { "epoch": 1.42, "learning_rate": 3.5783402755527076e-05, "loss": 0.2601, "step": 253000 }, { "epoch": 1.42, "learning_rate": 3.577778152526462e-05, "loss": 0.2593, "step": 253100 }, { "epoch": 1.42, "learning_rate": 3.577216029500217e-05, "loss": 0.2612, "step": 253200 }, { "epoch": 1.42, "learning_rate": 3.5766539064739715e-05, "loss": 0.2568, "step": 253300 }, { "epoch": 1.42, "learning_rate": 3.5760917834477254e-05, "loss": 0.2555, "step": 253400 }, { "epoch": 1.42, "learning_rate": 3.575535281651742e-05, "loss": 0.2647, "step": 253500 }, { "epoch": 1.43, "learning_rate": 3.574973158625497e-05, "loss": 0.2594, "step": 253600 }, { "epoch": 1.43, "learning_rate": 3.5744110355992514e-05, "loss": 0.2597, "step": 253700 }, { "epoch": 1.43, "learning_rate": 3.5738489125730054e-05, "loss": 0.2639, "step": 253800 }, { "epoch": 1.43, "learning_rate": 3.57328678954676e-05, "loss": 0.2527, "step": 253900 }, { "epoch": 1.43, "learning_rate": 3.5727246665205146e-05, "loss": 0.2603, "step": 254000 }, { "epoch": 1.43, "learning_rate": 3.572162543494269e-05, "loss": 0.2658, "step": 254100 }, { "epoch": 1.43, "learning_rate": 3.571600420468024e-05, "loss": 0.2563, "step": 254200 }, { "epoch": 1.43, "learning_rate": 3.5710382974417785e-05, "loss": 0.2687, "step": 254300 }, { "epoch": 1.43, "learning_rate": 3.570476174415533e-05, "loss": 0.2589, "step": 254400 }, { "epoch": 1.43, "learning_rate": 3.569914051389287e-05, "loss": 0.2582, "step": 254500 }, { "epoch": 1.43, "learning_rate": 3.569351928363042e-05, "loss": 0.2595, "step": 254600 }, { "epoch": 1.43, "learning_rate": 3.5687898053367964e-05, "loss": 0.2596, "step": 254700 }, { "epoch": 1.43, "learning_rate": 3.568227682310551e-05, "loss": 0.2545, "step": 254800 }, { "epoch": 1.43, "learning_rate": 3.567665559284305e-05, "loss": 0.2574, "step": 254900 }, { "epoch": 1.43, "learning_rate": 3.5671034362580596e-05, "loss": 0.2545, "step": 255000 }, { "epoch": 1.43, "learning_rate": 3.566541313231814e-05, "loss": 0.2554, "step": 255100 }, { "epoch": 1.43, "learning_rate": 3.565979190205568e-05, "loss": 0.2544, "step": 255200 }, { "epoch": 1.44, "learning_rate": 3.565417067179323e-05, "loss": 0.2631, "step": 255300 }, { "epoch": 1.44, "learning_rate": 3.5648549441530774e-05, "loss": 0.2672, "step": 255400 }, { "epoch": 1.44, "learning_rate": 3.564292821126832e-05, "loss": 0.2625, "step": 255500 }, { "epoch": 1.44, "learning_rate": 3.5637306981005866e-05, "loss": 0.2583, "step": 255600 }, { "epoch": 1.44, "learning_rate": 3.563168575074341e-05, "loss": 0.2532, "step": 255700 }, { "epoch": 1.44, "learning_rate": 3.562606452048096e-05, "loss": 0.258, "step": 255800 }, { "epoch": 1.44, "learning_rate": 3.56204432902185e-05, "loss": 0.2612, "step": 255900 }, { "epoch": 1.44, "learning_rate": 3.5614822059956045e-05, "loss": 0.2594, "step": 256000 }, { "epoch": 1.44, "learning_rate": 3.560920082969359e-05, "loss": 0.2579, "step": 256100 }, { "epoch": 1.44, "learning_rate": 3.560357959943113e-05, "loss": 0.2599, "step": 256200 }, { "epoch": 1.44, "learning_rate": 3.55980145814713e-05, "loss": 0.2597, "step": 256300 }, { "epoch": 1.44, "learning_rate": 3.5592393351208845e-05, "loss": 0.2614, "step": 256400 }, { "epoch": 1.44, "learning_rate": 3.558677212094639e-05, "loss": 0.2553, "step": 256500 }, { "epoch": 1.44, "learning_rate": 3.558115089068394e-05, "loss": 0.2604, "step": 256600 }, { "epoch": 1.44, "learning_rate": 3.557552966042148e-05, "loss": 0.256, "step": 256700 }, { "epoch": 1.44, "learning_rate": 3.556990843015903e-05, "loss": 0.2585, "step": 256800 }, { "epoch": 1.44, "learning_rate": 3.5564287199896576e-05, "loss": 0.2573, "step": 256900 }, { "epoch": 1.44, "learning_rate": 3.5558665969634115e-05, "loss": 0.2622, "step": 257000 }, { "epoch": 1.45, "learning_rate": 3.555304473937166e-05, "loss": 0.2609, "step": 257100 }, { "epoch": 1.45, "learning_rate": 3.554742350910921e-05, "loss": 0.2576, "step": 257200 }, { "epoch": 1.45, "learning_rate": 3.554180227884675e-05, "loss": 0.262, "step": 257300 }, { "epoch": 1.45, "learning_rate": 3.5536181048584294e-05, "loss": 0.2604, "step": 257400 }, { "epoch": 1.45, "learning_rate": 3.553055981832184e-05, "loss": 0.2576, "step": 257500 }, { "epoch": 1.45, "learning_rate": 3.5524938588059386e-05, "loss": 0.2579, "step": 257600 }, { "epoch": 1.45, "learning_rate": 3.5519317357796926e-05, "loss": 0.2604, "step": 257700 }, { "epoch": 1.45, "learning_rate": 3.551369612753447e-05, "loss": 0.2518, "step": 257800 }, { "epoch": 1.45, "learning_rate": 3.550807489727202e-05, "loss": 0.2555, "step": 257900 }, { "epoch": 1.45, "learning_rate": 3.550245366700956e-05, "loss": 0.2518, "step": 258000 }, { "epoch": 1.45, "learning_rate": 3.5496832436747104e-05, "loss": 0.2563, "step": 258100 }, { "epoch": 1.45, "learning_rate": 3.549121120648465e-05, "loss": 0.2579, "step": 258200 }, { "epoch": 1.45, "learning_rate": 3.54855899762222e-05, "loss": 0.2564, "step": 258300 }, { "epoch": 1.45, "learning_rate": 3.547996874595974e-05, "loss": 0.2618, "step": 258400 }, { "epoch": 1.45, "learning_rate": 3.547434751569729e-05, "loss": 0.2565, "step": 258500 }, { "epoch": 1.45, "learning_rate": 3.546878249773746e-05, "loss": 0.2564, "step": 258600 }, { "epoch": 1.45, "learning_rate": 3.5463161267475e-05, "loss": 0.2595, "step": 258700 }, { "epoch": 1.45, "learning_rate": 3.545754003721254e-05, "loss": 0.261, "step": 258800 }, { "epoch": 1.46, "learning_rate": 3.545191880695009e-05, "loss": 0.2618, "step": 258900 }, { "epoch": 1.46, "learning_rate": 3.5446297576687635e-05, "loss": 0.2552, "step": 259000 }, { "epoch": 1.46, "learning_rate": 3.5440676346425175e-05, "loss": 0.2607, "step": 259100 }, { "epoch": 1.46, "learning_rate": 3.543505511616272e-05, "loss": 0.244, "step": 259200 }, { "epoch": 1.46, "learning_rate": 3.542943388590027e-05, "loss": 0.2654, "step": 259300 }, { "epoch": 1.46, "learning_rate": 3.542381265563782e-05, "loss": 0.2576, "step": 259400 }, { "epoch": 1.46, "learning_rate": 3.541819142537536e-05, "loss": 0.2522, "step": 259500 }, { "epoch": 1.46, "learning_rate": 3.5412570195112906e-05, "loss": 0.2582, "step": 259600 }, { "epoch": 1.46, "learning_rate": 3.540694896485045e-05, "loss": 0.2612, "step": 259700 }, { "epoch": 1.46, "learning_rate": 3.540132773458799e-05, "loss": 0.2568, "step": 259800 }, { "epoch": 1.46, "learning_rate": 3.539570650432554e-05, "loss": 0.2584, "step": 259900 }, { "epoch": 1.46, "learning_rate": 3.5390085274063084e-05, "loss": 0.2572, "step": 260000 }, { "epoch": 1.46, "learning_rate": 3.538446404380063e-05, "loss": 0.2563, "step": 260100 }, { "epoch": 1.46, "learning_rate": 3.537884281353817e-05, "loss": 0.2604, "step": 260200 }, { "epoch": 1.46, "learning_rate": 3.5373221583275716e-05, "loss": 0.253, "step": 260300 }, { "epoch": 1.46, "learning_rate": 3.536760035301326e-05, "loss": 0.2636, "step": 260400 }, { "epoch": 1.46, "learning_rate": 3.53619791227508e-05, "loss": 0.2547, "step": 260500 }, { "epoch": 1.46, "learning_rate": 3.535635789248835e-05, "loss": 0.2626, "step": 260600 }, { "epoch": 1.47, "learning_rate": 3.5350736662225895e-05, "loss": 0.2614, "step": 260700 }, { "epoch": 1.47, "learning_rate": 3.534511543196344e-05, "loss": 0.2568, "step": 260800 }, { "epoch": 1.47, "learning_rate": 3.533949420170099e-05, "loss": 0.2539, "step": 260900 }, { "epoch": 1.47, "learning_rate": 3.5333872971438534e-05, "loss": 0.2501, "step": 261000 }, { "epoch": 1.47, "learning_rate": 3.532825174117608e-05, "loss": 0.2577, "step": 261100 }, { "epoch": 1.47, "learning_rate": 3.532263051091362e-05, "loss": 0.2554, "step": 261200 }, { "epoch": 1.47, "learning_rate": 3.5317009280651166e-05, "loss": 0.2571, "step": 261300 }, { "epoch": 1.47, "learning_rate": 3.531138805038871e-05, "loss": 0.2554, "step": 261400 }, { "epoch": 1.47, "learning_rate": 3.530576682012626e-05, "loss": 0.2577, "step": 261500 }, { "epoch": 1.47, "learning_rate": 3.53001455898638e-05, "loss": 0.2576, "step": 261600 }, { "epoch": 1.47, "learning_rate": 3.5294524359601344e-05, "loss": 0.2491, "step": 261700 }, { "epoch": 1.47, "learning_rate": 3.528890312933889e-05, "loss": 0.2637, "step": 261800 }, { "epoch": 1.47, "learning_rate": 3.528328189907643e-05, "loss": 0.2555, "step": 261900 }, { "epoch": 1.47, "learning_rate": 3.5277660668813976e-05, "loss": 0.2526, "step": 262000 }, { "epoch": 1.47, "learning_rate": 3.527203943855152e-05, "loss": 0.2594, "step": 262100 }, { "epoch": 1.47, "learning_rate": 3.526641820828907e-05, "loss": 0.2555, "step": 262200 }, { "epoch": 1.47, "learning_rate": 3.526079697802661e-05, "loss": 0.2531, "step": 262300 }, { "epoch": 1.48, "learning_rate": 3.5255175747764154e-05, "loss": 0.2608, "step": 262400 }, { "epoch": 1.48, "learning_rate": 3.52495545175017e-05, "loss": 0.2542, "step": 262500 }, { "epoch": 1.48, "learning_rate": 3.524393328723925e-05, "loss": 0.2657, "step": 262600 }, { "epoch": 1.48, "learning_rate": 3.5238368269279415e-05, "loss": 0.2558, "step": 262700 }, { "epoch": 1.48, "learning_rate": 3.523274703901696e-05, "loss": 0.2648, "step": 262800 }, { "epoch": 1.48, "learning_rate": 3.522712580875451e-05, "loss": 0.2605, "step": 262900 }, { "epoch": 1.48, "learning_rate": 3.522150457849205e-05, "loss": 0.2502, "step": 263000 }, { "epoch": 1.48, "learning_rate": 3.521588334822959e-05, "loss": 0.2528, "step": 263100 }, { "epoch": 1.48, "learning_rate": 3.521026211796714e-05, "loss": 0.2573, "step": 263200 }, { "epoch": 1.48, "learning_rate": 3.5204640887704685e-05, "loss": 0.2513, "step": 263300 }, { "epoch": 1.48, "learning_rate": 3.5199019657442225e-05, "loss": 0.2562, "step": 263400 }, { "epoch": 1.48, "learning_rate": 3.519339842717977e-05, "loss": 0.2506, "step": 263500 }, { "epoch": 1.48, "learning_rate": 3.518777719691732e-05, "loss": 0.2542, "step": 263600 }, { "epoch": 1.48, "learning_rate": 3.5182155966654864e-05, "loss": 0.2515, "step": 263700 }, { "epoch": 1.48, "learning_rate": 3.517653473639241e-05, "loss": 0.2627, "step": 263800 }, { "epoch": 1.48, "learning_rate": 3.5170913506129956e-05, "loss": 0.2548, "step": 263900 }, { "epoch": 1.48, "learning_rate": 3.51652922758675e-05, "loss": 0.2553, "step": 264000 }, { "epoch": 1.48, "learning_rate": 3.515967104560504e-05, "loss": 0.2591, "step": 264100 }, { "epoch": 1.49, "learning_rate": 3.515404981534259e-05, "loss": 0.2563, "step": 264200 }, { "epoch": 1.49, "learning_rate": 3.5148428585080135e-05, "loss": 0.2575, "step": 264300 }, { "epoch": 1.49, "learning_rate": 3.5142807354817674e-05, "loss": 0.2534, "step": 264400 }, { "epoch": 1.49, "learning_rate": 3.513718612455522e-05, "loss": 0.2613, "step": 264500 }, { "epoch": 1.49, "learning_rate": 3.513156489429277e-05, "loss": 0.2509, "step": 264600 }, { "epoch": 1.49, "learning_rate": 3.512599987633294e-05, "loss": 0.2547, "step": 264700 }, { "epoch": 1.49, "learning_rate": 3.512037864607048e-05, "loss": 0.2613, "step": 264800 }, { "epoch": 1.49, "learning_rate": 3.511475741580803e-05, "loss": 0.2616, "step": 264900 }, { "epoch": 1.49, "learning_rate": 3.510913618554557e-05, "loss": 0.2528, "step": 265000 }, { "epoch": 1.49, "learning_rate": 3.510351495528311e-05, "loss": 0.2598, "step": 265100 }, { "epoch": 1.49, "learning_rate": 3.509789372502066e-05, "loss": 0.2557, "step": 265200 }, { "epoch": 1.49, "learning_rate": 3.5092272494758205e-05, "loss": 0.2582, "step": 265300 }, { "epoch": 1.49, "learning_rate": 3.508665126449575e-05, "loss": 0.2499, "step": 265400 }, { "epoch": 1.49, "learning_rate": 3.508103003423329e-05, "loss": 0.2501, "step": 265500 }, { "epoch": 1.49, "learning_rate": 3.507540880397084e-05, "loss": 0.2542, "step": 265600 }, { "epoch": 1.49, "learning_rate": 3.5069787573708384e-05, "loss": 0.2572, "step": 265700 }, { "epoch": 1.49, "learning_rate": 3.506416634344592e-05, "loss": 0.2547, "step": 265800 }, { "epoch": 1.49, "learning_rate": 3.505854511318347e-05, "loss": 0.2533, "step": 265900 }, { "epoch": 1.5, "learning_rate": 3.5052923882921016e-05, "loss": 0.2534, "step": 266000 }, { "epoch": 1.5, "learning_rate": 3.504730265265856e-05, "loss": 0.2545, "step": 266100 }, { "epoch": 1.5, "learning_rate": 3.504168142239611e-05, "loss": 0.2531, "step": 266200 }, { "epoch": 1.5, "learning_rate": 3.5036060192133654e-05, "loss": 0.2562, "step": 266300 }, { "epoch": 1.5, "learning_rate": 3.50304389618712e-05, "loss": 0.2537, "step": 266400 }, { "epoch": 1.5, "learning_rate": 3.502481773160874e-05, "loss": 0.2553, "step": 266500 }, { "epoch": 1.5, "learning_rate": 3.5019196501346287e-05, "loss": 0.2505, "step": 266600 }, { "epoch": 1.5, "learning_rate": 3.501357527108383e-05, "loss": 0.2583, "step": 266700 }, { "epoch": 1.5, "learning_rate": 3.500795404082138e-05, "loss": 0.2475, "step": 266800 }, { "epoch": 1.5, "learning_rate": 3.500238902286154e-05, "loss": 0.2632, "step": 266900 }, { "epoch": 1.5, "learning_rate": 3.4996767792599086e-05, "loss": 0.259, "step": 267000 }, { "epoch": 1.5, "learning_rate": 3.499114656233663e-05, "loss": 0.2537, "step": 267100 }, { "epoch": 1.5, "learning_rate": 3.498552533207418e-05, "loss": 0.2529, "step": 267200 }, { "epoch": 1.5, "learning_rate": 3.4979904101811725e-05, "loss": 0.2503, "step": 267300 }, { "epoch": 1.5, "learning_rate": 3.497428287154927e-05, "loss": 0.2625, "step": 267400 }, { "epoch": 1.5, "learning_rate": 3.496871785358944e-05, "loss": 0.25, "step": 267500 }, { "epoch": 1.5, "learning_rate": 3.4963096623326985e-05, "loss": 0.2485, "step": 267600 }, { "epoch": 1.5, "learning_rate": 3.4957475393064525e-05, "loss": 0.2481, "step": 267700 }, { "epoch": 1.51, "learning_rate": 3.495185416280207e-05, "loss": 0.2568, "step": 267800 }, { "epoch": 1.51, "learning_rate": 3.494623293253962e-05, "loss": 0.2549, "step": 267900 }, { "epoch": 1.51, "learning_rate": 3.494061170227716e-05, "loss": 0.2516, "step": 268000 }, { "epoch": 1.51, "learning_rate": 3.49349904720147e-05, "loss": 0.2646, "step": 268100 }, { "epoch": 1.51, "learning_rate": 3.492936924175225e-05, "loss": 0.2534, "step": 268200 }, { "epoch": 1.51, "learning_rate": 3.4923748011489796e-05, "loss": 0.2581, "step": 268300 }, { "epoch": 1.51, "learning_rate": 3.491812678122734e-05, "loss": 0.2553, "step": 268400 }, { "epoch": 1.51, "learning_rate": 3.491250555096489e-05, "loss": 0.2585, "step": 268500 }, { "epoch": 1.51, "learning_rate": 3.4906884320702434e-05, "loss": 0.2578, "step": 268600 }, { "epoch": 1.51, "learning_rate": 3.4901263090439974e-05, "loss": 0.2521, "step": 268700 }, { "epoch": 1.51, "learning_rate": 3.489564186017752e-05, "loss": 0.253, "step": 268800 }, { "epoch": 1.51, "learning_rate": 3.4890020629915067e-05, "loss": 0.2537, "step": 268900 }, { "epoch": 1.51, "learning_rate": 3.488439939965261e-05, "loss": 0.2533, "step": 269000 }, { "epoch": 1.51, "learning_rate": 3.487877816939015e-05, "loss": 0.2554, "step": 269100 }, { "epoch": 1.51, "learning_rate": 3.48731569391277e-05, "loss": 0.2612, "step": 269200 }, { "epoch": 1.51, "learning_rate": 3.4867535708865245e-05, "loss": 0.2588, "step": 269300 }, { "epoch": 1.51, "learning_rate": 3.4861914478602784e-05, "loss": 0.2542, "step": 269400 }, { "epoch": 1.51, "learning_rate": 3.485629324834033e-05, "loss": 0.2548, "step": 269500 }, { "epoch": 1.52, "learning_rate": 3.485067201807788e-05, "loss": 0.26, "step": 269600 }, { "epoch": 1.52, "learning_rate": 3.484505078781542e-05, "loss": 0.2551, "step": 269700 }, { "epoch": 1.52, "learning_rate": 3.483942955755297e-05, "loss": 0.2541, "step": 269800 }, { "epoch": 1.52, "learning_rate": 3.483386453959314e-05, "loss": 0.2581, "step": 269900 }, { "epoch": 1.52, "learning_rate": 3.482824330933068e-05, "loss": 0.2539, "step": 270000 }, { "epoch": 1.52, "learning_rate": 3.482262207906823e-05, "loss": 0.2528, "step": 270100 }, { "epoch": 1.52, "learning_rate": 3.481700084880577e-05, "loss": 0.2546, "step": 270200 }, { "epoch": 1.52, "learning_rate": 3.4811379618543315e-05, "loss": 0.252, "step": 270300 }, { "epoch": 1.52, "learning_rate": 3.480575838828086e-05, "loss": 0.2559, "step": 270400 }, { "epoch": 1.52, "learning_rate": 3.48001371580184e-05, "loss": 0.2595, "step": 270500 }, { "epoch": 1.52, "learning_rate": 3.479451592775595e-05, "loss": 0.2555, "step": 270600 }, { "epoch": 1.52, "learning_rate": 3.4788894697493494e-05, "loss": 0.2518, "step": 270700 }, { "epoch": 1.52, "learning_rate": 3.478327346723104e-05, "loss": 0.258, "step": 270800 }, { "epoch": 1.52, "learning_rate": 3.4777652236968586e-05, "loss": 0.2481, "step": 270900 }, { "epoch": 1.52, "learning_rate": 3.477203100670613e-05, "loss": 0.2579, "step": 271000 }, { "epoch": 1.52, "learning_rate": 3.476640977644368e-05, "loss": 0.2572, "step": 271100 }, { "epoch": 1.52, "learning_rate": 3.476078854618122e-05, "loss": 0.2547, "step": 271200 }, { "epoch": 1.53, "learning_rate": 3.4755167315918765e-05, "loss": 0.2521, "step": 271300 }, { "epoch": 1.53, "learning_rate": 3.474954608565631e-05, "loss": 0.2507, "step": 271400 }, { "epoch": 1.53, "learning_rate": 3.474392485539386e-05, "loss": 0.2595, "step": 271500 }, { "epoch": 1.53, "learning_rate": 3.47383036251314e-05, "loss": 0.2581, "step": 271600 }, { "epoch": 1.53, "learning_rate": 3.473268239486894e-05, "loss": 0.2592, "step": 271700 }, { "epoch": 1.53, "learning_rate": 3.472706116460649e-05, "loss": 0.2488, "step": 271800 }, { "epoch": 1.53, "learning_rate": 3.472143993434403e-05, "loss": 0.2593, "step": 271900 }, { "epoch": 1.53, "learning_rate": 3.4715818704081575e-05, "loss": 0.2521, "step": 272000 }, { "epoch": 1.53, "learning_rate": 3.471019747381912e-05, "loss": 0.251, "step": 272100 }, { "epoch": 1.53, "learning_rate": 3.470457624355667e-05, "loss": 0.2547, "step": 272200 }, { "epoch": 1.53, "learning_rate": 3.469895501329421e-05, "loss": 0.256, "step": 272300 }, { "epoch": 1.53, "learning_rate": 3.469338999533438e-05, "loss": 0.2573, "step": 272400 }, { "epoch": 1.53, "learning_rate": 3.468776876507193e-05, "loss": 0.2537, "step": 272500 }, { "epoch": 1.53, "learning_rate": 3.4682147534809474e-05, "loss": 0.2543, "step": 272600 }, { "epoch": 1.53, "learning_rate": 3.4676526304547014e-05, "loss": 0.2564, "step": 272700 }, { "epoch": 1.53, "learning_rate": 3.467090507428456e-05, "loss": 0.2505, "step": 272800 }, { "epoch": 1.53, "learning_rate": 3.4665283844022106e-05, "loss": 0.2566, "step": 272900 }, { "epoch": 1.53, "learning_rate": 3.4659662613759646e-05, "loss": 0.2573, "step": 273000 }, { "epoch": 1.54, "learning_rate": 3.465404138349719e-05, "loss": 0.2546, "step": 273100 }, { "epoch": 1.54, "learning_rate": 3.464842015323474e-05, "loss": 0.2566, "step": 273200 }, { "epoch": 1.54, "learning_rate": 3.4642798922972284e-05, "loss": 0.2564, "step": 273300 }, { "epoch": 1.54, "learning_rate": 3.4637177692709824e-05, "loss": 0.2545, "step": 273400 }, { "epoch": 1.54, "learning_rate": 3.463155646244737e-05, "loss": 0.2593, "step": 273500 }, { "epoch": 1.54, "learning_rate": 3.4625935232184916e-05, "loss": 0.2529, "step": 273600 }, { "epoch": 1.54, "learning_rate": 3.462031400192246e-05, "loss": 0.2524, "step": 273700 }, { "epoch": 1.54, "learning_rate": 3.461469277166001e-05, "loss": 0.2533, "step": 273800 }, { "epoch": 1.54, "learning_rate": 3.4609071541397555e-05, "loss": 0.2545, "step": 273900 }, { "epoch": 1.54, "learning_rate": 3.46034503111351e-05, "loss": 0.2456, "step": 274000 }, { "epoch": 1.54, "learning_rate": 3.459782908087264e-05, "loss": 0.2489, "step": 274100 }, { "epoch": 1.54, "learning_rate": 3.459220785061019e-05, "loss": 0.2513, "step": 274200 }, { "epoch": 1.54, "learning_rate": 3.4586586620347734e-05, "loss": 0.2594, "step": 274300 }, { "epoch": 1.54, "learning_rate": 3.458096539008527e-05, "loss": 0.2549, "step": 274400 }, { "epoch": 1.54, "learning_rate": 3.457534415982282e-05, "loss": 0.246, "step": 274500 }, { "epoch": 1.54, "learning_rate": 3.4569722929560366e-05, "loss": 0.2474, "step": 274600 }, { "epoch": 1.54, "learning_rate": 3.456410169929791e-05, "loss": 0.2515, "step": 274700 }, { "epoch": 1.54, "learning_rate": 3.455848046903545e-05, "loss": 0.2555, "step": 274800 }, { "epoch": 1.55, "learning_rate": 3.4552859238773e-05, "loss": 0.2469, "step": 274900 }, { "epoch": 1.55, "learning_rate": 3.454729422081317e-05, "loss": 0.2455, "step": 275000 }, { "epoch": 1.55, "learning_rate": 3.454167299055071e-05, "loss": 0.251, "step": 275100 }, { "epoch": 1.55, "learning_rate": 3.453605176028826e-05, "loss": 0.2529, "step": 275200 }, { "epoch": 1.55, "learning_rate": 3.4530430530025804e-05, "loss": 0.2507, "step": 275300 }, { "epoch": 1.55, "learning_rate": 3.452480929976335e-05, "loss": 0.2517, "step": 275400 }, { "epoch": 1.55, "learning_rate": 3.451918806950089e-05, "loss": 0.2537, "step": 275500 }, { "epoch": 1.55, "learning_rate": 3.4513566839238436e-05, "loss": 0.2551, "step": 275600 }, { "epoch": 1.55, "learning_rate": 3.450794560897598e-05, "loss": 0.2583, "step": 275700 }, { "epoch": 1.55, "learning_rate": 3.450232437871352e-05, "loss": 0.2419, "step": 275800 }, { "epoch": 1.55, "learning_rate": 3.449670314845107e-05, "loss": 0.2598, "step": 275900 }, { "epoch": 1.55, "learning_rate": 3.4491081918188615e-05, "loss": 0.2509, "step": 276000 }, { "epoch": 1.55, "learning_rate": 3.448546068792616e-05, "loss": 0.2509, "step": 276100 }, { "epoch": 1.55, "learning_rate": 3.447983945766371e-05, "loss": 0.2493, "step": 276200 }, { "epoch": 1.55, "learning_rate": 3.4474218227401253e-05, "loss": 0.2522, "step": 276300 }, { "epoch": 1.55, "learning_rate": 3.44685969971388e-05, "loss": 0.2544, "step": 276400 }, { "epoch": 1.55, "learning_rate": 3.446297576687634e-05, "loss": 0.2543, "step": 276500 }, { "epoch": 1.55, "learning_rate": 3.4457354536613885e-05, "loss": 0.2535, "step": 276600 }, { "epoch": 1.56, "learning_rate": 3.445173330635143e-05, "loss": 0.2554, "step": 276700 }, { "epoch": 1.56, "learning_rate": 3.444611207608898e-05, "loss": 0.2542, "step": 276800 }, { "epoch": 1.56, "learning_rate": 3.444049084582652e-05, "loss": 0.2469, "step": 276900 }, { "epoch": 1.56, "learning_rate": 3.4434869615564064e-05, "loss": 0.2468, "step": 277000 }, { "epoch": 1.56, "learning_rate": 3.442924838530161e-05, "loss": 0.2531, "step": 277100 }, { "epoch": 1.56, "learning_rate": 3.442362715503915e-05, "loss": 0.2531, "step": 277200 }, { "epoch": 1.56, "learning_rate": 3.4418005924776696e-05, "loss": 0.2528, "step": 277300 }, { "epoch": 1.56, "learning_rate": 3.441238469451424e-05, "loss": 0.2501, "step": 277400 }, { "epoch": 1.56, "learning_rate": 3.440676346425179e-05, "loss": 0.2585, "step": 277500 }, { "epoch": 1.56, "learning_rate": 3.440114223398933e-05, "loss": 0.2611, "step": 277600 }, { "epoch": 1.56, "learning_rate": 3.4395521003726874e-05, "loss": 0.2515, "step": 277700 }, { "epoch": 1.56, "learning_rate": 3.438989977346442e-05, "loss": 0.2523, "step": 277800 }, { "epoch": 1.56, "learning_rate": 3.438427854320197e-05, "loss": 0.2522, "step": 277900 }, { "epoch": 1.56, "learning_rate": 3.437865731293951e-05, "loss": 0.261, "step": 278000 }, { "epoch": 1.56, "learning_rate": 3.437303608267706e-05, "loss": 0.2558, "step": 278100 }, { "epoch": 1.56, "learning_rate": 3.4367414852414606e-05, "loss": 0.2533, "step": 278200 }, { "epoch": 1.56, "learning_rate": 3.4361793622152145e-05, "loss": 0.252, "step": 278300 }, { "epoch": 1.56, "learning_rate": 3.435617239188969e-05, "loss": 0.2537, "step": 278400 }, { "epoch": 1.57, "learning_rate": 3.435055116162724e-05, "loss": 0.2431, "step": 278500 }, { "epoch": 1.57, "learning_rate": 3.434492993136478e-05, "loss": 0.2536, "step": 278600 }, { "epoch": 1.57, "learning_rate": 3.4339308701102323e-05, "loss": 0.2533, "step": 278700 }, { "epoch": 1.57, "learning_rate": 3.433368747083987e-05, "loss": 0.2483, "step": 278800 }, { "epoch": 1.57, "learning_rate": 3.4328066240577416e-05, "loss": 0.246, "step": 278900 }, { "epoch": 1.57, "learning_rate": 3.4322445010314955e-05, "loss": 0.251, "step": 279000 }, { "epoch": 1.57, "learning_rate": 3.431687999235513e-05, "loss": 0.2589, "step": 279100 }, { "epoch": 1.57, "learning_rate": 3.4311258762092676e-05, "loss": 0.2535, "step": 279200 }, { "epoch": 1.57, "learning_rate": 3.430563753183022e-05, "loss": 0.2511, "step": 279300 }, { "epoch": 1.57, "learning_rate": 3.430001630156776e-05, "loss": 0.2524, "step": 279400 }, { "epoch": 1.57, "learning_rate": 3.429445128360793e-05, "loss": 0.2574, "step": 279500 }, { "epoch": 1.57, "learning_rate": 3.4288830053345476e-05, "loss": 0.2495, "step": 279600 }, { "epoch": 1.57, "learning_rate": 3.428320882308302e-05, "loss": 0.2504, "step": 279700 }, { "epoch": 1.57, "learning_rate": 3.427758759282056e-05, "loss": 0.2474, "step": 279800 }, { "epoch": 1.57, "learning_rate": 3.427196636255811e-05, "loss": 0.2488, "step": 279900 }, { "epoch": 1.57, "learning_rate": 3.4266345132295654e-05, "loss": 0.2512, "step": 280000 }, { "epoch": 1.57, "learning_rate": 3.42607239020332e-05, "loss": 0.247, "step": 280100 }, { "epoch": 1.58, "learning_rate": 3.425510267177075e-05, "loss": 0.2563, "step": 280200 }, { "epoch": 1.58, "learning_rate": 3.424948144150829e-05, "loss": 0.2486, "step": 280300 }, { "epoch": 1.58, "learning_rate": 3.424386021124584e-05, "loss": 0.2551, "step": 280400 }, { "epoch": 1.58, "learning_rate": 3.423823898098338e-05, "loss": 0.2454, "step": 280500 }, { "epoch": 1.58, "learning_rate": 3.4232617750720925e-05, "loss": 0.2527, "step": 280600 }, { "epoch": 1.58, "learning_rate": 3.422699652045847e-05, "loss": 0.2517, "step": 280700 }, { "epoch": 1.58, "learning_rate": 3.422137529019601e-05, "loss": 0.2512, "step": 280800 }, { "epoch": 1.58, "learning_rate": 3.421575405993356e-05, "loss": 0.2536, "step": 280900 }, { "epoch": 1.58, "learning_rate": 3.4210132829671103e-05, "loss": 0.2534, "step": 281000 }, { "epoch": 1.58, "learning_rate": 3.420451159940865e-05, "loss": 0.245, "step": 281100 }, { "epoch": 1.58, "learning_rate": 3.419889036914619e-05, "loss": 0.2566, "step": 281200 }, { "epoch": 1.58, "learning_rate": 3.4193269138883735e-05, "loss": 0.2541, "step": 281300 }, { "epoch": 1.58, "learning_rate": 3.418764790862128e-05, "loss": 0.2521, "step": 281400 }, { "epoch": 1.58, "learning_rate": 3.418202667835883e-05, "loss": 0.2551, "step": 281500 }, { "epoch": 1.58, "learning_rate": 3.4176405448096374e-05, "loss": 0.2566, "step": 281600 }, { "epoch": 1.58, "learning_rate": 3.417084043013654e-05, "loss": 0.2462, "step": 281700 }, { "epoch": 1.58, "learning_rate": 3.416521919987409e-05, "loss": 0.2473, "step": 281800 }, { "epoch": 1.58, "learning_rate": 3.415959796961163e-05, "loss": 0.2504, "step": 281900 }, { "epoch": 1.59, "learning_rate": 3.4153976739349174e-05, "loss": 0.2494, "step": 282000 }, { "epoch": 1.59, "learning_rate": 3.414835550908672e-05, "loss": 0.2584, "step": 282100 }, { "epoch": 1.59, "learning_rate": 3.4142734278824267e-05, "loss": 0.2493, "step": 282200 }, { "epoch": 1.59, "learning_rate": 3.4137113048561806e-05, "loss": 0.2424, "step": 282300 }, { "epoch": 1.59, "learning_rate": 3.413149181829935e-05, "loss": 0.2505, "step": 282400 }, { "epoch": 1.59, "learning_rate": 3.41258705880369e-05, "loss": 0.25, "step": 282500 }, { "epoch": 1.59, "learning_rate": 3.4120249357774445e-05, "loss": 0.2638, "step": 282600 }, { "epoch": 1.59, "learning_rate": 3.411462812751199e-05, "loss": 0.2529, "step": 282700 }, { "epoch": 1.59, "learning_rate": 3.410900689724954e-05, "loss": 0.2542, "step": 282800 }, { "epoch": 1.59, "learning_rate": 3.4103385666987084e-05, "loss": 0.2529, "step": 282900 }, { "epoch": 1.59, "learning_rate": 3.409776443672462e-05, "loss": 0.2485, "step": 283000 }, { "epoch": 1.59, "learning_rate": 3.409214320646217e-05, "loss": 0.2538, "step": 283100 }, { "epoch": 1.59, "learning_rate": 3.4086521976199716e-05, "loss": 0.2542, "step": 283200 }, { "epoch": 1.59, "learning_rate": 3.4080900745937255e-05, "loss": 0.2448, "step": 283300 }, { "epoch": 1.59, "learning_rate": 3.40752795156748e-05, "loss": 0.2436, "step": 283400 }, { "epoch": 1.59, "learning_rate": 3.406965828541235e-05, "loss": 0.251, "step": 283500 }, { "epoch": 1.59, "learning_rate": 3.4064037055149894e-05, "loss": 0.2503, "step": 283600 }, { "epoch": 1.59, "learning_rate": 3.4058415824887434e-05, "loss": 0.2501, "step": 283700 }, { "epoch": 1.6, "learning_rate": 3.405279459462498e-05, "loss": 0.2515, "step": 283800 }, { "epoch": 1.6, "learning_rate": 3.4047173364362526e-05, "loss": 0.2487, "step": 283900 }, { "epoch": 1.6, "learning_rate": 3.4041552134100066e-05, "loss": 0.2582, "step": 284000 }, { "epoch": 1.6, "learning_rate": 3.403593090383761e-05, "loss": 0.2542, "step": 284100 }, { "epoch": 1.6, "learning_rate": 3.403030967357516e-05, "loss": 0.2492, "step": 284200 }, { "epoch": 1.6, "learning_rate": 3.4024688443312704e-05, "loss": 0.2503, "step": 284300 }, { "epoch": 1.6, "learning_rate": 3.401906721305025e-05, "loss": 0.2515, "step": 284400 }, { "epoch": 1.6, "learning_rate": 3.40134459827878e-05, "loss": 0.2492, "step": 284500 }, { "epoch": 1.6, "learning_rate": 3.400782475252534e-05, "loss": 0.2438, "step": 284600 }, { "epoch": 1.6, "learning_rate": 3.400220352226288e-05, "loss": 0.2495, "step": 284700 }, { "epoch": 1.6, "learning_rate": 3.399658229200043e-05, "loss": 0.2471, "step": 284800 }, { "epoch": 1.6, "learning_rate": 3.3990961061737975e-05, "loss": 0.2485, "step": 284900 }, { "epoch": 1.6, "learning_rate": 3.3985339831475515e-05, "loss": 0.2418, "step": 285000 }, { "epoch": 1.6, "learning_rate": 3.397971860121306e-05, "loss": 0.2456, "step": 285100 }, { "epoch": 1.6, "learning_rate": 3.397409737095061e-05, "loss": 0.2537, "step": 285200 }, { "epoch": 1.6, "learning_rate": 3.3968476140688154e-05, "loss": 0.2545, "step": 285300 }, { "epoch": 1.6, "learning_rate": 3.396291112272832e-05, "loss": 0.2486, "step": 285400 }, { "epoch": 1.6, "learning_rate": 3.395728989246587e-05, "loss": 0.2586, "step": 285500 }, { "epoch": 1.61, "learning_rate": 3.3951668662203414e-05, "loss": 0.2582, "step": 285600 }, { "epoch": 1.61, "learning_rate": 3.394604743194096e-05, "loss": 0.2526, "step": 285700 }, { "epoch": 1.61, "learning_rate": 3.39404262016785e-05, "loss": 0.2537, "step": 285800 }, { "epoch": 1.61, "learning_rate": 3.3934804971416046e-05, "loss": 0.255, "step": 285900 }, { "epoch": 1.61, "learning_rate": 3.392918374115359e-05, "loss": 0.254, "step": 286000 }, { "epoch": 1.61, "learning_rate": 3.392356251089113e-05, "loss": 0.249, "step": 286100 }, { "epoch": 1.61, "learning_rate": 3.391794128062868e-05, "loss": 0.2498, "step": 286200 }, { "epoch": 1.61, "learning_rate": 3.3912320050366224e-05, "loss": 0.254, "step": 286300 }, { "epoch": 1.61, "learning_rate": 3.390669882010377e-05, "loss": 0.2542, "step": 286400 }, { "epoch": 1.61, "learning_rate": 3.390107758984131e-05, "loss": 0.257, "step": 286500 }, { "epoch": 1.61, "learning_rate": 3.3895456359578856e-05, "loss": 0.2501, "step": 286600 }, { "epoch": 1.61, "learning_rate": 3.38898351293164e-05, "loss": 0.2447, "step": 286700 }, { "epoch": 1.61, "learning_rate": 3.388421389905395e-05, "loss": 0.254, "step": 286800 }, { "epoch": 1.61, "learning_rate": 3.3878592668791495e-05, "loss": 0.2475, "step": 286900 }, { "epoch": 1.61, "learning_rate": 3.387297143852904e-05, "loss": 0.2498, "step": 287000 }, { "epoch": 1.61, "learning_rate": 3.386735020826659e-05, "loss": 0.2546, "step": 287100 }, { "epoch": 1.61, "learning_rate": 3.386172897800413e-05, "loss": 0.2472, "step": 287200 }, { "epoch": 1.61, "learning_rate": 3.3856107747741673e-05, "loss": 0.2479, "step": 287300 }, { "epoch": 1.62, "learning_rate": 3.385048651747922e-05, "loss": 0.2477, "step": 287400 }, { "epoch": 1.62, "learning_rate": 3.384486528721676e-05, "loss": 0.2627, "step": 287500 }, { "epoch": 1.62, "learning_rate": 3.3839244056954306e-05, "loss": 0.2557, "step": 287600 }, { "epoch": 1.62, "learning_rate": 3.383362282669185e-05, "loss": 0.2448, "step": 287700 }, { "epoch": 1.62, "learning_rate": 3.38280015964294e-05, "loss": 0.2518, "step": 287800 }, { "epoch": 1.62, "learning_rate": 3.382238036616694e-05, "loss": 0.2528, "step": 287900 }, { "epoch": 1.62, "learning_rate": 3.3816759135904484e-05, "loss": 0.2455, "step": 288000 }, { "epoch": 1.62, "learning_rate": 3.381113790564203e-05, "loss": 0.2587, "step": 288100 }, { "epoch": 1.62, "learning_rate": 3.380551667537957e-05, "loss": 0.2499, "step": 288200 }, { "epoch": 1.62, "learning_rate": 3.3799895445117116e-05, "loss": 0.2543, "step": 288300 }, { "epoch": 1.62, "learning_rate": 3.379427421485466e-05, "loss": 0.2518, "step": 288400 }, { "epoch": 1.62, "learning_rate": 3.378865298459221e-05, "loss": 0.2505, "step": 288500 }, { "epoch": 1.62, "learning_rate": 3.3783031754329755e-05, "loss": 0.243, "step": 288600 }, { "epoch": 1.62, "learning_rate": 3.37774105240673e-05, "loss": 0.2497, "step": 288700 }, { "epoch": 1.62, "learning_rate": 3.377178929380485e-05, "loss": 0.2584, "step": 288800 }, { "epoch": 1.62, "learning_rate": 3.376616806354239e-05, "loss": 0.2473, "step": 288900 }, { "epoch": 1.62, "learning_rate": 3.376054683327993e-05, "loss": 0.2592, "step": 289000 }, { "epoch": 1.63, "learning_rate": 3.37549818153201e-05, "loss": 0.2522, "step": 289100 }, { "epoch": 1.63, "learning_rate": 3.374936058505765e-05, "loss": 0.251, "step": 289200 }, { "epoch": 1.63, "learning_rate": 3.3743739354795186e-05, "loss": 0.2462, "step": 289300 }, { "epoch": 1.63, "learning_rate": 3.373811812453273e-05, "loss": 0.2471, "step": 289400 }, { "epoch": 1.63, "learning_rate": 3.373249689427028e-05, "loss": 0.2498, "step": 289500 }, { "epoch": 1.63, "learning_rate": 3.372687566400783e-05, "loss": 0.2522, "step": 289600 }, { "epoch": 1.63, "learning_rate": 3.372125443374537e-05, "loss": 0.2574, "step": 289700 }, { "epoch": 1.63, "learning_rate": 3.371563320348292e-05, "loss": 0.2515, "step": 289800 }, { "epoch": 1.63, "learning_rate": 3.3710011973220464e-05, "loss": 0.2494, "step": 289900 }, { "epoch": 1.63, "learning_rate": 3.3704390742958004e-05, "loss": 0.2488, "step": 290000 }, { "epoch": 1.63, "learning_rate": 3.369876951269555e-05, "loss": 0.2447, "step": 290100 }, { "epoch": 1.63, "learning_rate": 3.369320449473572e-05, "loss": 0.2458, "step": 290200 }, { "epoch": 1.63, "learning_rate": 3.3687583264473264e-05, "loss": 0.2512, "step": 290300 }, { "epoch": 1.63, "learning_rate": 3.36819620342108e-05, "loss": 0.2434, "step": 290400 }, { "epoch": 1.63, "learning_rate": 3.3676340803948356e-05, "loss": 0.2471, "step": 290500 }, { "epoch": 1.63, "learning_rate": 3.36707195736859e-05, "loss": 0.2452, "step": 290600 }, { "epoch": 1.63, "learning_rate": 3.366509834342345e-05, "loss": 0.2477, "step": 290700 }, { "epoch": 1.63, "learning_rate": 3.365947711316099e-05, "loss": 0.2558, "step": 290800 }, { "epoch": 1.64, "learning_rate": 3.3653855882898535e-05, "loss": 0.2459, "step": 290900 }, { "epoch": 1.64, "learning_rate": 3.364823465263608e-05, "loss": 0.2487, "step": 291000 }, { "epoch": 1.64, "learning_rate": 3.364261342237362e-05, "loss": 0.2438, "step": 291100 }, { "epoch": 1.64, "learning_rate": 3.363699219211117e-05, "loss": 0.2484, "step": 291200 }, { "epoch": 1.64, "learning_rate": 3.363137096184871e-05, "loss": 0.2433, "step": 291300 }, { "epoch": 1.64, "learning_rate": 3.362574973158626e-05, "loss": 0.245, "step": 291400 }, { "epoch": 1.64, "learning_rate": 3.36201285013238e-05, "loss": 0.2446, "step": 291500 }, { "epoch": 1.64, "learning_rate": 3.3614507271061345e-05, "loss": 0.2444, "step": 291600 }, { "epoch": 1.64, "learning_rate": 3.360888604079889e-05, "loss": 0.2469, "step": 291700 }, { "epoch": 1.64, "learning_rate": 3.360326481053643e-05, "loss": 0.243, "step": 291800 }, { "epoch": 1.64, "learning_rate": 3.359764358027398e-05, "loss": 0.2506, "step": 291900 }, { "epoch": 1.64, "learning_rate": 3.3592022350011523e-05, "loss": 0.2442, "step": 292000 }, { "epoch": 1.64, "learning_rate": 3.358640111974907e-05, "loss": 0.2529, "step": 292100 }, { "epoch": 1.64, "learning_rate": 3.3580779889486616e-05, "loss": 0.258, "step": 292200 }, { "epoch": 1.64, "learning_rate": 3.357515865922416e-05, "loss": 0.2493, "step": 292300 }, { "epoch": 1.64, "learning_rate": 3.356953742896171e-05, "loss": 0.2481, "step": 292400 }, { "epoch": 1.64, "learning_rate": 3.356391619869925e-05, "loss": 0.2578, "step": 292500 }, { "epoch": 1.64, "learning_rate": 3.3558294968436794e-05, "loss": 0.2487, "step": 292600 }, { "epoch": 1.65, "learning_rate": 3.355267373817434e-05, "loss": 0.2526, "step": 292700 }, { "epoch": 1.65, "learning_rate": 3.354705250791189e-05, "loss": 0.2391, "step": 292800 }, { "epoch": 1.65, "learning_rate": 3.3541431277649426e-05, "loss": 0.2504, "step": 292900 }, { "epoch": 1.65, "learning_rate": 3.353581004738697e-05, "loss": 0.2453, "step": 293000 }, { "epoch": 1.65, "learning_rate": 3.353018881712452e-05, "loss": 0.2495, "step": 293100 }, { "epoch": 1.65, "learning_rate": 3.352456758686206e-05, "loss": 0.2492, "step": 293200 }, { "epoch": 1.65, "learning_rate": 3.3518946356599605e-05, "loss": 0.2564, "step": 293300 }, { "epoch": 1.65, "learning_rate": 3.351332512633715e-05, "loss": 0.2564, "step": 293400 }, { "epoch": 1.65, "learning_rate": 3.35077038960747e-05, "loss": 0.2465, "step": 293500 }, { "epoch": 1.65, "learning_rate": 3.350208266581224e-05, "loss": 0.2489, "step": 293600 }, { "epoch": 1.65, "learning_rate": 3.349646143554978e-05, "loss": 0.2502, "step": 293700 }, { "epoch": 1.65, "learning_rate": 3.349084020528733e-05, "loss": 0.2515, "step": 293800 }, { "epoch": 1.65, "learning_rate": 3.3485218975024876e-05, "loss": 0.2478, "step": 293900 }, { "epoch": 1.65, "learning_rate": 3.347959774476242e-05, "loss": 0.2515, "step": 294000 }, { "epoch": 1.65, "learning_rate": 3.347397651449997e-05, "loss": 0.2414, "step": 294100 }, { "epoch": 1.65, "learning_rate": 3.3468355284237514e-05, "loss": 0.2411, "step": 294200 }, { "epoch": 1.65, "learning_rate": 3.3462734053975054e-05, "loss": 0.2452, "step": 294300 }, { "epoch": 1.65, "learning_rate": 3.34571128237126e-05, "loss": 0.2463, "step": 294400 }, { "epoch": 1.66, "learning_rate": 3.345154780575277e-05, "loss": 0.2488, "step": 294500 }, { "epoch": 1.66, "learning_rate": 3.3445926575490314e-05, "loss": 0.2459, "step": 294600 }, { "epoch": 1.66, "learning_rate": 3.3440305345227854e-05, "loss": 0.253, "step": 294700 }, { "epoch": 1.66, "learning_rate": 3.343468411496541e-05, "loss": 0.2528, "step": 294800 }, { "epoch": 1.66, "learning_rate": 3.342906288470295e-05, "loss": 0.2441, "step": 294900 }, { "epoch": 1.66, "learning_rate": 3.342344165444049e-05, "loss": 0.2467, "step": 295000 }, { "epoch": 1.66, "learning_rate": 3.341782042417804e-05, "loss": 0.2446, "step": 295100 }, { "epoch": 1.66, "learning_rate": 3.3412199193915585e-05, "loss": 0.2488, "step": 295200 }, { "epoch": 1.66, "learning_rate": 3.3406577963653124e-05, "loss": 0.2484, "step": 295300 }, { "epoch": 1.66, "learning_rate": 3.340095673339067e-05, "loss": 0.2477, "step": 295400 }, { "epoch": 1.66, "learning_rate": 3.339533550312822e-05, "loss": 0.2553, "step": 295500 }, { "epoch": 1.66, "learning_rate": 3.338971427286576e-05, "loss": 0.2484, "step": 295600 }, { "epoch": 1.66, "learning_rate": 3.33840930426033e-05, "loss": 0.2424, "step": 295700 }, { "epoch": 1.66, "learning_rate": 3.337847181234085e-05, "loss": 0.2486, "step": 295800 }, { "epoch": 1.66, "learning_rate": 3.3372850582078395e-05, "loss": 0.2489, "step": 295900 }, { "epoch": 1.66, "learning_rate": 3.3367229351815935e-05, "loss": 0.245, "step": 296000 }, { "epoch": 1.66, "learning_rate": 3.336160812155348e-05, "loss": 0.2391, "step": 296100 }, { "epoch": 1.67, "learning_rate": 3.335598689129103e-05, "loss": 0.2522, "step": 296200 }, { "epoch": 1.67, "learning_rate": 3.3350365661028574e-05, "loss": 0.2494, "step": 296300 }, { "epoch": 1.67, "learning_rate": 3.334474443076612e-05, "loss": 0.2517, "step": 296400 }, { "epoch": 1.67, "learning_rate": 3.3339123200503666e-05, "loss": 0.2464, "step": 296500 }, { "epoch": 1.67, "learning_rate": 3.333350197024121e-05, "loss": 0.2498, "step": 296600 }, { "epoch": 1.67, "learning_rate": 3.332788073997875e-05, "loss": 0.243, "step": 296700 }, { "epoch": 1.67, "learning_rate": 3.33222595097163e-05, "loss": 0.2461, "step": 296800 }, { "epoch": 1.67, "learning_rate": 3.3316638279453845e-05, "loss": 0.2393, "step": 296900 }, { "epoch": 1.67, "learning_rate": 3.331101704919139e-05, "loss": 0.2476, "step": 297000 }, { "epoch": 1.67, "learning_rate": 3.330539581892893e-05, "loss": 0.2458, "step": 297100 }, { "epoch": 1.67, "learning_rate": 3.329977458866648e-05, "loss": 0.2463, "step": 297200 }, { "epoch": 1.67, "learning_rate": 3.329415335840402e-05, "loss": 0.2521, "step": 297300 }, { "epoch": 1.67, "learning_rate": 3.328853212814156e-05, "loss": 0.243, "step": 297400 }, { "epoch": 1.67, "learning_rate": 3.328291089787911e-05, "loss": 0.2424, "step": 297500 }, { "epoch": 1.67, "learning_rate": 3.3277289667616655e-05, "loss": 0.2498, "step": 297600 }, { "epoch": 1.67, "learning_rate": 3.32716684373542e-05, "loss": 0.2429, "step": 297700 }, { "epoch": 1.67, "learning_rate": 3.326604720709174e-05, "loss": 0.249, "step": 297800 }, { "epoch": 1.67, "learning_rate": 3.326042597682929e-05, "loss": 0.2436, "step": 297900 }, { "epoch": 1.68, "learning_rate": 3.325480474656683e-05, "loss": 0.2485, "step": 298000 }, { "epoch": 1.68, "learning_rate": 3.324918351630438e-05, "loss": 0.2501, "step": 298100 }, { "epoch": 1.68, "learning_rate": 3.3243562286041926e-05, "loss": 0.2481, "step": 298200 }, { "epoch": 1.68, "learning_rate": 3.323794105577947e-05, "loss": 0.2516, "step": 298300 }, { "epoch": 1.68, "learning_rate": 3.323231982551702e-05, "loss": 0.2511, "step": 298400 }, { "epoch": 1.68, "learning_rate": 3.322669859525456e-05, "loss": 0.2463, "step": 298500 }, { "epoch": 1.68, "learning_rate": 3.3221133577294726e-05, "loss": 0.2452, "step": 298600 }, { "epoch": 1.68, "learning_rate": 3.321551234703227e-05, "loss": 0.2531, "step": 298700 }, { "epoch": 1.68, "learning_rate": 3.320989111676982e-05, "loss": 0.2504, "step": 298800 }, { "epoch": 1.68, "learning_rate": 3.320426988650736e-05, "loss": 0.2515, "step": 298900 }, { "epoch": 1.68, "learning_rate": 3.319864865624491e-05, "loss": 0.241, "step": 299000 }, { "epoch": 1.68, "learning_rate": 3.319302742598246e-05, "loss": 0.2529, "step": 299100 }, { "epoch": 1.68, "learning_rate": 3.3187406195719996e-05, "loss": 0.2474, "step": 299200 }, { "epoch": 1.68, "learning_rate": 3.318178496545754e-05, "loss": 0.2457, "step": 299300 }, { "epoch": 1.68, "learning_rate": 3.317616373519509e-05, "loss": 0.2472, "step": 299400 }, { "epoch": 1.68, "learning_rate": 3.3170542504932635e-05, "loss": 0.2452, "step": 299500 }, { "epoch": 1.68, "learning_rate": 3.3164921274670175e-05, "loss": 0.2427, "step": 299600 }, { "epoch": 1.68, "learning_rate": 3.315935625671034e-05, "loss": 0.2434, "step": 299700 }, { "epoch": 1.69, "learning_rate": 3.315373502644789e-05, "loss": 0.2375, "step": 299800 }, { "epoch": 1.69, "learning_rate": 3.3148113796185435e-05, "loss": 0.2446, "step": 299900 }, { "epoch": 1.69, "learning_rate": 3.314249256592298e-05, "loss": 0.2418, "step": 300000 }, { "epoch": 1.69, "learning_rate": 3.313687133566053e-05, "loss": 0.2441, "step": 300100 }, { "epoch": 1.69, "learning_rate": 3.3131250105398074e-05, "loss": 0.2472, "step": 300200 }, { "epoch": 1.69, "learning_rate": 3.312562887513561e-05, "loss": 0.2522, "step": 300300 }, { "epoch": 1.69, "learning_rate": 3.312000764487316e-05, "loss": 0.2568, "step": 300400 }, { "epoch": 1.69, "learning_rate": 3.3114386414610706e-05, "loss": 0.251, "step": 300500 }, { "epoch": 1.69, "learning_rate": 3.310876518434825e-05, "loss": 0.2495, "step": 300600 }, { "epoch": 1.69, "learning_rate": 3.310314395408579e-05, "loss": 0.2388, "step": 300700 }, { "epoch": 1.69, "learning_rate": 3.309752272382334e-05, "loss": 0.2517, "step": 300800 }, { "epoch": 1.69, "learning_rate": 3.3091901493560884e-05, "loss": 0.2462, "step": 300900 }, { "epoch": 1.69, "learning_rate": 3.3086280263298424e-05, "loss": 0.246, "step": 301000 }, { "epoch": 1.69, "learning_rate": 3.308065903303597e-05, "loss": 0.2479, "step": 301100 }, { "epoch": 1.69, "learning_rate": 3.3075037802773516e-05, "loss": 0.2483, "step": 301200 }, { "epoch": 1.69, "learning_rate": 3.306941657251106e-05, "loss": 0.2491, "step": 301300 }, { "epoch": 1.69, "learning_rate": 3.30637953422486e-05, "loss": 0.2429, "step": 301400 }, { "epoch": 1.69, "learning_rate": 3.305817411198615e-05, "loss": 0.2489, "step": 301500 }, { "epoch": 1.7, "learning_rate": 3.3052552881723695e-05, "loss": 0.2512, "step": 301600 }, { "epoch": 1.7, "learning_rate": 3.304693165146124e-05, "loss": 0.2494, "step": 301700 }, { "epoch": 1.7, "learning_rate": 3.304131042119879e-05, "loss": 0.2465, "step": 301800 }, { "epoch": 1.7, "learning_rate": 3.3035689190936333e-05, "loss": 0.2536, "step": 301900 }, { "epoch": 1.7, "learning_rate": 3.303006796067388e-05, "loss": 0.2484, "step": 302000 }, { "epoch": 1.7, "learning_rate": 3.302444673041142e-05, "loss": 0.2477, "step": 302100 }, { "epoch": 1.7, "learning_rate": 3.3018825500148965e-05, "loss": 0.2535, "step": 302200 }, { "epoch": 1.7, "learning_rate": 3.301320426988651e-05, "loss": 0.2453, "step": 302300 }, { "epoch": 1.7, "learning_rate": 3.300758303962405e-05, "loss": 0.2396, "step": 302400 }, { "epoch": 1.7, "learning_rate": 3.30019618093616e-05, "loss": 0.245, "step": 302500 }, { "epoch": 1.7, "learning_rate": 3.2996340579099144e-05, "loss": 0.2455, "step": 302600 }, { "epoch": 1.7, "learning_rate": 3.299071934883669e-05, "loss": 0.244, "step": 302700 }, { "epoch": 1.7, "learning_rate": 3.298509811857423e-05, "loss": 0.2456, "step": 302800 }, { "epoch": 1.7, "learning_rate": 3.2979476888311776e-05, "loss": 0.2478, "step": 302900 }, { "epoch": 1.7, "learning_rate": 3.297385565804932e-05, "loss": 0.2418, "step": 303000 }, { "epoch": 1.7, "learning_rate": 3.296823442778686e-05, "loss": 0.2385, "step": 303100 }, { "epoch": 1.7, "learning_rate": 3.296261319752441e-05, "loss": 0.2424, "step": 303200 }, { "epoch": 1.7, "learning_rate": 3.295699196726196e-05, "loss": 0.2387, "step": 303300 }, { "epoch": 1.71, "learning_rate": 3.295142694930213e-05, "loss": 0.2443, "step": 303400 }, { "epoch": 1.71, "learning_rate": 3.294580571903967e-05, "loss": 0.2505, "step": 303500 }, { "epoch": 1.71, "learning_rate": 3.2940184488777214e-05, "loss": 0.2455, "step": 303600 }, { "epoch": 1.71, "learning_rate": 3.293456325851476e-05, "loss": 0.239, "step": 303700 }, { "epoch": 1.71, "learning_rate": 3.292894202825231e-05, "loss": 0.245, "step": 303800 }, { "epoch": 1.71, "learning_rate": 3.2923320797989846e-05, "loss": 0.2482, "step": 303900 }, { "epoch": 1.71, "learning_rate": 3.291769956772739e-05, "loss": 0.2423, "step": 304000 }, { "epoch": 1.71, "learning_rate": 3.291207833746494e-05, "loss": 0.2457, "step": 304100 }, { "epoch": 1.71, "learning_rate": 3.2906457107202485e-05, "loss": 0.2488, "step": 304200 }, { "epoch": 1.71, "learning_rate": 3.290083587694003e-05, "loss": 0.2423, "step": 304300 }, { "epoch": 1.71, "learning_rate": 3.289521464667758e-05, "loss": 0.245, "step": 304400 }, { "epoch": 1.71, "learning_rate": 3.2889593416415124e-05, "loss": 0.2458, "step": 304500 }, { "epoch": 1.71, "learning_rate": 3.2883972186152664e-05, "loss": 0.2491, "step": 304600 }, { "epoch": 1.71, "learning_rate": 3.287835095589021e-05, "loss": 0.2466, "step": 304700 }, { "epoch": 1.71, "learning_rate": 3.2872729725627756e-05, "loss": 0.2419, "step": 304800 }, { "epoch": 1.71, "learning_rate": 3.2867108495365296e-05, "loss": 0.243, "step": 304900 }, { "epoch": 1.71, "learning_rate": 3.286148726510284e-05, "loss": 0.2539, "step": 305000 }, { "epoch": 1.72, "learning_rate": 3.285586603484039e-05, "loss": 0.248, "step": 305100 }, { "epoch": 1.72, "learning_rate": 3.285024480457793e-05, "loss": 0.2438, "step": 305200 }, { "epoch": 1.72, "learning_rate": 3.2844623574315474e-05, "loss": 0.2465, "step": 305300 }, { "epoch": 1.72, "learning_rate": 3.283900234405302e-05, "loss": 0.2398, "step": 305400 }, { "epoch": 1.72, "learning_rate": 3.2833381113790567e-05, "loss": 0.2413, "step": 305500 }, { "epoch": 1.72, "learning_rate": 3.2827759883528106e-05, "loss": 0.2405, "step": 305600 }, { "epoch": 1.72, "learning_rate": 3.282213865326565e-05, "loss": 0.2368, "step": 305700 }, { "epoch": 1.72, "learning_rate": 3.28165174230032e-05, "loss": 0.2511, "step": 305800 }, { "epoch": 1.72, "learning_rate": 3.2810896192740745e-05, "loss": 0.2451, "step": 305900 }, { "epoch": 1.72, "learning_rate": 3.280527496247829e-05, "loss": 0.2404, "step": 306000 }, { "epoch": 1.72, "learning_rate": 3.279965373221584e-05, "loss": 0.2461, "step": 306100 }, { "epoch": 1.72, "learning_rate": 3.2794032501953384e-05, "loss": 0.2509, "step": 306200 }, { "epoch": 1.72, "learning_rate": 3.278841127169092e-05, "loss": 0.2383, "step": 306300 }, { "epoch": 1.72, "learning_rate": 3.278279004142847e-05, "loss": 0.2375, "step": 306400 }, { "epoch": 1.72, "learning_rate": 3.2777168811166016e-05, "loss": 0.2465, "step": 306500 }, { "epoch": 1.72, "learning_rate": 3.2771547580903555e-05, "loss": 0.2399, "step": 306600 }, { "epoch": 1.72, "learning_rate": 3.276598256294372e-05, "loss": 0.243, "step": 306700 }, { "epoch": 1.72, "learning_rate": 3.276036133268127e-05, "loss": 0.2461, "step": 306800 }, { "epoch": 1.73, "learning_rate": 3.2754740102418815e-05, "loss": 0.2501, "step": 306900 }, { "epoch": 1.73, "learning_rate": 3.274911887215636e-05, "loss": 0.2419, "step": 307000 }, { "epoch": 1.73, "learning_rate": 3.274349764189391e-05, "loss": 0.2451, "step": 307100 }, { "epoch": 1.73, "learning_rate": 3.2737876411631454e-05, "loss": 0.2432, "step": 307200 }, { "epoch": 1.73, "learning_rate": 3.2732255181369e-05, "loss": 0.2448, "step": 307300 }, { "epoch": 1.73, "learning_rate": 3.272663395110654e-05, "loss": 0.2473, "step": 307400 }, { "epoch": 1.73, "learning_rate": 3.2721012720844086e-05, "loss": 0.2417, "step": 307500 }, { "epoch": 1.73, "learning_rate": 3.271539149058163e-05, "loss": 0.243, "step": 307600 }, { "epoch": 1.73, "learning_rate": 3.270977026031917e-05, "loss": 0.2465, "step": 307700 }, { "epoch": 1.73, "learning_rate": 3.270414903005672e-05, "loss": 0.241, "step": 307800 }, { "epoch": 1.73, "learning_rate": 3.2698527799794265e-05, "loss": 0.2432, "step": 307900 }, { "epoch": 1.73, "learning_rate": 3.269290656953181e-05, "loss": 0.2447, "step": 308000 }, { "epoch": 1.73, "learning_rate": 3.268728533926935e-05, "loss": 0.2422, "step": 308100 }, { "epoch": 1.73, "learning_rate": 3.26816641090069e-05, "loss": 0.2409, "step": 308200 }, { "epoch": 1.73, "learning_rate": 3.267604287874444e-05, "loss": 0.2518, "step": 308300 }, { "epoch": 1.73, "learning_rate": 3.267042164848198e-05, "loss": 0.2407, "step": 308400 }, { "epoch": 1.73, "learning_rate": 3.2664800418219536e-05, "loss": 0.2455, "step": 308500 }, { "epoch": 1.73, "learning_rate": 3.265917918795708e-05, "loss": 0.247, "step": 308600 }, { "epoch": 1.74, "learning_rate": 3.265355795769463e-05, "loss": 0.2299, "step": 308700 }, { "epoch": 1.74, "learning_rate": 3.264793672743217e-05, "loss": 0.2319, "step": 308800 }, { "epoch": 1.74, "learning_rate": 3.2642315497169714e-05, "loss": 0.239, "step": 308900 }, { "epoch": 1.74, "learning_rate": 3.263669426690726e-05, "loss": 0.2405, "step": 309000 }, { "epoch": 1.74, "learning_rate": 3.26310730366448e-05, "loss": 0.2472, "step": 309100 }, { "epoch": 1.74, "learning_rate": 3.2625451806382346e-05, "loss": 0.2428, "step": 309200 }, { "epoch": 1.74, "learning_rate": 3.261983057611989e-05, "loss": 0.2432, "step": 309300 }, { "epoch": 1.74, "learning_rate": 3.261420934585744e-05, "loss": 0.2455, "step": 309400 }, { "epoch": 1.74, "learning_rate": 3.260858811559498e-05, "loss": 0.2432, "step": 309500 }, { "epoch": 1.74, "learning_rate": 3.2602966885332524e-05, "loss": 0.2468, "step": 309600 }, { "epoch": 1.74, "learning_rate": 3.259734565507007e-05, "loss": 0.2458, "step": 309700 }, { "epoch": 1.74, "learning_rate": 3.259172442480761e-05, "loss": 0.241, "step": 309800 }, { "epoch": 1.74, "learning_rate": 3.2586103194545156e-05, "loss": 0.2414, "step": 309900 }, { "epoch": 1.74, "learning_rate": 3.25804819642827e-05, "loss": 0.2475, "step": 310000 }, { "epoch": 1.74, "learning_rate": 3.257486073402025e-05, "loss": 0.248, "step": 310100 }, { "epoch": 1.74, "learning_rate": 3.2569239503757795e-05, "loss": 0.2477, "step": 310200 }, { "epoch": 1.74, "learning_rate": 3.256361827349534e-05, "loss": 0.2488, "step": 310300 }, { "epoch": 1.74, "learning_rate": 3.255799704323289e-05, "loss": 0.2498, "step": 310400 }, { "epoch": 1.75, "learning_rate": 3.2552432025273055e-05, "loss": 0.2472, "step": 310500 }, { "epoch": 1.75, "learning_rate": 3.2546810795010595e-05, "loss": 0.2346, "step": 310600 }, { "epoch": 1.75, "learning_rate": 3.254118956474814e-05, "loss": 0.2517, "step": 310700 }, { "epoch": 1.75, "learning_rate": 3.253556833448569e-05, "loss": 0.2397, "step": 310800 }, { "epoch": 1.75, "learning_rate": 3.252994710422323e-05, "loss": 0.2486, "step": 310900 }, { "epoch": 1.75, "learning_rate": 3.252432587396077e-05, "loss": 0.2441, "step": 311000 }, { "epoch": 1.75, "learning_rate": 3.251870464369832e-05, "loss": 0.2478, "step": 311100 }, { "epoch": 1.75, "learning_rate": 3.2513083413435866e-05, "loss": 0.2436, "step": 311200 }, { "epoch": 1.75, "learning_rate": 3.250746218317341e-05, "loss": 0.2443, "step": 311300 }, { "epoch": 1.75, "learning_rate": 3.250184095291096e-05, "loss": 0.2504, "step": 311400 }, { "epoch": 1.75, "learning_rate": 3.2496219722648505e-05, "loss": 0.2513, "step": 311500 }, { "epoch": 1.75, "learning_rate": 3.2490598492386044e-05, "loss": 0.2419, "step": 311600 }, { "epoch": 1.75, "learning_rate": 3.248497726212359e-05, "loss": 0.2398, "step": 311700 }, { "epoch": 1.75, "learning_rate": 3.2479356031861137e-05, "loss": 0.2496, "step": 311800 }, { "epoch": 1.75, "learning_rate": 3.247373480159868e-05, "loss": 0.2418, "step": 311900 }, { "epoch": 1.75, "learning_rate": 3.246811357133622e-05, "loss": 0.233, "step": 312000 }, { "epoch": 1.75, "learning_rate": 3.246249234107377e-05, "loss": 0.2455, "step": 312100 }, { "epoch": 1.75, "learning_rate": 3.2456871110811315e-05, "loss": 0.2394, "step": 312200 }, { "epoch": 1.76, "learning_rate": 3.2451249880548854e-05, "loss": 0.239, "step": 312300 }, { "epoch": 1.76, "learning_rate": 3.24456286502864e-05, "loss": 0.2438, "step": 312400 }, { "epoch": 1.76, "learning_rate": 3.244000742002395e-05, "loss": 0.2419, "step": 312500 }, { "epoch": 1.76, "learning_rate": 3.243444240206412e-05, "loss": 0.2428, "step": 312600 }, { "epoch": 1.76, "learning_rate": 3.242882117180166e-05, "loss": 0.2444, "step": 312700 }, { "epoch": 1.76, "learning_rate": 3.242319994153921e-05, "loss": 0.2401, "step": 312800 }, { "epoch": 1.76, "learning_rate": 3.2417578711276753e-05, "loss": 0.2413, "step": 312900 }, { "epoch": 1.76, "learning_rate": 3.24119574810143e-05, "loss": 0.2438, "step": 313000 }, { "epoch": 1.76, "learning_rate": 3.240633625075184e-05, "loss": 0.2424, "step": 313100 }, { "epoch": 1.76, "learning_rate": 3.2400715020489385e-05, "loss": 0.2439, "step": 313200 }, { "epoch": 1.76, "learning_rate": 3.239509379022693e-05, "loss": 0.245, "step": 313300 }, { "epoch": 1.76, "learning_rate": 3.238947255996447e-05, "loss": 0.2442, "step": 313400 }, { "epoch": 1.76, "learning_rate": 3.238385132970202e-05, "loss": 0.2485, "step": 313500 }, { "epoch": 1.76, "learning_rate": 3.2378230099439564e-05, "loss": 0.2457, "step": 313600 }, { "epoch": 1.76, "learning_rate": 3.237260886917711e-05, "loss": 0.2402, "step": 313700 }, { "epoch": 1.76, "learning_rate": 3.2366987638914656e-05, "loss": 0.2452, "step": 313800 }, { "epoch": 1.76, "learning_rate": 3.23613664086522e-05, "loss": 0.2462, "step": 313900 }, { "epoch": 1.77, "learning_rate": 3.235574517838975e-05, "loss": 0.2376, "step": 314000 }, { "epoch": 1.77, "learning_rate": 3.235012394812729e-05, "loss": 0.2429, "step": 314100 }, { "epoch": 1.77, "learning_rate": 3.2344502717864835e-05, "loss": 0.2476, "step": 314200 }, { "epoch": 1.77, "learning_rate": 3.233888148760238e-05, "loss": 0.2417, "step": 314300 }, { "epoch": 1.77, "learning_rate": 3.233326025733993e-05, "loss": 0.2375, "step": 314400 }, { "epoch": 1.77, "learning_rate": 3.232763902707747e-05, "loss": 0.2421, "step": 314500 }, { "epoch": 1.77, "learning_rate": 3.232201779681501e-05, "loss": 0.2514, "step": 314600 }, { "epoch": 1.77, "learning_rate": 3.231645277885518e-05, "loss": 0.243, "step": 314700 }, { "epoch": 1.77, "learning_rate": 3.231083154859273e-05, "loss": 0.2416, "step": 314800 }, { "epoch": 1.77, "learning_rate": 3.230521031833027e-05, "loss": 0.247, "step": 314900 }, { "epoch": 1.77, "learning_rate": 3.229958908806782e-05, "loss": 0.2462, "step": 315000 }, { "epoch": 1.77, "learning_rate": 3.2293967857805366e-05, "loss": 0.2438, "step": 315100 }, { "epoch": 1.77, "learning_rate": 3.2288346627542905e-05, "loss": 0.249, "step": 315200 }, { "epoch": 1.77, "learning_rate": 3.228272539728045e-05, "loss": 0.2318, "step": 315300 }, { "epoch": 1.77, "learning_rate": 3.2277104167018e-05, "loss": 0.2474, "step": 315400 }, { "epoch": 1.77, "learning_rate": 3.227148293675554e-05, "loss": 0.2406, "step": 315500 }, { "epoch": 1.77, "learning_rate": 3.2265861706493084e-05, "loss": 0.2444, "step": 315600 }, { "epoch": 1.77, "learning_rate": 3.226024047623063e-05, "loss": 0.2398, "step": 315700 }, { "epoch": 1.78, "learning_rate": 3.2254619245968176e-05, "loss": 0.2494, "step": 315800 }, { "epoch": 1.78, "learning_rate": 3.2248998015705716e-05, "loss": 0.2348, "step": 315900 }, { "epoch": 1.78, "learning_rate": 3.224337678544326e-05, "loss": 0.2388, "step": 316000 }, { "epoch": 1.78, "learning_rate": 3.223775555518081e-05, "loss": 0.2484, "step": 316100 }, { "epoch": 1.78, "learning_rate": 3.223213432491835e-05, "loss": 0.2401, "step": 316200 }, { "epoch": 1.78, "learning_rate": 3.2226513094655894e-05, "loss": 0.2458, "step": 316300 }, { "epoch": 1.78, "learning_rate": 3.222089186439344e-05, "loss": 0.2465, "step": 316400 }, { "epoch": 1.78, "learning_rate": 3.2215270634130987e-05, "loss": 0.2461, "step": 316500 }, { "epoch": 1.78, "learning_rate": 3.220964940386853e-05, "loss": 0.2431, "step": 316600 }, { "epoch": 1.78, "learning_rate": 3.220402817360608e-05, "loss": 0.2408, "step": 316700 }, { "epoch": 1.78, "learning_rate": 3.2198406943343625e-05, "loss": 0.2428, "step": 316800 }, { "epoch": 1.78, "learning_rate": 3.2192785713081165e-05, "loss": 0.2392, "step": 316900 }, { "epoch": 1.78, "learning_rate": 3.218716448281871e-05, "loss": 0.2391, "step": 317000 }, { "epoch": 1.78, "learning_rate": 3.218154325255626e-05, "loss": 0.2472, "step": 317100 }, { "epoch": 1.78, "learning_rate": 3.2175922022293804e-05, "loss": 0.2473, "step": 317200 }, { "epoch": 1.78, "learning_rate": 3.217030079203134e-05, "loss": 0.2449, "step": 317300 }, { "epoch": 1.78, "learning_rate": 3.216467956176889e-05, "loss": 0.242, "step": 317400 }, { "epoch": 1.78, "learning_rate": 3.2159058331506436e-05, "loss": 0.2418, "step": 317500 }, { "epoch": 1.79, "learning_rate": 3.2153437101243975e-05, "loss": 0.2427, "step": 317600 }, { "epoch": 1.79, "learning_rate": 3.214781587098152e-05, "loss": 0.2417, "step": 317700 }, { "epoch": 1.79, "learning_rate": 3.214219464071907e-05, "loss": 0.2397, "step": 317800 }, { "epoch": 1.79, "learning_rate": 3.2136573410456614e-05, "loss": 0.2365, "step": 317900 }, { "epoch": 1.79, "learning_rate": 3.213095218019416e-05, "loss": 0.2475, "step": 318000 }, { "epoch": 1.79, "learning_rate": 3.212533094993171e-05, "loss": 0.2446, "step": 318100 }, { "epoch": 1.79, "learning_rate": 3.211970971966925e-05, "loss": 0.2473, "step": 318200 }, { "epoch": 1.79, "learning_rate": 3.211408848940679e-05, "loss": 0.2431, "step": 318300 }, { "epoch": 1.79, "learning_rate": 3.210846725914434e-05, "loss": 0.239, "step": 318400 }, { "epoch": 1.79, "learning_rate": 3.2102846028881885e-05, "loss": 0.251, "step": 318500 }, { "epoch": 1.79, "learning_rate": 3.209722479861943e-05, "loss": 0.2393, "step": 318600 }, { "epoch": 1.79, "learning_rate": 3.209165978065959e-05, "loss": 0.2412, "step": 318700 }, { "epoch": 1.79, "learning_rate": 3.208603855039714e-05, "loss": 0.2441, "step": 318800 }, { "epoch": 1.79, "learning_rate": 3.2080417320134685e-05, "loss": 0.2437, "step": 318900 }, { "epoch": 1.79, "learning_rate": 3.207479608987223e-05, "loss": 0.2416, "step": 319000 }, { "epoch": 1.79, "learning_rate": 3.206917485960978e-05, "loss": 0.2448, "step": 319100 }, { "epoch": 1.79, "learning_rate": 3.2063553629347324e-05, "loss": 0.2363, "step": 319200 }, { "epoch": 1.79, "learning_rate": 3.205798861138749e-05, "loss": 0.2489, "step": 319300 }, { "epoch": 1.8, "learning_rate": 3.205236738112504e-05, "loss": 0.2482, "step": 319400 }, { "epoch": 1.8, "learning_rate": 3.204674615086258e-05, "loss": 0.2426, "step": 319500 }, { "epoch": 1.8, "learning_rate": 3.204112492060012e-05, "loss": 0.2373, "step": 319600 }, { "epoch": 1.8, "learning_rate": 3.203550369033767e-05, "loss": 0.2454, "step": 319700 }, { "epoch": 1.8, "learning_rate": 3.202988246007521e-05, "loss": 0.2375, "step": 319800 }, { "epoch": 1.8, "learning_rate": 3.2024261229812755e-05, "loss": 0.2401, "step": 319900 }, { "epoch": 1.8, "learning_rate": 3.20186399995503e-05, "loss": 0.236, "step": 320000 }, { "epoch": 1.8, "learning_rate": 3.201301876928785e-05, "loss": 0.2452, "step": 320100 }, { "epoch": 1.8, "learning_rate": 3.2007397539025394e-05, "loss": 0.2402, "step": 320200 }, { "epoch": 1.8, "learning_rate": 3.200177630876294e-05, "loss": 0.2409, "step": 320300 }, { "epoch": 1.8, "learning_rate": 3.199615507850049e-05, "loss": 0.2448, "step": 320400 }, { "epoch": 1.8, "learning_rate": 3.1990533848238026e-05, "loss": 0.2426, "step": 320500 }, { "epoch": 1.8, "learning_rate": 3.198491261797557e-05, "loss": 0.2441, "step": 320600 }, { "epoch": 1.8, "learning_rate": 3.197929138771312e-05, "loss": 0.2447, "step": 320700 }, { "epoch": 1.8, "learning_rate": 3.1973670157450665e-05, "loss": 0.2384, "step": 320800 }, { "epoch": 1.8, "learning_rate": 3.1968048927188204e-05, "loss": 0.2399, "step": 320900 }, { "epoch": 1.8, "learning_rate": 3.196242769692575e-05, "loss": 0.2441, "step": 321000 }, { "epoch": 1.8, "learning_rate": 3.19568064666633e-05, "loss": 0.2318, "step": 321100 }, { "epoch": 1.81, "learning_rate": 3.1951185236400837e-05, "loss": 0.24, "step": 321200 }, { "epoch": 1.81, "learning_rate": 3.194556400613838e-05, "loss": 0.2519, "step": 321300 }, { "epoch": 1.81, "learning_rate": 3.193994277587593e-05, "loss": 0.233, "step": 321400 }, { "epoch": 1.81, "learning_rate": 3.1934321545613475e-05, "loss": 0.2453, "step": 321500 }, { "epoch": 1.81, "learning_rate": 3.1928700315351015e-05, "loss": 0.2444, "step": 321600 }, { "epoch": 1.81, "learning_rate": 3.192307908508856e-05, "loss": 0.2463, "step": 321700 }, { "epoch": 1.81, "learning_rate": 3.1917514067128736e-05, "loss": 0.2427, "step": 321800 }, { "epoch": 1.81, "learning_rate": 3.191189283686628e-05, "loss": 0.238, "step": 321900 }, { "epoch": 1.81, "learning_rate": 3.190627160660382e-05, "loss": 0.2407, "step": 322000 }, { "epoch": 1.81, "learning_rate": 3.190065037634137e-05, "loss": 0.244, "step": 322100 }, { "epoch": 1.81, "learning_rate": 3.1895029146078914e-05, "loss": 0.2471, "step": 322200 }, { "epoch": 1.81, "learning_rate": 3.188940791581645e-05, "loss": 0.2425, "step": 322300 }, { "epoch": 1.81, "learning_rate": 3.1883786685554e-05, "loss": 0.2375, "step": 322400 }, { "epoch": 1.81, "learning_rate": 3.1878165455291546e-05, "loss": 0.241, "step": 322500 }, { "epoch": 1.81, "learning_rate": 3.187254422502909e-05, "loss": 0.2476, "step": 322600 }, { "epoch": 1.81, "learning_rate": 3.186692299476663e-05, "loss": 0.2421, "step": 322700 }, { "epoch": 1.81, "learning_rate": 3.186130176450418e-05, "loss": 0.2398, "step": 322800 }, { "epoch": 1.82, "learning_rate": 3.1855680534241724e-05, "loss": 0.2317, "step": 322900 }, { "epoch": 1.82, "learning_rate": 3.185005930397927e-05, "loss": 0.2417, "step": 323000 }, { "epoch": 1.82, "learning_rate": 3.184443807371682e-05, "loss": 0.2351, "step": 323100 }, { "epoch": 1.82, "learning_rate": 3.183881684345436e-05, "loss": 0.2357, "step": 323200 }, { "epoch": 1.82, "learning_rate": 3.183319561319191e-05, "loss": 0.2348, "step": 323300 }, { "epoch": 1.82, "learning_rate": 3.182757438292945e-05, "loss": 0.2462, "step": 323400 }, { "epoch": 1.82, "learning_rate": 3.1821953152666995e-05, "loss": 0.2406, "step": 323500 }, { "epoch": 1.82, "learning_rate": 3.181633192240454e-05, "loss": 0.2409, "step": 323600 }, { "epoch": 1.82, "learning_rate": 3.181071069214208e-05, "loss": 0.2454, "step": 323700 }, { "epoch": 1.82, "learning_rate": 3.180508946187963e-05, "loss": 0.2428, "step": 323800 }, { "epoch": 1.82, "learning_rate": 3.1799468231617173e-05, "loss": 0.2465, "step": 323900 }, { "epoch": 1.82, "learning_rate": 3.179384700135472e-05, "loss": 0.2442, "step": 324000 }, { "epoch": 1.82, "learning_rate": 3.178828198339489e-05, "loss": 0.2402, "step": 324100 }, { "epoch": 1.82, "learning_rate": 3.1782660753132434e-05, "loss": 0.2446, "step": 324200 }, { "epoch": 1.82, "learning_rate": 3.177703952286998e-05, "loss": 0.242, "step": 324300 }, { "epoch": 1.82, "learning_rate": 3.1771418292607526e-05, "loss": 0.2402, "step": 324400 }, { "epoch": 1.82, "learning_rate": 3.1765797062345066e-05, "loss": 0.2353, "step": 324500 }, { "epoch": 1.82, "learning_rate": 3.176017583208261e-05, "loss": 0.2441, "step": 324600 }, { "epoch": 1.83, "learning_rate": 3.175455460182016e-05, "loss": 0.2422, "step": 324700 }, { "epoch": 1.83, "learning_rate": 3.17489333715577e-05, "loss": 0.2471, "step": 324800 }, { "epoch": 1.83, "learning_rate": 3.1743312141295244e-05, "loss": 0.2435, "step": 324900 }, { "epoch": 1.83, "learning_rate": 3.173769091103279e-05, "loss": 0.2376, "step": 325000 }, { "epoch": 1.83, "learning_rate": 3.173206968077033e-05, "loss": 0.2459, "step": 325100 }, { "epoch": 1.83, "learning_rate": 3.1726448450507876e-05, "loss": 0.2419, "step": 325200 }, { "epoch": 1.83, "learning_rate": 3.172082722024542e-05, "loss": 0.2381, "step": 325300 }, { "epoch": 1.83, "learning_rate": 3.171520598998297e-05, "loss": 0.24, "step": 325400 }, { "epoch": 1.83, "learning_rate": 3.1709584759720515e-05, "loss": 0.2407, "step": 325500 }, { "epoch": 1.83, "learning_rate": 3.170396352945806e-05, "loss": 0.2388, "step": 325600 }, { "epoch": 1.83, "learning_rate": 3.169834229919561e-05, "loss": 0.2404, "step": 325700 }, { "epoch": 1.83, "learning_rate": 3.169272106893315e-05, "loss": 0.2439, "step": 325800 }, { "epoch": 1.83, "learning_rate": 3.168709983867069e-05, "loss": 0.2363, "step": 325900 }, { "epoch": 1.83, "learning_rate": 3.168147860840824e-05, "loss": 0.2385, "step": 326000 }, { "epoch": 1.83, "learning_rate": 3.1675857378145786e-05, "loss": 0.2368, "step": 326100 }, { "epoch": 1.83, "learning_rate": 3.1670236147883325e-05, "loss": 0.2389, "step": 326200 }, { "epoch": 1.83, "learning_rate": 3.166461491762087e-05, "loss": 0.2355, "step": 326300 }, { "epoch": 1.83, "learning_rate": 3.165899368735842e-05, "loss": 0.2376, "step": 326400 }, { "epoch": 1.84, "learning_rate": 3.165337245709596e-05, "loss": 0.2375, "step": 326500 }, { "epoch": 1.84, "learning_rate": 3.1647751226833504e-05, "loss": 0.2395, "step": 326600 }, { "epoch": 1.84, "learning_rate": 3.164212999657105e-05, "loss": 0.2432, "step": 326700 }, { "epoch": 1.84, "learning_rate": 3.1636508766308596e-05, "loss": 0.243, "step": 326800 }, { "epoch": 1.84, "learning_rate": 3.1630887536046136e-05, "loss": 0.2314, "step": 326900 }, { "epoch": 1.84, "learning_rate": 3.162526630578368e-05, "loss": 0.2365, "step": 327000 }, { "epoch": 1.84, "learning_rate": 3.161964507552123e-05, "loss": 0.2445, "step": 327100 }, { "epoch": 1.84, "learning_rate": 3.1614023845258775e-05, "loss": 0.2408, "step": 327200 }, { "epoch": 1.84, "learning_rate": 3.160840261499632e-05, "loss": 0.2406, "step": 327300 }, { "epoch": 1.84, "learning_rate": 3.160278138473387e-05, "loss": 0.2421, "step": 327400 }, { "epoch": 1.84, "learning_rate": 3.1597216366774035e-05, "loss": 0.2491, "step": 327500 }, { "epoch": 1.84, "learning_rate": 3.1591595136511574e-05, "loss": 0.24, "step": 327600 }, { "epoch": 1.84, "learning_rate": 3.158597390624912e-05, "loss": 0.2432, "step": 327700 }, { "epoch": 1.84, "learning_rate": 3.158035267598667e-05, "loss": 0.2345, "step": 327800 }, { "epoch": 1.84, "learning_rate": 3.157473144572421e-05, "loss": 0.2462, "step": 327900 }, { "epoch": 1.84, "learning_rate": 3.156911021546175e-05, "loss": 0.2407, "step": 328000 }, { "epoch": 1.84, "learning_rate": 3.15634889851993e-05, "loss": 0.238, "step": 328100 }, { "epoch": 1.84, "learning_rate": 3.1557867754936845e-05, "loss": 0.2331, "step": 328200 }, { "epoch": 1.85, "learning_rate": 3.155224652467439e-05, "loss": 0.2401, "step": 328300 }, { "epoch": 1.85, "learning_rate": 3.154662529441194e-05, "loss": 0.2334, "step": 328400 }, { "epoch": 1.85, "learning_rate": 3.1541004064149484e-05, "loss": 0.2421, "step": 328500 }, { "epoch": 1.85, "learning_rate": 3.153538283388703e-05, "loss": 0.2398, "step": 328600 }, { "epoch": 1.85, "learning_rate": 3.152976160362457e-05, "loss": 0.2377, "step": 328700 }, { "epoch": 1.85, "learning_rate": 3.1524140373362116e-05, "loss": 0.2382, "step": 328800 }, { "epoch": 1.85, "learning_rate": 3.151851914309966e-05, "loss": 0.2429, "step": 328900 }, { "epoch": 1.85, "learning_rate": 3.15128979128372e-05, "loss": 0.2364, "step": 329000 }, { "epoch": 1.85, "learning_rate": 3.150727668257475e-05, "loss": 0.2475, "step": 329100 }, { "epoch": 1.85, "learning_rate": 3.1501655452312294e-05, "loss": 0.2459, "step": 329200 }, { "epoch": 1.85, "learning_rate": 3.149603422204984e-05, "loss": 0.2438, "step": 329300 }, { "epoch": 1.85, "learning_rate": 3.149041299178738e-05, "loss": 0.2337, "step": 329400 }, { "epoch": 1.85, "learning_rate": 3.1484791761524926e-05, "loss": 0.2466, "step": 329500 }, { "epoch": 1.85, "learning_rate": 3.14792267435651e-05, "loss": 0.2337, "step": 329600 }, { "epoch": 1.85, "learning_rate": 3.147360551330265e-05, "loss": 0.2458, "step": 329700 }, { "epoch": 1.85, "learning_rate": 3.1467984283040187e-05, "loss": 0.2398, "step": 329800 }, { "epoch": 1.85, "learning_rate": 3.146236305277773e-05, "loss": 0.2402, "step": 329900 }, { "epoch": 1.86, "learning_rate": 3.145674182251528e-05, "loss": 0.2357, "step": 330000 }, { "epoch": 1.86, "learning_rate": 3.145112059225282e-05, "loss": 0.244, "step": 330100 }, { "epoch": 1.86, "learning_rate": 3.1445499361990365e-05, "loss": 0.2412, "step": 330200 }, { "epoch": 1.86, "learning_rate": 3.143987813172791e-05, "loss": 0.2404, "step": 330300 }, { "epoch": 1.86, "learning_rate": 3.143425690146546e-05, "loss": 0.242, "step": 330400 }, { "epoch": 1.86, "learning_rate": 3.1428635671203e-05, "loss": 0.2446, "step": 330500 }, { "epoch": 1.86, "learning_rate": 3.142301444094054e-05, "loss": 0.2394, "step": 330600 }, { "epoch": 1.86, "learning_rate": 3.141739321067809e-05, "loss": 0.2422, "step": 330700 }, { "epoch": 1.86, "learning_rate": 3.1411771980415636e-05, "loss": 0.2411, "step": 330800 }, { "epoch": 1.86, "learning_rate": 3.140615075015318e-05, "loss": 0.2456, "step": 330900 }, { "epoch": 1.86, "learning_rate": 3.140052951989073e-05, "loss": 0.2393, "step": 331000 }, { "epoch": 1.86, "learning_rate": 3.1394908289628275e-05, "loss": 0.2388, "step": 331100 }, { "epoch": 1.86, "learning_rate": 3.1389287059365814e-05, "loss": 0.2396, "step": 331200 }, { "epoch": 1.86, "learning_rate": 3.138366582910336e-05, "loss": 0.2377, "step": 331300 }, { "epoch": 1.86, "learning_rate": 3.137804459884091e-05, "loss": 0.2469, "step": 331400 }, { "epoch": 1.86, "learning_rate": 3.1372423368578446e-05, "loss": 0.2399, "step": 331500 }, { "epoch": 1.86, "learning_rate": 3.136680213831599e-05, "loss": 0.234, "step": 331600 }, { "epoch": 1.86, "learning_rate": 3.136118090805354e-05, "loss": 0.2499, "step": 331700 }, { "epoch": 1.87, "learning_rate": 3.1355559677791085e-05, "loss": 0.2381, "step": 331800 }, { "epoch": 1.87, "learning_rate": 3.1349938447528625e-05, "loss": 0.2405, "step": 331900 }, { "epoch": 1.87, "learning_rate": 3.134431721726617e-05, "loss": 0.244, "step": 332000 }, { "epoch": 1.87, "learning_rate": 3.133869598700372e-05, "loss": 0.2419, "step": 332100 }, { "epoch": 1.87, "learning_rate": 3.1333074756741257e-05, "loss": 0.2356, "step": 332200 }, { "epoch": 1.87, "learning_rate": 3.13274535264788e-05, "loss": 0.2418, "step": 332300 }, { "epoch": 1.87, "learning_rate": 3.132183229621635e-05, "loss": 0.2434, "step": 332400 }, { "epoch": 1.87, "learning_rate": 3.1316211065953895e-05, "loss": 0.2328, "step": 332500 }, { "epoch": 1.87, "learning_rate": 3.131058983569144e-05, "loss": 0.2329, "step": 332600 }, { "epoch": 1.87, "learning_rate": 3.130496860542899e-05, "loss": 0.236, "step": 332700 }, { "epoch": 1.87, "learning_rate": 3.1299347375166534e-05, "loss": 0.2387, "step": 332800 }, { "epoch": 1.87, "learning_rate": 3.1293726144904074e-05, "loss": 0.245, "step": 332900 }, { "epoch": 1.87, "learning_rate": 3.128810491464162e-05, "loss": 0.2398, "step": 333000 }, { "epoch": 1.87, "learning_rate": 3.1282483684379166e-05, "loss": 0.2415, "step": 333100 }, { "epoch": 1.87, "learning_rate": 3.1276918666419334e-05, "loss": 0.2377, "step": 333200 }, { "epoch": 1.87, "learning_rate": 3.1271297436156873e-05, "loss": 0.2366, "step": 333300 }, { "epoch": 1.87, "learning_rate": 3.126567620589442e-05, "loss": 0.2385, "step": 333400 }, { "epoch": 1.87, "learning_rate": 3.126005497563197e-05, "loss": 0.2369, "step": 333500 }, { "epoch": 1.88, "learning_rate": 3.125443374536952e-05, "loss": 0.2402, "step": 333600 }, { "epoch": 1.88, "learning_rate": 3.124881251510706e-05, "loss": 0.244, "step": 333700 }, { "epoch": 1.88, "learning_rate": 3.1243191284844605e-05, "loss": 0.2433, "step": 333800 }, { "epoch": 1.88, "learning_rate": 3.123757005458215e-05, "loss": 0.2436, "step": 333900 }, { "epoch": 1.88, "learning_rate": 3.123194882431969e-05, "loss": 0.2359, "step": 334000 }, { "epoch": 1.88, "learning_rate": 3.122632759405724e-05, "loss": 0.2385, "step": 334100 }, { "epoch": 1.88, "learning_rate": 3.122070636379478e-05, "loss": 0.2406, "step": 334200 }, { "epoch": 1.88, "learning_rate": 3.121508513353233e-05, "loss": 0.2355, "step": 334300 }, { "epoch": 1.88, "learning_rate": 3.120946390326987e-05, "loss": 0.2377, "step": 334400 }, { "epoch": 1.88, "learning_rate": 3.1203842673007415e-05, "loss": 0.2415, "step": 334500 }, { "epoch": 1.88, "learning_rate": 3.119822144274496e-05, "loss": 0.2443, "step": 334600 }, { "epoch": 1.88, "learning_rate": 3.11926002124825e-05, "loss": 0.2367, "step": 334700 }, { "epoch": 1.88, "learning_rate": 3.118697898222005e-05, "loss": 0.2384, "step": 334800 }, { "epoch": 1.88, "learning_rate": 3.1181357751957594e-05, "loss": 0.2317, "step": 334900 }, { "epoch": 1.88, "learning_rate": 3.117573652169514e-05, "loss": 0.2357, "step": 335000 }, { "epoch": 1.88, "learning_rate": 3.1170115291432686e-05, "loss": 0.2438, "step": 335100 }, { "epoch": 1.88, "learning_rate": 3.116449406117023e-05, "loss": 0.2424, "step": 335200 }, { "epoch": 1.88, "learning_rate": 3.115887283090778e-05, "loss": 0.2358, "step": 335300 }, { "epoch": 1.89, "learning_rate": 3.115325160064532e-05, "loss": 0.2369, "step": 335400 }, { "epoch": 1.89, "learning_rate": 3.1147630370382864e-05, "loss": 0.2396, "step": 335500 }, { "epoch": 1.89, "learning_rate": 3.114200914012041e-05, "loss": 0.2381, "step": 335600 }, { "epoch": 1.89, "learning_rate": 3.113638790985795e-05, "loss": 0.24, "step": 335700 }, { "epoch": 1.89, "learning_rate": 3.1130766679595496e-05, "loss": 0.2407, "step": 335800 }, { "epoch": 1.89, "learning_rate": 3.112514544933304e-05, "loss": 0.2377, "step": 335900 }, { "epoch": 1.89, "learning_rate": 3.111952421907059e-05, "loss": 0.2417, "step": 336000 }, { "epoch": 1.89, "learning_rate": 3.111390298880813e-05, "loss": 0.2395, "step": 336100 }, { "epoch": 1.89, "learning_rate": 3.1108281758545675e-05, "loss": 0.2416, "step": 336200 }, { "epoch": 1.89, "learning_rate": 3.110266052828322e-05, "loss": 0.2327, "step": 336300 }, { "epoch": 1.89, "learning_rate": 3.1097095510323395e-05, "loss": 0.2326, "step": 336400 }, { "epoch": 1.89, "learning_rate": 3.1091474280060935e-05, "loss": 0.2343, "step": 336500 }, { "epoch": 1.89, "learning_rate": 3.108585304979848e-05, "loss": 0.2395, "step": 336600 }, { "epoch": 1.89, "learning_rate": 3.108023181953603e-05, "loss": 0.243, "step": 336700 }, { "epoch": 1.89, "learning_rate": 3.107461058927357e-05, "loss": 0.2302, "step": 336800 }, { "epoch": 1.89, "learning_rate": 3.106898935901111e-05, "loss": 0.2313, "step": 336900 }, { "epoch": 1.89, "learning_rate": 3.106336812874866e-05, "loss": 0.2367, "step": 337000 }, { "epoch": 1.89, "learning_rate": 3.1057746898486206e-05, "loss": 0.2414, "step": 337100 }, { "epoch": 1.9, "learning_rate": 3.1052125668223745e-05, "loss": 0.2358, "step": 337200 }, { "epoch": 1.9, "learning_rate": 3.104650443796129e-05, "loss": 0.2359, "step": 337300 }, { "epoch": 1.9, "learning_rate": 3.104088320769884e-05, "loss": 0.2402, "step": 337400 }, { "epoch": 1.9, "learning_rate": 3.103526197743638e-05, "loss": 0.2379, "step": 337500 }, { "epoch": 1.9, "learning_rate": 3.1029640747173924e-05, "loss": 0.2404, "step": 337600 }, { "epoch": 1.9, "learning_rate": 3.102401951691147e-05, "loss": 0.2381, "step": 337700 }, { "epoch": 1.9, "learning_rate": 3.101839828664902e-05, "loss": 0.2373, "step": 337800 }, { "epoch": 1.9, "learning_rate": 3.1012833268689184e-05, "loss": 0.2412, "step": 337900 }, { "epoch": 1.9, "learning_rate": 3.100721203842673e-05, "loss": 0.2387, "step": 338000 }, { "epoch": 1.9, "learning_rate": 3.1001590808164276e-05, "loss": 0.2345, "step": 338100 }, { "epoch": 1.9, "learning_rate": 3.099596957790182e-05, "loss": 0.2372, "step": 338200 }, { "epoch": 1.9, "learning_rate": 3.099034834763936e-05, "loss": 0.2416, "step": 338300 }, { "epoch": 1.9, "learning_rate": 3.098472711737691e-05, "loss": 0.2343, "step": 338400 }, { "epoch": 1.9, "learning_rate": 3.0979105887114455e-05, "loss": 0.2375, "step": 338500 }, { "epoch": 1.9, "learning_rate": 3.0973484656851994e-05, "loss": 0.2392, "step": 338600 }, { "epoch": 1.9, "learning_rate": 3.096786342658955e-05, "loss": 0.2438, "step": 338700 }, { "epoch": 1.9, "learning_rate": 3.0962242196327094e-05, "loss": 0.245, "step": 338800 }, { "epoch": 1.91, "learning_rate": 3.095662096606464e-05, "loss": 0.2371, "step": 338900 }, { "epoch": 1.91, "learning_rate": 3.095099973580218e-05, "loss": 0.2441, "step": 339000 }, { "epoch": 1.91, "learning_rate": 3.0945378505539726e-05, "loss": 0.239, "step": 339100 }, { "epoch": 1.91, "learning_rate": 3.093975727527727e-05, "loss": 0.2433, "step": 339200 }, { "epoch": 1.91, "learning_rate": 3.093413604501481e-05, "loss": 0.2446, "step": 339300 }, { "epoch": 1.91, "learning_rate": 3.092851481475236e-05, "loss": 0.2373, "step": 339400 }, { "epoch": 1.91, "learning_rate": 3.0922893584489904e-05, "loss": 0.2321, "step": 339500 }, { "epoch": 1.91, "learning_rate": 3.091727235422745e-05, "loss": 0.234, "step": 339600 }, { "epoch": 1.91, "learning_rate": 3.091165112396499e-05, "loss": 0.2369, "step": 339700 }, { "epoch": 1.91, "learning_rate": 3.0906029893702536e-05, "loss": 0.238, "step": 339800 }, { "epoch": 1.91, "learning_rate": 3.090040866344008e-05, "loss": 0.2322, "step": 339900 }, { "epoch": 1.91, "learning_rate": 3.089478743317762e-05, "loss": 0.2351, "step": 340000 }, { "epoch": 1.91, "learning_rate": 3.088916620291517e-05, "loss": 0.2356, "step": 340100 }, { "epoch": 1.91, "learning_rate": 3.088360118495534e-05, "loss": 0.2408, "step": 340200 }, { "epoch": 1.91, "learning_rate": 3.087797995469289e-05, "loss": 0.2354, "step": 340300 }, { "epoch": 1.91, "learning_rate": 3.087235872443043e-05, "loss": 0.2369, "step": 340400 }, { "epoch": 1.91, "learning_rate": 3.0866737494167975e-05, "loss": 0.2328, "step": 340500 }, { "epoch": 1.91, "learning_rate": 3.086111626390552e-05, "loss": 0.229, "step": 340600 }, { "epoch": 1.92, "learning_rate": 3.085549503364307e-05, "loss": 0.2349, "step": 340700 }, { "epoch": 1.92, "learning_rate": 3.0849873803380607e-05, "loss": 0.2318, "step": 340800 }, { "epoch": 1.92, "learning_rate": 3.084425257311815e-05, "loss": 0.2417, "step": 340900 }, { "epoch": 1.92, "learning_rate": 3.08386313428557e-05, "loss": 0.2436, "step": 341000 }, { "epoch": 1.92, "learning_rate": 3.083301011259324e-05, "loss": 0.2423, "step": 341100 }, { "epoch": 1.92, "learning_rate": 3.0827388882330785e-05, "loss": 0.2379, "step": 341200 }, { "epoch": 1.92, "learning_rate": 3.082176765206833e-05, "loss": 0.236, "step": 341300 }, { "epoch": 1.92, "learning_rate": 3.081614642180588e-05, "loss": 0.2332, "step": 341400 }, { "epoch": 1.92, "learning_rate": 3.0810525191543424e-05, "loss": 0.2377, "step": 341500 }, { "epoch": 1.92, "learning_rate": 3.080490396128097e-05, "loss": 0.2409, "step": 341600 }, { "epoch": 1.92, "learning_rate": 3.0799282731018516e-05, "loss": 0.2491, "step": 341700 }, { "epoch": 1.92, "learning_rate": 3.0793661500756056e-05, "loss": 0.2367, "step": 341800 }, { "epoch": 1.92, "learning_rate": 3.07880402704936e-05, "loss": 0.2399, "step": 341900 }, { "epoch": 1.92, "learning_rate": 3.078241904023115e-05, "loss": 0.2336, "step": 342000 }, { "epoch": 1.92, "learning_rate": 3.0776797809968695e-05, "loss": 0.2356, "step": 342100 }, { "epoch": 1.92, "learning_rate": 3.0771176579706234e-05, "loss": 0.2417, "step": 342200 }, { "epoch": 1.92, "learning_rate": 3.076555534944378e-05, "loss": 0.2341, "step": 342300 }, { "epoch": 1.92, "learning_rate": 3.075993411918133e-05, "loss": 0.2436, "step": 342400 }, { "epoch": 1.93, "learning_rate": 3.0754312888918866e-05, "loss": 0.2366, "step": 342500 }, { "epoch": 1.93, "learning_rate": 3.074869165865641e-05, "loss": 0.2398, "step": 342600 }, { "epoch": 1.93, "learning_rate": 3.074307042839396e-05, "loss": 0.2338, "step": 342700 }, { "epoch": 1.93, "learning_rate": 3.0737449198131505e-05, "loss": 0.2365, "step": 342800 }, { "epoch": 1.93, "learning_rate": 3.073182796786905e-05, "loss": 0.2303, "step": 342900 }, { "epoch": 1.93, "learning_rate": 3.07262067376066e-05, "loss": 0.2297, "step": 343000 }, { "epoch": 1.93, "learning_rate": 3.0720585507344144e-05, "loss": 0.2402, "step": 343100 }, { "epoch": 1.93, "learning_rate": 3.071496427708168e-05, "loss": 0.2325, "step": 343200 }, { "epoch": 1.93, "learning_rate": 3.070934304681923e-05, "loss": 0.2353, "step": 343300 }, { "epoch": 1.93, "learning_rate": 3.0703721816556776e-05, "loss": 0.2298, "step": 343400 }, { "epoch": 1.93, "learning_rate": 3.069810058629432e-05, "loss": 0.2404, "step": 343500 }, { "epoch": 1.93, "learning_rate": 3.069247935603186e-05, "loss": 0.2355, "step": 343600 }, { "epoch": 1.93, "learning_rate": 3.068685812576941e-05, "loss": 0.2383, "step": 343700 }, { "epoch": 1.93, "learning_rate": 3.0681236895506954e-05, "loss": 0.2383, "step": 343800 }, { "epoch": 1.93, "learning_rate": 3.0675615665244494e-05, "loss": 0.2278, "step": 343900 }, { "epoch": 1.93, "learning_rate": 3.066999443498204e-05, "loss": 0.2367, "step": 344000 }, { "epoch": 1.93, "learning_rate": 3.0664373204719586e-05, "loss": 0.2358, "step": 344100 }, { "epoch": 1.93, "learning_rate": 3.065875197445713e-05, "loss": 0.2329, "step": 344200 }, { "epoch": 1.94, "learning_rate": 3.065313074419467e-05, "loss": 0.2384, "step": 344300 }, { "epoch": 1.94, "learning_rate": 3.064750951393222e-05, "loss": 0.2368, "step": 344400 }, { "epoch": 1.94, "learning_rate": 3.0641888283669765e-05, "loss": 0.2377, "step": 344500 }, { "epoch": 1.94, "learning_rate": 3.063626705340731e-05, "loss": 0.2347, "step": 344600 }, { "epoch": 1.94, "learning_rate": 3.063064582314486e-05, "loss": 0.2403, "step": 344700 }, { "epoch": 1.94, "learning_rate": 3.0625024592882403e-05, "loss": 0.2415, "step": 344800 }, { "epoch": 1.94, "learning_rate": 3.061940336261995e-05, "loss": 0.2379, "step": 344900 }, { "epoch": 1.94, "learning_rate": 3.061378213235749e-05, "loss": 0.2364, "step": 345000 }, { "epoch": 1.94, "learning_rate": 3.0608160902095036e-05, "loss": 0.2333, "step": 345100 }, { "epoch": 1.94, "learning_rate": 3.060253967183258e-05, "loss": 0.2294, "step": 345200 }, { "epoch": 1.94, "learning_rate": 3.059691844157012e-05, "loss": 0.2311, "step": 345300 }, { "epoch": 1.94, "learning_rate": 3.059129721130767e-05, "loss": 0.2312, "step": 345400 }, { "epoch": 1.94, "learning_rate": 3.0585675981045214e-05, "loss": 0.2378, "step": 345500 }, { "epoch": 1.94, "learning_rate": 3.058005475078275e-05, "loss": 0.2381, "step": 345600 }, { "epoch": 1.94, "learning_rate": 3.05744335205203e-05, "loss": 0.234, "step": 345700 }, { "epoch": 1.94, "learning_rate": 3.0568812290257846e-05, "loss": 0.2381, "step": 345800 }, { "epoch": 1.94, "learning_rate": 3.056319105999539e-05, "loss": 0.2375, "step": 345900 }, { "epoch": 1.94, "learning_rate": 3.055756982973293e-05, "loss": 0.2411, "step": 346000 }, { "epoch": 1.95, "learning_rate": 3.055194859947048e-05, "loss": 0.2344, "step": 346100 }, { "epoch": 1.95, "learning_rate": 3.0546327369208024e-05, "loss": 0.2342, "step": 346200 }, { "epoch": 1.95, "learning_rate": 3.054070613894557e-05, "loss": 0.2338, "step": 346300 }, { "epoch": 1.95, "learning_rate": 3.053514112098574e-05, "loss": 0.2369, "step": 346400 }, { "epoch": 1.95, "learning_rate": 3.0529576103025906e-05, "loss": 0.2417, "step": 346500 }, { "epoch": 1.95, "learning_rate": 3.052395487276345e-05, "loss": 0.2324, "step": 346600 }, { "epoch": 1.95, "learning_rate": 3.0518333642501e-05, "loss": 0.2361, "step": 346700 }, { "epoch": 1.95, "learning_rate": 3.051271241223854e-05, "loss": 0.2384, "step": 346800 }, { "epoch": 1.95, "learning_rate": 3.0507091181976088e-05, "loss": 0.2304, "step": 346900 }, { "epoch": 1.95, "learning_rate": 3.0501469951713634e-05, "loss": 0.2275, "step": 347000 }, { "epoch": 1.95, "learning_rate": 3.0495848721451177e-05, "loss": 0.2382, "step": 347100 }, { "epoch": 1.95, "learning_rate": 3.0490227491188723e-05, "loss": 0.2307, "step": 347200 }, { "epoch": 1.95, "learning_rate": 3.048460626092627e-05, "loss": 0.232, "step": 347300 }, { "epoch": 1.95, "learning_rate": 3.0478985030663816e-05, "loss": 0.2325, "step": 347400 }, { "epoch": 1.95, "learning_rate": 3.0473363800401355e-05, "loss": 0.2315, "step": 347500 }, { "epoch": 1.95, "learning_rate": 3.046785499474415e-05, "loss": 0.2325, "step": 347600 }, { "epoch": 1.95, "learning_rate": 3.0462233764481697e-05, "loss": 0.2354, "step": 347700 }, { "epoch": 1.96, "learning_rate": 3.0456612534219243e-05, "loss": 0.2372, "step": 347800 }, { "epoch": 1.96, "learning_rate": 3.0450991303956783e-05, "loss": 0.2395, "step": 347900 }, { "epoch": 1.96, "learning_rate": 3.044537007369433e-05, "loss": 0.2413, "step": 348000 }, { "epoch": 1.96, "learning_rate": 3.0439748843431875e-05, "loss": 0.2409, "step": 348100 }, { "epoch": 1.96, "learning_rate": 3.043412761316942e-05, "loss": 0.2408, "step": 348200 }, { "epoch": 1.96, "learning_rate": 3.042850638290696e-05, "loss": 0.2399, "step": 348300 }, { "epoch": 1.96, "learning_rate": 3.0422885152644507e-05, "loss": 0.2339, "step": 348400 }, { "epoch": 1.96, "learning_rate": 3.0417263922382057e-05, "loss": 0.2383, "step": 348500 }, { "epoch": 1.96, "learning_rate": 3.0411642692119597e-05, "loss": 0.2363, "step": 348600 }, { "epoch": 1.96, "learning_rate": 3.0406021461857143e-05, "loss": 0.2344, "step": 348700 }, { "epoch": 1.96, "learning_rate": 3.040040023159469e-05, "loss": 0.2374, "step": 348800 }, { "epoch": 1.96, "learning_rate": 3.0394779001332235e-05, "loss": 0.2339, "step": 348900 }, { "epoch": 1.96, "learning_rate": 3.0389157771069775e-05, "loss": 0.2383, "step": 349000 }, { "epoch": 1.96, "learning_rate": 3.038353654080732e-05, "loss": 0.2335, "step": 349100 }, { "epoch": 1.96, "learning_rate": 3.0377915310544867e-05, "loss": 0.2407, "step": 349200 }, { "epoch": 1.96, "learning_rate": 3.037229408028241e-05, "loss": 0.2391, "step": 349300 }, { "epoch": 1.96, "learning_rate": 3.0366672850019957e-05, "loss": 0.2392, "step": 349400 }, { "epoch": 1.96, "learning_rate": 3.0361051619757503e-05, "loss": 0.2327, "step": 349500 }, { "epoch": 1.97, "learning_rate": 3.035543038949505e-05, "loss": 0.2351, "step": 349600 }, { "epoch": 1.97, "learning_rate": 3.034980915923259e-05, "loss": 0.2346, "step": 349700 }, { "epoch": 1.97, "learning_rate": 3.0344187928970135e-05, "loss": 0.2326, "step": 349800 }, { "epoch": 1.97, "learning_rate": 3.033856669870768e-05, "loss": 0.2372, "step": 349900 }, { "epoch": 1.97, "learning_rate": 3.0332945468445224e-05, "loss": 0.2356, "step": 350000 }, { "epoch": 1.97, "learning_rate": 3.032732423818277e-05, "loss": 0.2371, "step": 350100 }, { "epoch": 1.97, "learning_rate": 3.0321703007920317e-05, "loss": 0.2324, "step": 350200 }, { "epoch": 1.97, "learning_rate": 3.0316081777657863e-05, "loss": 0.2349, "step": 350300 }, { "epoch": 1.97, "learning_rate": 3.0310460547395402e-05, "loss": 0.2348, "step": 350400 }, { "epoch": 1.97, "learning_rate": 3.030483931713295e-05, "loss": 0.238, "step": 350500 }, { "epoch": 1.97, "learning_rate": 3.0299218086870495e-05, "loss": 0.2378, "step": 350600 }, { "epoch": 1.97, "learning_rate": 3.0293596856608038e-05, "loss": 0.2329, "step": 350700 }, { "epoch": 1.97, "learning_rate": 3.0287975626345584e-05, "loss": 0.231, "step": 350800 }, { "epoch": 1.97, "learning_rate": 3.028235439608313e-05, "loss": 0.2358, "step": 350900 }, { "epoch": 1.97, "learning_rate": 3.0276733165820677e-05, "loss": 0.2369, "step": 351000 }, { "epoch": 1.97, "learning_rate": 3.0271111935558216e-05, "loss": 0.2351, "step": 351100 }, { "epoch": 1.97, "learning_rate": 3.0265490705295763e-05, "loss": 0.2346, "step": 351200 }, { "epoch": 1.97, "learning_rate": 3.025986947503331e-05, "loss": 0.2354, "step": 351300 }, { "epoch": 1.98, "learning_rate": 3.025424824477085e-05, "loss": 0.2331, "step": 351400 }, { "epoch": 1.98, "learning_rate": 3.0248627014508395e-05, "loss": 0.2269, "step": 351500 }, { "epoch": 1.98, "learning_rate": 3.024300578424594e-05, "loss": 0.2363, "step": 351600 }, { "epoch": 1.98, "learning_rate": 3.0237384553983487e-05, "loss": 0.2449, "step": 351700 }, { "epoch": 1.98, "learning_rate": 3.023176332372103e-05, "loss": 0.2368, "step": 351800 }, { "epoch": 1.98, "learning_rate": 3.0226142093458576e-05, "loss": 0.233, "step": 351900 }, { "epoch": 1.98, "learning_rate": 3.0220520863196123e-05, "loss": 0.2363, "step": 352000 }, { "epoch": 1.98, "learning_rate": 3.0214899632933662e-05, "loss": 0.2342, "step": 352100 }, { "epoch": 1.98, "learning_rate": 3.020927840267121e-05, "loss": 0.234, "step": 352200 }, { "epoch": 1.98, "learning_rate": 3.0203657172408755e-05, "loss": 0.2313, "step": 352300 }, { "epoch": 1.98, "learning_rate": 3.01980359421463e-05, "loss": 0.2315, "step": 352400 }, { "epoch": 1.98, "learning_rate": 3.0192414711883844e-05, "loss": 0.2363, "step": 352500 }, { "epoch": 1.98, "learning_rate": 3.018679348162139e-05, "loss": 0.2344, "step": 352600 }, { "epoch": 1.98, "learning_rate": 3.0181172251358936e-05, "loss": 0.2396, "step": 352700 }, { "epoch": 1.98, "learning_rate": 3.0175551021096476e-05, "loss": 0.2384, "step": 352800 }, { "epoch": 1.98, "learning_rate": 3.0169929790834022e-05, "loss": 0.2323, "step": 352900 }, { "epoch": 1.98, "learning_rate": 3.016430856057157e-05, "loss": 0.2308, "step": 353000 }, { "epoch": 1.98, "learning_rate": 3.0158687330309115e-05, "loss": 0.2364, "step": 353100 }, { "epoch": 1.99, "learning_rate": 3.0153066100046658e-05, "loss": 0.2373, "step": 353200 }, { "epoch": 1.99, "learning_rate": 3.0147444869784204e-05, "loss": 0.232, "step": 353300 }, { "epoch": 1.99, "learning_rate": 3.014182363952175e-05, "loss": 0.2353, "step": 353400 }, { "epoch": 1.99, "learning_rate": 3.013620240925929e-05, "loss": 0.238, "step": 353500 }, { "epoch": 1.99, "learning_rate": 3.013063739129946e-05, "loss": 0.2407, "step": 353600 }, { "epoch": 1.99, "learning_rate": 3.0125016161037007e-05, "loss": 0.235, "step": 353700 }, { "epoch": 1.99, "learning_rate": 3.0119394930774553e-05, "loss": 0.236, "step": 353800 }, { "epoch": 1.99, "learning_rate": 3.0113773700512093e-05, "loss": 0.2359, "step": 353900 }, { "epoch": 1.99, "learning_rate": 3.010815247024964e-05, "loss": 0.2353, "step": 354000 }, { "epoch": 1.99, "learning_rate": 3.0102531239987185e-05, "loss": 0.2367, "step": 354100 }, { "epoch": 1.99, "learning_rate": 3.009691000972473e-05, "loss": 0.2382, "step": 354200 }, { "epoch": 1.99, "learning_rate": 3.0091288779462274e-05, "loss": 0.2398, "step": 354300 }, { "epoch": 1.99, "learning_rate": 3.008566754919982e-05, "loss": 0.2357, "step": 354400 }, { "epoch": 1.99, "learning_rate": 3.0080046318937367e-05, "loss": 0.2413, "step": 354500 }, { "epoch": 1.99, "learning_rate": 3.0074425088674906e-05, "loss": 0.2326, "step": 354600 }, { "epoch": 1.99, "learning_rate": 3.0068803858412453e-05, "loss": 0.2369, "step": 354700 }, { "epoch": 1.99, "learning_rate": 3.006318262815e-05, "loss": 0.2367, "step": 354800 }, { "epoch": 1.99, "learning_rate": 3.0057561397887545e-05, "loss": 0.2359, "step": 354900 }, { "epoch": 2.0, "learning_rate": 3.0051940167625088e-05, "loss": 0.2348, "step": 355000 }, { "epoch": 2.0, "learning_rate": 3.0046318937362634e-05, "loss": 0.2296, "step": 355100 }, { "epoch": 2.0, "learning_rate": 3.004069770710018e-05, "loss": 0.2334, "step": 355200 }, { "epoch": 2.0, "learning_rate": 3.003507647683772e-05, "loss": 0.2412, "step": 355300 }, { "epoch": 2.0, "learning_rate": 3.0029455246575267e-05, "loss": 0.2393, "step": 355400 }, { "epoch": 2.0, "learning_rate": 3.0023834016312813e-05, "loss": 0.235, "step": 355500 }, { "epoch": 2.0, "learning_rate": 3.0018212786050352e-05, "loss": 0.2321, "step": 355600 }, { "epoch": 2.0, "learning_rate": 3.00125915557879e-05, "loss": 0.233, "step": 355700 }, { "epoch": 2.0, "eval_bleu": 75.8819, "eval_cer": 2.4613, "eval_chrF": 95.00769603599446, "eval_gen_len": 16.760384, "eval_loss": 0.5156659483909607, "eval_runtime": 7263.3248, "eval_samples_per_second": 34.419, "eval_steps_per_second": 0.538, "eval_wer": 13.4362, "step": 355794 }, { "epoch": 2.0, "learning_rate": 3.000702653782807e-05, "loss": 0.2436, "step": 355800 }, { "epoch": 2.0, "learning_rate": 3.0001405307565616e-05, "loss": 0.2181, "step": 355900 }, { "epoch": 2.0, "learning_rate": 2.999578407730316e-05, "loss": 0.2178, "step": 356000 }, { "epoch": 2.0, "learning_rate": 2.9990162847040705e-05, "loss": 0.2193, "step": 356100 }, { "epoch": 2.0, "learning_rate": 2.998454161677825e-05, "loss": 0.2147, "step": 356200 }, { "epoch": 2.0, "learning_rate": 2.9978920386515798e-05, "loss": 0.2212, "step": 356300 }, { "epoch": 2.0, "learning_rate": 2.9973299156253337e-05, "loss": 0.2285, "step": 356400 }, { "epoch": 2.0, "learning_rate": 2.9967677925990883e-05, "loss": 0.2124, "step": 356500 }, { "epoch": 2.0, "learning_rate": 2.996205669572843e-05, "loss": 0.2141, "step": 356600 }, { "epoch": 2.01, "learning_rate": 2.995643546546597e-05, "loss": 0.2214, "step": 356700 }, { "epoch": 2.01, "learning_rate": 2.9950814235203515e-05, "loss": 0.2103, "step": 356800 }, { "epoch": 2.01, "learning_rate": 2.9945193004941062e-05, "loss": 0.2223, "step": 356900 }, { "epoch": 2.01, "learning_rate": 2.9939627986981233e-05, "loss": 0.2166, "step": 357000 }, { "epoch": 2.01, "learning_rate": 2.9934006756718776e-05, "loss": 0.2146, "step": 357100 }, { "epoch": 2.01, "learning_rate": 2.9928385526456322e-05, "loss": 0.2239, "step": 357200 }, { "epoch": 2.01, "learning_rate": 2.9922764296193868e-05, "loss": 0.2098, "step": 357300 }, { "epoch": 2.01, "learning_rate": 2.9917143065931414e-05, "loss": 0.2149, "step": 357400 }, { "epoch": 2.01, "learning_rate": 2.991157804797158e-05, "loss": 0.2201, "step": 357500 }, { "epoch": 2.01, "learning_rate": 2.9905956817709125e-05, "loss": 0.2205, "step": 357600 }, { "epoch": 2.01, "learning_rate": 2.990033558744667e-05, "loss": 0.2084, "step": 357700 }, { "epoch": 2.01, "learning_rate": 2.9894714357184218e-05, "loss": 0.2166, "step": 357800 }, { "epoch": 2.01, "learning_rate": 2.9889093126921757e-05, "loss": 0.2145, "step": 357900 }, { "epoch": 2.01, "learning_rate": 2.9883471896659303e-05, "loss": 0.2223, "step": 358000 }, { "epoch": 2.01, "learning_rate": 2.987785066639685e-05, "loss": 0.2151, "step": 358100 }, { "epoch": 2.01, "learning_rate": 2.9872229436134392e-05, "loss": 0.2164, "step": 358200 }, { "epoch": 2.01, "learning_rate": 2.986660820587194e-05, "loss": 0.2185, "step": 358300 }, { "epoch": 2.01, "learning_rate": 2.9860986975609485e-05, "loss": 0.2262, "step": 358400 }, { "epoch": 2.02, "learning_rate": 2.985536574534703e-05, "loss": 0.2157, "step": 358500 }, { "epoch": 2.02, "learning_rate": 2.984974451508457e-05, "loss": 0.2182, "step": 358600 }, { "epoch": 2.02, "learning_rate": 2.9844123284822117e-05, "loss": 0.2131, "step": 358700 }, { "epoch": 2.02, "learning_rate": 2.9838502054559663e-05, "loss": 0.2243, "step": 358800 }, { "epoch": 2.02, "learning_rate": 2.9832880824297206e-05, "loss": 0.2167, "step": 358900 }, { "epoch": 2.02, "learning_rate": 2.9827259594034753e-05, "loss": 0.2195, "step": 359000 }, { "epoch": 2.02, "learning_rate": 2.98216383637723e-05, "loss": 0.2196, "step": 359100 }, { "epoch": 2.02, "learning_rate": 2.9816017133509845e-05, "loss": 0.2209, "step": 359200 }, { "epoch": 2.02, "learning_rate": 2.9810395903247385e-05, "loss": 0.2255, "step": 359300 }, { "epoch": 2.02, "learning_rate": 2.980477467298493e-05, "loss": 0.2166, "step": 359400 }, { "epoch": 2.02, "learning_rate": 2.9799153442722477e-05, "loss": 0.215, "step": 359500 }, { "epoch": 2.02, "learning_rate": 2.9793532212460017e-05, "loss": 0.2123, "step": 359600 }, { "epoch": 2.02, "learning_rate": 2.9787910982197563e-05, "loss": 0.2188, "step": 359700 }, { "epoch": 2.02, "learning_rate": 2.978228975193511e-05, "loss": 0.2118, "step": 359800 }, { "epoch": 2.02, "learning_rate": 2.9776668521672655e-05, "loss": 0.2188, "step": 359900 }, { "epoch": 2.02, "learning_rate": 2.97710472914102e-05, "loss": 0.2173, "step": 360000 }, { "epoch": 2.02, "learning_rate": 2.9765426061147745e-05, "loss": 0.2238, "step": 360100 }, { "epoch": 2.02, "learning_rate": 2.975980483088529e-05, "loss": 0.2238, "step": 360200 }, { "epoch": 2.03, "learning_rate": 2.975418360062283e-05, "loss": 0.2116, "step": 360300 }, { "epoch": 2.03, "learning_rate": 2.9748562370360377e-05, "loss": 0.2181, "step": 360400 }, { "epoch": 2.03, "learning_rate": 2.9742941140097923e-05, "loss": 0.214, "step": 360500 }, { "epoch": 2.03, "learning_rate": 2.973731990983547e-05, "loss": 0.2229, "step": 360600 }, { "epoch": 2.03, "learning_rate": 2.9731698679573012e-05, "loss": 0.2103, "step": 360700 }, { "epoch": 2.03, "learning_rate": 2.972607744931056e-05, "loss": 0.2167, "step": 360800 }, { "epoch": 2.03, "learning_rate": 2.9720456219048105e-05, "loss": 0.2221, "step": 360900 }, { "epoch": 2.03, "learning_rate": 2.9714834988785644e-05, "loss": 0.2279, "step": 361000 }, { "epoch": 2.03, "learning_rate": 2.970921375852319e-05, "loss": 0.2226, "step": 361100 }, { "epoch": 2.03, "learning_rate": 2.9703592528260737e-05, "loss": 0.2198, "step": 361200 }, { "epoch": 2.03, "learning_rate": 2.9697971297998283e-05, "loss": 0.2139, "step": 361300 }, { "epoch": 2.03, "learning_rate": 2.9692350067735826e-05, "loss": 0.2148, "step": 361400 }, { "epoch": 2.03, "learning_rate": 2.9686728837473372e-05, "loss": 0.2192, "step": 361500 }, { "epoch": 2.03, "learning_rate": 2.968110760721092e-05, "loss": 0.2172, "step": 361600 }, { "epoch": 2.03, "learning_rate": 2.9675486376948458e-05, "loss": 0.2141, "step": 361700 }, { "epoch": 2.03, "learning_rate": 2.9669865146686004e-05, "loss": 0.2147, "step": 361800 }, { "epoch": 2.03, "learning_rate": 2.966424391642355e-05, "loss": 0.2136, "step": 361900 }, { "epoch": 2.03, "learning_rate": 2.9658622686161097e-05, "loss": 0.2186, "step": 362000 }, { "epoch": 2.04, "learning_rate": 2.9653001455898636e-05, "loss": 0.2196, "step": 362100 }, { "epoch": 2.04, "learning_rate": 2.9647380225636186e-05, "loss": 0.2231, "step": 362200 }, { "epoch": 2.04, "learning_rate": 2.9641758995373732e-05, "loss": 0.217, "step": 362300 }, { "epoch": 2.04, "learning_rate": 2.9636137765111272e-05, "loss": 0.2191, "step": 362400 }, { "epoch": 2.04, "learning_rate": 2.9630516534848818e-05, "loss": 0.2173, "step": 362500 }, { "epoch": 2.04, "learning_rate": 2.9624895304586364e-05, "loss": 0.221, "step": 362600 }, { "epoch": 2.04, "learning_rate": 2.961927407432391e-05, "loss": 0.2139, "step": 362700 }, { "epoch": 2.04, "learning_rate": 2.961365284406145e-05, "loss": 0.2209, "step": 362800 }, { "epoch": 2.04, "learning_rate": 2.9608031613798996e-05, "loss": 0.2124, "step": 362900 }, { "epoch": 2.04, "learning_rate": 2.9602410383536543e-05, "loss": 0.2184, "step": 363000 }, { "epoch": 2.04, "learning_rate": 2.9596789153274086e-05, "loss": 0.2162, "step": 363100 }, { "epoch": 2.04, "learning_rate": 2.9591167923011632e-05, "loss": 0.2219, "step": 363200 }, { "epoch": 2.04, "learning_rate": 2.9585546692749178e-05, "loss": 0.2223, "step": 363300 }, { "epoch": 2.04, "learning_rate": 2.9579925462486724e-05, "loss": 0.2161, "step": 363400 }, { "epoch": 2.04, "learning_rate": 2.9574304232224264e-05, "loss": 0.2126, "step": 363500 }, { "epoch": 2.04, "learning_rate": 2.956868300196181e-05, "loss": 0.2232, "step": 363600 }, { "epoch": 2.04, "learning_rate": 2.9563061771699356e-05, "loss": 0.2226, "step": 363700 }, { "epoch": 2.05, "learning_rate": 2.95574405414369e-05, "loss": 0.2184, "step": 363800 }, { "epoch": 2.05, "learning_rate": 2.9551819311174446e-05, "loss": 0.214, "step": 363900 }, { "epoch": 2.05, "learning_rate": 2.9546198080911992e-05, "loss": 0.2195, "step": 364000 }, { "epoch": 2.05, "learning_rate": 2.9540576850649538e-05, "loss": 0.2162, "step": 364100 }, { "epoch": 2.05, "learning_rate": 2.9534955620387078e-05, "loss": 0.2235, "step": 364200 }, { "epoch": 2.05, "learning_rate": 2.9529334390124624e-05, "loss": 0.2216, "step": 364300 }, { "epoch": 2.05, "learning_rate": 2.952371315986217e-05, "loss": 0.2198, "step": 364400 }, { "epoch": 2.05, "learning_rate": 2.9518091929599713e-05, "loss": 0.218, "step": 364500 }, { "epoch": 2.05, "learning_rate": 2.951247069933726e-05, "loss": 0.216, "step": 364600 }, { "epoch": 2.05, "learning_rate": 2.9506849469074806e-05, "loss": 0.2175, "step": 364700 }, { "epoch": 2.05, "learning_rate": 2.9501228238812352e-05, "loss": 0.2211, "step": 364800 }, { "epoch": 2.05, "learning_rate": 2.949560700854989e-05, "loss": 0.2117, "step": 364900 }, { "epoch": 2.05, "learning_rate": 2.9489985778287438e-05, "loss": 0.2167, "step": 365000 }, { "epoch": 2.05, "learning_rate": 2.9484364548024984e-05, "loss": 0.2224, "step": 365100 }, { "epoch": 2.05, "learning_rate": 2.9478743317762523e-05, "loss": 0.2141, "step": 365200 }, { "epoch": 2.05, "learning_rate": 2.947312208750007e-05, "loss": 0.2191, "step": 365300 }, { "epoch": 2.05, "learning_rate": 2.9467500857237616e-05, "loss": 0.2132, "step": 365400 }, { "epoch": 2.05, "learning_rate": 2.946187962697516e-05, "loss": 0.2224, "step": 365500 }, { "epoch": 2.06, "learning_rate": 2.9456258396712705e-05, "loss": 0.217, "step": 365600 }, { "epoch": 2.06, "learning_rate": 2.945063716645025e-05, "loss": 0.225, "step": 365700 }, { "epoch": 2.06, "learning_rate": 2.9445015936187798e-05, "loss": 0.2209, "step": 365800 }, { "epoch": 2.06, "learning_rate": 2.9439394705925337e-05, "loss": 0.214, "step": 365900 }, { "epoch": 2.06, "learning_rate": 2.9433773475662884e-05, "loss": 0.2169, "step": 366000 }, { "epoch": 2.06, "learning_rate": 2.9428208457703055e-05, "loss": 0.2187, "step": 366100 }, { "epoch": 2.06, "learning_rate": 2.94225872274406e-05, "loss": 0.218, "step": 366200 }, { "epoch": 2.06, "learning_rate": 2.9417022209480765e-05, "loss": 0.2112, "step": 366300 }, { "epoch": 2.06, "learning_rate": 2.941140097921831e-05, "loss": 0.2188, "step": 366400 }, { "epoch": 2.06, "learning_rate": 2.9405779748955858e-05, "loss": 0.222, "step": 366500 }, { "epoch": 2.06, "learning_rate": 2.9400158518693404e-05, "loss": 0.2174, "step": 366600 }, { "epoch": 2.06, "learning_rate": 2.9394537288430947e-05, "loss": 0.2153, "step": 366700 }, { "epoch": 2.06, "learning_rate": 2.9388916058168493e-05, "loss": 0.2215, "step": 366800 }, { "epoch": 2.06, "learning_rate": 2.938329482790604e-05, "loss": 0.2205, "step": 366900 }, { "epoch": 2.06, "learning_rate": 2.937767359764358e-05, "loss": 0.2224, "step": 367000 }, { "epoch": 2.06, "learning_rate": 2.9372052367381125e-05, "loss": 0.2211, "step": 367100 }, { "epoch": 2.06, "learning_rate": 2.936643113711867e-05, "loss": 0.2165, "step": 367200 }, { "epoch": 2.06, "learning_rate": 2.9360809906856218e-05, "loss": 0.217, "step": 367300 }, { "epoch": 2.07, "learning_rate": 2.935518867659376e-05, "loss": 0.2164, "step": 367400 }, { "epoch": 2.07, "learning_rate": 2.9349567446331307e-05, "loss": 0.2196, "step": 367500 }, { "epoch": 2.07, "learning_rate": 2.9343946216068853e-05, "loss": 0.2224, "step": 367600 }, { "epoch": 2.07, "learning_rate": 2.9338324985806393e-05, "loss": 0.2124, "step": 367700 }, { "epoch": 2.07, "learning_rate": 2.933270375554394e-05, "loss": 0.2222, "step": 367800 }, { "epoch": 2.07, "learning_rate": 2.9327082525281485e-05, "loss": 0.2176, "step": 367900 }, { "epoch": 2.07, "learning_rate": 2.932146129501903e-05, "loss": 0.2197, "step": 368000 }, { "epoch": 2.07, "learning_rate": 2.931584006475657e-05, "loss": 0.2268, "step": 368100 }, { "epoch": 2.07, "learning_rate": 2.9310218834494117e-05, "loss": 0.2092, "step": 368200 }, { "epoch": 2.07, "learning_rate": 2.9304597604231663e-05, "loss": 0.2198, "step": 368300 }, { "epoch": 2.07, "learning_rate": 2.9298976373969206e-05, "loss": 0.2228, "step": 368400 }, { "epoch": 2.07, "learning_rate": 2.9293355143706753e-05, "loss": 0.2116, "step": 368500 }, { "epoch": 2.07, "learning_rate": 2.92877339134443e-05, "loss": 0.216, "step": 368600 }, { "epoch": 2.07, "learning_rate": 2.9282112683181845e-05, "loss": 0.2183, "step": 368700 }, { "epoch": 2.07, "learning_rate": 2.9276491452919385e-05, "loss": 0.2208, "step": 368800 }, { "epoch": 2.07, "learning_rate": 2.927087022265693e-05, "loss": 0.2214, "step": 368900 }, { "epoch": 2.07, "learning_rate": 2.9265248992394477e-05, "loss": 0.2206, "step": 369000 }, { "epoch": 2.07, "learning_rate": 2.925962776213202e-05, "loss": 0.2173, "step": 369100 }, { "epoch": 2.08, "learning_rate": 2.9254006531869566e-05, "loss": 0.2214, "step": 369200 }, { "epoch": 2.08, "learning_rate": 2.9248385301607113e-05, "loss": 0.2198, "step": 369300 }, { "epoch": 2.08, "learning_rate": 2.924276407134466e-05, "loss": 0.2225, "step": 369400 }, { "epoch": 2.08, "learning_rate": 2.92371428410822e-05, "loss": 0.2149, "step": 369500 }, { "epoch": 2.08, "learning_rate": 2.9231521610819745e-05, "loss": 0.2143, "step": 369600 }, { "epoch": 2.08, "learning_rate": 2.922590038055729e-05, "loss": 0.2221, "step": 369700 }, { "epoch": 2.08, "learning_rate": 2.9220279150294834e-05, "loss": 0.2244, "step": 369800 }, { "epoch": 2.08, "learning_rate": 2.921465792003238e-05, "loss": 0.2182, "step": 369900 }, { "epoch": 2.08, "learning_rate": 2.9209149114375173e-05, "loss": 0.211, "step": 370000 }, { "epoch": 2.08, "learning_rate": 2.920352788411272e-05, "loss": 0.2209, "step": 370100 }, { "epoch": 2.08, "learning_rate": 2.9197906653850265e-05, "loss": 0.2224, "step": 370200 }, { "epoch": 2.08, "learning_rate": 2.9192285423587805e-05, "loss": 0.2151, "step": 370300 }, { "epoch": 2.08, "learning_rate": 2.9186664193325354e-05, "loss": 0.2151, "step": 370400 }, { "epoch": 2.08, "learning_rate": 2.91810429630629e-05, "loss": 0.2185, "step": 370500 }, { "epoch": 2.08, "learning_rate": 2.917542173280044e-05, "loss": 0.2233, "step": 370600 }, { "epoch": 2.08, "learning_rate": 2.9169800502537986e-05, "loss": 0.2206, "step": 370700 }, { "epoch": 2.08, "learning_rate": 2.9164179272275533e-05, "loss": 0.2181, "step": 370800 }, { "epoch": 2.08, "learning_rate": 2.915855804201308e-05, "loss": 0.2146, "step": 370900 }, { "epoch": 2.09, "learning_rate": 2.915293681175062e-05, "loss": 0.2212, "step": 371000 }, { "epoch": 2.09, "learning_rate": 2.9147315581488165e-05, "loss": 0.2166, "step": 371100 }, { "epoch": 2.09, "learning_rate": 2.914169435122571e-05, "loss": 0.2151, "step": 371200 }, { "epoch": 2.09, "learning_rate": 2.9136073120963254e-05, "loss": 0.2113, "step": 371300 }, { "epoch": 2.09, "learning_rate": 2.91304518907008e-05, "loss": 0.2162, "step": 371400 }, { "epoch": 2.09, "learning_rate": 2.9124830660438346e-05, "loss": 0.2225, "step": 371500 }, { "epoch": 2.09, "learning_rate": 2.9119209430175893e-05, "loss": 0.2138, "step": 371600 }, { "epoch": 2.09, "learning_rate": 2.9113588199913432e-05, "loss": 0.2229, "step": 371700 }, { "epoch": 2.09, "learning_rate": 2.910796696965098e-05, "loss": 0.2242, "step": 371800 }, { "epoch": 2.09, "learning_rate": 2.9102345739388525e-05, "loss": 0.2165, "step": 371900 }, { "epoch": 2.09, "learning_rate": 2.9096724509126068e-05, "loss": 0.2092, "step": 372000 }, { "epoch": 2.09, "learning_rate": 2.9091103278863614e-05, "loss": 0.2175, "step": 372100 }, { "epoch": 2.09, "learning_rate": 2.908548204860116e-05, "loss": 0.2235, "step": 372200 }, { "epoch": 2.09, "learning_rate": 2.9079860818338706e-05, "loss": 0.2122, "step": 372300 }, { "epoch": 2.09, "learning_rate": 2.9074239588076246e-05, "loss": 0.2229, "step": 372400 }, { "epoch": 2.09, "learning_rate": 2.9068618357813792e-05, "loss": 0.2175, "step": 372500 }, { "epoch": 2.09, "learning_rate": 2.906299712755134e-05, "loss": 0.2257, "step": 372600 }, { "epoch": 2.1, "learning_rate": 2.905737589728888e-05, "loss": 0.2153, "step": 372700 }, { "epoch": 2.1, "learning_rate": 2.9051754667026428e-05, "loss": 0.2219, "step": 372800 }, { "epoch": 2.1, "learning_rate": 2.9046133436763974e-05, "loss": 0.2168, "step": 372900 }, { "epoch": 2.1, "learning_rate": 2.904051220650152e-05, "loss": 0.2173, "step": 373000 }, { "epoch": 2.1, "learning_rate": 2.903489097623906e-05, "loss": 0.217, "step": 373100 }, { "epoch": 2.1, "learning_rate": 2.9029269745976606e-05, "loss": 0.221, "step": 373200 }, { "epoch": 2.1, "learning_rate": 2.9023648515714152e-05, "loss": 0.2098, "step": 373300 }, { "epoch": 2.1, "learning_rate": 2.9018027285451692e-05, "loss": 0.2169, "step": 373400 }, { "epoch": 2.1, "learning_rate": 2.9012406055189238e-05, "loss": 0.2177, "step": 373500 }, { "epoch": 2.1, "learning_rate": 2.9006784824926784e-05, "loss": 0.2218, "step": 373600 }, { "epoch": 2.1, "learning_rate": 2.9001163594664334e-05, "loss": 0.2132, "step": 373700 }, { "epoch": 2.1, "learning_rate": 2.8995542364401873e-05, "loss": 0.2187, "step": 373800 }, { "epoch": 2.1, "learning_rate": 2.898992113413942e-05, "loss": 0.219, "step": 373900 }, { "epoch": 2.1, "learning_rate": 2.8984299903876966e-05, "loss": 0.215, "step": 374000 }, { "epoch": 2.1, "learning_rate": 2.8978678673614506e-05, "loss": 0.2203, "step": 374100 }, { "epoch": 2.1, "learning_rate": 2.8973057443352052e-05, "loss": 0.2138, "step": 374200 }, { "epoch": 2.1, "learning_rate": 2.8967436213089598e-05, "loss": 0.2136, "step": 374300 }, { "epoch": 2.1, "learning_rate": 2.8961814982827144e-05, "loss": 0.2228, "step": 374400 }, { "epoch": 2.11, "learning_rate": 2.8956193752564687e-05, "loss": 0.2182, "step": 374500 }, { "epoch": 2.11, "learning_rate": 2.8950572522302234e-05, "loss": 0.2223, "step": 374600 }, { "epoch": 2.11, "learning_rate": 2.894495129203978e-05, "loss": 0.2196, "step": 374700 }, { "epoch": 2.11, "learning_rate": 2.893933006177732e-05, "loss": 0.2279, "step": 374800 }, { "epoch": 2.11, "learning_rate": 2.8933708831514866e-05, "loss": 0.2204, "step": 374900 }, { "epoch": 2.11, "learning_rate": 2.8928087601252412e-05, "loss": 0.2177, "step": 375000 }, { "epoch": 2.11, "learning_rate": 2.8922466370989955e-05, "loss": 0.2188, "step": 375100 }, { "epoch": 2.11, "learning_rate": 2.89168451407275e-05, "loss": 0.2192, "step": 375200 }, { "epoch": 2.11, "learning_rate": 2.8911223910465047e-05, "loss": 0.2201, "step": 375300 }, { "epoch": 2.11, "learning_rate": 2.8905602680202594e-05, "loss": 0.212, "step": 375400 }, { "epoch": 2.11, "learning_rate": 2.8899981449940133e-05, "loss": 0.2185, "step": 375500 }, { "epoch": 2.11, "learning_rate": 2.889436021967768e-05, "loss": 0.2228, "step": 375600 }, { "epoch": 2.11, "learning_rate": 2.8888738989415226e-05, "loss": 0.2205, "step": 375700 }, { "epoch": 2.11, "learning_rate": 2.888311775915277e-05, "loss": 0.2168, "step": 375800 }, { "epoch": 2.11, "learning_rate": 2.8877496528890315e-05, "loss": 0.2162, "step": 375900 }, { "epoch": 2.11, "learning_rate": 2.887187529862786e-05, "loss": 0.2161, "step": 376000 }, { "epoch": 2.11, "learning_rate": 2.8866254068365407e-05, "loss": 0.2165, "step": 376100 }, { "epoch": 2.11, "learning_rate": 2.8860632838102947e-05, "loss": 0.2254, "step": 376200 }, { "epoch": 2.12, "learning_rate": 2.8855011607840493e-05, "loss": 0.2199, "step": 376300 }, { "epoch": 2.12, "learning_rate": 2.884939037757804e-05, "loss": 0.2211, "step": 376400 }, { "epoch": 2.12, "learning_rate": 2.884382535961821e-05, "loss": 0.2202, "step": 376500 }, { "epoch": 2.12, "learning_rate": 2.883820412935575e-05, "loss": 0.219, "step": 376600 }, { "epoch": 2.12, "learning_rate": 2.8832582899093296e-05, "loss": 0.2123, "step": 376700 }, { "epoch": 2.12, "learning_rate": 2.8826961668830843e-05, "loss": 0.2198, "step": 376800 }, { "epoch": 2.12, "learning_rate": 2.8821340438568385e-05, "loss": 0.22, "step": 376900 }, { "epoch": 2.12, "learning_rate": 2.881571920830593e-05, "loss": 0.2171, "step": 377000 }, { "epoch": 2.12, "learning_rate": 2.88101541903461e-05, "loss": 0.2236, "step": 377100 }, { "epoch": 2.12, "learning_rate": 2.8804532960083646e-05, "loss": 0.2204, "step": 377200 }, { "epoch": 2.12, "learning_rate": 2.879891172982119e-05, "loss": 0.2155, "step": 377300 }, { "epoch": 2.12, "learning_rate": 2.8793290499558735e-05, "loss": 0.2219, "step": 377400 }, { "epoch": 2.12, "learning_rate": 2.878766926929628e-05, "loss": 0.211, "step": 377500 }, { "epoch": 2.12, "learning_rate": 2.8782048039033827e-05, "loss": 0.2136, "step": 377600 }, { "epoch": 2.12, "learning_rate": 2.8776426808771367e-05, "loss": 0.2169, "step": 377700 }, { "epoch": 2.12, "learning_rate": 2.8770805578508913e-05, "loss": 0.2116, "step": 377800 }, { "epoch": 2.12, "learning_rate": 2.876518434824646e-05, "loss": 0.22, "step": 377900 }, { "epoch": 2.12, "learning_rate": 2.8759563117984002e-05, "loss": 0.218, "step": 378000 }, { "epoch": 2.13, "learning_rate": 2.875394188772155e-05, "loss": 0.2139, "step": 378100 }, { "epoch": 2.13, "learning_rate": 2.8748320657459095e-05, "loss": 0.2088, "step": 378200 }, { "epoch": 2.13, "learning_rate": 2.874269942719664e-05, "loss": 0.2196, "step": 378300 }, { "epoch": 2.13, "learning_rate": 2.873707819693418e-05, "loss": 0.2175, "step": 378400 }, { "epoch": 2.13, "learning_rate": 2.8731456966671727e-05, "loss": 0.2214, "step": 378500 }, { "epoch": 2.13, "learning_rate": 2.8725835736409273e-05, "loss": 0.2189, "step": 378600 }, { "epoch": 2.13, "learning_rate": 2.8720214506146813e-05, "loss": 0.2203, "step": 378700 }, { "epoch": 2.13, "learning_rate": 2.871459327588436e-05, "loss": 0.2195, "step": 378800 }, { "epoch": 2.13, "learning_rate": 2.870897204562191e-05, "loss": 0.2184, "step": 378900 }, { "epoch": 2.13, "learning_rate": 2.8703350815359455e-05, "loss": 0.2179, "step": 379000 }, { "epoch": 2.13, "learning_rate": 2.8697729585096994e-05, "loss": 0.2144, "step": 379100 }, { "epoch": 2.13, "learning_rate": 2.869210835483454e-05, "loss": 0.2096, "step": 379200 }, { "epoch": 2.13, "learning_rate": 2.8686487124572087e-05, "loss": 0.2212, "step": 379300 }, { "epoch": 2.13, "learning_rate": 2.8680865894309626e-05, "loss": 0.2197, "step": 379400 }, { "epoch": 2.13, "learning_rate": 2.8675244664047173e-05, "loss": 0.2081, "step": 379500 }, { "epoch": 2.13, "learning_rate": 2.866962343378472e-05, "loss": 0.2174, "step": 379600 }, { "epoch": 2.13, "learning_rate": 2.8664002203522265e-05, "loss": 0.2256, "step": 379700 }, { "epoch": 2.13, "learning_rate": 2.8658380973259808e-05, "loss": 0.2201, "step": 379800 }, { "epoch": 2.14, "learning_rate": 2.8652759742997354e-05, "loss": 0.2256, "step": 379900 }, { "epoch": 2.14, "learning_rate": 2.86471385127349e-05, "loss": 0.2161, "step": 380000 }, { "epoch": 2.14, "learning_rate": 2.864151728247244e-05, "loss": 0.2228, "step": 380100 }, { "epoch": 2.14, "learning_rate": 2.863595226451261e-05, "loss": 0.2249, "step": 380200 }, { "epoch": 2.14, "learning_rate": 2.8630331034250157e-05, "loss": 0.2208, "step": 380300 }, { "epoch": 2.14, "learning_rate": 2.8624709803987704e-05, "loss": 0.2148, "step": 380400 }, { "epoch": 2.14, "learning_rate": 2.8619088573725243e-05, "loss": 0.2201, "step": 380500 }, { "epoch": 2.14, "learning_rate": 2.861346734346279e-05, "loss": 0.2172, "step": 380600 }, { "epoch": 2.14, "learning_rate": 2.8607846113200336e-05, "loss": 0.2181, "step": 380700 }, { "epoch": 2.14, "learning_rate": 2.8602224882937882e-05, "loss": 0.2186, "step": 380800 }, { "epoch": 2.14, "learning_rate": 2.8596603652675425e-05, "loss": 0.2207, "step": 380900 }, { "epoch": 2.14, "learning_rate": 2.859098242241297e-05, "loss": 0.2147, "step": 381000 }, { "epoch": 2.14, "learning_rate": 2.8585361192150518e-05, "loss": 0.2221, "step": 381100 }, { "epoch": 2.14, "learning_rate": 2.8579739961888057e-05, "loss": 0.2219, "step": 381200 }, { "epoch": 2.14, "learning_rate": 2.8574118731625603e-05, "loss": 0.2177, "step": 381300 }, { "epoch": 2.14, "learning_rate": 2.856849750136315e-05, "loss": 0.2163, "step": 381400 }, { "epoch": 2.14, "learning_rate": 2.8562876271100696e-05, "loss": 0.2186, "step": 381500 }, { "epoch": 2.15, "learning_rate": 2.855725504083824e-05, "loss": 0.2145, "step": 381600 }, { "epoch": 2.15, "learning_rate": 2.8551633810575785e-05, "loss": 0.2175, "step": 381700 }, { "epoch": 2.15, "learning_rate": 2.854601258031333e-05, "loss": 0.2185, "step": 381800 }, { "epoch": 2.15, "learning_rate": 2.854039135005087e-05, "loss": 0.2206, "step": 381900 }, { "epoch": 2.15, "learning_rate": 2.8534770119788417e-05, "loss": 0.2137, "step": 382000 }, { "epoch": 2.15, "learning_rate": 2.8529148889525963e-05, "loss": 0.2112, "step": 382100 }, { "epoch": 2.15, "learning_rate": 2.852352765926351e-05, "loss": 0.2193, "step": 382200 }, { "epoch": 2.15, "learning_rate": 2.8517906429001053e-05, "loss": 0.2193, "step": 382300 }, { "epoch": 2.15, "learning_rate": 2.85122851987386e-05, "loss": 0.2103, "step": 382400 }, { "epoch": 2.15, "learning_rate": 2.8506663968476145e-05, "loss": 0.2256, "step": 382500 }, { "epoch": 2.15, "learning_rate": 2.8501042738213685e-05, "loss": 0.2183, "step": 382600 }, { "epoch": 2.15, "learning_rate": 2.849542150795123e-05, "loss": 0.2167, "step": 382700 }, { "epoch": 2.15, "learning_rate": 2.8489800277688777e-05, "loss": 0.2156, "step": 382800 }, { "epoch": 2.15, "learning_rate": 2.8484179047426323e-05, "loss": 0.2186, "step": 382900 }, { "epoch": 2.15, "learning_rate": 2.8478557817163863e-05, "loss": 0.2131, "step": 383000 }, { "epoch": 2.15, "learning_rate": 2.847293658690141e-05, "loss": 0.2155, "step": 383100 }, { "epoch": 2.15, "learning_rate": 2.846731535663896e-05, "loss": 0.2139, "step": 383200 }, { "epoch": 2.15, "learning_rate": 2.84616941263765e-05, "loss": 0.2234, "step": 383300 }, { "epoch": 2.16, "learning_rate": 2.8456072896114045e-05, "loss": 0.2118, "step": 383400 }, { "epoch": 2.16, "learning_rate": 2.845045166585159e-05, "loss": 0.2177, "step": 383500 }, { "epoch": 2.16, "learning_rate": 2.8444830435589137e-05, "loss": 0.2167, "step": 383600 }, { "epoch": 2.16, "learning_rate": 2.8439209205326677e-05, "loss": 0.2144, "step": 383700 }, { "epoch": 2.16, "learning_rate": 2.8433587975064223e-05, "loss": 0.2172, "step": 383800 }, { "epoch": 2.16, "learning_rate": 2.842796674480177e-05, "loss": 0.2112, "step": 383900 }, { "epoch": 2.16, "learning_rate": 2.8422345514539312e-05, "loss": 0.2157, "step": 384000 }, { "epoch": 2.16, "learning_rate": 2.841672428427686e-05, "loss": 0.2182, "step": 384100 }, { "epoch": 2.16, "learning_rate": 2.8411103054014405e-05, "loss": 0.2192, "step": 384200 }, { "epoch": 2.16, "learning_rate": 2.840548182375195e-05, "loss": 0.2143, "step": 384300 }, { "epoch": 2.16, "learning_rate": 2.839986059348949e-05, "loss": 0.2133, "step": 384400 }, { "epoch": 2.16, "learning_rate": 2.8394239363227037e-05, "loss": 0.2184, "step": 384500 }, { "epoch": 2.16, "learning_rate": 2.8388618132964583e-05, "loss": 0.2165, "step": 384600 }, { "epoch": 2.16, "learning_rate": 2.8382996902702126e-05, "loss": 0.2187, "step": 384700 }, { "epoch": 2.16, "learning_rate": 2.8377431884742294e-05, "loss": 0.2194, "step": 384800 }, { "epoch": 2.16, "learning_rate": 2.837181065447984e-05, "loss": 0.2212, "step": 384900 }, { "epoch": 2.16, "learning_rate": 2.8366189424217386e-05, "loss": 0.2145, "step": 385000 }, { "epoch": 2.16, "learning_rate": 2.836056819395493e-05, "loss": 0.2153, "step": 385100 }, { "epoch": 2.17, "learning_rate": 2.8354946963692475e-05, "loss": 0.2162, "step": 385200 }, { "epoch": 2.17, "learning_rate": 2.834932573343002e-05, "loss": 0.2179, "step": 385300 }, { "epoch": 2.17, "learning_rate": 2.834370450316756e-05, "loss": 0.2192, "step": 385400 }, { "epoch": 2.17, "learning_rate": 2.8338083272905107e-05, "loss": 0.222, "step": 385500 }, { "epoch": 2.17, "learning_rate": 2.8332462042642654e-05, "loss": 0.2168, "step": 385600 }, { "epoch": 2.17, "learning_rate": 2.83268408123802e-05, "loss": 0.2214, "step": 385700 }, { "epoch": 2.17, "learning_rate": 2.8321219582117743e-05, "loss": 0.2191, "step": 385800 }, { "epoch": 2.17, "learning_rate": 2.831559835185529e-05, "loss": 0.2143, "step": 385900 }, { "epoch": 2.17, "learning_rate": 2.8309977121592835e-05, "loss": 0.2205, "step": 386000 }, { "epoch": 2.17, "learning_rate": 2.8304355891330375e-05, "loss": 0.2195, "step": 386100 }, { "epoch": 2.17, "learning_rate": 2.829873466106792e-05, "loss": 0.2209, "step": 386200 }, { "epoch": 2.17, "learning_rate": 2.8293113430805467e-05, "loss": 0.2169, "step": 386300 }, { "epoch": 2.17, "learning_rate": 2.8287492200543014e-05, "loss": 0.2172, "step": 386400 }, { "epoch": 2.17, "learning_rate": 2.8281870970280557e-05, "loss": 0.2171, "step": 386500 }, { "epoch": 2.17, "learning_rate": 2.8276249740018103e-05, "loss": 0.2154, "step": 386600 }, { "epoch": 2.17, "learning_rate": 2.827062850975565e-05, "loss": 0.2238, "step": 386700 }, { "epoch": 2.17, "learning_rate": 2.826500727949319e-05, "loss": 0.2199, "step": 386800 }, { "epoch": 2.17, "learning_rate": 2.8259386049230735e-05, "loss": 0.218, "step": 386900 }, { "epoch": 2.18, "learning_rate": 2.825376481896828e-05, "loss": 0.2174, "step": 387000 }, { "epoch": 2.18, "learning_rate": 2.8248143588705827e-05, "loss": 0.2195, "step": 387100 }, { "epoch": 2.18, "learning_rate": 2.8242522358443367e-05, "loss": 0.2109, "step": 387200 }, { "epoch": 2.18, "learning_rate": 2.8236901128180913e-05, "loss": 0.2203, "step": 387300 }, { "epoch": 2.18, "learning_rate": 2.8231336110221084e-05, "loss": 0.2192, "step": 387400 }, { "epoch": 2.18, "learning_rate": 2.822571487995863e-05, "loss": 0.2169, "step": 387500 }, { "epoch": 2.18, "learning_rate": 2.8220149861998795e-05, "loss": 0.2192, "step": 387600 }, { "epoch": 2.18, "learning_rate": 2.821452863173634e-05, "loss": 0.216, "step": 387700 }, { "epoch": 2.18, "learning_rate": 2.8208907401473887e-05, "loss": 0.2163, "step": 387800 }, { "epoch": 2.18, "learning_rate": 2.8203286171211434e-05, "loss": 0.2182, "step": 387900 }, { "epoch": 2.18, "learning_rate": 2.8197664940948976e-05, "loss": 0.2223, "step": 388000 }, { "epoch": 2.18, "learning_rate": 2.8192043710686523e-05, "loss": 0.2195, "step": 388100 }, { "epoch": 2.18, "learning_rate": 2.818642248042407e-05, "loss": 0.2179, "step": 388200 }, { "epoch": 2.18, "learning_rate": 2.818080125016161e-05, "loss": 0.2158, "step": 388300 }, { "epoch": 2.18, "learning_rate": 2.8175180019899155e-05, "loss": 0.2166, "step": 388400 }, { "epoch": 2.18, "learning_rate": 2.81695587896367e-05, "loss": 0.2175, "step": 388500 }, { "epoch": 2.18, "learning_rate": 2.8163937559374247e-05, "loss": 0.2142, "step": 388600 }, { "epoch": 2.18, "learning_rate": 2.815831632911179e-05, "loss": 0.2194, "step": 388700 }, { "epoch": 2.19, "learning_rate": 2.8152695098849336e-05, "loss": 0.2192, "step": 388800 }, { "epoch": 2.19, "learning_rate": 2.8147073868586883e-05, "loss": 0.2182, "step": 388900 }, { "epoch": 2.19, "learning_rate": 2.814150885062705e-05, "loss": 0.223, "step": 389000 }, { "epoch": 2.19, "learning_rate": 2.8135887620364593e-05, "loss": 0.221, "step": 389100 }, { "epoch": 2.19, "learning_rate": 2.813026639010214e-05, "loss": 0.2131, "step": 389200 }, { "epoch": 2.19, "learning_rate": 2.8124645159839686e-05, "loss": 0.2137, "step": 389300 }, { "epoch": 2.19, "learning_rate": 2.8119023929577225e-05, "loss": 0.2136, "step": 389400 }, { "epoch": 2.19, "learning_rate": 2.811340269931477e-05, "loss": 0.218, "step": 389500 }, { "epoch": 2.19, "learning_rate": 2.8107781469052318e-05, "loss": 0.2171, "step": 389600 }, { "epoch": 2.19, "learning_rate": 2.8102160238789864e-05, "loss": 0.2241, "step": 389700 }, { "epoch": 2.19, "learning_rate": 2.8096539008527407e-05, "loss": 0.2198, "step": 389800 }, { "epoch": 2.19, "learning_rate": 2.8090917778264953e-05, "loss": 0.2219, "step": 389900 }, { "epoch": 2.19, "learning_rate": 2.80852965480025e-05, "loss": 0.2183, "step": 390000 }, { "epoch": 2.19, "learning_rate": 2.807967531774004e-05, "loss": 0.2156, "step": 390100 }, { "epoch": 2.19, "learning_rate": 2.8074054087477585e-05, "loss": 0.22, "step": 390200 }, { "epoch": 2.19, "learning_rate": 2.806843285721513e-05, "loss": 0.2163, "step": 390300 }, { "epoch": 2.19, "learning_rate": 2.8062811626952678e-05, "loss": 0.2207, "step": 390400 }, { "epoch": 2.2, "learning_rate": 2.805719039669022e-05, "loss": 0.2159, "step": 390500 }, { "epoch": 2.2, "learning_rate": 2.8051569166427767e-05, "loss": 0.2176, "step": 390600 }, { "epoch": 2.2, "learning_rate": 2.8045947936165313e-05, "loss": 0.2162, "step": 390700 }, { "epoch": 2.2, "learning_rate": 2.8040326705902853e-05, "loss": 0.2229, "step": 390800 }, { "epoch": 2.2, "learning_rate": 2.80347054756404e-05, "loss": 0.2145, "step": 390900 }, { "epoch": 2.2, "learning_rate": 2.8029084245377945e-05, "loss": 0.2276, "step": 391000 }, { "epoch": 2.2, "learning_rate": 2.8023463015115492e-05, "loss": 0.2143, "step": 391100 }, { "epoch": 2.2, "learning_rate": 2.801784178485303e-05, "loss": 0.2143, "step": 391200 }, { "epoch": 2.2, "learning_rate": 2.8012220554590578e-05, "loss": 0.2184, "step": 391300 }, { "epoch": 2.2, "learning_rate": 2.8006599324328127e-05, "loss": 0.2198, "step": 391400 }, { "epoch": 2.2, "learning_rate": 2.8000978094065667e-05, "loss": 0.2182, "step": 391500 }, { "epoch": 2.2, "learning_rate": 2.7995356863803213e-05, "loss": 0.2173, "step": 391600 }, { "epoch": 2.2, "learning_rate": 2.798973563354076e-05, "loss": 0.217, "step": 391700 }, { "epoch": 2.2, "learning_rate": 2.7984114403278306e-05, "loss": 0.2164, "step": 391800 }, { "epoch": 2.2, "learning_rate": 2.7978493173015845e-05, "loss": 0.2139, "step": 391900 }, { "epoch": 2.2, "learning_rate": 2.797287194275339e-05, "loss": 0.2095, "step": 392000 }, { "epoch": 2.2, "learning_rate": 2.7967250712490938e-05, "loss": 0.2134, "step": 392100 }, { "epoch": 2.2, "learning_rate": 2.796162948222848e-05, "loss": 0.2177, "step": 392200 }, { "epoch": 2.21, "learning_rate": 2.7956008251966027e-05, "loss": 0.2169, "step": 392300 }, { "epoch": 2.21, "learning_rate": 2.7950387021703573e-05, "loss": 0.2189, "step": 392400 }, { "epoch": 2.21, "learning_rate": 2.794476579144112e-05, "loss": 0.2164, "step": 392500 }, { "epoch": 2.21, "learning_rate": 2.793914456117866e-05, "loss": 0.2221, "step": 392600 }, { "epoch": 2.21, "learning_rate": 2.7933523330916205e-05, "loss": 0.2136, "step": 392700 }, { "epoch": 2.21, "learning_rate": 2.792790210065375e-05, "loss": 0.2231, "step": 392800 }, { "epoch": 2.21, "learning_rate": 2.7922280870391294e-05, "loss": 0.2211, "step": 392900 }, { "epoch": 2.21, "learning_rate": 2.791665964012884e-05, "loss": 0.2161, "step": 393000 }, { "epoch": 2.21, "learning_rate": 2.7911038409866387e-05, "loss": 0.2143, "step": 393100 }, { "epoch": 2.21, "learning_rate": 2.7905417179603933e-05, "loss": 0.2206, "step": 393200 }, { "epoch": 2.21, "learning_rate": 2.7899795949341473e-05, "loss": 0.2148, "step": 393300 }, { "epoch": 2.21, "learning_rate": 2.789417471907902e-05, "loss": 0.2205, "step": 393400 }, { "epoch": 2.21, "learning_rate": 2.7888553488816565e-05, "loss": 0.2116, "step": 393500 }, { "epoch": 2.21, "learning_rate": 2.7882932258554108e-05, "loss": 0.2164, "step": 393600 }, { "epoch": 2.21, "learning_rate": 2.7877311028291654e-05, "loss": 0.2198, "step": 393700 }, { "epoch": 2.21, "learning_rate": 2.78716897980292e-05, "loss": 0.2119, "step": 393800 }, { "epoch": 2.21, "learning_rate": 2.7866068567766747e-05, "loss": 0.2131, "step": 393900 }, { "epoch": 2.21, "learning_rate": 2.7860447337504286e-05, "loss": 0.2127, "step": 394000 }, { "epoch": 2.22, "learning_rate": 2.7854826107241833e-05, "loss": 0.2114, "step": 394100 }, { "epoch": 2.22, "learning_rate": 2.784920487697938e-05, "loss": 0.2167, "step": 394200 }, { "epoch": 2.22, "learning_rate": 2.784358364671692e-05, "loss": 0.2178, "step": 394300 }, { "epoch": 2.22, "learning_rate": 2.783801862875709e-05, "loss": 0.2139, "step": 394400 }, { "epoch": 2.22, "learning_rate": 2.7832397398494636e-05, "loss": 0.213, "step": 394500 }, { "epoch": 2.22, "learning_rate": 2.7826776168232182e-05, "loss": 0.2177, "step": 394600 }, { "epoch": 2.22, "learning_rate": 2.7821154937969725e-05, "loss": 0.2138, "step": 394700 }, { "epoch": 2.22, "learning_rate": 2.781553370770727e-05, "loss": 0.2219, "step": 394800 }, { "epoch": 2.22, "learning_rate": 2.7809912477444817e-05, "loss": 0.2124, "step": 394900 }, { "epoch": 2.22, "learning_rate": 2.7804347459484985e-05, "loss": 0.2182, "step": 395000 }, { "epoch": 2.22, "learning_rate": 2.7798726229222528e-05, "loss": 0.2147, "step": 395100 }, { "epoch": 2.22, "learning_rate": 2.7793104998960074e-05, "loss": 0.2091, "step": 395200 }, { "epoch": 2.22, "learning_rate": 2.778748376869762e-05, "loss": 0.2172, "step": 395300 }, { "epoch": 2.22, "learning_rate": 2.778186253843516e-05, "loss": 0.2171, "step": 395400 }, { "epoch": 2.22, "learning_rate": 2.7776241308172706e-05, "loss": 0.2222, "step": 395500 }, { "epoch": 2.22, "learning_rate": 2.7770620077910253e-05, "loss": 0.2045, "step": 395600 }, { "epoch": 2.22, "learning_rate": 2.77649988476478e-05, "loss": 0.2163, "step": 395700 }, { "epoch": 2.22, "learning_rate": 2.7759377617385342e-05, "loss": 0.2148, "step": 395800 }, { "epoch": 2.23, "learning_rate": 2.7753756387122888e-05, "loss": 0.2125, "step": 395900 }, { "epoch": 2.23, "learning_rate": 2.7748135156860434e-05, "loss": 0.2147, "step": 396000 }, { "epoch": 2.23, "learning_rate": 2.7742513926597974e-05, "loss": 0.2179, "step": 396100 }, { "epoch": 2.23, "learning_rate": 2.773689269633552e-05, "loss": 0.2183, "step": 396200 }, { "epoch": 2.23, "learning_rate": 2.7731271466073066e-05, "loss": 0.2145, "step": 396300 }, { "epoch": 2.23, "learning_rate": 2.7725650235810613e-05, "loss": 0.2193, "step": 396400 }, { "epoch": 2.23, "learning_rate": 2.7720029005548152e-05, "loss": 0.2129, "step": 396500 }, { "epoch": 2.23, "learning_rate": 2.7714407775285702e-05, "loss": 0.2186, "step": 396600 }, { "epoch": 2.23, "learning_rate": 2.7708786545023248e-05, "loss": 0.2136, "step": 396700 }, { "epoch": 2.23, "learning_rate": 2.7703165314760788e-05, "loss": 0.2157, "step": 396800 }, { "epoch": 2.23, "learning_rate": 2.7697544084498334e-05, "loss": 0.2139, "step": 396900 }, { "epoch": 2.23, "learning_rate": 2.769192285423588e-05, "loss": 0.2202, "step": 397000 }, { "epoch": 2.23, "learning_rate": 2.7686301623973426e-05, "loss": 0.2271, "step": 397100 }, { "epoch": 2.23, "learning_rate": 2.768073660601359e-05, "loss": 0.2239, "step": 397200 }, { "epoch": 2.23, "learning_rate": 2.7675115375751137e-05, "loss": 0.214, "step": 397300 }, { "epoch": 2.23, "learning_rate": 2.7669494145488683e-05, "loss": 0.2171, "step": 397400 }, { "epoch": 2.23, "learning_rate": 2.766387291522623e-05, "loss": 0.2144, "step": 397500 }, { "epoch": 2.24, "learning_rate": 2.7658251684963772e-05, "loss": 0.2206, "step": 397600 }, { "epoch": 2.24, "learning_rate": 2.765263045470132e-05, "loss": 0.214, "step": 397700 }, { "epoch": 2.24, "learning_rate": 2.7647009224438865e-05, "loss": 0.2165, "step": 397800 }, { "epoch": 2.24, "learning_rate": 2.7641387994176404e-05, "loss": 0.2219, "step": 397900 }, { "epoch": 2.24, "learning_rate": 2.763576676391395e-05, "loss": 0.213, "step": 398000 }, { "epoch": 2.24, "learning_rate": 2.7630145533651497e-05, "loss": 0.2144, "step": 398100 }, { "epoch": 2.24, "learning_rate": 2.7624524303389043e-05, "loss": 0.2195, "step": 398200 }, { "epoch": 2.24, "learning_rate": 2.7618903073126583e-05, "loss": 0.2209, "step": 398300 }, { "epoch": 2.24, "learning_rate": 2.761328184286413e-05, "loss": 0.2136, "step": 398400 }, { "epoch": 2.24, "learning_rate": 2.7607660612601675e-05, "loss": 0.22, "step": 398500 }, { "epoch": 2.24, "learning_rate": 2.7602039382339218e-05, "loss": 0.2159, "step": 398600 }, { "epoch": 2.24, "learning_rate": 2.7596418152076764e-05, "loss": 0.2178, "step": 398700 }, { "epoch": 2.24, "learning_rate": 2.759079692181431e-05, "loss": 0.2169, "step": 398800 }, { "epoch": 2.24, "learning_rate": 2.7585175691551857e-05, "loss": 0.2145, "step": 398900 }, { "epoch": 2.24, "learning_rate": 2.7579554461289396e-05, "loss": 0.2223, "step": 399000 }, { "epoch": 2.24, "learning_rate": 2.7573933231026943e-05, "loss": 0.2098, "step": 399100 }, { "epoch": 2.24, "learning_rate": 2.756831200076449e-05, "loss": 0.2109, "step": 399200 }, { "epoch": 2.24, "learning_rate": 2.7562690770502032e-05, "loss": 0.2123, "step": 399300 }, { "epoch": 2.25, "learning_rate": 2.7557069540239578e-05, "loss": 0.2179, "step": 399400 }, { "epoch": 2.25, "learning_rate": 2.7551448309977124e-05, "loss": 0.2202, "step": 399500 }, { "epoch": 2.25, "learning_rate": 2.754582707971467e-05, "loss": 0.2142, "step": 399600 }, { "epoch": 2.25, "learning_rate": 2.754020584945221e-05, "loss": 0.214, "step": 399700 }, { "epoch": 2.25, "learning_rate": 2.7534584619189757e-05, "loss": 0.2164, "step": 399800 }, { "epoch": 2.25, "learning_rate": 2.7528963388927303e-05, "loss": 0.2136, "step": 399900 }, { "epoch": 2.25, "learning_rate": 2.7523342158664846e-05, "loss": 0.2108, "step": 400000 }, { "epoch": 2.25, "learning_rate": 2.7517720928402392e-05, "loss": 0.2106, "step": 400100 }, { "epoch": 2.25, "learning_rate": 2.7512099698139938e-05, "loss": 0.2162, "step": 400200 }, { "epoch": 2.25, "learning_rate": 2.7506478467877485e-05, "loss": 0.2134, "step": 400300 }, { "epoch": 2.25, "learning_rate": 2.7500857237615024e-05, "loss": 0.2162, "step": 400400 }, { "epoch": 2.25, "learning_rate": 2.749523600735257e-05, "loss": 0.2187, "step": 400500 }, { "epoch": 2.25, "learning_rate": 2.7489614777090117e-05, "loss": 0.2185, "step": 400600 }, { "epoch": 2.25, "learning_rate": 2.7483993546827656e-05, "loss": 0.2102, "step": 400700 }, { "epoch": 2.25, "learning_rate": 2.7478372316565202e-05, "loss": 0.2233, "step": 400800 }, { "epoch": 2.25, "learning_rate": 2.7472751086302752e-05, "loss": 0.2158, "step": 400900 }, { "epoch": 2.25, "learning_rate": 2.74671298560403e-05, "loss": 0.2168, "step": 401000 }, { "epoch": 2.25, "learning_rate": 2.7461508625777838e-05, "loss": 0.2145, "step": 401100 }, { "epoch": 2.26, "learning_rate": 2.7455887395515384e-05, "loss": 0.2108, "step": 401200 }, { "epoch": 2.26, "learning_rate": 2.745026616525293e-05, "loss": 0.2183, "step": 401300 }, { "epoch": 2.26, "learning_rate": 2.744464493499047e-05, "loss": 0.2087, "step": 401400 }, { "epoch": 2.26, "learning_rate": 2.7439023704728016e-05, "loss": 0.2129, "step": 401500 }, { "epoch": 2.26, "learning_rate": 2.7433402474465562e-05, "loss": 0.2192, "step": 401600 }, { "epoch": 2.26, "learning_rate": 2.742778124420311e-05, "loss": 0.2154, "step": 401700 }, { "epoch": 2.26, "learning_rate": 2.742216001394065e-05, "loss": 0.2147, "step": 401800 }, { "epoch": 2.26, "learning_rate": 2.7416538783678198e-05, "loss": 0.2161, "step": 401900 }, { "epoch": 2.26, "learning_rate": 2.7410917553415744e-05, "loss": 0.2092, "step": 402000 }, { "epoch": 2.26, "learning_rate": 2.7405296323153284e-05, "loss": 0.2081, "step": 402100 }, { "epoch": 2.26, "learning_rate": 2.739967509289083e-05, "loss": 0.2171, "step": 402200 }, { "epoch": 2.26, "learning_rate": 2.7394053862628376e-05, "loss": 0.2156, "step": 402300 }, { "epoch": 2.26, "learning_rate": 2.7388432632365922e-05, "loss": 0.2188, "step": 402400 }, { "epoch": 2.26, "learning_rate": 2.7382811402103465e-05, "loss": 0.2215, "step": 402500 }, { "epoch": 2.26, "learning_rate": 2.737719017184101e-05, "loss": 0.2148, "step": 402600 }, { "epoch": 2.26, "learning_rate": 2.7371568941578558e-05, "loss": 0.211, "step": 402700 }, { "epoch": 2.26, "learning_rate": 2.7365947711316097e-05, "loss": 0.2123, "step": 402800 }, { "epoch": 2.26, "learning_rate": 2.7360326481053644e-05, "loss": 0.2193, "step": 402900 }, { "epoch": 2.27, "learning_rate": 2.735470525079119e-05, "loss": 0.2197, "step": 403000 }, { "epoch": 2.27, "learning_rate": 2.7349084020528736e-05, "loss": 0.2088, "step": 403100 }, { "epoch": 2.27, "learning_rate": 2.73435190025689e-05, "loss": 0.2172, "step": 403200 }, { "epoch": 2.27, "learning_rate": 2.7337897772306447e-05, "loss": 0.2166, "step": 403300 }, { "epoch": 2.27, "learning_rate": 2.7332276542043993e-05, "loss": 0.2077, "step": 403400 }, { "epoch": 2.27, "learning_rate": 2.732665531178154e-05, "loss": 0.2126, "step": 403500 }, { "epoch": 2.27, "learning_rate": 2.7321034081519082e-05, "loss": 0.2122, "step": 403600 }, { "epoch": 2.27, "learning_rate": 2.731541285125663e-05, "loss": 0.2212, "step": 403700 }, { "epoch": 2.27, "learning_rate": 2.7309847833296796e-05, "loss": 0.223, "step": 403800 }, { "epoch": 2.27, "learning_rate": 2.7304226603034346e-05, "loss": 0.2121, "step": 403900 }, { "epoch": 2.27, "learning_rate": 2.7298605372771885e-05, "loss": 0.2237, "step": 404000 }, { "epoch": 2.27, "learning_rate": 2.729298414250943e-05, "loss": 0.2167, "step": 404100 }, { "epoch": 2.27, "learning_rate": 2.7287362912246978e-05, "loss": 0.2149, "step": 404200 }, { "epoch": 2.27, "learning_rate": 2.7281741681984517e-05, "loss": 0.2197, "step": 404300 }, { "epoch": 2.27, "learning_rate": 2.7276120451722064e-05, "loss": 0.2098, "step": 404400 }, { "epoch": 2.27, "learning_rate": 2.727049922145961e-05, "loss": 0.2191, "step": 404500 }, { "epoch": 2.27, "learning_rate": 2.7264877991197156e-05, "loss": 0.2135, "step": 404600 }, { "epoch": 2.27, "learning_rate": 2.72592567609347e-05, "loss": 0.2198, "step": 404700 }, { "epoch": 2.28, "learning_rate": 2.7253635530672245e-05, "loss": 0.2134, "step": 404800 }, { "epoch": 2.28, "learning_rate": 2.724801430040979e-05, "loss": 0.211, "step": 404900 }, { "epoch": 2.28, "learning_rate": 2.724239307014733e-05, "loss": 0.2183, "step": 405000 }, { "epoch": 2.28, "learning_rate": 2.7236771839884877e-05, "loss": 0.2152, "step": 405100 }, { "epoch": 2.28, "learning_rate": 2.7231150609622424e-05, "loss": 0.2151, "step": 405200 }, { "epoch": 2.28, "learning_rate": 2.7225529379359967e-05, "loss": 0.2171, "step": 405300 }, { "epoch": 2.28, "learning_rate": 2.7219908149097513e-05, "loss": 0.2152, "step": 405400 }, { "epoch": 2.28, "learning_rate": 2.721428691883506e-05, "loss": 0.2222, "step": 405500 }, { "epoch": 2.28, "learning_rate": 2.7208721900875227e-05, "loss": 0.214, "step": 405600 }, { "epoch": 2.28, "learning_rate": 2.720310067061277e-05, "loss": 0.2203, "step": 405700 }, { "epoch": 2.28, "learning_rate": 2.7197479440350316e-05, "loss": 0.2082, "step": 405800 }, { "epoch": 2.28, "learning_rate": 2.7191858210087862e-05, "loss": 0.2126, "step": 405900 }, { "epoch": 2.28, "learning_rate": 2.718623697982541e-05, "loss": 0.2206, "step": 406000 }, { "epoch": 2.28, "learning_rate": 2.7180615749562948e-05, "loss": 0.2165, "step": 406100 }, { "epoch": 2.28, "learning_rate": 2.7174994519300494e-05, "loss": 0.2124, "step": 406200 }, { "epoch": 2.28, "learning_rate": 2.716937328903804e-05, "loss": 0.2226, "step": 406300 }, { "epoch": 2.28, "learning_rate": 2.7163752058775583e-05, "loss": 0.213, "step": 406400 }, { "epoch": 2.29, "learning_rate": 2.715813082851313e-05, "loss": 0.2135, "step": 406500 }, { "epoch": 2.29, "learning_rate": 2.7152509598250676e-05, "loss": 0.2127, "step": 406600 }, { "epoch": 2.29, "learning_rate": 2.7146888367988222e-05, "loss": 0.215, "step": 406700 }, { "epoch": 2.29, "learning_rate": 2.7141267137725762e-05, "loss": 0.2173, "step": 406800 }, { "epoch": 2.29, "learning_rate": 2.7135645907463308e-05, "loss": 0.2081, "step": 406900 }, { "epoch": 2.29, "learning_rate": 2.7130024677200854e-05, "loss": 0.2178, "step": 407000 }, { "epoch": 2.29, "learning_rate": 2.7124403446938397e-05, "loss": 0.2131, "step": 407100 }, { "epoch": 2.29, "learning_rate": 2.7118782216675943e-05, "loss": 0.2179, "step": 407200 }, { "epoch": 2.29, "learning_rate": 2.711316098641349e-05, "loss": 0.2188, "step": 407300 }, { "epoch": 2.29, "learning_rate": 2.7107539756151036e-05, "loss": 0.2201, "step": 407400 }, { "epoch": 2.29, "learning_rate": 2.7101918525888576e-05, "loss": 0.2104, "step": 407500 }, { "epoch": 2.29, "learning_rate": 2.7096297295626122e-05, "loss": 0.214, "step": 407600 }, { "epoch": 2.29, "learning_rate": 2.7090676065363668e-05, "loss": 0.2125, "step": 407700 }, { "epoch": 2.29, "learning_rate": 2.7085054835101208e-05, "loss": 0.216, "step": 407800 }, { "epoch": 2.29, "learning_rate": 2.7079433604838754e-05, "loss": 0.2122, "step": 407900 }, { "epoch": 2.29, "learning_rate": 2.70738123745763e-05, "loss": 0.217, "step": 408000 }, { "epoch": 2.29, "learning_rate": 2.7068191144313846e-05, "loss": 0.2086, "step": 408100 }, { "epoch": 2.29, "learning_rate": 2.706256991405139e-05, "loss": 0.2128, "step": 408200 }, { "epoch": 2.3, "learning_rate": 2.7056948683788936e-05, "loss": 0.2125, "step": 408300 }, { "epoch": 2.3, "learning_rate": 2.7051327453526482e-05, "loss": 0.2125, "step": 408400 }, { "epoch": 2.3, "learning_rate": 2.704570622326402e-05, "loss": 0.2064, "step": 408500 }, { "epoch": 2.3, "learning_rate": 2.7040084993001568e-05, "loss": 0.2195, "step": 408600 }, { "epoch": 2.3, "learning_rate": 2.7034463762739114e-05, "loss": 0.2096, "step": 408700 }, { "epoch": 2.3, "learning_rate": 2.702884253247666e-05, "loss": 0.2177, "step": 408800 }, { "epoch": 2.3, "learning_rate": 2.7023221302214203e-05, "loss": 0.2163, "step": 408900 }, { "epoch": 2.3, "learning_rate": 2.701760007195175e-05, "loss": 0.2121, "step": 409000 }, { "epoch": 2.3, "learning_rate": 2.7011978841689296e-05, "loss": 0.2144, "step": 409100 }, { "epoch": 2.3, "learning_rate": 2.7006357611426835e-05, "loss": 0.2207, "step": 409200 }, { "epoch": 2.3, "learning_rate": 2.700073638116438e-05, "loss": 0.2121, "step": 409300 }, { "epoch": 2.3, "learning_rate": 2.6995115150901928e-05, "loss": 0.2225, "step": 409400 }, { "epoch": 2.3, "learning_rate": 2.6989493920639474e-05, "loss": 0.2197, "step": 409500 }, { "epoch": 2.3, "learning_rate": 2.6983872690377017e-05, "loss": 0.2183, "step": 409600 }, { "epoch": 2.3, "learning_rate": 2.6978251460114563e-05, "loss": 0.2154, "step": 409700 }, { "epoch": 2.3, "learning_rate": 2.697263022985211e-05, "loss": 0.2059, "step": 409800 }, { "epoch": 2.3, "learning_rate": 2.696700899958965e-05, "loss": 0.2181, "step": 409900 }, { "epoch": 2.3, "learning_rate": 2.6961387769327195e-05, "loss": 0.2244, "step": 410000 }, { "epoch": 2.31, "learning_rate": 2.695576653906474e-05, "loss": 0.2126, "step": 410100 }, { "epoch": 2.31, "learning_rate": 2.6950145308802288e-05, "loss": 0.2108, "step": 410200 }, { "epoch": 2.31, "learning_rate": 2.694452407853983e-05, "loss": 0.2085, "step": 410300 }, { "epoch": 2.31, "learning_rate": 2.6938902848277377e-05, "loss": 0.2142, "step": 410400 }, { "epoch": 2.31, "learning_rate": 2.6933281618014923e-05, "loss": 0.2177, "step": 410500 }, { "epoch": 2.31, "learning_rate": 2.6927660387752463e-05, "loss": 0.2103, "step": 410600 }, { "epoch": 2.31, "learning_rate": 2.6922095369792634e-05, "loss": 0.2139, "step": 410700 }, { "epoch": 2.31, "learning_rate": 2.691647413953018e-05, "loss": 0.2145, "step": 410800 }, { "epoch": 2.31, "learning_rate": 2.6910852909267726e-05, "loss": 0.2152, "step": 410900 }, { "epoch": 2.31, "learning_rate": 2.6905231679005266e-05, "loss": 0.2085, "step": 411000 }, { "epoch": 2.31, "learning_rate": 2.6899610448742812e-05, "loss": 0.2183, "step": 411100 }, { "epoch": 2.31, "learning_rate": 2.6893989218480358e-05, "loss": 0.208, "step": 411200 }, { "epoch": 2.31, "learning_rate": 2.6888367988217905e-05, "loss": 0.2112, "step": 411300 }, { "epoch": 2.31, "learning_rate": 2.6882746757955447e-05, "loss": 0.2145, "step": 411400 }, { "epoch": 2.31, "learning_rate": 2.6877125527692994e-05, "loss": 0.2165, "step": 411500 }, { "epoch": 2.31, "learning_rate": 2.687150429743054e-05, "loss": 0.2143, "step": 411600 }, { "epoch": 2.31, "learning_rate": 2.686588306716808e-05, "loss": 0.2118, "step": 411700 }, { "epoch": 2.31, "learning_rate": 2.6860261836905626e-05, "loss": 0.2121, "step": 411800 }, { "epoch": 2.32, "learning_rate": 2.6854640606643172e-05, "loss": 0.2218, "step": 411900 }, { "epoch": 2.32, "learning_rate": 2.684901937638072e-05, "loss": 0.2135, "step": 412000 }, { "epoch": 2.32, "learning_rate": 2.6843398146118258e-05, "loss": 0.2158, "step": 412100 }, { "epoch": 2.32, "learning_rate": 2.6837776915855804e-05, "loss": 0.2182, "step": 412200 }, { "epoch": 2.32, "learning_rate": 2.683215568559335e-05, "loss": 0.2178, "step": 412300 }, { "epoch": 2.32, "learning_rate": 2.6826534455330893e-05, "loss": 0.2121, "step": 412400 }, { "epoch": 2.32, "learning_rate": 2.682091322506844e-05, "loss": 0.2078, "step": 412500 }, { "epoch": 2.32, "learning_rate": 2.6815291994805986e-05, "loss": 0.2153, "step": 412600 }, { "epoch": 2.32, "learning_rate": 2.6809670764543532e-05, "loss": 0.2121, "step": 412700 }, { "epoch": 2.32, "learning_rate": 2.680404953428107e-05, "loss": 0.2187, "step": 412800 }, { "epoch": 2.32, "learning_rate": 2.6798428304018618e-05, "loss": 0.2111, "step": 412900 }, { "epoch": 2.32, "learning_rate": 2.6792807073756164e-05, "loss": 0.2124, "step": 413000 }, { "epoch": 2.32, "learning_rate": 2.6787185843493707e-05, "loss": 0.2111, "step": 413100 }, { "epoch": 2.32, "learning_rate": 2.6781564613231253e-05, "loss": 0.2111, "step": 413200 }, { "epoch": 2.32, "learning_rate": 2.67759433829688e-05, "loss": 0.2122, "step": 413300 }, { "epoch": 2.32, "learning_rate": 2.6770322152706346e-05, "loss": 0.2128, "step": 413400 }, { "epoch": 2.32, "learning_rate": 2.6764700922443885e-05, "loss": 0.2165, "step": 413500 }, { "epoch": 2.32, "learning_rate": 2.675907969218143e-05, "loss": 0.221, "step": 413600 }, { "epoch": 2.33, "learning_rate": 2.6753458461918978e-05, "loss": 0.2112, "step": 413700 }, { "epoch": 2.33, "learning_rate": 2.674783723165652e-05, "loss": 0.2197, "step": 413800 }, { "epoch": 2.33, "learning_rate": 2.6742216001394067e-05, "loss": 0.2129, "step": 413900 }, { "epoch": 2.33, "learning_rate": 2.6736594771131613e-05, "loss": 0.2117, "step": 414000 }, { "epoch": 2.33, "learning_rate": 2.673097354086916e-05, "loss": 0.2108, "step": 414100 }, { "epoch": 2.33, "learning_rate": 2.67253523106067e-05, "loss": 0.2139, "step": 414200 }, { "epoch": 2.33, "learning_rate": 2.6719731080344245e-05, "loss": 0.2191, "step": 414300 }, { "epoch": 2.33, "learning_rate": 2.6714109850081792e-05, "loss": 0.2188, "step": 414400 }, { "epoch": 2.33, "learning_rate": 2.670848861981933e-05, "loss": 0.2142, "step": 414500 }, { "epoch": 2.33, "learning_rate": 2.670286738955688e-05, "loss": 0.2156, "step": 414600 }, { "epoch": 2.33, "learning_rate": 2.6697246159294427e-05, "loss": 0.2139, "step": 414700 }, { "epoch": 2.33, "learning_rate": 2.6691624929031973e-05, "loss": 0.2127, "step": 414800 }, { "epoch": 2.33, "learning_rate": 2.6686003698769513e-05, "loss": 0.2143, "step": 414900 }, { "epoch": 2.33, "learning_rate": 2.668038246850706e-05, "loss": 0.2165, "step": 415000 }, { "epoch": 2.33, "learning_rate": 2.6674761238244606e-05, "loss": 0.2116, "step": 415100 }, { "epoch": 2.33, "learning_rate": 2.6669140007982145e-05, "loss": 0.2133, "step": 415200 }, { "epoch": 2.33, "learning_rate": 2.666351877771969e-05, "loss": 0.2116, "step": 415300 }, { "epoch": 2.34, "learning_rate": 2.6657897547457238e-05, "loss": 0.2068, "step": 415400 }, { "epoch": 2.34, "learning_rate": 2.665227631719478e-05, "loss": 0.2134, "step": 415500 }, { "epoch": 2.34, "learning_rate": 2.6646655086932327e-05, "loss": 0.2134, "step": 415600 }, { "epoch": 2.34, "learning_rate": 2.6641033856669873e-05, "loss": 0.2127, "step": 415700 }, { "epoch": 2.34, "learning_rate": 2.663541262640742e-05, "loss": 0.2182, "step": 415800 }, { "epoch": 2.34, "learning_rate": 2.6629847608447584e-05, "loss": 0.213, "step": 415900 }, { "epoch": 2.34, "learning_rate": 2.662422637818513e-05, "loss": 0.2113, "step": 416000 }, { "epoch": 2.34, "learning_rate": 2.6618605147922676e-05, "loss": 0.2174, "step": 416100 }, { "epoch": 2.34, "learning_rate": 2.6612983917660222e-05, "loss": 0.2136, "step": 416200 }, { "epoch": 2.34, "learning_rate": 2.6607362687397762e-05, "loss": 0.2086, "step": 416300 }, { "epoch": 2.34, "learning_rate": 2.6601741457135308e-05, "loss": 0.2161, "step": 416400 }, { "epoch": 2.34, "learning_rate": 2.6596120226872854e-05, "loss": 0.2163, "step": 416500 }, { "epoch": 2.34, "learning_rate": 2.6590498996610397e-05, "loss": 0.2183, "step": 416600 }, { "epoch": 2.34, "learning_rate": 2.6584877766347944e-05, "loss": 0.2112, "step": 416700 }, { "epoch": 2.34, "learning_rate": 2.657925653608549e-05, "loss": 0.2117, "step": 416800 }, { "epoch": 2.34, "learning_rate": 2.6573635305823036e-05, "loss": 0.2089, "step": 416900 }, { "epoch": 2.34, "learning_rate": 2.6568014075560576e-05, "loss": 0.2096, "step": 417000 }, { "epoch": 2.34, "learning_rate": 2.6562392845298122e-05, "loss": 0.2131, "step": 417100 }, { "epoch": 2.35, "learning_rate": 2.6556771615035668e-05, "loss": 0.2113, "step": 417200 }, { "epoch": 2.35, "learning_rate": 2.655115038477321e-05, "loss": 0.2094, "step": 417300 }, { "epoch": 2.35, "learning_rate": 2.6545529154510757e-05, "loss": 0.2155, "step": 417400 }, { "epoch": 2.35, "learning_rate": 2.6539907924248304e-05, "loss": 0.2154, "step": 417500 }, { "epoch": 2.35, "learning_rate": 2.653428669398585e-05, "loss": 0.2163, "step": 417600 }, { "epoch": 2.35, "learning_rate": 2.652866546372339e-05, "loss": 0.2082, "step": 417700 }, { "epoch": 2.35, "learning_rate": 2.6523044233460936e-05, "loss": 0.214, "step": 417800 }, { "epoch": 2.35, "learning_rate": 2.6517479215501107e-05, "loss": 0.2092, "step": 417900 }, { "epoch": 2.35, "learning_rate": 2.6511857985238653e-05, "loss": 0.2142, "step": 418000 }, { "epoch": 2.35, "learning_rate": 2.6506236754976192e-05, "loss": 0.2101, "step": 418100 }, { "epoch": 2.35, "learning_rate": 2.650061552471374e-05, "loss": 0.2161, "step": 418200 }, { "epoch": 2.35, "learning_rate": 2.649505050675391e-05, "loss": 0.2127, "step": 418300 }, { "epoch": 2.35, "learning_rate": 2.6489429276491456e-05, "loss": 0.2133, "step": 418400 }, { "epoch": 2.35, "learning_rate": 2.6483808046229e-05, "loss": 0.2177, "step": 418500 }, { "epoch": 2.35, "learning_rate": 2.6478243028269167e-05, "loss": 0.2102, "step": 418600 }, { "epoch": 2.35, "learning_rate": 2.6472621798006713e-05, "loss": 0.2136, "step": 418700 }, { "epoch": 2.35, "learning_rate": 2.646700056774426e-05, "loss": 0.2111, "step": 418800 }, { "epoch": 2.35, "learning_rate": 2.6461379337481802e-05, "loss": 0.2163, "step": 418900 }, { "epoch": 2.36, "learning_rate": 2.6455758107219348e-05, "loss": 0.2114, "step": 419000 }, { "epoch": 2.36, "learning_rate": 2.6450136876956895e-05, "loss": 0.2152, "step": 419100 }, { "epoch": 2.36, "learning_rate": 2.6444515646694434e-05, "loss": 0.2108, "step": 419200 }, { "epoch": 2.36, "learning_rate": 2.643889441643198e-05, "loss": 0.2163, "step": 419300 }, { "epoch": 2.36, "learning_rate": 2.6433273186169527e-05, "loss": 0.2108, "step": 419400 }, { "epoch": 2.36, "learning_rate": 2.6427651955907073e-05, "loss": 0.2172, "step": 419500 }, { "epoch": 2.36, "learning_rate": 2.6422030725644616e-05, "loss": 0.2168, "step": 419600 }, { "epoch": 2.36, "learning_rate": 2.6416409495382162e-05, "loss": 0.2169, "step": 419700 }, { "epoch": 2.36, "learning_rate": 2.641078826511971e-05, "loss": 0.2159, "step": 419800 }, { "epoch": 2.36, "learning_rate": 2.6405167034857248e-05, "loss": 0.2135, "step": 419900 }, { "epoch": 2.36, "learning_rate": 2.6399545804594794e-05, "loss": 0.2092, "step": 420000 }, { "epoch": 2.36, "learning_rate": 2.639392457433234e-05, "loss": 0.2201, "step": 420100 }, { "epoch": 2.36, "learning_rate": 2.6388303344069887e-05, "loss": 0.2175, "step": 420200 }, { "epoch": 2.36, "learning_rate": 2.6382682113807426e-05, "loss": 0.2161, "step": 420300 }, { "epoch": 2.36, "learning_rate": 2.6377060883544972e-05, "loss": 0.2119, "step": 420400 }, { "epoch": 2.36, "learning_rate": 2.637143965328252e-05, "loss": 0.2207, "step": 420500 }, { "epoch": 2.36, "learning_rate": 2.636587463532269e-05, "loss": 0.2097, "step": 420600 }, { "epoch": 2.36, "learning_rate": 2.6360253405060233e-05, "loss": 0.2108, "step": 420700 }, { "epoch": 2.37, "learning_rate": 2.635463217479778e-05, "loss": 0.209, "step": 420800 }, { "epoch": 2.37, "learning_rate": 2.6349010944535325e-05, "loss": 0.2138, "step": 420900 }, { "epoch": 2.37, "learning_rate": 2.6343389714272865e-05, "loss": 0.2094, "step": 421000 }, { "epoch": 2.37, "learning_rate": 2.633776848401041e-05, "loss": 0.2153, "step": 421100 }, { "epoch": 2.37, "learning_rate": 2.6332147253747957e-05, "loss": 0.2149, "step": 421200 }, { "epoch": 2.37, "learning_rate": 2.6326526023485504e-05, "loss": 0.2168, "step": 421300 }, { "epoch": 2.37, "learning_rate": 2.6320904793223043e-05, "loss": 0.2221, "step": 421400 }, { "epoch": 2.37, "learning_rate": 2.631528356296059e-05, "loss": 0.2102, "step": 421500 }, { "epoch": 2.37, "learning_rate": 2.630966233269814e-05, "loss": 0.211, "step": 421600 }, { "epoch": 2.37, "learning_rate": 2.630404110243568e-05, "loss": 0.2143, "step": 421700 }, { "epoch": 2.37, "learning_rate": 2.6298419872173225e-05, "loss": 0.2175, "step": 421800 }, { "epoch": 2.37, "learning_rate": 2.629279864191077e-05, "loss": 0.2092, "step": 421900 }, { "epoch": 2.37, "learning_rate": 2.6287177411648317e-05, "loss": 0.2174, "step": 422000 }, { "epoch": 2.37, "learning_rate": 2.6281556181385857e-05, "loss": 0.2119, "step": 422100 }, { "epoch": 2.37, "learning_rate": 2.6275934951123403e-05, "loss": 0.2127, "step": 422200 }, { "epoch": 2.37, "learning_rate": 2.627031372086095e-05, "loss": 0.2185, "step": 422300 }, { "epoch": 2.37, "learning_rate": 2.6264692490598492e-05, "loss": 0.2148, "step": 422400 }, { "epoch": 2.37, "learning_rate": 2.625907126033604e-05, "loss": 0.2207, "step": 422500 }, { "epoch": 2.38, "learning_rate": 2.6253450030073585e-05, "loss": 0.2181, "step": 422600 }, { "epoch": 2.38, "learning_rate": 2.624782879981113e-05, "loss": 0.2091, "step": 422700 }, { "epoch": 2.38, "learning_rate": 2.624220756954867e-05, "loss": 0.2185, "step": 422800 }, { "epoch": 2.38, "learning_rate": 2.6236586339286217e-05, "loss": 0.2104, "step": 422900 }, { "epoch": 2.38, "learning_rate": 2.6230965109023763e-05, "loss": 0.2179, "step": 423000 }, { "epoch": 2.38, "learning_rate": 2.6225343878761306e-05, "loss": 0.2167, "step": 423100 }, { "epoch": 2.38, "learning_rate": 2.6219722648498852e-05, "loss": 0.2108, "step": 423200 }, { "epoch": 2.38, "learning_rate": 2.62141014182364e-05, "loss": 0.2139, "step": 423300 }, { "epoch": 2.38, "learning_rate": 2.6208480187973945e-05, "loss": 0.2111, "step": 423400 }, { "epoch": 2.38, "learning_rate": 2.6202858957711484e-05, "loss": 0.2115, "step": 423500 }, { "epoch": 2.38, "learning_rate": 2.619723772744903e-05, "loss": 0.214, "step": 423600 }, { "epoch": 2.38, "learning_rate": 2.6191616497186577e-05, "loss": 0.2199, "step": 423700 }, { "epoch": 2.38, "learning_rate": 2.618599526692412e-05, "loss": 0.2161, "step": 423800 }, { "epoch": 2.38, "learning_rate": 2.6180374036661666e-05, "loss": 0.2102, "step": 423900 }, { "epoch": 2.38, "learning_rate": 2.6174752806399212e-05, "loss": 0.2114, "step": 424000 }, { "epoch": 2.38, "learning_rate": 2.616913157613676e-05, "loss": 0.2092, "step": 424100 }, { "epoch": 2.38, "learning_rate": 2.6163510345874298e-05, "loss": 0.2069, "step": 424200 }, { "epoch": 2.39, "learning_rate": 2.6157889115611844e-05, "loss": 0.215, "step": 424300 }, { "epoch": 2.39, "learning_rate": 2.615226788534939e-05, "loss": 0.213, "step": 424400 }, { "epoch": 2.39, "learning_rate": 2.614664665508693e-05, "loss": 0.2124, "step": 424500 }, { "epoch": 2.39, "learning_rate": 2.6141025424824476e-05, "loss": 0.2131, "step": 424600 }, { "epoch": 2.39, "learning_rate": 2.6135404194562023e-05, "loss": 0.2071, "step": 424700 }, { "epoch": 2.39, "learning_rate": 2.612978296429957e-05, "loss": 0.2192, "step": 424800 }, { "epoch": 2.39, "learning_rate": 2.6124161734037112e-05, "loss": 0.2154, "step": 424900 }, { "epoch": 2.39, "learning_rate": 2.6118540503774658e-05, "loss": 0.2207, "step": 425000 }, { "epoch": 2.39, "learning_rate": 2.6112919273512204e-05, "loss": 0.2137, "step": 425100 }, { "epoch": 2.39, "learning_rate": 2.6107298043249744e-05, "loss": 0.2103, "step": 425200 }, { "epoch": 2.39, "learning_rate": 2.610167681298729e-05, "loss": 0.2124, "step": 425300 }, { "epoch": 2.39, "learning_rate": 2.6096055582724837e-05, "loss": 0.2115, "step": 425400 }, { "epoch": 2.39, "learning_rate": 2.6090490564765008e-05, "loss": 0.2124, "step": 425500 }, { "epoch": 2.39, "learning_rate": 2.6084869334502547e-05, "loss": 0.2111, "step": 425600 }, { "epoch": 2.39, "learning_rate": 2.6079248104240093e-05, "loss": 0.2147, "step": 425700 }, { "epoch": 2.39, "learning_rate": 2.607362687397764e-05, "loss": 0.213, "step": 425800 }, { "epoch": 2.39, "learning_rate": 2.6068005643715182e-05, "loss": 0.215, "step": 425900 }, { "epoch": 2.39, "learning_rate": 2.606238441345273e-05, "loss": 0.2121, "step": 426000 }, { "epoch": 2.4, "learning_rate": 2.6056763183190275e-05, "loss": 0.2155, "step": 426100 }, { "epoch": 2.4, "learning_rate": 2.605114195292782e-05, "loss": 0.2136, "step": 426200 }, { "epoch": 2.4, "learning_rate": 2.604552072266536e-05, "loss": 0.213, "step": 426300 }, { "epoch": 2.4, "learning_rate": 2.6039899492402907e-05, "loss": 0.221, "step": 426400 }, { "epoch": 2.4, "learning_rate": 2.6034278262140453e-05, "loss": 0.2201, "step": 426500 }, { "epoch": 2.4, "learning_rate": 2.6028657031877996e-05, "loss": 0.2102, "step": 426600 }, { "epoch": 2.4, "learning_rate": 2.6023092013918164e-05, "loss": 0.2136, "step": 426700 }, { "epoch": 2.4, "learning_rate": 2.6017470783655714e-05, "loss": 0.2098, "step": 426800 }, { "epoch": 2.4, "learning_rate": 2.601184955339326e-05, "loss": 0.2144, "step": 426900 }, { "epoch": 2.4, "learning_rate": 2.60062283231308e-05, "loss": 0.2123, "step": 427000 }, { "epoch": 2.4, "learning_rate": 2.6000607092868346e-05, "loss": 0.2126, "step": 427100 }, { "epoch": 2.4, "learning_rate": 2.5994985862605892e-05, "loss": 0.2108, "step": 427200 }, { "epoch": 2.4, "learning_rate": 2.5989364632343438e-05, "loss": 0.2175, "step": 427300 }, { "epoch": 2.4, "learning_rate": 2.5983743402080978e-05, "loss": 0.2152, "step": 427400 }, { "epoch": 2.4, "learning_rate": 2.5978122171818524e-05, "loss": 0.211, "step": 427500 }, { "epoch": 2.4, "learning_rate": 2.597250094155607e-05, "loss": 0.2168, "step": 427600 }, { "epoch": 2.4, "learning_rate": 2.5966879711293613e-05, "loss": 0.21, "step": 427700 }, { "epoch": 2.4, "learning_rate": 2.596125848103116e-05, "loss": 0.2138, "step": 427800 }, { "epoch": 2.41, "learning_rate": 2.5955637250768706e-05, "loss": 0.2135, "step": 427900 }, { "epoch": 2.41, "learning_rate": 2.5950016020506252e-05, "loss": 0.2063, "step": 428000 }, { "epoch": 2.41, "learning_rate": 2.594439479024379e-05, "loss": 0.216, "step": 428100 }, { "epoch": 2.41, "learning_rate": 2.5938773559981338e-05, "loss": 0.2158, "step": 428200 }, { "epoch": 2.41, "learning_rate": 2.5933152329718884e-05, "loss": 0.2132, "step": 428300 }, { "epoch": 2.41, "learning_rate": 2.5927531099456427e-05, "loss": 0.2159, "step": 428400 }, { "epoch": 2.41, "learning_rate": 2.5921909869193973e-05, "loss": 0.2136, "step": 428500 }, { "epoch": 2.41, "learning_rate": 2.591628863893152e-05, "loss": 0.2123, "step": 428600 }, { "epoch": 2.41, "learning_rate": 2.5910667408669066e-05, "loss": 0.2079, "step": 428700 }, { "epoch": 2.41, "learning_rate": 2.5905046178406605e-05, "loss": 0.2135, "step": 428800 }, { "epoch": 2.41, "learning_rate": 2.589942494814415e-05, "loss": 0.2109, "step": 428900 }, { "epoch": 2.41, "learning_rate": 2.5893803717881698e-05, "loss": 0.2156, "step": 429000 }, { "epoch": 2.41, "learning_rate": 2.588823869992187e-05, "loss": 0.2174, "step": 429100 }, { "epoch": 2.41, "learning_rate": 2.5882617469659408e-05, "loss": 0.2182, "step": 429200 }, { "epoch": 2.41, "learning_rate": 2.5876996239396955e-05, "loss": 0.2178, "step": 429300 }, { "epoch": 2.41, "learning_rate": 2.58713750091345e-05, "loss": 0.2073, "step": 429400 }, { "epoch": 2.41, "learning_rate": 2.5865753778872044e-05, "loss": 0.2097, "step": 429500 }, { "epoch": 2.41, "learning_rate": 2.586013254860959e-05, "loss": 0.2133, "step": 429600 }, { "epoch": 2.42, "learning_rate": 2.5854511318347136e-05, "loss": 0.2083, "step": 429700 }, { "epoch": 2.42, "learning_rate": 2.5848890088084683e-05, "loss": 0.211, "step": 429800 }, { "epoch": 2.42, "learning_rate": 2.5843268857822222e-05, "loss": 0.2082, "step": 429900 }, { "epoch": 2.42, "learning_rate": 2.583764762755977e-05, "loss": 0.2185, "step": 430000 }, { "epoch": 2.42, "learning_rate": 2.5832026397297315e-05, "loss": 0.2151, "step": 430100 }, { "epoch": 2.42, "learning_rate": 2.5826405167034857e-05, "loss": 0.2117, "step": 430200 }, { "epoch": 2.42, "learning_rate": 2.5820783936772404e-05, "loss": 0.2115, "step": 430300 }, { "epoch": 2.42, "learning_rate": 2.581516270650995e-05, "loss": 0.2097, "step": 430400 }, { "epoch": 2.42, "learning_rate": 2.5809541476247496e-05, "loss": 0.2125, "step": 430500 }, { "epoch": 2.42, "learning_rate": 2.5803920245985036e-05, "loss": 0.2158, "step": 430600 }, { "epoch": 2.42, "learning_rate": 2.5798299015722582e-05, "loss": 0.213, "step": 430700 }, { "epoch": 2.42, "learning_rate": 2.579267778546013e-05, "loss": 0.2136, "step": 430800 }, { "epoch": 2.42, "learning_rate": 2.5787056555197668e-05, "loss": 0.2113, "step": 430900 }, { "epoch": 2.42, "learning_rate": 2.5781435324935214e-05, "loss": 0.2058, "step": 431000 }, { "epoch": 2.42, "learning_rate": 2.5775814094672764e-05, "loss": 0.2103, "step": 431100 }, { "epoch": 2.42, "learning_rate": 2.577019286441031e-05, "loss": 0.2151, "step": 431200 }, { "epoch": 2.42, "learning_rate": 2.576457163414785e-05, "loss": 0.2137, "step": 431300 }, { "epoch": 2.42, "learning_rate": 2.5758950403885396e-05, "loss": 0.2074, "step": 431400 }, { "epoch": 2.43, "learning_rate": 2.5753329173622942e-05, "loss": 0.2188, "step": 431500 }, { "epoch": 2.43, "learning_rate": 2.574770794336048e-05, "loss": 0.2134, "step": 431600 }, { "epoch": 2.43, "learning_rate": 2.5742086713098028e-05, "loss": 0.212, "step": 431700 }, { "epoch": 2.43, "learning_rate": 2.5736465482835574e-05, "loss": 0.2079, "step": 431800 }, { "epoch": 2.43, "learning_rate": 2.573084425257312e-05, "loss": 0.2109, "step": 431900 }, { "epoch": 2.43, "learning_rate": 2.5725223022310663e-05, "loss": 0.2143, "step": 432000 }, { "epoch": 2.43, "learning_rate": 2.571960179204821e-05, "loss": 0.2118, "step": 432100 }, { "epoch": 2.43, "learning_rate": 2.5713980561785756e-05, "loss": 0.2069, "step": 432200 }, { "epoch": 2.43, "learning_rate": 2.5708359331523295e-05, "loss": 0.2063, "step": 432300 }, { "epoch": 2.43, "learning_rate": 2.5702738101260842e-05, "loss": 0.2145, "step": 432400 }, { "epoch": 2.43, "learning_rate": 2.5697116870998388e-05, "loss": 0.211, "step": 432500 }, { "epoch": 2.43, "learning_rate": 2.5691495640735934e-05, "loss": 0.21, "step": 432600 }, { "epoch": 2.43, "learning_rate": 2.5685874410473477e-05, "loss": 0.2123, "step": 432700 }, { "epoch": 2.43, "learning_rate": 2.5680253180211023e-05, "loss": 0.2062, "step": 432800 }, { "epoch": 2.43, "learning_rate": 2.567463194994857e-05, "loss": 0.2112, "step": 432900 }, { "epoch": 2.43, "learning_rate": 2.566901071968611e-05, "loss": 0.2086, "step": 433000 }, { "epoch": 2.43, "learning_rate": 2.5663389489423655e-05, "loss": 0.2089, "step": 433100 }, { "epoch": 2.44, "learning_rate": 2.5657768259161202e-05, "loss": 0.2149, "step": 433200 }, { "epoch": 2.44, "learning_rate": 2.5652147028898748e-05, "loss": 0.2136, "step": 433300 }, { "epoch": 2.44, "learning_rate": 2.564652579863629e-05, "loss": 0.2177, "step": 433400 }, { "epoch": 2.44, "learning_rate": 2.5640904568373837e-05, "loss": 0.2167, "step": 433500 }, { "epoch": 2.44, "learning_rate": 2.5635283338111383e-05, "loss": 0.2186, "step": 433600 }, { "epoch": 2.44, "learning_rate": 2.5629662107848923e-05, "loss": 0.2095, "step": 433700 }, { "epoch": 2.44, "learning_rate": 2.562404087758647e-05, "loss": 0.2103, "step": 433800 }, { "epoch": 2.44, "learning_rate": 2.5618419647324016e-05, "loss": 0.2035, "step": 433900 }, { "epoch": 2.44, "learning_rate": 2.5612854629364187e-05, "loss": 0.2128, "step": 434000 }, { "epoch": 2.44, "learning_rate": 2.5607233399101726e-05, "loss": 0.2166, "step": 434100 }, { "epoch": 2.44, "learning_rate": 2.5601612168839272e-05, "loss": 0.2129, "step": 434200 }, { "epoch": 2.44, "learning_rate": 2.559599093857682e-05, "loss": 0.2079, "step": 434300 }, { "epoch": 2.44, "learning_rate": 2.5590369708314365e-05, "loss": 0.2145, "step": 434400 }, { "epoch": 2.44, "learning_rate": 2.5584748478051908e-05, "loss": 0.2121, "step": 434500 }, { "epoch": 2.44, "learning_rate": 2.5579127247789454e-05, "loss": 0.2081, "step": 434600 }, { "epoch": 2.44, "learning_rate": 2.5573506017527e-05, "loss": 0.2219, "step": 434700 }, { "epoch": 2.44, "learning_rate": 2.556788478726454e-05, "loss": 0.2162, "step": 434800 }, { "epoch": 2.44, "learning_rate": 2.5562263557002086e-05, "loss": 0.2135, "step": 434900 }, { "epoch": 2.45, "learning_rate": 2.5556642326739632e-05, "loss": 0.2173, "step": 435000 }, { "epoch": 2.45, "learning_rate": 2.5551021096477172e-05, "loss": 0.2135, "step": 435100 }, { "epoch": 2.45, "learning_rate": 2.5545399866214718e-05, "loss": 0.2091, "step": 435200 }, { "epoch": 2.45, "learning_rate": 2.5539778635952268e-05, "loss": 0.2125, "step": 435300 }, { "epoch": 2.45, "learning_rate": 2.5534213617992435e-05, "loss": 0.2141, "step": 435400 }, { "epoch": 2.45, "learning_rate": 2.552859238772998e-05, "loss": 0.2134, "step": 435500 }, { "epoch": 2.45, "learning_rate": 2.5522971157467525e-05, "loss": 0.2088, "step": 435600 }, { "epoch": 2.45, "learning_rate": 2.551734992720507e-05, "loss": 0.2143, "step": 435700 }, { "epoch": 2.45, "learning_rate": 2.5511728696942617e-05, "loss": 0.2148, "step": 435800 }, { "epoch": 2.45, "learning_rate": 2.5506107466680157e-05, "loss": 0.2131, "step": 435900 }, { "epoch": 2.45, "learning_rate": 2.5500486236417703e-05, "loss": 0.2076, "step": 436000 }, { "epoch": 2.45, "learning_rate": 2.549486500615525e-05, "loss": 0.215, "step": 436100 }, { "epoch": 2.45, "learning_rate": 2.5489243775892792e-05, "loss": 0.2104, "step": 436200 }, { "epoch": 2.45, "learning_rate": 2.548362254563034e-05, "loss": 0.2215, "step": 436300 }, { "epoch": 2.45, "learning_rate": 2.5478001315367885e-05, "loss": 0.2099, "step": 436400 }, { "epoch": 2.45, "learning_rate": 2.547238008510543e-05, "loss": 0.2108, "step": 436500 }, { "epoch": 2.45, "learning_rate": 2.546675885484297e-05, "loss": 0.216, "step": 436600 }, { "epoch": 2.45, "learning_rate": 2.5461137624580517e-05, "loss": 0.2143, "step": 436700 }, { "epoch": 2.46, "learning_rate": 2.5455516394318063e-05, "loss": 0.2187, "step": 436800 }, { "epoch": 2.46, "learning_rate": 2.5449895164055603e-05, "loss": 0.2049, "step": 436900 }, { "epoch": 2.46, "learning_rate": 2.5444330146095774e-05, "loss": 0.2059, "step": 437000 }, { "epoch": 2.46, "learning_rate": 2.543870891583332e-05, "loss": 0.2156, "step": 437100 }, { "epoch": 2.46, "learning_rate": 2.5433087685570866e-05, "loss": 0.2104, "step": 437200 }, { "epoch": 2.46, "learning_rate": 2.542746645530841e-05, "loss": 0.2164, "step": 437300 }, { "epoch": 2.46, "learning_rate": 2.5421845225045955e-05, "loss": 0.2079, "step": 437400 }, { "epoch": 2.46, "learning_rate": 2.54162239947835e-05, "loss": 0.216, "step": 437500 }, { "epoch": 2.46, "learning_rate": 2.5410602764521048e-05, "loss": 0.2103, "step": 437600 }, { "epoch": 2.46, "learning_rate": 2.5404981534258587e-05, "loss": 0.2123, "step": 437700 }, { "epoch": 2.46, "learning_rate": 2.5399360303996134e-05, "loss": 0.2125, "step": 437800 }, { "epoch": 2.46, "learning_rate": 2.539373907373368e-05, "loss": 0.2079, "step": 437900 }, { "epoch": 2.46, "learning_rate": 2.538811784347122e-05, "loss": 0.2063, "step": 438000 }, { "epoch": 2.46, "learning_rate": 2.5382496613208766e-05, "loss": 0.2094, "step": 438100 }, { "epoch": 2.46, "learning_rate": 2.5376875382946312e-05, "loss": 0.2118, "step": 438200 }, { "epoch": 2.46, "learning_rate": 2.5371254152683858e-05, "loss": 0.2137, "step": 438300 }, { "epoch": 2.46, "learning_rate": 2.53656329224214e-05, "loss": 0.2109, "step": 438400 }, { "epoch": 2.46, "learning_rate": 2.5360011692158947e-05, "loss": 0.2044, "step": 438500 }, { "epoch": 2.47, "learning_rate": 2.5354390461896494e-05, "loss": 0.2098, "step": 438600 }, { "epoch": 2.47, "learning_rate": 2.5348769231634033e-05, "loss": 0.2139, "step": 438700 }, { "epoch": 2.47, "learning_rate": 2.534314800137158e-05, "loss": 0.2104, "step": 438800 }, { "epoch": 2.47, "learning_rate": 2.5337526771109126e-05, "loss": 0.212, "step": 438900 }, { "epoch": 2.47, "learning_rate": 2.5331905540846672e-05, "loss": 0.2077, "step": 439000 }, { "epoch": 2.47, "learning_rate": 2.5326284310584215e-05, "loss": 0.2135, "step": 439100 }, { "epoch": 2.47, "learning_rate": 2.532066308032176e-05, "loss": 0.2062, "step": 439200 }, { "epoch": 2.47, "learning_rate": 2.5315041850059307e-05, "loss": 0.2138, "step": 439300 }, { "epoch": 2.47, "learning_rate": 2.5309420619796847e-05, "loss": 0.2164, "step": 439400 }, { "epoch": 2.47, "learning_rate": 2.5303799389534393e-05, "loss": 0.2085, "step": 439500 }, { "epoch": 2.47, "learning_rate": 2.529817815927194e-05, "loss": 0.2197, "step": 439600 }, { "epoch": 2.47, "learning_rate": 2.5292556929009486e-05, "loss": 0.2154, "step": 439700 }, { "epoch": 2.47, "learning_rate": 2.528693569874703e-05, "loss": 0.202, "step": 439800 }, { "epoch": 2.47, "learning_rate": 2.5281314468484575e-05, "loss": 0.2125, "step": 439900 }, { "epoch": 2.47, "learning_rate": 2.527569323822212e-05, "loss": 0.2094, "step": 440000 }, { "epoch": 2.47, "learning_rate": 2.527007200795966e-05, "loss": 0.2141, "step": 440100 }, { "epoch": 2.47, "learning_rate": 2.5264450777697207e-05, "loss": 0.207, "step": 440200 }, { "epoch": 2.48, "learning_rate": 2.5258829547434753e-05, "loss": 0.2114, "step": 440300 }, { "epoch": 2.48, "learning_rate": 2.52532083171723e-05, "loss": 0.2137, "step": 440400 }, { "epoch": 2.48, "learning_rate": 2.5247587086909842e-05, "loss": 0.211, "step": 440500 }, { "epoch": 2.48, "learning_rate": 2.524196585664739e-05, "loss": 0.2122, "step": 440600 }, { "epoch": 2.48, "learning_rate": 2.5236344626384935e-05, "loss": 0.213, "step": 440700 }, { "epoch": 2.48, "learning_rate": 2.5230723396122474e-05, "loss": 0.2143, "step": 440800 }, { "epoch": 2.48, "learning_rate": 2.522510216586002e-05, "loss": 0.2143, "step": 440900 }, { "epoch": 2.48, "learning_rate": 2.5219480935597567e-05, "loss": 0.2163, "step": 441000 }, { "epoch": 2.48, "learning_rate": 2.5213859705335113e-05, "loss": 0.2152, "step": 441100 }, { "epoch": 2.48, "learning_rate": 2.5208294687375278e-05, "loss": 0.2115, "step": 441200 }, { "epoch": 2.48, "learning_rate": 2.5202673457112824e-05, "loss": 0.2132, "step": 441300 }, { "epoch": 2.48, "learning_rate": 2.519705222685037e-05, "loss": 0.2143, "step": 441400 }, { "epoch": 2.48, "learning_rate": 2.5191430996587916e-05, "loss": 0.2146, "step": 441500 }, { "epoch": 2.48, "learning_rate": 2.518580976632546e-05, "loss": 0.2138, "step": 441600 }, { "epoch": 2.48, "learning_rate": 2.5180188536063006e-05, "loss": 0.2067, "step": 441700 }, { "epoch": 2.48, "learning_rate": 2.5174567305800552e-05, "loss": 0.2153, "step": 441800 }, { "epoch": 2.48, "learning_rate": 2.516894607553809e-05, "loss": 0.203, "step": 441900 }, { "epoch": 2.48, "learning_rate": 2.5163324845275638e-05, "loss": 0.2098, "step": 442000 }, { "epoch": 2.49, "learning_rate": 2.5157703615013184e-05, "loss": 0.2042, "step": 442100 }, { "epoch": 2.49, "learning_rate": 2.515208238475073e-05, "loss": 0.2129, "step": 442200 }, { "epoch": 2.49, "learning_rate": 2.514646115448827e-05, "loss": 0.2138, "step": 442300 }, { "epoch": 2.49, "learning_rate": 2.5140839924225816e-05, "loss": 0.2118, "step": 442400 }, { "epoch": 2.49, "learning_rate": 2.5135218693963362e-05, "loss": 0.2146, "step": 442500 }, { "epoch": 2.49, "learning_rate": 2.5129597463700905e-05, "loss": 0.2088, "step": 442600 }, { "epoch": 2.49, "learning_rate": 2.512397623343845e-05, "loss": 0.2085, "step": 442700 }, { "epoch": 2.49, "learning_rate": 2.5118355003175998e-05, "loss": 0.2067, "step": 442800 }, { "epoch": 2.49, "learning_rate": 2.5112733772913544e-05, "loss": 0.2128, "step": 442900 }, { "epoch": 2.49, "learning_rate": 2.5107112542651083e-05, "loss": 0.2124, "step": 443000 }, { "epoch": 2.49, "learning_rate": 2.510149131238863e-05, "loss": 0.2066, "step": 443100 }, { "epoch": 2.49, "learning_rate": 2.5095870082126176e-05, "loss": 0.2039, "step": 443200 }, { "epoch": 2.49, "learning_rate": 2.509024885186372e-05, "loss": 0.2146, "step": 443300 }, { "epoch": 2.49, "learning_rate": 2.5084627621601265e-05, "loss": 0.2067, "step": 443400 }, { "epoch": 2.49, "learning_rate": 2.507900639133881e-05, "loss": 0.2167, "step": 443500 }, { "epoch": 2.49, "learning_rate": 2.5073385161076358e-05, "loss": 0.2106, "step": 443600 }, { "epoch": 2.49, "learning_rate": 2.5067763930813897e-05, "loss": 0.2093, "step": 443700 }, { "epoch": 2.49, "learning_rate": 2.5062142700551443e-05, "loss": 0.2094, "step": 443800 }, { "epoch": 2.5, "learning_rate": 2.505652147028899e-05, "loss": 0.2156, "step": 443900 }, { "epoch": 2.5, "learning_rate": 2.5050900240026533e-05, "loss": 0.2103, "step": 444000 }, { "epoch": 2.5, "learning_rate": 2.504527900976408e-05, "loss": 0.2126, "step": 444100 }, { "epoch": 2.5, "learning_rate": 2.5039657779501625e-05, "loss": 0.2113, "step": 444200 }, { "epoch": 2.5, "learning_rate": 2.503403654923917e-05, "loss": 0.2021, "step": 444300 }, { "epoch": 2.5, "learning_rate": 2.502841531897671e-05, "loss": 0.2096, "step": 444400 }, { "epoch": 2.5, "learning_rate": 2.5022794088714257e-05, "loss": 0.2164, "step": 444500 }, { "epoch": 2.5, "learning_rate": 2.5017172858451804e-05, "loss": 0.2052, "step": 444600 }, { "epoch": 2.5, "learning_rate": 2.5011551628189346e-05, "loss": 0.2068, "step": 444700 }, { "epoch": 2.5, "learning_rate": 2.5005930397926893e-05, "loss": 0.2055, "step": 444800 }, { "epoch": 2.5, "learning_rate": 2.500030916766444e-05, "loss": 0.2072, "step": 444900 }, { "epoch": 2.5, "learning_rate": 2.4994687937401982e-05, "loss": 0.2086, "step": 445000 }, { "epoch": 2.5, "learning_rate": 2.4989066707139525e-05, "loss": 0.2141, "step": 445100 }, { "epoch": 2.5, "learning_rate": 2.498344547687707e-05, "loss": 0.2108, "step": 445200 }, { "epoch": 2.5, "learning_rate": 2.4977824246614614e-05, "loss": 0.2161, "step": 445300 }, { "epoch": 2.5, "learning_rate": 2.497220301635216e-05, "loss": 0.2074, "step": 445400 }, { "epoch": 2.5, "learning_rate": 2.4966581786089703e-05, "loss": 0.2146, "step": 445500 }, { "epoch": 2.5, "learning_rate": 2.496096055582725e-05, "loss": 0.2116, "step": 445600 }, { "epoch": 2.51, "learning_rate": 2.4955339325564796e-05, "loss": 0.2121, "step": 445700 }, { "epoch": 2.51, "learning_rate": 2.494971809530234e-05, "loss": 0.2147, "step": 445800 }, { "epoch": 2.51, "learning_rate": 2.4944096865039885e-05, "loss": 0.2048, "step": 445900 }, { "epoch": 2.51, "learning_rate": 2.4938475634777428e-05, "loss": 0.2137, "step": 446000 }, { "epoch": 2.51, "learning_rate": 2.4932854404514974e-05, "loss": 0.2155, "step": 446100 }, { "epoch": 2.51, "learning_rate": 2.4927233174252517e-05, "loss": 0.2098, "step": 446200 }, { "epoch": 2.51, "learning_rate": 2.4921611943990063e-05, "loss": 0.2086, "step": 446300 }, { "epoch": 2.51, "learning_rate": 2.491599071372761e-05, "loss": 0.209, "step": 446400 }, { "epoch": 2.51, "learning_rate": 2.4910369483465152e-05, "loss": 0.2157, "step": 446500 }, { "epoch": 2.51, "learning_rate": 2.49047482532027e-05, "loss": 0.2123, "step": 446600 }, { "epoch": 2.51, "learning_rate": 2.489912702294024e-05, "loss": 0.2115, "step": 446700 }, { "epoch": 2.51, "learning_rate": 2.4893505792677788e-05, "loss": 0.2061, "step": 446800 }, { "epoch": 2.51, "learning_rate": 2.488788456241533e-05, "loss": 0.2103, "step": 446900 }, { "epoch": 2.51, "learning_rate": 2.4882263332152877e-05, "loss": 0.2096, "step": 447000 }, { "epoch": 2.51, "learning_rate": 2.4876698314193045e-05, "loss": 0.2105, "step": 447100 }, { "epoch": 2.51, "learning_rate": 2.487107708393059e-05, "loss": 0.2198, "step": 447200 }, { "epoch": 2.51, "learning_rate": 2.4865455853668134e-05, "loss": 0.2101, "step": 447300 }, { "epoch": 2.51, "learning_rate": 2.485983462340568e-05, "loss": 0.211, "step": 447400 }, { "epoch": 2.52, "learning_rate": 2.4854213393143226e-05, "loss": 0.2108, "step": 447500 }, { "epoch": 2.52, "learning_rate": 2.484859216288077e-05, "loss": 0.2139, "step": 447600 }, { "epoch": 2.52, "learning_rate": 2.4842970932618315e-05, "loss": 0.2124, "step": 447700 }, { "epoch": 2.52, "learning_rate": 2.483734970235586e-05, "loss": 0.2112, "step": 447800 }, { "epoch": 2.52, "learning_rate": 2.4831728472093405e-05, "loss": 0.2147, "step": 447900 }, { "epoch": 2.52, "learning_rate": 2.4826107241830947e-05, "loss": 0.2132, "step": 448000 }, { "epoch": 2.52, "learning_rate": 2.482048601156849e-05, "loss": 0.2109, "step": 448100 }, { "epoch": 2.52, "learning_rate": 2.4814864781306037e-05, "loss": 0.2087, "step": 448200 }, { "epoch": 2.52, "learning_rate": 2.4809243551043583e-05, "loss": 0.2089, "step": 448300 }, { "epoch": 2.52, "learning_rate": 2.480362232078113e-05, "loss": 0.2133, "step": 448400 }, { "epoch": 2.52, "learning_rate": 2.4798001090518672e-05, "loss": 0.2079, "step": 448500 }, { "epoch": 2.52, "learning_rate": 2.479237986025622e-05, "loss": 0.209, "step": 448600 }, { "epoch": 2.52, "learning_rate": 2.478675862999376e-05, "loss": 0.2077, "step": 448700 }, { "epoch": 2.52, "learning_rate": 2.4781137399731304e-05, "loss": 0.2143, "step": 448800 }, { "epoch": 2.52, "learning_rate": 2.477551616946885e-05, "loss": 0.2138, "step": 448900 }, { "epoch": 2.52, "learning_rate": 2.4769894939206397e-05, "loss": 0.2078, "step": 449000 }, { "epoch": 2.52, "learning_rate": 2.4764273708943943e-05, "loss": 0.2096, "step": 449100 }, { "epoch": 2.53, "learning_rate": 2.4758708690984107e-05, "loss": 0.2106, "step": 449200 }, { "epoch": 2.53, "learning_rate": 2.4753087460721653e-05, "loss": 0.2033, "step": 449300 }, { "epoch": 2.53, "learning_rate": 2.47474662304592e-05, "loss": 0.2115, "step": 449400 }, { "epoch": 2.53, "learning_rate": 2.4741845000196746e-05, "loss": 0.2082, "step": 449500 }, { "epoch": 2.53, "learning_rate": 2.473622376993429e-05, "loss": 0.2139, "step": 449600 }, { "epoch": 2.53, "learning_rate": 2.4730602539671835e-05, "loss": 0.2079, "step": 449700 }, { "epoch": 2.53, "learning_rate": 2.4724981309409378e-05, "loss": 0.2119, "step": 449800 }, { "epoch": 2.53, "learning_rate": 2.471936007914692e-05, "loss": 0.2136, "step": 449900 }, { "epoch": 2.53, "learning_rate": 2.4713738848884467e-05, "loss": 0.219, "step": 450000 }, { "epoch": 2.53, "learning_rate": 2.4708117618622014e-05, "loss": 0.2133, "step": 450100 }, { "epoch": 2.53, "learning_rate": 2.470249638835956e-05, "loss": 0.214, "step": 450200 }, { "epoch": 2.53, "learning_rate": 2.4696875158097103e-05, "loss": 0.2106, "step": 450300 }, { "epoch": 2.53, "learning_rate": 2.469125392783465e-05, "loss": 0.208, "step": 450400 }, { "epoch": 2.53, "learning_rate": 2.4685632697572192e-05, "loss": 0.2086, "step": 450500 }, { "epoch": 2.53, "learning_rate": 2.4680011467309735e-05, "loss": 0.2151, "step": 450600 }, { "epoch": 2.53, "learning_rate": 2.467439023704728e-05, "loss": 0.2173, "step": 450700 }, { "epoch": 2.53, "learning_rate": 2.4668769006784824e-05, "loss": 0.2124, "step": 450800 }, { "epoch": 2.53, "learning_rate": 2.466314777652237e-05, "loss": 0.2064, "step": 450900 }, { "epoch": 2.54, "learning_rate": 2.4657526546259916e-05, "loss": 0.2087, "step": 451000 }, { "epoch": 2.54, "learning_rate": 2.4651905315997463e-05, "loss": 0.21, "step": 451100 }, { "epoch": 2.54, "learning_rate": 2.4646284085735006e-05, "loss": 0.2018, "step": 451200 }, { "epoch": 2.54, "learning_rate": 2.4640719067775177e-05, "loss": 0.2095, "step": 451300 }, { "epoch": 2.54, "learning_rate": 2.463509783751272e-05, "loss": 0.2078, "step": 451400 }, { "epoch": 2.54, "learning_rate": 2.4629476607250266e-05, "loss": 0.208, "step": 451500 }, { "epoch": 2.54, "learning_rate": 2.462385537698781e-05, "loss": 0.2086, "step": 451600 }, { "epoch": 2.54, "learning_rate": 2.461823414672535e-05, "loss": 0.2065, "step": 451700 }, { "epoch": 2.54, "learning_rate": 2.4612612916462898e-05, "loss": 0.2095, "step": 451800 }, { "epoch": 2.54, "learning_rate": 2.460699168620044e-05, "loss": 0.2044, "step": 451900 }, { "epoch": 2.54, "learning_rate": 2.4601370455937987e-05, "loss": 0.21, "step": 452000 }, { "epoch": 2.54, "learning_rate": 2.4595749225675533e-05, "loss": 0.2092, "step": 452100 }, { "epoch": 2.54, "learning_rate": 2.459012799541308e-05, "loss": 0.2085, "step": 452200 }, { "epoch": 2.54, "learning_rate": 2.4584506765150622e-05, "loss": 0.2085, "step": 452300 }, { "epoch": 2.54, "learning_rate": 2.4578885534888165e-05, "loss": 0.2098, "step": 452400 }, { "epoch": 2.54, "learning_rate": 2.457326430462571e-05, "loss": 0.2133, "step": 452500 }, { "epoch": 2.54, "learning_rate": 2.4567643074363255e-05, "loss": 0.208, "step": 452600 }, { "epoch": 2.54, "learning_rate": 2.45620218441008e-05, "loss": 0.2142, "step": 452700 }, { "epoch": 2.55, "learning_rate": 2.4556400613838347e-05, "loss": 0.209, "step": 452800 }, { "epoch": 2.55, "learning_rate": 2.455077938357589e-05, "loss": 0.2126, "step": 452900 }, { "epoch": 2.55, "learning_rate": 2.4545158153313436e-05, "loss": 0.2081, "step": 453000 }, { "epoch": 2.55, "learning_rate": 2.453953692305098e-05, "loss": 0.212, "step": 453100 }, { "epoch": 2.55, "learning_rate": 2.4533915692788525e-05, "loss": 0.1996, "step": 453200 }, { "epoch": 2.55, "learning_rate": 2.452829446252607e-05, "loss": 0.217, "step": 453300 }, { "epoch": 2.55, "learning_rate": 2.4522673232263615e-05, "loss": 0.2084, "step": 453400 }, { "epoch": 2.55, "learning_rate": 2.4517052002001157e-05, "loss": 0.202, "step": 453500 }, { "epoch": 2.55, "learning_rate": 2.4511430771738704e-05, "loss": 0.216, "step": 453600 }, { "epoch": 2.55, "learning_rate": 2.450580954147625e-05, "loss": 0.2084, "step": 453700 }, { "epoch": 2.55, "learning_rate": 2.4500188311213793e-05, "loss": 0.2167, "step": 453800 }, { "epoch": 2.55, "learning_rate": 2.449456708095134e-05, "loss": 0.206, "step": 453900 }, { "epoch": 2.55, "learning_rate": 2.4488945850688882e-05, "loss": 0.2079, "step": 454000 }, { "epoch": 2.55, "learning_rate": 2.448332462042643e-05, "loss": 0.2118, "step": 454100 }, { "epoch": 2.55, "learning_rate": 2.447770339016397e-05, "loss": 0.2106, "step": 454200 }, { "epoch": 2.55, "learning_rate": 2.4472082159901518e-05, "loss": 0.214, "step": 454300 }, { "epoch": 2.55, "learning_rate": 2.4466460929639064e-05, "loss": 0.2065, "step": 454400 }, { "epoch": 2.55, "learning_rate": 2.4460839699376607e-05, "loss": 0.208, "step": 454500 }, { "epoch": 2.56, "learning_rate": 2.4455218469114153e-05, "loss": 0.2131, "step": 454600 }, { "epoch": 2.56, "learning_rate": 2.4449597238851696e-05, "loss": 0.2077, "step": 454700 }, { "epoch": 2.56, "learning_rate": 2.4443976008589242e-05, "loss": 0.2077, "step": 454800 }, { "epoch": 2.56, "learning_rate": 2.4438354778326785e-05, "loss": 0.2091, "step": 454900 }, { "epoch": 2.56, "learning_rate": 2.4432733548064328e-05, "loss": 0.2095, "step": 455000 }, { "epoch": 2.56, "learning_rate": 2.4427112317801874e-05, "loss": 0.2122, "step": 455100 }, { "epoch": 2.56, "learning_rate": 2.442149108753942e-05, "loss": 0.2112, "step": 455200 }, { "epoch": 2.56, "learning_rate": 2.4415869857276967e-05, "loss": 0.2122, "step": 455300 }, { "epoch": 2.56, "learning_rate": 2.441024862701451e-05, "loss": 0.2151, "step": 455400 }, { "epoch": 2.56, "learning_rate": 2.4404627396752056e-05, "loss": 0.2146, "step": 455500 }, { "epoch": 2.56, "learning_rate": 2.43990061664896e-05, "loss": 0.2064, "step": 455600 }, { "epoch": 2.56, "learning_rate": 2.4393384936227142e-05, "loss": 0.212, "step": 455700 }, { "epoch": 2.56, "learning_rate": 2.4387763705964688e-05, "loss": 0.214, "step": 455800 }, { "epoch": 2.56, "learning_rate": 2.4382142475702234e-05, "loss": 0.2106, "step": 455900 }, { "epoch": 2.56, "learning_rate": 2.437652124543978e-05, "loss": 0.2097, "step": 456000 }, { "epoch": 2.56, "learning_rate": 2.4370900015177323e-05, "loss": 0.2108, "step": 456100 }, { "epoch": 2.56, "learning_rate": 2.436533499721749e-05, "loss": 0.2085, "step": 456200 }, { "epoch": 2.56, "learning_rate": 2.4359713766955037e-05, "loss": 0.2076, "step": 456300 }, { "epoch": 2.57, "learning_rate": 2.4354092536692584e-05, "loss": 0.2045, "step": 456400 }, { "epoch": 2.57, "learning_rate": 2.4348471306430127e-05, "loss": 0.2146, "step": 456500 }, { "epoch": 2.57, "learning_rate": 2.4342850076167673e-05, "loss": 0.2064, "step": 456600 }, { "epoch": 2.57, "learning_rate": 2.4337228845905216e-05, "loss": 0.2153, "step": 456700 }, { "epoch": 2.57, "learning_rate": 2.433160761564276e-05, "loss": 0.2073, "step": 456800 }, { "epoch": 2.57, "learning_rate": 2.4325986385380305e-05, "loss": 0.2103, "step": 456900 }, { "epoch": 2.57, "learning_rate": 2.432036515511785e-05, "loss": 0.2068, "step": 457000 }, { "epoch": 2.57, "learning_rate": 2.4314743924855397e-05, "loss": 0.2142, "step": 457100 }, { "epoch": 2.57, "learning_rate": 2.430917890689556e-05, "loss": 0.2011, "step": 457200 }, { "epoch": 2.57, "learning_rate": 2.430355767663311e-05, "loss": 0.2088, "step": 457300 }, { "epoch": 2.57, "learning_rate": 2.4297936446370654e-05, "loss": 0.2102, "step": 457400 }, { "epoch": 2.57, "learning_rate": 2.42923152161082e-05, "loss": 0.2162, "step": 457500 }, { "epoch": 2.57, "learning_rate": 2.4286693985845743e-05, "loss": 0.2077, "step": 457600 }, { "epoch": 2.57, "learning_rate": 2.4281072755583286e-05, "loss": 0.2105, "step": 457700 }, { "epoch": 2.57, "learning_rate": 2.4275451525320833e-05, "loss": 0.2139, "step": 457800 }, { "epoch": 2.57, "learning_rate": 2.4269830295058375e-05, "loss": 0.2114, "step": 457900 }, { "epoch": 2.57, "learning_rate": 2.426420906479592e-05, "loss": 0.2106, "step": 458000 }, { "epoch": 2.58, "learning_rate": 2.4258587834533468e-05, "loss": 0.2069, "step": 458100 }, { "epoch": 2.58, "learning_rate": 2.4252966604271014e-05, "loss": 0.2105, "step": 458200 }, { "epoch": 2.58, "learning_rate": 2.4247345374008557e-05, "loss": 0.2099, "step": 458300 }, { "epoch": 2.58, "learning_rate": 2.42417241437461e-05, "loss": 0.212, "step": 458400 }, { "epoch": 2.58, "learning_rate": 2.4236102913483646e-05, "loss": 0.2079, "step": 458500 }, { "epoch": 2.58, "learning_rate": 2.423048168322119e-05, "loss": 0.2122, "step": 458600 }, { "epoch": 2.58, "learning_rate": 2.4224860452958735e-05, "loss": 0.2086, "step": 458700 }, { "epoch": 2.58, "learning_rate": 2.421923922269628e-05, "loss": 0.2057, "step": 458800 }, { "epoch": 2.58, "learning_rate": 2.4213617992433828e-05, "loss": 0.2104, "step": 458900 }, { "epoch": 2.58, "learning_rate": 2.420799676217137e-05, "loss": 0.2095, "step": 459000 }, { "epoch": 2.58, "learning_rate": 2.420243174421154e-05, "loss": 0.2146, "step": 459100 }, { "epoch": 2.58, "learning_rate": 2.4196810513949085e-05, "loss": 0.211, "step": 459200 }, { "epoch": 2.58, "learning_rate": 2.419118928368663e-05, "loss": 0.2106, "step": 459300 }, { "epoch": 2.58, "learning_rate": 2.4185568053424174e-05, "loss": 0.2071, "step": 459400 }, { "epoch": 2.58, "learning_rate": 2.4179946823161717e-05, "loss": 0.2081, "step": 459500 }, { "epoch": 2.58, "learning_rate": 2.4174325592899263e-05, "loss": 0.2087, "step": 459600 }, { "epoch": 2.58, "learning_rate": 2.4168704362636806e-05, "loss": 0.2054, "step": 459700 }, { "epoch": 2.58, "learning_rate": 2.4163083132374352e-05, "loss": 0.2108, "step": 459800 }, { "epoch": 2.59, "learning_rate": 2.41574619021119e-05, "loss": 0.2105, "step": 459900 }, { "epoch": 2.59, "learning_rate": 2.4151840671849445e-05, "loss": 0.2045, "step": 460000 }, { "epoch": 2.59, "learning_rate": 2.4146219441586988e-05, "loss": 0.2112, "step": 460100 }, { "epoch": 2.59, "learning_rate": 2.414059821132453e-05, "loss": 0.2121, "step": 460200 }, { "epoch": 2.59, "learning_rate": 2.4134976981062077e-05, "loss": 0.215, "step": 460300 }, { "epoch": 2.59, "learning_rate": 2.412935575079962e-05, "loss": 0.2099, "step": 460400 }, { "epoch": 2.59, "learning_rate": 2.4123734520537166e-05, "loss": 0.2093, "step": 460500 }, { "epoch": 2.59, "learning_rate": 2.411811329027471e-05, "loss": 0.2142, "step": 460600 }, { "epoch": 2.59, "learning_rate": 2.4112492060012255e-05, "loss": 0.2117, "step": 460700 }, { "epoch": 2.59, "learning_rate": 2.41068708297498e-05, "loss": 0.2088, "step": 460800 }, { "epoch": 2.59, "learning_rate": 2.4101249599487344e-05, "loss": 0.2093, "step": 460900 }, { "epoch": 2.59, "learning_rate": 2.409562836922489e-05, "loss": 0.2049, "step": 461000 }, { "epoch": 2.59, "learning_rate": 2.4090007138962434e-05, "loss": 0.2101, "step": 461100 }, { "epoch": 2.59, "learning_rate": 2.408438590869998e-05, "loss": 0.2066, "step": 461200 }, { "epoch": 2.59, "learning_rate": 2.4078764678437523e-05, "loss": 0.2067, "step": 461300 }, { "epoch": 2.59, "learning_rate": 2.407314344817507e-05, "loss": 0.2119, "step": 461400 }, { "epoch": 2.59, "learning_rate": 2.4067522217912615e-05, "loss": 0.2114, "step": 461500 }, { "epoch": 2.59, "learning_rate": 2.4061900987650158e-05, "loss": 0.2054, "step": 461600 }, { "epoch": 2.6, "learning_rate": 2.4056279757387704e-05, "loss": 0.2145, "step": 461700 }, { "epoch": 2.6, "learning_rate": 2.4050658527125247e-05, "loss": 0.214, "step": 461800 }, { "epoch": 2.6, "learning_rate": 2.4045037296862794e-05, "loss": 0.2094, "step": 461900 }, { "epoch": 2.6, "learning_rate": 2.4039416066600337e-05, "loss": 0.2155, "step": 462000 }, { "epoch": 2.6, "learning_rate": 2.4033794836337883e-05, "loss": 0.212, "step": 462100 }, { "epoch": 2.6, "learning_rate": 2.4028173606075426e-05, "loss": 0.2077, "step": 462200 }, { "epoch": 2.6, "learning_rate": 2.4022552375812972e-05, "loss": 0.2088, "step": 462300 }, { "epoch": 2.6, "learning_rate": 2.4016931145550518e-05, "loss": 0.2096, "step": 462400 }, { "epoch": 2.6, "learning_rate": 2.4011366127590686e-05, "loss": 0.2103, "step": 462500 }, { "epoch": 2.6, "learning_rate": 2.4005744897328232e-05, "loss": 0.2081, "step": 462600 }, { "epoch": 2.6, "learning_rate": 2.4000123667065775e-05, "loss": 0.2061, "step": 462700 }, { "epoch": 2.6, "learning_rate": 2.399450243680332e-05, "loss": 0.211, "step": 462800 }, { "epoch": 2.6, "learning_rate": 2.3988881206540864e-05, "loss": 0.2116, "step": 462900 }, { "epoch": 2.6, "learning_rate": 2.398325997627841e-05, "loss": 0.2111, "step": 463000 }, { "epoch": 2.6, "learning_rate": 2.3977638746015953e-05, "loss": 0.2115, "step": 463100 }, { "epoch": 2.6, "learning_rate": 2.3972017515753496e-05, "loss": 0.2145, "step": 463200 }, { "epoch": 2.6, "learning_rate": 2.3966396285491043e-05, "loss": 0.2065, "step": 463300 }, { "epoch": 2.6, "learning_rate": 2.396077505522859e-05, "loss": 0.2081, "step": 463400 }, { "epoch": 2.61, "learning_rate": 2.3955153824966135e-05, "loss": 0.2119, "step": 463500 }, { "epoch": 2.61, "learning_rate": 2.3949532594703678e-05, "loss": 0.214, "step": 463600 }, { "epoch": 2.61, "learning_rate": 2.3943911364441224e-05, "loss": 0.2046, "step": 463700 }, { "epoch": 2.61, "learning_rate": 2.3938290134178767e-05, "loss": 0.2059, "step": 463800 }, { "epoch": 2.61, "learning_rate": 2.393266890391631e-05, "loss": 0.2086, "step": 463900 }, { "epoch": 2.61, "learning_rate": 2.3927047673653856e-05, "loss": 0.1978, "step": 464000 }, { "epoch": 2.61, "learning_rate": 2.3921426443391403e-05, "loss": 0.2105, "step": 464100 }, { "epoch": 2.61, "learning_rate": 2.391580521312895e-05, "loss": 0.2073, "step": 464200 }, { "epoch": 2.61, "learning_rate": 2.3910183982866492e-05, "loss": 0.204, "step": 464300 }, { "epoch": 2.61, "learning_rate": 2.3904562752604038e-05, "loss": 0.2013, "step": 464400 }, { "epoch": 2.61, "learning_rate": 2.389894152234158e-05, "loss": 0.2065, "step": 464500 }, { "epoch": 2.61, "learning_rate": 2.3893320292079124e-05, "loss": 0.2093, "step": 464600 }, { "epoch": 2.61, "learning_rate": 2.388769906181667e-05, "loss": 0.21, "step": 464700 }, { "epoch": 2.61, "learning_rate": 2.3882077831554213e-05, "loss": 0.2124, "step": 464800 }, { "epoch": 2.61, "learning_rate": 2.387645660129176e-05, "loss": 0.214, "step": 464900 }, { "epoch": 2.61, "learning_rate": 2.3870835371029306e-05, "loss": 0.2125, "step": 465000 }, { "epoch": 2.61, "learning_rate": 2.3865270353069473e-05, "loss": 0.2049, "step": 465100 }, { "epoch": 2.61, "learning_rate": 2.385964912280702e-05, "loss": 0.2092, "step": 465200 }, { "epoch": 2.62, "learning_rate": 2.3854027892544566e-05, "loss": 0.2057, "step": 465300 }, { "epoch": 2.62, "learning_rate": 2.384840666228211e-05, "loss": 0.2065, "step": 465400 }, { "epoch": 2.62, "learning_rate": 2.3842785432019655e-05, "loss": 0.2091, "step": 465500 }, { "epoch": 2.62, "learning_rate": 2.3837164201757198e-05, "loss": 0.2111, "step": 465600 }, { "epoch": 2.62, "learning_rate": 2.383154297149474e-05, "loss": 0.2133, "step": 465700 }, { "epoch": 2.62, "learning_rate": 2.3825921741232287e-05, "loss": 0.2118, "step": 465800 }, { "epoch": 2.62, "learning_rate": 2.382030051096983e-05, "loss": 0.2092, "step": 465900 }, { "epoch": 2.62, "learning_rate": 2.3814679280707376e-05, "loss": 0.2118, "step": 466000 }, { "epoch": 2.62, "learning_rate": 2.3809058050444922e-05, "loss": 0.2051, "step": 466100 }, { "epoch": 2.62, "learning_rate": 2.380343682018247e-05, "loss": 0.2075, "step": 466200 }, { "epoch": 2.62, "learning_rate": 2.379781558992001e-05, "loss": 0.2105, "step": 466300 }, { "epoch": 2.62, "learning_rate": 2.3792194359657554e-05, "loss": 0.2049, "step": 466400 }, { "epoch": 2.62, "learning_rate": 2.37865731293951e-05, "loss": 0.2099, "step": 466500 }, { "epoch": 2.62, "learning_rate": 2.3780951899132644e-05, "loss": 0.2059, "step": 466600 }, { "epoch": 2.62, "learning_rate": 2.377533066887019e-05, "loss": 0.2072, "step": 466700 }, { "epoch": 2.62, "learning_rate": 2.3769709438607736e-05, "loss": 0.2097, "step": 466800 }, { "epoch": 2.62, "learning_rate": 2.3764088208345282e-05, "loss": 0.2063, "step": 466900 }, { "epoch": 2.63, "learning_rate": 2.3758466978082825e-05, "loss": 0.2108, "step": 467000 }, { "epoch": 2.63, "learning_rate": 2.3752845747820368e-05, "loss": 0.2089, "step": 467100 }, { "epoch": 2.63, "learning_rate": 2.3747224517557914e-05, "loss": 0.2098, "step": 467200 }, { "epoch": 2.63, "learning_rate": 2.3741603287295457e-05, "loss": 0.2166, "step": 467300 }, { "epoch": 2.63, "learning_rate": 2.3735982057033004e-05, "loss": 0.2, "step": 467400 }, { "epoch": 2.63, "learning_rate": 2.3730360826770547e-05, "loss": 0.2105, "step": 467500 }, { "epoch": 2.63, "learning_rate": 2.3724739596508093e-05, "loss": 0.2079, "step": 467600 }, { "epoch": 2.63, "learning_rate": 2.371911836624564e-05, "loss": 0.2018, "step": 467700 }, { "epoch": 2.63, "learning_rate": 2.3713497135983182e-05, "loss": 0.2137, "step": 467800 }, { "epoch": 2.63, "learning_rate": 2.3707875905720728e-05, "loss": 0.206, "step": 467900 }, { "epoch": 2.63, "learning_rate": 2.370225467545827e-05, "loss": 0.2066, "step": 468000 }, { "epoch": 2.63, "learning_rate": 2.3696633445195817e-05, "loss": 0.2087, "step": 468100 }, { "epoch": 2.63, "learning_rate": 2.369101221493336e-05, "loss": 0.2058, "step": 468200 }, { "epoch": 2.63, "learning_rate": 2.3685390984670903e-05, "loss": 0.205, "step": 468300 }, { "epoch": 2.63, "learning_rate": 2.3679769754408453e-05, "loss": 0.2107, "step": 468400 }, { "epoch": 2.63, "learning_rate": 2.3674148524145996e-05, "loss": 0.2143, "step": 468500 }, { "epoch": 2.63, "learning_rate": 2.3668527293883542e-05, "loss": 0.2104, "step": 468600 }, { "epoch": 2.63, "learning_rate": 2.3662906063621085e-05, "loss": 0.2099, "step": 468700 }, { "epoch": 2.64, "learning_rate": 2.365728483335863e-05, "loss": 0.2058, "step": 468800 }, { "epoch": 2.64, "learning_rate": 2.3651663603096174e-05, "loss": 0.1996, "step": 468900 }, { "epoch": 2.64, "learning_rate": 2.3646042372833717e-05, "loss": 0.2115, "step": 469000 }, { "epoch": 2.64, "learning_rate": 2.3640421142571263e-05, "loss": 0.203, "step": 469100 }, { "epoch": 2.64, "learning_rate": 2.363479991230881e-05, "loss": 0.2076, "step": 469200 }, { "epoch": 2.64, "learning_rate": 2.3629178682046356e-05, "loss": 0.2122, "step": 469300 }, { "epoch": 2.64, "learning_rate": 2.36235574517839e-05, "loss": 0.2063, "step": 469400 }, { "epoch": 2.64, "learning_rate": 2.3617936221521445e-05, "loss": 0.2102, "step": 469500 }, { "epoch": 2.64, "learning_rate": 2.3612314991258988e-05, "loss": 0.2082, "step": 469600 }, { "epoch": 2.64, "learning_rate": 2.360669376099653e-05, "loss": 0.203, "step": 469700 }, { "epoch": 2.64, "learning_rate": 2.3601072530734077e-05, "loss": 0.2038, "step": 469800 }, { "epoch": 2.64, "learning_rate": 2.359545130047162e-05, "loss": 0.2047, "step": 469900 }, { "epoch": 2.64, "learning_rate": 2.358983007020917e-05, "loss": 0.2077, "step": 470000 }, { "epoch": 2.64, "learning_rate": 2.3584208839946712e-05, "loss": 0.2109, "step": 470100 }, { "epoch": 2.64, "learning_rate": 2.357858760968426e-05, "loss": 0.2075, "step": 470200 }, { "epoch": 2.64, "learning_rate": 2.35729663794218e-05, "loss": 0.2037, "step": 470300 }, { "epoch": 2.64, "learning_rate": 2.3567345149159345e-05, "loss": 0.2065, "step": 470400 }, { "epoch": 2.64, "learning_rate": 2.356172391889689e-05, "loss": 0.2033, "step": 470500 }, { "epoch": 2.65, "learning_rate": 2.3556102688634434e-05, "loss": 0.208, "step": 470600 }, { "epoch": 2.65, "learning_rate": 2.355048145837198e-05, "loss": 0.2038, "step": 470700 }, { "epoch": 2.65, "learning_rate": 2.3544860228109526e-05, "loss": 0.2034, "step": 470800 }, { "epoch": 2.65, "learning_rate": 2.3539238997847073e-05, "loss": 0.2154, "step": 470900 }, { "epoch": 2.65, "learning_rate": 2.3533617767584615e-05, "loss": 0.2053, "step": 471000 }, { "epoch": 2.65, "learning_rate": 2.352799653732216e-05, "loss": 0.2028, "step": 471100 }, { "epoch": 2.65, "learning_rate": 2.3522375307059705e-05, "loss": 0.2098, "step": 471200 }, { "epoch": 2.65, "learning_rate": 2.3516754076797247e-05, "loss": 0.2056, "step": 471300 }, { "epoch": 2.65, "learning_rate": 2.3511132846534794e-05, "loss": 0.2062, "step": 471400 }, { "epoch": 2.65, "learning_rate": 2.350556782857496e-05, "loss": 0.2116, "step": 471500 }, { "epoch": 2.65, "learning_rate": 2.3499946598312508e-05, "loss": 0.2112, "step": 471600 }, { "epoch": 2.65, "learning_rate": 2.349432536805005e-05, "loss": 0.2108, "step": 471700 }, { "epoch": 2.65, "learning_rate": 2.3488704137787597e-05, "loss": 0.1985, "step": 471800 }, { "epoch": 2.65, "learning_rate": 2.3483082907525143e-05, "loss": 0.204, "step": 471900 }, { "epoch": 2.65, "learning_rate": 2.347746167726269e-05, "loss": 0.2077, "step": 472000 }, { "epoch": 2.65, "learning_rate": 2.3471840447000232e-05, "loss": 0.2079, "step": 472100 }, { "epoch": 2.65, "learning_rate": 2.3466219216737775e-05, "loss": 0.2107, "step": 472200 }, { "epoch": 2.65, "learning_rate": 2.346059798647532e-05, "loss": 0.2064, "step": 472300 }, { "epoch": 2.66, "learning_rate": 2.3454976756212864e-05, "loss": 0.2068, "step": 472400 }, { "epoch": 2.66, "learning_rate": 2.344935552595041e-05, "loss": 0.2108, "step": 472500 }, { "epoch": 2.66, "learning_rate": 2.3443734295687957e-05, "loss": 0.2083, "step": 472600 }, { "epoch": 2.66, "learning_rate": 2.34381130654255e-05, "loss": 0.2048, "step": 472700 }, { "epoch": 2.66, "learning_rate": 2.3432491835163046e-05, "loss": 0.2018, "step": 472800 }, { "epoch": 2.66, "learning_rate": 2.342687060490059e-05, "loss": 0.206, "step": 472900 }, { "epoch": 2.66, "learning_rate": 2.3421249374638135e-05, "loss": 0.2079, "step": 473000 }, { "epoch": 2.66, "learning_rate": 2.3415628144375678e-05, "loss": 0.2119, "step": 473100 }, { "epoch": 2.66, "learning_rate": 2.3410006914113224e-05, "loss": 0.208, "step": 473200 }, { "epoch": 2.66, "learning_rate": 2.3404385683850767e-05, "loss": 0.2027, "step": 473300 }, { "epoch": 2.66, "learning_rate": 2.3398764453588314e-05, "loss": 0.2097, "step": 473400 }, { "epoch": 2.66, "learning_rate": 2.339319943562848e-05, "loss": 0.2079, "step": 473500 }, { "epoch": 2.66, "learning_rate": 2.3387578205366027e-05, "loss": 0.2034, "step": 473600 }, { "epoch": 2.66, "learning_rate": 2.3381956975103574e-05, "loss": 0.2073, "step": 473700 }, { "epoch": 2.66, "learning_rate": 2.3376335744841117e-05, "loss": 0.2091, "step": 473800 }, { "epoch": 2.66, "learning_rate": 2.3370714514578663e-05, "loss": 0.2054, "step": 473900 }, { "epoch": 2.66, "learning_rate": 2.3365093284316206e-05, "loss": 0.2072, "step": 474000 }, { "epoch": 2.67, "learning_rate": 2.3359472054053752e-05, "loss": 0.2096, "step": 474100 }, { "epoch": 2.67, "learning_rate": 2.335390703609392e-05, "loss": 0.2083, "step": 474200 }, { "epoch": 2.67, "learning_rate": 2.3348285805831466e-05, "loss": 0.2064, "step": 474300 }, { "epoch": 2.67, "learning_rate": 2.334266457556901e-05, "loss": 0.2094, "step": 474400 }, { "epoch": 2.67, "learning_rate": 2.3337043345306555e-05, "loss": 0.2106, "step": 474500 }, { "epoch": 2.67, "learning_rate": 2.3331422115044098e-05, "loss": 0.2069, "step": 474600 }, { "epoch": 2.67, "learning_rate": 2.3325800884781644e-05, "loss": 0.2079, "step": 474700 }, { "epoch": 2.67, "learning_rate": 2.332017965451919e-05, "loss": 0.2024, "step": 474800 }, { "epoch": 2.67, "learning_rate": 2.3314558424256733e-05, "loss": 0.2011, "step": 474900 }, { "epoch": 2.67, "learning_rate": 2.330893719399428e-05, "loss": 0.2074, "step": 475000 }, { "epoch": 2.67, "learning_rate": 2.3303315963731823e-05, "loss": 0.2032, "step": 475100 }, { "epoch": 2.67, "learning_rate": 2.329769473346937e-05, "loss": 0.2085, "step": 475200 }, { "epoch": 2.67, "learning_rate": 2.3292073503206912e-05, "loss": 0.2082, "step": 475300 }, { "epoch": 2.67, "learning_rate": 2.3286452272944458e-05, "loss": 0.2065, "step": 475400 }, { "epoch": 2.67, "learning_rate": 2.3280831042682e-05, "loss": 0.2137, "step": 475500 }, { "epoch": 2.67, "learning_rate": 2.3275209812419547e-05, "loss": 0.2117, "step": 475600 }, { "epoch": 2.67, "learning_rate": 2.3269588582157094e-05, "loss": 0.2059, "step": 475700 }, { "epoch": 2.67, "learning_rate": 2.3263967351894636e-05, "loss": 0.2095, "step": 475800 }, { "epoch": 2.68, "learning_rate": 2.3258346121632183e-05, "loss": 0.2071, "step": 475900 }, { "epoch": 2.68, "learning_rate": 2.3252724891369726e-05, "loss": 0.2064, "step": 476000 }, { "epoch": 2.68, "learning_rate": 2.3247103661107272e-05, "loss": 0.2071, "step": 476100 }, { "epoch": 2.68, "learning_rate": 2.3241482430844815e-05, "loss": 0.2062, "step": 476200 }, { "epoch": 2.68, "learning_rate": 2.323586120058236e-05, "loss": 0.2035, "step": 476300 }, { "epoch": 2.68, "learning_rate": 2.3230239970319907e-05, "loss": 0.2034, "step": 476400 }, { "epoch": 2.68, "learning_rate": 2.322461874005745e-05, "loss": 0.2067, "step": 476500 }, { "epoch": 2.68, "learning_rate": 2.3218997509794996e-05, "loss": 0.2061, "step": 476600 }, { "epoch": 2.68, "learning_rate": 2.321337627953254e-05, "loss": 0.2053, "step": 476700 }, { "epoch": 2.68, "learning_rate": 2.3207755049270086e-05, "loss": 0.2069, "step": 476800 }, { "epoch": 2.68, "learning_rate": 2.320213381900763e-05, "loss": 0.2067, "step": 476900 }, { "epoch": 2.68, "learning_rate": 2.319651258874517e-05, "loss": 0.1967, "step": 477000 }, { "epoch": 2.68, "learning_rate": 2.3190891358482718e-05, "loss": 0.2092, "step": 477100 }, { "epoch": 2.68, "learning_rate": 2.3185270128220264e-05, "loss": 0.2101, "step": 477200 }, { "epoch": 2.68, "learning_rate": 2.317964889795781e-05, "loss": 0.2075, "step": 477300 }, { "epoch": 2.68, "learning_rate": 2.3174027667695353e-05, "loss": 0.2093, "step": 477400 }, { "epoch": 2.68, "learning_rate": 2.3168406437432896e-05, "loss": 0.2051, "step": 477500 }, { "epoch": 2.68, "learning_rate": 2.3162785207170442e-05, "loss": 0.204, "step": 477600 }, { "epoch": 2.69, "learning_rate": 2.3157163976907985e-05, "loss": 0.212, "step": 477700 }, { "epoch": 2.69, "learning_rate": 2.315154274664553e-05, "loss": 0.2035, "step": 477800 }, { "epoch": 2.69, "learning_rate": 2.3145921516383078e-05, "loss": 0.2038, "step": 477900 }, { "epoch": 2.69, "learning_rate": 2.3140300286120624e-05, "loss": 0.2054, "step": 478000 }, { "epoch": 2.69, "learning_rate": 2.3134679055858167e-05, "loss": 0.2036, "step": 478100 }, { "epoch": 2.69, "learning_rate": 2.312905782559571e-05, "loss": 0.2072, "step": 478200 }, { "epoch": 2.69, "learning_rate": 2.3123436595333256e-05, "loss": 0.2052, "step": 478300 }, { "epoch": 2.69, "learning_rate": 2.31178153650708e-05, "loss": 0.2068, "step": 478400 }, { "epoch": 2.69, "learning_rate": 2.3112194134808345e-05, "loss": 0.2067, "step": 478500 }, { "epoch": 2.69, "learning_rate": 2.3106572904545888e-05, "loss": 0.2054, "step": 478600 }, { "epoch": 2.69, "learning_rate": 2.310100788658606e-05, "loss": 0.2028, "step": 478700 }, { "epoch": 2.69, "learning_rate": 2.3095386656323602e-05, "loss": 0.2025, "step": 478800 }, { "epoch": 2.69, "learning_rate": 2.3089765426061148e-05, "loss": 0.2107, "step": 478900 }, { "epoch": 2.69, "learning_rate": 2.3084144195798695e-05, "loss": 0.2016, "step": 479000 }, { "epoch": 2.69, "learning_rate": 2.307852296553624e-05, "loss": 0.2078, "step": 479100 }, { "epoch": 2.69, "learning_rate": 2.3072901735273784e-05, "loss": 0.206, "step": 479200 }, { "epoch": 2.69, "learning_rate": 2.3067280505011327e-05, "loss": 0.2055, "step": 479300 }, { "epoch": 2.69, "learning_rate": 2.3061659274748873e-05, "loss": 0.203, "step": 479400 }, { "epoch": 2.7, "learning_rate": 2.3056094256789044e-05, "loss": 0.202, "step": 479500 }, { "epoch": 2.7, "learning_rate": 2.3050473026526587e-05, "loss": 0.2173, "step": 479600 }, { "epoch": 2.7, "learning_rate": 2.304485179626413e-05, "loss": 0.2029, "step": 479700 }, { "epoch": 2.7, "learning_rate": 2.3039230566001676e-05, "loss": 0.2085, "step": 479800 }, { "epoch": 2.7, "learning_rate": 2.303360933573922e-05, "loss": 0.2117, "step": 479900 }, { "epoch": 2.7, "learning_rate": 2.3027988105476765e-05, "loss": 0.2052, "step": 480000 }, { "epoch": 2.7, "learning_rate": 2.302236687521431e-05, "loss": 0.2033, "step": 480100 }, { "epoch": 2.7, "learning_rate": 2.3016745644951858e-05, "loss": 0.204, "step": 480200 }, { "epoch": 2.7, "learning_rate": 2.30111244146894e-05, "loss": 0.2052, "step": 480300 }, { "epoch": 2.7, "learning_rate": 2.3005503184426943e-05, "loss": 0.2061, "step": 480400 }, { "epoch": 2.7, "learning_rate": 2.299988195416449e-05, "loss": 0.2112, "step": 480500 }, { "epoch": 2.7, "learning_rate": 2.2994260723902033e-05, "loss": 0.2012, "step": 480600 }, { "epoch": 2.7, "learning_rate": 2.298863949363958e-05, "loss": 0.2067, "step": 480700 }, { "epoch": 2.7, "learning_rate": 2.2983018263377122e-05, "loss": 0.205, "step": 480800 }, { "epoch": 2.7, "learning_rate": 2.297739703311467e-05, "loss": 0.2133, "step": 480900 }, { "epoch": 2.7, "learning_rate": 2.2971775802852214e-05, "loss": 0.2094, "step": 481000 }, { "epoch": 2.7, "learning_rate": 2.2966154572589757e-05, "loss": 0.2109, "step": 481100 }, { "epoch": 2.7, "learning_rate": 2.2960533342327304e-05, "loss": 0.2094, "step": 481200 }, { "epoch": 2.71, "learning_rate": 2.2954912112064846e-05, "loss": 0.2169, "step": 481300 }, { "epoch": 2.71, "learning_rate": 2.2949290881802393e-05, "loss": 0.2003, "step": 481400 }, { "epoch": 2.71, "learning_rate": 2.2943669651539936e-05, "loss": 0.2057, "step": 481500 }, { "epoch": 2.71, "learning_rate": 2.2938048421277482e-05, "loss": 0.2028, "step": 481600 }, { "epoch": 2.71, "learning_rate": 2.2932427191015028e-05, "loss": 0.2058, "step": 481700 }, { "epoch": 2.71, "learning_rate": 2.292680596075257e-05, "loss": 0.2095, "step": 481800 }, { "epoch": 2.71, "learning_rate": 2.2921184730490117e-05, "loss": 0.209, "step": 481900 }, { "epoch": 2.71, "learning_rate": 2.291556350022766e-05, "loss": 0.2081, "step": 482000 }, { "epoch": 2.71, "learning_rate": 2.2909942269965206e-05, "loss": 0.2065, "step": 482100 }, { "epoch": 2.71, "learning_rate": 2.290432103970275e-05, "loss": 0.2059, "step": 482200 }, { "epoch": 2.71, "learning_rate": 2.2898699809440296e-05, "loss": 0.2069, "step": 482300 }, { "epoch": 2.71, "learning_rate": 2.2893134791480463e-05, "loss": 0.2135, "step": 482400 }, { "epoch": 2.71, "learning_rate": 2.288751356121801e-05, "loss": 0.2028, "step": 482500 }, { "epoch": 2.71, "learning_rate": 2.2881892330955552e-05, "loss": 0.202, "step": 482600 }, { "epoch": 2.71, "learning_rate": 2.28762711006931e-05, "loss": 0.2042, "step": 482700 }, { "epoch": 2.71, "learning_rate": 2.2870649870430645e-05, "loss": 0.2075, "step": 482800 }, { "epoch": 2.71, "learning_rate": 2.2865028640168188e-05, "loss": 0.2028, "step": 482900 }, { "epoch": 2.72, "learning_rate": 2.2859407409905734e-05, "loss": 0.1945, "step": 483000 }, { "epoch": 2.72, "learning_rate": 2.2853786179643277e-05, "loss": 0.206, "step": 483100 }, { "epoch": 2.72, "learning_rate": 2.2848164949380823e-05, "loss": 0.2019, "step": 483200 }, { "epoch": 2.72, "learning_rate": 2.2842543719118366e-05, "loss": 0.2051, "step": 483300 }, { "epoch": 2.72, "learning_rate": 2.283692248885591e-05, "loss": 0.2051, "step": 483400 }, { "epoch": 2.72, "learning_rate": 2.283130125859346e-05, "loss": 0.2089, "step": 483500 }, { "epoch": 2.72, "learning_rate": 2.2825680028331e-05, "loss": 0.1984, "step": 483600 }, { "epoch": 2.72, "learning_rate": 2.2820058798068548e-05, "loss": 0.2069, "step": 483700 }, { "epoch": 2.72, "learning_rate": 2.281443756780609e-05, "loss": 0.2077, "step": 483800 }, { "epoch": 2.72, "learning_rate": 2.2808816337543637e-05, "loss": 0.205, "step": 483900 }, { "epoch": 2.72, "learning_rate": 2.280319510728118e-05, "loss": 0.2113, "step": 484000 }, { "epoch": 2.72, "learning_rate": 2.2797573877018723e-05, "loss": 0.2084, "step": 484100 }, { "epoch": 2.72, "learning_rate": 2.279195264675627e-05, "loss": 0.2062, "step": 484200 }, { "epoch": 2.72, "learning_rate": 2.2786331416493815e-05, "loss": 0.2013, "step": 484300 }, { "epoch": 2.72, "learning_rate": 2.2780710186231362e-05, "loss": 0.2099, "step": 484400 }, { "epoch": 2.72, "learning_rate": 2.2775088955968905e-05, "loss": 0.2053, "step": 484500 }, { "epoch": 2.72, "learning_rate": 2.276946772570645e-05, "loss": 0.2104, "step": 484600 }, { "epoch": 2.72, "learning_rate": 2.2763846495443994e-05, "loss": 0.2068, "step": 484700 }, { "epoch": 2.73, "learning_rate": 2.2758225265181537e-05, "loss": 0.204, "step": 484800 }, { "epoch": 2.73, "learning_rate": 2.2752604034919083e-05, "loss": 0.2085, "step": 484900 }, { "epoch": 2.73, "learning_rate": 2.2746982804656626e-05, "loss": 0.2095, "step": 485000 }, { "epoch": 2.73, "learning_rate": 2.2741361574394175e-05, "loss": 0.2109, "step": 485100 }, { "epoch": 2.73, "learning_rate": 2.273574034413172e-05, "loss": 0.2012, "step": 485200 }, { "epoch": 2.73, "learning_rate": 2.2730119113869265e-05, "loss": 0.2071, "step": 485300 }, { "epoch": 2.73, "learning_rate": 2.2724497883606808e-05, "loss": 0.2055, "step": 485400 }, { "epoch": 2.73, "learning_rate": 2.271887665334435e-05, "loss": 0.2143, "step": 485500 }, { "epoch": 2.73, "learning_rate": 2.2713255423081897e-05, "loss": 0.2094, "step": 485600 }, { "epoch": 2.73, "learning_rate": 2.270763419281944e-05, "loss": 0.2008, "step": 485700 }, { "epoch": 2.73, "learning_rate": 2.2702012962556986e-05, "loss": 0.2109, "step": 485800 }, { "epoch": 2.73, "learning_rate": 2.2696391732294532e-05, "loss": 0.207, "step": 485900 }, { "epoch": 2.73, "learning_rate": 2.269077050203208e-05, "loss": 0.1964, "step": 486000 }, { "epoch": 2.73, "learning_rate": 2.268514927176962e-05, "loss": 0.2095, "step": 486100 }, { "epoch": 2.73, "learning_rate": 2.2679528041507164e-05, "loss": 0.2025, "step": 486200 }, { "epoch": 2.73, "learning_rate": 2.267390681124471e-05, "loss": 0.2005, "step": 486300 }, { "epoch": 2.73, "learning_rate": 2.2668285580982253e-05, "loss": 0.2051, "step": 486400 }, { "epoch": 2.73, "learning_rate": 2.26626643507198e-05, "loss": 0.2062, "step": 486500 }, { "epoch": 2.74, "learning_rate": 2.2657043120457343e-05, "loss": 0.2167, "step": 486600 }, { "epoch": 2.74, "learning_rate": 2.2651478102497514e-05, "loss": 0.2092, "step": 486700 }, { "epoch": 2.74, "learning_rate": 2.2645856872235056e-05, "loss": 0.204, "step": 486800 }, { "epoch": 2.74, "learning_rate": 2.2640235641972603e-05, "loss": 0.2103, "step": 486900 }, { "epoch": 2.74, "learning_rate": 2.263461441171015e-05, "loss": 0.2073, "step": 487000 }, { "epoch": 2.74, "learning_rate": 2.2628993181447695e-05, "loss": 0.2079, "step": 487100 }, { "epoch": 2.74, "learning_rate": 2.2623371951185238e-05, "loss": 0.2078, "step": 487200 }, { "epoch": 2.74, "learning_rate": 2.261775072092278e-05, "loss": 0.2077, "step": 487300 }, { "epoch": 2.74, "learning_rate": 2.2612129490660327e-05, "loss": 0.2096, "step": 487400 }, { "epoch": 2.74, "learning_rate": 2.260650826039787e-05, "loss": 0.2042, "step": 487500 }, { "epoch": 2.74, "learning_rate": 2.2600887030135416e-05, "loss": 0.2048, "step": 487600 }, { "epoch": 2.74, "learning_rate": 2.2595265799872963e-05, "loss": 0.2049, "step": 487700 }, { "epoch": 2.74, "learning_rate": 2.2589644569610506e-05, "loss": 0.2052, "step": 487800 }, { "epoch": 2.74, "learning_rate": 2.2584023339348052e-05, "loss": 0.2076, "step": 487900 }, { "epoch": 2.74, "learning_rate": 2.2578402109085595e-05, "loss": 0.2045, "step": 488000 }, { "epoch": 2.74, "learning_rate": 2.257278087882314e-05, "loss": 0.1991, "step": 488100 }, { "epoch": 2.74, "learning_rate": 2.2567159648560684e-05, "loss": 0.2075, "step": 488200 }, { "epoch": 2.74, "learning_rate": 2.256153841829823e-05, "loss": 0.2017, "step": 488300 }, { "epoch": 2.75, "learning_rate": 2.2555917188035773e-05, "loss": 0.2054, "step": 488400 }, { "epoch": 2.75, "learning_rate": 2.255029595777332e-05, "loss": 0.2032, "step": 488500 }, { "epoch": 2.75, "learning_rate": 2.2544674727510866e-05, "loss": 0.2093, "step": 488600 }, { "epoch": 2.75, "learning_rate": 2.253905349724841e-05, "loss": 0.2054, "step": 488700 }, { "epoch": 2.75, "learning_rate": 2.2533432266985955e-05, "loss": 0.2123, "step": 488800 }, { "epoch": 2.75, "learning_rate": 2.2527867249026123e-05, "loss": 0.1991, "step": 488900 }, { "epoch": 2.75, "learning_rate": 2.252224601876367e-05, "loss": 0.2079, "step": 489000 }, { "epoch": 2.75, "learning_rate": 2.251662478850121e-05, "loss": 0.2086, "step": 489100 }, { "epoch": 2.75, "learning_rate": 2.2511003558238758e-05, "loss": 0.1983, "step": 489200 }, { "epoch": 2.75, "learning_rate": 2.25053823279763e-05, "loss": 0.2134, "step": 489300 }, { "epoch": 2.75, "learning_rate": 2.2499761097713847e-05, "loss": 0.2066, "step": 489400 }, { "epoch": 2.75, "learning_rate": 2.249413986745139e-05, "loss": 0.2085, "step": 489500 }, { "epoch": 2.75, "learning_rate": 2.2488518637188936e-05, "loss": 0.2091, "step": 489600 }, { "epoch": 2.75, "learning_rate": 2.2482897406926483e-05, "loss": 0.2054, "step": 489700 }, { "epoch": 2.75, "learning_rate": 2.247733238896665e-05, "loss": 0.2028, "step": 489800 }, { "epoch": 2.75, "learning_rate": 2.2471711158704196e-05, "loss": 0.2081, "step": 489900 }, { "epoch": 2.75, "learning_rate": 2.246608992844174e-05, "loss": 0.2101, "step": 490000 }, { "epoch": 2.75, "learning_rate": 2.2460468698179286e-05, "loss": 0.2138, "step": 490100 }, { "epoch": 2.76, "learning_rate": 2.245484746791683e-05, "loss": 0.2037, "step": 490200 }, { "epoch": 2.76, "learning_rate": 2.2449226237654375e-05, "loss": 0.2038, "step": 490300 }, { "epoch": 2.76, "learning_rate": 2.2443605007391918e-05, "loss": 0.2017, "step": 490400 }, { "epoch": 2.76, "learning_rate": 2.2437983777129464e-05, "loss": 0.2006, "step": 490500 }, { "epoch": 2.76, "learning_rate": 2.2432362546867007e-05, "loss": 0.2106, "step": 490600 }, { "epoch": 2.76, "learning_rate": 2.2426741316604553e-05, "loss": 0.2137, "step": 490700 }, { "epoch": 2.76, "learning_rate": 2.24211200863421e-05, "loss": 0.2093, "step": 490800 }, { "epoch": 2.76, "learning_rate": 2.2415498856079642e-05, "loss": 0.1954, "step": 490900 }, { "epoch": 2.76, "learning_rate": 2.240987762581719e-05, "loss": 0.2112, "step": 491000 }, { "epoch": 2.76, "learning_rate": 2.240425639555473e-05, "loss": 0.2072, "step": 491100 }, { "epoch": 2.76, "learning_rate": 2.2398635165292278e-05, "loss": 0.1997, "step": 491200 }, { "epoch": 2.76, "learning_rate": 2.239301393502982e-05, "loss": 0.2118, "step": 491300 }, { "epoch": 2.76, "learning_rate": 2.2387392704767367e-05, "loss": 0.2056, "step": 491400 }, { "epoch": 2.76, "learning_rate": 2.2381771474504913e-05, "loss": 0.2086, "step": 491500 }, { "epoch": 2.76, "learning_rate": 2.2376150244242456e-05, "loss": 0.2024, "step": 491600 }, { "epoch": 2.76, "learning_rate": 2.2370529013980002e-05, "loss": 0.203, "step": 491700 }, { "epoch": 2.76, "learning_rate": 2.2364907783717545e-05, "loss": 0.2028, "step": 491800 }, { "epoch": 2.77, "learning_rate": 2.235928655345509e-05, "loss": 0.2078, "step": 491900 }, { "epoch": 2.77, "learning_rate": 2.2353665323192634e-05, "loss": 0.2141, "step": 492000 }, { "epoch": 2.77, "learning_rate": 2.2348044092930177e-05, "loss": 0.2018, "step": 492100 }, { "epoch": 2.77, "learning_rate": 2.2342422862667724e-05, "loss": 0.2082, "step": 492200 }, { "epoch": 2.77, "learning_rate": 2.233680163240527e-05, "loss": 0.2064, "step": 492300 }, { "epoch": 2.77, "learning_rate": 2.2331180402142816e-05, "loss": 0.2001, "step": 492400 }, { "epoch": 2.77, "learning_rate": 2.232555917188036e-05, "loss": 0.2059, "step": 492500 }, { "epoch": 2.77, "learning_rate": 2.2319937941617902e-05, "loss": 0.1987, "step": 492600 }, { "epoch": 2.77, "learning_rate": 2.2314316711355448e-05, "loss": 0.2099, "step": 492700 }, { "epoch": 2.77, "learning_rate": 2.230869548109299e-05, "loss": 0.2013, "step": 492800 }, { "epoch": 2.77, "learning_rate": 2.2303074250830537e-05, "loss": 0.201, "step": 492900 }, { "epoch": 2.77, "learning_rate": 2.2297453020568084e-05, "loss": 0.2048, "step": 493000 }, { "epoch": 2.77, "learning_rate": 2.229183179030563e-05, "loss": 0.2014, "step": 493100 }, { "epoch": 2.77, "learning_rate": 2.2286210560043173e-05, "loss": 0.2027, "step": 493200 }, { "epoch": 2.77, "learning_rate": 2.2280589329780716e-05, "loss": 0.209, "step": 493300 }, { "epoch": 2.77, "learning_rate": 2.2274968099518262e-05, "loss": 0.2001, "step": 493400 }, { "epoch": 2.77, "learning_rate": 2.2269346869255805e-05, "loss": 0.21, "step": 493500 }, { "epoch": 2.77, "learning_rate": 2.226372563899335e-05, "loss": 0.2032, "step": 493600 }, { "epoch": 2.78, "learning_rate": 2.2258104408730894e-05, "loss": 0.2077, "step": 493700 }, { "epoch": 2.78, "learning_rate": 2.2252539390771065e-05, "loss": 0.2036, "step": 493800 }, { "epoch": 2.78, "learning_rate": 2.2246918160508608e-05, "loss": 0.1981, "step": 493900 }, { "epoch": 2.78, "learning_rate": 2.2241296930246154e-05, "loss": 0.2045, "step": 494000 }, { "epoch": 2.78, "learning_rate": 2.22356756999837e-05, "loss": 0.209, "step": 494100 }, { "epoch": 2.78, "learning_rate": 2.2230054469721247e-05, "loss": 0.2046, "step": 494200 }, { "epoch": 2.78, "learning_rate": 2.222443323945879e-05, "loss": 0.2052, "step": 494300 }, { "epoch": 2.78, "learning_rate": 2.2218812009196333e-05, "loss": 0.2017, "step": 494400 }, { "epoch": 2.78, "learning_rate": 2.221319077893388e-05, "loss": 0.2045, "step": 494500 }, { "epoch": 2.78, "learning_rate": 2.220756954867142e-05, "loss": 0.2056, "step": 494600 }, { "epoch": 2.78, "learning_rate": 2.2201948318408968e-05, "loss": 0.2159, "step": 494700 }, { "epoch": 2.78, "learning_rate": 2.219632708814651e-05, "loss": 0.2042, "step": 494800 }, { "epoch": 2.78, "learning_rate": 2.2190705857884057e-05, "loss": 0.1989, "step": 494900 }, { "epoch": 2.78, "learning_rate": 2.2185140839924225e-05, "loss": 0.2105, "step": 495000 }, { "epoch": 2.78, "learning_rate": 2.217951960966177e-05, "loss": 0.1988, "step": 495100 }, { "epoch": 2.78, "learning_rate": 2.2173898379399317e-05, "loss": 0.2016, "step": 495200 }, { "epoch": 2.78, "learning_rate": 2.2168277149136864e-05, "loss": 0.2049, "step": 495300 }, { "epoch": 2.78, "learning_rate": 2.2162655918874406e-05, "loss": 0.2037, "step": 495400 }, { "epoch": 2.79, "learning_rate": 2.215703468861195e-05, "loss": 0.2057, "step": 495500 }, { "epoch": 2.79, "learning_rate": 2.215146967065212e-05, "loss": 0.2002, "step": 495600 }, { "epoch": 2.79, "learning_rate": 2.2145848440389667e-05, "loss": 0.2119, "step": 495700 }, { "epoch": 2.79, "learning_rate": 2.214022721012721e-05, "loss": 0.2008, "step": 495800 }, { "epoch": 2.79, "learning_rate": 2.2134605979864752e-05, "loss": 0.2026, "step": 495900 }, { "epoch": 2.79, "learning_rate": 2.21289847496023e-05, "loss": 0.2062, "step": 496000 }, { "epoch": 2.79, "learning_rate": 2.212336351933984e-05, "loss": 0.2051, "step": 496100 }, { "epoch": 2.79, "learning_rate": 2.2117742289077388e-05, "loss": 0.2067, "step": 496200 }, { "epoch": 2.79, "learning_rate": 2.2112121058814934e-05, "loss": 0.2001, "step": 496300 }, { "epoch": 2.79, "learning_rate": 2.210649982855248e-05, "loss": 0.2119, "step": 496400 }, { "epoch": 2.79, "learning_rate": 2.2100878598290023e-05, "loss": 0.2066, "step": 496500 }, { "epoch": 2.79, "learning_rate": 2.2095257368027566e-05, "loss": 0.2061, "step": 496600 }, { "epoch": 2.79, "learning_rate": 2.2089636137765113e-05, "loss": 0.2011, "step": 496700 }, { "epoch": 2.79, "learning_rate": 2.2084014907502655e-05, "loss": 0.2004, "step": 496800 }, { "epoch": 2.79, "learning_rate": 2.20783936772402e-05, "loss": 0.208, "step": 496900 }, { "epoch": 2.79, "learning_rate": 2.2072772446977748e-05, "loss": 0.2076, "step": 497000 }, { "epoch": 2.79, "learning_rate": 2.2067151216715294e-05, "loss": 0.2076, "step": 497100 }, { "epoch": 2.79, "learning_rate": 2.2061529986452837e-05, "loss": 0.203, "step": 497200 }, { "epoch": 2.8, "learning_rate": 2.205590875619038e-05, "loss": 0.2031, "step": 497300 }, { "epoch": 2.8, "learning_rate": 2.2050287525927926e-05, "loss": 0.2032, "step": 497400 }, { "epoch": 2.8, "learning_rate": 2.204466629566547e-05, "loss": 0.2017, "step": 497500 }, { "epoch": 2.8, "learning_rate": 2.2039045065403015e-05, "loss": 0.2021, "step": 497600 }, { "epoch": 2.8, "learning_rate": 2.203342383514056e-05, "loss": 0.2015, "step": 497700 }, { "epoch": 2.8, "learning_rate": 2.2027802604878105e-05, "loss": 0.2087, "step": 497800 }, { "epoch": 2.8, "learning_rate": 2.202218137461565e-05, "loss": 0.2091, "step": 497900 }, { "epoch": 2.8, "learning_rate": 2.2016560144353194e-05, "loss": 0.2066, "step": 498000 }, { "epoch": 2.8, "learning_rate": 2.201093891409074e-05, "loss": 0.2054, "step": 498100 }, { "epoch": 2.8, "learning_rate": 2.2005317683828283e-05, "loss": 0.21, "step": 498200 }, { "epoch": 2.8, "learning_rate": 2.199969645356583e-05, "loss": 0.2018, "step": 498300 }, { "epoch": 2.8, "learning_rate": 2.1994075223303372e-05, "loss": 0.2057, "step": 498400 }, { "epoch": 2.8, "learning_rate": 2.1988453993040915e-05, "loss": 0.2027, "step": 498500 }, { "epoch": 2.8, "learning_rate": 2.1982832762778465e-05, "loss": 0.2052, "step": 498600 }, { "epoch": 2.8, "learning_rate": 2.1977211532516008e-05, "loss": 0.2027, "step": 498700 }, { "epoch": 2.8, "learning_rate": 2.1971590302253554e-05, "loss": 0.1977, "step": 498800 }, { "epoch": 2.8, "learning_rate": 2.1965969071991097e-05, "loss": 0.2027, "step": 498900 }, { "epoch": 2.8, "learning_rate": 2.1960347841728643e-05, "loss": 0.2093, "step": 499000 }, { "epoch": 2.81, "learning_rate": 2.1954726611466186e-05, "loss": 0.2074, "step": 499100 }, { "epoch": 2.81, "learning_rate": 2.194910538120373e-05, "loss": 0.2088, "step": 499200 }, { "epoch": 2.81, "learning_rate": 2.1943484150941275e-05, "loss": 0.2064, "step": 499300 }, { "epoch": 2.81, "learning_rate": 2.193786292067882e-05, "loss": 0.2043, "step": 499400 }, { "epoch": 2.81, "learning_rate": 2.1932241690416368e-05, "loss": 0.2033, "step": 499500 }, { "epoch": 2.81, "learning_rate": 2.192662046015391e-05, "loss": 0.207, "step": 499600 }, { "epoch": 2.81, "learning_rate": 2.1920999229891457e-05, "loss": 0.2093, "step": 499700 }, { "epoch": 2.81, "learning_rate": 2.1915377999629e-05, "loss": 0.2036, "step": 499800 }, { "epoch": 2.81, "learning_rate": 2.1909756769366543e-05, "loss": 0.2023, "step": 499900 }, { "epoch": 2.81, "learning_rate": 2.190413553910409e-05, "loss": 0.2078, "step": 500000 }, { "epoch": 2.81, "learning_rate": 2.1898514308841632e-05, "loss": 0.2007, "step": 500100 }, { "epoch": 2.81, "learning_rate": 2.189289307857918e-05, "loss": 0.2031, "step": 500200 }, { "epoch": 2.81, "learning_rate": 2.1887271848316724e-05, "loss": 0.204, "step": 500300 }, { "epoch": 2.81, "learning_rate": 2.188165061805427e-05, "loss": 0.2007, "step": 500400 }, { "epoch": 2.81, "learning_rate": 2.1876029387791813e-05, "loss": 0.2034, "step": 500500 }, { "epoch": 2.81, "learning_rate": 2.1870408157529356e-05, "loss": 0.2098, "step": 500600 }, { "epoch": 2.81, "learning_rate": 2.1864786927266903e-05, "loss": 0.2037, "step": 500700 }, { "epoch": 2.82, "learning_rate": 2.1859221909307074e-05, "loss": 0.2022, "step": 500800 }, { "epoch": 2.82, "learning_rate": 2.1853600679044617e-05, "loss": 0.2039, "step": 500900 }, { "epoch": 2.82, "learning_rate": 2.184797944878216e-05, "loss": 0.2087, "step": 501000 }, { "epoch": 2.82, "learning_rate": 2.1842358218519706e-05, "loss": 0.1974, "step": 501100 }, { "epoch": 2.82, "learning_rate": 2.1836736988257252e-05, "loss": 0.2075, "step": 501200 }, { "epoch": 2.82, "learning_rate": 2.1831115757994798e-05, "loss": 0.1997, "step": 501300 }, { "epoch": 2.82, "learning_rate": 2.182549452773234e-05, "loss": 0.2109, "step": 501400 }, { "epoch": 2.82, "learning_rate": 2.1819873297469887e-05, "loss": 0.2018, "step": 501500 }, { "epoch": 2.82, "learning_rate": 2.181425206720743e-05, "loss": 0.2026, "step": 501600 }, { "epoch": 2.82, "learning_rate": 2.1808630836944973e-05, "loss": 0.2064, "step": 501700 }, { "epoch": 2.82, "learning_rate": 2.180300960668252e-05, "loss": 0.2027, "step": 501800 }, { "epoch": 2.82, "learning_rate": 2.1797388376420062e-05, "loss": 0.2055, "step": 501900 }, { "epoch": 2.82, "learning_rate": 2.179176714615761e-05, "loss": 0.2056, "step": 502000 }, { "epoch": 2.82, "learning_rate": 2.1786145915895155e-05, "loss": 0.2048, "step": 502100 }, { "epoch": 2.82, "learning_rate": 2.17805246856327e-05, "loss": 0.2035, "step": 502200 }, { "epoch": 2.82, "learning_rate": 2.1774903455370244e-05, "loss": 0.2083, "step": 502300 }, { "epoch": 2.82, "learning_rate": 2.1769282225107787e-05, "loss": 0.2018, "step": 502400 }, { "epoch": 2.82, "learning_rate": 2.1763660994845333e-05, "loss": 0.1995, "step": 502500 }, { "epoch": 2.83, "learning_rate": 2.1758039764582876e-05, "loss": 0.21, "step": 502600 }, { "epoch": 2.83, "learning_rate": 2.1752418534320422e-05, "loss": 0.2053, "step": 502700 }, { "epoch": 2.83, "learning_rate": 2.174685351636059e-05, "loss": 0.2108, "step": 502800 }, { "epoch": 2.83, "learning_rate": 2.1741232286098136e-05, "loss": 0.2015, "step": 502900 }, { "epoch": 2.83, "learning_rate": 2.173561105583568e-05, "loss": 0.2063, "step": 503000 }, { "epoch": 2.83, "learning_rate": 2.173004603787585e-05, "loss": 0.1988, "step": 503100 }, { "epoch": 2.83, "learning_rate": 2.1724424807613393e-05, "loss": 0.2042, "step": 503200 }, { "epoch": 2.83, "learning_rate": 2.171880357735094e-05, "loss": 0.2064, "step": 503300 }, { "epoch": 2.83, "learning_rate": 2.1713182347088486e-05, "loss": 0.2094, "step": 503400 }, { "epoch": 2.83, "learning_rate": 2.1707561116826032e-05, "loss": 0.2062, "step": 503500 }, { "epoch": 2.83, "learning_rate": 2.1701939886563575e-05, "loss": 0.2086, "step": 503600 }, { "epoch": 2.83, "learning_rate": 2.1696318656301118e-05, "loss": 0.2084, "step": 503700 }, { "epoch": 2.83, "learning_rate": 2.1690697426038664e-05, "loss": 0.199, "step": 503800 }, { "epoch": 2.83, "learning_rate": 2.1685076195776207e-05, "loss": 0.2011, "step": 503900 }, { "epoch": 2.83, "learning_rate": 2.1679454965513753e-05, "loss": 0.1982, "step": 504000 }, { "epoch": 2.83, "learning_rate": 2.1673833735251296e-05, "loss": 0.2018, "step": 504100 }, { "epoch": 2.83, "learning_rate": 2.1668212504988846e-05, "loss": 0.2116, "step": 504200 }, { "epoch": 2.83, "learning_rate": 2.166259127472639e-05, "loss": 0.2065, "step": 504300 }, { "epoch": 2.84, "learning_rate": 2.165697004446393e-05, "loss": 0.2061, "step": 504400 }, { "epoch": 2.84, "learning_rate": 2.1651348814201478e-05, "loss": 0.2046, "step": 504500 }, { "epoch": 2.84, "learning_rate": 2.164572758393902e-05, "loss": 0.2061, "step": 504600 }, { "epoch": 2.84, "learning_rate": 2.1640106353676567e-05, "loss": 0.2056, "step": 504700 }, { "epoch": 2.84, "learning_rate": 2.163448512341411e-05, "loss": 0.2113, "step": 504800 }, { "epoch": 2.84, "learning_rate": 2.1628863893151656e-05, "loss": 0.2002, "step": 504900 }, { "epoch": 2.84, "learning_rate": 2.1623242662889202e-05, "loss": 0.2009, "step": 505000 }, { "epoch": 2.84, "learning_rate": 2.1617621432626745e-05, "loss": 0.21, "step": 505100 }, { "epoch": 2.84, "learning_rate": 2.161200020236429e-05, "loss": 0.2026, "step": 505200 }, { "epoch": 2.84, "learning_rate": 2.1606378972101834e-05, "loss": 0.2065, "step": 505300 }, { "epoch": 2.84, "learning_rate": 2.160075774183938e-05, "loss": 0.2052, "step": 505400 }, { "epoch": 2.84, "learning_rate": 2.1595136511576924e-05, "loss": 0.1983, "step": 505500 }, { "epoch": 2.84, "learning_rate": 2.158951528131447e-05, "loss": 0.2045, "step": 505600 }, { "epoch": 2.84, "learning_rate": 2.1583894051052013e-05, "loss": 0.2018, "step": 505700 }, { "epoch": 2.84, "learning_rate": 2.157827282078956e-05, "loss": 0.2027, "step": 505800 }, { "epoch": 2.84, "learning_rate": 2.1572651590527105e-05, "loss": 0.2045, "step": 505900 }, { "epoch": 2.84, "learning_rate": 2.1567030360264648e-05, "loss": 0.201, "step": 506000 }, { "epoch": 2.84, "learning_rate": 2.1561409130002194e-05, "loss": 0.2049, "step": 506100 }, { "epoch": 2.85, "learning_rate": 2.1555787899739737e-05, "loss": 0.207, "step": 506200 }, { "epoch": 2.85, "learning_rate": 2.1550166669477284e-05, "loss": 0.1992, "step": 506300 }, { "epoch": 2.85, "learning_rate": 2.1544545439214827e-05, "loss": 0.1996, "step": 506400 }, { "epoch": 2.85, "learning_rate": 2.1538924208952373e-05, "loss": 0.2072, "step": 506500 }, { "epoch": 2.85, "learning_rate": 2.153330297868992e-05, "loss": 0.205, "step": 506600 }, { "epoch": 2.85, "learning_rate": 2.1527681748427462e-05, "loss": 0.2027, "step": 506700 }, { "epoch": 2.85, "learning_rate": 2.1522060518165008e-05, "loss": 0.2044, "step": 506800 }, { "epoch": 2.85, "learning_rate": 2.151643928790255e-05, "loss": 0.2056, "step": 506900 }, { "epoch": 2.85, "learning_rate": 2.1510818057640097e-05, "loss": 0.203, "step": 507000 }, { "epoch": 2.85, "learning_rate": 2.150519682737764e-05, "loss": 0.2025, "step": 507100 }, { "epoch": 2.85, "learning_rate": 2.1499575597115183e-05, "loss": 0.2056, "step": 507200 }, { "epoch": 2.85, "learning_rate": 2.149395436685273e-05, "loss": 0.2034, "step": 507300 }, { "epoch": 2.85, "learning_rate": 2.1488333136590276e-05, "loss": 0.2046, "step": 507400 }, { "epoch": 2.85, "learning_rate": 2.1482711906327822e-05, "loss": 0.2024, "step": 507500 }, { "epoch": 2.85, "learning_rate": 2.1477090676065365e-05, "loss": 0.2096, "step": 507600 }, { "epoch": 2.85, "learning_rate": 2.1471469445802908e-05, "loss": 0.1999, "step": 507700 }, { "epoch": 2.85, "learning_rate": 2.1465848215540454e-05, "loss": 0.2016, "step": 507800 }, { "epoch": 2.86, "learning_rate": 2.1460226985277997e-05, "loss": 0.2057, "step": 507900 }, { "epoch": 2.86, "learning_rate": 2.1454605755015543e-05, "loss": 0.2018, "step": 508000 }, { "epoch": 2.86, "learning_rate": 2.144898452475309e-05, "loss": 0.2074, "step": 508100 }, { "epoch": 2.86, "learning_rate": 2.1443363294490636e-05, "loss": 0.2042, "step": 508200 }, { "epoch": 2.86, "learning_rate": 2.143774206422818e-05, "loss": 0.2031, "step": 508300 }, { "epoch": 2.86, "learning_rate": 2.143212083396572e-05, "loss": 0.203, "step": 508400 }, { "epoch": 2.86, "learning_rate": 2.1426499603703268e-05, "loss": 0.209, "step": 508500 }, { "epoch": 2.86, "learning_rate": 2.142087837344081e-05, "loss": 0.2038, "step": 508600 }, { "epoch": 2.86, "learning_rate": 2.1415257143178357e-05, "loss": 0.2043, "step": 508700 }, { "epoch": 2.86, "learning_rate": 2.14096359129159e-05, "loss": 0.2034, "step": 508800 }, { "epoch": 2.86, "learning_rate": 2.1404014682653446e-05, "loss": 0.2077, "step": 508900 }, { "epoch": 2.86, "learning_rate": 2.1398393452390992e-05, "loss": 0.2079, "step": 509000 }, { "epoch": 2.86, "learning_rate": 2.1392772222128535e-05, "loss": 0.2047, "step": 509100 }, { "epoch": 2.86, "learning_rate": 2.138715099186608e-05, "loss": 0.2027, "step": 509200 }, { "epoch": 2.86, "learning_rate": 2.1381529761603625e-05, "loss": 0.2001, "step": 509300 }, { "epoch": 2.86, "learning_rate": 2.137590853134117e-05, "loss": 0.2037, "step": 509400 }, { "epoch": 2.86, "learning_rate": 2.1370287301078714e-05, "loss": 0.2091, "step": 509500 }, { "epoch": 2.86, "learning_rate": 2.1364722283118885e-05, "loss": 0.198, "step": 509600 }, { "epoch": 2.87, "learning_rate": 2.1359101052856428e-05, "loss": 0.2041, "step": 509700 }, { "epoch": 2.87, "learning_rate": 2.1353479822593974e-05, "loss": 0.2055, "step": 509800 }, { "epoch": 2.87, "learning_rate": 2.1347858592331517e-05, "loss": 0.2018, "step": 509900 }, { "epoch": 2.87, "learning_rate": 2.1342237362069063e-05, "loss": 0.2046, "step": 510000 }, { "epoch": 2.87, "learning_rate": 2.133661613180661e-05, "loss": 0.2022, "step": 510100 }, { "epoch": 2.87, "learning_rate": 2.1330994901544152e-05, "loss": 0.2096, "step": 510200 }, { "epoch": 2.87, "learning_rate": 2.13253736712817e-05, "loss": 0.2055, "step": 510300 }, { "epoch": 2.87, "learning_rate": 2.131975244101924e-05, "loss": 0.2051, "step": 510400 }, { "epoch": 2.87, "learning_rate": 2.1314131210756788e-05, "loss": 0.2047, "step": 510500 }, { "epoch": 2.87, "learning_rate": 2.130850998049433e-05, "loss": 0.2, "step": 510600 }, { "epoch": 2.87, "learning_rate": 2.1302888750231877e-05, "loss": 0.2065, "step": 510700 }, { "epoch": 2.87, "learning_rate": 2.1297267519969423e-05, "loss": 0.2046, "step": 510800 }, { "epoch": 2.87, "learning_rate": 2.1291646289706966e-05, "loss": 0.2073, "step": 510900 }, { "epoch": 2.87, "learning_rate": 2.1286025059444512e-05, "loss": 0.2045, "step": 511000 }, { "epoch": 2.87, "learning_rate": 2.1280403829182055e-05, "loss": 0.206, "step": 511100 }, { "epoch": 2.87, "learning_rate": 2.12747825989196e-05, "loss": 0.2048, "step": 511200 }, { "epoch": 2.87, "learning_rate": 2.1269161368657144e-05, "loss": 0.2073, "step": 511300 }, { "epoch": 2.87, "learning_rate": 2.126354013839469e-05, "loss": 0.2006, "step": 511400 }, { "epoch": 2.88, "learning_rate": 2.1257918908132233e-05, "loss": 0.204, "step": 511500 }, { "epoch": 2.88, "learning_rate": 2.125229767786978e-05, "loss": 0.2063, "step": 511600 }, { "epoch": 2.88, "learning_rate": 2.1246676447607326e-05, "loss": 0.2074, "step": 511700 }, { "epoch": 2.88, "learning_rate": 2.124105521734487e-05, "loss": 0.2064, "step": 511800 }, { "epoch": 2.88, "learning_rate": 2.1235433987082415e-05, "loss": 0.206, "step": 511900 }, { "epoch": 2.88, "learning_rate": 2.1229812756819958e-05, "loss": 0.2006, "step": 512000 }, { "epoch": 2.88, "learning_rate": 2.1224191526557504e-05, "loss": 0.2047, "step": 512100 }, { "epoch": 2.88, "learning_rate": 2.1218570296295047e-05, "loss": 0.1994, "step": 512200 }, { "epoch": 2.88, "learning_rate": 2.1212949066032594e-05, "loss": 0.2019, "step": 512300 }, { "epoch": 2.88, "learning_rate": 2.120732783577014e-05, "loss": 0.21, "step": 512400 }, { "epoch": 2.88, "learning_rate": 2.1201706605507683e-05, "loss": 0.2048, "step": 512500 }, { "epoch": 2.88, "learning_rate": 2.119608537524523e-05, "loss": 0.1953, "step": 512600 }, { "epoch": 2.88, "learning_rate": 2.1190464144982772e-05, "loss": 0.1997, "step": 512700 }, { "epoch": 2.88, "learning_rate": 2.1184842914720315e-05, "loss": 0.2027, "step": 512800 }, { "epoch": 2.88, "learning_rate": 2.117922168445786e-05, "loss": 0.2033, "step": 512900 }, { "epoch": 2.88, "learning_rate": 2.1173600454195404e-05, "loss": 0.2058, "step": 513000 }, { "epoch": 2.88, "learning_rate": 2.116797922393295e-05, "loss": 0.2077, "step": 513100 }, { "epoch": 2.88, "learning_rate": 2.1162357993670496e-05, "loss": 0.2047, "step": 513200 }, { "epoch": 2.89, "learning_rate": 2.1156736763408043e-05, "loss": 0.1971, "step": 513300 }, { "epoch": 2.89, "learning_rate": 2.1151115533145586e-05, "loss": 0.1979, "step": 513400 }, { "epoch": 2.89, "learning_rate": 2.114549430288313e-05, "loss": 0.1977, "step": 513500 }, { "epoch": 2.89, "learning_rate": 2.1139873072620675e-05, "loss": 0.204, "step": 513600 }, { "epoch": 2.89, "learning_rate": 2.1134308054660846e-05, "loss": 0.2016, "step": 513700 }, { "epoch": 2.89, "learning_rate": 2.112868682439839e-05, "loss": 0.2018, "step": 513800 }, { "epoch": 2.89, "learning_rate": 2.112306559413593e-05, "loss": 0.2012, "step": 513900 }, { "epoch": 2.89, "learning_rate": 2.1117444363873478e-05, "loss": 0.2028, "step": 514000 }, { "epoch": 2.89, "learning_rate": 2.111182313361102e-05, "loss": 0.1987, "step": 514100 }, { "epoch": 2.89, "learning_rate": 2.1106201903348567e-05, "loss": 0.1963, "step": 514200 }, { "epoch": 2.89, "learning_rate": 2.1100580673086113e-05, "loss": 0.2026, "step": 514300 }, { "epoch": 2.89, "learning_rate": 2.109495944282366e-05, "loss": 0.1986, "step": 514400 }, { "epoch": 2.89, "learning_rate": 2.1089338212561202e-05, "loss": 0.2053, "step": 514500 }, { "epoch": 2.89, "learning_rate": 2.1083716982298745e-05, "loss": 0.2082, "step": 514600 }, { "epoch": 2.89, "learning_rate": 2.107809575203629e-05, "loss": 0.2029, "step": 514700 }, { "epoch": 2.89, "learning_rate": 2.1072474521773835e-05, "loss": 0.2044, "step": 514800 }, { "epoch": 2.89, "learning_rate": 2.106685329151138e-05, "loss": 0.2005, "step": 514900 }, { "epoch": 2.89, "learning_rate": 2.1061232061248927e-05, "loss": 0.1996, "step": 515000 }, { "epoch": 2.9, "learning_rate": 2.1055610830986473e-05, "loss": 0.2032, "step": 515100 }, { "epoch": 2.9, "learning_rate": 2.1049989600724016e-05, "loss": 0.2044, "step": 515200 }, { "epoch": 2.9, "learning_rate": 2.104436837046156e-05, "loss": 0.2081, "step": 515300 }, { "epoch": 2.9, "learning_rate": 2.1038747140199105e-05, "loss": 0.1985, "step": 515400 }, { "epoch": 2.9, "learning_rate": 2.103312590993665e-05, "loss": 0.2034, "step": 515500 }, { "epoch": 2.9, "learning_rate": 2.1027504679674195e-05, "loss": 0.2034, "step": 515600 }, { "epoch": 2.9, "learning_rate": 2.1021883449411737e-05, "loss": 0.1969, "step": 515700 }, { "epoch": 2.9, "learning_rate": 2.1016262219149284e-05, "loss": 0.2015, "step": 515800 }, { "epoch": 2.9, "learning_rate": 2.101069720118945e-05, "loss": 0.1985, "step": 515900 }, { "epoch": 2.9, "learning_rate": 2.1005075970926998e-05, "loss": 0.2022, "step": 516000 }, { "epoch": 2.9, "learning_rate": 2.0999454740664544e-05, "loss": 0.2062, "step": 516100 }, { "epoch": 2.9, "learning_rate": 2.099383351040209e-05, "loss": 0.1992, "step": 516200 }, { "epoch": 2.9, "learning_rate": 2.0988212280139633e-05, "loss": 0.2095, "step": 516300 }, { "epoch": 2.9, "learning_rate": 2.0982591049877176e-05, "loss": 0.2018, "step": 516400 }, { "epoch": 2.9, "learning_rate": 2.0976969819614722e-05, "loss": 0.2002, "step": 516500 }, { "epoch": 2.9, "learning_rate": 2.0971348589352265e-05, "loss": 0.1942, "step": 516600 }, { "epoch": 2.9, "learning_rate": 2.096572735908981e-05, "loss": 0.1996, "step": 516700 }, { "epoch": 2.91, "learning_rate": 2.0960106128827354e-05, "loss": 0.1993, "step": 516800 }, { "epoch": 2.91, "learning_rate": 2.09544848985649e-05, "loss": 0.2, "step": 516900 }, { "epoch": 2.91, "learning_rate": 2.0948863668302447e-05, "loss": 0.2008, "step": 517000 }, { "epoch": 2.91, "learning_rate": 2.0943298650342615e-05, "loss": 0.2061, "step": 517100 }, { "epoch": 2.91, "learning_rate": 2.093767742008016e-05, "loss": 0.1981, "step": 517200 }, { "epoch": 2.91, "learning_rate": 2.0932056189817707e-05, "loss": 0.2066, "step": 517300 }, { "epoch": 2.91, "learning_rate": 2.092643495955525e-05, "loss": 0.2039, "step": 517400 }, { "epoch": 2.91, "learning_rate": 2.0920813729292793e-05, "loss": 0.2036, "step": 517500 }, { "epoch": 2.91, "learning_rate": 2.091519249903034e-05, "loss": 0.2025, "step": 517600 }, { "epoch": 2.91, "learning_rate": 2.0909571268767882e-05, "loss": 0.2037, "step": 517700 }, { "epoch": 2.91, "learning_rate": 2.0903950038505428e-05, "loss": 0.1987, "step": 517800 }, { "epoch": 2.91, "learning_rate": 2.0898328808242975e-05, "loss": 0.203, "step": 517900 }, { "epoch": 2.91, "learning_rate": 2.0892707577980517e-05, "loss": 0.2049, "step": 518000 }, { "epoch": 2.91, "learning_rate": 2.0887086347718064e-05, "loss": 0.2056, "step": 518100 }, { "epoch": 2.91, "learning_rate": 2.0881465117455607e-05, "loss": 0.2021, "step": 518200 }, { "epoch": 2.91, "learning_rate": 2.0875843887193153e-05, "loss": 0.2019, "step": 518300 }, { "epoch": 2.91, "learning_rate": 2.0870222656930696e-05, "loss": 0.203, "step": 518400 }, { "epoch": 2.91, "learning_rate": 2.0864601426668242e-05, "loss": 0.2053, "step": 518500 }, { "epoch": 2.92, "learning_rate": 2.085903640870841e-05, "loss": 0.1974, "step": 518600 }, { "epoch": 2.92, "learning_rate": 2.0853415178445956e-05, "loss": 0.2042, "step": 518700 }, { "epoch": 2.92, "learning_rate": 2.08477939481835e-05, "loss": 0.1945, "step": 518800 }, { "epoch": 2.92, "learning_rate": 2.0842172717921045e-05, "loss": 0.2026, "step": 518900 }, { "epoch": 2.92, "learning_rate": 2.083655148765859e-05, "loss": 0.2038, "step": 519000 }, { "epoch": 2.92, "learning_rate": 2.0830930257396134e-05, "loss": 0.2042, "step": 519100 }, { "epoch": 2.92, "learning_rate": 2.082530902713368e-05, "loss": 0.2014, "step": 519200 }, { "epoch": 2.92, "learning_rate": 2.0819687796871223e-05, "loss": 0.2057, "step": 519300 }, { "epoch": 2.92, "learning_rate": 2.081406656660877e-05, "loss": 0.2081, "step": 519400 }, { "epoch": 2.92, "learning_rate": 2.0808445336346313e-05, "loss": 0.2031, "step": 519500 }, { "epoch": 2.92, "learning_rate": 2.080282410608386e-05, "loss": 0.2097, "step": 519600 }, { "epoch": 2.92, "learning_rate": 2.0797202875821402e-05, "loss": 0.2021, "step": 519700 }, { "epoch": 2.92, "learning_rate": 2.0791581645558948e-05, "loss": 0.1986, "step": 519800 }, { "epoch": 2.92, "learning_rate": 2.0785960415296494e-05, "loss": 0.2025, "step": 519900 }, { "epoch": 2.92, "learning_rate": 2.0780339185034037e-05, "loss": 0.2019, "step": 520000 }, { "epoch": 2.92, "learning_rate": 2.0774717954771584e-05, "loss": 0.2088, "step": 520100 }, { "epoch": 2.92, "learning_rate": 2.0769096724509126e-05, "loss": 0.2065, "step": 520200 }, { "epoch": 2.92, "learning_rate": 2.0763475494246673e-05, "loss": 0.2036, "step": 520300 }, { "epoch": 2.93, "learning_rate": 2.0757854263984216e-05, "loss": 0.2006, "step": 520400 }, { "epoch": 2.93, "learning_rate": 2.0752233033721762e-05, "loss": 0.2001, "step": 520500 }, { "epoch": 2.93, "learning_rate": 2.0746611803459308e-05, "loss": 0.2018, "step": 520600 }, { "epoch": 2.93, "learning_rate": 2.074099057319685e-05, "loss": 0.2026, "step": 520700 }, { "epoch": 2.93, "learning_rate": 2.0735369342934397e-05, "loss": 0.2019, "step": 520800 }, { "epoch": 2.93, "learning_rate": 2.072974811267194e-05, "loss": 0.198, "step": 520900 }, { "epoch": 2.93, "learning_rate": 2.0724126882409486e-05, "loss": 0.2106, "step": 521000 }, { "epoch": 2.93, "learning_rate": 2.071850565214703e-05, "loss": 0.1999, "step": 521100 }, { "epoch": 2.93, "learning_rate": 2.0712884421884572e-05, "loss": 0.2031, "step": 521200 }, { "epoch": 2.93, "learning_rate": 2.070726319162212e-05, "loss": 0.2016, "step": 521300 }, { "epoch": 2.93, "learning_rate": 2.0701641961359665e-05, "loss": 0.2008, "step": 521400 }, { "epoch": 2.93, "learning_rate": 2.069602073109721e-05, "loss": 0.2015, "step": 521500 }, { "epoch": 2.93, "learning_rate": 2.0690399500834754e-05, "loss": 0.2009, "step": 521600 }, { "epoch": 2.93, "learning_rate": 2.06847782705723e-05, "loss": 0.2046, "step": 521700 }, { "epoch": 2.93, "learning_rate": 2.0679157040309843e-05, "loss": 0.1961, "step": 521800 }, { "epoch": 2.93, "learning_rate": 2.0673535810047386e-05, "loss": 0.2065, "step": 521900 }, { "epoch": 2.93, "learning_rate": 2.0667914579784932e-05, "loss": 0.1993, "step": 522000 }, { "epoch": 2.93, "learning_rate": 2.0662293349522475e-05, "loss": 0.1947, "step": 522100 }, { "epoch": 2.94, "learning_rate": 2.0656672119260025e-05, "loss": 0.2015, "step": 522200 }, { "epoch": 2.94, "learning_rate": 2.065110710130019e-05, "loss": 0.2056, "step": 522300 }, { "epoch": 2.94, "learning_rate": 2.0645485871037735e-05, "loss": 0.2038, "step": 522400 }, { "epoch": 2.94, "learning_rate": 2.063986464077528e-05, "loss": 0.2018, "step": 522500 }, { "epoch": 2.94, "learning_rate": 2.0634243410512828e-05, "loss": 0.2024, "step": 522600 }, { "epoch": 2.94, "learning_rate": 2.062862218025037e-05, "loss": 0.2018, "step": 522700 }, { "epoch": 2.94, "learning_rate": 2.0623000949987914e-05, "loss": 0.2045, "step": 522800 }, { "epoch": 2.94, "learning_rate": 2.061737971972546e-05, "loss": 0.2015, "step": 522900 }, { "epoch": 2.94, "learning_rate": 2.0611758489463003e-05, "loss": 0.2002, "step": 523000 }, { "epoch": 2.94, "learning_rate": 2.060613725920055e-05, "loss": 0.2021, "step": 523100 }, { "epoch": 2.94, "learning_rate": 2.0600516028938095e-05, "loss": 0.203, "step": 523200 }, { "epoch": 2.94, "learning_rate": 2.059489479867564e-05, "loss": 0.2013, "step": 523300 }, { "epoch": 2.94, "learning_rate": 2.0589273568413185e-05, "loss": 0.2034, "step": 523400 }, { "epoch": 2.94, "learning_rate": 2.0583652338150727e-05, "loss": 0.2013, "step": 523500 }, { "epoch": 2.94, "learning_rate": 2.0578031107888274e-05, "loss": 0.2018, "step": 523600 }, { "epoch": 2.94, "learning_rate": 2.0572409877625817e-05, "loss": 0.1984, "step": 523700 }, { "epoch": 2.94, "learning_rate": 2.0566788647363363e-05, "loss": 0.2017, "step": 523800 }, { "epoch": 2.94, "learning_rate": 2.0561167417100906e-05, "loss": 0.2014, "step": 523900 }, { "epoch": 2.95, "learning_rate": 2.0555546186838452e-05, "loss": 0.201, "step": 524000 }, { "epoch": 2.95, "learning_rate": 2.0549924956576e-05, "loss": 0.2024, "step": 524100 }, { "epoch": 2.95, "learning_rate": 2.054430372631354e-05, "loss": 0.2091, "step": 524200 }, { "epoch": 2.95, "learning_rate": 2.0538682496051088e-05, "loss": 0.1988, "step": 524300 }, { "epoch": 2.95, "learning_rate": 2.053311747809126e-05, "loss": 0.1992, "step": 524400 }, { "epoch": 2.95, "learning_rate": 2.05274962478288e-05, "loss": 0.2016, "step": 524500 }, { "epoch": 2.95, "learning_rate": 2.0521875017566344e-05, "loss": 0.2046, "step": 524600 }, { "epoch": 2.95, "learning_rate": 2.051625378730389e-05, "loss": 0.1969, "step": 524700 }, { "epoch": 2.95, "learning_rate": 2.0510632557041433e-05, "loss": 0.2045, "step": 524800 }, { "epoch": 2.95, "learning_rate": 2.050501132677898e-05, "loss": 0.2002, "step": 524900 }, { "epoch": 2.95, "learning_rate": 2.0499390096516523e-05, "loss": 0.2046, "step": 525000 }, { "epoch": 2.95, "learning_rate": 2.049376886625407e-05, "loss": 0.1939, "step": 525100 }, { "epoch": 2.95, "learning_rate": 2.0488147635991615e-05, "loss": 0.2033, "step": 525200 }, { "epoch": 2.95, "learning_rate": 2.0482526405729158e-05, "loss": 0.1999, "step": 525300 }, { "epoch": 2.95, "learning_rate": 2.0476905175466704e-05, "loss": 0.2022, "step": 525400 }, { "epoch": 2.95, "learning_rate": 2.0471283945204247e-05, "loss": 0.2013, "step": 525500 }, { "epoch": 2.95, "learning_rate": 2.0465662714941794e-05, "loss": 0.1994, "step": 525600 }, { "epoch": 2.96, "learning_rate": 2.0460041484679336e-05, "loss": 0.2009, "step": 525700 }, { "epoch": 2.96, "learning_rate": 2.0454420254416883e-05, "loss": 0.1992, "step": 525800 }, { "epoch": 2.96, "learning_rate": 2.044879902415443e-05, "loss": 0.2095, "step": 525900 }, { "epoch": 2.96, "learning_rate": 2.0443177793891972e-05, "loss": 0.2009, "step": 526000 }, { "epoch": 2.96, "learning_rate": 2.0437556563629518e-05, "loss": 0.2051, "step": 526100 }, { "epoch": 2.96, "learning_rate": 2.043193533336706e-05, "loss": 0.1965, "step": 526200 }, { "epoch": 2.96, "learning_rate": 2.0426314103104607e-05, "loss": 0.2029, "step": 526300 }, { "epoch": 2.96, "learning_rate": 2.042069287284215e-05, "loss": 0.1935, "step": 526400 }, { "epoch": 2.96, "learning_rate": 2.0415071642579696e-05, "loss": 0.2005, "step": 526500 }, { "epoch": 2.96, "learning_rate": 2.040945041231724e-05, "loss": 0.2018, "step": 526600 }, { "epoch": 2.96, "learning_rate": 2.0403829182054786e-05, "loss": 0.1969, "step": 526700 }, { "epoch": 2.96, "learning_rate": 2.0398207951792332e-05, "loss": 0.2069, "step": 526800 }, { "epoch": 2.96, "learning_rate": 2.03926429338325e-05, "loss": 0.2031, "step": 526900 }, { "epoch": 2.96, "learning_rate": 2.0387021703570046e-05, "loss": 0.2001, "step": 527000 }, { "epoch": 2.96, "learning_rate": 2.038140047330759e-05, "loss": 0.2014, "step": 527100 }, { "epoch": 2.96, "learning_rate": 2.0375779243045135e-05, "loss": 0.2062, "step": 527200 }, { "epoch": 2.96, "learning_rate": 2.0370158012782678e-05, "loss": 0.2047, "step": 527300 }, { "epoch": 2.96, "learning_rate": 2.0364536782520224e-05, "loss": 0.1988, "step": 527400 }, { "epoch": 2.97, "learning_rate": 2.0358915552257767e-05, "loss": 0.2047, "step": 527500 }, { "epoch": 2.97, "learning_rate": 2.035329432199531e-05, "loss": 0.1956, "step": 527600 }, { "epoch": 2.97, "learning_rate": 2.0347673091732856e-05, "loss": 0.204, "step": 527700 }, { "epoch": 2.97, "learning_rate": 2.0342051861470402e-05, "loss": 0.1973, "step": 527800 }, { "epoch": 2.97, "learning_rate": 2.033643063120795e-05, "loss": 0.1975, "step": 527900 }, { "epoch": 2.97, "learning_rate": 2.033080940094549e-05, "loss": 0.1988, "step": 528000 }, { "epoch": 2.97, "learning_rate": 2.0325188170683038e-05, "loss": 0.1986, "step": 528100 }, { "epoch": 2.97, "learning_rate": 2.031956694042058e-05, "loss": 0.2033, "step": 528200 }, { "epoch": 2.97, "learning_rate": 2.0313945710158124e-05, "loss": 0.2007, "step": 528300 }, { "epoch": 2.97, "learning_rate": 2.030832447989567e-05, "loss": 0.2025, "step": 528400 }, { "epoch": 2.97, "learning_rate": 2.0302703249633216e-05, "loss": 0.201, "step": 528500 }, { "epoch": 2.97, "learning_rate": 2.0297082019370763e-05, "loss": 0.1986, "step": 528600 }, { "epoch": 2.97, "learning_rate": 2.0291460789108305e-05, "loss": 0.198, "step": 528700 }, { "epoch": 2.97, "learning_rate": 2.0285839558845852e-05, "loss": 0.1944, "step": 528800 }, { "epoch": 2.97, "learning_rate": 2.0280218328583395e-05, "loss": 0.2019, "step": 528900 }, { "epoch": 2.97, "learning_rate": 2.0274597098320937e-05, "loss": 0.2053, "step": 529000 }, { "epoch": 2.97, "learning_rate": 2.0268975868058484e-05, "loss": 0.2017, "step": 529100 }, { "epoch": 2.97, "learning_rate": 2.0263354637796027e-05, "loss": 0.2001, "step": 529200 }, { "epoch": 2.98, "learning_rate": 2.0257733407533573e-05, "loss": 0.2023, "step": 529300 }, { "epoch": 2.98, "learning_rate": 2.025211217727112e-05, "loss": 0.2102, "step": 529400 }, { "epoch": 2.98, "learning_rate": 2.0246490947008665e-05, "loss": 0.2049, "step": 529500 }, { "epoch": 2.98, "learning_rate": 2.024086971674621e-05, "loss": 0.2079, "step": 529600 }, { "epoch": 2.98, "learning_rate": 2.023530469878638e-05, "loss": 0.2022, "step": 529700 }, { "epoch": 2.98, "learning_rate": 2.0229683468523922e-05, "loss": 0.2034, "step": 529800 }, { "epoch": 2.98, "learning_rate": 2.022406223826147e-05, "loss": 0.1966, "step": 529900 }, { "epoch": 2.98, "learning_rate": 2.021844100799901e-05, "loss": 0.2044, "step": 530000 }, { "epoch": 2.98, "learning_rate": 2.0212819777736554e-05, "loss": 0.2019, "step": 530100 }, { "epoch": 2.98, "learning_rate": 2.02071985474741e-05, "loss": 0.2035, "step": 530200 }, { "epoch": 2.98, "learning_rate": 2.0201577317211644e-05, "loss": 0.1978, "step": 530300 }, { "epoch": 2.98, "learning_rate": 2.0195956086949193e-05, "loss": 0.1966, "step": 530400 }, { "epoch": 2.98, "learning_rate": 2.0190334856686736e-05, "loss": 0.1989, "step": 530500 }, { "epoch": 2.98, "learning_rate": 2.0184713626424282e-05, "loss": 0.2027, "step": 530600 }, { "epoch": 2.98, "learning_rate": 2.0179092396161825e-05, "loss": 0.2092, "step": 530700 }, { "epoch": 2.98, "learning_rate": 2.0173471165899368e-05, "loss": 0.2047, "step": 530800 }, { "epoch": 2.98, "learning_rate": 2.0167849935636914e-05, "loss": 0.198, "step": 530900 }, { "epoch": 2.98, "learning_rate": 2.0162228705374457e-05, "loss": 0.2015, "step": 531000 }, { "epoch": 2.99, "learning_rate": 2.0156607475112004e-05, "loss": 0.1997, "step": 531100 }, { "epoch": 2.99, "learning_rate": 2.015098624484955e-05, "loss": 0.2018, "step": 531200 }, { "epoch": 2.99, "learning_rate": 2.0145365014587096e-05, "loss": 0.2055, "step": 531300 }, { "epoch": 2.99, "learning_rate": 2.013974378432464e-05, "loss": 0.1978, "step": 531400 }, { "epoch": 2.99, "learning_rate": 2.0134122554062182e-05, "loss": 0.2091, "step": 531500 }, { "epoch": 2.99, "learning_rate": 2.0128501323799728e-05, "loss": 0.2076, "step": 531600 }, { "epoch": 2.99, "learning_rate": 2.012288009353727e-05, "loss": 0.197, "step": 531700 }, { "epoch": 2.99, "learning_rate": 2.0117258863274817e-05, "loss": 0.2009, "step": 531800 }, { "epoch": 2.99, "learning_rate": 2.011163763301236e-05, "loss": 0.2034, "step": 531900 }, { "epoch": 2.99, "learning_rate": 2.0106016402749906e-05, "loss": 0.2015, "step": 532000 }, { "epoch": 2.99, "learning_rate": 2.0100395172487453e-05, "loss": 0.1995, "step": 532100 }, { "epoch": 2.99, "learning_rate": 2.0094773942224996e-05, "loss": 0.1945, "step": 532200 }, { "epoch": 2.99, "learning_rate": 2.0089152711962542e-05, "loss": 0.1973, "step": 532300 }, { "epoch": 2.99, "learning_rate": 2.0083531481700085e-05, "loss": 0.2011, "step": 532400 }, { "epoch": 2.99, "learning_rate": 2.007791025143763e-05, "loss": 0.2032, "step": 532500 }, { "epoch": 2.99, "learning_rate": 2.0072289021175174e-05, "loss": 0.1971, "step": 532600 }, { "epoch": 2.99, "learning_rate": 2.006666779091272e-05, "loss": 0.206, "step": 532700 }, { "epoch": 2.99, "learning_rate": 2.0061046560650267e-05, "loss": 0.2048, "step": 532800 }, { "epoch": 3.0, "learning_rate": 2.005542533038781e-05, "loss": 0.203, "step": 532900 }, { "epoch": 3.0, "learning_rate": 2.0049804100125356e-05, "loss": 0.1993, "step": 533000 }, { "epoch": 3.0, "learning_rate": 2.00441828698629e-05, "loss": 0.1955, "step": 533100 }, { "epoch": 3.0, "learning_rate": 2.0038561639600445e-05, "loss": 0.2115, "step": 533200 }, { "epoch": 3.0, "learning_rate": 2.0032940409337988e-05, "loss": 0.2027, "step": 533300 }, { "epoch": 3.0, "learning_rate": 2.002731917907553e-05, "loss": 0.2022, "step": 533400 }, { "epoch": 3.0, "learning_rate": 2.0021697948813077e-05, "loss": 0.2045, "step": 533500 }, { "epoch": 3.0, "learning_rate": 2.0016076718550623e-05, "loss": 0.2043, "step": 533600 }, { "epoch": 3.0, "eval_bleu": 77.0962, "eval_cer": 2.307, "eval_chrF": 95.38492169626633, "eval_gen_len": 16.7681, "eval_loss": 0.491799920797348, "eval_runtime": 7195.3567, "eval_samples_per_second": 34.745, "eval_steps_per_second": 0.543, "eval_wer": 12.7609, "step": 533691 }, { "epoch": 3.0, "learning_rate": 2.001045548828817e-05, "loss": 0.2026, "step": 533700 }, { "epoch": 3.0, "learning_rate": 2.0004834258025712e-05, "loss": 0.191, "step": 533800 }, { "epoch": 3.0, "learning_rate": 1.999921302776326e-05, "loss": 0.1869, "step": 533900 }, { "epoch": 3.0, "learning_rate": 1.99935917975008e-05, "loss": 0.1822, "step": 534000 }, { "epoch": 3.0, "learning_rate": 1.9987970567238344e-05, "loss": 0.1805, "step": 534100 }, { "epoch": 3.0, "learning_rate": 1.998234933697589e-05, "loss": 0.1901, "step": 534200 }, { "epoch": 3.0, "learning_rate": 1.9976728106713437e-05, "loss": 0.1843, "step": 534300 }, { "epoch": 3.0, "learning_rate": 1.9971106876450983e-05, "loss": 0.1858, "step": 534400 }, { "epoch": 3.0, "learning_rate": 1.9965485646188526e-05, "loss": 0.1856, "step": 534500 }, { "epoch": 3.01, "learning_rate": 1.9959864415926072e-05, "loss": 0.1902, "step": 534600 }, { "epoch": 3.01, "learning_rate": 1.9954243185663615e-05, "loss": 0.189, "step": 534700 }, { "epoch": 3.01, "learning_rate": 1.9948621955401158e-05, "loss": 0.1836, "step": 534800 }, { "epoch": 3.01, "learning_rate": 1.9943000725138704e-05, "loss": 0.1848, "step": 534900 }, { "epoch": 3.01, "learning_rate": 1.9937379494876247e-05, "loss": 0.1872, "step": 535000 }, { "epoch": 3.01, "learning_rate": 1.9931758264613794e-05, "loss": 0.1825, "step": 535100 }, { "epoch": 3.01, "learning_rate": 1.992613703435134e-05, "loss": 0.1763, "step": 535200 }, { "epoch": 3.01, "learning_rate": 1.9920515804088886e-05, "loss": 0.1904, "step": 535300 }, { "epoch": 3.01, "learning_rate": 1.991489457382643e-05, "loss": 0.1833, "step": 535400 }, { "epoch": 3.01, "learning_rate": 1.9909273343563972e-05, "loss": 0.1832, "step": 535500 }, { "epoch": 3.01, "learning_rate": 1.9903652113301518e-05, "loss": 0.1854, "step": 535600 }, { "epoch": 3.01, "learning_rate": 1.989803088303906e-05, "loss": 0.1907, "step": 535700 }, { "epoch": 3.01, "learning_rate": 1.9892409652776607e-05, "loss": 0.1883, "step": 535800 }, { "epoch": 3.01, "learning_rate": 1.9886844634816775e-05, "loss": 0.1766, "step": 535900 }, { "epoch": 3.01, "learning_rate": 1.988122340455432e-05, "loss": 0.1868, "step": 536000 }, { "epoch": 3.01, "learning_rate": 1.9875602174291864e-05, "loss": 0.1894, "step": 536100 }, { "epoch": 3.01, "learning_rate": 1.986998094402941e-05, "loss": 0.1837, "step": 536200 }, { "epoch": 3.01, "learning_rate": 1.9864359713766957e-05, "loss": 0.182, "step": 536300 }, { "epoch": 3.02, "learning_rate": 1.9858738483504503e-05, "loss": 0.1806, "step": 536400 }, { "epoch": 3.02, "learning_rate": 1.9853117253242046e-05, "loss": 0.1889, "step": 536500 }, { "epoch": 3.02, "learning_rate": 1.984749602297959e-05, "loss": 0.1811, "step": 536600 }, { "epoch": 3.02, "learning_rate": 1.9841874792717135e-05, "loss": 0.1867, "step": 536700 }, { "epoch": 3.02, "learning_rate": 1.9836253562454678e-05, "loss": 0.1883, "step": 536800 }, { "epoch": 3.02, "learning_rate": 1.9830632332192224e-05, "loss": 0.1781, "step": 536900 }, { "epoch": 3.02, "learning_rate": 1.982501110192977e-05, "loss": 0.1898, "step": 537000 }, { "epoch": 3.02, "learning_rate": 1.9819389871667317e-05, "loss": 0.1877, "step": 537100 }, { "epoch": 3.02, "learning_rate": 1.981376864140486e-05, "loss": 0.1797, "step": 537200 }, { "epoch": 3.02, "learning_rate": 1.9808147411142403e-05, "loss": 0.1896, "step": 537300 }, { "epoch": 3.02, "learning_rate": 1.980252618087995e-05, "loss": 0.1848, "step": 537400 }, { "epoch": 3.02, "learning_rate": 1.9796904950617492e-05, "loss": 0.1905, "step": 537500 }, { "epoch": 3.02, "learning_rate": 1.9791283720355038e-05, "loss": 0.1819, "step": 537600 }, { "epoch": 3.02, "learning_rate": 1.978566249009258e-05, "loss": 0.1879, "step": 537700 }, { "epoch": 3.02, "learning_rate": 1.9780041259830127e-05, "loss": 0.1805, "step": 537800 }, { "epoch": 3.02, "learning_rate": 1.9774420029567673e-05, "loss": 0.1817, "step": 537900 }, { "epoch": 3.02, "learning_rate": 1.9768798799305216e-05, "loss": 0.1916, "step": 538000 }, { "epoch": 3.02, "learning_rate": 1.9763177569042763e-05, "loss": 0.1881, "step": 538100 }, { "epoch": 3.03, "learning_rate": 1.975761255108293e-05, "loss": 0.1836, "step": 538200 }, { "epoch": 3.03, "learning_rate": 1.97520475331231e-05, "loss": 0.1816, "step": 538300 }, { "epoch": 3.03, "learning_rate": 1.9746426302860648e-05, "loss": 0.1832, "step": 538400 }, { "epoch": 3.03, "learning_rate": 1.974080507259819e-05, "loss": 0.1855, "step": 538500 }, { "epoch": 3.03, "learning_rate": 1.9735183842335733e-05, "loss": 0.1866, "step": 538600 }, { "epoch": 3.03, "learning_rate": 1.972956261207328e-05, "loss": 0.1856, "step": 538700 }, { "epoch": 3.03, "learning_rate": 1.9723941381810823e-05, "loss": 0.1846, "step": 538800 }, { "epoch": 3.03, "learning_rate": 1.971832015154837e-05, "loss": 0.1845, "step": 538900 }, { "epoch": 3.03, "learning_rate": 1.9712698921285912e-05, "loss": 0.1863, "step": 539000 }, { "epoch": 3.03, "learning_rate": 1.9707077691023458e-05, "loss": 0.1856, "step": 539100 }, { "epoch": 3.03, "learning_rate": 1.9701456460761004e-05, "loss": 0.1813, "step": 539200 }, { "epoch": 3.03, "learning_rate": 1.9695835230498547e-05, "loss": 0.1862, "step": 539300 }, { "epoch": 3.03, "learning_rate": 1.9690214000236093e-05, "loss": 0.1869, "step": 539400 }, { "epoch": 3.03, "learning_rate": 1.9684592769973636e-05, "loss": 0.1874, "step": 539500 }, { "epoch": 3.03, "learning_rate": 1.9678971539711183e-05, "loss": 0.1907, "step": 539600 }, { "epoch": 3.03, "learning_rate": 1.9673350309448725e-05, "loss": 0.19, "step": 539700 }, { "epoch": 3.03, "learning_rate": 1.9667729079186272e-05, "loss": 0.1875, "step": 539800 }, { "epoch": 3.03, "learning_rate": 1.9662107848923818e-05, "loss": 0.1842, "step": 539900 }, { "epoch": 3.04, "learning_rate": 1.965648661866136e-05, "loss": 0.1914, "step": 540000 }, { "epoch": 3.04, "learning_rate": 1.9650865388398907e-05, "loss": 0.186, "step": 540100 }, { "epoch": 3.04, "learning_rate": 1.964524415813645e-05, "loss": 0.1886, "step": 540200 }, { "epoch": 3.04, "learning_rate": 1.9639622927873996e-05, "loss": 0.1832, "step": 540300 }, { "epoch": 3.04, "learning_rate": 1.963400169761154e-05, "loss": 0.1872, "step": 540400 }, { "epoch": 3.04, "learning_rate": 1.9628380467349086e-05, "loss": 0.189, "step": 540500 }, { "epoch": 3.04, "learning_rate": 1.962275923708663e-05, "loss": 0.1847, "step": 540600 }, { "epoch": 3.04, "learning_rate": 1.9617138006824175e-05, "loss": 0.1868, "step": 540700 }, { "epoch": 3.04, "learning_rate": 1.961151677656172e-05, "loss": 0.1856, "step": 540800 }, { "epoch": 3.04, "learning_rate": 1.9605895546299264e-05, "loss": 0.1882, "step": 540900 }, { "epoch": 3.04, "learning_rate": 1.960027431603681e-05, "loss": 0.184, "step": 541000 }, { "epoch": 3.04, "learning_rate": 1.9594653085774353e-05, "loss": 0.1916, "step": 541100 }, { "epoch": 3.04, "learning_rate": 1.95890318555119e-05, "loss": 0.1896, "step": 541200 }, { "epoch": 3.04, "learning_rate": 1.9583410625249442e-05, "loss": 0.1859, "step": 541300 }, { "epoch": 3.04, "learning_rate": 1.9577789394986985e-05, "loss": 0.1809, "step": 541400 }, { "epoch": 3.04, "learning_rate": 1.9572168164724535e-05, "loss": 0.1942, "step": 541500 }, { "epoch": 3.04, "learning_rate": 1.9566546934462078e-05, "loss": 0.1889, "step": 541600 }, { "epoch": 3.05, "learning_rate": 1.9560925704199624e-05, "loss": 0.1861, "step": 541700 }, { "epoch": 3.05, "learning_rate": 1.9555304473937167e-05, "loss": 0.1845, "step": 541800 }, { "epoch": 3.05, "learning_rate": 1.9549683243674713e-05, "loss": 0.1849, "step": 541900 }, { "epoch": 3.05, "learning_rate": 1.9544062013412256e-05, "loss": 0.1907, "step": 542000 }, { "epoch": 3.05, "learning_rate": 1.95384407831498e-05, "loss": 0.1959, "step": 542100 }, { "epoch": 3.05, "learning_rate": 1.9532819552887345e-05, "loss": 0.1927, "step": 542200 }, { "epoch": 3.05, "learning_rate": 1.952719832262489e-05, "loss": 0.1861, "step": 542300 }, { "epoch": 3.05, "learning_rate": 1.9521577092362438e-05, "loss": 0.1894, "step": 542400 }, { "epoch": 3.05, "learning_rate": 1.951595586209998e-05, "loss": 0.1909, "step": 542500 }, { "epoch": 3.05, "learning_rate": 1.9510334631837523e-05, "loss": 0.1892, "step": 542600 }, { "epoch": 3.05, "learning_rate": 1.950471340157507e-05, "loss": 0.1847, "step": 542700 }, { "epoch": 3.05, "learning_rate": 1.9499092171312613e-05, "loss": 0.1858, "step": 542800 }, { "epoch": 3.05, "learning_rate": 1.9493527153352784e-05, "loss": 0.1863, "step": 542900 }, { "epoch": 3.05, "learning_rate": 1.9487905923090327e-05, "loss": 0.185, "step": 543000 }, { "epoch": 3.05, "learning_rate": 1.9482284692827873e-05, "loss": 0.1815, "step": 543100 }, { "epoch": 3.05, "learning_rate": 1.9476663462565416e-05, "loss": 0.1893, "step": 543200 }, { "epoch": 3.05, "learning_rate": 1.9471042232302962e-05, "loss": 0.1868, "step": 543300 }, { "epoch": 3.05, "learning_rate": 1.9465421002040508e-05, "loss": 0.1878, "step": 543400 }, { "epoch": 3.06, "learning_rate": 1.9459799771778055e-05, "loss": 0.1847, "step": 543500 }, { "epoch": 3.06, "learning_rate": 1.9454234753818222e-05, "loss": 0.1851, "step": 543600 }, { "epoch": 3.06, "learning_rate": 1.944861352355577e-05, "loss": 0.1829, "step": 543700 }, { "epoch": 3.06, "learning_rate": 1.944299229329331e-05, "loss": 0.1827, "step": 543800 }, { "epoch": 3.06, "learning_rate": 1.9437371063030858e-05, "loss": 0.1878, "step": 543900 }, { "epoch": 3.06, "learning_rate": 1.94317498327684e-05, "loss": 0.1849, "step": 544000 }, { "epoch": 3.06, "learning_rate": 1.9426128602505943e-05, "loss": 0.1816, "step": 544100 }, { "epoch": 3.06, "learning_rate": 1.942050737224349e-05, "loss": 0.1865, "step": 544200 }, { "epoch": 3.06, "learning_rate": 1.9414886141981033e-05, "loss": 0.1861, "step": 544300 }, { "epoch": 3.06, "learning_rate": 1.940926491171858e-05, "loss": 0.188, "step": 544400 }, { "epoch": 3.06, "learning_rate": 1.9403643681456125e-05, "loss": 0.1891, "step": 544500 }, { "epoch": 3.06, "learning_rate": 1.9398078663496293e-05, "loss": 0.1936, "step": 544600 }, { "epoch": 3.06, "learning_rate": 1.939245743323384e-05, "loss": 0.1854, "step": 544700 }, { "epoch": 3.06, "learning_rate": 1.9386836202971385e-05, "loss": 0.1909, "step": 544800 }, { "epoch": 3.06, "learning_rate": 1.9381214972708928e-05, "loss": 0.1874, "step": 544900 }, { "epoch": 3.06, "learning_rate": 1.9375593742446474e-05, "loss": 0.1829, "step": 545000 }, { "epoch": 3.06, "learning_rate": 1.9369972512184017e-05, "loss": 0.1822, "step": 545100 }, { "epoch": 3.06, "learning_rate": 1.936435128192156e-05, "loss": 0.1898, "step": 545200 }, { "epoch": 3.07, "learning_rate": 1.9358730051659107e-05, "loss": 0.1849, "step": 545300 }, { "epoch": 3.07, "learning_rate": 1.935310882139665e-05, "loss": 0.1917, "step": 545400 }, { "epoch": 3.07, "learning_rate": 1.93474875911342e-05, "loss": 0.1865, "step": 545500 }, { "epoch": 3.07, "learning_rate": 1.9341866360871742e-05, "loss": 0.1868, "step": 545600 }, { "epoch": 3.07, "learning_rate": 1.9336245130609288e-05, "loss": 0.1881, "step": 545700 }, { "epoch": 3.07, "learning_rate": 1.933062390034683e-05, "loss": 0.1929, "step": 545800 }, { "epoch": 3.07, "learning_rate": 1.9325002670084374e-05, "loss": 0.1837, "step": 545900 }, { "epoch": 3.07, "learning_rate": 1.931938143982192e-05, "loss": 0.189, "step": 546000 }, { "epoch": 3.07, "learning_rate": 1.9313760209559463e-05, "loss": 0.1876, "step": 546100 }, { "epoch": 3.07, "learning_rate": 1.930813897929701e-05, "loss": 0.1873, "step": 546200 }, { "epoch": 3.07, "learning_rate": 1.9302517749034556e-05, "loss": 0.1865, "step": 546300 }, { "epoch": 3.07, "learning_rate": 1.9296896518772102e-05, "loss": 0.1802, "step": 546400 }, { "epoch": 3.07, "learning_rate": 1.9291275288509645e-05, "loss": 0.1838, "step": 546500 }, { "epoch": 3.07, "learning_rate": 1.9285654058247188e-05, "loss": 0.19, "step": 546600 }, { "epoch": 3.07, "learning_rate": 1.9280032827984734e-05, "loss": 0.1845, "step": 546700 }, { "epoch": 3.07, "learning_rate": 1.9274411597722277e-05, "loss": 0.1844, "step": 546800 }, { "epoch": 3.07, "learning_rate": 1.9268790367459823e-05, "loss": 0.1834, "step": 546900 }, { "epoch": 3.07, "learning_rate": 1.9263169137197366e-05, "loss": 0.184, "step": 547000 }, { "epoch": 3.08, "learning_rate": 1.9257547906934912e-05, "loss": 0.1927, "step": 547100 }, { "epoch": 3.08, "learning_rate": 1.925192667667246e-05, "loss": 0.1861, "step": 547200 }, { "epoch": 3.08, "learning_rate": 1.924630544641e-05, "loss": 0.1944, "step": 547300 }, { "epoch": 3.08, "learning_rate": 1.9240740428450173e-05, "loss": 0.1898, "step": 547400 }, { "epoch": 3.08, "learning_rate": 1.9235119198187715e-05, "loss": 0.1864, "step": 547500 }, { "epoch": 3.08, "learning_rate": 1.9229497967925262e-05, "loss": 0.1839, "step": 547600 }, { "epoch": 3.08, "learning_rate": 1.9223876737662805e-05, "loss": 0.1808, "step": 547700 }, { "epoch": 3.08, "learning_rate": 1.921825550740035e-05, "loss": 0.1869, "step": 547800 }, { "epoch": 3.08, "learning_rate": 1.9212634277137894e-05, "loss": 0.1863, "step": 547900 }, { "epoch": 3.08, "learning_rate": 1.920701304687544e-05, "loss": 0.1809, "step": 548000 }, { "epoch": 3.08, "learning_rate": 1.9201391816612986e-05, "loss": 0.1801, "step": 548100 }, { "epoch": 3.08, "learning_rate": 1.919577058635053e-05, "loss": 0.1921, "step": 548200 }, { "epoch": 3.08, "learning_rate": 1.9190149356088076e-05, "loss": 0.1827, "step": 548300 }, { "epoch": 3.08, "learning_rate": 1.918452812582562e-05, "loss": 0.1813, "step": 548400 }, { "epoch": 3.08, "learning_rate": 1.9178906895563165e-05, "loss": 0.1841, "step": 548500 }, { "epoch": 3.08, "learning_rate": 1.9173285665300708e-05, "loss": 0.1807, "step": 548600 }, { "epoch": 3.08, "learning_rate": 1.9167664435038254e-05, "loss": 0.1832, "step": 548700 }, { "epoch": 3.08, "learning_rate": 1.9162043204775797e-05, "loss": 0.1849, "step": 548800 }, { "epoch": 3.09, "learning_rate": 1.9156421974513343e-05, "loss": 0.1817, "step": 548900 }, { "epoch": 3.09, "learning_rate": 1.915080074425089e-05, "loss": 0.1853, "step": 549000 }, { "epoch": 3.09, "learning_rate": 1.9145179513988432e-05, "loss": 0.1813, "step": 549100 }, { "epoch": 3.09, "learning_rate": 1.913955828372598e-05, "loss": 0.1872, "step": 549200 }, { "epoch": 3.09, "learning_rate": 1.913393705346352e-05, "loss": 0.1866, "step": 549300 }, { "epoch": 3.09, "learning_rate": 1.9128315823201068e-05, "loss": 0.1818, "step": 549400 }, { "epoch": 3.09, "learning_rate": 1.912269459293861e-05, "loss": 0.1818, "step": 549500 }, { "epoch": 3.09, "learning_rate": 1.9117073362676153e-05, "loss": 0.1792, "step": 549600 }, { "epoch": 3.09, "learning_rate": 1.91114521324137e-05, "loss": 0.1861, "step": 549700 }, { "epoch": 3.09, "learning_rate": 1.9105830902151246e-05, "loss": 0.1913, "step": 549800 }, { "epoch": 3.09, "learning_rate": 1.9100209671888792e-05, "loss": 0.1822, "step": 549900 }, { "epoch": 3.09, "learning_rate": 1.9094588441626335e-05, "loss": 0.1883, "step": 550000 }, { "epoch": 3.09, "learning_rate": 1.908896721136388e-05, "loss": 0.1885, "step": 550100 }, { "epoch": 3.09, "learning_rate": 1.9083345981101424e-05, "loss": 0.188, "step": 550200 }, { "epoch": 3.09, "learning_rate": 1.9077724750838967e-05, "loss": 0.1853, "step": 550300 }, { "epoch": 3.09, "learning_rate": 1.9072103520576513e-05, "loss": 0.1826, "step": 550400 }, { "epoch": 3.09, "learning_rate": 1.906648229031406e-05, "loss": 0.1834, "step": 550500 }, { "epoch": 3.1, "learning_rate": 1.9060861060051606e-05, "loss": 0.1847, "step": 550600 }, { "epoch": 3.1, "learning_rate": 1.905523982978915e-05, "loss": 0.1946, "step": 550700 }, { "epoch": 3.1, "learning_rate": 1.9049618599526695e-05, "loss": 0.183, "step": 550800 }, { "epoch": 3.1, "learning_rate": 1.9043997369264238e-05, "loss": 0.1896, "step": 550900 }, { "epoch": 3.1, "learning_rate": 1.903837613900178e-05, "loss": 0.1839, "step": 551000 }, { "epoch": 3.1, "learning_rate": 1.9032754908739327e-05, "loss": 0.1866, "step": 551100 }, { "epoch": 3.1, "learning_rate": 1.902713367847687e-05, "loss": 0.1885, "step": 551200 }, { "epoch": 3.1, "learning_rate": 1.9021512448214416e-05, "loss": 0.1809, "step": 551300 }, { "epoch": 3.1, "learning_rate": 1.9015891217951963e-05, "loss": 0.1771, "step": 551400 }, { "epoch": 3.1, "learning_rate": 1.901026998768951e-05, "loss": 0.1889, "step": 551500 }, { "epoch": 3.1, "learning_rate": 1.9004704969729677e-05, "loss": 0.1805, "step": 551600 }, { "epoch": 3.1, "learning_rate": 1.8999083739467223e-05, "loss": 0.1851, "step": 551700 }, { "epoch": 3.1, "learning_rate": 1.8993462509204766e-05, "loss": 0.1872, "step": 551800 }, { "epoch": 3.1, "learning_rate": 1.8987841278942312e-05, "loss": 0.18, "step": 551900 }, { "epoch": 3.1, "learning_rate": 1.8982220048679855e-05, "loss": 0.1888, "step": 552000 }, { "epoch": 3.1, "learning_rate": 1.8976598818417398e-05, "loss": 0.1844, "step": 552100 }, { "epoch": 3.1, "learning_rate": 1.8970977588154944e-05, "loss": 0.1894, "step": 552200 }, { "epoch": 3.1, "learning_rate": 1.8965356357892487e-05, "loss": 0.1913, "step": 552300 }, { "epoch": 3.11, "learning_rate": 1.8959735127630037e-05, "loss": 0.1904, "step": 552400 }, { "epoch": 3.11, "learning_rate": 1.895411389736758e-05, "loss": 0.1913, "step": 552500 }, { "epoch": 3.11, "learning_rate": 1.8948492667105122e-05, "loss": 0.1891, "step": 552600 }, { "epoch": 3.11, "learning_rate": 1.894287143684267e-05, "loss": 0.1914, "step": 552700 }, { "epoch": 3.11, "learning_rate": 1.893725020658021e-05, "loss": 0.1859, "step": 552800 }, { "epoch": 3.11, "learning_rate": 1.8931628976317758e-05, "loss": 0.1894, "step": 552900 }, { "epoch": 3.11, "learning_rate": 1.89260077460553e-05, "loss": 0.1877, "step": 553000 }, { "epoch": 3.11, "learning_rate": 1.8920386515792847e-05, "loss": 0.1873, "step": 553100 }, { "epoch": 3.11, "learning_rate": 1.8914765285530393e-05, "loss": 0.1894, "step": 553200 }, { "epoch": 3.11, "learning_rate": 1.8909144055267936e-05, "loss": 0.1841, "step": 553300 }, { "epoch": 3.11, "learning_rate": 1.8903522825005482e-05, "loss": 0.1908, "step": 553400 }, { "epoch": 3.11, "learning_rate": 1.8897901594743025e-05, "loss": 0.181, "step": 553500 }, { "epoch": 3.11, "learning_rate": 1.889228036448057e-05, "loss": 0.1883, "step": 553600 }, { "epoch": 3.11, "learning_rate": 1.8886659134218115e-05, "loss": 0.189, "step": 553700 }, { "epoch": 3.11, "learning_rate": 1.888103790395566e-05, "loss": 0.1856, "step": 553800 }, { "epoch": 3.11, "learning_rate": 1.8875416673693204e-05, "loss": 0.1885, "step": 553900 }, { "epoch": 3.11, "learning_rate": 1.886979544343075e-05, "loss": 0.1887, "step": 554000 }, { "epoch": 3.11, "learning_rate": 1.8864174213168296e-05, "loss": 0.179, "step": 554100 }, { "epoch": 3.12, "learning_rate": 1.885855298290584e-05, "loss": 0.1834, "step": 554200 }, { "epoch": 3.12, "learning_rate": 1.8852931752643385e-05, "loss": 0.1831, "step": 554300 }, { "epoch": 3.12, "learning_rate": 1.8847310522380928e-05, "loss": 0.1878, "step": 554400 }, { "epoch": 3.12, "learning_rate": 1.8841689292118475e-05, "loss": 0.1841, "step": 554500 }, { "epoch": 3.12, "learning_rate": 1.8836068061856017e-05, "loss": 0.1816, "step": 554600 }, { "epoch": 3.12, "learning_rate": 1.8830446831593564e-05, "loss": 0.1901, "step": 554700 }, { "epoch": 3.12, "learning_rate": 1.882482560133111e-05, "loss": 0.1777, "step": 554800 }, { "epoch": 3.12, "learning_rate": 1.8819204371068653e-05, "loss": 0.1867, "step": 554900 }, { "epoch": 3.12, "learning_rate": 1.88135831408062e-05, "loss": 0.191, "step": 555000 }, { "epoch": 3.12, "learning_rate": 1.8807961910543742e-05, "loss": 0.1818, "step": 555100 }, { "epoch": 3.12, "learning_rate": 1.880234068028129e-05, "loss": 0.19, "step": 555200 }, { "epoch": 3.12, "learning_rate": 1.879671945001883e-05, "loss": 0.1826, "step": 555300 }, { "epoch": 3.12, "learning_rate": 1.8791098219756374e-05, "loss": 0.1856, "step": 555400 }, { "epoch": 3.12, "learning_rate": 1.878547698949392e-05, "loss": 0.1838, "step": 555500 }, { "epoch": 3.12, "learning_rate": 1.8779855759231467e-05, "loss": 0.187, "step": 555600 }, { "epoch": 3.12, "learning_rate": 1.8774234528969013e-05, "loss": 0.1824, "step": 555700 }, { "epoch": 3.12, "learning_rate": 1.8768613298706556e-05, "loss": 0.1828, "step": 555800 }, { "epoch": 3.12, "learning_rate": 1.8762992068444102e-05, "loss": 0.1801, "step": 555900 }, { "epoch": 3.13, "learning_rate": 1.8757370838181645e-05, "loss": 0.1857, "step": 556000 }, { "epoch": 3.13, "learning_rate": 1.8751749607919188e-05, "loss": 0.1838, "step": 556100 }, { "epoch": 3.13, "learning_rate": 1.8746128377656734e-05, "loss": 0.1876, "step": 556200 }, { "epoch": 3.13, "learning_rate": 1.8740563359696905e-05, "loss": 0.1854, "step": 556300 }, { "epoch": 3.13, "learning_rate": 1.8734942129434448e-05, "loss": 0.189, "step": 556400 }, { "epoch": 3.13, "learning_rate": 1.872932089917199e-05, "loss": 0.1778, "step": 556500 }, { "epoch": 3.13, "learning_rate": 1.872369966890954e-05, "loss": 0.1873, "step": 556600 }, { "epoch": 3.13, "learning_rate": 1.8718078438647084e-05, "loss": 0.184, "step": 556700 }, { "epoch": 3.13, "learning_rate": 1.871245720838463e-05, "loss": 0.1862, "step": 556800 }, { "epoch": 3.13, "learning_rate": 1.8706835978122173e-05, "loss": 0.1823, "step": 556900 }, { "epoch": 3.13, "learning_rate": 1.870121474785972e-05, "loss": 0.1842, "step": 557000 }, { "epoch": 3.13, "learning_rate": 1.8695593517597262e-05, "loss": 0.1885, "step": 557100 }, { "epoch": 3.13, "learning_rate": 1.8689972287334805e-05, "loss": 0.1888, "step": 557200 }, { "epoch": 3.13, "learning_rate": 1.868435105707235e-05, "loss": 0.1857, "step": 557300 }, { "epoch": 3.13, "learning_rate": 1.867878603911252e-05, "loss": 0.1812, "step": 557400 }, { "epoch": 3.13, "learning_rate": 1.8673164808850065e-05, "loss": 0.1827, "step": 557500 }, { "epoch": 3.13, "learning_rate": 1.866754357858761e-05, "loss": 0.1841, "step": 557600 }, { "epoch": 3.13, "learning_rate": 1.8661922348325157e-05, "loss": 0.1842, "step": 557700 }, { "epoch": 3.14, "learning_rate": 1.86563011180627e-05, "loss": 0.1855, "step": 557800 }, { "epoch": 3.14, "learning_rate": 1.8650679887800247e-05, "loss": 0.1862, "step": 557900 }, { "epoch": 3.14, "learning_rate": 1.864505865753779e-05, "loss": 0.184, "step": 558000 }, { "epoch": 3.14, "learning_rate": 1.8639437427275332e-05, "loss": 0.1846, "step": 558100 }, { "epoch": 3.14, "learning_rate": 1.863381619701288e-05, "loss": 0.191, "step": 558200 }, { "epoch": 3.14, "learning_rate": 1.862819496675042e-05, "loss": 0.1857, "step": 558300 }, { "epoch": 3.14, "learning_rate": 1.8622573736487968e-05, "loss": 0.1888, "step": 558400 }, { "epoch": 3.14, "learning_rate": 1.8616952506225514e-05, "loss": 0.1888, "step": 558500 }, { "epoch": 3.14, "learning_rate": 1.861133127596306e-05, "loss": 0.1851, "step": 558600 }, { "epoch": 3.14, "learning_rate": 1.8605710045700603e-05, "loss": 0.1798, "step": 558700 }, { "epoch": 3.14, "learning_rate": 1.8600088815438146e-05, "loss": 0.1868, "step": 558800 }, { "epoch": 3.14, "learning_rate": 1.8594467585175692e-05, "loss": 0.1951, "step": 558900 }, { "epoch": 3.14, "learning_rate": 1.8588846354913235e-05, "loss": 0.1875, "step": 559000 }, { "epoch": 3.14, "learning_rate": 1.858322512465078e-05, "loss": 0.1876, "step": 559100 }, { "epoch": 3.14, "learning_rate": 1.8577603894388328e-05, "loss": 0.1887, "step": 559200 }, { "epoch": 3.14, "learning_rate": 1.8571982664125874e-05, "loss": 0.1892, "step": 559300 }, { "epoch": 3.14, "learning_rate": 1.8566361433863417e-05, "loss": 0.1844, "step": 559400 }, { "epoch": 3.15, "learning_rate": 1.856074020360096e-05, "loss": 0.1874, "step": 559500 }, { "epoch": 3.15, "learning_rate": 1.8555118973338506e-05, "loss": 0.1869, "step": 559600 }, { "epoch": 3.15, "learning_rate": 1.8549553955378677e-05, "loss": 0.1907, "step": 559700 }, { "epoch": 3.15, "learning_rate": 1.854393272511622e-05, "loss": 0.1884, "step": 559800 }, { "epoch": 3.15, "learning_rate": 1.8538311494853763e-05, "loss": 0.1882, "step": 559900 }, { "epoch": 3.15, "learning_rate": 1.853269026459131e-05, "loss": 0.1873, "step": 560000 }, { "epoch": 3.15, "learning_rate": 1.8527069034328852e-05, "loss": 0.1883, "step": 560100 }, { "epoch": 3.15, "learning_rate": 1.85214478040664e-05, "loss": 0.1866, "step": 560200 }, { "epoch": 3.15, "learning_rate": 1.8515826573803945e-05, "loss": 0.1812, "step": 560300 }, { "epoch": 3.15, "learning_rate": 1.851020534354149e-05, "loss": 0.1855, "step": 560400 }, { "epoch": 3.15, "learning_rate": 1.8504584113279034e-05, "loss": 0.186, "step": 560500 }, { "epoch": 3.15, "learning_rate": 1.8498962883016577e-05, "loss": 0.1951, "step": 560600 }, { "epoch": 3.15, "learning_rate": 1.8493341652754123e-05, "loss": 0.1901, "step": 560700 }, { "epoch": 3.15, "learning_rate": 1.8487720422491666e-05, "loss": 0.1827, "step": 560800 }, { "epoch": 3.15, "learning_rate": 1.8482099192229212e-05, "loss": 0.1828, "step": 560900 }, { "epoch": 3.15, "learning_rate": 1.8476477961966755e-05, "loss": 0.191, "step": 561000 }, { "epoch": 3.15, "learning_rate": 1.84708567317043e-05, "loss": 0.1869, "step": 561100 }, { "epoch": 3.15, "learning_rate": 1.8465235501441848e-05, "loss": 0.1936, "step": 561200 }, { "epoch": 3.16, "learning_rate": 1.845961427117939e-05, "loss": 0.1874, "step": 561300 }, { "epoch": 3.16, "learning_rate": 1.8453993040916937e-05, "loss": 0.1871, "step": 561400 }, { "epoch": 3.16, "learning_rate": 1.844837181065448e-05, "loss": 0.189, "step": 561500 }, { "epoch": 3.16, "learning_rate": 1.8442750580392026e-05, "loss": 0.1855, "step": 561600 }, { "epoch": 3.16, "learning_rate": 1.843712935012957e-05, "loss": 0.1874, "step": 561700 }, { "epoch": 3.16, "learning_rate": 1.8431508119867115e-05, "loss": 0.1856, "step": 561800 }, { "epoch": 3.16, "learning_rate": 1.842588688960466e-05, "loss": 0.1818, "step": 561900 }, { "epoch": 3.16, "learning_rate": 1.8420265659342204e-05, "loss": 0.1898, "step": 562000 }, { "epoch": 3.16, "learning_rate": 1.841464442907975e-05, "loss": 0.1852, "step": 562100 }, { "epoch": 3.16, "learning_rate": 1.8409023198817294e-05, "loss": 0.1925, "step": 562200 }, { "epoch": 3.16, "learning_rate": 1.840340196855484e-05, "loss": 0.1857, "step": 562300 }, { "epoch": 3.16, "learning_rate": 1.8397780738292383e-05, "loss": 0.1808, "step": 562400 }, { "epoch": 3.16, "learning_rate": 1.8392159508029926e-05, "loss": 0.1898, "step": 562500 }, { "epoch": 3.16, "learning_rate": 1.8386538277767472e-05, "loss": 0.1803, "step": 562600 }, { "epoch": 3.16, "learning_rate": 1.8380917047505018e-05, "loss": 0.1897, "step": 562700 }, { "epoch": 3.16, "learning_rate": 1.8375295817242564e-05, "loss": 0.189, "step": 562800 }, { "epoch": 3.16, "learning_rate": 1.8369674586980107e-05, "loss": 0.1895, "step": 562900 }, { "epoch": 3.16, "learning_rate": 1.8364053356717654e-05, "loss": 0.1861, "step": 563000 }, { "epoch": 3.17, "learning_rate": 1.8358432126455196e-05, "loss": 0.1863, "step": 563100 }, { "epoch": 3.17, "learning_rate": 1.835281089619274e-05, "loss": 0.1845, "step": 563200 }, { "epoch": 3.17, "learning_rate": 1.8347189665930286e-05, "loss": 0.1836, "step": 563300 }, { "epoch": 3.17, "learning_rate": 1.834156843566783e-05, "loss": 0.1845, "step": 563400 }, { "epoch": 3.17, "learning_rate": 1.8335947205405378e-05, "loss": 0.1849, "step": 563500 }, { "epoch": 3.17, "learning_rate": 1.833032597514292e-05, "loss": 0.1854, "step": 563600 }, { "epoch": 3.17, "learning_rate": 1.8324704744880467e-05, "loss": 0.1935, "step": 563700 }, { "epoch": 3.17, "learning_rate": 1.831908351461801e-05, "loss": 0.1837, "step": 563800 }, { "epoch": 3.17, "learning_rate": 1.8313462284355553e-05, "loss": 0.1811, "step": 563900 }, { "epoch": 3.17, "learning_rate": 1.8307897266395724e-05, "loss": 0.1835, "step": 564000 }, { "epoch": 3.17, "learning_rate": 1.830227603613327e-05, "loss": 0.1902, "step": 564100 }, { "epoch": 3.17, "learning_rate": 1.8296654805870813e-05, "loss": 0.1832, "step": 564200 }, { "epoch": 3.17, "learning_rate": 1.8291033575608356e-05, "loss": 0.1872, "step": 564300 }, { "epoch": 3.17, "learning_rate": 1.8285412345345903e-05, "loss": 0.185, "step": 564400 }, { "epoch": 3.17, "learning_rate": 1.827979111508345e-05, "loss": 0.1895, "step": 564500 }, { "epoch": 3.17, "learning_rate": 1.8274169884820995e-05, "loss": 0.185, "step": 564600 }, { "epoch": 3.17, "learning_rate": 1.8268548654558538e-05, "loss": 0.1858, "step": 564700 }, { "epoch": 3.17, "learning_rate": 1.8262927424296084e-05, "loss": 0.1845, "step": 564800 }, { "epoch": 3.18, "learning_rate": 1.8257306194033627e-05, "loss": 0.182, "step": 564900 }, { "epoch": 3.18, "learning_rate": 1.825168496377117e-05, "loss": 0.1785, "step": 565000 }, { "epoch": 3.18, "learning_rate": 1.824611994581134e-05, "loss": 0.1846, "step": 565100 }, { "epoch": 3.18, "learning_rate": 1.8240498715548887e-05, "loss": 0.1866, "step": 565200 }, { "epoch": 3.18, "learning_rate": 1.823487748528643e-05, "loss": 0.1822, "step": 565300 }, { "epoch": 3.18, "learning_rate": 1.8229256255023973e-05, "loss": 0.1819, "step": 565400 }, { "epoch": 3.18, "learning_rate": 1.822363502476152e-05, "loss": 0.1863, "step": 565500 }, { "epoch": 3.18, "learning_rate": 1.8218013794499066e-05, "loss": 0.1721, "step": 565600 }, { "epoch": 3.18, "learning_rate": 1.8212392564236612e-05, "loss": 0.1864, "step": 565700 }, { "epoch": 3.18, "learning_rate": 1.8206771333974155e-05, "loss": 0.1862, "step": 565800 }, { "epoch": 3.18, "learning_rate": 1.82011501037117e-05, "loss": 0.1813, "step": 565900 }, { "epoch": 3.18, "learning_rate": 1.8195528873449244e-05, "loss": 0.1883, "step": 566000 }, { "epoch": 3.18, "learning_rate": 1.8189907643186787e-05, "loss": 0.1889, "step": 566100 }, { "epoch": 3.18, "learning_rate": 1.8184286412924333e-05, "loss": 0.1842, "step": 566200 }, { "epoch": 3.18, "learning_rate": 1.8178665182661876e-05, "loss": 0.1923, "step": 566300 }, { "epoch": 3.18, "learning_rate": 1.8173043952399422e-05, "loss": 0.1823, "step": 566400 }, { "epoch": 3.18, "learning_rate": 1.816742272213697e-05, "loss": 0.179, "step": 566500 }, { "epoch": 3.18, "learning_rate": 1.8161801491874515e-05, "loss": 0.191, "step": 566600 }, { "epoch": 3.19, "learning_rate": 1.8156180261612058e-05, "loss": 0.1819, "step": 566700 }, { "epoch": 3.19, "learning_rate": 1.81505590313496e-05, "loss": 0.185, "step": 566800 }, { "epoch": 3.19, "learning_rate": 1.8144937801087147e-05, "loss": 0.1892, "step": 566900 }, { "epoch": 3.19, "learning_rate": 1.813931657082469e-05, "loss": 0.1822, "step": 567000 }, { "epoch": 3.19, "learning_rate": 1.8133695340562236e-05, "loss": 0.1801, "step": 567100 }, { "epoch": 3.19, "learning_rate": 1.8128074110299782e-05, "loss": 0.1833, "step": 567200 }, { "epoch": 3.19, "learning_rate": 1.812245288003733e-05, "loss": 0.186, "step": 567300 }, { "epoch": 3.19, "learning_rate": 1.811683164977487e-05, "loss": 0.189, "step": 567400 }, { "epoch": 3.19, "learning_rate": 1.8111210419512414e-05, "loss": 0.189, "step": 567500 }, { "epoch": 3.19, "learning_rate": 1.810558918924996e-05, "loss": 0.1831, "step": 567600 }, { "epoch": 3.19, "learning_rate": 1.8099967958987504e-05, "loss": 0.1823, "step": 567700 }, { "epoch": 3.19, "learning_rate": 1.809434672872505e-05, "loss": 0.1868, "step": 567800 }, { "epoch": 3.19, "learning_rate": 1.8088725498462593e-05, "loss": 0.1845, "step": 567900 }, { "epoch": 3.19, "learning_rate": 1.808310426820014e-05, "loss": 0.1885, "step": 568000 }, { "epoch": 3.19, "learning_rate": 1.8077483037937685e-05, "loss": 0.1838, "step": 568100 }, { "epoch": 3.19, "learning_rate": 1.8071861807675228e-05, "loss": 0.1885, "step": 568200 }, { "epoch": 3.19, "learning_rate": 1.8066240577412774e-05, "loss": 0.1846, "step": 568300 }, { "epoch": 3.2, "learning_rate": 1.8060619347150317e-05, "loss": 0.1869, "step": 568400 }, { "epoch": 3.2, "learning_rate": 1.8054998116887864e-05, "loss": 0.183, "step": 568500 }, { "epoch": 3.2, "learning_rate": 1.8049376886625407e-05, "loss": 0.1908, "step": 568600 }, { "epoch": 3.2, "learning_rate": 1.8043755656362953e-05, "loss": 0.1882, "step": 568700 }, { "epoch": 3.2, "learning_rate": 1.80381344261005e-05, "loss": 0.1798, "step": 568800 }, { "epoch": 3.2, "learning_rate": 1.8032513195838042e-05, "loss": 0.1863, "step": 568900 }, { "epoch": 3.2, "learning_rate": 1.8026891965575588e-05, "loss": 0.1928, "step": 569000 }, { "epoch": 3.2, "learning_rate": 1.802127073531313e-05, "loss": 0.1854, "step": 569100 }, { "epoch": 3.2, "learning_rate": 1.8015649505050677e-05, "loss": 0.1841, "step": 569200 }, { "epoch": 3.2, "learning_rate": 1.801002827478822e-05, "loss": 0.1837, "step": 569300 }, { "epoch": 3.2, "learning_rate": 1.8004407044525763e-05, "loss": 0.1879, "step": 569400 }, { "epoch": 3.2, "learning_rate": 1.799878581426331e-05, "loss": 0.1841, "step": 569500 }, { "epoch": 3.2, "learning_rate": 1.7993164584000856e-05, "loss": 0.1849, "step": 569600 }, { "epoch": 3.2, "learning_rate": 1.7987599566041023e-05, "loss": 0.1864, "step": 569700 }, { "epoch": 3.2, "learning_rate": 1.798197833577857e-05, "loss": 0.1851, "step": 569800 }, { "epoch": 3.2, "learning_rate": 1.7976357105516116e-05, "loss": 0.1899, "step": 569900 }, { "epoch": 3.2, "learning_rate": 1.797073587525366e-05, "loss": 0.1902, "step": 570000 }, { "epoch": 3.2, "learning_rate": 1.7965114644991205e-05, "loss": 0.1866, "step": 570100 }, { "epoch": 3.21, "learning_rate": 1.7959493414728748e-05, "loss": 0.1837, "step": 570200 }, { "epoch": 3.21, "learning_rate": 1.7953872184466294e-05, "loss": 0.1828, "step": 570300 }, { "epoch": 3.21, "learning_rate": 1.7948250954203837e-05, "loss": 0.1865, "step": 570400 }, { "epoch": 3.21, "learning_rate": 1.794262972394138e-05, "loss": 0.1832, "step": 570500 }, { "epoch": 3.21, "learning_rate": 1.7937008493678926e-05, "loss": 0.1815, "step": 570600 }, { "epoch": 3.21, "learning_rate": 1.7931387263416473e-05, "loss": 0.188, "step": 570700 }, { "epoch": 3.21, "learning_rate": 1.792576603315402e-05, "loss": 0.1896, "step": 570800 }, { "epoch": 3.21, "learning_rate": 1.7920144802891562e-05, "loss": 0.19, "step": 570900 }, { "epoch": 3.21, "learning_rate": 1.7914523572629108e-05, "loss": 0.1875, "step": 571000 }, { "epoch": 3.21, "learning_rate": 1.790890234236665e-05, "loss": 0.1815, "step": 571100 }, { "epoch": 3.21, "learning_rate": 1.7903281112104194e-05, "loss": 0.1842, "step": 571200 }, { "epoch": 3.21, "learning_rate": 1.789765988184174e-05, "loss": 0.1813, "step": 571300 }, { "epoch": 3.21, "learning_rate": 1.7892038651579286e-05, "loss": 0.1832, "step": 571400 }, { "epoch": 3.21, "learning_rate": 1.7886417421316833e-05, "loss": 0.1816, "step": 571500 }, { "epoch": 3.21, "learning_rate": 1.7880796191054376e-05, "loss": 0.1824, "step": 571600 }, { "epoch": 3.21, "learning_rate": 1.7875174960791922e-05, "loss": 0.1873, "step": 571700 }, { "epoch": 3.21, "learning_rate": 1.7869553730529465e-05, "loss": 0.1827, "step": 571800 }, { "epoch": 3.21, "learning_rate": 1.7863932500267008e-05, "loss": 0.191, "step": 571900 }, { "epoch": 3.22, "learning_rate": 1.785836748230718e-05, "loss": 0.1809, "step": 572000 }, { "epoch": 3.22, "learning_rate": 1.7852746252044725e-05, "loss": 0.1869, "step": 572100 }, { "epoch": 3.22, "learning_rate": 1.7847125021782268e-05, "loss": 0.1843, "step": 572200 }, { "epoch": 3.22, "learning_rate": 1.784150379151981e-05, "loss": 0.1854, "step": 572300 }, { "epoch": 3.22, "learning_rate": 1.7835882561257357e-05, "loss": 0.1869, "step": 572400 }, { "epoch": 3.22, "learning_rate": 1.7830317543297525e-05, "loss": 0.1915, "step": 572500 }, { "epoch": 3.22, "learning_rate": 1.782469631303507e-05, "loss": 0.1819, "step": 572600 }, { "epoch": 3.22, "learning_rate": 1.7819075082772617e-05, "loss": 0.183, "step": 572700 }, { "epoch": 3.22, "learning_rate": 1.7813453852510163e-05, "loss": 0.1824, "step": 572800 }, { "epoch": 3.22, "learning_rate": 1.7807832622247706e-05, "loss": 0.1851, "step": 572900 }, { "epoch": 3.22, "learning_rate": 1.7802211391985253e-05, "loss": 0.191, "step": 573000 }, { "epoch": 3.22, "learning_rate": 1.7796590161722795e-05, "loss": 0.1837, "step": 573100 }, { "epoch": 3.22, "learning_rate": 1.779096893146034e-05, "loss": 0.1856, "step": 573200 }, { "epoch": 3.22, "learning_rate": 1.778540391350051e-05, "loss": 0.1952, "step": 573300 }, { "epoch": 3.22, "learning_rate": 1.7779782683238056e-05, "loss": 0.1872, "step": 573400 }, { "epoch": 3.22, "learning_rate": 1.77741614529756e-05, "loss": 0.186, "step": 573500 }, { "epoch": 3.22, "learning_rate": 1.776854022271314e-05, "loss": 0.1851, "step": 573600 }, { "epoch": 3.22, "learning_rate": 1.7762918992450688e-05, "loss": 0.1858, "step": 573700 }, { "epoch": 3.23, "learning_rate": 1.7757297762188234e-05, "loss": 0.1913, "step": 573800 }, { "epoch": 3.23, "learning_rate": 1.775167653192578e-05, "loss": 0.1833, "step": 573900 }, { "epoch": 3.23, "learning_rate": 1.7746055301663323e-05, "loss": 0.1829, "step": 574000 }, { "epoch": 3.23, "learning_rate": 1.774043407140087e-05, "loss": 0.1861, "step": 574100 }, { "epoch": 3.23, "learning_rate": 1.7734812841138412e-05, "loss": 0.1853, "step": 574200 }, { "epoch": 3.23, "learning_rate": 1.7729191610875955e-05, "loss": 0.1894, "step": 574300 }, { "epoch": 3.23, "learning_rate": 1.77235703806135e-05, "loss": 0.1856, "step": 574400 }, { "epoch": 3.23, "learning_rate": 1.7717949150351044e-05, "loss": 0.1874, "step": 574500 }, { "epoch": 3.23, "learning_rate": 1.771232792008859e-05, "loss": 0.1856, "step": 574600 }, { "epoch": 3.23, "learning_rate": 1.7706706689826137e-05, "loss": 0.1799, "step": 574700 }, { "epoch": 3.23, "learning_rate": 1.7701085459563683e-05, "loss": 0.1799, "step": 574800 }, { "epoch": 3.23, "learning_rate": 1.7695464229301226e-05, "loss": 0.1826, "step": 574900 }, { "epoch": 3.23, "learning_rate": 1.768984299903877e-05, "loss": 0.1855, "step": 575000 }, { "epoch": 3.23, "learning_rate": 1.7684221768776315e-05, "loss": 0.1794, "step": 575100 }, { "epoch": 3.23, "learning_rate": 1.7678600538513858e-05, "loss": 0.1865, "step": 575200 }, { "epoch": 3.23, "learning_rate": 1.7672979308251404e-05, "loss": 0.1821, "step": 575300 }, { "epoch": 3.23, "learning_rate": 1.766735807798895e-05, "loss": 0.1884, "step": 575400 }, { "epoch": 3.24, "learning_rate": 1.7661736847726497e-05, "loss": 0.1849, "step": 575500 }, { "epoch": 3.24, "learning_rate": 1.765611561746404e-05, "loss": 0.1823, "step": 575600 }, { "epoch": 3.24, "learning_rate": 1.7650494387201583e-05, "loss": 0.1807, "step": 575700 }, { "epoch": 3.24, "learning_rate": 1.764487315693913e-05, "loss": 0.1809, "step": 575800 }, { "epoch": 3.24, "learning_rate": 1.7639251926676672e-05, "loss": 0.1919, "step": 575900 }, { "epoch": 3.24, "learning_rate": 1.7633630696414218e-05, "loss": 0.1846, "step": 576000 }, { "epoch": 3.24, "learning_rate": 1.762800946615176e-05, "loss": 0.1862, "step": 576100 }, { "epoch": 3.24, "learning_rate": 1.7622388235889307e-05, "loss": 0.1813, "step": 576200 }, { "epoch": 3.24, "learning_rate": 1.7616767005626854e-05, "loss": 0.1843, "step": 576300 }, { "epoch": 3.24, "learning_rate": 1.7611145775364397e-05, "loss": 0.1896, "step": 576400 }, { "epoch": 3.24, "learning_rate": 1.7605524545101943e-05, "loss": 0.1864, "step": 576500 }, { "epoch": 3.24, "learning_rate": 1.7599903314839486e-05, "loss": 0.1829, "step": 576600 }, { "epoch": 3.24, "learning_rate": 1.7594282084577032e-05, "loss": 0.1814, "step": 576700 }, { "epoch": 3.24, "learning_rate": 1.7588660854314575e-05, "loss": 0.191, "step": 576800 }, { "epoch": 3.24, "learning_rate": 1.758303962405212e-05, "loss": 0.1818, "step": 576900 }, { "epoch": 3.24, "learning_rate": 1.7577418393789667e-05, "loss": 0.1863, "step": 577000 }, { "epoch": 3.24, "learning_rate": 1.757179716352721e-05, "loss": 0.1922, "step": 577100 }, { "epoch": 3.24, "learning_rate": 1.7566175933264757e-05, "loss": 0.1923, "step": 577200 }, { "epoch": 3.25, "learning_rate": 1.75605547030023e-05, "loss": 0.1896, "step": 577300 }, { "epoch": 3.25, "learning_rate": 1.7554933472739846e-05, "loss": 0.1914, "step": 577400 }, { "epoch": 3.25, "learning_rate": 1.754931224247739e-05, "loss": 0.177, "step": 577500 }, { "epoch": 3.25, "learning_rate": 1.754369101221493e-05, "loss": 0.1916, "step": 577600 }, { "epoch": 3.25, "learning_rate": 1.7538069781952478e-05, "loss": 0.1827, "step": 577700 }, { "epoch": 3.25, "learning_rate": 1.7532448551690024e-05, "loss": 0.1908, "step": 577800 }, { "epoch": 3.25, "learning_rate": 1.752682732142757e-05, "loss": 0.1828, "step": 577900 }, { "epoch": 3.25, "learning_rate": 1.7521262303467738e-05, "loss": 0.1879, "step": 578000 }, { "epoch": 3.25, "learning_rate": 1.7515641073205284e-05, "loss": 0.1843, "step": 578100 }, { "epoch": 3.25, "learning_rate": 1.7510019842942827e-05, "loss": 0.1879, "step": 578200 }, { "epoch": 3.25, "learning_rate": 1.7504398612680373e-05, "loss": 0.1855, "step": 578300 }, { "epoch": 3.25, "learning_rate": 1.7498777382417916e-05, "loss": 0.1872, "step": 578400 }, { "epoch": 3.25, "learning_rate": 1.7493156152155463e-05, "loss": 0.188, "step": 578500 }, { "epoch": 3.25, "learning_rate": 1.7487534921893005e-05, "loss": 0.1872, "step": 578600 }, { "epoch": 3.25, "learning_rate": 1.748191369163055e-05, "loss": 0.1877, "step": 578700 }, { "epoch": 3.25, "learning_rate": 1.7476292461368095e-05, "loss": 0.1845, "step": 578800 }, { "epoch": 3.25, "learning_rate": 1.747067123110564e-05, "loss": 0.1863, "step": 578900 }, { "epoch": 3.25, "learning_rate": 1.7465050000843187e-05, "loss": 0.1862, "step": 579000 }, { "epoch": 3.26, "learning_rate": 1.745942877058073e-05, "loss": 0.1884, "step": 579100 }, { "epoch": 3.26, "learning_rate": 1.7453807540318276e-05, "loss": 0.1812, "step": 579200 }, { "epoch": 3.26, "learning_rate": 1.744818631005582e-05, "loss": 0.1835, "step": 579300 }, { "epoch": 3.26, "learning_rate": 1.744262129209599e-05, "loss": 0.1844, "step": 579400 }, { "epoch": 3.26, "learning_rate": 1.7437000061833533e-05, "loss": 0.1845, "step": 579500 }, { "epoch": 3.26, "learning_rate": 1.743137883157108e-05, "loss": 0.186, "step": 579600 }, { "epoch": 3.26, "learning_rate": 1.7425757601308622e-05, "loss": 0.1836, "step": 579700 }, { "epoch": 3.26, "learning_rate": 1.7420136371046165e-05, "loss": 0.1851, "step": 579800 }, { "epoch": 3.26, "learning_rate": 1.741451514078371e-05, "loss": 0.1803, "step": 579900 }, { "epoch": 3.26, "learning_rate": 1.7408893910521258e-05, "loss": 0.185, "step": 580000 }, { "epoch": 3.26, "learning_rate": 1.7403272680258804e-05, "loss": 0.1868, "step": 580100 }, { "epoch": 3.26, "learning_rate": 1.7397651449996347e-05, "loss": 0.1847, "step": 580200 }, { "epoch": 3.26, "learning_rate": 1.7392030219733893e-05, "loss": 0.1805, "step": 580300 }, { "epoch": 3.26, "learning_rate": 1.7386408989471436e-05, "loss": 0.19, "step": 580400 }, { "epoch": 3.26, "learning_rate": 1.738078775920898e-05, "loss": 0.1899, "step": 580500 }, { "epoch": 3.26, "learning_rate": 1.7375166528946525e-05, "loss": 0.1821, "step": 580600 }, { "epoch": 3.26, "learning_rate": 1.736954529868407e-05, "loss": 0.1875, "step": 580700 }, { "epoch": 3.26, "learning_rate": 1.7363924068421618e-05, "loss": 0.1865, "step": 580800 }, { "epoch": 3.27, "learning_rate": 1.735830283815916e-05, "loss": 0.1815, "step": 580900 }, { "epoch": 3.27, "learning_rate": 1.7352681607896707e-05, "loss": 0.181, "step": 581000 }, { "epoch": 3.27, "learning_rate": 1.734706037763425e-05, "loss": 0.1841, "step": 581100 }, { "epoch": 3.27, "learning_rate": 1.7341439147371793e-05, "loss": 0.1798, "step": 581200 }, { "epoch": 3.27, "learning_rate": 1.733581791710934e-05, "loss": 0.1852, "step": 581300 }, { "epoch": 3.27, "learning_rate": 1.7330196686846882e-05, "loss": 0.1893, "step": 581400 }, { "epoch": 3.27, "learning_rate": 1.7324575456584428e-05, "loss": 0.1857, "step": 581500 }, { "epoch": 3.27, "learning_rate": 1.7318954226321974e-05, "loss": 0.1883, "step": 581600 }, { "epoch": 3.27, "learning_rate": 1.731333299605952e-05, "loss": 0.1822, "step": 581700 }, { "epoch": 3.27, "learning_rate": 1.7307711765797064e-05, "loss": 0.1827, "step": 581800 }, { "epoch": 3.27, "learning_rate": 1.7302090535534607e-05, "loss": 0.1852, "step": 581900 }, { "epoch": 3.27, "learning_rate": 1.7296469305272153e-05, "loss": 0.1873, "step": 582000 }, { "epoch": 3.27, "learning_rate": 1.7290848075009696e-05, "loss": 0.1845, "step": 582100 }, { "epoch": 3.27, "learning_rate": 1.7285226844747242e-05, "loss": 0.1859, "step": 582200 }, { "epoch": 3.27, "learning_rate": 1.7279605614484788e-05, "loss": 0.1839, "step": 582300 }, { "epoch": 3.27, "learning_rate": 1.727398438422233e-05, "loss": 0.1829, "step": 582400 }, { "epoch": 3.27, "learning_rate": 1.7268363153959877e-05, "loss": 0.1903, "step": 582500 }, { "epoch": 3.27, "learning_rate": 1.726274192369742e-05, "loss": 0.1823, "step": 582600 }, { "epoch": 3.28, "learning_rate": 1.7257120693434967e-05, "loss": 0.184, "step": 582700 }, { "epoch": 3.28, "learning_rate": 1.725149946317251e-05, "loss": 0.1911, "step": 582800 }, { "epoch": 3.28, "learning_rate": 1.7245878232910056e-05, "loss": 0.1929, "step": 582900 }, { "epoch": 3.28, "learning_rate": 1.72402570026476e-05, "loss": 0.1885, "step": 583000 }, { "epoch": 3.28, "learning_rate": 1.7234635772385145e-05, "loss": 0.1846, "step": 583100 }, { "epoch": 3.28, "learning_rate": 1.722901454212269e-05, "loss": 0.1843, "step": 583200 }, { "epoch": 3.28, "learning_rate": 1.7223393311860234e-05, "loss": 0.1834, "step": 583300 }, { "epoch": 3.28, "learning_rate": 1.721777208159778e-05, "loss": 0.1878, "step": 583400 }, { "epoch": 3.28, "learning_rate": 1.7212150851335323e-05, "loss": 0.1871, "step": 583500 }, { "epoch": 3.28, "learning_rate": 1.7206585833375494e-05, "loss": 0.1866, "step": 583600 }, { "epoch": 3.28, "learning_rate": 1.7200964603113037e-05, "loss": 0.1853, "step": 583700 }, { "epoch": 3.28, "learning_rate": 1.7195343372850583e-05, "loss": 0.183, "step": 583800 }, { "epoch": 3.28, "learning_rate": 1.7189722142588126e-05, "loss": 0.1888, "step": 583900 }, { "epoch": 3.28, "learning_rate": 1.7184100912325673e-05, "loss": 0.1841, "step": 584000 }, { "epoch": 3.28, "learning_rate": 1.7178479682063215e-05, "loss": 0.1847, "step": 584100 }, { "epoch": 3.28, "learning_rate": 1.7172858451800762e-05, "loss": 0.1895, "step": 584200 }, { "epoch": 3.28, "learning_rate": 1.7167237221538308e-05, "loss": 0.1859, "step": 584300 }, { "epoch": 3.29, "learning_rate": 1.716161599127585e-05, "loss": 0.1919, "step": 584400 }, { "epoch": 3.29, "learning_rate": 1.7155994761013397e-05, "loss": 0.1835, "step": 584500 }, { "epoch": 3.29, "learning_rate": 1.715037353075094e-05, "loss": 0.1876, "step": 584600 }, { "epoch": 3.29, "learning_rate": 1.7144752300488486e-05, "loss": 0.1864, "step": 584700 }, { "epoch": 3.29, "learning_rate": 1.713913107022603e-05, "loss": 0.1891, "step": 584800 }, { "epoch": 3.29, "learning_rate": 1.7133509839963576e-05, "loss": 0.1881, "step": 584900 }, { "epoch": 3.29, "learning_rate": 1.7127888609701122e-05, "loss": 0.185, "step": 585000 }, { "epoch": 3.29, "learning_rate": 1.7122267379438665e-05, "loss": 0.1838, "step": 585100 }, { "epoch": 3.29, "learning_rate": 1.711664614917621e-05, "loss": 0.1867, "step": 585200 }, { "epoch": 3.29, "learning_rate": 1.7111024918913754e-05, "loss": 0.1872, "step": 585300 }, { "epoch": 3.29, "learning_rate": 1.71054036886513e-05, "loss": 0.1836, "step": 585400 }, { "epoch": 3.29, "learning_rate": 1.7099782458388843e-05, "loss": 0.1829, "step": 585500 }, { "epoch": 3.29, "learning_rate": 1.7094161228126386e-05, "loss": 0.1804, "step": 585600 }, { "epoch": 3.29, "learning_rate": 1.7088539997863932e-05, "loss": 0.1825, "step": 585700 }, { "epoch": 3.29, "learning_rate": 1.708291876760148e-05, "loss": 0.1847, "step": 585800 }, { "epoch": 3.29, "learning_rate": 1.7077297537339025e-05, "loss": 0.1907, "step": 585900 }, { "epoch": 3.29, "learning_rate": 1.7071676307076568e-05, "loss": 0.1872, "step": 586000 }, { "epoch": 3.29, "learning_rate": 1.7066055076814114e-05, "loss": 0.1875, "step": 586100 }, { "epoch": 3.3, "learning_rate": 1.7060433846551657e-05, "loss": 0.1915, "step": 586200 }, { "epoch": 3.3, "learning_rate": 1.70548126162892e-05, "loss": 0.1869, "step": 586300 }, { "epoch": 3.3, "learning_rate": 1.7049191386026746e-05, "loss": 0.1829, "step": 586400 }, { "epoch": 3.3, "learning_rate": 1.7043570155764292e-05, "loss": 0.1855, "step": 586500 }, { "epoch": 3.3, "learning_rate": 1.703794892550184e-05, "loss": 0.1848, "step": 586600 }, { "epoch": 3.3, "learning_rate": 1.703232769523938e-05, "loss": 0.1835, "step": 586700 }, { "epoch": 3.3, "learning_rate": 1.7026706464976928e-05, "loss": 0.1873, "step": 586800 }, { "epoch": 3.3, "learning_rate": 1.702108523471447e-05, "loss": 0.1832, "step": 586900 }, { "epoch": 3.3, "learning_rate": 1.7015464004452013e-05, "loss": 0.1856, "step": 587000 }, { "epoch": 3.3, "learning_rate": 1.700984277418956e-05, "loss": 0.1832, "step": 587100 }, { "epoch": 3.3, "learning_rate": 1.7004221543927103e-05, "loss": 0.1876, "step": 587200 }, { "epoch": 3.3, "learning_rate": 1.699860031366465e-05, "loss": 0.1867, "step": 587300 }, { "epoch": 3.3, "learning_rate": 1.6992979083402195e-05, "loss": 0.1779, "step": 587400 }, { "epoch": 3.3, "learning_rate": 1.6987357853139738e-05, "loss": 0.1825, "step": 587500 }, { "epoch": 3.3, "learning_rate": 1.6981736622877284e-05, "loss": 0.1883, "step": 587600 }, { "epoch": 3.3, "learning_rate": 1.6976115392614827e-05, "loss": 0.1859, "step": 587700 }, { "epoch": 3.3, "learning_rate": 1.6970494162352374e-05, "loss": 0.1812, "step": 587800 }, { "epoch": 3.3, "learning_rate": 1.6964872932089916e-05, "loss": 0.1864, "step": 587900 }, { "epoch": 3.31, "learning_rate": 1.6959307914130087e-05, "loss": 0.1868, "step": 588000 }, { "epoch": 3.31, "learning_rate": 1.695368668386763e-05, "loss": 0.1848, "step": 588100 }, { "epoch": 3.31, "learning_rate": 1.6948065453605177e-05, "loss": 0.1886, "step": 588200 }, { "epoch": 3.31, "learning_rate": 1.694244422334272e-05, "loss": 0.1884, "step": 588300 }, { "epoch": 3.31, "learning_rate": 1.6936822993080266e-05, "loss": 0.1845, "step": 588400 }, { "epoch": 3.31, "learning_rate": 1.6931201762817812e-05, "loss": 0.1859, "step": 588500 }, { "epoch": 3.31, "learning_rate": 1.6925580532555355e-05, "loss": 0.1851, "step": 588600 }, { "epoch": 3.31, "learning_rate": 1.69199593022929e-05, "loss": 0.1885, "step": 588700 }, { "epoch": 3.31, "learning_rate": 1.6914338072030444e-05, "loss": 0.1812, "step": 588800 }, { "epoch": 3.31, "learning_rate": 1.690871684176799e-05, "loss": 0.1853, "step": 588900 }, { "epoch": 3.31, "learning_rate": 1.6903095611505533e-05, "loss": 0.1865, "step": 589000 }, { "epoch": 3.31, "learning_rate": 1.689747438124308e-05, "loss": 0.1833, "step": 589100 }, { "epoch": 3.31, "learning_rate": 1.6891853150980626e-05, "loss": 0.1897, "step": 589200 }, { "epoch": 3.31, "learning_rate": 1.688623192071817e-05, "loss": 0.1876, "step": 589300 }, { "epoch": 3.31, "learning_rate": 1.6880610690455715e-05, "loss": 0.1849, "step": 589400 }, { "epoch": 3.31, "learning_rate": 1.6874989460193258e-05, "loss": 0.1829, "step": 589500 }, { "epoch": 3.31, "learning_rate": 1.6869368229930804e-05, "loss": 0.183, "step": 589600 }, { "epoch": 3.31, "learning_rate": 1.6863746999668347e-05, "loss": 0.1865, "step": 589700 }, { "epoch": 3.32, "learning_rate": 1.6858125769405893e-05, "loss": 0.1877, "step": 589800 }, { "epoch": 3.32, "learning_rate": 1.6852504539143436e-05, "loss": 0.1864, "step": 589900 }, { "epoch": 3.32, "learning_rate": 1.6846883308880982e-05, "loss": 0.183, "step": 590000 }, { "epoch": 3.32, "learning_rate": 1.684126207861853e-05, "loss": 0.1854, "step": 590100 }, { "epoch": 3.32, "learning_rate": 1.6835697060658696e-05, "loss": 0.1849, "step": 590200 }, { "epoch": 3.32, "learning_rate": 1.6830075830396243e-05, "loss": 0.1882, "step": 590300 }, { "epoch": 3.32, "learning_rate": 1.6824454600133786e-05, "loss": 0.187, "step": 590400 }, { "epoch": 3.32, "learning_rate": 1.6818833369871332e-05, "loss": 0.1801, "step": 590500 }, { "epoch": 3.32, "learning_rate": 1.6813212139608875e-05, "loss": 0.1811, "step": 590600 }, { "epoch": 3.32, "learning_rate": 1.680759090934642e-05, "loss": 0.1883, "step": 590700 }, { "epoch": 3.32, "learning_rate": 1.6801969679083964e-05, "loss": 0.1789, "step": 590800 }, { "epoch": 3.32, "learning_rate": 1.679634844882151e-05, "loss": 0.183, "step": 590900 }, { "epoch": 3.32, "learning_rate": 1.6790727218559053e-05, "loss": 0.1799, "step": 591000 }, { "epoch": 3.32, "learning_rate": 1.67851059882966e-05, "loss": 0.1832, "step": 591100 }, { "epoch": 3.32, "learning_rate": 1.6779484758034146e-05, "loss": 0.1831, "step": 591200 }, { "epoch": 3.32, "learning_rate": 1.6773919740074313e-05, "loss": 0.1855, "step": 591300 }, { "epoch": 3.32, "learning_rate": 1.676829850981186e-05, "loss": 0.1829, "step": 591400 }, { "epoch": 3.32, "learning_rate": 1.6762677279549402e-05, "loss": 0.1886, "step": 591500 }, { "epoch": 3.33, "learning_rate": 1.675705604928695e-05, "loss": 0.1896, "step": 591600 }, { "epoch": 3.33, "learning_rate": 1.675143481902449e-05, "loss": 0.184, "step": 591700 }, { "epoch": 3.33, "learning_rate": 1.6745813588762038e-05, "loss": 0.1841, "step": 591800 }, { "epoch": 3.33, "learning_rate": 1.674019235849958e-05, "loss": 0.1892, "step": 591900 }, { "epoch": 3.33, "learning_rate": 1.6734571128237127e-05, "loss": 0.1876, "step": 592000 }, { "epoch": 3.33, "learning_rate": 1.6728949897974673e-05, "loss": 0.1835, "step": 592100 }, { "epoch": 3.33, "learning_rate": 1.6723328667712216e-05, "loss": 0.1827, "step": 592200 }, { "epoch": 3.33, "learning_rate": 1.6717707437449762e-05, "loss": 0.1805, "step": 592300 }, { "epoch": 3.33, "learning_rate": 1.6712086207187305e-05, "loss": 0.1848, "step": 592400 }, { "epoch": 3.33, "learning_rate": 1.670646497692485e-05, "loss": 0.1851, "step": 592500 }, { "epoch": 3.33, "learning_rate": 1.6700843746662395e-05, "loss": 0.177, "step": 592600 }, { "epoch": 3.33, "learning_rate": 1.6695222516399937e-05, "loss": 0.188, "step": 592700 }, { "epoch": 3.33, "learning_rate": 1.6689601286137484e-05, "loss": 0.179, "step": 592800 }, { "epoch": 3.33, "learning_rate": 1.668398005587503e-05, "loss": 0.1893, "step": 592900 }, { "epoch": 3.33, "learning_rate": 1.6678358825612576e-05, "loss": 0.1811, "step": 593000 }, { "epoch": 3.33, "learning_rate": 1.667273759535012e-05, "loss": 0.1849, "step": 593100 }, { "epoch": 3.33, "learning_rate": 1.6667116365087665e-05, "loss": 0.1881, "step": 593200 }, { "epoch": 3.34, "learning_rate": 1.6661495134825208e-05, "loss": 0.1856, "step": 593300 }, { "epoch": 3.34, "learning_rate": 1.665587390456275e-05, "loss": 0.1855, "step": 593400 }, { "epoch": 3.34, "learning_rate": 1.6650252674300297e-05, "loss": 0.1886, "step": 593500 }, { "epoch": 3.34, "learning_rate": 1.664463144403784e-05, "loss": 0.1918, "step": 593600 }, { "epoch": 3.34, "learning_rate": 1.663901021377539e-05, "loss": 0.1846, "step": 593700 }, { "epoch": 3.34, "learning_rate": 1.6633388983512933e-05, "loss": 0.1864, "step": 593800 }, { "epoch": 3.34, "learning_rate": 1.66278239655531e-05, "loss": 0.1898, "step": 593900 }, { "epoch": 3.34, "learning_rate": 1.6622202735290647e-05, "loss": 0.1875, "step": 594000 }, { "epoch": 3.34, "learning_rate": 1.6616581505028193e-05, "loss": 0.1857, "step": 594100 }, { "epoch": 3.34, "learning_rate": 1.6610960274765736e-05, "loss": 0.1879, "step": 594200 }, { "epoch": 3.34, "learning_rate": 1.6605339044503282e-05, "loss": 0.1829, "step": 594300 }, { "epoch": 3.34, "learning_rate": 1.6599717814240825e-05, "loss": 0.188, "step": 594400 }, { "epoch": 3.34, "learning_rate": 1.6594096583978368e-05, "loss": 0.1814, "step": 594500 }, { "epoch": 3.34, "learning_rate": 1.6588475353715914e-05, "loss": 0.1865, "step": 594600 }, { "epoch": 3.34, "learning_rate": 1.658285412345346e-05, "loss": 0.1843, "step": 594700 }, { "epoch": 3.34, "learning_rate": 1.6577232893191007e-05, "loss": 0.1851, "step": 594800 }, { "epoch": 3.34, "learning_rate": 1.657161166292855e-05, "loss": 0.1832, "step": 594900 }, { "epoch": 3.34, "learning_rate": 1.6565990432666096e-05, "loss": 0.1869, "step": 595000 }, { "epoch": 3.35, "learning_rate": 1.656036920240364e-05, "loss": 0.1823, "step": 595100 }, { "epoch": 3.35, "learning_rate": 1.6554747972141182e-05, "loss": 0.1866, "step": 595200 }, { "epoch": 3.35, "learning_rate": 1.6549126741878728e-05, "loss": 0.1834, "step": 595300 }, { "epoch": 3.35, "learning_rate": 1.654350551161627e-05, "loss": 0.1872, "step": 595400 }, { "epoch": 3.35, "learning_rate": 1.6537884281353817e-05, "loss": 0.184, "step": 595500 }, { "epoch": 3.35, "learning_rate": 1.6532263051091364e-05, "loss": 0.1874, "step": 595600 }, { "epoch": 3.35, "learning_rate": 1.652664182082891e-05, "loss": 0.1874, "step": 595700 }, { "epoch": 3.35, "learning_rate": 1.6521020590566453e-05, "loss": 0.1828, "step": 595800 }, { "epoch": 3.35, "learning_rate": 1.6515399360303996e-05, "loss": 0.1869, "step": 595900 }, { "epoch": 3.35, "learning_rate": 1.6509778130041542e-05, "loss": 0.1827, "step": 596000 }, { "epoch": 3.35, "learning_rate": 1.6504156899779085e-05, "loss": 0.1883, "step": 596100 }, { "epoch": 3.35, "learning_rate": 1.649853566951663e-05, "loss": 0.1838, "step": 596200 }, { "epoch": 3.35, "learning_rate": 1.6492914439254177e-05, "loss": 0.1819, "step": 596300 }, { "epoch": 3.35, "learning_rate": 1.6487293208991724e-05, "loss": 0.1865, "step": 596400 }, { "epoch": 3.35, "learning_rate": 1.6481671978729266e-05, "loss": 0.1845, "step": 596500 }, { "epoch": 3.35, "learning_rate": 1.647605074846681e-05, "loss": 0.1916, "step": 596600 }, { "epoch": 3.35, "learning_rate": 1.6470429518204356e-05, "loss": 0.1808, "step": 596700 }, { "epoch": 3.35, "learning_rate": 1.6464864500244527e-05, "loss": 0.182, "step": 596800 }, { "epoch": 3.36, "learning_rate": 1.645924326998207e-05, "loss": 0.1849, "step": 596900 }, { "epoch": 3.36, "learning_rate": 1.6453622039719612e-05, "loss": 0.1781, "step": 597000 }, { "epoch": 3.36, "learning_rate": 1.644800080945716e-05, "loss": 0.1818, "step": 597100 }, { "epoch": 3.36, "learning_rate": 1.64423795791947e-05, "loss": 0.1861, "step": 597200 }, { "epoch": 3.36, "learning_rate": 1.6436758348932248e-05, "loss": 0.1829, "step": 597300 }, { "epoch": 3.36, "learning_rate": 1.6431137118669794e-05, "loss": 0.1731, "step": 597400 }, { "epoch": 3.36, "learning_rate": 1.6425515888407337e-05, "loss": 0.1863, "step": 597500 }, { "epoch": 3.36, "learning_rate": 1.6419894658144883e-05, "loss": 0.1838, "step": 597600 }, { "epoch": 3.36, "learning_rate": 1.6414273427882426e-05, "loss": 0.1882, "step": 597700 }, { "epoch": 3.36, "learning_rate": 1.6408652197619972e-05, "loss": 0.1825, "step": 597800 }, { "epoch": 3.36, "learning_rate": 1.6403030967357515e-05, "loss": 0.1803, "step": 597900 }, { "epoch": 3.36, "learning_rate": 1.639740973709506e-05, "loss": 0.1821, "step": 598000 }, { "epoch": 3.36, "learning_rate": 1.6391788506832605e-05, "loss": 0.1853, "step": 598100 }, { "epoch": 3.36, "learning_rate": 1.638616727657015e-05, "loss": 0.1885, "step": 598200 }, { "epoch": 3.36, "learning_rate": 1.6380546046307697e-05, "loss": 0.1835, "step": 598300 }, { "epoch": 3.36, "learning_rate": 1.637492481604524e-05, "loss": 0.1892, "step": 598400 }, { "epoch": 3.36, "learning_rate": 1.6369303585782786e-05, "loss": 0.1787, "step": 598500 }, { "epoch": 3.36, "learning_rate": 1.636368235552033e-05, "loss": 0.1821, "step": 598600 }, { "epoch": 3.37, "learning_rate": 1.6358061125257875e-05, "loss": 0.1871, "step": 598700 }, { "epoch": 3.37, "learning_rate": 1.6352496107298043e-05, "loss": 0.1798, "step": 598800 }, { "epoch": 3.37, "learning_rate": 1.634687487703559e-05, "loss": 0.183, "step": 598900 }, { "epoch": 3.37, "learning_rate": 1.6341253646773132e-05, "loss": 0.1868, "step": 599000 }, { "epoch": 3.37, "learning_rate": 1.633563241651068e-05, "loss": 0.1857, "step": 599100 }, { "epoch": 3.37, "learning_rate": 1.633001118624822e-05, "loss": 0.1863, "step": 599200 }, { "epoch": 3.37, "learning_rate": 1.6324389955985768e-05, "loss": 0.1875, "step": 599300 }, { "epoch": 3.37, "learning_rate": 1.6318768725723314e-05, "loss": 0.1878, "step": 599400 }, { "epoch": 3.37, "learning_rate": 1.6313147495460857e-05, "loss": 0.1864, "step": 599500 }, { "epoch": 3.37, "learning_rate": 1.6307526265198403e-05, "loss": 0.1898, "step": 599600 }, { "epoch": 3.37, "learning_rate": 1.6301905034935946e-05, "loss": 0.1793, "step": 599700 }, { "epoch": 3.37, "learning_rate": 1.6296283804673492e-05, "loss": 0.1895, "step": 599800 }, { "epoch": 3.37, "learning_rate": 1.6290662574411035e-05, "loss": 0.19, "step": 599900 }, { "epoch": 3.37, "learning_rate": 1.628504134414858e-05, "loss": 0.1929, "step": 600000 }, { "epoch": 3.37, "learning_rate": 1.6279420113886128e-05, "loss": 0.1971, "step": 600100 }, { "epoch": 3.37, "learning_rate": 1.627379888362367e-05, "loss": 0.1807, "step": 600200 }, { "epoch": 3.37, "learning_rate": 1.6268177653361217e-05, "loss": 0.1766, "step": 600300 }, { "epoch": 3.37, "learning_rate": 1.626255642309876e-05, "loss": 0.1832, "step": 600400 }, { "epoch": 3.38, "learning_rate": 1.6256935192836306e-05, "loss": 0.1895, "step": 600500 }, { "epoch": 3.38, "learning_rate": 1.625131396257385e-05, "loss": 0.1827, "step": 600600 }, { "epoch": 3.38, "learning_rate": 1.6245692732311392e-05, "loss": 0.1838, "step": 600700 }, { "epoch": 3.38, "learning_rate": 1.6240071502048938e-05, "loss": 0.1862, "step": 600800 }, { "epoch": 3.38, "learning_rate": 1.6234450271786484e-05, "loss": 0.1805, "step": 600900 }, { "epoch": 3.38, "learning_rate": 1.622882904152403e-05, "loss": 0.1878, "step": 601000 }, { "epoch": 3.38, "learning_rate": 1.6223207811261574e-05, "loss": 0.1831, "step": 601100 }, { "epoch": 3.38, "learning_rate": 1.621758658099912e-05, "loss": 0.1846, "step": 601200 }, { "epoch": 3.38, "learning_rate": 1.6211965350736663e-05, "loss": 0.1772, "step": 601300 }, { "epoch": 3.38, "learning_rate": 1.6206344120474206e-05, "loss": 0.186, "step": 601400 }, { "epoch": 3.38, "learning_rate": 1.6200722890211752e-05, "loss": 0.1822, "step": 601500 }, { "epoch": 3.38, "learning_rate": 1.6195101659949298e-05, "loss": 0.1911, "step": 601600 }, { "epoch": 3.38, "learning_rate": 1.6189480429686844e-05, "loss": 0.1833, "step": 601700 }, { "epoch": 3.38, "learning_rate": 1.6183859199424387e-05, "loss": 0.1876, "step": 601800 }, { "epoch": 3.38, "learning_rate": 1.6178237969161934e-05, "loss": 0.1839, "step": 601900 }, { "epoch": 3.38, "learning_rate": 1.6172616738899476e-05, "loss": 0.1882, "step": 602000 }, { "epoch": 3.38, "learning_rate": 1.616699550863702e-05, "loss": 0.1862, "step": 602100 }, { "epoch": 3.39, "learning_rate": 1.6161374278374566e-05, "loss": 0.1796, "step": 602200 }, { "epoch": 3.39, "learning_rate": 1.615575304811211e-05, "loss": 0.1806, "step": 602300 }, { "epoch": 3.39, "learning_rate": 1.6150131817849655e-05, "loss": 0.1816, "step": 602400 }, { "epoch": 3.39, "learning_rate": 1.61445105875872e-05, "loss": 0.1852, "step": 602500 }, { "epoch": 3.39, "learning_rate": 1.6138889357324744e-05, "loss": 0.1871, "step": 602600 }, { "epoch": 3.39, "learning_rate": 1.613326812706229e-05, "loss": 0.1787, "step": 602700 }, { "epoch": 3.39, "learning_rate": 1.6127646896799833e-05, "loss": 0.1852, "step": 602800 }, { "epoch": 3.39, "learning_rate": 1.6122081878840004e-05, "loss": 0.1913, "step": 602900 }, { "epoch": 3.39, "learning_rate": 1.6116460648577547e-05, "loss": 0.1904, "step": 603000 }, { "epoch": 3.39, "learning_rate": 1.6110839418315093e-05, "loss": 0.1835, "step": 603100 }, { "epoch": 3.39, "learning_rate": 1.6105218188052636e-05, "loss": 0.1849, "step": 603200 }, { "epoch": 3.39, "learning_rate": 1.6099596957790182e-05, "loss": 0.1911, "step": 603300 }, { "epoch": 3.39, "learning_rate": 1.6093975727527725e-05, "loss": 0.1831, "step": 603400 }, { "epoch": 3.39, "learning_rate": 1.608835449726527e-05, "loss": 0.1849, "step": 603500 }, { "epoch": 3.39, "learning_rate": 1.6082733267002818e-05, "loss": 0.1852, "step": 603600 }, { "epoch": 3.39, "learning_rate": 1.607711203674036e-05, "loss": 0.1834, "step": 603700 }, { "epoch": 3.39, "learning_rate": 1.6071490806477907e-05, "loss": 0.1809, "step": 603800 }, { "epoch": 3.39, "learning_rate": 1.606586957621545e-05, "loss": 0.186, "step": 603900 }, { "epoch": 3.4, "learning_rate": 1.6060248345952996e-05, "loss": 0.1855, "step": 604000 }, { "epoch": 3.4, "learning_rate": 1.605462711569054e-05, "loss": 0.1835, "step": 604100 }, { "epoch": 3.4, "learning_rate": 1.6049005885428085e-05, "loss": 0.1847, "step": 604200 }, { "epoch": 3.4, "learning_rate": 1.6043384655165632e-05, "loss": 0.1861, "step": 604300 }, { "epoch": 3.4, "learning_rate": 1.6037763424903175e-05, "loss": 0.19, "step": 604400 }, { "epoch": 3.4, "learning_rate": 1.603214219464072e-05, "loss": 0.1788, "step": 604500 }, { "epoch": 3.4, "learning_rate": 1.6026520964378264e-05, "loss": 0.1838, "step": 604600 }, { "epoch": 3.4, "learning_rate": 1.602089973411581e-05, "loss": 0.1825, "step": 604700 }, { "epoch": 3.4, "learning_rate": 1.6015278503853353e-05, "loss": 0.1818, "step": 604800 }, { "epoch": 3.4, "learning_rate": 1.60096572735909e-05, "loss": 0.1811, "step": 604900 }, { "epoch": 3.4, "learning_rate": 1.6004036043328442e-05, "loss": 0.1779, "step": 605000 }, { "epoch": 3.4, "learning_rate": 1.599841481306599e-05, "loss": 0.1876, "step": 605100 }, { "epoch": 3.4, "learning_rate": 1.5992793582803535e-05, "loss": 0.1899, "step": 605200 }, { "epoch": 3.4, "learning_rate": 1.5987172352541078e-05, "loss": 0.1841, "step": 605300 }, { "epoch": 3.4, "learning_rate": 1.5981551122278624e-05, "loss": 0.1885, "step": 605400 }, { "epoch": 3.4, "learning_rate": 1.5975929892016167e-05, "loss": 0.1853, "step": 605500 }, { "epoch": 3.4, "learning_rate": 1.5970308661753713e-05, "loss": 0.1849, "step": 605600 }, { "epoch": 3.4, "learning_rate": 1.5964687431491256e-05, "loss": 0.1828, "step": 605700 }, { "epoch": 3.41, "learning_rate": 1.5959066201228802e-05, "loss": 0.1855, "step": 605800 }, { "epoch": 3.41, "learning_rate": 1.595350118326897e-05, "loss": 0.1843, "step": 605900 }, { "epoch": 3.41, "learning_rate": 1.5947879953006516e-05, "loss": 0.1835, "step": 606000 }, { "epoch": 3.41, "learning_rate": 1.594225872274406e-05, "loss": 0.1882, "step": 606100 }, { "epoch": 3.41, "learning_rate": 1.5936637492481605e-05, "loss": 0.1853, "step": 606200 }, { "epoch": 3.41, "learning_rate": 1.593101626221915e-05, "loss": 0.1863, "step": 606300 }, { "epoch": 3.41, "learning_rate": 1.592545124425932e-05, "loss": 0.1919, "step": 606400 }, { "epoch": 3.41, "learning_rate": 1.5919830013996865e-05, "loss": 0.184, "step": 606500 }, { "epoch": 3.41, "learning_rate": 1.5914208783734408e-05, "loss": 0.1852, "step": 606600 }, { "epoch": 3.41, "learning_rate": 1.5908587553471955e-05, "loss": 0.182, "step": 606700 }, { "epoch": 3.41, "learning_rate": 1.5902966323209497e-05, "loss": 0.1844, "step": 606800 }, { "epoch": 3.41, "learning_rate": 1.5897345092947044e-05, "loss": 0.1848, "step": 606900 }, { "epoch": 3.41, "learning_rate": 1.5891723862684587e-05, "loss": 0.1846, "step": 607000 }, { "epoch": 3.41, "learning_rate": 1.5886102632422133e-05, "loss": 0.1787, "step": 607100 }, { "epoch": 3.41, "learning_rate": 1.588048140215968e-05, "loss": 0.1803, "step": 607200 }, { "epoch": 3.41, "learning_rate": 1.5874860171897222e-05, "loss": 0.1805, "step": 607300 }, { "epoch": 3.41, "learning_rate": 1.586923894163477e-05, "loss": 0.1825, "step": 607400 }, { "epoch": 3.41, "learning_rate": 1.586361771137231e-05, "loss": 0.1883, "step": 607500 }, { "epoch": 3.42, "learning_rate": 1.5857996481109858e-05, "loss": 0.184, "step": 607600 }, { "epoch": 3.42, "learning_rate": 1.58523752508474e-05, "loss": 0.1832, "step": 607700 }, { "epoch": 3.42, "learning_rate": 1.5846754020584943e-05, "loss": 0.1808, "step": 607800 }, { "epoch": 3.42, "learning_rate": 1.584113279032249e-05, "loss": 0.1782, "step": 607900 }, { "epoch": 3.42, "learning_rate": 1.5835511560060036e-05, "loss": 0.1874, "step": 608000 }, { "epoch": 3.42, "learning_rate": 1.5829890329797582e-05, "loss": 0.1817, "step": 608100 }, { "epoch": 3.42, "learning_rate": 1.5824269099535125e-05, "loss": 0.1862, "step": 608200 }, { "epoch": 3.42, "learning_rate": 1.581864786927267e-05, "loss": 0.1814, "step": 608300 }, { "epoch": 3.42, "learning_rate": 1.5813026639010214e-05, "loss": 0.1865, "step": 608400 }, { "epoch": 3.42, "learning_rate": 1.5807405408747757e-05, "loss": 0.1898, "step": 608500 }, { "epoch": 3.42, "learning_rate": 1.5801784178485303e-05, "loss": 0.1871, "step": 608600 }, { "epoch": 3.42, "learning_rate": 1.579616294822285e-05, "loss": 0.1866, "step": 608700 }, { "epoch": 3.42, "learning_rate": 1.5790541717960396e-05, "loss": 0.1817, "step": 608800 }, { "epoch": 3.42, "learning_rate": 1.578492048769794e-05, "loss": 0.1885, "step": 608900 }, { "epoch": 3.42, "learning_rate": 1.5779299257435485e-05, "loss": 0.1836, "step": 609000 }, { "epoch": 3.42, "learning_rate": 1.5773678027173028e-05, "loss": 0.1825, "step": 609100 }, { "epoch": 3.42, "learning_rate": 1.576805679691057e-05, "loss": 0.1744, "step": 609200 }, { "epoch": 3.43, "learning_rate": 1.5762435566648117e-05, "loss": 0.1837, "step": 609300 }, { "epoch": 3.43, "learning_rate": 1.575681433638566e-05, "loss": 0.1795, "step": 609400 }, { "epoch": 3.43, "learning_rate": 1.5751193106123206e-05, "loss": 0.1818, "step": 609500 }, { "epoch": 3.43, "learning_rate": 1.5745571875860753e-05, "loss": 0.1817, "step": 609600 }, { "epoch": 3.43, "learning_rate": 1.57399506455983e-05, "loss": 0.1881, "step": 609700 }, { "epoch": 3.43, "learning_rate": 1.5734329415335842e-05, "loss": 0.1816, "step": 609800 }, { "epoch": 3.43, "learning_rate": 1.5728708185073385e-05, "loss": 0.1847, "step": 609900 }, { "epoch": 3.43, "learning_rate": 1.572308695481093e-05, "loss": 0.189, "step": 610000 }, { "epoch": 3.43, "learning_rate": 1.5717465724548474e-05, "loss": 0.1822, "step": 610100 }, { "epoch": 3.43, "learning_rate": 1.571184449428602e-05, "loss": 0.1822, "step": 610200 }, { "epoch": 3.43, "learning_rate": 1.5706223264023563e-05, "loss": 0.184, "step": 610300 }, { "epoch": 3.43, "learning_rate": 1.5700602033761113e-05, "loss": 0.1843, "step": 610400 }, { "epoch": 3.43, "learning_rate": 1.5694980803498655e-05, "loss": 0.1832, "step": 610500 }, { "epoch": 3.43, "learning_rate": 1.56893595732362e-05, "loss": 0.1842, "step": 610600 }, { "epoch": 3.43, "learning_rate": 1.5683738342973745e-05, "loss": 0.1806, "step": 610700 }, { "epoch": 3.43, "learning_rate": 1.5678117112711288e-05, "loss": 0.1809, "step": 610800 }, { "epoch": 3.43, "learning_rate": 1.5672495882448834e-05, "loss": 0.1857, "step": 610900 }, { "epoch": 3.43, "learning_rate": 1.5666874652186377e-05, "loss": 0.1829, "step": 611000 }, { "epoch": 3.44, "learning_rate": 1.5661253421923923e-05, "loss": 0.1793, "step": 611100 }, { "epoch": 3.44, "learning_rate": 1.565563219166147e-05, "loss": 0.1761, "step": 611200 }, { "epoch": 3.44, "learning_rate": 1.5650010961399012e-05, "loss": 0.1826, "step": 611300 }, { "epoch": 3.44, "learning_rate": 1.564438973113656e-05, "loss": 0.1818, "step": 611400 }, { "epoch": 3.44, "learning_rate": 1.56387685008741e-05, "loss": 0.1787, "step": 611500 }, { "epoch": 3.44, "learning_rate": 1.5633147270611648e-05, "loss": 0.1802, "step": 611600 }, { "epoch": 3.44, "learning_rate": 1.562752604034919e-05, "loss": 0.185, "step": 611700 }, { "epoch": 3.44, "learning_rate": 1.5621904810086737e-05, "loss": 0.1871, "step": 611800 }, { "epoch": 3.44, "learning_rate": 1.561628357982428e-05, "loss": 0.1903, "step": 611900 }, { "epoch": 3.44, "learning_rate": 1.5610662349561826e-05, "loss": 0.1841, "step": 612000 }, { "epoch": 3.44, "learning_rate": 1.5605041119299372e-05, "loss": 0.1828, "step": 612100 }, { "epoch": 3.44, "learning_rate": 1.5599419889036915e-05, "loss": 0.1837, "step": 612200 }, { "epoch": 3.44, "learning_rate": 1.559379865877446e-05, "loss": 0.1845, "step": 612300 }, { "epoch": 3.44, "learning_rate": 1.5588177428512004e-05, "loss": 0.1862, "step": 612400 }, { "epoch": 3.44, "learning_rate": 1.5582612410552175e-05, "loss": 0.188, "step": 612500 }, { "epoch": 3.44, "learning_rate": 1.5576991180289718e-05, "loss": 0.183, "step": 612600 }, { "epoch": 3.44, "learning_rate": 1.5571369950027264e-05, "loss": 0.1823, "step": 612700 }, { "epoch": 3.44, "learning_rate": 1.5565748719764807e-05, "loss": 0.1846, "step": 612800 }, { "epoch": 3.45, "learning_rate": 1.556012748950235e-05, "loss": 0.1876, "step": 612900 }, { "epoch": 3.45, "learning_rate": 1.55545062592399e-05, "loss": 0.1884, "step": 613000 }, { "epoch": 3.45, "learning_rate": 1.5548885028977443e-05, "loss": 0.1836, "step": 613100 }, { "epoch": 3.45, "learning_rate": 1.554326379871499e-05, "loss": 0.1874, "step": 613200 }, { "epoch": 3.45, "learning_rate": 1.5537642568452532e-05, "loss": 0.1822, "step": 613300 }, { "epoch": 3.45, "learning_rate": 1.5532021338190078e-05, "loss": 0.1834, "step": 613400 }, { "epoch": 3.45, "learning_rate": 1.552640010792762e-05, "loss": 0.185, "step": 613500 }, { "epoch": 3.45, "learning_rate": 1.5520778877665164e-05, "loss": 0.1816, "step": 613600 }, { "epoch": 3.45, "learning_rate": 1.551515764740271e-05, "loss": 0.185, "step": 613700 }, { "epoch": 3.45, "learning_rate": 1.5509536417140257e-05, "loss": 0.1814, "step": 613800 }, { "epoch": 3.45, "learning_rate": 1.5503915186877803e-05, "loss": 0.1777, "step": 613900 }, { "epoch": 3.45, "learning_rate": 1.5498293956615346e-05, "loss": 0.1836, "step": 614000 }, { "epoch": 3.45, "learning_rate": 1.5492672726352892e-05, "loss": 0.1817, "step": 614100 }, { "epoch": 3.45, "learning_rate": 1.5487051496090435e-05, "loss": 0.1839, "step": 614200 }, { "epoch": 3.45, "learning_rate": 1.5481430265827978e-05, "loss": 0.1837, "step": 614300 }, { "epoch": 3.45, "learning_rate": 1.5475809035565524e-05, "loss": 0.1829, "step": 614400 }, { "epoch": 3.45, "learning_rate": 1.5470187805303067e-05, "loss": 0.1882, "step": 614500 }, { "epoch": 3.45, "learning_rate": 1.5464622787343238e-05, "loss": 0.1835, "step": 614600 }, { "epoch": 3.46, "learning_rate": 1.545900155708078e-05, "loss": 0.1833, "step": 614700 }, { "epoch": 3.46, "learning_rate": 1.5453380326818327e-05, "loss": 0.1827, "step": 614800 }, { "epoch": 3.46, "learning_rate": 1.5447759096555873e-05, "loss": 0.1904, "step": 614900 }, { "epoch": 3.46, "learning_rate": 1.544213786629342e-05, "loss": 0.1866, "step": 615000 }, { "epoch": 3.46, "learning_rate": 1.5436516636030963e-05, "loss": 0.1832, "step": 615100 }, { "epoch": 3.46, "learning_rate": 1.5430951618071134e-05, "loss": 0.183, "step": 615200 }, { "epoch": 3.46, "learning_rate": 1.54253866001113e-05, "loss": 0.1879, "step": 615300 }, { "epoch": 3.46, "learning_rate": 1.5419765369848848e-05, "loss": 0.1861, "step": 615400 }, { "epoch": 3.46, "learning_rate": 1.541414413958639e-05, "loss": 0.1879, "step": 615500 }, { "epoch": 3.46, "learning_rate": 1.5408522909323937e-05, "loss": 0.1807, "step": 615600 }, { "epoch": 3.46, "learning_rate": 1.540290167906148e-05, "loss": 0.1812, "step": 615700 }, { "epoch": 3.46, "learning_rate": 1.5397280448799026e-05, "loss": 0.1811, "step": 615800 }, { "epoch": 3.46, "learning_rate": 1.539165921853657e-05, "loss": 0.1865, "step": 615900 }, { "epoch": 3.46, "learning_rate": 1.5386037988274115e-05, "loss": 0.1851, "step": 616000 }, { "epoch": 3.46, "learning_rate": 1.5380416758011658e-05, "loss": 0.18, "step": 616100 }, { "epoch": 3.46, "learning_rate": 1.5374795527749204e-05, "loss": 0.1825, "step": 616200 }, { "epoch": 3.46, "learning_rate": 1.536917429748675e-05, "loss": 0.1902, "step": 616300 }, { "epoch": 3.46, "learning_rate": 1.5363553067224293e-05, "loss": 0.1869, "step": 616400 }, { "epoch": 3.47, "learning_rate": 1.535793183696184e-05, "loss": 0.1837, "step": 616500 }, { "epoch": 3.47, "learning_rate": 1.5352310606699382e-05, "loss": 0.1854, "step": 616600 }, { "epoch": 3.47, "learning_rate": 1.534668937643693e-05, "loss": 0.1784, "step": 616700 }, { "epoch": 3.47, "learning_rate": 1.534106814617447e-05, "loss": 0.1807, "step": 616800 }, { "epoch": 3.47, "learning_rate": 1.5335446915912015e-05, "loss": 0.179, "step": 616900 }, { "epoch": 3.47, "learning_rate": 1.5329825685649564e-05, "loss": 0.1837, "step": 617000 }, { "epoch": 3.47, "learning_rate": 1.5324204455387107e-05, "loss": 0.1842, "step": 617100 }, { "epoch": 3.47, "learning_rate": 1.5318583225124653e-05, "loss": 0.1824, "step": 617200 }, { "epoch": 3.47, "learning_rate": 1.5312961994862196e-05, "loss": 0.1863, "step": 617300 }, { "epoch": 3.47, "learning_rate": 1.530734076459974e-05, "loss": 0.1886, "step": 617400 }, { "epoch": 3.47, "learning_rate": 1.5301719534337285e-05, "loss": 0.1769, "step": 617500 }, { "epoch": 3.47, "learning_rate": 1.529609830407483e-05, "loss": 0.1851, "step": 617600 }, { "epoch": 3.47, "learning_rate": 1.5290477073812375e-05, "loss": 0.1779, "step": 617700 }, { "epoch": 3.47, "learning_rate": 1.528485584354992e-05, "loss": 0.1788, "step": 617800 }, { "epoch": 3.47, "learning_rate": 1.5279234613287467e-05, "loss": 0.1837, "step": 617900 }, { "epoch": 3.47, "learning_rate": 1.527361338302501e-05, "loss": 0.184, "step": 618000 }, { "epoch": 3.47, "learning_rate": 1.5267992152762553e-05, "loss": 0.1839, "step": 618100 }, { "epoch": 3.48, "learning_rate": 1.52623709225001e-05, "loss": 0.1859, "step": 618200 }, { "epoch": 3.48, "learning_rate": 1.5256749692237642e-05, "loss": 0.187, "step": 618300 }, { "epoch": 3.48, "learning_rate": 1.525112846197519e-05, "loss": 0.1815, "step": 618400 }, { "epoch": 3.48, "learning_rate": 1.5245507231712733e-05, "loss": 0.1803, "step": 618500 }, { "epoch": 3.48, "learning_rate": 1.523988600145028e-05, "loss": 0.1826, "step": 618600 }, { "epoch": 3.48, "learning_rate": 1.5234264771187822e-05, "loss": 0.1828, "step": 618700 }, { "epoch": 3.48, "learning_rate": 1.5228643540925367e-05, "loss": 0.1842, "step": 618800 }, { "epoch": 3.48, "learning_rate": 1.5223022310662913e-05, "loss": 0.185, "step": 618900 }, { "epoch": 3.48, "learning_rate": 1.5217401080400456e-05, "loss": 0.185, "step": 619000 }, { "epoch": 3.48, "learning_rate": 1.5211779850138002e-05, "loss": 0.1842, "step": 619100 }, { "epoch": 3.48, "learning_rate": 1.5206158619875547e-05, "loss": 0.1852, "step": 619200 }, { "epoch": 3.48, "learning_rate": 1.5200537389613093e-05, "loss": 0.1854, "step": 619300 }, { "epoch": 3.48, "learning_rate": 1.5194916159350636e-05, "loss": 0.1844, "step": 619400 }, { "epoch": 3.48, "learning_rate": 1.518929492908818e-05, "loss": 0.1873, "step": 619500 }, { "epoch": 3.48, "learning_rate": 1.5183673698825727e-05, "loss": 0.1838, "step": 619600 }, { "epoch": 3.48, "learning_rate": 1.517805246856327e-05, "loss": 0.1825, "step": 619700 }, { "epoch": 3.48, "learning_rate": 1.5172431238300816e-05, "loss": 0.1823, "step": 619800 }, { "epoch": 3.48, "learning_rate": 1.5166810008038359e-05, "loss": 0.1813, "step": 619900 }, { "epoch": 3.49, "learning_rate": 1.5161188777775907e-05, "loss": 0.1883, "step": 620000 }, { "epoch": 3.49, "learning_rate": 1.515556754751345e-05, "loss": 0.1823, "step": 620100 }, { "epoch": 3.49, "learning_rate": 1.5150002529553619e-05, "loss": 0.1815, "step": 620200 }, { "epoch": 3.49, "learning_rate": 1.5144381299291164e-05, "loss": 0.1825, "step": 620300 }, { "epoch": 3.49, "learning_rate": 1.513876006902871e-05, "loss": 0.1808, "step": 620400 }, { "epoch": 3.49, "learning_rate": 1.5133138838766253e-05, "loss": 0.1802, "step": 620500 }, { "epoch": 3.49, "learning_rate": 1.5127517608503797e-05, "loss": 0.182, "step": 620600 }, { "epoch": 3.49, "learning_rate": 1.5121896378241344e-05, "loss": 0.187, "step": 620700 }, { "epoch": 3.49, "learning_rate": 1.5116275147978887e-05, "loss": 0.1845, "step": 620800 }, { "epoch": 3.49, "learning_rate": 1.5110653917716433e-05, "loss": 0.1812, "step": 620900 }, { "epoch": 3.49, "learning_rate": 1.5105032687453977e-05, "loss": 0.1836, "step": 621000 }, { "epoch": 3.49, "learning_rate": 1.5099411457191524e-05, "loss": 0.1841, "step": 621100 }, { "epoch": 3.49, "learning_rate": 1.5093790226929067e-05, "loss": 0.1876, "step": 621200 }, { "epoch": 3.49, "learning_rate": 1.508816899666661e-05, "loss": 0.1828, "step": 621300 }, { "epoch": 3.49, "learning_rate": 1.5082547766404156e-05, "loss": 0.1822, "step": 621400 }, { "epoch": 3.49, "learning_rate": 1.50769265361417e-05, "loss": 0.1862, "step": 621500 }, { "epoch": 3.49, "learning_rate": 1.5071305305879247e-05, "loss": 0.1759, "step": 621600 }, { "epoch": 3.49, "learning_rate": 1.506568407561679e-05, "loss": 0.1859, "step": 621700 }, { "epoch": 3.5, "learning_rate": 1.5060062845354336e-05, "loss": 0.1804, "step": 621800 }, { "epoch": 3.5, "learning_rate": 1.505444161509188e-05, "loss": 0.1816, "step": 621900 }, { "epoch": 3.5, "learning_rate": 1.5048820384829423e-05, "loss": 0.1866, "step": 622000 }, { "epoch": 3.5, "learning_rate": 1.504319915456697e-05, "loss": 0.1831, "step": 622100 }, { "epoch": 3.5, "learning_rate": 1.5037577924304514e-05, "loss": 0.1872, "step": 622200 }, { "epoch": 3.5, "learning_rate": 1.503195669404206e-05, "loss": 0.1831, "step": 622300 }, { "epoch": 3.5, "learning_rate": 1.5026335463779603e-05, "loss": 0.1741, "step": 622400 }, { "epoch": 3.5, "learning_rate": 1.5020714233517146e-05, "loss": 0.1914, "step": 622500 }, { "epoch": 3.5, "learning_rate": 1.5015149215557317e-05, "loss": 0.1866, "step": 622600 }, { "epoch": 3.5, "learning_rate": 1.5009527985294863e-05, "loss": 0.1844, "step": 622700 }, { "epoch": 3.5, "learning_rate": 1.5003906755032406e-05, "loss": 0.1844, "step": 622800 }, { "epoch": 3.5, "learning_rate": 1.4998285524769951e-05, "loss": 0.1826, "step": 622900 }, { "epoch": 3.5, "learning_rate": 1.4992664294507497e-05, "loss": 0.1858, "step": 623000 }, { "epoch": 3.5, "learning_rate": 1.498704306424504e-05, "loss": 0.1744, "step": 623100 }, { "epoch": 3.5, "learning_rate": 1.4981421833982586e-05, "loss": 0.185, "step": 623200 }, { "epoch": 3.5, "learning_rate": 1.4975800603720131e-05, "loss": 0.1798, "step": 623300 }, { "epoch": 3.5, "learning_rate": 1.4970179373457677e-05, "loss": 0.1848, "step": 623400 }, { "epoch": 3.5, "learning_rate": 1.496455814319522e-05, "loss": 0.1781, "step": 623500 }, { "epoch": 3.51, "learning_rate": 1.4958936912932765e-05, "loss": 0.1798, "step": 623600 }, { "epoch": 3.51, "learning_rate": 1.4953315682670311e-05, "loss": 0.1796, "step": 623700 }, { "epoch": 3.51, "learning_rate": 1.4947694452407854e-05, "loss": 0.187, "step": 623800 }, { "epoch": 3.51, "learning_rate": 1.49420732221454e-05, "loss": 0.182, "step": 623900 }, { "epoch": 3.51, "learning_rate": 1.4936451991882943e-05, "loss": 0.1844, "step": 624000 }, { "epoch": 3.51, "learning_rate": 1.493083076162049e-05, "loss": 0.1872, "step": 624100 }, { "epoch": 3.51, "learning_rate": 1.4925209531358034e-05, "loss": 0.183, "step": 624200 }, { "epoch": 3.51, "learning_rate": 1.4919588301095577e-05, "loss": 0.1815, "step": 624300 }, { "epoch": 3.51, "learning_rate": 1.4913967070833123e-05, "loss": 0.1871, "step": 624400 }, { "epoch": 3.51, "learning_rate": 1.4908345840570668e-05, "loss": 0.1813, "step": 624500 }, { "epoch": 3.51, "learning_rate": 1.4902780822610837e-05, "loss": 0.1862, "step": 624600 }, { "epoch": 3.51, "learning_rate": 1.4897159592348382e-05, "loss": 0.1843, "step": 624700 }, { "epoch": 3.51, "learning_rate": 1.4891538362085928e-05, "loss": 0.1847, "step": 624800 }, { "epoch": 3.51, "learning_rate": 1.488591713182347e-05, "loss": 0.18, "step": 624900 }, { "epoch": 3.51, "learning_rate": 1.4880295901561017e-05, "loss": 0.1874, "step": 625000 }, { "epoch": 3.51, "learning_rate": 1.4874674671298562e-05, "loss": 0.1881, "step": 625100 }, { "epoch": 3.51, "learning_rate": 1.4869053441036108e-05, "loss": 0.1866, "step": 625200 }, { "epoch": 3.51, "learning_rate": 1.486343221077365e-05, "loss": 0.1877, "step": 625300 }, { "epoch": 3.52, "learning_rate": 1.4857810980511194e-05, "loss": 0.1773, "step": 625400 }, { "epoch": 3.52, "learning_rate": 1.485218975024874e-05, "loss": 0.1807, "step": 625500 }, { "epoch": 3.52, "learning_rate": 1.4846568519986284e-05, "loss": 0.1837, "step": 625600 }, { "epoch": 3.52, "learning_rate": 1.484094728972383e-05, "loss": 0.1777, "step": 625700 }, { "epoch": 3.52, "learning_rate": 1.4835326059461374e-05, "loss": 0.1795, "step": 625800 }, { "epoch": 3.52, "learning_rate": 1.482970482919892e-05, "loss": 0.1871, "step": 625900 }, { "epoch": 3.52, "learning_rate": 1.4824083598936464e-05, "loss": 0.1814, "step": 626000 }, { "epoch": 3.52, "learning_rate": 1.4818462368674007e-05, "loss": 0.1839, "step": 626100 }, { "epoch": 3.52, "learning_rate": 1.4812841138411554e-05, "loss": 0.1859, "step": 626200 }, { "epoch": 3.52, "learning_rate": 1.4807219908149098e-05, "loss": 0.1807, "step": 626300 }, { "epoch": 3.52, "learning_rate": 1.4801598677886644e-05, "loss": 0.1798, "step": 626400 }, { "epoch": 3.52, "learning_rate": 1.4795977447624187e-05, "loss": 0.1815, "step": 626500 }, { "epoch": 3.52, "learning_rate": 1.4790356217361734e-05, "loss": 0.1834, "step": 626600 }, { "epoch": 3.52, "learning_rate": 1.4784734987099278e-05, "loss": 0.1748, "step": 626700 }, { "epoch": 3.52, "learning_rate": 1.4779113756836821e-05, "loss": 0.1786, "step": 626800 }, { "epoch": 3.52, "learning_rate": 1.4773492526574367e-05, "loss": 0.1823, "step": 626900 }, { "epoch": 3.52, "learning_rate": 1.476787129631191e-05, "loss": 0.1791, "step": 627000 }, { "epoch": 3.53, "learning_rate": 1.4762250066049457e-05, "loss": 0.1827, "step": 627100 }, { "epoch": 3.53, "learning_rate": 1.4756628835787001e-05, "loss": 0.1881, "step": 627200 }, { "epoch": 3.53, "learning_rate": 1.4751007605524547e-05, "loss": 0.1872, "step": 627300 }, { "epoch": 3.53, "learning_rate": 1.474538637526209e-05, "loss": 0.1798, "step": 627400 }, { "epoch": 3.53, "learning_rate": 1.4739765144999635e-05, "loss": 0.1885, "step": 627500 }, { "epoch": 3.53, "learning_rate": 1.4734143914737181e-05, "loss": 0.185, "step": 627600 }, { "epoch": 3.53, "learning_rate": 1.4728522684474724e-05, "loss": 0.1796, "step": 627700 }, { "epoch": 3.53, "learning_rate": 1.472290145421227e-05, "loss": 0.1809, "step": 627800 }, { "epoch": 3.53, "learning_rate": 1.4717280223949815e-05, "loss": 0.1851, "step": 627900 }, { "epoch": 3.53, "learning_rate": 1.4711658993687358e-05, "loss": 0.1799, "step": 628000 }, { "epoch": 3.53, "learning_rate": 1.4706037763424904e-05, "loss": 0.1786, "step": 628100 }, { "epoch": 3.53, "learning_rate": 1.4700416533162447e-05, "loss": 0.1848, "step": 628200 }, { "epoch": 3.53, "learning_rate": 1.4694795302899993e-05, "loss": 0.1799, "step": 628300 }, { "epoch": 3.53, "learning_rate": 1.4689174072637538e-05, "loss": 0.1882, "step": 628400 }, { "epoch": 3.53, "learning_rate": 1.4683609054677707e-05, "loss": 0.1858, "step": 628500 }, { "epoch": 3.53, "learning_rate": 1.4677987824415252e-05, "loss": 0.1819, "step": 628600 }, { "epoch": 3.53, "learning_rate": 1.4672366594152798e-05, "loss": 0.1865, "step": 628700 }, { "epoch": 3.53, "learning_rate": 1.4666745363890341e-05, "loss": 0.1832, "step": 628800 }, { "epoch": 3.54, "learning_rate": 1.4661124133627887e-05, "loss": 0.1851, "step": 628900 }, { "epoch": 3.54, "learning_rate": 1.4655502903365432e-05, "loss": 0.1833, "step": 629000 }, { "epoch": 3.54, "learning_rate": 1.4649881673102975e-05, "loss": 0.1786, "step": 629100 }, { "epoch": 3.54, "learning_rate": 1.4644260442840521e-05, "loss": 0.1814, "step": 629200 }, { "epoch": 3.54, "learning_rate": 1.4638639212578066e-05, "loss": 0.1841, "step": 629300 }, { "epoch": 3.54, "learning_rate": 1.4633017982315612e-05, "loss": 0.1809, "step": 629400 }, { "epoch": 3.54, "learning_rate": 1.4627396752053155e-05, "loss": 0.1843, "step": 629500 }, { "epoch": 3.54, "learning_rate": 1.4621775521790701e-05, "loss": 0.1857, "step": 629600 }, { "epoch": 3.54, "learning_rate": 1.4616154291528244e-05, "loss": 0.1815, "step": 629700 }, { "epoch": 3.54, "learning_rate": 1.4610533061265788e-05, "loss": 0.1833, "step": 629800 }, { "epoch": 3.54, "learning_rate": 1.4604911831003335e-05, "loss": 0.1837, "step": 629900 }, { "epoch": 3.54, "learning_rate": 1.4599290600740878e-05, "loss": 0.1823, "step": 630000 }, { "epoch": 3.54, "learning_rate": 1.4593669370478424e-05, "loss": 0.1851, "step": 630100 }, { "epoch": 3.54, "learning_rate": 1.4588048140215968e-05, "loss": 0.1809, "step": 630200 }, { "epoch": 3.54, "learning_rate": 1.4582426909953515e-05, "loss": 0.18, "step": 630300 }, { "epoch": 3.54, "learning_rate": 1.4576805679691058e-05, "loss": 0.1843, "step": 630400 }, { "epoch": 3.54, "learning_rate": 1.4571184449428602e-05, "loss": 0.184, "step": 630500 }, { "epoch": 3.54, "learning_rate": 1.4565563219166148e-05, "loss": 0.1907, "step": 630600 }, { "epoch": 3.55, "learning_rate": 1.4559941988903691e-05, "loss": 0.1882, "step": 630700 }, { "epoch": 3.55, "learning_rate": 1.4554320758641238e-05, "loss": 0.1868, "step": 630800 }, { "epoch": 3.55, "learning_rate": 1.454869952837878e-05, "loss": 0.1809, "step": 630900 }, { "epoch": 3.55, "learning_rate": 1.4543078298116329e-05, "loss": 0.183, "step": 631000 }, { "epoch": 3.55, "learning_rate": 1.4537457067853871e-05, "loss": 0.1801, "step": 631100 }, { "epoch": 3.55, "learning_rate": 1.4531835837591414e-05, "loss": 0.1832, "step": 631200 }, { "epoch": 3.55, "learning_rate": 1.452621460732896e-05, "loss": 0.1779, "step": 631300 }, { "epoch": 3.55, "learning_rate": 1.4520593377066505e-05, "loss": 0.1837, "step": 631400 }, { "epoch": 3.55, "learning_rate": 1.4514972146804051e-05, "loss": 0.1826, "step": 631500 }, { "epoch": 3.55, "learning_rate": 1.4509350916541594e-05, "loss": 0.1872, "step": 631600 }, { "epoch": 3.55, "learning_rate": 1.450372968627914e-05, "loss": 0.1816, "step": 631700 }, { "epoch": 3.55, "learning_rate": 1.4498108456016685e-05, "loss": 0.1831, "step": 631800 }, { "epoch": 3.55, "learning_rate": 1.4492487225754228e-05, "loss": 0.1833, "step": 631900 }, { "epoch": 3.55, "learning_rate": 1.4486865995491774e-05, "loss": 0.1891, "step": 632000 }, { "epoch": 3.55, "learning_rate": 1.4481244765229319e-05, "loss": 0.1818, "step": 632100 }, { "epoch": 3.55, "learning_rate": 1.4475623534966865e-05, "loss": 0.1818, "step": 632200 }, { "epoch": 3.55, "learning_rate": 1.4470002304704408e-05, "loss": 0.1859, "step": 632300 }, { "epoch": 3.55, "learning_rate": 1.4464381074441951e-05, "loss": 0.179, "step": 632400 }, { "epoch": 3.56, "learning_rate": 1.4458816056482122e-05, "loss": 0.1887, "step": 632500 }, { "epoch": 3.56, "learning_rate": 1.4453194826219668e-05, "loss": 0.1822, "step": 632600 }, { "epoch": 3.56, "learning_rate": 1.4447573595957211e-05, "loss": 0.1795, "step": 632700 }, { "epoch": 3.56, "learning_rate": 1.4441952365694756e-05, "loss": 0.1843, "step": 632800 }, { "epoch": 3.56, "learning_rate": 1.4436331135432302e-05, "loss": 0.1818, "step": 632900 }, { "epoch": 3.56, "learning_rate": 1.4430709905169845e-05, "loss": 0.1832, "step": 633000 }, { "epoch": 3.56, "learning_rate": 1.4425088674907391e-05, "loss": 0.1816, "step": 633100 }, { "epoch": 3.56, "learning_rate": 1.4419467444644936e-05, "loss": 0.1773, "step": 633200 }, { "epoch": 3.56, "learning_rate": 1.4413846214382482e-05, "loss": 0.1893, "step": 633300 }, { "epoch": 3.56, "learning_rate": 1.4408224984120025e-05, "loss": 0.1796, "step": 633400 }, { "epoch": 3.56, "learning_rate": 1.4402603753857568e-05, "loss": 0.1842, "step": 633500 }, { "epoch": 3.56, "learning_rate": 1.4396982523595116e-05, "loss": 0.1865, "step": 633600 }, { "epoch": 3.56, "learning_rate": 1.4391361293332659e-05, "loss": 0.1796, "step": 633700 }, { "epoch": 3.56, "learning_rate": 1.4385740063070205e-05, "loss": 0.1811, "step": 633800 }, { "epoch": 3.56, "learning_rate": 1.4380118832807748e-05, "loss": 0.1796, "step": 633900 }, { "epoch": 3.56, "learning_rate": 1.4374497602545294e-05, "loss": 0.1801, "step": 634000 }, { "epoch": 3.56, "learning_rate": 1.4368876372282839e-05, "loss": 0.1806, "step": 634100 }, { "epoch": 3.56, "learning_rate": 1.4363255142020382e-05, "loss": 0.1834, "step": 634200 }, { "epoch": 3.57, "learning_rate": 1.4357633911757928e-05, "loss": 0.1784, "step": 634300 }, { "epoch": 3.57, "learning_rate": 1.4352012681495472e-05, "loss": 0.181, "step": 634400 }, { "epoch": 3.57, "learning_rate": 1.4346391451233019e-05, "loss": 0.1813, "step": 634500 }, { "epoch": 3.57, "learning_rate": 1.4340826433273186e-05, "loss": 0.1865, "step": 634600 }, { "epoch": 3.57, "learning_rate": 1.4335205203010733e-05, "loss": 0.1901, "step": 634700 }, { "epoch": 3.57, "learning_rate": 1.4329583972748276e-05, "loss": 0.1864, "step": 634800 }, { "epoch": 3.57, "learning_rate": 1.4323962742485822e-05, "loss": 0.1791, "step": 634900 }, { "epoch": 3.57, "learning_rate": 1.4318341512223365e-05, "loss": 0.1893, "step": 635000 }, { "epoch": 3.57, "learning_rate": 1.4312720281960913e-05, "loss": 0.1804, "step": 635100 }, { "epoch": 3.57, "learning_rate": 1.4307099051698456e-05, "loss": 0.1789, "step": 635200 }, { "epoch": 3.57, "learning_rate": 1.4301477821435998e-05, "loss": 0.185, "step": 635300 }, { "epoch": 3.57, "learning_rate": 1.4295856591173545e-05, "loss": 0.1842, "step": 635400 }, { "epoch": 3.57, "learning_rate": 1.429023536091109e-05, "loss": 0.1873, "step": 635500 }, { "epoch": 3.57, "learning_rate": 1.4284614130648636e-05, "loss": 0.1915, "step": 635600 }, { "epoch": 3.57, "learning_rate": 1.4278992900386178e-05, "loss": 0.1824, "step": 635700 }, { "epoch": 3.57, "learning_rate": 1.4273371670123725e-05, "loss": 0.184, "step": 635800 }, { "epoch": 3.57, "learning_rate": 1.426775043986127e-05, "loss": 0.1823, "step": 635900 }, { "epoch": 3.58, "learning_rate": 1.4262129209598812e-05, "loss": 0.174, "step": 636000 }, { "epoch": 3.58, "learning_rate": 1.4256507979336359e-05, "loss": 0.1851, "step": 636100 }, { "epoch": 3.58, "learning_rate": 1.4250886749073903e-05, "loss": 0.1818, "step": 636200 }, { "epoch": 3.58, "learning_rate": 1.4245321731114072e-05, "loss": 0.1854, "step": 636300 }, { "epoch": 3.58, "learning_rate": 1.4239700500851615e-05, "loss": 0.1814, "step": 636400 }, { "epoch": 3.58, "learning_rate": 1.4234079270589162e-05, "loss": 0.1854, "step": 636500 }, { "epoch": 3.58, "learning_rate": 1.4228458040326706e-05, "loss": 0.1845, "step": 636600 }, { "epoch": 3.58, "learning_rate": 1.4222836810064252e-05, "loss": 0.187, "step": 636700 }, { "epoch": 3.58, "learning_rate": 1.4217215579801795e-05, "loss": 0.1769, "step": 636800 }, { "epoch": 3.58, "learning_rate": 1.4211594349539342e-05, "loss": 0.1816, "step": 636900 }, { "epoch": 3.58, "learning_rate": 1.4205973119276886e-05, "loss": 0.174, "step": 637000 }, { "epoch": 3.58, "learning_rate": 1.4200351889014429e-05, "loss": 0.1819, "step": 637100 }, { "epoch": 3.58, "learning_rate": 1.4194730658751975e-05, "loss": 0.1887, "step": 637200 }, { "epoch": 3.58, "learning_rate": 1.418910942848952e-05, "loss": 0.1796, "step": 637300 }, { "epoch": 3.58, "learning_rate": 1.4183488198227066e-05, "loss": 0.1794, "step": 637400 }, { "epoch": 3.58, "learning_rate": 1.4177866967964609e-05, "loss": 0.1829, "step": 637500 }, { "epoch": 3.58, "learning_rate": 1.4172245737702152e-05, "loss": 0.1844, "step": 637600 }, { "epoch": 3.58, "learning_rate": 1.41666245074397e-05, "loss": 0.182, "step": 637700 }, { "epoch": 3.59, "learning_rate": 1.4161003277177243e-05, "loss": 0.1788, "step": 637800 }, { "epoch": 3.59, "learning_rate": 1.4155382046914789e-05, "loss": 0.1873, "step": 637900 }, { "epoch": 3.59, "learning_rate": 1.4149760816652332e-05, "loss": 0.1783, "step": 638000 }, { "epoch": 3.59, "learning_rate": 1.4144139586389878e-05, "loss": 0.1785, "step": 638100 }, { "epoch": 3.59, "learning_rate": 1.4138518356127423e-05, "loss": 0.1875, "step": 638200 }, { "epoch": 3.59, "learning_rate": 1.4132897125864966e-05, "loss": 0.1868, "step": 638300 }, { "epoch": 3.59, "learning_rate": 1.4127275895602512e-05, "loss": 0.182, "step": 638400 }, { "epoch": 3.59, "learning_rate": 1.4121654665340057e-05, "loss": 0.1842, "step": 638500 }, { "epoch": 3.59, "learning_rate": 1.4116033435077603e-05, "loss": 0.1816, "step": 638600 }, { "epoch": 3.59, "learning_rate": 1.4110412204815146e-05, "loss": 0.1828, "step": 638700 }, { "epoch": 3.59, "learning_rate": 1.4104790974552692e-05, "loss": 0.1811, "step": 638800 }, { "epoch": 3.59, "learning_rate": 1.4099169744290237e-05, "loss": 0.1847, "step": 638900 }, { "epoch": 3.59, "learning_rate": 1.409354851402778e-05, "loss": 0.1796, "step": 639000 }, { "epoch": 3.59, "learning_rate": 1.4087927283765326e-05, "loss": 0.1804, "step": 639100 }, { "epoch": 3.59, "learning_rate": 1.4082306053502869e-05, "loss": 0.1814, "step": 639200 }, { "epoch": 3.59, "learning_rate": 1.4076684823240415e-05, "loss": 0.1807, "step": 639300 }, { "epoch": 3.59, "learning_rate": 1.407106359297796e-05, "loss": 0.1842, "step": 639400 }, { "epoch": 3.59, "learning_rate": 1.4065442362715506e-05, "loss": 0.1859, "step": 639500 }, { "epoch": 3.6, "learning_rate": 1.4059821132453049e-05, "loss": 0.1844, "step": 639600 }, { "epoch": 3.6, "learning_rate": 1.4054199902190593e-05, "loss": 0.1789, "step": 639700 }, { "epoch": 3.6, "learning_rate": 1.404857867192814e-05, "loss": 0.1839, "step": 639800 }, { "epoch": 3.6, "learning_rate": 1.4042957441665682e-05, "loss": 0.1868, "step": 639900 }, { "epoch": 3.6, "learning_rate": 1.4037336211403229e-05, "loss": 0.184, "step": 640000 }, { "epoch": 3.6, "learning_rate": 1.4031714981140773e-05, "loss": 0.1771, "step": 640100 }, { "epoch": 3.6, "learning_rate": 1.402609375087832e-05, "loss": 0.1815, "step": 640200 }, { "epoch": 3.6, "learning_rate": 1.4020472520615863e-05, "loss": 0.1888, "step": 640300 }, { "epoch": 3.6, "learning_rate": 1.4014851290353407e-05, "loss": 0.1734, "step": 640400 }, { "epoch": 3.6, "learning_rate": 1.4009286272393576e-05, "loss": 0.1838, "step": 640500 }, { "epoch": 3.6, "learning_rate": 1.4003665042131123e-05, "loss": 0.1869, "step": 640600 }, { "epoch": 3.6, "learning_rate": 1.3998043811868666e-05, "loss": 0.1755, "step": 640700 }, { "epoch": 3.6, "learning_rate": 1.399242258160621e-05, "loss": 0.189, "step": 640800 }, { "epoch": 3.6, "learning_rate": 1.3986801351343756e-05, "loss": 0.1827, "step": 640900 }, { "epoch": 3.6, "learning_rate": 1.39811801210813e-05, "loss": 0.184, "step": 641000 }, { "epoch": 3.6, "learning_rate": 1.3975558890818846e-05, "loss": 0.1845, "step": 641100 }, { "epoch": 3.6, "learning_rate": 1.396993766055639e-05, "loss": 0.1828, "step": 641200 }, { "epoch": 3.6, "learning_rate": 1.3964316430293936e-05, "loss": 0.1801, "step": 641300 }, { "epoch": 3.61, "learning_rate": 1.395869520003148e-05, "loss": 0.1726, "step": 641400 }, { "epoch": 3.61, "learning_rate": 1.3953073969769024e-05, "loss": 0.1811, "step": 641500 }, { "epoch": 3.61, "learning_rate": 1.394745273950657e-05, "loss": 0.1779, "step": 641600 }, { "epoch": 3.61, "learning_rate": 1.3941831509244113e-05, "loss": 0.1857, "step": 641700 }, { "epoch": 3.61, "learning_rate": 1.393621027898166e-05, "loss": 0.1831, "step": 641800 }, { "epoch": 3.61, "learning_rate": 1.3930589048719202e-05, "loss": 0.1807, "step": 641900 }, { "epoch": 3.61, "learning_rate": 1.392496781845675e-05, "loss": 0.1844, "step": 642000 }, { "epoch": 3.61, "learning_rate": 1.3919346588194293e-05, "loss": 0.1832, "step": 642100 }, { "epoch": 3.61, "learning_rate": 1.3913725357931836e-05, "loss": 0.1825, "step": 642200 }, { "epoch": 3.61, "learning_rate": 1.3908104127669382e-05, "loss": 0.1842, "step": 642300 }, { "epoch": 3.61, "learning_rate": 1.3902482897406927e-05, "loss": 0.1793, "step": 642400 }, { "epoch": 3.61, "learning_rate": 1.3896861667144473e-05, "loss": 0.1827, "step": 642500 }, { "epoch": 3.61, "learning_rate": 1.3891240436882016e-05, "loss": 0.1788, "step": 642600 }, { "epoch": 3.61, "learning_rate": 1.388561920661956e-05, "loss": 0.1744, "step": 642700 }, { "epoch": 3.61, "learning_rate": 1.3879997976357107e-05, "loss": 0.1812, "step": 642800 }, { "epoch": 3.61, "learning_rate": 1.3874432958397276e-05, "loss": 0.181, "step": 642900 }, { "epoch": 3.61, "learning_rate": 1.3868867940437444e-05, "loss": 0.1823, "step": 643000 }, { "epoch": 3.62, "learning_rate": 1.386324671017499e-05, "loss": 0.1743, "step": 643100 }, { "epoch": 3.62, "learning_rate": 1.3857625479912533e-05, "loss": 0.183, "step": 643200 }, { "epoch": 3.62, "learning_rate": 1.385200424965008e-05, "loss": 0.1856, "step": 643300 }, { "epoch": 3.62, "learning_rate": 1.3846383019387624e-05, "loss": 0.1801, "step": 643400 }, { "epoch": 3.62, "learning_rate": 1.3840761789125167e-05, "loss": 0.1866, "step": 643500 }, { "epoch": 3.62, "learning_rate": 1.3835140558862713e-05, "loss": 0.1832, "step": 643600 }, { "epoch": 3.62, "learning_rate": 1.3829519328600258e-05, "loss": 0.1781, "step": 643700 }, { "epoch": 3.62, "learning_rate": 1.3823898098337804e-05, "loss": 0.1848, "step": 643800 }, { "epoch": 3.62, "learning_rate": 1.3818276868075347e-05, "loss": 0.1805, "step": 643900 }, { "epoch": 3.62, "learning_rate": 1.3812655637812893e-05, "loss": 0.1765, "step": 644000 }, { "epoch": 3.62, "learning_rate": 1.3807034407550438e-05, "loss": 0.1802, "step": 644100 }, { "epoch": 3.62, "learning_rate": 1.380141317728798e-05, "loss": 0.1778, "step": 644200 }, { "epoch": 3.62, "learning_rate": 1.3795791947025527e-05, "loss": 0.1827, "step": 644300 }, { "epoch": 3.62, "learning_rate": 1.3790170716763071e-05, "loss": 0.1833, "step": 644400 }, { "epoch": 3.62, "learning_rate": 1.3784549486500618e-05, "loss": 0.1824, "step": 644500 }, { "epoch": 3.62, "learning_rate": 1.377892825623816e-05, "loss": 0.1804, "step": 644600 }, { "epoch": 3.62, "learning_rate": 1.3773307025975707e-05, "loss": 0.1812, "step": 644700 }, { "epoch": 3.62, "learning_rate": 1.376768579571325e-05, "loss": 0.1778, "step": 644800 }, { "epoch": 3.63, "learning_rate": 1.3762064565450794e-05, "loss": 0.1801, "step": 644900 }, { "epoch": 3.63, "learning_rate": 1.375644333518834e-05, "loss": 0.1885, "step": 645000 }, { "epoch": 3.63, "learning_rate": 1.3750822104925884e-05, "loss": 0.1806, "step": 645100 }, { "epoch": 3.63, "learning_rate": 1.374520087466343e-05, "loss": 0.1862, "step": 645200 }, { "epoch": 3.63, "learning_rate": 1.3739579644400974e-05, "loss": 0.1781, "step": 645300 }, { "epoch": 3.63, "learning_rate": 1.373395841413852e-05, "loss": 0.1807, "step": 645400 }, { "epoch": 3.63, "learning_rate": 1.3728393396178688e-05, "loss": 0.1782, "step": 645500 }, { "epoch": 3.63, "learning_rate": 1.3722772165916235e-05, "loss": 0.1827, "step": 645600 }, { "epoch": 3.63, "learning_rate": 1.3717150935653777e-05, "loss": 0.1846, "step": 645700 }, { "epoch": 3.63, "learning_rate": 1.3711529705391324e-05, "loss": 0.1819, "step": 645800 }, { "epoch": 3.63, "learning_rate": 1.3705908475128867e-05, "loss": 0.1793, "step": 645900 }, { "epoch": 3.63, "learning_rate": 1.3700287244866411e-05, "loss": 0.1792, "step": 646000 }, { "epoch": 3.63, "learning_rate": 1.3694666014603957e-05, "loss": 0.1888, "step": 646100 }, { "epoch": 3.63, "learning_rate": 1.36890447843415e-05, "loss": 0.1853, "step": 646200 }, { "epoch": 3.63, "learning_rate": 1.3683423554079047e-05, "loss": 0.1881, "step": 646300 }, { "epoch": 3.63, "learning_rate": 1.3677802323816591e-05, "loss": 0.1824, "step": 646400 }, { "epoch": 3.63, "learning_rate": 1.3672181093554137e-05, "loss": 0.1827, "step": 646500 }, { "epoch": 3.63, "learning_rate": 1.366655986329168e-05, "loss": 0.182, "step": 646600 }, { "epoch": 3.64, "learning_rate": 1.3660938633029225e-05, "loss": 0.1817, "step": 646700 }, { "epoch": 3.64, "learning_rate": 1.3655317402766771e-05, "loss": 0.1798, "step": 646800 }, { "epoch": 3.64, "learning_rate": 1.3649696172504314e-05, "loss": 0.178, "step": 646900 }, { "epoch": 3.64, "learning_rate": 1.364407494224186e-05, "loss": 0.1817, "step": 647000 }, { "epoch": 3.64, "learning_rate": 1.3638453711979405e-05, "loss": 0.1806, "step": 647100 }, { "epoch": 3.64, "learning_rate": 1.3632832481716951e-05, "loss": 0.1814, "step": 647200 }, { "epoch": 3.64, "learning_rate": 1.3627267463757117e-05, "loss": 0.185, "step": 647300 }, { "epoch": 3.64, "learning_rate": 1.3621646233494663e-05, "loss": 0.185, "step": 647400 }, { "epoch": 3.64, "learning_rate": 1.3616025003232208e-05, "loss": 0.1815, "step": 647500 }, { "epoch": 3.64, "learning_rate": 1.3610403772969751e-05, "loss": 0.1828, "step": 647600 }, { "epoch": 3.64, "learning_rate": 1.3604782542707297e-05, "loss": 0.1812, "step": 647700 }, { "epoch": 3.64, "learning_rate": 1.3599161312444842e-05, "loss": 0.1822, "step": 647800 }, { "epoch": 3.64, "learning_rate": 1.3593540082182388e-05, "loss": 0.1786, "step": 647900 }, { "epoch": 3.64, "learning_rate": 1.3587918851919931e-05, "loss": 0.1822, "step": 648000 }, { "epoch": 3.64, "learning_rate": 1.3582297621657477e-05, "loss": 0.1823, "step": 648100 }, { "epoch": 3.64, "learning_rate": 1.3576676391395022e-05, "loss": 0.1769, "step": 648200 }, { "epoch": 3.64, "learning_rate": 1.3571055161132565e-05, "loss": 0.1783, "step": 648300 }, { "epoch": 3.64, "learning_rate": 1.3565433930870111e-05, "loss": 0.1875, "step": 648400 }, { "epoch": 3.65, "learning_rate": 1.3559812700607654e-05, "loss": 0.1806, "step": 648500 }, { "epoch": 3.65, "learning_rate": 1.3554191470345202e-05, "loss": 0.1815, "step": 648600 }, { "epoch": 3.65, "learning_rate": 1.3548570240082745e-05, "loss": 0.1836, "step": 648700 }, { "epoch": 3.65, "learning_rate": 1.3542949009820291e-05, "loss": 0.1765, "step": 648800 }, { "epoch": 3.65, "learning_rate": 1.3537327779557834e-05, "loss": 0.1966, "step": 648900 }, { "epoch": 3.65, "learning_rate": 1.3531706549295379e-05, "loss": 0.1764, "step": 649000 }, { "epoch": 3.65, "learning_rate": 1.3526085319032925e-05, "loss": 0.183, "step": 649100 }, { "epoch": 3.65, "learning_rate": 1.3520464088770468e-05, "loss": 0.1787, "step": 649200 }, { "epoch": 3.65, "learning_rate": 1.3514842858508014e-05, "loss": 0.1818, "step": 649300 }, { "epoch": 3.65, "learning_rate": 1.3509221628245559e-05, "loss": 0.1825, "step": 649400 }, { "epoch": 3.65, "learning_rate": 1.3503600397983105e-05, "loss": 0.1792, "step": 649500 }, { "epoch": 3.65, "learning_rate": 1.3497979167720648e-05, "loss": 0.1825, "step": 649600 }, { "epoch": 3.65, "learning_rate": 1.3492357937458192e-05, "loss": 0.1838, "step": 649700 }, { "epoch": 3.65, "learning_rate": 1.3486736707195739e-05, "loss": 0.1802, "step": 649800 }, { "epoch": 3.65, "learning_rate": 1.3481115476933281e-05, "loss": 0.1829, "step": 649900 }, { "epoch": 3.65, "learning_rate": 1.3475494246670828e-05, "loss": 0.1749, "step": 650000 }, { "epoch": 3.65, "learning_rate": 1.346987301640837e-05, "loss": 0.1827, "step": 650100 }, { "epoch": 3.65, "learning_rate": 1.3464251786145919e-05, "loss": 0.1838, "step": 650200 }, { "epoch": 3.66, "learning_rate": 1.3458630555883461e-05, "loss": 0.1783, "step": 650300 }, { "epoch": 3.66, "learning_rate": 1.3453009325621004e-05, "loss": 0.1718, "step": 650400 }, { "epoch": 3.66, "learning_rate": 1.344738809535855e-05, "loss": 0.1862, "step": 650500 }, { "epoch": 3.66, "learning_rate": 1.3441766865096095e-05, "loss": 0.1847, "step": 650600 }, { "epoch": 3.66, "learning_rate": 1.3436145634833641e-05, "loss": 0.1805, "step": 650700 }, { "epoch": 3.66, "learning_rate": 1.3430524404571184e-05, "loss": 0.185, "step": 650800 }, { "epoch": 3.66, "learning_rate": 1.342490317430873e-05, "loss": 0.1802, "step": 650900 }, { "epoch": 3.66, "learning_rate": 1.3419281944046275e-05, "loss": 0.1823, "step": 651000 }, { "epoch": 3.66, "learning_rate": 1.3413660713783818e-05, "loss": 0.179, "step": 651100 }, { "epoch": 3.66, "learning_rate": 1.3408039483521364e-05, "loss": 0.1828, "step": 651200 }, { "epoch": 3.66, "learning_rate": 1.3402418253258909e-05, "loss": 0.189, "step": 651300 }, { "epoch": 3.66, "learning_rate": 1.3396797022996455e-05, "loss": 0.1819, "step": 651400 }, { "epoch": 3.66, "learning_rate": 1.3391175792733998e-05, "loss": 0.1832, "step": 651500 }, { "epoch": 3.66, "learning_rate": 1.3385554562471544e-05, "loss": 0.1776, "step": 651600 }, { "epoch": 3.66, "learning_rate": 1.3379933332209087e-05, "loss": 0.1767, "step": 651700 }, { "epoch": 3.66, "learning_rate": 1.3374312101946632e-05, "loss": 0.1847, "step": 651800 }, { "epoch": 3.66, "learning_rate": 1.3368690871684178e-05, "loss": 0.1815, "step": 651900 }, { "epoch": 3.67, "learning_rate": 1.3363069641421721e-05, "loss": 0.1792, "step": 652000 }, { "epoch": 3.67, "learning_rate": 1.3357448411159267e-05, "loss": 0.1793, "step": 652100 }, { "epoch": 3.67, "learning_rate": 1.3351827180896812e-05, "loss": 0.1835, "step": 652200 }, { "epoch": 3.67, "learning_rate": 1.3346205950634358e-05, "loss": 0.1867, "step": 652300 }, { "epoch": 3.67, "learning_rate": 1.3340640932674526e-05, "loss": 0.1828, "step": 652400 }, { "epoch": 3.67, "learning_rate": 1.3335019702412072e-05, "loss": 0.1874, "step": 652500 }, { "epoch": 3.67, "learning_rate": 1.3329398472149615e-05, "loss": 0.1849, "step": 652600 }, { "epoch": 3.67, "learning_rate": 1.3323777241887158e-05, "loss": 0.1816, "step": 652700 }, { "epoch": 3.67, "learning_rate": 1.3318156011624706e-05, "loss": 0.1826, "step": 652800 }, { "epoch": 3.67, "learning_rate": 1.3312534781362249e-05, "loss": 0.1783, "step": 652900 }, { "epoch": 3.67, "learning_rate": 1.3306913551099795e-05, "loss": 0.1829, "step": 653000 }, { "epoch": 3.67, "learning_rate": 1.3301292320837338e-05, "loss": 0.1773, "step": 653100 }, { "epoch": 3.67, "learning_rate": 1.3295671090574884e-05, "loss": 0.1845, "step": 653200 }, { "epoch": 3.67, "learning_rate": 1.3290049860312429e-05, "loss": 0.1844, "step": 653300 }, { "epoch": 3.67, "learning_rate": 1.3284428630049972e-05, "loss": 0.1818, "step": 653400 }, { "epoch": 3.67, "learning_rate": 1.3278807399787518e-05, "loss": 0.1784, "step": 653500 }, { "epoch": 3.67, "learning_rate": 1.3273186169525063e-05, "loss": 0.1807, "step": 653600 }, { "epoch": 3.67, "learning_rate": 1.3267564939262609e-05, "loss": 0.1807, "step": 653700 }, { "epoch": 3.68, "learning_rate": 1.3261943709000152e-05, "loss": 0.1744, "step": 653800 }, { "epoch": 3.68, "learning_rate": 1.3256322478737698e-05, "loss": 0.1802, "step": 653900 }, { "epoch": 3.68, "learning_rate": 1.3250701248475243e-05, "loss": 0.1808, "step": 654000 }, { "epoch": 3.68, "learning_rate": 1.3245080018212785e-05, "loss": 0.1775, "step": 654100 }, { "epoch": 3.68, "learning_rate": 1.3239458787950332e-05, "loss": 0.1834, "step": 654200 }, { "epoch": 3.68, "learning_rate": 1.3233837557687875e-05, "loss": 0.1755, "step": 654300 }, { "epoch": 3.68, "learning_rate": 1.3228216327425421e-05, "loss": 0.1806, "step": 654400 }, { "epoch": 3.68, "learning_rate": 1.3222595097162965e-05, "loss": 0.1723, "step": 654500 }, { "epoch": 3.68, "learning_rate": 1.3216973866900512e-05, "loss": 0.179, "step": 654600 }, { "epoch": 3.68, "learning_rate": 1.3211352636638055e-05, "loss": 0.1738, "step": 654700 }, { "epoch": 3.68, "learning_rate": 1.32057314063756e-05, "loss": 0.1781, "step": 654800 }, { "epoch": 3.68, "learning_rate": 1.3200110176113146e-05, "loss": 0.1827, "step": 654900 }, { "epoch": 3.68, "learning_rate": 1.3194488945850688e-05, "loss": 0.1786, "step": 655000 }, { "epoch": 3.68, "learning_rate": 1.3188867715588235e-05, "loss": 0.182, "step": 655100 }, { "epoch": 3.68, "learning_rate": 1.318324648532578e-05, "loss": 0.1812, "step": 655200 }, { "epoch": 3.68, "learning_rate": 1.3177625255063326e-05, "loss": 0.1792, "step": 655300 }, { "epoch": 3.68, "learning_rate": 1.3172004024800868e-05, "loss": 0.1817, "step": 655400 }, { "epoch": 3.68, "learning_rate": 1.3166382794538413e-05, "loss": 0.1814, "step": 655500 }, { "epoch": 3.69, "learning_rate": 1.316076156427596e-05, "loss": 0.1789, "step": 655600 }, { "epoch": 3.69, "learning_rate": 1.3155140334013502e-05, "loss": 0.181, "step": 655700 }, { "epoch": 3.69, "learning_rate": 1.3149519103751048e-05, "loss": 0.1793, "step": 655800 }, { "epoch": 3.69, "learning_rate": 1.3143897873488591e-05, "loss": 0.1818, "step": 655900 }, { "epoch": 3.69, "learning_rate": 1.3138276643226138e-05, "loss": 0.1789, "step": 656000 }, { "epoch": 3.69, "learning_rate": 1.3132711625266305e-05, "loss": 0.186, "step": 656100 }, { "epoch": 3.69, "learning_rate": 1.3127090395003852e-05, "loss": 0.1833, "step": 656200 }, { "epoch": 3.69, "learning_rate": 1.3121469164741396e-05, "loss": 0.1791, "step": 656300 }, { "epoch": 3.69, "learning_rate": 1.3115847934478942e-05, "loss": 0.1849, "step": 656400 }, { "epoch": 3.69, "learning_rate": 1.3110226704216485e-05, "loss": 0.1815, "step": 656500 }, { "epoch": 3.69, "learning_rate": 1.310460547395403e-05, "loss": 0.1801, "step": 656600 }, { "epoch": 3.69, "learning_rate": 1.3098984243691576e-05, "loss": 0.1817, "step": 656700 }, { "epoch": 3.69, "learning_rate": 1.3093363013429119e-05, "loss": 0.1838, "step": 656800 }, { "epoch": 3.69, "learning_rate": 1.308779799546929e-05, "loss": 0.1772, "step": 656900 }, { "epoch": 3.69, "learning_rate": 1.3082176765206833e-05, "loss": 0.1744, "step": 657000 }, { "epoch": 3.69, "learning_rate": 1.307655553494438e-05, "loss": 0.1771, "step": 657100 }, { "epoch": 3.69, "learning_rate": 1.3070934304681922e-05, "loss": 0.1843, "step": 657200 }, { "epoch": 3.69, "learning_rate": 1.3065313074419468e-05, "loss": 0.183, "step": 657300 }, { "epoch": 3.7, "learning_rate": 1.3059691844157013e-05, "loss": 0.1894, "step": 657400 }, { "epoch": 3.7, "learning_rate": 1.3054070613894556e-05, "loss": 0.1774, "step": 657500 }, { "epoch": 3.7, "learning_rate": 1.3048449383632102e-05, "loss": 0.1835, "step": 657600 }, { "epoch": 3.7, "learning_rate": 1.3042828153369647e-05, "loss": 0.1789, "step": 657700 }, { "epoch": 3.7, "learning_rate": 1.3037206923107193e-05, "loss": 0.1796, "step": 657800 }, { "epoch": 3.7, "learning_rate": 1.3031585692844736e-05, "loss": 0.1784, "step": 657900 }, { "epoch": 3.7, "learning_rate": 1.3025964462582282e-05, "loss": 0.1818, "step": 658000 }, { "epoch": 3.7, "learning_rate": 1.3020343232319827e-05, "loss": 0.1785, "step": 658100 }, { "epoch": 3.7, "learning_rate": 1.301472200205737e-05, "loss": 0.1788, "step": 658200 }, { "epoch": 3.7, "learning_rate": 1.3009100771794916e-05, "loss": 0.1826, "step": 658300 }, { "epoch": 3.7, "learning_rate": 1.3003479541532459e-05, "loss": 0.1837, "step": 658400 }, { "epoch": 3.7, "learning_rate": 1.2997858311270005e-05, "loss": 0.1799, "step": 658500 }, { "epoch": 3.7, "learning_rate": 1.299223708100755e-05, "loss": 0.1859, "step": 658600 }, { "epoch": 3.7, "learning_rate": 1.2986615850745096e-05, "loss": 0.1763, "step": 658700 }, { "epoch": 3.7, "learning_rate": 1.2980994620482639e-05, "loss": 0.1781, "step": 658800 }, { "epoch": 3.7, "learning_rate": 1.2975373390220183e-05, "loss": 0.18, "step": 658900 }, { "epoch": 3.7, "learning_rate": 1.296975215995773e-05, "loss": 0.1836, "step": 659000 }, { "epoch": 3.7, "learning_rate": 1.2964130929695273e-05, "loss": 0.1853, "step": 659100 }, { "epoch": 3.71, "learning_rate": 1.2958509699432819e-05, "loss": 0.1809, "step": 659200 }, { "epoch": 3.71, "learning_rate": 1.2952888469170363e-05, "loss": 0.1798, "step": 659300 }, { "epoch": 3.71, "learning_rate": 1.294726723890791e-05, "loss": 0.1821, "step": 659400 }, { "epoch": 3.71, "learning_rate": 1.2941646008645453e-05, "loss": 0.1824, "step": 659500 }, { "epoch": 3.71, "learning_rate": 1.2936024778382995e-05, "loss": 0.1813, "step": 659600 }, { "epoch": 3.71, "learning_rate": 1.2930403548120543e-05, "loss": 0.1844, "step": 659700 }, { "epoch": 3.71, "learning_rate": 1.2924782317858086e-05, "loss": 0.1847, "step": 659800 }, { "epoch": 3.71, "learning_rate": 1.2919161087595633e-05, "loss": 0.176, "step": 659900 }, { "epoch": 3.71, "learning_rate": 1.2913539857333175e-05, "loss": 0.1914, "step": 660000 }, { "epoch": 3.71, "learning_rate": 1.2907918627070722e-05, "loss": 0.1845, "step": 660100 }, { "epoch": 3.71, "learning_rate": 1.2902297396808266e-05, "loss": 0.181, "step": 660200 }, { "epoch": 3.71, "learning_rate": 1.289667616654581e-05, "loss": 0.1708, "step": 660300 }, { "epoch": 3.71, "learning_rate": 1.2891054936283356e-05, "loss": 0.1815, "step": 660400 }, { "epoch": 3.71, "learning_rate": 1.28854337060209e-05, "loss": 0.1831, "step": 660500 }, { "epoch": 3.71, "learning_rate": 1.287986868806107e-05, "loss": 0.1838, "step": 660600 }, { "epoch": 3.71, "learning_rate": 1.2874247457798614e-05, "loss": 0.1806, "step": 660700 }, { "epoch": 3.71, "learning_rate": 1.286862622753616e-05, "loss": 0.1758, "step": 660800 }, { "epoch": 3.72, "learning_rate": 1.2863004997273703e-05, "loss": 0.1816, "step": 660900 }, { "epoch": 3.72, "learning_rate": 1.285738376701125e-05, "loss": 0.1893, "step": 661000 }, { "epoch": 3.72, "learning_rate": 1.2851762536748792e-05, "loss": 0.1777, "step": 661100 }, { "epoch": 3.72, "learning_rate": 1.284614130648634e-05, "loss": 0.1832, "step": 661200 }, { "epoch": 3.72, "learning_rate": 1.2840520076223883e-05, "loss": 0.1813, "step": 661300 }, { "epoch": 3.72, "learning_rate": 1.2834898845961426e-05, "loss": 0.1821, "step": 661400 }, { "epoch": 3.72, "learning_rate": 1.2829277615698972e-05, "loss": 0.1714, "step": 661500 }, { "epoch": 3.72, "learning_rate": 1.2823656385436517e-05, "loss": 0.1811, "step": 661600 }, { "epoch": 3.72, "learning_rate": 1.2818035155174063e-05, "loss": 0.1826, "step": 661700 }, { "epoch": 3.72, "learning_rate": 1.2812413924911606e-05, "loss": 0.1797, "step": 661800 }, { "epoch": 3.72, "learning_rate": 1.2806792694649152e-05, "loss": 0.1832, "step": 661900 }, { "epoch": 3.72, "learning_rate": 1.2801171464386697e-05, "loss": 0.1792, "step": 662000 }, { "epoch": 3.72, "learning_rate": 1.279555023412424e-05, "loss": 0.1794, "step": 662100 }, { "epoch": 3.72, "learning_rate": 1.2789929003861786e-05, "loss": 0.1778, "step": 662200 }, { "epoch": 3.72, "learning_rate": 1.278430777359933e-05, "loss": 0.18, "step": 662300 }, { "epoch": 3.72, "learning_rate": 1.2778686543336877e-05, "loss": 0.18, "step": 662400 }, { "epoch": 3.72, "learning_rate": 1.277306531307442e-05, "loss": 0.1842, "step": 662500 }, { "epoch": 3.72, "learning_rate": 1.2767444082811963e-05, "loss": 0.1818, "step": 662600 }, { "epoch": 3.73, "learning_rate": 1.2761822852549509e-05, "loss": 0.1801, "step": 662700 }, { "epoch": 3.73, "learning_rate": 1.2756201622287054e-05, "loss": 0.188, "step": 662800 }, { "epoch": 3.73, "learning_rate": 1.27505803920246e-05, "loss": 0.1815, "step": 662900 }, { "epoch": 3.73, "learning_rate": 1.2744959161762143e-05, "loss": 0.1838, "step": 663000 }, { "epoch": 3.73, "learning_rate": 1.2739337931499689e-05, "loss": 0.1837, "step": 663100 }, { "epoch": 3.73, "learning_rate": 1.2733716701237234e-05, "loss": 0.1743, "step": 663200 }, { "epoch": 3.73, "learning_rate": 1.2728095470974777e-05, "loss": 0.1785, "step": 663300 }, { "epoch": 3.73, "learning_rate": 1.2722474240712323e-05, "loss": 0.1806, "step": 663400 }, { "epoch": 3.73, "learning_rate": 1.2716909222752494e-05, "loss": 0.1773, "step": 663500 }, { "epoch": 3.73, "learning_rate": 1.2711287992490037e-05, "loss": 0.1846, "step": 663600 }, { "epoch": 3.73, "learning_rate": 1.270566676222758e-05, "loss": 0.1826, "step": 663700 }, { "epoch": 3.73, "learning_rate": 1.2700045531965128e-05, "loss": 0.1921, "step": 663800 }, { "epoch": 3.73, "learning_rate": 1.269442430170267e-05, "loss": 0.1849, "step": 663900 }, { "epoch": 3.73, "learning_rate": 1.2688803071440217e-05, "loss": 0.1826, "step": 664000 }, { "epoch": 3.73, "learning_rate": 1.268318184117776e-05, "loss": 0.1797, "step": 664100 }, { "epoch": 3.73, "learning_rate": 1.2677560610915306e-05, "loss": 0.1811, "step": 664200 }, { "epoch": 3.73, "learning_rate": 1.267193938065285e-05, "loss": 0.1759, "step": 664300 }, { "epoch": 3.73, "learning_rate": 1.2666318150390393e-05, "loss": 0.1867, "step": 664400 }, { "epoch": 3.74, "learning_rate": 1.266069692012794e-05, "loss": 0.1803, "step": 664500 }, { "epoch": 3.74, "learning_rate": 1.2655075689865484e-05, "loss": 0.1799, "step": 664600 }, { "epoch": 3.74, "learning_rate": 1.264945445960303e-05, "loss": 0.174, "step": 664700 }, { "epoch": 3.74, "learning_rate": 1.2643833229340573e-05, "loss": 0.1809, "step": 664800 }, { "epoch": 3.74, "learning_rate": 1.263821199907812e-05, "loss": 0.1752, "step": 664900 }, { "epoch": 3.74, "learning_rate": 1.2632590768815664e-05, "loss": 0.1841, "step": 665000 }, { "epoch": 3.74, "learning_rate": 1.2626969538553207e-05, "loss": 0.1766, "step": 665100 }, { "epoch": 3.74, "learning_rate": 1.2621348308290753e-05, "loss": 0.1844, "step": 665200 }, { "epoch": 3.74, "learning_rate": 1.2615727078028296e-05, "loss": 0.1847, "step": 665300 }, { "epoch": 3.74, "learning_rate": 1.2610105847765843e-05, "loss": 0.1816, "step": 665400 }, { "epoch": 3.74, "learning_rate": 1.2604484617503387e-05, "loss": 0.1794, "step": 665500 }, { "epoch": 3.74, "learning_rate": 1.2598863387240933e-05, "loss": 0.18, "step": 665600 }, { "epoch": 3.74, "learning_rate": 1.2593242156978476e-05, "loss": 0.1828, "step": 665700 }, { "epoch": 3.74, "learning_rate": 1.2587620926716021e-05, "loss": 0.1786, "step": 665800 }, { "epoch": 3.74, "learning_rate": 1.2581999696453567e-05, "loss": 0.1775, "step": 665900 }, { "epoch": 3.74, "learning_rate": 1.257637846619111e-05, "loss": 0.1829, "step": 666000 }, { "epoch": 3.74, "learning_rate": 1.2570813448231281e-05, "loss": 0.1769, "step": 666100 }, { "epoch": 3.74, "learning_rate": 1.2565192217968824e-05, "loss": 0.1783, "step": 666200 }, { "epoch": 3.75, "learning_rate": 1.255957098770637e-05, "loss": 0.1839, "step": 666300 }, { "epoch": 3.75, "learning_rate": 1.2553949757443915e-05, "loss": 0.1805, "step": 666400 }, { "epoch": 3.75, "learning_rate": 1.2548328527181461e-05, "loss": 0.1805, "step": 666500 }, { "epoch": 3.75, "learning_rate": 1.2542707296919004e-05, "loss": 0.1796, "step": 666600 }, { "epoch": 3.75, "learning_rate": 1.253708606665655e-05, "loss": 0.1813, "step": 666700 }, { "epoch": 3.75, "learning_rate": 1.2531464836394093e-05, "loss": 0.1818, "step": 666800 }, { "epoch": 3.75, "learning_rate": 1.2525843606131638e-05, "loss": 0.178, "step": 666900 }, { "epoch": 3.75, "learning_rate": 1.2520222375869184e-05, "loss": 0.1862, "step": 667000 }, { "epoch": 3.75, "learning_rate": 1.2514601145606727e-05, "loss": 0.1829, "step": 667100 }, { "epoch": 3.75, "learning_rate": 1.2508979915344273e-05, "loss": 0.1813, "step": 667200 }, { "epoch": 3.75, "learning_rate": 1.2503358685081818e-05, "loss": 0.1842, "step": 667300 }, { "epoch": 3.75, "learning_rate": 1.2497737454819362e-05, "loss": 0.1852, "step": 667400 }, { "epoch": 3.75, "learning_rate": 1.2492116224556907e-05, "loss": 0.1835, "step": 667500 }, { "epoch": 3.75, "learning_rate": 1.2486494994294452e-05, "loss": 0.1859, "step": 667600 }, { "epoch": 3.75, "learning_rate": 1.2480873764031998e-05, "loss": 0.1781, "step": 667700 }, { "epoch": 3.75, "learning_rate": 1.247525253376954e-05, "loss": 0.1814, "step": 667800 }, { "epoch": 3.75, "learning_rate": 1.2469631303507085e-05, "loss": 0.1854, "step": 667900 }, { "epoch": 3.75, "learning_rate": 1.2464010073244632e-05, "loss": 0.1767, "step": 668000 }, { "epoch": 3.76, "learning_rate": 1.2458388842982176e-05, "loss": 0.1767, "step": 668100 }, { "epoch": 3.76, "learning_rate": 1.245276761271972e-05, "loss": 0.1759, "step": 668200 }, { "epoch": 3.76, "learning_rate": 1.2447146382457265e-05, "loss": 0.1723, "step": 668300 }, { "epoch": 3.76, "learning_rate": 1.244152515219481e-05, "loss": 0.1847, "step": 668400 }, { "epoch": 3.76, "learning_rate": 1.2435903921932355e-05, "loss": 0.1832, "step": 668500 }, { "epoch": 3.76, "learning_rate": 1.2430282691669899e-05, "loss": 0.1781, "step": 668600 }, { "epoch": 3.76, "learning_rate": 1.2424661461407444e-05, "loss": 0.1791, "step": 668700 }, { "epoch": 3.76, "learning_rate": 1.2419040231144988e-05, "loss": 0.1838, "step": 668800 }, { "epoch": 3.76, "learning_rate": 1.2413419000882535e-05, "loss": 0.1792, "step": 668900 }, { "epoch": 3.76, "learning_rate": 1.2407797770620079e-05, "loss": 0.1797, "step": 669000 }, { "epoch": 3.76, "learning_rate": 1.2402176540357622e-05, "loss": 0.185, "step": 669100 }, { "epoch": 3.76, "learning_rate": 1.2396555310095168e-05, "loss": 0.186, "step": 669200 }, { "epoch": 3.76, "learning_rate": 1.2390934079832713e-05, "loss": 0.1848, "step": 669300 }, { "epoch": 3.76, "learning_rate": 1.2385312849570257e-05, "loss": 0.1759, "step": 669400 }, { "epoch": 3.76, "learning_rate": 1.2379691619307802e-05, "loss": 0.1785, "step": 669500 }, { "epoch": 3.76, "learning_rate": 1.2374070389045347e-05, "loss": 0.1811, "step": 669600 }, { "epoch": 3.76, "learning_rate": 1.2368449158782893e-05, "loss": 0.1765, "step": 669700 }, { "epoch": 3.77, "learning_rate": 1.2362827928520436e-05, "loss": 0.181, "step": 669800 }, { "epoch": 3.77, "learning_rate": 1.235720669825798e-05, "loss": 0.1808, "step": 669900 }, { "epoch": 3.77, "learning_rate": 1.2351585467995527e-05, "loss": 0.1762, "step": 670000 }, { "epoch": 3.77, "learning_rate": 1.2345964237733071e-05, "loss": 0.1828, "step": 670100 }, { "epoch": 3.77, "learning_rate": 1.2340343007470616e-05, "loss": 0.1805, "step": 670200 }, { "epoch": 3.77, "learning_rate": 1.233472177720816e-05, "loss": 0.1771, "step": 670300 }, { "epoch": 3.77, "learning_rate": 1.2329100546945705e-05, "loss": 0.1836, "step": 670400 }, { "epoch": 3.77, "learning_rate": 1.232347931668325e-05, "loss": 0.1794, "step": 670500 }, { "epoch": 3.77, "learning_rate": 1.2317858086420794e-05, "loss": 0.1851, "step": 670600 }, { "epoch": 3.77, "learning_rate": 1.2312236856158339e-05, "loss": 0.1836, "step": 670700 }, { "epoch": 3.77, "learning_rate": 1.2306615625895885e-05, "loss": 0.1841, "step": 670800 }, { "epoch": 3.77, "learning_rate": 1.230099439563343e-05, "loss": 0.1821, "step": 670900 }, { "epoch": 3.77, "learning_rate": 1.2295373165370974e-05, "loss": 0.1812, "step": 671000 }, { "epoch": 3.77, "learning_rate": 1.2289751935108519e-05, "loss": 0.1798, "step": 671100 }, { "epoch": 3.77, "learning_rate": 1.2284130704846063e-05, "loss": 0.1766, "step": 671200 }, { "epoch": 3.77, "learning_rate": 1.2278509474583608e-05, "loss": 0.1819, "step": 671300 }, { "epoch": 3.77, "learning_rate": 1.2272888244321153e-05, "loss": 0.1791, "step": 671400 }, { "epoch": 3.77, "learning_rate": 1.2267267014058697e-05, "loss": 0.1842, "step": 671500 }, { "epoch": 3.78, "learning_rate": 1.2261645783796243e-05, "loss": 0.1825, "step": 671600 }, { "epoch": 3.78, "learning_rate": 1.2256024553533788e-05, "loss": 0.1804, "step": 671700 }, { "epoch": 3.78, "learning_rate": 1.2250403323271331e-05, "loss": 0.1809, "step": 671800 }, { "epoch": 3.78, "learning_rate": 1.2244782093008875e-05, "loss": 0.1771, "step": 671900 }, { "epoch": 3.78, "learning_rate": 1.2239160862746422e-05, "loss": 0.1816, "step": 672000 }, { "epoch": 3.78, "learning_rate": 1.2233539632483966e-05, "loss": 0.1752, "step": 672100 }, { "epoch": 3.78, "learning_rate": 1.2227918402221511e-05, "loss": 0.187, "step": 672200 }, { "epoch": 3.78, "learning_rate": 1.2222297171959055e-05, "loss": 0.1813, "step": 672300 }, { "epoch": 3.78, "learning_rate": 1.2216675941696602e-05, "loss": 0.1802, "step": 672400 }, { "epoch": 3.78, "learning_rate": 1.2211054711434145e-05, "loss": 0.1756, "step": 672500 }, { "epoch": 3.78, "learning_rate": 1.220543348117169e-05, "loss": 0.1743, "step": 672600 }, { "epoch": 3.78, "learning_rate": 1.2199812250909234e-05, "loss": 0.1813, "step": 672700 }, { "epoch": 3.78, "learning_rate": 1.219419102064678e-05, "loss": 0.1763, "step": 672800 }, { "epoch": 3.78, "learning_rate": 1.2188569790384325e-05, "loss": 0.1825, "step": 672900 }, { "epoch": 3.78, "learning_rate": 1.218294856012187e-05, "loss": 0.1732, "step": 673000 }, { "epoch": 3.78, "learning_rate": 1.2177327329859414e-05, "loss": 0.1804, "step": 673100 }, { "epoch": 3.78, "learning_rate": 1.2171706099596958e-05, "loss": 0.1859, "step": 673200 }, { "epoch": 3.78, "learning_rate": 1.2166084869334503e-05, "loss": 0.1742, "step": 673300 }, { "epoch": 3.79, "learning_rate": 1.2160463639072048e-05, "loss": 0.1786, "step": 673400 }, { "epoch": 3.79, "learning_rate": 1.2154898621112219e-05, "loss": 0.1852, "step": 673500 }, { "epoch": 3.79, "learning_rate": 1.2149277390849761e-05, "loss": 0.1816, "step": 673600 }, { "epoch": 3.79, "learning_rate": 1.2143656160587306e-05, "loss": 0.1838, "step": 673700 }, { "epoch": 3.79, "learning_rate": 1.213803493032485e-05, "loss": 0.1808, "step": 673800 }, { "epoch": 3.79, "learning_rate": 1.2132413700062397e-05, "loss": 0.1811, "step": 673900 }, { "epoch": 3.79, "learning_rate": 1.2126792469799941e-05, "loss": 0.1763, "step": 674000 }, { "epoch": 3.79, "learning_rate": 1.2121171239537486e-05, "loss": 0.1798, "step": 674100 }, { "epoch": 3.79, "learning_rate": 1.211555000927503e-05, "loss": 0.1753, "step": 674200 }, { "epoch": 3.79, "learning_rate": 1.2109928779012575e-05, "loss": 0.1818, "step": 674300 }, { "epoch": 3.79, "learning_rate": 1.210430754875012e-05, "loss": 0.176, "step": 674400 }, { "epoch": 3.79, "learning_rate": 1.2098686318487664e-05, "loss": 0.1789, "step": 674500 }, { "epoch": 3.79, "learning_rate": 1.2093065088225209e-05, "loss": 0.177, "step": 674600 }, { "epoch": 3.79, "learning_rate": 1.2087443857962755e-05, "loss": 0.1781, "step": 674700 }, { "epoch": 3.79, "learning_rate": 1.20818226277003e-05, "loss": 0.1769, "step": 674800 }, { "epoch": 3.79, "learning_rate": 1.2076201397437843e-05, "loss": 0.1757, "step": 674900 }, { "epoch": 3.79, "learning_rate": 1.2070580167175389e-05, "loss": 0.1786, "step": 675000 }, { "epoch": 3.79, "learning_rate": 1.2064958936912934e-05, "loss": 0.1823, "step": 675100 }, { "epoch": 3.8, "learning_rate": 1.2059337706650478e-05, "loss": 0.1852, "step": 675200 }, { "epoch": 3.8, "learning_rate": 1.2053716476388023e-05, "loss": 0.1768, "step": 675300 }, { "epoch": 3.8, "learning_rate": 1.2048095246125567e-05, "loss": 0.1802, "step": 675400 }, { "epoch": 3.8, "learning_rate": 1.2042474015863114e-05, "loss": 0.179, "step": 675500 }, { "epoch": 3.8, "learning_rate": 1.2036852785600657e-05, "loss": 0.1771, "step": 675600 }, { "epoch": 3.8, "learning_rate": 1.2031231555338201e-05, "loss": 0.1778, "step": 675700 }, { "epoch": 3.8, "learning_rate": 1.2025666537378372e-05, "loss": 0.1774, "step": 675800 }, { "epoch": 3.8, "learning_rate": 1.2020045307115917e-05, "loss": 0.1808, "step": 675900 }, { "epoch": 3.8, "learning_rate": 1.201442407685346e-05, "loss": 0.1832, "step": 676000 }, { "epoch": 3.8, "learning_rate": 1.2008802846591006e-05, "loss": 0.1843, "step": 676100 }, { "epoch": 3.8, "learning_rate": 1.200318161632855e-05, "loss": 0.185, "step": 676200 }, { "epoch": 3.8, "learning_rate": 1.1997560386066095e-05, "loss": 0.1803, "step": 676300 }, { "epoch": 3.8, "learning_rate": 1.1991995368106264e-05, "loss": 0.1821, "step": 676400 }, { "epoch": 3.8, "learning_rate": 1.1986374137843809e-05, "loss": 0.1825, "step": 676500 }, { "epoch": 3.8, "learning_rate": 1.1980752907581354e-05, "loss": 0.1769, "step": 676600 }, { "epoch": 3.8, "learning_rate": 1.1975131677318898e-05, "loss": 0.1832, "step": 676700 }, { "epoch": 3.8, "learning_rate": 1.1969510447056444e-05, "loss": 0.1786, "step": 676800 }, { "epoch": 3.81, "learning_rate": 1.1963889216793989e-05, "loss": 0.1828, "step": 676900 }, { "epoch": 3.81, "learning_rate": 1.1958267986531532e-05, "loss": 0.1871, "step": 677000 }, { "epoch": 3.81, "learning_rate": 1.1952646756269076e-05, "loss": 0.1818, "step": 677100 }, { "epoch": 3.81, "learning_rate": 1.1947025526006623e-05, "loss": 0.1827, "step": 677200 }, { "epoch": 3.81, "learning_rate": 1.1941404295744167e-05, "loss": 0.1831, "step": 677300 }, { "epoch": 3.81, "learning_rate": 1.1935783065481712e-05, "loss": 0.1798, "step": 677400 }, { "epoch": 3.81, "learning_rate": 1.1930161835219256e-05, "loss": 0.1763, "step": 677500 }, { "epoch": 3.81, "learning_rate": 1.1924540604956803e-05, "loss": 0.1863, "step": 677600 }, { "epoch": 3.81, "learning_rate": 1.1918919374694346e-05, "loss": 0.1835, "step": 677700 }, { "epoch": 3.81, "learning_rate": 1.191329814443189e-05, "loss": 0.1767, "step": 677800 }, { "epoch": 3.81, "learning_rate": 1.1907676914169435e-05, "loss": 0.1781, "step": 677900 }, { "epoch": 3.81, "learning_rate": 1.1902055683906981e-05, "loss": 0.1819, "step": 678000 }, { "epoch": 3.81, "learning_rate": 1.1896434453644526e-05, "loss": 0.1842, "step": 678100 }, { "epoch": 3.81, "learning_rate": 1.189081322338207e-05, "loss": 0.1797, "step": 678200 }, { "epoch": 3.81, "learning_rate": 1.1885191993119615e-05, "loss": 0.1755, "step": 678300 }, { "epoch": 3.81, "learning_rate": 1.187957076285716e-05, "loss": 0.1831, "step": 678400 }, { "epoch": 3.81, "learning_rate": 1.1873949532594704e-05, "loss": 0.1816, "step": 678500 }, { "epoch": 3.81, "learning_rate": 1.1868328302332249e-05, "loss": 0.183, "step": 678600 }, { "epoch": 3.82, "learning_rate": 1.1862707072069793e-05, "loss": 0.1742, "step": 678700 }, { "epoch": 3.82, "learning_rate": 1.185708584180734e-05, "loss": 0.1758, "step": 678800 }, { "epoch": 3.82, "learning_rate": 1.1851464611544884e-05, "loss": 0.1795, "step": 678900 }, { "epoch": 3.82, "learning_rate": 1.1845843381282427e-05, "loss": 0.181, "step": 679000 }, { "epoch": 3.82, "learning_rate": 1.1840222151019973e-05, "loss": 0.1774, "step": 679100 }, { "epoch": 3.82, "learning_rate": 1.1834600920757518e-05, "loss": 0.1787, "step": 679200 }, { "epoch": 3.82, "learning_rate": 1.1828979690495062e-05, "loss": 0.1753, "step": 679300 }, { "epoch": 3.82, "learning_rate": 1.1823358460232607e-05, "loss": 0.1791, "step": 679400 }, { "epoch": 3.82, "learning_rate": 1.1817737229970152e-05, "loss": 0.1697, "step": 679500 }, { "epoch": 3.82, "learning_rate": 1.1812115999707698e-05, "loss": 0.1772, "step": 679600 }, { "epoch": 3.82, "learning_rate": 1.180649476944524e-05, "loss": 0.1855, "step": 679700 }, { "epoch": 3.82, "learning_rate": 1.1800873539182785e-05, "loss": 0.178, "step": 679800 }, { "epoch": 3.82, "learning_rate": 1.179525230892033e-05, "loss": 0.1822, "step": 679900 }, { "epoch": 3.82, "learning_rate": 1.1789631078657876e-05, "loss": 0.1809, "step": 680000 }, { "epoch": 3.82, "learning_rate": 1.178400984839542e-05, "loss": 0.1776, "step": 680100 }, { "epoch": 3.82, "learning_rate": 1.1778388618132965e-05, "loss": 0.1816, "step": 680200 }, { "epoch": 3.82, "learning_rate": 1.177276738787051e-05, "loss": 0.1791, "step": 680300 }, { "epoch": 3.82, "learning_rate": 1.176720236991068e-05, "loss": 0.1779, "step": 680400 }, { "epoch": 3.83, "learning_rate": 1.1761581139648224e-05, "loss": 0.1839, "step": 680500 }, { "epoch": 3.83, "learning_rate": 1.1755959909385768e-05, "loss": 0.1776, "step": 680600 }, { "epoch": 3.83, "learning_rate": 1.1750338679123315e-05, "loss": 0.1829, "step": 680700 }, { "epoch": 3.83, "learning_rate": 1.1744717448860858e-05, "loss": 0.1784, "step": 680800 }, { "epoch": 3.83, "learning_rate": 1.1739096218598402e-05, "loss": 0.1817, "step": 680900 }, { "epoch": 3.83, "learning_rate": 1.1733474988335948e-05, "loss": 0.1839, "step": 681000 }, { "epoch": 3.83, "learning_rate": 1.1727853758073493e-05, "loss": 0.1767, "step": 681100 }, { "epoch": 3.83, "learning_rate": 1.1722232527811038e-05, "loss": 0.1864, "step": 681200 }, { "epoch": 3.83, "learning_rate": 1.1716611297548582e-05, "loss": 0.1785, "step": 681300 }, { "epoch": 3.83, "learning_rate": 1.1710990067286127e-05, "loss": 0.1731, "step": 681400 }, { "epoch": 3.83, "learning_rate": 1.1705368837023671e-05, "loss": 0.1842, "step": 681500 }, { "epoch": 3.83, "learning_rate": 1.1699747606761216e-05, "loss": 0.183, "step": 681600 }, { "epoch": 3.83, "learning_rate": 1.169412637649876e-05, "loss": 0.1864, "step": 681700 }, { "epoch": 3.83, "learning_rate": 1.1688505146236307e-05, "loss": 0.1798, "step": 681800 }, { "epoch": 3.83, "learning_rate": 1.1682883915973851e-05, "loss": 0.1789, "step": 681900 }, { "epoch": 3.83, "learning_rate": 1.1677262685711396e-05, "loss": 0.1782, "step": 682000 }, { "epoch": 3.83, "learning_rate": 1.1671641455448939e-05, "loss": 0.1814, "step": 682100 }, { "epoch": 3.83, "learning_rate": 1.1666020225186485e-05, "loss": 0.1767, "step": 682200 }, { "epoch": 3.84, "learning_rate": 1.166039899492403e-05, "loss": 0.1758, "step": 682300 }, { "epoch": 3.84, "learning_rate": 1.1654777764661574e-05, "loss": 0.1774, "step": 682400 }, { "epoch": 3.84, "learning_rate": 1.1649212746701744e-05, "loss": 0.1774, "step": 682500 }, { "epoch": 3.84, "learning_rate": 1.1643591516439288e-05, "loss": 0.1823, "step": 682600 }, { "epoch": 3.84, "learning_rate": 1.1637970286176833e-05, "loss": 0.1871, "step": 682700 }, { "epoch": 3.84, "learning_rate": 1.1632349055914377e-05, "loss": 0.1778, "step": 682800 }, { "epoch": 3.84, "learning_rate": 1.1626727825651924e-05, "loss": 0.176, "step": 682900 }, { "epoch": 3.84, "learning_rate": 1.1621106595389468e-05, "loss": 0.1766, "step": 683000 }, { "epoch": 3.84, "learning_rate": 1.1615485365127013e-05, "loss": 0.1827, "step": 683100 }, { "epoch": 3.84, "learning_rate": 1.1609864134864556e-05, "loss": 0.1815, "step": 683200 }, { "epoch": 3.84, "learning_rate": 1.1604242904602102e-05, "loss": 0.1811, "step": 683300 }, { "epoch": 3.84, "learning_rate": 1.1598621674339647e-05, "loss": 0.182, "step": 683400 }, { "epoch": 3.84, "learning_rate": 1.1593000444077191e-05, "loss": 0.1813, "step": 683500 }, { "epoch": 3.84, "learning_rate": 1.1587379213814736e-05, "loss": 0.1825, "step": 683600 }, { "epoch": 3.84, "learning_rate": 1.1581757983552282e-05, "loss": 0.1809, "step": 683700 }, { "epoch": 3.84, "learning_rate": 1.1576136753289825e-05, "loss": 0.1836, "step": 683800 }, { "epoch": 3.84, "learning_rate": 1.157051552302737e-05, "loss": 0.1777, "step": 683900 }, { "epoch": 3.84, "learning_rate": 1.1564894292764914e-05, "loss": 0.1793, "step": 684000 }, { "epoch": 3.85, "learning_rate": 1.155927306250246e-05, "loss": 0.1776, "step": 684100 }, { "epoch": 3.85, "learning_rate": 1.1553651832240005e-05, "loss": 0.1812, "step": 684200 }, { "epoch": 3.85, "learning_rate": 1.154803060197755e-05, "loss": 0.1843, "step": 684300 }, { "epoch": 3.85, "learning_rate": 1.1542409371715094e-05, "loss": 0.1793, "step": 684400 }, { "epoch": 3.85, "learning_rate": 1.1536788141452639e-05, "loss": 0.1874, "step": 684500 }, { "epoch": 3.85, "learning_rate": 1.1531166911190183e-05, "loss": 0.1836, "step": 684600 }, { "epoch": 3.85, "learning_rate": 1.1525545680927728e-05, "loss": 0.1788, "step": 684700 }, { "epoch": 3.85, "learning_rate": 1.1519924450665272e-05, "loss": 0.1789, "step": 684800 }, { "epoch": 3.85, "learning_rate": 1.1514303220402819e-05, "loss": 0.1794, "step": 684900 }, { "epoch": 3.85, "learning_rate": 1.1508738202442986e-05, "loss": 0.1827, "step": 685000 }, { "epoch": 3.85, "learning_rate": 1.1503116972180533e-05, "loss": 0.1814, "step": 685100 }, { "epoch": 3.85, "learning_rate": 1.1497495741918077e-05, "loss": 0.1851, "step": 685200 }, { "epoch": 3.85, "learning_rate": 1.1491874511655622e-05, "loss": 0.1819, "step": 685300 }, { "epoch": 3.85, "learning_rate": 1.1486253281393166e-05, "loss": 0.1786, "step": 685400 }, { "epoch": 3.85, "learning_rate": 1.1480632051130711e-05, "loss": 0.1822, "step": 685500 }, { "epoch": 3.85, "learning_rate": 1.1475010820868255e-05, "loss": 0.1771, "step": 685600 }, { "epoch": 3.85, "learning_rate": 1.14693895906058e-05, "loss": 0.1802, "step": 685700 }, { "epoch": 3.86, "learning_rate": 1.1463768360343345e-05, "loss": 0.1854, "step": 685800 }, { "epoch": 3.86, "learning_rate": 1.1458147130080891e-05, "loss": 0.1799, "step": 685900 }, { "epoch": 3.86, "learning_rate": 1.1452525899818435e-05, "loss": 0.1839, "step": 686000 }, { "epoch": 3.86, "learning_rate": 1.144690466955598e-05, "loss": 0.1815, "step": 686100 }, { "epoch": 3.86, "learning_rate": 1.1441283439293523e-05, "loss": 0.1765, "step": 686200 }, { "epoch": 3.86, "learning_rate": 1.143566220903107e-05, "loss": 0.1819, "step": 686300 }, { "epoch": 3.86, "learning_rate": 1.1430040978768614e-05, "loss": 0.1784, "step": 686400 }, { "epoch": 3.86, "learning_rate": 1.1424419748506158e-05, "loss": 0.1866, "step": 686500 }, { "epoch": 3.86, "learning_rate": 1.1418798518243703e-05, "loss": 0.1762, "step": 686600 }, { "epoch": 3.86, "learning_rate": 1.141317728798125e-05, "loss": 0.1787, "step": 686700 }, { "epoch": 3.86, "learning_rate": 1.1407556057718794e-05, "loss": 0.1807, "step": 686800 }, { "epoch": 3.86, "learning_rate": 1.1401934827456337e-05, "loss": 0.1782, "step": 686900 }, { "epoch": 3.86, "learning_rate": 1.1396313597193881e-05, "loss": 0.1775, "step": 687000 }, { "epoch": 3.86, "learning_rate": 1.1390748579234052e-05, "loss": 0.1803, "step": 687100 }, { "epoch": 3.86, "learning_rate": 1.138518356127422e-05, "loss": 0.1787, "step": 687200 }, { "epoch": 3.86, "learning_rate": 1.1379562331011766e-05, "loss": 0.1838, "step": 687300 }, { "epoch": 3.86, "learning_rate": 1.1373997313051934e-05, "loss": 0.1788, "step": 687400 }, { "epoch": 3.86, "learning_rate": 1.136837608278948e-05, "loss": 0.183, "step": 687500 }, { "epoch": 3.87, "learning_rate": 1.1362754852527025e-05, "loss": 0.1859, "step": 687600 }, { "epoch": 3.87, "learning_rate": 1.135713362226457e-05, "loss": 0.1789, "step": 687700 }, { "epoch": 3.87, "learning_rate": 1.1351512392002114e-05, "loss": 0.1822, "step": 687800 }, { "epoch": 3.87, "learning_rate": 1.1345891161739658e-05, "loss": 0.177, "step": 687900 }, { "epoch": 3.87, "learning_rate": 1.1340269931477205e-05, "loss": 0.1801, "step": 688000 }, { "epoch": 3.87, "learning_rate": 1.1334648701214748e-05, "loss": 0.1811, "step": 688100 }, { "epoch": 3.87, "learning_rate": 1.1329027470952292e-05, "loss": 0.1744, "step": 688200 }, { "epoch": 3.87, "learning_rate": 1.1323406240689839e-05, "loss": 0.175, "step": 688300 }, { "epoch": 3.87, "learning_rate": 1.1317785010427383e-05, "loss": 0.1799, "step": 688400 }, { "epoch": 3.87, "learning_rate": 1.1312163780164928e-05, "loss": 0.1786, "step": 688500 }, { "epoch": 3.87, "learning_rate": 1.1306542549902472e-05, "loss": 0.1806, "step": 688600 }, { "epoch": 3.87, "learning_rate": 1.1300921319640017e-05, "loss": 0.1725, "step": 688700 }, { "epoch": 3.87, "learning_rate": 1.1295300089377561e-05, "loss": 0.1743, "step": 688800 }, { "epoch": 3.87, "learning_rate": 1.1289678859115106e-05, "loss": 0.1767, "step": 688900 }, { "epoch": 3.87, "learning_rate": 1.128405762885265e-05, "loss": 0.1768, "step": 689000 }, { "epoch": 3.87, "learning_rate": 1.1278436398590197e-05, "loss": 0.18, "step": 689100 }, { "epoch": 3.87, "learning_rate": 1.1272815168327741e-05, "loss": 0.1816, "step": 689200 }, { "epoch": 3.87, "learning_rate": 1.1267193938065286e-05, "loss": 0.1797, "step": 689300 }, { "epoch": 3.88, "learning_rate": 1.1261572707802829e-05, "loss": 0.1765, "step": 689400 }, { "epoch": 3.88, "learning_rate": 1.1255951477540375e-05, "loss": 0.1834, "step": 689500 }, { "epoch": 3.88, "learning_rate": 1.125033024727792e-05, "loss": 0.1758, "step": 689600 }, { "epoch": 3.88, "learning_rate": 1.1244709017015464e-05, "loss": 0.1827, "step": 689700 }, { "epoch": 3.88, "learning_rate": 1.1239087786753009e-05, "loss": 0.1795, "step": 689800 }, { "epoch": 3.88, "learning_rate": 1.1233466556490555e-05, "loss": 0.1806, "step": 689900 }, { "epoch": 3.88, "learning_rate": 1.12278453262281e-05, "loss": 0.1778, "step": 690000 }, { "epoch": 3.88, "learning_rate": 1.1222224095965643e-05, "loss": 0.1812, "step": 690100 }, { "epoch": 3.88, "learning_rate": 1.1216602865703187e-05, "loss": 0.1839, "step": 690200 }, { "epoch": 3.88, "learning_rate": 1.1210981635440734e-05, "loss": 0.1776, "step": 690300 }, { "epoch": 3.88, "learning_rate": 1.1205360405178278e-05, "loss": 0.1749, "step": 690400 }, { "epoch": 3.88, "learning_rate": 1.1199739174915823e-05, "loss": 0.176, "step": 690500 }, { "epoch": 3.88, "learning_rate": 1.1194117944653367e-05, "loss": 0.1751, "step": 690600 }, { "epoch": 3.88, "learning_rate": 1.1188496714390914e-05, "loss": 0.1763, "step": 690700 }, { "epoch": 3.88, "learning_rate": 1.1182875484128456e-05, "loss": 0.1814, "step": 690800 }, { "epoch": 3.88, "learning_rate": 1.1177254253866001e-05, "loss": 0.1818, "step": 690900 }, { "epoch": 3.88, "learning_rate": 1.1171633023603546e-05, "loss": 0.1785, "step": 691000 }, { "epoch": 3.88, "learning_rate": 1.1166011793341092e-05, "loss": 0.1772, "step": 691100 }, { "epoch": 3.89, "learning_rate": 1.1160390563078637e-05, "loss": 0.1834, "step": 691200 }, { "epoch": 3.89, "learning_rate": 1.1154769332816181e-05, "loss": 0.1761, "step": 691300 }, { "epoch": 3.89, "learning_rate": 1.1149148102553724e-05, "loss": 0.1771, "step": 691400 }, { "epoch": 3.89, "learning_rate": 1.114352687229127e-05, "loss": 0.1775, "step": 691500 }, { "epoch": 3.89, "learning_rate": 1.1137905642028815e-05, "loss": 0.1735, "step": 691600 }, { "epoch": 3.89, "learning_rate": 1.113228441176636e-05, "loss": 0.1778, "step": 691700 }, { "epoch": 3.89, "learning_rate": 1.1126663181503904e-05, "loss": 0.1782, "step": 691800 }, { "epoch": 3.89, "learning_rate": 1.112104195124145e-05, "loss": 0.1799, "step": 691900 }, { "epoch": 3.89, "learning_rate": 1.1115420720978995e-05, "loss": 0.1817, "step": 692000 }, { "epoch": 3.89, "learning_rate": 1.1109799490716538e-05, "loss": 0.182, "step": 692100 }, { "epoch": 3.89, "learning_rate": 1.1104178260454082e-05, "loss": 0.1784, "step": 692200 }, { "epoch": 3.89, "learning_rate": 1.1098557030191629e-05, "loss": 0.1728, "step": 692300 }, { "epoch": 3.89, "learning_rate": 1.1092935799929173e-05, "loss": 0.1758, "step": 692400 }, { "epoch": 3.89, "learning_rate": 1.1087314569666718e-05, "loss": 0.1791, "step": 692500 }, { "epoch": 3.89, "learning_rate": 1.1081693339404262e-05, "loss": 0.1839, "step": 692600 }, { "epoch": 3.89, "learning_rate": 1.1076072109141809e-05, "loss": 0.1759, "step": 692700 }, { "epoch": 3.89, "learning_rate": 1.1070450878879352e-05, "loss": 0.1779, "step": 692800 }, { "epoch": 3.89, "learning_rate": 1.1064829648616896e-05, "loss": 0.1824, "step": 692900 }, { "epoch": 3.9, "learning_rate": 1.105920841835444e-05, "loss": 0.1805, "step": 693000 }, { "epoch": 3.9, "learning_rate": 1.1053587188091987e-05, "loss": 0.1742, "step": 693100 }, { "epoch": 3.9, "learning_rate": 1.1047965957829532e-05, "loss": 0.171, "step": 693200 }, { "epoch": 3.9, "learning_rate": 1.1042344727567076e-05, "loss": 0.1777, "step": 693300 }, { "epoch": 3.9, "learning_rate": 1.103672349730462e-05, "loss": 0.1762, "step": 693400 }, { "epoch": 3.9, "learning_rate": 1.1031102267042165e-05, "loss": 0.1792, "step": 693500 }, { "epoch": 3.9, "learning_rate": 1.102548103677971e-05, "loss": 0.177, "step": 693600 }, { "epoch": 3.9, "learning_rate": 1.1019859806517254e-05, "loss": 0.1802, "step": 693700 }, { "epoch": 3.9, "learning_rate": 1.1014238576254799e-05, "loss": 0.1756, "step": 693800 }, { "epoch": 3.9, "learning_rate": 1.1008617345992345e-05, "loss": 0.1814, "step": 693900 }, { "epoch": 3.9, "learning_rate": 1.100299611572989e-05, "loss": 0.1837, "step": 694000 }, { "epoch": 3.9, "learning_rate": 1.099743109777006e-05, "loss": 0.18, "step": 694100 }, { "epoch": 3.9, "learning_rate": 1.0991809867507604e-05, "loss": 0.1791, "step": 694200 }, { "epoch": 3.9, "learning_rate": 1.0986188637245148e-05, "loss": 0.1867, "step": 694300 }, { "epoch": 3.9, "learning_rate": 1.0980567406982693e-05, "loss": 0.1866, "step": 694400 }, { "epoch": 3.9, "learning_rate": 1.0974946176720238e-05, "loss": 0.1823, "step": 694500 }, { "epoch": 3.9, "learning_rate": 1.0969324946457782e-05, "loss": 0.1818, "step": 694600 }, { "epoch": 3.91, "learning_rate": 1.0963703716195327e-05, "loss": 0.178, "step": 694700 }, { "epoch": 3.91, "learning_rate": 1.0958082485932871e-05, "loss": 0.1782, "step": 694800 }, { "epoch": 3.91, "learning_rate": 1.0952461255670418e-05, "loss": 0.1829, "step": 694900 }, { "epoch": 3.91, "learning_rate": 1.0946840025407962e-05, "loss": 0.1818, "step": 695000 }, { "epoch": 3.91, "learning_rate": 1.0941218795145507e-05, "loss": 0.1755, "step": 695100 }, { "epoch": 3.91, "learning_rate": 1.093559756488305e-05, "loss": 0.1768, "step": 695200 }, { "epoch": 3.91, "learning_rate": 1.0929976334620596e-05, "loss": 0.179, "step": 695300 }, { "epoch": 3.91, "learning_rate": 1.092435510435814e-05, "loss": 0.1733, "step": 695400 }, { "epoch": 3.91, "learning_rate": 1.0918733874095685e-05, "loss": 0.1864, "step": 695500 }, { "epoch": 3.91, "learning_rate": 1.091311264383323e-05, "loss": 0.1772, "step": 695600 }, { "epoch": 3.91, "learning_rate": 1.0907491413570774e-05, "loss": 0.1733, "step": 695700 }, { "epoch": 3.91, "learning_rate": 1.090187018330832e-05, "loss": 0.1748, "step": 695800 }, { "epoch": 3.91, "learning_rate": 1.0896248953045863e-05, "loss": 0.1803, "step": 695900 }, { "epoch": 3.91, "learning_rate": 1.0890627722783408e-05, "loss": 0.1778, "step": 696000 }, { "epoch": 3.91, "learning_rate": 1.0885006492520954e-05, "loss": 0.1823, "step": 696100 }, { "epoch": 3.91, "learning_rate": 1.0879385262258499e-05, "loss": 0.1774, "step": 696200 }, { "epoch": 3.91, "learning_rate": 1.0873764031996043e-05, "loss": 0.1763, "step": 696300 }, { "epoch": 3.91, "learning_rate": 1.0868142801733588e-05, "loss": 0.1742, "step": 696400 }, { "epoch": 3.92, "learning_rate": 1.0862521571471133e-05, "loss": 0.1773, "step": 696500 }, { "epoch": 3.92, "learning_rate": 1.0856900341208677e-05, "loss": 0.1788, "step": 696600 }, { "epoch": 3.92, "learning_rate": 1.0851279110946222e-05, "loss": 0.18, "step": 696700 }, { "epoch": 3.92, "learning_rate": 1.0845657880683766e-05, "loss": 0.1772, "step": 696800 }, { "epoch": 3.92, "learning_rate": 1.0840092862723936e-05, "loss": 0.1823, "step": 696900 }, { "epoch": 3.92, "learning_rate": 1.083447163246148e-05, "loss": 0.1755, "step": 697000 }, { "epoch": 3.92, "learning_rate": 1.0828850402199025e-05, "loss": 0.1783, "step": 697100 }, { "epoch": 3.92, "learning_rate": 1.0823229171936571e-05, "loss": 0.1811, "step": 697200 }, { "epoch": 3.92, "learning_rate": 1.0817607941674116e-05, "loss": 0.172, "step": 697300 }, { "epoch": 3.92, "learning_rate": 1.081198671141166e-05, "loss": 0.1848, "step": 697400 }, { "epoch": 3.92, "learning_rate": 1.0806365481149205e-05, "loss": 0.1725, "step": 697500 }, { "epoch": 3.92, "learning_rate": 1.080074425088675e-05, "loss": 0.1773, "step": 697600 }, { "epoch": 3.92, "learning_rate": 1.0795123020624294e-05, "loss": 0.1772, "step": 697700 }, { "epoch": 3.92, "learning_rate": 1.0789501790361839e-05, "loss": 0.1785, "step": 697800 }, { "epoch": 3.92, "learning_rate": 1.0783880560099383e-05, "loss": 0.1795, "step": 697900 }, { "epoch": 3.92, "learning_rate": 1.077825932983693e-05, "loss": 0.1819, "step": 698000 }, { "epoch": 3.92, "learning_rate": 1.0772638099574474e-05, "loss": 0.1827, "step": 698100 }, { "epoch": 3.92, "learning_rate": 1.0767016869312019e-05, "loss": 0.1769, "step": 698200 }, { "epoch": 3.93, "learning_rate": 1.0761395639049562e-05, "loss": 0.1826, "step": 698300 }, { "epoch": 3.93, "learning_rate": 1.0755774408787108e-05, "loss": 0.1821, "step": 698400 }, { "epoch": 3.93, "learning_rate": 1.0750153178524652e-05, "loss": 0.1787, "step": 698500 }, { "epoch": 3.93, "learning_rate": 1.0744531948262197e-05, "loss": 0.1804, "step": 698600 }, { "epoch": 3.93, "learning_rate": 1.0738910717999742e-05, "loss": 0.1746, "step": 698700 }, { "epoch": 3.93, "learning_rate": 1.0733289487737288e-05, "loss": 0.1771, "step": 698800 }, { "epoch": 3.93, "learning_rate": 1.072766825747483e-05, "loss": 0.1784, "step": 698900 }, { "epoch": 3.93, "learning_rate": 1.0722047027212375e-05, "loss": 0.1783, "step": 699000 }, { "epoch": 3.93, "learning_rate": 1.071642579694992e-05, "loss": 0.1828, "step": 699100 }, { "epoch": 3.93, "learning_rate": 1.0710804566687466e-05, "loss": 0.1805, "step": 699200 }, { "epoch": 3.93, "learning_rate": 1.070518333642501e-05, "loss": 0.1802, "step": 699300 }, { "epoch": 3.93, "learning_rate": 1.0699562106162555e-05, "loss": 0.1755, "step": 699400 }, { "epoch": 3.93, "learning_rate": 1.06939408759001e-05, "loss": 0.1784, "step": 699500 }, { "epoch": 3.93, "learning_rate": 1.0688319645637645e-05, "loss": 0.1828, "step": 699600 }, { "epoch": 3.93, "learning_rate": 1.0682698415375189e-05, "loss": 0.1802, "step": 699700 }, { "epoch": 3.93, "learning_rate": 1.0677133397415358e-05, "loss": 0.1787, "step": 699800 }, { "epoch": 3.93, "learning_rate": 1.0671512167152905e-05, "loss": 0.174, "step": 699900 }, { "epoch": 3.93, "learning_rate": 1.0665890936890448e-05, "loss": 0.1778, "step": 700000 }, { "epoch": 3.94, "learning_rate": 1.0660269706627992e-05, "loss": 0.1806, "step": 700100 }, { "epoch": 3.94, "learning_rate": 1.0654648476365538e-05, "loss": 0.1794, "step": 700200 }, { "epoch": 3.94, "learning_rate": 1.0649027246103083e-05, "loss": 0.175, "step": 700300 }, { "epoch": 3.94, "learning_rate": 1.0643406015840628e-05, "loss": 0.1808, "step": 700400 }, { "epoch": 3.94, "learning_rate": 1.0637784785578172e-05, "loss": 0.1742, "step": 700500 }, { "epoch": 3.94, "learning_rate": 1.0632163555315717e-05, "loss": 0.1866, "step": 700600 }, { "epoch": 3.94, "learning_rate": 1.0626542325053261e-05, "loss": 0.1799, "step": 700700 }, { "epoch": 3.94, "learning_rate": 1.0620921094790806e-05, "loss": 0.1794, "step": 700800 }, { "epoch": 3.94, "learning_rate": 1.061529986452835e-05, "loss": 0.1798, "step": 700900 }, { "epoch": 3.94, "learning_rate": 1.0609678634265897e-05, "loss": 0.1798, "step": 701000 }, { "epoch": 3.94, "learning_rate": 1.0604057404003441e-05, "loss": 0.178, "step": 701100 }, { "epoch": 3.94, "learning_rate": 1.0598436173740986e-05, "loss": 0.181, "step": 701200 }, { "epoch": 3.94, "learning_rate": 1.0592814943478529e-05, "loss": 0.1803, "step": 701300 }, { "epoch": 3.94, "learning_rate": 1.0587193713216075e-05, "loss": 0.1745, "step": 701400 }, { "epoch": 3.94, "learning_rate": 1.058157248295362e-05, "loss": 0.1807, "step": 701500 }, { "epoch": 3.94, "learning_rate": 1.0575951252691164e-05, "loss": 0.1806, "step": 701600 }, { "epoch": 3.94, "learning_rate": 1.0570330022428709e-05, "loss": 0.1797, "step": 701700 }, { "epoch": 3.94, "learning_rate": 1.0564765004468878e-05, "loss": 0.1842, "step": 701800 }, { "epoch": 3.95, "learning_rate": 1.0559143774206423e-05, "loss": 0.1726, "step": 701900 }, { "epoch": 3.95, "learning_rate": 1.0553522543943967e-05, "loss": 0.1806, "step": 702000 }, { "epoch": 3.95, "learning_rate": 1.0547901313681514e-05, "loss": 0.1782, "step": 702100 }, { "epoch": 3.95, "learning_rate": 1.0542280083419058e-05, "loss": 0.1805, "step": 702200 }, { "epoch": 3.95, "learning_rate": 1.0536658853156603e-05, "loss": 0.1783, "step": 702300 }, { "epoch": 3.95, "learning_rate": 1.0531037622894146e-05, "loss": 0.1821, "step": 702400 }, { "epoch": 3.95, "learning_rate": 1.0525416392631692e-05, "loss": 0.172, "step": 702500 }, { "epoch": 3.95, "learning_rate": 1.0519795162369237e-05, "loss": 0.1769, "step": 702600 }, { "epoch": 3.95, "learning_rate": 1.0514173932106781e-05, "loss": 0.1739, "step": 702700 }, { "epoch": 3.95, "learning_rate": 1.0508552701844326e-05, "loss": 0.1787, "step": 702800 }, { "epoch": 3.95, "learning_rate": 1.0502931471581872e-05, "loss": 0.1738, "step": 702900 }, { "epoch": 3.95, "learning_rate": 1.0497310241319417e-05, "loss": 0.1793, "step": 703000 }, { "epoch": 3.95, "learning_rate": 1.049168901105696e-05, "loss": 0.1826, "step": 703100 }, { "epoch": 3.95, "learning_rate": 1.0486067780794504e-05, "loss": 0.1787, "step": 703200 }, { "epoch": 3.95, "learning_rate": 1.048044655053205e-05, "loss": 0.172, "step": 703300 }, { "epoch": 3.95, "learning_rate": 1.0474825320269595e-05, "loss": 0.1763, "step": 703400 }, { "epoch": 3.95, "learning_rate": 1.046920409000714e-05, "loss": 0.1804, "step": 703500 }, { "epoch": 3.96, "learning_rate": 1.0463582859744684e-05, "loss": 0.1749, "step": 703600 }, { "epoch": 3.96, "learning_rate": 1.0457961629482229e-05, "loss": 0.1816, "step": 703700 }, { "epoch": 3.96, "learning_rate": 1.0452340399219773e-05, "loss": 0.1683, "step": 703800 }, { "epoch": 3.96, "learning_rate": 1.0446719168957318e-05, "loss": 0.1819, "step": 703900 }, { "epoch": 3.96, "learning_rate": 1.0441097938694862e-05, "loss": 0.1731, "step": 704000 }, { "epoch": 3.96, "learning_rate": 1.0435476708432409e-05, "loss": 0.1762, "step": 704100 }, { "epoch": 3.96, "learning_rate": 1.0429855478169953e-05, "loss": 0.1764, "step": 704200 }, { "epoch": 3.96, "learning_rate": 1.0424234247907498e-05, "loss": 0.1777, "step": 704300 }, { "epoch": 3.96, "learning_rate": 1.0418613017645042e-05, "loss": 0.1758, "step": 704400 }, { "epoch": 3.96, "learning_rate": 1.0412991787382587e-05, "loss": 0.1811, "step": 704500 }, { "epoch": 3.96, "learning_rate": 1.0407370557120132e-05, "loss": 0.1757, "step": 704600 }, { "epoch": 3.96, "learning_rate": 1.0401749326857676e-05, "loss": 0.1722, "step": 704700 }, { "epoch": 3.96, "learning_rate": 1.0396184308897846e-05, "loss": 0.1725, "step": 704800 }, { "epoch": 3.96, "learning_rate": 1.039056307863539e-05, "loss": 0.1688, "step": 704900 }, { "epoch": 3.96, "learning_rate": 1.0384941848372935e-05, "loss": 0.1846, "step": 705000 }, { "epoch": 3.96, "learning_rate": 1.0379320618110481e-05, "loss": 0.1856, "step": 705100 }, { "epoch": 3.96, "learning_rate": 1.0373699387848026e-05, "loss": 0.1795, "step": 705200 }, { "epoch": 3.96, "learning_rate": 1.036807815758557e-05, "loss": 0.1799, "step": 705300 }, { "epoch": 3.97, "learning_rate": 1.0362456927323115e-05, "loss": 0.1803, "step": 705400 }, { "epoch": 3.97, "learning_rate": 1.035683569706066e-05, "loss": 0.1731, "step": 705500 }, { "epoch": 3.97, "learning_rate": 1.0351214466798204e-05, "loss": 0.1735, "step": 705600 }, { "epoch": 3.97, "learning_rate": 1.0345593236535748e-05, "loss": 0.1699, "step": 705700 }, { "epoch": 3.97, "learning_rate": 1.0339972006273293e-05, "loss": 0.1749, "step": 705800 }, { "epoch": 3.97, "learning_rate": 1.033435077601084e-05, "loss": 0.1791, "step": 705900 }, { "epoch": 3.97, "learning_rate": 1.0328729545748384e-05, "loss": 0.1835, "step": 706000 }, { "epoch": 3.97, "learning_rate": 1.0323108315485928e-05, "loss": 0.1793, "step": 706100 }, { "epoch": 3.97, "learning_rate": 1.0317487085223471e-05, "loss": 0.1828, "step": 706200 }, { "epoch": 3.97, "learning_rate": 1.0311865854961018e-05, "loss": 0.1738, "step": 706300 }, { "epoch": 3.97, "learning_rate": 1.0306244624698562e-05, "loss": 0.1733, "step": 706400 }, { "epoch": 3.97, "learning_rate": 1.0300623394436107e-05, "loss": 0.1815, "step": 706500 }, { "epoch": 3.97, "learning_rate": 1.0295002164173651e-05, "loss": 0.1797, "step": 706600 }, { "epoch": 3.97, "learning_rate": 1.0289380933911198e-05, "loss": 0.1778, "step": 706700 }, { "epoch": 3.97, "learning_rate": 1.0283815915951365e-05, "loss": 0.1801, "step": 706800 }, { "epoch": 3.97, "learning_rate": 1.027819468568891e-05, "loss": 0.1801, "step": 706900 }, { "epoch": 3.97, "learning_rate": 1.0272573455426456e-05, "loss": 0.1809, "step": 707000 }, { "epoch": 3.97, "learning_rate": 1.0266952225164e-05, "loss": 0.1712, "step": 707100 }, { "epoch": 3.98, "learning_rate": 1.0261330994901544e-05, "loss": 0.1767, "step": 707200 }, { "epoch": 3.98, "learning_rate": 1.0255709764639088e-05, "loss": 0.1822, "step": 707300 }, { "epoch": 3.98, "learning_rate": 1.0250088534376635e-05, "loss": 0.1804, "step": 707400 }, { "epoch": 3.98, "learning_rate": 1.0244467304114179e-05, "loss": 0.1755, "step": 707500 }, { "epoch": 3.98, "learning_rate": 1.0238846073851724e-05, "loss": 0.1846, "step": 707600 }, { "epoch": 3.98, "learning_rate": 1.0233224843589268e-05, "loss": 0.1714, "step": 707700 }, { "epoch": 3.98, "learning_rate": 1.0227603613326815e-05, "loss": 0.1839, "step": 707800 }, { "epoch": 3.98, "learning_rate": 1.0221982383064357e-05, "loss": 0.1742, "step": 707900 }, { "epoch": 3.98, "learning_rate": 1.0216361152801902e-05, "loss": 0.1775, "step": 708000 }, { "epoch": 3.98, "learning_rate": 1.0210739922539447e-05, "loss": 0.1795, "step": 708100 }, { "epoch": 3.98, "learning_rate": 1.0205118692276993e-05, "loss": 0.1772, "step": 708200 }, { "epoch": 3.98, "learning_rate": 1.0199497462014537e-05, "loss": 0.1768, "step": 708300 }, { "epoch": 3.98, "learning_rate": 1.0193876231752082e-05, "loss": 0.1728, "step": 708400 }, { "epoch": 3.98, "learning_rate": 1.0188255001489627e-05, "loss": 0.1808, "step": 708500 }, { "epoch": 3.98, "learning_rate": 1.0182633771227171e-05, "loss": 0.1781, "step": 708600 }, { "epoch": 3.98, "learning_rate": 1.0177012540964716e-05, "loss": 0.1791, "step": 708700 }, { "epoch": 3.98, "learning_rate": 1.017139131070226e-05, "loss": 0.1775, "step": 708800 }, { "epoch": 3.98, "learning_rate": 1.0165770080439805e-05, "loss": 0.1731, "step": 708900 }, { "epoch": 3.99, "learning_rate": 1.0160148850177351e-05, "loss": 0.1777, "step": 709000 }, { "epoch": 3.99, "learning_rate": 1.0154583832217519e-05, "loss": 0.1798, "step": 709100 }, { "epoch": 3.99, "learning_rate": 1.0148962601955065e-05, "loss": 0.1808, "step": 709200 }, { "epoch": 3.99, "learning_rate": 1.014334137169261e-05, "loss": 0.1773, "step": 709300 }, { "epoch": 3.99, "learning_rate": 1.0137720141430154e-05, "loss": 0.1758, "step": 709400 }, { "epoch": 3.99, "learning_rate": 1.0132098911167699e-05, "loss": 0.1788, "step": 709500 }, { "epoch": 3.99, "learning_rate": 1.0126477680905243e-05, "loss": 0.1761, "step": 709600 }, { "epoch": 3.99, "learning_rate": 1.0120856450642788e-05, "loss": 0.1781, "step": 709700 }, { "epoch": 3.99, "learning_rate": 1.0115235220380333e-05, "loss": 0.1767, "step": 709800 }, { "epoch": 3.99, "learning_rate": 1.0109613990117877e-05, "loss": 0.1795, "step": 709900 }, { "epoch": 3.99, "learning_rate": 1.0103992759855423e-05, "loss": 0.1751, "step": 710000 }, { "epoch": 3.99, "learning_rate": 1.0098371529592968e-05, "loss": 0.1771, "step": 710100 }, { "epoch": 3.99, "learning_rate": 1.0092750299330513e-05, "loss": 0.1807, "step": 710200 }, { "epoch": 3.99, "learning_rate": 1.0087129069068056e-05, "loss": 0.1744, "step": 710300 }, { "epoch": 3.99, "learning_rate": 1.0081507838805602e-05, "loss": 0.1836, "step": 710400 }, { "epoch": 3.99, "learning_rate": 1.0075886608543146e-05, "loss": 0.1787, "step": 710500 }, { "epoch": 3.99, "learning_rate": 1.0070265378280691e-05, "loss": 0.1766, "step": 710600 }, { "epoch": 4.0, "learning_rate": 1.0064644148018236e-05, "loss": 0.1799, "step": 710700 }, { "epoch": 4.0, "learning_rate": 1.005902291775578e-05, "loss": 0.1761, "step": 710800 }, { "epoch": 4.0, "learning_rate": 1.0053401687493326e-05, "loss": 0.1719, "step": 710900 }, { "epoch": 4.0, "learning_rate": 1.004778045723087e-05, "loss": 0.1796, "step": 711000 }, { "epoch": 4.0, "learning_rate": 1.0042159226968414e-05, "loss": 0.1852, "step": 711100 }, { "epoch": 4.0, "learning_rate": 1.003653799670596e-05, "loss": 0.1778, "step": 711200 }, { "epoch": 4.0, "learning_rate": 1.0030916766443505e-05, "loss": 0.1793, "step": 711300 }, { "epoch": 4.0, "learning_rate": 1.002529553618105e-05, "loss": 0.175, "step": 711400 }, { "epoch": 4.0, "learning_rate": 1.0019674305918594e-05, "loss": 0.1753, "step": 711500 }, { "epoch": 4.0, "eval_bleu": 77.928, "eval_cer": 2.2105, "eval_chrF": 95.62967894411643, "eval_gen_len": 16.776484, "eval_loss": 0.4870626628398895, "eval_runtime": 7213.9111, "eval_samples_per_second": 34.655, "eval_steps_per_second": 0.542, "eval_wer": 12.3386, "step": 711588 }, { "epoch": 4.0, "learning_rate": 1.0014053075656139e-05, "loss": 0.1735, "step": 711600 }, { "epoch": 4.0, "learning_rate": 1.0008431845393683e-05, "loss": 0.1625, "step": 711700 }, { "epoch": 4.0, "learning_rate": 1.0002810615131228e-05, "loss": 0.168, "step": 711800 }, { "epoch": 4.0, "learning_rate": 9.997189384868772e-06, "loss": 0.1698, "step": 711900 }, { "epoch": 4.0, "learning_rate": 9.991568154606319e-06, "loss": 0.1631, "step": 712000 }, { "epoch": 4.0, "learning_rate": 9.986003136646486e-06, "loss": 0.1596, "step": 712100 }, { "epoch": 4.0, "learning_rate": 9.98038190638403e-06, "loss": 0.1676, "step": 712200 }, { "epoch": 4.0, "learning_rate": 9.974760676121577e-06, "loss": 0.1691, "step": 712300 }, { "epoch": 4.0, "learning_rate": 9.969139445859122e-06, "loss": 0.1642, "step": 712400 }, { "epoch": 4.01, "learning_rate": 9.963518215596666e-06, "loss": 0.1688, "step": 712500 }, { "epoch": 4.01, "learning_rate": 9.95789698533421e-06, "loss": 0.1615, "step": 712600 }, { "epoch": 4.01, "learning_rate": 9.95233196737438e-06, "loss": 0.1666, "step": 712700 }, { "epoch": 4.01, "learning_rate": 9.946710737111925e-06, "loss": 0.1659, "step": 712800 }, { "epoch": 4.01, "learning_rate": 9.94108950684947e-06, "loss": 0.1666, "step": 712900 }, { "epoch": 4.01, "learning_rate": 9.935468276587016e-06, "loss": 0.1639, "step": 713000 }, { "epoch": 4.01, "learning_rate": 9.929847046324558e-06, "loss": 0.1629, "step": 713100 }, { "epoch": 4.01, "learning_rate": 9.924225816062103e-06, "loss": 0.1649, "step": 713200 }, { "epoch": 4.01, "learning_rate": 9.91860458579965e-06, "loss": 0.169, "step": 713300 }, { "epoch": 4.01, "learning_rate": 9.912983355537194e-06, "loss": 0.1647, "step": 713400 }, { "epoch": 4.01, "learning_rate": 9.907362125274738e-06, "loss": 0.1611, "step": 713500 }, { "epoch": 4.01, "learning_rate": 9.901740895012283e-06, "loss": 0.1658, "step": 713600 }, { "epoch": 4.01, "learning_rate": 9.896119664749828e-06, "loss": 0.1648, "step": 713700 }, { "epoch": 4.01, "learning_rate": 9.890498434487372e-06, "loss": 0.166, "step": 713800 }, { "epoch": 4.01, "learning_rate": 9.884877204224917e-06, "loss": 0.163, "step": 713900 }, { "epoch": 4.01, "learning_rate": 9.879255973962461e-06, "loss": 0.1651, "step": 714000 }, { "epoch": 4.01, "learning_rate": 9.873634743700006e-06, "loss": 0.1687, "step": 714100 }, { "epoch": 4.01, "learning_rate": 9.868013513437552e-06, "loss": 0.1703, "step": 714200 }, { "epoch": 4.02, "learning_rate": 9.862392283175097e-06, "loss": 0.1609, "step": 714300 }, { "epoch": 4.02, "learning_rate": 9.85677105291264e-06, "loss": 0.1659, "step": 714400 }, { "epoch": 4.02, "learning_rate": 9.851149822650186e-06, "loss": 0.1691, "step": 714500 }, { "epoch": 4.02, "learning_rate": 9.84552859238773e-06, "loss": 0.1606, "step": 714600 }, { "epoch": 4.02, "learning_rate": 9.839907362125275e-06, "loss": 0.1634, "step": 714700 }, { "epoch": 4.02, "learning_rate": 9.83428613186282e-06, "loss": 0.1637, "step": 714800 }, { "epoch": 4.02, "learning_rate": 9.828664901600364e-06, "loss": 0.1639, "step": 714900 }, { "epoch": 4.02, "learning_rate": 9.82304367133791e-06, "loss": 0.1661, "step": 715000 }, { "epoch": 4.02, "learning_rate": 9.817422441075453e-06, "loss": 0.17, "step": 715100 }, { "epoch": 4.02, "learning_rate": 9.811801210812998e-06, "loss": 0.1704, "step": 715200 }, { "epoch": 4.02, "learning_rate": 9.806179980550544e-06, "loss": 0.164, "step": 715300 }, { "epoch": 4.02, "learning_rate": 9.800558750288089e-06, "loss": 0.1579, "step": 715400 }, { "epoch": 4.02, "learning_rate": 9.794937520025634e-06, "loss": 0.1713, "step": 715500 }, { "epoch": 4.02, "learning_rate": 9.789316289763178e-06, "loss": 0.1653, "step": 715600 }, { "epoch": 4.02, "learning_rate": 9.783695059500723e-06, "loss": 0.1675, "step": 715700 }, { "epoch": 4.02, "learning_rate": 9.778073829238267e-06, "loss": 0.1704, "step": 715800 }, { "epoch": 4.02, "learning_rate": 9.772452598975812e-06, "loss": 0.1669, "step": 715900 }, { "epoch": 4.02, "learning_rate": 9.766831368713356e-06, "loss": 0.1623, "step": 716000 }, { "epoch": 4.03, "learning_rate": 9.761210138450903e-06, "loss": 0.1669, "step": 716100 }, { "epoch": 4.03, "learning_rate": 9.755588908188447e-06, "loss": 0.1667, "step": 716200 }, { "epoch": 4.03, "learning_rate": 9.749967677925992e-06, "loss": 0.1597, "step": 716300 }, { "epoch": 4.03, "learning_rate": 9.744346447663535e-06, "loss": 0.1642, "step": 716400 }, { "epoch": 4.03, "learning_rate": 9.738725217401081e-06, "loss": 0.1666, "step": 716500 }, { "epoch": 4.03, "learning_rate": 9.733103987138626e-06, "loss": 0.1682, "step": 716600 }, { "epoch": 4.03, "learning_rate": 9.727538969178795e-06, "loss": 0.1664, "step": 716700 }, { "epoch": 4.03, "learning_rate": 9.72191773891634e-06, "loss": 0.1618, "step": 716800 }, { "epoch": 4.03, "learning_rate": 9.716296508653884e-06, "loss": 0.1668, "step": 716900 }, { "epoch": 4.03, "learning_rate": 9.710675278391429e-06, "loss": 0.1688, "step": 717000 }, { "epoch": 4.03, "learning_rate": 9.705054048128973e-06, "loss": 0.1648, "step": 717100 }, { "epoch": 4.03, "learning_rate": 9.69943281786652e-06, "loss": 0.1643, "step": 717200 }, { "epoch": 4.03, "learning_rate": 9.693811587604064e-06, "loss": 0.1691, "step": 717300 }, { "epoch": 4.03, "learning_rate": 9.688190357341609e-06, "loss": 0.1627, "step": 717400 }, { "epoch": 4.03, "learning_rate": 9.682569127079152e-06, "loss": 0.1597, "step": 717500 }, { "epoch": 4.03, "learning_rate": 9.676947896816698e-06, "loss": 0.1608, "step": 717600 }, { "epoch": 4.03, "learning_rate": 9.671326666554242e-06, "loss": 0.1626, "step": 717700 }, { "epoch": 4.03, "learning_rate": 9.665705436291787e-06, "loss": 0.1718, "step": 717800 }, { "epoch": 4.04, "learning_rate": 9.660084206029332e-06, "loss": 0.1598, "step": 717900 }, { "epoch": 4.04, "learning_rate": 9.654462975766878e-06, "loss": 0.1651, "step": 718000 }, { "epoch": 4.04, "learning_rate": 9.648841745504422e-06, "loss": 0.1629, "step": 718100 }, { "epoch": 4.04, "learning_rate": 9.643220515241965e-06, "loss": 0.1669, "step": 718200 }, { "epoch": 4.04, "learning_rate": 9.63759928497951e-06, "loss": 0.1638, "step": 718300 }, { "epoch": 4.04, "learning_rate": 9.631978054717056e-06, "loss": 0.1707, "step": 718400 }, { "epoch": 4.04, "learning_rate": 9.6263568244546e-06, "loss": 0.1637, "step": 718500 }, { "epoch": 4.04, "learning_rate": 9.620735594192145e-06, "loss": 0.1626, "step": 718600 }, { "epoch": 4.04, "learning_rate": 9.61511436392969e-06, "loss": 0.1628, "step": 718700 }, { "epoch": 4.04, "learning_rate": 9.609493133667235e-06, "loss": 0.1616, "step": 718800 }, { "epoch": 4.04, "learning_rate": 9.60387190340478e-06, "loss": 0.1713, "step": 718900 }, { "epoch": 4.04, "learning_rate": 9.598250673142324e-06, "loss": 0.1642, "step": 719000 }, { "epoch": 4.04, "learning_rate": 9.592685655182495e-06, "loss": 0.1652, "step": 719100 }, { "epoch": 4.04, "learning_rate": 9.587064424920038e-06, "loss": 0.1651, "step": 719200 }, { "epoch": 4.04, "learning_rate": 9.581443194657582e-06, "loss": 0.165, "step": 719300 }, { "epoch": 4.04, "learning_rate": 9.575821964395129e-06, "loss": 0.163, "step": 719400 }, { "epoch": 4.04, "learning_rate": 9.570200734132673e-06, "loss": 0.1638, "step": 719500 }, { "epoch": 4.05, "learning_rate": 9.564579503870218e-06, "loss": 0.1652, "step": 719600 }, { "epoch": 4.05, "learning_rate": 9.558958273607762e-06, "loss": 0.1597, "step": 719700 }, { "epoch": 4.05, "learning_rate": 9.553337043345307e-06, "loss": 0.166, "step": 719800 }, { "epoch": 4.05, "learning_rate": 9.547715813082851e-06, "loss": 0.1654, "step": 719900 }, { "epoch": 4.05, "learning_rate": 9.542094582820396e-06, "loss": 0.1678, "step": 720000 }, { "epoch": 4.05, "learning_rate": 9.53647335255794e-06, "loss": 0.1658, "step": 720100 }, { "epoch": 4.05, "learning_rate": 9.530852122295487e-06, "loss": 0.1642, "step": 720200 }, { "epoch": 4.05, "learning_rate": 9.525230892033031e-06, "loss": 0.1699, "step": 720300 }, { "epoch": 4.05, "learning_rate": 9.519609661770576e-06, "loss": 0.1604, "step": 720400 }, { "epoch": 4.05, "learning_rate": 9.51398843150812e-06, "loss": 0.1614, "step": 720500 }, { "epoch": 4.05, "learning_rate": 9.508367201245665e-06, "loss": 0.166, "step": 720600 }, { "epoch": 4.05, "learning_rate": 9.50274597098321e-06, "loss": 0.1673, "step": 720700 }, { "epoch": 4.05, "learning_rate": 9.497124740720754e-06, "loss": 0.1651, "step": 720800 }, { "epoch": 4.05, "learning_rate": 9.491559722760925e-06, "loss": 0.1638, "step": 720900 }, { "epoch": 4.05, "learning_rate": 9.485938492498468e-06, "loss": 0.1696, "step": 721000 }, { "epoch": 4.05, "learning_rate": 9.480317262236013e-06, "loss": 0.1672, "step": 721100 }, { "epoch": 4.05, "learning_rate": 9.474696031973557e-06, "loss": 0.166, "step": 721200 }, { "epoch": 4.05, "learning_rate": 9.469074801711104e-06, "loss": 0.1672, "step": 721300 }, { "epoch": 4.06, "learning_rate": 9.463453571448648e-06, "loss": 0.161, "step": 721400 }, { "epoch": 4.06, "learning_rate": 9.457832341186193e-06, "loss": 0.1661, "step": 721500 }, { "epoch": 4.06, "learning_rate": 9.452211110923736e-06, "loss": 0.1692, "step": 721600 }, { "epoch": 4.06, "learning_rate": 9.446589880661282e-06, "loss": 0.1661, "step": 721700 }, { "epoch": 4.06, "learning_rate": 9.440968650398827e-06, "loss": 0.1633, "step": 721800 }, { "epoch": 4.06, "learning_rate": 9.435347420136371e-06, "loss": 0.1611, "step": 721900 }, { "epoch": 4.06, "learning_rate": 9.429726189873916e-06, "loss": 0.1684, "step": 722000 }, { "epoch": 4.06, "learning_rate": 9.424161171914085e-06, "loss": 0.161, "step": 722100 }, { "epoch": 4.06, "learning_rate": 9.41853994165163e-06, "loss": 0.1688, "step": 722200 }, { "epoch": 4.06, "learning_rate": 9.4129749236918e-06, "loss": 0.1684, "step": 722300 }, { "epoch": 4.06, "learning_rate": 9.407353693429344e-06, "loss": 0.164, "step": 722400 }, { "epoch": 4.06, "learning_rate": 9.401732463166888e-06, "loss": 0.1635, "step": 722500 }, { "epoch": 4.06, "learning_rate": 9.396111232904434e-06, "loss": 0.1625, "step": 722600 }, { "epoch": 4.06, "learning_rate": 9.390490002641979e-06, "loss": 0.1628, "step": 722700 }, { "epoch": 4.06, "learning_rate": 9.384868772379524e-06, "loss": 0.1659, "step": 722800 }, { "epoch": 4.06, "learning_rate": 9.379247542117068e-06, "loss": 0.1671, "step": 722900 }, { "epoch": 4.06, "learning_rate": 9.373626311854613e-06, "loss": 0.1627, "step": 723000 }, { "epoch": 4.06, "learning_rate": 9.368005081592157e-06, "loss": 0.1653, "step": 723100 }, { "epoch": 4.07, "learning_rate": 9.362383851329702e-06, "loss": 0.1625, "step": 723200 }, { "epoch": 4.07, "learning_rate": 9.356762621067247e-06, "loss": 0.158, "step": 723300 }, { "epoch": 4.07, "learning_rate": 9.351141390804793e-06, "loss": 0.166, "step": 723400 }, { "epoch": 4.07, "learning_rate": 9.345520160542337e-06, "loss": 0.1643, "step": 723500 }, { "epoch": 4.07, "learning_rate": 9.339898930279882e-06, "loss": 0.1663, "step": 723600 }, { "epoch": 4.07, "learning_rate": 9.334277700017425e-06, "loss": 0.1676, "step": 723700 }, { "epoch": 4.07, "learning_rate": 9.328656469754971e-06, "loss": 0.1643, "step": 723800 }, { "epoch": 4.07, "learning_rate": 9.323035239492516e-06, "loss": 0.1639, "step": 723900 }, { "epoch": 4.07, "learning_rate": 9.31741400923006e-06, "loss": 0.1651, "step": 724000 }, { "epoch": 4.07, "learning_rate": 9.311792778967605e-06, "loss": 0.1682, "step": 724100 }, { "epoch": 4.07, "learning_rate": 9.306171548705151e-06, "loss": 0.1593, "step": 724200 }, { "epoch": 4.07, "learning_rate": 9.300550318442696e-06, "loss": 0.1701, "step": 724300 }, { "epoch": 4.07, "learning_rate": 9.294929088180239e-06, "loss": 0.1667, "step": 724400 }, { "epoch": 4.07, "learning_rate": 9.289307857917783e-06, "loss": 0.1645, "step": 724500 }, { "epoch": 4.07, "learning_rate": 9.28368662765533e-06, "loss": 0.1664, "step": 724600 }, { "epoch": 4.07, "learning_rate": 9.278065397392874e-06, "loss": 0.1679, "step": 724700 }, { "epoch": 4.07, "learning_rate": 9.272444167130419e-06, "loss": 0.167, "step": 724800 }, { "epoch": 4.07, "learning_rate": 9.266822936867963e-06, "loss": 0.1644, "step": 724900 }, { "epoch": 4.08, "learning_rate": 9.26120170660551e-06, "loss": 0.1659, "step": 725000 }, { "epoch": 4.08, "learning_rate": 9.255580476343052e-06, "loss": 0.1619, "step": 725100 }, { "epoch": 4.08, "learning_rate": 9.249959246080597e-06, "loss": 0.1639, "step": 725200 }, { "epoch": 4.08, "learning_rate": 9.244338015818142e-06, "loss": 0.1645, "step": 725300 }, { "epoch": 4.08, "learning_rate": 9.238716785555688e-06, "loss": 0.174, "step": 725400 }, { "epoch": 4.08, "learning_rate": 9.233095555293232e-06, "loss": 0.1677, "step": 725500 }, { "epoch": 4.08, "learning_rate": 9.227474325030777e-06, "loss": 0.1653, "step": 725600 }, { "epoch": 4.08, "learning_rate": 9.221853094768322e-06, "loss": 0.1643, "step": 725700 }, { "epoch": 4.08, "learning_rate": 9.216231864505866e-06, "loss": 0.1669, "step": 725800 }, { "epoch": 4.08, "learning_rate": 9.21061063424341e-06, "loss": 0.1629, "step": 725900 }, { "epoch": 4.08, "learning_rate": 9.204989403980955e-06, "loss": 0.1626, "step": 726000 }, { "epoch": 4.08, "learning_rate": 9.1993681737185e-06, "loss": 0.1652, "step": 726100 }, { "epoch": 4.08, "learning_rate": 9.193746943456046e-06, "loss": 0.1628, "step": 726200 }, { "epoch": 4.08, "learning_rate": 9.18812571319359e-06, "loss": 0.1673, "step": 726300 }, { "epoch": 4.08, "learning_rate": 9.182504482931134e-06, "loss": 0.1612, "step": 726400 }, { "epoch": 4.08, "learning_rate": 9.176883252668678e-06, "loss": 0.1706, "step": 726500 }, { "epoch": 4.08, "learning_rate": 9.171262022406225e-06, "loss": 0.1701, "step": 726600 }, { "epoch": 4.08, "learning_rate": 9.165640792143769e-06, "loss": 0.1678, "step": 726700 }, { "epoch": 4.09, "learning_rate": 9.160019561881314e-06, "loss": 0.1643, "step": 726800 }, { "epoch": 4.09, "learning_rate": 9.154398331618858e-06, "loss": 0.1623, "step": 726900 }, { "epoch": 4.09, "learning_rate": 9.148777101356405e-06, "loss": 0.1665, "step": 727000 }, { "epoch": 4.09, "learning_rate": 9.143155871093947e-06, "loss": 0.1602, "step": 727100 }, { "epoch": 4.09, "learning_rate": 9.137534640831492e-06, "loss": 0.1647, "step": 727200 }, { "epoch": 4.09, "learning_rate": 9.131913410569037e-06, "loss": 0.1681, "step": 727300 }, { "epoch": 4.09, "learning_rate": 9.126292180306583e-06, "loss": 0.1651, "step": 727400 }, { "epoch": 4.09, "learning_rate": 9.120670950044128e-06, "loss": 0.1651, "step": 727500 }, { "epoch": 4.09, "learning_rate": 9.115049719781672e-06, "loss": 0.1653, "step": 727600 }, { "epoch": 4.09, "learning_rate": 9.109428489519217e-06, "loss": 0.1644, "step": 727700 }, { "epoch": 4.09, "learning_rate": 9.103807259256761e-06, "loss": 0.1649, "step": 727800 }, { "epoch": 4.09, "learning_rate": 9.098186028994306e-06, "loss": 0.1666, "step": 727900 }, { "epoch": 4.09, "learning_rate": 9.09256479873185e-06, "loss": 0.1678, "step": 728000 }, { "epoch": 4.09, "learning_rate": 9.086943568469395e-06, "loss": 0.1622, "step": 728100 }, { "epoch": 4.09, "learning_rate": 9.081322338206941e-06, "loss": 0.1643, "step": 728200 }, { "epoch": 4.09, "learning_rate": 9.075701107944486e-06, "loss": 0.1699, "step": 728300 }, { "epoch": 4.09, "learning_rate": 9.07007987768203e-06, "loss": 0.1691, "step": 728400 }, { "epoch": 4.1, "learning_rate": 9.064458647419573e-06, "loss": 0.1625, "step": 728500 }, { "epoch": 4.1, "learning_rate": 9.05883741715712e-06, "loss": 0.166, "step": 728600 }, { "epoch": 4.1, "learning_rate": 9.053216186894664e-06, "loss": 0.1658, "step": 728700 }, { "epoch": 4.1, "learning_rate": 9.047594956632209e-06, "loss": 0.1652, "step": 728800 }, { "epoch": 4.1, "learning_rate": 9.041973726369753e-06, "loss": 0.1645, "step": 728900 }, { "epoch": 4.1, "learning_rate": 9.0363524961073e-06, "loss": 0.1636, "step": 729000 }, { "epoch": 4.1, "learning_rate": 9.030731265844843e-06, "loss": 0.1635, "step": 729100 }, { "epoch": 4.1, "learning_rate": 9.025110035582387e-06, "loss": 0.1623, "step": 729200 }, { "epoch": 4.1, "learning_rate": 9.019488805319932e-06, "loss": 0.1662, "step": 729300 }, { "epoch": 4.1, "learning_rate": 9.013867575057478e-06, "loss": 0.1657, "step": 729400 }, { "epoch": 4.1, "learning_rate": 9.008246344795023e-06, "loss": 0.1618, "step": 729500 }, { "epoch": 4.1, "learning_rate": 9.002681326835192e-06, "loss": 0.164, "step": 729600 }, { "epoch": 4.1, "learning_rate": 8.997116308875361e-06, "loss": 0.1646, "step": 729700 }, { "epoch": 4.1, "learning_rate": 8.991495078612906e-06, "loss": 0.1628, "step": 729800 }, { "epoch": 4.1, "learning_rate": 8.98587384835045e-06, "loss": 0.1653, "step": 729900 }, { "epoch": 4.1, "learning_rate": 8.980252618087995e-06, "loss": 0.1619, "step": 730000 }, { "epoch": 4.1, "learning_rate": 8.97463138782554e-06, "loss": 0.164, "step": 730100 }, { "epoch": 4.1, "learning_rate": 8.969010157563084e-06, "loss": 0.1654, "step": 730200 }, { "epoch": 4.11, "learning_rate": 8.96338892730063e-06, "loss": 0.1656, "step": 730300 }, { "epoch": 4.11, "learning_rate": 8.957767697038175e-06, "loss": 0.166, "step": 730400 }, { "epoch": 4.11, "learning_rate": 8.95214646677572e-06, "loss": 0.1669, "step": 730500 }, { "epoch": 4.11, "learning_rate": 8.946525236513262e-06, "loss": 0.1641, "step": 730600 }, { "epoch": 4.11, "learning_rate": 8.940904006250809e-06, "loss": 0.1675, "step": 730700 }, { "epoch": 4.11, "learning_rate": 8.935282775988353e-06, "loss": 0.1595, "step": 730800 }, { "epoch": 4.11, "learning_rate": 8.929661545725898e-06, "loss": 0.1701, "step": 730900 }, { "epoch": 4.11, "learning_rate": 8.924040315463442e-06, "loss": 0.1604, "step": 731000 }, { "epoch": 4.11, "learning_rate": 8.918419085200989e-06, "loss": 0.1678, "step": 731100 }, { "epoch": 4.11, "learning_rate": 8.912797854938532e-06, "loss": 0.1666, "step": 731200 }, { "epoch": 4.11, "learning_rate": 8.907176624676076e-06, "loss": 0.1615, "step": 731300 }, { "epoch": 4.11, "learning_rate": 8.90155539441362e-06, "loss": 0.1682, "step": 731400 }, { "epoch": 4.11, "learning_rate": 8.895934164151167e-06, "loss": 0.1646, "step": 731500 }, { "epoch": 4.11, "learning_rate": 8.890312933888712e-06, "loss": 0.162, "step": 731600 }, { "epoch": 4.11, "learning_rate": 8.884691703626256e-06, "loss": 0.1649, "step": 731700 }, { "epoch": 4.11, "learning_rate": 8.8790704733638e-06, "loss": 0.1664, "step": 731800 }, { "epoch": 4.11, "learning_rate": 8.873449243101345e-06, "loss": 0.1665, "step": 731900 }, { "epoch": 4.11, "learning_rate": 8.86782801283889e-06, "loss": 0.1691, "step": 732000 }, { "epoch": 4.12, "learning_rate": 8.862206782576435e-06, "loss": 0.1638, "step": 732100 }, { "epoch": 4.12, "learning_rate": 8.85658555231398e-06, "loss": 0.1667, "step": 732200 }, { "epoch": 4.12, "learning_rate": 8.850964322051525e-06, "loss": 0.1609, "step": 732300 }, { "epoch": 4.12, "learning_rate": 8.84534309178907e-06, "loss": 0.1634, "step": 732400 }, { "epoch": 4.12, "learning_rate": 8.839721861526615e-06, "loss": 0.1651, "step": 732500 }, { "epoch": 4.12, "learning_rate": 8.834100631264158e-06, "loss": 0.1678, "step": 732600 }, { "epoch": 4.12, "learning_rate": 8.828479401001704e-06, "loss": 0.1704, "step": 732700 }, { "epoch": 4.12, "learning_rate": 8.822858170739248e-06, "loss": 0.1636, "step": 732800 }, { "epoch": 4.12, "learning_rate": 8.817236940476793e-06, "loss": 0.1708, "step": 732900 }, { "epoch": 4.12, "learning_rate": 8.811615710214338e-06, "loss": 0.1621, "step": 733000 }, { "epoch": 4.12, "learning_rate": 8.805994479951884e-06, "loss": 0.17, "step": 733100 }, { "epoch": 4.12, "learning_rate": 8.800373249689428e-06, "loss": 0.1684, "step": 733200 }, { "epoch": 4.12, "learning_rate": 8.794752019426971e-06, "loss": 0.1601, "step": 733300 }, { "epoch": 4.12, "learning_rate": 8.789130789164516e-06, "loss": 0.169, "step": 733400 }, { "epoch": 4.12, "learning_rate": 8.783509558902062e-06, "loss": 0.1656, "step": 733500 }, { "epoch": 4.12, "learning_rate": 8.777888328639607e-06, "loss": 0.1671, "step": 733600 }, { "epoch": 4.12, "learning_rate": 8.772267098377151e-06, "loss": 0.1682, "step": 733700 }, { "epoch": 4.12, "learning_rate": 8.766645868114696e-06, "loss": 0.1657, "step": 733800 }, { "epoch": 4.13, "learning_rate": 8.76102463785224e-06, "loss": 0.1661, "step": 733900 }, { "epoch": 4.13, "learning_rate": 8.755403407589785e-06, "loss": 0.1622, "step": 734000 }, { "epoch": 4.13, "learning_rate": 8.74978217732733e-06, "loss": 0.1678, "step": 734100 }, { "epoch": 4.13, "learning_rate": 8.744160947064874e-06, "loss": 0.1639, "step": 734200 }, { "epoch": 4.13, "learning_rate": 8.73853971680242e-06, "loss": 0.1655, "step": 734300 }, { "epoch": 4.13, "learning_rate": 8.732918486539965e-06, "loss": 0.1651, "step": 734400 }, { "epoch": 4.13, "learning_rate": 8.72729725627751e-06, "loss": 0.1712, "step": 734500 }, { "epoch": 4.13, "learning_rate": 8.721676026015054e-06, "loss": 0.1676, "step": 734600 }, { "epoch": 4.13, "learning_rate": 8.716054795752599e-06, "loss": 0.1637, "step": 734700 }, { "epoch": 4.13, "learning_rate": 8.710433565490143e-06, "loss": 0.1689, "step": 734800 }, { "epoch": 4.13, "learning_rate": 8.704812335227688e-06, "loss": 0.1701, "step": 734900 }, { "epoch": 4.13, "learning_rate": 8.699191104965233e-06, "loss": 0.1683, "step": 735000 }, { "epoch": 4.13, "learning_rate": 8.693569874702779e-06, "loss": 0.1639, "step": 735100 }, { "epoch": 4.13, "learning_rate": 8.687948644440323e-06, "loss": 0.171, "step": 735200 }, { "epoch": 4.13, "learning_rate": 8.682383626480493e-06, "loss": 0.1678, "step": 735300 }, { "epoch": 4.13, "learning_rate": 8.676762396218037e-06, "loss": 0.1673, "step": 735400 }, { "epoch": 4.13, "learning_rate": 8.671141165955582e-06, "loss": 0.162, "step": 735500 }, { "epoch": 4.13, "learning_rate": 8.665519935693127e-06, "loss": 0.1668, "step": 735600 }, { "epoch": 4.14, "learning_rate": 8.659898705430671e-06, "loss": 0.1612, "step": 735700 }, { "epoch": 4.14, "learning_rate": 8.654277475168216e-06, "loss": 0.1662, "step": 735800 }, { "epoch": 4.14, "learning_rate": 8.64865624490576e-06, "loss": 0.1671, "step": 735900 }, { "epoch": 4.14, "learning_rate": 8.643035014643305e-06, "loss": 0.1662, "step": 736000 }, { "epoch": 4.14, "learning_rate": 8.637413784380851e-06, "loss": 0.1606, "step": 736100 }, { "epoch": 4.14, "learning_rate": 8.631792554118396e-06, "loss": 0.1657, "step": 736200 }, { "epoch": 4.14, "learning_rate": 8.626171323855939e-06, "loss": 0.1652, "step": 736300 }, { "epoch": 4.14, "learning_rate": 8.620550093593483e-06, "loss": 0.1635, "step": 736400 }, { "epoch": 4.14, "learning_rate": 8.61492886333103e-06, "loss": 0.1684, "step": 736500 }, { "epoch": 4.14, "learning_rate": 8.609307633068574e-06, "loss": 0.1651, "step": 736600 }, { "epoch": 4.14, "learning_rate": 8.603686402806119e-06, "loss": 0.165, "step": 736700 }, { "epoch": 4.14, "learning_rate": 8.598065172543663e-06, "loss": 0.1623, "step": 736800 }, { "epoch": 4.14, "learning_rate": 8.59244394228121e-06, "loss": 0.1649, "step": 736900 }, { "epoch": 4.14, "learning_rate": 8.586822712018752e-06, "loss": 0.167, "step": 737000 }, { "epoch": 4.14, "learning_rate": 8.581201481756297e-06, "loss": 0.1623, "step": 737100 }, { "epoch": 4.14, "learning_rate": 8.575580251493842e-06, "loss": 0.1654, "step": 737200 }, { "epoch": 4.14, "learning_rate": 8.569959021231388e-06, "loss": 0.1638, "step": 737300 }, { "epoch": 4.15, "learning_rate": 8.564337790968932e-06, "loss": 0.1689, "step": 737400 }, { "epoch": 4.15, "learning_rate": 8.558716560706477e-06, "loss": 0.1677, "step": 737500 }, { "epoch": 4.15, "learning_rate": 8.553095330444022e-06, "loss": 0.1605, "step": 737600 }, { "epoch": 4.15, "learning_rate": 8.547530312484191e-06, "loss": 0.1614, "step": 737700 }, { "epoch": 4.15, "learning_rate": 8.541909082221735e-06, "loss": 0.1622, "step": 737800 }, { "epoch": 4.15, "learning_rate": 8.53628785195928e-06, "loss": 0.1659, "step": 737900 }, { "epoch": 4.15, "learning_rate": 8.530666621696826e-06, "loss": 0.1637, "step": 738000 }, { "epoch": 4.15, "learning_rate": 8.52504539143437e-06, "loss": 0.1633, "step": 738100 }, { "epoch": 4.15, "learning_rate": 8.519424161171914e-06, "loss": 0.1643, "step": 738200 }, { "epoch": 4.15, "learning_rate": 8.513802930909458e-06, "loss": 0.1631, "step": 738300 }, { "epoch": 4.15, "learning_rate": 8.508181700647005e-06, "loss": 0.1648, "step": 738400 }, { "epoch": 4.15, "learning_rate": 8.50256047038455e-06, "loss": 0.161, "step": 738500 }, { "epoch": 4.15, "learning_rate": 8.496939240122094e-06, "loss": 0.1612, "step": 738600 }, { "epoch": 4.15, "learning_rate": 8.491318009859638e-06, "loss": 0.1644, "step": 738700 }, { "epoch": 4.15, "learning_rate": 8.485696779597183e-06, "loss": 0.1675, "step": 738800 }, { "epoch": 4.15, "learning_rate": 8.480075549334728e-06, "loss": 0.1661, "step": 738900 }, { "epoch": 4.15, "learning_rate": 8.474510531374897e-06, "loss": 0.1701, "step": 739000 }, { "epoch": 4.15, "learning_rate": 8.468889301112441e-06, "loss": 0.1649, "step": 739100 }, { "epoch": 4.16, "learning_rate": 8.463268070849986e-06, "loss": 0.1655, "step": 739200 }, { "epoch": 4.16, "learning_rate": 8.45764684058753e-06, "loss": 0.1656, "step": 739300 }, { "epoch": 4.16, "learning_rate": 8.452025610325077e-06, "loss": 0.1652, "step": 739400 }, { "epoch": 4.16, "learning_rate": 8.446404380062622e-06, "loss": 0.1676, "step": 739500 }, { "epoch": 4.16, "learning_rate": 8.440783149800166e-06, "loss": 0.1661, "step": 739600 }, { "epoch": 4.16, "learning_rate": 8.43516191953771e-06, "loss": 0.1643, "step": 739700 }, { "epoch": 4.16, "learning_rate": 8.429540689275255e-06, "loss": 0.164, "step": 739800 }, { "epoch": 4.16, "learning_rate": 8.4239194590128e-06, "loss": 0.1641, "step": 739900 }, { "epoch": 4.16, "learning_rate": 8.418298228750344e-06, "loss": 0.1625, "step": 740000 }, { "epoch": 4.16, "learning_rate": 8.412676998487889e-06, "loss": 0.1638, "step": 740100 }, { "epoch": 4.16, "learning_rate": 8.407055768225435e-06, "loss": 0.1638, "step": 740200 }, { "epoch": 4.16, "learning_rate": 8.40143453796298e-06, "loss": 0.1614, "step": 740300 }, { "epoch": 4.16, "learning_rate": 8.395813307700524e-06, "loss": 0.1579, "step": 740400 }, { "epoch": 4.16, "learning_rate": 8.390192077438067e-06, "loss": 0.163, "step": 740500 }, { "epoch": 4.16, "learning_rate": 8.384570847175614e-06, "loss": 0.1622, "step": 740600 }, { "epoch": 4.16, "learning_rate": 8.378949616913158e-06, "loss": 0.1617, "step": 740700 }, { "epoch": 4.16, "learning_rate": 8.373328386650703e-06, "loss": 0.1664, "step": 740800 }, { "epoch": 4.16, "learning_rate": 8.367707156388247e-06, "loss": 0.1674, "step": 740900 }, { "epoch": 4.17, "learning_rate": 8.362085926125792e-06, "loss": 0.1687, "step": 741000 }, { "epoch": 4.17, "learning_rate": 8.356520908165961e-06, "loss": 0.1636, "step": 741100 }, { "epoch": 4.17, "learning_rate": 8.350899677903506e-06, "loss": 0.163, "step": 741200 }, { "epoch": 4.17, "learning_rate": 8.345278447641052e-06, "loss": 0.1642, "step": 741300 }, { "epoch": 4.17, "learning_rate": 8.339657217378597e-06, "loss": 0.1598, "step": 741400 }, { "epoch": 4.17, "learning_rate": 8.33403598711614e-06, "loss": 0.1674, "step": 741500 }, { "epoch": 4.17, "learning_rate": 8.328414756853684e-06, "loss": 0.165, "step": 741600 }, { "epoch": 4.17, "learning_rate": 8.32279352659123e-06, "loss": 0.1625, "step": 741700 }, { "epoch": 4.17, "learning_rate": 8.3172285086314e-06, "loss": 0.1595, "step": 741800 }, { "epoch": 4.17, "learning_rate": 8.311607278368944e-06, "loss": 0.1617, "step": 741900 }, { "epoch": 4.17, "learning_rate": 8.305986048106489e-06, "loss": 0.1617, "step": 742000 }, { "epoch": 4.17, "learning_rate": 8.300364817844034e-06, "loss": 0.1685, "step": 742100 }, { "epoch": 4.17, "learning_rate": 8.294743587581578e-06, "loss": 0.1678, "step": 742200 }, { "epoch": 4.17, "learning_rate": 8.289122357319123e-06, "loss": 0.1636, "step": 742300 }, { "epoch": 4.17, "learning_rate": 8.283501127056669e-06, "loss": 0.1666, "step": 742400 }, { "epoch": 4.17, "learning_rate": 8.277879896794214e-06, "loss": 0.162, "step": 742500 }, { "epoch": 4.17, "learning_rate": 8.272258666531756e-06, "loss": 0.1676, "step": 742600 }, { "epoch": 4.17, "learning_rate": 8.266637436269303e-06, "loss": 0.1681, "step": 742700 }, { "epoch": 4.18, "learning_rate": 8.261016206006847e-06, "loss": 0.1634, "step": 742800 }, { "epoch": 4.18, "learning_rate": 8.255394975744392e-06, "loss": 0.1593, "step": 742900 }, { "epoch": 4.18, "learning_rate": 8.249773745481936e-06, "loss": 0.1632, "step": 743000 }, { "epoch": 4.18, "learning_rate": 8.244152515219481e-06, "loss": 0.1635, "step": 743100 }, { "epoch": 4.18, "learning_rate": 8.238531284957027e-06, "loss": 0.1638, "step": 743200 }, { "epoch": 4.18, "learning_rate": 8.23291005469457e-06, "loss": 0.1674, "step": 743300 }, { "epoch": 4.18, "learning_rate": 8.227288824432115e-06, "loss": 0.1606, "step": 743400 }, { "epoch": 4.18, "learning_rate": 8.221667594169661e-06, "loss": 0.1588, "step": 743500 }, { "epoch": 4.18, "learning_rate": 8.216046363907206e-06, "loss": 0.1608, "step": 743600 }, { "epoch": 4.18, "learning_rate": 8.21042513364475e-06, "loss": 0.1676, "step": 743700 }, { "epoch": 4.18, "learning_rate": 8.204803903382295e-06, "loss": 0.1703, "step": 743800 }, { "epoch": 4.18, "learning_rate": 8.19918267311984e-06, "loss": 0.1652, "step": 743900 }, { "epoch": 4.18, "learning_rate": 8.193561442857384e-06, "loss": 0.1676, "step": 744000 }, { "epoch": 4.18, "learning_rate": 8.187940212594929e-06, "loss": 0.1652, "step": 744100 }, { "epoch": 4.18, "learning_rate": 8.182318982332473e-06, "loss": 0.1639, "step": 744200 }, { "epoch": 4.18, "learning_rate": 8.176697752070018e-06, "loss": 0.1692, "step": 744300 }, { "epoch": 4.18, "learning_rate": 8.171076521807564e-06, "loss": 0.1675, "step": 744400 }, { "epoch": 4.19, "learning_rate": 8.165455291545109e-06, "loss": 0.1711, "step": 744500 }, { "epoch": 4.19, "learning_rate": 8.159834061282651e-06, "loss": 0.156, "step": 744600 }, { "epoch": 4.19, "learning_rate": 8.154212831020198e-06, "loss": 0.165, "step": 744700 }, { "epoch": 4.19, "learning_rate": 8.148591600757742e-06, "loss": 0.1592, "step": 744800 }, { "epoch": 4.19, "learning_rate": 8.142970370495287e-06, "loss": 0.1668, "step": 744900 }, { "epoch": 4.19, "learning_rate": 8.137349140232832e-06, "loss": 0.1619, "step": 745000 }, { "epoch": 4.19, "learning_rate": 8.131727909970376e-06, "loss": 0.1741, "step": 745100 }, { "epoch": 4.19, "learning_rate": 8.126106679707922e-06, "loss": 0.1673, "step": 745200 }, { "epoch": 4.19, "learning_rate": 8.120485449445465e-06, "loss": 0.1617, "step": 745300 }, { "epoch": 4.19, "learning_rate": 8.11486421918301e-06, "loss": 0.1676, "step": 745400 }, { "epoch": 4.19, "learning_rate": 8.109242988920556e-06, "loss": 0.1671, "step": 745500 }, { "epoch": 4.19, "learning_rate": 8.1036217586581e-06, "loss": 0.1693, "step": 745600 }, { "epoch": 4.19, "learning_rate": 8.098000528395645e-06, "loss": 0.1657, "step": 745700 }, { "epoch": 4.19, "learning_rate": 8.09237929813319e-06, "loss": 0.1679, "step": 745800 }, { "epoch": 4.19, "learning_rate": 8.086758067870734e-06, "loss": 0.1657, "step": 745900 }, { "epoch": 4.19, "learning_rate": 8.081136837608279e-06, "loss": 0.1664, "step": 746000 }, { "epoch": 4.19, "learning_rate": 8.075515607345824e-06, "loss": 0.1592, "step": 746100 }, { "epoch": 4.19, "learning_rate": 8.069894377083368e-06, "loss": 0.1724, "step": 746200 }, { "epoch": 4.2, "learning_rate": 8.064273146820914e-06, "loss": 0.1659, "step": 746300 }, { "epoch": 4.2, "learning_rate": 8.058651916558459e-06, "loss": 0.168, "step": 746400 }, { "epoch": 4.2, "learning_rate": 8.053030686296004e-06, "loss": 0.1623, "step": 746500 }, { "epoch": 4.2, "learning_rate": 8.047409456033547e-06, "loss": 0.1626, "step": 746600 }, { "epoch": 4.2, "learning_rate": 8.041844438073718e-06, "loss": 0.1622, "step": 746700 }, { "epoch": 4.2, "learning_rate": 8.036223207811262e-06, "loss": 0.1682, "step": 746800 }, { "epoch": 4.2, "learning_rate": 8.030601977548807e-06, "loss": 0.1621, "step": 746900 }, { "epoch": 4.2, "learning_rate": 8.024980747286351e-06, "loss": 0.167, "step": 747000 }, { "epoch": 4.2, "learning_rate": 8.019359517023896e-06, "loss": 0.1636, "step": 747100 }, { "epoch": 4.2, "learning_rate": 8.01373828676144e-06, "loss": 0.1656, "step": 747200 }, { "epoch": 4.2, "learning_rate": 8.008117056498985e-06, "loss": 0.1682, "step": 747300 }, { "epoch": 4.2, "learning_rate": 8.002495826236531e-06, "loss": 0.1665, "step": 747400 }, { "epoch": 4.2, "learning_rate": 7.996874595974076e-06, "loss": 0.1607, "step": 747500 }, { "epoch": 4.2, "learning_rate": 7.99125336571162e-06, "loss": 0.168, "step": 747600 }, { "epoch": 4.2, "learning_rate": 7.985632135449163e-06, "loss": 0.165, "step": 747700 }, { "epoch": 4.2, "learning_rate": 7.98001090518671e-06, "loss": 0.1689, "step": 747800 }, { "epoch": 4.2, "learning_rate": 7.974389674924254e-06, "loss": 0.1631, "step": 747900 }, { "epoch": 4.2, "learning_rate": 7.968768444661799e-06, "loss": 0.1685, "step": 748000 }, { "epoch": 4.21, "learning_rate": 7.963147214399343e-06, "loss": 0.1635, "step": 748100 }, { "epoch": 4.21, "learning_rate": 7.95752598413689e-06, "loss": 0.1668, "step": 748200 }, { "epoch": 4.21, "learning_rate": 7.951904753874434e-06, "loss": 0.1625, "step": 748300 }, { "epoch": 4.21, "learning_rate": 7.946283523611977e-06, "loss": 0.1694, "step": 748400 }, { "epoch": 4.21, "learning_rate": 7.940662293349522e-06, "loss": 0.1675, "step": 748500 }, { "epoch": 4.21, "learning_rate": 7.935041063087068e-06, "loss": 0.166, "step": 748600 }, { "epoch": 4.21, "learning_rate": 7.929419832824613e-06, "loss": 0.1649, "step": 748700 }, { "epoch": 4.21, "learning_rate": 7.923798602562157e-06, "loss": 0.1698, "step": 748800 }, { "epoch": 4.21, "learning_rate": 7.918177372299702e-06, "loss": 0.1607, "step": 748900 }, { "epoch": 4.21, "learning_rate": 7.912556142037246e-06, "loss": 0.1635, "step": 749000 }, { "epoch": 4.21, "learning_rate": 7.906934911774791e-06, "loss": 0.1607, "step": 749100 }, { "epoch": 4.21, "learning_rate": 7.901313681512336e-06, "loss": 0.1638, "step": 749200 }, { "epoch": 4.21, "learning_rate": 7.89569245124988e-06, "loss": 0.1625, "step": 749300 }, { "epoch": 4.21, "learning_rate": 7.890071220987426e-06, "loss": 0.1609, "step": 749400 }, { "epoch": 4.21, "learning_rate": 7.884449990724971e-06, "loss": 0.1672, "step": 749500 }, { "epoch": 4.21, "learning_rate": 7.878828760462516e-06, "loss": 0.1624, "step": 749600 }, { "epoch": 4.21, "learning_rate": 7.87320753020006e-06, "loss": 0.1615, "step": 749700 }, { "epoch": 4.21, "learning_rate": 7.867586299937605e-06, "loss": 0.1673, "step": 749800 }, { "epoch": 4.22, "learning_rate": 7.86196506967515e-06, "loss": 0.1595, "step": 749900 }, { "epoch": 4.22, "learning_rate": 7.856343839412694e-06, "loss": 0.1654, "step": 750000 }, { "epoch": 4.22, "learning_rate": 7.850722609150238e-06, "loss": 0.1652, "step": 750100 }, { "epoch": 4.22, "learning_rate": 7.845101378887785e-06, "loss": 0.1691, "step": 750200 }, { "epoch": 4.22, "learning_rate": 7.83948014862533e-06, "loss": 0.1654, "step": 750300 }, { "epoch": 4.22, "learning_rate": 7.833858918362872e-06, "loss": 0.1629, "step": 750400 }, { "epoch": 4.22, "learning_rate": 7.828237688100418e-06, "loss": 0.1683, "step": 750500 }, { "epoch": 4.22, "learning_rate": 7.822616457837963e-06, "loss": 0.1666, "step": 750600 }, { "epoch": 4.22, "learning_rate": 7.816995227575508e-06, "loss": 0.1628, "step": 750700 }, { "epoch": 4.22, "learning_rate": 7.811373997313052e-06, "loss": 0.1669, "step": 750800 }, { "epoch": 4.22, "learning_rate": 7.805752767050597e-06, "loss": 0.1714, "step": 750900 }, { "epoch": 4.22, "learning_rate": 7.800131536788143e-06, "loss": 0.1594, "step": 751000 }, { "epoch": 4.22, "learning_rate": 7.794510306525686e-06, "loss": 0.1607, "step": 751100 }, { "epoch": 4.22, "learning_rate": 7.78888907626323e-06, "loss": 0.1694, "step": 751200 }, { "epoch": 4.22, "learning_rate": 7.783267846000777e-06, "loss": 0.1621, "step": 751300 }, { "epoch": 4.22, "learning_rate": 7.777646615738321e-06, "loss": 0.1617, "step": 751400 }, { "epoch": 4.22, "learning_rate": 7.772025385475866e-06, "loss": 0.1667, "step": 751500 }, { "epoch": 4.22, "learning_rate": 7.76640415521341e-06, "loss": 0.1632, "step": 751600 }, { "epoch": 4.23, "learning_rate": 7.760782924950955e-06, "loss": 0.1706, "step": 751700 }, { "epoch": 4.23, "learning_rate": 7.7551616946885e-06, "loss": 0.1674, "step": 751800 }, { "epoch": 4.23, "learning_rate": 7.749540464426044e-06, "loss": 0.1627, "step": 751900 }, { "epoch": 4.23, "learning_rate": 7.743919234163589e-06, "loss": 0.1652, "step": 752000 }, { "epoch": 4.23, "learning_rate": 7.738298003901134e-06, "loss": 0.1682, "step": 752100 }, { "epoch": 4.23, "learning_rate": 7.732732985941303e-06, "loss": 0.1647, "step": 752200 }, { "epoch": 4.23, "learning_rate": 7.727111755678847e-06, "loss": 0.1594, "step": 752300 }, { "epoch": 4.23, "learning_rate": 7.721490525416394e-06, "loss": 0.1612, "step": 752400 }, { "epoch": 4.23, "learning_rate": 7.715869295153938e-06, "loss": 0.1726, "step": 752500 }, { "epoch": 4.23, "learning_rate": 7.710248064891483e-06, "loss": 0.163, "step": 752600 }, { "epoch": 4.23, "learning_rate": 7.704626834629027e-06, "loss": 0.1659, "step": 752700 }, { "epoch": 4.23, "learning_rate": 7.699005604366572e-06, "loss": 0.1683, "step": 752800 }, { "epoch": 4.23, "learning_rate": 7.693384374104117e-06, "loss": 0.1656, "step": 752900 }, { "epoch": 4.23, "learning_rate": 7.687763143841661e-06, "loss": 0.1643, "step": 753000 }, { "epoch": 4.23, "learning_rate": 7.682141913579206e-06, "loss": 0.1624, "step": 753100 }, { "epoch": 4.23, "learning_rate": 7.676520683316752e-06, "loss": 0.1684, "step": 753200 }, { "epoch": 4.23, "learning_rate": 7.670899453054297e-06, "loss": 0.1664, "step": 753300 }, { "epoch": 4.24, "learning_rate": 7.665278222791841e-06, "loss": 0.1684, "step": 753400 }, { "epoch": 4.24, "learning_rate": 7.659656992529384e-06, "loss": 0.1674, "step": 753500 }, { "epoch": 4.24, "learning_rate": 7.65403576226693e-06, "loss": 0.1642, "step": 753600 }, { "epoch": 4.24, "learning_rate": 7.648414532004475e-06, "loss": 0.1698, "step": 753700 }, { "epoch": 4.24, "learning_rate": 7.64279330174202e-06, "loss": 0.1629, "step": 753800 }, { "epoch": 4.24, "learning_rate": 7.637172071479564e-06, "loss": 0.1665, "step": 753900 }, { "epoch": 4.24, "learning_rate": 7.63155084121711e-06, "loss": 0.1675, "step": 754000 }, { "epoch": 4.24, "learning_rate": 7.625929610954653e-06, "loss": 0.171, "step": 754100 }, { "epoch": 4.24, "learning_rate": 7.620308380692198e-06, "loss": 0.1654, "step": 754200 }, { "epoch": 4.24, "learning_rate": 7.614743362732368e-06, "loss": 0.1686, "step": 754300 }, { "epoch": 4.24, "learning_rate": 7.6091221324699135e-06, "loss": 0.1662, "step": 754400 }, { "epoch": 4.24, "learning_rate": 7.603500902207457e-06, "loss": 0.1661, "step": 754500 }, { "epoch": 4.24, "learning_rate": 7.597879671945002e-06, "loss": 0.1667, "step": 754600 }, { "epoch": 4.24, "learning_rate": 7.592258441682546e-06, "loss": 0.1628, "step": 754700 }, { "epoch": 4.24, "learning_rate": 7.586637211420092e-06, "loss": 0.1628, "step": 754800 }, { "epoch": 4.24, "learning_rate": 7.581015981157636e-06, "loss": 0.1636, "step": 754900 }, { "epoch": 4.24, "learning_rate": 7.575394750895182e-06, "loss": 0.1712, "step": 755000 }, { "epoch": 4.24, "learning_rate": 7.569773520632726e-06, "loss": 0.1671, "step": 755100 }, { "epoch": 4.25, "learning_rate": 7.564208502672896e-06, "loss": 0.1667, "step": 755200 }, { "epoch": 4.25, "learning_rate": 7.55858727241044e-06, "loss": 0.1621, "step": 755300 }, { "epoch": 4.25, "learning_rate": 7.552966042147985e-06, "loss": 0.16, "step": 755400 }, { "epoch": 4.25, "learning_rate": 7.54734481188553e-06, "loss": 0.168, "step": 755500 }, { "epoch": 4.25, "learning_rate": 7.541723581623074e-06, "loss": 0.1682, "step": 755600 }, { "epoch": 4.25, "learning_rate": 7.536102351360619e-06, "loss": 0.1648, "step": 755700 }, { "epoch": 4.25, "learning_rate": 7.530481121098164e-06, "loss": 0.1598, "step": 755800 }, { "epoch": 4.25, "learning_rate": 7.524859890835709e-06, "loss": 0.1681, "step": 755900 }, { "epoch": 4.25, "learning_rate": 7.519238660573254e-06, "loss": 0.1677, "step": 756000 }, { "epoch": 4.25, "learning_rate": 7.513617430310799e-06, "loss": 0.1677, "step": 756100 }, { "epoch": 4.25, "learning_rate": 7.507996200048342e-06, "loss": 0.1679, "step": 756200 }, { "epoch": 4.25, "learning_rate": 7.502374969785887e-06, "loss": 0.1663, "step": 756300 }, { "epoch": 4.25, "learning_rate": 7.4967537395234324e-06, "loss": 0.1712, "step": 756400 }, { "epoch": 4.25, "learning_rate": 7.491132509260977e-06, "loss": 0.1685, "step": 756500 }, { "epoch": 4.25, "learning_rate": 7.4855112789985224e-06, "loss": 0.1652, "step": 756600 }, { "epoch": 4.25, "learning_rate": 7.479890048736067e-06, "loss": 0.1667, "step": 756700 }, { "epoch": 4.25, "learning_rate": 7.4742688184736125e-06, "loss": 0.1622, "step": 756800 }, { "epoch": 4.25, "learning_rate": 7.468647588211155e-06, "loss": 0.1636, "step": 756900 }, { "epoch": 4.26, "learning_rate": 7.463026357948701e-06, "loss": 0.1673, "step": 757000 }, { "epoch": 4.26, "learning_rate": 7.457405127686245e-06, "loss": 0.1561, "step": 757100 }, { "epoch": 4.26, "learning_rate": 7.451783897423791e-06, "loss": 0.1683, "step": 757200 }, { "epoch": 4.26, "learning_rate": 7.446162667161335e-06, "loss": 0.1631, "step": 757300 }, { "epoch": 4.26, "learning_rate": 7.440541436898881e-06, "loss": 0.1629, "step": 757400 }, { "epoch": 4.26, "learning_rate": 7.434920206636425e-06, "loss": 0.164, "step": 757500 }, { "epoch": 4.26, "learning_rate": 7.429298976373969e-06, "loss": 0.1691, "step": 757600 }, { "epoch": 4.26, "learning_rate": 7.423677746111514e-06, "loss": 0.1718, "step": 757700 }, { "epoch": 4.26, "learning_rate": 7.418056515849059e-06, "loss": 0.1679, "step": 757800 }, { "epoch": 4.26, "learning_rate": 7.412435285586604e-06, "loss": 0.1645, "step": 757900 }, { "epoch": 4.26, "learning_rate": 7.406814055324149e-06, "loss": 0.1629, "step": 758000 }, { "epoch": 4.26, "learning_rate": 7.401192825061694e-06, "loss": 0.1664, "step": 758100 }, { "epoch": 4.26, "learning_rate": 7.395571594799239e-06, "loss": 0.1668, "step": 758200 }, { "epoch": 4.26, "learning_rate": 7.389950364536782e-06, "loss": 0.1618, "step": 758300 }, { "epoch": 4.26, "learning_rate": 7.3843291342743275e-06, "loss": 0.1642, "step": 758400 }, { "epoch": 4.26, "learning_rate": 7.378707904011872e-06, "loss": 0.1679, "step": 758500 }, { "epoch": 4.26, "learning_rate": 7.3730866737494175e-06, "loss": 0.1622, "step": 758600 }, { "epoch": 4.26, "learning_rate": 7.367465443486962e-06, "loss": 0.1606, "step": 758700 }, { "epoch": 4.27, "learning_rate": 7.3618442132245075e-06, "loss": 0.1582, "step": 758800 }, { "epoch": 4.27, "learning_rate": 7.35622298296205e-06, "loss": 0.1671, "step": 758900 }, { "epoch": 4.27, "learning_rate": 7.350601752699596e-06, "loss": 0.1643, "step": 759000 }, { "epoch": 4.27, "learning_rate": 7.34498052243714e-06, "loss": 0.1666, "step": 759100 }, { "epoch": 4.27, "learning_rate": 7.339359292174686e-06, "loss": 0.1662, "step": 759200 }, { "epoch": 4.27, "learning_rate": 7.3337380619122304e-06, "loss": 0.1654, "step": 759300 }, { "epoch": 4.27, "learning_rate": 7.328116831649776e-06, "loss": 0.1644, "step": 759400 }, { "epoch": 4.27, "learning_rate": 7.3224956013873204e-06, "loss": 0.163, "step": 759500 }, { "epoch": 4.27, "learning_rate": 7.316874371124864e-06, "loss": 0.1734, "step": 759600 }, { "epoch": 4.27, "learning_rate": 7.311253140862409e-06, "loss": 0.1612, "step": 759700 }, { "epoch": 4.27, "learning_rate": 7.305631910599954e-06, "loss": 0.1628, "step": 759800 }, { "epoch": 4.27, "learning_rate": 7.300010680337499e-06, "loss": 0.1646, "step": 759900 }, { "epoch": 4.27, "learning_rate": 7.294389450075044e-06, "loss": 0.1601, "step": 760000 }, { "epoch": 4.27, "learning_rate": 7.288768219812589e-06, "loss": 0.1643, "step": 760100 }, { "epoch": 4.27, "learning_rate": 7.283146989550134e-06, "loss": 0.1675, "step": 760200 }, { "epoch": 4.27, "learning_rate": 7.277525759287677e-06, "loss": 0.1598, "step": 760300 }, { "epoch": 4.27, "learning_rate": 7.2719045290252225e-06, "loss": 0.1645, "step": 760400 }, { "epoch": 4.27, "learning_rate": 7.266283298762767e-06, "loss": 0.1637, "step": 760500 }, { "epoch": 4.28, "learning_rate": 7.2606620685003125e-06, "loss": 0.1621, "step": 760600 }, { "epoch": 4.28, "learning_rate": 7.255040838237857e-06, "loss": 0.1631, "step": 760700 }, { "epoch": 4.28, "learning_rate": 7.2494196079754026e-06, "loss": 0.1693, "step": 760800 }, { "epoch": 4.28, "learning_rate": 7.243798377712947e-06, "loss": 0.1651, "step": 760900 }, { "epoch": 4.28, "learning_rate": 7.238177147450491e-06, "loss": 0.164, "step": 761000 }, { "epoch": 4.28, "learning_rate": 7.2325559171880355e-06, "loss": 0.1641, "step": 761100 }, { "epoch": 4.28, "learning_rate": 7.226934686925581e-06, "loss": 0.1616, "step": 761200 }, { "epoch": 4.28, "learning_rate": 7.2213134566631255e-06, "loss": 0.1656, "step": 761300 }, { "epoch": 4.28, "learning_rate": 7.215692226400671e-06, "loss": 0.1704, "step": 761400 }, { "epoch": 4.28, "learning_rate": 7.2100709961382155e-06, "loss": 0.1663, "step": 761500 }, { "epoch": 4.28, "learning_rate": 7.204449765875759e-06, "loss": 0.1628, "step": 761600 }, { "epoch": 4.28, "learning_rate": 7.198828535613304e-06, "loss": 0.1596, "step": 761700 }, { "epoch": 4.28, "learning_rate": 7.193207305350849e-06, "loss": 0.1634, "step": 761800 }, { "epoch": 4.28, "learning_rate": 7.187586075088394e-06, "loss": 0.1621, "step": 761900 }, { "epoch": 4.28, "learning_rate": 7.181964844825939e-06, "loss": 0.1643, "step": 762000 }, { "epoch": 4.28, "learning_rate": 7.176343614563484e-06, "loss": 0.1603, "step": 762100 }, { "epoch": 4.28, "learning_rate": 7.170722384301029e-06, "loss": 0.167, "step": 762200 }, { "epoch": 4.29, "learning_rate": 7.165101154038573e-06, "loss": 0.1663, "step": 762300 }, { "epoch": 4.29, "learning_rate": 7.159479923776118e-06, "loss": 0.1605, "step": 762400 }, { "epoch": 4.29, "learning_rate": 7.153858693513662e-06, "loss": 0.1612, "step": 762500 }, { "epoch": 4.29, "learning_rate": 7.148237463251208e-06, "loss": 0.165, "step": 762600 }, { "epoch": 4.29, "learning_rate": 7.142616232988752e-06, "loss": 0.1673, "step": 762700 }, { "epoch": 4.29, "learning_rate": 7.136995002726298e-06, "loss": 0.1618, "step": 762800 }, { "epoch": 4.29, "learning_rate": 7.131373772463842e-06, "loss": 0.1682, "step": 762900 }, { "epoch": 4.29, "learning_rate": 7.125752542201386e-06, "loss": 0.16, "step": 763000 }, { "epoch": 4.29, "learning_rate": 7.120131311938931e-06, "loss": 0.167, "step": 763100 }, { "epoch": 4.29, "learning_rate": 7.114566293979101e-06, "loss": 0.1635, "step": 763200 }, { "epoch": 4.29, "learning_rate": 7.108945063716646e-06, "loss": 0.1599, "step": 763300 }, { "epoch": 4.29, "learning_rate": 7.10332383345419e-06, "loss": 0.1612, "step": 763400 }, { "epoch": 4.29, "learning_rate": 7.0977026031917344e-06, "loss": 0.162, "step": 763500 }, { "epoch": 4.29, "learning_rate": 7.09208137292928e-06, "loss": 0.1628, "step": 763600 }, { "epoch": 4.29, "learning_rate": 7.0864601426668244e-06, "loss": 0.165, "step": 763700 }, { "epoch": 4.29, "learning_rate": 7.08083891240437e-06, "loss": 0.1687, "step": 763800 }, { "epoch": 4.29, "learning_rate": 7.0752176821419145e-06, "loss": 0.1669, "step": 763900 }, { "epoch": 4.29, "learning_rate": 7.069596451879458e-06, "loss": 0.1658, "step": 764000 }, { "epoch": 4.3, "learning_rate": 7.063975221617003e-06, "loss": 0.1627, "step": 764100 }, { "epoch": 4.3, "learning_rate": 7.058353991354548e-06, "loss": 0.1684, "step": 764200 }, { "epoch": 4.3, "learning_rate": 7.052732761092093e-06, "loss": 0.1657, "step": 764300 }, { "epoch": 4.3, "learning_rate": 7.047111530829638e-06, "loss": 0.167, "step": 764400 }, { "epoch": 4.3, "learning_rate": 7.041490300567183e-06, "loss": 0.1653, "step": 764500 }, { "epoch": 4.3, "learning_rate": 7.035869070304727e-06, "loss": 0.1664, "step": 764600 }, { "epoch": 4.3, "learning_rate": 7.030247840042271e-06, "loss": 0.1724, "step": 764700 }, { "epoch": 4.3, "learning_rate": 7.0246266097798166e-06, "loss": 0.1648, "step": 764800 }, { "epoch": 4.3, "learning_rate": 7.019005379517361e-06, "loss": 0.1638, "step": 764900 }, { "epoch": 4.3, "learning_rate": 7.0133841492549066e-06, "loss": 0.1636, "step": 765000 }, { "epoch": 4.3, "learning_rate": 7.007762918992451e-06, "loss": 0.1655, "step": 765100 }, { "epoch": 4.3, "learning_rate": 7.002141688729997e-06, "loss": 0.1658, "step": 765200 }, { "epoch": 4.3, "learning_rate": 6.996520458467541e-06, "loss": 0.1622, "step": 765300 }, { "epoch": 4.3, "learning_rate": 6.990899228205085e-06, "loss": 0.1621, "step": 765400 }, { "epoch": 4.3, "learning_rate": 6.985334210245255e-06, "loss": 0.166, "step": 765500 }, { "epoch": 4.3, "learning_rate": 6.9797129799828e-06, "loss": 0.1655, "step": 765600 }, { "epoch": 4.3, "learning_rate": 6.974091749720345e-06, "loss": 0.1649, "step": 765700 }, { "epoch": 4.3, "learning_rate": 6.968470519457888e-06, "loss": 0.168, "step": 765800 }, { "epoch": 4.31, "learning_rate": 6.962849289195433e-06, "loss": 0.1653, "step": 765900 }, { "epoch": 4.31, "learning_rate": 6.957228058932978e-06, "loss": 0.1698, "step": 766000 }, { "epoch": 4.31, "learning_rate": 6.951606828670523e-06, "loss": 0.1646, "step": 766100 }, { "epoch": 4.31, "learning_rate": 6.945985598408068e-06, "loss": 0.1624, "step": 766200 }, { "epoch": 4.31, "learning_rate": 6.9403643681456134e-06, "loss": 0.1633, "step": 766300 }, { "epoch": 4.31, "learning_rate": 6.934743137883157e-06, "loss": 0.1595, "step": 766400 }, { "epoch": 4.31, "learning_rate": 6.929121907620702e-06, "loss": 0.1693, "step": 766500 }, { "epoch": 4.31, "learning_rate": 6.923500677358246e-06, "loss": 0.1603, "step": 766600 }, { "epoch": 4.31, "learning_rate": 6.917879447095792e-06, "loss": 0.1667, "step": 766700 }, { "epoch": 4.31, "learning_rate": 6.912258216833336e-06, "loss": 0.1674, "step": 766800 }, { "epoch": 4.31, "learning_rate": 6.906636986570882e-06, "loss": 0.1633, "step": 766900 }, { "epoch": 4.31, "learning_rate": 6.901015756308426e-06, "loss": 0.1615, "step": 767000 }, { "epoch": 4.31, "learning_rate": 6.89539452604597e-06, "loss": 0.1599, "step": 767100 }, { "epoch": 4.31, "learning_rate": 6.8897732957835155e-06, "loss": 0.1663, "step": 767200 }, { "epoch": 4.31, "learning_rate": 6.88415206552106e-06, "loss": 0.1649, "step": 767300 }, { "epoch": 4.31, "learning_rate": 6.878530835258605e-06, "loss": 0.1662, "step": 767400 }, { "epoch": 4.31, "learning_rate": 6.87290960499615e-06, "loss": 0.1671, "step": 767500 }, { "epoch": 4.31, "learning_rate": 6.867344587036319e-06, "loss": 0.1627, "step": 767600 }, { "epoch": 4.32, "learning_rate": 6.861723356773864e-06, "loss": 0.1651, "step": 767700 }, { "epoch": 4.32, "learning_rate": 6.856158338814034e-06, "loss": 0.1656, "step": 767800 }, { "epoch": 4.32, "learning_rate": 6.850537108551577e-06, "loss": 0.1642, "step": 767900 }, { "epoch": 4.32, "learning_rate": 6.8449158782891225e-06, "loss": 0.161, "step": 768000 }, { "epoch": 4.32, "learning_rate": 6.839294648026667e-06, "loss": 0.1637, "step": 768100 }, { "epoch": 4.32, "learning_rate": 6.8336734177642125e-06, "loss": 0.1655, "step": 768200 }, { "epoch": 4.32, "learning_rate": 6.828052187501757e-06, "loss": 0.1643, "step": 768300 }, { "epoch": 4.32, "learning_rate": 6.822487169541926e-06, "loss": 0.1628, "step": 768400 }, { "epoch": 4.32, "learning_rate": 6.816865939279471e-06, "loss": 0.1653, "step": 768500 }, { "epoch": 4.32, "learning_rate": 6.811244709017016e-06, "loss": 0.1582, "step": 768600 }, { "epoch": 4.32, "learning_rate": 6.805623478754561e-06, "loss": 0.1689, "step": 768700 }, { "epoch": 4.32, "learning_rate": 6.800002248492106e-06, "loss": 0.1662, "step": 768800 }, { "epoch": 4.32, "learning_rate": 6.794381018229649e-06, "loss": 0.1659, "step": 768900 }, { "epoch": 4.32, "learning_rate": 6.788759787967195e-06, "loss": 0.164, "step": 769000 }, { "epoch": 4.32, "learning_rate": 6.783138557704739e-06, "loss": 0.1632, "step": 769100 }, { "epoch": 4.32, "learning_rate": 6.777517327442284e-06, "loss": 0.1635, "step": 769200 }, { "epoch": 4.32, "learning_rate": 6.771896097179829e-06, "loss": 0.1636, "step": 769300 }, { "epoch": 4.32, "learning_rate": 6.766274866917374e-06, "loss": 0.1646, "step": 769400 }, { "epoch": 4.33, "learning_rate": 6.760653636654919e-06, "loss": 0.1663, "step": 769500 }, { "epoch": 4.33, "learning_rate": 6.755032406392463e-06, "loss": 0.1662, "step": 769600 }, { "epoch": 4.33, "learning_rate": 6.749411176130008e-06, "loss": 0.1602, "step": 769700 }, { "epoch": 4.33, "learning_rate": 6.743789945867552e-06, "loss": 0.1671, "step": 769800 }, { "epoch": 4.33, "learning_rate": 6.738168715605098e-06, "loss": 0.1653, "step": 769900 }, { "epoch": 4.33, "learning_rate": 6.732547485342642e-06, "loss": 0.1628, "step": 770000 }, { "epoch": 4.33, "learning_rate": 6.726926255080188e-06, "loss": 0.1597, "step": 770100 }, { "epoch": 4.33, "learning_rate": 6.721305024817732e-06, "loss": 0.1609, "step": 770200 }, { "epoch": 4.33, "learning_rate": 6.715683794555276e-06, "loss": 0.1677, "step": 770300 }, { "epoch": 4.33, "learning_rate": 6.7100625642928215e-06, "loss": 0.1597, "step": 770400 }, { "epoch": 4.33, "learning_rate": 6.704441334030366e-06, "loss": 0.1663, "step": 770500 }, { "epoch": 4.33, "learning_rate": 6.698820103767911e-06, "loss": 0.1642, "step": 770600 }, { "epoch": 4.33, "learning_rate": 6.693198873505456e-06, "loss": 0.1667, "step": 770700 }, { "epoch": 4.33, "learning_rate": 6.687577643243001e-06, "loss": 0.1655, "step": 770800 }, { "epoch": 4.33, "learning_rate": 6.681956412980546e-06, "loss": 0.1687, "step": 770900 }, { "epoch": 4.33, "learning_rate": 6.67633518271809e-06, "loss": 0.1642, "step": 771000 }, { "epoch": 4.33, "learning_rate": 6.670713952455634e-06, "loss": 0.1666, "step": 771100 }, { "epoch": 4.34, "learning_rate": 6.66509272219318e-06, "loss": 0.1636, "step": 771200 }, { "epoch": 4.34, "learning_rate": 6.6594714919307244e-06, "loss": 0.1635, "step": 771300 }, { "epoch": 4.34, "learning_rate": 6.653850261668269e-06, "loss": 0.163, "step": 771400 }, { "epoch": 4.34, "learning_rate": 6.6482290314058144e-06, "loss": 0.1608, "step": 771500 }, { "epoch": 4.34, "learning_rate": 6.642607801143358e-06, "loss": 0.1597, "step": 771600 }, { "epoch": 4.34, "learning_rate": 6.636986570880903e-06, "loss": 0.1641, "step": 771700 }, { "epoch": 4.34, "learning_rate": 6.631365340618448e-06, "loss": 0.1658, "step": 771800 }, { "epoch": 4.34, "learning_rate": 6.625744110355993e-06, "loss": 0.1681, "step": 771900 }, { "epoch": 4.34, "learning_rate": 6.620122880093537e-06, "loss": 0.1639, "step": 772000 }, { "epoch": 4.34, "learning_rate": 6.614501649831083e-06, "loss": 0.1684, "step": 772100 }, { "epoch": 4.34, "learning_rate": 6.608880419568627e-06, "loss": 0.1693, "step": 772200 }, { "epoch": 4.34, "learning_rate": 6.603259189306171e-06, "loss": 0.1715, "step": 772300 }, { "epoch": 4.34, "learning_rate": 6.597694171346341e-06, "loss": 0.1653, "step": 772400 }, { "epoch": 4.34, "learning_rate": 6.592072941083887e-06, "loss": 0.1622, "step": 772500 }, { "epoch": 4.34, "learning_rate": 6.586451710821431e-06, "loss": 0.1686, "step": 772600 }, { "epoch": 4.34, "learning_rate": 6.580830480558975e-06, "loss": 0.167, "step": 772700 }, { "epoch": 4.34, "learning_rate": 6.57520925029652e-06, "loss": 0.1645, "step": 772800 }, { "epoch": 4.34, "learning_rate": 6.569588020034065e-06, "loss": 0.1682, "step": 772900 }, { "epoch": 4.35, "learning_rate": 6.56396678977161e-06, "loss": 0.1649, "step": 773000 }, { "epoch": 4.35, "learning_rate": 6.558345559509155e-06, "loss": 0.1629, "step": 773100 }, { "epoch": 4.35, "learning_rate": 6.5527243292467e-06, "loss": 0.162, "step": 773200 }, { "epoch": 4.35, "learning_rate": 6.547103098984245e-06, "loss": 0.1649, "step": 773300 }, { "epoch": 4.35, "learning_rate": 6.541481868721788e-06, "loss": 0.1666, "step": 773400 }, { "epoch": 4.35, "learning_rate": 6.535860638459333e-06, "loss": 0.1667, "step": 773500 }, { "epoch": 4.35, "learning_rate": 6.530239408196878e-06, "loss": 0.1618, "step": 773600 }, { "epoch": 4.35, "learning_rate": 6.524618177934423e-06, "loss": 0.1621, "step": 773700 }, { "epoch": 4.35, "learning_rate": 6.518996947671968e-06, "loss": 0.1658, "step": 773800 }, { "epoch": 4.35, "learning_rate": 6.513375717409513e-06, "loss": 0.1608, "step": 773900 }, { "epoch": 4.35, "learning_rate": 6.507754487147056e-06, "loss": 0.1632, "step": 774000 }, { "epoch": 4.35, "learning_rate": 6.502133256884602e-06, "loss": 0.1634, "step": 774100 }, { "epoch": 4.35, "learning_rate": 6.496512026622146e-06, "loss": 0.1609, "step": 774200 }, { "epoch": 4.35, "learning_rate": 6.490890796359692e-06, "loss": 0.1666, "step": 774300 }, { "epoch": 4.35, "learning_rate": 6.485269566097236e-06, "loss": 0.1622, "step": 774400 }, { "epoch": 4.35, "learning_rate": 6.479648335834782e-06, "loss": 0.167, "step": 774500 }, { "epoch": 4.35, "learning_rate": 6.474027105572326e-06, "loss": 0.1682, "step": 774600 }, { "epoch": 4.35, "learning_rate": 6.46840587530987e-06, "loss": 0.1676, "step": 774700 }, { "epoch": 4.36, "learning_rate": 6.462784645047415e-06, "loss": 0.1663, "step": 774800 }, { "epoch": 4.36, "learning_rate": 6.45716341478496e-06, "loss": 0.164, "step": 774900 }, { "epoch": 4.36, "learning_rate": 6.451542184522505e-06, "loss": 0.1664, "step": 775000 }, { "epoch": 4.36, "learning_rate": 6.44592095426005e-06, "loss": 0.1669, "step": 775100 }, { "epoch": 4.36, "learning_rate": 6.440299723997595e-06, "loss": 0.168, "step": 775200 }, { "epoch": 4.36, "learning_rate": 6.43467849373514e-06, "loss": 0.1684, "step": 775300 }, { "epoch": 4.36, "learning_rate": 6.429057263472683e-06, "loss": 0.1677, "step": 775400 }, { "epoch": 4.36, "learning_rate": 6.4234360332102284e-06, "loss": 0.1611, "step": 775500 }, { "epoch": 4.36, "learning_rate": 6.417814802947773e-06, "loss": 0.1623, "step": 775600 }, { "epoch": 4.36, "learning_rate": 6.4121935726853184e-06, "loss": 0.1607, "step": 775700 }, { "epoch": 4.36, "learning_rate": 6.406572342422863e-06, "loss": 0.164, "step": 775800 }, { "epoch": 4.36, "learning_rate": 6.4009511121604085e-06, "loss": 0.1659, "step": 775900 }, { "epoch": 4.36, "learning_rate": 6.395386094200577e-06, "loss": 0.1661, "step": 776000 }, { "epoch": 4.36, "learning_rate": 6.3897648639381215e-06, "loss": 0.1616, "step": 776100 }, { "epoch": 4.36, "learning_rate": 6.384143633675667e-06, "loss": 0.1638, "step": 776200 }, { "epoch": 4.36, "learning_rate": 6.3785224034132115e-06, "loss": 0.1635, "step": 776300 }, { "epoch": 4.36, "learning_rate": 6.372901173150755e-06, "loss": 0.1704, "step": 776400 }, { "epoch": 4.36, "learning_rate": 6.367279942888301e-06, "loss": 0.1578, "step": 776500 }, { "epoch": 4.37, "learning_rate": 6.361658712625845e-06, "loss": 0.166, "step": 776600 }, { "epoch": 4.37, "learning_rate": 6.356037482363391e-06, "loss": 0.1623, "step": 776700 }, { "epoch": 4.37, "learning_rate": 6.350416252100935e-06, "loss": 0.1648, "step": 776800 }, { "epoch": 4.37, "learning_rate": 6.34479502183848e-06, "loss": 0.1644, "step": 776900 }, { "epoch": 4.37, "learning_rate": 6.339173791576025e-06, "loss": 0.1589, "step": 777000 }, { "epoch": 4.37, "learning_rate": 6.333552561313569e-06, "loss": 0.1618, "step": 777100 }, { "epoch": 4.37, "learning_rate": 6.327931331051114e-06, "loss": 0.1605, "step": 777200 }, { "epoch": 4.37, "learning_rate": 6.322310100788659e-06, "loss": 0.1686, "step": 777300 }, { "epoch": 4.37, "learning_rate": 6.316688870526204e-06, "loss": 0.1638, "step": 777400 }, { "epoch": 4.37, "learning_rate": 6.311067640263748e-06, "loss": 0.1673, "step": 777500 }, { "epoch": 4.37, "learning_rate": 6.305446410001294e-06, "loss": 0.1703, "step": 777600 }, { "epoch": 4.37, "learning_rate": 6.299825179738838e-06, "loss": 0.1648, "step": 777700 }, { "epoch": 4.37, "learning_rate": 6.294203949476382e-06, "loss": 0.1682, "step": 777800 }, { "epoch": 4.37, "learning_rate": 6.288582719213927e-06, "loss": 0.1617, "step": 777900 }, { "epoch": 4.37, "learning_rate": 6.282961488951472e-06, "loss": 0.1636, "step": 778000 }, { "epoch": 4.37, "learning_rate": 6.277340258689017e-06, "loss": 0.1611, "step": 778100 }, { "epoch": 4.37, "learning_rate": 6.271719028426562e-06, "loss": 0.1624, "step": 778200 }, { "epoch": 4.38, "learning_rate": 6.266097798164107e-06, "loss": 0.1675, "step": 778300 }, { "epoch": 4.38, "learning_rate": 6.260476567901652e-06, "loss": 0.1644, "step": 778400 }, { "epoch": 4.38, "learning_rate": 6.254855337639196e-06, "loss": 0.1659, "step": 778500 }, { "epoch": 4.38, "learning_rate": 6.24923410737674e-06, "loss": 0.1617, "step": 778600 }, { "epoch": 4.38, "learning_rate": 6.243612877114286e-06, "loss": 0.1632, "step": 778700 }, { "epoch": 4.38, "learning_rate": 6.23799164685183e-06, "loss": 0.1714, "step": 778800 }, { "epoch": 4.38, "learning_rate": 6.232370416589376e-06, "loss": 0.1658, "step": 778900 }, { "epoch": 4.38, "learning_rate": 6.2267491863269195e-06, "loss": 0.1648, "step": 779000 }, { "epoch": 4.38, "learning_rate": 6.221127956064465e-06, "loss": 0.1656, "step": 779100 }, { "epoch": 4.38, "learning_rate": 6.2155067258020095e-06, "loss": 0.16, "step": 779200 }, { "epoch": 4.38, "learning_rate": 6.209885495539554e-06, "loss": 0.1625, "step": 779300 }, { "epoch": 4.38, "learning_rate": 6.204264265277099e-06, "loss": 0.1618, "step": 779400 }, { "epoch": 4.38, "learning_rate": 6.198643035014644e-06, "loss": 0.1649, "step": 779500 }, { "epoch": 4.38, "learning_rate": 6.193021804752188e-06, "loss": 0.1681, "step": 779600 }, { "epoch": 4.38, "learning_rate": 6.187400574489733e-06, "loss": 0.1626, "step": 779700 }, { "epoch": 4.38, "learning_rate": 6.181779344227278e-06, "loss": 0.1643, "step": 779800 }, { "epoch": 4.38, "learning_rate": 6.176158113964823e-06, "loss": 0.1707, "step": 779900 }, { "epoch": 4.38, "learning_rate": 6.170593096004992e-06, "loss": 0.1617, "step": 780000 }, { "epoch": 4.39, "learning_rate": 6.164971865742536e-06, "loss": 0.164, "step": 780100 }, { "epoch": 4.39, "learning_rate": 6.159350635480082e-06, "loss": 0.1655, "step": 780200 }, { "epoch": 4.39, "learning_rate": 6.153729405217626e-06, "loss": 0.168, "step": 780300 }, { "epoch": 4.39, "learning_rate": 6.148108174955171e-06, "loss": 0.1689, "step": 780400 }, { "epoch": 4.39, "learning_rate": 6.1424869446927155e-06, "loss": 0.1602, "step": 780500 }, { "epoch": 4.39, "learning_rate": 6.136865714430261e-06, "loss": 0.1634, "step": 780600 }, { "epoch": 4.39, "learning_rate": 6.131244484167805e-06, "loss": 0.1598, "step": 780700 }, { "epoch": 4.39, "learning_rate": 6.125679466207975e-06, "loss": 0.1641, "step": 780800 }, { "epoch": 4.39, "learning_rate": 6.1200582359455195e-06, "loss": 0.1655, "step": 780900 }, { "epoch": 4.39, "learning_rate": 6.114437005683064e-06, "loss": 0.1619, "step": 781000 }, { "epoch": 4.39, "learning_rate": 6.108815775420609e-06, "loss": 0.1626, "step": 781100 }, { "epoch": 4.39, "learning_rate": 6.103194545158153e-06, "loss": 0.1646, "step": 781200 }, { "epoch": 4.39, "learning_rate": 6.097573314895699e-06, "loss": 0.161, "step": 781300 }, { "epoch": 4.39, "learning_rate": 6.091952084633243e-06, "loss": 0.163, "step": 781400 }, { "epoch": 4.39, "learning_rate": 6.086330854370788e-06, "loss": 0.1639, "step": 781500 }, { "epoch": 4.39, "learning_rate": 6.080709624108332e-06, "loss": 0.163, "step": 781600 }, { "epoch": 4.39, "learning_rate": 6.075088393845878e-06, "loss": 0.1605, "step": 781700 }, { "epoch": 4.39, "learning_rate": 6.069467163583422e-06, "loss": 0.1711, "step": 781800 }, { "epoch": 4.4, "learning_rate": 6.063845933320967e-06, "loss": 0.1638, "step": 781900 }, { "epoch": 4.4, "learning_rate": 6.0582247030585116e-06, "loss": 0.1635, "step": 782000 }, { "epoch": 4.4, "learning_rate": 6.052603472796056e-06, "loss": 0.164, "step": 782100 }, { "epoch": 4.4, "learning_rate": 6.0469822425336016e-06, "loss": 0.1637, "step": 782200 }, { "epoch": 4.4, "learning_rate": 6.041361012271146e-06, "loss": 0.162, "step": 782300 }, { "epoch": 4.4, "learning_rate": 6.035739782008691e-06, "loss": 0.166, "step": 782400 }, { "epoch": 4.4, "learning_rate": 6.030118551746235e-06, "loss": 0.1616, "step": 782500 }, { "epoch": 4.4, "learning_rate": 6.024497321483781e-06, "loss": 0.1667, "step": 782600 }, { "epoch": 4.4, "learning_rate": 6.018876091221325e-06, "loss": 0.165, "step": 782700 }, { "epoch": 4.4, "learning_rate": 6.01325486095887e-06, "loss": 0.1675, "step": 782800 }, { "epoch": 4.4, "learning_rate": 6.0076336306964145e-06, "loss": 0.1665, "step": 782900 }, { "epoch": 4.4, "learning_rate": 6.00201240043396e-06, "loss": 0.1655, "step": 783000 }, { "epoch": 4.4, "learning_rate": 5.996447382474128e-06, "loss": 0.1688, "step": 783100 }, { "epoch": 4.4, "learning_rate": 5.990826152211673e-06, "loss": 0.1633, "step": 783200 }, { "epoch": 4.4, "learning_rate": 5.985204921949218e-06, "loss": 0.1691, "step": 783300 }, { "epoch": 4.4, "learning_rate": 5.979583691686763e-06, "loss": 0.1674, "step": 783400 }, { "epoch": 4.4, "learning_rate": 5.973962461424308e-06, "loss": 0.1661, "step": 783500 }, { "epoch": 4.4, "learning_rate": 5.968341231161852e-06, "loss": 0.1654, "step": 783600 }, { "epoch": 4.41, "learning_rate": 5.962720000899398e-06, "loss": 0.1646, "step": 783700 }, { "epoch": 4.41, "learning_rate": 5.957098770636941e-06, "loss": 0.1619, "step": 783800 }, { "epoch": 4.41, "learning_rate": 5.951477540374487e-06, "loss": 0.1609, "step": 783900 }, { "epoch": 4.41, "learning_rate": 5.945856310112031e-06, "loss": 0.1626, "step": 784000 }, { "epoch": 4.41, "learning_rate": 5.940235079849577e-06, "loss": 0.1624, "step": 784100 }, { "epoch": 4.41, "learning_rate": 5.9346138495871205e-06, "loss": 0.168, "step": 784200 }, { "epoch": 4.41, "learning_rate": 5.928992619324666e-06, "loss": 0.1602, "step": 784300 }, { "epoch": 4.41, "learning_rate": 5.9233713890622105e-06, "loss": 0.1612, "step": 784400 }, { "epoch": 4.41, "learning_rate": 5.917750158799755e-06, "loss": 0.1602, "step": 784500 }, { "epoch": 4.41, "learning_rate": 5.9121289285373e-06, "loss": 0.1678, "step": 784600 }, { "epoch": 4.41, "learning_rate": 5.906507698274845e-06, "loss": 0.1622, "step": 784700 }, { "epoch": 4.41, "learning_rate": 5.900886468012389e-06, "loss": 0.1628, "step": 784800 }, { "epoch": 4.41, "learning_rate": 5.895265237749934e-06, "loss": 0.1678, "step": 784900 }, { "epoch": 4.41, "learning_rate": 5.889644007487479e-06, "loss": 0.1624, "step": 785000 }, { "epoch": 4.41, "learning_rate": 5.884022777225024e-06, "loss": 0.1685, "step": 785100 }, { "epoch": 4.41, "learning_rate": 5.878401546962568e-06, "loss": 0.1663, "step": 785200 }, { "epoch": 4.41, "learning_rate": 5.8727803167001135e-06, "loss": 0.165, "step": 785300 }, { "epoch": 4.41, "learning_rate": 5.867159086437658e-06, "loss": 0.1587, "step": 785400 }, { "epoch": 4.42, "learning_rate": 5.861537856175203e-06, "loss": 0.1592, "step": 785500 }, { "epoch": 4.42, "learning_rate": 5.855916625912747e-06, "loss": 0.1631, "step": 785600 }, { "epoch": 4.42, "learning_rate": 5.850295395650293e-06, "loss": 0.1649, "step": 785700 }, { "epoch": 4.42, "learning_rate": 5.844674165387836e-06, "loss": 0.1643, "step": 785800 }, { "epoch": 4.42, "learning_rate": 5.839052935125382e-06, "loss": 0.1652, "step": 785900 }, { "epoch": 4.42, "learning_rate": 5.833431704862926e-06, "loss": 0.1656, "step": 786000 }, { "epoch": 4.42, "learning_rate": 5.827810474600472e-06, "loss": 0.1662, "step": 786100 }, { "epoch": 4.42, "learning_rate": 5.8221892443380156e-06, "loss": 0.1633, "step": 786200 }, { "epoch": 4.42, "learning_rate": 5.816568014075561e-06, "loss": 0.1653, "step": 786300 }, { "epoch": 4.42, "learning_rate": 5.810946783813106e-06, "loss": 0.1633, "step": 786400 }, { "epoch": 4.42, "learning_rate": 5.80532555355065e-06, "loss": 0.1618, "step": 786500 }, { "epoch": 4.42, "learning_rate": 5.799704323288195e-06, "loss": 0.1658, "step": 786600 }, { "epoch": 4.42, "learning_rate": 5.79408309302574e-06, "loss": 0.1677, "step": 786700 }, { "epoch": 4.42, "learning_rate": 5.788461862763284e-06, "loss": 0.1661, "step": 786800 }, { "epoch": 4.42, "learning_rate": 5.782840632500829e-06, "loss": 0.1685, "step": 786900 }, { "epoch": 4.42, "learning_rate": 5.777219402238374e-06, "loss": 0.1646, "step": 787000 }, { "epoch": 4.42, "learning_rate": 5.771654384278543e-06, "loss": 0.1623, "step": 787100 }, { "epoch": 4.43, "learning_rate": 5.766033154016088e-06, "loss": 0.1675, "step": 787200 }, { "epoch": 4.43, "learning_rate": 5.760411923753633e-06, "loss": 0.161, "step": 787300 }, { "epoch": 4.43, "learning_rate": 5.754846905793803e-06, "loss": 0.1653, "step": 787400 }, { "epoch": 4.43, "learning_rate": 5.749225675531347e-06, "loss": 0.1627, "step": 787500 }, { "epoch": 4.43, "learning_rate": 5.743604445268892e-06, "loss": 0.1599, "step": 787600 }, { "epoch": 4.43, "learning_rate": 5.737983215006436e-06, "loss": 0.165, "step": 787700 }, { "epoch": 4.43, "learning_rate": 5.732361984743982e-06, "loss": 0.1631, "step": 787800 }, { "epoch": 4.43, "learning_rate": 5.726740754481526e-06, "loss": 0.1671, "step": 787900 }, { "epoch": 4.43, "learning_rate": 5.721119524219071e-06, "loss": 0.1642, "step": 788000 }, { "epoch": 4.43, "learning_rate": 5.7154982939566155e-06, "loss": 0.1651, "step": 788100 }, { "epoch": 4.43, "learning_rate": 5.709877063694161e-06, "loss": 0.158, "step": 788200 }, { "epoch": 4.43, "learning_rate": 5.704255833431705e-06, "loss": 0.1639, "step": 788300 }, { "epoch": 4.43, "learning_rate": 5.69863460316925e-06, "loss": 0.1648, "step": 788400 }, { "epoch": 4.43, "learning_rate": 5.693013372906795e-06, "loss": 0.1625, "step": 788500 }, { "epoch": 4.43, "learning_rate": 5.687392142644339e-06, "loss": 0.169, "step": 788600 }, { "epoch": 4.43, "learning_rate": 5.681770912381884e-06, "loss": 0.1579, "step": 788700 }, { "epoch": 4.43, "learning_rate": 5.676149682119429e-06, "loss": 0.1605, "step": 788800 }, { "epoch": 4.43, "learning_rate": 5.670528451856974e-06, "loss": 0.1604, "step": 788900 }, { "epoch": 4.44, "learning_rate": 5.6649072215945185e-06, "loss": 0.1625, "step": 789000 }, { "epoch": 4.44, "learning_rate": 5.659285991332063e-06, "loss": 0.1682, "step": 789100 }, { "epoch": 4.44, "learning_rate": 5.6536647610696085e-06, "loss": 0.1604, "step": 789200 }, { "epoch": 4.44, "learning_rate": 5.648043530807152e-06, "loss": 0.1624, "step": 789300 }, { "epoch": 4.44, "learning_rate": 5.642422300544698e-06, "loss": 0.1628, "step": 789400 }, { "epoch": 4.44, "learning_rate": 5.636801070282242e-06, "loss": 0.1665, "step": 789500 }, { "epoch": 4.44, "learning_rate": 5.6312360523224115e-06, "loss": 0.1678, "step": 789600 }, { "epoch": 4.44, "learning_rate": 5.625614822059956e-06, "loss": 0.165, "step": 789700 }, { "epoch": 4.44, "learning_rate": 5.619993591797501e-06, "loss": 0.1604, "step": 789800 }, { "epoch": 4.44, "learning_rate": 5.614372361535046e-06, "loss": 0.1689, "step": 789900 }, { "epoch": 4.44, "learning_rate": 5.608751131272591e-06, "loss": 0.1612, "step": 790000 }, { "epoch": 4.44, "learning_rate": 5.603129901010135e-06, "loss": 0.1626, "step": 790100 }, { "epoch": 4.44, "learning_rate": 5.59750867074768e-06, "loss": 0.1631, "step": 790200 }, { "epoch": 4.44, "learning_rate": 5.591887440485225e-06, "loss": 0.1589, "step": 790300 }, { "epoch": 4.44, "learning_rate": 5.586266210222769e-06, "loss": 0.1612, "step": 790400 }, { "epoch": 4.44, "learning_rate": 5.5806449799603145e-06, "loss": 0.1653, "step": 790500 }, { "epoch": 4.44, "learning_rate": 5.575023749697859e-06, "loss": 0.1605, "step": 790600 }, { "epoch": 4.44, "learning_rate": 5.569402519435404e-06, "loss": 0.1607, "step": 790700 }, { "epoch": 4.45, "learning_rate": 5.563781289172948e-06, "loss": 0.1681, "step": 790800 }, { "epoch": 4.45, "learning_rate": 5.558160058910494e-06, "loss": 0.1632, "step": 790900 }, { "epoch": 4.45, "learning_rate": 5.552538828648038e-06, "loss": 0.1636, "step": 791000 }, { "epoch": 4.45, "learning_rate": 5.546917598385583e-06, "loss": 0.1626, "step": 791100 }, { "epoch": 4.45, "learning_rate": 5.541296368123127e-06, "loss": 0.165, "step": 791200 }, { "epoch": 4.45, "learning_rate": 5.535675137860673e-06, "loss": 0.1659, "step": 791300 }, { "epoch": 4.45, "learning_rate": 5.5300539075982174e-06, "loss": 0.166, "step": 791400 }, { "epoch": 4.45, "learning_rate": 5.524432677335762e-06, "loss": 0.1634, "step": 791500 }, { "epoch": 4.45, "learning_rate": 5.518811447073307e-06, "loss": 0.1672, "step": 791600 }, { "epoch": 4.45, "learning_rate": 5.513190216810851e-06, "loss": 0.1654, "step": 791700 }, { "epoch": 4.45, "learning_rate": 5.507568986548397e-06, "loss": 0.1653, "step": 791800 }, { "epoch": 4.45, "learning_rate": 5.501947756285941e-06, "loss": 0.1622, "step": 791900 }, { "epoch": 4.45, "learning_rate": 5.496326526023486e-06, "loss": 0.1638, "step": 792000 }, { "epoch": 4.45, "learning_rate": 5.49070529576103e-06, "loss": 0.1638, "step": 792100 }, { "epoch": 4.45, "learning_rate": 5.485084065498576e-06, "loss": 0.1638, "step": 792200 }, { "epoch": 4.45, "learning_rate": 5.47946283523612e-06, "loss": 0.1627, "step": 792300 }, { "epoch": 4.45, "learning_rate": 5.473841604973665e-06, "loss": 0.1622, "step": 792400 }, { "epoch": 4.45, "learning_rate": 5.4682203747112095e-06, "loss": 0.1637, "step": 792500 }, { "epoch": 4.46, "learning_rate": 5.462599144448754e-06, "loss": 0.1695, "step": 792600 }, { "epoch": 4.46, "learning_rate": 5.456977914186299e-06, "loss": 0.1685, "step": 792700 }, { "epoch": 4.46, "learning_rate": 5.451356683923844e-06, "loss": 0.1644, "step": 792800 }, { "epoch": 4.46, "learning_rate": 5.445735453661389e-06, "loss": 0.161, "step": 792900 }, { "epoch": 4.46, "learning_rate": 5.440114223398933e-06, "loss": 0.165, "step": 793000 }, { "epoch": 4.46, "learning_rate": 5.434492993136478e-06, "loss": 0.1621, "step": 793100 }, { "epoch": 4.46, "learning_rate": 5.428871762874023e-06, "loss": 0.1639, "step": 793200 }, { "epoch": 4.46, "learning_rate": 5.423250532611568e-06, "loss": 0.1664, "step": 793300 }, { "epoch": 4.46, "learning_rate": 5.4176293023491125e-06, "loss": 0.1642, "step": 793400 }, { "epoch": 4.46, "learning_rate": 5.412008072086657e-06, "loss": 0.1617, "step": 793500 }, { "epoch": 4.46, "learning_rate": 5.4063868418242025e-06, "loss": 0.1636, "step": 793600 }, { "epoch": 4.46, "learning_rate": 5.400821823864372e-06, "loss": 0.1676, "step": 793700 }, { "epoch": 4.46, "learning_rate": 5.3952005936019155e-06, "loss": 0.1694, "step": 793800 }, { "epoch": 4.46, "learning_rate": 5.389579363339461e-06, "loss": 0.1625, "step": 793900 }, { "epoch": 4.46, "learning_rate": 5.3839581330770056e-06, "loss": 0.1607, "step": 794000 }, { "epoch": 4.46, "learning_rate": 5.37833690281455e-06, "loss": 0.1634, "step": 794100 }, { "epoch": 4.46, "learning_rate": 5.372715672552095e-06, "loss": 0.1634, "step": 794200 }, { "epoch": 4.46, "learning_rate": 5.36709444228964e-06, "loss": 0.164, "step": 794300 }, { "epoch": 4.47, "learning_rate": 5.361473212027184e-06, "loss": 0.1626, "step": 794400 }, { "epoch": 4.47, "learning_rate": 5.355851981764729e-06, "loss": 0.1644, "step": 794500 }, { "epoch": 4.47, "learning_rate": 5.350230751502274e-06, "loss": 0.1681, "step": 794600 }, { "epoch": 4.47, "learning_rate": 5.344609521239819e-06, "loss": 0.1659, "step": 794700 }, { "epoch": 4.47, "learning_rate": 5.338988290977363e-06, "loss": 0.1667, "step": 794800 }, { "epoch": 4.47, "learning_rate": 5.3333670607149085e-06, "loss": 0.1675, "step": 794900 }, { "epoch": 4.47, "learning_rate": 5.327745830452453e-06, "loss": 0.1623, "step": 795000 }, { "epoch": 4.47, "learning_rate": 5.322124600189998e-06, "loss": 0.1696, "step": 795100 }, { "epoch": 4.47, "learning_rate": 5.316503369927542e-06, "loss": 0.1569, "step": 795200 }, { "epoch": 4.47, "learning_rate": 5.310882139665088e-06, "loss": 0.1576, "step": 795300 }, { "epoch": 4.47, "learning_rate": 5.305260909402632e-06, "loss": 0.1661, "step": 795400 }, { "epoch": 4.47, "learning_rate": 5.299639679140177e-06, "loss": 0.1645, "step": 795500 }, { "epoch": 4.47, "learning_rate": 5.2940184488777214e-06, "loss": 0.1632, "step": 795600 }, { "epoch": 4.47, "learning_rate": 5.288397218615267e-06, "loss": 0.159, "step": 795700 }, { "epoch": 4.47, "learning_rate": 5.282775988352811e-06, "loss": 0.1667, "step": 795800 }, { "epoch": 4.47, "learning_rate": 5.277154758090356e-06, "loss": 0.1614, "step": 795900 }, { "epoch": 4.47, "learning_rate": 5.271533527827901e-06, "loss": 0.1652, "step": 796000 }, { "epoch": 4.48, "learning_rate": 5.265912297565445e-06, "loss": 0.1671, "step": 796100 }, { "epoch": 4.48, "learning_rate": 5.26029106730299e-06, "loss": 0.1664, "step": 796200 }, { "epoch": 4.48, "learning_rate": 5.254669837040535e-06, "loss": 0.1666, "step": 796300 }, { "epoch": 4.48, "learning_rate": 5.24904860677808e-06, "loss": 0.1638, "step": 796400 }, { "epoch": 4.48, "learning_rate": 5.243427376515624e-06, "loss": 0.1619, "step": 796500 }, { "epoch": 4.48, "learning_rate": 5.237862358555794e-06, "loss": 0.1621, "step": 796600 }, { "epoch": 4.48, "learning_rate": 5.232241128293338e-06, "loss": 0.1651, "step": 796700 }, { "epoch": 4.48, "learning_rate": 5.226619898030883e-06, "loss": 0.1656, "step": 796800 }, { "epoch": 4.48, "learning_rate": 5.220998667768428e-06, "loss": 0.1622, "step": 796900 }, { "epoch": 4.48, "learning_rate": 5.215377437505973e-06, "loss": 0.1653, "step": 797000 }, { "epoch": 4.48, "learning_rate": 5.2097562072435175e-06, "loss": 0.162, "step": 797100 }, { "epoch": 4.48, "learning_rate": 5.204134976981062e-06, "loss": 0.1641, "step": 797200 }, { "epoch": 4.48, "learning_rate": 5.1985137467186075e-06, "loss": 0.1655, "step": 797300 }, { "epoch": 4.48, "learning_rate": 5.192892516456152e-06, "loss": 0.1579, "step": 797400 }, { "epoch": 4.48, "learning_rate": 5.187271286193697e-06, "loss": 0.1696, "step": 797500 }, { "epoch": 4.48, "learning_rate": 5.181650055931241e-06, "loss": 0.166, "step": 797600 }, { "epoch": 4.48, "learning_rate": 5.176028825668787e-06, "loss": 0.1641, "step": 797700 }, { "epoch": 4.48, "learning_rate": 5.170407595406331e-06, "loss": 0.1664, "step": 797800 }, { "epoch": 4.49, "learning_rate": 5.164786365143876e-06, "loss": 0.1591, "step": 797900 }, { "epoch": 4.49, "learning_rate": 5.15916513488142e-06, "loss": 0.1617, "step": 798000 }, { "epoch": 4.49, "learning_rate": 5.153543904618966e-06, "loss": 0.163, "step": 798100 }, { "epoch": 4.49, "learning_rate": 5.1479226743565096e-06, "loss": 0.1675, "step": 798200 }, { "epoch": 4.49, "learning_rate": 5.142301444094055e-06, "loss": 0.1624, "step": 798300 }, { "epoch": 4.49, "learning_rate": 5.136736426134224e-06, "loss": 0.164, "step": 798400 }, { "epoch": 4.49, "learning_rate": 5.131115195871769e-06, "loss": 0.1625, "step": 798500 }, { "epoch": 4.49, "learning_rate": 5.1254939656093135e-06, "loss": 0.1667, "step": 798600 }, { "epoch": 4.49, "learning_rate": 5.119872735346858e-06, "loss": 0.1601, "step": 798700 }, { "epoch": 4.49, "learning_rate": 5.1142515050844035e-06, "loss": 0.1638, "step": 798800 }, { "epoch": 4.49, "learning_rate": 5.108630274821947e-06, "loss": 0.1673, "step": 798900 }, { "epoch": 4.49, "learning_rate": 5.103009044559493e-06, "loss": 0.1691, "step": 799000 }, { "epoch": 4.49, "learning_rate": 5.097387814297037e-06, "loss": 0.1666, "step": 799100 }, { "epoch": 4.49, "learning_rate": 5.091766584034583e-06, "loss": 0.1681, "step": 799200 }, { "epoch": 4.49, "learning_rate": 5.086145353772126e-06, "loss": 0.1619, "step": 799300 }, { "epoch": 4.49, "learning_rate": 5.080524123509672e-06, "loss": 0.167, "step": 799400 }, { "epoch": 4.49, "learning_rate": 5.0749028932472164e-06, "loss": 0.1662, "step": 799500 }, { "epoch": 4.49, "learning_rate": 5.069281662984761e-06, "loss": 0.1639, "step": 799600 }, { "epoch": 4.5, "learning_rate": 5.063660432722306e-06, "loss": 0.1618, "step": 799700 }, { "epoch": 4.5, "learning_rate": 5.058039202459851e-06, "loss": 0.1638, "step": 799800 }, { "epoch": 4.5, "learning_rate": 5.052417972197395e-06, "loss": 0.1691, "step": 799900 }, { "epoch": 4.5, "learning_rate": 5.04679674193494e-06, "loss": 0.1617, "step": 800000 }, { "epoch": 4.5, "learning_rate": 5.041175511672485e-06, "loss": 0.1621, "step": 800100 }, { "epoch": 4.5, "learning_rate": 5.03555428141003e-06, "loss": 0.1584, "step": 800200 }, { "epoch": 4.5, "learning_rate": 5.029933051147574e-06, "loss": 0.1636, "step": 800300 }, { "epoch": 4.5, "learning_rate": 5.024311820885119e-06, "loss": 0.1698, "step": 800400 }, { "epoch": 4.5, "learning_rate": 5.018690590622664e-06, "loss": 0.1688, "step": 800500 }, { "epoch": 4.5, "learning_rate": 5.0130693603602085e-06, "loss": 0.1644, "step": 800600 }, { "epoch": 4.5, "learning_rate": 5.007448130097753e-06, "loss": 0.1625, "step": 800700 }, { "epoch": 4.5, "learning_rate": 5.0018268998352985e-06, "loss": 0.1637, "step": 800800 }, { "epoch": 4.5, "learning_rate": 4.996205669572842e-06, "loss": 0.1625, "step": 800900 }, { "epoch": 4.5, "learning_rate": 4.990584439310388e-06, "loss": 0.1608, "step": 801000 }, { "epoch": 4.5, "learning_rate": 4.984963209047932e-06, "loss": 0.1651, "step": 801100 }, { "epoch": 4.5, "learning_rate": 4.979341978785478e-06, "loss": 0.1673, "step": 801200 }, { "epoch": 4.5, "learning_rate": 4.9737207485230215e-06, "loss": 0.1634, "step": 801300 }, { "epoch": 4.5, "learning_rate": 4.968099518260567e-06, "loss": 0.1586, "step": 801400 }, { "epoch": 4.51, "learning_rate": 4.9624782879981115e-06, "loss": 0.1633, "step": 801500 }, { "epoch": 4.51, "learning_rate": 4.956857057735656e-06, "loss": 0.1651, "step": 801600 }, { "epoch": 4.51, "learning_rate": 4.951235827473201e-06, "loss": 0.1634, "step": 801700 }, { "epoch": 4.51, "learning_rate": 4.945614597210746e-06, "loss": 0.1623, "step": 801800 }, { "epoch": 4.51, "learning_rate": 4.93999336694829e-06, "loss": 0.167, "step": 801900 }, { "epoch": 4.51, "learning_rate": 4.934372136685835e-06, "loss": 0.1595, "step": 802000 }, { "epoch": 4.51, "learning_rate": 4.92875090642338e-06, "loss": 0.1602, "step": 802100 }, { "epoch": 4.51, "learning_rate": 4.923129676160925e-06, "loss": 0.1685, "step": 802200 }, { "epoch": 4.51, "learning_rate": 4.917508445898469e-06, "loss": 0.1593, "step": 802300 }, { "epoch": 4.51, "learning_rate": 4.911887215636014e-06, "loss": 0.1625, "step": 802400 }, { "epoch": 4.51, "learning_rate": 4.906265985373559e-06, "loss": 0.1549, "step": 802500 }, { "epoch": 4.51, "learning_rate": 4.900700967413728e-06, "loss": 0.167, "step": 802600 }, { "epoch": 4.51, "learning_rate": 4.895079737151273e-06, "loss": 0.1687, "step": 802700 }, { "epoch": 4.51, "learning_rate": 4.889458506888818e-06, "loss": 0.1644, "step": 802800 }, { "epoch": 4.51, "learning_rate": 4.883837276626363e-06, "loss": 0.1595, "step": 802900 }, { "epoch": 4.51, "learning_rate": 4.8782160463639075e-06, "loss": 0.1634, "step": 803000 }, { "epoch": 4.51, "learning_rate": 4.872594816101452e-06, "loss": 0.162, "step": 803100 }, { "epoch": 4.51, "learning_rate": 4.8669735858389975e-06, "loss": 0.1638, "step": 803200 }, { "epoch": 4.52, "learning_rate": 4.861352355576541e-06, "loss": 0.157, "step": 803300 }, { "epoch": 4.52, "learning_rate": 4.855731125314087e-06, "loss": 0.1671, "step": 803400 }, { "epoch": 4.52, "learning_rate": 4.850109895051631e-06, "loss": 0.1607, "step": 803500 }, { "epoch": 4.52, "learning_rate": 4.844488664789177e-06, "loss": 0.1627, "step": 803600 }, { "epoch": 4.52, "learning_rate": 4.8388674345267204e-06, "loss": 0.1613, "step": 803700 }, { "epoch": 4.52, "learning_rate": 4.833246204264266e-06, "loss": 0.1667, "step": 803800 }, { "epoch": 4.52, "learning_rate": 4.8276249740018104e-06, "loss": 0.167, "step": 803900 }, { "epoch": 4.52, "learning_rate": 4.822003743739355e-06, "loss": 0.1675, "step": 804000 }, { "epoch": 4.52, "learning_rate": 4.8163825134769e-06, "loss": 0.1632, "step": 804100 }, { "epoch": 4.52, "learning_rate": 4.810761283214445e-06, "loss": 0.1661, "step": 804200 }, { "epoch": 4.52, "learning_rate": 4.805140052951989e-06, "loss": 0.1634, "step": 804300 }, { "epoch": 4.52, "learning_rate": 4.799518822689534e-06, "loss": 0.1614, "step": 804400 }, { "epoch": 4.52, "learning_rate": 4.793897592427079e-06, "loss": 0.1619, "step": 804500 }, { "epoch": 4.52, "learning_rate": 4.788276362164624e-06, "loss": 0.1643, "step": 804600 }, { "epoch": 4.52, "learning_rate": 4.782711344204793e-06, "loss": 0.1659, "step": 804700 }, { "epoch": 4.52, "learning_rate": 4.777090113942337e-06, "loss": 0.1639, "step": 804800 }, { "epoch": 4.52, "learning_rate": 4.771468883679883e-06, "loss": 0.158, "step": 804900 }, { "epoch": 4.53, "learning_rate": 4.765847653417427e-06, "loss": 0.162, "step": 805000 }, { "epoch": 4.53, "learning_rate": 4.760226423154972e-06, "loss": 0.1634, "step": 805100 }, { "epoch": 4.53, "learning_rate": 4.7546051928925165e-06, "loss": 0.1625, "step": 805200 }, { "epoch": 4.53, "learning_rate": 4.748983962630062e-06, "loss": 0.163, "step": 805300 }, { "epoch": 4.53, "learning_rate": 4.743362732367606e-06, "loss": 0.161, "step": 805400 }, { "epoch": 4.53, "learning_rate": 4.737741502105151e-06, "loss": 0.1628, "step": 805500 }, { "epoch": 4.53, "learning_rate": 4.732120271842696e-06, "loss": 0.1657, "step": 805600 }, { "epoch": 4.53, "learning_rate": 4.72649904158024e-06, "loss": 0.1613, "step": 805700 }, { "epoch": 4.53, "learning_rate": 4.720877811317785e-06, "loss": 0.161, "step": 805800 }, { "epoch": 4.53, "learning_rate": 4.71525658105533e-06, "loss": 0.1698, "step": 805900 }, { "epoch": 4.53, "learning_rate": 4.709635350792875e-06, "loss": 0.164, "step": 806000 }, { "epoch": 4.53, "learning_rate": 4.704014120530419e-06, "loss": 0.1714, "step": 806100 }, { "epoch": 4.53, "learning_rate": 4.698392890267964e-06, "loss": 0.165, "step": 806200 }, { "epoch": 4.53, "learning_rate": 4.692771660005509e-06, "loss": 0.1654, "step": 806300 }, { "epoch": 4.53, "learning_rate": 4.687150429743053e-06, "loss": 0.1587, "step": 806400 }, { "epoch": 4.53, "learning_rate": 4.681529199480599e-06, "loss": 0.1617, "step": 806500 }, { "epoch": 4.53, "learning_rate": 4.675907969218143e-06, "loss": 0.1578, "step": 806600 }, { "epoch": 4.53, "learning_rate": 4.670286738955689e-06, "loss": 0.1613, "step": 806700 }, { "epoch": 4.54, "learning_rate": 4.664665508693232e-06, "loss": 0.1582, "step": 806800 }, { "epoch": 4.54, "learning_rate": 4.659044278430778e-06, "loss": 0.1645, "step": 806900 }, { "epoch": 4.54, "learning_rate": 4.653423048168322e-06, "loss": 0.1639, "step": 807000 }, { "epoch": 4.54, "learning_rate": 4.647801817905867e-06, "loss": 0.1646, "step": 807100 }, { "epoch": 4.54, "learning_rate": 4.6421805876434115e-06, "loss": 0.1548, "step": 807200 }, { "epoch": 4.54, "learning_rate": 4.636615569683582e-06, "loss": 0.1639, "step": 807300 }, { "epoch": 4.54, "learning_rate": 4.630994339421126e-06, "loss": 0.1666, "step": 807400 }, { "epoch": 4.54, "learning_rate": 4.625373109158671e-06, "loss": 0.1623, "step": 807500 }, { "epoch": 4.54, "learning_rate": 4.6197518788962154e-06, "loss": 0.1647, "step": 807600 }, { "epoch": 4.54, "learning_rate": 4.61413064863376e-06, "loss": 0.1671, "step": 807700 }, { "epoch": 4.54, "learning_rate": 4.608509418371305e-06, "loss": 0.1617, "step": 807800 }, { "epoch": 4.54, "learning_rate": 4.60288818810885e-06, "loss": 0.1588, "step": 807900 }, { "epoch": 4.54, "learning_rate": 4.597266957846395e-06, "loss": 0.1636, "step": 808000 }, { "epoch": 4.54, "learning_rate": 4.591645727583939e-06, "loss": 0.1644, "step": 808100 }, { "epoch": 4.54, "learning_rate": 4.586024497321484e-06, "loss": 0.1652, "step": 808200 }, { "epoch": 4.54, "learning_rate": 4.580403267059029e-06, "loss": 0.1605, "step": 808300 }, { "epoch": 4.54, "learning_rate": 4.574782036796574e-06, "loss": 0.1692, "step": 808400 }, { "epoch": 4.54, "learning_rate": 4.569160806534118e-06, "loss": 0.1618, "step": 808500 }, { "epoch": 4.55, "learning_rate": 4.563539576271663e-06, "loss": 0.1651, "step": 808600 }, { "epoch": 4.55, "learning_rate": 4.557918346009208e-06, "loss": 0.165, "step": 808700 }, { "epoch": 4.55, "learning_rate": 4.552297115746752e-06, "loss": 0.1595, "step": 808800 }, { "epoch": 4.55, "learning_rate": 4.5466758854842975e-06, "loss": 0.1565, "step": 808900 }, { "epoch": 4.55, "learning_rate": 4.541054655221842e-06, "loss": 0.17, "step": 809000 }, { "epoch": 4.55, "learning_rate": 4.5354334249593876e-06, "loss": 0.1589, "step": 809100 }, { "epoch": 4.55, "learning_rate": 4.529812194696931e-06, "loss": 0.1727, "step": 809200 }, { "epoch": 4.55, "learning_rate": 4.524190964434477e-06, "loss": 0.1642, "step": 809300 }, { "epoch": 4.55, "learning_rate": 4.518569734172021e-06, "loss": 0.1585, "step": 809400 }, { "epoch": 4.55, "learning_rate": 4.512948503909566e-06, "loss": 0.1697, "step": 809500 }, { "epoch": 4.55, "learning_rate": 4.5073272736471105e-06, "loss": 0.165, "step": 809600 }, { "epoch": 4.55, "learning_rate": 4.501706043384656e-06, "loss": 0.1607, "step": 809700 }, { "epoch": 4.55, "learning_rate": 4.4960848131222e-06, "loss": 0.1608, "step": 809800 }, { "epoch": 4.55, "learning_rate": 4.490519795162369e-06, "loss": 0.163, "step": 809900 }, { "epoch": 4.55, "learning_rate": 4.484898564899914e-06, "loss": 0.1658, "step": 810000 }, { "epoch": 4.55, "learning_rate": 4.479277334637459e-06, "loss": 0.1649, "step": 810100 }, { "epoch": 4.55, "learning_rate": 4.4736561043750036e-06, "loss": 0.1673, "step": 810200 }, { "epoch": 4.55, "learning_rate": 4.468034874112548e-06, "loss": 0.1615, "step": 810300 }, { "epoch": 4.56, "learning_rate": 4.4624136438500936e-06, "loss": 0.1661, "step": 810400 }, { "epoch": 4.56, "learning_rate": 4.456792413587637e-06, "loss": 0.1618, "step": 810500 }, { "epoch": 4.56, "learning_rate": 4.451171183325183e-06, "loss": 0.1651, "step": 810600 }, { "epoch": 4.56, "learning_rate": 4.445549953062727e-06, "loss": 0.1623, "step": 810700 }, { "epoch": 4.56, "learning_rate": 4.439928722800273e-06, "loss": 0.1662, "step": 810800 }, { "epoch": 4.56, "learning_rate": 4.4343074925378165e-06, "loss": 0.1622, "step": 810900 }, { "epoch": 4.56, "learning_rate": 4.428686262275362e-06, "loss": 0.1678, "step": 811000 }, { "epoch": 4.56, "learning_rate": 4.4230650320129065e-06, "loss": 0.1614, "step": 811100 }, { "epoch": 4.56, "learning_rate": 4.417443801750451e-06, "loss": 0.1663, "step": 811200 }, { "epoch": 4.56, "learning_rate": 4.411822571487996e-06, "loss": 0.1648, "step": 811300 }, { "epoch": 4.56, "learning_rate": 4.406201341225541e-06, "loss": 0.1621, "step": 811400 }, { "epoch": 4.56, "learning_rate": 4.400580110963086e-06, "loss": 0.1619, "step": 811500 }, { "epoch": 4.56, "learning_rate": 4.39495888070063e-06, "loss": 0.1639, "step": 811600 }, { "epoch": 4.56, "learning_rate": 4.389337650438175e-06, "loss": 0.1639, "step": 811700 }, { "epoch": 4.56, "learning_rate": 4.38371642017572e-06, "loss": 0.1697, "step": 811800 }, { "epoch": 4.56, "learning_rate": 4.378151402215889e-06, "loss": 0.1682, "step": 811900 }, { "epoch": 4.56, "learning_rate": 4.372530171953434e-06, "loss": 0.1647, "step": 812000 }, { "epoch": 4.57, "learning_rate": 4.366908941690979e-06, "loss": 0.1649, "step": 812100 }, { "epoch": 4.57, "learning_rate": 4.361287711428523e-06, "loss": 0.1646, "step": 812200 }, { "epoch": 4.57, "learning_rate": 4.355666481166068e-06, "loss": 0.1666, "step": 812300 }, { "epoch": 4.57, "learning_rate": 4.350045250903613e-06, "loss": 0.1629, "step": 812400 }, { "epoch": 4.57, "learning_rate": 4.344424020641158e-06, "loss": 0.1648, "step": 812500 }, { "epoch": 4.57, "learning_rate": 4.3388027903787025e-06, "loss": 0.1661, "step": 812600 }, { "epoch": 4.57, "learning_rate": 4.333181560116247e-06, "loss": 0.1649, "step": 812700 }, { "epoch": 4.57, "learning_rate": 4.3275603298537925e-06, "loss": 0.1636, "step": 812800 }, { "epoch": 4.57, "learning_rate": 4.321939099591337e-06, "loss": 0.1625, "step": 812900 }, { "epoch": 4.57, "learning_rate": 4.316317869328882e-06, "loss": 0.1648, "step": 813000 }, { "epoch": 4.57, "learning_rate": 4.310696639066426e-06, "loss": 0.1621, "step": 813100 }, { "epoch": 4.57, "learning_rate": 4.305075408803972e-06, "loss": 0.1706, "step": 813200 }, { "epoch": 4.57, "learning_rate": 4.2994541785415155e-06, "loss": 0.1609, "step": 813300 }, { "epoch": 4.57, "learning_rate": 4.293832948279061e-06, "loss": 0.1593, "step": 813400 }, { "epoch": 4.57, "learning_rate": 4.2882117180166055e-06, "loss": 0.166, "step": 813500 }, { "epoch": 4.57, "learning_rate": 4.28259048775415e-06, "loss": 0.1667, "step": 813600 }, { "epoch": 4.57, "learning_rate": 4.276969257491695e-06, "loss": 0.163, "step": 813700 }, { "epoch": 4.57, "learning_rate": 4.27134802722924e-06, "loss": 0.1617, "step": 813800 }, { "epoch": 4.58, "learning_rate": 4.265783009269409e-06, "loss": 0.171, "step": 813900 }, { "epoch": 4.58, "learning_rate": 4.260161779006953e-06, "loss": 0.1638, "step": 814000 }, { "epoch": 4.58, "learning_rate": 4.2545405487444986e-06, "loss": 0.1574, "step": 814100 }, { "epoch": 4.58, "learning_rate": 4.248919318482043e-06, "loss": 0.1628, "step": 814200 }, { "epoch": 4.58, "learning_rate": 4.243298088219588e-06, "loss": 0.1619, "step": 814300 }, { "epoch": 4.58, "learning_rate": 4.237676857957132e-06, "loss": 0.1649, "step": 814400 }, { "epoch": 4.58, "learning_rate": 4.232055627694678e-06, "loss": 0.1675, "step": 814500 }, { "epoch": 4.58, "learning_rate": 4.226434397432222e-06, "loss": 0.1652, "step": 814600 }, { "epoch": 4.58, "learning_rate": 4.220869379472392e-06, "loss": 0.1667, "step": 814700 }, { "epoch": 4.58, "learning_rate": 4.215248149209936e-06, "loss": 0.1686, "step": 814800 }, { "epoch": 4.58, "learning_rate": 4.209626918947481e-06, "loss": 0.1608, "step": 814900 }, { "epoch": 4.58, "learning_rate": 4.204005688685026e-06, "loss": 0.161, "step": 815000 }, { "epoch": 4.58, "learning_rate": 4.19838445842257e-06, "loss": 0.1586, "step": 815100 }, { "epoch": 4.58, "learning_rate": 4.192763228160115e-06, "loss": 0.1691, "step": 815200 }, { "epoch": 4.58, "learning_rate": 4.18714199789766e-06, "loss": 0.1628, "step": 815300 }, { "epoch": 4.58, "learning_rate": 4.1815207676352046e-06, "loss": 0.1638, "step": 815400 }, { "epoch": 4.58, "learning_rate": 4.175899537372749e-06, "loss": 0.1599, "step": 815500 }, { "epoch": 4.58, "learning_rate": 4.170278307110295e-06, "loss": 0.1681, "step": 815600 }, { "epoch": 4.59, "learning_rate": 4.164657076847839e-06, "loss": 0.1644, "step": 815700 }, { "epoch": 4.59, "learning_rate": 4.159035846585384e-06, "loss": 0.1653, "step": 815800 }, { "epoch": 4.59, "learning_rate": 4.153414616322928e-06, "loss": 0.1646, "step": 815900 }, { "epoch": 4.59, "learning_rate": 4.147793386060474e-06, "loss": 0.1595, "step": 816000 }, { "epoch": 4.59, "learning_rate": 4.142172155798018e-06, "loss": 0.1598, "step": 816100 }, { "epoch": 4.59, "learning_rate": 4.136550925535563e-06, "loss": 0.1608, "step": 816200 }, { "epoch": 4.59, "learning_rate": 4.1309296952731075e-06, "loss": 0.1626, "step": 816300 }, { "epoch": 4.59, "learning_rate": 4.125308465010652e-06, "loss": 0.1637, "step": 816400 }, { "epoch": 4.59, "learning_rate": 4.1196872347481975e-06, "loss": 0.1638, "step": 816500 }, { "epoch": 4.59, "learning_rate": 4.114066004485742e-06, "loss": 0.1648, "step": 816600 }, { "epoch": 4.59, "learning_rate": 4.108444774223287e-06, "loss": 0.1649, "step": 816700 }, { "epoch": 4.59, "learning_rate": 4.102823543960831e-06, "loss": 0.1701, "step": 816800 }, { "epoch": 4.59, "learning_rate": 4.097202313698377e-06, "loss": 0.1664, "step": 816900 }, { "epoch": 4.59, "learning_rate": 4.091581083435921e-06, "loss": 0.1629, "step": 817000 }, { "epoch": 4.59, "learning_rate": 4.085959853173466e-06, "loss": 0.1601, "step": 817100 }, { "epoch": 4.59, "learning_rate": 4.0803386229110105e-06, "loss": 0.1594, "step": 817200 }, { "epoch": 4.59, "learning_rate": 4.074717392648555e-06, "loss": 0.1633, "step": 817300 }, { "epoch": 4.59, "learning_rate": 4.0690961623861e-06, "loss": 0.166, "step": 817400 }, { "epoch": 4.6, "learning_rate": 4.063474932123645e-06, "loss": 0.1602, "step": 817500 }, { "epoch": 4.6, "learning_rate": 4.05785370186119e-06, "loss": 0.1619, "step": 817600 }, { "epoch": 4.6, "learning_rate": 4.052232471598734e-06, "loss": 0.1658, "step": 817700 }, { "epoch": 4.6, "learning_rate": 4.046611241336279e-06, "loss": 0.163, "step": 817800 }, { "epoch": 4.6, "learning_rate": 4.040990011073824e-06, "loss": 0.1636, "step": 817900 }, { "epoch": 4.6, "learning_rate": 4.035368780811369e-06, "loss": 0.161, "step": 818000 }, { "epoch": 4.6, "learning_rate": 4.029747550548913e-06, "loss": 0.1665, "step": 818100 }, { "epoch": 4.6, "learning_rate": 4.024126320286458e-06, "loss": 0.1657, "step": 818200 }, { "epoch": 4.6, "learning_rate": 4.018561302326627e-06, "loss": 0.1652, "step": 818300 }, { "epoch": 4.6, "learning_rate": 4.012940072064173e-06, "loss": 0.1644, "step": 818400 }, { "epoch": 4.6, "learning_rate": 4.0073188418017165e-06, "loss": 0.1618, "step": 818500 }, { "epoch": 4.6, "learning_rate": 4.001697611539262e-06, "loss": 0.1627, "step": 818600 }, { "epoch": 4.6, "learning_rate": 3.9960763812768065e-06, "loss": 0.1632, "step": 818700 }, { "epoch": 4.6, "learning_rate": 3.990455151014351e-06, "loss": 0.165, "step": 818800 }, { "epoch": 4.6, "learning_rate": 3.984833920751896e-06, "loss": 0.1644, "step": 818900 }, { "epoch": 4.6, "learning_rate": 3.979212690489441e-06, "loss": 0.1658, "step": 819000 }, { "epoch": 4.6, "learning_rate": 3.973591460226986e-06, "loss": 0.1677, "step": 819100 }, { "epoch": 4.6, "learning_rate": 3.96797022996453e-06, "loss": 0.1658, "step": 819200 }, { "epoch": 4.61, "learning_rate": 3.962348999702075e-06, "loss": 0.1599, "step": 819300 }, { "epoch": 4.61, "learning_rate": 3.95672776943962e-06, "loss": 0.1632, "step": 819400 }, { "epoch": 4.61, "learning_rate": 3.951106539177164e-06, "loss": 0.1592, "step": 819500 }, { "epoch": 4.61, "learning_rate": 3.9454853089147094e-06, "loss": 0.1626, "step": 819600 }, { "epoch": 4.61, "learning_rate": 3.939864078652254e-06, "loss": 0.1544, "step": 819700 }, { "epoch": 4.61, "learning_rate": 3.934242848389799e-06, "loss": 0.1602, "step": 819800 }, { "epoch": 4.61, "learning_rate": 3.928621618127343e-06, "loss": 0.1677, "step": 819900 }, { "epoch": 4.61, "learning_rate": 3.923000387864889e-06, "loss": 0.1617, "step": 820000 }, { "epoch": 4.61, "learning_rate": 3.917379157602433e-06, "loss": 0.1626, "step": 820100 }, { "epoch": 4.61, "learning_rate": 3.911757927339978e-06, "loss": 0.1586, "step": 820200 }, { "epoch": 4.61, "learning_rate": 3.906136697077522e-06, "loss": 0.1593, "step": 820300 }, { "epoch": 4.61, "learning_rate": 3.900515466815068e-06, "loss": 0.1612, "step": 820400 }, { "epoch": 4.61, "learning_rate": 3.8948942365526115e-06, "loss": 0.1669, "step": 820500 }, { "epoch": 4.61, "learning_rate": 3.889273006290157e-06, "loss": 0.1606, "step": 820600 }, { "epoch": 4.61, "learning_rate": 3.8836517760277015e-06, "loss": 0.1644, "step": 820700 }, { "epoch": 4.61, "learning_rate": 3.878030545765246e-06, "loss": 0.17, "step": 820800 }, { "epoch": 4.61, "learning_rate": 3.872409315502791e-06, "loss": 0.1609, "step": 820900 }, { "epoch": 4.62, "learning_rate": 3.866788085240336e-06, "loss": 0.1602, "step": 821000 }, { "epoch": 4.62, "learning_rate": 3.861166854977881e-06, "loss": 0.1602, "step": 821100 }, { "epoch": 4.62, "learning_rate": 3.855545624715425e-06, "loss": 0.1688, "step": 821200 }, { "epoch": 4.62, "learning_rate": 3.84992439445297e-06, "loss": 0.164, "step": 821300 }, { "epoch": 4.62, "learning_rate": 3.844303164190515e-06, "loss": 0.1656, "step": 821400 }, { "epoch": 4.62, "learning_rate": 3.838681933928059e-06, "loss": 0.1628, "step": 821500 }, { "epoch": 4.62, "learning_rate": 3.8330607036656045e-06, "loss": 0.1606, "step": 821600 }, { "epoch": 4.62, "learning_rate": 3.827439473403149e-06, "loss": 0.1666, "step": 821700 }, { "epoch": 4.62, "learning_rate": 3.821818243140694e-06, "loss": 0.1649, "step": 821800 }, { "epoch": 4.62, "learning_rate": 3.816197012878238e-06, "loss": 0.1669, "step": 821900 }, { "epoch": 4.62, "learning_rate": 3.8105757826157837e-06, "loss": 0.164, "step": 822000 }, { "epoch": 4.62, "learning_rate": 3.8049545523533287e-06, "loss": 0.1616, "step": 822100 }, { "epoch": 4.62, "learning_rate": 3.799333322090873e-06, "loss": 0.1644, "step": 822200 }, { "epoch": 4.62, "learning_rate": 3.793712091828418e-06, "loss": 0.1667, "step": 822300 }, { "epoch": 4.62, "learning_rate": 3.788090861565963e-06, "loss": 0.1602, "step": 822400 }, { "epoch": 4.62, "learning_rate": 3.782469631303507e-06, "loss": 0.1601, "step": 822500 }, { "epoch": 4.62, "learning_rate": 3.776848401041052e-06, "loss": 0.1665, "step": 822600 }, { "epoch": 4.62, "learning_rate": 3.771227170778597e-06, "loss": 0.1657, "step": 822700 }, { "epoch": 4.63, "learning_rate": 3.765605940516142e-06, "loss": 0.163, "step": 822800 }, { "epoch": 4.63, "learning_rate": 3.759984710253686e-06, "loss": 0.1631, "step": 822900 }, { "epoch": 4.63, "learning_rate": 3.7544196922938555e-06, "loss": 0.1595, "step": 823000 }, { "epoch": 4.63, "learning_rate": 3.7487984620314005e-06, "loss": 0.1663, "step": 823100 }, { "epoch": 4.63, "learning_rate": 3.7431772317689447e-06, "loss": 0.163, "step": 823200 }, { "epoch": 4.63, "learning_rate": 3.7375560015064897e-06, "loss": 0.1697, "step": 823300 }, { "epoch": 4.63, "learning_rate": 3.7319347712440347e-06, "loss": 0.164, "step": 823400 }, { "epoch": 4.63, "learning_rate": 3.7263135409815797e-06, "loss": 0.1599, "step": 823500 }, { "epoch": 4.63, "learning_rate": 3.720692310719124e-06, "loss": 0.1631, "step": 823600 }, { "epoch": 4.63, "learning_rate": 3.715071080456669e-06, "loss": 0.1648, "step": 823700 }, { "epoch": 4.63, "learning_rate": 3.709449850194214e-06, "loss": 0.1658, "step": 823800 }, { "epoch": 4.63, "learning_rate": 3.703828619931758e-06, "loss": 0.1618, "step": 823900 }, { "epoch": 4.63, "learning_rate": 3.698207389669303e-06, "loss": 0.1569, "step": 824000 }, { "epoch": 4.63, "learning_rate": 3.692586159406848e-06, "loss": 0.1599, "step": 824100 }, { "epoch": 4.63, "learning_rate": 3.686964929144393e-06, "loss": 0.1598, "step": 824200 }, { "epoch": 4.63, "learning_rate": 3.681343698881937e-06, "loss": 0.1655, "step": 824300 }, { "epoch": 4.63, "learning_rate": 3.675722468619482e-06, "loss": 0.1579, "step": 824400 }, { "epoch": 4.63, "learning_rate": 3.670101238357027e-06, "loss": 0.1606, "step": 824500 }, { "epoch": 4.64, "learning_rate": 3.6644800080945714e-06, "loss": 0.1637, "step": 824600 }, { "epoch": 4.64, "learning_rate": 3.6588587778321164e-06, "loss": 0.1622, "step": 824700 }, { "epoch": 4.64, "learning_rate": 3.6532375475696614e-06, "loss": 0.1588, "step": 824800 }, { "epoch": 4.64, "learning_rate": 3.6476163173072055e-06, "loss": 0.1641, "step": 824900 }, { "epoch": 4.64, "learning_rate": 3.6419950870447505e-06, "loss": 0.1639, "step": 825000 }, { "epoch": 4.64, "learning_rate": 3.6363738567822956e-06, "loss": 0.1653, "step": 825100 }, { "epoch": 4.64, "learning_rate": 3.6307526265198406e-06, "loss": 0.1646, "step": 825200 }, { "epoch": 4.64, "learning_rate": 3.6251313962573847e-06, "loss": 0.1602, "step": 825300 }, { "epoch": 4.64, "learning_rate": 3.6195663782975545e-06, "loss": 0.1627, "step": 825400 }, { "epoch": 4.64, "learning_rate": 3.6139451480350995e-06, "loss": 0.1607, "step": 825500 }, { "epoch": 4.64, "learning_rate": 3.6083239177726436e-06, "loss": 0.1693, "step": 825600 }, { "epoch": 4.64, "learning_rate": 3.6027026875101886e-06, "loss": 0.1623, "step": 825700 }, { "epoch": 4.64, "learning_rate": 3.5970814572477336e-06, "loss": 0.1633, "step": 825800 }, { "epoch": 4.64, "learning_rate": 3.5914602269852786e-06, "loss": 0.1643, "step": 825900 }, { "epoch": 4.64, "learning_rate": 3.585838996722823e-06, "loss": 0.1623, "step": 826000 }, { "epoch": 4.64, "learning_rate": 3.580217766460368e-06, "loss": 0.1641, "step": 826100 }, { "epoch": 4.64, "learning_rate": 3.574596536197913e-06, "loss": 0.1639, "step": 826200 }, { "epoch": 4.64, "learning_rate": 3.568975305935457e-06, "loss": 0.1665, "step": 826300 }, { "epoch": 4.65, "learning_rate": 3.563354075673002e-06, "loss": 0.1662, "step": 826400 }, { "epoch": 4.65, "learning_rate": 3.557732845410547e-06, "loss": 0.159, "step": 826500 }, { "epoch": 4.65, "learning_rate": 3.552111615148092e-06, "loss": 0.1675, "step": 826600 }, { "epoch": 4.65, "learning_rate": 3.546490384885636e-06, "loss": 0.1624, "step": 826700 }, { "epoch": 4.65, "learning_rate": 3.540869154623181e-06, "loss": 0.1617, "step": 826800 }, { "epoch": 4.65, "learning_rate": 3.535247924360726e-06, "loss": 0.1676, "step": 826900 }, { "epoch": 4.65, "learning_rate": 3.5296266940982703e-06, "loss": 0.1601, "step": 827000 }, { "epoch": 4.65, "learning_rate": 3.5240054638358153e-06, "loss": 0.1636, "step": 827100 }, { "epoch": 4.65, "learning_rate": 3.5183842335733603e-06, "loss": 0.1572, "step": 827200 }, { "epoch": 4.65, "learning_rate": 3.5127630033109045e-06, "loss": 0.1557, "step": 827300 }, { "epoch": 4.65, "learning_rate": 3.5071417730484495e-06, "loss": 0.1588, "step": 827400 }, { "epoch": 4.65, "learning_rate": 3.5015205427859945e-06, "loss": 0.164, "step": 827500 }, { "epoch": 4.65, "learning_rate": 3.4958993125235395e-06, "loss": 0.1687, "step": 827600 }, { "epoch": 4.65, "learning_rate": 3.4902780822610837e-06, "loss": 0.162, "step": 827700 }, { "epoch": 4.65, "learning_rate": 3.4846568519986287e-06, "loss": 0.1628, "step": 827800 }, { "epoch": 4.65, "learning_rate": 3.4790356217361737e-06, "loss": 0.162, "step": 827900 }, { "epoch": 4.65, "learning_rate": 3.473414391473718e-06, "loss": 0.1698, "step": 828000 }, { "epoch": 4.65, "learning_rate": 3.467793161211263e-06, "loss": 0.1644, "step": 828100 }, { "epoch": 4.66, "learning_rate": 3.462171930948808e-06, "loss": 0.1601, "step": 828200 }, { "epoch": 4.66, "learning_rate": 3.456550700686352e-06, "loss": 0.1611, "step": 828300 }, { "epoch": 4.66, "learning_rate": 3.450929470423897e-06, "loss": 0.1581, "step": 828400 }, { "epoch": 4.66, "learning_rate": 3.445308240161442e-06, "loss": 0.163, "step": 828500 }, { "epoch": 4.66, "learning_rate": 3.439687009898987e-06, "loss": 0.1619, "step": 828600 }, { "epoch": 4.66, "learning_rate": 3.4341219919391555e-06, "loss": 0.1649, "step": 828700 }, { "epoch": 4.66, "learning_rate": 3.4285007616767005e-06, "loss": 0.1625, "step": 828800 }, { "epoch": 4.66, "learning_rate": 3.4228795314142455e-06, "loss": 0.1667, "step": 828900 }, { "epoch": 4.66, "learning_rate": 3.4172583011517905e-06, "loss": 0.159, "step": 829000 }, { "epoch": 4.66, "learning_rate": 3.4116370708893347e-06, "loss": 0.1634, "step": 829100 }, { "epoch": 4.66, "learning_rate": 3.4060158406268797e-06, "loss": 0.1672, "step": 829200 }, { "epoch": 4.66, "learning_rate": 3.4003946103644247e-06, "loss": 0.1628, "step": 829300 }, { "epoch": 4.66, "learning_rate": 3.394773380101969e-06, "loss": 0.1633, "step": 829400 }, { "epoch": 4.66, "learning_rate": 3.389152149839514e-06, "loss": 0.1635, "step": 829500 }, { "epoch": 4.66, "learning_rate": 3.383530919577059e-06, "loss": 0.1586, "step": 829600 }, { "epoch": 4.66, "learning_rate": 3.377909689314603e-06, "loss": 0.1652, "step": 829700 }, { "epoch": 4.66, "learning_rate": 3.372288459052148e-06, "loss": 0.1582, "step": 829800 }, { "epoch": 4.67, "learning_rate": 3.366667228789693e-06, "loss": 0.1616, "step": 829900 }, { "epoch": 4.67, "learning_rate": 3.361045998527238e-06, "loss": 0.1651, "step": 830000 }, { "epoch": 4.67, "learning_rate": 3.3554247682647822e-06, "loss": 0.1614, "step": 830100 }, { "epoch": 4.67, "learning_rate": 3.3498035380023272e-06, "loss": 0.1621, "step": 830200 }, { "epoch": 4.67, "learning_rate": 3.3441823077398722e-06, "loss": 0.1631, "step": 830300 }, { "epoch": 4.67, "learning_rate": 3.3385610774774164e-06, "loss": 0.1702, "step": 830400 }, { "epoch": 4.67, "learning_rate": 3.332996059517586e-06, "loss": 0.1626, "step": 830500 }, { "epoch": 4.67, "learning_rate": 3.327374829255131e-06, "loss": 0.1655, "step": 830600 }, { "epoch": 4.67, "learning_rate": 3.321753598992676e-06, "loss": 0.1637, "step": 830700 }, { "epoch": 4.67, "learning_rate": 3.3161323687302203e-06, "loss": 0.1624, "step": 830800 }, { "epoch": 4.67, "learning_rate": 3.3105111384677653e-06, "loss": 0.1609, "step": 830900 }, { "epoch": 4.67, "learning_rate": 3.3048899082053103e-06, "loss": 0.1584, "step": 831000 }, { "epoch": 4.67, "learning_rate": 3.2992686779428545e-06, "loss": 0.1618, "step": 831100 }, { "epoch": 4.67, "learning_rate": 3.2936474476803995e-06, "loss": 0.1595, "step": 831200 }, { "epoch": 4.67, "learning_rate": 3.2880262174179445e-06, "loss": 0.1647, "step": 831300 }, { "epoch": 4.67, "learning_rate": 3.2824049871554895e-06, "loss": 0.1603, "step": 831400 }, { "epoch": 4.67, "learning_rate": 3.2767837568930337e-06, "loss": 0.1663, "step": 831500 }, { "epoch": 4.67, "learning_rate": 3.2711625266305787e-06, "loss": 0.1621, "step": 831600 }, { "epoch": 4.68, "learning_rate": 3.2655412963681237e-06, "loss": 0.1654, "step": 831700 }, { "epoch": 4.68, "learning_rate": 3.259920066105668e-06, "loss": 0.1649, "step": 831800 }, { "epoch": 4.68, "learning_rate": 3.254298835843213e-06, "loss": 0.1626, "step": 831900 }, { "epoch": 4.68, "learning_rate": 3.248677605580758e-06, "loss": 0.1596, "step": 832000 }, { "epoch": 4.68, "learning_rate": 3.243056375318302e-06, "loss": 0.1595, "step": 832100 }, { "epoch": 4.68, "learning_rate": 3.237435145055847e-06, "loss": 0.1609, "step": 832200 }, { "epoch": 4.68, "learning_rate": 3.231813914793392e-06, "loss": 0.165, "step": 832300 }, { "epoch": 4.68, "learning_rate": 3.226192684530937e-06, "loss": 0.1581, "step": 832400 }, { "epoch": 4.68, "learning_rate": 3.220571454268481e-06, "loss": 0.1626, "step": 832500 }, { "epoch": 4.68, "learning_rate": 3.2150064363086505e-06, "loss": 0.1633, "step": 832600 }, { "epoch": 4.68, "learning_rate": 3.2093852060461955e-06, "loss": 0.16, "step": 832700 }, { "epoch": 4.68, "learning_rate": 3.2037639757837405e-06, "loss": 0.1611, "step": 832800 }, { "epoch": 4.68, "learning_rate": 3.1981427455212847e-06, "loss": 0.1648, "step": 832900 }, { "epoch": 4.68, "learning_rate": 3.1925215152588297e-06, "loss": 0.1653, "step": 833000 }, { "epoch": 4.68, "learning_rate": 3.1869002849963747e-06, "loss": 0.1643, "step": 833100 }, { "epoch": 4.68, "learning_rate": 3.181279054733919e-06, "loss": 0.1646, "step": 833200 }, { "epoch": 4.68, "learning_rate": 3.175657824471464e-06, "loss": 0.1638, "step": 833300 }, { "epoch": 4.68, "learning_rate": 3.170036594209009e-06, "loss": 0.1644, "step": 833400 }, { "epoch": 4.69, "learning_rate": 3.164415363946553e-06, "loss": 0.1666, "step": 833500 }, { "epoch": 4.69, "learning_rate": 3.158794133684098e-06, "loss": 0.1615, "step": 833600 }, { "epoch": 4.69, "learning_rate": 3.153172903421643e-06, "loss": 0.1617, "step": 833700 }, { "epoch": 4.69, "learning_rate": 3.147551673159188e-06, "loss": 0.157, "step": 833800 }, { "epoch": 4.69, "learning_rate": 3.1419304428967322e-06, "loss": 0.1621, "step": 833900 }, { "epoch": 4.69, "learning_rate": 3.1363092126342772e-06, "loss": 0.1652, "step": 834000 }, { "epoch": 4.69, "learning_rate": 3.1306879823718222e-06, "loss": 0.1627, "step": 834100 }, { "epoch": 4.69, "learning_rate": 3.1250667521093664e-06, "loss": 0.1605, "step": 834200 }, { "epoch": 4.69, "learning_rate": 3.1194455218469114e-06, "loss": 0.1627, "step": 834300 }, { "epoch": 4.69, "learning_rate": 3.1138242915844564e-06, "loss": 0.1607, "step": 834400 }, { "epoch": 4.69, "learning_rate": 3.108203061322001e-06, "loss": 0.1616, "step": 834500 }, { "epoch": 4.69, "learning_rate": 3.1025818310595456e-06, "loss": 0.1644, "step": 834600 }, { "epoch": 4.69, "learning_rate": 3.0969606007970906e-06, "loss": 0.1597, "step": 834700 }, { "epoch": 4.69, "learning_rate": 3.091339370534635e-06, "loss": 0.1608, "step": 834800 }, { "epoch": 4.69, "learning_rate": 3.08571814027218e-06, "loss": 0.1619, "step": 834900 }, { "epoch": 4.69, "learning_rate": 3.0800969100097248e-06, "loss": 0.1613, "step": 835000 }, { "epoch": 4.69, "learning_rate": 3.0744756797472693e-06, "loss": 0.1592, "step": 835100 }, { "epoch": 4.69, "learning_rate": 3.0688544494848143e-06, "loss": 0.1607, "step": 835200 }, { "epoch": 4.7, "learning_rate": 3.0632894315249837e-06, "loss": 0.1589, "step": 835300 }, { "epoch": 4.7, "learning_rate": 3.0576682012625287e-06, "loss": 0.1635, "step": 835400 }, { "epoch": 4.7, "learning_rate": 3.0520469710000733e-06, "loss": 0.1582, "step": 835500 }, { "epoch": 4.7, "learning_rate": 3.0464257407376183e-06, "loss": 0.163, "step": 835600 }, { "epoch": 4.7, "learning_rate": 3.040804510475163e-06, "loss": 0.1669, "step": 835700 }, { "epoch": 4.7, "learning_rate": 3.0351832802127074e-06, "loss": 0.1614, "step": 835800 }, { "epoch": 4.7, "learning_rate": 3.0295620499502524e-06, "loss": 0.1685, "step": 835900 }, { "epoch": 4.7, "learning_rate": 3.023940819687797e-06, "loss": 0.1592, "step": 836000 }, { "epoch": 4.7, "learning_rate": 3.018319589425342e-06, "loss": 0.159, "step": 836100 }, { "epoch": 4.7, "learning_rate": 3.0126983591628866e-06, "loss": 0.164, "step": 836200 }, { "epoch": 4.7, "learning_rate": 3.0070771289004316e-06, "loss": 0.1624, "step": 836300 }, { "epoch": 4.7, "learning_rate": 3.001455898637976e-06, "loss": 0.1614, "step": 836400 }, { "epoch": 4.7, "learning_rate": 2.9958346683755208e-06, "loss": 0.1663, "step": 836500 }, { "epoch": 4.7, "learning_rate": 2.9902134381130658e-06, "loss": 0.1576, "step": 836600 }, { "epoch": 4.7, "learning_rate": 2.9845922078506104e-06, "loss": 0.1685, "step": 836700 }, { "epoch": 4.7, "learning_rate": 2.9789709775881554e-06, "loss": 0.16, "step": 836800 }, { "epoch": 4.7, "learning_rate": 2.9733497473257e-06, "loss": 0.1637, "step": 836900 }, { "epoch": 4.7, "learning_rate": 2.9677285170632445e-06, "loss": 0.1656, "step": 837000 }, { "epoch": 4.71, "learning_rate": 2.9621072868007895e-06, "loss": 0.1612, "step": 837100 }, { "epoch": 4.71, "learning_rate": 2.956486056538334e-06, "loss": 0.1593, "step": 837200 }, { "epoch": 4.71, "learning_rate": 2.950864826275879e-06, "loss": 0.1621, "step": 837300 }, { "epoch": 4.71, "learning_rate": 2.9452435960134237e-06, "loss": 0.1657, "step": 837400 }, { "epoch": 4.71, "learning_rate": 2.9396223657509683e-06, "loss": 0.1571, "step": 837500 }, { "epoch": 4.71, "learning_rate": 2.9340011354885133e-06, "loss": 0.16, "step": 837600 }, { "epoch": 4.71, "learning_rate": 2.928379905226058e-06, "loss": 0.1645, "step": 837700 }, { "epoch": 4.71, "learning_rate": 2.922758674963603e-06, "loss": 0.1699, "step": 837800 }, { "epoch": 4.71, "learning_rate": 2.9171374447011475e-06, "loss": 0.1649, "step": 837900 }, { "epoch": 4.71, "learning_rate": 2.911516214438692e-06, "loss": 0.1602, "step": 838000 }, { "epoch": 4.71, "learning_rate": 2.905894984176237e-06, "loss": 0.1662, "step": 838100 }, { "epoch": 4.71, "learning_rate": 2.9002737539137817e-06, "loss": 0.1608, "step": 838200 }, { "epoch": 4.71, "learning_rate": 2.8946525236513267e-06, "loss": 0.1647, "step": 838300 }, { "epoch": 4.71, "learning_rate": 2.8890312933888712e-06, "loss": 0.1635, "step": 838400 }, { "epoch": 4.71, "learning_rate": 2.883410063126416e-06, "loss": 0.1624, "step": 838500 }, { "epoch": 4.71, "learning_rate": 2.877845045166585e-06, "loss": 0.1662, "step": 838600 }, { "epoch": 4.71, "learning_rate": 2.87222381490413e-06, "loss": 0.1631, "step": 838700 }, { "epoch": 4.72, "learning_rate": 2.8666025846416747e-06, "loss": 0.1562, "step": 838800 }, { "epoch": 4.72, "learning_rate": 2.8609813543792193e-06, "loss": 0.1634, "step": 838900 }, { "epoch": 4.72, "learning_rate": 2.8553601241167643e-06, "loss": 0.1636, "step": 839000 }, { "epoch": 4.72, "learning_rate": 2.849738893854309e-06, "loss": 0.1605, "step": 839100 }, { "epoch": 4.72, "learning_rate": 2.844117663591854e-06, "loss": 0.1654, "step": 839200 }, { "epoch": 4.72, "learning_rate": 2.8384964333293985e-06, "loss": 0.1616, "step": 839300 }, { "epoch": 4.72, "learning_rate": 2.832875203066943e-06, "loss": 0.1637, "step": 839400 }, { "epoch": 4.72, "learning_rate": 2.827253972804488e-06, "loss": 0.1629, "step": 839500 }, { "epoch": 4.72, "learning_rate": 2.8216327425420327e-06, "loss": 0.1591, "step": 839600 }, { "epoch": 4.72, "learning_rate": 2.8160115122795777e-06, "loss": 0.1624, "step": 839700 }, { "epoch": 4.72, "learning_rate": 2.8103902820171223e-06, "loss": 0.1656, "step": 839800 }, { "epoch": 4.72, "learning_rate": 2.804769051754667e-06, "loss": 0.16, "step": 839900 }, { "epoch": 4.72, "learning_rate": 2.799147821492212e-06, "loss": 0.161, "step": 840000 }, { "epoch": 4.72, "learning_rate": 2.7935265912297564e-06, "loss": 0.1659, "step": 840100 }, { "epoch": 4.72, "learning_rate": 2.7879053609673014e-06, "loss": 0.166, "step": 840200 }, { "epoch": 4.72, "learning_rate": 2.782284130704846e-06, "loss": 0.1629, "step": 840300 }, { "epoch": 4.72, "learning_rate": 2.7766629004423906e-06, "loss": 0.1617, "step": 840400 }, { "epoch": 4.72, "learning_rate": 2.7710416701799356e-06, "loss": 0.163, "step": 840500 }, { "epoch": 4.73, "learning_rate": 2.76542043991748e-06, "loss": 0.1623, "step": 840600 }, { "epoch": 4.73, "learning_rate": 2.759799209655025e-06, "loss": 0.1605, "step": 840700 }, { "epoch": 4.73, "learning_rate": 2.75417797939257e-06, "loss": 0.161, "step": 840800 }, { "epoch": 4.73, "learning_rate": 2.7485567491301144e-06, "loss": 0.1617, "step": 840900 }, { "epoch": 4.73, "learning_rate": 2.7429355188676594e-06, "loss": 0.1611, "step": 841000 }, { "epoch": 4.73, "learning_rate": 2.737314288605204e-06, "loss": 0.1672, "step": 841100 }, { "epoch": 4.73, "learning_rate": 2.731693058342749e-06, "loss": 0.1566, "step": 841200 }, { "epoch": 4.73, "learning_rate": 2.7260718280802936e-06, "loss": 0.1584, "step": 841300 }, { "epoch": 4.73, "learning_rate": 2.7204505978178386e-06, "loss": 0.1663, "step": 841400 }, { "epoch": 4.73, "learning_rate": 2.714829367555383e-06, "loss": 0.1631, "step": 841500 }, { "epoch": 4.73, "learning_rate": 2.709208137292928e-06, "loss": 0.164, "step": 841600 }, { "epoch": 4.73, "learning_rate": 2.7035869070304727e-06, "loss": 0.1613, "step": 841700 }, { "epoch": 4.73, "learning_rate": 2.6979656767680177e-06, "loss": 0.1639, "step": 841800 }, { "epoch": 4.73, "learning_rate": 2.6923444465055623e-06, "loss": 0.1596, "step": 841900 }, { "epoch": 4.73, "learning_rate": 2.6867232162431073e-06, "loss": 0.1605, "step": 842000 }, { "epoch": 4.73, "learning_rate": 2.681101985980652e-06, "loss": 0.1643, "step": 842100 }, { "epoch": 4.73, "learning_rate": 2.6754807557181965e-06, "loss": 0.1668, "step": 842200 }, { "epoch": 4.73, "learning_rate": 2.6698595254557415e-06, "loss": 0.1619, "step": 842300 }, { "epoch": 4.74, "learning_rate": 2.664238295193286e-06, "loss": 0.1653, "step": 842400 }, { "epoch": 4.74, "learning_rate": 2.658617064930831e-06, "loss": 0.161, "step": 842500 }, { "epoch": 4.74, "learning_rate": 2.6529958346683757e-06, "loss": 0.1678, "step": 842600 }, { "epoch": 4.74, "learning_rate": 2.6473746044059207e-06, "loss": 0.1607, "step": 842700 }, { "epoch": 4.74, "learning_rate": 2.6417533741434653e-06, "loss": 0.1628, "step": 842800 }, { "epoch": 4.74, "learning_rate": 2.6361321438810103e-06, "loss": 0.1678, "step": 842900 }, { "epoch": 4.74, "learning_rate": 2.630510913618555e-06, "loss": 0.1656, "step": 843000 }, { "epoch": 4.74, "learning_rate": 2.6248896833560994e-06, "loss": 0.168, "step": 843100 }, { "epoch": 4.74, "learning_rate": 2.6192684530936444e-06, "loss": 0.1625, "step": 843200 }, { "epoch": 4.74, "learning_rate": 2.613647222831189e-06, "loss": 0.1636, "step": 843300 }, { "epoch": 4.74, "learning_rate": 2.6080822048713583e-06, "loss": 0.1584, "step": 843400 }, { "epoch": 4.74, "learning_rate": 2.602460974608903e-06, "loss": 0.168, "step": 843500 }, { "epoch": 4.74, "learning_rate": 2.596839744346448e-06, "loss": 0.1606, "step": 843600 }, { "epoch": 4.74, "learning_rate": 2.5912185140839925e-06, "loss": 0.1657, "step": 843700 }, { "epoch": 4.74, "learning_rate": 2.5855972838215375e-06, "loss": 0.1603, "step": 843800 }, { "epoch": 4.74, "learning_rate": 2.579976053559082e-06, "loss": 0.1599, "step": 843900 }, { "epoch": 4.74, "learning_rate": 2.5743548232966267e-06, "loss": 0.1629, "step": 844000 }, { "epoch": 4.74, "learning_rate": 2.5687335930341717e-06, "loss": 0.1592, "step": 844100 }, { "epoch": 4.75, "learning_rate": 2.5631123627717163e-06, "loss": 0.1635, "step": 844200 }, { "epoch": 4.75, "learning_rate": 2.5574911325092613e-06, "loss": 0.163, "step": 844300 }, { "epoch": 4.75, "learning_rate": 2.551869902246806e-06, "loss": 0.1612, "step": 844400 }, { "epoch": 4.75, "learning_rate": 2.5462486719843505e-06, "loss": 0.1613, "step": 844500 }, { "epoch": 4.75, "learning_rate": 2.5406274417218955e-06, "loss": 0.1641, "step": 844600 }, { "epoch": 4.75, "learning_rate": 2.53500621145944e-06, "loss": 0.1645, "step": 844700 }, { "epoch": 4.75, "learning_rate": 2.529384981196985e-06, "loss": 0.1637, "step": 844800 }, { "epoch": 4.75, "learning_rate": 2.5237637509345296e-06, "loss": 0.1604, "step": 844900 }, { "epoch": 4.75, "learning_rate": 2.5181425206720742e-06, "loss": 0.165, "step": 845000 }, { "epoch": 4.75, "learning_rate": 2.5125212904096192e-06, "loss": 0.1637, "step": 845100 }, { "epoch": 4.75, "learning_rate": 2.506900060147164e-06, "loss": 0.1599, "step": 845200 }, { "epoch": 4.75, "learning_rate": 2.501278829884709e-06, "loss": 0.165, "step": 845300 }, { "epoch": 4.75, "learning_rate": 2.4956575996222534e-06, "loss": 0.1555, "step": 845400 }, { "epoch": 4.75, "learning_rate": 2.490036369359798e-06, "loss": 0.1629, "step": 845500 }, { "epoch": 4.75, "learning_rate": 2.484415139097343e-06, "loss": 0.1655, "step": 845600 }, { "epoch": 4.75, "learning_rate": 2.4787939088348876e-06, "loss": 0.1576, "step": 845700 }, { "epoch": 4.75, "learning_rate": 2.4731726785724326e-06, "loss": 0.1614, "step": 845800 }, { "epoch": 4.75, "learning_rate": 2.467551448309977e-06, "loss": 0.1578, "step": 845900 }, { "epoch": 4.76, "learning_rate": 2.4619302180475217e-06, "loss": 0.1638, "step": 846000 }, { "epoch": 4.76, "learning_rate": 2.4563089877850668e-06, "loss": 0.1628, "step": 846100 }, { "epoch": 4.76, "learning_rate": 2.4506877575226113e-06, "loss": 0.1559, "step": 846200 }, { "epoch": 4.76, "learning_rate": 2.4451227395627807e-06, "loss": 0.1603, "step": 846300 }, { "epoch": 4.76, "learning_rate": 2.4395015093003257e-06, "loss": 0.1632, "step": 846400 }, { "epoch": 4.76, "learning_rate": 2.4338802790378702e-06, "loss": 0.1561, "step": 846500 }, { "epoch": 4.76, "learning_rate": 2.4282590487754153e-06, "loss": 0.1648, "step": 846600 }, { "epoch": 4.76, "learning_rate": 2.42263781851296e-06, "loss": 0.1612, "step": 846700 }, { "epoch": 4.76, "learning_rate": 2.4170165882505044e-06, "loss": 0.1644, "step": 846800 }, { "epoch": 4.76, "learning_rate": 2.4113953579880494e-06, "loss": 0.1599, "step": 846900 }, { "epoch": 4.76, "learning_rate": 2.405774127725594e-06, "loss": 0.1595, "step": 847000 }, { "epoch": 4.76, "learning_rate": 2.400152897463139e-06, "loss": 0.1594, "step": 847100 }, { "epoch": 4.76, "learning_rate": 2.3945316672006836e-06, "loss": 0.1624, "step": 847200 }, { "epoch": 4.76, "learning_rate": 2.3889104369382286e-06, "loss": 0.1574, "step": 847300 }, { "epoch": 4.76, "learning_rate": 2.383289206675773e-06, "loss": 0.1571, "step": 847400 }, { "epoch": 4.76, "learning_rate": 2.377667976413318e-06, "loss": 0.16, "step": 847500 }, { "epoch": 4.76, "learning_rate": 2.3720467461508628e-06, "loss": 0.1604, "step": 847600 }, { "epoch": 4.77, "learning_rate": 2.3664255158884078e-06, "loss": 0.1673, "step": 847700 }, { "epoch": 4.77, "learning_rate": 2.3608042856259524e-06, "loss": 0.1603, "step": 847800 }, { "epoch": 4.77, "learning_rate": 2.355183055363497e-06, "loss": 0.1577, "step": 847900 }, { "epoch": 4.77, "learning_rate": 2.349561825101042e-06, "loss": 0.1643, "step": 848000 }, { "epoch": 4.77, "learning_rate": 2.3439405948385865e-06, "loss": 0.1639, "step": 848100 }, { "epoch": 4.77, "learning_rate": 2.3383193645761315e-06, "loss": 0.1668, "step": 848200 }, { "epoch": 4.77, "learning_rate": 2.332698134313676e-06, "loss": 0.1638, "step": 848300 }, { "epoch": 4.77, "learning_rate": 2.3271331163538455e-06, "loss": 0.1617, "step": 848400 }, { "epoch": 4.77, "learning_rate": 2.32151188609139e-06, "loss": 0.1673, "step": 848500 }, { "epoch": 4.77, "learning_rate": 2.315890655828935e-06, "loss": 0.1604, "step": 848600 }, { "epoch": 4.77, "learning_rate": 2.3102694255664796e-06, "loss": 0.1661, "step": 848700 }, { "epoch": 4.77, "learning_rate": 2.304648195304024e-06, "loss": 0.1643, "step": 848800 }, { "epoch": 4.77, "learning_rate": 2.2990269650415692e-06, "loss": 0.1622, "step": 848900 }, { "epoch": 4.77, "learning_rate": 2.293461947081738e-06, "loss": 0.1601, "step": 849000 }, { "epoch": 4.77, "learning_rate": 2.287840716819283e-06, "loss": 0.1664, "step": 849100 }, { "epoch": 4.77, "learning_rate": 2.2822194865568277e-06, "loss": 0.1632, "step": 849200 }, { "epoch": 4.77, "learning_rate": 2.2765982562943727e-06, "loss": 0.1629, "step": 849300 }, { "epoch": 4.77, "learning_rate": 2.2709770260319173e-06, "loss": 0.1623, "step": 849400 }, { "epoch": 4.78, "learning_rate": 2.2653557957694623e-06, "loss": 0.1569, "step": 849500 }, { "epoch": 4.78, "learning_rate": 2.259734565507007e-06, "loss": 0.1615, "step": 849600 }, { "epoch": 4.78, "learning_rate": 2.2541133352445515e-06, "loss": 0.1634, "step": 849700 }, { "epoch": 4.78, "learning_rate": 2.2484921049820965e-06, "loss": 0.1633, "step": 849800 }, { "epoch": 4.78, "learning_rate": 2.242870874719641e-06, "loss": 0.1638, "step": 849900 }, { "epoch": 4.78, "learning_rate": 2.237249644457186e-06, "loss": 0.1585, "step": 850000 }, { "epoch": 4.78, "learning_rate": 2.2316284141947306e-06, "loss": 0.1597, "step": 850100 }, { "epoch": 4.78, "learning_rate": 2.2260071839322752e-06, "loss": 0.1593, "step": 850200 }, { "epoch": 4.78, "learning_rate": 2.2203859536698202e-06, "loss": 0.1597, "step": 850300 }, { "epoch": 4.78, "learning_rate": 2.214764723407365e-06, "loss": 0.1664, "step": 850400 }, { "epoch": 4.78, "learning_rate": 2.20914349314491e-06, "loss": 0.1624, "step": 850500 }, { "epoch": 4.78, "learning_rate": 2.2035222628824544e-06, "loss": 0.1654, "step": 850600 }, { "epoch": 4.78, "learning_rate": 2.197901032619999e-06, "loss": 0.164, "step": 850700 }, { "epoch": 4.78, "learning_rate": 2.192279802357544e-06, "loss": 0.1648, "step": 850800 }, { "epoch": 4.78, "learning_rate": 2.1866585720950886e-06, "loss": 0.161, "step": 850900 }, { "epoch": 4.78, "learning_rate": 2.1810373418326336e-06, "loss": 0.1579, "step": 851000 }, { "epoch": 4.78, "learning_rate": 2.175416111570178e-06, "loss": 0.1688, "step": 851100 }, { "epoch": 4.78, "learning_rate": 2.169794881307723e-06, "loss": 0.1607, "step": 851200 }, { "epoch": 4.79, "learning_rate": 2.1642298633478925e-06, "loss": 0.1697, "step": 851300 }, { "epoch": 4.79, "learning_rate": 2.158608633085437e-06, "loss": 0.1632, "step": 851400 }, { "epoch": 4.79, "learning_rate": 2.152987402822982e-06, "loss": 0.1618, "step": 851500 }, { "epoch": 4.79, "learning_rate": 2.1473661725605267e-06, "loss": 0.1615, "step": 851600 }, { "epoch": 4.79, "learning_rate": 2.1417449422980717e-06, "loss": 0.1596, "step": 851700 }, { "epoch": 4.79, "learning_rate": 2.1361237120356163e-06, "loss": 0.1632, "step": 851800 }, { "epoch": 4.79, "learning_rate": 2.1305024817731613e-06, "loss": 0.1636, "step": 851900 }, { "epoch": 4.79, "learning_rate": 2.124881251510706e-06, "loss": 0.1664, "step": 852000 }, { "epoch": 4.79, "learning_rate": 2.1192600212482504e-06, "loss": 0.1667, "step": 852100 }, { "epoch": 4.79, "learning_rate": 2.1136387909857954e-06, "loss": 0.1565, "step": 852200 }, { "epoch": 4.79, "learning_rate": 2.10801756072334e-06, "loss": 0.1589, "step": 852300 }, { "epoch": 4.79, "learning_rate": 2.102396330460885e-06, "loss": 0.1608, "step": 852400 }, { "epoch": 4.79, "learning_rate": 2.0967751001984296e-06, "loss": 0.1595, "step": 852500 }, { "epoch": 4.79, "learning_rate": 2.091153869935974e-06, "loss": 0.1646, "step": 852600 }, { "epoch": 4.79, "learning_rate": 2.085532639673519e-06, "loss": 0.1594, "step": 852700 }, { "epoch": 4.79, "learning_rate": 2.079911409411064e-06, "loss": 0.1636, "step": 852800 }, { "epoch": 4.79, "learning_rate": 2.074290179148609e-06, "loss": 0.1627, "step": 852900 }, { "epoch": 4.79, "learning_rate": 2.0686689488861534e-06, "loss": 0.1555, "step": 853000 }, { "epoch": 4.8, "learning_rate": 2.063047718623698e-06, "loss": 0.1631, "step": 853100 }, { "epoch": 4.8, "learning_rate": 2.057426488361243e-06, "loss": 0.1633, "step": 853200 }, { "epoch": 4.8, "learning_rate": 2.0518052580987876e-06, "loss": 0.1651, "step": 853300 }, { "epoch": 4.8, "learning_rate": 2.0461840278363326e-06, "loss": 0.161, "step": 853400 }, { "epoch": 4.8, "learning_rate": 2.040562797573877e-06, "loss": 0.1645, "step": 853500 }, { "epoch": 4.8, "learning_rate": 2.0349415673114217e-06, "loss": 0.1614, "step": 853600 }, { "epoch": 4.8, "learning_rate": 2.0293203370489667e-06, "loss": 0.1547, "step": 853700 }, { "epoch": 4.8, "learning_rate": 2.0236991067865113e-06, "loss": 0.1594, "step": 853800 }, { "epoch": 4.8, "learning_rate": 2.0180778765240563e-06, "loss": 0.1615, "step": 853900 }, { "epoch": 4.8, "learning_rate": 2.012456646261601e-06, "loss": 0.157, "step": 854000 }, { "epoch": 4.8, "learning_rate": 2.0068354159991455e-06, "loss": 0.161, "step": 854100 }, { "epoch": 4.8, "learning_rate": 2.0012141857366905e-06, "loss": 0.1635, "step": 854200 }, { "epoch": 4.8, "learning_rate": 1.995592955474235e-06, "loss": 0.1609, "step": 854300 }, { "epoch": 4.8, "learning_rate": 1.98997172521178e-06, "loss": 0.1592, "step": 854400 }, { "epoch": 4.8, "learning_rate": 1.9843504949493247e-06, "loss": 0.1616, "step": 854500 }, { "epoch": 4.8, "learning_rate": 1.9787292646868692e-06, "loss": 0.1607, "step": 854600 }, { "epoch": 4.8, "learning_rate": 1.9731080344244143e-06, "loss": 0.1607, "step": 854700 }, { "epoch": 4.81, "learning_rate": 1.967486804161959e-06, "loss": 0.1641, "step": 854800 }, { "epoch": 4.81, "learning_rate": 1.961865573899504e-06, "loss": 0.1635, "step": 854900 }, { "epoch": 4.81, "learning_rate": 1.9562443436370484e-06, "loss": 0.1608, "step": 855000 }, { "epoch": 4.81, "learning_rate": 1.9506231133745934e-06, "loss": 0.1634, "step": 855100 }, { "epoch": 4.81, "learning_rate": 1.945001883112138e-06, "loss": 0.1632, "step": 855200 }, { "epoch": 4.81, "learning_rate": 1.9393806528496826e-06, "loss": 0.1592, "step": 855300 }, { "epoch": 4.81, "learning_rate": 1.9337594225872276e-06, "loss": 0.1648, "step": 855400 }, { "epoch": 4.81, "learning_rate": 1.928138192324772e-06, "loss": 0.1608, "step": 855500 }, { "epoch": 4.81, "learning_rate": 1.9225731743649415e-06, "loss": 0.1626, "step": 855600 }, { "epoch": 4.81, "learning_rate": 1.916951944102486e-06, "loss": 0.1638, "step": 855700 }, { "epoch": 4.81, "learning_rate": 1.911330713840031e-06, "loss": 0.1648, "step": 855800 }, { "epoch": 4.81, "learning_rate": 1.905709483577576e-06, "loss": 0.1589, "step": 855900 }, { "epoch": 4.81, "learning_rate": 1.9000882533151205e-06, "loss": 0.167, "step": 856000 }, { "epoch": 4.81, "learning_rate": 1.8944670230526655e-06, "loss": 0.1613, "step": 856100 }, { "epoch": 4.81, "learning_rate": 1.88884579279021e-06, "loss": 0.1591, "step": 856200 }, { "epoch": 4.81, "learning_rate": 1.883224562527755e-06, "loss": 0.1584, "step": 856300 }, { "epoch": 4.81, "learning_rate": 1.8776033322652997e-06, "loss": 0.1582, "step": 856400 }, { "epoch": 4.81, "learning_rate": 1.8719821020028442e-06, "loss": 0.1565, "step": 856500 }, { "epoch": 4.82, "learning_rate": 1.8663608717403893e-06, "loss": 0.1613, "step": 856600 }, { "epoch": 4.82, "learning_rate": 1.8607396414779338e-06, "loss": 0.1671, "step": 856700 }, { "epoch": 4.82, "learning_rate": 1.8551184112154788e-06, "loss": 0.1647, "step": 856800 }, { "epoch": 4.82, "learning_rate": 1.8494971809530234e-06, "loss": 0.1633, "step": 856900 }, { "epoch": 4.82, "learning_rate": 1.8438759506905684e-06, "loss": 0.1597, "step": 857000 }, { "epoch": 4.82, "learning_rate": 1.838254720428113e-06, "loss": 0.1595, "step": 857100 }, { "epoch": 4.82, "learning_rate": 1.8326334901656576e-06, "loss": 0.1561, "step": 857200 }, { "epoch": 4.82, "learning_rate": 1.8270122599032026e-06, "loss": 0.1633, "step": 857300 }, { "epoch": 4.82, "learning_rate": 1.8213910296407472e-06, "loss": 0.1589, "step": 857400 }, { "epoch": 4.82, "learning_rate": 1.8157697993782922e-06, "loss": 0.1573, "step": 857500 }, { "epoch": 4.82, "learning_rate": 1.8101485691158368e-06, "loss": 0.1633, "step": 857600 }, { "epoch": 4.82, "learning_rate": 1.8045273388533814e-06, "loss": 0.1671, "step": 857700 }, { "epoch": 4.82, "learning_rate": 1.7989061085909264e-06, "loss": 0.1621, "step": 857800 }, { "epoch": 4.82, "learning_rate": 1.793284878328471e-06, "loss": 0.1671, "step": 857900 }, { "epoch": 4.82, "learning_rate": 1.787663648066016e-06, "loss": 0.1605, "step": 858000 }, { "epoch": 4.82, "learning_rate": 1.782098630106185e-06, "loss": 0.1701, "step": 858100 }, { "epoch": 4.82, "learning_rate": 1.77647739984373e-06, "loss": 0.163, "step": 858200 }, { "epoch": 4.82, "learning_rate": 1.7708561695812747e-06, "loss": 0.158, "step": 858300 }, { "epoch": 4.83, "learning_rate": 1.7652349393188192e-06, "loss": 0.1589, "step": 858400 }, { "epoch": 4.83, "learning_rate": 1.7596137090563642e-06, "loss": 0.1644, "step": 858500 }, { "epoch": 4.83, "learning_rate": 1.7539924787939088e-06, "loss": 0.1575, "step": 858600 }, { "epoch": 4.83, "learning_rate": 1.7483712485314538e-06, "loss": 0.1624, "step": 858700 }, { "epoch": 4.83, "learning_rate": 1.7427500182689984e-06, "loss": 0.1611, "step": 858800 }, { "epoch": 4.83, "learning_rate": 1.737128788006543e-06, "loss": 0.1591, "step": 858900 }, { "epoch": 4.83, "learning_rate": 1.731507557744088e-06, "loss": 0.1672, "step": 859000 }, { "epoch": 4.83, "learning_rate": 1.7258863274816326e-06, "loss": 0.1631, "step": 859100 }, { "epoch": 4.83, "learning_rate": 1.7202650972191776e-06, "loss": 0.1613, "step": 859200 }, { "epoch": 4.83, "learning_rate": 1.7146438669567222e-06, "loss": 0.1639, "step": 859300 }, { "epoch": 4.83, "learning_rate": 1.7090226366942672e-06, "loss": 0.1669, "step": 859400 }, { "epoch": 4.83, "learning_rate": 1.7034014064318118e-06, "loss": 0.162, "step": 859500 }, { "epoch": 4.83, "learning_rate": 1.6977801761693564e-06, "loss": 0.1617, "step": 859600 }, { "epoch": 4.83, "learning_rate": 1.6921589459069014e-06, "loss": 0.1624, "step": 859700 }, { "epoch": 4.83, "learning_rate": 1.686537715644446e-06, "loss": 0.1632, "step": 859800 }, { "epoch": 4.83, "learning_rate": 1.680916485381991e-06, "loss": 0.159, "step": 859900 }, { "epoch": 4.83, "learning_rate": 1.6752952551195355e-06, "loss": 0.1566, "step": 860000 }, { "epoch": 4.83, "learning_rate": 1.6696740248570801e-06, "loss": 0.1608, "step": 860100 }, { "epoch": 4.84, "learning_rate": 1.6640527945946251e-06, "loss": 0.1634, "step": 860200 }, { "epoch": 4.84, "learning_rate": 1.6584877766347942e-06, "loss": 0.1597, "step": 860300 }, { "epoch": 4.84, "learning_rate": 1.6528665463723392e-06, "loss": 0.166, "step": 860400 }, { "epoch": 4.84, "learning_rate": 1.6472453161098838e-06, "loss": 0.1583, "step": 860500 }, { "epoch": 4.84, "learning_rate": 1.6416240858474288e-06, "loss": 0.1613, "step": 860600 }, { "epoch": 4.84, "learning_rate": 1.6360028555849734e-06, "loss": 0.1637, "step": 860700 }, { "epoch": 4.84, "learning_rate": 1.630381625322518e-06, "loss": 0.1565, "step": 860800 }, { "epoch": 4.84, "learning_rate": 1.624760395060063e-06, "loss": 0.1603, "step": 860900 }, { "epoch": 4.84, "learning_rate": 1.6191391647976076e-06, "loss": 0.1632, "step": 861000 }, { "epoch": 4.84, "learning_rate": 1.6135179345351526e-06, "loss": 0.1634, "step": 861100 }, { "epoch": 4.84, "learning_rate": 1.6078967042726972e-06, "loss": 0.1599, "step": 861200 }, { "epoch": 4.84, "learning_rate": 1.6022754740102422e-06, "loss": 0.1569, "step": 861300 }, { "epoch": 4.84, "learning_rate": 1.5966542437477868e-06, "loss": 0.166, "step": 861400 }, { "epoch": 4.84, "learning_rate": 1.5910330134853313e-06, "loss": 0.1545, "step": 861500 }, { "epoch": 4.84, "learning_rate": 1.5854117832228764e-06, "loss": 0.1615, "step": 861600 }, { "epoch": 4.84, "learning_rate": 1.579790552960421e-06, "loss": 0.1627, "step": 861700 }, { "epoch": 4.84, "learning_rate": 1.574169322697966e-06, "loss": 0.1643, "step": 861800 }, { "epoch": 4.84, "learning_rate": 1.5685480924355105e-06, "loss": 0.1575, "step": 861900 }, { "epoch": 4.85, "learning_rate": 1.5629268621730551e-06, "loss": 0.1602, "step": 862000 }, { "epoch": 4.85, "learning_rate": 1.5573056319106001e-06, "loss": 0.1603, "step": 862100 }, { "epoch": 4.85, "learning_rate": 1.5516844016481447e-06, "loss": 0.1659, "step": 862200 }, { "epoch": 4.85, "learning_rate": 1.5460631713856895e-06, "loss": 0.1626, "step": 862300 }, { "epoch": 4.85, "learning_rate": 1.5404419411232343e-06, "loss": 0.1669, "step": 862400 }, { "epoch": 4.85, "learning_rate": 1.534820710860779e-06, "loss": 0.1673, "step": 862500 }, { "epoch": 4.85, "learning_rate": 1.5291994805983239e-06, "loss": 0.1542, "step": 862600 }, { "epoch": 4.85, "learning_rate": 1.5235782503358685e-06, "loss": 0.1604, "step": 862700 }, { "epoch": 4.85, "learning_rate": 1.5179570200734133e-06, "loss": 0.1607, "step": 862800 }, { "epoch": 4.85, "learning_rate": 1.512335789810958e-06, "loss": 0.1636, "step": 862900 }, { "epoch": 4.85, "learning_rate": 1.5067145595485028e-06, "loss": 0.1612, "step": 863000 }, { "epoch": 4.85, "learning_rate": 1.5010933292860476e-06, "loss": 0.1598, "step": 863100 }, { "epoch": 4.85, "learning_rate": 1.4954720990235922e-06, "loss": 0.1629, "step": 863200 }, { "epoch": 4.85, "learning_rate": 1.489850868761137e-06, "loss": 0.1583, "step": 863300 }, { "epoch": 4.85, "learning_rate": 1.4842296384986818e-06, "loss": 0.1634, "step": 863400 }, { "epoch": 4.85, "learning_rate": 1.4786084082362266e-06, "loss": 0.1616, "step": 863500 }, { "epoch": 4.85, "learning_rate": 1.4729871779737714e-06, "loss": 0.1609, "step": 863600 }, { "epoch": 4.86, "learning_rate": 1.4673659477113162e-06, "loss": 0.1603, "step": 863700 }, { "epoch": 4.86, "learning_rate": 1.4617447174488608e-06, "loss": 0.1634, "step": 863800 }, { "epoch": 4.86, "learning_rate": 1.4561234871864056e-06, "loss": 0.1661, "step": 863900 }, { "epoch": 4.86, "learning_rate": 1.4505022569239504e-06, "loss": 0.1634, "step": 864000 }, { "epoch": 4.86, "learning_rate": 1.4448810266614952e-06, "loss": 0.1606, "step": 864100 }, { "epoch": 4.86, "learning_rate": 1.43925979639904e-06, "loss": 0.1562, "step": 864200 }, { "epoch": 4.86, "learning_rate": 1.4336947784392093e-06, "loss": 0.1647, "step": 864300 }, { "epoch": 4.86, "learning_rate": 1.428073548176754e-06, "loss": 0.1659, "step": 864400 }, { "epoch": 4.86, "learning_rate": 1.4224523179142989e-06, "loss": 0.1648, "step": 864500 }, { "epoch": 4.86, "learning_rate": 1.4168310876518435e-06, "loss": 0.1594, "step": 864600 }, { "epoch": 4.86, "learning_rate": 1.4112098573893883e-06, "loss": 0.1561, "step": 864700 }, { "epoch": 4.86, "learning_rate": 1.4056448394295578e-06, "loss": 0.1555, "step": 864800 }, { "epoch": 4.86, "learning_rate": 1.4000236091671024e-06, "loss": 0.1579, "step": 864900 }, { "epoch": 4.86, "learning_rate": 1.3944023789046472e-06, "loss": 0.158, "step": 865000 }, { "epoch": 4.86, "learning_rate": 1.388781148642192e-06, "loss": 0.1677, "step": 865100 }, { "epoch": 4.86, "learning_rate": 1.3831599183797368e-06, "loss": 0.1569, "step": 865200 }, { "epoch": 4.86, "learning_rate": 1.3775386881172815e-06, "loss": 0.162, "step": 865300 }, { "epoch": 4.86, "learning_rate": 1.3719174578548261e-06, "loss": 0.1667, "step": 865400 }, { "epoch": 4.87, "learning_rate": 1.366296227592371e-06, "loss": 0.1636, "step": 865500 }, { "epoch": 4.87, "learning_rate": 1.3606749973299157e-06, "loss": 0.1596, "step": 865600 }, { "epoch": 4.87, "learning_rate": 1.3550537670674605e-06, "loss": 0.161, "step": 865700 }, { "epoch": 4.87, "learning_rate": 1.3494325368050053e-06, "loss": 0.1605, "step": 865800 }, { "epoch": 4.87, "learning_rate": 1.3438113065425501e-06, "loss": 0.1591, "step": 865900 }, { "epoch": 4.87, "learning_rate": 1.3381900762800947e-06, "loss": 0.1623, "step": 866000 }, { "epoch": 4.87, "learning_rate": 1.3325688460176395e-06, "loss": 0.1594, "step": 866100 }, { "epoch": 4.87, "learning_rate": 1.3269476157551843e-06, "loss": 0.1648, "step": 866200 }, { "epoch": 4.87, "learning_rate": 1.321326385492729e-06, "loss": 0.1617, "step": 866300 }, { "epoch": 4.87, "learning_rate": 1.3157051552302739e-06, "loss": 0.157, "step": 866400 }, { "epoch": 4.87, "learning_rate": 1.3100839249678185e-06, "loss": 0.1551, "step": 866500 }, { "epoch": 4.87, "learning_rate": 1.3044626947053632e-06, "loss": 0.1653, "step": 866600 }, { "epoch": 4.87, "learning_rate": 1.298841464442908e-06, "loss": 0.1583, "step": 866700 }, { "epoch": 4.87, "learning_rate": 1.2932202341804528e-06, "loss": 0.1606, "step": 866800 }, { "epoch": 4.87, "learning_rate": 1.2875990039179976e-06, "loss": 0.1643, "step": 866900 }, { "epoch": 4.87, "learning_rate": 1.2819777736555422e-06, "loss": 0.1641, "step": 867000 }, { "epoch": 4.87, "learning_rate": 1.276356543393087e-06, "loss": 0.1609, "step": 867100 }, { "epoch": 4.87, "learning_rate": 1.2707353131306318e-06, "loss": 0.1688, "step": 867200 }, { "epoch": 4.88, "learning_rate": 1.2651140828681766e-06, "loss": 0.1656, "step": 867300 }, { "epoch": 4.88, "learning_rate": 1.2594928526057214e-06, "loss": 0.1641, "step": 867400 }, { "epoch": 4.88, "learning_rate": 1.2539278346458907e-06, "loss": 0.1634, "step": 867500 }, { "epoch": 4.88, "learning_rate": 1.2483066043834355e-06, "loss": 0.1671, "step": 867600 }, { "epoch": 4.88, "learning_rate": 1.2426853741209803e-06, "loss": 0.1607, "step": 867700 }, { "epoch": 4.88, "learning_rate": 1.2370641438585249e-06, "loss": 0.1632, "step": 867800 }, { "epoch": 4.88, "learning_rate": 1.2314429135960697e-06, "loss": 0.1611, "step": 867900 }, { "epoch": 4.88, "learning_rate": 1.2258216833336145e-06, "loss": 0.1624, "step": 868000 }, { "epoch": 4.88, "learning_rate": 1.2202004530711593e-06, "loss": 0.1614, "step": 868100 }, { "epoch": 4.88, "learning_rate": 1.214579222808704e-06, "loss": 0.1629, "step": 868200 }, { "epoch": 4.88, "learning_rate": 1.2089579925462489e-06, "loss": 0.1622, "step": 868300 }, { "epoch": 4.88, "learning_rate": 1.2033367622837934e-06, "loss": 0.1642, "step": 868400 }, { "epoch": 4.88, "learning_rate": 1.1977155320213382e-06, "loss": 0.1573, "step": 868500 }, { "epoch": 4.88, "learning_rate": 1.192094301758883e-06, "loss": 0.1649, "step": 868600 }, { "epoch": 4.88, "learning_rate": 1.1864730714964278e-06, "loss": 0.1595, "step": 868700 }, { "epoch": 4.88, "learning_rate": 1.1808518412339726e-06, "loss": 0.1615, "step": 868800 }, { "epoch": 4.88, "learning_rate": 1.1752306109715172e-06, "loss": 0.16, "step": 868900 }, { "epoch": 4.88, "learning_rate": 1.169609380709062e-06, "loss": 0.1614, "step": 869000 }, { "epoch": 4.89, "learning_rate": 1.1639881504466068e-06, "loss": 0.164, "step": 869100 }, { "epoch": 4.89, "learning_rate": 1.1583669201841516e-06, "loss": 0.1645, "step": 869200 }, { "epoch": 4.89, "learning_rate": 1.1527456899216964e-06, "loss": 0.1647, "step": 869300 }, { "epoch": 4.89, "learning_rate": 1.147124459659241e-06, "loss": 0.1577, "step": 869400 }, { "epoch": 4.89, "learning_rate": 1.1415032293967858e-06, "loss": 0.159, "step": 869500 }, { "epoch": 4.89, "learning_rate": 1.1358819991343306e-06, "loss": 0.1631, "step": 869600 }, { "epoch": 4.89, "learning_rate": 1.1302607688718754e-06, "loss": 0.153, "step": 869700 }, { "epoch": 4.89, "learning_rate": 1.1246395386094202e-06, "loss": 0.1625, "step": 869800 }, { "epoch": 4.89, "learning_rate": 1.119018308346965e-06, "loss": 0.1645, "step": 869900 }, { "epoch": 4.89, "learning_rate": 1.1133970780845095e-06, "loss": 0.1602, "step": 870000 }, { "epoch": 4.89, "learning_rate": 1.1077758478220543e-06, "loss": 0.1613, "step": 870100 }, { "epoch": 4.89, "learning_rate": 1.1021546175595991e-06, "loss": 0.1611, "step": 870200 }, { "epoch": 4.89, "learning_rate": 1.096533387297144e-06, "loss": 0.1664, "step": 870300 }, { "epoch": 4.89, "learning_rate": 1.0909121570346887e-06, "loss": 0.1614, "step": 870400 }, { "epoch": 4.89, "learning_rate": 1.0852909267722333e-06, "loss": 0.1612, "step": 870500 }, { "epoch": 4.89, "learning_rate": 1.079669696509778e-06, "loss": 0.1558, "step": 870600 }, { "epoch": 4.89, "learning_rate": 1.0740484662473229e-06, "loss": 0.166, "step": 870700 }, { "epoch": 4.89, "learning_rate": 1.0684272359848677e-06, "loss": 0.1566, "step": 870800 }, { "epoch": 4.9, "learning_rate": 1.062862218025037e-06, "loss": 0.1582, "step": 870900 }, { "epoch": 4.9, "learning_rate": 1.0572409877625818e-06, "loss": 0.1643, "step": 871000 }, { "epoch": 4.9, "learning_rate": 1.0516197575001266e-06, "loss": 0.161, "step": 871100 }, { "epoch": 4.9, "learning_rate": 1.0459985272376714e-06, "loss": 0.157, "step": 871200 }, { "epoch": 4.9, "learning_rate": 1.040377296975216e-06, "loss": 0.1593, "step": 871300 }, { "epoch": 4.9, "learning_rate": 1.0347560667127608e-06, "loss": 0.1644, "step": 871400 }, { "epoch": 4.9, "learning_rate": 1.0291348364503056e-06, "loss": 0.1684, "step": 871500 }, { "epoch": 4.9, "learning_rate": 1.0235698184904749e-06, "loss": 0.159, "step": 871600 }, { "epoch": 4.9, "learning_rate": 1.0179485882280197e-06, "loss": 0.1617, "step": 871700 }, { "epoch": 4.9, "learning_rate": 1.0123273579655645e-06, "loss": 0.1617, "step": 871800 }, { "epoch": 4.9, "learning_rate": 1.0067061277031093e-06, "loss": 0.1632, "step": 871900 }, { "epoch": 4.9, "learning_rate": 1.001084897440654e-06, "loss": 0.1639, "step": 872000 }, { "epoch": 4.9, "learning_rate": 9.954636671781989e-07, "loss": 0.1659, "step": 872100 }, { "epoch": 4.9, "learning_rate": 9.898424369157434e-07, "loss": 0.1605, "step": 872200 }, { "epoch": 4.9, "learning_rate": 9.842212066532882e-07, "loss": 0.1559, "step": 872300 }, { "epoch": 4.9, "learning_rate": 9.78599976390833e-07, "loss": 0.1638, "step": 872400 }, { "epoch": 4.9, "learning_rate": 9.729787461283778e-07, "loss": 0.1598, "step": 872500 }, { "epoch": 4.91, "learning_rate": 9.673575158659226e-07, "loss": 0.1592, "step": 872600 }, { "epoch": 4.91, "learning_rate": 9.617362856034672e-07, "loss": 0.1575, "step": 872700 }, { "epoch": 4.91, "learning_rate": 9.56115055341012e-07, "loss": 0.1622, "step": 872800 }, { "epoch": 4.91, "learning_rate": 9.504938250785567e-07, "loss": 0.1598, "step": 872900 }, { "epoch": 4.91, "learning_rate": 9.448725948161015e-07, "loss": 0.1584, "step": 873000 }, { "epoch": 4.91, "learning_rate": 9.392513645536463e-07, "loss": 0.166, "step": 873100 }, { "epoch": 4.91, "learning_rate": 9.33630134291191e-07, "loss": 0.1642, "step": 873200 }, { "epoch": 4.91, "learning_rate": 9.280089040287358e-07, "loss": 0.1643, "step": 873300 }, { "epoch": 4.91, "learning_rate": 9.223876737662806e-07, "loss": 0.1568, "step": 873400 }, { "epoch": 4.91, "learning_rate": 9.167664435038253e-07, "loss": 0.1599, "step": 873500 }, { "epoch": 4.91, "learning_rate": 9.111452132413701e-07, "loss": 0.1656, "step": 873600 }, { "epoch": 4.91, "learning_rate": 9.055239829789149e-07, "loss": 0.1609, "step": 873700 }, { "epoch": 4.91, "learning_rate": 8.999027527164595e-07, "loss": 0.1544, "step": 873800 }, { "epoch": 4.91, "learning_rate": 8.942815224540043e-07, "loss": 0.167, "step": 873900 }, { "epoch": 4.91, "learning_rate": 8.886602921915491e-07, "loss": 0.1594, "step": 874000 }, { "epoch": 4.91, "learning_rate": 8.830390619290939e-07, "loss": 0.1597, "step": 874100 }, { "epoch": 4.91, "learning_rate": 8.774178316666387e-07, "loss": 0.1679, "step": 874200 }, { "epoch": 4.91, "learning_rate": 8.717966014041833e-07, "loss": 0.1594, "step": 874300 }, { "epoch": 4.92, "learning_rate": 8.661753711417281e-07, "loss": 0.1617, "step": 874400 }, { "epoch": 4.92, "learning_rate": 8.605541408792729e-07, "loss": 0.1578, "step": 874500 }, { "epoch": 4.92, "learning_rate": 8.549329106168177e-07, "loss": 0.1602, "step": 874600 }, { "epoch": 4.92, "learning_rate": 8.493116803543625e-07, "loss": 0.1602, "step": 874700 }, { "epoch": 4.92, "learning_rate": 8.43690450091907e-07, "loss": 0.1631, "step": 874800 }, { "epoch": 4.92, "learning_rate": 8.380692198294518e-07, "loss": 0.1645, "step": 874900 }, { "epoch": 4.92, "learning_rate": 8.324479895669966e-07, "loss": 0.1618, "step": 875000 }, { "epoch": 4.92, "learning_rate": 8.268267593045414e-07, "loss": 0.1661, "step": 875100 }, { "epoch": 4.92, "learning_rate": 8.212055290420862e-07, "loss": 0.1585, "step": 875200 }, { "epoch": 4.92, "learning_rate": 8.156405110822554e-07, "loss": 0.1649, "step": 875300 }, { "epoch": 4.92, "learning_rate": 8.100192808198002e-07, "loss": 0.1657, "step": 875400 }, { "epoch": 4.92, "learning_rate": 8.04398050557345e-07, "loss": 0.1697, "step": 875500 }, { "epoch": 4.92, "learning_rate": 7.987768202948897e-07, "loss": 0.1596, "step": 875600 }, { "epoch": 4.92, "learning_rate": 7.931555900324345e-07, "loss": 0.1642, "step": 875700 }, { "epoch": 4.92, "learning_rate": 7.875343597699793e-07, "loss": 0.1641, "step": 875800 }, { "epoch": 4.92, "learning_rate": 7.819131295075241e-07, "loss": 0.1568, "step": 875900 }, { "epoch": 4.92, "learning_rate": 7.762918992450688e-07, "loss": 0.1565, "step": 876000 }, { "epoch": 4.92, "learning_rate": 7.706706689826136e-07, "loss": 0.1602, "step": 876100 }, { "epoch": 4.93, "learning_rate": 7.650494387201584e-07, "loss": 0.1624, "step": 876200 }, { "epoch": 4.93, "learning_rate": 7.594282084577031e-07, "loss": 0.1607, "step": 876300 }, { "epoch": 4.93, "learning_rate": 7.538069781952479e-07, "loss": 0.1669, "step": 876400 }, { "epoch": 4.93, "learning_rate": 7.481857479327927e-07, "loss": 0.1629, "step": 876500 }, { "epoch": 4.93, "learning_rate": 7.425645176703373e-07, "loss": 0.1552, "step": 876600 }, { "epoch": 4.93, "learning_rate": 7.369432874078821e-07, "loss": 0.1604, "step": 876700 }, { "epoch": 4.93, "learning_rate": 7.313220571454268e-07, "loss": 0.1594, "step": 876800 }, { "epoch": 4.93, "learning_rate": 7.257008268829716e-07, "loss": 0.1597, "step": 876900 }, { "epoch": 4.93, "learning_rate": 7.200795966205164e-07, "loss": 0.1666, "step": 877000 }, { "epoch": 4.93, "learning_rate": 7.144583663580611e-07, "loss": 0.1646, "step": 877100 }, { "epoch": 4.93, "learning_rate": 7.088371360956059e-07, "loss": 0.1632, "step": 877200 }, { "epoch": 4.93, "learning_rate": 7.032159058331507e-07, "loss": 0.1605, "step": 877300 }, { "epoch": 4.93, "learning_rate": 6.975946755706954e-07, "loss": 0.1564, "step": 877400 }, { "epoch": 4.93, "learning_rate": 6.919734453082402e-07, "loss": 0.161, "step": 877500 }, { "epoch": 4.93, "learning_rate": 6.863522150457849e-07, "loss": 0.1659, "step": 877600 }, { "epoch": 4.93, "learning_rate": 6.807309847833297e-07, "loss": 0.1626, "step": 877700 }, { "epoch": 4.93, "learning_rate": 6.751097545208745e-07, "loss": 0.1607, "step": 877800 }, { "epoch": 4.93, "learning_rate": 6.694885242584192e-07, "loss": 0.1575, "step": 877900 }, { "epoch": 4.94, "learning_rate": 6.638672939959639e-07, "loss": 0.1634, "step": 878000 }, { "epoch": 4.94, "learning_rate": 6.582460637335087e-07, "loss": 0.1603, "step": 878100 }, { "epoch": 4.94, "learning_rate": 6.526248334710535e-07, "loss": 0.1603, "step": 878200 }, { "epoch": 4.94, "learning_rate": 6.470036032085983e-07, "loss": 0.1615, "step": 878300 }, { "epoch": 4.94, "learning_rate": 6.41382372946143e-07, "loss": 0.1655, "step": 878400 }, { "epoch": 4.94, "learning_rate": 6.358173549863123e-07, "loss": 0.1591, "step": 878500 }, { "epoch": 4.94, "learning_rate": 6.301961247238571e-07, "loss": 0.1623, "step": 878600 }, { "epoch": 4.94, "learning_rate": 6.245748944614018e-07, "loss": 0.1628, "step": 878700 }, { "epoch": 4.94, "learning_rate": 6.189536641989466e-07, "loss": 0.1633, "step": 878800 }, { "epoch": 4.94, "learning_rate": 6.133324339364914e-07, "loss": 0.1564, "step": 878900 }, { "epoch": 4.94, "learning_rate": 6.077112036740361e-07, "loss": 0.1632, "step": 879000 }, { "epoch": 4.94, "learning_rate": 6.020899734115809e-07, "loss": 0.1649, "step": 879100 }, { "epoch": 4.94, "learning_rate": 5.964687431491257e-07, "loss": 0.1645, "step": 879200 }, { "epoch": 4.94, "learning_rate": 5.908475128866704e-07, "loss": 0.1635, "step": 879300 }, { "epoch": 4.94, "learning_rate": 5.852262826242152e-07, "loss": 0.1627, "step": 879400 }, { "epoch": 4.94, "learning_rate": 5.796050523617599e-07, "loss": 0.1606, "step": 879500 }, { "epoch": 4.94, "learning_rate": 5.739838220993047e-07, "loss": 0.1598, "step": 879600 }, { "epoch": 4.94, "learning_rate": 5.683625918368495e-07, "loss": 0.1617, "step": 879700 }, { "epoch": 4.95, "learning_rate": 5.627413615743941e-07, "loss": 0.1596, "step": 879800 }, { "epoch": 4.95, "learning_rate": 5.571201313119389e-07, "loss": 0.1625, "step": 879900 }, { "epoch": 4.95, "learning_rate": 5.514989010494837e-07, "loss": 0.1622, "step": 880000 }, { "epoch": 4.95, "learning_rate": 5.458776707870284e-07, "loss": 0.1602, "step": 880100 }, { "epoch": 4.95, "learning_rate": 5.402564405245732e-07, "loss": 0.1608, "step": 880200 }, { "epoch": 4.95, "learning_rate": 5.346352102621179e-07, "loss": 0.1628, "step": 880300 }, { "epoch": 4.95, "learning_rate": 5.290139799996627e-07, "loss": 0.1631, "step": 880400 }, { "epoch": 4.95, "learning_rate": 5.233927497372075e-07, "loss": 0.1643, "step": 880500 }, { "epoch": 4.95, "learning_rate": 5.177715194747523e-07, "loss": 0.1642, "step": 880600 }, { "epoch": 4.95, "learning_rate": 5.121502892122971e-07, "loss": 0.1564, "step": 880700 }, { "epoch": 4.95, "learning_rate": 5.065290589498418e-07, "loss": 0.1564, "step": 880800 }, { "epoch": 4.95, "learning_rate": 5.009078286873866e-07, "loss": 0.1587, "step": 880900 }, { "epoch": 4.95, "learning_rate": 4.952865984249314e-07, "loss": 0.164, "step": 881000 }, { "epoch": 4.95, "learning_rate": 4.896653681624761e-07, "loss": 0.1615, "step": 881100 }, { "epoch": 4.95, "learning_rate": 4.840441379000209e-07, "loss": 0.1643, "step": 881200 }, { "epoch": 4.95, "learning_rate": 4.784229076375656e-07, "loss": 0.1597, "step": 881300 }, { "epoch": 4.95, "learning_rate": 4.7280167737511033e-07, "loss": 0.1618, "step": 881400 }, { "epoch": 4.96, "learning_rate": 4.6718044711265513e-07, "loss": 0.1629, "step": 881500 }, { "epoch": 4.96, "learning_rate": 4.615592168501999e-07, "loss": 0.1605, "step": 881600 }, { "epoch": 4.96, "learning_rate": 4.559379865877446e-07, "loss": 0.1597, "step": 881700 }, { "epoch": 4.96, "learning_rate": 4.503167563252894e-07, "loss": 0.16, "step": 881800 }, { "epoch": 4.96, "learning_rate": 4.446955260628341e-07, "loss": 0.165, "step": 881900 }, { "epoch": 4.96, "learning_rate": 4.390742958003789e-07, "loss": 0.1692, "step": 882000 }, { "epoch": 4.96, "learning_rate": 4.334530655379237e-07, "loss": 0.1613, "step": 882100 }, { "epoch": 4.96, "learning_rate": 4.278318352754684e-07, "loss": 0.1608, "step": 882200 }, { "epoch": 4.96, "learning_rate": 4.2221060501301317e-07, "loss": 0.1648, "step": 882300 }, { "epoch": 4.96, "learning_rate": 4.1658937475055786e-07, "loss": 0.1612, "step": 882400 }, { "epoch": 4.96, "learning_rate": 4.1096814448810265e-07, "loss": 0.1579, "step": 882500 }, { "epoch": 4.96, "learning_rate": 4.0534691422564745e-07, "loss": 0.167, "step": 882600 }, { "epoch": 4.96, "learning_rate": 3.997256839631922e-07, "loss": 0.1602, "step": 882700 }, { "epoch": 4.96, "learning_rate": 3.94104453700737e-07, "loss": 0.1605, "step": 882800 }, { "epoch": 4.96, "learning_rate": 3.885394357409063e-07, "loss": 0.1603, "step": 882900 }, { "epoch": 4.96, "learning_rate": 3.8291820547845105e-07, "loss": 0.1614, "step": 883000 }, { "epoch": 4.96, "learning_rate": 3.772969752159958e-07, "loss": 0.1607, "step": 883100 }, { "epoch": 4.96, "learning_rate": 3.7167574495354053e-07, "loss": 0.1592, "step": 883200 }, { "epoch": 4.97, "learning_rate": 3.660545146910853e-07, "loss": 0.1612, "step": 883300 }, { "epoch": 4.97, "learning_rate": 3.6043328442863007e-07, "loss": 0.1604, "step": 883400 }, { "epoch": 4.97, "learning_rate": 3.548120541661748e-07, "loss": 0.1621, "step": 883500 }, { "epoch": 4.97, "learning_rate": 3.4919082390371955e-07, "loss": 0.1626, "step": 883600 }, { "epoch": 4.97, "learning_rate": 3.435695936412643e-07, "loss": 0.1598, "step": 883700 }, { "epoch": 4.97, "learning_rate": 3.379483633788091e-07, "loss": 0.1714, "step": 883800 }, { "epoch": 4.97, "learning_rate": 3.323271331163539e-07, "loss": 0.1615, "step": 883900 }, { "epoch": 4.97, "learning_rate": 3.2670590285389863e-07, "loss": 0.1634, "step": 884000 }, { "epoch": 4.97, "learning_rate": 3.2108467259144337e-07, "loss": 0.1596, "step": 884100 }, { "epoch": 4.97, "learning_rate": 3.1546344232898816e-07, "loss": 0.1569, "step": 884200 }, { "epoch": 4.97, "learning_rate": 3.098422120665329e-07, "loss": 0.1605, "step": 884300 }, { "epoch": 4.97, "learning_rate": 3.0422098180407765e-07, "loss": 0.1578, "step": 884400 }, { "epoch": 4.97, "learning_rate": 2.9865596384424697e-07, "loss": 0.1642, "step": 884500 }, { "epoch": 4.97, "learning_rate": 2.930347335817917e-07, "loss": 0.1595, "step": 884600 }, { "epoch": 4.97, "learning_rate": 2.8741350331933646e-07, "loss": 0.1583, "step": 884700 }, { "epoch": 4.97, "learning_rate": 2.817922730568812e-07, "loss": 0.162, "step": 884800 }, { "epoch": 4.97, "learning_rate": 2.76171042794426e-07, "loss": 0.1574, "step": 884900 }, { "epoch": 4.97, "learning_rate": 2.705498125319708e-07, "loss": 0.1534, "step": 885000 }, { "epoch": 4.98, "learning_rate": 2.6492858226951553e-07, "loss": 0.1582, "step": 885100 }, { "epoch": 4.98, "learning_rate": 2.5930735200706027e-07, "loss": 0.1624, "step": 885200 }, { "epoch": 4.98, "learning_rate": 2.5368612174460507e-07, "loss": 0.1587, "step": 885300 }, { "epoch": 4.98, "learning_rate": 2.480648914821498e-07, "loss": 0.1563, "step": 885400 }, { "epoch": 4.98, "learning_rate": 2.4244366121969455e-07, "loss": 0.1654, "step": 885500 }, { "epoch": 4.98, "learning_rate": 2.368224309572393e-07, "loss": 0.1621, "step": 885600 }, { "epoch": 4.98, "learning_rate": 2.3120120069478409e-07, "loss": 0.1618, "step": 885700 }, { "epoch": 4.98, "learning_rate": 2.2557997043232883e-07, "loss": 0.1595, "step": 885800 }, { "epoch": 4.98, "learning_rate": 2.1995874016987357e-07, "loss": 0.1623, "step": 885900 }, { "epoch": 4.98, "learning_rate": 2.1433750990741834e-07, "loss": 0.1588, "step": 886000 }, { "epoch": 4.98, "learning_rate": 2.0871627964496313e-07, "loss": 0.1607, "step": 886100 }, { "epoch": 4.98, "learning_rate": 2.0309504938250788e-07, "loss": 0.1542, "step": 886200 }, { "epoch": 4.98, "learning_rate": 1.9747381912005262e-07, "loss": 0.1623, "step": 886300 }, { "epoch": 4.98, "learning_rate": 1.9185258885759739e-07, "loss": 0.1628, "step": 886400 }, { "epoch": 4.98, "learning_rate": 1.8623135859514213e-07, "loss": 0.1673, "step": 886500 }, { "epoch": 4.98, "learning_rate": 1.806101283326869e-07, "loss": 0.1613, "step": 886600 }, { "epoch": 4.98, "learning_rate": 1.7498889807023166e-07, "loss": 0.1571, "step": 886700 }, { "epoch": 4.98, "learning_rate": 1.6936766780777643e-07, "loss": 0.1616, "step": 886800 }, { "epoch": 4.99, "learning_rate": 1.6374643754532117e-07, "loss": 0.1603, "step": 886900 }, { "epoch": 4.99, "learning_rate": 1.5818141958549047e-07, "loss": 0.1647, "step": 887000 }, { "epoch": 4.99, "learning_rate": 1.5256018932303524e-07, "loss": 0.1655, "step": 887100 }, { "epoch": 4.99, "learning_rate": 1.4693895906058e-07, "loss": 0.1589, "step": 887200 }, { "epoch": 4.99, "learning_rate": 1.4131772879812478e-07, "loss": 0.1656, "step": 887300 }, { "epoch": 4.99, "learning_rate": 1.3569649853566952e-07, "loss": 0.1586, "step": 887400 }, { "epoch": 4.99, "learning_rate": 1.3007526827321429e-07, "loss": 0.1642, "step": 887500 }, { "epoch": 4.99, "learning_rate": 1.2445403801075903e-07, "loss": 0.1574, "step": 887600 }, { "epoch": 4.99, "learning_rate": 1.1883280774830381e-07, "loss": 0.1584, "step": 887700 }, { "epoch": 4.99, "learning_rate": 1.1321157748584855e-07, "loss": 0.1642, "step": 887800 }, { "epoch": 4.99, "learning_rate": 1.0759034722339332e-07, "loss": 0.1608, "step": 887900 }, { "epoch": 4.99, "learning_rate": 1.0196911696093808e-07, "loss": 0.1634, "step": 888000 }, { "epoch": 4.99, "learning_rate": 9.634788669848283e-08, "loss": 0.1654, "step": 888100 }, { "epoch": 4.99, "learning_rate": 9.072665643602759e-08, "loss": 0.158, "step": 888200 }, { "epoch": 4.99, "learning_rate": 8.510542617357235e-08, "loss": 0.1569, "step": 888300 }, { "epoch": 4.99, "learning_rate": 7.948419591111711e-08, "loss": 0.1644, "step": 888400 }, { "epoch": 4.99, "learning_rate": 7.386296564866186e-08, "loss": 0.1607, "step": 888500 }, { "epoch": 5.0, "learning_rate": 6.824173538620662e-08, "loss": 0.1668, "step": 888600 }, { "epoch": 5.0, "learning_rate": 6.262050512375139e-08, "loss": 0.1691, "step": 888700 }, { "epoch": 5.0, "learning_rate": 5.699927486129614e-08, "loss": 0.1632, "step": 888800 }, { "epoch": 5.0, "learning_rate": 5.1378044598840904e-08, "loss": 0.1616, "step": 888900 }, { "epoch": 5.0, "learning_rate": 4.575681433638566e-08, "loss": 0.1613, "step": 889000 }, { "epoch": 5.0, "learning_rate": 4.013558407393042e-08, "loss": 0.159, "step": 889100 }, { "epoch": 5.0, "learning_rate": 3.4514353811475176e-08, "loss": 0.1656, "step": 889200 }, { "epoch": 5.0, "learning_rate": 2.8893123549019938e-08, "loss": 0.1598, "step": 889300 }, { "epoch": 5.0, "learning_rate": 2.32718932865647e-08, "loss": 0.1655, "step": 889400 }, { "epoch": 5.0, "eval_bleu": 78.2542, "eval_cer": 2.1669, "eval_chrF": 95.73544319509031, "eval_gen_len": 16.777936, "eval_loss": 0.48148927092552185, "eval_runtime": 7460.8529, "eval_samples_per_second": 33.508, "eval_steps_per_second": 0.524, "eval_wer": 12.1294, "step": 889485 }, { "epoch": 5.0, "step": 889485, "total_flos": 9.809053433870746e+17, "train_loss": 0.2851236777235189, "train_runtime": 119967.4266, "train_samples_per_second": 474.518, "train_steps_per_second": 7.414 } ], "logging_steps": 100, "max_steps": 889485, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 9.809053433870746e+17, "trial_name": null, "trial_params": null }