{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 350000, "global_step": 1472295, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.1294172439890552e-07, "loss": 9.4022, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.2821214414624206e-07, "loss": 9.2918, "step": 200 }, { "epoch": 0.0, "learning_rate": 3.4464691156779416e-07, "loss": 9.011, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.6108167898934623e-07, "loss": 8.5504, "step": 400 }, { "epoch": 0.01, "learning_rate": 5.763520987366827e-07, "loss": 8.043, "step": 500 }, { "epoch": 0.01, "learning_rate": 6.927868661582348e-07, "loss": 7.7445, "step": 600 }, { "epoch": 0.01, "learning_rate": 8.092216335797869e-07, "loss": 7.5088, "step": 700 }, { "epoch": 0.01, "learning_rate": 9.25656401001339e-07, "loss": 7.3762, "step": 800 }, { "epoch": 0.01, "learning_rate": 1.0420911684228912e-06, "loss": 7.2198, "step": 900 }, { "epoch": 0.01, "learning_rate": 1.158525935844443e-06, "loss": 7.1566, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.2749607032659952e-06, "loss": 7.0508, "step": 1100 }, { "epoch": 0.01, "learning_rate": 1.3913954706875475e-06, "loss": 6.9891, "step": 1200 }, { "epoch": 0.01, "learning_rate": 1.5078302381090993e-06, "loss": 6.8779, "step": 1300 }, { "epoch": 0.01, "learning_rate": 1.6242650055306516e-06, "loss": 6.8091, "step": 1400 }, { "epoch": 0.02, "learning_rate": 1.7406997729522035e-06, "loss": 6.7784, "step": 1500 }, { "epoch": 0.02, "learning_rate": 1.8571345403737557e-06, "loss": 6.6996, "step": 1600 }, { "epoch": 0.02, "learning_rate": 1.973569307795308e-06, "loss": 6.6599, "step": 1700 }, { "epoch": 0.02, "learning_rate": 2.09000407521686e-06, "loss": 6.6455, "step": 1800 }, { "epoch": 0.02, "learning_rate": 2.206438842638412e-06, "loss": 6.5771, "step": 1900 }, { "epoch": 0.02, "learning_rate": 2.322873610059964e-06, "loss": 6.5437, "step": 2000 }, { "epoch": 0.02, "learning_rate": 2.4393083774815157e-06, "loss": 6.4812, "step": 2100 }, { "epoch": 0.02, "learning_rate": 2.555743144903068e-06, "loss": 6.3998, "step": 2200 }, { "epoch": 0.02, "learning_rate": 2.6721779123246202e-06, "loss": 6.4008, "step": 2300 }, { "epoch": 0.02, "learning_rate": 2.7886126797461723e-06, "loss": 6.3409, "step": 2400 }, { "epoch": 0.03, "learning_rate": 2.9050474471677244e-06, "loss": 6.3212, "step": 2500 }, { "epoch": 0.03, "learning_rate": 3.0214822145892764e-06, "loss": 6.3122, "step": 2600 }, { "epoch": 0.03, "learning_rate": 3.1379169820108285e-06, "loss": 6.267, "step": 2700 }, { "epoch": 0.03, "learning_rate": 3.2543517494323806e-06, "loss": 6.2042, "step": 2800 }, { "epoch": 0.03, "learning_rate": 3.3707865168539327e-06, "loss": 6.2018, "step": 2900 }, { "epoch": 0.03, "learning_rate": 3.4872212842754847e-06, "loss": 6.1906, "step": 3000 }, { "epoch": 0.03, "learning_rate": 3.603656051697037e-06, "loss": 6.0879, "step": 3100 }, { "epoch": 0.03, "learning_rate": 3.720090819118589e-06, "loss": 6.062, "step": 3200 }, { "epoch": 0.03, "learning_rate": 3.836525586540141e-06, "loss": 6.0293, "step": 3300 }, { "epoch": 0.03, "learning_rate": 3.952960353961693e-06, "loss": 6.0056, "step": 3400 }, { "epoch": 0.04, "learning_rate": 4.069395121383245e-06, "loss": 6.0142, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.185829888804797e-06, "loss": 5.98, "step": 3600 }, { "epoch": 0.04, "learning_rate": 4.302264656226349e-06, "loss": 5.9428, "step": 3700 }, { "epoch": 0.04, "learning_rate": 4.418699423647901e-06, "loss": 5.8527, "step": 3800 }, { "epoch": 0.04, "learning_rate": 4.535134191069453e-06, "loss": 5.8186, "step": 3900 }, { "epoch": 0.04, "learning_rate": 4.6515689584910054e-06, "loss": 5.7424, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.7680037259125575e-06, "loss": 5.8399, "step": 4100 }, { "epoch": 0.04, "learning_rate": 4.8844384933341096e-06, "loss": 5.6997, "step": 4200 }, { "epoch": 0.04, "learning_rate": 5.000873260755662e-06, "loss": 5.6402, "step": 4300 }, { "epoch": 0.04, "learning_rate": 5.117308028177214e-06, "loss": 5.6395, "step": 4400 }, { "epoch": 0.05, "learning_rate": 5.233742795598766e-06, "loss": 5.5789, "step": 4500 }, { "epoch": 0.05, "learning_rate": 5.350177563020319e-06, "loss": 5.5653, "step": 4600 }, { "epoch": 0.05, "learning_rate": 5.46661233044187e-06, "loss": 5.5319, "step": 4700 }, { "epoch": 0.05, "learning_rate": 5.583047097863422e-06, "loss": 5.4884, "step": 4800 }, { "epoch": 0.05, "learning_rate": 5.699481865284974e-06, "loss": 5.3925, "step": 4900 }, { "epoch": 0.05, "learning_rate": 5.815916632706526e-06, "loss": 5.4045, "step": 5000 }, { "epoch": 0.05, "learning_rate": 5.932351400128079e-06, "loss": 5.4015, "step": 5100 }, { "epoch": 0.05, "learning_rate": 6.04878616754963e-06, "loss": 5.2608, "step": 5200 }, { "epoch": 0.05, "learning_rate": 6.165220934971182e-06, "loss": 5.1825, "step": 5300 }, { "epoch": 0.06, "learning_rate": 6.280491354718519e-06, "loss": 5.057, "step": 5400 }, { "epoch": 0.06, "learning_rate": 6.3969261221400715e-06, "loss": 5.0588, "step": 5500 }, { "epoch": 0.06, "learning_rate": 6.513360889561624e-06, "loss": 5.0782, "step": 5600 }, { "epoch": 0.06, "learning_rate": 6.629795656983176e-06, "loss": 5.0982, "step": 5700 }, { "epoch": 0.06, "learning_rate": 6.746230424404728e-06, "loss": 4.8921, "step": 5800 }, { "epoch": 0.06, "learning_rate": 6.862665191826279e-06, "loss": 4.8616, "step": 5900 }, { "epoch": 0.06, "learning_rate": 6.979099959247832e-06, "loss": 4.8014, "step": 6000 }, { "epoch": 0.06, "learning_rate": 7.095534726669384e-06, "loss": 4.7205, "step": 6100 }, { "epoch": 0.06, "learning_rate": 7.21080514641672e-06, "loss": 4.9263, "step": 6200 }, { "epoch": 0.06, "learning_rate": 7.327239913838272e-06, "loss": 4.7103, "step": 6300 }, { "epoch": 0.07, "learning_rate": 7.4413459859113935e-06, "loss": 4.6061, "step": 6400 }, { "epoch": 0.07, "learning_rate": 7.557780753332946e-06, "loss": 4.5566, "step": 6500 }, { "epoch": 0.07, "learning_rate": 7.674215520754497e-06, "loss": 4.6434, "step": 6600 }, { "epoch": 0.07, "learning_rate": 7.790650288176048e-06, "loss": 4.4895, "step": 6700 }, { "epoch": 0.07, "learning_rate": 7.907085055597603e-06, "loss": 4.3704, "step": 6800 }, { "epoch": 0.07, "learning_rate": 8.023519823019154e-06, "loss": 4.322, "step": 6900 }, { "epoch": 0.07, "learning_rate": 8.139954590440705e-06, "loss": 4.3678, "step": 7000 }, { "epoch": 0.07, "learning_rate": 8.256389357862258e-06, "loss": 4.2371, "step": 7100 }, { "epoch": 0.07, "learning_rate": 8.37282412528381e-06, "loss": 4.197, "step": 7200 }, { "epoch": 0.07, "learning_rate": 8.489258892705362e-06, "loss": 4.1948, "step": 7300 }, { "epoch": 0.08, "learning_rate": 8.605693660126915e-06, "loss": 4.0807, "step": 7400 }, { "epoch": 0.08, "learning_rate": 8.722128427548466e-06, "loss": 4.1448, "step": 7500 }, { "epoch": 0.08, "learning_rate": 8.838563194970018e-06, "loss": 3.9823, "step": 7600 }, { "epoch": 0.08, "learning_rate": 8.954997962391569e-06, "loss": 4.0164, "step": 7700 }, { "epoch": 0.08, "learning_rate": 9.071432729813123e-06, "loss": 4.0373, "step": 7800 }, { "epoch": 0.08, "learning_rate": 9.187867497234675e-06, "loss": 3.8453, "step": 7900 }, { "epoch": 0.08, "learning_rate": 9.304302264656226e-06, "loss": 3.9272, "step": 8000 }, { "epoch": 0.08, "learning_rate": 9.420737032077779e-06, "loss": 3.7603, "step": 8100 }, { "epoch": 0.08, "learning_rate": 9.53717179949933e-06, "loss": 3.8101, "step": 8200 }, { "epoch": 0.08, "learning_rate": 9.653606566920883e-06, "loss": 3.7365, "step": 8300 }, { "epoch": 0.09, "learning_rate": 9.770041334342436e-06, "loss": 3.6733, "step": 8400 }, { "epoch": 0.09, "learning_rate": 9.886476101763987e-06, "loss": 3.594, "step": 8500 }, { "epoch": 0.09, "learning_rate": 1.0002910869185538e-05, "loss": 3.656, "step": 8600 }, { "epoch": 0.09, "learning_rate": 1.0119345636607091e-05, "loss": 3.5974, "step": 8700 }, { "epoch": 0.09, "learning_rate": 1.0235780404028644e-05, "loss": 3.5924, "step": 8800 }, { "epoch": 0.09, "learning_rate": 1.0352215171450195e-05, "loss": 3.6154, "step": 8900 }, { "epoch": 0.09, "learning_rate": 1.0468649938871747e-05, "loss": 3.583, "step": 9000 }, { "epoch": 0.09, "learning_rate": 1.05850847062933e-05, "loss": 3.4384, "step": 9100 }, { "epoch": 0.09, "learning_rate": 1.070151947371485e-05, "loss": 3.352, "step": 9200 }, { "epoch": 0.09, "learning_rate": 1.0817954241136404e-05, "loss": 3.3393, "step": 9300 }, { "epoch": 0.1, "learning_rate": 1.0934389008557957e-05, "loss": 3.3016, "step": 9400 }, { "epoch": 0.1, "learning_rate": 1.1050823775979508e-05, "loss": 3.271, "step": 9500 }, { "epoch": 0.1, "learning_rate": 1.1167258543401059e-05, "loss": 3.1705, "step": 9600 }, { "epoch": 0.1, "learning_rate": 1.1283693310822612e-05, "loss": 3.1311, "step": 9700 }, { "epoch": 0.1, "learning_rate": 1.1400128078244165e-05, "loss": 3.0696, "step": 9800 }, { "epoch": 0.1, "learning_rate": 1.1516562845665716e-05, "loss": 3.046, "step": 9900 }, { "epoch": 0.1, "learning_rate": 1.1632997613087269e-05, "loss": 3.0403, "step": 10000 }, { "epoch": 0.1, "learning_rate": 1.174943238050882e-05, "loss": 3.1051, "step": 10100 }, { "epoch": 0.1, "learning_rate": 1.1865867147930371e-05, "loss": 3.1563, "step": 10200 }, { "epoch": 0.1, "learning_rate": 1.1982301915351924e-05, "loss": 3.176, "step": 10300 }, { "epoch": 0.11, "learning_rate": 1.2098736682773477e-05, "loss": 3.0937, "step": 10400 }, { "epoch": 0.11, "learning_rate": 1.2215171450195028e-05, "loss": 2.9902, "step": 10500 }, { "epoch": 0.11, "learning_rate": 1.233160621761658e-05, "loss": 3.0412, "step": 10600 }, { "epoch": 0.11, "learning_rate": 1.2448040985038133e-05, "loss": 2.9516, "step": 10700 }, { "epoch": 0.11, "learning_rate": 1.2564475752459685e-05, "loss": 2.8995, "step": 10800 }, { "epoch": 0.11, "learning_rate": 1.2680910519881237e-05, "loss": 2.943, "step": 10900 }, { "epoch": 0.11, "learning_rate": 1.279734528730279e-05, "loss": 2.7188, "step": 11000 }, { "epoch": 0.11, "learning_rate": 1.2912615707050124e-05, "loss": 2.8802, "step": 11100 }, { "epoch": 0.11, "learning_rate": 1.3029050474471679e-05, "loss": 2.8186, "step": 11200 }, { "epoch": 0.12, "learning_rate": 1.314548524189323e-05, "loss": 2.8054, "step": 11300 }, { "epoch": 0.12, "learning_rate": 1.3261920009314781e-05, "loss": 2.725, "step": 11400 }, { "epoch": 0.12, "learning_rate": 1.3378354776736334e-05, "loss": 2.7341, "step": 11500 }, { "epoch": 0.12, "learning_rate": 1.3494789544157885e-05, "loss": 2.8901, "step": 11600 }, { "epoch": 0.12, "learning_rate": 1.3611224311579438e-05, "loss": 2.7045, "step": 11700 }, { "epoch": 0.12, "learning_rate": 1.372765907900099e-05, "loss": 2.7249, "step": 11800 }, { "epoch": 0.12, "learning_rate": 1.3844093846422542e-05, "loss": 2.5729, "step": 11900 }, { "epoch": 0.12, "learning_rate": 1.3960528613844094e-05, "loss": 2.588, "step": 12000 }, { "epoch": 0.12, "learning_rate": 1.4076963381265645e-05, "loss": 2.6701, "step": 12100 }, { "epoch": 0.12, "learning_rate": 1.41933981486872e-05, "loss": 2.5204, "step": 12200 }, { "epoch": 0.13, "learning_rate": 1.430983291610875e-05, "loss": 2.7362, "step": 12300 }, { "epoch": 0.13, "learning_rate": 1.4426267683530302e-05, "loss": 2.5727, "step": 12400 }, { "epoch": 0.13, "learning_rate": 1.4542702450951855e-05, "loss": 2.4632, "step": 12500 }, { "epoch": 0.13, "learning_rate": 1.4659137218373406e-05, "loss": 2.4762, "step": 12600 }, { "epoch": 0.13, "learning_rate": 1.4775571985794959e-05, "loss": 2.4738, "step": 12700 }, { "epoch": 0.13, "learning_rate": 1.4892006753216512e-05, "loss": 2.5803, "step": 12800 }, { "epoch": 0.13, "learning_rate": 1.5008441520638063e-05, "loss": 2.5389, "step": 12900 }, { "epoch": 0.13, "learning_rate": 1.5124876288059614e-05, "loss": 2.4748, "step": 13000 }, { "epoch": 0.13, "learning_rate": 1.5241311055481166e-05, "loss": 2.5047, "step": 13100 }, { "epoch": 0.13, "learning_rate": 1.5357745822902717e-05, "loss": 2.3426, "step": 13200 }, { "epoch": 0.14, "learning_rate": 1.547418059032427e-05, "loss": 2.3343, "step": 13300 }, { "epoch": 0.14, "learning_rate": 1.5590615357745826e-05, "loss": 2.4218, "step": 13400 }, { "epoch": 0.14, "learning_rate": 1.5707050125167377e-05, "loss": 2.435, "step": 13500 }, { "epoch": 0.14, "learning_rate": 1.582348489258893e-05, "loss": 2.3447, "step": 13600 }, { "epoch": 0.14, "learning_rate": 1.593991966001048e-05, "loss": 2.247, "step": 13700 }, { "epoch": 0.14, "learning_rate": 1.605635442743203e-05, "loss": 2.3916, "step": 13800 }, { "epoch": 0.14, "learning_rate": 1.6172789194853582e-05, "loss": 2.2815, "step": 13900 }, { "epoch": 0.14, "learning_rate": 1.6289223962275137e-05, "loss": 2.2443, "step": 14000 }, { "epoch": 0.14, "learning_rate": 1.6405658729696688e-05, "loss": 2.3048, "step": 14100 }, { "epoch": 0.14, "learning_rate": 1.652209349711824e-05, "loss": 2.0418, "step": 14200 }, { "epoch": 0.15, "learning_rate": 1.663852826453979e-05, "loss": 2.2515, "step": 14300 }, { "epoch": 0.15, "learning_rate": 1.6753798684287127e-05, "loss": 2.2287, "step": 14400 }, { "epoch": 0.15, "learning_rate": 1.687023345170868e-05, "loss": 2.2754, "step": 14500 }, { "epoch": 0.15, "learning_rate": 1.6986668219130233e-05, "loss": 2.2174, "step": 14600 }, { "epoch": 0.15, "learning_rate": 1.7103102986551784e-05, "loss": 2.1102, "step": 14700 }, { "epoch": 0.15, "learning_rate": 1.721953775397334e-05, "loss": 2.2028, "step": 14800 }, { "epoch": 0.15, "learning_rate": 1.733597252139489e-05, "loss": 2.0597, "step": 14900 }, { "epoch": 0.15, "learning_rate": 1.745240728881644e-05, "loss": 2.0455, "step": 15000 }, { "epoch": 0.15, "learning_rate": 1.7568842056237992e-05, "loss": 2.0336, "step": 15100 }, { "epoch": 0.15, "learning_rate": 1.7685276823659547e-05, "loss": 2.1017, "step": 15200 }, { "epoch": 0.16, "learning_rate": 1.7801711591081098e-05, "loss": 2.153, "step": 15300 }, { "epoch": 0.16, "learning_rate": 1.791814635850265e-05, "loss": 2.0786, "step": 15400 }, { "epoch": 0.16, "learning_rate": 1.80345811259242e-05, "loss": 2.0992, "step": 15500 }, { "epoch": 0.16, "learning_rate": 1.815101589334575e-05, "loss": 2.1288, "step": 15600 }, { "epoch": 0.16, "learning_rate": 1.8267450660767303e-05, "loss": 2.16, "step": 15700 }, { "epoch": 0.16, "learning_rate": 1.838388542818886e-05, "loss": 2.1414, "step": 15800 }, { "epoch": 0.16, "learning_rate": 1.8500320195610412e-05, "loss": 2.1669, "step": 15900 }, { "epoch": 0.16, "learning_rate": 1.8616754963031963e-05, "loss": 2.1529, "step": 16000 }, { "epoch": 0.16, "learning_rate": 1.8733189730453514e-05, "loss": 2.1362, "step": 16100 }, { "epoch": 0.17, "learning_rate": 1.8849624497875066e-05, "loss": 2.1828, "step": 16200 }, { "epoch": 0.17, "learning_rate": 1.8966059265296617e-05, "loss": 2.0154, "step": 16300 }, { "epoch": 0.17, "learning_rate": 1.9082494032718168e-05, "loss": 2.2411, "step": 16400 }, { "epoch": 0.17, "learning_rate": 1.9198928800139723e-05, "loss": 2.0872, "step": 16500 }, { "epoch": 0.17, "learning_rate": 1.9315363567561274e-05, "loss": 1.8982, "step": 16600 }, { "epoch": 0.17, "learning_rate": 1.9431798334982825e-05, "loss": 1.9847, "step": 16700 }, { "epoch": 0.17, "learning_rate": 1.954823310240438e-05, "loss": 1.9877, "step": 16800 }, { "epoch": 0.17, "learning_rate": 1.966466786982593e-05, "loss": 2.2184, "step": 16900 }, { "epoch": 0.17, "learning_rate": 1.9781102637247482e-05, "loss": 2.0553, "step": 17000 }, { "epoch": 0.17, "learning_rate": 1.9897537404669037e-05, "loss": 2.0305, "step": 17100 }, { "epoch": 0.18, "learning_rate": 2.0013972172090588e-05, "loss": 1.9608, "step": 17200 }, { "epoch": 0.18, "learning_rate": 2.013040693951214e-05, "loss": 1.8904, "step": 17300 }, { "epoch": 0.18, "learning_rate": 2.024684170693369e-05, "loss": 1.9832, "step": 17400 }, { "epoch": 0.18, "learning_rate": 2.0363276474355242e-05, "loss": 1.9605, "step": 17500 }, { "epoch": 0.18, "learning_rate": 2.0479711241776793e-05, "loss": 1.9082, "step": 17600 }, { "epoch": 0.18, "learning_rate": 2.0596146009198344e-05, "loss": 1.8935, "step": 17700 }, { "epoch": 0.18, "learning_rate": 2.07125807766199e-05, "loss": 2.0428, "step": 17800 }, { "epoch": 0.18, "learning_rate": 2.0829015544041453e-05, "loss": 1.9269, "step": 17900 }, { "epoch": 0.18, "learning_rate": 2.0945450311463005e-05, "loss": 1.9049, "step": 18000 }, { "epoch": 0.18, "learning_rate": 2.1061885078884556e-05, "loss": 1.8559, "step": 18100 }, { "epoch": 0.19, "learning_rate": 2.1178319846306107e-05, "loss": 1.8794, "step": 18200 }, { "epoch": 0.19, "learning_rate": 2.1294754613727658e-05, "loss": 1.9616, "step": 18300 }, { "epoch": 0.19, "learning_rate": 2.1410025033474998e-05, "loss": 1.9527, "step": 18400 }, { "epoch": 0.19, "learning_rate": 2.152645980089655e-05, "loss": 1.8142, "step": 18500 }, { "epoch": 0.19, "learning_rate": 2.16428945683181e-05, "loss": 1.8426, "step": 18600 }, { "epoch": 0.19, "learning_rate": 2.175932933573965e-05, "loss": 1.8306, "step": 18700 }, { "epoch": 0.19, "learning_rate": 2.1875764103161203e-05, "loss": 1.7197, "step": 18800 }, { "epoch": 0.19, "learning_rate": 2.1992198870582757e-05, "loss": 1.9279, "step": 18900 }, { "epoch": 0.19, "learning_rate": 2.2107469290330094e-05, "loss": 1.7236, "step": 19000 }, { "epoch": 0.19, "learning_rate": 2.2223904057751645e-05, "loss": 1.9686, "step": 19100 }, { "epoch": 0.2, "learning_rate": 2.2340338825173196e-05, "loss": 1.8834, "step": 19200 }, { "epoch": 0.2, "learning_rate": 2.2456773592594747e-05, "loss": 1.8359, "step": 19300 }, { "epoch": 0.2, "learning_rate": 2.2573208360016302e-05, "loss": 1.9224, "step": 19400 }, { "epoch": 0.2, "learning_rate": 2.2689643127437853e-05, "loss": 1.9865, "step": 19500 }, { "epoch": 0.2, "learning_rate": 2.2806077894859408e-05, "loss": 1.7864, "step": 19600 }, { "epoch": 0.2, "learning_rate": 2.292251266228096e-05, "loss": 1.869, "step": 19700 }, { "epoch": 0.2, "learning_rate": 2.303894742970251e-05, "loss": 1.7395, "step": 19800 }, { "epoch": 0.2, "learning_rate": 2.315538219712406e-05, "loss": 1.827, "step": 19900 }, { "epoch": 0.2, "learning_rate": 2.3271816964545613e-05, "loss": 1.8106, "step": 20000 }, { "epoch": 0.2, "learning_rate": 2.3388251731967167e-05, "loss": 1.8351, "step": 20100 }, { "epoch": 0.21, "learning_rate": 2.350468649938872e-05, "loss": 2.009, "step": 20200 }, { "epoch": 0.21, "learning_rate": 2.362112126681027e-05, "loss": 1.8558, "step": 20300 }, { "epoch": 0.21, "learning_rate": 2.373755603423182e-05, "loss": 1.9493, "step": 20400 }, { "epoch": 0.21, "learning_rate": 2.3853990801653372e-05, "loss": 1.7777, "step": 20500 }, { "epoch": 0.21, "learning_rate": 2.3970425569074927e-05, "loss": 1.8565, "step": 20600 }, { "epoch": 0.21, "learning_rate": 2.408686033649648e-05, "loss": 1.7888, "step": 20700 }, { "epoch": 0.21, "learning_rate": 2.4203295103918033e-05, "loss": 1.6928, "step": 20800 }, { "epoch": 0.21, "learning_rate": 2.4319729871339584e-05, "loss": 1.8924, "step": 20900 }, { "epoch": 0.21, "learning_rate": 2.4436164638761135e-05, "loss": 1.9816, "step": 21000 }, { "epoch": 0.21, "learning_rate": 2.4552599406182686e-05, "loss": 1.9393, "step": 21100 }, { "epoch": 0.22, "learning_rate": 2.4669034173604238e-05, "loss": 1.8131, "step": 21200 }, { "epoch": 0.22, "learning_rate": 2.478546894102579e-05, "loss": 1.7091, "step": 21300 }, { "epoch": 0.22, "learning_rate": 2.4901903708447343e-05, "loss": 1.6654, "step": 21400 }, { "epoch": 0.22, "learning_rate": 2.5018338475868895e-05, "loss": 1.7938, "step": 21500 }, { "epoch": 0.22, "learning_rate": 2.513477324329045e-05, "loss": 1.8864, "step": 21600 }, { "epoch": 0.22, "learning_rate": 2.5251208010712e-05, "loss": 1.7026, "step": 21700 }, { "epoch": 0.22, "learning_rate": 2.536764277813355e-05, "loss": 1.7584, "step": 21800 }, { "epoch": 0.22, "learning_rate": 2.5484077545555103e-05, "loss": 1.7256, "step": 21900 }, { "epoch": 0.22, "learning_rate": 2.5599347965302443e-05, "loss": 1.8007, "step": 22000 }, { "epoch": 0.23, "learning_rate": 2.5715782732723994e-05, "loss": 1.8303, "step": 22100 }, { "epoch": 0.23, "learning_rate": 2.5832217500145545e-05, "loss": 1.7349, "step": 22200 }, { "epoch": 0.23, "learning_rate": 2.5948652267567096e-05, "loss": 1.7948, "step": 22300 }, { "epoch": 0.23, "learning_rate": 2.6065087034988647e-05, "loss": 1.8688, "step": 22400 }, { "epoch": 0.23, "learning_rate": 2.61815218024102e-05, "loss": 1.694, "step": 22500 }, { "epoch": 0.23, "learning_rate": 2.6297956569831753e-05, "loss": 1.9865, "step": 22600 }, { "epoch": 0.23, "learning_rate": 2.6414391337253304e-05, "loss": 1.6756, "step": 22700 }, { "epoch": 0.23, "learning_rate": 2.6530826104674856e-05, "loss": 1.8416, "step": 22800 }, { "epoch": 0.23, "learning_rate": 2.6647260872096407e-05, "loss": 1.7703, "step": 22900 }, { "epoch": 0.23, "learning_rate": 2.676369563951796e-05, "loss": 1.6477, "step": 23000 }, { "epoch": 0.24, "learning_rate": 2.6880130406939513e-05, "loss": 1.6786, "step": 23100 }, { "epoch": 0.24, "learning_rate": 2.6996565174361067e-05, "loss": 1.7777, "step": 23200 }, { "epoch": 0.24, "learning_rate": 2.711299994178262e-05, "loss": 1.714, "step": 23300 }, { "epoch": 0.24, "learning_rate": 2.722943470920417e-05, "loss": 1.6982, "step": 23400 }, { "epoch": 0.24, "learning_rate": 2.734586947662572e-05, "loss": 1.9178, "step": 23500 }, { "epoch": 0.24, "learning_rate": 2.7462304244047272e-05, "loss": 1.8461, "step": 23600 }, { "epoch": 0.24, "learning_rate": 2.7578739011468823e-05, "loss": 1.7734, "step": 23700 }, { "epoch": 0.24, "learning_rate": 2.7695173778890375e-05, "loss": 1.7396, "step": 23800 }, { "epoch": 0.24, "learning_rate": 2.781160854631193e-05, "loss": 1.7885, "step": 23900 }, { "epoch": 0.24, "learning_rate": 2.7928043313733484e-05, "loss": 1.7568, "step": 24000 }, { "epoch": 0.25, "learning_rate": 2.8044478081155035e-05, "loss": 1.7252, "step": 24100 }, { "epoch": 0.25, "learning_rate": 2.8160912848576586e-05, "loss": 1.6315, "step": 24200 }, { "epoch": 0.25, "learning_rate": 2.8277347615998138e-05, "loss": 1.6681, "step": 24300 }, { "epoch": 0.25, "learning_rate": 2.839378238341969e-05, "loss": 1.7478, "step": 24400 }, { "epoch": 0.25, "learning_rate": 2.8510217150841243e-05, "loss": 1.7316, "step": 24500 }, { "epoch": 0.25, "learning_rate": 2.8626651918262795e-05, "loss": 1.6041, "step": 24600 }, { "epoch": 0.25, "learning_rate": 2.8743086685684346e-05, "loss": 1.6318, "step": 24700 }, { "epoch": 0.25, "learning_rate": 2.8859521453105897e-05, "loss": 1.7134, "step": 24800 }, { "epoch": 0.25, "learning_rate": 2.897595622052745e-05, "loss": 1.5604, "step": 24900 }, { "epoch": 0.25, "learning_rate": 2.9092390987949003e-05, "loss": 1.6424, "step": 25000 }, { "epoch": 0.26, "learning_rate": 2.9208825755370554e-05, "loss": 1.6891, "step": 25100 }, { "epoch": 0.26, "learning_rate": 2.932526052279211e-05, "loss": 1.7311, "step": 25200 }, { "epoch": 0.26, "learning_rate": 2.944169529021366e-05, "loss": 1.5749, "step": 25300 }, { "epoch": 0.26, "learning_rate": 2.955813005763521e-05, "loss": 1.8692, "step": 25400 }, { "epoch": 0.26, "learning_rate": 2.9674564825056762e-05, "loss": 1.8747, "step": 25500 }, { "epoch": 0.26, "learning_rate": 2.9790999592478314e-05, "loss": 1.7075, "step": 25600 }, { "epoch": 0.26, "learning_rate": 2.9907434359899865e-05, "loss": 1.5054, "step": 25700 }, { "epoch": 0.26, "learning_rate": 3.0023869127321416e-05, "loss": 1.8078, "step": 25800 }, { "epoch": 0.26, "learning_rate": 3.0140303894742967e-05, "loss": 1.7337, "step": 25900 }, { "epoch": 0.26, "learning_rate": 3.0256738662164522e-05, "loss": 1.6756, "step": 26000 }, { "epoch": 0.27, "learning_rate": 3.0373173429586073e-05, "loss": 1.6192, "step": 26100 }, { "epoch": 0.27, "learning_rate": 3.048960819700763e-05, "loss": 1.6467, "step": 26200 }, { "epoch": 0.27, "learning_rate": 3.060604296442918e-05, "loss": 1.5818, "step": 26300 }, { "epoch": 0.27, "learning_rate": 3.072131338417652e-05, "loss": 1.6936, "step": 26400 }, { "epoch": 0.27, "learning_rate": 3.083774815159807e-05, "loss": 1.6342, "step": 26500 }, { "epoch": 0.27, "learning_rate": 3.095418291901962e-05, "loss": 1.5545, "step": 26600 }, { "epoch": 0.27, "learning_rate": 3.107061768644117e-05, "loss": 1.7028, "step": 26700 }, { "epoch": 0.27, "learning_rate": 3.1187052453862724e-05, "loss": 1.6518, "step": 26800 }, { "epoch": 0.27, "learning_rate": 3.1303487221284275e-05, "loss": 1.6923, "step": 26900 }, { "epoch": 0.28, "learning_rate": 3.1419921988705826e-05, "loss": 1.6728, "step": 27000 }, { "epoch": 0.28, "learning_rate": 3.153635675612738e-05, "loss": 1.5505, "step": 27100 }, { "epoch": 0.28, "learning_rate": 3.165279152354893e-05, "loss": 1.6107, "step": 27200 }, { "epoch": 0.28, "learning_rate": 3.1769226290970486e-05, "loss": 1.7175, "step": 27300 }, { "epoch": 0.28, "learning_rate": 3.188566105839204e-05, "loss": 1.5619, "step": 27400 }, { "epoch": 0.28, "learning_rate": 3.200209582581359e-05, "loss": 1.779, "step": 27500 }, { "epoch": 0.28, "learning_rate": 3.211853059323514e-05, "loss": 1.6193, "step": 27600 }, { "epoch": 0.28, "learning_rate": 3.223496536065669e-05, "loss": 1.5945, "step": 27700 }, { "epoch": 0.28, "learning_rate": 3.235140012807824e-05, "loss": 1.6029, "step": 27800 }, { "epoch": 0.28, "learning_rate": 3.24678348954998e-05, "loss": 1.5127, "step": 27900 }, { "epoch": 0.29, "learning_rate": 3.258426966292135e-05, "loss": 1.7253, "step": 28000 }, { "epoch": 0.29, "learning_rate": 3.27007044303429e-05, "loss": 1.6813, "step": 28100 }, { "epoch": 0.29, "learning_rate": 3.2817139197764454e-05, "loss": 1.5601, "step": 28200 }, { "epoch": 0.29, "learning_rate": 3.2933573965186005e-05, "loss": 1.6222, "step": 28300 }, { "epoch": 0.29, "learning_rate": 3.305000873260756e-05, "loss": 1.6366, "step": 28400 }, { "epoch": 0.29, "learning_rate": 3.316644350002911e-05, "loss": 1.6782, "step": 28500 }, { "epoch": 0.29, "learning_rate": 3.3282878267450666e-05, "loss": 1.7085, "step": 28600 }, { "epoch": 0.29, "learning_rate": 3.339931303487222e-05, "loss": 1.6443, "step": 28700 }, { "epoch": 0.29, "learning_rate": 3.351574780229377e-05, "loss": 1.7136, "step": 28800 }, { "epoch": 0.29, "learning_rate": 3.363218256971532e-05, "loss": 1.743, "step": 28900 }, { "epoch": 0.3, "learning_rate": 3.374861733713687e-05, "loss": 1.6346, "step": 29000 }, { "epoch": 0.3, "learning_rate": 3.386505210455842e-05, "loss": 1.5877, "step": 29100 }, { "epoch": 0.3, "learning_rate": 3.398148687197997e-05, "loss": 1.7239, "step": 29200 }, { "epoch": 0.3, "learning_rate": 3.4097921639401524e-05, "loss": 1.4876, "step": 29300 }, { "epoch": 0.3, "learning_rate": 3.4214356406823076e-05, "loss": 1.6542, "step": 29400 }, { "epoch": 0.3, "learning_rate": 3.433079117424463e-05, "loss": 1.7042, "step": 29500 }, { "epoch": 0.3, "learning_rate": 3.444722594166618e-05, "loss": 1.5168, "step": 29600 }, { "epoch": 0.3, "learning_rate": 3.456366070908773e-05, "loss": 1.6418, "step": 29700 }, { "epoch": 0.3, "learning_rate": 3.468009547650928e-05, "loss": 1.6087, "step": 29800 }, { "epoch": 0.3, "learning_rate": 3.479653024393084e-05, "loss": 1.6075, "step": 29900 }, { "epoch": 0.31, "learning_rate": 3.4912965011352397e-05, "loss": 1.6207, "step": 30000 }, { "epoch": 0.31, "learning_rate": 3.502939977877395e-05, "loss": 1.478, "step": 30100 }, { "epoch": 0.31, "learning_rate": 3.51458345461955e-05, "loss": 1.7332, "step": 30200 }, { "epoch": 0.31, "learning_rate": 3.526226931361705e-05, "loss": 1.6318, "step": 30300 }, { "epoch": 0.31, "learning_rate": 3.53787040810386e-05, "loss": 1.6422, "step": 30400 }, { "epoch": 0.31, "learning_rate": 3.549513884846015e-05, "loss": 1.6565, "step": 30500 }, { "epoch": 0.31, "learning_rate": 3.5611573615881704e-05, "loss": 1.5044, "step": 30600 }, { "epoch": 0.31, "learning_rate": 3.5728008383303255e-05, "loss": 1.5851, "step": 30700 }, { "epoch": 0.31, "learning_rate": 3.5844443150724806e-05, "loss": 1.7198, "step": 30800 }, { "epoch": 0.31, "learning_rate": 3.596087791814636e-05, "loss": 1.6801, "step": 30900 }, { "epoch": 0.32, "learning_rate": 3.607731268556791e-05, "loss": 1.7126, "step": 31000 }, { "epoch": 0.32, "learning_rate": 3.619374745298946e-05, "loss": 1.4165, "step": 31100 }, { "epoch": 0.32, "learning_rate": 3.63090178727368e-05, "loss": 1.627, "step": 31200 }, { "epoch": 0.32, "learning_rate": 3.642545264015836e-05, "loss": 1.6585, "step": 31300 }, { "epoch": 0.32, "learning_rate": 3.654188740757991e-05, "loss": 1.461, "step": 31400 }, { "epoch": 0.32, "learning_rate": 3.665832217500146e-05, "loss": 1.593, "step": 31500 }, { "epoch": 0.32, "learning_rate": 3.677475694242301e-05, "loss": 1.5944, "step": 31600 }, { "epoch": 0.32, "learning_rate": 3.689119170984456e-05, "loss": 1.6941, "step": 31700 }, { "epoch": 0.32, "learning_rate": 3.7007626477266114e-05, "loss": 1.4476, "step": 31800 }, { "epoch": 0.33, "learning_rate": 3.7124061244687665e-05, "loss": 1.6172, "step": 31900 }, { "epoch": 0.33, "learning_rate": 3.7239331664435e-05, "loss": 1.6509, "step": 32000 }, { "epoch": 0.33, "learning_rate": 3.735576643185655e-05, "loss": 1.7404, "step": 32100 }, { "epoch": 0.33, "learning_rate": 3.74722011992781e-05, "loss": 1.5284, "step": 32200 }, { "epoch": 0.33, "learning_rate": 3.758863596669966e-05, "loss": 1.6119, "step": 32300 }, { "epoch": 0.33, "learning_rate": 3.770507073412121e-05, "loss": 1.4731, "step": 32400 }, { "epoch": 0.33, "learning_rate": 3.782150550154276e-05, "loss": 1.7473, "step": 32500 }, { "epoch": 0.33, "learning_rate": 3.793794026896431e-05, "loss": 1.663, "step": 32600 }, { "epoch": 0.33, "learning_rate": 3.805437503638587e-05, "loss": 1.5969, "step": 32700 }, { "epoch": 0.33, "learning_rate": 3.817080980380742e-05, "loss": 1.5234, "step": 32800 }, { "epoch": 0.34, "learning_rate": 3.828724457122897e-05, "loss": 1.4602, "step": 32900 }, { "epoch": 0.34, "learning_rate": 3.8403679338650524e-05, "loss": 1.5917, "step": 33000 }, { "epoch": 0.34, "learning_rate": 3.8518949758397857e-05, "loss": 1.6486, "step": 33100 }, { "epoch": 0.34, "learning_rate": 3.863538452581941e-05, "loss": 1.6396, "step": 33200 }, { "epoch": 0.34, "learning_rate": 3.875181929324096e-05, "loss": 1.4991, "step": 33300 }, { "epoch": 0.34, "learning_rate": 3.886825406066252e-05, "loss": 1.4741, "step": 33400 }, { "epoch": 0.34, "learning_rate": 3.898468882808407e-05, "loss": 1.6008, "step": 33500 }, { "epoch": 0.34, "learning_rate": 3.910112359550562e-05, "loss": 1.6586, "step": 33600 }, { "epoch": 0.34, "learning_rate": 3.921755836292717e-05, "loss": 1.5204, "step": 33700 }, { "epoch": 0.34, "learning_rate": 3.933399313034872e-05, "loss": 1.604, "step": 33800 }, { "epoch": 0.35, "learning_rate": 3.945042789777027e-05, "loss": 1.6618, "step": 33900 }, { "epoch": 0.35, "learning_rate": 3.9566862665191824e-05, "loss": 1.5671, "step": 34000 }, { "epoch": 0.35, "learning_rate": 3.968329743261338e-05, "loss": 1.6671, "step": 34100 }, { "epoch": 0.35, "learning_rate": 3.9799732200034934e-05, "loss": 1.6382, "step": 34200 }, { "epoch": 0.35, "learning_rate": 3.9916166967456485e-05, "loss": 1.7198, "step": 34300 }, { "epoch": 0.35, "learning_rate": 4.0032601734878036e-05, "loss": 1.5506, "step": 34400 }, { "epoch": 0.35, "learning_rate": 4.014903650229959e-05, "loss": 1.4979, "step": 34500 }, { "epoch": 0.35, "learning_rate": 4.026547126972114e-05, "loss": 1.5727, "step": 34600 }, { "epoch": 0.35, "learning_rate": 4.0381906037142696e-05, "loss": 1.5792, "step": 34700 }, { "epoch": 0.35, "learning_rate": 4.049834080456425e-05, "loss": 1.6914, "step": 34800 }, { "epoch": 0.36, "learning_rate": 4.06147755719858e-05, "loss": 1.6395, "step": 34900 }, { "epoch": 0.36, "learning_rate": 4.073121033940735e-05, "loss": 1.3941, "step": 35000 }, { "epoch": 0.36, "learning_rate": 4.08476451068289e-05, "loss": 1.5986, "step": 35100 }, { "epoch": 0.36, "learning_rate": 4.096407987425045e-05, "loss": 1.6196, "step": 35200 }, { "epoch": 0.36, "learning_rate": 4.1080514641672004e-05, "loss": 1.6662, "step": 35300 }, { "epoch": 0.36, "learning_rate": 4.1196949409093555e-05, "loss": 1.6225, "step": 35400 }, { "epoch": 0.36, "learning_rate": 4.1313384176515106e-05, "loss": 1.5535, "step": 35500 }, { "epoch": 0.36, "learning_rate": 4.142981894393666e-05, "loss": 1.5587, "step": 35600 }, { "epoch": 0.36, "learning_rate": 4.154625371135821e-05, "loss": 1.5462, "step": 35700 }, { "epoch": 0.36, "learning_rate": 4.166268847877976e-05, "loss": 1.6372, "step": 35800 }, { "epoch": 0.37, "learning_rate": 4.177912324620131e-05, "loss": 1.6065, "step": 35900 }, { "epoch": 0.37, "learning_rate": 4.189555801362286e-05, "loss": 1.6709, "step": 36000 }, { "epoch": 0.37, "learning_rate": 4.201199278104443e-05, "loss": 1.6939, "step": 36100 }, { "epoch": 0.37, "learning_rate": 4.212842754846598e-05, "loss": 1.5545, "step": 36200 }, { "epoch": 0.37, "learning_rate": 4.224486231588753e-05, "loss": 1.4577, "step": 36300 }, { "epoch": 0.37, "learning_rate": 4.236129708330908e-05, "loss": 1.5861, "step": 36400 }, { "epoch": 0.37, "learning_rate": 4.247773185073063e-05, "loss": 1.6461, "step": 36500 }, { "epoch": 0.37, "learning_rate": 4.259416661815218e-05, "loss": 1.6597, "step": 36600 }, { "epoch": 0.37, "learning_rate": 4.2710601385573734e-05, "loss": 1.5604, "step": 36700 }, { "epoch": 0.37, "learning_rate": 4.2827036152995286e-05, "loss": 1.6345, "step": 36800 }, { "epoch": 0.38, "learning_rate": 4.294347092041684e-05, "loss": 1.6635, "step": 36900 }, { "epoch": 0.38, "learning_rate": 4.305990568783839e-05, "loss": 1.4263, "step": 37000 }, { "epoch": 0.38, "learning_rate": 4.317634045525994e-05, "loss": 1.6068, "step": 37100 }, { "epoch": 0.38, "learning_rate": 4.329277522268149e-05, "loss": 1.4196, "step": 37200 }, { "epoch": 0.38, "learning_rate": 4.340920999010304e-05, "loss": 1.6036, "step": 37300 }, { "epoch": 0.38, "learning_rate": 4.35256447575246e-05, "loss": 1.4094, "step": 37400 }, { "epoch": 0.38, "learning_rate": 4.364207952494615e-05, "loss": 1.6328, "step": 37500 }, { "epoch": 0.38, "learning_rate": 4.37585142923677e-05, "loss": 1.6935, "step": 37600 }, { "epoch": 0.38, "learning_rate": 4.3874949059789253e-05, "loss": 1.7379, "step": 37700 }, { "epoch": 0.39, "learning_rate": 4.3991383827210805e-05, "loss": 1.7062, "step": 37800 }, { "epoch": 0.39, "learning_rate": 4.4107818594632356e-05, "loss": 1.586, "step": 37900 }, { "epoch": 0.39, "learning_rate": 4.422425336205391e-05, "loss": 1.7326, "step": 38000 }, { "epoch": 0.39, "learning_rate": 4.4340688129475465e-05, "loss": 1.5298, "step": 38100 }, { "epoch": 0.39, "learning_rate": 4.4457122896897016e-05, "loss": 1.6318, "step": 38200 }, { "epoch": 0.39, "learning_rate": 4.457355766431857e-05, "loss": 1.5471, "step": 38300 }, { "epoch": 0.39, "learning_rate": 4.468999243174012e-05, "loss": 1.5413, "step": 38400 }, { "epoch": 0.39, "learning_rate": 4.480642719916167e-05, "loss": 1.6205, "step": 38500 }, { "epoch": 0.39, "learning_rate": 4.492286196658322e-05, "loss": 1.7189, "step": 38600 }, { "epoch": 0.39, "learning_rate": 4.503929673400478e-05, "loss": 1.7002, "step": 38700 }, { "epoch": 0.4, "learning_rate": 4.515573150142633e-05, "loss": 1.6631, "step": 38800 }, { "epoch": 0.4, "learning_rate": 4.527216626884788e-05, "loss": 1.5372, "step": 38900 }, { "epoch": 0.4, "learning_rate": 4.538860103626943e-05, "loss": 1.5069, "step": 39000 }, { "epoch": 0.4, "learning_rate": 4.5505035803690984e-05, "loss": 1.6565, "step": 39100 }, { "epoch": 0.4, "learning_rate": 4.5621470571112535e-05, "loss": 1.6127, "step": 39200 }, { "epoch": 0.4, "learning_rate": 4.573674099085987e-05, "loss": 1.7147, "step": 39300 }, { "epoch": 0.4, "learning_rate": 4.585201141060721e-05, "loss": 1.7017, "step": 39400 }, { "epoch": 0.4, "learning_rate": 4.596844617802876e-05, "loss": 1.4555, "step": 39500 }, { "epoch": 0.4, "learning_rate": 4.608488094545031e-05, "loss": 1.5304, "step": 39600 }, { "epoch": 0.4, "learning_rate": 4.620131571287187e-05, "loss": 1.6392, "step": 39700 }, { "epoch": 0.41, "learning_rate": 4.631775048029342e-05, "loss": 1.6347, "step": 39800 }, { "epoch": 0.41, "learning_rate": 4.643418524771497e-05, "loss": 1.5631, "step": 39900 }, { "epoch": 0.41, "learning_rate": 4.655062001513652e-05, "loss": 1.5137, "step": 40000 }, { "epoch": 0.41, "learning_rate": 4.666705478255807e-05, "loss": 1.5546, "step": 40100 }, { "epoch": 0.41, "learning_rate": 4.6783489549979624e-05, "loss": 1.6372, "step": 40200 }, { "epoch": 0.41, "learning_rate": 4.6899924317401176e-05, "loss": 1.6029, "step": 40300 }, { "epoch": 0.41, "learning_rate": 4.701635908482273e-05, "loss": 1.7338, "step": 40400 }, { "epoch": 0.41, "learning_rate": 4.713279385224428e-05, "loss": 1.5, "step": 40500 }, { "epoch": 0.41, "learning_rate": 4.724922861966583e-05, "loss": 1.4543, "step": 40600 }, { "epoch": 0.41, "learning_rate": 4.736566338708738e-05, "loss": 1.5589, "step": 40700 }, { "epoch": 0.42, "learning_rate": 4.748209815450893e-05, "loss": 1.5708, "step": 40800 }, { "epoch": 0.42, "learning_rate": 4.759853292193049e-05, "loss": 1.6656, "step": 40900 }, { "epoch": 0.42, "learning_rate": 4.771496768935205e-05, "loss": 1.4874, "step": 41000 }, { "epoch": 0.42, "learning_rate": 4.78314024567736e-05, "loss": 1.6359, "step": 41100 }, { "epoch": 0.42, "learning_rate": 4.794783722419515e-05, "loss": 1.5632, "step": 41200 }, { "epoch": 0.42, "learning_rate": 4.80642719916167e-05, "loss": 1.567, "step": 41300 }, { "epoch": 0.42, "learning_rate": 4.818070675903825e-05, "loss": 1.6288, "step": 41400 }, { "epoch": 0.42, "learning_rate": 4.8297141526459804e-05, "loss": 1.6696, "step": 41500 }, { "epoch": 0.42, "learning_rate": 4.8413576293881355e-05, "loss": 1.5842, "step": 41600 }, { "epoch": 0.42, "learning_rate": 4.8530011061302906e-05, "loss": 1.7026, "step": 41700 }, { "epoch": 0.43, "learning_rate": 4.864644582872446e-05, "loss": 1.6694, "step": 41800 }, { "epoch": 0.43, "learning_rate": 4.876288059614601e-05, "loss": 1.5539, "step": 41900 }, { "epoch": 0.43, "learning_rate": 4.887931536356756e-05, "loss": 1.616, "step": 42000 }, { "epoch": 0.43, "learning_rate": 4.899575013098911e-05, "loss": 1.7302, "step": 42100 }, { "epoch": 0.43, "learning_rate": 4.911218489841066e-05, "loss": 1.6773, "step": 42200 }, { "epoch": 0.43, "learning_rate": 4.922861966583222e-05, "loss": 1.5233, "step": 42300 }, { "epoch": 0.43, "learning_rate": 4.934505443325377e-05, "loss": 1.6265, "step": 42400 }, { "epoch": 0.43, "learning_rate": 4.946148920067532e-05, "loss": 1.7762, "step": 42500 }, { "epoch": 0.43, "learning_rate": 4.9577923968096874e-05, "loss": 1.477, "step": 42600 }, { "epoch": 0.44, "learning_rate": 4.9694358735518425e-05, "loss": 1.5589, "step": 42700 }, { "epoch": 0.44, "learning_rate": 4.9810793502939977e-05, "loss": 1.6193, "step": 42800 }, { "epoch": 0.44, "learning_rate": 4.9927228270361535e-05, "loss": 1.5742, "step": 42900 }, { "epoch": 0.44, "learning_rate": 5.0043663037783086e-05, "loss": 1.7094, "step": 43000 }, { "epoch": 0.44, "learning_rate": 5.016009780520464e-05, "loss": 1.7161, "step": 43100 }, { "epoch": 0.44, "learning_rate": 5.027653257262619e-05, "loss": 1.5581, "step": 43200 }, { "epoch": 0.44, "learning_rate": 5.039296734004774e-05, "loss": 1.7793, "step": 43300 }, { "epoch": 0.44, "learning_rate": 5.050940210746929e-05, "loss": 1.6299, "step": 43400 }, { "epoch": 0.44, "learning_rate": 5.062583687489084e-05, "loss": 1.6768, "step": 43500 }, { "epoch": 0.44, "learning_rate": 5.07422716423124e-05, "loss": 1.4999, "step": 43600 }, { "epoch": 0.45, "learning_rate": 5.085870640973395e-05, "loss": 1.6757, "step": 43700 }, { "epoch": 0.45, "learning_rate": 5.09751411771555e-05, "loss": 1.6047, "step": 43800 }, { "epoch": 0.45, "learning_rate": 5.1091575944577054e-05, "loss": 1.7353, "step": 43900 }, { "epoch": 0.45, "learning_rate": 5.1206846364324386e-05, "loss": 1.5894, "step": 44000 }, { "epoch": 0.45, "learning_rate": 5.1322116784071726e-05, "loss": 1.6438, "step": 44100 }, { "epoch": 0.45, "learning_rate": 5.143855155149328e-05, "loss": 1.685, "step": 44200 }, { "epoch": 0.45, "learning_rate": 5.155498631891483e-05, "loss": 1.7013, "step": 44300 }, { "epoch": 0.45, "learning_rate": 5.167142108633638e-05, "loss": 1.7027, "step": 44400 }, { "epoch": 0.45, "learning_rate": 5.178785585375793e-05, "loss": 1.7189, "step": 44500 }, { "epoch": 0.45, "learning_rate": 5.190429062117949e-05, "loss": 1.5007, "step": 44600 }, { "epoch": 0.46, "learning_rate": 5.202072538860104e-05, "loss": 1.6081, "step": 44700 }, { "epoch": 0.46, "learning_rate": 5.213716015602259e-05, "loss": 1.6024, "step": 44800 }, { "epoch": 0.46, "learning_rate": 5.225359492344414e-05, "loss": 1.4827, "step": 44900 }, { "epoch": 0.46, "learning_rate": 5.2368865343191476e-05, "loss": 1.6316, "step": 45000 }, { "epoch": 0.46, "learning_rate": 5.2485300110613034e-05, "loss": 1.4994, "step": 45100 }, { "epoch": 0.46, "learning_rate": 5.2601734878034585e-05, "loss": 1.6473, "step": 45200 }, { "epoch": 0.46, "learning_rate": 5.2718169645456136e-05, "loss": 1.6055, "step": 45300 }, { "epoch": 0.46, "learning_rate": 5.283460441287769e-05, "loss": 1.6548, "step": 45400 }, { "epoch": 0.46, "learning_rate": 5.295103918029924e-05, "loss": 1.6434, "step": 45500 }, { "epoch": 0.46, "learning_rate": 5.306630960004657e-05, "loss": 1.8213, "step": 45600 }, { "epoch": 0.47, "learning_rate": 5.318274436746813e-05, "loss": 1.7567, "step": 45700 }, { "epoch": 0.47, "learning_rate": 5.329917913488968e-05, "loss": 1.6743, "step": 45800 }, { "epoch": 0.47, "learning_rate": 5.341561390231123e-05, "loss": 1.8337, "step": 45900 }, { "epoch": 0.47, "learning_rate": 5.353204866973278e-05, "loss": 1.7429, "step": 46000 }, { "epoch": 0.47, "learning_rate": 5.3648483437154334e-05, "loss": 1.6504, "step": 46100 }, { "epoch": 0.47, "learning_rate": 5.3764918204575885e-05, "loss": 1.67, "step": 46200 }, { "epoch": 0.47, "learning_rate": 5.388135297199744e-05, "loss": 1.8022, "step": 46300 }, { "epoch": 0.47, "learning_rate": 5.399778773941899e-05, "loss": 1.7542, "step": 46400 }, { "epoch": 0.47, "learning_rate": 5.4114222506840546e-05, "loss": 1.5413, "step": 46500 }, { "epoch": 0.47, "learning_rate": 5.42306572742621e-05, "loss": 1.6942, "step": 46600 }, { "epoch": 0.48, "learning_rate": 5.434709204168365e-05, "loss": 1.6963, "step": 46700 }, { "epoch": 0.48, "learning_rate": 5.44635268091052e-05, "loss": 1.7163, "step": 46800 }, { "epoch": 0.48, "learning_rate": 5.457996157652675e-05, "loss": 1.8332, "step": 46900 }, { "epoch": 0.48, "learning_rate": 5.469639634394831e-05, "loss": 1.7749, "step": 47000 }, { "epoch": 0.48, "learning_rate": 5.481283111136986e-05, "loss": 1.5602, "step": 47100 }, { "epoch": 0.48, "learning_rate": 5.492926587879141e-05, "loss": 1.6308, "step": 47200 }, { "epoch": 0.48, "learning_rate": 5.504570064621296e-05, "loss": 1.8576, "step": 47300 }, { "epoch": 0.48, "learning_rate": 5.5162135413634514e-05, "loss": 1.6232, "step": 47400 }, { "epoch": 0.48, "learning_rate": 5.5278570181056065e-05, "loss": 1.7661, "step": 47500 }, { "epoch": 0.48, "learning_rate": 5.5395004948477616e-05, "loss": 1.6967, "step": 47600 }, { "epoch": 0.49, "learning_rate": 5.551143971589917e-05, "loss": 1.6686, "step": 47700 }, { "epoch": 0.49, "learning_rate": 5.562787448332072e-05, "loss": 1.745, "step": 47800 }, { "epoch": 0.49, "learning_rate": 5.574430925074227e-05, "loss": 1.6954, "step": 47900 }, { "epoch": 0.49, "learning_rate": 5.586074401816382e-05, "loss": 1.7496, "step": 48000 }, { "epoch": 0.49, "learning_rate": 5.597717878558537e-05, "loss": 1.7957, "step": 48100 }, { "epoch": 0.49, "learning_rate": 5.6093613553006923e-05, "loss": 1.6327, "step": 48200 }, { "epoch": 0.49, "learning_rate": 5.621004832042848e-05, "loss": 1.6333, "step": 48300 }, { "epoch": 0.49, "learning_rate": 5.632648308785003e-05, "loss": 1.7373, "step": 48400 }, { "epoch": 0.49, "learning_rate": 5.644291785527159e-05, "loss": 1.8736, "step": 48500 }, { "epoch": 0.5, "learning_rate": 5.655935262269314e-05, "loss": 1.9779, "step": 48600 }, { "epoch": 0.5, "learning_rate": 5.667578739011469e-05, "loss": 1.6684, "step": 48700 }, { "epoch": 0.5, "learning_rate": 5.6792222157536244e-05, "loss": 1.7345, "step": 48800 }, { "epoch": 0.5, "learning_rate": 5.6908656924957796e-05, "loss": 1.9195, "step": 48900 }, { "epoch": 0.5, "learning_rate": 5.702509169237935e-05, "loss": 1.7144, "step": 49000 }, { "epoch": 0.5, "learning_rate": 5.71415264598009e-05, "loss": 1.6925, "step": 49100 }, { "epoch": 0.5, "learning_rate": 5.725796122722245e-05, "loss": 1.7665, "step": 49200 }, { "epoch": 0.5, "learning_rate": 5.7374395994644e-05, "loss": 1.7518, "step": 49300 }, { "epoch": 0.5, "learning_rate": 5.749083076206555e-05, "loss": 1.6738, "step": 49400 }, { "epoch": 0.5, "learning_rate": 5.76072655294871e-05, "loss": 1.6673, "step": 49500 }, { "epoch": 0.51, "learning_rate": 5.772370029690866e-05, "loss": 1.5949, "step": 49600 }, { "epoch": 0.51, "learning_rate": 5.784013506433021e-05, "loss": 1.7586, "step": 49700 }, { "epoch": 0.51, "learning_rate": 5.795656983175176e-05, "loss": 1.7049, "step": 49800 }, { "epoch": 0.51, "learning_rate": 5.8073004599173315e-05, "loss": 1.7876, "step": 49900 }, { "epoch": 0.51, "learning_rate": 5.8189439366594866e-05, "loss": 1.7694, "step": 50000 }, { "epoch": 0.51, "learning_rate": 5.830587413401642e-05, "loss": 1.8653, "step": 50100 }, { "epoch": 0.51, "learning_rate": 5.842230890143797e-05, "loss": 1.7531, "step": 50200 }, { "epoch": 0.51, "learning_rate": 5.853874366885952e-05, "loss": 1.7341, "step": 50300 }, { "epoch": 0.51, "learning_rate": 5.865517843628107e-05, "loss": 1.8113, "step": 50400 }, { "epoch": 0.51, "learning_rate": 5.877161320370263e-05, "loss": 1.859, "step": 50500 }, { "epoch": 0.52, "learning_rate": 5.888804797112418e-05, "loss": 1.8349, "step": 50600 }, { "epoch": 0.52, "learning_rate": 5.900448273854573e-05, "loss": 1.823, "step": 50700 }, { "epoch": 0.52, "learning_rate": 5.912091750596728e-05, "loss": 1.8625, "step": 50800 }, { "epoch": 0.52, "learning_rate": 5.923618792571462e-05, "loss": 1.7629, "step": 50900 }, { "epoch": 0.52, "learning_rate": 5.935262269313617e-05, "loss": 1.8262, "step": 51000 }, { "epoch": 0.52, "learning_rate": 5.9469057460557724e-05, "loss": 1.7556, "step": 51100 }, { "epoch": 0.52, "learning_rate": 5.9585492227979276e-05, "loss": 1.9064, "step": 51200 }, { "epoch": 0.52, "learning_rate": 5.970192699540083e-05, "loss": 1.8993, "step": 51300 }, { "epoch": 0.52, "learning_rate": 5.981836176282238e-05, "loss": 1.8139, "step": 51400 }, { "epoch": 0.52, "learning_rate": 5.993479653024393e-05, "loss": 1.7898, "step": 51500 }, { "epoch": 0.53, "learning_rate": 5.9999999858011726e-05, "loss": 1.8265, "step": 51600 }, { "epoch": 0.53, "learning_rate": 5.999999847920002e-05, "loss": 1.914, "step": 51700 }, { "epoch": 0.53, "learning_rate": 5.9999995633567425e-05, "loss": 1.7052, "step": 51800 }, { "epoch": 0.53, "learning_rate": 5.999999132111407e-05, "loss": 1.8215, "step": 51900 }, { "epoch": 0.53, "learning_rate": 5.9999985541840173e-05, "loss": 1.8507, "step": 52000 }, { "epoch": 0.53, "learning_rate": 5.999997829574601e-05, "loss": 1.6986, "step": 52100 }, { "epoch": 0.53, "learning_rate": 5.9999969582831936e-05, "loss": 1.7754, "step": 52200 }, { "epoch": 0.53, "learning_rate": 5.999995940309839e-05, "loss": 1.8446, "step": 52300 }, { "epoch": 0.53, "learning_rate": 5.999994775654585e-05, "loss": 1.8798, "step": 52400 }, { "epoch": 0.53, "learning_rate": 5.99999346431749e-05, "loss": 1.7133, "step": 52500 }, { "epoch": 0.54, "learning_rate": 5.999992006298617e-05, "loss": 1.8276, "step": 52600 }, { "epoch": 0.54, "learning_rate": 5.999990401598039e-05, "loss": 1.8011, "step": 52700 }, { "epoch": 0.54, "learning_rate": 5.999988650215832e-05, "loss": 1.8984, "step": 52800 }, { "epoch": 0.54, "learning_rate": 5.999986752152084e-05, "loss": 1.7291, "step": 52900 }, { "epoch": 0.54, "learning_rate": 5.999984707406886e-05, "loss": 1.6652, "step": 53000 }, { "epoch": 0.54, "learning_rate": 5.9999825386206776e-05, "loss": 1.7401, "step": 53100 }, { "epoch": 0.54, "learning_rate": 5.999980201979701e-05, "loss": 1.7484, "step": 53200 }, { "epoch": 0.54, "learning_rate": 5.9999777186575945e-05, "loss": 1.816, "step": 53300 }, { "epoch": 0.54, "learning_rate": 5.9999750886544816e-05, "loss": 1.6185, "step": 53400 }, { "epoch": 0.55, "learning_rate": 5.999972311970489e-05, "loss": 1.8986, "step": 53500 }, { "epoch": 0.55, "learning_rate": 5.999969388605754e-05, "loss": 1.8713, "step": 53600 }, { "epoch": 0.55, "learning_rate": 5.9999663185604177e-05, "loss": 1.7371, "step": 53700 }, { "epoch": 0.55, "learning_rate": 5.999963101834632e-05, "loss": 1.7292, "step": 53800 }, { "epoch": 0.55, "learning_rate": 5.999959738428553e-05, "loss": 1.8653, "step": 53900 }, { "epoch": 0.55, "learning_rate": 5.999956228342346e-05, "loss": 1.8762, "step": 54000 }, { "epoch": 0.55, "learning_rate": 5.999952608869909e-05, "loss": 1.6714, "step": 54100 }, { "epoch": 0.55, "learning_rate": 5.999948806890764e-05, "loss": 1.8645, "step": 54200 }, { "epoch": 0.55, "learning_rate": 5.9999448582320255e-05, "loss": 1.7443, "step": 54300 }, { "epoch": 0.55, "learning_rate": 5.999940762893886e-05, "loss": 1.7701, "step": 54400 }, { "epoch": 0.56, "learning_rate": 5.9999365208765464e-05, "loss": 1.8186, "step": 54500 }, { "epoch": 0.56, "learning_rate": 5.9999321321802126e-05, "loss": 1.8848, "step": 54600 }, { "epoch": 0.56, "learning_rate": 5.999927596805101e-05, "loss": 1.8028, "step": 54700 }, { "epoch": 0.56, "learning_rate": 5.999922914751432e-05, "loss": 1.6073, "step": 54800 }, { "epoch": 0.56, "learning_rate": 5.999918086019435e-05, "loss": 1.7682, "step": 54900 }, { "epoch": 0.56, "learning_rate": 5.999913110609347e-05, "loss": 1.7641, "step": 55000 }, { "epoch": 0.56, "learning_rate": 5.999907988521409e-05, "loss": 1.6054, "step": 55100 }, { "epoch": 0.56, "learning_rate": 5.999902719755874e-05, "loss": 1.5821, "step": 55200 }, { "epoch": 0.56, "learning_rate": 5.999897304312998e-05, "loss": 1.723, "step": 55300 }, { "epoch": 0.56, "learning_rate": 5.999891742193047e-05, "loss": 1.8605, "step": 55400 }, { "epoch": 0.57, "learning_rate": 5.9998860333962915e-05, "loss": 1.7869, "step": 55500 }, { "epoch": 0.57, "learning_rate": 5.9998801779230116e-05, "loss": 1.7645, "step": 55600 }, { "epoch": 0.57, "learning_rate": 5.999874175773493e-05, "loss": 2.0339, "step": 55700 }, { "epoch": 0.57, "learning_rate": 5.9998680269480304e-05, "loss": 2.0343, "step": 55800 }, { "epoch": 0.57, "learning_rate": 5.999861731446923e-05, "loss": 1.7218, "step": 55900 }, { "epoch": 0.57, "learning_rate": 5.999855289270479e-05, "loss": 1.7438, "step": 56000 }, { "epoch": 0.57, "learning_rate": 5.999848700419014e-05, "loss": 1.7914, "step": 56100 }, { "epoch": 0.57, "learning_rate": 5.999841964892849e-05, "loss": 1.9114, "step": 56200 }, { "epoch": 0.57, "learning_rate": 5.999835082692314e-05, "loss": 1.814, "step": 56300 }, { "epoch": 0.57, "learning_rate": 5.9998280538177463e-05, "loss": 1.8186, "step": 56400 }, { "epoch": 0.58, "learning_rate": 5.999820878269488e-05, "loss": 1.6235, "step": 56500 }, { "epoch": 0.58, "learning_rate": 5.9998135560478916e-05, "loss": 1.7426, "step": 56600 }, { "epoch": 0.58, "learning_rate": 5.99980616256829e-05, "loss": 1.7726, "step": 56700 }, { "epoch": 0.58, "learning_rate": 5.9997985484678205e-05, "loss": 1.8324, "step": 56800 }, { "epoch": 0.58, "learning_rate": 5.999790787695105e-05, "loss": 1.7814, "step": 56900 }, { "epoch": 0.58, "learning_rate": 5.999782880250521e-05, "loss": 1.7815, "step": 57000 }, { "epoch": 0.58, "learning_rate": 5.9997748261344556e-05, "loss": 1.9284, "step": 57100 }, { "epoch": 0.58, "learning_rate": 5.999766625347303e-05, "loss": 1.8563, "step": 57200 }, { "epoch": 0.58, "learning_rate": 5.999758277889464e-05, "loss": 1.7365, "step": 57300 }, { "epoch": 0.58, "learning_rate": 5.9997497837613475e-05, "loss": 1.6019, "step": 57400 }, { "epoch": 0.59, "learning_rate": 5.999741142963368e-05, "loss": 1.7575, "step": 57500 }, { "epoch": 0.59, "learning_rate": 5.999732355495948e-05, "loss": 1.8243, "step": 57600 }, { "epoch": 0.59, "learning_rate": 5.999723421359517e-05, "loss": 1.9446, "step": 57700 }, { "epoch": 0.59, "learning_rate": 5.9997143405545124e-05, "loss": 1.8453, "step": 57800 }, { "epoch": 0.59, "learning_rate": 5.999705113081378e-05, "loss": 1.7528, "step": 57900 }, { "epoch": 0.59, "learning_rate": 5.999695738940565e-05, "loss": 1.7813, "step": 58000 }, { "epoch": 0.59, "learning_rate": 5.999686218132531e-05, "loss": 1.756, "step": 58100 }, { "epoch": 0.59, "learning_rate": 5.9996765506577425e-05, "loss": 1.8373, "step": 58200 }, { "epoch": 0.59, "learning_rate": 5.9996667365166716e-05, "loss": 1.9234, "step": 58300 }, { "epoch": 0.59, "learning_rate": 5.9996568760438614e-05, "loss": 1.7684, "step": 58400 }, { "epoch": 0.6, "learning_rate": 5.9996467700383237e-05, "loss": 1.7996, "step": 58500 }, { "epoch": 0.6, "learning_rate": 5.9996365173679604e-05, "loss": 1.7491, "step": 58600 }, { "epoch": 0.6, "learning_rate": 5.999626118033272e-05, "loss": 1.681, "step": 58700 }, { "epoch": 0.6, "learning_rate": 5.999615572034767e-05, "loss": 1.8637, "step": 58800 }, { "epoch": 0.6, "learning_rate": 5.999604879372961e-05, "loss": 1.5885, "step": 58900 }, { "epoch": 0.6, "learning_rate": 5.999594040048377e-05, "loss": 1.7716, "step": 59000 }, { "epoch": 0.6, "learning_rate": 5.9995830540615466e-05, "loss": 1.7986, "step": 59100 }, { "epoch": 0.6, "learning_rate": 5.9995719214130035e-05, "loss": 1.8433, "step": 59200 }, { "epoch": 0.6, "learning_rate": 5.9995606421032954e-05, "loss": 1.7293, "step": 59300 }, { "epoch": 0.61, "learning_rate": 5.9995492161329713e-05, "loss": 1.9343, "step": 59400 }, { "epoch": 0.61, "learning_rate": 5.999537643502592e-05, "loss": 1.6948, "step": 59500 }, { "epoch": 0.61, "learning_rate": 5.999525924212721e-05, "loss": 1.7785, "step": 59600 }, { "epoch": 0.61, "learning_rate": 5.999514058263934e-05, "loss": 1.8778, "step": 59700 }, { "epoch": 0.61, "learning_rate": 5.999502045656809e-05, "loss": 1.8336, "step": 59800 }, { "epoch": 0.61, "learning_rate": 5.999489886391934e-05, "loss": 1.743, "step": 59900 }, { "epoch": 0.61, "learning_rate": 5.999477580469903e-05, "loss": 1.7351, "step": 60000 }, { "epoch": 0.61, "learning_rate": 5.999465127891319e-05, "loss": 1.9071, "step": 60100 }, { "epoch": 0.61, "learning_rate": 5.99945252865679e-05, "loss": 1.7476, "step": 60200 }, { "epoch": 0.61, "learning_rate": 5.9994397827669315e-05, "loss": 1.7562, "step": 60300 }, { "epoch": 0.62, "learning_rate": 5.999426890222368e-05, "loss": 1.8382, "step": 60400 }, { "epoch": 0.62, "learning_rate": 5.999413982141651e-05, "loss": 1.7408, "step": 60500 }, { "epoch": 0.62, "learning_rate": 5.999400797756105e-05, "loss": 1.8513, "step": 60600 }, { "epoch": 0.62, "learning_rate": 5.999387466717761e-05, "loss": 1.9462, "step": 60700 }, { "epoch": 0.62, "learning_rate": 5.9993739890272686e-05, "loss": 1.9378, "step": 60800 }, { "epoch": 0.62, "learning_rate": 5.999360364685287e-05, "loss": 1.7939, "step": 60900 }, { "epoch": 0.62, "learning_rate": 5.9993465936924825e-05, "loss": 1.6532, "step": 61000 }, { "epoch": 0.62, "learning_rate": 5.9993326760495296e-05, "loss": 1.6977, "step": 61100 }, { "epoch": 0.62, "learning_rate": 5.999318611757107e-05, "loss": 1.8962, "step": 61200 }, { "epoch": 0.62, "learning_rate": 5.999304400815905e-05, "loss": 1.754, "step": 61300 }, { "epoch": 0.63, "learning_rate": 5.9992900432266154e-05, "loss": 1.8434, "step": 61400 }, { "epoch": 0.63, "learning_rate": 5.9992755389899406e-05, "loss": 1.7021, "step": 61500 }, { "epoch": 0.63, "learning_rate": 5.9992608881065925e-05, "loss": 1.9396, "step": 61600 }, { "epoch": 0.63, "learning_rate": 5.999246090577284e-05, "loss": 1.757, "step": 61700 }, { "epoch": 0.63, "learning_rate": 5.999231146402742e-05, "loss": 1.9366, "step": 61800 }, { "epoch": 0.63, "learning_rate": 5.999216055583694e-05, "loss": 1.7092, "step": 61900 }, { "epoch": 0.63, "learning_rate": 5.99920081812088e-05, "loss": 1.8041, "step": 62000 }, { "epoch": 0.63, "learning_rate": 5.999185434015044e-05, "loss": 1.8746, "step": 62100 }, { "epoch": 0.63, "learning_rate": 5.9991699032669384e-05, "loss": 1.8011, "step": 62200 }, { "epoch": 0.63, "learning_rate": 5.9991542258773225e-05, "loss": 1.7387, "step": 62300 }, { "epoch": 0.64, "learning_rate": 5.999138401846964e-05, "loss": 1.9458, "step": 62400 }, { "epoch": 0.64, "learning_rate": 5.999122431176634e-05, "loss": 1.9475, "step": 62500 }, { "epoch": 0.64, "learning_rate": 5.9991063138671167e-05, "loss": 1.7204, "step": 62600 }, { "epoch": 0.64, "learning_rate": 5.999090049919198e-05, "loss": 1.7923, "step": 62700 }, { "epoch": 0.64, "learning_rate": 5.9990736393336724e-05, "loss": 1.7572, "step": 62800 }, { "epoch": 0.64, "learning_rate": 5.999057082111344e-05, "loss": 1.8297, "step": 62900 }, { "epoch": 0.64, "learning_rate": 5.999040378253021e-05, "loss": 1.9533, "step": 63000 }, { "epoch": 0.64, "learning_rate": 5.9990235277595224e-05, "loss": 1.8337, "step": 63100 }, { "epoch": 0.64, "learning_rate": 5.999006530631669e-05, "loss": 1.6718, "step": 63200 }, { "epoch": 0.64, "learning_rate": 5.9989893868702944e-05, "loss": 1.8575, "step": 63300 }, { "epoch": 0.65, "learning_rate": 5.9989720964762355e-05, "loss": 1.7373, "step": 63400 }, { "epoch": 0.65, "learning_rate": 5.9989546594503374e-05, "loss": 1.7246, "step": 63500 }, { "epoch": 0.65, "learning_rate": 5.9989370757934544e-05, "loss": 1.7, "step": 63600 }, { "epoch": 0.65, "learning_rate": 5.9989193455064445e-05, "loss": 1.7548, "step": 63700 }, { "epoch": 0.65, "learning_rate": 5.998901468590175e-05, "loss": 1.684, "step": 63800 }, { "epoch": 0.65, "learning_rate": 5.99888344504552e-05, "loss": 1.7344, "step": 63900 }, { "epoch": 0.65, "learning_rate": 5.9988652748733614e-05, "loss": 1.623, "step": 64000 }, { "epoch": 0.65, "learning_rate": 5.998846958074588e-05, "loss": 1.7377, "step": 64100 }, { "epoch": 0.65, "learning_rate": 5.9988284946500933e-05, "loss": 1.8485, "step": 64200 }, { "epoch": 0.66, "learning_rate": 5.9988098846007824e-05, "loss": 1.8087, "step": 64300 }, { "epoch": 0.66, "learning_rate": 5.9987911279275625e-05, "loss": 1.7151, "step": 64400 }, { "epoch": 0.66, "learning_rate": 5.998772224631353e-05, "loss": 1.6868, "step": 64500 }, { "epoch": 0.66, "learning_rate": 5.998753174713078e-05, "loss": 1.7295, "step": 64600 }, { "epoch": 0.66, "learning_rate": 5.9987339781736676e-05, "loss": 1.6257, "step": 64700 }, { "epoch": 0.66, "learning_rate": 5.998714635014062e-05, "loss": 1.6688, "step": 64800 }, { "epoch": 0.66, "learning_rate": 5.9986951452352056e-05, "loss": 1.8821, "step": 64900 }, { "epoch": 0.66, "learning_rate": 5.998675508838052e-05, "loss": 1.8325, "step": 65000 }, { "epoch": 0.66, "learning_rate": 5.9986557258235614e-05, "loss": 1.695, "step": 65100 }, { "epoch": 0.66, "learning_rate": 5.9986357961927005e-05, "loss": 1.6766, "step": 65200 }, { "epoch": 0.67, "learning_rate": 5.998615719946444e-05, "loss": 1.7103, "step": 65300 }, { "epoch": 0.67, "learning_rate": 5.998595497085774e-05, "loss": 1.7861, "step": 65400 }, { "epoch": 0.67, "learning_rate": 5.998575127611678e-05, "loss": 1.7178, "step": 65500 }, { "epoch": 0.67, "learning_rate": 5.9985546115251534e-05, "loss": 1.771, "step": 65600 }, { "epoch": 0.67, "learning_rate": 5.998534156179906e-05, "loss": 1.6728, "step": 65700 }, { "epoch": 0.67, "learning_rate": 5.9985133483376386e-05, "loss": 1.6888, "step": 65800 }, { "epoch": 0.67, "learning_rate": 5.998492393885964e-05, "loss": 1.7414, "step": 65900 }, { "epoch": 0.67, "learning_rate": 5.998471292825903e-05, "loss": 1.711, "step": 66000 }, { "epoch": 0.67, "learning_rate": 5.998450045158491e-05, "loss": 1.7552, "step": 66100 }, { "epoch": 0.67, "learning_rate": 5.9984286508847666e-05, "loss": 1.7269, "step": 66200 }, { "epoch": 0.68, "learning_rate": 5.998407110005773e-05, "loss": 1.6826, "step": 66300 }, { "epoch": 0.68, "learning_rate": 5.9983854225225655e-05, "loss": 1.8838, "step": 66400 }, { "epoch": 0.68, "learning_rate": 5.998363588436206e-05, "loss": 1.7023, "step": 66500 }, { "epoch": 0.68, "learning_rate": 5.998341607747759e-05, "loss": 1.6921, "step": 66600 }, { "epoch": 0.68, "learning_rate": 5.9983194804583006e-05, "loss": 1.7901, "step": 66700 }, { "epoch": 0.68, "learning_rate": 5.9982972065689135e-05, "loss": 1.6776, "step": 66800 }, { "epoch": 0.68, "learning_rate": 5.998274786080685e-05, "loss": 1.7197, "step": 66900 }, { "epoch": 0.68, "learning_rate": 5.998252218994712e-05, "loss": 1.7005, "step": 67000 }, { "epoch": 0.68, "learning_rate": 5.998229505312098e-05, "loss": 1.5663, "step": 67100 }, { "epoch": 0.68, "learning_rate": 5.998206645033955e-05, "loss": 1.6845, "step": 67200 }, { "epoch": 0.69, "learning_rate": 5.9981836381613985e-05, "loss": 1.7288, "step": 67300 }, { "epoch": 0.69, "learning_rate": 5.998160484695554e-05, "loss": 1.8569, "step": 67400 }, { "epoch": 0.69, "learning_rate": 5.998137184637555e-05, "loss": 1.8241, "step": 67500 }, { "epoch": 0.69, "learning_rate": 5.998113737988538e-05, "loss": 1.5569, "step": 67600 }, { "epoch": 0.69, "learning_rate": 5.998090144749653e-05, "loss": 1.6191, "step": 67700 }, { "epoch": 0.69, "learning_rate": 5.99806640492205e-05, "loss": 1.8349, "step": 67800 }, { "epoch": 0.69, "learning_rate": 5.9980425185068914e-05, "loss": 1.7102, "step": 67900 }, { "epoch": 0.69, "learning_rate": 5.998018485505346e-05, "loss": 1.708, "step": 68000 }, { "epoch": 0.69, "learning_rate": 5.9979943059185874e-05, "loss": 1.8039, "step": 68100 }, { "epoch": 0.69, "learning_rate": 5.997969979747799e-05, "loss": 1.7602, "step": 68200 }, { "epoch": 0.7, "learning_rate": 5.9979455069941686e-05, "loss": 1.8215, "step": 68300 }, { "epoch": 0.7, "learning_rate": 5.997920887658894e-05, "loss": 1.7234, "step": 68400 }, { "epoch": 0.7, "learning_rate": 5.997896121743179e-05, "loss": 1.6635, "step": 68500 }, { "epoch": 0.7, "learning_rate": 5.997871209248233e-05, "loss": 1.722, "step": 68600 }, { "epoch": 0.7, "learning_rate": 5.997846150175276e-05, "loss": 1.6843, "step": 68700 }, { "epoch": 0.7, "learning_rate": 5.997820944525533e-05, "loss": 1.6984, "step": 68800 }, { "epoch": 0.7, "learning_rate": 5.9977955923002354e-05, "loss": 1.855, "step": 68900 }, { "epoch": 0.7, "learning_rate": 5.9977700935006236e-05, "loss": 1.6806, "step": 69000 }, { "epoch": 0.7, "learning_rate": 5.997744448127943e-05, "loss": 1.733, "step": 69100 }, { "epoch": 0.71, "learning_rate": 5.9977189148284206e-05, "loss": 1.7506, "step": 69200 }, { "epoch": 0.71, "learning_rate": 5.997692977779074e-05, "loss": 1.7328, "step": 69300 }, { "epoch": 0.71, "learning_rate": 5.997666894160429e-05, "loss": 1.7325, "step": 69400 }, { "epoch": 0.71, "learning_rate": 5.997640663973762e-05, "loss": 1.695, "step": 69500 }, { "epoch": 0.71, "learning_rate": 5.997614287220355e-05, "loss": 1.5971, "step": 69600 }, { "epoch": 0.71, "learning_rate": 5.997587763901499e-05, "loss": 1.6594, "step": 69700 }, { "epoch": 0.71, "learning_rate": 5.9975610940184904e-05, "loss": 1.836, "step": 69800 }, { "epoch": 0.71, "learning_rate": 5.997534277572632e-05, "loss": 1.5829, "step": 69900 }, { "epoch": 0.71, "learning_rate": 5.997507314565236e-05, "loss": 1.7333, "step": 70000 }, { "epoch": 0.71, "learning_rate": 5.997480204997621e-05, "loss": 1.7039, "step": 70100 }, { "epoch": 0.72, "learning_rate": 5.997452948871112e-05, "loss": 1.6867, "step": 70200 }, { "epoch": 0.72, "learning_rate": 5.9974255461870406e-05, "loss": 1.5479, "step": 70300 }, { "epoch": 0.72, "learning_rate": 5.997397996946749e-05, "loss": 1.6165, "step": 70400 }, { "epoch": 0.72, "learning_rate": 5.997370301151582e-05, "loss": 1.6625, "step": 70500 }, { "epoch": 0.72, "learning_rate": 5.9973424588028944e-05, "loss": 1.7098, "step": 70600 }, { "epoch": 0.72, "learning_rate": 5.997314469902048e-05, "loss": 1.5972, "step": 70700 }, { "epoch": 0.72, "learning_rate": 5.997286334450412e-05, "loss": 1.5813, "step": 70800 }, { "epoch": 0.72, "learning_rate": 5.997258052449361e-05, "loss": 1.683, "step": 70900 }, { "epoch": 0.72, "learning_rate": 5.997229623900277e-05, "loss": 1.7293, "step": 71000 }, { "epoch": 0.72, "learning_rate": 5.997201048804552e-05, "loss": 1.6516, "step": 71100 }, { "epoch": 0.73, "learning_rate": 5.9971723271635806e-05, "loss": 1.6689, "step": 71200 }, { "epoch": 0.73, "learning_rate": 5.997143458978769e-05, "loss": 1.5891, "step": 71300 }, { "epoch": 0.73, "learning_rate": 5.9971144442515293e-05, "loss": 1.7378, "step": 71400 }, { "epoch": 0.73, "learning_rate": 5.9970852829832785e-05, "loss": 1.7722, "step": 71500 }, { "epoch": 0.73, "learning_rate": 5.997056268978888e-05, "loss": 1.6847, "step": 71600 }, { "epoch": 0.73, "learning_rate": 5.997026816098274e-05, "loss": 1.6061, "step": 71700 }, { "epoch": 0.73, "learning_rate": 5.9969972166809356e-05, "loss": 1.7402, "step": 71800 }, { "epoch": 0.73, "learning_rate": 5.996967470728319e-05, "loss": 1.6667, "step": 71900 }, { "epoch": 0.73, "learning_rate": 5.996937578241877e-05, "loss": 1.8258, "step": 72000 }, { "epoch": 0.73, "learning_rate": 5.996907539223072e-05, "loss": 1.664, "step": 72100 }, { "epoch": 0.74, "learning_rate": 5.996877353673373e-05, "loss": 1.767, "step": 72200 }, { "epoch": 0.74, "learning_rate": 5.9968470215942564e-05, "loss": 1.7394, "step": 72300 }, { "epoch": 0.74, "learning_rate": 5.9968165429872044e-05, "loss": 1.7196, "step": 72400 }, { "epoch": 0.74, "learning_rate": 5.9967859178537076e-05, "loss": 1.7572, "step": 72500 }, { "epoch": 0.74, "learning_rate": 5.996755146195264e-05, "loss": 1.731, "step": 72600 }, { "epoch": 0.74, "learning_rate": 5.996724228013375e-05, "loss": 1.7933, "step": 72700 }, { "epoch": 0.74, "learning_rate": 5.996693163309557e-05, "loss": 1.7275, "step": 72800 }, { "epoch": 0.74, "learning_rate": 5.9966619520853266e-05, "loss": 1.7643, "step": 72900 }, { "epoch": 0.74, "learning_rate": 5.9966305943422094e-05, "loss": 1.7486, "step": 73000 }, { "epoch": 0.74, "learning_rate": 5.9965990900817385e-05, "loss": 1.8221, "step": 73100 }, { "epoch": 0.75, "learning_rate": 5.9965674393054556e-05, "loss": 1.7577, "step": 73200 }, { "epoch": 0.75, "learning_rate": 5.996535642014907e-05, "loss": 1.768, "step": 73300 }, { "epoch": 0.75, "learning_rate": 5.9965036982116486e-05, "loss": 1.8148, "step": 73400 }, { "epoch": 0.75, "learning_rate": 5.9964716078972415e-05, "loss": 1.8267, "step": 73500 }, { "epoch": 0.75, "learning_rate": 5.9964393710732545e-05, "loss": 1.8537, "step": 73600 }, { "epoch": 0.75, "learning_rate": 5.996406987741264e-05, "loss": 1.7622, "step": 73700 }, { "epoch": 0.75, "learning_rate": 5.9963744579028535e-05, "loss": 1.7858, "step": 73800 }, { "epoch": 0.75, "learning_rate": 5.9963417815596135e-05, "loss": 1.735, "step": 73900 }, { "epoch": 0.75, "learning_rate": 5.996308958713141e-05, "loss": 1.7192, "step": 74000 }, { "epoch": 0.75, "learning_rate": 5.9962759893650424e-05, "loss": 1.7746, "step": 74100 }, { "epoch": 0.76, "learning_rate": 5.996242873516928e-05, "loss": 1.8431, "step": 74200 }, { "epoch": 0.76, "learning_rate": 5.996209611170418e-05, "loss": 1.775, "step": 74300 }, { "epoch": 0.76, "learning_rate": 5.996176202327139e-05, "loss": 1.5642, "step": 74400 }, { "epoch": 0.76, "learning_rate": 5.996142646988723e-05, "loss": 1.5735, "step": 74500 }, { "epoch": 0.76, "learning_rate": 5.9961089451568125e-05, "loss": 1.9178, "step": 74600 }, { "epoch": 0.76, "learning_rate": 5.996075096833054e-05, "loss": 1.7321, "step": 74700 }, { "epoch": 0.76, "learning_rate": 5.996041102019102e-05, "loss": 1.8698, "step": 74800 }, { "epoch": 0.76, "learning_rate": 5.996007302854759e-05, "loss": 1.8204, "step": 74900 }, { "epoch": 0.76, "learning_rate": 5.995973016530276e-05, "loss": 1.7028, "step": 75000 }, { "epoch": 0.77, "learning_rate": 5.9959385837205914e-05, "loss": 1.7415, "step": 75100 }, { "epoch": 0.77, "learning_rate": 5.99590400442739e-05, "loss": 1.7637, "step": 75200 }, { "epoch": 0.77, "learning_rate": 5.9958692786523614e-05, "loss": 1.5989, "step": 75300 }, { "epoch": 0.77, "learning_rate": 5.995834406397203e-05, "loss": 1.7268, "step": 75400 }, { "epoch": 0.77, "learning_rate": 5.9957993876636205e-05, "loss": 1.8434, "step": 75500 }, { "epoch": 0.77, "learning_rate": 5.995764222453326e-05, "loss": 1.6682, "step": 75600 }, { "epoch": 0.77, "learning_rate": 5.995728910768039e-05, "loss": 1.8422, "step": 75700 }, { "epoch": 0.77, "learning_rate": 5.995693452609485e-05, "loss": 1.7228, "step": 75800 }, { "epoch": 0.77, "learning_rate": 5.9956578479793994e-05, "loss": 1.6471, "step": 75900 }, { "epoch": 0.77, "learning_rate": 5.9956220968795216e-05, "loss": 1.7214, "step": 76000 }, { "epoch": 0.78, "learning_rate": 5.9955861993116e-05, "loss": 1.8173, "step": 76100 }, { "epoch": 0.78, "learning_rate": 5.995550155277391e-05, "loss": 1.6598, "step": 76200 }, { "epoch": 0.78, "learning_rate": 5.995513964778656e-05, "loss": 1.7961, "step": 76300 }, { "epoch": 0.78, "learning_rate": 5.995477627817163e-05, "loss": 1.6887, "step": 76400 }, { "epoch": 0.78, "learning_rate": 5.995441144394691e-05, "loss": 1.688, "step": 76500 }, { "epoch": 0.78, "learning_rate": 5.9954045145130224e-05, "loss": 1.5794, "step": 76600 }, { "epoch": 0.78, "learning_rate": 5.995367738173949e-05, "loss": 1.5646, "step": 76700 }, { "epoch": 0.78, "learning_rate": 5.995330815379269e-05, "loss": 1.749, "step": 76800 }, { "epoch": 0.78, "learning_rate": 5.9952937461307866e-05, "loss": 1.6658, "step": 76900 }, { "epoch": 0.78, "learning_rate": 5.995256530430316e-05, "loss": 1.7042, "step": 77000 }, { "epoch": 0.79, "learning_rate": 5.995219168279675e-05, "loss": 1.7833, "step": 77100 }, { "epoch": 0.79, "learning_rate": 5.9951816596806914e-05, "loss": 1.6982, "step": 77200 }, { "epoch": 0.79, "learning_rate": 5.9951440046351994e-05, "loss": 1.8167, "step": 77300 }, { "epoch": 0.79, "learning_rate": 5.995106203145039e-05, "loss": 1.4935, "step": 77400 }, { "epoch": 0.79, "learning_rate": 5.995068255212059e-05, "loss": 1.6009, "step": 77500 }, { "epoch": 0.79, "learning_rate": 5.995030160838116e-05, "loss": 1.6238, "step": 77600 }, { "epoch": 0.79, "learning_rate": 5.9949919200250703e-05, "loss": 1.7839, "step": 77700 }, { "epoch": 0.79, "learning_rate": 5.994953532774793e-05, "loss": 1.7463, "step": 77800 }, { "epoch": 0.79, "learning_rate": 5.994914999089161e-05, "loss": 1.5378, "step": 77900 }, { "epoch": 0.79, "learning_rate": 5.994876318970059e-05, "loss": 1.6648, "step": 78000 }, { "epoch": 0.8, "learning_rate": 5.994837492419377e-05, "loss": 1.7013, "step": 78100 }, { "epoch": 0.8, "learning_rate": 5.994798519439013e-05, "loss": 1.5961, "step": 78200 }, { "epoch": 0.8, "learning_rate": 5.994759400030875e-05, "loss": 1.7069, "step": 78300 }, { "epoch": 0.8, "learning_rate": 5.994720134196874e-05, "loss": 1.6798, "step": 78400 }, { "epoch": 0.8, "learning_rate": 5.994680721938929e-05, "loss": 1.5955, "step": 78500 }, { "epoch": 0.8, "learning_rate": 5.9946411632589686e-05, "loss": 1.5852, "step": 78600 }, { "epoch": 0.8, "learning_rate": 5.9946014581589264e-05, "loss": 1.5861, "step": 78700 }, { "epoch": 0.8, "learning_rate": 5.994561606640744e-05, "loss": 1.6381, "step": 78800 }, { "epoch": 0.8, "learning_rate": 5.994521608706369e-05, "loss": 1.4923, "step": 78900 }, { "epoch": 0.8, "learning_rate": 5.994481464357758e-05, "loss": 1.6126, "step": 79000 }, { "epoch": 0.81, "learning_rate": 5.994441173596873e-05, "loss": 1.5464, "step": 79100 }, { "epoch": 0.81, "learning_rate": 5.994400736425685e-05, "loss": 1.5168, "step": 79200 }, { "epoch": 0.81, "learning_rate": 5.9943605594066815e-05, "loss": 1.5812, "step": 79300 }, { "epoch": 0.81, "learning_rate": 5.9943198308848785e-05, "loss": 1.5695, "step": 79400 }, { "epoch": 0.81, "learning_rate": 5.994278955958705e-05, "loss": 1.5285, "step": 79500 }, { "epoch": 0.81, "learning_rate": 5.9942379346301594e-05, "loss": 1.6406, "step": 79600 }, { "epoch": 0.81, "learning_rate": 5.994196766901248e-05, "loss": 1.6521, "step": 79700 }, { "epoch": 0.81, "learning_rate": 5.994155452773982e-05, "loss": 1.5834, "step": 79800 }, { "epoch": 0.81, "learning_rate": 5.994113992250384e-05, "loss": 1.596, "step": 79900 }, { "epoch": 0.82, "learning_rate": 5.9940723853324786e-05, "loss": 1.5746, "step": 80000 }, { "epoch": 0.82, "learning_rate": 5.9940306320223024e-05, "loss": 1.6426, "step": 80100 }, { "epoch": 0.82, "learning_rate": 5.993988732321894e-05, "loss": 1.4767, "step": 80200 }, { "epoch": 0.82, "learning_rate": 5.993946686233305e-05, "loss": 1.6856, "step": 80300 }, { "epoch": 0.82, "learning_rate": 5.99390449375859e-05, "loss": 1.543, "step": 80400 }, { "epoch": 0.82, "learning_rate": 5.9938625790129935e-05, "loss": 1.6273, "step": 80500 }, { "epoch": 0.82, "learning_rate": 5.9938200952360323e-05, "loss": 1.5779, "step": 80600 }, { "epoch": 0.82, "learning_rate": 5.993777465079134e-05, "loss": 1.6695, "step": 80700 }, { "epoch": 0.82, "learning_rate": 5.993734688544384e-05, "loss": 1.6803, "step": 80800 }, { "epoch": 0.82, "learning_rate": 5.993692195587532e-05, "loss": 1.5935, "step": 80900 }, { "epoch": 0.83, "learning_rate": 5.9936491277670844e-05, "loss": 1.571, "step": 81000 }, { "epoch": 0.83, "learning_rate": 5.9936059135750614e-05, "loss": 1.6264, "step": 81100 }, { "epoch": 0.83, "learning_rate": 5.9935625530135734e-05, "loss": 1.7186, "step": 81200 }, { "epoch": 0.83, "learning_rate": 5.993519046084741e-05, "loss": 1.5536, "step": 81300 }, { "epoch": 0.83, "learning_rate": 5.993475392790693e-05, "loss": 1.498, "step": 81400 }, { "epoch": 0.83, "learning_rate": 5.993431593133561e-05, "loss": 1.7581, "step": 81500 }, { "epoch": 0.83, "learning_rate": 5.993387647115488e-05, "loss": 1.6968, "step": 81600 }, { "epoch": 0.83, "learning_rate": 5.9933435547386226e-05, "loss": 1.5503, "step": 81700 }, { "epoch": 0.83, "learning_rate": 5.993299316005122e-05, "loss": 1.7896, "step": 81800 }, { "epoch": 0.83, "learning_rate": 5.993254930917147e-05, "loss": 1.6738, "step": 81900 }, { "epoch": 0.84, "learning_rate": 5.993210399476868e-05, "loss": 1.6336, "step": 82000 }, { "epoch": 0.84, "learning_rate": 5.993165721686464e-05, "loss": 1.6025, "step": 82100 }, { "epoch": 0.84, "learning_rate": 5.993120897548118e-05, "loss": 1.4988, "step": 82200 }, { "epoch": 0.84, "learning_rate": 5.993075927064022e-05, "loss": 1.6275, "step": 82300 }, { "epoch": 0.84, "learning_rate": 5.9930308102363755e-05, "loss": 1.6154, "step": 82400 }, { "epoch": 0.84, "learning_rate": 5.9929855470673834e-05, "loss": 1.6202, "step": 82500 }, { "epoch": 0.84, "learning_rate": 5.99294013755926e-05, "loss": 1.4874, "step": 82600 }, { "epoch": 0.84, "learning_rate": 5.992894581714224e-05, "loss": 1.4211, "step": 82700 }, { "epoch": 0.84, "learning_rate": 5.992848879534503e-05, "loss": 1.5337, "step": 82800 }, { "epoch": 0.84, "learning_rate": 5.992803031022334e-05, "loss": 1.4893, "step": 82900 }, { "epoch": 0.85, "learning_rate": 5.992757496852707e-05, "loss": 1.6062, "step": 83000 }, { "epoch": 0.85, "learning_rate": 5.9927113571456375e-05, "loss": 1.6367, "step": 83100 }, { "epoch": 0.85, "learning_rate": 5.992665071112843e-05, "loss": 1.5922, "step": 83200 }, { "epoch": 0.85, "learning_rate": 5.992618638756586e-05, "loss": 1.5424, "step": 83300 }, { "epoch": 0.85, "learning_rate": 5.992572060079136e-05, "loss": 1.6272, "step": 83400 }, { "epoch": 0.85, "learning_rate": 5.9925253350827716e-05, "loss": 1.5665, "step": 83500 }, { "epoch": 0.85, "learning_rate": 5.9924784637697755e-05, "loss": 1.5553, "step": 83600 }, { "epoch": 0.85, "learning_rate": 5.992431446142442e-05, "loss": 1.494, "step": 83700 }, { "epoch": 0.85, "learning_rate": 5.992384282203069e-05, "loss": 1.6219, "step": 83800 }, { "epoch": 0.85, "learning_rate": 5.992336971953961e-05, "loss": 1.6507, "step": 83900 }, { "epoch": 0.86, "learning_rate": 5.992289515397433e-05, "loss": 1.4922, "step": 84000 }, { "epoch": 0.86, "learning_rate": 5.992241912535804e-05, "loss": 1.5944, "step": 84100 }, { "epoch": 0.86, "learning_rate": 5.9921941633714034e-05, "loss": 1.48, "step": 84200 }, { "epoch": 0.86, "learning_rate": 5.9921462679065645e-05, "loss": 1.5063, "step": 84300 }, { "epoch": 0.86, "learning_rate": 5.9920982261436294e-05, "loss": 1.5678, "step": 84400 }, { "epoch": 0.86, "learning_rate": 5.9920500380849456e-05, "loss": 1.5608, "step": 84500 }, { "epoch": 0.86, "learning_rate": 5.992001703732872e-05, "loss": 1.4435, "step": 84600 }, { "epoch": 0.86, "learning_rate": 5.99195322308977e-05, "loss": 1.469, "step": 84700 }, { "epoch": 0.86, "learning_rate": 5.99190459615801e-05, "loss": 1.5086, "step": 84800 }, { "epoch": 0.86, "learning_rate": 5.99185582293997e-05, "loss": 1.4966, "step": 84900 }, { "epoch": 0.87, "learning_rate": 5.9918069034380356e-05, "loss": 1.5488, "step": 85000 }, { "epoch": 0.87, "learning_rate": 5.991757837654597e-05, "loss": 1.5319, "step": 85100 }, { "epoch": 0.87, "learning_rate": 5.991708625592054e-05, "loss": 1.5415, "step": 85200 }, { "epoch": 0.87, "learning_rate": 5.9916592672528134e-05, "loss": 1.7018, "step": 85300 }, { "epoch": 0.87, "learning_rate": 5.991609762639287e-05, "loss": 1.5908, "step": 85400 }, { "epoch": 0.87, "learning_rate": 5.9915601117538976e-05, "loss": 1.5505, "step": 85500 }, { "epoch": 0.87, "learning_rate": 5.991510314599071e-05, "loss": 1.4897, "step": 85600 }, { "epoch": 0.87, "learning_rate": 5.9914603711772416e-05, "loss": 1.5582, "step": 85700 }, { "epoch": 0.87, "learning_rate": 5.991410281490853e-05, "loss": 1.5034, "step": 85800 }, { "epoch": 0.88, "learning_rate": 5.991360045542353e-05, "loss": 1.7018, "step": 85900 }, { "epoch": 0.88, "learning_rate": 5.991309663334198e-05, "loss": 1.5036, "step": 86000 }, { "epoch": 0.88, "learning_rate": 5.991259134868852e-05, "loss": 1.8881, "step": 86100 }, { "epoch": 0.88, "learning_rate": 5.9912084601487855e-05, "loss": 2.5213, "step": 86200 }, { "epoch": 0.88, "learning_rate": 5.991157639176475e-05, "loss": 2.3425, "step": 86300 }, { "epoch": 0.88, "learning_rate": 5.991107182350556e-05, "loss": 1.6305, "step": 86400 }, { "epoch": 0.88, "learning_rate": 5.991056070343682e-05, "loss": 1.7046, "step": 86500 }, { "epoch": 0.88, "learning_rate": 5.991004812092015e-05, "loss": 1.7513, "step": 86600 }, { "epoch": 0.88, "learning_rate": 5.990953407598063e-05, "loss": 1.7299, "step": 86700 }, { "epoch": 0.88, "learning_rate": 5.9909018568643387e-05, "loss": 1.6553, "step": 86800 }, { "epoch": 0.89, "learning_rate": 5.9908501598933616e-05, "loss": 1.7274, "step": 86900 }, { "epoch": 0.89, "learning_rate": 5.99079831668766e-05, "loss": 1.6031, "step": 87000 }, { "epoch": 0.89, "learning_rate": 5.99074632724977e-05, "loss": 1.5621, "step": 87100 }, { "epoch": 0.89, "learning_rate": 5.990694191582231e-05, "loss": 1.6214, "step": 87200 }, { "epoch": 0.89, "learning_rate": 5.990641909687595e-05, "loss": 1.6506, "step": 87300 }, { "epoch": 0.89, "learning_rate": 5.990589481568415e-05, "loss": 1.6717, "step": 87400 }, { "epoch": 0.89, "learning_rate": 5.9905369072272584e-05, "loss": 1.6402, "step": 87500 }, { "epoch": 0.89, "learning_rate": 5.990484186666692e-05, "loss": 1.7092, "step": 87600 }, { "epoch": 0.89, "learning_rate": 5.990431319889297e-05, "loss": 1.6545, "step": 87700 }, { "epoch": 0.89, "learning_rate": 5.990378306897655e-05, "loss": 1.6781, "step": 87800 }, { "epoch": 0.9, "learning_rate": 5.990325680010131e-05, "loss": 1.7792, "step": 87900 }, { "epoch": 0.9, "learning_rate": 5.990272376059858e-05, "loss": 1.6132, "step": 88000 }, { "epoch": 0.9, "learning_rate": 5.9902189259031126e-05, "loss": 1.4612, "step": 88100 }, { "epoch": 0.9, "learning_rate": 5.9901653295425055e-05, "loss": 1.5725, "step": 88200 }, { "epoch": 0.9, "learning_rate": 5.990111586980658e-05, "loss": 1.5206, "step": 88300 }, { "epoch": 0.9, "learning_rate": 5.990057698220199e-05, "loss": 1.6188, "step": 88400 }, { "epoch": 0.9, "learning_rate": 5.990003663263761e-05, "loss": 1.3889, "step": 88500 }, { "epoch": 0.9, "learning_rate": 5.989949482113988e-05, "loss": 1.8094, "step": 88600 }, { "epoch": 0.9, "learning_rate": 5.9898951547735275e-05, "loss": 1.6204, "step": 88700 }, { "epoch": 0.9, "learning_rate": 5.989840681245038e-05, "loss": 1.7374, "step": 88800 }, { "epoch": 0.91, "learning_rate": 5.989786061531181e-05, "loss": 1.6129, "step": 88900 }, { "epoch": 0.91, "learning_rate": 5.989731295634627e-05, "loss": 1.4584, "step": 89000 }, { "epoch": 0.91, "learning_rate": 5.9896763835580544e-05, "loss": 1.5022, "step": 89100 }, { "epoch": 0.91, "learning_rate": 5.989621325304149e-05, "loss": 1.5479, "step": 89200 }, { "epoch": 0.91, "learning_rate": 5.989566120875602e-05, "loss": 1.5201, "step": 89300 }, { "epoch": 0.91, "learning_rate": 5.9895107702751125e-05, "loss": 1.5713, "step": 89400 }, { "epoch": 0.91, "learning_rate": 5.989455273505386e-05, "loss": 1.6368, "step": 89500 }, { "epoch": 0.91, "learning_rate": 5.9893996305691374e-05, "loss": 1.4649, "step": 89600 }, { "epoch": 0.91, "learning_rate": 5.989343841469087e-05, "loss": 1.4484, "step": 89700 }, { "epoch": 0.91, "learning_rate": 5.989287906207961e-05, "loss": 1.51, "step": 89800 }, { "epoch": 0.92, "learning_rate": 5.989231824788496e-05, "loss": 1.519, "step": 89900 }, { "epoch": 0.92, "learning_rate": 5.989175597213434e-05, "loss": 1.4558, "step": 90000 }, { "epoch": 0.92, "learning_rate": 5.989119223485524e-05, "loss": 1.3998, "step": 90100 }, { "epoch": 0.92, "learning_rate": 5.989062703607521e-05, "loss": 1.4141, "step": 90200 }, { "epoch": 0.92, "learning_rate": 5.98900603758219e-05, "loss": 1.6032, "step": 90300 }, { "epoch": 0.92, "learning_rate": 5.988949225412302e-05, "loss": 1.433, "step": 90400 }, { "epoch": 0.92, "learning_rate": 5.988892267100633e-05, "loss": 1.5292, "step": 90500 }, { "epoch": 0.92, "learning_rate": 5.988835162649971e-05, "loss": 1.4914, "step": 90600 }, { "epoch": 0.92, "learning_rate": 5.988777912063103e-05, "loss": 1.5033, "step": 90700 }, { "epoch": 0.93, "learning_rate": 5.9887205153428335e-05, "loss": 1.4194, "step": 90800 }, { "epoch": 0.93, "learning_rate": 5.988662972491965e-05, "loss": 1.516, "step": 90900 }, { "epoch": 0.93, "learning_rate": 5.9886052835133135e-05, "loss": 1.5305, "step": 91000 }, { "epoch": 0.93, "learning_rate": 5.988547448409698e-05, "loss": 1.5844, "step": 91100 }, { "epoch": 0.93, "learning_rate": 5.988489467183947e-05, "loss": 1.4488, "step": 91200 }, { "epoch": 0.93, "learning_rate": 5.9884313398388964e-05, "loss": 1.5022, "step": 91300 }, { "epoch": 0.93, "learning_rate": 5.9883730663773865e-05, "loss": 1.4467, "step": 91400 }, { "epoch": 0.93, "learning_rate": 5.9883146468022676e-05, "loss": 1.4996, "step": 91500 }, { "epoch": 0.93, "learning_rate": 5.9882560811163954e-05, "loss": 1.5119, "step": 91600 }, { "epoch": 0.93, "learning_rate": 5.9881973693226346e-05, "loss": 1.5147, "step": 91700 }, { "epoch": 0.94, "learning_rate": 5.9881385114238545e-05, "loss": 1.4885, "step": 91800 }, { "epoch": 0.94, "learning_rate": 5.9880795074229337e-05, "loss": 1.4255, "step": 91900 }, { "epoch": 0.94, "learning_rate": 5.988020357322757e-05, "loss": 1.6254, "step": 92000 }, { "epoch": 0.94, "learning_rate": 5.987961061126216e-05, "loss": 1.4327, "step": 92100 }, { "epoch": 0.94, "learning_rate": 5.98790161883621e-05, "loss": 1.4429, "step": 92200 }, { "epoch": 0.94, "learning_rate": 5.987842030455647e-05, "loss": 1.5518, "step": 92300 }, { "epoch": 0.94, "learning_rate": 5.9877822959874376e-05, "loss": 1.4686, "step": 92400 }, { "epoch": 0.94, "learning_rate": 5.987722415434505e-05, "loss": 1.4723, "step": 92500 }, { "epoch": 0.94, "learning_rate": 5.9876623887997756e-05, "loss": 1.5262, "step": 92600 }, { "epoch": 0.94, "learning_rate": 5.987602216086186e-05, "loss": 1.4851, "step": 92700 }, { "epoch": 0.95, "learning_rate": 5.9875418972966765e-05, "loss": 1.5974, "step": 92800 }, { "epoch": 0.95, "learning_rate": 5.987481432434196e-05, "loss": 1.5091, "step": 92900 }, { "epoch": 0.95, "learning_rate": 5.987420821501703e-05, "loss": 1.596, "step": 93000 }, { "epoch": 0.95, "learning_rate": 5.9873600645021595e-05, "loss": 1.4018, "step": 93100 }, { "epoch": 0.95, "learning_rate": 5.987299161438536e-05, "loss": 1.4484, "step": 93200 }, { "epoch": 0.95, "learning_rate": 5.987238112313812e-05, "loss": 1.4128, "step": 93300 }, { "epoch": 0.95, "learning_rate": 5.98717691713097e-05, "loss": 1.443, "step": 93400 }, { "epoch": 0.95, "learning_rate": 5.9871155758930036e-05, "loss": 1.6698, "step": 93500 }, { "epoch": 0.95, "learning_rate": 5.9870540886029116e-05, "loss": 1.4533, "step": 93600 }, { "epoch": 0.95, "learning_rate": 5.9869924552637004e-05, "loss": 1.4722, "step": 93700 }, { "epoch": 0.96, "learning_rate": 5.9869306758783847e-05, "loss": 1.4219, "step": 93800 }, { "epoch": 0.96, "learning_rate": 5.9868687504499826e-05, "loss": 1.5066, "step": 93900 }, { "epoch": 0.96, "learning_rate": 5.986806678981524e-05, "loss": 1.4336, "step": 94000 }, { "epoch": 0.96, "learning_rate": 5.986744461476043e-05, "loss": 1.4057, "step": 94100 }, { "epoch": 0.96, "learning_rate": 5.9866820979365815e-05, "loss": 1.5177, "step": 94200 }, { "epoch": 0.96, "learning_rate": 5.98661958836619e-05, "loss": 1.3542, "step": 94300 }, { "epoch": 0.96, "learning_rate": 5.986556932767922e-05, "loss": 1.5767, "step": 94400 }, { "epoch": 0.96, "learning_rate": 5.986494759883888e-05, "loss": 1.4592, "step": 94500 }, { "epoch": 0.96, "learning_rate": 5.986431813699271e-05, "loss": 1.3926, "step": 94600 }, { "epoch": 0.96, "learning_rate": 5.986368721495961e-05, "loss": 1.4459, "step": 94700 }, { "epoch": 0.97, "learning_rate": 5.986305483277043e-05, "loss": 1.3428, "step": 94800 }, { "epoch": 0.97, "learning_rate": 5.9862420990456075e-05, "loss": 1.4095, "step": 94900 }, { "epoch": 0.97, "learning_rate": 5.986178568804754e-05, "loss": 1.4202, "step": 95000 }, { "epoch": 0.97, "learning_rate": 5.986114892557589e-05, "loss": 1.4598, "step": 95100 }, { "epoch": 0.97, "learning_rate": 5.986051070307226e-05, "loss": 1.5845, "step": 95200 }, { "epoch": 0.97, "learning_rate": 5.9859871020567864e-05, "loss": 1.4093, "step": 95300 }, { "epoch": 0.97, "learning_rate": 5.985922987809396e-05, "loss": 1.4548, "step": 95400 }, { "epoch": 0.97, "learning_rate": 5.985858727568192e-05, "loss": 1.354, "step": 95500 }, { "epoch": 0.97, "learning_rate": 5.985794321336313e-05, "loss": 1.4892, "step": 95600 }, { "epoch": 0.98, "learning_rate": 5.9857297691169106e-05, "loss": 1.5283, "step": 95700 }, { "epoch": 0.98, "learning_rate": 5.985665070913141e-05, "loss": 1.3423, "step": 95800 }, { "epoch": 0.98, "learning_rate": 5.985600226728166e-05, "loss": 1.4428, "step": 95900 }, { "epoch": 0.98, "learning_rate": 5.985535236565157e-05, "loss": 1.3588, "step": 96000 }, { "epoch": 0.98, "learning_rate": 5.9854701004272926e-05, "loss": 1.4912, "step": 96100 }, { "epoch": 0.98, "learning_rate": 5.985404818317756e-05, "loss": 1.5576, "step": 96200 }, { "epoch": 0.98, "learning_rate": 5.985340045243054e-05, "loss": 1.5684, "step": 96300 }, { "epoch": 0.98, "learning_rate": 5.9852744726593947e-05, "loss": 1.5131, "step": 96400 }, { "epoch": 0.98, "learning_rate": 5.9852087541136275e-05, "loss": 1.5162, "step": 96500 }, { "epoch": 0.98, "learning_rate": 5.9851428896089685e-05, "loss": 1.4749, "step": 96600 }, { "epoch": 0.99, "learning_rate": 5.9850768791486355e-05, "loss": 1.4942, "step": 96700 }, { "epoch": 0.99, "learning_rate": 5.985010722735858e-05, "loss": 1.4914, "step": 96800 }, { "epoch": 0.99, "learning_rate": 5.9849444203738695e-05, "loss": 1.4131, "step": 96900 }, { "epoch": 0.99, "learning_rate": 5.9848779720659126e-05, "loss": 1.3661, "step": 97000 }, { "epoch": 0.99, "learning_rate": 5.9848113778152346e-05, "loss": 1.4944, "step": 97100 }, { "epoch": 0.99, "learning_rate": 5.9847446376250936e-05, "loss": 1.4675, "step": 97200 }, { "epoch": 0.99, "learning_rate": 5.9846777514987516e-05, "loss": 1.3728, "step": 97300 }, { "epoch": 0.99, "learning_rate": 5.984611390482428e-05, "loss": 1.5257, "step": 97400 }, { "epoch": 0.99, "learning_rate": 5.984544213952784e-05, "loss": 1.4841, "step": 97500 }, { "epoch": 0.99, "learning_rate": 5.984476891496737e-05, "loss": 1.5172, "step": 97600 }, { "epoch": 1.0, "learning_rate": 5.9844094231175805e-05, "loss": 1.4262, "step": 97700 }, { "epoch": 1.0, "learning_rate": 5.984341808818614e-05, "loss": 1.4293, "step": 97800 }, { "epoch": 1.0, "learning_rate": 5.984274048603143e-05, "loss": 1.6798, "step": 97900 }, { "epoch": 1.0, "learning_rate": 5.984206142474479e-05, "loss": 1.5527, "step": 98000 }, { "epoch": 1.0, "learning_rate": 5.984138090435944e-05, "loss": 1.4024, "step": 98100 }, { "epoch": 1.0, "learning_rate": 5.984070575192542e-05, "loss": 1.5522, "step": 98200 }, { "epoch": 1.0, "learning_rate": 5.984002232803269e-05, "loss": 1.6232, "step": 98300 }, { "epoch": 1.0, "learning_rate": 5.9839337445140936e-05, "loss": 1.5484, "step": 98400 }, { "epoch": 1.0, "learning_rate": 5.983865110328365e-05, "loss": 1.4182, "step": 98500 }, { "epoch": 1.0, "learning_rate": 5.983796330249441e-05, "loss": 1.4759, "step": 98600 }, { "epoch": 1.01, "learning_rate": 5.983727404280682e-05, "loss": 1.4507, "step": 98700 }, { "epoch": 1.01, "learning_rate": 5.983658332425458e-05, "loss": 1.3354, "step": 98800 }, { "epoch": 1.01, "learning_rate": 5.9835891146871484e-05, "loss": 1.3849, "step": 98900 }, { "epoch": 1.01, "learning_rate": 5.9835197510691354e-05, "loss": 1.5347, "step": 99000 }, { "epoch": 1.01, "learning_rate": 5.983450241574812e-05, "loss": 1.3758, "step": 99100 }, { "epoch": 1.01, "learning_rate": 5.983380586207575e-05, "loss": 1.3151, "step": 99200 }, { "epoch": 1.01, "learning_rate": 5.9833107849708325e-05, "loss": 1.4597, "step": 99300 }, { "epoch": 1.01, "learning_rate": 5.9832408378679945e-05, "loss": 1.3496, "step": 99400 }, { "epoch": 1.01, "learning_rate": 5.983170744902484e-05, "loss": 1.5162, "step": 99500 }, { "epoch": 1.01, "learning_rate": 5.983100506077726e-05, "loss": 1.2813, "step": 99600 }, { "epoch": 1.02, "learning_rate": 5.983030121397156e-05, "loss": 1.483, "step": 99700 }, { "epoch": 1.02, "learning_rate": 5.982959590864214e-05, "loss": 1.5009, "step": 99800 }, { "epoch": 1.02, "learning_rate": 5.982888914482349e-05, "loss": 1.3123, "step": 99900 }, { "epoch": 1.02, "learning_rate": 5.9828180922550186e-05, "loss": 1.4835, "step": 100000 }, { "epoch": 1.02, "learning_rate": 5.982747124185684e-05, "loss": 1.349, "step": 100100 }, { "epoch": 1.02, "learning_rate": 5.982676010277814e-05, "loss": 1.3898, "step": 100200 }, { "epoch": 1.02, "learning_rate": 5.982604750534888e-05, "loss": 1.3675, "step": 100300 }, { "epoch": 1.02, "learning_rate": 5.982533344960388e-05, "loss": 1.3503, "step": 100400 }, { "epoch": 1.02, "learning_rate": 5.982461793557806e-05, "loss": 1.2747, "step": 100500 }, { "epoch": 1.02, "learning_rate": 5.9823900963306415e-05, "loss": 1.3983, "step": 100600 }, { "epoch": 1.03, "learning_rate": 5.9823182532823986e-05, "loss": 1.3127, "step": 100700 }, { "epoch": 1.03, "learning_rate": 5.982246264416591e-05, "loss": 1.4465, "step": 100800 }, { "epoch": 1.03, "learning_rate": 5.982174129736738e-05, "loss": 1.425, "step": 100900 }, { "epoch": 1.03, "learning_rate": 5.9821018492463666e-05, "loss": 1.4288, "step": 101000 }, { "epoch": 1.03, "learning_rate": 5.982029422949012e-05, "loss": 1.3546, "step": 101100 }, { "epoch": 1.03, "learning_rate": 5.981956850848213e-05, "loss": 1.3968, "step": 101200 }, { "epoch": 1.03, "learning_rate": 5.98188413294752e-05, "loss": 1.3547, "step": 101300 }, { "epoch": 1.03, "learning_rate": 5.981811269250488e-05, "loss": 1.4353, "step": 101400 }, { "epoch": 1.03, "learning_rate": 5.98173825976068e-05, "loss": 1.4136, "step": 101500 }, { "epoch": 1.04, "learning_rate": 5.981665104481665e-05, "loss": 1.4651, "step": 101600 }, { "epoch": 1.04, "learning_rate": 5.981591803417019e-05, "loss": 1.4144, "step": 101700 }, { "epoch": 1.04, "learning_rate": 5.981518356570328e-05, "loss": 1.2181, "step": 101800 }, { "epoch": 1.04, "learning_rate": 5.981444763945181e-05, "loss": 1.3956, "step": 101900 }, { "epoch": 1.04, "learning_rate": 5.981371025545179e-05, "loss": 1.3597, "step": 102000 }, { "epoch": 1.04, "learning_rate": 5.9812971413739246e-05, "loss": 1.3117, "step": 102100 }, { "epoch": 1.04, "learning_rate": 5.9812231114350316e-05, "loss": 1.3608, "step": 102200 }, { "epoch": 1.04, "learning_rate": 5.98114893573212e-05, "loss": 1.2655, "step": 102300 }, { "epoch": 1.04, "learning_rate": 5.9810746142688154e-05, "loss": 1.2893, "step": 102400 }, { "epoch": 1.04, "learning_rate": 5.981000147048753e-05, "loss": 1.3463, "step": 102500 }, { "epoch": 1.05, "learning_rate": 5.9809255340755724e-05, "loss": 1.5518, "step": 102600 }, { "epoch": 1.05, "learning_rate": 5.9808507753529225e-05, "loss": 1.3162, "step": 102700 }, { "epoch": 1.05, "learning_rate": 5.980775870884459e-05, "loss": 1.2651, "step": 102800 }, { "epoch": 1.05, "learning_rate": 5.980700820673843e-05, "loss": 1.4226, "step": 102900 }, { "epoch": 1.05, "learning_rate": 5.980625624724745e-05, "loss": 1.3878, "step": 103000 }, { "epoch": 1.05, "learning_rate": 5.980550283040842e-05, "loss": 1.3676, "step": 103100 }, { "epoch": 1.05, "learning_rate": 5.9804747956258166e-05, "loss": 1.3179, "step": 103200 }, { "epoch": 1.05, "learning_rate": 5.9803991624833594e-05, "loss": 1.5319, "step": 103300 }, { "epoch": 1.05, "learning_rate": 5.980323383617171e-05, "loss": 1.3493, "step": 103400 }, { "epoch": 1.05, "learning_rate": 5.9802474590309544e-05, "loss": 1.3645, "step": 103500 }, { "epoch": 1.06, "learning_rate": 5.980171388728421e-05, "loss": 1.4624, "step": 103600 }, { "epoch": 1.06, "learning_rate": 5.9800951727132924e-05, "loss": 1.4435, "step": 103700 }, { "epoch": 1.06, "learning_rate": 5.980018810989294e-05, "loss": 1.426, "step": 103800 }, { "epoch": 1.06, "learning_rate": 5.979942303560159e-05, "loss": 1.3458, "step": 103900 }, { "epoch": 1.06, "learning_rate": 5.97986565042963e-05, "loss": 1.4218, "step": 104000 }, { "epoch": 1.06, "learning_rate": 5.9797888516014524e-05, "loss": 1.2861, "step": 104100 }, { "epoch": 1.06, "learning_rate": 5.979711907079383e-05, "loss": 1.2324, "step": 104200 }, { "epoch": 1.06, "learning_rate": 5.979634816867183e-05, "loss": 1.3904, "step": 104300 }, { "epoch": 1.06, "learning_rate": 5.979557580968621e-05, "loss": 1.3849, "step": 104400 }, { "epoch": 1.06, "learning_rate": 5.9794801993874764e-05, "loss": 1.3526, "step": 104500 }, { "epoch": 1.07, "learning_rate": 5.9794026721275295e-05, "loss": 1.2336, "step": 104600 }, { "epoch": 1.07, "learning_rate": 5.979324999192572e-05, "loss": 1.3354, "step": 104700 }, { "epoch": 1.07, "learning_rate": 5.979247180586403e-05, "loss": 1.2959, "step": 104800 }, { "epoch": 1.07, "learning_rate": 5.979169216312825e-05, "loss": 1.1845, "step": 104900 }, { "epoch": 1.07, "learning_rate": 5.9790911063756516e-05, "loss": 1.3007, "step": 105000 }, { "epoch": 1.07, "learning_rate": 5.979012850778701e-05, "loss": 1.2666, "step": 105100 }, { "epoch": 1.07, "learning_rate": 5.978934449525799e-05, "loss": 1.3871, "step": 105200 }, { "epoch": 1.07, "learning_rate": 5.978855902620781e-05, "loss": 1.4556, "step": 105300 }, { "epoch": 1.07, "learning_rate": 5.978777210067486e-05, "loss": 1.2591, "step": 105400 }, { "epoch": 1.07, "learning_rate": 5.978698371869762e-05, "loss": 1.39, "step": 105500 }, { "epoch": 1.08, "learning_rate": 5.978619388031463e-05, "loss": 1.4079, "step": 105600 }, { "epoch": 1.08, "learning_rate": 5.978540258556452e-05, "loss": 1.3003, "step": 105700 }, { "epoch": 1.08, "learning_rate": 5.9784609834485965e-05, "loss": 1.3884, "step": 105800 }, { "epoch": 1.08, "learning_rate": 5.9783815627117745e-05, "loss": 1.408, "step": 105900 }, { "epoch": 1.08, "learning_rate": 5.9783019963498675e-05, "loss": 1.3368, "step": 106000 }, { "epoch": 1.08, "learning_rate": 5.9782222843667663e-05, "loss": 1.4822, "step": 106100 }, { "epoch": 1.08, "learning_rate": 5.978142426766369e-05, "loss": 1.2155, "step": 106200 }, { "epoch": 1.08, "learning_rate": 5.97806322430549e-05, "loss": 1.2892, "step": 106300 }, { "epoch": 1.08, "learning_rate": 5.977983076938296e-05, "loss": 1.3215, "step": 106400 }, { "epoch": 1.09, "learning_rate": 5.9779027839655e-05, "loss": 1.3413, "step": 106500 }, { "epoch": 1.09, "learning_rate": 5.977822345391029e-05, "loss": 1.3134, "step": 106600 }, { "epoch": 1.09, "learning_rate": 5.977741761218818e-05, "loss": 1.3632, "step": 106700 }, { "epoch": 1.09, "learning_rate": 5.977661031452803e-05, "loss": 1.2829, "step": 106800 }, { "epoch": 1.09, "learning_rate": 5.977580156096934e-05, "loss": 1.3444, "step": 106900 }, { "epoch": 1.09, "learning_rate": 5.977499135155165e-05, "loss": 1.3269, "step": 107000 }, { "epoch": 1.09, "learning_rate": 5.9774179686314575e-05, "loss": 1.363, "step": 107100 }, { "epoch": 1.09, "learning_rate": 5.977336656529779e-05, "loss": 1.3312, "step": 107200 }, { "epoch": 1.09, "learning_rate": 5.977255198854105e-05, "loss": 1.4233, "step": 107300 }, { "epoch": 1.09, "learning_rate": 5.97717359560842e-05, "loss": 1.2688, "step": 107400 }, { "epoch": 1.1, "learning_rate": 5.977092665005369e-05, "loss": 1.3501, "step": 107500 }, { "epoch": 1.1, "learning_rate": 5.9770107720872375e-05, "loss": 1.2075, "step": 107600 }, { "epoch": 1.1, "learning_rate": 5.976928733611045e-05, "loss": 1.3243, "step": 107700 }, { "epoch": 1.1, "learning_rate": 5.976846549580803e-05, "loss": 1.3873, "step": 107800 }, { "epoch": 1.1, "learning_rate": 5.97676422000053e-05, "loss": 1.3019, "step": 107900 }, { "epoch": 1.1, "learning_rate": 5.9766817448742495e-05, "loss": 1.2509, "step": 108000 }, { "epoch": 1.1, "learning_rate": 5.9765991242059965e-05, "loss": 1.3011, "step": 108100 }, { "epoch": 1.1, "learning_rate": 5.97651635799981e-05, "loss": 1.3448, "step": 108200 }, { "epoch": 1.1, "learning_rate": 5.976433446259737e-05, "loss": 1.2108, "step": 108300 }, { "epoch": 1.1, "learning_rate": 5.9763503889898296e-05, "loss": 1.2111, "step": 108400 }, { "epoch": 1.11, "learning_rate": 5.976267186194151e-05, "loss": 1.1812, "step": 108500 }, { "epoch": 1.11, "learning_rate": 5.976183837876768e-05, "loss": 1.2569, "step": 108600 }, { "epoch": 1.11, "learning_rate": 5.976100344041757e-05, "loss": 1.3887, "step": 108700 }, { "epoch": 1.11, "learning_rate": 5.976016704693198e-05, "loss": 1.254, "step": 108800 }, { "epoch": 1.11, "learning_rate": 5.975932919835184e-05, "loss": 1.3807, "step": 108900 }, { "epoch": 1.11, "learning_rate": 5.975848989471809e-05, "loss": 1.2827, "step": 109000 }, { "epoch": 1.11, "learning_rate": 5.975764913607177e-05, "loss": 1.3466, "step": 109100 }, { "epoch": 1.11, "learning_rate": 5.975680692245399e-05, "loss": 1.2963, "step": 109200 }, { "epoch": 1.11, "learning_rate": 5.975596325390593e-05, "loss": 1.2993, "step": 109300 }, { "epoch": 1.11, "learning_rate": 5.9755118130468846e-05, "loss": 1.3292, "step": 109400 }, { "epoch": 1.12, "learning_rate": 5.9754271552184064e-05, "loss": 1.2301, "step": 109500 }, { "epoch": 1.12, "learning_rate": 5.975342351909295e-05, "loss": 1.3166, "step": 109600 }, { "epoch": 1.12, "learning_rate": 5.975257403123699e-05, "loss": 1.376, "step": 109700 }, { "epoch": 1.12, "learning_rate": 5.975172308865772e-05, "loss": 1.3041, "step": 109800 }, { "epoch": 1.12, "learning_rate": 5.975087069139673e-05, "loss": 1.2162, "step": 109900 }, { "epoch": 1.12, "learning_rate": 5.975001683949572e-05, "loss": 1.3114, "step": 110000 }, { "epoch": 1.12, "learning_rate": 5.974916153299642e-05, "loss": 1.3083, "step": 110100 }, { "epoch": 1.12, "learning_rate": 5.974830477194065e-05, "loss": 1.3603, "step": 110200 }, { "epoch": 1.12, "learning_rate": 5.974744655637031e-05, "loss": 1.2566, "step": 110300 }, { "epoch": 1.12, "learning_rate": 5.9746595490227285e-05, "loss": 1.3417, "step": 110400 }, { "epoch": 1.13, "learning_rate": 5.974573438029785e-05, "loss": 1.3569, "step": 110500 }, { "epoch": 1.13, "learning_rate": 5.974487181597951e-05, "loss": 1.3611, "step": 110600 }, { "epoch": 1.13, "learning_rate": 5.974400779731445e-05, "loss": 1.4259, "step": 110700 }, { "epoch": 1.13, "learning_rate": 5.9743142324344916e-05, "loss": 1.3045, "step": 110800 }, { "epoch": 1.13, "learning_rate": 5.974227539711322e-05, "loss": 1.4275, "step": 110900 }, { "epoch": 1.13, "learning_rate": 5.9741407015661744e-05, "loss": 1.4117, "step": 111000 }, { "epoch": 1.13, "learning_rate": 5.974053718003295e-05, "loss": 1.1097, "step": 111100 }, { "epoch": 1.13, "learning_rate": 5.9739665890269376e-05, "loss": 1.2404, "step": 111200 }, { "epoch": 1.13, "learning_rate": 5.9738793146413615e-05, "loss": 1.4136, "step": 111300 }, { "epoch": 1.13, "learning_rate": 5.97379276976848e-05, "loss": 1.3324, "step": 111400 }, { "epoch": 1.14, "learning_rate": 5.973705206031262e-05, "loss": 1.2511, "step": 111500 }, { "epoch": 1.14, "learning_rate": 5.973617496897605e-05, "loss": 1.4575, "step": 111600 }, { "epoch": 1.14, "learning_rate": 5.973529642371797e-05, "loss": 1.2321, "step": 111700 }, { "epoch": 1.14, "learning_rate": 5.9734416424581356e-05, "loss": 1.2767, "step": 111800 }, { "epoch": 1.14, "learning_rate": 5.973353497160922e-05, "loss": 1.3828, "step": 111900 }, { "epoch": 1.14, "learning_rate": 5.973265206484467e-05, "loss": 1.314, "step": 112000 }, { "epoch": 1.14, "learning_rate": 5.973176770433087e-05, "loss": 1.4695, "step": 112100 }, { "epoch": 1.14, "learning_rate": 5.973088189011105e-05, "loss": 1.4346, "step": 112200 }, { "epoch": 1.14, "learning_rate": 5.972999462222854e-05, "loss": 1.4046, "step": 112300 }, { "epoch": 1.15, "learning_rate": 5.972910590072671e-05, "loss": 1.3947, "step": 112400 }, { "epoch": 1.15, "learning_rate": 5.9728215725649016e-05, "loss": 1.1863, "step": 112500 }, { "epoch": 1.15, "learning_rate": 5.972732409703898e-05, "loss": 1.1956, "step": 112600 }, { "epoch": 1.15, "learning_rate": 5.97264310149402e-05, "loss": 1.3824, "step": 112700 }, { "epoch": 1.15, "learning_rate": 5.972553647939633e-05, "loss": 1.48, "step": 112800 }, { "epoch": 1.15, "learning_rate": 5.9724640490451135e-05, "loss": 1.4075, "step": 112900 }, { "epoch": 1.15, "learning_rate": 5.9723743048148405e-05, "loss": 1.3613, "step": 113000 }, { "epoch": 1.15, "learning_rate": 5.9722844152532017e-05, "loss": 1.3371, "step": 113100 }, { "epoch": 1.15, "learning_rate": 5.972194380364593e-05, "loss": 1.3093, "step": 113200 }, { "epoch": 1.15, "learning_rate": 5.9721042001534154e-05, "loss": 1.3449, "step": 113300 }, { "epoch": 1.16, "learning_rate": 5.97201387462408e-05, "loss": 1.5715, "step": 113400 }, { "epoch": 1.16, "learning_rate": 5.9719234037810015e-05, "loss": 1.4382, "step": 113500 }, { "epoch": 1.16, "learning_rate": 5.9718327876286044e-05, "loss": 1.4426, "step": 113600 }, { "epoch": 1.16, "learning_rate": 5.971742026171319e-05, "loss": 1.3952, "step": 113700 }, { "epoch": 1.16, "learning_rate": 5.9716511194135825e-05, "loss": 1.5162, "step": 113800 }, { "epoch": 1.16, "learning_rate": 5.9715600673598406e-05, "loss": 1.6213, "step": 113900 }, { "epoch": 1.16, "learning_rate": 5.9714688700145454e-05, "loss": 1.2823, "step": 114000 }, { "epoch": 1.16, "learning_rate": 5.971377527382154e-05, "loss": 1.3866, "step": 114100 }, { "epoch": 1.16, "learning_rate": 5.971286039467134e-05, "loss": 1.4322, "step": 114200 }, { "epoch": 1.16, "learning_rate": 5.9711944062739595e-05, "loss": 1.3167, "step": 114300 }, { "epoch": 1.17, "learning_rate": 5.971102627807109e-05, "loss": 1.4742, "step": 114400 }, { "epoch": 1.17, "learning_rate": 5.97101070407107e-05, "loss": 1.4664, "step": 114500 }, { "epoch": 1.17, "learning_rate": 5.970918635070338e-05, "loss": 1.5229, "step": 114600 }, { "epoch": 1.17, "learning_rate": 5.970826420809414e-05, "loss": 1.4537, "step": 114700 }, { "epoch": 1.17, "learning_rate": 5.970734061292808e-05, "loss": 1.526, "step": 114800 }, { "epoch": 1.17, "learning_rate": 5.9706415565250337e-05, "loss": 1.4143, "step": 114900 }, { "epoch": 1.17, "learning_rate": 5.970548906510616e-05, "loss": 1.4729, "step": 115000 }, { "epoch": 1.17, "learning_rate": 5.970456111254084e-05, "loss": 1.4249, "step": 115100 }, { "epoch": 1.17, "learning_rate": 5.9703631707599744e-05, "loss": 1.4654, "step": 115200 }, { "epoch": 1.17, "learning_rate": 5.9702700850328326e-05, "loss": 1.3872, "step": 115300 }, { "epoch": 1.18, "learning_rate": 5.970176854077208e-05, "loss": 1.4678, "step": 115400 }, { "epoch": 1.18, "learning_rate": 5.970083477897662e-05, "loss": 1.3417, "step": 115500 }, { "epoch": 1.18, "learning_rate": 5.9699899564987576e-05, "loss": 1.6554, "step": 115600 }, { "epoch": 1.18, "learning_rate": 5.9698962898850684e-05, "loss": 1.4561, "step": 115700 }, { "epoch": 1.18, "learning_rate": 5.9698024780611736e-05, "loss": 1.5633, "step": 115800 }, { "epoch": 1.18, "learning_rate": 5.9697085210316614e-05, "loss": 1.5998, "step": 115900 }, { "epoch": 1.18, "learning_rate": 5.969614418801124e-05, "loss": 1.3655, "step": 116000 }, { "epoch": 1.18, "learning_rate": 5.9695201713741634e-05, "loss": 1.446, "step": 116100 }, { "epoch": 1.18, "learning_rate": 5.969425778755388e-05, "loss": 1.4751, "step": 116200 }, { "epoch": 1.18, "learning_rate": 5.969331240949412e-05, "loss": 1.2872, "step": 116300 }, { "epoch": 1.19, "learning_rate": 5.969236557960859e-05, "loss": 1.3157, "step": 116400 }, { "epoch": 1.19, "learning_rate": 5.969141729794358e-05, "loss": 1.3384, "step": 116500 }, { "epoch": 1.19, "learning_rate": 5.9690467564545444e-05, "loss": 1.504, "step": 116600 }, { "epoch": 1.19, "learning_rate": 5.968952589849718e-05, "loss": 1.4771, "step": 116700 }, { "epoch": 1.19, "learning_rate": 5.9688573276288354e-05, "loss": 1.3557, "step": 116800 }, { "epoch": 1.19, "learning_rate": 5.968761920248549e-05, "loss": 1.2829, "step": 116900 }, { "epoch": 1.19, "learning_rate": 5.96866636771352e-05, "loss": 1.3307, "step": 117000 }, { "epoch": 1.19, "learning_rate": 5.968570670028422e-05, "loss": 1.395, "step": 117100 }, { "epoch": 1.19, "learning_rate": 5.968474827197934e-05, "loss": 1.3591, "step": 117200 }, { "epoch": 1.2, "learning_rate": 5.968378839226741e-05, "loss": 1.3328, "step": 117300 }, { "epoch": 1.2, "learning_rate": 5.968282706119538e-05, "loss": 1.3879, "step": 117400 }, { "epoch": 1.2, "learning_rate": 5.9681864278810236e-05, "loss": 1.339, "step": 117500 }, { "epoch": 1.2, "learning_rate": 5.968090004515907e-05, "loss": 1.2838, "step": 117600 }, { "epoch": 1.2, "learning_rate": 5.967993436028901e-05, "loss": 1.452, "step": 117700 }, { "epoch": 1.2, "learning_rate": 5.967896722424728e-05, "loss": 1.3095, "step": 117800 }, { "epoch": 1.2, "learning_rate": 5.967799863708118e-05, "loss": 1.3719, "step": 117900 }, { "epoch": 1.2, "learning_rate": 5.967702859883805e-05, "loss": 1.3329, "step": 118000 }, { "epoch": 1.2, "learning_rate": 5.967605710956532e-05, "loss": 1.5108, "step": 118100 }, { "epoch": 1.2, "learning_rate": 5.96750841693105e-05, "loss": 1.4127, "step": 118200 }, { "epoch": 1.21, "learning_rate": 5.967410977812115e-05, "loss": 1.4059, "step": 118300 }, { "epoch": 1.21, "learning_rate": 5.967313393604493e-05, "loss": 1.3228, "step": 118400 }, { "epoch": 1.21, "learning_rate": 5.967215664312952e-05, "loss": 1.3755, "step": 118500 }, { "epoch": 1.21, "learning_rate": 5.967117789942274e-05, "loss": 1.2995, "step": 118600 }, { "epoch": 1.21, "learning_rate": 5.967019770497242e-05, "loss": 1.2373, "step": 118700 }, { "epoch": 1.21, "learning_rate": 5.9669216059826505e-05, "loss": 1.456, "step": 118800 }, { "epoch": 1.21, "learning_rate": 5.966824280217146e-05, "loss": 1.3081, "step": 118900 }, { "epoch": 1.21, "learning_rate": 5.9667268122783324e-05, "loss": 1.3161, "step": 119000 }, { "epoch": 1.21, "learning_rate": 5.966628215484941e-05, "loss": 1.2177, "step": 119100 }, { "epoch": 1.21, "learning_rate": 5.9665294736411335e-05, "loss": 1.269, "step": 119200 }, { "epoch": 1.22, "learning_rate": 5.966430586751738e-05, "loss": 1.2286, "step": 119300 }, { "epoch": 1.22, "learning_rate": 5.9663315548215895e-05, "loss": 1.2849, "step": 119400 }, { "epoch": 1.22, "learning_rate": 5.9662323778555296e-05, "loss": 1.2344, "step": 119500 }, { "epoch": 1.22, "learning_rate": 5.966133055858409e-05, "loss": 1.4448, "step": 119600 }, { "epoch": 1.22, "learning_rate": 5.9660335888350826e-05, "loss": 1.5244, "step": 119700 }, { "epoch": 1.22, "learning_rate": 5.965933976790414e-05, "loss": 1.4088, "step": 119800 }, { "epoch": 1.22, "learning_rate": 5.9658342197292736e-05, "loss": 1.4552, "step": 119900 }, { "epoch": 1.22, "learning_rate": 5.965734317656539e-05, "loss": 1.5194, "step": 120000 }, { "epoch": 1.22, "learning_rate": 5.965634270577095e-05, "loss": 1.3912, "step": 120100 }, { "epoch": 1.22, "learning_rate": 5.965534078495833e-05, "loss": 1.4178, "step": 120200 }, { "epoch": 1.23, "learning_rate": 5.9654337414176516e-05, "loss": 1.4027, "step": 120300 }, { "epoch": 1.23, "learning_rate": 5.9653332593474584e-05, "loss": 1.3197, "step": 120400 }, { "epoch": 1.23, "learning_rate": 5.9652326322901634e-05, "loss": 1.2208, "step": 120500 }, { "epoch": 1.23, "learning_rate": 5.965131860250689e-05, "loss": 1.399, "step": 120600 }, { "epoch": 1.23, "learning_rate": 5.965030943233962e-05, "loss": 1.3694, "step": 120700 }, { "epoch": 1.23, "learning_rate": 5.9649298812449154e-05, "loss": 1.3825, "step": 120800 }, { "epoch": 1.23, "learning_rate": 5.964828674288492e-05, "loss": 1.4419, "step": 120900 }, { "epoch": 1.23, "learning_rate": 5.964727322369639e-05, "loss": 1.4288, "step": 121000 }, { "epoch": 1.23, "learning_rate": 5.964625825493312e-05, "loss": 1.4255, "step": 121100 }, { "epoch": 1.23, "learning_rate": 5.9645241836644754e-05, "loss": 1.3312, "step": 121200 }, { "epoch": 1.24, "learning_rate": 5.964422396888098e-05, "loss": 1.3803, "step": 121300 }, { "epoch": 1.24, "learning_rate": 5.964320465169155e-05, "loss": 1.5161, "step": 121400 }, { "epoch": 1.24, "learning_rate": 5.9642194099966205e-05, "loss": 1.3961, "step": 121500 }, { "epoch": 1.24, "learning_rate": 5.9641171898568093e-05, "loss": 1.3588, "step": 121600 }, { "epoch": 1.24, "learning_rate": 5.964014824789356e-05, "loss": 1.203, "step": 121700 }, { "epoch": 1.24, "learning_rate": 5.9639123147992654e-05, "loss": 1.3503, "step": 121800 }, { "epoch": 1.24, "learning_rate": 5.9638096598915506e-05, "loss": 1.411, "step": 121900 }, { "epoch": 1.24, "learning_rate": 5.9637068600712315e-05, "loss": 1.3845, "step": 122000 }, { "epoch": 1.24, "learning_rate": 5.9636039153433326e-05, "loss": 1.3192, "step": 122100 }, { "epoch": 1.24, "learning_rate": 5.963500825712888e-05, "loss": 1.2467, "step": 122200 }, { "epoch": 1.25, "learning_rate": 5.963397591184938e-05, "loss": 1.3739, "step": 122300 }, { "epoch": 1.25, "learning_rate": 5.963294211764531e-05, "loss": 1.3546, "step": 122400 }, { "epoch": 1.25, "learning_rate": 5.963190687456721e-05, "loss": 1.3216, "step": 122500 }, { "epoch": 1.25, "learning_rate": 5.96308701826657e-05, "loss": 1.3243, "step": 122600 }, { "epoch": 1.25, "learning_rate": 5.962983204199146e-05, "loss": 1.2892, "step": 122700 }, { "epoch": 1.25, "learning_rate": 5.9628792452595256e-05, "loss": 1.4663, "step": 122800 }, { "epoch": 1.25, "learning_rate": 5.9627751414527914e-05, "loss": 1.3289, "step": 122900 }, { "epoch": 1.25, "learning_rate": 5.962670892784034e-05, "loss": 1.4329, "step": 123000 }, { "epoch": 1.25, "learning_rate": 5.96256649925835e-05, "loss": 1.4038, "step": 123100 }, { "epoch": 1.26, "learning_rate": 5.962461960880845e-05, "loss": 1.3818, "step": 123200 }, { "epoch": 1.26, "learning_rate": 5.962357277656628e-05, "loss": 1.3551, "step": 123300 }, { "epoch": 1.26, "learning_rate": 5.962252449590819e-05, "loss": 1.3107, "step": 123400 }, { "epoch": 1.26, "learning_rate": 5.962147476688543e-05, "loss": 1.2925, "step": 123500 }, { "epoch": 1.26, "learning_rate": 5.962042358954932e-05, "loss": 1.3535, "step": 123600 }, { "epoch": 1.26, "learning_rate": 5.9619370963951275e-05, "loss": 1.3602, "step": 123700 }, { "epoch": 1.26, "learning_rate": 5.961831689014274e-05, "loss": 1.3391, "step": 123800 }, { "epoch": 1.26, "learning_rate": 5.961726136817526e-05, "loss": 1.2356, "step": 123900 }, { "epoch": 1.26, "learning_rate": 5.961620439810045e-05, "loss": 1.4079, "step": 124000 }, { "epoch": 1.26, "learning_rate": 5.961514597996998e-05, "loss": 1.3692, "step": 124100 }, { "epoch": 1.27, "learning_rate": 5.961408611383561e-05, "loss": 1.4242, "step": 124200 }, { "epoch": 1.27, "learning_rate": 5.9613024799749155e-05, "loss": 1.2418, "step": 124300 }, { "epoch": 1.27, "learning_rate": 5.9611962037762505e-05, "loss": 1.2937, "step": 124400 }, { "epoch": 1.27, "learning_rate": 5.961089782792763e-05, "loss": 1.4035, "step": 124500 }, { "epoch": 1.27, "learning_rate": 5.9609832170296564e-05, "loss": 1.458, "step": 124600 }, { "epoch": 1.27, "learning_rate": 5.9608765064921405e-05, "loss": 1.2646, "step": 124700 }, { "epoch": 1.27, "learning_rate": 5.960769651185432e-05, "loss": 1.3861, "step": 124800 }, { "epoch": 1.27, "learning_rate": 5.960662651114758e-05, "loss": 1.3276, "step": 124900 }, { "epoch": 1.27, "learning_rate": 5.960555506285348e-05, "loss": 1.2731, "step": 125000 }, { "epoch": 1.27, "learning_rate": 5.960448216702441e-05, "loss": 1.2559, "step": 125100 }, { "epoch": 1.28, "learning_rate": 5.960340782371284e-05, "loss": 1.2045, "step": 125200 }, { "epoch": 1.28, "learning_rate": 5.960233203297129e-05, "loss": 1.3988, "step": 125300 }, { "epoch": 1.28, "learning_rate": 5.960125479485236e-05, "loss": 1.2827, "step": 125400 }, { "epoch": 1.28, "learning_rate": 5.960017610940872e-05, "loss": 1.379, "step": 125500 }, { "epoch": 1.28, "learning_rate": 5.9599095976693115e-05, "loss": 1.3538, "step": 125600 }, { "epoch": 1.28, "learning_rate": 5.959801439675835e-05, "loss": 1.2616, "step": 125700 }, { "epoch": 1.28, "learning_rate": 5.959693136965732e-05, "loss": 1.2887, "step": 125800 }, { "epoch": 1.28, "learning_rate": 5.959584689544296e-05, "loss": 1.3602, "step": 125900 }, { "epoch": 1.28, "learning_rate": 5.959476097416832e-05, "loss": 1.3714, "step": 126000 }, { "epoch": 1.28, "learning_rate": 5.9593673605886466e-05, "loss": 1.39, "step": 126100 }, { "epoch": 1.29, "learning_rate": 5.959258479065059e-05, "loss": 1.2831, "step": 126200 }, { "epoch": 1.29, "learning_rate": 5.959149452851391e-05, "loss": 1.2548, "step": 126300 }, { "epoch": 1.29, "learning_rate": 5.959040281952975e-05, "loss": 1.3099, "step": 126400 }, { "epoch": 1.29, "learning_rate": 5.958930966375147e-05, "loss": 1.2853, "step": 126500 }, { "epoch": 1.29, "learning_rate": 5.958821506123253e-05, "loss": 1.1851, "step": 126600 }, { "epoch": 1.29, "learning_rate": 5.958711901202645e-05, "loss": 1.215, "step": 126700 }, { "epoch": 1.29, "learning_rate": 5.958602151618681e-05, "loss": 1.2589, "step": 126800 }, { "epoch": 1.29, "learning_rate": 5.9584922573767284e-05, "loss": 1.1761, "step": 126900 }, { "epoch": 1.29, "learning_rate": 5.958382218482158e-05, "loss": 1.2956, "step": 127000 }, { "epoch": 1.29, "learning_rate": 5.958272034940354e-05, "loss": 1.2954, "step": 127100 }, { "epoch": 1.3, "learning_rate": 5.958161706756701e-05, "loss": 1.1245, "step": 127200 }, { "epoch": 1.3, "learning_rate": 5.958051233936594e-05, "loss": 1.1946, "step": 127300 }, { "epoch": 1.3, "learning_rate": 5.957940616485434e-05, "loss": 1.2536, "step": 127400 }, { "epoch": 1.3, "learning_rate": 5.95782985440863e-05, "loss": 1.2432, "step": 127500 }, { "epoch": 1.3, "learning_rate": 5.957718947711597e-05, "loss": 1.3207, "step": 127600 }, { "epoch": 1.3, "learning_rate": 5.957607896399759e-05, "loss": 1.352, "step": 127700 }, { "epoch": 1.3, "learning_rate": 5.957496700478545e-05, "loss": 1.1382, "step": 127800 }, { "epoch": 1.3, "learning_rate": 5.957385359953391e-05, "loss": 1.2078, "step": 127900 }, { "epoch": 1.3, "learning_rate": 5.9572749903967236e-05, "loss": 1.359, "step": 128000 }, { "epoch": 1.31, "learning_rate": 5.9571633621259345e-05, "loss": 1.4138, "step": 128100 }, { "epoch": 1.31, "learning_rate": 5.9570527077117785e-05, "loss": 1.3653, "step": 128200 }, { "epoch": 1.31, "learning_rate": 5.9569407917169666e-05, "loss": 1.4049, "step": 128300 }, { "epoch": 1.31, "learning_rate": 5.9568287311453955e-05, "loss": 1.3456, "step": 128400 }, { "epoch": 1.31, "learning_rate": 5.956716526002545e-05, "loss": 1.194, "step": 128500 }, { "epoch": 1.31, "learning_rate": 5.9566041762939026e-05, "loss": 1.2401, "step": 128600 }, { "epoch": 1.31, "learning_rate": 5.956491682024958e-05, "loss": 1.1624, "step": 128700 }, { "epoch": 1.31, "learning_rate": 5.956379043201215e-05, "loss": 1.2408, "step": 128800 }, { "epoch": 1.31, "learning_rate": 5.956266259828179e-05, "loss": 1.2268, "step": 128900 }, { "epoch": 1.31, "learning_rate": 5.9561533319113645e-05, "loss": 1.2228, "step": 129000 }, { "epoch": 1.32, "learning_rate": 5.9560402594562945e-05, "loss": 1.1779, "step": 129100 }, { "epoch": 1.32, "learning_rate": 5.9559270424684954e-05, "loss": 1.1835, "step": 129200 }, { "epoch": 1.32, "learning_rate": 5.955813680953505e-05, "loss": 1.2337, "step": 129300 }, { "epoch": 1.32, "learning_rate": 5.9557001749168646e-05, "loss": 1.2018, "step": 129400 }, { "epoch": 1.32, "learning_rate": 5.955586524364124e-05, "loss": 1.2186, "step": 129500 }, { "epoch": 1.32, "learning_rate": 5.955472729300841e-05, "loss": 1.098, "step": 129600 }, { "epoch": 1.32, "learning_rate": 5.955358789732578e-05, "loss": 1.1163, "step": 129700 }, { "epoch": 1.32, "learning_rate": 5.955244705664908e-05, "loss": 1.2921, "step": 129800 }, { "epoch": 1.32, "learning_rate": 5.9551304771034066e-05, "loss": 1.2077, "step": 129900 }, { "epoch": 1.32, "learning_rate": 5.955016104053661e-05, "loss": 1.328, "step": 130000 }, { "epoch": 1.33, "learning_rate": 5.954901586521263e-05, "loss": 1.2289, "step": 130100 }, { "epoch": 1.33, "learning_rate": 5.95478692451181e-05, "loss": 1.1185, "step": 130200 }, { "epoch": 1.33, "learning_rate": 5.954672118030911e-05, "loss": 1.1346, "step": 130300 }, { "epoch": 1.33, "learning_rate": 5.954557167084177e-05, "loss": 1.2112, "step": 130400 }, { "epoch": 1.33, "learning_rate": 5.95444207167723e-05, "loss": 1.1045, "step": 130500 }, { "epoch": 1.33, "learning_rate": 5.954326831815697e-05, "loss": 1.2686, "step": 130600 }, { "epoch": 1.33, "learning_rate": 5.954211447505212e-05, "loss": 1.1579, "step": 130700 }, { "epoch": 1.33, "learning_rate": 5.954095918751416e-05, "loss": 1.2822, "step": 130800 }, { "epoch": 1.33, "learning_rate": 5.953981403006823e-05, "loss": 1.1962, "step": 130900 }, { "epoch": 1.33, "learning_rate": 5.953865586827653e-05, "loss": 1.3005, "step": 131000 }, { "epoch": 1.34, "learning_rate": 5.9537496262220834e-05, "loss": 1.3678, "step": 131100 }, { "epoch": 1.34, "learning_rate": 5.953633521195784e-05, "loss": 1.2709, "step": 131200 }, { "epoch": 1.34, "learning_rate": 5.953517271754433e-05, "loss": 1.1558, "step": 131300 }, { "epoch": 1.34, "learning_rate": 5.9534008779037125e-05, "loss": 1.2944, "step": 131400 }, { "epoch": 1.34, "learning_rate": 5.953284339649314e-05, "loss": 1.4518, "step": 131500 }, { "epoch": 1.34, "learning_rate": 5.953167656996936e-05, "loss": 1.2493, "step": 131600 }, { "epoch": 1.34, "learning_rate": 5.953050829952282e-05, "loss": 1.206, "step": 131700 }, { "epoch": 1.34, "learning_rate": 5.952933858521066e-05, "loss": 1.2432, "step": 131800 }, { "epoch": 1.34, "learning_rate": 5.9528167427090065e-05, "loss": 1.1193, "step": 131900 }, { "epoch": 1.34, "learning_rate": 5.9526994825218296e-05, "loss": 1.2435, "step": 132000 }, { "epoch": 1.35, "learning_rate": 5.952582077965269e-05, "loss": 1.3112, "step": 132100 }, { "epoch": 1.35, "learning_rate": 5.9524645290450646e-05, "loss": 1.3123, "step": 132200 }, { "epoch": 1.35, "learning_rate": 5.952346835766964e-05, "loss": 1.2641, "step": 132300 }, { "epoch": 1.35, "learning_rate": 5.9522289981367224e-05, "loss": 1.3837, "step": 132400 }, { "epoch": 1.35, "learning_rate": 5.952111016160101e-05, "loss": 1.3454, "step": 132500 }, { "epoch": 1.35, "learning_rate": 5.951992889842868e-05, "loss": 1.1997, "step": 132600 }, { "epoch": 1.35, "learning_rate": 5.951875802611757e-05, "loss": 1.2276, "step": 132700 }, { "epoch": 1.35, "learning_rate": 5.9517573890738974e-05, "loss": 1.2187, "step": 132800 }, { "epoch": 1.35, "learning_rate": 5.951638831212716e-05, "loss": 1.2339, "step": 132900 }, { "epoch": 1.36, "learning_rate": 5.95152012903401e-05, "loss": 1.3291, "step": 133000 }, { "epoch": 1.36, "learning_rate": 5.9514012825435836e-05, "loss": 1.2949, "step": 133100 }, { "epoch": 1.36, "learning_rate": 5.951282291747248e-05, "loss": 1.4713, "step": 133200 }, { "epoch": 1.36, "learning_rate": 5.9511631566508204e-05, "loss": 1.3082, "step": 133300 }, { "epoch": 1.36, "learning_rate": 5.951043877260126e-05, "loss": 1.351, "step": 133400 }, { "epoch": 1.36, "learning_rate": 5.950924453580997e-05, "loss": 1.4038, "step": 133500 }, { "epoch": 1.36, "learning_rate": 5.950804885619272e-05, "loss": 1.2508, "step": 133600 }, { "epoch": 1.36, "learning_rate": 5.950685173380798e-05, "loss": 1.209, "step": 133700 }, { "epoch": 1.36, "learning_rate": 5.950565316871427e-05, "loss": 1.1596, "step": 133800 }, { "epoch": 1.36, "learning_rate": 5.950445316097021e-05, "loss": 1.2576, "step": 133900 }, { "epoch": 1.37, "learning_rate": 5.950325171063447e-05, "loss": 1.4181, "step": 134000 }, { "epoch": 1.37, "learning_rate": 5.9502048817765775e-05, "loss": 1.2478, "step": 134100 }, { "epoch": 1.37, "learning_rate": 5.9500856532916435e-05, "loss": 1.2614, "step": 134200 }, { "epoch": 1.37, "learning_rate": 5.949965076958223e-05, "loss": 1.3422, "step": 134300 }, { "epoch": 1.37, "learning_rate": 5.9498443563891155e-05, "loss": 1.3472, "step": 134400 }, { "epoch": 1.37, "learning_rate": 5.949723491590222e-05, "loss": 1.3978, "step": 134500 }, { "epoch": 1.37, "learning_rate": 5.9496024825674523e-05, "loss": 1.4959, "step": 134600 }, { "epoch": 1.37, "learning_rate": 5.949481329326724e-05, "loss": 1.3894, "step": 134700 }, { "epoch": 1.37, "learning_rate": 5.949360031873961e-05, "loss": 1.2948, "step": 134800 }, { "epoch": 1.37, "learning_rate": 5.949238590215093e-05, "loss": 1.2448, "step": 134900 }, { "epoch": 1.38, "learning_rate": 5.9491170043560575e-05, "loss": 1.4274, "step": 135000 }, { "epoch": 1.38, "learning_rate": 5.9489952743028004e-05, "loss": 1.3352, "step": 135100 }, { "epoch": 1.38, "learning_rate": 5.9488734000612735e-05, "loss": 1.3923, "step": 135200 }, { "epoch": 1.38, "learning_rate": 5.948751381637434e-05, "loss": 1.4538, "step": 135300 }, { "epoch": 1.38, "learning_rate": 5.94862921903725e-05, "loss": 1.2411, "step": 135400 }, { "epoch": 1.38, "learning_rate": 5.948508136048023e-05, "loss": 1.3221, "step": 135500 }, { "epoch": 1.38, "learning_rate": 5.9483856865546886e-05, "loss": 1.3336, "step": 135600 }, { "epoch": 1.38, "learning_rate": 5.9482630929028895e-05, "loss": 1.3068, "step": 135700 }, { "epoch": 1.38, "learning_rate": 5.948140355098619e-05, "loss": 1.2702, "step": 135800 }, { "epoch": 1.38, "learning_rate": 5.9480174731478784e-05, "loss": 1.2973, "step": 135900 }, { "epoch": 1.39, "learning_rate": 5.947894447056677e-05, "loss": 1.4826, "step": 136000 }, { "epoch": 1.39, "learning_rate": 5.9477712768310296e-05, "loss": 1.279, "step": 136100 }, { "epoch": 1.39, "learning_rate": 5.947647962476957e-05, "loss": 1.3991, "step": 136200 }, { "epoch": 1.39, "learning_rate": 5.94752450400049e-05, "loss": 1.3375, "step": 136300 }, { "epoch": 1.39, "learning_rate": 5.947400901407665e-05, "loss": 1.4318, "step": 136400 }, { "epoch": 1.39, "learning_rate": 5.9472783928848826e-05, "loss": 1.2377, "step": 136500 }, { "epoch": 1.39, "learning_rate": 5.9471545035184906e-05, "loss": 1.2923, "step": 136600 }, { "epoch": 1.39, "learning_rate": 5.947030470053831e-05, "loss": 1.4292, "step": 136700 }, { "epoch": 1.39, "learning_rate": 5.946906292496969e-05, "loss": 1.4379, "step": 136800 }, { "epoch": 1.39, "learning_rate": 5.946781970853975e-05, "loss": 1.3305, "step": 136900 }, { "epoch": 1.4, "learning_rate": 5.946657505130929e-05, "loss": 1.3043, "step": 137000 }, { "epoch": 1.4, "learning_rate": 5.9465328953339144e-05, "loss": 1.5034, "step": 137100 }, { "epoch": 1.4, "learning_rate": 5.9464081414690255e-05, "loss": 1.4252, "step": 137200 }, { "epoch": 1.4, "learning_rate": 5.946283243542362e-05, "loss": 1.3432, "step": 137300 }, { "epoch": 1.4, "learning_rate": 5.9461582015600305e-05, "loss": 1.2562, "step": 137400 }, { "epoch": 1.4, "learning_rate": 5.946033015528144e-05, "loss": 1.3676, "step": 137500 }, { "epoch": 1.4, "learning_rate": 5.945907685452825e-05, "loss": 1.3401, "step": 137600 }, { "epoch": 1.4, "learning_rate": 5.9457822113402e-05, "loss": 1.4083, "step": 137700 }, { "epoch": 1.4, "learning_rate": 5.9456565931964047e-05, "loss": 1.3111, "step": 137800 }, { "epoch": 1.4, "learning_rate": 5.945530831027581e-05, "loss": 1.3548, "step": 137900 }, { "epoch": 1.41, "learning_rate": 5.9454049248398766e-05, "loss": 1.3126, "step": 138000 }, { "epoch": 1.41, "learning_rate": 5.945278874639449e-05, "loss": 1.3558, "step": 138100 }, { "epoch": 1.41, "learning_rate": 5.9451526804324614e-05, "loss": 1.1784, "step": 138200 }, { "epoch": 1.41, "learning_rate": 5.945026342225083e-05, "loss": 1.2846, "step": 138300 }, { "epoch": 1.41, "learning_rate": 5.9448998600234916e-05, "loss": 1.222, "step": 138400 }, { "epoch": 1.41, "learning_rate": 5.944773233833872e-05, "loss": 1.2498, "step": 138500 }, { "epoch": 1.41, "learning_rate": 5.944646463662414e-05, "loss": 1.2831, "step": 138600 }, { "epoch": 1.41, "learning_rate": 5.9445195495153165e-05, "loss": 1.3749, "step": 138700 }, { "epoch": 1.41, "learning_rate": 5.944392491398785e-05, "loss": 1.272, "step": 138800 }, { "epoch": 1.42, "learning_rate": 5.944265289319032e-05, "loss": 1.1718, "step": 138900 }, { "epoch": 1.42, "learning_rate": 5.9441379432822776e-05, "loss": 1.3356, "step": 139000 }, { "epoch": 1.42, "learning_rate": 5.944010453294747e-05, "loss": 1.2766, "step": 139100 }, { "epoch": 1.42, "learning_rate": 5.943882819362674e-05, "loss": 1.3597, "step": 139200 }, { "epoch": 1.42, "learning_rate": 5.943755041492299e-05, "loss": 1.4762, "step": 139300 }, { "epoch": 1.42, "learning_rate": 5.9436271196898703e-05, "loss": 1.6504, "step": 139400 }, { "epoch": 1.42, "learning_rate": 5.943499053961642e-05, "loss": 1.3129, "step": 139500 }, { "epoch": 1.42, "learning_rate": 5.943370844313876e-05, "loss": 1.4621, "step": 139600 }, { "epoch": 1.42, "learning_rate": 5.94324249075284e-05, "loss": 1.6409, "step": 139700 }, { "epoch": 1.42, "learning_rate": 5.943113993284811e-05, "loss": 1.6821, "step": 139800 }, { "epoch": 1.43, "learning_rate": 5.942985351916072e-05, "loss": 1.4077, "step": 139900 }, { "epoch": 1.43, "learning_rate": 5.942856566652912e-05, "loss": 1.4963, "step": 140000 }, { "epoch": 1.43, "learning_rate": 5.9427276375016275e-05, "loss": 1.8543, "step": 140100 }, { "epoch": 1.43, "learning_rate": 5.942598564468523e-05, "loss": 1.8604, "step": 140200 }, { "epoch": 1.43, "learning_rate": 5.942469347559909e-05, "loss": 1.7474, "step": 140300 }, { "epoch": 1.43, "learning_rate": 5.9423399867821035e-05, "loss": 1.7125, "step": 140400 }, { "epoch": 1.43, "learning_rate": 5.942210482141432e-05, "loss": 1.691, "step": 140500 }, { "epoch": 1.43, "learning_rate": 5.942080833644226e-05, "loss": 1.3812, "step": 140600 }, { "epoch": 1.43, "learning_rate": 5.941951041296824e-05, "loss": 1.4928, "step": 140700 }, { "epoch": 1.43, "learning_rate": 5.941822405179493e-05, "loss": 1.4654, "step": 140800 }, { "epoch": 1.44, "learning_rate": 5.941692326589089e-05, "loss": 1.5307, "step": 140900 }, { "epoch": 1.44, "learning_rate": 5.9415621041674866e-05, "loss": 1.4875, "step": 141000 }, { "epoch": 1.44, "learning_rate": 5.941433042295427e-05, "loss": 1.4396, "step": 141100 }, { "epoch": 1.44, "learning_rate": 5.941302533668686e-05, "loss": 1.482, "step": 141200 }, { "epoch": 1.44, "learning_rate": 5.941171881229804e-05, "loss": 1.3334, "step": 141300 }, { "epoch": 1.44, "learning_rate": 5.941041084985169e-05, "loss": 1.3403, "step": 141400 }, { "epoch": 1.44, "learning_rate": 5.940910144941176e-05, "loss": 1.4023, "step": 141500 }, { "epoch": 1.44, "learning_rate": 5.9407790611042273e-05, "loss": 1.2677, "step": 141600 }, { "epoch": 1.44, "learning_rate": 5.9406478334807324e-05, "loss": 1.3845, "step": 141700 }, { "epoch": 1.44, "learning_rate": 5.9405164620771074e-05, "loss": 1.3756, "step": 141800 }, { "epoch": 1.45, "learning_rate": 5.940384946899775e-05, "loss": 1.3117, "step": 141900 }, { "epoch": 1.45, "learning_rate": 5.940253287955166e-05, "loss": 1.3507, "step": 142000 }, { "epoch": 1.45, "learning_rate": 5.9401214852497185e-05, "loss": 1.3176, "step": 142100 }, { "epoch": 1.45, "learning_rate": 5.939989538789875e-05, "loss": 1.4573, "step": 142200 }, { "epoch": 1.45, "learning_rate": 5.939857448582089e-05, "loss": 1.3671, "step": 142300 }, { "epoch": 1.45, "learning_rate": 5.939725214632817e-05, "loss": 1.3991, "step": 142400 }, { "epoch": 1.45, "learning_rate": 5.939592836948527e-05, "loss": 1.4889, "step": 142500 }, { "epoch": 1.45, "learning_rate": 5.939460315535688e-05, "loss": 1.3768, "step": 142600 }, { "epoch": 1.45, "learning_rate": 5.939327650400782e-05, "loss": 1.4841, "step": 142700 }, { "epoch": 1.45, "learning_rate": 5.9391948415502953e-05, "loss": 1.4147, "step": 142800 }, { "epoch": 1.46, "learning_rate": 5.93906188899072e-05, "loss": 1.4139, "step": 142900 }, { "epoch": 1.46, "learning_rate": 5.938928792728559e-05, "loss": 1.5077, "step": 143000 }, { "epoch": 1.46, "learning_rate": 5.9387955527703184e-05, "loss": 1.4399, "step": 143100 }, { "epoch": 1.46, "learning_rate": 5.938662169122513e-05, "loss": 1.4146, "step": 143200 }, { "epoch": 1.46, "learning_rate": 5.9385286417916634e-05, "loss": 1.3995, "step": 143300 }, { "epoch": 1.46, "learning_rate": 5.9383949707843004e-05, "loss": 1.2944, "step": 143400 }, { "epoch": 1.46, "learning_rate": 5.938261156106959e-05, "loss": 1.3506, "step": 143500 }, { "epoch": 1.46, "learning_rate": 5.9381271977661814e-05, "loss": 1.188, "step": 143600 }, { "epoch": 1.46, "learning_rate": 5.937993095768518e-05, "loss": 1.2377, "step": 143700 }, { "epoch": 1.47, "learning_rate": 5.937858850120525e-05, "loss": 1.4885, "step": 143800 }, { "epoch": 1.47, "learning_rate": 5.937724460828766e-05, "loss": 1.1877, "step": 143900 }, { "epoch": 1.47, "learning_rate": 5.9375899278998124e-05, "loss": 1.2533, "step": 144000 }, { "epoch": 1.47, "learning_rate": 5.937455251340242e-05, "loss": 1.3347, "step": 144100 }, { "epoch": 1.47, "learning_rate": 5.93732043115664e-05, "loss": 1.2398, "step": 144200 }, { "epoch": 1.47, "learning_rate": 5.937185467355598e-05, "loss": 1.368, "step": 144300 }, { "epoch": 1.47, "learning_rate": 5.9370503599437145e-05, "loss": 1.3697, "step": 144400 }, { "epoch": 1.47, "learning_rate": 5.936915108927596e-05, "loss": 1.3518, "step": 144500 }, { "epoch": 1.47, "learning_rate": 5.936779714313855e-05, "loss": 1.3357, "step": 144600 }, { "epoch": 1.47, "learning_rate": 5.936644176109112e-05, "loss": 1.2978, "step": 144700 }, { "epoch": 1.48, "learning_rate": 5.936508494319994e-05, "loss": 1.3547, "step": 144800 }, { "epoch": 1.48, "learning_rate": 5.936372668953134e-05, "loss": 1.2259, "step": 144900 }, { "epoch": 1.48, "learning_rate": 5.936236700015174e-05, "loss": 1.4325, "step": 145000 }, { "epoch": 1.48, "learning_rate": 5.9361005875127624e-05, "loss": 1.4178, "step": 145100 }, { "epoch": 1.48, "learning_rate": 5.9359643314525543e-05, "loss": 1.2432, "step": 145200 }, { "epoch": 1.48, "learning_rate": 5.93582793184121e-05, "loss": 1.3914, "step": 145300 }, { "epoch": 1.48, "learning_rate": 5.935691388685401e-05, "loss": 1.3872, "step": 145400 }, { "epoch": 1.48, "learning_rate": 5.935554701991801e-05, "loss": 1.2988, "step": 145500 }, { "epoch": 1.48, "learning_rate": 5.935417871767096e-05, "loss": 1.2208, "step": 145600 }, { "epoch": 1.48, "learning_rate": 5.9352808980179744e-05, "loss": 1.2235, "step": 145700 }, { "epoch": 1.49, "learning_rate": 5.9351437807511335e-05, "loss": 1.295, "step": 145800 }, { "epoch": 1.49, "learning_rate": 5.935006519973278e-05, "loss": 1.1823, "step": 145900 }, { "epoch": 1.49, "learning_rate": 5.934869115691119e-05, "loss": 1.1615, "step": 146000 }, { "epoch": 1.49, "learning_rate": 5.934731567911374e-05, "loss": 1.1835, "step": 146100 }, { "epoch": 1.49, "learning_rate": 5.934593876640769e-05, "loss": 1.2593, "step": 146200 }, { "epoch": 1.49, "learning_rate": 5.9344560418860366e-05, "loss": 1.2741, "step": 146300 }, { "epoch": 1.49, "learning_rate": 5.934318063653915e-05, "loss": 1.3302, "step": 146400 }, { "epoch": 1.49, "learning_rate": 5.9341799419511525e-05, "loss": 1.2166, "step": 146500 }, { "epoch": 1.49, "learning_rate": 5.9340416767845005e-05, "loss": 1.2079, "step": 146600 }, { "epoch": 1.49, "learning_rate": 5.933903268160719e-05, "loss": 1.2697, "step": 146700 }, { "epoch": 1.5, "learning_rate": 5.9337647160865774e-05, "loss": 1.2259, "step": 146800 }, { "epoch": 1.5, "learning_rate": 5.933626020568849e-05, "loss": 1.2263, "step": 146900 }, { "epoch": 1.5, "learning_rate": 5.933487181614314e-05, "loss": 1.234, "step": 147000 }, { "epoch": 1.5, "learning_rate": 5.933348199229763e-05, "loss": 1.2311, "step": 147100 }, { "epoch": 1.5, "learning_rate": 5.93320907342199e-05, "loss": 1.1367, "step": 147200 }, { "epoch": 1.5, "learning_rate": 5.9330698041977976e-05, "loss": 1.2094, "step": 147300 }, { "epoch": 1.5, "learning_rate": 5.9329303915639956e-05, "loss": 1.0596, "step": 147400 }, { "epoch": 1.5, "learning_rate": 5.9327922317975875e-05, "loss": 1.2459, "step": 147500 }, { "epoch": 1.5, "learning_rate": 5.932652533798949e-05, "loss": 1.4044, "step": 147600 }, { "epoch": 1.5, "learning_rate": 5.932512692411102e-05, "loss": 1.1682, "step": 147700 }, { "epoch": 1.51, "learning_rate": 5.932372707640884e-05, "loss": 1.098, "step": 147800 }, { "epoch": 1.51, "learning_rate": 5.932232579495141e-05, "loss": 1.3269, "step": 147900 }, { "epoch": 1.51, "learning_rate": 5.932092307980723e-05, "loss": 1.3592, "step": 148000 }, { "epoch": 1.51, "learning_rate": 5.9319518931044896e-05, "loss": 1.179, "step": 148100 }, { "epoch": 1.51, "learning_rate": 5.931811334873305e-05, "loss": 1.2472, "step": 148200 }, { "epoch": 1.51, "learning_rate": 5.9316706332940426e-05, "loss": 1.2411, "step": 148300 }, { "epoch": 1.51, "learning_rate": 5.9315297883735816e-05, "loss": 1.1603, "step": 148400 }, { "epoch": 1.51, "learning_rate": 5.931388800118808e-05, "loss": 1.2482, "step": 148500 }, { "epoch": 1.51, "learning_rate": 5.931247668536616e-05, "loss": 1.288, "step": 148600 }, { "epoch": 1.51, "learning_rate": 5.9311063936339054e-05, "loss": 1.3329, "step": 148700 }, { "epoch": 1.52, "learning_rate": 5.930964975417584e-05, "loss": 1.4245, "step": 148800 }, { "epoch": 1.52, "learning_rate": 5.930823413894567e-05, "loss": 1.1962, "step": 148900 }, { "epoch": 1.52, "learning_rate": 5.930681709071775e-05, "loss": 1.1768, "step": 149000 }, { "epoch": 1.52, "learning_rate": 5.930539860956137e-05, "loss": 1.2369, "step": 149100 }, { "epoch": 1.52, "learning_rate": 5.930397869554588e-05, "loss": 1.2428, "step": 149200 }, { "epoch": 1.52, "learning_rate": 5.9302557348740716e-05, "loss": 1.2765, "step": 149300 }, { "epoch": 1.52, "learning_rate": 5.930113456921536e-05, "loss": 1.0246, "step": 149400 }, { "epoch": 1.52, "learning_rate": 5.929971035703938e-05, "loss": 1.2969, "step": 149500 }, { "epoch": 1.52, "learning_rate": 5.9298284712282424e-05, "loss": 1.1388, "step": 149600 }, { "epoch": 1.53, "learning_rate": 5.929685763501418e-05, "loss": 1.1419, "step": 149700 }, { "epoch": 1.53, "learning_rate": 5.9295429125304444e-05, "loss": 1.1103, "step": 149800 }, { "epoch": 1.53, "learning_rate": 5.9293999183223034e-05, "loss": 1.1301, "step": 149900 }, { "epoch": 1.53, "learning_rate": 5.929256780883989e-05, "loss": 1.3856, "step": 150000 }, { "epoch": 1.53, "learning_rate": 5.9291135002224994e-05, "loss": 1.147, "step": 150100 }, { "epoch": 1.53, "learning_rate": 5.928970076344839e-05, "loss": 1.1833, "step": 150200 }, { "epoch": 1.53, "learning_rate": 5.928826509258021e-05, "loss": 1.1268, "step": 150300 }, { "epoch": 1.53, "learning_rate": 5.928682798969065e-05, "loss": 1.3398, "step": 150400 }, { "epoch": 1.53, "learning_rate": 5.9285389454849974e-05, "loss": 1.1988, "step": 150500 }, { "epoch": 1.53, "learning_rate": 5.928394948812853e-05, "loss": 1.1704, "step": 150600 }, { "epoch": 1.54, "learning_rate": 5.92825080895967e-05, "loss": 1.1583, "step": 150700 }, { "epoch": 1.54, "learning_rate": 5.928106525932499e-05, "loss": 1.0919, "step": 150800 }, { "epoch": 1.54, "learning_rate": 5.927962099738391e-05, "loss": 1.2255, "step": 150900 }, { "epoch": 1.54, "learning_rate": 5.927817530384411e-05, "loss": 1.1635, "step": 151000 }, { "epoch": 1.54, "learning_rate": 5.9276728178776256e-05, "loss": 1.2916, "step": 151100 }, { "epoch": 1.54, "learning_rate": 5.9275279622251105e-05, "loss": 1.3269, "step": 151200 }, { "epoch": 1.54, "learning_rate": 5.927382963433949e-05, "loss": 1.2458, "step": 151300 }, { "epoch": 1.54, "learning_rate": 5.92723782151123e-05, "loss": 1.2964, "step": 151400 }, { "epoch": 1.54, "learning_rate": 5.927092536464051e-05, "loss": 1.2414, "step": 151500 }, { "epoch": 1.54, "learning_rate": 5.926947108299514e-05, "loss": 1.2577, "step": 151600 }, { "epoch": 1.55, "learning_rate": 5.926801537024732e-05, "loss": 1.2845, "step": 151700 }, { "epoch": 1.55, "learning_rate": 5.92665582264682e-05, "loss": 1.2276, "step": 151800 }, { "epoch": 1.55, "learning_rate": 5.926509965172903e-05, "loss": 1.2558, "step": 151900 }, { "epoch": 1.55, "learning_rate": 5.926365425324009e-05, "loss": 1.1331, "step": 152000 }, { "epoch": 1.55, "learning_rate": 5.926219283110268e-05, "loss": 1.2888, "step": 152100 }, { "epoch": 1.55, "learning_rate": 5.926072997821866e-05, "loss": 1.2316, "step": 152200 }, { "epoch": 1.55, "learning_rate": 5.925926569465956e-05, "loss": 1.1572, "step": 152300 }, { "epoch": 1.55, "learning_rate": 5.925779998049699e-05, "loss": 1.1334, "step": 152400 }, { "epoch": 1.55, "learning_rate": 5.9256332835802596e-05, "loss": 1.1042, "step": 152500 }, { "epoch": 1.55, "learning_rate": 5.925486426064812e-05, "loss": 1.2739, "step": 152600 }, { "epoch": 1.56, "learning_rate": 5.9253394255105366e-05, "loss": 1.1874, "step": 152700 }, { "epoch": 1.56, "learning_rate": 5.925192281924621e-05, "loss": 1.2252, "step": 152800 }, { "epoch": 1.56, "learning_rate": 5.9250449953142586e-05, "loss": 1.2749, "step": 152900 }, { "epoch": 1.56, "learning_rate": 5.924897565686653e-05, "loss": 1.1701, "step": 153000 }, { "epoch": 1.56, "learning_rate": 5.924751469483263e-05, "loss": 1.234, "step": 153100 }, { "epoch": 1.56, "learning_rate": 5.9246037552727924e-05, "loss": 1.2122, "step": 153200 }, { "epoch": 1.56, "learning_rate": 5.924455898066652e-05, "loss": 1.1334, "step": 153300 }, { "epoch": 1.56, "learning_rate": 5.9243078978720694e-05, "loss": 1.2941, "step": 153400 }, { "epoch": 1.56, "learning_rate": 5.924159754696283e-05, "loss": 1.1894, "step": 153500 }, { "epoch": 1.56, "learning_rate": 5.924011468546534e-05, "loss": 1.1101, "step": 153600 }, { "epoch": 1.57, "learning_rate": 5.9238630394300736e-05, "loss": 1.1649, "step": 153700 }, { "epoch": 1.57, "learning_rate": 5.92371446735416e-05, "loss": 1.184, "step": 153800 }, { "epoch": 1.57, "learning_rate": 5.923565752326057e-05, "loss": 1.1365, "step": 153900 }, { "epoch": 1.57, "learning_rate": 5.923416894353035e-05, "loss": 1.2747, "step": 154000 }, { "epoch": 1.57, "learning_rate": 5.9232678934423735e-05, "loss": 1.2643, "step": 154100 }, { "epoch": 1.57, "learning_rate": 5.923118749601357e-05, "loss": 1.1458, "step": 154200 }, { "epoch": 1.57, "learning_rate": 5.922969462837278e-05, "loss": 1.0832, "step": 154300 }, { "epoch": 1.57, "learning_rate": 5.922820033157435e-05, "loss": 1.0339, "step": 154400 }, { "epoch": 1.57, "learning_rate": 5.9226704605691344e-05, "loss": 1.1707, "step": 154500 }, { "epoch": 1.58, "learning_rate": 5.9225207450796914e-05, "loss": 1.1772, "step": 154600 }, { "epoch": 1.58, "learning_rate": 5.9223708866964244e-05, "loss": 1.2733, "step": 154700 }, { "epoch": 1.58, "learning_rate": 5.9222208854266594e-05, "loss": 1.1665, "step": 154800 }, { "epoch": 1.58, "learning_rate": 5.9220707412777335e-05, "loss": 1.1074, "step": 154900 }, { "epoch": 1.58, "learning_rate": 5.9219204542569856e-05, "loss": 1.181, "step": 155000 }, { "epoch": 1.58, "learning_rate": 5.921770024371765e-05, "loss": 1.1131, "step": 155100 }, { "epoch": 1.58, "learning_rate": 5.921619451629426e-05, "loss": 1.2697, "step": 155200 }, { "epoch": 1.58, "learning_rate": 5.9214687360373324e-05, "loss": 1.2122, "step": 155300 }, { "epoch": 1.58, "learning_rate": 5.921319386894241e-05, "loss": 1.1945, "step": 155400 }, { "epoch": 1.58, "learning_rate": 5.921168387053062e-05, "loss": 1.1864, "step": 155500 }, { "epoch": 1.59, "learning_rate": 5.921017244384183e-05, "loss": 1.1472, "step": 155600 }, { "epoch": 1.59, "learning_rate": 5.9208659588949905e-05, "loss": 1.1793, "step": 155700 }, { "epoch": 1.59, "learning_rate": 5.9207145305928853e-05, "loss": 1.2403, "step": 155800 }, { "epoch": 1.59, "learning_rate": 5.920562959485268e-05, "loss": 1.121, "step": 155900 }, { "epoch": 1.59, "learning_rate": 5.9204112455795514e-05, "loss": 1.194, "step": 156000 }, { "epoch": 1.59, "learning_rate": 5.920259388883154e-05, "loss": 1.182, "step": 156100 }, { "epoch": 1.59, "learning_rate": 5.920107389403498e-05, "loss": 1.1897, "step": 156200 }, { "epoch": 1.59, "learning_rate": 5.919955247148018e-05, "loss": 1.342, "step": 156300 }, { "epoch": 1.59, "learning_rate": 5.919802962124151e-05, "loss": 1.172, "step": 156400 }, { "epoch": 1.59, "learning_rate": 5.919650534339345e-05, "loss": 1.1726, "step": 156500 }, { "epoch": 1.6, "learning_rate": 5.9194979638010506e-05, "loss": 1.0533, "step": 156600 }, { "epoch": 1.6, "learning_rate": 5.919345250516728e-05, "loss": 1.2937, "step": 156700 }, { "epoch": 1.6, "learning_rate": 5.919192394493845e-05, "loss": 1.2577, "step": 156800 }, { "epoch": 1.6, "learning_rate": 5.919039395739874e-05, "loss": 1.1842, "step": 156900 }, { "epoch": 1.6, "learning_rate": 5.9188862542622975e-05, "loss": 1.2523, "step": 157000 }, { "epoch": 1.6, "learning_rate": 5.918732970068602e-05, "loss": 1.3062, "step": 157100 }, { "epoch": 1.6, "learning_rate": 5.918579543166282e-05, "loss": 1.2295, "step": 157200 }, { "epoch": 1.6, "learning_rate": 5.918425973562839e-05, "loss": 1.1519, "step": 157300 }, { "epoch": 1.6, "learning_rate": 5.918272261265783e-05, "loss": 1.2288, "step": 157400 }, { "epoch": 1.6, "learning_rate": 5.918118406282629e-05, "loss": 1.1961, "step": 157500 }, { "epoch": 1.61, "learning_rate": 5.9179644086208986e-05, "loss": 1.2629, "step": 157600 }, { "epoch": 1.61, "learning_rate": 5.917810268288122e-05, "loss": 1.2248, "step": 157700 }, { "epoch": 1.61, "learning_rate": 5.917655985291836e-05, "loss": 1.2071, "step": 157800 }, { "epoch": 1.61, "learning_rate": 5.917501559639585e-05, "loss": 1.2202, "step": 157900 }, { "epoch": 1.61, "learning_rate": 5.917346991338917e-05, "loss": 1.2198, "step": 158000 }, { "epoch": 1.61, "learning_rate": 5.917192280397392e-05, "loss": 1.0927, "step": 158100 }, { "epoch": 1.61, "learning_rate": 5.917037426822573e-05, "loss": 1.1159, "step": 158200 }, { "epoch": 1.61, "learning_rate": 5.916882430622032e-05, "loss": 1.1849, "step": 158300 }, { "epoch": 1.61, "learning_rate": 5.916727291803347e-05, "loss": 1.1164, "step": 158400 }, { "epoch": 1.61, "learning_rate": 5.916572010374103e-05, "loss": 1.1085, "step": 158500 }, { "epoch": 1.62, "learning_rate": 5.916416586341893e-05, "loss": 1.0363, "step": 158600 }, { "epoch": 1.62, "learning_rate": 5.9162610197143174e-05, "loss": 1.3396, "step": 158700 }, { "epoch": 1.62, "learning_rate": 5.91610531049898e-05, "loss": 1.2658, "step": 158800 }, { "epoch": 1.62, "learning_rate": 5.915949458703496e-05, "loss": 1.2445, "step": 158900 }, { "epoch": 1.62, "learning_rate": 5.9157934643354854e-05, "loss": 1.1969, "step": 159000 }, { "epoch": 1.62, "learning_rate": 5.915637327402574e-05, "loss": 1.2443, "step": 159100 }, { "epoch": 1.62, "learning_rate": 5.915481047912397e-05, "loss": 1.1003, "step": 159200 }, { "epoch": 1.62, "learning_rate": 5.915324625872595e-05, "loss": 1.223, "step": 159300 }, { "epoch": 1.62, "learning_rate": 5.915168061290817e-05, "loss": 1.0726, "step": 159400 }, { "epoch": 1.63, "learning_rate": 5.9150113541747186e-05, "loss": 1.1546, "step": 159500 }, { "epoch": 1.63, "learning_rate": 5.91485450453196e-05, "loss": 1.0942, "step": 159600 }, { "epoch": 1.63, "learning_rate": 5.914697512370211e-05, "loss": 1.1822, "step": 159700 }, { "epoch": 1.63, "learning_rate": 5.9145403776971484e-05, "loss": 1.2163, "step": 159800 }, { "epoch": 1.63, "learning_rate": 5.914383100520453e-05, "loss": 1.1197, "step": 159900 }, { "epoch": 1.63, "learning_rate": 5.9142256808478173e-05, "loss": 1.1105, "step": 160000 }, { "epoch": 1.63, "learning_rate": 5.914068118686937e-05, "loss": 1.147, "step": 160100 }, { "epoch": 1.63, "learning_rate": 5.913910414045515e-05, "loss": 1.1338, "step": 160200 }, { "epoch": 1.63, "learning_rate": 5.913752566931264e-05, "loss": 1.1924, "step": 160300 }, { "epoch": 1.63, "learning_rate": 5.9135945773519e-05, "loss": 1.214, "step": 160400 }, { "epoch": 1.64, "learning_rate": 5.913436445315149e-05, "loss": 1.291, "step": 160500 }, { "epoch": 1.64, "learning_rate": 5.9132781708287425e-05, "loss": 1.2537, "step": 160600 }, { "epoch": 1.64, "learning_rate": 5.9131213387747644e-05, "loss": 1.0371, "step": 160700 }, { "epoch": 1.64, "learning_rate": 5.912962780836572e-05, "loss": 1.0675, "step": 160800 }, { "epoch": 1.64, "learning_rate": 5.9128040804718844e-05, "loss": 1.1891, "step": 160900 }, { "epoch": 1.64, "learning_rate": 5.91264523768846e-05, "loss": 1.1541, "step": 161000 }, { "epoch": 1.64, "learning_rate": 5.9124862524940636e-05, "loss": 1.027, "step": 161100 }, { "epoch": 1.64, "learning_rate": 5.912327124896471e-05, "loss": 1.2509, "step": 161200 }, { "epoch": 1.64, "learning_rate": 5.912167854903463e-05, "loss": 1.1014, "step": 161300 }, { "epoch": 1.64, "learning_rate": 5.912008442522825e-05, "loss": 1.1002, "step": 161400 }, { "epoch": 1.65, "learning_rate": 5.911848887762352e-05, "loss": 1.2381, "step": 161500 }, { "epoch": 1.65, "learning_rate": 5.9116891906298465e-05, "loss": 1.0596, "step": 161600 }, { "epoch": 1.65, "learning_rate": 5.9115293511331144e-05, "loss": 1.2705, "step": 161700 }, { "epoch": 1.65, "learning_rate": 5.9113693692799724e-05, "loss": 1.0786, "step": 161800 }, { "epoch": 1.65, "learning_rate": 5.911209245078242e-05, "loss": 1.1559, "step": 161900 }, { "epoch": 1.65, "learning_rate": 5.911048978535753e-05, "loss": 1.1835, "step": 162000 }, { "epoch": 1.65, "learning_rate": 5.910888569660341e-05, "loss": 1.0689, "step": 162100 }, { "epoch": 1.65, "learning_rate": 5.91072801845985e-05, "loss": 1.3089, "step": 162200 }, { "epoch": 1.65, "learning_rate": 5.910567324942129e-05, "loss": 1.0768, "step": 162300 }, { "epoch": 1.65, "learning_rate": 5.9104064891150344e-05, "loss": 1.0874, "step": 162400 }, { "epoch": 1.66, "learning_rate": 5.910245510986432e-05, "loss": 1.1226, "step": 162500 }, { "epoch": 1.66, "learning_rate": 5.910084390564191e-05, "loss": 1.077, "step": 162600 }, { "epoch": 1.66, "learning_rate": 5.909923127856189e-05, "loss": 1.1045, "step": 162700 }, { "epoch": 1.66, "learning_rate": 5.9097617228703125e-05, "loss": 1.2107, "step": 162800 }, { "epoch": 1.66, "learning_rate": 5.9096001756144524e-05, "loss": 1.1376, "step": 162900 }, { "epoch": 1.66, "learning_rate": 5.9094384860965065e-05, "loss": 1.0462, "step": 163000 }, { "epoch": 1.66, "learning_rate": 5.909276654324382e-05, "loss": 1.1375, "step": 163100 }, { "epoch": 1.66, "learning_rate": 5.909114680305991e-05, "loss": 1.1075, "step": 163200 }, { "epoch": 1.66, "learning_rate": 5.908952564049252e-05, "loss": 1.1731, "step": 163300 }, { "epoch": 1.66, "learning_rate": 5.908790305562092e-05, "loss": 1.1392, "step": 163400 }, { "epoch": 1.67, "learning_rate": 5.908627904852446e-05, "loss": 1.0818, "step": 163500 }, { "epoch": 1.67, "learning_rate": 5.908465361928253e-05, "loss": 1.1729, "step": 163600 }, { "epoch": 1.67, "learning_rate": 5.90830267679746e-05, "loss": 1.0986, "step": 163700 }, { "epoch": 1.67, "learning_rate": 5.9081398494680226e-05, "loss": 1.1971, "step": 163800 }, { "epoch": 1.67, "learning_rate": 5.907976879947901e-05, "loss": 1.0444, "step": 163900 }, { "epoch": 1.67, "learning_rate": 5.907813768245064e-05, "loss": 1.1125, "step": 164000 }, { "epoch": 1.67, "learning_rate": 5.9076505143674856e-05, "loss": 1.0496, "step": 164100 }, { "epoch": 1.67, "learning_rate": 5.907487118323151e-05, "loss": 1.1377, "step": 164200 }, { "epoch": 1.67, "learning_rate": 5.907323580120046e-05, "loss": 1.2694, "step": 164300 }, { "epoch": 1.67, "learning_rate": 5.9071598997661684e-05, "loss": 1.1462, "step": 164400 }, { "epoch": 1.68, "learning_rate": 5.906996077269521e-05, "loss": 1.1246, "step": 164500 }, { "epoch": 1.68, "learning_rate": 5.9068321126381126e-05, "loss": 1.1021, "step": 164600 }, { "epoch": 1.68, "learning_rate": 5.9066696476510437e-05, "loss": 1.0859, "step": 164700 }, { "epoch": 1.68, "learning_rate": 5.906505400195321e-05, "loss": 0.999, "step": 164800 }, { "epoch": 1.68, "learning_rate": 5.9063410106288284e-05, "loss": 1.0934, "step": 164900 }, { "epoch": 1.68, "learning_rate": 5.906176478959605e-05, "loss": 1.1116, "step": 165000 }, { "epoch": 1.68, "learning_rate": 5.906011805195696e-05, "loss": 1.0252, "step": 165100 }, { "epoch": 1.68, "learning_rate": 5.9058469893451504e-05, "loss": 1.15, "step": 165200 }, { "epoch": 1.68, "learning_rate": 5.905682031416028e-05, "loss": 1.1213, "step": 165300 }, { "epoch": 1.69, "learning_rate": 5.9055169314163956e-05, "loss": 1.1387, "step": 165400 }, { "epoch": 1.69, "learning_rate": 5.905351689354324e-05, "loss": 1.0706, "step": 165500 }, { "epoch": 1.69, "learning_rate": 5.905186305237893e-05, "loss": 1.1451, "step": 165600 }, { "epoch": 1.69, "learning_rate": 5.905020779075188e-05, "loss": 1.1136, "step": 165700 }, { "epoch": 1.69, "learning_rate": 5.904855110874305e-05, "loss": 1.1479, "step": 165800 }, { "epoch": 1.69, "learning_rate": 5.904689300643341e-05, "loss": 1.0833, "step": 165900 }, { "epoch": 1.69, "learning_rate": 5.904523348390405e-05, "loss": 1.124, "step": 166000 }, { "epoch": 1.69, "learning_rate": 5.904357254123611e-05, "loss": 1.0448, "step": 166100 }, { "epoch": 1.69, "learning_rate": 5.9041910178510786e-05, "loss": 1.1762, "step": 166200 }, { "epoch": 1.69, "learning_rate": 5.904024639580937e-05, "loss": 1.1621, "step": 166300 }, { "epoch": 1.7, "learning_rate": 5.903858119321321e-05, "loss": 1.1899, "step": 166400 }, { "epoch": 1.7, "learning_rate": 5.903691457080373e-05, "loss": 1.1389, "step": 166500 }, { "epoch": 1.7, "learning_rate": 5.90352465286624e-05, "loss": 1.1143, "step": 166600 }, { "epoch": 1.7, "learning_rate": 5.903357706687079e-05, "loss": 1.0186, "step": 166700 }, { "epoch": 1.7, "learning_rate": 5.9031922901350724e-05, "loss": 1.1633, "step": 166800 }, { "epoch": 1.7, "learning_rate": 5.903025061469797e-05, "loss": 1.1949, "step": 166900 }, { "epoch": 1.7, "learning_rate": 5.90285769086392e-05, "loss": 1.1562, "step": 167000 }, { "epoch": 1.7, "learning_rate": 5.902690178325626e-05, "loss": 1.1467, "step": 167100 }, { "epoch": 1.7, "learning_rate": 5.9025225238631035e-05, "loss": 1.1466, "step": 167200 }, { "epoch": 1.7, "learning_rate": 5.902354727484552e-05, "loss": 1.0918, "step": 167300 }, { "epoch": 1.71, "learning_rate": 5.9021867891981734e-05, "loss": 1.0937, "step": 167400 }, { "epoch": 1.71, "learning_rate": 5.902018709012181e-05, "loss": 1.184, "step": 167500 }, { "epoch": 1.71, "learning_rate": 5.901850486934792e-05, "loss": 1.0813, "step": 167600 }, { "epoch": 1.71, "learning_rate": 5.901682122974231e-05, "loss": 1.056, "step": 167700 }, { "epoch": 1.71, "learning_rate": 5.901513617138731e-05, "loss": 1.0522, "step": 167800 }, { "epoch": 1.71, "learning_rate": 5.90134496943653e-05, "loss": 1.2201, "step": 167900 }, { "epoch": 1.71, "learning_rate": 5.9011761798758754e-05, "loss": 1.0886, "step": 168000 }, { "epoch": 1.71, "learning_rate": 5.901007248465018e-05, "loss": 1.2309, "step": 168100 }, { "epoch": 1.71, "learning_rate": 5.9008381752122184e-05, "loss": 1.0794, "step": 168200 }, { "epoch": 1.71, "learning_rate": 5.9006689601257436e-05, "loss": 1.17, "step": 168300 }, { "epoch": 1.72, "learning_rate": 5.900499603213867e-05, "loss": 1.1347, "step": 168400 }, { "epoch": 1.72, "learning_rate": 5.9003301044848694e-05, "loss": 1.0973, "step": 168500 }, { "epoch": 1.72, "learning_rate": 5.900160463947037e-05, "loss": 1.149, "step": 168600 }, { "epoch": 1.72, "learning_rate": 5.899990681608666e-05, "loss": 1.0234, "step": 168700 }, { "epoch": 1.72, "learning_rate": 5.899820757478057e-05, "loss": 1.0795, "step": 168800 }, { "epoch": 1.72, "learning_rate": 5.899650691563518e-05, "loss": 1.1501, "step": 168900 }, { "epoch": 1.72, "learning_rate": 5.899480483873364e-05, "loss": 1.0991, "step": 169000 }, { "epoch": 1.72, "learning_rate": 5.8993101344159175e-05, "loss": 1.0569, "step": 169100 }, { "epoch": 1.72, "learning_rate": 5.8991396431995085e-05, "loss": 1.1131, "step": 169200 }, { "epoch": 1.72, "learning_rate": 5.89897071726378e-05, "loss": 1.1271, "step": 169300 }, { "epoch": 1.73, "learning_rate": 5.89879994397184e-05, "loss": 1.1876, "step": 169400 }, { "epoch": 1.73, "learning_rate": 5.898629028945882e-05, "loss": 1.0453, "step": 169500 }, { "epoch": 1.73, "learning_rate": 5.898457972194263e-05, "loss": 1.1227, "step": 169600 }, { "epoch": 1.73, "learning_rate": 5.898286773725345e-05, "loss": 1.0569, "step": 169700 }, { "epoch": 1.73, "learning_rate": 5.8981154335475006e-05, "loss": 1.1641, "step": 169800 }, { "epoch": 1.73, "learning_rate": 5.897943951669107e-05, "loss": 1.0877, "step": 169900 }, { "epoch": 1.73, "learning_rate": 5.897772328098548e-05, "loss": 1.0179, "step": 170000 }, { "epoch": 1.73, "learning_rate": 5.897600562844215e-05, "loss": 1.0443, "step": 170100 }, { "epoch": 1.73, "learning_rate": 5.897428655914506e-05, "loss": 0.9895, "step": 170200 }, { "epoch": 1.74, "learning_rate": 5.8972566073178265e-05, "loss": 0.973, "step": 170300 }, { "epoch": 1.74, "learning_rate": 5.89708441706259e-05, "loss": 1.103, "step": 170400 }, { "epoch": 1.74, "learning_rate": 5.8969120851572135e-05, "loss": 1.1178, "step": 170500 }, { "epoch": 1.74, "learning_rate": 5.8967396116101244e-05, "loss": 1.138, "step": 170600 }, { "epoch": 1.74, "learning_rate": 5.896566996429755e-05, "loss": 1.0396, "step": 170700 }, { "epoch": 1.74, "learning_rate": 5.896394239624544e-05, "loss": 1.0276, "step": 170800 }, { "epoch": 1.74, "learning_rate": 5.896221341202942e-05, "loss": 1.0412, "step": 170900 }, { "epoch": 1.74, "learning_rate": 5.896048301173398e-05, "loss": 1.1085, "step": 171000 }, { "epoch": 1.74, "learning_rate": 5.895875119544376e-05, "loss": 1.1414, "step": 171100 }, { "epoch": 1.74, "learning_rate": 5.895701796324341e-05, "loss": 1.1306, "step": 171200 }, { "epoch": 1.75, "learning_rate": 5.89552833152177e-05, "loss": 1.0438, "step": 171300 }, { "epoch": 1.75, "learning_rate": 5.895354725145143e-05, "loss": 1.0865, "step": 171400 }, { "epoch": 1.75, "learning_rate": 5.895182715383093e-05, "loss": 1.0463, "step": 171500 }, { "epoch": 1.75, "learning_rate": 5.895008827299354e-05, "loss": 0.988, "step": 171600 }, { "epoch": 1.75, "learning_rate": 5.894834797666961e-05, "loss": 1.1612, "step": 171700 }, { "epoch": 1.75, "learning_rate": 5.894660626494422e-05, "loss": 0.998, "step": 171800 }, { "epoch": 1.75, "learning_rate": 5.8944863137902525e-05, "loss": 1.2281, "step": 171900 }, { "epoch": 1.75, "learning_rate": 5.894311859562975e-05, "loss": 1.1075, "step": 172000 }, { "epoch": 1.75, "learning_rate": 5.89413726382112e-05, "loss": 0.9906, "step": 172100 }, { "epoch": 1.75, "learning_rate": 5.893962526573225e-05, "loss": 1.0387, "step": 172200 }, { "epoch": 1.76, "learning_rate": 5.8937876478278324e-05, "loss": 1.0356, "step": 172300 }, { "epoch": 1.76, "learning_rate": 5.893612627593492e-05, "loss": 1.0154, "step": 172400 }, { "epoch": 1.76, "learning_rate": 5.893437465878763e-05, "loss": 1.1299, "step": 172500 }, { "epoch": 1.76, "learning_rate": 5.8932621626922094e-05, "loss": 1.0584, "step": 172600 }, { "epoch": 1.76, "learning_rate": 5.8930867180424027e-05, "loss": 1.1945, "step": 172700 }, { "epoch": 1.76, "learning_rate": 5.8929111319379195e-05, "loss": 1.2254, "step": 172800 }, { "epoch": 1.76, "learning_rate": 5.892735404387346e-05, "loss": 1.0428, "step": 172900 }, { "epoch": 1.76, "learning_rate": 5.892559535399275e-05, "loss": 1.0948, "step": 173000 }, { "epoch": 1.76, "learning_rate": 5.892383524982303e-05, "loss": 0.9721, "step": 173100 }, { "epoch": 1.76, "learning_rate": 5.892207373145039e-05, "loss": 1.0578, "step": 173200 }, { "epoch": 1.77, "learning_rate": 5.892031079896094e-05, "loss": 1.1486, "step": 173300 }, { "epoch": 1.77, "learning_rate": 5.8918546452440874e-05, "loss": 1.0789, "step": 173400 }, { "epoch": 1.77, "learning_rate": 5.891679835657985e-05, "loss": 1.1121, "step": 173500 }, { "epoch": 1.77, "learning_rate": 5.8915031196395586e-05, "loss": 1.1555, "step": 173600 }, { "epoch": 1.77, "learning_rate": 5.891326262243885e-05, "loss": 1.1175, "step": 173700 }, { "epoch": 1.77, "learning_rate": 5.891149263479612e-05, "loss": 1.0122, "step": 173800 }, { "epoch": 1.77, "learning_rate": 5.8909721233553926e-05, "loss": 1.1746, "step": 173900 }, { "epoch": 1.77, "learning_rate": 5.8907948418798904e-05, "loss": 1.0349, "step": 174000 }, { "epoch": 1.77, "learning_rate": 5.890617419061771e-05, "loss": 1.0381, "step": 174100 }, { "epoch": 1.77, "learning_rate": 5.89043985490971e-05, "loss": 1.012, "step": 174200 }, { "epoch": 1.78, "learning_rate": 5.8902621494323886e-05, "loss": 1.0617, "step": 174300 }, { "epoch": 1.78, "learning_rate": 5.8900843026384976e-05, "loss": 1.0212, "step": 174400 }, { "epoch": 1.78, "learning_rate": 5.8899063145367305e-05, "loss": 1.1118, "step": 174500 }, { "epoch": 1.78, "learning_rate": 5.8897281851357915e-05, "loss": 0.9886, "step": 174600 }, { "epoch": 1.78, "learning_rate": 5.889549914444388e-05, "loss": 1.0445, "step": 174700 }, { "epoch": 1.78, "learning_rate": 5.8893715024712385e-05, "loss": 1.1851, "step": 174800 }, { "epoch": 1.78, "learning_rate": 5.889192949225065e-05, "loss": 0.9408, "step": 174900 }, { "epoch": 1.78, "learning_rate": 5.889014254714598e-05, "loss": 1.1603, "step": 175000 }, { "epoch": 1.78, "learning_rate": 5.888835418948576e-05, "loss": 0.9967, "step": 175100 }, { "epoch": 1.78, "learning_rate": 5.88865644193574e-05, "loss": 1.1263, "step": 175200 }, { "epoch": 1.79, "learning_rate": 5.888479115566452e-05, "loss": 1.0883, "step": 175300 }, { "epoch": 1.79, "learning_rate": 5.888299857498501e-05, "loss": 1.1163, "step": 175400 }, { "epoch": 1.79, "learning_rate": 5.888120458209924e-05, "loss": 1.0606, "step": 175500 }, { "epoch": 1.79, "learning_rate": 5.887940917709491e-05, "loss": 1.1004, "step": 175600 }, { "epoch": 1.79, "learning_rate": 5.8877612360059835e-05, "loss": 1.0697, "step": 175700 }, { "epoch": 1.79, "learning_rate": 5.8875814131081826e-05, "loss": 1.0931, "step": 175800 }, { "epoch": 1.79, "learning_rate": 5.887401449024884e-05, "loss": 1.2231, "step": 175900 }, { "epoch": 1.79, "learning_rate": 5.887221343764886e-05, "loss": 1.1316, "step": 176000 }, { "epoch": 1.79, "learning_rate": 5.8870429005000236e-05, "loss": 0.9989, "step": 176100 }, { "epoch": 1.8, "learning_rate": 5.886862514324597e-05, "loss": 1.2651, "step": 176200 }, { "epoch": 1.8, "learning_rate": 5.886681986998823e-05, "loss": 0.9657, "step": 176300 }, { "epoch": 1.8, "learning_rate": 5.886501318531526e-05, "loss": 1.1431, "step": 176400 }, { "epoch": 1.8, "learning_rate": 5.88632050893154e-05, "loss": 1.0394, "step": 176500 }, { "epoch": 1.8, "learning_rate": 5.8861395582077055e-05, "loss": 1.0307, "step": 176600 }, { "epoch": 1.8, "learning_rate": 5.885960277985749e-05, "loss": 1.0666, "step": 176700 }, { "epoch": 1.8, "learning_rate": 5.885779046451785e-05, "loss": 1.0585, "step": 176800 }, { "epoch": 1.8, "learning_rate": 5.885597673820447e-05, "loss": 1.1727, "step": 176900 }, { "epoch": 1.8, "learning_rate": 5.885416160100604e-05, "loss": 1.1319, "step": 177000 }, { "epoch": 1.8, "learning_rate": 5.885234505301129e-05, "loss": 1.0306, "step": 177100 }, { "epoch": 1.81, "learning_rate": 5.885052709430905e-05, "loss": 1.0385, "step": 177200 }, { "epoch": 1.81, "learning_rate": 5.88487077249882e-05, "loss": 1.0999, "step": 177300 }, { "epoch": 1.81, "learning_rate": 5.8846886945137715e-05, "loss": 0.9836, "step": 177400 }, { "epoch": 1.81, "learning_rate": 5.88450647548466e-05, "loss": 1.1406, "step": 177500 }, { "epoch": 1.81, "learning_rate": 5.884324115420395e-05, "loss": 1.0799, "step": 177600 }, { "epoch": 1.81, "learning_rate": 5.884141614329895e-05, "loss": 1.1657, "step": 177700 }, { "epoch": 1.81, "learning_rate": 5.8839589722220814e-05, "loss": 1.0296, "step": 177800 }, { "epoch": 1.81, "learning_rate": 5.883776189105885e-05, "loss": 1.0576, "step": 177900 }, { "epoch": 1.81, "learning_rate": 5.883593264990241e-05, "loss": 1.1511, "step": 178000 }, { "epoch": 1.81, "learning_rate": 5.883410199884096e-05, "loss": 1.057, "step": 178100 }, { "epoch": 1.82, "learning_rate": 5.8832269937963985e-05, "loss": 1.0776, "step": 178200 }, { "epoch": 1.82, "learning_rate": 5.883043646736108e-05, "loss": 1.1126, "step": 178300 }, { "epoch": 1.82, "learning_rate": 5.8828601587121874e-05, "loss": 0.9161, "step": 178400 }, { "epoch": 1.82, "learning_rate": 5.88267652973361e-05, "loss": 1.0497, "step": 178500 }, { "epoch": 1.82, "learning_rate": 5.882492759809353e-05, "loss": 1.0773, "step": 178600 }, { "epoch": 1.82, "learning_rate": 5.882308848948401e-05, "loss": 1.1272, "step": 178700 }, { "epoch": 1.82, "learning_rate": 5.882124797159748e-05, "loss": 1.0945, "step": 178800 }, { "epoch": 1.82, "learning_rate": 5.881940604452392e-05, "loss": 1.1188, "step": 178900 }, { "epoch": 1.82, "learning_rate": 5.8817562708353386e-05, "loss": 1.0852, "step": 179000 }, { "epoch": 1.82, "learning_rate": 5.881571796317601e-05, "loss": 1.0829, "step": 179100 }, { "epoch": 1.83, "learning_rate": 5.881387180908199e-05, "loss": 1.0352, "step": 179200 }, { "epoch": 1.83, "learning_rate": 5.881202424616159e-05, "loss": 1.1804, "step": 179300 }, { "epoch": 1.83, "learning_rate": 5.881017527450515e-05, "loss": 1.1625, "step": 179400 }, { "epoch": 1.83, "learning_rate": 5.880832489420306e-05, "loss": 1.0713, "step": 179500 }, { "epoch": 1.83, "learning_rate": 5.8806473105345806e-05, "loss": 0.9646, "step": 179600 }, { "epoch": 1.83, "learning_rate": 5.880461990802393e-05, "loss": 1.1763, "step": 179700 }, { "epoch": 1.83, "learning_rate": 5.880276530232803e-05, "loss": 1.1165, "step": 179800 }, { "epoch": 1.83, "learning_rate": 5.8800909288348805e-05, "loss": 1.1069, "step": 179900 }, { "epoch": 1.83, "learning_rate": 5.879905186617697e-05, "loss": 1.0449, "step": 180000 }, { "epoch": 1.83, "learning_rate": 5.879719303590338e-05, "loss": 1.1108, "step": 180100 }, { "epoch": 1.84, "learning_rate": 5.87953327976189e-05, "loss": 0.9414, "step": 180200 }, { "epoch": 1.84, "learning_rate": 5.8793471151414484e-05, "loss": 1.0686, "step": 180300 }, { "epoch": 1.84, "learning_rate": 5.8791608097381165e-05, "loss": 1.0198, "step": 180400 }, { "epoch": 1.84, "learning_rate": 5.8789743635610015e-05, "loss": 1.0388, "step": 180500 }, { "epoch": 1.84, "learning_rate": 5.878787776619223e-05, "loss": 1.0546, "step": 180600 }, { "epoch": 1.84, "learning_rate": 5.8786010489219e-05, "loss": 1.0832, "step": 180700 }, { "epoch": 1.84, "learning_rate": 5.8784141804781654e-05, "loss": 1.1233, "step": 180800 }, { "epoch": 1.84, "learning_rate": 5.8782271712971554e-05, "loss": 1.0932, "step": 180900 }, { "epoch": 1.84, "learning_rate": 5.878040021388012e-05, "loss": 0.9733, "step": 181000 }, { "epoch": 1.85, "learning_rate": 5.877852730759887e-05, "loss": 0.9454, "step": 181100 }, { "epoch": 1.85, "learning_rate": 5.8776652994219373e-05, "loss": 0.9843, "step": 181200 }, { "epoch": 1.85, "learning_rate": 5.877477727383328e-05, "loss": 1.0707, "step": 181300 }, { "epoch": 1.85, "learning_rate": 5.8772900146532296e-05, "loss": 1.1087, "step": 181400 }, { "epoch": 1.85, "learning_rate": 5.877104040471292e-05, "loss": 1.0862, "step": 181500 }, { "epoch": 1.85, "learning_rate": 5.876916047792442e-05, "loss": 1.0858, "step": 181600 }, { "epoch": 1.85, "learning_rate": 5.876727914449566e-05, "loss": 1.0127, "step": 181700 }, { "epoch": 1.85, "learning_rate": 5.876539640451863e-05, "loss": 0.997, "step": 181800 }, { "epoch": 1.85, "learning_rate": 5.8763512258085376e-05, "loss": 0.9871, "step": 181900 }, { "epoch": 1.85, "learning_rate": 5.876162670528802e-05, "loss": 1.087, "step": 182000 }, { "epoch": 1.86, "learning_rate": 5.875973974621876e-05, "loss": 0.9743, "step": 182100 }, { "epoch": 1.86, "learning_rate": 5.8757851380969854e-05, "loss": 1.0385, "step": 182200 }, { "epoch": 1.86, "learning_rate": 5.875596160963364e-05, "loss": 1.1426, "step": 182300 }, { "epoch": 1.86, "learning_rate": 5.87540704323025e-05, "loss": 1.1371, "step": 182400 }, { "epoch": 1.86, "learning_rate": 5.8752177849068916e-05, "loss": 1.103, "step": 182500 }, { "epoch": 1.86, "learning_rate": 5.875028386002542e-05, "loss": 1.1149, "step": 182600 }, { "epoch": 1.86, "learning_rate": 5.874838846526462e-05, "loss": 1.0722, "step": 182700 }, { "epoch": 1.86, "learning_rate": 5.874649166487918e-05, "loss": 1.1381, "step": 182800 }, { "epoch": 1.86, "learning_rate": 5.8744593458961856e-05, "loss": 0.9331, "step": 182900 }, { "epoch": 1.86, "learning_rate": 5.874269384760544e-05, "loss": 1.0214, "step": 183000 }, { "epoch": 1.87, "learning_rate": 5.874079283090283e-05, "loss": 1.1014, "step": 183100 }, { "epoch": 1.87, "learning_rate": 5.873889040894696e-05, "loss": 1.1651, "step": 183200 }, { "epoch": 1.87, "learning_rate": 5.873698658183086e-05, "loss": 1.0628, "step": 183300 }, { "epoch": 1.87, "learning_rate": 5.8735081349647605e-05, "loss": 1.0845, "step": 183400 }, { "epoch": 1.87, "learning_rate": 5.8733174712490353e-05, "loss": 0.9792, "step": 183500 }, { "epoch": 1.87, "learning_rate": 5.873126667045233e-05, "loss": 0.984, "step": 183600 }, { "epoch": 1.87, "learning_rate": 5.8729357223626825e-05, "loss": 0.9166, "step": 183700 }, { "epoch": 1.87, "learning_rate": 5.872744637210721e-05, "loss": 1.0896, "step": 183800 }, { "epoch": 1.87, "learning_rate": 5.8725534115986896e-05, "loss": 1.0412, "step": 183900 }, { "epoch": 1.87, "learning_rate": 5.8723620455359386e-05, "loss": 1.0795, "step": 184000 }, { "epoch": 1.88, "learning_rate": 5.872170539031825e-05, "loss": 0.981, "step": 184100 }, { "epoch": 1.88, "learning_rate": 5.8719788920957125e-05, "loss": 1.1081, "step": 184200 }, { "epoch": 1.88, "learning_rate": 5.8717871047369704e-05, "loss": 1.1002, "step": 184300 }, { "epoch": 1.88, "learning_rate": 5.8715951769649777e-05, "loss": 1.0586, "step": 184400 }, { "epoch": 1.88, "learning_rate": 5.871403108789118e-05, "loss": 0.9811, "step": 184500 }, { "epoch": 1.88, "learning_rate": 5.8712109002187806e-05, "loss": 1.1085, "step": 184600 }, { "epoch": 1.88, "learning_rate": 5.8710185512633655e-05, "loss": 1.053, "step": 184700 }, { "epoch": 1.88, "learning_rate": 5.870826061932277e-05, "loss": 0.9772, "step": 184800 }, { "epoch": 1.88, "learning_rate": 5.8706334322349254e-05, "loss": 1.0585, "step": 184900 }, { "epoch": 1.88, "learning_rate": 5.87044066218073e-05, "loss": 1.0163, "step": 185000 }, { "epoch": 1.89, "learning_rate": 5.870247751779117e-05, "loss": 1.043, "step": 185100 }, { "epoch": 1.89, "learning_rate": 5.8700547010395175e-05, "loss": 1.0392, "step": 185200 }, { "epoch": 1.89, "learning_rate": 5.86986150997137e-05, "loss": 0.9951, "step": 185300 }, { "epoch": 1.89, "learning_rate": 5.8696681785841225e-05, "loss": 0.958, "step": 185400 }, { "epoch": 1.89, "learning_rate": 5.869474706887224e-05, "loss": 1.0135, "step": 185500 }, { "epoch": 1.89, "learning_rate": 5.869281094890138e-05, "loss": 1.0654, "step": 185600 }, { "epoch": 1.89, "learning_rate": 5.8690873426023295e-05, "loss": 0.9894, "step": 185700 }, { "epoch": 1.89, "learning_rate": 5.868893450033272e-05, "loss": 1.0581, "step": 185800 }, { "epoch": 1.89, "learning_rate": 5.868699417192445e-05, "loss": 1.0247, "step": 185900 }, { "epoch": 1.9, "learning_rate": 5.8685052440893355e-05, "loss": 1.2023, "step": 186000 }, { "epoch": 1.9, "learning_rate": 5.8683109307334384e-05, "loss": 0.9881, "step": 186100 }, { "epoch": 1.9, "learning_rate": 5.8681164771342535e-05, "loss": 1.0777, "step": 186200 }, { "epoch": 1.9, "learning_rate": 5.86792188330129e-05, "loss": 1.0457, "step": 186300 }, { "epoch": 1.9, "learning_rate": 5.8677271492440604e-05, "loss": 0.9898, "step": 186400 }, { "epoch": 1.9, "learning_rate": 5.8675342244088385e-05, "loss": 1.0059, "step": 186500 }, { "epoch": 1.9, "learning_rate": 5.867339211333655e-05, "loss": 1.1224, "step": 186600 }, { "epoch": 1.9, "learning_rate": 5.867144058062695e-05, "loss": 1.0864, "step": 186700 }, { "epoch": 1.9, "learning_rate": 5.8669487646055e-05, "loss": 1.0137, "step": 186800 }, { "epoch": 1.9, "learning_rate": 5.8667533309716205e-05, "loss": 1.1196, "step": 186900 }, { "epoch": 1.91, "learning_rate": 5.866557757170611e-05, "loss": 1.1045, "step": 187000 }, { "epoch": 1.91, "learning_rate": 5.866362043212033e-05, "loss": 1.0339, "step": 187100 }, { "epoch": 1.91, "learning_rate": 5.866166189105457e-05, "loss": 1.0753, "step": 187200 }, { "epoch": 1.91, "learning_rate": 5.86597019486046e-05, "loss": 1.0649, "step": 187300 }, { "epoch": 1.91, "learning_rate": 5.8657740604866216e-05, "loss": 1.0005, "step": 187400 }, { "epoch": 1.91, "learning_rate": 5.865579749432024e-05, "loss": 1.0381, "step": 187500 }, { "epoch": 1.91, "learning_rate": 5.865383336230334e-05, "loss": 1.0746, "step": 187600 }, { "epoch": 1.91, "learning_rate": 5.8651867829284974e-05, "loss": 1.0191, "step": 187700 }, { "epoch": 1.91, "learning_rate": 5.864990089536127e-05, "loss": 1.094, "step": 187800 }, { "epoch": 1.91, "learning_rate": 5.864793256062837e-05, "loss": 1.1974, "step": 187900 }, { "epoch": 1.92, "learning_rate": 5.8645962825182536e-05, "loss": 1.1302, "step": 188000 }, { "epoch": 1.92, "learning_rate": 5.864399168912007e-05, "loss": 1.059, "step": 188100 }, { "epoch": 1.92, "learning_rate": 5.8642019152537356e-05, "loss": 1.1059, "step": 188200 }, { "epoch": 1.92, "learning_rate": 5.8640045215530824e-05, "loss": 1.0466, "step": 188300 }, { "epoch": 1.92, "learning_rate": 5.8638069878196996e-05, "loss": 1.033, "step": 188400 }, { "epoch": 1.92, "learning_rate": 5.863609314063246e-05, "loss": 1.1171, "step": 188500 }, { "epoch": 1.92, "learning_rate": 5.8634115002933854e-05, "loss": 1.0081, "step": 188600 }, { "epoch": 1.92, "learning_rate": 5.863213546519791e-05, "loss": 0.9977, "step": 188700 }, { "epoch": 1.92, "learning_rate": 5.86301545275214e-05, "loss": 0.9932, "step": 188800 }, { "epoch": 1.92, "learning_rate": 5.86281721900012e-05, "loss": 1.039, "step": 188900 }, { "epoch": 1.93, "learning_rate": 5.862618845273421e-05, "loss": 1.1327, "step": 189000 }, { "epoch": 1.93, "learning_rate": 5.862420331581745e-05, "loss": 1.1378, "step": 189100 }, { "epoch": 1.93, "learning_rate": 5.862221677934796e-05, "loss": 1.1226, "step": 189200 }, { "epoch": 1.93, "learning_rate": 5.862022884342289e-05, "loss": 1.0431, "step": 189300 }, { "epoch": 1.93, "learning_rate": 5.861823950813941e-05, "loss": 1.1207, "step": 189400 }, { "epoch": 1.93, "learning_rate": 5.861624877359481e-05, "loss": 1.0674, "step": 189500 }, { "epoch": 1.93, "learning_rate": 5.8614256639886424e-05, "loss": 1.0298, "step": 189600 }, { "epoch": 1.93, "learning_rate": 5.861226310711164e-05, "loss": 1.0787, "step": 189700 }, { "epoch": 1.93, "learning_rate": 5.861026817536795e-05, "loss": 1.0701, "step": 189800 }, { "epoch": 1.93, "learning_rate": 5.860829181498311e-05, "loss": 1.0185, "step": 189900 }, { "epoch": 1.94, "learning_rate": 5.860629409958152e-05, "loss": 1.1304, "step": 190000 }, { "epoch": 1.94, "learning_rate": 5.860429498550287e-05, "loss": 1.0306, "step": 190100 }, { "epoch": 1.94, "learning_rate": 5.860229447284489e-05, "loss": 1.1154, "step": 190200 }, { "epoch": 1.94, "learning_rate": 5.8600292561705406e-05, "loss": 0.9679, "step": 190300 }, { "epoch": 1.94, "learning_rate": 5.859828925218228e-05, "loss": 1.0666, "step": 190400 }, { "epoch": 1.94, "learning_rate": 5.859628454437349e-05, "loss": 0.9944, "step": 190500 }, { "epoch": 1.94, "learning_rate": 5.859427843837703e-05, "loss": 0.9664, "step": 190600 }, { "epoch": 1.94, "learning_rate": 5.8592270934291e-05, "loss": 0.9415, "step": 190700 }, { "epoch": 1.94, "learning_rate": 5.8590262032213554e-05, "loss": 1.0905, "step": 190800 }, { "epoch": 1.94, "learning_rate": 5.85882517322429e-05, "loss": 0.9721, "step": 190900 }, { "epoch": 1.95, "learning_rate": 5.858624003447736e-05, "loss": 0.9859, "step": 191000 }, { "epoch": 1.95, "learning_rate": 5.858422693901526e-05, "loss": 1.0749, "step": 191100 }, { "epoch": 1.95, "learning_rate": 5.858221244595505e-05, "loss": 1.0592, "step": 191200 }, { "epoch": 1.95, "learning_rate": 5.858019655539522e-05, "loss": 1.1336, "step": 191300 }, { "epoch": 1.95, "learning_rate": 5.8578179267434346e-05, "loss": 1.0255, "step": 191400 }, { "epoch": 1.95, "learning_rate": 5.857616058217104e-05, "loss": 0.9245, "step": 191500 }, { "epoch": 1.95, "learning_rate": 5.8574140499704026e-05, "loss": 0.9219, "step": 191600 }, { "epoch": 1.95, "learning_rate": 5.8572119020132055e-05, "loss": 1.0034, "step": 191700 }, { "epoch": 1.95, "learning_rate": 5.857009614355397e-05, "loss": 1.0574, "step": 191800 }, { "epoch": 1.96, "learning_rate": 5.8568071870068686e-05, "loss": 0.9735, "step": 191900 }, { "epoch": 1.96, "learning_rate": 5.856604619977518e-05, "loss": 0.9821, "step": 192000 }, { "epoch": 1.96, "learning_rate": 5.856401913277247e-05, "loss": 1.0241, "step": 192100 }, { "epoch": 1.96, "learning_rate": 5.85619906691597e-05, "loss": 1.0664, "step": 192200 }, { "epoch": 1.96, "learning_rate": 5.855996080903603e-05, "loss": 1.0637, "step": 192300 }, { "epoch": 1.96, "learning_rate": 5.8557929552500716e-05, "loss": 0.9362, "step": 192400 }, { "epoch": 1.96, "learning_rate": 5.8555896899653075e-05, "loss": 1.0622, "step": 192500 }, { "epoch": 1.96, "learning_rate": 5.8553862850592476e-05, "loss": 0.9776, "step": 192600 }, { "epoch": 1.96, "learning_rate": 5.85518274054184e-05, "loss": 0.9896, "step": 192700 }, { "epoch": 1.96, "learning_rate": 5.854979056423034e-05, "loss": 1.0154, "step": 192800 }, { "epoch": 1.97, "learning_rate": 5.85477523271279e-05, "loss": 1.0719, "step": 192900 }, { "epoch": 1.97, "learning_rate": 5.854571269421074e-05, "loss": 1.0874, "step": 193000 }, { "epoch": 1.97, "learning_rate": 5.8543671665578575e-05, "loss": 1.0047, "step": 193100 }, { "epoch": 1.97, "learning_rate": 5.854162924133121e-05, "loss": 0.9661, "step": 193200 }, { "epoch": 1.97, "learning_rate": 5.85395854215685e-05, "loss": 1.0626, "step": 193300 }, { "epoch": 1.97, "learning_rate": 5.853754020639038e-05, "loss": 0.9647, "step": 193400 }, { "epoch": 1.97, "learning_rate": 5.853549359589684e-05, "loss": 1.0495, "step": 193500 }, { "epoch": 1.97, "learning_rate": 5.853344559018797e-05, "loss": 1.0336, "step": 193600 }, { "epoch": 1.97, "learning_rate": 5.853139618936388e-05, "loss": 0.9776, "step": 193700 }, { "epoch": 1.97, "learning_rate": 5.852934539352478e-05, "loss": 1.0166, "step": 193800 }, { "epoch": 1.98, "learning_rate": 5.852729320277095e-05, "loss": 0.9768, "step": 193900 }, { "epoch": 1.98, "learning_rate": 5.852523961720272e-05, "loss": 0.9489, "step": 194000 }, { "epoch": 1.98, "learning_rate": 5.85231846369205e-05, "loss": 1.078, "step": 194100 }, { "epoch": 1.98, "learning_rate": 5.852112826202477e-05, "loss": 0.9232, "step": 194200 }, { "epoch": 1.98, "learning_rate": 5.8519070492616085e-05, "loss": 1.012, "step": 194300 }, { "epoch": 1.98, "learning_rate": 5.8517011328795035e-05, "loss": 1.0948, "step": 194400 }, { "epoch": 1.98, "learning_rate": 5.8514971383145154e-05, "loss": 1.0303, "step": 194500 }, { "epoch": 1.98, "learning_rate": 5.851290944474312e-05, "loss": 1.0161, "step": 194600 }, { "epoch": 1.98, "learning_rate": 5.851084611222996e-05, "loss": 0.9773, "step": 194700 }, { "epoch": 1.98, "learning_rate": 5.850878138570658e-05, "loss": 0.9442, "step": 194800 }, { "epoch": 1.99, "learning_rate": 5.850671526527392e-05, "loss": 1.0646, "step": 194900 }, { "epoch": 1.99, "learning_rate": 5.8504647751033e-05, "loss": 1.0325, "step": 195000 }, { "epoch": 1.99, "learning_rate": 5.850257884308492e-05, "loss": 0.9553, "step": 195100 }, { "epoch": 1.99, "learning_rate": 5.8500508541530825e-05, "loss": 0.9404, "step": 195200 }, { "epoch": 1.99, "learning_rate": 5.8498436846471934e-05, "loss": 0.8782, "step": 195300 }, { "epoch": 1.99, "learning_rate": 5.8496363758009564e-05, "loss": 1.0195, "step": 195400 }, { "epoch": 1.99, "learning_rate": 5.849428927624506e-05, "loss": 1.0409, "step": 195500 }, { "epoch": 1.99, "learning_rate": 5.849221340127986e-05, "loss": 1.0213, "step": 195600 }, { "epoch": 1.99, "learning_rate": 5.849013613321544e-05, "loss": 1.0622, "step": 195700 }, { "epoch": 1.99, "learning_rate": 5.8488057472153406e-05, "loss": 0.9559, "step": 195800 }, { "epoch": 2.0, "learning_rate": 5.848597741819536e-05, "loss": 0.9598, "step": 195900 }, { "epoch": 2.0, "learning_rate": 5.848389597144301e-05, "loss": 0.9517, "step": 196000 }, { "epoch": 2.0, "learning_rate": 5.848181313199812e-05, "loss": 1.097, "step": 196100 }, { "epoch": 2.0, "learning_rate": 5.8479728899962564e-05, "loss": 0.8822, "step": 196200 }, { "epoch": 2.0, "learning_rate": 5.84776432754382e-05, "loss": 1.0259, "step": 196300 }, { "epoch": 2.0, "learning_rate": 5.8475556258527045e-05, "loss": 0.9112, "step": 196400 }, { "epoch": 2.0, "learning_rate": 5.847346784933111e-05, "loss": 0.9234, "step": 196500 }, { "epoch": 2.0, "learning_rate": 5.847137804795252e-05, "loss": 1.066, "step": 196600 }, { "epoch": 2.0, "learning_rate": 5.846928685449345e-05, "loss": 0.903, "step": 196700 }, { "epoch": 2.01, "learning_rate": 5.846719426905615e-05, "loss": 0.9079, "step": 196800 }, { "epoch": 2.01, "learning_rate": 5.846510029174293e-05, "loss": 1.0521, "step": 196900 }, { "epoch": 2.01, "learning_rate": 5.846300492265618e-05, "loss": 0.9344, "step": 197000 }, { "epoch": 2.01, "learning_rate": 5.846090816189835e-05, "loss": 1.001, "step": 197100 }, { "epoch": 2.01, "learning_rate": 5.8458810009571964e-05, "loss": 0.9671, "step": 197200 }, { "epoch": 2.01, "learning_rate": 5.845671046577959e-05, "loss": 1.0997, "step": 197300 }, { "epoch": 2.01, "learning_rate": 5.845463054686236e-05, "loss": 1.1956, "step": 197400 }, { "epoch": 2.01, "learning_rate": 5.845252823435817e-05, "loss": 1.2579, "step": 197500 }, { "epoch": 2.01, "learning_rate": 5.845042453069514e-05, "loss": 1.0775, "step": 197600 }, { "epoch": 2.01, "learning_rate": 5.844831943597614e-05, "loss": 0.9923, "step": 197700 }, { "epoch": 2.02, "learning_rate": 5.844621295030408e-05, "loss": 1.581, "step": 197800 }, { "epoch": 2.02, "learning_rate": 5.844410507378198e-05, "loss": 1.2333, "step": 197900 }, { "epoch": 2.02, "learning_rate": 5.844199580651288e-05, "loss": 1.031, "step": 198000 }, { "epoch": 2.02, "learning_rate": 5.843988514859992e-05, "loss": 1.0202, "step": 198100 }, { "epoch": 2.02, "learning_rate": 5.843777310014628e-05, "loss": 1.0946, "step": 198200 }, { "epoch": 2.02, "learning_rate": 5.843565966125526e-05, "loss": 1.1925, "step": 198300 }, { "epoch": 2.02, "learning_rate": 5.843356598720424e-05, "loss": 1.2114, "step": 198400 }, { "epoch": 2.02, "learning_rate": 5.843144978165029e-05, "loss": 1.1792, "step": 198500 }, { "epoch": 2.02, "learning_rate": 5.842933218596812e-05, "loss": 1.0674, "step": 198600 }, { "epoch": 2.02, "learning_rate": 5.842721320026125e-05, "loss": 1.1121, "step": 198700 }, { "epoch": 2.03, "learning_rate": 5.8425092824633305e-05, "loss": 1.0193, "step": 198800 }, { "epoch": 2.03, "learning_rate": 5.842297105918796e-05, "loss": 1.0007, "step": 198900 }, { "epoch": 2.03, "learning_rate": 5.842084790402895e-05, "loss": 0.9817, "step": 199000 }, { "epoch": 2.03, "learning_rate": 5.841872335926008e-05, "loss": 1.0131, "step": 199100 }, { "epoch": 2.03, "learning_rate": 5.8416597424985226e-05, "loss": 0.9842, "step": 199200 }, { "epoch": 2.03, "learning_rate": 5.8414470101308345e-05, "loss": 1.0506, "step": 199300 }, { "epoch": 2.03, "learning_rate": 5.8412341388333444e-05, "loss": 1.0496, "step": 199400 }, { "epoch": 2.03, "learning_rate": 5.841021128616461e-05, "loss": 0.9779, "step": 199500 }, { "epoch": 2.03, "learning_rate": 5.840807979490598e-05, "loss": 1.0707, "step": 199600 }, { "epoch": 2.03, "learning_rate": 5.840594691466178e-05, "loss": 1.0097, "step": 199700 }, { "epoch": 2.04, "learning_rate": 5.8403812645536295e-05, "loss": 1.0124, "step": 199800 }, { "epoch": 2.04, "learning_rate": 5.840167698763388e-05, "loss": 1.1572, "step": 199900 }, { "epoch": 2.04, "learning_rate": 5.839953994105894e-05, "loss": 1.3573, "step": 200000 }, { "epoch": 2.04, "learning_rate": 5.8397401505915995e-05, "loss": 1.4048, "step": 200100 }, { "epoch": 2.04, "learning_rate": 5.839526168230957e-05, "loss": 1.0354, "step": 200200 }, { "epoch": 2.04, "learning_rate": 5.839312047034431e-05, "loss": 0.9075, "step": 200300 }, { "epoch": 2.04, "learning_rate": 5.8390977870124896e-05, "loss": 1.0124, "step": 200400 }, { "epoch": 2.04, "learning_rate": 5.838883388175609e-05, "loss": 1.0527, "step": 200500 }, { "epoch": 2.04, "learning_rate": 5.838668850534273e-05, "loss": 0.8876, "step": 200600 }, { "epoch": 2.04, "learning_rate": 5.83845417409897e-05, "loss": 1.0682, "step": 200700 }, { "epoch": 2.05, "learning_rate": 5.838239358880197e-05, "loss": 0.9313, "step": 200800 }, { "epoch": 2.05, "learning_rate": 5.8380244048884576e-05, "loss": 1.0174, "step": 200900 }, { "epoch": 2.05, "learning_rate": 5.83780931213426e-05, "loss": 0.9093, "step": 201000 }, { "epoch": 2.05, "learning_rate": 5.837594080628123e-05, "loss": 1.0549, "step": 201100 }, { "epoch": 2.05, "learning_rate": 5.8373787103805695e-05, "loss": 1.0736, "step": 201200 }, { "epoch": 2.05, "learning_rate": 5.837163201402129e-05, "loss": 0.9724, "step": 201300 }, { "epoch": 2.05, "learning_rate": 5.8369475537033406e-05, "loss": 0.9633, "step": 201400 }, { "epoch": 2.05, "learning_rate": 5.8367317672947455e-05, "loss": 0.9577, "step": 201500 }, { "epoch": 2.05, "learning_rate": 5.836515842186896e-05, "loss": 1.0551, "step": 201600 }, { "epoch": 2.05, "learning_rate": 5.8362997783903496e-05, "loss": 1.0248, "step": 201700 }, { "epoch": 2.06, "learning_rate": 5.83608357591567e-05, "loss": 0.981, "step": 201800 }, { "epoch": 2.06, "learning_rate": 5.835867234773428e-05, "loss": 1.0608, "step": 201900 }, { "epoch": 2.06, "learning_rate": 5.835650754974203e-05, "loss": 1.0628, "step": 202000 }, { "epoch": 2.06, "learning_rate": 5.835434136528577e-05, "loss": 0.9436, "step": 202100 }, { "epoch": 2.06, "learning_rate": 5.8352173794471435e-05, "loss": 0.9861, "step": 202200 }, { "epoch": 2.06, "learning_rate": 5.8350004837404994e-05, "loss": 1.046, "step": 202300 }, { "epoch": 2.06, "learning_rate": 5.834783449419251e-05, "loss": 1.0726, "step": 202400 }, { "epoch": 2.06, "learning_rate": 5.834566276494008e-05, "loss": 0.8784, "step": 202500 }, { "epoch": 2.06, "learning_rate": 5.83434896497539e-05, "loss": 0.9368, "step": 202600 }, { "epoch": 2.07, "learning_rate": 5.834131514874022e-05, "loss": 0.9253, "step": 202700 }, { "epoch": 2.07, "learning_rate": 5.8339139262005375e-05, "loss": 1.0317, "step": 202800 }, { "epoch": 2.07, "learning_rate": 5.833696198965572e-05, "loss": 0.8432, "step": 202900 }, { "epoch": 2.07, "learning_rate": 5.8334783331797736e-05, "loss": 0.979, "step": 203000 }, { "epoch": 2.07, "learning_rate": 5.8332603288537944e-05, "loss": 0.9048, "step": 203100 }, { "epoch": 2.07, "learning_rate": 5.833042185998293e-05, "loss": 0.935, "step": 203200 }, { "epoch": 2.07, "learning_rate": 5.8328239046239354e-05, "loss": 1.0004, "step": 203300 }, { "epoch": 2.07, "learning_rate": 5.832605484741394e-05, "loss": 1.0152, "step": 203400 }, { "epoch": 2.07, "learning_rate": 5.832386926361349e-05, "loss": 0.9919, "step": 203500 }, { "epoch": 2.07, "learning_rate": 5.832168229494486e-05, "loss": 1.0018, "step": 203600 }, { "epoch": 2.08, "learning_rate": 5.831949394151498e-05, "loss": 0.9661, "step": 203700 }, { "epoch": 2.08, "learning_rate": 5.8317304203430855e-05, "loss": 0.8944, "step": 203800 }, { "epoch": 2.08, "learning_rate": 5.831511308079953e-05, "loss": 1.0393, "step": 203900 }, { "epoch": 2.08, "learning_rate": 5.8312920573728155e-05, "loss": 0.9409, "step": 204000 }, { "epoch": 2.08, "learning_rate": 5.831072668232393e-05, "loss": 1.0064, "step": 204100 }, { "epoch": 2.08, "learning_rate": 5.830853140669412e-05, "loss": 0.8611, "step": 204200 }, { "epoch": 2.08, "learning_rate": 5.830635672039457e-05, "loss": 0.9927, "step": 204300 }, { "epoch": 2.08, "learning_rate": 5.830415869047524e-05, "loss": 0.9124, "step": 204400 }, { "epoch": 2.08, "learning_rate": 5.830198127763967e-05, "loss": 0.968, "step": 204500 }, { "epoch": 2.08, "learning_rate": 5.829978049385642e-05, "loss": 0.99, "step": 204600 }, { "epoch": 2.09, "learning_rate": 5.8297578326382776e-05, "loss": 0.9251, "step": 204700 }, { "epoch": 2.09, "learning_rate": 5.829537477532644e-05, "loss": 0.9285, "step": 204800 }, { "epoch": 2.09, "learning_rate": 5.829316984079513e-05, "loss": 1.0292, "step": 204900 }, { "epoch": 2.09, "learning_rate": 5.8290963522896654e-05, "loss": 0.8976, "step": 205000 }, { "epoch": 2.09, "learning_rate": 5.828875582173889e-05, "loss": 0.8915, "step": 205100 }, { "epoch": 2.09, "learning_rate": 5.8286546737429795e-05, "loss": 0.9122, "step": 205200 }, { "epoch": 2.09, "learning_rate": 5.828433627007736e-05, "loss": 0.8964, "step": 205300 }, { "epoch": 2.09, "learning_rate": 5.828212441978968e-05, "loss": 1.0233, "step": 205400 }, { "epoch": 2.09, "learning_rate": 5.8279911186674895e-05, "loss": 1.0239, "step": 205500 }, { "epoch": 2.09, "learning_rate": 5.827769657084122e-05, "loss": 0.9971, "step": 205600 }, { "epoch": 2.1, "learning_rate": 5.827548057239693e-05, "loss": 0.866, "step": 205700 }, { "epoch": 2.1, "learning_rate": 5.827326319145038e-05, "loss": 0.8884, "step": 205800 }, { "epoch": 2.1, "learning_rate": 5.827104442810999e-05, "loss": 0.8648, "step": 205900 }, { "epoch": 2.1, "learning_rate": 5.8268824282484233e-05, "loss": 0.8546, "step": 206000 }, { "epoch": 2.1, "learning_rate": 5.826660275468167e-05, "loss": 0.9017, "step": 206100 }, { "epoch": 2.1, "learning_rate": 5.826437984481092e-05, "loss": 1.0009, "step": 206200 }, { "epoch": 2.1, "learning_rate": 5.826215555298068e-05, "loss": 1.0987, "step": 206300 }, { "epoch": 2.1, "learning_rate": 5.825992987929968e-05, "loss": 0.8823, "step": 206400 }, { "epoch": 2.1, "learning_rate": 5.825770282387675e-05, "loss": 0.9849, "step": 206500 }, { "epoch": 2.1, "learning_rate": 5.82554743868208e-05, "loss": 0.8931, "step": 206600 }, { "epoch": 2.11, "learning_rate": 5.825324456824077e-05, "loss": 0.9509, "step": 206700 }, { "epoch": 2.11, "learning_rate": 5.8251013368245674e-05, "loss": 0.9464, "step": 206800 }, { "epoch": 2.11, "learning_rate": 5.824878078694463e-05, "loss": 0.9809, "step": 206900 }, { "epoch": 2.11, "learning_rate": 5.824654682444678e-05, "loss": 0.9513, "step": 207000 }, { "epoch": 2.11, "learning_rate": 5.824431148086136e-05, "loss": 0.9161, "step": 207100 }, { "epoch": 2.11, "learning_rate": 5.824207475629765e-05, "loss": 0.8685, "step": 207200 }, { "epoch": 2.11, "learning_rate": 5.8239836650865046e-05, "loss": 0.9233, "step": 207300 }, { "epoch": 2.11, "learning_rate": 5.823759716467294e-05, "loss": 0.9542, "step": 207400 }, { "epoch": 2.11, "learning_rate": 5.823535629783085e-05, "loss": 0.9409, "step": 207500 }, { "epoch": 2.12, "learning_rate": 5.823311405044834e-05, "loss": 0.9343, "step": 207600 }, { "epoch": 2.12, "learning_rate": 5.8230870422635034e-05, "loss": 1.0041, "step": 207700 }, { "epoch": 2.12, "learning_rate": 5.822862541450064e-05, "loss": 0.9272, "step": 207800 }, { "epoch": 2.12, "learning_rate": 5.8226379026154925e-05, "loss": 1.2049, "step": 207900 }, { "epoch": 2.12, "learning_rate": 5.822413125770772e-05, "loss": 1.0693, "step": 208000 }, { "epoch": 2.12, "learning_rate": 5.822188210926893e-05, "loss": 0.9966, "step": 208100 }, { "epoch": 2.12, "learning_rate": 5.821963158094852e-05, "loss": 1.006, "step": 208200 }, { "epoch": 2.12, "learning_rate": 5.821737967285654e-05, "loss": 0.9542, "step": 208300 }, { "epoch": 2.12, "learning_rate": 5.8215126385103084e-05, "loss": 0.9005, "step": 208400 }, { "epoch": 2.12, "learning_rate": 5.821287171779832e-05, "loss": 0.9195, "step": 208500 }, { "epoch": 2.13, "learning_rate": 5.82106156710525e-05, "loss": 1.0223, "step": 208600 }, { "epoch": 2.13, "learning_rate": 5.820835824497592e-05, "loss": 0.9419, "step": 208700 }, { "epoch": 2.13, "learning_rate": 5.820609943967897e-05, "loss": 0.9852, "step": 208800 }, { "epoch": 2.13, "learning_rate": 5.820383925527208e-05, "loss": 0.9725, "step": 208900 }, { "epoch": 2.13, "learning_rate": 5.820157769186577e-05, "loss": 0.9984, "step": 209000 }, { "epoch": 2.13, "learning_rate": 5.819931474957059e-05, "loss": 0.9217, "step": 209100 }, { "epoch": 2.13, "learning_rate": 5.819705042849721e-05, "loss": 0.9432, "step": 209200 }, { "epoch": 2.13, "learning_rate": 5.819478472875633e-05, "loss": 0.9681, "step": 209300 }, { "epoch": 2.13, "learning_rate": 5.8192517650458754e-05, "loss": 0.9399, "step": 209400 }, { "epoch": 2.13, "learning_rate": 5.8190249193715295e-05, "loss": 0.9766, "step": 209500 }, { "epoch": 2.14, "learning_rate": 5.818797935863689e-05, "loss": 0.9467, "step": 209600 }, { "epoch": 2.14, "learning_rate": 5.81857081453345e-05, "loss": 0.8585, "step": 209700 }, { "epoch": 2.14, "learning_rate": 5.818343555391919e-05, "loss": 1.061, "step": 209800 }, { "epoch": 2.14, "learning_rate": 5.818116158450207e-05, "loss": 1.0029, "step": 209900 }, { "epoch": 2.14, "learning_rate": 5.8178886237194326e-05, "loss": 0.9429, "step": 210000 }, { "epoch": 2.14, "learning_rate": 5.8176609512107215e-05, "loss": 0.9674, "step": 210100 }, { "epoch": 2.14, "learning_rate": 5.817433140935204e-05, "loss": 0.8936, "step": 210200 }, { "epoch": 2.14, "learning_rate": 5.817205192904019e-05, "loss": 0.831, "step": 210300 }, { "epoch": 2.14, "learning_rate": 5.816977107128313e-05, "loss": 0.9999, "step": 210400 }, { "epoch": 2.14, "learning_rate": 5.816748883619238e-05, "loss": 0.9154, "step": 210500 }, { "epoch": 2.15, "learning_rate": 5.81652052238795e-05, "loss": 0.8917, "step": 210600 }, { "epoch": 2.15, "learning_rate": 5.8162920234456185e-05, "loss": 0.921, "step": 210700 }, { "epoch": 2.15, "learning_rate": 5.816063386803413e-05, "loss": 0.9357, "step": 210800 }, { "epoch": 2.15, "learning_rate": 5.8158346124725135e-05, "loss": 0.925, "step": 210900 }, { "epoch": 2.15, "learning_rate": 5.8156057004641056e-05, "loss": 0.9148, "step": 211000 }, { "epoch": 2.15, "learning_rate": 5.815376650789381e-05, "loss": 0.866, "step": 211100 }, { "epoch": 2.15, "learning_rate": 5.81514746345954e-05, "loss": 0.8945, "step": 211200 }, { "epoch": 2.15, "learning_rate": 5.814918138485788e-05, "loss": 0.9136, "step": 211300 }, { "epoch": 2.15, "learning_rate": 5.814688675879338e-05, "loss": 0.7931, "step": 211400 }, { "epoch": 2.15, "learning_rate": 5.814459075651407e-05, "loss": 0.926, "step": 211500 }, { "epoch": 2.16, "learning_rate": 5.8142293378132247e-05, "loss": 0.9469, "step": 211600 }, { "epoch": 2.16, "learning_rate": 5.813999462376022e-05, "loss": 0.9466, "step": 211700 }, { "epoch": 2.16, "learning_rate": 5.813769449351038e-05, "loss": 0.8628, "step": 211800 }, { "epoch": 2.16, "learning_rate": 5.81353929874952e-05, "loss": 1.0375, "step": 211900 }, { "epoch": 2.16, "learning_rate": 5.813309010582721e-05, "loss": 0.9182, "step": 212000 }, { "epoch": 2.16, "learning_rate": 5.8130785848619e-05, "loss": 0.9331, "step": 212100 }, { "epoch": 2.16, "learning_rate": 5.812848021598324e-05, "loss": 0.9792, "step": 212200 }, { "epoch": 2.16, "learning_rate": 5.812617320803266e-05, "loss": 0.9162, "step": 212300 }, { "epoch": 2.16, "learning_rate": 5.812386482488006e-05, "loss": 0.9305, "step": 212400 }, { "epoch": 2.16, "learning_rate": 5.81215550666383e-05, "loss": 0.8893, "step": 212500 }, { "epoch": 2.17, "learning_rate": 5.811924393342032e-05, "loss": 0.9299, "step": 212600 }, { "epoch": 2.17, "learning_rate": 5.811693142533911e-05, "loss": 0.9634, "step": 212700 }, { "epoch": 2.17, "learning_rate": 5.811461754250775e-05, "loss": 0.9035, "step": 212800 }, { "epoch": 2.17, "learning_rate": 5.811230228503938e-05, "loss": 0.8609, "step": 212900 }, { "epoch": 2.17, "learning_rate": 5.810998565304719e-05, "loss": 0.9838, "step": 213000 }, { "epoch": 2.17, "learning_rate": 5.8107667646644456e-05, "loss": 0.9323, "step": 213100 }, { "epoch": 2.17, "learning_rate": 5.81053482659445e-05, "loss": 0.9657, "step": 213200 }, { "epoch": 2.17, "learning_rate": 5.8103027511060754e-05, "loss": 0.8661, "step": 213300 }, { "epoch": 2.17, "learning_rate": 5.810070538210666e-05, "loss": 0.8446, "step": 213400 }, { "epoch": 2.18, "learning_rate": 5.809840512102559e-05, "loss": 0.8983, "step": 213500 }, { "epoch": 2.18, "learning_rate": 5.8096080258009386e-05, "loss": 1.0379, "step": 213600 }, { "epoch": 2.18, "learning_rate": 5.809375402126253e-05, "loss": 0.8791, "step": 213700 }, { "epoch": 2.18, "learning_rate": 5.8091449693801406e-05, "loss": 1.0102, "step": 213800 }, { "epoch": 2.18, "learning_rate": 5.808912072366899e-05, "loss": 1.002, "step": 213900 }, { "epoch": 2.18, "learning_rate": 5.8086790380146185e-05, "loss": 0.9033, "step": 214000 }, { "epoch": 2.18, "learning_rate": 5.808445866334694e-05, "loss": 0.8961, "step": 214100 }, { "epoch": 2.18, "learning_rate": 5.808212557338528e-05, "loss": 0.8074, "step": 214200 }, { "epoch": 2.18, "learning_rate": 5.807979111037526e-05, "loss": 0.9961, "step": 214300 }, { "epoch": 2.18, "learning_rate": 5.807745527443102e-05, "loss": 0.9688, "step": 214400 }, { "epoch": 2.19, "learning_rate": 5.807511806566678e-05, "loss": 0.8836, "step": 214500 }, { "epoch": 2.19, "learning_rate": 5.807277948419681e-05, "loss": 0.8424, "step": 214600 }, { "epoch": 2.19, "learning_rate": 5.807043953013545e-05, "loss": 0.8625, "step": 214700 }, { "epoch": 2.19, "learning_rate": 5.806809820359712e-05, "loss": 0.9227, "step": 214800 }, { "epoch": 2.19, "learning_rate": 5.806575550469628e-05, "loss": 0.9361, "step": 214900 }, { "epoch": 2.19, "learning_rate": 5.8063411433547486e-05, "loss": 0.94, "step": 215000 }, { "epoch": 2.19, "learning_rate": 5.806106599026535e-05, "loss": 0.8761, "step": 215100 }, { "epoch": 2.19, "learning_rate": 5.805871917496454e-05, "loss": 0.9426, "step": 215200 }, { "epoch": 2.19, "learning_rate": 5.8056370987759814e-05, "loss": 0.8284, "step": 215300 }, { "epoch": 2.19, "learning_rate": 5.805402142876598e-05, "loss": 0.8186, "step": 215400 }, { "epoch": 2.2, "learning_rate": 5.805167049809791e-05, "loss": 0.9372, "step": 215500 }, { "epoch": 2.2, "learning_rate": 5.8049318195870564e-05, "loss": 0.8143, "step": 215600 }, { "epoch": 2.2, "learning_rate": 5.804696452219894e-05, "loss": 0.8323, "step": 215700 }, { "epoch": 2.2, "learning_rate": 5.804460947719813e-05, "loss": 0.8944, "step": 215800 }, { "epoch": 2.2, "learning_rate": 5.804225306098328e-05, "loss": 0.9302, "step": 215900 }, { "epoch": 2.2, "learning_rate": 5.8039895273669595e-05, "loss": 1.0083, "step": 216000 }, { "epoch": 2.2, "learning_rate": 5.803753611537237e-05, "loss": 0.9126, "step": 216100 }, { "epoch": 2.2, "learning_rate": 5.803517558620695e-05, "loss": 0.9005, "step": 216200 }, { "epoch": 2.2, "learning_rate": 5.803281368628875e-05, "loss": 1.0283, "step": 216300 }, { "epoch": 2.2, "learning_rate": 5.8030450415733246e-05, "loss": 0.9941, "step": 216400 }, { "epoch": 2.21, "learning_rate": 5.8028085774655994e-05, "loss": 0.9545, "step": 216500 }, { "epoch": 2.21, "learning_rate": 5.8025719763172614e-05, "loss": 0.9325, "step": 216600 }, { "epoch": 2.21, "learning_rate": 5.802335238139879e-05, "loss": 0.8211, "step": 216700 }, { "epoch": 2.21, "learning_rate": 5.802098362945026e-05, "loss": 0.8575, "step": 216800 }, { "epoch": 2.21, "learning_rate": 5.801861350744285e-05, "loss": 0.9321, "step": 216900 }, { "epoch": 2.21, "learning_rate": 5.801624201549245e-05, "loss": 0.9305, "step": 217000 }, { "epoch": 2.21, "learning_rate": 5.801386915371501e-05, "loss": 0.8685, "step": 217100 }, { "epoch": 2.21, "learning_rate": 5.801149492222655e-05, "loss": 1.053, "step": 217200 }, { "epoch": 2.21, "learning_rate": 5.800911932114315e-05, "loss": 0.8439, "step": 217300 }, { "epoch": 2.21, "learning_rate": 5.800674235058096e-05, "loss": 0.8787, "step": 217400 }, { "epoch": 2.22, "learning_rate": 5.800436401065621e-05, "loss": 0.8795, "step": 217500 }, { "epoch": 2.22, "learning_rate": 5.8002008105354276e-05, "loss": 0.8945, "step": 217600 }, { "epoch": 2.22, "learning_rate": 5.799962704074404e-05, "loss": 0.9754, "step": 217700 }, { "epoch": 2.22, "learning_rate": 5.799724460711913e-05, "loss": 0.8805, "step": 217800 }, { "epoch": 2.22, "learning_rate": 5.7994860804596054e-05, "loss": 0.9783, "step": 217900 }, { "epoch": 2.22, "learning_rate": 5.7992475633291334e-05, "loss": 0.9421, "step": 218000 }, { "epoch": 2.22, "learning_rate": 5.799008909332161e-05, "loss": 0.8175, "step": 218100 }, { "epoch": 2.22, "learning_rate": 5.798770118480357e-05, "loss": 0.9645, "step": 218200 }, { "epoch": 2.22, "learning_rate": 5.7985311907853964e-05, "loss": 0.8727, "step": 218300 }, { "epoch": 2.23, "learning_rate": 5.7982921262589606e-05, "loss": 0.8778, "step": 218400 }, { "epoch": 2.23, "learning_rate": 5.79805292491274e-05, "loss": 0.9507, "step": 218500 }, { "epoch": 2.23, "learning_rate": 5.7978135867584286e-05, "loss": 0.8838, "step": 218600 }, { "epoch": 2.23, "learning_rate": 5.7975741118077294e-05, "loss": 0.8837, "step": 218700 }, { "epoch": 2.23, "learning_rate": 5.7973345000723514e-05, "loss": 0.9025, "step": 218800 }, { "epoch": 2.23, "learning_rate": 5.7970947515640095e-05, "loss": 0.9526, "step": 218900 }, { "epoch": 2.23, "learning_rate": 5.796854866294427e-05, "loss": 0.8226, "step": 219000 }, { "epoch": 2.23, "learning_rate": 5.796614844275332e-05, "loss": 0.9796, "step": 219100 }, { "epoch": 2.23, "learning_rate": 5.796374685518461e-05, "loss": 0.9351, "step": 219200 }, { "epoch": 2.23, "learning_rate": 5.796134390035554e-05, "loss": 0.9327, "step": 219300 }, { "epoch": 2.24, "learning_rate": 5.7958939578383646e-05, "loss": 0.85, "step": 219400 }, { "epoch": 2.24, "learning_rate": 5.7956533889386434e-05, "loss": 0.865, "step": 219500 }, { "epoch": 2.24, "learning_rate": 5.795412683348156e-05, "loss": 0.9413, "step": 219600 }, { "epoch": 2.24, "learning_rate": 5.7951718410786706e-05, "loss": 0.8716, "step": 219700 }, { "epoch": 2.24, "learning_rate": 5.794930862141963e-05, "loss": 0.9028, "step": 219800 }, { "epoch": 2.24, "learning_rate": 5.794689746549815e-05, "loss": 0.9877, "step": 219900 }, { "epoch": 2.24, "learning_rate": 5.794448494314017e-05, "loss": 0.8103, "step": 220000 }, { "epoch": 2.24, "learning_rate": 5.794207105446362e-05, "loss": 0.9408, "step": 220100 }, { "epoch": 2.24, "learning_rate": 5.793965579958657e-05, "loss": 0.9233, "step": 220200 }, { "epoch": 2.24, "learning_rate": 5.793723917862707e-05, "loss": 0.8293, "step": 220300 }, { "epoch": 2.25, "learning_rate": 5.7934821191703306e-05, "loss": 0.8348, "step": 220400 }, { "epoch": 2.25, "learning_rate": 5.793240183893349e-05, "loss": 0.7893, "step": 220500 }, { "epoch": 2.25, "learning_rate": 5.7929981120435905e-05, "loss": 0.8719, "step": 220600 }, { "epoch": 2.25, "learning_rate": 5.792755903632893e-05, "loss": 0.9576, "step": 220700 }, { "epoch": 2.25, "learning_rate": 5.792513558673098e-05, "loss": 0.9176, "step": 220800 }, { "epoch": 2.25, "learning_rate": 5.7922710771760545e-05, "loss": 0.9514, "step": 220900 }, { "epoch": 2.25, "learning_rate": 5.79202845915362e-05, "loss": 1.0007, "step": 221000 }, { "epoch": 2.25, "learning_rate": 5.791785704617654e-05, "loss": 0.9554, "step": 221100 }, { "epoch": 2.25, "learning_rate": 5.791542813580028e-05, "loss": 0.8683, "step": 221200 }, { "epoch": 2.25, "learning_rate": 5.791299786052618e-05, "loss": 0.8543, "step": 221300 }, { "epoch": 2.26, "learning_rate": 5.791056622047306e-05, "loss": 0.8579, "step": 221400 }, { "epoch": 2.26, "learning_rate": 5.7908133215759816e-05, "loss": 0.8674, "step": 221500 }, { "epoch": 2.26, "learning_rate": 5.79056988465054e-05, "loss": 0.9754, "step": 221600 }, { "epoch": 2.26, "learning_rate": 5.7903263112828845e-05, "loss": 0.8512, "step": 221700 }, { "epoch": 2.26, "learning_rate": 5.790082601484924e-05, "loss": 0.8862, "step": 221800 }, { "epoch": 2.26, "learning_rate": 5.789838755268575e-05, "loss": 1.0098, "step": 221900 }, { "epoch": 2.26, "learning_rate": 5.78959721314716e-05, "loss": 0.9144, "step": 222000 }, { "epoch": 2.26, "learning_rate": 5.7893530954936934e-05, "loss": 0.8751, "step": 222100 }, { "epoch": 2.26, "learning_rate": 5.789108841457508e-05, "loss": 0.883, "step": 222200 }, { "epoch": 2.26, "learning_rate": 5.788864451050543e-05, "loss": 1.0011, "step": 222300 }, { "epoch": 2.27, "learning_rate": 5.7886199242847496e-05, "loss": 1.0433, "step": 222400 }, { "epoch": 2.27, "learning_rate": 5.788375261172084e-05, "loss": 0.8546, "step": 222500 }, { "epoch": 2.27, "learning_rate": 5.788130461724508e-05, "loss": 0.9071, "step": 222600 }, { "epoch": 2.27, "learning_rate": 5.787885525953992e-05, "loss": 0.8967, "step": 222700 }, { "epoch": 2.27, "learning_rate": 5.78764045387251e-05, "loss": 0.9155, "step": 222800 }, { "epoch": 2.27, "learning_rate": 5.787395245492046e-05, "loss": 0.9357, "step": 222900 }, { "epoch": 2.27, "learning_rate": 5.7871499008245886e-05, "loss": 0.8242, "step": 223000 }, { "epoch": 2.27, "learning_rate": 5.786904419882134e-05, "loss": 0.913, "step": 223100 }, { "epoch": 2.27, "learning_rate": 5.786658802676685e-05, "loss": 1.0319, "step": 223200 }, { "epoch": 2.28, "learning_rate": 5.78641304922025e-05, "loss": 0.8648, "step": 223300 }, { "epoch": 2.28, "learning_rate": 5.7861671595248456e-05, "loss": 0.9501, "step": 223400 }, { "epoch": 2.28, "learning_rate": 5.7859211336024936e-05, "loss": 0.8371, "step": 223500 }, { "epoch": 2.28, "learning_rate": 5.785674971465225e-05, "loss": 0.9385, "step": 223600 }, { "epoch": 2.28, "learning_rate": 5.785428673125073e-05, "loss": 0.8396, "step": 223700 }, { "epoch": 2.28, "learning_rate": 5.785182238594082e-05, "loss": 0.8969, "step": 223800 }, { "epoch": 2.28, "learning_rate": 5.7849356678843e-05, "loss": 0.9266, "step": 223900 }, { "epoch": 2.28, "learning_rate": 5.7846914287505346e-05, "loss": 0.9591, "step": 224000 }, { "epoch": 2.28, "learning_rate": 5.784444587080834e-05, "loss": 0.8547, "step": 224100 }, { "epoch": 2.28, "learning_rate": 5.784197609268409e-05, "loss": 0.8431, "step": 224200 }, { "epoch": 2.29, "learning_rate": 5.7839504953253364e-05, "loss": 0.9919, "step": 224300 }, { "epoch": 2.29, "learning_rate": 5.783703245263698e-05, "loss": 0.9913, "step": 224400 }, { "epoch": 2.29, "learning_rate": 5.7834558590955834e-05, "loss": 0.9112, "step": 224500 }, { "epoch": 2.29, "learning_rate": 5.783208336833088e-05, "loss": 0.9577, "step": 224600 }, { "epoch": 2.29, "learning_rate": 5.782960678488315e-05, "loss": 0.9618, "step": 224700 }, { "epoch": 2.29, "learning_rate": 5.7827128840733716e-05, "loss": 0.7787, "step": 224800 }, { "epoch": 2.29, "learning_rate": 5.782464953600375e-05, "loss": 0.7985, "step": 224900 }, { "epoch": 2.29, "learning_rate": 5.7822168870814464e-05, "loss": 0.9151, "step": 225000 }, { "epoch": 2.29, "learning_rate": 5.781971167227572e-05, "loss": 0.9218, "step": 225100 }, { "epoch": 2.29, "learning_rate": 5.7817228300133314e-05, "loss": 0.9224, "step": 225200 }, { "epoch": 2.3, "learning_rate": 5.7814743567894444e-05, "loss": 0.8241, "step": 225300 }, { "epoch": 2.3, "learning_rate": 5.781225747568061e-05, "loss": 0.9015, "step": 225400 }, { "epoch": 2.3, "learning_rate": 5.780977002361336e-05, "loss": 0.8417, "step": 225500 }, { "epoch": 2.3, "learning_rate": 5.7807281211814316e-05, "loss": 0.7768, "step": 225600 }, { "epoch": 2.3, "learning_rate": 5.780479104040517e-05, "loss": 0.8853, "step": 225700 }, { "epoch": 2.3, "learning_rate": 5.780229950950766e-05, "loss": 0.9516, "step": 225800 }, { "epoch": 2.3, "learning_rate": 5.779980661924363e-05, "loss": 1.0194, "step": 225900 }, { "epoch": 2.3, "learning_rate": 5.779731236973495e-05, "loss": 0.9821, "step": 226000 }, { "epoch": 2.3, "learning_rate": 5.779481676110359e-05, "loss": 0.9247, "step": 226100 }, { "epoch": 2.3, "learning_rate": 5.7792319793471555e-05, "loss": 0.9386, "step": 226200 }, { "epoch": 2.31, "learning_rate": 5.778982146696094e-05, "loss": 0.9808, "step": 226300 }, { "epoch": 2.31, "learning_rate": 5.778732178169389e-05, "loss": 0.933, "step": 226400 }, { "epoch": 2.31, "learning_rate": 5.7784820737792644e-05, "loss": 0.9377, "step": 226500 }, { "epoch": 2.31, "learning_rate": 5.778231833537946e-05, "loss": 0.928, "step": 226600 }, { "epoch": 2.31, "learning_rate": 5.777981457457671e-05, "loss": 0.8999, "step": 226700 }, { "epoch": 2.31, "learning_rate": 5.777730945550682e-05, "loss": 0.9975, "step": 226800 }, { "epoch": 2.31, "learning_rate": 5.777480297829225e-05, "loss": 0.8384, "step": 226900 }, { "epoch": 2.31, "learning_rate": 5.777229514305557e-05, "loss": 0.9139, "step": 227000 }, { "epoch": 2.31, "learning_rate": 5.77697859499194e-05, "loss": 0.89, "step": 227100 }, { "epoch": 2.31, "learning_rate": 5.776727539900641e-05, "loss": 0.8916, "step": 227200 }, { "epoch": 2.32, "learning_rate": 5.776476349043936e-05, "loss": 0.8947, "step": 227300 }, { "epoch": 2.32, "learning_rate": 5.776225022434107e-05, "loss": 0.8724, "step": 227400 }, { "epoch": 2.32, "learning_rate": 5.775973560083442e-05, "loss": 0.9487, "step": 227500 }, { "epoch": 2.32, "learning_rate": 5.775721962004236e-05, "loss": 0.8521, "step": 227600 }, { "epoch": 2.32, "learning_rate": 5.775470228208791e-05, "loss": 0.8946, "step": 227700 }, { "epoch": 2.32, "learning_rate": 5.775218358709415e-05, "loss": 0.9107, "step": 227800 }, { "epoch": 2.32, "learning_rate": 5.774966353518423e-05, "loss": 0.9622, "step": 227900 }, { "epoch": 2.32, "learning_rate": 5.7747142126481355e-05, "loss": 0.8928, "step": 228000 }, { "epoch": 2.32, "learning_rate": 5.774461936110882e-05, "loss": 0.9051, "step": 228100 }, { "epoch": 2.32, "learning_rate": 5.774209523918997e-05, "loss": 0.8532, "step": 228200 }, { "epoch": 2.33, "learning_rate": 5.7739569760848226e-05, "loss": 0.8888, "step": 228300 }, { "epoch": 2.33, "learning_rate": 5.7737042926207056e-05, "loss": 0.8156, "step": 228400 }, { "epoch": 2.33, "learning_rate": 5.7734514735390006e-05, "loss": 0.9001, "step": 228500 }, { "epoch": 2.33, "learning_rate": 5.773198518852071e-05, "loss": 0.9048, "step": 228600 }, { "epoch": 2.33, "learning_rate": 5.7729454285722825e-05, "loss": 0.8481, "step": 228700 }, { "epoch": 2.33, "learning_rate": 5.772692202712011e-05, "loss": 0.8893, "step": 228800 }, { "epoch": 2.33, "learning_rate": 5.772438841283637e-05, "loss": 0.9067, "step": 228900 }, { "epoch": 2.33, "learning_rate": 5.772185344299549e-05, "loss": 0.8836, "step": 229000 }, { "epoch": 2.33, "learning_rate": 5.7719317117721406e-05, "loss": 0.9468, "step": 229100 }, { "epoch": 2.34, "learning_rate": 5.771677943713815e-05, "loss": 0.8321, "step": 229200 }, { "epoch": 2.34, "learning_rate": 5.7714240401369774e-05, "loss": 0.809, "step": 229300 }, { "epoch": 2.34, "learning_rate": 5.7711750831635815e-05, "loss": 1.0896, "step": 229400 }, { "epoch": 2.34, "learning_rate": 5.770920911296723e-05, "loss": 0.898, "step": 229500 }, { "epoch": 2.34, "learning_rate": 5.770666603948368e-05, "loss": 0.9454, "step": 229600 }, { "epoch": 2.34, "learning_rate": 5.770412161130952e-05, "loss": 0.9282, "step": 229700 }, { "epoch": 2.34, "learning_rate": 5.770160129310123e-05, "loss": 0.8343, "step": 229800 }, { "epoch": 2.34, "learning_rate": 5.7699054169462906e-05, "loss": 0.8691, "step": 229900 }, { "epoch": 2.34, "learning_rate": 5.7696505691506124e-05, "loss": 0.8585, "step": 230000 }, { "epoch": 2.34, "learning_rate": 5.7693955859355505e-05, "loss": 1.0687, "step": 230100 }, { "epoch": 2.35, "learning_rate": 5.7691404673135706e-05, "loss": 0.8604, "step": 230200 }, { "epoch": 2.35, "learning_rate": 5.768885213297147e-05, "loss": 0.9415, "step": 230300 }, { "epoch": 2.35, "learning_rate": 5.768629823898761e-05, "loss": 0.8666, "step": 230400 }, { "epoch": 2.35, "learning_rate": 5.7683742991309e-05, "loss": 0.9902, "step": 230500 }, { "epoch": 2.35, "learning_rate": 5.7681186390060543e-05, "loss": 0.8047, "step": 230600 }, { "epoch": 2.35, "learning_rate": 5.767862843536728e-05, "loss": 0.8868, "step": 230700 }, { "epoch": 2.35, "learning_rate": 5.767606912735426e-05, "loss": 0.9105, "step": 230800 }, { "epoch": 2.35, "learning_rate": 5.7673508466146616e-05, "loss": 0.8843, "step": 230900 }, { "epoch": 2.35, "learning_rate": 5.767094645186956e-05, "loss": 0.9282, "step": 231000 }, { "epoch": 2.35, "learning_rate": 5.766838308464836e-05, "loss": 0.7779, "step": 231100 }, { "epoch": 2.36, "learning_rate": 5.766581836460833e-05, "loss": 0.764, "step": 231200 }, { "epoch": 2.36, "learning_rate": 5.76632522918749e-05, "loss": 0.9104, "step": 231300 }, { "epoch": 2.36, "learning_rate": 5.76606848665735e-05, "loss": 0.8737, "step": 231400 }, { "epoch": 2.36, "learning_rate": 5.765811608882969e-05, "loss": 0.8657, "step": 231500 }, { "epoch": 2.36, "learning_rate": 5.765554595876906e-05, "loss": 0.8487, "step": 231600 }, { "epoch": 2.36, "learning_rate": 5.765297447651727e-05, "loss": 0.9241, "step": 231700 }, { "epoch": 2.36, "learning_rate": 5.7650401642200054e-05, "loss": 0.84, "step": 231800 }, { "epoch": 2.36, "learning_rate": 5.764782745594321e-05, "loss": 0.9761, "step": 231900 }, { "epoch": 2.36, "learning_rate": 5.7645251917872584e-05, "loss": 0.9093, "step": 232000 }, { "epoch": 2.36, "learning_rate": 5.764267502811412e-05, "loss": 0.8984, "step": 232100 }, { "epoch": 2.37, "learning_rate": 5.764009678679382e-05, "loss": 0.8129, "step": 232200 }, { "epoch": 2.37, "learning_rate": 5.763751719403773e-05, "loss": 0.786, "step": 232300 }, { "epoch": 2.37, "learning_rate": 5.7634936249971984e-05, "loss": 0.9368, "step": 232400 }, { "epoch": 2.37, "learning_rate": 5.763235395472277e-05, "loss": 0.8499, "step": 232500 }, { "epoch": 2.37, "learning_rate": 5.762977030841635e-05, "loss": 0.9226, "step": 232600 }, { "epoch": 2.37, "learning_rate": 5.762718531117904e-05, "loss": 0.9002, "step": 232700 }, { "epoch": 2.37, "learning_rate": 5.762459896313724e-05, "loss": 0.8452, "step": 232800 }, { "epoch": 2.37, "learning_rate": 5.7622011264417416e-05, "loss": 0.841, "step": 232900 }, { "epoch": 2.37, "learning_rate": 5.761942221514608e-05, "loss": 0.8239, "step": 233000 }, { "epoch": 2.37, "learning_rate": 5.761683181544981e-05, "loss": 0.8115, "step": 233100 }, { "epoch": 2.38, "learning_rate": 5.761424006545528e-05, "loss": 0.8338, "step": 233200 }, { "epoch": 2.38, "learning_rate": 5.76116469652892e-05, "loss": 0.8837, "step": 233300 }, { "epoch": 2.38, "learning_rate": 5.760905251507837e-05, "loss": 0.9065, "step": 233400 }, { "epoch": 2.38, "learning_rate": 5.760645671494963e-05, "loss": 0.8896, "step": 233500 }, { "epoch": 2.38, "learning_rate": 5.76038595650299e-05, "loss": 0.7289, "step": 233600 }, { "epoch": 2.38, "learning_rate": 5.760126106544617e-05, "loss": 0.8551, "step": 233700 }, { "epoch": 2.38, "learning_rate": 5.7598661216325494e-05, "loss": 0.872, "step": 233800 }, { "epoch": 2.38, "learning_rate": 5.7596060017794976e-05, "loss": 0.8103, "step": 233900 }, { "epoch": 2.38, "learning_rate": 5.759345746998181e-05, "loss": 0.8642, "step": 234000 }, { "epoch": 2.39, "learning_rate": 5.759085357301325e-05, "loss": 0.7973, "step": 234100 }, { "epoch": 2.39, "learning_rate": 5.75882483270166e-05, "loss": 0.7778, "step": 234200 }, { "epoch": 2.39, "learning_rate": 5.7585641732119245e-05, "loss": 0.7703, "step": 234300 }, { "epoch": 2.39, "learning_rate": 5.7583033788448636e-05, "loss": 0.8546, "step": 234400 }, { "epoch": 2.39, "learning_rate": 5.758042449613227e-05, "loss": 0.7968, "step": 234500 }, { "epoch": 2.39, "learning_rate": 5.757781385529775e-05, "loss": 0.8611, "step": 234600 }, { "epoch": 2.39, "learning_rate": 5.75752018660727e-05, "loss": 1.0151, "step": 234700 }, { "epoch": 2.39, "learning_rate": 5.7572588528584845e-05, "loss": 0.9415, "step": 234800 }, { "epoch": 2.39, "learning_rate": 5.75700261498853e-05, "loss": 0.8674, "step": 234900 }, { "epoch": 2.39, "learning_rate": 5.756741014321411e-05, "loss": 0.8065, "step": 235000 }, { "epoch": 2.4, "learning_rate": 5.756479278866107e-05, "loss": 0.7896, "step": 235100 }, { "epoch": 2.4, "learning_rate": 5.756217408635416e-05, "loss": 0.8506, "step": 235200 }, { "epoch": 2.4, "learning_rate": 5.755955403642143e-05, "loss": 0.7986, "step": 235300 }, { "epoch": 2.4, "learning_rate": 5.755693263899097e-05, "loss": 0.8774, "step": 235400 }, { "epoch": 2.4, "learning_rate": 5.7554309894190954e-05, "loss": 0.8999, "step": 235500 }, { "epoch": 2.4, "learning_rate": 5.755168580214961e-05, "loss": 1.0107, "step": 235600 }, { "epoch": 2.4, "learning_rate": 5.754906036299527e-05, "loss": 0.8775, "step": 235700 }, { "epoch": 2.4, "learning_rate": 5.754643357685626e-05, "loss": 0.912, "step": 235800 }, { "epoch": 2.4, "learning_rate": 5.7543805443861056e-05, "loss": 0.8518, "step": 235900 }, { "epoch": 2.4, "learning_rate": 5.754117596413813e-05, "loss": 0.82, "step": 236000 }, { "epoch": 2.41, "learning_rate": 5.7538545137816056e-05, "loss": 0.822, "step": 236100 }, { "epoch": 2.41, "learning_rate": 5.753591296502348e-05, "loss": 0.8965, "step": 236200 }, { "epoch": 2.41, "learning_rate": 5.753327944588906e-05, "loss": 0.7823, "step": 236300 }, { "epoch": 2.41, "learning_rate": 5.7530644580541604e-05, "loss": 0.9084, "step": 236400 }, { "epoch": 2.41, "learning_rate": 5.752800836910992e-05, "loss": 0.9449, "step": 236500 }, { "epoch": 2.41, "learning_rate": 5.7525370811722896e-05, "loss": 0.891, "step": 236600 }, { "epoch": 2.41, "learning_rate": 5.7522731908509517e-05, "loss": 1.0042, "step": 236700 }, { "epoch": 2.41, "learning_rate": 5.752009165959878e-05, "loss": 0.9252, "step": 236800 }, { "epoch": 2.41, "learning_rate": 5.75174500651198e-05, "loss": 0.8921, "step": 236900 }, { "epoch": 2.41, "learning_rate": 5.751480712520172e-05, "loss": 0.8566, "step": 237000 }, { "epoch": 2.42, "learning_rate": 5.751216283997378e-05, "loss": 0.8203, "step": 237100 }, { "epoch": 2.42, "learning_rate": 5.750951720956526e-05, "loss": 0.7973, "step": 237200 }, { "epoch": 2.42, "learning_rate": 5.7506870234105515e-05, "loss": 0.8536, "step": 237300 }, { "epoch": 2.42, "learning_rate": 5.7504221913723966e-05, "loss": 0.9, "step": 237400 }, { "epoch": 2.42, "learning_rate": 5.7501572248550104e-05, "loss": 0.8613, "step": 237500 }, { "epoch": 2.42, "learning_rate": 5.749892123871348e-05, "loss": 0.8334, "step": 237600 }, { "epoch": 2.42, "learning_rate": 5.7496268884343705e-05, "loss": 0.9305, "step": 237700 }, { "epoch": 2.42, "learning_rate": 5.749361518557048e-05, "loss": 0.761, "step": 237800 }, { "epoch": 2.42, "learning_rate": 5.7490960142523545e-05, "loss": 0.9899, "step": 237900 }, { "epoch": 2.42, "learning_rate": 5.748830375533271e-05, "loss": 0.9732, "step": 238000 }, { "epoch": 2.43, "learning_rate": 5.748564602412788e-05, "loss": 1.1195, "step": 238100 }, { "epoch": 2.43, "learning_rate": 5.748298694903897e-05, "loss": 0.7973, "step": 238200 }, { "epoch": 2.43, "learning_rate": 5.748032653019601e-05, "loss": 0.8428, "step": 238300 }, { "epoch": 2.43, "learning_rate": 5.7477664767729076e-05, "loss": 0.8425, "step": 238400 }, { "epoch": 2.43, "learning_rate": 5.747500166176832e-05, "loss": 0.8356, "step": 238500 }, { "epoch": 2.43, "learning_rate": 5.747233721244394e-05, "loss": 0.8686, "step": 238600 }, { "epoch": 2.43, "learning_rate": 5.7469671419886216e-05, "loss": 0.8782, "step": 238700 }, { "epoch": 2.43, "learning_rate": 5.746700428422549e-05, "loss": 0.862, "step": 238800 }, { "epoch": 2.43, "learning_rate": 5.746433580559216e-05, "loss": 0.9365, "step": 238900 }, { "epoch": 2.43, "learning_rate": 5.746166598411672e-05, "loss": 0.965, "step": 239000 }, { "epoch": 2.44, "learning_rate": 5.745899481992969e-05, "loss": 0.7471, "step": 239100 }, { "epoch": 2.44, "learning_rate": 5.745632231316169e-05, "loss": 0.8996, "step": 239200 }, { "epoch": 2.44, "learning_rate": 5.745364846394337e-05, "loss": 0.8262, "step": 239300 }, { "epoch": 2.44, "learning_rate": 5.7450973272405486e-05, "loss": 0.7473, "step": 239400 }, { "epoch": 2.44, "learning_rate": 5.744829673867882e-05, "loss": 0.8634, "step": 239500 }, { "epoch": 2.44, "learning_rate": 5.744561886289424e-05, "loss": 0.8106, "step": 239600 }, { "epoch": 2.44, "learning_rate": 5.744293964518269e-05, "loss": 0.8094, "step": 239700 }, { "epoch": 2.44, "learning_rate": 5.744025908567516e-05, "loss": 0.9496, "step": 239800 }, { "epoch": 2.44, "learning_rate": 5.743757718450272e-05, "loss": 0.7968, "step": 239900 }, { "epoch": 2.45, "learning_rate": 5.7434893941796495e-05, "loss": 0.7634, "step": 240000 }, { "epoch": 2.45, "learning_rate": 5.7432209357687675e-05, "loss": 0.822, "step": 240100 }, { "epoch": 2.45, "learning_rate": 5.742952343230753e-05, "loss": 0.9292, "step": 240200 }, { "epoch": 2.45, "learning_rate": 5.7426836165787376e-05, "loss": 0.8505, "step": 240300 }, { "epoch": 2.45, "learning_rate": 5.7424147558258606e-05, "loss": 0.9278, "step": 240400 }, { "epoch": 2.45, "learning_rate": 5.7421457609852685e-05, "loss": 0.7476, "step": 240500 }, { "epoch": 2.45, "learning_rate": 5.741876632070113e-05, "loss": 0.812, "step": 240600 }, { "epoch": 2.45, "learning_rate": 5.741607369093552e-05, "loss": 0.974, "step": 240700 }, { "epoch": 2.45, "learning_rate": 5.741337972068752e-05, "loss": 0.9001, "step": 240800 }, { "epoch": 2.45, "learning_rate": 5.741068441008885e-05, "loss": 0.9165, "step": 240900 }, { "epoch": 2.46, "learning_rate": 5.740798775927129e-05, "loss": 0.8708, "step": 241000 }, { "epoch": 2.46, "learning_rate": 5.740528976836669e-05, "loss": 0.8258, "step": 241100 }, { "epoch": 2.46, "learning_rate": 5.740259043750696e-05, "loss": 0.9042, "step": 241200 }, { "epoch": 2.46, "learning_rate": 5.73998897668241e-05, "loss": 0.8047, "step": 241300 }, { "epoch": 2.46, "learning_rate": 5.739718775645014e-05, "loss": 0.8848, "step": 241400 }, { "epoch": 2.46, "learning_rate": 5.73944844065172e-05, "loss": 0.8763, "step": 241500 }, { "epoch": 2.46, "learning_rate": 5.7391779717157444e-05, "loss": 0.802, "step": 241600 }, { "epoch": 2.46, "learning_rate": 5.738907368850313e-05, "loss": 0.8045, "step": 241700 }, { "epoch": 2.46, "learning_rate": 5.738636632068656e-05, "loss": 0.9475, "step": 241800 }, { "epoch": 2.46, "learning_rate": 5.7383657613840115e-05, "loss": 0.7693, "step": 241900 }, { "epoch": 2.47, "learning_rate": 5.738094756809622e-05, "loss": 0.9034, "step": 242000 }, { "epoch": 2.47, "learning_rate": 5.737826330405894e-05, "loss": 0.9182, "step": 242100 }, { "epoch": 2.47, "learning_rate": 5.737555059430342e-05, "loss": 0.9588, "step": 242200 }, { "epoch": 2.47, "learning_rate": 5.737283654604684e-05, "loss": 0.9221, "step": 242300 }, { "epoch": 2.47, "learning_rate": 5.737012115942192e-05, "loss": 0.8457, "step": 242400 }, { "epoch": 2.47, "learning_rate": 5.7367404434561404e-05, "loss": 0.8711, "step": 242500 }, { "epoch": 2.47, "learning_rate": 5.736471355885093e-05, "loss": 0.8269, "step": 242600 }, { "epoch": 2.47, "learning_rate": 5.7361994171296836e-05, "loss": 0.9725, "step": 242700 }, { "epoch": 2.47, "learning_rate": 5.735927344590452e-05, "loss": 0.8244, "step": 242800 }, { "epoch": 2.47, "learning_rate": 5.7356551382807e-05, "loss": 0.8646, "step": 242900 }, { "epoch": 2.48, "learning_rate": 5.7353827982137366e-05, "loss": 0.8295, "step": 243000 }, { "epoch": 2.48, "learning_rate": 5.735110324402879e-05, "loss": 0.8854, "step": 243100 }, { "epoch": 2.48, "learning_rate": 5.734837716861449e-05, "loss": 0.7545, "step": 243200 }, { "epoch": 2.48, "learning_rate": 5.734564975602775e-05, "loss": 0.7202, "step": 243300 }, { "epoch": 2.48, "learning_rate": 5.734292100640192e-05, "loss": 0.8857, "step": 243400 }, { "epoch": 2.48, "learning_rate": 5.734019091987044e-05, "loss": 0.8698, "step": 243500 }, { "epoch": 2.48, "learning_rate": 5.7337459496566776e-05, "loss": 0.8136, "step": 243600 }, { "epoch": 2.48, "learning_rate": 5.733472673662448e-05, "loss": 0.9224, "step": 243700 }, { "epoch": 2.48, "learning_rate": 5.7331992640177185e-05, "loss": 0.9174, "step": 243800 }, { "epoch": 2.48, "learning_rate": 5.732925720735854e-05, "loss": 0.9318, "step": 243900 }, { "epoch": 2.49, "learning_rate": 5.7326520438302324e-05, "loss": 0.8895, "step": 244000 }, { "epoch": 2.49, "learning_rate": 5.732378233314233e-05, "loss": 0.9287, "step": 244100 }, { "epoch": 2.49, "learning_rate": 5.732104289201244e-05, "loss": 0.8927, "step": 244200 }, { "epoch": 2.49, "learning_rate": 5.73183021150466e-05, "loss": 0.9159, "step": 244300 }, { "epoch": 2.49, "learning_rate": 5.73155600023788e-05, "loss": 0.8519, "step": 244400 }, { "epoch": 2.49, "learning_rate": 5.731281655414314e-05, "loss": 0.8667, "step": 244500 }, { "epoch": 2.49, "learning_rate": 5.7310071770473745e-05, "loss": 0.8324, "step": 244600 }, { "epoch": 2.49, "learning_rate": 5.730732565150481e-05, "loss": 0.854, "step": 244700 }, { "epoch": 2.49, "learning_rate": 5.7304578197370616e-05, "loss": 0.9844, "step": 244800 }, { "epoch": 2.5, "learning_rate": 5.730182940820549e-05, "loss": 0.9262, "step": 244900 }, { "epoch": 2.5, "learning_rate": 5.729907928414384e-05, "loss": 0.9692, "step": 245000 }, { "epoch": 2.5, "learning_rate": 5.729632782532012e-05, "loss": 0.7791, "step": 245100 }, { "epoch": 2.5, "learning_rate": 5.7293575031868884e-05, "loss": 0.8821, "step": 245200 }, { "epoch": 2.5, "learning_rate": 5.7290820903924686e-05, "loss": 0.8035, "step": 245300 }, { "epoch": 2.5, "learning_rate": 5.728806544162222e-05, "loss": 0.8593, "step": 245400 }, { "epoch": 2.5, "learning_rate": 5.7285308645096193e-05, "loss": 0.868, "step": 245500 }, { "epoch": 2.5, "learning_rate": 5.728255051448141e-05, "loss": 0.8856, "step": 245600 }, { "epoch": 2.5, "learning_rate": 5.727979104991272e-05, "loss": 0.9176, "step": 245700 }, { "epoch": 2.5, "learning_rate": 5.727703025152504e-05, "loss": 0.8087, "step": 245800 }, { "epoch": 2.51, "learning_rate": 5.727426811945337e-05, "loss": 0.8861, "step": 245900 }, { "epoch": 2.51, "learning_rate": 5.727150465383275e-05, "loss": 0.8416, "step": 246000 }, { "epoch": 2.51, "learning_rate": 5.726873985479829e-05, "loss": 0.8767, "step": 246100 }, { "epoch": 2.51, "learning_rate": 5.726597372248519e-05, "loss": 0.7727, "step": 246200 }, { "epoch": 2.51, "learning_rate": 5.726320625702869e-05, "loss": 0.884, "step": 246300 }, { "epoch": 2.51, "learning_rate": 5.72604374585641e-05, "loss": 0.8328, "step": 246400 }, { "epoch": 2.51, "learning_rate": 5.7257667327226814e-05, "loss": 0.8487, "step": 246500 }, { "epoch": 2.51, "learning_rate": 5.725489586315225e-05, "loss": 0.9323, "step": 246600 }, { "epoch": 2.51, "learning_rate": 5.725212306647592e-05, "loss": 0.8996, "step": 246700 }, { "epoch": 2.51, "learning_rate": 5.7249348937333414e-05, "loss": 0.7903, "step": 246800 }, { "epoch": 2.52, "learning_rate": 5.724657347586036e-05, "loss": 0.9189, "step": 246900 }, { "epoch": 2.52, "learning_rate": 5.724379668219246e-05, "loss": 0.8507, "step": 247000 }, { "epoch": 2.52, "learning_rate": 5.7241046344315995e-05, "loss": 0.9612, "step": 247100 }, { "epoch": 2.52, "learning_rate": 5.7238294701020236e-05, "loss": 0.8118, "step": 247200 }, { "epoch": 2.52, "learning_rate": 5.7235513938217094e-05, "loss": 0.9644, "step": 247300 }, { "epoch": 2.52, "learning_rate": 5.7232731843759846e-05, "loss": 0.9178, "step": 247400 }, { "epoch": 2.52, "learning_rate": 5.722994841778454e-05, "loss": 0.9659, "step": 247500 }, { "epoch": 2.52, "learning_rate": 5.7227163660427244e-05, "loss": 0.7966, "step": 247600 }, { "epoch": 2.52, "learning_rate": 5.722437757182413e-05, "loss": 0.7818, "step": 247700 }, { "epoch": 2.52, "learning_rate": 5.7221590152111424e-05, "loss": 0.8264, "step": 247800 }, { "epoch": 2.53, "learning_rate": 5.721880140142541e-05, "loss": 0.8649, "step": 247900 }, { "epoch": 2.53, "learning_rate": 5.721601131990243e-05, "loss": 0.942, "step": 248000 }, { "epoch": 2.53, "learning_rate": 5.721321990767893e-05, "loss": 0.8436, "step": 248100 }, { "epoch": 2.53, "learning_rate": 5.721042716489136e-05, "loss": 0.8783, "step": 248200 }, { "epoch": 2.53, "learning_rate": 5.7207633091676295e-05, "loss": 0.8413, "step": 248300 }, { "epoch": 2.53, "learning_rate": 5.720483768817034e-05, "loss": 0.8947, "step": 248400 }, { "epoch": 2.53, "learning_rate": 5.720204095451016e-05, "loss": 0.7859, "step": 248500 }, { "epoch": 2.53, "learning_rate": 5.719924289083252e-05, "loss": 0.8549, "step": 248600 }, { "epoch": 2.53, "learning_rate": 5.7196443497274217e-05, "loss": 0.7906, "step": 248700 }, { "epoch": 2.53, "learning_rate": 5.719364277397212e-05, "loss": 0.9004, "step": 248800 }, { "epoch": 2.54, "learning_rate": 5.7190840721063194e-05, "loss": 0.7983, "step": 248900 }, { "epoch": 2.54, "learning_rate": 5.7188037338684406e-05, "loss": 0.8178, "step": 249000 }, { "epoch": 2.54, "learning_rate": 5.7185232626972856e-05, "loss": 0.9048, "step": 249100 }, { "epoch": 2.54, "learning_rate": 5.718242658606565e-05, "loss": 0.7853, "step": 249200 }, { "epoch": 2.54, "learning_rate": 5.717961921610001e-05, "loss": 0.8043, "step": 249300 }, { "epoch": 2.54, "learning_rate": 5.7176810517213176e-05, "loss": 0.8754, "step": 249400 }, { "epoch": 2.54, "learning_rate": 5.717400048954251e-05, "loss": 0.9261, "step": 249500 }, { "epoch": 2.54, "learning_rate": 5.717118913322537e-05, "loss": 0.8122, "step": 249600 }, { "epoch": 2.54, "learning_rate": 5.716837644839923e-05, "loss": 0.8972, "step": 249700 }, { "epoch": 2.55, "learning_rate": 5.716556243520163e-05, "loss": 0.8242, "step": 249800 }, { "epoch": 2.55, "learning_rate": 5.716274709377012e-05, "loss": 0.7834, "step": 249900 }, { "epoch": 2.55, "learning_rate": 5.71599304242424e-05, "loss": 0.7136, "step": 250000 }, { "epoch": 2.55, "learning_rate": 5.7157112426756146e-05, "loss": 0.7837, "step": 250100 }, { "epoch": 2.55, "learning_rate": 5.715429310144917e-05, "loss": 0.8747, "step": 250200 }, { "epoch": 2.55, "learning_rate": 5.7151472448459304e-05, "loss": 0.8101, "step": 250300 }, { "epoch": 2.55, "learning_rate": 5.714865046792447e-05, "loss": 0.8779, "step": 250400 }, { "epoch": 2.55, "learning_rate": 5.7145827159982635e-05, "loss": 0.7656, "step": 250500 }, { "epoch": 2.55, "learning_rate": 5.714300252477186e-05, "loss": 0.8196, "step": 250600 }, { "epoch": 2.55, "learning_rate": 5.714017656243024e-05, "loss": 0.8955, "step": 250700 }, { "epoch": 2.56, "learning_rate": 5.7137349273095945e-05, "loss": 0.8981, "step": 250800 }, { "epoch": 2.56, "learning_rate": 5.713452065690723e-05, "loss": 0.7695, "step": 250900 }, { "epoch": 2.56, "learning_rate": 5.713169071400237e-05, "loss": 0.7515, "step": 251000 }, { "epoch": 2.56, "learning_rate": 5.7128859444519756e-05, "loss": 0.9089, "step": 251100 }, { "epoch": 2.56, "learning_rate": 5.712602684859781e-05, "loss": 1.0361, "step": 251200 }, { "epoch": 2.56, "learning_rate": 5.7123192926375025e-05, "loss": 0.8626, "step": 251300 }, { "epoch": 2.56, "learning_rate": 5.712035767798998e-05, "loss": 0.7959, "step": 251400 }, { "epoch": 2.56, "learning_rate": 5.711752110358129e-05, "loss": 0.8262, "step": 251500 }, { "epoch": 2.56, "learning_rate": 5.7114683203287634e-05, "loss": 0.7986, "step": 251600 }, { "epoch": 2.56, "learning_rate": 5.711184397724779e-05, "loss": 0.7732, "step": 251700 }, { "epoch": 2.57, "learning_rate": 5.710903183767834e-05, "loss": 0.7471, "step": 251800 }, { "epoch": 2.57, "learning_rate": 5.710618997381663e-05, "loss": 0.8087, "step": 251900 }, { "epoch": 2.57, "learning_rate": 5.710337522307584e-05, "loss": 0.7915, "step": 252000 }, { "epoch": 2.57, "learning_rate": 5.710053072194253e-05, "loss": 0.921, "step": 252100 }, { "epoch": 2.57, "learning_rate": 5.709768489575497e-05, "loss": 0.8942, "step": 252200 }, { "epoch": 2.57, "learning_rate": 5.709483774465234e-05, "loss": 0.9489, "step": 252300 }, { "epoch": 2.57, "learning_rate": 5.709198926877384e-05, "loss": 0.9265, "step": 252400 }, { "epoch": 2.57, "learning_rate": 5.708916797282039e-05, "loss": 0.8836, "step": 252500 }, { "epoch": 2.57, "learning_rate": 5.708631686105231e-05, "loss": 0.8376, "step": 252600 }, { "epoch": 2.57, "learning_rate": 5.708346442492498e-05, "loss": 0.8775, "step": 252700 }, { "epoch": 2.58, "learning_rate": 5.708061066457787e-05, "loss": 0.9899, "step": 252800 }, { "epoch": 2.58, "learning_rate": 5.70777555801505e-05, "loss": 0.882, "step": 252900 }, { "epoch": 2.58, "learning_rate": 5.707489917178248e-05, "loss": 0.9497, "step": 253000 }, { "epoch": 2.58, "learning_rate": 5.707204143961346e-05, "loss": 0.9415, "step": 253100 }, { "epoch": 2.58, "learning_rate": 5.7069182383783164e-05, "loss": 0.9093, "step": 253200 }, { "epoch": 2.58, "learning_rate": 5.7066322004431405e-05, "loss": 0.7644, "step": 253300 }, { "epoch": 2.58, "learning_rate": 5.7063460301698015e-05, "loss": 0.7724, "step": 253400 }, { "epoch": 2.58, "learning_rate": 5.7060597275722925e-05, "loss": 0.9029, "step": 253500 }, { "epoch": 2.58, "learning_rate": 5.705773292664612e-05, "loss": 0.9727, "step": 253600 }, { "epoch": 2.58, "learning_rate": 5.705486725460764e-05, "loss": 0.7883, "step": 253700 }, { "epoch": 2.59, "learning_rate": 5.705200025974761e-05, "loss": 0.8152, "step": 253800 }, { "epoch": 2.59, "learning_rate": 5.70491319422062e-05, "loss": 0.8392, "step": 253900 }, { "epoch": 2.59, "learning_rate": 5.704626230212367e-05, "loss": 0.8581, "step": 254000 }, { "epoch": 2.59, "learning_rate": 5.704339133964031e-05, "loss": 0.9669, "step": 254100 }, { "epoch": 2.59, "learning_rate": 5.70405190548965e-05, "loss": 0.82, "step": 254200 }, { "epoch": 2.59, "learning_rate": 5.703764544803268e-05, "loss": 0.8998, "step": 254300 }, { "epoch": 2.59, "learning_rate": 5.703477051918935e-05, "loss": 0.8175, "step": 254400 }, { "epoch": 2.59, "learning_rate": 5.7031894268507075e-05, "loss": 0.797, "step": 254500 }, { "epoch": 2.59, "learning_rate": 5.7029045478392244e-05, "loss": 0.8205, "step": 254600 }, { "epoch": 2.59, "learning_rate": 5.702616659766891e-05, "loss": 0.833, "step": 254700 }, { "epoch": 2.6, "learning_rate": 5.702328639552733e-05, "loss": 0.8664, "step": 254800 }, { "epoch": 2.6, "learning_rate": 5.7020404872108305e-05, "loss": 0.8918, "step": 254900 }, { "epoch": 2.6, "learning_rate": 5.7017522027552736e-05, "loss": 0.8842, "step": 255000 }, { "epoch": 2.6, "learning_rate": 5.701463786200156e-05, "loss": 0.853, "step": 255100 }, { "epoch": 2.6, "learning_rate": 5.701175237559582e-05, "loss": 0.8193, "step": 255200 }, { "epoch": 2.6, "learning_rate": 5.700886556847659e-05, "loss": 0.8422, "step": 255300 }, { "epoch": 2.6, "learning_rate": 5.7006006328598286e-05, "loss": 0.8899, "step": 255400 }, { "epoch": 2.6, "learning_rate": 5.700311689367918e-05, "loss": 0.8576, "step": 255500 }, { "epoch": 2.6, "learning_rate": 5.70002261384688e-05, "loss": 0.8841, "step": 255600 }, { "epoch": 2.61, "learning_rate": 5.69973340631085e-05, "loss": 1.0285, "step": 255700 }, { "epoch": 2.61, "learning_rate": 5.6994440667739674e-05, "loss": 0.8977, "step": 255800 }, { "epoch": 2.61, "learning_rate": 5.6991545952503783e-05, "loss": 0.9608, "step": 255900 }, { "epoch": 2.61, "learning_rate": 5.698864991754238e-05, "loss": 0.9759, "step": 256000 }, { "epoch": 2.61, "learning_rate": 5.698575256299706e-05, "loss": 0.9022, "step": 256100 }, { "epoch": 2.61, "learning_rate": 5.698285388900947e-05, "loss": 0.9024, "step": 256200 }, { "epoch": 2.61, "learning_rate": 5.697995389572137e-05, "loss": 0.8635, "step": 256300 }, { "epoch": 2.61, "learning_rate": 5.697705258327451e-05, "loss": 1.011, "step": 256400 }, { "epoch": 2.61, "learning_rate": 5.697414995181079e-05, "loss": 0.9641, "step": 256500 }, { "epoch": 2.61, "learning_rate": 5.6971246001472096e-05, "loss": 0.9, "step": 256600 }, { "epoch": 2.62, "learning_rate": 5.696834073240044e-05, "loss": 0.9274, "step": 256700 }, { "epoch": 2.62, "learning_rate": 5.696543414473785e-05, "loss": 0.9429, "step": 256800 }, { "epoch": 2.62, "learning_rate": 5.696252623862646e-05, "loss": 0.8981, "step": 256900 }, { "epoch": 2.62, "learning_rate": 5.695961701420843e-05, "loss": 0.9315, "step": 257000 }, { "epoch": 2.62, "learning_rate": 5.695670647162603e-05, "loss": 0.9233, "step": 257100 }, { "epoch": 2.62, "learning_rate": 5.695379461102154e-05, "loss": 1.0118, "step": 257200 }, { "epoch": 2.62, "learning_rate": 5.695088143253735e-05, "loss": 0.9831, "step": 257300 }, { "epoch": 2.62, "learning_rate": 5.694799608780044e-05, "loss": 0.9721, "step": 257400 }, { "epoch": 2.62, "learning_rate": 5.694508028715945e-05, "loss": 0.8437, "step": 257500 }, { "epoch": 2.62, "learning_rate": 5.694216316906485e-05, "loss": 0.8963, "step": 257600 }, { "epoch": 2.63, "learning_rate": 5.6939244733659246e-05, "loss": 0.887, "step": 257700 }, { "epoch": 2.63, "learning_rate": 5.6936324981085335e-05, "loss": 0.9561, "step": 257800 }, { "epoch": 2.63, "learning_rate": 5.693340391148588e-05, "loss": 0.9189, "step": 257900 }, { "epoch": 2.63, "learning_rate": 5.693048152500371e-05, "loss": 0.9984, "step": 258000 }, { "epoch": 2.63, "learning_rate": 5.69275578217817e-05, "loss": 1.0144, "step": 258100 }, { "epoch": 2.63, "learning_rate": 5.692463280196281e-05, "loss": 0.8798, "step": 258200 }, { "epoch": 2.63, "learning_rate": 5.692170646569006e-05, "loss": 0.9476, "step": 258300 }, { "epoch": 2.63, "learning_rate": 5.691877881310652e-05, "loss": 0.9003, "step": 258400 }, { "epoch": 2.63, "learning_rate": 5.6915849844355336e-05, "loss": 1.0298, "step": 258500 }, { "epoch": 2.63, "learning_rate": 5.691291955957973e-05, "loss": 0.8374, "step": 258600 }, { "epoch": 2.64, "learning_rate": 5.690998795892295e-05, "loss": 0.971, "step": 258700 }, { "epoch": 2.64, "learning_rate": 5.690705504252836e-05, "loss": 0.9238, "step": 258800 }, { "epoch": 2.64, "learning_rate": 5.690412081053935e-05, "loss": 1.0069, "step": 258900 }, { "epoch": 2.64, "learning_rate": 5.6901185263099386e-05, "loss": 0.9382, "step": 259000 }, { "epoch": 2.64, "learning_rate": 5.6898248400351994e-05, "loss": 0.9315, "step": 259100 }, { "epoch": 2.64, "learning_rate": 5.6895310222440784e-05, "loss": 0.9887, "step": 259200 }, { "epoch": 2.64, "learning_rate": 5.689237072950941e-05, "loss": 0.8713, "step": 259300 }, { "epoch": 2.64, "learning_rate": 5.688942992170158e-05, "loss": 0.891, "step": 259400 }, { "epoch": 2.64, "learning_rate": 5.68864877991611e-05, "loss": 0.9766, "step": 259500 }, { "epoch": 2.64, "learning_rate": 5.688354436203183e-05, "loss": 0.8095, "step": 259600 }, { "epoch": 2.65, "learning_rate": 5.688059961045766e-05, "loss": 0.8364, "step": 259700 }, { "epoch": 2.65, "learning_rate": 5.6877653544582585e-05, "loss": 0.9632, "step": 259800 }, { "epoch": 2.65, "learning_rate": 5.687470616455065e-05, "loss": 0.7766, "step": 259900 }, { "epoch": 2.65, "learning_rate": 5.687175747050597e-05, "loss": 0.9271, "step": 260000 }, { "epoch": 2.65, "learning_rate": 5.68688074625927e-05, "loss": 0.8942, "step": 260100 }, { "epoch": 2.65, "learning_rate": 5.68658561409551e-05, "loss": 0.7587, "step": 260200 }, { "epoch": 2.65, "learning_rate": 5.6862903505737456e-05, "loss": 0.8526, "step": 260300 }, { "epoch": 2.65, "learning_rate": 5.6859949557084145e-05, "loss": 0.9299, "step": 260400 }, { "epoch": 2.65, "learning_rate": 5.685699429513958e-05, "loss": 0.848, "step": 260500 }, { "epoch": 2.66, "learning_rate": 5.685403772004828e-05, "loss": 0.9296, "step": 260600 }, { "epoch": 2.66, "learning_rate": 5.685107983195479e-05, "loss": 0.9977, "step": 260700 }, { "epoch": 2.66, "learning_rate": 5.684812063100374e-05, "loss": 0.8459, "step": 260800 }, { "epoch": 2.66, "learning_rate": 5.68451601173398e-05, "loss": 0.8624, "step": 260900 }, { "epoch": 2.66, "learning_rate": 5.684219829110774e-05, "loss": 0.8767, "step": 261000 }, { "epoch": 2.66, "learning_rate": 5.6839235152452377e-05, "loss": 0.8299, "step": 261100 }, { "epoch": 2.66, "learning_rate": 5.683627070151856e-05, "loss": 0.9453, "step": 261200 }, { "epoch": 2.66, "learning_rate": 5.683330493845128e-05, "loss": 0.9189, "step": 261300 }, { "epoch": 2.66, "learning_rate": 5.6830337863395515e-05, "loss": 0.8097, "step": 261400 }, { "epoch": 2.66, "learning_rate": 5.6827369476496344e-05, "loss": 0.845, "step": 261500 }, { "epoch": 2.67, "learning_rate": 5.68243997778989e-05, "loss": 0.7803, "step": 261600 }, { "epoch": 2.67, "learning_rate": 5.682142876774839e-05, "loss": 0.8154, "step": 261700 }, { "epoch": 2.67, "learning_rate": 5.6818456446190066e-05, "loss": 0.826, "step": 261800 }, { "epoch": 2.67, "learning_rate": 5.681548281336927e-05, "loss": 0.8515, "step": 261900 }, { "epoch": 2.67, "learning_rate": 5.6812507869431396e-05, "loss": 0.9092, "step": 262000 }, { "epoch": 2.67, "learning_rate": 5.68095316145219e-05, "loss": 0.8429, "step": 262100 }, { "epoch": 2.67, "learning_rate": 5.6806554048786286e-05, "loss": 0.7835, "step": 262200 }, { "epoch": 2.67, "learning_rate": 5.680357517237016e-05, "loss": 0.7801, "step": 262300 }, { "epoch": 2.67, "learning_rate": 5.680059498541916e-05, "loss": 0.8571, "step": 262400 }, { "epoch": 2.67, "learning_rate": 5.679761348807901e-05, "loss": 0.791, "step": 262500 }, { "epoch": 2.68, "learning_rate": 5.679463068049547e-05, "loss": 0.8853, "step": 262600 }, { "epoch": 2.68, "learning_rate": 5.6791646562814395e-05, "loss": 0.8014, "step": 262700 }, { "epoch": 2.68, "learning_rate": 5.678866113518169e-05, "loss": 0.8094, "step": 262800 }, { "epoch": 2.68, "learning_rate": 5.678567439774332e-05, "loss": 0.8718, "step": 262900 }, { "epoch": 2.68, "learning_rate": 5.6782686350645324e-05, "loss": 0.9101, "step": 263000 }, { "epoch": 2.68, "learning_rate": 5.677969699403379e-05, "loss": 0.8268, "step": 263100 }, { "epoch": 2.68, "learning_rate": 5.6776706328054896e-05, "loss": 0.7762, "step": 263200 }, { "epoch": 2.68, "learning_rate": 5.6773714352854844e-05, "loss": 0.8332, "step": 263300 }, { "epoch": 2.68, "learning_rate": 5.677072106857995e-05, "loss": 0.7458, "step": 263400 }, { "epoch": 2.68, "learning_rate": 5.676772647537655e-05, "loss": 0.8379, "step": 263500 }, { "epoch": 2.69, "learning_rate": 5.6764730573391064e-05, "loss": 0.8777, "step": 263600 }, { "epoch": 2.69, "learning_rate": 5.6761733362769974e-05, "loss": 0.8984, "step": 263700 }, { "epoch": 2.69, "learning_rate": 5.6758734843659834e-05, "loss": 0.8848, "step": 263800 }, { "epoch": 2.69, "learning_rate": 5.675573501620725e-05, "loss": 0.8055, "step": 263900 }, { "epoch": 2.69, "learning_rate": 5.675273388055889e-05, "loss": 0.9445, "step": 264000 }, { "epoch": 2.69, "learning_rate": 5.674973143686149e-05, "loss": 0.8991, "step": 264100 }, { "epoch": 2.69, "learning_rate": 5.674672768526187e-05, "loss": 0.856, "step": 264200 }, { "epoch": 2.69, "learning_rate": 5.674372262590687e-05, "loss": 0.9428, "step": 264300 }, { "epoch": 2.69, "learning_rate": 5.6740716258943434e-05, "loss": 0.8495, "step": 264400 }, { "epoch": 2.69, "learning_rate": 5.6737708584518564e-05, "loss": 0.9197, "step": 264500 }, { "epoch": 2.7, "learning_rate": 5.6734699602779296e-05, "loss": 0.7676, "step": 264600 }, { "epoch": 2.7, "learning_rate": 5.673168931387276e-05, "loss": 0.8718, "step": 264700 }, { "epoch": 2.7, "learning_rate": 5.672867771794615e-05, "loss": 0.8502, "step": 264800 }, { "epoch": 2.7, "learning_rate": 5.67256648151467e-05, "loss": 0.896, "step": 264900 }, { "epoch": 2.7, "learning_rate": 5.672265060562174e-05, "loss": 0.8119, "step": 265000 }, { "epoch": 2.7, "learning_rate": 5.671963508951863e-05, "loss": 0.8941, "step": 265100 }, { "epoch": 2.7, "learning_rate": 5.6716618266984827e-05, "loss": 0.7739, "step": 265200 }, { "epoch": 2.7, "learning_rate": 5.6713600138167816e-05, "loss": 0.9141, "step": 265300 }, { "epoch": 2.7, "learning_rate": 5.67106109040296e-05, "loss": 0.9213, "step": 265400 }, { "epoch": 2.7, "learning_rate": 5.670759017614812e-05, "loss": 0.8806, "step": 265500 }, { "epoch": 2.71, "learning_rate": 5.6704568142424855e-05, "loss": 0.9328, "step": 265600 }, { "epoch": 2.71, "learning_rate": 5.670154480300758e-05, "loss": 0.8146, "step": 265700 }, { "epoch": 2.71, "learning_rate": 5.669852015804412e-05, "loss": 0.9284, "step": 265800 }, { "epoch": 2.71, "learning_rate": 5.6695494207682345e-05, "loss": 0.7828, "step": 265900 }, { "epoch": 2.71, "learning_rate": 5.669246695207021e-05, "loss": 0.8531, "step": 266000 }, { "epoch": 2.71, "learning_rate": 5.6689438391355734e-05, "loss": 0.7673, "step": 266100 }, { "epoch": 2.71, "learning_rate": 5.6686408525687e-05, "loss": 0.9254, "step": 266200 }, { "epoch": 2.71, "learning_rate": 5.668337735521214e-05, "loss": 0.9133, "step": 266300 }, { "epoch": 2.71, "learning_rate": 5.668034488007937e-05, "loss": 0.897, "step": 266400 }, { "epoch": 2.72, "learning_rate": 5.667731110043695e-05, "loss": 0.7841, "step": 266500 }, { "epoch": 2.72, "learning_rate": 5.667430637372935e-05, "loss": 0.7991, "step": 266600 }, { "epoch": 2.72, "learning_rate": 5.667126999855411e-05, "loss": 0.8795, "step": 266700 }, { "epoch": 2.72, "learning_rate": 5.666823231931293e-05, "loss": 0.8652, "step": 266800 }, { "epoch": 2.72, "learning_rate": 5.666519333615433e-05, "loss": 0.9181, "step": 266900 }, { "epoch": 2.72, "learning_rate": 5.666215304922691e-05, "loss": 1.0623, "step": 267000 }, { "epoch": 2.72, "learning_rate": 5.6659111458679316e-05, "loss": 0.8177, "step": 267100 }, { "epoch": 2.72, "learning_rate": 5.6656068564660264e-05, "loss": 0.7996, "step": 267200 }, { "epoch": 2.72, "learning_rate": 5.665302436731853e-05, "loss": 0.806, "step": 267300 }, { "epoch": 2.72, "learning_rate": 5.664997886680296e-05, "loss": 0.8959, "step": 267400 }, { "epoch": 2.73, "learning_rate": 5.6646932063262474e-05, "loss": 0.8346, "step": 267500 }, { "epoch": 2.73, "learning_rate": 5.664388395684603e-05, "loss": 0.828, "step": 267600 }, { "epoch": 2.73, "learning_rate": 5.664083454770266e-05, "loss": 0.9029, "step": 267700 }, { "epoch": 2.73, "learning_rate": 5.663778383598146e-05, "loss": 0.7655, "step": 267800 }, { "epoch": 2.73, "learning_rate": 5.66347318218316e-05, "loss": 0.8944, "step": 267900 }, { "epoch": 2.73, "learning_rate": 5.6631678505402295e-05, "loss": 0.9142, "step": 268000 }, { "epoch": 2.73, "learning_rate": 5.662862388684285e-05, "loss": 0.9283, "step": 268100 }, { "epoch": 2.73, "learning_rate": 5.6625567966302605e-05, "loss": 0.7991, "step": 268200 }, { "epoch": 2.73, "learning_rate": 5.6622510743930975e-05, "loss": 0.9507, "step": 268300 }, { "epoch": 2.73, "learning_rate": 5.6619452219877445e-05, "loss": 0.752, "step": 268400 }, { "epoch": 2.74, "learning_rate": 5.661639239429157e-05, "loss": 0.902, "step": 268500 }, { "epoch": 2.74, "learning_rate": 5.6613331267322934e-05, "loss": 0.8358, "step": 268600 }, { "epoch": 2.74, "learning_rate": 5.661026883912122e-05, "loss": 0.8998, "step": 268700 }, { "epoch": 2.74, "learning_rate": 5.660723575356889e-05, "loss": 0.883, "step": 268800 }, { "epoch": 2.74, "learning_rate": 5.660417073635888e-05, "loss": 0.9245, "step": 268900 }, { "epoch": 2.74, "learning_rate": 5.660110441836369e-05, "loss": 0.8388, "step": 269000 }, { "epoch": 2.74, "learning_rate": 5.659803679973323e-05, "loss": 0.8983, "step": 269100 }, { "epoch": 2.74, "learning_rate": 5.659496788061751e-05, "loss": 0.9053, "step": 269200 }, { "epoch": 2.74, "learning_rate": 5.659189766116657e-05, "loss": 0.8712, "step": 269300 }, { "epoch": 2.74, "learning_rate": 5.6588826141530526e-05, "loss": 0.7831, "step": 269400 }, { "epoch": 2.75, "learning_rate": 5.658575332185956e-05, "loss": 0.8285, "step": 269500 }, { "epoch": 2.75, "learning_rate": 5.65826792023039e-05, "loss": 0.7277, "step": 269600 }, { "epoch": 2.75, "learning_rate": 5.6579603783013876e-05, "loss": 0.7775, "step": 269700 }, { "epoch": 2.75, "learning_rate": 5.6576527064139845e-05, "loss": 0.851, "step": 269800 }, { "epoch": 2.75, "learning_rate": 5.657344904583224e-05, "loss": 0.8398, "step": 269900 }, { "epoch": 2.75, "learning_rate": 5.657036972824155e-05, "loss": 0.826, "step": 270000 }, { "epoch": 2.75, "learning_rate": 5.656728911151835e-05, "loss": 0.972, "step": 270100 }, { "epoch": 2.75, "learning_rate": 5.656420719581326e-05, "loss": 0.9173, "step": 270200 }, { "epoch": 2.75, "learning_rate": 5.656112398127696e-05, "loss": 0.9713, "step": 270300 }, { "epoch": 2.75, "learning_rate": 5.655803946806021e-05, "loss": 0.8209, "step": 270400 }, { "epoch": 2.76, "learning_rate": 5.6554953656313824e-05, "loss": 0.8731, "step": 270500 }, { "epoch": 2.76, "learning_rate": 5.6551866546188674e-05, "loss": 0.8399, "step": 270600 }, { "epoch": 2.76, "learning_rate": 5.65487781378357e-05, "loss": 0.8209, "step": 270700 }, { "epoch": 2.76, "learning_rate": 5.654568843140591e-05, "loss": 0.8956, "step": 270800 }, { "epoch": 2.76, "learning_rate": 5.654259742705037e-05, "loss": 0.7214, "step": 270900 }, { "epoch": 2.76, "learning_rate": 5.653950512492022e-05, "loss": 0.8488, "step": 271000 }, { "epoch": 2.76, "learning_rate": 5.653641152516665e-05, "loss": 0.8523, "step": 271100 }, { "epoch": 2.76, "learning_rate": 5.653331662794091e-05, "loss": 0.8286, "step": 271200 }, { "epoch": 2.76, "learning_rate": 5.653022043339433e-05, "loss": 0.8478, "step": 271300 }, { "epoch": 2.77, "learning_rate": 5.6527122941678305e-05, "loss": 0.91, "step": 271400 }, { "epoch": 2.77, "learning_rate": 5.652402415294426e-05, "loss": 0.7761, "step": 271500 }, { "epoch": 2.77, "learning_rate": 5.652092406734373e-05, "loss": 0.8504, "step": 271600 }, { "epoch": 2.77, "learning_rate": 5.6517853705269665e-05, "loss": 0.8262, "step": 271700 }, { "epoch": 2.77, "learning_rate": 5.651475103935582e-05, "loss": 0.7957, "step": 271800 }, { "epoch": 2.77, "learning_rate": 5.651164707702888e-05, "loss": 0.8826, "step": 271900 }, { "epoch": 2.77, "learning_rate": 5.6508541818440606e-05, "loss": 0.8813, "step": 272000 }, { "epoch": 2.77, "learning_rate": 5.650543526374284e-05, "loss": 0.9414, "step": 272100 }, { "epoch": 2.77, "learning_rate": 5.6502327413087455e-05, "loss": 0.8849, "step": 272200 }, { "epoch": 2.77, "learning_rate": 5.649921826662643e-05, "loss": 0.8659, "step": 272300 }, { "epoch": 2.78, "learning_rate": 5.649610782451176e-05, "loss": 0.8527, "step": 272400 }, { "epoch": 2.78, "learning_rate": 5.649299608689554e-05, "loss": 0.7676, "step": 272500 }, { "epoch": 2.78, "learning_rate": 5.648988305392991e-05, "loss": 0.9003, "step": 272600 }, { "epoch": 2.78, "learning_rate": 5.648679987545944e-05, "loss": 0.9116, "step": 272700 }, { "epoch": 2.78, "learning_rate": 5.6483684265201386e-05, "loss": 0.8178, "step": 272800 }, { "epoch": 2.78, "learning_rate": 5.648056736004922e-05, "loss": 0.8899, "step": 272900 }, { "epoch": 2.78, "learning_rate": 5.647744916015534e-05, "loss": 0.9139, "step": 273000 }, { "epoch": 2.78, "learning_rate": 5.64743296656722e-05, "loss": 0.8453, "step": 273100 }, { "epoch": 2.78, "learning_rate": 5.647120887675233e-05, "loss": 1.0177, "step": 273200 }, { "epoch": 2.78, "learning_rate": 5.646808679354831e-05, "loss": 0.9189, "step": 273300 }, { "epoch": 2.79, "learning_rate": 5.6464963416212815e-05, "loss": 0.8525, "step": 273400 }, { "epoch": 2.79, "learning_rate": 5.6461838744898534e-05, "loss": 0.8275, "step": 273500 }, { "epoch": 2.79, "learning_rate": 5.645871277975826e-05, "loss": 0.8679, "step": 273600 }, { "epoch": 2.79, "learning_rate": 5.645558552094482e-05, "loss": 0.8387, "step": 273700 }, { "epoch": 2.79, "learning_rate": 5.645245696861113e-05, "loss": 0.885, "step": 273800 }, { "epoch": 2.79, "learning_rate": 5.644932712291016e-05, "loss": 0.9016, "step": 273900 }, { "epoch": 2.79, "learning_rate": 5.644619598399493e-05, "loss": 0.91, "step": 274000 }, { "epoch": 2.79, "learning_rate": 5.644306355201854e-05, "loss": 0.966, "step": 274100 }, { "epoch": 2.79, "learning_rate": 5.6439929827134145e-05, "loss": 0.8628, "step": 274200 }, { "epoch": 2.79, "learning_rate": 5.643679480949497e-05, "loss": 0.8955, "step": 274300 }, { "epoch": 2.8, "learning_rate": 5.643365849925429e-05, "loss": 0.8976, "step": 274400 }, { "epoch": 2.8, "learning_rate": 5.6430520896565465e-05, "loss": 0.8053, "step": 274500 }, { "epoch": 2.8, "learning_rate": 5.642738200158189e-05, "loss": 0.8382, "step": 274600 }, { "epoch": 2.8, "learning_rate": 5.6424241814457056e-05, "loss": 0.8375, "step": 274700 }, { "epoch": 2.8, "learning_rate": 5.642110033534448e-05, "loss": 0.8948, "step": 274800 }, { "epoch": 2.8, "learning_rate": 5.641795756439776e-05, "loss": 0.8199, "step": 274900 }, { "epoch": 2.8, "learning_rate": 5.6414813501770584e-05, "loss": 0.8989, "step": 275000 }, { "epoch": 2.8, "learning_rate": 5.641166814761667e-05, "loss": 1.0718, "step": 275100 }, { "epoch": 2.8, "learning_rate": 5.640852150208978e-05, "loss": 0.8265, "step": 275200 }, { "epoch": 2.8, "learning_rate": 5.64053735653438e-05, "loss": 0.9047, "step": 275300 }, { "epoch": 2.81, "learning_rate": 5.640222433753263e-05, "loss": 0.8471, "step": 275400 }, { "epoch": 2.81, "learning_rate": 5.639907381881024e-05, "loss": 0.9006, "step": 275500 }, { "epoch": 2.81, "learning_rate": 5.63959220093307e-05, "loss": 0.9087, "step": 275600 }, { "epoch": 2.81, "learning_rate": 5.6392768909248085e-05, "loss": 0.9438, "step": 275700 }, { "epoch": 2.81, "learning_rate": 5.638961451871657e-05, "loss": 0.9513, "step": 275800 }, { "epoch": 2.81, "learning_rate": 5.6386458837890404e-05, "loss": 0.9192, "step": 275900 }, { "epoch": 2.81, "learning_rate": 5.638330186692385e-05, "loss": 0.9952, "step": 276000 }, { "epoch": 2.81, "learning_rate": 5.638014360597129e-05, "loss": 0.9226, "step": 276100 }, { "epoch": 2.81, "learning_rate": 5.637698405518714e-05, "loss": 0.9865, "step": 276200 }, { "epoch": 2.81, "learning_rate": 5.6373823214725876e-05, "loss": 1.042, "step": 276300 }, { "epoch": 2.82, "learning_rate": 5.637066108474204e-05, "loss": 0.8512, "step": 276400 }, { "epoch": 2.82, "learning_rate": 5.6367497665390266e-05, "loss": 0.8467, "step": 276500 }, { "epoch": 2.82, "learning_rate": 5.6364332956825194e-05, "loss": 0.8368, "step": 276600 }, { "epoch": 2.82, "learning_rate": 5.636116695920158e-05, "loss": 0.907, "step": 276700 }, { "epoch": 2.82, "learning_rate": 5.6357999672674226e-05, "loss": 0.8802, "step": 276800 }, { "epoch": 2.82, "learning_rate": 5.635483109739797e-05, "loss": 0.9079, "step": 276900 }, { "epoch": 2.82, "learning_rate": 5.6351661233527754e-05, "loss": 0.9171, "step": 277000 }, { "epoch": 2.82, "learning_rate": 5.6348490081218564e-05, "loss": 0.9014, "step": 277100 }, { "epoch": 2.82, "learning_rate": 5.6345317640625455e-05, "loss": 0.8428, "step": 277200 }, { "epoch": 2.83, "learning_rate": 5.6342143911903516e-05, "loss": 0.9874, "step": 277300 }, { "epoch": 2.83, "learning_rate": 5.633896889520796e-05, "loss": 0.9019, "step": 277400 }, { "epoch": 2.83, "learning_rate": 5.633579259069399e-05, "loss": 0.8927, "step": 277500 }, { "epoch": 2.83, "learning_rate": 5.6332614998516945e-05, "loss": 0.8153, "step": 277600 }, { "epoch": 2.83, "learning_rate": 5.6329436118832165e-05, "loss": 0.9364, "step": 277700 }, { "epoch": 2.83, "learning_rate": 5.632625595179509e-05, "loss": 0.9679, "step": 277800 }, { "epoch": 2.83, "learning_rate": 5.6323074497561185e-05, "loss": 0.9408, "step": 277900 }, { "epoch": 2.83, "learning_rate": 5.631989175628604e-05, "loss": 0.9853, "step": 278000 }, { "epoch": 2.83, "learning_rate": 5.631670772812526e-05, "loss": 0.8934, "step": 278100 }, { "epoch": 2.83, "learning_rate": 5.631352241323452e-05, "loss": 0.9214, "step": 278200 }, { "epoch": 2.84, "learning_rate": 5.631033581176956e-05, "loss": 0.8778, "step": 278300 }, { "epoch": 2.84, "learning_rate": 5.6307147923886195e-05, "loss": 0.8867, "step": 278400 }, { "epoch": 2.84, "learning_rate": 5.630395874974029e-05, "loss": 0.8421, "step": 278500 }, { "epoch": 2.84, "learning_rate": 5.6300768289487774e-05, "loss": 0.847, "step": 278600 }, { "epoch": 2.84, "learning_rate": 5.629757654328464e-05, "loss": 0.7722, "step": 278700 }, { "epoch": 2.84, "learning_rate": 5.629438351128695e-05, "loss": 0.8098, "step": 278800 }, { "epoch": 2.84, "learning_rate": 5.629118919365083e-05, "loss": 0.9551, "step": 278900 }, { "epoch": 2.84, "learning_rate": 5.628799359053245e-05, "loss": 0.8167, "step": 279000 }, { "epoch": 2.84, "learning_rate": 5.628482867733436e-05, "loss": 0.8282, "step": 279100 }, { "epoch": 2.84, "learning_rate": 5.6281662504539915e-05, "loss": 0.7864, "step": 279200 }, { "epoch": 2.85, "learning_rate": 5.627846307161123e-05, "loss": 0.8998, "step": 279300 }, { "epoch": 2.85, "learning_rate": 5.6275262353822524e-05, "loss": 0.8947, "step": 279400 }, { "epoch": 2.85, "learning_rate": 5.627206035133029e-05, "loss": 0.8634, "step": 279500 }, { "epoch": 2.85, "learning_rate": 5.62688570642911e-05, "loss": 0.9087, "step": 279600 }, { "epoch": 2.85, "learning_rate": 5.6265652492861565e-05, "loss": 0.8074, "step": 279700 }, { "epoch": 2.85, "learning_rate": 5.6262446637198354e-05, "loss": 0.8247, "step": 279800 }, { "epoch": 2.85, "learning_rate": 5.6259239497458233e-05, "loss": 0.8478, "step": 279900 }, { "epoch": 2.85, "learning_rate": 5.6256031073798006e-05, "loss": 0.8564, "step": 280000 }, { "epoch": 2.85, "learning_rate": 5.6252821366374556e-05, "loss": 0.8486, "step": 280100 }, { "epoch": 2.85, "learning_rate": 5.6249610375344793e-05, "loss": 0.8445, "step": 280200 }, { "epoch": 2.86, "learning_rate": 5.624639810086575e-05, "loss": 0.9499, "step": 280300 }, { "epoch": 2.86, "learning_rate": 5.6243184543094466e-05, "loss": 0.9415, "step": 280400 }, { "epoch": 2.86, "learning_rate": 5.623996970218806e-05, "loss": 0.9072, "step": 280500 }, { "epoch": 2.86, "learning_rate": 5.623675357830374e-05, "loss": 0.8137, "step": 280600 }, { "epoch": 2.86, "learning_rate": 5.623353617159874e-05, "loss": 0.8468, "step": 280700 }, { "epoch": 2.86, "learning_rate": 5.623031748223037e-05, "loss": 0.9591, "step": 280800 }, { "epoch": 2.86, "learning_rate": 5.6227097510356013e-05, "loss": 0.9549, "step": 280900 }, { "epoch": 2.86, "learning_rate": 5.62238762561331e-05, "loss": 0.8785, "step": 281000 }, { "epoch": 2.86, "learning_rate": 5.622065371971914e-05, "loss": 0.9843, "step": 281100 }, { "epoch": 2.86, "learning_rate": 5.621742990127167e-05, "loss": 0.9294, "step": 281200 }, { "epoch": 2.87, "learning_rate": 5.621423705829636e-05, "loss": 1.0382, "step": 281300 }, { "epoch": 2.87, "learning_rate": 5.6211010689071265e-05, "loss": 1.1271, "step": 281400 }, { "epoch": 2.87, "learning_rate": 5.6207815321135244e-05, "loss": 1.1141, "step": 281500 }, { "epoch": 2.87, "learning_rate": 5.6204586401757216e-05, "loss": 0.9543, "step": 281600 }, { "epoch": 2.87, "learning_rate": 5.62013562011313e-05, "loss": 1.0317, "step": 281700 }, { "epoch": 2.87, "learning_rate": 5.619812471941542e-05, "loss": 0.8983, "step": 281800 }, { "epoch": 2.87, "learning_rate": 5.619489195676757e-05, "loss": 0.9502, "step": 281900 }, { "epoch": 2.87, "learning_rate": 5.6191657913345844e-05, "loss": 0.9722, "step": 282000 }, { "epoch": 2.87, "learning_rate": 5.6188422589308346e-05, "loss": 0.9966, "step": 282100 }, { "epoch": 2.88, "learning_rate": 5.618518598481327e-05, "loss": 1.0188, "step": 282200 }, { "epoch": 2.88, "learning_rate": 5.618194810001886e-05, "loss": 0.9277, "step": 282300 }, { "epoch": 2.88, "learning_rate": 5.617870893508343e-05, "loss": 0.8661, "step": 282400 }, { "epoch": 2.88, "learning_rate": 5.617546849016537e-05, "loss": 0.8409, "step": 282500 }, { "epoch": 2.88, "learning_rate": 5.61722267654231e-05, "loss": 0.8358, "step": 282600 }, { "epoch": 2.88, "learning_rate": 5.6168983761015136e-05, "loss": 0.8698, "step": 282700 }, { "epoch": 2.88, "learning_rate": 5.6165739477100025e-05, "loss": 0.8788, "step": 282800 }, { "epoch": 2.88, "learning_rate": 5.616249391383641e-05, "loss": 0.8727, "step": 282900 }, { "epoch": 2.88, "learning_rate": 5.615924707138297e-05, "loss": 0.9651, "step": 283000 }, { "epoch": 2.88, "learning_rate": 5.615599894989846e-05, "loss": 0.8527, "step": 283100 }, { "epoch": 2.89, "learning_rate": 5.6152749549541684e-05, "loss": 0.9046, "step": 283200 }, { "epoch": 2.89, "learning_rate": 5.614949887047154e-05, "loss": 0.9876, "step": 283300 }, { "epoch": 2.89, "learning_rate": 5.614624691284694e-05, "loss": 1.0558, "step": 283400 }, { "epoch": 2.89, "learning_rate": 5.61429936768269e-05, "loss": 0.8692, "step": 283500 }, { "epoch": 2.89, "learning_rate": 5.613973916257049e-05, "loss": 0.8844, "step": 283600 }, { "epoch": 2.89, "learning_rate": 5.613648337023682e-05, "loss": 0.861, "step": 283700 }, { "epoch": 2.89, "learning_rate": 5.61332262999851e-05, "loss": 1.0241, "step": 283800 }, { "epoch": 2.89, "learning_rate": 5.612996795197456e-05, "loss": 0.8861, "step": 283900 }, { "epoch": 2.89, "learning_rate": 5.6126708326364526e-05, "loss": 0.7528, "step": 284000 }, { "epoch": 2.89, "learning_rate": 5.6123447423314365e-05, "loss": 0.9213, "step": 284100 }, { "epoch": 2.9, "learning_rate": 5.612018524298353e-05, "loss": 0.9137, "step": 284200 }, { "epoch": 2.9, "learning_rate": 5.611692178553151e-05, "loss": 0.7721, "step": 284300 }, { "epoch": 2.9, "learning_rate": 5.611365705111787e-05, "loss": 0.7391, "step": 284400 }, { "epoch": 2.9, "learning_rate": 5.611042370633404e-05, "loss": 0.9863, "step": 284500 }, { "epoch": 2.9, "learning_rate": 5.610715643124174e-05, "loss": 0.9222, "step": 284600 }, { "epoch": 2.9, "learning_rate": 5.610388787966529e-05, "loss": 0.9537, "step": 284700 }, { "epoch": 2.9, "learning_rate": 5.6100618051764506e-05, "loss": 0.9555, "step": 284800 }, { "epoch": 2.9, "learning_rate": 5.609734694769926e-05, "loss": 0.8755, "step": 284900 }, { "epoch": 2.9, "learning_rate": 5.609407456762948e-05, "loss": 1.0815, "step": 285000 }, { "epoch": 2.9, "learning_rate": 5.6090800911715183e-05, "loss": 1.0215, "step": 285100 }, { "epoch": 2.91, "learning_rate": 5.6087525980116416e-05, "loss": 0.9752, "step": 285200 }, { "epoch": 2.91, "learning_rate": 5.608424977299332e-05, "loss": 1.0344, "step": 285300 }, { "epoch": 2.91, "learning_rate": 5.608097229050606e-05, "loss": 0.9516, "step": 285400 }, { "epoch": 2.91, "learning_rate": 5.60776935328149e-05, "loss": 1.0518, "step": 285500 }, { "epoch": 2.91, "learning_rate": 5.6074413500080156e-05, "loss": 0.9529, "step": 285600 }, { "epoch": 2.91, "learning_rate": 5.6071132192462196e-05, "loss": 0.9231, "step": 285700 }, { "epoch": 2.91, "learning_rate": 5.6067882442254214e-05, "loss": 0.7926, "step": 285800 }, { "epoch": 2.91, "learning_rate": 5.606459859809602e-05, "loss": 0.8186, "step": 285900 }, { "epoch": 2.91, "learning_rate": 5.60613134795345e-05, "loss": 0.9389, "step": 286000 }, { "epoch": 2.91, "learning_rate": 5.605802708673028e-05, "loss": 0.9772, "step": 286100 }, { "epoch": 2.92, "learning_rate": 5.605473941984404e-05, "loss": 1.0135, "step": 286200 }, { "epoch": 2.92, "learning_rate": 5.605145047903653e-05, "loss": 0.9971, "step": 286300 }, { "epoch": 2.92, "learning_rate": 5.6048160264468555e-05, "loss": 0.9645, "step": 286400 }, { "epoch": 2.92, "learning_rate": 5.604486877630099e-05, "loss": 0.8918, "step": 286500 }, { "epoch": 2.92, "learning_rate": 5.604157601469477e-05, "loss": 1.034, "step": 286600 }, { "epoch": 2.92, "learning_rate": 5.603828197981089e-05, "loss": 0.9891, "step": 286700 }, { "epoch": 2.92, "learning_rate": 5.6034986671810416e-05, "loss": 0.9316, "step": 286800 }, { "epoch": 2.92, "learning_rate": 5.6031690090854455e-05, "loss": 0.8739, "step": 286900 }, { "epoch": 2.92, "learning_rate": 5.60283922371042e-05, "loss": 0.8775, "step": 287000 }, { "epoch": 2.93, "learning_rate": 5.602509311072089e-05, "loss": 0.989, "step": 287100 }, { "epoch": 2.93, "learning_rate": 5.602179271186584e-05, "loss": 0.8074, "step": 287200 }, { "epoch": 2.93, "learning_rate": 5.601849104070042e-05, "loss": 0.8277, "step": 287300 }, { "epoch": 2.93, "learning_rate": 5.6015188097386054e-05, "loss": 0.9182, "step": 287400 }, { "epoch": 2.93, "learning_rate": 5.601188388208424e-05, "loss": 0.9339, "step": 287500 }, { "epoch": 2.93, "learning_rate": 5.600857839495654e-05, "loss": 0.8433, "step": 287600 }, { "epoch": 2.93, "learning_rate": 5.600527163616456e-05, "loss": 0.9242, "step": 287700 }, { "epoch": 2.93, "learning_rate": 5.600196360587e-05, "loss": 0.7592, "step": 287800 }, { "epoch": 2.93, "learning_rate": 5.599865430423458e-05, "loss": 0.8787, "step": 287900 }, { "epoch": 2.93, "learning_rate": 5.5995343731420126e-05, "loss": 0.895, "step": 288000 }, { "epoch": 2.94, "learning_rate": 5.599203188758848e-05, "loss": 0.7914, "step": 288100 }, { "epoch": 2.94, "learning_rate": 5.5988718772901604e-05, "loss": 0.867, "step": 288200 }, { "epoch": 2.94, "learning_rate": 5.598540438752147e-05, "loss": 0.9595, "step": 288300 }, { "epoch": 2.94, "learning_rate": 5.598208873161013e-05, "loss": 0.8361, "step": 288400 }, { "epoch": 2.94, "learning_rate": 5.5978771805329694e-05, "loss": 0.7567, "step": 288500 }, { "epoch": 2.94, "learning_rate": 5.597545360884236e-05, "loss": 0.9576, "step": 288600 }, { "epoch": 2.94, "learning_rate": 5.5972134142310356e-05, "loss": 0.8621, "step": 288700 }, { "epoch": 2.94, "learning_rate": 5.5968813405895994e-05, "loss": 0.8485, "step": 288800 }, { "epoch": 2.94, "learning_rate": 5.5965491399761624e-05, "loss": 0.8123, "step": 288900 }, { "epoch": 2.94, "learning_rate": 5.596216812406967e-05, "loss": 0.9099, "step": 289000 }, { "epoch": 2.95, "learning_rate": 5.595884357898265e-05, "loss": 0.7902, "step": 289100 }, { "epoch": 2.95, "learning_rate": 5.595551776466307e-05, "loss": 0.9385, "step": 289200 }, { "epoch": 2.95, "learning_rate": 5.5952190681273575e-05, "loss": 0.849, "step": 289300 }, { "epoch": 2.95, "learning_rate": 5.5948862328976834e-05, "loss": 0.8103, "step": 289400 }, { "epoch": 2.95, "learning_rate": 5.594553270793558e-05, "loss": 0.8966, "step": 289500 }, { "epoch": 2.95, "learning_rate": 5.5942201818312605e-05, "loss": 0.8605, "step": 289600 }, { "epoch": 2.95, "learning_rate": 5.593886966027078e-05, "loss": 0.8312, "step": 289700 }, { "epoch": 2.95, "learning_rate": 5.593553623397303e-05, "loss": 0.8683, "step": 289800 }, { "epoch": 2.95, "learning_rate": 5.5932201539582324e-05, "loss": 0.7844, "step": 289900 }, { "epoch": 2.95, "learning_rate": 5.592886557726172e-05, "loss": 0.7137, "step": 290000 }, { "epoch": 2.96, "learning_rate": 5.5925528347174324e-05, "loss": 0.8806, "step": 290100 }, { "epoch": 2.96, "learning_rate": 5.5922189849483306e-05, "loss": 0.9878, "step": 290200 }, { "epoch": 2.96, "learning_rate": 5.591885008435191e-05, "loss": 0.8321, "step": 290300 }, { "epoch": 2.96, "learning_rate": 5.591550905194341e-05, "loss": 0.7921, "step": 290400 }, { "epoch": 2.96, "learning_rate": 5.591216675242118e-05, "loss": 0.8759, "step": 290500 }, { "epoch": 2.96, "learning_rate": 5.590882318594862e-05, "loss": 0.8571, "step": 290600 }, { "epoch": 2.96, "learning_rate": 5.590547835268924e-05, "loss": 0.8859, "step": 290700 }, { "epoch": 2.96, "learning_rate": 5.590213225280655e-05, "loss": 0.92, "step": 290800 }, { "epoch": 2.96, "learning_rate": 5.589878488646417e-05, "loss": 0.8402, "step": 290900 }, { "epoch": 2.96, "learning_rate": 5.589543625382577e-05, "loss": 0.8618, "step": 291000 }, { "epoch": 2.97, "learning_rate": 5.5892086355055064e-05, "loss": 0.8839, "step": 291100 }, { "epoch": 2.97, "learning_rate": 5.588873519031587e-05, "loss": 0.8851, "step": 291200 }, { "epoch": 2.97, "learning_rate": 5.588538275977201e-05, "loss": 0.9046, "step": 291300 }, { "epoch": 2.97, "learning_rate": 5.58820290635874e-05, "loss": 0.8374, "step": 291400 }, { "epoch": 2.97, "learning_rate": 5.587867410192603e-05, "loss": 0.8257, "step": 291500 }, { "epoch": 2.97, "learning_rate": 5.5875317874951936e-05, "loss": 0.7741, "step": 291600 }, { "epoch": 2.97, "learning_rate": 5.587196038282921e-05, "loss": 0.9485, "step": 291700 }, { "epoch": 2.97, "learning_rate": 5.586860162572202e-05, "loss": 1.075, "step": 291800 }, { "epoch": 2.97, "learning_rate": 5.5865241603794596e-05, "loss": 0.892, "step": 291900 }, { "epoch": 2.97, "learning_rate": 5.586191393633654e-05, "loss": 0.87, "step": 292000 }, { "epoch": 2.98, "learning_rate": 5.585858502954865e-05, "loss": 0.7792, "step": 292100 }, { "epoch": 2.98, "learning_rate": 5.585522123943137e-05, "loss": 0.9867, "step": 292200 }, { "epoch": 2.98, "learning_rate": 5.585185618514807e-05, "loss": 0.9321, "step": 292300 }, { "epoch": 2.98, "learning_rate": 5.584848986686328e-05, "loss": 0.7684, "step": 292400 }, { "epoch": 2.98, "learning_rate": 5.584512228474159e-05, "loss": 0.7608, "step": 292500 }, { "epoch": 2.98, "learning_rate": 5.5841753438947655e-05, "loss": 0.8884, "step": 292600 }, { "epoch": 2.98, "learning_rate": 5.5838383329646206e-05, "loss": 0.828, "step": 292700 }, { "epoch": 2.98, "learning_rate": 5.583501195700199e-05, "loss": 0.9038, "step": 292800 }, { "epoch": 2.98, "learning_rate": 5.583163932117988e-05, "loss": 0.9312, "step": 292900 }, { "epoch": 2.99, "learning_rate": 5.582826542234474e-05, "loss": 0.9039, "step": 293000 }, { "epoch": 2.99, "learning_rate": 5.582489026066157e-05, "loss": 0.8819, "step": 293100 }, { "epoch": 2.99, "learning_rate": 5.5821513836295385e-05, "loss": 0.8151, "step": 293200 }, { "epoch": 2.99, "learning_rate": 5.5818136149411264e-05, "loss": 0.9776, "step": 293300 }, { "epoch": 2.99, "learning_rate": 5.581475720017436e-05, "loss": 0.7361, "step": 293400 }, { "epoch": 2.99, "learning_rate": 5.581137698874988e-05, "loss": 0.9316, "step": 293500 }, { "epoch": 2.99, "learning_rate": 5.58079955153031e-05, "loss": 0.8464, "step": 293600 }, { "epoch": 2.99, "learning_rate": 5.580461277999936e-05, "loss": 0.8969, "step": 293700 }, { "epoch": 2.99, "learning_rate": 5.580122878300404e-05, "loss": 0.8936, "step": 293800 }, { "epoch": 2.99, "learning_rate": 5.579784352448261e-05, "loss": 1.052, "step": 293900 }, { "epoch": 3.0, "learning_rate": 5.5794457004600587e-05, "loss": 0.8714, "step": 294000 }, { "epoch": 3.0, "learning_rate": 5.579106922352355e-05, "loss": 0.95, "step": 294100 }, { "epoch": 3.0, "learning_rate": 5.5787680181417144e-05, "loss": 0.9016, "step": 294200 }, { "epoch": 3.0, "learning_rate": 5.578428987844706e-05, "loss": 0.8257, "step": 294300 }, { "epoch": 3.0, "learning_rate": 5.578089831477908e-05, "loss": 0.8762, "step": 294400 }, { "epoch": 3.0, "learning_rate": 5.5777505490579026e-05, "loss": 0.818, "step": 294500 }, { "epoch": 3.0, "learning_rate": 5.5774111406012784e-05, "loss": 0.792, "step": 294600 }, { "epoch": 3.0, "learning_rate": 5.57707160612463e-05, "loss": 0.8292, "step": 294700 }, { "epoch": 3.0, "learning_rate": 5.57673194564456e-05, "loss": 0.7993, "step": 294800 }, { "epoch": 3.0, "learning_rate": 5.576392159177675e-05, "loss": 0.7312, "step": 294900 }, { "epoch": 3.01, "learning_rate": 5.576052246740588e-05, "loss": 0.8982, "step": 295000 }, { "epoch": 3.01, "learning_rate": 5.57571220834992e-05, "loss": 0.8481, "step": 295100 }, { "epoch": 3.01, "learning_rate": 5.5753720440222954e-05, "loss": 0.7562, "step": 295200 }, { "epoch": 3.01, "learning_rate": 5.575031753774346e-05, "loss": 0.8092, "step": 295300 }, { "epoch": 3.01, "learning_rate": 5.574691337622712e-05, "loss": 0.7914, "step": 295400 }, { "epoch": 3.01, "learning_rate": 5.574350795584037e-05, "loss": 0.8746, "step": 295500 }, { "epoch": 3.01, "learning_rate": 5.574010127674971e-05, "loss": 0.7733, "step": 295600 }, { "epoch": 3.01, "learning_rate": 5.5736693339121694e-05, "loss": 0.7928, "step": 295700 }, { "epoch": 3.01, "learning_rate": 5.573328414312297e-05, "loss": 0.8171, "step": 295800 }, { "epoch": 3.01, "learning_rate": 5.5729873688920223e-05, "loss": 0.8629, "step": 295900 }, { "epoch": 3.02, "learning_rate": 5.572646197668019e-05, "loss": 0.753, "step": 296000 }, { "epoch": 3.02, "learning_rate": 5.57230490065697e-05, "loss": 0.7104, "step": 296100 }, { "epoch": 3.02, "learning_rate": 5.571963477875562e-05, "loss": 0.7908, "step": 296200 }, { "epoch": 3.02, "learning_rate": 5.571621929340488e-05, "loss": 0.9207, "step": 296300 }, { "epoch": 3.02, "learning_rate": 5.5712802550684496e-05, "loss": 0.8474, "step": 296400 }, { "epoch": 3.02, "learning_rate": 5.57093845507615e-05, "loss": 0.7819, "step": 296500 }, { "epoch": 3.02, "learning_rate": 5.5705965293803037e-05, "loss": 0.8818, "step": 296600 }, { "epoch": 3.02, "learning_rate": 5.5702544779976274e-05, "loss": 0.7718, "step": 296700 }, { "epoch": 3.02, "learning_rate": 5.5699123009448456e-05, "loss": 0.8192, "step": 296800 }, { "epoch": 3.02, "learning_rate": 5.569569998238688e-05, "loss": 0.8796, "step": 296900 }, { "epoch": 3.03, "learning_rate": 5.5692275698958924e-05, "loss": 0.7936, "step": 297000 }, { "epoch": 3.03, "learning_rate": 5.568885015933201e-05, "loss": 0.758, "step": 297100 }, { "epoch": 3.03, "learning_rate": 5.568542336367362e-05, "loss": 0.7569, "step": 297200 }, { "epoch": 3.03, "learning_rate": 5.568199531215131e-05, "loss": 0.829, "step": 297300 }, { "epoch": 3.03, "learning_rate": 5.5678566004932706e-05, "loss": 0.8228, "step": 297400 }, { "epoch": 3.03, "learning_rate": 5.567513544218546e-05, "loss": 0.8265, "step": 297500 }, { "epoch": 3.03, "learning_rate": 5.56717036240773e-05, "loss": 0.8532, "step": 297600 }, { "epoch": 3.03, "learning_rate": 5.566827055077605e-05, "loss": 0.704, "step": 297700 }, { "epoch": 3.03, "learning_rate": 5.5664836222449535e-05, "loss": 0.8346, "step": 297800 }, { "epoch": 3.04, "learning_rate": 5.56614006392657e-05, "loss": 0.7599, "step": 297900 }, { "epoch": 3.04, "learning_rate": 5.56579638013925e-05, "loss": 0.8473, "step": 298000 }, { "epoch": 3.04, "learning_rate": 5.5654525708998e-05, "loss": 0.7965, "step": 298100 }, { "epoch": 3.04, "learning_rate": 5.565108636225029e-05, "loss": 0.7055, "step": 298200 }, { "epoch": 3.04, "learning_rate": 5.564764576131753e-05, "loss": 0.9232, "step": 298300 }, { "epoch": 3.04, "learning_rate": 5.564420390636796e-05, "loss": 0.8211, "step": 298400 }, { "epoch": 3.04, "learning_rate": 5.564076079756984e-05, "loss": 0.804, "step": 298500 }, { "epoch": 3.04, "learning_rate": 5.563731643509155e-05, "loss": 0.8233, "step": 298600 }, { "epoch": 3.04, "learning_rate": 5.5633870819101474e-05, "loss": 0.8289, "step": 298700 }, { "epoch": 3.04, "learning_rate": 5.563042394976808e-05, "loss": 0.8055, "step": 298800 }, { "epoch": 3.05, "learning_rate": 5.562697582725992e-05, "loss": 0.69, "step": 298900 }, { "epoch": 3.05, "learning_rate": 5.562352645174558e-05, "loss": 0.8177, "step": 299000 }, { "epoch": 3.05, "learning_rate": 5.56200758233937e-05, "loss": 0.8271, "step": 299100 }, { "epoch": 3.05, "learning_rate": 5.561662394237301e-05, "loss": 0.7222, "step": 299200 }, { "epoch": 3.05, "learning_rate": 5.561317080885228e-05, "loss": 0.841, "step": 299300 }, { "epoch": 3.05, "learning_rate": 5.560971642300035e-05, "loss": 0.8305, "step": 299400 }, { "epoch": 3.05, "learning_rate": 5.560626078498612e-05, "loss": 0.8354, "step": 299500 }, { "epoch": 3.05, "learning_rate": 5.560280389497854e-05, "loss": 0.8884, "step": 299600 }, { "epoch": 3.05, "learning_rate": 5.559934575314665e-05, "loss": 0.7171, "step": 299700 }, { "epoch": 3.05, "learning_rate": 5.559592095978952e-05, "loss": 0.8061, "step": 299800 }, { "epoch": 3.06, "learning_rate": 5.5592460327330316e-05, "loss": 0.8752, "step": 299900 }, { "epoch": 3.06, "learning_rate": 5.5588998443552523e-05, "loss": 0.6619, "step": 300000 }, { "epoch": 3.06, "learning_rate": 5.558553530862541e-05, "loss": 0.7897, "step": 300100 }, { "epoch": 3.06, "learning_rate": 5.558207092271832e-05, "loss": 0.775, "step": 300200 }, { "epoch": 3.06, "learning_rate": 5.5578605286000624e-05, "loss": 0.9678, "step": 300300 }, { "epoch": 3.06, "learning_rate": 5.557513839864178e-05, "loss": 0.7759, "step": 300400 }, { "epoch": 3.06, "learning_rate": 5.5571670260811283e-05, "loss": 0.8929, "step": 300500 }, { "epoch": 3.06, "learning_rate": 5.5568200872678714e-05, "loss": 0.7326, "step": 300600 }, { "epoch": 3.06, "learning_rate": 5.556473023441371e-05, "loss": 0.8083, "step": 300700 }, { "epoch": 3.06, "learning_rate": 5.5561258346185955e-05, "loss": 0.7892, "step": 300800 }, { "epoch": 3.07, "learning_rate": 5.5557785208165215e-05, "loss": 0.9065, "step": 300900 }, { "epoch": 3.07, "learning_rate": 5.5554310820521295e-05, "loss": 0.8277, "step": 301000 }, { "epoch": 3.07, "learning_rate": 5.555083518342407e-05, "loss": 0.9168, "step": 301100 }, { "epoch": 3.07, "learning_rate": 5.554735829704349e-05, "loss": 0.7899, "step": 301200 }, { "epoch": 3.07, "learning_rate": 5.554388016154955e-05, "loss": 0.8155, "step": 301300 }, { "epoch": 3.07, "learning_rate": 5.554040077711231e-05, "loss": 0.6979, "step": 301400 }, { "epoch": 3.07, "learning_rate": 5.553692014390189e-05, "loss": 0.9031, "step": 301500 }, { "epoch": 3.07, "learning_rate": 5.553343826208847e-05, "loss": 0.6888, "step": 301600 }, { "epoch": 3.07, "learning_rate": 5.552995513184229e-05, "loss": 0.868, "step": 301700 }, { "epoch": 3.07, "learning_rate": 5.552647075333366e-05, "loss": 0.7612, "step": 301800 }, { "epoch": 3.08, "learning_rate": 5.552298512673294e-05, "loss": 0.8802, "step": 301900 }, { "epoch": 3.08, "learning_rate": 5.551949825221056e-05, "loss": 0.7936, "step": 302000 }, { "epoch": 3.08, "learning_rate": 5.551601012993702e-05, "loss": 0.8174, "step": 302100 }, { "epoch": 3.08, "learning_rate": 5.551252076008284e-05, "loss": 0.7172, "step": 302200 }, { "epoch": 3.08, "learning_rate": 5.550903014281866e-05, "loss": 0.8194, "step": 302300 }, { "epoch": 3.08, "learning_rate": 5.5505538278315134e-05, "loss": 0.7994, "step": 302400 }, { "epoch": 3.08, "learning_rate": 5.550204516674299e-05, "loss": 0.9413, "step": 302500 }, { "epoch": 3.08, "learning_rate": 5.549855080827304e-05, "loss": 0.7975, "step": 302600 }, { "epoch": 3.08, "learning_rate": 5.54950552030761e-05, "loss": 0.8288, "step": 302700 }, { "epoch": 3.08, "learning_rate": 5.5491558351323125e-05, "loss": 0.7787, "step": 302800 }, { "epoch": 3.09, "learning_rate": 5.548809524033548e-05, "loss": 0.7594, "step": 302900 }, { "epoch": 3.09, "learning_rate": 5.548459590844467e-05, "loss": 0.7344, "step": 303000 }, { "epoch": 3.09, "learning_rate": 5.548109533050921e-05, "loss": 0.8782, "step": 303100 }, { "epoch": 3.09, "learning_rate": 5.547759350670025e-05, "loss": 0.8046, "step": 303200 }, { "epoch": 3.09, "learning_rate": 5.547409043718901e-05, "loss": 0.8336, "step": 303300 }, { "epoch": 3.09, "learning_rate": 5.5470586122146764e-05, "loss": 0.8667, "step": 303400 }, { "epoch": 3.09, "learning_rate": 5.5467080561744866e-05, "loss": 0.7842, "step": 303500 }, { "epoch": 3.09, "learning_rate": 5.5463573756154696e-05, "loss": 0.9258, "step": 303600 }, { "epoch": 3.09, "learning_rate": 5.546010079221607e-05, "loss": 0.8986, "step": 303700 }, { "epoch": 3.1, "learning_rate": 5.545659150921144e-05, "loss": 0.7306, "step": 303800 }, { "epoch": 3.1, "learning_rate": 5.54530809815314e-05, "loss": 0.8076, "step": 303900 }, { "epoch": 3.1, "learning_rate": 5.544960433322916e-05, "loss": 0.7621, "step": 304000 }, { "epoch": 3.1, "learning_rate": 5.544609132915577e-05, "loss": 0.8193, "step": 304100 }, { "epoch": 3.1, "learning_rate": 5.544257708092036e-05, "loss": 0.8694, "step": 304200 }, { "epoch": 3.1, "learning_rate": 5.543906158869477e-05, "loss": 0.8475, "step": 304300 }, { "epoch": 3.1, "learning_rate": 5.543554485265088e-05, "loss": 0.887, "step": 304400 }, { "epoch": 3.1, "learning_rate": 5.543202687296064e-05, "loss": 0.8971, "step": 304500 }, { "epoch": 3.1, "learning_rate": 5.542850764979605e-05, "loss": 0.7923, "step": 304600 }, { "epoch": 3.1, "learning_rate": 5.5424987183329185e-05, "loss": 0.7649, "step": 304700 }, { "epoch": 3.11, "learning_rate": 5.542146547373218e-05, "loss": 0.778, "step": 304800 }, { "epoch": 3.11, "learning_rate": 5.5417942521177216e-05, "loss": 0.9036, "step": 304900 }, { "epoch": 3.11, "learning_rate": 5.541441832583656e-05, "loss": 0.7702, "step": 305000 }, { "epoch": 3.11, "learning_rate": 5.54108928878825e-05, "loss": 0.7857, "step": 305100 }, { "epoch": 3.11, "learning_rate": 5.540736620748743e-05, "loss": 0.7958, "step": 305200 }, { "epoch": 3.11, "learning_rate": 5.540383828482377e-05, "loss": 0.7862, "step": 305300 }, { "epoch": 3.11, "learning_rate": 5.540030912006403e-05, "loss": 0.7545, "step": 305400 }, { "epoch": 3.11, "learning_rate": 5.539677871338074e-05, "loss": 0.6983, "step": 305500 }, { "epoch": 3.11, "learning_rate": 5.539324706494654e-05, "loss": 0.8891, "step": 305600 }, { "epoch": 3.11, "learning_rate": 5.5389714174934104e-05, "loss": 0.8237, "step": 305700 }, { "epoch": 3.12, "learning_rate": 5.5386180043516155e-05, "loss": 0.7983, "step": 305800 }, { "epoch": 3.12, "learning_rate": 5.53826446708655e-05, "loss": 0.8323, "step": 305900 }, { "epoch": 3.12, "learning_rate": 5.537910805715499e-05, "loss": 0.8797, "step": 306000 }, { "epoch": 3.12, "learning_rate": 5.537557020255755e-05, "loss": 0.8403, "step": 306100 }, { "epoch": 3.12, "learning_rate": 5.537203110724617e-05, "loss": 0.8576, "step": 306200 }, { "epoch": 3.12, "learning_rate": 5.5368490771393884e-05, "loss": 0.8132, "step": 306300 }, { "epoch": 3.12, "learning_rate": 5.536494919517379e-05, "loss": 0.7458, "step": 306400 }, { "epoch": 3.12, "learning_rate": 5.536140637875904e-05, "loss": 0.8461, "step": 306500 }, { "epoch": 3.12, "learning_rate": 5.535786232232287e-05, "loss": 0.8237, "step": 306600 }, { "epoch": 3.12, "learning_rate": 5.535431702603856e-05, "loss": 0.8142, "step": 306700 }, { "epoch": 3.13, "learning_rate": 5.535077049007946e-05, "loss": 0.8405, "step": 306800 }, { "epoch": 3.13, "learning_rate": 5.534722271461896e-05, "loss": 0.8549, "step": 306900 }, { "epoch": 3.13, "learning_rate": 5.534367369983054e-05, "loss": 0.7919, "step": 307000 }, { "epoch": 3.13, "learning_rate": 5.5340123445887724e-05, "loss": 0.7916, "step": 307100 }, { "epoch": 3.13, "learning_rate": 5.533657195296408e-05, "loss": 0.7415, "step": 307200 }, { "epoch": 3.13, "learning_rate": 5.533301922123328e-05, "loss": 0.7761, "step": 307300 }, { "epoch": 3.13, "learning_rate": 5.532946525086901e-05, "loss": 0.8897, "step": 307400 }, { "epoch": 3.13, "learning_rate": 5.532591004204506e-05, "loss": 0.8798, "step": 307500 }, { "epoch": 3.13, "learning_rate": 5.5322353594935236e-05, "loss": 0.9649, "step": 307600 }, { "epoch": 3.13, "learning_rate": 5.531879590971344e-05, "loss": 0.9134, "step": 307700 }, { "epoch": 3.14, "learning_rate": 5.531523698655362e-05, "loss": 0.9157, "step": 307800 }, { "epoch": 3.14, "learning_rate": 5.531167682562979e-05, "loss": 0.8242, "step": 307900 }, { "epoch": 3.14, "learning_rate": 5.5308115427116e-05, "loss": 0.7929, "step": 308000 }, { "epoch": 3.14, "learning_rate": 5.530455279118641e-05, "loss": 0.8736, "step": 308100 }, { "epoch": 3.14, "learning_rate": 5.530098891801519e-05, "loss": 0.8234, "step": 308200 }, { "epoch": 3.14, "learning_rate": 5.52974238077766e-05, "loss": 0.7991, "step": 308300 }, { "epoch": 3.14, "learning_rate": 5.529385746064496e-05, "loss": 0.7409, "step": 308400 }, { "epoch": 3.14, "learning_rate": 5.5290289876794634e-05, "loss": 0.9241, "step": 308500 }, { "epoch": 3.14, "learning_rate": 5.528672105640005e-05, "loss": 0.8729, "step": 308600 }, { "epoch": 3.15, "learning_rate": 5.528315099963572e-05, "loss": 0.8079, "step": 308700 }, { "epoch": 3.15, "learning_rate": 5.5279579706676194e-05, "loss": 0.8226, "step": 308800 }, { "epoch": 3.15, "learning_rate": 5.5276007177696064e-05, "loss": 0.7893, "step": 308900 }, { "epoch": 3.15, "learning_rate": 5.527243341287003e-05, "loss": 0.9731, "step": 309000 }, { "epoch": 3.15, "learning_rate": 5.5268858412372814e-05, "loss": 0.692, "step": 309100 }, { "epoch": 3.15, "learning_rate": 5.526528217637923e-05, "loss": 0.837, "step": 309200 }, { "epoch": 3.15, "learning_rate": 5.526170470506411e-05, "loss": 0.791, "step": 309300 }, { "epoch": 3.15, "learning_rate": 5.52581259986024e-05, "loss": 0.8364, "step": 309400 }, { "epoch": 3.15, "learning_rate": 5.525454605716905e-05, "loss": 0.8224, "step": 309500 }, { "epoch": 3.15, "learning_rate": 5.5250964880939105e-05, "loss": 0.7625, "step": 309600 }, { "epoch": 3.16, "learning_rate": 5.524738247008768e-05, "loss": 0.9721, "step": 309700 }, { "epoch": 3.16, "learning_rate": 5.5243798824789905e-05, "loss": 0.7669, "step": 309800 }, { "epoch": 3.16, "learning_rate": 5.524021394522102e-05, "loss": 0.8734, "step": 309900 }, { "epoch": 3.16, "learning_rate": 5.523662783155631e-05, "loss": 0.8904, "step": 310000 }, { "epoch": 3.16, "learning_rate": 5.523304048397109e-05, "loss": 0.7375, "step": 310100 }, { "epoch": 3.16, "learning_rate": 5.5229451902640785e-05, "loss": 0.8315, "step": 310200 }, { "epoch": 3.16, "learning_rate": 5.5225862087740835e-05, "loss": 0.8048, "step": 310300 }, { "epoch": 3.16, "learning_rate": 5.5222271039446774e-05, "loss": 0.8243, "step": 310400 }, { "epoch": 3.16, "learning_rate": 5.5218678757934175e-05, "loss": 0.762, "step": 310500 }, { "epoch": 3.16, "learning_rate": 5.521508524337868e-05, "loss": 0.7946, "step": 310600 }, { "epoch": 3.17, "learning_rate": 5.5211490495955996e-05, "loss": 0.7114, "step": 310700 }, { "epoch": 3.17, "learning_rate": 5.520789451584188e-05, "loss": 0.7924, "step": 310800 }, { "epoch": 3.17, "learning_rate": 5.5204297303212155e-05, "loss": 0.8473, "step": 310900 }, { "epoch": 3.17, "learning_rate": 5.52006988582427e-05, "loss": 0.7552, "step": 311000 }, { "epoch": 3.17, "learning_rate": 5.5197099181109465e-05, "loss": 0.7998, "step": 311100 }, { "epoch": 3.17, "learning_rate": 5.5193498271988446e-05, "loss": 0.8675, "step": 311200 }, { "epoch": 3.17, "learning_rate": 5.518989613105571e-05, "loss": 0.8509, "step": 311300 }, { "epoch": 3.17, "learning_rate": 5.518629275848738e-05, "loss": 0.8878, "step": 311400 }, { "epoch": 3.17, "learning_rate": 5.5182688154459626e-05, "loss": 0.7066, "step": 311500 }, { "epoch": 3.17, "learning_rate": 5.517908231914872e-05, "loss": 0.8897, "step": 311600 }, { "epoch": 3.18, "learning_rate": 5.517547525273094e-05, "loss": 0.8146, "step": 311700 }, { "epoch": 3.18, "learning_rate": 5.5171866955382664e-05, "loss": 0.7305, "step": 311800 }, { "epoch": 3.18, "learning_rate": 5.5168257427280316e-05, "loss": 0.8109, "step": 311900 }, { "epoch": 3.18, "learning_rate": 5.5164682782277944e-05, "loss": 0.7405, "step": 312000 }, { "epoch": 3.18, "learning_rate": 5.516107080550008e-05, "loss": 0.7427, "step": 312100 }, { "epoch": 3.18, "learning_rate": 5.515745759849602e-05, "loss": 0.7829, "step": 312200 }, { "epoch": 3.18, "learning_rate": 5.515384316144241e-05, "loss": 0.6698, "step": 312300 }, { "epoch": 3.18, "learning_rate": 5.515022749451599e-05, "loss": 0.7891, "step": 312400 }, { "epoch": 3.18, "learning_rate": 5.514661059789352e-05, "loss": 0.7424, "step": 312500 }, { "epoch": 3.18, "learning_rate": 5.5142992471751864e-05, "loss": 0.7787, "step": 312600 }, { "epoch": 3.19, "learning_rate": 5.513937311626792e-05, "loss": 0.773, "step": 312700 }, { "epoch": 3.19, "learning_rate": 5.513578874354893e-05, "loss": 0.8729, "step": 312800 }, { "epoch": 3.19, "learning_rate": 5.513216694220037e-05, "loss": 0.7631, "step": 312900 }, { "epoch": 3.19, "learning_rate": 5.512854391203884e-05, "loss": 0.815, "step": 313000 }, { "epoch": 3.19, "learning_rate": 5.512491965324146e-05, "loss": 0.6362, "step": 313100 }, { "epoch": 3.19, "learning_rate": 5.512129416598545e-05, "loss": 0.8626, "step": 313200 }, { "epoch": 3.19, "learning_rate": 5.5117667450448075e-05, "loss": 0.6971, "step": 313300 }, { "epoch": 3.19, "learning_rate": 5.511403950680666e-05, "loss": 0.8308, "step": 313400 }, { "epoch": 3.19, "learning_rate": 5.511041033523857e-05, "loss": 0.7588, "step": 313500 }, { "epoch": 3.2, "learning_rate": 5.5106779935921284e-05, "loss": 0.8053, "step": 313600 }, { "epoch": 3.2, "learning_rate": 5.510314830903227e-05, "loss": 0.7677, "step": 313700 }, { "epoch": 3.2, "learning_rate": 5.5099515454749124e-05, "loss": 0.7565, "step": 313800 }, { "epoch": 3.2, "learning_rate": 5.5095881373249456e-05, "loss": 0.7855, "step": 313900 }, { "epoch": 3.2, "learning_rate": 5.509224606471095e-05, "loss": 0.8173, "step": 314000 }, { "epoch": 3.2, "learning_rate": 5.508860952931136e-05, "loss": 0.8468, "step": 314100 }, { "epoch": 3.2, "learning_rate": 5.5084971767228484e-05, "loss": 0.7788, "step": 314200 }, { "epoch": 3.2, "learning_rate": 5.508133277864018e-05, "loss": 0.7336, "step": 314300 }, { "epoch": 3.2, "learning_rate": 5.507769256372439e-05, "loss": 0.7865, "step": 314400 }, { "epoch": 3.2, "learning_rate": 5.507405112265909e-05, "loss": 0.7773, "step": 314500 }, { "epoch": 3.21, "learning_rate": 5.5070444888360655e-05, "loss": 0.8586, "step": 314600 }, { "epoch": 3.21, "learning_rate": 5.5066801007787576e-05, "loss": 0.7526, "step": 314700 }, { "epoch": 3.21, "learning_rate": 5.506315590159752e-05, "loss": 0.9598, "step": 314800 }, { "epoch": 3.21, "learning_rate": 5.505950956996871e-05, "loss": 0.8163, "step": 314900 }, { "epoch": 3.21, "learning_rate": 5.505586201307942e-05, "loss": 0.7115, "step": 315000 }, { "epoch": 3.21, "learning_rate": 5.505221323110802e-05, "loss": 0.7441, "step": 315100 }, { "epoch": 3.21, "learning_rate": 5.504856322423288e-05, "loss": 0.8354, "step": 315200 }, { "epoch": 3.21, "learning_rate": 5.504491199263249e-05, "loss": 0.8205, "step": 315300 }, { "epoch": 3.21, "learning_rate": 5.5041259536485365e-05, "loss": 0.8387, "step": 315400 }, { "epoch": 3.21, "learning_rate": 5.5037605855970083e-05, "loss": 0.8436, "step": 315500 }, { "epoch": 3.22, "learning_rate": 5.503395095126529e-05, "loss": 0.7858, "step": 315600 }, { "epoch": 3.22, "learning_rate": 5.503029482254969e-05, "loss": 0.8135, "step": 315700 }, { "epoch": 3.22, "learning_rate": 5.5026637470002056e-05, "loss": 0.8738, "step": 315800 }, { "epoch": 3.22, "learning_rate": 5.5022978893801185e-05, "loss": 0.7975, "step": 315900 }, { "epoch": 3.22, "learning_rate": 5.501931909412598e-05, "loss": 0.8659, "step": 316000 }, { "epoch": 3.22, "learning_rate": 5.501565807115538e-05, "loss": 0.7053, "step": 316100 }, { "epoch": 3.22, "learning_rate": 5.501199582506839e-05, "loss": 0.8962, "step": 316200 }, { "epoch": 3.22, "learning_rate": 5.500833235604407e-05, "loss": 0.7892, "step": 316300 }, { "epoch": 3.22, "learning_rate": 5.500470431723142e-05, "loss": 0.7523, "step": 316400 }, { "epoch": 3.22, "learning_rate": 5.500103841509477e-05, "loss": 0.9015, "step": 316500 }, { "epoch": 3.23, "learning_rate": 5.499737129055652e-05, "loss": 0.8482, "step": 316600 }, { "epoch": 3.23, "learning_rate": 5.499370294379601e-05, "loss": 0.8045, "step": 316700 }, { "epoch": 3.23, "learning_rate": 5.499003337499256e-05, "loss": 0.738, "step": 316800 }, { "epoch": 3.23, "learning_rate": 5.498636258432563e-05, "loss": 0.7269, "step": 316900 }, { "epoch": 3.23, "learning_rate": 5.498269057197467e-05, "loss": 0.8382, "step": 317000 }, { "epoch": 3.23, "learning_rate": 5.497901733811924e-05, "loss": 0.871, "step": 317100 }, { "epoch": 3.23, "learning_rate": 5.4975342882938915e-05, "loss": 0.8269, "step": 317200 }, { "epoch": 3.23, "learning_rate": 5.4971667206613376e-05, "loss": 0.8114, "step": 317300 }, { "epoch": 3.23, "learning_rate": 5.496799030932233e-05, "loss": 0.7869, "step": 317400 }, { "epoch": 3.23, "learning_rate": 5.4964312191245556e-05, "loss": 0.7814, "step": 317500 }, { "epoch": 3.24, "learning_rate": 5.4960632852562905e-05, "loss": 0.7729, "step": 317600 }, { "epoch": 3.24, "learning_rate": 5.4956952293454264e-05, "loss": 0.8393, "step": 317700 }, { "epoch": 3.24, "learning_rate": 5.4953270514099586e-05, "loss": 0.8461, "step": 317800 }, { "epoch": 3.24, "learning_rate": 5.4949587514678894e-05, "loss": 0.8219, "step": 317900 }, { "epoch": 3.24, "learning_rate": 5.494590329537227e-05, "loss": 0.9927, "step": 318000 }, { "epoch": 3.24, "learning_rate": 5.4942217856359834e-05, "loss": 0.7248, "step": 318100 }, { "epoch": 3.24, "learning_rate": 5.4938531197821794e-05, "loss": 0.7973, "step": 318200 }, { "epoch": 3.24, "learning_rate": 5.4934843319938404e-05, "loss": 0.8014, "step": 318300 }, { "epoch": 3.24, "learning_rate": 5.4931154222889985e-05, "loss": 0.7499, "step": 318400 }, { "epoch": 3.24, "learning_rate": 5.4927463906856896e-05, "loss": 0.7882, "step": 318500 }, { "epoch": 3.25, "learning_rate": 5.492377237201959e-05, "loss": 0.7338, "step": 318600 }, { "epoch": 3.25, "learning_rate": 5.4920079618558555e-05, "loss": 0.7708, "step": 318700 }, { "epoch": 3.25, "learning_rate": 5.4916385646654334e-05, "loss": 0.6926, "step": 318800 }, { "epoch": 3.25, "learning_rate": 5.491269045648755e-05, "loss": 0.8163, "step": 318900 }, { "epoch": 3.25, "learning_rate": 5.4908994048238875e-05, "loss": 0.7346, "step": 319000 }, { "epoch": 3.25, "learning_rate": 5.490529642208905e-05, "loss": 0.7821, "step": 319100 }, { "epoch": 3.25, "learning_rate": 5.490159757821884e-05, "loss": 0.7507, "step": 319200 }, { "epoch": 3.25, "learning_rate": 5.4897897516809116e-05, "loss": 0.7696, "step": 319300 }, { "epoch": 3.25, "learning_rate": 5.48941962380408e-05, "loss": 0.7881, "step": 319400 }, { "epoch": 3.26, "learning_rate": 5.489049374209483e-05, "loss": 0.7664, "step": 319500 }, { "epoch": 3.26, "learning_rate": 5.488679002915226e-05, "loss": 0.8108, "step": 319600 }, { "epoch": 3.26, "learning_rate": 5.488308509939418e-05, "loss": 0.8469, "step": 319700 }, { "epoch": 3.26, "learning_rate": 5.487937895300174e-05, "loss": 0.7825, "step": 319800 }, { "epoch": 3.26, "learning_rate": 5.4875671590156126e-05, "loss": 0.8389, "step": 319900 }, { "epoch": 3.26, "learning_rate": 5.487196301103863e-05, "loss": 0.9161, "step": 320000 }, { "epoch": 3.26, "learning_rate": 5.48682903198017e-05, "loss": 0.9238, "step": 320100 }, { "epoch": 3.26, "learning_rate": 5.4864579320842656e-05, "loss": 0.7843, "step": 320200 }, { "epoch": 3.26, "learning_rate": 5.486086710615407e-05, "loss": 0.7142, "step": 320300 }, { "epoch": 3.26, "learning_rate": 5.4857153675917444e-05, "loss": 0.8741, "step": 320400 }, { "epoch": 3.27, "learning_rate": 5.4853439030314346e-05, "loss": 0.7631, "step": 320500 }, { "epoch": 3.27, "learning_rate": 5.484972316952639e-05, "loss": 0.7563, "step": 320600 }, { "epoch": 3.27, "learning_rate": 5.4846006093735274e-05, "loss": 0.8291, "step": 320700 }, { "epoch": 3.27, "learning_rate": 5.484228780312272e-05, "loss": 0.8175, "step": 320800 }, { "epoch": 3.27, "learning_rate": 5.4838568297870553e-05, "loss": 0.7718, "step": 320900 }, { "epoch": 3.27, "learning_rate": 5.483484757816063e-05, "loss": 0.7971, "step": 321000 }, { "epoch": 3.27, "learning_rate": 5.483112564417486e-05, "loss": 0.7681, "step": 321100 }, { "epoch": 3.27, "learning_rate": 5.4827402496095225e-05, "loss": 0.8042, "step": 321200 }, { "epoch": 3.27, "learning_rate": 5.4823678134103776e-05, "loss": 0.8367, "step": 321300 }, { "epoch": 3.27, "learning_rate": 5.481995255838261e-05, "loss": 0.7896, "step": 321400 }, { "epoch": 3.28, "learning_rate": 5.481622576911387e-05, "loss": 0.8167, "step": 321500 }, { "epoch": 3.28, "learning_rate": 5.481249776647979e-05, "loss": 0.7701, "step": 321600 }, { "epoch": 3.28, "learning_rate": 5.480876855066266e-05, "loss": 0.7352, "step": 321700 }, { "epoch": 3.28, "learning_rate": 5.480503812184478e-05, "loss": 0.8721, "step": 321800 }, { "epoch": 3.28, "learning_rate": 5.480130648020857e-05, "loss": 0.7712, "step": 321900 }, { "epoch": 3.28, "learning_rate": 5.479757362593648e-05, "loss": 0.7751, "step": 322000 }, { "epoch": 3.28, "learning_rate": 5.479383955921101e-05, "loss": 0.8118, "step": 322100 }, { "epoch": 3.28, "learning_rate": 5.4790104280214776e-05, "loss": 0.7559, "step": 322200 }, { "epoch": 3.28, "learning_rate": 5.478636778913036e-05, "loss": 0.8321, "step": 322300 }, { "epoch": 3.28, "learning_rate": 5.478263008614048e-05, "loss": 0.8676, "step": 322400 }, { "epoch": 3.29, "learning_rate": 5.477889117142788e-05, "loss": 0.7253, "step": 322500 }, { "epoch": 3.29, "learning_rate": 5.4775151045175394e-05, "loss": 0.8268, "step": 322600 }, { "epoch": 3.29, "learning_rate": 5.4771409707565855e-05, "loss": 0.7629, "step": 322700 }, { "epoch": 3.29, "learning_rate": 5.476766715878222e-05, "loss": 0.7418, "step": 322800 }, { "epoch": 3.29, "learning_rate": 5.4763923399007456e-05, "loss": 0.7683, "step": 322900 }, { "epoch": 3.29, "learning_rate": 5.476017842842462e-05, "loss": 0.7908, "step": 323000 }, { "epoch": 3.29, "learning_rate": 5.475643224721682e-05, "loss": 0.7812, "step": 323100 }, { "epoch": 3.29, "learning_rate": 5.4752684855567236e-05, "loss": 0.8362, "step": 323200 }, { "epoch": 3.29, "learning_rate": 5.4748936253659065e-05, "loss": 0.7867, "step": 323300 }, { "epoch": 3.29, "learning_rate": 5.474518644167562e-05, "loss": 0.8036, "step": 323400 }, { "epoch": 3.3, "learning_rate": 5.4741435419800215e-05, "loss": 0.6976, "step": 323500 }, { "epoch": 3.3, "learning_rate": 5.473768318821627e-05, "loss": 0.7112, "step": 323600 }, { "epoch": 3.3, "learning_rate": 5.4733929747107245e-05, "loss": 0.7624, "step": 323700 }, { "epoch": 3.3, "learning_rate": 5.473017509665666e-05, "loss": 0.9007, "step": 323800 }, { "epoch": 3.3, "learning_rate": 5.4726419237048096e-05, "loss": 0.76, "step": 323900 }, { "epoch": 3.3, "learning_rate": 5.472266216846519e-05, "loss": 0.8195, "step": 324000 }, { "epoch": 3.3, "learning_rate": 5.471890389109164e-05, "loss": 0.8038, "step": 324100 }, { "epoch": 3.3, "learning_rate": 5.471514440511121e-05, "loss": 0.7712, "step": 324200 }, { "epoch": 3.3, "learning_rate": 5.4711383710707703e-05, "loss": 0.7628, "step": 324300 }, { "epoch": 3.31, "learning_rate": 5.4707621808065e-05, "loss": 0.8106, "step": 324400 }, { "epoch": 3.31, "learning_rate": 5.470385869736705e-05, "loss": 0.811, "step": 324500 }, { "epoch": 3.31, "learning_rate": 5.470009437879783e-05, "loss": 0.9012, "step": 324600 }, { "epoch": 3.31, "learning_rate": 5.4696328852541396e-05, "loss": 0.8155, "step": 324700 }, { "epoch": 3.31, "learning_rate": 5.469256211878186e-05, "loss": 0.826, "step": 324800 }, { "epoch": 3.31, "learning_rate": 5.468879417770339e-05, "loss": 0.7043, "step": 324900 }, { "epoch": 3.31, "learning_rate": 5.468502502949023e-05, "loss": 0.8091, "step": 325000 }, { "epoch": 3.31, "learning_rate": 5.468125467432665e-05, "loss": 0.7067, "step": 325100 }, { "epoch": 3.31, "learning_rate": 5.4677483112397016e-05, "loss": 0.7417, "step": 325200 }, { "epoch": 3.31, "learning_rate": 5.467371034388571e-05, "loss": 0.7695, "step": 325300 }, { "epoch": 3.32, "learning_rate": 5.466993636897722e-05, "loss": 0.816, "step": 325400 }, { "epoch": 3.32, "learning_rate": 5.4666161187856073e-05, "loss": 0.7886, "step": 325500 }, { "epoch": 3.32, "learning_rate": 5.4662384800706835e-05, "loss": 0.8286, "step": 325600 }, { "epoch": 3.32, "learning_rate": 5.465860720771416e-05, "loss": 0.8942, "step": 325700 }, { "epoch": 3.32, "learning_rate": 5.465482840906275e-05, "loss": 0.7467, "step": 325800 }, { "epoch": 3.32, "learning_rate": 5.4651048404937355e-05, "loss": 0.7576, "step": 325900 }, { "epoch": 3.32, "learning_rate": 5.4647267195522815e-05, "loss": 0.7707, "step": 326000 }, { "epoch": 3.32, "learning_rate": 5.464348478100399e-05, "loss": 0.7361, "step": 326100 }, { "epoch": 3.32, "learning_rate": 5.463970116156582e-05, "loss": 0.72, "step": 326200 }, { "epoch": 3.32, "learning_rate": 5.46359163373933e-05, "loss": 0.7251, "step": 326300 }, { "epoch": 3.33, "learning_rate": 5.46321303086715e-05, "loss": 0.7344, "step": 326400 }, { "epoch": 3.33, "learning_rate": 5.4628343075585525e-05, "loss": 0.6811, "step": 326500 }, { "epoch": 3.33, "learning_rate": 5.4624554638320546e-05, "loss": 0.7626, "step": 326600 }, { "epoch": 3.33, "learning_rate": 5.46207649970618e-05, "loss": 0.8888, "step": 326700 }, { "epoch": 3.33, "learning_rate": 5.461701206640349e-05, "loss": 0.7839, "step": 326800 }, { "epoch": 3.33, "learning_rate": 5.461322002974844e-05, "loss": 0.7151, "step": 326900 }, { "epoch": 3.33, "learning_rate": 5.460942678965382e-05, "loss": 0.7741, "step": 327000 }, { "epoch": 3.33, "learning_rate": 5.460567029669409e-05, "loss": 0.643, "step": 327100 }, { "epoch": 3.33, "learning_rate": 5.460187466230654e-05, "loss": 0.6822, "step": 327200 }, { "epoch": 3.33, "learning_rate": 5.459807782503413e-05, "loss": 0.7695, "step": 327300 }, { "epoch": 3.34, "learning_rate": 5.459427978506253e-05, "loss": 0.8733, "step": 327400 }, { "epoch": 3.34, "learning_rate": 5.459048054257741e-05, "loss": 0.8417, "step": 327500 }, { "epoch": 3.34, "learning_rate": 5.458668009776454e-05, "loss": 0.8615, "step": 327600 }, { "epoch": 3.34, "learning_rate": 5.4582878450809734e-05, "loss": 0.8834, "step": 327700 }, { "epoch": 3.34, "learning_rate": 5.457907560189888e-05, "loss": 0.9337, "step": 327800 }, { "epoch": 3.34, "learning_rate": 5.4575271551217905e-05, "loss": 0.7733, "step": 327900 }, { "epoch": 3.34, "learning_rate": 5.4571466298952816e-05, "loss": 0.8469, "step": 328000 }, { "epoch": 3.34, "learning_rate": 5.4567659845289656e-05, "loss": 0.8081, "step": 328100 }, { "epoch": 3.34, "learning_rate": 5.4563852190414534e-05, "loss": 0.867, "step": 328200 }, { "epoch": 3.34, "learning_rate": 5.456004333451364e-05, "loss": 0.7387, "step": 328300 }, { "epoch": 3.35, "learning_rate": 5.4556233277773194e-05, "loss": 0.8197, "step": 328400 }, { "epoch": 3.35, "learning_rate": 5.4552422020379474e-05, "loss": 0.7813, "step": 328500 }, { "epoch": 3.35, "learning_rate": 5.454860956251885e-05, "loss": 0.8357, "step": 328600 }, { "epoch": 3.35, "learning_rate": 5.4544795904377714e-05, "loss": 0.7754, "step": 328700 }, { "epoch": 3.35, "learning_rate": 5.4540981046142526e-05, "loss": 0.8294, "step": 328800 }, { "epoch": 3.35, "learning_rate": 5.453716498799982e-05, "loss": 0.817, "step": 328900 }, { "epoch": 3.35, "learning_rate": 5.4533347730136184e-05, "loss": 0.7979, "step": 329000 }, { "epoch": 3.35, "learning_rate": 5.4529529272738246e-05, "loss": 0.8336, "step": 329100 }, { "epoch": 3.35, "learning_rate": 5.4525709615992714e-05, "loss": 0.8298, "step": 329200 }, { "epoch": 3.35, "learning_rate": 5.452188876008634e-05, "loss": 0.8159, "step": 329300 }, { "epoch": 3.36, "learning_rate": 5.451806670520595e-05, "loss": 0.7262, "step": 329400 }, { "epoch": 3.36, "learning_rate": 5.45142434515384e-05, "loss": 0.7565, "step": 329500 }, { "epoch": 3.36, "learning_rate": 5.451041899927066e-05, "loss": 0.8151, "step": 329600 }, { "epoch": 3.36, "learning_rate": 5.4506593348589686e-05, "loss": 0.8661, "step": 329700 }, { "epoch": 3.36, "learning_rate": 5.450276649968255e-05, "loss": 0.8424, "step": 329800 }, { "epoch": 3.36, "learning_rate": 5.449893845273636e-05, "loss": 0.8998, "step": 329900 }, { "epoch": 3.36, "learning_rate": 5.449510920793827e-05, "loss": 0.6582, "step": 330000 }, { "epoch": 3.36, "learning_rate": 5.449127876547553e-05, "loss": 0.7585, "step": 330100 }, { "epoch": 3.36, "learning_rate": 5.448744712553541e-05, "loss": 0.7972, "step": 330200 }, { "epoch": 3.37, "learning_rate": 5.448361428830527e-05, "loss": 0.898, "step": 330300 }, { "epoch": 3.37, "learning_rate": 5.447981860024086e-05, "loss": 0.6729, "step": 330400 }, { "epoch": 3.37, "learning_rate": 5.447598338096113e-05, "loss": 0.8386, "step": 330500 }, { "epoch": 3.37, "learning_rate": 5.4472146964951896e-05, "loss": 0.8171, "step": 330600 }, { "epoch": 3.37, "learning_rate": 5.4468309352400705e-05, "loss": 0.9008, "step": 330700 }, { "epoch": 3.37, "learning_rate": 5.446447054349521e-05, "loss": 0.8025, "step": 330800 }, { "epoch": 3.37, "learning_rate": 5.44606305384231e-05, "loss": 0.7965, "step": 330900 }, { "epoch": 3.37, "learning_rate": 5.445678933737214e-05, "loss": 0.8075, "step": 331000 }, { "epoch": 3.37, "learning_rate": 5.445294694053012e-05, "loss": 0.8148, "step": 331100 }, { "epoch": 3.37, "learning_rate": 5.4449103348084925e-05, "loss": 0.8622, "step": 331200 }, { "epoch": 3.38, "learning_rate": 5.444525856022448e-05, "loss": 0.7612, "step": 331300 }, { "epoch": 3.38, "learning_rate": 5.444141257713678e-05, "loss": 0.8024, "step": 331400 }, { "epoch": 3.38, "learning_rate": 5.443756539900986e-05, "loss": 0.773, "step": 331500 }, { "epoch": 3.38, "learning_rate": 5.443371702603182e-05, "loss": 0.799, "step": 331600 }, { "epoch": 3.38, "learning_rate": 5.4429867458390834e-05, "loss": 0.7603, "step": 331700 }, { "epoch": 3.38, "learning_rate": 5.4426016696275124e-05, "loss": 0.8843, "step": 331800 }, { "epoch": 3.38, "learning_rate": 5.4422164739872956e-05, "loss": 0.6728, "step": 331900 }, { "epoch": 3.38, "learning_rate": 5.4418311589372684e-05, "loss": 0.6997, "step": 332000 }, { "epoch": 3.38, "learning_rate": 5.4414457244962695e-05, "loss": 0.7906, "step": 332100 }, { "epoch": 3.38, "learning_rate": 5.4410601706831444e-05, "loss": 0.8492, "step": 332200 }, { "epoch": 3.39, "learning_rate": 5.4406744975167445e-05, "loss": 0.7517, "step": 332300 }, { "epoch": 3.39, "learning_rate": 5.440288705015926e-05, "loss": 0.8056, "step": 332400 }, { "epoch": 3.39, "learning_rate": 5.439902793199554e-05, "loss": 0.726, "step": 332500 }, { "epoch": 3.39, "learning_rate": 5.439516762086496e-05, "loss": 0.7852, "step": 332600 }, { "epoch": 3.39, "learning_rate": 5.439130611695626e-05, "loss": 0.8238, "step": 332700 }, { "epoch": 3.39, "learning_rate": 5.4387482053325926e-05, "loss": 0.8161, "step": 332800 }, { "epoch": 3.39, "learning_rate": 5.438361817635054e-05, "loss": 0.9304, "step": 332900 }, { "epoch": 3.39, "learning_rate": 5.437975310716174e-05, "loss": 0.8237, "step": 333000 }, { "epoch": 3.39, "learning_rate": 5.4375886845948505e-05, "loss": 0.8337, "step": 333100 }, { "epoch": 3.39, "learning_rate": 5.437201939289987e-05, "loss": 0.8601, "step": 333200 }, { "epoch": 3.4, "learning_rate": 5.4368150748204926e-05, "loss": 0.9195, "step": 333300 }, { "epoch": 3.4, "learning_rate": 5.436428091205284e-05, "loss": 0.8568, "step": 333400 }, { "epoch": 3.4, "learning_rate": 5.436040988463281e-05, "loss": 0.8183, "step": 333500 }, { "epoch": 3.4, "learning_rate": 5.4356537666134115e-05, "loss": 0.7959, "step": 333600 }, { "epoch": 3.4, "learning_rate": 5.4352664256746075e-05, "loss": 0.7719, "step": 333700 }, { "epoch": 3.4, "learning_rate": 5.4348789656658085e-05, "loss": 0.8386, "step": 333800 }, { "epoch": 3.4, "learning_rate": 5.434491386605959e-05, "loss": 0.8879, "step": 333900 }, { "epoch": 3.4, "learning_rate": 5.4341036885140084e-05, "loss": 0.8089, "step": 334000 }, { "epoch": 3.4, "learning_rate": 5.4337158714089145e-05, "loss": 0.9675, "step": 334100 }, { "epoch": 3.4, "learning_rate": 5.433327935309637e-05, "loss": 0.8112, "step": 334200 }, { "epoch": 3.41, "learning_rate": 5.4329398802351454e-05, "loss": 0.9335, "step": 334300 }, { "epoch": 3.41, "learning_rate": 5.432551706204412e-05, "loss": 0.8291, "step": 334400 }, { "epoch": 3.41, "learning_rate": 5.432163413236417e-05, "loss": 0.8628, "step": 334500 }, { "epoch": 3.41, "learning_rate": 5.431775001350147e-05, "loss": 0.8267, "step": 334600 }, { "epoch": 3.41, "learning_rate": 5.43138647056459e-05, "loss": 0.9392, "step": 334700 }, { "epoch": 3.41, "learning_rate": 5.4310017079837985e-05, "loss": 0.8747, "step": 334800 }, { "epoch": 3.41, "learning_rate": 5.4306129406451854e-05, "loss": 0.8757, "step": 334900 }, { "epoch": 3.41, "learning_rate": 5.430224054464105e-05, "loss": 0.9245, "step": 335000 }, { "epoch": 3.41, "learning_rate": 5.429835049459572e-05, "loss": 0.7811, "step": 335100 }, { "epoch": 3.42, "learning_rate": 5.429445925650605e-05, "loss": 0.8549, "step": 335200 }, { "epoch": 3.42, "learning_rate": 5.42905668305623e-05, "loss": 0.9381, "step": 335300 }, { "epoch": 3.42, "learning_rate": 5.42866732169548e-05, "loss": 0.9005, "step": 335400 }, { "epoch": 3.42, "learning_rate": 5.4282778415873913e-05, "loss": 0.892, "step": 335500 }, { "epoch": 3.42, "learning_rate": 5.427888242751006e-05, "loss": 0.9165, "step": 335600 }, { "epoch": 3.42, "learning_rate": 5.427498525205376e-05, "loss": 1.0178, "step": 335700 }, { "epoch": 3.42, "learning_rate": 5.427108688969554e-05, "loss": 0.9623, "step": 335800 }, { "epoch": 3.42, "learning_rate": 5.426718734062601e-05, "loss": 0.8048, "step": 335900 }, { "epoch": 3.42, "learning_rate": 5.426328660503585e-05, "loss": 0.8591, "step": 336000 }, { "epoch": 3.42, "learning_rate": 5.425938468311576e-05, "loss": 0.8696, "step": 336100 }, { "epoch": 3.43, "learning_rate": 5.425548157505654e-05, "loss": 0.8766, "step": 336200 }, { "epoch": 3.43, "learning_rate": 5.425157728104901e-05, "loss": 0.8394, "step": 336300 }, { "epoch": 3.43, "learning_rate": 5.424767180128409e-05, "loss": 0.7747, "step": 336400 }, { "epoch": 3.43, "learning_rate": 5.4243765135952706e-05, "loss": 0.8581, "step": 336500 }, { "epoch": 3.43, "learning_rate": 5.42398572852459e-05, "loss": 0.7416, "step": 336600 }, { "epoch": 3.43, "learning_rate": 5.423594824935472e-05, "loss": 0.8795, "step": 336700 }, { "epoch": 3.43, "learning_rate": 5.4232038028470304e-05, "loss": 0.9409, "step": 336800 }, { "epoch": 3.43, "learning_rate": 5.4228126622783835e-05, "loss": 0.886, "step": 336900 }, { "epoch": 3.43, "learning_rate": 5.4224214032486566e-05, "loss": 0.8711, "step": 337000 }, { "epoch": 3.43, "learning_rate": 5.422030025776979e-05, "loss": 1.0404, "step": 337100 }, { "epoch": 3.44, "learning_rate": 5.421638529882487e-05, "loss": 0.838, "step": 337200 }, { "epoch": 3.44, "learning_rate": 5.4212469155843225e-05, "loss": 0.8097, "step": 337300 }, { "epoch": 3.44, "learning_rate": 5.4208551829016334e-05, "loss": 0.9381, "step": 337400 }, { "epoch": 3.44, "learning_rate": 5.420463331853573e-05, "loss": 0.7932, "step": 337500 }, { "epoch": 3.44, "learning_rate": 5.4200713624593e-05, "loss": 0.8826, "step": 337600 }, { "epoch": 3.44, "learning_rate": 5.419679274737979e-05, "loss": 0.9429, "step": 337700 }, { "epoch": 3.44, "learning_rate": 5.4192870687087816e-05, "loss": 0.9054, "step": 337800 }, { "epoch": 3.44, "learning_rate": 5.4188986682195295e-05, "loss": 0.8673, "step": 337900 }, { "epoch": 3.44, "learning_rate": 5.418506226814715e-05, "loss": 0.8667, "step": 338000 }, { "epoch": 3.44, "learning_rate": 5.418113667159378e-05, "loss": 0.8229, "step": 338100 }, { "epoch": 3.45, "learning_rate": 5.417720989272714e-05, "loss": 0.8724, "step": 338200 }, { "epoch": 3.45, "learning_rate": 5.417328193173922e-05, "loss": 0.8777, "step": 338300 }, { "epoch": 3.45, "learning_rate": 5.4169352788822064e-05, "loss": 0.906, "step": 338400 }, { "epoch": 3.45, "learning_rate": 5.41654224641678e-05, "loss": 0.948, "step": 338500 }, { "epoch": 3.45, "learning_rate": 5.416149095796859e-05, "loss": 0.988, "step": 338600 }, { "epoch": 3.45, "learning_rate": 5.4157558270416654e-05, "loss": 0.9876, "step": 338700 }, { "epoch": 3.45, "learning_rate": 5.415362440170428e-05, "loss": 0.8309, "step": 338800 }, { "epoch": 3.45, "learning_rate": 5.4149689352023814e-05, "loss": 0.9018, "step": 338900 }, { "epoch": 3.45, "learning_rate": 5.414575312156767e-05, "loss": 0.9025, "step": 339000 }, { "epoch": 3.45, "learning_rate": 5.414181571052829e-05, "loss": 0.8341, "step": 339100 }, { "epoch": 3.46, "learning_rate": 5.413787711909817e-05, "loss": 0.8806, "step": 339200 }, { "epoch": 3.46, "learning_rate": 5.4133937347469925e-05, "loss": 0.8346, "step": 339300 }, { "epoch": 3.46, "learning_rate": 5.4129996395836176e-05, "loss": 1.0027, "step": 339400 }, { "epoch": 3.46, "learning_rate": 5.412605426438958e-05, "loss": 0.9267, "step": 339500 }, { "epoch": 3.46, "learning_rate": 5.4122110953322926e-05, "loss": 1.0539, "step": 339600 }, { "epoch": 3.46, "learning_rate": 5.4118166462829e-05, "loss": 0.8187, "step": 339700 }, { "epoch": 3.46, "learning_rate": 5.411422079310065e-05, "loss": 0.9457, "step": 339800 }, { "epoch": 3.46, "learning_rate": 5.411031341865414e-05, "loss": 1.0481, "step": 339900 }, { "epoch": 3.46, "learning_rate": 5.4106365402823315e-05, "loss": 0.9842, "step": 340000 }, { "epoch": 3.46, "learning_rate": 5.4102416208335095e-05, "loss": 1.04, "step": 340100 }, { "epoch": 3.47, "learning_rate": 5.409846583538255e-05, "loss": 0.9499, "step": 340200 }, { "epoch": 3.47, "learning_rate": 5.4094514284158846e-05, "loss": 0.8526, "step": 340300 }, { "epoch": 3.47, "learning_rate": 5.409056155485717e-05, "loss": 0.9673, "step": 340400 }, { "epoch": 3.47, "learning_rate": 5.408664719257256e-05, "loss": 0.7544, "step": 340500 }, { "epoch": 3.47, "learning_rate": 5.408269211947078e-05, "loss": 0.907, "step": 340600 }, { "epoch": 3.47, "learning_rate": 5.407873586886908e-05, "loss": 0.9975, "step": 340700 }, { "epoch": 3.47, "learning_rate": 5.407477844096088e-05, "loss": 0.8194, "step": 340800 }, { "epoch": 3.47, "learning_rate": 5.4070819835939685e-05, "loss": 0.8418, "step": 340900 }, { "epoch": 3.47, "learning_rate": 5.406686005399905e-05, "loss": 0.8876, "step": 341000 }, { "epoch": 3.48, "learning_rate": 5.406289909533258e-05, "loss": 0.9251, "step": 341100 }, { "epoch": 3.48, "learning_rate": 5.405893696013395e-05, "loss": 0.8449, "step": 341200 }, { "epoch": 3.48, "learning_rate": 5.405497364859687e-05, "loss": 0.904, "step": 341300 }, { "epoch": 3.48, "learning_rate": 5.4051048811613234e-05, "loss": 0.8262, "step": 341400 }, { "epoch": 3.48, "learning_rate": 5.404708315973923e-05, "loss": 0.7497, "step": 341500 }, { "epoch": 3.48, "learning_rate": 5.404311633210637e-05, "loss": 0.7518, "step": 341600 }, { "epoch": 3.48, "learning_rate": 5.403914832890859e-05, "loss": 0.8741, "step": 341700 }, { "epoch": 3.48, "learning_rate": 5.403517915033992e-05, "loss": 1.0219, "step": 341800 }, { "epoch": 3.48, "learning_rate": 5.403120879659443e-05, "loss": 0.8878, "step": 341900 }, { "epoch": 3.48, "learning_rate": 5.402723726786624e-05, "loss": 0.9455, "step": 342000 }, { "epoch": 3.49, "learning_rate": 5.4023264564349535e-05, "loss": 0.9049, "step": 342100 }, { "epoch": 3.49, "learning_rate": 5.401929068623855e-05, "loss": 0.916, "step": 342200 }, { "epoch": 3.49, "learning_rate": 5.401531563372759e-05, "loss": 0.7544, "step": 342300 }, { "epoch": 3.49, "learning_rate": 5.4011339407011016e-05, "loss": 0.868, "step": 342400 }, { "epoch": 3.49, "learning_rate": 5.400736200628323e-05, "loss": 0.7905, "step": 342500 }, { "epoch": 3.49, "learning_rate": 5.400338343173872e-05, "loss": 0.8767, "step": 342600 }, { "epoch": 3.49, "learning_rate": 5.3999403683572e-05, "loss": 0.8184, "step": 342700 }, { "epoch": 3.49, "learning_rate": 5.399542276197766e-05, "loss": 0.8369, "step": 342800 }, { "epoch": 3.49, "learning_rate": 5.3991440667150345e-05, "loss": 0.9208, "step": 342900 }, { "epoch": 3.49, "learning_rate": 5.398745739928476e-05, "loss": 0.8268, "step": 343000 }, { "epoch": 3.5, "learning_rate": 5.398347295857565e-05, "loss": 0.9143, "step": 343100 }, { "epoch": 3.5, "learning_rate": 5.397948734521784e-05, "loss": 1.0979, "step": 343200 }, { "epoch": 3.5, "learning_rate": 5.3975500559406205e-05, "loss": 0.8552, "step": 343300 }, { "epoch": 3.5, "learning_rate": 5.397151260133566e-05, "loss": 0.9751, "step": 343400 }, { "epoch": 3.5, "learning_rate": 5.396752347120122e-05, "loss": 0.8371, "step": 343500 }, { "epoch": 3.5, "learning_rate": 5.3963533169197904e-05, "loss": 0.8045, "step": 343600 }, { "epoch": 3.5, "learning_rate": 5.395954169552082e-05, "loss": 0.7778, "step": 343700 }, { "epoch": 3.5, "learning_rate": 5.395554905036514e-05, "loss": 0.8089, "step": 343800 }, { "epoch": 3.5, "learning_rate": 5.395159517788766e-05, "loss": 0.8448, "step": 343900 }, { "epoch": 3.5, "learning_rate": 5.394760020207039e-05, "loss": 0.8314, "step": 344000 }, { "epoch": 3.51, "learning_rate": 5.394360405535837e-05, "loss": 0.8651, "step": 344100 }, { "epoch": 3.51, "learning_rate": 5.393960673794701e-05, "loss": 0.8219, "step": 344200 }, { "epoch": 3.51, "learning_rate": 5.393560825003174e-05, "loss": 0.9103, "step": 344300 }, { "epoch": 3.51, "learning_rate": 5.393160859180807e-05, "loss": 0.777, "step": 344400 }, { "epoch": 3.51, "learning_rate": 5.392760776347155e-05, "loss": 0.8987, "step": 344500 }, { "epoch": 3.51, "learning_rate": 5.39236057652178e-05, "loss": 0.8541, "step": 344600 }, { "epoch": 3.51, "learning_rate": 5.39196025972425e-05, "loss": 0.9376, "step": 344700 }, { "epoch": 3.51, "learning_rate": 5.391559825974138e-05, "loss": 0.8821, "step": 344800 }, { "epoch": 3.51, "learning_rate": 5.3911592752910225e-05, "loss": 0.9233, "step": 344900 }, { "epoch": 3.51, "learning_rate": 5.390758607694488e-05, "loss": 0.8708, "step": 345000 }, { "epoch": 3.52, "learning_rate": 5.390357823204126e-05, "loss": 0.8195, "step": 345100 }, { "epoch": 3.52, "learning_rate": 5.38995692183953e-05, "loss": 0.8252, "step": 345200 }, { "epoch": 3.52, "learning_rate": 5.3895559036203036e-05, "loss": 0.8675, "step": 345300 }, { "epoch": 3.52, "learning_rate": 5.389154768566053e-05, "loss": 1.0444, "step": 345400 }, { "epoch": 3.52, "learning_rate": 5.3887535166963934e-05, "loss": 0.9186, "step": 345500 }, { "epoch": 3.52, "learning_rate": 5.388352148030941e-05, "loss": 0.9672, "step": 345600 }, { "epoch": 3.52, "learning_rate": 5.387950662589321e-05, "loss": 0.8277, "step": 345700 }, { "epoch": 3.52, "learning_rate": 5.387549060391165e-05, "loss": 0.9781, "step": 345800 }, { "epoch": 3.52, "learning_rate": 5.3871473414561086e-05, "loss": 0.9245, "step": 345900 }, { "epoch": 3.53, "learning_rate": 5.386745505803792e-05, "loss": 1.068, "step": 346000 }, { "epoch": 3.53, "learning_rate": 5.386343553453864e-05, "loss": 0.9302, "step": 346100 }, { "epoch": 3.53, "learning_rate": 5.385941484425976e-05, "loss": 0.9994, "step": 346200 }, { "epoch": 3.53, "learning_rate": 5.38553929873979e-05, "loss": 1.0715, "step": 346300 }, { "epoch": 3.53, "learning_rate": 5.385136996414967e-05, "loss": 0.9614, "step": 346400 }, { "epoch": 3.53, "learning_rate": 5.384734577471179e-05, "loss": 1.0344, "step": 346500 }, { "epoch": 3.53, "learning_rate": 5.384332041928102e-05, "loss": 0.9919, "step": 346600 }, { "epoch": 3.53, "learning_rate": 5.3839293898054165e-05, "loss": 1.0086, "step": 346700 }, { "epoch": 3.53, "learning_rate": 5.383526621122811e-05, "loss": 1.0085, "step": 346800 }, { "epoch": 3.53, "learning_rate": 5.383123735899977e-05, "loss": 0.8743, "step": 346900 }, { "epoch": 3.54, "learning_rate": 5.382720734156614e-05, "loss": 0.8637, "step": 347000 }, { "epoch": 3.54, "learning_rate": 5.382317615912427e-05, "loss": 1.0592, "step": 347100 }, { "epoch": 3.54, "learning_rate": 5.381914381187126e-05, "loss": 0.9845, "step": 347200 }, { "epoch": 3.54, "learning_rate": 5.381511030000425e-05, "loss": 0.8499, "step": 347300 }, { "epoch": 3.54, "learning_rate": 5.3811075623720485e-05, "loss": 0.9557, "step": 347400 }, { "epoch": 3.54, "learning_rate": 5.38070397832172e-05, "loss": 0.89, "step": 347500 }, { "epoch": 3.54, "learning_rate": 5.380300277869175e-05, "loss": 0.9347, "step": 347600 }, { "epoch": 3.54, "learning_rate": 5.3798964610341516e-05, "loss": 0.9437, "step": 347700 }, { "epoch": 3.54, "learning_rate": 5.379492527836394e-05, "loss": 0.8372, "step": 347800 }, { "epoch": 3.54, "learning_rate": 5.3790884782956514e-05, "loss": 0.9771, "step": 347900 }, { "epoch": 3.55, "learning_rate": 5.37868431243168e-05, "loss": 0.7926, "step": 348000 }, { "epoch": 3.55, "learning_rate": 5.37828003026424e-05, "loss": 0.9211, "step": 348100 }, { "epoch": 3.55, "learning_rate": 5.377875631813101e-05, "loss": 0.9682, "step": 348200 }, { "epoch": 3.55, "learning_rate": 5.3774711170980325e-05, "loss": 0.9586, "step": 348300 }, { "epoch": 3.55, "learning_rate": 5.3770664861388155e-05, "loss": 0.925, "step": 348400 }, { "epoch": 3.55, "learning_rate": 5.376661738955233e-05, "loss": 0.8547, "step": 348500 }, { "epoch": 3.55, "learning_rate": 5.3762568755670745e-05, "loss": 0.945, "step": 348600 }, { "epoch": 3.55, "learning_rate": 5.375851895994135e-05, "loss": 0.865, "step": 348700 }, { "epoch": 3.55, "learning_rate": 5.375446800256216e-05, "loss": 0.8899, "step": 348800 }, { "epoch": 3.55, "learning_rate": 5.3750415883731247e-05, "loss": 0.9063, "step": 348900 }, { "epoch": 3.56, "learning_rate": 5.374636260364673e-05, "loss": 0.8484, "step": 349000 }, { "epoch": 3.56, "learning_rate": 5.3742308162506805e-05, "loss": 0.9326, "step": 349100 }, { "epoch": 3.56, "learning_rate": 5.3738252560509676e-05, "loss": 0.8755, "step": 349200 }, { "epoch": 3.56, "learning_rate": 5.3734195797853677e-05, "loss": 0.9273, "step": 349300 }, { "epoch": 3.56, "learning_rate": 5.3730137874737135e-05, "loss": 0.8898, "step": 349400 }, { "epoch": 3.56, "learning_rate": 5.372607879135846e-05, "loss": 0.9333, "step": 349500 }, { "epoch": 3.56, "learning_rate": 5.372201854791614e-05, "loss": 0.9805, "step": 349600 }, { "epoch": 3.56, "learning_rate": 5.3717957144608655e-05, "loss": 0.8594, "step": 349700 }, { "epoch": 3.56, "learning_rate": 5.3713894581634626e-05, "loss": 0.8958, "step": 349800 }, { "epoch": 3.56, "learning_rate": 5.3709830859192655e-05, "loss": 0.9918, "step": 349900 }, { "epoch": 3.57, "learning_rate": 5.370576597748146e-05, "loss": 0.8977, "step": 350000 }, { "epoch": 3.57, "eval_cer": 0.15400072018086786, "eval_loss": 0.8868646025657654, "eval_runtime": 11067.3494, "eval_samples_per_second": 4.943, "eval_steps_per_second": 0.309, "eval_wer": 0.27368996588225003, "step": 350000 }, { "epoch": 3.57, "learning_rate": 5.3701699936699765e-05, "loss": 0.8122, "step": 350100 }, { "epoch": 3.57, "learning_rate": 5.36976327370464e-05, "loss": 0.8942, "step": 350200 }, { "epoch": 3.57, "learning_rate": 5.3693564378720206e-05, "loss": 0.8784, "step": 350300 }, { "epoch": 3.57, "learning_rate": 5.368949486192012e-05, "loss": 0.9242, "step": 350400 }, { "epoch": 3.57, "learning_rate": 5.368542418684509e-05, "loss": 0.8039, "step": 350500 }, { "epoch": 3.57, "learning_rate": 5.368135235369418e-05, "loss": 0.8437, "step": 350600 }, { "epoch": 3.57, "learning_rate": 5.367727936266645e-05, "loss": 0.8817, "step": 350700 }, { "epoch": 3.57, "learning_rate": 5.3673205213961065e-05, "loss": 0.8223, "step": 350800 }, { "epoch": 3.58, "learning_rate": 5.366912990777722e-05, "loss": 0.6902, "step": 350900 }, { "epoch": 3.58, "learning_rate": 5.3665053444314175e-05, "loss": 0.8682, "step": 351000 }, { "epoch": 3.58, "learning_rate": 5.366097582377123e-05, "loss": 0.8662, "step": 351100 }, { "epoch": 3.58, "learning_rate": 5.3656897046347776e-05, "loss": 0.8344, "step": 351200 }, { "epoch": 3.58, "learning_rate": 5.365281711224324e-05, "loss": 0.9177, "step": 351300 }, { "epoch": 3.58, "learning_rate": 5.364873602165709e-05, "loss": 0.9335, "step": 351400 }, { "epoch": 3.58, "learning_rate": 5.364465377478888e-05, "loss": 0.8431, "step": 351500 }, { "epoch": 3.58, "learning_rate": 5.364057037183821e-05, "loss": 0.7737, "step": 351600 }, { "epoch": 3.58, "learning_rate": 5.363648581300472e-05, "loss": 0.8538, "step": 351700 }, { "epoch": 3.58, "learning_rate": 5.363240009848813e-05, "loss": 0.8297, "step": 351800 }, { "epoch": 3.59, "learning_rate": 5.362831322848821e-05, "loss": 0.8857, "step": 351900 }, { "epoch": 3.59, "learning_rate": 5.362422520320478e-05, "loss": 0.8212, "step": 352000 }, { "epoch": 3.59, "learning_rate": 5.362013602283771e-05, "loss": 0.8241, "step": 352100 }, { "epoch": 3.59, "learning_rate": 5.3616045687586956e-05, "loss": 0.9376, "step": 352200 }, { "epoch": 3.59, "learning_rate": 5.36119541976525e-05, "loss": 0.8152, "step": 352300 }, { "epoch": 3.59, "learning_rate": 5.3607861553234384e-05, "loss": 0.7562, "step": 352400 }, { "epoch": 3.59, "learning_rate": 5.360376775453274e-05, "loss": 0.865, "step": 352500 }, { "epoch": 3.59, "learning_rate": 5.3599672801747704e-05, "loss": 0.7685, "step": 352600 }, { "epoch": 3.59, "learning_rate": 5.35955766950795e-05, "loss": 0.8248, "step": 352700 }, { "epoch": 3.59, "learning_rate": 5.3591479434728404e-05, "loss": 0.7922, "step": 352800 }, { "epoch": 3.6, "learning_rate": 5.3587381020894754e-05, "loss": 0.7246, "step": 352900 }, { "epoch": 3.6, "learning_rate": 5.358328145377893e-05, "loss": 0.9608, "step": 353000 }, { "epoch": 3.6, "learning_rate": 5.357918073358139e-05, "loss": 0.7184, "step": 353100 }, { "epoch": 3.6, "learning_rate": 5.3575078860502615e-05, "loss": 0.7943, "step": 353200 }, { "epoch": 3.6, "learning_rate": 5.357097583474319e-05, "loss": 0.8057, "step": 353300 }, { "epoch": 3.6, "learning_rate": 5.356687165650369e-05, "loss": 0.8545, "step": 353400 }, { "epoch": 3.6, "learning_rate": 5.356276632598482e-05, "loss": 0.8106, "step": 353500 }, { "epoch": 3.6, "learning_rate": 5.3558659843387284e-05, "loss": 0.7421, "step": 353600 }, { "epoch": 3.6, "learning_rate": 5.355455220891187e-05, "loss": 0.793, "step": 353700 }, { "epoch": 3.6, "learning_rate": 5.355044342275943e-05, "loss": 0.8183, "step": 353800 }, { "epoch": 3.61, "learning_rate": 5.3546333485130846e-05, "loss": 0.7912, "step": 353900 }, { "epoch": 3.61, "learning_rate": 5.354222239622706e-05, "loss": 0.7747, "step": 354000 }, { "epoch": 3.61, "learning_rate": 5.35381101562491e-05, "loss": 0.7183, "step": 354100 }, { "epoch": 3.61, "learning_rate": 5.353399676539802e-05, "loss": 0.7398, "step": 354200 }, { "epoch": 3.61, "learning_rate": 5.352988222387494e-05, "loss": 0.7905, "step": 354300 }, { "epoch": 3.61, "learning_rate": 5.352576653188105e-05, "loss": 0.7846, "step": 354400 }, { "epoch": 3.61, "learning_rate": 5.352164968961756e-05, "loss": 0.7737, "step": 354500 }, { "epoch": 3.61, "learning_rate": 5.351753169728578e-05, "loss": 0.6956, "step": 354600 }, { "epoch": 3.61, "learning_rate": 5.351341255508703e-05, "loss": 0.8619, "step": 354700 }, { "epoch": 3.61, "learning_rate": 5.3509292263222736e-05, "loss": 0.733, "step": 354800 }, { "epoch": 3.62, "learning_rate": 5.350517082189434e-05, "loss": 0.8307, "step": 354900 }, { "epoch": 3.62, "learning_rate": 5.3501048231303366e-05, "loss": 0.9157, "step": 355000 }, { "epoch": 3.62, "learning_rate": 5.349692449165138e-05, "loss": 0.8511, "step": 355100 }, { "epoch": 3.62, "learning_rate": 5.3492799603140016e-05, "loss": 0.784, "step": 355200 }, { "epoch": 3.62, "learning_rate": 5.3488673565970943e-05, "loss": 0.6809, "step": 355300 }, { "epoch": 3.62, "learning_rate": 5.34845463803459e-05, "loss": 0.8163, "step": 355400 }, { "epoch": 3.62, "learning_rate": 5.3480418046466696e-05, "loss": 0.7569, "step": 355500 }, { "epoch": 3.62, "learning_rate": 5.3476288564535176e-05, "loss": 0.8531, "step": 355600 }, { "epoch": 3.62, "learning_rate": 5.347215793475324e-05, "loss": 0.6863, "step": 355700 }, { "epoch": 3.62, "learning_rate": 5.346802615732285e-05, "loss": 0.8009, "step": 355800 }, { "epoch": 3.63, "learning_rate": 5.346389323244603e-05, "loss": 0.782, "step": 355900 }, { "epoch": 3.63, "learning_rate": 5.3459759160324865e-05, "loss": 0.809, "step": 356000 }, { "epoch": 3.63, "learning_rate": 5.3455665299030306e-05, "loss": 0.8036, "step": 356100 }, { "epoch": 3.63, "learning_rate": 5.3451528944494284e-05, "loss": 0.849, "step": 356200 }, { "epoch": 3.63, "learning_rate": 5.344739144331844e-05, "loss": 0.6932, "step": 356300 }, { "epoch": 3.63, "learning_rate": 5.344325279570509e-05, "loss": 0.774, "step": 356400 }, { "epoch": 3.63, "learning_rate": 5.3439113001856564e-05, "loss": 0.721, "step": 356500 }, { "epoch": 3.63, "learning_rate": 5.34349720619753e-05, "loss": 0.7001, "step": 356600 }, { "epoch": 3.63, "learning_rate": 5.3430829976263764e-05, "loss": 0.8254, "step": 356700 }, { "epoch": 3.64, "learning_rate": 5.342668674492446e-05, "loss": 0.7804, "step": 356800 }, { "epoch": 3.64, "learning_rate": 5.342254236815998e-05, "loss": 0.808, "step": 356900 }, { "epoch": 3.64, "learning_rate": 5.341839684617295e-05, "loss": 0.7551, "step": 357000 }, { "epoch": 3.64, "learning_rate": 5.3414250179166074e-05, "loss": 0.8123, "step": 357100 }, { "epoch": 3.64, "learning_rate": 5.341010236734209e-05, "loss": 0.786, "step": 357200 }, { "epoch": 3.64, "learning_rate": 5.340595341090381e-05, "loss": 0.7171, "step": 357300 }, { "epoch": 3.64, "learning_rate": 5.3401803310054075e-05, "loss": 0.8, "step": 357400 }, { "epoch": 3.64, "learning_rate": 5.339765206499583e-05, "loss": 0.8735, "step": 357500 }, { "epoch": 3.64, "learning_rate": 5.339349967593202e-05, "loss": 0.7045, "step": 357600 }, { "epoch": 3.64, "learning_rate": 5.338934614306568e-05, "loss": 0.7032, "step": 357700 }, { "epoch": 3.65, "learning_rate": 5.33851914665999e-05, "loss": 0.7909, "step": 357800 }, { "epoch": 3.65, "learning_rate": 5.338103564673781e-05, "loss": 0.7648, "step": 357900 }, { "epoch": 3.65, "learning_rate": 5.3376878683682596e-05, "loss": 0.7387, "step": 358000 }, { "epoch": 3.65, "learning_rate": 5.337272057763754e-05, "loss": 0.8141, "step": 358100 }, { "epoch": 3.65, "learning_rate": 5.3368561328805914e-05, "loss": 0.6764, "step": 358200 }, { "epoch": 3.65, "learning_rate": 5.33644009373911e-05, "loss": 0.7655, "step": 358300 }, { "epoch": 3.65, "learning_rate": 5.336023940359652e-05, "loss": 0.8875, "step": 358400 }, { "epoch": 3.65, "learning_rate": 5.3356076727625625e-05, "loss": 0.7552, "step": 358500 }, { "epoch": 3.65, "learning_rate": 5.335191290968197e-05, "loss": 0.7859, "step": 358600 }, { "epoch": 3.65, "learning_rate": 5.334774794996913e-05, "loss": 0.7241, "step": 358700 }, { "epoch": 3.66, "learning_rate": 5.334358184869074e-05, "loss": 0.7683, "step": 358800 }, { "epoch": 3.66, "learning_rate": 5.333945628412599e-05, "loss": 0.729, "step": 358900 }, { "epoch": 3.66, "learning_rate": 5.333528791173823e-05, "loss": 0.6936, "step": 359000 }, { "epoch": 3.66, "learning_rate": 5.3331118398394165e-05, "loss": 0.7482, "step": 359100 }, { "epoch": 3.66, "learning_rate": 5.332694774429763e-05, "loss": 0.7116, "step": 359200 }, { "epoch": 3.66, "learning_rate": 5.3322775949652577e-05, "loss": 0.78, "step": 359300 }, { "epoch": 3.66, "learning_rate": 5.331860301466295e-05, "loss": 0.7512, "step": 359400 }, { "epoch": 3.66, "learning_rate": 5.331442893953279e-05, "loss": 0.7099, "step": 359500 }, { "epoch": 3.66, "learning_rate": 5.33102537244662e-05, "loss": 0.7141, "step": 359600 }, { "epoch": 3.66, "learning_rate": 5.33060773696673e-05, "loss": 0.7281, "step": 359700 }, { "epoch": 3.67, "learning_rate": 5.33018998753403e-05, "loss": 0.7264, "step": 359800 }, { "epoch": 3.67, "learning_rate": 5.329772124168947e-05, "loss": 0.7272, "step": 359900 }, { "epoch": 3.67, "learning_rate": 5.329354146891909e-05, "loss": 0.8464, "step": 360000 }, { "epoch": 3.67, "learning_rate": 5.3289360557233535e-05, "loss": 0.7398, "step": 360100 }, { "epoch": 3.67, "learning_rate": 5.3285178506837244e-05, "loss": 0.7651, "step": 360200 }, { "epoch": 3.67, "learning_rate": 5.3280995317934684e-05, "loss": 0.6942, "step": 360300 }, { "epoch": 3.67, "learning_rate": 5.327681099073038e-05, "loss": 0.7001, "step": 360400 }, { "epoch": 3.67, "learning_rate": 5.3272625525428934e-05, "loss": 0.8536, "step": 360500 }, { "epoch": 3.67, "learning_rate": 5.326843892223498e-05, "loss": 0.8733, "step": 360600 }, { "epoch": 3.67, "learning_rate": 5.326425118135322e-05, "loss": 0.7469, "step": 360700 }, { "epoch": 3.68, "learning_rate": 5.326006230298842e-05, "loss": 0.7395, "step": 360800 }, { "epoch": 3.68, "learning_rate": 5.325587228734537e-05, "loss": 0.7114, "step": 360900 }, { "epoch": 3.68, "learning_rate": 5.325168113462895e-05, "loss": 0.6886, "step": 361000 }, { "epoch": 3.68, "learning_rate": 5.324748884504409e-05, "loss": 0.7968, "step": 361100 }, { "epoch": 3.68, "learning_rate": 5.3243295418795754e-05, "loss": 0.6924, "step": 361200 }, { "epoch": 3.68, "learning_rate": 5.3239142807340846e-05, "loss": 0.6633, "step": 361300 }, { "epoch": 3.68, "learning_rate": 5.323494711974224e-05, "loss": 0.7264, "step": 361400 }, { "epoch": 3.68, "learning_rate": 5.323075029609338e-05, "loss": 0.7324, "step": 361500 }, { "epoch": 3.68, "learning_rate": 5.3226552336599474e-05, "loss": 0.7322, "step": 361600 }, { "epoch": 3.69, "learning_rate": 5.3222353241465754e-05, "loss": 0.8226, "step": 361700 }, { "epoch": 3.69, "learning_rate": 5.3218153010897556e-05, "loss": 0.7336, "step": 361800 }, { "epoch": 3.69, "learning_rate": 5.3213951645100227e-05, "loss": 0.6882, "step": 361900 }, { "epoch": 3.69, "learning_rate": 5.3209749144279195e-05, "loss": 0.705, "step": 362000 }, { "epoch": 3.69, "learning_rate": 5.320554550863994e-05, "loss": 0.689, "step": 362100 }, { "epoch": 3.69, "learning_rate": 5.320134073838799e-05, "loss": 0.8387, "step": 362200 }, { "epoch": 3.69, "learning_rate": 5.319713483372894e-05, "loss": 0.7244, "step": 362300 }, { "epoch": 3.69, "learning_rate": 5.3192927794868434e-05, "loss": 0.7248, "step": 362400 }, { "epoch": 3.69, "learning_rate": 5.3188719622012167e-05, "loss": 0.6811, "step": 362500 }, { "epoch": 3.69, "learning_rate": 5.318451031536589e-05, "loss": 0.7096, "step": 362600 }, { "epoch": 3.7, "learning_rate": 5.318029987513541e-05, "loss": 0.762, "step": 362700 }, { "epoch": 3.7, "learning_rate": 5.317608830152661e-05, "loss": 0.6746, "step": 362800 }, { "epoch": 3.7, "learning_rate": 5.3171875594745387e-05, "loss": 0.7081, "step": 362900 }, { "epoch": 3.7, "learning_rate": 5.3167661754997745e-05, "loss": 0.7599, "step": 363000 }, { "epoch": 3.7, "learning_rate": 5.3163446782489686e-05, "loss": 0.8074, "step": 363100 }, { "epoch": 3.7, "learning_rate": 5.3159230677427325e-05, "loss": 0.6664, "step": 363200 }, { "epoch": 3.7, "learning_rate": 5.315501344001678e-05, "loss": 0.618, "step": 363300 }, { "epoch": 3.7, "learning_rate": 5.315083725976322e-05, "loss": 0.863, "step": 363400 }, { "epoch": 3.7, "learning_rate": 5.3146617769593316e-05, "loss": 0.7698, "step": 363500 }, { "epoch": 3.7, "learning_rate": 5.3142397147691944e-05, "loss": 0.7323, "step": 363600 }, { "epoch": 3.71, "learning_rate": 5.3138217617400085e-05, "loss": 0.7784, "step": 363700 }, { "epoch": 3.71, "learning_rate": 5.3133994743967056e-05, "loss": 0.7617, "step": 363800 }, { "epoch": 3.71, "learning_rate": 5.3129770739419755e-05, "loss": 0.8117, "step": 363900 }, { "epoch": 3.71, "learning_rate": 5.312554560396468e-05, "loss": 0.7493, "step": 364000 }, { "epoch": 3.71, "learning_rate": 5.31213616060663e-05, "loss": 0.8009, "step": 364100 }, { "epoch": 3.71, "learning_rate": 5.311713422071944e-05, "loss": 0.8524, "step": 364200 }, { "epoch": 3.71, "learning_rate": 5.311290570508268e-05, "loss": 0.7762, "step": 364300 }, { "epoch": 3.71, "learning_rate": 5.310867605936276e-05, "loss": 0.7328, "step": 364400 }, { "epoch": 3.71, "learning_rate": 5.310444528376649e-05, "loss": 0.8129, "step": 364500 }, { "epoch": 3.71, "learning_rate": 5.3100213378500724e-05, "loss": 0.6807, "step": 364600 }, { "epoch": 3.72, "learning_rate": 5.309598034377237e-05, "loss": 0.7141, "step": 364700 }, { "epoch": 3.72, "learning_rate": 5.309174617978841e-05, "loss": 0.6923, "step": 364800 }, { "epoch": 3.72, "learning_rate": 5.308751088675587e-05, "loss": 0.7448, "step": 364900 }, { "epoch": 3.72, "learning_rate": 5.308327446488183e-05, "loss": 0.7355, "step": 365000 }, { "epoch": 3.72, "learning_rate": 5.3079036914373425e-05, "loss": 0.8136, "step": 365100 }, { "epoch": 3.72, "learning_rate": 5.307479823543783e-05, "loss": 0.8348, "step": 365200 }, { "epoch": 3.72, "learning_rate": 5.307055842828231e-05, "loss": 0.7547, "step": 365300 }, { "epoch": 3.72, "learning_rate": 5.3066317493114166e-05, "loss": 0.7775, "step": 365400 }, { "epoch": 3.72, "learning_rate": 5.3062075430140745e-05, "loss": 0.8526, "step": 365500 }, { "epoch": 3.72, "learning_rate": 5.305783223956946e-05, "loss": 0.805, "step": 365600 }, { "epoch": 3.73, "learning_rate": 5.305358792160778e-05, "loss": 0.8505, "step": 365700 }, { "epoch": 3.73, "learning_rate": 5.3049342476463245e-05, "loss": 0.8073, "step": 365800 }, { "epoch": 3.73, "learning_rate": 5.30450959043434e-05, "loss": 0.8105, "step": 365900 }, { "epoch": 3.73, "learning_rate": 5.304084820545588e-05, "loss": 0.8095, "step": 366000 }, { "epoch": 3.73, "learning_rate": 5.30365993800084e-05, "loss": 0.9612, "step": 366100 }, { "epoch": 3.73, "learning_rate": 5.3032391933301436e-05, "loss": 0.8482, "step": 366200 }, { "epoch": 3.73, "learning_rate": 5.30281408666177e-05, "loss": 0.8228, "step": 366300 }, { "epoch": 3.73, "learning_rate": 5.302388867399529e-05, "loss": 0.8144, "step": 366400 }, { "epoch": 3.73, "learning_rate": 5.301963535564213e-05, "loss": 0.8243, "step": 366500 }, { "epoch": 3.73, "learning_rate": 5.301538091176617e-05, "loss": 0.8503, "step": 366600 }, { "epoch": 3.74, "learning_rate": 5.3011125342575435e-05, "loss": 0.8511, "step": 366700 }, { "epoch": 3.74, "learning_rate": 5.3006868648278e-05, "loss": 0.834, "step": 366800 }, { "epoch": 3.74, "learning_rate": 5.300261082908198e-05, "loss": 0.8478, "step": 366900 }, { "epoch": 3.74, "learning_rate": 5.299835188519556e-05, "loss": 0.7865, "step": 367000 }, { "epoch": 3.74, "learning_rate": 5.299409181682698e-05, "loss": 0.8557, "step": 367100 }, { "epoch": 3.74, "learning_rate": 5.2989830624184536e-05, "loss": 0.733, "step": 367200 }, { "epoch": 3.74, "learning_rate": 5.298556830747657e-05, "loss": 0.7316, "step": 367300 }, { "epoch": 3.74, "learning_rate": 5.298130486691149e-05, "loss": 0.761, "step": 367400 }, { "epoch": 3.74, "learning_rate": 5.2977040302697734e-05, "loss": 0.8472, "step": 367500 }, { "epoch": 3.75, "learning_rate": 5.297277461504383e-05, "loss": 0.9746, "step": 367600 }, { "epoch": 3.75, "learning_rate": 5.2968507804158354e-05, "loss": 0.7905, "step": 367700 }, { "epoch": 3.75, "learning_rate": 5.296423987024991e-05, "loss": 0.9817, "step": 367800 }, { "epoch": 3.75, "learning_rate": 5.295997081352717e-05, "loss": 0.9731, "step": 367900 }, { "epoch": 3.75, "learning_rate": 5.295570063419888e-05, "loss": 0.8726, "step": 368000 }, { "epoch": 3.75, "learning_rate": 5.2951429332473824e-05, "loss": 0.844, "step": 368100 }, { "epoch": 3.75, "learning_rate": 5.2947156908560844e-05, "loss": 0.7798, "step": 368200 }, { "epoch": 3.75, "learning_rate": 5.294288336266882e-05, "loss": 0.8535, "step": 368300 }, { "epoch": 3.75, "learning_rate": 5.293860869500673e-05, "loss": 0.8187, "step": 368400 }, { "epoch": 3.75, "learning_rate": 5.293433290578356e-05, "loss": 0.8617, "step": 368500 }, { "epoch": 3.76, "learning_rate": 5.293005599520838e-05, "loss": 0.8839, "step": 368600 }, { "epoch": 3.76, "learning_rate": 5.29257779634903e-05, "loss": 0.8395, "step": 368700 }, { "epoch": 3.76, "learning_rate": 5.2921498810838477e-05, "loss": 0.8474, "step": 368800 }, { "epoch": 3.76, "learning_rate": 5.291721853746217e-05, "loss": 0.8224, "step": 368900 }, { "epoch": 3.76, "learning_rate": 5.291293714357062e-05, "loss": 0.8166, "step": 369000 }, { "epoch": 3.76, "learning_rate": 5.29086546293732e-05, "loss": 0.8327, "step": 369100 }, { "epoch": 3.76, "learning_rate": 5.290437099507927e-05, "loss": 0.8332, "step": 369200 }, { "epoch": 3.76, "learning_rate": 5.2900086240898285e-05, "loss": 0.8946, "step": 369300 }, { "epoch": 3.76, "learning_rate": 5.289580036703974e-05, "loss": 0.824, "step": 369400 }, { "epoch": 3.76, "learning_rate": 5.2891513373713195e-05, "loss": 0.8446, "step": 369500 }, { "epoch": 3.77, "learning_rate": 5.288722526112826e-05, "loss": 0.8777, "step": 369600 }, { "epoch": 3.77, "learning_rate": 5.2882936029494595e-05, "loss": 0.9609, "step": 369700 }, { "epoch": 3.77, "learning_rate": 5.287864567902192e-05, "loss": 0.7867, "step": 369800 }, { "epoch": 3.77, "learning_rate": 5.287435420991999e-05, "loss": 0.764, "step": 369900 }, { "epoch": 3.77, "learning_rate": 5.287006162239865e-05, "loss": 0.6431, "step": 370000 }, { "epoch": 3.77, "learning_rate": 5.2865767916667784e-05, "loss": 0.7414, "step": 370100 }, { "epoch": 3.77, "learning_rate": 5.286147309293731e-05, "loss": 0.7738, "step": 370200 }, { "epoch": 3.77, "learning_rate": 5.285717715141725e-05, "loss": 0.7223, "step": 370300 }, { "epoch": 3.77, "learning_rate": 5.285288009231763e-05, "loss": 0.9447, "step": 370400 }, { "epoch": 3.77, "learning_rate": 5.2848581915848535e-05, "loss": 0.7832, "step": 370500 }, { "epoch": 3.78, "learning_rate": 5.284428262222015e-05, "loss": 0.9128, "step": 370600 }, { "epoch": 3.78, "learning_rate": 5.283998221164267e-05, "loss": 0.7135, "step": 370700 }, { "epoch": 3.78, "learning_rate": 5.283568068432635e-05, "loss": 0.7424, "step": 370800 }, { "epoch": 3.78, "learning_rate": 5.2831378040481545e-05, "loss": 0.8521, "step": 370900 }, { "epoch": 3.78, "learning_rate": 5.282707428031859e-05, "loss": 0.6903, "step": 371000 }, { "epoch": 3.78, "learning_rate": 5.2822812458334676e-05, "loss": 0.8208, "step": 371100 }, { "epoch": 3.78, "learning_rate": 5.281850647732473e-05, "loss": 0.7118, "step": 371200 }, { "epoch": 3.78, "learning_rate": 5.2814199380625975e-05, "loss": 0.8584, "step": 371300 }, { "epoch": 3.78, "learning_rate": 5.280989116844902e-05, "loss": 0.7124, "step": 371400 }, { "epoch": 3.78, "learning_rate": 5.2805581841004515e-05, "loss": 0.7787, "step": 371500 }, { "epoch": 3.79, "learning_rate": 5.280127139850315e-05, "loss": 0.6698, "step": 371600 }, { "epoch": 3.79, "learning_rate": 5.27969598411557e-05, "loss": 0.9614, "step": 371700 }, { "epoch": 3.79, "learning_rate": 5.279264716917294e-05, "loss": 0.7173, "step": 371800 }, { "epoch": 3.79, "learning_rate": 5.278833338276576e-05, "loss": 0.8584, "step": 371900 }, { "epoch": 3.79, "learning_rate": 5.2784018482145076e-05, "loss": 0.8063, "step": 372000 }, { "epoch": 3.79, "learning_rate": 5.2779702467521845e-05, "loss": 0.8011, "step": 372100 }, { "epoch": 3.79, "learning_rate": 5.277538533910712e-05, "loss": 0.6658, "step": 372200 }, { "epoch": 3.79, "learning_rate": 5.277106709711196e-05, "loss": 0.9071, "step": 372300 }, { "epoch": 3.79, "learning_rate": 5.2766747741747503e-05, "loss": 0.8608, "step": 372400 }, { "epoch": 3.8, "learning_rate": 5.276242727322496e-05, "loss": 0.7413, "step": 372500 }, { "epoch": 3.8, "learning_rate": 5.2758148913078644e-05, "loss": 0.7888, "step": 372600 }, { "epoch": 3.8, "learning_rate": 5.275382622999999e-05, "loss": 0.7496, "step": 372700 }, { "epoch": 3.8, "learning_rate": 5.2749502434395025e-05, "loss": 0.7547, "step": 372800 }, { "epoch": 3.8, "learning_rate": 5.274517752647515e-05, "loss": 0.766, "step": 372900 }, { "epoch": 3.8, "learning_rate": 5.274085150645183e-05, "loss": 0.7594, "step": 373000 }, { "epoch": 3.8, "learning_rate": 5.2736524374536595e-05, "loss": 0.8584, "step": 373100 }, { "epoch": 3.8, "learning_rate": 5.2732196130940986e-05, "loss": 0.8021, "step": 373200 }, { "epoch": 3.8, "learning_rate": 5.272786677587666e-05, "loss": 0.7804, "step": 373300 }, { "epoch": 3.8, "learning_rate": 5.2723579619718504e-05, "loss": 0.6777, "step": 373400 }, { "epoch": 3.81, "learning_rate": 5.271924805346121e-05, "loss": 0.819, "step": 373500 }, { "epoch": 3.81, "learning_rate": 5.2714915376368255e-05, "loss": 0.7893, "step": 373600 }, { "epoch": 3.81, "learning_rate": 5.2710581588651494e-05, "loss": 0.7907, "step": 373700 }, { "epoch": 3.81, "learning_rate": 5.2706246690522834e-05, "loss": 0.7784, "step": 373800 }, { "epoch": 3.81, "learning_rate": 5.2701910682194215e-05, "loss": 0.8319, "step": 373900 }, { "epoch": 3.81, "learning_rate": 5.2697573563877634e-05, "loss": 0.6599, "step": 374000 }, { "epoch": 3.81, "learning_rate": 5.269323533578515e-05, "loss": 0.6981, "step": 374100 }, { "epoch": 3.81, "learning_rate": 5.2688895998128896e-05, "loss": 0.7587, "step": 374200 }, { "epoch": 3.81, "learning_rate": 5.268455555112103e-05, "loss": 0.7292, "step": 374300 }, { "epoch": 3.81, "learning_rate": 5.268021399497376e-05, "loss": 0.8201, "step": 374400 }, { "epoch": 3.82, "learning_rate": 5.267587132989938e-05, "loss": 0.7939, "step": 374500 }, { "epoch": 3.82, "learning_rate": 5.2671527556110224e-05, "loss": 0.7083, "step": 374600 }, { "epoch": 3.82, "learning_rate": 5.2667182673818654e-05, "loss": 0.7239, "step": 374700 }, { "epoch": 3.82, "learning_rate": 5.266283668323713e-05, "loss": 0.6955, "step": 374800 }, { "epoch": 3.82, "learning_rate": 5.265848958457813e-05, "loss": 0.7309, "step": 374900 }, { "epoch": 3.82, "learning_rate": 5.2654141378054216e-05, "loss": 0.7747, "step": 375000 }, { "epoch": 3.82, "learning_rate": 5.264979206387797e-05, "loss": 0.7928, "step": 375100 }, { "epoch": 3.82, "learning_rate": 5.2645441642262076e-05, "loss": 0.8331, "step": 375200 }, { "epoch": 3.82, "learning_rate": 5.2641090113419215e-05, "loss": 0.8312, "step": 375300 }, { "epoch": 3.82, "learning_rate": 5.263673747756216e-05, "loss": 0.7642, "step": 375400 }, { "epoch": 3.83, "learning_rate": 5.263238373490373e-05, "loss": 0.7614, "step": 375500 }, { "epoch": 3.83, "learning_rate": 5.262802888565681e-05, "loss": 0.8131, "step": 375600 }, { "epoch": 3.83, "learning_rate": 5.26236729300343e-05, "loss": 0.8353, "step": 375700 }, { "epoch": 3.83, "learning_rate": 5.261931586824921e-05, "loss": 0.7098, "step": 375800 }, { "epoch": 3.83, "learning_rate": 5.2614957700514556e-05, "loss": 0.731, "step": 375900 }, { "epoch": 3.83, "learning_rate": 5.261059842704342e-05, "loss": 0.7013, "step": 376000 }, { "epoch": 3.83, "learning_rate": 5.260623804804896e-05, "loss": 0.7853, "step": 376100 }, { "epoch": 3.83, "learning_rate": 5.2601876563744373e-05, "loss": 0.8683, "step": 376200 }, { "epoch": 3.83, "learning_rate": 5.259751397434289e-05, "loss": 0.7507, "step": 376300 }, { "epoch": 3.83, "learning_rate": 5.259315028005784e-05, "loss": 0.7903, "step": 376400 }, { "epoch": 3.84, "learning_rate": 5.2588785481102564e-05, "loss": 0.7842, "step": 376500 }, { "epoch": 3.84, "learning_rate": 5.258441957769048e-05, "loss": 0.6854, "step": 376600 }, { "epoch": 3.84, "learning_rate": 5.258005257003505e-05, "loss": 0.7699, "step": 376700 }, { "epoch": 3.84, "learning_rate": 5.257568445834981e-05, "loss": 0.7286, "step": 376800 }, { "epoch": 3.84, "learning_rate": 5.257131524284833e-05, "loss": 0.6571, "step": 376900 }, { "epoch": 3.84, "learning_rate": 5.2566944923744215e-05, "loss": 0.8412, "step": 377000 }, { "epoch": 3.84, "learning_rate": 5.256257350125118e-05, "loss": 0.6845, "step": 377100 }, { "epoch": 3.84, "learning_rate": 5.2558200975582944e-05, "loss": 0.7101, "step": 377200 }, { "epoch": 3.84, "learning_rate": 5.255382734695329e-05, "loss": 0.6259, "step": 377300 }, { "epoch": 3.85, "learning_rate": 5.254945261557609e-05, "loss": 0.7434, "step": 377400 }, { "epoch": 3.85, "learning_rate": 5.254507678166521e-05, "loss": 0.7217, "step": 377500 }, { "epoch": 3.85, "learning_rate": 5.254069984543463e-05, "loss": 0.7084, "step": 377600 }, { "epoch": 3.85, "learning_rate": 5.253632180709834e-05, "loss": 0.7055, "step": 377700 }, { "epoch": 3.85, "learning_rate": 5.253194266687041e-05, "loss": 0.6917, "step": 377800 }, { "epoch": 3.85, "learning_rate": 5.2527562424964925e-05, "loss": 0.7846, "step": 377900 }, { "epoch": 3.85, "learning_rate": 5.2523181081596093e-05, "loss": 0.7481, "step": 378000 }, { "epoch": 3.85, "learning_rate": 5.251879863697812e-05, "loss": 0.6889, "step": 378100 }, { "epoch": 3.85, "learning_rate": 5.2514415091325264e-05, "loss": 0.8596, "step": 378200 }, { "epoch": 3.85, "learning_rate": 5.251003044485188e-05, "loss": 0.6402, "step": 378300 }, { "epoch": 3.86, "learning_rate": 5.250564469777233e-05, "loss": 0.7096, "step": 378400 }, { "epoch": 3.86, "learning_rate": 5.250125785030108e-05, "loss": 0.6826, "step": 378500 }, { "epoch": 3.86, "learning_rate": 5.249686990265258e-05, "loss": 0.7125, "step": 378600 }, { "epoch": 3.86, "learning_rate": 5.249248085504141e-05, "loss": 0.7708, "step": 378700 }, { "epoch": 3.86, "learning_rate": 5.248809070768215e-05, "loss": 0.8924, "step": 378800 }, { "epoch": 3.86, "learning_rate": 5.248369946078946e-05, "loss": 0.7337, "step": 378900 }, { "epoch": 3.86, "learning_rate": 5.2479307114578035e-05, "loss": 0.7011, "step": 379000 }, { "epoch": 3.86, "learning_rate": 5.247491366926264e-05, "loss": 0.7311, "step": 379100 }, { "epoch": 3.86, "learning_rate": 5.2470519125058096e-05, "loss": 0.8064, "step": 379200 }, { "epoch": 3.86, "learning_rate": 5.2466123482179264e-05, "loss": 0.7126, "step": 379300 }, { "epoch": 3.87, "learning_rate": 5.246172674084107e-05, "loss": 0.7017, "step": 379400 }, { "epoch": 3.87, "learning_rate": 5.2457328901258465e-05, "loss": 0.727, "step": 379500 }, { "epoch": 3.87, "learning_rate": 5.245292996364651e-05, "loss": 0.7639, "step": 379600 }, { "epoch": 3.87, "learning_rate": 5.244852992822027e-05, "loss": 0.6514, "step": 379700 }, { "epoch": 3.87, "learning_rate": 5.2444128795194875e-05, "loss": 0.8224, "step": 379800 }, { "epoch": 3.87, "learning_rate": 5.243972656478553e-05, "loss": 0.6368, "step": 379900 }, { "epoch": 3.87, "learning_rate": 5.243532323720747e-05, "loss": 0.7287, "step": 380000 }, { "epoch": 3.87, "learning_rate": 5.2430918812675985e-05, "loss": 0.6993, "step": 380100 }, { "epoch": 3.87, "learning_rate": 5.242651329140643e-05, "loss": 0.647, "step": 380200 }, { "epoch": 3.87, "learning_rate": 5.242210667361422e-05, "loss": 0.6995, "step": 380300 }, { "epoch": 3.88, "learning_rate": 5.241769895951479e-05, "loss": 0.7198, "step": 380400 }, { "epoch": 3.88, "learning_rate": 5.241329014932366e-05, "loss": 0.6671, "step": 380500 }, { "epoch": 3.88, "learning_rate": 5.240888024325641e-05, "loss": 0.7025, "step": 380600 }, { "epoch": 3.88, "learning_rate": 5.240446924152864e-05, "loss": 0.7828, "step": 380700 }, { "epoch": 3.88, "learning_rate": 5.240005714435602e-05, "loss": 0.6778, "step": 380800 }, { "epoch": 3.88, "learning_rate": 5.2395643951954296e-05, "loss": 0.6014, "step": 380900 }, { "epoch": 3.88, "learning_rate": 5.239127381283298e-05, "loss": 0.7242, "step": 381000 }, { "epoch": 3.88, "learning_rate": 5.238685844156731e-05, "loss": 0.8567, "step": 381100 }, { "epoch": 3.88, "learning_rate": 5.238244197571785e-05, "loss": 0.6796, "step": 381200 }, { "epoch": 3.88, "learning_rate": 5.237802441550057e-05, "loss": 0.7776, "step": 381300 }, { "epoch": 3.89, "learning_rate": 5.237360576113142e-05, "loss": 0.7667, "step": 381400 }, { "epoch": 3.89, "learning_rate": 5.2369186012826486e-05, "loss": 0.822, "step": 381500 }, { "epoch": 3.89, "learning_rate": 5.236476517080183e-05, "loss": 0.6365, "step": 381600 }, { "epoch": 3.89, "learning_rate": 5.236034323527363e-05, "loss": 0.7893, "step": 381700 }, { "epoch": 3.89, "learning_rate": 5.235592020645809e-05, "loss": 0.6565, "step": 381800 }, { "epoch": 3.89, "learning_rate": 5.235149608457146e-05, "loss": 0.7102, "step": 381900 }, { "epoch": 3.89, "learning_rate": 5.234707086983006e-05, "loss": 0.7489, "step": 382000 }, { "epoch": 3.89, "learning_rate": 5.2342644562450256e-05, "loss": 0.6939, "step": 382100 }, { "epoch": 3.89, "learning_rate": 5.233821716264847e-05, "loss": 0.7159, "step": 382200 }, { "epoch": 3.89, "learning_rate": 5.2333788670641166e-05, "loss": 0.7327, "step": 382300 }, { "epoch": 3.9, "learning_rate": 5.232935908664487e-05, "loss": 0.7246, "step": 382400 }, { "epoch": 3.9, "learning_rate": 5.232492841087618e-05, "loss": 0.7495, "step": 382500 }, { "epoch": 3.9, "learning_rate": 5.232049664355172e-05, "loss": 0.7247, "step": 382600 }, { "epoch": 3.9, "learning_rate": 5.231606378488817e-05, "loss": 0.7211, "step": 382700 }, { "epoch": 3.9, "learning_rate": 5.231162983510229e-05, "loss": 0.8791, "step": 382800 }, { "epoch": 3.9, "learning_rate": 5.2307194794410846e-05, "loss": 0.7544, "step": 382900 }, { "epoch": 3.9, "learning_rate": 5.23027586630307e-05, "loss": 0.8426, "step": 383000 }, { "epoch": 3.9, "learning_rate": 5.229832144117875e-05, "loss": 0.6991, "step": 383100 }, { "epoch": 3.9, "learning_rate": 5.2293883129071956e-05, "loss": 0.719, "step": 383200 }, { "epoch": 3.91, "learning_rate": 5.2289488126343734e-05, "loss": 0.6817, "step": 383300 }, { "epoch": 3.91, "learning_rate": 5.228504764527545e-05, "loss": 0.7542, "step": 383400 }, { "epoch": 3.91, "learning_rate": 5.2280606074601334e-05, "loss": 0.7855, "step": 383500 }, { "epoch": 3.91, "learning_rate": 5.2276163414538526e-05, "loss": 0.7093, "step": 383600 }, { "epoch": 3.91, "learning_rate": 5.227171966530427e-05, "loss": 0.6856, "step": 383700 }, { "epoch": 3.91, "learning_rate": 5.2267274827115844e-05, "loss": 0.7803, "step": 383800 }, { "epoch": 3.91, "learning_rate": 5.2262828900190553e-05, "loss": 0.6746, "step": 383900 }, { "epoch": 3.91, "learning_rate": 5.225838188474579e-05, "loss": 0.7051, "step": 384000 }, { "epoch": 3.91, "learning_rate": 5.2253933780998985e-05, "loss": 0.7314, "step": 384100 }, { "epoch": 3.91, "learning_rate": 5.2249484589167615e-05, "loss": 0.636, "step": 384200 }, { "epoch": 3.92, "learning_rate": 5.224503430946923e-05, "loss": 0.7567, "step": 384300 }, { "epoch": 3.92, "learning_rate": 5.2240582942121415e-05, "loss": 0.8312, "step": 384400 }, { "epoch": 3.92, "learning_rate": 5.223613048734183e-05, "loss": 0.7635, "step": 384500 }, { "epoch": 3.92, "learning_rate": 5.223167694534816e-05, "loss": 0.787, "step": 384600 }, { "epoch": 3.92, "learning_rate": 5.222722231635815e-05, "loss": 0.7603, "step": 384700 }, { "epoch": 3.92, "learning_rate": 5.2222766600589624e-05, "loss": 0.7356, "step": 384800 }, { "epoch": 3.92, "learning_rate": 5.221830979826042e-05, "loss": 0.6904, "step": 384900 }, { "epoch": 3.92, "learning_rate": 5.221385190958847e-05, "loss": 0.7178, "step": 385000 }, { "epoch": 3.92, "learning_rate": 5.220939293479172e-05, "loss": 0.7376, "step": 385100 }, { "epoch": 3.92, "learning_rate": 5.2204932874088196e-05, "loss": 0.6809, "step": 385200 }, { "epoch": 3.93, "learning_rate": 5.220047172769597e-05, "loss": 0.674, "step": 385300 }, { "epoch": 3.93, "learning_rate": 5.2196009495833156e-05, "loss": 0.7107, "step": 385400 }, { "epoch": 3.93, "learning_rate": 5.219154617871794e-05, "loss": 0.7117, "step": 385500 }, { "epoch": 3.93, "learning_rate": 5.218708177656855e-05, "loss": 0.743, "step": 385600 }, { "epoch": 3.93, "learning_rate": 5.2182616289603256e-05, "loss": 0.7359, "step": 385700 }, { "epoch": 3.93, "learning_rate": 5.2178149718040416e-05, "loss": 0.7201, "step": 385800 }, { "epoch": 3.93, "learning_rate": 5.21736820620984e-05, "loss": 0.7052, "step": 385900 }, { "epoch": 3.93, "learning_rate": 5.216921332199566e-05, "loss": 0.7515, "step": 386000 }, { "epoch": 3.93, "learning_rate": 5.216474349795068e-05, "loss": 0.6955, "step": 386100 }, { "epoch": 3.93, "learning_rate": 5.216027259018202e-05, "loss": 0.7773, "step": 386200 }, { "epoch": 3.94, "learning_rate": 5.215580059890828e-05, "loss": 0.8942, "step": 386300 }, { "epoch": 3.94, "learning_rate": 5.215132752434811e-05, "loss": 0.6753, "step": 386400 }, { "epoch": 3.94, "learning_rate": 5.214685336672022e-05, "loss": 0.6987, "step": 386500 }, { "epoch": 3.94, "learning_rate": 5.214237812624336e-05, "loss": 0.6251, "step": 386600 }, { "epoch": 3.94, "learning_rate": 5.213790180313635e-05, "loss": 0.7138, "step": 386700 }, { "epoch": 3.94, "learning_rate": 5.2133424397618045e-05, "loss": 0.7835, "step": 386800 }, { "epoch": 3.94, "learning_rate": 5.212894590990738e-05, "loss": 0.7938, "step": 386900 }, { "epoch": 3.94, "learning_rate": 5.212446634022332e-05, "loss": 0.7334, "step": 387000 }, { "epoch": 3.94, "learning_rate": 5.211998568878489e-05, "loss": 0.6429, "step": 387100 }, { "epoch": 3.94, "learning_rate": 5.211550395581116e-05, "loss": 0.7707, "step": 387200 }, { "epoch": 3.95, "learning_rate": 5.211102114152126e-05, "loss": 0.7496, "step": 387300 }, { "epoch": 3.95, "learning_rate": 5.2106537246134386e-05, "loss": 0.6907, "step": 387400 }, { "epoch": 3.95, "learning_rate": 5.2102052269869765e-05, "loss": 0.6363, "step": 387500 }, { "epoch": 3.95, "learning_rate": 5.2097566212946686e-05, "loss": 0.6997, "step": 387600 }, { "epoch": 3.95, "learning_rate": 5.209307907558449e-05, "loss": 0.8146, "step": 387700 }, { "epoch": 3.95, "learning_rate": 5.2088590858002573e-05, "loss": 0.7243, "step": 387800 }, { "epoch": 3.95, "learning_rate": 5.208410156042038e-05, "loss": 0.7216, "step": 387900 }, { "epoch": 3.95, "learning_rate": 5.2079611183057405e-05, "loss": 0.7504, "step": 388000 }, { "epoch": 3.95, "learning_rate": 5.2075119726133215e-05, "loss": 0.6799, "step": 388100 }, { "epoch": 3.96, "learning_rate": 5.2070627189867405e-05, "loss": 0.7553, "step": 388200 }, { "epoch": 3.96, "learning_rate": 5.2066133574479635e-05, "loss": 0.662, "step": 388300 }, { "epoch": 3.96, "learning_rate": 5.206163888018962e-05, "loss": 0.6261, "step": 388400 }, { "epoch": 3.96, "learning_rate": 5.205714310721712e-05, "loss": 0.7814, "step": 388500 }, { "epoch": 3.96, "learning_rate": 5.205264625578195e-05, "loss": 0.8209, "step": 388600 }, { "epoch": 3.96, "learning_rate": 5.2048148326103983e-05, "loss": 0.7429, "step": 388700 }, { "epoch": 3.96, "learning_rate": 5.2043649318403146e-05, "loss": 0.6306, "step": 388800 }, { "epoch": 3.96, "learning_rate": 5.2039149232899395e-05, "loss": 0.758, "step": 388900 }, { "epoch": 3.96, "learning_rate": 5.203464806981278e-05, "loss": 0.8028, "step": 389000 }, { "epoch": 3.96, "learning_rate": 5.203014582936336e-05, "loss": 0.7253, "step": 389100 }, { "epoch": 3.97, "learning_rate": 5.202564251177129e-05, "loss": 0.803, "step": 389200 }, { "epoch": 3.97, "learning_rate": 5.2021138117256736e-05, "loss": 0.7481, "step": 389300 }, { "epoch": 3.97, "learning_rate": 5.201667770608106e-05, "loss": 0.7113, "step": 389400 }, { "epoch": 3.97, "learning_rate": 5.2012216239843275e-05, "loss": 0.7134, "step": 389500 }, { "epoch": 3.97, "learning_rate": 5.2007708637405996e-05, "loss": 0.7832, "step": 389600 }, { "epoch": 3.97, "learning_rate": 5.200319995892311e-05, "loss": 0.702, "step": 389700 }, { "epoch": 3.97, "learning_rate": 5.199869020461505e-05, "loss": 0.7066, "step": 389800 }, { "epoch": 3.97, "learning_rate": 5.1994179374702324e-05, "loss": 0.6146, "step": 389900 }, { "epoch": 3.97, "learning_rate": 5.198966746940548e-05, "loss": 0.7075, "step": 390000 }, { "epoch": 3.97, "learning_rate": 5.198515448894511e-05, "loss": 0.6927, "step": 390100 }, { "epoch": 3.98, "learning_rate": 5.1980640433541906e-05, "loss": 0.6173, "step": 390200 }, { "epoch": 3.98, "learning_rate": 5.1976125303416547e-05, "loss": 0.852, "step": 390300 }, { "epoch": 3.98, "learning_rate": 5.197160909878981e-05, "loss": 0.6807, "step": 390400 }, { "epoch": 3.98, "learning_rate": 5.1967091819882514e-05, "loss": 0.7088, "step": 390500 }, { "epoch": 3.98, "learning_rate": 5.196257346691551e-05, "loss": 0.6933, "step": 390600 }, { "epoch": 3.98, "learning_rate": 5.195805404010974e-05, "loss": 0.7684, "step": 390700 }, { "epoch": 3.98, "learning_rate": 5.1953533539686155e-05, "loss": 0.8055, "step": 390800 }, { "epoch": 3.98, "learning_rate": 5.194905718691659e-05, "loss": 0.7189, "step": 390900 }, { "epoch": 3.98, "learning_rate": 5.194453455065119e-05, "loss": 0.7271, "step": 391000 }, { "epoch": 3.98, "learning_rate": 5.194001084142902e-05, "loss": 0.7927, "step": 391100 }, { "epoch": 3.99, "learning_rate": 5.193548605947124e-05, "loss": 0.7627, "step": 391200 }, { "epoch": 3.99, "learning_rate": 5.193096020499911e-05, "loss": 0.7761, "step": 391300 }, { "epoch": 3.99, "learning_rate": 5.192643327823389e-05, "loss": 0.8092, "step": 391400 }, { "epoch": 3.99, "learning_rate": 5.192190527939693e-05, "loss": 0.7005, "step": 391500 }, { "epoch": 3.99, "learning_rate": 5.191737620870963e-05, "loss": 0.6574, "step": 391600 }, { "epoch": 3.99, "learning_rate": 5.191284606639344e-05, "loss": 0.7435, "step": 391700 }, { "epoch": 3.99, "learning_rate": 5.190831485266983e-05, "loss": 0.6932, "step": 391800 }, { "epoch": 3.99, "learning_rate": 5.190378256776037e-05, "loss": 0.6915, "step": 391900 }, { "epoch": 3.99, "learning_rate": 5.189924921188667e-05, "loss": 0.7471, "step": 392000 }, { "epoch": 3.99, "learning_rate": 5.1894714785270355e-05, "loss": 0.7607, "step": 392100 }, { "epoch": 4.0, "learning_rate": 5.1890179288133154e-05, "loss": 0.6891, "step": 392200 }, { "epoch": 4.0, "learning_rate": 5.188564272069682e-05, "loss": 0.6575, "step": 392300 }, { "epoch": 4.0, "learning_rate": 5.188110508318316e-05, "loss": 0.7304, "step": 392400 }, { "epoch": 4.0, "learning_rate": 5.187656637581405e-05, "loss": 0.6718, "step": 392500 }, { "epoch": 4.0, "learning_rate": 5.187207200187537e-05, "loss": 0.6916, "step": 392600 }, { "epoch": 4.0, "learning_rate": 5.1867531166154155e-05, "loss": 0.6901, "step": 392700 }, { "epoch": 4.0, "learning_rate": 5.186298926124116e-05, "loss": 0.6898, "step": 392800 }, { "epoch": 4.0, "learning_rate": 5.1858446287358466e-05, "loss": 0.728, "step": 392900 }, { "epoch": 4.0, "learning_rate": 5.1853902244728195e-05, "loss": 0.6864, "step": 393000 }, { "epoch": 4.0, "learning_rate": 5.1849357133572516e-05, "loss": 0.6824, "step": 393100 }, { "epoch": 4.01, "learning_rate": 5.184481095411366e-05, "loss": 0.7674, "step": 393200 }, { "epoch": 4.01, "learning_rate": 5.1840263706573917e-05, "loss": 0.7345, "step": 393300 }, { "epoch": 4.01, "learning_rate": 5.183571539117562e-05, "loss": 0.6891, "step": 393400 }, { "epoch": 4.01, "learning_rate": 5.1831166008141145e-05, "loss": 0.7363, "step": 393500 }, { "epoch": 4.01, "learning_rate": 5.182661555769292e-05, "loss": 0.5846, "step": 393600 }, { "epoch": 4.01, "learning_rate": 5.182206404005347e-05, "loss": 0.6082, "step": 393700 }, { "epoch": 4.01, "learning_rate": 5.1817511455445306e-05, "loss": 0.681, "step": 393800 }, { "epoch": 4.01, "learning_rate": 5.181295780409102e-05, "loss": 0.721, "step": 393900 }, { "epoch": 4.01, "learning_rate": 5.180840308621328e-05, "loss": 0.7568, "step": 394000 }, { "epoch": 4.02, "learning_rate": 5.180384730203479e-05, "loss": 0.67, "step": 394100 }, { "epoch": 4.02, "learning_rate": 5.179929045177827e-05, "loss": 0.6214, "step": 394200 }, { "epoch": 4.02, "learning_rate": 5.179473253566654e-05, "loss": 0.7553, "step": 394300 }, { "epoch": 4.02, "learning_rate": 5.179017355392245e-05, "loss": 0.6994, "step": 394400 }, { "epoch": 4.02, "learning_rate": 5.178561350676891e-05, "loss": 0.6939, "step": 394500 }, { "epoch": 4.02, "learning_rate": 5.178105239442889e-05, "loss": 0.7227, "step": 394600 }, { "epoch": 4.02, "learning_rate": 5.177649021712539e-05, "loss": 0.6822, "step": 394700 }, { "epoch": 4.02, "learning_rate": 5.177192697508146e-05, "loss": 0.6834, "step": 394800 }, { "epoch": 4.02, "learning_rate": 5.1767362668520236e-05, "loss": 0.6197, "step": 394900 }, { "epoch": 4.02, "learning_rate": 5.176279729766488e-05, "loss": 0.6922, "step": 395000 }, { "epoch": 4.03, "learning_rate": 5.175823086273861e-05, "loss": 0.7451, "step": 395100 }, { "epoch": 4.03, "learning_rate": 5.1753663363964695e-05, "loss": 0.6559, "step": 395200 }, { "epoch": 4.03, "learning_rate": 5.174909480156646e-05, "loss": 0.6522, "step": 395300 }, { "epoch": 4.03, "learning_rate": 5.174452517576728e-05, "loss": 0.7406, "step": 395400 }, { "epoch": 4.03, "learning_rate": 5.173995448679059e-05, "loss": 0.7419, "step": 395500 }, { "epoch": 4.03, "learning_rate": 5.1735382734859856e-05, "loss": 0.6474, "step": 395600 }, { "epoch": 4.03, "learning_rate": 5.1730809920198624e-05, "loss": 0.7253, "step": 395700 }, { "epoch": 4.03, "learning_rate": 5.1726236043030464e-05, "loss": 0.6698, "step": 395800 }, { "epoch": 4.03, "learning_rate": 5.1721661103579014e-05, "loss": 0.7034, "step": 395900 }, { "epoch": 4.03, "learning_rate": 5.1717085102067964e-05, "loss": 0.6473, "step": 396000 }, { "epoch": 4.04, "learning_rate": 5.171250803872106e-05, "loss": 0.7021, "step": 396100 }, { "epoch": 4.04, "learning_rate": 5.170792991376208e-05, "loss": 0.7163, "step": 396200 }, { "epoch": 4.04, "learning_rate": 5.170335072741488e-05, "loss": 0.6545, "step": 396300 }, { "epoch": 4.04, "learning_rate": 5.169877047990335e-05, "loss": 0.67, "step": 396400 }, { "epoch": 4.04, "learning_rate": 5.169418917145142e-05, "loss": 0.6119, "step": 396500 }, { "epoch": 4.04, "learning_rate": 5.1689606802283116e-05, "loss": 0.6803, "step": 396600 }, { "epoch": 4.04, "learning_rate": 5.168502337262247e-05, "loss": 0.7381, "step": 396700 }, { "epoch": 4.04, "learning_rate": 5.16804388826936e-05, "loss": 0.7128, "step": 396800 }, { "epoch": 4.04, "learning_rate": 5.167585333272064e-05, "loss": 0.7427, "step": 396900 }, { "epoch": 4.04, "learning_rate": 5.1671266722927816e-05, "loss": 0.6865, "step": 397000 }, { "epoch": 4.05, "learning_rate": 5.1666679053539374e-05, "loss": 0.7076, "step": 397100 }, { "epoch": 4.05, "learning_rate": 5.1662090324779626e-05, "loss": 0.7428, "step": 397200 }, { "epoch": 4.05, "learning_rate": 5.165750053687293e-05, "loss": 0.6535, "step": 397300 }, { "epoch": 4.05, "learning_rate": 5.165290969004371e-05, "loss": 0.5942, "step": 397400 }, { "epoch": 4.05, "learning_rate": 5.164831778451641e-05, "loss": 0.7949, "step": 397500 }, { "epoch": 4.05, "learning_rate": 5.164372482051558e-05, "loss": 0.7817, "step": 397600 }, { "epoch": 4.05, "learning_rate": 5.163913079826576e-05, "loss": 0.6795, "step": 397700 }, { "epoch": 4.05, "learning_rate": 5.163453571799157e-05, "loss": 0.688, "step": 397800 }, { "epoch": 4.05, "learning_rate": 5.16299395799177e-05, "loss": 0.6463, "step": 397900 }, { "epoch": 4.05, "learning_rate": 5.162534238426887e-05, "loss": 0.6954, "step": 398000 }, { "epoch": 4.06, "learning_rate": 5.1620744131269845e-05, "loss": 0.6094, "step": 398100 }, { "epoch": 4.06, "learning_rate": 5.1616144821145454e-05, "loss": 0.7663, "step": 398200 }, { "epoch": 4.06, "learning_rate": 5.1611544454120586e-05, "loss": 0.8094, "step": 398300 }, { "epoch": 4.06, "learning_rate": 5.1606943030420165e-05, "loss": 0.6036, "step": 398400 }, { "epoch": 4.06, "learning_rate": 5.160234055026917e-05, "loss": 0.7686, "step": 398500 }, { "epoch": 4.06, "learning_rate": 5.1597737013892645e-05, "loss": 0.6722, "step": 398600 }, { "epoch": 4.06, "learning_rate": 5.159313242151566e-05, "loss": 0.6498, "step": 398700 }, { "epoch": 4.06, "learning_rate": 5.158852677336336e-05, "loss": 0.6911, "step": 398800 }, { "epoch": 4.06, "learning_rate": 5.158392006966094e-05, "loss": 0.7501, "step": 398900 }, { "epoch": 4.07, "learning_rate": 5.1579312310633635e-05, "loss": 0.6923, "step": 399000 }, { "epoch": 4.07, "learning_rate": 5.157470349650674e-05, "loss": 0.6912, "step": 399100 }, { "epoch": 4.07, "learning_rate": 5.1570093627505586e-05, "loss": 0.7275, "step": 399200 }, { "epoch": 4.07, "learning_rate": 5.1565482703855584e-05, "loss": 0.689, "step": 399300 }, { "epoch": 4.07, "learning_rate": 5.156087072578217e-05, "loss": 0.7092, "step": 399400 }, { "epoch": 4.07, "learning_rate": 5.155625769351085e-05, "loss": 0.6666, "step": 399500 }, { "epoch": 4.07, "learning_rate": 5.155164360726717e-05, "loss": 0.8005, "step": 399600 }, { "epoch": 4.07, "learning_rate": 5.1547028467276726e-05, "loss": 0.6433, "step": 399700 }, { "epoch": 4.07, "learning_rate": 5.154241227376518e-05, "loss": 0.7297, "step": 399800 }, { "epoch": 4.07, "learning_rate": 5.1537841204639364e-05, "loss": 0.7473, "step": 399900 }, { "epoch": 4.08, "learning_rate": 5.1533222915292354e-05, "loss": 0.725, "step": 400000 }, { "epoch": 4.08, "learning_rate": 5.1528603573099234e-05, "loss": 0.6883, "step": 400100 }, { "epoch": 4.08, "learning_rate": 5.152398317828589e-05, "loss": 0.6748, "step": 400200 }, { "epoch": 4.08, "learning_rate": 5.151936173107821e-05, "loss": 0.6349, "step": 400300 }, { "epoch": 4.08, "learning_rate": 5.15147854619034e-05, "loss": 0.6914, "step": 400400 }, { "epoch": 4.08, "learning_rate": 5.15101619211033e-05, "loss": 0.7088, "step": 400500 }, { "epoch": 4.08, "learning_rate": 5.150553732858465e-05, "loss": 0.7469, "step": 400600 }, { "epoch": 4.08, "learning_rate": 5.150091168457356e-05, "loss": 0.6863, "step": 400700 }, { "epoch": 4.08, "learning_rate": 5.149628498929621e-05, "loss": 0.7228, "step": 400800 }, { "epoch": 4.08, "learning_rate": 5.1491657242978797e-05, "loss": 0.8432, "step": 400900 }, { "epoch": 4.09, "learning_rate": 5.1487028445847596e-05, "loss": 0.6141, "step": 401000 }, { "epoch": 4.09, "learning_rate": 5.1482398598128936e-05, "loss": 0.5481, "step": 401100 }, { "epoch": 4.09, "learning_rate": 5.1477767700049183e-05, "loss": 0.7714, "step": 401200 }, { "epoch": 4.09, "learning_rate": 5.1473135751834775e-05, "loss": 0.6902, "step": 401300 }, { "epoch": 4.09, "learning_rate": 5.146850275371216e-05, "loss": 0.7318, "step": 401400 }, { "epoch": 4.09, "learning_rate": 5.146386870590788e-05, "loss": 0.6945, "step": 401500 }, { "epoch": 4.09, "learning_rate": 5.145923360864851e-05, "loss": 0.7537, "step": 401600 }, { "epoch": 4.09, "learning_rate": 5.145459746216068e-05, "loss": 0.7469, "step": 401700 }, { "epoch": 4.09, "learning_rate": 5.1449960266671065e-05, "loss": 0.7908, "step": 401800 }, { "epoch": 4.09, "learning_rate": 5.144532202240641e-05, "loss": 0.6816, "step": 401900 }, { "epoch": 4.1, "learning_rate": 5.144068272959347e-05, "loss": 0.6899, "step": 402000 }, { "epoch": 4.1, "learning_rate": 5.143604238845912e-05, "loss": 0.7818, "step": 402100 }, { "epoch": 4.1, "learning_rate": 5.14314009992302e-05, "loss": 0.8172, "step": 402200 }, { "epoch": 4.1, "learning_rate": 5.1426758562133665e-05, "loss": 0.645, "step": 402300 }, { "epoch": 4.1, "learning_rate": 5.142211507739652e-05, "loss": 0.6912, "step": 402400 }, { "epoch": 4.1, "learning_rate": 5.141747054524578e-05, "loss": 0.7407, "step": 402500 }, { "epoch": 4.1, "learning_rate": 5.141282496590855e-05, "loss": 0.6894, "step": 402600 }, { "epoch": 4.1, "learning_rate": 5.140817833961195e-05, "loss": 0.6066, "step": 402700 }, { "epoch": 4.1, "learning_rate": 5.1403530666583195e-05, "loss": 0.7579, "step": 402800 }, { "epoch": 4.1, "learning_rate": 5.139888194704953e-05, "loss": 0.5961, "step": 402900 }, { "epoch": 4.11, "learning_rate": 5.139427868407466e-05, "loss": 0.7051, "step": 403000 }, { "epoch": 4.11, "learning_rate": 5.138962788267246e-05, "loss": 0.7467, "step": 403100 }, { "epoch": 4.11, "learning_rate": 5.1384976035445094e-05, "loss": 0.7721, "step": 403200 }, { "epoch": 4.11, "learning_rate": 5.1380323142620036e-05, "loss": 0.662, "step": 403300 }, { "epoch": 4.11, "learning_rate": 5.137566920442475e-05, "loss": 0.7069, "step": 403400 }, { "epoch": 4.11, "learning_rate": 5.137101422108681e-05, "loss": 0.5968, "step": 403500 }, { "epoch": 4.11, "learning_rate": 5.1366358192833815e-05, "loss": 0.5803, "step": 403600 }, { "epoch": 4.11, "learning_rate": 5.136170111989341e-05, "loss": 0.5757, "step": 403700 }, { "epoch": 4.11, "learning_rate": 5.1357089588836624e-05, "loss": 0.7869, "step": 403800 }, { "epoch": 4.12, "learning_rate": 5.135243043764575e-05, "loss": 0.7007, "step": 403900 }, { "epoch": 4.12, "learning_rate": 5.134777024244845e-05, "loss": 0.6961, "step": 404000 }, { "epoch": 4.12, "learning_rate": 5.1343109003472596e-05, "loss": 0.633, "step": 404100 }, { "epoch": 4.12, "learning_rate": 5.133844672094607e-05, "loss": 0.6078, "step": 404200 }, { "epoch": 4.12, "learning_rate": 5.133378339509684e-05, "loss": 0.7125, "step": 404300 }, { "epoch": 4.12, "learning_rate": 5.1329119026152916e-05, "loss": 0.736, "step": 404400 }, { "epoch": 4.12, "learning_rate": 5.132445361434237e-05, "loss": 0.6975, "step": 404500 }, { "epoch": 4.12, "learning_rate": 5.1319787159893285e-05, "loss": 0.7345, "step": 404600 }, { "epoch": 4.12, "learning_rate": 5.1315119663033835e-05, "loss": 0.69, "step": 404700 }, { "epoch": 4.12, "learning_rate": 5.131045112399224e-05, "loss": 0.708, "step": 404800 }, { "epoch": 4.13, "learning_rate": 5.130578154299677e-05, "loss": 0.7369, "step": 404900 }, { "epoch": 4.13, "learning_rate": 5.130111092027571e-05, "loss": 0.7915, "step": 405000 }, { "epoch": 4.13, "learning_rate": 5.129643925605746e-05, "loss": 0.7232, "step": 405100 }, { "epoch": 4.13, "learning_rate": 5.1291766550570416e-05, "loss": 0.7162, "step": 405200 }, { "epoch": 4.13, "learning_rate": 5.128709280404305e-05, "loss": 0.6872, "step": 405300 }, { "epoch": 4.13, "learning_rate": 5.128241801670388e-05, "loss": 0.6106, "step": 405400 }, { "epoch": 4.13, "learning_rate": 5.127774218878147e-05, "loss": 0.7566, "step": 405500 }, { "epoch": 4.13, "learning_rate": 5.127306532050445e-05, "loss": 0.7852, "step": 405600 }, { "epoch": 4.13, "learning_rate": 5.1268387412101495e-05, "loss": 0.7609, "step": 405700 }, { "epoch": 4.13, "learning_rate": 5.126370846380131e-05, "loss": 0.7256, "step": 405800 }, { "epoch": 4.14, "learning_rate": 5.125902847583268e-05, "loss": 0.6586, "step": 405900 }, { "epoch": 4.14, "learning_rate": 5.125434744842442e-05, "loss": 0.6315, "step": 406000 }, { "epoch": 4.14, "learning_rate": 5.1249665381805416e-05, "loss": 0.6643, "step": 406100 }, { "epoch": 4.14, "learning_rate": 5.124498227620458e-05, "loss": 0.7323, "step": 406200 }, { "epoch": 4.14, "learning_rate": 5.1240298131850896e-05, "loss": 0.6828, "step": 406300 }, { "epoch": 4.14, "learning_rate": 5.123561294897339e-05, "loss": 0.7082, "step": 406400 }, { "epoch": 4.14, "learning_rate": 5.1230926727801136e-05, "loss": 0.6944, "step": 406500 }, { "epoch": 4.14, "learning_rate": 5.1226239468563266e-05, "loss": 0.6735, "step": 406600 }, { "epoch": 4.14, "learning_rate": 5.122155117148896e-05, "loss": 0.6979, "step": 406700 }, { "epoch": 4.14, "learning_rate": 5.121686183680745e-05, "loss": 0.7746, "step": 406800 }, { "epoch": 4.15, "learning_rate": 5.1212171464747996e-05, "loss": 0.6306, "step": 406900 }, { "epoch": 4.15, "learning_rate": 5.1207480055539956e-05, "loss": 0.6502, "step": 407000 }, { "epoch": 4.15, "learning_rate": 5.12027876094127e-05, "loss": 0.6377, "step": 407100 }, { "epoch": 4.15, "learning_rate": 5.1198094126595664e-05, "loss": 0.6257, "step": 407200 }, { "epoch": 4.15, "learning_rate": 5.119339960731832e-05, "loss": 0.7213, "step": 407300 }, { "epoch": 4.15, "learning_rate": 5.118870405181022e-05, "loss": 0.6488, "step": 407400 }, { "epoch": 4.15, "learning_rate": 5.1184007460300935e-05, "loss": 0.6817, "step": 407500 }, { "epoch": 4.15, "learning_rate": 5.11793098330201e-05, "loss": 0.6763, "step": 407600 }, { "epoch": 4.15, "learning_rate": 5.117461117019741e-05, "loss": 0.7927, "step": 407700 }, { "epoch": 4.15, "learning_rate": 5.11699114720626e-05, "loss": 0.7365, "step": 407800 }, { "epoch": 4.16, "learning_rate": 5.116521073884545e-05, "loss": 0.6903, "step": 407900 }, { "epoch": 4.16, "learning_rate": 5.1160555993578276e-05, "loss": 0.6971, "step": 408000 }, { "epoch": 4.16, "learning_rate": 5.11558532012311e-05, "loss": 0.7181, "step": 408100 }, { "epoch": 4.16, "learning_rate": 5.115114937448896e-05, "loss": 0.6944, "step": 408200 }, { "epoch": 4.16, "learning_rate": 5.1146444513581855e-05, "loss": 0.6757, "step": 408300 }, { "epoch": 4.16, "learning_rate": 5.114173861873979e-05, "loss": 0.6896, "step": 408400 }, { "epoch": 4.16, "learning_rate": 5.1137031690192884e-05, "loss": 0.6571, "step": 408500 }, { "epoch": 4.16, "learning_rate": 5.113232372817126e-05, "loss": 0.7753, "step": 408600 }, { "epoch": 4.16, "learning_rate": 5.112761473290512e-05, "loss": 0.6441, "step": 408700 }, { "epoch": 4.16, "learning_rate": 5.11229047046247e-05, "loss": 0.6385, "step": 408800 }, { "epoch": 4.17, "learning_rate": 5.11181936435603e-05, "loss": 0.673, "step": 408900 }, { "epoch": 4.17, "learning_rate": 5.111348154994226e-05, "loss": 0.6175, "step": 409000 }, { "epoch": 4.17, "learning_rate": 5.110876842400096e-05, "loss": 0.6319, "step": 409100 }, { "epoch": 4.17, "learning_rate": 5.110405426596686e-05, "loss": 0.6489, "step": 409200 }, { "epoch": 4.17, "learning_rate": 5.109933907607045e-05, "loss": 0.6717, "step": 409300 }, { "epoch": 4.17, "learning_rate": 5.109462285454228e-05, "loss": 0.7219, "step": 409400 }, { "epoch": 4.17, "learning_rate": 5.108990560161293e-05, "loss": 0.6584, "step": 409500 }, { "epoch": 4.17, "learning_rate": 5.108518731751305e-05, "loss": 0.6518, "step": 409600 }, { "epoch": 4.17, "learning_rate": 5.108046800247335e-05, "loss": 0.6894, "step": 409700 }, { "epoch": 4.18, "learning_rate": 5.1075747656724564e-05, "loss": 0.6614, "step": 409800 }, { "epoch": 4.18, "learning_rate": 5.107102628049749e-05, "loss": 0.6827, "step": 409900 }, { "epoch": 4.18, "learning_rate": 5.106630387402297e-05, "loss": 0.7216, "step": 410000 }, { "epoch": 4.18, "learning_rate": 5.106158043753191e-05, "loss": 0.7221, "step": 410100 }, { "epoch": 4.18, "learning_rate": 5.105685597125526e-05, "loss": 0.7594, "step": 410200 }, { "epoch": 4.18, "learning_rate": 5.1052130475424015e-05, "loss": 0.7165, "step": 410300 }, { "epoch": 4.18, "learning_rate": 5.104740395026922e-05, "loss": 0.6979, "step": 410400 }, { "epoch": 4.18, "learning_rate": 5.104272367665769e-05, "loss": 0.7685, "step": 410500 }, { "epoch": 4.18, "learning_rate": 5.103799510383662e-05, "loss": 0.721, "step": 410600 }, { "epoch": 4.18, "learning_rate": 5.103326550238314e-05, "loss": 0.6945, "step": 410700 }, { "epoch": 4.19, "learning_rate": 5.1028534872528484e-05, "loss": 0.683, "step": 410800 }, { "epoch": 4.19, "learning_rate": 5.102380321450397e-05, "loss": 0.6429, "step": 410900 }, { "epoch": 4.19, "learning_rate": 5.101907052854094e-05, "loss": 0.7654, "step": 411000 }, { "epoch": 4.19, "learning_rate": 5.10143368148708e-05, "loss": 0.7617, "step": 411100 }, { "epoch": 4.19, "learning_rate": 5.1009602073724995e-05, "loss": 0.7148, "step": 411200 }, { "epoch": 4.19, "learning_rate": 5.1004866305335025e-05, "loss": 0.7772, "step": 411300 }, { "epoch": 4.19, "learning_rate": 5.1000129509932455e-05, "loss": 0.7595, "step": 411400 }, { "epoch": 4.19, "learning_rate": 5.099539168774886e-05, "loss": 0.7132, "step": 411500 }, { "epoch": 4.19, "learning_rate": 5.099065283901591e-05, "loss": 0.7462, "step": 411600 }, { "epoch": 4.19, "learning_rate": 5.09859129639653e-05, "loss": 0.7118, "step": 411700 }, { "epoch": 4.2, "learning_rate": 5.0981172062828787e-05, "loss": 0.7628, "step": 411800 }, { "epoch": 4.2, "learning_rate": 5.097643013583817e-05, "loss": 0.6506, "step": 411900 }, { "epoch": 4.2, "learning_rate": 5.09716871832253e-05, "loss": 0.669, "step": 412000 }, { "epoch": 4.2, "learning_rate": 5.096694320522207e-05, "loss": 0.6966, "step": 412100 }, { "epoch": 4.2, "learning_rate": 5.0962198202060454e-05, "loss": 0.64, "step": 412200 }, { "epoch": 4.2, "learning_rate": 5.095745217397243e-05, "loss": 0.6732, "step": 412300 }, { "epoch": 4.2, "learning_rate": 5.095270512119008e-05, "loss": 0.679, "step": 412400 }, { "epoch": 4.2, "learning_rate": 5.0947957043945475e-05, "loss": 0.6008, "step": 412500 }, { "epoch": 4.2, "learning_rate": 5.094320794247078e-05, "loss": 0.6721, "step": 412600 }, { "epoch": 4.2, "learning_rate": 5.0938457816998205e-05, "loss": 0.7097, "step": 412700 }, { "epoch": 4.21, "learning_rate": 5.0933706667759996e-05, "loss": 0.6769, "step": 412800 }, { "epoch": 4.21, "learning_rate": 5.092895449498845e-05, "loss": 0.6894, "step": 412900 }, { "epoch": 4.21, "learning_rate": 5.092420129891594e-05, "loss": 0.7114, "step": 413000 }, { "epoch": 4.21, "learning_rate": 5.0919447079774845e-05, "loss": 0.7359, "step": 413100 }, { "epoch": 4.21, "learning_rate": 5.091469183779763e-05, "loss": 0.6115, "step": 413200 }, { "epoch": 4.21, "learning_rate": 5.0909935573216795e-05, "loss": 0.7034, "step": 413300 }, { "epoch": 4.21, "learning_rate": 5.09051782862649e-05, "loss": 0.5739, "step": 413400 }, { "epoch": 4.21, "learning_rate": 5.0900419977174535e-05, "loss": 0.7178, "step": 413500 }, { "epoch": 4.21, "learning_rate": 5.089566064617837e-05, "loss": 0.6378, "step": 413600 }, { "epoch": 4.21, "learning_rate": 5.089090029350909e-05, "loss": 0.7007, "step": 413700 }, { "epoch": 4.22, "learning_rate": 5.088613891939946e-05, "loss": 0.7757, "step": 413800 }, { "epoch": 4.22, "learning_rate": 5.088137652408228e-05, "loss": 0.6297, "step": 413900 }, { "epoch": 4.22, "learning_rate": 5.08766131077904e-05, "loss": 0.6046, "step": 414000 }, { "epoch": 4.22, "learning_rate": 5.0871848670756725e-05, "loss": 0.7112, "step": 414100 }, { "epoch": 4.22, "learning_rate": 5.08670832132142e-05, "loss": 0.725, "step": 414200 }, { "epoch": 4.22, "learning_rate": 5.086231673539584e-05, "loss": 0.6296, "step": 414300 }, { "epoch": 4.22, "learning_rate": 5.0857549237534696e-05, "loss": 0.6737, "step": 414400 }, { "epoch": 4.22, "learning_rate": 5.085278071986386e-05, "loss": 0.6706, "step": 414500 }, { "epoch": 4.22, "learning_rate": 5.084801118261649e-05, "loss": 0.7943, "step": 414600 }, { "epoch": 4.23, "learning_rate": 5.0843240626025786e-05, "loss": 0.6823, "step": 414700 }, { "epoch": 4.23, "learning_rate": 5.0838469050325005e-05, "loss": 0.6725, "step": 414800 }, { "epoch": 4.23, "learning_rate": 5.0833696455747436e-05, "loss": 0.822, "step": 414900 }, { "epoch": 4.23, "learning_rate": 5.0828970583700184e-05, "loss": 0.6816, "step": 415000 }, { "epoch": 4.23, "learning_rate": 5.082419596225211e-05, "loss": 0.7256, "step": 415100 }, { "epoch": 4.23, "learning_rate": 5.0819420322625124e-05, "loss": 0.7042, "step": 415200 }, { "epoch": 4.23, "learning_rate": 5.081464366505274e-05, "loss": 0.693, "step": 415300 }, { "epoch": 4.23, "learning_rate": 5.080986598976849e-05, "loss": 0.6159, "step": 415400 }, { "epoch": 4.23, "learning_rate": 5.080508729700598e-05, "loss": 0.6622, "step": 415500 }, { "epoch": 4.23, "learning_rate": 5.0800355389133515e-05, "loss": 0.6557, "step": 415600 }, { "epoch": 4.24, "learning_rate": 5.0795574672284434e-05, "loss": 0.6944, "step": 415700 }, { "epoch": 4.24, "learning_rate": 5.079079293865585e-05, "loss": 0.7134, "step": 415800 }, { "epoch": 4.24, "learning_rate": 5.078601018848157e-05, "loss": 0.6673, "step": 415900 }, { "epoch": 4.24, "learning_rate": 5.078122642199544e-05, "loss": 0.6115, "step": 416000 }, { "epoch": 4.24, "learning_rate": 5.0776441639431334e-05, "loss": 0.6758, "step": 416100 }, { "epoch": 4.24, "learning_rate": 5.077165584102324e-05, "loss": 0.6866, "step": 416200 }, { "epoch": 4.24, "learning_rate": 5.076686902700513e-05, "loss": 0.6241, "step": 416300 }, { "epoch": 4.24, "learning_rate": 5.0762081197611046e-05, "loss": 0.5715, "step": 416400 }, { "epoch": 4.24, "learning_rate": 5.07572923530751e-05, "loss": 0.7165, "step": 416500 }, { "epoch": 4.24, "learning_rate": 5.0752550397248893e-05, "loss": 0.7817, "step": 416600 }, { "epoch": 4.25, "learning_rate": 5.0747759533277276e-05, "loss": 0.6904, "step": 416700 }, { "epoch": 4.25, "learning_rate": 5.074296765486402e-05, "loss": 0.6678, "step": 416800 }, { "epoch": 4.25, "learning_rate": 5.073817476224345e-05, "loss": 0.7303, "step": 416900 }, { "epoch": 4.25, "learning_rate": 5.073338085564988e-05, "loss": 0.6607, "step": 417000 }, { "epoch": 4.25, "learning_rate": 5.072858593531772e-05, "loss": 0.7032, "step": 417100 }, { "epoch": 4.25, "learning_rate": 5.072379000148141e-05, "loss": 0.709, "step": 417200 }, { "epoch": 4.25, "learning_rate": 5.071899305437545e-05, "loss": 0.7181, "step": 417300 }, { "epoch": 4.25, "learning_rate": 5.0714195094234356e-05, "loss": 0.6621, "step": 417400 }, { "epoch": 4.25, "learning_rate": 5.070939612129276e-05, "loss": 0.6692, "step": 417500 }, { "epoch": 4.25, "learning_rate": 5.070459613578527e-05, "loss": 0.6466, "step": 417600 }, { "epoch": 4.26, "learning_rate": 5.069979513794658e-05, "loss": 0.6777, "step": 417700 }, { "epoch": 4.26, "learning_rate": 5.069499312801145e-05, "loss": 0.6604, "step": 417800 }, { "epoch": 4.26, "learning_rate": 5.069019010621465e-05, "loss": 0.7097, "step": 417900 }, { "epoch": 4.26, "learning_rate": 5.0685386072791026e-05, "loss": 0.6308, "step": 418000 }, { "epoch": 4.26, "learning_rate": 5.068058102797547e-05, "loss": 0.7607, "step": 418100 }, { "epoch": 4.26, "learning_rate": 5.067577497200291e-05, "loss": 0.6231, "step": 418200 }, { "epoch": 4.26, "learning_rate": 5.067096790510836e-05, "loss": 0.7091, "step": 418300 }, { "epoch": 4.26, "learning_rate": 5.0666159827526816e-05, "loss": 0.6366, "step": 418400 }, { "epoch": 4.26, "learning_rate": 5.066135073949339e-05, "loss": 0.6548, "step": 418500 }, { "epoch": 4.26, "learning_rate": 5.0656540641243224e-05, "loss": 0.6548, "step": 418600 }, { "epoch": 4.27, "learning_rate": 5.0651729533011495e-05, "loss": 0.7095, "step": 418700 }, { "epoch": 4.27, "learning_rate": 5.0646917415033416e-05, "loss": 0.6764, "step": 418800 }, { "epoch": 4.27, "learning_rate": 5.06421042875443e-05, "loss": 0.7279, "step": 418900 }, { "epoch": 4.27, "learning_rate": 5.063729015077947e-05, "loss": 0.6231, "step": 419000 }, { "epoch": 4.27, "learning_rate": 5.063247500497431e-05, "loss": 0.6691, "step": 419100 }, { "epoch": 4.27, "learning_rate": 5.062765885036425e-05, "loss": 0.666, "step": 419200 }, { "epoch": 4.27, "learning_rate": 5.0622841687184763e-05, "loss": 0.7201, "step": 419300 }, { "epoch": 4.27, "learning_rate": 5.06180235156714e-05, "loss": 0.6714, "step": 419400 }, { "epoch": 4.27, "learning_rate": 5.061320433605973e-05, "loss": 0.6416, "step": 419500 }, { "epoch": 4.27, "learning_rate": 5.060843235544825e-05, "loss": 0.7886, "step": 419600 }, { "epoch": 4.28, "learning_rate": 5.060361117042201e-05, "loss": 0.7039, "step": 419700 }, { "epoch": 4.28, "learning_rate": 5.0598788978002125e-05, "loss": 0.6805, "step": 419800 }, { "epoch": 4.28, "learning_rate": 5.0593965778424384e-05, "loss": 0.6682, "step": 419900 }, { "epoch": 4.28, "learning_rate": 5.058914157192463e-05, "loss": 0.6872, "step": 420000 }, { "epoch": 4.28, "learning_rate": 5.058431635873871e-05, "loss": 0.7635, "step": 420100 }, { "epoch": 4.28, "learning_rate": 5.0579490139102554e-05, "loss": 0.5902, "step": 420200 }, { "epoch": 4.28, "learning_rate": 5.057466291325216e-05, "loss": 0.7591, "step": 420300 }, { "epoch": 4.28, "learning_rate": 5.0569834681423516e-05, "loss": 0.7737, "step": 420400 }, { "epoch": 4.28, "learning_rate": 5.056500544385272e-05, "loss": 0.6636, "step": 420500 }, { "epoch": 4.29, "learning_rate": 5.056017520077588e-05, "loss": 0.6258, "step": 420600 }, { "epoch": 4.29, "learning_rate": 5.0555343952429173e-05, "loss": 0.6943, "step": 420700 }, { "epoch": 4.29, "learning_rate": 5.055051169904882e-05, "loss": 0.6887, "step": 420800 }, { "epoch": 4.29, "learning_rate": 5.0545678440871074e-05, "loss": 0.6954, "step": 420900 }, { "epoch": 4.29, "learning_rate": 5.054084417813227e-05, "loss": 0.726, "step": 421000 }, { "epoch": 4.29, "learning_rate": 5.0536008911068754e-05, "loss": 0.7022, "step": 421100 }, { "epoch": 4.29, "learning_rate": 5.0531172639916964e-05, "loss": 0.7421, "step": 421200 }, { "epoch": 4.29, "learning_rate": 5.052633536491336e-05, "loss": 0.7415, "step": 421300 }, { "epoch": 4.29, "learning_rate": 5.052149708629445e-05, "loss": 0.6707, "step": 421400 }, { "epoch": 4.29, "learning_rate": 5.051665780429679e-05, "loss": 0.6245, "step": 421500 }, { "epoch": 4.3, "learning_rate": 5.0511817519157e-05, "loss": 0.6403, "step": 421600 }, { "epoch": 4.3, "learning_rate": 5.050697623111175e-05, "loss": 0.5027, "step": 421700 }, { "epoch": 4.3, "learning_rate": 5.050213394039774e-05, "loss": 0.6563, "step": 421800 }, { "epoch": 4.3, "learning_rate": 5.049729064725173e-05, "loss": 0.6283, "step": 421900 }, { "epoch": 4.3, "learning_rate": 5.049244635191052e-05, "loss": 0.7218, "step": 422000 }, { "epoch": 4.3, "learning_rate": 5.048760105461098e-05, "loss": 0.735, "step": 422100 }, { "epoch": 4.3, "learning_rate": 5.0482754755590026e-05, "loss": 0.6434, "step": 422200 }, { "epoch": 4.3, "learning_rate": 5.0477907455084584e-05, "loss": 0.7977, "step": 422300 }, { "epoch": 4.3, "learning_rate": 5.047305915333168e-05, "loss": 0.728, "step": 422400 }, { "epoch": 4.3, "learning_rate": 5.046820985056835e-05, "loss": 0.7504, "step": 422500 }, { "epoch": 4.31, "learning_rate": 5.046335954703172e-05, "loss": 0.6785, "step": 422600 }, { "epoch": 4.31, "learning_rate": 5.045850824295892e-05, "loss": 0.669, "step": 422700 }, { "epoch": 4.31, "learning_rate": 5.045365593858716e-05, "loss": 0.6211, "step": 422800 }, { "epoch": 4.31, "learning_rate": 5.044880263415368e-05, "loss": 0.6683, "step": 422900 }, { "epoch": 4.31, "learning_rate": 5.044394832989579e-05, "loss": 0.6364, "step": 423000 }, { "epoch": 4.31, "learning_rate": 5.043909302605083e-05, "loss": 0.6646, "step": 423100 }, { "epoch": 4.31, "learning_rate": 5.043423672285619e-05, "loss": 0.7411, "step": 423200 }, { "epoch": 4.31, "learning_rate": 5.042937942054932e-05, "loss": 0.6846, "step": 423300 }, { "epoch": 4.31, "learning_rate": 5.0424521119367714e-05, "loss": 0.654, "step": 423400 }, { "epoch": 4.31, "learning_rate": 5.041966181954891e-05, "loss": 0.7416, "step": 423500 }, { "epoch": 4.32, "learning_rate": 5.04148015213305e-05, "loss": 0.6738, "step": 423600 }, { "epoch": 4.32, "learning_rate": 5.0409940224950134e-05, "loss": 0.5921, "step": 423700 }, { "epoch": 4.32, "learning_rate": 5.040507793064548e-05, "loss": 0.6995, "step": 423800 }, { "epoch": 4.32, "learning_rate": 5.0400214638654295e-05, "loss": 0.6897, "step": 423900 }, { "epoch": 4.32, "learning_rate": 5.0395350349214356e-05, "loss": 0.6112, "step": 424000 }, { "epoch": 4.32, "learning_rate": 5.039048506256349e-05, "loss": 0.7192, "step": 424100 }, { "epoch": 4.32, "learning_rate": 5.038561877893959e-05, "loss": 0.6194, "step": 424200 }, { "epoch": 4.32, "learning_rate": 5.03807514985806e-05, "loss": 0.6337, "step": 424300 }, { "epoch": 4.32, "learning_rate": 5.0375883221724474e-05, "loss": 0.6753, "step": 424400 }, { "epoch": 4.32, "learning_rate": 5.037101394860926e-05, "loss": 0.6177, "step": 424500 }, { "epoch": 4.33, "learning_rate": 5.036614367947303e-05, "loss": 0.6861, "step": 424600 }, { "epoch": 4.33, "learning_rate": 5.0361272414553915e-05, "loss": 0.7046, "step": 424700 }, { "epoch": 4.33, "learning_rate": 5.035640015409009e-05, "loss": 0.7382, "step": 424800 }, { "epoch": 4.33, "learning_rate": 5.035152689831977e-05, "loss": 0.6595, "step": 424900 }, { "epoch": 4.33, "learning_rate": 5.034665264748125e-05, "loss": 0.6663, "step": 425000 }, { "epoch": 4.33, "learning_rate": 5.034177740181282e-05, "loss": 0.669, "step": 425100 }, { "epoch": 4.33, "learning_rate": 5.033690116155288e-05, "loss": 0.6744, "step": 425200 }, { "epoch": 4.33, "learning_rate": 5.0332023926939835e-05, "loss": 0.7273, "step": 425300 }, { "epoch": 4.33, "learning_rate": 5.0327145698212156e-05, "loss": 0.6032, "step": 425400 }, { "epoch": 4.34, "learning_rate": 5.032226647560836e-05, "loss": 0.6687, "step": 425500 }, { "epoch": 4.34, "learning_rate": 5.0317386259367006e-05, "loss": 0.7383, "step": 425600 }, { "epoch": 4.34, "learning_rate": 5.031250504972672e-05, "loss": 0.6944, "step": 425700 }, { "epoch": 4.34, "learning_rate": 5.030762284692615e-05, "loss": 0.7215, "step": 425800 }, { "epoch": 4.34, "learning_rate": 5.030278848807541e-05, "loss": 0.7764, "step": 425900 }, { "epoch": 4.34, "learning_rate": 5.0297953156293906e-05, "loss": 0.7145, "step": 426000 }, { "epoch": 4.34, "learning_rate": 5.02930679952915e-05, "loss": 0.7341, "step": 426100 }, { "epoch": 4.34, "learning_rate": 5.0288230708521945e-05, "loss": 0.7256, "step": 426200 }, { "epoch": 4.34, "learning_rate": 5.0283343573257124e-05, "loss": 0.6768, "step": 426300 }, { "epoch": 4.34, "learning_rate": 5.0278504332436146e-05, "loss": 0.7276, "step": 426400 }, { "epoch": 4.35, "learning_rate": 5.027361522385518e-05, "loss": 0.7011, "step": 426500 }, { "epoch": 4.35, "learning_rate": 5.0268725124015426e-05, "loss": 0.6594, "step": 426600 }, { "epoch": 4.35, "learning_rate": 5.026383403315596e-05, "loss": 0.7124, "step": 426700 }, { "epoch": 4.35, "learning_rate": 5.025894195151595e-05, "loss": 0.6659, "step": 426800 }, { "epoch": 4.35, "learning_rate": 5.025404887933457e-05, "loss": 0.6809, "step": 426900 }, { "epoch": 4.35, "learning_rate": 5.024915481685107e-05, "loss": 0.6606, "step": 427000 }, { "epoch": 4.35, "learning_rate": 5.024425976430474e-05, "loss": 0.671, "step": 427100 }, { "epoch": 4.35, "learning_rate": 5.023936372193492e-05, "loss": 0.6549, "step": 427200 }, { "epoch": 4.35, "learning_rate": 5.023446668998099e-05, "loss": 0.7506, "step": 427300 }, { "epoch": 4.35, "learning_rate": 5.0229568668682405e-05, "loss": 0.6669, "step": 427400 }, { "epoch": 4.36, "learning_rate": 5.022466965827863e-05, "loss": 0.7246, "step": 427500 }, { "epoch": 4.36, "learning_rate": 5.0219769659009195e-05, "loss": 0.7291, "step": 427600 }, { "epoch": 4.36, "learning_rate": 5.021486867111371e-05, "loss": 0.6716, "step": 427700 }, { "epoch": 4.36, "learning_rate": 5.020996669483176e-05, "loss": 0.7209, "step": 427800 }, { "epoch": 4.36, "learning_rate": 5.020506373040305e-05, "loss": 0.7368, "step": 427900 }, { "epoch": 4.36, "learning_rate": 5.020015977806731e-05, "loss": 0.7203, "step": 428000 }, { "epoch": 4.36, "learning_rate": 5.01952548380643e-05, "loss": 0.713, "step": 428100 }, { "epoch": 4.36, "learning_rate": 5.019034891063385e-05, "loss": 0.7189, "step": 428200 }, { "epoch": 4.36, "learning_rate": 5.0185441996015825e-05, "loss": 0.6492, "step": 428300 }, { "epoch": 4.36, "learning_rate": 5.018053409445015e-05, "loss": 0.8632, "step": 428400 }, { "epoch": 4.37, "learning_rate": 5.017562520617679e-05, "loss": 0.6912, "step": 428500 }, { "epoch": 4.37, "learning_rate": 5.0170715331435744e-05, "loss": 0.7184, "step": 428600 }, { "epoch": 4.37, "learning_rate": 5.016580447046711e-05, "loss": 0.7393, "step": 428700 }, { "epoch": 4.37, "learning_rate": 5.0160892623510975e-05, "loss": 0.7107, "step": 428800 }, { "epoch": 4.37, "learning_rate": 5.01559797908075e-05, "loss": 0.6983, "step": 428900 }, { "epoch": 4.37, "learning_rate": 5.015106597259691e-05, "loss": 0.7329, "step": 429000 }, { "epoch": 4.37, "learning_rate": 5.014615116911943e-05, "loss": 0.6207, "step": 429100 }, { "epoch": 4.37, "learning_rate": 5.0141235380615396e-05, "loss": 0.6532, "step": 429200 }, { "epoch": 4.37, "learning_rate": 5.013631860732514e-05, "loss": 0.7355, "step": 429300 }, { "epoch": 4.37, "learning_rate": 5.0131400849489084e-05, "loss": 0.6442, "step": 429400 }, { "epoch": 4.38, "learning_rate": 5.0126482107347656e-05, "loss": 0.6659, "step": 429500 }, { "epoch": 4.38, "learning_rate": 5.012156238114137e-05, "loss": 0.7708, "step": 429600 }, { "epoch": 4.38, "learning_rate": 5.011664167111075e-05, "loss": 0.6709, "step": 429700 }, { "epoch": 4.38, "learning_rate": 5.011171997749641e-05, "loss": 0.7167, "step": 429800 }, { "epoch": 4.38, "learning_rate": 5.010679730053898e-05, "loss": 0.6385, "step": 429900 }, { "epoch": 4.38, "learning_rate": 5.0101873640479154e-05, "loss": 0.7165, "step": 430000 }, { "epoch": 4.38, "learning_rate": 5.009694899755767e-05, "loss": 0.572, "step": 430100 }, { "epoch": 4.38, "learning_rate": 5.009202337201531e-05, "loss": 0.7944, "step": 430200 }, { "epoch": 4.38, "learning_rate": 5.008709676409292e-05, "loss": 0.5477, "step": 430300 }, { "epoch": 4.38, "learning_rate": 5.008216917403136e-05, "loss": 0.6377, "step": 430400 }, { "epoch": 4.39, "learning_rate": 5.007724060207157e-05, "loss": 0.7971, "step": 430500 }, { "epoch": 4.39, "learning_rate": 5.007231104845454e-05, "loss": 0.7742, "step": 430600 }, { "epoch": 4.39, "learning_rate": 5.006738051342128e-05, "loss": 0.7246, "step": 430700 }, { "epoch": 4.39, "learning_rate": 5.0062448997212866e-05, "loss": 0.7557, "step": 430800 }, { "epoch": 4.39, "learning_rate": 5.005751650007043e-05, "loss": 0.7402, "step": 430900 }, { "epoch": 4.39, "learning_rate": 5.005258302223512e-05, "loss": 0.7289, "step": 431000 }, { "epoch": 4.39, "learning_rate": 5.004764856394818e-05, "loss": 0.6567, "step": 431100 }, { "epoch": 4.39, "learning_rate": 5.004271312545087e-05, "loss": 0.712, "step": 431200 }, { "epoch": 4.39, "learning_rate": 5.0037776706984484e-05, "loss": 0.6962, "step": 431300 }, { "epoch": 4.4, "learning_rate": 5.0032839308790394e-05, "loss": 0.7904, "step": 431400 }, { "epoch": 4.4, "learning_rate": 5.002790093111002e-05, "loss": 0.567, "step": 431500 }, { "epoch": 4.4, "learning_rate": 5.00229615741848e-05, "loss": 0.6883, "step": 431600 }, { "epoch": 4.4, "learning_rate": 5.0018021238256265e-05, "loss": 0.671, "step": 431700 }, { "epoch": 4.4, "learning_rate": 5.001307992356594e-05, "loss": 0.801, "step": 431800 }, { "epoch": 4.4, "learning_rate": 5.000813763035545e-05, "loss": 0.6168, "step": 431900 }, { "epoch": 4.4, "learning_rate": 5.0003194358866426e-05, "loss": 0.6935, "step": 432000 }, { "epoch": 4.4, "learning_rate": 4.999825010934057e-05, "loss": 0.6424, "step": 432100 }, { "epoch": 4.4, "learning_rate": 4.999330488201962e-05, "loss": 0.6875, "step": 432200 }, { "epoch": 4.4, "learning_rate": 4.998835867714539e-05, "loss": 0.6773, "step": 432300 }, { "epoch": 4.41, "learning_rate": 4.99834114949597e-05, "loss": 0.6703, "step": 432400 }, { "epoch": 4.41, "learning_rate": 4.9978512822132695e-05, "loss": 0.684, "step": 432500 }, { "epoch": 4.41, "learning_rate": 4.997356369581687e-05, "loss": 0.7165, "step": 432600 }, { "epoch": 4.41, "learning_rate": 4.9968613592913e-05, "loss": 0.7496, "step": 432700 }, { "epoch": 4.41, "learning_rate": 4.996366251366307e-05, "loss": 0.6658, "step": 432800 }, { "epoch": 4.41, "learning_rate": 4.995871045830919e-05, "loss": 0.5591, "step": 432900 }, { "epoch": 4.41, "learning_rate": 4.995375742709348e-05, "loss": 0.6732, "step": 433000 }, { "epoch": 4.41, "learning_rate": 4.9948803420258114e-05, "loss": 0.6448, "step": 433100 }, { "epoch": 4.41, "learning_rate": 4.994384843804529e-05, "loss": 0.7854, "step": 433200 }, { "epoch": 4.41, "learning_rate": 4.993889248069731e-05, "loss": 0.6784, "step": 433300 }, { "epoch": 4.42, "learning_rate": 4.993393554845647e-05, "loss": 0.7174, "step": 433400 }, { "epoch": 4.42, "learning_rate": 4.992897764156515e-05, "loss": 0.6565, "step": 433500 }, { "epoch": 4.42, "learning_rate": 4.9924018760265746e-05, "loss": 0.5781, "step": 433600 }, { "epoch": 4.42, "learning_rate": 4.991905890480073e-05, "loss": 0.7323, "step": 433700 }, { "epoch": 4.42, "learning_rate": 4.9914098075412595e-05, "loss": 0.6291, "step": 433800 }, { "epoch": 4.42, "learning_rate": 4.990913627234391e-05, "loss": 0.6552, "step": 433900 }, { "epoch": 4.42, "learning_rate": 4.990417349583728e-05, "loss": 0.7695, "step": 434000 }, { "epoch": 4.42, "learning_rate": 4.989920974613534e-05, "loss": 0.6178, "step": 434100 }, { "epoch": 4.42, "learning_rate": 4.9894245023480794e-05, "loss": 0.6355, "step": 434200 }, { "epoch": 4.42, "learning_rate": 4.98892793281164e-05, "loss": 0.6772, "step": 434300 }, { "epoch": 4.43, "learning_rate": 4.988431266028492e-05, "loss": 0.7564, "step": 434400 }, { "epoch": 4.43, "learning_rate": 4.987934502022923e-05, "loss": 0.7043, "step": 434500 }, { "epoch": 4.43, "learning_rate": 4.9874376408192197e-05, "loss": 0.6809, "step": 434600 }, { "epoch": 4.43, "learning_rate": 4.986940682441677e-05, "loss": 0.7409, "step": 434700 }, { "epoch": 4.43, "learning_rate": 4.986443626914591e-05, "loss": 0.6305, "step": 434800 }, { "epoch": 4.43, "learning_rate": 4.985946474262268e-05, "loss": 0.6624, "step": 434900 }, { "epoch": 4.43, "learning_rate": 4.985449224509013e-05, "loss": 0.6669, "step": 435000 }, { "epoch": 4.43, "learning_rate": 4.9849518776791385e-05, "loss": 0.6623, "step": 435100 }, { "epoch": 4.43, "learning_rate": 4.9844594087161145e-05, "loss": 0.7072, "step": 435200 }, { "epoch": 4.43, "learning_rate": 4.9839618687761194e-05, "loss": 0.6677, "step": 435300 }, { "epoch": 4.44, "learning_rate": 4.9834642318322294e-05, "loss": 0.6886, "step": 435400 }, { "epoch": 4.44, "learning_rate": 4.982966497908775e-05, "loss": 0.6502, "step": 435500 }, { "epoch": 4.44, "learning_rate": 4.982468667030092e-05, "loss": 0.6635, "step": 435600 }, { "epoch": 4.44, "learning_rate": 4.9819707392205216e-05, "loss": 0.7045, "step": 435700 }, { "epoch": 4.44, "learning_rate": 4.98147271450441e-05, "loss": 0.6756, "step": 435800 }, { "epoch": 4.44, "learning_rate": 4.980974592906108e-05, "loss": 0.7234, "step": 435900 }, { "epoch": 4.44, "learning_rate": 4.9804763744499704e-05, "loss": 0.6781, "step": 436000 }, { "epoch": 4.44, "learning_rate": 4.9799780591603564e-05, "loss": 0.7439, "step": 436100 }, { "epoch": 4.44, "learning_rate": 4.979479647061632e-05, "loss": 0.755, "step": 436200 }, { "epoch": 4.45, "learning_rate": 4.978981138178165e-05, "loss": 0.7213, "step": 436300 }, { "epoch": 4.45, "learning_rate": 4.97848253253433e-05, "loss": 0.6981, "step": 436400 }, { "epoch": 4.45, "learning_rate": 4.9779838301545076e-05, "loss": 0.7547, "step": 436500 }, { "epoch": 4.45, "learning_rate": 4.977485031063078e-05, "loss": 0.7776, "step": 436600 }, { "epoch": 4.45, "learning_rate": 4.9769861352844326e-05, "loss": 0.7326, "step": 436700 }, { "epoch": 4.45, "learning_rate": 4.9764871428429626e-05, "loss": 0.6508, "step": 436800 }, { "epoch": 4.45, "learning_rate": 4.9759880537630666e-05, "loss": 0.7494, "step": 436900 }, { "epoch": 4.45, "learning_rate": 4.975493860404246e-05, "loss": 0.6432, "step": 437000 }, { "epoch": 4.45, "learning_rate": 4.974994579086486e-05, "loss": 0.7281, "step": 437100 }, { "epoch": 4.45, "learning_rate": 4.9744952012032755e-05, "loss": 0.5645, "step": 437200 }, { "epoch": 4.46, "learning_rate": 4.973995726779034e-05, "loss": 0.6535, "step": 437300 }, { "epoch": 4.46, "learning_rate": 4.973496155838181e-05, "loss": 0.6945, "step": 437400 }, { "epoch": 4.46, "learning_rate": 4.972996488405144e-05, "loss": 0.7149, "step": 437500 }, { "epoch": 4.46, "learning_rate": 4.972496724504352e-05, "loss": 0.7294, "step": 437600 }, { "epoch": 4.46, "learning_rate": 4.971996864160242e-05, "loss": 0.7339, "step": 437700 }, { "epoch": 4.46, "learning_rate": 4.9714969073972535e-05, "loss": 0.6449, "step": 437800 }, { "epoch": 4.46, "learning_rate": 4.9709968542398316e-05, "loss": 0.6048, "step": 437900 }, { "epoch": 4.46, "learning_rate": 4.9704967047124254e-05, "loss": 0.8038, "step": 438000 }, { "epoch": 4.46, "learning_rate": 4.96999645883949e-05, "loss": 0.6365, "step": 438100 }, { "epoch": 4.46, "learning_rate": 4.969496116645485e-05, "loss": 0.709, "step": 438200 }, { "epoch": 4.47, "learning_rate": 4.968995678154872e-05, "loss": 0.6996, "step": 438300 }, { "epoch": 4.47, "learning_rate": 4.9684951433921205e-05, "loss": 0.7499, "step": 438400 }, { "epoch": 4.47, "learning_rate": 4.967994512381705e-05, "loss": 0.7049, "step": 438500 }, { "epoch": 4.47, "learning_rate": 4.967493785148102e-05, "loss": 0.7133, "step": 438600 }, { "epoch": 4.47, "learning_rate": 4.966992961715793e-05, "loss": 0.5767, "step": 438700 }, { "epoch": 4.47, "learning_rate": 4.966492042109267e-05, "loss": 0.6975, "step": 438800 }, { "epoch": 4.47, "learning_rate": 4.965991026353017e-05, "loss": 0.6571, "step": 438900 }, { "epoch": 4.47, "learning_rate": 4.9654899144715364e-05, "loss": 0.7542, "step": 439000 }, { "epoch": 4.47, "learning_rate": 4.964988706489329e-05, "loss": 0.7228, "step": 439100 }, { "epoch": 4.47, "learning_rate": 4.964487402430901e-05, "loss": 0.7408, "step": 439200 }, { "epoch": 4.48, "learning_rate": 4.963986002320762e-05, "loss": 0.6805, "step": 439300 }, { "epoch": 4.48, "learning_rate": 4.963484506183428e-05, "loss": 0.6408, "step": 439400 }, { "epoch": 4.48, "learning_rate": 4.9629829140434197e-05, "loss": 0.6306, "step": 439500 }, { "epoch": 4.48, "learning_rate": 4.9624812259252607e-05, "loss": 0.7145, "step": 439600 }, { "epoch": 4.48, "learning_rate": 4.961979441853481e-05, "loss": 0.7239, "step": 439700 }, { "epoch": 4.48, "learning_rate": 4.961477561852615e-05, "loss": 0.7509, "step": 439800 }, { "epoch": 4.48, "learning_rate": 4.9609755859472017e-05, "loss": 0.649, "step": 439900 }, { "epoch": 4.48, "learning_rate": 4.9604735141617856e-05, "loss": 0.5838, "step": 440000 }, { "epoch": 4.48, "learning_rate": 4.959971346520913e-05, "loss": 0.7609, "step": 440100 }, { "epoch": 4.48, "learning_rate": 4.9594690830491384e-05, "loss": 0.663, "step": 440200 }, { "epoch": 4.49, "learning_rate": 4.95896672377102e-05, "loss": 0.6679, "step": 440300 }, { "epoch": 4.49, "learning_rate": 4.958464268711118e-05, "loss": 0.6525, "step": 440400 }, { "epoch": 4.49, "learning_rate": 4.957961717894002e-05, "loss": 0.784, "step": 440500 }, { "epoch": 4.49, "learning_rate": 4.9574590713442425e-05, "loss": 0.722, "step": 440600 }, { "epoch": 4.49, "learning_rate": 4.956956329086415e-05, "loss": 0.6848, "step": 440700 }, { "epoch": 4.49, "learning_rate": 4.956453491145102e-05, "loss": 0.6269, "step": 440800 }, { "epoch": 4.49, "learning_rate": 4.9559505575448894e-05, "loss": 0.6028, "step": 440900 }, { "epoch": 4.49, "learning_rate": 4.955447528310366e-05, "loss": 0.65, "step": 441000 }, { "epoch": 4.49, "learning_rate": 4.954944403466128e-05, "loss": 0.6295, "step": 441100 }, { "epoch": 4.5, "learning_rate": 4.954441183036775e-05, "loss": 0.6594, "step": 441200 }, { "epoch": 4.5, "learning_rate": 4.953937867046913e-05, "loss": 0.7185, "step": 441300 }, { "epoch": 4.5, "learning_rate": 4.953434455521149e-05, "loss": 0.6824, "step": 441400 }, { "epoch": 4.5, "learning_rate": 4.952930948484097e-05, "loss": 0.6306, "step": 441500 }, { "epoch": 4.5, "learning_rate": 4.952427345960377e-05, "loss": 0.6899, "step": 441600 }, { "epoch": 4.5, "learning_rate": 4.951923647974611e-05, "loss": 0.6913, "step": 441700 }, { "epoch": 4.5, "learning_rate": 4.9514198545514265e-05, "loss": 0.6832, "step": 441800 }, { "epoch": 4.5, "learning_rate": 4.950915965715457e-05, "loss": 0.7589, "step": 441900 }, { "epoch": 4.5, "learning_rate": 4.9504119814913395e-05, "loss": 0.587, "step": 442000 }, { "epoch": 4.5, "learning_rate": 4.949907901903716e-05, "loss": 0.678, "step": 442100 }, { "epoch": 4.51, "learning_rate": 4.949403726977232e-05, "loss": 0.7047, "step": 442200 }, { "epoch": 4.51, "learning_rate": 4.9488994567365386e-05, "loss": 0.6576, "step": 442300 }, { "epoch": 4.51, "learning_rate": 4.9483950912062925e-05, "loss": 0.7054, "step": 442400 }, { "epoch": 4.51, "learning_rate": 4.947890630411154e-05, "loss": 0.64, "step": 442500 }, { "epoch": 4.51, "learning_rate": 4.9473860743757884e-05, "loss": 0.6651, "step": 442600 }, { "epoch": 4.51, "learning_rate": 4.946881423124865e-05, "loss": 0.6082, "step": 442700 }, { "epoch": 4.51, "learning_rate": 4.9463766766830575e-05, "loss": 0.6219, "step": 442800 }, { "epoch": 4.51, "learning_rate": 4.945871835075047e-05, "loss": 0.628, "step": 442900 }, { "epoch": 4.51, "learning_rate": 4.945366898325516e-05, "loss": 0.6402, "step": 443000 }, { "epoch": 4.51, "learning_rate": 4.944861866459152e-05, "loss": 0.666, "step": 443100 }, { "epoch": 4.52, "learning_rate": 4.94435673950065e-05, "loss": 0.6585, "step": 443200 }, { "epoch": 4.52, "learning_rate": 4.943851517474707e-05, "loss": 0.7371, "step": 443300 }, { "epoch": 4.52, "learning_rate": 4.9433462004060245e-05, "loss": 0.7183, "step": 443400 }, { "epoch": 4.52, "learning_rate": 4.942840788319311e-05, "loss": 0.581, "step": 443500 }, { "epoch": 4.52, "learning_rate": 4.9423352812392756e-05, "loss": 0.7019, "step": 443600 }, { "epoch": 4.52, "learning_rate": 4.941829679190637e-05, "loss": 0.7097, "step": 443700 }, { "epoch": 4.52, "learning_rate": 4.9413290396379316e-05, "loss": 0.7073, "step": 443800 }, { "epoch": 4.52, "learning_rate": 4.9408232486753216e-05, "loss": 0.6426, "step": 443900 }, { "epoch": 4.52, "learning_rate": 4.940317362818037e-05, "loss": 0.6822, "step": 444000 }, { "epoch": 4.52, "learning_rate": 4.9398113820908126e-05, "loss": 0.7162, "step": 444100 }, { "epoch": 4.53, "learning_rate": 4.939305306518388e-05, "loss": 0.6753, "step": 444200 }, { "epoch": 4.53, "learning_rate": 4.938804198298716e-05, "loss": 0.5966, "step": 444300 }, { "epoch": 4.53, "learning_rate": 4.9382979340579616e-05, "loss": 0.7108, "step": 444400 }, { "epoch": 4.53, "learning_rate": 4.937791575046005e-05, "loss": 0.6441, "step": 444500 }, { "epoch": 4.53, "learning_rate": 4.937285121287605e-05, "loss": 0.693, "step": 444600 }, { "epoch": 4.53, "learning_rate": 4.9367785728075245e-05, "loss": 0.7163, "step": 444700 }, { "epoch": 4.53, "learning_rate": 4.936271929630529e-05, "loss": 0.7184, "step": 444800 }, { "epoch": 4.53, "learning_rate": 4.935765191781391e-05, "loss": 0.6917, "step": 444900 }, { "epoch": 4.53, "learning_rate": 4.935258359284888e-05, "loss": 0.6552, "step": 445000 }, { "epoch": 4.53, "learning_rate": 4.9347514321658003e-05, "loss": 0.7358, "step": 445100 }, { "epoch": 4.54, "learning_rate": 4.934244410448913e-05, "loss": 0.7435, "step": 445200 }, { "epoch": 4.54, "learning_rate": 4.9337372941590166e-05, "loss": 0.6113, "step": 445300 }, { "epoch": 4.54, "learning_rate": 4.933230083320907e-05, "loss": 0.7115, "step": 445400 }, { "epoch": 4.54, "learning_rate": 4.932722777959383e-05, "loss": 0.6758, "step": 445500 }, { "epoch": 4.54, "learning_rate": 4.932215378099249e-05, "loss": 0.7169, "step": 445600 }, { "epoch": 4.54, "learning_rate": 4.931707883765314e-05, "loss": 0.6167, "step": 445700 }, { "epoch": 4.54, "learning_rate": 4.9312002949823916e-05, "loss": 0.6903, "step": 445800 }, { "epoch": 4.54, "learning_rate": 4.9306926117752994e-05, "loss": 0.7334, "step": 445900 }, { "epoch": 4.54, "learning_rate": 4.9301848341688606e-05, "loss": 0.6055, "step": 446000 }, { "epoch": 4.54, "learning_rate": 4.9296769621879014e-05, "loss": 0.705, "step": 446100 }, { "epoch": 4.55, "learning_rate": 4.929168995857255e-05, "loss": 0.7207, "step": 446200 }, { "epoch": 4.55, "learning_rate": 4.928660935201756e-05, "loss": 0.6678, "step": 446300 }, { "epoch": 4.55, "learning_rate": 4.928157862262508e-05, "loss": 0.7134, "step": 446400 }, { "epoch": 4.55, "learning_rate": 4.927649613974464e-05, "loss": 0.696, "step": 446500 }, { "epoch": 4.55, "learning_rate": 4.927141271435858e-05, "loss": 0.7637, "step": 446600 }, { "epoch": 4.55, "learning_rate": 4.9266328346715444e-05, "loss": 0.7812, "step": 446700 }, { "epoch": 4.55, "learning_rate": 4.9261243037063823e-05, "loss": 0.617, "step": 446800 }, { "epoch": 4.55, "learning_rate": 4.925615678565236e-05, "loss": 0.6841, "step": 446900 }, { "epoch": 4.55, "learning_rate": 4.925106959272976e-05, "loss": 0.6576, "step": 447000 }, { "epoch": 4.56, "learning_rate": 4.9245981458544725e-05, "loss": 0.6342, "step": 447100 }, { "epoch": 4.56, "learning_rate": 4.924089238334606e-05, "loss": 0.6584, "step": 447200 }, { "epoch": 4.56, "learning_rate": 4.923580236738258e-05, "loss": 0.58, "step": 447300 }, { "epoch": 4.56, "learning_rate": 4.923071141090316e-05, "loss": 0.6756, "step": 447400 }, { "epoch": 4.56, "learning_rate": 4.922561951415671e-05, "loss": 0.7154, "step": 447500 }, { "epoch": 4.56, "learning_rate": 4.9220526677392196e-05, "loss": 0.76, "step": 447600 }, { "epoch": 4.56, "learning_rate": 4.9215432900858636e-05, "loss": 0.7186, "step": 447700 }, { "epoch": 4.56, "learning_rate": 4.921033818480507e-05, "loss": 0.6713, "step": 447800 }, { "epoch": 4.56, "learning_rate": 4.9205242529480617e-05, "loss": 0.6451, "step": 447900 }, { "epoch": 4.56, "learning_rate": 4.9200145935134416e-05, "loss": 0.6712, "step": 448000 }, { "epoch": 4.57, "learning_rate": 4.919504840201566e-05, "loss": 0.6011, "step": 448100 }, { "epoch": 4.57, "learning_rate": 4.918994993037357e-05, "loss": 0.7085, "step": 448200 }, { "epoch": 4.57, "learning_rate": 4.918485052045747e-05, "loss": 0.6702, "step": 448300 }, { "epoch": 4.57, "learning_rate": 4.9179750172516653e-05, "loss": 0.6455, "step": 448400 }, { "epoch": 4.57, "learning_rate": 4.917464888680052e-05, "loss": 0.7097, "step": 448500 }, { "epoch": 4.57, "learning_rate": 4.9169546663558486e-05, "loss": 0.7222, "step": 448600 }, { "epoch": 4.57, "learning_rate": 4.916444350304001e-05, "loss": 0.718, "step": 448700 }, { "epoch": 4.57, "learning_rate": 4.9159390451107544e-05, "loss": 0.7327, "step": 448800 }, { "epoch": 4.57, "learning_rate": 4.9154285426151326e-05, "loss": 0.7654, "step": 448900 }, { "epoch": 4.57, "learning_rate": 4.914917946466487e-05, "loss": 0.847, "step": 449000 }, { "epoch": 4.58, "learning_rate": 4.914407256689779e-05, "loss": 0.7199, "step": 449100 }, { "epoch": 4.58, "learning_rate": 4.913896473309983e-05, "loss": 0.623, "step": 449200 }, { "epoch": 4.58, "learning_rate": 4.913385596352071e-05, "loss": 0.6899, "step": 449300 }, { "epoch": 4.58, "learning_rate": 4.9128746258410205e-05, "loss": 0.7079, "step": 449400 }, { "epoch": 4.58, "learning_rate": 4.912363561801817e-05, "loss": 0.749, "step": 449500 }, { "epoch": 4.58, "learning_rate": 4.911852404259448e-05, "loss": 0.7002, "step": 449600 }, { "epoch": 4.58, "learning_rate": 4.911346266211746e-05, "loss": 0.7784, "step": 449700 }, { "epoch": 4.58, "learning_rate": 4.910834922672435e-05, "loss": 0.7385, "step": 449800 }, { "epoch": 4.58, "learning_rate": 4.9103234857047005e-05, "loss": 0.6373, "step": 449900 }, { "epoch": 4.58, "learning_rate": 4.909811955333547e-05, "loss": 0.6936, "step": 450000 }, { "epoch": 4.59, "learning_rate": 4.9093003315839864e-05, "loss": 0.6123, "step": 450100 }, { "epoch": 4.59, "learning_rate": 4.908788614481033e-05, "loss": 0.6282, "step": 450200 }, { "epoch": 4.59, "learning_rate": 4.908276804049708e-05, "loss": 0.6454, "step": 450300 }, { "epoch": 4.59, "learning_rate": 4.907764900315035e-05, "loss": 0.7118, "step": 450400 }, { "epoch": 4.59, "learning_rate": 4.9072529033020435e-05, "loss": 0.615, "step": 450500 }, { "epoch": 4.59, "learning_rate": 4.906740813035767e-05, "loss": 0.7156, "step": 450600 }, { "epoch": 4.59, "learning_rate": 4.906228629541244e-05, "loss": 0.7318, "step": 450700 }, { "epoch": 4.59, "learning_rate": 4.905716352843516e-05, "loss": 0.7111, "step": 450800 }, { "epoch": 4.59, "learning_rate": 4.905203982967631e-05, "loss": 0.6226, "step": 450900 }, { "epoch": 4.59, "learning_rate": 4.904696645029957e-05, "loss": 0.697, "step": 451000 }, { "epoch": 4.6, "learning_rate": 4.904184089804075e-05, "loss": 0.6854, "step": 451100 }, { "epoch": 4.6, "learning_rate": 4.903671441474955e-05, "loss": 0.7058, "step": 451200 }, { "epoch": 4.6, "learning_rate": 4.9031587000676614e-05, "loss": 0.6228, "step": 451300 }, { "epoch": 4.6, "learning_rate": 4.902645865607265e-05, "loss": 0.772, "step": 451400 }, { "epoch": 4.6, "learning_rate": 4.9021329381188405e-05, "loss": 0.6994, "step": 451500 }, { "epoch": 4.6, "learning_rate": 4.901619917627466e-05, "loss": 0.7291, "step": 451600 }, { "epoch": 4.6, "learning_rate": 4.9011068041582265e-05, "loss": 0.5663, "step": 451700 }, { "epoch": 4.6, "learning_rate": 4.9005935977362085e-05, "loss": 0.5662, "step": 451800 }, { "epoch": 4.6, "learning_rate": 4.900080298386507e-05, "loss": 0.7569, "step": 451900 }, { "epoch": 4.61, "learning_rate": 4.899566906134218e-05, "loss": 0.6922, "step": 452000 }, { "epoch": 4.61, "learning_rate": 4.8990534210044416e-05, "loss": 0.595, "step": 452100 }, { "epoch": 4.61, "learning_rate": 4.8985398430222875e-05, "loss": 0.6483, "step": 452200 }, { "epoch": 4.61, "learning_rate": 4.898026172212864e-05, "loss": 0.7641, "step": 452300 }, { "epoch": 4.61, "learning_rate": 4.897512408601288e-05, "loss": 0.7983, "step": 452400 }, { "epoch": 4.61, "learning_rate": 4.896998552212679e-05, "loss": 0.6291, "step": 452500 }, { "epoch": 4.61, "learning_rate": 4.896484603072161e-05, "loss": 0.7403, "step": 452600 }, { "epoch": 4.61, "learning_rate": 4.895970561204864e-05, "loss": 0.6618, "step": 452700 }, { "epoch": 4.61, "learning_rate": 4.8954564266359216e-05, "loss": 0.638, "step": 452800 }, { "epoch": 4.61, "learning_rate": 4.8949421993904706e-05, "loss": 0.6913, "step": 452900 }, { "epoch": 4.62, "learning_rate": 4.894427879493655e-05, "loss": 0.608, "step": 453000 }, { "epoch": 4.62, "learning_rate": 4.8939134669706214e-05, "loss": 0.7368, "step": 453100 }, { "epoch": 4.62, "learning_rate": 4.893398961846521e-05, "loss": 0.7245, "step": 453200 }, { "epoch": 4.62, "learning_rate": 4.892884364146511e-05, "loss": 0.5872, "step": 453300 }, { "epoch": 4.62, "learning_rate": 4.8923696738957516e-05, "loss": 0.6046, "step": 453400 }, { "epoch": 4.62, "learning_rate": 4.891854891119408e-05, "loss": 0.6054, "step": 453500 }, { "epoch": 4.62, "learning_rate": 4.891340015842651e-05, "loss": 0.7208, "step": 453600 }, { "epoch": 4.62, "learning_rate": 4.890825048090653e-05, "loss": 0.711, "step": 453700 }, { "epoch": 4.62, "learning_rate": 4.890309987888595e-05, "loss": 0.7249, "step": 453800 }, { "epoch": 4.62, "learning_rate": 4.889794835261659e-05, "loss": 0.6263, "step": 453900 }, { "epoch": 4.63, "learning_rate": 4.889279590235033e-05, "loss": 0.58, "step": 454000 }, { "epoch": 4.63, "learning_rate": 4.88876425283391e-05, "loss": 0.666, "step": 454100 }, { "epoch": 4.63, "learning_rate": 4.888248823083487e-05, "loss": 0.6199, "step": 454200 }, { "epoch": 4.63, "learning_rate": 4.8877333010089644e-05, "loss": 0.5923, "step": 454300 }, { "epoch": 4.63, "learning_rate": 4.8872176866355494e-05, "loss": 0.8148, "step": 454400 }, { "epoch": 4.63, "learning_rate": 4.8867019799884515e-05, "loss": 0.6852, "step": 454500 }, { "epoch": 4.63, "learning_rate": 4.886186181092885e-05, "loss": 0.726, "step": 454600 }, { "epoch": 4.63, "learning_rate": 4.885670289974072e-05, "loss": 0.7535, "step": 454700 }, { "epoch": 4.63, "learning_rate": 4.885154306657234e-05, "loss": 0.6558, "step": 454800 }, { "epoch": 4.63, "learning_rate": 4.8846382311676016e-05, "loss": 0.7754, "step": 454900 }, { "epoch": 4.64, "learning_rate": 4.884122063530405e-05, "loss": 0.6463, "step": 455000 }, { "epoch": 4.64, "learning_rate": 4.8836058037708846e-05, "loss": 0.6451, "step": 455100 }, { "epoch": 4.64, "learning_rate": 4.8830894519142804e-05, "loss": 0.5607, "step": 455200 }, { "epoch": 4.64, "learning_rate": 4.882573007985839e-05, "loss": 0.6176, "step": 455300 }, { "epoch": 4.64, "learning_rate": 4.8820564720108136e-05, "loss": 0.6714, "step": 455400 }, { "epoch": 4.64, "learning_rate": 4.8815398440144586e-05, "loss": 0.6445, "step": 455500 }, { "epoch": 4.64, "learning_rate": 4.8810231240220326e-05, "loss": 0.6231, "step": 455600 }, { "epoch": 4.64, "learning_rate": 4.880506312058801e-05, "loss": 0.7309, "step": 455700 }, { "epoch": 4.64, "learning_rate": 4.879989408150033e-05, "loss": 0.7339, "step": 455800 }, { "epoch": 4.64, "learning_rate": 4.879472412321003e-05, "loss": 0.6953, "step": 455900 }, { "epoch": 4.65, "learning_rate": 4.878955324596988e-05, "loss": 0.6229, "step": 456000 }, { "epoch": 4.65, "learning_rate": 4.878443317253879e-05, "loss": 0.7008, "step": 456100 }, { "epoch": 4.65, "learning_rate": 4.877926046734065e-05, "loss": 0.728, "step": 456200 }, { "epoch": 4.65, "learning_rate": 4.877408684394874e-05, "loss": 0.7246, "step": 456300 }, { "epoch": 4.65, "learning_rate": 4.8768912302616025e-05, "loss": 0.6537, "step": 456400 }, { "epoch": 4.65, "learning_rate": 4.8763736843595505e-05, "loss": 0.7256, "step": 456500 }, { "epoch": 4.65, "learning_rate": 4.875856046714022e-05, "loss": 0.7296, "step": 456600 }, { "epoch": 4.65, "learning_rate": 4.875338317350328e-05, "loss": 0.673, "step": 456700 }, { "epoch": 4.65, "learning_rate": 4.8748204962937825e-05, "loss": 0.6348, "step": 456800 }, { "epoch": 4.65, "learning_rate": 4.8743025835697016e-05, "loss": 0.713, "step": 456900 }, { "epoch": 4.66, "learning_rate": 4.8737845792034104e-05, "loss": 0.5801, "step": 457000 }, { "epoch": 4.66, "learning_rate": 4.873266483220236e-05, "loss": 0.6052, "step": 457100 }, { "epoch": 4.66, "learning_rate": 4.8727482956455084e-05, "loss": 0.5799, "step": 457200 }, { "epoch": 4.66, "learning_rate": 4.8722300165045663e-05, "loss": 0.6046, "step": 457300 }, { "epoch": 4.66, "learning_rate": 4.871711645822749e-05, "loss": 0.7177, "step": 457400 }, { "epoch": 4.66, "learning_rate": 4.8711931836254014e-05, "loss": 0.7043, "step": 457500 }, { "epoch": 4.66, "learning_rate": 4.870674629937874e-05, "loss": 0.637, "step": 457600 }, { "epoch": 4.66, "learning_rate": 4.870155984785521e-05, "loss": 0.6485, "step": 457700 }, { "epoch": 4.66, "learning_rate": 4.869637248193701e-05, "loss": 0.7934, "step": 457800 }, { "epoch": 4.67, "learning_rate": 4.869118420187776e-05, "loss": 0.6673, "step": 457900 }, { "epoch": 4.67, "learning_rate": 4.8685995007931156e-05, "loss": 0.6962, "step": 458000 }, { "epoch": 4.67, "learning_rate": 4.8680804900350914e-05, "loss": 0.726, "step": 458100 }, { "epoch": 4.67, "learning_rate": 4.867561387939078e-05, "loss": 0.6348, "step": 458200 }, { "epoch": 4.67, "learning_rate": 4.867042194530458e-05, "loss": 0.6547, "step": 458300 }, { "epoch": 4.67, "learning_rate": 4.866522909834618e-05, "loss": 0.7325, "step": 458400 }, { "epoch": 4.67, "learning_rate": 4.866003533876944e-05, "loss": 0.6568, "step": 458500 }, { "epoch": 4.67, "learning_rate": 4.865484066682835e-05, "loss": 0.605, "step": 458600 }, { "epoch": 4.67, "learning_rate": 4.8649645082776876e-05, "loss": 0.7516, "step": 458700 }, { "epoch": 4.67, "learning_rate": 4.8644448586869043e-05, "loss": 0.6796, "step": 458800 }, { "epoch": 4.68, "learning_rate": 4.863925117935895e-05, "loss": 0.599, "step": 458900 }, { "epoch": 4.68, "learning_rate": 4.8634052860500694e-05, "loss": 0.7571, "step": 459000 }, { "epoch": 4.68, "learning_rate": 4.862885363054846e-05, "loss": 0.6359, "step": 459100 }, { "epoch": 4.68, "learning_rate": 4.862365348975646e-05, "loss": 0.6363, "step": 459200 }, { "epoch": 4.68, "learning_rate": 4.861845243837894e-05, "loss": 0.6438, "step": 459300 }, { "epoch": 4.68, "learning_rate": 4.861325047667021e-05, "loss": 0.6069, "step": 459400 }, { "epoch": 4.68, "learning_rate": 4.8608047604884606e-05, "loss": 0.7083, "step": 459500 }, { "epoch": 4.68, "learning_rate": 4.860284382327652e-05, "loss": 0.7087, "step": 459600 }, { "epoch": 4.68, "learning_rate": 4.859763913210039e-05, "loss": 0.6578, "step": 459700 }, { "epoch": 4.68, "learning_rate": 4.859243353161069e-05, "loss": 0.7035, "step": 459800 }, { "epoch": 4.69, "learning_rate": 4.858722702206194e-05, "loss": 0.6706, "step": 459900 }, { "epoch": 4.69, "learning_rate": 4.8582019603708715e-05, "loss": 0.608, "step": 460000 }, { "epoch": 4.69, "learning_rate": 4.857681127680562e-05, "loss": 0.5823, "step": 460100 }, { "epoch": 4.69, "learning_rate": 4.857160204160732e-05, "loss": 0.6734, "step": 460200 }, { "epoch": 4.69, "learning_rate": 4.856639189836851e-05, "loss": 0.6129, "step": 460300 }, { "epoch": 4.69, "learning_rate": 4.856118084734393e-05, "loss": 0.6144, "step": 460400 }, { "epoch": 4.69, "learning_rate": 4.855596888878838e-05, "loss": 0.58, "step": 460500 }, { "epoch": 4.69, "learning_rate": 4.855075602295668e-05, "loss": 0.7306, "step": 460600 }, { "epoch": 4.69, "learning_rate": 4.8545542250103716e-05, "loss": 0.6818, "step": 460700 }, { "epoch": 4.69, "learning_rate": 4.8540327570484416e-05, "loss": 0.6426, "step": 460800 }, { "epoch": 4.7, "learning_rate": 4.853511198435374e-05, "loss": 0.7697, "step": 460900 }, { "epoch": 4.7, "learning_rate": 4.8529895491966694e-05, "loss": 0.6117, "step": 461000 }, { "epoch": 4.7, "learning_rate": 4.852467809357834e-05, "loss": 0.6062, "step": 461100 }, { "epoch": 4.7, "learning_rate": 4.8519459789443777e-05, "loss": 0.6913, "step": 461200 }, { "epoch": 4.7, "learning_rate": 4.8514240579818155e-05, "loss": 0.5542, "step": 461300 }, { "epoch": 4.7, "learning_rate": 4.850902046495665e-05, "loss": 0.7515, "step": 461400 }, { "epoch": 4.7, "learning_rate": 4.8503799445114505e-05, "loss": 0.6467, "step": 461500 }, { "epoch": 4.7, "learning_rate": 4.8498577520546985e-05, "loss": 0.7683, "step": 461600 }, { "epoch": 4.7, "learning_rate": 4.8493354691509424e-05, "loss": 0.6353, "step": 461700 }, { "epoch": 4.7, "learning_rate": 4.848813095825718e-05, "loss": 0.6467, "step": 461800 }, { "epoch": 4.71, "learning_rate": 4.8482906321045666e-05, "loss": 0.6308, "step": 461900 }, { "epoch": 4.71, "learning_rate": 4.847768078013032e-05, "loss": 0.7193, "step": 462000 }, { "epoch": 4.71, "learning_rate": 4.847245433576667e-05, "loss": 0.6654, "step": 462100 }, { "epoch": 4.71, "learning_rate": 4.846722698821023e-05, "loss": 0.6543, "step": 462200 }, { "epoch": 4.71, "learning_rate": 4.84619987377166e-05, "loss": 0.6524, "step": 462300 }, { "epoch": 4.71, "learning_rate": 4.845676958454141e-05, "loss": 0.6761, "step": 462400 }, { "epoch": 4.71, "learning_rate": 4.8451539528940324e-05, "loss": 0.5824, "step": 462500 }, { "epoch": 4.71, "learning_rate": 4.84463608852117e-05, "loss": 0.6291, "step": 462600 }, { "epoch": 4.71, "learning_rate": 4.844118135751424e-05, "loss": 0.7199, "step": 462700 }, { "epoch": 4.72, "learning_rate": 4.8435948614200645e-05, "loss": 0.628, "step": 462800 }, { "epoch": 4.72, "learning_rate": 4.8430714969479185e-05, "loss": 0.605, "step": 462900 }, { "epoch": 4.72, "learning_rate": 4.842548042360575e-05, "loss": 0.6613, "step": 463000 }, { "epoch": 4.72, "learning_rate": 4.842024497683628e-05, "loss": 0.6611, "step": 463100 }, { "epoch": 4.72, "learning_rate": 4.841500862942676e-05, "loss": 0.6552, "step": 463200 }, { "epoch": 4.72, "learning_rate": 4.84097713816332e-05, "loss": 0.6531, "step": 463300 }, { "epoch": 4.72, "learning_rate": 4.84045332337117e-05, "loss": 0.7, "step": 463400 }, { "epoch": 4.72, "learning_rate": 4.839929418591835e-05, "loss": 0.7042, "step": 463500 }, { "epoch": 4.72, "learning_rate": 4.839405423850931e-05, "loss": 0.6734, "step": 463600 }, { "epoch": 4.72, "learning_rate": 4.83888133917408e-05, "loss": 0.6635, "step": 463700 }, { "epoch": 4.73, "learning_rate": 4.838357164586905e-05, "loss": 0.6787, "step": 463800 }, { "epoch": 4.73, "learning_rate": 4.8378329001150355e-05, "loss": 0.6788, "step": 463900 }, { "epoch": 4.73, "learning_rate": 4.837308545784104e-05, "loss": 0.7333, "step": 464000 }, { "epoch": 4.73, "learning_rate": 4.836784101619749e-05, "loss": 0.6748, "step": 464100 }, { "epoch": 4.73, "learning_rate": 4.836259567647614e-05, "loss": 0.6543, "step": 464200 }, { "epoch": 4.73, "learning_rate": 4.835734943893344e-05, "loss": 0.6279, "step": 464300 }, { "epoch": 4.73, "learning_rate": 4.835210230382589e-05, "loss": 0.6538, "step": 464400 }, { "epoch": 4.73, "learning_rate": 4.8346854271410075e-05, "loss": 0.6638, "step": 464500 }, { "epoch": 4.73, "learning_rate": 4.834160534194257e-05, "loss": 0.6734, "step": 464600 }, { "epoch": 4.73, "learning_rate": 4.833635551568002e-05, "loss": 0.7441, "step": 464700 }, { "epoch": 4.74, "learning_rate": 4.833110479287911e-05, "loss": 0.6416, "step": 464800 }, { "epoch": 4.74, "learning_rate": 4.832585317379656e-05, "loss": 0.7301, "step": 464900 }, { "epoch": 4.74, "learning_rate": 4.832060065868916e-05, "loss": 0.6981, "step": 465000 }, { "epoch": 4.74, "learning_rate": 4.831534724781373e-05, "loss": 0.7026, "step": 465100 }, { "epoch": 4.74, "learning_rate": 4.831009294142711e-05, "loss": 0.6898, "step": 465200 }, { "epoch": 4.74, "learning_rate": 4.830483773978621e-05, "loss": 0.7516, "step": 465300 }, { "epoch": 4.74, "learning_rate": 4.829958164314799e-05, "loss": 0.6559, "step": 465400 }, { "epoch": 4.74, "learning_rate": 4.8294324651769425e-05, "loss": 0.5516, "step": 465500 }, { "epoch": 4.74, "learning_rate": 4.8289066765907566e-05, "loss": 0.6529, "step": 465600 }, { "epoch": 4.74, "learning_rate": 4.8283807985819485e-05, "loss": 0.7609, "step": 465700 }, { "epoch": 4.75, "learning_rate": 4.827854831176231e-05, "loss": 0.6185, "step": 465800 }, { "epoch": 4.75, "learning_rate": 4.827334035409392e-05, "loss": 0.6474, "step": 465900 }, { "epoch": 4.75, "learning_rate": 4.826807890180336e-05, "loss": 0.7141, "step": 466000 }, { "epoch": 4.75, "learning_rate": 4.826281655631276e-05, "loss": 0.6863, "step": 466100 }, { "epoch": 4.75, "learning_rate": 4.825755331787942e-05, "loss": 0.7209, "step": 466200 }, { "epoch": 4.75, "learning_rate": 4.825228918676068e-05, "loss": 0.7162, "step": 466300 }, { "epoch": 4.75, "learning_rate": 4.824702416321391e-05, "loss": 0.7492, "step": 466400 }, { "epoch": 4.75, "learning_rate": 4.8241758247496564e-05, "loss": 0.5179, "step": 466500 }, { "epoch": 4.75, "learning_rate": 4.8236491439866093e-05, "loss": 0.682, "step": 466600 }, { "epoch": 4.75, "learning_rate": 4.823122374058003e-05, "loss": 0.6177, "step": 466700 }, { "epoch": 4.76, "learning_rate": 4.822595514989591e-05, "loss": 0.6371, "step": 466800 }, { "epoch": 4.76, "learning_rate": 4.822068566807136e-05, "loss": 0.5733, "step": 466900 }, { "epoch": 4.76, "learning_rate": 4.821541529536402e-05, "loss": 0.631, "step": 467000 }, { "epoch": 4.76, "learning_rate": 4.821014403203156e-05, "loss": 0.6588, "step": 467100 }, { "epoch": 4.76, "learning_rate": 4.820487187833175e-05, "loss": 0.6983, "step": 467200 }, { "epoch": 4.76, "learning_rate": 4.819959883452233e-05, "loss": 0.6345, "step": 467300 }, { "epoch": 4.76, "learning_rate": 4.8194324900861145e-05, "loss": 0.673, "step": 467400 }, { "epoch": 4.76, "learning_rate": 4.8189102830241245e-05, "loss": 0.6055, "step": 467500 }, { "epoch": 4.76, "learning_rate": 4.818382712654222e-05, "loss": 0.6946, "step": 467600 }, { "epoch": 4.77, "learning_rate": 4.817855053376258e-05, "loss": 0.6567, "step": 467700 }, { "epoch": 4.77, "learning_rate": 4.81732730521603e-05, "loss": 0.6913, "step": 467800 }, { "epoch": 4.77, "learning_rate": 4.816799468199341e-05, "loss": 0.7095, "step": 467900 }, { "epoch": 4.77, "learning_rate": 4.816271542352001e-05, "loss": 0.6767, "step": 468000 }, { "epoch": 4.77, "learning_rate": 4.8157435276998215e-05, "loss": 0.6345, "step": 468100 }, { "epoch": 4.77, "learning_rate": 4.8152154242686185e-05, "loss": 0.6577, "step": 468200 }, { "epoch": 4.77, "learning_rate": 4.814687232084215e-05, "loss": 0.653, "step": 468300 }, { "epoch": 4.77, "learning_rate": 4.814158951172434e-05, "loss": 0.5682, "step": 468400 }, { "epoch": 4.77, "learning_rate": 4.813630581559107e-05, "loss": 0.7012, "step": 468500 }, { "epoch": 4.77, "learning_rate": 4.813102123270068e-05, "loss": 0.6532, "step": 468600 }, { "epoch": 4.78, "learning_rate": 4.8125735763311555e-05, "loss": 0.702, "step": 468700 }, { "epoch": 4.78, "learning_rate": 4.8120449407682116e-05, "loss": 0.6736, "step": 468800 }, { "epoch": 4.78, "learning_rate": 4.811516216607084e-05, "loss": 0.6852, "step": 468900 }, { "epoch": 4.78, "learning_rate": 4.810987403873624e-05, "loss": 0.6739, "step": 469000 }, { "epoch": 4.78, "learning_rate": 4.810458502593687e-05, "loss": 0.6579, "step": 469100 }, { "epoch": 4.78, "learning_rate": 4.8099295127931345e-05, "loss": 0.6383, "step": 469200 }, { "epoch": 4.78, "learning_rate": 4.809400434497829e-05, "loss": 0.6571, "step": 469300 }, { "epoch": 4.78, "learning_rate": 4.808871267733641e-05, "loss": 0.6376, "step": 469400 }, { "epoch": 4.78, "learning_rate": 4.808342012526442e-05, "loss": 0.7185, "step": 469500 }, { "epoch": 4.78, "learning_rate": 4.80781266890211e-05, "loss": 0.6646, "step": 469600 }, { "epoch": 4.79, "learning_rate": 4.807283236886527e-05, "loss": 0.7991, "step": 469700 }, { "epoch": 4.79, "learning_rate": 4.8067537165055796e-05, "loss": 0.6664, "step": 469800 }, { "epoch": 4.79, "learning_rate": 4.806224107785158e-05, "loss": 0.6859, "step": 469900 }, { "epoch": 4.79, "learning_rate": 4.805694410751155e-05, "loss": 0.6428, "step": 470000 }, { "epoch": 4.79, "learning_rate": 4.805169923719628e-05, "loss": 0.7105, "step": 470100 }, { "epoch": 4.79, "learning_rate": 4.8046400510186573e-05, "loss": 0.6545, "step": 470200 }, { "epoch": 4.79, "learning_rate": 4.804110090081558e-05, "loss": 0.6723, "step": 470300 }, { "epoch": 4.79, "learning_rate": 4.8035800409342406e-05, "loss": 0.6281, "step": 470400 }, { "epoch": 4.79, "learning_rate": 4.803049903602622e-05, "loss": 0.7299, "step": 470500 }, { "epoch": 4.79, "learning_rate": 4.802524980803824e-05, "loss": 0.7251, "step": 470600 }, { "epoch": 4.8, "learning_rate": 4.8019946680625664e-05, "loss": 0.718, "step": 470700 }, { "epoch": 4.8, "learning_rate": 4.8014642672145236e-05, "loss": 0.6282, "step": 470800 }, { "epoch": 4.8, "learning_rate": 4.8009337782856286e-05, "loss": 0.7017, "step": 470900 }, { "epoch": 4.8, "learning_rate": 4.8004032013018196e-05, "loss": 0.7218, "step": 471000 }, { "epoch": 4.8, "learning_rate": 4.799872536289038e-05, "loss": 0.5841, "step": 471100 }, { "epoch": 4.8, "learning_rate": 4.799341783273231e-05, "loss": 0.6282, "step": 471200 }, { "epoch": 4.8, "learning_rate": 4.7988109422803475e-05, "loss": 0.7199, "step": 471300 }, { "epoch": 4.8, "learning_rate": 4.798280013336345e-05, "loss": 0.7139, "step": 471400 }, { "epoch": 4.8, "learning_rate": 4.7977489964671806e-05, "loss": 0.6759, "step": 471500 }, { "epoch": 4.8, "learning_rate": 4.797217891698819e-05, "loss": 0.6639, "step": 471600 }, { "epoch": 4.81, "learning_rate": 4.7966866990572286e-05, "loss": 0.7671, "step": 471700 }, { "epoch": 4.81, "learning_rate": 4.796155418568379e-05, "loss": 0.7142, "step": 471800 }, { "epoch": 4.81, "learning_rate": 4.795624050258249e-05, "loss": 0.7303, "step": 471900 }, { "epoch": 4.81, "learning_rate": 4.79509259415282e-05, "loss": 0.7087, "step": 472000 }, { "epoch": 4.81, "learning_rate": 4.794561050278073e-05, "loss": 0.6714, "step": 472100 }, { "epoch": 4.81, "learning_rate": 4.7940294186600026e-05, "loss": 0.6853, "step": 472200 }, { "epoch": 4.81, "learning_rate": 4.793497699324598e-05, "loss": 0.6369, "step": 472300 }, { "epoch": 4.81, "learning_rate": 4.79296589229786e-05, "loss": 0.6476, "step": 472400 }, { "epoch": 4.81, "learning_rate": 4.792433997605789e-05, "loss": 0.6402, "step": 472500 }, { "epoch": 4.81, "learning_rate": 4.791902015274393e-05, "loss": 0.6429, "step": 472600 }, { "epoch": 4.82, "learning_rate": 4.7913699453296816e-05, "loss": 0.6589, "step": 472700 }, { "epoch": 4.82, "learning_rate": 4.7908377877976695e-05, "loss": 0.6191, "step": 472800 }, { "epoch": 4.82, "learning_rate": 4.790310865588653e-05, "loss": 0.6958, "step": 472900 }, { "epoch": 4.82, "learning_rate": 4.789778533835329e-05, "loss": 0.5885, "step": 473000 }, { "epoch": 4.82, "learning_rate": 4.7892461145725137e-05, "loss": 0.643, "step": 473100 }, { "epoch": 4.82, "learning_rate": 4.788713607826241e-05, "loss": 0.6939, "step": 473200 }, { "epoch": 4.82, "learning_rate": 4.788181013622549e-05, "loss": 0.6922, "step": 473300 }, { "epoch": 4.82, "learning_rate": 4.7876483319874744e-05, "loss": 0.5744, "step": 473400 }, { "epoch": 4.82, "learning_rate": 4.787115562947066e-05, "loss": 0.7081, "step": 473500 }, { "epoch": 4.83, "learning_rate": 4.786582706527371e-05, "loss": 0.6653, "step": 473600 }, { "epoch": 4.83, "learning_rate": 4.7860497627544436e-05, "loss": 0.6459, "step": 473700 }, { "epoch": 4.83, "learning_rate": 4.785516731654342e-05, "loss": 0.6879, "step": 473800 }, { "epoch": 4.83, "learning_rate": 4.784983613253126e-05, "loss": 0.6847, "step": 473900 }, { "epoch": 4.83, "learning_rate": 4.784450407576865e-05, "loss": 0.6414, "step": 474000 }, { "epoch": 4.83, "learning_rate": 4.783917114651628e-05, "loss": 0.7259, "step": 474100 }, { "epoch": 4.83, "learning_rate": 4.78338373450349e-05, "loss": 0.6421, "step": 474200 }, { "epoch": 4.83, "learning_rate": 4.78285026715853e-05, "loss": 0.672, "step": 474300 }, { "epoch": 4.83, "learning_rate": 4.78231671264283e-05, "loss": 0.6649, "step": 474400 }, { "epoch": 4.83, "learning_rate": 4.781783070982481e-05, "loss": 0.6962, "step": 474500 }, { "epoch": 4.84, "learning_rate": 4.781249342203572e-05, "loss": 0.6613, "step": 474600 }, { "epoch": 4.84, "learning_rate": 4.780715526332201e-05, "loss": 0.5756, "step": 474700 }, { "epoch": 4.84, "learning_rate": 4.7801816233944656e-05, "loss": 0.5518, "step": 474800 }, { "epoch": 4.84, "learning_rate": 4.779647633416474e-05, "loss": 0.718, "step": 474900 }, { "epoch": 4.84, "learning_rate": 4.7791135564243333e-05, "loss": 0.6741, "step": 475000 }, { "epoch": 4.84, "learning_rate": 4.778579392444156e-05, "loss": 0.5952, "step": 475100 }, { "epoch": 4.84, "learning_rate": 4.778045141502062e-05, "loss": 0.6397, "step": 475200 }, { "epoch": 4.84, "learning_rate": 4.777510803624169e-05, "loss": 0.6106, "step": 475300 }, { "epoch": 4.84, "learning_rate": 4.776976378836607e-05, "loss": 0.7089, "step": 475400 }, { "epoch": 4.84, "learning_rate": 4.7764418671655056e-05, "loss": 0.6893, "step": 475500 }, { "epoch": 4.85, "learning_rate": 4.775907268636996e-05, "loss": 0.6144, "step": 475600 }, { "epoch": 4.85, "learning_rate": 4.7753725832772204e-05, "loss": 0.7305, "step": 475700 }, { "epoch": 4.85, "learning_rate": 4.774837811112319e-05, "loss": 0.6723, "step": 475800 }, { "epoch": 4.85, "learning_rate": 4.7743029521684414e-05, "loss": 0.7739, "step": 475900 }, { "epoch": 4.85, "learning_rate": 4.773768006471738e-05, "loss": 0.6641, "step": 476000 }, { "epoch": 4.85, "learning_rate": 4.7732329740483646e-05, "loss": 0.7364, "step": 476100 }, { "epoch": 4.85, "learning_rate": 4.7726978549244805e-05, "loss": 0.5834, "step": 476200 }, { "epoch": 4.85, "learning_rate": 4.772162649126249e-05, "loss": 0.6768, "step": 476300 }, { "epoch": 4.85, "learning_rate": 4.771627356679841e-05, "loss": 0.7498, "step": 476400 }, { "epoch": 4.85, "learning_rate": 4.771091977611428e-05, "loss": 0.6843, "step": 476500 }, { "epoch": 4.86, "learning_rate": 4.770556511947186e-05, "loss": 0.5953, "step": 476600 }, { "epoch": 4.86, "learning_rate": 4.7700209597132965e-05, "loss": 0.6419, "step": 476700 }, { "epoch": 4.86, "learning_rate": 4.7694853209359454e-05, "loss": 0.6724, "step": 476800 }, { "epoch": 4.86, "learning_rate": 4.7689495956413216e-05, "loss": 0.6993, "step": 476900 }, { "epoch": 4.86, "learning_rate": 4.7684137838556195e-05, "loss": 0.6677, "step": 477000 }, { "epoch": 4.86, "learning_rate": 4.7678778856050367e-05, "loss": 0.7117, "step": 477100 }, { "epoch": 4.86, "learning_rate": 4.767341900915775e-05, "loss": 0.5953, "step": 477200 }, { "epoch": 4.86, "learning_rate": 4.766805829814041e-05, "loss": 0.7442, "step": 477300 }, { "epoch": 4.86, "learning_rate": 4.7662696723260464e-05, "loss": 0.6792, "step": 477400 }, { "epoch": 4.86, "learning_rate": 4.7657334284780054e-05, "loss": 0.683, "step": 477500 }, { "epoch": 4.87, "learning_rate": 4.765197098296136e-05, "loss": 0.6616, "step": 477600 }, { "epoch": 4.87, "learning_rate": 4.764660681806664e-05, "loss": 0.6808, "step": 477700 }, { "epoch": 4.87, "learning_rate": 4.7641241790358155e-05, "loss": 0.6228, "step": 477800 }, { "epoch": 4.87, "learning_rate": 4.763592956326958e-05, "loss": 0.6211, "step": 477900 }, { "epoch": 4.87, "learning_rate": 4.7630562819342146e-05, "loss": 0.6092, "step": 478000 }, { "epoch": 4.87, "learning_rate": 4.762519521338541e-05, "loss": 0.6329, "step": 478100 }, { "epoch": 4.87, "learning_rate": 4.761982674566181e-05, "loss": 0.7091, "step": 478200 }, { "epoch": 4.87, "learning_rate": 4.761445741643383e-05, "loss": 0.7075, "step": 478300 }, { "epoch": 4.87, "learning_rate": 4.7609087225964005e-05, "loss": 0.6638, "step": 478400 }, { "epoch": 4.88, "learning_rate": 4.760371617451489e-05, "loss": 0.6205, "step": 478500 }, { "epoch": 4.88, "learning_rate": 4.759834426234912e-05, "loss": 0.5948, "step": 478600 }, { "epoch": 4.88, "learning_rate": 4.759297148972933e-05, "loss": 0.6264, "step": 478700 }, { "epoch": 4.88, "learning_rate": 4.758759785691821e-05, "loss": 0.6077, "step": 478800 }, { "epoch": 4.88, "learning_rate": 4.758222336417854e-05, "loss": 0.5524, "step": 478900 }, { "epoch": 4.88, "learning_rate": 4.757684801177305e-05, "loss": 0.674, "step": 479000 }, { "epoch": 4.88, "learning_rate": 4.757147179996459e-05, "loss": 0.7099, "step": 479100 }, { "epoch": 4.88, "learning_rate": 4.756609472901603e-05, "loss": 0.6849, "step": 479200 }, { "epoch": 4.88, "learning_rate": 4.7560716799190254e-05, "loss": 0.6719, "step": 479300 }, { "epoch": 4.88, "learning_rate": 4.7555338010750236e-05, "loss": 0.6419, "step": 479400 }, { "epoch": 4.89, "learning_rate": 4.7549958363958947e-05, "loss": 0.6565, "step": 479500 }, { "epoch": 4.89, "learning_rate": 4.7544577859079425e-05, "loss": 0.7481, "step": 479600 }, { "epoch": 4.89, "learning_rate": 4.7539196496374754e-05, "loss": 0.6183, "step": 479700 }, { "epoch": 4.89, "learning_rate": 4.753381427610803e-05, "loss": 0.6719, "step": 479800 }, { "epoch": 4.89, "learning_rate": 4.752843119854244e-05, "loss": 0.6301, "step": 479900 }, { "epoch": 4.89, "learning_rate": 4.7523101107528625e-05, "loss": 0.7285, "step": 480000 }, { "epoch": 4.89, "learning_rate": 4.751771632472133e-05, "loss": 0.6604, "step": 480100 }, { "epoch": 4.89, "learning_rate": 4.751233068540224e-05, "loss": 0.7168, "step": 480200 }, { "epoch": 4.89, "learning_rate": 4.75069441898347e-05, "loss": 0.6833, "step": 480300 }, { "epoch": 4.89, "learning_rate": 4.750155683828206e-05, "loss": 0.6717, "step": 480400 }, { "epoch": 4.9, "learning_rate": 4.7496168631007725e-05, "loss": 0.7384, "step": 480500 }, { "epoch": 4.9, "learning_rate": 4.7490779568275155e-05, "loss": 0.6648, "step": 480600 }, { "epoch": 4.9, "learning_rate": 4.748538965034785e-05, "loss": 0.6064, "step": 480700 }, { "epoch": 4.9, "learning_rate": 4.7479998877489346e-05, "loss": 0.6804, "step": 480800 }, { "epoch": 4.9, "learning_rate": 4.7474607249963204e-05, "loss": 0.6584, "step": 480900 }, { "epoch": 4.9, "learning_rate": 4.746921476803305e-05, "loss": 0.5439, "step": 481000 }, { "epoch": 4.9, "learning_rate": 4.7463821431962546e-05, "loss": 0.6715, "step": 481100 }, { "epoch": 4.9, "learning_rate": 4.745842724201539e-05, "loss": 0.5925, "step": 481200 }, { "epoch": 4.9, "learning_rate": 4.7453032198455336e-05, "loss": 0.5991, "step": 481300 }, { "epoch": 4.9, "learning_rate": 4.744763630154615e-05, "loss": 0.6417, "step": 481400 }, { "epoch": 4.91, "learning_rate": 4.744223955155168e-05, "loss": 0.664, "step": 481500 }, { "epoch": 4.91, "learning_rate": 4.7436841948735785e-05, "loss": 0.6533, "step": 481600 }, { "epoch": 4.91, "learning_rate": 4.7431443493362376e-05, "loss": 0.7175, "step": 481700 }, { "epoch": 4.91, "learning_rate": 4.74260441856954e-05, "loss": 0.6662, "step": 481800 }, { "epoch": 4.91, "learning_rate": 4.742064402599887e-05, "loss": 0.6124, "step": 481900 }, { "epoch": 4.91, "learning_rate": 4.74152430145368e-05, "loss": 0.5611, "step": 482000 }, { "epoch": 4.91, "learning_rate": 4.740984115157327e-05, "loss": 0.6502, "step": 482100 }, { "epoch": 4.91, "learning_rate": 4.740443843737241e-05, "loss": 0.6868, "step": 482200 }, { "epoch": 4.91, "learning_rate": 4.739908891206156e-05, "loss": 0.6171, "step": 482300 }, { "epoch": 4.91, "learning_rate": 4.739368450468433e-05, "loss": 0.7234, "step": 482400 }, { "epoch": 4.92, "learning_rate": 4.738827924685973e-05, "loss": 0.5608, "step": 482500 }, { "epoch": 4.92, "learning_rate": 4.738287313885204e-05, "loss": 0.7062, "step": 482600 }, { "epoch": 4.92, "learning_rate": 4.737746618092559e-05, "loss": 0.7252, "step": 482700 }, { "epoch": 4.92, "learning_rate": 4.737205837334475e-05, "loss": 0.5987, "step": 482800 }, { "epoch": 4.92, "learning_rate": 4.736664971637393e-05, "loss": 0.5758, "step": 482900 }, { "epoch": 4.92, "learning_rate": 4.736124021027756e-05, "loss": 0.7343, "step": 483000 }, { "epoch": 4.92, "learning_rate": 4.735582985532017e-05, "loss": 0.7125, "step": 483100 }, { "epoch": 4.92, "learning_rate": 4.735041865176627e-05, "loss": 0.6986, "step": 483200 }, { "epoch": 4.92, "learning_rate": 4.7345006599880434e-05, "loss": 0.6769, "step": 483300 }, { "epoch": 4.92, "learning_rate": 4.733959369992729e-05, "loss": 0.6472, "step": 483400 }, { "epoch": 4.93, "learning_rate": 4.7334179952171495e-05, "loss": 0.6563, "step": 483500 }, { "epoch": 4.93, "learning_rate": 4.732876535687773e-05, "loss": 0.6527, "step": 483600 }, { "epoch": 4.93, "learning_rate": 4.732334991431076e-05, "loss": 0.5603, "step": 483700 }, { "epoch": 4.93, "learning_rate": 4.7317933624735364e-05, "loss": 0.6313, "step": 483800 }, { "epoch": 4.93, "learning_rate": 4.731251648841636e-05, "loss": 0.727, "step": 483900 }, { "epoch": 4.93, "learning_rate": 4.730709850561861e-05, "loss": 0.649, "step": 484000 }, { "epoch": 4.93, "learning_rate": 4.730167967660703e-05, "loss": 0.6101, "step": 484100 }, { "epoch": 4.93, "learning_rate": 4.7296260001646565e-05, "loss": 0.6724, "step": 484200 }, { "epoch": 4.93, "learning_rate": 4.7290839481002206e-05, "loss": 0.5981, "step": 484300 }, { "epoch": 4.94, "learning_rate": 4.728541811493898e-05, "loss": 0.765, "step": 484400 }, { "epoch": 4.94, "learning_rate": 4.727999590372196e-05, "loss": 0.7483, "step": 484500 }, { "epoch": 4.94, "learning_rate": 4.727457284761627e-05, "loss": 0.7394, "step": 484600 }, { "epoch": 4.94, "learning_rate": 4.726914894688705e-05, "loss": 0.6585, "step": 484700 }, { "epoch": 4.94, "learning_rate": 4.726372420179951e-05, "loss": 0.6969, "step": 484800 }, { "epoch": 4.94, "learning_rate": 4.725829861261888e-05, "loss": 0.7463, "step": 484900 }, { "epoch": 4.94, "learning_rate": 4.7252872179610436e-05, "loss": 0.596, "step": 485000 }, { "epoch": 4.94, "learning_rate": 4.72474449030395e-05, "loss": 0.6844, "step": 485100 }, { "epoch": 4.94, "learning_rate": 4.724201678317144e-05, "loss": 0.6285, "step": 485200 }, { "epoch": 4.94, "learning_rate": 4.723658782027165e-05, "loss": 0.6742, "step": 485300 }, { "epoch": 4.95, "learning_rate": 4.723115801460558e-05, "loss": 0.585, "step": 485400 }, { "epoch": 4.95, "learning_rate": 4.7225727366438716e-05, "loss": 0.6094, "step": 485500 }, { "epoch": 4.95, "learning_rate": 4.7220295876036574e-05, "loss": 0.6066, "step": 485600 }, { "epoch": 4.95, "learning_rate": 4.7214863543664736e-05, "loss": 0.6703, "step": 485700 }, { "epoch": 4.95, "learning_rate": 4.720943036958881e-05, "loss": 0.6886, "step": 485800 }, { "epoch": 4.95, "learning_rate": 4.720399635407443e-05, "loss": 0.6401, "step": 485900 }, { "epoch": 4.95, "learning_rate": 4.7198561497387293e-05, "loss": 0.7454, "step": 486000 }, { "epoch": 4.95, "learning_rate": 4.719312579979315e-05, "loss": 0.6822, "step": 486100 }, { "epoch": 4.95, "learning_rate": 4.718768926155774e-05, "loss": 0.6647, "step": 486200 }, { "epoch": 4.95, "learning_rate": 4.7182251882946904e-05, "loss": 0.7062, "step": 486300 }, { "epoch": 4.96, "learning_rate": 4.717681366422649e-05, "loss": 0.6276, "step": 486400 }, { "epoch": 4.96, "learning_rate": 4.717142900040438e-05, "loss": 0.6886, "step": 486500 }, { "epoch": 4.96, "learning_rate": 4.7165989110657e-05, "loss": 0.7222, "step": 486600 }, { "epoch": 4.96, "learning_rate": 4.71605483815952e-05, "loss": 0.6439, "step": 486700 }, { "epoch": 4.96, "learning_rate": 4.715510681348499e-05, "loss": 0.6439, "step": 486800 }, { "epoch": 4.96, "learning_rate": 4.714966440659242e-05, "loss": 0.6056, "step": 486900 }, { "epoch": 4.96, "learning_rate": 4.7144221161183627e-05, "loss": 0.6484, "step": 487000 }, { "epoch": 4.96, "learning_rate": 4.7138777077524705e-05, "loss": 0.7028, "step": 487100 }, { "epoch": 4.96, "learning_rate": 4.713333215588188e-05, "loss": 0.6561, "step": 487200 }, { "epoch": 4.96, "learning_rate": 4.712788639652135e-05, "loss": 0.815, "step": 487300 }, { "epoch": 4.97, "learning_rate": 4.7122439799709394e-05, "loss": 0.6578, "step": 487400 }, { "epoch": 4.97, "learning_rate": 4.711699236571231e-05, "loss": 0.6664, "step": 487500 }, { "epoch": 4.97, "learning_rate": 4.711154409479645e-05, "loss": 0.5999, "step": 487600 }, { "epoch": 4.97, "learning_rate": 4.7106094987228206e-05, "loss": 0.5817, "step": 487700 }, { "epoch": 4.97, "learning_rate": 4.710064504327399e-05, "loss": 0.7061, "step": 487800 }, { "epoch": 4.97, "learning_rate": 4.709519426320029e-05, "loss": 0.5745, "step": 487900 }, { "epoch": 4.97, "learning_rate": 4.708974264727361e-05, "loss": 0.5967, "step": 488000 }, { "epoch": 4.97, "learning_rate": 4.70842901957605e-05, "loss": 0.6267, "step": 488100 }, { "epoch": 4.97, "learning_rate": 4.707883690892755e-05, "loss": 0.6878, "step": 488200 }, { "epoch": 4.97, "learning_rate": 4.70733827870414e-05, "loss": 0.6855, "step": 488300 }, { "epoch": 4.98, "learning_rate": 4.706792783036872e-05, "loss": 0.7513, "step": 488400 }, { "epoch": 4.98, "learning_rate": 4.706247203917621e-05, "loss": 0.5748, "step": 488500 }, { "epoch": 4.98, "learning_rate": 4.705701541373066e-05, "loss": 0.6151, "step": 488600 }, { "epoch": 4.98, "learning_rate": 4.705155795429884e-05, "loss": 0.6374, "step": 488700 }, { "epoch": 4.98, "learning_rate": 4.704615424820515e-05, "loss": 0.6312, "step": 488800 }, { "epoch": 4.98, "learning_rate": 4.704069512993455e-05, "loss": 0.6967, "step": 488900 }, { "epoch": 4.98, "learning_rate": 4.703523517847567e-05, "loss": 0.6383, "step": 489000 }, { "epoch": 4.98, "learning_rate": 4.7029774394095445e-05, "loss": 0.7385, "step": 489100 }, { "epoch": 4.98, "learning_rate": 4.7024367397351985e-05, "loss": 0.6068, "step": 489200 }, { "epoch": 4.99, "learning_rate": 4.701890495625267e-05, "loss": 0.6275, "step": 489300 }, { "epoch": 4.99, "learning_rate": 4.701344168303047e-05, "loss": 0.6722, "step": 489400 }, { "epoch": 4.99, "learning_rate": 4.7007977577952506e-05, "loss": 0.6999, "step": 489500 }, { "epoch": 4.99, "learning_rate": 4.7002512641285936e-05, "loss": 0.5853, "step": 489600 }, { "epoch": 4.99, "learning_rate": 4.699704687329795e-05, "loss": 0.5723, "step": 489700 }, { "epoch": 4.99, "learning_rate": 4.699158027425582e-05, "loss": 0.606, "step": 489800 }, { "epoch": 4.99, "learning_rate": 4.698611284442681e-05, "loss": 0.6646, "step": 489900 }, { "epoch": 4.99, "learning_rate": 4.6980644584078246e-05, "loss": 0.6526, "step": 490000 }, { "epoch": 4.99, "learning_rate": 4.69751754934775e-05, "loss": 0.6882, "step": 490100 }, { "epoch": 4.99, "learning_rate": 4.696970557289197e-05, "loss": 0.672, "step": 490200 }, { "epoch": 5.0, "learning_rate": 4.696423482258912e-05, "loss": 0.5691, "step": 490300 }, { "epoch": 5.0, "learning_rate": 4.695876324283641e-05, "loss": 0.5944, "step": 490400 }, { "epoch": 5.0, "learning_rate": 4.695329083390139e-05, "loss": 0.6865, "step": 490500 }, { "epoch": 5.0, "learning_rate": 4.694781759605162e-05, "loss": 0.5747, "step": 490600 }, { "epoch": 5.0, "learning_rate": 4.694234352955471e-05, "loss": 0.5783, "step": 490700 }, { "epoch": 5.0, "learning_rate": 4.6936868634678305e-05, "loss": 0.6435, "step": 490800 }, { "epoch": 5.0, "learning_rate": 4.69313929116901e-05, "loss": 0.6459, "step": 490900 }, { "epoch": 5.0, "learning_rate": 4.6925916360857816e-05, "loss": 0.6217, "step": 491000 }, { "epoch": 5.0, "learning_rate": 4.692043898244923e-05, "loss": 0.5496, "step": 491100 }, { "epoch": 5.0, "learning_rate": 4.691496077673217e-05, "loss": 0.5992, "step": 491200 }, { "epoch": 5.01, "learning_rate": 4.690948174397446e-05, "loss": 0.7196, "step": 491300 }, { "epoch": 5.01, "learning_rate": 4.690400188444401e-05, "loss": 0.5553, "step": 491400 }, { "epoch": 5.01, "learning_rate": 4.689852119840873e-05, "loss": 0.5504, "step": 491500 }, { "epoch": 5.01, "learning_rate": 4.689303968613661e-05, "loss": 0.6328, "step": 491600 }, { "epoch": 5.01, "learning_rate": 4.688755734789568e-05, "loss": 0.5754, "step": 491700 }, { "epoch": 5.01, "learning_rate": 4.688207418395396e-05, "loss": 0.535, "step": 491800 }, { "epoch": 5.01, "learning_rate": 4.687659019457957e-05, "loss": 0.6402, "step": 491900 }, { "epoch": 5.01, "learning_rate": 4.687110538004063e-05, "loss": 0.6182, "step": 492000 }, { "epoch": 5.01, "learning_rate": 4.686561974060532e-05, "loss": 0.5961, "step": 492100 }, { "epoch": 5.01, "learning_rate": 4.686013327654184e-05, "loss": 0.6268, "step": 492200 }, { "epoch": 5.02, "learning_rate": 4.6854645988118475e-05, "loss": 0.6136, "step": 492300 }, { "epoch": 5.02, "learning_rate": 4.68491578756035e-05, "loss": 0.6538, "step": 492400 }, { "epoch": 5.02, "learning_rate": 4.684366893926525e-05, "loss": 0.6573, "step": 492500 }, { "epoch": 5.02, "learning_rate": 4.683817917937211e-05, "loss": 0.6614, "step": 492600 }, { "epoch": 5.02, "learning_rate": 4.683268859619249e-05, "loss": 0.6107, "step": 492700 }, { "epoch": 5.02, "learning_rate": 4.682719718999486e-05, "loss": 0.5632, "step": 492800 }, { "epoch": 5.02, "learning_rate": 4.68217049610477e-05, "loss": 0.614, "step": 492900 }, { "epoch": 5.02, "learning_rate": 4.681621190961955e-05, "loss": 0.671, "step": 493000 }, { "epoch": 5.02, "learning_rate": 4.6810718035978996e-05, "loss": 0.6693, "step": 493100 }, { "epoch": 5.02, "learning_rate": 4.680522334039466e-05, "loss": 0.5117, "step": 493200 }, { "epoch": 5.03, "learning_rate": 4.679978278237418e-05, "loss": 0.6432, "step": 493300 }, { "epoch": 5.03, "learning_rate": 4.6794286451921e-05, "loss": 0.6625, "step": 493400 }, { "epoch": 5.03, "learning_rate": 4.6788789300327436e-05, "loss": 0.6655, "step": 493500 }, { "epoch": 5.03, "learning_rate": 4.6783291327862275e-05, "loss": 0.6274, "step": 493600 }, { "epoch": 5.03, "learning_rate": 4.677779253479431e-05, "loss": 0.5649, "step": 493700 }, { "epoch": 5.03, "learning_rate": 4.677229292139242e-05, "loss": 0.6773, "step": 493800 }, { "epoch": 5.03, "learning_rate": 4.67667924879255e-05, "loss": 0.6521, "step": 493900 }, { "epoch": 5.03, "learning_rate": 4.6761291234662487e-05, "loss": 0.6483, "step": 494000 }, { "epoch": 5.03, "learning_rate": 4.675578916187236e-05, "loss": 0.6698, "step": 494100 }, { "epoch": 5.03, "learning_rate": 4.675028626982413e-05, "loss": 0.6534, "step": 494200 }, { "epoch": 5.04, "learning_rate": 4.674478255878687e-05, "loss": 0.6904, "step": 494300 }, { "epoch": 5.04, "learning_rate": 4.673927802902967e-05, "loss": 0.7136, "step": 494400 }, { "epoch": 5.04, "learning_rate": 4.673377268082166e-05, "loss": 0.682, "step": 494500 }, { "epoch": 5.04, "learning_rate": 4.6728321580145044e-05, "loss": 0.5822, "step": 494600 }, { "epoch": 5.04, "learning_rate": 4.672281460402081e-05, "loss": 0.5781, "step": 494700 }, { "epoch": 5.04, "learning_rate": 4.671730681025073e-05, "loss": 0.6858, "step": 494800 }, { "epoch": 5.04, "learning_rate": 4.671179819910411e-05, "loss": 0.6818, "step": 494900 }, { "epoch": 5.04, "learning_rate": 4.670628877085029e-05, "loss": 0.6579, "step": 495000 }, { "epoch": 5.04, "learning_rate": 4.6700778525758654e-05, "loss": 0.6935, "step": 495100 }, { "epoch": 5.05, "learning_rate": 4.669526746409861e-05, "loss": 0.6435, "step": 495200 }, { "epoch": 5.05, "learning_rate": 4.6689755586139615e-05, "loss": 0.7002, "step": 495300 }, { "epoch": 5.05, "learning_rate": 4.668424289215118e-05, "loss": 0.5883, "step": 495400 }, { "epoch": 5.05, "learning_rate": 4.6678729382402826e-05, "loss": 0.7353, "step": 495500 }, { "epoch": 5.05, "learning_rate": 4.667321505716414e-05, "loss": 0.7024, "step": 495600 }, { "epoch": 5.05, "learning_rate": 4.666769991670474e-05, "loss": 0.6122, "step": 495700 }, { "epoch": 5.05, "learning_rate": 4.666218396129428e-05, "loss": 0.5887, "step": 495800 }, { "epoch": 5.05, "learning_rate": 4.665666719120246e-05, "loss": 0.6482, "step": 495900 }, { "epoch": 5.05, "learning_rate": 4.6651149606699016e-05, "loss": 0.6784, "step": 496000 }, { "epoch": 5.05, "learning_rate": 4.664563120805372e-05, "loss": 0.6589, "step": 496100 }, { "epoch": 5.06, "learning_rate": 4.6640167191689354e-05, "loss": 0.6455, "step": 496200 }, { "epoch": 5.06, "learning_rate": 4.6634647173704554e-05, "loss": 0.6005, "step": 496300 }, { "epoch": 5.06, "learning_rate": 4.662912634238476e-05, "loss": 0.7014, "step": 496400 }, { "epoch": 5.06, "learning_rate": 4.662360469799994e-05, "loss": 0.6144, "step": 496500 }, { "epoch": 5.06, "learning_rate": 4.661808224082004e-05, "loss": 0.5828, "step": 496600 }, { "epoch": 5.06, "learning_rate": 4.66125589711151e-05, "loss": 0.6791, "step": 496700 }, { "epoch": 5.06, "learning_rate": 4.6607034889155166e-05, "loss": 0.5954, "step": 496800 }, { "epoch": 5.06, "learning_rate": 4.660150999521033e-05, "loss": 0.5959, "step": 496900 }, { "epoch": 5.06, "learning_rate": 4.659598428955072e-05, "loss": 0.6001, "step": 497000 }, { "epoch": 5.06, "learning_rate": 4.6590457772446536e-05, "loss": 0.6161, "step": 497100 }, { "epoch": 5.07, "learning_rate": 4.658493044416796e-05, "loss": 0.7238, "step": 497200 }, { "epoch": 5.07, "learning_rate": 4.657940230498527e-05, "loss": 0.6141, "step": 497300 }, { "epoch": 5.07, "learning_rate": 4.6573873355168725e-05, "loss": 0.5851, "step": 497400 }, { "epoch": 5.07, "learning_rate": 4.65683435949887e-05, "loss": 0.716, "step": 497500 }, { "epoch": 5.07, "learning_rate": 4.6562813024715544e-05, "loss": 0.6883, "step": 497600 }, { "epoch": 5.07, "learning_rate": 4.655728164461967e-05, "loss": 0.6175, "step": 497700 }, { "epoch": 5.07, "learning_rate": 4.6551749454971534e-05, "loss": 0.6378, "step": 497800 }, { "epoch": 5.07, "learning_rate": 4.6546216456041626e-05, "loss": 0.5727, "step": 497900 }, { "epoch": 5.07, "learning_rate": 4.654068264810048e-05, "loss": 0.6134, "step": 498000 }, { "epoch": 5.07, "learning_rate": 4.653514803141865e-05, "loss": 0.5859, "step": 498100 }, { "epoch": 5.08, "learning_rate": 4.652961260626677e-05, "loss": 0.7131, "step": 498200 }, { "epoch": 5.08, "learning_rate": 4.652407637291547e-05, "loss": 0.6403, "step": 498300 }, { "epoch": 5.08, "learning_rate": 4.6518539331635454e-05, "loss": 0.5428, "step": 498400 }, { "epoch": 5.08, "learning_rate": 4.6513001482697435e-05, "loss": 0.7137, "step": 498500 }, { "epoch": 5.08, "learning_rate": 4.650746282637219e-05, "loss": 0.6813, "step": 498600 }, { "epoch": 5.08, "learning_rate": 4.650192336293052e-05, "loss": 0.6401, "step": 498700 }, { "epoch": 5.08, "learning_rate": 4.6496383092643284e-05, "loss": 0.6313, "step": 498800 }, { "epoch": 5.08, "learning_rate": 4.649084201578135e-05, "loss": 0.6363, "step": 498900 }, { "epoch": 5.08, "learning_rate": 4.648530013261566e-05, "loss": 0.5828, "step": 499000 }, { "epoch": 5.08, "learning_rate": 4.647975744341718e-05, "loss": 0.5838, "step": 499100 }, { "epoch": 5.09, "learning_rate": 4.6474213948456895e-05, "loss": 0.5584, "step": 499200 }, { "epoch": 5.09, "learning_rate": 4.646866964800586e-05, "loss": 0.6174, "step": 499300 }, { "epoch": 5.09, "learning_rate": 4.6463179997376825e-05, "loss": 0.617, "step": 499400 }, { "epoch": 5.09, "learning_rate": 4.645763409480572e-05, "loss": 0.5597, "step": 499500 }, { "epoch": 5.09, "learning_rate": 4.645208738755453e-05, "loss": 0.7158, "step": 499600 }, { "epoch": 5.09, "learning_rate": 4.644653987589446e-05, "loss": 0.6066, "step": 499700 }, { "epoch": 5.09, "learning_rate": 4.644099156009672e-05, "loss": 0.6791, "step": 499800 }, { "epoch": 5.09, "learning_rate": 4.643544244043262e-05, "loss": 0.6103, "step": 499900 }, { "epoch": 5.09, "learning_rate": 4.642989251717346e-05, "loss": 0.577, "step": 500000 }, { "epoch": 5.1, "learning_rate": 4.6424341790590596e-05, "loss": 0.6842, "step": 500100 }, { "epoch": 5.1, "learning_rate": 4.641879026095544e-05, "loss": 0.6613, "step": 500200 }, { "epoch": 5.1, "learning_rate": 4.641323792853943e-05, "loss": 0.5771, "step": 500300 }, { "epoch": 5.1, "learning_rate": 4.6407684793614024e-05, "loss": 0.6796, "step": 500400 }, { "epoch": 5.1, "learning_rate": 4.640213085645076e-05, "loss": 0.6304, "step": 500500 }, { "epoch": 5.1, "learning_rate": 4.639657611732116e-05, "loss": 0.6233, "step": 500600 }, { "epoch": 5.1, "learning_rate": 4.6391020576496856e-05, "loss": 0.6715, "step": 500700 }, { "epoch": 5.1, "learning_rate": 4.6385464234249455e-05, "loss": 0.7335, "step": 500800 }, { "epoch": 5.1, "learning_rate": 4.637990709085064e-05, "loss": 0.6301, "step": 500900 }, { "epoch": 5.1, "learning_rate": 4.6374349146572114e-05, "loss": 0.5899, "step": 501000 }, { "epoch": 5.11, "learning_rate": 4.636879040168563e-05, "loss": 0.6519, "step": 501100 }, { "epoch": 5.11, "learning_rate": 4.636323085646299e-05, "loss": 0.5941, "step": 501200 }, { "epoch": 5.11, "learning_rate": 4.6357670511176e-05, "loss": 0.6369, "step": 501300 }, { "epoch": 5.11, "learning_rate": 4.635210936609654e-05, "loss": 0.5861, "step": 501400 }, { "epoch": 5.11, "learning_rate": 4.6346547421496526e-05, "loss": 0.6015, "step": 501500 }, { "epoch": 5.11, "learning_rate": 4.634098467764789e-05, "loss": 0.6181, "step": 501600 }, { "epoch": 5.11, "learning_rate": 4.633542113482262e-05, "loss": 0.587, "step": 501700 }, { "epoch": 5.11, "learning_rate": 4.6329856793292737e-05, "loss": 0.6811, "step": 501800 }, { "epoch": 5.11, "learning_rate": 4.632429165333032e-05, "loss": 0.5608, "step": 501900 }, { "epoch": 5.11, "learning_rate": 4.631872571520745e-05, "loss": 0.6396, "step": 502000 }, { "epoch": 5.12, "learning_rate": 4.631315897919628e-05, "loss": 0.5888, "step": 502100 }, { "epoch": 5.12, "learning_rate": 4.630759144556898e-05, "loss": 0.6202, "step": 502200 }, { "epoch": 5.12, "learning_rate": 4.6302023114597786e-05, "loss": 0.6958, "step": 502300 }, { "epoch": 5.12, "learning_rate": 4.6296453986554945e-05, "loss": 0.5922, "step": 502400 }, { "epoch": 5.12, "learning_rate": 4.629088406171276e-05, "loss": 0.6118, "step": 502500 }, { "epoch": 5.12, "learning_rate": 4.6285313340343555e-05, "loss": 0.609, "step": 502600 }, { "epoch": 5.12, "learning_rate": 4.6279741822719716e-05, "loss": 0.6568, "step": 502700 }, { "epoch": 5.12, "learning_rate": 4.627416950911366e-05, "loss": 0.6791, "step": 502800 }, { "epoch": 5.12, "learning_rate": 4.626859639979784e-05, "loss": 0.5671, "step": 502900 }, { "epoch": 5.12, "learning_rate": 4.626302249504472e-05, "loss": 0.6345, "step": 503000 }, { "epoch": 5.13, "learning_rate": 4.625744779512686e-05, "loss": 0.679, "step": 503100 }, { "epoch": 5.13, "learning_rate": 4.625187230031683e-05, "loss": 0.6027, "step": 503200 }, { "epoch": 5.13, "learning_rate": 4.624629601088723e-05, "loss": 0.6528, "step": 503300 }, { "epoch": 5.13, "learning_rate": 4.6240718927110705e-05, "loss": 0.6277, "step": 503400 }, { "epoch": 5.13, "learning_rate": 4.623514104925995e-05, "loss": 0.722, "step": 503500 }, { "epoch": 5.13, "learning_rate": 4.622956237760767e-05, "loss": 0.6678, "step": 503600 }, { "epoch": 5.13, "learning_rate": 4.6223982912426656e-05, "loss": 0.6401, "step": 503700 }, { "epoch": 5.13, "learning_rate": 4.62184026539897e-05, "loss": 0.6601, "step": 503800 }, { "epoch": 5.13, "learning_rate": 4.6212821602569626e-05, "loss": 0.6296, "step": 503900 }, { "epoch": 5.13, "learning_rate": 4.620723975843934e-05, "loss": 0.6904, "step": 504000 }, { "epoch": 5.14, "learning_rate": 4.620165712187174e-05, "loss": 0.6156, "step": 504100 }, { "epoch": 5.14, "learning_rate": 4.61960736931398e-05, "loss": 0.6183, "step": 504200 }, { "epoch": 5.14, "learning_rate": 4.619048947251651e-05, "loss": 0.6709, "step": 504300 }, { "epoch": 5.14, "learning_rate": 4.618490446027491e-05, "loss": 0.634, "step": 504400 }, { "epoch": 5.14, "learning_rate": 4.6179318656688045e-05, "loss": 0.5894, "step": 504500 }, { "epoch": 5.14, "learning_rate": 4.617373206202907e-05, "loss": 0.687, "step": 504600 }, { "epoch": 5.14, "learning_rate": 4.6168144676571114e-05, "loss": 0.5318, "step": 504700 }, { "epoch": 5.14, "learning_rate": 4.6162556500587375e-05, "loss": 0.5696, "step": 504800 }, { "epoch": 5.14, "learning_rate": 4.615696753435107e-05, "loss": 0.6464, "step": 504900 }, { "epoch": 5.15, "learning_rate": 4.615137777813547e-05, "loss": 0.6614, "step": 505000 }, { "epoch": 5.15, "learning_rate": 4.6145787232213885e-05, "loss": 0.5522, "step": 505100 }, { "epoch": 5.15, "learning_rate": 4.6140195896859656e-05, "loss": 0.618, "step": 505200 }, { "epoch": 5.15, "learning_rate": 4.6134603772346175e-05, "loss": 0.5846, "step": 505300 }, { "epoch": 5.15, "learning_rate": 4.612901085894685e-05, "loss": 0.7239, "step": 505400 }, { "epoch": 5.15, "learning_rate": 4.612341715693515e-05, "loss": 0.6547, "step": 505500 }, { "epoch": 5.15, "learning_rate": 4.6117822666584566e-05, "loss": 0.6415, "step": 505600 }, { "epoch": 5.15, "learning_rate": 4.611222738816864e-05, "loss": 0.6347, "step": 505700 }, { "epoch": 5.15, "learning_rate": 4.6106631321960945e-05, "loss": 0.56, "step": 505800 }, { "epoch": 5.15, "learning_rate": 4.610103446823511e-05, "loss": 0.6357, "step": 505900 }, { "epoch": 5.16, "learning_rate": 4.609543682726476e-05, "loss": 0.6406, "step": 506000 }, { "epoch": 5.16, "learning_rate": 4.608983839932361e-05, "loss": 0.5634, "step": 506100 }, { "epoch": 5.16, "learning_rate": 4.6084239184685385e-05, "loss": 0.6373, "step": 506200 }, { "epoch": 5.16, "learning_rate": 4.6078639183623844e-05, "loss": 0.6174, "step": 506300 }, { "epoch": 5.16, "learning_rate": 4.6073038396412796e-05, "loss": 0.5114, "step": 506400 }, { "epoch": 5.16, "learning_rate": 4.606743682332609e-05, "loss": 0.6216, "step": 506500 }, { "epoch": 5.16, "learning_rate": 4.606183446463762e-05, "loss": 0.5393, "step": 506600 }, { "epoch": 5.16, "learning_rate": 4.605623132062129e-05, "loss": 0.6204, "step": 506700 }, { "epoch": 5.16, "learning_rate": 4.6050627391551066e-05, "loss": 0.6207, "step": 506800 }, { "epoch": 5.16, "learning_rate": 4.604502267770095e-05, "loss": 0.6052, "step": 506900 }, { "epoch": 5.17, "learning_rate": 4.603941717934497e-05, "loss": 0.6617, "step": 507000 }, { "epoch": 5.17, "learning_rate": 4.603381089675721e-05, "loss": 0.5592, "step": 507100 }, { "epoch": 5.17, "learning_rate": 4.602825990475694e-05, "loss": 0.6749, "step": 507200 }, { "epoch": 5.17, "learning_rate": 4.602265206236347e-05, "loss": 0.7209, "step": 507300 }, { "epoch": 5.17, "learning_rate": 4.601704343655794e-05, "loss": 0.6787, "step": 507400 }, { "epoch": 5.17, "learning_rate": 4.601143402761457e-05, "loss": 0.6493, "step": 507500 }, { "epoch": 5.17, "learning_rate": 4.600582383580763e-05, "loss": 0.6847, "step": 507600 }, { "epoch": 5.17, "learning_rate": 4.600021286141141e-05, "loss": 0.7071, "step": 507700 }, { "epoch": 5.17, "learning_rate": 4.5994601104700283e-05, "loss": 0.607, "step": 507800 }, { "epoch": 5.17, "learning_rate": 4.598898856594859e-05, "loss": 0.678, "step": 507900 }, { "epoch": 5.18, "learning_rate": 4.59833752454308e-05, "loss": 0.6539, "step": 508000 }, { "epoch": 5.18, "learning_rate": 4.5977761143421325e-05, "loss": 0.6634, "step": 508100 }, { "epoch": 5.18, "learning_rate": 4.597214626019469e-05, "loss": 0.5979, "step": 508200 }, { "epoch": 5.18, "learning_rate": 4.5966530596025416e-05, "loss": 0.6044, "step": 508300 }, { "epoch": 5.18, "learning_rate": 4.5960914151188084e-05, "loss": 0.66, "step": 508400 }, { "epoch": 5.18, "learning_rate": 4.5955296925957295e-05, "loss": 0.6885, "step": 508500 }, { "epoch": 5.18, "learning_rate": 4.5949678920607704e-05, "loss": 0.602, "step": 508600 }, { "epoch": 5.18, "learning_rate": 4.5944060135413995e-05, "loss": 0.7248, "step": 508700 }, { "epoch": 5.18, "learning_rate": 4.59384405706509e-05, "loss": 0.626, "step": 508800 }, { "epoch": 5.18, "learning_rate": 4.593282022659317e-05, "loss": 0.5859, "step": 508900 }, { "epoch": 5.19, "learning_rate": 4.5927199103515605e-05, "loss": 0.6129, "step": 509000 }, { "epoch": 5.19, "learning_rate": 4.592157720169307e-05, "loss": 0.6182, "step": 509100 }, { "epoch": 5.19, "learning_rate": 4.5915954521400414e-05, "loss": 0.733, "step": 509200 }, { "epoch": 5.19, "learning_rate": 4.591033106291257e-05, "loss": 0.5552, "step": 509300 }, { "epoch": 5.19, "learning_rate": 4.590470682650448e-05, "loss": 0.6554, "step": 509400 }, { "epoch": 5.19, "learning_rate": 4.589908181245115e-05, "loss": 0.6512, "step": 509500 }, { "epoch": 5.19, "learning_rate": 4.589345602102758e-05, "loss": 0.6416, "step": 509600 }, { "epoch": 5.19, "learning_rate": 4.588782945250888e-05, "loss": 0.6442, "step": 509700 }, { "epoch": 5.19, "learning_rate": 4.5882202107170117e-05, "loss": 0.6469, "step": 509800 }, { "epoch": 5.19, "learning_rate": 4.587657398528646e-05, "loss": 0.5413, "step": 509900 }, { "epoch": 5.2, "learning_rate": 4.587094508713308e-05, "loss": 0.7031, "step": 510000 }, { "epoch": 5.2, "learning_rate": 4.5865315412985195e-05, "loss": 0.6487, "step": 510100 }, { "epoch": 5.2, "learning_rate": 4.5859684963118074e-05, "loss": 0.6169, "step": 510200 }, { "epoch": 5.2, "learning_rate": 4.585405373780699e-05, "loss": 0.6437, "step": 510300 }, { "epoch": 5.2, "learning_rate": 4.5848421737327304e-05, "loss": 0.6578, "step": 510400 }, { "epoch": 5.2, "learning_rate": 4.584278896195437e-05, "loss": 0.6294, "step": 510500 }, { "epoch": 5.2, "learning_rate": 4.583715541196359e-05, "loss": 0.6365, "step": 510600 }, { "epoch": 5.2, "learning_rate": 4.583152108763044e-05, "loss": 0.6542, "step": 510700 }, { "epoch": 5.2, "learning_rate": 4.582588598923036e-05, "loss": 0.6135, "step": 510800 }, { "epoch": 5.21, "learning_rate": 4.5820250117038915e-05, "loss": 0.5599, "step": 510900 }, { "epoch": 5.21, "learning_rate": 4.581461347133166e-05, "loss": 0.6369, "step": 511000 }, { "epoch": 5.21, "learning_rate": 4.580897605238416e-05, "loss": 0.5911, "step": 511100 }, { "epoch": 5.21, "learning_rate": 4.580333786047209e-05, "loss": 0.6171, "step": 511200 }, { "epoch": 5.21, "learning_rate": 4.579769889587111e-05, "loss": 0.598, "step": 511300 }, { "epoch": 5.21, "learning_rate": 4.5792115560049605e-05, "loss": 0.6592, "step": 511400 }, { "epoch": 5.21, "learning_rate": 4.5786475058617984e-05, "loss": 0.6675, "step": 511500 }, { "epoch": 5.21, "learning_rate": 4.5780833785321926e-05, "loss": 0.6724, "step": 511600 }, { "epoch": 5.21, "learning_rate": 4.5775191740437283e-05, "loss": 0.691, "step": 511700 }, { "epoch": 5.21, "learning_rate": 4.5769548924239906e-05, "loss": 0.6051, "step": 511800 }, { "epoch": 5.22, "learning_rate": 4.576390533700569e-05, "loss": 0.6817, "step": 511900 }, { "epoch": 5.22, "learning_rate": 4.575826097901058e-05, "loss": 0.5349, "step": 512000 }, { "epoch": 5.22, "learning_rate": 4.5752615850530544e-05, "loss": 0.6362, "step": 512100 }, { "epoch": 5.22, "learning_rate": 4.5746969951841595e-05, "loss": 0.5957, "step": 512200 }, { "epoch": 5.22, "learning_rate": 4.574137975371628e-05, "loss": 0.5662, "step": 512300 }, { "epoch": 5.22, "learning_rate": 4.5735732323132916e-05, "loss": 0.5816, "step": 512400 }, { "epoch": 5.22, "learning_rate": 4.573008412316614e-05, "loss": 0.605, "step": 512500 }, { "epoch": 5.22, "learning_rate": 4.572443515409213e-05, "loss": 0.7491, "step": 512600 }, { "epoch": 5.22, "learning_rate": 4.5718785416187076e-05, "loss": 0.6556, "step": 512700 }, { "epoch": 5.22, "learning_rate": 4.5713134909727217e-05, "loss": 0.5915, "step": 512800 }, { "epoch": 5.23, "learning_rate": 4.5707483634988836e-05, "loss": 0.7017, "step": 512900 }, { "epoch": 5.23, "learning_rate": 4.570183159224823e-05, "loss": 0.5975, "step": 513000 }, { "epoch": 5.23, "learning_rate": 4.569617878178178e-05, "loss": 0.5428, "step": 513100 }, { "epoch": 5.23, "learning_rate": 4.5690525203865856e-05, "loss": 0.5692, "step": 513200 }, { "epoch": 5.23, "learning_rate": 4.568487085877688e-05, "loss": 0.6423, "step": 513300 }, { "epoch": 5.23, "learning_rate": 4.5679215746791323e-05, "loss": 0.5668, "step": 513400 }, { "epoch": 5.23, "learning_rate": 4.567355986818569e-05, "loss": 0.6039, "step": 513500 }, { "epoch": 5.23, "learning_rate": 4.56679032232365e-05, "loss": 0.6207, "step": 513600 }, { "epoch": 5.23, "learning_rate": 4.566224581222035e-05, "loss": 0.5873, "step": 513700 }, { "epoch": 5.23, "learning_rate": 4.565658763541386e-05, "loss": 0.6084, "step": 513800 }, { "epoch": 5.24, "learning_rate": 4.565092869309365e-05, "loss": 0.6509, "step": 513900 }, { "epoch": 5.24, "learning_rate": 4.5645268985536435e-05, "loss": 0.511, "step": 514000 }, { "epoch": 5.24, "learning_rate": 4.563960851301894e-05, "loss": 0.6214, "step": 514100 }, { "epoch": 5.24, "learning_rate": 4.5633947275817914e-05, "loss": 0.5757, "step": 514200 }, { "epoch": 5.24, "learning_rate": 4.5628285274210156e-05, "loss": 0.5345, "step": 514300 }, { "epoch": 5.24, "learning_rate": 4.562262250847252e-05, "loss": 0.7158, "step": 514400 }, { "epoch": 5.24, "learning_rate": 4.561695897888188e-05, "loss": 0.6437, "step": 514500 }, { "epoch": 5.24, "learning_rate": 4.561135133242559e-05, "loss": 0.6941, "step": 514600 }, { "epoch": 5.24, "learning_rate": 4.5605686283591324e-05, "loss": 0.6419, "step": 514700 }, { "epoch": 5.24, "learning_rate": 4.560002047173212e-05, "loss": 0.7149, "step": 514800 }, { "epoch": 5.25, "learning_rate": 4.559435389712502e-05, "loss": 0.5452, "step": 514900 }, { "epoch": 5.25, "learning_rate": 4.5588686560047086e-05, "loss": 0.6719, "step": 515000 }, { "epoch": 5.25, "learning_rate": 4.558301846077538e-05, "loss": 0.6272, "step": 515100 }, { "epoch": 5.25, "learning_rate": 4.5577349599587094e-05, "loss": 0.5727, "step": 515200 }, { "epoch": 5.25, "learning_rate": 4.557167997675935e-05, "loss": 0.5979, "step": 515300 }, { "epoch": 5.25, "learning_rate": 4.55660095925694e-05, "loss": 0.6044, "step": 515400 }, { "epoch": 5.25, "learning_rate": 4.5560338447294466e-05, "loss": 0.6781, "step": 515500 }, { "epoch": 5.25, "learning_rate": 4.555466654121184e-05, "loss": 0.6644, "step": 515600 }, { "epoch": 5.25, "learning_rate": 4.554899387459885e-05, "loss": 0.625, "step": 515700 }, { "epoch": 5.26, "learning_rate": 4.554332044773285e-05, "loss": 0.6293, "step": 515800 }, { "epoch": 5.26, "learning_rate": 4.553764626089123e-05, "loss": 0.6183, "step": 515900 }, { "epoch": 5.26, "learning_rate": 4.553197131435145e-05, "loss": 0.5868, "step": 516000 }, { "epoch": 5.26, "learning_rate": 4.552629560839094e-05, "loss": 0.6978, "step": 516100 }, { "epoch": 5.26, "learning_rate": 4.5520619143287235e-05, "loss": 0.6151, "step": 516200 }, { "epoch": 5.26, "learning_rate": 4.5514941919317884e-05, "loss": 0.6742, "step": 516300 }, { "epoch": 5.26, "learning_rate": 4.550926393676047e-05, "loss": 0.6533, "step": 516400 }, { "epoch": 5.26, "learning_rate": 4.550358519589259e-05, "loss": 0.5821, "step": 516500 }, { "epoch": 5.26, "learning_rate": 4.549796249573228e-05, "loss": 0.5985, "step": 516600 }, { "epoch": 5.26, "learning_rate": 4.5492282246652676e-05, "loss": 0.6606, "step": 516700 }, { "epoch": 5.27, "learning_rate": 4.5486601240092934e-05, "loss": 0.7584, "step": 516800 }, { "epoch": 5.27, "learning_rate": 4.54809194763308e-05, "loss": 0.5426, "step": 516900 }, { "epoch": 5.27, "learning_rate": 4.54752369556441e-05, "loss": 0.5782, "step": 517000 }, { "epoch": 5.27, "learning_rate": 4.546955367831066e-05, "loss": 0.663, "step": 517100 }, { "epoch": 5.27, "learning_rate": 4.546386964460836e-05, "loss": 0.6505, "step": 517200 }, { "epoch": 5.27, "learning_rate": 4.545818485481513e-05, "loss": 0.6099, "step": 517300 }, { "epoch": 5.27, "learning_rate": 4.54524993092089e-05, "loss": 0.6174, "step": 517400 }, { "epoch": 5.27, "learning_rate": 4.544681300806766e-05, "loss": 0.6313, "step": 517500 }, { "epoch": 5.27, "learning_rate": 4.5441125951669464e-05, "loss": 0.6208, "step": 517600 }, { "epoch": 5.27, "learning_rate": 4.543543814029234e-05, "loss": 0.562, "step": 517700 }, { "epoch": 5.28, "learning_rate": 4.542974957421442e-05, "loss": 0.6162, "step": 517800 }, { "epoch": 5.28, "learning_rate": 4.542406025371383e-05, "loss": 0.6186, "step": 517900 }, { "epoch": 5.28, "learning_rate": 4.541837017906871e-05, "loss": 0.5819, "step": 518000 }, { "epoch": 5.28, "learning_rate": 4.541267935055733e-05, "loss": 0.6957, "step": 518100 }, { "epoch": 5.28, "learning_rate": 4.5406987768457894e-05, "loss": 0.697, "step": 518200 }, { "epoch": 5.28, "learning_rate": 4.54012954330487e-05, "loss": 0.6091, "step": 518300 }, { "epoch": 5.28, "learning_rate": 4.539560234460808e-05, "loss": 0.5423, "step": 518400 }, { "epoch": 5.28, "learning_rate": 4.538990850341437e-05, "loss": 0.6348, "step": 518500 }, { "epoch": 5.28, "learning_rate": 4.538421390974598e-05, "loss": 0.5592, "step": 518600 }, { "epoch": 5.28, "learning_rate": 4.5378518563881334e-05, "loss": 0.6101, "step": 518700 }, { "epoch": 5.29, "learning_rate": 4.537282246609891e-05, "loss": 0.6071, "step": 518800 }, { "epoch": 5.29, "learning_rate": 4.536712561667721e-05, "loss": 0.4949, "step": 518900 }, { "epoch": 5.29, "learning_rate": 4.5361428015894776e-05, "loss": 0.63, "step": 519000 }, { "epoch": 5.29, "learning_rate": 4.535572966403017e-05, "loss": 0.5384, "step": 519100 }, { "epoch": 5.29, "learning_rate": 4.535003056136203e-05, "loss": 0.5711, "step": 519200 }, { "epoch": 5.29, "learning_rate": 4.5344330708169e-05, "loss": 0.5931, "step": 519300 }, { "epoch": 5.29, "learning_rate": 4.5338630104729764e-05, "loss": 0.6595, "step": 519400 }, { "epoch": 5.29, "learning_rate": 4.533292875132306e-05, "loss": 0.6582, "step": 519500 }, { "epoch": 5.29, "learning_rate": 4.532722664822764e-05, "loss": 0.6752, "step": 519600 }, { "epoch": 5.29, "learning_rate": 4.53215237957223e-05, "loss": 0.7087, "step": 519700 }, { "epoch": 5.3, "learning_rate": 4.531587723380954e-05, "loss": 0.618, "step": 519800 }, { "epoch": 5.3, "learning_rate": 4.531017289080805e-05, "loss": 0.5747, "step": 519900 }, { "epoch": 5.3, "learning_rate": 4.530446779923048e-05, "loss": 0.6026, "step": 520000 }, { "epoch": 5.3, "learning_rate": 4.5298761959355764e-05, "loss": 0.5242, "step": 520100 }, { "epoch": 5.3, "learning_rate": 4.529305537146289e-05, "loss": 0.6124, "step": 520200 }, { "epoch": 5.3, "learning_rate": 4.5287348035830877e-05, "loss": 0.6456, "step": 520300 }, { "epoch": 5.3, "learning_rate": 4.5281639952738766e-05, "loss": 0.5777, "step": 520400 }, { "epoch": 5.3, "learning_rate": 4.5275931122465666e-05, "loss": 0.6726, "step": 520500 }, { "epoch": 5.3, "learning_rate": 4.5270221545290683e-05, "loss": 0.6206, "step": 520600 }, { "epoch": 5.3, "learning_rate": 4.5264511221493e-05, "loss": 0.6183, "step": 520700 }, { "epoch": 5.31, "learning_rate": 4.525880015135182e-05, "loss": 0.5969, "step": 520800 }, { "epoch": 5.31, "learning_rate": 4.525308833514636e-05, "loss": 0.6297, "step": 520900 }, { "epoch": 5.31, "learning_rate": 4.524737577315592e-05, "loss": 0.6263, "step": 521000 }, { "epoch": 5.31, "learning_rate": 4.524166246565979e-05, "loss": 0.5519, "step": 521100 }, { "epoch": 5.31, "learning_rate": 4.5235948412937327e-05, "loss": 0.5599, "step": 521200 }, { "epoch": 5.31, "learning_rate": 4.52302336152679e-05, "loss": 0.6462, "step": 521300 }, { "epoch": 5.31, "learning_rate": 4.522451807293094e-05, "loss": 0.6443, "step": 521400 }, { "epoch": 5.31, "learning_rate": 4.521880178620591e-05, "loss": 0.5655, "step": 521500 }, { "epoch": 5.31, "learning_rate": 4.521308475537229e-05, "loss": 0.7267, "step": 521600 }, { "epoch": 5.32, "learning_rate": 4.5207366980709604e-05, "loss": 0.5959, "step": 521700 }, { "epoch": 5.32, "learning_rate": 4.520164846249745e-05, "loss": 0.6693, "step": 521800 }, { "epoch": 5.32, "learning_rate": 4.519592920101538e-05, "loss": 0.6673, "step": 521900 }, { "epoch": 5.32, "learning_rate": 4.519026640026467e-05, "loss": 0.6553, "step": 522000 }, { "epoch": 5.32, "learning_rate": 4.5184545660507494e-05, "loss": 0.6814, "step": 522100 }, { "epoch": 5.32, "learning_rate": 4.517882417831666e-05, "loss": 0.6389, "step": 522200 }, { "epoch": 5.32, "learning_rate": 4.517310195397189e-05, "loss": 0.6398, "step": 522300 }, { "epoch": 5.32, "learning_rate": 4.5167378987753e-05, "loss": 0.6461, "step": 522400 }, { "epoch": 5.32, "learning_rate": 4.516165527993978e-05, "loss": 0.6148, "step": 522500 }, { "epoch": 5.32, "learning_rate": 4.51559308308121e-05, "loss": 0.643, "step": 522600 }, { "epoch": 5.33, "learning_rate": 4.515020564064985e-05, "loss": 0.543, "step": 522700 }, { "epoch": 5.33, "learning_rate": 4.514447970973296e-05, "loss": 0.6343, "step": 522800 }, { "epoch": 5.33, "learning_rate": 4.513875303834139e-05, "loss": 0.5857, "step": 522900 }, { "epoch": 5.33, "learning_rate": 4.5133025626755136e-05, "loss": 0.6159, "step": 523000 }, { "epoch": 5.33, "learning_rate": 4.512729747525424e-05, "loss": 0.6061, "step": 523100 }, { "epoch": 5.33, "learning_rate": 4.512156858411876e-05, "loss": 0.6386, "step": 523200 }, { "epoch": 5.33, "learning_rate": 4.511583895362883e-05, "loss": 0.6936, "step": 523300 }, { "epoch": 5.33, "learning_rate": 4.5110108584064585e-05, "loss": 0.46, "step": 523400 }, { "epoch": 5.33, "learning_rate": 4.510437747570619e-05, "loss": 0.6174, "step": 523500 }, { "epoch": 5.33, "learning_rate": 4.509864562883388e-05, "loss": 0.5373, "step": 523600 }, { "epoch": 5.34, "learning_rate": 4.5092913043727905e-05, "loss": 0.6647, "step": 523700 }, { "epoch": 5.34, "learning_rate": 4.5087179720668554e-05, "loss": 0.6418, "step": 523800 }, { "epoch": 5.34, "learning_rate": 4.508144565993614e-05, "loss": 0.6397, "step": 523900 }, { "epoch": 5.34, "learning_rate": 4.507571086181104e-05, "loss": 0.5518, "step": 524000 }, { "epoch": 5.34, "learning_rate": 4.5070032685573805e-05, "loss": 0.7298, "step": 524100 }, { "epoch": 5.34, "learning_rate": 4.506429642087149e-05, "loss": 0.7373, "step": 524200 }, { "epoch": 5.34, "learning_rate": 4.5058559419614976e-05, "loss": 0.7065, "step": 524300 }, { "epoch": 5.34, "learning_rate": 4.505282168208476e-05, "loss": 0.5687, "step": 524400 }, { "epoch": 5.34, "learning_rate": 4.5047083208561404e-05, "loss": 0.5042, "step": 524500 }, { "epoch": 5.34, "learning_rate": 4.504134399932547e-05, "loss": 0.6122, "step": 524600 }, { "epoch": 5.35, "learning_rate": 4.5035604054657576e-05, "loss": 0.4681, "step": 524700 }, { "epoch": 5.35, "learning_rate": 4.502986337483837e-05, "loss": 0.6303, "step": 524800 }, { "epoch": 5.35, "learning_rate": 4.502412196014855e-05, "loss": 0.6399, "step": 524900 }, { "epoch": 5.35, "learning_rate": 4.501837981086882e-05, "loss": 0.556, "step": 525000 }, { "epoch": 5.35, "learning_rate": 4.5012636927279936e-05, "loss": 0.6739, "step": 525100 }, { "epoch": 5.35, "learning_rate": 4.500695074947139e-05, "loss": 0.5258, "step": 525200 }, { "epoch": 5.35, "learning_rate": 4.500120640544272e-05, "loss": 0.6643, "step": 525300 }, { "epoch": 5.35, "learning_rate": 4.499546132794457e-05, "loss": 0.5412, "step": 525400 }, { "epoch": 5.35, "learning_rate": 4.4989715517257855e-05, "loss": 0.5702, "step": 525500 }, { "epoch": 5.35, "learning_rate": 4.4983968973663506e-05, "loss": 0.6078, "step": 525600 }, { "epoch": 5.36, "learning_rate": 4.497822169744249e-05, "loss": 0.6462, "step": 525700 }, { "epoch": 5.36, "learning_rate": 4.497247368887582e-05, "loss": 0.5826, "step": 525800 }, { "epoch": 5.36, "learning_rate": 4.496672494824453e-05, "loss": 0.6826, "step": 525900 }, { "epoch": 5.36, "learning_rate": 4.496097547582971e-05, "loss": 0.5613, "step": 526000 }, { "epoch": 5.36, "learning_rate": 4.4955225271912485e-05, "loss": 0.5629, "step": 526100 }, { "epoch": 5.36, "learning_rate": 4.494947433677398e-05, "loss": 0.6689, "step": 526200 }, { "epoch": 5.36, "learning_rate": 4.4943722670695394e-05, "loss": 0.4877, "step": 526300 }, { "epoch": 5.36, "learning_rate": 4.4937970273957954e-05, "loss": 0.5553, "step": 526400 }, { "epoch": 5.36, "learning_rate": 4.49322171468429e-05, "loss": 0.5388, "step": 526500 }, { "epoch": 5.37, "learning_rate": 4.4926463289631556e-05, "loss": 0.6236, "step": 526600 }, { "epoch": 5.37, "learning_rate": 4.4920708702605224e-05, "loss": 0.679, "step": 526700 }, { "epoch": 5.37, "learning_rate": 4.491495338604528e-05, "loss": 0.6725, "step": 526800 }, { "epoch": 5.37, "learning_rate": 4.490919734023312e-05, "loss": 0.6508, "step": 526900 }, { "epoch": 5.37, "learning_rate": 4.490344056545019e-05, "loss": 0.6255, "step": 527000 }, { "epoch": 5.37, "learning_rate": 4.4897683061977945e-05, "loss": 0.687, "step": 527100 }, { "epoch": 5.37, "learning_rate": 4.4891924830097915e-05, "loss": 0.5674, "step": 527200 }, { "epoch": 5.37, "learning_rate": 4.4886165870091625e-05, "loss": 0.5086, "step": 527300 }, { "epoch": 5.37, "learning_rate": 4.4880406182240664e-05, "loss": 0.6606, "step": 527400 }, { "epoch": 5.37, "learning_rate": 4.487464576682664e-05, "loss": 0.6007, "step": 527500 }, { "epoch": 5.38, "learning_rate": 4.48688846241312e-05, "loss": 0.6548, "step": 527600 }, { "epoch": 5.38, "learning_rate": 4.486312275443605e-05, "loss": 0.6419, "step": 527700 }, { "epoch": 5.38, "learning_rate": 4.485736015802288e-05, "loss": 0.582, "step": 527800 }, { "epoch": 5.38, "learning_rate": 4.485159683517347e-05, "loss": 0.6252, "step": 527900 }, { "epoch": 5.38, "learning_rate": 4.48458327861696e-05, "loss": 0.5997, "step": 528000 }, { "epoch": 5.38, "learning_rate": 4.48400680112931e-05, "loss": 0.652, "step": 528100 }, { "epoch": 5.38, "learning_rate": 4.483430251082585e-05, "loss": 0.5784, "step": 528200 }, { "epoch": 5.38, "learning_rate": 4.482853628504972e-05, "loss": 0.5931, "step": 528300 }, { "epoch": 5.38, "learning_rate": 4.482276933424666e-05, "loss": 0.6495, "step": 528400 }, { "epoch": 5.38, "learning_rate": 4.481700165869864e-05, "loss": 0.538, "step": 528500 }, { "epoch": 5.39, "learning_rate": 4.481123325868766e-05, "loss": 0.592, "step": 528600 }, { "epoch": 5.39, "learning_rate": 4.480546413449576e-05, "loss": 0.5635, "step": 528700 }, { "epoch": 5.39, "learning_rate": 4.479969428640502e-05, "loss": 0.6909, "step": 528800 }, { "epoch": 5.39, "learning_rate": 4.479392371469755e-05, "loss": 0.5876, "step": 528900 }, { "epoch": 5.39, "learning_rate": 4.4788152419655495e-05, "loss": 0.5047, "step": 529000 }, { "epoch": 5.39, "learning_rate": 4.478238040156104e-05, "loss": 0.6059, "step": 529100 }, { "epoch": 5.39, "learning_rate": 4.47766076606964e-05, "loss": 0.5658, "step": 529200 }, { "epoch": 5.39, "learning_rate": 4.477083419734383e-05, "loss": 0.6419, "step": 529300 }, { "epoch": 5.39, "learning_rate": 4.4765060011785616e-05, "loss": 0.5918, "step": 529400 }, { "epoch": 5.39, "learning_rate": 4.475928510430407e-05, "loss": 0.7025, "step": 529500 }, { "epoch": 5.4, "learning_rate": 4.475350947518157e-05, "loss": 0.6346, "step": 529600 }, { "epoch": 5.4, "learning_rate": 4.4747733124700504e-05, "loss": 0.556, "step": 529700 }, { "epoch": 5.4, "learning_rate": 4.474195605314329e-05, "loss": 0.6855, "step": 529800 }, { "epoch": 5.4, "learning_rate": 4.4736178260792405e-05, "loss": 0.6182, "step": 529900 }, { "epoch": 5.4, "learning_rate": 4.4730399747930335e-05, "loss": 0.59, "step": 530000 }, { "epoch": 5.4, "learning_rate": 4.472462051483964e-05, "loss": 0.5916, "step": 530100 }, { "epoch": 5.4, "learning_rate": 4.471884056180286e-05, "loss": 0.5715, "step": 530200 }, { "epoch": 5.4, "learning_rate": 4.471305988910262e-05, "loss": 0.6999, "step": 530300 }, { "epoch": 5.4, "learning_rate": 4.470727849702155e-05, "loss": 0.4921, "step": 530400 }, { "epoch": 5.4, "learning_rate": 4.470149638584234e-05, "loss": 0.537, "step": 530500 }, { "epoch": 5.41, "learning_rate": 4.4695713555847674e-05, "loss": 0.5916, "step": 530600 }, { "epoch": 5.41, "learning_rate": 4.468993000732033e-05, "loss": 0.5762, "step": 530700 }, { "epoch": 5.41, "learning_rate": 4.468414574054307e-05, "loss": 0.5568, "step": 530800 }, { "epoch": 5.41, "learning_rate": 4.467836075579871e-05, "loss": 0.6476, "step": 530900 }, { "epoch": 5.41, "learning_rate": 4.4672575053370104e-05, "loss": 0.6192, "step": 531000 }, { "epoch": 5.41, "learning_rate": 4.466678863354014e-05, "loss": 0.5528, "step": 531100 }, { "epoch": 5.41, "learning_rate": 4.466100149659175e-05, "loss": 0.6199, "step": 531200 }, { "epoch": 5.41, "learning_rate": 4.4655213642807864e-05, "loss": 0.5983, "step": 531300 }, { "epoch": 5.41, "learning_rate": 4.46494250724715e-05, "loss": 0.6362, "step": 531400 }, { "epoch": 5.42, "learning_rate": 4.4643635785865665e-05, "loss": 0.5769, "step": 531500 }, { "epoch": 5.42, "learning_rate": 4.463784578327343e-05, "loss": 0.6321, "step": 531600 }, { "epoch": 5.42, "learning_rate": 4.463205506497789e-05, "loss": 0.546, "step": 531700 }, { "epoch": 5.42, "learning_rate": 4.462626363126218e-05, "loss": 0.6022, "step": 531800 }, { "epoch": 5.42, "learning_rate": 4.462047148240946e-05, "loss": 0.5746, "step": 531900 }, { "epoch": 5.42, "learning_rate": 4.4614678618702936e-05, "loss": 0.7018, "step": 532000 }, { "epoch": 5.42, "learning_rate": 4.460888504042585e-05, "loss": 0.6679, "step": 532100 }, { "epoch": 5.42, "learning_rate": 4.460309074786145e-05, "loss": 0.6041, "step": 532200 }, { "epoch": 5.42, "learning_rate": 4.4597295741293075e-05, "loss": 0.7303, "step": 532300 }, { "epoch": 5.42, "learning_rate": 4.459150002100404e-05, "loss": 0.6175, "step": 532400 }, { "epoch": 5.43, "learning_rate": 4.458570358727774e-05, "loss": 0.5557, "step": 532500 }, { "epoch": 5.43, "learning_rate": 4.457990644039758e-05, "loss": 0.6733, "step": 532600 }, { "epoch": 5.43, "learning_rate": 4.4574108580646993e-05, "loss": 0.7231, "step": 532700 }, { "epoch": 5.43, "learning_rate": 4.456836799755923e-05, "loss": 0.6454, "step": 532800 }, { "epoch": 5.43, "learning_rate": 4.456256872003992e-05, "loss": 0.5856, "step": 532900 }, { "epoch": 5.43, "learning_rate": 4.45567687304979e-05, "loss": 0.6081, "step": 533000 }, { "epoch": 5.43, "learning_rate": 4.455096802921677e-05, "loss": 0.6131, "step": 533100 }, { "epoch": 5.43, "learning_rate": 4.454516661648013e-05, "loss": 0.578, "step": 533200 }, { "epoch": 5.43, "learning_rate": 4.453936449257165e-05, "loss": 0.6074, "step": 533300 }, { "epoch": 5.43, "learning_rate": 4.453356165777501e-05, "loss": 0.5656, "step": 533400 }, { "epoch": 5.44, "learning_rate": 4.4527758112373946e-05, "loss": 0.6126, "step": 533500 }, { "epoch": 5.44, "learning_rate": 4.452195385665219e-05, "loss": 0.5385, "step": 533600 }, { "epoch": 5.44, "learning_rate": 4.451614889089357e-05, "loss": 0.5525, "step": 533700 }, { "epoch": 5.44, "learning_rate": 4.451034321538188e-05, "loss": 0.6747, "step": 533800 }, { "epoch": 5.44, "learning_rate": 4.4504536830401014e-05, "loss": 0.5421, "step": 533900 }, { "epoch": 5.44, "learning_rate": 4.449872973623486e-05, "loss": 0.6221, "step": 534000 }, { "epoch": 5.44, "learning_rate": 4.4492921933167334e-05, "loss": 0.6298, "step": 534100 }, { "epoch": 5.44, "learning_rate": 4.448711342148241e-05, "loss": 0.5338, "step": 534200 }, { "epoch": 5.44, "learning_rate": 4.44813042014641e-05, "loss": 0.6281, "step": 534300 }, { "epoch": 5.44, "learning_rate": 4.447549427339644e-05, "loss": 0.5767, "step": 534400 }, { "epoch": 5.45, "learning_rate": 4.4469683637563494e-05, "loss": 0.7092, "step": 534500 }, { "epoch": 5.45, "learning_rate": 4.446387229424937e-05, "loss": 0.593, "step": 534600 }, { "epoch": 5.45, "learning_rate": 4.44580602437382e-05, "loss": 0.6403, "step": 534700 }, { "epoch": 5.45, "learning_rate": 4.4452247486314164e-05, "loss": 0.6177, "step": 534800 }, { "epoch": 5.45, "learning_rate": 4.444643402226147e-05, "loss": 0.6262, "step": 534900 }, { "epoch": 5.45, "learning_rate": 4.444061985186438e-05, "loss": 0.6235, "step": 535000 }, { "epoch": 5.45, "learning_rate": 4.443480497540714e-05, "loss": 0.5891, "step": 535100 }, { "epoch": 5.45, "learning_rate": 4.4428989393174094e-05, "loss": 0.5875, "step": 535200 }, { "epoch": 5.45, "learning_rate": 4.442317310544957e-05, "loss": 0.6354, "step": 535300 }, { "epoch": 5.45, "learning_rate": 4.4417356112517945e-05, "loss": 0.6336, "step": 535400 }, { "epoch": 5.46, "learning_rate": 4.441153841466365e-05, "loss": 0.615, "step": 535500 }, { "epoch": 5.46, "learning_rate": 4.440572001217113e-05, "loss": 0.5998, "step": 535600 }, { "epoch": 5.46, "learning_rate": 4.439990090532487e-05, "loss": 0.5982, "step": 535700 }, { "epoch": 5.46, "learning_rate": 4.439408109440939e-05, "loss": 0.6228, "step": 535800 }, { "epoch": 5.46, "learning_rate": 4.438826057970924e-05, "loss": 0.5754, "step": 535900 }, { "epoch": 5.46, "learning_rate": 4.438243936150902e-05, "loss": 0.6568, "step": 536000 }, { "epoch": 5.46, "learning_rate": 4.437661744009334e-05, "loss": 0.5877, "step": 536100 }, { "epoch": 5.46, "learning_rate": 4.437079481574686e-05, "loss": 0.5033, "step": 536200 }, { "epoch": 5.46, "learning_rate": 4.436497148875428e-05, "loss": 0.6899, "step": 536300 }, { "epoch": 5.46, "learning_rate": 4.435914745940032e-05, "loss": 0.5746, "step": 536400 }, { "epoch": 5.47, "learning_rate": 4.435338097875839e-05, "loss": 0.6168, "step": 536500 }, { "epoch": 5.47, "learning_rate": 4.434755555255249e-05, "loss": 0.6057, "step": 536600 }, { "epoch": 5.47, "learning_rate": 4.434172942483674e-05, "loss": 0.6808, "step": 536700 }, { "epoch": 5.47, "learning_rate": 4.433590259589601e-05, "loss": 0.6403, "step": 536800 }, { "epoch": 5.47, "learning_rate": 4.43300750660152e-05, "loss": 0.5827, "step": 536900 }, { "epoch": 5.47, "learning_rate": 4.432424683547924e-05, "loss": 0.6541, "step": 537000 }, { "epoch": 5.47, "learning_rate": 4.431841790457309e-05, "loss": 0.6105, "step": 537100 }, { "epoch": 5.47, "learning_rate": 4.431258827358175e-05, "loss": 0.558, "step": 537200 }, { "epoch": 5.47, "learning_rate": 4.430675794279026e-05, "loss": 0.6587, "step": 537300 }, { "epoch": 5.48, "learning_rate": 4.430092691248368e-05, "loss": 0.5687, "step": 537400 }, { "epoch": 5.48, "learning_rate": 4.429515350370273e-05, "loss": 0.6228, "step": 537500 }, { "epoch": 5.48, "learning_rate": 4.4289321082209366e-05, "loss": 0.622, "step": 537600 }, { "epoch": 5.48, "learning_rate": 4.428348796205348e-05, "loss": 0.5678, "step": 537700 }, { "epoch": 5.48, "learning_rate": 4.427765414352025e-05, "loss": 0.5563, "step": 537800 }, { "epoch": 5.48, "learning_rate": 4.4271819626894956e-05, "loss": 0.541, "step": 537900 }, { "epoch": 5.48, "learning_rate": 4.426598441246284e-05, "loss": 0.6585, "step": 538000 }, { "epoch": 5.48, "learning_rate": 4.4260148500509225e-05, "loss": 0.6518, "step": 538100 }, { "epoch": 5.48, "learning_rate": 4.425431189131945e-05, "loss": 0.6301, "step": 538200 }, { "epoch": 5.48, "learning_rate": 4.424847458517889e-05, "loss": 0.6119, "step": 538300 }, { "epoch": 5.49, "learning_rate": 4.424263658237295e-05, "loss": 0.6751, "step": 538400 }, { "epoch": 5.49, "learning_rate": 4.4236797883187086e-05, "loss": 0.5911, "step": 538500 }, { "epoch": 5.49, "learning_rate": 4.4230958487906755e-05, "loss": 0.5835, "step": 538600 }, { "epoch": 5.49, "learning_rate": 4.4225118396817474e-05, "loss": 0.555, "step": 538700 }, { "epoch": 5.49, "learning_rate": 4.421927761020481e-05, "loss": 0.6717, "step": 538800 }, { "epoch": 5.49, "learning_rate": 4.4213436128354315e-05, "loss": 0.584, "step": 538900 }, { "epoch": 5.49, "learning_rate": 4.420759395155163e-05, "loss": 0.5698, "step": 539000 }, { "epoch": 5.49, "learning_rate": 4.420175108008237e-05, "loss": 0.668, "step": 539100 }, { "epoch": 5.49, "learning_rate": 4.4195907514232246e-05, "loss": 0.5092, "step": 539200 }, { "epoch": 5.49, "learning_rate": 4.4190063254286956e-05, "loss": 0.6313, "step": 539300 }, { "epoch": 5.5, "learning_rate": 4.4184218300532255e-05, "loss": 0.6157, "step": 539400 }, { "epoch": 5.5, "learning_rate": 4.4178372653253934e-05, "loss": 0.7111, "step": 539500 }, { "epoch": 5.5, "learning_rate": 4.4172526312737794e-05, "loss": 0.6334, "step": 539600 }, { "epoch": 5.5, "learning_rate": 4.416667927926969e-05, "loss": 0.6465, "step": 539700 }, { "epoch": 5.5, "learning_rate": 4.4160831553135524e-05, "loss": 0.5948, "step": 539800 }, { "epoch": 5.5, "learning_rate": 4.4154983134621196e-05, "loss": 0.6976, "step": 539900 }, { "epoch": 5.5, "learning_rate": 4.414913402401266e-05, "loss": 0.6168, "step": 540000 }, { "epoch": 5.5, "learning_rate": 4.414328422159591e-05, "loss": 0.6072, "step": 540100 }, { "epoch": 5.5, "learning_rate": 4.4137433727656974e-05, "loss": 0.7067, "step": 540200 }, { "epoch": 5.5, "learning_rate": 4.413158254248189e-05, "loss": 0.5905, "step": 540300 }, { "epoch": 5.51, "learning_rate": 4.412573066635675e-05, "loss": 0.6042, "step": 540400 }, { "epoch": 5.51, "learning_rate": 4.4119878099567683e-05, "loss": 0.6284, "step": 540500 }, { "epoch": 5.51, "learning_rate": 4.411402484240083e-05, "loss": 0.6194, "step": 540600 }, { "epoch": 5.51, "learning_rate": 4.4108170895142396e-05, "loss": 0.6498, "step": 540700 }, { "epoch": 5.51, "learning_rate": 4.41023162580786e-05, "loss": 0.5895, "step": 540800 }, { "epoch": 5.51, "learning_rate": 4.40964609314957e-05, "loss": 0.5994, "step": 540900 }, { "epoch": 5.51, "learning_rate": 4.409060491567998e-05, "loss": 0.6237, "step": 541000 }, { "epoch": 5.51, "learning_rate": 4.4084748210917776e-05, "loss": 0.6029, "step": 541100 }, { "epoch": 5.51, "learning_rate": 4.407889081749543e-05, "loss": 0.5726, "step": 541200 }, { "epoch": 5.51, "learning_rate": 4.407303273569934e-05, "loss": 0.6478, "step": 541300 }, { "epoch": 5.52, "learning_rate": 4.406717396581594e-05, "loss": 0.6845, "step": 541400 }, { "epoch": 5.52, "learning_rate": 4.4061314508131666e-05, "loss": 0.7196, "step": 541500 }, { "epoch": 5.52, "learning_rate": 4.4055454362933045e-05, "loss": 0.6521, "step": 541600 }, { "epoch": 5.52, "learning_rate": 4.4049593530506574e-05, "loss": 0.6904, "step": 541700 }, { "epoch": 5.52, "learning_rate": 4.404373201113882e-05, "loss": 0.7974, "step": 541800 }, { "epoch": 5.52, "learning_rate": 4.4037869805116386e-05, "loss": 0.5769, "step": 541900 }, { "epoch": 5.52, "learning_rate": 4.40320069127259e-05, "loss": 0.5989, "step": 542000 }, { "epoch": 5.52, "learning_rate": 4.4026143334254e-05, "loss": 0.6951, "step": 542100 }, { "epoch": 5.52, "learning_rate": 4.4020279069987404e-05, "loss": 0.6586, "step": 542200 }, { "epoch": 5.53, "learning_rate": 4.4014414120212815e-05, "loss": 0.6265, "step": 542300 }, { "epoch": 5.53, "learning_rate": 4.400860714495789e-05, "loss": 0.6445, "step": 542400 }, { "epoch": 5.53, "learning_rate": 4.40027408318756e-05, "loss": 0.6343, "step": 542500 }, { "epoch": 5.53, "learning_rate": 4.399687383414284e-05, "loss": 0.7116, "step": 542600 }, { "epoch": 5.53, "learning_rate": 4.399100615204648e-05, "loss": 0.5308, "step": 542700 }, { "epoch": 5.53, "learning_rate": 4.3985137785873416e-05, "loss": 0.6299, "step": 542800 }, { "epoch": 5.53, "learning_rate": 4.397926873591058e-05, "loss": 0.7087, "step": 542900 }, { "epoch": 5.53, "learning_rate": 4.3973399002444915e-05, "loss": 0.5172, "step": 543000 }, { "epoch": 5.53, "learning_rate": 4.396752858576344e-05, "loss": 0.6207, "step": 543100 }, { "epoch": 5.53, "learning_rate": 4.396165748615315e-05, "loss": 0.5287, "step": 543200 }, { "epoch": 5.54, "learning_rate": 4.3955785703901146e-05, "loss": 0.5323, "step": 543300 }, { "epoch": 5.54, "learning_rate": 4.39499132392945e-05, "loss": 0.7058, "step": 543400 }, { "epoch": 5.54, "learning_rate": 4.394404009262035e-05, "loss": 0.6369, "step": 543500 }, { "epoch": 5.54, "learning_rate": 4.3938166264165845e-05, "loss": 0.6712, "step": 543600 }, { "epoch": 5.54, "learning_rate": 4.3932291754218196e-05, "loss": 0.6051, "step": 543700 }, { "epoch": 5.54, "learning_rate": 4.3926416563064614e-05, "loss": 0.7129, "step": 543800 }, { "epoch": 5.54, "learning_rate": 4.392054069099237e-05, "loss": 0.5879, "step": 543900 }, { "epoch": 5.54, "learning_rate": 4.391466413828877e-05, "loss": 0.5682, "step": 544000 }, { "epoch": 5.54, "learning_rate": 4.3908786905241137e-05, "loss": 0.6544, "step": 544100 }, { "epoch": 5.54, "learning_rate": 4.390290899213682e-05, "loss": 0.6695, "step": 544200 }, { "epoch": 5.55, "learning_rate": 4.3897030399263215e-05, "loss": 0.5555, "step": 544300 }, { "epoch": 5.55, "learning_rate": 4.389120992299381e-05, "loss": 0.6823, "step": 544400 }, { "epoch": 5.55, "learning_rate": 4.388532997823448e-05, "loss": 0.6049, "step": 544500 }, { "epoch": 5.55, "learning_rate": 4.387944935456538e-05, "loss": 0.6513, "step": 544600 }, { "epoch": 5.55, "learning_rate": 4.387356805227405e-05, "loss": 0.6894, "step": 544700 }, { "epoch": 5.55, "learning_rate": 4.386768607164802e-05, "loss": 0.5959, "step": 544800 }, { "epoch": 5.55, "learning_rate": 4.38618034129749e-05, "loss": 0.5943, "step": 544900 }, { "epoch": 5.55, "learning_rate": 4.385592007654232e-05, "loss": 0.5475, "step": 545000 }, { "epoch": 5.55, "learning_rate": 4.385003606263793e-05, "loss": 0.6197, "step": 545100 }, { "epoch": 5.55, "learning_rate": 4.384415137154943e-05, "loss": 0.6858, "step": 545200 }, { "epoch": 5.56, "learning_rate": 4.383826600356455e-05, "loss": 0.5553, "step": 545300 }, { "epoch": 5.56, "learning_rate": 4.383237995897105e-05, "loss": 0.5994, "step": 545400 }, { "epoch": 5.56, "learning_rate": 4.382649323805671e-05, "loss": 0.6438, "step": 545500 }, { "epoch": 5.56, "learning_rate": 4.382060584110937e-05, "loss": 0.587, "step": 545600 }, { "epoch": 5.56, "learning_rate": 4.381471776841687e-05, "loss": 0.6921, "step": 545700 }, { "epoch": 5.56, "learning_rate": 4.380882902026711e-05, "loss": 0.531, "step": 545800 }, { "epoch": 5.56, "learning_rate": 4.3802939596948035e-05, "loss": 0.5933, "step": 545900 }, { "epoch": 5.56, "learning_rate": 4.379704949874758e-05, "loss": 0.6053, "step": 546000 }, { "epoch": 5.56, "learning_rate": 4.3791158725953735e-05, "loss": 0.6528, "step": 546100 }, { "epoch": 5.56, "learning_rate": 4.3785267278854534e-05, "loss": 0.5721, "step": 546200 }, { "epoch": 5.57, "learning_rate": 4.377937515773803e-05, "loss": 0.6541, "step": 546300 }, { "epoch": 5.57, "learning_rate": 4.377348236289231e-05, "loss": 0.5926, "step": 546400 }, { "epoch": 5.57, "learning_rate": 4.3767588894605494e-05, "loss": 0.645, "step": 546500 }, { "epoch": 5.57, "learning_rate": 4.376169475316574e-05, "loss": 0.6006, "step": 546600 }, { "epoch": 5.57, "learning_rate": 4.375579993886125e-05, "loss": 0.648, "step": 546700 }, { "epoch": 5.57, "learning_rate": 4.374990445198023e-05, "loss": 0.6921, "step": 546800 }, { "epoch": 5.57, "learning_rate": 4.374400829281093e-05, "loss": 0.5948, "step": 546900 }, { "epoch": 5.57, "learning_rate": 4.373811146164164e-05, "loss": 0.612, "step": 547000 }, { "epoch": 5.57, "learning_rate": 4.3732213958760696e-05, "loss": 0.595, "step": 547100 }, { "epoch": 5.57, "learning_rate": 4.372631578445644e-05, "loss": 0.6331, "step": 547200 }, { "epoch": 5.58, "learning_rate": 4.3720416939017255e-05, "loss": 0.5206, "step": 547300 }, { "epoch": 5.58, "learning_rate": 4.371451742273155e-05, "loss": 0.5713, "step": 547400 }, { "epoch": 5.58, "learning_rate": 4.37086172358878e-05, "loss": 0.5669, "step": 547500 }, { "epoch": 5.58, "learning_rate": 4.3702716378774473e-05, "loss": 0.6388, "step": 547600 }, { "epoch": 5.58, "learning_rate": 4.3696814851680085e-05, "loss": 0.5884, "step": 547700 }, { "epoch": 5.58, "learning_rate": 4.369091265489319e-05, "loss": 0.5899, "step": 547800 }, { "epoch": 5.58, "learning_rate": 4.3685009788702366e-05, "loss": 0.61, "step": 547900 }, { "epoch": 5.58, "learning_rate": 4.367910625339624e-05, "loss": 0.5401, "step": 548000 }, { "epoch": 5.58, "learning_rate": 4.367320204926345e-05, "loss": 0.6246, "step": 548100 }, { "epoch": 5.59, "learning_rate": 4.366729717659267e-05, "loss": 0.6131, "step": 548200 }, { "epoch": 5.59, "learning_rate": 4.366139163567262e-05, "loss": 0.6377, "step": 548300 }, { "epoch": 5.59, "learning_rate": 4.365548542679205e-05, "loss": 0.5178, "step": 548400 }, { "epoch": 5.59, "learning_rate": 4.364957855023974e-05, "loss": 0.6348, "step": 548500 }, { "epoch": 5.59, "learning_rate": 4.364367100630449e-05, "loss": 0.5241, "step": 548600 }, { "epoch": 5.59, "learning_rate": 4.363776279527514e-05, "loss": 0.552, "step": 548700 }, { "epoch": 5.59, "learning_rate": 4.363185391744059e-05, "loss": 0.5183, "step": 548800 }, { "epoch": 5.59, "learning_rate": 4.362594437308973e-05, "loss": 0.5679, "step": 548900 }, { "epoch": 5.59, "learning_rate": 4.3620034162511496e-05, "loss": 0.5126, "step": 549000 }, { "epoch": 5.59, "learning_rate": 4.361412328599488e-05, "loss": 0.5286, "step": 549100 }, { "epoch": 5.6, "learning_rate": 4.360821174382888e-05, "loss": 0.6341, "step": 549200 }, { "epoch": 5.6, "learning_rate": 4.360229953630253e-05, "loss": 0.6589, "step": 549300 }, { "epoch": 5.6, "learning_rate": 4.3596386663704914e-05, "loss": 0.5656, "step": 549400 }, { "epoch": 5.6, "learning_rate": 4.3590473126325127e-05, "loss": 0.6225, "step": 549500 }, { "epoch": 5.6, "learning_rate": 4.35845589244523e-05, "loss": 0.6008, "step": 549600 }, { "epoch": 5.6, "learning_rate": 4.3578644058375624e-05, "loss": 0.6276, "step": 549700 }, { "epoch": 5.6, "learning_rate": 4.3572728528384276e-05, "loss": 0.6903, "step": 549800 }, { "epoch": 5.6, "learning_rate": 4.356687149998767e-05, "loss": 0.627, "step": 549900 }, { "epoch": 5.6, "learning_rate": 4.3560954649666674e-05, "loss": 0.5396, "step": 550000 }, { "epoch": 5.6, "learning_rate": 4.355503713629591e-05, "loss": 0.5334, "step": 550100 }, { "epoch": 5.61, "learning_rate": 4.354911896016473e-05, "loss": 0.5482, "step": 550200 }, { "epoch": 5.61, "learning_rate": 4.354320012156248e-05, "loss": 0.5601, "step": 550300 }, { "epoch": 5.61, "learning_rate": 4.353728062077858e-05, "loss": 0.6359, "step": 550400 }, { "epoch": 5.61, "learning_rate": 4.353136045810244e-05, "loss": 0.4822, "step": 550500 }, { "epoch": 5.61, "learning_rate": 4.352543963382352e-05, "loss": 0.6395, "step": 550600 }, { "epoch": 5.61, "learning_rate": 4.3519518148231326e-05, "loss": 0.6078, "step": 550700 }, { "epoch": 5.61, "learning_rate": 4.3513596001615365e-05, "loss": 0.6139, "step": 550800 }, { "epoch": 5.61, "learning_rate": 4.350767319426521e-05, "loss": 0.5345, "step": 550900 }, { "epoch": 5.61, "learning_rate": 4.350174972647044e-05, "loss": 0.5326, "step": 551000 }, { "epoch": 5.61, "learning_rate": 4.349582559852069e-05, "loss": 0.5607, "step": 551100 }, { "epoch": 5.62, "learning_rate": 4.34899008107056e-05, "loss": 0.6106, "step": 551200 }, { "epoch": 5.62, "learning_rate": 4.348397536331488e-05, "loss": 0.6167, "step": 551300 }, { "epoch": 5.62, "learning_rate": 4.347804925663821e-05, "loss": 0.6763, "step": 551400 }, { "epoch": 5.62, "learning_rate": 4.3472122490965375e-05, "loss": 0.55, "step": 551500 }, { "epoch": 5.62, "learning_rate": 4.346619506658615e-05, "loss": 0.5514, "step": 551600 }, { "epoch": 5.62, "learning_rate": 4.346026698379034e-05, "loss": 0.5638, "step": 551700 }, { "epoch": 5.62, "learning_rate": 4.3454338242867804e-05, "loss": 0.5867, "step": 551800 }, { "epoch": 5.62, "learning_rate": 4.3448408844108425e-05, "loss": 0.6505, "step": 551900 }, { "epoch": 5.62, "learning_rate": 4.344247878780211e-05, "loss": 0.671, "step": 552000 }, { "epoch": 5.62, "learning_rate": 4.3436548074238793e-05, "loss": 0.5807, "step": 552100 }, { "epoch": 5.63, "learning_rate": 4.343061670370847e-05, "loss": 0.5581, "step": 552200 }, { "epoch": 5.63, "learning_rate": 4.342468467650114e-05, "loss": 0.589, "step": 552300 }, { "epoch": 5.63, "learning_rate": 4.341875199290684e-05, "loss": 0.6767, "step": 552400 }, { "epoch": 5.63, "learning_rate": 4.341281865321564e-05, "loss": 0.5379, "step": 552500 }, { "epoch": 5.63, "learning_rate": 4.340688465771766e-05, "loss": 0.6394, "step": 552600 }, { "epoch": 5.63, "learning_rate": 4.340095000670303e-05, "loss": 0.4846, "step": 552700 }, { "epoch": 5.63, "learning_rate": 4.3395014700461904e-05, "loss": 0.5949, "step": 552800 }, { "epoch": 5.63, "learning_rate": 4.338907873928451e-05, "loss": 0.725, "step": 552900 }, { "epoch": 5.63, "learning_rate": 4.338320149285884e-05, "loss": 0.568, "step": 553000 }, { "epoch": 5.64, "learning_rate": 4.3377264229221736e-05, "loss": 0.5683, "step": 553100 }, { "epoch": 5.64, "learning_rate": 4.3371326311516235e-05, "loss": 0.5702, "step": 553200 }, { "epoch": 5.64, "learning_rate": 4.336538774003268e-05, "loss": 0.6097, "step": 553300 }, { "epoch": 5.64, "learning_rate": 4.335944851506141e-05, "loss": 0.6066, "step": 553400 }, { "epoch": 5.64, "learning_rate": 4.335350863689285e-05, "loss": 0.6705, "step": 553500 }, { "epoch": 5.64, "learning_rate": 4.33475681058174e-05, "loss": 0.5738, "step": 553600 }, { "epoch": 5.64, "learning_rate": 4.3341626922125525e-05, "loss": 0.5466, "step": 553700 }, { "epoch": 5.64, "learning_rate": 4.333568508610771e-05, "loss": 0.4833, "step": 553800 }, { "epoch": 5.64, "learning_rate": 4.332974259805448e-05, "loss": 0.5848, "step": 553900 }, { "epoch": 5.64, "learning_rate": 4.3323858892879546e-05, "loss": 0.61, "step": 554000 }, { "epoch": 5.65, "learning_rate": 4.331791510814028e-05, "loss": 0.6415, "step": 554100 }, { "epoch": 5.65, "learning_rate": 4.331197067223442e-05, "loss": 0.5616, "step": 554200 }, { "epoch": 5.65, "learning_rate": 4.3306025585452656e-05, "loss": 0.6331, "step": 554300 }, { "epoch": 5.65, "learning_rate": 4.3300079848085645e-05, "loss": 0.5442, "step": 554400 }, { "epoch": 5.65, "learning_rate": 4.3294133460424104e-05, "loss": 0.6466, "step": 554500 }, { "epoch": 5.65, "learning_rate": 4.328818642275876e-05, "loss": 0.5342, "step": 554600 }, { "epoch": 5.65, "learning_rate": 4.3282238735380406e-05, "loss": 0.6022, "step": 554700 }, { "epoch": 5.65, "learning_rate": 4.327629039857984e-05, "loss": 0.6032, "step": 554800 }, { "epoch": 5.65, "learning_rate": 4.32703414126479e-05, "loss": 0.6293, "step": 554900 }, { "epoch": 5.65, "learning_rate": 4.326439177787546e-05, "loss": 0.5969, "step": 555000 }, { "epoch": 5.66, "learning_rate": 4.3258441494553425e-05, "loss": 0.556, "step": 555100 }, { "epoch": 5.66, "learning_rate": 4.3252490562972716e-05, "loss": 0.6045, "step": 555200 }, { "epoch": 5.66, "learning_rate": 4.324653898342431e-05, "loss": 0.5206, "step": 555300 }, { "epoch": 5.66, "learning_rate": 4.3240586756199196e-05, "loss": 0.5951, "step": 555400 }, { "epoch": 5.66, "learning_rate": 4.3234633881588406e-05, "loss": 0.6341, "step": 555500 }, { "epoch": 5.66, "learning_rate": 4.3228680359883e-05, "loss": 0.5919, "step": 555600 }, { "epoch": 5.66, "learning_rate": 4.322272619137407e-05, "loss": 0.7124, "step": 555700 }, { "epoch": 5.66, "learning_rate": 4.321677137635274e-05, "loss": 0.6143, "step": 555800 }, { "epoch": 5.66, "learning_rate": 4.321081591511016e-05, "loss": 0.6462, "step": 555900 }, { "epoch": 5.66, "learning_rate": 4.320485980793752e-05, "loss": 0.5808, "step": 556000 }, { "epoch": 5.67, "learning_rate": 4.3198903055126035e-05, "loss": 0.6107, "step": 556100 }, { "epoch": 5.67, "learning_rate": 4.319294565696696e-05, "loss": 0.6008, "step": 556200 }, { "epoch": 5.67, "learning_rate": 4.318698761375158e-05, "loss": 0.5931, "step": 556300 }, { "epoch": 5.67, "learning_rate": 4.31810289257712e-05, "loss": 0.6418, "step": 556400 }, { "epoch": 5.67, "learning_rate": 4.317506959331716e-05, "loss": 0.4871, "step": 556500 }, { "epoch": 5.67, "learning_rate": 4.3169169219634954e-05, "loss": 0.6527, "step": 556600 }, { "epoch": 5.67, "learning_rate": 4.3163208605545225e-05, "loss": 0.6247, "step": 556700 }, { "epoch": 5.67, "learning_rate": 4.315724734785316e-05, "loss": 0.6339, "step": 556800 }, { "epoch": 5.67, "learning_rate": 4.315128544685021e-05, "loss": 0.5041, "step": 556900 }, { "epoch": 5.67, "learning_rate": 4.314532290282789e-05, "loss": 0.4832, "step": 557000 }, { "epoch": 5.68, "learning_rate": 4.3139359716077734e-05, "loss": 0.5883, "step": 557100 }, { "epoch": 5.68, "learning_rate": 4.313339588689129e-05, "loss": 0.6008, "step": 557200 }, { "epoch": 5.68, "learning_rate": 4.3127431415560166e-05, "loss": 0.5565, "step": 557300 }, { "epoch": 5.68, "learning_rate": 4.312146630237598e-05, "loss": 0.6162, "step": 557400 }, { "epoch": 5.68, "learning_rate": 4.31155005476304e-05, "loss": 0.4892, "step": 557500 }, { "epoch": 5.68, "learning_rate": 4.31095938187486e-05, "loss": 0.6597, "step": 557600 }, { "epoch": 5.68, "learning_rate": 4.310362678816366e-05, "loss": 0.6182, "step": 557700 }, { "epoch": 5.68, "learning_rate": 4.309765911688957e-05, "loss": 0.6434, "step": 557800 }, { "epoch": 5.68, "learning_rate": 4.309169080521812e-05, "loss": 0.6423, "step": 557900 }, { "epoch": 5.69, "learning_rate": 4.308572185344111e-05, "loss": 0.5865, "step": 558000 }, { "epoch": 5.69, "learning_rate": 4.3079752261850385e-05, "loss": 0.6173, "step": 558100 }, { "epoch": 5.69, "learning_rate": 4.307378203073784e-05, "loss": 0.5951, "step": 558200 }, { "epoch": 5.69, "learning_rate": 4.3067811160395377e-05, "loss": 0.5642, "step": 558300 }, { "epoch": 5.69, "learning_rate": 4.3061839651114926e-05, "loss": 0.6104, "step": 558400 }, { "epoch": 5.69, "learning_rate": 4.305586750318847e-05, "loss": 0.752, "step": 558500 }, { "epoch": 5.69, "learning_rate": 4.3049894716908005e-05, "loss": 0.5335, "step": 558600 }, { "epoch": 5.69, "learning_rate": 4.304392129256556e-05, "loss": 0.611, "step": 558700 }, { "epoch": 5.69, "learning_rate": 4.3037947230453215e-05, "loss": 0.6107, "step": 558800 }, { "epoch": 5.69, "learning_rate": 4.3031972530863054e-05, "loss": 0.6909, "step": 558900 }, { "epoch": 5.7, "learning_rate": 4.302599719408721e-05, "loss": 0.5326, "step": 559000 }, { "epoch": 5.7, "learning_rate": 4.302002122041783e-05, "loss": 0.6143, "step": 559100 }, { "epoch": 5.7, "learning_rate": 4.301404461014712e-05, "loss": 0.5387, "step": 559200 }, { "epoch": 5.7, "learning_rate": 4.300806736356728e-05, "loss": 0.5312, "step": 559300 }, { "epoch": 5.7, "learning_rate": 4.300208948097058e-05, "loss": 0.6137, "step": 559400 }, { "epoch": 5.7, "learning_rate": 4.2996110962649306e-05, "loss": 0.7067, "step": 559500 }, { "epoch": 5.7, "learning_rate": 4.2990131808895755e-05, "loss": 0.6202, "step": 559600 }, { "epoch": 5.7, "learning_rate": 4.298415202000228e-05, "loss": 0.6079, "step": 559700 }, { "epoch": 5.7, "learning_rate": 4.2978171596261256e-05, "loss": 0.5986, "step": 559800 }, { "epoch": 5.7, "learning_rate": 4.297219053796509e-05, "loss": 0.6878, "step": 559900 }, { "epoch": 5.71, "learning_rate": 4.296620884540622e-05, "loss": 0.6901, "step": 560000 }, { "epoch": 5.71, "learning_rate": 4.2960226518877124e-05, "loss": 0.5864, "step": 560100 }, { "epoch": 5.71, "learning_rate": 4.2954303391408095e-05, "loss": 0.5956, "step": 560200 }, { "epoch": 5.71, "learning_rate": 4.2948319804148464e-05, "loss": 0.5423, "step": 560300 }, { "epoch": 5.71, "learning_rate": 4.294233558379326e-05, "loss": 0.7795, "step": 560400 }, { "epoch": 5.71, "learning_rate": 4.293635073063508e-05, "loss": 0.6248, "step": 560500 }, { "epoch": 5.71, "learning_rate": 4.293036524496655e-05, "loss": 0.6419, "step": 560600 }, { "epoch": 5.71, "learning_rate": 4.292437912708033e-05, "loss": 0.6114, "step": 560700 }, { "epoch": 5.71, "learning_rate": 4.2918392377269095e-05, "loss": 0.5693, "step": 560800 }, { "epoch": 5.71, "learning_rate": 4.291240499582557e-05, "loss": 0.613, "step": 560900 }, { "epoch": 5.72, "learning_rate": 4.2906416983042495e-05, "loss": 0.6145, "step": 561000 }, { "epoch": 5.72, "learning_rate": 4.290042833921265e-05, "loss": 0.6099, "step": 561100 }, { "epoch": 5.72, "learning_rate": 4.2894439064628845e-05, "loss": 0.6162, "step": 561200 }, { "epoch": 5.72, "learning_rate": 4.288844915958392e-05, "loss": 0.5448, "step": 561300 }, { "epoch": 5.72, "learning_rate": 4.288245862437075e-05, "loss": 0.6628, "step": 561400 }, { "epoch": 5.72, "learning_rate": 4.287646745928223e-05, "loss": 0.6489, "step": 561500 }, { "epoch": 5.72, "learning_rate": 4.2870475664611284e-05, "loss": 0.5763, "step": 561600 }, { "epoch": 5.72, "learning_rate": 4.28644832406509e-05, "loss": 0.5843, "step": 561700 }, { "epoch": 5.72, "learning_rate": 4.285849018769405e-05, "loss": 0.5917, "step": 561800 }, { "epoch": 5.72, "learning_rate": 4.285249650603375e-05, "loss": 0.595, "step": 561900 }, { "epoch": 5.73, "learning_rate": 4.2846562142173464e-05, "loss": 0.7103, "step": 562000 }, { "epoch": 5.73, "learning_rate": 4.284056721026523e-05, "loss": 0.6344, "step": 562100 }, { "epoch": 5.73, "learning_rate": 4.283457165052989e-05, "loss": 0.6899, "step": 562200 }, { "epoch": 5.73, "learning_rate": 4.282857546326058e-05, "loss": 0.6086, "step": 562300 }, { "epoch": 5.73, "learning_rate": 4.282257864875049e-05, "loss": 0.4851, "step": 562400 }, { "epoch": 5.73, "learning_rate": 4.281658120729283e-05, "loss": 0.5682, "step": 562500 }, { "epoch": 5.73, "learning_rate": 4.281058313918083e-05, "loss": 0.6163, "step": 562600 }, { "epoch": 5.73, "learning_rate": 4.2804584444707764e-05, "loss": 0.6508, "step": 562700 }, { "epoch": 5.73, "learning_rate": 4.279858512416693e-05, "loss": 0.6689, "step": 562800 }, { "epoch": 5.73, "learning_rate": 4.2792585177851664e-05, "loss": 0.6754, "step": 562900 }, { "epoch": 5.74, "learning_rate": 4.278658460605533e-05, "loss": 0.5285, "step": 563000 }, { "epoch": 5.74, "learning_rate": 4.278058340907131e-05, "loss": 0.5393, "step": 563100 }, { "epoch": 5.74, "learning_rate": 4.277458158719303e-05, "loss": 0.6492, "step": 563200 }, { "epoch": 5.74, "learning_rate": 4.276857914071395e-05, "loss": 0.6301, "step": 563300 }, { "epoch": 5.74, "learning_rate": 4.276257606992754e-05, "loss": 0.5828, "step": 563400 }, { "epoch": 5.74, "learning_rate": 4.275657237512734e-05, "loss": 0.5016, "step": 563500 }, { "epoch": 5.74, "learning_rate": 4.275056805660686e-05, "loss": 0.5719, "step": 563600 }, { "epoch": 5.74, "learning_rate": 4.27445631146597e-05, "loss": 0.6831, "step": 563700 }, { "epoch": 5.74, "learning_rate": 4.273855754957946e-05, "loss": 0.5659, "step": 563800 }, { "epoch": 5.75, "learning_rate": 4.273255136165978e-05, "loss": 0.6473, "step": 563900 }, { "epoch": 5.75, "learning_rate": 4.272654455119432e-05, "loss": 0.667, "step": 564000 }, { "epoch": 5.75, "learning_rate": 4.272053711847678e-05, "loss": 0.7306, "step": 564100 }, { "epoch": 5.75, "learning_rate": 4.2714529063800885e-05, "loss": 0.582, "step": 564200 }, { "epoch": 5.75, "learning_rate": 4.2708520387460396e-05, "loss": 0.6059, "step": 564300 }, { "epoch": 5.75, "learning_rate": 4.2702511089749094e-05, "loss": 0.6016, "step": 564400 }, { "epoch": 5.75, "learning_rate": 4.269650117096082e-05, "loss": 0.6232, "step": 564500 }, { "epoch": 5.75, "learning_rate": 4.269049063138939e-05, "loss": 0.6248, "step": 564600 }, { "epoch": 5.75, "learning_rate": 4.268447947132871e-05, "loss": 0.6167, "step": 564700 }, { "epoch": 5.75, "learning_rate": 4.267846769107268e-05, "loss": 0.5898, "step": 564800 }, { "epoch": 5.76, "learning_rate": 4.267245529091524e-05, "loss": 0.6361, "step": 564900 }, { "epoch": 5.76, "learning_rate": 4.266644227115036e-05, "loss": 0.5917, "step": 565000 }, { "epoch": 5.76, "learning_rate": 4.2660428632072044e-05, "loss": 0.6089, "step": 565100 }, { "epoch": 5.76, "learning_rate": 4.2654414373974324e-05, "loss": 0.6353, "step": 565200 }, { "epoch": 5.76, "learning_rate": 4.264839949715125e-05, "loss": 0.5883, "step": 565300 }, { "epoch": 5.76, "learning_rate": 4.2642384001896933e-05, "loss": 0.5892, "step": 565400 }, { "epoch": 5.76, "learning_rate": 4.263636788850548e-05, "loss": 0.4948, "step": 565500 }, { "epoch": 5.76, "learning_rate": 4.263035115727105e-05, "loss": 0.5148, "step": 565600 }, { "epoch": 5.76, "learning_rate": 4.262433380848781e-05, "loss": 0.5126, "step": 565700 }, { "epoch": 5.76, "learning_rate": 4.261831584245e-05, "loss": 0.6142, "step": 565800 }, { "epoch": 5.77, "learning_rate": 4.261229725945185e-05, "loss": 0.5729, "step": 565900 }, { "epoch": 5.77, "learning_rate": 4.260627805978762e-05, "loss": 0.5198, "step": 566000 }, { "epoch": 5.77, "learning_rate": 4.2600258243751624e-05, "loss": 0.5868, "step": 566100 }, { "epoch": 5.77, "learning_rate": 4.259423781163819e-05, "loss": 0.6401, "step": 566200 }, { "epoch": 5.77, "learning_rate": 4.258821676374169e-05, "loss": 0.5718, "step": 566300 }, { "epoch": 5.77, "learning_rate": 4.258219510035651e-05, "loss": 0.6175, "step": 566400 }, { "epoch": 5.77, "learning_rate": 4.2576172821777085e-05, "loss": 0.5458, "step": 566500 }, { "epoch": 5.77, "learning_rate": 4.257014992829785e-05, "loss": 0.5564, "step": 566600 }, { "epoch": 5.77, "learning_rate": 4.256412642021331e-05, "loss": 0.6289, "step": 566700 }, { "epoch": 5.77, "learning_rate": 4.255810229781796e-05, "loss": 0.5675, "step": 566800 }, { "epoch": 5.78, "learning_rate": 4.255207756140635e-05, "loss": 0.5231, "step": 566900 }, { "epoch": 5.78, "learning_rate": 4.254605221127305e-05, "loss": 0.4503, "step": 567000 }, { "epoch": 5.78, "learning_rate": 4.254002624771267e-05, "loss": 0.5503, "step": 567100 }, { "epoch": 5.78, "learning_rate": 4.253399967101984e-05, "loss": 0.6196, "step": 567200 }, { "epoch": 5.78, "learning_rate": 4.252797248148924e-05, "loss": 0.6626, "step": 567300 }, { "epoch": 5.78, "learning_rate": 4.252194467941553e-05, "loss": 0.4948, "step": 567400 }, { "epoch": 5.78, "learning_rate": 4.251591626509346e-05, "loss": 0.6289, "step": 567500 }, { "epoch": 5.78, "learning_rate": 4.250988723881778e-05, "loss": 0.5575, "step": 567600 }, { "epoch": 5.78, "learning_rate": 4.2503857600883256e-05, "loss": 0.6365, "step": 567700 }, { "epoch": 5.78, "learning_rate": 4.249782735158473e-05, "loss": 0.6386, "step": 567800 }, { "epoch": 5.79, "learning_rate": 4.249179649121702e-05, "loss": 0.5913, "step": 567900 }, { "epoch": 5.79, "learning_rate": 4.2485765020075016e-05, "loss": 0.6071, "step": 568000 }, { "epoch": 5.79, "learning_rate": 4.2479732938453606e-05, "loss": 0.6043, "step": 568100 }, { "epoch": 5.79, "learning_rate": 4.2473700246647734e-05, "loss": 0.5723, "step": 568200 }, { "epoch": 5.79, "learning_rate": 4.2467666944952355e-05, "loss": 0.5994, "step": 568300 }, { "epoch": 5.79, "learning_rate": 4.2461693375791894e-05, "loss": 0.6128, "step": 568400 }, { "epoch": 5.79, "learning_rate": 4.2455658861294054e-05, "loss": 0.5928, "step": 568500 }, { "epoch": 5.79, "learning_rate": 4.244962373778883e-05, "loss": 0.6408, "step": 568600 }, { "epoch": 5.79, "learning_rate": 4.24435880055713e-05, "loss": 0.6415, "step": 568700 }, { "epoch": 5.8, "learning_rate": 4.243755166493657e-05, "loss": 0.5418, "step": 568800 }, { "epoch": 5.8, "learning_rate": 4.243151471617979e-05, "loss": 0.5966, "step": 568900 }, { "epoch": 5.8, "learning_rate": 4.2425477159596124e-05, "loss": 0.6199, "step": 569000 }, { "epoch": 5.8, "learning_rate": 4.2419438995480784e-05, "loss": 0.4995, "step": 569100 }, { "epoch": 5.8, "learning_rate": 4.2413400224129e-05, "loss": 0.7143, "step": 569200 }, { "epoch": 5.8, "learning_rate": 4.2407360845836e-05, "loss": 0.6713, "step": 569300 }, { "epoch": 5.8, "learning_rate": 4.240132086089712e-05, "loss": 0.5714, "step": 569400 }, { "epoch": 5.8, "learning_rate": 4.239528026960765e-05, "loss": 0.5984, "step": 569500 }, { "epoch": 5.8, "learning_rate": 4.238923907226294e-05, "loss": 0.5193, "step": 569600 }, { "epoch": 5.8, "learning_rate": 4.238319726915838e-05, "loss": 0.6532, "step": 569700 }, { "epoch": 5.81, "learning_rate": 4.237715486058937e-05, "loss": 0.6755, "step": 569800 }, { "epoch": 5.81, "learning_rate": 4.237111184685136e-05, "loss": 0.6455, "step": 569900 }, { "epoch": 5.81, "learning_rate": 4.23650682282398e-05, "loss": 0.65, "step": 570000 }, { "epoch": 5.81, "learning_rate": 4.235902400505019e-05, "loss": 0.5548, "step": 570100 }, { "epoch": 5.81, "learning_rate": 4.235297917757807e-05, "loss": 0.5646, "step": 570200 }, { "epoch": 5.81, "learning_rate": 4.234693374611897e-05, "loss": 0.5424, "step": 570300 }, { "epoch": 5.81, "learning_rate": 4.234088771096851e-05, "loss": 0.6472, "step": 570400 }, { "epoch": 5.81, "learning_rate": 4.2334841072422285e-05, "loss": 0.6008, "step": 570500 }, { "epoch": 5.81, "learning_rate": 4.232879383077594e-05, "loss": 0.6287, "step": 570600 }, { "epoch": 5.81, "learning_rate": 4.2322745986325154e-05, "loss": 0.6418, "step": 570700 }, { "epoch": 5.82, "learning_rate": 4.2316697539365616e-05, "loss": 0.6768, "step": 570800 }, { "epoch": 5.82, "learning_rate": 4.2310648490193083e-05, "loss": 0.5594, "step": 570900 }, { "epoch": 5.82, "learning_rate": 4.23045988391033e-05, "loss": 0.596, "step": 571000 }, { "epoch": 5.82, "learning_rate": 4.2298548586392066e-05, "loss": 0.6538, "step": 571100 }, { "epoch": 5.82, "learning_rate": 4.22924977323552e-05, "loss": 0.546, "step": 571200 }, { "epoch": 5.82, "learning_rate": 4.228644627728855e-05, "loss": 0.6101, "step": 571300 }, { "epoch": 5.82, "learning_rate": 4.2280394221487995e-05, "loss": 0.5492, "step": 571400 }, { "epoch": 5.82, "learning_rate": 4.2274341565249455e-05, "loss": 0.6174, "step": 571500 }, { "epoch": 5.82, "learning_rate": 4.226828830886886e-05, "loss": 0.5446, "step": 571600 }, { "epoch": 5.82, "learning_rate": 4.226223445264218e-05, "loss": 0.593, "step": 571700 }, { "epoch": 5.83, "learning_rate": 4.225617999686541e-05, "loss": 0.4871, "step": 571800 }, { "epoch": 5.83, "learning_rate": 4.225012494183459e-05, "loss": 0.6361, "step": 571900 }, { "epoch": 5.83, "learning_rate": 4.224406928784575e-05, "loss": 0.6074, "step": 572000 }, { "epoch": 5.83, "learning_rate": 4.2238013035195e-05, "loss": 0.576, "step": 572100 }, { "epoch": 5.83, "learning_rate": 4.2231956184178435e-05, "loss": 0.5063, "step": 572200 }, { "epoch": 5.83, "learning_rate": 4.2225898735092223e-05, "loss": 0.496, "step": 572300 }, { "epoch": 5.83, "learning_rate": 4.221984068823252e-05, "loss": 0.5012, "step": 572400 }, { "epoch": 5.83, "learning_rate": 4.221378204389552e-05, "loss": 0.5535, "step": 572500 }, { "epoch": 5.83, "learning_rate": 4.220778339774773e-05, "loss": 0.5549, "step": 572600 }, { "epoch": 5.83, "learning_rate": 4.220172356531228e-05, "loss": 0.4943, "step": 572700 }, { "epoch": 5.84, "learning_rate": 4.219566313628535e-05, "loss": 0.5722, "step": 572800 }, { "epoch": 5.84, "learning_rate": 4.218960211096328e-05, "loss": 0.5007, "step": 572900 }, { "epoch": 5.84, "learning_rate": 4.218354048964241e-05, "loss": 0.5579, "step": 573000 }, { "epoch": 5.84, "learning_rate": 4.217747827261911e-05, "loss": 0.51, "step": 573100 }, { "epoch": 5.84, "learning_rate": 4.217141546018981e-05, "loss": 0.5708, "step": 573200 }, { "epoch": 5.84, "learning_rate": 4.216535205265092e-05, "loss": 0.5793, "step": 573300 }, { "epoch": 5.84, "learning_rate": 4.2159288050298905e-05, "loss": 0.5643, "step": 573400 }, { "epoch": 5.84, "learning_rate": 4.2153223453430273e-05, "loss": 0.5648, "step": 573500 }, { "epoch": 5.84, "learning_rate": 4.2147218917192853e-05, "loss": 0.6121, "step": 573600 }, { "epoch": 5.84, "learning_rate": 4.214115313811835e-05, "loss": 0.632, "step": 573700 }, { "epoch": 5.85, "learning_rate": 4.21350867654139e-05, "loss": 0.5347, "step": 573800 }, { "epoch": 5.85, "learning_rate": 4.212901979937613e-05, "loss": 0.614, "step": 573900 }, { "epoch": 5.85, "learning_rate": 4.212295224030168e-05, "loss": 0.5957, "step": 574000 }, { "epoch": 5.85, "learning_rate": 4.211688408848721e-05, "loss": 0.5692, "step": 574100 }, { "epoch": 5.85, "learning_rate": 4.2110815344229403e-05, "loss": 0.5393, "step": 574200 }, { "epoch": 5.85, "learning_rate": 4.210474600782501e-05, "loss": 0.5866, "step": 574300 }, { "epoch": 5.85, "learning_rate": 4.209867607957078e-05, "loss": 0.5357, "step": 574400 }, { "epoch": 5.85, "learning_rate": 4.209260555976349e-05, "loss": 0.5652, "step": 574500 }, { "epoch": 5.85, "learning_rate": 4.2086534448699946e-05, "loss": 0.5762, "step": 574600 }, { "epoch": 5.86, "learning_rate": 4.2080462746677e-05, "loss": 0.5948, "step": 574700 }, { "epoch": 5.86, "learning_rate": 4.207439045399152e-05, "loss": 0.5051, "step": 574800 }, { "epoch": 5.86, "learning_rate": 4.2068317570940395e-05, "loss": 0.558, "step": 574900 }, { "epoch": 5.86, "learning_rate": 4.206224409782057e-05, "loss": 0.5288, "step": 575000 }, { "epoch": 5.86, "learning_rate": 4.205617003492898e-05, "loss": 0.5618, "step": 575100 }, { "epoch": 5.86, "learning_rate": 4.205009538256264e-05, "loss": 0.4772, "step": 575200 }, { "epoch": 5.86, "learning_rate": 4.204402014101854e-05, "loss": 0.547, "step": 575300 }, { "epoch": 5.86, "learning_rate": 4.203794431059373e-05, "loss": 0.5448, "step": 575400 }, { "epoch": 5.86, "learning_rate": 4.203186789158527e-05, "loss": 0.5888, "step": 575500 }, { "epoch": 5.86, "learning_rate": 4.202579088429029e-05, "loss": 0.6193, "step": 575600 }, { "epoch": 5.87, "learning_rate": 4.201971328900589e-05, "loss": 0.5517, "step": 575700 }, { "epoch": 5.87, "learning_rate": 4.201363510602925e-05, "loss": 0.601, "step": 575800 }, { "epoch": 5.87, "learning_rate": 4.200755633565754e-05, "loss": 0.58, "step": 575900 }, { "epoch": 5.87, "learning_rate": 4.2001476978187985e-05, "loss": 0.6291, "step": 576000 }, { "epoch": 5.87, "learning_rate": 4.199539703391782e-05, "loss": 0.6389, "step": 576100 }, { "epoch": 5.87, "learning_rate": 4.198931650314433e-05, "loss": 0.5835, "step": 576200 }, { "epoch": 5.87, "learning_rate": 4.198323538616482e-05, "loss": 0.6335, "step": 576300 }, { "epoch": 5.87, "learning_rate": 4.19771536832766e-05, "loss": 0.5073, "step": 576400 }, { "epoch": 5.87, "learning_rate": 4.197107139477705e-05, "loss": 0.6009, "step": 576500 }, { "epoch": 5.87, "learning_rate": 4.196498852096354e-05, "loss": 0.4934, "step": 576600 }, { "epoch": 5.88, "learning_rate": 4.19589050621335e-05, "loss": 0.6401, "step": 576700 }, { "epoch": 5.88, "learning_rate": 4.1952821018584364e-05, "loss": 0.5703, "step": 576800 }, { "epoch": 5.88, "learning_rate": 4.1946736390613616e-05, "loss": 0.538, "step": 576900 }, { "epoch": 5.88, "learning_rate": 4.194065117851875e-05, "loss": 0.6146, "step": 577000 }, { "epoch": 5.88, "learning_rate": 4.19345653825973e-05, "loss": 0.5697, "step": 577100 }, { "epoch": 5.88, "learning_rate": 4.1928479003146835e-05, "loss": 0.588, "step": 577200 }, { "epoch": 5.88, "learning_rate": 4.192239204046492e-05, "loss": 0.5262, "step": 577300 }, { "epoch": 5.88, "learning_rate": 4.1916304494849185e-05, "loss": 0.5905, "step": 577400 }, { "epoch": 5.88, "learning_rate": 4.191021636659728e-05, "loss": 0.5116, "step": 577500 }, { "epoch": 5.88, "learning_rate": 4.1904127656006856e-05, "loss": 0.5436, "step": 577600 }, { "epoch": 5.89, "learning_rate": 4.1898099259182075e-05, "loss": 0.5829, "step": 577700 }, { "epoch": 5.89, "learning_rate": 4.189200939062374e-05, "loss": 0.5838, "step": 577800 }, { "epoch": 5.89, "learning_rate": 4.1885918940617126e-05, "loss": 0.6344, "step": 577900 }, { "epoch": 5.89, "learning_rate": 4.187982790945999e-05, "loss": 0.6218, "step": 578000 }, { "epoch": 5.89, "learning_rate": 4.187373629745017e-05, "loss": 0.6308, "step": 578100 }, { "epoch": 5.89, "learning_rate": 4.186764410488551e-05, "loss": 0.5946, "step": 578200 }, { "epoch": 5.89, "learning_rate": 4.186155133206386e-05, "loss": 0.652, "step": 578300 }, { "epoch": 5.89, "learning_rate": 4.185545797928316e-05, "loss": 0.6009, "step": 578400 }, { "epoch": 5.89, "learning_rate": 4.18493640468413e-05, "loss": 0.5138, "step": 578500 }, { "epoch": 5.89, "learning_rate": 4.184326953503625e-05, "loss": 0.6374, "step": 578600 }, { "epoch": 5.9, "learning_rate": 4.1837174444165994e-05, "loss": 0.6108, "step": 578700 }, { "epoch": 5.9, "learning_rate": 4.183107877452857e-05, "loss": 0.5198, "step": 578800 }, { "epoch": 5.9, "learning_rate": 4.182504349176548e-05, "loss": 0.5569, "step": 578900 }, { "epoch": 5.9, "learning_rate": 4.181894667126805e-05, "loss": 0.5336, "step": 579000 }, { "epoch": 5.9, "learning_rate": 4.181284927289466e-05, "loss": 0.5381, "step": 579100 }, { "epoch": 5.9, "learning_rate": 4.180675129694344e-05, "loss": 0.6074, "step": 579200 }, { "epoch": 5.9, "learning_rate": 4.180065274371253e-05, "loss": 0.6188, "step": 579300 }, { "epoch": 5.9, "learning_rate": 4.179455361350012e-05, "loss": 0.6354, "step": 579400 }, { "epoch": 5.9, "learning_rate": 4.178845390660444e-05, "loss": 0.572, "step": 579500 }, { "epoch": 5.91, "learning_rate": 4.178235362332371e-05, "loss": 0.5296, "step": 579600 }, { "epoch": 5.91, "learning_rate": 4.17762527639562e-05, "loss": 0.4643, "step": 579700 }, { "epoch": 5.91, "learning_rate": 4.177015132880021e-05, "loss": 0.45, "step": 579800 }, { "epoch": 5.91, "learning_rate": 4.1764049318154055e-05, "loss": 0.6342, "step": 579900 }, { "epoch": 5.91, "learning_rate": 4.175794673231608e-05, "loss": 0.6484, "step": 580000 }, { "epoch": 5.91, "learning_rate": 4.17518435715847e-05, "loss": 0.5219, "step": 580100 }, { "epoch": 5.91, "learning_rate": 4.174573983625829e-05, "loss": 0.5191, "step": 580200 }, { "epoch": 5.91, "learning_rate": 4.17396355266353e-05, "loss": 0.6277, "step": 580300 }, { "epoch": 5.91, "learning_rate": 4.173353064301419e-05, "loss": 0.5578, "step": 580400 }, { "epoch": 5.91, "learning_rate": 4.172742518569345e-05, "loss": 0.5549, "step": 580500 }, { "epoch": 5.92, "learning_rate": 4.1721319154971605e-05, "loss": 0.5566, "step": 580600 }, { "epoch": 5.92, "learning_rate": 4.1715212551147204e-05, "loss": 0.5945, "step": 580700 }, { "epoch": 5.92, "learning_rate": 4.170910537451883e-05, "loss": 0.6283, "step": 580800 }, { "epoch": 5.92, "learning_rate": 4.1702997625385074e-05, "loss": 0.5688, "step": 580900 }, { "epoch": 5.92, "learning_rate": 4.169688930404457e-05, "loss": 0.6035, "step": 581000 }, { "epoch": 5.92, "learning_rate": 4.1690780410795986e-05, "loss": 0.4726, "step": 581100 }, { "epoch": 5.92, "learning_rate": 4.1684670945938e-05, "loss": 0.5327, "step": 581200 }, { "epoch": 5.92, "learning_rate": 4.167862201295804e-05, "loss": 0.6126, "step": 581300 }, { "epoch": 5.92, "learning_rate": 4.1672511411486086e-05, "loss": 0.5975, "step": 581400 }, { "epoch": 5.92, "learning_rate": 4.166640023929798e-05, "loss": 0.5948, "step": 581500 }, { "epoch": 5.93, "learning_rate": 4.166028849669253e-05, "loss": 0.6736, "step": 581600 }, { "epoch": 5.93, "learning_rate": 4.1654176183968556e-05, "loss": 0.6527, "step": 581700 }, { "epoch": 5.93, "learning_rate": 4.164806330142491e-05, "loss": 0.7274, "step": 581800 }, { "epoch": 5.93, "learning_rate": 4.164201098669927e-05, "loss": 0.5922, "step": 581900 }, { "epoch": 5.93, "learning_rate": 4.163589697110371e-05, "loss": 0.5286, "step": 582000 }, { "epoch": 5.93, "learning_rate": 4.1629782386582235e-05, "loss": 0.5594, "step": 582100 }, { "epoch": 5.93, "learning_rate": 4.16236672334338e-05, "loss": 0.6266, "step": 582200 }, { "epoch": 5.93, "learning_rate": 4.16175515119574e-05, "loss": 0.5597, "step": 582300 }, { "epoch": 5.93, "learning_rate": 4.161143522245206e-05, "loss": 0.5293, "step": 582400 }, { "epoch": 5.93, "learning_rate": 4.160531836521683e-05, "loss": 0.595, "step": 582500 }, { "epoch": 5.94, "learning_rate": 4.1599200940550785e-05, "loss": 0.4925, "step": 582600 }, { "epoch": 5.94, "learning_rate": 4.159308294875305e-05, "loss": 0.5489, "step": 582700 }, { "epoch": 5.94, "learning_rate": 4.158696439012273e-05, "loss": 0.531, "step": 582800 }, { "epoch": 5.94, "learning_rate": 4.1580845264959004e-05, "loss": 0.4915, "step": 582900 }, { "epoch": 5.94, "learning_rate": 4.157472557356106e-05, "loss": 0.6102, "step": 583000 }, { "epoch": 5.94, "learning_rate": 4.15686053162281e-05, "loss": 0.5712, "step": 583100 }, { "epoch": 5.94, "learning_rate": 4.156248449325938e-05, "loss": 0.5761, "step": 583200 }, { "epoch": 5.94, "learning_rate": 4.155636310495417e-05, "loss": 0.5345, "step": 583300 }, { "epoch": 5.94, "learning_rate": 4.155024115161177e-05, "loss": 0.5068, "step": 583400 }, { "epoch": 5.94, "learning_rate": 4.154411863353151e-05, "loss": 0.5088, "step": 583500 }, { "epoch": 5.95, "learning_rate": 4.1537995551012734e-05, "loss": 0.6086, "step": 583600 }, { "epoch": 5.95, "learning_rate": 4.153187190435484e-05, "loss": 0.5429, "step": 583700 }, { "epoch": 5.95, "learning_rate": 4.152574769385722e-05, "loss": 0.6396, "step": 583800 }, { "epoch": 5.95, "learning_rate": 4.151962291981932e-05, "loss": 0.6534, "step": 583900 }, { "epoch": 5.95, "learning_rate": 4.15134975825406e-05, "loss": 0.5257, "step": 584000 }, { "epoch": 5.95, "learning_rate": 4.150737168232057e-05, "loss": 0.5576, "step": 584100 }, { "epoch": 5.95, "learning_rate": 4.150124521945873e-05, "loss": 0.5409, "step": 584200 }, { "epoch": 5.95, "learning_rate": 4.149511819425464e-05, "loss": 0.6353, "step": 584300 }, { "epoch": 5.95, "learning_rate": 4.148899060700787e-05, "loss": 0.6322, "step": 584400 }, { "epoch": 5.95, "learning_rate": 4.148286245801801e-05, "loss": 0.4708, "step": 584500 }, { "epoch": 5.96, "learning_rate": 4.1476733747584705e-05, "loss": 0.5462, "step": 584600 }, { "epoch": 5.96, "learning_rate": 4.1470604476007614e-05, "loss": 0.6395, "step": 584700 }, { "epoch": 5.96, "learning_rate": 4.146447464358642e-05, "loss": 0.6236, "step": 584800 }, { "epoch": 5.96, "learning_rate": 4.145834425062083e-05, "loss": 0.5002, "step": 584900 }, { "epoch": 5.96, "learning_rate": 4.145221329741058e-05, "loss": 0.4929, "step": 585000 }, { "epoch": 5.96, "learning_rate": 4.144608178425545e-05, "loss": 0.5874, "step": 585100 }, { "epoch": 5.96, "learning_rate": 4.1439949711455224e-05, "loss": 0.6709, "step": 585200 }, { "epoch": 5.96, "learning_rate": 4.1433817079309724e-05, "loss": 0.6451, "step": 585300 }, { "epoch": 5.96, "learning_rate": 4.142768388811881e-05, "loss": 0.4483, "step": 585400 }, { "epoch": 5.97, "learning_rate": 4.1421550138182346e-05, "loss": 0.6493, "step": 585500 }, { "epoch": 5.97, "learning_rate": 4.141541582980024e-05, "loss": 0.5465, "step": 585600 }, { "epoch": 5.97, "learning_rate": 4.1409280963272426e-05, "loss": 0.637, "step": 585700 }, { "epoch": 5.97, "learning_rate": 4.1403145538898854e-05, "loss": 0.6471, "step": 585800 }, { "epoch": 5.97, "learning_rate": 4.1397009556979526e-05, "loss": 0.6329, "step": 585900 }, { "epoch": 5.97, "learning_rate": 4.139087301781445e-05, "loss": 0.6026, "step": 586000 }, { "epoch": 5.97, "learning_rate": 4.1384735921703644e-05, "loss": 0.5673, "step": 586100 }, { "epoch": 5.97, "learning_rate": 4.1378598268947196e-05, "loss": 0.5796, "step": 586200 }, { "epoch": 5.97, "learning_rate": 4.1372460059845197e-05, "loss": 0.4758, "step": 586300 }, { "epoch": 5.97, "learning_rate": 4.1366321294697766e-05, "loss": 0.5691, "step": 586400 }, { "epoch": 5.98, "learning_rate": 4.136018197380507e-05, "loss": 0.6423, "step": 586500 }, { "epoch": 5.98, "learning_rate": 4.135404209746725e-05, "loss": 0.4958, "step": 586600 }, { "epoch": 5.98, "learning_rate": 4.134790166598454e-05, "loss": 0.6371, "step": 586700 }, { "epoch": 5.98, "learning_rate": 4.134176067965715e-05, "loss": 0.6296, "step": 586800 }, { "epoch": 5.98, "learning_rate": 4.1335619138785355e-05, "loss": 0.5309, "step": 586900 }, { "epoch": 5.98, "learning_rate": 4.132947704366943e-05, "loss": 0.5133, "step": 587000 }, { "epoch": 5.98, "learning_rate": 4.132333439460969e-05, "loss": 0.6355, "step": 587100 }, { "epoch": 5.98, "learning_rate": 4.131719119190647e-05, "loss": 0.6336, "step": 587200 }, { "epoch": 5.98, "learning_rate": 4.1311047435860136e-05, "loss": 0.6097, "step": 587300 }, { "epoch": 5.98, "learning_rate": 4.1304903126771084e-05, "loss": 0.5597, "step": 587400 }, { "epoch": 5.99, "learning_rate": 4.129875826493974e-05, "loss": 0.5633, "step": 587500 }, { "epoch": 5.99, "learning_rate": 4.1292612850666533e-05, "loss": 0.68, "step": 587600 }, { "epoch": 5.99, "learning_rate": 4.128646688425195e-05, "loss": 0.6008, "step": 587700 }, { "epoch": 5.99, "learning_rate": 4.12803203659965e-05, "loss": 0.5101, "step": 587800 }, { "epoch": 5.99, "learning_rate": 4.12741732962007e-05, "loss": 0.4864, "step": 587900 }, { "epoch": 5.99, "learning_rate": 4.12680871541031e-05, "loss": 0.5169, "step": 588000 }, { "epoch": 5.99, "learning_rate": 4.12619389876362e-05, "loss": 0.57, "step": 588100 }, { "epoch": 5.99, "learning_rate": 4.1255790270527685e-05, "loss": 0.5923, "step": 588200 }, { "epoch": 5.99, "learning_rate": 4.12496410030782e-05, "loss": 0.5668, "step": 588300 }, { "epoch": 5.99, "learning_rate": 4.1243491185588396e-05, "loss": 0.6334, "step": 588400 }, { "epoch": 6.0, "learning_rate": 4.123734081835898e-05, "loss": 0.5487, "step": 588500 }, { "epoch": 6.0, "learning_rate": 4.1231189901690664e-05, "loss": 0.5597, "step": 588600 }, { "epoch": 6.0, "learning_rate": 4.1225038435884174e-05, "loss": 0.5646, "step": 588700 }, { "epoch": 6.0, "learning_rate": 4.1218886421240294e-05, "loss": 0.6376, "step": 588800 }, { "epoch": 6.0, "learning_rate": 4.121273385805982e-05, "loss": 0.612, "step": 588900 }, { "epoch": 6.0, "learning_rate": 4.1206580746643576e-05, "loss": 0.4853, "step": 589000 }, { "epoch": 6.0, "learning_rate": 4.120042708729241e-05, "loss": 0.5164, "step": 589100 }, { "epoch": 6.0, "learning_rate": 4.11942728803072e-05, "loss": 0.4396, "step": 589200 }, { "epoch": 6.0, "learning_rate": 4.1188118125988855e-05, "loss": 0.5545, "step": 589300 }, { "epoch": 6.0, "learning_rate": 4.11819628246383e-05, "loss": 0.5377, "step": 589400 }, { "epoch": 6.01, "learning_rate": 4.1175806976556485e-05, "loss": 0.4688, "step": 589500 }, { "epoch": 6.01, "learning_rate": 4.11696505820444e-05, "loss": 0.5219, "step": 589600 }, { "epoch": 6.01, "learning_rate": 4.116349364140307e-05, "loss": 0.6404, "step": 589700 }, { "epoch": 6.01, "learning_rate": 4.115733615493351e-05, "loss": 0.4468, "step": 589800 }, { "epoch": 6.01, "learning_rate": 4.115117812293681e-05, "loss": 0.5317, "step": 589900 }, { "epoch": 6.01, "learning_rate": 4.1145019545714046e-05, "loss": 0.5006, "step": 590000 }, { "epoch": 6.01, "learning_rate": 4.1138860423566324e-05, "loss": 0.5178, "step": 590100 }, { "epoch": 6.01, "learning_rate": 4.113270075679481e-05, "loss": 0.4846, "step": 590200 }, { "epoch": 6.01, "learning_rate": 4.1126540545700654e-05, "loss": 0.5962, "step": 590300 }, { "epoch": 6.02, "learning_rate": 4.1120379790585075e-05, "loss": 0.4703, "step": 590400 }, { "epoch": 6.02, "learning_rate": 4.111421849174928e-05, "loss": 0.5624, "step": 590500 }, { "epoch": 6.02, "learning_rate": 4.110805664949454e-05, "loss": 0.5424, "step": 590600 }, { "epoch": 6.02, "learning_rate": 4.110195589066327e-05, "loss": 0.6128, "step": 590700 }, { "epoch": 6.02, "learning_rate": 4.109579296790114e-05, "loss": 0.5603, "step": 590800 }, { "epoch": 6.02, "learning_rate": 4.108962950262094e-05, "loss": 0.5908, "step": 590900 }, { "epoch": 6.02, "learning_rate": 4.108346549512405e-05, "loss": 0.5302, "step": 591000 }, { "epoch": 6.02, "learning_rate": 4.107730094571183e-05, "loss": 0.5074, "step": 591100 }, { "epoch": 6.02, "learning_rate": 4.10711358546857e-05, "loss": 0.4752, "step": 591200 }, { "epoch": 6.02, "learning_rate": 4.10649702223471e-05, "loss": 0.4885, "step": 591300 }, { "epoch": 6.03, "learning_rate": 4.105880404899749e-05, "loss": 0.611, "step": 591400 }, { "epoch": 6.03, "learning_rate": 4.105263733493836e-05, "loss": 0.4913, "step": 591500 }, { "epoch": 6.03, "learning_rate": 4.104647008047121e-05, "loss": 0.4679, "step": 591600 }, { "epoch": 6.03, "learning_rate": 4.104030228589761e-05, "loss": 0.5721, "step": 591700 }, { "epoch": 6.03, "learning_rate": 4.10341339515191e-05, "loss": 0.5539, "step": 591800 }, { "epoch": 6.03, "learning_rate": 4.10279650776373e-05, "loss": 0.5891, "step": 591900 }, { "epoch": 6.03, "learning_rate": 4.102179566455381e-05, "loss": 0.5013, "step": 592000 }, { "epoch": 6.03, "learning_rate": 4.101568741475669e-05, "loss": 0.5112, "step": 592100 }, { "epoch": 6.03, "learning_rate": 4.10095169295593e-05, "loss": 0.6183, "step": 592200 }, { "epoch": 6.03, "learning_rate": 4.1003407618960795e-05, "loss": 0.62, "step": 592300 }, { "epoch": 6.04, "learning_rate": 4.0997236062844254e-05, "loss": 0.6609, "step": 592400 }, { "epoch": 6.04, "learning_rate": 4.099106396902851e-05, "loss": 0.5438, "step": 592500 }, { "epoch": 6.04, "learning_rate": 4.098489133781534e-05, "loss": 0.6257, "step": 592600 }, { "epoch": 6.04, "learning_rate": 4.0978718169506516e-05, "loss": 0.511, "step": 592700 }, { "epoch": 6.04, "learning_rate": 4.0972606204111065e-05, "loss": 0.6321, "step": 592800 }, { "epoch": 6.04, "learning_rate": 4.096643196787993e-05, "loss": 0.5457, "step": 592900 }, { "epoch": 6.04, "learning_rate": 4.096025719545571e-05, "loss": 0.5972, "step": 593000 }, { "epoch": 6.04, "learning_rate": 4.095408188714033e-05, "loss": 0.5937, "step": 593100 }, { "epoch": 6.04, "learning_rate": 4.094790604323572e-05, "loss": 0.6263, "step": 593200 }, { "epoch": 6.04, "learning_rate": 4.0941729664043826e-05, "loss": 0.6391, "step": 593300 }, { "epoch": 6.05, "learning_rate": 4.093555274986666e-05, "loss": 0.5879, "step": 593400 }, { "epoch": 6.05, "learning_rate": 4.0929375301006215e-05, "loss": 0.4909, "step": 593500 }, { "epoch": 6.05, "learning_rate": 4.0923197317764546e-05, "loss": 0.5866, "step": 593600 }, { "epoch": 6.05, "learning_rate": 4.091701880044371e-05, "loss": 0.5637, "step": 593700 }, { "epoch": 6.05, "learning_rate": 4.0910839749345804e-05, "loss": 0.6098, "step": 593800 }, { "epoch": 6.05, "learning_rate": 4.090466016477295e-05, "loss": 0.5655, "step": 593900 }, { "epoch": 6.05, "learning_rate": 4.089848004702729e-05, "loss": 0.517, "step": 594000 }, { "epoch": 6.05, "learning_rate": 4.0892299396411e-05, "loss": 0.5466, "step": 594100 }, { "epoch": 6.05, "learning_rate": 4.0886118213226264e-05, "loss": 0.4581, "step": 594200 }, { "epoch": 6.05, "learning_rate": 4.08799364977753e-05, "loss": 0.5438, "step": 594300 }, { "epoch": 6.06, "learning_rate": 4.087375425036039e-05, "loss": 0.5562, "step": 594400 }, { "epoch": 6.06, "learning_rate": 4.086757147128379e-05, "loss": 0.6083, "step": 594500 }, { "epoch": 6.06, "learning_rate": 4.0861388160847784e-05, "loss": 0.5024, "step": 594600 }, { "epoch": 6.06, "learning_rate": 4.085520431935472e-05, "loss": 0.5311, "step": 594700 }, { "epoch": 6.06, "learning_rate": 4.084901994710694e-05, "loss": 0.518, "step": 594800 }, { "epoch": 6.06, "learning_rate": 4.084283504440684e-05, "loss": 0.502, "step": 594900 }, { "epoch": 6.06, "learning_rate": 4.083664961155681e-05, "loss": 0.562, "step": 595000 }, { "epoch": 6.06, "learning_rate": 4.0830463648859276e-05, "loss": 0.4815, "step": 595100 }, { "epoch": 6.06, "learning_rate": 4.08242771566167e-05, "loss": 0.6263, "step": 595200 }, { "epoch": 6.07, "learning_rate": 4.081809013513158e-05, "loss": 0.5333, "step": 595300 }, { "epoch": 6.07, "learning_rate": 4.08119025847064e-05, "loss": 0.542, "step": 595400 }, { "epoch": 6.07, "learning_rate": 4.080571450564372e-05, "loss": 0.5084, "step": 595500 }, { "epoch": 6.07, "learning_rate": 4.079952589824607e-05, "loss": 0.5497, "step": 595600 }, { "epoch": 6.07, "learning_rate": 4.079333676281606e-05, "loss": 0.4469, "step": 595700 }, { "epoch": 6.07, "learning_rate": 4.0787147099656296e-05, "loss": 0.5457, "step": 595800 }, { "epoch": 6.07, "learning_rate": 4.0780956909069404e-05, "loss": 0.559, "step": 595900 }, { "epoch": 6.07, "learning_rate": 4.0774766191358067e-05, "loss": 0.5114, "step": 596000 }, { "epoch": 6.07, "learning_rate": 4.076857494682495e-05, "loss": 0.5013, "step": 596100 }, { "epoch": 6.07, "learning_rate": 4.0762383175772796e-05, "loss": 0.5116, "step": 596200 }, { "epoch": 6.08, "learning_rate": 4.075619087850432e-05, "loss": 0.4689, "step": 596300 }, { "epoch": 6.08, "learning_rate": 4.074999805532231e-05, "loss": 0.5377, "step": 596400 }, { "epoch": 6.08, "learning_rate": 4.074380470652954e-05, "loss": 0.5886, "step": 596500 }, { "epoch": 6.08, "learning_rate": 4.073761083242884e-05, "loss": 0.4881, "step": 596600 }, { "epoch": 6.08, "learning_rate": 4.0731416433323046e-05, "loss": 0.586, "step": 596700 }, { "epoch": 6.08, "learning_rate": 4.072522150951503e-05, "loss": 0.5998, "step": 596800 }, { "epoch": 6.08, "learning_rate": 4.0719026061307695e-05, "loss": 0.5474, "step": 596900 }, { "epoch": 6.08, "learning_rate": 4.071283008900394e-05, "loss": 0.5887, "step": 597000 }, { "epoch": 6.08, "learning_rate": 4.070663359290674e-05, "loss": 0.5512, "step": 597100 }, { "epoch": 6.08, "learning_rate": 4.070043657331904e-05, "loss": 0.4717, "step": 597200 }, { "epoch": 6.09, "learning_rate": 4.0694239030543846e-05, "loss": 0.5564, "step": 597300 }, { "epoch": 6.09, "learning_rate": 4.068804096488419e-05, "loss": 0.5988, "step": 597400 }, { "epoch": 6.09, "learning_rate": 4.06818423766431e-05, "loss": 0.5198, "step": 597500 }, { "epoch": 6.09, "learning_rate": 4.067570525981316e-05, "loss": 0.5715, "step": 597600 }, { "epoch": 6.09, "learning_rate": 4.066950563253674e-05, "loss": 0.4577, "step": 597700 }, { "epoch": 6.09, "learning_rate": 4.0663305483585165e-05, "loss": 0.486, "step": 597800 }, { "epoch": 6.09, "learning_rate": 4.06571048132616e-05, "loss": 0.4831, "step": 597900 }, { "epoch": 6.09, "learning_rate": 4.065090362186919e-05, "loss": 0.4965, "step": 598000 }, { "epoch": 6.09, "learning_rate": 4.064470190971116e-05, "loss": 0.5462, "step": 598100 }, { "epoch": 6.09, "learning_rate": 4.063849967709073e-05, "loss": 0.4754, "step": 598200 }, { "epoch": 6.1, "learning_rate": 4.0632296924311154e-05, "loss": 0.5617, "step": 598300 }, { "epoch": 6.1, "learning_rate": 4.0626093651675716e-05, "loss": 0.5968, "step": 598400 }, { "epoch": 6.1, "learning_rate": 4.061988985948771e-05, "loss": 0.6003, "step": 598500 }, { "epoch": 6.1, "learning_rate": 4.061368554805046e-05, "loss": 0.5738, "step": 598600 }, { "epoch": 6.1, "learning_rate": 4.0607480717667325e-05, "loss": 0.6421, "step": 598700 }, { "epoch": 6.1, "learning_rate": 4.060127536864169e-05, "loss": 0.4819, "step": 598800 }, { "epoch": 6.1, "learning_rate": 4.0595069501276946e-05, "loss": 0.5702, "step": 598900 }, { "epoch": 6.1, "learning_rate": 4.0588863115876546e-05, "loss": 0.5597, "step": 599000 }, { "epoch": 6.1, "learning_rate": 4.058265621274392e-05, "loss": 0.4722, "step": 599100 }, { "epoch": 6.1, "learning_rate": 4.057644879218257e-05, "loss": 0.5098, "step": 599200 }, { "epoch": 6.11, "learning_rate": 4.057024085449597e-05, "loss": 0.5347, "step": 599300 }, { "epoch": 6.11, "learning_rate": 4.056403239998769e-05, "loss": 0.5183, "step": 599400 }, { "epoch": 6.11, "learning_rate": 4.055782342896126e-05, "loss": 0.5267, "step": 599500 }, { "epoch": 6.11, "learning_rate": 4.055161394172028e-05, "loss": 0.5413, "step": 599600 }, { "epoch": 6.11, "learning_rate": 4.054540393856833e-05, "loss": 0.4451, "step": 599700 }, { "epoch": 6.11, "learning_rate": 4.053919341980908e-05, "loss": 0.4851, "step": 599800 }, { "epoch": 6.11, "learning_rate": 4.0532982385746154e-05, "loss": 0.5158, "step": 599900 }, { "epoch": 6.11, "learning_rate": 4.0526770836683246e-05, "loss": 0.5769, "step": 600000 }, { "epoch": 6.11, "learning_rate": 4.052055877292407e-05, "loss": 0.5363, "step": 600100 }, { "epoch": 6.11, "learning_rate": 4.0514346194772345e-05, "loss": 0.5936, "step": 600200 }, { "epoch": 6.12, "learning_rate": 4.0508133102531844e-05, "loss": 0.594, "step": 600300 }, { "epoch": 6.12, "learning_rate": 4.0501919496506344e-05, "loss": 0.5522, "step": 600400 }, { "epoch": 6.12, "learning_rate": 4.049570537699965e-05, "loss": 0.5655, "step": 600500 }, { "epoch": 6.12, "learning_rate": 4.0489490744315605e-05, "loss": 0.519, "step": 600600 }, { "epoch": 6.12, "learning_rate": 4.048327559875805e-05, "loss": 0.5413, "step": 600700 }, { "epoch": 6.12, "learning_rate": 4.047705994063089e-05, "loss": 0.5333, "step": 600800 }, { "epoch": 6.12, "learning_rate": 4.047084377023802e-05, "loss": 0.4624, "step": 600900 }, { "epoch": 6.12, "learning_rate": 4.046462708788337e-05, "loss": 0.551, "step": 601000 }, { "epoch": 6.12, "learning_rate": 4.0458409893870905e-05, "loss": 0.5785, "step": 601100 }, { "epoch": 6.13, "learning_rate": 4.045219218850461e-05, "loss": 0.5354, "step": 601200 }, { "epoch": 6.13, "learning_rate": 4.04459739720885e-05, "loss": 0.5072, "step": 601300 }, { "epoch": 6.13, "learning_rate": 4.043975524492659e-05, "loss": 0.5157, "step": 601400 }, { "epoch": 6.13, "learning_rate": 4.043359820222468e-05, "loss": 0.5498, "step": 601500 }, { "epoch": 6.13, "learning_rate": 4.042737845958328e-05, "loss": 0.5086, "step": 601600 }, { "epoch": 6.13, "learning_rate": 4.042115820710529e-05, "loss": 0.4798, "step": 601700 }, { "epoch": 6.13, "learning_rate": 4.041493744509485e-05, "loss": 0.5029, "step": 601800 }, { "epoch": 6.13, "learning_rate": 4.040871617385612e-05, "loss": 0.5925, "step": 601900 }, { "epoch": 6.13, "learning_rate": 4.04024943936933e-05, "loss": 0.5896, "step": 602000 }, { "epoch": 6.13, "learning_rate": 4.0396272104910575e-05, "loss": 0.4564, "step": 602100 }, { "epoch": 6.14, "learning_rate": 4.0390049307812185e-05, "loss": 0.5879, "step": 602200 }, { "epoch": 6.14, "learning_rate": 4.038382600270239e-05, "loss": 0.6472, "step": 602300 }, { "epoch": 6.14, "learning_rate": 4.0377602189885464e-05, "loss": 0.5262, "step": 602400 }, { "epoch": 6.14, "learning_rate": 4.037137786966573e-05, "loss": 0.5659, "step": 602500 }, { "epoch": 6.14, "learning_rate": 4.03651530423475e-05, "loss": 0.5408, "step": 602600 }, { "epoch": 6.14, "learning_rate": 4.035892770823516e-05, "loss": 0.5425, "step": 602700 }, { "epoch": 6.14, "learning_rate": 4.0352701867633067e-05, "loss": 0.4688, "step": 602800 }, { "epoch": 6.14, "learning_rate": 4.034647552084563e-05, "loss": 0.5114, "step": 602900 }, { "epoch": 6.14, "learning_rate": 4.034024866817729e-05, "loss": 0.6103, "step": 603000 }, { "epoch": 6.14, "learning_rate": 4.033402130993249e-05, "loss": 0.493, "step": 603100 }, { "epoch": 6.15, "learning_rate": 4.032779344641572e-05, "loss": 0.5265, "step": 603200 }, { "epoch": 6.15, "learning_rate": 4.0321565077931484e-05, "loss": 0.5428, "step": 603300 }, { "epoch": 6.15, "learning_rate": 4.031533620478431e-05, "loss": 0.4963, "step": 603400 }, { "epoch": 6.15, "learning_rate": 4.030910682727876e-05, "loss": 0.5481, "step": 603500 }, { "epoch": 6.15, "learning_rate": 4.030287694571941e-05, "loss": 0.5186, "step": 603600 }, { "epoch": 6.15, "learning_rate": 4.029664656041085e-05, "loss": 0.5113, "step": 603700 }, { "epoch": 6.15, "learning_rate": 4.029041567165773e-05, "loss": 0.513, "step": 603800 }, { "epoch": 6.15, "learning_rate": 4.028418427976469e-05, "loss": 0.5194, "step": 603900 }, { "epoch": 6.15, "learning_rate": 4.027795238503641e-05, "loss": 0.5685, "step": 604000 }, { "epoch": 6.15, "learning_rate": 4.0271719987777597e-05, "loss": 0.5974, "step": 604100 }, { "epoch": 6.16, "learning_rate": 4.026548708829297e-05, "loss": 0.4914, "step": 604200 }, { "epoch": 6.16, "learning_rate": 4.0259253686887295e-05, "loss": 0.4673, "step": 604300 }, { "epoch": 6.16, "learning_rate": 4.025301978386533e-05, "loss": 0.6079, "step": 604400 }, { "epoch": 6.16, "learning_rate": 4.024678537953189e-05, "loss": 0.6289, "step": 604500 }, { "epoch": 6.16, "learning_rate": 4.02405504741918e-05, "loss": 0.5928, "step": 604600 }, { "epoch": 6.16, "learning_rate": 4.023431506814989e-05, "loss": 0.569, "step": 604700 }, { "epoch": 6.16, "learning_rate": 4.022807916171106e-05, "loss": 0.5276, "step": 604800 }, { "epoch": 6.16, "learning_rate": 4.022184275518019e-05, "loss": 0.5481, "step": 604900 }, { "epoch": 6.16, "learning_rate": 4.0215605848862215e-05, "loss": 0.6384, "step": 605000 }, { "epoch": 6.16, "learning_rate": 4.020936844306207e-05, "loss": 0.5264, "step": 605100 }, { "epoch": 6.17, "learning_rate": 4.0203130538084744e-05, "loss": 0.469, "step": 605200 }, { "epoch": 6.17, "learning_rate": 4.019689213423522e-05, "loss": 0.6008, "step": 605300 }, { "epoch": 6.17, "learning_rate": 4.019065323181852e-05, "loss": 0.5136, "step": 605400 }, { "epoch": 6.17, "learning_rate": 4.018441383113969e-05, "loss": 0.6229, "step": 605500 }, { "epoch": 6.17, "learning_rate": 4.01781739325038e-05, "loss": 0.5536, "step": 605600 }, { "epoch": 6.17, "learning_rate": 4.017193353621595e-05, "loss": 0.5351, "step": 605700 }, { "epoch": 6.17, "learning_rate": 4.016569264258125e-05, "loss": 0.4996, "step": 605800 }, { "epoch": 6.17, "learning_rate": 4.0159451251904854e-05, "loss": 0.468, "step": 605900 }, { "epoch": 6.17, "learning_rate": 4.015320936449191e-05, "loss": 0.5723, "step": 606000 }, { "epoch": 6.18, "learning_rate": 4.0146966980647625e-05, "loss": 0.5624, "step": 606100 }, { "epoch": 6.18, "learning_rate": 4.014072410067721e-05, "loss": 0.4742, "step": 606200 }, { "epoch": 6.18, "learning_rate": 4.0134480724885896e-05, "loss": 0.5734, "step": 606300 }, { "epoch": 6.18, "learning_rate": 4.012823685357895e-05, "loss": 0.5797, "step": 606400 }, { "epoch": 6.18, "learning_rate": 4.012199248706168e-05, "loss": 0.4791, "step": 606500 }, { "epoch": 6.18, "learning_rate": 4.0115747625639366e-05, "loss": 0.4535, "step": 606600 }, { "epoch": 6.18, "learning_rate": 4.0109502269617375e-05, "loss": 0.53, "step": 606700 }, { "epoch": 6.18, "learning_rate": 4.0103256419301054e-05, "loss": 0.5411, "step": 606800 }, { "epoch": 6.18, "learning_rate": 4.0097010074995775e-05, "loss": 0.5572, "step": 606900 }, { "epoch": 6.18, "learning_rate": 4.009082570782958e-05, "loss": 0.6621, "step": 607000 }, { "epoch": 6.19, "learning_rate": 4.0084578381394933e-05, "loss": 0.5928, "step": 607100 }, { "epoch": 6.19, "learning_rate": 4.007833056188459e-05, "loss": 0.5286, "step": 607200 }, { "epoch": 6.19, "learning_rate": 4.0072082249604024e-05, "loss": 0.5323, "step": 607300 }, { "epoch": 6.19, "learning_rate": 4.006583344485874e-05, "loss": 0.5379, "step": 607400 }, { "epoch": 6.19, "learning_rate": 4.005958414795428e-05, "loss": 0.4721, "step": 607500 }, { "epoch": 6.19, "learning_rate": 4.005333435919618e-05, "loss": 0.5848, "step": 607600 }, { "epoch": 6.19, "learning_rate": 4.004708407889002e-05, "loss": 0.505, "step": 607700 }, { "epoch": 6.19, "learning_rate": 4.0040833307341414e-05, "loss": 0.5671, "step": 607800 }, { "epoch": 6.19, "learning_rate": 4.003458204485598e-05, "loss": 0.5476, "step": 607900 }, { "epoch": 6.19, "learning_rate": 4.002833029173936e-05, "loss": 0.4678, "step": 608000 }, { "epoch": 6.2, "learning_rate": 4.002207804829724e-05, "loss": 0.5345, "step": 608100 }, { "epoch": 6.2, "learning_rate": 4.001582531483531e-05, "loss": 0.5251, "step": 608200 }, { "epoch": 6.2, "learning_rate": 4.00095720916593e-05, "loss": 0.506, "step": 608300 }, { "epoch": 6.2, "learning_rate": 4.0003318379074945e-05, "loss": 0.5311, "step": 608400 }, { "epoch": 6.2, "learning_rate": 3.999706417738801e-05, "loss": 0.5478, "step": 608500 }, { "epoch": 6.2, "learning_rate": 3.999080948690431e-05, "loss": 0.5107, "step": 608600 }, { "epoch": 6.2, "learning_rate": 3.998455430792964e-05, "loss": 0.5349, "step": 608700 }, { "epoch": 6.2, "learning_rate": 3.997829864076985e-05, "loss": 0.5177, "step": 608800 }, { "epoch": 6.2, "learning_rate": 3.99720424857308e-05, "loss": 0.6177, "step": 608900 }, { "epoch": 6.2, "learning_rate": 3.996578584311838e-05, "loss": 0.4951, "step": 609000 }, { "epoch": 6.21, "learning_rate": 3.9959528713238515e-05, "loss": 0.5466, "step": 609100 }, { "epoch": 6.21, "learning_rate": 3.995327109639713e-05, "loss": 0.567, "step": 609200 }, { "epoch": 6.21, "learning_rate": 3.994701299290018e-05, "loss": 0.6196, "step": 609300 }, { "epoch": 6.21, "learning_rate": 3.9940754403053665e-05, "loss": 0.5509, "step": 609400 }, { "epoch": 6.21, "learning_rate": 3.993449532716357e-05, "loss": 0.5109, "step": 609500 }, { "epoch": 6.21, "learning_rate": 3.992823576553595e-05, "loss": 0.4559, "step": 609600 }, { "epoch": 6.21, "learning_rate": 3.9921975718476846e-05, "loss": 0.5443, "step": 609700 }, { "epoch": 6.21, "learning_rate": 3.9915715186292336e-05, "loss": 0.5179, "step": 609800 }, { "epoch": 6.21, "learning_rate": 3.990945416928854e-05, "loss": 0.6347, "step": 609900 }, { "epoch": 6.21, "learning_rate": 3.990319266777157e-05, "loss": 0.558, "step": 610000 }, { "epoch": 6.22, "learning_rate": 3.989693068204757e-05, "loss": 0.5508, "step": 610100 }, { "epoch": 6.22, "learning_rate": 3.989066821242271e-05, "loss": 0.5514, "step": 610200 }, { "epoch": 6.22, "learning_rate": 3.988440525920322e-05, "loss": 0.5465, "step": 610300 }, { "epoch": 6.22, "learning_rate": 3.9878141822695284e-05, "loss": 0.5104, "step": 610400 }, { "epoch": 6.22, "learning_rate": 3.987187790320518e-05, "loss": 0.5531, "step": 610500 }, { "epoch": 6.22, "learning_rate": 3.986561350103915e-05, "loss": 0.591, "step": 610600 }, { "epoch": 6.22, "learning_rate": 3.9859348616503496e-05, "loss": 0.6012, "step": 610700 }, { "epoch": 6.22, "learning_rate": 3.985308324990452e-05, "loss": 0.5802, "step": 610800 }, { "epoch": 6.22, "learning_rate": 3.9846817401548585e-05, "loss": 0.5808, "step": 610900 }, { "epoch": 6.22, "learning_rate": 3.984055107174204e-05, "loss": 0.4851, "step": 611000 }, { "epoch": 6.23, "learning_rate": 3.9834284260791275e-05, "loss": 0.5649, "step": 611100 }, { "epoch": 6.23, "learning_rate": 3.982801696900269e-05, "loss": 0.5688, "step": 611200 }, { "epoch": 6.23, "learning_rate": 3.982181187678356e-05, "loss": 0.5012, "step": 611300 }, { "epoch": 6.23, "learning_rate": 3.981554362903941e-05, "loss": 0.5915, "step": 611400 }, { "epoch": 6.23, "learning_rate": 3.980927490137375e-05, "loss": 0.5049, "step": 611500 }, { "epoch": 6.23, "learning_rate": 3.980300569409309e-05, "loss": 0.4747, "step": 611600 }, { "epoch": 6.23, "learning_rate": 3.979673600750395e-05, "loss": 0.5424, "step": 611700 }, { "epoch": 6.23, "learning_rate": 3.979046584191289e-05, "loss": 0.4842, "step": 611800 }, { "epoch": 6.23, "learning_rate": 3.978419519762648e-05, "loss": 0.4898, "step": 611900 }, { "epoch": 6.24, "learning_rate": 3.97779240749513e-05, "loss": 0.5528, "step": 612000 }, { "epoch": 6.24, "learning_rate": 3.9771652474193993e-05, "loss": 0.5704, "step": 612100 }, { "epoch": 6.24, "learning_rate": 3.976538039566118e-05, "loss": 0.5993, "step": 612200 }, { "epoch": 6.24, "learning_rate": 3.975910783965956e-05, "loss": 0.5526, "step": 612300 }, { "epoch": 6.24, "learning_rate": 3.97528348064958e-05, "loss": 0.517, "step": 612400 }, { "epoch": 6.24, "learning_rate": 3.9746561296476626e-05, "loss": 0.5207, "step": 612500 }, { "epoch": 6.24, "learning_rate": 3.9740287309908756e-05, "loss": 0.5493, "step": 612600 }, { "epoch": 6.24, "learning_rate": 3.973401284709897e-05, "loss": 0.5485, "step": 612700 }, { "epoch": 6.24, "learning_rate": 3.9727737908354044e-05, "loss": 0.4344, "step": 612800 }, { "epoch": 6.24, "learning_rate": 3.97214624939808e-05, "loss": 0.514, "step": 612900 }, { "epoch": 6.25, "learning_rate": 3.9715186604286034e-05, "loss": 0.5442, "step": 613000 }, { "epoch": 6.25, "learning_rate": 3.970891023957663e-05, "loss": 0.5231, "step": 613100 }, { "epoch": 6.25, "learning_rate": 3.970263340015945e-05, "loss": 0.531, "step": 613200 }, { "epoch": 6.25, "learning_rate": 3.9696356086341404e-05, "loss": 0.4815, "step": 613300 }, { "epoch": 6.25, "learning_rate": 3.969007829842941e-05, "loss": 0.544, "step": 613400 }, { "epoch": 6.25, "learning_rate": 3.968380003673042e-05, "loss": 0.5307, "step": 613500 }, { "epoch": 6.25, "learning_rate": 3.9677584091245896e-05, "loss": 0.7407, "step": 613600 }, { "epoch": 6.25, "learning_rate": 3.9671304887624045e-05, "loss": 0.5101, "step": 613700 }, { "epoch": 6.25, "learning_rate": 3.966502521113309e-05, "loss": 0.5707, "step": 613800 }, { "epoch": 6.25, "learning_rate": 3.965874506208009e-05, "loss": 0.5631, "step": 613900 }, { "epoch": 6.26, "learning_rate": 3.965246444077209e-05, "loss": 0.4977, "step": 614000 }, { "epoch": 6.26, "learning_rate": 3.964618334751618e-05, "loss": 0.5533, "step": 614100 }, { "epoch": 6.26, "learning_rate": 3.9639964600602055e-05, "loss": 0.5658, "step": 614200 }, { "epoch": 6.26, "learning_rate": 3.9633682569083494e-05, "loss": 0.4703, "step": 614300 }, { "epoch": 6.26, "learning_rate": 3.9627400066535345e-05, "loss": 0.5128, "step": 614400 }, { "epoch": 6.26, "learning_rate": 3.962111709326479e-05, "loss": 0.5575, "step": 614500 }, { "epoch": 6.26, "learning_rate": 3.9614833649579026e-05, "loss": 0.5075, "step": 614600 }, { "epoch": 6.26, "learning_rate": 3.960854973578527e-05, "loss": 0.553, "step": 614700 }, { "epoch": 6.26, "learning_rate": 3.960226535219077e-05, "loss": 0.5185, "step": 614800 }, { "epoch": 6.26, "learning_rate": 3.9595980499102804e-05, "loss": 0.5467, "step": 614900 }, { "epoch": 6.27, "learning_rate": 3.958969517682866e-05, "loss": 0.5462, "step": 615000 }, { "epoch": 6.27, "learning_rate": 3.9583409385675645e-05, "loss": 0.5287, "step": 615100 }, { "epoch": 6.27, "learning_rate": 3.957712312595111e-05, "loss": 0.4678, "step": 615200 }, { "epoch": 6.27, "learning_rate": 3.957083639796239e-05, "loss": 0.5079, "step": 615300 }, { "epoch": 6.27, "learning_rate": 3.95645492020169e-05, "loss": 0.5142, "step": 615400 }, { "epoch": 6.27, "learning_rate": 3.9558261538422026e-05, "loss": 0.5787, "step": 615500 }, { "epoch": 6.27, "learning_rate": 3.955197340748521e-05, "loss": 0.5292, "step": 615600 }, { "epoch": 6.27, "learning_rate": 3.954568480951389e-05, "loss": 0.5801, "step": 615700 }, { "epoch": 6.27, "learning_rate": 3.9539395744815555e-05, "loss": 0.5145, "step": 615800 }, { "epoch": 6.27, "learning_rate": 3.95331062136977e-05, "loss": 0.5964, "step": 615900 }, { "epoch": 6.28, "learning_rate": 3.9526816216467834e-05, "loss": 0.5535, "step": 616000 }, { "epoch": 6.28, "learning_rate": 3.95205257534335e-05, "loss": 0.533, "step": 616100 }, { "epoch": 6.28, "learning_rate": 3.951423482490228e-05, "loss": 0.6197, "step": 616200 }, { "epoch": 6.28, "learning_rate": 3.950794343118176e-05, "loss": 0.5362, "step": 616300 }, { "epoch": 6.28, "learning_rate": 3.950165157257954e-05, "loss": 0.5345, "step": 616400 }, { "epoch": 6.28, "learning_rate": 3.9495359249403263e-05, "loss": 0.7021, "step": 616500 }, { "epoch": 6.28, "learning_rate": 3.94890664619606e-05, "loss": 0.5909, "step": 616600 }, { "epoch": 6.28, "learning_rate": 3.948277321055919e-05, "loss": 0.4945, "step": 616700 }, { "epoch": 6.28, "learning_rate": 3.947647949550677e-05, "loss": 0.6002, "step": 616800 }, { "epoch": 6.29, "learning_rate": 3.9470185317111056e-05, "loss": 0.5448, "step": 616900 }, { "epoch": 6.29, "learning_rate": 3.9463890675679794e-05, "loss": 0.5949, "step": 617000 }, { "epoch": 6.29, "learning_rate": 3.945765852485184e-05, "loss": 0.4957, "step": 617100 }, { "epoch": 6.29, "learning_rate": 3.94513629628955e-05, "loss": 0.4775, "step": 617200 }, { "epoch": 6.29, "learning_rate": 3.94450669388239e-05, "loss": 0.5508, "step": 617300 }, { "epoch": 6.29, "learning_rate": 3.94387704529449e-05, "loss": 0.5532, "step": 617400 }, { "epoch": 6.29, "learning_rate": 3.943247350556635e-05, "loss": 0.5209, "step": 617500 }, { "epoch": 6.29, "learning_rate": 3.942617609699615e-05, "loss": 0.5472, "step": 617600 }, { "epoch": 6.29, "learning_rate": 3.941987822754218e-05, "loss": 0.5319, "step": 617700 }, { "epoch": 6.29, "learning_rate": 3.941357989751237e-05, "loss": 0.5567, "step": 617800 }, { "epoch": 6.3, "learning_rate": 3.94072811072147e-05, "loss": 0.4866, "step": 617900 }, { "epoch": 6.3, "learning_rate": 3.940098185695711e-05, "loss": 0.5557, "step": 618000 }, { "epoch": 6.3, "learning_rate": 3.9394682147047605e-05, "loss": 0.5382, "step": 618100 }, { "epoch": 6.3, "learning_rate": 3.938838197779422e-05, "loss": 0.55, "step": 618200 }, { "epoch": 6.3, "learning_rate": 3.938208134950498e-05, "loss": 0.4971, "step": 618300 }, { "epoch": 6.3, "learning_rate": 3.937578026248796e-05, "loss": 0.4625, "step": 618400 }, { "epoch": 6.3, "learning_rate": 3.9369478717051225e-05, "loss": 0.5343, "step": 618500 }, { "epoch": 6.3, "learning_rate": 3.936317671350289e-05, "loss": 0.5644, "step": 618600 }, { "epoch": 6.3, "learning_rate": 3.9356874252151095e-05, "loss": 0.5344, "step": 618700 }, { "epoch": 6.3, "learning_rate": 3.935057133330399e-05, "loss": 0.5499, "step": 618800 }, { "epoch": 6.31, "learning_rate": 3.9344267957269744e-05, "loss": 0.5553, "step": 618900 }, { "epoch": 6.31, "learning_rate": 3.933796412435657e-05, "loss": 0.5984, "step": 619000 }, { "epoch": 6.31, "learning_rate": 3.9331659834872674e-05, "loss": 0.5414, "step": 619100 }, { "epoch": 6.31, "learning_rate": 3.932535508912629e-05, "loss": 0.5398, "step": 619200 }, { "epoch": 6.31, "learning_rate": 3.93190498874257e-05, "loss": 0.6267, "step": 619300 }, { "epoch": 6.31, "learning_rate": 3.931274423007918e-05, "loss": 0.5489, "step": 619400 }, { "epoch": 6.31, "learning_rate": 3.9306438117395044e-05, "loss": 0.5718, "step": 619500 }, { "epoch": 6.31, "learning_rate": 3.930013154968162e-05, "loss": 0.5296, "step": 619600 }, { "epoch": 6.31, "learning_rate": 3.929382452724727e-05, "loss": 0.6019, "step": 619700 }, { "epoch": 6.31, "learning_rate": 3.928751705040036e-05, "loss": 0.469, "step": 619800 }, { "epoch": 6.32, "learning_rate": 3.92812722010056e-05, "loss": 0.5457, "step": 619900 }, { "epoch": 6.32, "learning_rate": 3.927496382079522e-05, "loss": 0.4942, "step": 620000 }, { "epoch": 6.32, "learning_rate": 3.9268654987094466e-05, "loss": 0.5044, "step": 620100 }, { "epoch": 6.32, "learning_rate": 3.926234570021179e-05, "loss": 0.525, "step": 620200 }, { "epoch": 6.32, "learning_rate": 3.925603596045569e-05, "loss": 0.5621, "step": 620300 }, { "epoch": 6.32, "learning_rate": 3.924972576813467e-05, "loss": 0.4164, "step": 620400 }, { "epoch": 6.32, "learning_rate": 3.924341512355725e-05, "loss": 0.6039, "step": 620500 }, { "epoch": 6.32, "learning_rate": 3.9237104027031994e-05, "loss": 0.4845, "step": 620600 }, { "epoch": 6.32, "learning_rate": 3.923079247886748e-05, "loss": 0.6014, "step": 620700 }, { "epoch": 6.32, "learning_rate": 3.9224480479372295e-05, "loss": 0.508, "step": 620800 }, { "epoch": 6.33, "learning_rate": 3.921816802885507e-05, "loss": 0.4611, "step": 620900 }, { "epoch": 6.33, "learning_rate": 3.921185512762443e-05, "loss": 0.4722, "step": 621000 }, { "epoch": 6.33, "learning_rate": 3.9205541775989056e-05, "loss": 0.5601, "step": 621100 }, { "epoch": 6.33, "learning_rate": 3.919922797425762e-05, "loss": 0.4211, "step": 621200 }, { "epoch": 6.33, "learning_rate": 3.9192913722738836e-05, "loss": 0.5072, "step": 621300 }, { "epoch": 6.33, "learning_rate": 3.918666217097531e-05, "loss": 0.5572, "step": 621400 }, { "epoch": 6.33, "learning_rate": 3.918034702529821e-05, "loss": 0.5045, "step": 621500 }, { "epoch": 6.33, "learning_rate": 3.917403143075692e-05, "loss": 0.5601, "step": 621600 }, { "epoch": 6.33, "learning_rate": 3.916771538766025e-05, "loss": 0.4876, "step": 621700 }, { "epoch": 6.34, "learning_rate": 3.9161398896317e-05, "loss": 0.5432, "step": 621800 }, { "epoch": 6.34, "learning_rate": 3.915508195703601e-05, "loss": 0.5413, "step": 621900 }, { "epoch": 6.34, "learning_rate": 3.9148764570126156e-05, "loss": 0.5793, "step": 622000 }, { "epoch": 6.34, "learning_rate": 3.914244673589631e-05, "loss": 0.6149, "step": 622100 }, { "epoch": 6.34, "learning_rate": 3.913612845465538e-05, "loss": 0.4908, "step": 622200 }, { "epoch": 6.34, "learning_rate": 3.912980972671228e-05, "loss": 0.5388, "step": 622300 }, { "epoch": 6.34, "learning_rate": 3.912349055237598e-05, "loss": 0.5169, "step": 622400 }, { "epoch": 6.34, "learning_rate": 3.911717093195543e-05, "loss": 0.6151, "step": 622500 }, { "epoch": 6.34, "learning_rate": 3.9110850865759635e-05, "loss": 0.5821, "step": 622600 }, { "epoch": 6.34, "learning_rate": 3.910453035409761e-05, "loss": 0.5559, "step": 622700 }, { "epoch": 6.35, "learning_rate": 3.909820939727838e-05, "loss": 0.5057, "step": 622800 }, { "epoch": 6.35, "learning_rate": 3.9091887995611e-05, "loss": 0.5898, "step": 622900 }, { "epoch": 6.35, "learning_rate": 3.9085566149404555e-05, "loss": 0.532, "step": 623000 }, { "epoch": 6.35, "learning_rate": 3.9079243858968144e-05, "loss": 0.4956, "step": 623100 }, { "epoch": 6.35, "learning_rate": 3.907292112461089e-05, "loss": 0.5432, "step": 623200 }, { "epoch": 6.35, "learning_rate": 3.906659794664194e-05, "loss": 0.5047, "step": 623300 }, { "epoch": 6.35, "learning_rate": 3.906027432537045e-05, "loss": 0.5733, "step": 623400 }, { "epoch": 6.35, "learning_rate": 3.905395026110563e-05, "loss": 0.5153, "step": 623500 }, { "epoch": 6.35, "learning_rate": 3.9047625754156676e-05, "loss": 0.5662, "step": 623600 }, { "epoch": 6.35, "learning_rate": 3.90413008048328e-05, "loss": 0.5494, "step": 623700 }, { "epoch": 6.36, "learning_rate": 3.9034975413443266e-05, "loss": 0.5467, "step": 623800 }, { "epoch": 6.36, "learning_rate": 3.902864958029737e-05, "loss": 0.4614, "step": 623900 }, { "epoch": 6.36, "learning_rate": 3.902232330570437e-05, "loss": 0.4705, "step": 624000 }, { "epoch": 6.36, "learning_rate": 3.901599658997362e-05, "loss": 0.6193, "step": 624100 }, { "epoch": 6.36, "learning_rate": 3.9009669433414435e-05, "loss": 0.5019, "step": 624200 }, { "epoch": 6.36, "learning_rate": 3.900334183633619e-05, "loss": 0.5182, "step": 624300 }, { "epoch": 6.36, "learning_rate": 3.899701379904825e-05, "loss": 0.5207, "step": 624400 }, { "epoch": 6.36, "learning_rate": 3.8990685321860016e-05, "loss": 0.502, "step": 624500 }, { "epoch": 6.36, "learning_rate": 3.8984356405080936e-05, "loss": 0.4636, "step": 624600 }, { "epoch": 6.36, "learning_rate": 3.8978027049020445e-05, "loss": 0.526, "step": 624700 }, { "epoch": 6.37, "learning_rate": 3.8971697253988e-05, "loss": 0.493, "step": 624800 }, { "epoch": 6.37, "learning_rate": 3.8965367020293115e-05, "loss": 0.5652, "step": 624900 }, { "epoch": 6.37, "learning_rate": 3.895903634824527e-05, "loss": 0.5161, "step": 625000 }, { "epoch": 6.37, "learning_rate": 3.8952705238154024e-05, "loss": 0.4711, "step": 625100 }, { "epoch": 6.37, "learning_rate": 3.8946373690328914e-05, "loss": 0.5535, "step": 625200 }, { "epoch": 6.37, "learning_rate": 3.894004170507951e-05, "loss": 0.5852, "step": 625300 }, { "epoch": 6.37, "learning_rate": 3.8933709282715435e-05, "loss": 0.5104, "step": 625400 }, { "epoch": 6.37, "learning_rate": 3.8927376423546286e-05, "loss": 0.568, "step": 625500 }, { "epoch": 6.37, "learning_rate": 3.89210431278817e-05, "loss": 0.5385, "step": 625600 }, { "epoch": 6.37, "learning_rate": 3.8914772735507947e-05, "loss": 0.5203, "step": 625700 }, { "epoch": 6.38, "learning_rate": 3.8908438572138723e-05, "loss": 0.4415, "step": 625800 }, { "epoch": 6.38, "learning_rate": 3.890210397320003e-05, "loss": 0.5888, "step": 625900 }, { "epoch": 6.38, "learning_rate": 3.889576893900157e-05, "loss": 0.5675, "step": 626000 }, { "epoch": 6.38, "learning_rate": 3.888943346985309e-05, "loss": 0.6172, "step": 626100 }, { "epoch": 6.38, "learning_rate": 3.888309756606438e-05, "loss": 0.5383, "step": 626200 }, { "epoch": 6.38, "learning_rate": 3.887676122794519e-05, "loss": 0.548, "step": 626300 }, { "epoch": 6.38, "learning_rate": 3.887042445580536e-05, "loss": 0.5542, "step": 626400 }, { "epoch": 6.38, "learning_rate": 3.886408724995471e-05, "loss": 0.6445, "step": 626500 }, { "epoch": 6.38, "learning_rate": 3.885774961070309e-05, "loss": 0.5071, "step": 626600 }, { "epoch": 6.38, "learning_rate": 3.885141153836038e-05, "loss": 0.5566, "step": 626700 }, { "epoch": 6.39, "learning_rate": 3.8845073033236466e-05, "loss": 0.499, "step": 626800 }, { "epoch": 6.39, "learning_rate": 3.883873409564126e-05, "loss": 0.4963, "step": 626900 }, { "epoch": 6.39, "learning_rate": 3.8832394725884706e-05, "loss": 0.5456, "step": 627000 }, { "epoch": 6.39, "learning_rate": 3.882605492427676e-05, "loss": 0.5518, "step": 627100 }, { "epoch": 6.39, "learning_rate": 3.8819714691127394e-05, "loss": 0.6104, "step": 627200 }, { "epoch": 6.39, "learning_rate": 3.881337402674662e-05, "loss": 0.6112, "step": 627300 }, { "epoch": 6.39, "learning_rate": 3.8807032931444446e-05, "loss": 0.4784, "step": 627400 }, { "epoch": 6.39, "learning_rate": 3.8800691405530926e-05, "loss": 0.5748, "step": 627500 }, { "epoch": 6.39, "learning_rate": 3.879434944931612e-05, "loss": 0.4576, "step": 627600 }, { "epoch": 6.4, "learning_rate": 3.878800706311009e-05, "loss": 0.5395, "step": 627700 }, { "epoch": 6.4, "learning_rate": 3.8781664247222974e-05, "loss": 0.5237, "step": 627800 }, { "epoch": 6.4, "learning_rate": 3.877532100196488e-05, "loss": 0.473, "step": 627900 }, { "epoch": 6.4, "learning_rate": 3.8768977327645956e-05, "loss": 0.4975, "step": 628000 }, { "epoch": 6.4, "learning_rate": 3.876263322457637e-05, "loss": 0.558, "step": 628100 }, { "epoch": 6.4, "learning_rate": 3.8756288693066306e-05, "loss": 0.5763, "step": 628200 }, { "epoch": 6.4, "learning_rate": 3.8749943733425985e-05, "loss": 0.5168, "step": 628300 }, { "epoch": 6.4, "learning_rate": 3.8743598345965634e-05, "loss": 0.5838, "step": 628400 }, { "epoch": 6.4, "learning_rate": 3.8737252530995496e-05, "loss": 0.4094, "step": 628500 }, { "epoch": 6.4, "learning_rate": 3.8730906288825863e-05, "loss": 0.5462, "step": 628600 }, { "epoch": 6.41, "learning_rate": 3.8724559619767014e-05, "loss": 0.4997, "step": 628700 }, { "epoch": 6.41, "learning_rate": 3.8718212524129254e-05, "loss": 0.5377, "step": 628800 }, { "epoch": 6.41, "learning_rate": 3.8711865002222935e-05, "loss": 0.422, "step": 628900 }, { "epoch": 6.41, "learning_rate": 3.8705517054358404e-05, "loss": 0.5571, "step": 629000 }, { "epoch": 6.41, "learning_rate": 3.869916868084605e-05, "loss": 0.5243, "step": 629100 }, { "epoch": 6.41, "learning_rate": 3.869281988199626e-05, "loss": 0.5476, "step": 629200 }, { "epoch": 6.41, "learning_rate": 3.868647065811944e-05, "loss": 0.5078, "step": 629300 }, { "epoch": 6.41, "learning_rate": 3.868012100952606e-05, "loss": 0.511, "step": 629400 }, { "epoch": 6.41, "learning_rate": 3.867377093652655e-05, "loss": 0.4887, "step": 629500 }, { "epoch": 6.41, "learning_rate": 3.8667420439431406e-05, "loss": 0.4617, "step": 629600 }, { "epoch": 6.42, "learning_rate": 3.8661069518551134e-05, "loss": 0.4925, "step": 629700 }, { "epoch": 6.42, "learning_rate": 3.865471817419625e-05, "loss": 0.4522, "step": 629800 }, { "epoch": 6.42, "learning_rate": 3.864842992644612e-05, "loss": 0.5142, "step": 629900 }, { "epoch": 6.42, "learning_rate": 3.864207774030066e-05, "loss": 0.5012, "step": 630000 }, { "epoch": 6.42, "learning_rate": 3.863572513160916e-05, "loss": 0.5527, "step": 630100 }, { "epoch": 6.42, "learning_rate": 3.8629372100682236e-05, "loss": 0.4647, "step": 630200 }, { "epoch": 6.42, "learning_rate": 3.8623018647830526e-05, "loss": 0.5333, "step": 630300 }, { "epoch": 6.42, "learning_rate": 3.861666477336465e-05, "loss": 0.4803, "step": 630400 }, { "epoch": 6.42, "learning_rate": 3.86103104775953e-05, "loss": 0.5362, "step": 630500 }, { "epoch": 6.42, "learning_rate": 3.860395576083314e-05, "loss": 0.5201, "step": 630600 }, { "epoch": 6.43, "learning_rate": 3.85976006233889e-05, "loss": 0.4721, "step": 630700 }, { "epoch": 6.43, "learning_rate": 3.859124506557328e-05, "loss": 0.5554, "step": 630800 }, { "epoch": 6.43, "learning_rate": 3.8584889087697055e-05, "loss": 0.4375, "step": 630900 }, { "epoch": 6.43, "learning_rate": 3.8578532690071e-05, "loss": 0.6021, "step": 631000 }, { "epoch": 6.43, "learning_rate": 3.857217587300587e-05, "loss": 0.6005, "step": 631100 }, { "epoch": 6.43, "learning_rate": 3.856581863681251e-05, "loss": 0.5931, "step": 631200 }, { "epoch": 6.43, "learning_rate": 3.855946098180173e-05, "loss": 0.504, "step": 631300 }, { "epoch": 6.43, "learning_rate": 3.855310290828439e-05, "loss": 0.5346, "step": 631400 }, { "epoch": 6.43, "learning_rate": 3.854680800355754e-05, "loss": 0.5164, "step": 631500 }, { "epoch": 6.43, "learning_rate": 3.8540449098137016e-05, "loss": 0.4551, "step": 631600 }, { "epoch": 6.44, "learning_rate": 3.8534089775139505e-05, "loss": 0.5681, "step": 631700 }, { "epoch": 6.44, "learning_rate": 3.852773003487593e-05, "loss": 0.5253, "step": 631800 }, { "epoch": 6.44, "learning_rate": 3.852136987765725e-05, "loss": 0.4507, "step": 631900 }, { "epoch": 6.44, "learning_rate": 3.851500930379443e-05, "loss": 0.5054, "step": 632000 }, { "epoch": 6.44, "learning_rate": 3.8508648313598476e-05, "loss": 0.518, "step": 632100 }, { "epoch": 6.44, "learning_rate": 3.8502286907380386e-05, "loss": 0.5072, "step": 632200 }, { "epoch": 6.44, "learning_rate": 3.849592508545121e-05, "loss": 0.4719, "step": 632300 }, { "epoch": 6.44, "learning_rate": 3.848956284812201e-05, "loss": 0.5683, "step": 632400 }, { "epoch": 6.44, "learning_rate": 3.848320019570384e-05, "loss": 0.5434, "step": 632500 }, { "epoch": 6.45, "learning_rate": 3.8476837128507805e-05, "loss": 0.5273, "step": 632600 }, { "epoch": 6.45, "learning_rate": 3.8470473646845035e-05, "loss": 0.5009, "step": 632700 }, { "epoch": 6.45, "learning_rate": 3.8464109751026636e-05, "loss": 0.4959, "step": 632800 }, { "epoch": 6.45, "learning_rate": 3.8457745441363775e-05, "loss": 0.5014, "step": 632900 }, { "epoch": 6.45, "learning_rate": 3.8451380718167646e-05, "loss": 0.4881, "step": 633000 }, { "epoch": 6.45, "learning_rate": 3.844501558174944e-05, "loss": 0.4613, "step": 633100 }, { "epoch": 6.45, "learning_rate": 3.843865003242037e-05, "loss": 0.5936, "step": 633200 }, { "epoch": 6.45, "learning_rate": 3.8432284070491665e-05, "loss": 0.4976, "step": 633300 }, { "epoch": 6.45, "learning_rate": 3.8425917696274595e-05, "loss": 0.5197, "step": 633400 }, { "epoch": 6.45, "learning_rate": 3.8419550910080426e-05, "loss": 0.4852, "step": 633500 }, { "epoch": 6.46, "learning_rate": 3.8413183712220464e-05, "loss": 0.4654, "step": 633600 }, { "epoch": 6.46, "learning_rate": 3.8406816103006034e-05, "loss": 0.542, "step": 633700 }, { "epoch": 6.46, "learning_rate": 3.840044808274846e-05, "loss": 0.4455, "step": 633800 }, { "epoch": 6.46, "learning_rate": 3.839407965175911e-05, "loss": 0.5642, "step": 633900 }, { "epoch": 6.46, "learning_rate": 3.838771081034936e-05, "loss": 0.536, "step": 634000 }, { "epoch": 6.46, "learning_rate": 3.838134155883059e-05, "loss": 0.4027, "step": 634100 }, { "epoch": 6.46, "learning_rate": 3.837497189751425e-05, "loss": 0.4923, "step": 634200 }, { "epoch": 6.46, "learning_rate": 3.8368601826711766e-05, "loss": 0.5543, "step": 634300 }, { "epoch": 6.46, "learning_rate": 3.8362231346734585e-05, "loss": 0.5271, "step": 634400 }, { "epoch": 6.46, "learning_rate": 3.8355860457894194e-05, "loss": 0.4994, "step": 634500 }, { "epoch": 6.47, "learning_rate": 3.83494891605021e-05, "loss": 0.585, "step": 634600 }, { "epoch": 6.47, "learning_rate": 3.8343117454869814e-05, "loss": 0.4882, "step": 634700 }, { "epoch": 6.47, "learning_rate": 3.833674534130886e-05, "loss": 0.4894, "step": 634800 }, { "epoch": 6.47, "learning_rate": 3.833037282013081e-05, "loss": 0.4367, "step": 634900 }, { "epoch": 6.47, "learning_rate": 3.8323999891647255e-05, "loss": 0.4038, "step": 635000 }, { "epoch": 6.47, "learning_rate": 3.831762655616979e-05, "loss": 0.4617, "step": 635100 }, { "epoch": 6.47, "learning_rate": 3.831125281401e-05, "loss": 0.4597, "step": 635200 }, { "epoch": 6.47, "learning_rate": 3.830487866547955e-05, "loss": 0.56, "step": 635300 }, { "epoch": 6.47, "learning_rate": 3.829850411089011e-05, "loss": 0.5664, "step": 635400 }, { "epoch": 6.47, "learning_rate": 3.829212915055332e-05, "loss": 0.5628, "step": 635500 }, { "epoch": 6.48, "learning_rate": 3.8285753784780915e-05, "loss": 0.5534, "step": 635600 }, { "epoch": 6.48, "learning_rate": 3.827937801388459e-05, "loss": 0.4291, "step": 635700 }, { "epoch": 6.48, "learning_rate": 3.827300183817609e-05, "loss": 0.5154, "step": 635800 }, { "epoch": 6.48, "learning_rate": 3.8266625257967176e-05, "loss": 0.5357, "step": 635900 }, { "epoch": 6.48, "learning_rate": 3.82603120454133e-05, "loss": 0.5425, "step": 636000 }, { "epoch": 6.48, "learning_rate": 3.8253934661176113e-05, "loss": 0.5378, "step": 636100 }, { "epoch": 6.48, "learning_rate": 3.824755687337078e-05, "loss": 0.6535, "step": 636200 }, { "epoch": 6.48, "learning_rate": 3.8241178682309144e-05, "loss": 0.6025, "step": 636300 }, { "epoch": 6.48, "learning_rate": 3.823480008830305e-05, "loss": 0.5056, "step": 636400 }, { "epoch": 6.48, "learning_rate": 3.8228421091664366e-05, "loss": 0.5638, "step": 636500 }, { "epoch": 6.49, "learning_rate": 3.8222041692705e-05, "loss": 0.5694, "step": 636600 }, { "epoch": 6.49, "learning_rate": 3.821566189173686e-05, "loss": 0.536, "step": 636700 }, { "epoch": 6.49, "learning_rate": 3.8209281689071875e-05, "loss": 0.5972, "step": 636800 }, { "epoch": 6.49, "learning_rate": 3.820290108502202e-05, "loss": 0.5433, "step": 636900 }, { "epoch": 6.49, "learning_rate": 3.819652007989924e-05, "loss": 0.4479, "step": 637000 }, { "epoch": 6.49, "learning_rate": 3.819013867401556e-05, "loss": 0.5722, "step": 637100 }, { "epoch": 6.49, "learning_rate": 3.8183756867682965e-05, "loss": 0.5606, "step": 637200 }, { "epoch": 6.49, "learning_rate": 3.81773746612135e-05, "loss": 0.5321, "step": 637300 }, { "epoch": 6.49, "learning_rate": 3.8170992054919206e-05, "loss": 0.5998, "step": 637400 }, { "epoch": 6.49, "learning_rate": 3.816460904911217e-05, "loss": 0.5378, "step": 637500 }, { "epoch": 6.5, "learning_rate": 3.815822564410448e-05, "loss": 0.5933, "step": 637600 }, { "epoch": 6.5, "learning_rate": 3.8151841840208235e-05, "loss": 0.6085, "step": 637700 }, { "epoch": 6.5, "learning_rate": 3.8145457637735577e-05, "loss": 0.5341, "step": 637800 }, { "epoch": 6.5, "learning_rate": 3.813913688497639e-05, "loss": 0.5314, "step": 637900 }, { "epoch": 6.5, "learning_rate": 3.813275189026533e-05, "loss": 0.5003, "step": 638000 }, { "epoch": 6.5, "learning_rate": 3.812636649791124e-05, "loss": 0.4893, "step": 638100 }, { "epoch": 6.5, "learning_rate": 3.811998070822633e-05, "loss": 0.5263, "step": 638200 }, { "epoch": 6.5, "learning_rate": 3.811359452152281e-05, "loss": 0.6136, "step": 638300 }, { "epoch": 6.5, "learning_rate": 3.810720793811294e-05, "loss": 0.4571, "step": 638400 }, { "epoch": 6.51, "learning_rate": 3.810082095830899e-05, "loss": 0.4359, "step": 638500 }, { "epoch": 6.51, "learning_rate": 3.809443358242323e-05, "loss": 0.5481, "step": 638600 }, { "epoch": 6.51, "learning_rate": 3.808804581076797e-05, "loss": 0.4375, "step": 638700 }, { "epoch": 6.51, "learning_rate": 3.8081657643655546e-05, "loss": 0.5736, "step": 638800 }, { "epoch": 6.51, "learning_rate": 3.8075269081398294e-05, "loss": 0.4922, "step": 638900 }, { "epoch": 6.51, "learning_rate": 3.806888012430857e-05, "loss": 0.4358, "step": 639000 }, { "epoch": 6.51, "learning_rate": 3.806249077269877e-05, "loss": 0.5477, "step": 639100 }, { "epoch": 6.51, "learning_rate": 3.805610102688127e-05, "loss": 0.5541, "step": 639200 }, { "epoch": 6.51, "learning_rate": 3.80497108871685e-05, "loss": 0.5283, "step": 639300 }, { "epoch": 6.51, "learning_rate": 3.804332035387293e-05, "loss": 0.5051, "step": 639400 }, { "epoch": 6.52, "learning_rate": 3.803692942730698e-05, "loss": 0.568, "step": 639500 }, { "epoch": 6.52, "learning_rate": 3.803053810778314e-05, "loss": 0.547, "step": 639600 }, { "epoch": 6.52, "learning_rate": 3.802414639561392e-05, "loss": 0.5457, "step": 639700 }, { "epoch": 6.52, "learning_rate": 3.801775429111182e-05, "loss": 0.5237, "step": 639800 }, { "epoch": 6.52, "learning_rate": 3.8011361794589383e-05, "loss": 0.5376, "step": 639900 }, { "epoch": 6.52, "learning_rate": 3.800496890635916e-05, "loss": 0.5043, "step": 640000 }, { "epoch": 6.52, "learning_rate": 3.799857562673374e-05, "loss": 0.5146, "step": 640100 }, { "epoch": 6.52, "learning_rate": 3.7992245894667615e-05, "loss": 0.4985, "step": 640200 }, { "epoch": 6.52, "learning_rate": 3.7985851837095724e-05, "loss": 0.5517, "step": 640300 }, { "epoch": 6.52, "learning_rate": 3.797945738906334e-05, "loss": 0.5854, "step": 640400 }, { "epoch": 6.53, "learning_rate": 3.79730625508831e-05, "loss": 0.4775, "step": 640500 }, { "epoch": 6.53, "learning_rate": 3.796666732286768e-05, "loss": 0.6324, "step": 640600 }, { "epoch": 6.53, "learning_rate": 3.796027170532978e-05, "loss": 0.5578, "step": 640700 }, { "epoch": 6.53, "learning_rate": 3.795393966057513e-05, "loss": 0.4782, "step": 640800 }, { "epoch": 6.53, "learning_rate": 3.794754326881782e-05, "loss": 0.4901, "step": 640900 }, { "epoch": 6.53, "learning_rate": 3.794114648847306e-05, "loss": 0.4893, "step": 641000 }, { "epoch": 6.53, "learning_rate": 3.793474931985363e-05, "loss": 0.5765, "step": 641100 }, { "epoch": 6.53, "learning_rate": 3.792835176327231e-05, "loss": 0.5445, "step": 641200 }, { "epoch": 6.53, "learning_rate": 3.7921953819041895e-05, "loss": 0.5603, "step": 641300 }, { "epoch": 6.53, "learning_rate": 3.791555548747523e-05, "loss": 0.5566, "step": 641400 }, { "epoch": 6.54, "learning_rate": 3.790915676888514e-05, "loss": 0.5261, "step": 641500 }, { "epoch": 6.54, "learning_rate": 3.790275766358447e-05, "loss": 0.4711, "step": 641600 }, { "epoch": 6.54, "learning_rate": 3.789635817188612e-05, "loss": 0.4729, "step": 641700 }, { "epoch": 6.54, "learning_rate": 3.788995829410297e-05, "loss": 0.503, "step": 641800 }, { "epoch": 6.54, "learning_rate": 3.788355803054795e-05, "loss": 0.5381, "step": 641900 }, { "epoch": 6.54, "learning_rate": 3.787715738153399e-05, "loss": 0.5422, "step": 642000 }, { "epoch": 6.54, "learning_rate": 3.7870756347374036e-05, "loss": 0.5047, "step": 642100 }, { "epoch": 6.54, "learning_rate": 3.786435492838108e-05, "loss": 0.56, "step": 642200 }, { "epoch": 6.54, "learning_rate": 3.785795312486809e-05, "loss": 0.4611, "step": 642300 }, { "epoch": 6.54, "learning_rate": 3.785155093714808e-05, "loss": 0.5796, "step": 642400 }, { "epoch": 6.55, "learning_rate": 3.784514836553409e-05, "loss": 0.5225, "step": 642500 }, { "epoch": 6.55, "learning_rate": 3.783874541033917e-05, "loss": 0.46, "step": 642600 }, { "epoch": 6.55, "learning_rate": 3.783234207187638e-05, "loss": 0.6001, "step": 642700 }, { "epoch": 6.55, "learning_rate": 3.78259383504588e-05, "loss": 0.5549, "step": 642800 }, { "epoch": 6.55, "learning_rate": 3.781953424639953e-05, "loss": 0.5002, "step": 642900 }, { "epoch": 6.55, "learning_rate": 3.781312976001171e-05, "loss": 0.4642, "step": 643000 }, { "epoch": 6.55, "learning_rate": 3.780672489160846e-05, "loss": 0.5741, "step": 643100 }, { "epoch": 6.55, "learning_rate": 3.7800319641502954e-05, "loss": 0.4785, "step": 643200 }, { "epoch": 6.55, "learning_rate": 3.7793914010008374e-05, "loss": 0.5692, "step": 643300 }, { "epoch": 6.56, "learning_rate": 3.778750799743791e-05, "loss": 0.5426, "step": 643400 }, { "epoch": 6.56, "learning_rate": 3.778110160410477e-05, "loss": 0.5421, "step": 643500 }, { "epoch": 6.56, "learning_rate": 3.77746948303222e-05, "loss": 0.4852, "step": 643600 }, { "epoch": 6.56, "learning_rate": 3.776835174982329e-05, "loss": 0.4544, "step": 643700 }, { "epoch": 6.56, "learning_rate": 3.77619442198783e-05, "loss": 0.499, "step": 643800 }, { "epoch": 6.56, "learning_rate": 3.775553631042057e-05, "loss": 0.4793, "step": 643900 }, { "epoch": 6.56, "learning_rate": 3.77491280217634e-05, "loss": 0.5337, "step": 644000 }, { "epoch": 6.56, "learning_rate": 3.774271935422011e-05, "loss": 0.5972, "step": 644100 }, { "epoch": 6.56, "learning_rate": 3.7736310308104054e-05, "loss": 0.4977, "step": 644200 }, { "epoch": 6.56, "learning_rate": 3.772990088372859e-05, "loss": 0.6456, "step": 644300 }, { "epoch": 6.57, "learning_rate": 3.77234910814071e-05, "loss": 0.5815, "step": 644400 }, { "epoch": 6.57, "learning_rate": 3.771708090145298e-05, "loss": 0.5494, "step": 644500 }, { "epoch": 6.57, "learning_rate": 3.771067034417967e-05, "loss": 0.5426, "step": 644600 }, { "epoch": 6.57, "learning_rate": 3.7704259409900594e-05, "loss": 0.6046, "step": 644700 }, { "epoch": 6.57, "learning_rate": 3.76978480989292e-05, "loss": 0.4872, "step": 644800 }, { "epoch": 6.57, "learning_rate": 3.769143641157898e-05, "loss": 0.4802, "step": 644900 }, { "epoch": 6.57, "learning_rate": 3.768502434816342e-05, "loss": 0.5041, "step": 645000 }, { "epoch": 6.57, "learning_rate": 3.767861190899603e-05, "loss": 0.5784, "step": 645100 }, { "epoch": 6.57, "learning_rate": 3.767219909439034e-05, "loss": 0.5663, "step": 645200 }, { "epoch": 6.57, "learning_rate": 3.766578590465989e-05, "loss": 0.5911, "step": 645300 }, { "epoch": 6.58, "learning_rate": 3.765937234011827e-05, "loss": 0.5324, "step": 645400 }, { "epoch": 6.58, "learning_rate": 3.765295840107906e-05, "loss": 0.4894, "step": 645500 }, { "epoch": 6.58, "learning_rate": 3.7646544087855836e-05, "loss": 0.4647, "step": 645600 }, { "epoch": 6.58, "learning_rate": 3.764012940076224e-05, "loss": 0.5204, "step": 645700 }, { "epoch": 6.58, "learning_rate": 3.763371434011191e-05, "loss": 0.5444, "step": 645800 }, { "epoch": 6.58, "learning_rate": 3.762729890621851e-05, "loss": 0.5093, "step": 645900 }, { "epoch": 6.58, "learning_rate": 3.7620883099395716e-05, "loss": 0.4852, "step": 646000 }, { "epoch": 6.58, "learning_rate": 3.761446691995721e-05, "loss": 0.5114, "step": 646100 }, { "epoch": 6.58, "learning_rate": 3.7608050368216715e-05, "loss": 0.458, "step": 646200 }, { "epoch": 6.58, "learning_rate": 3.760163344448795e-05, "loss": 0.4893, "step": 646300 }, { "epoch": 6.59, "learning_rate": 3.759521614908468e-05, "loss": 0.5187, "step": 646400 }, { "epoch": 6.59, "learning_rate": 3.7588798482320675e-05, "loss": 0.5397, "step": 646500 }, { "epoch": 6.59, "learning_rate": 3.7582380444509704e-05, "loss": 0.4794, "step": 646600 }, { "epoch": 6.59, "learning_rate": 3.757596203596558e-05, "loss": 0.6186, "step": 646700 }, { "epoch": 6.59, "learning_rate": 3.756954325700213e-05, "loss": 0.4716, "step": 646800 }, { "epoch": 6.59, "learning_rate": 3.756312410793317e-05, "loss": 0.5347, "step": 646900 }, { "epoch": 6.59, "learning_rate": 3.755670458907259e-05, "loss": 0.5501, "step": 647000 }, { "epoch": 6.59, "learning_rate": 3.7550284700734245e-05, "loss": 0.5767, "step": 647100 }, { "epoch": 6.59, "learning_rate": 3.754386444323203e-05, "loss": 0.4323, "step": 647200 }, { "epoch": 6.59, "learning_rate": 3.753744381687988e-05, "loss": 0.5423, "step": 647300 }, { "epoch": 6.6, "learning_rate": 3.7531022821991696e-05, "loss": 0.5501, "step": 647400 }, { "epoch": 6.6, "learning_rate": 3.752460145888145e-05, "loss": 0.5642, "step": 647500 }, { "epoch": 6.6, "learning_rate": 3.751817972786309e-05, "loss": 0.5593, "step": 647600 }, { "epoch": 6.6, "learning_rate": 3.7511757629250596e-05, "loss": 0.5147, "step": 647700 }, { "epoch": 6.6, "learning_rate": 3.750533516335799e-05, "loss": 0.5679, "step": 647800 }, { "epoch": 6.6, "learning_rate": 3.7498912330499294e-05, "loss": 0.5182, "step": 647900 }, { "epoch": 6.6, "learning_rate": 3.7492489130988526e-05, "loss": 0.4729, "step": 648000 }, { "epoch": 6.6, "learning_rate": 3.748606556513976e-05, "loss": 0.4973, "step": 648100 }, { "epoch": 6.6, "learning_rate": 3.747964163326706e-05, "loss": 0.5247, "step": 648200 }, { "epoch": 6.6, "learning_rate": 3.7473217335684516e-05, "loss": 0.6206, "step": 648300 }, { "epoch": 6.61, "learning_rate": 3.74668569211437e-05, "loss": 0.4371, "step": 648400 }, { "epoch": 6.61, "learning_rate": 3.746043189673309e-05, "loss": 0.531, "step": 648500 }, { "epoch": 6.61, "learning_rate": 3.745400650755187e-05, "loss": 0.5494, "step": 648600 }, { "epoch": 6.61, "learning_rate": 3.744758075391424e-05, "loss": 0.5041, "step": 648700 }, { "epoch": 6.61, "learning_rate": 3.744115463613433e-05, "loss": 0.5318, "step": 648800 }, { "epoch": 6.61, "learning_rate": 3.7434728154526375e-05, "loss": 0.5787, "step": 648900 }, { "epoch": 6.61, "learning_rate": 3.742830130940457e-05, "loss": 0.5133, "step": 649000 }, { "epoch": 6.61, "learning_rate": 3.742187410108318e-05, "loss": 0.4797, "step": 649100 }, { "epoch": 6.61, "learning_rate": 3.741544652987643e-05, "loss": 0.5597, "step": 649200 }, { "epoch": 6.62, "learning_rate": 3.74090185960986e-05, "loss": 0.5025, "step": 649300 }, { "epoch": 6.62, "learning_rate": 3.7402590300063976e-05, "loss": 0.5317, "step": 649400 }, { "epoch": 6.62, "learning_rate": 3.7396161642086854e-05, "loss": 0.5667, "step": 649500 }, { "epoch": 6.62, "learning_rate": 3.7389732622481565e-05, "loss": 0.4278, "step": 649600 }, { "epoch": 6.62, "learning_rate": 3.738330324156246e-05, "loss": 0.5121, "step": 649700 }, { "epoch": 6.62, "learning_rate": 3.737687349964388e-05, "loss": 0.4946, "step": 649800 }, { "epoch": 6.62, "learning_rate": 3.737044339704022e-05, "loss": 0.4955, "step": 649900 }, { "epoch": 6.62, "learning_rate": 3.736401293406586e-05, "loss": 0.5424, "step": 650000 }, { "epoch": 6.62, "learning_rate": 3.7357582111035205e-05, "loss": 0.4217, "step": 650100 }, { "epoch": 6.62, "learning_rate": 3.7351150928262694e-05, "loss": 0.4857, "step": 650200 }, { "epoch": 6.63, "learning_rate": 3.734471938606278e-05, "loss": 0.4527, "step": 650300 }, { "epoch": 6.63, "learning_rate": 3.733828748474991e-05, "loss": 0.5265, "step": 650400 }, { "epoch": 6.63, "learning_rate": 3.7331855224638583e-05, "loss": 0.5047, "step": 650500 }, { "epoch": 6.63, "learning_rate": 3.732542260604328e-05, "loss": 0.4972, "step": 650600 }, { "epoch": 6.63, "learning_rate": 3.731898962927853e-05, "loss": 0.4476, "step": 650700 }, { "epoch": 6.63, "learning_rate": 3.731262062977542e-05, "loss": 0.5041, "step": 650800 }, { "epoch": 6.63, "learning_rate": 3.730618694118923e-05, "loss": 0.5415, "step": 650900 }, { "epoch": 6.63, "learning_rate": 3.729975289537411e-05, "loss": 0.5246, "step": 651000 }, { "epoch": 6.63, "learning_rate": 3.729331849264464e-05, "loss": 0.5812, "step": 651100 }, { "epoch": 6.63, "learning_rate": 3.728688373331541e-05, "loss": 0.5537, "step": 651200 }, { "epoch": 6.64, "learning_rate": 3.728044861770106e-05, "loss": 0.5339, "step": 651300 }, { "epoch": 6.64, "learning_rate": 3.727401314611622e-05, "loss": 0.5037, "step": 651400 }, { "epoch": 6.64, "learning_rate": 3.726757731887554e-05, "loss": 0.5523, "step": 651500 }, { "epoch": 6.64, "learning_rate": 3.72611411362937e-05, "loss": 0.5695, "step": 651600 }, { "epoch": 6.64, "learning_rate": 3.725470459868539e-05, "loss": 0.4919, "step": 651700 }, { "epoch": 6.64, "learning_rate": 3.724826770636532e-05, "loss": 0.5418, "step": 651800 }, { "epoch": 6.64, "learning_rate": 3.7241830459648224e-05, "loss": 0.4592, "step": 651900 }, { "epoch": 6.64, "learning_rate": 3.723545723660849e-05, "loss": 0.483, "step": 652000 }, { "epoch": 6.64, "learning_rate": 3.7229019285577684e-05, "loss": 0.6228, "step": 652100 }, { "epoch": 6.64, "learning_rate": 3.7222580981090965e-05, "loss": 0.5042, "step": 652200 }, { "epoch": 6.65, "learning_rate": 3.7216142323463145e-05, "loss": 0.4903, "step": 652300 }, { "epoch": 6.65, "learning_rate": 3.720970331300903e-05, "loss": 0.4873, "step": 652400 }, { "epoch": 6.65, "learning_rate": 3.7203263950043445e-05, "loss": 0.5102, "step": 652500 }, { "epoch": 6.65, "learning_rate": 3.719682423488124e-05, "loss": 0.5588, "step": 652600 }, { "epoch": 6.65, "learning_rate": 3.7190384167837287e-05, "loss": 0.4271, "step": 652700 }, { "epoch": 6.65, "learning_rate": 3.7183943749226445e-05, "loss": 0.4695, "step": 652800 }, { "epoch": 6.65, "learning_rate": 3.717750297936363e-05, "loss": 0.5046, "step": 652900 }, { "epoch": 6.65, "learning_rate": 3.717106185856375e-05, "loss": 0.5333, "step": 653000 }, { "epoch": 6.65, "learning_rate": 3.716462038714174e-05, "loss": 0.4315, "step": 653100 }, { "epoch": 6.65, "learning_rate": 3.7158178565412564e-05, "loss": 0.5089, "step": 653200 }, { "epoch": 6.66, "learning_rate": 3.715173639369115e-05, "loss": 0.4676, "step": 653300 }, { "epoch": 6.66, "learning_rate": 3.714529387229252e-05, "loss": 0.5052, "step": 653400 }, { "epoch": 6.66, "learning_rate": 3.7138851001531656e-05, "loss": 0.5293, "step": 653500 }, { "epoch": 6.66, "learning_rate": 3.7132407781723586e-05, "loss": 0.5438, "step": 653600 }, { "epoch": 6.66, "learning_rate": 3.712596421318333e-05, "loss": 0.5439, "step": 653700 }, { "epoch": 6.66, "learning_rate": 3.711952029622596e-05, "loss": 0.5865, "step": 653800 }, { "epoch": 6.66, "learning_rate": 3.711307603116653e-05, "loss": 0.4825, "step": 653900 }, { "epoch": 6.66, "learning_rate": 3.7106631418320136e-05, "loss": 0.5458, "step": 654000 }, { "epoch": 6.66, "learning_rate": 3.7100186458001864e-05, "loss": 0.5616, "step": 654100 }, { "epoch": 6.67, "learning_rate": 3.709374115052685e-05, "loss": 0.483, "step": 654200 }, { "epoch": 6.67, "learning_rate": 3.7087295496210237e-05, "loss": 0.487, "step": 654300 }, { "epoch": 6.67, "learning_rate": 3.7080849495367155e-05, "loss": 0.4763, "step": 654400 }, { "epoch": 6.67, "learning_rate": 3.70744031483128e-05, "loss": 0.4314, "step": 654500 }, { "epoch": 6.67, "learning_rate": 3.706795645536234e-05, "loss": 0.432, "step": 654600 }, { "epoch": 6.67, "learning_rate": 3.7061509416830994e-05, "loss": 0.5061, "step": 654700 }, { "epoch": 6.67, "learning_rate": 3.705506203303398e-05, "loss": 0.4543, "step": 654800 }, { "epoch": 6.67, "learning_rate": 3.7048614304286525e-05, "loss": 0.4884, "step": 654900 }, { "epoch": 6.67, "learning_rate": 3.7042230713342645e-05, "loss": 0.5257, "step": 655000 }, { "epoch": 6.67, "learning_rate": 3.7035782299081756e-05, "loss": 0.5248, "step": 655100 }, { "epoch": 6.68, "learning_rate": 3.702933354081309e-05, "loss": 0.5704, "step": 655200 }, { "epoch": 6.68, "learning_rate": 3.702288443885198e-05, "loss": 0.5118, "step": 655300 }, { "epoch": 6.68, "learning_rate": 3.701643499351373e-05, "loss": 0.5316, "step": 655400 }, { "epoch": 6.68, "learning_rate": 3.7009985205113675e-05, "loss": 0.5284, "step": 655500 }, { "epoch": 6.68, "learning_rate": 3.7003535073967174e-05, "loss": 0.5209, "step": 655600 }, { "epoch": 6.68, "learning_rate": 3.6997084600389606e-05, "loss": 0.5208, "step": 655700 }, { "epoch": 6.68, "learning_rate": 3.699063378469636e-05, "loss": 0.3749, "step": 655800 }, { "epoch": 6.68, "learning_rate": 3.698418262720284e-05, "loss": 0.4537, "step": 655900 }, { "epoch": 6.68, "learning_rate": 3.697773112822446e-05, "loss": 0.5132, "step": 656000 }, { "epoch": 6.68, "learning_rate": 3.697127928807668e-05, "loss": 0.461, "step": 656100 }, { "epoch": 6.69, "learning_rate": 3.696482710707493e-05, "loss": 0.5357, "step": 656200 }, { "epoch": 6.69, "learning_rate": 3.695837458553472e-05, "loss": 0.5019, "step": 656300 }, { "epoch": 6.69, "learning_rate": 3.695192172377151e-05, "loss": 0.5209, "step": 656400 }, { "epoch": 6.69, "learning_rate": 3.69454685221008e-05, "loss": 0.5917, "step": 656500 }, { "epoch": 6.69, "learning_rate": 3.6939014980838144e-05, "loss": 0.54, "step": 656600 }, { "epoch": 6.69, "learning_rate": 3.693256110029906e-05, "loss": 0.4647, "step": 656700 }, { "epoch": 6.69, "learning_rate": 3.692610688079911e-05, "loss": 0.5196, "step": 656800 }, { "epoch": 6.69, "learning_rate": 3.6919652322653864e-05, "loss": 0.5456, "step": 656900 }, { "epoch": 6.69, "learning_rate": 3.691319742617892e-05, "loss": 0.4584, "step": 657000 }, { "epoch": 6.69, "learning_rate": 3.690674219168987e-05, "loss": 0.4391, "step": 657100 }, { "epoch": 6.7, "learning_rate": 3.690028661950235e-05, "loss": 0.5406, "step": 657200 }, { "epoch": 6.7, "learning_rate": 3.689383070993199e-05, "loss": 0.5771, "step": 657300 }, { "epoch": 6.7, "learning_rate": 3.6887374463294446e-05, "loss": 0.4882, "step": 657400 }, { "epoch": 6.7, "learning_rate": 3.68809178799054e-05, "loss": 0.5586, "step": 657500 }, { "epoch": 6.7, "learning_rate": 3.687446096008053e-05, "loss": 0.5308, "step": 657600 }, { "epoch": 6.7, "learning_rate": 3.686800370413554e-05, "loss": 0.4798, "step": 657700 }, { "epoch": 6.7, "learning_rate": 3.686154611238616e-05, "loss": 0.5406, "step": 657800 }, { "epoch": 6.7, "learning_rate": 3.685508818514812e-05, "loss": 0.5328, "step": 657900 }, { "epoch": 6.7, "learning_rate": 3.684862992273718e-05, "loss": 0.5241, "step": 658000 }, { "epoch": 6.7, "learning_rate": 3.68421713254691e-05, "loss": 0.4499, "step": 658100 }, { "epoch": 6.71, "learning_rate": 3.683571239365968e-05, "loss": 0.47, "step": 658200 }, { "epoch": 6.71, "learning_rate": 3.682925312762472e-05, "loss": 0.5805, "step": 658300 }, { "epoch": 6.71, "learning_rate": 3.682279352768003e-05, "loss": 0.5134, "step": 658400 }, { "epoch": 6.71, "learning_rate": 3.681633359414146e-05, "loss": 0.4566, "step": 658500 }, { "epoch": 6.71, "learning_rate": 3.6809873327324845e-05, "loss": 0.5126, "step": 658600 }, { "epoch": 6.71, "learning_rate": 3.680341272754606e-05, "loss": 0.4936, "step": 658700 }, { "epoch": 6.71, "learning_rate": 3.679695179512101e-05, "loss": 0.4423, "step": 658800 }, { "epoch": 6.71, "learning_rate": 3.679049053036557e-05, "loss": 0.5378, "step": 658900 }, { "epoch": 6.71, "learning_rate": 3.678402893359567e-05, "loss": 0.5358, "step": 659000 }, { "epoch": 6.72, "learning_rate": 3.677756700512724e-05, "loss": 0.4929, "step": 659100 }, { "epoch": 6.72, "learning_rate": 3.6771104745276225e-05, "loss": 0.414, "step": 659200 }, { "epoch": 6.72, "learning_rate": 3.6764642154358594e-05, "loss": 0.5156, "step": 659300 }, { "epoch": 6.72, "learning_rate": 3.6758179232690344e-05, "loss": 0.468, "step": 659400 }, { "epoch": 6.72, "learning_rate": 3.675171598058745e-05, "loss": 0.5837, "step": 659500 }, { "epoch": 6.72, "learning_rate": 3.6745252398365935e-05, "loss": 0.4184, "step": 659600 }, { "epoch": 6.72, "learning_rate": 3.673878848634184e-05, "loss": 0.4631, "step": 659700 }, { "epoch": 6.72, "learning_rate": 3.6732324244831194e-05, "loss": 0.5345, "step": 659800 }, { "epoch": 6.72, "learning_rate": 3.672592432148524e-05, "loss": 0.4897, "step": 659900 }, { "epoch": 6.72, "learning_rate": 3.67194594252367e-05, "loss": 0.5565, "step": 660000 }, { "epoch": 6.73, "learning_rate": 3.671299420044669e-05, "loss": 0.4544, "step": 660100 }, { "epoch": 6.73, "learning_rate": 3.670652864743133e-05, "loss": 0.4403, "step": 660200 }, { "epoch": 6.73, "learning_rate": 3.670006276650673e-05, "loss": 0.4926, "step": 660300 }, { "epoch": 6.73, "learning_rate": 3.6693596557989066e-05, "loss": 0.4669, "step": 660400 }, { "epoch": 6.73, "learning_rate": 3.668713002219446e-05, "loss": 0.4158, "step": 660500 }, { "epoch": 6.73, "learning_rate": 3.66806631594391e-05, "loss": 0.5083, "step": 660600 }, { "epoch": 6.73, "learning_rate": 3.667419597003919e-05, "loss": 0.5254, "step": 660700 }, { "epoch": 6.73, "learning_rate": 3.666772845431091e-05, "loss": 0.5178, "step": 660800 }, { "epoch": 6.73, "learning_rate": 3.666126061257052e-05, "loss": 0.4636, "step": 660900 }, { "epoch": 6.73, "learning_rate": 3.665479244513423e-05, "loss": 0.4893, "step": 661000 }, { "epoch": 6.74, "learning_rate": 3.66483239523183e-05, "loss": 0.4859, "step": 661100 }, { "epoch": 6.74, "learning_rate": 3.6641855134439e-05, "loss": 0.5485, "step": 661200 }, { "epoch": 6.74, "learning_rate": 3.663538599181262e-05, "loss": 0.4874, "step": 661300 }, { "epoch": 6.74, "learning_rate": 3.662891652475548e-05, "loss": 0.4899, "step": 661400 }, { "epoch": 6.74, "learning_rate": 3.662244673358387e-05, "loss": 0.4512, "step": 661500 }, { "epoch": 6.74, "learning_rate": 3.661597661861414e-05, "loss": 0.5161, "step": 661600 }, { "epoch": 6.74, "learning_rate": 3.660950618016263e-05, "loss": 0.4732, "step": 661700 }, { "epoch": 6.74, "learning_rate": 3.660303541854571e-05, "loss": 0.5533, "step": 661800 }, { "epoch": 6.74, "learning_rate": 3.659656433407976e-05, "loss": 0.4281, "step": 661900 }, { "epoch": 6.74, "learning_rate": 3.659009292708119e-05, "loss": 0.4899, "step": 662000 }, { "epoch": 6.75, "learning_rate": 3.658362119786639e-05, "loss": 0.6039, "step": 662100 }, { "epoch": 6.75, "learning_rate": 3.657714914675182e-05, "loss": 0.5875, "step": 662200 }, { "epoch": 6.75, "learning_rate": 3.65706767740539e-05, "loss": 0.4893, "step": 662300 }, { "epoch": 6.75, "learning_rate": 3.65642040800891e-05, "loss": 0.4673, "step": 662400 }, { "epoch": 6.75, "learning_rate": 3.655773106517388e-05, "loss": 0.496, "step": 662500 }, { "epoch": 6.75, "learning_rate": 3.655125772962476e-05, "loss": 0.5655, "step": 662600 }, { "epoch": 6.75, "learning_rate": 3.6544784073758225e-05, "loss": 0.5621, "step": 662700 }, { "epoch": 6.75, "learning_rate": 3.6538310097890815e-05, "loss": 0.4366, "step": 662800 }, { "epoch": 6.75, "learning_rate": 3.6531835802339054e-05, "loss": 0.4584, "step": 662900 }, { "epoch": 6.75, "learning_rate": 3.65253611874195e-05, "loss": 0.5005, "step": 663000 }, { "epoch": 6.76, "learning_rate": 3.6518886253448726e-05, "loss": 0.5123, "step": 663100 }, { "epoch": 6.76, "learning_rate": 3.651241100074332e-05, "loss": 0.4999, "step": 663200 }, { "epoch": 6.76, "learning_rate": 3.650593542961988e-05, "loss": 0.4984, "step": 663300 }, { "epoch": 6.76, "learning_rate": 3.649945954039502e-05, "loss": 0.4432, "step": 663400 }, { "epoch": 6.76, "learning_rate": 3.6492983333385376e-05, "loss": 0.4939, "step": 663500 }, { "epoch": 6.76, "learning_rate": 3.64865068089076e-05, "loss": 0.5484, "step": 663600 }, { "epoch": 6.76, "learning_rate": 3.648002996727834e-05, "loss": 0.5036, "step": 663700 }, { "epoch": 6.76, "learning_rate": 3.647355280881429e-05, "loss": 0.5832, "step": 663800 }, { "epoch": 6.76, "learning_rate": 3.6467075333832144e-05, "loss": 0.5186, "step": 663900 }, { "epoch": 6.76, "learning_rate": 3.64605975426486e-05, "loss": 0.4676, "step": 664000 }, { "epoch": 6.77, "learning_rate": 3.6454119435580404e-05, "loss": 0.5172, "step": 664100 }, { "epoch": 6.77, "learning_rate": 3.6447641012944275e-05, "loss": 0.541, "step": 664200 }, { "epoch": 6.77, "learning_rate": 3.6441162275056984e-05, "loss": 0.5516, "step": 664300 }, { "epoch": 6.77, "learning_rate": 3.6434748014321386e-05, "loss": 0.4715, "step": 664400 }, { "epoch": 6.77, "learning_rate": 3.642826865002669e-05, "loss": 0.6193, "step": 664500 }, { "epoch": 6.77, "learning_rate": 3.642178897142802e-05, "loss": 0.5233, "step": 664600 }, { "epoch": 6.77, "learning_rate": 3.6415308978842196e-05, "loss": 0.4734, "step": 664700 }, { "epoch": 6.77, "learning_rate": 3.6408828672586044e-05, "loss": 0.4175, "step": 664800 }, { "epoch": 6.77, "learning_rate": 3.640234805297642e-05, "loss": 0.4571, "step": 664900 }, { "epoch": 6.78, "learning_rate": 3.6395867120330176e-05, "loss": 0.4764, "step": 665000 }, { "epoch": 6.78, "learning_rate": 3.6389385874964204e-05, "loss": 0.447, "step": 665100 }, { "epoch": 6.78, "learning_rate": 3.638290431719539e-05, "loss": 0.4488, "step": 665200 }, { "epoch": 6.78, "learning_rate": 3.6376422447340644e-05, "loss": 0.4287, "step": 665300 }, { "epoch": 6.78, "learning_rate": 3.63699402657169e-05, "loss": 0.4888, "step": 665400 }, { "epoch": 6.78, "learning_rate": 3.6363457772641094e-05, "loss": 0.5018, "step": 665500 }, { "epoch": 6.78, "learning_rate": 3.635697496843016e-05, "loss": 0.537, "step": 665600 }, { "epoch": 6.78, "learning_rate": 3.63504918534011e-05, "loss": 0.4665, "step": 665700 }, { "epoch": 6.78, "learning_rate": 3.634400842787089e-05, "loss": 0.5256, "step": 665800 }, { "epoch": 6.78, "learning_rate": 3.633752469215651e-05, "loss": 0.5017, "step": 665900 }, { "epoch": 6.79, "learning_rate": 3.633104064657501e-05, "loss": 0.5082, "step": 666000 }, { "epoch": 6.79, "learning_rate": 3.632455629144339e-05, "loss": 0.5594, "step": 666100 }, { "epoch": 6.79, "learning_rate": 3.6318071627078726e-05, "loss": 0.512, "step": 666200 }, { "epoch": 6.79, "learning_rate": 3.6311586653798055e-05, "loss": 0.4553, "step": 666300 }, { "epoch": 6.79, "learning_rate": 3.630510137191846e-05, "loss": 0.5456, "step": 666400 }, { "epoch": 6.79, "learning_rate": 3.629861578175704e-05, "loss": 0.4726, "step": 666500 }, { "epoch": 6.79, "learning_rate": 3.629212988363089e-05, "loss": 0.4826, "step": 666600 }, { "epoch": 6.79, "learning_rate": 3.6285643677857145e-05, "loss": 0.5063, "step": 666700 }, { "epoch": 6.79, "learning_rate": 3.6279157164752935e-05, "loss": 0.4396, "step": 666800 }, { "epoch": 6.79, "learning_rate": 3.6272670344635406e-05, "loss": 0.4103, "step": 666900 }, { "epoch": 6.8, "learning_rate": 3.6266183217821734e-05, "loss": 0.4805, "step": 667000 }, { "epoch": 6.8, "learning_rate": 3.62596957846291e-05, "loss": 0.4982, "step": 667100 }, { "epoch": 6.8, "learning_rate": 3.625320804537468e-05, "loss": 0.5055, "step": 667200 }, { "epoch": 6.8, "learning_rate": 3.6246720000375725e-05, "loss": 0.4847, "step": 667300 }, { "epoch": 6.8, "learning_rate": 3.6240231649949445e-05, "loss": 0.4427, "step": 667400 }, { "epoch": 6.8, "learning_rate": 3.6233742994413055e-05, "loss": 0.4951, "step": 667500 }, { "epoch": 6.8, "learning_rate": 3.6227254034083846e-05, "loss": 0.4521, "step": 667600 }, { "epoch": 6.8, "learning_rate": 3.6220764769279075e-05, "loss": 0.4607, "step": 667700 }, { "epoch": 6.8, "learning_rate": 3.621427520031602e-05, "loss": 0.539, "step": 667800 }, { "epoch": 6.8, "learning_rate": 3.620778532751202e-05, "loss": 0.5401, "step": 667900 }, { "epoch": 6.81, "learning_rate": 3.620136005444901e-05, "loss": 0.4674, "step": 668000 }, { "epoch": 6.81, "learning_rate": 3.619486957794552e-05, "loss": 0.5241, "step": 668100 }, { "epoch": 6.81, "learning_rate": 3.618837879854985e-05, "loss": 0.4471, "step": 668200 }, { "epoch": 6.81, "learning_rate": 3.6181887716579395e-05, "loss": 0.4803, "step": 668300 }, { "epoch": 6.81, "learning_rate": 3.617539633235153e-05, "loss": 0.4539, "step": 668400 }, { "epoch": 6.81, "learning_rate": 3.6168904646183624e-05, "loss": 0.4693, "step": 668500 }, { "epoch": 6.81, "learning_rate": 3.616241265839311e-05, "loss": 0.4683, "step": 668600 }, { "epoch": 6.81, "learning_rate": 3.615592036929739e-05, "loss": 0.6487, "step": 668700 }, { "epoch": 6.81, "learning_rate": 3.614942777921389e-05, "loss": 0.5398, "step": 668800 }, { "epoch": 6.81, "learning_rate": 3.614293488846007e-05, "loss": 0.4632, "step": 668900 }, { "epoch": 6.82, "learning_rate": 3.6136441697353403e-05, "loss": 0.4871, "step": 669000 }, { "epoch": 6.82, "learning_rate": 3.6129948206211356e-05, "loss": 0.5138, "step": 669100 }, { "epoch": 6.82, "learning_rate": 3.612345441535142e-05, "loss": 0.5199, "step": 669200 }, { "epoch": 6.82, "learning_rate": 3.61169603250911e-05, "loss": 0.5318, "step": 669300 }, { "epoch": 6.82, "learning_rate": 3.6110465935747936e-05, "loss": 0.5054, "step": 669400 }, { "epoch": 6.82, "learning_rate": 3.610397124763944e-05, "loss": 0.4647, "step": 669500 }, { "epoch": 6.82, "learning_rate": 3.609747626108319e-05, "loss": 0.5289, "step": 669600 }, { "epoch": 6.82, "learning_rate": 3.609098097639673e-05, "loss": 0.5634, "step": 669700 }, { "epoch": 6.82, "learning_rate": 3.608448539389765e-05, "loss": 0.5382, "step": 669800 }, { "epoch": 6.83, "learning_rate": 3.607798951390355e-05, "loss": 0.4902, "step": 669900 }, { "epoch": 6.83, "learning_rate": 3.607149333673203e-05, "loss": 0.4505, "step": 670000 }, { "epoch": 6.83, "learning_rate": 3.6065061828909445e-05, "loss": 0.4815, "step": 670100 }, { "epoch": 6.83, "learning_rate": 3.6058565061299834e-05, "loss": 0.5913, "step": 670200 }, { "epoch": 6.83, "learning_rate": 3.605206799746254e-05, "loss": 0.4183, "step": 670300 }, { "epoch": 6.83, "learning_rate": 3.604557063771524e-05, "loss": 0.5186, "step": 670400 }, { "epoch": 6.83, "learning_rate": 3.603907298237562e-05, "loss": 0.4363, "step": 670500 }, { "epoch": 6.83, "learning_rate": 3.603257503176137e-05, "loss": 0.5108, "step": 670600 }, { "epoch": 6.83, "learning_rate": 3.6026076786190196e-05, "loss": 0.5005, "step": 670700 }, { "epoch": 6.83, "learning_rate": 3.601957824597982e-05, "loss": 0.4583, "step": 670800 }, { "epoch": 6.84, "learning_rate": 3.6013079411448e-05, "loss": 0.5036, "step": 670900 }, { "epoch": 6.84, "learning_rate": 3.600658028291249e-05, "loss": 0.4659, "step": 671000 }, { "epoch": 6.84, "learning_rate": 3.600008086069104e-05, "loss": 0.5261, "step": 671100 }, { "epoch": 6.84, "learning_rate": 3.599358114510145e-05, "loss": 0.5056, "step": 671200 }, { "epoch": 6.84, "learning_rate": 3.59870811364615e-05, "loss": 0.4737, "step": 671300 }, { "epoch": 6.84, "learning_rate": 3.5980580835089015e-05, "loss": 0.4706, "step": 671400 }, { "epoch": 6.84, "learning_rate": 3.597408024130182e-05, "loss": 0.4643, "step": 671500 }, { "epoch": 6.84, "learning_rate": 3.596757935541775e-05, "loss": 0.5279, "step": 671600 }, { "epoch": 6.84, "learning_rate": 3.596107817775466e-05, "loss": 0.481, "step": 671700 }, { "epoch": 6.84, "learning_rate": 3.595457670863043e-05, "loss": 0.4979, "step": 671800 }, { "epoch": 6.85, "learning_rate": 3.594807494836293e-05, "loss": 0.4025, "step": 671900 }, { "epoch": 6.85, "learning_rate": 3.5941572897270056e-05, "loss": 0.5739, "step": 672000 }, { "epoch": 6.85, "learning_rate": 3.59351355805227e-05, "loss": 0.4507, "step": 672100 }, { "epoch": 6.85, "learning_rate": 3.592863295163316e-05, "loss": 0.496, "step": 672200 }, { "epoch": 6.85, "learning_rate": 3.5922130032868844e-05, "loss": 0.47, "step": 672300 }, { "epoch": 6.85, "learning_rate": 3.591562682454772e-05, "loss": 0.5211, "step": 672400 }, { "epoch": 6.85, "learning_rate": 3.5909123326987746e-05, "loss": 0.4861, "step": 672500 }, { "epoch": 6.85, "learning_rate": 3.5902619540506905e-05, "loss": 0.4926, "step": 672600 }, { "epoch": 6.85, "learning_rate": 3.589618050760158e-05, "loss": 0.4992, "step": 672700 }, { "epoch": 6.85, "learning_rate": 3.588974119214512e-05, "loss": 0.5105, "step": 672800 }, { "epoch": 6.86, "learning_rate": 3.588323654656593e-05, "loss": 0.4958, "step": 672900 }, { "epoch": 6.86, "learning_rate": 3.587673161333159e-05, "loss": 0.543, "step": 673000 }, { "epoch": 6.86, "learning_rate": 3.587022639276014e-05, "loss": 0.4032, "step": 673100 }, { "epoch": 6.86, "learning_rate": 3.586372088516965e-05, "loss": 0.4216, "step": 673200 }, { "epoch": 6.86, "learning_rate": 3.58572150908782e-05, "loss": 0.5138, "step": 673300 }, { "epoch": 6.86, "learning_rate": 3.585070901020389e-05, "loss": 0.4552, "step": 673400 }, { "epoch": 6.86, "learning_rate": 3.584420264346482e-05, "loss": 0.5489, "step": 673500 }, { "epoch": 6.86, "learning_rate": 3.583769599097911e-05, "loss": 0.4855, "step": 673600 }, { "epoch": 6.86, "learning_rate": 3.583118905306492e-05, "loss": 0.5079, "step": 673700 }, { "epoch": 6.86, "learning_rate": 3.582468183004037e-05, "loss": 0.4652, "step": 673800 }, { "epoch": 6.87, "learning_rate": 3.581817432222365e-05, "loss": 0.5611, "step": 673900 }, { "epoch": 6.87, "learning_rate": 3.5811666529932924e-05, "loss": 0.4351, "step": 674000 }, { "epoch": 6.87, "learning_rate": 3.580515845348638e-05, "loss": 0.4841, "step": 674100 }, { "epoch": 6.87, "learning_rate": 3.579865009320223e-05, "loss": 0.5385, "step": 674200 }, { "epoch": 6.87, "learning_rate": 3.579214144939871e-05, "loss": 0.4425, "step": 674300 }, { "epoch": 6.87, "learning_rate": 3.5785632522394024e-05, "loss": 0.4961, "step": 674400 }, { "epoch": 6.87, "learning_rate": 3.5779123312506444e-05, "loss": 0.5677, "step": 674500 }, { "epoch": 6.87, "learning_rate": 3.5772613820054215e-05, "loss": 0.4267, "step": 674600 }, { "epoch": 6.87, "learning_rate": 3.576610404535562e-05, "loss": 0.4522, "step": 674700 }, { "epoch": 6.87, "learning_rate": 3.575959398872895e-05, "loss": 0.4864, "step": 674800 }, { "epoch": 6.88, "learning_rate": 3.5753083650492506e-05, "loss": 0.4657, "step": 674900 }, { "epoch": 6.88, "learning_rate": 3.5746573030964604e-05, "loss": 0.5481, "step": 675000 }, { "epoch": 6.88, "learning_rate": 3.5740062130463575e-05, "loss": 0.4292, "step": 675100 }, { "epoch": 6.88, "learning_rate": 3.573355094930776e-05, "loss": 0.3947, "step": 675200 }, { "epoch": 6.88, "learning_rate": 3.572703948781553e-05, "loss": 0.4997, "step": 675300 }, { "epoch": 6.88, "learning_rate": 3.5720527746305224e-05, "loss": 0.4819, "step": 675400 }, { "epoch": 6.88, "learning_rate": 3.571401572509526e-05, "loss": 0.4822, "step": 675500 }, { "epoch": 6.88, "learning_rate": 3.570750342450404e-05, "loss": 0.5081, "step": 675600 }, { "epoch": 6.88, "learning_rate": 3.570099084484995e-05, "loss": 0.4565, "step": 675700 }, { "epoch": 6.89, "learning_rate": 3.569447798645143e-05, "loss": 0.496, "step": 675800 }, { "epoch": 6.89, "learning_rate": 3.568802998237233e-05, "loss": 0.5476, "step": 675900 }, { "epoch": 6.89, "learning_rate": 3.568151657021978e-05, "loss": 0.5375, "step": 676000 }, { "epoch": 6.89, "learning_rate": 3.567500288027498e-05, "loss": 0.5519, "step": 676100 }, { "epoch": 6.89, "learning_rate": 3.56684889128564e-05, "loss": 0.5217, "step": 676200 }, { "epoch": 6.89, "learning_rate": 3.566197466828255e-05, "loss": 0.5068, "step": 676300 }, { "epoch": 6.89, "learning_rate": 3.565546014687192e-05, "loss": 0.477, "step": 676400 }, { "epoch": 6.89, "learning_rate": 3.5648945348943036e-05, "loss": 0.5241, "step": 676500 }, { "epoch": 6.89, "learning_rate": 3.5642430274814444e-05, "loss": 0.5511, "step": 676600 }, { "epoch": 6.89, "learning_rate": 3.563591492480468e-05, "loss": 0.5716, "step": 676700 }, { "epoch": 6.9, "learning_rate": 3.5629399299232304e-05, "loss": 0.5128, "step": 676800 }, { "epoch": 6.9, "learning_rate": 3.562288339841591e-05, "loss": 0.4848, "step": 676900 }, { "epoch": 6.9, "learning_rate": 3.5616367222674055e-05, "loss": 0.4499, "step": 677000 }, { "epoch": 6.9, "learning_rate": 3.5609850772325376e-05, "loss": 0.4991, "step": 677100 }, { "epoch": 6.9, "learning_rate": 3.560333404768847e-05, "loss": 0.5605, "step": 677200 }, { "epoch": 6.9, "learning_rate": 3.5596817049081965e-05, "loss": 0.5085, "step": 677300 }, { "epoch": 6.9, "learning_rate": 3.5590299776824495e-05, "loss": 0.5279, "step": 677400 }, { "epoch": 6.9, "learning_rate": 3.5583782231234743e-05, "loss": 0.6079, "step": 677500 }, { "epoch": 6.9, "learning_rate": 3.557726441263136e-05, "loss": 0.4581, "step": 677600 }, { "epoch": 6.9, "learning_rate": 3.557074632133303e-05, "loss": 0.4687, "step": 677700 }, { "epoch": 6.91, "learning_rate": 3.556422795765844e-05, "loss": 0.5262, "step": 677800 }, { "epoch": 6.91, "learning_rate": 3.5557709321926315e-05, "loss": 0.4798, "step": 677900 }, { "epoch": 6.91, "learning_rate": 3.5551255604874146e-05, "loss": 0.4504, "step": 678000 }, { "epoch": 6.91, "learning_rate": 3.554473642869573e-05, "loss": 0.5968, "step": 678100 }, { "epoch": 6.91, "learning_rate": 3.553821698141281e-05, "loss": 0.5011, "step": 678200 }, { "epoch": 6.91, "learning_rate": 3.5531697263344125e-05, "loss": 0.4457, "step": 678300 }, { "epoch": 6.91, "learning_rate": 3.552517727480845e-05, "loss": 0.5849, "step": 678400 }, { "epoch": 6.91, "learning_rate": 3.551865701612459e-05, "loss": 0.55, "step": 678500 }, { "epoch": 6.91, "learning_rate": 3.551213648761133e-05, "loss": 0.544, "step": 678600 }, { "epoch": 6.91, "learning_rate": 3.550561568958748e-05, "loss": 0.4558, "step": 678700 }, { "epoch": 6.92, "learning_rate": 3.549909462237189e-05, "loss": 0.5014, "step": 678800 }, { "epoch": 6.92, "learning_rate": 3.549257328628338e-05, "loss": 0.4973, "step": 678900 }, { "epoch": 6.92, "learning_rate": 3.548605168164083e-05, "loss": 0.5116, "step": 679000 }, { "epoch": 6.92, "learning_rate": 3.547952980876309e-05, "loss": 0.5169, "step": 679100 }, { "epoch": 6.92, "learning_rate": 3.547300766796903e-05, "loss": 0.4444, "step": 679200 }, { "epoch": 6.92, "learning_rate": 3.546648525957755e-05, "loss": 0.4847, "step": 679300 }, { "epoch": 6.92, "learning_rate": 3.5459962583907575e-05, "loss": 0.4595, "step": 679400 }, { "epoch": 6.92, "learning_rate": 3.5453439641278014e-05, "loss": 0.5173, "step": 679500 }, { "epoch": 6.92, "learning_rate": 3.54469164320078e-05, "loss": 0.5262, "step": 679600 }, { "epoch": 6.92, "learning_rate": 3.5440392956415874e-05, "loss": 0.5186, "step": 679700 }, { "epoch": 6.93, "learning_rate": 3.543386921482121e-05, "loss": 0.449, "step": 679800 }, { "epoch": 6.93, "learning_rate": 3.542734520754275e-05, "loss": 0.5298, "step": 679900 }, { "epoch": 6.93, "learning_rate": 3.542082093489951e-05, "loss": 0.4692, "step": 680000 }, { "epoch": 6.93, "learning_rate": 3.5414296397210476e-05, "loss": 0.5563, "step": 680100 }, { "epoch": 6.93, "learning_rate": 3.540777159479466e-05, "loss": 0.488, "step": 680200 }, { "epoch": 6.93, "learning_rate": 3.5401246527971074e-05, "loss": 0.5281, "step": 680300 }, { "epoch": 6.93, "learning_rate": 3.539472119705878e-05, "loss": 0.5031, "step": 680400 }, { "epoch": 6.93, "learning_rate": 3.5388195602376805e-05, "loss": 0.4847, "step": 680500 }, { "epoch": 6.93, "learning_rate": 3.538166974424422e-05, "loss": 0.4981, "step": 680600 }, { "epoch": 6.94, "learning_rate": 3.5375143622980106e-05, "loss": 0.4627, "step": 680700 }, { "epoch": 6.94, "learning_rate": 3.536861723890354e-05, "loss": 0.4835, "step": 680800 }, { "epoch": 6.94, "learning_rate": 3.5362090592333634e-05, "loss": 0.5308, "step": 680900 }, { "epoch": 6.94, "learning_rate": 3.5355563683589486e-05, "loss": 0.5092, "step": 681000 }, { "epoch": 6.94, "learning_rate": 3.5349036512990246e-05, "loss": 0.4558, "step": 681100 }, { "epoch": 6.94, "learning_rate": 3.534250908085504e-05, "loss": 0.4877, "step": 681200 }, { "epoch": 6.94, "learning_rate": 3.533598138750302e-05, "loss": 0.5427, "step": 681300 }, { "epoch": 6.94, "learning_rate": 3.532945343325336e-05, "loss": 0.5351, "step": 681400 }, { "epoch": 6.94, "learning_rate": 3.532292521842523e-05, "loss": 0.5174, "step": 681500 }, { "epoch": 6.94, "learning_rate": 3.531639674333782e-05, "loss": 0.418, "step": 681600 }, { "epoch": 6.95, "learning_rate": 3.530986800831034e-05, "loss": 0.5384, "step": 681700 }, { "epoch": 6.95, "learning_rate": 3.530333901366199e-05, "loss": 0.4506, "step": 681800 }, { "epoch": 6.95, "learning_rate": 3.529680975971201e-05, "loss": 0.4526, "step": 681900 }, { "epoch": 6.95, "learning_rate": 3.529028024677966e-05, "loss": 0.5298, "step": 682000 }, { "epoch": 6.95, "learning_rate": 3.528375047518416e-05, "loss": 0.4294, "step": 682100 }, { "epoch": 6.95, "learning_rate": 3.52772204452448e-05, "loss": 0.443, "step": 682200 }, { "epoch": 6.95, "learning_rate": 3.5270690157280866e-05, "loss": 0.4039, "step": 682300 }, { "epoch": 6.95, "learning_rate": 3.526415961161162e-05, "loss": 0.4581, "step": 682400 }, { "epoch": 6.95, "learning_rate": 3.5257628808556383e-05, "loss": 0.6104, "step": 682500 }, { "epoch": 6.95, "learning_rate": 3.525109774843448e-05, "loss": 0.5208, "step": 682600 }, { "epoch": 6.96, "learning_rate": 3.5244566431565236e-05, "loss": 0.543, "step": 682700 }, { "epoch": 6.96, "learning_rate": 3.523803485826799e-05, "loss": 0.5433, "step": 682800 }, { "epoch": 6.96, "learning_rate": 3.52315030288621e-05, "loss": 0.5217, "step": 682900 }, { "epoch": 6.96, "learning_rate": 3.5224970943666925e-05, "loss": 0.501, "step": 683000 }, { "epoch": 6.96, "learning_rate": 3.521843860300185e-05, "loss": 0.4783, "step": 683100 }, { "epoch": 6.96, "learning_rate": 3.521190600718627e-05, "loss": 0.4824, "step": 683200 }, { "epoch": 6.96, "learning_rate": 3.52053731565396e-05, "loss": 0.4812, "step": 683300 }, { "epoch": 6.96, "learning_rate": 3.5198840051381244e-05, "loss": 0.5769, "step": 683400 }, { "epoch": 6.96, "learning_rate": 3.5192306692030636e-05, "loss": 0.4308, "step": 683500 }, { "epoch": 6.96, "learning_rate": 3.518577307880721e-05, "loss": 0.471, "step": 683600 }, { "epoch": 6.97, "learning_rate": 3.517923921203043e-05, "loss": 0.5129, "step": 683700 }, { "epoch": 6.97, "learning_rate": 3.517270509201975e-05, "loss": 0.5355, "step": 683800 }, { "epoch": 6.97, "learning_rate": 3.516617071909468e-05, "loss": 0.5171, "step": 683900 }, { "epoch": 6.97, "learning_rate": 3.515963609357468e-05, "loss": 0.472, "step": 684000 }, { "epoch": 6.97, "learning_rate": 3.515310121577927e-05, "loss": 0.6151, "step": 684100 }, { "epoch": 6.97, "learning_rate": 3.5146566086027966e-05, "loss": 0.4364, "step": 684200 }, { "epoch": 6.97, "learning_rate": 3.5140030704640286e-05, "loss": 0.5133, "step": 684300 }, { "epoch": 6.97, "learning_rate": 3.5133495071935775e-05, "loss": 0.5401, "step": 684400 }, { "epoch": 6.97, "learning_rate": 3.5126959188233996e-05, "loss": 0.5363, "step": 684500 }, { "epoch": 6.97, "learning_rate": 3.512042305385451e-05, "loss": 0.4922, "step": 684600 }, { "epoch": 6.98, "learning_rate": 3.51138866691169e-05, "loss": 0.3739, "step": 684700 }, { "epoch": 6.98, "learning_rate": 3.510735003434074e-05, "loss": 0.5071, "step": 684800 }, { "epoch": 6.98, "learning_rate": 3.510081314984564e-05, "loss": 0.4713, "step": 684900 }, { "epoch": 6.98, "learning_rate": 3.509427601595123e-05, "loss": 0.5095, "step": 685000 }, { "epoch": 6.98, "learning_rate": 3.5087738632977107e-05, "loss": 0.4793, "step": 685100 }, { "epoch": 6.98, "learning_rate": 3.508120100124294e-05, "loss": 0.5293, "step": 685200 }, { "epoch": 6.98, "learning_rate": 3.507466312106837e-05, "loss": 0.4923, "step": 685300 }, { "epoch": 6.98, "learning_rate": 3.506819037528315e-05, "loss": 0.5233, "step": 685400 }, { "epoch": 6.98, "learning_rate": 3.506165200166319e-05, "loss": 0.5402, "step": 685500 }, { "epoch": 6.99, "learning_rate": 3.5055113380558654e-05, "loss": 0.3898, "step": 685600 }, { "epoch": 6.99, "learning_rate": 3.5048639902194344e-05, "loss": 0.5548, "step": 685700 }, { "epoch": 6.99, "learning_rate": 3.504210078954663e-05, "loss": 0.4224, "step": 685800 }, { "epoch": 6.99, "learning_rate": 3.503556143037029e-05, "loss": 0.5932, "step": 685900 }, { "epoch": 6.99, "learning_rate": 3.502902182498504e-05, "loss": 0.4524, "step": 686000 }, { "epoch": 6.99, "learning_rate": 3.5022481973710635e-05, "loss": 0.4871, "step": 686100 }, { "epoch": 6.99, "learning_rate": 3.5015941876866834e-05, "loss": 0.5449, "step": 686200 }, { "epoch": 6.99, "learning_rate": 3.5009401534773404e-05, "loss": 0.5129, "step": 686300 }, { "epoch": 6.99, "learning_rate": 3.500286094775014e-05, "loss": 0.4619, "step": 686400 }, { "epoch": 6.99, "learning_rate": 3.499632011611683e-05, "loss": 0.5103, "step": 686500 }, { "epoch": 7.0, "learning_rate": 3.4989844452160704e-05, "loss": 0.5073, "step": 686600 }, { "epoch": 7.0, "learning_rate": 3.498330313470486e-05, "loss": 0.4449, "step": 686700 }, { "epoch": 7.0, "learning_rate": 3.4976761573595235e-05, "loss": 0.4858, "step": 686800 }, { "epoch": 7.0, "learning_rate": 3.497028518839955e-05, "loss": 0.4881, "step": 686900 }, { "epoch": 7.0, "learning_rate": 3.4963743143370446e-05, "loss": 0.48, "step": 687000 }, { "epoch": 7.0, "learning_rate": 3.495720085564393e-05, "loss": 0.3663, "step": 687100 }, { "epoch": 7.0, "learning_rate": 3.495065832553987e-05, "loss": 0.4919, "step": 687200 }, { "epoch": 7.0, "learning_rate": 3.494411555337817e-05, "loss": 0.4042, "step": 687300 }, { "epoch": 7.0, "learning_rate": 3.493757253947872e-05, "loss": 0.4198, "step": 687400 }, { "epoch": 7.0, "learning_rate": 3.493102928416144e-05, "loss": 0.5168, "step": 687500 }, { "epoch": 7.01, "learning_rate": 3.492448578774626e-05, "loss": 0.4357, "step": 687600 }, { "epoch": 7.01, "learning_rate": 3.49179420505531e-05, "loss": 0.4583, "step": 687700 }, { "epoch": 7.01, "learning_rate": 3.491139807290193e-05, "loss": 0.4146, "step": 687800 }, { "epoch": 7.01, "learning_rate": 3.49048538551127e-05, "loss": 0.4159, "step": 687900 }, { "epoch": 7.01, "learning_rate": 3.489830939750539e-05, "loss": 0.4622, "step": 688000 }, { "epoch": 7.01, "learning_rate": 3.489176470039999e-05, "loss": 0.4825, "step": 688100 }, { "epoch": 7.01, "learning_rate": 3.488521976411647e-05, "loss": 0.439, "step": 688200 }, { "epoch": 7.01, "learning_rate": 3.487867458897488e-05, "loss": 0.3803, "step": 688300 }, { "epoch": 7.01, "learning_rate": 3.48721291752952e-05, "loss": 0.466, "step": 688400 }, { "epoch": 7.01, "learning_rate": 3.486558352339748e-05, "loss": 0.4282, "step": 688500 }, { "epoch": 7.02, "learning_rate": 3.485903763360177e-05, "loss": 0.4476, "step": 688600 }, { "epoch": 7.02, "learning_rate": 3.485249150622812e-05, "loss": 0.4935, "step": 688700 }, { "epoch": 7.02, "learning_rate": 3.484594514159658e-05, "loss": 0.4784, "step": 688800 }, { "epoch": 7.02, "learning_rate": 3.483939854002725e-05, "loss": 0.5261, "step": 688900 }, { "epoch": 7.02, "learning_rate": 3.48328517018402e-05, "loss": 0.4173, "step": 689000 }, { "epoch": 7.02, "learning_rate": 3.4826304627355555e-05, "loss": 0.5298, "step": 689100 }, { "epoch": 7.02, "learning_rate": 3.4819757316893414e-05, "loss": 0.4602, "step": 689200 }, { "epoch": 7.02, "learning_rate": 3.481320977077389e-05, "loss": 0.375, "step": 689300 }, { "epoch": 7.02, "learning_rate": 3.4806661989317155e-05, "loss": 0.4102, "step": 689400 }, { "epoch": 7.02, "learning_rate": 3.480011397284333e-05, "loss": 0.4009, "step": 689500 }, { "epoch": 7.03, "learning_rate": 3.479356572167256e-05, "loss": 0.4654, "step": 689600 }, { "epoch": 7.03, "learning_rate": 3.478701723612504e-05, "loss": 0.5416, "step": 689700 }, { "epoch": 7.03, "learning_rate": 3.478046851652094e-05, "loss": 0.4529, "step": 689800 }, { "epoch": 7.03, "learning_rate": 3.477391956318047e-05, "loss": 0.5293, "step": 689900 }, { "epoch": 7.03, "learning_rate": 3.476737037642382e-05, "loss": 0.4453, "step": 690000 }, { "epoch": 7.03, "learning_rate": 3.47608209565712e-05, "loss": 0.4366, "step": 690100 }, { "epoch": 7.03, "learning_rate": 3.475427130394286e-05, "loss": 0.4538, "step": 690200 }, { "epoch": 7.03, "learning_rate": 3.4747721418859016e-05, "loss": 0.4469, "step": 690300 }, { "epoch": 7.03, "learning_rate": 3.4741171301639924e-05, "loss": 0.4821, "step": 690400 }, { "epoch": 7.03, "learning_rate": 3.473462095260585e-05, "loss": 0.4512, "step": 690500 }, { "epoch": 7.04, "learning_rate": 3.472807037207707e-05, "loss": 0.4911, "step": 690600 }, { "epoch": 7.04, "learning_rate": 3.472151956037387e-05, "loss": 0.4301, "step": 690700 }, { "epoch": 7.04, "learning_rate": 3.471496851781654e-05, "loss": 0.4448, "step": 690800 }, { "epoch": 7.04, "learning_rate": 3.470841724472538e-05, "loss": 0.4552, "step": 690900 }, { "epoch": 7.04, "learning_rate": 3.470186574142071e-05, "loss": 0.4682, "step": 691000 }, { "epoch": 7.04, "learning_rate": 3.469531400822287e-05, "loss": 0.463, "step": 691100 }, { "epoch": 7.04, "learning_rate": 3.4688762045452196e-05, "loss": 0.4729, "step": 691200 }, { "epoch": 7.04, "learning_rate": 3.4682209853429045e-05, "loss": 0.5005, "step": 691300 }, { "epoch": 7.04, "learning_rate": 3.4675657432473765e-05, "loss": 0.4537, "step": 691400 }, { "epoch": 7.05, "learning_rate": 3.4669104782906753e-05, "loss": 0.4472, "step": 691500 }, { "epoch": 7.05, "learning_rate": 3.466255190504836e-05, "loss": 0.4528, "step": 691600 }, { "epoch": 7.05, "learning_rate": 3.465599879921901e-05, "loss": 0.4924, "step": 691700 }, { "epoch": 7.05, "learning_rate": 3.464944546573912e-05, "loss": 0.4711, "step": 691800 }, { "epoch": 7.05, "learning_rate": 3.464289190492908e-05, "loss": 0.4731, "step": 691900 }, { "epoch": 7.05, "learning_rate": 3.4636338117109335e-05, "loss": 0.4543, "step": 692000 }, { "epoch": 7.05, "learning_rate": 3.462978410260032e-05, "loss": 0.4796, "step": 692100 }, { "epoch": 7.05, "learning_rate": 3.46232298617225e-05, "loss": 0.5059, "step": 692200 }, { "epoch": 7.05, "learning_rate": 3.461667539479632e-05, "loss": 0.4197, "step": 692300 }, { "epoch": 7.05, "learning_rate": 3.4610120702142274e-05, "loss": 0.4649, "step": 692400 }, { "epoch": 7.06, "learning_rate": 3.460356578408083e-05, "loss": 0.4655, "step": 692500 }, { "epoch": 7.06, "learning_rate": 3.45970106409325e-05, "loss": 0.4614, "step": 692600 }, { "epoch": 7.06, "learning_rate": 3.459052082780847e-05, "loss": 0.4403, "step": 692700 }, { "epoch": 7.06, "learning_rate": 3.4583965237690744e-05, "loss": 0.449, "step": 692800 }, { "epoch": 7.06, "learning_rate": 3.457740942344448e-05, "loss": 0.4312, "step": 692900 }, { "epoch": 7.06, "learning_rate": 3.457085338539021e-05, "loss": 0.447, "step": 693000 }, { "epoch": 7.06, "learning_rate": 3.456429712384849e-05, "loss": 0.4493, "step": 693100 }, { "epoch": 7.06, "learning_rate": 3.455774063913988e-05, "loss": 0.3755, "step": 693200 }, { "epoch": 7.06, "learning_rate": 3.455118393158495e-05, "loss": 0.5208, "step": 693300 }, { "epoch": 7.06, "learning_rate": 3.454462700150429e-05, "loss": 0.4794, "step": 693400 }, { "epoch": 7.07, "learning_rate": 3.4538069849218484e-05, "loss": 0.4975, "step": 693500 }, { "epoch": 7.07, "learning_rate": 3.453151247504815e-05, "loss": 0.4223, "step": 693600 }, { "epoch": 7.07, "learning_rate": 3.4524954879313894e-05, "loss": 0.4829, "step": 693700 }, { "epoch": 7.07, "learning_rate": 3.451839706233635e-05, "loss": 0.4875, "step": 693800 }, { "epoch": 7.07, "learning_rate": 3.451183902443616e-05, "loss": 0.4503, "step": 693900 }, { "epoch": 7.07, "learning_rate": 3.450528076593396e-05, "loss": 0.4839, "step": 694000 }, { "epoch": 7.07, "learning_rate": 3.449872228715041e-05, "loss": 0.3984, "step": 694100 }, { "epoch": 7.07, "learning_rate": 3.4492163588406196e-05, "loss": 0.4424, "step": 694200 }, { "epoch": 7.07, "learning_rate": 3.448560467002199e-05, "loss": 0.4793, "step": 694300 }, { "epoch": 7.07, "learning_rate": 3.447904553231848e-05, "loss": 0.4926, "step": 694400 }, { "epoch": 7.08, "learning_rate": 3.447248617561638e-05, "loss": 0.3824, "step": 694500 }, { "epoch": 7.08, "learning_rate": 3.4465926600236396e-05, "loss": 0.4499, "step": 694600 }, { "epoch": 7.08, "learning_rate": 3.445936680649925e-05, "loss": 0.478, "step": 694700 }, { "epoch": 7.08, "learning_rate": 3.4452806794725674e-05, "loss": 0.4477, "step": 694800 }, { "epoch": 7.08, "learning_rate": 3.444624656523642e-05, "loss": 0.444, "step": 694900 }, { "epoch": 7.08, "learning_rate": 3.4439686118352244e-05, "loss": 0.4918, "step": 695000 }, { "epoch": 7.08, "learning_rate": 3.443312545439392e-05, "loss": 0.4157, "step": 695100 }, { "epoch": 7.08, "learning_rate": 3.44265645736822e-05, "loss": 0.3893, "step": 695200 }, { "epoch": 7.08, "learning_rate": 3.4420003476537904e-05, "loss": 0.4554, "step": 695300 }, { "epoch": 7.08, "learning_rate": 3.441344216328181e-05, "loss": 0.3943, "step": 695400 }, { "epoch": 7.09, "learning_rate": 3.440688063423473e-05, "loss": 0.4832, "step": 695500 }, { "epoch": 7.09, "learning_rate": 3.44003188897175e-05, "loss": 0.4709, "step": 695600 }, { "epoch": 7.09, "learning_rate": 3.439375693005091e-05, "loss": 0.4348, "step": 695700 }, { "epoch": 7.09, "learning_rate": 3.438719475555585e-05, "loss": 0.4345, "step": 695800 }, { "epoch": 7.09, "learning_rate": 3.438063236655314e-05, "loss": 0.5666, "step": 695900 }, { "epoch": 7.09, "learning_rate": 3.4374069763363645e-05, "loss": 0.4231, "step": 696000 }, { "epoch": 7.09, "learning_rate": 3.436750694630824e-05, "loss": 0.4404, "step": 696100 }, { "epoch": 7.09, "learning_rate": 3.436094391570782e-05, "loss": 0.4461, "step": 696200 }, { "epoch": 7.09, "learning_rate": 3.4354380671883254e-05, "loss": 0.4784, "step": 696300 }, { "epoch": 7.1, "learning_rate": 3.434781721515547e-05, "loss": 0.3859, "step": 696400 }, { "epoch": 7.1, "learning_rate": 3.4341253545845366e-05, "loss": 0.4469, "step": 696500 }, { "epoch": 7.1, "learning_rate": 3.433468966427387e-05, "loss": 0.4637, "step": 696600 }, { "epoch": 7.1, "learning_rate": 3.432812557076192e-05, "loss": 0.4849, "step": 696700 }, { "epoch": 7.1, "learning_rate": 3.4321561265630454e-05, "loss": 0.4427, "step": 696800 }, { "epoch": 7.1, "learning_rate": 3.431499674920044e-05, "loss": 0.4595, "step": 696900 }, { "epoch": 7.1, "learning_rate": 3.4308432021792836e-05, "loss": 0.5135, "step": 697000 }, { "epoch": 7.1, "learning_rate": 3.4301932734150945e-05, "loss": 0.4431, "step": 697100 }, { "epoch": 7.1, "learning_rate": 3.4295367587852866e-05, "loss": 0.481, "step": 697200 }, { "epoch": 7.1, "learning_rate": 3.428880223153693e-05, "loss": 0.5014, "step": 697300 }, { "epoch": 7.11, "learning_rate": 3.4282236665524175e-05, "loss": 0.418, "step": 697400 }, { "epoch": 7.11, "learning_rate": 3.427573654892484e-05, "loss": 0.4557, "step": 697500 }, { "epoch": 7.11, "learning_rate": 3.426917056657042e-05, "loss": 0.4474, "step": 697600 }, { "epoch": 7.11, "learning_rate": 3.426260437547906e-05, "loss": 0.4937, "step": 697700 }, { "epoch": 7.11, "learning_rate": 3.425603797597176e-05, "loss": 0.4615, "step": 697800 }, { "epoch": 7.11, "learning_rate": 3.424947136836961e-05, "loss": 0.4478, "step": 697900 }, { "epoch": 7.11, "learning_rate": 3.4242904552993674e-05, "loss": 0.4965, "step": 698000 }, { "epoch": 7.11, "learning_rate": 3.423633753016502e-05, "loss": 0.4965, "step": 698100 }, { "epoch": 7.11, "learning_rate": 3.4229770300204746e-05, "loss": 0.4529, "step": 698200 }, { "epoch": 7.11, "learning_rate": 3.4223202863433945e-05, "loss": 0.4255, "step": 698300 }, { "epoch": 7.12, "learning_rate": 3.421663522017372e-05, "loss": 0.4588, "step": 698400 }, { "epoch": 7.12, "learning_rate": 3.421006737074521e-05, "loss": 0.4824, "step": 698500 }, { "epoch": 7.12, "learning_rate": 3.420349931546952e-05, "loss": 0.4586, "step": 698600 }, { "epoch": 7.12, "learning_rate": 3.41969310546678e-05, "loss": 0.4795, "step": 698700 }, { "epoch": 7.12, "learning_rate": 3.41903625886612e-05, "loss": 0.5428, "step": 698800 }, { "epoch": 7.12, "learning_rate": 3.418379391777087e-05, "loss": 0.3829, "step": 698900 }, { "epoch": 7.12, "learning_rate": 3.4177225042318e-05, "loss": 0.4889, "step": 699000 }, { "epoch": 7.12, "learning_rate": 3.4170655962623744e-05, "loss": 0.4927, "step": 699100 }, { "epoch": 7.12, "learning_rate": 3.41640866790093e-05, "loss": 0.4363, "step": 699200 }, { "epoch": 7.12, "learning_rate": 3.4157517191795865e-05, "loss": 0.447, "step": 699300 }, { "epoch": 7.13, "learning_rate": 3.4150947501304646e-05, "loss": 0.5036, "step": 699400 }, { "epoch": 7.13, "learning_rate": 3.414437760785688e-05, "loss": 0.5019, "step": 699500 }, { "epoch": 7.13, "learning_rate": 3.413780751177377e-05, "loss": 0.4529, "step": 699600 }, { "epoch": 7.13, "learning_rate": 3.4131237213376575e-05, "loss": 0.4887, "step": 699700 }, { "epoch": 7.13, "learning_rate": 3.412466671298653e-05, "loss": 0.4426, "step": 699800 }, { "epoch": 7.13, "learning_rate": 3.411809601092489e-05, "loss": 0.4296, "step": 699900 }, { "epoch": 7.13, "learning_rate": 3.411152510751293e-05, "loss": 0.4029, "step": 700000 }, { "epoch": 7.13, "eval_cer": 0.08554024028639817, "eval_loss": 0.5128045678138733, "eval_runtime": 9250.4927, "eval_samples_per_second": 5.914, "eval_steps_per_second": 0.37, "eval_wer": 0.1703941037529525, "step": 700000 }, { "epoch": 7.13, "learning_rate": 3.4104954003071945e-05, "loss": 0.5237, "step": 700100 }, { "epoch": 7.13, "learning_rate": 3.40983826979232e-05, "loss": 0.3956, "step": 700200 }, { "epoch": 7.13, "learning_rate": 3.409181119238799e-05, "loss": 0.4842, "step": 700300 }, { "epoch": 7.14, "learning_rate": 3.408523948678764e-05, "loss": 0.4793, "step": 700400 }, { "epoch": 7.14, "learning_rate": 3.4078667581443455e-05, "loss": 0.3795, "step": 700500 }, { "epoch": 7.14, "learning_rate": 3.4072095476676764e-05, "loss": 0.4664, "step": 700600 }, { "epoch": 7.14, "learning_rate": 3.4065523172808915e-05, "loss": 0.5034, "step": 700700 }, { "epoch": 7.14, "learning_rate": 3.405895067016123e-05, "loss": 0.506, "step": 700800 }, { "epoch": 7.14, "learning_rate": 3.405237796905509e-05, "loss": 0.427, "step": 700900 }, { "epoch": 7.14, "learning_rate": 3.4045870799784e-05, "loss": 0.4575, "step": 701000 }, { "epoch": 7.14, "learning_rate": 3.4039297704701606e-05, "loss": 0.4124, "step": 701100 }, { "epoch": 7.14, "learning_rate": 3.403272441212165e-05, "loss": 0.5877, "step": 701200 }, { "epoch": 7.14, "learning_rate": 3.4026150922365536e-05, "loss": 0.4166, "step": 701300 }, { "epoch": 7.15, "learning_rate": 3.401957723575467e-05, "loss": 0.4528, "step": 701400 }, { "epoch": 7.15, "learning_rate": 3.4013003352610474e-05, "loss": 0.5024, "step": 701500 }, { "epoch": 7.15, "learning_rate": 3.400642927325435e-05, "loss": 0.5133, "step": 701600 }, { "epoch": 7.15, "learning_rate": 3.399985499800776e-05, "loss": 0.4635, "step": 701700 }, { "epoch": 7.15, "learning_rate": 3.399328052719211e-05, "loss": 0.4744, "step": 701800 }, { "epoch": 7.15, "learning_rate": 3.398670586112888e-05, "loss": 0.3647, "step": 701900 }, { "epoch": 7.15, "learning_rate": 3.398013100013953e-05, "loss": 0.4557, "step": 702000 }, { "epoch": 7.15, "learning_rate": 3.397355594454552e-05, "loss": 0.4186, "step": 702100 }, { "epoch": 7.15, "learning_rate": 3.396698069466835e-05, "loss": 0.444, "step": 702200 }, { "epoch": 7.16, "learning_rate": 3.396040525082949e-05, "loss": 0.4612, "step": 702300 }, { "epoch": 7.16, "learning_rate": 3.395382961335044e-05, "loss": 0.411, "step": 702400 }, { "epoch": 7.16, "learning_rate": 3.3947253782552724e-05, "loss": 0.4276, "step": 702500 }, { "epoch": 7.16, "learning_rate": 3.394067775875786e-05, "loss": 0.514, "step": 702600 }, { "epoch": 7.16, "learning_rate": 3.393410154228736e-05, "loss": 0.4698, "step": 702700 }, { "epoch": 7.16, "learning_rate": 3.392752513346278e-05, "loss": 0.3696, "step": 702800 }, { "epoch": 7.16, "learning_rate": 3.392094853260565e-05, "loss": 0.4617, "step": 702900 }, { "epoch": 7.16, "learning_rate": 3.3914371740037544e-05, "loss": 0.385, "step": 703000 }, { "epoch": 7.16, "learning_rate": 3.390779475608001e-05, "loss": 0.4652, "step": 703100 }, { "epoch": 7.16, "learning_rate": 3.390121758105464e-05, "loss": 0.4706, "step": 703200 }, { "epoch": 7.17, "learning_rate": 3.3894640215283014e-05, "loss": 0.5484, "step": 703300 }, { "epoch": 7.17, "learning_rate": 3.3888062659086725e-05, "loss": 0.4208, "step": 703400 }, { "epoch": 7.17, "learning_rate": 3.388148491278736e-05, "loss": 0.4248, "step": 703500 }, { "epoch": 7.17, "learning_rate": 3.387490697670656e-05, "loss": 0.4515, "step": 703600 }, { "epoch": 7.17, "learning_rate": 3.386832885116593e-05, "loss": 0.4564, "step": 703700 }, { "epoch": 7.17, "learning_rate": 3.38617505364871e-05, "loss": 0.4001, "step": 703800 }, { "epoch": 7.17, "learning_rate": 3.385517203299173e-05, "loss": 0.426, "step": 703900 }, { "epoch": 7.17, "learning_rate": 3.384859334100143e-05, "loss": 0.4317, "step": 704000 }, { "epoch": 7.17, "learning_rate": 3.38420144608379e-05, "loss": 0.4805, "step": 704100 }, { "epoch": 7.17, "learning_rate": 3.383543539282279e-05, "loss": 0.4748, "step": 704200 }, { "epoch": 7.18, "learning_rate": 3.382885613727777e-05, "loss": 0.4435, "step": 704300 }, { "epoch": 7.18, "learning_rate": 3.382227669452454e-05, "loss": 0.4979, "step": 704400 }, { "epoch": 7.18, "learning_rate": 3.381569706488478e-05, "loss": 0.3693, "step": 704500 }, { "epoch": 7.18, "learning_rate": 3.380911724868022e-05, "loss": 0.4247, "step": 704600 }, { "epoch": 7.18, "learning_rate": 3.380253724623255e-05, "loss": 0.4368, "step": 704700 }, { "epoch": 7.18, "learning_rate": 3.37959570578635e-05, "loss": 0.4472, "step": 704800 }, { "epoch": 7.18, "learning_rate": 3.378937668389481e-05, "loss": 0.4358, "step": 704900 }, { "epoch": 7.18, "learning_rate": 3.37827961246482e-05, "loss": 0.4691, "step": 705000 }, { "epoch": 7.18, "learning_rate": 3.377621538044544e-05, "loss": 0.5104, "step": 705100 }, { "epoch": 7.18, "learning_rate": 3.37696344516083e-05, "loss": 0.5214, "step": 705200 }, { "epoch": 7.19, "learning_rate": 3.376305333845851e-05, "loss": 0.5475, "step": 705300 }, { "epoch": 7.19, "learning_rate": 3.375647204131787e-05, "loss": 0.4678, "step": 705400 }, { "epoch": 7.19, "learning_rate": 3.3749890560508176e-05, "loss": 0.499, "step": 705500 }, { "epoch": 7.19, "learning_rate": 3.3743308896351205e-05, "loss": 0.4634, "step": 705600 }, { "epoch": 7.19, "learning_rate": 3.3736727049168764e-05, "loss": 0.5355, "step": 705700 }, { "epoch": 7.19, "learning_rate": 3.373014501928268e-05, "loss": 0.4342, "step": 705800 }, { "epoch": 7.19, "learning_rate": 3.372356280701475e-05, "loss": 0.4234, "step": 705900 }, { "epoch": 7.19, "learning_rate": 3.371698041268684e-05, "loss": 0.4503, "step": 706000 }, { "epoch": 7.19, "learning_rate": 3.371039783662076e-05, "loss": 0.5151, "step": 706100 }, { "epoch": 7.19, "learning_rate": 3.370381507913837e-05, "loss": 0.4758, "step": 706200 }, { "epoch": 7.2, "learning_rate": 3.369723214056151e-05, "loss": 0.4164, "step": 706300 }, { "epoch": 7.2, "learning_rate": 3.369064902121208e-05, "loss": 0.4573, "step": 706400 }, { "epoch": 7.2, "learning_rate": 3.368406572141193e-05, "loss": 0.3828, "step": 706500 }, { "epoch": 7.2, "learning_rate": 3.367748224148296e-05, "loss": 0.4883, "step": 706600 }, { "epoch": 7.2, "learning_rate": 3.367089858174705e-05, "loss": 0.5137, "step": 706700 }, { "epoch": 7.2, "learning_rate": 3.3664314742526104e-05, "loss": 0.4469, "step": 706800 }, { "epoch": 7.2, "learning_rate": 3.3657730724142036e-05, "loss": 0.4437, "step": 706900 }, { "epoch": 7.2, "learning_rate": 3.365114652691676e-05, "loss": 0.4959, "step": 707000 }, { "epoch": 7.2, "learning_rate": 3.364456215117221e-05, "loss": 0.453, "step": 707100 }, { "epoch": 7.21, "learning_rate": 3.363797759723032e-05, "loss": 0.521, "step": 707200 }, { "epoch": 7.21, "learning_rate": 3.363139286541304e-05, "loss": 0.4771, "step": 707300 }, { "epoch": 7.21, "learning_rate": 3.362480795604232e-05, "loss": 0.4966, "step": 707400 }, { "epoch": 7.21, "learning_rate": 3.361828872118238e-05, "loss": 0.4583, "step": 707500 }, { "epoch": 7.21, "learning_rate": 3.361176931293028e-05, "loss": 0.4402, "step": 707600 }, { "epoch": 7.21, "learning_rate": 3.3605183876359635e-05, "loss": 0.4732, "step": 707700 }, { "epoch": 7.21, "learning_rate": 3.359859826351703e-05, "loss": 0.4188, "step": 707800 }, { "epoch": 7.21, "learning_rate": 3.359201247472444e-05, "loss": 0.4084, "step": 707900 }, { "epoch": 7.21, "learning_rate": 3.3585426510303866e-05, "loss": 0.4525, "step": 708000 }, { "epoch": 7.21, "learning_rate": 3.357884037057735e-05, "loss": 0.4459, "step": 708100 }, { "epoch": 7.22, "learning_rate": 3.357225405586688e-05, "loss": 0.5033, "step": 708200 }, { "epoch": 7.22, "learning_rate": 3.35656675664945e-05, "loss": 0.4297, "step": 708300 }, { "epoch": 7.22, "learning_rate": 3.355908090278228e-05, "loss": 0.4474, "step": 708400 }, { "epoch": 7.22, "learning_rate": 3.3552494065052216e-05, "loss": 0.5154, "step": 708500 }, { "epoch": 7.22, "learning_rate": 3.3545907053626414e-05, "loss": 0.4337, "step": 708600 }, { "epoch": 7.22, "learning_rate": 3.353931986882691e-05, "loss": 0.4766, "step": 708700 }, { "epoch": 7.22, "learning_rate": 3.353273251097578e-05, "loss": 0.4807, "step": 708800 }, { "epoch": 7.22, "learning_rate": 3.3526144980395125e-05, "loss": 0.5385, "step": 708900 }, { "epoch": 7.22, "learning_rate": 3.3519557277407016e-05, "loss": 0.4287, "step": 709000 }, { "epoch": 7.22, "learning_rate": 3.351296940233355e-05, "loss": 0.4038, "step": 709100 }, { "epoch": 7.23, "learning_rate": 3.3506447236814397e-05, "loss": 0.5092, "step": 709200 }, { "epoch": 7.23, "learning_rate": 3.3499859020249395e-05, "loss": 0.4135, "step": 709300 }, { "epoch": 7.23, "learning_rate": 3.349327063256218e-05, "loss": 0.4604, "step": 709400 }, { "epoch": 7.23, "learning_rate": 3.348668207407487e-05, "loss": 0.4525, "step": 709500 }, { "epoch": 7.23, "learning_rate": 3.3480093345109614e-05, "loss": 0.4759, "step": 709600 }, { "epoch": 7.23, "learning_rate": 3.347350444598856e-05, "loss": 0.39, "step": 709700 }, { "epoch": 7.23, "learning_rate": 3.3466915377033864e-05, "loss": 0.4728, "step": 709800 }, { "epoch": 7.23, "learning_rate": 3.3460326138567705e-05, "loss": 0.4434, "step": 709900 }, { "epoch": 7.23, "learning_rate": 3.345373673091225e-05, "loss": 0.4323, "step": 710000 }, { "epoch": 7.23, "learning_rate": 3.3447147154389667e-05, "loss": 0.4254, "step": 710100 }, { "epoch": 7.24, "learning_rate": 3.344055740932217e-05, "loss": 0.4392, "step": 710200 }, { "epoch": 7.24, "learning_rate": 3.343396749603194e-05, "loss": 0.4736, "step": 710300 }, { "epoch": 7.24, "learning_rate": 3.34273774148412e-05, "loss": 0.4429, "step": 710400 }, { "epoch": 7.24, "learning_rate": 3.3420787166072154e-05, "loss": 0.5264, "step": 710500 }, { "epoch": 7.24, "learning_rate": 3.341419675004703e-05, "loss": 0.4354, "step": 710600 }, { "epoch": 7.24, "learning_rate": 3.340760616708806e-05, "loss": 0.4974, "step": 710700 }, { "epoch": 7.24, "learning_rate": 3.340101541751749e-05, "loss": 0.4206, "step": 710800 }, { "epoch": 7.24, "learning_rate": 3.339442450165755e-05, "loss": 0.4174, "step": 710900 }, { "epoch": 7.24, "learning_rate": 3.3387833419830514e-05, "loss": 0.4937, "step": 711000 }, { "epoch": 7.24, "learning_rate": 3.338124217235865e-05, "loss": 0.5004, "step": 711100 }, { "epoch": 7.25, "learning_rate": 3.3374650759564206e-05, "loss": 0.4407, "step": 711200 }, { "epoch": 7.25, "learning_rate": 3.336805918176949e-05, "loss": 0.4816, "step": 711300 }, { "epoch": 7.25, "learning_rate": 3.336146743929678e-05, "loss": 0.4715, "step": 711400 }, { "epoch": 7.25, "learning_rate": 3.335487553246837e-05, "loss": 0.431, "step": 711500 }, { "epoch": 7.25, "learning_rate": 3.334828346160656e-05, "loss": 0.3767, "step": 711600 }, { "epoch": 7.25, "learning_rate": 3.334169122703368e-05, "loss": 0.5239, "step": 711700 }, { "epoch": 7.25, "learning_rate": 3.3335098829072034e-05, "loss": 0.4847, "step": 711800 }, { "epoch": 7.25, "learning_rate": 3.332850626804397e-05, "loss": 0.4398, "step": 711900 }, { "epoch": 7.25, "learning_rate": 3.33219135442718e-05, "loss": 0.4334, "step": 712000 }, { "epoch": 7.25, "learning_rate": 3.3315320658077885e-05, "loss": 0.4497, "step": 712100 }, { "epoch": 7.26, "learning_rate": 3.330872760978458e-05, "loss": 0.4365, "step": 712200 }, { "epoch": 7.26, "learning_rate": 3.3302134399714225e-05, "loss": 0.4006, "step": 712300 }, { "epoch": 7.26, "learning_rate": 3.329554102818923e-05, "loss": 0.5132, "step": 712400 }, { "epoch": 7.26, "learning_rate": 3.328894749553193e-05, "loss": 0.3632, "step": 712500 }, { "epoch": 7.26, "learning_rate": 3.3282353802064723e-05, "loss": 0.4345, "step": 712600 }, { "epoch": 7.26, "learning_rate": 3.3275759948110004e-05, "loss": 0.465, "step": 712700 }, { "epoch": 7.26, "learning_rate": 3.326916593399018e-05, "loss": 0.4819, "step": 712800 }, { "epoch": 7.26, "learning_rate": 3.326257176002764e-05, "loss": 0.4702, "step": 712900 }, { "epoch": 7.26, "learning_rate": 3.325597742654482e-05, "loss": 0.4405, "step": 713000 }, { "epoch": 7.27, "learning_rate": 3.324938293386413e-05, "loss": 0.4831, "step": 713100 }, { "epoch": 7.27, "learning_rate": 3.3242788282308004e-05, "loss": 0.4246, "step": 713200 }, { "epoch": 7.27, "learning_rate": 3.3236193472198885e-05, "loss": 0.4125, "step": 713300 }, { "epoch": 7.27, "learning_rate": 3.322959850385922e-05, "loss": 0.4668, "step": 713400 }, { "epoch": 7.27, "learning_rate": 3.3223003377611454e-05, "loss": 0.3906, "step": 713500 }, { "epoch": 7.27, "learning_rate": 3.3216408093778065e-05, "loss": 0.4307, "step": 713600 }, { "epoch": 7.27, "learning_rate": 3.3209812652681517e-05, "loss": 0.5007, "step": 713700 }, { "epoch": 7.27, "learning_rate": 3.3203217054644285e-05, "loss": 0.4026, "step": 713800 }, { "epoch": 7.27, "learning_rate": 3.3196621299988845e-05, "loss": 0.4598, "step": 713900 }, { "epoch": 7.27, "learning_rate": 3.319002538903771e-05, "loss": 0.4146, "step": 714000 }, { "epoch": 7.28, "learning_rate": 3.3183429322113366e-05, "loss": 0.4003, "step": 714100 }, { "epoch": 7.28, "learning_rate": 3.317683309953832e-05, "loss": 0.4651, "step": 714200 }, { "epoch": 7.28, "learning_rate": 3.317023672163511e-05, "loss": 0.4559, "step": 714300 }, { "epoch": 7.28, "learning_rate": 3.316364018872622e-05, "loss": 0.5292, "step": 714400 }, { "epoch": 7.28, "learning_rate": 3.315704350113422e-05, "loss": 0.4726, "step": 714500 }, { "epoch": 7.28, "learning_rate": 3.315044665918163e-05, "loss": 0.427, "step": 714600 }, { "epoch": 7.28, "learning_rate": 3.3143849663191006e-05, "loss": 0.4131, "step": 714700 }, { "epoch": 7.28, "learning_rate": 3.3137252513484885e-05, "loss": 0.4975, "step": 714800 }, { "epoch": 7.28, "learning_rate": 3.313065521038584e-05, "loss": 0.518, "step": 714900 }, { "epoch": 7.28, "learning_rate": 3.312405775421644e-05, "loss": 0.4901, "step": 715000 }, { "epoch": 7.29, "learning_rate": 3.311746014529926e-05, "loss": 0.4247, "step": 715100 }, { "epoch": 7.29, "learning_rate": 3.311086238395688e-05, "loss": 0.4625, "step": 715200 }, { "epoch": 7.29, "learning_rate": 3.3104264470511904e-05, "loss": 0.4399, "step": 715300 }, { "epoch": 7.29, "learning_rate": 3.309766640528691e-05, "loss": 0.4969, "step": 715400 }, { "epoch": 7.29, "learning_rate": 3.309106818860451e-05, "loss": 0.4213, "step": 715500 }, { "epoch": 7.29, "learning_rate": 3.308446982078733e-05, "loss": 0.4933, "step": 715600 }, { "epoch": 7.29, "learning_rate": 3.307787130215799e-05, "loss": 0.4425, "step": 715700 }, { "epoch": 7.29, "learning_rate": 3.30712726330391e-05, "loss": 0.526, "step": 715800 }, { "epoch": 7.29, "learning_rate": 3.3064673813753316e-05, "loss": 0.5263, "step": 715900 }, { "epoch": 7.29, "learning_rate": 3.3058074844623264e-05, "loss": 0.4562, "step": 716000 }, { "epoch": 7.3, "learning_rate": 3.30514757259716e-05, "loss": 0.5105, "step": 716100 }, { "epoch": 7.3, "learning_rate": 3.304487645812099e-05, "loss": 0.4353, "step": 716200 }, { "epoch": 7.3, "learning_rate": 3.303827704139408e-05, "loss": 0.471, "step": 716300 }, { "epoch": 7.3, "learning_rate": 3.303167747611357e-05, "loss": 0.4247, "step": 716400 }, { "epoch": 7.3, "learning_rate": 3.302507776260212e-05, "loss": 0.4171, "step": 716500 }, { "epoch": 7.3, "learning_rate": 3.3018477901182415e-05, "loss": 0.4924, "step": 716600 }, { "epoch": 7.3, "learning_rate": 3.301187789217715e-05, "loss": 0.3955, "step": 716700 }, { "epoch": 7.3, "learning_rate": 3.300527773590903e-05, "loss": 0.3796, "step": 716800 }, { "epoch": 7.3, "learning_rate": 3.299867743270078e-05, "loss": 0.5173, "step": 716900 }, { "epoch": 7.3, "learning_rate": 3.2992076982875083e-05, "loss": 0.532, "step": 717000 }, { "epoch": 7.31, "learning_rate": 3.298547638675468e-05, "loss": 0.3563, "step": 717100 }, { "epoch": 7.31, "learning_rate": 3.2978875644662304e-05, "loss": 0.4047, "step": 717200 }, { "epoch": 7.31, "learning_rate": 3.297227475692068e-05, "loss": 0.4877, "step": 717300 }, { "epoch": 7.31, "learning_rate": 3.296567372385256e-05, "loss": 0.4302, "step": 717400 }, { "epoch": 7.31, "learning_rate": 3.29590725457807e-05, "loss": 0.4902, "step": 717500 }, { "epoch": 7.31, "learning_rate": 3.295247122302784e-05, "loss": 0.4601, "step": 717600 }, { "epoch": 7.31, "learning_rate": 3.294586975591677e-05, "loss": 0.4622, "step": 717700 }, { "epoch": 7.31, "learning_rate": 3.2939334161593616e-05, "loss": 0.4232, "step": 717800 }, { "epoch": 7.31, "learning_rate": 3.293273240816994e-05, "loss": 0.4487, "step": 717900 }, { "epoch": 7.32, "learning_rate": 3.292613051135315e-05, "loss": 0.4178, "step": 718000 }, { "epoch": 7.32, "learning_rate": 3.2919528471466045e-05, "loss": 0.4076, "step": 718100 }, { "epoch": 7.32, "learning_rate": 3.2912926288831424e-05, "loss": 0.4042, "step": 718200 }, { "epoch": 7.32, "learning_rate": 3.290632396377209e-05, "loss": 0.4311, "step": 718300 }, { "epoch": 7.32, "learning_rate": 3.2899721496610853e-05, "loss": 0.4534, "step": 718400 }, { "epoch": 7.32, "learning_rate": 3.289311888767055e-05, "loss": 0.4882, "step": 718500 }, { "epoch": 7.32, "learning_rate": 3.2886516137273976e-05, "loss": 0.4842, "step": 718600 }, { "epoch": 7.32, "learning_rate": 3.2879913245744e-05, "loss": 0.4497, "step": 718700 }, { "epoch": 7.32, "learning_rate": 3.287331021340346e-05, "loss": 0.427, "step": 718800 }, { "epoch": 7.32, "learning_rate": 3.286677307299783e-05, "loss": 0.4375, "step": 718900 }, { "epoch": 7.33, "learning_rate": 3.286016976140475e-05, "loss": 0.4674, "step": 719000 }, { "epoch": 7.33, "learning_rate": 3.285356630996644e-05, "loss": 0.4905, "step": 719100 }, { "epoch": 7.33, "learning_rate": 3.2846962719005755e-05, "loss": 0.5627, "step": 719200 }, { "epoch": 7.33, "learning_rate": 3.2840358988845585e-05, "loss": 0.4357, "step": 719300 }, { "epoch": 7.33, "learning_rate": 3.283375511980882e-05, "loss": 0.464, "step": 719400 }, { "epoch": 7.33, "learning_rate": 3.2827151112218336e-05, "loss": 0.4485, "step": 719500 }, { "epoch": 7.33, "learning_rate": 3.2820546966397035e-05, "loss": 0.4553, "step": 719600 }, { "epoch": 7.33, "learning_rate": 3.281394268266782e-05, "loss": 0.4615, "step": 719700 }, { "epoch": 7.33, "learning_rate": 3.280733826135359e-05, "loss": 0.4348, "step": 719800 }, { "epoch": 7.33, "learning_rate": 3.2800733702777276e-05, "loss": 0.437, "step": 719900 }, { "epoch": 7.34, "learning_rate": 3.279412900726181e-05, "loss": 0.4091, "step": 720000 }, { "epoch": 7.34, "learning_rate": 3.27875241751301e-05, "loss": 0.407, "step": 720100 }, { "epoch": 7.34, "learning_rate": 3.27809192067051e-05, "loss": 0.4499, "step": 720200 }, { "epoch": 7.34, "learning_rate": 3.2774314102309736e-05, "loss": 0.5122, "step": 720300 }, { "epoch": 7.34, "learning_rate": 3.276770886226698e-05, "loss": 0.4187, "step": 720400 }, { "epoch": 7.34, "learning_rate": 3.2761103486899775e-05, "loss": 0.3999, "step": 720500 }, { "epoch": 7.34, "learning_rate": 3.2754497976531076e-05, "loss": 0.4975, "step": 720600 }, { "epoch": 7.34, "learning_rate": 3.2747892331483874e-05, "loss": 0.4307, "step": 720700 }, { "epoch": 7.34, "learning_rate": 3.274128655208113e-05, "loss": 0.4739, "step": 720800 }, { "epoch": 7.34, "learning_rate": 3.273468063864584e-05, "loss": 0.4882, "step": 720900 }, { "epoch": 7.35, "learning_rate": 3.272807459150099e-05, "loss": 0.4653, "step": 721000 }, { "epoch": 7.35, "learning_rate": 3.272146841096956e-05, "loss": 0.4065, "step": 721100 }, { "epoch": 7.35, "learning_rate": 3.271486209737457e-05, "loss": 0.4249, "step": 721200 }, { "epoch": 7.35, "learning_rate": 3.270825565103903e-05, "loss": 0.4704, "step": 721300 }, { "epoch": 7.35, "learning_rate": 3.270164907228594e-05, "loss": 0.4776, "step": 721400 }, { "epoch": 7.35, "learning_rate": 3.269504236143835e-05, "loss": 0.4859, "step": 721500 }, { "epoch": 7.35, "learning_rate": 3.2688435518819253e-05, "loss": 0.479, "step": 721600 }, { "epoch": 7.35, "learning_rate": 3.2681828544751716e-05, "loss": 0.42, "step": 721700 }, { "epoch": 7.35, "learning_rate": 3.267522143955876e-05, "loss": 0.4384, "step": 721800 }, { "epoch": 7.35, "learning_rate": 3.2668614203563436e-05, "loss": 0.4411, "step": 721900 }, { "epoch": 7.36, "learning_rate": 3.266200683708882e-05, "loss": 0.3853, "step": 722000 }, { "epoch": 7.36, "learning_rate": 3.2655399340457944e-05, "loss": 0.4367, "step": 722100 }, { "epoch": 7.36, "learning_rate": 3.264879171399389e-05, "loss": 0.3963, "step": 722200 }, { "epoch": 7.36, "learning_rate": 3.2642183958019726e-05, "loss": 0.4308, "step": 722300 }, { "epoch": 7.36, "learning_rate": 3.263557607285853e-05, "loss": 0.4543, "step": 722400 }, { "epoch": 7.36, "learning_rate": 3.26289680588334e-05, "loss": 0.4167, "step": 722500 }, { "epoch": 7.36, "learning_rate": 3.2622359916267424e-05, "loss": 0.4283, "step": 722600 }, { "epoch": 7.36, "learning_rate": 3.2615751645483686e-05, "loss": 0.4649, "step": 722700 }, { "epoch": 7.36, "learning_rate": 3.260914324680531e-05, "loss": 0.4988, "step": 722800 }, { "epoch": 7.37, "learning_rate": 3.260260080644832e-05, "loss": 0.3935, "step": 722900 }, { "epoch": 7.37, "learning_rate": 3.259599215422088e-05, "loss": 0.4048, "step": 723000 }, { "epoch": 7.37, "learning_rate": 3.2589383375064905e-05, "loss": 0.442, "step": 723100 }, { "epoch": 7.37, "learning_rate": 3.2582774469303536e-05, "loss": 0.5406, "step": 723200 }, { "epoch": 7.37, "learning_rate": 3.257623152820439e-05, "loss": 0.4263, "step": 723300 }, { "epoch": 7.37, "learning_rate": 3.2569622371459646e-05, "loss": 0.4512, "step": 723400 }, { "epoch": 7.37, "learning_rate": 3.25630130890757e-05, "loss": 0.5269, "step": 723500 }, { "epoch": 7.37, "learning_rate": 3.255640368137571e-05, "loss": 0.3814, "step": 723600 }, { "epoch": 7.37, "learning_rate": 3.254979414868283e-05, "loss": 0.4156, "step": 723700 }, { "epoch": 7.37, "learning_rate": 3.254318449132024e-05, "loss": 0.4891, "step": 723800 }, { "epoch": 7.38, "learning_rate": 3.2536574709611106e-05, "loss": 0.4262, "step": 723900 }, { "epoch": 7.38, "learning_rate": 3.252996480387861e-05, "loss": 0.4947, "step": 724000 }, { "epoch": 7.38, "learning_rate": 3.2523354774445925e-05, "loss": 0.4076, "step": 724100 }, { "epoch": 7.38, "learning_rate": 3.251674462163626e-05, "loss": 0.4013, "step": 724200 }, { "epoch": 7.38, "learning_rate": 3.251013434577279e-05, "loss": 0.4886, "step": 724300 }, { "epoch": 7.38, "learning_rate": 3.250352394717873e-05, "loss": 0.4672, "step": 724400 }, { "epoch": 7.38, "learning_rate": 3.24969134261773e-05, "loss": 0.4508, "step": 724500 }, { "epoch": 7.38, "learning_rate": 3.249030278309169e-05, "loss": 0.3874, "step": 724600 }, { "epoch": 7.38, "learning_rate": 3.248369201824515e-05, "loss": 0.4552, "step": 724700 }, { "epoch": 7.38, "learning_rate": 3.2477081131960885e-05, "loss": 0.4548, "step": 724800 }, { "epoch": 7.39, "learning_rate": 3.2470470124562136e-05, "loss": 0.4618, "step": 724900 }, { "epoch": 7.39, "learning_rate": 3.246392510825089e-05, "loss": 0.4555, "step": 725000 }, { "epoch": 7.39, "learning_rate": 3.245731386079597e-05, "loss": 0.4919, "step": 725100 }, { "epoch": 7.39, "learning_rate": 3.245070249319308e-05, "loss": 0.4294, "step": 725200 }, { "epoch": 7.39, "learning_rate": 3.244409100576545e-05, "loss": 0.4116, "step": 725300 }, { "epoch": 7.39, "learning_rate": 3.243747939883636e-05, "loss": 0.4804, "step": 725400 }, { "epoch": 7.39, "learning_rate": 3.2430867672729075e-05, "loss": 0.4597, "step": 725500 }, { "epoch": 7.39, "learning_rate": 3.242425582776686e-05, "loss": 0.3916, "step": 725600 }, { "epoch": 7.39, "learning_rate": 3.2417643864273006e-05, "loss": 0.4334, "step": 725700 }, { "epoch": 7.39, "learning_rate": 3.2411031782570796e-05, "loss": 0.4799, "step": 725800 }, { "epoch": 7.4, "learning_rate": 3.240441958298352e-05, "loss": 0.4739, "step": 725900 }, { "epoch": 7.4, "learning_rate": 3.2397807265834476e-05, "loss": 0.4246, "step": 726000 }, { "epoch": 7.4, "learning_rate": 3.2391194831446964e-05, "loss": 0.4076, "step": 726100 }, { "epoch": 7.4, "learning_rate": 3.23845822801443e-05, "loss": 0.4704, "step": 726200 }, { "epoch": 7.4, "learning_rate": 3.237796961224978e-05, "loss": 0.468, "step": 726300 }, { "epoch": 7.4, "learning_rate": 3.2371356828086744e-05, "loss": 0.4758, "step": 726400 }, { "epoch": 7.4, "learning_rate": 3.236474392797852e-05, "loss": 0.3783, "step": 726500 }, { "epoch": 7.4, "learning_rate": 3.235813091224842e-05, "loss": 0.4618, "step": 726600 }, { "epoch": 7.4, "learning_rate": 3.235151778121979e-05, "loss": 0.4859, "step": 726700 }, { "epoch": 7.4, "learning_rate": 3.234490453521598e-05, "loss": 0.4638, "step": 726800 }, { "epoch": 7.41, "learning_rate": 3.233829117456033e-05, "loss": 0.4402, "step": 726900 }, { "epoch": 7.41, "learning_rate": 3.2331677699576184e-05, "loss": 0.3863, "step": 727000 }, { "epoch": 7.41, "learning_rate": 3.2325064110586936e-05, "loss": 0.3704, "step": 727100 }, { "epoch": 7.41, "learning_rate": 3.231845040791591e-05, "loss": 0.4645, "step": 727200 }, { "epoch": 7.41, "learning_rate": 3.231183659188651e-05, "loss": 0.3886, "step": 727300 }, { "epoch": 7.41, "learning_rate": 3.230522266282209e-05, "loss": 0.442, "step": 727400 }, { "epoch": 7.41, "learning_rate": 3.2298608621046025e-05, "loss": 0.447, "step": 727500 }, { "epoch": 7.41, "learning_rate": 3.229199446688173e-05, "loss": 0.4259, "step": 727600 }, { "epoch": 7.41, "learning_rate": 3.2285380200652584e-05, "loss": 0.4432, "step": 727700 }, { "epoch": 7.41, "learning_rate": 3.2278765822681974e-05, "loss": 0.4265, "step": 727800 }, { "epoch": 7.42, "learning_rate": 3.227215133329333e-05, "loss": 0.5108, "step": 727900 }, { "epoch": 7.42, "learning_rate": 3.226553673281003e-05, "loss": 0.4478, "step": 728000 }, { "epoch": 7.42, "learning_rate": 3.225892202155551e-05, "loss": 0.4618, "step": 728100 }, { "epoch": 7.42, "learning_rate": 3.225230719985319e-05, "loss": 0.437, "step": 728200 }, { "epoch": 7.42, "learning_rate": 3.224569226802648e-05, "loss": 0.4036, "step": 728300 }, { "epoch": 7.42, "learning_rate": 3.223907722639882e-05, "loss": 0.3911, "step": 728400 }, { "epoch": 7.42, "learning_rate": 3.2232462075293656e-05, "loss": 0.44, "step": 728500 }, { "epoch": 7.42, "learning_rate": 3.2225846815034406e-05, "loss": 0.4357, "step": 728600 }, { "epoch": 7.42, "learning_rate": 3.221923144594454e-05, "loss": 0.4229, "step": 728700 }, { "epoch": 7.43, "learning_rate": 3.221261596834749e-05, "loss": 0.391, "step": 728800 }, { "epoch": 7.43, "learning_rate": 3.220600038256673e-05, "loss": 0.4338, "step": 728900 }, { "epoch": 7.43, "learning_rate": 3.2199384688925715e-05, "loss": 0.3893, "step": 729000 }, { "epoch": 7.43, "learning_rate": 3.219276888774791e-05, "loss": 0.4304, "step": 729100 }, { "epoch": 7.43, "learning_rate": 3.21861529793568e-05, "loss": 0.4552, "step": 729200 }, { "epoch": 7.43, "learning_rate": 3.2179536964075856e-05, "loss": 0.3944, "step": 729300 }, { "epoch": 7.43, "learning_rate": 3.217292084222855e-05, "loss": 0.4667, "step": 729400 }, { "epoch": 7.43, "learning_rate": 3.216630461413839e-05, "loss": 0.4143, "step": 729500 }, { "epoch": 7.43, "learning_rate": 3.215968828012886e-05, "loss": 0.4525, "step": 729600 }, { "epoch": 7.43, "learning_rate": 3.215307184052347e-05, "loss": 0.3993, "step": 729700 }, { "epoch": 7.44, "learning_rate": 3.214645529564571e-05, "loss": 0.475, "step": 729800 }, { "epoch": 7.44, "learning_rate": 3.213983864581909e-05, "loss": 0.482, "step": 729900 }, { "epoch": 7.44, "learning_rate": 3.213322189136713e-05, "loss": 0.4554, "step": 730000 }, { "epoch": 7.44, "learning_rate": 3.2126605032613356e-05, "loss": 0.4303, "step": 730100 }, { "epoch": 7.44, "learning_rate": 3.211998806988128e-05, "loss": 0.4397, "step": 730200 }, { "epoch": 7.44, "learning_rate": 3.211337100349445e-05, "loss": 0.37, "step": 730300 }, { "epoch": 7.44, "learning_rate": 3.2106753833776374e-05, "loss": 0.4067, "step": 730400 }, { "epoch": 7.44, "learning_rate": 3.210013656105062e-05, "loss": 0.3679, "step": 730500 }, { "epoch": 7.44, "learning_rate": 3.209351918564073e-05, "loss": 0.4635, "step": 730600 }, { "epoch": 7.44, "learning_rate": 3.208690170787023e-05, "loss": 0.4427, "step": 730700 }, { "epoch": 7.45, "learning_rate": 3.208028412806269e-05, "loss": 0.3532, "step": 730800 }, { "epoch": 7.45, "learning_rate": 3.207366644654168e-05, "loss": 0.4387, "step": 730900 }, { "epoch": 7.45, "learning_rate": 3.2067048663630745e-05, "loss": 0.4334, "step": 731000 }, { "epoch": 7.45, "learning_rate": 3.206043077965347e-05, "loss": 0.4808, "step": 731100 }, { "epoch": 7.45, "learning_rate": 3.205381279493344e-05, "loss": 0.4265, "step": 731200 }, { "epoch": 7.45, "learning_rate": 3.20471947097942e-05, "loss": 0.4377, "step": 731300 }, { "epoch": 7.45, "learning_rate": 3.204057652455936e-05, "loss": 0.4497, "step": 731400 }, { "epoch": 7.45, "learning_rate": 3.203395823955251e-05, "loss": 0.4512, "step": 731500 }, { "epoch": 7.45, "learning_rate": 3.202733985509724e-05, "loss": 0.453, "step": 731600 }, { "epoch": 7.45, "learning_rate": 3.202072137151715e-05, "loss": 0.4162, "step": 731700 }, { "epoch": 7.46, "learning_rate": 3.201410278913584e-05, "loss": 0.4843, "step": 731800 }, { "epoch": 7.46, "learning_rate": 3.200748410827693e-05, "loss": 0.4794, "step": 731900 }, { "epoch": 7.46, "learning_rate": 3.2000865329264026e-05, "loss": 0.5307, "step": 732000 }, { "epoch": 7.46, "learning_rate": 3.199431264167237e-05, "loss": 0.4165, "step": 732100 }, { "epoch": 7.46, "learning_rate": 3.1987693668295815e-05, "loss": 0.4332, "step": 732200 }, { "epoch": 7.46, "learning_rate": 3.198107459773288e-05, "loss": 0.4313, "step": 732300 }, { "epoch": 7.46, "learning_rate": 3.197445543030724e-05, "loss": 0.3313, "step": 732400 }, { "epoch": 7.46, "learning_rate": 3.1967836166342506e-05, "loss": 0.4287, "step": 732500 }, { "epoch": 7.46, "learning_rate": 3.196121680616232e-05, "loss": 0.4063, "step": 732600 }, { "epoch": 7.46, "learning_rate": 3.195459735009034e-05, "loss": 0.408, "step": 732700 }, { "epoch": 7.47, "learning_rate": 3.194797779845021e-05, "loss": 0.4181, "step": 732800 }, { "epoch": 7.47, "learning_rate": 3.194135815156559e-05, "loss": 0.5025, "step": 732900 }, { "epoch": 7.47, "learning_rate": 3.193473840976015e-05, "loss": 0.4057, "step": 733000 }, { "epoch": 7.47, "learning_rate": 3.1928184772188764e-05, "loss": 0.4441, "step": 733100 }, { "epoch": 7.47, "learning_rate": 3.1921564842453803e-05, "loss": 0.4717, "step": 733200 }, { "epoch": 7.47, "learning_rate": 3.1914944818765786e-05, "loss": 0.4437, "step": 733300 }, { "epoch": 7.47, "learning_rate": 3.1908324701448393e-05, "loss": 0.4322, "step": 733400 }, { "epoch": 7.47, "learning_rate": 3.190170449082531e-05, "loss": 0.4509, "step": 733500 }, { "epoch": 7.47, "learning_rate": 3.189508418722022e-05, "loss": 0.4946, "step": 733600 }, { "epoch": 7.48, "learning_rate": 3.1888463790956835e-05, "loss": 0.3716, "step": 733700 }, { "epoch": 7.48, "learning_rate": 3.188184330235884e-05, "loss": 0.4428, "step": 733800 }, { "epoch": 7.48, "learning_rate": 3.187522272174993e-05, "loss": 0.3937, "step": 733900 }, { "epoch": 7.48, "learning_rate": 3.186860204945382e-05, "loss": 0.3972, "step": 734000 }, { "epoch": 7.48, "learning_rate": 3.1861981285794224e-05, "loss": 0.508, "step": 734100 }, { "epoch": 7.48, "learning_rate": 3.1855360431094854e-05, "loss": 0.4538, "step": 734200 }, { "epoch": 7.48, "learning_rate": 3.1848739485679434e-05, "loss": 0.4172, "step": 734300 }, { "epoch": 7.48, "learning_rate": 3.184211844987168e-05, "loss": 0.4152, "step": 734400 }, { "epoch": 7.48, "learning_rate": 3.183556353569887e-05, "loss": 0.4482, "step": 734500 }, { "epoch": 7.48, "learning_rate": 3.1828942320973504e-05, "loss": 0.4063, "step": 734600 }, { "epoch": 7.49, "learning_rate": 3.182232101682377e-05, "loss": 0.417, "step": 734700 }, { "epoch": 7.49, "learning_rate": 3.181569962357341e-05, "loss": 0.4747, "step": 734800 }, { "epoch": 7.49, "learning_rate": 3.1809078141546185e-05, "loss": 0.4781, "step": 734900 }, { "epoch": 7.49, "learning_rate": 3.1802456571065824e-05, "loss": 0.3714, "step": 735000 }, { "epoch": 7.49, "learning_rate": 3.1795834912456096e-05, "loss": 0.4975, "step": 735100 }, { "epoch": 7.49, "learning_rate": 3.178921316604076e-05, "loss": 0.4689, "step": 735200 }, { "epoch": 7.49, "learning_rate": 3.178259133214358e-05, "loss": 0.3757, "step": 735300 }, { "epoch": 7.49, "learning_rate": 3.177596941108831e-05, "loss": 0.4609, "step": 735400 }, { "epoch": 7.49, "learning_rate": 3.176934740319874e-05, "loss": 0.4407, "step": 735500 }, { "epoch": 7.49, "learning_rate": 3.176272530879865e-05, "loss": 0.4899, "step": 735600 }, { "epoch": 7.5, "learning_rate": 3.175610312821181e-05, "loss": 0.4573, "step": 735700 }, { "epoch": 7.5, "learning_rate": 3.174948086176199e-05, "loss": 0.4603, "step": 735800 }, { "epoch": 7.5, "learning_rate": 3.174285850977301e-05, "loss": 0.454, "step": 735900 }, { "epoch": 7.5, "learning_rate": 3.173623607256865e-05, "loss": 0.4728, "step": 736000 }, { "epoch": 7.5, "learning_rate": 3.1729613550472697e-05, "loss": 0.4214, "step": 736100 }, { "epoch": 7.5, "learning_rate": 3.1722990943808976e-05, "loss": 0.3795, "step": 736200 }, { "epoch": 7.5, "learning_rate": 3.171636825290127e-05, "loss": 0.4667, "step": 736300 }, { "epoch": 7.5, "learning_rate": 3.1709745478073414e-05, "loss": 0.4821, "step": 736400 }, { "epoch": 7.5, "learning_rate": 3.1703122619649194e-05, "loss": 0.4754, "step": 736500 }, { "epoch": 7.5, "learning_rate": 3.169649967795244e-05, "loss": 0.4166, "step": 736600 }, { "epoch": 7.51, "learning_rate": 3.168987665330698e-05, "loss": 0.4351, "step": 736700 }, { "epoch": 7.51, "learning_rate": 3.168325354603663e-05, "loss": 0.4827, "step": 736800 }, { "epoch": 7.51, "learning_rate": 3.167663035646525e-05, "loss": 0.4112, "step": 736900 }, { "epoch": 7.51, "learning_rate": 3.167000708491664e-05, "loss": 0.4715, "step": 737000 }, { "epoch": 7.51, "learning_rate": 3.1663383731714635e-05, "loss": 0.4351, "step": 737100 }, { "epoch": 7.51, "learning_rate": 3.165676029718311e-05, "loss": 0.4208, "step": 737200 }, { "epoch": 7.51, "learning_rate": 3.165013678164588e-05, "loss": 0.4629, "step": 737300 }, { "epoch": 7.51, "learning_rate": 3.164351318542681e-05, "loss": 0.4762, "step": 737400 }, { "epoch": 7.51, "learning_rate": 3.163688950884976e-05, "loss": 0.4246, "step": 737500 }, { "epoch": 7.51, "learning_rate": 3.1630265752238575e-05, "loss": 0.4077, "step": 737600 }, { "epoch": 7.52, "learning_rate": 3.1623641915917135e-05, "loss": 0.4031, "step": 737700 }, { "epoch": 7.52, "learning_rate": 3.161701800020929e-05, "loss": 0.4337, "step": 737800 }, { "epoch": 7.52, "learning_rate": 3.161039400543891e-05, "loss": 0.4196, "step": 737900 }, { "epoch": 7.52, "learning_rate": 3.1603769931929866e-05, "loss": 0.4746, "step": 738000 }, { "epoch": 7.52, "learning_rate": 3.159714578000606e-05, "loss": 0.4703, "step": 738100 }, { "epoch": 7.52, "learning_rate": 3.1590521549991347e-05, "loss": 0.3703, "step": 738200 }, { "epoch": 7.52, "learning_rate": 3.1583897242209626e-05, "loss": 0.4371, "step": 738300 }, { "epoch": 7.52, "learning_rate": 3.157727285698478e-05, "loss": 0.4376, "step": 738400 }, { "epoch": 7.52, "learning_rate": 3.1570648394640704e-05, "loss": 0.5279, "step": 738500 }, { "epoch": 7.52, "learning_rate": 3.1564090101271765e-05, "loss": 0.4325, "step": 738600 }, { "epoch": 7.53, "learning_rate": 3.1557465486424027e-05, "loss": 0.4294, "step": 738700 }, { "epoch": 7.53, "learning_rate": 3.155084079542553e-05, "loss": 0.4708, "step": 738800 }, { "epoch": 7.53, "learning_rate": 3.154421602860018e-05, "loss": 0.4308, "step": 738900 }, { "epoch": 7.53, "learning_rate": 3.1537591186271866e-05, "loss": 0.411, "step": 739000 }, { "epoch": 7.53, "learning_rate": 3.153096626876453e-05, "loss": 0.3781, "step": 739100 }, { "epoch": 7.53, "learning_rate": 3.152434127640207e-05, "loss": 0.3861, "step": 739200 }, { "epoch": 7.53, "learning_rate": 3.151771620950842e-05, "loss": 0.333, "step": 739300 }, { "epoch": 7.53, "learning_rate": 3.151109106840752e-05, "loss": 0.4282, "step": 739400 }, { "epoch": 7.53, "learning_rate": 3.150446585342326e-05, "loss": 0.4822, "step": 739500 }, { "epoch": 7.54, "learning_rate": 3.149784056487962e-05, "loss": 0.4217, "step": 739600 }, { "epoch": 7.54, "learning_rate": 3.149121520310051e-05, "loss": 0.3608, "step": 739700 }, { "epoch": 7.54, "learning_rate": 3.148458976840987e-05, "loss": 0.4601, "step": 739800 }, { "epoch": 7.54, "learning_rate": 3.1477964261131646e-05, "loss": 0.4014, "step": 739900 }, { "epoch": 7.54, "learning_rate": 3.147140493774185e-05, "loss": 0.4002, "step": 740000 }, { "epoch": 7.54, "learning_rate": 3.146477928697812e-05, "loss": 0.3793, "step": 740100 }, { "epoch": 7.54, "learning_rate": 3.1458153564595416e-05, "loss": 0.4912, "step": 740200 }, { "epoch": 7.54, "learning_rate": 3.14515277709177e-05, "loss": 0.391, "step": 740300 }, { "epoch": 7.54, "learning_rate": 3.144490190626895e-05, "loss": 0.4297, "step": 740400 }, { "epoch": 7.54, "learning_rate": 3.1438275970973104e-05, "loss": 0.477, "step": 740500 }, { "epoch": 7.55, "learning_rate": 3.143164996535415e-05, "loss": 0.3904, "step": 740600 }, { "epoch": 7.55, "learning_rate": 3.142502388973607e-05, "loss": 0.414, "step": 740700 }, { "epoch": 7.55, "learning_rate": 3.1418397744442806e-05, "loss": 0.4402, "step": 740800 }, { "epoch": 7.55, "learning_rate": 3.141177152979838e-05, "loss": 0.4457, "step": 740900 }, { "epoch": 7.55, "learning_rate": 3.140514524612674e-05, "loss": 0.4133, "step": 741000 }, { "epoch": 7.55, "learning_rate": 3.139851889375187e-05, "loss": 0.396, "step": 741100 }, { "epoch": 7.55, "learning_rate": 3.1391892472997785e-05, "loss": 0.4562, "step": 741200 }, { "epoch": 7.55, "learning_rate": 3.138526598418846e-05, "loss": 0.4794, "step": 741300 }, { "epoch": 7.55, "learning_rate": 3.13786394276479e-05, "loss": 0.4545, "step": 741400 }, { "epoch": 7.55, "learning_rate": 3.13720128037001e-05, "loss": 0.4396, "step": 741500 }, { "epoch": 7.56, "learning_rate": 3.1365386112669054e-05, "loss": 0.3842, "step": 741600 }, { "epoch": 7.56, "learning_rate": 3.135875935487879e-05, "loss": 0.5155, "step": 741700 }, { "epoch": 7.56, "learning_rate": 3.135213253065328e-05, "loss": 0.3741, "step": 741800 }, { "epoch": 7.56, "learning_rate": 3.134550564031657e-05, "loss": 0.3812, "step": 741900 }, { "epoch": 7.56, "learning_rate": 3.133887868419267e-05, "loss": 0.3706, "step": 742000 }, { "epoch": 7.56, "learning_rate": 3.133225166260559e-05, "loss": 0.4918, "step": 742100 }, { "epoch": 7.56, "learning_rate": 3.1325624575879353e-05, "loss": 0.4312, "step": 742200 }, { "epoch": 7.56, "learning_rate": 3.131899742433798e-05, "loss": 0.3926, "step": 742300 }, { "epoch": 7.56, "learning_rate": 3.131237020830551e-05, "loss": 0.4316, "step": 742400 }, { "epoch": 7.56, "learning_rate": 3.1305742928105956e-05, "loss": 0.4191, "step": 742500 }, { "epoch": 7.57, "learning_rate": 3.1299115584063384e-05, "loss": 0.4262, "step": 742600 }, { "epoch": 7.57, "learning_rate": 3.1292488176501804e-05, "loss": 0.4204, "step": 742700 }, { "epoch": 7.57, "learning_rate": 3.128586070574526e-05, "loss": 0.4876, "step": 742800 }, { "epoch": 7.57, "learning_rate": 3.1279233172117814e-05, "loss": 0.481, "step": 742900 }, { "epoch": 7.57, "learning_rate": 3.1272605575943484e-05, "loss": 0.4077, "step": 743000 }, { "epoch": 7.57, "learning_rate": 3.1265977917546345e-05, "loss": 0.478, "step": 743100 }, { "epoch": 7.57, "learning_rate": 3.125935019725044e-05, "loss": 0.3906, "step": 743200 }, { "epoch": 7.57, "learning_rate": 3.125272241537983e-05, "loss": 0.4347, "step": 743300 }, { "epoch": 7.57, "learning_rate": 3.1246094572258565e-05, "loss": 0.4911, "step": 743400 }, { "epoch": 7.57, "learning_rate": 3.1239466668210704e-05, "loss": 0.4506, "step": 743500 }, { "epoch": 7.58, "learning_rate": 3.123283870356033e-05, "loss": 0.4666, "step": 743600 }, { "epoch": 7.58, "learning_rate": 3.12262106786315e-05, "loss": 0.5144, "step": 743700 }, { "epoch": 7.58, "learning_rate": 3.121958259374828e-05, "loss": 0.4945, "step": 743800 }, { "epoch": 7.58, "learning_rate": 3.121295444923476e-05, "loss": 0.4113, "step": 743900 }, { "epoch": 7.58, "learning_rate": 3.1206326245415e-05, "loss": 0.4156, "step": 744000 }, { "epoch": 7.58, "learning_rate": 3.1199697982613083e-05, "loss": 0.4257, "step": 744100 }, { "epoch": 7.58, "learning_rate": 3.11930696611531e-05, "loss": 0.472, "step": 744200 }, { "epoch": 7.58, "learning_rate": 3.1186441281359126e-05, "loss": 0.4421, "step": 744300 }, { "epoch": 7.58, "learning_rate": 3.117981284355525e-05, "loss": 0.4192, "step": 744400 }, { "epoch": 7.59, "learning_rate": 3.117325063330495e-05, "loss": 0.4608, "step": 744500 }, { "epoch": 7.59, "learning_rate": 3.116662208102557e-05, "loss": 0.4638, "step": 744600 }, { "epoch": 7.59, "learning_rate": 3.115999347170533e-05, "loss": 0.3901, "step": 744700 }, { "epoch": 7.59, "learning_rate": 3.1153364805668324e-05, "loss": 0.3941, "step": 744800 }, { "epoch": 7.59, "learning_rate": 3.114673608323868e-05, "loss": 0.4426, "step": 744900 }, { "epoch": 7.59, "learning_rate": 3.1140107304740475e-05, "loss": 0.4065, "step": 745000 }, { "epoch": 7.59, "learning_rate": 3.113347847049782e-05, "loss": 0.4328, "step": 745100 }, { "epoch": 7.59, "learning_rate": 3.112684958083485e-05, "loss": 0.4396, "step": 745200 }, { "epoch": 7.59, "learning_rate": 3.1120286925794905e-05, "loss": 0.5135, "step": 745300 }, { "epoch": 7.59, "learning_rate": 3.111365792680972e-05, "loss": 0.443, "step": 745400 }, { "epoch": 7.6, "learning_rate": 3.11070288733733e-05, "loss": 0.4254, "step": 745500 }, { "epoch": 7.6, "learning_rate": 3.110039976580979e-05, "loss": 0.4511, "step": 745600 }, { "epoch": 7.6, "learning_rate": 3.109377060444328e-05, "loss": 0.4012, "step": 745700 }, { "epoch": 7.6, "learning_rate": 3.108714138959793e-05, "loss": 0.4863, "step": 745800 }, { "epoch": 7.6, "learning_rate": 3.108051212159784e-05, "loss": 0.4282, "step": 745900 }, { "epoch": 7.6, "learning_rate": 3.107388280076717e-05, "loss": 0.5036, "step": 746000 }, { "epoch": 7.6, "learning_rate": 3.1067253427430035e-05, "loss": 0.46, "step": 746100 }, { "epoch": 7.6, "learning_rate": 3.106062400191057e-05, "loss": 0.4919, "step": 746200 }, { "epoch": 7.6, "learning_rate": 3.1053994524532926e-05, "loss": 0.3813, "step": 746300 }, { "epoch": 7.6, "learning_rate": 3.1047364995621234e-05, "loss": 0.4038, "step": 746400 }, { "epoch": 7.61, "learning_rate": 3.104073541549965e-05, "loss": 0.5234, "step": 746500 }, { "epoch": 7.61, "learning_rate": 3.103410578449232e-05, "loss": 0.4496, "step": 746600 }, { "epoch": 7.61, "learning_rate": 3.102747610292338e-05, "loss": 0.4163, "step": 746700 }, { "epoch": 7.61, "learning_rate": 3.1020846371116995e-05, "loss": 0.367, "step": 746800 }, { "epoch": 7.61, "learning_rate": 3.101421658939731e-05, "loss": 0.4736, "step": 746900 }, { "epoch": 7.61, "learning_rate": 3.100758675808848e-05, "loss": 0.377, "step": 747000 }, { "epoch": 7.61, "learning_rate": 3.1000956877514685e-05, "loss": 0.5069, "step": 747100 }, { "epoch": 7.61, "learning_rate": 3.099432694800007e-05, "loss": 0.4319, "step": 747200 }, { "epoch": 7.61, "learning_rate": 3.098769696986879e-05, "loss": 0.3508, "step": 747300 }, { "epoch": 7.61, "learning_rate": 3.098106694344503e-05, "loss": 0.4593, "step": 747400 }, { "epoch": 7.62, "learning_rate": 3.0974436869052956e-05, "loss": 0.3391, "step": 747500 }, { "epoch": 7.62, "learning_rate": 3.096780674701672e-05, "loss": 0.4315, "step": 747600 }, { "epoch": 7.62, "learning_rate": 3.0961176577660524e-05, "loss": 0.4582, "step": 747700 }, { "epoch": 7.62, "learning_rate": 3.0954546361308526e-05, "loss": 0.3671, "step": 747800 }, { "epoch": 7.62, "learning_rate": 3.09479160982849e-05, "loss": 0.4694, "step": 747900 }, { "epoch": 7.62, "learning_rate": 3.0941285788913836e-05, "loss": 0.4077, "step": 748000 }, { "epoch": 7.62, "learning_rate": 3.0934655433519523e-05, "loss": 0.4447, "step": 748100 }, { "epoch": 7.62, "learning_rate": 3.092802503242613e-05, "loss": 0.4076, "step": 748200 }, { "epoch": 7.62, "learning_rate": 3.0921394585957845e-05, "loss": 0.3692, "step": 748300 }, { "epoch": 7.62, "learning_rate": 3.0914764094438874e-05, "loss": 0.4786, "step": 748400 }, { "epoch": 7.63, "learning_rate": 3.09081335581934e-05, "loss": 0.3866, "step": 748500 }, { "epoch": 7.63, "learning_rate": 3.090150297754561e-05, "loss": 0.4992, "step": 748600 }, { "epoch": 7.63, "learning_rate": 3.08948723528197e-05, "loss": 0.3901, "step": 748700 }, { "epoch": 7.63, "learning_rate": 3.088824168433988e-05, "loss": 0.3847, "step": 748800 }, { "epoch": 7.63, "learning_rate": 3.0881610972430334e-05, "loss": 0.4889, "step": 748900 }, { "epoch": 7.63, "learning_rate": 3.0874980217415275e-05, "loss": 0.3674, "step": 749000 }, { "epoch": 7.63, "learning_rate": 3.086834941961891e-05, "loss": 0.3912, "step": 749100 }, { "epoch": 7.63, "learning_rate": 3.086171857936543e-05, "loss": 0.4054, "step": 749200 }, { "epoch": 7.63, "learning_rate": 3.085508769697907e-05, "loss": 0.3967, "step": 749300 }, { "epoch": 7.64, "learning_rate": 3.084845677278401e-05, "loss": 0.3654, "step": 749400 }, { "epoch": 7.64, "learning_rate": 3.084182580710448e-05, "loss": 0.4439, "step": 749500 }, { "epoch": 7.64, "learning_rate": 3.08351948002647e-05, "loss": 0.4097, "step": 749600 }, { "epoch": 7.64, "learning_rate": 3.0828563752588865e-05, "loss": 0.3982, "step": 749700 }, { "epoch": 7.64, "learning_rate": 3.082193266440122e-05, "loss": 0.4307, "step": 749800 }, { "epoch": 7.64, "learning_rate": 3.081530153602596e-05, "loss": 0.4433, "step": 749900 }, { "epoch": 7.64, "learning_rate": 3.080867036778733e-05, "loss": 0.4471, "step": 750000 }, { "epoch": 7.64, "learning_rate": 3.080203916000954e-05, "loss": 0.4519, "step": 750100 }, { "epoch": 7.64, "learning_rate": 3.0795407913016826e-05, "loss": 0.5162, "step": 750200 }, { "epoch": 7.64, "learning_rate": 3.078877662713341e-05, "loss": 0.4297, "step": 750300 }, { "epoch": 7.65, "learning_rate": 3.0782145302683525e-05, "loss": 0.476, "step": 750400 }, { "epoch": 7.65, "learning_rate": 3.07755139399914e-05, "loss": 0.4014, "step": 750500 }, { "epoch": 7.65, "learning_rate": 3.076888253938128e-05, "loss": 0.5183, "step": 750600 }, { "epoch": 7.65, "learning_rate": 3.076225110117738e-05, "loss": 0.3984, "step": 750700 }, { "epoch": 7.65, "learning_rate": 3.075561962570395e-05, "loss": 0.4935, "step": 750800 }, { "epoch": 7.65, "learning_rate": 3.0748988113285236e-05, "loss": 0.3687, "step": 750900 }, { "epoch": 7.65, "learning_rate": 3.074235656424547e-05, "loss": 0.4868, "step": 751000 }, { "epoch": 7.65, "learning_rate": 3.0735724978908906e-05, "loss": 0.472, "step": 751100 }, { "epoch": 7.65, "learning_rate": 3.0729159673989854e-05, "loss": 0.4299, "step": 751200 }, { "epoch": 7.65, "learning_rate": 3.0722528017387295e-05, "loss": 0.4645, "step": 751300 }, { "epoch": 7.66, "learning_rate": 3.071589632545742e-05, "loss": 0.4098, "step": 751400 }, { "epoch": 7.66, "learning_rate": 3.070926459852448e-05, "loss": 0.4357, "step": 751500 }, { "epoch": 7.66, "learning_rate": 3.070263283691274e-05, "loss": 0.3863, "step": 751600 }, { "epoch": 7.66, "learning_rate": 3.0696001040946455e-05, "loss": 0.4247, "step": 751700 }, { "epoch": 7.66, "learning_rate": 3.068936921094986e-05, "loss": 0.4724, "step": 751800 }, { "epoch": 7.66, "learning_rate": 3.068273734724724e-05, "loss": 0.3932, "step": 751900 }, { "epoch": 7.66, "learning_rate": 3.0676105450162825e-05, "loss": 0.4434, "step": 752000 }, { "epoch": 7.66, "learning_rate": 3.066947352002088e-05, "loss": 0.416, "step": 752100 }, { "epoch": 7.66, "learning_rate": 3.066284155714569e-05, "loss": 0.4615, "step": 752200 }, { "epoch": 7.66, "learning_rate": 3.06562095618615e-05, "loss": 0.4978, "step": 752300 }, { "epoch": 7.67, "learning_rate": 3.064957753449258e-05, "loss": 0.5006, "step": 752400 }, { "epoch": 7.67, "learning_rate": 3.06429454753632e-05, "loss": 0.4498, "step": 752500 }, { "epoch": 7.67, "learning_rate": 3.063631338479761e-05, "loss": 0.3794, "step": 752600 }, { "epoch": 7.67, "learning_rate": 3.0629681263120096e-05, "loss": 0.4471, "step": 752700 }, { "epoch": 7.67, "learning_rate": 3.0623049110654936e-05, "loss": 0.4869, "step": 752800 }, { "epoch": 7.67, "learning_rate": 3.061641692772638e-05, "loss": 0.4507, "step": 752900 }, { "epoch": 7.67, "learning_rate": 3.060978471465872e-05, "loss": 0.3959, "step": 753000 }, { "epoch": 7.67, "learning_rate": 3.0603152471776224e-05, "loss": 0.481, "step": 753100 }, { "epoch": 7.67, "learning_rate": 3.059652019940318e-05, "loss": 0.4218, "step": 753200 }, { "epoch": 7.67, "learning_rate": 3.058988789786385e-05, "loss": 0.4285, "step": 753300 }, { "epoch": 7.68, "learning_rate": 3.058325556748252e-05, "loss": 0.4261, "step": 753400 }, { "epoch": 7.68, "learning_rate": 3.057662320858348e-05, "loss": 0.416, "step": 753500 }, { "epoch": 7.68, "learning_rate": 3.0569990821491014e-05, "loss": 0.4515, "step": 753600 }, { "epoch": 7.68, "learning_rate": 3.056335840652938e-05, "loss": 0.4142, "step": 753700 }, { "epoch": 7.68, "learning_rate": 3.0556725964022895e-05, "loss": 0.4506, "step": 753800 }, { "epoch": 7.68, "learning_rate": 3.0550093494295826e-05, "loss": 0.4797, "step": 753900 }, { "epoch": 7.68, "learning_rate": 3.054346099767247e-05, "loss": 0.4607, "step": 754000 }, { "epoch": 7.68, "learning_rate": 3.053682847447712e-05, "loss": 0.4683, "step": 754100 }, { "epoch": 7.68, "learning_rate": 3.0530195925034044e-05, "loss": 0.5217, "step": 754200 }, { "epoch": 7.68, "learning_rate": 3.052356334966757e-05, "loss": 0.4532, "step": 754300 }, { "epoch": 7.69, "learning_rate": 3.0516930748701968e-05, "loss": 0.4148, "step": 754400 }, { "epoch": 7.69, "learning_rate": 3.0510364448847983e-05, "loss": 0.3701, "step": 754500 }, { "epoch": 7.69, "learning_rate": 3.050373179790492e-05, "loss": 0.4666, "step": 754600 }, { "epoch": 7.69, "learning_rate": 3.0497099122332375e-05, "loss": 0.4665, "step": 754700 }, { "epoch": 7.69, "learning_rate": 3.0490466422454646e-05, "loss": 0.4035, "step": 754800 }, { "epoch": 7.69, "learning_rate": 3.0483833698596046e-05, "loss": 0.3939, "step": 754900 }, { "epoch": 7.69, "learning_rate": 3.0477200951080852e-05, "loss": 0.4051, "step": 755000 }, { "epoch": 7.69, "learning_rate": 3.047056818023339e-05, "loss": 0.3826, "step": 755100 }, { "epoch": 7.69, "learning_rate": 3.046393538637795e-05, "loss": 0.3561, "step": 755200 }, { "epoch": 7.7, "learning_rate": 3.0457302569838833e-05, "loss": 0.4, "step": 755300 }, { "epoch": 7.7, "learning_rate": 3.0450669730940358e-05, "loss": 0.4267, "step": 755400 }, { "epoch": 7.7, "learning_rate": 3.0444036870006812e-05, "loss": 0.4194, "step": 755500 }, { "epoch": 7.7, "learning_rate": 3.043740398736252e-05, "loss": 0.3832, "step": 755600 }, { "epoch": 7.7, "learning_rate": 3.0430771083331787e-05, "loss": 0.4045, "step": 755700 }, { "epoch": 7.7, "learning_rate": 3.042413815823891e-05, "loss": 0.4191, "step": 755800 }, { "epoch": 7.7, "learning_rate": 3.0417505212408218e-05, "loss": 0.4642, "step": 755900 }, { "epoch": 7.7, "learning_rate": 3.0410938575926427e-05, "loss": 0.3967, "step": 756000 }, { "epoch": 7.7, "learning_rate": 3.04043055897923e-05, "loss": 0.4332, "step": 756100 }, { "epoch": 7.7, "learning_rate": 3.0397672583890045e-05, "loss": 0.4371, "step": 756200 }, { "epoch": 7.71, "learning_rate": 3.0391039558543972e-05, "loss": 0.4412, "step": 756300 }, { "epoch": 7.71, "learning_rate": 3.0384406514078397e-05, "loss": 0.385, "step": 756400 }, { "epoch": 7.71, "learning_rate": 3.0377773450817645e-05, "loss": 0.4886, "step": 756500 }, { "epoch": 7.71, "learning_rate": 3.0371140369086022e-05, "loss": 0.4576, "step": 756600 }, { "epoch": 7.71, "learning_rate": 3.036450726920784e-05, "loss": 0.4771, "step": 756700 }, { "epoch": 7.71, "learning_rate": 3.035787415150744e-05, "loss": 0.4661, "step": 756800 }, { "epoch": 7.71, "learning_rate": 3.0351241016309125e-05, "loss": 0.4859, "step": 756900 }, { "epoch": 7.71, "learning_rate": 3.034460786393722e-05, "loss": 0.393, "step": 757000 }, { "epoch": 7.71, "learning_rate": 3.033797469471605e-05, "loss": 0.4309, "step": 757100 }, { "epoch": 7.71, "learning_rate": 3.0331341508969936e-05, "loss": 0.4964, "step": 757200 }, { "epoch": 7.72, "learning_rate": 3.0324708307023192e-05, "loss": 0.3874, "step": 757300 }, { "epoch": 7.72, "learning_rate": 3.031807508920015e-05, "loss": 0.4138, "step": 757400 }, { "epoch": 7.72, "learning_rate": 3.031144185582514e-05, "loss": 0.4556, "step": 757500 }, { "epoch": 7.72, "learning_rate": 3.030480860722248e-05, "loss": 0.4617, "step": 757600 }, { "epoch": 7.72, "learning_rate": 3.0298175343716496e-05, "loss": 0.4925, "step": 757700 }, { "epoch": 7.72, "learning_rate": 3.0291542065631522e-05, "loss": 0.4762, "step": 757800 }, { "epoch": 7.72, "learning_rate": 3.0284908773291876e-05, "loss": 0.4309, "step": 757900 }, { "epoch": 7.72, "learning_rate": 3.027827546702189e-05, "loss": 0.4236, "step": 758000 }, { "epoch": 7.72, "learning_rate": 3.0271642147145894e-05, "loss": 0.4589, "step": 758100 }, { "epoch": 7.72, "learning_rate": 3.0265008813988224e-05, "loss": 0.4284, "step": 758200 }, { "epoch": 7.73, "learning_rate": 3.0258375467873204e-05, "loss": 0.3579, "step": 758300 }, { "epoch": 7.73, "learning_rate": 3.0251808442774106e-05, "loss": 0.4153, "step": 758400 }, { "epoch": 7.73, "learning_rate": 3.02452414056081e-05, "loss": 0.4179, "step": 758500 }, { "epoch": 7.73, "learning_rate": 3.023860802280352e-05, "loss": 0.4191, "step": 758600 }, { "epoch": 7.73, "learning_rate": 3.0231974628332435e-05, "loss": 0.3204, "step": 758700 }, { "epoch": 7.73, "learning_rate": 3.0225341222519183e-05, "loss": 0.423, "step": 758800 }, { "epoch": 7.73, "learning_rate": 3.0218707805688085e-05, "loss": 0.4338, "step": 758900 }, { "epoch": 7.73, "learning_rate": 3.0212074378163477e-05, "loss": 0.5026, "step": 759000 }, { "epoch": 7.73, "learning_rate": 3.020544094026971e-05, "loss": 0.4036, "step": 759100 }, { "epoch": 7.73, "learning_rate": 3.0198807492331093e-05, "loss": 0.3537, "step": 759200 }, { "epoch": 7.74, "learning_rate": 3.019217403467198e-05, "loss": 0.4651, "step": 759300 }, { "epoch": 7.74, "learning_rate": 3.0185540567616704e-05, "loss": 0.4638, "step": 759400 }, { "epoch": 7.74, "learning_rate": 3.0178907091489603e-05, "loss": 0.4214, "step": 759500 }, { "epoch": 7.74, "learning_rate": 3.0172273606615014e-05, "loss": 0.3501, "step": 759600 }, { "epoch": 7.74, "learning_rate": 3.0165640113317275e-05, "loss": 0.3865, "step": 759700 }, { "epoch": 7.74, "learning_rate": 3.0159006611920715e-05, "loss": 0.5007, "step": 759800 }, { "epoch": 7.74, "learning_rate": 3.015237310274968e-05, "loss": 0.4444, "step": 759900 }, { "epoch": 7.74, "learning_rate": 3.0145739586128512e-05, "loss": 0.5021, "step": 760000 }, { "epoch": 7.74, "learning_rate": 3.0139106062381555e-05, "loss": 0.4989, "step": 760100 }, { "epoch": 7.75, "learning_rate": 3.0132472531833143e-05, "loss": 0.3362, "step": 760200 }, { "epoch": 7.75, "learning_rate": 3.0125838994807602e-05, "loss": 0.4746, "step": 760300 }, { "epoch": 7.75, "learning_rate": 3.0119205451629295e-05, "loss": 0.4278, "step": 760400 }, { "epoch": 7.75, "learning_rate": 3.0112571902622548e-05, "loss": 0.4835, "step": 760500 }, { "epoch": 7.75, "learning_rate": 3.0105938348111707e-05, "loss": 0.4286, "step": 760600 }, { "epoch": 7.75, "learning_rate": 3.009930478842112e-05, "loss": 0.3728, "step": 760700 }, { "epoch": 7.75, "learning_rate": 3.0092671223875112e-05, "loss": 0.3697, "step": 760800 }, { "epoch": 7.75, "learning_rate": 3.0086037654798045e-05, "loss": 0.4391, "step": 760900 }, { "epoch": 7.75, "learning_rate": 3.007940408151425e-05, "loss": 0.434, "step": 761000 }, { "epoch": 7.75, "learning_rate": 3.0072770504348058e-05, "loss": 0.4146, "step": 761100 }, { "epoch": 7.76, "learning_rate": 3.006613692362382e-05, "loss": 0.4899, "step": 761200 }, { "epoch": 7.76, "learning_rate": 3.0059503339665894e-05, "loss": 0.4018, "step": 761300 }, { "epoch": 7.76, "learning_rate": 3.0052869752798597e-05, "loss": 0.4563, "step": 761400 }, { "epoch": 7.76, "learning_rate": 3.00462361633463e-05, "loss": 0.4224, "step": 761500 }, { "epoch": 7.76, "learning_rate": 3.003960257163332e-05, "loss": 0.4662, "step": 761600 }, { "epoch": 7.76, "learning_rate": 3.0032968977984015e-05, "loss": 0.4046, "step": 761700 }, { "epoch": 7.76, "learning_rate": 3.002633538272272e-05, "loss": 0.4397, "step": 761800 }, { "epoch": 7.76, "learning_rate": 3.001970178617378e-05, "loss": 0.3236, "step": 761900 }, { "epoch": 7.76, "learning_rate": 3.001306818866155e-05, "loss": 0.4265, "step": 762000 }, { "epoch": 7.76, "learning_rate": 3.000643459051036e-05, "loss": 0.3938, "step": 762100 }, { "epoch": 7.77, "learning_rate": 2.9999800992044554e-05, "loss": 0.4422, "step": 762200 }, { "epoch": 7.77, "learning_rate": 2.9993167393588478e-05, "loss": 0.4581, "step": 762300 }, { "epoch": 7.77, "learning_rate": 2.9986533795466474e-05, "loss": 0.4401, "step": 762400 }, { "epoch": 7.77, "learning_rate": 2.9979966533973197e-05, "loss": 0.4503, "step": 762500 }, { "epoch": 7.77, "learning_rate": 2.997333293748094e-05, "loss": 0.3898, "step": 762600 }, { "epoch": 7.77, "learning_rate": 2.996669934229255e-05, "loss": 0.4769, "step": 762700 }, { "epoch": 7.77, "learning_rate": 2.9960065748732354e-05, "loss": 0.4171, "step": 762800 }, { "epoch": 7.77, "learning_rate": 2.99534321571247e-05, "loss": 0.4162, "step": 762900 }, { "epoch": 7.77, "learning_rate": 2.9946798567793947e-05, "loss": 0.3888, "step": 763000 }, { "epoch": 7.77, "learning_rate": 2.9940164981064422e-05, "loss": 0.4745, "step": 763100 }, { "epoch": 7.78, "learning_rate": 2.993353139726047e-05, "loss": 0.4465, "step": 763200 }, { "epoch": 7.78, "learning_rate": 2.992689781670645e-05, "loss": 0.408, "step": 763300 }, { "epoch": 7.78, "learning_rate": 2.9920264239726672e-05, "loss": 0.4668, "step": 763400 }, { "epoch": 7.78, "learning_rate": 2.99136306666455e-05, "loss": 0.4358, "step": 763500 }, { "epoch": 7.78, "learning_rate": 2.9906997097787276e-05, "loss": 0.409, "step": 763600 }, { "epoch": 7.78, "learning_rate": 2.9900363533476344e-05, "loss": 0.4235, "step": 763700 }, { "epoch": 7.78, "learning_rate": 2.9893729974037034e-05, "loss": 0.4358, "step": 763800 }, { "epoch": 7.78, "learning_rate": 2.9887096419793692e-05, "loss": 0.4538, "step": 763900 }, { "epoch": 7.78, "learning_rate": 2.9880462871070677e-05, "loss": 0.3777, "step": 764000 }, { "epoch": 7.78, "learning_rate": 2.98738293281923e-05, "loss": 0.4248, "step": 764100 }, { "epoch": 7.79, "learning_rate": 2.9867195791482914e-05, "loss": 0.3158, "step": 764200 }, { "epoch": 7.79, "learning_rate": 2.986056226126687e-05, "loss": 0.4205, "step": 764300 }, { "epoch": 7.79, "learning_rate": 2.9853928737868492e-05, "loss": 0.4375, "step": 764400 }, { "epoch": 7.79, "learning_rate": 2.9847295221612128e-05, "loss": 0.481, "step": 764500 }, { "epoch": 7.79, "learning_rate": 2.984066171282213e-05, "loss": 0.3996, "step": 764600 }, { "epoch": 7.79, "learning_rate": 2.9834028211822807e-05, "loss": 0.4529, "step": 764700 }, { "epoch": 7.79, "learning_rate": 2.9827394718938518e-05, "loss": 0.5103, "step": 764800 }, { "epoch": 7.79, "learning_rate": 2.98207612344936e-05, "loss": 0.3829, "step": 764900 }, { "epoch": 7.79, "learning_rate": 2.9814127758812382e-05, "loss": 0.4805, "step": 765000 }, { "epoch": 7.79, "learning_rate": 2.980749429221921e-05, "loss": 0.4785, "step": 765100 }, { "epoch": 7.8, "learning_rate": 2.980086083503843e-05, "loss": 0.4327, "step": 765200 }, { "epoch": 7.8, "learning_rate": 2.9794227387594352e-05, "loss": 0.3611, "step": 765300 }, { "epoch": 7.8, "learning_rate": 2.978759395021133e-05, "loss": 0.3893, "step": 765400 }, { "epoch": 7.8, "learning_rate": 2.9780960523213694e-05, "loss": 0.3991, "step": 765500 }, { "epoch": 7.8, "learning_rate": 2.977432710692579e-05, "loss": 0.3756, "step": 765600 }, { "epoch": 7.8, "learning_rate": 2.9767693701671936e-05, "loss": 0.3913, "step": 765700 }, { "epoch": 7.8, "learning_rate": 2.9761060307776475e-05, "loss": 0.4296, "step": 765800 }, { "epoch": 7.8, "learning_rate": 2.975442692556375e-05, "loss": 0.3311, "step": 765900 }, { "epoch": 7.8, "learning_rate": 2.974779355535807e-05, "loss": 0.4821, "step": 766000 }, { "epoch": 7.81, "learning_rate": 2.9741226531000425e-05, "loss": 0.4509, "step": 766100 }, { "epoch": 7.81, "learning_rate": 2.9734593185653693e-05, "loss": 0.3603, "step": 766200 }, { "epoch": 7.81, "learning_rate": 2.972795985328377e-05, "loss": 0.4034, "step": 766300 }, { "epoch": 7.81, "learning_rate": 2.9721326534214997e-05, "loss": 0.4876, "step": 766400 }, { "epoch": 7.81, "learning_rate": 2.9714693228771686e-05, "loss": 0.4139, "step": 766500 }, { "epoch": 7.81, "learning_rate": 2.970805993727817e-05, "loss": 0.4941, "step": 766600 }, { "epoch": 7.81, "learning_rate": 2.9701426660058797e-05, "loss": 0.5101, "step": 766700 }, { "epoch": 7.81, "learning_rate": 2.9694793397437867e-05, "loss": 0.4774, "step": 766800 }, { "epoch": 7.81, "learning_rate": 2.9688160149739716e-05, "loss": 0.4767, "step": 766900 }, { "epoch": 7.81, "learning_rate": 2.9681526917288677e-05, "loss": 0.3823, "step": 767000 }, { "epoch": 7.82, "learning_rate": 2.9674893700409068e-05, "loss": 0.4467, "step": 767100 }, { "epoch": 7.82, "learning_rate": 2.9668260499425214e-05, "loss": 0.4203, "step": 767200 }, { "epoch": 7.82, "learning_rate": 2.966162731466146e-05, "loss": 0.3695, "step": 767300 }, { "epoch": 7.82, "learning_rate": 2.9654994146442093e-05, "loss": 0.4432, "step": 767400 }, { "epoch": 7.82, "learning_rate": 2.9648360995091457e-05, "loss": 0.5392, "step": 767500 }, { "epoch": 7.82, "learning_rate": 2.9641727860933874e-05, "loss": 0.3938, "step": 767600 }, { "epoch": 7.82, "learning_rate": 2.9635094744293655e-05, "loss": 0.5172, "step": 767700 }, { "epoch": 7.82, "learning_rate": 2.9628461645495124e-05, "loss": 0.4691, "step": 767800 }, { "epoch": 7.82, "learning_rate": 2.9621828564862605e-05, "loss": 0.4172, "step": 767900 }, { "epoch": 7.82, "learning_rate": 2.9615195502720417e-05, "loss": 0.4765, "step": 768000 }, { "epoch": 7.83, "learning_rate": 2.960856245939287e-05, "loss": 0.4873, "step": 768100 }, { "epoch": 7.83, "learning_rate": 2.9601929435204276e-05, "loss": 0.4854, "step": 768200 }, { "epoch": 7.83, "learning_rate": 2.9595296430478966e-05, "loss": 0.4274, "step": 768300 }, { "epoch": 7.83, "learning_rate": 2.9588663445541237e-05, "loss": 0.4945, "step": 768400 }, { "epoch": 7.83, "learning_rate": 2.958203048071541e-05, "loss": 0.4374, "step": 768500 }, { "epoch": 7.83, "learning_rate": 2.9575397536325816e-05, "loss": 0.4128, "step": 768600 }, { "epoch": 7.83, "learning_rate": 2.9568764612696734e-05, "loss": 0.4845, "step": 768700 }, { "epoch": 7.83, "learning_rate": 2.9562131710152488e-05, "loss": 0.5316, "step": 768800 }, { "epoch": 7.83, "learning_rate": 2.9555498829017396e-05, "loss": 0.4748, "step": 768900 }, { "epoch": 7.83, "learning_rate": 2.954886596961575e-05, "loss": 0.487, "step": 769000 }, { "epoch": 7.84, "learning_rate": 2.9542233132271866e-05, "loss": 0.4369, "step": 769100 }, { "epoch": 7.84, "learning_rate": 2.9535600317310064e-05, "loss": 0.4975, "step": 769200 }, { "epoch": 7.84, "learning_rate": 2.9528967525054624e-05, "loss": 0.4648, "step": 769300 }, { "epoch": 7.84, "learning_rate": 2.9522334755829857e-05, "loss": 0.4594, "step": 769400 }, { "epoch": 7.84, "learning_rate": 2.9515702009960072e-05, "loss": 0.4362, "step": 769500 }, { "epoch": 7.84, "learning_rate": 2.9509069287769574e-05, "loss": 0.5117, "step": 769600 }, { "epoch": 7.84, "learning_rate": 2.9502436589582652e-05, "loss": 0.4575, "step": 769700 }, { "epoch": 7.84, "learning_rate": 2.9495803915723614e-05, "loss": 0.4871, "step": 769800 }, { "epoch": 7.84, "learning_rate": 2.9489171266516765e-05, "loss": 0.4235, "step": 769900 }, { "epoch": 7.84, "learning_rate": 2.9482538642286383e-05, "loss": 0.4833, "step": 770000 }, { "epoch": 7.85, "learning_rate": 2.947590604335677e-05, "loss": 0.3927, "step": 770100 }, { "epoch": 7.85, "learning_rate": 2.9469273470052232e-05, "loss": 0.3888, "step": 770200 }, { "epoch": 7.85, "learning_rate": 2.946264092269705e-05, "loss": 0.4553, "step": 770300 }, { "epoch": 7.85, "learning_rate": 2.9456008401615514e-05, "loss": 0.5253, "step": 770400 }, { "epoch": 7.85, "learning_rate": 2.944937590713194e-05, "loss": 0.4941, "step": 770500 }, { "epoch": 7.85, "learning_rate": 2.944274343957058e-05, "loss": 0.4307, "step": 770600 }, { "epoch": 7.85, "learning_rate": 2.9436110999255744e-05, "loss": 0.4545, "step": 770700 }, { "epoch": 7.85, "learning_rate": 2.942947858651172e-05, "loss": 0.4316, "step": 770800 }, { "epoch": 7.85, "learning_rate": 2.942284620166278e-05, "loss": 0.4079, "step": 770900 }, { "epoch": 7.86, "learning_rate": 2.941621384503322e-05, "loss": 0.3977, "step": 771000 }, { "epoch": 7.86, "learning_rate": 2.940958151694733e-05, "loss": 0.468, "step": 771100 }, { "epoch": 7.86, "learning_rate": 2.9402949217729372e-05, "loss": 0.4052, "step": 771200 }, { "epoch": 7.86, "learning_rate": 2.939631694770363e-05, "loss": 0.3703, "step": 771300 }, { "epoch": 7.86, "learning_rate": 2.9389684707194388e-05, "loss": 0.413, "step": 771400 }, { "epoch": 7.86, "learning_rate": 2.9383118818483826e-05, "loss": 0.4476, "step": 771500 }, { "epoch": 7.86, "learning_rate": 2.9376486637677168e-05, "loss": 0.4474, "step": 771600 }, { "epoch": 7.86, "learning_rate": 2.936985448735658e-05, "loss": 0.5292, "step": 771700 }, { "epoch": 7.86, "learning_rate": 2.9363222367846345e-05, "loss": 0.4571, "step": 771800 }, { "epoch": 7.86, "learning_rate": 2.9356590279470737e-05, "loss": 0.4925, "step": 771900 }, { "epoch": 7.87, "learning_rate": 2.9349958222554035e-05, "loss": 0.4041, "step": 772000 }, { "epoch": 7.87, "learning_rate": 2.934332619742048e-05, "loss": 0.4627, "step": 772100 }, { "epoch": 7.87, "learning_rate": 2.9336694204394363e-05, "loss": 0.3544, "step": 772200 }, { "epoch": 7.87, "learning_rate": 2.9330062243799957e-05, "loss": 0.4459, "step": 772300 }, { "epoch": 7.87, "learning_rate": 2.932343031596149e-05, "loss": 0.4848, "step": 772400 }, { "epoch": 7.87, "learning_rate": 2.9316798421203245e-05, "loss": 0.4471, "step": 772500 }, { "epoch": 7.87, "learning_rate": 2.931016655984949e-05, "loss": 0.4639, "step": 772600 }, { "epoch": 7.87, "learning_rate": 2.9303534732224456e-05, "loss": 0.3954, "step": 772700 }, { "epoch": 7.87, "learning_rate": 2.9296902938652425e-05, "loss": 0.5144, "step": 772800 }, { "epoch": 7.87, "learning_rate": 2.929027117945766e-05, "loss": 0.4035, "step": 772900 }, { "epoch": 7.88, "learning_rate": 2.9283639454964383e-05, "loss": 0.4171, "step": 773000 }, { "epoch": 7.88, "learning_rate": 2.9277007765496858e-05, "loss": 0.4425, "step": 773100 }, { "epoch": 7.88, "learning_rate": 2.927037611137936e-05, "loss": 0.3869, "step": 773200 }, { "epoch": 7.88, "learning_rate": 2.9263744492936088e-05, "loss": 0.3901, "step": 773300 }, { "epoch": 7.88, "learning_rate": 2.9257112910491325e-05, "loss": 0.4184, "step": 773400 }, { "epoch": 7.88, "learning_rate": 2.925048136436931e-05, "loss": 0.3735, "step": 773500 }, { "epoch": 7.88, "learning_rate": 2.9243849854894297e-05, "loss": 0.3871, "step": 773600 }, { "epoch": 7.88, "learning_rate": 2.9237218382390498e-05, "loss": 0.409, "step": 773700 }, { "epoch": 7.88, "learning_rate": 2.923058694718217e-05, "loss": 0.4137, "step": 773800 }, { "epoch": 7.88, "learning_rate": 2.9223955549593554e-05, "loss": 0.4442, "step": 773900 }, { "epoch": 7.89, "learning_rate": 2.9217324189948878e-05, "loss": 0.4641, "step": 774000 }, { "epoch": 7.89, "learning_rate": 2.921069286857237e-05, "loss": 0.4573, "step": 774100 }, { "epoch": 7.89, "learning_rate": 2.920406158578829e-05, "loss": 0.3942, "step": 774200 }, { "epoch": 7.89, "learning_rate": 2.9197430341920836e-05, "loss": 0.4317, "step": 774300 }, { "epoch": 7.89, "learning_rate": 2.919093176100012e-05, "loss": 0.3596, "step": 774400 }, { "epoch": 7.89, "learning_rate": 2.9184300595144157e-05, "loss": 0.4186, "step": 774500 }, { "epoch": 7.89, "learning_rate": 2.9177669469171013e-05, "loss": 0.4761, "step": 774600 }, { "epoch": 7.89, "learning_rate": 2.9171038383404927e-05, "loss": 0.4271, "step": 774700 }, { "epoch": 7.89, "learning_rate": 2.9164407338170112e-05, "loss": 0.4048, "step": 774800 }, { "epoch": 7.89, "learning_rate": 2.9157776333790807e-05, "loss": 0.3611, "step": 774900 }, { "epoch": 7.9, "learning_rate": 2.9151145370591198e-05, "loss": 0.5185, "step": 775000 }, { "epoch": 7.9, "learning_rate": 2.914451444889552e-05, "loss": 0.337, "step": 775100 }, { "epoch": 7.9, "learning_rate": 2.9137883569027994e-05, "loss": 0.4029, "step": 775200 }, { "epoch": 7.9, "learning_rate": 2.91312527313128e-05, "loss": 0.471, "step": 775300 }, { "epoch": 7.9, "learning_rate": 2.9124621936074162e-05, "loss": 0.3473, "step": 775400 }, { "epoch": 7.9, "learning_rate": 2.9117991183636296e-05, "loss": 0.3919, "step": 775500 }, { "epoch": 7.9, "learning_rate": 2.911136047432339e-05, "loss": 0.4196, "step": 775600 }, { "epoch": 7.9, "learning_rate": 2.910472980845966e-05, "loss": 0.3698, "step": 775700 }, { "epoch": 7.9, "learning_rate": 2.9098099186369304e-05, "loss": 0.3723, "step": 775800 }, { "epoch": 7.91, "learning_rate": 2.909146860837651e-05, "loss": 0.5216, "step": 775900 }, { "epoch": 7.91, "learning_rate": 2.908483807480547e-05, "loss": 0.4367, "step": 776000 }, { "epoch": 7.91, "learning_rate": 2.90782075859804e-05, "loss": 0.3781, "step": 776100 }, { "epoch": 7.91, "learning_rate": 2.907157714222547e-05, "loss": 0.3742, "step": 776200 }, { "epoch": 7.91, "learning_rate": 2.906494674386488e-05, "loss": 0.4354, "step": 776300 }, { "epoch": 7.91, "learning_rate": 2.9058316391222824e-05, "loss": 0.4891, "step": 776400 }, { "epoch": 7.91, "learning_rate": 2.9051686084623464e-05, "loss": 0.3558, "step": 776500 }, { "epoch": 7.91, "learning_rate": 2.9045055824390992e-05, "loss": 0.5089, "step": 776600 }, { "epoch": 7.91, "learning_rate": 2.90384256108496e-05, "loss": 0.4876, "step": 776700 }, { "epoch": 7.91, "learning_rate": 2.9031795444323454e-05, "loss": 0.4362, "step": 776800 }, { "epoch": 7.92, "learning_rate": 2.9025165325136733e-05, "loss": 0.4833, "step": 776900 }, { "epoch": 7.92, "learning_rate": 2.9018535253613607e-05, "loss": 0.4, "step": 777000 }, { "epoch": 7.92, "learning_rate": 2.901190523007826e-05, "loss": 0.4158, "step": 777100 }, { "epoch": 7.92, "learning_rate": 2.9005275254854846e-05, "loss": 0.4164, "step": 777200 }, { "epoch": 7.92, "learning_rate": 2.899864532826753e-05, "loss": 0.4077, "step": 777300 }, { "epoch": 7.92, "learning_rate": 2.899201545064049e-05, "loss": 0.4443, "step": 777400 }, { "epoch": 7.92, "learning_rate": 2.8985385622297872e-05, "loss": 0.4285, "step": 777500 }, { "epoch": 7.92, "learning_rate": 2.897882214110456e-05, "loss": 0.4609, "step": 777600 }, { "epoch": 7.92, "learning_rate": 2.8972192411802345e-05, "loss": 0.3812, "step": 777700 }, { "epoch": 7.92, "learning_rate": 2.8965562732753785e-05, "loss": 0.5075, "step": 777800 }, { "epoch": 7.93, "learning_rate": 2.8958933104283042e-05, "loss": 0.4293, "step": 777900 }, { "epoch": 7.93, "learning_rate": 2.895230352671425e-05, "loss": 0.404, "step": 778000 }, { "epoch": 7.93, "learning_rate": 2.8945674000371565e-05, "loss": 0.4797, "step": 778100 }, { "epoch": 7.93, "learning_rate": 2.893904452557914e-05, "loss": 0.4554, "step": 778200 }, { "epoch": 7.93, "learning_rate": 2.8932415102661093e-05, "loss": 0.3975, "step": 778300 }, { "epoch": 7.93, "learning_rate": 2.8925785731941575e-05, "loss": 0.3456, "step": 778400 }, { "epoch": 7.93, "learning_rate": 2.8919156413744738e-05, "loss": 0.4081, "step": 778500 }, { "epoch": 7.93, "learning_rate": 2.891252714839469e-05, "loss": 0.451, "step": 778600 }, { "epoch": 7.93, "learning_rate": 2.8905897936215575e-05, "loss": 0.4636, "step": 778700 }, { "epoch": 7.93, "learning_rate": 2.8899268777531535e-05, "loss": 0.4531, "step": 778800 }, { "epoch": 7.94, "learning_rate": 2.8892639672666673e-05, "loss": 0.4069, "step": 778900 }, { "epoch": 7.94, "learning_rate": 2.8886010621945117e-05, "loss": 0.5336, "step": 779000 }, { "epoch": 7.94, "learning_rate": 2.8879381625690995e-05, "loss": 0.4668, "step": 779100 }, { "epoch": 7.94, "learning_rate": 2.8872752684228432e-05, "loss": 0.4883, "step": 779200 }, { "epoch": 7.94, "learning_rate": 2.886612379788153e-05, "loss": 0.406, "step": 779300 }, { "epoch": 7.94, "learning_rate": 2.8859494966974406e-05, "loss": 0.3491, "step": 779400 }, { "epoch": 7.94, "learning_rate": 2.885286619183119e-05, "loss": 0.5367, "step": 779500 }, { "epoch": 7.94, "learning_rate": 2.8846237472775952e-05, "loss": 0.4691, "step": 779600 }, { "epoch": 7.94, "learning_rate": 2.883960881013282e-05, "loss": 0.4947, "step": 779700 }, { "epoch": 7.94, "learning_rate": 2.8832980204225897e-05, "loss": 0.4024, "step": 779800 }, { "epoch": 7.95, "learning_rate": 2.882635165537927e-05, "loss": 0.4028, "step": 779900 }, { "epoch": 7.95, "learning_rate": 2.8819723163917048e-05, "loss": 0.4522, "step": 780000 }, { "epoch": 7.95, "learning_rate": 2.881309473016333e-05, "loss": 0.5615, "step": 780100 }, { "epoch": 7.95, "learning_rate": 2.880646635444218e-05, "loss": 0.3804, "step": 780200 }, { "epoch": 7.95, "learning_rate": 2.8799838037077707e-05, "loss": 0.443, "step": 780300 }, { "epoch": 7.95, "learning_rate": 2.8793209778393997e-05, "loss": 0.3814, "step": 780400 }, { "epoch": 7.95, "learning_rate": 2.8786581578715117e-05, "loss": 0.4789, "step": 780500 }, { "epoch": 7.95, "learning_rate": 2.877995343836516e-05, "loss": 0.4762, "step": 780600 }, { "epoch": 7.95, "learning_rate": 2.87733253576682e-05, "loss": 0.4461, "step": 780700 }, { "epoch": 7.95, "learning_rate": 2.8766697336948323e-05, "loss": 0.4569, "step": 780800 }, { "epoch": 7.96, "learning_rate": 2.8760069376529572e-05, "loss": 0.4403, "step": 780900 }, { "epoch": 7.96, "learning_rate": 2.875344147673603e-05, "loss": 0.4716, "step": 781000 }, { "epoch": 7.96, "learning_rate": 2.8746813637891764e-05, "loss": 0.4344, "step": 781100 }, { "epoch": 7.96, "learning_rate": 2.874018586032083e-05, "loss": 0.3955, "step": 781200 }, { "epoch": 7.96, "learning_rate": 2.8733558144347286e-05, "loss": 0.4326, "step": 781300 }, { "epoch": 7.96, "learning_rate": 2.8726930490295206e-05, "loss": 0.4348, "step": 781400 }, { "epoch": 7.96, "learning_rate": 2.8720302898488612e-05, "loss": 0.4184, "step": 781500 }, { "epoch": 7.96, "learning_rate": 2.8713675369251573e-05, "loss": 0.428, "step": 781600 }, { "epoch": 7.96, "learning_rate": 2.8707047902908136e-05, "loss": 0.4951, "step": 781700 }, { "epoch": 7.97, "learning_rate": 2.8700486773499604e-05, "loss": 0.4341, "step": 781800 }, { "epoch": 7.97, "learning_rate": 2.869385943327847e-05, "loss": 0.4145, "step": 781900 }, { "epoch": 7.97, "learning_rate": 2.868723215691981e-05, "loss": 0.3761, "step": 782000 }, { "epoch": 7.97, "learning_rate": 2.8680604944747663e-05, "loss": 0.4414, "step": 782100 }, { "epoch": 7.97, "learning_rate": 2.867397779708607e-05, "loss": 0.397, "step": 782200 }, { "epoch": 7.97, "learning_rate": 2.866735071425904e-05, "loss": 0.4612, "step": 782300 }, { "epoch": 7.97, "learning_rate": 2.866072369659061e-05, "loss": 0.4474, "step": 782400 }, { "epoch": 7.97, "learning_rate": 2.8654096744404802e-05, "loss": 0.4279, "step": 782500 }, { "epoch": 7.97, "learning_rate": 2.8647469858025624e-05, "loss": 0.4784, "step": 782600 }, { "epoch": 7.97, "learning_rate": 2.86408430377771e-05, "loss": 0.459, "step": 782700 }, { "epoch": 7.98, "learning_rate": 2.8634216283983254e-05, "loss": 0.3945, "step": 782800 }, { "epoch": 7.98, "learning_rate": 2.8627589596968066e-05, "loss": 0.5079, "step": 782900 }, { "epoch": 7.98, "learning_rate": 2.862096297705556e-05, "loss": 0.4698, "step": 783000 }, { "epoch": 7.98, "learning_rate": 2.861433642456973e-05, "loss": 0.409, "step": 783100 }, { "epoch": 7.98, "learning_rate": 2.8607709939834587e-05, "loss": 0.4775, "step": 783200 }, { "epoch": 7.98, "learning_rate": 2.8601083523174115e-05, "loss": 0.466, "step": 783300 }, { "epoch": 7.98, "learning_rate": 2.8594457174912308e-05, "loss": 0.4365, "step": 783400 }, { "epoch": 7.98, "learning_rate": 2.8587830895373175e-05, "loss": 0.3488, "step": 783500 }, { "epoch": 7.98, "learning_rate": 2.8581204684880667e-05, "loss": 0.4743, "step": 783600 }, { "epoch": 7.98, "learning_rate": 2.8574578543758786e-05, "loss": 0.4358, "step": 783700 }, { "epoch": 7.99, "learning_rate": 2.856795247233151e-05, "loss": 0.4459, "step": 783800 }, { "epoch": 7.99, "learning_rate": 2.856132647092281e-05, "loss": 0.3703, "step": 783900 }, { "epoch": 7.99, "learning_rate": 2.855470053985666e-05, "loss": 0.3864, "step": 784000 }, { "epoch": 7.99, "learning_rate": 2.8548074679457037e-05, "loss": 0.3994, "step": 784100 }, { "epoch": 7.99, "learning_rate": 2.8541448890047885e-05, "loss": 0.433, "step": 784200 }, { "epoch": 7.99, "learning_rate": 2.853482317195318e-05, "loss": 0.4733, "step": 784300 }, { "epoch": 7.99, "learning_rate": 2.8528197525496887e-05, "loss": 0.4019, "step": 784400 }, { "epoch": 7.99, "learning_rate": 2.852157195100294e-05, "loss": 0.5195, "step": 784500 }, { "epoch": 7.99, "learning_rate": 2.8514946448795304e-05, "loss": 0.3601, "step": 784600 }, { "epoch": 7.99, "learning_rate": 2.850832101919794e-05, "loss": 0.488, "step": 784700 }, { "epoch": 8.0, "learning_rate": 2.850169566253476e-05, "loss": 0.3672, "step": 784800 }, { "epoch": 8.0, "learning_rate": 2.849507037912972e-05, "loss": 0.3626, "step": 784900 }, { "epoch": 8.0, "learning_rate": 2.848844516930676e-05, "loss": 0.3349, "step": 785000 }, { "epoch": 8.0, "learning_rate": 2.8481820033389815e-05, "loss": 0.4162, "step": 785100 }, { "epoch": 8.0, "learning_rate": 2.847519497170281e-05, "loss": 0.4089, "step": 785200 }, { "epoch": 8.0, "learning_rate": 2.8468569984569667e-05, "loss": 0.3878, "step": 785300 }, { "epoch": 8.0, "learning_rate": 2.8461945072314322e-05, "loss": 0.3886, "step": 785400 }, { "epoch": 8.0, "learning_rate": 2.8455320235260677e-05, "loss": 0.4415, "step": 785500 }, { "epoch": 8.0, "learning_rate": 2.8448695473732653e-05, "loss": 0.4555, "step": 785600 }, { "epoch": 8.0, "learning_rate": 2.8442070788054166e-05, "loss": 0.3589, "step": 785700 }, { "epoch": 8.01, "learning_rate": 2.8435446178549115e-05, "loss": 0.3716, "step": 785800 }, { "epoch": 8.01, "learning_rate": 2.8428821645541413e-05, "loss": 0.3017, "step": 785900 }, { "epoch": 8.01, "learning_rate": 2.8422263433535494e-05, "loss": 0.3841, "step": 786000 }, { "epoch": 8.01, "learning_rate": 2.8415639053721123e-05, "loss": 0.3499, "step": 786100 }, { "epoch": 8.01, "learning_rate": 2.840901475137255e-05, "loss": 0.3639, "step": 786200 }, { "epoch": 8.01, "learning_rate": 2.8402390526813656e-05, "loss": 0.3889, "step": 786300 }, { "epoch": 8.01, "learning_rate": 2.8395766380368335e-05, "loss": 0.3633, "step": 786400 }, { "epoch": 8.01, "learning_rate": 2.8389142312360473e-05, "loss": 0.405, "step": 786500 }, { "epoch": 8.01, "learning_rate": 2.8382518323113925e-05, "loss": 0.4351, "step": 786600 }, { "epoch": 8.02, "learning_rate": 2.837589441295258e-05, "loss": 0.3936, "step": 786700 }, { "epoch": 8.02, "learning_rate": 2.83693368201137e-05, "loss": 0.4673, "step": 786800 }, { "epoch": 8.02, "learning_rate": 2.836271306829543e-05, "loss": 0.3995, "step": 786900 }, { "epoch": 8.02, "learning_rate": 2.8356089396530727e-05, "loss": 0.3968, "step": 787000 }, { "epoch": 8.02, "learning_rate": 2.8349465805143435e-05, "loss": 0.367, "step": 787100 }, { "epoch": 8.02, "learning_rate": 2.834284229445742e-05, "loss": 0.3448, "step": 787200 }, { "epoch": 8.02, "learning_rate": 2.8336218864796537e-05, "loss": 0.3847, "step": 787300 }, { "epoch": 8.02, "learning_rate": 2.8329595516484608e-05, "loss": 0.3208, "step": 787400 }, { "epoch": 8.02, "learning_rate": 2.832297224984549e-05, "loss": 0.3983, "step": 787500 }, { "epoch": 8.02, "learning_rate": 2.831634906520303e-05, "loss": 0.4542, "step": 787600 }, { "epoch": 8.03, "learning_rate": 2.8309725962881047e-05, "loss": 0.3175, "step": 787700 }, { "epoch": 8.03, "learning_rate": 2.8303102943203373e-05, "loss": 0.4236, "step": 787800 }, { "epoch": 8.03, "learning_rate": 2.829648000649384e-05, "loss": 0.4052, "step": 787900 }, { "epoch": 8.03, "learning_rate": 2.8289857153076286e-05, "loss": 0.4774, "step": 788000 }, { "epoch": 8.03, "learning_rate": 2.8283234383274494e-05, "loss": 0.3554, "step": 788100 }, { "epoch": 8.03, "learning_rate": 2.82766116974123e-05, "loss": 0.4006, "step": 788200 }, { "epoch": 8.03, "learning_rate": 2.826998909581351e-05, "loss": 0.4178, "step": 788300 }, { "epoch": 8.03, "learning_rate": 2.8263366578801924e-05, "loss": 0.3635, "step": 788400 }, { "epoch": 8.03, "learning_rate": 2.8256744146701353e-05, "loss": 0.3836, "step": 788500 }, { "epoch": 8.03, "learning_rate": 2.82501217998356e-05, "loss": 0.3945, "step": 788600 }, { "epoch": 8.04, "learning_rate": 2.8243499538528436e-05, "loss": 0.3058, "step": 788700 }, { "epoch": 8.04, "learning_rate": 2.823687736310366e-05, "loss": 0.3898, "step": 788800 }, { "epoch": 8.04, "learning_rate": 2.823025527388507e-05, "loss": 0.3929, "step": 788900 }, { "epoch": 8.04, "learning_rate": 2.8223633271196425e-05, "loss": 0.4114, "step": 789000 }, { "epoch": 8.04, "learning_rate": 2.8217011355361514e-05, "loss": 0.3514, "step": 789100 }, { "epoch": 8.04, "learning_rate": 2.821038952670412e-05, "loss": 0.366, "step": 789200 }, { "epoch": 8.04, "learning_rate": 2.8203767785547983e-05, "loss": 0.3841, "step": 789300 }, { "epoch": 8.04, "learning_rate": 2.819721234831441e-05, "loss": 0.4348, "step": 789400 }, { "epoch": 8.04, "learning_rate": 2.8190590782249008e-05, "loss": 0.4019, "step": 789500 }, { "epoch": 8.04, "learning_rate": 2.8183969304652928e-05, "loss": 0.4502, "step": 789600 }, { "epoch": 8.05, "learning_rate": 2.8177347915849923e-05, "loss": 0.4055, "step": 789700 }, { "epoch": 8.05, "learning_rate": 2.8170726616163718e-05, "loss": 0.4407, "step": 789800 }, { "epoch": 8.05, "learning_rate": 2.816410540591806e-05, "loss": 0.333, "step": 789900 }, { "epoch": 8.05, "learning_rate": 2.8157484285436705e-05, "loss": 0.3544, "step": 790000 }, { "epoch": 8.05, "learning_rate": 2.8150863255043356e-05, "loss": 0.3477, "step": 790100 }, { "epoch": 8.05, "learning_rate": 2.8144242315061764e-05, "loss": 0.502, "step": 790200 }, { "epoch": 8.05, "learning_rate": 2.8137621465815653e-05, "loss": 0.3864, "step": 790300 }, { "epoch": 8.05, "learning_rate": 2.8131000707628745e-05, "loss": 0.442, "step": 790400 }, { "epoch": 8.05, "learning_rate": 2.8124380040824738e-05, "loss": 0.3383, "step": 790500 }, { "epoch": 8.05, "learning_rate": 2.8117759465727352e-05, "loss": 0.3649, "step": 790600 }, { "epoch": 8.06, "learning_rate": 2.8111138982660306e-05, "loss": 0.3887, "step": 790700 }, { "epoch": 8.06, "learning_rate": 2.8104518591947282e-05, "loss": 0.3882, "step": 790800 }, { "epoch": 8.06, "learning_rate": 2.8097898293911988e-05, "loss": 0.4001, "step": 790900 }, { "epoch": 8.06, "learning_rate": 2.809127808887813e-05, "loss": 0.3981, "step": 791000 }, { "epoch": 8.06, "learning_rate": 2.808465797716937e-05, "loss": 0.31, "step": 791100 }, { "epoch": 8.06, "learning_rate": 2.80780379591094e-05, "loss": 0.3819, "step": 791200 }, { "epoch": 8.06, "learning_rate": 2.8071418035021925e-05, "loss": 0.4158, "step": 791300 }, { "epoch": 8.06, "learning_rate": 2.8064798205230575e-05, "loss": 0.3727, "step": 791400 }, { "epoch": 8.06, "learning_rate": 2.805817847005905e-05, "loss": 0.4263, "step": 791500 }, { "epoch": 8.06, "learning_rate": 2.8051558829831025e-05, "loss": 0.4474, "step": 791600 }, { "epoch": 8.07, "learning_rate": 2.804493928487013e-05, "loss": 0.3959, "step": 791700 }, { "epoch": 8.07, "learning_rate": 2.803831983550004e-05, "loss": 0.3244, "step": 791800 }, { "epoch": 8.07, "learning_rate": 2.8031700482044412e-05, "loss": 0.398, "step": 791900 }, { "epoch": 8.07, "learning_rate": 2.8025081224826866e-05, "loss": 0.3517, "step": 792000 }, { "epoch": 8.07, "learning_rate": 2.801846206417107e-05, "loss": 0.398, "step": 792100 }, { "epoch": 8.07, "learning_rate": 2.8011843000400656e-05, "loss": 0.3343, "step": 792200 }, { "epoch": 8.07, "learning_rate": 2.8005224033839264e-05, "loss": 0.3486, "step": 792300 }, { "epoch": 8.07, "learning_rate": 2.79986051648105e-05, "loss": 0.3729, "step": 792400 }, { "epoch": 8.07, "learning_rate": 2.7992052580864283e-05, "loss": 0.3993, "step": 792500 }, { "epoch": 8.08, "learning_rate": 2.7985433906888265e-05, "loss": 0.4252, "step": 792600 }, { "epoch": 8.08, "learning_rate": 2.7978815331412508e-05, "loss": 0.4147, "step": 792700 }, { "epoch": 8.08, "learning_rate": 2.7972196854760624e-05, "loss": 0.3548, "step": 792800 }, { "epoch": 8.08, "learning_rate": 2.7965578477256206e-05, "loss": 0.4039, "step": 792900 }, { "epoch": 8.08, "learning_rate": 2.7958960199222862e-05, "loss": 0.3318, "step": 793000 }, { "epoch": 8.08, "learning_rate": 2.7952342020984193e-05, "loss": 0.3849, "step": 793100 }, { "epoch": 8.08, "learning_rate": 2.794572394286377e-05, "loss": 0.3709, "step": 793200 }, { "epoch": 8.08, "learning_rate": 2.793910596518519e-05, "loss": 0.3574, "step": 793300 }, { "epoch": 8.08, "learning_rate": 2.793248808827203e-05, "loss": 0.3248, "step": 793400 }, { "epoch": 8.08, "learning_rate": 2.792587031244786e-05, "loss": 0.4135, "step": 793500 }, { "epoch": 8.09, "learning_rate": 2.7919318814277317e-05, "loss": 0.3915, "step": 793600 }, { "epoch": 8.09, "learning_rate": 2.7912701240582877e-05, "loss": 0.4585, "step": 793700 }, { "epoch": 8.09, "learning_rate": 2.790608376894489e-05, "loss": 0.3571, "step": 793800 }, { "epoch": 8.09, "learning_rate": 2.7899466399686923e-05, "loss": 0.3812, "step": 793900 }, { "epoch": 8.09, "learning_rate": 2.78928491331325e-05, "loss": 0.4111, "step": 794000 }, { "epoch": 8.09, "learning_rate": 2.7886231969605183e-05, "loss": 0.3425, "step": 794100 }, { "epoch": 8.09, "learning_rate": 2.787961490942851e-05, "loss": 0.354, "step": 794200 }, { "epoch": 8.09, "learning_rate": 2.7872997952926012e-05, "loss": 0.4097, "step": 794300 }, { "epoch": 8.09, "learning_rate": 2.7866381100421217e-05, "loss": 0.415, "step": 794400 }, { "epoch": 8.09, "learning_rate": 2.785976435223767e-05, "loss": 0.3637, "step": 794500 }, { "epoch": 8.1, "learning_rate": 2.7853147708698855e-05, "loss": 0.3777, "step": 794600 }, { "epoch": 8.1, "learning_rate": 2.7846531170128312e-05, "loss": 0.4248, "step": 794700 }, { "epoch": 8.1, "learning_rate": 2.783991473684955e-05, "loss": 0.4323, "step": 794800 }, { "epoch": 8.1, "learning_rate": 2.7833298409186053e-05, "loss": 0.354, "step": 794900 }, { "epoch": 8.1, "learning_rate": 2.782668218746134e-05, "loss": 0.4063, "step": 795000 }, { "epoch": 8.1, "learning_rate": 2.7820066071998897e-05, "loss": 0.371, "step": 795100 }, { "epoch": 8.1, "learning_rate": 2.7813450063122224e-05, "loss": 0.3024, "step": 795200 }, { "epoch": 8.1, "learning_rate": 2.7806834161154787e-05, "loss": 0.4223, "step": 795300 }, { "epoch": 8.1, "learning_rate": 2.7800218366420068e-05, "loss": 0.4384, "step": 795400 }, { "epoch": 8.1, "learning_rate": 2.779360267924155e-05, "loss": 0.405, "step": 795500 }, { "epoch": 8.11, "learning_rate": 2.7786987099942686e-05, "loss": 0.4108, "step": 795600 }, { "epoch": 8.11, "learning_rate": 2.7780371628846947e-05, "loss": 0.3609, "step": 795700 }, { "epoch": 8.11, "learning_rate": 2.7773756266277805e-05, "loss": 0.402, "step": 795800 }, { "epoch": 8.11, "learning_rate": 2.776714101255868e-05, "loss": 0.3637, "step": 795900 }, { "epoch": 8.11, "learning_rate": 2.7760525868013037e-05, "loss": 0.3973, "step": 796000 }, { "epoch": 8.11, "learning_rate": 2.7753910832964316e-05, "loss": 0.3844, "step": 796100 }, { "epoch": 8.11, "learning_rate": 2.774729590773595e-05, "loss": 0.3676, "step": 796200 }, { "epoch": 8.11, "learning_rate": 2.774068109265137e-05, "loss": 0.3675, "step": 796300 }, { "epoch": 8.11, "learning_rate": 2.7734066388034012e-05, "loss": 0.2973, "step": 796400 }, { "epoch": 8.11, "learning_rate": 2.7727451794207274e-05, "loss": 0.3723, "step": 796500 }, { "epoch": 8.12, "learning_rate": 2.7720837311494583e-05, "loss": 0.4179, "step": 796600 }, { "epoch": 8.12, "learning_rate": 2.771422294021934e-05, "loss": 0.3517, "step": 796700 }, { "epoch": 8.12, "learning_rate": 2.7707608680704968e-05, "loss": 0.3568, "step": 796800 }, { "epoch": 8.12, "learning_rate": 2.770106067419327e-05, "loss": 0.4073, "step": 796900 }, { "epoch": 8.12, "learning_rate": 2.7694446638045118e-05, "loss": 0.3817, "step": 797000 }, { "epoch": 8.12, "learning_rate": 2.768783271462476e-05, "loss": 0.404, "step": 797100 }, { "epoch": 8.12, "learning_rate": 2.768121890425559e-05, "loss": 0.4116, "step": 797200 }, { "epoch": 8.12, "learning_rate": 2.7674605207260978e-05, "loss": 0.367, "step": 797300 }, { "epoch": 8.12, "learning_rate": 2.766799162396429e-05, "loss": 0.3202, "step": 797400 }, { "epoch": 8.13, "learning_rate": 2.7661378154688896e-05, "loss": 0.3981, "step": 797500 }, { "epoch": 8.13, "learning_rate": 2.7654764799758168e-05, "loss": 0.4123, "step": 797600 }, { "epoch": 8.13, "learning_rate": 2.764815155949543e-05, "loss": 0.3657, "step": 797700 }, { "epoch": 8.13, "learning_rate": 2.7641538434224048e-05, "loss": 0.4607, "step": 797800 }, { "epoch": 8.13, "learning_rate": 2.763492542426737e-05, "loss": 0.4206, "step": 797900 }, { "epoch": 8.13, "learning_rate": 2.7628312529948712e-05, "loss": 0.4538, "step": 798000 }, { "epoch": 8.13, "learning_rate": 2.7621699751591424e-05, "loss": 0.3981, "step": 798100 }, { "epoch": 8.13, "learning_rate": 2.7615087089518833e-05, "loss": 0.3815, "step": 798200 }, { "epoch": 8.13, "learning_rate": 2.7608474544054243e-05, "loss": 0.401, "step": 798300 }, { "epoch": 8.13, "learning_rate": 2.7601862115520973e-05, "loss": 0.4987, "step": 798400 }, { "epoch": 8.14, "learning_rate": 2.759524980424234e-05, "loss": 0.3782, "step": 798500 }, { "epoch": 8.14, "learning_rate": 2.758863761054164e-05, "loss": 0.4511, "step": 798600 }, { "epoch": 8.14, "learning_rate": 2.7582025534742162e-05, "loss": 0.4578, "step": 798700 }, { "epoch": 8.14, "learning_rate": 2.7575413577167223e-05, "loss": 0.417, "step": 798800 }, { "epoch": 8.14, "learning_rate": 2.7568801738140078e-05, "loss": 0.3012, "step": 798900 }, { "epoch": 8.14, "learning_rate": 2.7562190017984022e-05, "loss": 0.3443, "step": 799000 }, { "epoch": 8.14, "learning_rate": 2.7555578417022324e-05, "loss": 0.4042, "step": 799100 }, { "epoch": 8.14, "learning_rate": 2.754896693557826e-05, "loss": 0.4254, "step": 799200 }, { "epoch": 8.14, "learning_rate": 2.7542355573975074e-05, "loss": 0.382, "step": 799300 }, { "epoch": 8.14, "learning_rate": 2.753574433253604e-05, "loss": 0.4017, "step": 799400 }, { "epoch": 8.15, "learning_rate": 2.7529133211584413e-05, "loss": 0.3609, "step": 799500 }, { "epoch": 8.15, "learning_rate": 2.7522522211443415e-05, "loss": 0.3628, "step": 799600 }, { "epoch": 8.15, "learning_rate": 2.7515911332436297e-05, "loss": 0.3011, "step": 799700 }, { "epoch": 8.15, "learning_rate": 2.7509300574886297e-05, "loss": 0.3527, "step": 799800 }, { "epoch": 8.15, "learning_rate": 2.7502689939116623e-05, "loss": 0.3675, "step": 799900 }, { "epoch": 8.15, "learning_rate": 2.7496079425450513e-05, "loss": 0.4093, "step": 800000 }, { "epoch": 8.15, "learning_rate": 2.748946903421119e-05, "loss": 0.3257, "step": 800100 }, { "epoch": 8.15, "learning_rate": 2.7482858765721836e-05, "loss": 0.4685, "step": 800200 }, { "epoch": 8.15, "learning_rate": 2.7476248620305664e-05, "loss": 0.3679, "step": 800300 }, { "epoch": 8.15, "learning_rate": 2.7469638598285885e-05, "loss": 0.3379, "step": 800400 }, { "epoch": 8.16, "learning_rate": 2.7463028699985664e-05, "loss": 0.4475, "step": 800500 }, { "epoch": 8.16, "learning_rate": 2.7456418925728206e-05, "loss": 0.421, "step": 800600 }, { "epoch": 8.16, "learning_rate": 2.7449809275836697e-05, "loss": 0.4038, "step": 800700 }, { "epoch": 8.16, "learning_rate": 2.7443199750634278e-05, "loss": 0.3655, "step": 800800 }, { "epoch": 8.16, "learning_rate": 2.7436590350444134e-05, "loss": 0.4012, "step": 800900 }, { "epoch": 8.16, "learning_rate": 2.7429981075589425e-05, "loss": 0.326, "step": 801000 }, { "epoch": 8.16, "learning_rate": 2.742337192639331e-05, "loss": 0.3347, "step": 801100 }, { "epoch": 8.16, "learning_rate": 2.741676290317893e-05, "loss": 0.386, "step": 801200 }, { "epoch": 8.16, "learning_rate": 2.7410154006269424e-05, "loss": 0.4057, "step": 801300 }, { "epoch": 8.16, "learning_rate": 2.7403545235987942e-05, "loss": 0.4109, "step": 801400 }, { "epoch": 8.17, "learning_rate": 2.739700267846142e-05, "loss": 0.4051, "step": 801500 }, { "epoch": 8.17, "learning_rate": 2.7390394161131008e-05, "loss": 0.3354, "step": 801600 }, { "epoch": 8.17, "learning_rate": 2.7383785771394737e-05, "loss": 0.3916, "step": 801700 }, { "epoch": 8.17, "learning_rate": 2.737717750957573e-05, "loss": 0.4673, "step": 801800 }, { "epoch": 8.17, "learning_rate": 2.7370569375997092e-05, "loss": 0.4042, "step": 801900 }, { "epoch": 8.17, "learning_rate": 2.7363961370981912e-05, "loss": 0.376, "step": 802000 }, { "epoch": 8.17, "learning_rate": 2.73573534948533e-05, "loss": 0.4333, "step": 802100 }, { "epoch": 8.17, "learning_rate": 2.7350745747934336e-05, "loss": 0.4299, "step": 802200 }, { "epoch": 8.17, "learning_rate": 2.7344138130548084e-05, "loss": 0.3573, "step": 802300 }, { "epoch": 8.17, "learning_rate": 2.7337530643017627e-05, "loss": 0.3713, "step": 802400 }, { "epoch": 8.18, "learning_rate": 2.7330923285666038e-05, "loss": 0.3894, "step": 802500 }, { "epoch": 8.18, "learning_rate": 2.732431605881637e-05, "loss": 0.3556, "step": 802600 }, { "epoch": 8.18, "learning_rate": 2.7317708962791678e-05, "loss": 0.4571, "step": 802700 }, { "epoch": 8.18, "learning_rate": 2.7311101997915024e-05, "loss": 0.3544, "step": 802800 }, { "epoch": 8.18, "learning_rate": 2.730449516450942e-05, "loss": 0.3864, "step": 802900 }, { "epoch": 8.18, "learning_rate": 2.7297888462897917e-05, "loss": 0.4564, "step": 803000 }, { "epoch": 8.18, "learning_rate": 2.729128189340355e-05, "loss": 0.352, "step": 803100 }, { "epoch": 8.18, "learning_rate": 2.728467545634933e-05, "loss": 0.4731, "step": 803200 }, { "epoch": 8.18, "learning_rate": 2.7278069152058275e-05, "loss": 0.3897, "step": 803300 }, { "epoch": 8.19, "learning_rate": 2.7271462980853393e-05, "loss": 0.3655, "step": 803400 }, { "epoch": 8.19, "learning_rate": 2.72648569430577e-05, "loss": 0.3987, "step": 803500 }, { "epoch": 8.19, "learning_rate": 2.725825103899417e-05, "loss": 0.3869, "step": 803600 }, { "epoch": 8.19, "learning_rate": 2.72516452689858e-05, "loss": 0.4443, "step": 803700 }, { "epoch": 8.19, "learning_rate": 2.7245039633355582e-05, "loss": 0.3532, "step": 803800 }, { "epoch": 8.19, "learning_rate": 2.723843413242648e-05, "loss": 0.3901, "step": 803900 }, { "epoch": 8.19, "learning_rate": 2.723182876652147e-05, "loss": 0.432, "step": 804000 }, { "epoch": 8.19, "learning_rate": 2.7225223535963524e-05, "loss": 0.3776, "step": 804100 }, { "epoch": 8.19, "learning_rate": 2.7218618441075575e-05, "loss": 0.4286, "step": 804200 }, { "epoch": 8.19, "learning_rate": 2.7212013482180585e-05, "loss": 0.4136, "step": 804300 }, { "epoch": 8.2, "learning_rate": 2.7205408659601508e-05, "loss": 0.4645, "step": 804400 }, { "epoch": 8.2, "learning_rate": 2.7198803973661262e-05, "loss": 0.4313, "step": 804500 }, { "epoch": 8.2, "learning_rate": 2.7192199424682785e-05, "loss": 0.4228, "step": 804600 }, { "epoch": 8.2, "learning_rate": 2.7185595012989012e-05, "loss": 0.3905, "step": 804700 }, { "epoch": 8.2, "learning_rate": 2.7178990738902837e-05, "loss": 0.3991, "step": 804800 }, { "epoch": 8.2, "learning_rate": 2.7172386602747174e-05, "loss": 0.4426, "step": 804900 }, { "epoch": 8.2, "learning_rate": 2.7165782604844937e-05, "loss": 0.3756, "step": 805000 }, { "epoch": 8.2, "learning_rate": 2.7159178745519022e-05, "loss": 0.4078, "step": 805100 }, { "epoch": 8.2, "learning_rate": 2.7152641061607973e-05, "loss": 0.4015, "step": 805200 }, { "epoch": 8.2, "learning_rate": 2.7146037479009524e-05, "loss": 0.4056, "step": 805300 }, { "epoch": 8.21, "learning_rate": 2.7139434035952814e-05, "loss": 0.3864, "step": 805400 }, { "epoch": 8.21, "learning_rate": 2.713283073276072e-05, "loss": 0.3798, "step": 805500 }, { "epoch": 8.21, "learning_rate": 2.712622756975608e-05, "loss": 0.42, "step": 805600 }, { "epoch": 8.21, "learning_rate": 2.7119624547261763e-05, "loss": 0.37, "step": 805700 }, { "epoch": 8.21, "learning_rate": 2.7113021665600618e-05, "loss": 0.349, "step": 805800 }, { "epoch": 8.21, "learning_rate": 2.7106418925095498e-05, "loss": 0.385, "step": 805900 }, { "epoch": 8.21, "learning_rate": 2.709981632606921e-05, "loss": 0.3937, "step": 806000 }, { "epoch": 8.21, "learning_rate": 2.7093213868844597e-05, "loss": 0.437, "step": 806100 }, { "epoch": 8.21, "learning_rate": 2.708661155374449e-05, "loss": 0.3217, "step": 806200 }, { "epoch": 8.21, "learning_rate": 2.7080009381091676e-05, "loss": 0.3391, "step": 806300 }, { "epoch": 8.22, "learning_rate": 2.7073407351208984e-05, "loss": 0.3901, "step": 806400 }, { "epoch": 8.22, "learning_rate": 2.706680546441922e-05, "loss": 0.4509, "step": 806500 }, { "epoch": 8.22, "learning_rate": 2.7060203721045152e-05, "loss": 0.306, "step": 806600 }, { "epoch": 8.22, "learning_rate": 2.7053602121409573e-05, "loss": 0.3588, "step": 806700 }, { "epoch": 8.22, "learning_rate": 2.704700066583528e-05, "loss": 0.4371, "step": 806800 }, { "epoch": 8.22, "learning_rate": 2.704039935464502e-05, "loss": 0.4054, "step": 806900 }, { "epoch": 8.22, "learning_rate": 2.7033798188161572e-05, "loss": 0.4252, "step": 807000 }, { "epoch": 8.22, "learning_rate": 2.7027197166707708e-05, "loss": 0.3887, "step": 807100 }, { "epoch": 8.22, "learning_rate": 2.7020596290606144e-05, "loss": 0.3881, "step": 807200 }, { "epoch": 8.22, "learning_rate": 2.701399556017964e-05, "loss": 0.4711, "step": 807300 }, { "epoch": 8.23, "learning_rate": 2.700739497575094e-05, "loss": 0.4083, "step": 807400 }, { "epoch": 8.23, "learning_rate": 2.700079453764277e-05, "loss": 0.319, "step": 807500 }, { "epoch": 8.23, "learning_rate": 2.699419424617784e-05, "loss": 0.3657, "step": 807600 }, { "epoch": 8.23, "learning_rate": 2.6987594101678874e-05, "loss": 0.3477, "step": 807700 }, { "epoch": 8.23, "learning_rate": 2.6980994104468597e-05, "loss": 0.3729, "step": 807800 }, { "epoch": 8.23, "learning_rate": 2.6974460252633937e-05, "loss": 0.3645, "step": 807900 }, { "epoch": 8.23, "learning_rate": 2.696786054948814e-05, "loss": 0.4134, "step": 808000 }, { "epoch": 8.23, "learning_rate": 2.696126099459587e-05, "loss": 0.3765, "step": 808100 }, { "epoch": 8.23, "learning_rate": 2.6954727581606454e-05, "loss": 0.4042, "step": 808200 }, { "epoch": 8.24, "learning_rate": 2.6948128322698676e-05, "loss": 0.412, "step": 808300 }, { "epoch": 8.24, "learning_rate": 2.6941529213009222e-05, "loss": 0.3908, "step": 808400 }, { "epoch": 8.24, "learning_rate": 2.6934930252860725e-05, "loss": 0.4179, "step": 808500 }, { "epoch": 8.24, "learning_rate": 2.692833144257585e-05, "loss": 0.4404, "step": 808600 }, { "epoch": 8.24, "learning_rate": 2.692173278247723e-05, "loss": 0.3944, "step": 808700 }, { "epoch": 8.24, "learning_rate": 2.691513427288752e-05, "loss": 0.3863, "step": 808800 }, { "epoch": 8.24, "learning_rate": 2.6908535914129325e-05, "loss": 0.4, "step": 808900 }, { "epoch": 8.24, "learning_rate": 2.6901937706525277e-05, "loss": 0.365, "step": 809000 }, { "epoch": 8.24, "learning_rate": 2.6895339650398e-05, "loss": 0.4313, "step": 809100 }, { "epoch": 8.24, "learning_rate": 2.6888741746070074e-05, "loss": 0.3198, "step": 809200 }, { "epoch": 8.25, "learning_rate": 2.6882143993864105e-05, "loss": 0.3701, "step": 809300 }, { "epoch": 8.25, "learning_rate": 2.6875546394102705e-05, "loss": 0.4001, "step": 809400 }, { "epoch": 8.25, "learning_rate": 2.6868948947108417e-05, "loss": 0.3048, "step": 809500 }, { "epoch": 8.25, "learning_rate": 2.686235165320385e-05, "loss": 0.3634, "step": 809600 }, { "epoch": 8.25, "learning_rate": 2.685575451271158e-05, "loss": 0.4628, "step": 809700 }, { "epoch": 8.25, "learning_rate": 2.6849157525954134e-05, "loss": 0.3683, "step": 809800 }, { "epoch": 8.25, "learning_rate": 2.684256069325408e-05, "loss": 0.3695, "step": 809900 }, { "epoch": 8.25, "learning_rate": 2.683596401493398e-05, "loss": 0.3536, "step": 810000 }, { "epoch": 8.25, "learning_rate": 2.6829367491316336e-05, "loss": 0.3876, "step": 810100 }, { "epoch": 8.25, "learning_rate": 2.682277112272371e-05, "loss": 0.4164, "step": 810200 }, { "epoch": 8.26, "learning_rate": 2.6816174909478626e-05, "loss": 0.3812, "step": 810300 }, { "epoch": 8.26, "learning_rate": 2.6809578851903574e-05, "loss": 0.3931, "step": 810400 }, { "epoch": 8.26, "learning_rate": 2.680298295032108e-05, "loss": 0.4033, "step": 810500 }, { "epoch": 8.26, "learning_rate": 2.679638720505364e-05, "loss": 0.3362, "step": 810600 }, { "epoch": 8.26, "learning_rate": 2.6789791616423756e-05, "loss": 0.374, "step": 810700 }, { "epoch": 8.26, "learning_rate": 2.67831961847539e-05, "loss": 0.4119, "step": 810800 }, { "epoch": 8.26, "learning_rate": 2.6776600910366546e-05, "loss": 0.5145, "step": 810900 }, { "epoch": 8.26, "learning_rate": 2.677000579358419e-05, "loss": 0.3856, "step": 811000 }, { "epoch": 8.26, "learning_rate": 2.6763410834729258e-05, "loss": 0.4167, "step": 811100 }, { "epoch": 8.26, "learning_rate": 2.6756816034124225e-05, "loss": 0.4414, "step": 811200 }, { "epoch": 8.27, "learning_rate": 2.6750221392091545e-05, "loss": 0.4286, "step": 811300 }, { "epoch": 8.27, "learning_rate": 2.6743626908953624e-05, "loss": 0.4037, "step": 811400 }, { "epoch": 8.27, "learning_rate": 2.6737032585032923e-05, "loss": 0.4283, "step": 811500 }, { "epoch": 8.27, "learning_rate": 2.673043842065187e-05, "loss": 0.3777, "step": 811600 }, { "epoch": 8.27, "learning_rate": 2.6723844416132854e-05, "loss": 0.3557, "step": 811700 }, { "epoch": 8.27, "learning_rate": 2.671731650944767e-05, "loss": 0.381, "step": 811800 }, { "epoch": 8.27, "learning_rate": 2.67107228240133e-05, "loss": 0.342, "step": 811900 }, { "epoch": 8.27, "learning_rate": 2.6704129299404957e-05, "loss": 0.3632, "step": 812000 }, { "epoch": 8.27, "learning_rate": 2.669753593594503e-05, "loss": 0.3779, "step": 812100 }, { "epoch": 8.27, "learning_rate": 2.669094273395588e-05, "loss": 0.3675, "step": 812200 }, { "epoch": 8.28, "learning_rate": 2.668434969375988e-05, "loss": 0.4647, "step": 812300 }, { "epoch": 8.28, "learning_rate": 2.667775681567941e-05, "loss": 0.3769, "step": 812400 }, { "epoch": 8.28, "learning_rate": 2.667116410003679e-05, "loss": 0.3933, "step": 812500 }, { "epoch": 8.28, "learning_rate": 2.666457154715438e-05, "loss": 0.3458, "step": 812600 }, { "epoch": 8.28, "learning_rate": 2.6657979157354524e-05, "loss": 0.3704, "step": 812700 }, { "epoch": 8.28, "learning_rate": 2.6651386930959535e-05, "loss": 0.445, "step": 812800 }, { "epoch": 8.28, "learning_rate": 2.6644794868291743e-05, "loss": 0.4052, "step": 812900 }, { "epoch": 8.28, "learning_rate": 2.663820296967346e-05, "loss": 0.4391, "step": 813000 }, { "epoch": 8.28, "learning_rate": 2.6631611235427e-05, "loss": 0.4144, "step": 813100 }, { "epoch": 8.29, "learning_rate": 2.662501966587464e-05, "loss": 0.3706, "step": 813200 }, { "epoch": 8.29, "learning_rate": 2.6618428261338676e-05, "loss": 0.4063, "step": 813300 }, { "epoch": 8.29, "learning_rate": 2.6611837022141397e-05, "loss": 0.3801, "step": 813400 }, { "epoch": 8.29, "learning_rate": 2.660524594860506e-05, "loss": 0.3702, "step": 813500 }, { "epoch": 8.29, "learning_rate": 2.6598655041051937e-05, "loss": 0.3654, "step": 813600 }, { "epoch": 8.29, "learning_rate": 2.6592064299804297e-05, "loss": 0.4254, "step": 813700 }, { "epoch": 8.29, "learning_rate": 2.6585473725184365e-05, "loss": 0.3777, "step": 813800 }, { "epoch": 8.29, "learning_rate": 2.6578949220763635e-05, "loss": 0.4033, "step": 813900 }, { "epoch": 8.29, "learning_rate": 2.6572358978691532e-05, "loss": 0.3838, "step": 814000 }, { "epoch": 8.29, "learning_rate": 2.656576890421062e-05, "loss": 0.3772, "step": 814100 }, { "epoch": 8.3, "learning_rate": 2.655917899764312e-05, "loss": 0.3294, "step": 814200 }, { "epoch": 8.3, "learning_rate": 2.6552589259311226e-05, "loss": 0.3966, "step": 814300 }, { "epoch": 8.3, "learning_rate": 2.6545999689537146e-05, "loss": 0.3856, "step": 814400 }, { "epoch": 8.3, "learning_rate": 2.653941028864308e-05, "loss": 0.398, "step": 814500 }, { "epoch": 8.3, "learning_rate": 2.653282105695119e-05, "loss": 0.4041, "step": 814600 }, { "epoch": 8.3, "learning_rate": 2.6526231994783654e-05, "loss": 0.3772, "step": 814700 }, { "epoch": 8.3, "learning_rate": 2.6519643102462655e-05, "loss": 0.4001, "step": 814800 }, { "epoch": 8.3, "learning_rate": 2.6513054380310325e-05, "loss": 0.2917, "step": 814900 }, { "epoch": 8.3, "learning_rate": 2.6506465828648834e-05, "loss": 0.3559, "step": 815000 }, { "epoch": 8.3, "learning_rate": 2.6499877447800313e-05, "loss": 0.4071, "step": 815100 }, { "epoch": 8.31, "learning_rate": 2.649328923808691e-05, "loss": 0.4038, "step": 815200 }, { "epoch": 8.31, "learning_rate": 2.6486701199830723e-05, "loss": 0.3452, "step": 815300 }, { "epoch": 8.31, "learning_rate": 2.6480113333353882e-05, "loss": 0.3623, "step": 815400 }, { "epoch": 8.31, "learning_rate": 2.64735256389785e-05, "loss": 0.3731, "step": 815500 }, { "epoch": 8.31, "learning_rate": 2.646693811702666e-05, "loss": 0.3631, "step": 815600 }, { "epoch": 8.31, "learning_rate": 2.6460350767820464e-05, "loss": 0.363, "step": 815700 }, { "epoch": 8.31, "learning_rate": 2.6453763591682007e-05, "loss": 0.363, "step": 815800 }, { "epoch": 8.31, "learning_rate": 2.6447176588933332e-05, "loss": 0.3863, "step": 815900 }, { "epoch": 8.31, "learning_rate": 2.644058975989652e-05, "loss": 0.3876, "step": 816000 }, { "epoch": 8.31, "learning_rate": 2.6434003104893632e-05, "loss": 0.418, "step": 816100 }, { "epoch": 8.32, "learning_rate": 2.6427416624246705e-05, "loss": 0.3698, "step": 816200 }, { "epoch": 8.32, "learning_rate": 2.6420830318277788e-05, "loss": 0.3431, "step": 816300 }, { "epoch": 8.32, "learning_rate": 2.641424418730892e-05, "loss": 0.3325, "step": 816400 }, { "epoch": 8.32, "learning_rate": 2.64076582316621e-05, "loss": 0.4131, "step": 816500 }, { "epoch": 8.32, "learning_rate": 2.6401072451659357e-05, "loss": 0.3672, "step": 816600 }, { "epoch": 8.32, "learning_rate": 2.63944868476227e-05, "loss": 0.4261, "step": 816700 }, { "epoch": 8.32, "learning_rate": 2.638790141987411e-05, "loss": 0.4085, "step": 816800 }, { "epoch": 8.32, "learning_rate": 2.6381316168735585e-05, "loss": 0.3531, "step": 816900 }, { "epoch": 8.32, "learning_rate": 2.6374731094529105e-05, "loss": 0.411, "step": 817000 }, { "epoch": 8.32, "learning_rate": 2.6368146197576657e-05, "loss": 0.359, "step": 817100 }, { "epoch": 8.33, "learning_rate": 2.6361561478200167e-05, "loss": 0.3749, "step": 817200 }, { "epoch": 8.33, "learning_rate": 2.635497693672161e-05, "loss": 0.3829, "step": 817300 }, { "epoch": 8.33, "learning_rate": 2.6348392573462933e-05, "loss": 0.3949, "step": 817400 }, { "epoch": 8.33, "learning_rate": 2.6341808388746064e-05, "loss": 0.4639, "step": 817500 }, { "epoch": 8.33, "learning_rate": 2.6335224382892933e-05, "loss": 0.397, "step": 817600 }, { "epoch": 8.33, "learning_rate": 2.6328640556225472e-05, "loss": 0.4063, "step": 817700 }, { "epoch": 8.33, "learning_rate": 2.6322056909065567e-05, "loss": 0.4633, "step": 817800 }, { "epoch": 8.33, "learning_rate": 2.631547344173513e-05, "loss": 0.4132, "step": 817900 }, { "epoch": 8.33, "learning_rate": 2.6308890154556056e-05, "loss": 0.4191, "step": 818000 }, { "epoch": 8.33, "learning_rate": 2.6302307047850222e-05, "loss": 0.4026, "step": 818100 }, { "epoch": 8.34, "learning_rate": 2.629572412193951e-05, "loss": 0.3468, "step": 818200 }, { "epoch": 8.34, "learning_rate": 2.6289141377145795e-05, "loss": 0.3166, "step": 818300 }, { "epoch": 8.34, "learning_rate": 2.6282558813790905e-05, "loss": 0.4374, "step": 818400 }, { "epoch": 8.34, "learning_rate": 2.6275976432196703e-05, "loss": 0.4317, "step": 818500 }, { "epoch": 8.34, "learning_rate": 2.6269394232685033e-05, "loss": 0.3569, "step": 818600 }, { "epoch": 8.34, "learning_rate": 2.6262812215577727e-05, "loss": 0.3371, "step": 818700 }, { "epoch": 8.34, "learning_rate": 2.6256230381196597e-05, "loss": 0.3212, "step": 818800 }, { "epoch": 8.34, "learning_rate": 2.6249648729863457e-05, "loss": 0.3947, "step": 818900 }, { "epoch": 8.34, "learning_rate": 2.6243067261900127e-05, "loss": 0.41, "step": 819000 }, { "epoch": 8.35, "learning_rate": 2.6236485977628376e-05, "loss": 0.4051, "step": 819100 }, { "epoch": 8.35, "learning_rate": 2.622990487737e-05, "loss": 0.4169, "step": 819200 }, { "epoch": 8.35, "learning_rate": 2.6223323961446783e-05, "loss": 0.3598, "step": 819300 }, { "epoch": 8.35, "learning_rate": 2.621674323018048e-05, "loss": 0.3422, "step": 819400 }, { "epoch": 8.35, "learning_rate": 2.6210162683892855e-05, "loss": 0.3323, "step": 819500 }, { "epoch": 8.35, "learning_rate": 2.6203582322905675e-05, "loss": 0.4301, "step": 819600 }, { "epoch": 8.35, "learning_rate": 2.6197002147540646e-05, "loss": 0.3941, "step": 819700 }, { "epoch": 8.35, "learning_rate": 2.619042215811952e-05, "loss": 0.3936, "step": 819800 }, { "epoch": 8.35, "learning_rate": 2.618384235496402e-05, "loss": 0.3335, "step": 819900 }, { "epoch": 8.35, "learning_rate": 2.617732853363686e-05, "loss": 0.4094, "step": 820000 }, { "epoch": 8.36, "learning_rate": 2.617074910210706e-05, "loss": 0.3468, "step": 820100 }, { "epoch": 8.36, "learning_rate": 2.6164169857804765e-05, "loss": 0.3371, "step": 820200 }, { "epoch": 8.36, "learning_rate": 2.615765659068978e-05, "loss": 0.4077, "step": 820300 }, { "epoch": 8.36, "learning_rate": 2.6151077719927266e-05, "loss": 0.3025, "step": 820400 }, { "epoch": 8.36, "learning_rate": 2.6144499037354083e-05, "loss": 0.4115, "step": 820500 }, { "epoch": 8.36, "learning_rate": 2.6137920543291893e-05, "loss": 0.3197, "step": 820600 }, { "epoch": 8.36, "learning_rate": 2.6131342238062333e-05, "loss": 0.382, "step": 820700 }, { "epoch": 8.36, "learning_rate": 2.612476412198705e-05, "loss": 0.4047, "step": 820800 }, { "epoch": 8.36, "learning_rate": 2.6118186195387694e-05, "loss": 0.3783, "step": 820900 }, { "epoch": 8.36, "learning_rate": 2.611160845858585e-05, "loss": 0.3644, "step": 821000 }, { "epoch": 8.37, "learning_rate": 2.610503091190315e-05, "loss": 0.3731, "step": 821100 }, { "epoch": 8.37, "learning_rate": 2.60984535556612e-05, "loss": 0.438, "step": 821200 }, { "epoch": 8.37, "learning_rate": 2.609187639018158e-05, "loss": 0.3298, "step": 821300 }, { "epoch": 8.37, "learning_rate": 2.6085299415785885e-05, "loss": 0.3654, "step": 821400 }, { "epoch": 8.37, "learning_rate": 2.6078722632795694e-05, "loss": 0.4221, "step": 821500 }, { "epoch": 8.37, "learning_rate": 2.6072146041532556e-05, "loss": 0.3866, "step": 821600 }, { "epoch": 8.37, "learning_rate": 2.606556964231804e-05, "loss": 0.4139, "step": 821700 }, { "epoch": 8.37, "learning_rate": 2.6058993435473685e-05, "loss": 0.4018, "step": 821800 }, { "epoch": 8.37, "learning_rate": 2.6052417421321037e-05, "loss": 0.3903, "step": 821900 }, { "epoch": 8.37, "learning_rate": 2.6045841600181616e-05, "loss": 0.3648, "step": 822000 }, { "epoch": 8.38, "learning_rate": 2.6039265972376945e-05, "loss": 0.3833, "step": 822100 }, { "epoch": 8.38, "learning_rate": 2.603269053822854e-05, "loss": 0.3732, "step": 822200 }, { "epoch": 8.38, "learning_rate": 2.6026115298057883e-05, "loss": 0.4303, "step": 822300 }, { "epoch": 8.38, "learning_rate": 2.601954025218647e-05, "loss": 0.4423, "step": 822400 }, { "epoch": 8.38, "learning_rate": 2.6012965400935792e-05, "loss": 0.392, "step": 822500 }, { "epoch": 8.38, "learning_rate": 2.600639074462731e-05, "loss": 0.4075, "step": 822600 }, { "epoch": 8.38, "learning_rate": 2.5999816283582482e-05, "loss": 0.3736, "step": 822700 }, { "epoch": 8.38, "learning_rate": 2.5993242018122777e-05, "loss": 0.3871, "step": 822800 }, { "epoch": 8.38, "learning_rate": 2.5986667948569622e-05, "loss": 0.3345, "step": 822900 }, { "epoch": 8.38, "learning_rate": 2.598009407524445e-05, "loss": 0.3735, "step": 823000 }, { "epoch": 8.39, "learning_rate": 2.5973520398468692e-05, "loss": 0.4066, "step": 823100 }, { "epoch": 8.39, "learning_rate": 2.596694691856375e-05, "loss": 0.3382, "step": 823200 }, { "epoch": 8.39, "learning_rate": 2.596037363585104e-05, "loss": 0.3773, "step": 823300 }, { "epoch": 8.39, "learning_rate": 2.5953800550651954e-05, "loss": 0.2968, "step": 823400 }, { "epoch": 8.39, "learning_rate": 2.5947227663287884e-05, "loss": 0.4196, "step": 823500 }, { "epoch": 8.39, "learning_rate": 2.5940654974080183e-05, "loss": 0.4093, "step": 823600 }, { "epoch": 8.39, "learning_rate": 2.5934082483350227e-05, "loss": 0.4187, "step": 823700 }, { "epoch": 8.39, "learning_rate": 2.5927510191419386e-05, "loss": 0.3528, "step": 823800 }, { "epoch": 8.39, "learning_rate": 2.5920938098608982e-05, "loss": 0.4095, "step": 823900 }, { "epoch": 8.4, "learning_rate": 2.5914366205240363e-05, "loss": 0.4034, "step": 824000 }, { "epoch": 8.4, "learning_rate": 2.590779451163487e-05, "loss": 0.4511, "step": 824100 }, { "epoch": 8.4, "learning_rate": 2.5901223018113784e-05, "loss": 0.374, "step": 824200 }, { "epoch": 8.4, "learning_rate": 2.5894651724998438e-05, "loss": 0.4259, "step": 824300 }, { "epoch": 8.4, "learning_rate": 2.588808063261013e-05, "loss": 0.3929, "step": 824400 }, { "epoch": 8.4, "learning_rate": 2.5881509741270134e-05, "loss": 0.3305, "step": 824500 }, { "epoch": 8.4, "learning_rate": 2.5874939051299732e-05, "loss": 0.359, "step": 824600 }, { "epoch": 8.4, "learning_rate": 2.5868368563020205e-05, "loss": 0.3497, "step": 824700 }, { "epoch": 8.4, "learning_rate": 2.586179827675279e-05, "loss": 0.4083, "step": 824800 }, { "epoch": 8.4, "learning_rate": 2.5855228192818742e-05, "loss": 0.356, "step": 824900 }, { "epoch": 8.41, "learning_rate": 2.5848658311539305e-05, "loss": 0.3516, "step": 825000 }, { "epoch": 8.41, "learning_rate": 2.58420886332357e-05, "loss": 0.3894, "step": 825100 }, { "epoch": 8.41, "learning_rate": 2.583565054573489e-05, "loss": 0.3765, "step": 825200 }, { "epoch": 8.41, "learning_rate": 2.5829081270271084e-05, "loss": 0.4601, "step": 825300 }, { "epoch": 8.41, "learning_rate": 2.5822512198740325e-05, "loss": 0.3807, "step": 825400 }, { "epoch": 8.41, "learning_rate": 2.5815943331463778e-05, "loss": 0.4226, "step": 825500 }, { "epoch": 8.41, "learning_rate": 2.5809374668762632e-05, "loss": 0.3577, "step": 825600 }, { "epoch": 8.41, "learning_rate": 2.5802806210958065e-05, "loss": 0.3851, "step": 825700 }, { "epoch": 8.41, "learning_rate": 2.5796237958371217e-05, "loss": 0.4374, "step": 825800 }, { "epoch": 8.41, "learning_rate": 2.5789669911323246e-05, "loss": 0.3843, "step": 825900 }, { "epoch": 8.42, "learning_rate": 2.578316774752712e-05, "loss": 0.4252, "step": 826000 }, { "epoch": 8.42, "learning_rate": 2.5776600110456908e-05, "loss": 0.4374, "step": 826100 }, { "epoch": 8.42, "learning_rate": 2.5770032679885757e-05, "loss": 0.4129, "step": 826200 }, { "epoch": 8.42, "learning_rate": 2.576346545613475e-05, "loss": 0.4307, "step": 826300 }, { "epoch": 8.42, "learning_rate": 2.5756898439525005e-05, "loss": 0.4115, "step": 826400 }, { "epoch": 8.42, "learning_rate": 2.5750331630377618e-05, "loss": 0.3378, "step": 826500 }, { "epoch": 8.42, "learning_rate": 2.5743765029013643e-05, "loss": 0.3292, "step": 826600 }, { "epoch": 8.42, "learning_rate": 2.5737198635754154e-05, "loss": 0.3697, "step": 826700 }, { "epoch": 8.42, "learning_rate": 2.5730632450920227e-05, "loss": 0.4742, "step": 826800 }, { "epoch": 8.42, "learning_rate": 2.5724066474832877e-05, "loss": 0.3555, "step": 826900 }, { "epoch": 8.43, "learning_rate": 2.5717500707813164e-05, "loss": 0.3648, "step": 827000 }, { "epoch": 8.43, "learning_rate": 2.571093515018211e-05, "loss": 0.3602, "step": 827100 }, { "epoch": 8.43, "learning_rate": 2.5704369802260743e-05, "loss": 0.3672, "step": 827200 }, { "epoch": 8.43, "learning_rate": 2.5697804664370046e-05, "loss": 0.4047, "step": 827300 }, { "epoch": 8.43, "learning_rate": 2.5691239736831026e-05, "loss": 0.378, "step": 827400 }, { "epoch": 8.43, "learning_rate": 2.5684675019964678e-05, "loss": 0.3654, "step": 827500 }, { "epoch": 8.43, "learning_rate": 2.567811051409196e-05, "loss": 0.3638, "step": 827600 }, { "epoch": 8.43, "learning_rate": 2.567154621953385e-05, "loss": 0.362, "step": 827700 }, { "epoch": 8.43, "learning_rate": 2.5664982136611307e-05, "loss": 0.3511, "step": 827800 }, { "epoch": 8.43, "learning_rate": 2.5658418265645257e-05, "loss": 0.3793, "step": 827900 }, { "epoch": 8.44, "learning_rate": 2.5651854606956646e-05, "loss": 0.3702, "step": 828000 }, { "epoch": 8.44, "learning_rate": 2.564529116086641e-05, "loss": 0.4241, "step": 828100 }, { "epoch": 8.44, "learning_rate": 2.5638727927695423e-05, "loss": 0.4239, "step": 828200 }, { "epoch": 8.44, "learning_rate": 2.5632164907764627e-05, "loss": 0.3703, "step": 828300 }, { "epoch": 8.44, "learning_rate": 2.5625602101394912e-05, "loss": 0.3697, "step": 828400 }, { "epoch": 8.44, "learning_rate": 2.5619039508907132e-05, "loss": 0.3987, "step": 828500 }, { "epoch": 8.44, "learning_rate": 2.561254275334368e-05, "loss": 0.4274, "step": 828600 }, { "epoch": 8.44, "learning_rate": 2.5605980587435584e-05, "loss": 0.3799, "step": 828700 }, { "epoch": 8.44, "learning_rate": 2.559941863636882e-05, "loss": 0.4027, "step": 828800 }, { "epoch": 8.44, "learning_rate": 2.559285690046423e-05, "loss": 0.4281, "step": 828900 }, { "epoch": 8.45, "learning_rate": 2.5586295380042625e-05, "loss": 0.415, "step": 829000 }, { "epoch": 8.45, "learning_rate": 2.557973407542484e-05, "loss": 0.3991, "step": 829100 }, { "epoch": 8.45, "learning_rate": 2.5573172986931695e-05, "loss": 0.3228, "step": 829200 }, { "epoch": 8.45, "learning_rate": 2.556661211488396e-05, "loss": 0.3241, "step": 829300 }, { "epoch": 8.45, "learning_rate": 2.5560051459602434e-05, "loss": 0.4066, "step": 829400 }, { "epoch": 8.45, "learning_rate": 2.5553491021407894e-05, "loss": 0.4635, "step": 829500 }, { "epoch": 8.45, "learning_rate": 2.5546930800621115e-05, "loss": 0.3895, "step": 829600 }, { "epoch": 8.45, "learning_rate": 2.554037079756284e-05, "loss": 0.419, "step": 829700 }, { "epoch": 8.45, "learning_rate": 2.5533811012553817e-05, "loss": 0.3383, "step": 829800 }, { "epoch": 8.46, "learning_rate": 2.5527251445914798e-05, "loss": 0.3552, "step": 829900 }, { "epoch": 8.46, "learning_rate": 2.5520692097966475e-05, "loss": 0.3996, "step": 830000 }, { "epoch": 8.46, "learning_rate": 2.5514132969029582e-05, "loss": 0.4659, "step": 830100 }, { "epoch": 8.46, "learning_rate": 2.5507574059424818e-05, "loss": 0.3715, "step": 830200 }, { "epoch": 8.46, "learning_rate": 2.550101536947287e-05, "loss": 0.3717, "step": 830300 }, { "epoch": 8.46, "learning_rate": 2.549445689949442e-05, "loss": 0.3539, "step": 830400 }, { "epoch": 8.46, "learning_rate": 2.5487898649810157e-05, "loss": 0.4722, "step": 830500 }, { "epoch": 8.46, "learning_rate": 2.5481340620740705e-05, "loss": 0.4086, "step": 830600 }, { "epoch": 8.46, "learning_rate": 2.5474782812606733e-05, "loss": 0.3772, "step": 830700 }, { "epoch": 8.46, "learning_rate": 2.5468225225728883e-05, "loss": 0.3444, "step": 830800 }, { "epoch": 8.47, "learning_rate": 2.5461667860427767e-05, "loss": 0.375, "step": 830900 }, { "epoch": 8.47, "learning_rate": 2.5455110717024015e-05, "loss": 0.3737, "step": 831000 }, { "epoch": 8.47, "learning_rate": 2.5448553795838232e-05, "loss": 0.3984, "step": 831100 }, { "epoch": 8.47, "learning_rate": 2.5441997097191e-05, "loss": 0.4175, "step": 831200 }, { "epoch": 8.47, "learning_rate": 2.5435440621402906e-05, "loss": 0.3895, "step": 831300 }, { "epoch": 8.47, "learning_rate": 2.5428884368794526e-05, "loss": 0.354, "step": 831400 }, { "epoch": 8.47, "learning_rate": 2.5422328339686428e-05, "loss": 0.3669, "step": 831500 }, { "epoch": 8.47, "learning_rate": 2.5415772534399146e-05, "loss": 0.4207, "step": 831600 }, { "epoch": 8.47, "learning_rate": 2.5409216953253235e-05, "loss": 0.3863, "step": 831700 }, { "epoch": 8.47, "learning_rate": 2.540266159656923e-05, "loss": 0.407, "step": 831800 }, { "epoch": 8.48, "learning_rate": 2.5396106464667627e-05, "loss": 0.4362, "step": 831900 }, { "epoch": 8.48, "learning_rate": 2.538955155786894e-05, "loss": 0.3383, "step": 832000 }, { "epoch": 8.48, "learning_rate": 2.5382996876493675e-05, "loss": 0.448, "step": 832100 }, { "epoch": 8.48, "learning_rate": 2.5376442420862304e-05, "loss": 0.3632, "step": 832200 }, { "epoch": 8.48, "learning_rate": 2.5369888191295306e-05, "loss": 0.4123, "step": 832300 }, { "epoch": 8.48, "learning_rate": 2.5363334188113158e-05, "loss": 0.3583, "step": 832400 }, { "epoch": 8.48, "learning_rate": 2.5356780411636284e-05, "loss": 0.4055, "step": 832500 }, { "epoch": 8.48, "learning_rate": 2.5350226862185138e-05, "loss": 0.3471, "step": 832600 }, { "epoch": 8.48, "learning_rate": 2.534367354008016e-05, "loss": 0.35, "step": 832700 }, { "epoch": 8.48, "learning_rate": 2.533712044564175e-05, "loss": 0.4121, "step": 832800 }, { "epoch": 8.49, "learning_rate": 2.5330633106725255e-05, "loss": 0.3793, "step": 832900 }, { "epoch": 8.49, "learning_rate": 2.532408046629655e-05, "loss": 0.3512, "step": 833000 }, { "epoch": 8.49, "learning_rate": 2.5317528054492404e-05, "loss": 0.3417, "step": 833100 }, { "epoch": 8.49, "learning_rate": 2.5310975871633207e-05, "loss": 0.399, "step": 833200 }, { "epoch": 8.49, "learning_rate": 2.530442391803929e-05, "loss": 0.4513, "step": 833300 }, { "epoch": 8.49, "learning_rate": 2.5297872194031024e-05, "loss": 0.3194, "step": 833400 }, { "epoch": 8.49, "learning_rate": 2.5291320699928757e-05, "loss": 0.3695, "step": 833500 }, { "epoch": 8.49, "learning_rate": 2.5284769436052796e-05, "loss": 0.4037, "step": 833600 }, { "epoch": 8.49, "learning_rate": 2.5278218402723476e-05, "loss": 0.3444, "step": 833700 }, { "epoch": 8.49, "learning_rate": 2.5271667600261096e-05, "loss": 0.3373, "step": 833800 }, { "epoch": 8.5, "learning_rate": 2.526511702898596e-05, "loss": 0.3818, "step": 833900 }, { "epoch": 8.5, "learning_rate": 2.5258566689218342e-05, "loss": 0.3999, "step": 834000 }, { "epoch": 8.5, "learning_rate": 2.5252016581278507e-05, "loss": 0.411, "step": 834100 }, { "epoch": 8.5, "learning_rate": 2.5245466705486743e-05, "loss": 0.3549, "step": 834200 }, { "epoch": 8.5, "learning_rate": 2.523891706216327e-05, "loss": 0.344, "step": 834300 }, { "epoch": 8.5, "learning_rate": 2.5232367651628344e-05, "loss": 0.4131, "step": 834400 }, { "epoch": 8.5, "learning_rate": 2.52258184742022e-05, "loss": 0.3825, "step": 834500 }, { "epoch": 8.5, "learning_rate": 2.521926953020503e-05, "loss": 0.419, "step": 834600 }, { "epoch": 8.5, "learning_rate": 2.5212720819957046e-05, "loss": 0.4012, "step": 834700 }, { "epoch": 8.51, "learning_rate": 2.5206172343778455e-05, "loss": 0.4012, "step": 834800 }, { "epoch": 8.51, "learning_rate": 2.5199624101989422e-05, "loss": 0.4201, "step": 834900 }, { "epoch": 8.51, "learning_rate": 2.5193076094910122e-05, "loss": 0.3052, "step": 835000 }, { "epoch": 8.51, "learning_rate": 2.5186528322860726e-05, "loss": 0.3954, "step": 835100 }, { "epoch": 8.51, "learning_rate": 2.517998078616136e-05, "loss": 0.353, "step": 835200 }, { "epoch": 8.51, "learning_rate": 2.517343348513217e-05, "loss": 0.3584, "step": 835300 }, { "epoch": 8.51, "learning_rate": 2.516695188957446e-05, "loss": 0.3826, "step": 835400 }, { "epoch": 8.51, "learning_rate": 2.516040505848129e-05, "loss": 0.3807, "step": 835500 }, { "epoch": 8.51, "learning_rate": 2.5153858464015445e-05, "loss": 0.3492, "step": 835600 }, { "epoch": 8.51, "learning_rate": 2.5147312106496987e-05, "loss": 0.343, "step": 835700 }, { "epoch": 8.52, "learning_rate": 2.514076598624601e-05, "loss": 0.4204, "step": 835800 }, { "epoch": 8.52, "learning_rate": 2.5134220103582574e-05, "loss": 0.398, "step": 835900 }, { "epoch": 8.52, "learning_rate": 2.5127674458826745e-05, "loss": 0.36, "step": 836000 }, { "epoch": 8.52, "learning_rate": 2.5121194505183565e-05, "loss": 0.3538, "step": 836100 }, { "epoch": 8.52, "learning_rate": 2.5114649334815982e-05, "loss": 0.3591, "step": 836200 }, { "epoch": 8.52, "learning_rate": 2.5108104403312897e-05, "loss": 0.4186, "step": 836300 }, { "epoch": 8.52, "learning_rate": 2.5101559710994328e-05, "loss": 0.3692, "step": 836400 }, { "epoch": 8.52, "learning_rate": 2.509501525818024e-05, "loss": 0.3875, "step": 836500 }, { "epoch": 8.52, "learning_rate": 2.5088471045190634e-05, "loss": 0.4127, "step": 836600 }, { "epoch": 8.52, "learning_rate": 2.5081927072345486e-05, "loss": 0.3646, "step": 836700 }, { "epoch": 8.53, "learning_rate": 2.5075383339964758e-05, "loss": 0.373, "step": 836800 }, { "epoch": 8.53, "learning_rate": 2.5068839848368396e-05, "loss": 0.3717, "step": 836900 }, { "epoch": 8.53, "learning_rate": 2.506229659787633e-05, "loss": 0.3989, "step": 837000 }, { "epoch": 8.53, "learning_rate": 2.5055753588808512e-05, "loss": 0.4239, "step": 837100 }, { "epoch": 8.53, "learning_rate": 2.5049210821484822e-05, "loss": 0.3465, "step": 837200 }, { "epoch": 8.53, "learning_rate": 2.5042668296225178e-05, "loss": 0.372, "step": 837300 }, { "epoch": 8.53, "learning_rate": 2.5036126013349477e-05, "loss": 0.3368, "step": 837400 }, { "epoch": 8.53, "learning_rate": 2.5029583973177585e-05, "loss": 0.2711, "step": 837500 }, { "epoch": 8.53, "learning_rate": 2.502304217602938e-05, "loss": 0.4348, "step": 837600 }, { "epoch": 8.53, "learning_rate": 2.5016500622224718e-05, "loss": 0.4055, "step": 837700 }, { "epoch": 8.54, "learning_rate": 2.5009959312083422e-05, "loss": 0.4398, "step": 837800 }, { "epoch": 8.54, "learning_rate": 2.500341824592534e-05, "loss": 0.4162, "step": 837900 }, { "epoch": 8.54, "learning_rate": 2.4996877424070292e-05, "loss": 0.4295, "step": 838000 }, { "epoch": 8.54, "learning_rate": 2.4990336846838078e-05, "loss": 0.338, "step": 838100 }, { "epoch": 8.54, "learning_rate": 2.4983796514548496e-05, "loss": 0.4273, "step": 838200 }, { "epoch": 8.54, "learning_rate": 2.4977256427521325e-05, "loss": 0.3346, "step": 838300 }, { "epoch": 8.54, "learning_rate": 2.4970716586076355e-05, "loss": 0.3765, "step": 838400 }, { "epoch": 8.54, "learning_rate": 2.4964176990533317e-05, "loss": 0.4378, "step": 838500 }, { "epoch": 8.54, "learning_rate": 2.4957637641211975e-05, "loss": 0.3292, "step": 838600 }, { "epoch": 8.54, "learning_rate": 2.495109853843207e-05, "loss": 0.3692, "step": 838700 }, { "epoch": 8.55, "learning_rate": 2.4944559682513304e-05, "loss": 0.4078, "step": 838800 }, { "epoch": 8.55, "learning_rate": 2.49380210737754e-05, "loss": 0.3623, "step": 838900 }, { "epoch": 8.55, "learning_rate": 2.4931482712538075e-05, "loss": 0.4284, "step": 839000 }, { "epoch": 8.55, "learning_rate": 2.492494459912098e-05, "loss": 0.3763, "step": 839100 }, { "epoch": 8.55, "learning_rate": 2.491840673384381e-05, "loss": 0.4238, "step": 839200 }, { "epoch": 8.55, "learning_rate": 2.491186911702623e-05, "loss": 0.3712, "step": 839300 }, { "epoch": 8.55, "learning_rate": 2.4905331748987882e-05, "loss": 0.4037, "step": 839400 }, { "epoch": 8.55, "learning_rate": 2.489879463004841e-05, "loss": 0.3426, "step": 839500 }, { "epoch": 8.55, "learning_rate": 2.4892257760527445e-05, "loss": 0.3279, "step": 839600 }, { "epoch": 8.56, "learning_rate": 2.4885721140744582e-05, "loss": 0.3978, "step": 839700 }, { "epoch": 8.56, "learning_rate": 2.4879184771019438e-05, "loss": 0.3805, "step": 839800 }, { "epoch": 8.56, "learning_rate": 2.487264865167161e-05, "loss": 0.392, "step": 839900 }, { "epoch": 8.56, "learning_rate": 2.486611278302065e-05, "loss": 0.3517, "step": 840000 }, { "epoch": 8.56, "learning_rate": 2.4859577165386143e-05, "loss": 0.3113, "step": 840100 }, { "epoch": 8.56, "learning_rate": 2.4853041799087633e-05, "loss": 0.3593, "step": 840200 }, { "epoch": 8.56, "learning_rate": 2.4846506684444678e-05, "loss": 0.3953, "step": 840300 }, { "epoch": 8.56, "learning_rate": 2.4839971821776776e-05, "loss": 0.4712, "step": 840400 }, { "epoch": 8.56, "learning_rate": 2.4833437211403462e-05, "loss": 0.3553, "step": 840500 }, { "epoch": 8.56, "learning_rate": 2.482690285364424e-05, "loss": 0.3651, "step": 840600 }, { "epoch": 8.57, "learning_rate": 2.4820368748818592e-05, "loss": 0.4076, "step": 840700 }, { "epoch": 8.57, "learning_rate": 2.4813834897246e-05, "loss": 0.3867, "step": 840800 }, { "epoch": 8.57, "learning_rate": 2.4807301299245947e-05, "loss": 0.3883, "step": 840900 }, { "epoch": 8.57, "learning_rate": 2.480076795513786e-05, "loss": 0.423, "step": 841000 }, { "epoch": 8.57, "learning_rate": 2.479423486524119e-05, "loss": 0.3962, "step": 841100 }, { "epoch": 8.57, "learning_rate": 2.4787702029875376e-05, "loss": 0.3681, "step": 841200 }, { "epoch": 8.57, "learning_rate": 2.478116944935982e-05, "loss": 0.3009, "step": 841300 }, { "epoch": 8.57, "learning_rate": 2.4774637124013934e-05, "loss": 0.3684, "step": 841400 }, { "epoch": 8.57, "learning_rate": 2.4768105054157123e-05, "loss": 0.4103, "step": 841500 }, { "epoch": 8.57, "learning_rate": 2.4761573240108738e-05, "loss": 0.4381, "step": 841600 }, { "epoch": 8.58, "learning_rate": 2.4755041682188164e-05, "loss": 0.4026, "step": 841700 }, { "epoch": 8.58, "learning_rate": 2.474851038071475e-05, "loss": 0.3927, "step": 841800 }, { "epoch": 8.58, "learning_rate": 2.4741979336007844e-05, "loss": 0.4146, "step": 841900 }, { "epoch": 8.58, "learning_rate": 2.4735448548386764e-05, "loss": 0.3842, "step": 842000 }, { "epoch": 8.58, "learning_rate": 2.4728918018170834e-05, "loss": 0.3957, "step": 842100 }, { "epoch": 8.58, "learning_rate": 2.4722387745679367e-05, "loss": 0.3555, "step": 842200 }, { "epoch": 8.58, "learning_rate": 2.4715857731231635e-05, "loss": 0.4289, "step": 842300 }, { "epoch": 8.58, "learning_rate": 2.4709393271427827e-05, "loss": 0.268, "step": 842400 }, { "epoch": 8.58, "learning_rate": 2.4702863771437e-05, "loss": 0.3139, "step": 842500 }, { "epoch": 8.58, "learning_rate": 2.4696334530444523e-05, "loss": 0.4254, "step": 842600 }, { "epoch": 8.59, "learning_rate": 2.4689805548769654e-05, "loss": 0.4048, "step": 842700 }, { "epoch": 8.59, "learning_rate": 2.468327682673159e-05, "loss": 0.3038, "step": 842800 }, { "epoch": 8.59, "learning_rate": 2.4676748364649556e-05, "loss": 0.4015, "step": 842900 }, { "epoch": 8.59, "learning_rate": 2.4670220162842773e-05, "loss": 0.426, "step": 843000 }, { "epoch": 8.59, "learning_rate": 2.4663692221630406e-05, "loss": 0.4436, "step": 843100 }, { "epoch": 8.59, "learning_rate": 2.465716454133164e-05, "loss": 0.4208, "step": 843200 }, { "epoch": 8.59, "learning_rate": 2.4650637122265667e-05, "loss": 0.3984, "step": 843300 }, { "epoch": 8.59, "learning_rate": 2.4644109964751595e-05, "loss": 0.3664, "step": 843400 }, { "epoch": 8.59, "learning_rate": 2.4637583069108588e-05, "loss": 0.3739, "step": 843500 }, { "epoch": 8.59, "learning_rate": 2.463112170069141e-05, "loss": 0.3658, "step": 843600 }, { "epoch": 8.6, "learning_rate": 2.4624595327121215e-05, "loss": 0.3545, "step": 843700 }, { "epoch": 8.6, "learning_rate": 2.4618069216376236e-05, "loss": 0.3572, "step": 843800 }, { "epoch": 8.6, "learning_rate": 2.4611543368775544e-05, "loss": 0.3912, "step": 843900 }, { "epoch": 8.6, "learning_rate": 2.460501778463823e-05, "loss": 0.3803, "step": 844000 }, { "epoch": 8.6, "learning_rate": 2.4598492464283363e-05, "loss": 0.4127, "step": 844100 }, { "epoch": 8.6, "learning_rate": 2.4591967408029968e-05, "loss": 0.3454, "step": 844200 }, { "epoch": 8.6, "learning_rate": 2.458544261619709e-05, "loss": 0.3731, "step": 844300 }, { "epoch": 8.6, "learning_rate": 2.4578918089103765e-05, "loss": 0.3481, "step": 844400 }, { "epoch": 8.6, "learning_rate": 2.457239382706899e-05, "loss": 0.3904, "step": 844500 }, { "epoch": 8.6, "learning_rate": 2.456586983041176e-05, "loss": 0.3711, "step": 844600 }, { "epoch": 8.61, "learning_rate": 2.4559346099451083e-05, "loss": 0.3356, "step": 844700 }, { "epoch": 8.61, "learning_rate": 2.45528226345059e-05, "loss": 0.4755, "step": 844800 }, { "epoch": 8.61, "learning_rate": 2.4546299435895182e-05, "loss": 0.3877, "step": 844900 }, { "epoch": 8.61, "learning_rate": 2.4539776503937877e-05, "loss": 0.3691, "step": 845000 }, { "epoch": 8.61, "learning_rate": 2.453325383895292e-05, "loss": 0.4066, "step": 845100 }, { "epoch": 8.61, "learning_rate": 2.452679666391203e-05, "loss": 0.4694, "step": 845200 }, { "epoch": 8.61, "learning_rate": 2.452027453115082e-05, "loss": 0.3818, "step": 845300 }, { "epoch": 8.61, "learning_rate": 2.4513752666315484e-05, "loss": 0.3189, "step": 845400 }, { "epoch": 8.61, "learning_rate": 2.4507231069724906e-05, "loss": 0.3506, "step": 845500 }, { "epoch": 8.62, "learning_rate": 2.4500709741697955e-05, "loss": 0.3871, "step": 845600 }, { "epoch": 8.62, "learning_rate": 2.449418868255347e-05, "loss": 0.3776, "step": 845700 }, { "epoch": 8.62, "learning_rate": 2.4487667892610305e-05, "loss": 0.4138, "step": 845800 }, { "epoch": 8.62, "learning_rate": 2.4481147372187297e-05, "loss": 0.3187, "step": 845900 }, { "epoch": 8.62, "learning_rate": 2.4474627121603233e-05, "loss": 0.3914, "step": 846000 }, { "epoch": 8.62, "learning_rate": 2.4468107141176932e-05, "loss": 0.3118, "step": 846100 }, { "epoch": 8.62, "learning_rate": 2.446158743122718e-05, "loss": 0.3505, "step": 846200 }, { "epoch": 8.62, "learning_rate": 2.445506799207275e-05, "loss": 0.4134, "step": 846300 }, { "epoch": 8.62, "learning_rate": 2.4448548824032404e-05, "loss": 0.4096, "step": 846400 }, { "epoch": 8.62, "learning_rate": 2.4442029927424902e-05, "loss": 0.3713, "step": 846500 }, { "epoch": 8.63, "learning_rate": 2.4435511302568954e-05, "loss": 0.3785, "step": 846600 }, { "epoch": 8.63, "learning_rate": 2.44289929497833e-05, "loss": 0.3656, "step": 846700 }, { "epoch": 8.63, "learning_rate": 2.4422474869386656e-05, "loss": 0.3956, "step": 846800 }, { "epoch": 8.63, "learning_rate": 2.441595706169769e-05, "loss": 0.4015, "step": 846900 }, { "epoch": 8.63, "learning_rate": 2.44094395270351e-05, "loss": 0.3579, "step": 847000 }, { "epoch": 8.63, "learning_rate": 2.440292226571757e-05, "loss": 0.4449, "step": 847100 }, { "epoch": 8.63, "learning_rate": 2.4396405278063728e-05, "loss": 0.4126, "step": 847200 }, { "epoch": 8.63, "learning_rate": 2.4389888564392224e-05, "loss": 0.4421, "step": 847300 }, { "epoch": 8.63, "learning_rate": 2.4383372125021695e-05, "loss": 0.3349, "step": 847400 }, { "epoch": 8.63, "learning_rate": 2.4376855960270753e-05, "loss": 0.3259, "step": 847500 }, { "epoch": 8.64, "learning_rate": 2.4370340070457995e-05, "loss": 0.3161, "step": 847600 }, { "epoch": 8.64, "learning_rate": 2.4363824455902007e-05, "loss": 0.4078, "step": 847700 }, { "epoch": 8.64, "learning_rate": 2.435730911692138e-05, "loss": 0.4066, "step": 847800 }, { "epoch": 8.64, "learning_rate": 2.4350794053834654e-05, "loss": 0.3384, "step": 847900 }, { "epoch": 8.64, "learning_rate": 2.4344279266960386e-05, "loss": 0.4505, "step": 848000 }, { "epoch": 8.64, "learning_rate": 2.4337764756617117e-05, "loss": 0.4436, "step": 848100 }, { "epoch": 8.64, "learning_rate": 2.4331250523123356e-05, "loss": 0.359, "step": 848200 }, { "epoch": 8.64, "learning_rate": 2.432480170498785e-05, "loss": 0.3258, "step": 848300 }, { "epoch": 8.64, "learning_rate": 2.4318288023372174e-05, "loss": 0.3435, "step": 848400 }, { "epoch": 8.64, "learning_rate": 2.4311774619558314e-05, "loss": 0.3454, "step": 848500 }, { "epoch": 8.65, "learning_rate": 2.4305261493864744e-05, "loss": 0.3235, "step": 848600 }, { "epoch": 8.65, "learning_rate": 2.4298748646609885e-05, "loss": 0.3534, "step": 848700 }, { "epoch": 8.65, "learning_rate": 2.4292236078112192e-05, "loss": 0.352, "step": 848800 }, { "epoch": 8.65, "learning_rate": 2.4285723788690104e-05, "loss": 0.4259, "step": 848900 }, { "epoch": 8.65, "learning_rate": 2.4279211778662013e-05, "loss": 0.4238, "step": 849000 }, { "epoch": 8.65, "learning_rate": 2.4272700048346322e-05, "loss": 0.3218, "step": 849100 }, { "epoch": 8.65, "learning_rate": 2.426618859806144e-05, "loss": 0.4105, "step": 849200 }, { "epoch": 8.65, "learning_rate": 2.4259677428125703e-05, "loss": 0.3747, "step": 849300 }, { "epoch": 8.65, "learning_rate": 2.4253166538857482e-05, "loss": 0.3516, "step": 849400 }, { "epoch": 8.65, "learning_rate": 2.424665593057513e-05, "loss": 0.381, "step": 849500 }, { "epoch": 8.66, "learning_rate": 2.424014560359696e-05, "loss": 0.4997, "step": 849600 }, { "epoch": 8.66, "learning_rate": 2.42336355582413e-05, "loss": 0.4258, "step": 849700 }, { "epoch": 8.66, "learning_rate": 2.4227125794826447e-05, "loss": 0.3508, "step": 849800 }, { "epoch": 8.66, "learning_rate": 2.4220616313670704e-05, "loss": 0.3723, "step": 849900 }, { "epoch": 8.66, "learning_rate": 2.421410711509232e-05, "loss": 0.3587, "step": 850000 }, { "epoch": 8.66, "learning_rate": 2.4207598199409574e-05, "loss": 0.3808, "step": 850100 }, { "epoch": 8.66, "learning_rate": 2.4201089566940715e-05, "loss": 0.369, "step": 850200 }, { "epoch": 8.66, "learning_rate": 2.4194581218003958e-05, "loss": 0.4437, "step": 850300 }, { "epoch": 8.66, "learning_rate": 2.4188073152917537e-05, "loss": 0.395, "step": 850400 }, { "epoch": 8.67, "learning_rate": 2.4181565371999672e-05, "loss": 0.3717, "step": 850500 }, { "epoch": 8.67, "learning_rate": 2.417505787556852e-05, "loss": 0.4071, "step": 850600 }, { "epoch": 8.67, "learning_rate": 2.416855066394228e-05, "loss": 0.412, "step": 850700 }, { "epoch": 8.67, "learning_rate": 2.416204373743912e-05, "loss": 0.3478, "step": 850800 }, { "epoch": 8.67, "learning_rate": 2.4155602161373812e-05, "loss": 0.4224, "step": 850900 }, { "epoch": 8.67, "learning_rate": 2.4149095803212062e-05, "loss": 0.4397, "step": 851000 }, { "epoch": 8.67, "learning_rate": 2.41425897311246e-05, "loss": 0.3413, "step": 851100 }, { "epoch": 8.67, "learning_rate": 2.413608394542955e-05, "loss": 0.3791, "step": 851200 }, { "epoch": 8.67, "learning_rate": 2.4129578446445012e-05, "loss": 0.3796, "step": 851300 }, { "epoch": 8.67, "learning_rate": 2.4123073234489043e-05, "loss": 0.3877, "step": 851400 }, { "epoch": 8.68, "learning_rate": 2.411656830987972e-05, "loss": 0.3652, "step": 851500 }, { "epoch": 8.68, "learning_rate": 2.4110063672935103e-05, "loss": 0.3182, "step": 851600 }, { "epoch": 8.68, "learning_rate": 2.4103559323973218e-05, "loss": 0.4186, "step": 851700 }, { "epoch": 8.68, "learning_rate": 2.4097055263312094e-05, "loss": 0.3629, "step": 851800 }, { "epoch": 8.68, "learning_rate": 2.409055149126975e-05, "loss": 0.3527, "step": 851900 }, { "epoch": 8.68, "learning_rate": 2.4084048008164164e-05, "loss": 0.467, "step": 852000 }, { "epoch": 8.68, "learning_rate": 2.4077544814313323e-05, "loss": 0.3594, "step": 852100 }, { "epoch": 8.68, "learning_rate": 2.40710419100352e-05, "loss": 0.4084, "step": 852200 }, { "epoch": 8.68, "learning_rate": 2.4064539295647746e-05, "loss": 0.4436, "step": 852300 }, { "epoch": 8.68, "learning_rate": 2.4058036971468898e-05, "loss": 0.3908, "step": 852400 }, { "epoch": 8.69, "learning_rate": 2.405153493781658e-05, "loss": 0.445, "step": 852500 }, { "epoch": 8.69, "learning_rate": 2.404503319500872e-05, "loss": 0.3772, "step": 852600 }, { "epoch": 8.69, "learning_rate": 2.4038531743363187e-05, "loss": 0.4613, "step": 852700 }, { "epoch": 8.69, "learning_rate": 2.4032030583197873e-05, "loss": 0.4279, "step": 852800 }, { "epoch": 8.69, "learning_rate": 2.402552971483066e-05, "loss": 0.317, "step": 852900 }, { "epoch": 8.69, "learning_rate": 2.4019029138579375e-05, "loss": 0.3509, "step": 853000 }, { "epoch": 8.69, "learning_rate": 2.401252885476188e-05, "loss": 0.3642, "step": 853100 }, { "epoch": 8.69, "learning_rate": 2.4006028863696e-05, "loss": 0.4467, "step": 853200 }, { "epoch": 8.69, "learning_rate": 2.399952916569953e-05, "loss": 0.3865, "step": 853300 }, { "epoch": 8.69, "learning_rate": 2.3993029761090275e-05, "loss": 0.3215, "step": 853400 }, { "epoch": 8.7, "learning_rate": 2.398653065018602e-05, "loss": 0.4535, "step": 853500 }, { "epoch": 8.7, "learning_rate": 2.3980031833304528e-05, "loss": 0.3422, "step": 853600 }, { "epoch": 8.7, "learning_rate": 2.3973533310763548e-05, "loss": 0.3153, "step": 853700 }, { "epoch": 8.7, "learning_rate": 2.396703508288083e-05, "loss": 0.3948, "step": 853800 }, { "epoch": 8.7, "learning_rate": 2.3960537149974102e-05, "loss": 0.3586, "step": 853900 }, { "epoch": 8.7, "learning_rate": 2.3954039512361056e-05, "loss": 0.3999, "step": 854000 }, { "epoch": 8.7, "learning_rate": 2.3947542170359395e-05, "loss": 0.3659, "step": 854100 }, { "epoch": 8.7, "learning_rate": 2.394104512428681e-05, "loss": 0.2989, "step": 854200 }, { "epoch": 8.7, "learning_rate": 2.393454837446096e-05, "loss": 0.3735, "step": 854300 }, { "epoch": 8.7, "learning_rate": 2.392805192119949e-05, "loss": 0.3472, "step": 854400 }, { "epoch": 8.71, "learning_rate": 2.3921555764820063e-05, "loss": 0.4072, "step": 854500 }, { "epoch": 8.71, "learning_rate": 2.391505990564027e-05, "loss": 0.3424, "step": 854600 }, { "epoch": 8.71, "learning_rate": 2.3908564343977734e-05, "loss": 0.4638, "step": 854700 }, { "epoch": 8.71, "learning_rate": 2.3902069080150057e-05, "loss": 0.4096, "step": 854800 }, { "epoch": 8.71, "learning_rate": 2.3895574114474803e-05, "loss": 0.3484, "step": 854900 }, { "epoch": 8.71, "learning_rate": 2.388907944726955e-05, "loss": 0.4059, "step": 855000 }, { "epoch": 8.71, "learning_rate": 2.3882585078851855e-05, "loss": 0.4194, "step": 855100 }, { "epoch": 8.71, "learning_rate": 2.3876091009539226e-05, "loss": 0.3678, "step": 855200 }, { "epoch": 8.71, "learning_rate": 2.3869597239649207e-05, "loss": 0.3274, "step": 855300 }, { "epoch": 8.71, "learning_rate": 2.3863103769499302e-05, "loss": 0.3594, "step": 855400 }, { "epoch": 8.72, "learning_rate": 2.3856610599406994e-05, "loss": 0.4245, "step": 855500 }, { "epoch": 8.72, "learning_rate": 2.3850117729689763e-05, "loss": 0.4125, "step": 855600 }, { "epoch": 8.72, "learning_rate": 2.384362516066508e-05, "loss": 0.3751, "step": 855700 }, { "epoch": 8.72, "learning_rate": 2.3837132892650395e-05, "loss": 0.3692, "step": 855800 }, { "epoch": 8.72, "learning_rate": 2.3830640925963124e-05, "loss": 0.3596, "step": 855900 }, { "epoch": 8.72, "learning_rate": 2.3824214176076933e-05, "loss": 0.3382, "step": 856000 }, { "epoch": 8.72, "learning_rate": 2.3817722809975555e-05, "loss": 0.4534, "step": 856100 }, { "epoch": 8.72, "learning_rate": 2.3811231746150637e-05, "loss": 0.3445, "step": 856200 }, { "epoch": 8.72, "learning_rate": 2.3804740984919566e-05, "loss": 0.4049, "step": 856300 }, { "epoch": 8.73, "learning_rate": 2.3798250526599677e-05, "loss": 0.3716, "step": 856400 }, { "epoch": 8.73, "learning_rate": 2.3791760371508327e-05, "loss": 0.3565, "step": 856500 }, { "epoch": 8.73, "learning_rate": 2.3785270519962856e-05, "loss": 0.3954, "step": 856600 }, { "epoch": 8.73, "learning_rate": 2.3778780972280557e-05, "loss": 0.4018, "step": 856700 }, { "epoch": 8.73, "learning_rate": 2.3772291728778747e-05, "loss": 0.4143, "step": 856800 }, { "epoch": 8.73, "learning_rate": 2.3765802789774717e-05, "loss": 0.3814, "step": 856900 }, { "epoch": 8.73, "learning_rate": 2.3759314155585715e-05, "loss": 0.3421, "step": 857000 }, { "epoch": 8.73, "learning_rate": 2.3752825826529006e-05, "loss": 0.3872, "step": 857100 }, { "epoch": 8.73, "learning_rate": 2.374633780292184e-05, "loss": 0.3791, "step": 857200 }, { "epoch": 8.73, "learning_rate": 2.373985008508143e-05, "loss": 0.352, "step": 857300 }, { "epoch": 8.74, "learning_rate": 2.3733362673324994e-05, "loss": 0.3969, "step": 857400 }, { "epoch": 8.74, "learning_rate": 2.372687556796974e-05, "loss": 0.3291, "step": 857500 }, { "epoch": 8.74, "learning_rate": 2.3720388769332817e-05, "loss": 0.3487, "step": 857600 }, { "epoch": 8.74, "learning_rate": 2.371390227773141e-05, "loss": 0.349, "step": 857700 }, { "epoch": 8.74, "learning_rate": 2.3707416093482666e-05, "loss": 0.3632, "step": 857800 }, { "epoch": 8.74, "learning_rate": 2.3700930216903732e-05, "loss": 0.3661, "step": 857900 }, { "epoch": 8.74, "learning_rate": 2.369444464831171e-05, "loss": 0.3857, "step": 858000 }, { "epoch": 8.74, "learning_rate": 2.368795938802372e-05, "loss": 0.2929, "step": 858100 }, { "epoch": 8.74, "learning_rate": 2.3681474436356862e-05, "loss": 0.3742, "step": 858200 }, { "epoch": 8.74, "learning_rate": 2.367498979362818e-05, "loss": 0.4031, "step": 858300 }, { "epoch": 8.75, "learning_rate": 2.366850546015475e-05, "loss": 0.3961, "step": 858400 }, { "epoch": 8.75, "learning_rate": 2.366202143625363e-05, "loss": 0.4409, "step": 858500 }, { "epoch": 8.75, "learning_rate": 2.3655537722241828e-05, "loss": 0.3471, "step": 858600 }, { "epoch": 8.75, "learning_rate": 2.364905431843637e-05, "loss": 0.3734, "step": 858700 }, { "epoch": 8.75, "learning_rate": 2.364257122515427e-05, "loss": 0.4246, "step": 858800 }, { "epoch": 8.75, "learning_rate": 2.3636088442712483e-05, "loss": 0.4379, "step": 858900 }, { "epoch": 8.75, "learning_rate": 2.3629605971427994e-05, "loss": 0.383, "step": 859000 }, { "epoch": 8.75, "learning_rate": 2.3623123811617765e-05, "loss": 0.4016, "step": 859100 }, { "epoch": 8.75, "learning_rate": 2.3616641963598717e-05, "loss": 0.3784, "step": 859200 }, { "epoch": 8.75, "learning_rate": 2.3610160427687786e-05, "loss": 0.4102, "step": 859300 }, { "epoch": 8.76, "learning_rate": 2.3603679204201888e-05, "loss": 0.3736, "step": 859400 }, { "epoch": 8.76, "learning_rate": 2.3597198293457895e-05, "loss": 0.3562, "step": 859500 }, { "epoch": 8.76, "learning_rate": 2.359071769577269e-05, "loss": 0.3579, "step": 859600 }, { "epoch": 8.76, "learning_rate": 2.358423741146315e-05, "loss": 0.3902, "step": 859700 }, { "epoch": 8.76, "learning_rate": 2.3577757440846113e-05, "loss": 0.4738, "step": 859800 }, { "epoch": 8.76, "learning_rate": 2.3571277784238407e-05, "loss": 0.3674, "step": 859900 }, { "epoch": 8.76, "learning_rate": 2.3564798441956855e-05, "loss": 0.3031, "step": 860000 }, { "epoch": 8.76, "learning_rate": 2.3558319414318265e-05, "loss": 0.3128, "step": 860100 }, { "epoch": 8.76, "learning_rate": 2.3551840701639403e-05, "loss": 0.4049, "step": 860200 }, { "epoch": 8.76, "learning_rate": 2.3545362304237046e-05, "loss": 0.3726, "step": 860300 }, { "epoch": 8.77, "learning_rate": 2.3538884222427964e-05, "loss": 0.2949, "step": 860400 }, { "epoch": 8.77, "learning_rate": 2.3532406456528876e-05, "loss": 0.3964, "step": 860500 }, { "epoch": 8.77, "learning_rate": 2.352592900685652e-05, "loss": 0.3615, "step": 860600 }, { "epoch": 8.77, "learning_rate": 2.351945187372761e-05, "loss": 0.3582, "step": 860700 }, { "epoch": 8.77, "learning_rate": 2.351297505745882e-05, "loss": 0.3898, "step": 860800 }, { "epoch": 8.77, "learning_rate": 2.3506498558366833e-05, "loss": 0.428, "step": 860900 }, { "epoch": 8.77, "learning_rate": 2.350002237676832e-05, "loss": 0.4382, "step": 861000 }, { "epoch": 8.77, "learning_rate": 2.349354651297992e-05, "loss": 0.4487, "step": 861100 }, { "epoch": 8.77, "learning_rate": 2.3487070967318272e-05, "loss": 0.3897, "step": 861200 }, { "epoch": 8.78, "learning_rate": 2.3480660490794838e-05, "loss": 0.3818, "step": 861300 }, { "epoch": 8.78, "learning_rate": 2.3474185579147338e-05, "loss": 0.404, "step": 861400 }, { "epoch": 8.78, "learning_rate": 2.346771098657324e-05, "loss": 0.3735, "step": 861500 }, { "epoch": 8.78, "learning_rate": 2.3461301454538894e-05, "loss": 0.3872, "step": 861600 }, { "epoch": 8.78, "learning_rate": 2.34548922358458e-05, "loss": 0.3779, "step": 861700 }, { "epoch": 8.78, "learning_rate": 2.3448418595987615e-05, "loss": 0.3781, "step": 861800 }, { "epoch": 8.78, "learning_rate": 2.3441945276462677e-05, "loss": 0.3861, "step": 861900 }, { "epoch": 8.78, "learning_rate": 2.3435472277587465e-05, "loss": 0.4113, "step": 862000 }, { "epoch": 8.78, "learning_rate": 2.3428999599678486e-05, "loss": 0.3263, "step": 862100 }, { "epoch": 8.78, "learning_rate": 2.3422527243052227e-05, "loss": 0.3562, "step": 862200 }, { "epoch": 8.79, "learning_rate": 2.3416055208025117e-05, "loss": 0.3379, "step": 862300 }, { "epoch": 8.79, "learning_rate": 2.340958349491362e-05, "loss": 0.3817, "step": 862400 }, { "epoch": 8.79, "learning_rate": 2.3403112104034168e-05, "loss": 0.3887, "step": 862500 }, { "epoch": 8.79, "learning_rate": 2.3396641035703155e-05, "loss": 0.4708, "step": 862600 }, { "epoch": 8.79, "learning_rate": 2.339017029023699e-05, "loss": 0.3445, "step": 862700 }, { "epoch": 8.79, "learning_rate": 2.3383699867952067e-05, "loss": 0.3657, "step": 862800 }, { "epoch": 8.79, "learning_rate": 2.3377229769164724e-05, "loss": 0.3361, "step": 862900 }, { "epoch": 8.79, "learning_rate": 2.337075999419132e-05, "loss": 0.3158, "step": 863000 }, { "epoch": 8.79, "learning_rate": 2.33642905433482e-05, "loss": 0.382, "step": 863100 }, { "epoch": 8.79, "learning_rate": 2.3357821416951666e-05, "loss": 0.4576, "step": 863200 }, { "epoch": 8.8, "learning_rate": 2.3351352615318025e-05, "loss": 0.3747, "step": 863300 }, { "epoch": 8.8, "learning_rate": 2.3344884138763567e-05, "loss": 0.3458, "step": 863400 }, { "epoch": 8.8, "learning_rate": 2.3338415987604568e-05, "loss": 0.4122, "step": 863500 }, { "epoch": 8.8, "learning_rate": 2.3331948162157262e-05, "loss": 0.3068, "step": 863600 }, { "epoch": 8.8, "learning_rate": 2.3325480662737895e-05, "loss": 0.3605, "step": 863700 }, { "epoch": 8.8, "learning_rate": 2.3319013489662703e-05, "loss": 0.4243, "step": 863800 }, { "epoch": 8.8, "learning_rate": 2.3312546643247872e-05, "loss": 0.4094, "step": 863900 }, { "epoch": 8.8, "learning_rate": 2.3306080123809602e-05, "loss": 0.3784, "step": 864000 }, { "epoch": 8.8, "learning_rate": 2.329961393166408e-05, "loss": 0.3239, "step": 864100 }, { "epoch": 8.8, "learning_rate": 2.329314806712744e-05, "loss": 0.3502, "step": 864200 }, { "epoch": 8.81, "learning_rate": 2.3286682530515833e-05, "loss": 0.3298, "step": 864300 }, { "epoch": 8.81, "learning_rate": 2.3280217322145394e-05, "loss": 0.4014, "step": 864400 }, { "epoch": 8.81, "learning_rate": 2.3273752442332216e-05, "loss": 0.3276, "step": 864500 }, { "epoch": 8.81, "learning_rate": 2.326728789139241e-05, "loss": 0.3383, "step": 864600 }, { "epoch": 8.81, "learning_rate": 2.3260823669642055e-05, "loss": 0.367, "step": 864700 }, { "epoch": 8.81, "learning_rate": 2.32543597773972e-05, "loss": 0.3276, "step": 864800 }, { "epoch": 8.81, "learning_rate": 2.324789621497389e-05, "loss": 0.4795, "step": 864900 }, { "epoch": 8.81, "learning_rate": 2.324143298268816e-05, "loss": 0.3384, "step": 865000 }, { "epoch": 8.81, "learning_rate": 2.3234970080856032e-05, "loss": 0.3837, "step": 865100 }, { "epoch": 8.81, "learning_rate": 2.322850750979349e-05, "loss": 0.3614, "step": 865200 }, { "epoch": 8.82, "learning_rate": 2.3222045269816516e-05, "loss": 0.3658, "step": 865300 }, { "epoch": 8.82, "learning_rate": 2.32155833612411e-05, "loss": 0.3504, "step": 865400 }, { "epoch": 8.82, "learning_rate": 2.3209121784383153e-05, "loss": 0.3241, "step": 865500 }, { "epoch": 8.82, "learning_rate": 2.3202660539558627e-05, "loss": 0.3082, "step": 865600 }, { "epoch": 8.82, "learning_rate": 2.319619962708344e-05, "loss": 0.3549, "step": 865700 }, { "epoch": 8.82, "learning_rate": 2.3189739047273484e-05, "loss": 0.3427, "step": 865800 }, { "epoch": 8.82, "learning_rate": 2.3183278800444643e-05, "loss": 0.2942, "step": 865900 }, { "epoch": 8.82, "learning_rate": 2.3176818886912804e-05, "loss": 0.4499, "step": 866000 }, { "epoch": 8.82, "learning_rate": 2.3170359306993792e-05, "loss": 0.4341, "step": 866100 }, { "epoch": 8.82, "learning_rate": 2.3163900061003452e-05, "loss": 0.3517, "step": 866200 }, { "epoch": 8.83, "learning_rate": 2.315744114925761e-05, "loss": 0.3898, "step": 866300 }, { "epoch": 8.83, "learning_rate": 2.3150982572072056e-05, "loss": 0.3587, "step": 866400 }, { "epoch": 8.83, "learning_rate": 2.314452432976258e-05, "loss": 0.351, "step": 866500 }, { "epoch": 8.83, "learning_rate": 2.313806642264496e-05, "loss": 0.3449, "step": 866600 }, { "epoch": 8.83, "learning_rate": 2.3131608851034936e-05, "loss": 0.3623, "step": 866700 }, { "epoch": 8.83, "learning_rate": 2.312515161524825e-05, "loss": 0.3404, "step": 866800 }, { "epoch": 8.83, "learning_rate": 2.3118694715600623e-05, "loss": 0.3106, "step": 866900 }, { "epoch": 8.83, "learning_rate": 2.3112238152407766e-05, "loss": 0.341, "step": 867000 }, { "epoch": 8.83, "learning_rate": 2.3105781925985352e-05, "loss": 0.4352, "step": 867100 }, { "epoch": 8.84, "learning_rate": 2.3099326036649058e-05, "loss": 0.3826, "step": 867200 }, { "epoch": 8.84, "learning_rate": 2.309287048471455e-05, "loss": 0.3706, "step": 867300 }, { "epoch": 8.84, "learning_rate": 2.3086479820966876e-05, "loss": 0.3864, "step": 867400 }, { "epoch": 8.84, "learning_rate": 2.308002494140092e-05, "loss": 0.3543, "step": 867500 }, { "epoch": 8.84, "learning_rate": 2.3073570400180445e-05, "loss": 0.3551, "step": 867600 }, { "epoch": 8.84, "learning_rate": 2.306711619762104e-05, "loss": 0.3865, "step": 867700 }, { "epoch": 8.84, "learning_rate": 2.3060662334038295e-05, "loss": 0.3415, "step": 867800 }, { "epoch": 8.84, "learning_rate": 2.305420880974773e-05, "loss": 0.368, "step": 867900 }, { "epoch": 8.84, "learning_rate": 2.304775562506491e-05, "loss": 0.3873, "step": 868000 }, { "epoch": 8.84, "learning_rate": 2.304130278030536e-05, "loss": 0.3782, "step": 868100 }, { "epoch": 8.85, "learning_rate": 2.3034850275784565e-05, "loss": 0.3734, "step": 868200 }, { "epoch": 8.85, "learning_rate": 2.3028398111818028e-05, "loss": 0.3852, "step": 868300 }, { "epoch": 8.85, "learning_rate": 2.3021946288721232e-05, "loss": 0.3559, "step": 868400 }, { "epoch": 8.85, "learning_rate": 2.3015494806809606e-05, "loss": 0.3234, "step": 868500 }, { "epoch": 8.85, "learning_rate": 2.3009043666398604e-05, "loss": 0.3559, "step": 868600 }, { "epoch": 8.85, "learning_rate": 2.3002592867803652e-05, "loss": 0.4089, "step": 868700 }, { "epoch": 8.85, "learning_rate": 2.2996142411340148e-05, "loss": 0.3633, "step": 868800 }, { "epoch": 8.85, "learning_rate": 2.2989692297323475e-05, "loss": 0.3026, "step": 868900 }, { "epoch": 8.85, "learning_rate": 2.298324252606903e-05, "loss": 0.3558, "step": 869000 }, { "epoch": 8.85, "learning_rate": 2.297679309789214e-05, "loss": 0.4046, "step": 869100 }, { "epoch": 8.86, "learning_rate": 2.2970344013108156e-05, "loss": 0.3959, "step": 869200 }, { "epoch": 8.86, "learning_rate": 2.29638952720324e-05, "loss": 0.3386, "step": 869300 }, { "epoch": 8.86, "learning_rate": 2.2957446874980177e-05, "loss": 0.3771, "step": 869400 }, { "epoch": 8.86, "learning_rate": 2.2950998822266773e-05, "loss": 0.3176, "step": 869500 }, { "epoch": 8.86, "learning_rate": 2.2944551114207454e-05, "loss": 0.3724, "step": 869600 }, { "epoch": 8.86, "learning_rate": 2.29381037511175e-05, "loss": 0.3508, "step": 869700 }, { "epoch": 8.86, "learning_rate": 2.293172120177997e-05, "loss": 0.4425, "step": 869800 }, { "epoch": 8.86, "learning_rate": 2.292527452611684e-05, "loss": 0.435, "step": 869900 }, { "epoch": 8.86, "learning_rate": 2.2918828196365556e-05, "loss": 0.3845, "step": 870000 }, { "epoch": 8.86, "learning_rate": 2.2912382212841316e-05, "loss": 0.3656, "step": 870100 }, { "epoch": 8.87, "learning_rate": 2.2905936575859292e-05, "loss": 0.3998, "step": 870200 }, { "epoch": 8.87, "learning_rate": 2.289949128573463e-05, "loss": 0.4382, "step": 870300 }, { "epoch": 8.87, "learning_rate": 2.2893046342782472e-05, "loss": 0.3756, "step": 870400 }, { "epoch": 8.87, "learning_rate": 2.288660174731795e-05, "loss": 0.3815, "step": 870500 }, { "epoch": 8.87, "learning_rate": 2.2880157499656137e-05, "loss": 0.476, "step": 870600 }, { "epoch": 8.87, "learning_rate": 2.287371360011213e-05, "loss": 0.324, "step": 870700 }, { "epoch": 8.87, "learning_rate": 2.286727004900101e-05, "loss": 0.3781, "step": 870800 }, { "epoch": 8.87, "learning_rate": 2.286082684663781e-05, "loss": 0.3932, "step": 870900 }, { "epoch": 8.87, "learning_rate": 2.2854383993337576e-05, "loss": 0.35, "step": 871000 }, { "epoch": 8.87, "learning_rate": 2.2847941489415336e-05, "loss": 0.3491, "step": 871100 }, { "epoch": 8.88, "learning_rate": 2.284149933518606e-05, "loss": 0.2841, "step": 871200 }, { "epoch": 8.88, "learning_rate": 2.2835057530964744e-05, "loss": 0.4027, "step": 871300 }, { "epoch": 8.88, "learning_rate": 2.282861607706637e-05, "loss": 0.3563, "step": 871400 }, { "epoch": 8.88, "learning_rate": 2.2822174973805863e-05, "loss": 0.4229, "step": 871500 }, { "epoch": 8.88, "learning_rate": 2.2815734221498166e-05, "loss": 0.36, "step": 871600 }, { "epoch": 8.88, "learning_rate": 2.280929382045819e-05, "loss": 0.3585, "step": 871700 }, { "epoch": 8.88, "learning_rate": 2.2802853771000847e-05, "loss": 0.2945, "step": 871800 }, { "epoch": 8.88, "learning_rate": 2.2796414073440995e-05, "loss": 0.3245, "step": 871900 }, { "epoch": 8.88, "learning_rate": 2.27899747280935e-05, "loss": 0.3865, "step": 872000 }, { "epoch": 8.89, "learning_rate": 2.2783535735273227e-05, "loss": 0.3873, "step": 872100 }, { "epoch": 8.89, "learning_rate": 2.2777097095294982e-05, "loss": 0.3004, "step": 872200 }, { "epoch": 8.89, "learning_rate": 2.2770658808473582e-05, "loss": 0.2668, "step": 872300 }, { "epoch": 8.89, "learning_rate": 2.276422087512384e-05, "loss": 0.3826, "step": 872400 }, { "epoch": 8.89, "learning_rate": 2.2757783295560503e-05, "loss": 0.3585, "step": 872500 }, { "epoch": 8.89, "learning_rate": 2.2751346070098345e-05, "loss": 0.3303, "step": 872600 }, { "epoch": 8.89, "learning_rate": 2.274490919905211e-05, "loss": 0.3826, "step": 872700 }, { "epoch": 8.89, "learning_rate": 2.2738472682736514e-05, "loss": 0.3798, "step": 872800 }, { "epoch": 8.89, "learning_rate": 2.2732036521466266e-05, "loss": 0.4135, "step": 872900 }, { "epoch": 8.89, "learning_rate": 2.2725600715556074e-05, "loss": 0.3373, "step": 873000 }, { "epoch": 8.9, "learning_rate": 2.2719165265320584e-05, "loss": 0.3993, "step": 873100 }, { "epoch": 8.9, "learning_rate": 2.2712730171074464e-05, "loss": 0.3426, "step": 873200 }, { "epoch": 8.9, "learning_rate": 2.270629543313235e-05, "loss": 0.3983, "step": 873300 }, { "epoch": 8.9, "learning_rate": 2.2699861051808864e-05, "loss": 0.3553, "step": 873400 }, { "epoch": 8.9, "learning_rate": 2.2693427027418608e-05, "loss": 0.3724, "step": 873500 }, { "epoch": 8.9, "learning_rate": 2.2686993360276166e-05, "loss": 0.315, "step": 873600 }, { "epoch": 8.9, "learning_rate": 2.2680560050696116e-05, "loss": 0.4486, "step": 873700 }, { "epoch": 8.9, "learning_rate": 2.267412709899299e-05, "loss": 0.3247, "step": 873800 }, { "epoch": 8.9, "learning_rate": 2.2667694505481328e-05, "loss": 0.4153, "step": 873900 }, { "epoch": 8.9, "learning_rate": 2.2661326591050074e-05, "loss": 0.4166, "step": 874000 }, { "epoch": 8.91, "learning_rate": 2.2654894711275113e-05, "loss": 0.4076, "step": 874100 }, { "epoch": 8.91, "learning_rate": 2.2648463190631975e-05, "loss": 0.3584, "step": 874200 }, { "epoch": 8.91, "learning_rate": 2.264203202943511e-05, "loss": 0.441, "step": 874300 }, { "epoch": 8.91, "learning_rate": 2.2635601227998965e-05, "loss": 0.2954, "step": 874400 }, { "epoch": 8.91, "learning_rate": 2.2629170786637985e-05, "loss": 0.3917, "step": 874500 }, { "epoch": 8.91, "learning_rate": 2.2622740705666556e-05, "loss": 0.3529, "step": 874600 }, { "epoch": 8.91, "learning_rate": 2.2616310985399075e-05, "loss": 0.3282, "step": 874700 }, { "epoch": 8.91, "learning_rate": 2.2609881626149936e-05, "loss": 0.3399, "step": 874800 }, { "epoch": 8.91, "learning_rate": 2.2603452628233478e-05, "loss": 0.3534, "step": 874900 }, { "epoch": 8.91, "learning_rate": 2.2597023991964042e-05, "loss": 0.3037, "step": 875000 }, { "epoch": 8.92, "learning_rate": 2.259059571765597e-05, "loss": 0.4174, "step": 875100 }, { "epoch": 8.92, "learning_rate": 2.2584232082949558e-05, "loss": 0.3525, "step": 875200 }, { "epoch": 8.92, "learning_rate": 2.2577804529879614e-05, "loss": 0.3346, "step": 875300 }, { "epoch": 8.92, "learning_rate": 2.257137733971073e-05, "loss": 0.3791, "step": 875400 }, { "epoch": 8.92, "learning_rate": 2.2564950512757163e-05, "loss": 0.3454, "step": 875500 }, { "epoch": 8.92, "learning_rate": 2.2558524049333157e-05, "loss": 0.3183, "step": 875600 }, { "epoch": 8.92, "learning_rate": 2.2552162208946643e-05, "loss": 0.3629, "step": 875700 }, { "epoch": 8.92, "learning_rate": 2.254573646988123e-05, "loss": 0.3673, "step": 875800 }, { "epoch": 8.92, "learning_rate": 2.253931109528481e-05, "loss": 0.4075, "step": 875900 }, { "epoch": 8.92, "learning_rate": 2.253288608547155e-05, "loss": 0.3859, "step": 876000 }, { "epoch": 8.93, "learning_rate": 2.252646144075561e-05, "loss": 0.4102, "step": 876100 }, { "epoch": 8.93, "learning_rate": 2.2520037161451096e-05, "loss": 0.3128, "step": 876200 }, { "epoch": 8.93, "learning_rate": 2.2513613247872126e-05, "loss": 0.3682, "step": 876300 }, { "epoch": 8.93, "learning_rate": 2.2507189700332804e-05, "loss": 0.3214, "step": 876400 }, { "epoch": 8.93, "learning_rate": 2.250076651914718e-05, "loss": 0.3917, "step": 876500 }, { "epoch": 8.93, "learning_rate": 2.249434370462932e-05, "loss": 0.3817, "step": 876600 }, { "epoch": 8.93, "learning_rate": 2.2487921257093273e-05, "loss": 0.4182, "step": 876700 }, { "epoch": 8.93, "learning_rate": 2.2481499176853034e-05, "loss": 0.3712, "step": 876800 }, { "epoch": 8.93, "learning_rate": 2.247507746422262e-05, "loss": 0.4004, "step": 876900 }, { "epoch": 8.94, "learning_rate": 2.246865611951601e-05, "loss": 0.3938, "step": 877000 }, { "epoch": 8.94, "learning_rate": 2.2462235143047184e-05, "loss": 0.345, "step": 877100 }, { "epoch": 8.94, "learning_rate": 2.2455814535130064e-05, "loss": 0.3507, "step": 877200 }, { "epoch": 8.94, "learning_rate": 2.2449394296078594e-05, "loss": 0.3857, "step": 877300 }, { "epoch": 8.94, "learning_rate": 2.2442974426206685e-05, "loss": 0.3696, "step": 877400 }, { "epoch": 8.94, "learning_rate": 2.2436554925828223e-05, "loss": 0.3706, "step": 877500 }, { "epoch": 8.94, "learning_rate": 2.2430135795257092e-05, "loss": 0.3789, "step": 877600 }, { "epoch": 8.94, "learning_rate": 2.242371703480716e-05, "loss": 0.3475, "step": 877700 }, { "epoch": 8.94, "learning_rate": 2.241729864479223e-05, "loss": 0.3374, "step": 877800 }, { "epoch": 8.94, "learning_rate": 2.241088062552616e-05, "loss": 0.3478, "step": 877900 }, { "epoch": 8.95, "learning_rate": 2.240446297732273e-05, "loss": 0.3472, "step": 878000 }, { "epoch": 8.95, "learning_rate": 2.2398045700495734e-05, "loss": 0.3454, "step": 878100 }, { "epoch": 8.95, "learning_rate": 2.2391628795358934e-05, "loss": 0.3301, "step": 878200 }, { "epoch": 8.95, "learning_rate": 2.2385212262226096e-05, "loss": 0.3804, "step": 878300 }, { "epoch": 8.95, "learning_rate": 2.2378796101410924e-05, "loss": 0.3561, "step": 878400 }, { "epoch": 8.95, "learning_rate": 2.2372380313227137e-05, "loss": 0.3418, "step": 878500 }, { "epoch": 8.95, "learning_rate": 2.236596489798844e-05, "loss": 0.4083, "step": 878600 }, { "epoch": 8.95, "learning_rate": 2.2359549856008507e-05, "loss": 0.3715, "step": 878700 }, { "epoch": 8.95, "learning_rate": 2.235313518760098e-05, "loss": 0.3934, "step": 878800 }, { "epoch": 8.95, "learning_rate": 2.234672089307951e-05, "loss": 0.3033, "step": 878900 }, { "epoch": 8.96, "learning_rate": 2.2340306972757732e-05, "loss": 0.3762, "step": 879000 }, { "epoch": 8.96, "learning_rate": 2.2333893426949214e-05, "loss": 0.3874, "step": 879100 }, { "epoch": 8.96, "learning_rate": 2.232748025596756e-05, "loss": 0.3433, "step": 879200 }, { "epoch": 8.96, "learning_rate": 2.2321067460126344e-05, "loss": 0.3843, "step": 879300 }, { "epoch": 8.96, "learning_rate": 2.2314655039739095e-05, "loss": 0.3974, "step": 879400 }, { "epoch": 8.96, "learning_rate": 2.230824299511935e-05, "loss": 0.3449, "step": 879500 }, { "epoch": 8.96, "learning_rate": 2.2301831326580633e-05, "loss": 0.3849, "step": 879600 }, { "epoch": 8.96, "learning_rate": 2.2295420034436417e-05, "loss": 0.3229, "step": 879700 }, { "epoch": 8.96, "learning_rate": 2.2289009119000175e-05, "loss": 0.3901, "step": 879800 }, { "epoch": 8.96, "learning_rate": 2.2282598580585378e-05, "loss": 0.3185, "step": 879900 }, { "epoch": 8.97, "learning_rate": 2.227618841950545e-05, "loss": 0.3859, "step": 880000 }, { "epoch": 8.97, "learning_rate": 2.2269778636073816e-05, "loss": 0.349, "step": 880100 }, { "epoch": 8.97, "learning_rate": 2.2263369230603893e-05, "loss": 0.3515, "step": 880200 }, { "epoch": 8.97, "learning_rate": 2.2256960203409024e-05, "loss": 0.3549, "step": 880300 }, { "epoch": 8.97, "learning_rate": 2.2250551554802595e-05, "loss": 0.3362, "step": 880400 }, { "epoch": 8.97, "learning_rate": 2.2244143285097948e-05, "loss": 0.3947, "step": 880500 }, { "epoch": 8.97, "learning_rate": 2.223773539460842e-05, "loss": 0.3221, "step": 880600 }, { "epoch": 8.97, "learning_rate": 2.223139195687722e-05, "loss": 0.4539, "step": 880700 }, { "epoch": 8.97, "learning_rate": 2.2224984821957842e-05, "loss": 0.3804, "step": 880800 }, { "epoch": 8.97, "learning_rate": 2.22185780671903e-05, "loss": 0.3907, "step": 880900 }, { "epoch": 8.98, "learning_rate": 2.2212171692887856e-05, "loss": 0.4091, "step": 881000 }, { "epoch": 8.98, "learning_rate": 2.220576569936375e-05, "loss": 0.3566, "step": 881100 }, { "epoch": 8.98, "learning_rate": 2.2199360086931175e-05, "loss": 0.3679, "step": 881200 }, { "epoch": 8.98, "learning_rate": 2.2192954855903344e-05, "loss": 0.3753, "step": 881300 }, { "epoch": 8.98, "learning_rate": 2.2186550006593445e-05, "loss": 0.3357, "step": 881400 }, { "epoch": 8.98, "learning_rate": 2.2180145539314606e-05, "loss": 0.3901, "step": 881500 }, { "epoch": 8.98, "learning_rate": 2.2173741454379982e-05, "loss": 0.4165, "step": 881600 }, { "epoch": 8.98, "learning_rate": 2.216733775210271e-05, "loss": 0.4139, "step": 881700 }, { "epoch": 8.98, "learning_rate": 2.2160934432795865e-05, "loss": 0.2995, "step": 881800 }, { "epoch": 8.98, "learning_rate": 2.215459552423451e-05, "loss": 0.4039, "step": 881900 }, { "epoch": 8.99, "learning_rate": 2.214819296797027e-05, "loss": 0.3238, "step": 882000 }, { "epoch": 8.99, "learning_rate": 2.2141790795612533e-05, "loss": 0.3972, "step": 882100 }, { "epoch": 8.99, "learning_rate": 2.2135389007474346e-05, "loss": 0.3677, "step": 882200 }, { "epoch": 8.99, "learning_rate": 2.212898760386869e-05, "loss": 0.288, "step": 882300 }, { "epoch": 8.99, "learning_rate": 2.212258658510857e-05, "loss": 0.3738, "step": 882400 }, { "epoch": 8.99, "learning_rate": 2.2116185951506962e-05, "loss": 0.3134, "step": 882500 }, { "epoch": 8.99, "learning_rate": 2.2109785703376805e-05, "loss": 0.4154, "step": 882600 }, { "epoch": 8.99, "learning_rate": 2.210338584103104e-05, "loss": 0.3371, "step": 882700 }, { "epoch": 8.99, "learning_rate": 2.2096986364782596e-05, "loss": 0.36, "step": 882800 }, { "epoch": 9.0, "learning_rate": 2.2090587274944346e-05, "loss": 0.3375, "step": 882900 }, { "epoch": 9.0, "learning_rate": 2.2084188571829176e-05, "loss": 0.3393, "step": 883000 }, { "epoch": 9.0, "learning_rate": 2.2077790255749953e-05, "loss": 0.4107, "step": 883100 }, { "epoch": 9.0, "learning_rate": 2.20713923270195e-05, "loss": 0.3377, "step": 883200 }, { "epoch": 9.0, "learning_rate": 2.2064994785950647e-05, "loss": 0.2958, "step": 883300 }, { "epoch": 9.0, "learning_rate": 2.2058597632856195e-05, "loss": 0.3808, "step": 883400 }, { "epoch": 9.0, "learning_rate": 2.2052200868048945e-05, "loss": 0.3802, "step": 883500 }, { "epoch": 9.0, "learning_rate": 2.204580449184162e-05, "loss": 0.3254, "step": 883600 }, { "epoch": 9.0, "learning_rate": 2.2039408504546992e-05, "loss": 0.3308, "step": 883700 }, { "epoch": 9.0, "learning_rate": 2.203301290647779e-05, "loss": 0.3309, "step": 883800 }, { "epoch": 9.01, "learning_rate": 2.2026617697946712e-05, "loss": 0.3307, "step": 883900 }, { "epoch": 9.01, "learning_rate": 2.2020222879266437e-05, "loss": 0.2735, "step": 884000 }, { "epoch": 9.01, "learning_rate": 2.2013828450749656e-05, "loss": 0.389, "step": 884100 }, { "epoch": 9.01, "learning_rate": 2.2007434412708992e-05, "loss": 0.2833, "step": 884200 }, { "epoch": 9.01, "learning_rate": 2.2001040765457092e-05, "loss": 0.4388, "step": 884300 }, { "epoch": 9.01, "learning_rate": 2.199464750930657e-05, "loss": 0.3243, "step": 884400 }, { "epoch": 9.01, "learning_rate": 2.198825464457e-05, "loss": 0.3244, "step": 884500 }, { "epoch": 9.01, "learning_rate": 2.198186217155997e-05, "loss": 0.3891, "step": 884600 }, { "epoch": 9.01, "learning_rate": 2.197547009058904e-05, "loss": 0.3369, "step": 884700 }, { "epoch": 9.01, "learning_rate": 2.1969078401969723e-05, "loss": 0.4376, "step": 884800 }, { "epoch": 9.02, "learning_rate": 2.1962687106014546e-05, "loss": 0.4019, "step": 884900 }, { "epoch": 9.02, "learning_rate": 2.1956296203036014e-05, "loss": 0.3237, "step": 885000 }, { "epoch": 9.02, "learning_rate": 2.1949905693346586e-05, "loss": 0.3623, "step": 885100 }, { "epoch": 9.02, "learning_rate": 2.194351557725873e-05, "loss": 0.3407, "step": 885200 }, { "epoch": 9.02, "learning_rate": 2.1937189750355728e-05, "loss": 0.3451, "step": 885300 }, { "epoch": 9.02, "learning_rate": 2.1930800418464498e-05, "loss": 0.3438, "step": 885400 }, { "epoch": 9.02, "learning_rate": 2.192441148110899e-05, "loss": 0.3366, "step": 885500 }, { "epoch": 9.02, "learning_rate": 2.1918022938601545e-05, "loss": 0.3374, "step": 885600 }, { "epoch": 9.02, "learning_rate": 2.1911634791254556e-05, "loss": 0.3125, "step": 885700 }, { "epoch": 9.02, "learning_rate": 2.1905247039380358e-05, "loss": 0.3958, "step": 885800 }, { "epoch": 9.03, "learning_rate": 2.1898859683291283e-05, "loss": 0.3175, "step": 885900 }, { "epoch": 9.03, "learning_rate": 2.1892472723299612e-05, "loss": 0.3339, "step": 886000 }, { "epoch": 9.03, "learning_rate": 2.1886086159717643e-05, "loss": 0.2993, "step": 886100 }, { "epoch": 9.03, "learning_rate": 2.1879699992857645e-05, "loss": 0.3254, "step": 886200 }, { "epoch": 9.03, "learning_rate": 2.187331422303185e-05, "loss": 0.3495, "step": 886300 }, { "epoch": 9.03, "learning_rate": 2.1866928850552497e-05, "loss": 0.2787, "step": 886400 }, { "epoch": 9.03, "learning_rate": 2.1860543875731795e-05, "loss": 0.2362, "step": 886500 }, { "epoch": 9.03, "learning_rate": 2.1854159298881917e-05, "loss": 0.284, "step": 886600 }, { "epoch": 9.03, "learning_rate": 2.1847775120315033e-05, "loss": 0.2854, "step": 886700 }, { "epoch": 9.03, "learning_rate": 2.18413913403433e-05, "loss": 0.3182, "step": 886800 }, { "epoch": 9.04, "learning_rate": 2.183500795927884e-05, "loss": 0.3598, "step": 886900 }, { "epoch": 9.04, "learning_rate": 2.1828624977433763e-05, "loss": 0.3338, "step": 887000 }, { "epoch": 9.04, "learning_rate": 2.1822242395120176e-05, "loss": 0.3546, "step": 887100 }, { "epoch": 9.04, "learning_rate": 2.1815860212650118e-05, "loss": 0.3045, "step": 887200 }, { "epoch": 9.04, "learning_rate": 2.1809478430335653e-05, "loss": 0.3488, "step": 887300 }, { "epoch": 9.04, "learning_rate": 2.180309704848882e-05, "loss": 0.2899, "step": 887400 }, { "epoch": 9.04, "learning_rate": 2.1796716067421625e-05, "loss": 0.3058, "step": 887500 }, { "epoch": 9.04, "learning_rate": 2.1790335487446054e-05, "loss": 0.3558, "step": 887600 }, { "epoch": 9.04, "learning_rate": 2.178395530887409e-05, "loss": 0.2944, "step": 887700 }, { "epoch": 9.05, "learning_rate": 2.1777575532017692e-05, "loss": 0.3304, "step": 887800 }, { "epoch": 9.05, "learning_rate": 2.1771196157188768e-05, "loss": 0.345, "step": 887900 }, { "epoch": 9.05, "learning_rate": 2.1764817184699243e-05, "loss": 0.3127, "step": 888000 }, { "epoch": 9.05, "learning_rate": 2.175843861486102e-05, "loss": 0.2735, "step": 888100 }, { "epoch": 9.05, "learning_rate": 2.175206044798596e-05, "loss": 0.3153, "step": 888200 }, { "epoch": 9.05, "learning_rate": 2.1745682684385923e-05, "loss": 0.2883, "step": 888300 }, { "epoch": 9.05, "learning_rate": 2.1739305324372755e-05, "loss": 0.2833, "step": 888400 }, { "epoch": 9.05, "learning_rate": 2.1732928368258246e-05, "loss": 0.2982, "step": 888500 }, { "epoch": 9.05, "learning_rate": 2.172655181635421e-05, "loss": 0.3608, "step": 888600 }, { "epoch": 9.05, "learning_rate": 2.172017566897242e-05, "loss": 0.3856, "step": 888700 }, { "epoch": 9.06, "learning_rate": 2.171379992642462e-05, "loss": 0.3354, "step": 888800 }, { "epoch": 9.06, "learning_rate": 2.1707424589022555e-05, "loss": 0.351, "step": 888900 }, { "epoch": 9.06, "learning_rate": 2.170104965707795e-05, "loss": 0.295, "step": 889000 }, { "epoch": 9.06, "learning_rate": 2.1694675130902487e-05, "loss": 0.3501, "step": 889100 }, { "epoch": 9.06, "learning_rate": 2.168830101080784e-05, "loss": 0.3283, "step": 889200 }, { "epoch": 9.06, "learning_rate": 2.1681927297105673e-05, "loss": 0.3277, "step": 889300 }, { "epoch": 9.06, "learning_rate": 2.1675553990107625e-05, "loss": 0.3587, "step": 889400 }, { "epoch": 9.06, "learning_rate": 2.166918109012531e-05, "loss": 0.3034, "step": 889500 }, { "epoch": 9.06, "learning_rate": 2.166280859747032e-05, "loss": 0.3152, "step": 889600 }, { "epoch": 9.06, "learning_rate": 2.1656436512454245e-05, "loss": 0.3185, "step": 889700 }, { "epoch": 9.07, "learning_rate": 2.1650064835388626e-05, "loss": 0.3244, "step": 889800 }, { "epoch": 9.07, "learning_rate": 2.1643693566585006e-05, "loss": 0.3147, "step": 889900 }, { "epoch": 9.07, "learning_rate": 2.1637322706354907e-05, "loss": 0.3203, "step": 890000 }, { "epoch": 9.07, "learning_rate": 2.1630952255009815e-05, "loss": 0.3263, "step": 890100 }, { "epoch": 9.07, "learning_rate": 2.1624582212861213e-05, "loss": 0.3388, "step": 890200 }, { "epoch": 9.07, "learning_rate": 2.1618212580220573e-05, "loss": 0.3537, "step": 890300 }, { "epoch": 9.07, "learning_rate": 2.1611843357399307e-05, "loss": 0.2945, "step": 890400 }, { "epoch": 9.07, "learning_rate": 2.160547454470884e-05, "loss": 0.3448, "step": 890500 }, { "epoch": 9.07, "learning_rate": 2.1599106142460582e-05, "loss": 0.3481, "step": 890600 }, { "epoch": 9.07, "learning_rate": 2.159273815096589e-05, "loss": 0.4116, "step": 890700 }, { "epoch": 9.08, "learning_rate": 2.158637057053613e-05, "loss": 0.3605, "step": 890800 }, { "epoch": 9.08, "learning_rate": 2.1580003401482655e-05, "loss": 0.3257, "step": 890900 }, { "epoch": 9.08, "learning_rate": 2.157363664411675e-05, "loss": 0.2823, "step": 891000 }, { "epoch": 9.08, "learning_rate": 2.156727029874973e-05, "loss": 0.3674, "step": 891100 }, { "epoch": 9.08, "learning_rate": 2.1560904365692868e-05, "loss": 0.3899, "step": 891200 }, { "epoch": 9.08, "learning_rate": 2.1554538845257426e-05, "loss": 0.3276, "step": 891300 }, { "epoch": 9.08, "learning_rate": 2.154817373775463e-05, "loss": 0.3276, "step": 891400 }, { "epoch": 9.08, "learning_rate": 2.1541809043495696e-05, "loss": 0.3679, "step": 891500 }, { "epoch": 9.08, "learning_rate": 2.153544476279184e-05, "loss": 0.4239, "step": 891600 }, { "epoch": 9.08, "learning_rate": 2.1529080895954208e-05, "loss": 0.3851, "step": 891700 }, { "epoch": 9.09, "learning_rate": 2.1522717443293967e-05, "loss": 0.3356, "step": 891800 }, { "epoch": 9.09, "learning_rate": 2.1516418033451237e-05, "loss": 0.3124, "step": 891900 }, { "epoch": 9.09, "learning_rate": 2.1510055405929633e-05, "loss": 0.3814, "step": 892000 }, { "epoch": 9.09, "learning_rate": 2.1503693193515657e-05, "loss": 0.2813, "step": 892100 }, { "epoch": 9.09, "learning_rate": 2.1497331396520378e-05, "loss": 0.3414, "step": 892200 }, { "epoch": 9.09, "learning_rate": 2.1490970015254846e-05, "loss": 0.3077, "step": 892300 }, { "epoch": 9.09, "learning_rate": 2.148460905003012e-05, "loss": 0.3334, "step": 892400 }, { "epoch": 9.09, "learning_rate": 2.147824850115718e-05, "loss": 0.3069, "step": 892500 }, { "epoch": 9.09, "learning_rate": 2.1471888368947027e-05, "loss": 0.3017, "step": 892600 }, { "epoch": 9.09, "learning_rate": 2.1465528653710653e-05, "loss": 0.3992, "step": 892700 }, { "epoch": 9.1, "learning_rate": 2.1459169355758986e-05, "loss": 0.2886, "step": 892800 }, { "epoch": 9.1, "learning_rate": 2.1452810475402974e-05, "loss": 0.3876, "step": 892900 }, { "epoch": 9.1, "learning_rate": 2.144645201295353e-05, "loss": 0.3238, "step": 893000 }, { "epoch": 9.1, "learning_rate": 2.1440093968721528e-05, "loss": 0.3607, "step": 893100 }, { "epoch": 9.1, "learning_rate": 2.1433736343017843e-05, "loss": 0.3252, "step": 893200 }, { "epoch": 9.1, "learning_rate": 2.1427379136153343e-05, "loss": 0.3228, "step": 893300 }, { "epoch": 9.1, "learning_rate": 2.1421022348438835e-05, "loss": 0.3175, "step": 893400 }, { "epoch": 9.1, "learning_rate": 2.141466598018514e-05, "loss": 0.357, "step": 893500 }, { "epoch": 9.1, "learning_rate": 2.140831003170304e-05, "loss": 0.3736, "step": 893600 }, { "epoch": 9.11, "learning_rate": 2.1401954503303325e-05, "loss": 0.3691, "step": 893700 }, { "epoch": 9.11, "learning_rate": 2.1395599395296707e-05, "loss": 0.3348, "step": 893800 }, { "epoch": 9.11, "learning_rate": 2.138924470799393e-05, "loss": 0.3403, "step": 893900 }, { "epoch": 9.11, "learning_rate": 2.138289044170571e-05, "loss": 0.3537, "step": 894000 }, { "epoch": 9.11, "learning_rate": 2.1376536596742717e-05, "loss": 0.3755, "step": 894100 }, { "epoch": 9.11, "learning_rate": 2.137018317341562e-05, "loss": 0.3867, "step": 894200 }, { "epoch": 9.11, "learning_rate": 2.1363830172035077e-05, "loss": 0.294, "step": 894300 }, { "epoch": 9.11, "learning_rate": 2.1357477592911693e-05, "loss": 0.3694, "step": 894400 }, { "epoch": 9.11, "learning_rate": 2.1351125436356075e-05, "loss": 0.3327, "step": 894500 }, { "epoch": 9.11, "learning_rate": 2.1344773702678816e-05, "loss": 0.3944, "step": 894600 }, { "epoch": 9.12, "learning_rate": 2.1338422392190467e-05, "loss": 0.389, "step": 894700 }, { "epoch": 9.12, "learning_rate": 2.133207150520157e-05, "loss": 0.3491, "step": 894800 }, { "epoch": 9.12, "learning_rate": 2.1325721042022665e-05, "loss": 0.366, "step": 894900 }, { "epoch": 9.12, "learning_rate": 2.131937100296422e-05, "loss": 0.3215, "step": 895000 }, { "epoch": 9.12, "learning_rate": 2.1313021388336734e-05, "loss": 0.3427, "step": 895100 }, { "epoch": 9.12, "learning_rate": 2.1306672198450655e-05, "loss": 0.3093, "step": 895200 }, { "epoch": 9.12, "learning_rate": 2.1300323433616436e-05, "loss": 0.2929, "step": 895300 }, { "epoch": 9.12, "learning_rate": 2.1293975094144474e-05, "loss": 0.3055, "step": 895400 }, { "epoch": 9.12, "learning_rate": 2.1287627180345183e-05, "loss": 0.3795, "step": 895500 }, { "epoch": 9.12, "learning_rate": 2.128127969252894e-05, "loss": 0.3204, "step": 895600 }, { "epoch": 9.13, "learning_rate": 2.1274932631006074e-05, "loss": 0.3132, "step": 895700 }, { "epoch": 9.13, "learning_rate": 2.1268649460323425e-05, "loss": 0.3137, "step": 895800 }, { "epoch": 9.13, "learning_rate": 2.126230324804765e-05, "loss": 0.342, "step": 895900 }, { "epoch": 9.13, "learning_rate": 2.1256020918727882e-05, "loss": 0.346, "step": 896000 }, { "epoch": 9.13, "learning_rate": 2.1249675556927977e-05, "loss": 0.2847, "step": 896100 }, { "epoch": 9.13, "learning_rate": 2.1243330622966738e-05, "loss": 0.2751, "step": 896200 }, { "epoch": 9.13, "learning_rate": 2.1236986117154352e-05, "loss": 0.3403, "step": 896300 }, { "epoch": 9.13, "learning_rate": 2.123064203980106e-05, "loss": 0.3621, "step": 896400 }, { "epoch": 9.13, "learning_rate": 2.1224298391217034e-05, "loss": 0.3389, "step": 896500 }, { "epoch": 9.13, "learning_rate": 2.1217955171712453e-05, "loss": 0.3517, "step": 896600 }, { "epoch": 9.14, "learning_rate": 2.1211612381597448e-05, "loss": 0.3359, "step": 896700 }, { "epoch": 9.14, "learning_rate": 2.1205270021182152e-05, "loss": 0.3247, "step": 896800 }, { "epoch": 9.14, "learning_rate": 2.119892809077668e-05, "loss": 0.3571, "step": 896900 }, { "epoch": 9.14, "learning_rate": 2.1192586590691097e-05, "loss": 0.3163, "step": 897000 }, { "epoch": 9.14, "learning_rate": 2.118624552123546e-05, "loss": 0.2878, "step": 897100 }, { "epoch": 9.14, "learning_rate": 2.117990488271983e-05, "loss": 0.3187, "step": 897200 }, { "epoch": 9.14, "learning_rate": 2.1173564675454208e-05, "loss": 0.3617, "step": 897300 }, { "epoch": 9.14, "learning_rate": 2.11672248997486e-05, "loss": 0.3062, "step": 897400 }, { "epoch": 9.14, "learning_rate": 2.1160885555912997e-05, "loss": 0.3277, "step": 897500 }, { "epoch": 9.14, "learning_rate": 2.115454664425733e-05, "loss": 0.3038, "step": 897600 }, { "epoch": 9.15, "learning_rate": 2.114820816509154e-05, "loss": 0.2859, "step": 897700 }, { "epoch": 9.15, "learning_rate": 2.114187011872556e-05, "loss": 0.3145, "step": 897800 }, { "epoch": 9.15, "learning_rate": 2.1135532505469258e-05, "loss": 0.3544, "step": 897900 }, { "epoch": 9.15, "learning_rate": 2.1129195325632513e-05, "loss": 0.3418, "step": 898000 }, { "epoch": 9.15, "learning_rate": 2.1122858579525183e-05, "loss": 0.4104, "step": 898100 }, { "epoch": 9.15, "learning_rate": 2.11165222674571e-05, "loss": 0.3477, "step": 898200 }, { "epoch": 9.15, "learning_rate": 2.111018638973806e-05, "loss": 0.289, "step": 898300 }, { "epoch": 9.15, "learning_rate": 2.1103850946677845e-05, "loss": 0.3994, "step": 898400 }, { "epoch": 9.15, "learning_rate": 2.1097515938586242e-05, "loss": 0.418, "step": 898500 }, { "epoch": 9.16, "learning_rate": 2.109124470934547e-05, "loss": 0.3169, "step": 898600 }, { "epoch": 9.16, "learning_rate": 2.1084910567762853e-05, "loss": 0.3689, "step": 898700 }, { "epoch": 9.16, "learning_rate": 2.1078576862074907e-05, "loss": 0.3391, "step": 898800 }, { "epoch": 9.16, "learning_rate": 2.1072243592591313e-05, "loss": 0.2758, "step": 898900 }, { "epoch": 9.16, "learning_rate": 2.106591075962174e-05, "loss": 0.2883, "step": 899000 }, { "epoch": 9.16, "learning_rate": 2.1059578363475804e-05, "loss": 0.3156, "step": 899100 }, { "epoch": 9.16, "learning_rate": 2.1053246404463134e-05, "loss": 0.3305, "step": 899200 }, { "epoch": 9.16, "learning_rate": 2.1046914882893332e-05, "loss": 0.3461, "step": 899300 }, { "epoch": 9.16, "learning_rate": 2.104058379907595e-05, "loss": 0.3643, "step": 899400 }, { "epoch": 9.16, "learning_rate": 2.1034253153320554e-05, "loss": 0.3445, "step": 899500 }, { "epoch": 9.17, "learning_rate": 2.1027922945936694e-05, "loss": 0.3085, "step": 899600 }, { "epoch": 9.17, "learning_rate": 2.1021593177233838e-05, "loss": 0.2875, "step": 899700 }, { "epoch": 9.17, "learning_rate": 2.1015263847521503e-05, "loss": 0.2734, "step": 899800 }, { "epoch": 9.17, "learning_rate": 2.1008934957109145e-05, "loss": 0.3414, "step": 899900 }, { "epoch": 9.17, "learning_rate": 2.100260650630621e-05, "loss": 0.334, "step": 900000 }, { "epoch": 9.17, "learning_rate": 2.0996278495422124e-05, "loss": 0.3778, "step": 900100 }, { "epoch": 9.17, "learning_rate": 2.0989950924766303e-05, "loss": 0.3103, "step": 900200 }, { "epoch": 9.17, "learning_rate": 2.0983623794648096e-05, "loss": 0.3198, "step": 900300 }, { "epoch": 9.17, "learning_rate": 2.0977297105376884e-05, "loss": 0.2932, "step": 900400 }, { "epoch": 9.17, "learning_rate": 2.0970970857261992e-05, "loss": 0.3217, "step": 900500 }, { "epoch": 9.18, "learning_rate": 2.0964645050612752e-05, "loss": 0.3642, "step": 900600 }, { "epoch": 9.18, "learning_rate": 2.095831968573844e-05, "loss": 0.3612, "step": 900700 }, { "epoch": 9.18, "learning_rate": 2.095199476294834e-05, "loss": 0.3448, "step": 900800 }, { "epoch": 9.18, "learning_rate": 2.0945670282551715e-05, "loss": 0.3051, "step": 900900 }, { "epoch": 9.18, "learning_rate": 2.0939346244857764e-05, "loss": 0.3763, "step": 901000 }, { "epoch": 9.18, "learning_rate": 2.0933022650175712e-05, "loss": 0.3157, "step": 901100 }, { "epoch": 9.18, "learning_rate": 2.092669949881475e-05, "loss": 0.3653, "step": 901200 }, { "epoch": 9.18, "learning_rate": 2.0920376791084027e-05, "loss": 0.3389, "step": 901300 }, { "epoch": 9.18, "learning_rate": 2.0914054527292695e-05, "loss": 0.2971, "step": 901400 }, { "epoch": 9.18, "learning_rate": 2.090773270774989e-05, "loss": 0.3478, "step": 901500 }, { "epoch": 9.19, "learning_rate": 2.0901411332764682e-05, "loss": 0.2938, "step": 901600 }, { "epoch": 9.19, "learning_rate": 2.0895090402646163e-05, "loss": 0.2879, "step": 901700 }, { "epoch": 9.19, "learning_rate": 2.0888769917703394e-05, "loss": 0.3268, "step": 901800 }, { "epoch": 9.19, "learning_rate": 2.0882449878245396e-05, "loss": 0.3666, "step": 901900 }, { "epoch": 9.19, "learning_rate": 2.087613028458119e-05, "loss": 0.2863, "step": 902000 }, { "epoch": 9.19, "learning_rate": 2.086981113701977e-05, "loss": 0.3884, "step": 902100 }, { "epoch": 9.19, "learning_rate": 2.0863492435870096e-05, "loss": 0.3236, "step": 902200 }, { "epoch": 9.19, "learning_rate": 2.085717418144111e-05, "loss": 0.3201, "step": 902300 }, { "epoch": 9.19, "learning_rate": 2.0850856374041754e-05, "loss": 0.3157, "step": 902400 }, { "epoch": 9.19, "learning_rate": 2.0844539013980923e-05, "loss": 0.2792, "step": 902500 }, { "epoch": 9.2, "learning_rate": 2.083822210156749e-05, "loss": 0.3213, "step": 902600 }, { "epoch": 9.2, "learning_rate": 2.0831905637110322e-05, "loss": 0.2996, "step": 902700 }, { "epoch": 9.2, "learning_rate": 2.0825589620918274e-05, "loss": 0.3459, "step": 902800 }, { "epoch": 9.2, "learning_rate": 2.0819274053300125e-05, "loss": 0.3258, "step": 902900 }, { "epoch": 9.2, "learning_rate": 2.081295893456469e-05, "loss": 0.3304, "step": 903000 }, { "epoch": 9.2, "learning_rate": 2.080664426502074e-05, "loss": 0.3157, "step": 903100 }, { "epoch": 9.2, "learning_rate": 2.0800330044977023e-05, "loss": 0.3705, "step": 903200 }, { "epoch": 9.2, "learning_rate": 2.0794016274742263e-05, "loss": 0.331, "step": 903300 }, { "epoch": 9.2, "learning_rate": 2.078770295462518e-05, "loss": 0.3201, "step": 903400 }, { "epoch": 9.21, "learning_rate": 2.078139008493443e-05, "loss": 0.3381, "step": 903500 }, { "epoch": 9.21, "learning_rate": 2.07750776659787e-05, "loss": 0.2666, "step": 903600 }, { "epoch": 9.21, "learning_rate": 2.076876569806662e-05, "loss": 0.3046, "step": 903700 }, { "epoch": 9.21, "learning_rate": 2.07624541815068e-05, "loss": 0.3493, "step": 903800 }, { "epoch": 9.21, "learning_rate": 2.075614311660785e-05, "loss": 0.3168, "step": 903900 }, { "epoch": 9.21, "learning_rate": 2.0749832503678333e-05, "loss": 0.3286, "step": 904000 }, { "epoch": 9.21, "learning_rate": 2.074352234302682e-05, "loss": 0.3193, "step": 904100 }, { "epoch": 9.21, "learning_rate": 2.0737212634961815e-05, "loss": 0.3513, "step": 904200 }, { "epoch": 9.21, "learning_rate": 2.0730903379791826e-05, "loss": 0.335, "step": 904300 }, { "epoch": 9.21, "learning_rate": 2.072459457782536e-05, "loss": 0.2932, "step": 904400 }, { "epoch": 9.22, "learning_rate": 2.0718286229370863e-05, "loss": 0.3319, "step": 904500 }, { "epoch": 9.22, "learning_rate": 2.071197833473677e-05, "loss": 0.2998, "step": 904600 }, { "epoch": 9.22, "learning_rate": 2.070567089423153e-05, "loss": 0.2895, "step": 904700 }, { "epoch": 9.22, "learning_rate": 2.0699363908163497e-05, "loss": 0.3352, "step": 904800 }, { "epoch": 9.22, "learning_rate": 2.0693057376841064e-05, "loss": 0.3524, "step": 904900 }, { "epoch": 9.22, "learning_rate": 2.06867513005726e-05, "loss": 0.3333, "step": 905000 }, { "epoch": 9.22, "learning_rate": 2.0680445679666405e-05, "loss": 0.3793, "step": 905100 }, { "epoch": 9.22, "learning_rate": 2.0674140514430805e-05, "loss": 0.3032, "step": 905200 }, { "epoch": 9.22, "learning_rate": 2.066783580517409e-05, "loss": 0.3082, "step": 905300 }, { "epoch": 9.22, "learning_rate": 2.0661531552204502e-05, "loss": 0.3306, "step": 905400 }, { "epoch": 9.23, "learning_rate": 2.065522775583029e-05, "loss": 0.3505, "step": 905500 }, { "epoch": 9.23, "learning_rate": 2.064892441635968e-05, "loss": 0.3569, "step": 905600 }, { "epoch": 9.23, "learning_rate": 2.064262153410086e-05, "loss": 0.344, "step": 905700 }, { "epoch": 9.23, "learning_rate": 2.0636319109362e-05, "loss": 0.3577, "step": 905800 }, { "epoch": 9.23, "learning_rate": 2.0630017142451254e-05, "loss": 0.3228, "step": 905900 }, { "epoch": 9.23, "learning_rate": 2.062371563367677e-05, "loss": 0.3035, "step": 906000 }, { "epoch": 9.23, "learning_rate": 2.0617414583346622e-05, "loss": 0.3536, "step": 906100 }, { "epoch": 9.23, "learning_rate": 2.0611113991768908e-05, "loss": 0.2919, "step": 906200 }, { "epoch": 9.23, "learning_rate": 2.06048138592517e-05, "loss": 0.3205, "step": 906300 }, { "epoch": 9.23, "learning_rate": 2.059851418610302e-05, "loss": 0.3169, "step": 906400 }, { "epoch": 9.24, "learning_rate": 2.059221497263089e-05, "loss": 0.3428, "step": 906500 }, { "epoch": 9.24, "learning_rate": 2.058597920440025e-05, "loss": 0.3203, "step": 906600 }, { "epoch": 9.24, "learning_rate": 2.0579743887299284e-05, "loss": 0.3546, "step": 906700 }, { "epoch": 9.24, "learning_rate": 2.0573446045489658e-05, "loss": 0.3498, "step": 906800 }, { "epoch": 9.24, "learning_rate": 2.0567148664582263e-05, "loss": 0.3282, "step": 906900 }, { "epoch": 9.24, "learning_rate": 2.0560851744885007e-05, "loss": 0.3148, "step": 907000 }, { "epoch": 9.24, "learning_rate": 2.0554555286705763e-05, "loss": 0.3111, "step": 907100 }, { "epoch": 9.24, "learning_rate": 2.0548259290352404e-05, "loss": 0.3779, "step": 907200 }, { "epoch": 9.24, "learning_rate": 2.0541963756132776e-05, "loss": 0.3257, "step": 907300 }, { "epoch": 9.24, "learning_rate": 2.053566868435466e-05, "loss": 0.3249, "step": 907400 }, { "epoch": 9.25, "learning_rate": 2.0529374075325867e-05, "loss": 0.3524, "step": 907500 }, { "epoch": 9.25, "learning_rate": 2.0523079929354168e-05, "loss": 0.3021, "step": 907600 }, { "epoch": 9.25, "learning_rate": 2.0516786246747303e-05, "loss": 0.2624, "step": 907700 }, { "epoch": 9.25, "learning_rate": 2.0510493027812995e-05, "loss": 0.2991, "step": 907800 }, { "epoch": 9.25, "learning_rate": 2.0504200272858958e-05, "loss": 0.2984, "step": 907900 }, { "epoch": 9.25, "learning_rate": 2.049790798219285e-05, "loss": 0.3312, "step": 908000 }, { "epoch": 9.25, "learning_rate": 2.0491616156122337e-05, "loss": 0.3694, "step": 908100 }, { "epoch": 9.25, "learning_rate": 2.0485324794955055e-05, "loss": 0.2578, "step": 908200 }, { "epoch": 9.25, "learning_rate": 2.0479033898998603e-05, "loss": 0.305, "step": 908300 }, { "epoch": 9.25, "learning_rate": 2.0472743468560577e-05, "loss": 0.4065, "step": 908400 }, { "epoch": 9.26, "learning_rate": 2.0466453503948553e-05, "loss": 0.3144, "step": 908500 }, { "epoch": 9.26, "learning_rate": 2.0460164005470043e-05, "loss": 0.3224, "step": 908600 }, { "epoch": 9.26, "learning_rate": 2.0453937861443067e-05, "loss": 0.3921, "step": 908700 }, { "epoch": 9.26, "learning_rate": 2.0447649291485146e-05, "loss": 0.3147, "step": 908800 }, { "epoch": 9.26, "learning_rate": 2.0441361188580173e-05, "loss": 0.3601, "step": 908900 }, { "epoch": 9.26, "learning_rate": 2.0435073553035604e-05, "loss": 0.3046, "step": 909000 }, { "epoch": 9.26, "learning_rate": 2.0428786385158846e-05, "loss": 0.3536, "step": 909100 }, { "epoch": 9.26, "learning_rate": 2.0422499685257315e-05, "loss": 0.3932, "step": 909200 }, { "epoch": 9.26, "learning_rate": 2.04162134536384e-05, "loss": 0.3, "step": 909300 }, { "epoch": 9.27, "learning_rate": 2.0409927690609448e-05, "loss": 0.3164, "step": 909400 }, { "epoch": 9.27, "learning_rate": 2.0403642396477803e-05, "loss": 0.3422, "step": 909500 }, { "epoch": 9.27, "learning_rate": 2.0397357571550773e-05, "loss": 0.3163, "step": 909600 }, { "epoch": 9.27, "learning_rate": 2.0391073216135667e-05, "loss": 0.3678, "step": 909700 }, { "epoch": 9.27, "learning_rate": 2.038478933053973e-05, "loss": 0.288, "step": 909800 }, { "epoch": 9.27, "learning_rate": 2.0378505915070207e-05, "loss": 0.3197, "step": 909900 }, { "epoch": 9.27, "learning_rate": 2.0372222970034337e-05, "loss": 0.336, "step": 910000 }, { "epoch": 9.27, "learning_rate": 2.0365940495739297e-05, "loss": 0.3721, "step": 910100 }, { "epoch": 9.27, "learning_rate": 2.0359658492492277e-05, "loss": 0.3335, "step": 910200 }, { "epoch": 9.27, "learning_rate": 2.0353376960600443e-05, "loss": 0.3519, "step": 910300 }, { "epoch": 9.28, "learning_rate": 2.034709590037089e-05, "loss": 0.3604, "step": 910400 }, { "epoch": 9.28, "learning_rate": 2.0340815312110744e-05, "loss": 0.2631, "step": 910500 }, { "epoch": 9.28, "learning_rate": 2.033453519612709e-05, "loss": 0.3429, "step": 910600 }, { "epoch": 9.28, "learning_rate": 2.032825555272698e-05, "loss": 0.2839, "step": 910700 }, { "epoch": 9.28, "learning_rate": 2.0321976382217455e-05, "loss": 0.3275, "step": 910800 }, { "epoch": 9.28, "learning_rate": 2.0315697684905543e-05, "loss": 0.3996, "step": 910900 }, { "epoch": 9.28, "learning_rate": 2.0309419461098205e-05, "loss": 0.3707, "step": 911000 }, { "epoch": 9.28, "learning_rate": 2.030314171110243e-05, "loss": 0.3409, "step": 911100 }, { "epoch": 9.28, "learning_rate": 2.0296864435225153e-05, "loss": 0.3432, "step": 911200 }, { "epoch": 9.28, "learning_rate": 2.029058763377331e-05, "loss": 0.2944, "step": 911300 }, { "epoch": 9.29, "learning_rate": 2.028431130705378e-05, "loss": 0.3277, "step": 911400 }, { "epoch": 9.29, "learning_rate": 2.027803545537345e-05, "loss": 0.4145, "step": 911500 }, { "epoch": 9.29, "learning_rate": 2.0271760079039177e-05, "loss": 0.3353, "step": 911600 }, { "epoch": 9.29, "learning_rate": 2.026548517835777e-05, "loss": 0.3333, "step": 911700 }, { "epoch": 9.29, "learning_rate": 2.0259210753636047e-05, "loss": 0.3308, "step": 911800 }, { "epoch": 9.29, "learning_rate": 2.0252936805180794e-05, "loss": 0.3806, "step": 911900 }, { "epoch": 9.29, "learning_rate": 2.024666333329876e-05, "loss": 0.3543, "step": 912000 }, { "epoch": 9.29, "learning_rate": 2.0240390338296675e-05, "loss": 0.2934, "step": 912100 }, { "epoch": 9.29, "learning_rate": 2.0234117820481283e-05, "loss": 0.2882, "step": 912200 }, { "epoch": 9.29, "learning_rate": 2.0227845780159235e-05, "loss": 0.3608, "step": 912300 }, { "epoch": 9.3, "learning_rate": 2.0221636930896315e-05, "loss": 0.3361, "step": 912400 }, { "epoch": 9.3, "learning_rate": 2.021536584169837e-05, "loss": 0.3523, "step": 912500 }, { "epoch": 9.3, "learning_rate": 2.0209095230910643e-05, "loss": 0.312, "step": 912600 }, { "epoch": 9.3, "learning_rate": 2.020282509883974e-05, "loss": 0.4134, "step": 912700 }, { "epoch": 9.3, "learning_rate": 2.0196555445792216e-05, "loss": 0.2895, "step": 912800 }, { "epoch": 9.3, "learning_rate": 2.0190286272074623e-05, "loss": 0.3297, "step": 912900 }, { "epoch": 9.3, "learning_rate": 2.0184017577993506e-05, "loss": 0.3511, "step": 913000 }, { "epoch": 9.3, "learning_rate": 2.0177749363855338e-05, "loss": 0.303, "step": 913100 }, { "epoch": 9.3, "learning_rate": 2.017148162996661e-05, "loss": 0.3839, "step": 913200 }, { "epoch": 9.3, "learning_rate": 2.0165214376633782e-05, "loss": 0.34, "step": 913300 }, { "epoch": 9.31, "learning_rate": 2.0158947604163275e-05, "loss": 0.3327, "step": 913400 }, { "epoch": 9.31, "learning_rate": 2.01526813128615e-05, "loss": 0.2973, "step": 913500 }, { "epoch": 9.31, "learning_rate": 2.0146415503034844e-05, "loss": 0.2784, "step": 913600 }, { "epoch": 9.31, "learning_rate": 2.01402128258843e-05, "loss": 0.3444, "step": 913700 }, { "epoch": 9.31, "learning_rate": 2.0133947975104555e-05, "loss": 0.3038, "step": 913800 }, { "epoch": 9.31, "learning_rate": 2.012768360671587e-05, "loss": 0.2758, "step": 913900 }, { "epoch": 9.31, "learning_rate": 2.0121419721024537e-05, "loss": 0.3848, "step": 914000 }, { "epoch": 9.31, "learning_rate": 2.011515631833683e-05, "loss": 0.2813, "step": 914100 }, { "epoch": 9.31, "learning_rate": 2.010889339895899e-05, "loss": 0.3729, "step": 914200 }, { "epoch": 9.32, "learning_rate": 2.0102630963197236e-05, "loss": 0.3408, "step": 914300 }, { "epoch": 9.32, "learning_rate": 2.0096369011357754e-05, "loss": 0.435, "step": 914400 }, { "epoch": 9.32, "learning_rate": 2.0090107543746748e-05, "loss": 0.321, "step": 914500 }, { "epoch": 9.32, "learning_rate": 2.0083846560670327e-05, "loss": 0.3769, "step": 914600 }, { "epoch": 9.32, "learning_rate": 2.0077586062434637e-05, "loss": 0.36, "step": 914700 }, { "epoch": 9.32, "learning_rate": 2.0071326049345778e-05, "loss": 0.3714, "step": 914800 }, { "epoch": 9.32, "learning_rate": 2.0065066521709817e-05, "loss": 0.3316, "step": 914900 }, { "epoch": 9.32, "learning_rate": 2.0058807479832815e-05, "loss": 0.3098, "step": 915000 }, { "epoch": 9.32, "learning_rate": 2.005254892402081e-05, "loss": 0.3316, "step": 915100 }, { "epoch": 9.32, "learning_rate": 2.004629085457979e-05, "loss": 0.3342, "step": 915200 }, { "epoch": 9.33, "learning_rate": 2.0040033271815747e-05, "loss": 0.2706, "step": 915300 }, { "epoch": 9.33, "learning_rate": 2.0033776176034648e-05, "loss": 0.3341, "step": 915400 }, { "epoch": 9.33, "learning_rate": 2.0027519567542408e-05, "loss": 0.3202, "step": 915500 }, { "epoch": 9.33, "learning_rate": 2.0021263446644952e-05, "loss": 0.3026, "step": 915600 }, { "epoch": 9.33, "learning_rate": 2.0015007813648177e-05, "loss": 0.3208, "step": 915700 }, { "epoch": 9.33, "learning_rate": 2.0008752668857916e-05, "loss": 0.2898, "step": 915800 }, { "epoch": 9.33, "learning_rate": 2.0002498012580023e-05, "loss": 0.288, "step": 915900 }, { "epoch": 9.33, "learning_rate": 1.999624384512031e-05, "loss": 0.3118, "step": 916000 }, { "epoch": 9.33, "learning_rate": 1.9989990166784598e-05, "loss": 0.3159, "step": 916100 }, { "epoch": 9.33, "learning_rate": 1.9983736977878614e-05, "loss": 0.318, "step": 916200 }, { "epoch": 9.34, "learning_rate": 1.997748427870812e-05, "loss": 0.3758, "step": 916300 }, { "epoch": 9.34, "learning_rate": 1.9971232069578844e-05, "loss": 0.3612, "step": 916400 }, { "epoch": 9.34, "learning_rate": 1.9964980350796457e-05, "loss": 0.3794, "step": 916500 }, { "epoch": 9.34, "learning_rate": 1.9958729122666642e-05, "loss": 0.2223, "step": 916600 }, { "epoch": 9.34, "learning_rate": 1.9952478385495064e-05, "loss": 0.3282, "step": 916700 }, { "epoch": 9.34, "learning_rate": 1.9946228139587325e-05, "loss": 0.2717, "step": 916800 }, { "epoch": 9.34, "learning_rate": 1.993997838524903e-05, "loss": 0.3397, "step": 916900 }, { "epoch": 9.34, "learning_rate": 1.9933729122785772e-05, "loss": 0.3636, "step": 917000 }, { "epoch": 9.34, "learning_rate": 1.992748035250307e-05, "loss": 0.2541, "step": 917100 }, { "epoch": 9.34, "learning_rate": 1.9921232074706472e-05, "loss": 0.352, "step": 917200 }, { "epoch": 9.35, "learning_rate": 1.9914984289701488e-05, "loss": 0.3271, "step": 917300 }, { "epoch": 9.35, "learning_rate": 1.9908736997793577e-05, "loss": 0.3089, "step": 917400 }, { "epoch": 9.35, "learning_rate": 1.9902490199288208e-05, "loss": 0.3281, "step": 917500 }, { "epoch": 9.35, "learning_rate": 1.9896243894490816e-05, "loss": 0.3559, "step": 917600 }, { "epoch": 9.35, "learning_rate": 1.9889998083706797e-05, "loss": 0.3359, "step": 917700 }, { "epoch": 9.35, "learning_rate": 1.9883752767241535e-05, "loss": 0.4028, "step": 917800 }, { "epoch": 9.35, "learning_rate": 1.9877507945400384e-05, "loss": 0.3127, "step": 917900 }, { "epoch": 9.35, "learning_rate": 1.987126361848871e-05, "loss": 0.3184, "step": 918000 }, { "epoch": 9.35, "learning_rate": 1.9865019786811784e-05, "loss": 0.3315, "step": 918100 }, { "epoch": 9.35, "learning_rate": 1.9858776450674906e-05, "loss": 0.3147, "step": 918200 }, { "epoch": 9.36, "learning_rate": 1.985253361038336e-05, "loss": 0.3388, "step": 918300 }, { "epoch": 9.36, "learning_rate": 1.98463536872268e-05, "loss": 0.3196, "step": 918400 }, { "epoch": 9.36, "learning_rate": 1.9840111834575488e-05, "loss": 0.298, "step": 918500 }, { "epoch": 9.36, "learning_rate": 1.983387047868207e-05, "loss": 0.2837, "step": 918600 }, { "epoch": 9.36, "learning_rate": 1.9827629619851713e-05, "loss": 0.2821, "step": 918700 }, { "epoch": 9.36, "learning_rate": 1.982138925838958e-05, "loss": 0.3031, "step": 918800 }, { "epoch": 9.36, "learning_rate": 1.9815149394600754e-05, "loss": 0.344, "step": 918900 }, { "epoch": 9.36, "learning_rate": 1.980891002879034e-05, "loss": 0.3065, "step": 919000 }, { "epoch": 9.36, "learning_rate": 1.9802671161263425e-05, "loss": 0.3203, "step": 919100 }, { "epoch": 9.36, "learning_rate": 1.9796432792325017e-05, "loss": 0.3317, "step": 919200 }, { "epoch": 9.37, "learning_rate": 1.9790194922280162e-05, "loss": 0.4298, "step": 919300 }, { "epoch": 9.37, "learning_rate": 1.9783957551433856e-05, "loss": 0.3787, "step": 919400 }, { "epoch": 9.37, "learning_rate": 1.9777720680091044e-05, "loss": 0.3159, "step": 919500 }, { "epoch": 9.37, "learning_rate": 1.9771484308556693e-05, "loss": 0.2785, "step": 919600 }, { "epoch": 9.37, "learning_rate": 1.9765248437135723e-05, "loss": 0.3456, "step": 919700 }, { "epoch": 9.37, "learning_rate": 1.975901306613302e-05, "loss": 0.3292, "step": 919800 }, { "epoch": 9.37, "learning_rate": 1.975277819585346e-05, "loss": 0.2588, "step": 919900 }, { "epoch": 9.37, "learning_rate": 1.9746543826601902e-05, "loss": 0.3248, "step": 920000 }, { "epoch": 9.37, "learning_rate": 1.9740309958683165e-05, "loss": 0.2978, "step": 920100 }, { "epoch": 9.38, "learning_rate": 1.9734076592402035e-05, "loss": 0.3046, "step": 920200 }, { "epoch": 9.38, "learning_rate": 1.97278437280633e-05, "loss": 0.3603, "step": 920300 }, { "epoch": 9.38, "learning_rate": 1.972161136597171e-05, "loss": 0.339, "step": 920400 }, { "epoch": 9.38, "learning_rate": 1.9715379506431973e-05, "loss": 0.3629, "step": 920500 }, { "epoch": 9.38, "learning_rate": 1.970914814974881e-05, "loss": 0.3389, "step": 920600 }, { "epoch": 9.38, "learning_rate": 1.9702979602270465e-05, "loss": 0.3156, "step": 920700 }, { "epoch": 9.38, "learning_rate": 1.969674924717827e-05, "loss": 0.3194, "step": 920800 }, { "epoch": 9.38, "learning_rate": 1.9690519395853563e-05, "loss": 0.2644, "step": 920900 }, { "epoch": 9.38, "learning_rate": 1.968429004860092e-05, "loss": 0.3638, "step": 921000 }, { "epoch": 9.38, "learning_rate": 1.9678061205724937e-05, "loss": 0.3224, "step": 921100 }, { "epoch": 9.39, "learning_rate": 1.9671832867530167e-05, "loss": 0.3336, "step": 921200 }, { "epoch": 9.39, "learning_rate": 1.966560503432113e-05, "loss": 0.2763, "step": 921300 }, { "epoch": 9.39, "learning_rate": 1.9659377706402328e-05, "loss": 0.4111, "step": 921400 }, { "epoch": 9.39, "learning_rate": 1.9653150884078262e-05, "loss": 0.3353, "step": 921500 }, { "epoch": 9.39, "learning_rate": 1.964692456765336e-05, "loss": 0.2812, "step": 921600 }, { "epoch": 9.39, "learning_rate": 1.9640698757432065e-05, "loss": 0.3167, "step": 921700 }, { "epoch": 9.39, "learning_rate": 1.963447345371878e-05, "loss": 0.3123, "step": 921800 }, { "epoch": 9.39, "learning_rate": 1.9628248656817883e-05, "loss": 0.3657, "step": 921900 }, { "epoch": 9.39, "learning_rate": 1.962202436703373e-05, "loss": 0.3143, "step": 922000 }, { "epoch": 9.39, "learning_rate": 1.9615800584670664e-05, "loss": 0.3026, "step": 922100 }, { "epoch": 9.4, "learning_rate": 1.960957731003297e-05, "loss": 0.35, "step": 922200 }, { "epoch": 9.4, "learning_rate": 1.9603354543424938e-05, "loss": 0.306, "step": 922300 }, { "epoch": 9.4, "learning_rate": 1.9597132285150823e-05, "loss": 0.3539, "step": 922400 }, { "epoch": 9.4, "learning_rate": 1.9590910535514866e-05, "loss": 0.364, "step": 922500 }, { "epoch": 9.4, "learning_rate": 1.958468929482126e-05, "loss": 0.3137, "step": 922600 }, { "epoch": 9.4, "learning_rate": 1.95785307681669e-05, "loss": 0.3578, "step": 922700 }, { "epoch": 9.4, "learning_rate": 1.9572310541173514e-05, "loss": 0.3345, "step": 922800 }, { "epoch": 9.4, "learning_rate": 1.9566090824031918e-05, "loss": 0.3182, "step": 922900 }, { "epoch": 9.4, "learning_rate": 1.955987161704621e-05, "loss": 0.3324, "step": 923000 }, { "epoch": 9.4, "learning_rate": 1.9553652920520476e-05, "loss": 0.3403, "step": 923100 }, { "epoch": 9.41, "learning_rate": 1.9547434734758772e-05, "loss": 0.2897, "step": 923200 }, { "epoch": 9.41, "learning_rate": 1.9541217060065148e-05, "loss": 0.2998, "step": 923300 }, { "epoch": 9.41, "learning_rate": 1.953499989674358e-05, "loss": 0.3266, "step": 923400 }, { "epoch": 9.41, "learning_rate": 1.9528783245098065e-05, "loss": 0.3917, "step": 923500 }, { "epoch": 9.41, "learning_rate": 1.952256710543257e-05, "loss": 0.3807, "step": 923600 }, { "epoch": 9.41, "learning_rate": 1.9516351478051007e-05, "loss": 0.2952, "step": 923700 }, { "epoch": 9.41, "learning_rate": 1.9510136363257293e-05, "loss": 0.366, "step": 923800 }, { "epoch": 9.41, "learning_rate": 1.9503921761355326e-05, "loss": 0.2897, "step": 923900 }, { "epoch": 9.41, "learning_rate": 1.9497707672648932e-05, "loss": 0.3665, "step": 924000 }, { "epoch": 9.41, "learning_rate": 1.9491494097441964e-05, "loss": 0.3481, "step": 924100 }, { "epoch": 9.42, "learning_rate": 1.9485281036038222e-05, "loss": 0.3293, "step": 924200 }, { "epoch": 9.42, "learning_rate": 1.9479068488741487e-05, "loss": 0.264, "step": 924300 }, { "epoch": 9.42, "learning_rate": 1.9472918573637055e-05, "loss": 0.37, "step": 924400 }, { "epoch": 9.42, "learning_rate": 1.946670705031693e-05, "loss": 0.2919, "step": 924500 }, { "epoch": 9.42, "learning_rate": 1.9460496042011974e-05, "loss": 0.3493, "step": 924600 }, { "epoch": 9.42, "learning_rate": 1.9454285549025882e-05, "loss": 0.3264, "step": 924700 }, { "epoch": 9.42, "learning_rate": 1.944807557166227e-05, "loss": 0.3204, "step": 924800 }, { "epoch": 9.42, "learning_rate": 1.9441866110224802e-05, "loss": 0.3042, "step": 924900 }, { "epoch": 9.42, "learning_rate": 1.9435657165017075e-05, "loss": 0.3913, "step": 925000 }, { "epoch": 9.43, "learning_rate": 1.9429448736342663e-05, "loss": 0.3397, "step": 925100 }, { "epoch": 9.43, "learning_rate": 1.9423240824505124e-05, "loss": 0.3346, "step": 925200 }, { "epoch": 9.43, "learning_rate": 1.9417033429808005e-05, "loss": 0.3326, "step": 925300 }, { "epoch": 9.43, "learning_rate": 1.9410826552554782e-05, "loss": 0.2725, "step": 925400 }, { "epoch": 9.43, "learning_rate": 1.9404620193048945e-05, "loss": 0.2656, "step": 925500 }, { "epoch": 9.43, "learning_rate": 1.939841435159396e-05, "loss": 0.3593, "step": 925600 }, { "epoch": 9.43, "learning_rate": 1.939220902849324e-05, "loss": 0.3433, "step": 925700 }, { "epoch": 9.43, "learning_rate": 1.9386004224050194e-05, "loss": 0.3171, "step": 925800 }, { "epoch": 9.43, "learning_rate": 1.9379799938568204e-05, "loss": 0.3885, "step": 925900 }, { "epoch": 9.43, "learning_rate": 1.9373596172350625e-05, "loss": 0.3114, "step": 926000 }, { "epoch": 9.44, "learning_rate": 1.9367392925700768e-05, "loss": 0.3563, "step": 926100 }, { "epoch": 9.44, "learning_rate": 1.9361190198921947e-05, "loss": 0.3192, "step": 926200 }, { "epoch": 9.44, "learning_rate": 1.935498799231744e-05, "loss": 0.2337, "step": 926300 }, { "epoch": 9.44, "learning_rate": 1.9348786306190486e-05, "loss": 0.3357, "step": 926400 }, { "epoch": 9.44, "learning_rate": 1.9342585140844322e-05, "loss": 0.3395, "step": 926500 }, { "epoch": 9.44, "learning_rate": 1.9336384496582155e-05, "loss": 0.2727, "step": 926600 }, { "epoch": 9.44, "learning_rate": 1.9330184373707138e-05, "loss": 0.2971, "step": 926700 }, { "epoch": 9.44, "learning_rate": 1.9323984772522433e-05, "loss": 0.301, "step": 926800 }, { "epoch": 9.44, "learning_rate": 1.9317785693331167e-05, "loss": 0.2801, "step": 926900 }, { "epoch": 9.44, "learning_rate": 1.9311587136436423e-05, "loss": 0.3746, "step": 927000 }, { "epoch": 9.45, "learning_rate": 1.9305389102141287e-05, "loss": 0.2438, "step": 927100 }, { "epoch": 9.45, "learning_rate": 1.9299191590748796e-05, "loss": 0.3846, "step": 927200 }, { "epoch": 9.45, "learning_rate": 1.9292994602561993e-05, "loss": 0.4086, "step": 927300 }, { "epoch": 9.45, "learning_rate": 1.9286798137883846e-05, "loss": 0.3257, "step": 927400 }, { "epoch": 9.45, "learning_rate": 1.9280602197017334e-05, "loss": 0.3081, "step": 927500 }, { "epoch": 9.45, "learning_rate": 1.9274406780265415e-05, "loss": 0.2826, "step": 927600 }, { "epoch": 9.45, "learning_rate": 1.926821188793099e-05, "loss": 0.3484, "step": 927700 }, { "epoch": 9.45, "learning_rate": 1.9262017520316957e-05, "loss": 0.322, "step": 927800 }, { "epoch": 9.45, "learning_rate": 1.9255823677726202e-05, "loss": 0.3212, "step": 927900 }, { "epoch": 9.45, "learning_rate": 1.9249630360461537e-05, "loss": 0.3369, "step": 928000 }, { "epoch": 9.46, "learning_rate": 1.9243437568825793e-05, "loss": 0.2335, "step": 928100 }, { "epoch": 9.46, "learning_rate": 1.9237245303121768e-05, "loss": 0.2942, "step": 928200 }, { "epoch": 9.46, "learning_rate": 1.9231053563652208e-05, "loss": 0.3172, "step": 928300 }, { "epoch": 9.46, "learning_rate": 1.9224862350719866e-05, "loss": 0.3546, "step": 928400 }, { "epoch": 9.46, "learning_rate": 1.9218671664627465e-05, "loss": 0.3159, "step": 928500 }, { "epoch": 9.46, "learning_rate": 1.921248150567767e-05, "loss": 0.3635, "step": 928600 }, { "epoch": 9.46, "learning_rate": 1.9206291874173148e-05, "loss": 0.3394, "step": 928700 }, { "epoch": 9.46, "learning_rate": 1.920010277041655e-05, "loss": 0.3483, "step": 928800 }, { "epoch": 9.46, "learning_rate": 1.919397607785268e-05, "loss": 0.3327, "step": 928900 }, { "epoch": 9.46, "learning_rate": 1.9187788025214687e-05, "loss": 0.315, "step": 929000 }, { "epoch": 9.47, "learning_rate": 1.9181662373851353e-05, "loss": 0.3735, "step": 929100 }, { "epoch": 9.47, "learning_rate": 1.917547537353013e-05, "loss": 0.2762, "step": 929200 }, { "epoch": 9.47, "learning_rate": 1.9169288902463553e-05, "loss": 0.3147, "step": 929300 }, { "epoch": 9.47, "learning_rate": 1.916310296095411e-05, "loss": 0.3477, "step": 929400 }, { "epoch": 9.47, "learning_rate": 1.915691754930427e-05, "loss": 0.3936, "step": 929500 }, { "epoch": 9.47, "learning_rate": 1.9150732667816453e-05, "loss": 0.3668, "step": 929600 }, { "epoch": 9.47, "learning_rate": 1.9144548316793057e-05, "loss": 0.3583, "step": 929700 }, { "epoch": 9.47, "learning_rate": 1.9138364496536478e-05, "loss": 0.2812, "step": 929800 }, { "epoch": 9.47, "learning_rate": 1.9132181207349052e-05, "loss": 0.3011, "step": 929900 }, { "epoch": 9.48, "learning_rate": 1.9125998449533098e-05, "loss": 0.3456, "step": 930000 }, { "epoch": 9.48, "learning_rate": 1.9119816223390943e-05, "loss": 0.3027, "step": 930100 }, { "epoch": 9.48, "learning_rate": 1.911363452922483e-05, "loss": 0.3307, "step": 930200 }, { "epoch": 9.48, "learning_rate": 1.910745336733703e-05, "loss": 0.3546, "step": 930300 }, { "epoch": 9.48, "learning_rate": 1.9101272738029754e-05, "loss": 0.2646, "step": 930400 }, { "epoch": 9.48, "learning_rate": 1.9095092641605212e-05, "loss": 0.3663, "step": 930500 }, { "epoch": 9.48, "learning_rate": 1.9088913078365552e-05, "loss": 0.3289, "step": 930600 }, { "epoch": 9.48, "learning_rate": 1.9082734048612928e-05, "loss": 0.3208, "step": 930700 }, { "epoch": 9.48, "learning_rate": 1.9076555552649462e-05, "loss": 0.3703, "step": 930800 }, { "epoch": 9.48, "learning_rate": 1.907037759077724e-05, "loss": 0.3086, "step": 930900 }, { "epoch": 9.49, "learning_rate": 1.906420016329833e-05, "loss": 0.3045, "step": 931000 }, { "epoch": 9.49, "learning_rate": 1.9058023270514774e-05, "loss": 0.3831, "step": 931100 }, { "epoch": 9.49, "learning_rate": 1.905184691272858e-05, "loss": 0.3018, "step": 931200 }, { "epoch": 9.49, "learning_rate": 1.9045671090241732e-05, "loss": 0.3416, "step": 931300 }, { "epoch": 9.49, "learning_rate": 1.90394958033562e-05, "loss": 0.3218, "step": 931400 }, { "epoch": 9.49, "learning_rate": 1.903332105237391e-05, "loss": 0.2794, "step": 931500 }, { "epoch": 9.49, "learning_rate": 1.902714683759678e-05, "loss": 0.325, "step": 931600 }, { "epoch": 9.49, "learning_rate": 1.9020973159326693e-05, "loss": 0.3508, "step": 931700 }, { "epoch": 9.49, "learning_rate": 1.9014800017865486e-05, "loss": 0.2985, "step": 931800 }, { "epoch": 9.49, "learning_rate": 1.9008627413515003e-05, "loss": 0.3053, "step": 931900 }, { "epoch": 9.5, "learning_rate": 1.900245534657705e-05, "loss": 0.3645, "step": 932000 }, { "epoch": 9.5, "learning_rate": 1.8996283817353407e-05, "loss": 0.3499, "step": 932100 }, { "epoch": 9.5, "learning_rate": 1.8990112826145805e-05, "loss": 0.3113, "step": 932200 }, { "epoch": 9.5, "learning_rate": 1.898394237325599e-05, "loss": 0.3031, "step": 932300 }, { "epoch": 9.5, "learning_rate": 1.897777245898566e-05, "loss": 0.2854, "step": 932400 }, { "epoch": 9.5, "learning_rate": 1.8971603083636468e-05, "loss": 0.3474, "step": 932500 }, { "epoch": 9.5, "learning_rate": 1.8965434247510074e-05, "loss": 0.31, "step": 932600 }, { "epoch": 9.5, "learning_rate": 1.8959265950908102e-05, "loss": 0.326, "step": 932700 }, { "epoch": 9.5, "learning_rate": 1.8953098194132123e-05, "loss": 0.3732, "step": 932800 }, { "epoch": 9.5, "learning_rate": 1.8946930977483724e-05, "loss": 0.3472, "step": 932900 }, { "epoch": 9.51, "learning_rate": 1.894076430126445e-05, "loss": 0.3234, "step": 933000 }, { "epoch": 9.51, "learning_rate": 1.893459816577579e-05, "loss": 0.3579, "step": 933100 }, { "epoch": 9.51, "learning_rate": 1.8928432571319248e-05, "loss": 0.3296, "step": 933200 }, { "epoch": 9.51, "learning_rate": 1.8922267518196288e-05, "loss": 0.3194, "step": 933300 }, { "epoch": 9.51, "learning_rate": 1.891610300670833e-05, "loss": 0.3488, "step": 933400 }, { "epoch": 9.51, "learning_rate": 1.890993903715679e-05, "loss": 0.3599, "step": 933500 }, { "epoch": 9.51, "learning_rate": 1.8903775609843063e-05, "loss": 0.3465, "step": 933600 }, { "epoch": 9.51, "learning_rate": 1.8897612725068473e-05, "loss": 0.2989, "step": 933700 }, { "epoch": 9.51, "learning_rate": 1.8891450383134364e-05, "loss": 0.371, "step": 933800 }, { "epoch": 9.51, "learning_rate": 1.8885288584342044e-05, "loss": 0.3601, "step": 933900 }, { "epoch": 9.52, "learning_rate": 1.8879127328992788e-05, "loss": 0.3255, "step": 934000 }, { "epoch": 9.52, "learning_rate": 1.8872966617387832e-05, "loss": 0.3259, "step": 934100 }, { "epoch": 9.52, "learning_rate": 1.886680644982841e-05, "loss": 0.3435, "step": 934200 }, { "epoch": 9.52, "learning_rate": 1.8860646826615722e-05, "loss": 0.3123, "step": 934300 }, { "epoch": 9.52, "learning_rate": 1.8854487748050918e-05, "loss": 0.3671, "step": 934400 }, { "epoch": 9.52, "learning_rate": 1.884832921443515e-05, "loss": 0.3207, "step": 934500 }, { "epoch": 9.52, "learning_rate": 1.8842171226069534e-05, "loss": 0.3347, "step": 934600 }, { "epoch": 9.52, "learning_rate": 1.8836013783255157e-05, "loss": 0.3057, "step": 934700 }, { "epoch": 9.52, "learning_rate": 1.8829856886293087e-05, "loss": 0.3647, "step": 934800 }, { "epoch": 9.52, "learning_rate": 1.882370053548436e-05, "loss": 0.2993, "step": 934900 }, { "epoch": 9.53, "learning_rate": 1.881754473112997e-05, "loss": 0.3453, "step": 935000 }, { "epoch": 9.53, "learning_rate": 1.8811389473530914e-05, "loss": 0.381, "step": 935100 }, { "epoch": 9.53, "learning_rate": 1.8805296307384655e-05, "loss": 0.3264, "step": 935200 }, { "epoch": 9.53, "learning_rate": 1.8799142138724033e-05, "loss": 0.339, "step": 935300 }, { "epoch": 9.53, "learning_rate": 1.879298851771852e-05, "loss": 0.3389, "step": 935400 }, { "epoch": 9.53, "learning_rate": 1.878683544466898e-05, "loss": 0.3199, "step": 935500 }, { "epoch": 9.53, "learning_rate": 1.878068291987628e-05, "loss": 0.2764, "step": 935600 }, { "epoch": 9.53, "learning_rate": 1.8774530943641236e-05, "loss": 0.2999, "step": 935700 }, { "epoch": 9.53, "learning_rate": 1.876837951626463e-05, "loss": 0.283, "step": 935800 }, { "epoch": 9.54, "learning_rate": 1.8762228638047237e-05, "loss": 0.3211, "step": 935900 }, { "epoch": 9.54, "learning_rate": 1.8756078309289806e-05, "loss": 0.3113, "step": 936000 }, { "epoch": 9.54, "learning_rate": 1.8749928530293042e-05, "loss": 0.3389, "step": 936100 }, { "epoch": 9.54, "learning_rate": 1.8743779301357634e-05, "loss": 0.3582, "step": 936200 }, { "epoch": 9.54, "learning_rate": 1.8737692106844705e-05, "loss": 0.3761, "step": 936300 }, { "epoch": 9.54, "learning_rate": 1.8731543973425855e-05, "loss": 0.3622, "step": 936400 }, { "epoch": 9.54, "learning_rate": 1.872539639096726e-05, "loss": 0.3108, "step": 936500 }, { "epoch": 9.54, "learning_rate": 1.87192493597695e-05, "loss": 0.3457, "step": 936600 }, { "epoch": 9.54, "learning_rate": 1.871310288013312e-05, "loss": 0.2999, "step": 936700 }, { "epoch": 9.54, "learning_rate": 1.8706956952358654e-05, "loss": 0.2616, "step": 936800 }, { "epoch": 9.55, "learning_rate": 1.870081157674661e-05, "loss": 0.2915, "step": 936900 }, { "epoch": 9.55, "learning_rate": 1.869466675359744e-05, "loss": 0.3232, "step": 937000 }, { "epoch": 9.55, "learning_rate": 1.8688522483211598e-05, "loss": 0.2869, "step": 937100 }, { "epoch": 9.55, "learning_rate": 1.8682378765889507e-05, "loss": 0.334, "step": 937200 }, { "epoch": 9.55, "learning_rate": 1.867623560193155e-05, "loss": 0.3291, "step": 937300 }, { "epoch": 9.55, "learning_rate": 1.8670092991638098e-05, "loss": 0.3268, "step": 937400 }, { "epoch": 9.55, "learning_rate": 1.8663950935309494e-05, "loss": 0.3465, "step": 937500 }, { "epoch": 9.55, "learning_rate": 1.865780943324603e-05, "loss": 0.3628, "step": 937600 }, { "epoch": 9.55, "learning_rate": 1.8651668485747993e-05, "loss": 0.3472, "step": 937700 }, { "epoch": 9.55, "learning_rate": 1.8645528093115656e-05, "loss": 0.2972, "step": 937800 }, { "epoch": 9.56, "learning_rate": 1.863938825564923e-05, "loss": 0.3047, "step": 937900 }, { "epoch": 9.56, "learning_rate": 1.8633248973648917e-05, "loss": 0.3088, "step": 938000 }, { "epoch": 9.56, "learning_rate": 1.8627110247414913e-05, "loss": 0.3036, "step": 938100 }, { "epoch": 9.56, "learning_rate": 1.8620972077247328e-05, "loss": 0.3116, "step": 938200 }, { "epoch": 9.56, "learning_rate": 1.8614834463446308e-05, "loss": 0.3692, "step": 938300 }, { "epoch": 9.56, "learning_rate": 1.8608697406311942e-05, "loss": 0.2802, "step": 938400 }, { "epoch": 9.56, "learning_rate": 1.8602560906144288e-05, "loss": 0.2593, "step": 938500 }, { "epoch": 9.56, "learning_rate": 1.8596424963243394e-05, "loss": 0.3279, "step": 938600 }, { "epoch": 9.56, "learning_rate": 1.8590289577909262e-05, "loss": 0.3254, "step": 938700 }, { "epoch": 9.56, "learning_rate": 1.8584154750441892e-05, "loss": 0.3673, "step": 938800 }, { "epoch": 9.57, "learning_rate": 1.8578020481141215e-05, "loss": 0.3439, "step": 938900 }, { "epoch": 9.57, "learning_rate": 1.8571886770307177e-05, "loss": 0.2918, "step": 939000 }, { "epoch": 9.57, "learning_rate": 1.8565753618239684e-05, "loss": 0.2621, "step": 939100 }, { "epoch": 9.57, "learning_rate": 1.8559621025238595e-05, "loss": 0.3566, "step": 939200 }, { "epoch": 9.57, "learning_rate": 1.8553550309170273e-05, "loss": 0.3257, "step": 939300 }, { "epoch": 9.57, "learning_rate": 1.8547418829603375e-05, "loss": 0.318, "step": 939400 }, { "epoch": 9.57, "learning_rate": 1.8541287909999353e-05, "loss": 0.3259, "step": 939500 }, { "epoch": 9.57, "learning_rate": 1.8535157550657983e-05, "loss": 0.3354, "step": 939600 }, { "epoch": 9.57, "learning_rate": 1.8529027751878976e-05, "loss": 0.3221, "step": 939700 }, { "epoch": 9.57, "learning_rate": 1.852289851396206e-05, "loss": 0.3656, "step": 939800 }, { "epoch": 9.58, "learning_rate": 1.851676983720692e-05, "loss": 0.3728, "step": 939900 }, { "epoch": 9.58, "learning_rate": 1.8510641721913195e-05, "loss": 0.3119, "step": 940000 }, { "epoch": 9.58, "learning_rate": 1.850451416838053e-05, "loss": 0.3523, "step": 940100 }, { "epoch": 9.58, "learning_rate": 1.8498387176908537e-05, "loss": 0.3259, "step": 940200 }, { "epoch": 9.58, "learning_rate": 1.8492260747796752e-05, "loss": 0.3156, "step": 940300 }, { "epoch": 9.58, "learning_rate": 1.848613488134475e-05, "loss": 0.26, "step": 940400 }, { "epoch": 9.58, "learning_rate": 1.848000957785204e-05, "loss": 0.2989, "step": 940500 }, { "epoch": 9.58, "learning_rate": 1.8473884837618114e-05, "loss": 0.2945, "step": 940600 }, { "epoch": 9.58, "learning_rate": 1.846776066094243e-05, "loss": 0.3647, "step": 940700 }, { "epoch": 9.59, "learning_rate": 1.8461637048124432e-05, "loss": 0.2589, "step": 940800 }, { "epoch": 9.59, "learning_rate": 1.8455513999463535e-05, "loss": 0.2857, "step": 940900 }, { "epoch": 9.59, "learning_rate": 1.84493915152591e-05, "loss": 0.3096, "step": 941000 }, { "epoch": 9.59, "learning_rate": 1.844326959581049e-05, "loss": 0.3388, "step": 941100 }, { "epoch": 9.59, "learning_rate": 1.843714824141703e-05, "loss": 0.345, "step": 941200 }, { "epoch": 9.59, "learning_rate": 1.8431027452378018e-05, "loss": 0.2894, "step": 941300 }, { "epoch": 9.59, "learning_rate": 1.842490722899272e-05, "loss": 0.3153, "step": 941400 }, { "epoch": 9.59, "learning_rate": 1.8418787571560396e-05, "loss": 0.3543, "step": 941500 }, { "epoch": 9.59, "learning_rate": 1.8412668480380235e-05, "loss": 0.3022, "step": 941600 }, { "epoch": 9.59, "learning_rate": 1.840654995575143e-05, "loss": 0.3695, "step": 941700 }, { "epoch": 9.6, "learning_rate": 1.840043199797316e-05, "loss": 0.2953, "step": 941800 }, { "epoch": 9.6, "learning_rate": 1.8394375778442438e-05, "loss": 0.3445, "step": 941900 }, { "epoch": 9.6, "learning_rate": 1.8388258949586614e-05, "loss": 0.2999, "step": 942000 }, { "epoch": 9.6, "learning_rate": 1.8382142688475616e-05, "loss": 0.3378, "step": 942100 }, { "epoch": 9.6, "learning_rate": 1.837602699540851e-05, "loss": 0.2987, "step": 942200 }, { "epoch": 9.6, "learning_rate": 1.8369911870684325e-05, "loss": 0.3371, "step": 942300 }, { "epoch": 9.6, "learning_rate": 1.8363797314602033e-05, "loss": 0.3431, "step": 942400 }, { "epoch": 9.6, "learning_rate": 1.8357683327460615e-05, "loss": 0.2755, "step": 942500 }, { "epoch": 9.6, "learning_rate": 1.8351569909559016e-05, "loss": 0.3518, "step": 942600 }, { "epoch": 9.6, "learning_rate": 1.8345457061196123e-05, "loss": 0.3358, "step": 942700 }, { "epoch": 9.61, "learning_rate": 1.8339344782670826e-05, "loss": 0.2931, "step": 942800 }, { "epoch": 9.61, "learning_rate": 1.8333233074281994e-05, "loss": 0.3903, "step": 942900 }, { "epoch": 9.61, "learning_rate": 1.8327121936328428e-05, "loss": 0.2877, "step": 943000 }, { "epoch": 9.61, "learning_rate": 1.832101136910894e-05, "loss": 0.3325, "step": 943100 }, { "epoch": 9.61, "learning_rate": 1.8314901372922304e-05, "loss": 0.3296, "step": 943200 }, { "epoch": 9.61, "learning_rate": 1.8308791948067266e-05, "loss": 0.3776, "step": 943300 }, { "epoch": 9.61, "learning_rate": 1.8302683094842523e-05, "loss": 0.3226, "step": 943400 }, { "epoch": 9.61, "learning_rate": 1.8296574813546773e-05, "loss": 0.3434, "step": 943500 }, { "epoch": 9.61, "learning_rate": 1.8290467104478675e-05, "loss": 0.2968, "step": 943600 }, { "epoch": 9.61, "learning_rate": 1.828435996793685e-05, "loss": 0.3417, "step": 943700 }, { "epoch": 9.62, "learning_rate": 1.8278253404219907e-05, "loss": 0.2782, "step": 943800 }, { "epoch": 9.62, "learning_rate": 1.827214741362643e-05, "loss": 0.3648, "step": 943900 }, { "epoch": 9.62, "learning_rate": 1.826604199645495e-05, "loss": 0.257, "step": 944000 }, { "epoch": 9.62, "learning_rate": 1.8259937153003992e-05, "loss": 0.3411, "step": 944100 }, { "epoch": 9.62, "learning_rate": 1.825383288357205e-05, "loss": 0.3366, "step": 944200 }, { "epoch": 9.62, "learning_rate": 1.8247729188457574e-05, "loss": 0.345, "step": 944300 }, { "epoch": 9.62, "learning_rate": 1.824162606795901e-05, "loss": 0.3212, "step": 944400 }, { "epoch": 9.62, "learning_rate": 1.8235523522374775e-05, "loss": 0.3291, "step": 944500 }, { "epoch": 9.62, "learning_rate": 1.8229421552003216e-05, "loss": 0.2982, "step": 944600 }, { "epoch": 9.62, "learning_rate": 1.82233201571427e-05, "loss": 0.3682, "step": 944700 }, { "epoch": 9.63, "learning_rate": 1.8217219338091562e-05, "loss": 0.3242, "step": 944800 }, { "epoch": 9.63, "learning_rate": 1.821111909514807e-05, "loss": 0.3214, "step": 944900 }, { "epoch": 9.63, "learning_rate": 1.82050194286105e-05, "loss": 0.355, "step": 945000 }, { "epoch": 9.63, "learning_rate": 1.8198920338777096e-05, "loss": 0.2785, "step": 945100 }, { "epoch": 9.63, "learning_rate": 1.8192882808217236e-05, "loss": 0.3414, "step": 945200 }, { "epoch": 9.63, "learning_rate": 1.8186784866912285e-05, "loss": 0.3257, "step": 945300 }, { "epoch": 9.63, "learning_rate": 1.818068750320304e-05, "loss": 0.3437, "step": 945400 }, { "epoch": 9.63, "learning_rate": 1.8174590717387643e-05, "loss": 0.2897, "step": 945500 }, { "epoch": 9.63, "learning_rate": 1.816849450976419e-05, "loss": 0.4203, "step": 945600 }, { "epoch": 9.63, "learning_rate": 1.816239888063076e-05, "loss": 0.3522, "step": 945700 }, { "epoch": 9.64, "learning_rate": 1.815630383028537e-05, "loss": 0.2626, "step": 945800 }, { "epoch": 9.64, "learning_rate": 1.815020935902604e-05, "loss": 0.2832, "step": 945900 }, { "epoch": 9.64, "learning_rate": 1.8144115467150775e-05, "loss": 0.2541, "step": 946000 }, { "epoch": 9.64, "learning_rate": 1.8138022154957497e-05, "loss": 0.3358, "step": 946100 }, { "epoch": 9.64, "learning_rate": 1.813192942274415e-05, "loss": 0.2665, "step": 946200 }, { "epoch": 9.64, "learning_rate": 1.812583727080863e-05, "loss": 0.3268, "step": 946300 }, { "epoch": 9.64, "learning_rate": 1.81197456994488e-05, "loss": 0.284, "step": 946400 }, { "epoch": 9.64, "learning_rate": 1.8113654708962514e-05, "loss": 0.2967, "step": 946500 }, { "epoch": 9.64, "learning_rate": 1.810756429964759e-05, "loss": 0.3049, "step": 946600 }, { "epoch": 9.65, "learning_rate": 1.810147447180178e-05, "loss": 0.3393, "step": 946700 }, { "epoch": 9.65, "learning_rate": 1.809538522572287e-05, "loss": 0.3331, "step": 946800 }, { "epoch": 9.65, "learning_rate": 1.8089296561708583e-05, "loss": 0.3136, "step": 946900 }, { "epoch": 9.65, "learning_rate": 1.8083208480056612e-05, "loss": 0.3812, "step": 947000 }, { "epoch": 9.65, "learning_rate": 1.8077120981064627e-05, "loss": 0.2568, "step": 947100 }, { "epoch": 9.65, "learning_rate": 1.8071034065030284e-05, "loss": 0.3142, "step": 947200 }, { "epoch": 9.65, "learning_rate": 1.806494773225118e-05, "loss": 0.2968, "step": 947300 }, { "epoch": 9.65, "learning_rate": 1.8058922837627606e-05, "loss": 0.3202, "step": 947400 }, { "epoch": 9.65, "learning_rate": 1.805283766641174e-05, "loss": 0.2442, "step": 947500 }, { "epoch": 9.65, "learning_rate": 1.804675307934082e-05, "loss": 0.2768, "step": 947600 }, { "epoch": 9.66, "learning_rate": 1.8040669076712345e-05, "loss": 0.3627, "step": 947700 }, { "epoch": 9.66, "learning_rate": 1.803458565882378e-05, "loss": 0.3062, "step": 947800 }, { "epoch": 9.66, "learning_rate": 1.8028502825972565e-05, "loss": 0.3172, "step": 947900 }, { "epoch": 9.66, "learning_rate": 1.8022420578456123e-05, "loss": 0.3759, "step": 948000 }, { "epoch": 9.66, "learning_rate": 1.801633891657184e-05, "loss": 0.3859, "step": 948100 }, { "epoch": 9.66, "learning_rate": 1.8010257840617067e-05, "loss": 0.2892, "step": 948200 }, { "epoch": 9.66, "learning_rate": 1.800417735088913e-05, "loss": 0.384, "step": 948300 }, { "epoch": 9.66, "learning_rate": 1.7998097447685344e-05, "loss": 0.373, "step": 948400 }, { "epoch": 9.66, "learning_rate": 1.7992018131302964e-05, "loss": 0.2987, "step": 948500 }, { "epoch": 9.66, "learning_rate": 1.7985939402039228e-05, "loss": 0.3173, "step": 948600 }, { "epoch": 9.67, "learning_rate": 1.797986126019137e-05, "loss": 0.345, "step": 948700 }, { "epoch": 9.67, "learning_rate": 1.797378370605656e-05, "loss": 0.331, "step": 948800 }, { "epoch": 9.67, "learning_rate": 1.7967706739931957e-05, "loss": 0.3334, "step": 948900 }, { "epoch": 9.67, "learning_rate": 1.79616303621147e-05, "loss": 0.3022, "step": 949000 }, { "epoch": 9.67, "learning_rate": 1.795555457290187e-05, "loss": 0.2796, "step": 949100 }, { "epoch": 9.67, "learning_rate": 1.794947937259054e-05, "loss": 0.3351, "step": 949200 }, { "epoch": 9.67, "learning_rate": 1.7943404761477757e-05, "loss": 0.3258, "step": 949300 }, { "epoch": 9.67, "learning_rate": 1.793733073986053e-05, "loss": 0.3273, "step": 949400 }, { "epoch": 9.67, "learning_rate": 1.793125730803584e-05, "loss": 0.2412, "step": 949500 }, { "epoch": 9.67, "learning_rate": 1.792518446630066e-05, "loss": 0.3301, "step": 949600 }, { "epoch": 9.68, "learning_rate": 1.791911221495189e-05, "loss": 0.3519, "step": 949700 }, { "epoch": 9.68, "learning_rate": 1.7913040554286437e-05, "loss": 0.3109, "step": 949800 }, { "epoch": 9.68, "learning_rate": 1.7906969484601164e-05, "loss": 0.3209, "step": 949900 }, { "epoch": 9.68, "learning_rate": 1.790089900619293e-05, "loss": 0.3012, "step": 950000 }, { "epoch": 9.68, "learning_rate": 1.7894829119358522e-05, "loss": 0.3074, "step": 950100 }, { "epoch": 9.68, "learning_rate": 1.7888759824394733e-05, "loss": 0.3183, "step": 950200 }, { "epoch": 9.68, "learning_rate": 1.7882751805694074e-05, "loss": 0.3269, "step": 950300 }, { "epoch": 9.68, "learning_rate": 1.7876683689435628e-05, "loss": 0.3758, "step": 950400 }, { "epoch": 9.68, "learning_rate": 1.7870616165935023e-05, "loss": 0.2989, "step": 950500 }, { "epoch": 9.68, "learning_rate": 1.7864549235488883e-05, "loss": 0.3228, "step": 950600 }, { "epoch": 9.69, "learning_rate": 1.785848289839387e-05, "loss": 0.315, "step": 950700 }, { "epoch": 9.69, "learning_rate": 1.7852417154946587e-05, "loss": 0.3129, "step": 950800 }, { "epoch": 9.69, "learning_rate": 1.7846352005443614e-05, "loss": 0.3209, "step": 950900 }, { "epoch": 9.69, "learning_rate": 1.784028745018149e-05, "loss": 0.3411, "step": 951000 }, { "epoch": 9.69, "learning_rate": 1.783422348945676e-05, "loss": 0.2458, "step": 951100 }, { "epoch": 9.69, "learning_rate": 1.782816012356589e-05, "loss": 0.3247, "step": 951200 }, { "epoch": 9.69, "learning_rate": 1.7822097352805352e-05, "loss": 0.3981, "step": 951300 }, { "epoch": 9.69, "learning_rate": 1.7816035177471582e-05, "loss": 0.3162, "step": 951400 }, { "epoch": 9.69, "learning_rate": 1.780997359786098e-05, "loss": 0.3335, "step": 951500 }, { "epoch": 9.7, "learning_rate": 1.780391261426992e-05, "loss": 0.2945, "step": 951600 }, { "epoch": 9.7, "learning_rate": 1.7797852226994764e-05, "loss": 0.2774, "step": 951700 }, { "epoch": 9.7, "learning_rate": 1.77917924363318e-05, "loss": 0.2643, "step": 951800 }, { "epoch": 9.7, "learning_rate": 1.7785733242577336e-05, "loss": 0.3485, "step": 951900 }, { "epoch": 9.7, "learning_rate": 1.7779674646027632e-05, "loss": 0.3083, "step": 952000 }, { "epoch": 9.7, "learning_rate": 1.7773616646978902e-05, "loss": 0.3619, "step": 952100 }, { "epoch": 9.7, "learning_rate": 1.7767559245727348e-05, "loss": 0.352, "step": 952200 }, { "epoch": 9.7, "learning_rate": 1.776150244256916e-05, "loss": 0.3251, "step": 952300 }, { "epoch": 9.7, "learning_rate": 1.775544623780047e-05, "loss": 0.3868, "step": 952400 }, { "epoch": 9.7, "learning_rate": 1.774939063171738e-05, "loss": 0.3201, "step": 952500 }, { "epoch": 9.71, "learning_rate": 1.7743335624615976e-05, "loss": 0.2968, "step": 952600 }, { "epoch": 9.71, "learning_rate": 1.7737281216792328e-05, "loss": 0.3359, "step": 952700 }, { "epoch": 9.71, "learning_rate": 1.7731227408542437e-05, "loss": 0.2829, "step": 952800 }, { "epoch": 9.71, "learning_rate": 1.7725174200162315e-05, "loss": 0.2845, "step": 952900 }, { "epoch": 9.71, "learning_rate": 1.771912159194793e-05, "loss": 0.315, "step": 953000 }, { "epoch": 9.71, "learning_rate": 1.7713069584195202e-05, "loss": 0.3863, "step": 953100 }, { "epoch": 9.71, "learning_rate": 1.770701817720004e-05, "loss": 0.2756, "step": 953200 }, { "epoch": 9.71, "learning_rate": 1.770096737125834e-05, "loss": 0.362, "step": 953300 }, { "epoch": 9.71, "learning_rate": 1.76949776657342e-05, "loss": 0.3517, "step": 953400 }, { "epoch": 9.71, "learning_rate": 1.7688928056769005e-05, "loss": 0.3375, "step": 953500 }, { "epoch": 9.72, "learning_rate": 1.7682879049741748e-05, "loss": 0.2857, "step": 953600 }, { "epoch": 9.72, "learning_rate": 1.7676830644948205e-05, "loss": 0.3049, "step": 953700 }, { "epoch": 9.72, "learning_rate": 1.767078284268411e-05, "loss": 0.2927, "step": 953800 }, { "epoch": 9.72, "learning_rate": 1.7664735643245148e-05, "loss": 0.3209, "step": 953900 }, { "epoch": 9.72, "learning_rate": 1.7658689046926998e-05, "loss": 0.3286, "step": 954000 }, { "epoch": 9.72, "learning_rate": 1.765264305402531e-05, "loss": 0.3487, "step": 954100 }, { "epoch": 9.72, "learning_rate": 1.7646597664835686e-05, "loss": 0.3281, "step": 954200 }, { "epoch": 9.72, "learning_rate": 1.764055287965371e-05, "loss": 0.3062, "step": 954300 }, { "epoch": 9.72, "learning_rate": 1.7634508698774962e-05, "loss": 0.3668, "step": 954400 }, { "epoch": 9.72, "learning_rate": 1.7628465122494927e-05, "loss": 0.3369, "step": 954500 }, { "epoch": 9.73, "learning_rate": 1.7622422151109122e-05, "loss": 0.329, "step": 954600 }, { "epoch": 9.73, "learning_rate": 1.7616379784912996e-05, "loss": 0.2533, "step": 954700 }, { "epoch": 9.73, "learning_rate": 1.7610338024202022e-05, "loss": 0.3386, "step": 954800 }, { "epoch": 9.73, "learning_rate": 1.7604296869271566e-05, "loss": 0.3683, "step": 954900 }, { "epoch": 9.73, "learning_rate": 1.759825632041702e-05, "loss": 0.289, "step": 955000 }, { "epoch": 9.73, "learning_rate": 1.7592216377933742e-05, "loss": 0.3211, "step": 955100 }, { "epoch": 9.73, "learning_rate": 1.758617704211702e-05, "loss": 0.2747, "step": 955200 }, { "epoch": 9.73, "learning_rate": 1.7580138313262164e-05, "loss": 0.3396, "step": 955300 }, { "epoch": 9.73, "learning_rate": 1.7574100191664435e-05, "loss": 0.3423, "step": 955400 }, { "epoch": 9.73, "learning_rate": 1.7568062677619044e-05, "loss": 0.287, "step": 955500 }, { "epoch": 9.74, "learning_rate": 1.7562025771421203e-05, "loss": 0.2905, "step": 955600 }, { "epoch": 9.74, "learning_rate": 1.755598947336608e-05, "loss": 0.3421, "step": 955700 }, { "epoch": 9.74, "learning_rate": 1.755001413763223e-05, "loss": 0.3286, "step": 955800 }, { "epoch": 9.74, "learning_rate": 1.7543979050659132e-05, "loss": 0.3017, "step": 955900 }, { "epoch": 9.74, "learning_rate": 1.7538004914474955e-05, "loss": 0.333, "step": 956000 }, { "epoch": 9.74, "learning_rate": 1.753197103975242e-05, "loss": 0.2482, "step": 956100 }, { "epoch": 9.74, "learning_rate": 1.7525937774642095e-05, "loss": 0.3185, "step": 956200 }, { "epoch": 9.74, "learning_rate": 1.751990511943897e-05, "loss": 0.304, "step": 956300 }, { "epoch": 9.74, "learning_rate": 1.751387307443801e-05, "loss": 0.3216, "step": 956400 }, { "epoch": 9.74, "learning_rate": 1.7507841639934145e-05, "loss": 0.333, "step": 956500 }, { "epoch": 9.75, "learning_rate": 1.750181081622227e-05, "loss": 0.336, "step": 956600 }, { "epoch": 9.75, "learning_rate": 1.749578060359727e-05, "loss": 0.3124, "step": 956700 }, { "epoch": 9.75, "learning_rate": 1.7489751002353968e-05, "loss": 0.2965, "step": 956800 }, { "epoch": 9.75, "learning_rate": 1.7483722012787186e-05, "loss": 0.3397, "step": 956900 }, { "epoch": 9.75, "learning_rate": 1.7477693635191713e-05, "loss": 0.3522, "step": 957000 }, { "epoch": 9.75, "learning_rate": 1.7471665869862283e-05, "loss": 0.3062, "step": 957100 }, { "epoch": 9.75, "learning_rate": 1.7465638717093622e-05, "loss": 0.3099, "step": 957200 }, { "epoch": 9.75, "learning_rate": 1.7459612177180445e-05, "loss": 0.2801, "step": 957300 }, { "epoch": 9.75, "learning_rate": 1.7453586250417383e-05, "loss": 0.3369, "step": 957400 }, { "epoch": 9.76, "learning_rate": 1.7447560937099078e-05, "loss": 0.2762, "step": 957500 }, { "epoch": 9.76, "learning_rate": 1.744153623752013e-05, "loss": 0.3651, "step": 957600 }, { "epoch": 9.76, "learning_rate": 1.743551215197512e-05, "loss": 0.2883, "step": 957700 }, { "epoch": 9.76, "learning_rate": 1.7429488680758584e-05, "loss": 0.2659, "step": 957800 }, { "epoch": 9.76, "learning_rate": 1.7423465824165037e-05, "loss": 0.2809, "step": 957900 }, { "epoch": 9.76, "learning_rate": 1.7417443582488967e-05, "loss": 0.3626, "step": 958000 }, { "epoch": 9.76, "learning_rate": 1.7411421956024808e-05, "loss": 0.3193, "step": 958100 }, { "epoch": 9.76, "learning_rate": 1.740540094506699e-05, "loss": 0.2949, "step": 958200 }, { "epoch": 9.76, "learning_rate": 1.7399380549909914e-05, "loss": 0.3369, "step": 958300 }, { "epoch": 9.76, "learning_rate": 1.7393360770847923e-05, "loss": 0.3442, "step": 958400 }, { "epoch": 9.77, "learning_rate": 1.738734160817536e-05, "loss": 0.3093, "step": 958500 }, { "epoch": 9.77, "learning_rate": 1.7381323062186532e-05, "loss": 0.2892, "step": 958600 }, { "epoch": 9.77, "learning_rate": 1.7375305133175693e-05, "loss": 0.3051, "step": 958700 }, { "epoch": 9.77, "learning_rate": 1.7369287821437086e-05, "loss": 0.3391, "step": 958800 }, { "epoch": 9.77, "learning_rate": 1.7363271127264933e-05, "loss": 0.3334, "step": 958900 }, { "epoch": 9.77, "learning_rate": 1.735725505095341e-05, "loss": 0.3476, "step": 959000 }, { "epoch": 9.77, "learning_rate": 1.735123959279667e-05, "loss": 0.2868, "step": 959100 }, { "epoch": 9.77, "learning_rate": 1.734522475308884e-05, "loss": 0.3282, "step": 959200 }, { "epoch": 9.77, "learning_rate": 1.7339210532123982e-05, "loss": 0.2685, "step": 959300 }, { "epoch": 9.77, "learning_rate": 1.733319693019618e-05, "loss": 0.3464, "step": 959400 }, { "epoch": 9.78, "learning_rate": 1.7327183947599457e-05, "loss": 0.2802, "step": 959500 }, { "epoch": 9.78, "learning_rate": 1.7321171584627822e-05, "loss": 0.2892, "step": 959600 }, { "epoch": 9.78, "learning_rate": 1.731515984157522e-05, "loss": 0.3621, "step": 959700 }, { "epoch": 9.78, "learning_rate": 1.7309148718735602e-05, "loss": 0.257, "step": 959800 }, { "epoch": 9.78, "learning_rate": 1.730313821640289e-05, "loss": 0.2846, "step": 959900 }, { "epoch": 9.78, "learning_rate": 1.7297128334870936e-05, "loss": 0.3072, "step": 960000 }, { "epoch": 9.78, "learning_rate": 1.7291119074433603e-05, "loss": 0.3312, "step": 960100 }, { "epoch": 9.78, "learning_rate": 1.72851104353847e-05, "loss": 0.3154, "step": 960200 }, { "epoch": 9.78, "learning_rate": 1.7279102418018018e-05, "loss": 0.2991, "step": 960300 }, { "epoch": 9.78, "learning_rate": 1.7273095022627315e-05, "loss": 0.3472, "step": 960400 }, { "epoch": 9.79, "learning_rate": 1.7267088249506328e-05, "loss": 0.2988, "step": 960500 }, { "epoch": 9.79, "learning_rate": 1.7261082098948726e-05, "loss": 0.3408, "step": 960600 }, { "epoch": 9.79, "learning_rate": 1.7255076571248186e-05, "loss": 0.3137, "step": 960700 }, { "epoch": 9.79, "learning_rate": 1.724907166669836e-05, "loss": 0.3279, "step": 960800 }, { "epoch": 9.79, "learning_rate": 1.7243067385592827e-05, "loss": 0.3174, "step": 960900 }, { "epoch": 9.79, "learning_rate": 1.7237063728225165e-05, "loss": 0.3038, "step": 961000 }, { "epoch": 9.79, "learning_rate": 1.7231060694888915e-05, "loss": 0.3558, "step": 961100 }, { "epoch": 9.79, "learning_rate": 1.7225058285877618e-05, "loss": 0.3511, "step": 961200 }, { "epoch": 9.79, "learning_rate": 1.7219056501484717e-05, "loss": 0.2429, "step": 961300 }, { "epoch": 9.79, "learning_rate": 1.7213055342003684e-05, "loss": 0.3099, "step": 961400 }, { "epoch": 9.8, "learning_rate": 1.7207054807727933e-05, "loss": 0.2822, "step": 961500 }, { "epoch": 9.8, "learning_rate": 1.7201054898950857e-05, "loss": 0.3304, "step": 961600 }, { "epoch": 9.8, "learning_rate": 1.719505561596582e-05, "loss": 0.3695, "step": 961700 }, { "epoch": 9.8, "learning_rate": 1.7189056959066157e-05, "loss": 0.3645, "step": 961800 }, { "epoch": 9.8, "learning_rate": 1.7183058928545144e-05, "loss": 0.355, "step": 961900 }, { "epoch": 9.8, "learning_rate": 1.7177061524696062e-05, "loss": 0.3278, "step": 962000 }, { "epoch": 9.8, "learning_rate": 1.717106474781216e-05, "loss": 0.3164, "step": 962100 }, { "epoch": 9.8, "learning_rate": 1.7165068598186623e-05, "loss": 0.3258, "step": 962200 }, { "epoch": 9.8, "learning_rate": 1.715907307611264e-05, "loss": 0.2344, "step": 962300 }, { "epoch": 9.81, "learning_rate": 1.7153078181883358e-05, "loss": 0.2856, "step": 962400 }, { "epoch": 9.81, "learning_rate": 1.7147083915791874e-05, "loss": 0.2957, "step": 962500 }, { "epoch": 9.81, "learning_rate": 1.7141090278131288e-05, "loss": 0.3144, "step": 962600 }, { "epoch": 9.81, "learning_rate": 1.713509726919465e-05, "loss": 0.3048, "step": 962700 }, { "epoch": 9.81, "learning_rate": 1.7129104889274978e-05, "loss": 0.3243, "step": 962800 }, { "epoch": 9.81, "learning_rate": 1.712317305305533e-05, "loss": 0.3445, "step": 962900 }, { "epoch": 9.81, "learning_rate": 1.7117181925751064e-05, "loss": 0.303, "step": 963000 }, { "epoch": 9.81, "learning_rate": 1.711119142833972e-05, "loss": 0.3156, "step": 963100 }, { "epoch": 9.81, "learning_rate": 1.7105201561114214e-05, "loss": 0.27, "step": 963200 }, { "epoch": 9.81, "learning_rate": 1.7099212324367388e-05, "loss": 0.3537, "step": 963300 }, { "epoch": 9.82, "learning_rate": 1.709322371839209e-05, "loss": 0.3466, "step": 963400 }, { "epoch": 9.82, "learning_rate": 1.7087235743481127e-05, "loss": 0.3857, "step": 963500 }, { "epoch": 9.82, "learning_rate": 1.708124839992729e-05, "loss": 0.3252, "step": 963600 }, { "epoch": 9.82, "learning_rate": 1.7075261688023303e-05, "loss": 0.2562, "step": 963700 }, { "epoch": 9.82, "learning_rate": 1.7069275608061884e-05, "loss": 0.3116, "step": 963800 }, { "epoch": 9.82, "learning_rate": 1.7063290160335723e-05, "loss": 0.2645, "step": 963900 }, { "epoch": 9.82, "learning_rate": 1.705730534513747e-05, "loss": 0.3129, "step": 964000 }, { "epoch": 9.82, "learning_rate": 1.7051321162759747e-05, "loss": 0.359, "step": 964100 }, { "epoch": 9.82, "learning_rate": 1.704539744585293e-05, "loss": 0.2894, "step": 964200 }, { "epoch": 9.82, "learning_rate": 1.7039414523658502e-05, "loss": 0.2653, "step": 964300 }, { "epoch": 9.83, "learning_rate": 1.7033432235159375e-05, "loss": 0.2812, "step": 964400 }, { "epoch": 9.83, "learning_rate": 1.7027510394053937e-05, "loss": 0.3099, "step": 964500 }, { "epoch": 9.83, "learning_rate": 1.7021529367478582e-05, "loss": 0.371, "step": 964600 }, { "epoch": 9.83, "learning_rate": 1.7015548975472993e-05, "loss": 0.2287, "step": 964700 }, { "epoch": 9.83, "learning_rate": 1.700956921832957e-05, "loss": 0.3162, "step": 964800 }, { "epoch": 9.83, "learning_rate": 1.7003649884415603e-05, "loss": 0.2911, "step": 964900 }, { "epoch": 9.83, "learning_rate": 1.6997671391517702e-05, "loss": 0.4066, "step": 965000 }, { "epoch": 9.83, "learning_rate": 1.6991693534356064e-05, "loss": 0.3096, "step": 965100 }, { "epoch": 9.83, "learning_rate": 1.6985716313222985e-05, "loss": 0.32, "step": 965200 }, { "epoch": 9.83, "learning_rate": 1.6979739728410712e-05, "loss": 0.368, "step": 965300 }, { "epoch": 9.84, "learning_rate": 1.6973763780211462e-05, "loss": 0.3208, "step": 965400 }, { "epoch": 9.84, "learning_rate": 1.696778846891743e-05, "loss": 0.2783, "step": 965500 }, { "epoch": 9.84, "learning_rate": 1.696181379482077e-05, "loss": 0.3401, "step": 965600 }, { "epoch": 9.84, "learning_rate": 1.695583975821362e-05, "loss": 0.3067, "step": 965700 }, { "epoch": 9.84, "learning_rate": 1.694986635938805e-05, "loss": 0.3021, "step": 965800 }, { "epoch": 9.84, "learning_rate": 1.6943893598636134e-05, "loss": 0.291, "step": 965900 }, { "epoch": 9.84, "learning_rate": 1.693792147624992e-05, "loss": 0.3209, "step": 966000 }, { "epoch": 9.84, "learning_rate": 1.693194999252138e-05, "loss": 0.3106, "step": 966100 }, { "epoch": 9.84, "learning_rate": 1.69259791477425e-05, "loss": 0.2939, "step": 966200 }, { "epoch": 9.84, "learning_rate": 1.6920008942205233e-05, "loss": 0.3537, "step": 966300 }, { "epoch": 9.85, "learning_rate": 1.6914039376201455e-05, "loss": 0.3198, "step": 966400 }, { "epoch": 9.85, "learning_rate": 1.6908070450023065e-05, "loss": 0.3086, "step": 966500 }, { "epoch": 9.85, "learning_rate": 1.6902102163961896e-05, "loss": 0.3634, "step": 966600 }, { "epoch": 9.85, "learning_rate": 1.689613451830977e-05, "loss": 0.3076, "step": 966700 }, { "epoch": 9.85, "learning_rate": 1.6890167513358466e-05, "loss": 0.3054, "step": 966800 }, { "epoch": 9.85, "learning_rate": 1.6884201149399745e-05, "loss": 0.3128, "step": 966900 }, { "epoch": 9.85, "learning_rate": 1.687823542672531e-05, "loss": 0.3169, "step": 967000 }, { "epoch": 9.85, "learning_rate": 1.6872270345626853e-05, "loss": 0.3464, "step": 967100 }, { "epoch": 9.85, "learning_rate": 1.686630590639603e-05, "loss": 0.2938, "step": 967200 }, { "epoch": 9.86, "learning_rate": 1.6860342109324486e-05, "loss": 0.2432, "step": 967300 }, { "epoch": 9.86, "learning_rate": 1.6854378954703787e-05, "loss": 0.2634, "step": 967400 }, { "epoch": 9.86, "learning_rate": 1.6848416442825508e-05, "loss": 0.3411, "step": 967500 }, { "epoch": 9.86, "learning_rate": 1.6842454573981195e-05, "loss": 0.2726, "step": 967600 }, { "epoch": 9.86, "learning_rate": 1.6836493348462322e-05, "loss": 0.2996, "step": 967700 }, { "epoch": 9.86, "learning_rate": 1.683053276656036e-05, "loss": 0.3057, "step": 967800 }, { "epoch": 9.86, "learning_rate": 1.682457282856676e-05, "loss": 0.3355, "step": 967900 }, { "epoch": 9.86, "learning_rate": 1.6818613534772924e-05, "loss": 0.3784, "step": 968000 }, { "epoch": 9.86, "learning_rate": 1.681265488547022e-05, "loss": 0.3397, "step": 968100 }, { "epoch": 9.86, "learning_rate": 1.6806756457802574e-05, "loss": 0.2966, "step": 968200 }, { "epoch": 9.87, "learning_rate": 1.6800799091903963e-05, "loss": 0.2685, "step": 968300 }, { "epoch": 9.87, "learning_rate": 1.6794842371367513e-05, "loss": 0.2549, "step": 968400 }, { "epoch": 9.87, "learning_rate": 1.678888629648446e-05, "loss": 0.2952, "step": 968500 }, { "epoch": 9.87, "learning_rate": 1.6782930867546022e-05, "loss": 0.2872, "step": 968600 }, { "epoch": 9.87, "learning_rate": 1.6776976084843404e-05, "loss": 0.3113, "step": 968700 }, { "epoch": 9.87, "learning_rate": 1.6771021948667736e-05, "loss": 0.2226, "step": 968800 }, { "epoch": 9.87, "learning_rate": 1.676506845931014e-05, "loss": 0.3066, "step": 968900 }, { "epoch": 9.87, "learning_rate": 1.675911561706173e-05, "loss": 0.3617, "step": 969000 }, { "epoch": 9.87, "learning_rate": 1.6753163422213543e-05, "loss": 0.3817, "step": 969100 }, { "epoch": 9.87, "learning_rate": 1.6747211875056607e-05, "loss": 0.3065, "step": 969200 }, { "epoch": 9.88, "learning_rate": 1.674126097588192e-05, "loss": 0.2769, "step": 969300 }, { "epoch": 9.88, "learning_rate": 1.673531072498045e-05, "loss": 0.3657, "step": 969400 }, { "epoch": 9.88, "learning_rate": 1.6729361122643126e-05, "loss": 0.3369, "step": 969500 }, { "epoch": 9.88, "learning_rate": 1.6723412169160844e-05, "loss": 0.3616, "step": 969600 }, { "epoch": 9.88, "learning_rate": 1.671746386482449e-05, "loss": 0.3032, "step": 969700 }, { "epoch": 9.88, "learning_rate": 1.6711516209924876e-05, "loss": 0.3673, "step": 969800 }, { "epoch": 9.88, "learning_rate": 1.6705569204752817e-05, "loss": 0.2822, "step": 969900 }, { "epoch": 9.88, "learning_rate": 1.6699622849599103e-05, "loss": 0.3379, "step": 970000 }, { "epoch": 9.88, "learning_rate": 1.6693677144754444e-05, "loss": 0.2835, "step": 970100 }, { "epoch": 9.88, "learning_rate": 1.6687732090509567e-05, "loss": 0.3514, "step": 970200 }, { "epoch": 9.89, "learning_rate": 1.6681787687155158e-05, "loss": 0.2895, "step": 970300 }, { "epoch": 9.89, "learning_rate": 1.6675843934981833e-05, "loss": 0.3096, "step": 970400 }, { "epoch": 9.89, "learning_rate": 1.6669900834280234e-05, "loss": 0.2796, "step": 970500 }, { "epoch": 9.89, "learning_rate": 1.6663958385340933e-05, "loss": 0.2901, "step": 970600 }, { "epoch": 9.89, "learning_rate": 1.6658016588454482e-05, "loss": 0.3148, "step": 970700 }, { "epoch": 9.89, "learning_rate": 1.6652075443911395e-05, "loss": 0.373, "step": 970800 }, { "epoch": 9.89, "learning_rate": 1.664613495200218e-05, "loss": 0.306, "step": 970900 }, { "epoch": 9.89, "learning_rate": 1.6640195113017255e-05, "loss": 0.3155, "step": 971000 }, { "epoch": 9.89, "learning_rate": 1.6634255927247067e-05, "loss": 0.31, "step": 971100 }, { "epoch": 9.89, "learning_rate": 1.6628317394982e-05, "loss": 0.3228, "step": 971200 }, { "epoch": 9.9, "learning_rate": 1.662237951651243e-05, "loss": 0.3089, "step": 971300 }, { "epoch": 9.9, "learning_rate": 1.661644229212865e-05, "loss": 0.3738, "step": 971400 }, { "epoch": 9.9, "learning_rate": 1.6610505722120978e-05, "loss": 0.345, "step": 971500 }, { "epoch": 9.9, "learning_rate": 1.660456980677968e-05, "loss": 0.3154, "step": 971600 }, { "epoch": 9.9, "learning_rate": 1.659863454639497e-05, "loss": 0.3149, "step": 971700 }, { "epoch": 9.9, "learning_rate": 1.659269994125704e-05, "loss": 0.3704, "step": 971800 }, { "epoch": 9.9, "learning_rate": 1.65867659916561e-05, "loss": 0.3407, "step": 971900 }, { "epoch": 9.9, "learning_rate": 1.6580832697882242e-05, "loss": 0.3011, "step": 972000 }, { "epoch": 9.9, "learning_rate": 1.6574900060225584e-05, "loss": 0.3437, "step": 972100 }, { "epoch": 9.9, "learning_rate": 1.656896807897621e-05, "loss": 0.2981, "step": 972200 }, { "epoch": 9.91, "learning_rate": 1.6563036754424134e-05, "loss": 0.2986, "step": 972300 }, { "epoch": 9.91, "learning_rate": 1.6557106086859374e-05, "loss": 0.3135, "step": 972400 }, { "epoch": 9.91, "learning_rate": 1.6551235373420303e-05, "loss": 0.2921, "step": 972500 }, { "epoch": 9.91, "learning_rate": 1.6545306014122952e-05, "loss": 0.2743, "step": 972600 }, { "epoch": 9.91, "learning_rate": 1.6539377312679858e-05, "loss": 0.2732, "step": 972700 }, { "epoch": 9.91, "learning_rate": 1.6533449269380872e-05, "loss": 0.2474, "step": 972800 }, { "epoch": 9.91, "learning_rate": 1.6527521884515854e-05, "loss": 0.2414, "step": 972900 }, { "epoch": 9.91, "learning_rate": 1.6521595158374616e-05, "loss": 0.2974, "step": 973000 }, { "epoch": 9.91, "learning_rate": 1.6515669091246933e-05, "loss": 0.2977, "step": 973100 }, { "epoch": 9.92, "learning_rate": 1.650974368342257e-05, "loss": 0.2877, "step": 973200 }, { "epoch": 9.92, "learning_rate": 1.650381893519124e-05, "loss": 0.3629, "step": 973300 }, { "epoch": 9.92, "learning_rate": 1.649789484684262e-05, "loss": 0.3576, "step": 973400 }, { "epoch": 9.92, "learning_rate": 1.649197141866636e-05, "loss": 0.311, "step": 973500 }, { "epoch": 9.92, "learning_rate": 1.6486048650952083e-05, "loss": 0.3238, "step": 973600 }, { "epoch": 9.92, "learning_rate": 1.6480126543989397e-05, "loss": 0.2817, "step": 973700 }, { "epoch": 9.92, "learning_rate": 1.6474205098067827e-05, "loss": 0.306, "step": 973800 }, { "epoch": 9.92, "learning_rate": 1.646828431347691e-05, "loss": 0.3195, "step": 973900 }, { "epoch": 9.92, "learning_rate": 1.646236419050615e-05, "loss": 0.3032, "step": 974000 }, { "epoch": 9.92, "learning_rate": 1.6456444729444978e-05, "loss": 0.2901, "step": 974100 }, { "epoch": 9.93, "learning_rate": 1.6450525930582837e-05, "loss": 0.336, "step": 974200 }, { "epoch": 9.93, "learning_rate": 1.644460779420912e-05, "loss": 0.2921, "step": 974300 }, { "epoch": 9.93, "learning_rate": 1.6438690320613187e-05, "loss": 0.3396, "step": 974400 }, { "epoch": 9.93, "learning_rate": 1.6432773510084368e-05, "loss": 0.2329, "step": 974500 }, { "epoch": 9.93, "learning_rate": 1.642685736291197e-05, "loss": 0.2667, "step": 974600 }, { "epoch": 9.93, "learning_rate": 1.642094187938523e-05, "loss": 0.2805, "step": 974700 }, { "epoch": 9.93, "learning_rate": 1.6415027059793403e-05, "loss": 0.3168, "step": 974800 }, { "epoch": 9.93, "learning_rate": 1.6409112904425695e-05, "loss": 0.3657, "step": 974900 }, { "epoch": 9.93, "learning_rate": 1.6403199413571243e-05, "loss": 0.3018, "step": 975000 }, { "epoch": 9.93, "learning_rate": 1.63972865875192e-05, "loss": 0.3193, "step": 975100 }, { "epoch": 9.94, "learning_rate": 1.639137442655868e-05, "loss": 0.2272, "step": 975200 }, { "epoch": 9.94, "learning_rate": 1.6385462930978724e-05, "loss": 0.3226, "step": 975300 }, { "epoch": 9.94, "learning_rate": 1.6379552101068385e-05, "loss": 0.3661, "step": 975400 }, { "epoch": 9.94, "learning_rate": 1.6373641937116667e-05, "loss": 0.2919, "step": 975500 }, { "epoch": 9.94, "learning_rate": 1.6367732439412538e-05, "loss": 0.2695, "step": 975600 }, { "epoch": 9.94, "learning_rate": 1.6361882693256315e-05, "loss": 0.3266, "step": 975700 }, { "epoch": 9.94, "learning_rate": 1.635597452224447e-05, "loss": 0.3197, "step": 975800 }, { "epoch": 9.94, "learning_rate": 1.6350067018344044e-05, "loss": 0.3561, "step": 975900 }, { "epoch": 9.94, "learning_rate": 1.634416018184389e-05, "loss": 0.3026, "step": 976000 }, { "epoch": 9.94, "learning_rate": 1.6338254013032815e-05, "loss": 0.317, "step": 976100 }, { "epoch": 9.95, "learning_rate": 1.633234851219958e-05, "loss": 0.2814, "step": 976200 }, { "epoch": 9.95, "learning_rate": 1.632644367963294e-05, "loss": 0.2904, "step": 976300 }, { "epoch": 9.95, "learning_rate": 1.6320539515621614e-05, "loss": 0.3409, "step": 976400 }, { "epoch": 9.95, "learning_rate": 1.631463602045426e-05, "loss": 0.3202, "step": 976500 }, { "epoch": 9.95, "learning_rate": 1.6308733194419536e-05, "loss": 0.2994, "step": 976600 }, { "epoch": 9.95, "learning_rate": 1.630283103780606e-05, "loss": 0.4042, "step": 976700 }, { "epoch": 9.95, "learning_rate": 1.6296929550902398e-05, "loss": 0.3344, "step": 976800 }, { "epoch": 9.95, "learning_rate": 1.6291028733997105e-05, "loss": 0.297, "step": 976900 }, { "epoch": 9.95, "learning_rate": 1.6285128587378698e-05, "loss": 0.3386, "step": 977000 }, { "epoch": 9.95, "learning_rate": 1.627922911133565e-05, "loss": 0.3284, "step": 977100 }, { "epoch": 9.96, "learning_rate": 1.6273330306156428e-05, "loss": 0.2862, "step": 977200 }, { "epoch": 9.96, "learning_rate": 1.626743217212944e-05, "loss": 0.2907, "step": 977300 }, { "epoch": 9.96, "learning_rate": 1.6261534709543057e-05, "loss": 0.2829, "step": 977400 }, { "epoch": 9.96, "learning_rate": 1.6255637918685645e-05, "loss": 0.3092, "step": 977500 }, { "epoch": 9.96, "learning_rate": 1.6249741799845525e-05, "loss": 0.3296, "step": 977600 }, { "epoch": 9.96, "learning_rate": 1.6243846353310962e-05, "loss": 0.2976, "step": 977700 }, { "epoch": 9.96, "learning_rate": 1.623795157937022e-05, "loss": 0.304, "step": 977800 }, { "epoch": 9.96, "learning_rate": 1.623205747831152e-05, "loss": 0.3195, "step": 977900 }, { "epoch": 9.96, "learning_rate": 1.6226164050423054e-05, "loss": 0.2916, "step": 978000 }, { "epoch": 9.97, "learning_rate": 1.622027129599296e-05, "loss": 0.3167, "step": 978100 }, { "epoch": 9.97, "learning_rate": 1.6214379215309366e-05, "loss": 0.2819, "step": 978200 }, { "epoch": 9.97, "learning_rate": 1.620848780866036e-05, "loss": 0.2663, "step": 978300 }, { "epoch": 9.97, "learning_rate": 1.6202597076333997e-05, "loss": 0.3281, "step": 978400 }, { "epoch": 9.97, "learning_rate": 1.6196707018618292e-05, "loss": 0.3317, "step": 978500 }, { "epoch": 9.97, "learning_rate": 1.619081763580126e-05, "loss": 0.3404, "step": 978600 }, { "epoch": 9.97, "learning_rate": 1.618492892817082e-05, "loss": 0.2943, "step": 978700 }, { "epoch": 9.97, "learning_rate": 1.6179040896014916e-05, "loss": 0.3872, "step": 978800 }, { "epoch": 9.97, "learning_rate": 1.6173153539621442e-05, "loss": 0.3008, "step": 978900 }, { "epoch": 9.97, "learning_rate": 1.6167266859278232e-05, "loss": 0.2887, "step": 979000 }, { "epoch": 9.98, "learning_rate": 1.6161380855273127e-05, "loss": 0.2822, "step": 979100 }, { "epoch": 9.98, "learning_rate": 1.6155495527893926e-05, "loss": 0.3184, "step": 979200 }, { "epoch": 9.98, "learning_rate": 1.6149610877428355e-05, "loss": 0.3062, "step": 979300 }, { "epoch": 9.98, "learning_rate": 1.6143726904164165e-05, "loss": 0.303, "step": 979400 }, { "epoch": 9.98, "learning_rate": 1.6137843608389035e-05, "loss": 0.3176, "step": 979500 }, { "epoch": 9.98, "learning_rate": 1.6131960990390632e-05, "loss": 0.2602, "step": 979600 }, { "epoch": 9.98, "learning_rate": 1.6126079050456574e-05, "loss": 0.3141, "step": 979700 }, { "epoch": 9.98, "learning_rate": 1.6120197788874456e-05, "loss": 0.3205, "step": 979800 }, { "epoch": 9.98, "learning_rate": 1.6114317205931845e-05, "loss": 0.2979, "step": 979900 }, { "epoch": 9.98, "learning_rate": 1.6108437301916245e-05, "loss": 0.3681, "step": 980000 }, { "epoch": 9.99, "learning_rate": 1.6102558077115163e-05, "loss": 0.3203, "step": 980100 }, { "epoch": 9.99, "learning_rate": 1.6096679531816067e-05, "loss": 0.274, "step": 980200 }, { "epoch": 9.99, "learning_rate": 1.609080166630636e-05, "loss": 0.2613, "step": 980300 }, { "epoch": 9.99, "learning_rate": 1.6084924480873443e-05, "loss": 0.3276, "step": 980400 }, { "epoch": 9.99, "learning_rate": 1.607904797580469e-05, "loss": 0.3026, "step": 980500 }, { "epoch": 9.99, "learning_rate": 1.6073172151387407e-05, "loss": 0.2725, "step": 980600 }, { "epoch": 9.99, "learning_rate": 1.6067297007908892e-05, "loss": 0.3273, "step": 980700 }, { "epoch": 9.99, "learning_rate": 1.6061422545656407e-05, "loss": 0.3502, "step": 980800 }, { "epoch": 9.99, "learning_rate": 1.6055548764917183e-05, "loss": 0.3256, "step": 980900 }, { "epoch": 9.99, "learning_rate": 1.6049675665978406e-05, "loss": 0.2735, "step": 981000 }, { "epoch": 10.0, "learning_rate": 1.6043803249127232e-05, "loss": 0.335, "step": 981100 }, { "epoch": 10.0, "learning_rate": 1.6037931514650808e-05, "loss": 0.279, "step": 981200 }, { "epoch": 10.0, "learning_rate": 1.6032119169974234e-05, "loss": 0.2971, "step": 981300 }, { "epoch": 10.0, "learning_rate": 1.6026248794277608e-05, "loss": 0.3959, "step": 981400 }, { "epoch": 10.0, "learning_rate": 1.6020379101814024e-05, "loss": 0.3691, "step": 981500 }, { "epoch": 10.0, "learning_rate": 1.6014510092870483e-05, "loss": 0.3122, "step": 981600 }, { "epoch": 10.0, "learning_rate": 1.6008641767733926e-05, "loss": 0.2939, "step": 981700 }, { "epoch": 10.0, "learning_rate": 1.600277412669129e-05, "loss": 0.2935, "step": 981800 }, { "epoch": 10.0, "learning_rate": 1.5996907170029472e-05, "loss": 0.3251, "step": 981900 }, { "epoch": 10.0, "learning_rate": 1.5991040898035328e-05, "loss": 0.3105, "step": 982000 }, { "epoch": 10.01, "learning_rate": 1.5985175310995677e-05, "loss": 0.2986, "step": 982100 }, { "epoch": 10.01, "learning_rate": 1.5979310409197323e-05, "loss": 0.2763, "step": 982200 }, { "epoch": 10.01, "learning_rate": 1.597350483169542e-05, "loss": 0.2218, "step": 982300 }, { "epoch": 10.01, "learning_rate": 1.596764129438032e-05, "loss": 0.2732, "step": 982400 }, { "epoch": 10.01, "learning_rate": 1.5961778443163833e-05, "loss": 0.2277, "step": 982500 }, { "epoch": 10.01, "learning_rate": 1.5955916278332595e-05, "loss": 0.3058, "step": 982600 }, { "epoch": 10.01, "learning_rate": 1.5950054800173237e-05, "loss": 0.2583, "step": 982700 }, { "epoch": 10.01, "learning_rate": 1.594419400897237e-05, "loss": 0.3665, "step": 982800 }, { "epoch": 10.01, "learning_rate": 1.593833390501652e-05, "loss": 0.3212, "step": 982900 }, { "epoch": 10.01, "learning_rate": 1.5932474488592225e-05, "loss": 0.3234, "step": 983000 }, { "epoch": 10.02, "learning_rate": 1.5926615759985992e-05, "loss": 0.2754, "step": 983100 }, { "epoch": 10.02, "learning_rate": 1.592075771948425e-05, "loss": 0.3154, "step": 983200 }, { "epoch": 10.02, "learning_rate": 1.591490036737343e-05, "loss": 0.2552, "step": 983300 }, { "epoch": 10.02, "learning_rate": 1.5909043703939935e-05, "loss": 0.3053, "step": 983400 }, { "epoch": 10.02, "learning_rate": 1.5903187729470105e-05, "loss": 0.3005, "step": 983500 }, { "epoch": 10.02, "learning_rate": 1.5897332444250277e-05, "loss": 0.2663, "step": 983600 }, { "epoch": 10.02, "learning_rate": 1.5891477848566737e-05, "loss": 0.2669, "step": 983700 }, { "epoch": 10.02, "learning_rate": 1.5885623942705724e-05, "loss": 0.2834, "step": 983800 }, { "epoch": 10.02, "learning_rate": 1.5879770726953475e-05, "loss": 0.3085, "step": 983900 }, { "epoch": 10.03, "learning_rate": 1.587391820159618e-05, "loss": 0.2767, "step": 984000 }, { "epoch": 10.03, "learning_rate": 1.5868066366919973e-05, "loss": 0.277, "step": 984100 }, { "epoch": 10.03, "learning_rate": 1.586221522321099e-05, "loss": 0.2924, "step": 984200 }, { "epoch": 10.03, "learning_rate": 1.5856364770755307e-05, "loss": 0.2547, "step": 984300 }, { "epoch": 10.03, "learning_rate": 1.5850515009838997e-05, "loss": 0.3107, "step": 984400 }, { "epoch": 10.03, "learning_rate": 1.5844665940748054e-05, "loss": 0.2904, "step": 984500 }, { "epoch": 10.03, "learning_rate": 1.5838817563768465e-05, "loss": 0.2946, "step": 984600 }, { "epoch": 10.03, "learning_rate": 1.5832969879186193e-05, "loss": 0.2867, "step": 984700 }, { "epoch": 10.03, "learning_rate": 1.5827122887287148e-05, "loss": 0.2729, "step": 984800 }, { "epoch": 10.03, "learning_rate": 1.5821276588357215e-05, "loss": 0.2389, "step": 984900 }, { "epoch": 10.04, "learning_rate": 1.581543098268225e-05, "loss": 0.3456, "step": 985000 }, { "epoch": 10.04, "learning_rate": 1.5809586070548056e-05, "loss": 0.2962, "step": 985100 }, { "epoch": 10.04, "learning_rate": 1.5803741852240416e-05, "loss": 0.2721, "step": 985200 }, { "epoch": 10.04, "learning_rate": 1.579789832804509e-05, "loss": 0.2853, "step": 985300 }, { "epoch": 10.04, "learning_rate": 1.5792055498247776e-05, "loss": 0.2383, "step": 985400 }, { "epoch": 10.04, "learning_rate": 1.5786213363134155e-05, "loss": 0.2945, "step": 985500 }, { "epoch": 10.04, "learning_rate": 1.5780371922989895e-05, "loss": 0.256, "step": 985600 }, { "epoch": 10.04, "learning_rate": 1.577453117810057e-05, "loss": 0.217, "step": 985700 }, { "epoch": 10.04, "learning_rate": 1.576869112875178e-05, "loss": 0.3075, "step": 985800 }, { "epoch": 10.04, "learning_rate": 1.5762851775229067e-05, "loss": 0.2215, "step": 985900 }, { "epoch": 10.05, "learning_rate": 1.5757013117817937e-05, "loss": 0.2744, "step": 986000 }, { "epoch": 10.05, "learning_rate": 1.5751175156803868e-05, "loss": 0.207, "step": 986100 }, { "epoch": 10.05, "learning_rate": 1.5745337892472302e-05, "loss": 0.2779, "step": 986200 }, { "epoch": 10.05, "learning_rate": 1.5739501325108657e-05, "loss": 0.2733, "step": 986300 }, { "epoch": 10.05, "learning_rate": 1.5733665454998285e-05, "loss": 0.2797, "step": 986400 }, { "epoch": 10.05, "learning_rate": 1.572783028242654e-05, "loss": 0.3152, "step": 986500 }, { "epoch": 10.05, "learning_rate": 1.5721995807678725e-05, "loss": 0.2526, "step": 986600 }, { "epoch": 10.05, "learning_rate": 1.57161620310401e-05, "loss": 0.3135, "step": 986700 }, { "epoch": 10.05, "learning_rate": 1.5710387280120368e-05, "loss": 0.2638, "step": 986800 }, { "epoch": 10.05, "learning_rate": 1.5704554893567612e-05, "loss": 0.2701, "step": 986900 }, { "epoch": 10.06, "learning_rate": 1.5698723205976806e-05, "loss": 0.2445, "step": 987000 }, { "epoch": 10.06, "learning_rate": 1.569289221763309e-05, "loss": 0.2108, "step": 987100 }, { "epoch": 10.06, "learning_rate": 1.5687061928821565e-05, "loss": 0.2852, "step": 987200 }, { "epoch": 10.06, "learning_rate": 1.5681232339827297e-05, "loss": 0.2912, "step": 987300 }, { "epoch": 10.06, "learning_rate": 1.5675403450935327e-05, "loss": 0.2703, "step": 987400 }, { "epoch": 10.06, "learning_rate": 1.5669575262430636e-05, "loss": 0.308, "step": 987500 }, { "epoch": 10.06, "learning_rate": 1.566380604600726e-05, "loss": 0.375, "step": 987600 }, { "epoch": 10.06, "learning_rate": 1.5657979252121014e-05, "loss": 0.2807, "step": 987700 }, { "epoch": 10.06, "learning_rate": 1.5652153159473994e-05, "loss": 0.2775, "step": 987800 }, { "epoch": 10.06, "learning_rate": 1.5646327768351074e-05, "loss": 0.2998, "step": 987900 }, { "epoch": 10.07, "learning_rate": 1.5640503079037053e-05, "loss": 0.2323, "step": 988000 }, { "epoch": 10.07, "learning_rate": 1.5634679091816742e-05, "loss": 0.3269, "step": 988100 }, { "epoch": 10.07, "learning_rate": 1.5628855806974905e-05, "loss": 0.27, "step": 988200 }, { "epoch": 10.07, "learning_rate": 1.5623033224796245e-05, "loss": 0.3193, "step": 988300 }, { "epoch": 10.07, "learning_rate": 1.5617211345565465e-05, "loss": 0.2666, "step": 988400 }, { "epoch": 10.07, "learning_rate": 1.5611390169567214e-05, "loss": 0.2633, "step": 988500 }, { "epoch": 10.07, "learning_rate": 1.5605569697086124e-05, "loss": 0.2576, "step": 988600 }, { "epoch": 10.07, "learning_rate": 1.559974992840677e-05, "loss": 0.2532, "step": 988700 }, { "epoch": 10.07, "learning_rate": 1.559393086381372e-05, "loss": 0.3164, "step": 988800 }, { "epoch": 10.08, "learning_rate": 1.558811250359147e-05, "loss": 0.2757, "step": 988900 }, { "epoch": 10.08, "learning_rate": 1.5582294848024513e-05, "loss": 0.2995, "step": 989000 }, { "epoch": 10.08, "learning_rate": 1.5576477897397292e-05, "loss": 0.2972, "step": 989100 }, { "epoch": 10.08, "learning_rate": 1.5570661651994246e-05, "loss": 0.3842, "step": 989200 }, { "epoch": 10.08, "learning_rate": 1.5564846112099714e-05, "loss": 0.2577, "step": 989300 }, { "epoch": 10.08, "learning_rate": 1.555903127799807e-05, "loss": 0.2734, "step": 989400 }, { "epoch": 10.08, "learning_rate": 1.555321714997363e-05, "loss": 0.3683, "step": 989500 }, { "epoch": 10.08, "learning_rate": 1.5547403728310642e-05, "loss": 0.2567, "step": 989600 }, { "epoch": 10.08, "learning_rate": 1.5541591013293365e-05, "loss": 0.2533, "step": 989700 }, { "epoch": 10.08, "learning_rate": 1.5535779005206004e-05, "loss": 0.2926, "step": 989800 }, { "epoch": 10.09, "learning_rate": 1.5529967704332736e-05, "loss": 0.3219, "step": 989900 }, { "epoch": 10.09, "learning_rate": 1.5524157110957686e-05, "loss": 0.3513, "step": 990000 }, { "epoch": 10.09, "learning_rate": 1.5518347225364983e-05, "loss": 0.2537, "step": 990100 }, { "epoch": 10.09, "learning_rate": 1.5512538047838667e-05, "loss": 0.2954, "step": 990200 }, { "epoch": 10.09, "learning_rate": 1.550672957866278e-05, "loss": 0.3591, "step": 990300 }, { "epoch": 10.09, "learning_rate": 1.550092181812134e-05, "loss": 0.2868, "step": 990400 }, { "epoch": 10.09, "learning_rate": 1.5495114766498283e-05, "loss": 0.2792, "step": 990500 }, { "epoch": 10.09, "learning_rate": 1.5489308424077558e-05, "loss": 0.318, "step": 990600 }, { "epoch": 10.09, "learning_rate": 1.5483502791143054e-05, "loss": 0.2978, "step": 990700 }, { "epoch": 10.09, "learning_rate": 1.5477697867978643e-05, "loss": 0.2333, "step": 990800 }, { "epoch": 10.1, "learning_rate": 1.5471893654868132e-05, "loss": 0.2195, "step": 990900 }, { "epoch": 10.1, "learning_rate": 1.546609015209532e-05, "loss": 0.3341, "step": 991000 }, { "epoch": 10.1, "learning_rate": 1.546028735994397e-05, "loss": 0.2692, "step": 991100 }, { "epoch": 10.1, "learning_rate": 1.5454485278697795e-05, "loss": 0.2784, "step": 991200 }, { "epoch": 10.1, "learning_rate": 1.544868390864049e-05, "loss": 0.2951, "step": 991300 }, { "epoch": 10.1, "learning_rate": 1.544288325005571e-05, "loss": 0.2697, "step": 991400 }, { "epoch": 10.1, "learning_rate": 1.5437083303227063e-05, "loss": 0.2839, "step": 991500 }, { "epoch": 10.1, "learning_rate": 1.5431284068438133e-05, "loss": 0.2644, "step": 991600 }, { "epoch": 10.1, "learning_rate": 1.5425485545972483e-05, "loss": 0.2783, "step": 991700 }, { "epoch": 10.1, "learning_rate": 1.54196877361136e-05, "loss": 0.3005, "step": 991800 }, { "epoch": 10.11, "learning_rate": 1.5413890639144974e-05, "loss": 0.3097, "step": 991900 }, { "epoch": 10.11, "learning_rate": 1.5408094255350065e-05, "loss": 0.2648, "step": 992000 }, { "epoch": 10.11, "learning_rate": 1.540229858501225e-05, "loss": 0.2843, "step": 992100 }, { "epoch": 10.11, "learning_rate": 1.539650362841493e-05, "loss": 0.3322, "step": 992200 }, { "epoch": 10.11, "learning_rate": 1.5390709385841427e-05, "loss": 0.26, "step": 992300 }, { "epoch": 10.11, "learning_rate": 1.538491585757505e-05, "loss": 0.2946, "step": 992400 }, { "epoch": 10.11, "learning_rate": 1.5379123043899068e-05, "loss": 0.286, "step": 992500 }, { "epoch": 10.11, "learning_rate": 1.537333094509672e-05, "loss": 0.314, "step": 992600 }, { "epoch": 10.11, "learning_rate": 1.5367539561451213e-05, "loss": 0.3357, "step": 992700 }, { "epoch": 10.11, "learning_rate": 1.536174889324569e-05, "loss": 0.3149, "step": 992800 }, { "epoch": 10.12, "learning_rate": 1.5355958940763284e-05, "loss": 0.3232, "step": 992900 }, { "epoch": 10.12, "learning_rate": 1.5350169704287112e-05, "loss": 0.2927, "step": 993000 }, { "epoch": 10.12, "learning_rate": 1.5344381184100202e-05, "loss": 0.271, "step": 993100 }, { "epoch": 10.12, "learning_rate": 1.533859338048559e-05, "loss": 0.331, "step": 993200 }, { "epoch": 10.12, "learning_rate": 1.5332864161044506e-05, "loss": 0.2794, "step": 993300 }, { "epoch": 10.12, "learning_rate": 1.5327077784250646e-05, "loss": 0.2842, "step": 993400 }, { "epoch": 10.12, "learning_rate": 1.5321292124875126e-05, "loss": 0.2823, "step": 993500 }, { "epoch": 10.12, "learning_rate": 1.5315507183200818e-05, "loss": 0.2254, "step": 993600 }, { "epoch": 10.12, "learning_rate": 1.5309722959510558e-05, "loss": 0.3056, "step": 993700 }, { "epoch": 10.13, "learning_rate": 1.5303939454087208e-05, "loss": 0.2531, "step": 993800 }, { "epoch": 10.13, "learning_rate": 1.5298156667213502e-05, "loss": 0.3364, "step": 993900 }, { "epoch": 10.13, "learning_rate": 1.5292374599172196e-05, "loss": 0.2686, "step": 994000 }, { "epoch": 10.13, "learning_rate": 1.5286593250246016e-05, "loss": 0.3001, "step": 994100 }, { "epoch": 10.13, "learning_rate": 1.5280812620717613e-05, "loss": 0.2476, "step": 994200 }, { "epoch": 10.13, "learning_rate": 1.527503271086963e-05, "loss": 0.3258, "step": 994300 }, { "epoch": 10.13, "learning_rate": 1.5269253520984685e-05, "loss": 0.2895, "step": 994400 }, { "epoch": 10.13, "learning_rate": 1.5263475051345327e-05, "loss": 0.2941, "step": 994500 }, { "epoch": 10.13, "learning_rate": 1.525769730223409e-05, "loss": 0.3051, "step": 994600 }, { "epoch": 10.13, "learning_rate": 1.5251920273933491e-05, "loss": 0.3001, "step": 994700 }, { "epoch": 10.14, "learning_rate": 1.5246143966725967e-05, "loss": 0.2115, "step": 994800 }, { "epoch": 10.14, "learning_rate": 1.5240368380893957e-05, "loss": 0.3273, "step": 994900 }, { "epoch": 10.14, "learning_rate": 1.523459351671984e-05, "loss": 0.2714, "step": 995000 }, { "epoch": 10.14, "learning_rate": 1.522881937448601e-05, "loss": 0.355, "step": 995100 }, { "epoch": 10.14, "learning_rate": 1.5223045954474748e-05, "loss": 0.3331, "step": 995200 }, { "epoch": 10.14, "learning_rate": 1.5217273256968356e-05, "loss": 0.2924, "step": 995300 }, { "epoch": 10.14, "learning_rate": 1.521155899841756e-05, "loss": 0.3031, "step": 995400 }, { "epoch": 10.14, "learning_rate": 1.5205787739535532e-05, "loss": 0.2933, "step": 995500 }, { "epoch": 10.14, "learning_rate": 1.5200017204002205e-05, "loss": 0.2439, "step": 995600 }, { "epoch": 10.14, "learning_rate": 1.5194247392099704e-05, "loss": 0.2544, "step": 995700 }, { "epoch": 10.15, "learning_rate": 1.5188478304110151e-05, "loss": 0.3055, "step": 995800 }, { "epoch": 10.15, "learning_rate": 1.5182709940315633e-05, "loss": 0.2707, "step": 995900 }, { "epoch": 10.15, "learning_rate": 1.5176942300998157e-05, "loss": 0.2763, "step": 996000 }, { "epoch": 10.15, "learning_rate": 1.5171175386439746e-05, "loss": 0.3561, "step": 996100 }, { "epoch": 10.15, "learning_rate": 1.5165409196922365e-05, "loss": 0.2769, "step": 996200 }, { "epoch": 10.15, "learning_rate": 1.5159643732727946e-05, "loss": 0.2655, "step": 996300 }, { "epoch": 10.15, "learning_rate": 1.5153878994138386e-05, "loss": 0.3115, "step": 996400 }, { "epoch": 10.15, "learning_rate": 1.5148114981435556e-05, "loss": 0.3356, "step": 996500 }, { "epoch": 10.15, "learning_rate": 1.514235169490126e-05, "loss": 0.2953, "step": 996600 }, { "epoch": 10.15, "learning_rate": 1.5136589134817303e-05, "loss": 0.2695, "step": 996700 }, { "epoch": 10.16, "learning_rate": 1.5130827301465446e-05, "loss": 0.3138, "step": 996800 }, { "epoch": 10.16, "learning_rate": 1.512506619512739e-05, "loss": 0.273, "step": 996900 }, { "epoch": 10.16, "learning_rate": 1.5119305816084826e-05, "loss": 0.3209, "step": 997000 }, { "epoch": 10.16, "learning_rate": 1.5113546164619418e-05, "loss": 0.2925, "step": 997100 }, { "epoch": 10.16, "learning_rate": 1.5107787241012753e-05, "loss": 0.2819, "step": 997200 }, { "epoch": 10.16, "learning_rate": 1.5102029045546417e-05, "loss": 0.2901, "step": 997300 }, { "epoch": 10.16, "learning_rate": 1.5096271578501957e-05, "loss": 0.2541, "step": 997400 }, { "epoch": 10.16, "learning_rate": 1.5090514840160875e-05, "loss": 0.2015, "step": 997500 }, { "epoch": 10.16, "learning_rate": 1.5084758830804643e-05, "loss": 0.3325, "step": 997600 }, { "epoch": 10.16, "learning_rate": 1.5079003550714694e-05, "loss": 0.2414, "step": 997700 }, { "epoch": 10.17, "learning_rate": 1.507324900017244e-05, "loss": 0.3616, "step": 997800 }, { "epoch": 10.17, "learning_rate": 1.506749517945922e-05, "loss": 0.284, "step": 997900 }, { "epoch": 10.17, "learning_rate": 1.5061742088856372e-05, "loss": 0.2425, "step": 998000 }, { "epoch": 10.17, "learning_rate": 1.5055989728645201e-05, "loss": 0.2997, "step": 998100 }, { "epoch": 10.17, "learning_rate": 1.5050238099106938e-05, "loss": 0.2713, "step": 998200 }, { "epoch": 10.17, "learning_rate": 1.504448720052282e-05, "loss": 0.3018, "step": 998300 }, { "epoch": 10.17, "learning_rate": 1.5038737033174039e-05, "loss": 0.3064, "step": 998400 }, { "epoch": 10.17, "learning_rate": 1.5032987597341716e-05, "loss": 0.3314, "step": 998500 }, { "epoch": 10.17, "learning_rate": 1.5027238893306987e-05, "loss": 0.3292, "step": 998600 }, { "epoch": 10.17, "learning_rate": 1.5021490921350923e-05, "loss": 0.2814, "step": 998700 }, { "epoch": 10.18, "learning_rate": 1.5015743681754563e-05, "loss": 0.2881, "step": 998800 }, { "epoch": 10.18, "learning_rate": 1.500999717479892e-05, "loss": 0.3107, "step": 998900 }, { "epoch": 10.18, "learning_rate": 1.5004251400764952e-05, "loss": 0.2446, "step": 999000 }, { "epoch": 10.18, "learning_rate": 1.4998506359933617e-05, "loss": 0.2392, "step": 999100 }, { "epoch": 10.18, "learning_rate": 1.4992762052585785e-05, "loss": 0.2992, "step": 999200 }, { "epoch": 10.18, "learning_rate": 1.498701847900233e-05, "loss": 0.3003, "step": 999300 }, { "epoch": 10.18, "learning_rate": 1.4981275639464093e-05, "loss": 0.2858, "step": 999400 }, { "epoch": 10.18, "learning_rate": 1.4975533534251835e-05, "loss": 0.2691, "step": 999500 }, { "epoch": 10.18, "learning_rate": 1.4969792163646327e-05, "loss": 0.2896, "step": 999600 }, { "epoch": 10.19, "learning_rate": 1.4964108930646857e-05, "loss": 0.318, "step": 999700 }, { "epoch": 10.19, "learning_rate": 1.4958369022743894e-05, "loss": 0.2662, "step": 999800 }, { "epoch": 10.19, "learning_rate": 1.4952629850286935e-05, "loss": 0.2526, "step": 999900 }, { "epoch": 10.19, "learning_rate": 1.4946891413556568e-05, "loss": 0.2716, "step": 1000000 }, { "epoch": 10.19, "learning_rate": 1.494121108619645e-05, "loss": 0.3408, "step": 1000100 }, { "epoch": 10.19, "learning_rate": 1.493547411439671e-05, "loss": 0.3107, "step": 1000200 }, { "epoch": 10.19, "learning_rate": 1.4929737879162386e-05, "loss": 0.2623, "step": 1000300 }, { "epoch": 10.19, "learning_rate": 1.4924002380773956e-05, "loss": 0.2901, "step": 1000400 }, { "epoch": 10.19, "learning_rate": 1.4918267619511831e-05, "loss": 0.2981, "step": 1000500 }, { "epoch": 10.19, "learning_rate": 1.4912533595656413e-05, "loss": 0.3036, "step": 1000600 }, { "epoch": 10.2, "learning_rate": 1.4906800309488081e-05, "loss": 0.2691, "step": 1000700 }, { "epoch": 10.2, "learning_rate": 1.490106776128713e-05, "loss": 0.227, "step": 1000800 }, { "epoch": 10.2, "learning_rate": 1.4895335951333857e-05, "loss": 0.3355, "step": 1000900 }, { "epoch": 10.2, "learning_rate": 1.488960487990853e-05, "loss": 0.337, "step": 1001000 }, { "epoch": 10.2, "learning_rate": 1.4883874547291335e-05, "loss": 0.3396, "step": 1001100 }, { "epoch": 10.2, "learning_rate": 1.4878144953762471e-05, "loss": 0.2745, "step": 1001200 }, { "epoch": 10.2, "learning_rate": 1.4872416099602075e-05, "loss": 0.2923, "step": 1001300 }, { "epoch": 10.2, "learning_rate": 1.4866687985090253e-05, "loss": 0.2976, "step": 1001400 }, { "epoch": 10.2, "learning_rate": 1.4860960610507079e-05, "loss": 0.2592, "step": 1001500 }, { "epoch": 10.2, "learning_rate": 1.4855233976132598e-05, "loss": 0.2883, "step": 1001600 }, { "epoch": 10.21, "learning_rate": 1.4849508082246785e-05, "loss": 0.2998, "step": 1001700 }, { "epoch": 10.21, "learning_rate": 1.4843782929129612e-05, "loss": 0.2677, "step": 1001800 }, { "epoch": 10.21, "learning_rate": 1.4838058517061008e-05, "loss": 0.2596, "step": 1001900 }, { "epoch": 10.21, "learning_rate": 1.483233484632087e-05, "loss": 0.299, "step": 1002000 }, { "epoch": 10.21, "learning_rate": 1.4826611917189032e-05, "loss": 0.298, "step": 1002100 }, { "epoch": 10.21, "learning_rate": 1.4820889729945322e-05, "loss": 0.3756, "step": 1002200 }, { "epoch": 10.21, "learning_rate": 1.481516828486953e-05, "loss": 0.2905, "step": 1002300 }, { "epoch": 10.21, "learning_rate": 1.480944758224138e-05, "loss": 0.3306, "step": 1002400 }, { "epoch": 10.21, "learning_rate": 1.4803727622340591e-05, "loss": 0.2646, "step": 1002500 }, { "epoch": 10.21, "learning_rate": 1.4798065593936974e-05, "loss": 0.3013, "step": 1002600 }, { "epoch": 10.22, "learning_rate": 1.4792347112895637e-05, "loss": 0.2957, "step": 1002700 }, { "epoch": 10.22, "learning_rate": 1.4786629375417766e-05, "loss": 0.3258, "step": 1002800 }, { "epoch": 10.22, "learning_rate": 1.4780912381782936e-05, "loss": 0.2585, "step": 1002900 }, { "epoch": 10.22, "learning_rate": 1.4775196132270664e-05, "loss": 0.2778, "step": 1003000 }, { "epoch": 10.22, "learning_rate": 1.4769480627160457e-05, "loss": 0.2376, "step": 1003100 }, { "epoch": 10.22, "learning_rate": 1.4763765866731744e-05, "loss": 0.205, "step": 1003200 }, { "epoch": 10.22, "learning_rate": 1.4758051851263956e-05, "loss": 0.3115, "step": 1003300 }, { "epoch": 10.22, "learning_rate": 1.4752338581036482e-05, "loss": 0.3018, "step": 1003400 }, { "epoch": 10.22, "learning_rate": 1.474662605632865e-05, "loss": 0.2891, "step": 1003500 }, { "epoch": 10.22, "learning_rate": 1.4740914277419775e-05, "loss": 0.2489, "step": 1003600 }, { "epoch": 10.23, "learning_rate": 1.4735203244589141e-05, "loss": 0.2955, "step": 1003700 }, { "epoch": 10.23, "learning_rate": 1.4729492958115965e-05, "loss": 0.2232, "step": 1003800 }, { "epoch": 10.23, "learning_rate": 1.472378341827945e-05, "loss": 0.2541, "step": 1003900 }, { "epoch": 10.23, "learning_rate": 1.4718074625358765e-05, "loss": 0.2894, "step": 1004000 }, { "epoch": 10.23, "learning_rate": 1.4712366579633032e-05, "loss": 0.3067, "step": 1004100 }, { "epoch": 10.23, "learning_rate": 1.470665928138134e-05, "loss": 0.271, "step": 1004200 }, { "epoch": 10.23, "learning_rate": 1.4700952730882744e-05, "loss": 0.3345, "step": 1004300 }, { "epoch": 10.23, "learning_rate": 1.4695246928416271e-05, "loss": 0.2724, "step": 1004400 }, { "epoch": 10.23, "learning_rate": 1.4689541874260877e-05, "loss": 0.3322, "step": 1004500 }, { "epoch": 10.24, "learning_rate": 1.4683837568695518e-05, "loss": 0.301, "step": 1004600 }, { "epoch": 10.24, "learning_rate": 1.4678134011999113e-05, "loss": 0.2814, "step": 1004700 }, { "epoch": 10.24, "learning_rate": 1.4672431204450506e-05, "loss": 0.3129, "step": 1004800 }, { "epoch": 10.24, "learning_rate": 1.4666729146328542e-05, "loss": 0.2399, "step": 1004900 }, { "epoch": 10.24, "learning_rate": 1.4661084847284234e-05, "loss": 0.2578, "step": 1005000 }, { "epoch": 10.24, "learning_rate": 1.4655384281350689e-05, "loss": 0.3071, "step": 1005100 }, { "epoch": 10.24, "learning_rate": 1.4649684465677282e-05, "loss": 0.2754, "step": 1005200 }, { "epoch": 10.24, "learning_rate": 1.4643985400542697e-05, "loss": 0.3098, "step": 1005300 }, { "epoch": 10.24, "learning_rate": 1.4638287086225589e-05, "loss": 0.3784, "step": 1005400 }, { "epoch": 10.24, "learning_rate": 1.4632589523004577e-05, "loss": 0.2742, "step": 1005500 }, { "epoch": 10.25, "learning_rate": 1.462689271115822e-05, "loss": 0.2835, "step": 1005600 }, { "epoch": 10.25, "learning_rate": 1.4621196650965068e-05, "loss": 0.2826, "step": 1005700 }, { "epoch": 10.25, "learning_rate": 1.4615501342703634e-05, "loss": 0.2995, "step": 1005800 }, { "epoch": 10.25, "learning_rate": 1.4609806786652363e-05, "loss": 0.2898, "step": 1005900 }, { "epoch": 10.25, "learning_rate": 1.4604112983089697e-05, "loss": 0.3088, "step": 1006000 }, { "epoch": 10.25, "learning_rate": 1.459841993229404e-05, "loss": 0.28, "step": 1006100 }, { "epoch": 10.25, "learning_rate": 1.4592727634543723e-05, "loss": 0.2632, "step": 1006200 }, { "epoch": 10.25, "learning_rate": 1.458703609011708e-05, "loss": 0.2627, "step": 1006300 }, { "epoch": 10.25, "learning_rate": 1.4581345299292392e-05, "loss": 0.2809, "step": 1006400 }, { "epoch": 10.25, "learning_rate": 1.4575655262347908e-05, "loss": 0.3289, "step": 1006500 }, { "epoch": 10.26, "learning_rate": 1.4569965979561832e-05, "loss": 0.2821, "step": 1006600 }, { "epoch": 10.26, "learning_rate": 1.4564277451212336e-05, "loss": 0.3254, "step": 1006700 }, { "epoch": 10.26, "learning_rate": 1.4558589677577574e-05, "loss": 0.2879, "step": 1006800 }, { "epoch": 10.26, "learning_rate": 1.455290265893561e-05, "loss": 0.2809, "step": 1006900 }, { "epoch": 10.26, "learning_rate": 1.4547216395564524e-05, "loss": 0.3273, "step": 1007000 }, { "epoch": 10.26, "learning_rate": 1.4541530887742352e-05, "loss": 0.3139, "step": 1007100 }, { "epoch": 10.26, "learning_rate": 1.4535846135747057e-05, "loss": 0.3257, "step": 1007200 }, { "epoch": 10.26, "learning_rate": 1.4530162139856596e-05, "loss": 0.2878, "step": 1007300 }, { "epoch": 10.26, "learning_rate": 1.4524478900348904e-05, "loss": 0.2488, "step": 1007400 }, { "epoch": 10.26, "learning_rate": 1.4518796417501828e-05, "loss": 0.2924, "step": 1007500 }, { "epoch": 10.27, "learning_rate": 1.4513114691593216e-05, "loss": 0.2851, "step": 1007600 }, { "epoch": 10.27, "learning_rate": 1.4507433722900878e-05, "loss": 0.2275, "step": 1007700 }, { "epoch": 10.27, "learning_rate": 1.4501753511702579e-05, "loss": 0.2354, "step": 1007800 }, { "epoch": 10.27, "learning_rate": 1.449607405827604e-05, "loss": 0.335, "step": 1007900 }, { "epoch": 10.27, "learning_rate": 1.4490395362898973e-05, "loss": 0.2356, "step": 1008000 }, { "epoch": 10.27, "learning_rate": 1.4484717425849003e-05, "loss": 0.2337, "step": 1008100 }, { "epoch": 10.27, "learning_rate": 1.4479040247403758e-05, "loss": 0.279, "step": 1008200 }, { "epoch": 10.27, "learning_rate": 1.4473363827840834e-05, "loss": 0.2913, "step": 1008300 }, { "epoch": 10.27, "learning_rate": 1.446768816743775e-05, "loss": 0.2791, "step": 1008400 }, { "epoch": 10.27, "learning_rate": 1.446201326647202e-05, "loss": 0.2633, "step": 1008500 }, { "epoch": 10.28, "learning_rate": 1.4456339125221118e-05, "loss": 0.2324, "step": 1008600 }, { "epoch": 10.28, "learning_rate": 1.4450665743962483e-05, "loss": 0.2764, "step": 1008700 }, { "epoch": 10.28, "learning_rate": 1.444499312297349e-05, "loss": 0.2692, "step": 1008800 }, { "epoch": 10.28, "learning_rate": 1.4439321262531502e-05, "loss": 0.2472, "step": 1008900 }, { "epoch": 10.28, "learning_rate": 1.4433650162913847e-05, "loss": 0.2268, "step": 1009000 }, { "epoch": 10.28, "learning_rate": 1.4428036524014607e-05, "loss": 0.2616, "step": 1009100 }, { "epoch": 10.28, "learning_rate": 1.442236693926226e-05, "loss": 0.2654, "step": 1009200 }, { "epoch": 10.28, "learning_rate": 1.4416698116163203e-05, "loss": 0.3265, "step": 1009300 }, { "epoch": 10.28, "learning_rate": 1.4411086731833844e-05, "loss": 0.3136, "step": 1009400 }, { "epoch": 10.28, "learning_rate": 1.4405419425249408e-05, "loss": 0.2876, "step": 1009500 }, { "epoch": 10.29, "learning_rate": 1.4399752881146909e-05, "loss": 0.2668, "step": 1009600 }, { "epoch": 10.29, "learning_rate": 1.4394087099803386e-05, "loss": 0.2245, "step": 1009700 }, { "epoch": 10.29, "learning_rate": 1.4388422081495875e-05, "loss": 0.2939, "step": 1009800 }, { "epoch": 10.29, "learning_rate": 1.4382757826501358e-05, "loss": 0.2816, "step": 1009900 }, { "epoch": 10.29, "learning_rate": 1.4377094335096797e-05, "loss": 0.3088, "step": 1010000 }, { "epoch": 10.29, "learning_rate": 1.4371431607559075e-05, "loss": 0.2335, "step": 1010100 }, { "epoch": 10.29, "learning_rate": 1.4365769644165086e-05, "loss": 0.3214, "step": 1010200 }, { "epoch": 10.29, "learning_rate": 1.436010844519167e-05, "loss": 0.2468, "step": 1010300 }, { "epoch": 10.29, "learning_rate": 1.4354448010915608e-05, "loss": 0.3226, "step": 1010400 }, { "epoch": 10.3, "learning_rate": 1.4348788341613654e-05, "loss": 0.2676, "step": 1010500 }, { "epoch": 10.3, "learning_rate": 1.4343129437562575e-05, "loss": 0.3115, "step": 1010600 }, { "epoch": 10.3, "learning_rate": 1.4337471299039019e-05, "loss": 0.2609, "step": 1010700 }, { "epoch": 10.3, "learning_rate": 1.4331813926319647e-05, "loss": 0.2781, "step": 1010800 }, { "epoch": 10.3, "learning_rate": 1.4326157319681084e-05, "loss": 0.2394, "step": 1010900 }, { "epoch": 10.3, "learning_rate": 1.4320501479399878e-05, "loss": 0.2736, "step": 1011000 }, { "epoch": 10.3, "learning_rate": 1.4314846405752581e-05, "loss": 0.3235, "step": 1011100 }, { "epoch": 10.3, "learning_rate": 1.4309192099015702e-05, "loss": 0.2278, "step": 1011200 }, { "epoch": 10.3, "learning_rate": 1.430353855946568e-05, "loss": 0.2628, "step": 1011300 }, { "epoch": 10.3, "learning_rate": 1.429788578737895e-05, "loss": 0.3476, "step": 1011400 }, { "epoch": 10.31, "learning_rate": 1.4292233783031902e-05, "loss": 0.2903, "step": 1011500 }, { "epoch": 10.31, "learning_rate": 1.428658254670089e-05, "loss": 0.2653, "step": 1011600 }, { "epoch": 10.31, "learning_rate": 1.4280932078662208e-05, "loss": 0.3061, "step": 1011700 }, { "epoch": 10.31, "learning_rate": 1.4275282379192133e-05, "loss": 0.2899, "step": 1011800 }, { "epoch": 10.31, "learning_rate": 1.4269633448566931e-05, "loss": 0.3433, "step": 1011900 }, { "epoch": 10.31, "learning_rate": 1.4263985287062766e-05, "loss": 0.2535, "step": 1012000 }, { "epoch": 10.31, "learning_rate": 1.4258337894955818e-05, "loss": 0.2514, "step": 1012100 }, { "epoch": 10.31, "learning_rate": 1.425269127252221e-05, "loss": 0.298, "step": 1012200 }, { "epoch": 10.31, "learning_rate": 1.4247045420038017e-05, "loss": 0.2794, "step": 1012300 }, { "epoch": 10.31, "learning_rate": 1.4241400337779294e-05, "loss": 0.274, "step": 1012400 }, { "epoch": 10.32, "learning_rate": 1.4235756026022061e-05, "loss": 0.3269, "step": 1012500 }, { "epoch": 10.32, "learning_rate": 1.423016891663581e-05, "loss": 0.3077, "step": 1012600 }, { "epoch": 10.32, "learning_rate": 1.4224526138997522e-05, "loss": 0.2878, "step": 1012700 }, { "epoch": 10.32, "learning_rate": 1.4218884132685753e-05, "loss": 0.3027, "step": 1012800 }, { "epoch": 10.32, "learning_rate": 1.4213242897976373e-05, "loss": 0.2861, "step": 1012900 }, { "epoch": 10.32, "learning_rate": 1.4207602435145204e-05, "loss": 0.2575, "step": 1013000 }, { "epoch": 10.32, "learning_rate": 1.4201962744468032e-05, "loss": 0.2585, "step": 1013100 }, { "epoch": 10.32, "learning_rate": 1.4196323826220604e-05, "loss": 0.2759, "step": 1013200 }, { "epoch": 10.32, "learning_rate": 1.419068568067864e-05, "loss": 0.285, "step": 1013300 }, { "epoch": 10.32, "learning_rate": 1.418504830811779e-05, "loss": 0.3083, "step": 1013400 }, { "epoch": 10.33, "learning_rate": 1.4179411708813703e-05, "loss": 0.2934, "step": 1013500 }, { "epoch": 10.33, "learning_rate": 1.4173775883041976e-05, "loss": 0.3112, "step": 1013600 }, { "epoch": 10.33, "learning_rate": 1.4168140831078155e-05, "loss": 0.3182, "step": 1013700 }, { "epoch": 10.33, "learning_rate": 1.4162506553197767e-05, "loss": 0.2306, "step": 1013800 }, { "epoch": 10.33, "learning_rate": 1.4156873049676293e-05, "loss": 0.2309, "step": 1013900 }, { "epoch": 10.33, "learning_rate": 1.4151240320789196e-05, "loss": 0.2198, "step": 1014000 }, { "epoch": 10.33, "learning_rate": 1.4145608366811852e-05, "loss": 0.302, "step": 1014100 }, { "epoch": 10.33, "learning_rate": 1.4139977188019648e-05, "loss": 0.3323, "step": 1014200 }, { "epoch": 10.33, "learning_rate": 1.4134346784687907e-05, "loss": 0.2781, "step": 1014300 }, { "epoch": 10.33, "learning_rate": 1.4128717157091932e-05, "loss": 0.3646, "step": 1014400 }, { "epoch": 10.34, "learning_rate": 1.412308830550697e-05, "loss": 0.2902, "step": 1014500 }, { "epoch": 10.34, "learning_rate": 1.4117460230208251e-05, "loss": 0.2753, "step": 1014600 }, { "epoch": 10.34, "learning_rate": 1.4111832931470938e-05, "loss": 0.2284, "step": 1014700 }, { "epoch": 10.34, "learning_rate": 1.4106206409570178e-05, "loss": 0.2779, "step": 1014800 }, { "epoch": 10.34, "learning_rate": 1.4100580664781085e-05, "loss": 0.2778, "step": 1014900 }, { "epoch": 10.34, "learning_rate": 1.4094955697378702e-05, "loss": 0.2788, "step": 1015000 }, { "epoch": 10.34, "learning_rate": 1.408933150763807e-05, "loss": 0.3298, "step": 1015100 }, { "epoch": 10.34, "learning_rate": 1.4083708095834193e-05, "loss": 0.2597, "step": 1015200 }, { "epoch": 10.34, "learning_rate": 1.4078141684724859e-05, "loss": 0.3054, "step": 1015300 }, { "epoch": 10.35, "learning_rate": 1.4072519821833044e-05, "loss": 0.2876, "step": 1015400 }, { "epoch": 10.35, "learning_rate": 1.406689873769995e-05, "loss": 0.2735, "step": 1015500 }, { "epoch": 10.35, "learning_rate": 1.4061278432600426e-05, "loss": 0.2667, "step": 1015600 }, { "epoch": 10.35, "learning_rate": 1.4055715098208696e-05, "loss": 0.2679, "step": 1015700 }, { "epoch": 10.35, "learning_rate": 1.4050096344203473e-05, "loss": 0.2615, "step": 1015800 }, { "epoch": 10.35, "learning_rate": 1.4044478370053372e-05, "loss": 0.2905, "step": 1015900 }, { "epoch": 10.35, "learning_rate": 1.4038861176033044e-05, "loss": 0.3114, "step": 1016000 }, { "epoch": 10.35, "learning_rate": 1.4033244762417158e-05, "loss": 0.2336, "step": 1016100 }, { "epoch": 10.35, "learning_rate": 1.4027629129480326e-05, "loss": 0.2921, "step": 1016200 }, { "epoch": 10.35, "learning_rate": 1.4022014277497103e-05, "loss": 0.309, "step": 1016300 }, { "epoch": 10.36, "learning_rate": 1.4016400206742028e-05, "loss": 0.2655, "step": 1016400 }, { "epoch": 10.36, "learning_rate": 1.401078691748961e-05, "loss": 0.2981, "step": 1016500 }, { "epoch": 10.36, "learning_rate": 1.400517441001428e-05, "loss": 0.3011, "step": 1016600 }, { "epoch": 10.36, "learning_rate": 1.3999562684590473e-05, "loss": 0.2545, "step": 1016700 }, { "epoch": 10.36, "learning_rate": 1.3993951741492562e-05, "loss": 0.2682, "step": 1016800 }, { "epoch": 10.36, "learning_rate": 1.3988341580994891e-05, "loss": 0.2627, "step": 1016900 }, { "epoch": 10.36, "learning_rate": 1.3982732203371767e-05, "loss": 0.2615, "step": 1017000 }, { "epoch": 10.36, "learning_rate": 1.397712360889745e-05, "loss": 0.2143, "step": 1017100 }, { "epoch": 10.36, "learning_rate": 1.3971515797846182e-05, "loss": 0.2843, "step": 1017200 }, { "epoch": 10.36, "learning_rate": 1.3965908770492126e-05, "loss": 0.2962, "step": 1017300 }, { "epoch": 10.37, "learning_rate": 1.3960302527109445e-05, "loss": 0.3491, "step": 1017400 }, { "epoch": 10.37, "learning_rate": 1.3954697067972263e-05, "loss": 0.2462, "step": 1017500 }, { "epoch": 10.37, "learning_rate": 1.3949148436216528e-05, "loss": 0.2718, "step": 1017600 }, { "epoch": 10.37, "learning_rate": 1.3943544538543207e-05, "loss": 0.2526, "step": 1017700 }, { "epoch": 10.37, "learning_rate": 1.3937941425934729e-05, "loss": 0.2897, "step": 1017800 }, { "epoch": 10.37, "learning_rate": 1.393233909866506e-05, "loss": 0.2456, "step": 1017900 }, { "epoch": 10.37, "learning_rate": 1.3926737557008134e-05, "loss": 0.2567, "step": 1018000 }, { "epoch": 10.37, "learning_rate": 1.392113680123781e-05, "loss": 0.2849, "step": 1018100 }, { "epoch": 10.37, "learning_rate": 1.3915536831627945e-05, "loss": 0.3188, "step": 1018200 }, { "epoch": 10.37, "learning_rate": 1.3909937648452339e-05, "loss": 0.3197, "step": 1018300 }, { "epoch": 10.38, "learning_rate": 1.3904339251984762e-05, "loss": 0.2359, "step": 1018400 }, { "epoch": 10.38, "learning_rate": 1.3898741642498943e-05, "loss": 0.2664, "step": 1018500 }, { "epoch": 10.38, "learning_rate": 1.389314482026858e-05, "loss": 0.2717, "step": 1018600 }, { "epoch": 10.38, "learning_rate": 1.3887548785567302e-05, "loss": 0.3299, "step": 1018700 }, { "epoch": 10.38, "learning_rate": 1.3881953538668738e-05, "loss": 0.3645, "step": 1018800 }, { "epoch": 10.38, "learning_rate": 1.3876359079846468e-05, "loss": 0.2912, "step": 1018900 }, { "epoch": 10.38, "learning_rate": 1.3870765409374007e-05, "loss": 0.3089, "step": 1019000 }, { "epoch": 10.38, "learning_rate": 1.3865172527524861e-05, "loss": 0.2536, "step": 1019100 }, { "epoch": 10.38, "learning_rate": 1.3859580434572506e-05, "loss": 0.312, "step": 1019200 }, { "epoch": 10.38, "learning_rate": 1.3853989130790335e-05, "loss": 0.2893, "step": 1019300 }, { "epoch": 10.39, "learning_rate": 1.3848398616451737e-05, "loss": 0.2817, "step": 1019400 }, { "epoch": 10.39, "learning_rate": 1.3842808891830063e-05, "loss": 0.2938, "step": 1019500 }, { "epoch": 10.39, "learning_rate": 1.3837219957198614e-05, "loss": 0.2705, "step": 1019600 }, { "epoch": 10.39, "learning_rate": 1.383163181283065e-05, "loss": 0.2773, "step": 1019700 }, { "epoch": 10.39, "learning_rate": 1.3826044458999405e-05, "loss": 0.2924, "step": 1019800 }, { "epoch": 10.39, "learning_rate": 1.3820457895978079e-05, "loss": 0.3082, "step": 1019900 }, { "epoch": 10.39, "learning_rate": 1.3814872124039793e-05, "loss": 0.2714, "step": 1020000 }, { "epoch": 10.39, "learning_rate": 1.3809287143457674e-05, "loss": 0.2994, "step": 1020100 }, { "epoch": 10.39, "learning_rate": 1.3803702954504801e-05, "loss": 0.2513, "step": 1020200 }, { "epoch": 10.39, "learning_rate": 1.379811955745419e-05, "loss": 0.2874, "step": 1020300 }, { "epoch": 10.4, "learning_rate": 1.3792536952578844e-05, "loss": 0.253, "step": 1020400 }, { "epoch": 10.4, "learning_rate": 1.378695514015173e-05, "loss": 0.2595, "step": 1020500 }, { "epoch": 10.4, "learning_rate": 1.3781374120445744e-05, "loss": 0.2544, "step": 1020600 }, { "epoch": 10.4, "learning_rate": 1.3775793893733779e-05, "loss": 0.3081, "step": 1020700 }, { "epoch": 10.4, "learning_rate": 1.377021446028867e-05, "loss": 0.258, "step": 1020800 }, { "epoch": 10.4, "learning_rate": 1.376463582038322e-05, "loss": 0.308, "step": 1020900 }, { "epoch": 10.4, "learning_rate": 1.3759057974290189e-05, "loss": 0.2972, "step": 1021000 }, { "epoch": 10.4, "learning_rate": 1.3753480922282306e-05, "loss": 0.3601, "step": 1021100 }, { "epoch": 10.4, "learning_rate": 1.3747904664632258e-05, "loss": 0.2819, "step": 1021200 }, { "epoch": 10.41, "learning_rate": 1.3742329201612676e-05, "loss": 0.2776, "step": 1021300 }, { "epoch": 10.41, "learning_rate": 1.3736754533496176e-05, "loss": 0.2636, "step": 1021400 }, { "epoch": 10.41, "learning_rate": 1.3731180660555336e-05, "loss": 0.3103, "step": 1021500 }, { "epoch": 10.41, "learning_rate": 1.3725607583062663e-05, "loss": 0.3004, "step": 1021600 }, { "epoch": 10.41, "learning_rate": 1.372003530129066e-05, "loss": 0.2535, "step": 1021700 }, { "epoch": 10.41, "learning_rate": 1.3714463815511783e-05, "loss": 0.2895, "step": 1021800 }, { "epoch": 10.41, "learning_rate": 1.3708893125998434e-05, "loss": 0.2502, "step": 1021900 }, { "epoch": 10.41, "learning_rate": 1.3703323233022988e-05, "loss": 0.27, "step": 1022000 }, { "epoch": 10.41, "learning_rate": 1.3697754136857782e-05, "loss": 0.3237, "step": 1022100 }, { "epoch": 10.41, "learning_rate": 1.3692185837775109e-05, "loss": 0.2404, "step": 1022200 }, { "epoch": 10.42, "learning_rate": 1.3686618336047233e-05, "loss": 0.2656, "step": 1022300 }, { "epoch": 10.42, "learning_rate": 1.3681051631946377e-05, "loss": 0.3085, "step": 1022400 }, { "epoch": 10.42, "learning_rate": 1.3675485725744699e-05, "loss": 0.3039, "step": 1022500 }, { "epoch": 10.42, "learning_rate": 1.366992061771435e-05, "loss": 0.2641, "step": 1022600 }, { "epoch": 10.42, "learning_rate": 1.366435630812743e-05, "loss": 0.2702, "step": 1022700 }, { "epoch": 10.42, "learning_rate": 1.365879279725601e-05, "loss": 0.3275, "step": 1022800 }, { "epoch": 10.42, "learning_rate": 1.3653230085372094e-05, "loss": 0.2841, "step": 1022900 }, { "epoch": 10.42, "learning_rate": 1.3647668172747674e-05, "loss": 0.2551, "step": 1023000 }, { "epoch": 10.42, "learning_rate": 1.3642107059654704e-05, "loss": 0.3274, "step": 1023100 }, { "epoch": 10.42, "learning_rate": 1.363654674636507e-05, "loss": 0.3192, "step": 1023200 }, { "epoch": 10.43, "learning_rate": 1.3630987233150647e-05, "loss": 0.2678, "step": 1023300 }, { "epoch": 10.43, "learning_rate": 1.3625428520283262e-05, "loss": 0.2892, "step": 1023400 }, { "epoch": 10.43, "learning_rate": 1.361987060803471e-05, "loss": 0.2651, "step": 1023500 }, { "epoch": 10.43, "learning_rate": 1.3614313496676729e-05, "loss": 0.272, "step": 1023600 }, { "epoch": 10.43, "learning_rate": 1.3608757186481046e-05, "loss": 0.2925, "step": 1023700 }, { "epoch": 10.43, "learning_rate": 1.3603201677719306e-05, "loss": 0.2509, "step": 1023800 }, { "epoch": 10.43, "learning_rate": 1.3597646970663155e-05, "loss": 0.2421, "step": 1023900 }, { "epoch": 10.43, "learning_rate": 1.3592093065584191e-05, "loss": 0.2687, "step": 1024000 }, { "epoch": 10.43, "learning_rate": 1.3586539962753953e-05, "loss": 0.2712, "step": 1024100 }, { "epoch": 10.43, "learning_rate": 1.3580987662443957e-05, "loss": 0.2586, "step": 1024200 }, { "epoch": 10.44, "learning_rate": 1.3575436164925691e-05, "loss": 0.2506, "step": 1024300 }, { "epoch": 10.44, "learning_rate": 1.356988547047057e-05, "loss": 0.2864, "step": 1024400 }, { "epoch": 10.44, "learning_rate": 1.3564391074283802e-05, "loss": 0.2512, "step": 1024500 }, { "epoch": 10.44, "learning_rate": 1.3558841978731736e-05, "loss": 0.2833, "step": 1024600 }, { "epoch": 10.44, "learning_rate": 1.3553293687054182e-05, "loss": 0.2969, "step": 1024700 }, { "epoch": 10.44, "learning_rate": 1.3547746199522429e-05, "loss": 0.2711, "step": 1024800 }, { "epoch": 10.44, "learning_rate": 1.3542199516407697e-05, "loss": 0.2126, "step": 1024900 }, { "epoch": 10.44, "learning_rate": 1.353665363798119e-05, "loss": 0.24, "step": 1025000 }, { "epoch": 10.44, "learning_rate": 1.3531108564514071e-05, "loss": 0.2585, "step": 1025100 }, { "epoch": 10.44, "learning_rate": 1.3525564296277474e-05, "loss": 0.3302, "step": 1025200 }, { "epoch": 10.45, "learning_rate": 1.3520020833542461e-05, "loss": 0.2986, "step": 1025300 }, { "epoch": 10.45, "learning_rate": 1.351447817658008e-05, "loss": 0.257, "step": 1025400 }, { "epoch": 10.45, "learning_rate": 1.3508936325661343e-05, "loss": 0.2964, "step": 1025500 }, { "epoch": 10.45, "learning_rate": 1.3503395281057199e-05, "loss": 0.2673, "step": 1025600 }, { "epoch": 10.45, "learning_rate": 1.3497855043038578e-05, "loss": 0.2207, "step": 1025700 }, { "epoch": 10.45, "learning_rate": 1.3492315611876366e-05, "loss": 0.3436, "step": 1025800 }, { "epoch": 10.45, "learning_rate": 1.3486776987841408e-05, "loss": 0.2608, "step": 1025900 }, { "epoch": 10.45, "learning_rate": 1.3481239171204509e-05, "loss": 0.2775, "step": 1026000 }, { "epoch": 10.45, "learning_rate": 1.3475702162236446e-05, "loss": 0.2963, "step": 1026100 }, { "epoch": 10.46, "learning_rate": 1.3470165961207926e-05, "loss": 0.3303, "step": 1026200 }, { "epoch": 10.46, "learning_rate": 1.3464630568389647e-05, "loss": 0.2665, "step": 1026300 }, { "epoch": 10.46, "learning_rate": 1.3459095984052264e-05, "loss": 0.2331, "step": 1026400 }, { "epoch": 10.46, "learning_rate": 1.3453562208466366e-05, "loss": 0.2721, "step": 1026500 }, { "epoch": 10.46, "learning_rate": 1.344802924190253e-05, "loss": 0.3123, "step": 1026600 }, { "epoch": 10.46, "learning_rate": 1.3442497084631303e-05, "loss": 0.2714, "step": 1026700 }, { "epoch": 10.46, "learning_rate": 1.3436965736923143e-05, "loss": 0.3093, "step": 1026800 }, { "epoch": 10.46, "learning_rate": 1.3431435199048515e-05, "loss": 0.2504, "step": 1026900 }, { "epoch": 10.46, "learning_rate": 1.3425905471277834e-05, "loss": 0.2519, "step": 1027000 }, { "epoch": 10.46, "learning_rate": 1.3420376553881463e-05, "loss": 0.2558, "step": 1027100 }, { "epoch": 10.47, "learning_rate": 1.3414848447129738e-05, "loss": 0.277, "step": 1027200 }, { "epoch": 10.47, "learning_rate": 1.3409321151292949e-05, "loss": 0.2347, "step": 1027300 }, { "epoch": 10.47, "learning_rate": 1.3403794666641357e-05, "loss": 0.2802, "step": 1027400 }, { "epoch": 10.47, "learning_rate": 1.3398324246159519e-05, "loss": 0.2539, "step": 1027500 }, { "epoch": 10.47, "learning_rate": 1.3392799376570308e-05, "loss": 0.3249, "step": 1027600 }, { "epoch": 10.47, "learning_rate": 1.338727531897409e-05, "loss": 0.2917, "step": 1027700 }, { "epoch": 10.47, "learning_rate": 1.338175207364097e-05, "loss": 0.3196, "step": 1027800 }, { "epoch": 10.47, "learning_rate": 1.337622964084101e-05, "loss": 0.2767, "step": 1027900 }, { "epoch": 10.47, "learning_rate": 1.3370708020844203e-05, "loss": 0.2957, "step": 1028000 }, { "epoch": 10.47, "learning_rate": 1.3365187213920537e-05, "loss": 0.2711, "step": 1028100 }, { "epoch": 10.48, "learning_rate": 1.3359667220339956e-05, "loss": 0.3122, "step": 1028200 }, { "epoch": 10.48, "learning_rate": 1.3354148040372332e-05, "loss": 0.3273, "step": 1028300 }, { "epoch": 10.48, "learning_rate": 1.3348629674287534e-05, "loss": 0.271, "step": 1028400 }, { "epoch": 10.48, "learning_rate": 1.3343112122355373e-05, "loss": 0.3193, "step": 1028500 }, { "epoch": 10.48, "learning_rate": 1.3337595384845625e-05, "loss": 0.2864, "step": 1028600 }, { "epoch": 10.48, "learning_rate": 1.333207946202803e-05, "loss": 0.2699, "step": 1028700 }, { "epoch": 10.48, "learning_rate": 1.3326564354172285e-05, "loss": 0.2543, "step": 1028800 }, { "epoch": 10.48, "learning_rate": 1.3321050061548036e-05, "loss": 0.2413, "step": 1028900 }, { "epoch": 10.48, "learning_rate": 1.3315536584424902e-05, "loss": 0.3171, "step": 1029000 }, { "epoch": 10.48, "learning_rate": 1.3310023923072473e-05, "loss": 0.2681, "step": 1029100 }, { "epoch": 10.49, "learning_rate": 1.3304512077760263e-05, "loss": 0.2935, "step": 1029200 }, { "epoch": 10.49, "learning_rate": 1.329905615500619e-05, "loss": 0.2852, "step": 1029300 }, { "epoch": 10.49, "learning_rate": 1.3293545934415763e-05, "loss": 0.2537, "step": 1029400 }, { "epoch": 10.49, "learning_rate": 1.328803653067124e-05, "loss": 0.2641, "step": 1029500 }, { "epoch": 10.49, "learning_rate": 1.328252794404201e-05, "loss": 0.2946, "step": 1029600 }, { "epoch": 10.49, "learning_rate": 1.327702017479739e-05, "loss": 0.2822, "step": 1029700 }, { "epoch": 10.49, "learning_rate": 1.327151322320667e-05, "loss": 0.282, "step": 1029800 }, { "epoch": 10.49, "learning_rate": 1.3266007089539137e-05, "loss": 0.335, "step": 1029900 }, { "epoch": 10.49, "learning_rate": 1.3260501774064004e-05, "loss": 0.2721, "step": 1030000 }, { "epoch": 10.49, "learning_rate": 1.3254997277050431e-05, "loss": 0.2471, "step": 1030100 }, { "epoch": 10.5, "learning_rate": 1.324949359876756e-05, "loss": 0.2721, "step": 1030200 }, { "epoch": 10.5, "learning_rate": 1.32439907394845e-05, "loss": 0.3018, "step": 1030300 }, { "epoch": 10.5, "learning_rate": 1.3238488699470286e-05, "loss": 0.2618, "step": 1030400 }, { "epoch": 10.5, "learning_rate": 1.323298747899395e-05, "loss": 0.2444, "step": 1030500 }, { "epoch": 10.5, "learning_rate": 1.3227487078324472e-05, "loss": 0.4154, "step": 1030600 }, { "epoch": 10.5, "learning_rate": 1.3221987497730776e-05, "loss": 0.2795, "step": 1030700 }, { "epoch": 10.5, "learning_rate": 1.3216488737481765e-05, "loss": 0.2556, "step": 1030800 }, { "epoch": 10.5, "learning_rate": 1.3210990797846309e-05, "loss": 0.2928, "step": 1030900 }, { "epoch": 10.5, "learning_rate": 1.3205493679093197e-05, "loss": 0.3417, "step": 1031000 }, { "epoch": 10.51, "learning_rate": 1.3199997381491213e-05, "loss": 0.3086, "step": 1031100 }, { "epoch": 10.51, "learning_rate": 1.3194501905309117e-05, "loss": 0.2956, "step": 1031200 }, { "epoch": 10.51, "learning_rate": 1.3189007250815578e-05, "loss": 0.2879, "step": 1031300 }, { "epoch": 10.51, "learning_rate": 1.3183513418279261e-05, "loss": 0.3078, "step": 1031400 }, { "epoch": 10.51, "learning_rate": 1.3178020407968796e-05, "loss": 0.2236, "step": 1031500 }, { "epoch": 10.51, "learning_rate": 1.3172528220152733e-05, "loss": 0.2434, "step": 1031600 }, { "epoch": 10.51, "learning_rate": 1.3167036855099618e-05, "loss": 0.2131, "step": 1031700 }, { "epoch": 10.51, "learning_rate": 1.316154631307795e-05, "loss": 0.3165, "step": 1031800 }, { "epoch": 10.51, "learning_rate": 1.3156056594356186e-05, "loss": 0.2376, "step": 1031900 }, { "epoch": 10.51, "learning_rate": 1.3150567699202729e-05, "loss": 0.2577, "step": 1032000 }, { "epoch": 10.52, "learning_rate": 1.3145079627885957e-05, "loss": 0.2425, "step": 1032100 }, { "epoch": 10.52, "learning_rate": 1.3139592380674216e-05, "loss": 0.2437, "step": 1032200 }, { "epoch": 10.52, "learning_rate": 1.313410595783578e-05, "loss": 0.2647, "step": 1032300 }, { "epoch": 10.52, "learning_rate": 1.3128620359638902e-05, "loss": 0.2701, "step": 1032400 }, { "epoch": 10.52, "learning_rate": 1.312313558635183e-05, "loss": 0.283, "step": 1032500 }, { "epoch": 10.52, "learning_rate": 1.3117651638242698e-05, "loss": 0.2777, "step": 1032600 }, { "epoch": 10.52, "learning_rate": 1.3112168515579652e-05, "loss": 0.2281, "step": 1032700 }, { "epoch": 10.52, "learning_rate": 1.3106686218630794e-05, "loss": 0.2804, "step": 1032800 }, { "epoch": 10.52, "learning_rate": 1.3101204747664156e-05, "loss": 0.3569, "step": 1032900 }, { "epoch": 10.52, "learning_rate": 1.3095724102947762e-05, "loss": 0.2745, "step": 1033000 }, { "epoch": 10.53, "learning_rate": 1.3090244284749586e-05, "loss": 0.2451, "step": 1033100 }, { "epoch": 10.53, "learning_rate": 1.3084765293337547e-05, "loss": 0.2882, "step": 1033200 }, { "epoch": 10.53, "learning_rate": 1.3079287128979534e-05, "loss": 0.286, "step": 1033300 }, { "epoch": 10.53, "learning_rate": 1.3073809791943408e-05, "loss": 0.285, "step": 1033400 }, { "epoch": 10.53, "learning_rate": 1.3068333282496984e-05, "loss": 0.2538, "step": 1033500 }, { "epoch": 10.53, "learning_rate": 1.3062857600907992e-05, "loss": 0.305, "step": 1033600 }, { "epoch": 10.53, "learning_rate": 1.3057382747444203e-05, "loss": 0.3515, "step": 1033700 }, { "epoch": 10.53, "learning_rate": 1.30519087223733e-05, "loss": 0.2378, "step": 1033800 }, { "epoch": 10.53, "learning_rate": 1.304643552596291e-05, "loss": 0.3107, "step": 1033900 }, { "epoch": 10.53, "learning_rate": 1.3040963158480647e-05, "loss": 0.2763, "step": 1034000 }, { "epoch": 10.54, "learning_rate": 1.3035491620194091e-05, "loss": 0.2253, "step": 1034100 }, { "epoch": 10.54, "learning_rate": 1.3030020911370746e-05, "loss": 0.2658, "step": 1034200 }, { "epoch": 10.54, "learning_rate": 1.3024551032278105e-05, "loss": 0.2419, "step": 1034300 }, { "epoch": 10.54, "learning_rate": 1.3019081983183624e-05, "loss": 0.2059, "step": 1034400 }, { "epoch": 10.54, "learning_rate": 1.3013613764354689e-05, "loss": 0.2757, "step": 1034500 }, { "epoch": 10.54, "learning_rate": 1.300814637605867e-05, "loss": 0.2753, "step": 1034600 }, { "epoch": 10.54, "learning_rate": 1.3002679818562904e-05, "loss": 0.2589, "step": 1034700 }, { "epoch": 10.54, "learning_rate": 1.2997214092134648e-05, "loss": 0.3521, "step": 1034800 }, { "epoch": 10.54, "learning_rate": 1.2991749197041145e-05, "loss": 0.2238, "step": 1034900 }, { "epoch": 10.54, "learning_rate": 1.2986285133549627e-05, "loss": 0.2856, "step": 1035000 }, { "epoch": 10.55, "learning_rate": 1.2980821901927223e-05, "loss": 0.2649, "step": 1035100 }, { "epoch": 10.55, "learning_rate": 1.2975359502441063e-05, "loss": 0.3332, "step": 1035200 }, { "epoch": 10.55, "learning_rate": 1.2969897935358225e-05, "loss": 0.2993, "step": 1035300 }, { "epoch": 10.55, "learning_rate": 1.2964437200945762e-05, "loss": 0.2289, "step": 1035400 }, { "epoch": 10.55, "learning_rate": 1.2958977299470643e-05, "loss": 0.2643, "step": 1035500 }, { "epoch": 10.55, "learning_rate": 1.2953518231199838e-05, "loss": 0.3464, "step": 1035600 }, { "epoch": 10.55, "learning_rate": 1.2948059996400274e-05, "loss": 0.2811, "step": 1035700 }, { "epoch": 10.55, "learning_rate": 1.2942602595338809e-05, "loss": 0.2458, "step": 1035800 }, { "epoch": 10.55, "learning_rate": 1.293714602828228e-05, "loss": 0.2886, "step": 1035900 }, { "epoch": 10.55, "learning_rate": 1.2931690295497494e-05, "loss": 0.2732, "step": 1036000 }, { "epoch": 10.56, "learning_rate": 1.2926235397251187e-05, "loss": 0.2798, "step": 1036100 }, { "epoch": 10.56, "learning_rate": 1.2920781333810065e-05, "loss": 0.3005, "step": 1036200 }, { "epoch": 10.56, "learning_rate": 1.2915328105440833e-05, "loss": 0.2721, "step": 1036300 }, { "epoch": 10.56, "learning_rate": 1.2909930232204593e-05, "loss": 0.2797, "step": 1036400 }, { "epoch": 10.56, "learning_rate": 1.290447866642158e-05, "loss": 0.2698, "step": 1036500 }, { "epoch": 10.56, "learning_rate": 1.2899027936507533e-05, "loss": 0.2406, "step": 1036600 }, { "epoch": 10.56, "learning_rate": 1.2893578042728961e-05, "loss": 0.3171, "step": 1036700 }, { "epoch": 10.56, "learning_rate": 1.2888128985352353e-05, "loss": 0.2934, "step": 1036800 }, { "epoch": 10.56, "learning_rate": 1.2882680764644104e-05, "loss": 0.2967, "step": 1036900 }, { "epoch": 10.57, "learning_rate": 1.2877233380870617e-05, "loss": 0.2378, "step": 1037000 }, { "epoch": 10.57, "learning_rate": 1.2871786834298245e-05, "loss": 0.2542, "step": 1037100 }, { "epoch": 10.57, "learning_rate": 1.2866341125193268e-05, "loss": 0.318, "step": 1037200 }, { "epoch": 10.57, "learning_rate": 1.2860896253821964e-05, "loss": 0.2745, "step": 1037300 }, { "epoch": 10.57, "learning_rate": 1.2855452220450553e-05, "loss": 0.2525, "step": 1037400 }, { "epoch": 10.57, "learning_rate": 1.2850009025345211e-05, "loss": 0.2986, "step": 1037500 }, { "epoch": 10.57, "learning_rate": 1.2844566668772088e-05, "loss": 0.2589, "step": 1037600 }, { "epoch": 10.57, "learning_rate": 1.283912515099727e-05, "loss": 0.3451, "step": 1037700 }, { "epoch": 10.57, "learning_rate": 1.2833684472286838e-05, "loss": 0.2608, "step": 1037800 }, { "epoch": 10.57, "learning_rate": 1.2828299027145025e-05, "loss": 0.2631, "step": 1037900 }, { "epoch": 10.58, "learning_rate": 1.2822860018964047e-05, "loss": 0.2084, "step": 1038000 }, { "epoch": 10.58, "learning_rate": 1.2817421850642708e-05, "loss": 0.2737, "step": 1038100 }, { "epoch": 10.58, "learning_rate": 1.28119845224469e-05, "loss": 0.2389, "step": 1038200 }, { "epoch": 10.58, "learning_rate": 1.2806548034642485e-05, "loss": 0.3165, "step": 1038300 }, { "epoch": 10.58, "learning_rate": 1.2801112387495257e-05, "loss": 0.2492, "step": 1038400 }, { "epoch": 10.58, "learning_rate": 1.2795677581270992e-05, "loss": 0.2584, "step": 1038500 }, { "epoch": 10.58, "learning_rate": 1.2790243616235439e-05, "loss": 0.2842, "step": 1038600 }, { "epoch": 10.58, "learning_rate": 1.2784810492654255e-05, "loss": 0.2721, "step": 1038700 }, { "epoch": 10.58, "learning_rate": 1.2779378210793105e-05, "loss": 0.2377, "step": 1038800 }, { "epoch": 10.58, "learning_rate": 1.2773946770917597e-05, "loss": 0.3191, "step": 1038900 }, { "epoch": 10.59, "learning_rate": 1.2768516173293287e-05, "loss": 0.3363, "step": 1039000 }, { "epoch": 10.59, "learning_rate": 1.2763086418185707e-05, "loss": 0.279, "step": 1039100 }, { "epoch": 10.59, "learning_rate": 1.2757657505860348e-05, "loss": 0.3198, "step": 1039200 }, { "epoch": 10.59, "learning_rate": 1.2752229436582628e-05, "loss": 0.2981, "step": 1039300 }, { "epoch": 10.59, "learning_rate": 1.2746802210617956e-05, "loss": 0.2473, "step": 1039400 }, { "epoch": 10.59, "learning_rate": 1.2741375828231709e-05, "loss": 0.3092, "step": 1039500 }, { "epoch": 10.59, "learning_rate": 1.2735950289689179e-05, "loss": 0.2804, "step": 1039600 }, { "epoch": 10.59, "learning_rate": 1.2730525595255652e-05, "loss": 0.2842, "step": 1039700 }, { "epoch": 10.59, "learning_rate": 1.2725101745196376e-05, "loss": 0.3003, "step": 1039800 }, { "epoch": 10.59, "learning_rate": 1.2719678739776522e-05, "loss": 0.2434, "step": 1039900 }, { "epoch": 10.6, "learning_rate": 1.2714256579261256e-05, "loss": 0.2634, "step": 1040000 }, { "epoch": 10.6, "learning_rate": 1.2708835263915687e-05, "loss": 0.272, "step": 1040100 }, { "epoch": 10.6, "learning_rate": 1.2703414794004884e-05, "loss": 0.2914, "step": 1040200 }, { "epoch": 10.6, "learning_rate": 1.2697995169793876e-05, "loss": 0.2863, "step": 1040300 }, { "epoch": 10.6, "learning_rate": 1.2692576391547652e-05, "loss": 0.2631, "step": 1040400 }, { "epoch": 10.6, "learning_rate": 1.2687158459531169e-05, "loss": 0.2966, "step": 1040500 }, { "epoch": 10.6, "learning_rate": 1.2681741374009308e-05, "loss": 0.3026, "step": 1040600 }, { "epoch": 10.6, "learning_rate": 1.2676325135246943e-05, "loss": 0.2804, "step": 1040700 }, { "epoch": 10.6, "learning_rate": 1.267090974350891e-05, "loss": 0.2797, "step": 1040800 }, { "epoch": 10.6, "learning_rate": 1.2665549340309496e-05, "loss": 0.3039, "step": 1040900 }, { "epoch": 10.61, "learning_rate": 1.2660135634937549e-05, "loss": 0.2177, "step": 1041000 }, { "epoch": 10.61, "learning_rate": 1.2654722777381482e-05, "loss": 0.2859, "step": 1041100 }, { "epoch": 10.61, "learning_rate": 1.2649310767905958e-05, "loss": 0.307, "step": 1041200 }, { "epoch": 10.61, "learning_rate": 1.2643899606775592e-05, "loss": 0.2716, "step": 1041300 }, { "epoch": 10.61, "learning_rate": 1.2638489294254964e-05, "loss": 0.3091, "step": 1041400 }, { "epoch": 10.61, "learning_rate": 1.2633079830608598e-05, "loss": 0.3225, "step": 1041500 }, { "epoch": 10.61, "learning_rate": 1.2627671216101002e-05, "loss": 0.2548, "step": 1041600 }, { "epoch": 10.61, "learning_rate": 1.2622263450996597e-05, "loss": 0.2766, "step": 1041700 }, { "epoch": 10.61, "learning_rate": 1.2616856535559806e-05, "loss": 0.2809, "step": 1041800 }, { "epoch": 10.62, "learning_rate": 1.2611450470055e-05, "loss": 0.2533, "step": 1041900 }, { "epoch": 10.62, "learning_rate": 1.2606045254746491e-05, "loss": 0.2307, "step": 1042000 }, { "epoch": 10.62, "learning_rate": 1.2600640889898566e-05, "loss": 0.3126, "step": 1042100 }, { "epoch": 10.62, "learning_rate": 1.2595237375775478e-05, "loss": 0.2625, "step": 1042200 }, { "epoch": 10.62, "learning_rate": 1.2589834712641406e-05, "loss": 0.2565, "step": 1042300 }, { "epoch": 10.62, "learning_rate": 1.2584432900760515e-05, "loss": 0.2231, "step": 1042400 }, { "epoch": 10.62, "learning_rate": 1.2579031940396928e-05, "loss": 0.3016, "step": 1042500 }, { "epoch": 10.62, "learning_rate": 1.2573631831814715e-05, "loss": 0.2425, "step": 1042600 }, { "epoch": 10.62, "learning_rate": 1.2568232575277912e-05, "loss": 0.3709, "step": 1042700 }, { "epoch": 10.62, "learning_rate": 1.2562834171050507e-05, "loss": 0.2491, "step": 1042800 }, { "epoch": 10.63, "learning_rate": 1.255743661939646e-05, "loss": 0.3117, "step": 1042900 }, { "epoch": 10.63, "learning_rate": 1.2552039920579662e-05, "loss": 0.2427, "step": 1043000 }, { "epoch": 10.63, "learning_rate": 1.254664407486399e-05, "loss": 0.3099, "step": 1043100 }, { "epoch": 10.63, "learning_rate": 1.2541249082513273e-05, "loss": 0.3007, "step": 1043200 }, { "epoch": 10.63, "learning_rate": 1.253585494379128e-05, "loss": 0.3353, "step": 1043300 }, { "epoch": 10.63, "learning_rate": 1.2530461658961758e-05, "loss": 0.2218, "step": 1043400 }, { "epoch": 10.63, "learning_rate": 1.2525069228288422e-05, "loss": 0.2991, "step": 1043500 }, { "epoch": 10.63, "learning_rate": 1.25196776520349e-05, "loss": 0.2639, "step": 1043600 }, { "epoch": 10.63, "learning_rate": 1.2514286930464825e-05, "loss": 0.2918, "step": 1043700 }, { "epoch": 10.63, "learning_rate": 1.2508897063841774e-05, "loss": 0.2688, "step": 1043800 }, { "epoch": 10.64, "learning_rate": 1.2503508052429273e-05, "loss": 0.3198, "step": 1043900 }, { "epoch": 10.64, "learning_rate": 1.2498119896490812e-05, "loss": 0.3172, "step": 1044000 }, { "epoch": 10.64, "learning_rate": 1.2492732596289845e-05, "loss": 0.2465, "step": 1044100 }, { "epoch": 10.64, "learning_rate": 1.2487346152089786e-05, "loss": 0.2734, "step": 1044200 }, { "epoch": 10.64, "learning_rate": 1.248196056415398e-05, "loss": 0.2866, "step": 1044300 }, { "epoch": 10.64, "learning_rate": 1.2476575832745758e-05, "loss": 0.2983, "step": 1044400 }, { "epoch": 10.64, "learning_rate": 1.2471191958128415e-05, "loss": 0.2287, "step": 1044500 }, { "epoch": 10.64, "learning_rate": 1.246580894056517e-05, "loss": 0.2757, "step": 1044600 }, { "epoch": 10.64, "learning_rate": 1.246042678031923e-05, "loss": 0.297, "step": 1044700 }, { "epoch": 10.64, "learning_rate": 1.2455045477653758e-05, "loss": 0.278, "step": 1044800 }, { "epoch": 10.65, "learning_rate": 1.2449772633319718e-05, "loss": 0.196, "step": 1044900 }, { "epoch": 10.65, "learning_rate": 1.2444393029439743e-05, "loss": 0.3136, "step": 1045000 }, { "epoch": 10.65, "learning_rate": 1.2439014283924182e-05, "loss": 0.3101, "step": 1045100 }, { "epoch": 10.65, "learning_rate": 1.2433636397036026e-05, "loss": 0.2677, "step": 1045200 }, { "epoch": 10.65, "learning_rate": 1.2428259369038217e-05, "loss": 0.2532, "step": 1045300 }, { "epoch": 10.65, "learning_rate": 1.2422883200193665e-05, "loss": 0.203, "step": 1045400 }, { "epoch": 10.65, "learning_rate": 1.241750789076524e-05, "loss": 0.2797, "step": 1045500 }, { "epoch": 10.65, "learning_rate": 1.241213344101574e-05, "loss": 0.2525, "step": 1045600 }, { "epoch": 10.65, "learning_rate": 1.2406759851207956e-05, "loss": 0.2852, "step": 1045700 }, { "epoch": 10.65, "learning_rate": 1.2401387121604636e-05, "loss": 0.3302, "step": 1045800 }, { "epoch": 10.66, "learning_rate": 1.2396015252468452e-05, "loss": 0.2541, "step": 1045900 }, { "epoch": 10.66, "learning_rate": 1.2390644244062066e-05, "loss": 0.2347, "step": 1046000 }, { "epoch": 10.66, "learning_rate": 1.2385274096648101e-05, "loss": 0.2502, "step": 1046100 }, { "epoch": 10.66, "learning_rate": 1.2379904810489102e-05, "loss": 0.2737, "step": 1046200 }, { "epoch": 10.66, "learning_rate": 1.2374536385847607e-05, "loss": 0.2928, "step": 1046300 }, { "epoch": 10.66, "learning_rate": 1.2369168822986101e-05, "loss": 0.2496, "step": 1046400 }, { "epoch": 10.66, "learning_rate": 1.2363802122167022e-05, "loss": 0.2656, "step": 1046500 }, { "epoch": 10.66, "learning_rate": 1.2358436283652772e-05, "loss": 0.264, "step": 1046600 }, { "epoch": 10.66, "learning_rate": 1.2353071307705719e-05, "loss": 0.3014, "step": 1046700 }, { "epoch": 10.66, "learning_rate": 1.2347707194588159e-05, "loss": 0.2846, "step": 1046800 }, { "epoch": 10.67, "learning_rate": 1.234234394456237e-05, "loss": 0.2465, "step": 1046900 }, { "epoch": 10.67, "learning_rate": 1.233698155789059e-05, "loss": 0.228, "step": 1047000 }, { "epoch": 10.67, "learning_rate": 1.2331620034835016e-05, "loss": 0.343, "step": 1047100 }, { "epoch": 10.67, "learning_rate": 1.232625937565777e-05, "loss": 0.3441, "step": 1047200 }, { "epoch": 10.67, "learning_rate": 1.232089958062097e-05, "loss": 0.2887, "step": 1047300 }, { "epoch": 10.67, "learning_rate": 1.2315540649986685e-05, "loss": 0.3047, "step": 1047400 }, { "epoch": 10.67, "learning_rate": 1.2310182584016922e-05, "loss": 0.2157, "step": 1047500 }, { "epoch": 10.67, "learning_rate": 1.230482538297366e-05, "loss": 0.2479, "step": 1047600 }, { "epoch": 10.67, "learning_rate": 1.2299469047118837e-05, "loss": 0.2763, "step": 1047700 }, { "epoch": 10.68, "learning_rate": 1.229411357671435e-05, "loss": 0.3059, "step": 1047800 }, { "epoch": 10.68, "learning_rate": 1.2288758972022039e-05, "loss": 0.2955, "step": 1047900 }, { "epoch": 10.68, "learning_rate": 1.2283405233303737e-05, "loss": 0.2784, "step": 1048000 }, { "epoch": 10.68, "learning_rate": 1.2278052360821176e-05, "loss": 0.2677, "step": 1048100 }, { "epoch": 10.68, "learning_rate": 1.2272700354836097e-05, "loss": 0.2926, "step": 1048200 }, { "epoch": 10.68, "learning_rate": 1.2267349215610191e-05, "loss": 0.2836, "step": 1048300 }, { "epoch": 10.68, "learning_rate": 1.2261998943405075e-05, "loss": 0.3303, "step": 1048400 }, { "epoch": 10.68, "learning_rate": 1.2256649538482356e-05, "loss": 0.2773, "step": 1048500 }, { "epoch": 10.68, "learning_rate": 1.22513010011036e-05, "loss": 0.2628, "step": 1048600 }, { "epoch": 10.68, "learning_rate": 1.2245953331530292e-05, "loss": 0.3077, "step": 1048700 }, { "epoch": 10.69, "learning_rate": 1.2240606530023917e-05, "loss": 0.2776, "step": 1048800 }, { "epoch": 10.69, "learning_rate": 1.2235260596845898e-05, "loss": 0.2588, "step": 1048900 }, { "epoch": 10.69, "learning_rate": 1.2229915532257626e-05, "loss": 0.2881, "step": 1049000 }, { "epoch": 10.69, "learning_rate": 1.2224571336520434e-05, "loss": 0.2468, "step": 1049100 }, { "epoch": 10.69, "learning_rate": 1.2219228009895628e-05, "loss": 0.3087, "step": 1049200 }, { "epoch": 10.69, "learning_rate": 1.2213885552644472e-05, "loss": 0.2883, "step": 1049300 }, { "epoch": 10.69, "learning_rate": 1.220854396502816e-05, "loss": 0.3225, "step": 1049400 }, { "epoch": 10.69, "learning_rate": 1.2203203247307873e-05, "loss": 0.3172, "step": 1049500 }, { "epoch": 10.69, "learning_rate": 1.2197863399744752e-05, "loss": 0.2876, "step": 1049600 }, { "epoch": 10.69, "learning_rate": 1.2192524422599865e-05, "loss": 0.3468, "step": 1049700 }, { "epoch": 10.7, "learning_rate": 1.2187186316134264e-05, "loss": 0.2775, "step": 1049800 }, { "epoch": 10.7, "learning_rate": 1.218184908060896e-05, "loss": 0.2877, "step": 1049900 }, { "epoch": 10.7, "learning_rate": 1.2176512716284893e-05, "loss": 0.3316, "step": 1050000 }, { "epoch": 10.7, "eval_cer": 0.061191920985045785, "eval_loss": 0.3567371368408203, "eval_runtime": 9398.6411, "eval_samples_per_second": 5.821, "eval_steps_per_second": 0.364, "eval_wer": 0.1293959408625667, "step": 1050000 }, { "epoch": 10.7, "learning_rate": 1.2171177223422988e-05, "loss": 0.3312, "step": 1050100 }, { "epoch": 10.7, "learning_rate": 1.2165842602284122e-05, "loss": 0.2976, "step": 1050200 }, { "epoch": 10.7, "learning_rate": 1.2160508853129123e-05, "loss": 0.3116, "step": 1050300 }, { "epoch": 10.7, "learning_rate": 1.2155175976218777e-05, "loss": 0.2894, "step": 1050400 }, { "epoch": 10.7, "learning_rate": 1.2149843971813839e-05, "loss": 0.2531, "step": 1050500 }, { "epoch": 10.7, "learning_rate": 1.2144512840175014e-05, "loss": 0.2489, "step": 1050600 }, { "epoch": 10.7, "learning_rate": 1.2139182581562948e-05, "loss": 0.3093, "step": 1050700 }, { "epoch": 10.71, "learning_rate": 1.2133853196238263e-05, "loss": 0.298, "step": 1050800 }, { "epoch": 10.71, "learning_rate": 1.2128524684461548e-05, "loss": 0.3105, "step": 1050900 }, { "epoch": 10.71, "learning_rate": 1.2123197046493313e-05, "loss": 0.2771, "step": 1051000 }, { "epoch": 10.71, "learning_rate": 1.2117870282594062e-05, "loss": 0.2568, "step": 1051100 }, { "epoch": 10.71, "learning_rate": 1.2112544393024248e-05, "loss": 0.3078, "step": 1051200 }, { "epoch": 10.71, "learning_rate": 1.2107219378044257e-05, "loss": 0.3364, "step": 1051300 }, { "epoch": 10.71, "learning_rate": 1.2101895237914456e-05, "loss": 0.2345, "step": 1051400 }, { "epoch": 10.71, "learning_rate": 1.2096571972895171e-05, "loss": 0.3483, "step": 1051500 }, { "epoch": 10.71, "learning_rate": 1.2091249583246673e-05, "loss": 0.2517, "step": 1051600 }, { "epoch": 10.71, "learning_rate": 1.2085928069229198e-05, "loss": 0.2499, "step": 1051700 }, { "epoch": 10.72, "learning_rate": 1.2080607431102946e-05, "loss": 0.2603, "step": 1051800 }, { "epoch": 10.72, "learning_rate": 1.2075287669128041e-05, "loss": 0.2684, "step": 1051900 }, { "epoch": 10.72, "learning_rate": 1.20699687835646e-05, "loss": 0.2777, "step": 1052000 }, { "epoch": 10.72, "learning_rate": 1.2064650774672698e-05, "loss": 0.2891, "step": 1052100 }, { "epoch": 10.72, "learning_rate": 1.2059333642712326e-05, "loss": 0.3044, "step": 1052200 }, { "epoch": 10.72, "learning_rate": 1.2054017387943477e-05, "loss": 0.2769, "step": 1052300 }, { "epoch": 10.72, "learning_rate": 1.204870201062608e-05, "loss": 0.3124, "step": 1052400 }, { "epoch": 10.72, "learning_rate": 1.2043387511020039e-05, "loss": 0.2743, "step": 1052500 }, { "epoch": 10.72, "learning_rate": 1.2038127021254721e-05, "loss": 0.2733, "step": 1052600 }, { "epoch": 10.73, "learning_rate": 1.203281426906726e-05, "loss": 0.3063, "step": 1052700 }, { "epoch": 10.73, "learning_rate": 1.2027502395367958e-05, "loss": 0.2884, "step": 1052800 }, { "epoch": 10.73, "learning_rate": 1.2022191400416539e-05, "loss": 0.2914, "step": 1052900 }, { "epoch": 10.73, "learning_rate": 1.2016881284472668e-05, "loss": 0.2463, "step": 1053000 }, { "epoch": 10.73, "learning_rate": 1.201157204779599e-05, "loss": 0.2284, "step": 1053100 }, { "epoch": 10.73, "learning_rate": 1.2006263690646085e-05, "loss": 0.2898, "step": 1053200 }, { "epoch": 10.73, "learning_rate": 1.2000956213282515e-05, "loss": 0.29, "step": 1053300 }, { "epoch": 10.73, "learning_rate": 1.1995649615964765e-05, "loss": 0.3346, "step": 1053400 }, { "epoch": 10.73, "learning_rate": 1.1990343898952308e-05, "loss": 0.2832, "step": 1053500 }, { "epoch": 10.73, "learning_rate": 1.1985039062504563e-05, "loss": 0.2972, "step": 1053600 }, { "epoch": 10.74, "learning_rate": 1.197973510688089e-05, "loss": 0.2529, "step": 1053700 }, { "epoch": 10.74, "learning_rate": 1.1974432032340634e-05, "loss": 0.2802, "step": 1053800 }, { "epoch": 10.74, "learning_rate": 1.196912983914309e-05, "loss": 0.3207, "step": 1053900 }, { "epoch": 10.74, "learning_rate": 1.1963828527547485e-05, "loss": 0.2685, "step": 1054000 }, { "epoch": 10.74, "learning_rate": 1.1958528097813028e-05, "loss": 0.2447, "step": 1054100 }, { "epoch": 10.74, "learning_rate": 1.1953228550198883e-05, "loss": 0.3094, "step": 1054200 }, { "epoch": 10.74, "learning_rate": 1.1947929884964166e-05, "loss": 0.1863, "step": 1054300 }, { "epoch": 10.74, "learning_rate": 1.1942632102367946e-05, "loss": 0.2305, "step": 1054400 }, { "epoch": 10.74, "learning_rate": 1.1937335202669266e-05, "loss": 0.2776, "step": 1054500 }, { "epoch": 10.74, "learning_rate": 1.1932039186127091e-05, "loss": 0.301, "step": 1054600 }, { "epoch": 10.75, "learning_rate": 1.1926744053000379e-05, "loss": 0.2865, "step": 1054700 }, { "epoch": 10.75, "learning_rate": 1.1921449803548024e-05, "loss": 0.3265, "step": 1054800 }, { "epoch": 10.75, "learning_rate": 1.1916156438028898e-05, "loss": 0.2762, "step": 1054900 }, { "epoch": 10.75, "learning_rate": 1.1910863956701795e-05, "loss": 0.297, "step": 1055000 }, { "epoch": 10.75, "learning_rate": 1.1905625271415379e-05, "loss": 0.2922, "step": 1055100 }, { "epoch": 10.75, "learning_rate": 1.1900334550400229e-05, "loss": 0.3215, "step": 1055200 }, { "epoch": 10.75, "learning_rate": 1.1895044714350709e-05, "loss": 0.2783, "step": 1055300 }, { "epoch": 10.75, "learning_rate": 1.1889755763525464e-05, "loss": 0.304, "step": 1055400 }, { "epoch": 10.75, "learning_rate": 1.1884467698183069e-05, "loss": 0.2784, "step": 1055500 }, { "epoch": 10.75, "learning_rate": 1.1879180518582107e-05, "loss": 0.3194, "step": 1055600 }, { "epoch": 10.76, "learning_rate": 1.1873894224981084e-05, "loss": 0.3031, "step": 1055700 }, { "epoch": 10.76, "learning_rate": 1.1868608817638455e-05, "loss": 0.2789, "step": 1055800 }, { "epoch": 10.76, "learning_rate": 1.1863324296812653e-05, "loss": 0.2452, "step": 1055900 }, { "epoch": 10.76, "learning_rate": 1.1858040662762068e-05, "loss": 0.3012, "step": 1056000 }, { "epoch": 10.76, "learning_rate": 1.1852757915745019e-05, "loss": 0.2687, "step": 1056100 }, { "epoch": 10.76, "learning_rate": 1.1847476056019812e-05, "loss": 0.3049, "step": 1056200 }, { "epoch": 10.76, "learning_rate": 1.1842195083844706e-05, "loss": 0.2425, "step": 1056300 }, { "epoch": 10.76, "learning_rate": 1.1836914999477892e-05, "loss": 0.2592, "step": 1056400 }, { "epoch": 10.76, "learning_rate": 1.183163580317754e-05, "loss": 0.3253, "step": 1056500 }, { "epoch": 10.76, "learning_rate": 1.1826357495201785e-05, "loss": 0.2794, "step": 1056600 }, { "epoch": 10.77, "learning_rate": 1.1821080075808684e-05, "loss": 0.2835, "step": 1056700 }, { "epoch": 10.77, "learning_rate": 1.1815803545256272e-05, "loss": 0.3021, "step": 1056800 }, { "epoch": 10.77, "learning_rate": 1.1810527903802568e-05, "loss": 0.2741, "step": 1056900 }, { "epoch": 10.77, "learning_rate": 1.1805253151705491e-05, "loss": 0.2585, "step": 1057000 }, { "epoch": 10.77, "learning_rate": 1.1799979289222954e-05, "loss": 0.2507, "step": 1057100 }, { "epoch": 10.77, "learning_rate": 1.1794706316612819e-05, "loss": 0.2366, "step": 1057200 }, { "epoch": 10.77, "learning_rate": 1.178943423413291e-05, "loss": 0.3218, "step": 1057300 }, { "epoch": 10.77, "learning_rate": 1.1784163042040983e-05, "loss": 0.3204, "step": 1057400 }, { "epoch": 10.77, "learning_rate": 1.1778892740594775e-05, "loss": 0.294, "step": 1057500 }, { "epoch": 10.78, "learning_rate": 1.1773623330051987e-05, "loss": 0.2625, "step": 1057600 }, { "epoch": 10.78, "learning_rate": 1.1768354810670234e-05, "loss": 0.3092, "step": 1057700 }, { "epoch": 10.78, "learning_rate": 1.1763087182707135e-05, "loss": 0.2805, "step": 1057800 }, { "epoch": 10.78, "learning_rate": 1.1757925772405894e-05, "loss": 0.2903, "step": 1057900 }, { "epoch": 10.78, "learning_rate": 1.1752659910211517e-05, "loss": 0.314, "step": 1058000 }, { "epoch": 10.78, "learning_rate": 1.1747394940203177e-05, "loss": 0.2116, "step": 1058100 }, { "epoch": 10.78, "learning_rate": 1.1742130862638295e-05, "loss": 0.3067, "step": 1058200 }, { "epoch": 10.78, "learning_rate": 1.173686767777426e-05, "loss": 0.2975, "step": 1058300 }, { "epoch": 10.78, "learning_rate": 1.1731605385868405e-05, "loss": 0.2672, "step": 1058400 }, { "epoch": 10.78, "learning_rate": 1.1726343987178027e-05, "loss": 0.3345, "step": 1058500 }, { "epoch": 10.79, "learning_rate": 1.1721083481960385e-05, "loss": 0.3027, "step": 1058600 }, { "epoch": 10.79, "learning_rate": 1.171582387047267e-05, "loss": 0.2517, "step": 1058700 }, { "epoch": 10.79, "learning_rate": 1.1710565152972053e-05, "loss": 0.2893, "step": 1058800 }, { "epoch": 10.79, "learning_rate": 1.1705307329715666e-05, "loss": 0.2599, "step": 1058900 }, { "epoch": 10.79, "learning_rate": 1.1700050400960562e-05, "loss": 0.2761, "step": 1059000 }, { "epoch": 10.79, "learning_rate": 1.1694794366963786e-05, "loss": 0.2171, "step": 1059100 }, { "epoch": 10.79, "learning_rate": 1.1689539227982333e-05, "loss": 0.3005, "step": 1059200 }, { "epoch": 10.79, "learning_rate": 1.1684284984273132e-05, "loss": 0.2295, "step": 1059300 }, { "epoch": 10.79, "learning_rate": 1.1679031636093094e-05, "loss": 0.2274, "step": 1059400 }, { "epoch": 10.79, "learning_rate": 1.1673779183699072e-05, "loss": 0.2898, "step": 1059500 }, { "epoch": 10.8, "learning_rate": 1.1668527627347887e-05, "loss": 0.2617, "step": 1059600 }, { "epoch": 10.8, "learning_rate": 1.1663276967296303e-05, "loss": 0.2826, "step": 1059700 }, { "epoch": 10.8, "learning_rate": 1.1658027203801057e-05, "loss": 0.269, "step": 1059800 }, { "epoch": 10.8, "learning_rate": 1.1652778337118813e-05, "loss": 0.2225, "step": 1059900 }, { "epoch": 10.8, "learning_rate": 1.1647530367506214e-05, "loss": 0.252, "step": 1060000 }, { "epoch": 10.8, "learning_rate": 1.164228329521986e-05, "loss": 0.2755, "step": 1060100 }, { "epoch": 10.8, "learning_rate": 1.1637037120516312e-05, "loss": 0.2118, "step": 1060200 }, { "epoch": 10.8, "learning_rate": 1.1631791843652054e-05, "loss": 0.3242, "step": 1060300 }, { "epoch": 10.8, "learning_rate": 1.1626547464883555e-05, "loss": 0.2567, "step": 1060400 }, { "epoch": 10.8, "learning_rate": 1.1621303984467251e-05, "loss": 0.3587, "step": 1060500 }, { "epoch": 10.81, "learning_rate": 1.1616061402659494e-05, "loss": 0.3384, "step": 1060600 }, { "epoch": 10.81, "learning_rate": 1.1610819719716625e-05, "loss": 0.259, "step": 1060700 }, { "epoch": 10.81, "learning_rate": 1.1605578935894927e-05, "loss": 0.3, "step": 1060800 }, { "epoch": 10.81, "learning_rate": 1.1600339051450653e-05, "loss": 0.3417, "step": 1060900 }, { "epoch": 10.81, "learning_rate": 1.1595100066639994e-05, "loss": 0.2429, "step": 1061000 }, { "epoch": 10.81, "learning_rate": 1.1589861981719115e-05, "loss": 0.269, "step": 1061100 }, { "epoch": 10.81, "learning_rate": 1.158462479694411e-05, "loss": 0.265, "step": 1061200 }, { "epoch": 10.81, "learning_rate": 1.1579388512571055e-05, "loss": 0.2421, "step": 1061300 }, { "epoch": 10.81, "learning_rate": 1.1574153128855983e-05, "loss": 0.2823, "step": 1061400 }, { "epoch": 10.81, "learning_rate": 1.1568918646054854e-05, "loss": 0.284, "step": 1061500 }, { "epoch": 10.82, "learning_rate": 1.1563685064423615e-05, "loss": 0.3135, "step": 1061600 }, { "epoch": 10.82, "learning_rate": 1.1558452384218158e-05, "loss": 0.3103, "step": 1061700 }, { "epoch": 10.82, "learning_rate": 1.1553220605694322e-05, "loss": 0.2525, "step": 1061800 }, { "epoch": 10.82, "learning_rate": 1.1547989729107911e-05, "loss": 0.2729, "step": 1061900 }, { "epoch": 10.82, "learning_rate": 1.1542759754714686e-05, "loss": 0.266, "step": 1062000 }, { "epoch": 10.82, "learning_rate": 1.1537530682770363e-05, "loss": 0.251, "step": 1062100 }, { "epoch": 10.82, "learning_rate": 1.1532302513530611e-05, "loss": 0.2831, "step": 1062200 }, { "epoch": 10.82, "learning_rate": 1.1527075247251057e-05, "loss": 0.3287, "step": 1062300 }, { "epoch": 10.82, "learning_rate": 1.152184888418729e-05, "loss": 0.2754, "step": 1062400 }, { "epoch": 10.82, "learning_rate": 1.1516623424594835e-05, "loss": 0.3436, "step": 1062500 }, { "epoch": 10.83, "learning_rate": 1.1511398868729187e-05, "loss": 0.2508, "step": 1062600 }, { "epoch": 10.83, "learning_rate": 1.1506175216845815e-05, "loss": 0.2733, "step": 1062700 }, { "epoch": 10.83, "learning_rate": 1.1500952469200097e-05, "loss": 0.2791, "step": 1062800 }, { "epoch": 10.83, "learning_rate": 1.1495730626047406e-05, "loss": 0.2727, "step": 1062900 }, { "epoch": 10.83, "learning_rate": 1.1490509687643072e-05, "loss": 0.2692, "step": 1063000 }, { "epoch": 10.83, "learning_rate": 1.1485289654242343e-05, "loss": 0.2941, "step": 1063100 }, { "epoch": 10.83, "learning_rate": 1.1480070526100461e-05, "loss": 0.2291, "step": 1063200 }, { "epoch": 10.83, "learning_rate": 1.1474852303472607e-05, "loss": 0.3021, "step": 1063300 }, { "epoch": 10.83, "learning_rate": 1.1469634986613927e-05, "loss": 0.2798, "step": 1063400 }, { "epoch": 10.84, "learning_rate": 1.146441857577951e-05, "loss": 0.299, "step": 1063500 }, { "epoch": 10.84, "learning_rate": 1.145920307122441e-05, "loss": 0.2369, "step": 1063600 }, { "epoch": 10.84, "learning_rate": 1.1453988473203648e-05, "loss": 0.243, "step": 1063700 }, { "epoch": 10.84, "learning_rate": 1.1448826914395034e-05, "loss": 0.2869, "step": 1063800 }, { "epoch": 10.84, "learning_rate": 1.1443614121136047e-05, "loss": 0.275, "step": 1063900 }, { "epoch": 10.84, "learning_rate": 1.1438402235173591e-05, "loss": 0.2783, "step": 1064000 }, { "epoch": 10.84, "learning_rate": 1.1433191256762498e-05, "loss": 0.3269, "step": 1064100 }, { "epoch": 10.84, "learning_rate": 1.1427981186157556e-05, "loss": 0.2247, "step": 1064200 }, { "epoch": 10.84, "learning_rate": 1.1422772023613497e-05, "loss": 0.2494, "step": 1064300 }, { "epoch": 10.84, "learning_rate": 1.1417563769385021e-05, "loss": 0.3307, "step": 1064400 }, { "epoch": 10.85, "learning_rate": 1.1412408492685114e-05, "loss": 0.2266, "step": 1064500 }, { "epoch": 10.85, "learning_rate": 1.140725410672193e-05, "loss": 0.2585, "step": 1064600 }, { "epoch": 10.85, "learning_rate": 1.1402048560783865e-05, "loss": 0.2668, "step": 1064700 }, { "epoch": 10.85, "learning_rate": 1.1396843924174647e-05, "loss": 0.271, "step": 1064800 }, { "epoch": 10.85, "learning_rate": 1.1391640197148735e-05, "loss": 0.2935, "step": 1064900 }, { "epoch": 10.85, "learning_rate": 1.1386437379960565e-05, "loss": 0.2748, "step": 1065000 }, { "epoch": 10.85, "learning_rate": 1.138123547286454e-05, "loss": 0.1948, "step": 1065100 }, { "epoch": 10.85, "learning_rate": 1.1376034476114979e-05, "loss": 0.3112, "step": 1065200 }, { "epoch": 10.85, "learning_rate": 1.137083438996619e-05, "loss": 0.2922, "step": 1065300 }, { "epoch": 10.85, "learning_rate": 1.1365635214672426e-05, "loss": 0.2692, "step": 1065400 }, { "epoch": 10.86, "learning_rate": 1.136043695048791e-05, "loss": 0.333, "step": 1065500 }, { "epoch": 10.86, "learning_rate": 1.1355239597666776e-05, "loss": 0.2755, "step": 1065600 }, { "epoch": 10.86, "learning_rate": 1.1350043156463163e-05, "loss": 0.2815, "step": 1065700 }, { "epoch": 10.86, "learning_rate": 1.1344847627131141e-05, "loss": 0.2943, "step": 1065800 }, { "epoch": 10.86, "learning_rate": 1.1339653009924745e-05, "loss": 0.2631, "step": 1065900 }, { "epoch": 10.86, "learning_rate": 1.1334459305097953e-05, "loss": 0.3429, "step": 1066000 }, { "epoch": 10.86, "learning_rate": 1.1329266512904723e-05, "loss": 0.2423, "step": 1066100 }, { "epoch": 10.86, "learning_rate": 1.132407463359893e-05, "loss": 0.2732, "step": 1066200 }, { "epoch": 10.86, "learning_rate": 1.1318883667434435e-05, "loss": 0.262, "step": 1066300 }, { "epoch": 10.86, "learning_rate": 1.1313693614665058e-05, "loss": 0.2419, "step": 1066400 }, { "epoch": 10.87, "learning_rate": 1.1308504475544535e-05, "loss": 0.2457, "step": 1066500 }, { "epoch": 10.87, "learning_rate": 1.1303316250326597e-05, "loss": 0.2388, "step": 1066600 }, { "epoch": 10.87, "learning_rate": 1.1298128939264933e-05, "loss": 0.332, "step": 1066700 }, { "epoch": 10.87, "learning_rate": 1.1292942542613145e-05, "loss": 0.2874, "step": 1066800 }, { "epoch": 10.87, "learning_rate": 1.128775706062483e-05, "loss": 0.2773, "step": 1066900 }, { "epoch": 10.87, "learning_rate": 1.1282572493553523e-05, "loss": 0.2743, "step": 1067000 }, { "epoch": 10.87, "learning_rate": 1.1277388841652723e-05, "loss": 0.308, "step": 1067100 }, { "epoch": 10.87, "learning_rate": 1.1272206105175878e-05, "loss": 0.2355, "step": 1067200 }, { "epoch": 10.87, "learning_rate": 1.1267024284376395e-05, "loss": 0.2861, "step": 1067300 }, { "epoch": 10.87, "learning_rate": 1.1261843379507637e-05, "loss": 0.2815, "step": 1067400 }, { "epoch": 10.88, "learning_rate": 1.1256663390822908e-05, "loss": 0.26, "step": 1067500 }, { "epoch": 10.88, "learning_rate": 1.1251484318575485e-05, "loss": 0.2703, "step": 1067600 }, { "epoch": 10.88, "learning_rate": 1.1246306163018602e-05, "loss": 0.2284, "step": 1067700 }, { "epoch": 10.88, "learning_rate": 1.1241128924405422e-05, "loss": 0.2831, "step": 1067800 }, { "epoch": 10.88, "learning_rate": 1.1235952602989088e-05, "loss": 0.3114, "step": 1067900 }, { "epoch": 10.88, "learning_rate": 1.1230777199022708e-05, "loss": 0.2777, "step": 1068000 }, { "epoch": 10.88, "learning_rate": 1.1225602712759301e-05, "loss": 0.2936, "step": 1068100 }, { "epoch": 10.88, "learning_rate": 1.1220429144451885e-05, "loss": 0.2651, "step": 1068200 }, { "epoch": 10.88, "learning_rate": 1.1215256494353411e-05, "loss": 0.3062, "step": 1068300 }, { "epoch": 10.89, "learning_rate": 1.121008476271679e-05, "loss": 0.2919, "step": 1068400 }, { "epoch": 10.89, "learning_rate": 1.1204913949794897e-05, "loss": 0.2939, "step": 1068500 }, { "epoch": 10.89, "learning_rate": 1.1199744055840556e-05, "loss": 0.2576, "step": 1068600 }, { "epoch": 10.89, "learning_rate": 1.1194575081106525e-05, "loss": 0.2934, "step": 1068700 }, { "epoch": 10.89, "learning_rate": 1.1189458701845944e-05, "loss": 0.3137, "step": 1068800 }, { "epoch": 10.89, "learning_rate": 1.11842915571122e-05, "loss": 0.2576, "step": 1068900 }, { "epoch": 10.89, "learning_rate": 1.1179125332354317e-05, "loss": 0.3034, "step": 1069000 }, { "epoch": 10.89, "learning_rate": 1.1173960027824892e-05, "loss": 0.2962, "step": 1069100 }, { "epoch": 10.89, "learning_rate": 1.116879564377647e-05, "loss": 0.2792, "step": 1069200 }, { "epoch": 10.89, "learning_rate": 1.1163632180461555e-05, "loss": 0.2989, "step": 1069300 }, { "epoch": 10.9, "learning_rate": 1.1158469638132634e-05, "loss": 0.308, "step": 1069400 }, { "epoch": 10.9, "learning_rate": 1.1153308017042093e-05, "loss": 0.2874, "step": 1069500 }, { "epoch": 10.9, "learning_rate": 1.114814731744232e-05, "loss": 0.2469, "step": 1069600 }, { "epoch": 10.9, "learning_rate": 1.1142987539585638e-05, "loss": 0.2508, "step": 1069700 }, { "epoch": 10.9, "learning_rate": 1.1137828683724336e-05, "loss": 0.3061, "step": 1069800 }, { "epoch": 10.9, "learning_rate": 1.1132670750110647e-05, "loss": 0.2286, "step": 1069900 }, { "epoch": 10.9, "learning_rate": 1.1127513738996767e-05, "loss": 0.2077, "step": 1070000 }, { "epoch": 10.9, "learning_rate": 1.1122357650634846e-05, "loss": 0.2853, "step": 1070100 }, { "epoch": 10.9, "learning_rate": 1.1117202485276971e-05, "loss": 0.2585, "step": 1070200 }, { "epoch": 10.9, "learning_rate": 1.111204824317521e-05, "loss": 0.3103, "step": 1070300 }, { "epoch": 10.91, "learning_rate": 1.110689492458158e-05, "loss": 0.3086, "step": 1070400 }, { "epoch": 10.91, "learning_rate": 1.1101742529748031e-05, "loss": 0.3598, "step": 1070500 }, { "epoch": 10.91, "learning_rate": 1.1096591058926493e-05, "loss": 0.3084, "step": 1070600 }, { "epoch": 10.91, "learning_rate": 1.1091440512368854e-05, "loss": 0.311, "step": 1070700 }, { "epoch": 10.91, "learning_rate": 1.108629089032692e-05, "loss": 0.33, "step": 1070800 }, { "epoch": 10.91, "learning_rate": 1.1081142193052495e-05, "loss": 0.2534, "step": 1070900 }, { "epoch": 10.91, "learning_rate": 1.107599442079731e-05, "loss": 0.2796, "step": 1071000 }, { "epoch": 10.91, "learning_rate": 1.1070847573813067e-05, "loss": 0.2555, "step": 1071100 }, { "epoch": 10.91, "learning_rate": 1.1065701652351417e-05, "loss": 0.2869, "step": 1071200 }, { "epoch": 10.91, "learning_rate": 1.1060556656663961e-05, "loss": 0.2588, "step": 1071300 }, { "epoch": 10.92, "learning_rate": 1.1055412587002268e-05, "loss": 0.2879, "step": 1071400 }, { "epoch": 10.92, "learning_rate": 1.1050269443617836e-05, "loss": 0.3026, "step": 1071500 }, { "epoch": 10.92, "learning_rate": 1.1045127226762142e-05, "loss": 0.2603, "step": 1071600 }, { "epoch": 10.92, "learning_rate": 1.103998593668662e-05, "loss": 0.3057, "step": 1071700 }, { "epoch": 10.92, "learning_rate": 1.103489697268344e-05, "loss": 0.2814, "step": 1071800 }, { "epoch": 10.92, "learning_rate": 1.1029757527648247e-05, "loss": 0.2707, "step": 1071900 }, { "epoch": 10.92, "learning_rate": 1.1024619010144705e-05, "loss": 0.2808, "step": 1072000 }, { "epoch": 10.92, "learning_rate": 1.1019481420424051e-05, "loss": 0.3063, "step": 1072100 }, { "epoch": 10.92, "learning_rate": 1.1014344758737499e-05, "loss": 0.2732, "step": 1072200 }, { "epoch": 10.92, "learning_rate": 1.1009209025336166e-05, "loss": 0.2387, "step": 1072300 }, { "epoch": 10.93, "learning_rate": 1.100407422047119e-05, "loss": 0.2773, "step": 1072400 }, { "epoch": 10.93, "learning_rate": 1.0998940344393634e-05, "loss": 0.2122, "step": 1072500 }, { "epoch": 10.93, "learning_rate": 1.0993807397354493e-05, "loss": 0.263, "step": 1072600 }, { "epoch": 10.93, "learning_rate": 1.0988675379604748e-05, "loss": 0.2922, "step": 1072700 }, { "epoch": 10.93, "learning_rate": 1.098354429139533e-05, "loss": 0.2611, "step": 1072800 }, { "epoch": 10.93, "learning_rate": 1.0978414132977105e-05, "loss": 0.3001, "step": 1072900 }, { "epoch": 10.93, "learning_rate": 1.0973284904600911e-05, "loss": 0.2481, "step": 1073000 }, { "epoch": 10.93, "learning_rate": 1.0968156606517555e-05, "loss": 0.3033, "step": 1073100 }, { "epoch": 10.93, "learning_rate": 1.0963029238977752e-05, "loss": 0.3337, "step": 1073200 }, { "epoch": 10.93, "learning_rate": 1.0957902802232212e-05, "loss": 0.3331, "step": 1073300 }, { "epoch": 10.94, "learning_rate": 1.0952777296531596e-05, "loss": 0.2766, "step": 1073400 }, { "epoch": 10.94, "learning_rate": 1.0947652722126493e-05, "loss": 0.2874, "step": 1073500 }, { "epoch": 10.94, "learning_rate": 1.0942529079267464e-05, "loss": 0.3068, "step": 1073600 }, { "epoch": 10.94, "learning_rate": 1.0937406368205044e-05, "loss": 0.221, "step": 1073700 }, { "epoch": 10.94, "learning_rate": 1.0932284589189706e-05, "loss": 0.243, "step": 1073800 }, { "epoch": 10.94, "learning_rate": 1.0927163742471847e-05, "loss": 0.2689, "step": 1073900 }, { "epoch": 10.94, "learning_rate": 1.0922043828301861e-05, "loss": 0.3436, "step": 1074000 }, { "epoch": 10.94, "learning_rate": 1.0916924846930093e-05, "loss": 0.3128, "step": 1074100 }, { "epoch": 10.94, "learning_rate": 1.0911806798606807e-05, "loss": 0.2527, "step": 1074200 }, { "epoch": 10.95, "learning_rate": 1.0906689683582256e-05, "loss": 0.2803, "step": 1074300 }, { "epoch": 10.95, "learning_rate": 1.0901573502106647e-05, "loss": 0.2724, "step": 1074400 }, { "epoch": 10.95, "learning_rate": 1.0896458254430108e-05, "loss": 0.2911, "step": 1074500 }, { "epoch": 10.95, "learning_rate": 1.0891343940802758e-05, "loss": 0.2565, "step": 1074600 }, { "epoch": 10.95, "learning_rate": 1.0886230561474666e-05, "loss": 0.2822, "step": 1074700 }, { "epoch": 10.95, "learning_rate": 1.0881118116695811e-05, "loss": 0.2482, "step": 1074800 }, { "epoch": 10.95, "learning_rate": 1.0876006606716199e-05, "loss": 0.2617, "step": 1074900 }, { "epoch": 10.95, "learning_rate": 1.0870947132905734e-05, "loss": 0.2806, "step": 1075000 }, { "epoch": 10.95, "learning_rate": 1.086583748392007e-05, "loss": 0.2568, "step": 1075100 }, { "epoch": 10.95, "learning_rate": 1.0860728770480775e-05, "loss": 0.2325, "step": 1075200 }, { "epoch": 10.96, "learning_rate": 1.0855620992837619e-05, "loss": 0.2636, "step": 1075300 }, { "epoch": 10.96, "learning_rate": 1.085051415124035e-05, "loss": 0.26, "step": 1075400 }, { "epoch": 10.96, "learning_rate": 1.0845408245938669e-05, "loss": 0.2873, "step": 1075500 }, { "epoch": 10.96, "learning_rate": 1.0840303277182206e-05, "loss": 0.2296, "step": 1075600 }, { "epoch": 10.96, "learning_rate": 1.0835199245220575e-05, "loss": 0.2318, "step": 1075700 }, { "epoch": 10.96, "learning_rate": 1.0830096150303339e-05, "loss": 0.205, "step": 1075800 }, { "epoch": 10.96, "learning_rate": 1.0824993992679993e-05, "loss": 0.2774, "step": 1075900 }, { "epoch": 10.96, "learning_rate": 1.0819892772600011e-05, "loss": 0.3017, "step": 1076000 }, { "epoch": 10.96, "learning_rate": 1.081479249031281e-05, "loss": 0.3272, "step": 1076100 }, { "epoch": 10.96, "learning_rate": 1.0809693146067766e-05, "loss": 0.2773, "step": 1076200 }, { "epoch": 10.97, "learning_rate": 1.0804594740114203e-05, "loss": 0.2825, "step": 1076300 }, { "epoch": 10.97, "learning_rate": 1.079949727270141e-05, "loss": 0.2769, "step": 1076400 }, { "epoch": 10.97, "learning_rate": 1.0794400744078621e-05, "loss": 0.2197, "step": 1076500 }, { "epoch": 10.97, "learning_rate": 1.0789305154495017e-05, "loss": 0.2071, "step": 1076600 }, { "epoch": 10.97, "learning_rate": 1.0784210504199746e-05, "loss": 0.2886, "step": 1076700 }, { "epoch": 10.97, "learning_rate": 1.077911679344192e-05, "loss": 0.2845, "step": 1076800 }, { "epoch": 10.97, "learning_rate": 1.0774024022470566e-05, "loss": 0.2775, "step": 1076900 }, { "epoch": 10.97, "learning_rate": 1.0768932191534704e-05, "loss": 0.2425, "step": 1077000 }, { "epoch": 10.97, "learning_rate": 1.0763841300883302e-05, "loss": 0.2594, "step": 1077100 }, { "epoch": 10.97, "learning_rate": 1.0758751350765256e-05, "loss": 0.2068, "step": 1077200 }, { "epoch": 10.98, "learning_rate": 1.0753662341429441e-05, "loss": 0.2967, "step": 1077300 }, { "epoch": 10.98, "learning_rate": 1.0748574273124682e-05, "loss": 0.2913, "step": 1077400 }, { "epoch": 10.98, "learning_rate": 1.0743487146099755e-05, "loss": 0.3363, "step": 1077500 }, { "epoch": 10.98, "learning_rate": 1.0738400960603393e-05, "loss": 0.2453, "step": 1077600 }, { "epoch": 10.98, "learning_rate": 1.0733315716884284e-05, "loss": 0.198, "step": 1077700 }, { "epoch": 10.98, "learning_rate": 1.0728231415191048e-05, "loss": 0.2987, "step": 1077800 }, { "epoch": 10.98, "learning_rate": 1.0723148055772287e-05, "loss": 0.2397, "step": 1077900 }, { "epoch": 10.98, "learning_rate": 1.071806563887655e-05, "loss": 0.3058, "step": 1078000 }, { "epoch": 10.98, "learning_rate": 1.0712984164752347e-05, "loss": 0.2543, "step": 1078100 }, { "epoch": 10.98, "learning_rate": 1.0707954434290388e-05, "loss": 0.3364, "step": 1078200 }, { "epoch": 10.99, "learning_rate": 1.0702874837020616e-05, "loss": 0.302, "step": 1078300 }, { "epoch": 10.99, "learning_rate": 1.0697796183265109e-05, "loss": 0.2379, "step": 1078400 }, { "epoch": 10.99, "learning_rate": 1.0692718473272174e-05, "loss": 0.2963, "step": 1078500 }, { "epoch": 10.99, "learning_rate": 1.0687641707290102e-05, "loss": 0.22, "step": 1078600 }, { "epoch": 10.99, "learning_rate": 1.0682565885567084e-05, "loss": 0.2839, "step": 1078700 }, { "epoch": 10.99, "learning_rate": 1.0677491008351317e-05, "loss": 0.2741, "step": 1078800 }, { "epoch": 10.99, "learning_rate": 1.0672417075890928e-05, "loss": 0.243, "step": 1078900 }, { "epoch": 10.99, "learning_rate": 1.0667344088434003e-05, "loss": 0.2969, "step": 1079000 }, { "epoch": 10.99, "learning_rate": 1.0662272046228583e-05, "loss": 0.3048, "step": 1079100 }, { "epoch": 11.0, "learning_rate": 1.0657200949522667e-05, "loss": 0.2868, "step": 1079200 }, { "epoch": 11.0, "learning_rate": 1.0652130798564182e-05, "loss": 0.2892, "step": 1079300 }, { "epoch": 11.0, "learning_rate": 1.064706159360104e-05, "loss": 0.2874, "step": 1079400 }, { "epoch": 11.0, "learning_rate": 1.0641993334881105e-05, "loss": 0.2933, "step": 1079500 }, { "epoch": 11.0, "learning_rate": 1.063692602265216e-05, "loss": 0.2796, "step": 1079600 }, { "epoch": 11.0, "learning_rate": 1.0631859657161981e-05, "loss": 0.2958, "step": 1079700 }, { "epoch": 11.0, "learning_rate": 1.0626794238658294e-05, "loss": 0.2668, "step": 1079800 }, { "epoch": 11.0, "learning_rate": 1.0621729767388744e-05, "loss": 0.2655, "step": 1079900 }, { "epoch": 11.0, "learning_rate": 1.0616666243600965e-05, "loss": 0.3121, "step": 1080000 }, { "epoch": 11.0, "learning_rate": 1.061160366754253e-05, "loss": 0.2478, "step": 1080100 }, { "epoch": 11.01, "learning_rate": 1.0606542039460977e-05, "loss": 0.2302, "step": 1080200 }, { "epoch": 11.01, "learning_rate": 1.0601481359603782e-05, "loss": 0.1871, "step": 1080300 }, { "epoch": 11.01, "learning_rate": 1.0596421628218382e-05, "loss": 0.2569, "step": 1080400 }, { "epoch": 11.01, "learning_rate": 1.059136284555218e-05, "loss": 0.2927, "step": 1080500 }, { "epoch": 11.01, "learning_rate": 1.0586305011852506e-05, "loss": 0.2609, "step": 1080600 }, { "epoch": 11.01, "learning_rate": 1.0581248127366655e-05, "loss": 0.2269, "step": 1080700 }, { "epoch": 11.01, "learning_rate": 1.0576192192341899e-05, "loss": 0.2605, "step": 1080800 }, { "epoch": 11.01, "learning_rate": 1.057113720702542e-05, "loss": 0.3192, "step": 1080900 }, { "epoch": 11.01, "learning_rate": 1.0566083171664385e-05, "loss": 0.2275, "step": 1081000 }, { "epoch": 11.01, "learning_rate": 1.0561030086505916e-05, "loss": 0.2833, "step": 1081100 }, { "epoch": 11.02, "learning_rate": 1.0555977951797063e-05, "loss": 0.2413, "step": 1081200 }, { "epoch": 11.02, "learning_rate": 1.0550926767784848e-05, "loss": 0.2779, "step": 1081300 }, { "epoch": 11.02, "learning_rate": 1.0545876534716251e-05, "loss": 0.2448, "step": 1081400 }, { "epoch": 11.02, "learning_rate": 1.0540827252838196e-05, "loss": 0.292, "step": 1081500 }, { "epoch": 11.02, "learning_rate": 1.0535778922397561e-05, "loss": 0.2421, "step": 1081600 }, { "epoch": 11.02, "learning_rate": 1.0530731543641189e-05, "loss": 0.2143, "step": 1081700 }, { "epoch": 11.02, "learning_rate": 1.0525685116815852e-05, "loss": 0.2427, "step": 1081800 }, { "epoch": 11.02, "learning_rate": 1.0520639642168292e-05, "loss": 0.2288, "step": 1081900 }, { "epoch": 11.02, "learning_rate": 1.0515645560452136e-05, "loss": 0.2725, "step": 1082000 }, { "epoch": 11.02, "learning_rate": 1.0510601981372242e-05, "loss": 0.229, "step": 1082100 }, { "epoch": 11.03, "learning_rate": 1.0505559355207614e-05, "loss": 0.2837, "step": 1082200 }, { "epoch": 11.03, "learning_rate": 1.0500517682204786e-05, "loss": 0.2693, "step": 1082300 }, { "epoch": 11.03, "learning_rate": 1.049547696261028e-05, "loss": 0.2187, "step": 1082400 }, { "epoch": 11.03, "learning_rate": 1.049043719667055e-05, "loss": 0.2427, "step": 1082500 }, { "epoch": 11.03, "learning_rate": 1.0485398384632018e-05, "loss": 0.2386, "step": 1082600 }, { "epoch": 11.03, "learning_rate": 1.0480360526741046e-05, "loss": 0.229, "step": 1082700 }, { "epoch": 11.03, "learning_rate": 1.0475323623243955e-05, "loss": 0.2368, "step": 1082800 }, { "epoch": 11.03, "learning_rate": 1.0470287674387034e-05, "loss": 0.1841, "step": 1082900 }, { "epoch": 11.03, "learning_rate": 1.0465252680416488e-05, "loss": 0.1819, "step": 1083000 }, { "epoch": 11.03, "learning_rate": 1.0460218641578509e-05, "loss": 0.2825, "step": 1083100 }, { "epoch": 11.04, "learning_rate": 1.0455185558119242e-05, "loss": 0.2501, "step": 1083200 }, { "epoch": 11.04, "learning_rate": 1.0450153430284753e-05, "loss": 0.1888, "step": 1083300 }, { "epoch": 11.04, "learning_rate": 1.0445122258321093e-05, "loss": 0.3063, "step": 1083400 }, { "epoch": 11.04, "learning_rate": 1.0440092042474266e-05, "loss": 0.1888, "step": 1083500 }, { "epoch": 11.04, "learning_rate": 1.0435062782990203e-05, "loss": 0.216, "step": 1083600 }, { "epoch": 11.04, "learning_rate": 1.043003448011481e-05, "loss": 0.2816, "step": 1083700 }, { "epoch": 11.04, "learning_rate": 1.0425007134093947e-05, "loss": 0.2314, "step": 1083800 }, { "epoch": 11.04, "learning_rate": 1.0419980745173416e-05, "loss": 0.3227, "step": 1083900 }, { "epoch": 11.04, "learning_rate": 1.041495531359898e-05, "loss": 0.2708, "step": 1084000 }, { "epoch": 11.05, "learning_rate": 1.040993083961636e-05, "loss": 0.2744, "step": 1084100 }, { "epoch": 11.05, "learning_rate": 1.0404907323471205e-05, "loss": 0.2308, "step": 1084200 }, { "epoch": 11.05, "learning_rate": 1.0399884765409144e-05, "loss": 0.2296, "step": 1084300 }, { "epoch": 11.05, "learning_rate": 1.0394863165675753e-05, "loss": 0.2314, "step": 1084400 }, { "epoch": 11.05, "learning_rate": 1.0389842524516562e-05, "loss": 0.2277, "step": 1084500 }, { "epoch": 11.05, "learning_rate": 1.0384822842177037e-05, "loss": 0.2134, "step": 1084600 }, { "epoch": 11.05, "learning_rate": 1.0379804118902617e-05, "loss": 0.2359, "step": 1084700 }, { "epoch": 11.05, "learning_rate": 1.0374786354938702e-05, "loss": 0.2351, "step": 1084800 }, { "epoch": 11.05, "learning_rate": 1.03697695505306e-05, "loss": 0.2613, "step": 1084900 }, { "epoch": 11.05, "learning_rate": 1.036480385961788e-05, "loss": 0.2573, "step": 1085000 }, { "epoch": 11.06, "learning_rate": 1.0359788965455593e-05, "loss": 0.3162, "step": 1085100 }, { "epoch": 11.06, "learning_rate": 1.0354775031582417e-05, "loss": 0.3106, "step": 1085200 }, { "epoch": 11.06, "learning_rate": 1.0349762058243508e-05, "loss": 0.2271, "step": 1085300 }, { "epoch": 11.06, "learning_rate": 1.0344750045683968e-05, "loss": 0.2894, "step": 1085400 }, { "epoch": 11.06, "learning_rate": 1.0339738994148853e-05, "loss": 0.2174, "step": 1085500 }, { "epoch": 11.06, "learning_rate": 1.0334728903883186e-05, "loss": 0.2483, "step": 1085600 }, { "epoch": 11.06, "learning_rate": 1.0329719775131909e-05, "loss": 0.2722, "step": 1085700 }, { "epoch": 11.06, "learning_rate": 1.0324711608139947e-05, "loss": 0.2459, "step": 1085800 }, { "epoch": 11.06, "learning_rate": 1.0319704403152183e-05, "loss": 0.271, "step": 1085900 }, { "epoch": 11.06, "learning_rate": 1.0314698160413418e-05, "loss": 0.2736, "step": 1086000 }, { "epoch": 11.07, "learning_rate": 1.0309742928205745e-05, "loss": 0.23, "step": 1086100 }, { "epoch": 11.07, "learning_rate": 1.0304738601070678e-05, "loss": 0.2054, "step": 1086200 }, { "epoch": 11.07, "learning_rate": 1.0299735236916362e-05, "loss": 0.2435, "step": 1086300 }, { "epoch": 11.07, "learning_rate": 1.0294732835987427e-05, "loss": 0.3107, "step": 1086400 }, { "epoch": 11.07, "learning_rate": 1.0289731398528462e-05, "loss": 0.3021, "step": 1086500 }, { "epoch": 11.07, "learning_rate": 1.0284730924784004e-05, "loss": 0.2802, "step": 1086600 }, { "epoch": 11.07, "learning_rate": 1.0279731414998562e-05, "loss": 0.2751, "step": 1086700 }, { "epoch": 11.07, "learning_rate": 1.0274732869416558e-05, "loss": 0.228, "step": 1086800 }, { "epoch": 11.07, "learning_rate": 1.0269735288282402e-05, "loss": 0.2323, "step": 1086900 }, { "epoch": 11.07, "learning_rate": 1.0264738671840456e-05, "loss": 0.3224, "step": 1087000 }, { "epoch": 11.08, "learning_rate": 1.0259743020335008e-05, "loss": 0.2442, "step": 1087100 }, { "epoch": 11.08, "learning_rate": 1.0254748334010319e-05, "loss": 0.1885, "step": 1087200 }, { "epoch": 11.08, "learning_rate": 1.0249754613110607e-05, "loss": 0.22, "step": 1087300 }, { "epoch": 11.08, "learning_rate": 1.0244761857880036e-05, "loss": 0.2203, "step": 1087400 }, { "epoch": 11.08, "learning_rate": 1.0239770068562712e-05, "loss": 0.2236, "step": 1087500 }, { "epoch": 11.08, "learning_rate": 1.0234779245402707e-05, "loss": 0.229, "step": 1087600 }, { "epoch": 11.08, "learning_rate": 1.0229789388644044e-05, "loss": 0.2967, "step": 1087700 }, { "epoch": 11.08, "learning_rate": 1.0224800498530695e-05, "loss": 0.2588, "step": 1087800 }, { "epoch": 11.08, "learning_rate": 1.0219812575306592e-05, "loss": 0.2188, "step": 1087900 }, { "epoch": 11.08, "learning_rate": 1.0214825619215622e-05, "loss": 0.2873, "step": 1088000 }, { "epoch": 11.09, "learning_rate": 1.0209839630501597e-05, "loss": 0.258, "step": 1088100 }, { "epoch": 11.09, "learning_rate": 1.0204854609408313e-05, "loss": 0.2368, "step": 1088200 }, { "epoch": 11.09, "learning_rate": 1.0199870556179517e-05, "loss": 0.2656, "step": 1088300 }, { "epoch": 11.09, "learning_rate": 1.0194887471058882e-05, "loss": 0.3304, "step": 1088400 }, { "epoch": 11.09, "learning_rate": 1.0189905354290054e-05, "loss": 0.2654, "step": 1088500 }, { "epoch": 11.09, "learning_rate": 1.0184924206116649e-05, "loss": 0.2574, "step": 1088600 }, { "epoch": 11.09, "learning_rate": 1.0179944026782188e-05, "loss": 0.2319, "step": 1088700 }, { "epoch": 11.09, "learning_rate": 1.0174964816530184e-05, "loss": 0.2388, "step": 1088800 }, { "epoch": 11.09, "learning_rate": 1.0169986575604091e-05, "loss": 0.314, "step": 1088900 }, { "epoch": 11.09, "learning_rate": 1.0165009304247312e-05, "loss": 0.1714, "step": 1089000 }, { "epoch": 11.1, "learning_rate": 1.0160033002703214e-05, "loss": 0.2221, "step": 1089100 }, { "epoch": 11.1, "learning_rate": 1.01550576712151e-05, "loss": 0.2334, "step": 1089200 }, { "epoch": 11.1, "learning_rate": 1.0150083310026249e-05, "loss": 0.2556, "step": 1089300 }, { "epoch": 11.1, "learning_rate": 1.0145109919379858e-05, "loss": 0.2601, "step": 1089400 }, { "epoch": 11.1, "learning_rate": 1.0140187218911526e-05, "loss": 0.2681, "step": 1089500 }, { "epoch": 11.1, "learning_rate": 1.0135215760368038e-05, "loss": 0.253, "step": 1089600 }, { "epoch": 11.1, "learning_rate": 1.0130245273093952e-05, "loss": 0.2239, "step": 1089700 }, { "epoch": 11.1, "learning_rate": 1.0125275757332307e-05, "loss": 0.244, "step": 1089800 }, { "epoch": 11.1, "learning_rate": 1.012030721332606e-05, "loss": 0.2634, "step": 1089900 }, { "epoch": 11.11, "learning_rate": 1.0115339641318155e-05, "loss": 0.2386, "step": 1090000 }, { "epoch": 11.11, "learning_rate": 1.0110373041551484e-05, "loss": 0.2098, "step": 1090100 }, { "epoch": 11.11, "learning_rate": 1.0105407414268872e-05, "loss": 0.2611, "step": 1090200 }, { "epoch": 11.11, "learning_rate": 1.0100442759713107e-05, "loss": 0.2266, "step": 1090300 }, { "epoch": 11.11, "learning_rate": 1.0095479078126937e-05, "loss": 0.2761, "step": 1090400 }, { "epoch": 11.11, "learning_rate": 1.0090516369753058e-05, "loss": 0.2043, "step": 1090500 }, { "epoch": 11.11, "learning_rate": 1.0085554634834116e-05, "loss": 0.2103, "step": 1090600 }, { "epoch": 11.11, "learning_rate": 1.0080593873612716e-05, "loss": 0.27, "step": 1090700 }, { "epoch": 11.11, "learning_rate": 1.0075634086331394e-05, "loss": 0.2141, "step": 1090800 }, { "epoch": 11.11, "learning_rate": 1.0070675273232664e-05, "loss": 0.2306, "step": 1090900 }, { "epoch": 11.12, "learning_rate": 1.0065717434558992e-05, "loss": 0.2496, "step": 1091000 }, { "epoch": 11.12, "learning_rate": 1.0060760570552763e-05, "loss": 0.2689, "step": 1091100 }, { "epoch": 11.12, "learning_rate": 1.0055804681456353e-05, "loss": 0.2644, "step": 1091200 }, { "epoch": 11.12, "learning_rate": 1.0050849767512082e-05, "loss": 0.252, "step": 1091300 }, { "epoch": 11.12, "learning_rate": 1.0045895828962197e-05, "loss": 0.2269, "step": 1091400 }, { "epoch": 11.12, "learning_rate": 1.0040942866048927e-05, "loss": 0.2624, "step": 1091500 }, { "epoch": 11.12, "learning_rate": 1.003599087901444e-05, "loss": 0.2052, "step": 1091600 }, { "epoch": 11.12, "learning_rate": 1.0031039868100862e-05, "loss": 0.2326, "step": 1091700 }, { "epoch": 11.12, "learning_rate": 1.0026089833550267e-05, "loss": 0.2682, "step": 1091800 }, { "epoch": 11.12, "learning_rate": 1.0021140775604679e-05, "loss": 0.2513, "step": 1091900 }, { "epoch": 11.13, "learning_rate": 1.0016192694506089e-05, "loss": 0.2548, "step": 1092000 }, { "epoch": 11.13, "learning_rate": 1.001124559049641e-05, "loss": 0.2903, "step": 1092100 }, { "epoch": 11.13, "learning_rate": 1.0006299463817534e-05, "loss": 0.2184, "step": 1092200 }, { "epoch": 11.13, "learning_rate": 1.0001354314711308e-05, "loss": 0.2194, "step": 1092300 }, { "epoch": 11.13, "learning_rate": 9.996410143419502e-06, "loss": 0.2702, "step": 1092400 }, { "epoch": 11.13, "learning_rate": 9.991466950183865e-06, "loss": 0.1782, "step": 1092500 }, { "epoch": 11.13, "learning_rate": 9.986524735246098e-06, "loss": 0.2684, "step": 1092600 }, { "epoch": 11.13, "learning_rate": 9.981583498847826e-06, "loss": 0.2679, "step": 1092700 }, { "epoch": 11.13, "learning_rate": 9.976643241230661e-06, "loss": 0.2351, "step": 1092800 }, { "epoch": 11.13, "learning_rate": 9.971703962636146e-06, "loss": 0.2435, "step": 1092900 }, { "epoch": 11.14, "learning_rate": 9.966765663305785e-06, "loss": 0.2564, "step": 1093000 }, { "epoch": 11.14, "learning_rate": 9.96182834348103e-06, "loss": 0.2589, "step": 1093100 }, { "epoch": 11.14, "learning_rate": 9.956892003403288e-06, "loss": 0.2782, "step": 1093200 }, { "epoch": 11.14, "learning_rate": 9.951956643313926e-06, "loss": 0.2291, "step": 1093300 }, { "epoch": 11.14, "learning_rate": 9.947022263454235e-06, "loss": 0.2733, "step": 1093400 }, { "epoch": 11.14, "learning_rate": 9.942088864065484e-06, "loss": 0.286, "step": 1093500 }, { "epoch": 11.14, "learning_rate": 9.937156445388899e-06, "loss": 0.2805, "step": 1093600 }, { "epoch": 11.14, "learning_rate": 9.932274317186351e-06, "loss": 0.2811, "step": 1093700 }, { "epoch": 11.14, "learning_rate": 9.927343850844377e-06, "loss": 0.2573, "step": 1093800 }, { "epoch": 11.14, "learning_rate": 9.922414365935504e-06, "loss": 0.219, "step": 1093900 }, { "epoch": 11.15, "learning_rate": 9.91748586270075e-06, "loss": 0.2118, "step": 1094000 }, { "epoch": 11.15, "learning_rate": 9.9125583413811e-06, "loss": 0.2598, "step": 1094100 }, { "epoch": 11.15, "learning_rate": 9.907631802217452e-06, "loss": 0.2825, "step": 1094200 }, { "epoch": 11.15, "learning_rate": 9.902706245450716e-06, "loss": 0.2593, "step": 1094300 }, { "epoch": 11.15, "learning_rate": 9.897781671321715e-06, "loss": 0.224, "step": 1094400 }, { "epoch": 11.15, "learning_rate": 9.892858080071224e-06, "loss": 0.191, "step": 1094500 }, { "epoch": 11.15, "learning_rate": 9.887935471939975e-06, "loss": 0.2395, "step": 1094600 }, { "epoch": 11.15, "learning_rate": 9.88301384716867e-06, "loss": 0.3252, "step": 1094700 }, { "epoch": 11.15, "learning_rate": 9.878093205997925e-06, "loss": 0.2023, "step": 1094800 }, { "epoch": 11.16, "learning_rate": 9.873173548668341e-06, "loss": 0.2434, "step": 1094900 }, { "epoch": 11.16, "learning_rate": 9.868254875420471e-06, "loss": 0.2056, "step": 1095000 }, { "epoch": 11.16, "learning_rate": 9.863337186494788e-06, "loss": 0.2409, "step": 1095100 }, { "epoch": 11.16, "learning_rate": 9.858420482131747e-06, "loss": 0.2046, "step": 1095200 }, { "epoch": 11.16, "learning_rate": 9.853504762571754e-06, "loss": 0.2685, "step": 1095300 }, { "epoch": 11.16, "learning_rate": 9.848590028055132e-06, "loss": 0.2885, "step": 1095400 }, { "epoch": 11.16, "learning_rate": 9.843676278822207e-06, "loss": 0.2538, "step": 1095500 }, { "epoch": 11.16, "learning_rate": 9.83876351511323e-06, "loss": 0.2163, "step": 1095600 }, { "epoch": 11.16, "learning_rate": 9.833851737168409e-06, "loss": 0.282, "step": 1095700 }, { "epoch": 11.16, "learning_rate": 9.828940945227884e-06, "loss": 0.245, "step": 1095800 }, { "epoch": 11.17, "learning_rate": 9.82403113953177e-06, "loss": 0.2793, "step": 1095900 }, { "epoch": 11.17, "learning_rate": 9.819122320320141e-06, "loss": 0.2225, "step": 1096000 }, { "epoch": 11.17, "learning_rate": 9.814214487832986e-06, "loss": 0.2277, "step": 1096100 }, { "epoch": 11.17, "learning_rate": 9.809307642310282e-06, "loss": 0.3204, "step": 1096200 }, { "epoch": 11.17, "learning_rate": 9.80440178399195e-06, "loss": 0.2549, "step": 1096300 }, { "epoch": 11.17, "learning_rate": 9.79949691311784e-06, "loss": 0.2146, "step": 1096400 }, { "epoch": 11.17, "learning_rate": 9.794593029927782e-06, "loss": 0.2491, "step": 1096500 }, { "epoch": 11.17, "learning_rate": 9.789690134661557e-06, "loss": 0.2345, "step": 1096600 }, { "epoch": 11.17, "learning_rate": 9.784788227558853e-06, "loss": 0.2521, "step": 1096700 }, { "epoch": 11.17, "learning_rate": 9.779887308859377e-06, "loss": 0.2429, "step": 1096800 }, { "epoch": 11.18, "learning_rate": 9.774987378802756e-06, "loss": 0.2547, "step": 1096900 }, { "epoch": 11.18, "learning_rate": 9.770088437628544e-06, "loss": 0.2894, "step": 1097000 }, { "epoch": 11.18, "learning_rate": 9.765190485576279e-06, "loss": 0.2417, "step": 1097100 }, { "epoch": 11.18, "learning_rate": 9.760293522885459e-06, "loss": 0.2676, "step": 1097200 }, { "epoch": 11.18, "learning_rate": 9.755397549795488e-06, "loss": 0.2874, "step": 1097300 }, { "epoch": 11.18, "learning_rate": 9.750502566545764e-06, "loss": 0.2726, "step": 1097400 }, { "epoch": 11.18, "learning_rate": 9.745608573375624e-06, "loss": 0.2193, "step": 1097500 }, { "epoch": 11.18, "learning_rate": 9.740715570524362e-06, "loss": 0.2308, "step": 1097600 }, { "epoch": 11.18, "learning_rate": 9.735823558231194e-06, "loss": 0.2367, "step": 1097700 }, { "epoch": 11.18, "learning_rate": 9.730981442045062e-06, "loss": 0.2638, "step": 1097800 }, { "epoch": 11.19, "learning_rate": 9.726091401674084e-06, "loss": 0.2513, "step": 1097900 }, { "epoch": 11.19, "learning_rate": 9.721202352576251e-06, "loss": 0.2133, "step": 1098000 }, { "epoch": 11.19, "learning_rate": 9.716314294990605e-06, "loss": 0.2448, "step": 1098100 }, { "epoch": 11.19, "learning_rate": 9.711427229156142e-06, "loss": 0.2527, "step": 1098200 }, { "epoch": 11.19, "learning_rate": 9.706541155311812e-06, "loss": 0.2798, "step": 1098300 }, { "epoch": 11.19, "learning_rate": 9.701656073696527e-06, "loss": 0.1893, "step": 1098400 }, { "epoch": 11.19, "learning_rate": 9.696771984549115e-06, "loss": 0.2699, "step": 1098500 }, { "epoch": 11.19, "learning_rate": 9.691888888108387e-06, "loss": 0.2361, "step": 1098600 }, { "epoch": 11.19, "learning_rate": 9.687006784613111e-06, "loss": 0.2325, "step": 1098700 }, { "epoch": 11.19, "learning_rate": 9.682125674301972e-06, "loss": 0.2585, "step": 1098800 }, { "epoch": 11.2, "learning_rate": 9.677245557413637e-06, "loss": 0.2605, "step": 1098900 }, { "epoch": 11.2, "learning_rate": 9.672366434186725e-06, "loss": 0.2529, "step": 1099000 }, { "epoch": 11.2, "learning_rate": 9.667488304859776e-06, "loss": 0.2174, "step": 1099100 }, { "epoch": 11.2, "learning_rate": 9.662611169671307e-06, "loss": 0.2868, "step": 1099200 }, { "epoch": 11.2, "learning_rate": 9.657735028859786e-06, "loss": 0.3691, "step": 1099300 }, { "epoch": 11.2, "learning_rate": 9.652859882663629e-06, "loss": 0.2901, "step": 1099400 }, { "epoch": 11.2, "learning_rate": 9.647985731321195e-06, "loss": 0.2485, "step": 1099500 }, { "epoch": 11.2, "learning_rate": 9.643112575070816e-06, "loss": 0.2401, "step": 1099600 }, { "epoch": 11.2, "learning_rate": 9.638240414150739e-06, "loss": 0.2749, "step": 1099700 }, { "epoch": 11.2, "learning_rate": 9.633369248799193e-06, "loss": 0.2583, "step": 1099800 }, { "epoch": 11.21, "learning_rate": 9.628499079254345e-06, "loss": 0.2556, "step": 1099900 }, { "epoch": 11.21, "learning_rate": 9.623629905754336e-06, "loss": 0.2313, "step": 1100000 }, { "epoch": 11.21, "learning_rate": 9.618761728537214e-06, "loss": 0.2232, "step": 1100100 }, { "epoch": 11.21, "learning_rate": 9.613894547841012e-06, "loss": 0.2703, "step": 1100200 }, { "epoch": 11.21, "learning_rate": 9.60902836390372e-06, "loss": 0.2167, "step": 1100300 }, { "epoch": 11.21, "learning_rate": 9.604163176963244e-06, "loss": 0.2469, "step": 1100400 }, { "epoch": 11.21, "learning_rate": 9.599298987257474e-06, "loss": 0.2512, "step": 1100500 }, { "epoch": 11.21, "learning_rate": 9.594435795024237e-06, "loss": 0.2867, "step": 1100600 }, { "epoch": 11.21, "learning_rate": 9.589573600501316e-06, "loss": 0.2228, "step": 1100700 }, { "epoch": 11.22, "learning_rate": 9.584712403926444e-06, "loss": 0.3096, "step": 1100800 }, { "epoch": 11.22, "learning_rate": 9.579852205537313e-06, "loss": 0.226, "step": 1100900 }, { "epoch": 11.22, "learning_rate": 9.574993005571539e-06, "loss": 0.2858, "step": 1101000 }, { "epoch": 11.22, "learning_rate": 9.570134804266714e-06, "loss": 0.2318, "step": 1101100 }, { "epoch": 11.22, "learning_rate": 9.565277601860389e-06, "loss": 0.2638, "step": 1101200 }, { "epoch": 11.22, "learning_rate": 9.560421398590032e-06, "loss": 0.2363, "step": 1101300 }, { "epoch": 11.22, "learning_rate": 9.55556619469309e-06, "loss": 0.3175, "step": 1101400 }, { "epoch": 11.22, "learning_rate": 9.55071199040696e-06, "loss": 0.2233, "step": 1101500 }, { "epoch": 11.22, "learning_rate": 9.545858785968983e-06, "loss": 0.1778, "step": 1101600 }, { "epoch": 11.22, "learning_rate": 9.54100658161644e-06, "loss": 0.2173, "step": 1101700 }, { "epoch": 11.23, "learning_rate": 9.536155377586584e-06, "loss": 0.25, "step": 1101800 }, { "epoch": 11.23, "learning_rate": 9.531402168378981e-06, "loss": 0.2577, "step": 1101900 }, { "epoch": 11.23, "learning_rate": 9.526552945687767e-06, "loss": 0.2201, "step": 1102000 }, { "epoch": 11.23, "learning_rate": 9.521704724025933e-06, "loss": 0.2554, "step": 1102100 }, { "epoch": 11.23, "learning_rate": 9.51685750363054e-06, "loss": 0.2544, "step": 1102200 }, { "epoch": 11.23, "learning_rate": 9.512011284738565e-06, "loss": 0.2718, "step": 1102300 }, { "epoch": 11.23, "learning_rate": 9.507166067586978e-06, "loss": 0.2633, "step": 1102400 }, { "epoch": 11.23, "learning_rate": 9.50232185241268e-06, "loss": 0.2468, "step": 1102500 }, { "epoch": 11.23, "learning_rate": 9.497478639452512e-06, "loss": 0.3042, "step": 1102600 }, { "epoch": 11.23, "learning_rate": 9.492636428943286e-06, "loss": 0.2437, "step": 1102700 }, { "epoch": 11.24, "learning_rate": 9.48779522112176e-06, "loss": 0.2314, "step": 1102800 }, { "epoch": 11.24, "learning_rate": 9.482955016224644e-06, "loss": 0.2235, "step": 1102900 }, { "epoch": 11.24, "learning_rate": 9.478115814488579e-06, "loss": 0.2859, "step": 1103000 }, { "epoch": 11.24, "learning_rate": 9.47327761615018e-06, "loss": 0.2305, "step": 1103100 }, { "epoch": 11.24, "learning_rate": 9.468440421446013e-06, "loss": 0.2415, "step": 1103200 }, { "epoch": 11.24, "learning_rate": 9.463604230612582e-06, "loss": 0.2918, "step": 1103300 }, { "epoch": 11.24, "learning_rate": 9.458769043886348e-06, "loss": 0.2818, "step": 1103400 }, { "epoch": 11.24, "learning_rate": 9.453934861503735e-06, "loss": 0.2354, "step": 1103500 }, { "epoch": 11.24, "learning_rate": 9.449150010505656e-06, "loss": 0.3007, "step": 1103600 }, { "epoch": 11.24, "learning_rate": 9.444317827469971e-06, "loss": 0.2248, "step": 1103700 }, { "epoch": 11.25, "learning_rate": 9.439486649484464e-06, "loss": 0.3142, "step": 1103800 }, { "epoch": 11.25, "learning_rate": 9.43465647678536e-06, "loss": 0.25, "step": 1103900 }, { "epoch": 11.25, "learning_rate": 9.429827309608834e-06, "loss": 0.2178, "step": 1104000 }, { "epoch": 11.25, "learning_rate": 9.424999148190988e-06, "loss": 0.2025, "step": 1104100 }, { "epoch": 11.25, "learning_rate": 9.420171992767899e-06, "loss": 0.2574, "step": 1104200 }, { "epoch": 11.25, "learning_rate": 9.415345843575597e-06, "loss": 0.2503, "step": 1104300 }, { "epoch": 11.25, "learning_rate": 9.410520700850026e-06, "loss": 0.2247, "step": 1104400 }, { "epoch": 11.25, "learning_rate": 9.405696564827125e-06, "loss": 0.2393, "step": 1104500 }, { "epoch": 11.25, "learning_rate": 9.400873435742764e-06, "loss": 0.2436, "step": 1104600 }, { "epoch": 11.25, "learning_rate": 9.396051313832758e-06, "loss": 0.2417, "step": 1104700 }, { "epoch": 11.26, "learning_rate": 9.391230199332888e-06, "loss": 0.2083, "step": 1104800 }, { "epoch": 11.26, "learning_rate": 9.38641009247888e-06, "loss": 0.2711, "step": 1104900 }, { "epoch": 11.26, "learning_rate": 9.381590993506399e-06, "loss": 0.2685, "step": 1105000 }, { "epoch": 11.26, "learning_rate": 9.37677290265107e-06, "loss": 0.2438, "step": 1105100 }, { "epoch": 11.26, "learning_rate": 9.371955820148473e-06, "loss": 0.2705, "step": 1105200 }, { "epoch": 11.26, "learning_rate": 9.367139746234145e-06, "loss": 0.2962, "step": 1105300 }, { "epoch": 11.26, "learning_rate": 9.362324681143546e-06, "loss": 0.1828, "step": 1105400 }, { "epoch": 11.26, "learning_rate": 9.35751062511211e-06, "loss": 0.2308, "step": 1105500 }, { "epoch": 11.26, "learning_rate": 9.352697578375223e-06, "loss": 0.3031, "step": 1105600 }, { "epoch": 11.27, "learning_rate": 9.347885541168201e-06, "loss": 0.2791, "step": 1105700 }, { "epoch": 11.27, "learning_rate": 9.343074513726329e-06, "loss": 0.2563, "step": 1105800 }, { "epoch": 11.27, "learning_rate": 9.338264496284842e-06, "loss": 0.2278, "step": 1105900 }, { "epoch": 11.27, "learning_rate": 9.333455489078916e-06, "loss": 0.2247, "step": 1106000 }, { "epoch": 11.27, "learning_rate": 9.328647492343683e-06, "loss": 0.2185, "step": 1106100 }, { "epoch": 11.27, "learning_rate": 9.32384050631424e-06, "loss": 0.2492, "step": 1106200 }, { "epoch": 11.27, "learning_rate": 9.319034531225598e-06, "loss": 0.2573, "step": 1106300 }, { "epoch": 11.27, "learning_rate": 9.31422956731275e-06, "loss": 0.2292, "step": 1106400 }, { "epoch": 11.27, "learning_rate": 9.30942561481064e-06, "loss": 0.278, "step": 1106500 }, { "epoch": 11.27, "learning_rate": 9.30462267395413e-06, "loss": 0.2508, "step": 1106600 }, { "epoch": 11.28, "learning_rate": 9.299868759258255e-06, "loss": 0.2503, "step": 1106700 }, { "epoch": 11.28, "learning_rate": 9.295067832275112e-06, "loss": 0.254, "step": 1106800 }, { "epoch": 11.28, "learning_rate": 9.290267917639594e-06, "loss": 0.2278, "step": 1106900 }, { "epoch": 11.28, "learning_rate": 9.285469015586385e-06, "loss": 0.2312, "step": 1107000 }, { "epoch": 11.28, "learning_rate": 9.28067112635012e-06, "loss": 0.3294, "step": 1107100 }, { "epoch": 11.28, "learning_rate": 9.275874250165397e-06, "loss": 0.2234, "step": 1107200 }, { "epoch": 11.28, "learning_rate": 9.271078387266753e-06, "loss": 0.2337, "step": 1107300 }, { "epoch": 11.28, "learning_rate": 9.266283537888664e-06, "loss": 0.2904, "step": 1107400 }, { "epoch": 11.28, "learning_rate": 9.261489702265575e-06, "loss": 0.2549, "step": 1107500 }, { "epoch": 11.28, "learning_rate": 9.256696880631881e-06, "loss": 0.2348, "step": 1107600 }, { "epoch": 11.29, "learning_rate": 9.25190507322193e-06, "loss": 0.2742, "step": 1107700 }, { "epoch": 11.29, "learning_rate": 9.247114280269992e-06, "loss": 0.254, "step": 1107800 }, { "epoch": 11.29, "learning_rate": 9.242324502010317e-06, "loss": 0.2302, "step": 1107900 }, { "epoch": 11.29, "learning_rate": 9.23753573867711e-06, "loss": 0.2125, "step": 1108000 }, { "epoch": 11.29, "learning_rate": 9.232747990504495e-06, "loss": 0.2703, "step": 1108100 }, { "epoch": 11.29, "learning_rate": 9.227961257726568e-06, "loss": 0.2733, "step": 1108200 }, { "epoch": 11.29, "learning_rate": 9.22317554057737e-06, "loss": 0.2538, "step": 1108300 }, { "epoch": 11.29, "learning_rate": 9.218390839290906e-06, "loss": 0.2074, "step": 1108400 }, { "epoch": 11.29, "learning_rate": 9.213607154101107e-06, "loss": 0.2511, "step": 1108500 }, { "epoch": 11.29, "learning_rate": 9.208824485241885e-06, "loss": 0.2174, "step": 1108600 }, { "epoch": 11.3, "learning_rate": 9.204042832947059e-06, "loss": 0.2279, "step": 1108700 }, { "epoch": 11.3, "learning_rate": 9.19926219745043e-06, "loss": 0.2482, "step": 1108800 }, { "epoch": 11.3, "learning_rate": 9.194482578985762e-06, "loss": 0.2284, "step": 1108900 }, { "epoch": 11.3, "learning_rate": 9.189703977786726e-06, "loss": 0.2609, "step": 1109000 }, { "epoch": 11.3, "learning_rate": 9.184926394086973e-06, "loss": 0.2266, "step": 1109100 }, { "epoch": 11.3, "learning_rate": 9.180149828120105e-06, "loss": 0.2748, "step": 1109200 }, { "epoch": 11.3, "learning_rate": 9.175374280119673e-06, "loss": 0.1941, "step": 1109300 }, { "epoch": 11.3, "learning_rate": 9.170599750319151e-06, "loss": 0.2377, "step": 1109400 }, { "epoch": 11.3, "learning_rate": 9.165826238952e-06, "loss": 0.233, "step": 1109500 }, { "epoch": 11.3, "learning_rate": 9.161053746251613e-06, "loss": 0.2503, "step": 1109600 }, { "epoch": 11.31, "learning_rate": 9.15628227245134e-06, "loss": 0.2699, "step": 1109700 }, { "epoch": 11.31, "learning_rate": 9.151511817784473e-06, "loss": 0.2711, "step": 1109800 }, { "epoch": 11.31, "learning_rate": 9.146742382484268e-06, "loss": 0.27, "step": 1109900 }, { "epoch": 11.31, "learning_rate": 9.14197396678391e-06, "loss": 0.301, "step": 1110000 }, { "epoch": 11.31, "learning_rate": 9.137206570916548e-06, "loss": 0.2497, "step": 1110100 }, { "epoch": 11.31, "learning_rate": 9.132440195115293e-06, "loss": 0.2357, "step": 1110200 }, { "epoch": 11.31, "learning_rate": 9.127674839613172e-06, "loss": 0.2449, "step": 1110300 }, { "epoch": 11.31, "learning_rate": 9.12291050464319e-06, "loss": 0.2163, "step": 1110400 }, { "epoch": 11.31, "learning_rate": 9.11814719043831e-06, "loss": 0.2634, "step": 1110500 }, { "epoch": 11.31, "learning_rate": 9.113384897231402e-06, "loss": 0.3037, "step": 1110600 }, { "epoch": 11.32, "learning_rate": 9.10862362525533e-06, "loss": 0.2274, "step": 1110700 }, { "epoch": 11.32, "learning_rate": 9.103863374742885e-06, "loss": 0.2484, "step": 1110800 }, { "epoch": 11.32, "learning_rate": 9.099104145926826e-06, "loss": 0.2354, "step": 1110900 }, { "epoch": 11.32, "learning_rate": 9.094345939039841e-06, "loss": 0.28, "step": 1111000 }, { "epoch": 11.32, "learning_rate": 9.08958875431458e-06, "loss": 0.2382, "step": 1111100 }, { "epoch": 11.32, "learning_rate": 9.084832591983653e-06, "loss": 0.2432, "step": 1111200 }, { "epoch": 11.32, "learning_rate": 9.08007745227959e-06, "loss": 0.1926, "step": 1111300 }, { "epoch": 11.32, "learning_rate": 9.075323335434894e-06, "loss": 0.2333, "step": 1111400 }, { "epoch": 11.32, "learning_rate": 9.07057024168202e-06, "loss": 0.2106, "step": 1111500 }, { "epoch": 11.33, "learning_rate": 9.065865686891423e-06, "loss": 0.3099, "step": 1111600 }, { "epoch": 11.33, "learning_rate": 9.061114629782614e-06, "loss": 0.2983, "step": 1111700 }, { "epoch": 11.33, "learning_rate": 9.056364596460335e-06, "loss": 0.2427, "step": 1111800 }, { "epoch": 11.33, "learning_rate": 9.051615587156841e-06, "loss": 0.2421, "step": 1111900 }, { "epoch": 11.33, "learning_rate": 9.046867602104342e-06, "loss": 0.2486, "step": 1112000 }, { "epoch": 11.33, "learning_rate": 9.042120641534966e-06, "loss": 0.2288, "step": 1112100 }, { "epoch": 11.33, "learning_rate": 9.03737470568082e-06, "loss": 0.2508, "step": 1112200 }, { "epoch": 11.33, "learning_rate": 9.032629794773953e-06, "loss": 0.2838, "step": 1112300 }, { "epoch": 11.33, "learning_rate": 9.02788590904636e-06, "loss": 0.3441, "step": 1112400 }, { "epoch": 11.33, "learning_rate": 9.023143048729995e-06, "loss": 0.2789, "step": 1112500 }, { "epoch": 11.34, "learning_rate": 9.018401214056757e-06, "loss": 0.2803, "step": 1112600 }, { "epoch": 11.34, "learning_rate": 9.013660405258482e-06, "loss": 0.1871, "step": 1112700 }, { "epoch": 11.34, "learning_rate": 9.00892062256697e-06, "loss": 0.2903, "step": 1112800 }, { "epoch": 11.34, "learning_rate": 9.004181866213984e-06, "loss": 0.2762, "step": 1112900 }, { "epoch": 11.34, "learning_rate": 8.999444136431198e-06, "loss": 0.2408, "step": 1113000 }, { "epoch": 11.34, "learning_rate": 8.994707433450266e-06, "loss": 0.2688, "step": 1113100 }, { "epoch": 11.34, "learning_rate": 8.989971757502802e-06, "loss": 0.27, "step": 1113200 }, { "epoch": 11.34, "learning_rate": 8.985237108820325e-06, "loss": 0.2623, "step": 1113300 }, { "epoch": 11.34, "learning_rate": 8.980503487634342e-06, "loss": 0.2144, "step": 1113400 }, { "epoch": 11.34, "learning_rate": 8.975770894176305e-06, "loss": 0.3118, "step": 1113500 }, { "epoch": 11.35, "learning_rate": 8.971039328677602e-06, "loss": 0.2141, "step": 1113600 }, { "epoch": 11.35, "learning_rate": 8.966308791369584e-06, "loss": 0.2264, "step": 1113700 }, { "epoch": 11.35, "learning_rate": 8.961579282483541e-06, "loss": 0.3177, "step": 1113800 }, { "epoch": 11.35, "learning_rate": 8.956850802250731e-06, "loss": 0.2223, "step": 1113900 }, { "epoch": 11.35, "learning_rate": 8.95212335090233e-06, "loss": 0.2289, "step": 1114000 }, { "epoch": 11.35, "learning_rate": 8.947396928669487e-06, "loss": 0.2775, "step": 1114100 }, { "epoch": 11.35, "learning_rate": 8.942671535783312e-06, "loss": 0.2603, "step": 1114200 }, { "epoch": 11.35, "learning_rate": 8.93794717247482e-06, "loss": 0.2497, "step": 1114300 }, { "epoch": 11.35, "learning_rate": 8.933223838975024e-06, "loss": 0.2598, "step": 1114400 }, { "epoch": 11.35, "learning_rate": 8.928501535514872e-06, "loss": 0.2459, "step": 1114500 }, { "epoch": 11.36, "learning_rate": 8.923780262325238e-06, "loss": 0.2689, "step": 1114600 }, { "epoch": 11.36, "learning_rate": 8.919060019636971e-06, "loss": 0.2829, "step": 1114700 }, { "epoch": 11.36, "learning_rate": 8.914340807680864e-06, "loss": 0.2499, "step": 1114800 }, { "epoch": 11.36, "learning_rate": 8.909622626687659e-06, "loss": 0.2415, "step": 1114900 }, { "epoch": 11.36, "learning_rate": 8.904905476888047e-06, "loss": 0.2401, "step": 1115000 }, { "epoch": 11.36, "learning_rate": 8.90018935851267e-06, "loss": 0.2214, "step": 1115100 }, { "epoch": 11.36, "learning_rate": 8.895474271792123e-06, "loss": 0.1966, "step": 1115200 }, { "epoch": 11.36, "learning_rate": 8.890760216956932e-06, "loss": 0.2463, "step": 1115300 }, { "epoch": 11.36, "learning_rate": 8.886047194237592e-06, "loss": 0.2686, "step": 1115400 }, { "epoch": 11.36, "learning_rate": 8.88133520386455e-06, "loss": 0.2695, "step": 1115500 }, { "epoch": 11.37, "learning_rate": 8.876624246068182e-06, "loss": 0.2358, "step": 1115600 }, { "epoch": 11.37, "learning_rate": 8.871914321078829e-06, "loss": 0.2154, "step": 1115700 }, { "epoch": 11.37, "learning_rate": 8.867205429126785e-06, "loss": 0.1999, "step": 1115800 }, { "epoch": 11.37, "learning_rate": 8.862544643913687e-06, "loss": 0.2084, "step": 1115900 }, { "epoch": 11.37, "learning_rate": 8.857837808390787e-06, "loss": 0.2649, "step": 1116000 }, { "epoch": 11.37, "learning_rate": 8.853132006593451e-06, "loss": 0.3073, "step": 1116100 }, { "epoch": 11.37, "learning_rate": 8.848427238751758e-06, "loss": 0.2839, "step": 1116200 }, { "epoch": 11.37, "learning_rate": 8.843770537312341e-06, "loss": 0.2202, "step": 1116300 }, { "epoch": 11.37, "learning_rate": 8.839067827726704e-06, "loss": 0.2431, "step": 1116400 }, { "epoch": 11.38, "learning_rate": 8.83436615278438e-06, "loss": 0.2187, "step": 1116500 }, { "epoch": 11.38, "learning_rate": 8.829665512715227e-06, "loss": 0.2599, "step": 1116600 }, { "epoch": 11.38, "learning_rate": 8.824965907749096e-06, "loss": 0.2829, "step": 1116700 }, { "epoch": 11.38, "learning_rate": 8.820267338115773e-06, "loss": 0.2389, "step": 1116800 }, { "epoch": 11.38, "learning_rate": 8.815569804044976e-06, "loss": 0.2879, "step": 1116900 }, { "epoch": 11.38, "learning_rate": 8.810873305766392e-06, "loss": 0.2317, "step": 1117000 }, { "epoch": 11.38, "learning_rate": 8.806177843509665e-06, "loss": 0.2996, "step": 1117100 }, { "epoch": 11.38, "learning_rate": 8.80148341750435e-06, "loss": 0.257, "step": 1117200 }, { "epoch": 11.38, "learning_rate": 8.796790027979993e-06, "loss": 0.3114, "step": 1117300 }, { "epoch": 11.38, "learning_rate": 8.79209767516607e-06, "loss": 0.3328, "step": 1117400 }, { "epoch": 11.39, "learning_rate": 8.787406359292008e-06, "loss": 0.198, "step": 1117500 }, { "epoch": 11.39, "learning_rate": 8.782716080587182e-06, "loss": 0.276, "step": 1117600 }, { "epoch": 11.39, "learning_rate": 8.778026839280933e-06, "loss": 0.2459, "step": 1117700 }, { "epoch": 11.39, "learning_rate": 8.773338635602516e-06, "loss": 0.2008, "step": 1117800 }, { "epoch": 11.39, "learning_rate": 8.768651469781165e-06, "loss": 0.2947, "step": 1117900 }, { "epoch": 11.39, "learning_rate": 8.763965342046063e-06, "loss": 0.3186, "step": 1118000 }, { "epoch": 11.39, "learning_rate": 8.75928025262632e-06, "loss": 0.2611, "step": 1118100 }, { "epoch": 11.39, "learning_rate": 8.754596201751013e-06, "loss": 0.25, "step": 1118200 }, { "epoch": 11.39, "learning_rate": 8.749913189649169e-06, "loss": 0.3154, "step": 1118300 }, { "epoch": 11.39, "learning_rate": 8.74523121654976e-06, "loss": 0.2813, "step": 1118400 }, { "epoch": 11.4, "learning_rate": 8.740550282681695e-06, "loss": 0.2386, "step": 1118500 }, { "epoch": 11.4, "learning_rate": 8.735870388273852e-06, "loss": 0.2897, "step": 1118600 }, { "epoch": 11.4, "learning_rate": 8.731191533555051e-06, "loss": 0.2677, "step": 1118700 }, { "epoch": 11.4, "learning_rate": 8.726513718754055e-06, "loss": 0.2415, "step": 1118800 }, { "epoch": 11.4, "learning_rate": 8.721836944099588e-06, "loss": 0.2088, "step": 1118900 }, { "epoch": 11.4, "learning_rate": 8.717161209820319e-06, "loss": 0.2467, "step": 1119000 }, { "epoch": 11.4, "learning_rate": 8.712486516144851e-06, "loss": 0.2554, "step": 1119100 }, { "epoch": 11.4, "learning_rate": 8.707812863301753e-06, "loss": 0.2665, "step": 1119200 }, { "epoch": 11.4, "learning_rate": 8.703140251519553e-06, "loss": 0.2061, "step": 1119300 }, { "epoch": 11.4, "learning_rate": 8.698515391576482e-06, "loss": 0.2945, "step": 1119400 }, { "epoch": 11.41, "learning_rate": 8.69384485218508e-06, "loss": 0.2303, "step": 1119500 }, { "epoch": 11.41, "learning_rate": 8.68917535453751e-06, "loss": 0.2162, "step": 1119600 }, { "epoch": 11.41, "learning_rate": 8.684506898862085e-06, "loss": 0.24, "step": 1119700 }, { "epoch": 11.41, "learning_rate": 8.679839485387082e-06, "loss": 0.2839, "step": 1119800 }, { "epoch": 11.41, "learning_rate": 8.675173114340683e-06, "loss": 0.2601, "step": 1119900 }, { "epoch": 11.41, "learning_rate": 8.670507785951062e-06, "loss": 0.2738, "step": 1120000 }, { "epoch": 11.41, "learning_rate": 8.665843500446319e-06, "loss": 0.2245, "step": 1120100 }, { "epoch": 11.41, "learning_rate": 8.661180258054515e-06, "loss": 0.2389, "step": 1120200 }, { "epoch": 11.41, "learning_rate": 8.656518059003655e-06, "loss": 0.2317, "step": 1120300 }, { "epoch": 11.41, "learning_rate": 8.651856903521697e-06, "loss": 0.242, "step": 1120400 }, { "epoch": 11.42, "learning_rate": 8.64719679183653e-06, "loss": 0.3016, "step": 1120500 }, { "epoch": 11.42, "learning_rate": 8.64253772417601e-06, "loss": 0.2405, "step": 1120600 }, { "epoch": 11.42, "learning_rate": 8.637879700767945e-06, "loss": 0.2584, "step": 1120700 }, { "epoch": 11.42, "learning_rate": 8.633222721840086e-06, "loss": 0.2246, "step": 1120800 }, { "epoch": 11.42, "learning_rate": 8.62856678762012e-06, "loss": 0.2455, "step": 1120900 }, { "epoch": 11.42, "learning_rate": 8.623911898335696e-06, "loss": 0.2441, "step": 1121000 }, { "epoch": 11.42, "learning_rate": 8.619258054214428e-06, "loss": 0.2857, "step": 1121100 }, { "epoch": 11.42, "learning_rate": 8.614605255483833e-06, "loss": 0.2593, "step": 1121200 }, { "epoch": 11.42, "learning_rate": 8.609953502371423e-06, "loss": 0.2909, "step": 1121300 }, { "epoch": 11.43, "learning_rate": 8.605302795104638e-06, "loss": 0.2012, "step": 1121400 }, { "epoch": 11.43, "learning_rate": 8.600653133910868e-06, "loss": 0.2331, "step": 1121500 }, { "epoch": 11.43, "learning_rate": 8.596004519017453e-06, "loss": 0.2169, "step": 1121600 }, { "epoch": 11.43, "learning_rate": 8.591356950651696e-06, "loss": 0.2651, "step": 1121700 }, { "epoch": 11.43, "learning_rate": 8.58675688907474e-06, "loss": 0.211, "step": 1121800 }, { "epoch": 11.43, "learning_rate": 8.582157853642692e-06, "loss": 0.262, "step": 1121900 }, { "epoch": 11.43, "learning_rate": 8.57751340527668e-06, "loss": 0.1897, "step": 1122000 }, { "epoch": 11.43, "learning_rate": 8.57287000434243e-06, "loss": 0.2229, "step": 1122100 }, { "epoch": 11.43, "learning_rate": 8.56822765106696e-06, "loss": 0.225, "step": 1122200 }, { "epoch": 11.43, "learning_rate": 8.563586345677258e-06, "loss": 0.1998, "step": 1122300 }, { "epoch": 11.44, "learning_rate": 8.558946088400266e-06, "loss": 0.2715, "step": 1122400 }, { "epoch": 11.44, "learning_rate": 8.554306879462849e-06, "loss": 0.221, "step": 1122500 }, { "epoch": 11.44, "learning_rate": 8.549668719091842e-06, "loss": 0.237, "step": 1122600 }, { "epoch": 11.44, "learning_rate": 8.545031607514027e-06, "loss": 0.2734, "step": 1122700 }, { "epoch": 11.44, "learning_rate": 8.540395544956131e-06, "loss": 0.2586, "step": 1122800 }, { "epoch": 11.44, "learning_rate": 8.535760531644826e-06, "loss": 0.2386, "step": 1122900 }, { "epoch": 11.44, "learning_rate": 8.531126567806752e-06, "loss": 0.2444, "step": 1123000 }, { "epoch": 11.44, "learning_rate": 8.526493653668457e-06, "loss": 0.3106, "step": 1123100 }, { "epoch": 11.44, "learning_rate": 8.521861789456478e-06, "loss": 0.2626, "step": 1123200 }, { "epoch": 11.44, "learning_rate": 8.51723097539729e-06, "loss": 0.2389, "step": 1123300 }, { "epoch": 11.45, "learning_rate": 8.512601211717296e-06, "loss": 0.2402, "step": 1123400 }, { "epoch": 11.45, "learning_rate": 8.50797249864287e-06, "loss": 0.232, "step": 1123500 }, { "epoch": 11.45, "learning_rate": 8.503344836400338e-06, "loss": 0.256, "step": 1123600 }, { "epoch": 11.45, "learning_rate": 8.498718225215962e-06, "loss": 0.2544, "step": 1123700 }, { "epoch": 11.45, "learning_rate": 8.49409266531594e-06, "loss": 0.2962, "step": 1123800 }, { "epoch": 11.45, "learning_rate": 8.489468156926453e-06, "loss": 0.2529, "step": 1123900 }, { "epoch": 11.45, "learning_rate": 8.484844700273599e-06, "loss": 0.1944, "step": 1124000 }, { "epoch": 11.45, "learning_rate": 8.480222295583449e-06, "loss": 0.2254, "step": 1124100 }, { "epoch": 11.45, "learning_rate": 8.475600943082003e-06, "loss": 0.2691, "step": 1124200 }, { "epoch": 11.45, "learning_rate": 8.470980642995227e-06, "loss": 0.2943, "step": 1124300 }, { "epoch": 11.46, "learning_rate": 8.466407582812158e-06, "loss": 0.276, "step": 1124400 }, { "epoch": 11.46, "learning_rate": 8.461789377702592e-06, "loss": 0.2427, "step": 1124500 }, { "epoch": 11.46, "learning_rate": 8.457172225682983e-06, "loss": 0.2672, "step": 1124600 }, { "epoch": 11.46, "learning_rate": 8.45255612697909e-06, "loss": 0.2455, "step": 1124700 }, { "epoch": 11.46, "learning_rate": 8.447941081816623e-06, "loss": 0.2612, "step": 1124800 }, { "epoch": 11.46, "learning_rate": 8.443327090421214e-06, "loss": 0.2799, "step": 1124900 }, { "epoch": 11.46, "learning_rate": 8.438714153018465e-06, "loss": 0.2344, "step": 1125000 }, { "epoch": 11.46, "learning_rate": 8.434102269833923e-06, "loss": 0.2661, "step": 1125100 }, { "epoch": 11.46, "learning_rate": 8.429491441093081e-06, "loss": 0.2576, "step": 1125200 }, { "epoch": 11.46, "learning_rate": 8.424881667021383e-06, "loss": 0.2211, "step": 1125300 }, { "epoch": 11.47, "learning_rate": 8.420272947844228e-06, "loss": 0.2409, "step": 1125400 }, { "epoch": 11.47, "learning_rate": 8.415665283786933e-06, "loss": 0.2915, "step": 1125500 }, { "epoch": 11.47, "learning_rate": 8.411058675074798e-06, "loss": 0.2499, "step": 1125600 }, { "epoch": 11.47, "learning_rate": 8.406453121933069e-06, "loss": 0.2381, "step": 1125700 }, { "epoch": 11.47, "learning_rate": 8.401848624586906e-06, "loss": 0.2692, "step": 1125800 }, { "epoch": 11.47, "learning_rate": 8.397245183261458e-06, "loss": 0.201, "step": 1125900 }, { "epoch": 11.47, "learning_rate": 8.392642798181797e-06, "loss": 0.2577, "step": 1126000 }, { "epoch": 11.47, "learning_rate": 8.38804146957297e-06, "loss": 0.2479, "step": 1126100 }, { "epoch": 11.47, "learning_rate": 8.383441197659933e-06, "loss": 0.2651, "step": 1126200 }, { "epoch": 11.47, "learning_rate": 8.37884198266762e-06, "loss": 0.2586, "step": 1126300 }, { "epoch": 11.48, "learning_rate": 8.374243824820904e-06, "loss": 0.2045, "step": 1126400 }, { "epoch": 11.48, "learning_rate": 8.369646724344612e-06, "loss": 0.2677, "step": 1126500 }, { "epoch": 11.48, "learning_rate": 8.36505068146351e-06, "loss": 0.2193, "step": 1126600 }, { "epoch": 11.48, "learning_rate": 8.360455696402328e-06, "loss": 0.241, "step": 1126700 }, { "epoch": 11.48, "learning_rate": 8.355861769385714e-06, "loss": 0.2096, "step": 1126800 }, { "epoch": 11.48, "learning_rate": 8.3512689006383e-06, "loss": 0.2663, "step": 1126900 }, { "epoch": 11.48, "learning_rate": 8.346677090384648e-06, "loss": 0.215, "step": 1127000 }, { "epoch": 11.48, "learning_rate": 8.342086338849257e-06, "loss": 0.1918, "step": 1127100 }, { "epoch": 11.48, "learning_rate": 8.337496646256594e-06, "loss": 0.2514, "step": 1127200 }, { "epoch": 11.49, "learning_rate": 8.332908012831083e-06, "loss": 0.2513, "step": 1127300 }, { "epoch": 11.49, "learning_rate": 8.328320438797059e-06, "loss": 0.2373, "step": 1127400 }, { "epoch": 11.49, "learning_rate": 8.323733924378834e-06, "loss": 0.264, "step": 1127500 }, { "epoch": 11.49, "learning_rate": 8.319148469800664e-06, "loss": 0.217, "step": 1127600 }, { "epoch": 11.49, "learning_rate": 8.31456407528675e-06, "loss": 0.2766, "step": 1127700 }, { "epoch": 11.49, "learning_rate": 8.309980741061237e-06, "loss": 0.3015, "step": 1127800 }, { "epoch": 11.49, "learning_rate": 8.30539846734823e-06, "loss": 0.2212, "step": 1127900 }, { "epoch": 11.49, "learning_rate": 8.300817254371778e-06, "loss": 0.2584, "step": 1128000 }, { "epoch": 11.49, "learning_rate": 8.296237102355861e-06, "loss": 0.2558, "step": 1128100 }, { "epoch": 11.49, "learning_rate": 8.291658011524428e-06, "loss": 0.2327, "step": 1128200 }, { "epoch": 11.5, "learning_rate": 8.287079982101378e-06, "loss": 0.2482, "step": 1128300 }, { "epoch": 11.5, "learning_rate": 8.282503014310536e-06, "loss": 0.2373, "step": 1128400 }, { "epoch": 11.5, "learning_rate": 8.277927108375688e-06, "loss": 0.2513, "step": 1128500 }, { "epoch": 11.5, "learning_rate": 8.273398007701102e-06, "loss": 0.2712, "step": 1128600 }, { "epoch": 11.5, "learning_rate": 8.268824215525265e-06, "loss": 0.2207, "step": 1128700 }, { "epoch": 11.5, "learning_rate": 8.264297207910522e-06, "loss": 0.2049, "step": 1128800 }, { "epoch": 11.5, "learning_rate": 8.259725530379295e-06, "loss": 0.2752, "step": 1128900 }, { "epoch": 11.5, "learning_rate": 8.255154915817761e-06, "loss": 0.2713, "step": 1129000 }, { "epoch": 11.5, "learning_rate": 8.250585364449367e-06, "loss": 0.3167, "step": 1129100 }, { "epoch": 11.5, "learning_rate": 8.246016876497563e-06, "loss": 0.3313, "step": 1129200 }, { "epoch": 11.51, "learning_rate": 8.241449452185721e-06, "loss": 0.1974, "step": 1129300 }, { "epoch": 11.51, "learning_rate": 8.236883091737145e-06, "loss": 0.2334, "step": 1129400 }, { "epoch": 11.51, "learning_rate": 8.232317795375112e-06, "loss": 0.2275, "step": 1129500 }, { "epoch": 11.51, "learning_rate": 8.227753563322842e-06, "loss": 0.2314, "step": 1129600 }, { "epoch": 11.51, "learning_rate": 8.223190395803484e-06, "loss": 0.218, "step": 1129700 }, { "epoch": 11.51, "learning_rate": 8.218628293040158e-06, "loss": 0.2637, "step": 1129800 }, { "epoch": 11.51, "learning_rate": 8.214067255255936e-06, "loss": 0.226, "step": 1129900 }, { "epoch": 11.51, "learning_rate": 8.209507282673803e-06, "loss": 0.3224, "step": 1130000 }, { "epoch": 11.51, "learning_rate": 8.204948375516724e-06, "loss": 0.2724, "step": 1130100 }, { "epoch": 11.51, "learning_rate": 8.200390534007613e-06, "loss": 0.2374, "step": 1130200 }, { "epoch": 11.52, "learning_rate": 8.195833758369291e-06, "loss": 0.2553, "step": 1130300 }, { "epoch": 11.52, "learning_rate": 8.191278048824592e-06, "loss": 0.2486, "step": 1130400 }, { "epoch": 11.52, "learning_rate": 8.186723405596257e-06, "loss": 0.2825, "step": 1130500 }, { "epoch": 11.52, "learning_rate": 8.182169828906958e-06, "loss": 0.2323, "step": 1130600 }, { "epoch": 11.52, "learning_rate": 8.17761731897936e-06, "loss": 0.2573, "step": 1130700 }, { "epoch": 11.52, "learning_rate": 8.173065876036042e-06, "loss": 0.2436, "step": 1130800 }, { "epoch": 11.52, "learning_rate": 8.168515500299554e-06, "loss": 0.2826, "step": 1130900 }, { "epoch": 11.52, "learning_rate": 8.163966191992368e-06, "loss": 0.2274, "step": 1131000 }, { "epoch": 11.52, "learning_rate": 8.159417951336923e-06, "loss": 0.2045, "step": 1131100 }, { "epoch": 11.52, "learning_rate": 8.154870778555614e-06, "loss": 0.2427, "step": 1131200 }, { "epoch": 11.53, "learning_rate": 8.150324673870747e-06, "loss": 0.2535, "step": 1131300 }, { "epoch": 11.53, "learning_rate": 8.145779637504611e-06, "loss": 0.2387, "step": 1131400 }, { "epoch": 11.53, "learning_rate": 8.141235669679444e-06, "loss": 0.2663, "step": 1131500 }, { "epoch": 11.53, "learning_rate": 8.136692770617385e-06, "loss": 0.2248, "step": 1131600 }, { "epoch": 11.53, "learning_rate": 8.132150940540587e-06, "loss": 0.2392, "step": 1131700 }, { "epoch": 11.53, "learning_rate": 8.127610179671115e-06, "loss": 0.2513, "step": 1131800 }, { "epoch": 11.53, "learning_rate": 8.12307048823097e-06, "loss": 0.2371, "step": 1131900 }, { "epoch": 11.53, "learning_rate": 8.118531866442125e-06, "loss": 0.2454, "step": 1132000 }, { "epoch": 11.53, "learning_rate": 8.113994314526497e-06, "loss": 0.2625, "step": 1132100 }, { "epoch": 11.54, "learning_rate": 8.10945783270593e-06, "loss": 0.2332, "step": 1132200 }, { "epoch": 11.54, "learning_rate": 8.104922421202238e-06, "loss": 0.2195, "step": 1132300 }, { "epoch": 11.54, "learning_rate": 8.100388080237176e-06, "loss": 0.2015, "step": 1132400 }, { "epoch": 11.54, "learning_rate": 8.095854810032457e-06, "loss": 0.2598, "step": 1132500 }, { "epoch": 11.54, "learning_rate": 8.091322610809712e-06, "loss": 0.1707, "step": 1132600 }, { "epoch": 11.54, "learning_rate": 8.086791482790547e-06, "loss": 0.2585, "step": 1132700 }, { "epoch": 11.54, "learning_rate": 8.082261426196513e-06, "loss": 0.2203, "step": 1132800 }, { "epoch": 11.54, "learning_rate": 8.077732441249084e-06, "loss": 0.3006, "step": 1132900 }, { "epoch": 11.54, "learning_rate": 8.07320452816972e-06, "loss": 0.2512, "step": 1133000 }, { "epoch": 11.54, "learning_rate": 8.06867768717981e-06, "loss": 0.1911, "step": 1133100 }, { "epoch": 11.55, "learning_rate": 8.064151918500677e-06, "loss": 0.2379, "step": 1133200 }, { "epoch": 11.55, "learning_rate": 8.059627222353605e-06, "loss": 0.263, "step": 1133300 }, { "epoch": 11.55, "learning_rate": 8.05510359895984e-06, "loss": 0.2785, "step": 1133400 }, { "epoch": 11.55, "learning_rate": 8.050581048540538e-06, "loss": 0.2055, "step": 1133500 }, { "epoch": 11.55, "learning_rate": 8.046059571316837e-06, "loss": 0.2352, "step": 1133600 }, { "epoch": 11.55, "learning_rate": 8.041539167509816e-06, "loss": 0.2152, "step": 1133700 }, { "epoch": 11.55, "learning_rate": 8.037019837340481e-06, "loss": 0.2434, "step": 1133800 }, { "epoch": 11.55, "learning_rate": 8.032501581029805e-06, "loss": 0.2359, "step": 1133900 }, { "epoch": 11.55, "learning_rate": 8.027984398798719e-06, "loss": 0.226, "step": 1134000 }, { "epoch": 11.55, "learning_rate": 8.023513446628853e-06, "loss": 0.2821, "step": 1134100 }, { "epoch": 11.56, "learning_rate": 8.018998402473143e-06, "loss": 0.2621, "step": 1134200 }, { "epoch": 11.56, "learning_rate": 8.014484433057235e-06, "loss": 0.2528, "step": 1134300 }, { "epoch": 11.56, "learning_rate": 8.009971538601836e-06, "loss": 0.2007, "step": 1134400 }, { "epoch": 11.56, "learning_rate": 8.00545971932761e-06, "loss": 0.3308, "step": 1134500 }, { "epoch": 11.56, "learning_rate": 8.000948975455136e-06, "loss": 0.2429, "step": 1134600 }, { "epoch": 11.56, "learning_rate": 7.996439307204972e-06, "loss": 0.2068, "step": 1134700 }, { "epoch": 11.56, "learning_rate": 7.99193071479762e-06, "loss": 0.2598, "step": 1134800 }, { "epoch": 11.56, "learning_rate": 7.987423198453525e-06, "loss": 0.2464, "step": 1134900 }, { "epoch": 11.56, "learning_rate": 7.982916758393063e-06, "loss": 0.2525, "step": 1135000 }, { "epoch": 11.56, "learning_rate": 7.978411394836582e-06, "loss": 0.1883, "step": 1135100 }, { "epoch": 11.57, "learning_rate": 7.973907108004372e-06, "loss": 0.2542, "step": 1135200 }, { "epoch": 11.57, "learning_rate": 7.96940389811665e-06, "loss": 0.23, "step": 1135300 }, { "epoch": 11.57, "learning_rate": 7.96490176539361e-06, "loss": 0.2751, "step": 1135400 }, { "epoch": 11.57, "learning_rate": 7.960400710055369e-06, "loss": 0.2053, "step": 1135500 }, { "epoch": 11.57, "learning_rate": 7.955900732322012e-06, "loss": 0.2265, "step": 1135600 }, { "epoch": 11.57, "learning_rate": 7.951401832413558e-06, "loss": 0.2861, "step": 1135700 }, { "epoch": 11.57, "learning_rate": 7.946904010549983e-06, "loss": 0.3055, "step": 1135800 }, { "epoch": 11.57, "learning_rate": 7.942407266951184e-06, "loss": 0.2429, "step": 1135900 }, { "epoch": 11.57, "learning_rate": 7.937911601837039e-06, "loss": 0.2211, "step": 1136000 }, { "epoch": 11.57, "learning_rate": 7.933417015427366e-06, "loss": 0.2417, "step": 1136100 }, { "epoch": 11.58, "learning_rate": 7.928923507941905e-06, "loss": 0.2095, "step": 1136200 }, { "epoch": 11.58, "learning_rate": 7.924431079600373e-06, "loss": 0.2881, "step": 1136300 }, { "epoch": 11.58, "learning_rate": 7.919939730622428e-06, "loss": 0.2396, "step": 1136400 }, { "epoch": 11.58, "learning_rate": 7.915449461227657e-06, "loss": 0.2454, "step": 1136500 }, { "epoch": 11.58, "learning_rate": 7.91096027163561e-06, "loss": 0.2346, "step": 1136600 }, { "epoch": 11.58, "learning_rate": 7.906472162065788e-06, "loss": 0.2291, "step": 1136700 }, { "epoch": 11.58, "learning_rate": 7.901985132737627e-06, "loss": 0.2471, "step": 1136800 }, { "epoch": 11.58, "learning_rate": 7.897499183870523e-06, "loss": 0.2177, "step": 1136900 }, { "epoch": 11.58, "learning_rate": 7.893014315683805e-06, "loss": 0.263, "step": 1137000 }, { "epoch": 11.58, "learning_rate": 7.888530528396769e-06, "loss": 0.239, "step": 1137100 }, { "epoch": 11.59, "learning_rate": 7.884047822228629e-06, "loss": 0.2319, "step": 1137200 }, { "epoch": 11.59, "learning_rate": 7.879566197398564e-06, "loss": 0.2267, "step": 1137300 }, { "epoch": 11.59, "learning_rate": 7.875130454204015e-06, "loss": 0.3413, "step": 1137400 }, { "epoch": 11.59, "learning_rate": 7.870650981888587e-06, "loss": 0.2029, "step": 1137500 }, { "epoch": 11.59, "learning_rate": 7.866172591566278e-06, "loss": 0.2635, "step": 1137600 }, { "epoch": 11.59, "learning_rate": 7.86169528345603e-06, "loss": 0.2751, "step": 1137700 }, { "epoch": 11.59, "learning_rate": 7.857219057776768e-06, "loss": 0.2769, "step": 1137800 }, { "epoch": 11.59, "learning_rate": 7.852743914747361e-06, "loss": 0.175, "step": 1137900 }, { "epoch": 11.59, "learning_rate": 7.848269854586602e-06, "loss": 0.2576, "step": 1138000 }, { "epoch": 11.6, "learning_rate": 7.843796877513252e-06, "loss": 0.2695, "step": 1138100 }, { "epoch": 11.6, "learning_rate": 7.83932498374601e-06, "loss": 0.2203, "step": 1138200 }, { "epoch": 11.6, "learning_rate": 7.834854173503531e-06, "loss": 0.2768, "step": 1138300 }, { "epoch": 11.6, "learning_rate": 7.830384447004408e-06, "loss": 0.2128, "step": 1138400 }, { "epoch": 11.6, "learning_rate": 7.825915804467195e-06, "loss": 0.2594, "step": 1138500 }, { "epoch": 11.6, "learning_rate": 7.821448246110358e-06, "loss": 0.2573, "step": 1138600 }, { "epoch": 11.6, "learning_rate": 7.816981772152351e-06, "loss": 0.2274, "step": 1138700 }, { "epoch": 11.6, "learning_rate": 7.81251638281156e-06, "loss": 0.234, "step": 1138800 }, { "epoch": 11.6, "learning_rate": 7.808052078306303e-06, "loss": 0.2165, "step": 1138900 }, { "epoch": 11.6, "learning_rate": 7.803588858854864e-06, "loss": 0.261, "step": 1139000 }, { "epoch": 11.61, "learning_rate": 7.799126724675471e-06, "loss": 0.2577, "step": 1139100 }, { "epoch": 11.61, "learning_rate": 7.794665675986298e-06, "loss": 0.2092, "step": 1139200 }, { "epoch": 11.61, "learning_rate": 7.790205713005451e-06, "loss": 0.2325, "step": 1139300 }, { "epoch": 11.61, "learning_rate": 7.785746835951005e-06, "loss": 0.2148, "step": 1139400 }, { "epoch": 11.61, "learning_rate": 7.781289045040971e-06, "loss": 0.248, "step": 1139500 }, { "epoch": 11.61, "learning_rate": 7.776832340493308e-06, "loss": 0.2333, "step": 1139600 }, { "epoch": 11.61, "learning_rate": 7.77237672252592e-06, "loss": 0.2843, "step": 1139700 }, { "epoch": 11.61, "learning_rate": 7.767966731287999e-06, "loss": 0.2384, "step": 1139800 }, { "epoch": 11.61, "learning_rate": 7.763513276263436e-06, "loss": 0.2442, "step": 1139900 }, { "epoch": 11.61, "learning_rate": 7.759060908470385e-06, "loss": 0.2434, "step": 1140000 }, { "epoch": 11.62, "learning_rate": 7.75460962812652e-06, "loss": 0.2053, "step": 1140100 }, { "epoch": 11.62, "learning_rate": 7.75015943544949e-06, "loss": 0.2155, "step": 1140200 }, { "epoch": 11.62, "learning_rate": 7.74571033065689e-06, "loss": 0.2313, "step": 1140300 }, { "epoch": 11.62, "learning_rate": 7.741262313966243e-06, "loss": 0.1823, "step": 1140400 }, { "epoch": 11.62, "learning_rate": 7.73681538559503e-06, "loss": 0.2079, "step": 1140500 }, { "epoch": 11.62, "learning_rate": 7.73236954576069e-06, "loss": 0.2357, "step": 1140600 }, { "epoch": 11.62, "learning_rate": 7.72792479468059e-06, "loss": 0.2409, "step": 1140700 }, { "epoch": 11.62, "learning_rate": 7.723481132572059e-06, "loss": 0.2401, "step": 1140800 }, { "epoch": 11.62, "learning_rate": 7.719038559652367e-06, "loss": 0.2646, "step": 1140900 }, { "epoch": 11.62, "learning_rate": 7.714641485580574e-06, "loss": 0.2994, "step": 1141000 }, { "epoch": 11.63, "learning_rate": 7.710201080792834e-06, "loss": 0.2026, "step": 1141100 }, { "epoch": 11.63, "learning_rate": 7.705761765843235e-06, "loss": 0.2892, "step": 1141200 }, { "epoch": 11.63, "learning_rate": 7.70132354094884e-06, "loss": 0.254, "step": 1141300 }, { "epoch": 11.63, "learning_rate": 7.69688640632666e-06, "loss": 0.1882, "step": 1141400 }, { "epoch": 11.63, "learning_rate": 7.69245036219363e-06, "loss": 0.2452, "step": 1141500 }, { "epoch": 11.63, "learning_rate": 7.688015408766653e-06, "loss": 0.2627, "step": 1141600 }, { "epoch": 11.63, "learning_rate": 7.68358154626258e-06, "loss": 0.2267, "step": 1141700 }, { "epoch": 11.63, "learning_rate": 7.679148774898182e-06, "loss": 0.2547, "step": 1141800 }, { "epoch": 11.63, "learning_rate": 7.674717094890202e-06, "loss": 0.239, "step": 1141900 }, { "epoch": 11.63, "learning_rate": 7.670286506455331e-06, "loss": 0.2257, "step": 1142000 }, { "epoch": 11.64, "learning_rate": 7.66585700981019e-06, "loss": 0.2617, "step": 1142100 }, { "epoch": 11.64, "learning_rate": 7.66142860517136e-06, "loss": 0.2619, "step": 1142200 }, { "epoch": 11.64, "learning_rate": 7.657001292755362e-06, "loss": 0.2801, "step": 1142300 }, { "epoch": 11.64, "learning_rate": 7.65257507277867e-06, "loss": 0.2248, "step": 1142400 }, { "epoch": 11.64, "learning_rate": 7.64814994545769e-06, "loss": 0.2726, "step": 1142500 }, { "epoch": 11.64, "learning_rate": 7.643725911008788e-06, "loss": 0.2284, "step": 1142600 }, { "epoch": 11.64, "learning_rate": 7.63930296964828e-06, "loss": 0.226, "step": 1142700 }, { "epoch": 11.64, "learning_rate": 7.634881121592408e-06, "loss": 0.2877, "step": 1142800 }, { "epoch": 11.64, "learning_rate": 7.630460367057382e-06, "loss": 0.2866, "step": 1142900 }, { "epoch": 11.65, "learning_rate": 7.626040706259356e-06, "loss": 0.3113, "step": 1143000 }, { "epoch": 11.65, "learning_rate": 7.621622139414409e-06, "loss": 0.2346, "step": 1143100 }, { "epoch": 11.65, "learning_rate": 7.617204666738594e-06, "loss": 0.2649, "step": 1143200 }, { "epoch": 11.65, "learning_rate": 7.612788288447898e-06, "loss": 0.2137, "step": 1143300 }, { "epoch": 11.65, "learning_rate": 7.608373004758255e-06, "loss": 0.2532, "step": 1143400 }, { "epoch": 11.65, "learning_rate": 7.6039588158855464e-06, "loss": 0.2321, "step": 1143500 }, { "epoch": 11.65, "learning_rate": 7.599545722045605e-06, "loss": 0.2109, "step": 1143600 }, { "epoch": 11.65, "learning_rate": 7.595133723454193e-06, "loss": 0.2093, "step": 1143700 }, { "epoch": 11.65, "learning_rate": 7.590722820327033e-06, "loss": 0.2685, "step": 1143800 }, { "epoch": 11.65, "learning_rate": 7.5863130128797994e-06, "loss": 0.2812, "step": 1143900 }, { "epoch": 11.66, "learning_rate": 7.581904301328107e-06, "loss": 0.2595, "step": 1144000 }, { "epoch": 11.66, "learning_rate": 7.577496685887504e-06, "loss": 0.2707, "step": 1144100 }, { "epoch": 11.66, "learning_rate": 7.573090166773503e-06, "loss": 0.2091, "step": 1144200 }, { "epoch": 11.66, "learning_rate": 7.568684744201563e-06, "loss": 0.219, "step": 1144300 }, { "epoch": 11.66, "learning_rate": 7.5642804183870674e-06, "loss": 0.2688, "step": 1144400 }, { "epoch": 11.66, "learning_rate": 7.559877189545371e-06, "loss": 0.2118, "step": 1144500 }, { "epoch": 11.66, "learning_rate": 7.555475057891762e-06, "loss": 0.2481, "step": 1144600 }, { "epoch": 11.66, "learning_rate": 7.551074023641484e-06, "loss": 0.1824, "step": 1144700 }, { "epoch": 11.66, "learning_rate": 7.546674087009716e-06, "loss": 0.2284, "step": 1144800 }, { "epoch": 11.66, "learning_rate": 7.542275248211599e-06, "loss": 0.1952, "step": 1144900 }, { "epoch": 11.67, "learning_rate": 7.537877507462193e-06, "loss": 0.2555, "step": 1145000 }, { "epoch": 11.67, "learning_rate": 7.533480864976531e-06, "loss": 0.2245, "step": 1145100 }, { "epoch": 11.67, "learning_rate": 7.52908532096959e-06, "loss": 0.2448, "step": 1145200 }, { "epoch": 11.67, "learning_rate": 7.524690875656271e-06, "loss": 0.2394, "step": 1145300 }, { "epoch": 11.67, "learning_rate": 7.520297529251437e-06, "loss": 0.2534, "step": 1145400 }, { "epoch": 11.67, "learning_rate": 7.515949199001361e-06, "loss": 0.2384, "step": 1145500 }, { "epoch": 11.67, "learning_rate": 7.511558040063441e-06, "loss": 0.1979, "step": 1145600 }, { "epoch": 11.67, "learning_rate": 7.507167980676125e-06, "loss": 0.2418, "step": 1145700 }, { "epoch": 11.67, "learning_rate": 7.502779021054068e-06, "loss": 0.2243, "step": 1145800 }, { "epoch": 11.67, "learning_rate": 7.498391161411858e-06, "loss": 0.2261, "step": 1145900 }, { "epoch": 11.68, "learning_rate": 7.494004401964041e-06, "loss": 0.3014, "step": 1146000 }, { "epoch": 11.68, "learning_rate": 7.489618742925107e-06, "loss": 0.2252, "step": 1146100 }, { "epoch": 11.68, "learning_rate": 7.485234184509469e-06, "loss": 0.2785, "step": 1146200 }, { "epoch": 11.68, "learning_rate": 7.480850726931523e-06, "loss": 0.2664, "step": 1146300 }, { "epoch": 11.68, "learning_rate": 7.476468370405599e-06, "loss": 0.2599, "step": 1146400 }, { "epoch": 11.68, "learning_rate": 7.472087115145947e-06, "loss": 0.273, "step": 1146500 }, { "epoch": 11.68, "learning_rate": 7.467706961366795e-06, "loss": 0.251, "step": 1146600 }, { "epoch": 11.68, "learning_rate": 7.463327909282315e-06, "loss": 0.2868, "step": 1146700 }, { "epoch": 11.68, "learning_rate": 7.458949959106602e-06, "loss": 0.267, "step": 1146800 }, { "epoch": 11.68, "learning_rate": 7.4545731110537164e-06, "loss": 0.259, "step": 1146900 }, { "epoch": 11.69, "learning_rate": 7.450197365337662e-06, "loss": 0.3567, "step": 1147000 }, { "epoch": 11.69, "learning_rate": 7.445822722172388e-06, "loss": 0.2657, "step": 1147100 }, { "epoch": 11.69, "learning_rate": 7.441449181771784e-06, "loss": 0.2561, "step": 1147200 }, { "epoch": 11.69, "learning_rate": 7.4370767443497025e-06, "loss": 0.2243, "step": 1147300 }, { "epoch": 11.69, "learning_rate": 7.432705410119911e-06, "loss": 0.2659, "step": 1147400 }, { "epoch": 11.69, "learning_rate": 7.428335179296149e-06, "loss": 0.2777, "step": 1147500 }, { "epoch": 11.69, "learning_rate": 7.423966052092107e-06, "loss": 0.2606, "step": 1147600 }, { "epoch": 11.69, "learning_rate": 7.419598028721389e-06, "loss": 0.2583, "step": 1147700 }, { "epoch": 11.69, "learning_rate": 7.415231109397576e-06, "loss": 0.2056, "step": 1147800 }, { "epoch": 11.7, "learning_rate": 7.410908947018029e-06, "loss": 0.2228, "step": 1147900 }, { "epoch": 11.7, "learning_rate": 7.40654422538272e-06, "loss": 0.2702, "step": 1148000 }, { "epoch": 11.7, "learning_rate": 7.402180608432577e-06, "loss": 0.2582, "step": 1148100 }, { "epoch": 11.7, "learning_rate": 7.397818096380937e-06, "loss": 0.2254, "step": 1148200 }, { "epoch": 11.7, "learning_rate": 7.39345668944111e-06, "loss": 0.2605, "step": 1148300 }, { "epoch": 11.7, "learning_rate": 7.389096387826343e-06, "loss": 0.2714, "step": 1148400 }, { "epoch": 11.7, "learning_rate": 7.384737191749827e-06, "loss": 0.2545, "step": 1148500 }, { "epoch": 11.7, "learning_rate": 7.380379101424699e-06, "loss": 0.2483, "step": 1148600 }, { "epoch": 11.7, "learning_rate": 7.376022117064051e-06, "loss": 0.271, "step": 1148700 }, { "epoch": 11.7, "learning_rate": 7.371666238880912e-06, "loss": 0.2313, "step": 1148800 }, { "epoch": 11.71, "learning_rate": 7.367311467088249e-06, "loss": 0.2377, "step": 1148900 }, { "epoch": 11.71, "learning_rate": 7.362957801898992e-06, "loss": 0.2541, "step": 1149000 }, { "epoch": 11.71, "learning_rate": 7.358605243526013e-06, "loss": 0.2491, "step": 1149100 }, { "epoch": 11.71, "learning_rate": 7.354253792182113e-06, "loss": 0.284, "step": 1149200 }, { "epoch": 11.71, "learning_rate": 7.349903448080061e-06, "loss": 0.2598, "step": 1149300 }, { "epoch": 11.71, "learning_rate": 7.34555421143257e-06, "loss": 0.2446, "step": 1149400 }, { "epoch": 11.71, "learning_rate": 7.341206082452274e-06, "loss": 0.2346, "step": 1149500 }, { "epoch": 11.71, "learning_rate": 7.3368590613517826e-06, "loss": 0.2387, "step": 1149600 }, { "epoch": 11.71, "learning_rate": 7.332513148343632e-06, "loss": 0.2272, "step": 1149700 }, { "epoch": 11.71, "learning_rate": 7.32816834364032e-06, "loss": 0.2424, "step": 1149800 }, { "epoch": 11.72, "learning_rate": 7.323824647454276e-06, "loss": 0.2689, "step": 1149900 }, { "epoch": 11.72, "learning_rate": 7.319482059997893e-06, "loss": 0.2854, "step": 1150000 }, { "epoch": 11.72, "learning_rate": 7.315140581483475e-06, "loss": 0.2393, "step": 1150100 }, { "epoch": 11.72, "learning_rate": 7.310800212123308e-06, "loss": 0.2883, "step": 1150200 }, { "epoch": 11.72, "learning_rate": 7.306460952129608e-06, "loss": 0.2099, "step": 1150300 }, { "epoch": 11.72, "learning_rate": 7.302166177725585e-06, "loss": 0.1915, "step": 1150400 }, { "epoch": 11.72, "learning_rate": 7.297829126002311e-06, "loss": 0.2302, "step": 1150500 }, { "epoch": 11.72, "learning_rate": 7.293493184279702e-06, "loss": 0.2065, "step": 1150600 }, { "epoch": 11.72, "learning_rate": 7.2891583527697735e-06, "loss": 0.2425, "step": 1150700 }, { "epoch": 11.72, "learning_rate": 7.284824631684468e-06, "loss": 0.225, "step": 1150800 }, { "epoch": 11.73, "learning_rate": 7.280492021235682e-06, "loss": 0.213, "step": 1150900 }, { "epoch": 11.73, "learning_rate": 7.276160521635252e-06, "loss": 0.2395, "step": 1151000 }, { "epoch": 11.73, "learning_rate": 7.271830133094966e-06, "loss": 0.2361, "step": 1151100 }, { "epoch": 11.73, "learning_rate": 7.267500855826559e-06, "loss": 0.2492, "step": 1151200 }, { "epoch": 11.73, "learning_rate": 7.263172690041693e-06, "loss": 0.2378, "step": 1151300 }, { "epoch": 11.73, "learning_rate": 7.2588456359519925e-06, "loss": 0.2649, "step": 1151400 }, { "epoch": 11.73, "learning_rate": 7.25451969376904e-06, "loss": 0.184, "step": 1151500 }, { "epoch": 11.73, "learning_rate": 7.250194863704327e-06, "loss": 0.2562, "step": 1151600 }, { "epoch": 11.73, "learning_rate": 7.245871145969319e-06, "loss": 0.2202, "step": 1151700 }, { "epoch": 11.73, "learning_rate": 7.2415485407754345e-06, "loss": 0.2377, "step": 1151800 }, { "epoch": 11.74, "learning_rate": 7.237227048334003e-06, "loss": 0.2551, "step": 1151900 }, { "epoch": 11.74, "learning_rate": 7.2329066688563265e-06, "loss": 0.2539, "step": 1152000 }, { "epoch": 11.74, "learning_rate": 7.228587402553655e-06, "loss": 0.2203, "step": 1152100 }, { "epoch": 11.74, "learning_rate": 7.22426924963715e-06, "loss": 0.2048, "step": 1152200 }, { "epoch": 11.74, "learning_rate": 7.219952210317966e-06, "loss": 0.2594, "step": 1152300 }, { "epoch": 11.74, "learning_rate": 7.2156362848071876e-06, "loss": 0.2247, "step": 1152400 }, { "epoch": 11.74, "learning_rate": 7.211321473315813e-06, "loss": 0.246, "step": 1152500 }, { "epoch": 11.74, "learning_rate": 7.207007776054826e-06, "loss": 0.186, "step": 1152600 }, { "epoch": 11.74, "learning_rate": 7.202695193235135e-06, "loss": 0.2458, "step": 1152700 }, { "epoch": 11.74, "learning_rate": 7.198383725067611e-06, "loss": 0.2704, "step": 1152800 }, { "epoch": 11.75, "learning_rate": 7.194073371763039e-06, "loss": 0.2351, "step": 1152900 }, { "epoch": 11.75, "learning_rate": 7.189764133532184e-06, "loss": 0.2608, "step": 1153000 }, { "epoch": 11.75, "learning_rate": 7.185456010585747e-06, "loss": 0.1851, "step": 1153100 }, { "epoch": 11.75, "learning_rate": 7.181149003134349e-06, "loss": 0.2579, "step": 1153200 }, { "epoch": 11.75, "learning_rate": 7.176843111388593e-06, "loss": 0.2511, "step": 1153300 }, { "epoch": 11.75, "learning_rate": 7.172538335559013e-06, "loss": 0.2934, "step": 1153400 }, { "epoch": 11.75, "learning_rate": 7.168234675856065e-06, "loss": 0.2121, "step": 1153500 }, { "epoch": 11.75, "learning_rate": 7.1639321324901985e-06, "loss": 0.2278, "step": 1153600 }, { "epoch": 11.75, "learning_rate": 7.15963070567178e-06, "loss": 0.2686, "step": 1153700 }, { "epoch": 11.76, "learning_rate": 7.155330395611105e-06, "loss": 0.2226, "step": 1153800 }, { "epoch": 11.76, "learning_rate": 7.151031202518448e-06, "loss": 0.269, "step": 1153900 }, { "epoch": 11.76, "learning_rate": 7.146733126604014e-06, "loss": 0.283, "step": 1154000 }, { "epoch": 11.76, "learning_rate": 7.142436168077942e-06, "loss": 0.2585, "step": 1154100 }, { "epoch": 11.76, "learning_rate": 7.138140327150336e-06, "loss": 0.2466, "step": 1154200 }, { "epoch": 11.76, "learning_rate": 7.133888545728589e-06, "loss": 0.2525, "step": 1154300 }, { "epoch": 11.76, "learning_rate": 7.129594929446754e-06, "loss": 0.2082, "step": 1154400 }, { "epoch": 11.76, "learning_rate": 7.1253024313912515e-06, "loss": 0.2671, "step": 1154500 }, { "epoch": 11.76, "learning_rate": 7.1210110517719465e-06, "loss": 0.231, "step": 1154600 }, { "epoch": 11.76, "learning_rate": 7.116720790798663e-06, "loss": 0.2435, "step": 1154700 }, { "epoch": 11.77, "learning_rate": 7.1124316486811715e-06, "loss": 0.2008, "step": 1154800 }, { "epoch": 11.77, "learning_rate": 7.1081436256291896e-06, "loss": 0.2499, "step": 1154900 }, { "epoch": 11.77, "learning_rate": 7.103856721852371e-06, "loss": 0.2598, "step": 1155000 }, { "epoch": 11.77, "learning_rate": 7.099570937560324e-06, "loss": 0.229, "step": 1155100 }, { "epoch": 11.77, "learning_rate": 7.095286272962604e-06, "loss": 0.2004, "step": 1155200 }, { "epoch": 11.77, "learning_rate": 7.091002728268687e-06, "loss": 0.2631, "step": 1155300 }, { "epoch": 11.77, "learning_rate": 7.086720303688021e-06, "loss": 0.3411, "step": 1155400 }, { "epoch": 11.77, "learning_rate": 7.082438999430005e-06, "loss": 0.3084, "step": 1155500 }, { "epoch": 11.77, "learning_rate": 7.078158815703944e-06, "loss": 0.3051, "step": 1155600 }, { "epoch": 11.77, "learning_rate": 7.073879752719131e-06, "loss": 0.2377, "step": 1155700 }, { "epoch": 11.78, "learning_rate": 7.06960181068479e-06, "loss": 0.2755, "step": 1155800 }, { "epoch": 11.78, "learning_rate": 7.065324989810068e-06, "loss": 0.2336, "step": 1155900 }, { "epoch": 11.78, "learning_rate": 7.0610492903040904e-06, "loss": 0.204, "step": 1156000 }, { "epoch": 11.78, "learning_rate": 7.056774712375906e-06, "loss": 0.3013, "step": 1156100 }, { "epoch": 11.78, "learning_rate": 7.052501256234522e-06, "loss": 0.2581, "step": 1156200 }, { "epoch": 11.78, "learning_rate": 7.048228922088885e-06, "loss": 0.1816, "step": 1156300 }, { "epoch": 11.78, "learning_rate": 7.043957710147894e-06, "loss": 0.2596, "step": 1156400 }, { "epoch": 11.78, "learning_rate": 7.0396876206203645e-06, "loss": 0.2262, "step": 1156500 }, { "epoch": 11.78, "learning_rate": 7.03541865371509e-06, "loss": 0.2667, "step": 1156600 }, { "epoch": 11.78, "learning_rate": 7.0311508096408e-06, "loss": 0.2018, "step": 1156700 }, { "epoch": 11.79, "learning_rate": 7.026884088606173e-06, "loss": 0.2791, "step": 1156800 }, { "epoch": 11.79, "learning_rate": 7.022618490819809e-06, "loss": 0.2138, "step": 1156900 }, { "epoch": 11.79, "learning_rate": 7.018354016490276e-06, "loss": 0.3167, "step": 1157000 }, { "epoch": 11.79, "learning_rate": 7.0140906658260975e-06, "loss": 0.2574, "step": 1157100 }, { "epoch": 11.79, "learning_rate": 7.009828439035703e-06, "loss": 0.248, "step": 1157200 }, { "epoch": 11.79, "learning_rate": 7.0055673363274955e-06, "loss": 0.261, "step": 1157300 }, { "epoch": 11.79, "learning_rate": 7.001307357909825e-06, "loss": 0.2317, "step": 1157400 }, { "epoch": 11.79, "learning_rate": 6.9970485039909756e-06, "loss": 0.2397, "step": 1157500 }, { "epoch": 11.79, "learning_rate": 6.99283334650332e-06, "loss": 0.238, "step": 1157600 }, { "epoch": 11.79, "learning_rate": 6.988576730956572e-06, "loss": 0.2373, "step": 1157700 }, { "epoch": 11.8, "learning_rate": 6.9843212405311e-06, "loss": 0.246, "step": 1157800 }, { "epoch": 11.8, "learning_rate": 6.980066875434974e-06, "loss": 0.2024, "step": 1157900 }, { "epoch": 11.8, "learning_rate": 6.975813635876194e-06, "loss": 0.3501, "step": 1158000 }, { "epoch": 11.8, "learning_rate": 6.971561522062726e-06, "loss": 0.2775, "step": 1158100 }, { "epoch": 11.8, "learning_rate": 6.9673105342024846e-06, "loss": 0.2071, "step": 1158200 }, { "epoch": 11.8, "learning_rate": 6.963060672503294e-06, "loss": 0.2182, "step": 1158300 }, { "epoch": 11.8, "learning_rate": 6.958811937172962e-06, "loss": 0.2439, "step": 1158400 }, { "epoch": 11.8, "learning_rate": 6.9545643284192284e-06, "loss": 0.2539, "step": 1158500 }, { "epoch": 11.8, "learning_rate": 6.950317846449764e-06, "loss": 0.2314, "step": 1158600 }, { "epoch": 11.81, "learning_rate": 6.946072491472206e-06, "loss": 0.2474, "step": 1158700 }, { "epoch": 11.81, "learning_rate": 6.941828263694122e-06, "loss": 0.2692, "step": 1158800 }, { "epoch": 11.81, "learning_rate": 6.9375851633230354e-06, "loss": 0.2201, "step": 1158900 }, { "epoch": 11.81, "learning_rate": 6.933343190566401e-06, "loss": 0.2004, "step": 1159000 }, { "epoch": 11.81, "learning_rate": 6.929102345631633e-06, "loss": 0.2893, "step": 1159100 }, { "epoch": 11.81, "learning_rate": 6.924862628726088e-06, "loss": 0.3181, "step": 1159200 }, { "epoch": 11.81, "learning_rate": 6.920624040057046e-06, "loss": 0.238, "step": 1159300 }, { "epoch": 11.81, "learning_rate": 6.916386579831757e-06, "loss": 0.2508, "step": 1159400 }, { "epoch": 11.81, "learning_rate": 6.912150248257417e-06, "loss": 0.212, "step": 1159500 }, { "epoch": 11.81, "learning_rate": 6.907915045541145e-06, "loss": 0.2471, "step": 1159600 }, { "epoch": 11.82, "learning_rate": 6.903680971890016e-06, "loss": 0.2751, "step": 1159700 }, { "epoch": 11.82, "learning_rate": 6.8994480275110635e-06, "loss": 0.2897, "step": 1159800 }, { "epoch": 11.82, "learning_rate": 6.8952162126112414e-06, "loss": 0.3096, "step": 1159900 }, { "epoch": 11.82, "learning_rate": 6.890985527397461e-06, "loss": 0.2527, "step": 1160000 }, { "epoch": 11.82, "learning_rate": 6.886755972076583e-06, "loss": 0.2886, "step": 1160100 }, { "epoch": 11.82, "learning_rate": 6.882527546855405e-06, "loss": 0.3058, "step": 1160200 }, { "epoch": 11.82, "learning_rate": 6.8783002519406725e-06, "loss": 0.2037, "step": 1160300 }, { "epoch": 11.82, "learning_rate": 6.874074087539079e-06, "loss": 0.2693, "step": 1160400 }, { "epoch": 11.82, "learning_rate": 6.869849053857251e-06, "loss": 0.2396, "step": 1160500 }, { "epoch": 11.82, "learning_rate": 6.865625151101769e-06, "loss": 0.1944, "step": 1160600 }, { "epoch": 11.83, "learning_rate": 6.861402379479167e-06, "loss": 0.2037, "step": 1160700 }, { "epoch": 11.83, "learning_rate": 6.857180739195894e-06, "loss": 0.2798, "step": 1160800 }, { "epoch": 11.83, "learning_rate": 6.852960230458375e-06, "loss": 0.2003, "step": 1160900 }, { "epoch": 11.83, "learning_rate": 6.848740853472968e-06, "loss": 0.2273, "step": 1161000 }, { "epoch": 11.83, "learning_rate": 6.844522608445981e-06, "loss": 0.2564, "step": 1161100 }, { "epoch": 11.83, "learning_rate": 6.840305495583646e-06, "loss": 0.2314, "step": 1161200 }, { "epoch": 11.83, "learning_rate": 6.836089515092162e-06, "loss": 0.2136, "step": 1161300 }, { "epoch": 11.83, "learning_rate": 6.831874667177666e-06, "loss": 0.2371, "step": 1161400 }, { "epoch": 11.83, "learning_rate": 6.827660952046238e-06, "loss": 0.2219, "step": 1161500 }, { "epoch": 11.83, "learning_rate": 6.823448369903903e-06, "loss": 0.2725, "step": 1161600 }, { "epoch": 11.84, "learning_rate": 6.8192369209566415e-06, "loss": 0.2493, "step": 1161700 }, { "epoch": 11.84, "learning_rate": 6.8150687029548005e-06, "loss": 0.1926, "step": 1161800 }, { "epoch": 11.84, "learning_rate": 6.81085950967827e-06, "loss": 0.2312, "step": 1161900 }, { "epoch": 11.84, "learning_rate": 6.806651450212314e-06, "loss": 0.2478, "step": 1162000 }, { "epoch": 11.84, "learning_rate": 6.8024445247626935e-06, "loss": 0.2449, "step": 1162100 }, { "epoch": 11.84, "learning_rate": 6.798238733535108e-06, "loss": 0.27, "step": 1162200 }, { "epoch": 11.84, "learning_rate": 6.794034076735178e-06, "loss": 0.2564, "step": 1162300 }, { "epoch": 11.84, "learning_rate": 6.789830554568496e-06, "loss": 0.2615, "step": 1162400 }, { "epoch": 11.84, "learning_rate": 6.78562816724059e-06, "loss": 0.2622, "step": 1162500 }, { "epoch": 11.84, "learning_rate": 6.781426914956932e-06, "loss": 0.2738, "step": 1162600 }, { "epoch": 11.85, "learning_rate": 6.777226797922932e-06, "loss": 0.2076, "step": 1162700 }, { "epoch": 11.85, "learning_rate": 6.773027816343965e-06, "loss": 0.2665, "step": 1162800 }, { "epoch": 11.85, "learning_rate": 6.768829970425321e-06, "loss": 0.2238, "step": 1162900 }, { "epoch": 11.85, "learning_rate": 6.764633260372252e-06, "loss": 0.2109, "step": 1163000 }, { "epoch": 11.85, "learning_rate": 6.7604376863899666e-06, "loss": 0.2511, "step": 1163100 }, { "epoch": 11.85, "learning_rate": 6.756243248683581e-06, "loss": 0.2269, "step": 1163200 }, { "epoch": 11.85, "learning_rate": 6.752049947458189e-06, "loss": 0.2819, "step": 1163300 }, { "epoch": 11.85, "learning_rate": 6.747857782918818e-06, "loss": 0.2323, "step": 1163400 }, { "epoch": 11.85, "learning_rate": 6.743666755270453e-06, "loss": 0.2073, "step": 1163500 }, { "epoch": 11.85, "learning_rate": 6.739476864717984e-06, "loss": 0.2277, "step": 1163600 }, { "epoch": 11.86, "learning_rate": 6.735288111466286e-06, "loss": 0.2034, "step": 1163700 }, { "epoch": 11.86, "learning_rate": 6.7311004957201606e-06, "loss": 0.2553, "step": 1163800 }, { "epoch": 11.86, "learning_rate": 6.726914017684361e-06, "loss": 0.2135, "step": 1163900 }, { "epoch": 11.86, "learning_rate": 6.722728677563581e-06, "loss": 0.2243, "step": 1164000 }, { "epoch": 11.86, "learning_rate": 6.718544475562463e-06, "loss": 0.2562, "step": 1164100 }, { "epoch": 11.86, "learning_rate": 6.714361411885576e-06, "loss": 0.2388, "step": 1164200 }, { "epoch": 11.86, "learning_rate": 6.710221300352551e-06, "loss": 0.2806, "step": 1164300 }, { "epoch": 11.86, "learning_rate": 6.7060405025493225e-06, "loss": 0.2225, "step": 1164400 }, { "epoch": 11.86, "learning_rate": 6.701860843681703e-06, "loss": 0.223, "step": 1164500 }, { "epoch": 11.87, "learning_rate": 6.6976823239540596e-06, "loss": 0.2382, "step": 1164600 }, { "epoch": 11.87, "learning_rate": 6.693504943570682e-06, "loss": 0.2327, "step": 1164700 }, { "epoch": 11.87, "learning_rate": 6.689328702735827e-06, "loss": 0.2279, "step": 1164800 }, { "epoch": 11.87, "learning_rate": 6.685153601653693e-06, "loss": 0.2106, "step": 1164900 }, { "epoch": 11.87, "learning_rate": 6.680979640528409e-06, "loss": 0.2345, "step": 1165000 }, { "epoch": 11.87, "learning_rate": 6.676806819564057e-06, "loss": 0.2598, "step": 1165100 }, { "epoch": 11.87, "learning_rate": 6.6726351389646634e-06, "loss": 0.1701, "step": 1165200 }, { "epoch": 11.87, "learning_rate": 6.668464598934203e-06, "loss": 0.2381, "step": 1165300 }, { "epoch": 11.87, "learning_rate": 6.664295199676589e-06, "loss": 0.2512, "step": 1165400 }, { "epoch": 11.87, "learning_rate": 6.660126941395687e-06, "loss": 0.2568, "step": 1165500 }, { "epoch": 11.88, "learning_rate": 6.655959824295287e-06, "loss": 0.2545, "step": 1165600 }, { "epoch": 11.88, "learning_rate": 6.651793848579139e-06, "loss": 0.2831, "step": 1165700 }, { "epoch": 11.88, "learning_rate": 6.64762901445094e-06, "loss": 0.2313, "step": 1165800 }, { "epoch": 11.88, "learning_rate": 6.6434653221143285e-06, "loss": 0.2157, "step": 1165900 }, { "epoch": 11.88, "learning_rate": 6.6393027717728685e-06, "loss": 0.2528, "step": 1166000 }, { "epoch": 11.88, "learning_rate": 6.635141363630094e-06, "loss": 0.1951, "step": 1166100 }, { "epoch": 11.88, "learning_rate": 6.630981097889482e-06, "loss": 0.2493, "step": 1166200 }, { "epoch": 11.88, "learning_rate": 6.626821974754428e-06, "loss": 0.3139, "step": 1166300 }, { "epoch": 11.88, "learning_rate": 6.62270556857398e-06, "loss": 0.2844, "step": 1166400 }, { "epoch": 11.88, "learning_rate": 6.618548719828942e-06, "loss": 0.2324, "step": 1166500 }, { "epoch": 11.89, "learning_rate": 6.614393014297334e-06, "loss": 0.2753, "step": 1166600 }, { "epoch": 11.89, "learning_rate": 6.610238452182361e-06, "loss": 0.2517, "step": 1166700 }, { "epoch": 11.89, "learning_rate": 6.606085033687137e-06, "loss": 0.2265, "step": 1166800 }, { "epoch": 11.89, "learning_rate": 6.601932759014746e-06, "loss": 0.2535, "step": 1166900 }, { "epoch": 11.89, "learning_rate": 6.597781628368219e-06, "loss": 0.1908, "step": 1167000 }, { "epoch": 11.89, "learning_rate": 6.593631641950507e-06, "loss": 0.2158, "step": 1167100 }, { "epoch": 11.89, "learning_rate": 6.589482799964525e-06, "loss": 0.2664, "step": 1167200 }, { "epoch": 11.89, "learning_rate": 6.585335102613132e-06, "loss": 0.2553, "step": 1167300 }, { "epoch": 11.89, "learning_rate": 6.581230009956642e-06, "loss": 0.2295, "step": 1167400 }, { "epoch": 11.89, "learning_rate": 6.577084591031352e-06, "loss": 0.2518, "step": 1167500 }, { "epoch": 11.9, "learning_rate": 6.5729403173468395e-06, "loss": 0.2317, "step": 1167600 }, { "epoch": 11.9, "learning_rate": 6.568797189105737e-06, "loss": 0.2719, "step": 1167700 }, { "epoch": 11.9, "learning_rate": 6.564655206510623e-06, "loss": 0.2418, "step": 1167800 }, { "epoch": 11.9, "learning_rate": 6.560514369764016e-06, "loss": 0.2612, "step": 1167900 }, { "epoch": 11.9, "learning_rate": 6.556374679068376e-06, "loss": 0.2801, "step": 1168000 }, { "epoch": 11.9, "learning_rate": 6.552236134626117e-06, "loss": 0.1918, "step": 1168100 }, { "epoch": 11.9, "learning_rate": 6.548098736639573e-06, "loss": 0.1976, "step": 1168200 }, { "epoch": 11.9, "learning_rate": 6.5439624853110505e-06, "loss": 0.2548, "step": 1168300 }, { "epoch": 11.9, "learning_rate": 6.539827380842788e-06, "loss": 0.2499, "step": 1168400 }, { "epoch": 11.9, "learning_rate": 6.53569342343696e-06, "loss": 0.3015, "step": 1168500 }, { "epoch": 11.91, "learning_rate": 6.531560613295693e-06, "loss": 0.2354, "step": 1168600 }, { "epoch": 11.91, "learning_rate": 6.52742895062106e-06, "loss": 0.2404, "step": 1168700 }, { "epoch": 11.91, "learning_rate": 6.523298435615082e-06, "loss": 0.2466, "step": 1168800 }, { "epoch": 11.91, "learning_rate": 6.519169068479701e-06, "loss": 0.2117, "step": 1168900 }, { "epoch": 11.91, "learning_rate": 6.515040849416828e-06, "loss": 0.2554, "step": 1169000 }, { "epoch": 11.91, "learning_rate": 6.510913778628307e-06, "loss": 0.2689, "step": 1169100 }, { "epoch": 11.91, "learning_rate": 6.506787856315923e-06, "loss": 0.2252, "step": 1169200 }, { "epoch": 11.91, "learning_rate": 6.502663082681416e-06, "loss": 0.2192, "step": 1169300 }, { "epoch": 11.91, "learning_rate": 6.498539457926467e-06, "loss": 0.2402, "step": 1169400 }, { "epoch": 11.92, "learning_rate": 6.494416982252681e-06, "loss": 0.2447, "step": 1169500 }, { "epoch": 11.92, "learning_rate": 6.490295655861628e-06, "loss": 0.2507, "step": 1169600 }, { "epoch": 11.92, "learning_rate": 6.486175478954829e-06, "loss": 0.2389, "step": 1169700 }, { "epoch": 11.92, "learning_rate": 6.48205645173372e-06, "loss": 0.3392, "step": 1169800 }, { "epoch": 11.92, "learning_rate": 6.477979747480441e-06, "loss": 0.2747, "step": 1169900 }, { "epoch": 11.92, "learning_rate": 6.473863008732973e-06, "loss": 0.2774, "step": 1170000 }, { "epoch": 11.92, "learning_rate": 6.469747420273207e-06, "loss": 0.2352, "step": 1170100 }, { "epoch": 11.92, "learning_rate": 6.465632982302377e-06, "loss": 0.2389, "step": 1170200 }, { "epoch": 11.92, "learning_rate": 6.461519695021635e-06, "loss": 0.2098, "step": 1170300 }, { "epoch": 11.92, "learning_rate": 6.457407558632114e-06, "loss": 0.2576, "step": 1170400 }, { "epoch": 11.93, "learning_rate": 6.4533376774892825e-06, "loss": 0.2439, "step": 1170500 }, { "epoch": 11.93, "learning_rate": 6.449227831971392e-06, "loss": 0.2691, "step": 1170600 }, { "epoch": 11.93, "learning_rate": 6.445119137945722e-06, "loss": 0.2116, "step": 1170700 }, { "epoch": 11.93, "learning_rate": 6.44101159561315e-06, "loss": 0.2175, "step": 1170800 }, { "epoch": 11.93, "learning_rate": 6.436905205174518e-06, "loss": 0.224, "step": 1170900 }, { "epoch": 11.93, "learning_rate": 6.4327999668306155e-06, "loss": 0.2425, "step": 1171000 }, { "epoch": 11.93, "learning_rate": 6.428695880782143e-06, "loss": 0.2448, "step": 1171100 }, { "epoch": 11.93, "learning_rate": 6.424592947229775e-06, "loss": 0.2237, "step": 1171200 }, { "epoch": 11.93, "learning_rate": 6.420491166374129e-06, "loss": 0.2428, "step": 1171300 }, { "epoch": 11.93, "learning_rate": 6.416390538415743e-06, "loss": 0.2052, "step": 1171400 }, { "epoch": 11.94, "learning_rate": 6.4122910635551216e-06, "loss": 0.2223, "step": 1171500 }, { "epoch": 11.94, "learning_rate": 6.4081927419927e-06, "loss": 0.241, "step": 1171600 }, { "epoch": 11.94, "learning_rate": 6.404095573928865e-06, "loss": 0.2739, "step": 1171700 }, { "epoch": 11.94, "learning_rate": 6.399999559563944e-06, "loss": 0.2236, "step": 1171800 }, { "epoch": 11.94, "learning_rate": 6.395904699098209e-06, "loss": 0.257, "step": 1171900 }, { "epoch": 11.94, "learning_rate": 6.391810992731879e-06, "loss": 0.2363, "step": 1172000 }, { "epoch": 11.94, "learning_rate": 6.387718440665096e-06, "loss": 0.2196, "step": 1172100 }, { "epoch": 11.94, "learning_rate": 6.383627043097971e-06, "loss": 0.1908, "step": 1172200 }, { "epoch": 11.94, "learning_rate": 6.379536800230557e-06, "loss": 0.1991, "step": 1172300 }, { "epoch": 11.94, "learning_rate": 6.375447712262828e-06, "loss": 0.286, "step": 1172400 }, { "epoch": 11.95, "learning_rate": 6.371359779394718e-06, "loss": 0.1966, "step": 1172500 }, { "epoch": 11.95, "learning_rate": 6.367273001826117e-06, "loss": 0.2798, "step": 1172600 }, { "epoch": 11.95, "learning_rate": 6.363187379756823e-06, "loss": 0.2563, "step": 1172700 }, { "epoch": 11.95, "learning_rate": 6.359102913386612e-06, "loss": 0.2248, "step": 1172800 }, { "epoch": 11.95, "learning_rate": 6.355019602915184e-06, "loss": 0.252, "step": 1172900 }, { "epoch": 11.95, "learning_rate": 6.350937448542193e-06, "loss": 0.2364, "step": 1173000 }, { "epoch": 11.95, "learning_rate": 6.346856450467233e-06, "loss": 0.1841, "step": 1173100 }, { "epoch": 11.95, "learning_rate": 6.342776608889844e-06, "loss": 0.1939, "step": 1173200 }, { "epoch": 11.95, "learning_rate": 6.33869792400949e-06, "loss": 0.3072, "step": 1173300 }, { "epoch": 11.95, "learning_rate": 6.334620396025608e-06, "loss": 0.2535, "step": 1173400 }, { "epoch": 11.96, "learning_rate": 6.330544025137558e-06, "loss": 0.243, "step": 1173500 }, { "epoch": 11.96, "learning_rate": 6.326468811544661e-06, "loss": 0.2739, "step": 1173600 }, { "epoch": 11.96, "learning_rate": 6.322394755446158e-06, "loss": 0.2666, "step": 1173700 }, { "epoch": 11.96, "learning_rate": 6.318321857041249e-06, "loss": 0.2212, "step": 1173800 }, { "epoch": 11.96, "learning_rate": 6.314250116529085e-06, "loss": 0.2147, "step": 1173900 }, { "epoch": 11.96, "learning_rate": 6.310179534108731e-06, "loss": 0.2153, "step": 1174000 }, { "epoch": 11.96, "learning_rate": 6.306110109979227e-06, "loss": 0.3071, "step": 1174100 }, { "epoch": 11.96, "learning_rate": 6.302041844339539e-06, "loss": 0.2193, "step": 1174200 }, { "epoch": 11.96, "learning_rate": 6.297974737388583e-06, "loss": 0.2403, "step": 1174300 }, { "epoch": 11.96, "learning_rate": 6.293908789325217e-06, "loss": 0.3001, "step": 1174400 }, { "epoch": 11.97, "learning_rate": 6.289844000348247e-06, "loss": 0.2297, "step": 1174500 }, { "epoch": 11.97, "learning_rate": 6.285780370656405e-06, "loss": 0.2326, "step": 1174600 }, { "epoch": 11.97, "learning_rate": 6.2817179004483825e-06, "loss": 0.2468, "step": 1174700 }, { "epoch": 11.97, "learning_rate": 6.27765658992282e-06, "loss": 0.2235, "step": 1174800 }, { "epoch": 11.97, "learning_rate": 6.273596439278276e-06, "loss": 0.286, "step": 1174900 }, { "epoch": 11.97, "learning_rate": 6.26957803287588e-06, "loss": 0.2005, "step": 1175000 }, { "epoch": 11.97, "learning_rate": 6.265520190985116e-06, "loss": 0.2845, "step": 1175100 }, { "epoch": 11.97, "learning_rate": 6.2614635095687765e-06, "loss": 0.2673, "step": 1175200 }, { "epoch": 11.97, "learning_rate": 6.257407988825214e-06, "loss": 0.2481, "step": 1175300 }, { "epoch": 11.98, "learning_rate": 6.253353628952705e-06, "loss": 0.2085, "step": 1175400 }, { "epoch": 11.98, "learning_rate": 6.249300430149489e-06, "loss": 0.2044, "step": 1175500 }, { "epoch": 11.98, "learning_rate": 6.245248392613749e-06, "loss": 0.2566, "step": 1175600 }, { "epoch": 11.98, "learning_rate": 6.2411975165436e-06, "loss": 0.3407, "step": 1175700 }, { "epoch": 11.98, "learning_rate": 6.237147802137105e-06, "loss": 0.2743, "step": 1175800 }, { "epoch": 11.98, "learning_rate": 6.2330992495922774e-06, "loss": 0.2759, "step": 1175900 }, { "epoch": 11.98, "learning_rate": 6.2290518591070685e-06, "loss": 0.2256, "step": 1176000 }, { "epoch": 11.98, "learning_rate": 6.2250056308793566e-06, "loss": 0.23, "step": 1176100 }, { "epoch": 11.98, "learning_rate": 6.22096056510699e-06, "loss": 0.2308, "step": 1176200 }, { "epoch": 11.98, "learning_rate": 6.216916661987754e-06, "loss": 0.269, "step": 1176300 }, { "epoch": 11.99, "learning_rate": 6.212873921719353e-06, "loss": 0.2227, "step": 1176400 }, { "epoch": 11.99, "learning_rate": 6.208832344499467e-06, "loss": 0.2639, "step": 1176500 }, { "epoch": 11.99, "learning_rate": 6.204791930525706e-06, "loss": 0.2603, "step": 1176600 }, { "epoch": 11.99, "learning_rate": 6.20075267999561e-06, "loss": 0.2627, "step": 1176700 }, { "epoch": 11.99, "learning_rate": 6.1967145931066815e-06, "loss": 0.2436, "step": 1176800 }, { "epoch": 11.99, "learning_rate": 6.192677670056358e-06, "loss": 0.2036, "step": 1176900 }, { "epoch": 11.99, "learning_rate": 6.188641911042023e-06, "loss": 0.219, "step": 1177000 }, { "epoch": 11.99, "learning_rate": 6.1846073162609996e-06, "loss": 0.2089, "step": 1177100 }, { "epoch": 11.99, "learning_rate": 6.180573885910562e-06, "loss": 0.313, "step": 1177200 }, { "epoch": 11.99, "learning_rate": 6.176541620187909e-06, "loss": 0.2801, "step": 1177300 }, { "epoch": 12.0, "learning_rate": 6.172510519290199e-06, "loss": 0.2762, "step": 1177400 }, { "epoch": 12.0, "learning_rate": 6.168480583414528e-06, "loss": 0.2524, "step": 1177500 }, { "epoch": 12.0, "learning_rate": 6.164451812757947e-06, "loss": 0.2386, "step": 1177600 }, { "epoch": 12.0, "learning_rate": 6.160424207517421e-06, "loss": 0.1891, "step": 1177700 }, { "epoch": 12.0, "learning_rate": 6.1563977678898844e-06, "loss": 0.263, "step": 1177800 }, { "epoch": 12.0, "learning_rate": 6.152372494072212e-06, "loss": 0.1968, "step": 1177900 }, { "epoch": 12.0, "learning_rate": 6.148348386261202e-06, "loss": 0.2215, "step": 1178000 }, { "epoch": 12.0, "learning_rate": 6.144325444653619e-06, "loss": 0.2364, "step": 1178100 }, { "epoch": 12.0, "learning_rate": 6.1403438814239e-06, "loss": 0.155, "step": 1178200 }, { "epoch": 12.0, "learning_rate": 6.136323261146262e-06, "loss": 0.2097, "step": 1178300 }, { "epoch": 12.01, "learning_rate": 6.13230380766001e-06, "loss": 0.2099, "step": 1178400 }, { "epoch": 12.01, "learning_rate": 6.128285521161662e-06, "loss": 0.2241, "step": 1178500 }, { "epoch": 12.01, "learning_rate": 6.124268401847693e-06, "loss": 0.2039, "step": 1178600 }, { "epoch": 12.01, "learning_rate": 6.1202524499145214e-06, "loss": 0.2385, "step": 1178700 }, { "epoch": 12.01, "learning_rate": 6.116237665558493e-06, "loss": 0.2293, "step": 1178800 }, { "epoch": 12.01, "learning_rate": 6.112224048975909e-06, "loss": 0.1754, "step": 1178900 }, { "epoch": 12.01, "learning_rate": 6.108211600363025e-06, "loss": 0.1833, "step": 1179000 }, { "epoch": 12.01, "learning_rate": 6.104200319916006e-06, "loss": 0.263, "step": 1179100 }, { "epoch": 12.01, "learning_rate": 6.10019020783099e-06, "loss": 0.284, "step": 1179200 }, { "epoch": 12.01, "learning_rate": 6.096181264304045e-06, "loss": 0.2011, "step": 1179300 }, { "epoch": 12.02, "learning_rate": 6.092173489531183e-06, "loss": 0.1946, "step": 1179400 }, { "epoch": 12.02, "learning_rate": 6.088166883708368e-06, "loss": 0.2353, "step": 1179500 }, { "epoch": 12.02, "learning_rate": 6.084161447031497e-06, "loss": 0.2333, "step": 1179600 }, { "epoch": 12.02, "learning_rate": 6.080157179696405e-06, "loss": 0.2367, "step": 1179700 }, { "epoch": 12.02, "learning_rate": 6.07615408189888e-06, "loss": 0.2724, "step": 1179800 }, { "epoch": 12.02, "learning_rate": 6.072152153834649e-06, "loss": 0.2384, "step": 1179900 }, { "epoch": 12.02, "learning_rate": 6.0681513956993915e-06, "loss": 0.1453, "step": 1180000 }, { "epoch": 12.02, "learning_rate": 6.064151807688705e-06, "loss": 0.2373, "step": 1180100 }, { "epoch": 12.02, "learning_rate": 6.060153389998156e-06, "loss": 0.2313, "step": 1180200 }, { "epoch": 12.03, "learning_rate": 6.056156142823247e-06, "loss": 0.1842, "step": 1180300 }, { "epoch": 12.03, "learning_rate": 6.0522000213283864e-06, "loss": 0.2297, "step": 1180400 }, { "epoch": 12.03, "learning_rate": 6.048205104060972e-06, "loss": 0.2362, "step": 1180500 }, { "epoch": 12.03, "learning_rate": 6.044211357893391e-06, "loss": 0.2167, "step": 1180600 }, { "epoch": 12.03, "learning_rate": 6.040218783020914e-06, "loss": 0.2713, "step": 1180700 }, { "epoch": 12.03, "learning_rate": 6.036227379638755e-06, "loss": 0.2305, "step": 1180800 }, { "epoch": 12.03, "learning_rate": 6.032237147942069e-06, "loss": 0.2272, "step": 1180900 }, { "epoch": 12.03, "learning_rate": 6.028248088125951e-06, "loss": 0.2375, "step": 1181000 }, { "epoch": 12.03, "learning_rate": 6.024260200385457e-06, "loss": 0.1942, "step": 1181100 }, { "epoch": 12.03, "learning_rate": 6.020273484915551e-06, "loss": 0.198, "step": 1181200 }, { "epoch": 12.04, "learning_rate": 6.016287941911168e-06, "loss": 0.2514, "step": 1181300 }, { "epoch": 12.04, "learning_rate": 6.012303571567185e-06, "loss": 0.2215, "step": 1181400 }, { "epoch": 12.04, "learning_rate": 6.008320374078401e-06, "loss": 0.2249, "step": 1181500 }, { "epoch": 12.04, "learning_rate": 6.004338349639571e-06, "loss": 0.2369, "step": 1181600 }, { "epoch": 12.04, "learning_rate": 6.000357498445409e-06, "loss": 0.2364, "step": 1181700 }, { "epoch": 12.04, "learning_rate": 5.996377820690533e-06, "loss": 0.2258, "step": 1181800 }, { "epoch": 12.04, "learning_rate": 5.992399316569533e-06, "loss": 0.2115, "step": 1181900 }, { "epoch": 12.04, "learning_rate": 5.988421986276942e-06, "loss": 0.2235, "step": 1182000 }, { "epoch": 12.04, "learning_rate": 5.9844458300072195e-06, "loss": 0.2195, "step": 1182100 }, { "epoch": 12.04, "learning_rate": 5.980470847954777e-06, "loss": 0.1962, "step": 1182200 }, { "epoch": 12.05, "learning_rate": 5.976497040313972e-06, "loss": 0.2203, "step": 1182300 }, { "epoch": 12.05, "learning_rate": 5.972524407279103e-06, "loss": 0.1895, "step": 1182400 }, { "epoch": 12.05, "learning_rate": 5.968592657810847e-06, "loss": 0.2247, "step": 1182500 }, { "epoch": 12.05, "learning_rate": 5.964622362819583e-06, "loss": 0.2261, "step": 1182600 }, { "epoch": 12.05, "learning_rate": 5.9606532430148505e-06, "loss": 0.1873, "step": 1182700 }, { "epoch": 12.05, "learning_rate": 5.9566852985907215e-06, "loss": 0.2488, "step": 1182800 }, { "epoch": 12.05, "learning_rate": 5.95271852974119e-06, "loss": 0.2466, "step": 1182900 }, { "epoch": 12.05, "learning_rate": 5.948752936660218e-06, "loss": 0.2512, "step": 1183000 }, { "epoch": 12.05, "learning_rate": 5.944788519541696e-06, "loss": 0.2119, "step": 1183100 }, { "epoch": 12.05, "learning_rate": 5.940825278579461e-06, "loss": 0.1975, "step": 1183200 }, { "epoch": 12.06, "learning_rate": 5.9368632139672914e-06, "loss": 0.2035, "step": 1183300 }, { "epoch": 12.06, "learning_rate": 5.932902325898912e-06, "loss": 0.239, "step": 1183400 }, { "epoch": 12.06, "learning_rate": 5.9289426145679905e-06, "loss": 0.2056, "step": 1183500 }, { "epoch": 12.06, "learning_rate": 5.924984080168117e-06, "loss": 0.2116, "step": 1183600 }, { "epoch": 12.06, "learning_rate": 5.921026722892853e-06, "loss": 0.2453, "step": 1183700 }, { "epoch": 12.06, "learning_rate": 5.917070542935691e-06, "loss": 0.2276, "step": 1183800 }, { "epoch": 12.06, "learning_rate": 5.913115540490054e-06, "loss": 0.209, "step": 1183900 }, { "epoch": 12.06, "learning_rate": 5.909161715749324e-06, "loss": 0.2191, "step": 1184000 }, { "epoch": 12.06, "learning_rate": 5.905209068906826e-06, "loss": 0.2538, "step": 1184100 }, { "epoch": 12.06, "learning_rate": 5.901257600155809e-06, "loss": 0.2137, "step": 1184200 }, { "epoch": 12.07, "learning_rate": 5.897307309689479e-06, "loss": 0.1999, "step": 1184300 }, { "epoch": 12.07, "learning_rate": 5.893358197700984e-06, "loss": 0.2364, "step": 1184400 }, { "epoch": 12.07, "learning_rate": 5.8894102643834136e-06, "loss": 0.2057, "step": 1184500 }, { "epoch": 12.07, "learning_rate": 5.885463509929799e-06, "loss": 0.238, "step": 1184600 }, { "epoch": 12.07, "learning_rate": 5.881517934533107e-06, "loss": 0.2521, "step": 1184700 }, { "epoch": 12.07, "learning_rate": 5.877573538386266e-06, "loss": 0.2517, "step": 1184800 }, { "epoch": 12.07, "learning_rate": 5.873630321682115e-06, "loss": 0.2311, "step": 1184900 }, { "epoch": 12.07, "learning_rate": 5.869688284613464e-06, "loss": 0.2099, "step": 1185000 }, { "epoch": 12.07, "learning_rate": 5.86574742737306e-06, "loss": 0.2479, "step": 1185100 }, { "epoch": 12.08, "learning_rate": 5.861807750153575e-06, "loss": 0.2148, "step": 1185200 }, { "epoch": 12.08, "learning_rate": 5.857869253147643e-06, "loss": 0.2039, "step": 1185300 }, { "epoch": 12.08, "learning_rate": 5.853931936547837e-06, "loss": 0.2312, "step": 1185400 }, { "epoch": 12.08, "learning_rate": 5.849995800546657e-06, "loss": 0.2346, "step": 1185500 }, { "epoch": 12.08, "learning_rate": 5.846060845336562e-06, "loss": 0.2456, "step": 1185600 }, { "epoch": 12.08, "learning_rate": 5.842127071109947e-06, "loss": 0.2131, "step": 1185700 }, { "epoch": 12.08, "learning_rate": 5.838194478059153e-06, "loss": 0.2345, "step": 1185800 }, { "epoch": 12.08, "learning_rate": 5.834263066376459e-06, "loss": 0.2535, "step": 1185900 }, { "epoch": 12.08, "learning_rate": 5.8303328362540955e-06, "loss": 0.2182, "step": 1186000 }, { "epoch": 12.08, "learning_rate": 5.8264430725176e-06, "loss": 0.1988, "step": 1186100 }, { "epoch": 12.09, "learning_rate": 5.822515194271919e-06, "loss": 0.1989, "step": 1186200 }, { "epoch": 12.09, "learning_rate": 5.818588498160955e-06, "loss": 0.2226, "step": 1186300 }, { "epoch": 12.09, "learning_rate": 5.814662984376708e-06, "loss": 0.2808, "step": 1186400 }, { "epoch": 12.09, "learning_rate": 5.810738653111116e-06, "loss": 0.2488, "step": 1186500 }, { "epoch": 12.09, "learning_rate": 5.8068155045560445e-06, "loss": 0.1876, "step": 1186600 }, { "epoch": 12.09, "learning_rate": 5.802893538903316e-06, "loss": 0.2658, "step": 1186700 }, { "epoch": 12.09, "learning_rate": 5.798972756344699e-06, "loss": 0.2007, "step": 1186800 }, { "epoch": 12.09, "learning_rate": 5.795053157071886e-06, "loss": 0.2657, "step": 1186900 }, { "epoch": 12.09, "learning_rate": 5.791134741276522e-06, "loss": 0.2765, "step": 1187000 }, { "epoch": 12.09, "learning_rate": 5.7872175091502e-06, "loss": 0.2175, "step": 1187100 }, { "epoch": 12.1, "learning_rate": 5.783301460884448e-06, "loss": 0.2679, "step": 1187200 }, { "epoch": 12.1, "learning_rate": 5.779386596670736e-06, "loss": 0.2014, "step": 1187300 }, { "epoch": 12.1, "learning_rate": 5.775472916700477e-06, "loss": 0.2764, "step": 1187400 }, { "epoch": 12.1, "learning_rate": 5.771560421165039e-06, "loss": 0.2564, "step": 1187500 }, { "epoch": 12.1, "learning_rate": 5.767649110255701e-06, "loss": 0.2239, "step": 1187600 }, { "epoch": 12.1, "learning_rate": 5.763738984163706e-06, "loss": 0.2307, "step": 1187700 }, { "epoch": 12.1, "learning_rate": 5.759830043080251e-06, "loss": 0.1952, "step": 1187800 }, { "epoch": 12.1, "learning_rate": 5.755961358887909e-06, "loss": 0.1936, "step": 1187900 }, { "epoch": 12.1, "learning_rate": 5.752054776539973e-06, "loss": 0.2363, "step": 1188000 }, { "epoch": 12.1, "learning_rate": 5.748149379771848e-06, "loss": 0.2463, "step": 1188100 }, { "epoch": 12.11, "learning_rate": 5.744245168774489e-06, "loss": 0.1701, "step": 1188200 }, { "epoch": 12.11, "learning_rate": 5.7403421437387915e-06, "loss": 0.2309, "step": 1188300 }, { "epoch": 12.11, "learning_rate": 5.736440304855587e-06, "loss": 0.2323, "step": 1188400 }, { "epoch": 12.11, "learning_rate": 5.732539652315656e-06, "loss": 0.1869, "step": 1188500 }, { "epoch": 12.11, "learning_rate": 5.728640186309722e-06, "loss": 0.2177, "step": 1188600 }, { "epoch": 12.11, "learning_rate": 5.724741907028435e-06, "loss": 0.2275, "step": 1188700 }, { "epoch": 12.11, "learning_rate": 5.720844814662396e-06, "loss": 0.2522, "step": 1188800 }, { "epoch": 12.11, "learning_rate": 5.7169489094021676e-06, "loss": 0.2505, "step": 1188900 }, { "epoch": 12.11, "learning_rate": 5.713054191438214e-06, "loss": 0.2645, "step": 1189000 }, { "epoch": 12.11, "learning_rate": 5.709160660960973e-06, "loss": 0.2761, "step": 1189100 }, { "epoch": 12.12, "learning_rate": 5.705268318160824e-06, "loss": 0.2524, "step": 1189200 }, { "epoch": 12.12, "learning_rate": 5.701377163228061e-06, "loss": 0.2037, "step": 1189300 }, { "epoch": 12.12, "learning_rate": 5.6974871963529525e-06, "loss": 0.2255, "step": 1189400 }, { "epoch": 12.12, "learning_rate": 5.693598417725688e-06, "loss": 0.2822, "step": 1189500 }, { "epoch": 12.12, "learning_rate": 5.689710827536405e-06, "loss": 0.2574, "step": 1189600 }, { "epoch": 12.12, "learning_rate": 5.685824425975191e-06, "loss": 0.2085, "step": 1189700 }, { "epoch": 12.12, "learning_rate": 5.681939213232061e-06, "loss": 0.2147, "step": 1189800 }, { "epoch": 12.12, "learning_rate": 5.67805518949699e-06, "loss": 0.2297, "step": 1189900 }, { "epoch": 12.12, "learning_rate": 5.674211177418077e-06, "loss": 0.205, "step": 1190000 }, { "epoch": 12.12, "learning_rate": 5.670329520373946e-06, "loss": 0.2191, "step": 1190100 }, { "epoch": 12.13, "learning_rate": 5.666449052905502e-06, "loss": 0.2219, "step": 1190200 }, { "epoch": 12.13, "learning_rate": 5.662569775202485e-06, "loss": 0.1995, "step": 1190300 }, { "epoch": 12.13, "learning_rate": 5.658691687454573e-06, "loss": 0.2285, "step": 1190400 }, { "epoch": 12.13, "learning_rate": 5.654814789851369e-06, "loss": 0.2425, "step": 1190500 }, { "epoch": 12.13, "learning_rate": 5.650939082582441e-06, "loss": 0.2197, "step": 1190600 }, { "epoch": 12.13, "learning_rate": 5.647064565837288e-06, "loss": 0.2013, "step": 1190700 }, { "epoch": 12.13, "learning_rate": 5.64319123980534e-06, "loss": 0.2198, "step": 1190800 }, { "epoch": 12.13, "learning_rate": 5.639319104675985e-06, "loss": 0.192, "step": 1190900 }, { "epoch": 12.13, "learning_rate": 5.635448160638552e-06, "loss": 0.1812, "step": 1191000 }, { "epoch": 12.14, "learning_rate": 5.631578407882303e-06, "loss": 0.2556, "step": 1191100 }, { "epoch": 12.14, "learning_rate": 5.627709846596446e-06, "loss": 0.247, "step": 1191200 }, { "epoch": 12.14, "learning_rate": 5.623842476970137e-06, "loss": 0.2239, "step": 1191300 }, { "epoch": 12.14, "learning_rate": 5.619976299192458e-06, "loss": 0.2324, "step": 1191400 }, { "epoch": 12.14, "learning_rate": 5.616111313452444e-06, "loss": 0.1882, "step": 1191500 }, { "epoch": 12.14, "learning_rate": 5.612247519939077e-06, "loss": 0.2084, "step": 1191600 }, { "epoch": 12.14, "learning_rate": 5.608384918841263e-06, "loss": 0.2229, "step": 1191700 }, { "epoch": 12.14, "learning_rate": 5.604523510347864e-06, "loss": 0.2961, "step": 1191800 }, { "epoch": 12.14, "learning_rate": 5.60066329464768e-06, "loss": 0.2462, "step": 1191900 }, { "epoch": 12.14, "learning_rate": 5.596804271929462e-06, "loss": 0.2348, "step": 1192000 }, { "epoch": 12.15, "learning_rate": 5.592946442381876e-06, "loss": 0.1792, "step": 1192100 }, { "epoch": 12.15, "learning_rate": 5.5890898061935556e-06, "loss": 0.2599, "step": 1192200 }, { "epoch": 12.15, "learning_rate": 5.585234363553066e-06, "loss": 0.2435, "step": 1192300 }, { "epoch": 12.15, "learning_rate": 5.5813801146489154e-06, "loss": 0.2803, "step": 1192400 }, { "epoch": 12.15, "learning_rate": 5.577527059669557e-06, "loss": 0.2307, "step": 1192500 }, { "epoch": 12.15, "learning_rate": 5.573675198803388e-06, "loss": 0.2317, "step": 1192600 }, { "epoch": 12.15, "learning_rate": 5.569824532238722e-06, "loss": 0.2588, "step": 1192700 }, { "epoch": 12.15, "learning_rate": 5.565975060163847e-06, "loss": 0.2538, "step": 1192800 }, { "epoch": 12.15, "learning_rate": 5.562126782766985e-06, "loss": 0.2845, "step": 1192900 }, { "epoch": 12.15, "learning_rate": 5.558279700236276e-06, "loss": 0.2356, "step": 1193000 }, { "epoch": 12.16, "learning_rate": 5.5544338127598326e-06, "loss": 0.2123, "step": 1193100 }, { "epoch": 12.16, "learning_rate": 5.550589120525698e-06, "loss": 0.2023, "step": 1193200 }, { "epoch": 12.16, "learning_rate": 5.54674562372184e-06, "loss": 0.2274, "step": 1193300 }, { "epoch": 12.16, "learning_rate": 5.542903322536195e-06, "loss": 0.2366, "step": 1193400 }, { "epoch": 12.16, "learning_rate": 5.5390622171566195e-06, "loss": 0.2467, "step": 1193500 }, { "epoch": 12.16, "learning_rate": 5.535222307770932e-06, "loss": 0.2067, "step": 1193600 }, { "epoch": 12.16, "learning_rate": 5.531383594566871e-06, "loss": 0.1379, "step": 1193700 }, { "epoch": 12.16, "learning_rate": 5.527584446977841e-06, "loss": 0.1965, "step": 1193800 }, { "epoch": 12.16, "learning_rate": 5.52374811473356e-06, "loss": 0.2256, "step": 1193900 }, { "epoch": 12.16, "learning_rate": 5.519912979231934e-06, "loss": 0.2426, "step": 1194000 }, { "epoch": 12.17, "learning_rate": 5.516079040660469e-06, "loss": 0.2182, "step": 1194100 }, { "epoch": 12.17, "learning_rate": 5.5122462992066225e-06, "loss": 0.2037, "step": 1194200 }, { "epoch": 12.17, "learning_rate": 5.5084147550578014e-06, "loss": 0.2147, "step": 1194300 }, { "epoch": 12.17, "learning_rate": 5.504584408401345e-06, "loss": 0.2347, "step": 1194400 }, { "epoch": 12.17, "learning_rate": 5.500755259424525e-06, "loss": 0.276, "step": 1194500 }, { "epoch": 12.17, "learning_rate": 5.496927308314566e-06, "loss": 0.2341, "step": 1194600 }, { "epoch": 12.17, "learning_rate": 5.4931005552586445e-06, "loss": 0.2088, "step": 1194700 }, { "epoch": 12.17, "learning_rate": 5.489275000443849e-06, "loss": 0.2089, "step": 1194800 }, { "epoch": 12.17, "learning_rate": 5.485450644057229e-06, "loss": 0.2107, "step": 1194900 }, { "epoch": 12.17, "learning_rate": 5.481627486285784e-06, "loss": 0.2007, "step": 1195000 }, { "epoch": 12.18, "learning_rate": 5.477805527316433e-06, "loss": 0.1798, "step": 1195100 }, { "epoch": 12.18, "learning_rate": 5.4739847673360535e-06, "loss": 0.2359, "step": 1195200 }, { "epoch": 12.18, "learning_rate": 5.470165206531459e-06, "loss": 0.2025, "step": 1195300 }, { "epoch": 12.18, "learning_rate": 5.466346845089394e-06, "loss": 0.2625, "step": 1195400 }, { "epoch": 12.18, "learning_rate": 5.462529683196559e-06, "loss": 0.2147, "step": 1195500 }, { "epoch": 12.18, "learning_rate": 5.458713721039601e-06, "loss": 0.2435, "step": 1195600 }, { "epoch": 12.18, "learning_rate": 5.454898958805074e-06, "loss": 0.2342, "step": 1195700 }, { "epoch": 12.18, "learning_rate": 5.451085396679514e-06, "loss": 0.2058, "step": 1195800 }, { "epoch": 12.18, "learning_rate": 5.447273034849379e-06, "loss": 0.2085, "step": 1195900 }, { "epoch": 12.19, "learning_rate": 5.443499979171558e-06, "loss": 0.2046, "step": 1196000 }, { "epoch": 12.19, "learning_rate": 5.439690006483811e-06, "loss": 0.1802, "step": 1196100 }, { "epoch": 12.19, "learning_rate": 5.435881234648654e-06, "loss": 0.224, "step": 1196200 }, { "epoch": 12.19, "learning_rate": 5.432073663852316e-06, "loss": 0.2383, "step": 1196300 }, { "epoch": 12.19, "learning_rate": 5.428267294280967e-06, "loss": 0.2441, "step": 1196400 }, { "epoch": 12.19, "learning_rate": 5.424462126120706e-06, "loss": 0.2703, "step": 1196500 }, { "epoch": 12.19, "learning_rate": 5.4206581595575865e-06, "loss": 0.2688, "step": 1196600 }, { "epoch": 12.19, "learning_rate": 5.4168553947776e-06, "loss": 0.2066, "step": 1196700 }, { "epoch": 12.19, "learning_rate": 5.413053831966691e-06, "loss": 0.2303, "step": 1196800 }, { "epoch": 12.19, "learning_rate": 5.4092534713107115e-06, "loss": 0.2423, "step": 1196900 }, { "epoch": 12.2, "learning_rate": 5.405454312995488e-06, "loss": 0.2009, "step": 1197000 }, { "epoch": 12.2, "learning_rate": 5.4016563572067864e-06, "loss": 0.2278, "step": 1197100 }, { "epoch": 12.2, "learning_rate": 5.397859604130282e-06, "loss": 0.2269, "step": 1197200 }, { "epoch": 12.2, "learning_rate": 5.3940640539516285e-06, "loss": 0.2075, "step": 1197300 }, { "epoch": 12.2, "learning_rate": 5.390269706856401e-06, "loss": 0.2453, "step": 1197400 }, { "epoch": 12.2, "learning_rate": 5.386476563030122e-06, "loss": 0.2014, "step": 1197500 }, { "epoch": 12.2, "learning_rate": 5.382684622658252e-06, "loss": 0.2342, "step": 1197600 }, { "epoch": 12.2, "learning_rate": 5.378893885926203e-06, "loss": 0.2383, "step": 1197700 }, { "epoch": 12.2, "learning_rate": 5.375104353019307e-06, "loss": 0.3112, "step": 1197800 }, { "epoch": 12.2, "learning_rate": 5.371316024122852e-06, "loss": 0.2621, "step": 1197900 }, { "epoch": 12.21, "learning_rate": 5.367528899422074e-06, "loss": 0.1848, "step": 1198000 }, { "epoch": 12.21, "learning_rate": 5.363742979102128e-06, "loss": 0.2435, "step": 1198100 }, { "epoch": 12.21, "learning_rate": 5.359958263348128e-06, "loss": 0.2564, "step": 1198200 }, { "epoch": 12.21, "learning_rate": 5.356174752345124e-06, "loss": 0.196, "step": 1198300 }, { "epoch": 12.21, "learning_rate": 5.352392446278118e-06, "loss": 0.249, "step": 1198400 }, { "epoch": 12.21, "learning_rate": 5.3486113453320216e-06, "loss": 0.205, "step": 1198500 }, { "epoch": 12.21, "learning_rate": 5.34483144969172e-06, "loss": 0.2446, "step": 1198600 }, { "epoch": 12.21, "learning_rate": 5.341052759542026e-06, "loss": 0.2557, "step": 1198700 }, { "epoch": 12.21, "learning_rate": 5.337275275067699e-06, "loss": 0.1655, "step": 1198800 }, { "epoch": 12.21, "learning_rate": 5.333498996453426e-06, "loss": 0.2122, "step": 1198900 }, { "epoch": 12.22, "learning_rate": 5.3297239238838616e-06, "loss": 0.2202, "step": 1199000 }, { "epoch": 12.22, "learning_rate": 5.325950057543563e-06, "loss": 0.2126, "step": 1199100 }, { "epoch": 12.22, "learning_rate": 5.32217739761706e-06, "loss": 0.2496, "step": 1199200 }, { "epoch": 12.22, "learning_rate": 5.3184059442888214e-06, "loss": 0.2643, "step": 1199300 }, { "epoch": 12.22, "learning_rate": 5.31463569774323e-06, "loss": 0.2007, "step": 1199400 }, { "epoch": 12.22, "learning_rate": 5.310866658164641e-06, "loss": 0.2174, "step": 1199500 }, { "epoch": 12.22, "learning_rate": 5.307098825737342e-06, "loss": 0.265, "step": 1199600 }, { "epoch": 12.22, "learning_rate": 5.303332200645541e-06, "loss": 0.1903, "step": 1199700 }, { "epoch": 12.22, "learning_rate": 5.299566783073417e-06, "loss": 0.2136, "step": 1199800 }, { "epoch": 12.22, "learning_rate": 5.295802573205071e-06, "loss": 0.1896, "step": 1199900 }, { "epoch": 12.23, "learning_rate": 5.292039571224554e-06, "loss": 0.221, "step": 1200000 }, { "epoch": 12.23, "learning_rate": 5.28827777731585e-06, "loss": 0.2142, "step": 1200100 }, { "epoch": 12.23, "learning_rate": 5.284517191662891e-06, "loss": 0.2944, "step": 1200200 }, { "epoch": 12.23, "learning_rate": 5.280757814449557e-06, "loss": 0.2238, "step": 1200300 }, { "epoch": 12.23, "learning_rate": 5.276999645859638e-06, "loss": 0.2468, "step": 1200400 }, { "epoch": 12.23, "learning_rate": 5.2732426860769e-06, "loss": 0.22, "step": 1200500 }, { "epoch": 12.23, "learning_rate": 5.269486935285039e-06, "loss": 0.2572, "step": 1200600 }, { "epoch": 12.23, "learning_rate": 5.265732393667679e-06, "loss": 0.2472, "step": 1200700 }, { "epoch": 12.23, "learning_rate": 5.261979061408396e-06, "loss": 0.227, "step": 1200800 }, { "epoch": 12.23, "learning_rate": 5.258226938690713e-06, "loss": 0.2292, "step": 1200900 }, { "epoch": 12.24, "learning_rate": 5.254476025698077e-06, "loss": 0.2499, "step": 1201000 }, { "epoch": 12.24, "learning_rate": 5.250726322613891e-06, "loss": 0.2848, "step": 1201100 }, { "epoch": 12.24, "learning_rate": 5.246977829621489e-06, "loss": 0.2267, "step": 1201200 }, { "epoch": 12.24, "learning_rate": 5.243230546904153e-06, "loss": 0.2004, "step": 1201300 }, { "epoch": 12.24, "learning_rate": 5.239484474645104e-06, "loss": 0.247, "step": 1201400 }, { "epoch": 12.24, "learning_rate": 5.235739613027509e-06, "loss": 0.2139, "step": 1201500 }, { "epoch": 12.24, "learning_rate": 5.2319959622344515e-06, "loss": 0.2193, "step": 1201600 }, { "epoch": 12.24, "learning_rate": 5.228253522448988e-06, "loss": 0.1867, "step": 1201700 }, { "epoch": 12.24, "learning_rate": 5.224512293854092e-06, "loss": 0.2241, "step": 1201800 }, { "epoch": 12.25, "learning_rate": 5.220772276632705e-06, "loss": 0.2311, "step": 1201900 }, { "epoch": 12.25, "learning_rate": 5.217070853026518e-06, "loss": 0.2056, "step": 1202000 }, { "epoch": 12.25, "learning_rate": 5.213333246982352e-06, "loss": 0.2375, "step": 1202100 }, { "epoch": 12.25, "learning_rate": 5.209596852858272e-06, "loss": 0.2576, "step": 1202200 }, { "epoch": 12.25, "learning_rate": 5.205861670836972e-06, "loss": 0.1892, "step": 1202300 }, { "epoch": 12.25, "learning_rate": 5.202127701101064e-06, "loss": 0.2904, "step": 1202400 }, { "epoch": 12.25, "learning_rate": 5.198394943833125e-06, "loss": 0.2432, "step": 1202500 }, { "epoch": 12.25, "learning_rate": 5.194663399215666e-06, "loss": 0.2085, "step": 1202600 }, { "epoch": 12.25, "learning_rate": 5.190933067431134e-06, "loss": 0.1573, "step": 1202700 }, { "epoch": 12.25, "learning_rate": 5.187203948661927e-06, "loss": 0.2333, "step": 1202800 }, { "epoch": 12.26, "learning_rate": 5.183476043090366e-06, "loss": 0.2463, "step": 1202900 }, { "epoch": 12.26, "learning_rate": 5.179749350898741e-06, "loss": 0.2392, "step": 1203000 }, { "epoch": 12.26, "learning_rate": 5.176023872269244e-06, "loss": 0.246, "step": 1203100 }, { "epoch": 12.26, "learning_rate": 5.172299607384039e-06, "loss": 0.2328, "step": 1203200 }, { "epoch": 12.26, "learning_rate": 5.168576556425227e-06, "loss": 0.2144, "step": 1203300 }, { "epoch": 12.26, "learning_rate": 5.164854719574827e-06, "loss": 0.1606, "step": 1203400 }, { "epoch": 12.26, "learning_rate": 5.161134097014824e-06, "loss": 0.2422, "step": 1203500 }, { "epoch": 12.26, "learning_rate": 5.157414688927139e-06, "loss": 0.2301, "step": 1203600 }, { "epoch": 12.26, "learning_rate": 5.153696495493615e-06, "loss": 0.2578, "step": 1203700 }, { "epoch": 12.26, "learning_rate": 5.149979516896056e-06, "loss": 0.2068, "step": 1203800 }, { "epoch": 12.27, "learning_rate": 5.146263753316204e-06, "loss": 0.2467, "step": 1203900 }, { "epoch": 12.27, "learning_rate": 5.142549204935736e-06, "loss": 0.2034, "step": 1204000 }, { "epoch": 12.27, "learning_rate": 5.138835871936267e-06, "loss": 0.1897, "step": 1204100 }, { "epoch": 12.27, "learning_rate": 5.135123754499362e-06, "loss": 0.1804, "step": 1204200 }, { "epoch": 12.27, "learning_rate": 5.13141285280653e-06, "loss": 0.2281, "step": 1204300 }, { "epoch": 12.27, "learning_rate": 5.127703167039191e-06, "loss": 0.2516, "step": 1204400 }, { "epoch": 12.27, "learning_rate": 5.1239946973787365e-06, "loss": 0.2353, "step": 1204500 }, { "epoch": 12.27, "learning_rate": 5.1202874440064964e-06, "loss": 0.2209, "step": 1204600 }, { "epoch": 12.27, "learning_rate": 5.116581407103721e-06, "loss": 0.214, "step": 1204700 }, { "epoch": 12.27, "learning_rate": 5.112876586851616e-06, "loss": 0.2345, "step": 1204800 }, { "epoch": 12.28, "learning_rate": 5.109172983431336e-06, "loss": 0.2109, "step": 1204900 }, { "epoch": 12.28, "learning_rate": 5.105470597023947e-06, "loss": 0.1926, "step": 1205000 }, { "epoch": 12.28, "learning_rate": 5.101769427810486e-06, "loss": 0.2467, "step": 1205100 }, { "epoch": 12.28, "learning_rate": 5.098069475971913e-06, "loss": 0.2104, "step": 1205200 }, { "epoch": 12.28, "learning_rate": 5.094370741689137e-06, "loss": 0.1784, "step": 1205300 }, { "epoch": 12.28, "learning_rate": 5.090673225143002e-06, "loss": 0.163, "step": 1205400 }, { "epoch": 12.28, "learning_rate": 5.086976926514301e-06, "loss": 0.2049, "step": 1205500 }, { "epoch": 12.28, "learning_rate": 5.083281845983752e-06, "loss": 0.2129, "step": 1205600 }, { "epoch": 12.28, "learning_rate": 5.079624916323469e-06, "loss": 0.1898, "step": 1205700 }, { "epoch": 12.28, "learning_rate": 5.0759322603456814e-06, "loss": 0.1772, "step": 1205800 }, { "epoch": 12.29, "learning_rate": 5.072240823006068e-06, "loss": 0.2404, "step": 1205900 }, { "epoch": 12.29, "learning_rate": 5.068550604485124e-06, "loss": 0.245, "step": 1206000 }, { "epoch": 12.29, "learning_rate": 5.0648616049632675e-06, "loss": 0.2545, "step": 1206100 }, { "epoch": 12.29, "learning_rate": 5.061173824620869e-06, "loss": 0.2213, "step": 1206200 }, { "epoch": 12.29, "learning_rate": 5.057487263638247e-06, "loss": 0.2699, "step": 1206300 }, { "epoch": 12.29, "learning_rate": 5.0538019221956474e-06, "loss": 0.2109, "step": 1206400 }, { "epoch": 12.29, "learning_rate": 5.050117800473263e-06, "loss": 0.2102, "step": 1206500 }, { "epoch": 12.29, "learning_rate": 5.046434898651225e-06, "loss": 0.281, "step": 1206600 }, { "epoch": 12.29, "learning_rate": 5.042753216909612e-06, "loss": 0.2118, "step": 1206700 }, { "epoch": 12.3, "learning_rate": 5.0390727554284235e-06, "loss": 0.2471, "step": 1206800 }, { "epoch": 12.3, "learning_rate": 5.035393514387618e-06, "loss": 0.2128, "step": 1206900 }, { "epoch": 12.3, "learning_rate": 5.031715493967096e-06, "loss": 0.2636, "step": 1207000 }, { "epoch": 12.3, "learning_rate": 5.028038694346673e-06, "loss": 0.2773, "step": 1207100 }, { "epoch": 12.3, "learning_rate": 5.0243631157061365e-06, "loss": 0.1882, "step": 1207200 }, { "epoch": 12.3, "learning_rate": 5.020688758225203e-06, "loss": 0.2464, "step": 1207300 }, { "epoch": 12.3, "learning_rate": 5.017015622083514e-06, "loss": 0.1952, "step": 1207400 }, { "epoch": 12.3, "learning_rate": 5.01334370746067e-06, "loss": 0.2506, "step": 1207500 }, { "epoch": 12.3, "learning_rate": 5.009673014536206e-06, "loss": 0.2445, "step": 1207600 }, { "epoch": 12.3, "learning_rate": 5.006003543489597e-06, "loss": 0.2303, "step": 1207700 }, { "epoch": 12.31, "learning_rate": 5.002335294500257e-06, "loss": 0.2483, "step": 1207800 }, { "epoch": 12.31, "learning_rate": 4.998668267747552e-06, "loss": 0.2403, "step": 1207900 }, { "epoch": 12.31, "learning_rate": 4.995002463410762e-06, "loss": 0.2162, "step": 1208000 }, { "epoch": 12.31, "learning_rate": 4.991374521434112e-06, "loss": 0.2039, "step": 1208100 }, { "epoch": 12.31, "learning_rate": 4.9877111502381845e-06, "loss": 0.2229, "step": 1208200 }, { "epoch": 12.31, "learning_rate": 4.984049001993913e-06, "loss": 0.1642, "step": 1208300 }, { "epoch": 12.31, "learning_rate": 4.9803880768803655e-06, "loss": 0.2086, "step": 1208400 }, { "epoch": 12.31, "learning_rate": 4.976728375076525e-06, "loss": 0.2273, "step": 1208500 }, { "epoch": 12.31, "learning_rate": 4.973069896761334e-06, "loss": 0.212, "step": 1208600 }, { "epoch": 12.31, "learning_rate": 4.969412642113685e-06, "loss": 0.1801, "step": 1208700 }, { "epoch": 12.32, "learning_rate": 4.965756611312369e-06, "loss": 0.2016, "step": 1208800 }, { "epoch": 12.32, "learning_rate": 4.962101804536161e-06, "loss": 0.2255, "step": 1208900 }, { "epoch": 12.32, "learning_rate": 4.9584482219637585e-06, "loss": 0.2341, "step": 1209000 }, { "epoch": 12.32, "learning_rate": 4.9547958637737946e-06, "loss": 0.1781, "step": 1209100 }, { "epoch": 12.32, "learning_rate": 4.951144730144853e-06, "loss": 0.2156, "step": 1209200 }, { "epoch": 12.32, "learning_rate": 4.9474948212554484e-06, "loss": 0.2324, "step": 1209300 }, { "epoch": 12.32, "learning_rate": 4.943846137284051e-06, "loss": 0.1833, "step": 1209400 }, { "epoch": 12.32, "learning_rate": 4.9402351469329746e-06, "loss": 0.1997, "step": 1209500 }, { "epoch": 12.32, "learning_rate": 4.936588901079078e-06, "loss": 0.2267, "step": 1209600 }, { "epoch": 12.32, "learning_rate": 4.932943880676408e-06, "loss": 0.236, "step": 1209700 }, { "epoch": 12.33, "learning_rate": 4.929300085903187e-06, "loss": 0.2224, "step": 1209800 }, { "epoch": 12.33, "learning_rate": 4.925657516937587e-06, "loss": 0.1528, "step": 1209900 }, { "epoch": 12.33, "learning_rate": 4.922016173957691e-06, "loss": 0.2231, "step": 1210000 }, { "epoch": 12.33, "learning_rate": 4.918376057141545e-06, "loss": 0.2014, "step": 1210100 }, { "epoch": 12.33, "learning_rate": 4.914737166667133e-06, "loss": 0.1906, "step": 1210200 }, { "epoch": 12.33, "learning_rate": 4.911099502712372e-06, "loss": 0.2156, "step": 1210300 }, { "epoch": 12.33, "learning_rate": 4.90746306545512e-06, "loss": 0.2098, "step": 1210400 }, { "epoch": 12.33, "learning_rate": 4.90382785507319e-06, "loss": 0.203, "step": 1210500 }, { "epoch": 12.33, "learning_rate": 4.900193871744302e-06, "loss": 0.2511, "step": 1210600 }, { "epoch": 12.33, "learning_rate": 4.8965611156461476e-06, "loss": 0.2519, "step": 1210700 }, { "epoch": 12.34, "learning_rate": 4.892929586956352e-06, "loss": 0.2074, "step": 1210800 }, { "epoch": 12.34, "learning_rate": 4.889299285852462e-06, "loss": 0.2638, "step": 1210900 }, { "epoch": 12.34, "learning_rate": 4.885670212511984e-06, "loss": 0.2433, "step": 1211000 }, { "epoch": 12.34, "learning_rate": 4.882042367112364e-06, "loss": 0.172, "step": 1211100 }, { "epoch": 12.34, "learning_rate": 4.878415749830968e-06, "loss": 0.2011, "step": 1211200 }, { "epoch": 12.34, "learning_rate": 4.874790360845127e-06, "loss": 0.2044, "step": 1211300 }, { "epoch": 12.34, "learning_rate": 4.8711662003320965e-06, "loss": 0.1757, "step": 1211400 }, { "epoch": 12.34, "learning_rate": 4.867543268469074e-06, "loss": 0.2454, "step": 1211500 }, { "epoch": 12.34, "learning_rate": 4.863921565433209e-06, "loss": 0.2564, "step": 1211600 }, { "epoch": 12.35, "learning_rate": 4.860301091401572e-06, "loss": 0.1924, "step": 1211700 }, { "epoch": 12.35, "learning_rate": 4.856681846551196e-06, "loss": 0.239, "step": 1211800 }, { "epoch": 12.35, "learning_rate": 4.85306383105902e-06, "loss": 0.208, "step": 1211900 }, { "epoch": 12.35, "learning_rate": 4.849447045101952e-06, "loss": 0.2724, "step": 1212000 }, { "epoch": 12.35, "learning_rate": 4.845831488856843e-06, "loss": 0.244, "step": 1212100 }, { "epoch": 12.35, "learning_rate": 4.842217162500455e-06, "loss": 0.253, "step": 1212200 }, { "epoch": 12.35, "learning_rate": 4.8386040662095135e-06, "loss": 0.2259, "step": 1212300 }, { "epoch": 12.35, "learning_rate": 4.834992200160686e-06, "loss": 0.1815, "step": 1212400 }, { "epoch": 12.35, "learning_rate": 4.831381564530552e-06, "loss": 0.2084, "step": 1212500 }, { "epoch": 12.35, "learning_rate": 4.827772159495665e-06, "loss": 0.2744, "step": 1212600 }, { "epoch": 12.36, "learning_rate": 4.824200060882236e-06, "loss": 0.2409, "step": 1212700 }, { "epoch": 12.36, "learning_rate": 4.820593105256853e-06, "loss": 0.1888, "step": 1212800 }, { "epoch": 12.36, "learning_rate": 4.816987380754204e-06, "loss": 0.2191, "step": 1212900 }, { "epoch": 12.36, "learning_rate": 4.8133828875505915e-06, "loss": 0.2571, "step": 1213000 }, { "epoch": 12.36, "learning_rate": 4.809779625822247e-06, "loss": 0.2353, "step": 1213100 }, { "epoch": 12.36, "learning_rate": 4.806177595745357e-06, "loss": 0.2062, "step": 1213200 }, { "epoch": 12.36, "learning_rate": 4.80257679749603e-06, "loss": 0.1968, "step": 1213300 }, { "epoch": 12.36, "learning_rate": 4.7989772312503256e-06, "loss": 0.2049, "step": 1213400 }, { "epoch": 12.36, "learning_rate": 4.795378897184249e-06, "loss": 0.2342, "step": 1213500 }, { "epoch": 12.36, "learning_rate": 4.7917817954737275e-06, "loss": 0.2101, "step": 1213600 }, { "epoch": 12.37, "learning_rate": 4.788185926294639e-06, "loss": 0.2385, "step": 1213700 }, { "epoch": 12.37, "learning_rate": 4.784591289822801e-06, "loss": 0.2374, "step": 1213800 }, { "epoch": 12.37, "learning_rate": 4.780997886233981e-06, "loss": 0.2039, "step": 1213900 }, { "epoch": 12.37, "learning_rate": 4.777405715703856e-06, "loss": 0.2387, "step": 1214000 }, { "epoch": 12.37, "learning_rate": 4.773814778408073e-06, "loss": 0.2071, "step": 1214100 }, { "epoch": 12.37, "learning_rate": 4.770225074522204e-06, "loss": 0.1997, "step": 1214200 }, { "epoch": 12.37, "learning_rate": 4.7666366042217655e-06, "loss": 0.2791, "step": 1214300 }, { "epoch": 12.37, "learning_rate": 4.7630493676822105e-06, "loss": 0.2756, "step": 1214400 }, { "epoch": 12.37, "learning_rate": 4.759463365078945e-06, "loss": 0.2188, "step": 1214500 }, { "epoch": 12.37, "learning_rate": 4.755878596587286e-06, "loss": 0.2, "step": 1214600 }, { "epoch": 12.38, "learning_rate": 4.752295062382514e-06, "loss": 0.2411, "step": 1214700 }, { "epoch": 12.38, "learning_rate": 4.74871276263985e-06, "loss": 0.204, "step": 1214800 }, { "epoch": 12.38, "learning_rate": 4.745131697534432e-06, "loss": 0.2263, "step": 1214900 }, { "epoch": 12.38, "learning_rate": 4.7415518672413625e-06, "loss": 0.2215, "step": 1215000 }, { "epoch": 12.38, "learning_rate": 4.7379732719356795e-06, "loss": 0.1962, "step": 1215100 }, { "epoch": 12.38, "learning_rate": 4.734395911792338e-06, "loss": 0.1752, "step": 1215200 }, { "epoch": 12.38, "learning_rate": 4.73081978698626e-06, "loss": 0.2872, "step": 1215300 }, { "epoch": 12.38, "learning_rate": 4.727244897692298e-06, "loss": 0.255, "step": 1215400 }, { "epoch": 12.38, "learning_rate": 4.723671244085238e-06, "loss": 0.1877, "step": 1215500 }, { "epoch": 12.38, "learning_rate": 4.720098826339815e-06, "loss": 0.1911, "step": 1215600 }, { "epoch": 12.39, "learning_rate": 4.716527644630695e-06, "loss": 0.1827, "step": 1215700 }, { "epoch": 12.39, "learning_rate": 4.712957699132498e-06, "loss": 0.1595, "step": 1215800 }, { "epoch": 12.39, "learning_rate": 4.7093889900197565e-06, "loss": 0.2572, "step": 1215900 }, { "epoch": 12.39, "learning_rate": 4.705821517466968e-06, "loss": 0.2585, "step": 1216000 }, { "epoch": 12.39, "learning_rate": 4.702255281648565e-06, "loss": 0.2224, "step": 1216100 }, { "epoch": 12.39, "learning_rate": 4.698725926604726e-06, "loss": 0.2318, "step": 1216200 }, { "epoch": 12.39, "learning_rate": 4.695162152406431e-06, "loss": 0.2037, "step": 1216300 }, { "epoch": 12.39, "learning_rate": 4.6915996154636905e-06, "loss": 0.2826, "step": 1216400 }, { "epoch": 12.39, "learning_rate": 4.688038315950695e-06, "loss": 0.1848, "step": 1216500 }, { "epoch": 12.39, "learning_rate": 4.684478254041583e-06, "loss": 0.2271, "step": 1216600 }, { "epoch": 12.4, "learning_rate": 4.680919429910397e-06, "loss": 0.1999, "step": 1216700 }, { "epoch": 12.4, "learning_rate": 4.67736184373116e-06, "loss": 0.2465, "step": 1216800 }, { "epoch": 12.4, "learning_rate": 4.673805495677806e-06, "loss": 0.2279, "step": 1216900 }, { "epoch": 12.4, "learning_rate": 4.670250385924226e-06, "loss": 0.2228, "step": 1217000 }, { "epoch": 12.4, "learning_rate": 4.666696514644243e-06, "loss": 0.2462, "step": 1217100 }, { "epoch": 12.4, "learning_rate": 4.663143882011625e-06, "loss": 0.1899, "step": 1217200 }, { "epoch": 12.4, "learning_rate": 4.659592488200062e-06, "loss": 0.2102, "step": 1217300 }, { "epoch": 12.4, "learning_rate": 4.656042333383203e-06, "loss": 0.2162, "step": 1217400 }, { "epoch": 12.4, "learning_rate": 4.652493417734638e-06, "loss": 0.1812, "step": 1217500 }, { "epoch": 12.41, "learning_rate": 4.648945741427869e-06, "loss": 0.2153, "step": 1217600 }, { "epoch": 12.41, "learning_rate": 4.645399304636367e-06, "loss": 0.2368, "step": 1217700 }, { "epoch": 12.41, "learning_rate": 4.641854107533531e-06, "loss": 0.191, "step": 1217800 }, { "epoch": 12.41, "learning_rate": 4.638310150292707e-06, "loss": 0.195, "step": 1217900 }, { "epoch": 12.41, "learning_rate": 4.634767433087158e-06, "loss": 0.2204, "step": 1218000 }, { "epoch": 12.41, "learning_rate": 4.631225956090109e-06, "loss": 0.194, "step": 1218100 }, { "epoch": 12.41, "learning_rate": 4.627685719474719e-06, "loss": 0.2484, "step": 1218200 }, { "epoch": 12.41, "learning_rate": 4.62418210723338e-06, "loss": 0.2406, "step": 1218300 }, { "epoch": 12.41, "learning_rate": 4.620644339492399e-06, "loss": 0.2459, "step": 1218400 }, { "epoch": 12.41, "learning_rate": 4.6171078126504554e-06, "loss": 0.2636, "step": 1218500 }, { "epoch": 12.42, "learning_rate": 4.613572526880459e-06, "loss": 0.2096, "step": 1218600 }, { "epoch": 12.42, "learning_rate": 4.610038482355277e-06, "loss": 0.2256, "step": 1218700 }, { "epoch": 12.42, "learning_rate": 4.6065056792476875e-06, "loss": 0.2083, "step": 1218800 }, { "epoch": 12.42, "learning_rate": 4.602974117730432e-06, "loss": 0.2418, "step": 1218900 }, { "epoch": 12.42, "learning_rate": 4.599443797976185e-06, "loss": 0.2429, "step": 1219000 }, { "epoch": 12.42, "learning_rate": 4.595914720157546e-06, "loss": 0.2151, "step": 1219100 }, { "epoch": 12.42, "learning_rate": 4.592386884447076e-06, "loss": 0.2322, "step": 1219200 }, { "epoch": 12.42, "learning_rate": 4.588860291017262e-06, "loss": 0.2162, "step": 1219300 }, { "epoch": 12.42, "learning_rate": 4.585334940040531e-06, "loss": 0.1853, "step": 1219400 }, { "epoch": 12.42, "learning_rate": 4.581810831689259e-06, "loss": 0.2322, "step": 1219500 }, { "epoch": 12.43, "learning_rate": 4.578287966135753e-06, "loss": 0.2736, "step": 1219600 }, { "epoch": 12.43, "learning_rate": 4.574766343552252e-06, "loss": 0.2395, "step": 1219700 }, { "epoch": 12.43, "learning_rate": 4.571245964110945e-06, "loss": 0.2142, "step": 1219800 }, { "epoch": 12.43, "learning_rate": 4.567726827983969e-06, "loss": 0.219, "step": 1219900 }, { "epoch": 12.43, "learning_rate": 4.56420893534337e-06, "loss": 0.2038, "step": 1220000 }, { "epoch": 12.43, "learning_rate": 4.560692286361161e-06, "loss": 0.2089, "step": 1220100 }, { "epoch": 12.43, "learning_rate": 4.557176881209287e-06, "loss": 0.2244, "step": 1220200 }, { "epoch": 12.43, "learning_rate": 4.553662720059635e-06, "loss": 0.2778, "step": 1220300 }, { "epoch": 12.43, "learning_rate": 4.550149803084012e-06, "loss": 0.196, "step": 1220400 }, { "epoch": 12.43, "learning_rate": 4.546638130454187e-06, "loss": 0.24, "step": 1220500 }, { "epoch": 12.44, "learning_rate": 4.543127702341864e-06, "loss": 0.2198, "step": 1220600 }, { "epoch": 12.44, "learning_rate": 4.539618518918673e-06, "loss": 0.226, "step": 1220700 }, { "epoch": 12.44, "learning_rate": 4.536110580356199e-06, "loss": 0.3253, "step": 1220800 }, { "epoch": 12.44, "learning_rate": 4.5326038868259625e-06, "loss": 0.2325, "step": 1220900 }, { "epoch": 12.44, "learning_rate": 4.529098438499406e-06, "loss": 0.213, "step": 1221000 }, { "epoch": 12.44, "learning_rate": 4.525594235547936e-06, "loss": 0.2438, "step": 1221100 }, { "epoch": 12.44, "learning_rate": 4.522091278142892e-06, "loss": 0.2613, "step": 1221200 }, { "epoch": 12.44, "learning_rate": 4.518589566455534e-06, "loss": 0.2278, "step": 1221300 }, { "epoch": 12.44, "learning_rate": 4.515089100657078e-06, "loss": 0.2247, "step": 1221400 }, { "epoch": 12.44, "learning_rate": 4.5115898809186876e-06, "loss": 0.2167, "step": 1221500 }, { "epoch": 12.45, "learning_rate": 4.508091907411438e-06, "loss": 0.2475, "step": 1221600 }, { "epoch": 12.45, "learning_rate": 4.504595180306367e-06, "loss": 0.1793, "step": 1221700 }, { "epoch": 12.45, "learning_rate": 4.501099699774444e-06, "loss": 0.2718, "step": 1221800 }, { "epoch": 12.45, "learning_rate": 4.4976054659865754e-06, "loss": 0.2141, "step": 1221900 }, { "epoch": 12.45, "learning_rate": 4.494112479113608e-06, "loss": 0.2188, "step": 1222000 }, { "epoch": 12.45, "learning_rate": 4.4906207393263285e-06, "loss": 0.1991, "step": 1222100 }, { "epoch": 12.45, "learning_rate": 4.487130246795471e-06, "loss": 0.2193, "step": 1222200 }, { "epoch": 12.45, "learning_rate": 4.483641001691685e-06, "loss": 0.2267, "step": 1222300 }, { "epoch": 12.45, "learning_rate": 4.480153004185581e-06, "loss": 0.2337, "step": 1222400 }, { "epoch": 12.46, "learning_rate": 4.476666254447706e-06, "loss": 0.2332, "step": 1222500 }, { "epoch": 12.46, "learning_rate": 4.473180752648529e-06, "loss": 0.2619, "step": 1222600 }, { "epoch": 12.46, "learning_rate": 4.46973133531668e-06, "loss": 0.2063, "step": 1222700 }, { "epoch": 12.46, "learning_rate": 4.466248317422476e-06, "loss": 0.2533, "step": 1222800 }, { "epoch": 12.46, "learning_rate": 4.462766547976353e-06, "loss": 0.2569, "step": 1222900 }, { "epoch": 12.46, "learning_rate": 4.459286027148551e-06, "loss": 0.2066, "step": 1223000 }, { "epoch": 12.46, "learning_rate": 4.4558067551092366e-06, "loss": 0.2132, "step": 1223100 }, { "epoch": 12.46, "learning_rate": 4.452328732028534e-06, "loss": 0.1798, "step": 1223200 }, { "epoch": 12.46, "learning_rate": 4.448851958076497e-06, "loss": 0.2261, "step": 1223300 }, { "epoch": 12.46, "learning_rate": 4.445376433423113e-06, "loss": 0.2648, "step": 1223400 }, { "epoch": 12.47, "learning_rate": 4.441902158238325e-06, "loss": 0.2231, "step": 1223500 }, { "epoch": 12.47, "learning_rate": 4.438429132692002e-06, "loss": 0.2391, "step": 1223600 }, { "epoch": 12.47, "learning_rate": 4.4349573569539435e-06, "loss": 0.1969, "step": 1223700 }, { "epoch": 12.47, "learning_rate": 4.43148683119391e-06, "loss": 0.2293, "step": 1223800 }, { "epoch": 12.47, "learning_rate": 4.428017555581587e-06, "loss": 0.198, "step": 1223900 }, { "epoch": 12.47, "learning_rate": 4.424549530286598e-06, "loss": 0.2304, "step": 1224000 }, { "epoch": 12.47, "learning_rate": 4.421082755478509e-06, "loss": 0.2181, "step": 1224100 }, { "epoch": 12.47, "learning_rate": 4.417617231326833e-06, "loss": 0.2346, "step": 1224200 }, { "epoch": 12.47, "learning_rate": 4.4141529580009985e-06, "loss": 0.2787, "step": 1224300 }, { "epoch": 12.47, "learning_rate": 4.410689935670396e-06, "loss": 0.2352, "step": 1224400 }, { "epoch": 12.48, "learning_rate": 4.407228164504348e-06, "loss": 0.2453, "step": 1224500 }, { "epoch": 12.48, "learning_rate": 4.403767644672113e-06, "loss": 0.1951, "step": 1224600 }, { "epoch": 12.48, "learning_rate": 4.40030837634289e-06, "loss": 0.2303, "step": 1224700 }, { "epoch": 12.48, "learning_rate": 4.396850359685816e-06, "loss": 0.1946, "step": 1224800 }, { "epoch": 12.48, "learning_rate": 4.393393594869975e-06, "loss": 0.2173, "step": 1224900 }, { "epoch": 12.48, "learning_rate": 4.389938082064368e-06, "loss": 0.1811, "step": 1225000 }, { "epoch": 12.48, "learning_rate": 4.386483821437957e-06, "loss": 0.2216, "step": 1225100 }, { "epoch": 12.48, "learning_rate": 4.383030813159638e-06, "loss": 0.2125, "step": 1225200 }, { "epoch": 12.48, "learning_rate": 4.379579057398232e-06, "loss": 0.2275, "step": 1225300 }, { "epoch": 12.48, "learning_rate": 4.376128554322518e-06, "loss": 0.2449, "step": 1225400 }, { "epoch": 12.49, "learning_rate": 4.372679304101208e-06, "loss": 0.2773, "step": 1225500 }, { "epoch": 12.49, "learning_rate": 4.3692313069029366e-06, "loss": 0.2104, "step": 1225600 }, { "epoch": 12.49, "learning_rate": 4.365784562896296e-06, "loss": 0.2496, "step": 1225700 }, { "epoch": 12.49, "learning_rate": 4.362339072249817e-06, "loss": 0.2082, "step": 1225800 }, { "epoch": 12.49, "learning_rate": 4.358894835131956e-06, "loss": 0.1835, "step": 1225900 }, { "epoch": 12.49, "learning_rate": 4.3554518517111206e-06, "loss": 0.256, "step": 1226000 }, { "epoch": 12.49, "learning_rate": 4.352010122155654e-06, "loss": 0.2591, "step": 1226100 }, { "epoch": 12.49, "learning_rate": 4.348569646633838e-06, "loss": 0.2852, "step": 1226200 }, { "epoch": 12.49, "learning_rate": 4.34513042531388e-06, "loss": 0.285, "step": 1226300 }, { "epoch": 12.49, "learning_rate": 4.341692458363944e-06, "loss": 0.2228, "step": 1226400 }, { "epoch": 12.5, "learning_rate": 4.3382557459521314e-06, "loss": 0.1911, "step": 1226500 }, { "epoch": 12.5, "learning_rate": 4.334820288246467e-06, "loss": 0.174, "step": 1226600 }, { "epoch": 12.5, "learning_rate": 4.331386085414924e-06, "loss": 0.2197, "step": 1226700 }, { "epoch": 12.5, "learning_rate": 4.327987460890313e-06, "loss": 0.2346, "step": 1226800 }, { "epoch": 12.5, "learning_rate": 4.324555755757771e-06, "loss": 0.2029, "step": 1226900 }, { "epoch": 12.5, "learning_rate": 4.321125306001229e-06, "loss": 0.1936, "step": 1227000 }, { "epoch": 12.5, "learning_rate": 4.317696111788416e-06, "loss": 0.2335, "step": 1227100 }, { "epoch": 12.5, "learning_rate": 4.314268173287002e-06, "loss": 0.2381, "step": 1227200 }, { "epoch": 12.5, "learning_rate": 4.3108414906645946e-06, "loss": 0.2339, "step": 1227300 }, { "epoch": 12.5, "learning_rate": 4.307416064088724e-06, "loss": 0.215, "step": 1227400 }, { "epoch": 12.51, "learning_rate": 4.303991893726886e-06, "loss": 0.2418, "step": 1227500 }, { "epoch": 12.51, "learning_rate": 4.300568979746504e-06, "loss": 0.2067, "step": 1227600 }, { "epoch": 12.51, "learning_rate": 4.297147322314927e-06, "loss": 0.1888, "step": 1227700 }, { "epoch": 12.51, "learning_rate": 4.293761119385321e-06, "loss": 0.2111, "step": 1227800 }, { "epoch": 12.51, "learning_rate": 4.2903419629835406e-06, "loss": 0.2542, "step": 1227900 }, { "epoch": 12.51, "learning_rate": 4.286924063630605e-06, "loss": 0.2368, "step": 1228000 }, { "epoch": 12.51, "learning_rate": 4.283507421493648e-06, "loss": 0.2724, "step": 1228100 }, { "epoch": 12.51, "learning_rate": 4.280092036739698e-06, "loss": 0.2585, "step": 1228200 }, { "epoch": 12.51, "learning_rate": 4.2767120445823785e-06, "loss": 0.2619, "step": 1228300 }, { "epoch": 12.52, "learning_rate": 4.273299162517388e-06, "loss": 0.2783, "step": 1228400 }, { "epoch": 12.52, "learning_rate": 4.269887538334539e-06, "loss": 0.1975, "step": 1228500 }, { "epoch": 12.52, "learning_rate": 4.2664771722006455e-06, "loss": 0.214, "step": 1228600 }, { "epoch": 12.52, "learning_rate": 4.2630680642824475e-06, "loss": 0.1948, "step": 1228700 }, { "epoch": 12.52, "learning_rate": 4.2596602147466265e-06, "loss": 0.2242, "step": 1228800 }, { "epoch": 12.52, "learning_rate": 4.2562536237598184e-06, "loss": 0.206, "step": 1228900 }, { "epoch": 12.52, "learning_rate": 4.252848291488568e-06, "loss": 0.2079, "step": 1229000 }, { "epoch": 12.52, "learning_rate": 4.24944421809939e-06, "loss": 0.2565, "step": 1229100 }, { "epoch": 12.52, "learning_rate": 4.246041403758724e-06, "loss": 0.2417, "step": 1229200 }, { "epoch": 12.52, "learning_rate": 4.242639848632933e-06, "loss": 0.2478, "step": 1229300 }, { "epoch": 12.53, "learning_rate": 4.239239552888342e-06, "loss": 0.22, "step": 1229400 }, { "epoch": 12.53, "learning_rate": 4.23584051669121e-06, "loss": 0.1878, "step": 1229500 }, { "epoch": 12.53, "learning_rate": 4.232442740207716e-06, "loss": 0.2163, "step": 1229600 }, { "epoch": 12.53, "learning_rate": 4.2290462236040016e-06, "loss": 0.2526, "step": 1229700 }, { "epoch": 12.53, "learning_rate": 4.225650967046135e-06, "loss": 0.2172, "step": 1229800 }, { "epoch": 12.53, "learning_rate": 4.22225697070013e-06, "loss": 0.252, "step": 1229900 }, { "epoch": 12.53, "learning_rate": 4.218864234731917e-06, "loss": 0.194, "step": 1230000 }, { "epoch": 12.53, "learning_rate": 4.2154727593073905e-06, "loss": 0.2067, "step": 1230100 }, { "epoch": 12.53, "learning_rate": 4.21208254459238e-06, "loss": 0.1969, "step": 1230200 }, { "epoch": 12.53, "learning_rate": 4.208693590752631e-06, "loss": 0.2371, "step": 1230300 }, { "epoch": 12.54, "learning_rate": 4.2053058979538505e-06, "loss": 0.2593, "step": 1230400 }, { "epoch": 12.54, "learning_rate": 4.201919466361685e-06, "loss": 0.2375, "step": 1230500 }, { "epoch": 12.54, "learning_rate": 4.198534296141694e-06, "loss": 0.2355, "step": 1230600 }, { "epoch": 12.54, "learning_rate": 4.195150387459403e-06, "loss": 0.1997, "step": 1230700 }, { "epoch": 12.54, "learning_rate": 4.1917677404802614e-06, "loss": 0.2622, "step": 1230800 }, { "epoch": 12.54, "learning_rate": 4.188386355369662e-06, "loss": 0.2256, "step": 1230900 }, { "epoch": 12.54, "learning_rate": 4.185006232292934e-06, "loss": 0.2531, "step": 1231000 }, { "epoch": 12.54, "learning_rate": 4.1816273714153504e-06, "loss": 0.2901, "step": 1231100 }, { "epoch": 12.54, "learning_rate": 4.178249772902108e-06, "loss": 0.2242, "step": 1231200 }, { "epoch": 12.54, "learning_rate": 4.174873436918355e-06, "loss": 0.2488, "step": 1231300 }, { "epoch": 12.55, "learning_rate": 4.171498363629175e-06, "loss": 0.255, "step": 1231400 }, { "epoch": 12.55, "learning_rate": 4.168124553199595e-06, "loss": 0.1909, "step": 1231500 }, { "epoch": 12.55, "learning_rate": 4.164752005794563e-06, "loss": 0.2557, "step": 1231600 }, { "epoch": 12.55, "learning_rate": 4.161380721578978e-06, "loss": 0.2289, "step": 1231700 }, { "epoch": 12.55, "learning_rate": 4.158010700717687e-06, "loss": 0.2493, "step": 1231800 }, { "epoch": 12.55, "learning_rate": 4.154641943375449e-06, "loss": 0.209, "step": 1231900 }, { "epoch": 12.55, "learning_rate": 4.1512744497169815e-06, "loss": 0.2151, "step": 1232000 }, { "epoch": 12.55, "learning_rate": 4.147908219906941e-06, "loss": 0.2183, "step": 1232100 }, { "epoch": 12.55, "learning_rate": 4.144543254109907e-06, "loss": 0.2208, "step": 1232200 }, { "epoch": 12.55, "learning_rate": 4.14117955249041e-06, "loss": 0.2033, "step": 1232300 }, { "epoch": 12.56, "learning_rate": 4.137817115212926e-06, "loss": 0.2374, "step": 1232400 }, { "epoch": 12.56, "learning_rate": 4.134455942441839e-06, "loss": 0.2233, "step": 1232500 }, { "epoch": 12.56, "learning_rate": 4.131096034341497e-06, "loss": 0.2232, "step": 1232600 }, { "epoch": 12.56, "learning_rate": 4.127770971247369e-06, "loss": 0.2309, "step": 1232700 }, { "epoch": 12.56, "learning_rate": 4.1244135803304915e-06, "loss": 0.2124, "step": 1232800 }, { "epoch": 12.56, "learning_rate": 4.121057454575383e-06, "loss": 0.2514, "step": 1232900 }, { "epoch": 12.56, "learning_rate": 4.1177025941461185e-06, "loss": 0.2397, "step": 1233000 }, { "epoch": 12.56, "learning_rate": 4.1143489992067405e-06, "loss": 0.2597, "step": 1233100 }, { "epoch": 12.56, "learning_rate": 4.110996669921228e-06, "loss": 0.2075, "step": 1233200 }, { "epoch": 12.57, "learning_rate": 4.107645606453472e-06, "loss": 0.2468, "step": 1233300 }, { "epoch": 12.57, "learning_rate": 4.104295808967326e-06, "loss": 0.2435, "step": 1233400 }, { "epoch": 12.57, "learning_rate": 4.100947277626581e-06, "loss": 0.2433, "step": 1233500 }, { "epoch": 12.57, "learning_rate": 4.097600012594954e-06, "loss": 0.3267, "step": 1233600 }, { "epoch": 12.57, "learning_rate": 4.094254014036111e-06, "loss": 0.2352, "step": 1233700 }, { "epoch": 12.57, "learning_rate": 4.0909092821136495e-06, "loss": 0.1993, "step": 1233800 }, { "epoch": 12.57, "learning_rate": 4.087565816991113e-06, "loss": 0.2072, "step": 1233900 }, { "epoch": 12.57, "learning_rate": 4.084223618831961e-06, "loss": 0.2152, "step": 1234000 }, { "epoch": 12.57, "learning_rate": 4.08088268779962e-06, "loss": 0.2207, "step": 1234100 }, { "epoch": 12.57, "learning_rate": 4.077543024057444e-06, "loss": 0.2289, "step": 1234200 }, { "epoch": 12.58, "learning_rate": 4.07420462776871e-06, "loss": 0.2025, "step": 1234300 }, { "epoch": 12.58, "learning_rate": 4.070867499096655e-06, "loss": 0.2608, "step": 1234400 }, { "epoch": 12.58, "learning_rate": 4.067531638204448e-06, "loss": 0.2107, "step": 1234500 }, { "epoch": 12.58, "learning_rate": 4.064197045255178e-06, "loss": 0.2346, "step": 1234600 }, { "epoch": 12.58, "learning_rate": 4.060863720411898e-06, "loss": 0.2021, "step": 1234700 }, { "epoch": 12.58, "learning_rate": 4.057531663837585e-06, "loss": 0.2101, "step": 1234800 }, { "epoch": 12.58, "learning_rate": 4.054200875695157e-06, "loss": 0.1836, "step": 1234900 }, { "epoch": 12.58, "learning_rate": 4.05087135614747e-06, "loss": 0.22, "step": 1235000 }, { "epoch": 12.58, "learning_rate": 4.047543105357324e-06, "loss": 0.2208, "step": 1235100 }, { "epoch": 12.58, "learning_rate": 4.044216123487437e-06, "loss": 0.2185, "step": 1235200 }, { "epoch": 12.59, "learning_rate": 4.040890410700486e-06, "loss": 0.22, "step": 1235300 }, { "epoch": 12.59, "learning_rate": 4.037565967159084e-06, "loss": 0.2587, "step": 1235400 }, { "epoch": 12.59, "learning_rate": 4.034242793025763e-06, "loss": 0.1732, "step": 1235500 }, { "epoch": 12.59, "learning_rate": 4.030920888463015e-06, "loss": 0.1684, "step": 1235600 }, { "epoch": 12.59, "learning_rate": 4.0276002536332615e-06, "loss": 0.1287, "step": 1235700 }, { "epoch": 12.59, "learning_rate": 4.024280888698867e-06, "loss": 0.2497, "step": 1235800 }, { "epoch": 12.59, "learning_rate": 4.020962793822112e-06, "loss": 0.2051, "step": 1235900 }, { "epoch": 12.59, "learning_rate": 4.017679131123695e-06, "loss": 0.2319, "step": 1236000 }, { "epoch": 12.59, "learning_rate": 4.014363564144261e-06, "loss": 0.2176, "step": 1236100 }, { "epoch": 12.59, "learning_rate": 4.011049267707375e-06, "loss": 0.2349, "step": 1236200 }, { "epoch": 12.6, "learning_rate": 4.007736241975088e-06, "loss": 0.2112, "step": 1236300 }, { "epoch": 12.6, "learning_rate": 4.004424487109381e-06, "loss": 0.2135, "step": 1236400 }, { "epoch": 12.6, "learning_rate": 4.0011140032721845e-06, "loss": 0.2174, "step": 1236500 }, { "epoch": 12.6, "learning_rate": 3.997804790625368e-06, "loss": 0.2524, "step": 1236600 }, { "epoch": 12.6, "learning_rate": 3.994496849330714e-06, "loss": 0.2514, "step": 1236700 }, { "epoch": 12.6, "learning_rate": 3.991190179549975e-06, "loss": 0.2244, "step": 1236800 }, { "epoch": 12.6, "learning_rate": 3.9878847814448305e-06, "loss": 0.1802, "step": 1236900 }, { "epoch": 12.6, "learning_rate": 3.984580655176878e-06, "loss": 0.1939, "step": 1237000 }, { "epoch": 12.6, "learning_rate": 3.9812778009076825e-06, "loss": 0.2233, "step": 1237100 }, { "epoch": 12.6, "learning_rate": 3.977976218798737e-06, "loss": 0.2191, "step": 1237200 }, { "epoch": 12.61, "learning_rate": 3.974675909011456e-06, "loss": 0.2554, "step": 1237300 }, { "epoch": 12.61, "learning_rate": 3.971376871707211e-06, "loss": 0.2518, "step": 1237400 }, { "epoch": 12.61, "learning_rate": 3.968079107047309e-06, "loss": 0.2438, "step": 1237500 }, { "epoch": 12.61, "learning_rate": 3.9647826151929845e-06, "loss": 0.2781, "step": 1237600 }, { "epoch": 12.61, "learning_rate": 3.961487396305422e-06, "loss": 0.2082, "step": 1237700 }, { "epoch": 12.61, "learning_rate": 3.958193450545743e-06, "loss": 0.2189, "step": 1237800 }, { "epoch": 12.61, "learning_rate": 3.954900778074991e-06, "loss": 0.2311, "step": 1237900 }, { "epoch": 12.61, "learning_rate": 3.951609379054158e-06, "loss": 0.2088, "step": 1238000 }, { "epoch": 12.61, "learning_rate": 3.948319253644179e-06, "loss": 0.2479, "step": 1238100 }, { "epoch": 12.61, "learning_rate": 3.945030402005928e-06, "loss": 0.2011, "step": 1238200 }, { "epoch": 12.62, "learning_rate": 3.941742824300193e-06, "loss": 0.2212, "step": 1238300 }, { "epoch": 12.62, "learning_rate": 3.938456520687728e-06, "loss": 0.2395, "step": 1238400 }, { "epoch": 12.62, "learning_rate": 3.935204335314715e-06, "loss": 0.2501, "step": 1238500 }, { "epoch": 12.62, "learning_rate": 3.931920567625825e-06, "loss": 0.2087, "step": 1238600 }, { "epoch": 12.62, "learning_rate": 3.928638074510456e-06, "loss": 0.2407, "step": 1238700 }, { "epoch": 12.62, "learning_rate": 3.925356856129098e-06, "loss": 0.2344, "step": 1238800 }, { "epoch": 12.62, "learning_rate": 3.922076912642183e-06, "loss": 0.2266, "step": 1238900 }, { "epoch": 12.62, "learning_rate": 3.918798244210087e-06, "loss": 0.1954, "step": 1239000 }, { "epoch": 12.62, "learning_rate": 3.915520850993107e-06, "loss": 0.2628, "step": 1239100 }, { "epoch": 12.63, "learning_rate": 3.912244733151497e-06, "loss": 0.2627, "step": 1239200 }, { "epoch": 12.63, "learning_rate": 3.90896989084544e-06, "loss": 0.2336, "step": 1239300 }, { "epoch": 12.63, "learning_rate": 3.905696324235045e-06, "loss": 0.1922, "step": 1239400 }, { "epoch": 12.63, "learning_rate": 3.902424033480378e-06, "loss": 0.2303, "step": 1239500 }, { "epoch": 12.63, "learning_rate": 3.899153018741438e-06, "loss": 0.2029, "step": 1239600 }, { "epoch": 12.63, "learning_rate": 3.895883280178149e-06, "loss": 0.267, "step": 1239700 }, { "epoch": 12.63, "learning_rate": 3.892614817950385e-06, "loss": 0.207, "step": 1239800 }, { "epoch": 12.63, "learning_rate": 3.88934763221796e-06, "loss": 0.2342, "step": 1239900 }, { "epoch": 12.63, "learning_rate": 3.88608172314061e-06, "loss": 0.2576, "step": 1240000 }, { "epoch": 12.63, "learning_rate": 3.882817090878027e-06, "loss": 0.1877, "step": 1240100 }, { "epoch": 12.64, "learning_rate": 3.879553735589837e-06, "loss": 0.2317, "step": 1240200 }, { "epoch": 12.64, "learning_rate": 3.876291657435579e-06, "loss": 0.2257, "step": 1240300 }, { "epoch": 12.64, "learning_rate": 3.873030856574765e-06, "loss": 0.2293, "step": 1240400 }, { "epoch": 12.64, "learning_rate": 3.869771333166825e-06, "loss": 0.2207, "step": 1240500 }, { "epoch": 12.64, "learning_rate": 3.866513087371134e-06, "loss": 0.209, "step": 1240600 }, { "epoch": 12.64, "learning_rate": 3.863256119346992e-06, "loss": 0.2303, "step": 1240700 }, { "epoch": 12.64, "learning_rate": 3.8600004292536475e-06, "loss": 0.2329, "step": 1240800 }, { "epoch": 12.64, "learning_rate": 3.856746017250293e-06, "loss": 0.2276, "step": 1240900 }, { "epoch": 12.64, "learning_rate": 3.85349288349604e-06, "loss": 0.1865, "step": 1241000 }, { "epoch": 12.64, "learning_rate": 3.850241028149948e-06, "loss": 0.2043, "step": 1241100 }, { "epoch": 12.65, "learning_rate": 3.846990451371018e-06, "loss": 0.1961, "step": 1241200 }, { "epoch": 12.65, "learning_rate": 3.843741153318181e-06, "loss": 0.1672, "step": 1241300 }, { "epoch": 12.65, "learning_rate": 3.840493134150308e-06, "loss": 0.2048, "step": 1241400 }, { "epoch": 12.65, "learning_rate": 3.837246394026216e-06, "loss": 0.2019, "step": 1241500 }, { "epoch": 12.65, "learning_rate": 3.834000933104641e-06, "loss": 0.2755, "step": 1241600 }, { "epoch": 12.65, "learning_rate": 3.830756751544266e-06, "loss": 0.2129, "step": 1241700 }, { "epoch": 12.65, "learning_rate": 3.827513849503725e-06, "loss": 0.203, "step": 1241800 }, { "epoch": 12.65, "learning_rate": 3.824272227141558e-06, "loss": 0.1997, "step": 1241900 }, { "epoch": 12.65, "learning_rate": 3.821031884616273e-06, "loss": 0.2363, "step": 1242000 }, { "epoch": 12.65, "learning_rate": 3.8178252063751054e-06, "loss": 0.2274, "step": 1242100 }, { "epoch": 12.66, "learning_rate": 3.8145874111964973e-06, "loss": 0.2227, "step": 1242200 }, { "epoch": 12.66, "learning_rate": 3.811350896328304e-06, "loss": 0.2277, "step": 1242300 }, { "epoch": 12.66, "learning_rate": 3.8081156619287585e-06, "loss": 0.2135, "step": 1242400 }, { "epoch": 12.66, "learning_rate": 3.804881708156054e-06, "loss": 0.222, "step": 1242500 }, { "epoch": 12.66, "learning_rate": 3.8016490351683074e-06, "loss": 0.2122, "step": 1242600 }, { "epoch": 12.66, "learning_rate": 3.7984176431235796e-06, "loss": 0.2455, "step": 1242700 }, { "epoch": 12.66, "learning_rate": 3.795187532179867e-06, "loss": 0.196, "step": 1242800 }, { "epoch": 12.66, "learning_rate": 3.791958702495103e-06, "loss": 0.2124, "step": 1242900 }, { "epoch": 12.66, "learning_rate": 3.788731154227162e-06, "loss": 0.1895, "step": 1243000 }, { "epoch": 12.66, "learning_rate": 3.7855048875338405e-06, "loss": 0.263, "step": 1243100 }, { "epoch": 12.67, "learning_rate": 3.7822799025728894e-06, "loss": 0.2398, "step": 1243200 }, { "epoch": 12.67, "learning_rate": 3.779056199501999e-06, "loss": 0.1734, "step": 1243300 }, { "epoch": 12.67, "learning_rate": 3.7758337784787767e-06, "loss": 0.2823, "step": 1243400 }, { "epoch": 12.67, "learning_rate": 3.7726126396607864e-06, "loss": 0.2396, "step": 1243500 }, { "epoch": 12.67, "learning_rate": 3.7693927832055254e-06, "loss": 0.1879, "step": 1243600 }, { "epoch": 12.67, "learning_rate": 3.7661742092704144e-06, "loss": 0.247, "step": 1243700 }, { "epoch": 12.67, "learning_rate": 3.7629569180128275e-06, "loss": 0.2212, "step": 1243800 }, { "epoch": 12.67, "learning_rate": 3.759740909590076e-06, "loss": 0.2027, "step": 1243900 }, { "epoch": 12.67, "learning_rate": 3.7565261841594002e-06, "loss": 0.2104, "step": 1244000 }, { "epoch": 12.68, "learning_rate": 3.753312741877981e-06, "loss": 0.2023, "step": 1244100 }, { "epoch": 12.68, "learning_rate": 3.75010058290294e-06, "loss": 0.2484, "step": 1244200 }, { "epoch": 12.68, "learning_rate": 3.7469218097927816e-06, "loss": 0.1999, "step": 1244300 }, { "epoch": 12.68, "learning_rate": 3.7437122050646146e-06, "loss": 0.1633, "step": 1244400 }, { "epoch": 12.68, "learning_rate": 3.7405038841122265e-06, "loss": 0.2387, "step": 1244500 }, { "epoch": 12.68, "learning_rate": 3.7372968470924883e-06, "loss": 0.273, "step": 1244600 }, { "epoch": 12.68, "learning_rate": 3.734091094162212e-06, "loss": 0.2182, "step": 1244700 }, { "epoch": 12.68, "learning_rate": 3.7308866254781317e-06, "loss": 0.1834, "step": 1244800 }, { "epoch": 12.68, "learning_rate": 3.7276834411969253e-06, "loss": 0.2366, "step": 1244900 }, { "epoch": 12.68, "learning_rate": 3.7244815414752176e-06, "loss": 0.2192, "step": 1245000 }, { "epoch": 12.69, "learning_rate": 3.721280926469557e-06, "loss": 0.195, "step": 1245100 }, { "epoch": 12.69, "learning_rate": 3.7180815963364345e-06, "loss": 0.2124, "step": 1245200 }, { "epoch": 12.69, "learning_rate": 3.714883551232282e-06, "loss": 0.2394, "step": 1245300 }, { "epoch": 12.69, "learning_rate": 3.711686791313468e-06, "loss": 0.2343, "step": 1245400 }, { "epoch": 12.69, "learning_rate": 3.7084913167362834e-06, "loss": 0.2312, "step": 1245500 }, { "epoch": 12.69, "learning_rate": 3.7052971276569738e-06, "loss": 0.202, "step": 1245600 }, { "epoch": 12.69, "learning_rate": 3.7021042242317204e-06, "loss": 0.218, "step": 1245700 }, { "epoch": 12.69, "learning_rate": 3.698912606616629e-06, "loss": 0.2002, "step": 1245800 }, { "epoch": 12.69, "learning_rate": 3.6957541719181987e-06, "loss": 0.238, "step": 1245900 }, { "epoch": 12.69, "learning_rate": 3.6925651135295345e-06, "loss": 0.2071, "step": 1246000 }, { "epoch": 12.7, "learning_rate": 3.6893773414174425e-06, "loss": 0.1863, "step": 1246100 }, { "epoch": 12.7, "learning_rate": 3.686190855737791e-06, "loss": 0.1949, "step": 1246200 }, { "epoch": 12.7, "learning_rate": 3.6830056566463657e-06, "loss": 0.2641, "step": 1246300 }, { "epoch": 12.7, "learning_rate": 3.6798217442989145e-06, "loss": 0.2636, "step": 1246400 }, { "epoch": 12.7, "learning_rate": 3.676639118851107e-06, "loss": 0.2425, "step": 1246500 }, { "epoch": 12.7, "learning_rate": 3.673457780458559e-06, "loss": 0.2632, "step": 1246600 }, { "epoch": 12.7, "learning_rate": 3.6702777292768153e-06, "loss": 0.2242, "step": 1246700 }, { "epoch": 12.7, "learning_rate": 3.6670989654613686e-06, "loss": 0.2938, "step": 1246800 }, { "epoch": 12.7, "learning_rate": 3.6639214891676343e-06, "loss": 0.2096, "step": 1246900 }, { "epoch": 12.7, "learning_rate": 3.660745300550968e-06, "loss": 0.2146, "step": 1247000 }, { "epoch": 12.71, "learning_rate": 3.657570399766683e-06, "loss": 0.2656, "step": 1247100 }, { "epoch": 12.71, "learning_rate": 3.6543967869699947e-06, "loss": 0.2005, "step": 1247200 }, { "epoch": 12.71, "learning_rate": 3.6512244623160783e-06, "loss": 0.2437, "step": 1247300 }, { "epoch": 12.71, "learning_rate": 3.648053425960054e-06, "loss": 0.1781, "step": 1247400 }, { "epoch": 12.71, "learning_rate": 3.644883678056947e-06, "loss": 0.2044, "step": 1247500 }, { "epoch": 12.71, "learning_rate": 3.6417152187617542e-06, "loss": 0.3021, "step": 1247600 }, { "epoch": 12.71, "learning_rate": 3.6385480482293844e-06, "loss": 0.1454, "step": 1247700 }, { "epoch": 12.71, "learning_rate": 3.6353821666147e-06, "loss": 0.2472, "step": 1247800 }, { "epoch": 12.71, "learning_rate": 3.6322175740724918e-06, "loss": 0.2748, "step": 1247900 }, { "epoch": 12.71, "learning_rate": 3.6290542707574914e-06, "loss": 0.2055, "step": 1248000 }, { "epoch": 12.72, "learning_rate": 3.6258922568243657e-06, "loss": 0.257, "step": 1248100 }, { "epoch": 12.72, "learning_rate": 3.6227315324277145e-06, "loss": 0.2552, "step": 1248200 }, { "epoch": 12.72, "learning_rate": 3.619572097722077e-06, "loss": 0.2261, "step": 1248300 }, { "epoch": 12.72, "learning_rate": 3.61641395286194e-06, "loss": 0.2173, "step": 1248400 }, { "epoch": 12.72, "learning_rate": 3.6132570980017066e-06, "loss": 0.1773, "step": 1248500 }, { "epoch": 12.72, "learning_rate": 3.6101015332957334e-06, "loss": 0.2502, "step": 1248600 }, { "epoch": 12.72, "learning_rate": 3.6069472588983134e-06, "loss": 0.2122, "step": 1248700 }, { "epoch": 12.72, "learning_rate": 3.6037942749636633e-06, "loss": 0.2397, "step": 1248800 }, { "epoch": 12.72, "learning_rate": 3.6006425816459466e-06, "loss": 0.225, "step": 1248900 }, { "epoch": 12.73, "learning_rate": 3.5974921790992667e-06, "loss": 0.2706, "step": 1249000 }, { "epoch": 12.73, "learning_rate": 3.5943430674776534e-06, "loss": 0.2203, "step": 1249100 }, { "epoch": 12.73, "learning_rate": 3.5911952469350875e-06, "loss": 0.2058, "step": 1249200 }, { "epoch": 12.73, "learning_rate": 3.588048717625472e-06, "loss": 0.2383, "step": 1249300 }, { "epoch": 12.73, "learning_rate": 3.5849034797026636e-06, "loss": 0.2433, "step": 1249400 }, { "epoch": 12.73, "learning_rate": 3.5817595333204302e-06, "loss": 0.1825, "step": 1249500 }, { "epoch": 12.73, "learning_rate": 3.5786168786325015e-06, "loss": 0.2216, "step": 1249600 }, { "epoch": 12.73, "learning_rate": 3.575475515792538e-06, "loss": 0.2414, "step": 1249700 }, { "epoch": 12.73, "learning_rate": 3.5723354449541237e-06, "loss": 0.2186, "step": 1249800 }, { "epoch": 12.73, "learning_rate": 3.569196666270792e-06, "loss": 0.2087, "step": 1249900 }, { "epoch": 12.74, "learning_rate": 3.5660591798960206e-06, "loss": 0.1899, "step": 1250000 }, { "epoch": 12.74, "learning_rate": 3.562922985983197e-06, "loss": 0.207, "step": 1250100 }, { "epoch": 12.74, "learning_rate": 3.5597880846856746e-06, "loss": 0.2218, "step": 1250200 }, { "epoch": 12.74, "learning_rate": 3.556654476156724e-06, "loss": 0.2185, "step": 1250300 }, { "epoch": 12.74, "learning_rate": 3.553522160549567e-06, "loss": 0.2171, "step": 1250400 }, { "epoch": 12.74, "learning_rate": 3.5503911380173504e-06, "loss": 0.2632, "step": 1250500 }, { "epoch": 12.74, "learning_rate": 3.5472614087131718e-06, "loss": 0.2367, "step": 1250600 }, { "epoch": 12.74, "learning_rate": 3.544132972790042e-06, "loss": 0.202, "step": 1250700 }, { "epoch": 12.74, "learning_rate": 3.5410058304009285e-06, "loss": 0.2392, "step": 1250800 }, { "epoch": 12.74, "learning_rate": 3.5378799816987295e-06, "loss": 0.2399, "step": 1250900 }, { "epoch": 12.75, "learning_rate": 3.5347554268362925e-06, "loss": 0.1953, "step": 1251000 }, { "epoch": 12.75, "learning_rate": 3.5316321659663688e-06, "loss": 0.2697, "step": 1251100 }, { "epoch": 12.75, "learning_rate": 3.5285101992416793e-06, "loss": 0.2254, "step": 1251200 }, { "epoch": 12.75, "learning_rate": 3.5253895268148716e-06, "loss": 0.222, "step": 1251300 }, { "epoch": 12.75, "learning_rate": 3.5223013362102496e-06, "loss": 0.2022, "step": 1251400 }, { "epoch": 12.75, "learning_rate": 3.51918323989009e-06, "loss": 0.1915, "step": 1251500 }, { "epoch": 12.75, "learning_rate": 3.5160664383238416e-06, "loss": 0.2044, "step": 1251600 }, { "epoch": 12.75, "learning_rate": 3.5129509316638986e-06, "loss": 0.2395, "step": 1251700 }, { "epoch": 12.75, "learning_rate": 3.5098367200625924e-06, "loss": 0.221, "step": 1251800 }, { "epoch": 12.75, "learning_rate": 3.506754926424294e-06, "loss": 0.1924, "step": 1251900 }, { "epoch": 12.76, "learning_rate": 3.5036432924425986e-06, "loss": 0.1737, "step": 1252000 }, { "epoch": 12.76, "learning_rate": 3.5005329539746234e-06, "loss": 0.2119, "step": 1252100 }, { "epoch": 12.76, "learning_rate": 3.4974239111724493e-06, "loss": 0.2421, "step": 1252200 }, { "epoch": 12.76, "learning_rate": 3.4943161641880916e-06, "loss": 0.2157, "step": 1252300 }, { "epoch": 12.76, "learning_rate": 3.491209713173489e-06, "loss": 0.2019, "step": 1252400 }, { "epoch": 12.76, "learning_rate": 3.488104558280536e-06, "loss": 0.2264, "step": 1252500 }, { "epoch": 12.76, "learning_rate": 3.4850006996610615e-06, "loss": 0.209, "step": 1252600 }, { "epoch": 12.76, "learning_rate": 3.4818981374668147e-06, "loss": 0.2053, "step": 1252700 }, { "epoch": 12.76, "learning_rate": 3.4787968718494968e-06, "loss": 0.2242, "step": 1252800 }, { "epoch": 12.76, "learning_rate": 3.47569690296074e-06, "loss": 0.2207, "step": 1252900 }, { "epoch": 12.77, "learning_rate": 3.4725982309521164e-06, "loss": 0.2286, "step": 1253000 }, { "epoch": 12.77, "learning_rate": 3.4695008559751317e-06, "loss": 0.2125, "step": 1253100 }, { "epoch": 12.77, "learning_rate": 3.466404778181238e-06, "loss": 0.2674, "step": 1253200 }, { "epoch": 12.77, "learning_rate": 3.4633099977217975e-06, "loss": 0.2891, "step": 1253300 }, { "epoch": 12.77, "learning_rate": 3.460216514748136e-06, "loss": 0.2072, "step": 1253400 }, { "epoch": 12.77, "learning_rate": 3.457124329411512e-06, "loss": 0.202, "step": 1253500 }, { "epoch": 12.77, "learning_rate": 3.454033441863105e-06, "loss": 0.248, "step": 1253600 }, { "epoch": 12.77, "learning_rate": 3.4509438522540405e-06, "loss": 0.2102, "step": 1253700 }, { "epoch": 12.77, "learning_rate": 3.4478555607353946e-06, "loss": 0.2514, "step": 1253800 }, { "epoch": 12.77, "learning_rate": 3.4447685674581496e-06, "loss": 0.2228, "step": 1253900 }, { "epoch": 12.78, "learning_rate": 3.4416828725732487e-06, "loss": 0.1785, "step": 1254000 }, { "epoch": 12.78, "learning_rate": 3.4385984762315635e-06, "loss": 0.2306, "step": 1254100 }, { "epoch": 12.78, "learning_rate": 3.435515378583901e-06, "loss": 0.2107, "step": 1254200 }, { "epoch": 12.78, "learning_rate": 3.4324335797810104e-06, "loss": 0.2182, "step": 1254300 }, { "epoch": 12.78, "learning_rate": 3.4293530799735673e-06, "loss": 0.1743, "step": 1254400 }, { "epoch": 12.78, "learning_rate": 3.426273879312202e-06, "loss": 0.2042, "step": 1254500 }, { "epoch": 12.78, "learning_rate": 3.423195977947453e-06, "loss": 0.2586, "step": 1254600 }, { "epoch": 12.78, "learning_rate": 3.420119376029821e-06, "loss": 0.1865, "step": 1254700 }, { "epoch": 12.78, "learning_rate": 3.4170440737097354e-06, "loss": 0.2212, "step": 1254800 }, { "epoch": 12.79, "learning_rate": 3.4139700711375486e-06, "loss": 0.22, "step": 1254900 }, { "epoch": 12.79, "learning_rate": 3.4108973684635714e-06, "loss": 0.1886, "step": 1255000 }, { "epoch": 12.79, "learning_rate": 3.4078259658380394e-06, "loss": 0.2217, "step": 1255100 }, { "epoch": 12.79, "learning_rate": 3.404755863411123e-06, "loss": 0.2499, "step": 1255200 }, { "epoch": 12.79, "learning_rate": 3.4016870613329288e-06, "loss": 0.3172, "step": 1255300 }, { "epoch": 12.79, "learning_rate": 3.3986195597535064e-06, "loss": 0.261, "step": 1255400 }, { "epoch": 12.79, "learning_rate": 3.3955533588228393e-06, "loss": 0.2736, "step": 1255500 }, { "epoch": 12.79, "learning_rate": 3.3924884586908477e-06, "loss": 0.199, "step": 1255600 }, { "epoch": 12.79, "learning_rate": 3.389424859507385e-06, "loss": 0.2082, "step": 1255700 }, { "epoch": 12.79, "learning_rate": 3.3863625614222484e-06, "loss": 0.2232, "step": 1255800 }, { "epoch": 12.8, "learning_rate": 3.383301564585154e-06, "loss": 0.2304, "step": 1255900 }, { "epoch": 12.8, "learning_rate": 3.3802418691457727e-06, "loss": 0.2666, "step": 1256000 }, { "epoch": 12.8, "learning_rate": 3.3771834752537147e-06, "loss": 0.1965, "step": 1256100 }, { "epoch": 12.8, "learning_rate": 3.3741263830585e-06, "loss": 0.2617, "step": 1256200 }, { "epoch": 12.8, "learning_rate": 3.3710705927096097e-06, "loss": 0.1648, "step": 1256300 }, { "epoch": 12.8, "learning_rate": 3.3680161043564604e-06, "loss": 0.2394, "step": 1256400 }, { "epoch": 12.8, "learning_rate": 3.3649629181483865e-06, "loss": 0.2223, "step": 1256500 }, { "epoch": 12.8, "learning_rate": 3.3619110342346747e-06, "loss": 0.263, "step": 1256600 }, { "epoch": 12.8, "learning_rate": 3.3588604527645496e-06, "loss": 0.2334, "step": 1256700 }, { "epoch": 12.8, "learning_rate": 3.3558111738871576e-06, "loss": 0.2264, "step": 1256800 }, { "epoch": 12.81, "learning_rate": 3.352763197751597e-06, "loss": 0.2336, "step": 1256900 }, { "epoch": 12.81, "learning_rate": 3.349716524506902e-06, "loss": 0.2277, "step": 1257000 }, { "epoch": 12.81, "learning_rate": 3.346671154302019e-06, "loss": 0.2633, "step": 1257100 }, { "epoch": 12.81, "learning_rate": 3.34362708728586e-06, "loss": 0.2208, "step": 1257200 }, { "epoch": 12.81, "learning_rate": 3.3406147447920367e-06, "loss": 0.2203, "step": 1257300 }, { "epoch": 12.81, "learning_rate": 3.337573271564168e-06, "loss": 0.2002, "step": 1257400 }, { "epoch": 12.81, "learning_rate": 3.334533101969858e-06, "loss": 0.2369, "step": 1257500 }, { "epoch": 12.81, "learning_rate": 3.3314942361577426e-06, "loss": 0.2314, "step": 1257600 }, { "epoch": 12.81, "learning_rate": 3.328456674276411e-06, "loss": 0.2336, "step": 1257700 }, { "epoch": 12.81, "learning_rate": 3.325420416474384e-06, "loss": 0.2092, "step": 1257800 }, { "epoch": 12.82, "learning_rate": 3.3223854629001095e-06, "loss": 0.1809, "step": 1257900 }, { "epoch": 12.82, "learning_rate": 3.319351813701982e-06, "loss": 0.2206, "step": 1258000 }, { "epoch": 12.82, "learning_rate": 3.316319469028323e-06, "loss": 0.1884, "step": 1258100 }, { "epoch": 12.82, "learning_rate": 3.31328842902741e-06, "loss": 0.2154, "step": 1258200 }, { "epoch": 12.82, "learning_rate": 3.310258693847432e-06, "loss": 0.2492, "step": 1258300 }, { "epoch": 12.82, "learning_rate": 3.3072302636365305e-06, "loss": 0.1985, "step": 1258400 }, { "epoch": 12.82, "learning_rate": 3.3042031385427764e-06, "loss": 0.2178, "step": 1258500 }, { "epoch": 12.82, "learning_rate": 3.301177318714175e-06, "loss": 0.1553, "step": 1258600 }, { "epoch": 12.82, "learning_rate": 3.2981528042986676e-06, "loss": 0.1983, "step": 1258700 }, { "epoch": 12.82, "learning_rate": 3.2951295954441497e-06, "loss": 0.224, "step": 1258800 }, { "epoch": 12.83, "learning_rate": 3.2921076922984196e-06, "loss": 0.2727, "step": 1258900 }, { "epoch": 12.83, "learning_rate": 3.2890870950092423e-06, "loss": 0.253, "step": 1259000 }, { "epoch": 12.83, "learning_rate": 3.2860678037243063e-06, "loss": 0.2512, "step": 1259100 }, { "epoch": 12.83, "learning_rate": 3.2830498185912305e-06, "loss": 0.2603, "step": 1259200 }, { "epoch": 12.83, "learning_rate": 3.28003313975758e-06, "loss": 0.2506, "step": 1259300 }, { "epoch": 12.83, "learning_rate": 3.27701776737085e-06, "loss": 0.2656, "step": 1259400 }, { "epoch": 12.83, "learning_rate": 3.2740037015784796e-06, "loss": 0.2312, "step": 1259500 }, { "epoch": 12.83, "learning_rate": 3.2709909425278337e-06, "loss": 0.2062, "step": 1259600 }, { "epoch": 12.83, "learning_rate": 3.2679794903662285e-06, "loss": 0.2342, "step": 1259700 }, { "epoch": 12.84, "learning_rate": 3.264969345240889e-06, "loss": 0.2073, "step": 1259800 }, { "epoch": 12.84, "learning_rate": 3.2619605072990046e-06, "loss": 0.183, "step": 1259900 }, { "epoch": 12.84, "learning_rate": 3.2589529766876867e-06, "loss": 0.2461, "step": 1260000 }, { "epoch": 12.84, "learning_rate": 3.2559467535539923e-06, "loss": 0.203, "step": 1260100 }, { "epoch": 12.84, "learning_rate": 3.252941838044893e-06, "loss": 0.2304, "step": 1260200 }, { "epoch": 12.84, "learning_rate": 3.249938230307321e-06, "loss": 0.1951, "step": 1260300 }, { "epoch": 12.84, "learning_rate": 3.246935930488143e-06, "loss": 0.2134, "step": 1260400 }, { "epoch": 12.84, "learning_rate": 3.2439349387341343e-06, "loss": 0.2199, "step": 1260500 }, { "epoch": 12.84, "learning_rate": 3.2409352551920345e-06, "loss": 0.251, "step": 1260600 }, { "epoch": 12.84, "learning_rate": 3.237936880008513e-06, "loss": 0.2212, "step": 1260700 }, { "epoch": 12.85, "learning_rate": 3.2349697775193687e-06, "loss": 0.253, "step": 1260800 }, { "epoch": 12.85, "learning_rate": 3.231974006405507e-06, "loss": 0.1864, "step": 1260900 }, { "epoch": 12.85, "learning_rate": 3.228979544088366e-06, "loss": 0.227, "step": 1261000 }, { "epoch": 12.85, "learning_rate": 3.225986390714366e-06, "loss": 0.2086, "step": 1261100 }, { "epoch": 12.85, "learning_rate": 3.2229945464298593e-06, "loss": 0.1741, "step": 1261200 }, { "epoch": 12.85, "learning_rate": 3.2200040113811156e-06, "loss": 0.2172, "step": 1261300 }, { "epoch": 12.85, "learning_rate": 3.2170147857143584e-06, "loss": 0.2087, "step": 1261400 }, { "epoch": 12.85, "learning_rate": 3.2140268695757537e-06, "loss": 0.2071, "step": 1261500 }, { "epoch": 12.85, "learning_rate": 3.211040263111381e-06, "loss": 0.2179, "step": 1261600 }, { "epoch": 12.85, "learning_rate": 3.2080549664672676e-06, "loss": 0.2415, "step": 1261700 }, { "epoch": 12.86, "learning_rate": 3.205070979789383e-06, "loss": 0.2932, "step": 1261800 }, { "epoch": 12.86, "learning_rate": 3.2020883032236237e-06, "loss": 0.1732, "step": 1261900 }, { "epoch": 12.86, "learning_rate": 3.199106936915823e-06, "loss": 0.1786, "step": 1262000 }, { "epoch": 12.86, "learning_rate": 3.1961268810117583e-06, "loss": 0.2145, "step": 1262100 }, { "epoch": 12.86, "learning_rate": 3.193148135657129e-06, "loss": 0.2401, "step": 1262200 }, { "epoch": 12.86, "learning_rate": 3.190170700997582e-06, "loss": 0.233, "step": 1262300 }, { "epoch": 12.86, "learning_rate": 3.1871945771786916e-06, "loss": 0.1758, "step": 1262400 }, { "epoch": 12.86, "learning_rate": 3.184219764345984e-06, "loss": 0.1704, "step": 1262500 }, { "epoch": 12.86, "learning_rate": 3.1812462626448958e-06, "loss": 0.1918, "step": 1262600 }, { "epoch": 12.86, "learning_rate": 3.178274072220818e-06, "loss": 0.1828, "step": 1262700 }, { "epoch": 12.87, "learning_rate": 3.175303193219081e-06, "loss": 0.1996, "step": 1262800 }, { "epoch": 12.87, "learning_rate": 3.172333625784929e-06, "loss": 0.1982, "step": 1262900 }, { "epoch": 12.87, "learning_rate": 3.169365370063565e-06, "loss": 0.2785, "step": 1263000 }, { "epoch": 12.87, "learning_rate": 3.16639842620012e-06, "loss": 0.1806, "step": 1263100 }, { "epoch": 12.87, "learning_rate": 3.1634327943396545e-06, "loss": 0.2447, "step": 1263200 }, { "epoch": 12.87, "learning_rate": 3.1604684746271727e-06, "loss": 0.2121, "step": 1263300 }, { "epoch": 12.87, "learning_rate": 3.1575054672076187e-06, "loss": 0.228, "step": 1263400 }, { "epoch": 12.87, "learning_rate": 3.1545437722258563e-06, "loss": 0.1909, "step": 1263500 }, { "epoch": 12.87, "learning_rate": 3.1515833898266968e-06, "loss": 0.2571, "step": 1263600 }, { "epoch": 12.87, "learning_rate": 3.1486243201548905e-06, "loss": 0.2578, "step": 1263700 }, { "epoch": 12.88, "learning_rate": 3.145666563355112e-06, "loss": 0.1722, "step": 1263800 }, { "epoch": 12.88, "learning_rate": 3.1427101195719755e-06, "loss": 0.2266, "step": 1263900 }, { "epoch": 12.88, "learning_rate": 3.139754988950042e-06, "loss": 0.2073, "step": 1264000 }, { "epoch": 12.88, "learning_rate": 3.1368011716337995e-06, "loss": 0.1845, "step": 1264100 }, { "epoch": 12.88, "learning_rate": 3.133848667767666e-06, "loss": 0.2214, "step": 1264200 }, { "epoch": 12.88, "learning_rate": 3.1308974774960018e-06, "loss": 0.2291, "step": 1264300 }, { "epoch": 12.88, "learning_rate": 3.1279476009631023e-06, "loss": 0.2559, "step": 1264400 }, { "epoch": 12.88, "learning_rate": 3.1249990383132055e-06, "loss": 0.2103, "step": 1264500 }, { "epoch": 12.88, "learning_rate": 3.122051789690472e-06, "loss": 0.2682, "step": 1264600 }, { "epoch": 12.88, "learning_rate": 3.1191058552390108e-06, "loss": 0.1933, "step": 1264700 }, { "epoch": 12.89, "learning_rate": 3.1161612351028523e-06, "loss": 0.2581, "step": 1264800 }, { "epoch": 12.89, "learning_rate": 3.1132179294259757e-06, "loss": 0.2216, "step": 1264900 }, { "epoch": 12.89, "learning_rate": 3.110275938352295e-06, "loss": 0.1763, "step": 1265000 }, { "epoch": 12.89, "learning_rate": 3.1073352620256457e-06, "loss": 0.2431, "step": 1265100 }, { "epoch": 12.89, "learning_rate": 3.104395900589816e-06, "loss": 0.1939, "step": 1265200 }, { "epoch": 12.89, "learning_rate": 3.1014578541885276e-06, "loss": 0.2112, "step": 1265300 }, { "epoch": 12.89, "learning_rate": 3.098521122965422e-06, "loss": 0.2292, "step": 1265400 }, { "epoch": 12.89, "learning_rate": 3.0955857070640945e-06, "loss": 0.2514, "step": 1265500 }, { "epoch": 12.89, "learning_rate": 3.0926516066280707e-06, "loss": 0.1826, "step": 1265600 }, { "epoch": 12.9, "learning_rate": 3.089748143136345e-06, "loss": 0.1719, "step": 1265700 }, { "epoch": 12.9, "learning_rate": 3.0868166609030092e-06, "loss": 0.2578, "step": 1265800 }, { "epoch": 12.9, "learning_rate": 3.083886494563727e-06, "loss": 0.252, "step": 1265900 }, { "epoch": 12.9, "learning_rate": 3.080957644261767e-06, "loss": 0.194, "step": 1266000 }, { "epoch": 12.9, "learning_rate": 3.0780301101403417e-06, "loss": 0.198, "step": 1266100 }, { "epoch": 12.9, "learning_rate": 3.0751038923425733e-06, "loss": 0.2492, "step": 1266200 }, { "epoch": 12.9, "learning_rate": 3.0721789910115473e-06, "loss": 0.2562, "step": 1266300 }, { "epoch": 12.9, "learning_rate": 3.069255406290272e-06, "loss": 0.2293, "step": 1266400 }, { "epoch": 12.9, "learning_rate": 3.066333138321701e-06, "loss": 0.2576, "step": 1266500 }, { "epoch": 12.9, "learning_rate": 3.0634121872486986e-06, "loss": 0.1817, "step": 1266600 }, { "epoch": 12.91, "learning_rate": 3.0604925532140915e-06, "loss": 0.2688, "step": 1266700 }, { "epoch": 12.91, "learning_rate": 3.0575742363606386e-06, "loss": 0.2648, "step": 1266800 }, { "epoch": 12.91, "learning_rate": 3.0546572368310156e-06, "loss": 0.2493, "step": 1266900 }, { "epoch": 12.91, "learning_rate": 3.0517415547678516e-06, "loss": 0.2026, "step": 1267000 }, { "epoch": 12.91, "learning_rate": 3.048827190313707e-06, "loss": 0.2391, "step": 1267100 }, { "epoch": 12.91, "learning_rate": 3.045914143611076e-06, "loss": 0.2663, "step": 1267200 }, { "epoch": 12.91, "learning_rate": 3.0430024148023926e-06, "loss": 0.2347, "step": 1267300 }, { "epoch": 12.91, "learning_rate": 3.040092004030023e-06, "loss": 0.1936, "step": 1267400 }, { "epoch": 12.91, "learning_rate": 3.0371829114362625e-06, "loss": 0.2287, "step": 1267500 }, { "epoch": 12.91, "learning_rate": 3.0342751371633514e-06, "loss": 0.1961, "step": 1267600 }, { "epoch": 12.92, "learning_rate": 3.0313686813534657e-06, "loss": 0.2205, "step": 1267700 }, { "epoch": 12.92, "learning_rate": 3.0284635441487087e-06, "loss": 0.2306, "step": 1267800 }, { "epoch": 12.92, "learning_rate": 3.025559725691126e-06, "loss": 0.1799, "step": 1267900 }, { "epoch": 12.92, "learning_rate": 3.022657226122705e-06, "loss": 0.2064, "step": 1268000 }, { "epoch": 12.92, "learning_rate": 3.0197560455853446e-06, "loss": 0.2758, "step": 1268100 }, { "epoch": 12.92, "learning_rate": 3.016885176304176e-06, "loss": 0.2432, "step": 1268200 }, { "epoch": 12.92, "learning_rate": 3.013986621060595e-06, "loss": 0.2339, "step": 1268300 }, { "epoch": 12.92, "learning_rate": 3.011118351098143e-06, "loss": 0.1654, "step": 1268400 }, { "epoch": 12.92, "learning_rate": 3.008222421709569e-06, "loss": 0.2344, "step": 1268500 }, { "epoch": 12.92, "learning_rate": 3.005327812057841e-06, "loss": 0.2058, "step": 1268600 }, { "epoch": 12.93, "learning_rate": 3.0024345222844963e-06, "loss": 0.2158, "step": 1268700 }, { "epoch": 12.93, "learning_rate": 2.9995425525309817e-06, "loss": 0.2356, "step": 1268800 }, { "epoch": 12.93, "learning_rate": 2.996651902938704e-06, "loss": 0.205, "step": 1268900 }, { "epoch": 12.93, "learning_rate": 2.9937625736490103e-06, "loss": 0.2547, "step": 1269000 }, { "epoch": 12.93, "learning_rate": 2.990874564803154e-06, "loss": 0.2163, "step": 1269100 }, { "epoch": 12.93, "learning_rate": 2.9879878765423498e-06, "loss": 0.2134, "step": 1269200 }, { "epoch": 12.93, "learning_rate": 2.9851025090077467e-06, "loss": 0.2117, "step": 1269300 }, { "epoch": 12.93, "learning_rate": 2.982218462340406e-06, "loss": 0.2156, "step": 1269400 }, { "epoch": 12.93, "learning_rate": 2.9793357366813512e-06, "loss": 0.2025, "step": 1269500 }, { "epoch": 12.93, "learning_rate": 2.9764543321715264e-06, "loss": 0.1922, "step": 1269600 }, { "epoch": 12.94, "learning_rate": 2.973574248951816e-06, "loss": 0.2316, "step": 1269700 }, { "epoch": 12.94, "learning_rate": 2.97069548716304e-06, "loss": 0.176, "step": 1269800 }, { "epoch": 12.94, "learning_rate": 2.9678180469459526e-06, "loss": 0.1863, "step": 1269900 }, { "epoch": 12.94, "learning_rate": 2.9649419284412515e-06, "loss": 0.1843, "step": 1270000 }, { "epoch": 12.94, "learning_rate": 2.9620671317895443e-06, "loss": 0.212, "step": 1270100 }, { "epoch": 12.94, "learning_rate": 2.9591936571314016e-06, "loss": 0.2063, "step": 1270200 }, { "epoch": 12.94, "learning_rate": 2.9563215046073244e-06, "loss": 0.1611, "step": 1270300 }, { "epoch": 12.94, "learning_rate": 2.9534506743577306e-06, "loss": 0.2179, "step": 1270400 }, { "epoch": 12.94, "learning_rate": 2.950581166522991e-06, "loss": 0.2037, "step": 1270500 }, { "epoch": 12.95, "learning_rate": 2.94771298124342e-06, "loss": 0.2102, "step": 1270600 }, { "epoch": 12.95, "learning_rate": 2.9448461186592355e-06, "loss": 0.1963, "step": 1270700 }, { "epoch": 12.95, "learning_rate": 2.9419805789106193e-06, "loss": 0.1843, "step": 1270800 }, { "epoch": 12.95, "learning_rate": 2.9391163621376816e-06, "loss": 0.201, "step": 1270900 }, { "epoch": 12.95, "learning_rate": 2.936253468480461e-06, "loss": 0.2405, "step": 1271000 }, { "epoch": 12.95, "learning_rate": 2.9333918980789386e-06, "loss": 0.23, "step": 1271100 }, { "epoch": 12.95, "learning_rate": 2.9305316510730264e-06, "loss": 0.1909, "step": 1271200 }, { "epoch": 12.95, "learning_rate": 2.9276727276025816e-06, "loss": 0.2252, "step": 1271300 }, { "epoch": 12.95, "learning_rate": 2.924815127807373e-06, "loss": 0.1875, "step": 1271400 }, { "epoch": 12.95, "learning_rate": 2.9219588518271324e-06, "loss": 0.2182, "step": 1271500 }, { "epoch": 12.96, "learning_rate": 2.9191038998015142e-06, "loss": 0.247, "step": 1271600 }, { "epoch": 12.96, "learning_rate": 2.916250271870097e-06, "loss": 0.2781, "step": 1271700 }, { "epoch": 12.96, "learning_rate": 2.9133979681724154e-06, "loss": 0.2445, "step": 1271800 }, { "epoch": 12.96, "learning_rate": 2.910546988847935e-06, "loss": 0.3048, "step": 1271900 }, { "epoch": 12.96, "learning_rate": 2.9076973340360412e-06, "loss": 0.2027, "step": 1272000 }, { "epoch": 12.96, "learning_rate": 2.904849003876069e-06, "loss": 0.1911, "step": 1272100 }, { "epoch": 12.96, "learning_rate": 2.902001998507283e-06, "loss": 0.2373, "step": 1272200 }, { "epoch": 12.96, "learning_rate": 2.899156318068886e-06, "loss": 0.2123, "step": 1272300 }, { "epoch": 12.96, "learning_rate": 2.89631196270002e-06, "loss": 0.1913, "step": 1272400 }, { "epoch": 12.96, "learning_rate": 2.893497356281117e-06, "loss": 0.1929, "step": 1272500 }, { "epoch": 12.97, "learning_rate": 2.890655638214288e-06, "loss": 0.2622, "step": 1272600 }, { "epoch": 12.97, "learning_rate": 2.8878152456326267e-06, "loss": 0.2252, "step": 1272700 }, { "epoch": 12.97, "learning_rate": 2.884976178674993e-06, "loss": 0.1809, "step": 1272800 }, { "epoch": 12.97, "learning_rate": 2.882138437480215e-06, "loss": 0.2319, "step": 1272900 }, { "epoch": 12.97, "learning_rate": 2.8793020221870393e-06, "loss": 0.2011, "step": 1273000 }, { "epoch": 12.97, "learning_rate": 2.8764669329341408e-06, "loss": 0.2165, "step": 1273100 }, { "epoch": 12.97, "learning_rate": 2.8736331698601457e-06, "loss": 0.2009, "step": 1273200 }, { "epoch": 12.97, "learning_rate": 2.8708007331036067e-06, "loss": 0.2054, "step": 1273300 }, { "epoch": 12.97, "learning_rate": 2.8679696228030127e-06, "loss": 0.1802, "step": 1273400 }, { "epoch": 12.97, "learning_rate": 2.8651398390967897e-06, "loss": 0.2193, "step": 1273500 }, { "epoch": 12.98, "learning_rate": 2.862311382123297e-06, "loss": 0.255, "step": 1273600 }, { "epoch": 12.98, "learning_rate": 2.859484252020831e-06, "loss": 0.1829, "step": 1273700 }, { "epoch": 12.98, "learning_rate": 2.8566584489276173e-06, "loss": 0.1808, "step": 1273800 }, { "epoch": 12.98, "learning_rate": 2.8538339729818184e-06, "loss": 0.1934, "step": 1273900 }, { "epoch": 12.98, "learning_rate": 2.851010824321545e-06, "loss": 0.2283, "step": 1274000 }, { "epoch": 12.98, "learning_rate": 2.8481890030848224e-06, "loss": 0.219, "step": 1274100 }, { "epoch": 12.98, "learning_rate": 2.8453685094096238e-06, "loss": 0.2226, "step": 1274200 }, { "epoch": 12.98, "learning_rate": 2.842549343433859e-06, "loss": 0.1908, "step": 1274300 }, { "epoch": 12.98, "learning_rate": 2.8397315052953578e-06, "loss": 0.2106, "step": 1274400 }, { "epoch": 12.98, "learning_rate": 2.8369149951319027e-06, "loss": 0.1757, "step": 1274500 }, { "epoch": 12.99, "learning_rate": 2.8341279583271018e-06, "loss": 0.2463, "step": 1274600 }, { "epoch": 12.99, "learning_rate": 2.8313422233391717e-06, "loss": 0.2379, "step": 1274700 }, { "epoch": 12.99, "learning_rate": 2.8285296713577493e-06, "loss": 0.261, "step": 1274800 }, { "epoch": 12.99, "learning_rate": 2.8257184478990683e-06, "loss": 0.2291, "step": 1274900 }, { "epoch": 12.99, "learning_rate": 2.8229085531005993e-06, "loss": 0.2078, "step": 1275000 }, { "epoch": 12.99, "learning_rate": 2.8200999870997123e-06, "loss": 0.2188, "step": 1275100 }, { "epoch": 12.99, "learning_rate": 2.8172927500337342e-06, "loss": 0.2304, "step": 1275200 }, { "epoch": 12.99, "learning_rate": 2.8144868420399285e-06, "loss": 0.2302, "step": 1275300 }, { "epoch": 12.99, "learning_rate": 2.8116822632554785e-06, "loss": 0.2145, "step": 1275400 }, { "epoch": 13.0, "learning_rate": 2.808879013817511e-06, "loss": 0.1981, "step": 1275500 }, { "epoch": 13.0, "learning_rate": 2.8060770938630977e-06, "loss": 0.2136, "step": 1275600 }, { "epoch": 13.0, "learning_rate": 2.803276503529224e-06, "loss": 0.2133, "step": 1275700 }, { "epoch": 13.0, "learning_rate": 2.800477242952828e-06, "loss": 0.2356, "step": 1275800 }, { "epoch": 13.0, "learning_rate": 2.797679312270783e-06, "loss": 0.2524, "step": 1275900 }, { "epoch": 13.0, "learning_rate": 2.7948827116198805e-06, "loss": 0.1826, "step": 1276000 }, { "epoch": 13.0, "learning_rate": 2.79208744113686e-06, "loss": 0.2255, "step": 1276100 }, { "epoch": 13.0, "learning_rate": 2.789293500958393e-06, "loss": 0.2007, "step": 1276200 }, { "epoch": 13.0, "learning_rate": 2.7865008912210933e-06, "loss": 0.2394, "step": 1276300 }, { "epoch": 13.0, "learning_rate": 2.783709612061495e-06, "loss": 0.1754, "step": 1276400 }, { "epoch": 13.01, "learning_rate": 2.780919663616078e-06, "loss": 0.1855, "step": 1276500 }, { "epoch": 13.01, "learning_rate": 2.7781310460212617e-06, "loss": 0.1805, "step": 1276600 }, { "epoch": 13.01, "learning_rate": 2.775371625690628e-06, "loss": 0.2188, "step": 1276700 }, { "epoch": 13.01, "learning_rate": 2.7725856568940623e-06, "loss": 0.1744, "step": 1276800 }, { "epoch": 13.01, "learning_rate": 2.7698010193555736e-06, "loss": 0.212, "step": 1276900 }, { "epoch": 13.01, "learning_rate": 2.7670177132113162e-06, "loss": 0.2166, "step": 1277000 }, { "epoch": 13.01, "learning_rate": 2.7642357385973717e-06, "loss": 0.1695, "step": 1277100 }, { "epoch": 13.01, "learning_rate": 2.761455095649764e-06, "loss": 0.2044, "step": 1277200 }, { "epoch": 13.01, "learning_rate": 2.758675784504452e-06, "loss": 0.2411, "step": 1277300 }, { "epoch": 13.01, "learning_rate": 2.755897805297326e-06, "loss": 0.2257, "step": 1277400 }, { "epoch": 13.02, "learning_rate": 2.753121158164211e-06, "loss": 0.1747, "step": 1277500 }, { "epoch": 13.02, "learning_rate": 2.750345843240872e-06, "loss": 0.1966, "step": 1277600 }, { "epoch": 13.02, "learning_rate": 2.7475718606630105e-06, "loss": 0.2363, "step": 1277700 }, { "epoch": 13.02, "learning_rate": 2.7447992105662443e-06, "loss": 0.1844, "step": 1277800 }, { "epoch": 13.02, "learning_rate": 2.7420278930861454e-06, "loss": 0.1812, "step": 1277900 }, { "epoch": 13.02, "learning_rate": 2.7392579083582216e-06, "loss": 0.1732, "step": 1278000 }, { "epoch": 13.02, "learning_rate": 2.736489256517898e-06, "loss": 0.233, "step": 1278100 }, { "epoch": 13.02, "learning_rate": 2.7337219377005496e-06, "loss": 0.2338, "step": 1278200 }, { "epoch": 13.02, "learning_rate": 2.730955952041485e-06, "loss": 0.1915, "step": 1278300 }, { "epoch": 13.02, "learning_rate": 2.7281912996759394e-06, "loss": 0.2024, "step": 1278400 }, { "epoch": 13.03, "learning_rate": 2.725427980739088e-06, "loss": 0.2727, "step": 1278500 }, { "epoch": 13.03, "learning_rate": 2.722665995366039e-06, "loss": 0.1812, "step": 1278600 }, { "epoch": 13.03, "learning_rate": 2.7199053436918455e-06, "loss": 0.1963, "step": 1278700 }, { "epoch": 13.03, "learning_rate": 2.717146025851478e-06, "loss": 0.1704, "step": 1278800 }, { "epoch": 13.03, "learning_rate": 2.714388041979856e-06, "loss": 0.1571, "step": 1278900 }, { "epoch": 13.03, "learning_rate": 2.711631392211832e-06, "loss": 0.2113, "step": 1279000 }, { "epoch": 13.03, "learning_rate": 2.7088760766821775e-06, "loss": 0.2675, "step": 1279100 }, { "epoch": 13.03, "learning_rate": 2.7061220955256183e-06, "loss": 0.185, "step": 1279200 }, { "epoch": 13.03, "learning_rate": 2.7033694488768133e-06, "loss": 0.1758, "step": 1279300 }, { "epoch": 13.03, "learning_rate": 2.700645643383486e-06, "loss": 0.2531, "step": 1279400 }, { "epoch": 13.04, "learning_rate": 2.6978956528054376e-06, "loss": 0.2494, "step": 1279500 }, { "epoch": 13.04, "learning_rate": 2.6951469971373586e-06, "loss": 0.2141, "step": 1279600 }, { "epoch": 13.04, "learning_rate": 2.692399676513645e-06, "loss": 0.2596, "step": 1279700 }, { "epoch": 13.04, "learning_rate": 2.6896536910686322e-06, "loss": 0.2056, "step": 1279800 }, { "epoch": 13.04, "learning_rate": 2.686909040936566e-06, "loss": 0.2169, "step": 1279900 }, { "epoch": 13.04, "learning_rate": 2.6841657262516527e-06, "loss": 0.2268, "step": 1280000 }, { "epoch": 13.04, "learning_rate": 2.681423747148025e-06, "loss": 0.2101, "step": 1280100 }, { "epoch": 13.04, "learning_rate": 2.678683103759746e-06, "loss": 0.1668, "step": 1280200 }, { "epoch": 13.04, "learning_rate": 2.675943796220818e-06, "loss": 0.2065, "step": 1280300 }, { "epoch": 13.04, "learning_rate": 2.67320582466518e-06, "loss": 0.1874, "step": 1280400 }, { "epoch": 13.05, "learning_rate": 2.6704691892266965e-06, "loss": 0.1854, "step": 1280500 }, { "epoch": 13.05, "learning_rate": 2.6677338900391755e-06, "loss": 0.2888, "step": 1280600 }, { "epoch": 13.05, "learning_rate": 2.6649999272363613e-06, "loss": 0.2353, "step": 1280700 }, { "epoch": 13.05, "learning_rate": 2.6622673009519192e-06, "loss": 0.1758, "step": 1280800 }, { "epoch": 13.05, "learning_rate": 2.6595360113194635e-06, "loss": 0.2329, "step": 1280900 }, { "epoch": 13.05, "learning_rate": 2.656806058472543e-06, "loss": 0.2356, "step": 1281000 }, { "epoch": 13.05, "learning_rate": 2.6540774425446247e-06, "loss": 0.1953, "step": 1281100 }, { "epoch": 13.05, "learning_rate": 2.651350163669125e-06, "loss": 0.247, "step": 1281200 }, { "epoch": 13.05, "learning_rate": 2.6486242219793975e-06, "loss": 0.2204, "step": 1281300 }, { "epoch": 13.06, "learning_rate": 2.645899617608719e-06, "loss": 0.1702, "step": 1281400 }, { "epoch": 13.06, "learning_rate": 2.6431763506903093e-06, "loss": 0.1848, "step": 1281500 }, { "epoch": 13.06, "learning_rate": 2.640454421357319e-06, "loss": 0.2342, "step": 1281600 }, { "epoch": 13.06, "learning_rate": 2.637733829742841e-06, "loss": 0.134, "step": 1281700 }, { "epoch": 13.06, "learning_rate": 2.6350145759798827e-06, "loss": 0.1981, "step": 1281800 }, { "epoch": 13.06, "learning_rate": 2.6322966602014074e-06, "loss": 0.2358, "step": 1281900 }, { "epoch": 13.06, "learning_rate": 2.6296072416927964e-06, "loss": 0.225, "step": 1282000 }, { "epoch": 13.06, "learning_rate": 2.6268919888987273e-06, "loss": 0.2288, "step": 1282100 }, { "epoch": 13.06, "learning_rate": 2.6241780744862944e-06, "loss": 0.1776, "step": 1282200 }, { "epoch": 13.06, "learning_rate": 2.6214654985881735e-06, "loss": 0.2298, "step": 1282300 }, { "epoch": 13.07, "learning_rate": 2.618754261337003e-06, "loss": 0.1843, "step": 1282400 }, { "epoch": 13.07, "learning_rate": 2.61604436286535e-06, "loss": 0.2236, "step": 1282500 }, { "epoch": 13.07, "learning_rate": 2.6133358033057077e-06, "loss": 0.2694, "step": 1282600 }, { "epoch": 13.07, "learning_rate": 2.6106285827905072e-06, "loss": 0.2387, "step": 1282700 }, { "epoch": 13.07, "learning_rate": 2.6079227014521234e-06, "loss": 0.164, "step": 1282800 }, { "epoch": 13.07, "learning_rate": 2.60521815942285e-06, "loss": 0.2231, "step": 1282900 }, { "epoch": 13.07, "learning_rate": 2.602514956834924e-06, "loss": 0.212, "step": 1283000 }, { "epoch": 13.07, "learning_rate": 2.59981309382052e-06, "loss": 0.2049, "step": 1283100 }, { "epoch": 13.07, "learning_rate": 2.597112570511736e-06, "loss": 0.1811, "step": 1283200 }, { "epoch": 13.07, "learning_rate": 2.594413387040616e-06, "loss": 0.2247, "step": 1283300 }, { "epoch": 13.08, "learning_rate": 2.591715543539138e-06, "loss": 0.2144, "step": 1283400 }, { "epoch": 13.08, "learning_rate": 2.5890190401392e-06, "loss": 0.1901, "step": 1283500 }, { "epoch": 13.08, "learning_rate": 2.5863238769726525e-06, "loss": 0.202, "step": 1283600 }, { "epoch": 13.08, "learning_rate": 2.5836300541712676e-06, "loss": 0.1931, "step": 1283700 }, { "epoch": 13.08, "learning_rate": 2.580937571866766e-06, "loss": 0.1846, "step": 1283800 }, { "epoch": 13.08, "learning_rate": 2.578246430190786e-06, "loss": 0.2005, "step": 1283900 }, { "epoch": 13.08, "learning_rate": 2.575556629274909e-06, "loss": 0.2239, "step": 1284000 }, { "epoch": 13.08, "learning_rate": 2.5728681692506595e-06, "loss": 0.2564, "step": 1284100 }, { "epoch": 13.08, "learning_rate": 2.570181050249476e-06, "loss": 0.2362, "step": 1284200 }, { "epoch": 13.08, "learning_rate": 2.5674952724027436e-06, "loss": 0.1947, "step": 1284300 }, { "epoch": 13.09, "learning_rate": 2.5648108358417932e-06, "loss": 0.2044, "step": 1284400 }, { "epoch": 13.09, "learning_rate": 2.5621277406978605e-06, "loss": 0.2936, "step": 1284500 }, { "epoch": 13.09, "learning_rate": 2.5594459871021404e-06, "loss": 0.2254, "step": 1284600 }, { "epoch": 13.09, "learning_rate": 2.5567655751857577e-06, "loss": 0.2056, "step": 1284700 }, { "epoch": 13.09, "learning_rate": 2.5540865050797646e-06, "loss": 0.2359, "step": 1284800 }, { "epoch": 13.09, "learning_rate": 2.551408776915153e-06, "loss": 0.274, "step": 1284900 }, { "epoch": 13.09, "learning_rate": 2.5487323908228445e-06, "loss": 0.2545, "step": 1285000 }, { "epoch": 13.09, "learning_rate": 2.546057346933701e-06, "loss": 0.2, "step": 1285100 }, { "epoch": 13.09, "learning_rate": 2.5433836453785186e-06, "loss": 0.1946, "step": 1285200 }, { "epoch": 13.09, "learning_rate": 2.540711286288029e-06, "loss": 0.2423, "step": 1285300 }, { "epoch": 13.1, "learning_rate": 2.5380402697928838e-06, "loss": 0.1941, "step": 1285400 }, { "epoch": 13.1, "learning_rate": 2.535370596023686e-06, "loss": 0.2118, "step": 1285500 }, { "epoch": 13.1, "learning_rate": 2.5327022651109643e-06, "loss": 0.2394, "step": 1285600 }, { "epoch": 13.1, "learning_rate": 2.530035277185191e-06, "loss": 0.2447, "step": 1285700 }, { "epoch": 13.1, "learning_rate": 2.527369632376755e-06, "loss": 0.2029, "step": 1285800 }, { "epoch": 13.1, "learning_rate": 2.524705330815995e-06, "loss": 0.2284, "step": 1285900 }, { "epoch": 13.1, "learning_rate": 2.5220689955648637e-06, "loss": 0.2328, "step": 1286000 }, { "epoch": 13.1, "learning_rate": 2.5194073674544714e-06, "loss": 0.2084, "step": 1286100 }, { "epoch": 13.1, "learning_rate": 2.516747082981071e-06, "loss": 0.1773, "step": 1286200 }, { "epoch": 13.11, "learning_rate": 2.514088142274719e-06, "loss": 0.1723, "step": 1286300 }, { "epoch": 13.11, "learning_rate": 2.5114305454654297e-06, "loss": 0.1693, "step": 1286400 }, { "epoch": 13.11, "learning_rate": 2.5087742926831434e-06, "loss": 0.2111, "step": 1286500 }, { "epoch": 13.11, "learning_rate": 2.5061193840577368e-06, "loss": 0.1756, "step": 1286600 }, { "epoch": 13.11, "learning_rate": 2.5034658197190184e-06, "loss": 0.1985, "step": 1286700 }, { "epoch": 13.11, "learning_rate": 2.500813599796732e-06, "loss": 0.198, "step": 1286800 }, { "epoch": 13.11, "learning_rate": 2.498162724420553e-06, "loss": 0.211, "step": 1286900 }, { "epoch": 13.11, "learning_rate": 2.4955131937200915e-06, "loss": 0.1791, "step": 1287000 }, { "epoch": 13.11, "learning_rate": 2.4928650078249005e-06, "loss": 0.195, "step": 1287100 }, { "epoch": 13.11, "learning_rate": 2.4902181668644563e-06, "loss": 0.1999, "step": 1287200 }, { "epoch": 13.12, "learning_rate": 2.487572670968169e-06, "loss": 0.2193, "step": 1287300 }, { "epoch": 13.12, "learning_rate": 2.4849285202654015e-06, "loss": 0.2929, "step": 1287400 }, { "epoch": 13.12, "learning_rate": 2.4822857148854205e-06, "loss": 0.2414, "step": 1287500 }, { "epoch": 13.12, "learning_rate": 2.4796442549574528e-06, "loss": 0.2337, "step": 1287600 }, { "epoch": 13.12, "learning_rate": 2.4770041406106446e-06, "loss": 0.236, "step": 1287700 }, { "epoch": 13.12, "learning_rate": 2.47436537197409e-06, "loss": 0.2022, "step": 1287800 }, { "epoch": 13.12, "learning_rate": 2.4717279491767987e-06, "loss": 0.193, "step": 1287900 }, { "epoch": 13.12, "learning_rate": 2.469091872347734e-06, "loss": 0.1737, "step": 1288000 }, { "epoch": 13.12, "learning_rate": 2.4664571416157865e-06, "loss": 0.2324, "step": 1288100 }, { "epoch": 13.12, "learning_rate": 2.4638237571097667e-06, "loss": 0.2059, "step": 1288200 }, { "epoch": 13.13, "learning_rate": 2.461191718958441e-06, "loss": 0.1675, "step": 1288300 }, { "epoch": 13.13, "learning_rate": 2.4585610272905e-06, "loss": 0.2354, "step": 1288400 }, { "epoch": 13.13, "learning_rate": 2.455931682234558e-06, "loss": 0.1567, "step": 1288500 }, { "epoch": 13.13, "learning_rate": 2.4533036839191847e-06, "loss": 0.1926, "step": 1288600 }, { "epoch": 13.13, "learning_rate": 2.450677032472878e-06, "loss": 0.2297, "step": 1288700 }, { "epoch": 13.13, "learning_rate": 2.448051728024051e-06, "loss": 0.1655, "step": 1288800 }, { "epoch": 13.13, "learning_rate": 2.4454277707010753e-06, "loss": 0.2058, "step": 1288900 }, { "epoch": 13.13, "learning_rate": 2.4428051606322443e-06, "loss": 0.2448, "step": 1289000 }, { "epoch": 13.13, "learning_rate": 2.4402101039026837e-06, "loss": 0.2178, "step": 1289100 }, { "epoch": 13.13, "learning_rate": 2.4375901752510345e-06, "loss": 0.2184, "step": 1289200 }, { "epoch": 13.14, "learning_rate": 2.4349715942367335e-06, "loss": 0.2122, "step": 1289300 }, { "epoch": 13.14, "learning_rate": 2.432354360987822e-06, "loss": 0.2201, "step": 1289400 }, { "epoch": 13.14, "learning_rate": 2.4297384756322704e-06, "loss": 0.2243, "step": 1289500 }, { "epoch": 13.14, "learning_rate": 2.4271239382979737e-06, "loss": 0.1877, "step": 1289600 }, { "epoch": 13.14, "learning_rate": 2.4245107491127695e-06, "loss": 0.2237, "step": 1289700 }, { "epoch": 13.14, "learning_rate": 2.421898908204435e-06, "loss": 0.2152, "step": 1289800 }, { "epoch": 13.14, "learning_rate": 2.4192884157006587e-06, "loss": 0.2291, "step": 1289900 }, { "epoch": 13.14, "learning_rate": 2.4166792717290853e-06, "loss": 0.2416, "step": 1290000 }, { "epoch": 13.14, "learning_rate": 2.414071476417289e-06, "loss": 0.2054, "step": 1290100 }, { "epoch": 13.14, "learning_rate": 2.411465029892774e-06, "loss": 0.1767, "step": 1290200 }, { "epoch": 13.15, "learning_rate": 2.408859932282976e-06, "loss": 0.1706, "step": 1290300 }, { "epoch": 13.15, "learning_rate": 2.406256183715273e-06, "loss": 0.1765, "step": 1290400 }, { "epoch": 13.15, "learning_rate": 2.4036537843169792e-06, "loss": 0.2435, "step": 1290500 }, { "epoch": 13.15, "learning_rate": 2.4010527342153234e-06, "loss": 0.1746, "step": 1290600 }, { "epoch": 13.15, "learning_rate": 2.3984530335374833e-06, "loss": 0.2098, "step": 1290700 }, { "epoch": 13.15, "learning_rate": 2.395854682410581e-06, "loss": 0.2191, "step": 1290800 }, { "epoch": 13.15, "learning_rate": 2.393257680961645e-06, "loss": 0.1913, "step": 1290900 }, { "epoch": 13.15, "learning_rate": 2.390662029317663e-06, "loss": 0.2279, "step": 1291000 }, { "epoch": 13.15, "learning_rate": 2.3880677276055472e-06, "loss": 0.2013, "step": 1291100 }, { "epoch": 13.15, "learning_rate": 2.385474775952133e-06, "loss": 0.2196, "step": 1291200 }, { "epoch": 13.16, "learning_rate": 2.3829090838150567e-06, "loss": 0.2361, "step": 1291300 }, { "epoch": 13.16, "learning_rate": 2.380318819155587e-06, "loss": 0.2051, "step": 1291400 }, { "epoch": 13.16, "learning_rate": 2.3777299049336986e-06, "loss": 0.218, "step": 1291500 }, { "epoch": 13.16, "learning_rate": 2.375142341275978e-06, "loss": 0.183, "step": 1291600 }, { "epoch": 13.16, "learning_rate": 2.3725561283089426e-06, "loss": 0.1633, "step": 1291700 }, { "epoch": 13.16, "learning_rate": 2.369971266159039e-06, "loss": 0.2002, "step": 1291800 }, { "epoch": 13.16, "learning_rate": 2.367387754952659e-06, "loss": 0.1992, "step": 1291900 }, { "epoch": 13.16, "learning_rate": 2.3648055948161086e-06, "loss": 0.2223, "step": 1292000 }, { "epoch": 13.16, "learning_rate": 2.362224785875646e-06, "loss": 0.164, "step": 1292100 }, { "epoch": 13.17, "learning_rate": 2.359645328257465e-06, "loss": 0.2919, "step": 1292200 }, { "epoch": 13.17, "learning_rate": 2.3570672220876698e-06, "loss": 0.1723, "step": 1292300 }, { "epoch": 13.17, "learning_rate": 2.3544904674923272e-06, "loss": 0.2045, "step": 1292400 }, { "epoch": 13.17, "learning_rate": 2.3519150645974165e-06, "loss": 0.1613, "step": 1292500 }, { "epoch": 13.17, "learning_rate": 2.3493410135288727e-06, "loss": 0.1761, "step": 1292600 }, { "epoch": 13.17, "learning_rate": 2.346768314412536e-06, "loss": 0.2053, "step": 1292700 }, { "epoch": 13.17, "learning_rate": 2.3441969673742057e-06, "loss": 0.2205, "step": 1292800 }, { "epoch": 13.17, "learning_rate": 2.341626972539598e-06, "loss": 0.1725, "step": 1292900 }, { "epoch": 13.17, "learning_rate": 2.339058330034378e-06, "loss": 0.1668, "step": 1293000 }, { "epoch": 13.17, "learning_rate": 2.3364910399841335e-06, "loss": 0.2127, "step": 1293100 }, { "epoch": 13.18, "learning_rate": 2.3339251025143925e-06, "loss": 0.2762, "step": 1293200 }, { "epoch": 13.18, "learning_rate": 2.3313605177506093e-06, "loss": 0.213, "step": 1293300 }, { "epoch": 13.18, "learning_rate": 2.3287972858181795e-06, "loss": 0.1964, "step": 1293400 }, { "epoch": 13.18, "learning_rate": 2.326235406842433e-06, "loss": 0.1815, "step": 1293500 }, { "epoch": 13.18, "learning_rate": 2.3236748809486195e-06, "loss": 0.1965, "step": 1293600 }, { "epoch": 13.18, "learning_rate": 2.3211157082619427e-06, "loss": 0.2462, "step": 1293700 }, { "epoch": 13.18, "learning_rate": 2.3185578889075355e-06, "loss": 0.1957, "step": 1293800 }, { "epoch": 13.18, "learning_rate": 2.3160014230104445e-06, "loss": 0.1501, "step": 1293900 }, { "epoch": 13.18, "learning_rate": 2.3134463106956794e-06, "loss": 0.2759, "step": 1294000 }, { "epoch": 13.18, "learning_rate": 2.3108925520881607e-06, "loss": 0.193, "step": 1294100 }, { "epoch": 13.19, "learning_rate": 2.3083401473127585e-06, "loss": 0.2108, "step": 1294200 }, { "epoch": 13.19, "learning_rate": 2.3057890964942664e-06, "loss": 0.2017, "step": 1294300 }, { "epoch": 13.19, "learning_rate": 2.3032393997574174e-06, "loss": 0.2082, "step": 1294400 }, { "epoch": 13.19, "learning_rate": 2.3006910572268823e-06, "loss": 0.2394, "step": 1294500 }, { "epoch": 13.19, "learning_rate": 2.2981440690272484e-06, "loss": 0.1957, "step": 1294600 }, { "epoch": 13.19, "learning_rate": 2.2955984352830518e-06, "loss": 0.2277, "step": 1294700 }, { "epoch": 13.19, "learning_rate": 2.293054156118767e-06, "loss": 0.1751, "step": 1294800 }, { "epoch": 13.19, "learning_rate": 2.290511231658781e-06, "loss": 0.2401, "step": 1294900 }, { "epoch": 13.19, "learning_rate": 2.287969662027437e-06, "loss": 0.2058, "step": 1295000 }, { "epoch": 13.19, "learning_rate": 2.2854294473490034e-06, "loss": 0.2663, "step": 1295100 }, { "epoch": 13.2, "learning_rate": 2.2828905877476735e-06, "loss": 0.154, "step": 1295200 }, { "epoch": 13.2, "learning_rate": 2.280353083347585e-06, "loss": 0.2094, "step": 1295300 }, { "epoch": 13.2, "learning_rate": 2.2778169342728084e-06, "loss": 0.1893, "step": 1295400 }, { "epoch": 13.2, "learning_rate": 2.2753074818737187e-06, "loss": 0.2367, "step": 1295500 }, { "epoch": 13.2, "learning_rate": 2.272774030265169e-06, "loss": 0.2364, "step": 1295600 }, { "epoch": 13.2, "learning_rate": 2.270241934352495e-06, "loss": 0.2004, "step": 1295700 }, { "epoch": 13.2, "learning_rate": 2.2677111942595077e-06, "loss": 0.1868, "step": 1295800 }, { "epoch": 13.2, "learning_rate": 2.265181810109951e-06, "loss": 0.2013, "step": 1295900 }, { "epoch": 13.2, "learning_rate": 2.262653782027486e-06, "loss": 0.21, "step": 1296000 }, { "epoch": 13.2, "learning_rate": 2.260127110135725e-06, "loss": 0.1909, "step": 1296100 }, { "epoch": 13.21, "learning_rate": 2.2576017945582105e-06, "loss": 0.2021, "step": 1296200 }, { "epoch": 13.21, "learning_rate": 2.255077835418402e-06, "loss": 0.1461, "step": 1296300 }, { "epoch": 13.21, "learning_rate": 2.252555232839717e-06, "loss": 0.2706, "step": 1296400 }, { "epoch": 13.21, "learning_rate": 2.2500339869454966e-06, "loss": 0.1618, "step": 1296500 }, { "epoch": 13.21, "learning_rate": 2.247514097859006e-06, "loss": 0.2098, "step": 1296600 }, { "epoch": 13.21, "learning_rate": 2.244995565703459e-06, "loss": 0.2122, "step": 1296700 }, { "epoch": 13.21, "learning_rate": 2.2424783906019887e-06, "loss": 0.2163, "step": 1296800 }, { "epoch": 13.21, "learning_rate": 2.2399877241384993e-06, "loss": 0.2205, "step": 1296900 }, { "epoch": 13.21, "learning_rate": 2.237473249940756e-06, "loss": 0.2227, "step": 1297000 }, { "epoch": 13.22, "learning_rate": 2.234960133164887e-06, "loss": 0.1742, "step": 1297100 }, { "epoch": 13.22, "learning_rate": 2.232448373933772e-06, "loss": 0.1988, "step": 1297200 }, { "epoch": 13.22, "learning_rate": 2.2299379723702285e-06, "loss": 0.1702, "step": 1297300 }, { "epoch": 13.22, "learning_rate": 2.227428928596995e-06, "loss": 0.187, "step": 1297400 }, { "epoch": 13.22, "learning_rate": 2.224921242736747e-06, "loss": 0.2003, "step": 1297500 }, { "epoch": 13.22, "learning_rate": 2.2224149149120963e-06, "loss": 0.2346, "step": 1297600 }, { "epoch": 13.22, "learning_rate": 2.219909945245595e-06, "loss": 0.2773, "step": 1297700 }, { "epoch": 13.22, "learning_rate": 2.2174063338597085e-06, "loss": 0.2137, "step": 1297800 }, { "epoch": 13.22, "learning_rate": 2.2149040808768516e-06, "loss": 0.1803, "step": 1297900 }, { "epoch": 13.22, "learning_rate": 2.2124031864193737e-06, "loss": 0.2208, "step": 1298000 }, { "epoch": 13.23, "learning_rate": 2.209903650609554e-06, "loss": 0.2137, "step": 1298100 }, { "epoch": 13.23, "learning_rate": 2.2074054735696035e-06, "loss": 0.2112, "step": 1298200 }, { "epoch": 13.23, "learning_rate": 2.2049086554216725e-06, "loss": 0.2146, "step": 1298300 }, { "epoch": 13.23, "learning_rate": 2.2024131962878293e-06, "loss": 0.1706, "step": 1298400 }, { "epoch": 13.23, "learning_rate": 2.199919096290093e-06, "loss": 0.2422, "step": 1298500 }, { "epoch": 13.23, "learning_rate": 2.1974263555504193e-06, "loss": 0.2061, "step": 1298600 }, { "epoch": 13.23, "learning_rate": 2.194934974190671e-06, "loss": 0.1997, "step": 1298700 }, { "epoch": 13.23, "learning_rate": 2.1924449523326705e-06, "loss": 0.1908, "step": 1298800 }, { "epoch": 13.23, "learning_rate": 2.189956290098174e-06, "loss": 0.1955, "step": 1298900 }, { "epoch": 13.23, "learning_rate": 2.187468987608844e-06, "loss": 0.204, "step": 1299000 }, { "epoch": 13.24, "learning_rate": 2.184983044986306e-06, "loss": 0.2233, "step": 1299100 }, { "epoch": 13.24, "learning_rate": 2.182498462352104e-06, "loss": 0.2342, "step": 1299200 }, { "epoch": 13.24, "learning_rate": 2.180015239827723e-06, "loss": 0.2661, "step": 1299300 }, { "epoch": 13.24, "learning_rate": 2.1775333775345762e-06, "loss": 0.2683, "step": 1299400 }, { "epoch": 13.24, "learning_rate": 2.1750528755940102e-06, "loss": 0.2263, "step": 1299500 }, { "epoch": 13.24, "learning_rate": 2.1725737341273145e-06, "loss": 0.252, "step": 1299600 }, { "epoch": 13.24, "learning_rate": 2.1700959532556886e-06, "loss": 0.1767, "step": 1299700 }, { "epoch": 13.24, "learning_rate": 2.1676195331002957e-06, "loss": 0.2067, "step": 1299800 }, { "epoch": 13.24, "learning_rate": 2.1651444737822155e-06, "loss": 0.2118, "step": 1299900 }, { "epoch": 13.24, "learning_rate": 2.162670775422455e-06, "loss": 0.2218, "step": 1300000 }, { "epoch": 13.25, "learning_rate": 2.1601984381419705e-06, "loss": 0.1682, "step": 1300100 }, { "epoch": 13.25, "learning_rate": 2.157727462061645e-06, "loss": 0.2442, "step": 1300200 }, { "epoch": 13.25, "learning_rate": 2.155257847302292e-06, "loss": 0.1846, "step": 1300300 }, { "epoch": 13.25, "learning_rate": 2.1527895939846587e-06, "loss": 0.2003, "step": 1300400 }, { "epoch": 13.25, "learning_rate": 2.1503227022294315e-06, "loss": 0.1489, "step": 1300500 }, { "epoch": 13.25, "learning_rate": 2.1478571721572236e-06, "loss": 0.1738, "step": 1300600 }, { "epoch": 13.25, "learning_rate": 2.1453930038885926e-06, "loss": 0.2008, "step": 1300700 }, { "epoch": 13.25, "learning_rate": 2.142930197544012e-06, "loss": 0.2302, "step": 1300800 }, { "epoch": 13.25, "learning_rate": 2.1404687532439084e-06, "loss": 0.2071, "step": 1300900 }, { "epoch": 13.25, "learning_rate": 2.1380086711086188e-06, "loss": 0.2227, "step": 1301000 }, { "epoch": 13.26, "learning_rate": 2.1355499512584376e-06, "loss": 0.1952, "step": 1301100 }, { "epoch": 13.26, "learning_rate": 2.133092593813578e-06, "loss": 0.2361, "step": 1301200 }, { "epoch": 13.26, "learning_rate": 2.1306365988941877e-06, "loss": 0.2327, "step": 1301300 }, { "epoch": 13.26, "learning_rate": 2.128181966620354e-06, "loss": 0.2003, "step": 1301400 }, { "epoch": 13.26, "learning_rate": 2.125728697112094e-06, "loss": 0.1946, "step": 1301500 }, { "epoch": 13.26, "learning_rate": 2.123276790489348e-06, "loss": 0.2183, "step": 1301600 }, { "epoch": 13.26, "learning_rate": 2.120826246872011e-06, "loss": 0.2045, "step": 1301700 }, { "epoch": 13.26, "learning_rate": 2.118401551436955e-06, "loss": 0.1867, "step": 1301800 }, { "epoch": 13.26, "learning_rate": 2.1159537205567725e-06, "loss": 0.2007, "step": 1301900 }, { "epoch": 13.27, "learning_rate": 2.1135072530400457e-06, "loss": 0.2549, "step": 1302000 }, { "epoch": 13.27, "learning_rate": 2.1110621490063987e-06, "loss": 0.2518, "step": 1302100 }, { "epoch": 13.27, "learning_rate": 2.108618408575379e-06, "loss": 0.2147, "step": 1302200 }, { "epoch": 13.27, "learning_rate": 2.106176031866478e-06, "loss": 0.18, "step": 1302300 }, { "epoch": 13.27, "learning_rate": 2.1037350189991e-06, "loss": 0.2135, "step": 1302400 }, { "epoch": 13.27, "learning_rate": 2.101295370092606e-06, "loss": 0.2221, "step": 1302500 }, { "epoch": 13.27, "learning_rate": 2.0988570852662813e-06, "loss": 0.2158, "step": 1302600 }, { "epoch": 13.27, "learning_rate": 2.0964201646393365e-06, "loss": 0.1923, "step": 1302700 }, { "epoch": 13.27, "learning_rate": 2.0939846083309233e-06, "loss": 0.1902, "step": 1302800 }, { "epoch": 13.27, "learning_rate": 2.091550416460133e-06, "loss": 0.1689, "step": 1302900 }, { "epoch": 13.28, "learning_rate": 2.089117589145977e-06, "loss": 0.2489, "step": 1303000 }, { "epoch": 13.28, "learning_rate": 2.0866861265074034e-06, "loss": 0.1964, "step": 1303100 }, { "epoch": 13.28, "learning_rate": 2.0842560286633037e-06, "loss": 0.2397, "step": 1303200 }, { "epoch": 13.28, "learning_rate": 2.08182729573249e-06, "loss": 0.1869, "step": 1303300 }, { "epoch": 13.28, "learning_rate": 2.079399927833714e-06, "loss": 0.192, "step": 1303400 }, { "epoch": 13.28, "learning_rate": 2.0769739250856633e-06, "loss": 0.2188, "step": 1303500 }, { "epoch": 13.28, "learning_rate": 2.0745492876069537e-06, "loss": 0.2199, "step": 1303600 }, { "epoch": 13.28, "learning_rate": 2.0721260155161303e-06, "loss": 0.2211, "step": 1303700 }, { "epoch": 13.28, "learning_rate": 2.069704108931678e-06, "loss": 0.2024, "step": 1303800 }, { "epoch": 13.28, "learning_rate": 2.0672835679720193e-06, "loss": 0.2016, "step": 1303900 }, { "epoch": 13.29, "learning_rate": 2.0648643927554987e-06, "loss": 0.1638, "step": 1304000 }, { "epoch": 13.29, "learning_rate": 2.0624465834003993e-06, "loss": 0.1964, "step": 1304100 }, { "epoch": 13.29, "learning_rate": 2.060054297696712e-06, "loss": 0.1859, "step": 1304200 }, { "epoch": 13.29, "learning_rate": 2.057639206757478e-06, "loss": 0.2181, "step": 1304300 }, { "epoch": 13.29, "learning_rate": 2.0552254820329387e-06, "loss": 0.2092, "step": 1304400 }, { "epoch": 13.29, "learning_rate": 2.052813123641103e-06, "loss": 0.2251, "step": 1304500 }, { "epoch": 13.29, "learning_rate": 2.05040213169993e-06, "loss": 0.1899, "step": 1304600 }, { "epoch": 13.29, "learning_rate": 2.047992506327302e-06, "loss": 0.2154, "step": 1304700 }, { "epoch": 13.29, "learning_rate": 2.045584247641025e-06, "loss": 0.1794, "step": 1304800 }, { "epoch": 13.29, "learning_rate": 2.043177355758857e-06, "loss": 0.2475, "step": 1304900 }, { "epoch": 13.3, "learning_rate": 2.040795879281434e-06, "loss": 0.1884, "step": 1305000 }, { "epoch": 13.3, "learning_rate": 2.038391707689483e-06, "loss": 0.2538, "step": 1305100 }, { "epoch": 13.3, "learning_rate": 2.0359889032533206e-06, "loss": 0.2537, "step": 1305200 }, { "epoch": 13.3, "learning_rate": 2.0335874660904097e-06, "loss": 0.2283, "step": 1305300 }, { "epoch": 13.3, "learning_rate": 2.031187396318179e-06, "loss": 0.2475, "step": 1305400 }, { "epoch": 13.3, "learning_rate": 2.0287886940539825e-06, "loss": 0.2245, "step": 1305500 }, { "epoch": 13.3, "learning_rate": 2.0263913594150884e-06, "loss": 0.2335, "step": 1305600 }, { "epoch": 13.3, "learning_rate": 2.0239953925187203e-06, "loss": 0.2225, "step": 1305700 }, { "epoch": 13.3, "learning_rate": 2.0216007934820245e-06, "loss": 0.1594, "step": 1305800 }, { "epoch": 13.3, "learning_rate": 2.019207562422084e-06, "loss": 0.2922, "step": 1305900 }, { "epoch": 13.31, "learning_rate": 2.016815699455915e-06, "loss": 0.2299, "step": 1306000 }, { "epoch": 13.31, "learning_rate": 2.014425204700464e-06, "loss": 0.1795, "step": 1306100 }, { "epoch": 13.31, "learning_rate": 2.0120360782726076e-06, "loss": 0.2301, "step": 1306200 }, { "epoch": 13.31, "learning_rate": 2.0096483202891658e-06, "loss": 0.2518, "step": 1306300 }, { "epoch": 13.31, "learning_rate": 2.0072619308668815e-06, "loss": 0.2624, "step": 1306400 }, { "epoch": 13.31, "learning_rate": 2.004876910122442e-06, "loss": 0.2199, "step": 1306500 }, { "epoch": 13.31, "learning_rate": 2.002493258172453e-06, "loss": 0.2245, "step": 1306600 }, { "epoch": 13.31, "learning_rate": 2.000110975133459e-06, "loss": 0.2473, "step": 1306700 }, { "epoch": 13.31, "learning_rate": 1.997730061121953e-06, "loss": 0.2481, "step": 1306800 }, { "epoch": 13.31, "learning_rate": 1.995350516254332e-06, "loss": 0.1975, "step": 1306900 }, { "epoch": 13.32, "learning_rate": 1.992972340646946e-06, "loss": 0.1776, "step": 1307000 }, { "epoch": 13.32, "learning_rate": 1.9905955344160754e-06, "loss": 0.2122, "step": 1307100 }, { "epoch": 13.32, "learning_rate": 1.988220097677934e-06, "loss": 0.2808, "step": 1307200 }, { "epoch": 13.32, "learning_rate": 1.9858460305486626e-06, "loss": 0.196, "step": 1307300 }, { "epoch": 13.32, "learning_rate": 1.9834733331443445e-06, "loss": 0.2234, "step": 1307400 }, { "epoch": 13.32, "learning_rate": 1.9811020055809836e-06, "loss": 0.2014, "step": 1307500 }, { "epoch": 13.32, "learning_rate": 1.9787320479745276e-06, "loss": 0.2046, "step": 1307600 }, { "epoch": 13.32, "learning_rate": 1.976363460440853e-06, "loss": 0.2341, "step": 1307700 }, { "epoch": 13.32, "learning_rate": 1.973996243095768e-06, "loss": 0.2147, "step": 1307800 }, { "epoch": 13.33, "learning_rate": 1.971630396055013e-06, "loss": 0.214, "step": 1307900 }, { "epoch": 13.33, "learning_rate": 1.9692659194342688e-06, "loss": 0.2123, "step": 1308000 }, { "epoch": 13.33, "learning_rate": 1.9669028133491496e-06, "loss": 0.1957, "step": 1308100 }, { "epoch": 13.33, "learning_rate": 1.964564688484416e-06, "loss": 0.1706, "step": 1308200 }, { "epoch": 13.33, "learning_rate": 1.962204310108845e-06, "loss": 0.2276, "step": 1308300 }, { "epoch": 13.33, "learning_rate": 1.959845302614166e-06, "loss": 0.2188, "step": 1308400 }, { "epoch": 13.33, "learning_rate": 1.9574876661157172e-06, "loss": 0.1887, "step": 1308500 }, { "epoch": 13.33, "learning_rate": 1.955131400728769e-06, "loss": 0.1907, "step": 1308600 }, { "epoch": 13.33, "learning_rate": 1.952776506568533e-06, "loss": 0.2465, "step": 1308700 }, { "epoch": 13.33, "learning_rate": 1.950446512189814e-06, "loss": 0.2615, "step": 1308800 }, { "epoch": 13.34, "learning_rate": 1.9480943471132174e-06, "loss": 0.235, "step": 1308900 }, { "epoch": 13.34, "learning_rate": 1.945743553607404e-06, "loss": 0.2284, "step": 1309000 }, { "epoch": 13.34, "learning_rate": 1.9433941317873094e-06, "loss": 0.1765, "step": 1309100 }, { "epoch": 13.34, "learning_rate": 1.9410460817678077e-06, "loss": 0.215, "step": 1309200 }, { "epoch": 13.34, "learning_rate": 1.9386994036637073e-06, "loss": 0.2087, "step": 1309300 }, { "epoch": 13.34, "learning_rate": 1.9363540975897463e-06, "loss": 0.1956, "step": 1309400 }, { "epoch": 13.34, "learning_rate": 1.9340101636605935e-06, "loss": 0.2231, "step": 1309500 }, { "epoch": 13.34, "learning_rate": 1.931667601990853e-06, "loss": 0.214, "step": 1309600 }, { "epoch": 13.34, "learning_rate": 1.9293264126950638e-06, "loss": 0.1919, "step": 1309700 }, { "epoch": 13.34, "learning_rate": 1.926986595887694e-06, "loss": 0.2319, "step": 1309800 }, { "epoch": 13.35, "learning_rate": 1.924648151683153e-06, "loss": 0.2461, "step": 1309900 }, { "epoch": 13.35, "learning_rate": 1.9223110801957745e-06, "loss": 0.2181, "step": 1310000 }, { "epoch": 13.35, "learning_rate": 1.9199753815398215e-06, "loss": 0.2366, "step": 1310100 }, { "epoch": 13.35, "learning_rate": 1.9176410558294956e-06, "loss": 0.1822, "step": 1310200 }, { "epoch": 13.35, "learning_rate": 1.9153081031789454e-06, "loss": 0.2044, "step": 1310300 }, { "epoch": 13.35, "learning_rate": 1.91297652370222e-06, "loss": 0.1818, "step": 1310400 }, { "epoch": 13.35, "learning_rate": 1.9106463175133314e-06, "loss": 0.1839, "step": 1310500 }, { "epoch": 13.35, "learning_rate": 1.908317484726212e-06, "loss": 0.2503, "step": 1310600 }, { "epoch": 13.35, "learning_rate": 1.9059900254547203e-06, "loss": 0.2297, "step": 1310700 }, { "epoch": 13.35, "learning_rate": 1.9036639398126587e-06, "loss": 0.2208, "step": 1310800 }, { "epoch": 13.36, "learning_rate": 1.901339227913763e-06, "loss": 0.1812, "step": 1310900 }, { "epoch": 13.36, "learning_rate": 1.8990158898716925e-06, "loss": 0.1603, "step": 1311000 }, { "epoch": 13.36, "learning_rate": 1.8966939258000494e-06, "loss": 0.1914, "step": 1311100 }, { "epoch": 13.36, "learning_rate": 1.8943733358123628e-06, "loss": 0.206, "step": 1311200 }, { "epoch": 13.36, "learning_rate": 1.8920541200220954e-06, "loss": 0.2413, "step": 1311300 }, { "epoch": 13.36, "learning_rate": 1.8897362785426398e-06, "loss": 0.2332, "step": 1311400 }, { "epoch": 13.36, "learning_rate": 1.8874198114873253e-06, "loss": 0.1768, "step": 1311500 }, { "epoch": 13.36, "learning_rate": 1.8851047189694215e-06, "loss": 0.2015, "step": 1311600 }, { "epoch": 13.36, "learning_rate": 1.8827910011021076e-06, "loss": 0.1734, "step": 1311700 }, { "epoch": 13.36, "learning_rate": 1.8804786579985233e-06, "loss": 0.2484, "step": 1311800 }, { "epoch": 13.37, "learning_rate": 1.8781676897717282e-06, "loss": 0.2143, "step": 1311900 }, { "epoch": 13.37, "learning_rate": 1.875858096534705e-06, "loss": 0.2038, "step": 1312000 }, { "epoch": 13.37, "learning_rate": 1.8735498784003834e-06, "loss": 0.1588, "step": 1312100 }, { "epoch": 13.37, "learning_rate": 1.8712430354816235e-06, "loss": 0.2342, "step": 1312200 }, { "epoch": 13.37, "learning_rate": 1.868937567891218e-06, "loss": 0.2273, "step": 1312300 }, { "epoch": 13.37, "learning_rate": 1.8666334757418868e-06, "loss": 0.2276, "step": 1312400 }, { "epoch": 13.37, "learning_rate": 1.86433075914629e-06, "loss": 0.1749, "step": 1312500 }, { "epoch": 13.37, "learning_rate": 1.8620294182170106e-06, "loss": 0.1842, "step": 1312600 }, { "epoch": 13.37, "learning_rate": 1.8597294530665755e-06, "loss": 0.2137, "step": 1312700 }, { "epoch": 13.38, "learning_rate": 1.8574308638074378e-06, "loss": 0.1913, "step": 1312800 }, { "epoch": 13.38, "learning_rate": 1.8551566158729527e-06, "loss": 0.2594, "step": 1312900 }, { "epoch": 13.38, "learning_rate": 1.852860764971792e-06, "loss": 0.2749, "step": 1313000 }, { "epoch": 13.38, "learning_rate": 1.8505662902977639e-06, "loss": 0.198, "step": 1313100 }, { "epoch": 13.38, "learning_rate": 1.8482731919630547e-06, "loss": 0.2161, "step": 1313200 }, { "epoch": 13.38, "learning_rate": 1.8459814700797883e-06, "loss": 0.2551, "step": 1313300 }, { "epoch": 13.38, "learning_rate": 1.8436911247600151e-06, "loss": 0.2322, "step": 1313400 }, { "epoch": 13.38, "learning_rate": 1.8414021561157158e-06, "loss": 0.1807, "step": 1313500 }, { "epoch": 13.38, "learning_rate": 1.8391145642588037e-06, "loss": 0.2258, "step": 1313600 }, { "epoch": 13.38, "learning_rate": 1.8368283493011428e-06, "loss": 0.1659, "step": 1313700 }, { "epoch": 13.39, "learning_rate": 1.8345435113545006e-06, "loss": 0.1867, "step": 1313800 }, { "epoch": 13.39, "learning_rate": 1.8322600505306009e-06, "loss": 0.1888, "step": 1313900 }, { "epoch": 13.39, "learning_rate": 1.829977966941091e-06, "loss": 0.211, "step": 1314000 }, { "epoch": 13.39, "learning_rate": 1.827697260697545e-06, "loss": 0.2125, "step": 1314100 }, { "epoch": 13.39, "learning_rate": 1.8254179319114806e-06, "loss": 0.1995, "step": 1314200 }, { "epoch": 13.39, "learning_rate": 1.823139980694345e-06, "loss": 0.1801, "step": 1314300 }, { "epoch": 13.39, "learning_rate": 1.8208861660729914e-06, "loss": 0.1892, "step": 1314400 }, { "epoch": 13.39, "learning_rate": 1.818610956549308e-06, "loss": 0.2073, "step": 1314500 }, { "epoch": 13.39, "learning_rate": 1.8163371249273675e-06, "loss": 0.2189, "step": 1314600 }, { "epoch": 13.39, "learning_rate": 1.8140646713183505e-06, "loss": 0.204, "step": 1314700 }, { "epoch": 13.4, "learning_rate": 1.8117935958333653e-06, "loss": 0.2228, "step": 1314800 }, { "epoch": 13.4, "learning_rate": 1.809523898583456e-06, "loss": 0.1621, "step": 1314900 }, { "epoch": 13.4, "learning_rate": 1.8072555796795942e-06, "loss": 0.2624, "step": 1315000 }, { "epoch": 13.4, "learning_rate": 1.8049886392326942e-06, "loss": 0.2534, "step": 1315100 }, { "epoch": 13.4, "learning_rate": 1.8027230773535874e-06, "loss": 0.2113, "step": 1315200 }, { "epoch": 13.4, "learning_rate": 1.8004588941530486e-06, "loss": 0.1925, "step": 1315300 }, { "epoch": 13.4, "learning_rate": 1.7981960897417892e-06, "loss": 0.2222, "step": 1315400 }, { "epoch": 13.4, "learning_rate": 1.7959346642304374e-06, "loss": 0.2975, "step": 1315500 }, { "epoch": 13.4, "learning_rate": 1.793674617729565e-06, "loss": 0.2362, "step": 1315600 }, { "epoch": 13.4, "learning_rate": 1.7914159503496797e-06, "loss": 0.2463, "step": 1315700 }, { "epoch": 13.41, "learning_rate": 1.7891586622012168e-06, "loss": 0.2568, "step": 1315800 }, { "epoch": 13.41, "learning_rate": 1.7869027533945381e-06, "loss": 0.2061, "step": 1315900 }, { "epoch": 13.41, "learning_rate": 1.7846482240399487e-06, "loss": 0.2389, "step": 1316000 }, { "epoch": 13.41, "learning_rate": 1.7823950742476803e-06, "loss": 0.2261, "step": 1316100 }, { "epoch": 13.41, "learning_rate": 1.780143304127898e-06, "loss": 0.2666, "step": 1316200 }, { "epoch": 13.41, "learning_rate": 1.7778929137907008e-06, "loss": 0.1873, "step": 1316300 }, { "epoch": 13.41, "learning_rate": 1.775643903346127e-06, "loss": 0.1975, "step": 1316400 }, { "epoch": 13.41, "learning_rate": 1.7733962729041254e-06, "loss": 0.2175, "step": 1316500 }, { "epoch": 13.41, "learning_rate": 1.771150022574598e-06, "loss": 0.2301, "step": 1316600 }, { "epoch": 13.41, "learning_rate": 1.768905152467377e-06, "loss": 0.2216, "step": 1316700 }, { "epoch": 13.42, "learning_rate": 1.7666840907569638e-06, "loss": 0.1657, "step": 1316800 }, { "epoch": 13.42, "learning_rate": 1.7644419676186018e-06, "loss": 0.1761, "step": 1316900 }, { "epoch": 13.42, "learning_rate": 1.7622012250305274e-06, "loss": 0.1924, "step": 1317000 }, { "epoch": 13.42, "learning_rate": 1.7599618631022963e-06, "loss": 0.185, "step": 1317100 }, { "epoch": 13.42, "learning_rate": 1.7577238819434037e-06, "loss": 0.226, "step": 1317200 }, { "epoch": 13.42, "learning_rate": 1.7554872816632728e-06, "loss": 0.2805, "step": 1317300 }, { "epoch": 13.42, "learning_rate": 1.7532520623712556e-06, "loss": 0.1638, "step": 1317400 }, { "epoch": 13.42, "learning_rate": 1.7510182241766482e-06, "loss": 0.2193, "step": 1317500 }, { "epoch": 13.42, "learning_rate": 1.7487857671886665e-06, "loss": 0.1912, "step": 1317600 }, { "epoch": 13.42, "learning_rate": 1.7465546915164665e-06, "loss": 0.2153, "step": 1317700 }, { "epoch": 13.43, "learning_rate": 1.7443249972691378e-06, "loss": 0.2127, "step": 1317800 }, { "epoch": 13.43, "learning_rate": 1.7420966845556928e-06, "loss": 0.2098, "step": 1317900 }, { "epoch": 13.43, "learning_rate": 1.739869753485085e-06, "loss": 0.185, "step": 1318000 }, { "epoch": 13.43, "learning_rate": 1.7376442041661999e-06, "loss": 0.229, "step": 1318100 }, { "epoch": 13.43, "learning_rate": 1.7354200367078544e-06, "loss": 0.2156, "step": 1318200 }, { "epoch": 13.43, "learning_rate": 1.733197251218791e-06, "loss": 0.2034, "step": 1318300 }, { "epoch": 13.43, "learning_rate": 1.7309758478076965e-06, "loss": 0.2029, "step": 1318400 }, { "epoch": 13.43, "learning_rate": 1.7287558265831838e-06, "loss": 0.2004, "step": 1318500 }, { "epoch": 13.43, "learning_rate": 1.7265371876537962e-06, "loss": 0.206, "step": 1318600 }, { "epoch": 13.44, "learning_rate": 1.72431993112801e-06, "loss": 0.1752, "step": 1318700 }, { "epoch": 13.44, "learning_rate": 1.7221040571142387e-06, "loss": 0.1994, "step": 1318800 }, { "epoch": 13.44, "learning_rate": 1.719889565720829e-06, "loss": 0.2435, "step": 1318900 }, { "epoch": 13.44, "learning_rate": 1.7176764570560477e-06, "loss": 0.1548, "step": 1319000 }, { "epoch": 13.44, "learning_rate": 1.7154647312281146e-06, "loss": 0.149, "step": 1319100 }, { "epoch": 13.44, "learning_rate": 1.7132764849280536e-06, "loss": 0.2329, "step": 1319200 }, { "epoch": 13.44, "learning_rate": 1.7110675112670915e-06, "loss": 0.2269, "step": 1319300 }, { "epoch": 13.44, "learning_rate": 1.7088599207661027e-06, "loss": 0.2215, "step": 1319400 }, { "epoch": 13.44, "learning_rate": 1.7066537135330307e-06, "loss": 0.1804, "step": 1319500 }, { "epoch": 13.44, "learning_rate": 1.7044488896757527e-06, "loss": 0.2262, "step": 1319600 }, { "epoch": 13.45, "learning_rate": 1.702245449302059e-06, "loss": 0.2137, "step": 1319700 }, { "epoch": 13.45, "learning_rate": 1.7000433925196902e-06, "loss": 0.2126, "step": 1319800 }, { "epoch": 13.45, "learning_rate": 1.6978427194363165e-06, "loss": 0.1987, "step": 1319900 }, { "epoch": 13.45, "learning_rate": 1.6956434301595324e-06, "loss": 0.1603, "step": 1320000 }, { "epoch": 13.45, "learning_rate": 1.693445524796875e-06, "loss": 0.1622, "step": 1320100 }, { "epoch": 13.45, "learning_rate": 1.6912490034558115e-06, "loss": 0.2165, "step": 1320200 }, { "epoch": 13.45, "learning_rate": 1.689053866243726e-06, "loss": 0.2252, "step": 1320300 }, { "epoch": 13.45, "learning_rate": 1.6868601132679594e-06, "loss": 0.2113, "step": 1320400 }, { "epoch": 13.45, "learning_rate": 1.6846677446357694e-06, "loss": 0.2175, "step": 1320500 }, { "epoch": 13.45, "learning_rate": 1.6824767604543501e-06, "loss": 0.2395, "step": 1320600 }, { "epoch": 13.46, "learning_rate": 1.6802871608308256e-06, "loss": 0.2008, "step": 1320700 }, { "epoch": 13.46, "learning_rate": 1.6780989458722573e-06, "loss": 0.2328, "step": 1320800 }, { "epoch": 13.46, "learning_rate": 1.675912115685636e-06, "loss": 0.1976, "step": 1320900 }, { "epoch": 13.46, "learning_rate": 1.6737266703778798e-06, "loss": 0.223, "step": 1321000 }, { "epoch": 13.46, "learning_rate": 1.6715426100558461e-06, "loss": 0.2047, "step": 1321100 }, { "epoch": 13.46, "learning_rate": 1.6693599348263234e-06, "loss": 0.1869, "step": 1321200 }, { "epoch": 13.46, "learning_rate": 1.6671786447960324e-06, "loss": 0.2188, "step": 1321300 }, { "epoch": 13.46, "learning_rate": 1.6649987400716215e-06, "loss": 0.2397, "step": 1321400 }, { "epoch": 13.46, "learning_rate": 1.6628202207596855e-06, "loss": 0.2096, "step": 1321500 }, { "epoch": 13.46, "learning_rate": 1.660643086966729e-06, "loss": 0.207, "step": 1321600 }, { "epoch": 13.47, "learning_rate": 1.6584673387992033e-06, "loss": 0.2228, "step": 1321700 }, { "epoch": 13.47, "learning_rate": 1.6562929763634937e-06, "loss": 0.2435, "step": 1321800 }, { "epoch": 13.47, "learning_rate": 1.6541199997659117e-06, "loss": 0.2379, "step": 1321900 }, { "epoch": 13.47, "learning_rate": 1.651948409112699e-06, "loss": 0.2032, "step": 1322000 }, { "epoch": 13.47, "learning_rate": 1.6497782045100407e-06, "loss": 0.2393, "step": 1322100 }, { "epoch": 13.47, "learning_rate": 1.6476093860640417e-06, "loss": 0.2008, "step": 1322200 }, { "epoch": 13.47, "learning_rate": 1.6454419538807442e-06, "loss": 0.2627, "step": 1322300 }, { "epoch": 13.47, "learning_rate": 1.643275908066123e-06, "loss": 0.2101, "step": 1322400 }, { "epoch": 13.47, "learning_rate": 1.641111248726087e-06, "loss": 0.218, "step": 1322500 }, { "epoch": 13.47, "learning_rate": 1.6389479759664715e-06, "loss": 0.1734, "step": 1322600 }, { "epoch": 13.48, "learning_rate": 1.636786089893052e-06, "loss": 0.1947, "step": 1322700 }, { "epoch": 13.48, "learning_rate": 1.6346255906115337e-06, "loss": 0.2089, "step": 1322800 }, { "epoch": 13.48, "learning_rate": 1.6324664782275456e-06, "loss": 0.2132, "step": 1322900 }, { "epoch": 13.48, "learning_rate": 1.6303087528466565e-06, "loss": 0.2319, "step": 1323000 }, { "epoch": 13.48, "learning_rate": 1.628152414574372e-06, "loss": 0.1677, "step": 1323100 }, { "epoch": 13.48, "learning_rate": 1.625997463516118e-06, "loss": 0.1873, "step": 1323200 }, { "epoch": 13.48, "learning_rate": 1.6238438997772564e-06, "loss": 0.2386, "step": 1323300 }, { "epoch": 13.48, "learning_rate": 1.6216917234630969e-06, "loss": 0.2216, "step": 1323400 }, { "epoch": 13.48, "learning_rate": 1.6195409346788515e-06, "loss": 0.2242, "step": 1323500 }, { "epoch": 13.49, "learning_rate": 1.617391533529693e-06, "loss": 0.1793, "step": 1323600 }, { "epoch": 13.49, "learning_rate": 1.6152435201207072e-06, "loss": 0.1825, "step": 1323700 }, { "epoch": 13.49, "learning_rate": 1.6130968945569202e-06, "loss": 0.2579, "step": 1323800 }, { "epoch": 13.49, "learning_rate": 1.6109516569432913e-06, "loss": 0.2158, "step": 1323900 }, { "epoch": 13.49, "learning_rate": 1.6088078073847135e-06, "loss": 0.2043, "step": 1324000 }, { "epoch": 13.49, "learning_rate": 1.6066653459860058e-06, "loss": 0.2266, "step": 1324100 }, { "epoch": 13.49, "learning_rate": 1.604524272851915e-06, "loss": 0.203, "step": 1324200 }, { "epoch": 13.49, "learning_rate": 1.6023845880871335e-06, "loss": 0.1909, "step": 1324300 }, { "epoch": 13.49, "learning_rate": 1.6002462917962813e-06, "loss": 0.2193, "step": 1324400 }, { "epoch": 13.49, "learning_rate": 1.598109384083898e-06, "loss": 0.1878, "step": 1324500 }, { "epoch": 13.5, "learning_rate": 1.5959738650544731e-06, "loss": 0.221, "step": 1324600 }, { "epoch": 13.5, "learning_rate": 1.5938397348124268e-06, "loss": 0.2068, "step": 1324700 }, { "epoch": 13.5, "learning_rate": 1.591706993462092e-06, "loss": 0.2137, "step": 1324800 }, { "epoch": 13.5, "learning_rate": 1.5895756411077555e-06, "loss": 0.2195, "step": 1324900 }, { "epoch": 13.5, "learning_rate": 1.587445677853624e-06, "loss": 0.2003, "step": 1325000 }, { "epoch": 13.5, "learning_rate": 1.5853171038038406e-06, "loss": 0.2007, "step": 1325100 }, { "epoch": 13.5, "learning_rate": 1.5831899190624821e-06, "loss": 0.171, "step": 1325200 }, { "epoch": 13.5, "learning_rate": 1.5810641237335555e-06, "loss": 0.2751, "step": 1325300 }, { "epoch": 13.5, "learning_rate": 1.5789397179209974e-06, "loss": 0.2256, "step": 1325400 }, { "epoch": 13.5, "learning_rate": 1.5768167017286783e-06, "loss": 0.1816, "step": 1325500 }, { "epoch": 13.51, "learning_rate": 1.574695075260405e-06, "loss": 0.2249, "step": 1325600 }, { "epoch": 13.51, "learning_rate": 1.5725960341063206e-06, "loss": 0.2043, "step": 1325700 }, { "epoch": 13.51, "learning_rate": 1.570477173497442e-06, "loss": 0.2276, "step": 1325800 }, { "epoch": 13.51, "learning_rate": 1.5683597029225682e-06, "loss": 0.2454, "step": 1325900 }, { "epoch": 13.51, "learning_rate": 1.5662436224852328e-06, "loss": 0.1742, "step": 1326000 }, { "epoch": 13.51, "learning_rate": 1.564128932288903e-06, "loss": 0.2316, "step": 1326100 }, { "epoch": 13.51, "learning_rate": 1.5620156324369695e-06, "loss": 0.1776, "step": 1326200 }, { "epoch": 13.51, "learning_rate": 1.5599248352437457e-06, "loss": 0.1664, "step": 1326300 }, { "epoch": 13.51, "learning_rate": 1.5578143024845038e-06, "loss": 0.199, "step": 1326400 }, { "epoch": 13.51, "learning_rate": 1.5557051603784112e-06, "loss": 0.234, "step": 1326500 }, { "epoch": 13.52, "learning_rate": 1.553597409028592e-06, "loss": 0.2226, "step": 1326600 }, { "epoch": 13.52, "learning_rate": 1.551491048538094e-06, "loss": 0.2209, "step": 1326700 }, { "epoch": 13.52, "learning_rate": 1.5493860790099147e-06, "loss": 0.2261, "step": 1326800 }, { "epoch": 13.52, "learning_rate": 1.5472825005469716e-06, "loss": 0.1683, "step": 1326900 }, { "epoch": 13.52, "learning_rate": 1.5451803132521159e-06, "loss": 0.1759, "step": 1327000 }, { "epoch": 13.52, "learning_rate": 1.5430795172281354e-06, "loss": 0.2385, "step": 1327100 }, { "epoch": 13.52, "learning_rate": 1.5409801125777413e-06, "loss": 0.2113, "step": 1327200 }, { "epoch": 13.52, "learning_rate": 1.5388820994035913e-06, "loss": 0.2189, "step": 1327300 }, { "epoch": 13.52, "learning_rate": 1.5367854778082534e-06, "loss": 0.2549, "step": 1327400 }, { "epoch": 13.52, "learning_rate": 1.5346902478942493e-06, "loss": 0.1863, "step": 1327500 }, { "epoch": 13.53, "learning_rate": 1.5325964097640167e-06, "loss": 0.2191, "step": 1327600 }, { "epoch": 13.53, "learning_rate": 1.5305039635199369e-06, "loss": 0.1811, "step": 1327700 }, { "epoch": 13.53, "learning_rate": 1.5284129092643151e-06, "loss": 0.1757, "step": 1327800 }, { "epoch": 13.53, "learning_rate": 1.5263232470993994e-06, "loss": 0.218, "step": 1327900 }, { "epoch": 13.53, "learning_rate": 1.524234977127348e-06, "loss": 0.2151, "step": 1328000 }, { "epoch": 13.53, "learning_rate": 1.5221480994502724e-06, "loss": 0.2304, "step": 1328100 }, { "epoch": 13.53, "learning_rate": 1.5200626141702112e-06, "loss": 0.2028, "step": 1328200 }, { "epoch": 13.53, "learning_rate": 1.5179785213891261e-06, "loss": 0.2024, "step": 1328300 }, { "epoch": 13.53, "learning_rate": 1.5158958212089191e-06, "loss": 0.1947, "step": 1328400 }, { "epoch": 13.53, "learning_rate": 1.5138145137314253e-06, "loss": 0.2214, "step": 1328500 }, { "epoch": 13.54, "learning_rate": 1.511734599058403e-06, "loss": 0.1764, "step": 1328600 }, { "epoch": 13.54, "learning_rate": 1.509656077291548e-06, "loss": 0.1671, "step": 1328700 }, { "epoch": 13.54, "learning_rate": 1.5075789485324886e-06, "loss": 0.2435, "step": 1328800 }, { "epoch": 13.54, "learning_rate": 1.5055032128827873e-06, "loss": 0.2395, "step": 1328900 }, { "epoch": 13.54, "learning_rate": 1.5034288704439291e-06, "loss": 0.216, "step": 1329000 }, { "epoch": 13.54, "learning_rate": 1.5013559213173432e-06, "loss": 0.2409, "step": 1329100 }, { "epoch": 13.54, "learning_rate": 1.4992843656043852e-06, "loss": 0.2223, "step": 1329200 }, { "epoch": 13.54, "learning_rate": 1.497214203406334e-06, "loss": 0.197, "step": 1329300 }, { "epoch": 13.54, "learning_rate": 1.495145434824412e-06, "loss": 0.1817, "step": 1329400 }, { "epoch": 13.55, "learning_rate": 1.4930780599597748e-06, "loss": 0.1826, "step": 1329500 }, { "epoch": 13.55, "learning_rate": 1.4910120789134985e-06, "loss": 0.2249, "step": 1329600 }, { "epoch": 13.55, "learning_rate": 1.4889474917865953e-06, "loss": 0.2005, "step": 1329700 }, { "epoch": 13.55, "learning_rate": 1.4868842986800212e-06, "loss": 0.16, "step": 1329800 }, { "epoch": 13.55, "learning_rate": 1.4848224996946426e-06, "loss": 0.1925, "step": 1329900 }, { "epoch": 13.55, "learning_rate": 1.4827620949312714e-06, "loss": 0.2348, "step": 1330000 }, { "epoch": 13.55, "learning_rate": 1.4807030844906545e-06, "loss": 0.1808, "step": 1330100 }, { "epoch": 13.55, "learning_rate": 1.478645468473464e-06, "loss": 0.2362, "step": 1330200 }, { "epoch": 13.55, "learning_rate": 1.4765892469803e-06, "loss": 0.2008, "step": 1330300 }, { "epoch": 13.55, "learning_rate": 1.4745549614766708e-06, "loss": 0.2589, "step": 1330400 }, { "epoch": 13.56, "learning_rate": 1.472501515385365e-06, "loss": 0.1998, "step": 1330500 }, { "epoch": 13.56, "learning_rate": 1.470449464118495e-06, "loss": 0.1866, "step": 1330600 }, { "epoch": 13.56, "learning_rate": 1.4684193074345987e-06, "loss": 0.2029, "step": 1330700 }, { "epoch": 13.56, "learning_rate": 1.4663700321667761e-06, "loss": 0.2353, "step": 1330800 }, { "epoch": 13.56, "learning_rate": 1.4643221520231797e-06, "loss": 0.2346, "step": 1330900 }, { "epoch": 13.56, "learning_rate": 1.4622756671039327e-06, "loss": 0.2436, "step": 1331000 }, { "epoch": 13.56, "learning_rate": 1.4602305775091086e-06, "loss": 0.2584, "step": 1331100 }, { "epoch": 13.56, "learning_rate": 1.4581868833386902e-06, "loss": 0.1848, "step": 1331200 }, { "epoch": 13.56, "learning_rate": 1.4561445846926013e-06, "loss": 0.1686, "step": 1331300 }, { "epoch": 13.56, "learning_rate": 1.4541036816707087e-06, "loss": 0.2031, "step": 1331400 }, { "epoch": 13.57, "learning_rate": 1.4520641743727859e-06, "loss": 0.2068, "step": 1331500 }, { "epoch": 13.57, "learning_rate": 1.4500260628985628e-06, "loss": 0.2234, "step": 1331600 }, { "epoch": 13.57, "learning_rate": 1.4479893473476835e-06, "loss": 0.2105, "step": 1331700 }, { "epoch": 13.57, "learning_rate": 1.4459540278197413e-06, "loss": 0.2155, "step": 1331800 }, { "epoch": 13.57, "learning_rate": 1.4439201044142435e-06, "loss": 0.1634, "step": 1331900 }, { "epoch": 13.57, "learning_rate": 1.4418875772306372e-06, "loss": 0.1598, "step": 1332000 }, { "epoch": 13.57, "learning_rate": 1.4398564463683062e-06, "loss": 0.1878, "step": 1332100 }, { "epoch": 13.57, "learning_rate": 1.4378267119265541e-06, "loss": 0.2004, "step": 1332200 }, { "epoch": 13.57, "learning_rate": 1.4357983740046253e-06, "loss": 0.2845, "step": 1332300 }, { "epoch": 13.57, "learning_rate": 1.4337714327017004e-06, "loss": 0.2445, "step": 1332400 }, { "epoch": 13.58, "learning_rate": 1.4317458881168698e-06, "loss": 0.216, "step": 1332500 }, { "epoch": 13.58, "learning_rate": 1.4297217403491814e-06, "loss": 0.2543, "step": 1332600 }, { "epoch": 13.58, "learning_rate": 1.4276989894976055e-06, "loss": 0.1918, "step": 1332700 }, { "epoch": 13.58, "learning_rate": 1.4256978422838485e-06, "loss": 0.2331, "step": 1332800 }, { "epoch": 13.58, "learning_rate": 1.4236778715894906e-06, "loss": 0.1701, "step": 1332900 }, { "epoch": 13.58, "learning_rate": 1.4216592981067488e-06, "loss": 0.237, "step": 1333000 }, { "epoch": 13.58, "learning_rate": 1.4196421219343213e-06, "loss": 0.2225, "step": 1333100 }, { "epoch": 13.58, "learning_rate": 1.417626343170839e-06, "loss": 0.1462, "step": 1333200 }, { "epoch": 13.58, "learning_rate": 1.4156119619148567e-06, "loss": 0.2294, "step": 1333300 }, { "epoch": 13.58, "learning_rate": 1.413598978264865e-06, "loss": 0.2165, "step": 1333400 }, { "epoch": 13.59, "learning_rate": 1.4115873923192924e-06, "loss": 0.1814, "step": 1333500 }, { "epoch": 13.59, "learning_rate": 1.4095772041764833e-06, "loss": 0.1833, "step": 1333600 }, { "epoch": 13.59, "learning_rate": 1.4075684139347322e-06, "loss": 0.1908, "step": 1333700 }, { "epoch": 13.59, "learning_rate": 1.405561021692261e-06, "loss": 0.2033, "step": 1333800 }, { "epoch": 13.59, "learning_rate": 1.4035750805677516e-06, "loss": 0.181, "step": 1333900 }, { "epoch": 13.59, "learning_rate": 1.4015704706357657e-06, "loss": 0.213, "step": 1334000 }, { "epoch": 13.59, "learning_rate": 1.3995672589963148e-06, "loss": 0.2457, "step": 1334100 }, { "epoch": 13.59, "learning_rate": 1.397565445747344e-06, "loss": 0.2266, "step": 1334200 }, { "epoch": 13.59, "learning_rate": 1.3955650309867352e-06, "loss": 0.178, "step": 1334300 }, { "epoch": 13.6, "learning_rate": 1.3935660148122898e-06, "loss": 0.2108, "step": 1334400 }, { "epoch": 13.6, "learning_rate": 1.3915683973217562e-06, "loss": 0.2591, "step": 1334500 }, { "epoch": 13.6, "learning_rate": 1.3895721786128034e-06, "loss": 0.2117, "step": 1334600 }, { "epoch": 13.6, "learning_rate": 1.3875773587830297e-06, "loss": 0.2497, "step": 1334700 }, { "epoch": 13.6, "learning_rate": 1.385583937929974e-06, "loss": 0.2299, "step": 1334800 }, { "epoch": 13.6, "learning_rate": 1.3835919161511012e-06, "loss": 0.2171, "step": 1334900 }, { "epoch": 13.6, "learning_rate": 1.3816012935438137e-06, "loss": 0.2499, "step": 1335000 }, { "epoch": 13.6, "learning_rate": 1.3796120702054337e-06, "loss": 0.1941, "step": 1335100 }, { "epoch": 13.6, "learning_rate": 1.3776242462332267e-06, "loss": 0.2182, "step": 1335200 }, { "epoch": 13.6, "learning_rate": 1.3756378217243882e-06, "loss": 0.2139, "step": 1335300 }, { "epoch": 13.61, "learning_rate": 1.3736527967760337e-06, "loss": 0.2329, "step": 1335400 }, { "epoch": 13.61, "learning_rate": 1.3716691714852258e-06, "loss": 0.1933, "step": 1335500 }, { "epoch": 13.61, "learning_rate": 1.3696869459489502e-06, "loss": 0.1982, "step": 1335600 }, { "epoch": 13.61, "learning_rate": 1.367706120264126e-06, "loss": 0.1832, "step": 1335700 }, { "epoch": 13.61, "learning_rate": 1.3657266945276026e-06, "loss": 0.1527, "step": 1335800 }, { "epoch": 13.61, "learning_rate": 1.3637486688361688e-06, "loss": 0.2471, "step": 1335900 }, { "epoch": 13.61, "learning_rate": 1.3617720432865277e-06, "loss": 0.22, "step": 1336000 }, { "epoch": 13.61, "learning_rate": 1.3597968179753316e-06, "loss": 0.2425, "step": 1336100 }, { "epoch": 13.61, "learning_rate": 1.357822992999157e-06, "loss": 0.2314, "step": 1336200 }, { "epoch": 13.61, "learning_rate": 1.3558505684545097e-06, "loss": 0.2347, "step": 1336300 }, { "epoch": 13.62, "learning_rate": 1.353879544437826e-06, "loss": 0.2114, "step": 1336400 }, { "epoch": 13.62, "learning_rate": 1.3519099210454855e-06, "loss": 0.2007, "step": 1336500 }, { "epoch": 13.62, "learning_rate": 1.3499416983737879e-06, "loss": 0.2172, "step": 1336600 }, { "epoch": 13.62, "learning_rate": 1.3479748765189658e-06, "loss": 0.2393, "step": 1336700 }, { "epoch": 13.62, "learning_rate": 1.3460094555771829e-06, "loss": 0.1808, "step": 1336800 }, { "epoch": 13.62, "learning_rate": 1.344045435644542e-06, "loss": 0.2147, "step": 1336900 }, { "epoch": 13.62, "learning_rate": 1.3420828168170661e-06, "loss": 0.2247, "step": 1337000 }, { "epoch": 13.62, "learning_rate": 1.3401215991907224e-06, "loss": 0.183, "step": 1337100 }, { "epoch": 13.62, "learning_rate": 1.3381617828614001e-06, "loss": 0.2341, "step": 1337200 }, { "epoch": 13.62, "learning_rate": 1.3362033679249198e-06, "loss": 0.2208, "step": 1337300 }, { "epoch": 13.63, "learning_rate": 1.3342463544770378e-06, "loss": 0.1977, "step": 1337400 }, { "epoch": 13.63, "learning_rate": 1.3322907426134446e-06, "loss": 0.1909, "step": 1337500 }, { "epoch": 13.63, "learning_rate": 1.3303365324297502e-06, "loss": 0.2228, "step": 1337600 }, { "epoch": 13.63, "learning_rate": 1.328383724021508e-06, "loss": 0.2173, "step": 1337700 }, { "epoch": 13.63, "learning_rate": 1.3264323174842053e-06, "loss": 0.1757, "step": 1337800 }, { "epoch": 13.63, "learning_rate": 1.324482312913239e-06, "loss": 0.2557, "step": 1337900 }, { "epoch": 13.63, "learning_rate": 1.3225337104039624e-06, "loss": 0.1813, "step": 1338000 }, { "epoch": 13.63, "learning_rate": 1.32058651005165e-06, "loss": 0.1829, "step": 1338100 }, { "epoch": 13.63, "learning_rate": 1.3186407119515053e-06, "loss": 0.1795, "step": 1338200 }, { "epoch": 13.63, "learning_rate": 1.3166963161986722e-06, "loss": 0.2065, "step": 1338300 }, { "epoch": 13.64, "learning_rate": 1.3147533228882146e-06, "loss": 0.1729, "step": 1338400 }, { "epoch": 13.64, "learning_rate": 1.3128117321151367e-06, "loss": 0.2142, "step": 1338500 }, { "epoch": 13.64, "learning_rate": 1.3108715439743657e-06, "loss": 0.2107, "step": 1338600 }, { "epoch": 13.64, "learning_rate": 1.3089327585607692e-06, "loss": 0.2381, "step": 1338700 }, { "epoch": 13.64, "learning_rate": 1.3069953759691478e-06, "loss": 0.203, "step": 1338800 }, { "epoch": 13.64, "learning_rate": 1.305059396294216e-06, "loss": 0.2503, "step": 1338900 }, { "epoch": 13.64, "learning_rate": 1.3031248196306344e-06, "loss": 0.1986, "step": 1339000 }, { "epoch": 13.64, "learning_rate": 1.301191646073001e-06, "loss": 0.2303, "step": 1339100 }, { "epoch": 13.64, "learning_rate": 1.299259875715827e-06, "loss": 0.1724, "step": 1339200 }, { "epoch": 13.65, "learning_rate": 1.2973295086535697e-06, "loss": 0.2223, "step": 1339300 }, { "epoch": 13.65, "learning_rate": 1.2954005449806073e-06, "loss": 0.2239, "step": 1339400 }, { "epoch": 13.65, "learning_rate": 1.2934729847912607e-06, "loss": 0.1754, "step": 1339500 }, { "epoch": 13.65, "learning_rate": 1.2915468281797716e-06, "loss": 0.1977, "step": 1339600 }, { "epoch": 13.65, "learning_rate": 1.2896220752403243e-06, "loss": 0.1376, "step": 1339700 }, { "epoch": 13.65, "learning_rate": 1.2876987260670204e-06, "loss": 0.2314, "step": 1339800 }, { "epoch": 13.65, "learning_rate": 1.2857767807539011e-06, "loss": 0.1955, "step": 1339900 }, { "epoch": 13.65, "learning_rate": 1.2838562393949415e-06, "loss": 0.1969, "step": 1340000 }, { "epoch": 13.65, "learning_rate": 1.2819371020840465e-06, "loss": 0.2183, "step": 1340100 }, { "epoch": 13.65, "learning_rate": 1.2800193689150408e-06, "loss": 0.2017, "step": 1340200 }, { "epoch": 13.66, "learning_rate": 1.2781030399816962e-06, "loss": 0.1834, "step": 1340300 }, { "epoch": 13.66, "learning_rate": 1.2761881153777144e-06, "loss": 0.2344, "step": 1340400 }, { "epoch": 13.66, "learning_rate": 1.2742745951967171e-06, "loss": 0.2453, "step": 1340500 }, { "epoch": 13.66, "learning_rate": 1.272362479532263e-06, "loss": 0.226, "step": 1340600 }, { "epoch": 13.66, "learning_rate": 1.2704517684778472e-06, "loss": 0.2025, "step": 1340700 }, { "epoch": 13.66, "learning_rate": 1.2685424621268916e-06, "loss": 0.202, "step": 1340800 }, { "epoch": 13.66, "learning_rate": 1.2666536326342416e-06, "loss": 0.1776, "step": 1340900 }, { "epoch": 13.66, "learning_rate": 1.264747121920834e-06, "loss": 0.2617, "step": 1341000 }, { "epoch": 13.66, "learning_rate": 1.262842016189807e-06, "loss": 0.1817, "step": 1341100 }, { "epoch": 13.66, "learning_rate": 1.260938315534319e-06, "loss": 0.2157, "step": 1341200 }, { "epoch": 13.67, "learning_rate": 1.2590360200474361e-06, "loss": 0.1904, "step": 1341300 }, { "epoch": 13.67, "learning_rate": 1.2571351298221734e-06, "loss": 0.1985, "step": 1341400 }, { "epoch": 13.67, "learning_rate": 1.2552356449514801e-06, "loss": 0.1973, "step": 1341500 }, { "epoch": 13.67, "learning_rate": 1.2533375655282186e-06, "loss": 0.2081, "step": 1341600 }, { "epoch": 13.67, "learning_rate": 1.2514408916452013e-06, "loss": 0.1953, "step": 1341700 }, { "epoch": 13.67, "learning_rate": 1.2495456233951608e-06, "loss": 0.2323, "step": 1341800 }, { "epoch": 13.67, "learning_rate": 1.2476706925373638e-06, "loss": 0.1999, "step": 1341900 }, { "epoch": 13.67, "learning_rate": 1.24577822177257e-06, "loss": 0.2433, "step": 1342000 }, { "epoch": 13.67, "learning_rate": 1.243887156917628e-06, "loss": 0.19, "step": 1342100 }, { "epoch": 13.67, "learning_rate": 1.241997498064994e-06, "loss": 0.2104, "step": 1342200 }, { "epoch": 13.68, "learning_rate": 1.2401092453070706e-06, "loss": 0.236, "step": 1342300 }, { "epoch": 13.68, "learning_rate": 1.2382223987361673e-06, "loss": 0.1942, "step": 1342400 }, { "epoch": 13.68, "learning_rate": 1.2363369584445506e-06, "loss": 0.1848, "step": 1342500 }, { "epoch": 13.68, "learning_rate": 1.2344529245244063e-06, "loss": 0.2101, "step": 1342600 }, { "epoch": 13.68, "learning_rate": 1.2325702970678444e-06, "loss": 0.2176, "step": 1342700 }, { "epoch": 13.68, "learning_rate": 1.2306890761669243e-06, "loss": 0.1575, "step": 1342800 }, { "epoch": 13.68, "learning_rate": 1.2288092619136227e-06, "loss": 0.206, "step": 1342900 }, { "epoch": 13.68, "learning_rate": 1.2269308543998493e-06, "loss": 0.2597, "step": 1343000 }, { "epoch": 13.68, "learning_rate": 1.2250538537174438e-06, "loss": 0.2108, "step": 1343100 }, { "epoch": 13.68, "learning_rate": 1.2231782599581897e-06, "loss": 0.1961, "step": 1343200 }, { "epoch": 13.69, "learning_rate": 1.221304073213787e-06, "loss": 0.2042, "step": 1343300 }, { "epoch": 13.69, "learning_rate": 1.2194312935758723e-06, "loss": 0.2412, "step": 1343400 }, { "epoch": 13.69, "learning_rate": 1.2175599211360123e-06, "loss": 0.2314, "step": 1343500 }, { "epoch": 13.69, "learning_rate": 1.2156899559857104e-06, "loss": 0.2206, "step": 1343600 }, { "epoch": 13.69, "learning_rate": 1.2138213982163937e-06, "loss": 0.2297, "step": 1343700 }, { "epoch": 13.69, "learning_rate": 1.2119542479194222e-06, "loss": 0.1793, "step": 1343800 }, { "epoch": 13.69, "learning_rate": 1.2100885051860933e-06, "loss": 0.2258, "step": 1343900 }, { "epoch": 13.69, "learning_rate": 1.2082241701076235e-06, "loss": 0.2394, "step": 1344000 }, { "epoch": 13.69, "learning_rate": 1.2063612427751737e-06, "loss": 0.1718, "step": 1344100 }, { "epoch": 13.69, "learning_rate": 1.204499723279827e-06, "loss": 0.1549, "step": 1344200 }, { "epoch": 13.7, "learning_rate": 1.2026582058587276e-06, "loss": 0.1957, "step": 1344300 }, { "epoch": 13.7, "learning_rate": 1.2007994882299301e-06, "loss": 0.2412, "step": 1344400 }, { "epoch": 13.7, "learning_rate": 1.1989421787101717e-06, "loss": 0.2236, "step": 1344500 }, { "epoch": 13.7, "learning_rate": 1.1970862773902669e-06, "loss": 0.2188, "step": 1344600 }, { "epoch": 13.7, "learning_rate": 1.1952317843609562e-06, "loss": 0.2408, "step": 1344700 }, { "epoch": 13.7, "learning_rate": 1.1933786997129138e-06, "loss": 0.2043, "step": 1344800 }, { "epoch": 13.7, "learning_rate": 1.1915270235367436e-06, "loss": 0.203, "step": 1344900 }, { "epoch": 13.7, "learning_rate": 1.1896767559229837e-06, "loss": 0.2042, "step": 1345000 }, { "epoch": 13.7, "learning_rate": 1.1878278969620948e-06, "loss": 0.2058, "step": 1345100 }, { "epoch": 13.71, "learning_rate": 1.1859804467444811e-06, "loss": 0.2423, "step": 1345200 }, { "epoch": 13.71, "learning_rate": 1.1841344053604741e-06, "loss": 0.2037, "step": 1345300 }, { "epoch": 13.71, "learning_rate": 1.1822897729003279e-06, "loss": 0.2325, "step": 1345400 }, { "epoch": 13.71, "learning_rate": 1.1804465494542372e-06, "loss": 0.2199, "step": 1345500 }, { "epoch": 13.71, "learning_rate": 1.178604735112323e-06, "loss": 0.2114, "step": 1345600 }, { "epoch": 13.71, "learning_rate": 1.1767643299646403e-06, "loss": 0.2071, "step": 1345700 }, { "epoch": 13.71, "learning_rate": 1.1749253341011734e-06, "loss": 0.2042, "step": 1345800 }, { "epoch": 13.71, "learning_rate": 1.1730877476118373e-06, "loss": 0.1716, "step": 1345900 }, { "epoch": 13.71, "learning_rate": 1.1712515705864869e-06, "loss": 0.1795, "step": 1346000 }, { "epoch": 13.71, "learning_rate": 1.1694168031148867e-06, "loss": 0.2098, "step": 1346100 }, { "epoch": 13.72, "learning_rate": 1.167583445286755e-06, "loss": 0.2417, "step": 1346200 }, { "epoch": 13.72, "learning_rate": 1.1657514971917338e-06, "loss": 0.2004, "step": 1346300 }, { "epoch": 13.72, "learning_rate": 1.1639209589193878e-06, "loss": 0.2126, "step": 1346400 }, { "epoch": 13.72, "learning_rate": 1.162091830559222e-06, "loss": 0.2353, "step": 1346500 }, { "epoch": 13.72, "learning_rate": 1.1602641122006718e-06, "loss": 0.229, "step": 1346600 }, { "epoch": 13.72, "learning_rate": 1.1584378039330955e-06, "loss": 0.1893, "step": 1346700 }, { "epoch": 13.72, "learning_rate": 1.1566129058457952e-06, "loss": 0.2338, "step": 1346800 }, { "epoch": 13.72, "learning_rate": 1.154789418027996e-06, "loss": 0.1804, "step": 1346900 }, { "epoch": 13.72, "learning_rate": 1.1529673405688535e-06, "loss": 0.1732, "step": 1347000 }, { "epoch": 13.72, "learning_rate": 1.1511466735574594e-06, "loss": 0.2089, "step": 1347100 }, { "epoch": 13.73, "learning_rate": 1.1493274170828294e-06, "loss": 0.1962, "step": 1347200 }, { "epoch": 13.73, "learning_rate": 1.1475095712339222e-06, "loss": 0.2321, "step": 1347300 }, { "epoch": 13.73, "learning_rate": 1.1456931360996136e-06, "loss": 0.1992, "step": 1347400 }, { "epoch": 13.73, "learning_rate": 1.1438781117687125e-06, "loss": 0.2429, "step": 1347500 }, { "epoch": 13.73, "learning_rate": 1.1420644983299744e-06, "loss": 0.1914, "step": 1347600 }, { "epoch": 13.73, "learning_rate": 1.1402522958720619e-06, "loss": 0.2695, "step": 1347700 }, { "epoch": 13.73, "learning_rate": 1.1384415044835872e-06, "loss": 0.2031, "step": 1347800 }, { "epoch": 13.73, "learning_rate": 1.1366321242530864e-06, "loss": 0.1855, "step": 1347900 }, { "epoch": 13.73, "learning_rate": 1.1348241552690287e-06, "loss": 0.2095, "step": 1348000 }, { "epoch": 13.73, "learning_rate": 1.1330356562099032e-06, "loss": 0.2069, "step": 1348100 }, { "epoch": 13.74, "learning_rate": 1.1312304958691865e-06, "loss": 0.1922, "step": 1348200 }, { "epoch": 13.74, "learning_rate": 1.1294267470390153e-06, "loss": 0.2423, "step": 1348300 }, { "epoch": 13.74, "learning_rate": 1.1276244098075883e-06, "loss": 0.2276, "step": 1348400 }, { "epoch": 13.74, "learning_rate": 1.1258234842630255e-06, "loss": 0.2598, "step": 1348500 }, { "epoch": 13.74, "learning_rate": 1.124023970493383e-06, "loss": 0.1933, "step": 1348600 }, { "epoch": 13.74, "learning_rate": 1.122225868586647e-06, "loss": 0.2312, "step": 1348700 }, { "epoch": 13.74, "learning_rate": 1.1204291786307309e-06, "loss": 0.1899, "step": 1348800 }, { "epoch": 13.74, "learning_rate": 1.1186339007134837e-06, "loss": 0.2181, "step": 1348900 }, { "epoch": 13.74, "learning_rate": 1.116840034922686e-06, "loss": 0.1984, "step": 1349000 }, { "epoch": 13.74, "learning_rate": 1.1150475813460403e-06, "loss": 0.1969, "step": 1349100 }, { "epoch": 13.75, "learning_rate": 1.1132565400711936e-06, "loss": 0.1994, "step": 1349200 }, { "epoch": 13.75, "learning_rate": 1.111466911185719e-06, "loss": 0.1643, "step": 1349300 }, { "epoch": 13.75, "learning_rate": 1.10967869477711e-06, "loss": 0.1974, "step": 1349400 }, { "epoch": 13.75, "learning_rate": 1.1078918909328063e-06, "loss": 0.2078, "step": 1349500 }, { "epoch": 13.75, "learning_rate": 1.1061064997401681e-06, "loss": 0.2042, "step": 1349600 }, { "epoch": 13.75, "learning_rate": 1.1043225212864926e-06, "loss": 0.2167, "step": 1349700 }, { "epoch": 13.75, "learning_rate": 1.1025399556590033e-06, "loss": 0.1938, "step": 1349800 }, { "epoch": 13.75, "learning_rate": 1.1007588029448635e-06, "loss": 0.1988, "step": 1349900 }, { "epoch": 13.75, "learning_rate": 1.098979063231157e-06, "loss": 0.1803, "step": 1350000 }, { "epoch": 13.76, "learning_rate": 1.097200736604901e-06, "loss": 0.2072, "step": 1350100 }, { "epoch": 13.76, "learning_rate": 1.0954415852920651e-06, "loss": 0.2439, "step": 1350200 }, { "epoch": 13.76, "learning_rate": 1.0936660709684466e-06, "loss": 0.2492, "step": 1350300 }, { "epoch": 13.76, "learning_rate": 1.0918919699920537e-06, "loss": 0.1959, "step": 1350400 }, { "epoch": 13.76, "learning_rate": 1.0901192824496341e-06, "loss": 0.257, "step": 1350500 }, { "epoch": 13.76, "learning_rate": 1.088348008427852e-06, "loss": 0.2172, "step": 1350600 }, { "epoch": 13.76, "learning_rate": 1.0865781480133175e-06, "loss": 0.196, "step": 1350700 }, { "epoch": 13.76, "learning_rate": 1.084809701292565e-06, "loss": 0.2361, "step": 1350800 }, { "epoch": 13.76, "learning_rate": 1.0830426683520623e-06, "loss": 0.1932, "step": 1350900 }, { "epoch": 13.76, "learning_rate": 1.0812770492782e-06, "loss": 0.2471, "step": 1351000 }, { "epoch": 13.77, "learning_rate": 1.0795128441573188e-06, "loss": 0.1974, "step": 1351100 }, { "epoch": 13.77, "learning_rate": 1.0777500530756668e-06, "loss": 0.2172, "step": 1351200 }, { "epoch": 13.77, "learning_rate": 1.0759886761194415e-06, "loss": 0.2158, "step": 1351300 }, { "epoch": 13.77, "learning_rate": 1.0742287133747607e-06, "loss": 0.2752, "step": 1351400 }, { "epoch": 13.77, "learning_rate": 1.0724701649276759e-06, "loss": 0.1872, "step": 1351500 }, { "epoch": 13.77, "learning_rate": 1.0707130308641644e-06, "loss": 0.1631, "step": 1351600 }, { "epoch": 13.77, "learning_rate": 1.068957311270151e-06, "loss": 0.1537, "step": 1351700 }, { "epoch": 13.77, "learning_rate": 1.0672030062314708e-06, "loss": 0.2232, "step": 1351800 }, { "epoch": 13.77, "learning_rate": 1.0654501158339014e-06, "loss": 0.2205, "step": 1351900 }, { "epoch": 13.77, "learning_rate": 1.063698640163151e-06, "loss": 0.1882, "step": 1352000 }, { "epoch": 13.78, "learning_rate": 1.0619485793048544e-06, "loss": 0.2354, "step": 1352100 }, { "epoch": 13.78, "learning_rate": 1.0601999333445767e-06, "loss": 0.2481, "step": 1352200 }, { "epoch": 13.78, "learning_rate": 1.0584527023678159e-06, "loss": 0.1783, "step": 1352300 }, { "epoch": 13.78, "learning_rate": 1.0567068864600104e-06, "loss": 0.2189, "step": 1352400 }, { "epoch": 13.78, "learning_rate": 1.0549624857065087e-06, "loss": 0.2665, "step": 1352500 }, { "epoch": 13.78, "learning_rate": 1.0532369230420325e-06, "loss": 0.1976, "step": 1352600 }, { "epoch": 13.78, "learning_rate": 1.051495338699282e-06, "loss": 0.1791, "step": 1352700 }, { "epoch": 13.78, "learning_rate": 1.0497551697656549e-06, "loss": 0.2013, "step": 1352800 }, { "epoch": 13.78, "learning_rate": 1.0480164163262362e-06, "loss": 0.1993, "step": 1352900 }, { "epoch": 13.78, "learning_rate": 1.0462790784660347e-06, "loss": 0.1729, "step": 1353000 }, { "epoch": 13.79, "learning_rate": 1.0445431562700025e-06, "loss": 0.1825, "step": 1353100 }, { "epoch": 13.79, "learning_rate": 1.0428086498230117e-06, "loss": 0.2323, "step": 1353200 }, { "epoch": 13.79, "learning_rate": 1.0410755592098709e-06, "loss": 0.1713, "step": 1353300 }, { "epoch": 13.79, "learning_rate": 1.0393438845153159e-06, "loss": 0.1776, "step": 1353400 }, { "epoch": 13.79, "learning_rate": 1.0376136258240155e-06, "loss": 0.2117, "step": 1353500 }, { "epoch": 13.79, "learning_rate": 1.0358847832205721e-06, "loss": 0.2348, "step": 1353600 }, { "epoch": 13.79, "learning_rate": 1.0341573567895146e-06, "loss": 0.1787, "step": 1353700 }, { "epoch": 13.79, "learning_rate": 1.0324313466153057e-06, "loss": 0.209, "step": 1353800 }, { "epoch": 13.79, "learning_rate": 1.0307067527823311e-06, "loss": 0.2292, "step": 1353900 }, { "epoch": 13.79, "learning_rate": 1.0289835753749165e-06, "loss": 0.2422, "step": 1354000 }, { "epoch": 13.8, "learning_rate": 1.0272618144773182e-06, "loss": 0.1618, "step": 1354100 }, { "epoch": 13.8, "learning_rate": 1.0255414701737153e-06, "loss": 0.191, "step": 1354200 }, { "epoch": 13.8, "learning_rate": 1.0238225425482206e-06, "loss": 0.1843, "step": 1354300 }, { "epoch": 13.8, "learning_rate": 1.0221050316848834e-06, "loss": 0.2028, "step": 1354400 }, { "epoch": 13.8, "learning_rate": 1.0203889376676866e-06, "loss": 0.1957, "step": 1354500 }, { "epoch": 13.8, "learning_rate": 1.0186742605805232e-06, "loss": 0.1704, "step": 1354600 }, { "epoch": 13.8, "learning_rate": 1.016961000507236e-06, "loss": 0.2717, "step": 1354700 }, { "epoch": 13.8, "learning_rate": 1.0152491575315946e-06, "loss": 0.1586, "step": 1354800 }, { "epoch": 13.8, "learning_rate": 1.013538731737299e-06, "loss": 0.1901, "step": 1354900 }, { "epoch": 13.8, "learning_rate": 1.0118468062775343e-06, "loss": 0.2071, "step": 1355000 }, { "epoch": 13.81, "learning_rate": 1.0101392009228472e-06, "loss": 0.2108, "step": 1355100 }, { "epoch": 13.81, "learning_rate": 1.008433012999349e-06, "loss": 0.1786, "step": 1355200 }, { "epoch": 13.81, "learning_rate": 1.00672824259047e-06, "loss": 0.213, "step": 1355300 }, { "epoch": 13.81, "learning_rate": 1.0050248897795533e-06, "loss": 0.2435, "step": 1355400 }, { "epoch": 13.81, "learning_rate": 1.0033229546498856e-06, "loss": 0.1725, "step": 1355500 }, { "epoch": 13.81, "learning_rate": 1.0016224372846872e-06, "loss": 0.2103, "step": 1355600 }, { "epoch": 13.81, "learning_rate": 9.999233377670913e-07, "loss": 0.213, "step": 1355700 }, { "epoch": 13.81, "learning_rate": 9.98225656180185e-07, "loss": 0.2262, "step": 1355800 }, { "epoch": 13.81, "learning_rate": 9.965293926069717e-07, "loss": 0.2027, "step": 1355900 }, { "epoch": 13.82, "learning_rate": 9.948345471303821e-07, "loss": 0.2201, "step": 1356000 }, { "epoch": 13.82, "learning_rate": 9.931411198332928e-07, "loss": 0.1872, "step": 1356100 }, { "epoch": 13.82, "learning_rate": 9.91449110798498e-07, "loss": 0.2111, "step": 1356200 }, { "epoch": 13.82, "learning_rate": 9.897585201087278e-07, "loss": 0.2304, "step": 1356300 }, { "epoch": 13.82, "learning_rate": 9.880693478466397e-07, "loss": 0.1874, "step": 1356400 }, { "epoch": 13.82, "learning_rate": 9.86381594094834e-07, "loss": 0.2632, "step": 1356500 }, { "epoch": 13.82, "learning_rate": 9.846952589358183e-07, "loss": 0.1754, "step": 1356600 }, { "epoch": 13.82, "learning_rate": 9.830103424520498e-07, "loss": 0.189, "step": 1356700 }, { "epoch": 13.82, "learning_rate": 9.813268447259094e-07, "loss": 0.1604, "step": 1356800 }, { "epoch": 13.82, "learning_rate": 9.79644765839718e-07, "loss": 0.2082, "step": 1356900 }, { "epoch": 13.83, "learning_rate": 9.779641058757094e-07, "loss": 0.1818, "step": 1357000 }, { "epoch": 13.83, "learning_rate": 9.762848649160617e-07, "loss": 0.1773, "step": 1357100 }, { "epoch": 13.83, "learning_rate": 9.74607043042882e-07, "loss": 0.2078, "step": 1357200 }, { "epoch": 13.83, "learning_rate": 9.72930640338202e-07, "loss": 0.1908, "step": 1357300 }, { "epoch": 13.83, "learning_rate": 9.712556568839858e-07, "loss": 0.219, "step": 1357400 }, { "epoch": 13.83, "learning_rate": 9.695820927621378e-07, "loss": 0.2072, "step": 1357500 }, { "epoch": 13.83, "learning_rate": 9.679099480544762e-07, "loss": 0.1981, "step": 1357600 }, { "epoch": 13.83, "learning_rate": 9.662392228427685e-07, "loss": 0.154, "step": 1357700 }, { "epoch": 13.83, "learning_rate": 9.645699172086997e-07, "loss": 0.1855, "step": 1357800 }, { "epoch": 13.83, "learning_rate": 9.629020312338844e-07, "loss": 0.2133, "step": 1357900 }, { "epoch": 13.84, "learning_rate": 9.61235564999874e-07, "loss": 0.1754, "step": 1358000 }, { "epoch": 13.84, "learning_rate": 9.595871620238828e-07, "loss": 0.2041, "step": 1358100 }, { "epoch": 13.84, "learning_rate": 9.579235213164183e-07, "loss": 0.1983, "step": 1358200 }, { "epoch": 13.84, "learning_rate": 9.562613005931808e-07, "loss": 0.2206, "step": 1358300 }, { "epoch": 13.84, "learning_rate": 9.546004999354386e-07, "loss": 0.1887, "step": 1358400 }, { "epoch": 13.84, "learning_rate": 9.529411194243998e-07, "loss": 0.1693, "step": 1358500 }, { "epoch": 13.84, "learning_rate": 9.512831591411964e-07, "loss": 0.206, "step": 1358600 }, { "epoch": 13.84, "learning_rate": 9.496431775358427e-07, "loss": 0.2439, "step": 1358700 }, { "epoch": 13.84, "learning_rate": 9.479880437471344e-07, "loss": 0.2406, "step": 1358800 }, { "epoch": 13.84, "learning_rate": 9.463343304284378e-07, "loss": 0.1708, "step": 1358900 }, { "epoch": 13.85, "learning_rate": 9.44682037660608e-07, "loss": 0.2016, "step": 1359000 }, { "epoch": 13.85, "learning_rate": 9.430476672134058e-07, "loss": 0.197, "step": 1359100 }, { "epoch": 13.85, "learning_rate": 9.41398201582081e-07, "loss": 0.2005, "step": 1359200 }, { "epoch": 13.85, "learning_rate": 9.397501567429734e-07, "loss": 0.2254, "step": 1359300 }, { "epoch": 13.85, "learning_rate": 9.381035327766618e-07, "loss": 0.2383, "step": 1359400 }, { "epoch": 13.85, "learning_rate": 9.36458329763652e-07, "loss": 0.2219, "step": 1359500 }, { "epoch": 13.85, "learning_rate": 9.348145477843894e-07, "loss": 0.2187, "step": 1359600 }, { "epoch": 13.85, "learning_rate": 9.331721869192434e-07, "loss": 0.2614, "step": 1359700 }, { "epoch": 13.85, "learning_rate": 9.31531247248516e-07, "loss": 0.2242, "step": 1359800 }, { "epoch": 13.85, "learning_rate": 9.298917288524366e-07, "loss": 0.1758, "step": 1359900 }, { "epoch": 13.86, "learning_rate": 9.282536318111712e-07, "loss": 0.1845, "step": 1360000 }, { "epoch": 13.86, "learning_rate": 9.266169562048121e-07, "loss": 0.2176, "step": 1360100 }, { "epoch": 13.86, "learning_rate": 9.249817021133889e-07, "loss": 0.2008, "step": 1360200 }, { "epoch": 13.86, "learning_rate": 9.23347869616844e-07, "loss": 0.2349, "step": 1360300 }, { "epoch": 13.86, "learning_rate": 9.217154587950671e-07, "loss": 0.2072, "step": 1360400 }, { "epoch": 13.86, "learning_rate": 9.200844697278743e-07, "loss": 0.2232, "step": 1360500 }, { "epoch": 13.86, "learning_rate": 9.18454902495015e-07, "loss": 0.2593, "step": 1360600 }, { "epoch": 13.86, "learning_rate": 9.168267571761591e-07, "loss": 0.1869, "step": 1360700 }, { "epoch": 13.86, "learning_rate": 9.152000338509159e-07, "loss": 0.1816, "step": 1360800 }, { "epoch": 13.87, "learning_rate": 9.13574732598822e-07, "loss": 0.1794, "step": 1360900 }, { "epoch": 13.87, "learning_rate": 9.119508534993437e-07, "loss": 0.2065, "step": 1361000 }, { "epoch": 13.87, "learning_rate": 9.103283966318809e-07, "loss": 0.186, "step": 1361100 }, { "epoch": 13.87, "learning_rate": 9.087073620757602e-07, "loss": 0.1517, "step": 1361200 }, { "epoch": 13.87, "learning_rate": 9.070877499102448e-07, "loss": 0.2379, "step": 1361300 }, { "epoch": 13.87, "learning_rate": 9.054695602145179e-07, "loss": 0.2052, "step": 1361400 }, { "epoch": 13.87, "learning_rate": 9.038527930677065e-07, "loss": 0.2339, "step": 1361500 }, { "epoch": 13.87, "learning_rate": 9.022374485488539e-07, "loss": 0.1936, "step": 1361600 }, { "epoch": 13.87, "learning_rate": 9.006235267369433e-07, "loss": 0.1933, "step": 1361700 }, { "epoch": 13.87, "learning_rate": 8.990110277108887e-07, "loss": 0.2542, "step": 1361800 }, { "epoch": 13.88, "learning_rate": 8.973999515495268e-07, "loss": 0.2119, "step": 1361900 }, { "epoch": 13.88, "learning_rate": 8.957902983316313e-07, "loss": 0.1607, "step": 1362000 }, { "epoch": 13.88, "learning_rate": 8.941820681359058e-07, "loss": 0.1672, "step": 1362100 }, { "epoch": 13.88, "learning_rate": 8.925752610409876e-07, "loss": 0.2122, "step": 1362200 }, { "epoch": 13.88, "learning_rate": 8.909698771254305e-07, "loss": 0.2447, "step": 1362300 }, { "epoch": 13.88, "learning_rate": 8.893659164677314e-07, "loss": 0.2568, "step": 1362400 }, { "epoch": 13.88, "learning_rate": 8.87763379146318e-07, "loss": 0.2024, "step": 1362500 }, { "epoch": 13.88, "learning_rate": 8.861622652395407e-07, "loss": 0.1963, "step": 1362600 }, { "epoch": 13.88, "learning_rate": 8.845625748256869e-07, "loss": 0.1944, "step": 1362700 }, { "epoch": 13.88, "learning_rate": 8.829643079829741e-07, "loss": 0.1962, "step": 1362800 }, { "epoch": 13.89, "learning_rate": 8.813674647895431e-07, "loss": 0.2223, "step": 1362900 }, { "epoch": 13.89, "learning_rate": 8.797720453234747e-07, "loss": 0.1616, "step": 1363000 }, { "epoch": 13.89, "learning_rate": 8.781780496627733e-07, "loss": 0.1636, "step": 1363100 }, { "epoch": 13.89, "learning_rate": 8.765854778853699e-07, "loss": 0.2431, "step": 1363200 }, { "epoch": 13.89, "learning_rate": 8.749943300691421e-07, "loss": 0.201, "step": 1363300 }, { "epoch": 13.89, "learning_rate": 8.73404606291881e-07, "loss": 0.2386, "step": 1363400 }, { "epoch": 13.89, "learning_rate": 8.718163066313145e-07, "loss": 0.2301, "step": 1363500 }, { "epoch": 13.89, "learning_rate": 8.702294311651037e-07, "loss": 0.2196, "step": 1363600 }, { "epoch": 13.89, "learning_rate": 8.686439799708368e-07, "loss": 0.2112, "step": 1363700 }, { "epoch": 13.89, "learning_rate": 8.670599531260314e-07, "loss": 0.2002, "step": 1363800 }, { "epoch": 13.9, "learning_rate": 8.654773507081393e-07, "loss": 0.1783, "step": 1363900 }, { "epoch": 13.9, "learning_rate": 8.638961727945383e-07, "loss": 0.1831, "step": 1364000 }, { "epoch": 13.9, "learning_rate": 8.6231641946254e-07, "loss": 0.1891, "step": 1364100 }, { "epoch": 13.9, "learning_rate": 8.607380907893858e-07, "loss": 0.2412, "step": 1364200 }, { "epoch": 13.9, "learning_rate": 8.591611868522409e-07, "loss": 0.1903, "step": 1364300 }, { "epoch": 13.9, "learning_rate": 8.575857077282168e-07, "loss": 0.1933, "step": 1364400 }, { "epoch": 13.9, "learning_rate": 8.560116534943319e-07, "loss": 0.2099, "step": 1364500 }, { "epoch": 13.9, "learning_rate": 8.54439024227558e-07, "loss": 0.2389, "step": 1364600 }, { "epoch": 13.9, "learning_rate": 8.528678200047868e-07, "loss": 0.2983, "step": 1364700 }, { "epoch": 13.9, "learning_rate": 8.512980409028337e-07, "loss": 0.1835, "step": 1364800 }, { "epoch": 13.91, "learning_rate": 8.497296869984572e-07, "loss": 0.2241, "step": 1364900 }, { "epoch": 13.91, "learning_rate": 8.481627583683394e-07, "loss": 0.2122, "step": 1365000 }, { "epoch": 13.91, "learning_rate": 8.465972550890922e-07, "loss": 0.1856, "step": 1365100 }, { "epoch": 13.91, "learning_rate": 8.45033177237261e-07, "loss": 0.188, "step": 1365200 }, { "epoch": 13.91, "learning_rate": 8.434705248893248e-07, "loss": 0.1913, "step": 1365300 }, { "epoch": 13.91, "learning_rate": 8.419092981216791e-07, "loss": 0.2208, "step": 1365400 }, { "epoch": 13.91, "learning_rate": 8.403494970106596e-07, "loss": 0.2056, "step": 1365500 }, { "epoch": 13.91, "learning_rate": 8.387911216325384e-07, "loss": 0.1879, "step": 1365600 }, { "epoch": 13.91, "learning_rate": 8.372341720635046e-07, "loss": 0.2137, "step": 1365700 }, { "epoch": 13.92, "learning_rate": 8.356786483796874e-07, "loss": 0.2292, "step": 1365800 }, { "epoch": 13.92, "learning_rate": 8.34124550657136e-07, "loss": 0.176, "step": 1365900 }, { "epoch": 13.92, "learning_rate": 8.325718789718495e-07, "loss": 0.192, "step": 1366000 }, { "epoch": 13.92, "learning_rate": 8.310206333997272e-07, "loss": 0.1871, "step": 1366100 }, { "epoch": 13.92, "learning_rate": 8.294708140166285e-07, "loss": 0.1726, "step": 1366200 }, { "epoch": 13.92, "learning_rate": 8.27922420898326e-07, "loss": 0.1873, "step": 1366300 }, { "epoch": 13.92, "learning_rate": 8.26375454120526e-07, "loss": 0.2035, "step": 1366400 }, { "epoch": 13.92, "learning_rate": 8.248299137588677e-07, "loss": 0.2215, "step": 1366500 }, { "epoch": 13.92, "learning_rate": 8.232857998889209e-07, "loss": 0.2026, "step": 1366600 }, { "epoch": 13.92, "learning_rate": 8.217431125861785e-07, "loss": 0.1922, "step": 1366700 }, { "epoch": 13.93, "learning_rate": 8.202018519260734e-07, "loss": 0.2097, "step": 1366800 }, { "epoch": 13.93, "learning_rate": 8.186620179839621e-07, "loss": 0.2845, "step": 1366900 }, { "epoch": 13.93, "learning_rate": 8.171236108351309e-07, "loss": 0.2141, "step": 1367000 }, { "epoch": 13.93, "learning_rate": 8.155866305547999e-07, "loss": 0.2102, "step": 1367100 }, { "epoch": 13.93, "learning_rate": 8.140510772181219e-07, "loss": 0.2349, "step": 1367200 }, { "epoch": 13.93, "learning_rate": 8.125169509001706e-07, "loss": 0.2214, "step": 1367300 }, { "epoch": 13.93, "learning_rate": 8.109842516759592e-07, "loss": 0.2171, "step": 1367400 }, { "epoch": 13.93, "learning_rate": 8.094529796204275e-07, "loss": 0.1871, "step": 1367500 }, { "epoch": 13.93, "learning_rate": 8.079231348084459e-07, "loss": 0.2149, "step": 1367600 }, { "epoch": 13.93, "learning_rate": 8.063947173148111e-07, "loss": 0.1429, "step": 1367700 }, { "epoch": 13.94, "learning_rate": 8.048677272142602e-07, "loss": 0.2116, "step": 1367800 }, { "epoch": 13.94, "learning_rate": 8.033574131415644e-07, "loss": 0.1983, "step": 1367900 }, { "epoch": 13.94, "learning_rate": 8.018332637752912e-07, "loss": 0.2021, "step": 1368000 }, { "epoch": 13.94, "learning_rate": 8.0031054202513e-07, "loss": 0.2897, "step": 1368100 }, { "epoch": 13.94, "learning_rate": 7.987892479655245e-07, "loss": 0.2025, "step": 1368200 }, { "epoch": 13.94, "learning_rate": 7.97269381670862e-07, "loss": 0.2067, "step": 1368300 }, { "epoch": 13.94, "learning_rate": 7.957509432154531e-07, "loss": 0.2285, "step": 1368400 }, { "epoch": 13.94, "learning_rate": 7.942339326735414e-07, "loss": 0.2076, "step": 1368500 }, { "epoch": 13.94, "learning_rate": 7.927183501193014e-07, "loss": 0.1837, "step": 1368600 }, { "epoch": 13.94, "learning_rate": 7.912041956268334e-07, "loss": 0.1901, "step": 1368700 }, { "epoch": 13.95, "learning_rate": 7.896914692701685e-07, "loss": 0.1994, "step": 1368800 }, { "epoch": 13.95, "learning_rate": 7.881801711232739e-07, "loss": 0.2559, "step": 1368900 }, { "epoch": 13.95, "learning_rate": 7.866703012600441e-07, "loss": 0.2109, "step": 1369000 }, { "epoch": 13.95, "learning_rate": 7.851618597542964e-07, "loss": 0.2404, "step": 1369100 }, { "epoch": 13.95, "learning_rate": 7.836548466797921e-07, "loss": 0.2977, "step": 1369200 }, { "epoch": 13.95, "learning_rate": 7.821492621102122e-07, "loss": 0.2205, "step": 1369300 }, { "epoch": 13.95, "learning_rate": 7.806451061191677e-07, "loss": 0.2097, "step": 1369400 }, { "epoch": 13.95, "learning_rate": 7.791423787802066e-07, "loss": 0.2397, "step": 1369500 }, { "epoch": 13.95, "learning_rate": 7.776410801668065e-07, "loss": 0.2232, "step": 1369600 }, { "epoch": 13.95, "learning_rate": 7.761412103523624e-07, "loss": 0.1876, "step": 1369700 }, { "epoch": 13.96, "learning_rate": 7.746427694102154e-07, "loss": 0.1737, "step": 1369800 }, { "epoch": 13.96, "learning_rate": 7.731457574136302e-07, "loss": 0.1837, "step": 1369900 }, { "epoch": 13.96, "learning_rate": 7.716501744358018e-07, "loss": 0.2082, "step": 1370000 }, { "epoch": 13.96, "learning_rate": 7.701709550144664e-07, "loss": 0.2437, "step": 1370100 }, { "epoch": 13.96, "learning_rate": 7.686782160014417e-07, "loss": 0.234, "step": 1370200 }, { "epoch": 13.96, "learning_rate": 7.671869062256087e-07, "loss": 0.2361, "step": 1370300 }, { "epoch": 13.96, "learning_rate": 7.656970257598794e-07, "loss": 0.2298, "step": 1370400 }, { "epoch": 13.96, "learning_rate": 7.642085746771121e-07, "loss": 0.1473, "step": 1370500 }, { "epoch": 13.96, "learning_rate": 7.62721553050072e-07, "loss": 0.217, "step": 1370600 }, { "epoch": 13.96, "learning_rate": 7.612359609514707e-07, "loss": 0.2318, "step": 1370700 }, { "epoch": 13.97, "learning_rate": 7.597517984539437e-07, "loss": 0.1939, "step": 1370800 }, { "epoch": 13.97, "learning_rate": 7.582690656300528e-07, "loss": 0.2448, "step": 1370900 }, { "epoch": 13.97, "learning_rate": 7.567877625523035e-07, "loss": 0.2308, "step": 1371000 }, { "epoch": 13.97, "learning_rate": 7.553078892931176e-07, "loss": 0.1579, "step": 1371100 }, { "epoch": 13.97, "learning_rate": 7.538294459248507e-07, "loss": 0.1676, "step": 1371200 }, { "epoch": 13.97, "learning_rate": 7.523524325197917e-07, "loss": 0.1926, "step": 1371300 }, { "epoch": 13.97, "learning_rate": 7.508768491501627e-07, "loss": 0.2262, "step": 1371400 }, { "epoch": 13.97, "learning_rate": 7.494026958880995e-07, "loss": 0.2728, "step": 1371500 }, { "epoch": 13.97, "learning_rate": 7.479299728056844e-07, "loss": 0.1699, "step": 1371600 }, { "epoch": 13.98, "learning_rate": 7.464733858232586e-07, "loss": 0.2025, "step": 1371700 }, { "epoch": 13.98, "learning_rate": 7.450035090125084e-07, "loss": 0.2367, "step": 1371800 }, { "epoch": 13.98, "learning_rate": 7.435350625965009e-07, "loss": 0.2209, "step": 1371900 }, { "epoch": 13.98, "learning_rate": 7.420680466470353e-07, "loss": 0.1339, "step": 1372000 }, { "epoch": 13.98, "learning_rate": 7.406024612358408e-07, "loss": 0.1932, "step": 1372100 }, { "epoch": 13.98, "learning_rate": 7.391383064345769e-07, "loss": 0.2037, "step": 1372200 }, { "epoch": 13.98, "learning_rate": 7.376755823148263e-07, "loss": 0.2338, "step": 1372300 }, { "epoch": 13.98, "learning_rate": 7.362142889481116e-07, "loss": 0.2228, "step": 1372400 }, { "epoch": 13.98, "learning_rate": 7.347544264058859e-07, "loss": 0.2155, "step": 1372500 }, { "epoch": 13.98, "learning_rate": 7.332959947595186e-07, "loss": 0.2004, "step": 1372600 }, { "epoch": 13.99, "learning_rate": 7.318389940803228e-07, "loss": 0.2203, "step": 1372700 }, { "epoch": 13.99, "learning_rate": 7.30383424439538e-07, "loss": 0.2103, "step": 1372800 }, { "epoch": 13.99, "learning_rate": 7.289292859083308e-07, "loss": 0.2, "step": 1372900 }, { "epoch": 13.99, "learning_rate": 7.274765785578008e-07, "loss": 0.198, "step": 1373000 }, { "epoch": 13.99, "learning_rate": 7.260253024589747e-07, "loss": 0.2095, "step": 1373100 }, { "epoch": 13.99, "learning_rate": 7.245754576828189e-07, "loss": 0.2292, "step": 1373200 }, { "epoch": 13.99, "learning_rate": 7.231270443002136e-07, "loss": 0.2074, "step": 1373300 }, { "epoch": 13.99, "learning_rate": 7.216800623819786e-07, "loss": 0.2488, "step": 1373400 }, { "epoch": 13.99, "learning_rate": 7.202345119988707e-07, "loss": 0.2169, "step": 1373500 }, { "epoch": 13.99, "learning_rate": 7.187903932215567e-07, "loss": 0.2409, "step": 1373600 }, { "epoch": 14.0, "learning_rate": 7.173477061206534e-07, "loss": 0.1907, "step": 1373700 }, { "epoch": 14.0, "learning_rate": 7.159064507667013e-07, "loss": 0.2664, "step": 1373800 }, { "epoch": 14.0, "learning_rate": 7.144666272301603e-07, "loss": 0.2516, "step": 1373900 }, { "epoch": 14.0, "learning_rate": 7.130282355814344e-07, "loss": 0.1508, "step": 1374000 }, { "epoch": 14.0, "learning_rate": 7.115912758908538e-07, "loss": 0.2178, "step": 1374100 }, { "epoch": 14.0, "learning_rate": 7.101557482286725e-07, "loss": 0.2174, "step": 1374200 }, { "epoch": 14.0, "learning_rate": 7.087216526650875e-07, "loss": 0.1904, "step": 1374300 }, { "epoch": 14.0, "learning_rate": 7.072889892702095e-07, "loss": 0.2144, "step": 1374400 }, { "epoch": 14.0, "learning_rate": 7.058720633358395e-07, "loss": 0.2261, "step": 1374500 }, { "epoch": 14.0, "learning_rate": 7.044422501650283e-07, "loss": 0.1586, "step": 1374600 }, { "epoch": 14.01, "learning_rate": 7.030138693721599e-07, "loss": 0.1855, "step": 1374700 }, { "epoch": 14.01, "learning_rate": 7.015869210270787e-07, "loss": 0.2096, "step": 1374800 }, { "epoch": 14.01, "learning_rate": 7.001614051995553e-07, "loss": 0.2765, "step": 1374900 }, { "epoch": 14.01, "learning_rate": 6.987373219592841e-07, "loss": 0.2126, "step": 1375000 }, { "epoch": 14.01, "learning_rate": 6.973146713758993e-07, "loss": 0.2178, "step": 1375100 }, { "epoch": 14.01, "learning_rate": 6.958934535189554e-07, "loss": 0.2486, "step": 1375200 }, { "epoch": 14.01, "learning_rate": 6.944736684579467e-07, "loss": 0.2186, "step": 1375300 }, { "epoch": 14.01, "learning_rate": 6.930553162622877e-07, "loss": 0.1926, "step": 1375400 }, { "epoch": 14.01, "learning_rate": 6.916383970013263e-07, "loss": 0.1912, "step": 1375500 }, { "epoch": 14.01, "learning_rate": 6.90222910744347e-07, "loss": 0.197, "step": 1375600 }, { "epoch": 14.02, "learning_rate": 6.888088575605545e-07, "loss": 0.2234, "step": 1375700 }, { "epoch": 14.02, "learning_rate": 6.8739623751909e-07, "loss": 0.2101, "step": 1375800 }, { "epoch": 14.02, "learning_rate": 6.85985050689022e-07, "loss": 0.2333, "step": 1375900 }, { "epoch": 14.02, "learning_rate": 6.845752971393482e-07, "loss": 0.2454, "step": 1376000 }, { "epoch": 14.02, "learning_rate": 6.831669769389937e-07, "loss": 0.1732, "step": 1376100 }, { "epoch": 14.02, "learning_rate": 6.817600901568266e-07, "loss": 0.2336, "step": 1376200 }, { "epoch": 14.02, "learning_rate": 6.803546368616254e-07, "loss": 0.1923, "step": 1376300 }, { "epoch": 14.02, "learning_rate": 6.789506171221083e-07, "loss": 0.211, "step": 1376400 }, { "epoch": 14.02, "learning_rate": 6.775480310069337e-07, "loss": 0.2021, "step": 1376500 }, { "epoch": 14.03, "learning_rate": 6.761468785846703e-07, "loss": 0.2076, "step": 1376600 }, { "epoch": 14.03, "learning_rate": 6.747611500130901e-07, "loss": 0.2468, "step": 1376700 }, { "epoch": 14.03, "learning_rate": 6.733628508434709e-07, "loss": 0.1853, "step": 1376800 }, { "epoch": 14.03, "learning_rate": 6.719659855713956e-07, "loss": 0.1948, "step": 1376900 }, { "epoch": 14.03, "learning_rate": 6.705705542651663e-07, "loss": 0.1822, "step": 1377000 }, { "epoch": 14.03, "learning_rate": 6.69176556993002e-07, "loss": 0.2009, "step": 1377100 }, { "epoch": 14.03, "learning_rate": 6.677839938230712e-07, "loss": 0.2212, "step": 1377200 }, { "epoch": 14.03, "learning_rate": 6.663928648234596e-07, "loss": 0.2406, "step": 1377300 }, { "epoch": 14.03, "learning_rate": 6.650031700621795e-07, "loss": 0.1726, "step": 1377400 }, { "epoch": 14.03, "learning_rate": 6.63614909607183e-07, "loss": 0.1914, "step": 1377500 }, { "epoch": 14.04, "learning_rate": 6.622280835263462e-07, "loss": 0.1964, "step": 1377600 }, { "epoch": 14.04, "learning_rate": 6.608426918874777e-07, "loss": 0.2114, "step": 1377700 }, { "epoch": 14.04, "learning_rate": 6.59458734758317e-07, "loss": 0.212, "step": 1377800 }, { "epoch": 14.04, "learning_rate": 6.580762122065264e-07, "loss": 0.1895, "step": 1377900 }, { "epoch": 14.04, "learning_rate": 6.566951242997054e-07, "loss": 0.1641, "step": 1378000 }, { "epoch": 14.04, "learning_rate": 6.553154711053799e-07, "loss": 0.2579, "step": 1378100 }, { "epoch": 14.04, "learning_rate": 6.539372526910092e-07, "loss": 0.21, "step": 1378200 }, { "epoch": 14.04, "learning_rate": 6.525604691239828e-07, "loss": 0.1846, "step": 1378300 }, { "epoch": 14.04, "learning_rate": 6.511851204716102e-07, "loss": 0.1965, "step": 1378400 }, { "epoch": 14.04, "learning_rate": 6.498112068011375e-07, "loss": 0.2128, "step": 1378500 }, { "epoch": 14.05, "learning_rate": 6.484387281797511e-07, "loss": 0.1706, "step": 1378600 }, { "epoch": 14.05, "learning_rate": 6.470676846745438e-07, "loss": 0.2053, "step": 1378700 }, { "epoch": 14.05, "learning_rate": 6.456980763525589e-07, "loss": 0.1921, "step": 1378800 }, { "epoch": 14.05, "learning_rate": 6.443299032807626e-07, "loss": 0.2056, "step": 1378900 }, { "epoch": 14.05, "learning_rate": 6.429631655260481e-07, "loss": 0.2283, "step": 1379000 }, { "epoch": 14.05, "learning_rate": 6.415978631552388e-07, "loss": 0.2088, "step": 1379100 }, { "epoch": 14.05, "learning_rate": 6.402339962350911e-07, "loss": 0.2147, "step": 1379200 }, { "epoch": 14.05, "learning_rate": 6.388715648322951e-07, "loss": 0.2151, "step": 1379300 }, { "epoch": 14.05, "learning_rate": 6.375105690134575e-07, "loss": 0.1781, "step": 1379400 }, { "epoch": 14.05, "learning_rate": 6.361510088451317e-07, "loss": 0.2067, "step": 1379500 }, { "epoch": 14.06, "learning_rate": 6.347928843937845e-07, "loss": 0.1803, "step": 1379600 }, { "epoch": 14.06, "learning_rate": 6.334361957258228e-07, "loss": 0.1865, "step": 1379700 }, { "epoch": 14.06, "learning_rate": 6.320809429075836e-07, "loss": 0.2507, "step": 1379800 }, { "epoch": 14.06, "learning_rate": 6.307271260053271e-07, "loss": 0.1925, "step": 1379900 }, { "epoch": 14.06, "learning_rate": 6.293747450852438e-07, "loss": 0.2081, "step": 1380000 }, { "epoch": 14.06, "learning_rate": 6.280238002134609e-07, "loss": 0.2173, "step": 1380100 }, { "epoch": 14.06, "learning_rate": 6.266742914560353e-07, "loss": 0.2373, "step": 1380200 }, { "epoch": 14.06, "learning_rate": 6.253262188789444e-07, "loss": 0.2464, "step": 1380300 }, { "epoch": 14.06, "learning_rate": 6.23979582548102e-07, "loss": 0.201, "step": 1380400 }, { "epoch": 14.06, "learning_rate": 6.22634382529349e-07, "loss": 0.2276, "step": 1380500 }, { "epoch": 14.07, "learning_rate": 6.212906188884626e-07, "loss": 0.1941, "step": 1380600 }, { "epoch": 14.07, "learning_rate": 6.199482916911403e-07, "loss": 0.2484, "step": 1380700 }, { "epoch": 14.07, "learning_rate": 6.186074010030196e-07, "loss": 0.2423, "step": 1380800 }, { "epoch": 14.07, "learning_rate": 6.172679468896514e-07, "loss": 0.1975, "step": 1380900 }, { "epoch": 14.07, "learning_rate": 6.159299294165365e-07, "loss": 0.2437, "step": 1381000 }, { "epoch": 14.07, "learning_rate": 6.145933486490929e-07, "loss": 0.202, "step": 1381100 }, { "epoch": 14.07, "learning_rate": 6.132582046526747e-07, "loss": 0.1816, "step": 1381200 }, { "epoch": 14.07, "learning_rate": 6.119244974925564e-07, "loss": 0.1909, "step": 1381300 }, { "epoch": 14.07, "learning_rate": 6.105922272339526e-07, "loss": 0.1612, "step": 1381400 }, { "epoch": 14.07, "learning_rate": 6.092613939420045e-07, "loss": 0.2376, "step": 1381500 }, { "epoch": 14.08, "learning_rate": 6.079319976817766e-07, "loss": 0.1665, "step": 1381600 }, { "epoch": 14.08, "learning_rate": 6.066040385182703e-07, "loss": 0.1643, "step": 1381700 }, { "epoch": 14.08, "learning_rate": 6.052907746222703e-07, "loss": 0.1964, "step": 1381800 }, { "epoch": 14.08, "learning_rate": 6.039656754743417e-07, "loss": 0.1793, "step": 1381900 }, { "epoch": 14.08, "learning_rate": 6.026420136170663e-07, "loss": 0.2153, "step": 1382000 }, { "epoch": 14.08, "learning_rate": 6.01319789115159e-07, "loss": 0.208, "step": 1382100 }, { "epoch": 14.08, "learning_rate": 5.999990020332746e-07, "loss": 0.1943, "step": 1382200 }, { "epoch": 14.08, "learning_rate": 5.986796524359884e-07, "loss": 0.2236, "step": 1382300 }, { "epoch": 14.08, "learning_rate": 5.973617403878084e-07, "loss": 0.1855, "step": 1382400 }, { "epoch": 14.09, "learning_rate": 5.960452659531735e-07, "loss": 0.2359, "step": 1382500 }, { "epoch": 14.09, "learning_rate": 5.947302291964552e-07, "loss": 0.2403, "step": 1382600 }, { "epoch": 14.09, "learning_rate": 5.934166301819455e-07, "loss": 0.2001, "step": 1382700 }, { "epoch": 14.09, "learning_rate": 5.921044689738697e-07, "loss": 0.1828, "step": 1382800 }, { "epoch": 14.09, "learning_rate": 5.907937456363932e-07, "loss": 0.2559, "step": 1382900 }, { "epoch": 14.09, "learning_rate": 5.894844602335947e-07, "loss": 0.2229, "step": 1383000 }, { "epoch": 14.09, "learning_rate": 5.88176612829493e-07, "loss": 0.2073, "step": 1383100 }, { "epoch": 14.09, "learning_rate": 5.868702034880336e-07, "loss": 0.1828, "step": 1383200 }, { "epoch": 14.09, "learning_rate": 5.855652322730953e-07, "loss": 0.2384, "step": 1383300 }, { "epoch": 14.09, "learning_rate": 5.842616992484773e-07, "loss": 0.1898, "step": 1383400 }, { "epoch": 14.1, "learning_rate": 5.829596044779218e-07, "loss": 0.2207, "step": 1383500 }, { "epoch": 14.1, "learning_rate": 5.816589480250912e-07, "loss": 0.1993, "step": 1383600 }, { "epoch": 14.1, "learning_rate": 5.803597299535746e-07, "loss": 0.1938, "step": 1383700 }, { "epoch": 14.1, "learning_rate": 5.790619503269013e-07, "loss": 0.1448, "step": 1383800 }, { "epoch": 14.1, "learning_rate": 5.777785654988843e-07, "loss": 0.201, "step": 1383900 }, { "epoch": 14.1, "learning_rate": 5.764836485661562e-07, "loss": 0.186, "step": 1384000 }, { "epoch": 14.1, "learning_rate": 5.751901702677864e-07, "loss": 0.1587, "step": 1384100 }, { "epoch": 14.1, "learning_rate": 5.73898130667021e-07, "loss": 0.1755, "step": 1384200 }, { "epoch": 14.1, "learning_rate": 5.726075298270294e-07, "loss": 0.196, "step": 1384300 }, { "epoch": 14.1, "learning_rate": 5.713183678109146e-07, "loss": 0.2037, "step": 1384400 }, { "epoch": 14.11, "learning_rate": 5.700306446817127e-07, "loss": 0.1684, "step": 1384500 }, { "epoch": 14.11, "learning_rate": 5.687443605023834e-07, "loss": 0.1266, "step": 1384600 }, { "epoch": 14.11, "learning_rate": 5.674595153358197e-07, "loss": 0.1821, "step": 1384700 }, { "epoch": 14.11, "learning_rate": 5.661761092448381e-07, "loss": 0.2654, "step": 1384800 }, { "epoch": 14.11, "learning_rate": 5.648941422921949e-07, "loss": 0.1804, "step": 1384900 }, { "epoch": 14.11, "learning_rate": 5.636136145405701e-07, "loss": 0.2323, "step": 1385000 }, { "epoch": 14.11, "learning_rate": 5.623345260525703e-07, "loss": 0.1967, "step": 1385100 }, { "epoch": 14.11, "learning_rate": 5.610568768907354e-07, "loss": 0.1618, "step": 1385200 }, { "epoch": 14.11, "learning_rate": 5.59780667117542e-07, "loss": 0.2188, "step": 1385300 }, { "epoch": 14.11, "learning_rate": 5.585058967953805e-07, "loss": 0.2244, "step": 1385400 }, { "epoch": 14.12, "learning_rate": 5.57232565986584e-07, "loss": 0.2145, "step": 1385500 }, { "epoch": 14.12, "learning_rate": 5.559606747534063e-07, "loss": 0.1877, "step": 1385600 }, { "epoch": 14.12, "learning_rate": 5.546902231580409e-07, "loss": 0.1695, "step": 1385700 }, { "epoch": 14.12, "learning_rate": 5.534212112626047e-07, "loss": 0.2095, "step": 1385800 }, { "epoch": 14.12, "learning_rate": 5.521536391291416e-07, "loss": 0.1991, "step": 1385900 }, { "epoch": 14.12, "learning_rate": 5.50887506819635e-07, "loss": 0.2103, "step": 1386000 }, { "epoch": 14.12, "learning_rate": 5.496228143959792e-07, "loss": 0.2081, "step": 1386100 }, { "epoch": 14.12, "learning_rate": 5.48359561920021e-07, "loss": 0.1534, "step": 1386200 }, { "epoch": 14.12, "learning_rate": 5.470977494535212e-07, "loss": 0.2173, "step": 1386300 }, { "epoch": 14.12, "learning_rate": 5.458373770581771e-07, "loss": 0.1606, "step": 1386400 }, { "epoch": 14.13, "learning_rate": 5.445784447956092e-07, "loss": 0.2253, "step": 1386500 }, { "epoch": 14.13, "learning_rate": 5.433209527273786e-07, "loss": 0.2175, "step": 1386600 }, { "epoch": 14.13, "learning_rate": 5.420649009149626e-07, "loss": 0.1447, "step": 1386700 }, { "epoch": 14.13, "learning_rate": 5.408102894197753e-07, "loss": 0.179, "step": 1386800 }, { "epoch": 14.13, "learning_rate": 5.395571183031611e-07, "loss": 0.2296, "step": 1386900 }, { "epoch": 14.13, "learning_rate": 5.383178978027859e-07, "loss": 0.2671, "step": 1387000 }, { "epoch": 14.13, "learning_rate": 5.37067593221754e-07, "loss": 0.2065, "step": 1387100 }, { "epoch": 14.13, "learning_rate": 5.35818729202292e-07, "loss": 0.188, "step": 1387200 }, { "epoch": 14.13, "learning_rate": 5.345713058054613e-07, "loss": 0.2235, "step": 1387300 }, { "epoch": 14.14, "learning_rate": 5.333253230922564e-07, "loss": 0.1954, "step": 1387400 }, { "epoch": 14.14, "learning_rate": 5.320807811235918e-07, "loss": 0.2149, "step": 1387500 }, { "epoch": 14.14, "learning_rate": 5.308501038397706e-07, "loss": 0.2391, "step": 1387600 }, { "epoch": 14.14, "learning_rate": 5.2960842913371e-07, "loss": 0.1949, "step": 1387700 }, { "epoch": 14.14, "learning_rate": 5.283681953539309e-07, "loss": 0.2725, "step": 1387800 }, { "epoch": 14.14, "learning_rate": 5.271294025610718e-07, "loss": 0.1964, "step": 1387900 }, { "epoch": 14.14, "learning_rate": 5.258920508156973e-07, "loss": 0.1679, "step": 1388000 }, { "epoch": 14.14, "learning_rate": 5.24656140178309e-07, "loss": 0.1959, "step": 1388100 }, { "epoch": 14.14, "learning_rate": 5.234216707093353e-07, "loss": 0.2081, "step": 1388200 }, { "epoch": 14.14, "learning_rate": 5.22188642469138e-07, "loss": 0.1577, "step": 1388300 }, { "epoch": 14.15, "learning_rate": 5.209570555179954e-07, "loss": 0.1971, "step": 1388400 }, { "epoch": 14.15, "learning_rate": 5.197269099161362e-07, "loss": 0.2238, "step": 1388500 }, { "epoch": 14.15, "learning_rate": 5.184982057236986e-07, "loss": 0.2126, "step": 1388600 }, { "epoch": 14.15, "learning_rate": 5.172709430007649e-07, "loss": 0.1948, "step": 1388700 }, { "epoch": 14.15, "learning_rate": 5.16045121807337e-07, "loss": 0.1677, "step": 1388800 }, { "epoch": 14.15, "learning_rate": 5.148207422033568e-07, "loss": 0.2057, "step": 1388900 }, { "epoch": 14.15, "learning_rate": 5.135978042486766e-07, "loss": 0.1996, "step": 1389000 }, { "epoch": 14.15, "learning_rate": 5.123763080031018e-07, "loss": 0.2161, "step": 1389100 }, { "epoch": 14.15, "learning_rate": 5.111562535263547e-07, "loss": 0.2006, "step": 1389200 }, { "epoch": 14.15, "learning_rate": 5.099376408780843e-07, "loss": 0.2501, "step": 1389300 }, { "epoch": 14.16, "learning_rate": 5.087204701178727e-07, "loss": 0.2154, "step": 1389400 }, { "epoch": 14.16, "learning_rate": 5.075047413052425e-07, "loss": 0.1674, "step": 1389500 }, { "epoch": 14.16, "learning_rate": 5.062904544996227e-07, "loss": 0.183, "step": 1389600 }, { "epoch": 14.16, "learning_rate": 5.050776097603893e-07, "loss": 0.2123, "step": 1389700 }, { "epoch": 14.16, "learning_rate": 5.038662071468481e-07, "loss": 0.1631, "step": 1389800 }, { "epoch": 14.16, "learning_rate": 5.026562467182216e-07, "loss": 0.1917, "step": 1389900 }, { "epoch": 14.16, "learning_rate": 5.014477285336761e-07, "loss": 0.2145, "step": 1390000 }, { "epoch": 14.16, "learning_rate": 5.002406526523007e-07, "loss": 0.183, "step": 1390100 }, { "epoch": 14.16, "learning_rate": 4.990350191331116e-07, "loss": 0.1868, "step": 1390200 }, { "epoch": 14.16, "learning_rate": 4.978308280350552e-07, "loss": 0.2024, "step": 1390300 }, { "epoch": 14.17, "learning_rate": 4.966280794170142e-07, "loss": 0.1818, "step": 1390400 }, { "epoch": 14.17, "learning_rate": 4.954267733377915e-07, "loss": 0.2202, "step": 1390500 }, { "epoch": 14.17, "learning_rate": 4.942269098561236e-07, "loss": 0.1728, "step": 1390600 }, { "epoch": 14.17, "learning_rate": 4.930284890306802e-07, "loss": 0.1706, "step": 1390700 }, { "epoch": 14.17, "learning_rate": 4.918315109200577e-07, "loss": 0.1587, "step": 1390800 }, { "epoch": 14.17, "learning_rate": 4.906359755827761e-07, "loss": 0.1746, "step": 1390900 }, { "epoch": 14.17, "learning_rate": 4.894418830772884e-07, "loss": 0.1609, "step": 1391000 }, { "epoch": 14.17, "learning_rate": 4.882492334619915e-07, "loss": 0.2119, "step": 1391100 }, { "epoch": 14.17, "learning_rate": 4.870580267951852e-07, "loss": 0.1968, "step": 1391200 }, { "epoch": 14.17, "learning_rate": 4.858682631351163e-07, "loss": 0.1825, "step": 1391300 }, { "epoch": 14.18, "learning_rate": 4.846799425399618e-07, "loss": 0.2432, "step": 1391400 }, { "epoch": 14.18, "learning_rate": 4.834930650678182e-07, "loss": 0.2062, "step": 1391500 }, { "epoch": 14.18, "learning_rate": 4.823076307767193e-07, "loss": 0.1888, "step": 1391600 }, { "epoch": 14.18, "learning_rate": 4.811236397246255e-07, "loss": 0.1886, "step": 1391700 }, { "epoch": 14.18, "learning_rate": 4.799410919694236e-07, "loss": 0.1679, "step": 1391800 }, { "epoch": 14.18, "learning_rate": 4.787599875689375e-07, "loss": 0.2189, "step": 1391900 }, { "epoch": 14.18, "learning_rate": 4.775803265809142e-07, "loss": 0.1603, "step": 1392000 }, { "epoch": 14.18, "learning_rate": 4.7640210906303104e-07, "loss": 0.1994, "step": 1392100 }, { "epoch": 14.18, "learning_rate": 4.7523709566714703e-07, "loss": 0.1808, "step": 1392200 }, { "epoch": 14.18, "learning_rate": 4.7406175082616353e-07, "loss": 0.2467, "step": 1392300 }, { "epoch": 14.19, "learning_rate": 4.728878496273592e-07, "loss": 0.1827, "step": 1392400 }, { "epoch": 14.19, "learning_rate": 4.717153921281314e-07, "loss": 0.1897, "step": 1392500 }, { "epoch": 14.19, "learning_rate": 4.70544378385801e-07, "loss": 0.1695, "step": 1392600 }, { "epoch": 14.19, "learning_rate": 4.6937480845763215e-07, "loss": 0.1605, "step": 1392700 }, { "epoch": 14.19, "learning_rate": 4.682066824008058e-07, "loss": 0.2606, "step": 1392800 }, { "epoch": 14.19, "learning_rate": 4.670400002724329e-07, "loss": 0.2101, "step": 1392900 }, { "epoch": 14.19, "learning_rate": 4.6587476212956115e-07, "loss": 0.2361, "step": 1393000 }, { "epoch": 14.19, "learning_rate": 4.6471096802916504e-07, "loss": 0.1851, "step": 1393100 }, { "epoch": 14.19, "learning_rate": 4.6354861802814564e-07, "loss": 0.243, "step": 1393200 }, { "epoch": 14.2, "learning_rate": 4.6238771218333087e-07, "loss": 0.1852, "step": 1393300 }, { "epoch": 14.2, "learning_rate": 4.6122825055148866e-07, "loss": 0.1998, "step": 1393400 }, { "epoch": 14.2, "learning_rate": 4.6007023318930695e-07, "loss": 0.2208, "step": 1393500 }, { "epoch": 14.2, "learning_rate": 4.589136601534039e-07, "loss": 0.1844, "step": 1393600 }, { "epoch": 14.2, "learning_rate": 4.577585315003308e-07, "loss": 0.1984, "step": 1393700 }, { "epoch": 14.2, "learning_rate": 4.5660484728656915e-07, "loss": 0.1903, "step": 1393800 }, { "epoch": 14.2, "learning_rate": 4.5545260756852057e-07, "loss": 0.2057, "step": 1393900 }, { "epoch": 14.2, "learning_rate": 4.5430181240253e-07, "loss": 0.1688, "step": 1394000 }, { "epoch": 14.2, "learning_rate": 4.5315246184486236e-07, "loss": 0.2126, "step": 1394100 }, { "epoch": 14.2, "learning_rate": 4.5200455595171275e-07, "loss": 0.1581, "step": 1394200 }, { "epoch": 14.21, "learning_rate": 4.5085809477920294e-07, "loss": 0.1728, "step": 1394300 }, { "epoch": 14.21, "learning_rate": 4.4971307838339805e-07, "loss": 0.1743, "step": 1394400 }, { "epoch": 14.21, "learning_rate": 4.4856950682027333e-07, "loss": 0.2086, "step": 1394500 }, { "epoch": 14.21, "learning_rate": 4.4742738014574404e-07, "loss": 0.1823, "step": 1394600 }, { "epoch": 14.21, "learning_rate": 4.462866984156588e-07, "loss": 0.2249, "step": 1394700 }, { "epoch": 14.21, "learning_rate": 4.4514746168578645e-07, "loss": 0.1328, "step": 1394800 }, { "epoch": 14.21, "learning_rate": 4.440096700118257e-07, "loss": 0.217, "step": 1394900 }, { "epoch": 14.21, "learning_rate": 4.4287332344941533e-07, "loss": 0.2647, "step": 1395000 }, { "epoch": 14.21, "learning_rate": 4.41738422054111e-07, "loss": 0.1665, "step": 1395100 }, { "epoch": 14.21, "learning_rate": 4.4060496588140155e-07, "loss": 0.2088, "step": 1395200 }, { "epoch": 14.22, "learning_rate": 4.3947295498670934e-07, "loss": 0.1973, "step": 1395300 }, { "epoch": 14.22, "learning_rate": 4.383423894253802e-07, "loss": 0.1949, "step": 1395400 }, { "epoch": 14.22, "learning_rate": 4.3721326925269643e-07, "loss": 0.1474, "step": 1395500 }, { "epoch": 14.22, "learning_rate": 4.360855945238573e-07, "loss": 0.1958, "step": 1395600 }, { "epoch": 14.22, "learning_rate": 4.34959365294012e-07, "loss": 0.2716, "step": 1395700 }, { "epoch": 14.22, "learning_rate": 4.338345816182132e-07, "loss": 0.2024, "step": 1395800 }, { "epoch": 14.22, "learning_rate": 4.327112435514602e-07, "loss": 0.2038, "step": 1395900 }, { "epoch": 14.22, "learning_rate": 4.315893511486824e-07, "loss": 0.2189, "step": 1396000 }, { "epoch": 14.22, "learning_rate": 4.304689044647292e-07, "loss": 0.1734, "step": 1396100 }, { "epoch": 14.22, "learning_rate": 4.293499035543835e-07, "loss": 0.2226, "step": 1396200 }, { "epoch": 14.23, "learning_rate": 4.282323484723616e-07, "loss": 0.1769, "step": 1396300 }, { "epoch": 14.23, "learning_rate": 4.2712739320799223e-07, "loss": 0.2016, "step": 1396400 }, { "epoch": 14.23, "learning_rate": 4.260238551064666e-07, "loss": 0.2012, "step": 1396500 }, { "epoch": 14.23, "learning_rate": 4.2491060891660103e-07, "loss": 0.15, "step": 1396600 }, { "epoch": 14.23, "learning_rate": 4.237988087721123e-07, "loss": 0.1915, "step": 1396700 }, { "epoch": 14.23, "learning_rate": 4.226884547273602e-07, "loss": 0.1941, "step": 1396800 }, { "epoch": 14.23, "learning_rate": 4.215795468366346e-07, "loss": 0.1984, "step": 1396900 }, { "epoch": 14.23, "learning_rate": 4.204720851541555e-07, "loss": 0.1849, "step": 1397000 }, { "epoch": 14.23, "learning_rate": 4.1936606973406624e-07, "loss": 0.2001, "step": 1397100 }, { "epoch": 14.23, "learning_rate": 4.182615006304502e-07, "loss": 0.1795, "step": 1397200 }, { "epoch": 14.24, "learning_rate": 4.1715837789731425e-07, "loss": 0.2347, "step": 1397300 }, { "epoch": 14.24, "learning_rate": 4.160677111917011e-07, "loss": 0.2056, "step": 1397400 }, { "epoch": 14.24, "learning_rate": 4.149674668962067e-07, "loss": 0.2284, "step": 1397500 }, { "epoch": 14.24, "learning_rate": 4.138686691322502e-07, "loss": 0.2434, "step": 1397600 }, { "epoch": 14.24, "learning_rate": 4.127713179535519e-07, "loss": 0.2129, "step": 1397700 }, { "epoch": 14.24, "learning_rate": 4.116754134137757e-07, "loss": 0.1784, "step": 1397800 }, { "epoch": 14.24, "learning_rate": 4.105809555664919e-07, "loss": 0.2229, "step": 1397900 }, { "epoch": 14.24, "learning_rate": 4.094879444652211e-07, "loss": 0.2213, "step": 1398000 }, { "epoch": 14.24, "learning_rate": 4.0839638016340385e-07, "loss": 0.1849, "step": 1398100 }, { "epoch": 14.25, "learning_rate": 4.073062627144075e-07, "loss": 0.1949, "step": 1398200 }, { "epoch": 14.25, "learning_rate": 4.062175921715361e-07, "loss": 0.2545, "step": 1398300 }, { "epoch": 14.25, "learning_rate": 4.051303685880203e-07, "loss": 0.1914, "step": 1398400 }, { "epoch": 14.25, "learning_rate": 4.040445920170144e-07, "loss": 0.1827, "step": 1398500 }, { "epoch": 14.25, "learning_rate": 4.029602625116091e-07, "loss": 0.2008, "step": 1398600 }, { "epoch": 14.25, "learning_rate": 4.0187738012482213e-07, "loss": 0.1792, "step": 1398700 }, { "epoch": 14.25, "learning_rate": 4.0079594490959437e-07, "loss": 0.1552, "step": 1398800 }, { "epoch": 14.25, "learning_rate": 3.9971595691881023e-07, "loss": 0.2384, "step": 1398900 }, { "epoch": 14.25, "learning_rate": 3.986374162052675e-07, "loss": 0.2232, "step": 1399000 }, { "epoch": 14.25, "learning_rate": 3.9756032282170727e-07, "loss": 0.1909, "step": 1399100 }, { "epoch": 14.26, "learning_rate": 3.964846768207875e-07, "loss": 0.1698, "step": 1399200 }, { "epoch": 14.26, "learning_rate": 3.9541047825510266e-07, "loss": 0.255, "step": 1399300 }, { "epoch": 14.26, "learning_rate": 3.943377271771742e-07, "loss": 0.2387, "step": 1399400 }, { "epoch": 14.26, "learning_rate": 3.932664236394534e-07, "loss": 0.254, "step": 1399500 }, { "epoch": 14.26, "learning_rate": 3.9219656769432177e-07, "loss": 0.2257, "step": 1399600 }, { "epoch": 14.26, "learning_rate": 3.911281593940874e-07, "loss": 0.2108, "step": 1399700 }, { "epoch": 14.26, "learning_rate": 3.900611987909886e-07, "loss": 0.2215, "step": 1399800 }, { "epoch": 14.26, "learning_rate": 3.889956859371935e-07, "loss": 0.2236, "step": 1399900 }, { "epoch": 14.26, "learning_rate": 3.879316208848005e-07, "loss": 0.2375, "step": 1400000 }, { "epoch": 14.26, "eval_cer": 0.054162580470109245, "eval_loss": 0.315573126077652, "eval_runtime": 9326.5244, "eval_samples_per_second": 5.866, "eval_steps_per_second": 0.367, "eval_wer": 0.11891348088531187, "step": 1400000 }, { "epoch": 14.26, "learning_rate": 3.86869003685838e-07, "loss": 0.1934, "step": 1400100 }, { "epoch": 14.27, "learning_rate": 3.8580783439225774e-07, "loss": 0.1959, "step": 1400200 }, { "epoch": 14.27, "learning_rate": 3.8475870310175186e-07, "loss": 0.2004, "step": 1400300 }, { "epoch": 14.27, "learning_rate": 3.8370041529417475e-07, "loss": 0.1839, "step": 1400400 }, { "epoch": 14.27, "learning_rate": 3.826435755469093e-07, "loss": 0.2352, "step": 1400500 }, { "epoch": 14.27, "learning_rate": 3.815881839116242e-07, "loss": 0.1679, "step": 1400600 }, { "epoch": 14.27, "learning_rate": 3.8053424043992145e-07, "loss": 0.1999, "step": 1400700 }, { "epoch": 14.27, "learning_rate": 3.7948174518333656e-07, "loss": 0.1887, "step": 1400800 }, { "epoch": 14.27, "learning_rate": 3.784306981933283e-07, "loss": 0.2218, "step": 1400900 }, { "epoch": 14.27, "learning_rate": 3.77381099521289e-07, "loss": 0.1684, "step": 1401000 }, { "epoch": 14.27, "learning_rate": 3.763329492185308e-07, "loss": 0.2067, "step": 1401100 }, { "epoch": 14.28, "learning_rate": 3.7528624733630947e-07, "loss": 0.1933, "step": 1401200 }, { "epoch": 14.28, "learning_rate": 3.7424099392579737e-07, "loss": 0.1865, "step": 1401300 }, { "epoch": 14.28, "learning_rate": 3.731971890381036e-07, "loss": 0.2038, "step": 1401400 }, { "epoch": 14.28, "learning_rate": 3.7215483272426745e-07, "loss": 0.1879, "step": 1401500 }, { "epoch": 14.28, "learning_rate": 3.71113925035248e-07, "loss": 0.205, "step": 1401600 }, { "epoch": 14.28, "learning_rate": 3.700744660219413e-07, "loss": 0.1821, "step": 1401700 }, { "epoch": 14.28, "learning_rate": 3.6903645573517333e-07, "loss": 0.212, "step": 1401800 }, { "epoch": 14.28, "learning_rate": 3.6799989422569346e-07, "loss": 0.223, "step": 1401900 }, { "epoch": 14.28, "learning_rate": 3.669647815441812e-07, "loss": 0.2033, "step": 1402000 }, { "epoch": 14.28, "learning_rate": 3.6593111774125274e-07, "loss": 0.2208, "step": 1402100 }, { "epoch": 14.29, "learning_rate": 3.6489890286744764e-07, "loss": 0.2121, "step": 1402200 }, { "epoch": 14.29, "learning_rate": 3.6386813697322884e-07, "loss": 0.2131, "step": 1402300 }, { "epoch": 14.29, "learning_rate": 3.6283882010899937e-07, "loss": 0.2358, "step": 1402400 }, { "epoch": 14.29, "learning_rate": 3.6181095232508896e-07, "loss": 0.2021, "step": 1402500 }, { "epoch": 14.29, "learning_rate": 3.607845336717508e-07, "loss": 0.2058, "step": 1402600 }, { "epoch": 14.29, "learning_rate": 3.597595641991713e-07, "loss": 0.2058, "step": 1402700 }, { "epoch": 14.29, "learning_rate": 3.5873604395746385e-07, "loss": 0.2457, "step": 1402800 }, { "epoch": 14.29, "learning_rate": 3.57713972996675e-07, "loss": 0.1899, "step": 1402900 }, { "epoch": 14.29, "learning_rate": 3.567035504087257e-07, "loss": 0.1844, "step": 1403000 }, { "epoch": 14.3, "learning_rate": 3.556843636655682e-07, "loss": 0.2003, "step": 1403100 }, { "epoch": 14.3, "learning_rate": 3.5466662635253334e-07, "loss": 0.2235, "step": 1403200 }, { "epoch": 14.3, "learning_rate": 3.5365033851939124e-07, "loss": 0.2315, "step": 1403300 }, { "epoch": 14.3, "learning_rate": 3.5263550021582214e-07, "loss": 0.1889, "step": 1403400 }, { "epoch": 14.3, "learning_rate": 3.51632238203119e-07, "loss": 0.258, "step": 1403500 }, { "epoch": 14.3, "learning_rate": 3.50620284610963e-07, "loss": 0.1855, "step": 1403600 }, { "epoch": 14.3, "learning_rate": 3.4960978069653813e-07, "loss": 0.2425, "step": 1403700 }, { "epoch": 14.3, "learning_rate": 3.486007265092517e-07, "loss": 0.1947, "step": 1403800 }, { "epoch": 14.3, "learning_rate": 3.475931220984374e-07, "loss": 0.1692, "step": 1403900 }, { "epoch": 14.3, "learning_rate": 3.465869675133626e-07, "loss": 0.2303, "step": 1404000 }, { "epoch": 14.31, "learning_rate": 3.4558226280322456e-07, "loss": 0.2251, "step": 1404100 }, { "epoch": 14.31, "learning_rate": 3.4457900801714737e-07, "loss": 0.1505, "step": 1404200 }, { "epoch": 14.31, "learning_rate": 3.435772032041784e-07, "loss": 0.2383, "step": 1404300 }, { "epoch": 14.31, "learning_rate": 3.425768484133085e-07, "loss": 0.1873, "step": 1404400 }, { "epoch": 14.31, "learning_rate": 3.4157794369344186e-07, "loss": 0.189, "step": 1404500 }, { "epoch": 14.31, "learning_rate": 3.4058048909341945e-07, "loss": 0.227, "step": 1404600 }, { "epoch": 14.31, "learning_rate": 3.3958448466201553e-07, "loss": 0.2359, "step": 1404700 }, { "epoch": 14.31, "learning_rate": 3.3858993044792783e-07, "loss": 0.2039, "step": 1404800 }, { "epoch": 14.31, "learning_rate": 3.3759682649977753e-07, "loss": 0.1731, "step": 1404900 }, { "epoch": 14.31, "learning_rate": 3.3660517286613233e-07, "loss": 0.2045, "step": 1405000 }, { "epoch": 14.32, "learning_rate": 3.356149695954669e-07, "loss": 0.159, "step": 1405100 }, { "epoch": 14.32, "learning_rate": 3.3462621673620574e-07, "loss": 0.2304, "step": 1405200 }, { "epoch": 14.32, "learning_rate": 3.336389143366869e-07, "loss": 0.207, "step": 1405300 }, { "epoch": 14.32, "learning_rate": 3.326530624451851e-07, "loss": 0.2079, "step": 1405400 }, { "epoch": 14.32, "learning_rate": 3.316686611099018e-07, "loss": 0.1933, "step": 1405500 }, { "epoch": 14.32, "learning_rate": 3.3068571037897176e-07, "loss": 0.1882, "step": 1405600 }, { "epoch": 14.32, "learning_rate": 3.297042103004533e-07, "loss": 0.1945, "step": 1405700 }, { "epoch": 14.32, "learning_rate": 3.2872416092233793e-07, "loss": 0.2185, "step": 1405800 }, { "epoch": 14.32, "learning_rate": 3.277455622925374e-07, "loss": 0.1736, "step": 1405900 }, { "epoch": 14.32, "learning_rate": 3.2676841445891003e-07, "loss": 0.1721, "step": 1406000 }, { "epoch": 14.33, "learning_rate": 3.25792717469221e-07, "loss": 0.1797, "step": 1406100 }, { "epoch": 14.33, "learning_rate": 3.248184713711855e-07, "loss": 0.1878, "step": 1406200 }, { "epoch": 14.33, "learning_rate": 3.23845676212432e-07, "loss": 0.2051, "step": 1406300 }, { "epoch": 14.33, "learning_rate": 3.2287433204052917e-07, "loss": 0.2241, "step": 1406400 }, { "epoch": 14.33, "learning_rate": 3.2190443890296237e-07, "loss": 0.1911, "step": 1406500 }, { "epoch": 14.33, "learning_rate": 3.209359968471637e-07, "loss": 0.1842, "step": 1406600 }, { "epoch": 14.33, "learning_rate": 3.199690059204785e-07, "loss": 0.2067, "step": 1406700 }, { "epoch": 14.33, "learning_rate": 3.1900346617018905e-07, "loss": 0.2268, "step": 1406800 }, { "epoch": 14.33, "learning_rate": 3.180393776435042e-07, "loss": 0.2456, "step": 1406900 }, { "epoch": 14.33, "learning_rate": 3.170767403875563e-07, "loss": 0.1632, "step": 1407000 }, { "epoch": 14.34, "learning_rate": 3.16115554449421e-07, "loss": 0.2031, "step": 1407100 }, { "epoch": 14.34, "learning_rate": 3.1515581987608734e-07, "loss": 0.212, "step": 1407200 }, { "epoch": 14.34, "learning_rate": 3.1419753671449114e-07, "loss": 0.2412, "step": 1407300 }, { "epoch": 14.34, "learning_rate": 3.132407050114716e-07, "loss": 0.19, "step": 1407400 }, { "epoch": 14.34, "learning_rate": 3.122853248138247e-07, "loss": 0.1897, "step": 1407500 }, { "epoch": 14.34, "learning_rate": 3.1133139616825955e-07, "loss": 0.2226, "step": 1407600 }, { "epoch": 14.34, "learning_rate": 3.103789191214157e-07, "loss": 0.2243, "step": 1407700 }, { "epoch": 14.34, "learning_rate": 3.094278937198591e-07, "loss": 0.1816, "step": 1407800 }, { "epoch": 14.34, "learning_rate": 3.0847832001009935e-07, "loss": 0.1921, "step": 1407900 }, { "epoch": 14.34, "learning_rate": 3.075301980385592e-07, "loss": 0.2334, "step": 1408000 }, { "epoch": 14.35, "learning_rate": 3.0658352785159826e-07, "loss": 0.2164, "step": 1408100 }, { "epoch": 14.35, "learning_rate": 3.056477544923475e-07, "loss": 0.2118, "step": 1408200 }, { "epoch": 14.35, "learning_rate": 3.04703973494328e-07, "loss": 0.2029, "step": 1408300 }, { "epoch": 14.35, "learning_rate": 3.0376164441907717e-07, "loss": 0.1735, "step": 1408400 }, { "epoch": 14.35, "learning_rate": 3.0282076731265816e-07, "loss": 0.1516, "step": 1408500 }, { "epoch": 14.35, "learning_rate": 3.0188134222108423e-07, "loss": 0.2184, "step": 1408600 }, { "epoch": 14.35, "learning_rate": 3.009433691902852e-07, "loss": 0.2495, "step": 1408700 }, { "epoch": 14.35, "learning_rate": 3.000068482661178e-07, "loss": 0.1792, "step": 1408800 }, { "epoch": 14.35, "learning_rate": 2.9907177949437204e-07, "loss": 0.1768, "step": 1408900 }, { "epoch": 14.36, "learning_rate": 2.981381629207747e-07, "loss": 0.2052, "step": 1409000 }, { "epoch": 14.36, "learning_rate": 2.972059985909692e-07, "loss": 0.2071, "step": 1409100 }, { "epoch": 14.36, "learning_rate": 2.962752865505325e-07, "loss": 0.1965, "step": 1409200 }, { "epoch": 14.36, "learning_rate": 2.9534602684497147e-07, "loss": 0.1988, "step": 1409300 }, { "epoch": 14.36, "learning_rate": 2.944182195197231e-07, "loss": 0.1969, "step": 1409400 }, { "epoch": 14.36, "learning_rate": 2.9349186462014787e-07, "loss": 0.2292, "step": 1409500 }, { "epoch": 14.36, "learning_rate": 2.9256696219154276e-07, "loss": 0.2128, "step": 1409600 }, { "epoch": 14.36, "learning_rate": 2.916435122791317e-07, "loss": 0.2303, "step": 1409700 }, { "epoch": 14.36, "learning_rate": 2.907215149280584e-07, "loss": 0.2083, "step": 1409800 }, { "epoch": 14.36, "learning_rate": 2.89800970183407e-07, "loss": 0.2118, "step": 1409900 }, { "epoch": 14.37, "learning_rate": 2.888818780901914e-07, "loss": 0.2714, "step": 1410000 }, { "epoch": 14.37, "learning_rate": 2.879642386933423e-07, "loss": 0.2034, "step": 1410100 }, { "epoch": 14.37, "learning_rate": 2.870480520377272e-07, "loss": 0.286, "step": 1410200 }, { "epoch": 14.37, "learning_rate": 2.861333181681469e-07, "loss": 0.1986, "step": 1410300 }, { "epoch": 14.37, "learning_rate": 2.8522003712932566e-07, "loss": 0.2056, "step": 1410400 }, { "epoch": 14.37, "learning_rate": 2.843082089659144e-07, "loss": 0.2288, "step": 1410500 }, { "epoch": 14.37, "learning_rate": 2.833978337225007e-07, "loss": 0.178, "step": 1410600 }, { "epoch": 14.37, "learning_rate": 2.8248891144358915e-07, "loss": 0.2104, "step": 1410700 }, { "epoch": 14.37, "learning_rate": 2.8158144217362736e-07, "loss": 0.1839, "step": 1410800 }, { "epoch": 14.37, "learning_rate": 2.8067542595698325e-07, "loss": 0.2467, "step": 1410900 }, { "epoch": 14.38, "learning_rate": 2.7977086283795474e-07, "loss": 0.218, "step": 1411000 }, { "epoch": 14.38, "learning_rate": 2.788677528607697e-07, "loss": 0.2607, "step": 1411100 }, { "epoch": 14.38, "learning_rate": 2.779660960695862e-07, "loss": 0.1861, "step": 1411200 }, { "epoch": 14.38, "learning_rate": 2.770658925084857e-07, "loss": 0.1825, "step": 1411300 }, { "epoch": 14.38, "learning_rate": 2.761671422214895e-07, "loss": 0.2197, "step": 1411400 }, { "epoch": 14.38, "learning_rate": 2.752698452525326e-07, "loss": 0.1464, "step": 1411500 }, { "epoch": 14.38, "learning_rate": 2.7437400164549654e-07, "loss": 0.2427, "step": 1411600 }, { "epoch": 14.38, "learning_rate": 2.734796114441762e-07, "loss": 0.203, "step": 1411700 }, { "epoch": 14.38, "learning_rate": 2.725866746923067e-07, "loss": 0.2115, "step": 1411800 }, { "epoch": 14.38, "learning_rate": 2.7169519143354307e-07, "loss": 0.209, "step": 1411900 }, { "epoch": 14.39, "learning_rate": 2.708051617114771e-07, "loss": 0.2125, "step": 1412000 }, { "epoch": 14.39, "learning_rate": 2.6991658556962396e-07, "loss": 0.2238, "step": 1412100 }, { "epoch": 14.39, "learning_rate": 2.6902946305142896e-07, "loss": 0.195, "step": 1412200 }, { "epoch": 14.39, "learning_rate": 2.6814379420026735e-07, "loss": 0.2152, "step": 1412300 }, { "epoch": 14.39, "learning_rate": 2.6726841401484515e-07, "loss": 0.2234, "step": 1412400 }, { "epoch": 14.39, "learning_rate": 2.663856380898455e-07, "loss": 0.2014, "step": 1412500 }, { "epoch": 14.39, "learning_rate": 2.6550431596114255e-07, "loss": 0.223, "step": 1412600 }, { "epoch": 14.39, "learning_rate": 2.6462444767183846e-07, "loss": 0.1928, "step": 1412700 }, { "epoch": 14.39, "learning_rate": 2.637460332649422e-07, "loss": 0.1894, "step": 1412800 }, { "epoch": 14.39, "learning_rate": 2.6286907278340933e-07, "loss": 0.1982, "step": 1412900 }, { "epoch": 14.4, "learning_rate": 2.6199356627011896e-07, "loss": 0.2365, "step": 1413000 }, { "epoch": 14.4, "learning_rate": 2.6111951376787346e-07, "loss": 0.1752, "step": 1413100 }, { "epoch": 14.4, "learning_rate": 2.6024691531941205e-07, "loss": 0.2232, "step": 1413200 }, { "epoch": 14.4, "learning_rate": 2.5937577096740384e-07, "loss": 0.18, "step": 1413300 }, { "epoch": 14.4, "learning_rate": 2.585060807544315e-07, "loss": 0.1962, "step": 1413400 }, { "epoch": 14.4, "learning_rate": 2.5763784472302434e-07, "loss": 0.2041, "step": 1413500 }, { "epoch": 14.4, "learning_rate": 2.5677106291563836e-07, "loss": 0.2101, "step": 1413600 }, { "epoch": 14.4, "learning_rate": 2.5590573537464633e-07, "loss": 0.1921, "step": 1413700 }, { "epoch": 14.4, "learning_rate": 2.5504186214235783e-07, "loss": 0.1809, "step": 1413800 }, { "epoch": 14.41, "learning_rate": 2.541794432610156e-07, "loss": 0.2159, "step": 1413900 }, { "epoch": 14.41, "learning_rate": 2.5331847877278604e-07, "loss": 0.2292, "step": 1414000 }, { "epoch": 14.41, "learning_rate": 2.5245896871976205e-07, "loss": 0.214, "step": 1414100 }, { "epoch": 14.41, "learning_rate": 2.5160091314397006e-07, "loss": 0.2133, "step": 1414200 }, { "epoch": 14.41, "learning_rate": 2.507443120873665e-07, "loss": 0.2158, "step": 1414300 }, { "epoch": 14.41, "learning_rate": 2.4989770985657026e-07, "loss": 0.1902, "step": 1414400 }, { "epoch": 14.41, "learning_rate": 2.4904400341767886e-07, "loss": 0.1464, "step": 1414500 }, { "epoch": 14.41, "learning_rate": 2.481917516229926e-07, "loss": 0.1721, "step": 1414600 }, { "epoch": 14.41, "learning_rate": 2.47340954514178e-07, "loss": 0.176, "step": 1414700 }, { "epoch": 14.41, "learning_rate": 2.4649161213283865e-07, "loss": 0.1869, "step": 1414800 }, { "epoch": 14.42, "learning_rate": 2.4564372452049786e-07, "loss": 0.2168, "step": 1414900 }, { "epoch": 14.42, "learning_rate": 2.4479729171861586e-07, "loss": 0.198, "step": 1415000 }, { "epoch": 14.42, "learning_rate": 2.439523137685762e-07, "loss": 0.1802, "step": 1415100 }, { "epoch": 14.42, "learning_rate": 2.4310879071169245e-07, "loss": 0.2194, "step": 1415200 }, { "epoch": 14.42, "learning_rate": 2.422667225892083e-07, "loss": 0.1762, "step": 1415300 }, { "epoch": 14.42, "learning_rate": 2.414261094422976e-07, "loss": 0.2203, "step": 1415400 }, { "epoch": 14.42, "learning_rate": 2.4058695131206067e-07, "loss": 0.2207, "step": 1415500 }, { "epoch": 14.42, "learning_rate": 2.397492482395247e-07, "loss": 0.203, "step": 1415600 }, { "epoch": 14.42, "learning_rate": 2.3891300026565366e-07, "loss": 0.2151, "step": 1415700 }, { "epoch": 14.42, "learning_rate": 2.3807820743133147e-07, "loss": 0.2495, "step": 1415800 }, { "epoch": 14.43, "learning_rate": 2.3724486977737548e-07, "loss": 0.1774, "step": 1415900 }, { "epoch": 14.43, "learning_rate": 2.3641298734452977e-07, "loss": 0.2034, "step": 1416000 }, { "epoch": 14.43, "learning_rate": 2.3558256017346847e-07, "loss": 0.1667, "step": 1416100 }, { "epoch": 14.43, "learning_rate": 2.3475358830479575e-07, "loss": 0.2431, "step": 1416200 }, { "epoch": 14.43, "learning_rate": 2.3392607177904258e-07, "loss": 0.2481, "step": 1416300 }, { "epoch": 14.43, "learning_rate": 2.3310001063666985e-07, "loss": 0.2235, "step": 1416400 }, { "epoch": 14.43, "learning_rate": 2.3227540491806864e-07, "loss": 0.2294, "step": 1416500 }, { "epoch": 14.43, "learning_rate": 2.3145225466355335e-07, "loss": 0.215, "step": 1416600 }, { "epoch": 14.43, "learning_rate": 2.3063055991337512e-07, "loss": 0.2212, "step": 1416700 }, { "epoch": 14.43, "learning_rate": 2.2981032070770515e-07, "loss": 0.2246, "step": 1416800 }, { "epoch": 14.44, "learning_rate": 2.2899153708665465e-07, "loss": 0.1739, "step": 1416900 }, { "epoch": 14.44, "learning_rate": 2.28174209090255e-07, "loss": 0.1994, "step": 1417000 }, { "epoch": 14.44, "learning_rate": 2.2735833675846418e-07, "loss": 0.1758, "step": 1417100 }, { "epoch": 14.44, "learning_rate": 2.2654392013117697e-07, "loss": 0.2329, "step": 1417200 }, { "epoch": 14.44, "learning_rate": 2.2573095924821486e-07, "loss": 0.2287, "step": 1417300 }, { "epoch": 14.44, "learning_rate": 2.2491945414932602e-07, "loss": 0.1854, "step": 1417400 }, { "epoch": 14.44, "learning_rate": 2.241094048741854e-07, "loss": 0.1903, "step": 1417500 }, { "epoch": 14.44, "learning_rate": 2.2330081146240465e-07, "loss": 0.2217, "step": 1417600 }, { "epoch": 14.44, "learning_rate": 2.2249367395351217e-07, "loss": 0.257, "step": 1417700 }, { "epoch": 14.44, "learning_rate": 2.2168799238697967e-07, "loss": 0.1587, "step": 1417800 }, { "epoch": 14.45, "learning_rate": 2.2088376680219235e-07, "loss": 0.2158, "step": 1417900 }, { "epoch": 14.45, "learning_rate": 2.2008099723847875e-07, "loss": 0.2567, "step": 1418000 }, { "epoch": 14.45, "learning_rate": 2.1927968373508744e-07, "loss": 0.222, "step": 1418100 }, { "epoch": 14.45, "learning_rate": 2.1847982633119712e-07, "loss": 0.2449, "step": 1418200 }, { "epoch": 14.45, "learning_rate": 2.1768142506591982e-07, "loss": 0.1752, "step": 1418300 }, { "epoch": 14.45, "learning_rate": 2.1688447997828432e-07, "loss": 0.1575, "step": 1418400 }, { "epoch": 14.45, "learning_rate": 2.160889911072661e-07, "loss": 0.2165, "step": 1418500 }, { "epoch": 14.45, "learning_rate": 2.1529495849175072e-07, "loss": 0.22, "step": 1418600 }, { "epoch": 14.45, "learning_rate": 2.145023821705705e-07, "loss": 0.239, "step": 1418700 }, { "epoch": 14.45, "learning_rate": 2.1371126218246773e-07, "loss": 0.2044, "step": 1418800 }, { "epoch": 14.46, "learning_rate": 2.129215985661348e-07, "loss": 0.2092, "step": 1418900 }, { "epoch": 14.46, "learning_rate": 2.1213339136017085e-07, "loss": 0.1879, "step": 1419000 }, { "epoch": 14.46, "learning_rate": 2.113466406031217e-07, "loss": 0.1408, "step": 1419100 }, { "epoch": 14.46, "learning_rate": 2.105691920664099e-07, "loss": 0.2424, "step": 1419200 }, { "epoch": 14.46, "learning_rate": 2.0978533975706927e-07, "loss": 0.2032, "step": 1419300 }, { "epoch": 14.46, "learning_rate": 2.0900294401144494e-07, "loss": 0.1995, "step": 1419400 }, { "epoch": 14.46, "learning_rate": 2.0822200486779297e-07, "loss": 0.2042, "step": 1419500 }, { "epoch": 14.46, "learning_rate": 2.0745030997884274e-07, "loss": 0.2219, "step": 1419600 }, { "epoch": 14.46, "learning_rate": 2.0667226958664298e-07, "loss": 0.2007, "step": 1419700 }, { "epoch": 14.47, "learning_rate": 2.0589568591037444e-07, "loss": 0.2318, "step": 1419800 }, { "epoch": 14.47, "learning_rate": 2.0512055898800674e-07, "loss": 0.2155, "step": 1419900 }, { "epoch": 14.47, "learning_rate": 2.043468888574329e-07, "loss": 0.2334, "step": 1420000 }, { "epoch": 14.47, "learning_rate": 2.0357467555648935e-07, "loss": 0.1874, "step": 1420100 }, { "epoch": 14.47, "learning_rate": 2.0280391912292583e-07, "loss": 0.1717, "step": 1420200 }, { "epoch": 14.47, "learning_rate": 2.0203461959442892e-07, "loss": 0.1992, "step": 1420300 }, { "epoch": 14.47, "learning_rate": 2.0126677700861852e-07, "loss": 0.2148, "step": 1420400 }, { "epoch": 14.47, "learning_rate": 2.005003914030279e-07, "loss": 0.1274, "step": 1420500 }, { "epoch": 14.47, "learning_rate": 1.9973546281513712e-07, "loss": 0.202, "step": 1420600 }, { "epoch": 14.47, "learning_rate": 1.9897199128234288e-07, "loss": 0.2316, "step": 1420700 }, { "epoch": 14.48, "learning_rate": 1.9820997684197538e-07, "loss": 0.2668, "step": 1420800 }, { "epoch": 14.48, "learning_rate": 1.9744941953129148e-07, "loss": 0.1742, "step": 1420900 }, { "epoch": 14.48, "learning_rate": 1.9669031938748138e-07, "loss": 0.18, "step": 1421000 }, { "epoch": 14.48, "learning_rate": 1.9593267644765546e-07, "loss": 0.2254, "step": 1421100 }, { "epoch": 14.48, "learning_rate": 1.9517649074886067e-07, "loss": 0.198, "step": 1421200 }, { "epoch": 14.48, "learning_rate": 1.944217623280675e-07, "loss": 0.2121, "step": 1421300 }, { "epoch": 14.48, "learning_rate": 1.936684912221831e-07, "loss": 0.2157, "step": 1421400 }, { "epoch": 14.48, "learning_rate": 1.9291667746803465e-07, "loss": 0.2023, "step": 1421500 }, { "epoch": 14.48, "learning_rate": 1.9216632110237942e-07, "loss": 0.1913, "step": 1421600 }, { "epoch": 14.48, "learning_rate": 1.9141742216190804e-07, "loss": 0.213, "step": 1421700 }, { "epoch": 14.49, "learning_rate": 1.906699806832346e-07, "loss": 0.2428, "step": 1421800 }, { "epoch": 14.49, "learning_rate": 1.899239967029065e-07, "loss": 0.2529, "step": 1421900 }, { "epoch": 14.49, "learning_rate": 1.891794702573979e-07, "loss": 0.2234, "step": 1422000 }, { "epoch": 14.49, "learning_rate": 1.8843640138311634e-07, "loss": 0.2033, "step": 1422100 }, { "epoch": 14.49, "learning_rate": 1.876947901163828e-07, "loss": 0.1829, "step": 1422200 }, { "epoch": 14.49, "learning_rate": 1.869546364934649e-07, "loss": 0.2267, "step": 1422300 }, { "epoch": 14.49, "learning_rate": 1.8621594055055368e-07, "loss": 0.1933, "step": 1422400 }, { "epoch": 14.49, "learning_rate": 1.8547870232376029e-07, "loss": 0.1817, "step": 1422500 }, { "epoch": 14.49, "learning_rate": 1.8474292184913587e-07, "loss": 0.2166, "step": 1422600 }, { "epoch": 14.49, "learning_rate": 1.8400859916265168e-07, "loss": 0.1763, "step": 1422700 }, { "epoch": 14.5, "learning_rate": 1.8327573430021894e-07, "loss": 0.2089, "step": 1422800 }, { "epoch": 14.5, "learning_rate": 1.8254432729766235e-07, "loss": 0.2423, "step": 1422900 }, { "epoch": 14.5, "learning_rate": 1.8181437819074664e-07, "loss": 0.1788, "step": 1423000 }, { "epoch": 14.5, "learning_rate": 1.8109316471004335e-07, "loss": 0.2022, "step": 1423100 }, { "epoch": 14.5, "learning_rate": 1.8037338018239923e-07, "loss": 0.2311, "step": 1423200 }, { "epoch": 14.5, "learning_rate": 1.7964777581586433e-07, "loss": 0.1961, "step": 1423300 }, { "epoch": 14.5, "learning_rate": 1.7892362948659703e-07, "loss": 0.252, "step": 1423400 }, { "epoch": 14.5, "learning_rate": 1.7820094123000231e-07, "loss": 0.1657, "step": 1423500 }, { "epoch": 14.5, "learning_rate": 1.7747971108141192e-07, "loss": 0.1826, "step": 1423600 }, { "epoch": 14.5, "learning_rate": 1.7675993907609434e-07, "loss": 0.1501, "step": 1423700 }, { "epoch": 14.51, "learning_rate": 1.7604162524923807e-07, "loss": 0.218, "step": 1423800 }, { "epoch": 14.51, "learning_rate": 1.7532476963596832e-07, "loss": 0.2136, "step": 1423900 }, { "epoch": 14.51, "learning_rate": 1.7460937227133046e-07, "loss": 0.2229, "step": 1424000 }, { "epoch": 14.51, "learning_rate": 1.7389543319030643e-07, "loss": 0.1702, "step": 1424100 }, { "epoch": 14.51, "learning_rate": 1.73182952427805e-07, "loss": 0.1743, "step": 1424200 }, { "epoch": 14.51, "learning_rate": 1.7247193001865836e-07, "loss": 0.2272, "step": 1424300 }, { "epoch": 14.51, "learning_rate": 1.7176236599763196e-07, "loss": 0.1816, "step": 1424400 }, { "epoch": 14.51, "learning_rate": 1.710542603994214e-07, "loss": 0.1896, "step": 1424500 }, { "epoch": 14.51, "learning_rate": 1.7034761325864568e-07, "loss": 0.176, "step": 1424600 }, { "epoch": 14.52, "learning_rate": 1.6964242460986045e-07, "loss": 0.2085, "step": 1424700 }, { "epoch": 14.52, "learning_rate": 1.689386944875415e-07, "loss": 0.1882, "step": 1424800 }, { "epoch": 14.52, "learning_rate": 1.68236422926098e-07, "loss": 0.2101, "step": 1424900 }, { "epoch": 14.52, "learning_rate": 1.675356099598657e-07, "loss": 0.172, "step": 1425000 }, { "epoch": 14.52, "learning_rate": 1.66836255623114e-07, "loss": 0.2118, "step": 1425100 }, { "epoch": 14.52, "learning_rate": 1.6613835995003213e-07, "loss": 0.1902, "step": 1425200 }, { "epoch": 14.52, "learning_rate": 1.6544192297474613e-07, "loss": 0.1942, "step": 1425300 }, { "epoch": 14.52, "learning_rate": 1.6474694473130548e-07, "loss": 0.156, "step": 1425400 }, { "epoch": 14.52, "learning_rate": 1.6405342525369293e-07, "loss": 0.2409, "step": 1425500 }, { "epoch": 14.52, "learning_rate": 1.6336136457581474e-07, "loss": 0.2369, "step": 1425600 }, { "epoch": 14.53, "learning_rate": 1.6267076273151382e-07, "loss": 0.1972, "step": 1425700 }, { "epoch": 14.53, "learning_rate": 1.6198161975454984e-07, "loss": 0.1943, "step": 1425800 }, { "epoch": 14.53, "learning_rate": 1.6129393567861918e-07, "loss": 0.1644, "step": 1425900 }, { "epoch": 14.53, "learning_rate": 1.6060771053734824e-07, "loss": 0.2202, "step": 1426000 }, { "epoch": 14.53, "learning_rate": 1.5992294436429021e-07, "loss": 0.2041, "step": 1426100 }, { "epoch": 14.53, "learning_rate": 1.5923963719292168e-07, "loss": 0.2039, "step": 1426200 }, { "epoch": 14.53, "learning_rate": 1.5855778905665585e-07, "loss": 0.181, "step": 1426300 }, { "epoch": 14.53, "learning_rate": 1.578773999888261e-07, "loss": 0.1959, "step": 1426400 }, { "epoch": 14.53, "learning_rate": 1.5719847002270582e-07, "loss": 0.1755, "step": 1426500 }, { "epoch": 14.53, "learning_rate": 1.5652099919149175e-07, "loss": 0.1767, "step": 1426600 }, { "epoch": 14.54, "learning_rate": 1.5584498752829745e-07, "loss": 0.2131, "step": 1426700 }, { "epoch": 14.54, "learning_rate": 1.5517043506618644e-07, "loss": 0.2056, "step": 1426800 }, { "epoch": 14.54, "learning_rate": 1.544973418381357e-07, "loss": 0.2442, "step": 1426900 }, { "epoch": 14.54, "learning_rate": 1.5382570787705885e-07, "loss": 0.2094, "step": 1427000 }, { "epoch": 14.54, "learning_rate": 1.5315553321578635e-07, "loss": 0.2338, "step": 1427100 }, { "epoch": 14.54, "learning_rate": 1.5248681788709528e-07, "loss": 0.1638, "step": 1427200 }, { "epoch": 14.54, "learning_rate": 1.5181956192367952e-07, "loss": 0.1728, "step": 1427300 }, { "epoch": 14.54, "learning_rate": 1.5115376535815962e-07, "loss": 0.2184, "step": 1427400 }, { "epoch": 14.54, "learning_rate": 1.5048942822309287e-07, "loss": 0.2125, "step": 1427500 }, { "epoch": 14.54, "learning_rate": 1.498265505509633e-07, "loss": 0.2103, "step": 1427600 }, { "epoch": 14.55, "learning_rate": 1.4916513237417495e-07, "loss": 0.1977, "step": 1427700 }, { "epoch": 14.55, "learning_rate": 1.48505173725072e-07, "loss": 0.2221, "step": 1427800 }, { "epoch": 14.55, "learning_rate": 1.4784667463592195e-07, "loss": 0.1616, "step": 1427900 }, { "epoch": 14.55, "learning_rate": 1.4718963513892236e-07, "loss": 0.1818, "step": 1428000 }, { "epoch": 14.55, "learning_rate": 1.4653405526619757e-07, "loss": 0.1987, "step": 1428100 }, { "epoch": 14.55, "learning_rate": 1.458799350498019e-07, "loss": 0.2095, "step": 1428200 }, { "epoch": 14.55, "learning_rate": 1.4522727452171313e-07, "loss": 0.2101, "step": 1428300 }, { "epoch": 14.55, "learning_rate": 1.4457607371384907e-07, "loss": 0.2105, "step": 1428400 }, { "epoch": 14.55, "learning_rate": 1.4392633265804756e-07, "loss": 0.235, "step": 1428500 }, { "epoch": 14.55, "learning_rate": 1.4327805138607653e-07, "loss": 0.2212, "step": 1428600 }, { "epoch": 14.56, "learning_rate": 1.4263122992963394e-07, "loss": 0.1945, "step": 1428700 }, { "epoch": 14.56, "learning_rate": 1.4198586832034453e-07, "loss": 0.2177, "step": 1428800 }, { "epoch": 14.56, "learning_rate": 1.4134196658975972e-07, "loss": 0.2308, "step": 1428900 }, { "epoch": 14.56, "learning_rate": 1.4069952476936765e-07, "loss": 0.2084, "step": 1429000 }, { "epoch": 14.56, "learning_rate": 1.4005854289057984e-07, "loss": 0.1552, "step": 1429100 }, { "epoch": 14.56, "learning_rate": 1.3941902098473457e-07, "loss": 0.2081, "step": 1429200 }, { "epoch": 14.56, "learning_rate": 1.3878095908309686e-07, "loss": 0.2372, "step": 1429300 }, { "epoch": 14.56, "learning_rate": 1.381443572168717e-07, "loss": 0.2339, "step": 1429400 }, { "epoch": 14.56, "learning_rate": 1.3750921541717755e-07, "loss": 0.214, "step": 1429500 }, { "epoch": 14.57, "learning_rate": 1.368755337150762e-07, "loss": 0.2237, "step": 1429600 }, { "epoch": 14.57, "learning_rate": 1.3624331214154294e-07, "loss": 0.1608, "step": 1429700 }, { "epoch": 14.57, "learning_rate": 1.3561255072749968e-07, "loss": 0.2312, "step": 1429800 }, { "epoch": 14.57, "learning_rate": 1.349832495037784e-07, "loss": 0.2034, "step": 1429900 }, { "epoch": 14.57, "learning_rate": 1.3435540850115113e-07, "loss": 0.2329, "step": 1430000 }, { "epoch": 14.57, "learning_rate": 1.337290277503167e-07, "loss": 0.1773, "step": 1430100 }, { "epoch": 14.57, "learning_rate": 1.3310410728189727e-07, "loss": 0.2561, "step": 1430200 }, { "epoch": 14.57, "learning_rate": 1.3248687449935904e-07, "loss": 0.2569, "step": 1430300 }, { "epoch": 14.57, "learning_rate": 1.3186486008378463e-07, "loss": 0.1872, "step": 1430400 }, { "epoch": 14.57, "learning_rate": 1.3124430604177562e-07, "loss": 0.2419, "step": 1430500 }, { "epoch": 14.58, "learning_rate": 1.3062521240367443e-07, "loss": 0.2213, "step": 1430600 }, { "epoch": 14.58, "learning_rate": 1.3000757919974682e-07, "loss": 0.1973, "step": 1430700 }, { "epoch": 14.58, "learning_rate": 1.2939140646019532e-07, "loss": 0.2153, "step": 1430800 }, { "epoch": 14.58, "learning_rate": 1.287828341080499e-07, "loss": 0.185, "step": 1430900 }, { "epoch": 14.58, "learning_rate": 1.2816956778216148e-07, "loss": 0.1959, "step": 1431000 }, { "epoch": 14.58, "learning_rate": 1.2755776201051728e-07, "loss": 0.1721, "step": 1431100 }, { "epoch": 14.58, "learning_rate": 1.269474168230267e-07, "loss": 0.2047, "step": 1431200 }, { "epoch": 14.58, "learning_rate": 1.2633853224953585e-07, "loss": 0.2086, "step": 1431300 }, { "epoch": 14.58, "learning_rate": 1.257311083198176e-07, "loss": 0.217, "step": 1431400 }, { "epoch": 14.58, "learning_rate": 1.2512514506356154e-07, "loss": 0.2039, "step": 1431500 }, { "epoch": 14.59, "learning_rate": 1.245206425104073e-07, "loss": 0.1982, "step": 1431600 }, { "epoch": 14.59, "learning_rate": 1.2391760068990454e-07, "loss": 0.2017, "step": 1431700 }, { "epoch": 14.59, "learning_rate": 1.233160196315397e-07, "loss": 0.2036, "step": 1431800 }, { "epoch": 14.59, "learning_rate": 1.2271589936472593e-07, "loss": 0.1945, "step": 1431900 }, { "epoch": 14.59, "learning_rate": 1.2211723991880642e-07, "loss": 0.1792, "step": 1432000 }, { "epoch": 14.59, "learning_rate": 1.2152004132305107e-07, "loss": 0.2243, "step": 1432100 }, { "epoch": 14.59, "learning_rate": 1.2092430360666317e-07, "loss": 0.2319, "step": 1432200 }, { "epoch": 14.59, "learning_rate": 1.2033002679876616e-07, "loss": 0.2062, "step": 1432300 }, { "epoch": 14.59, "learning_rate": 1.1973721092841672e-07, "loss": 0.2132, "step": 1432400 }, { "epoch": 14.59, "learning_rate": 1.1914585602460504e-07, "loss": 0.1946, "step": 1432500 }, { "epoch": 14.6, "learning_rate": 1.1855596211623799e-07, "loss": 0.1502, "step": 1432600 }, { "epoch": 14.6, "learning_rate": 1.179675292321658e-07, "loss": 0.1987, "step": 1432700 }, { "epoch": 14.6, "learning_rate": 1.1738055740114884e-07, "loss": 0.1924, "step": 1432800 }, { "epoch": 14.6, "learning_rate": 1.1679504665189744e-07, "loss": 0.1746, "step": 1432900 }, { "epoch": 14.6, "learning_rate": 1.1621099701303206e-07, "loss": 0.2179, "step": 1433000 }, { "epoch": 14.6, "learning_rate": 1.1562840851311318e-07, "loss": 0.1678, "step": 1433100 }, { "epoch": 14.6, "learning_rate": 1.1504728118062469e-07, "loss": 0.2184, "step": 1433200 }, { "epoch": 14.6, "learning_rate": 1.1446761504397718e-07, "loss": 0.2252, "step": 1433300 }, { "epoch": 14.6, "learning_rate": 1.1388941013151799e-07, "loss": 0.2286, "step": 1433400 }, { "epoch": 14.6, "learning_rate": 1.1331266647151784e-07, "loss": 0.1877, "step": 1433500 }, { "epoch": 14.61, "learning_rate": 1.1273738409217082e-07, "loss": 0.2145, "step": 1433600 }, { "epoch": 14.61, "learning_rate": 1.121635630216078e-07, "loss": 0.2354, "step": 1433700 }, { "epoch": 14.61, "learning_rate": 1.1159691965151208e-07, "loss": 0.1992, "step": 1433800 }, { "epoch": 14.61, "learning_rate": 1.1102600666882956e-07, "loss": 0.2459, "step": 1433900 }, { "epoch": 14.61, "learning_rate": 1.1045655507860742e-07, "loss": 0.2001, "step": 1434000 }, { "epoch": 14.61, "learning_rate": 1.0988856490869004e-07, "loss": 0.2057, "step": 1434100 }, { "epoch": 14.61, "learning_rate": 1.0932203618684189e-07, "loss": 0.1938, "step": 1434200 }, { "epoch": 14.61, "learning_rate": 1.087569689407708e-07, "loss": 0.1999, "step": 1434300 }, { "epoch": 14.61, "learning_rate": 1.0819336319810469e-07, "loss": 0.2224, "step": 1434400 }, { "epoch": 14.61, "learning_rate": 1.0763121898639483e-07, "loss": 0.1968, "step": 1434500 }, { "epoch": 14.62, "learning_rate": 1.0707053633312925e-07, "loss": 0.2307, "step": 1434600 }, { "epoch": 14.62, "learning_rate": 1.06511315265726e-07, "loss": 0.1975, "step": 1434700 }, { "epoch": 14.62, "learning_rate": 1.0595355581152322e-07, "loss": 0.1965, "step": 1434800 }, { "epoch": 14.62, "learning_rate": 1.0539725799779243e-07, "loss": 0.2139, "step": 1434900 }, { "epoch": 14.62, "learning_rate": 1.0484242185173187e-07, "loss": 0.2093, "step": 1435000 }, { "epoch": 14.62, "learning_rate": 1.0428904740047652e-07, "loss": 0.2234, "step": 1435100 }, { "epoch": 14.62, "learning_rate": 1.0373713467107471e-07, "loss": 0.2333, "step": 1435200 }, { "epoch": 14.62, "learning_rate": 1.031866836905182e-07, "loss": 0.1831, "step": 1435300 }, { "epoch": 14.62, "learning_rate": 1.026376944857188e-07, "loss": 0.1656, "step": 1435400 }, { "epoch": 14.63, "learning_rate": 1.0209016708351505e-07, "loss": 0.2338, "step": 1435500 }, { "epoch": 14.63, "learning_rate": 1.0154410151068217e-07, "loss": 0.2262, "step": 1435600 }, { "epoch": 14.63, "learning_rate": 1.0099949779391881e-07, "loss": 0.1537, "step": 1435700 }, { "epoch": 14.63, "learning_rate": 1.0045635595985036e-07, "loss": 0.2081, "step": 1435800 }, { "epoch": 14.63, "learning_rate": 9.991467603503557e-08, "loss": 0.1948, "step": 1435900 }, { "epoch": 14.63, "learning_rate": 9.937445804595657e-08, "loss": 0.2263, "step": 1436000 }, { "epoch": 14.63, "learning_rate": 9.883570201903225e-08, "loss": 0.2592, "step": 1436100 }, { "epoch": 14.63, "learning_rate": 9.829840798060152e-08, "loss": 0.1886, "step": 1436200 }, { "epoch": 14.63, "learning_rate": 9.776257595693005e-08, "loss": 0.2336, "step": 1436300 }, { "epoch": 14.63, "learning_rate": 9.722820597422689e-08, "loss": 0.2219, "step": 1436400 }, { "epoch": 14.64, "learning_rate": 9.669529805861111e-08, "loss": 0.1937, "step": 1436500 }, { "epoch": 14.64, "learning_rate": 9.61691594569214e-08, "loss": 0.2555, "step": 1436600 }, { "epoch": 14.64, "learning_rate": 9.563916113226379e-08, "loss": 0.1777, "step": 1436700 }, { "epoch": 14.64, "learning_rate": 9.511062495239143e-08, "loss": 0.1838, "step": 1436800 }, { "epoch": 14.64, "learning_rate": 9.458355094314697e-08, "loss": 0.2266, "step": 1436900 }, { "epoch": 14.64, "learning_rate": 9.40579391302998e-08, "loss": 0.1942, "step": 1437000 }, { "epoch": 14.64, "learning_rate": 9.353378953954938e-08, "loss": 0.2122, "step": 1437100 }, { "epoch": 14.64, "learning_rate": 9.30111021965252e-08, "loss": 0.1645, "step": 1437200 }, { "epoch": 14.64, "learning_rate": 9.248987712678347e-08, "loss": 0.214, "step": 1437300 }, { "epoch": 14.64, "learning_rate": 9.197011435580716e-08, "loss": 0.202, "step": 1437400 }, { "epoch": 14.65, "learning_rate": 9.145181390900925e-08, "loss": 0.1924, "step": 1437500 }, { "epoch": 14.65, "learning_rate": 9.093497581173616e-08, "loss": 0.1819, "step": 1437600 }, { "epoch": 14.65, "learning_rate": 9.041960008925099e-08, "loss": 0.217, "step": 1437700 }, { "epoch": 14.65, "learning_rate": 8.990568676676025e-08, "loss": 0.1932, "step": 1437800 }, { "epoch": 14.65, "learning_rate": 8.939323586938386e-08, "loss": 0.201, "step": 1437900 }, { "epoch": 14.65, "learning_rate": 8.888224742218176e-08, "loss": 0.1855, "step": 1438000 }, { "epoch": 14.65, "learning_rate": 8.83727214501373e-08, "loss": 0.238, "step": 1438100 }, { "epoch": 14.65, "learning_rate": 8.78646579781639e-08, "loss": 0.1598, "step": 1438200 }, { "epoch": 14.65, "learning_rate": 8.7358057031105e-08, "loss": 0.1887, "step": 1438300 }, { "epoch": 14.65, "learning_rate": 8.685291863372413e-08, "loss": 0.2125, "step": 1438400 }, { "epoch": 14.66, "learning_rate": 8.634924281072487e-08, "loss": 0.2075, "step": 1438500 }, { "epoch": 14.66, "learning_rate": 8.584702958673086e-08, "loss": 0.2103, "step": 1438600 }, { "epoch": 14.66, "learning_rate": 8.534627898629909e-08, "loss": 0.2312, "step": 1438700 }, { "epoch": 14.66, "learning_rate": 8.48469910339167e-08, "loss": 0.2124, "step": 1438800 }, { "epoch": 14.66, "learning_rate": 8.434916575398744e-08, "loss": 0.2376, "step": 1438900 }, { "epoch": 14.66, "learning_rate": 8.385280317085853e-08, "loss": 0.1757, "step": 1439000 }, { "epoch": 14.66, "learning_rate": 8.335790330879722e-08, "loss": 0.1961, "step": 1439100 }, { "epoch": 14.66, "learning_rate": 8.286446619200083e-08, "loss": 0.1836, "step": 1439200 }, { "epoch": 14.66, "learning_rate": 8.237249184459672e-08, "loss": 0.2417, "step": 1439300 }, { "epoch": 14.66, "learning_rate": 8.188198029063899e-08, "loss": 0.2154, "step": 1439400 }, { "epoch": 14.67, "learning_rate": 8.139293155410843e-08, "loss": 0.1574, "step": 1439500 }, { "epoch": 14.67, "learning_rate": 8.09053456589226e-08, "loss": 0.211, "step": 1439600 }, { "epoch": 14.67, "learning_rate": 8.041922262891577e-08, "loss": 0.2026, "step": 1439700 }, { "epoch": 14.67, "learning_rate": 7.993456248785558e-08, "loss": 0.2018, "step": 1439800 }, { "epoch": 14.67, "learning_rate": 7.945136525944307e-08, "loss": 0.1629, "step": 1439900 }, { "epoch": 14.67, "learning_rate": 7.89696309673027e-08, "loss": 0.2521, "step": 1440000 }, { "epoch": 14.67, "learning_rate": 7.848935963498893e-08, "loss": 0.2006, "step": 1440100 }, { "epoch": 14.67, "learning_rate": 7.801055128597967e-08, "loss": 0.198, "step": 1440200 }, { "epoch": 14.67, "learning_rate": 7.753320594369284e-08, "loss": 0.2044, "step": 1440300 }, { "epoch": 14.68, "learning_rate": 7.705732363146644e-08, "loss": 0.2293, "step": 1440400 }, { "epoch": 14.68, "learning_rate": 7.658290437256188e-08, "loss": 0.2141, "step": 1440500 }, { "epoch": 14.68, "learning_rate": 7.610994819018058e-08, "loss": 0.1699, "step": 1440600 }, { "epoch": 14.68, "learning_rate": 7.563845510744737e-08, "loss": 0.1848, "step": 1440700 }, { "epoch": 14.68, "learning_rate": 7.516842514741385e-08, "loss": 0.1659, "step": 1440800 }, { "epoch": 14.68, "learning_rate": 7.469985833306159e-08, "loss": 0.2022, "step": 1440900 }, { "epoch": 14.68, "learning_rate": 7.423741848100019e-08, "loss": 0.2156, "step": 1441000 }, { "epoch": 14.68, "learning_rate": 7.377176339464331e-08, "loss": 0.217, "step": 1441100 }, { "epoch": 14.68, "learning_rate": 7.330757152225975e-08, "loss": 0.2187, "step": 1441200 }, { "epoch": 14.68, "learning_rate": 7.284484288653803e-08, "loss": 0.1582, "step": 1441300 }, { "epoch": 14.69, "learning_rate": 7.238357751011005e-08, "loss": 0.201, "step": 1441400 }, { "epoch": 14.69, "learning_rate": 7.192377541552774e-08, "loss": 0.182, "step": 1441500 }, { "epoch": 14.69, "learning_rate": 7.146543662526983e-08, "loss": 0.197, "step": 1441600 }, { "epoch": 14.69, "learning_rate": 7.100856116174836e-08, "loss": 0.1976, "step": 1441700 }, { "epoch": 14.69, "learning_rate": 7.055314904730214e-08, "loss": 0.2165, "step": 1441800 }, { "epoch": 14.69, "learning_rate": 7.00992003041967e-08, "loss": 0.1645, "step": 1441900 }, { "epoch": 14.69, "learning_rate": 6.96467149546276e-08, "loss": 0.1716, "step": 1442000 }, { "epoch": 14.69, "learning_rate": 6.919569302072048e-08, "loss": 0.2125, "step": 1442100 }, { "epoch": 14.69, "learning_rate": 6.874613452452438e-08, "loss": 0.1801, "step": 1442200 }, { "epoch": 14.69, "learning_rate": 6.829803948802505e-08, "loss": 0.247, "step": 1442300 }, { "epoch": 14.7, "learning_rate": 6.785140793312494e-08, "loss": 0.2013, "step": 1442400 }, { "epoch": 14.7, "learning_rate": 6.740623988166661e-08, "loss": 0.1739, "step": 1442500 }, { "epoch": 14.7, "learning_rate": 6.696253535541264e-08, "loss": 0.1993, "step": 1442600 }, { "epoch": 14.7, "learning_rate": 6.652029437606232e-08, "loss": 0.2091, "step": 1442700 }, { "epoch": 14.7, "learning_rate": 6.607951696523839e-08, "loss": 0.162, "step": 1442800 }, { "epoch": 14.7, "learning_rate": 6.56402031444836e-08, "loss": 0.1642, "step": 1442900 }, { "epoch": 14.7, "learning_rate": 6.520235293528742e-08, "loss": 0.2367, "step": 1443000 }, { "epoch": 14.7, "learning_rate": 6.476596635905607e-08, "loss": 0.202, "step": 1443100 }, { "epoch": 14.7, "learning_rate": 6.433104343712249e-08, "loss": 0.1882, "step": 1443200 }, { "epoch": 14.7, "learning_rate": 6.390191153795688e-08, "loss": 0.1843, "step": 1443300 }, { "epoch": 14.71, "learning_rate": 6.34785272106464e-08, "loss": 0.205, "step": 1443400 }, { "epoch": 14.71, "learning_rate": 6.304795146707343e-08, "loss": 0.2104, "step": 1443500 }, { "epoch": 14.71, "learning_rate": 6.261883946180436e-08, "loss": 0.168, "step": 1443600 }, { "epoch": 14.71, "learning_rate": 6.219119121581573e-08, "loss": 0.2349, "step": 1443700 }, { "epoch": 14.71, "learning_rate": 6.17650067500175e-08, "loss": 0.1979, "step": 1443800 }, { "epoch": 14.71, "learning_rate": 6.134028608524967e-08, "loss": 0.2192, "step": 1443900 }, { "epoch": 14.71, "learning_rate": 6.09170292422756e-08, "loss": 0.2286, "step": 1444000 }, { "epoch": 14.71, "learning_rate": 6.049523624179542e-08, "loss": 0.1581, "step": 1444100 }, { "epoch": 14.71, "learning_rate": 6.007490710442597e-08, "loss": 0.2618, "step": 1444200 }, { "epoch": 14.71, "learning_rate": 5.965604185072083e-08, "loss": 0.1825, "step": 1444300 }, { "epoch": 14.72, "learning_rate": 5.9238640501160235e-08, "loss": 0.2146, "step": 1444400 }, { "epoch": 14.72, "learning_rate": 5.882270307615456e-08, "loss": 0.1932, "step": 1444500 }, { "epoch": 14.72, "learning_rate": 5.8408229596040865e-08, "loss": 0.2597, "step": 1444600 }, { "epoch": 14.72, "learning_rate": 5.79952200810796e-08, "loss": 0.1506, "step": 1444700 }, { "epoch": 14.72, "learning_rate": 5.758367455146796e-08, "loss": 0.2036, "step": 1444800 }, { "epoch": 14.72, "learning_rate": 5.717359302732983e-08, "loss": 0.1882, "step": 1444900 }, { "epoch": 14.72, "learning_rate": 5.6764975528712514e-08, "loss": 0.1704, "step": 1445000 }, { "epoch": 14.72, "learning_rate": 5.6357822075593366e-08, "loss": 0.1686, "step": 1445100 }, { "epoch": 14.72, "learning_rate": 5.5952132687883125e-08, "loss": 0.1888, "step": 1445200 }, { "epoch": 14.72, "learning_rate": 5.554790738541926e-08, "loss": 0.149, "step": 1445300 }, { "epoch": 14.73, "learning_rate": 5.514514618796263e-08, "loss": 0.1875, "step": 1445400 }, { "epoch": 14.73, "learning_rate": 5.4743849115207464e-08, "loss": 0.2, "step": 1445500 }, { "epoch": 14.73, "learning_rate": 5.4344016186771425e-08, "loss": 0.2462, "step": 1445600 }, { "epoch": 14.73, "learning_rate": 5.394564742220887e-08, "loss": 0.176, "step": 1445700 }, { "epoch": 14.73, "learning_rate": 5.3548742840997535e-08, "loss": 0.1443, "step": 1445800 }, { "epoch": 14.73, "learning_rate": 5.3153302462538577e-08, "loss": 0.2075, "step": 1445900 }, { "epoch": 14.73, "learning_rate": 5.2759326306169867e-08, "loss": 0.1896, "step": 1446000 }, { "epoch": 14.73, "learning_rate": 5.236681439115598e-08, "loss": 0.2133, "step": 1446100 }, { "epoch": 14.73, "learning_rate": 5.1975766736684916e-08, "loss": 0.196, "step": 1446200 }, { "epoch": 14.74, "learning_rate": 5.1586183361878037e-08, "loss": 0.1778, "step": 1446300 }, { "epoch": 14.74, "learning_rate": 5.1198064285783444e-08, "loss": 0.2375, "step": 1446400 }, { "epoch": 14.74, "learning_rate": 5.081140952738261e-08, "loss": 0.185, "step": 1446500 }, { "epoch": 14.74, "learning_rate": 5.042621910557377e-08, "loss": 0.174, "step": 1446600 }, { "epoch": 14.74, "learning_rate": 5.004249303919184e-08, "loss": 0.1906, "step": 1446700 }, { "epoch": 14.74, "learning_rate": 4.966023134700182e-08, "loss": 0.2078, "step": 1446800 }, { "epoch": 14.74, "learning_rate": 4.9279434047692084e-08, "loss": 0.2539, "step": 1446900 }, { "epoch": 14.74, "learning_rate": 4.8900101159881085e-08, "loss": 0.2102, "step": 1447000 }, { "epoch": 14.74, "learning_rate": 4.852223270211398e-08, "loss": 0.1696, "step": 1447100 }, { "epoch": 14.74, "learning_rate": 4.814582869287265e-08, "loss": 0.1994, "step": 1447200 }, { "epoch": 14.75, "learning_rate": 4.777088915055572e-08, "loss": 0.2103, "step": 1447300 }, { "epoch": 14.75, "learning_rate": 4.739741409349518e-08, "loss": 0.2112, "step": 1447400 }, { "epoch": 14.75, "learning_rate": 4.70254035399531e-08, "loss": 0.2281, "step": 1447500 }, { "epoch": 14.75, "learning_rate": 4.665485750812159e-08, "loss": 0.177, "step": 1447600 }, { "epoch": 14.75, "learning_rate": 4.628577601611283e-08, "loss": 0.1839, "step": 1447700 }, { "epoch": 14.75, "learning_rate": 4.591815908197572e-08, "loss": 0.2113, "step": 1447800 }, { "epoch": 14.75, "learning_rate": 4.555200672368587e-08, "loss": 0.174, "step": 1447900 }, { "epoch": 14.75, "learning_rate": 4.51873189591423e-08, "loss": 0.2018, "step": 1448000 }, { "epoch": 14.75, "learning_rate": 4.482409580617741e-08, "loss": 0.2139, "step": 1448100 }, { "epoch": 14.75, "learning_rate": 4.4462337282553664e-08, "loss": 0.1494, "step": 1448200 }, { "epoch": 14.76, "learning_rate": 4.410204340595359e-08, "loss": 0.2573, "step": 1448300 }, { "epoch": 14.76, "learning_rate": 4.3743214193996405e-08, "loss": 0.2004, "step": 1448400 }, { "epoch": 14.76, "learning_rate": 4.3385849664228094e-08, "loss": 0.1965, "step": 1448500 }, { "epoch": 14.76, "learning_rate": 4.302994983411801e-08, "loss": 0.1839, "step": 1448600 }, { "epoch": 14.76, "learning_rate": 4.267551472107223e-08, "loss": 0.196, "step": 1448700 }, { "epoch": 14.76, "learning_rate": 4.232254434241689e-08, "loss": 0.1552, "step": 1448800 }, { "epoch": 14.76, "learning_rate": 4.1971038715408194e-08, "loss": 0.191, "step": 1448900 }, { "epoch": 14.76, "learning_rate": 4.1620997857239054e-08, "loss": 0.1813, "step": 1449000 }, { "epoch": 14.76, "learning_rate": 4.127242178501911e-08, "loss": 0.2098, "step": 1449100 }, { "epoch": 14.76, "learning_rate": 4.0925310515794735e-08, "loss": 0.1737, "step": 1449200 }, { "epoch": 14.77, "learning_rate": 4.0579664066535684e-08, "loss": 0.2085, "step": 1449300 }, { "epoch": 14.77, "learning_rate": 4.023548245414177e-08, "loss": 0.2144, "step": 1449400 }, { "epoch": 14.77, "learning_rate": 3.989618561194952e-08, "loss": 0.1543, "step": 1449500 }, { "epoch": 14.77, "learning_rate": 3.955491907491338e-08, "loss": 0.2019, "step": 1449600 }, { "epoch": 14.77, "learning_rate": 3.9215117424848954e-08, "loss": 0.2141, "step": 1449700 }, { "epoch": 14.77, "learning_rate": 3.887678067836631e-08, "loss": 0.2145, "step": 1449800 }, { "epoch": 14.77, "learning_rate": 3.8539908852012194e-08, "loss": 0.2203, "step": 1449900 }, { "epoch": 14.77, "learning_rate": 3.820450196225678e-08, "loss": 0.2092, "step": 1450000 }, { "epoch": 14.77, "learning_rate": 3.7873892193291957e-08, "loss": 0.2101, "step": 1450100 }, { "epoch": 14.77, "learning_rate": 3.754140057608324e-08, "loss": 0.1997, "step": 1450200 }, { "epoch": 14.78, "learning_rate": 3.7210373944294164e-08, "loss": 0.1872, "step": 1450300 }, { "epoch": 14.78, "learning_rate": 3.6880812314111777e-08, "loss": 0.2057, "step": 1450400 }, { "epoch": 14.78, "learning_rate": 3.6552715701643204e-08, "loss": 0.161, "step": 1450500 }, { "epoch": 14.78, "learning_rate": 3.62260841229356e-08, "loss": 0.1848, "step": 1450600 }, { "epoch": 14.78, "learning_rate": 3.5900917593956193e-08, "loss": 0.1614, "step": 1450700 }, { "epoch": 14.78, "learning_rate": 3.557721613060894e-08, "loss": 0.1744, "step": 1450800 }, { "epoch": 14.78, "learning_rate": 3.52549797487145e-08, "loss": 0.206, "step": 1450900 }, { "epoch": 14.78, "learning_rate": 3.4934208464033614e-08, "loss": 0.2206, "step": 1451000 }, { "epoch": 14.78, "learning_rate": 3.4614902292247064e-08, "loss": 0.1777, "step": 1451100 }, { "epoch": 14.79, "learning_rate": 3.429706124896903e-08, "loss": 0.1583, "step": 1451200 }, { "epoch": 14.79, "learning_rate": 3.3980685349737085e-08, "loss": 0.1605, "step": 1451300 }, { "epoch": 14.79, "learning_rate": 3.366577461002218e-08, "loss": 0.2356, "step": 1451400 }, { "epoch": 14.79, "learning_rate": 3.335232904521868e-08, "loss": 0.1783, "step": 1451500 }, { "epoch": 14.79, "learning_rate": 3.304034867065764e-08, "loss": 0.2089, "step": 1451600 }, { "epoch": 14.79, "learning_rate": 3.27298335015902e-08, "loss": 0.2676, "step": 1451700 }, { "epoch": 14.79, "learning_rate": 3.242078355319755e-08, "loss": 0.1623, "step": 1451800 }, { "epoch": 14.79, "learning_rate": 3.211319884059094e-08, "loss": 0.1803, "step": 1451900 }, { "epoch": 14.79, "learning_rate": 3.180707937880834e-08, "loss": 0.1781, "step": 1452000 }, { "epoch": 14.79, "learning_rate": 3.1502425182821096e-08, "loss": 0.1636, "step": 1452100 }, { "epoch": 14.8, "learning_rate": 3.119923626752397e-08, "loss": 0.2128, "step": 1452200 }, { "epoch": 14.8, "learning_rate": 3.0897512647738436e-08, "loss": 0.1898, "step": 1452300 }, { "epoch": 14.8, "learning_rate": 3.059725433821603e-08, "loss": 0.2207, "step": 1452400 }, { "epoch": 14.8, "learning_rate": 3.0298461353645e-08, "loss": 0.2046, "step": 1452500 }, { "epoch": 14.8, "learning_rate": 3.000113370862367e-08, "loss": 0.1874, "step": 1452600 }, { "epoch": 14.8, "learning_rate": 2.970527141770041e-08, "loss": 0.1721, "step": 1452700 }, { "epoch": 14.8, "learning_rate": 2.941087449533364e-08, "loss": 0.2124, "step": 1452800 }, { "epoch": 14.8, "learning_rate": 2.911794295591852e-08, "loss": 0.2263, "step": 1452900 }, { "epoch": 14.8, "learning_rate": 2.8826476813780256e-08, "loss": 0.1808, "step": 1453000 }, { "epoch": 14.8, "learning_rate": 2.853647608316745e-08, "loss": 0.1433, "step": 1453100 }, { "epoch": 14.81, "learning_rate": 2.8247940778262094e-08, "loss": 0.2499, "step": 1453200 }, { "epoch": 14.81, "learning_rate": 2.7960870913169567e-08, "loss": 0.1748, "step": 1453300 }, { "epoch": 14.81, "learning_rate": 2.7675266501928643e-08, "loss": 0.2086, "step": 1453400 }, { "epoch": 14.81, "learning_rate": 2.739112755850148e-08, "loss": 0.2081, "step": 1453500 }, { "epoch": 14.81, "learning_rate": 2.710845409678031e-08, "loss": 0.1882, "step": 1453600 }, { "epoch": 14.81, "learning_rate": 2.682724613058407e-08, "loss": 0.1835, "step": 1453700 }, { "epoch": 14.81, "learning_rate": 2.6550293843923268e-08, "loss": 0.1971, "step": 1453800 }, { "epoch": 14.81, "learning_rate": 2.627200225466364e-08, "loss": 0.2619, "step": 1453900 }, { "epoch": 14.81, "learning_rate": 2.5995176201829563e-08, "loss": 0.2528, "step": 1454000 }, { "epoch": 14.81, "learning_rate": 2.5719815698956873e-08, "loss": 0.1576, "step": 1454100 }, { "epoch": 14.82, "learning_rate": 2.5445920759508135e-08, "loss": 0.2124, "step": 1454200 }, { "epoch": 14.82, "learning_rate": 2.517349139687264e-08, "loss": 0.2121, "step": 1454300 }, { "epoch": 14.82, "learning_rate": 2.4902527624376394e-08, "loss": 0.21, "step": 1454400 }, { "epoch": 14.82, "learning_rate": 2.4633029455262136e-08, "loss": 0.2183, "step": 1454500 }, { "epoch": 14.82, "learning_rate": 2.4364996902709325e-08, "loss": 0.2468, "step": 1454600 }, { "epoch": 14.82, "learning_rate": 2.4098429979824145e-08, "loss": 0.2606, "step": 1454700 }, { "epoch": 14.82, "learning_rate": 2.38333286996395e-08, "loss": 0.2254, "step": 1454800 }, { "epoch": 14.82, "learning_rate": 2.3569693075115027e-08, "loss": 0.1622, "step": 1454900 }, { "epoch": 14.82, "learning_rate": 2.3307523119143746e-08, "loss": 0.2186, "step": 1455000 }, { "epoch": 14.82, "learning_rate": 2.304681884454207e-08, "loss": 0.2425, "step": 1455100 }, { "epoch": 14.83, "learning_rate": 2.2787580264056473e-08, "loss": 0.1784, "step": 1455200 }, { "epoch": 14.83, "learning_rate": 2.252980739036681e-08, "loss": 0.1803, "step": 1455300 }, { "epoch": 14.83, "learning_rate": 2.2273500236069665e-08, "loss": 0.1929, "step": 1455400 }, { "epoch": 14.83, "learning_rate": 2.2018658813705017e-08, "loss": 0.1776, "step": 1455500 }, { "epoch": 14.83, "learning_rate": 2.1765283135726234e-08, "loss": 0.1966, "step": 1455600 }, { "epoch": 14.83, "learning_rate": 2.15133732145234e-08, "loss": 0.1713, "step": 1455700 }, { "epoch": 14.83, "learning_rate": 2.126292906241334e-08, "loss": 0.2224, "step": 1455800 }, { "epoch": 14.83, "learning_rate": 2.1013950691646245e-08, "loss": 0.2192, "step": 1455900 }, { "epoch": 14.83, "learning_rate": 2.0766438114389053e-08, "loss": 0.1598, "step": 1456000 }, { "epoch": 14.84, "learning_rate": 2.052284455468567e-08, "loss": 0.2115, "step": 1456100 }, { "epoch": 14.84, "learning_rate": 2.027824894245467e-08, "loss": 0.2112, "step": 1456200 }, { "epoch": 14.84, "learning_rate": 2.0035119159704795e-08, "loss": 0.2143, "step": 1456300 }, { "epoch": 14.84, "learning_rate": 1.979345521832654e-08, "loss": 0.2128, "step": 1456400 }, { "epoch": 14.84, "learning_rate": 1.9553257130137115e-08, "loss": 0.2094, "step": 1456500 }, { "epoch": 14.84, "learning_rate": 1.931452490688046e-08, "loss": 0.1535, "step": 1456600 }, { "epoch": 14.84, "learning_rate": 1.907725856022724e-08, "loss": 0.2298, "step": 1456700 }, { "epoch": 14.84, "learning_rate": 1.8841458101778175e-08, "loss": 0.1824, "step": 1456800 }, { "epoch": 14.84, "learning_rate": 1.8607123543064042e-08, "loss": 0.1816, "step": 1456900 }, { "epoch": 14.84, "learning_rate": 1.8374254895542343e-08, "loss": 0.2051, "step": 1457000 }, { "epoch": 14.85, "learning_rate": 1.81428521705973e-08, "loss": 0.1971, "step": 1457100 }, { "epoch": 14.85, "learning_rate": 1.7912915379546536e-08, "loss": 0.2332, "step": 1457200 }, { "epoch": 14.85, "learning_rate": 1.768444453363105e-08, "loss": 0.1699, "step": 1457300 }, { "epoch": 14.85, "learning_rate": 1.7457439644018582e-08, "loss": 0.2063, "step": 1457400 }, { "epoch": 14.85, "learning_rate": 1.7231900721810246e-08, "loss": 0.2011, "step": 1457500 }, { "epoch": 14.85, "learning_rate": 1.700782777803722e-08, "loss": 0.2125, "step": 1457600 }, { "epoch": 14.85, "learning_rate": 1.678522082365075e-08, "loss": 0.2297, "step": 1457700 }, { "epoch": 14.85, "learning_rate": 1.6564079869535453e-08, "loss": 0.1793, "step": 1457800 }, { "epoch": 14.85, "learning_rate": 1.634440492650602e-08, "loss": 0.2206, "step": 1457900 }, { "epoch": 14.85, "learning_rate": 1.612619600530385e-08, "loss": 0.1695, "step": 1458000 }, { "epoch": 14.86, "learning_rate": 1.5909453116593754e-08, "loss": 0.1539, "step": 1458100 }, { "epoch": 14.86, "learning_rate": 1.5694176270973914e-08, "loss": 0.1763, "step": 1458200 }, { "epoch": 14.86, "learning_rate": 1.5482496329894956e-08, "loss": 0.186, "step": 1458300 }, { "epoch": 14.86, "learning_rate": 1.5270136941277768e-08, "loss": 0.1926, "step": 1458400 }, { "epoch": 14.86, "learning_rate": 1.5059243627005837e-08, "loss": 0.1641, "step": 1458500 }, { "epoch": 14.86, "learning_rate": 1.4849816397400906e-08, "loss": 0.1857, "step": 1458600 }, { "epoch": 14.86, "learning_rate": 1.4641855262694793e-08, "loss": 0.1581, "step": 1458700 }, { "epoch": 14.86, "learning_rate": 1.4435360233059357e-08, "loss": 0.1783, "step": 1458800 }, { "epoch": 14.86, "learning_rate": 1.4230331318593193e-08, "loss": 0.1566, "step": 1458900 }, { "epoch": 14.86, "learning_rate": 1.4026768529314948e-08, "loss": 0.1741, "step": 1459000 }, { "epoch": 14.87, "learning_rate": 1.3824671875179995e-08, "loss": 0.1815, "step": 1459100 }, { "epoch": 14.87, "learning_rate": 1.3624041366073758e-08, "loss": 0.2029, "step": 1459200 }, { "epoch": 14.87, "learning_rate": 1.3424877011801728e-08, "loss": 0.1572, "step": 1459300 }, { "epoch": 14.87, "learning_rate": 1.3227178822099451e-08, "loss": 0.1806, "step": 1459400 }, { "epoch": 14.87, "learning_rate": 1.3030946806639187e-08, "loss": 0.1842, "step": 1459500 }, { "epoch": 14.87, "learning_rate": 1.2836180975013267e-08, "loss": 0.1758, "step": 1459600 }, { "epoch": 14.87, "learning_rate": 1.2642881336744072e-08, "loss": 0.1915, "step": 1459700 }, { "epoch": 14.87, "learning_rate": 1.245104790128071e-08, "loss": 0.2067, "step": 1459800 }, { "epoch": 14.87, "learning_rate": 1.2260680678005675e-08, "loss": 0.2394, "step": 1459900 }, { "epoch": 14.87, "learning_rate": 1.2071779676224859e-08, "loss": 0.2072, "step": 1460000 }, { "epoch": 14.88, "learning_rate": 1.1884344905177535e-08, "loss": 0.2015, "step": 1460100 }, { "epoch": 14.88, "learning_rate": 1.1698376374026376e-08, "loss": 0.2372, "step": 1460200 }, { "epoch": 14.88, "learning_rate": 1.1513874091860777e-08, "loss": 0.2165, "step": 1460300 }, { "epoch": 14.88, "learning_rate": 1.133083806770685e-08, "loss": 0.2048, "step": 1460400 }, { "epoch": 14.88, "learning_rate": 1.1149268310514105e-08, "loss": 0.167, "step": 1460500 }, { "epoch": 14.88, "learning_rate": 1.0969164829155443e-08, "loss": 0.1775, "step": 1460600 }, { "epoch": 14.88, "learning_rate": 1.0790527632437152e-08, "loss": 0.2064, "step": 1460700 }, { "epoch": 14.88, "learning_rate": 1.061335672909891e-08, "loss": 0.2194, "step": 1460800 }, { "epoch": 14.88, "learning_rate": 1.0437652127800457e-08, "loss": 0.1784, "step": 1460900 }, { "epoch": 14.88, "learning_rate": 1.0263413837131585e-08, "loss": 0.2059, "step": 1461000 }, { "epoch": 14.89, "learning_rate": 1.0090641865612149e-08, "loss": 0.1452, "step": 1461100 }, { "epoch": 14.89, "learning_rate": 9.919336221688724e-09, "loss": 0.1968, "step": 1461200 }, { "epoch": 14.89, "learning_rate": 9.749496913741273e-09, "loss": 0.2167, "step": 1461300 }, { "epoch": 14.89, "learning_rate": 9.581123950069826e-09, "loss": 0.2541, "step": 1461400 }, { "epoch": 14.89, "learning_rate": 9.414217338907794e-09, "loss": 0.2173, "step": 1461500 }, { "epoch": 14.89, "learning_rate": 9.248777088415316e-09, "loss": 0.1757, "step": 1461600 }, { "epoch": 14.89, "learning_rate": 9.08480320668259e-09, "loss": 0.2664, "step": 1461700 }, { "epoch": 14.89, "learning_rate": 8.922295701726535e-09, "loss": 0.2454, "step": 1461800 }, { "epoch": 14.89, "learning_rate": 8.761254581494127e-09, "loss": 0.1952, "step": 1461900 }, { "epoch": 14.9, "learning_rate": 8.601679853855737e-09, "loss": 0.2253, "step": 1462000 }, { "epoch": 14.9, "learning_rate": 8.443571526618454e-09, "loss": 0.1812, "step": 1462100 }, { "epoch": 14.9, "learning_rate": 8.286929607506099e-09, "loss": 0.1555, "step": 1462200 }, { "epoch": 14.9, "learning_rate": 8.131754104185874e-09, "loss": 0.1728, "step": 1462300 }, { "epoch": 14.9, "learning_rate": 7.978045024238379e-09, "loss": 0.229, "step": 1462400 }, { "epoch": 14.9, "learning_rate": 7.825802375184266e-09, "loss": 0.1775, "step": 1462500 }, { "epoch": 14.9, "learning_rate": 7.675026164460919e-09, "loss": 0.1774, "step": 1462600 }, { "epoch": 14.9, "learning_rate": 7.52571639944577e-09, "loss": 0.2089, "step": 1462700 }, { "epoch": 14.9, "learning_rate": 7.377873087436315e-09, "loss": 0.2295, "step": 1462800 }, { "epoch": 14.9, "learning_rate": 7.231496235663438e-09, "loss": 0.2113, "step": 1462900 }, { "epoch": 14.91, "learning_rate": 7.086585851284743e-09, "loss": 0.1892, "step": 1463000 }, { "epoch": 14.91, "learning_rate": 6.9431419413812366e-09, "loss": 0.2007, "step": 1463100 }, { "epoch": 14.91, "learning_rate": 6.801164512967306e-09, "loss": 0.2063, "step": 1463200 }, { "epoch": 14.91, "learning_rate": 6.660653572990727e-09, "loss": 0.1756, "step": 1463300 }, { "epoch": 14.91, "learning_rate": 6.5216091283126775e-09, "loss": 0.1743, "step": 1463400 }, { "epoch": 14.91, "learning_rate": 6.384031185741046e-09, "loss": 0.1917, "step": 1463500 }, { "epoch": 14.91, "learning_rate": 6.249273607091821e-09, "loss": 0.1995, "step": 1463600 }, { "epoch": 14.91, "learning_rate": 6.114614023641219e-09, "loss": 0.2464, "step": 1463700 }, { "epoch": 14.91, "learning_rate": 5.9814209621911195e-09, "loss": 0.1897, "step": 1463800 }, { "epoch": 14.91, "learning_rate": 5.849694429256313e-09, "loss": 0.1688, "step": 1463900 }, { "epoch": 14.92, "learning_rate": 5.7194344312749836e-09, "loss": 0.232, "step": 1464000 }, { "epoch": 14.92, "learning_rate": 5.590640974618699e-09, "loss": 0.2194, "step": 1464100 }, { "epoch": 14.92, "learning_rate": 5.463314065579095e-09, "loss": 0.1677, "step": 1464200 }, { "epoch": 14.92, "learning_rate": 5.3374537103878515e-09, "loss": 0.2129, "step": 1464300 }, { "epoch": 14.92, "learning_rate": 5.213059915196716e-09, "loss": 0.1721, "step": 1464400 }, { "epoch": 14.92, "learning_rate": 5.090132686087489e-09, "loss": 0.2306, "step": 1464500 }, { "epoch": 14.92, "learning_rate": 4.968672029068699e-09, "loss": 0.2228, "step": 1464600 }, { "epoch": 14.92, "learning_rate": 4.848677950082259e-09, "loss": 0.2268, "step": 1464700 }, { "epoch": 14.92, "learning_rate": 4.730150454993476e-09, "loss": 0.1743, "step": 1464800 }, { "epoch": 14.92, "learning_rate": 4.613089549597716e-09, "loss": 0.1844, "step": 1464900 }, { "epoch": 14.93, "learning_rate": 4.497495239617066e-09, "loss": 0.2272, "step": 1465000 }, { "epoch": 14.93, "learning_rate": 4.383367530707006e-09, "loss": 0.2199, "step": 1465100 }, { "epoch": 14.93, "learning_rate": 4.270706428446403e-09, "loss": 0.1622, "step": 1465200 }, { "epoch": 14.93, "learning_rate": 4.159511938340854e-09, "loss": 0.2007, "step": 1465300 }, { "epoch": 14.93, "learning_rate": 4.049784065832673e-09, "loss": 0.1835, "step": 1465400 }, { "epoch": 14.93, "learning_rate": 3.941522816280907e-09, "loss": 0.1652, "step": 1465500 }, { "epoch": 14.93, "learning_rate": 3.835788881370039e-09, "loss": 0.1841, "step": 1465600 }, { "epoch": 14.93, "learning_rate": 3.730446227183659e-09, "loss": 0.2569, "step": 1465700 }, { "epoch": 14.93, "learning_rate": 3.6265702115712008e-09, "loss": 0.1563, "step": 1465800 }, { "epoch": 14.93, "learning_rate": 3.524160839611934e-09, "loss": 0.1511, "step": 1465900 }, { "epoch": 14.94, "learning_rate": 3.423218116308524e-09, "loss": 0.1571, "step": 1466000 }, { "epoch": 14.94, "learning_rate": 3.323742046603684e-09, "loss": 0.1864, "step": 1466100 }, { "epoch": 14.94, "learning_rate": 3.2257326353535288e-09, "loss": 0.237, "step": 1466200 }, { "epoch": 14.94, "learning_rate": 3.1291898873542224e-09, "loss": 0.2096, "step": 1466300 }, { "epoch": 14.94, "learning_rate": 3.0341138073286535e-09, "loss": 0.2151, "step": 1466400 }, { "epoch": 14.94, "learning_rate": 2.9405043999197743e-09, "loss": 0.1979, "step": 1466500 }, { "epoch": 14.94, "learning_rate": 2.8483616697072555e-09, "loss": 0.2167, "step": 1466600 }, { "epoch": 14.94, "learning_rate": 2.757685621197492e-09, "loss": 0.2118, "step": 1466700 }, { "epoch": 14.94, "learning_rate": 2.668476258820274e-09, "loss": 0.1512, "step": 1466800 }, { "epoch": 14.95, "learning_rate": 2.5807335869421078e-09, "loss": 0.1933, "step": 1466900 }, { "epoch": 14.95, "learning_rate": 2.494457609849565e-09, "loss": 0.196, "step": 1467000 }, { "epoch": 14.95, "learning_rate": 2.409648331762604e-09, "loss": 0.1769, "step": 1467100 }, { "epoch": 14.95, "learning_rate": 2.326305756827907e-09, "loss": 0.2022, "step": 1467200 }, { "epoch": 14.95, "learning_rate": 2.244429889118882e-09, "loss": 0.2537, "step": 1467300 }, { "epoch": 14.95, "learning_rate": 2.1640207326423245e-09, "loss": 0.2234, "step": 1467400 }, { "epoch": 14.95, "learning_rate": 2.0850782913250933e-09, "loss": 0.1695, "step": 1467500 }, { "epoch": 14.95, "learning_rate": 2.0076025690307644e-09, "loss": 0.1944, "step": 1467600 }, { "epoch": 14.95, "learning_rate": 1.9315935695463083e-09, "loss": 0.2002, "step": 1467700 }, { "epoch": 14.95, "learning_rate": 1.8570512965854213e-09, "loss": 0.1569, "step": 1467800 }, { "epoch": 14.96, "learning_rate": 1.783975753798517e-09, "loss": 0.2275, "step": 1467900 }, { "epoch": 14.96, "learning_rate": 1.7123669447527413e-09, "loss": 0.1596, "step": 1468000 }, { "epoch": 14.96, "learning_rate": 1.6422248729519585e-09, "loss": 0.2032, "step": 1468100 }, { "epoch": 14.96, "learning_rate": 1.5735495418267577e-09, "loss": 0.2173, "step": 1468200 }, { "epoch": 14.96, "learning_rate": 1.5063409547344531e-09, "loss": 0.2068, "step": 1468300 }, { "epoch": 14.96, "learning_rate": 1.4405991149590846e-09, "loss": 0.21, "step": 1468400 }, { "epoch": 14.96, "learning_rate": 1.3769595161816639e-09, "loss": 0.159, "step": 1468500 }, { "epoch": 14.96, "learning_rate": 1.3141365130620032e-09, "loss": 0.1855, "step": 1468600 }, { "epoch": 14.96, "learning_rate": 1.2533865686670253e-09, "loss": 0.1864, "step": 1468700 }, { "epoch": 14.96, "learning_rate": 1.1934824143666756e-09, "loss": 0.2205, "step": 1468800 }, { "epoch": 14.97, "learning_rate": 1.1350450226843556e-09, "loss": 0.2109, "step": 1468900 }, { "epoch": 14.97, "learning_rate": 1.0780743964711182e-09, "loss": 0.1973, "step": 1469000 }, { "epoch": 14.97, "learning_rate": 1.022570538518064e-09, "loss": 0.1953, "step": 1469100 }, { "epoch": 14.97, "learning_rate": 9.685334515363575e-10, "loss": 0.2021, "step": 1469200 }, { "epoch": 14.97, "learning_rate": 9.159631381672196e-10, "loss": 0.189, "step": 1469300 }, { "epoch": 14.97, "learning_rate": 8.648596009852572e-10, "loss": 0.1521, "step": 1469400 }, { "epoch": 14.97, "learning_rate": 8.152228424818109e-10, "loss": 0.2066, "step": 1469500 }, { "epoch": 14.97, "learning_rate": 7.670528650915998e-10, "loss": 0.1878, "step": 1469600 }, { "epoch": 14.97, "learning_rate": 7.203496711660762e-10, "loss": 0.254, "step": 1469700 }, { "epoch": 14.97, "learning_rate": 6.751132629867485e-10, "loss": 0.2346, "step": 1469800 }, { "epoch": 14.98, "learning_rate": 6.313436427718422e-10, "loss": 0.163, "step": 1469900 }, { "epoch": 14.98, "learning_rate": 5.890408126529856e-10, "loss": 0.2341, "step": 1470000 }, { "epoch": 14.98, "learning_rate": 5.482047747051855e-10, "loss": 0.2287, "step": 1470100 }, { "epoch": 14.98, "learning_rate": 5.088355309235126e-10, "loss": 0.1854, "step": 1470200 }, { "epoch": 14.98, "learning_rate": 4.709330832297631e-10, "loss": 0.2252, "step": 1470300 }, { "epoch": 14.98, "learning_rate": 4.344974334824503e-10, "loss": 0.1867, "step": 1470400 }, { "epoch": 14.98, "learning_rate": 3.9952858345682076e-10, "loss": 0.1612, "step": 1470500 }, { "epoch": 14.98, "learning_rate": 3.6602653486816907e-10, "loss": 0.2189, "step": 1470600 }, { "epoch": 14.98, "learning_rate": 3.3399128935185375e-10, "loss": 0.2209, "step": 1470700 }, { "epoch": 14.98, "learning_rate": 3.0342284847661993e-10, "loss": 0.2041, "step": 1470800 }, { "epoch": 14.99, "learning_rate": 2.743212137346074e-10, "loss": 0.2219, "step": 1470900 }, { "epoch": 14.99, "learning_rate": 2.466863865480118e-10, "loss": 0.2514, "step": 1471000 }, { "epoch": 14.99, "learning_rate": 2.2051836826908477e-10, "loss": 0.2179, "step": 1471100 }, { "epoch": 14.99, "learning_rate": 1.9581716017680328e-10, "loss": 0.2539, "step": 1471200 }, { "epoch": 14.99, "learning_rate": 1.725827634802002e-10, "loss": 0.2181, "step": 1471300 }, { "epoch": 14.99, "learning_rate": 1.5081517931503364e-10, "loss": 0.2339, "step": 1471400 }, { "epoch": 14.99, "learning_rate": 1.3051440874711773e-10, "loss": 0.1927, "step": 1471500 }, { "epoch": 14.99, "learning_rate": 1.1168045276566119e-10, "loss": 0.1793, "step": 1471600 }, { "epoch": 14.99, "learning_rate": 9.431331228992867e-11, "loss": 0.2391, "step": 1471700 }, { "epoch": 14.99, "learning_rate": 7.841298817590215e-11, "loss": 0.21, "step": 1471800 }, { "epoch": 15.0, "learning_rate": 6.397948119629682e-11, "loss": 0.2037, "step": 1471900 }, { "epoch": 15.0, "learning_rate": 5.101279205721454e-11, "loss": 0.2213, "step": 1472000 }, { "epoch": 15.0, "learning_rate": 3.9620659350037006e-11, "loss": 0.1799, "step": 1472100 }, { "epoch": 15.0, "learning_rate": 2.9572939530542185e-11, "loss": 0.2427, "step": 1472200 } ], "logging_steps": 100, "max_steps": 1472295, "num_train_epochs": 15, "save_steps": 500, "total_flos": 1.2061575769608998e+22, "trial_name": null, "trial_params": null }